{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "eval_steps": 8310, "global_step": 207750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012033694344163658, "eval_loss": 3.815816879272461, "eval_runtime": 112.4917, "eval_samples_per_second": 65.667, "eval_steps_per_second": 8.214, "step": 1 }, { "epoch": 0.0012033694344163659, "grad_norm": 5.570976257324219, "learning_rate": 3.8513383400731756e-07, "loss": 3.7364, "step": 10 }, { "epoch": 0.0024067388688327317, "grad_norm": 4.148311614990234, "learning_rate": 7.702676680146351e-07, "loss": 3.8316, "step": 20 }, { "epoch": 0.0036101083032490976, "grad_norm": 5.7906084060668945, "learning_rate": 1.1554015020219527e-06, "loss": 3.8671, "step": 30 }, { "epoch": 0.0048134777376654635, "grad_norm": 3.5879058837890625, "learning_rate": 1.5405353360292703e-06, "loss": 3.7271, "step": 40 }, { "epoch": 0.006016847172081829, "grad_norm": 8.534287452697754, "learning_rate": 1.925669170036588e-06, "loss": 3.7304, "step": 50 }, { "epoch": 0.007220216606498195, "grad_norm": 4.045993804931641, "learning_rate": 2.3108030040439054e-06, "loss": 3.6963, "step": 60 }, { "epoch": 0.00842358604091456, "grad_norm": 3.4035534858703613, "learning_rate": 2.695936838051223e-06, "loss": 3.6405, "step": 70 }, { "epoch": 0.009626955475330927, "grad_norm": 5.412262439727783, "learning_rate": 3.0810706720585405e-06, "loss": 3.4811, "step": 80 }, { "epoch": 0.010830324909747292, "grad_norm": 3.6570475101470947, "learning_rate": 3.466204506065858e-06, "loss": 3.4819, "step": 90 }, { "epoch": 0.012033694344163659, "grad_norm": 6.75805139541626, "learning_rate": 3.851338340073176e-06, "loss": 3.5358, "step": 100 }, { "epoch": 0.013237063778580024, "grad_norm": 4.606562614440918, "learning_rate": 4.236472174080494e-06, "loss": 3.3392, "step": 110 }, { "epoch": 0.01444043321299639, "grad_norm": 3.2331383228302, "learning_rate": 4.621606008087811e-06, "loss": 3.4324, "step": 120 }, { "epoch": 0.015643802647412757, "grad_norm": 5.177463054656982, "learning_rate": 5.006739842095128e-06, "loss": 3.2908, "step": 130 }, { "epoch": 0.01684717208182912, "grad_norm": 3.7441446781158447, "learning_rate": 5.391873676102446e-06, "loss": 3.2742, "step": 140 }, { "epoch": 0.018050541516245487, "grad_norm": 8.747030258178711, "learning_rate": 5.777007510109764e-06, "loss": 3.3198, "step": 150 }, { "epoch": 0.019253910950661854, "grad_norm": 4.705621242523193, "learning_rate": 6.162141344117081e-06, "loss": 3.267, "step": 160 }, { "epoch": 0.02045728038507822, "grad_norm": 3.8838236331939697, "learning_rate": 6.547275178124398e-06, "loss": 3.1912, "step": 170 }, { "epoch": 0.021660649819494584, "grad_norm": 5.968881607055664, "learning_rate": 6.932409012131716e-06, "loss": 3.2778, "step": 180 }, { "epoch": 0.02286401925391095, "grad_norm": 4.607678413391113, "learning_rate": 7.317542846139033e-06, "loss": 3.2558, "step": 190 }, { "epoch": 0.024067388688327317, "grad_norm": 7.0164594650268555, "learning_rate": 7.702676680146351e-06, "loss": 3.273, "step": 200 }, { "epoch": 0.02527075812274368, "grad_norm": 5.379019737243652, "learning_rate": 8.08781051415367e-06, "loss": 3.0331, "step": 210 }, { "epoch": 0.026474127557160047, "grad_norm": 4.538290023803711, "learning_rate": 8.472944348160987e-06, "loss": 3.2632, "step": 220 }, { "epoch": 0.027677496991576414, "grad_norm": 6.160151481628418, "learning_rate": 8.858078182168304e-06, "loss": 3.1785, "step": 230 }, { "epoch": 0.02888086642599278, "grad_norm": 4.559308052062988, "learning_rate": 9.243212016175622e-06, "loss": 3.2087, "step": 240 }, { "epoch": 0.030084235860409144, "grad_norm": 7.7826104164123535, "learning_rate": 9.62834585018294e-06, "loss": 3.2275, "step": 250 }, { "epoch": 0.031287605294825514, "grad_norm": 4.852511882781982, "learning_rate": 1.0013479684190256e-05, "loss": 3.0636, "step": 260 }, { "epoch": 0.032490974729241874, "grad_norm": 4.055009365081787, "learning_rate": 1.0398613518197574e-05, "loss": 3.1303, "step": 270 }, { "epoch": 0.03369434416365824, "grad_norm": 6.477360725402832, "learning_rate": 1.0783747352204892e-05, "loss": 3.0855, "step": 280 }, { "epoch": 0.03489771359807461, "grad_norm": 4.41857385635376, "learning_rate": 1.1168881186212208e-05, "loss": 3.1672, "step": 290 }, { "epoch": 0.036101083032490974, "grad_norm": 8.869633674621582, "learning_rate": 1.1554015020219528e-05, "loss": 3.0891, "step": 300 }, { "epoch": 0.03730445246690734, "grad_norm": 6.276860237121582, "learning_rate": 1.1939148854226844e-05, "loss": 3.0637, "step": 310 }, { "epoch": 0.03850782190132371, "grad_norm": 3.9365427494049072, "learning_rate": 1.2324282688234162e-05, "loss": 3.0161, "step": 320 }, { "epoch": 0.039711191335740074, "grad_norm": 5.813107490539551, "learning_rate": 1.270941652224148e-05, "loss": 3.002, "step": 330 }, { "epoch": 0.04091456077015644, "grad_norm": 4.736392498016357, "learning_rate": 1.3094550356248796e-05, "loss": 3.1451, "step": 340 }, { "epoch": 0.0421179302045728, "grad_norm": 7.715930938720703, "learning_rate": 1.3479684190256114e-05, "loss": 3.0218, "step": 350 }, { "epoch": 0.04332129963898917, "grad_norm": 5.3318328857421875, "learning_rate": 1.3864818024263432e-05, "loss": 2.9514, "step": 360 }, { "epoch": 0.044524669073405534, "grad_norm": 4.3995466232299805, "learning_rate": 1.4249951858270749e-05, "loss": 3.0154, "step": 370 }, { "epoch": 0.0457280385078219, "grad_norm": 5.849190711975098, "learning_rate": 1.4635085692278067e-05, "loss": 2.9262, "step": 380 }, { "epoch": 0.04693140794223827, "grad_norm": 5.229336261749268, "learning_rate": 1.5020219526285385e-05, "loss": 2.9599, "step": 390 }, { "epoch": 0.048134777376654635, "grad_norm": 7.649044513702393, "learning_rate": 1.5405353360292703e-05, "loss": 3.0405, "step": 400 }, { "epoch": 0.049338146811071, "grad_norm": 6.180991172790527, "learning_rate": 1.579048719430002e-05, "loss": 3.0349, "step": 410 }, { "epoch": 0.05054151624548736, "grad_norm": 3.732105016708374, "learning_rate": 1.617562102830734e-05, "loss": 3.0039, "step": 420 }, { "epoch": 0.05174488567990373, "grad_norm": 6.500801086425781, "learning_rate": 1.6560754862314655e-05, "loss": 3.1118, "step": 430 }, { "epoch": 0.052948255114320095, "grad_norm": 4.652256011962891, "learning_rate": 1.6945888696321974e-05, "loss": 2.8496, "step": 440 }, { "epoch": 0.05415162454873646, "grad_norm": 7.485434055328369, "learning_rate": 1.733102253032929e-05, "loss": 3.0901, "step": 450 }, { "epoch": 0.05535499398315283, "grad_norm": 5.527116298675537, "learning_rate": 1.7716156364336607e-05, "loss": 3.0363, "step": 460 }, { "epoch": 0.056558363417569195, "grad_norm": 3.4333620071411133, "learning_rate": 1.8101290198343927e-05, "loss": 2.9691, "step": 470 }, { "epoch": 0.05776173285198556, "grad_norm": 6.362924575805664, "learning_rate": 1.8486424032351243e-05, "loss": 2.9124, "step": 480 }, { "epoch": 0.05896510228640193, "grad_norm": 4.653800010681152, "learning_rate": 1.887155786635856e-05, "loss": 2.9238, "step": 490 }, { "epoch": 0.06016847172081829, "grad_norm": 7.520432949066162, "learning_rate": 1.925669170036588e-05, "loss": 3.0473, "step": 500 }, { "epoch": 0.061371841155234655, "grad_norm": 5.268134593963623, "learning_rate": 1.9641825534373195e-05, "loss": 2.9194, "step": 510 }, { "epoch": 0.06257521058965103, "grad_norm": 3.7793736457824707, "learning_rate": 2.002695936838051e-05, "loss": 2.8513, "step": 520 }, { "epoch": 0.06377858002406739, "grad_norm": 4.988191604614258, "learning_rate": 2.041209320238783e-05, "loss": 2.9907, "step": 530 }, { "epoch": 0.06498194945848375, "grad_norm": 3.8745498657226562, "learning_rate": 2.0797227036395148e-05, "loss": 2.9702, "step": 540 }, { "epoch": 0.06618531889290012, "grad_norm": 6.353143215179443, "learning_rate": 2.1182360870402464e-05, "loss": 2.8286, "step": 550 }, { "epoch": 0.06738868832731648, "grad_norm": 5.642485618591309, "learning_rate": 2.1567494704409784e-05, "loss": 2.8682, "step": 560 }, { "epoch": 0.06859205776173286, "grad_norm": 3.590162992477417, "learning_rate": 2.19526285384171e-05, "loss": 2.8957, "step": 570 }, { "epoch": 0.06979542719614922, "grad_norm": 5.774114608764648, "learning_rate": 2.2337762372424416e-05, "loss": 2.8963, "step": 580 }, { "epoch": 0.07099879663056559, "grad_norm": 4.017606258392334, "learning_rate": 2.2722896206431736e-05, "loss": 2.8724, "step": 590 }, { "epoch": 0.07220216606498195, "grad_norm": 7.137066841125488, "learning_rate": 2.3108030040439056e-05, "loss": 2.9016, "step": 600 }, { "epoch": 0.07340553549939831, "grad_norm": 4.75874137878418, "learning_rate": 2.3493163874446372e-05, "loss": 2.8465, "step": 610 }, { "epoch": 0.07460890493381468, "grad_norm": 3.671997547149658, "learning_rate": 2.3878297708453688e-05, "loss": 2.9338, "step": 620 }, { "epoch": 0.07581227436823104, "grad_norm": 5.378019332885742, "learning_rate": 2.4263431542461008e-05, "loss": 2.8506, "step": 630 }, { "epoch": 0.07701564380264742, "grad_norm": 4.3311662673950195, "learning_rate": 2.4648565376468324e-05, "loss": 2.9701, "step": 640 }, { "epoch": 0.07821901323706378, "grad_norm": 5.478089809417725, "learning_rate": 2.503369921047564e-05, "loss": 3.0287, "step": 650 }, { "epoch": 0.07942238267148015, "grad_norm": 4.632708549499512, "learning_rate": 2.541883304448296e-05, "loss": 2.8668, "step": 660 }, { "epoch": 0.08062575210589651, "grad_norm": 3.374662399291992, "learning_rate": 2.5803966878490276e-05, "loss": 2.8472, "step": 670 }, { "epoch": 0.08182912154031288, "grad_norm": 5.122900485992432, "learning_rate": 2.6189100712497593e-05, "loss": 2.8916, "step": 680 }, { "epoch": 0.08303249097472924, "grad_norm": 4.146032810211182, "learning_rate": 2.6574234546504912e-05, "loss": 2.9094, "step": 690 }, { "epoch": 0.0842358604091456, "grad_norm": 6.2430925369262695, "learning_rate": 2.695936838051223e-05, "loss": 2.8602, "step": 700 }, { "epoch": 0.08543922984356198, "grad_norm": 5.070657730102539, "learning_rate": 2.7344502214519545e-05, "loss": 2.8657, "step": 710 }, { "epoch": 0.08664259927797834, "grad_norm": 3.6268534660339355, "learning_rate": 2.7729636048526865e-05, "loss": 2.9479, "step": 720 }, { "epoch": 0.08784596871239471, "grad_norm": 5.12242317199707, "learning_rate": 2.811476988253418e-05, "loss": 2.9425, "step": 730 }, { "epoch": 0.08904933814681107, "grad_norm": 3.7903456687927246, "learning_rate": 2.8499903716541497e-05, "loss": 2.8374, "step": 740 }, { "epoch": 0.09025270758122744, "grad_norm": 6.039148330688477, "learning_rate": 2.8885037550548817e-05, "loss": 2.7863, "step": 750 }, { "epoch": 0.0914560770156438, "grad_norm": 4.456302165985107, "learning_rate": 2.9270171384556133e-05, "loss": 2.6648, "step": 760 }, { "epoch": 0.09265944645006016, "grad_norm": 3.65207576751709, "learning_rate": 2.965530521856345e-05, "loss": 2.8954, "step": 770 }, { "epoch": 0.09386281588447654, "grad_norm": 4.908219337463379, "learning_rate": 3.004043905257077e-05, "loss": 2.8401, "step": 780 }, { "epoch": 0.0950661853188929, "grad_norm": 4.265645503997803, "learning_rate": 3.0425572886578085e-05, "loss": 2.9104, "step": 790 }, { "epoch": 0.09626955475330927, "grad_norm": 6.547182559967041, "learning_rate": 3.0810706720585405e-05, "loss": 2.8935, "step": 800 }, { "epoch": 0.09747292418772563, "grad_norm": 4.835933208465576, "learning_rate": 3.119584055459272e-05, "loss": 2.6432, "step": 810 }, { "epoch": 0.098676293622142, "grad_norm": 3.267350435256958, "learning_rate": 3.158097438860004e-05, "loss": 2.8195, "step": 820 }, { "epoch": 0.09987966305655836, "grad_norm": 4.60970401763916, "learning_rate": 3.1966108222607354e-05, "loss": 2.6222, "step": 830 }, { "epoch": 0.10108303249097472, "grad_norm": 4.191972255706787, "learning_rate": 3.235124205661468e-05, "loss": 2.7868, "step": 840 }, { "epoch": 0.1022864019253911, "grad_norm": 6.102560997009277, "learning_rate": 3.273637589062199e-05, "loss": 2.7755, "step": 850 }, { "epoch": 0.10348977135980746, "grad_norm": 4.372929573059082, "learning_rate": 3.312150972462931e-05, "loss": 2.823, "step": 860 }, { "epoch": 0.10469314079422383, "grad_norm": 3.2868309020996094, "learning_rate": 3.350664355863663e-05, "loss": 2.6892, "step": 870 }, { "epoch": 0.10589651022864019, "grad_norm": 4.598580837249756, "learning_rate": 3.389177739264395e-05, "loss": 2.8837, "step": 880 }, { "epoch": 0.10709987966305656, "grad_norm": 3.940325975418091, "learning_rate": 3.4276911226651265e-05, "loss": 2.8312, "step": 890 }, { "epoch": 0.10830324909747292, "grad_norm": 6.136560916900635, "learning_rate": 3.466204506065858e-05, "loss": 2.747, "step": 900 }, { "epoch": 0.1095066185318893, "grad_norm": 4.117788314819336, "learning_rate": 3.50471788946659e-05, "loss": 2.6159, "step": 910 }, { "epoch": 0.11070998796630566, "grad_norm": 4.032705307006836, "learning_rate": 3.5432312728673214e-05, "loss": 2.7615, "step": 920 }, { "epoch": 0.11191335740072202, "grad_norm": 5.094011306762695, "learning_rate": 3.581744656268054e-05, "loss": 2.7753, "step": 930 }, { "epoch": 0.11311672683513839, "grad_norm": 3.8412024974823, "learning_rate": 3.6202580396687854e-05, "loss": 2.7813, "step": 940 }, { "epoch": 0.11432009626955475, "grad_norm": 4.643109321594238, "learning_rate": 3.658771423069517e-05, "loss": 2.9567, "step": 950 }, { "epoch": 0.11552346570397112, "grad_norm": 4.502158164978027, "learning_rate": 3.6972848064702486e-05, "loss": 2.6247, "step": 960 }, { "epoch": 0.11672683513838748, "grad_norm": 2.5785179138183594, "learning_rate": 3.73579818987098e-05, "loss": 2.7178, "step": 970 }, { "epoch": 0.11793020457280386, "grad_norm": 4.689826011657715, "learning_rate": 3.774311573271712e-05, "loss": 2.8354, "step": 980 }, { "epoch": 0.11913357400722022, "grad_norm": 4.020382404327393, "learning_rate": 3.812824956672444e-05, "loss": 2.6598, "step": 990 }, { "epoch": 0.12033694344163658, "grad_norm": 5.820950031280518, "learning_rate": 3.851338340073176e-05, "loss": 2.8073, "step": 1000 }, { "epoch": 0.12154031287605295, "grad_norm": 3.5636305809020996, "learning_rate": 3.8898517234739074e-05, "loss": 2.5984, "step": 1010 }, { "epoch": 0.12274368231046931, "grad_norm": 3.0366146564483643, "learning_rate": 3.928365106874639e-05, "loss": 2.7126, "step": 1020 }, { "epoch": 0.12394705174488568, "grad_norm": 5.1952385902404785, "learning_rate": 3.966878490275371e-05, "loss": 2.7211, "step": 1030 }, { "epoch": 0.12515042117930206, "grad_norm": 4.054571151733398, "learning_rate": 4.005391873676102e-05, "loss": 2.6443, "step": 1040 }, { "epoch": 0.1263537906137184, "grad_norm": 6.402095794677734, "learning_rate": 4.0439052570768346e-05, "loss": 2.7943, "step": 1050 }, { "epoch": 0.12755716004813478, "grad_norm": 4.076451301574707, "learning_rate": 4.082418640477566e-05, "loss": 2.4575, "step": 1060 }, { "epoch": 0.12876052948255115, "grad_norm": 3.4446961879730225, "learning_rate": 4.120932023878298e-05, "loss": 2.6509, "step": 1070 }, { "epoch": 0.1299638989169675, "grad_norm": 5.1258625984191895, "learning_rate": 4.1594454072790295e-05, "loss": 2.6947, "step": 1080 }, { "epoch": 0.13116726835138387, "grad_norm": 4.072508335113525, "learning_rate": 4.197958790679761e-05, "loss": 2.7828, "step": 1090 }, { "epoch": 0.13237063778580024, "grad_norm": 6.007488250732422, "learning_rate": 4.236472174080493e-05, "loss": 2.8042, "step": 1100 }, { "epoch": 0.13357400722021662, "grad_norm": 4.070166110992432, "learning_rate": 4.274985557481225e-05, "loss": 2.6465, "step": 1110 }, { "epoch": 0.13477737665463296, "grad_norm": 3.5824296474456787, "learning_rate": 4.313498940881957e-05, "loss": 2.6448, "step": 1120 }, { "epoch": 0.13598074608904934, "grad_norm": 4.705931186676025, "learning_rate": 4.3520123242826883e-05, "loss": 2.6401, "step": 1130 }, { "epoch": 0.1371841155234657, "grad_norm": 3.6984827518463135, "learning_rate": 4.39052570768342e-05, "loss": 2.5397, "step": 1140 }, { "epoch": 0.13838748495788206, "grad_norm": 5.334888458251953, "learning_rate": 4.4290390910841516e-05, "loss": 2.5313, "step": 1150 }, { "epoch": 0.13959085439229843, "grad_norm": 4.262475490570068, "learning_rate": 4.467552474484883e-05, "loss": 2.6548, "step": 1160 }, { "epoch": 0.1407942238267148, "grad_norm": 3.3582048416137695, "learning_rate": 4.506065857885615e-05, "loss": 2.5978, "step": 1170 }, { "epoch": 0.14199759326113118, "grad_norm": 4.077620983123779, "learning_rate": 4.544579241286347e-05, "loss": 2.716, "step": 1180 }, { "epoch": 0.14320096269554752, "grad_norm": 3.881532669067383, "learning_rate": 4.5830926246870795e-05, "loss": 2.7427, "step": 1190 }, { "epoch": 0.1444043321299639, "grad_norm": 4.926413059234619, "learning_rate": 4.621606008087811e-05, "loss": 2.5799, "step": 1200 }, { "epoch": 0.14560770156438027, "grad_norm": 3.9430980682373047, "learning_rate": 4.660119391488543e-05, "loss": 2.4586, "step": 1210 }, { "epoch": 0.14681107099879662, "grad_norm": 2.823546886444092, "learning_rate": 4.6986327748892744e-05, "loss": 2.6815, "step": 1220 }, { "epoch": 0.148014440433213, "grad_norm": 4.250391483306885, "learning_rate": 4.737146158290006e-05, "loss": 2.5357, "step": 1230 }, { "epoch": 0.14921780986762936, "grad_norm": 5.145439624786377, "learning_rate": 4.7756595416907376e-05, "loss": 2.4226, "step": 1240 }, { "epoch": 0.15042117930204574, "grad_norm": 6.212322235107422, "learning_rate": 4.81417292509147e-05, "loss": 2.6047, "step": 1250 }, { "epoch": 0.15162454873646208, "grad_norm": 4.376655578613281, "learning_rate": 4.8526863084922016e-05, "loss": 2.3968, "step": 1260 }, { "epoch": 0.15282791817087846, "grad_norm": 3.185271978378296, "learning_rate": 4.891199691892933e-05, "loss": 2.6196, "step": 1270 }, { "epoch": 0.15403128760529483, "grad_norm": 4.398382663726807, "learning_rate": 4.929713075293665e-05, "loss": 2.5637, "step": 1280 }, { "epoch": 0.1552346570397112, "grad_norm": 3.7962944507598877, "learning_rate": 4.9682264586943964e-05, "loss": 2.4871, "step": 1290 }, { "epoch": 0.15643802647412755, "grad_norm": 5.663314342498779, "learning_rate": 5.006739842095128e-05, "loss": 2.6053, "step": 1300 }, { "epoch": 0.15764139590854392, "grad_norm": 4.08873987197876, "learning_rate": 5.04525322549586e-05, "loss": 2.3832, "step": 1310 }, { "epoch": 0.1588447653429603, "grad_norm": 3.060068130493164, "learning_rate": 5.083766608896592e-05, "loss": 2.5909, "step": 1320 }, { "epoch": 0.16004813477737664, "grad_norm": 3.5497021675109863, "learning_rate": 5.122279992297323e-05, "loss": 2.4943, "step": 1330 }, { "epoch": 0.16125150421179302, "grad_norm": 3.9688732624053955, "learning_rate": 5.160793375698055e-05, "loss": 2.4693, "step": 1340 }, { "epoch": 0.1624548736462094, "grad_norm": 5.243858337402344, "learning_rate": 5.1993067590987876e-05, "loss": 2.5234, "step": 1350 }, { "epoch": 0.16365824308062576, "grad_norm": 4.390367031097412, "learning_rate": 5.2378201424995185e-05, "loss": 2.5178, "step": 1360 }, { "epoch": 0.1648616125150421, "grad_norm": 2.778979778289795, "learning_rate": 5.276333525900251e-05, "loss": 2.7806, "step": 1370 }, { "epoch": 0.16606498194945848, "grad_norm": 5.424134254455566, "learning_rate": 5.3148469093009825e-05, "loss": 2.6223, "step": 1380 }, { "epoch": 0.16726835138387486, "grad_norm": 3.2314844131469727, "learning_rate": 5.353360292701715e-05, "loss": 2.4702, "step": 1390 }, { "epoch": 0.1684717208182912, "grad_norm": 5.493102550506592, "learning_rate": 5.391873676102446e-05, "loss": 2.6778, "step": 1400 }, { "epoch": 0.16967509025270758, "grad_norm": 4.03582239151001, "learning_rate": 5.430387059503178e-05, "loss": 2.4206, "step": 1410 }, { "epoch": 0.17087845968712395, "grad_norm": 3.435091018676758, "learning_rate": 5.468900442903909e-05, "loss": 2.7106, "step": 1420 }, { "epoch": 0.17208182912154033, "grad_norm": 4.965870380401611, "learning_rate": 5.507413826304641e-05, "loss": 2.4973, "step": 1430 }, { "epoch": 0.17328519855595667, "grad_norm": 3.4605584144592285, "learning_rate": 5.545927209705373e-05, "loss": 2.5029, "step": 1440 }, { "epoch": 0.17448856799037304, "grad_norm": 5.244032382965088, "learning_rate": 5.584440593106105e-05, "loss": 2.5749, "step": 1450 }, { "epoch": 0.17569193742478942, "grad_norm": 4.023014068603516, "learning_rate": 5.622953976506836e-05, "loss": 2.5202, "step": 1460 }, { "epoch": 0.17689530685920576, "grad_norm": 2.943232297897339, "learning_rate": 5.6614673599075685e-05, "loss": 2.6431, "step": 1470 }, { "epoch": 0.17809867629362214, "grad_norm": 5.903708457946777, "learning_rate": 5.6999807433082994e-05, "loss": 2.5852, "step": 1480 }, { "epoch": 0.1793020457280385, "grad_norm": 4.0138444900512695, "learning_rate": 5.738494126709032e-05, "loss": 2.4998, "step": 1490 }, { "epoch": 0.18050541516245489, "grad_norm": 4.759198188781738, "learning_rate": 5.7770075101097634e-05, "loss": 2.4394, "step": 1500 }, { "epoch": 0.18170878459687123, "grad_norm": 4.2235188484191895, "learning_rate": 5.815520893510496e-05, "loss": 2.6585, "step": 1510 }, { "epoch": 0.1829121540312876, "grad_norm": 3.335575580596924, "learning_rate": 5.8540342769112266e-05, "loss": 2.4832, "step": 1520 }, { "epoch": 0.18411552346570398, "grad_norm": 4.41403341293335, "learning_rate": 5.892547660311959e-05, "loss": 2.3476, "step": 1530 }, { "epoch": 0.18531889290012032, "grad_norm": 3.597169876098633, "learning_rate": 5.93106104371269e-05, "loss": 2.4265, "step": 1540 }, { "epoch": 0.1865222623345367, "grad_norm": 5.240777015686035, "learning_rate": 5.969574427113422e-05, "loss": 2.4832, "step": 1550 }, { "epoch": 0.18772563176895307, "grad_norm": 4.511013984680176, "learning_rate": 6.008087810514154e-05, "loss": 2.456, "step": 1560 }, { "epoch": 0.18892900120336945, "grad_norm": 3.002626895904541, "learning_rate": 6.046601193914886e-05, "loss": 2.3578, "step": 1570 }, { "epoch": 0.1901323706377858, "grad_norm": 4.632908821105957, "learning_rate": 6.085114577315617e-05, "loss": 2.4645, "step": 1580 }, { "epoch": 0.19133574007220217, "grad_norm": 3.4771511554718018, "learning_rate": 6.12362796071635e-05, "loss": 2.4285, "step": 1590 }, { "epoch": 0.19253910950661854, "grad_norm": 6.660020351409912, "learning_rate": 6.162141344117081e-05, "loss": 2.6111, "step": 1600 }, { "epoch": 0.19374247894103488, "grad_norm": 3.7338061332702637, "learning_rate": 6.200654727517813e-05, "loss": 2.4041, "step": 1610 }, { "epoch": 0.19494584837545126, "grad_norm": 3.1107709407806396, "learning_rate": 6.239168110918544e-05, "loss": 2.5913, "step": 1620 }, { "epoch": 0.19614921780986763, "grad_norm": 4.126433372497559, "learning_rate": 6.277681494319276e-05, "loss": 2.5915, "step": 1630 }, { "epoch": 0.197352587244284, "grad_norm": 4.266234874725342, "learning_rate": 6.316194877720008e-05, "loss": 2.3163, "step": 1640 }, { "epoch": 0.19855595667870035, "grad_norm": 5.36894416809082, "learning_rate": 6.354708261120739e-05, "loss": 2.5416, "step": 1650 }, { "epoch": 0.19975932611311673, "grad_norm": 3.78735613822937, "learning_rate": 6.393221644521471e-05, "loss": 2.463, "step": 1660 }, { "epoch": 0.2009626955475331, "grad_norm": 3.2801718711853027, "learning_rate": 6.431735027922204e-05, "loss": 2.5265, "step": 1670 }, { "epoch": 0.20216606498194944, "grad_norm": 4.672789096832275, "learning_rate": 6.470248411322935e-05, "loss": 2.3468, "step": 1680 }, { "epoch": 0.20336943441636582, "grad_norm": 4.022416591644287, "learning_rate": 6.508761794723667e-05, "loss": 2.4621, "step": 1690 }, { "epoch": 0.2045728038507822, "grad_norm": 5.079505443572998, "learning_rate": 6.547275178124399e-05, "loss": 2.6353, "step": 1700 }, { "epoch": 0.20577617328519857, "grad_norm": 4.310054779052734, "learning_rate": 6.58578856152513e-05, "loss": 2.3306, "step": 1710 }, { "epoch": 0.2069795427196149, "grad_norm": 2.7068967819213867, "learning_rate": 6.624301944925862e-05, "loss": 2.5586, "step": 1720 }, { "epoch": 0.20818291215403129, "grad_norm": 4.833555221557617, "learning_rate": 6.662815328326594e-05, "loss": 2.5028, "step": 1730 }, { "epoch": 0.20938628158844766, "grad_norm": 3.597747802734375, "learning_rate": 6.701328711727327e-05, "loss": 2.4376, "step": 1740 }, { "epoch": 0.21058965102286403, "grad_norm": 5.5854010581970215, "learning_rate": 6.739842095128057e-05, "loss": 2.5534, "step": 1750 }, { "epoch": 0.21179302045728038, "grad_norm": 3.6864216327667236, "learning_rate": 6.77835547852879e-05, "loss": 2.4125, "step": 1760 }, { "epoch": 0.21299638989169675, "grad_norm": 2.955962657928467, "learning_rate": 6.81686886192952e-05, "loss": 2.5775, "step": 1770 }, { "epoch": 0.21419975932611313, "grad_norm": 5.092808723449707, "learning_rate": 6.855382245330253e-05, "loss": 2.3907, "step": 1780 }, { "epoch": 0.21540312876052947, "grad_norm": 3.32344913482666, "learning_rate": 6.893895628730985e-05, "loss": 2.5718, "step": 1790 }, { "epoch": 0.21660649819494585, "grad_norm": 4.911040782928467, "learning_rate": 6.932409012131716e-05, "loss": 2.5209, "step": 1800 }, { "epoch": 0.21780986762936222, "grad_norm": 3.9270997047424316, "learning_rate": 6.970922395532448e-05, "loss": 2.4563, "step": 1810 }, { "epoch": 0.2190132370637786, "grad_norm": 2.8259575366973877, "learning_rate": 7.00943577893318e-05, "loss": 2.5424, "step": 1820 }, { "epoch": 0.22021660649819494, "grad_norm": 4.02173376083374, "learning_rate": 7.047949162333911e-05, "loss": 2.4887, "step": 1830 }, { "epoch": 0.2214199759326113, "grad_norm": 4.133205890655518, "learning_rate": 7.086462545734643e-05, "loss": 2.5443, "step": 1840 }, { "epoch": 0.2226233453670277, "grad_norm": 5.094078540802002, "learning_rate": 7.124975929135374e-05, "loss": 2.5202, "step": 1850 }, { "epoch": 0.22382671480144403, "grad_norm": 4.140336513519287, "learning_rate": 7.163489312536107e-05, "loss": 2.4474, "step": 1860 }, { "epoch": 0.2250300842358604, "grad_norm": 2.8555362224578857, "learning_rate": 7.202002695936838e-05, "loss": 2.3452, "step": 1870 }, { "epoch": 0.22623345367027678, "grad_norm": 4.063607692718506, "learning_rate": 7.240516079337571e-05, "loss": 2.3434, "step": 1880 }, { "epoch": 0.22743682310469315, "grad_norm": 3.575995683670044, "learning_rate": 7.279029462738301e-05, "loss": 2.2894, "step": 1890 }, { "epoch": 0.2286401925391095, "grad_norm": 4.515583515167236, "learning_rate": 7.317542846139034e-05, "loss": 2.4199, "step": 1900 }, { "epoch": 0.22984356197352587, "grad_norm": 4.865725994110107, "learning_rate": 7.356056229539766e-05, "loss": 2.2267, "step": 1910 }, { "epoch": 0.23104693140794225, "grad_norm": 2.9381701946258545, "learning_rate": 7.394569612940497e-05, "loss": 2.3357, "step": 1920 }, { "epoch": 0.2322503008423586, "grad_norm": 5.303056240081787, "learning_rate": 7.433082996341229e-05, "loss": 2.4184, "step": 1930 }, { "epoch": 0.23345367027677497, "grad_norm": 3.5984139442443848, "learning_rate": 7.47159637974196e-05, "loss": 2.3659, "step": 1940 }, { "epoch": 0.23465703971119134, "grad_norm": 5.596217632293701, "learning_rate": 7.510109763142692e-05, "loss": 2.4371, "step": 1950 }, { "epoch": 0.2358604091456077, "grad_norm": 4.290953159332275, "learning_rate": 7.548623146543424e-05, "loss": 2.2033, "step": 1960 }, { "epoch": 0.23706377858002406, "grad_norm": 3.4179747104644775, "learning_rate": 7.587136529944155e-05, "loss": 2.385, "step": 1970 }, { "epoch": 0.23826714801444043, "grad_norm": 3.991859197616577, "learning_rate": 7.625649913344888e-05, "loss": 2.3238, "step": 1980 }, { "epoch": 0.2394705174488568, "grad_norm": 3.4576947689056396, "learning_rate": 7.664163296745619e-05, "loss": 2.3791, "step": 1990 }, { "epoch": 0.24067388688327315, "grad_norm": 4.9071149826049805, "learning_rate": 7.702676680146352e-05, "loss": 2.3563, "step": 2000 }, { "epoch": 0.24187725631768953, "grad_norm": 3.402372360229492, "learning_rate": 7.741190063547082e-05, "loss": 2.3858, "step": 2010 }, { "epoch": 0.2430806257521059, "grad_norm": 2.9735138416290283, "learning_rate": 7.779703446947815e-05, "loss": 2.4769, "step": 2020 }, { "epoch": 0.24428399518652227, "grad_norm": 4.522787570953369, "learning_rate": 7.818216830348546e-05, "loss": 2.259, "step": 2030 }, { "epoch": 0.24548736462093862, "grad_norm": 3.260927200317383, "learning_rate": 7.856730213749278e-05, "loss": 2.4279, "step": 2040 }, { "epoch": 0.246690734055355, "grad_norm": 4.7217326164245605, "learning_rate": 7.895243597150011e-05, "loss": 2.3933, "step": 2050 }, { "epoch": 0.24789410348977137, "grad_norm": 4.655685901641846, "learning_rate": 7.933756980550741e-05, "loss": 2.3567, "step": 2060 }, { "epoch": 0.2490974729241877, "grad_norm": 3.0860252380371094, "learning_rate": 7.972270363951474e-05, "loss": 2.488, "step": 2070 }, { "epoch": 0.2503008423586041, "grad_norm": 5.234461784362793, "learning_rate": 8.010783747352205e-05, "loss": 2.2966, "step": 2080 }, { "epoch": 0.25150421179302046, "grad_norm": 3.013174057006836, "learning_rate": 8.049297130752938e-05, "loss": 2.4847, "step": 2090 }, { "epoch": 0.2527075812274368, "grad_norm": 5.257478713989258, "learning_rate": 8.087810514153669e-05, "loss": 2.3881, "step": 2100 }, { "epoch": 0.2539109506618532, "grad_norm": 4.6101789474487305, "learning_rate": 8.126323897554401e-05, "loss": 2.2844, "step": 2110 }, { "epoch": 0.25511432009626955, "grad_norm": 3.3464407920837402, "learning_rate": 8.164837280955133e-05, "loss": 2.4395, "step": 2120 }, { "epoch": 0.2563176895306859, "grad_norm": 4.377286434173584, "learning_rate": 8.203350664355864e-05, "loss": 2.0636, "step": 2130 }, { "epoch": 0.2575210589651023, "grad_norm": 3.287522554397583, "learning_rate": 8.241864047756596e-05, "loss": 2.3959, "step": 2140 }, { "epoch": 0.25872442839951865, "grad_norm": 4.91052770614624, "learning_rate": 8.280377431157327e-05, "loss": 2.5249, "step": 2150 }, { "epoch": 0.259927797833935, "grad_norm": 3.9140472412109375, "learning_rate": 8.318890814558059e-05, "loss": 2.3234, "step": 2160 }, { "epoch": 0.2611311672683514, "grad_norm": 2.5841314792633057, "learning_rate": 8.357404197958792e-05, "loss": 2.3596, "step": 2170 }, { "epoch": 0.26233453670276774, "grad_norm": 5.444457530975342, "learning_rate": 8.395917581359522e-05, "loss": 2.4814, "step": 2180 }, { "epoch": 0.26353790613718414, "grad_norm": 3.8762309551239014, "learning_rate": 8.434430964760255e-05, "loss": 2.2627, "step": 2190 }, { "epoch": 0.2647412755716005, "grad_norm": 5.452149868011475, "learning_rate": 8.472944348160986e-05, "loss": 2.5046, "step": 2200 }, { "epoch": 0.26594464500601683, "grad_norm": 5.255434513092041, "learning_rate": 8.511457731561719e-05, "loss": 2.1281, "step": 2210 }, { "epoch": 0.26714801444043323, "grad_norm": 2.345582962036133, "learning_rate": 8.54997111496245e-05, "loss": 2.3343, "step": 2220 }, { "epoch": 0.2683513838748496, "grad_norm": 5.703465938568115, "learning_rate": 8.588484498363182e-05, "loss": 2.4233, "step": 2230 }, { "epoch": 0.2695547533092659, "grad_norm": 3.678798198699951, "learning_rate": 8.626997881763913e-05, "loss": 2.1464, "step": 2240 }, { "epoch": 0.27075812274368233, "grad_norm": 5.3430023193359375, "learning_rate": 8.665511265164645e-05, "loss": 2.3755, "step": 2250 }, { "epoch": 0.2719614921780987, "grad_norm": 4.224210739135742, "learning_rate": 8.704024648565377e-05, "loss": 2.068, "step": 2260 }, { "epoch": 0.273164861612515, "grad_norm": 3.020923137664795, "learning_rate": 8.742538031966108e-05, "loss": 2.2785, "step": 2270 }, { "epoch": 0.2743682310469314, "grad_norm": 5.135770320892334, "learning_rate": 8.78105141536684e-05, "loss": 2.43, "step": 2280 }, { "epoch": 0.27557160048134777, "grad_norm": 3.6533546447753906, "learning_rate": 8.819564798767572e-05, "loss": 2.1872, "step": 2290 }, { "epoch": 0.2767749699157641, "grad_norm": 5.642998218536377, "learning_rate": 8.858078182168303e-05, "loss": 2.4434, "step": 2300 }, { "epoch": 0.2779783393501805, "grad_norm": 3.8212432861328125, "learning_rate": 8.896591565569036e-05, "loss": 2.3333, "step": 2310 }, { "epoch": 0.27918170878459686, "grad_norm": 2.579754114151001, "learning_rate": 8.935104948969766e-05, "loss": 2.4356, "step": 2320 }, { "epoch": 0.28038507821901326, "grad_norm": 4.244583606719971, "learning_rate": 8.9736183323705e-05, "loss": 2.4217, "step": 2330 }, { "epoch": 0.2815884476534296, "grad_norm": 3.4745070934295654, "learning_rate": 9.01213171577123e-05, "loss": 2.4493, "step": 2340 }, { "epoch": 0.28279181708784595, "grad_norm": 5.872068881988525, "learning_rate": 9.050645099171963e-05, "loss": 2.2935, "step": 2350 }, { "epoch": 0.28399518652226236, "grad_norm": 3.9975595474243164, "learning_rate": 9.089158482572694e-05, "loss": 2.184, "step": 2360 }, { "epoch": 0.2851985559566787, "grad_norm": 3.1640396118164062, "learning_rate": 9.127671865973426e-05, "loss": 2.4179, "step": 2370 }, { "epoch": 0.28640192539109505, "grad_norm": 4.480766773223877, "learning_rate": 9.166185249374159e-05, "loss": 2.0609, "step": 2380 }, { "epoch": 0.28760529482551145, "grad_norm": 3.7833733558654785, "learning_rate": 9.204698632774889e-05, "loss": 2.349, "step": 2390 }, { "epoch": 0.2888086642599278, "grad_norm": 6.1306352615356445, "learning_rate": 9.243212016175622e-05, "loss": 2.373, "step": 2400 }, { "epoch": 0.29001203369434414, "grad_norm": 4.305640697479248, "learning_rate": 9.281725399576352e-05, "loss": 2.2492, "step": 2410 }, { "epoch": 0.29121540312876054, "grad_norm": 2.8770713806152344, "learning_rate": 9.320238782977085e-05, "loss": 2.1706, "step": 2420 }, { "epoch": 0.2924187725631769, "grad_norm": 4.647495269775391, "learning_rate": 9.358752166377817e-05, "loss": 2.1634, "step": 2430 }, { "epoch": 0.29362214199759323, "grad_norm": 3.7371299266815186, "learning_rate": 9.397265549778549e-05, "loss": 2.3072, "step": 2440 }, { "epoch": 0.29482551143200963, "grad_norm": 5.267301559448242, "learning_rate": 9.43577893317928e-05, "loss": 2.514, "step": 2450 }, { "epoch": 0.296028880866426, "grad_norm": 4.16841459274292, "learning_rate": 9.474292316580012e-05, "loss": 2.085, "step": 2460 }, { "epoch": 0.2972322503008424, "grad_norm": 3.1590590476989746, "learning_rate": 9.512805699980744e-05, "loss": 2.3315, "step": 2470 }, { "epoch": 0.29843561973525873, "grad_norm": 4.402718544006348, "learning_rate": 9.551319083381475e-05, "loss": 2.2205, "step": 2480 }, { "epoch": 0.2996389891696751, "grad_norm": 3.7688190937042236, "learning_rate": 9.589832466782207e-05, "loss": 2.3368, "step": 2490 }, { "epoch": 0.3008423586040915, "grad_norm": 3.71555757522583, "learning_rate": 9.62834585018294e-05, "loss": 2.2572, "step": 2500 }, { "epoch": 0.3020457280385078, "grad_norm": 4.018877983093262, "learning_rate": 9.66685923358367e-05, "loss": 2.2546, "step": 2510 }, { "epoch": 0.30324909747292417, "grad_norm": 3.1480233669281006, "learning_rate": 9.705372616984403e-05, "loss": 2.4473, "step": 2520 }, { "epoch": 0.30445246690734057, "grad_norm": 4.867123603820801, "learning_rate": 9.743886000385133e-05, "loss": 2.2999, "step": 2530 }, { "epoch": 0.3056558363417569, "grad_norm": 3.555675745010376, "learning_rate": 9.782399383785866e-05, "loss": 2.3267, "step": 2540 }, { "epoch": 0.30685920577617326, "grad_norm": 5.9477362632751465, "learning_rate": 9.820912767186598e-05, "loss": 2.3903, "step": 2550 }, { "epoch": 0.30806257521058966, "grad_norm": 3.783031940460205, "learning_rate": 9.85942615058733e-05, "loss": 2.3415, "step": 2560 }, { "epoch": 0.309265944645006, "grad_norm": 3.087137222290039, "learning_rate": 9.897939533988061e-05, "loss": 2.4655, "step": 2570 }, { "epoch": 0.3104693140794224, "grad_norm": 5.595900058746338, "learning_rate": 9.936452917388793e-05, "loss": 2.3899, "step": 2580 }, { "epoch": 0.31167268351383876, "grad_norm": 3.25927734375, "learning_rate": 9.974966300789525e-05, "loss": 2.3068, "step": 2590 }, { "epoch": 0.3128760529482551, "grad_norm": 5.424354553222656, "learning_rate": 0.00010013479684190256, "loss": 2.4695, "step": 2600 }, { "epoch": 0.3140794223826715, "grad_norm": 3.853463888168335, "learning_rate": 0.00010051993067590988, "loss": 2.206, "step": 2610 }, { "epoch": 0.31528279181708785, "grad_norm": 3.2356932163238525, "learning_rate": 0.0001009050645099172, "loss": 2.1866, "step": 2620 }, { "epoch": 0.3164861612515042, "grad_norm": 4.676336288452148, "learning_rate": 0.00010129019834392452, "loss": 2.4018, "step": 2630 }, { "epoch": 0.3176895306859206, "grad_norm": 4.09110689163208, "learning_rate": 0.00010167533217793184, "loss": 2.1298, "step": 2640 }, { "epoch": 0.31889290012033694, "grad_norm": 4.968358993530273, "learning_rate": 0.00010206046601193914, "loss": 2.1658, "step": 2650 }, { "epoch": 0.3200962695547533, "grad_norm": 3.5447754859924316, "learning_rate": 0.00010244559984594646, "loss": 2.1519, "step": 2660 }, { "epoch": 0.3212996389891697, "grad_norm": 3.910879373550415, "learning_rate": 0.00010283073367995379, "loss": 2.5106, "step": 2670 }, { "epoch": 0.32250300842358604, "grad_norm": 4.198660373687744, "learning_rate": 0.0001032158675139611, "loss": 2.3276, "step": 2680 }, { "epoch": 0.3237063778580024, "grad_norm": 3.775038003921509, "learning_rate": 0.00010360100134796842, "loss": 2.1432, "step": 2690 }, { "epoch": 0.3249097472924188, "grad_norm": 5.7064900398254395, "learning_rate": 0.00010398613518197575, "loss": 2.3454, "step": 2700 }, { "epoch": 0.32611311672683513, "grad_norm": 4.214547157287598, "learning_rate": 0.00010437126901598307, "loss": 2.0752, "step": 2710 }, { "epoch": 0.32731648616125153, "grad_norm": 2.9629077911376953, "learning_rate": 0.00010475640284999037, "loss": 2.0854, "step": 2720 }, { "epoch": 0.3285198555956679, "grad_norm": 5.487660884857178, "learning_rate": 0.00010514153668399769, "loss": 2.279, "step": 2730 }, { "epoch": 0.3297232250300842, "grad_norm": 4.312379360198975, "learning_rate": 0.00010552667051800502, "loss": 2.289, "step": 2740 }, { "epoch": 0.3309265944645006, "grad_norm": 5.722112655639648, "learning_rate": 0.00010591180435201233, "loss": 2.447, "step": 2750 }, { "epoch": 0.33212996389891697, "grad_norm": 3.474245309829712, "learning_rate": 0.00010629693818601965, "loss": 2.2201, "step": 2760 }, { "epoch": 0.3333333333333333, "grad_norm": 2.9945054054260254, "learning_rate": 0.00010668207202002695, "loss": 2.0959, "step": 2770 }, { "epoch": 0.3345367027677497, "grad_norm": 3.9089252948760986, "learning_rate": 0.0001070672058540343, "loss": 2.1162, "step": 2780 }, { "epoch": 0.33574007220216606, "grad_norm": 3.8732986450195312, "learning_rate": 0.0001074523396880416, "loss": 2.4375, "step": 2790 }, { "epoch": 0.3369434416365824, "grad_norm": 4.811378002166748, "learning_rate": 0.00010783747352204891, "loss": 2.3234, "step": 2800 }, { "epoch": 0.3381468110709988, "grad_norm": 4.7229108810424805, "learning_rate": 0.00010822260735605623, "loss": 1.9816, "step": 2810 }, { "epoch": 0.33935018050541516, "grad_norm": 3.5141379833221436, "learning_rate": 0.00010860774119006356, "loss": 2.3154, "step": 2820 }, { "epoch": 0.3405535499398315, "grad_norm": 4.4615888595581055, "learning_rate": 0.00010899287502407088, "loss": 2.3536, "step": 2830 }, { "epoch": 0.3417569193742479, "grad_norm": 4.35577392578125, "learning_rate": 0.00010937800885807818, "loss": 2.0574, "step": 2840 }, { "epoch": 0.34296028880866425, "grad_norm": 5.919861793518066, "learning_rate": 0.0001097631426920855, "loss": 2.2634, "step": 2850 }, { "epoch": 0.34416365824308065, "grad_norm": 4.538342475891113, "learning_rate": 0.00011014827652609283, "loss": 2.0598, "step": 2860 }, { "epoch": 0.345367027677497, "grad_norm": 3.16560435295105, "learning_rate": 0.00011053341036010014, "loss": 2.2131, "step": 2870 }, { "epoch": 0.34657039711191334, "grad_norm": 4.720436096191406, "learning_rate": 0.00011091854419410746, "loss": 2.2599, "step": 2880 }, { "epoch": 0.34777376654632974, "grad_norm": 4.121572971343994, "learning_rate": 0.00011130367802811476, "loss": 2.0998, "step": 2890 }, { "epoch": 0.3489771359807461, "grad_norm": 4.9068732261657715, "learning_rate": 0.0001116888118621221, "loss": 2.1145, "step": 2900 }, { "epoch": 0.35018050541516244, "grad_norm": 4.335965156555176, "learning_rate": 0.00011207394569612941, "loss": 2.2301, "step": 2910 }, { "epoch": 0.35138387484957884, "grad_norm": 3.4327359199523926, "learning_rate": 0.00011245907953013672, "loss": 2.3188, "step": 2920 }, { "epoch": 0.3525872442839952, "grad_norm": 5.5908894538879395, "learning_rate": 0.00011284421336414404, "loss": 2.1288, "step": 2930 }, { "epoch": 0.35379061371841153, "grad_norm": 3.7546348571777344, "learning_rate": 0.00011322934719815137, "loss": 2.1982, "step": 2940 }, { "epoch": 0.35499398315282793, "grad_norm": 5.478588104248047, "learning_rate": 0.00011361448103215869, "loss": 2.1034, "step": 2950 }, { "epoch": 0.3561973525872443, "grad_norm": 4.286993503570557, "learning_rate": 0.00011399961486616599, "loss": 2.279, "step": 2960 }, { "epoch": 0.3574007220216607, "grad_norm": 3.498656749725342, "learning_rate": 0.0001143847487001733, "loss": 2.1784, "step": 2970 }, { "epoch": 0.358604091456077, "grad_norm": 4.852539539337158, "learning_rate": 0.00011476988253418063, "loss": 2.3248, "step": 2980 }, { "epoch": 0.35980746089049337, "grad_norm": 3.67238450050354, "learning_rate": 0.00011515501636818795, "loss": 2.0548, "step": 2990 }, { "epoch": 0.36101083032490977, "grad_norm": 5.410921573638916, "learning_rate": 0.00011554015020219527, "loss": 2.395, "step": 3000 }, { "epoch": 0.3622141997593261, "grad_norm": 4.061553478240967, "learning_rate": 0.00011592528403620257, "loss": 2.1706, "step": 3010 }, { "epoch": 0.36341756919374246, "grad_norm": 2.949221611022949, "learning_rate": 0.00011631041787020991, "loss": 2.3203, "step": 3020 }, { "epoch": 0.36462093862815886, "grad_norm": 4.697376251220703, "learning_rate": 0.00011669555170421722, "loss": 2.3691, "step": 3030 }, { "epoch": 0.3658243080625752, "grad_norm": 3.6829640865325928, "learning_rate": 0.00011708068553822453, "loss": 2.2573, "step": 3040 }, { "epoch": 0.36702767749699156, "grad_norm": 5.870731830596924, "learning_rate": 0.00011746581937223186, "loss": 2.3537, "step": 3050 }, { "epoch": 0.36823104693140796, "grad_norm": 3.713322877883911, "learning_rate": 0.00011785095320623918, "loss": 2.1367, "step": 3060 }, { "epoch": 0.3694344163658243, "grad_norm": 3.511606454849243, "learning_rate": 0.0001182360870402465, "loss": 2.2126, "step": 3070 }, { "epoch": 0.37063778580024065, "grad_norm": 6.14950704574585, "learning_rate": 0.0001186212208742538, "loss": 2.2284, "step": 3080 }, { "epoch": 0.37184115523465705, "grad_norm": 3.878574848175049, "learning_rate": 0.00011900635470826114, "loss": 2.1863, "step": 3090 }, { "epoch": 0.3730445246690734, "grad_norm": 7.3028059005737305, "learning_rate": 0.00011939148854226844, "loss": 2.2284, "step": 3100 }, { "epoch": 0.3742478941034898, "grad_norm": 8.766607284545898, "learning_rate": 0.00011977662237627576, "loss": 2.2654, "step": 3110 }, { "epoch": 0.37545126353790614, "grad_norm": 3.1363110542297363, "learning_rate": 0.00012016175621028308, "loss": 2.2604, "step": 3120 }, { "epoch": 0.3766546329723225, "grad_norm": 5.78159761428833, "learning_rate": 0.0001205468900442904, "loss": 2.2574, "step": 3130 }, { "epoch": 0.3778580024067389, "grad_norm": 3.4998180866241455, "learning_rate": 0.00012093202387829772, "loss": 2.0765, "step": 3140 }, { "epoch": 0.37906137184115524, "grad_norm": 5.023599147796631, "learning_rate": 0.00012131715771230503, "loss": 2.1099, "step": 3150 }, { "epoch": 0.3802647412755716, "grad_norm": 4.230167865753174, "learning_rate": 0.00012170229154631234, "loss": 2.068, "step": 3160 }, { "epoch": 0.381468110709988, "grad_norm": 3.3696231842041016, "learning_rate": 0.00012208742538031967, "loss": 2.1214, "step": 3170 }, { "epoch": 0.38267148014440433, "grad_norm": 4.373711109161377, "learning_rate": 0.000122472559214327, "loss": 2.0965, "step": 3180 }, { "epoch": 0.3838748495788207, "grad_norm": 3.5923421382904053, "learning_rate": 0.0001228576930483343, "loss": 2.1647, "step": 3190 }, { "epoch": 0.3850782190132371, "grad_norm": 5.419064044952393, "learning_rate": 0.00012324282688234162, "loss": 2.2482, "step": 3200 }, { "epoch": 0.3862815884476534, "grad_norm": 4.109050273895264, "learning_rate": 0.00012362796071634894, "loss": 2.1084, "step": 3210 }, { "epoch": 0.38748495788206977, "grad_norm": 3.0200486183166504, "learning_rate": 0.00012401309455035625, "loss": 2.0833, "step": 3220 }, { "epoch": 0.38868832731648617, "grad_norm": 5.205567836761475, "learning_rate": 0.00012439822838436357, "loss": 2.223, "step": 3230 }, { "epoch": 0.3898916967509025, "grad_norm": 3.1712558269500732, "learning_rate": 0.00012478336221837089, "loss": 1.9621, "step": 3240 }, { "epoch": 0.3910950661853189, "grad_norm": 6.77772331237793, "learning_rate": 0.0001251684960523782, "loss": 2.1003, "step": 3250 }, { "epoch": 0.39229843561973526, "grad_norm": 4.719583988189697, "learning_rate": 0.00012555362988638552, "loss": 2.3107, "step": 3260 }, { "epoch": 0.3935018050541516, "grad_norm": 2.557326316833496, "learning_rate": 0.00012593876372039283, "loss": 2.2969, "step": 3270 }, { "epoch": 0.394705174488568, "grad_norm": 3.8585081100463867, "learning_rate": 0.00012632389755440015, "loss": 2.2111, "step": 3280 }, { "epoch": 0.39590854392298436, "grad_norm": 3.757640838623047, "learning_rate": 0.0001267090313884075, "loss": 2.1898, "step": 3290 }, { "epoch": 0.3971119133574007, "grad_norm": 5.546092987060547, "learning_rate": 0.00012709416522241478, "loss": 2.2393, "step": 3300 }, { "epoch": 0.3983152827918171, "grad_norm": 3.8220839500427246, "learning_rate": 0.0001274792990564221, "loss": 2.1682, "step": 3310 }, { "epoch": 0.39951865222623345, "grad_norm": 3.2766096591949463, "learning_rate": 0.00012786443289042942, "loss": 2.2139, "step": 3320 }, { "epoch": 0.4007220216606498, "grad_norm": 5.108669757843018, "learning_rate": 0.00012824956672443676, "loss": 2.0244, "step": 3330 }, { "epoch": 0.4019253910950662, "grad_norm": 3.889655113220215, "learning_rate": 0.00012863470055844408, "loss": 2.1231, "step": 3340 }, { "epoch": 0.40312876052948254, "grad_norm": 5.427848815917969, "learning_rate": 0.00012901983439245136, "loss": 2.158, "step": 3350 }, { "epoch": 0.4043321299638989, "grad_norm": 3.919846773147583, "learning_rate": 0.0001294049682264587, "loss": 2.1693, "step": 3360 }, { "epoch": 0.4055354993983153, "grad_norm": 2.8813560009002686, "learning_rate": 0.00012979010206046602, "loss": 2.2194, "step": 3370 }, { "epoch": 0.40673886883273164, "grad_norm": 5.581686019897461, "learning_rate": 0.00013017523589447334, "loss": 2.2722, "step": 3380 }, { "epoch": 0.40794223826714804, "grad_norm": 3.22434401512146, "learning_rate": 0.00013056036972848066, "loss": 2.3132, "step": 3390 }, { "epoch": 0.4091456077015644, "grad_norm": 5.471015930175781, "learning_rate": 0.00013094550356248797, "loss": 2.3227, "step": 3400 }, { "epoch": 0.41034897713598073, "grad_norm": 4.294218063354492, "learning_rate": 0.0001313306373964953, "loss": 2.3273, "step": 3410 }, { "epoch": 0.41155234657039713, "grad_norm": 4.315289497375488, "learning_rate": 0.0001317157712305026, "loss": 2.0973, "step": 3420 }, { "epoch": 0.4127557160048135, "grad_norm": 4.89292573928833, "learning_rate": 0.00013210090506450992, "loss": 2.2171, "step": 3430 }, { "epoch": 0.4139590854392298, "grad_norm": 3.9335169792175293, "learning_rate": 0.00013248603889851724, "loss": 2.363, "step": 3440 }, { "epoch": 0.4151624548736462, "grad_norm": 5.29353666305542, "learning_rate": 0.00013287117273252455, "loss": 2.3166, "step": 3450 }, { "epoch": 0.41636582430806257, "grad_norm": 4.394747257232666, "learning_rate": 0.00013325630656653187, "loss": 2.1407, "step": 3460 }, { "epoch": 0.4175691937424789, "grad_norm": 2.707184314727783, "learning_rate": 0.0001336414404005392, "loss": 2.3482, "step": 3470 }, { "epoch": 0.4187725631768953, "grad_norm": 5.04945182800293, "learning_rate": 0.00013402657423454653, "loss": 2.2553, "step": 3480 }, { "epoch": 0.41997593261131166, "grad_norm": 3.828322410583496, "learning_rate": 0.00013441170806855382, "loss": 2.0523, "step": 3490 }, { "epoch": 0.42117930204572807, "grad_norm": 6.2465925216674805, "learning_rate": 0.00013479684190256114, "loss": 2.4015, "step": 3500 }, { "epoch": 0.4223826714801444, "grad_norm": 5.26687479019165, "learning_rate": 0.00013518197573656845, "loss": 2.117, "step": 3510 }, { "epoch": 0.42358604091456076, "grad_norm": 4.215747356414795, "learning_rate": 0.0001355671095705758, "loss": 2.2339, "step": 3520 }, { "epoch": 0.42478941034897716, "grad_norm": 4.485904216766357, "learning_rate": 0.0001359522434045831, "loss": 2.2601, "step": 3530 }, { "epoch": 0.4259927797833935, "grad_norm": 4.049141883850098, "learning_rate": 0.0001363373772385904, "loss": 2.0584, "step": 3540 }, { "epoch": 0.42719614921780985, "grad_norm": 5.23921537399292, "learning_rate": 0.00013672251107259772, "loss": 2.1419, "step": 3550 }, { "epoch": 0.42839951865222625, "grad_norm": 5.058278560638428, "learning_rate": 0.00013710764490660506, "loss": 2.1201, "step": 3560 }, { "epoch": 0.4296028880866426, "grad_norm": 5.548385143280029, "learning_rate": 0.00013749277874061238, "loss": 2.3672, "step": 3570 }, { "epoch": 0.43080625752105894, "grad_norm": 4.900745868682861, "learning_rate": 0.0001378779125746197, "loss": 2.2147, "step": 3580 }, { "epoch": 0.43200962695547535, "grad_norm": 3.4081718921661377, "learning_rate": 0.00013826304640862698, "loss": 2.1727, "step": 3590 }, { "epoch": 0.4332129963898917, "grad_norm": 5.376938343048096, "learning_rate": 0.00013864818024263433, "loss": 2.0224, "step": 3600 }, { "epoch": 0.43441636582430804, "grad_norm": 4.5283708572387695, "learning_rate": 0.00013903331407664164, "loss": 2.2583, "step": 3610 }, { "epoch": 0.43561973525872444, "grad_norm": 3.4518930912017822, "learning_rate": 0.00013941844791064896, "loss": 2.27, "step": 3620 }, { "epoch": 0.4368231046931408, "grad_norm": 5.660602569580078, "learning_rate": 0.00013980358174465628, "loss": 2.0659, "step": 3630 }, { "epoch": 0.4380264741275572, "grad_norm": 3.406203269958496, "learning_rate": 0.0001401887155786636, "loss": 2.2138, "step": 3640 }, { "epoch": 0.43922984356197353, "grad_norm": 5.18516731262207, "learning_rate": 0.0001405738494126709, "loss": 2.3397, "step": 3650 }, { "epoch": 0.4404332129963899, "grad_norm": 4.447347164154053, "learning_rate": 0.00014095898324667822, "loss": 1.9989, "step": 3660 }, { "epoch": 0.4416365824308063, "grad_norm": 3.6428494453430176, "learning_rate": 0.00014134411708068554, "loss": 2.2966, "step": 3670 }, { "epoch": 0.4428399518652226, "grad_norm": 5.201837539672852, "learning_rate": 0.00014172925091469286, "loss": 2.2646, "step": 3680 }, { "epoch": 0.44404332129963897, "grad_norm": 4.126829147338867, "learning_rate": 0.00014211438474870017, "loss": 2.4197, "step": 3690 }, { "epoch": 0.4452466907340554, "grad_norm": 6.108100414276123, "learning_rate": 0.0001424995185827075, "loss": 2.1061, "step": 3700 }, { "epoch": 0.4464500601684717, "grad_norm": 4.69503116607666, "learning_rate": 0.00014288465241671483, "loss": 2.1273, "step": 3710 }, { "epoch": 0.44765342960288806, "grad_norm": 2.9712235927581787, "learning_rate": 0.00014326978625072215, "loss": 2.1659, "step": 3720 }, { "epoch": 0.44885679903730447, "grad_norm": 4.880232334136963, "learning_rate": 0.00014365492008472944, "loss": 2.1072, "step": 3730 }, { "epoch": 0.4500601684717208, "grad_norm": 4.117298126220703, "learning_rate": 0.00014404005391873675, "loss": 2.132, "step": 3740 }, { "epoch": 0.45126353790613716, "grad_norm": 6.599010944366455, "learning_rate": 0.0001444251877527441, "loss": 2.2534, "step": 3750 }, { "epoch": 0.45246690734055356, "grad_norm": 4.913687705993652, "learning_rate": 0.00014481032158675141, "loss": 2.2075, "step": 3760 }, { "epoch": 0.4536702767749699, "grad_norm": 3.421839714050293, "learning_rate": 0.00014519545542075873, "loss": 2.0303, "step": 3770 }, { "epoch": 0.4548736462093863, "grad_norm": 4.960007667541504, "learning_rate": 0.00014558058925476602, "loss": 2.1867, "step": 3780 }, { "epoch": 0.45607701564380265, "grad_norm": 4.16343879699707, "learning_rate": 0.00014596572308877336, "loss": 2.2308, "step": 3790 }, { "epoch": 0.457280385078219, "grad_norm": 5.702826023101807, "learning_rate": 0.00014635085692278068, "loss": 2.183, "step": 3800 }, { "epoch": 0.4584837545126354, "grad_norm": 4.250916957855225, "learning_rate": 0.000146735990756788, "loss": 2.0379, "step": 3810 }, { "epoch": 0.45968712394705175, "grad_norm": 3.0171141624450684, "learning_rate": 0.0001471211245907953, "loss": 2.2479, "step": 3820 }, { "epoch": 0.4608904933814681, "grad_norm": 5.237485885620117, "learning_rate": 0.00014750625842480263, "loss": 2.1692, "step": 3830 }, { "epoch": 0.4620938628158845, "grad_norm": 4.266112804412842, "learning_rate": 0.00014789139225880994, "loss": 2.2255, "step": 3840 }, { "epoch": 0.46329723225030084, "grad_norm": 5.257189750671387, "learning_rate": 0.00014827652609281726, "loss": 2.0497, "step": 3850 }, { "epoch": 0.4645006016847172, "grad_norm": 3.7063605785369873, "learning_rate": 0.00014866165992682458, "loss": 2.1092, "step": 3860 }, { "epoch": 0.4657039711191336, "grad_norm": 3.2208852767944336, "learning_rate": 0.0001490467937608319, "loss": 2.2522, "step": 3870 }, { "epoch": 0.46690734055354993, "grad_norm": 5.302590370178223, "learning_rate": 0.0001494319275948392, "loss": 2.1958, "step": 3880 }, { "epoch": 0.4681107099879663, "grad_norm": 3.7522192001342773, "learning_rate": 0.00014981706142884653, "loss": 2.2607, "step": 3890 }, { "epoch": 0.4693140794223827, "grad_norm": 6.742669105529785, "learning_rate": 0.00015020219526285384, "loss": 2.4315, "step": 3900 }, { "epoch": 0.470517448856799, "grad_norm": 3.790123462677002, "learning_rate": 0.00015058732909686119, "loss": 2.0291, "step": 3910 }, { "epoch": 0.4717208182912154, "grad_norm": 3.7009456157684326, "learning_rate": 0.00015097246293086847, "loss": 2.2023, "step": 3920 }, { "epoch": 0.4729241877256318, "grad_norm": 5.0835089683532715, "learning_rate": 0.0001513575967648758, "loss": 2.1321, "step": 3930 }, { "epoch": 0.4741275571600481, "grad_norm": 3.5800023078918457, "learning_rate": 0.0001517427305988831, "loss": 2.1163, "step": 3940 }, { "epoch": 0.4753309265944645, "grad_norm": 5.33997917175293, "learning_rate": 0.00015212786443289045, "loss": 2.0387, "step": 3950 }, { "epoch": 0.47653429602888087, "grad_norm": 4.518845558166504, "learning_rate": 0.00015251299826689777, "loss": 2.0584, "step": 3960 }, { "epoch": 0.4777376654632972, "grad_norm": 3.687009334564209, "learning_rate": 0.00015289813210090506, "loss": 2.2745, "step": 3970 }, { "epoch": 0.4789410348977136, "grad_norm": 4.833267688751221, "learning_rate": 0.00015328326593491237, "loss": 2.3606, "step": 3980 }, { "epoch": 0.48014440433212996, "grad_norm": 3.7437093257904053, "learning_rate": 0.00015366839976891972, "loss": 2.2791, "step": 3990 }, { "epoch": 0.4813477737665463, "grad_norm": 5.243849754333496, "learning_rate": 0.00015405353360292703, "loss": 2.2659, "step": 4000 }, { "epoch": 0.4825511432009627, "grad_norm": 4.449324131011963, "learning_rate": 0.00015443866743693435, "loss": 2.2892, "step": 4010 }, { "epoch": 0.48375451263537905, "grad_norm": 3.0258562564849854, "learning_rate": 0.00015482380127094164, "loss": 1.9254, "step": 4020 }, { "epoch": 0.48495788206979545, "grad_norm": 7.307828426361084, "learning_rate": 0.00015520893510494898, "loss": 2.2143, "step": 4030 }, { "epoch": 0.4861612515042118, "grad_norm": 3.417057991027832, "learning_rate": 0.0001555940689389563, "loss": 2.1549, "step": 4040 }, { "epoch": 0.48736462093862815, "grad_norm": 6.394173622131348, "learning_rate": 0.00015597920277296361, "loss": 2.403, "step": 4050 }, { "epoch": 0.48856799037304455, "grad_norm": 4.979101181030273, "learning_rate": 0.00015636433660697093, "loss": 2.0301, "step": 4060 }, { "epoch": 0.4897713598074609, "grad_norm": 3.3656721115112305, "learning_rate": 0.00015674947044097825, "loss": 2.2258, "step": 4070 }, { "epoch": 0.49097472924187724, "grad_norm": 5.95798397064209, "learning_rate": 0.00015713460427498556, "loss": 2.4844, "step": 4080 }, { "epoch": 0.49217809867629364, "grad_norm": 3.4689602851867676, "learning_rate": 0.00015751973810899288, "loss": 2.0246, "step": 4090 }, { "epoch": 0.49338146811071, "grad_norm": 5.611385345458984, "learning_rate": 0.00015790487194300022, "loss": 2.1991, "step": 4100 }, { "epoch": 0.49458483754512633, "grad_norm": 4.616299152374268, "learning_rate": 0.0001582900057770075, "loss": 2.0429, "step": 4110 }, { "epoch": 0.49578820697954273, "grad_norm": 3.6518077850341797, "learning_rate": 0.00015867513961101483, "loss": 2.4328, "step": 4120 }, { "epoch": 0.4969915764139591, "grad_norm": 5.260908603668213, "learning_rate": 0.00015906027344502214, "loss": 2.2006, "step": 4130 }, { "epoch": 0.4981949458483754, "grad_norm": 3.864971160888672, "learning_rate": 0.0001594454072790295, "loss": 2.2522, "step": 4140 }, { "epoch": 0.4993983152827918, "grad_norm": 7.640333652496338, "learning_rate": 0.0001598305411130368, "loss": 2.2165, "step": 4150 }, { "epoch": 0.5006016847172082, "grad_norm": 4.940099716186523, "learning_rate": 0.0001602156749470441, "loss": 2.1367, "step": 4160 }, { "epoch": 0.5018050541516246, "grad_norm": 2.9353463649749756, "learning_rate": 0.0001606008087810514, "loss": 2.2242, "step": 4170 }, { "epoch": 0.5030084235860409, "grad_norm": 4.441645622253418, "learning_rate": 0.00016098594261505875, "loss": 2.244, "step": 4180 }, { "epoch": 0.5042117930204573, "grad_norm": 3.1046464443206787, "learning_rate": 0.00016137107644906607, "loss": 2.1284, "step": 4190 }, { "epoch": 0.5054151624548736, "grad_norm": 4.9619903564453125, "learning_rate": 0.00016175621028307339, "loss": 2.3206, "step": 4200 }, { "epoch": 0.50661853188929, "grad_norm": 4.747543811798096, "learning_rate": 0.00016214134411708067, "loss": 2.1086, "step": 4210 }, { "epoch": 0.5078219013237064, "grad_norm": 3.993290424346924, "learning_rate": 0.00016252647795108802, "loss": 2.2709, "step": 4220 }, { "epoch": 0.5090252707581228, "grad_norm": 4.79225492477417, "learning_rate": 0.00016291161178509533, "loss": 2.1154, "step": 4230 }, { "epoch": 0.5102286401925391, "grad_norm": 3.7596824169158936, "learning_rate": 0.00016329674561910265, "loss": 2.2315, "step": 4240 }, { "epoch": 0.5114320096269555, "grad_norm": 7.182191848754883, "learning_rate": 0.00016368187945310997, "loss": 2.2752, "step": 4250 }, { "epoch": 0.5126353790613718, "grad_norm": 3.6913094520568848, "learning_rate": 0.00016406701328711728, "loss": 2.1639, "step": 4260 }, { "epoch": 0.5138387484957883, "grad_norm": 3.209007501602173, "learning_rate": 0.0001644521471211246, "loss": 2.2458, "step": 4270 }, { "epoch": 0.5150421179302046, "grad_norm": 4.694979667663574, "learning_rate": 0.00016483728095513192, "loss": 2.3178, "step": 4280 }, { "epoch": 0.516245487364621, "grad_norm": 4.524529457092285, "learning_rate": 0.00016522241478913923, "loss": 2.0244, "step": 4290 }, { "epoch": 0.5174488567990373, "grad_norm": 5.364401340484619, "learning_rate": 0.00016560754862314655, "loss": 2.3895, "step": 4300 }, { "epoch": 0.5186522262334536, "grad_norm": 4.454987049102783, "learning_rate": 0.00016599268245715386, "loss": 2.1674, "step": 4310 }, { "epoch": 0.51985559566787, "grad_norm": 3.4239635467529297, "learning_rate": 0.00016637781629116118, "loss": 2.2167, "step": 4320 }, { "epoch": 0.5210589651022864, "grad_norm": 6.292297840118408, "learning_rate": 0.0001667629501251685, "loss": 2.3381, "step": 4330 }, { "epoch": 0.5222623345367028, "grad_norm": 6.415258407592773, "learning_rate": 0.00016714808395917584, "loss": 2.422, "step": 4340 }, { "epoch": 0.5234657039711191, "grad_norm": 5.473740577697754, "learning_rate": 0.00016753321779318313, "loss": 2.0074, "step": 4350 }, { "epoch": 0.5246690734055355, "grad_norm": 3.851654052734375, "learning_rate": 0.00016791835162719045, "loss": 2.1131, "step": 4360 }, { "epoch": 0.5258724428399518, "grad_norm": 2.981456995010376, "learning_rate": 0.0001683034854611978, "loss": 2.0493, "step": 4370 }, { "epoch": 0.5270758122743683, "grad_norm": 5.323829174041748, "learning_rate": 0.0001686886192952051, "loss": 2.1481, "step": 4380 }, { "epoch": 0.5282791817087846, "grad_norm": 3.30295991897583, "learning_rate": 0.00016907375312921242, "loss": 2.2369, "step": 4390 }, { "epoch": 0.529482551143201, "grad_norm": 5.954172134399414, "learning_rate": 0.0001694588869632197, "loss": 2.188, "step": 4400 }, { "epoch": 0.5306859205776173, "grad_norm": 4.6917595863342285, "learning_rate": 0.00016984402079722705, "loss": 2.1564, "step": 4410 }, { "epoch": 0.5318892900120337, "grad_norm": 4.059054851531982, "learning_rate": 0.00017022915463123437, "loss": 2.1733, "step": 4420 }, { "epoch": 0.53309265944645, "grad_norm": 4.693612575531006, "learning_rate": 0.0001706142884652417, "loss": 2.049, "step": 4430 }, { "epoch": 0.5342960288808665, "grad_norm": 3.6918413639068604, "learning_rate": 0.000170999422299249, "loss": 2.2558, "step": 4440 }, { "epoch": 0.5354993983152828, "grad_norm": 6.020305156707764, "learning_rate": 0.00017138455613325632, "loss": 2.2933, "step": 4450 }, { "epoch": 0.5367027677496992, "grad_norm": 4.025328636169434, "learning_rate": 0.00017176968996726364, "loss": 2.2216, "step": 4460 }, { "epoch": 0.5379061371841155, "grad_norm": 3.8254079818725586, "learning_rate": 0.00017215482380127095, "loss": 2.3882, "step": 4470 }, { "epoch": 0.5391095066185319, "grad_norm": 4.954911231994629, "learning_rate": 0.00017253995763527827, "loss": 2.3088, "step": 4480 }, { "epoch": 0.5403128760529483, "grad_norm": 4.960634708404541, "learning_rate": 0.00017292509146928558, "loss": 2.2734, "step": 4490 }, { "epoch": 0.5415162454873647, "grad_norm": 5.230152130126953, "learning_rate": 0.0001733102253032929, "loss": 2.1023, "step": 4500 }, { "epoch": 0.542719614921781, "grad_norm": 4.745293140411377, "learning_rate": 0.00017369535913730022, "loss": 2.2787, "step": 4510 }, { "epoch": 0.5439229843561973, "grad_norm": 3.2070212364196777, "learning_rate": 0.00017408049297130753, "loss": 2.4571, "step": 4520 }, { "epoch": 0.5451263537906137, "grad_norm": 4.792518138885498, "learning_rate": 0.00017446562680531488, "loss": 2.3549, "step": 4530 }, { "epoch": 0.54632972322503, "grad_norm": 4.161391735076904, "learning_rate": 0.00017485076063932217, "loss": 2.2024, "step": 4540 }, { "epoch": 0.5475330926594465, "grad_norm": 6.9995012283325195, "learning_rate": 0.00017523589447332948, "loss": 2.3532, "step": 4550 }, { "epoch": 0.5487364620938628, "grad_norm": 4.922509670257568, "learning_rate": 0.0001756210283073368, "loss": 2.0588, "step": 4560 }, { "epoch": 0.5499398315282792, "grad_norm": 5.178070068359375, "learning_rate": 0.00017600616214134414, "loss": 2.3225, "step": 4570 }, { "epoch": 0.5511432009626955, "grad_norm": 4.753723621368408, "learning_rate": 0.00017639129597535143, "loss": 2.1332, "step": 4580 }, { "epoch": 0.5523465703971119, "grad_norm": 4.199524402618408, "learning_rate": 0.00017677642980935875, "loss": 2.1207, "step": 4590 }, { "epoch": 0.5535499398315282, "grad_norm": 6.0876007080078125, "learning_rate": 0.00017716156364336606, "loss": 2.1182, "step": 4600 }, { "epoch": 0.5547533092659447, "grad_norm": 6.212006568908691, "learning_rate": 0.0001775466974773734, "loss": 2.2262, "step": 4610 }, { "epoch": 0.555956678700361, "grad_norm": 3.594230890274048, "learning_rate": 0.00017793183131138072, "loss": 2.2525, "step": 4620 }, { "epoch": 0.5571600481347774, "grad_norm": 5.565987586975098, "learning_rate": 0.000178316965145388, "loss": 2.0538, "step": 4630 }, { "epoch": 0.5583634175691937, "grad_norm": 5.315800666809082, "learning_rate": 0.00017870209897939533, "loss": 1.9768, "step": 4640 }, { "epoch": 0.5595667870036101, "grad_norm": 5.953965187072754, "learning_rate": 0.00017908723281340267, "loss": 2.4237, "step": 4650 }, { "epoch": 0.5607701564380265, "grad_norm": 4.337621212005615, "learning_rate": 0.00017947236664741, "loss": 1.9684, "step": 4660 }, { "epoch": 0.5619735258724429, "grad_norm": 3.0972256660461426, "learning_rate": 0.0001798575004814173, "loss": 2.2303, "step": 4670 }, { "epoch": 0.5631768953068592, "grad_norm": 4.8460893630981445, "learning_rate": 0.0001802426343154246, "loss": 2.3611, "step": 4680 }, { "epoch": 0.5643802647412756, "grad_norm": 3.7181291580200195, "learning_rate": 0.00018062776814943194, "loss": 2.2953, "step": 4690 }, { "epoch": 0.5655836341756919, "grad_norm": 6.203132152557373, "learning_rate": 0.00018101290198343925, "loss": 2.1678, "step": 4700 }, { "epoch": 0.5667870036101083, "grad_norm": 4.8794941902160645, "learning_rate": 0.00018139803581744657, "loss": 2.2847, "step": 4710 }, { "epoch": 0.5679903730445247, "grad_norm": 3.7808096408843994, "learning_rate": 0.0001817831696514539, "loss": 2.2479, "step": 4720 }, { "epoch": 0.5691937424789411, "grad_norm": 4.418076038360596, "learning_rate": 0.0001821683034854612, "loss": 2.2857, "step": 4730 }, { "epoch": 0.5703971119133574, "grad_norm": 3.5367400646209717, "learning_rate": 0.00018255343731946852, "loss": 2.3112, "step": 4740 }, { "epoch": 0.5716004813477737, "grad_norm": 6.713764667510986, "learning_rate": 0.00018293857115347584, "loss": 2.3626, "step": 4750 }, { "epoch": 0.5728038507821901, "grad_norm": 4.047953128814697, "learning_rate": 0.00018332370498748318, "loss": 2.2012, "step": 4760 }, { "epoch": 0.5740072202166066, "grad_norm": 3.482131242752075, "learning_rate": 0.00018370883882149047, "loss": 2.2053, "step": 4770 }, { "epoch": 0.5752105896510229, "grad_norm": 4.9270339012146, "learning_rate": 0.00018409397265549778, "loss": 2.2103, "step": 4780 }, { "epoch": 0.5764139590854392, "grad_norm": 4.626224517822266, "learning_rate": 0.0001844791064895051, "loss": 2.1866, "step": 4790 }, { "epoch": 0.5776173285198556, "grad_norm": 5.537825584411621, "learning_rate": 0.00018486424032351244, "loss": 2.2666, "step": 4800 }, { "epoch": 0.5788206979542719, "grad_norm": 4.913242340087891, "learning_rate": 0.00018524937415751976, "loss": 2.08, "step": 4810 }, { "epoch": 0.5800240673886883, "grad_norm": 4.09318733215332, "learning_rate": 0.00018563450799152705, "loss": 2.2275, "step": 4820 }, { "epoch": 0.5812274368231047, "grad_norm": 5.854251861572266, "learning_rate": 0.00018601964182553437, "loss": 2.2121, "step": 4830 }, { "epoch": 0.5824308062575211, "grad_norm": 4.641722679138184, "learning_rate": 0.0001864047756595417, "loss": 2.3117, "step": 4840 }, { "epoch": 0.5836341756919374, "grad_norm": 5.240549564361572, "learning_rate": 0.00018678990949354903, "loss": 2.3387, "step": 4850 }, { "epoch": 0.5848375451263538, "grad_norm": 5.191614627838135, "learning_rate": 0.00018717504332755634, "loss": 2.2299, "step": 4860 }, { "epoch": 0.5860409145607701, "grad_norm": 3.6426289081573486, "learning_rate": 0.00018756017716156363, "loss": 2.3794, "step": 4870 }, { "epoch": 0.5872442839951865, "grad_norm": 5.039050579071045, "learning_rate": 0.00018794531099557097, "loss": 2.1374, "step": 4880 }, { "epoch": 0.5884476534296029, "grad_norm": 5.175183296203613, "learning_rate": 0.0001883304448295783, "loss": 2.1648, "step": 4890 }, { "epoch": 0.5896510228640193, "grad_norm": 6.7789387702941895, "learning_rate": 0.0001887155786635856, "loss": 2.2178, "step": 4900 }, { "epoch": 0.5908543922984356, "grad_norm": 5.478110313415527, "learning_rate": 0.00018910071249759292, "loss": 2.2999, "step": 4910 }, { "epoch": 0.592057761732852, "grad_norm": 3.7008423805236816, "learning_rate": 0.00018948584633160024, "loss": 2.1704, "step": 4920 }, { "epoch": 0.5932611311672683, "grad_norm": 5.629617214202881, "learning_rate": 0.00018987098016560756, "loss": 2.2908, "step": 4930 }, { "epoch": 0.5944645006016848, "grad_norm": 3.8162500858306885, "learning_rate": 0.00019025611399961487, "loss": 2.3308, "step": 4940 }, { "epoch": 0.5956678700361011, "grad_norm": 5.998647689819336, "learning_rate": 0.0001906412478336222, "loss": 2.2056, "step": 4950 }, { "epoch": 0.5968712394705175, "grad_norm": 4.687641620635986, "learning_rate": 0.0001910263816676295, "loss": 2.11, "step": 4960 }, { "epoch": 0.5980746089049338, "grad_norm": 3.5128839015960693, "learning_rate": 0.00019141151550163682, "loss": 2.22, "step": 4970 }, { "epoch": 0.5992779783393501, "grad_norm": 5.303665637969971, "learning_rate": 0.00019179664933564414, "loss": 2.15, "step": 4980 }, { "epoch": 0.6004813477737665, "grad_norm": 3.855205535888672, "learning_rate": 0.00019218178316965145, "loss": 2.2459, "step": 4990 }, { "epoch": 0.601684717208183, "grad_norm": 5.189699649810791, "learning_rate": 0.0001925669170036588, "loss": 2.3758, "step": 5000 }, { "epoch": 0.6028880866425993, "grad_norm": 4.4476823806762695, "learning_rate": 0.00019295205083766609, "loss": 2.0236, "step": 5010 }, { "epoch": 0.6040914560770156, "grad_norm": 4.877256870269775, "learning_rate": 0.0001933371846716734, "loss": 2.2245, "step": 5020 }, { "epoch": 0.605294825511432, "grad_norm": 6.751873016357422, "learning_rate": 0.00019372231850568075, "loss": 2.3168, "step": 5030 }, { "epoch": 0.6064981949458483, "grad_norm": 3.8965351581573486, "learning_rate": 0.00019410745233968806, "loss": 2.2746, "step": 5040 }, { "epoch": 0.6077015643802648, "grad_norm": 5.491021156311035, "learning_rate": 0.00019449258617369538, "loss": 2.3079, "step": 5050 }, { "epoch": 0.6089049338146811, "grad_norm": 3.6740262508392334, "learning_rate": 0.00019487772000770267, "loss": 2.1978, "step": 5060 }, { "epoch": 0.6101083032490975, "grad_norm": 3.504803419113159, "learning_rate": 0.00019526285384171, "loss": 2.4303, "step": 5070 }, { "epoch": 0.6113116726835138, "grad_norm": 4.8340840339660645, "learning_rate": 0.00019564798767571733, "loss": 2.1761, "step": 5080 }, { "epoch": 0.6125150421179302, "grad_norm": 4.319956302642822, "learning_rate": 0.00019603312150972464, "loss": 2.1507, "step": 5090 }, { "epoch": 0.6137184115523465, "grad_norm": 6.505810260772705, "learning_rate": 0.00019641825534373196, "loss": 2.3415, "step": 5100 }, { "epoch": 0.614921780986763, "grad_norm": 5.259067058563232, "learning_rate": 0.00019680338917773928, "loss": 2.236, "step": 5110 }, { "epoch": 0.6161251504211793, "grad_norm": 3.3339712619781494, "learning_rate": 0.0001971885230117466, "loss": 2.3776, "step": 5120 }, { "epoch": 0.6173285198555957, "grad_norm": 5.416616916656494, "learning_rate": 0.0001975736568457539, "loss": 2.0913, "step": 5130 }, { "epoch": 0.618531889290012, "grad_norm": 3.966322183609009, "learning_rate": 0.00019795879067976123, "loss": 2.1771, "step": 5140 }, { "epoch": 0.6197352587244284, "grad_norm": 6.432679176330566, "learning_rate": 0.00019834392451376854, "loss": 2.3724, "step": 5150 }, { "epoch": 0.6209386281588448, "grad_norm": 3.8340489864349365, "learning_rate": 0.00019872905834777586, "loss": 2.0687, "step": 5160 }, { "epoch": 0.6221419975932612, "grad_norm": 3.3140413761138916, "learning_rate": 0.00019911419218178317, "loss": 2.3506, "step": 5170 }, { "epoch": 0.6233453670276775, "grad_norm": 5.370321273803711, "learning_rate": 0.0001994993260157905, "loss": 2.1462, "step": 5180 }, { "epoch": 0.6245487364620939, "grad_norm": 5.8055925369262695, "learning_rate": 0.00019988445984979783, "loss": 2.4433, "step": 5190 }, { "epoch": 0.6257521058965102, "grad_norm": 5.655837059020996, "learning_rate": 0.00019999999996454245, "loss": 2.3347, "step": 5200 }, { "epoch": 0.6269554753309265, "grad_norm": 5.972043514251709, "learning_rate": 0.00019999999979087286, "loss": 2.0611, "step": 5210 }, { "epoch": 0.628158844765343, "grad_norm": 3.918379306793213, "learning_rate": 0.00019999999947247852, "loss": 2.4486, "step": 5220 }, { "epoch": 0.6293622141997594, "grad_norm": 6.398720741271973, "learning_rate": 0.0001999999990093595, "loss": 2.1424, "step": 5230 }, { "epoch": 0.6305655836341757, "grad_norm": 4.77095890045166, "learning_rate": 0.00019999999840151583, "loss": 2.3111, "step": 5240 }, { "epoch": 0.631768953068592, "grad_norm": 6.796426773071289, "learning_rate": 0.00019999999764894747, "loss": 2.4209, "step": 5250 }, { "epoch": 0.6329723225030084, "grad_norm": 7.171083450317383, "learning_rate": 0.00019999999675165438, "loss": 2.3567, "step": 5260 }, { "epoch": 0.6341756919374247, "grad_norm": 3.3638195991516113, "learning_rate": 0.00019999999570963664, "loss": 2.0822, "step": 5270 }, { "epoch": 0.6353790613718412, "grad_norm": 5.260736465454102, "learning_rate": 0.0001999999945228942, "loss": 2.2258, "step": 5280 }, { "epoch": 0.6365824308062575, "grad_norm": 3.7317612171173096, "learning_rate": 0.00019999999319142707, "loss": 2.2966, "step": 5290 }, { "epoch": 0.6377858002406739, "grad_norm": 6.7515645027160645, "learning_rate": 0.0001999999917152353, "loss": 2.3575, "step": 5300 }, { "epoch": 0.6389891696750902, "grad_norm": 6.711028575897217, "learning_rate": 0.00019999999009431884, "loss": 2.2671, "step": 5310 }, { "epoch": 0.6401925391095066, "grad_norm": 3.392085552215576, "learning_rate": 0.00019999998832867768, "loss": 2.3201, "step": 5320 }, { "epoch": 0.641395908543923, "grad_norm": 6.115386009216309, "learning_rate": 0.00019999998641831185, "loss": 2.422, "step": 5330 }, { "epoch": 0.6425992779783394, "grad_norm": 3.648564100265503, "learning_rate": 0.00019999998436322138, "loss": 2.4774, "step": 5340 }, { "epoch": 0.6438026474127557, "grad_norm": 5.814185619354248, "learning_rate": 0.0001999999821634062, "loss": 2.4827, "step": 5350 }, { "epoch": 0.6450060168471721, "grad_norm": 4.432163715362549, "learning_rate": 0.0001999999798188664, "loss": 2.0722, "step": 5360 }, { "epoch": 0.6462093862815884, "grad_norm": 4.281014919281006, "learning_rate": 0.00019999997732960193, "loss": 2.2363, "step": 5370 }, { "epoch": 0.6474127557160048, "grad_norm": 7.3086066246032715, "learning_rate": 0.0001999999746956128, "loss": 2.1118, "step": 5380 }, { "epoch": 0.6486161251504212, "grad_norm": 4.297363758087158, "learning_rate": 0.000199999971916899, "loss": 2.2896, "step": 5390 }, { "epoch": 0.6498194945848376, "grad_norm": 5.7083048820495605, "learning_rate": 0.00019999996899346058, "loss": 2.3377, "step": 5400 }, { "epoch": 0.6510228640192539, "grad_norm": 4.992364883422852, "learning_rate": 0.00019999996592529748, "loss": 2.321, "step": 5410 }, { "epoch": 0.6522262334536703, "grad_norm": 3.141991138458252, "learning_rate": 0.00019999996271240978, "loss": 2.2665, "step": 5420 }, { "epoch": 0.6534296028880866, "grad_norm": 5.258912086486816, "learning_rate": 0.00019999995935479742, "loss": 2.3438, "step": 5430 }, { "epoch": 0.6546329723225031, "grad_norm": 4.900660514831543, "learning_rate": 0.00019999995585246044, "loss": 2.0806, "step": 5440 }, { "epoch": 0.6558363417569194, "grad_norm": 6.445699691772461, "learning_rate": 0.00019999995220539883, "loss": 2.4233, "step": 5450 }, { "epoch": 0.6570397111913358, "grad_norm": 4.976849555969238, "learning_rate": 0.00019999994841361259, "loss": 2.1497, "step": 5460 }, { "epoch": 0.6582430806257521, "grad_norm": 3.247731924057007, "learning_rate": 0.00019999994447710175, "loss": 2.4414, "step": 5470 }, { "epoch": 0.6594464500601684, "grad_norm": 5.598658084869385, "learning_rate": 0.0001999999403958663, "loss": 2.3372, "step": 5480 }, { "epoch": 0.6606498194945848, "grad_norm": 4.76513671875, "learning_rate": 0.00019999993616990624, "loss": 2.2463, "step": 5490 }, { "epoch": 0.6618531889290012, "grad_norm": 6.446969509124756, "learning_rate": 0.0001999999317992216, "loss": 2.3092, "step": 5500 }, { "epoch": 0.6630565583634176, "grad_norm": 4.556867599487305, "learning_rate": 0.00019999992728381236, "loss": 2.085, "step": 5510 }, { "epoch": 0.6642599277978339, "grad_norm": 3.7941901683807373, "learning_rate": 0.00019999992262367851, "loss": 2.3755, "step": 5520 }, { "epoch": 0.6654632972322503, "grad_norm": 4.857911586761475, "learning_rate": 0.0001999999178188201, "loss": 2.2633, "step": 5530 }, { "epoch": 0.6666666666666666, "grad_norm": 3.6080713272094727, "learning_rate": 0.00019999991286923715, "loss": 2.0844, "step": 5540 }, { "epoch": 0.6678700361010831, "grad_norm": 6.234077453613281, "learning_rate": 0.0001999999077749296, "loss": 2.0721, "step": 5550 }, { "epoch": 0.6690734055354994, "grad_norm": 4.698109149932861, "learning_rate": 0.00019999990253589752, "loss": 2.2047, "step": 5560 }, { "epoch": 0.6702767749699158, "grad_norm": 3.984668731689453, "learning_rate": 0.00019999989715214087, "loss": 2.1264, "step": 5570 }, { "epoch": 0.6714801444043321, "grad_norm": 5.2652997970581055, "learning_rate": 0.0001999998916236597, "loss": 2.2808, "step": 5580 }, { "epoch": 0.6726835138387485, "grad_norm": 3.186023712158203, "learning_rate": 0.00019999988595045397, "loss": 2.305, "step": 5590 }, { "epoch": 0.6738868832731648, "grad_norm": 6.204652786254883, "learning_rate": 0.00019999988013252374, "loss": 2.2583, "step": 5600 }, { "epoch": 0.6750902527075813, "grad_norm": 5.521336555480957, "learning_rate": 0.00019999987416986897, "loss": 2.2934, "step": 5610 }, { "epoch": 0.6762936221419976, "grad_norm": 3.410998582839966, "learning_rate": 0.00019999986806248972, "loss": 2.1635, "step": 5620 }, { "epoch": 0.677496991576414, "grad_norm": 5.09378719329834, "learning_rate": 0.00019999986181038593, "loss": 2.1468, "step": 5630 }, { "epoch": 0.6787003610108303, "grad_norm": 3.893676280975342, "learning_rate": 0.00019999985541355772, "loss": 2.1738, "step": 5640 }, { "epoch": 0.6799037304452467, "grad_norm": 5.081244468688965, "learning_rate": 0.000199999848872005, "loss": 2.1461, "step": 5650 }, { "epoch": 0.681107099879663, "grad_norm": 4.685601711273193, "learning_rate": 0.00019999984218572778, "loss": 2.179, "step": 5660 }, { "epoch": 0.6823104693140795, "grad_norm": 2.979675054550171, "learning_rate": 0.00019999983535472614, "loss": 2.3638, "step": 5670 }, { "epoch": 0.6835138387484958, "grad_norm": 7.460003852844238, "learning_rate": 0.00019999982837900002, "loss": 2.2055, "step": 5680 }, { "epoch": 0.6847172081829122, "grad_norm": 4.4045538902282715, "learning_rate": 0.00019999982125854947, "loss": 2.0693, "step": 5690 }, { "epoch": 0.6859205776173285, "grad_norm": 5.677947044372559, "learning_rate": 0.0001999998139933745, "loss": 2.4137, "step": 5700 }, { "epoch": 0.6871239470517448, "grad_norm": 4.1160888671875, "learning_rate": 0.00019999980658347508, "loss": 2.2255, "step": 5710 }, { "epoch": 0.6883273164861613, "grad_norm": 4.297820091247559, "learning_rate": 0.00019999979902885127, "loss": 2.1269, "step": 5720 }, { "epoch": 0.6895306859205776, "grad_norm": 4.658998012542725, "learning_rate": 0.00019999979132950306, "loss": 2.3559, "step": 5730 }, { "epoch": 0.690734055354994, "grad_norm": 3.612006425857544, "learning_rate": 0.00019999978348543044, "loss": 2.2368, "step": 5740 }, { "epoch": 0.6919374247894103, "grad_norm": 5.840548515319824, "learning_rate": 0.00019999977549663348, "loss": 2.1801, "step": 5750 }, { "epoch": 0.6931407942238267, "grad_norm": 4.654530048370361, "learning_rate": 0.00019999976736311215, "loss": 2.2934, "step": 5760 }, { "epoch": 0.694344163658243, "grad_norm": 3.498783826828003, "learning_rate": 0.00019999975908486646, "loss": 2.0995, "step": 5770 }, { "epoch": 0.6955475330926595, "grad_norm": 5.616212368011475, "learning_rate": 0.00019999975066189643, "loss": 2.1991, "step": 5780 }, { "epoch": 0.6967509025270758, "grad_norm": 4.105483531951904, "learning_rate": 0.00019999974209420208, "loss": 2.3082, "step": 5790 }, { "epoch": 0.6979542719614922, "grad_norm": 6.668478012084961, "learning_rate": 0.00019999973338178338, "loss": 2.3708, "step": 5800 }, { "epoch": 0.6991576413959085, "grad_norm": 5.358325481414795, "learning_rate": 0.00019999972452464042, "loss": 2.2654, "step": 5810 }, { "epoch": 0.7003610108303249, "grad_norm": 3.947488784790039, "learning_rate": 0.00019999971552277313, "loss": 2.3865, "step": 5820 }, { "epoch": 0.7015643802647413, "grad_norm": 5.846844673156738, "learning_rate": 0.0001999997063761816, "loss": 2.2507, "step": 5830 }, { "epoch": 0.7027677496991577, "grad_norm": 3.6681337356567383, "learning_rate": 0.0001999996970848658, "loss": 2.1866, "step": 5840 }, { "epoch": 0.703971119133574, "grad_norm": 7.481490135192871, "learning_rate": 0.00019999968764882573, "loss": 2.29, "step": 5850 }, { "epoch": 0.7051744885679904, "grad_norm": 4.638186931610107, "learning_rate": 0.00019999967806806144, "loss": 2.1477, "step": 5860 }, { "epoch": 0.7063778580024067, "grad_norm": 3.310225486755371, "learning_rate": 0.0001999996683425729, "loss": 2.228, "step": 5870 }, { "epoch": 0.7075812274368231, "grad_norm": 5.039708614349365, "learning_rate": 0.0001999996584723602, "loss": 2.1469, "step": 5880 }, { "epoch": 0.7087845968712395, "grad_norm": 3.915811061859131, "learning_rate": 0.00019999964845742328, "loss": 2.2734, "step": 5890 }, { "epoch": 0.7099879663056559, "grad_norm": 6.468316555023193, "learning_rate": 0.00019999963829776216, "loss": 2.1917, "step": 5900 }, { "epoch": 0.7111913357400722, "grad_norm": 4.221285820007324, "learning_rate": 0.00019999962799337689, "loss": 2.1898, "step": 5910 }, { "epoch": 0.7123947051744886, "grad_norm": 3.1073007583618164, "learning_rate": 0.00019999961754426745, "loss": 2.1003, "step": 5920 }, { "epoch": 0.7135980746089049, "grad_norm": 5.455918788909912, "learning_rate": 0.00019999960695043392, "loss": 2.3484, "step": 5930 }, { "epoch": 0.7148014440433214, "grad_norm": 4.238619327545166, "learning_rate": 0.00019999959621187622, "loss": 2.1655, "step": 5940 }, { "epoch": 0.7160048134777377, "grad_norm": 5.345495700836182, "learning_rate": 0.00019999958532859445, "loss": 2.3268, "step": 5950 }, { "epoch": 0.717208182912154, "grad_norm": 4.478791236877441, "learning_rate": 0.00019999957430058855, "loss": 2.1228, "step": 5960 }, { "epoch": 0.7184115523465704, "grad_norm": 5.313588619232178, "learning_rate": 0.0001999995631278586, "loss": 2.1756, "step": 5970 }, { "epoch": 0.7196149217809867, "grad_norm": 5.476967811584473, "learning_rate": 0.00019999955181040463, "loss": 2.2398, "step": 5980 }, { "epoch": 0.7208182912154031, "grad_norm": 4.409827709197998, "learning_rate": 0.00019999954034822658, "loss": 2.2433, "step": 5990 }, { "epoch": 0.7220216606498195, "grad_norm": 6.954128265380859, "learning_rate": 0.0001999995287413245, "loss": 2.2805, "step": 6000 }, { "epoch": 0.7232250300842359, "grad_norm": 4.963774681091309, "learning_rate": 0.00019999951698969844, "loss": 2.0606, "step": 6010 }, { "epoch": 0.7244283995186522, "grad_norm": 3.1691906452178955, "learning_rate": 0.00019999950509334838, "loss": 2.2873, "step": 6020 }, { "epoch": 0.7256317689530686, "grad_norm": 5.281974792480469, "learning_rate": 0.00019999949305227435, "loss": 2.4136, "step": 6030 }, { "epoch": 0.7268351383874849, "grad_norm": 4.72628116607666, "learning_rate": 0.00019999948086647632, "loss": 2.264, "step": 6040 }, { "epoch": 0.7280385078219013, "grad_norm": 5.913517951965332, "learning_rate": 0.00019999946853595439, "loss": 2.2188, "step": 6050 }, { "epoch": 0.7292418772563177, "grad_norm": 4.688936233520508, "learning_rate": 0.00019999945606070856, "loss": 2.0428, "step": 6060 }, { "epoch": 0.7304452466907341, "grad_norm": 3.245565414428711, "learning_rate": 0.0001999994434407388, "loss": 2.3174, "step": 6070 }, { "epoch": 0.7316486161251504, "grad_norm": 4.206037998199463, "learning_rate": 0.0001999994306760452, "loss": 2.0825, "step": 6080 }, { "epoch": 0.7328519855595668, "grad_norm": 3.948371648788452, "learning_rate": 0.00019999941776662768, "loss": 2.2976, "step": 6090 }, { "epoch": 0.7340553549939831, "grad_norm": 6.331472873687744, "learning_rate": 0.00019999940471248634, "loss": 2.3223, "step": 6100 }, { "epoch": 0.7352587244283996, "grad_norm": 4.219931125640869, "learning_rate": 0.00019999939151362116, "loss": 2.1751, "step": 6110 }, { "epoch": 0.7364620938628159, "grad_norm": 3.756378173828125, "learning_rate": 0.00019999937817003215, "loss": 2.216, "step": 6120 }, { "epoch": 0.7376654632972323, "grad_norm": 4.940024375915527, "learning_rate": 0.0001999993646817194, "loss": 2.3813, "step": 6130 }, { "epoch": 0.7388688327316486, "grad_norm": 4.085123062133789, "learning_rate": 0.00019999935104868285, "loss": 2.3866, "step": 6140 }, { "epoch": 0.740072202166065, "grad_norm": 7.11965274810791, "learning_rate": 0.00019999933727092256, "loss": 2.6845, "step": 6150 }, { "epoch": 0.7412755716004813, "grad_norm": 5.067838668823242, "learning_rate": 0.0001999993233484385, "loss": 2.228, "step": 6160 }, { "epoch": 0.7424789410348978, "grad_norm": 4.1947808265686035, "learning_rate": 0.0001999993092812308, "loss": 2.297, "step": 6170 }, { "epoch": 0.7436823104693141, "grad_norm": 5.650806903839111, "learning_rate": 0.00019999929506929936, "loss": 2.324, "step": 6180 }, { "epoch": 0.7448856799037304, "grad_norm": 4.293774127960205, "learning_rate": 0.00019999928071264429, "loss": 2.1975, "step": 6190 }, { "epoch": 0.7460890493381468, "grad_norm": 6.2978315353393555, "learning_rate": 0.00019999926621126554, "loss": 2.3262, "step": 6200 }, { "epoch": 0.7472924187725631, "grad_norm": 5.376358985900879, "learning_rate": 0.00019999925156516317, "loss": 1.9778, "step": 6210 }, { "epoch": 0.7484957882069796, "grad_norm": 3.434368848800659, "learning_rate": 0.00019999923677433722, "loss": 2.3768, "step": 6220 }, { "epoch": 0.7496991576413959, "grad_norm": 4.872026443481445, "learning_rate": 0.00019999922183878762, "loss": 2.1686, "step": 6230 }, { "epoch": 0.7509025270758123, "grad_norm": 4.505314350128174, "learning_rate": 0.00019999920675851452, "loss": 2.2798, "step": 6240 }, { "epoch": 0.7521058965102286, "grad_norm": 7.478349208831787, "learning_rate": 0.00019999919153351785, "loss": 2.1283, "step": 6250 }, { "epoch": 0.753309265944645, "grad_norm": 5.635042667388916, "learning_rate": 0.00019999917616379768, "loss": 2.1289, "step": 6260 }, { "epoch": 0.7545126353790613, "grad_norm": 3.7895894050598145, "learning_rate": 0.000199999160649354, "loss": 2.206, "step": 6270 }, { "epoch": 0.7557160048134778, "grad_norm": 4.9072065353393555, "learning_rate": 0.00019999914499018684, "loss": 2.5128, "step": 6280 }, { "epoch": 0.7569193742478941, "grad_norm": 4.7990899085998535, "learning_rate": 0.00019999912918629623, "loss": 2.2511, "step": 6290 }, { "epoch": 0.7581227436823105, "grad_norm": 6.253719329833984, "learning_rate": 0.00019999911323768222, "loss": 2.3181, "step": 6300 }, { "epoch": 0.7593261131167268, "grad_norm": 4.957713603973389, "learning_rate": 0.00019999909714434475, "loss": 2.1288, "step": 6310 }, { "epoch": 0.7605294825511432, "grad_norm": 4.218201637268066, "learning_rate": 0.00019999908090628394, "loss": 2.1878, "step": 6320 }, { "epoch": 0.7617328519855595, "grad_norm": 5.5473432540893555, "learning_rate": 0.00019999906452349974, "loss": 2.0831, "step": 6330 }, { "epoch": 0.762936221419976, "grad_norm": 4.122081279754639, "learning_rate": 0.00019999904799599224, "loss": 2.2415, "step": 6340 }, { "epoch": 0.7641395908543923, "grad_norm": 5.6238555908203125, "learning_rate": 0.00019999903132376142, "loss": 2.1524, "step": 6350 }, { "epoch": 0.7653429602888087, "grad_norm": 4.484196662902832, "learning_rate": 0.00019999901450680732, "loss": 1.9236, "step": 6360 }, { "epoch": 0.766546329723225, "grad_norm": 3.451127290725708, "learning_rate": 0.00019999899754512992, "loss": 2.1563, "step": 6370 }, { "epoch": 0.7677496991576414, "grad_norm": 4.9223408699035645, "learning_rate": 0.0001999989804387293, "loss": 2.3576, "step": 6380 }, { "epoch": 0.7689530685920578, "grad_norm": 5.084750175476074, "learning_rate": 0.0001999989631876055, "loss": 2.1415, "step": 6390 }, { "epoch": 0.7701564380264742, "grad_norm": 6.764801979064941, "learning_rate": 0.00019999894579175847, "loss": 2.262, "step": 6400 }, { "epoch": 0.7713598074608905, "grad_norm": 5.669933795928955, "learning_rate": 0.00019999892825118827, "loss": 2.0838, "step": 6410 }, { "epoch": 0.7725631768953068, "grad_norm": 3.124565362930298, "learning_rate": 0.00019999891056589496, "loss": 2.2526, "step": 6420 }, { "epoch": 0.7737665463297232, "grad_norm": 5.475543022155762, "learning_rate": 0.0001999988927358785, "loss": 2.192, "step": 6430 }, { "epoch": 0.7749699157641395, "grad_norm": 4.148515701293945, "learning_rate": 0.000199998874761139, "loss": 2.2276, "step": 6440 }, { "epoch": 0.776173285198556, "grad_norm": 5.512551784515381, "learning_rate": 0.00019999885664167644, "loss": 2.1467, "step": 6450 }, { "epoch": 0.7773766546329723, "grad_norm": 6.695659160614014, "learning_rate": 0.0001999988383774908, "loss": 2.007, "step": 6460 }, { "epoch": 0.7785800240673887, "grad_norm": 3.3527722358703613, "learning_rate": 0.0001999988199685822, "loss": 2.0665, "step": 6470 }, { "epoch": 0.779783393501805, "grad_norm": 4.57830286026001, "learning_rate": 0.00019999880141495057, "loss": 2.1413, "step": 6480 }, { "epoch": 0.7809867629362214, "grad_norm": 4.720484733581543, "learning_rate": 0.00019999878271659602, "loss": 2.275, "step": 6490 }, { "epoch": 0.7821901323706378, "grad_norm": 6.585578441619873, "learning_rate": 0.00019999876387351853, "loss": 2.3317, "step": 6500 }, { "epoch": 0.7833935018050542, "grad_norm": 4.008000373840332, "learning_rate": 0.00019999874488571813, "loss": 2.0169, "step": 6510 }, { "epoch": 0.7845968712394705, "grad_norm": 3.417729377746582, "learning_rate": 0.0001999987257531949, "loss": 2.3291, "step": 6520 }, { "epoch": 0.7858002406738869, "grad_norm": 5.9073710441589355, "learning_rate": 0.0001999987064759488, "loss": 2.368, "step": 6530 }, { "epoch": 0.7870036101083032, "grad_norm": 3.870997190475464, "learning_rate": 0.00019999868705397988, "loss": 1.9916, "step": 6540 }, { "epoch": 0.7882069795427196, "grad_norm": 5.63076639175415, "learning_rate": 0.00019999866748728816, "loss": 2.2776, "step": 6550 }, { "epoch": 0.789410348977136, "grad_norm": 4.0659613609313965, "learning_rate": 0.0001999986477758737, "loss": 2.1265, "step": 6560 }, { "epoch": 0.7906137184115524, "grad_norm": 4.5430145263671875, "learning_rate": 0.00019999862791973648, "loss": 2.353, "step": 6570 }, { "epoch": 0.7918170878459687, "grad_norm": 6.095127582550049, "learning_rate": 0.00019999860791887657, "loss": 2.1899, "step": 6580 }, { "epoch": 0.7930204572803851, "grad_norm": 4.152281284332275, "learning_rate": 0.000199998587773294, "loss": 2.2989, "step": 6590 }, { "epoch": 0.7942238267148014, "grad_norm": 5.399205684661865, "learning_rate": 0.00019999856748298877, "loss": 2.436, "step": 6600 }, { "epoch": 0.7954271961492179, "grad_norm": 4.7240095138549805, "learning_rate": 0.00019999854704796092, "loss": 2.2057, "step": 6610 }, { "epoch": 0.7966305655836342, "grad_norm": 3.8349926471710205, "learning_rate": 0.0001999985264682105, "loss": 2.1351, "step": 6620 }, { "epoch": 0.7978339350180506, "grad_norm": 5.826876163482666, "learning_rate": 0.0001999985057437375, "loss": 2.3605, "step": 6630 }, { "epoch": 0.7990373044524669, "grad_norm": 3.7913665771484375, "learning_rate": 0.00019999848487454197, "loss": 2.1542, "step": 6640 }, { "epoch": 0.8002406738868832, "grad_norm": 5.375580787658691, "learning_rate": 0.000199998463860624, "loss": 2.0194, "step": 6650 }, { "epoch": 0.8014440433212996, "grad_norm": 5.2479166984558105, "learning_rate": 0.0001999984427019835, "loss": 2.1781, "step": 6660 }, { "epoch": 0.802647412755716, "grad_norm": 4.459693908691406, "learning_rate": 0.00019999842139862062, "loss": 2.3453, "step": 6670 }, { "epoch": 0.8038507821901324, "grad_norm": 5.684396266937256, "learning_rate": 0.0001999983999505353, "loss": 2.3262, "step": 6680 }, { "epoch": 0.8050541516245487, "grad_norm": 4.626715660095215, "learning_rate": 0.00019999837835772759, "loss": 2.2126, "step": 6690 }, { "epoch": 0.8062575210589651, "grad_norm": 6.133049964904785, "learning_rate": 0.00019999835662019757, "loss": 2.2424, "step": 6700 }, { "epoch": 0.8074608904933814, "grad_norm": 4.258509159088135, "learning_rate": 0.0001999983347379452, "loss": 1.99, "step": 6710 }, { "epoch": 0.8086642599277978, "grad_norm": 3.301567554473877, "learning_rate": 0.0001999983127109706, "loss": 2.1121, "step": 6720 }, { "epoch": 0.8098676293622142, "grad_norm": 5.264902591705322, "learning_rate": 0.00019999829053927374, "loss": 2.1922, "step": 6730 }, { "epoch": 0.8110709987966306, "grad_norm": 3.8825745582580566, "learning_rate": 0.00019999826822285466, "loss": 2.1548, "step": 6740 }, { "epoch": 0.8122743682310469, "grad_norm": 5.56144905090332, "learning_rate": 0.00019999824576171337, "loss": 2.3099, "step": 6750 }, { "epoch": 0.8134777376654633, "grad_norm": 4.648536205291748, "learning_rate": 0.00019999822315584998, "loss": 2.091, "step": 6760 }, { "epoch": 0.8146811070998796, "grad_norm": 3.6888937950134277, "learning_rate": 0.00019999820040526444, "loss": 2.1071, "step": 6770 }, { "epoch": 0.8158844765342961, "grad_norm": 4.9940314292907715, "learning_rate": 0.00019999817750995683, "loss": 2.1822, "step": 6780 }, { "epoch": 0.8170878459687124, "grad_norm": 4.372590065002441, "learning_rate": 0.00019999815446992717, "loss": 2.2962, "step": 6790 }, { "epoch": 0.8182912154031288, "grad_norm": 5.009139060974121, "learning_rate": 0.00019999813128517547, "loss": 2.3153, "step": 6800 }, { "epoch": 0.8194945848375451, "grad_norm": 4.939500331878662, "learning_rate": 0.00019999810795570178, "loss": 2.2014, "step": 6810 }, { "epoch": 0.8206979542719615, "grad_norm": 4.308764457702637, "learning_rate": 0.00019999808448150616, "loss": 2.1361, "step": 6820 }, { "epoch": 0.8219013237063778, "grad_norm": 6.462900161743164, "learning_rate": 0.0001999980608625886, "loss": 2.1952, "step": 6830 }, { "epoch": 0.8231046931407943, "grad_norm": 5.643688201904297, "learning_rate": 0.0001999980370989492, "loss": 2.0308, "step": 6840 }, { "epoch": 0.8243080625752106, "grad_norm": 6.7328104972839355, "learning_rate": 0.00019999801319058792, "loss": 2.11, "step": 6850 }, { "epoch": 0.825511432009627, "grad_norm": 5.818809986114502, "learning_rate": 0.00019999798913750483, "loss": 1.944, "step": 6860 }, { "epoch": 0.8267148014440433, "grad_norm": 3.0272462368011475, "learning_rate": 0.00019999796493969996, "loss": 2.1284, "step": 6870 }, { "epoch": 0.8279181708784596, "grad_norm": 6.05486536026001, "learning_rate": 0.00019999794059717335, "loss": 2.267, "step": 6880 }, { "epoch": 0.8291215403128761, "grad_norm": 3.904543161392212, "learning_rate": 0.00019999791610992505, "loss": 2.2204, "step": 6890 }, { "epoch": 0.8303249097472925, "grad_norm": 5.603869915008545, "learning_rate": 0.00019999789147795506, "loss": 2.2056, "step": 6900 }, { "epoch": 0.8315282791817088, "grad_norm": 5.127676963806152, "learning_rate": 0.00019999786670126343, "loss": 2.2686, "step": 6910 }, { "epoch": 0.8327316486161251, "grad_norm": 3.6282613277435303, "learning_rate": 0.0001999978417798502, "loss": 2.2868, "step": 6920 }, { "epoch": 0.8339350180505415, "grad_norm": 5.270694732666016, "learning_rate": 0.0001999978167137154, "loss": 2.2679, "step": 6930 }, { "epoch": 0.8351383874849578, "grad_norm": 3.836949110031128, "learning_rate": 0.00019999779150285907, "loss": 2.2726, "step": 6940 }, { "epoch": 0.8363417569193743, "grad_norm": 5.457431793212891, "learning_rate": 0.00019999776614728124, "loss": 2.2211, "step": 6950 }, { "epoch": 0.8375451263537906, "grad_norm": 5.116433620452881, "learning_rate": 0.00019999774064698197, "loss": 2.2302, "step": 6960 }, { "epoch": 0.838748495788207, "grad_norm": 3.554002285003662, "learning_rate": 0.00019999771500196127, "loss": 2.4411, "step": 6970 }, { "epoch": 0.8399518652226233, "grad_norm": 6.70152473449707, "learning_rate": 0.00019999768921221919, "loss": 2.1905, "step": 6980 }, { "epoch": 0.8411552346570397, "grad_norm": 4.895505428314209, "learning_rate": 0.0001999976632777558, "loss": 2.4138, "step": 6990 }, { "epoch": 0.8423586040914561, "grad_norm": 6.639925003051758, "learning_rate": 0.00019999763719857105, "loss": 2.3516, "step": 7000 }, { "epoch": 0.8435619735258725, "grad_norm": 4.094420433044434, "learning_rate": 0.00019999761097466508, "loss": 2.1786, "step": 7010 }, { "epoch": 0.8447653429602888, "grad_norm": 4.961806774139404, "learning_rate": 0.00019999758460603783, "loss": 2.2067, "step": 7020 }, { "epoch": 0.8459687123947052, "grad_norm": 4.95901346206665, "learning_rate": 0.0001999975580926894, "loss": 2.1451, "step": 7030 }, { "epoch": 0.8471720818291215, "grad_norm": 4.088491439819336, "learning_rate": 0.00019999753143461982, "loss": 2.1258, "step": 7040 }, { "epoch": 0.8483754512635379, "grad_norm": 5.4671525955200195, "learning_rate": 0.00019999750463182912, "loss": 2.2137, "step": 7050 }, { "epoch": 0.8495788206979543, "grad_norm": 4.36328649520874, "learning_rate": 0.00019999747768431736, "loss": 2.0348, "step": 7060 }, { "epoch": 0.8507821901323707, "grad_norm": 3.508300542831421, "learning_rate": 0.00019999745059208455, "loss": 2.0429, "step": 7070 }, { "epoch": 0.851985559566787, "grad_norm": 5.880530834197998, "learning_rate": 0.00019999742335513073, "loss": 2.1445, "step": 7080 }, { "epoch": 0.8531889290012034, "grad_norm": 4.054240703582764, "learning_rate": 0.00019999739597345597, "loss": 2.1468, "step": 7090 }, { "epoch": 0.8543922984356197, "grad_norm": 7.524439334869385, "learning_rate": 0.00019999736844706029, "loss": 2.3211, "step": 7100 }, { "epoch": 0.855595667870036, "grad_norm": 4.426852703094482, "learning_rate": 0.0001999973407759437, "loss": 2.0554, "step": 7110 }, { "epoch": 0.8567990373044525, "grad_norm": 3.4823224544525146, "learning_rate": 0.00019999731296010628, "loss": 2.2347, "step": 7120 }, { "epoch": 0.8580024067388689, "grad_norm": 5.5711894035339355, "learning_rate": 0.0001999972849995481, "loss": 2.1123, "step": 7130 }, { "epoch": 0.8592057761732852, "grad_norm": 3.9582128524780273, "learning_rate": 0.00019999725689426908, "loss": 2.0099, "step": 7140 }, { "epoch": 0.8604091456077015, "grad_norm": 5.960570335388184, "learning_rate": 0.00019999722864426941, "loss": 2.2012, "step": 7150 }, { "epoch": 0.8616125150421179, "grad_norm": 5.0911126136779785, "learning_rate": 0.00019999720024954903, "loss": 2.112, "step": 7160 }, { "epoch": 0.8628158844765343, "grad_norm": 4.357975482940674, "learning_rate": 0.00019999717171010804, "loss": 2.2499, "step": 7170 }, { "epoch": 0.8640192539109507, "grad_norm": 5.801352500915527, "learning_rate": 0.0001999971430259464, "loss": 2.2858, "step": 7180 }, { "epoch": 0.865222623345367, "grad_norm": 4.905974388122559, "learning_rate": 0.00019999711419706426, "loss": 2.1957, "step": 7190 }, { "epoch": 0.8664259927797834, "grad_norm": 6.450088977813721, "learning_rate": 0.00019999708522346158, "loss": 2.3496, "step": 7200 }, { "epoch": 0.8676293622141997, "grad_norm": 5.748147487640381, "learning_rate": 0.00019999705610513843, "loss": 2.2512, "step": 7210 }, { "epoch": 0.8688327316486161, "grad_norm": 3.711469888687134, "learning_rate": 0.00019999702684209486, "loss": 2.2549, "step": 7220 }, { "epoch": 0.8700361010830325, "grad_norm": 6.755938529968262, "learning_rate": 0.0001999969974343309, "loss": 2.4081, "step": 7230 }, { "epoch": 0.8712394705174489, "grad_norm": 5.398969650268555, "learning_rate": 0.0001999969678818466, "loss": 2.2644, "step": 7240 }, { "epoch": 0.8724428399518652, "grad_norm": 6.106263160705566, "learning_rate": 0.000199996938184642, "loss": 2.387, "step": 7250 }, { "epoch": 0.8736462093862816, "grad_norm": 4.6583709716796875, "learning_rate": 0.00019999690834271715, "loss": 2.0932, "step": 7260 }, { "epoch": 0.8748495788206979, "grad_norm": 2.823810338973999, "learning_rate": 0.0001999968783560721, "loss": 2.1311, "step": 7270 }, { "epoch": 0.8760529482551144, "grad_norm": 5.98362398147583, "learning_rate": 0.0001999968482247068, "loss": 2.1842, "step": 7280 }, { "epoch": 0.8772563176895307, "grad_norm": 3.847651720046997, "learning_rate": 0.00019999681794862143, "loss": 2.2082, "step": 7290 }, { "epoch": 0.8784596871239471, "grad_norm": 8.450338363647461, "learning_rate": 0.00019999678752781602, "loss": 2.2407, "step": 7300 }, { "epoch": 0.8796630565583634, "grad_norm": 6.108689785003662, "learning_rate": 0.0001999967569622905, "loss": 2.1711, "step": 7310 }, { "epoch": 0.8808664259927798, "grad_norm": 3.1826913356781006, "learning_rate": 0.000199996726252045, "loss": 2.3579, "step": 7320 }, { "epoch": 0.8820697954271961, "grad_norm": 4.625455856323242, "learning_rate": 0.00019999669539707955, "loss": 2.0504, "step": 7330 }, { "epoch": 0.8832731648616126, "grad_norm": 5.346113681793213, "learning_rate": 0.0001999966643973942, "loss": 2.075, "step": 7340 }, { "epoch": 0.8844765342960289, "grad_norm": 5.861973285675049, "learning_rate": 0.00019999663325298897, "loss": 2.0196, "step": 7350 }, { "epoch": 0.8856799037304453, "grad_norm": 4.9203972816467285, "learning_rate": 0.00019999660196386396, "loss": 2.0732, "step": 7360 }, { "epoch": 0.8868832731648616, "grad_norm": 3.6702864170074463, "learning_rate": 0.00019999657053001917, "loss": 2.1506, "step": 7370 }, { "epoch": 0.8880866425992779, "grad_norm": 4.6571431159973145, "learning_rate": 0.00019999653895145462, "loss": 2.2488, "step": 7380 }, { "epoch": 0.8892900120336944, "grad_norm": 4.352538585662842, "learning_rate": 0.00019999650722817044, "loss": 2.3038, "step": 7390 }, { "epoch": 0.8904933814681107, "grad_norm": 7.005280494689941, "learning_rate": 0.0001999964753601666, "loss": 2.4323, "step": 7400 }, { "epoch": 0.8916967509025271, "grad_norm": 4.320887088775635, "learning_rate": 0.00019999644334744315, "loss": 2.2703, "step": 7410 }, { "epoch": 0.8929001203369434, "grad_norm": 3.865842580795288, "learning_rate": 0.00019999641119000015, "loss": 2.3429, "step": 7420 }, { "epoch": 0.8941034897713598, "grad_norm": 5.025181293487549, "learning_rate": 0.00019999637888783772, "loss": 2.0387, "step": 7430 }, { "epoch": 0.8953068592057761, "grad_norm": 5.02504301071167, "learning_rate": 0.00019999634644095578, "loss": 2.357, "step": 7440 }, { "epoch": 0.8965102286401926, "grad_norm": 6.646202087402344, "learning_rate": 0.0001999963138493545, "loss": 2.4796, "step": 7450 }, { "epoch": 0.8977135980746089, "grad_norm": 4.324987888336182, "learning_rate": 0.0001999962811130338, "loss": 2.2562, "step": 7460 }, { "epoch": 0.8989169675090253, "grad_norm": 3.8772072792053223, "learning_rate": 0.00019999624823199383, "loss": 2.0955, "step": 7470 }, { "epoch": 0.9001203369434416, "grad_norm": 5.725499153137207, "learning_rate": 0.0001999962152062346, "loss": 2.0614, "step": 7480 }, { "epoch": 0.901323706377858, "grad_norm": 4.010166645050049, "learning_rate": 0.00019999618203575615, "loss": 2.168, "step": 7490 }, { "epoch": 0.9025270758122743, "grad_norm": 6.155289173126221, "learning_rate": 0.00019999614872055852, "loss": 2.3418, "step": 7500 }, { "epoch": 0.9037304452466908, "grad_norm": 4.861774444580078, "learning_rate": 0.0001999961152606418, "loss": 2.0682, "step": 7510 }, { "epoch": 0.9049338146811071, "grad_norm": 4.0041823387146, "learning_rate": 0.000199996081656006, "loss": 2.0419, "step": 7520 }, { "epoch": 0.9061371841155235, "grad_norm": 7.270376682281494, "learning_rate": 0.00019999604790665117, "loss": 1.976, "step": 7530 }, { "epoch": 0.9073405535499398, "grad_norm": 4.057343006134033, "learning_rate": 0.0001999960140125774, "loss": 2.3294, "step": 7540 }, { "epoch": 0.9085439229843562, "grad_norm": 5.586544036865234, "learning_rate": 0.00019999597997378467, "loss": 2.2769, "step": 7550 }, { "epoch": 0.9097472924187726, "grad_norm": 4.569085121154785, "learning_rate": 0.0001999959457902731, "loss": 2.2375, "step": 7560 }, { "epoch": 0.910950661853189, "grad_norm": 3.8890581130981445, "learning_rate": 0.0001999959114620427, "loss": 2.1112, "step": 7570 }, { "epoch": 0.9121540312876053, "grad_norm": 6.173394203186035, "learning_rate": 0.00019999587698909355, "loss": 2.1902, "step": 7580 }, { "epoch": 0.9133574007220217, "grad_norm": 5.014927387237549, "learning_rate": 0.00019999584237142566, "loss": 2.1683, "step": 7590 }, { "epoch": 0.914560770156438, "grad_norm": 5.847899913787842, "learning_rate": 0.0001999958076090391, "loss": 2.3996, "step": 7600 }, { "epoch": 0.9157641395908543, "grad_norm": 5.280930995941162, "learning_rate": 0.00019999577270193392, "loss": 2.0922, "step": 7610 }, { "epoch": 0.9169675090252708, "grad_norm": 3.5625391006469727, "learning_rate": 0.00019999573765011011, "loss": 2.1937, "step": 7620 }, { "epoch": 0.9181708784596871, "grad_norm": 4.805070400238037, "learning_rate": 0.00019999570245356787, "loss": 2.1327, "step": 7630 }, { "epoch": 0.9193742478941035, "grad_norm": 5.546348571777344, "learning_rate": 0.0001999956671123071, "loss": 2.2091, "step": 7640 }, { "epoch": 0.9205776173285198, "grad_norm": 7.768810749053955, "learning_rate": 0.00019999563162632794, "loss": 2.5238, "step": 7650 }, { "epoch": 0.9217809867629362, "grad_norm": 5.4269561767578125, "learning_rate": 0.00019999559599563044, "loss": 2.0429, "step": 7660 }, { "epoch": 0.9229843561973526, "grad_norm": 4.423208713531494, "learning_rate": 0.00019999556022021458, "loss": 2.3963, "step": 7670 }, { "epoch": 0.924187725631769, "grad_norm": 4.935546398162842, "learning_rate": 0.00019999552430008044, "loss": 2.1661, "step": 7680 }, { "epoch": 0.9253910950661853, "grad_norm": 4.095705986022949, "learning_rate": 0.00019999548823522814, "loss": 1.9773, "step": 7690 }, { "epoch": 0.9265944645006017, "grad_norm": 6.3755340576171875, "learning_rate": 0.00019999545202565764, "loss": 2.0078, "step": 7700 }, { "epoch": 0.927797833935018, "grad_norm": 5.321127891540527, "learning_rate": 0.00019999541567136905, "loss": 2.2288, "step": 7710 }, { "epoch": 0.9290012033694344, "grad_norm": 3.6329126358032227, "learning_rate": 0.0001999953791723624, "loss": 2.1282, "step": 7720 }, { "epoch": 0.9302045728038508, "grad_norm": 7.373484134674072, "learning_rate": 0.00019999534252863776, "loss": 2.215, "step": 7730 }, { "epoch": 0.9314079422382672, "grad_norm": 4.53382682800293, "learning_rate": 0.0001999953057401952, "loss": 2.2439, "step": 7740 }, { "epoch": 0.9326113116726835, "grad_norm": 5.549760341644287, "learning_rate": 0.00019999526880703467, "loss": 2.4746, "step": 7750 }, { "epoch": 0.9338146811070999, "grad_norm": 5.585546016693115, "learning_rate": 0.00019999523172915633, "loss": 2.1636, "step": 7760 }, { "epoch": 0.9350180505415162, "grad_norm": 4.102782726287842, "learning_rate": 0.0001999951945065602, "loss": 2.2475, "step": 7770 }, { "epoch": 0.9362214199759326, "grad_norm": 5.998604774475098, "learning_rate": 0.00019999515713924637, "loss": 2.262, "step": 7780 }, { "epoch": 0.937424789410349, "grad_norm": 4.92228364944458, "learning_rate": 0.00019999511962721483, "loss": 2.1335, "step": 7790 }, { "epoch": 0.9386281588447654, "grad_norm": 6.664450168609619, "learning_rate": 0.0001999950819704657, "loss": 2.1727, "step": 7800 }, { "epoch": 0.9398315282791817, "grad_norm": 4.493361949920654, "learning_rate": 0.00019999504416899893, "loss": 2.0277, "step": 7810 }, { "epoch": 0.941034897713598, "grad_norm": 3.9881207942962646, "learning_rate": 0.00019999500622281467, "loss": 1.8935, "step": 7820 }, { "epoch": 0.9422382671480144, "grad_norm": 5.744288921356201, "learning_rate": 0.00019999496813191296, "loss": 2.204, "step": 7830 }, { "epoch": 0.9434416365824309, "grad_norm": 3.685032367706299, "learning_rate": 0.00019999492989629386, "loss": 2.3238, "step": 7840 }, { "epoch": 0.9446450060168472, "grad_norm": 6.588531970977783, "learning_rate": 0.00019999489151595738, "loss": 2.0914, "step": 7850 }, { "epoch": 0.9458483754512635, "grad_norm": 5.450845718383789, "learning_rate": 0.0001999948529909036, "loss": 1.9164, "step": 7860 }, { "epoch": 0.9470517448856799, "grad_norm": 3.3784396648406982, "learning_rate": 0.0001999948143211326, "loss": 2.0871, "step": 7870 }, { "epoch": 0.9482551143200962, "grad_norm": 6.645673751831055, "learning_rate": 0.0001999947755066444, "loss": 2.1626, "step": 7880 }, { "epoch": 0.9494584837545126, "grad_norm": 3.890578508377075, "learning_rate": 0.0001999947365474391, "loss": 2.1516, "step": 7890 }, { "epoch": 0.950661853188929, "grad_norm": 6.042820453643799, "learning_rate": 0.0001999946974435167, "loss": 2.4586, "step": 7900 }, { "epoch": 0.9518652226233454, "grad_norm": 4.843379974365234, "learning_rate": 0.0001999946581948773, "loss": 2.0618, "step": 7910 }, { "epoch": 0.9530685920577617, "grad_norm": 3.511101484298706, "learning_rate": 0.00019999461880152093, "loss": 2.3543, "step": 7920 }, { "epoch": 0.9542719614921781, "grad_norm": 5.819178104400635, "learning_rate": 0.00019999457926344767, "loss": 2.1943, "step": 7930 }, { "epoch": 0.9554753309265944, "grad_norm": 4.102616310119629, "learning_rate": 0.00019999453958065755, "loss": 2.1075, "step": 7940 }, { "epoch": 0.9566787003610109, "grad_norm": 6.817751884460449, "learning_rate": 0.00019999449975315064, "loss": 2.2178, "step": 7950 }, { "epoch": 0.9578820697954272, "grad_norm": 5.780127048492432, "learning_rate": 0.00019999445978092703, "loss": 1.9242, "step": 7960 }, { "epoch": 0.9590854392298436, "grad_norm": 4.017819881439209, "learning_rate": 0.00019999441966398674, "loss": 2.4252, "step": 7970 }, { "epoch": 0.9602888086642599, "grad_norm": 6.4633331298828125, "learning_rate": 0.00019999437940232983, "loss": 2.0193, "step": 7980 }, { "epoch": 0.9614921780986763, "grad_norm": 4.9790496826171875, "learning_rate": 0.00019999433899595633, "loss": 2.1775, "step": 7990 }, { "epoch": 0.9626955475330926, "grad_norm": 6.155992031097412, "learning_rate": 0.0001999942984448664, "loss": 2.3467, "step": 8000 }, { "epoch": 0.9638989169675091, "grad_norm": 4.168816566467285, "learning_rate": 0.00019999425774906, "loss": 2.1639, "step": 8010 }, { "epoch": 0.9651022864019254, "grad_norm": 4.128564357757568, "learning_rate": 0.00019999421690853722, "loss": 2.0396, "step": 8020 }, { "epoch": 0.9663056558363418, "grad_norm": 5.868348598480225, "learning_rate": 0.00019999417592329812, "loss": 1.9762, "step": 8030 }, { "epoch": 0.9675090252707581, "grad_norm": 4.708838939666748, "learning_rate": 0.00019999413479334277, "loss": 2.1418, "step": 8040 }, { "epoch": 0.9687123947051745, "grad_norm": 6.427163600921631, "learning_rate": 0.0001999940935186712, "loss": 2.26, "step": 8050 }, { "epoch": 0.9699157641395909, "grad_norm": 4.871311664581299, "learning_rate": 0.00019999405209928352, "loss": 2.2123, "step": 8060 }, { "epoch": 0.9711191335740073, "grad_norm": 3.6231939792633057, "learning_rate": 0.0001999940105351797, "loss": 2.2806, "step": 8070 }, { "epoch": 0.9723225030084236, "grad_norm": 5.265206813812256, "learning_rate": 0.0001999939688263599, "loss": 2.1661, "step": 8080 }, { "epoch": 0.97352587244284, "grad_norm": 4.007277488708496, "learning_rate": 0.00019999392697282416, "loss": 2.1418, "step": 8090 }, { "epoch": 0.9747292418772563, "grad_norm": 5.873310089111328, "learning_rate": 0.0001999938849745725, "loss": 2.5009, "step": 8100 }, { "epoch": 0.9759326113116726, "grad_norm": 5.166054725646973, "learning_rate": 0.000199993842831605, "loss": 2.2151, "step": 8110 }, { "epoch": 0.9771359807460891, "grad_norm": 4.688544750213623, "learning_rate": 0.00019999380054392172, "loss": 2.1889, "step": 8120 }, { "epoch": 0.9783393501805054, "grad_norm": 5.526015758514404, "learning_rate": 0.00019999375811152273, "loss": 2.2725, "step": 8130 }, { "epoch": 0.9795427196149218, "grad_norm": 4.228044033050537, "learning_rate": 0.00019999371553440807, "loss": 2.3354, "step": 8140 }, { "epoch": 0.9807460890493381, "grad_norm": 5.739175319671631, "learning_rate": 0.00019999367281257785, "loss": 2.2704, "step": 8150 }, { "epoch": 0.9819494584837545, "grad_norm": 4.4686665534973145, "learning_rate": 0.00019999362994603206, "loss": 2.1563, "step": 8160 }, { "epoch": 0.9831528279181708, "grad_norm": 4.358283042907715, "learning_rate": 0.00019999358693477079, "loss": 2.3027, "step": 8170 }, { "epoch": 0.9843561973525873, "grad_norm": 4.596220016479492, "learning_rate": 0.00019999354377879414, "loss": 2.0057, "step": 8180 }, { "epoch": 0.9855595667870036, "grad_norm": 4.477689743041992, "learning_rate": 0.0001999935004781021, "loss": 2.0681, "step": 8190 }, { "epoch": 0.98676293622142, "grad_norm": 6.645779132843018, "learning_rate": 0.00019999345703269485, "loss": 2.4531, "step": 8200 }, { "epoch": 0.9879663056558363, "grad_norm": 4.308712959289551, "learning_rate": 0.00019999341344257234, "loss": 2.1801, "step": 8210 }, { "epoch": 0.9891696750902527, "grad_norm": 3.8493010997772217, "learning_rate": 0.0001999933697077347, "loss": 2.239, "step": 8220 }, { "epoch": 0.9903730445246691, "grad_norm": 5.303299427032471, "learning_rate": 0.00019999332582818193, "loss": 2.235, "step": 8230 }, { "epoch": 0.9915764139590855, "grad_norm": 4.349549770355225, "learning_rate": 0.00019999328180391414, "loss": 2.1783, "step": 8240 }, { "epoch": 0.9927797833935018, "grad_norm": 7.270279884338379, "learning_rate": 0.00019999323763493138, "loss": 2.3507, "step": 8250 }, { "epoch": 0.9939831528279182, "grad_norm": 4.912819862365723, "learning_rate": 0.00019999319332123373, "loss": 2.1266, "step": 8260 }, { "epoch": 0.9951865222623345, "grad_norm": 3.0928843021392822, "learning_rate": 0.00019999314886282122, "loss": 2.4408, "step": 8270 }, { "epoch": 0.9963898916967509, "grad_norm": 7.019705772399902, "learning_rate": 0.00019999310425969398, "loss": 2.2645, "step": 8280 }, { "epoch": 0.9975932611311673, "grad_norm": 4.224442005157471, "learning_rate": 0.000199993059511852, "loss": 2.2539, "step": 8290 }, { "epoch": 0.9987966305655837, "grad_norm": 5.672049045562744, "learning_rate": 0.0001999930146192954, "loss": 2.1419, "step": 8300 }, { "epoch": 1.0, "grad_norm": 5.581302165985107, "learning_rate": 0.00019999296958202417, "loss": 2.1409, "step": 8310 }, { "epoch": 1.0, "eval_loss": 2.080259084701538, "eval_runtime": 118.705, "eval_samples_per_second": 62.23, "eval_steps_per_second": 7.784, "step": 8310 }, { "epoch": 1.0012033694344165, "grad_norm": 4.850121974945068, "learning_rate": 0.00019999292440003847, "loss": 1.776, "step": 8320 }, { "epoch": 1.0024067388688327, "grad_norm": 3.2496631145477295, "learning_rate": 0.0001999928790733383, "loss": 2.1302, "step": 8330 }, { "epoch": 1.0036101083032491, "grad_norm": 5.475099086761475, "learning_rate": 0.00019999283360192375, "loss": 1.7934, "step": 8340 }, { "epoch": 1.0048134777376654, "grad_norm": 4.7173380851745605, "learning_rate": 0.00019999278798579488, "loss": 2.1552, "step": 8350 }, { "epoch": 1.0060168471720818, "grad_norm": 6.577690601348877, "learning_rate": 0.00019999274222495177, "loss": 2.0915, "step": 8360 }, { "epoch": 1.0072202166064983, "grad_norm": 4.1316375732421875, "learning_rate": 0.00019999269631939447, "loss": 1.8381, "step": 8370 }, { "epoch": 1.0084235860409145, "grad_norm": 4.386764049530029, "learning_rate": 0.00019999265026912307, "loss": 2.001, "step": 8380 }, { "epoch": 1.009626955475331, "grad_norm": 5.818033695220947, "learning_rate": 0.0001999926040741376, "loss": 1.9157, "step": 8390 }, { "epoch": 1.0108303249097472, "grad_norm": 4.358513832092285, "learning_rate": 0.00019999255773443813, "loss": 2.1462, "step": 8400 }, { "epoch": 1.0120336943441637, "grad_norm": 6.169714450836182, "learning_rate": 0.00019999251125002476, "loss": 1.9187, "step": 8410 }, { "epoch": 1.01323706377858, "grad_norm": 3.796625852584839, "learning_rate": 0.00019999246462089755, "loss": 1.7694, "step": 8420 }, { "epoch": 1.0144404332129964, "grad_norm": 3.1594159603118896, "learning_rate": 0.0001999924178470565, "loss": 1.9839, "step": 8430 }, { "epoch": 1.0156438026474128, "grad_norm": 5.432295322418213, "learning_rate": 0.00019999237092850177, "loss": 1.8924, "step": 8440 }, { "epoch": 1.016847172081829, "grad_norm": 3.7919535636901855, "learning_rate": 0.0001999923238652334, "loss": 2.0057, "step": 8450 }, { "epoch": 1.0180505415162455, "grad_norm": 6.825023651123047, "learning_rate": 0.00019999227665725146, "loss": 2.2807, "step": 8460 }, { "epoch": 1.0192539109506618, "grad_norm": 4.782787322998047, "learning_rate": 0.000199992229304556, "loss": 1.9329, "step": 8470 }, { "epoch": 1.0204572803850782, "grad_norm": 3.1457180976867676, "learning_rate": 0.00019999218180714707, "loss": 2.1785, "step": 8480 }, { "epoch": 1.0216606498194947, "grad_norm": 5.4611945152282715, "learning_rate": 0.00019999213416502479, "loss": 1.9522, "step": 8490 }, { "epoch": 1.022864019253911, "grad_norm": 4.97552490234375, "learning_rate": 0.0001999920863781892, "loss": 2.0244, "step": 8500 }, { "epoch": 1.0240673886883274, "grad_norm": 5.632757663726807, "learning_rate": 0.0001999920384466404, "loss": 2.0238, "step": 8510 }, { "epoch": 1.0252707581227436, "grad_norm": 5.427265167236328, "learning_rate": 0.0001999919903703784, "loss": 2.0926, "step": 8520 }, { "epoch": 1.02647412755716, "grad_norm": 4.070896625518799, "learning_rate": 0.00019999194214940328, "loss": 2.0128, "step": 8530 }, { "epoch": 1.0276774969915765, "grad_norm": 5.544242858886719, "learning_rate": 0.00019999189378371517, "loss": 2.0243, "step": 8540 }, { "epoch": 1.0288808664259927, "grad_norm": 3.901855230331421, "learning_rate": 0.0001999918452733141, "loss": 1.9422, "step": 8550 }, { "epoch": 1.0300842358604092, "grad_norm": 5.984253883361816, "learning_rate": 0.00019999179661820015, "loss": 1.9022, "step": 8560 }, { "epoch": 1.0312876052948254, "grad_norm": 4.536534786224365, "learning_rate": 0.00019999174781837336, "loss": 1.8929, "step": 8570 }, { "epoch": 1.032490974729242, "grad_norm": 3.4476656913757324, "learning_rate": 0.00019999169887383383, "loss": 1.9753, "step": 8580 }, { "epoch": 1.0336943441636581, "grad_norm": 5.469036102294922, "learning_rate": 0.00019999164978458164, "loss": 1.8637, "step": 8590 }, { "epoch": 1.0348977135980746, "grad_norm": 3.7836496829986572, "learning_rate": 0.00019999160055061683, "loss": 1.9978, "step": 8600 }, { "epoch": 1.036101083032491, "grad_norm": 6.112541198730469, "learning_rate": 0.0001999915511719395, "loss": 2.142, "step": 8610 }, { "epoch": 1.0373044524669073, "grad_norm": 5.205537796020508, "learning_rate": 0.0001999915016485497, "loss": 2.1229, "step": 8620 }, { "epoch": 1.0385078219013237, "grad_norm": 4.5776519775390625, "learning_rate": 0.0001999914519804475, "loss": 2.0338, "step": 8630 }, { "epoch": 1.03971119133574, "grad_norm": 7.718768119812012, "learning_rate": 0.000199991402167633, "loss": 1.9866, "step": 8640 }, { "epoch": 1.0409145607701564, "grad_norm": 4.679311275482178, "learning_rate": 0.00019999135221010624, "loss": 2.1309, "step": 8650 }, { "epoch": 1.0421179302045729, "grad_norm": 6.655802249908447, "learning_rate": 0.0001999913021078673, "loss": 1.8992, "step": 8660 }, { "epoch": 1.0433212996389891, "grad_norm": 5.339411735534668, "learning_rate": 0.0001999912518609163, "loss": 2.2236, "step": 8670 }, { "epoch": 1.0445246690734056, "grad_norm": 3.4371402263641357, "learning_rate": 0.00019999120146925323, "loss": 1.9414, "step": 8680 }, { "epoch": 1.0457280385078218, "grad_norm": 5.796459197998047, "learning_rate": 0.0001999911509328782, "loss": 2.1834, "step": 8690 }, { "epoch": 1.0469314079422383, "grad_norm": 4.311466693878174, "learning_rate": 0.0001999911002517913, "loss": 2.0023, "step": 8700 }, { "epoch": 1.0481347773766547, "grad_norm": 6.57638692855835, "learning_rate": 0.00019999104942599263, "loss": 1.9436, "step": 8710 }, { "epoch": 1.049338146811071, "grad_norm": 6.028642654418945, "learning_rate": 0.00019999099845548218, "loss": 1.7673, "step": 8720 }, { "epoch": 1.0505415162454874, "grad_norm": 3.5979578495025635, "learning_rate": 0.00019999094734026007, "loss": 2.0021, "step": 8730 }, { "epoch": 1.0517448856799037, "grad_norm": 6.466935634613037, "learning_rate": 0.00019999089608032638, "loss": 2.0623, "step": 8740 }, { "epoch": 1.05294825511432, "grad_norm": 4.704542636871338, "learning_rate": 0.00019999084467568117, "loss": 1.9281, "step": 8750 }, { "epoch": 1.0541516245487366, "grad_norm": 6.5226311683654785, "learning_rate": 0.00019999079312632453, "loss": 2.0969, "step": 8760 }, { "epoch": 1.0553549939831528, "grad_norm": 5.316224575042725, "learning_rate": 0.0001999907414322565, "loss": 1.9609, "step": 8770 }, { "epoch": 1.0565583634175693, "grad_norm": 3.4058215618133545, "learning_rate": 0.00019999068959347725, "loss": 2.2148, "step": 8780 }, { "epoch": 1.0577617328519855, "grad_norm": 6.79750394821167, "learning_rate": 0.00019999063760998671, "loss": 1.8923, "step": 8790 }, { "epoch": 1.058965102286402, "grad_norm": 4.632766246795654, "learning_rate": 0.00019999058548178506, "loss": 2.0139, "step": 8800 }, { "epoch": 1.0601684717208182, "grad_norm": 5.880581378936768, "learning_rate": 0.00019999053320887236, "loss": 1.9082, "step": 8810 }, { "epoch": 1.0613718411552346, "grad_norm": 4.673683166503906, "learning_rate": 0.00019999048079124865, "loss": 2.0224, "step": 8820 }, { "epoch": 1.062575210589651, "grad_norm": 3.230836868286133, "learning_rate": 0.00019999042822891404, "loss": 2.106, "step": 8830 }, { "epoch": 1.0637785800240673, "grad_norm": 5.293460369110107, "learning_rate": 0.00019999037552186858, "loss": 2.36, "step": 8840 }, { "epoch": 1.0649819494584838, "grad_norm": 4.429105281829834, "learning_rate": 0.00019999032267011235, "loss": 1.9762, "step": 8850 }, { "epoch": 1.0661853188929, "grad_norm": 6.840360641479492, "learning_rate": 0.00019999026967364546, "loss": 1.9054, "step": 8860 }, { "epoch": 1.0673886883273165, "grad_norm": 4.287208557128906, "learning_rate": 0.00019999021653246797, "loss": 1.9889, "step": 8870 }, { "epoch": 1.068592057761733, "grad_norm": 2.9769177436828613, "learning_rate": 0.00019999016324657992, "loss": 1.941, "step": 8880 }, { "epoch": 1.0697954271961492, "grad_norm": 7.548506259918213, "learning_rate": 0.00019999010981598146, "loss": 2.0448, "step": 8890 }, { "epoch": 1.0709987966305656, "grad_norm": 4.892628192901611, "learning_rate": 0.0001999900562406726, "loss": 2.0519, "step": 8900 }, { "epoch": 1.0722021660649819, "grad_norm": 7.174940586090088, "learning_rate": 0.0001999900025206534, "loss": 2.0359, "step": 8910 }, { "epoch": 1.0734055354993983, "grad_norm": 4.764652252197266, "learning_rate": 0.00019998994865592405, "loss": 1.812, "step": 8920 }, { "epoch": 1.0746089049338148, "grad_norm": 3.470702648162842, "learning_rate": 0.00019998989464648453, "loss": 2.1387, "step": 8930 }, { "epoch": 1.075812274368231, "grad_norm": 5.508899688720703, "learning_rate": 0.00019998984049233494, "loss": 1.8764, "step": 8940 }, { "epoch": 1.0770156438026475, "grad_norm": 4.350720405578613, "learning_rate": 0.00019998978619347537, "loss": 1.9498, "step": 8950 }, { "epoch": 1.0782190132370637, "grad_norm": 7.807206630706787, "learning_rate": 0.0001999897317499059, "loss": 2.1676, "step": 8960 }, { "epoch": 1.0794223826714802, "grad_norm": 5.922019958496094, "learning_rate": 0.00019998967716162658, "loss": 1.8992, "step": 8970 }, { "epoch": 1.0806257521058966, "grad_norm": 3.9038050174713135, "learning_rate": 0.0001999896224286375, "loss": 2.2461, "step": 8980 }, { "epoch": 1.0818291215403129, "grad_norm": 4.654329299926758, "learning_rate": 0.00019998956755093878, "loss": 2.008, "step": 8990 }, { "epoch": 1.0830324909747293, "grad_norm": 4.3668036460876465, "learning_rate": 0.00019998951252853047, "loss": 1.918, "step": 9000 }, { "epoch": 1.0842358604091455, "grad_norm": 6.6642842292785645, "learning_rate": 0.00019998945736141263, "loss": 1.9944, "step": 9010 }, { "epoch": 1.085439229843562, "grad_norm": 4.998341083526611, "learning_rate": 0.00019998940204958538, "loss": 2.0143, "step": 9020 }, { "epoch": 1.0866425992779782, "grad_norm": 3.4509310722351074, "learning_rate": 0.00019998934659304875, "loss": 2.1054, "step": 9030 }, { "epoch": 1.0878459687123947, "grad_norm": 4.551707744598389, "learning_rate": 0.00019998929099180286, "loss": 2.0293, "step": 9040 }, { "epoch": 1.0890493381468112, "grad_norm": 3.933088779449463, "learning_rate": 0.0001999892352458478, "loss": 2.0668, "step": 9050 }, { "epoch": 1.0902527075812274, "grad_norm": 6.708771228790283, "learning_rate": 0.00019998917935518362, "loss": 2.1052, "step": 9060 }, { "epoch": 1.0914560770156438, "grad_norm": 5.076292991638184, "learning_rate": 0.0001999891233198104, "loss": 2.0848, "step": 9070 }, { "epoch": 1.09265944645006, "grad_norm": 3.7987160682678223, "learning_rate": 0.00019998906713972824, "loss": 1.8708, "step": 9080 }, { "epoch": 1.0938628158844765, "grad_norm": 5.70545768737793, "learning_rate": 0.0001999890108149372, "loss": 2.0238, "step": 9090 }, { "epoch": 1.095066185318893, "grad_norm": 5.972437858581543, "learning_rate": 0.00019998895434543738, "loss": 1.8914, "step": 9100 }, { "epoch": 1.0962695547533092, "grad_norm": 5.777250289916992, "learning_rate": 0.00019998889773122888, "loss": 2.1697, "step": 9110 }, { "epoch": 1.0974729241877257, "grad_norm": 4.514420032501221, "learning_rate": 0.0001999888409723117, "loss": 2.0899, "step": 9120 }, { "epoch": 1.098676293622142, "grad_norm": 3.5541980266571045, "learning_rate": 0.00019998878406868605, "loss": 1.9751, "step": 9130 }, { "epoch": 1.0998796630565584, "grad_norm": 5.4803266525268555, "learning_rate": 0.00019998872702035192, "loss": 1.9228, "step": 9140 }, { "epoch": 1.1010830324909748, "grad_norm": 4.44549560546875, "learning_rate": 0.0001999886698273094, "loss": 1.8906, "step": 9150 }, { "epoch": 1.102286401925391, "grad_norm": 5.911207675933838, "learning_rate": 0.0001999886124895586, "loss": 2.2008, "step": 9160 }, { "epoch": 1.1034897713598075, "grad_norm": 5.027959823608398, "learning_rate": 0.0001999885550070996, "loss": 1.9231, "step": 9170 }, { "epoch": 1.1046931407942238, "grad_norm": 4.950850009918213, "learning_rate": 0.00019998849737993247, "loss": 2.0911, "step": 9180 }, { "epoch": 1.1058965102286402, "grad_norm": 5.364449977874756, "learning_rate": 0.0001999884396080573, "loss": 1.9605, "step": 9190 }, { "epoch": 1.1070998796630565, "grad_norm": 4.605388641357422, "learning_rate": 0.00019998838169147415, "loss": 2.1481, "step": 9200 }, { "epoch": 1.108303249097473, "grad_norm": 5.884681224822998, "learning_rate": 0.00019998832363018313, "loss": 1.8628, "step": 9210 }, { "epoch": 1.1095066185318894, "grad_norm": 4.382067680358887, "learning_rate": 0.00019998826542418435, "loss": 1.9347, "step": 9220 }, { "epoch": 1.1107099879663056, "grad_norm": 3.439744472503662, "learning_rate": 0.00019998820707347784, "loss": 2.1023, "step": 9230 }, { "epoch": 1.111913357400722, "grad_norm": 6.21846866607666, "learning_rate": 0.00019998814857806368, "loss": 2.0217, "step": 9240 }, { "epoch": 1.1131167268351383, "grad_norm": 4.801575660705566, "learning_rate": 0.00019998808993794203, "loss": 2.0597, "step": 9250 }, { "epoch": 1.1143200962695547, "grad_norm": 6.326198577880859, "learning_rate": 0.0001999880311531129, "loss": 2.066, "step": 9260 }, { "epoch": 1.1155234657039712, "grad_norm": 4.852086544036865, "learning_rate": 0.0001999879722235764, "loss": 1.8671, "step": 9270 }, { "epoch": 1.1167268351383874, "grad_norm": 4.770281791687012, "learning_rate": 0.00019998791314933265, "loss": 2.1625, "step": 9280 }, { "epoch": 1.117930204572804, "grad_norm": 5.579623699188232, "learning_rate": 0.0001999878539303817, "loss": 1.9572, "step": 9290 }, { "epoch": 1.1191335740072201, "grad_norm": 3.959735155105591, "learning_rate": 0.00019998779456672364, "loss": 2.0381, "step": 9300 }, { "epoch": 1.1203369434416366, "grad_norm": 8.383445739746094, "learning_rate": 0.0001999877350583585, "loss": 2.0783, "step": 9310 }, { "epoch": 1.121540312876053, "grad_norm": 4.759198188781738, "learning_rate": 0.00019998767540528646, "loss": 2.0799, "step": 9320 }, { "epoch": 1.1227436823104693, "grad_norm": 3.45686674118042, "learning_rate": 0.0001999876156075076, "loss": 2.1875, "step": 9330 }, { "epoch": 1.1239470517448857, "grad_norm": 5.586175441741943, "learning_rate": 0.0001999875556650219, "loss": 2.1816, "step": 9340 }, { "epoch": 1.125150421179302, "grad_norm": 4.440593242645264, "learning_rate": 0.00019998749557782958, "loss": 1.8717, "step": 9350 }, { "epoch": 1.1263537906137184, "grad_norm": 7.6358489990234375, "learning_rate": 0.00019998743534593066, "loss": 2.0476, "step": 9360 }, { "epoch": 1.1275571600481347, "grad_norm": 4.324075222015381, "learning_rate": 0.00019998737496932522, "loss": 1.9486, "step": 9370 }, { "epoch": 1.1287605294825511, "grad_norm": 4.16616153717041, "learning_rate": 0.00019998731444801339, "loss": 2.0713, "step": 9380 }, { "epoch": 1.1299638989169676, "grad_norm": 5.545494079589844, "learning_rate": 0.0001999872537819952, "loss": 2.1747, "step": 9390 }, { "epoch": 1.1311672683513838, "grad_norm": 4.747119426727295, "learning_rate": 0.00019998719297127077, "loss": 2.1822, "step": 9400 }, { "epoch": 1.1323706377858003, "grad_norm": 6.263423919677734, "learning_rate": 0.00019998713201584018, "loss": 2.0005, "step": 9410 }, { "epoch": 1.1335740072202167, "grad_norm": 5.5049896240234375, "learning_rate": 0.00019998707091570353, "loss": 1.9868, "step": 9420 }, { "epoch": 1.134777376654633, "grad_norm": 2.4110701084136963, "learning_rate": 0.00019998700967086092, "loss": 1.8796, "step": 9430 }, { "epoch": 1.1359807460890494, "grad_norm": 5.396789073944092, "learning_rate": 0.00019998694828131242, "loss": 2.1325, "step": 9440 }, { "epoch": 1.1371841155234657, "grad_norm": 4.019585609436035, "learning_rate": 0.0001999868867470581, "loss": 2.0537, "step": 9450 }, { "epoch": 1.1383874849578821, "grad_norm": 7.141306400299072, "learning_rate": 0.00019998682506809808, "loss": 1.94, "step": 9460 }, { "epoch": 1.1395908543922983, "grad_norm": 5.070889949798584, "learning_rate": 0.00019998676324443242, "loss": 1.8988, "step": 9470 }, { "epoch": 1.1407942238267148, "grad_norm": 3.384098768234253, "learning_rate": 0.00019998670127606126, "loss": 1.9526, "step": 9480 }, { "epoch": 1.1419975932611313, "grad_norm": 6.994525909423828, "learning_rate": 0.00019998663916298464, "loss": 2.0269, "step": 9490 }, { "epoch": 1.1432009626955475, "grad_norm": 3.67915415763855, "learning_rate": 0.00019998657690520265, "loss": 2.1269, "step": 9500 }, { "epoch": 1.144404332129964, "grad_norm": 7.615024089813232, "learning_rate": 0.00019998651450271542, "loss": 1.9923, "step": 9510 }, { "epoch": 1.1456077015643802, "grad_norm": 5.535434246063232, "learning_rate": 0.000199986451955523, "loss": 1.8611, "step": 9520 }, { "epoch": 1.1468110709987966, "grad_norm": 3.3452887535095215, "learning_rate": 0.00019998638926362552, "loss": 2.0757, "step": 9530 }, { "epoch": 1.1480144404332129, "grad_norm": 4.985581398010254, "learning_rate": 0.00019998632642702303, "loss": 2.0092, "step": 9540 }, { "epoch": 1.1492178098676293, "grad_norm": 4.592456817626953, "learning_rate": 0.00019998626344571566, "loss": 2.1094, "step": 9550 }, { "epoch": 1.1504211793020458, "grad_norm": 7.482632637023926, "learning_rate": 0.00019998620031970349, "loss": 2.0469, "step": 9560 }, { "epoch": 1.151624548736462, "grad_norm": 4.598583221435547, "learning_rate": 0.00019998613704898656, "loss": 2.1008, "step": 9570 }, { "epoch": 1.1528279181708785, "grad_norm": 3.9577908515930176, "learning_rate": 0.00019998607363356502, "loss": 2.0616, "step": 9580 }, { "epoch": 1.154031287605295, "grad_norm": 5.127796649932861, "learning_rate": 0.00019998601007343897, "loss": 2.1729, "step": 9590 }, { "epoch": 1.1552346570397112, "grad_norm": 4.676178455352783, "learning_rate": 0.00019998594636860847, "loss": 2.0056, "step": 9600 }, { "epoch": 1.1564380264741276, "grad_norm": 5.499486923217773, "learning_rate": 0.0001999858825190736, "loss": 2.1094, "step": 9610 }, { "epoch": 1.1576413959085439, "grad_norm": 4.813460826873779, "learning_rate": 0.0001999858185248345, "loss": 1.8542, "step": 9620 }, { "epoch": 1.1588447653429603, "grad_norm": 3.5271053314208984, "learning_rate": 0.00019998575438589117, "loss": 2.1226, "step": 9630 }, { "epoch": 1.1600481347773766, "grad_norm": 6.317688941955566, "learning_rate": 0.00019998569010224384, "loss": 1.9584, "step": 9640 }, { "epoch": 1.161251504211793, "grad_norm": 4.959247589111328, "learning_rate": 0.00019998562567389251, "loss": 1.9842, "step": 9650 }, { "epoch": 1.1624548736462095, "grad_norm": 6.804013252258301, "learning_rate": 0.00019998556110083728, "loss": 2.1276, "step": 9660 }, { "epoch": 1.1636582430806257, "grad_norm": 4.854011535644531, "learning_rate": 0.00019998549638307827, "loss": 1.8973, "step": 9670 }, { "epoch": 1.1648616125150422, "grad_norm": 3.1386466026306152, "learning_rate": 0.00019998543152061556, "loss": 1.9201, "step": 9680 }, { "epoch": 1.1660649819494584, "grad_norm": 5.494327545166016, "learning_rate": 0.00019998536651344925, "loss": 2.0264, "step": 9690 }, { "epoch": 1.1672683513838749, "grad_norm": 3.908991575241089, "learning_rate": 0.00019998530136157943, "loss": 2.0907, "step": 9700 }, { "epoch": 1.168471720818291, "grad_norm": 7.346467018127441, "learning_rate": 0.00019998523606500616, "loss": 2.0548, "step": 9710 }, { "epoch": 1.1696750902527075, "grad_norm": 4.82690954208374, "learning_rate": 0.0001999851706237296, "loss": 1.9129, "step": 9720 }, { "epoch": 1.170878459687124, "grad_norm": 3.32192063331604, "learning_rate": 0.0001999851050377498, "loss": 2.0197, "step": 9730 }, { "epoch": 1.1720818291215402, "grad_norm": 4.709057807922363, "learning_rate": 0.0001999850393070669, "loss": 2.1231, "step": 9740 }, { "epoch": 1.1732851985559567, "grad_norm": 4.261242866516113, "learning_rate": 0.0001999849734316809, "loss": 2.1172, "step": 9750 }, { "epoch": 1.1744885679903732, "grad_norm": 6.338689804077148, "learning_rate": 0.00019998490741159202, "loss": 2.1408, "step": 9760 }, { "epoch": 1.1756919374247894, "grad_norm": 5.811868667602539, "learning_rate": 0.00019998484124680025, "loss": 2.0779, "step": 9770 }, { "epoch": 1.1768953068592058, "grad_norm": 3.0623340606689453, "learning_rate": 0.00019998477493730573, "loss": 2.2344, "step": 9780 }, { "epoch": 1.178098676293622, "grad_norm": 5.205389022827148, "learning_rate": 0.0001999847084831086, "loss": 2.041, "step": 9790 }, { "epoch": 1.1793020457280385, "grad_norm": 6.058391094207764, "learning_rate": 0.0001999846418842089, "loss": 2.0143, "step": 9800 }, { "epoch": 1.1805054151624548, "grad_norm": 7.069987773895264, "learning_rate": 0.0001999845751406067, "loss": 2.1039, "step": 9810 }, { "epoch": 1.1817087845968712, "grad_norm": 4.928779125213623, "learning_rate": 0.00019998450825230214, "loss": 1.8592, "step": 9820 }, { "epoch": 1.1829121540312877, "grad_norm": 3.740241765975952, "learning_rate": 0.00019998444121929534, "loss": 2.1091, "step": 9830 }, { "epoch": 1.184115523465704, "grad_norm": 9.451654434204102, "learning_rate": 0.00019998437404158636, "loss": 2.158, "step": 9840 }, { "epoch": 1.1853188929001204, "grad_norm": 4.026190280914307, "learning_rate": 0.0001999843067191753, "loss": 2.0388, "step": 9850 }, { "epoch": 1.1865222623345366, "grad_norm": 6.800570011138916, "learning_rate": 0.00019998423925206226, "loss": 2.0448, "step": 9860 }, { "epoch": 1.187725631768953, "grad_norm": 4.920462131500244, "learning_rate": 0.00019998417164024735, "loss": 1.8885, "step": 9870 }, { "epoch": 1.1889290012033695, "grad_norm": 3.1672539710998535, "learning_rate": 0.00019998410388373064, "loss": 1.9803, "step": 9880 }, { "epoch": 1.1901323706377858, "grad_norm": 7.906315803527832, "learning_rate": 0.00019998403598251225, "loss": 2.2293, "step": 9890 }, { "epoch": 1.1913357400722022, "grad_norm": 5.510423183441162, "learning_rate": 0.0001999839679365923, "loss": 2.0506, "step": 9900 }, { "epoch": 1.1925391095066185, "grad_norm": 5.803864002227783, "learning_rate": 0.00019998389974597086, "loss": 2.1935, "step": 9910 }, { "epoch": 1.193742478941035, "grad_norm": 5.4688310623168945, "learning_rate": 0.000199983831410648, "loss": 1.8745, "step": 9920 }, { "epoch": 1.1949458483754514, "grad_norm": 4.938477993011475, "learning_rate": 0.00019998376293062385, "loss": 2.1407, "step": 9930 }, { "epoch": 1.1961492178098676, "grad_norm": 5.239728927612305, "learning_rate": 0.00019998369430589854, "loss": 2.0674, "step": 9940 }, { "epoch": 1.197352587244284, "grad_norm": 4.851728439331055, "learning_rate": 0.00019998362553647213, "loss": 1.9441, "step": 9950 }, { "epoch": 1.1985559566787003, "grad_norm": 7.1798415184021, "learning_rate": 0.00019998355662234475, "loss": 1.9134, "step": 9960 }, { "epoch": 1.1997593261131168, "grad_norm": 5.0848188400268555, "learning_rate": 0.00019998348756351643, "loss": 1.9767, "step": 9970 }, { "epoch": 1.200962695547533, "grad_norm": 4.1638288497924805, "learning_rate": 0.00019998341835998735, "loss": 1.8312, "step": 9980 }, { "epoch": 1.2021660649819494, "grad_norm": 5.722476959228516, "learning_rate": 0.00019998334901175758, "loss": 1.9314, "step": 9990 }, { "epoch": 1.203369434416366, "grad_norm": 4.66650390625, "learning_rate": 0.0001999832795188272, "loss": 2.1877, "step": 10000 }, { "epoch": 1.2045728038507821, "grad_norm": 8.986128807067871, "learning_rate": 0.00019998320988119634, "loss": 2.2097, "step": 10010 }, { "epoch": 1.2057761732851986, "grad_norm": 5.32340669631958, "learning_rate": 0.00019998314009886508, "loss": 2.1554, "step": 10020 }, { "epoch": 1.2069795427196148, "grad_norm": 3.7524874210357666, "learning_rate": 0.00019998307017183355, "loss": 1.9542, "step": 10030 }, { "epoch": 1.2081829121540313, "grad_norm": 5.027027606964111, "learning_rate": 0.00019998300010010182, "loss": 2.1156, "step": 10040 }, { "epoch": 1.2093862815884477, "grad_norm": 3.9726953506469727, "learning_rate": 0.00019998292988367003, "loss": 1.9822, "step": 10050 }, { "epoch": 1.210589651022864, "grad_norm": 7.025004863739014, "learning_rate": 0.00019998285952253824, "loss": 2.0503, "step": 10060 }, { "epoch": 1.2117930204572804, "grad_norm": 4.493628025054932, "learning_rate": 0.00019998278901670654, "loss": 2.0269, "step": 10070 }, { "epoch": 1.2129963898916967, "grad_norm": 3.804781675338745, "learning_rate": 0.00019998271836617508, "loss": 2.0031, "step": 10080 }, { "epoch": 1.2141997593261131, "grad_norm": 5.55324649810791, "learning_rate": 0.00019998264757094397, "loss": 1.8584, "step": 10090 }, { "epoch": 1.2154031287605296, "grad_norm": 4.546861171722412, "learning_rate": 0.00019998257663101323, "loss": 1.9519, "step": 10100 }, { "epoch": 1.2166064981949458, "grad_norm": 6.647198677062988, "learning_rate": 0.00019998250554638305, "loss": 1.9068, "step": 10110 }, { "epoch": 1.2178098676293623, "grad_norm": 6.195066452026367, "learning_rate": 0.0001999824343170535, "loss": 1.8015, "step": 10120 }, { "epoch": 1.2190132370637785, "grad_norm": 3.3782405853271484, "learning_rate": 0.00019998236294302467, "loss": 2.2316, "step": 10130 }, { "epoch": 1.220216606498195, "grad_norm": 4.880107879638672, "learning_rate": 0.00019998229142429667, "loss": 1.9407, "step": 10140 }, { "epoch": 1.2214199759326112, "grad_norm": 4.86598539352417, "learning_rate": 0.00019998221976086963, "loss": 2.1939, "step": 10150 }, { "epoch": 1.2226233453670277, "grad_norm": 6.2081804275512695, "learning_rate": 0.00019998214795274361, "loss": 2.1162, "step": 10160 }, { "epoch": 1.2238267148014441, "grad_norm": 4.876759052276611, "learning_rate": 0.00019998207599991873, "loss": 1.8673, "step": 10170 }, { "epoch": 1.2250300842358604, "grad_norm": 3.492619752883911, "learning_rate": 0.00019998200390239514, "loss": 2.2299, "step": 10180 }, { "epoch": 1.2262334536702768, "grad_norm": 5.097721576690674, "learning_rate": 0.0001999819316601729, "loss": 1.9695, "step": 10190 }, { "epoch": 1.2274368231046933, "grad_norm": 4.120977401733398, "learning_rate": 0.00019998185927325212, "loss": 2.0585, "step": 10200 }, { "epoch": 1.2286401925391095, "grad_norm": 6.539756774902344, "learning_rate": 0.00019998178674163287, "loss": 2.2924, "step": 10210 }, { "epoch": 1.229843561973526, "grad_norm": 5.448577404022217, "learning_rate": 0.00019998171406531532, "loss": 1.9213, "step": 10220 }, { "epoch": 1.2310469314079422, "grad_norm": 4.067614555358887, "learning_rate": 0.00019998164124429956, "loss": 1.9187, "step": 10230 }, { "epoch": 1.2322503008423586, "grad_norm": 5.904695510864258, "learning_rate": 0.00019998156827858563, "loss": 2.2251, "step": 10240 }, { "epoch": 1.2334536702767749, "grad_norm": 3.8210763931274414, "learning_rate": 0.00019998149516817373, "loss": 1.8841, "step": 10250 }, { "epoch": 1.2346570397111913, "grad_norm": 5.61317777633667, "learning_rate": 0.00019998142191306388, "loss": 1.9608, "step": 10260 }, { "epoch": 1.2358604091456078, "grad_norm": 5.043588161468506, "learning_rate": 0.00019998134851325627, "loss": 2.0538, "step": 10270 }, { "epoch": 1.237063778580024, "grad_norm": 3.8707103729248047, "learning_rate": 0.00019998127496875093, "loss": 2.2254, "step": 10280 }, { "epoch": 1.2382671480144405, "grad_norm": 6.206582546234131, "learning_rate": 0.00019998120127954802, "loss": 1.947, "step": 10290 }, { "epoch": 1.2394705174488567, "grad_norm": 4.542145729064941, "learning_rate": 0.00019998112744564763, "loss": 2.0519, "step": 10300 }, { "epoch": 1.2406738868832732, "grad_norm": 6.438401699066162, "learning_rate": 0.00019998105346704986, "loss": 2.0104, "step": 10310 }, { "epoch": 1.2418772563176894, "grad_norm": 4.163731098175049, "learning_rate": 0.00019998097934375482, "loss": 2.0364, "step": 10320 }, { "epoch": 1.2430806257521059, "grad_norm": 3.883605480194092, "learning_rate": 0.00019998090507576262, "loss": 2.0702, "step": 10330 }, { "epoch": 1.2442839951865223, "grad_norm": 5.311922073364258, "learning_rate": 0.00019998083066307337, "loss": 2.0248, "step": 10340 }, { "epoch": 1.2454873646209386, "grad_norm": 4.283480167388916, "learning_rate": 0.0001999807561056872, "loss": 2.0493, "step": 10350 }, { "epoch": 1.246690734055355, "grad_norm": 6.248193264007568, "learning_rate": 0.00019998068140360413, "loss": 1.9874, "step": 10360 }, { "epoch": 1.2478941034897715, "grad_norm": 5.551257133483887, "learning_rate": 0.00019998060655682435, "loss": 2.0025, "step": 10370 }, { "epoch": 1.2490974729241877, "grad_norm": 2.876491069793701, "learning_rate": 0.00019998053156534797, "loss": 2.0228, "step": 10380 }, { "epoch": 1.2503008423586042, "grad_norm": 4.5422515869140625, "learning_rate": 0.00019998045642917505, "loss": 1.7866, "step": 10390 }, { "epoch": 1.2515042117930204, "grad_norm": 5.026985168457031, "learning_rate": 0.00019998038114830575, "loss": 1.7722, "step": 10400 }, { "epoch": 1.2527075812274369, "grad_norm": 7.3900909423828125, "learning_rate": 0.00019998030572274014, "loss": 2.1746, "step": 10410 }, { "epoch": 1.253910950661853, "grad_norm": 5.162228584289551, "learning_rate": 0.00019998023015247834, "loss": 1.858, "step": 10420 }, { "epoch": 1.2551143200962696, "grad_norm": 2.786094903945923, "learning_rate": 0.00019998015443752046, "loss": 1.9791, "step": 10430 }, { "epoch": 1.256317689530686, "grad_norm": 6.485592842102051, "learning_rate": 0.00019998007857786662, "loss": 2.0075, "step": 10440 }, { "epoch": 1.2575210589651022, "grad_norm": 4.226631164550781, "learning_rate": 0.00019998000257351694, "loss": 1.9714, "step": 10450 }, { "epoch": 1.2587244283995187, "grad_norm": 8.003928184509277, "learning_rate": 0.0001999799264244715, "loss": 2.0834, "step": 10460 }, { "epoch": 1.259927797833935, "grad_norm": 5.195785045623779, "learning_rate": 0.0001999798501307304, "loss": 1.7946, "step": 10470 }, { "epoch": 1.2611311672683514, "grad_norm": 3.2900009155273438, "learning_rate": 0.0001999797736922938, "loss": 2.2332, "step": 10480 }, { "epoch": 1.2623345367027676, "grad_norm": 7.40859317779541, "learning_rate": 0.00019997969710916177, "loss": 2.1342, "step": 10490 }, { "epoch": 1.263537906137184, "grad_norm": 5.1050004959106445, "learning_rate": 0.00019997962038133444, "loss": 1.8853, "step": 10500 }, { "epoch": 1.2647412755716005, "grad_norm": 6.883108139038086, "learning_rate": 0.0001999795435088119, "loss": 2.1931, "step": 10510 }, { "epoch": 1.2659446450060168, "grad_norm": 5.1271281242370605, "learning_rate": 0.0001999794664915943, "loss": 1.9036, "step": 10520 }, { "epoch": 1.2671480144404332, "grad_norm": 3.9865400791168213, "learning_rate": 0.00019997938932968172, "loss": 2.1866, "step": 10530 }, { "epoch": 1.2683513838748497, "grad_norm": 5.753084182739258, "learning_rate": 0.00019997931202307427, "loss": 2.1757, "step": 10540 }, { "epoch": 1.269554753309266, "grad_norm": 4.798121929168701, "learning_rate": 0.0001999792345717721, "loss": 2.052, "step": 10550 }, { "epoch": 1.2707581227436824, "grad_norm": 6.817185401916504, "learning_rate": 0.00019997915697577526, "loss": 2.0153, "step": 10560 }, { "epoch": 1.2719614921780986, "grad_norm": 4.873058795928955, "learning_rate": 0.0001999790792350839, "loss": 2.3704, "step": 10570 }, { "epoch": 1.273164861612515, "grad_norm": 3.428636074066162, "learning_rate": 0.00019997900134969814, "loss": 2.3233, "step": 10580 }, { "epoch": 1.2743682310469313, "grad_norm": 6.904481887817383, "learning_rate": 0.00019997892331961807, "loss": 1.882, "step": 10590 }, { "epoch": 1.2755716004813478, "grad_norm": 4.372970104217529, "learning_rate": 0.00019997884514484385, "loss": 2.1738, "step": 10600 }, { "epoch": 1.2767749699157642, "grad_norm": 6.636587619781494, "learning_rate": 0.0001999787668253755, "loss": 2.4223, "step": 10610 }, { "epoch": 1.2779783393501805, "grad_norm": 5.977717876434326, "learning_rate": 0.00019997868836121323, "loss": 1.9153, "step": 10620 }, { "epoch": 1.279181708784597, "grad_norm": 3.2894933223724365, "learning_rate": 0.0001999786097523571, "loss": 2.2322, "step": 10630 }, { "epoch": 1.2803850782190134, "grad_norm": 6.185766696929932, "learning_rate": 0.00019997853099880722, "loss": 1.8874, "step": 10640 }, { "epoch": 1.2815884476534296, "grad_norm": 4.085166931152344, "learning_rate": 0.00019997845210056373, "loss": 1.968, "step": 10650 }, { "epoch": 1.2827918170878458, "grad_norm": 6.705855369567871, "learning_rate": 0.0001999783730576268, "loss": 1.9561, "step": 10660 }, { "epoch": 1.2839951865222623, "grad_norm": 5.585948467254639, "learning_rate": 0.00019997829386999641, "loss": 2.0039, "step": 10670 }, { "epoch": 1.2851985559566788, "grad_norm": 3.6671464443206787, "learning_rate": 0.00019997821453767274, "loss": 2.0347, "step": 10680 }, { "epoch": 1.286401925391095, "grad_norm": 4.681477069854736, "learning_rate": 0.00019997813506065595, "loss": 1.9104, "step": 10690 }, { "epoch": 1.2876052948255114, "grad_norm": 5.2901387214660645, "learning_rate": 0.0001999780554389461, "loss": 1.9701, "step": 10700 }, { "epoch": 1.288808664259928, "grad_norm": 5.807519435882568, "learning_rate": 0.0001999779756725433, "loss": 1.8904, "step": 10710 }, { "epoch": 1.2900120336943441, "grad_norm": 4.371450424194336, "learning_rate": 0.0001999778957614477, "loss": 1.6964, "step": 10720 }, { "epoch": 1.2912154031287606, "grad_norm": 4.2103590965271, "learning_rate": 0.0001999778157056594, "loss": 2.2595, "step": 10730 }, { "epoch": 1.2924187725631768, "grad_norm": 6.793369293212891, "learning_rate": 0.00019997773550517854, "loss": 2.0873, "step": 10740 }, { "epoch": 1.2936221419975933, "grad_norm": 5.259834289550781, "learning_rate": 0.00019997765516000519, "loss": 2.1054, "step": 10750 }, { "epoch": 1.2948255114320095, "grad_norm": 6.473832130432129, "learning_rate": 0.0001999775746701395, "loss": 1.9038, "step": 10760 }, { "epoch": 1.296028880866426, "grad_norm": 5.644038200378418, "learning_rate": 0.00019997749403558154, "loss": 1.9461, "step": 10770 }, { "epoch": 1.2972322503008424, "grad_norm": 3.3906755447387695, "learning_rate": 0.0001999774132563315, "loss": 1.9609, "step": 10780 }, { "epoch": 1.2984356197352587, "grad_norm": 6.086814880371094, "learning_rate": 0.00019997733233238947, "loss": 2.0266, "step": 10790 }, { "epoch": 1.2996389891696751, "grad_norm": 3.8884189128875732, "learning_rate": 0.00019997725126375552, "loss": 1.8661, "step": 10800 }, { "epoch": 1.3008423586040916, "grad_norm": 5.768379211425781, "learning_rate": 0.00019997717005042982, "loss": 2.0649, "step": 10810 }, { "epoch": 1.3020457280385078, "grad_norm": 4.84769344329834, "learning_rate": 0.00019997708869241246, "loss": 2.1423, "step": 10820 }, { "epoch": 1.303249097472924, "grad_norm": 4.503974437713623, "learning_rate": 0.0001999770071897036, "loss": 1.9141, "step": 10830 }, { "epoch": 1.3044524669073405, "grad_norm": 5.6907267570495605, "learning_rate": 0.00019997692554230332, "loss": 1.7882, "step": 10840 }, { "epoch": 1.305655836341757, "grad_norm": 4.08046293258667, "learning_rate": 0.00019997684375021172, "loss": 2.0279, "step": 10850 }, { "epoch": 1.3068592057761732, "grad_norm": 6.6718430519104, "learning_rate": 0.00019997676181342895, "loss": 2.0306, "step": 10860 }, { "epoch": 1.3080625752105897, "grad_norm": 5.364537715911865, "learning_rate": 0.00019997667973195514, "loss": 1.7702, "step": 10870 }, { "epoch": 1.3092659446450061, "grad_norm": 4.512383460998535, "learning_rate": 0.00019997659750579038, "loss": 1.9526, "step": 10880 }, { "epoch": 1.3104693140794224, "grad_norm": 5.895017147064209, "learning_rate": 0.0001999765151349348, "loss": 1.8212, "step": 10890 }, { "epoch": 1.3116726835138388, "grad_norm": 4.3670125007629395, "learning_rate": 0.0001999764326193885, "loss": 1.8725, "step": 10900 }, { "epoch": 1.312876052948255, "grad_norm": 6.285079002380371, "learning_rate": 0.00019997634995915168, "loss": 2.2052, "step": 10910 }, { "epoch": 1.3140794223826715, "grad_norm": 5.09568977355957, "learning_rate": 0.00019997626715422432, "loss": 1.7704, "step": 10920 }, { "epoch": 1.3152827918170877, "grad_norm": 3.565420389175415, "learning_rate": 0.00019997618420460666, "loss": 2.2321, "step": 10930 }, { "epoch": 1.3164861612515042, "grad_norm": 5.460746765136719, "learning_rate": 0.00019997610111029877, "loss": 2.1075, "step": 10940 }, { "epoch": 1.3176895306859207, "grad_norm": 4.214285373687744, "learning_rate": 0.0001999760178713008, "loss": 1.9047, "step": 10950 }, { "epoch": 1.3188929001203369, "grad_norm": 5.36951208114624, "learning_rate": 0.00019997593448761283, "loss": 2.1852, "step": 10960 }, { "epoch": 1.3200962695547533, "grad_norm": 6.230038166046143, "learning_rate": 0.000199975850959235, "loss": 2.2681, "step": 10970 }, { "epoch": 1.3212996389891698, "grad_norm": 3.9167563915252686, "learning_rate": 0.00019997576728616743, "loss": 1.9569, "step": 10980 }, { "epoch": 1.322503008423586, "grad_norm": 6.026401042938232, "learning_rate": 0.00019997568346841025, "loss": 1.9088, "step": 10990 }, { "epoch": 1.3237063778580023, "grad_norm": 4.840241432189941, "learning_rate": 0.00019997559950596356, "loss": 2.1188, "step": 11000 }, { "epoch": 1.3249097472924187, "grad_norm": 6.2873969078063965, "learning_rate": 0.0001999755153988275, "loss": 2.1796, "step": 11010 }, { "epoch": 1.3261131167268352, "grad_norm": 5.201504707336426, "learning_rate": 0.00019997543114700216, "loss": 1.8979, "step": 11020 }, { "epoch": 1.3273164861612514, "grad_norm": 4.130815505981445, "learning_rate": 0.00019997534675048772, "loss": 2.1705, "step": 11030 }, { "epoch": 1.3285198555956679, "grad_norm": 6.854120254516602, "learning_rate": 0.00019997526220928426, "loss": 2.1597, "step": 11040 }, { "epoch": 1.3297232250300843, "grad_norm": 3.4538803100585938, "learning_rate": 0.00019997517752339192, "loss": 1.741, "step": 11050 }, { "epoch": 1.3309265944645006, "grad_norm": 5.753960609436035, "learning_rate": 0.00019997509269281083, "loss": 2.0376, "step": 11060 }, { "epoch": 1.332129963898917, "grad_norm": 4.628495216369629, "learning_rate": 0.00019997500771754107, "loss": 1.7842, "step": 11070 }, { "epoch": 1.3333333333333333, "grad_norm": 3.723304271697998, "learning_rate": 0.00019997492259758278, "loss": 2.2054, "step": 11080 }, { "epoch": 1.3345367027677497, "grad_norm": 5.684840202331543, "learning_rate": 0.00019997483733293612, "loss": 1.8107, "step": 11090 }, { "epoch": 1.335740072202166, "grad_norm": 4.158824443817139, "learning_rate": 0.0001999747519236012, "loss": 1.9651, "step": 11100 }, { "epoch": 1.3369434416365824, "grad_norm": 6.488555431365967, "learning_rate": 0.00019997466636957812, "loss": 2.1014, "step": 11110 }, { "epoch": 1.3381468110709989, "grad_norm": 4.762753009796143, "learning_rate": 0.00019997458067086697, "loss": 2.0527, "step": 11120 }, { "epoch": 1.339350180505415, "grad_norm": 3.0060322284698486, "learning_rate": 0.00019997449482746796, "loss": 2.0986, "step": 11130 }, { "epoch": 1.3405535499398316, "grad_norm": 5.863572120666504, "learning_rate": 0.00019997440883938117, "loss": 2.0484, "step": 11140 }, { "epoch": 1.341756919374248, "grad_norm": 4.870255947113037, "learning_rate": 0.00019997432270660675, "loss": 2.0285, "step": 11150 }, { "epoch": 1.3429602888086642, "grad_norm": 6.171783924102783, "learning_rate": 0.00019997423642914477, "loss": 1.9961, "step": 11160 }, { "epoch": 1.3441636582430807, "grad_norm": 5.617040157318115, "learning_rate": 0.00019997415000699538, "loss": 2.1632, "step": 11170 }, { "epoch": 1.345367027677497, "grad_norm": 3.242630958557129, "learning_rate": 0.00019997406344015873, "loss": 1.8618, "step": 11180 }, { "epoch": 1.3465703971119134, "grad_norm": 6.868119239807129, "learning_rate": 0.00019997397672863492, "loss": 1.9079, "step": 11190 }, { "epoch": 1.3477737665463296, "grad_norm": 4.686202526092529, "learning_rate": 0.0001999738898724241, "loss": 2.25, "step": 11200 }, { "epoch": 1.348977135980746, "grad_norm": 5.856929779052734, "learning_rate": 0.00019997380287152636, "loss": 2.1734, "step": 11210 }, { "epoch": 1.3501805054151625, "grad_norm": 4.6299872398376465, "learning_rate": 0.00019997371572594186, "loss": 1.8238, "step": 11220 }, { "epoch": 1.3513838748495788, "grad_norm": 3.755962610244751, "learning_rate": 0.00019997362843567068, "loss": 1.9503, "step": 11230 }, { "epoch": 1.3525872442839952, "grad_norm": 5.344730377197266, "learning_rate": 0.00019997354100071302, "loss": 1.9743, "step": 11240 }, { "epoch": 1.3537906137184115, "grad_norm": 4.1081953048706055, "learning_rate": 0.0001999734534210689, "loss": 1.8849, "step": 11250 }, { "epoch": 1.354993983152828, "grad_norm": 7.932389736175537, "learning_rate": 0.0001999733656967386, "loss": 2.0345, "step": 11260 }, { "epoch": 1.3561973525872442, "grad_norm": 6.176091194152832, "learning_rate": 0.00019997327782772208, "loss": 1.8347, "step": 11270 }, { "epoch": 1.3574007220216606, "grad_norm": 4.27577543258667, "learning_rate": 0.00019997318981401958, "loss": 2.1063, "step": 11280 }, { "epoch": 1.358604091456077, "grad_norm": 5.581092357635498, "learning_rate": 0.0001999731016556312, "loss": 1.9629, "step": 11290 }, { "epoch": 1.3598074608904933, "grad_norm": 5.33603048324585, "learning_rate": 0.00019997301335255703, "loss": 2.0722, "step": 11300 }, { "epoch": 1.3610108303249098, "grad_norm": 6.872206687927246, "learning_rate": 0.00019997292490479723, "loss": 2.0029, "step": 11310 }, { "epoch": 1.3622141997593262, "grad_norm": 5.270725250244141, "learning_rate": 0.00019997283631235195, "loss": 2.0149, "step": 11320 }, { "epoch": 1.3634175691937425, "grad_norm": 3.964169502258301, "learning_rate": 0.0001999727475752213, "loss": 1.9302, "step": 11330 }, { "epoch": 1.364620938628159, "grad_norm": 4.73685359954834, "learning_rate": 0.00019997265869340538, "loss": 2.0308, "step": 11340 }, { "epoch": 1.3658243080625752, "grad_norm": 4.951168060302734, "learning_rate": 0.0001999725696669043, "loss": 1.9565, "step": 11350 }, { "epoch": 1.3670276774969916, "grad_norm": 6.389776229858398, "learning_rate": 0.0001999724804957183, "loss": 2.0621, "step": 11360 }, { "epoch": 1.3682310469314078, "grad_norm": 5.1416473388671875, "learning_rate": 0.00019997239117984741, "loss": 2.0558, "step": 11370 }, { "epoch": 1.3694344163658243, "grad_norm": 3.951632499694824, "learning_rate": 0.00019997230171929178, "loss": 1.9674, "step": 11380 }, { "epoch": 1.3706377858002408, "grad_norm": 6.3243184089660645, "learning_rate": 0.00019997221211405156, "loss": 2.1929, "step": 11390 }, { "epoch": 1.371841155234657, "grad_norm": 4.366563320159912, "learning_rate": 0.0001999721223641269, "loss": 2.033, "step": 11400 }, { "epoch": 1.3730445246690735, "grad_norm": 7.604349136352539, "learning_rate": 0.00019997203246951783, "loss": 2.2521, "step": 11410 }, { "epoch": 1.37424789410349, "grad_norm": 5.672321319580078, "learning_rate": 0.00019997194243022456, "loss": 2.0388, "step": 11420 }, { "epoch": 1.3754512635379061, "grad_norm": 3.863300085067749, "learning_rate": 0.00019997185224624722, "loss": 1.8918, "step": 11430 }, { "epoch": 1.3766546329723224, "grad_norm": 5.572450160980225, "learning_rate": 0.00019997176191758595, "loss": 1.9658, "step": 11440 }, { "epoch": 1.3778580024067388, "grad_norm": 4.11118745803833, "learning_rate": 0.00019997167144424082, "loss": 1.8633, "step": 11450 }, { "epoch": 1.3790613718411553, "grad_norm": 5.848665237426758, "learning_rate": 0.000199971580826212, "loss": 2.0335, "step": 11460 }, { "epoch": 1.3802647412755715, "grad_norm": 4.966587066650391, "learning_rate": 0.00019997149006349964, "loss": 2.0401, "step": 11470 }, { "epoch": 1.381468110709988, "grad_norm": 4.319188117980957, "learning_rate": 0.00019997139915610385, "loss": 2.2575, "step": 11480 }, { "epoch": 1.3826714801444044, "grad_norm": 6.347306251525879, "learning_rate": 0.00019997130810402478, "loss": 2.1662, "step": 11490 }, { "epoch": 1.3838748495788207, "grad_norm": 4.402041435241699, "learning_rate": 0.00019997121690726254, "loss": 2.0651, "step": 11500 }, { "epoch": 1.3850782190132371, "grad_norm": 6.501856327056885, "learning_rate": 0.00019997112556581725, "loss": 2.3038, "step": 11510 }, { "epoch": 1.3862815884476534, "grad_norm": 5.262118339538574, "learning_rate": 0.00019997103407968906, "loss": 2.0157, "step": 11520 }, { "epoch": 1.3874849578820698, "grad_norm": 3.0442757606506348, "learning_rate": 0.0001999709424488781, "loss": 1.7683, "step": 11530 }, { "epoch": 1.388688327316486, "grad_norm": 5.854463577270508, "learning_rate": 0.00019997085067338451, "loss": 1.9958, "step": 11540 }, { "epoch": 1.3898916967509025, "grad_norm": 4.829777717590332, "learning_rate": 0.00019997075875320843, "loss": 2.06, "step": 11550 }, { "epoch": 1.391095066185319, "grad_norm": 7.081327438354492, "learning_rate": 0.00019997066668834995, "loss": 1.9274, "step": 11560 }, { "epoch": 1.3922984356197352, "grad_norm": 6.35081672668457, "learning_rate": 0.00019997057447880927, "loss": 2.1442, "step": 11570 }, { "epoch": 1.3935018050541517, "grad_norm": 3.7289793491363525, "learning_rate": 0.00019997048212458645, "loss": 1.9846, "step": 11580 }, { "epoch": 1.3947051744885681, "grad_norm": 6.378157615661621, "learning_rate": 0.00019997038962568165, "loss": 2.0232, "step": 11590 }, { "epoch": 1.3959085439229844, "grad_norm": 4.062281131744385, "learning_rate": 0.00019997029698209508, "loss": 1.9276, "step": 11600 }, { "epoch": 1.3971119133574006, "grad_norm": 7.422173976898193, "learning_rate": 0.00019997020419382678, "loss": 2.1896, "step": 11610 }, { "epoch": 1.398315282791817, "grad_norm": 4.6294355392456055, "learning_rate": 0.0001999701112608769, "loss": 1.7339, "step": 11620 }, { "epoch": 1.3995186522262335, "grad_norm": 3.527700662612915, "learning_rate": 0.00019997001818324557, "loss": 1.9711, "step": 11630 }, { "epoch": 1.4007220216606497, "grad_norm": 5.8211445808410645, "learning_rate": 0.00019996992496093297, "loss": 2.1136, "step": 11640 }, { "epoch": 1.4019253910950662, "grad_norm": 3.9967219829559326, "learning_rate": 0.0001999698315939392, "loss": 1.9309, "step": 11650 }, { "epoch": 1.4031287605294827, "grad_norm": 6.780445575714111, "learning_rate": 0.0001999697380822644, "loss": 1.9845, "step": 11660 }, { "epoch": 1.404332129963899, "grad_norm": 5.2751545906066895, "learning_rate": 0.0001999696444259087, "loss": 1.8692, "step": 11670 }, { "epoch": 1.4055354993983153, "grad_norm": 3.81404709815979, "learning_rate": 0.00019996955062487226, "loss": 2.137, "step": 11680 }, { "epoch": 1.4067388688327316, "grad_norm": 5.983846664428711, "learning_rate": 0.0001999694566791552, "loss": 2.0787, "step": 11690 }, { "epoch": 1.407942238267148, "grad_norm": 4.335493087768555, "learning_rate": 0.0001999693625887576, "loss": 2.0673, "step": 11700 }, { "epoch": 1.4091456077015643, "grad_norm": 7.844127655029297, "learning_rate": 0.0001999692683536797, "loss": 2.2025, "step": 11710 }, { "epoch": 1.4103489771359807, "grad_norm": 4.600785255432129, "learning_rate": 0.0001999691739739216, "loss": 1.9487, "step": 11720 }, { "epoch": 1.4115523465703972, "grad_norm": 4.707550525665283, "learning_rate": 0.0001999690794494834, "loss": 1.9785, "step": 11730 }, { "epoch": 1.4127557160048134, "grad_norm": 6.067191123962402, "learning_rate": 0.00019996898478036527, "loss": 1.9159, "step": 11740 }, { "epoch": 1.4139590854392299, "grad_norm": 3.8539209365844727, "learning_rate": 0.00019996888996656734, "loss": 2.062, "step": 11750 }, { "epoch": 1.4151624548736463, "grad_norm": 6.2311248779296875, "learning_rate": 0.00019996879500808974, "loss": 2.0352, "step": 11760 }, { "epoch": 1.4163658243080626, "grad_norm": 4.776411533355713, "learning_rate": 0.00019996869990493262, "loss": 1.9179, "step": 11770 }, { "epoch": 1.4175691937424788, "grad_norm": 4.044094562530518, "learning_rate": 0.00019996860465709606, "loss": 2.0612, "step": 11780 }, { "epoch": 1.4187725631768953, "grad_norm": 5.775789260864258, "learning_rate": 0.00019996850926458032, "loss": 1.9522, "step": 11790 }, { "epoch": 1.4199759326113117, "grad_norm": 4.458516597747803, "learning_rate": 0.0001999684137273854, "loss": 2.096, "step": 11800 }, { "epoch": 1.421179302045728, "grad_norm": 5.984865665435791, "learning_rate": 0.00019996831804551155, "loss": 2.133, "step": 11810 }, { "epoch": 1.4223826714801444, "grad_norm": 5.28660249710083, "learning_rate": 0.00019996822221895886, "loss": 2.02, "step": 11820 }, { "epoch": 1.4235860409145609, "grad_norm": 3.703310251235962, "learning_rate": 0.00019996812624772746, "loss": 2.0494, "step": 11830 }, { "epoch": 1.424789410348977, "grad_norm": 5.994476795196533, "learning_rate": 0.0001999680301318175, "loss": 1.7702, "step": 11840 }, { "epoch": 1.4259927797833936, "grad_norm": 4.5734405517578125, "learning_rate": 0.00019996793387122913, "loss": 1.9721, "step": 11850 }, { "epoch": 1.4271961492178098, "grad_norm": 6.583423137664795, "learning_rate": 0.00019996783746596247, "loss": 1.9945, "step": 11860 }, { "epoch": 1.4283995186522263, "grad_norm": 4.987894058227539, "learning_rate": 0.00019996774091601764, "loss": 2.0069, "step": 11870 }, { "epoch": 1.4296028880866425, "grad_norm": 3.197599172592163, "learning_rate": 0.00019996764422139482, "loss": 2.0022, "step": 11880 }, { "epoch": 1.430806257521059, "grad_norm": 4.9459991455078125, "learning_rate": 0.00019996754738209416, "loss": 1.9842, "step": 11890 }, { "epoch": 1.4320096269554754, "grad_norm": 4.139046669006348, "learning_rate": 0.00019996745039811578, "loss": 2.0101, "step": 11900 }, { "epoch": 1.4332129963898916, "grad_norm": 5.977879047393799, "learning_rate": 0.00019996735326945978, "loss": 2.1852, "step": 11910 }, { "epoch": 1.434416365824308, "grad_norm": 4.959321022033691, "learning_rate": 0.0001999672559961264, "loss": 1.9614, "step": 11920 }, { "epoch": 1.4356197352587245, "grad_norm": 3.7874855995178223, "learning_rate": 0.00019996715857811568, "loss": 2.0799, "step": 11930 }, { "epoch": 1.4368231046931408, "grad_norm": 6.308355331420898, "learning_rate": 0.0001999670610154278, "loss": 2.1004, "step": 11940 }, { "epoch": 1.4380264741275572, "grad_norm": 4.808499813079834, "learning_rate": 0.0001999669633080629, "loss": 2.0538, "step": 11950 }, { "epoch": 1.4392298435619735, "grad_norm": 6.995471477508545, "learning_rate": 0.00019996686545602113, "loss": 2.045, "step": 11960 }, { "epoch": 1.44043321299639, "grad_norm": 4.54448127746582, "learning_rate": 0.00019996676745930264, "loss": 1.9182, "step": 11970 }, { "epoch": 1.4416365824308062, "grad_norm": 3.7178962230682373, "learning_rate": 0.00019996666931790754, "loss": 1.9286, "step": 11980 }, { "epoch": 1.4428399518652226, "grad_norm": 4.949057102203369, "learning_rate": 0.00019996657103183598, "loss": 1.9982, "step": 11990 }, { "epoch": 1.444043321299639, "grad_norm": 4.729292869567871, "learning_rate": 0.0001999664726010881, "loss": 2.0252, "step": 12000 }, { "epoch": 1.4452466907340553, "grad_norm": 7.523972511291504, "learning_rate": 0.00019996637402566408, "loss": 1.9543, "step": 12010 }, { "epoch": 1.4464500601684718, "grad_norm": 4.342970848083496, "learning_rate": 0.00019996627530556403, "loss": 1.7837, "step": 12020 }, { "epoch": 1.447653429602888, "grad_norm": 3.445206642150879, "learning_rate": 0.00019996617644078813, "loss": 2.2453, "step": 12030 }, { "epoch": 1.4488567990373045, "grad_norm": 6.204970836639404, "learning_rate": 0.00019996607743133645, "loss": 2.159, "step": 12040 }, { "epoch": 1.4500601684717207, "grad_norm": 4.197068214416504, "learning_rate": 0.0001999659782772092, "loss": 1.8933, "step": 12050 }, { "epoch": 1.4512635379061372, "grad_norm": 7.213578224182129, "learning_rate": 0.00019996587897840647, "loss": 2.0458, "step": 12060 }, { "epoch": 1.4524669073405536, "grad_norm": 5.023141384124756, "learning_rate": 0.00019996577953492844, "loss": 2.1336, "step": 12070 }, { "epoch": 1.4536702767749698, "grad_norm": 2.6657612323760986, "learning_rate": 0.00019996567994677524, "loss": 2.043, "step": 12080 }, { "epoch": 1.4548736462093863, "grad_norm": 5.107360363006592, "learning_rate": 0.00019996558021394704, "loss": 2.0071, "step": 12090 }, { "epoch": 1.4560770156438028, "grad_norm": 4.615115165710449, "learning_rate": 0.00019996548033644396, "loss": 2.0489, "step": 12100 }, { "epoch": 1.457280385078219, "grad_norm": 7.913209438323975, "learning_rate": 0.00019996538031426614, "loss": 2.4675, "step": 12110 }, { "epoch": 1.4584837545126355, "grad_norm": 5.84035062789917, "learning_rate": 0.00019996528014741376, "loss": 2.0735, "step": 12120 }, { "epoch": 1.4596871239470517, "grad_norm": 4.441844940185547, "learning_rate": 0.0001999651798358869, "loss": 2.1426, "step": 12130 }, { "epoch": 1.4608904933814681, "grad_norm": 5.56676721572876, "learning_rate": 0.00019996507937968577, "loss": 2.0858, "step": 12140 }, { "epoch": 1.4620938628158844, "grad_norm": 4.233429431915283, "learning_rate": 0.00019996497877881048, "loss": 2.0057, "step": 12150 }, { "epoch": 1.4632972322503008, "grad_norm": 7.260183334350586, "learning_rate": 0.0001999648780332612, "loss": 2.0318, "step": 12160 }, { "epoch": 1.4645006016847173, "grad_norm": 5.844688415527344, "learning_rate": 0.00019996477714303806, "loss": 1.8661, "step": 12170 }, { "epoch": 1.4657039711191335, "grad_norm": 3.717721700668335, "learning_rate": 0.0001999646761081412, "loss": 2.1405, "step": 12180 }, { "epoch": 1.46690734055355, "grad_norm": 6.341575622558594, "learning_rate": 0.00019996457492857075, "loss": 2.1799, "step": 12190 }, { "epoch": 1.4681107099879662, "grad_norm": 3.2736339569091797, "learning_rate": 0.0001999644736043269, "loss": 1.8986, "step": 12200 }, { "epoch": 1.4693140794223827, "grad_norm": 6.800692558288574, "learning_rate": 0.00019996437213540977, "loss": 1.9837, "step": 12210 }, { "epoch": 1.470517448856799, "grad_norm": 5.0401201248168945, "learning_rate": 0.00019996427052181953, "loss": 1.9493, "step": 12220 }, { "epoch": 1.4717208182912154, "grad_norm": 3.1545989513397217, "learning_rate": 0.00019996416876355628, "loss": 2.1774, "step": 12230 }, { "epoch": 1.4729241877256318, "grad_norm": 6.337563991546631, "learning_rate": 0.00019996406686062024, "loss": 2.2252, "step": 12240 }, { "epoch": 1.474127557160048, "grad_norm": 4.547989368438721, "learning_rate": 0.00019996396481301146, "loss": 2.0871, "step": 12250 }, { "epoch": 1.4753309265944645, "grad_norm": 8.071065902709961, "learning_rate": 0.0001999638626207302, "loss": 1.9863, "step": 12260 }, { "epoch": 1.476534296028881, "grad_norm": 5.691819190979004, "learning_rate": 0.00019996376028377652, "loss": 2.1144, "step": 12270 }, { "epoch": 1.4777376654632972, "grad_norm": 4.698114395141602, "learning_rate": 0.0001999636578021506, "loss": 2.1699, "step": 12280 }, { "epoch": 1.4789410348977137, "grad_norm": 6.224732398986816, "learning_rate": 0.00019996355517585258, "loss": 2.1088, "step": 12290 }, { "epoch": 1.48014440433213, "grad_norm": 5.283705711364746, "learning_rate": 0.00019996345240488262, "loss": 2.0153, "step": 12300 }, { "epoch": 1.4813477737665464, "grad_norm": 6.128474712371826, "learning_rate": 0.0001999633494892409, "loss": 2.1222, "step": 12310 }, { "epoch": 1.4825511432009626, "grad_norm": 4.254839897155762, "learning_rate": 0.00019996324642892747, "loss": 1.9586, "step": 12320 }, { "epoch": 1.483754512635379, "grad_norm": 3.5428590774536133, "learning_rate": 0.0001999631432239426, "loss": 2.1966, "step": 12330 }, { "epoch": 1.4849578820697955, "grad_norm": 4.892740249633789, "learning_rate": 0.00019996303987428635, "loss": 2.0384, "step": 12340 }, { "epoch": 1.4861612515042117, "grad_norm": 4.1757659912109375, "learning_rate": 0.0001999629363799589, "loss": 2.1789, "step": 12350 }, { "epoch": 1.4873646209386282, "grad_norm": 6.969892978668213, "learning_rate": 0.00019996283274096042, "loss": 2.1338, "step": 12360 }, { "epoch": 1.4885679903730447, "grad_norm": 4.55242919921875, "learning_rate": 0.00019996272895729105, "loss": 2.2497, "step": 12370 }, { "epoch": 1.489771359807461, "grad_norm": 3.550419569015503, "learning_rate": 0.00019996262502895089, "loss": 2.0019, "step": 12380 }, { "epoch": 1.4909747292418771, "grad_norm": 6.1821770668029785, "learning_rate": 0.00019996252095594015, "loss": 1.9442, "step": 12390 }, { "epoch": 1.4921780986762936, "grad_norm": 4.020054817199707, "learning_rate": 0.00019996241673825898, "loss": 1.9674, "step": 12400 }, { "epoch": 1.49338146811071, "grad_norm": 6.744810581207275, "learning_rate": 0.00019996231237590748, "loss": 2.0089, "step": 12410 }, { "epoch": 1.4945848375451263, "grad_norm": 5.037810325622559, "learning_rate": 0.00019996220786888588, "loss": 1.956, "step": 12420 }, { "epoch": 1.4957882069795427, "grad_norm": 4.0546040534973145, "learning_rate": 0.00019996210321719424, "loss": 2.0901, "step": 12430 }, { "epoch": 1.4969915764139592, "grad_norm": 6.947364330291748, "learning_rate": 0.0001999619984208328, "loss": 1.9892, "step": 12440 }, { "epoch": 1.4981949458483754, "grad_norm": 5.34039831161499, "learning_rate": 0.00019996189347980164, "loss": 2.0415, "step": 12450 }, { "epoch": 1.4993983152827919, "grad_norm": 6.241419315338135, "learning_rate": 0.00019996178839410092, "loss": 1.9568, "step": 12460 }, { "epoch": 1.5006016847172083, "grad_norm": 4.005120754241943, "learning_rate": 0.00019996168316373085, "loss": 1.7521, "step": 12470 }, { "epoch": 1.5018050541516246, "grad_norm": 3.5194106101989746, "learning_rate": 0.0001999615777886915, "loss": 2.0599, "step": 12480 }, { "epoch": 1.5030084235860408, "grad_norm": 4.650184154510498, "learning_rate": 0.0001999614722689831, "loss": 2.0227, "step": 12490 }, { "epoch": 1.5042117930204573, "grad_norm": 4.675848007202148, "learning_rate": 0.0001999613666046058, "loss": 1.9764, "step": 12500 }, { "epoch": 1.5054151624548737, "grad_norm": 8.725130081176758, "learning_rate": 0.00019996126079555965, "loss": 2.1664, "step": 12510 }, { "epoch": 1.50661853188929, "grad_norm": 4.552273750305176, "learning_rate": 0.00019996115484184493, "loss": 1.8285, "step": 12520 }, { "epoch": 1.5078219013237064, "grad_norm": 3.9225306510925293, "learning_rate": 0.00019996104874346173, "loss": 1.884, "step": 12530 }, { "epoch": 1.5090252707581229, "grad_norm": 4.873247146606445, "learning_rate": 0.0001999609425004102, "loss": 2.0491, "step": 12540 }, { "epoch": 1.510228640192539, "grad_norm": 4.000176906585693, "learning_rate": 0.0001999608361126905, "loss": 1.8773, "step": 12550 }, { "epoch": 1.5114320096269553, "grad_norm": 6.219770908355713, "learning_rate": 0.00019996072958030283, "loss": 1.9446, "step": 12560 }, { "epoch": 1.5126353790613718, "grad_norm": 5.555866718292236, "learning_rate": 0.0001999606229032473, "loss": 1.8992, "step": 12570 }, { "epoch": 1.5138387484957883, "grad_norm": 4.3533220291137695, "learning_rate": 0.000199960516081524, "loss": 2.0626, "step": 12580 }, { "epoch": 1.5150421179302045, "grad_norm": 5.74338960647583, "learning_rate": 0.00019996040911513324, "loss": 1.8156, "step": 12590 }, { "epoch": 1.516245487364621, "grad_norm": 4.544538974761963, "learning_rate": 0.00019996030200407504, "loss": 1.8866, "step": 12600 }, { "epoch": 1.5174488567990374, "grad_norm": 7.063827991485596, "learning_rate": 0.00019996019474834964, "loss": 2.1647, "step": 12610 }, { "epoch": 1.5186522262334536, "grad_norm": 6.014230728149414, "learning_rate": 0.0001999600873479571, "loss": 1.9253, "step": 12620 }, { "epoch": 1.5198555956678699, "grad_norm": 4.151496410369873, "learning_rate": 0.00019995997980289768, "loss": 2.1063, "step": 12630 }, { "epoch": 1.5210589651022866, "grad_norm": 5.740432262420654, "learning_rate": 0.0001999598721131715, "loss": 1.9949, "step": 12640 }, { "epoch": 1.5222623345367028, "grad_norm": 5.5747575759887695, "learning_rate": 0.00019995976427877868, "loss": 2.0398, "step": 12650 }, { "epoch": 1.523465703971119, "grad_norm": 6.39547872543335, "learning_rate": 0.0001999596562997194, "loss": 2.1979, "step": 12660 }, { "epoch": 1.5246690734055355, "grad_norm": 5.9210734367370605, "learning_rate": 0.00019995954817599382, "loss": 2.1166, "step": 12670 }, { "epoch": 1.525872442839952, "grad_norm": 4.516720771789551, "learning_rate": 0.0001999594399076021, "loss": 1.9733, "step": 12680 }, { "epoch": 1.5270758122743682, "grad_norm": 6.413763999938965, "learning_rate": 0.0001999593314945444, "loss": 1.9363, "step": 12690 }, { "epoch": 1.5282791817087846, "grad_norm": 5.059680461883545, "learning_rate": 0.00019995922293682085, "loss": 2.1826, "step": 12700 }, { "epoch": 1.529482551143201, "grad_norm": 6.301856517791748, "learning_rate": 0.00019995911423443163, "loss": 2.1322, "step": 12710 }, { "epoch": 1.5306859205776173, "grad_norm": 4.676280498504639, "learning_rate": 0.00019995900538737692, "loss": 1.7236, "step": 12720 }, { "epoch": 1.5318892900120336, "grad_norm": 3.2905354499816895, "learning_rate": 0.0001999588963956568, "loss": 2.035, "step": 12730 }, { "epoch": 1.53309265944645, "grad_norm": 6.076344966888428, "learning_rate": 0.0001999587872592715, "loss": 1.8391, "step": 12740 }, { "epoch": 1.5342960288808665, "grad_norm": 3.6273529529571533, "learning_rate": 0.00019995867797822117, "loss": 1.7275, "step": 12750 }, { "epoch": 1.5354993983152827, "grad_norm": 6.787859916687012, "learning_rate": 0.00019995856855250594, "loss": 1.9494, "step": 12760 }, { "epoch": 1.5367027677496992, "grad_norm": 4.670495986938477, "learning_rate": 0.00019995845898212599, "loss": 1.9612, "step": 12770 }, { "epoch": 1.5379061371841156, "grad_norm": 3.980814218521118, "learning_rate": 0.00019995834926708148, "loss": 2.186, "step": 12780 }, { "epoch": 1.5391095066185319, "grad_norm": 5.668554306030273, "learning_rate": 0.00019995823940737256, "loss": 1.7496, "step": 12790 }, { "epoch": 1.5403128760529483, "grad_norm": 4.963620662689209, "learning_rate": 0.00019995812940299933, "loss": 2.0943, "step": 12800 }, { "epoch": 1.5415162454873648, "grad_norm": 6.285371780395508, "learning_rate": 0.00019995801925396206, "loss": 1.9992, "step": 12810 }, { "epoch": 1.542719614921781, "grad_norm": 5.616092681884766, "learning_rate": 0.00019995790896026088, "loss": 1.9886, "step": 12820 }, { "epoch": 1.5439229843561972, "grad_norm": 3.815455675125122, "learning_rate": 0.0001999577985218959, "loss": 2.0842, "step": 12830 }, { "epoch": 1.5451263537906137, "grad_norm": 4.692776203155518, "learning_rate": 0.00019995768793886729, "loss": 1.8275, "step": 12840 }, { "epoch": 1.5463297232250302, "grad_norm": 4.133147239685059, "learning_rate": 0.00019995757721117524, "loss": 1.8246, "step": 12850 }, { "epoch": 1.5475330926594464, "grad_norm": 9.024523735046387, "learning_rate": 0.00019995746633881992, "loss": 1.9761, "step": 12860 }, { "epoch": 1.5487364620938628, "grad_norm": 4.864393711090088, "learning_rate": 0.00019995735532180142, "loss": 1.9203, "step": 12870 }, { "epoch": 1.5499398315282793, "grad_norm": 3.4315037727355957, "learning_rate": 0.00019995724416011998, "loss": 2.3091, "step": 12880 }, { "epoch": 1.5511432009626955, "grad_norm": 6.324875354766846, "learning_rate": 0.0001999571328537757, "loss": 2.1473, "step": 12890 }, { "epoch": 1.5523465703971118, "grad_norm": 4.298543930053711, "learning_rate": 0.0001999570214027688, "loss": 1.9465, "step": 12900 }, { "epoch": 1.5535499398315282, "grad_norm": 6.035646915435791, "learning_rate": 0.00019995690980709942, "loss": 2.0224, "step": 12910 }, { "epoch": 1.5547533092659447, "grad_norm": 5.38960599899292, "learning_rate": 0.00019995679806676767, "loss": 2.0193, "step": 12920 }, { "epoch": 1.555956678700361, "grad_norm": 4.020500659942627, "learning_rate": 0.0001999566861817738, "loss": 1.9763, "step": 12930 }, { "epoch": 1.5571600481347774, "grad_norm": 5.028992176055908, "learning_rate": 0.00019995657415211793, "loss": 2.0487, "step": 12940 }, { "epoch": 1.5583634175691938, "grad_norm": 4.500495910644531, "learning_rate": 0.0001999564619778002, "loss": 1.9695, "step": 12950 }, { "epoch": 1.55956678700361, "grad_norm": 8.437773704528809, "learning_rate": 0.00019995634965882077, "loss": 2.2308, "step": 12960 }, { "epoch": 1.5607701564380265, "grad_norm": 5.600293159484863, "learning_rate": 0.00019995623719517983, "loss": 2.1186, "step": 12970 }, { "epoch": 1.561973525872443, "grad_norm": 4.004968643188477, "learning_rate": 0.00019995612458687757, "loss": 1.9492, "step": 12980 }, { "epoch": 1.5631768953068592, "grad_norm": 6.176262378692627, "learning_rate": 0.0001999560118339141, "loss": 1.9266, "step": 12990 }, { "epoch": 1.5643802647412755, "grad_norm": 6.919417858123779, "learning_rate": 0.00019995589893628962, "loss": 1.9525, "step": 13000 }, { "epoch": 1.565583634175692, "grad_norm": 5.738315105438232, "learning_rate": 0.00019995578589400423, "loss": 1.9941, "step": 13010 }, { "epoch": 1.5667870036101084, "grad_norm": 4.617683410644531, "learning_rate": 0.00019995567270705817, "loss": 1.8318, "step": 13020 }, { "epoch": 1.5679903730445246, "grad_norm": 3.524646043777466, "learning_rate": 0.00019995555937545156, "loss": 2.0668, "step": 13030 }, { "epoch": 1.569193742478941, "grad_norm": 5.225677967071533, "learning_rate": 0.00019995544589918458, "loss": 2.0323, "step": 13040 }, { "epoch": 1.5703971119133575, "grad_norm": 4.304656028747559, "learning_rate": 0.00019995533227825743, "loss": 2.0233, "step": 13050 }, { "epoch": 1.5716004813477737, "grad_norm": 6.721856117248535, "learning_rate": 0.00019995521851267018, "loss": 2.0863, "step": 13060 }, { "epoch": 1.57280385078219, "grad_norm": 5.3623857498168945, "learning_rate": 0.00019995510460242308, "loss": 1.8831, "step": 13070 }, { "epoch": 1.5740072202166067, "grad_norm": 4.196499824523926, "learning_rate": 0.00019995499054751626, "loss": 2.0412, "step": 13080 }, { "epoch": 1.575210589651023, "grad_norm": 4.95134973526001, "learning_rate": 0.0001999548763479499, "loss": 1.7917, "step": 13090 }, { "epoch": 1.5764139590854391, "grad_norm": 4.267271041870117, "learning_rate": 0.00019995476200372413, "loss": 1.9908, "step": 13100 }, { "epoch": 1.5776173285198556, "grad_norm": 5.7748517990112305, "learning_rate": 0.00019995464751483915, "loss": 2.0942, "step": 13110 }, { "epoch": 1.578820697954272, "grad_norm": 5.3401689529418945, "learning_rate": 0.00019995453288129515, "loss": 1.9139, "step": 13120 }, { "epoch": 1.5800240673886883, "grad_norm": 3.759310483932495, "learning_rate": 0.00019995441810309223, "loss": 2.064, "step": 13130 }, { "epoch": 1.5812274368231047, "grad_norm": 5.477263450622559, "learning_rate": 0.00019995430318023059, "loss": 2.0441, "step": 13140 }, { "epoch": 1.5824308062575212, "grad_norm": 4.138892650604248, "learning_rate": 0.0001999541881127104, "loss": 2.0506, "step": 13150 }, { "epoch": 1.5836341756919374, "grad_norm": 5.960071563720703, "learning_rate": 0.00019995407290053185, "loss": 2.2549, "step": 13160 }, { "epoch": 1.5848375451263537, "grad_norm": 5.113036632537842, "learning_rate": 0.00019995395754369503, "loss": 1.9663, "step": 13170 }, { "epoch": 1.5860409145607701, "grad_norm": 4.506590843200684, "learning_rate": 0.0001999538420422002, "loss": 2.0485, "step": 13180 }, { "epoch": 1.5872442839951866, "grad_norm": 5.975399971008301, "learning_rate": 0.00019995372639604745, "loss": 1.9692, "step": 13190 }, { "epoch": 1.5884476534296028, "grad_norm": 5.632997989654541, "learning_rate": 0.000199953610605237, "loss": 1.7883, "step": 13200 }, { "epoch": 1.5896510228640193, "grad_norm": 6.302413463592529, "learning_rate": 0.000199953494669769, "loss": 1.9853, "step": 13210 }, { "epoch": 1.5908543922984357, "grad_norm": 4.656396389007568, "learning_rate": 0.0001999533785896436, "loss": 2.1029, "step": 13220 }, { "epoch": 1.592057761732852, "grad_norm": 3.7483510971069336, "learning_rate": 0.00019995326236486096, "loss": 1.9259, "step": 13230 }, { "epoch": 1.5932611311672682, "grad_norm": 6.738104820251465, "learning_rate": 0.00019995314599542132, "loss": 2.1684, "step": 13240 }, { "epoch": 1.5944645006016849, "grad_norm": 5.493054389953613, "learning_rate": 0.00019995302948132477, "loss": 2.0286, "step": 13250 }, { "epoch": 1.595667870036101, "grad_norm": 6.3818511962890625, "learning_rate": 0.00019995291282257153, "loss": 2.0926, "step": 13260 }, { "epoch": 1.5968712394705173, "grad_norm": 4.904103755950928, "learning_rate": 0.00019995279601916176, "loss": 2.24, "step": 13270 }, { "epoch": 1.5980746089049338, "grad_norm": 3.644404888153076, "learning_rate": 0.00019995267907109556, "loss": 1.9631, "step": 13280 }, { "epoch": 1.5992779783393503, "grad_norm": 5.314022541046143, "learning_rate": 0.0001999525619783732, "loss": 1.8594, "step": 13290 }, { "epoch": 1.6004813477737665, "grad_norm": 4.290368556976318, "learning_rate": 0.0001999524447409948, "loss": 2.0327, "step": 13300 }, { "epoch": 1.601684717208183, "grad_norm": 6.445858478546143, "learning_rate": 0.00019995232735896054, "loss": 1.9905, "step": 13310 }, { "epoch": 1.6028880866425994, "grad_norm": 5.4139275550842285, "learning_rate": 0.00019995220983227055, "loss": 1.8597, "step": 13320 }, { "epoch": 1.6040914560770156, "grad_norm": 4.069512367248535, "learning_rate": 0.00019995209216092506, "loss": 1.9423, "step": 13330 }, { "epoch": 1.6052948255114319, "grad_norm": 5.475891590118408, "learning_rate": 0.0001999519743449242, "loss": 1.9228, "step": 13340 }, { "epoch": 1.6064981949458483, "grad_norm": 4.515195846557617, "learning_rate": 0.00019995185638426818, "loss": 2.0322, "step": 13350 }, { "epoch": 1.6077015643802648, "grad_norm": 7.007673740386963, "learning_rate": 0.00019995173827895714, "loss": 2.0488, "step": 13360 }, { "epoch": 1.608904933814681, "grad_norm": 6.145302772521973, "learning_rate": 0.00019995162002899125, "loss": 2.0093, "step": 13370 }, { "epoch": 1.6101083032490975, "grad_norm": 4.393493175506592, "learning_rate": 0.0001999515016343707, "loss": 1.9531, "step": 13380 }, { "epoch": 1.611311672683514, "grad_norm": 7.322495937347412, "learning_rate": 0.00019995138309509566, "loss": 1.9249, "step": 13390 }, { "epoch": 1.6125150421179302, "grad_norm": 4.038941860198975, "learning_rate": 0.00019995126441116624, "loss": 1.9711, "step": 13400 }, { "epoch": 1.6137184115523464, "grad_norm": 6.373983860015869, "learning_rate": 0.00019995114558258272, "loss": 2.1252, "step": 13410 }, { "epoch": 1.614921780986763, "grad_norm": 5.309967517852783, "learning_rate": 0.0001999510266093452, "loss": 1.9081, "step": 13420 }, { "epoch": 1.6161251504211793, "grad_norm": 3.5125749111175537, "learning_rate": 0.00019995090749145384, "loss": 1.9977, "step": 13430 }, { "epoch": 1.6173285198555956, "grad_norm": 5.910234451293945, "learning_rate": 0.00019995078822890888, "loss": 2.0796, "step": 13440 }, { "epoch": 1.618531889290012, "grad_norm": 4.721339702606201, "learning_rate": 0.0001999506688217104, "loss": 2.0782, "step": 13450 }, { "epoch": 1.6197352587244285, "grad_norm": 6.251277923583984, "learning_rate": 0.0001999505492698587, "loss": 2.1032, "step": 13460 }, { "epoch": 1.6209386281588447, "grad_norm": 5.622414588928223, "learning_rate": 0.0001999504295733538, "loss": 2.0356, "step": 13470 }, { "epoch": 1.6221419975932612, "grad_norm": 3.497246265411377, "learning_rate": 0.000199950309732196, "loss": 2.0234, "step": 13480 }, { "epoch": 1.6233453670276776, "grad_norm": 5.098435878753662, "learning_rate": 0.0001999501897463854, "loss": 1.8807, "step": 13490 }, { "epoch": 1.6245487364620939, "grad_norm": 4.744088649749756, "learning_rate": 0.00019995006961592222, "loss": 2.1631, "step": 13500 }, { "epoch": 1.62575210589651, "grad_norm": 5.996286392211914, "learning_rate": 0.00019994994934080662, "loss": 1.9999, "step": 13510 }, { "epoch": 1.6269554753309265, "grad_norm": 4.346140384674072, "learning_rate": 0.00019994982892103874, "loss": 1.7685, "step": 13520 }, { "epoch": 1.628158844765343, "grad_norm": 3.85640811920166, "learning_rate": 0.00019994970835661877, "loss": 2.0004, "step": 13530 }, { "epoch": 1.6293622141997592, "grad_norm": 5.55426549911499, "learning_rate": 0.0001999495876475469, "loss": 2.1797, "step": 13540 }, { "epoch": 1.6305655836341757, "grad_norm": 4.2621941566467285, "learning_rate": 0.00019994946679382337, "loss": 2.1064, "step": 13550 }, { "epoch": 1.6317689530685922, "grad_norm": 6.668655872344971, "learning_rate": 0.00019994934579544823, "loss": 2.1616, "step": 13560 }, { "epoch": 1.6329723225030084, "grad_norm": 3.979400634765625, "learning_rate": 0.00019994922465242169, "loss": 1.8361, "step": 13570 }, { "epoch": 1.6341756919374246, "grad_norm": 3.248394250869751, "learning_rate": 0.00019994910336474395, "loss": 2.0907, "step": 13580 }, { "epoch": 1.6353790613718413, "grad_norm": 5.4946112632751465, "learning_rate": 0.00019994898193241524, "loss": 2.2238, "step": 13590 }, { "epoch": 1.6365824308062575, "grad_norm": 4.9663405418396, "learning_rate": 0.00019994886035543558, "loss": 2.0246, "step": 13600 }, { "epoch": 1.6377858002406738, "grad_norm": 8.015204429626465, "learning_rate": 0.0001999487386338053, "loss": 1.9332, "step": 13610 }, { "epoch": 1.6389891696750902, "grad_norm": 5.355557441711426, "learning_rate": 0.00019994861676752454, "loss": 1.8407, "step": 13620 }, { "epoch": 1.6401925391095067, "grad_norm": 3.132603883743286, "learning_rate": 0.00019994849475659342, "loss": 2.0967, "step": 13630 }, { "epoch": 1.641395908543923, "grad_norm": 5.896000385284424, "learning_rate": 0.00019994837260101217, "loss": 1.9129, "step": 13640 }, { "epoch": 1.6425992779783394, "grad_norm": 4.218536376953125, "learning_rate": 0.00019994825030078092, "loss": 1.9994, "step": 13650 }, { "epoch": 1.6438026474127558, "grad_norm": 6.520483016967773, "learning_rate": 0.0001999481278558999, "loss": 1.9769, "step": 13660 }, { "epoch": 1.645006016847172, "grad_norm": 5.227589130401611, "learning_rate": 0.00019994800526636924, "loss": 2.0373, "step": 13670 }, { "epoch": 1.6462093862815883, "grad_norm": 3.4769139289855957, "learning_rate": 0.00019994788253218916, "loss": 2.1869, "step": 13680 }, { "epoch": 1.6474127557160048, "grad_norm": 5.577840805053711, "learning_rate": 0.00019994775965335984, "loss": 1.9615, "step": 13690 }, { "epoch": 1.6486161251504212, "grad_norm": 4.6957855224609375, "learning_rate": 0.0001999476366298814, "loss": 2.0406, "step": 13700 }, { "epoch": 1.6498194945848375, "grad_norm": 6.101114273071289, "learning_rate": 0.00019994751346175406, "loss": 2.021, "step": 13710 }, { "epoch": 1.651022864019254, "grad_norm": 4.463509559631348, "learning_rate": 0.000199947390148978, "loss": 2.0715, "step": 13720 }, { "epoch": 1.6522262334536704, "grad_norm": 3.30945086479187, "learning_rate": 0.0001999472666915534, "loss": 2.0682, "step": 13730 }, { "epoch": 1.6534296028880866, "grad_norm": 5.090394020080566, "learning_rate": 0.00019994714308948038, "loss": 2.0736, "step": 13740 }, { "epoch": 1.654632972322503, "grad_norm": 4.4032883644104, "learning_rate": 0.00019994701934275923, "loss": 2.0155, "step": 13750 }, { "epoch": 1.6558363417569195, "grad_norm": 7.14220666885376, "learning_rate": 0.00019994689545139004, "loss": 2.0574, "step": 13760 }, { "epoch": 1.6570397111913358, "grad_norm": 4.546938896179199, "learning_rate": 0.000199946771415373, "loss": 1.8947, "step": 13770 }, { "epoch": 1.658243080625752, "grad_norm": 3.6042399406433105, "learning_rate": 0.00019994664723470832, "loss": 2.0706, "step": 13780 }, { "epoch": 1.6594464500601684, "grad_norm": 6.472089767456055, "learning_rate": 0.00019994652290939618, "loss": 1.9499, "step": 13790 }, { "epoch": 1.660649819494585, "grad_norm": 4.142209053039551, "learning_rate": 0.00019994639843943672, "loss": 1.9042, "step": 13800 }, { "epoch": 1.6618531889290011, "grad_norm": 7.9362688064575195, "learning_rate": 0.00019994627382483016, "loss": 2.3313, "step": 13810 }, { "epoch": 1.6630565583634176, "grad_norm": 4.758325576782227, "learning_rate": 0.00019994614906557664, "loss": 1.9777, "step": 13820 }, { "epoch": 1.664259927797834, "grad_norm": 3.6581249237060547, "learning_rate": 0.0001999460241616764, "loss": 2.0724, "step": 13830 }, { "epoch": 1.6654632972322503, "grad_norm": 5.850959300994873, "learning_rate": 0.0001999458991131296, "loss": 2.0206, "step": 13840 }, { "epoch": 1.6666666666666665, "grad_norm": 4.039100170135498, "learning_rate": 0.00019994577391993637, "loss": 2.0497, "step": 13850 }, { "epoch": 1.6678700361010832, "grad_norm": 7.437899112701416, "learning_rate": 0.00019994564858209695, "loss": 2.1651, "step": 13860 }, { "epoch": 1.6690734055354994, "grad_norm": 5.1110358238220215, "learning_rate": 0.0001999455230996115, "loss": 1.8606, "step": 13870 }, { "epoch": 1.6702767749699157, "grad_norm": 4.0577192306518555, "learning_rate": 0.00019994539747248018, "loss": 1.9556, "step": 13880 }, { "epoch": 1.6714801444043321, "grad_norm": 6.085712432861328, "learning_rate": 0.00019994527170070323, "loss": 1.8232, "step": 13890 }, { "epoch": 1.6726835138387486, "grad_norm": 4.046009540557861, "learning_rate": 0.00019994514578428076, "loss": 1.9657, "step": 13900 }, { "epoch": 1.6738868832731648, "grad_norm": 6.810433864593506, "learning_rate": 0.000199945019723213, "loss": 1.9914, "step": 13910 }, { "epoch": 1.6750902527075813, "grad_norm": 4.578562259674072, "learning_rate": 0.00019994489351750014, "loss": 2.0339, "step": 13920 }, { "epoch": 1.6762936221419977, "grad_norm": 3.8101818561553955, "learning_rate": 0.00019994476716714233, "loss": 2.0032, "step": 13930 }, { "epoch": 1.677496991576414, "grad_norm": 6.676901817321777, "learning_rate": 0.0001999446406721398, "loss": 2.0849, "step": 13940 }, { "epoch": 1.6787003610108302, "grad_norm": 4.303239822387695, "learning_rate": 0.00019994451403249265, "loss": 2.0743, "step": 13950 }, { "epoch": 1.6799037304452467, "grad_norm": 7.639218330383301, "learning_rate": 0.0001999443872482011, "loss": 2.1194, "step": 13960 }, { "epoch": 1.6811070998796631, "grad_norm": 4.64432954788208, "learning_rate": 0.00019994426031926538, "loss": 1.8633, "step": 13970 }, { "epoch": 1.6823104693140793, "grad_norm": 3.69960618019104, "learning_rate": 0.00019994413324568564, "loss": 2.2756, "step": 13980 }, { "epoch": 1.6835138387484958, "grad_norm": 5.044643878936768, "learning_rate": 0.00019994400602746205, "loss": 1.7677, "step": 13990 }, { "epoch": 1.6847172081829123, "grad_norm": 4.38433313369751, "learning_rate": 0.00019994387866459484, "loss": 1.9455, "step": 14000 }, { "epoch": 1.6859205776173285, "grad_norm": 10.714807510375977, "learning_rate": 0.00019994375115708413, "loss": 2.2971, "step": 14010 }, { "epoch": 1.6871239470517447, "grad_norm": 6.168661117553711, "learning_rate": 0.00019994362350493015, "loss": 2.0647, "step": 14020 }, { "epoch": 1.6883273164861614, "grad_norm": 3.642040729522705, "learning_rate": 0.00019994349570813308, "loss": 2.1859, "step": 14030 }, { "epoch": 1.6895306859205776, "grad_norm": 7.047633171081543, "learning_rate": 0.00019994336776669306, "loss": 2.0542, "step": 14040 }, { "epoch": 1.6907340553549939, "grad_norm": 3.594160318374634, "learning_rate": 0.00019994323968061033, "loss": 2.0876, "step": 14050 }, { "epoch": 1.6919374247894103, "grad_norm": 5.530572414398193, "learning_rate": 0.00019994311144988508, "loss": 2.0243, "step": 14060 }, { "epoch": 1.6931407942238268, "grad_norm": 5.095473289489746, "learning_rate": 0.00019994298307451745, "loss": 1.9181, "step": 14070 }, { "epoch": 1.694344163658243, "grad_norm": 4.656373500823975, "learning_rate": 0.00019994285455450763, "loss": 2.2075, "step": 14080 }, { "epoch": 1.6955475330926595, "grad_norm": 5.468774795532227, "learning_rate": 0.00019994272588985586, "loss": 2.036, "step": 14090 }, { "epoch": 1.696750902527076, "grad_norm": 4.336451530456543, "learning_rate": 0.00019994259708056226, "loss": 1.9637, "step": 14100 }, { "epoch": 1.6979542719614922, "grad_norm": 6.688697338104248, "learning_rate": 0.00019994246812662708, "loss": 1.9577, "step": 14110 }, { "epoch": 1.6991576413959084, "grad_norm": 4.899430274963379, "learning_rate": 0.00019994233902805047, "loss": 2.029, "step": 14120 }, { "epoch": 1.7003610108303249, "grad_norm": 4.238423824310303, "learning_rate": 0.0001999422097848326, "loss": 2.1059, "step": 14130 }, { "epoch": 1.7015643802647413, "grad_norm": 5.7632527351379395, "learning_rate": 0.00019994208039697368, "loss": 1.8448, "step": 14140 }, { "epoch": 1.7027677496991576, "grad_norm": 4.8242950439453125, "learning_rate": 0.00019994195086447389, "loss": 1.9235, "step": 14150 }, { "epoch": 1.703971119133574, "grad_norm": 6.783804416656494, "learning_rate": 0.00019994182118733344, "loss": 1.9539, "step": 14160 }, { "epoch": 1.7051744885679905, "grad_norm": 4.87070369720459, "learning_rate": 0.0001999416913655525, "loss": 2.0481, "step": 14170 }, { "epoch": 1.7063778580024067, "grad_norm": 4.1888041496276855, "learning_rate": 0.00019994156139913126, "loss": 2.0329, "step": 14180 }, { "epoch": 1.707581227436823, "grad_norm": 5.123957633972168, "learning_rate": 0.0001999414312880699, "loss": 1.882, "step": 14190 }, { "epoch": 1.7087845968712396, "grad_norm": 4.196894645690918, "learning_rate": 0.0001999413010323686, "loss": 2.0405, "step": 14200 }, { "epoch": 1.7099879663056559, "grad_norm": 5.899720668792725, "learning_rate": 0.00019994117063202758, "loss": 2.1224, "step": 14210 }, { "epoch": 1.711191335740072, "grad_norm": 4.157967567443848, "learning_rate": 0.000199941040087047, "loss": 1.7508, "step": 14220 }, { "epoch": 1.7123947051744886, "grad_norm": 3.6508965492248535, "learning_rate": 0.00019994090939742706, "loss": 1.9189, "step": 14230 }, { "epoch": 1.713598074608905, "grad_norm": 6.262744903564453, "learning_rate": 0.00019994077856316794, "loss": 1.8024, "step": 14240 }, { "epoch": 1.7148014440433212, "grad_norm": 4.435568809509277, "learning_rate": 0.00019994064758426986, "loss": 2.1275, "step": 14250 }, { "epoch": 1.7160048134777377, "grad_norm": 6.094752788543701, "learning_rate": 0.000199940516460733, "loss": 1.8911, "step": 14260 }, { "epoch": 1.7172081829121542, "grad_norm": 5.009769916534424, "learning_rate": 0.00019994038519255752, "loss": 1.9552, "step": 14270 }, { "epoch": 1.7184115523465704, "grad_norm": 3.6759421825408936, "learning_rate": 0.00019994025377974363, "loss": 2.107, "step": 14280 }, { "epoch": 1.7196149217809866, "grad_norm": 6.70467472076416, "learning_rate": 0.0001999401222222915, "loss": 1.9142, "step": 14290 }, { "epoch": 1.720818291215403, "grad_norm": 5.994391918182373, "learning_rate": 0.0001999399905202014, "loss": 2.1916, "step": 14300 }, { "epoch": 1.7220216606498195, "grad_norm": 6.835086822509766, "learning_rate": 0.00019993985867347341, "loss": 1.7309, "step": 14310 }, { "epoch": 1.7232250300842358, "grad_norm": 4.927305698394775, "learning_rate": 0.00019993972668210774, "loss": 2.0401, "step": 14320 }, { "epoch": 1.7244283995186522, "grad_norm": 4.515219688415527, "learning_rate": 0.00019993959454610467, "loss": 2.1645, "step": 14330 }, { "epoch": 1.7256317689530687, "grad_norm": 5.741243362426758, "learning_rate": 0.0001999394622654643, "loss": 2.144, "step": 14340 }, { "epoch": 1.726835138387485, "grad_norm": 4.268014907836914, "learning_rate": 0.00019993932984018687, "loss": 2.1394, "step": 14350 }, { "epoch": 1.7280385078219012, "grad_norm": 7.619650840759277, "learning_rate": 0.00019993919727027252, "loss": 1.7946, "step": 14360 }, { "epoch": 1.7292418772563178, "grad_norm": 4.427943229675293, "learning_rate": 0.00019993906455572152, "loss": 2.0447, "step": 14370 }, { "epoch": 1.730445246690734, "grad_norm": 3.5642616748809814, "learning_rate": 0.000199938931696534, "loss": 2.0119, "step": 14380 }, { "epoch": 1.7316486161251503, "grad_norm": 5.555940628051758, "learning_rate": 0.0001999387986927102, "loss": 2.1435, "step": 14390 }, { "epoch": 1.7328519855595668, "grad_norm": 4.9382147789001465, "learning_rate": 0.00019993866554425025, "loss": 1.9292, "step": 14400 }, { "epoch": 1.7340553549939832, "grad_norm": 6.797666072845459, "learning_rate": 0.0001999385322511544, "loss": 1.8982, "step": 14410 }, { "epoch": 1.7352587244283995, "grad_norm": 5.267373561859131, "learning_rate": 0.00019993839881342278, "loss": 1.9265, "step": 14420 }, { "epoch": 1.736462093862816, "grad_norm": 4.399893760681152, "learning_rate": 0.00019993826523105566, "loss": 1.9926, "step": 14430 }, { "epoch": 1.7376654632972324, "grad_norm": 4.862719535827637, "learning_rate": 0.00019993813150405318, "loss": 2.0627, "step": 14440 }, { "epoch": 1.7388688327316486, "grad_norm": 5.0265278816223145, "learning_rate": 0.00019993799763241555, "loss": 1.987, "step": 14450 }, { "epoch": 1.7400722021660648, "grad_norm": 6.209497451782227, "learning_rate": 0.000199937863616143, "loss": 1.945, "step": 14460 }, { "epoch": 1.7412755716004813, "grad_norm": 4.86743688583374, "learning_rate": 0.00019993772945523563, "loss": 1.8645, "step": 14470 }, { "epoch": 1.7424789410348978, "grad_norm": 4.553318977355957, "learning_rate": 0.00019993759514969374, "loss": 1.8207, "step": 14480 }, { "epoch": 1.743682310469314, "grad_norm": 5.645698070526123, "learning_rate": 0.00019993746069951746, "loss": 2.0576, "step": 14490 }, { "epoch": 1.7448856799037304, "grad_norm": 4.6090617179870605, "learning_rate": 0.00019993732610470698, "loss": 1.9278, "step": 14500 }, { "epoch": 1.746089049338147, "grad_norm": 7.098291397094727, "learning_rate": 0.00019993719136526252, "loss": 2.002, "step": 14510 }, { "epoch": 1.7472924187725631, "grad_norm": 4.5874409675598145, "learning_rate": 0.0001999370564811843, "loss": 2.0125, "step": 14520 }, { "epoch": 1.7484957882069796, "grad_norm": 4.147941589355469, "learning_rate": 0.00019993692145247243, "loss": 1.9628, "step": 14530 }, { "epoch": 1.749699157641396, "grad_norm": 5.5968170166015625, "learning_rate": 0.0001999367862791272, "loss": 1.7204, "step": 14540 }, { "epoch": 1.7509025270758123, "grad_norm": 5.026246547698975, "learning_rate": 0.00019993665096114879, "loss": 1.7494, "step": 14550 }, { "epoch": 1.7521058965102285, "grad_norm": 7.101372241973877, "learning_rate": 0.00019993651549853736, "loss": 2.1529, "step": 14560 }, { "epoch": 1.753309265944645, "grad_norm": 4.936278820037842, "learning_rate": 0.0001999363798912931, "loss": 1.8869, "step": 14570 }, { "epoch": 1.7545126353790614, "grad_norm": 3.160592555999756, "learning_rate": 0.00019993624413941623, "loss": 2.0949, "step": 14580 }, { "epoch": 1.7557160048134777, "grad_norm": 5.4424285888671875, "learning_rate": 0.00019993610824290694, "loss": 1.7147, "step": 14590 }, { "epoch": 1.7569193742478941, "grad_norm": 3.808987855911255, "learning_rate": 0.00019993597220176542, "loss": 2.1881, "step": 14600 }, { "epoch": 1.7581227436823106, "grad_norm": 5.4764018058776855, "learning_rate": 0.00019993583601599188, "loss": 2.0252, "step": 14610 }, { "epoch": 1.7593261131167268, "grad_norm": 4.963686466217041, "learning_rate": 0.00019993569968558653, "loss": 2.238, "step": 14620 }, { "epoch": 1.760529482551143, "grad_norm": 3.916469097137451, "learning_rate": 0.00019993556321054953, "loss": 1.908, "step": 14630 }, { "epoch": 1.7617328519855595, "grad_norm": 5.4917144775390625, "learning_rate": 0.00019993542659088113, "loss": 1.879, "step": 14640 }, { "epoch": 1.762936221419976, "grad_norm": 3.6770243644714355, "learning_rate": 0.00019993528982658145, "loss": 2.0372, "step": 14650 }, { "epoch": 1.7641395908543922, "grad_norm": 6.837653636932373, "learning_rate": 0.00019993515291765075, "loss": 2.0467, "step": 14660 }, { "epoch": 1.7653429602888087, "grad_norm": 5.158958435058594, "learning_rate": 0.0001999350158640892, "loss": 1.8737, "step": 14670 }, { "epoch": 1.7665463297232251, "grad_norm": 3.207634210586548, "learning_rate": 0.00019993487866589704, "loss": 1.9669, "step": 14680 }, { "epoch": 1.7677496991576414, "grad_norm": 4.8469696044921875, "learning_rate": 0.0001999347413230744, "loss": 1.8349, "step": 14690 }, { "epoch": 1.7689530685920578, "grad_norm": 4.001832008361816, "learning_rate": 0.00019993460383562153, "loss": 1.9146, "step": 14700 }, { "epoch": 1.7701564380264743, "grad_norm": 8.350502014160156, "learning_rate": 0.0001999344662035386, "loss": 2.0797, "step": 14710 }, { "epoch": 1.7713598074608905, "grad_norm": 4.965343475341797, "learning_rate": 0.00019993432842682586, "loss": 1.9329, "step": 14720 }, { "epoch": 1.7725631768953067, "grad_norm": 3.8536767959594727, "learning_rate": 0.00019993419050548346, "loss": 1.9399, "step": 14730 }, { "epoch": 1.7737665463297232, "grad_norm": 5.0429368019104, "learning_rate": 0.0001999340524395116, "loss": 2.0099, "step": 14740 }, { "epoch": 1.7749699157641396, "grad_norm": 3.9784657955169678, "learning_rate": 0.0001999339142289105, "loss": 2.0581, "step": 14750 }, { "epoch": 1.7761732851985559, "grad_norm": 7.18906831741333, "learning_rate": 0.0001999337758736804, "loss": 1.9906, "step": 14760 }, { "epoch": 1.7773766546329723, "grad_norm": 4.139045715332031, "learning_rate": 0.00019993363737382138, "loss": 1.989, "step": 14770 }, { "epoch": 1.7785800240673888, "grad_norm": 3.125943422317505, "learning_rate": 0.00019993349872933375, "loss": 2.062, "step": 14780 }, { "epoch": 1.779783393501805, "grad_norm": 6.117612838745117, "learning_rate": 0.00019993335994021764, "loss": 1.8821, "step": 14790 }, { "epoch": 1.7809867629362213, "grad_norm": 4.846273899078369, "learning_rate": 0.00019993322100647333, "loss": 1.8729, "step": 14800 }, { "epoch": 1.782190132370638, "grad_norm": 6.4829583168029785, "learning_rate": 0.00019993308192810095, "loss": 1.9971, "step": 14810 }, { "epoch": 1.7833935018050542, "grad_norm": 4.51899528503418, "learning_rate": 0.00019993294270510075, "loss": 2.0308, "step": 14820 }, { "epoch": 1.7845968712394704, "grad_norm": 3.4743294715881348, "learning_rate": 0.0001999328033374729, "loss": 2.0067, "step": 14830 }, { "epoch": 1.7858002406738869, "grad_norm": 6.282670021057129, "learning_rate": 0.0001999326638252176, "loss": 1.9238, "step": 14840 }, { "epoch": 1.7870036101083033, "grad_norm": 4.519623279571533, "learning_rate": 0.0001999325241683351, "loss": 1.9772, "step": 14850 }, { "epoch": 1.7882069795427196, "grad_norm": 6.4352216720581055, "learning_rate": 0.0001999323843668255, "loss": 2.0207, "step": 14860 }, { "epoch": 1.789410348977136, "grad_norm": 4.4146599769592285, "learning_rate": 0.00019993224442068912, "loss": 1.8675, "step": 14870 }, { "epoch": 1.7906137184115525, "grad_norm": 3.8463425636291504, "learning_rate": 0.0001999321043299261, "loss": 2.0264, "step": 14880 }, { "epoch": 1.7918170878459687, "grad_norm": 5.0575175285339355, "learning_rate": 0.00019993196409453663, "loss": 2.2029, "step": 14890 }, { "epoch": 1.793020457280385, "grad_norm": 4.583847522735596, "learning_rate": 0.00019993182371452094, "loss": 2.0102, "step": 14900 }, { "epoch": 1.7942238267148014, "grad_norm": 6.8686418533325195, "learning_rate": 0.00019993168318987924, "loss": 1.9933, "step": 14910 }, { "epoch": 1.7954271961492179, "grad_norm": 5.599619388580322, "learning_rate": 0.00019993154252061172, "loss": 2.1391, "step": 14920 }, { "epoch": 1.796630565583634, "grad_norm": 3.494934320449829, "learning_rate": 0.0001999314017067186, "loss": 1.9257, "step": 14930 }, { "epoch": 1.7978339350180506, "grad_norm": 5.740988254547119, "learning_rate": 0.00019993126074820006, "loss": 2.1163, "step": 14940 }, { "epoch": 1.799037304452467, "grad_norm": 4.562415599822998, "learning_rate": 0.00019993111964505632, "loss": 1.9872, "step": 14950 }, { "epoch": 1.8002406738868832, "grad_norm": 5.620573043823242, "learning_rate": 0.00019993097839728755, "loss": 2.0738, "step": 14960 }, { "epoch": 1.8014440433212995, "grad_norm": 5.37310266494751, "learning_rate": 0.00019993083700489402, "loss": 2.1425, "step": 14970 }, { "epoch": 1.8026474127557162, "grad_norm": 3.5825746059417725, "learning_rate": 0.00019993069546787585, "loss": 1.9019, "step": 14980 }, { "epoch": 1.8038507821901324, "grad_norm": 5.194967746734619, "learning_rate": 0.00019993055378623333, "loss": 1.8986, "step": 14990 }, { "epoch": 1.8050541516245486, "grad_norm": 4.382756233215332, "learning_rate": 0.00019993041195996662, "loss": 1.8099, "step": 15000 }, { "epoch": 1.806257521058965, "grad_norm": 6.057338237762451, "learning_rate": 0.00019993026998907593, "loss": 2.2388, "step": 15010 }, { "epoch": 1.8074608904933815, "grad_norm": 5.524228572845459, "learning_rate": 0.00019993012787356148, "loss": 1.9931, "step": 15020 }, { "epoch": 1.8086642599277978, "grad_norm": 3.8152716159820557, "learning_rate": 0.00019992998561342343, "loss": 1.8704, "step": 15030 }, { "epoch": 1.8098676293622142, "grad_norm": 6.073026180267334, "learning_rate": 0.00019992984320866205, "loss": 1.9418, "step": 15040 }, { "epoch": 1.8110709987966307, "grad_norm": 4.161746025085449, "learning_rate": 0.00019992970065927748, "loss": 1.9705, "step": 15050 }, { "epoch": 1.812274368231047, "grad_norm": 7.0932440757751465, "learning_rate": 0.00019992955796526996, "loss": 1.9387, "step": 15060 }, { "epoch": 1.8134777376654632, "grad_norm": 5.212820053100586, "learning_rate": 0.00019992941512663974, "loss": 1.8921, "step": 15070 }, { "epoch": 1.8146811070998796, "grad_norm": 3.6544785499572754, "learning_rate": 0.00019992927214338696, "loss": 2.0141, "step": 15080 }, { "epoch": 1.815884476534296, "grad_norm": 5.903501987457275, "learning_rate": 0.00019992912901551187, "loss": 2.358, "step": 15090 }, { "epoch": 1.8170878459687123, "grad_norm": 3.894439220428467, "learning_rate": 0.00019992898574301465, "loss": 2.089, "step": 15100 }, { "epoch": 1.8182912154031288, "grad_norm": 6.723239898681641, "learning_rate": 0.00019992884232589552, "loss": 1.8207, "step": 15110 }, { "epoch": 1.8194945848375452, "grad_norm": 4.931623935699463, "learning_rate": 0.00019992869876415465, "loss": 1.7152, "step": 15120 }, { "epoch": 1.8206979542719615, "grad_norm": 3.3028016090393066, "learning_rate": 0.0001999285550577923, "loss": 1.9668, "step": 15130 }, { "epoch": 1.8219013237063777, "grad_norm": 5.859135150909424, "learning_rate": 0.00019992841120680866, "loss": 1.9609, "step": 15140 }, { "epoch": 1.8231046931407944, "grad_norm": 3.4727377891540527, "learning_rate": 0.00019992826721120394, "loss": 1.8742, "step": 15150 }, { "epoch": 1.8243080625752106, "grad_norm": 6.720226764678955, "learning_rate": 0.00019992812307097836, "loss": 1.9918, "step": 15160 }, { "epoch": 1.8255114320096268, "grad_norm": 3.607022762298584, "learning_rate": 0.0001999279787861321, "loss": 1.953, "step": 15170 }, { "epoch": 1.8267148014440433, "grad_norm": 3.5477395057678223, "learning_rate": 0.00019992783435666538, "loss": 1.9944, "step": 15180 }, { "epoch": 1.8279181708784598, "grad_norm": 5.895172119140625, "learning_rate": 0.00019992768978257844, "loss": 2.0263, "step": 15190 }, { "epoch": 1.829121540312876, "grad_norm": 3.352827310562134, "learning_rate": 0.00019992754506387142, "loss": 1.8496, "step": 15200 }, { "epoch": 1.8303249097472925, "grad_norm": 7.611035346984863, "learning_rate": 0.00019992740020054458, "loss": 1.9174, "step": 15210 }, { "epoch": 1.831528279181709, "grad_norm": 5.316261291503906, "learning_rate": 0.00019992725519259814, "loss": 1.8641, "step": 15220 }, { "epoch": 1.8327316486161251, "grad_norm": 4.250819206237793, "learning_rate": 0.00019992711004003231, "loss": 2.0508, "step": 15230 }, { "epoch": 1.8339350180505414, "grad_norm": 6.519930839538574, "learning_rate": 0.00019992696474284722, "loss": 1.91, "step": 15240 }, { "epoch": 1.8351383874849578, "grad_norm": 4.351858615875244, "learning_rate": 0.0001999268193010432, "loss": 2.0671, "step": 15250 }, { "epoch": 1.8363417569193743, "grad_norm": 6.781875133514404, "learning_rate": 0.00019992667371462037, "loss": 2.0352, "step": 15260 }, { "epoch": 1.8375451263537905, "grad_norm": 4.305140972137451, "learning_rate": 0.00019992652798357896, "loss": 2.0264, "step": 15270 }, { "epoch": 1.838748495788207, "grad_norm": 3.690814733505249, "learning_rate": 0.00019992638210791924, "loss": 2.0293, "step": 15280 }, { "epoch": 1.8399518652226234, "grad_norm": 6.05497407913208, "learning_rate": 0.0001999262360876413, "loss": 2.1248, "step": 15290 }, { "epoch": 1.8411552346570397, "grad_norm": 4.777842998504639, "learning_rate": 0.00019992608992274548, "loss": 2.2184, "step": 15300 }, { "epoch": 1.8423586040914561, "grad_norm": 6.349704742431641, "learning_rate": 0.00019992594361323193, "loss": 1.9533, "step": 15310 }, { "epoch": 1.8435619735258726, "grad_norm": 4.9056878089904785, "learning_rate": 0.00019992579715910088, "loss": 2.0726, "step": 15320 }, { "epoch": 1.8447653429602888, "grad_norm": 2.93888521194458, "learning_rate": 0.00019992565056035252, "loss": 1.8859, "step": 15330 }, { "epoch": 1.845968712394705, "grad_norm": 7.04545259475708, "learning_rate": 0.00019992550381698707, "loss": 1.9061, "step": 15340 }, { "epoch": 1.8471720818291215, "grad_norm": 4.191628932952881, "learning_rate": 0.0001999253569290047, "loss": 2.1132, "step": 15350 }, { "epoch": 1.848375451263538, "grad_norm": 6.260605812072754, "learning_rate": 0.00019992520989640572, "loss": 1.9419, "step": 15360 }, { "epoch": 1.8495788206979542, "grad_norm": 4.712516784667969, "learning_rate": 0.0001999250627191903, "loss": 2.1588, "step": 15370 }, { "epoch": 1.8507821901323707, "grad_norm": 4.36837100982666, "learning_rate": 0.00019992491539735861, "loss": 2.0079, "step": 15380 }, { "epoch": 1.8519855595667871, "grad_norm": 5.028352737426758, "learning_rate": 0.00019992476793091092, "loss": 1.8677, "step": 15390 }, { "epoch": 1.8531889290012034, "grad_norm": 3.766176223754883, "learning_rate": 0.00019992462031984741, "loss": 1.927, "step": 15400 }, { "epoch": 1.8543922984356196, "grad_norm": 7.136031627655029, "learning_rate": 0.00019992447256416833, "loss": 2.0741, "step": 15410 }, { "epoch": 1.855595667870036, "grad_norm": 4.481396198272705, "learning_rate": 0.00019992432466387383, "loss": 1.9438, "step": 15420 }, { "epoch": 1.8567990373044525, "grad_norm": 3.7558610439300537, "learning_rate": 0.00019992417661896417, "loss": 2.1443, "step": 15430 }, { "epoch": 1.8580024067388687, "grad_norm": 5.567291736602783, "learning_rate": 0.00019992402842943955, "loss": 2.1461, "step": 15440 }, { "epoch": 1.8592057761732852, "grad_norm": 4.288766384124756, "learning_rate": 0.00019992388009530023, "loss": 2.009, "step": 15450 }, { "epoch": 1.8604091456077017, "grad_norm": 6.941226482391357, "learning_rate": 0.00019992373161654633, "loss": 2.1498, "step": 15460 }, { "epoch": 1.8616125150421179, "grad_norm": 5.285544395446777, "learning_rate": 0.00019992358299317816, "loss": 1.892, "step": 15470 }, { "epoch": 1.8628158844765343, "grad_norm": 3.7738306522369385, "learning_rate": 0.00019992343422519588, "loss": 2.0769, "step": 15480 }, { "epoch": 1.8640192539109508, "grad_norm": 6.734550476074219, "learning_rate": 0.0001999232853125997, "loss": 1.9419, "step": 15490 }, { "epoch": 1.865222623345367, "grad_norm": 4.281076431274414, "learning_rate": 0.0001999231362553899, "loss": 2.044, "step": 15500 }, { "epoch": 1.8664259927797833, "grad_norm": 6.944756507873535, "learning_rate": 0.00019992298705356665, "loss": 2.109, "step": 15510 }, { "epoch": 1.8676293622141997, "grad_norm": 4.5112810134887695, "learning_rate": 0.00019992283770713015, "loss": 1.9144, "step": 15520 }, { "epoch": 1.8688327316486162, "grad_norm": 3.437556505203247, "learning_rate": 0.0001999226882160806, "loss": 1.9841, "step": 15530 }, { "epoch": 1.8700361010830324, "grad_norm": 4.6591105461120605, "learning_rate": 0.0001999225385804183, "loss": 2.0429, "step": 15540 }, { "epoch": 1.8712394705174489, "grad_norm": 4.675044536590576, "learning_rate": 0.0001999223888001434, "loss": 2.2725, "step": 15550 }, { "epoch": 1.8724428399518653, "grad_norm": 6.455400466918945, "learning_rate": 0.0001999222388752561, "loss": 2.1985, "step": 15560 }, { "epoch": 1.8736462093862816, "grad_norm": 5.524036884307861, "learning_rate": 0.0001999220888057567, "loss": 1.6719, "step": 15570 }, { "epoch": 1.8748495788206978, "grad_norm": 3.953951597213745, "learning_rate": 0.00019992193859164534, "loss": 1.8878, "step": 15580 }, { "epoch": 1.8760529482551145, "grad_norm": 5.688681125640869, "learning_rate": 0.00019992178823292228, "loss": 1.8412, "step": 15590 }, { "epoch": 1.8772563176895307, "grad_norm": 3.426849126815796, "learning_rate": 0.0001999216377295877, "loss": 1.8212, "step": 15600 }, { "epoch": 1.878459687123947, "grad_norm": 6.213525295257568, "learning_rate": 0.00019992148708164188, "loss": 2.0282, "step": 15610 }, { "epoch": 1.8796630565583634, "grad_norm": 4.974131107330322, "learning_rate": 0.00019992133628908497, "loss": 1.7227, "step": 15620 }, { "epoch": 1.8808664259927799, "grad_norm": 4.237664699554443, "learning_rate": 0.0001999211853519172, "loss": 1.8608, "step": 15630 }, { "epoch": 1.882069795427196, "grad_norm": 5.078277111053467, "learning_rate": 0.00019992103427013885, "loss": 2.0934, "step": 15640 }, { "epoch": 1.8832731648616126, "grad_norm": 4.74167013168335, "learning_rate": 0.00019992088304375004, "loss": 1.9874, "step": 15650 }, { "epoch": 1.884476534296029, "grad_norm": 6.7923126220703125, "learning_rate": 0.00019992073167275108, "loss": 1.9907, "step": 15660 }, { "epoch": 1.8856799037304453, "grad_norm": 4.451918125152588, "learning_rate": 0.00019992058015714216, "loss": 1.9799, "step": 15670 }, { "epoch": 1.8868832731648615, "grad_norm": 3.4242498874664307, "learning_rate": 0.00019992042849692346, "loss": 1.8887, "step": 15680 }, { "epoch": 1.888086642599278, "grad_norm": 5.157835006713867, "learning_rate": 0.00019992027669209526, "loss": 1.9297, "step": 15690 }, { "epoch": 1.8892900120336944, "grad_norm": 5.290318012237549, "learning_rate": 0.00019992012474265774, "loss": 2.0429, "step": 15700 }, { "epoch": 1.8904933814681106, "grad_norm": 7.171584606170654, "learning_rate": 0.00019991997264861113, "loss": 2.1705, "step": 15710 }, { "epoch": 1.891696750902527, "grad_norm": 4.1063714027404785, "learning_rate": 0.00019991982040995564, "loss": 1.8669, "step": 15720 }, { "epoch": 1.8929001203369435, "grad_norm": 3.4416663646698, "learning_rate": 0.0001999196680266915, "loss": 1.9693, "step": 15730 }, { "epoch": 1.8941034897713598, "grad_norm": 6.243912220001221, "learning_rate": 0.00019991951549881893, "loss": 1.9294, "step": 15740 }, { "epoch": 1.895306859205776, "grad_norm": 5.662498950958252, "learning_rate": 0.00019991936282633818, "loss": 1.7883, "step": 15750 }, { "epoch": 1.8965102286401927, "grad_norm": 5.44572639465332, "learning_rate": 0.00019991921000924942, "loss": 2.029, "step": 15760 }, { "epoch": 1.897713598074609, "grad_norm": 6.009549617767334, "learning_rate": 0.0001999190570475529, "loss": 1.8292, "step": 15770 }, { "epoch": 1.8989169675090252, "grad_norm": 3.629751205444336, "learning_rate": 0.00019991890394124883, "loss": 1.965, "step": 15780 }, { "epoch": 1.9001203369434416, "grad_norm": 5.768898010253906, "learning_rate": 0.00019991875069033744, "loss": 1.8766, "step": 15790 }, { "epoch": 1.901323706377858, "grad_norm": 4.1167120933532715, "learning_rate": 0.00019991859729481893, "loss": 2.1118, "step": 15800 }, { "epoch": 1.9025270758122743, "grad_norm": 7.279045581817627, "learning_rate": 0.00019991844375469357, "loss": 1.837, "step": 15810 }, { "epoch": 1.9037304452466908, "grad_norm": 5.072184085845947, "learning_rate": 0.00019991829006996153, "loss": 1.9979, "step": 15820 }, { "epoch": 1.9049338146811072, "grad_norm": 4.018231391906738, "learning_rate": 0.00019991813624062305, "loss": 1.8404, "step": 15830 }, { "epoch": 1.9061371841155235, "grad_norm": 6.246421813964844, "learning_rate": 0.0001999179822666784, "loss": 2.1459, "step": 15840 }, { "epoch": 1.9073405535499397, "grad_norm": 5.623615741729736, "learning_rate": 0.00019991782814812771, "loss": 1.92, "step": 15850 }, { "epoch": 1.9085439229843562, "grad_norm": 6.654271602630615, "learning_rate": 0.0001999176738849713, "loss": 2.153, "step": 15860 }, { "epoch": 1.9097472924187726, "grad_norm": 4.772064685821533, "learning_rate": 0.00019991751947720933, "loss": 1.8769, "step": 15870 }, { "epoch": 1.9109506618531888, "grad_norm": 3.7780356407165527, "learning_rate": 0.00019991736492484203, "loss": 1.9606, "step": 15880 }, { "epoch": 1.9121540312876053, "grad_norm": 5.867314338684082, "learning_rate": 0.00019991721022786965, "loss": 2.1057, "step": 15890 }, { "epoch": 1.9133574007220218, "grad_norm": 3.425398111343384, "learning_rate": 0.00019991705538629237, "loss": 2.0502, "step": 15900 }, { "epoch": 1.914560770156438, "grad_norm": 6.75115442276001, "learning_rate": 0.00019991690040011047, "loss": 2.102, "step": 15910 }, { "epoch": 1.9157641395908542, "grad_norm": 4.971395492553711, "learning_rate": 0.0001999167452693241, "loss": 1.8051, "step": 15920 }, { "epoch": 1.916967509025271, "grad_norm": 3.4800236225128174, "learning_rate": 0.00019991658999393358, "loss": 1.9978, "step": 15930 }, { "epoch": 1.9181708784596871, "grad_norm": 5.5732855796813965, "learning_rate": 0.0001999164345739391, "loss": 1.9867, "step": 15940 }, { "epoch": 1.9193742478941034, "grad_norm": 3.989680051803589, "learning_rate": 0.00019991627900934082, "loss": 1.9552, "step": 15950 }, { "epoch": 1.9205776173285198, "grad_norm": 6.894227981567383, "learning_rate": 0.00019991612330013905, "loss": 2.0555, "step": 15960 }, { "epoch": 1.9217809867629363, "grad_norm": 4.548664569854736, "learning_rate": 0.00019991596744633395, "loss": 1.9647, "step": 15970 }, { "epoch": 1.9229843561973525, "grad_norm": 3.2041382789611816, "learning_rate": 0.0001999158114479258, "loss": 1.8206, "step": 15980 }, { "epoch": 1.924187725631769, "grad_norm": 5.597415447235107, "learning_rate": 0.0001999156553049148, "loss": 2.1227, "step": 15990 }, { "epoch": 1.9253910950661854, "grad_norm": 4.656121730804443, "learning_rate": 0.0001999154990173012, "loss": 2.095, "step": 16000 }, { "epoch": 1.9265944645006017, "grad_norm": 6.465431213378906, "learning_rate": 0.00019991534258508516, "loss": 1.9937, "step": 16010 }, { "epoch": 1.927797833935018, "grad_norm": 5.056748867034912, "learning_rate": 0.000199915186008267, "loss": 1.854, "step": 16020 }, { "epoch": 1.9290012033694344, "grad_norm": 3.3747501373291016, "learning_rate": 0.00019991502928684685, "loss": 2.0646, "step": 16030 }, { "epoch": 1.9302045728038508, "grad_norm": 5.906112194061279, "learning_rate": 0.00019991487242082502, "loss": 2.1587, "step": 16040 }, { "epoch": 1.931407942238267, "grad_norm": 3.7816739082336426, "learning_rate": 0.00019991471541020168, "loss": 1.8309, "step": 16050 }, { "epoch": 1.9326113116726835, "grad_norm": 5.560314655303955, "learning_rate": 0.00019991455825497706, "loss": 1.8769, "step": 16060 }, { "epoch": 1.9338146811071, "grad_norm": 5.638073444366455, "learning_rate": 0.00019991440095515147, "loss": 1.8768, "step": 16070 }, { "epoch": 1.9350180505415162, "grad_norm": 3.7518393993377686, "learning_rate": 0.000199914243510725, "loss": 2.0219, "step": 16080 }, { "epoch": 1.9362214199759324, "grad_norm": 5.50349235534668, "learning_rate": 0.000199914085921698, "loss": 1.8097, "step": 16090 }, { "epoch": 1.9374247894103491, "grad_norm": 4.016904354095459, "learning_rate": 0.00019991392818807064, "loss": 1.9738, "step": 16100 }, { "epoch": 1.9386281588447654, "grad_norm": 6.328729629516602, "learning_rate": 0.00019991377030984314, "loss": 1.9604, "step": 16110 }, { "epoch": 1.9398315282791816, "grad_norm": 5.4566168785095215, "learning_rate": 0.00019991361228701576, "loss": 2.1133, "step": 16120 }, { "epoch": 1.941034897713598, "grad_norm": 4.5634284019470215, "learning_rate": 0.00019991345411958872, "loss": 2.1536, "step": 16130 }, { "epoch": 1.9422382671480145, "grad_norm": 5.645349979400635, "learning_rate": 0.00019991329580756225, "loss": 1.9691, "step": 16140 }, { "epoch": 1.9434416365824307, "grad_norm": 4.688941955566406, "learning_rate": 0.00019991313735093655, "loss": 1.926, "step": 16150 }, { "epoch": 1.9446450060168472, "grad_norm": 6.3975043296813965, "learning_rate": 0.0001999129787497119, "loss": 1.9637, "step": 16160 }, { "epoch": 1.9458483754512637, "grad_norm": 4.604057312011719, "learning_rate": 0.0001999128200038885, "loss": 1.8218, "step": 16170 }, { "epoch": 1.94705174488568, "grad_norm": 3.644859552383423, "learning_rate": 0.00019991266111346655, "loss": 1.7935, "step": 16180 }, { "epoch": 1.9482551143200961, "grad_norm": 4.452095031738281, "learning_rate": 0.00019991250207844634, "loss": 1.8601, "step": 16190 }, { "epoch": 1.9494584837545126, "grad_norm": 4.731088161468506, "learning_rate": 0.00019991234289882804, "loss": 2.1167, "step": 16200 }, { "epoch": 1.950661853188929, "grad_norm": 7.040933132171631, "learning_rate": 0.00019991218357461195, "loss": 2.0416, "step": 16210 }, { "epoch": 1.9518652226233453, "grad_norm": 6.245818138122559, "learning_rate": 0.00019991202410579826, "loss": 1.96, "step": 16220 }, { "epoch": 1.9530685920577617, "grad_norm": 3.276776075363159, "learning_rate": 0.0001999118644923872, "loss": 2.1505, "step": 16230 }, { "epoch": 1.9542719614921782, "grad_norm": 6.008669853210449, "learning_rate": 0.00019991170473437897, "loss": 2.0642, "step": 16240 }, { "epoch": 1.9554753309265944, "grad_norm": 3.994748592376709, "learning_rate": 0.00019991154483177387, "loss": 1.776, "step": 16250 }, { "epoch": 1.9566787003610109, "grad_norm": 6.522171497344971, "learning_rate": 0.0001999113847845721, "loss": 2.0933, "step": 16260 }, { "epoch": 1.9578820697954273, "grad_norm": 4.953426837921143, "learning_rate": 0.00019991122459277384, "loss": 1.8951, "step": 16270 }, { "epoch": 1.9590854392298436, "grad_norm": 3.7762913703918457, "learning_rate": 0.00019991106425637942, "loss": 2.1214, "step": 16280 }, { "epoch": 1.9602888086642598, "grad_norm": 4.925233840942383, "learning_rate": 0.000199910903775389, "loss": 1.9764, "step": 16290 }, { "epoch": 1.9614921780986763, "grad_norm": 3.9478254318237305, "learning_rate": 0.00019991074314980287, "loss": 2.0233, "step": 16300 }, { "epoch": 1.9626955475330927, "grad_norm": 6.4193806648254395, "learning_rate": 0.0001999105823796212, "loss": 2.092, "step": 16310 }, { "epoch": 1.963898916967509, "grad_norm": 5.982722759246826, "learning_rate": 0.00019991042146484425, "loss": 1.7777, "step": 16320 }, { "epoch": 1.9651022864019254, "grad_norm": 3.0726537704467773, "learning_rate": 0.00019991026040547225, "loss": 2.0554, "step": 16330 }, { "epoch": 1.9663056558363419, "grad_norm": 4.883552551269531, "learning_rate": 0.00019991009920150546, "loss": 1.9911, "step": 16340 }, { "epoch": 1.967509025270758, "grad_norm": 4.612387180328369, "learning_rate": 0.00019990993785294408, "loss": 2.0314, "step": 16350 }, { "epoch": 1.9687123947051743, "grad_norm": 6.7942328453063965, "learning_rate": 0.00019990977635978835, "loss": 1.7717, "step": 16360 }, { "epoch": 1.969915764139591, "grad_norm": 4.324357986450195, "learning_rate": 0.00019990961472203848, "loss": 1.9512, "step": 16370 }, { "epoch": 1.9711191335740073, "grad_norm": 3.6651668548583984, "learning_rate": 0.00019990945293969477, "loss": 2.0161, "step": 16380 }, { "epoch": 1.9723225030084235, "grad_norm": 5.03709077835083, "learning_rate": 0.0001999092910127574, "loss": 2.1105, "step": 16390 }, { "epoch": 1.97352587244284, "grad_norm": 4.680255889892578, "learning_rate": 0.00019990912894122663, "loss": 1.9587, "step": 16400 }, { "epoch": 1.9747292418772564, "grad_norm": 7.243875026702881, "learning_rate": 0.00019990896672510266, "loss": 2.0094, "step": 16410 }, { "epoch": 1.9759326113116726, "grad_norm": 4.893266677856445, "learning_rate": 0.00019990880436438575, "loss": 1.9973, "step": 16420 }, { "epoch": 1.977135980746089, "grad_norm": 3.392618417739868, "learning_rate": 0.00019990864185907614, "loss": 2.0151, "step": 16430 }, { "epoch": 1.9783393501805056, "grad_norm": 4.7665696144104, "learning_rate": 0.00019990847920917405, "loss": 1.6015, "step": 16440 }, { "epoch": 1.9795427196149218, "grad_norm": 4.276864528656006, "learning_rate": 0.00019990831641467972, "loss": 1.9985, "step": 16450 }, { "epoch": 1.980746089049338, "grad_norm": 8.40011215209961, "learning_rate": 0.00019990815347559342, "loss": 2.0585, "step": 16460 }, { "epoch": 1.9819494584837545, "grad_norm": 5.415799617767334, "learning_rate": 0.00019990799039191535, "loss": 1.9161, "step": 16470 }, { "epoch": 1.983152827918171, "grad_norm": 3.8374929428100586, "learning_rate": 0.00019990782716364575, "loss": 2.0741, "step": 16480 }, { "epoch": 1.9843561973525872, "grad_norm": 5.7505083084106445, "learning_rate": 0.00019990766379078485, "loss": 1.941, "step": 16490 }, { "epoch": 1.9855595667870036, "grad_norm": 4.585755348205566, "learning_rate": 0.00019990750027333286, "loss": 1.572, "step": 16500 }, { "epoch": 1.98676293622142, "grad_norm": 6.762844562530518, "learning_rate": 0.00019990733661129012, "loss": 2.2276, "step": 16510 }, { "epoch": 1.9879663056558363, "grad_norm": 4.439902305603027, "learning_rate": 0.00019990717280465674, "loss": 2.1657, "step": 16520 }, { "epoch": 1.9891696750902526, "grad_norm": 3.378763437271118, "learning_rate": 0.00019990700885343302, "loss": 2.13, "step": 16530 }, { "epoch": 1.9903730445246692, "grad_norm": 5.796677589416504, "learning_rate": 0.00019990684475761924, "loss": 1.8933, "step": 16540 }, { "epoch": 1.9915764139590855, "grad_norm": 4.522659778594971, "learning_rate": 0.00019990668051721554, "loss": 1.9627, "step": 16550 }, { "epoch": 1.9927797833935017, "grad_norm": 6.611879825592041, "learning_rate": 0.00019990651613222224, "loss": 1.9534, "step": 16560 }, { "epoch": 1.9939831528279182, "grad_norm": 4.917909622192383, "learning_rate": 0.00019990635160263952, "loss": 1.864, "step": 16570 }, { "epoch": 1.9951865222623346, "grad_norm": 3.834904193878174, "learning_rate": 0.00019990618692846765, "loss": 2.1645, "step": 16580 }, { "epoch": 1.9963898916967509, "grad_norm": 7.771172046661377, "learning_rate": 0.00019990602210970688, "loss": 2.0834, "step": 16590 }, { "epoch": 1.9975932611311673, "grad_norm": 3.9011645317077637, "learning_rate": 0.00019990585714635742, "loss": 2.2294, "step": 16600 }, { "epoch": 1.9987966305655838, "grad_norm": 8.939842224121094, "learning_rate": 0.00019990569203841952, "loss": 1.7705, "step": 16610 }, { "epoch": 2.0, "grad_norm": 6.323520183563232, "learning_rate": 0.0001999055267858934, "loss": 1.7649, "step": 16620 }, { "epoch": 2.0, "eval_loss": 1.9142165184020996, "eval_runtime": 118.7366, "eval_samples_per_second": 62.213, "eval_steps_per_second": 7.782, "step": 16620 }, { "epoch": 2.0012033694344162, "grad_norm": 5.1999969482421875, "learning_rate": 0.0001999053613887793, "loss": 1.9435, "step": 16630 }, { "epoch": 2.002406738868833, "grad_norm": 3.153641700744629, "learning_rate": 0.00019990519584707755, "loss": 1.6858, "step": 16640 }, { "epoch": 2.003610108303249, "grad_norm": 5.022435665130615, "learning_rate": 0.00019990503016078823, "loss": 1.8025, "step": 16650 }, { "epoch": 2.0048134777376654, "grad_norm": 4.489079475402832, "learning_rate": 0.00019990486432991172, "loss": 1.8494, "step": 16660 }, { "epoch": 2.0060168471720816, "grad_norm": 6.600327968597412, "learning_rate": 0.00019990469835444818, "loss": 1.8307, "step": 16670 }, { "epoch": 2.0072202166064983, "grad_norm": 6.060215950012207, "learning_rate": 0.0001999045322343979, "loss": 1.8325, "step": 16680 }, { "epoch": 2.0084235860409145, "grad_norm": 3.5549871921539307, "learning_rate": 0.00019990436596976108, "loss": 1.8593, "step": 16690 }, { "epoch": 2.0096269554753308, "grad_norm": 5.249209880828857, "learning_rate": 0.00019990419956053798, "loss": 1.8023, "step": 16700 }, { "epoch": 2.0108303249097474, "grad_norm": 3.7713396549224854, "learning_rate": 0.00019990403300672885, "loss": 1.6932, "step": 16710 }, { "epoch": 2.0120336943441637, "grad_norm": 6.833237171173096, "learning_rate": 0.0001999038663083339, "loss": 1.8434, "step": 16720 }, { "epoch": 2.01323706377858, "grad_norm": 4.462503433227539, "learning_rate": 0.0001999036994653534, "loss": 1.8469, "step": 16730 }, { "epoch": 2.0144404332129966, "grad_norm": 4.416675567626953, "learning_rate": 0.00019990353247778756, "loss": 1.8928, "step": 16740 }, { "epoch": 2.015643802647413, "grad_norm": 5.76512336730957, "learning_rate": 0.00019990336534563666, "loss": 1.7162, "step": 16750 }, { "epoch": 2.016847172081829, "grad_norm": 4.121241569519043, "learning_rate": 0.0001999031980689009, "loss": 2.0468, "step": 16760 }, { "epoch": 2.0180505415162453, "grad_norm": 6.77939510345459, "learning_rate": 0.0001999030306475806, "loss": 2.0418, "step": 16770 }, { "epoch": 2.019253910950662, "grad_norm": 5.205681800842285, "learning_rate": 0.00019990286308167594, "loss": 1.8573, "step": 16780 }, { "epoch": 2.020457280385078, "grad_norm": 3.1619973182678223, "learning_rate": 0.00019990269537118716, "loss": 1.9974, "step": 16790 }, { "epoch": 2.0216606498194944, "grad_norm": 5.370748043060303, "learning_rate": 0.0001999025275161145, "loss": 1.9107, "step": 16800 }, { "epoch": 2.022864019253911, "grad_norm": 4.123819351196289, "learning_rate": 0.00019990235951645823, "loss": 2.043, "step": 16810 }, { "epoch": 2.0240673886883274, "grad_norm": 6.360437393188477, "learning_rate": 0.00019990219137221857, "loss": 1.9069, "step": 16820 }, { "epoch": 2.0252707581227436, "grad_norm": 5.279680252075195, "learning_rate": 0.00019990202308339577, "loss": 1.6415, "step": 16830 }, { "epoch": 2.02647412755716, "grad_norm": 3.7279410362243652, "learning_rate": 0.0001999018546499901, "loss": 1.855, "step": 16840 }, { "epoch": 2.0276774969915765, "grad_norm": 5.45247745513916, "learning_rate": 0.00019990168607200178, "loss": 2.0367, "step": 16850 }, { "epoch": 2.0288808664259927, "grad_norm": 4.370023727416992, "learning_rate": 0.00019990151734943103, "loss": 1.8561, "step": 16860 }, { "epoch": 2.030084235860409, "grad_norm": 7.897660255432129, "learning_rate": 0.00019990134848227813, "loss": 1.8043, "step": 16870 }, { "epoch": 2.0312876052948257, "grad_norm": 4.6693010330200195, "learning_rate": 0.00019990117947054335, "loss": 1.7646, "step": 16880 }, { "epoch": 2.032490974729242, "grad_norm": 3.690429210662842, "learning_rate": 0.00019990101031422684, "loss": 1.9676, "step": 16890 }, { "epoch": 2.033694344163658, "grad_norm": 6.373749256134033, "learning_rate": 0.00019990084101332895, "loss": 1.8346, "step": 16900 }, { "epoch": 2.034897713598075, "grad_norm": 4.280575752258301, "learning_rate": 0.00019990067156784984, "loss": 1.7286, "step": 16910 }, { "epoch": 2.036101083032491, "grad_norm": 6.801653861999512, "learning_rate": 0.00019990050197778983, "loss": 2.0454, "step": 16920 }, { "epoch": 2.0373044524669073, "grad_norm": 5.937229633331299, "learning_rate": 0.0001999003322431491, "loss": 1.6404, "step": 16930 }, { "epoch": 2.0385078219013235, "grad_norm": 4.118158340454102, "learning_rate": 0.00019990016236392792, "loss": 1.7363, "step": 16940 }, { "epoch": 2.03971119133574, "grad_norm": 6.1648850440979, "learning_rate": 0.00019989999234012657, "loss": 1.8067, "step": 16950 }, { "epoch": 2.0409145607701564, "grad_norm": 4.802121639251709, "learning_rate": 0.00019989982217174525, "loss": 1.926, "step": 16960 }, { "epoch": 2.0421179302045727, "grad_norm": 6.523443698883057, "learning_rate": 0.00019989965185878423, "loss": 1.6961, "step": 16970 }, { "epoch": 2.0433212996389893, "grad_norm": 5.312000274658203, "learning_rate": 0.00019989948140124376, "loss": 1.8128, "step": 16980 }, { "epoch": 2.0445246690734056, "grad_norm": 5.419356822967529, "learning_rate": 0.00019989931079912406, "loss": 1.841, "step": 16990 }, { "epoch": 2.045728038507822, "grad_norm": 4.926950931549072, "learning_rate": 0.00019989914005242538, "loss": 1.7705, "step": 17000 }, { "epoch": 2.046931407942238, "grad_norm": 4.7852654457092285, "learning_rate": 0.000199898969161148, "loss": 1.6895, "step": 17010 }, { "epoch": 2.0481347773766547, "grad_norm": 6.510125160217285, "learning_rate": 0.00019989879812529212, "loss": 1.8186, "step": 17020 }, { "epoch": 2.049338146811071, "grad_norm": 5.870050430297852, "learning_rate": 0.00019989862694485806, "loss": 1.6761, "step": 17030 }, { "epoch": 2.050541516245487, "grad_norm": 4.159613609313965, "learning_rate": 0.000199898455619846, "loss": 1.797, "step": 17040 }, { "epoch": 2.051744885679904, "grad_norm": 5.965736389160156, "learning_rate": 0.00019989828415025622, "loss": 1.8211, "step": 17050 }, { "epoch": 2.05294825511432, "grad_norm": 5.167622089385986, "learning_rate": 0.00019989811253608896, "loss": 1.7205, "step": 17060 }, { "epoch": 2.0541516245487363, "grad_norm": 5.409761905670166, "learning_rate": 0.00019989794077734441, "loss": 1.7689, "step": 17070 }, { "epoch": 2.055354993983153, "grad_norm": 5.324963092803955, "learning_rate": 0.00019989776887402295, "loss": 1.7323, "step": 17080 }, { "epoch": 2.0565583634175693, "grad_norm": 4.008548736572266, "learning_rate": 0.00019989759682612472, "loss": 1.6856, "step": 17090 }, { "epoch": 2.0577617328519855, "grad_norm": 6.131975173950195, "learning_rate": 0.00019989742463365003, "loss": 1.8537, "step": 17100 }, { "epoch": 2.0589651022864017, "grad_norm": 3.6276772022247314, "learning_rate": 0.00019989725229659905, "loss": 1.5171, "step": 17110 }, { "epoch": 2.0601684717208184, "grad_norm": 7.486250400543213, "learning_rate": 0.0001998970798149721, "loss": 1.7464, "step": 17120 }, { "epoch": 2.0613718411552346, "grad_norm": 5.081457138061523, "learning_rate": 0.00019989690718876944, "loss": 1.8673, "step": 17130 }, { "epoch": 2.062575210589651, "grad_norm": 4.898556232452393, "learning_rate": 0.0001998967344179913, "loss": 1.8692, "step": 17140 }, { "epoch": 2.0637785800240676, "grad_norm": 7.015048980712891, "learning_rate": 0.0001998965615026379, "loss": 1.9418, "step": 17150 }, { "epoch": 2.064981949458484, "grad_norm": 4.729976177215576, "learning_rate": 0.0001998963884427095, "loss": 2.0248, "step": 17160 }, { "epoch": 2.0661853188929, "grad_norm": 7.050203800201416, "learning_rate": 0.00019989621523820635, "loss": 1.7396, "step": 17170 }, { "epoch": 2.0673886883273163, "grad_norm": 5.0453948974609375, "learning_rate": 0.00019989604188912875, "loss": 1.8594, "step": 17180 }, { "epoch": 2.068592057761733, "grad_norm": 3.3906519412994385, "learning_rate": 0.0001998958683954769, "loss": 1.9991, "step": 17190 }, { "epoch": 2.069795427196149, "grad_norm": 5.707901477813721, "learning_rate": 0.00019989569475725106, "loss": 1.8237, "step": 17200 }, { "epoch": 2.0709987966305654, "grad_norm": 4.8201470375061035, "learning_rate": 0.0001998955209744515, "loss": 1.6789, "step": 17210 }, { "epoch": 2.072202166064982, "grad_norm": 7.725179672241211, "learning_rate": 0.00019989534704707845, "loss": 1.7809, "step": 17220 }, { "epoch": 2.0734055354993983, "grad_norm": 5.349000930786133, "learning_rate": 0.00019989517297513217, "loss": 1.8191, "step": 17230 }, { "epoch": 2.0746089049338146, "grad_norm": 3.1116085052490234, "learning_rate": 0.00019989499875861292, "loss": 1.7864, "step": 17240 }, { "epoch": 2.0758122743682312, "grad_norm": 5.141925811767578, "learning_rate": 0.00019989482439752092, "loss": 1.6877, "step": 17250 }, { "epoch": 2.0770156438026475, "grad_norm": 3.531400680541992, "learning_rate": 0.00019989464989185647, "loss": 1.7856, "step": 17260 }, { "epoch": 2.0782190132370637, "grad_norm": 6.522139072418213, "learning_rate": 0.00019989447524161976, "loss": 1.8581, "step": 17270 }, { "epoch": 2.07942238267148, "grad_norm": 5.104706764221191, "learning_rate": 0.00019989430044681114, "loss": 1.766, "step": 17280 }, { "epoch": 2.0806257521058966, "grad_norm": 3.9711599349975586, "learning_rate": 0.00019989412550743075, "loss": 1.8496, "step": 17290 }, { "epoch": 2.081829121540313, "grad_norm": 5.783777236938477, "learning_rate": 0.00019989395042347894, "loss": 1.7734, "step": 17300 }, { "epoch": 2.083032490974729, "grad_norm": 4.533494472503662, "learning_rate": 0.0001998937751949559, "loss": 1.828, "step": 17310 }, { "epoch": 2.0842358604091458, "grad_norm": 7.173120975494385, "learning_rate": 0.0001998935998218619, "loss": 2.0683, "step": 17320 }, { "epoch": 2.085439229843562, "grad_norm": 5.991465091705322, "learning_rate": 0.00019989342430419722, "loss": 1.5061, "step": 17330 }, { "epoch": 2.0866425992779782, "grad_norm": 3.940812110900879, "learning_rate": 0.00019989324864196206, "loss": 1.9932, "step": 17340 }, { "epoch": 2.087845968712395, "grad_norm": 5.346851348876953, "learning_rate": 0.00019989307283515672, "loss": 1.7388, "step": 17350 }, { "epoch": 2.089049338146811, "grad_norm": 5.48711633682251, "learning_rate": 0.00019989289688378143, "loss": 1.7923, "step": 17360 }, { "epoch": 2.0902527075812274, "grad_norm": 6.613363265991211, "learning_rate": 0.0001998927207878365, "loss": 1.8068, "step": 17370 }, { "epoch": 2.0914560770156436, "grad_norm": 4.155871868133545, "learning_rate": 0.0001998925445473221, "loss": 1.7835, "step": 17380 }, { "epoch": 2.0926594464500603, "grad_norm": 3.4781439304351807, "learning_rate": 0.00019989236816223853, "loss": 1.8167, "step": 17390 }, { "epoch": 2.0938628158844765, "grad_norm": 6.0336432456970215, "learning_rate": 0.00019989219163258604, "loss": 1.7585, "step": 17400 }, { "epoch": 2.0950661853188928, "grad_norm": 4.189842224121094, "learning_rate": 0.00019989201495836489, "loss": 1.676, "step": 17410 }, { "epoch": 2.0962695547533094, "grad_norm": 7.258090019226074, "learning_rate": 0.00019989183813957532, "loss": 1.8626, "step": 17420 }, { "epoch": 2.0974729241877257, "grad_norm": 5.412322044372559, "learning_rate": 0.00019989166117621762, "loss": 1.615, "step": 17430 }, { "epoch": 2.098676293622142, "grad_norm": 4.196578502655029, "learning_rate": 0.00019989148406829202, "loss": 1.7245, "step": 17440 }, { "epoch": 2.099879663056558, "grad_norm": 6.083682537078857, "learning_rate": 0.00019989130681579876, "loss": 1.872, "step": 17450 }, { "epoch": 2.101083032490975, "grad_norm": 4.3092732429504395, "learning_rate": 0.00019989112941873812, "loss": 1.863, "step": 17460 }, { "epoch": 2.102286401925391, "grad_norm": 6.418511867523193, "learning_rate": 0.00019989095187711038, "loss": 1.8506, "step": 17470 }, { "epoch": 2.1034897713598073, "grad_norm": 4.709066867828369, "learning_rate": 0.0001998907741909158, "loss": 1.8186, "step": 17480 }, { "epoch": 2.104693140794224, "grad_norm": 4.116019248962402, "learning_rate": 0.00019989059636015452, "loss": 1.8143, "step": 17490 }, { "epoch": 2.10589651022864, "grad_norm": 7.0124430656433105, "learning_rate": 0.0001998904183848269, "loss": 1.9522, "step": 17500 }, { "epoch": 2.1070998796630565, "grad_norm": 4.389957427978516, "learning_rate": 0.00019989024026493323, "loss": 1.9003, "step": 17510 }, { "epoch": 2.108303249097473, "grad_norm": 7.033724308013916, "learning_rate": 0.00019989006200047372, "loss": 1.6658, "step": 17520 }, { "epoch": 2.1095066185318894, "grad_norm": 4.837581157684326, "learning_rate": 0.0001998898835914486, "loss": 1.7855, "step": 17530 }, { "epoch": 2.1107099879663056, "grad_norm": 3.7902698516845703, "learning_rate": 0.00019988970503785817, "loss": 1.7216, "step": 17540 }, { "epoch": 2.111913357400722, "grad_norm": 7.119017124176025, "learning_rate": 0.00019988952633970267, "loss": 1.744, "step": 17550 }, { "epoch": 2.1131167268351385, "grad_norm": 4.171570777893066, "learning_rate": 0.00019988934749698233, "loss": 1.7703, "step": 17560 }, { "epoch": 2.1143200962695547, "grad_norm": 5.869996070861816, "learning_rate": 0.0001998891685096975, "loss": 1.7115, "step": 17570 }, { "epoch": 2.115523465703971, "grad_norm": 5.453281402587891, "learning_rate": 0.00019988898937784835, "loss": 1.5491, "step": 17580 }, { "epoch": 2.1167268351383877, "grad_norm": 3.6909027099609375, "learning_rate": 0.00019988881010143516, "loss": 1.9484, "step": 17590 }, { "epoch": 2.117930204572804, "grad_norm": 7.22687292098999, "learning_rate": 0.00019988863068045825, "loss": 1.7824, "step": 17600 }, { "epoch": 2.11913357400722, "grad_norm": 4.892089366912842, "learning_rate": 0.00019988845111491778, "loss": 1.6621, "step": 17610 }, { "epoch": 2.1203369434416364, "grad_norm": 6.53886079788208, "learning_rate": 0.00019988827140481408, "loss": 1.8786, "step": 17620 }, { "epoch": 2.121540312876053, "grad_norm": 5.709807395935059, "learning_rate": 0.00019988809155014738, "loss": 1.6452, "step": 17630 }, { "epoch": 2.1227436823104693, "grad_norm": 3.1700313091278076, "learning_rate": 0.00019988791155091797, "loss": 1.718, "step": 17640 }, { "epoch": 2.1239470517448855, "grad_norm": 5.557100296020508, "learning_rate": 0.00019988773140712607, "loss": 1.5421, "step": 17650 }, { "epoch": 2.125150421179302, "grad_norm": 4.083098888397217, "learning_rate": 0.00019988755111877198, "loss": 1.5452, "step": 17660 }, { "epoch": 2.1263537906137184, "grad_norm": 5.638491630554199, "learning_rate": 0.0001998873706858559, "loss": 1.7364, "step": 17670 }, { "epoch": 2.1275571600481347, "grad_norm": 5.190164089202881, "learning_rate": 0.00019988719010837817, "loss": 1.9735, "step": 17680 }, { "epoch": 2.1287605294825513, "grad_norm": 3.7146685123443604, "learning_rate": 0.00019988700938633903, "loss": 2.1372, "step": 17690 }, { "epoch": 2.1299638989169676, "grad_norm": 6.454514980316162, "learning_rate": 0.00019988682851973871, "loss": 2.0919, "step": 17700 }, { "epoch": 2.131167268351384, "grad_norm": 4.221381187438965, "learning_rate": 0.00019988664750857747, "loss": 1.7471, "step": 17710 }, { "epoch": 2.1323706377858, "grad_norm": 5.4702887535095215, "learning_rate": 0.0001998864663528556, "loss": 2.0442, "step": 17720 }, { "epoch": 2.1335740072202167, "grad_norm": 4.777248382568359, "learning_rate": 0.00019988628505257335, "loss": 2.0535, "step": 17730 }, { "epoch": 2.134777376654633, "grad_norm": 3.488917827606201, "learning_rate": 0.000199886103607731, "loss": 1.7097, "step": 17740 }, { "epoch": 2.135980746089049, "grad_norm": 5.957403182983398, "learning_rate": 0.00019988592201832875, "loss": 1.6971, "step": 17750 }, { "epoch": 2.137184115523466, "grad_norm": 4.465520858764648, "learning_rate": 0.00019988574028436697, "loss": 1.9346, "step": 17760 }, { "epoch": 2.138387484957882, "grad_norm": 6.462950706481934, "learning_rate": 0.00019988555840584583, "loss": 1.7709, "step": 17770 }, { "epoch": 2.1395908543922983, "grad_norm": 4.549928665161133, "learning_rate": 0.00019988537638276563, "loss": 1.9055, "step": 17780 }, { "epoch": 2.140794223826715, "grad_norm": 3.398759365081787, "learning_rate": 0.00019988519421512663, "loss": 1.6768, "step": 17790 }, { "epoch": 2.1419975932611313, "grad_norm": 4.999843597412109, "learning_rate": 0.0001998850119029291, "loss": 1.997, "step": 17800 }, { "epoch": 2.1432009626955475, "grad_norm": 4.756877899169922, "learning_rate": 0.0001998848294461733, "loss": 1.69, "step": 17810 }, { "epoch": 2.1444043321299637, "grad_norm": 5.443968296051025, "learning_rate": 0.00019988464684485947, "loss": 1.926, "step": 17820 }, { "epoch": 2.1456077015643804, "grad_norm": 5.2449421882629395, "learning_rate": 0.00019988446409898792, "loss": 1.8667, "step": 17830 }, { "epoch": 2.1468110709987966, "grad_norm": 4.510014533996582, "learning_rate": 0.00019988428120855888, "loss": 1.7223, "step": 17840 }, { "epoch": 2.148014440433213, "grad_norm": 6.149260997772217, "learning_rate": 0.0001998840981735726, "loss": 1.9277, "step": 17850 }, { "epoch": 2.1492178098676296, "grad_norm": 4.433861255645752, "learning_rate": 0.00019988391499402942, "loss": 1.765, "step": 17860 }, { "epoch": 2.150421179302046, "grad_norm": 6.538389682769775, "learning_rate": 0.00019988373166992954, "loss": 1.9675, "step": 17870 }, { "epoch": 2.151624548736462, "grad_norm": 3.9230477809906006, "learning_rate": 0.00019988354820127323, "loss": 1.6862, "step": 17880 }, { "epoch": 2.1528279181708783, "grad_norm": 3.716688632965088, "learning_rate": 0.00019988336458806073, "loss": 1.6858, "step": 17890 }, { "epoch": 2.154031287605295, "grad_norm": 5.82971715927124, "learning_rate": 0.00019988318083029237, "loss": 1.7986, "step": 17900 }, { "epoch": 2.155234657039711, "grad_norm": 4.714937686920166, "learning_rate": 0.0001998829969279684, "loss": 1.6655, "step": 17910 }, { "epoch": 2.1564380264741274, "grad_norm": 7.878839015960693, "learning_rate": 0.00019988281288108904, "loss": 1.6974, "step": 17920 }, { "epoch": 2.157641395908544, "grad_norm": 5.158947944641113, "learning_rate": 0.00019988262868965464, "loss": 1.965, "step": 17930 }, { "epoch": 2.1588447653429603, "grad_norm": 3.980877161026001, "learning_rate": 0.00019988244435366538, "loss": 1.9019, "step": 17940 }, { "epoch": 2.1600481347773766, "grad_norm": 5.513377666473389, "learning_rate": 0.00019988225987312158, "loss": 1.8467, "step": 17950 }, { "epoch": 2.1612515042117932, "grad_norm": 4.000430107116699, "learning_rate": 0.0001998820752480235, "loss": 1.9303, "step": 17960 }, { "epoch": 2.1624548736462095, "grad_norm": 7.605454444885254, "learning_rate": 0.00019988189047837136, "loss": 1.9393, "step": 17970 }, { "epoch": 2.1636582430806257, "grad_norm": 5.936460971832275, "learning_rate": 0.00019988170556416548, "loss": 1.7185, "step": 17980 }, { "epoch": 2.164861612515042, "grad_norm": 3.905332565307617, "learning_rate": 0.00019988152050540612, "loss": 1.9451, "step": 17990 }, { "epoch": 2.1660649819494586, "grad_norm": 5.122852325439453, "learning_rate": 0.00019988133530209355, "loss": 1.6526, "step": 18000 }, { "epoch": 2.167268351383875, "grad_norm": 5.5804667472839355, "learning_rate": 0.000199881149954228, "loss": 1.8796, "step": 18010 }, { "epoch": 2.168471720818291, "grad_norm": 7.340632438659668, "learning_rate": 0.00019988096446180978, "loss": 1.6315, "step": 18020 }, { "epoch": 2.1696750902527078, "grad_norm": 5.029829502105713, "learning_rate": 0.00019988077882483914, "loss": 1.535, "step": 18030 }, { "epoch": 2.170878459687124, "grad_norm": 3.562346935272217, "learning_rate": 0.00019988059304331635, "loss": 1.8802, "step": 18040 }, { "epoch": 2.1720818291215402, "grad_norm": 5.396111965179443, "learning_rate": 0.00019988040711724168, "loss": 1.8595, "step": 18050 }, { "epoch": 2.1732851985559565, "grad_norm": 3.975900173187256, "learning_rate": 0.00019988022104661544, "loss": 1.8975, "step": 18060 }, { "epoch": 2.174488567990373, "grad_norm": 6.670739650726318, "learning_rate": 0.0001998800348314378, "loss": 1.9552, "step": 18070 }, { "epoch": 2.1756919374247894, "grad_norm": 5.525782108306885, "learning_rate": 0.00019987984847170913, "loss": 1.7285, "step": 18080 }, { "epoch": 2.1768953068592056, "grad_norm": 3.629214286804199, "learning_rate": 0.00019987966196742963, "loss": 1.8154, "step": 18090 }, { "epoch": 2.1780986762936223, "grad_norm": 5.791319847106934, "learning_rate": 0.00019987947531859964, "loss": 1.8833, "step": 18100 }, { "epoch": 2.1793020457280385, "grad_norm": 5.108678817749023, "learning_rate": 0.00019987928852521937, "loss": 1.9112, "step": 18110 }, { "epoch": 2.1805054151624548, "grad_norm": 7.058563709259033, "learning_rate": 0.00019987910158728912, "loss": 1.8469, "step": 18120 }, { "epoch": 2.1817087845968715, "grad_norm": 5.198415756225586, "learning_rate": 0.00019987891450480913, "loss": 1.622, "step": 18130 }, { "epoch": 2.1829121540312877, "grad_norm": 3.8137941360473633, "learning_rate": 0.0001998787272777797, "loss": 2.0485, "step": 18140 }, { "epoch": 2.184115523465704, "grad_norm": 6.22781229019165, "learning_rate": 0.00019987853990620109, "loss": 1.7697, "step": 18150 }, { "epoch": 2.18531889290012, "grad_norm": 4.489179611206055, "learning_rate": 0.00019987835239007357, "loss": 1.9367, "step": 18160 }, { "epoch": 2.186522262334537, "grad_norm": 7.405348300933838, "learning_rate": 0.00019987816472939745, "loss": 1.9643, "step": 18170 }, { "epoch": 2.187725631768953, "grad_norm": 5.456291198730469, "learning_rate": 0.00019987797692417293, "loss": 1.6383, "step": 18180 }, { "epoch": 2.1889290012033693, "grad_norm": 4.349714756011963, "learning_rate": 0.00019987778897440032, "loss": 1.8251, "step": 18190 }, { "epoch": 2.190132370637786, "grad_norm": 4.806549072265625, "learning_rate": 0.00019987760088007993, "loss": 1.6985, "step": 18200 }, { "epoch": 2.191335740072202, "grad_norm": 4.846847057342529, "learning_rate": 0.00019987741264121194, "loss": 1.663, "step": 18210 }, { "epoch": 2.1925391095066185, "grad_norm": 6.967729568481445, "learning_rate": 0.00019987722425779668, "loss": 2.0329, "step": 18220 }, { "epoch": 2.1937424789410347, "grad_norm": 4.565564155578613, "learning_rate": 0.00019987703572983445, "loss": 1.6841, "step": 18230 }, { "epoch": 2.1949458483754514, "grad_norm": 4.039554119110107, "learning_rate": 0.00019987684705732547, "loss": 1.8231, "step": 18240 }, { "epoch": 2.1961492178098676, "grad_norm": 5.261109828948975, "learning_rate": 0.00019987665824027005, "loss": 1.7919, "step": 18250 }, { "epoch": 2.197352587244284, "grad_norm": 4.446812152862549, "learning_rate": 0.00019987646927866842, "loss": 1.9482, "step": 18260 }, { "epoch": 2.1985559566787005, "grad_norm": 6.604482173919678, "learning_rate": 0.0001998762801725209, "loss": 2.0712, "step": 18270 }, { "epoch": 2.1997593261131168, "grad_norm": 5.471787452697754, "learning_rate": 0.00019987609092182772, "loss": 1.7306, "step": 18280 }, { "epoch": 2.200962695547533, "grad_norm": 4.047811508178711, "learning_rate": 0.00019987590152658921, "loss": 1.7998, "step": 18290 }, { "epoch": 2.2021660649819497, "grad_norm": 4.471895217895508, "learning_rate": 0.00019987571198680558, "loss": 1.5925, "step": 18300 }, { "epoch": 2.203369434416366, "grad_norm": 3.748727798461914, "learning_rate": 0.00019987552230247717, "loss": 1.7194, "step": 18310 }, { "epoch": 2.204572803850782, "grad_norm": 7.51377534866333, "learning_rate": 0.0001998753324736042, "loss": 2.0262, "step": 18320 }, { "epoch": 2.2057761732851984, "grad_norm": 5.626355171203613, "learning_rate": 0.00019987514250018696, "loss": 1.9133, "step": 18330 }, { "epoch": 2.206979542719615, "grad_norm": 3.5132031440734863, "learning_rate": 0.00019987495238222577, "loss": 2.0141, "step": 18340 }, { "epoch": 2.2081829121540313, "grad_norm": 5.36952018737793, "learning_rate": 0.00019987476211972082, "loss": 1.6841, "step": 18350 }, { "epoch": 2.2093862815884475, "grad_norm": 4.150166034698486, "learning_rate": 0.00019987457171267246, "loss": 1.7867, "step": 18360 }, { "epoch": 2.210589651022864, "grad_norm": 6.524456024169922, "learning_rate": 0.0001998743811610809, "loss": 2.0483, "step": 18370 }, { "epoch": 2.2117930204572804, "grad_norm": 5.273902416229248, "learning_rate": 0.0001998741904649465, "loss": 1.8468, "step": 18380 }, { "epoch": 2.2129963898916967, "grad_norm": 3.610470771789551, "learning_rate": 0.00019987399962426947, "loss": 1.8327, "step": 18390 }, { "epoch": 2.214199759326113, "grad_norm": 6.863369464874268, "learning_rate": 0.0001998738086390501, "loss": 1.7468, "step": 18400 }, { "epoch": 2.2154031287605296, "grad_norm": 3.936666250228882, "learning_rate": 0.00019987361750928865, "loss": 1.7581, "step": 18410 }, { "epoch": 2.216606498194946, "grad_norm": 7.433270454406738, "learning_rate": 0.00019987342623498545, "loss": 1.9996, "step": 18420 }, { "epoch": 2.217809867629362, "grad_norm": 5.657661437988281, "learning_rate": 0.00019987323481614072, "loss": 1.7346, "step": 18430 }, { "epoch": 2.2190132370637787, "grad_norm": 3.727800130844116, "learning_rate": 0.00019987304325275477, "loss": 1.7012, "step": 18440 }, { "epoch": 2.220216606498195, "grad_norm": 6.024631977081299, "learning_rate": 0.00019987285154482787, "loss": 1.7962, "step": 18450 }, { "epoch": 2.221419975932611, "grad_norm": 7.292879104614258, "learning_rate": 0.0001998726596923603, "loss": 2.1374, "step": 18460 }, { "epoch": 2.222623345367028, "grad_norm": 6.066665172576904, "learning_rate": 0.00019987246769535234, "loss": 2.0177, "step": 18470 }, { "epoch": 2.223826714801444, "grad_norm": 4.70356559753418, "learning_rate": 0.00019987227555380425, "loss": 1.9923, "step": 18480 }, { "epoch": 2.2250300842358604, "grad_norm": 3.302060604095459, "learning_rate": 0.00019987208326771634, "loss": 1.9954, "step": 18490 }, { "epoch": 2.2262334536702766, "grad_norm": 5.334634304046631, "learning_rate": 0.00019987189083708884, "loss": 1.8809, "step": 18500 }, { "epoch": 2.2274368231046933, "grad_norm": 4.367860794067383, "learning_rate": 0.00019987169826192206, "loss": 1.9273, "step": 18510 }, { "epoch": 2.2286401925391095, "grad_norm": 5.571081161499023, "learning_rate": 0.00019987150554221628, "loss": 1.6665, "step": 18520 }, { "epoch": 2.2298435619735257, "grad_norm": 4.982118606567383, "learning_rate": 0.0001998713126779718, "loss": 1.7887, "step": 18530 }, { "epoch": 2.2310469314079424, "grad_norm": 4.198168754577637, "learning_rate": 0.00019987111966918882, "loss": 1.7969, "step": 18540 }, { "epoch": 2.2322503008423586, "grad_norm": 5.355137825012207, "learning_rate": 0.0001998709265158677, "loss": 1.7528, "step": 18550 }, { "epoch": 2.233453670276775, "grad_norm": 4.837794303894043, "learning_rate": 0.0001998707332180087, "loss": 1.7877, "step": 18560 }, { "epoch": 2.234657039711191, "grad_norm": 7.143967628479004, "learning_rate": 0.0001998705397756121, "loss": 1.9044, "step": 18570 }, { "epoch": 2.235860409145608, "grad_norm": 5.132889747619629, "learning_rate": 0.00019987034618867812, "loss": 1.6545, "step": 18580 }, { "epoch": 2.237063778580024, "grad_norm": 3.638214588165283, "learning_rate": 0.00019987015245720715, "loss": 1.9747, "step": 18590 }, { "epoch": 2.2382671480144403, "grad_norm": 5.9062604904174805, "learning_rate": 0.0001998699585811994, "loss": 2.0217, "step": 18600 }, { "epoch": 2.239470517448857, "grad_norm": 4.019762992858887, "learning_rate": 0.00019986976456065514, "loss": 1.9784, "step": 18610 }, { "epoch": 2.240673886883273, "grad_norm": 7.049264907836914, "learning_rate": 0.0001998695703955747, "loss": 1.855, "step": 18620 }, { "epoch": 2.2418772563176894, "grad_norm": 4.697753429412842, "learning_rate": 0.00019986937608595834, "loss": 1.8122, "step": 18630 }, { "epoch": 2.243080625752106, "grad_norm": 3.323150157928467, "learning_rate": 0.0001998691816318063, "loss": 1.8771, "step": 18640 }, { "epoch": 2.2442839951865223, "grad_norm": 4.828363418579102, "learning_rate": 0.0001998689870331189, "loss": 1.7434, "step": 18650 }, { "epoch": 2.2454873646209386, "grad_norm": 4.217282772064209, "learning_rate": 0.00019986879228989644, "loss": 1.8604, "step": 18660 }, { "epoch": 2.246690734055355, "grad_norm": 7.136775493621826, "learning_rate": 0.00019986859740213917, "loss": 2.0133, "step": 18670 }, { "epoch": 2.2478941034897715, "grad_norm": 5.266910552978516, "learning_rate": 0.0001998684023698474, "loss": 1.6699, "step": 18680 }, { "epoch": 2.2490974729241877, "grad_norm": 3.553563117980957, "learning_rate": 0.00019986820719302138, "loss": 2.0129, "step": 18690 }, { "epoch": 2.250300842358604, "grad_norm": 5.233100414276123, "learning_rate": 0.0001998680118716614, "loss": 1.8662, "step": 18700 }, { "epoch": 2.2515042117930206, "grad_norm": 4.113808631896973, "learning_rate": 0.00019986781640576778, "loss": 1.7728, "step": 18710 }, { "epoch": 2.252707581227437, "grad_norm": 7.668570518493652, "learning_rate": 0.00019986762079534073, "loss": 2.1581, "step": 18720 }, { "epoch": 2.253910950661853, "grad_norm": 4.77482795715332, "learning_rate": 0.0001998674250403806, "loss": 1.8397, "step": 18730 }, { "epoch": 2.2551143200962693, "grad_norm": 3.207192897796631, "learning_rate": 0.00019986722914088766, "loss": 2.2075, "step": 18740 }, { "epoch": 2.256317689530686, "grad_norm": 5.926999568939209, "learning_rate": 0.00019986703309686217, "loss": 1.8827, "step": 18750 }, { "epoch": 2.2575210589651022, "grad_norm": 4.553532600402832, "learning_rate": 0.00019986683690830442, "loss": 1.8602, "step": 18760 }, { "epoch": 2.2587244283995185, "grad_norm": 7.679495811462402, "learning_rate": 0.0001998666405752147, "loss": 1.7327, "step": 18770 }, { "epoch": 2.259927797833935, "grad_norm": 5.038778781890869, "learning_rate": 0.00019986644409759332, "loss": 1.7501, "step": 18780 }, { "epoch": 2.2611311672683514, "grad_norm": 2.6412038803100586, "learning_rate": 0.00019986624747544052, "loss": 1.9424, "step": 18790 }, { "epoch": 2.2623345367027676, "grad_norm": 6.090629577636719, "learning_rate": 0.00019986605070875662, "loss": 2.0169, "step": 18800 }, { "epoch": 2.2635379061371843, "grad_norm": 4.529531478881836, "learning_rate": 0.0001998658537975419, "loss": 1.7735, "step": 18810 }, { "epoch": 2.2647412755716005, "grad_norm": 7.689573764801025, "learning_rate": 0.00019986565674179655, "loss": 1.8555, "step": 18820 }, { "epoch": 2.2659446450060168, "grad_norm": 5.21312141418457, "learning_rate": 0.00019986545954152102, "loss": 1.8347, "step": 18830 }, { "epoch": 2.2671480144404335, "grad_norm": 3.2846319675445557, "learning_rate": 0.0001998652621967155, "loss": 1.7728, "step": 18840 }, { "epoch": 2.2683513838748497, "grad_norm": 5.718740940093994, "learning_rate": 0.00019986506470738027, "loss": 1.7359, "step": 18850 }, { "epoch": 2.269554753309266, "grad_norm": 4.876806259155273, "learning_rate": 0.00019986486707351567, "loss": 1.6682, "step": 18860 }, { "epoch": 2.270758122743682, "grad_norm": 7.878754138946533, "learning_rate": 0.0001998646692951219, "loss": 1.7444, "step": 18870 }, { "epoch": 2.271961492178099, "grad_norm": 5.099309921264648, "learning_rate": 0.0001998644713721993, "loss": 1.9449, "step": 18880 }, { "epoch": 2.273164861612515, "grad_norm": 3.5428872108459473, "learning_rate": 0.00019986427330474823, "loss": 1.7586, "step": 18890 }, { "epoch": 2.2743682310469313, "grad_norm": 6.376409530639648, "learning_rate": 0.00019986407509276885, "loss": 1.848, "step": 18900 }, { "epoch": 2.2755716004813475, "grad_norm": 5.111598491668701, "learning_rate": 0.0001998638767362615, "loss": 1.952, "step": 18910 }, { "epoch": 2.2767749699157642, "grad_norm": 6.774966239929199, "learning_rate": 0.00019986367823522644, "loss": 1.6374, "step": 18920 }, { "epoch": 2.2779783393501805, "grad_norm": 5.007774829864502, "learning_rate": 0.00019986347958966404, "loss": 2.0364, "step": 18930 }, { "epoch": 2.2791817087845967, "grad_norm": 3.2465569972991943, "learning_rate": 0.00019986328079957448, "loss": 1.8738, "step": 18940 }, { "epoch": 2.2803850782190134, "grad_norm": 5.423501491546631, "learning_rate": 0.0001998630818649581, "loss": 1.6515, "step": 18950 }, { "epoch": 2.2815884476534296, "grad_norm": 3.999821186065674, "learning_rate": 0.0001998628827858152, "loss": 1.654, "step": 18960 }, { "epoch": 2.282791817087846, "grad_norm": 5.91510009765625, "learning_rate": 0.00019986268356214605, "loss": 1.8803, "step": 18970 }, { "epoch": 2.2839951865222625, "grad_norm": 5.5415215492248535, "learning_rate": 0.00019986248419395094, "loss": 1.8058, "step": 18980 }, { "epoch": 2.2851985559566788, "grad_norm": 3.26278018951416, "learning_rate": 0.00019986228468123017, "loss": 1.8605, "step": 18990 }, { "epoch": 2.286401925391095, "grad_norm": 5.6961894035339355, "learning_rate": 0.00019986208502398403, "loss": 1.8349, "step": 19000 }, { "epoch": 2.2876052948255117, "grad_norm": 5.6630730628967285, "learning_rate": 0.00019986188522221277, "loss": 1.8018, "step": 19010 }, { "epoch": 2.288808664259928, "grad_norm": 7.9914398193359375, "learning_rate": 0.0001998616852759167, "loss": 1.9708, "step": 19020 }, { "epoch": 2.290012033694344, "grad_norm": 4.706212997436523, "learning_rate": 0.00019986148518509614, "loss": 1.8582, "step": 19030 }, { "epoch": 2.2912154031287604, "grad_norm": 4.37938117980957, "learning_rate": 0.00019986128494975134, "loss": 2.2603, "step": 19040 }, { "epoch": 2.292418772563177, "grad_norm": 6.98267936706543, "learning_rate": 0.0001998610845698826, "loss": 1.9635, "step": 19050 }, { "epoch": 2.2936221419975933, "grad_norm": 3.565274715423584, "learning_rate": 0.00019986088404549027, "loss": 1.752, "step": 19060 }, { "epoch": 2.2948255114320095, "grad_norm": 6.4155192375183105, "learning_rate": 0.00019986068337657454, "loss": 1.8159, "step": 19070 }, { "epoch": 2.2960288808664258, "grad_norm": 4.892825603485107, "learning_rate": 0.00019986048256313575, "loss": 1.7489, "step": 19080 }, { "epoch": 2.2972322503008424, "grad_norm": 3.5658504962921143, "learning_rate": 0.00019986028160517418, "loss": 1.8876, "step": 19090 }, { "epoch": 2.2984356197352587, "grad_norm": 5.374711990356445, "learning_rate": 0.00019986008050269013, "loss": 1.8162, "step": 19100 }, { "epoch": 2.299638989169675, "grad_norm": 4.758020401000977, "learning_rate": 0.0001998598792556839, "loss": 1.5871, "step": 19110 }, { "epoch": 2.3008423586040916, "grad_norm": 6.21620512008667, "learning_rate": 0.00019985967786415577, "loss": 1.7226, "step": 19120 }, { "epoch": 2.302045728038508, "grad_norm": 5.469278812408447, "learning_rate": 0.000199859476328106, "loss": 2.0724, "step": 19130 }, { "epoch": 2.303249097472924, "grad_norm": 4.54677152633667, "learning_rate": 0.00019985927464753495, "loss": 1.9097, "step": 19140 }, { "epoch": 2.3044524669073407, "grad_norm": 5.128119468688965, "learning_rate": 0.00019985907282244289, "loss": 1.6518, "step": 19150 }, { "epoch": 2.305655836341757, "grad_norm": 4.618437767028809, "learning_rate": 0.00019985887085283008, "loss": 2.0012, "step": 19160 }, { "epoch": 2.306859205776173, "grad_norm": 6.141031742095947, "learning_rate": 0.00019985866873869682, "loss": 2.0247, "step": 19170 }, { "epoch": 2.30806257521059, "grad_norm": 4.4620537757873535, "learning_rate": 0.0001998584664800434, "loss": 2.0114, "step": 19180 }, { "epoch": 2.309265944645006, "grad_norm": 4.38834810256958, "learning_rate": 0.00019985826407687015, "loss": 2.009, "step": 19190 }, { "epoch": 2.3104693140794224, "grad_norm": 5.883998870849609, "learning_rate": 0.0001998580615291773, "loss": 1.6037, "step": 19200 }, { "epoch": 2.3116726835138386, "grad_norm": 4.275137424468994, "learning_rate": 0.00019985785883696523, "loss": 2.0258, "step": 19210 }, { "epoch": 2.3128760529482553, "grad_norm": 6.975653648376465, "learning_rate": 0.00019985765600023415, "loss": 1.6874, "step": 19220 }, { "epoch": 2.3140794223826715, "grad_norm": 4.13192081451416, "learning_rate": 0.00019985745301898442, "loss": 1.786, "step": 19230 }, { "epoch": 2.3152827918170877, "grad_norm": 4.088593006134033, "learning_rate": 0.00019985724989321626, "loss": 1.9076, "step": 19240 }, { "epoch": 2.316486161251504, "grad_norm": 6.3527092933654785, "learning_rate": 0.00019985704662293005, "loss": 1.8687, "step": 19250 }, { "epoch": 2.3176895306859207, "grad_norm": 5.279321670532227, "learning_rate": 0.000199856843208126, "loss": 1.9265, "step": 19260 }, { "epoch": 2.318892900120337, "grad_norm": 7.881455421447754, "learning_rate": 0.00019985663964880446, "loss": 1.876, "step": 19270 }, { "epoch": 2.320096269554753, "grad_norm": 4.470962047576904, "learning_rate": 0.00019985643594496572, "loss": 1.5564, "step": 19280 }, { "epoch": 2.32129963898917, "grad_norm": 3.5736684799194336, "learning_rate": 0.00019985623209661007, "loss": 1.6959, "step": 19290 }, { "epoch": 2.322503008423586, "grad_norm": 5.825920104980469, "learning_rate": 0.00019985602810373775, "loss": 1.9138, "step": 19300 }, { "epoch": 2.3237063778580023, "grad_norm": 4.798040390014648, "learning_rate": 0.00019985582396634915, "loss": 1.9029, "step": 19310 }, { "epoch": 2.324909747292419, "grad_norm": 6.569250583648682, "learning_rate": 0.0001998556196844445, "loss": 1.9013, "step": 19320 }, { "epoch": 2.326113116726835, "grad_norm": 5.732389450073242, "learning_rate": 0.00019985541525802412, "loss": 1.6371, "step": 19330 }, { "epoch": 2.3273164861612514, "grad_norm": 3.9813873767852783, "learning_rate": 0.0001998552106870883, "loss": 1.9835, "step": 19340 }, { "epoch": 2.328519855595668, "grad_norm": 6.8331804275512695, "learning_rate": 0.00019985500597163733, "loss": 2.1205, "step": 19350 }, { "epoch": 2.3297232250300843, "grad_norm": 4.132762908935547, "learning_rate": 0.0001998548011116715, "loss": 1.8935, "step": 19360 }, { "epoch": 2.3309265944645006, "grad_norm": 7.562819957733154, "learning_rate": 0.00019985459610719116, "loss": 1.8519, "step": 19370 }, { "epoch": 2.332129963898917, "grad_norm": 5.643316268920898, "learning_rate": 0.00019985439095819655, "loss": 1.9336, "step": 19380 }, { "epoch": 2.3333333333333335, "grad_norm": 3.6668975353240967, "learning_rate": 0.00019985418566468796, "loss": 1.8849, "step": 19390 }, { "epoch": 2.3345367027677497, "grad_norm": 6.986433029174805, "learning_rate": 0.00019985398022666573, "loss": 1.836, "step": 19400 }, { "epoch": 2.335740072202166, "grad_norm": 4.0480241775512695, "learning_rate": 0.00019985377464413014, "loss": 1.9559, "step": 19410 }, { "epoch": 2.336943441636582, "grad_norm": 6.787583351135254, "learning_rate": 0.00019985356891708147, "loss": 2.1796, "step": 19420 }, { "epoch": 2.338146811070999, "grad_norm": 5.128816604614258, "learning_rate": 0.00019985336304552008, "loss": 1.6992, "step": 19430 }, { "epoch": 2.339350180505415, "grad_norm": 3.483121871948242, "learning_rate": 0.00019985315702944617, "loss": 1.9576, "step": 19440 }, { "epoch": 2.3405535499398313, "grad_norm": 4.72206449508667, "learning_rate": 0.0001998529508688601, "loss": 2.0157, "step": 19450 }, { "epoch": 2.341756919374248, "grad_norm": 3.925147533416748, "learning_rate": 0.00019985274456376216, "loss": 1.7235, "step": 19460 }, { "epoch": 2.3429602888086642, "grad_norm": 7.588326930999756, "learning_rate": 0.00019985253811415264, "loss": 1.8368, "step": 19470 }, { "epoch": 2.3441636582430805, "grad_norm": 5.377657890319824, "learning_rate": 0.00019985233152003184, "loss": 1.9796, "step": 19480 }, { "epoch": 2.345367027677497, "grad_norm": 4.09199333190918, "learning_rate": 0.00019985212478140007, "loss": 1.8423, "step": 19490 }, { "epoch": 2.3465703971119134, "grad_norm": 5.174166679382324, "learning_rate": 0.00019985191789825764, "loss": 1.7323, "step": 19500 }, { "epoch": 2.3477737665463296, "grad_norm": 4.120990753173828, "learning_rate": 0.0001998517108706048, "loss": 1.8738, "step": 19510 }, { "epoch": 2.3489771359807463, "grad_norm": 7.727570533752441, "learning_rate": 0.0001998515036984419, "loss": 1.6302, "step": 19520 }, { "epoch": 2.3501805054151625, "grad_norm": 4.989604949951172, "learning_rate": 0.00019985129638176922, "loss": 1.8801, "step": 19530 }, { "epoch": 2.351383874849579, "grad_norm": 3.2973055839538574, "learning_rate": 0.0001998510889205871, "loss": 2.0017, "step": 19540 }, { "epoch": 2.352587244283995, "grad_norm": 5.632620334625244, "learning_rate": 0.00019985088131489574, "loss": 1.6783, "step": 19550 }, { "epoch": 2.3537906137184117, "grad_norm": 4.893600940704346, "learning_rate": 0.00019985067356469553, "loss": 1.7866, "step": 19560 }, { "epoch": 2.354993983152828, "grad_norm": 8.183561325073242, "learning_rate": 0.00019985046566998676, "loss": 1.9867, "step": 19570 }, { "epoch": 2.356197352587244, "grad_norm": 5.605899333953857, "learning_rate": 0.00019985025763076967, "loss": 1.9443, "step": 19580 }, { "epoch": 2.357400722021661, "grad_norm": 3.518667221069336, "learning_rate": 0.00019985004944704464, "loss": 1.8668, "step": 19590 }, { "epoch": 2.358604091456077, "grad_norm": 5.827317714691162, "learning_rate": 0.00019984984111881193, "loss": 1.9045, "step": 19600 }, { "epoch": 2.3598074608904933, "grad_norm": 4.613023281097412, "learning_rate": 0.00019984963264607183, "loss": 1.9533, "step": 19610 }, { "epoch": 2.3610108303249095, "grad_norm": 6.259252071380615, "learning_rate": 0.00019984942402882468, "loss": 1.8545, "step": 19620 }, { "epoch": 2.3622141997593262, "grad_norm": 6.066045761108398, "learning_rate": 0.00019984921526707073, "loss": 1.9019, "step": 19630 }, { "epoch": 2.3634175691937425, "grad_norm": 3.8455309867858887, "learning_rate": 0.00019984900636081036, "loss": 1.8717, "step": 19640 }, { "epoch": 2.3646209386281587, "grad_norm": 6.248837471008301, "learning_rate": 0.0001998487973100438, "loss": 1.7347, "step": 19650 }, { "epoch": 2.3658243080625754, "grad_norm": 4.124472618103027, "learning_rate": 0.00019984858811477138, "loss": 1.7833, "step": 19660 }, { "epoch": 2.3670276774969916, "grad_norm": 6.054342269897461, "learning_rate": 0.00019984837877499341, "loss": 1.8304, "step": 19670 }, { "epoch": 2.368231046931408, "grad_norm": 4.665483474731445, "learning_rate": 0.00019984816929071017, "loss": 1.965, "step": 19680 }, { "epoch": 2.3694344163658245, "grad_norm": 3.366262912750244, "learning_rate": 0.000199847959661922, "loss": 1.8008, "step": 19690 }, { "epoch": 2.3706377858002408, "grad_norm": 6.070033550262451, "learning_rate": 0.00019984774988862915, "loss": 1.727, "step": 19700 }, { "epoch": 2.371841155234657, "grad_norm": 3.947265148162842, "learning_rate": 0.00019984753997083198, "loss": 1.6893, "step": 19710 }, { "epoch": 2.3730445246690732, "grad_norm": 6.37141752243042, "learning_rate": 0.00019984732990853074, "loss": 1.85, "step": 19720 }, { "epoch": 2.37424789410349, "grad_norm": 4.367625713348389, "learning_rate": 0.0001998471197017258, "loss": 1.8522, "step": 19730 }, { "epoch": 2.375451263537906, "grad_norm": 3.7760021686553955, "learning_rate": 0.0001998469093504174, "loss": 1.9469, "step": 19740 }, { "epoch": 2.3766546329723224, "grad_norm": 6.074053764343262, "learning_rate": 0.00019984669885460588, "loss": 1.8709, "step": 19750 }, { "epoch": 2.377858002406739, "grad_norm": 4.137975692749023, "learning_rate": 0.00019984648821429153, "loss": 1.8361, "step": 19760 }, { "epoch": 2.3790613718411553, "grad_norm": 8.683006286621094, "learning_rate": 0.00019984627742947465, "loss": 2.0524, "step": 19770 }, { "epoch": 2.3802647412755715, "grad_norm": 4.150774002075195, "learning_rate": 0.00019984606650015559, "loss": 1.6457, "step": 19780 }, { "epoch": 2.3814681107099878, "grad_norm": 4.112648010253906, "learning_rate": 0.0001998458554263346, "loss": 1.7112, "step": 19790 }, { "epoch": 2.3826714801444044, "grad_norm": 6.109230041503906, "learning_rate": 0.000199845644208012, "loss": 2.1384, "step": 19800 }, { "epoch": 2.3838748495788207, "grad_norm": 4.682973384857178, "learning_rate": 0.0001998454328451881, "loss": 1.7411, "step": 19810 }, { "epoch": 2.385078219013237, "grad_norm": 7.387558460235596, "learning_rate": 0.00019984522133786322, "loss": 1.8546, "step": 19820 }, { "epoch": 2.3862815884476536, "grad_norm": 4.582014083862305, "learning_rate": 0.00019984500968603763, "loss": 1.6319, "step": 19830 }, { "epoch": 2.38748495788207, "grad_norm": 4.170936107635498, "learning_rate": 0.00019984479788971168, "loss": 1.8747, "step": 19840 }, { "epoch": 2.388688327316486, "grad_norm": 6.265957355499268, "learning_rate": 0.00019984458594888568, "loss": 1.7328, "step": 19850 }, { "epoch": 2.3898916967509027, "grad_norm": 4.465024471282959, "learning_rate": 0.00019984437386355987, "loss": 1.8335, "step": 19860 }, { "epoch": 2.391095066185319, "grad_norm": 7.820645332336426, "learning_rate": 0.00019984416163373463, "loss": 1.9983, "step": 19870 }, { "epoch": 2.392298435619735, "grad_norm": 4.321065902709961, "learning_rate": 0.0001998439492594102, "loss": 1.8749, "step": 19880 }, { "epoch": 2.3935018050541514, "grad_norm": 3.951925039291382, "learning_rate": 0.00019984373674058697, "loss": 1.9719, "step": 19890 }, { "epoch": 2.394705174488568, "grad_norm": 6.700965404510498, "learning_rate": 0.00019984352407726518, "loss": 1.9855, "step": 19900 }, { "epoch": 2.3959085439229844, "grad_norm": 4.175863742828369, "learning_rate": 0.00019984331126944515, "loss": 1.7449, "step": 19910 }, { "epoch": 2.3971119133574006, "grad_norm": 8.502548217773438, "learning_rate": 0.00019984309831712723, "loss": 1.894, "step": 19920 }, { "epoch": 2.3983152827918173, "grad_norm": 5.1616387367248535, "learning_rate": 0.00019984288522031167, "loss": 1.9793, "step": 19930 }, { "epoch": 2.3995186522262335, "grad_norm": 3.631253957748413, "learning_rate": 0.0001998426719789988, "loss": 2.0157, "step": 19940 }, { "epoch": 2.4007220216606497, "grad_norm": 6.3541035652160645, "learning_rate": 0.00019984245859318897, "loss": 1.9913, "step": 19950 }, { "epoch": 2.401925391095066, "grad_norm": 4.3100104331970215, "learning_rate": 0.0001998422450628824, "loss": 1.7548, "step": 19960 }, { "epoch": 2.4031287605294827, "grad_norm": 6.630500316619873, "learning_rate": 0.0001998420313880795, "loss": 2.2605, "step": 19970 }, { "epoch": 2.404332129963899, "grad_norm": 4.805111408233643, "learning_rate": 0.0001998418175687805, "loss": 1.9942, "step": 19980 }, { "epoch": 2.405535499398315, "grad_norm": 3.27608585357666, "learning_rate": 0.00019984160360498574, "loss": 1.8665, "step": 19990 }, { "epoch": 2.406738868832732, "grad_norm": 5.916595458984375, "learning_rate": 0.00019984138949669556, "loss": 1.8347, "step": 20000 }, { "epoch": 2.407942238267148, "grad_norm": 4.1156907081604, "learning_rate": 0.0001998411752439102, "loss": 1.8613, "step": 20010 }, { "epoch": 2.4091456077015643, "grad_norm": 6.155092239379883, "learning_rate": 0.00019984096084663003, "loss": 1.9602, "step": 20020 }, { "epoch": 2.410348977135981, "grad_norm": 5.840784072875977, "learning_rate": 0.00019984074630485533, "loss": 1.9026, "step": 20030 }, { "epoch": 2.411552346570397, "grad_norm": 3.539097309112549, "learning_rate": 0.00019984053161858643, "loss": 2.1181, "step": 20040 }, { "epoch": 2.4127557160048134, "grad_norm": 4.8869709968566895, "learning_rate": 0.00019984031678782363, "loss": 1.7118, "step": 20050 }, { "epoch": 2.4139590854392297, "grad_norm": 4.145012378692627, "learning_rate": 0.00019984010181256726, "loss": 1.8493, "step": 20060 }, { "epoch": 2.4151624548736463, "grad_norm": 7.2109456062316895, "learning_rate": 0.00019983988669281758, "loss": 1.9388, "step": 20070 }, { "epoch": 2.4163658243080626, "grad_norm": 5.20852518081665, "learning_rate": 0.00019983967142857492, "loss": 1.6899, "step": 20080 }, { "epoch": 2.417569193742479, "grad_norm": 3.464789628982544, "learning_rate": 0.00019983945601983964, "loss": 1.7821, "step": 20090 }, { "epoch": 2.4187725631768955, "grad_norm": 6.397153854370117, "learning_rate": 0.00019983924046661203, "loss": 1.8121, "step": 20100 }, { "epoch": 2.4199759326113117, "grad_norm": 4.136017322540283, "learning_rate": 0.00019983902476889236, "loss": 1.642, "step": 20110 }, { "epoch": 2.421179302045728, "grad_norm": 7.516378879547119, "learning_rate": 0.000199838808926681, "loss": 2.1426, "step": 20120 }, { "epoch": 2.422382671480144, "grad_norm": 5.808988571166992, "learning_rate": 0.0001998385929399782, "loss": 1.7826, "step": 20130 }, { "epoch": 2.423586040914561, "grad_norm": 3.586364507675171, "learning_rate": 0.00019983837680878432, "loss": 1.9311, "step": 20140 }, { "epoch": 2.424789410348977, "grad_norm": 6.14862060546875, "learning_rate": 0.00019983816053309965, "loss": 1.7535, "step": 20150 }, { "epoch": 2.4259927797833933, "grad_norm": 4.461113452911377, "learning_rate": 0.00019983794411292452, "loss": 1.6474, "step": 20160 }, { "epoch": 2.42719614921781, "grad_norm": 7.186124324798584, "learning_rate": 0.00019983772754825923, "loss": 1.8949, "step": 20170 }, { "epoch": 2.4283995186522263, "grad_norm": 4.948721885681152, "learning_rate": 0.00019983751083910414, "loss": 1.7561, "step": 20180 }, { "epoch": 2.4296028880866425, "grad_norm": 3.0712690353393555, "learning_rate": 0.0001998372939854595, "loss": 1.9438, "step": 20190 }, { "epoch": 2.430806257521059, "grad_norm": 5.280097484588623, "learning_rate": 0.0001998370769873256, "loss": 1.9033, "step": 20200 }, { "epoch": 2.4320096269554754, "grad_norm": 4.612265586853027, "learning_rate": 0.00019983685984470287, "loss": 1.8435, "step": 20210 }, { "epoch": 2.4332129963898916, "grad_norm": 7.592869758605957, "learning_rate": 0.0001998366425575915, "loss": 2.0325, "step": 20220 }, { "epoch": 2.434416365824308, "grad_norm": 4.777297496795654, "learning_rate": 0.00019983642512599187, "loss": 1.8841, "step": 20230 }, { "epoch": 2.4356197352587245, "grad_norm": 3.448274850845337, "learning_rate": 0.00019983620754990432, "loss": 1.9001, "step": 20240 }, { "epoch": 2.436823104693141, "grad_norm": 4.815512657165527, "learning_rate": 0.0001998359898293291, "loss": 1.9369, "step": 20250 }, { "epoch": 2.438026474127557, "grad_norm": 4.012821674346924, "learning_rate": 0.00019983577196426656, "loss": 1.9488, "step": 20260 }, { "epoch": 2.4392298435619737, "grad_norm": 7.183079242706299, "learning_rate": 0.00019983555395471702, "loss": 1.8742, "step": 20270 }, { "epoch": 2.44043321299639, "grad_norm": 6.024168968200684, "learning_rate": 0.00019983533580068076, "loss": 1.6017, "step": 20280 }, { "epoch": 2.441636582430806, "grad_norm": 3.5001578330993652, "learning_rate": 0.00019983511750215814, "loss": 1.8542, "step": 20290 }, { "epoch": 2.4428399518652224, "grad_norm": 6.278378963470459, "learning_rate": 0.00019983489905914945, "loss": 1.6961, "step": 20300 }, { "epoch": 2.444043321299639, "grad_norm": 4.57430362701416, "learning_rate": 0.00019983468047165501, "loss": 1.8088, "step": 20310 }, { "epoch": 2.4452466907340553, "grad_norm": 7.231283187866211, "learning_rate": 0.00019983446173967514, "loss": 2.0128, "step": 20320 }, { "epoch": 2.4464500601684716, "grad_norm": 4.400961875915527, "learning_rate": 0.00019983424286321014, "loss": 1.8736, "step": 20330 }, { "epoch": 2.4476534296028882, "grad_norm": 3.3811419010162354, "learning_rate": 0.00019983402384226039, "loss": 1.7218, "step": 20340 }, { "epoch": 2.4488567990373045, "grad_norm": 5.952632427215576, "learning_rate": 0.00019983380467682613, "loss": 1.7915, "step": 20350 }, { "epoch": 2.4500601684717207, "grad_norm": 4.2375288009643555, "learning_rate": 0.0001998335853669077, "loss": 1.799, "step": 20360 }, { "epoch": 2.4512635379061374, "grad_norm": 7.192270278930664, "learning_rate": 0.00019983336591250544, "loss": 1.9928, "step": 20370 }, { "epoch": 2.4524669073405536, "grad_norm": 4.667410373687744, "learning_rate": 0.00019983314631361965, "loss": 1.7461, "step": 20380 }, { "epoch": 2.45367027677497, "grad_norm": 3.813703775405884, "learning_rate": 0.00019983292657025064, "loss": 1.8673, "step": 20390 }, { "epoch": 2.4548736462093865, "grad_norm": 5.364323616027832, "learning_rate": 0.0001998327066823987, "loss": 1.7289, "step": 20400 }, { "epoch": 2.4560770156438028, "grad_norm": 4.271958827972412, "learning_rate": 0.00019983248665006423, "loss": 1.9626, "step": 20410 }, { "epoch": 2.457280385078219, "grad_norm": 5.91215181350708, "learning_rate": 0.00019983226647324751, "loss": 1.8125, "step": 20420 }, { "epoch": 2.4584837545126352, "grad_norm": 5.356571674346924, "learning_rate": 0.00019983204615194885, "loss": 1.7691, "step": 20430 }, { "epoch": 2.459687123947052, "grad_norm": 3.500491142272949, "learning_rate": 0.00019983182568616856, "loss": 1.8946, "step": 20440 }, { "epoch": 2.460890493381468, "grad_norm": 5.471607685089111, "learning_rate": 0.00019983160507590697, "loss": 1.7181, "step": 20450 }, { "epoch": 2.4620938628158844, "grad_norm": 6.24301815032959, "learning_rate": 0.0001998313843211644, "loss": 1.7694, "step": 20460 }, { "epoch": 2.4632972322503006, "grad_norm": 6.391057968139648, "learning_rate": 0.00019983116342194116, "loss": 2.1287, "step": 20470 }, { "epoch": 2.4645006016847173, "grad_norm": 5.084754467010498, "learning_rate": 0.00019983094237823758, "loss": 1.5653, "step": 20480 }, { "epoch": 2.4657039711191335, "grad_norm": 4.073486804962158, "learning_rate": 0.00019983072119005398, "loss": 1.944, "step": 20490 }, { "epoch": 2.4669073405535498, "grad_norm": 5.489533424377441, "learning_rate": 0.00019983049985739074, "loss": 1.8211, "step": 20500 }, { "epoch": 2.4681107099879664, "grad_norm": 5.330563545227051, "learning_rate": 0.00019983027838024807, "loss": 2.0054, "step": 20510 }, { "epoch": 2.4693140794223827, "grad_norm": 6.520805358886719, "learning_rate": 0.00019983005675862633, "loss": 1.8473, "step": 20520 }, { "epoch": 2.470517448856799, "grad_norm": 5.546237468719482, "learning_rate": 0.00019982983499252583, "loss": 2.0259, "step": 20530 }, { "epoch": 2.4717208182912156, "grad_norm": 4.316344738006592, "learning_rate": 0.00019982961308194695, "loss": 1.7913, "step": 20540 }, { "epoch": 2.472924187725632, "grad_norm": 5.053488731384277, "learning_rate": 0.00019982939102688999, "loss": 1.7793, "step": 20550 }, { "epoch": 2.474127557160048, "grad_norm": 4.186037063598633, "learning_rate": 0.0001998291688273552, "loss": 1.9729, "step": 20560 }, { "epoch": 2.4753309265944647, "grad_norm": 6.9018635749816895, "learning_rate": 0.000199828946483343, "loss": 1.6633, "step": 20570 }, { "epoch": 2.476534296028881, "grad_norm": 5.570452690124512, "learning_rate": 0.0001998287239948536, "loss": 1.6677, "step": 20580 }, { "epoch": 2.477737665463297, "grad_norm": 3.0675888061523438, "learning_rate": 0.00019982850136188746, "loss": 2.1564, "step": 20590 }, { "epoch": 2.4789410348977134, "grad_norm": 5.23740816116333, "learning_rate": 0.0001998282785844448, "loss": 1.8553, "step": 20600 }, { "epoch": 2.48014440433213, "grad_norm": 4.496595859527588, "learning_rate": 0.000199828055662526, "loss": 1.6143, "step": 20610 }, { "epoch": 2.4813477737665464, "grad_norm": 6.116359710693359, "learning_rate": 0.00019982783259613136, "loss": 2.0443, "step": 20620 }, { "epoch": 2.4825511432009626, "grad_norm": 4.956220626831055, "learning_rate": 0.00019982760938526116, "loss": 1.7014, "step": 20630 }, { "epoch": 2.483754512635379, "grad_norm": 3.1296913623809814, "learning_rate": 0.0001998273860299158, "loss": 1.7959, "step": 20640 }, { "epoch": 2.4849578820697955, "grad_norm": 5.865634441375732, "learning_rate": 0.00019982716253009555, "loss": 1.874, "step": 20650 }, { "epoch": 2.4861612515042117, "grad_norm": 4.335112571716309, "learning_rate": 0.00019982693888580074, "loss": 1.9807, "step": 20660 }, { "epoch": 2.487364620938628, "grad_norm": 8.73818302154541, "learning_rate": 0.0001998267150970317, "loss": 1.8346, "step": 20670 }, { "epoch": 2.4885679903730447, "grad_norm": 4.9280171394348145, "learning_rate": 0.00019982649116378877, "loss": 1.8952, "step": 20680 }, { "epoch": 2.489771359807461, "grad_norm": 3.2900710105895996, "learning_rate": 0.00019982626708607226, "loss": 1.8782, "step": 20690 }, { "epoch": 2.490974729241877, "grad_norm": 5.722655773162842, "learning_rate": 0.0001998260428638825, "loss": 1.883, "step": 20700 }, { "epoch": 2.492178098676294, "grad_norm": 4.088940620422363, "learning_rate": 0.0001998258184972198, "loss": 1.8307, "step": 20710 }, { "epoch": 2.49338146811071, "grad_norm": 6.719072341918945, "learning_rate": 0.0001998255939860845, "loss": 1.901, "step": 20720 }, { "epoch": 2.4945848375451263, "grad_norm": 5.0046868324279785, "learning_rate": 0.00019982536933047692, "loss": 1.9503, "step": 20730 }, { "epoch": 2.495788206979543, "grad_norm": 3.331597328186035, "learning_rate": 0.0001998251445303974, "loss": 1.6925, "step": 20740 }, { "epoch": 2.496991576413959, "grad_norm": 6.52798318862915, "learning_rate": 0.00019982491958584625, "loss": 1.7648, "step": 20750 }, { "epoch": 2.4981949458483754, "grad_norm": 4.069162845611572, "learning_rate": 0.00019982469449682374, "loss": 1.918, "step": 20760 }, { "epoch": 2.4993983152827917, "grad_norm": 6.859929084777832, "learning_rate": 0.0001998244692633303, "loss": 2.1157, "step": 20770 }, { "epoch": 2.5006016847172083, "grad_norm": 5.4571075439453125, "learning_rate": 0.00019982424388536617, "loss": 1.7556, "step": 20780 }, { "epoch": 2.5018050541516246, "grad_norm": 2.8539035320281982, "learning_rate": 0.00019982401836293176, "loss": 1.8848, "step": 20790 }, { "epoch": 2.503008423586041, "grad_norm": 5.45432710647583, "learning_rate": 0.00019982379269602732, "loss": 1.8332, "step": 20800 }, { "epoch": 2.504211793020457, "grad_norm": 4.622279644012451, "learning_rate": 0.00019982356688465322, "loss": 1.9187, "step": 20810 }, { "epoch": 2.5054151624548737, "grad_norm": 6.064669609069824, "learning_rate": 0.00019982334092880977, "loss": 1.9692, "step": 20820 }, { "epoch": 2.50661853188929, "grad_norm": 5.854981422424316, "learning_rate": 0.00019982311482849728, "loss": 1.7879, "step": 20830 }, { "epoch": 2.507821901323706, "grad_norm": 3.0214333534240723, "learning_rate": 0.00019982288858371613, "loss": 1.9418, "step": 20840 }, { "epoch": 2.509025270758123, "grad_norm": 7.712893009185791, "learning_rate": 0.0001998226621944666, "loss": 1.6371, "step": 20850 }, { "epoch": 2.510228640192539, "grad_norm": 4.892581462860107, "learning_rate": 0.00019982243566074904, "loss": 1.8865, "step": 20860 }, { "epoch": 2.5114320096269553, "grad_norm": 8.19919204711914, "learning_rate": 0.00019982220898256378, "loss": 1.7838, "step": 20870 }, { "epoch": 2.512635379061372, "grad_norm": 5.1628098487854, "learning_rate": 0.00019982198215991114, "loss": 1.932, "step": 20880 }, { "epoch": 2.5138387484957883, "grad_norm": 4.552578926086426, "learning_rate": 0.0001998217551927914, "loss": 1.7088, "step": 20890 }, { "epoch": 2.5150421179302045, "grad_norm": 5.526708602905273, "learning_rate": 0.00019982152808120497, "loss": 1.8121, "step": 20900 }, { "epoch": 2.516245487364621, "grad_norm": 4.256577968597412, "learning_rate": 0.00019982130082515213, "loss": 1.7975, "step": 20910 }, { "epoch": 2.5174488567990374, "grad_norm": 7.442561149597168, "learning_rate": 0.00019982107342463322, "loss": 1.9656, "step": 20920 }, { "epoch": 2.5186522262334536, "grad_norm": 4.96824836730957, "learning_rate": 0.00019982084587964857, "loss": 1.6666, "step": 20930 }, { "epoch": 2.51985559566787, "grad_norm": 3.7357962131500244, "learning_rate": 0.00019982061819019852, "loss": 1.9951, "step": 20940 }, { "epoch": 2.5210589651022866, "grad_norm": 5.7628865242004395, "learning_rate": 0.00019982039035628342, "loss": 1.7586, "step": 20950 }, { "epoch": 2.522262334536703, "grad_norm": 4.2057695388793945, "learning_rate": 0.00019982016237790354, "loss": 2.0508, "step": 20960 }, { "epoch": 2.523465703971119, "grad_norm": 6.62906551361084, "learning_rate": 0.00019981993425505925, "loss": 1.8817, "step": 20970 }, { "epoch": 2.5246690734055353, "grad_norm": 4.822028636932373, "learning_rate": 0.00019981970598775085, "loss": 1.9177, "step": 20980 }, { "epoch": 2.525872442839952, "grad_norm": 3.578030586242676, "learning_rate": 0.0001998194775759787, "loss": 1.9915, "step": 20990 }, { "epoch": 2.527075812274368, "grad_norm": 6.2930498123168945, "learning_rate": 0.00019981924901974315, "loss": 1.9624, "step": 21000 }, { "epoch": 2.5282791817087844, "grad_norm": 3.8709285259246826, "learning_rate": 0.00019981902031904447, "loss": 1.8362, "step": 21010 }, { "epoch": 2.529482551143201, "grad_norm": 7.663669109344482, "learning_rate": 0.00019981879147388303, "loss": 1.9585, "step": 21020 }, { "epoch": 2.5306859205776173, "grad_norm": 4.16254997253418, "learning_rate": 0.00019981856248425914, "loss": 1.5374, "step": 21030 }, { "epoch": 2.5318892900120336, "grad_norm": 4.396316051483154, "learning_rate": 0.00019981833335017317, "loss": 2.0933, "step": 21040 }, { "epoch": 2.5330926594464502, "grad_norm": 5.005481719970703, "learning_rate": 0.0001998181040716254, "loss": 1.894, "step": 21050 }, { "epoch": 2.5342960288808665, "grad_norm": 4.025967121124268, "learning_rate": 0.00019981787464861623, "loss": 1.817, "step": 21060 }, { "epoch": 2.5354993983152827, "grad_norm": 6.891594886779785, "learning_rate": 0.00019981764508114595, "loss": 1.7859, "step": 21070 }, { "epoch": 2.5367027677496994, "grad_norm": 6.6164374351501465, "learning_rate": 0.00019981741536921485, "loss": 1.8623, "step": 21080 }, { "epoch": 2.5379061371841156, "grad_norm": 3.5325465202331543, "learning_rate": 0.00019981718551282334, "loss": 1.8581, "step": 21090 }, { "epoch": 2.539109506618532, "grad_norm": 5.685319423675537, "learning_rate": 0.00019981695551197167, "loss": 1.7876, "step": 21100 }, { "epoch": 2.5403128760529485, "grad_norm": 3.816117286682129, "learning_rate": 0.0001998167253666603, "loss": 1.9363, "step": 21110 }, { "epoch": 2.5415162454873648, "grad_norm": 6.195153713226318, "learning_rate": 0.0001998164950768894, "loss": 1.9416, "step": 21120 }, { "epoch": 2.542719614921781, "grad_norm": 4.386119365692139, "learning_rate": 0.00019981626464265944, "loss": 1.7258, "step": 21130 }, { "epoch": 2.5439229843561972, "grad_norm": 3.807783603668213, "learning_rate": 0.0001998160340639707, "loss": 2.0765, "step": 21140 }, { "epoch": 2.5451263537906135, "grad_norm": 6.045656204223633, "learning_rate": 0.0001998158033408235, "loss": 2.1099, "step": 21150 }, { "epoch": 2.54632972322503, "grad_norm": 4.081486701965332, "learning_rate": 0.00019981557247321821, "loss": 1.7787, "step": 21160 }, { "epoch": 2.5475330926594464, "grad_norm": 7.350472927093506, "learning_rate": 0.00019981534146115514, "loss": 2.0342, "step": 21170 }, { "epoch": 2.5487364620938626, "grad_norm": 4.729325294494629, "learning_rate": 0.0001998151103046346, "loss": 1.8585, "step": 21180 }, { "epoch": 2.5499398315282793, "grad_norm": 4.003896713256836, "learning_rate": 0.00019981487900365695, "loss": 2.1392, "step": 21190 }, { "epoch": 2.5511432009626955, "grad_norm": 5.162550449371338, "learning_rate": 0.00019981464755822255, "loss": 1.7903, "step": 21200 }, { "epoch": 2.5523465703971118, "grad_norm": 5.016550064086914, "learning_rate": 0.00019981441596833173, "loss": 1.7447, "step": 21210 }, { "epoch": 2.5535499398315284, "grad_norm": 6.916841983795166, "learning_rate": 0.00019981418423398477, "loss": 1.8037, "step": 21220 }, { "epoch": 2.5547533092659447, "grad_norm": 4.650218486785889, "learning_rate": 0.00019981395235518207, "loss": 1.8007, "step": 21230 }, { "epoch": 2.555956678700361, "grad_norm": 3.214818000793457, "learning_rate": 0.0001998137203319239, "loss": 1.8741, "step": 21240 }, { "epoch": 2.5571600481347776, "grad_norm": 5.459062576293945, "learning_rate": 0.00019981348816421065, "loss": 1.7321, "step": 21250 }, { "epoch": 2.558363417569194, "grad_norm": 3.7758872509002686, "learning_rate": 0.00019981325585204264, "loss": 1.7992, "step": 21260 }, { "epoch": 2.55956678700361, "grad_norm": 7.222395896911621, "learning_rate": 0.0001998130233954202, "loss": 1.6814, "step": 21270 }, { "epoch": 2.5607701564380267, "grad_norm": 4.6296515464782715, "learning_rate": 0.00019981279079434372, "loss": 1.8424, "step": 21280 }, { "epoch": 2.561973525872443, "grad_norm": 3.446894407272339, "learning_rate": 0.00019981255804881343, "loss": 1.9884, "step": 21290 }, { "epoch": 2.563176895306859, "grad_norm": 5.880203723907471, "learning_rate": 0.00019981232515882974, "loss": 1.793, "step": 21300 }, { "epoch": 2.5643802647412755, "grad_norm": 4.337691783905029, "learning_rate": 0.00019981209212439297, "loss": 1.7987, "step": 21310 }, { "epoch": 2.5655836341756917, "grad_norm": 5.445236682891846, "learning_rate": 0.0001998118589455035, "loss": 1.8639, "step": 21320 }, { "epoch": 2.5667870036101084, "grad_norm": 5.637457847595215, "learning_rate": 0.0001998116256221616, "loss": 1.7117, "step": 21330 }, { "epoch": 2.5679903730445246, "grad_norm": 4.634398460388184, "learning_rate": 0.00019981139215436762, "loss": 1.9762, "step": 21340 }, { "epoch": 2.569193742478941, "grad_norm": 6.838907241821289, "learning_rate": 0.00019981115854212194, "loss": 2.0023, "step": 21350 }, { "epoch": 2.5703971119133575, "grad_norm": 4.272093296051025, "learning_rate": 0.00019981092478542483, "loss": 1.8791, "step": 21360 }, { "epoch": 2.5716004813477737, "grad_norm": 5.1662750244140625, "learning_rate": 0.0001998106908842767, "loss": 1.815, "step": 21370 }, { "epoch": 2.57280385078219, "grad_norm": 4.997259616851807, "learning_rate": 0.00019981045683867786, "loss": 1.7007, "step": 21380 }, { "epoch": 2.5740072202166067, "grad_norm": 3.608267068862915, "learning_rate": 0.00019981022264862862, "loss": 1.9887, "step": 21390 }, { "epoch": 2.575210589651023, "grad_norm": 5.867269515991211, "learning_rate": 0.00019980998831412939, "loss": 1.825, "step": 21400 }, { "epoch": 2.576413959085439, "grad_norm": 4.047500133514404, "learning_rate": 0.0001998097538351804, "loss": 1.7734, "step": 21410 }, { "epoch": 2.577617328519856, "grad_norm": 6.287036418914795, "learning_rate": 0.0001998095192117821, "loss": 1.7597, "step": 21420 }, { "epoch": 2.578820697954272, "grad_norm": 5.959181308746338, "learning_rate": 0.0001998092844439348, "loss": 2.1468, "step": 21430 }, { "epoch": 2.5800240673886883, "grad_norm": 3.160041332244873, "learning_rate": 0.00019980904953163878, "loss": 1.7567, "step": 21440 }, { "epoch": 2.581227436823105, "grad_norm": 5.435869216918945, "learning_rate": 0.00019980881447489445, "loss": 1.9945, "step": 21450 }, { "epoch": 2.582430806257521, "grad_norm": 3.9015581607818604, "learning_rate": 0.0001998085792737021, "loss": 1.7607, "step": 21460 }, { "epoch": 2.5836341756919374, "grad_norm": 6.834968090057373, "learning_rate": 0.00019980834392806213, "loss": 1.8431, "step": 21470 }, { "epoch": 2.5848375451263537, "grad_norm": 5.225518226623535, "learning_rate": 0.00019980810843797485, "loss": 1.7017, "step": 21480 }, { "epoch": 2.58604091456077, "grad_norm": 4.1456379890441895, "learning_rate": 0.00019980787280344056, "loss": 1.8974, "step": 21490 }, { "epoch": 2.5872442839951866, "grad_norm": 5.812695026397705, "learning_rate": 0.00019980763702445964, "loss": 1.9485, "step": 21500 }, { "epoch": 2.588447653429603, "grad_norm": 4.893465995788574, "learning_rate": 0.0001998074011010324, "loss": 1.8701, "step": 21510 }, { "epoch": 2.589651022864019, "grad_norm": 6.693197250366211, "learning_rate": 0.0001998071650331593, "loss": 1.8423, "step": 21520 }, { "epoch": 2.5908543922984357, "grad_norm": 4.072821140289307, "learning_rate": 0.00019980692882084052, "loss": 1.6027, "step": 21530 }, { "epoch": 2.592057761732852, "grad_norm": 3.2784483432769775, "learning_rate": 0.0001998066924640765, "loss": 1.7356, "step": 21540 }, { "epoch": 2.593261131167268, "grad_norm": 5.643657207489014, "learning_rate": 0.00019980645596286752, "loss": 1.7846, "step": 21550 }, { "epoch": 2.594464500601685, "grad_norm": 4.14614725112915, "learning_rate": 0.00019980621931721398, "loss": 2.0322, "step": 21560 }, { "epoch": 2.595667870036101, "grad_norm": 6.763193130493164, "learning_rate": 0.0001998059825271162, "loss": 1.7348, "step": 21570 }, { "epoch": 2.5968712394705173, "grad_norm": 4.553989410400391, "learning_rate": 0.0001998057455925745, "loss": 1.7151, "step": 21580 }, { "epoch": 2.598074608904934, "grad_norm": 3.643972158432007, "learning_rate": 0.00019980550851358928, "loss": 1.8768, "step": 21590 }, { "epoch": 2.5992779783393503, "grad_norm": 6.013065338134766, "learning_rate": 0.00019980527129016083, "loss": 1.6403, "step": 21600 }, { "epoch": 2.6004813477737665, "grad_norm": 4.082161903381348, "learning_rate": 0.0001998050339222895, "loss": 1.7879, "step": 21610 }, { "epoch": 2.601684717208183, "grad_norm": 6.888028621673584, "learning_rate": 0.00019980479640997566, "loss": 1.8847, "step": 21620 }, { "epoch": 2.6028880866425994, "grad_norm": 4.781867504119873, "learning_rate": 0.00019980455875321965, "loss": 1.9966, "step": 21630 }, { "epoch": 2.6040914560770156, "grad_norm": 3.677938938140869, "learning_rate": 0.00019980432095202178, "loss": 1.8977, "step": 21640 }, { "epoch": 2.605294825511432, "grad_norm": 6.58203649520874, "learning_rate": 0.00019980408300638243, "loss": 1.8308, "step": 21650 }, { "epoch": 2.606498194945848, "grad_norm": 4.445764541625977, "learning_rate": 0.00019980384491630193, "loss": 2.0844, "step": 21660 }, { "epoch": 2.607701564380265, "grad_norm": 7.188562393188477, "learning_rate": 0.00019980360668178063, "loss": 1.7353, "step": 21670 }, { "epoch": 2.608904933814681, "grad_norm": 6.179540157318115, "learning_rate": 0.00019980336830281885, "loss": 1.9086, "step": 21680 }, { "epoch": 2.6101083032490973, "grad_norm": 3.520310401916504, "learning_rate": 0.00019980312977941697, "loss": 1.8717, "step": 21690 }, { "epoch": 2.611311672683514, "grad_norm": 6.002782344818115, "learning_rate": 0.00019980289111157533, "loss": 1.8961, "step": 21700 }, { "epoch": 2.61251504211793, "grad_norm": 4.381802082061768, "learning_rate": 0.00019980265229929426, "loss": 1.9205, "step": 21710 }, { "epoch": 2.6137184115523464, "grad_norm": 6.384566307067871, "learning_rate": 0.00019980241334257407, "loss": 1.7532, "step": 21720 }, { "epoch": 2.614921780986763, "grad_norm": 5.342832565307617, "learning_rate": 0.0001998021742414152, "loss": 1.9215, "step": 21730 }, { "epoch": 2.6161251504211793, "grad_norm": 3.5546505451202393, "learning_rate": 0.00019980193499581793, "loss": 1.9366, "step": 21740 }, { "epoch": 2.6173285198555956, "grad_norm": 6.061792850494385, "learning_rate": 0.0001998016956057826, "loss": 1.836, "step": 21750 }, { "epoch": 2.6185318892900122, "grad_norm": 3.879345655441284, "learning_rate": 0.0001998014560713096, "loss": 1.7855, "step": 21760 }, { "epoch": 2.6197352587244285, "grad_norm": 6.151535987854004, "learning_rate": 0.00019980121639239927, "loss": 1.8757, "step": 21770 }, { "epoch": 2.6209386281588447, "grad_norm": 5.681988716125488, "learning_rate": 0.0001998009765690519, "loss": 1.8547, "step": 21780 }, { "epoch": 2.6221419975932614, "grad_norm": 3.828183650970459, "learning_rate": 0.00019980073660126791, "loss": 1.8725, "step": 21790 }, { "epoch": 2.6233453670276776, "grad_norm": 5.166325569152832, "learning_rate": 0.0001998004964890476, "loss": 1.7821, "step": 21800 }, { "epoch": 2.624548736462094, "grad_norm": 5.182612419128418, "learning_rate": 0.00019980025623239132, "loss": 2.1479, "step": 21810 }, { "epoch": 2.62575210589651, "grad_norm": 6.3542094230651855, "learning_rate": 0.00019980001583129944, "loss": 1.5588, "step": 21820 }, { "epoch": 2.6269554753309263, "grad_norm": 5.207058429718018, "learning_rate": 0.00019979977528577228, "loss": 1.8539, "step": 21830 }, { "epoch": 2.628158844765343, "grad_norm": 3.4649484157562256, "learning_rate": 0.00019979953459581024, "loss": 1.7373, "step": 21840 }, { "epoch": 2.6293622141997592, "grad_norm": 5.4307780265808105, "learning_rate": 0.0001997992937614136, "loss": 1.8633, "step": 21850 }, { "epoch": 2.6305655836341755, "grad_norm": 3.1341311931610107, "learning_rate": 0.00019979905278258276, "loss": 1.8146, "step": 21860 }, { "epoch": 2.631768953068592, "grad_norm": 6.513323783874512, "learning_rate": 0.00019979881165931804, "loss": 1.9327, "step": 21870 }, { "epoch": 2.6329723225030084, "grad_norm": 4.858741283416748, "learning_rate": 0.00019979857039161981, "loss": 1.9581, "step": 21880 }, { "epoch": 2.6341756919374246, "grad_norm": 3.1470422744750977, "learning_rate": 0.00019979832897948843, "loss": 1.8119, "step": 21890 }, { "epoch": 2.6353790613718413, "grad_norm": 5.330724239349365, "learning_rate": 0.00019979808742292418, "loss": 1.7647, "step": 21900 }, { "epoch": 2.6365824308062575, "grad_norm": 3.593313217163086, "learning_rate": 0.00019979784572192748, "loss": 1.93, "step": 21910 }, { "epoch": 2.6377858002406738, "grad_norm": 7.264196395874023, "learning_rate": 0.00019979760387649868, "loss": 1.9577, "step": 21920 }, { "epoch": 2.6389891696750905, "grad_norm": 4.571249961853027, "learning_rate": 0.0001997973618866381, "loss": 1.8587, "step": 21930 }, { "epoch": 2.6401925391095067, "grad_norm": 4.024495601654053, "learning_rate": 0.00019979711975234607, "loss": 1.9326, "step": 21940 }, { "epoch": 2.641395908543923, "grad_norm": 6.552509307861328, "learning_rate": 0.000199796877473623, "loss": 1.8718, "step": 21950 }, { "epoch": 2.6425992779783396, "grad_norm": 4.4985527992248535, "learning_rate": 0.00019979663505046918, "loss": 1.8838, "step": 21960 }, { "epoch": 2.643802647412756, "grad_norm": 7.27703332901001, "learning_rate": 0.000199796392482885, "loss": 1.8949, "step": 21970 }, { "epoch": 2.645006016847172, "grad_norm": 3.9912779331207275, "learning_rate": 0.00019979614977087083, "loss": 1.93, "step": 21980 }, { "epoch": 2.6462093862815883, "grad_norm": 4.768033504486084, "learning_rate": 0.00019979590691442695, "loss": 1.7158, "step": 21990 }, { "epoch": 2.6474127557160045, "grad_norm": 7.420052528381348, "learning_rate": 0.00019979566391355378, "loss": 1.7915, "step": 22000 }, { "epoch": 2.648616125150421, "grad_norm": 4.023570537567139, "learning_rate": 0.00019979542076825163, "loss": 1.9327, "step": 22010 }, { "epoch": 2.6498194945848375, "grad_norm": 7.8548197746276855, "learning_rate": 0.00019979517747852087, "loss": 1.82, "step": 22020 }, { "epoch": 2.6510228640192537, "grad_norm": 5.435407638549805, "learning_rate": 0.00019979493404436187, "loss": 1.9069, "step": 22030 }, { "epoch": 2.6522262334536704, "grad_norm": 3.096574544906616, "learning_rate": 0.00019979469046577496, "loss": 1.967, "step": 22040 }, { "epoch": 2.6534296028880866, "grad_norm": 5.799994468688965, "learning_rate": 0.00019979444674276048, "loss": 1.8177, "step": 22050 }, { "epoch": 2.654632972322503, "grad_norm": 4.348621368408203, "learning_rate": 0.00019979420287531878, "loss": 1.7023, "step": 22060 }, { "epoch": 2.6558363417569195, "grad_norm": 6.873908996582031, "learning_rate": 0.00019979395886345026, "loss": 1.9659, "step": 22070 }, { "epoch": 2.6570397111913358, "grad_norm": 4.480973243713379, "learning_rate": 0.00019979371470715522, "loss": 1.7301, "step": 22080 }, { "epoch": 2.658243080625752, "grad_norm": 3.9883265495300293, "learning_rate": 0.00019979347040643405, "loss": 1.9525, "step": 22090 }, { "epoch": 2.6594464500601687, "grad_norm": 5.971280574798584, "learning_rate": 0.0001997932259612871, "loss": 1.7962, "step": 22100 }, { "epoch": 2.660649819494585, "grad_norm": 4.024284839630127, "learning_rate": 0.0001997929813717147, "loss": 1.6596, "step": 22110 }, { "epoch": 2.661853188929001, "grad_norm": 6.47664213180542, "learning_rate": 0.0001997927366377172, "loss": 1.7912, "step": 22120 }, { "epoch": 2.663056558363418, "grad_norm": 4.878775119781494, "learning_rate": 0.000199792491759295, "loss": 2.0033, "step": 22130 }, { "epoch": 2.664259927797834, "grad_norm": 3.3809902667999268, "learning_rate": 0.00019979224673644843, "loss": 1.9003, "step": 22140 }, { "epoch": 2.6654632972322503, "grad_norm": 5.8103346824646, "learning_rate": 0.00019979200156917783, "loss": 1.8983, "step": 22150 }, { "epoch": 2.6666666666666665, "grad_norm": 4.957248687744141, "learning_rate": 0.00019979175625748356, "loss": 1.9426, "step": 22160 }, { "epoch": 2.667870036101083, "grad_norm": 7.3740105628967285, "learning_rate": 0.00019979151080136597, "loss": 1.7852, "step": 22170 }, { "epoch": 2.6690734055354994, "grad_norm": 5.305781364440918, "learning_rate": 0.00019979126520082544, "loss": 1.967, "step": 22180 }, { "epoch": 2.6702767749699157, "grad_norm": 3.9674527645111084, "learning_rate": 0.00019979101945586235, "loss": 1.9288, "step": 22190 }, { "epoch": 2.671480144404332, "grad_norm": 5.577676773071289, "learning_rate": 0.00019979077356647697, "loss": 1.8592, "step": 22200 }, { "epoch": 2.6726835138387486, "grad_norm": 4.325507640838623, "learning_rate": 0.0001997905275326697, "loss": 1.6857, "step": 22210 }, { "epoch": 2.673886883273165, "grad_norm": 8.045052528381348, "learning_rate": 0.00019979028135444092, "loss": 1.9639, "step": 22220 }, { "epoch": 2.675090252707581, "grad_norm": 5.293313026428223, "learning_rate": 0.00019979003503179095, "loss": 1.891, "step": 22230 }, { "epoch": 2.6762936221419977, "grad_norm": 3.4179043769836426, "learning_rate": 0.0001997897885647202, "loss": 2.0617, "step": 22240 }, { "epoch": 2.677496991576414, "grad_norm": 5.773406982421875, "learning_rate": 0.00019978954195322895, "loss": 1.9614, "step": 22250 }, { "epoch": 2.67870036101083, "grad_norm": 4.094402313232422, "learning_rate": 0.00019978929519731762, "loss": 1.8016, "step": 22260 }, { "epoch": 2.679903730445247, "grad_norm": 7.269993305206299, "learning_rate": 0.0001997890482969865, "loss": 2.0542, "step": 22270 }, { "epoch": 2.681107099879663, "grad_norm": 4.553739070892334, "learning_rate": 0.00019978880125223603, "loss": 1.6936, "step": 22280 }, { "epoch": 2.6823104693140793, "grad_norm": 4.25542688369751, "learning_rate": 0.00019978855406306654, "loss": 2.0244, "step": 22290 }, { "epoch": 2.683513838748496, "grad_norm": 6.054823398590088, "learning_rate": 0.00019978830672947836, "loss": 1.836, "step": 22300 }, { "epoch": 2.6847172081829123, "grad_norm": 5.341670513153076, "learning_rate": 0.00019978805925147182, "loss": 1.7344, "step": 22310 }, { "epoch": 2.6859205776173285, "grad_norm": 6.641788959503174, "learning_rate": 0.00019978781162904735, "loss": 2.0983, "step": 22320 }, { "epoch": 2.6871239470517447, "grad_norm": 5.529277324676514, "learning_rate": 0.00019978756386220532, "loss": 1.8698, "step": 22330 }, { "epoch": 2.6883273164861614, "grad_norm": 3.4033820629119873, "learning_rate": 0.000199787315950946, "loss": 1.8614, "step": 22340 }, { "epoch": 2.6895306859205776, "grad_norm": 5.466459274291992, "learning_rate": 0.0001997870678952698, "loss": 1.6805, "step": 22350 }, { "epoch": 2.690734055354994, "grad_norm": 4.521859645843506, "learning_rate": 0.0001997868196951771, "loss": 1.6645, "step": 22360 }, { "epoch": 2.69193742478941, "grad_norm": 7.471880912780762, "learning_rate": 0.00019978657135066822, "loss": 1.8694, "step": 22370 }, { "epoch": 2.693140794223827, "grad_norm": 4.243253707885742, "learning_rate": 0.00019978632286174355, "loss": 2.0404, "step": 22380 }, { "epoch": 2.694344163658243, "grad_norm": 3.6581270694732666, "learning_rate": 0.0001997860742284034, "loss": 1.9062, "step": 22390 }, { "epoch": 2.6955475330926593, "grad_norm": 5.81765604019165, "learning_rate": 0.0001997858254506482, "loss": 1.7131, "step": 22400 }, { "epoch": 2.696750902527076, "grad_norm": 4.9746904373168945, "learning_rate": 0.00019978557652847826, "loss": 1.8687, "step": 22410 }, { "epoch": 2.697954271961492, "grad_norm": 5.633240699768066, "learning_rate": 0.00019978532746189393, "loss": 1.6438, "step": 22420 }, { "epoch": 2.6991576413959084, "grad_norm": 5.067403793334961, "learning_rate": 0.00019978507825089566, "loss": 1.854, "step": 22430 }, { "epoch": 2.700361010830325, "grad_norm": 3.9688496589660645, "learning_rate": 0.0001997848288954837, "loss": 1.7494, "step": 22440 }, { "epoch": 2.7015643802647413, "grad_norm": 5.424824237823486, "learning_rate": 0.00019978457939565846, "loss": 1.9343, "step": 22450 }, { "epoch": 2.7027677496991576, "grad_norm": 3.9777631759643555, "learning_rate": 0.00019978432975142029, "loss": 1.7792, "step": 22460 }, { "epoch": 2.7039711191335742, "grad_norm": 7.992976665496826, "learning_rate": 0.00019978407996276958, "loss": 2.0647, "step": 22470 }, { "epoch": 2.7051744885679905, "grad_norm": 4.650917053222656, "learning_rate": 0.00019978383002970665, "loss": 1.9787, "step": 22480 }, { "epoch": 2.7063778580024067, "grad_norm": 3.4096484184265137, "learning_rate": 0.0001997835799522319, "loss": 1.921, "step": 22490 }, { "epoch": 2.707581227436823, "grad_norm": 5.971583843231201, "learning_rate": 0.00019978332973034566, "loss": 1.8383, "step": 22500 }, { "epoch": 2.7087845968712396, "grad_norm": 4.310146331787109, "learning_rate": 0.0001997830793640483, "loss": 1.8006, "step": 22510 }, { "epoch": 2.709987966305656, "grad_norm": 6.434731960296631, "learning_rate": 0.00019978282885334018, "loss": 2.0012, "step": 22520 }, { "epoch": 2.711191335740072, "grad_norm": 4.9365129470825195, "learning_rate": 0.00019978257819822172, "loss": 1.8275, "step": 22530 }, { "epoch": 2.7123947051744883, "grad_norm": 3.4394466876983643, "learning_rate": 0.0001997823273986932, "loss": 1.87, "step": 22540 }, { "epoch": 2.713598074608905, "grad_norm": 5.213423728942871, "learning_rate": 0.000199782076454755, "loss": 1.803, "step": 22550 }, { "epoch": 2.7148014440433212, "grad_norm": 4.815708637237549, "learning_rate": 0.00019978182536640752, "loss": 1.8732, "step": 22560 }, { "epoch": 2.7160048134777375, "grad_norm": 6.802679061889648, "learning_rate": 0.0001997815741336511, "loss": 1.7972, "step": 22570 }, { "epoch": 2.717208182912154, "grad_norm": 4.965303421020508, "learning_rate": 0.00019978132275648615, "loss": 1.8636, "step": 22580 }, { "epoch": 2.7184115523465704, "grad_norm": 3.497936248779297, "learning_rate": 0.00019978107123491294, "loss": 1.997, "step": 22590 }, { "epoch": 2.7196149217809866, "grad_norm": 6.207457065582275, "learning_rate": 0.0001997808195689319, "loss": 1.5876, "step": 22600 }, { "epoch": 2.7208182912154033, "grad_norm": 4.572378158569336, "learning_rate": 0.00019978056775854337, "loss": 1.9223, "step": 22610 }, { "epoch": 2.7220216606498195, "grad_norm": 7.0065202713012695, "learning_rate": 0.00019978031580374772, "loss": 1.8662, "step": 22620 }, { "epoch": 2.7232250300842358, "grad_norm": 4.983758926391602, "learning_rate": 0.00019978006370454534, "loss": 1.7349, "step": 22630 }, { "epoch": 2.7244283995186525, "grad_norm": 3.7315967082977295, "learning_rate": 0.00019977981146093656, "loss": 1.8489, "step": 22640 }, { "epoch": 2.7256317689530687, "grad_norm": 5.7065839767456055, "learning_rate": 0.00019977955907292177, "loss": 1.7187, "step": 22650 }, { "epoch": 2.726835138387485, "grad_norm": 3.9562723636627197, "learning_rate": 0.0001997793065405013, "loss": 1.7005, "step": 22660 }, { "epoch": 2.728038507821901, "grad_norm": 6.072809219360352, "learning_rate": 0.00019977905386367554, "loss": 1.9656, "step": 22670 }, { "epoch": 2.729241877256318, "grad_norm": 4.331212520599365, "learning_rate": 0.0001997788010424449, "loss": 1.963, "step": 22680 }, { "epoch": 2.730445246690734, "grad_norm": 3.5483672618865967, "learning_rate": 0.00019977854807680964, "loss": 1.788, "step": 22690 }, { "epoch": 2.7316486161251503, "grad_norm": 5.671530246734619, "learning_rate": 0.0001997782949667702, "loss": 1.7867, "step": 22700 }, { "epoch": 2.7328519855595665, "grad_norm": 4.266991138458252, "learning_rate": 0.00019977804171232694, "loss": 1.804, "step": 22710 }, { "epoch": 2.7340553549939832, "grad_norm": 7.12966251373291, "learning_rate": 0.00019977778831348024, "loss": 1.9685, "step": 22720 }, { "epoch": 2.7352587244283995, "grad_norm": 5.654016494750977, "learning_rate": 0.00019977753477023043, "loss": 1.9926, "step": 22730 }, { "epoch": 2.7364620938628157, "grad_norm": 3.3825466632843018, "learning_rate": 0.0001997772810825779, "loss": 1.8915, "step": 22740 }, { "epoch": 2.7376654632972324, "grad_norm": 5.568051815032959, "learning_rate": 0.000199777027250523, "loss": 1.8189, "step": 22750 }, { "epoch": 2.7388688327316486, "grad_norm": 4.8424201011657715, "learning_rate": 0.0001997767732740661, "loss": 1.8559, "step": 22760 }, { "epoch": 2.740072202166065, "grad_norm": 8.739863395690918, "learning_rate": 0.0001997765191532076, "loss": 1.8546, "step": 22770 }, { "epoch": 2.7412755716004815, "grad_norm": 4.288040637969971, "learning_rate": 0.00019977626488794782, "loss": 1.5549, "step": 22780 }, { "epoch": 2.7424789410348978, "grad_norm": 3.2434864044189453, "learning_rate": 0.00019977601047828716, "loss": 1.98, "step": 22790 }, { "epoch": 2.743682310469314, "grad_norm": 4.835328102111816, "learning_rate": 0.00019977575592422598, "loss": 1.9714, "step": 22800 }, { "epoch": 2.7448856799037307, "grad_norm": 5.448550224304199, "learning_rate": 0.00019977550122576463, "loss": 1.8661, "step": 22810 }, { "epoch": 2.746089049338147, "grad_norm": 6.611584186553955, "learning_rate": 0.0001997752463829035, "loss": 2.0355, "step": 22820 }, { "epoch": 2.747292418772563, "grad_norm": 5.065571308135986, "learning_rate": 0.00019977499139564296, "loss": 1.6624, "step": 22830 }, { "epoch": 2.74849578820698, "grad_norm": 3.28558349609375, "learning_rate": 0.0001997747362639834, "loss": 2.0575, "step": 22840 }, { "epoch": 2.749699157641396, "grad_norm": 6.408359527587891, "learning_rate": 0.00019977448098792514, "loss": 1.8269, "step": 22850 }, { "epoch": 2.7509025270758123, "grad_norm": 4.822616100311279, "learning_rate": 0.00019977422556746857, "loss": 1.797, "step": 22860 }, { "epoch": 2.7521058965102285, "grad_norm": 7.014891147613525, "learning_rate": 0.00019977397000261408, "loss": 2.1429, "step": 22870 }, { "epoch": 2.7533092659446448, "grad_norm": 5.4905171394348145, "learning_rate": 0.000199773714293362, "loss": 1.7902, "step": 22880 }, { "epoch": 2.7545126353790614, "grad_norm": 3.7834863662719727, "learning_rate": 0.00019977345843971275, "loss": 1.8674, "step": 22890 }, { "epoch": 2.7557160048134777, "grad_norm": 5.6181535720825195, "learning_rate": 0.00019977320244166663, "loss": 1.8222, "step": 22900 }, { "epoch": 2.756919374247894, "grad_norm": 4.6425395011901855, "learning_rate": 0.00019977294629922406, "loss": 2.0321, "step": 22910 }, { "epoch": 2.7581227436823106, "grad_norm": 6.405762672424316, "learning_rate": 0.00019977269001238544, "loss": 1.8953, "step": 22920 }, { "epoch": 2.759326113116727, "grad_norm": 5.6902313232421875, "learning_rate": 0.0001997724335811511, "loss": 1.6785, "step": 22930 }, { "epoch": 2.760529482551143, "grad_norm": 3.4790284633636475, "learning_rate": 0.00019977217700552138, "loss": 1.9017, "step": 22940 }, { "epoch": 2.7617328519855597, "grad_norm": 6.034868240356445, "learning_rate": 0.00019977192028549672, "loss": 1.9224, "step": 22950 }, { "epoch": 2.762936221419976, "grad_norm": 4.756815433502197, "learning_rate": 0.00019977166342107741, "loss": 1.913, "step": 22960 }, { "epoch": 2.764139590854392, "grad_norm": 6.949899673461914, "learning_rate": 0.00019977140641226393, "loss": 1.7652, "step": 22970 }, { "epoch": 2.765342960288809, "grad_norm": 4.67728853225708, "learning_rate": 0.00019977114925905657, "loss": 1.9011, "step": 22980 }, { "epoch": 2.766546329723225, "grad_norm": 3.5102603435516357, "learning_rate": 0.00019977089196145573, "loss": 2.1067, "step": 22990 }, { "epoch": 2.7677496991576414, "grad_norm": 5.74012565612793, "learning_rate": 0.00019977063451946176, "loss": 2.0833, "step": 23000 }, { "epoch": 2.768953068592058, "grad_norm": 4.5373616218566895, "learning_rate": 0.00019977037693307508, "loss": 1.8623, "step": 23010 }, { "epoch": 2.7701564380264743, "grad_norm": 6.993628025054932, "learning_rate": 0.000199770119202296, "loss": 1.7051, "step": 23020 }, { "epoch": 2.7713598074608905, "grad_norm": 4.627161979675293, "learning_rate": 0.00019976986132712492, "loss": 1.8854, "step": 23030 }, { "epoch": 2.7725631768953067, "grad_norm": 4.533265590667725, "learning_rate": 0.00019976960330756222, "loss": 2.2145, "step": 23040 }, { "epoch": 2.773766546329723, "grad_norm": 5.27393102645874, "learning_rate": 0.0001997693451436083, "loss": 1.7967, "step": 23050 }, { "epoch": 2.7749699157641396, "grad_norm": 4.08444881439209, "learning_rate": 0.00019976908683526349, "loss": 2.0044, "step": 23060 }, { "epoch": 2.776173285198556, "grad_norm": 6.545950889587402, "learning_rate": 0.00019976882838252817, "loss": 1.9537, "step": 23070 }, { "epoch": 2.777376654632972, "grad_norm": 4.16444730758667, "learning_rate": 0.00019976856978540274, "loss": 1.7258, "step": 23080 }, { "epoch": 2.778580024067389, "grad_norm": 5.483246803283691, "learning_rate": 0.00019976831104388755, "loss": 1.8828, "step": 23090 }, { "epoch": 2.779783393501805, "grad_norm": 6.4330153465271, "learning_rate": 0.000199768052157983, "loss": 1.8548, "step": 23100 }, { "epoch": 2.7809867629362213, "grad_norm": 5.590136528015137, "learning_rate": 0.00019976779312768942, "loss": 1.7924, "step": 23110 }, { "epoch": 2.782190132370638, "grad_norm": 6.044424533843994, "learning_rate": 0.00019976753395300722, "loss": 1.9499, "step": 23120 }, { "epoch": 2.783393501805054, "grad_norm": 5.545258045196533, "learning_rate": 0.00019976727463393677, "loss": 1.7807, "step": 23130 }, { "epoch": 2.7845968712394704, "grad_norm": 3.408629894256592, "learning_rate": 0.00019976701517047843, "loss": 2.015, "step": 23140 }, { "epoch": 2.785800240673887, "grad_norm": 6.710221767425537, "learning_rate": 0.0001997667555626326, "loss": 1.7425, "step": 23150 }, { "epoch": 2.7870036101083033, "grad_norm": 4.039109230041504, "learning_rate": 0.00019976649581039964, "loss": 1.9283, "step": 23160 }, { "epoch": 2.7882069795427196, "grad_norm": 7.614040851593018, "learning_rate": 0.00019976623591377992, "loss": 1.803, "step": 23170 }, { "epoch": 2.7894103489771362, "grad_norm": 4.319331169128418, "learning_rate": 0.00019976597587277383, "loss": 1.6085, "step": 23180 }, { "epoch": 2.7906137184115525, "grad_norm": 3.3719117641448975, "learning_rate": 0.00019976571568738175, "loss": 1.9033, "step": 23190 }, { "epoch": 2.7918170878459687, "grad_norm": 5.799076557159424, "learning_rate": 0.00019976545535760405, "loss": 2.0389, "step": 23200 }, { "epoch": 2.793020457280385, "grad_norm": 3.9176876544952393, "learning_rate": 0.00019976519488344109, "loss": 1.9371, "step": 23210 }, { "epoch": 2.794223826714801, "grad_norm": 6.319335460662842, "learning_rate": 0.0001997649342648933, "loss": 2.0688, "step": 23220 }, { "epoch": 2.795427196149218, "grad_norm": 4.826409339904785, "learning_rate": 0.00019976467350196097, "loss": 1.9013, "step": 23230 }, { "epoch": 2.796630565583634, "grad_norm": 3.234429359436035, "learning_rate": 0.00019976441259464455, "loss": 1.9304, "step": 23240 }, { "epoch": 2.7978339350180503, "grad_norm": 5.633996486663818, "learning_rate": 0.00019976415154294438, "loss": 1.9082, "step": 23250 }, { "epoch": 2.799037304452467, "grad_norm": 6.302307605743408, "learning_rate": 0.00019976389034686085, "loss": 1.775, "step": 23260 }, { "epoch": 2.8002406738868832, "grad_norm": 6.776801109313965, "learning_rate": 0.00019976362900639435, "loss": 1.9379, "step": 23270 }, { "epoch": 2.8014440433212995, "grad_norm": 4.77065896987915, "learning_rate": 0.00019976336752154525, "loss": 1.7112, "step": 23280 }, { "epoch": 2.802647412755716, "grad_norm": 3.3480606079101562, "learning_rate": 0.00019976310589231392, "loss": 1.6538, "step": 23290 }, { "epoch": 2.8038507821901324, "grad_norm": 6.364193439483643, "learning_rate": 0.00019976284411870078, "loss": 1.9962, "step": 23300 }, { "epoch": 2.8050541516245486, "grad_norm": 3.677845001220703, "learning_rate": 0.0001997625822007061, "loss": 1.7511, "step": 23310 }, { "epoch": 2.8062575210589653, "grad_norm": 7.029861927032471, "learning_rate": 0.00019976232013833037, "loss": 1.8961, "step": 23320 }, { "epoch": 2.8074608904933815, "grad_norm": 4.60750150680542, "learning_rate": 0.00019976205793157396, "loss": 1.7058, "step": 23330 }, { "epoch": 2.808664259927798, "grad_norm": 4.142286777496338, "learning_rate": 0.0001997617955804372, "loss": 1.9601, "step": 23340 }, { "epoch": 2.8098676293622145, "grad_norm": 5.287271499633789, "learning_rate": 0.00019976153308492049, "loss": 1.9171, "step": 23350 }, { "epoch": 2.8110709987966307, "grad_norm": 4.549359321594238, "learning_rate": 0.00019976127044502422, "loss": 1.8671, "step": 23360 }, { "epoch": 2.812274368231047, "grad_norm": 6.705315589904785, "learning_rate": 0.00019976100766074873, "loss": 1.9865, "step": 23370 }, { "epoch": 2.813477737665463, "grad_norm": 4.726195335388184, "learning_rate": 0.0001997607447320945, "loss": 1.7513, "step": 23380 }, { "epoch": 2.8146811070998794, "grad_norm": 3.288010597229004, "learning_rate": 0.00019976048165906178, "loss": 1.8895, "step": 23390 }, { "epoch": 2.815884476534296, "grad_norm": 5.803439617156982, "learning_rate": 0.00019976021844165102, "loss": 2.0504, "step": 23400 }, { "epoch": 2.8170878459687123, "grad_norm": 5.1353535652160645, "learning_rate": 0.0001997599550798626, "loss": 1.9217, "step": 23410 }, { "epoch": 2.8182912154031285, "grad_norm": 6.596933364868164, "learning_rate": 0.00019975969157369692, "loss": 2.0369, "step": 23420 }, { "epoch": 2.8194945848375452, "grad_norm": 6.023066520690918, "learning_rate": 0.00019975942792315433, "loss": 1.9239, "step": 23430 }, { "epoch": 2.8206979542719615, "grad_norm": 3.893421173095703, "learning_rate": 0.00019975916412823518, "loss": 1.864, "step": 23440 }, { "epoch": 2.8219013237063777, "grad_norm": 6.207060813903809, "learning_rate": 0.00019975890018893993, "loss": 1.8543, "step": 23450 }, { "epoch": 2.8231046931407944, "grad_norm": 4.030089378356934, "learning_rate": 0.00019975863610526888, "loss": 2.0821, "step": 23460 }, { "epoch": 2.8243080625752106, "grad_norm": 7.685181617736816, "learning_rate": 0.0001997583718772225, "loss": 1.8729, "step": 23470 }, { "epoch": 2.825511432009627, "grad_norm": 4.690788269042969, "learning_rate": 0.00019975810750480112, "loss": 1.8349, "step": 23480 }, { "epoch": 2.8267148014440435, "grad_norm": 3.888273239135742, "learning_rate": 0.00019975784298800512, "loss": 1.9653, "step": 23490 }, { "epoch": 2.8279181708784598, "grad_norm": 5.698587894439697, "learning_rate": 0.00019975757832683489, "loss": 1.8071, "step": 23500 }, { "epoch": 2.829121540312876, "grad_norm": 4.445411205291748, "learning_rate": 0.0001997573135212908, "loss": 1.9974, "step": 23510 }, { "epoch": 2.8303249097472927, "grad_norm": 6.7462897300720215, "learning_rate": 0.00019975704857137325, "loss": 2.0135, "step": 23520 }, { "epoch": 2.831528279181709, "grad_norm": 4.6754536628723145, "learning_rate": 0.00019975678347708265, "loss": 1.6235, "step": 23530 }, { "epoch": 2.832731648616125, "grad_norm": 4.018809795379639, "learning_rate": 0.00019975651823841932, "loss": 1.7181, "step": 23540 }, { "epoch": 2.8339350180505414, "grad_norm": 6.560571193695068, "learning_rate": 0.0001997562528553837, "loss": 1.9334, "step": 23550 }, { "epoch": 2.8351383874849576, "grad_norm": 4.695711135864258, "learning_rate": 0.00019975598732797613, "loss": 1.7037, "step": 23560 }, { "epoch": 2.8363417569193743, "grad_norm": 5.652125835418701, "learning_rate": 0.00019975572165619704, "loss": 1.8348, "step": 23570 }, { "epoch": 2.8375451263537905, "grad_norm": 4.512273788452148, "learning_rate": 0.0001997554558400468, "loss": 1.7516, "step": 23580 }, { "epoch": 2.8387484957882068, "grad_norm": 3.05820894241333, "learning_rate": 0.00019975518987952576, "loss": 1.9117, "step": 23590 }, { "epoch": 2.8399518652226234, "grad_norm": 5.962392330169678, "learning_rate": 0.00019975492377463435, "loss": 1.6103, "step": 23600 }, { "epoch": 2.8411552346570397, "grad_norm": 5.1483049392700195, "learning_rate": 0.00019975465752537295, "loss": 1.9393, "step": 23610 }, { "epoch": 2.842358604091456, "grad_norm": 7.242753028869629, "learning_rate": 0.00019975439113174192, "loss": 1.9032, "step": 23620 }, { "epoch": 2.8435619735258726, "grad_norm": 5.350884437561035, "learning_rate": 0.00019975412459374164, "loss": 1.9788, "step": 23630 }, { "epoch": 2.844765342960289, "grad_norm": 3.617100715637207, "learning_rate": 0.0001997538579113725, "loss": 1.986, "step": 23640 }, { "epoch": 2.845968712394705, "grad_norm": 6.817924499511719, "learning_rate": 0.00019975359108463496, "loss": 1.9646, "step": 23650 }, { "epoch": 2.8471720818291217, "grad_norm": 5.259153366088867, "learning_rate": 0.0001997533241135293, "loss": 1.837, "step": 23660 }, { "epoch": 2.848375451263538, "grad_norm": 5.788957595825195, "learning_rate": 0.00019975305699805596, "loss": 1.7797, "step": 23670 }, { "epoch": 2.849578820697954, "grad_norm": 4.100399494171143, "learning_rate": 0.00019975278973821532, "loss": 1.723, "step": 23680 }, { "epoch": 2.850782190132371, "grad_norm": 3.2705655097961426, "learning_rate": 0.00019975252233400776, "loss": 1.6919, "step": 23690 }, { "epoch": 2.851985559566787, "grad_norm": 5.190719127655029, "learning_rate": 0.00019975225478543367, "loss": 1.7586, "step": 23700 }, { "epoch": 2.8531889290012034, "grad_norm": 4.4668731689453125, "learning_rate": 0.00019975198709249346, "loss": 1.6683, "step": 23710 }, { "epoch": 2.8543922984356196, "grad_norm": 5.66727352142334, "learning_rate": 0.00019975171925518747, "loss": 2.0834, "step": 23720 }, { "epoch": 2.855595667870036, "grad_norm": 4.898777008056641, "learning_rate": 0.00019975145127351613, "loss": 1.7352, "step": 23730 }, { "epoch": 2.8567990373044525, "grad_norm": 3.783613920211792, "learning_rate": 0.0001997511831474798, "loss": 1.827, "step": 23740 }, { "epoch": 2.8580024067388687, "grad_norm": 9.265079498291016, "learning_rate": 0.0001997509148770789, "loss": 1.8773, "step": 23750 }, { "epoch": 2.859205776173285, "grad_norm": 4.308231830596924, "learning_rate": 0.00019975064646231378, "loss": 1.86, "step": 23760 }, { "epoch": 2.8604091456077017, "grad_norm": 5.7617411613464355, "learning_rate": 0.00019975037790318488, "loss": 2.0498, "step": 23770 }, { "epoch": 2.861612515042118, "grad_norm": 6.328868389129639, "learning_rate": 0.00019975010919969253, "loss": 1.7433, "step": 23780 }, { "epoch": 2.862815884476534, "grad_norm": 3.3558099269866943, "learning_rate": 0.00019974984035183715, "loss": 1.9166, "step": 23790 }, { "epoch": 2.864019253910951, "grad_norm": 5.800656318664551, "learning_rate": 0.0001997495713596191, "loss": 2.0589, "step": 23800 }, { "epoch": 2.865222623345367, "grad_norm": 4.649380207061768, "learning_rate": 0.00019974930222303882, "loss": 1.8411, "step": 23810 }, { "epoch": 2.8664259927797833, "grad_norm": 6.917948246002197, "learning_rate": 0.00019974903294209666, "loss": 2.0134, "step": 23820 }, { "epoch": 2.8676293622142, "grad_norm": 4.925341606140137, "learning_rate": 0.00019974876351679303, "loss": 1.6361, "step": 23830 }, { "epoch": 2.868832731648616, "grad_norm": 4.769173622131348, "learning_rate": 0.00019974849394712832, "loss": 2.1269, "step": 23840 }, { "epoch": 2.8700361010830324, "grad_norm": 5.753753662109375, "learning_rate": 0.0001997482242331029, "loss": 1.9573, "step": 23850 }, { "epoch": 2.871239470517449, "grad_norm": 3.9171459674835205, "learning_rate": 0.00019974795437471715, "loss": 1.8201, "step": 23860 }, { "epoch": 2.8724428399518653, "grad_norm": 7.415256977081299, "learning_rate": 0.00019974768437197154, "loss": 2.1456, "step": 23870 }, { "epoch": 2.8736462093862816, "grad_norm": 5.117340087890625, "learning_rate": 0.00019974741422486636, "loss": 1.6893, "step": 23880 }, { "epoch": 2.874849578820698, "grad_norm": 3.578383207321167, "learning_rate": 0.00019974714393340205, "loss": 1.9563, "step": 23890 }, { "epoch": 2.8760529482551145, "grad_norm": 5.163645267486572, "learning_rate": 0.00019974687349757898, "loss": 1.83, "step": 23900 }, { "epoch": 2.8772563176895307, "grad_norm": 4.585817813873291, "learning_rate": 0.0001997466029173976, "loss": 1.9772, "step": 23910 }, { "epoch": 2.878459687123947, "grad_norm": 7.4253830909729, "learning_rate": 0.00019974633219285823, "loss": 1.9644, "step": 23920 }, { "epoch": 2.879663056558363, "grad_norm": 4.518739700317383, "learning_rate": 0.00019974606132396128, "loss": 1.839, "step": 23930 }, { "epoch": 2.88086642599278, "grad_norm": 4.008555889129639, "learning_rate": 0.00019974579031070718, "loss": 1.9355, "step": 23940 }, { "epoch": 2.882069795427196, "grad_norm": 5.686634063720703, "learning_rate": 0.0001997455191530963, "loss": 1.7624, "step": 23950 }, { "epoch": 2.8832731648616123, "grad_norm": 4.48956823348999, "learning_rate": 0.000199745247851129, "loss": 1.805, "step": 23960 }, { "epoch": 2.884476534296029, "grad_norm": 7.608232498168945, "learning_rate": 0.0001997449764048057, "loss": 2.0757, "step": 23970 }, { "epoch": 2.8856799037304453, "grad_norm": 5.478265285491943, "learning_rate": 0.0001997447048141268, "loss": 2.0261, "step": 23980 }, { "epoch": 2.8868832731648615, "grad_norm": 3.0724799633026123, "learning_rate": 0.0001997444330790927, "loss": 1.9523, "step": 23990 }, { "epoch": 2.888086642599278, "grad_norm": 6.806112289428711, "learning_rate": 0.00019974416119970378, "loss": 1.8005, "step": 24000 }, { "epoch": 2.8892900120336944, "grad_norm": 4.565250873565674, "learning_rate": 0.00019974388917596043, "loss": 1.8808, "step": 24010 }, { "epoch": 2.8904933814681106, "grad_norm": 5.74095344543457, "learning_rate": 0.00019974361700786304, "loss": 2.1453, "step": 24020 }, { "epoch": 2.8916967509025273, "grad_norm": 4.752941131591797, "learning_rate": 0.000199743344695412, "loss": 1.6872, "step": 24030 }, { "epoch": 2.8929001203369435, "grad_norm": 3.5324058532714844, "learning_rate": 0.00019974307223860773, "loss": 1.9335, "step": 24040 }, { "epoch": 2.89410348977136, "grad_norm": 5.88004732131958, "learning_rate": 0.00019974279963745061, "loss": 1.7958, "step": 24050 }, { "epoch": 2.895306859205776, "grad_norm": 4.072971820831299, "learning_rate": 0.000199742526891941, "loss": 2.0524, "step": 24060 }, { "epoch": 2.8965102286401927, "grad_norm": 8.117522239685059, "learning_rate": 0.00019974225400207937, "loss": 1.7339, "step": 24070 }, { "epoch": 2.897713598074609, "grad_norm": 4.792110443115234, "learning_rate": 0.00019974198096786607, "loss": 1.7673, "step": 24080 }, { "epoch": 2.898916967509025, "grad_norm": 3.5243046283721924, "learning_rate": 0.00019974170778930148, "loss": 1.6451, "step": 24090 }, { "epoch": 2.9001203369434414, "grad_norm": 6.484212398529053, "learning_rate": 0.00019974143446638602, "loss": 1.9361, "step": 24100 }, { "epoch": 2.901323706377858, "grad_norm": 4.859592437744141, "learning_rate": 0.0001997411609991201, "loss": 1.9698, "step": 24110 }, { "epoch": 2.9025270758122743, "grad_norm": 6.178374290466309, "learning_rate": 0.00019974088738750406, "loss": 1.7964, "step": 24120 }, { "epoch": 2.9037304452466906, "grad_norm": 5.469675064086914, "learning_rate": 0.00019974061363153833, "loss": 1.7654, "step": 24130 }, { "epoch": 2.9049338146811072, "grad_norm": 3.926400661468506, "learning_rate": 0.00019974033973122333, "loss": 2.025, "step": 24140 }, { "epoch": 2.9061371841155235, "grad_norm": 5.796122074127197, "learning_rate": 0.00019974006568655942, "loss": 1.7356, "step": 24150 }, { "epoch": 2.9073405535499397, "grad_norm": 3.973008871078491, "learning_rate": 0.000199739791497547, "loss": 1.7903, "step": 24160 }, { "epoch": 2.9085439229843564, "grad_norm": 6.530457496643066, "learning_rate": 0.0001997395171641865, "loss": 1.7029, "step": 24170 }, { "epoch": 2.9097472924187726, "grad_norm": 4.4459614753723145, "learning_rate": 0.00019973924268647827, "loss": 1.8556, "step": 24180 }, { "epoch": 2.910950661853189, "grad_norm": 3.5894134044647217, "learning_rate": 0.00019973896806442276, "loss": 1.9814, "step": 24190 }, { "epoch": 2.9121540312876055, "grad_norm": 5.986927032470703, "learning_rate": 0.00019973869329802034, "loss": 1.9392, "step": 24200 }, { "epoch": 2.9133574007220218, "grad_norm": 4.745354175567627, "learning_rate": 0.0001997384183872714, "loss": 1.9679, "step": 24210 }, { "epoch": 2.914560770156438, "grad_norm": 5.20849084854126, "learning_rate": 0.00019973814333217632, "loss": 1.8934, "step": 24220 }, { "epoch": 2.9157641395908542, "grad_norm": 5.41616153717041, "learning_rate": 0.00019973786813273553, "loss": 1.8729, "step": 24230 }, { "epoch": 2.916967509025271, "grad_norm": 3.55181884765625, "learning_rate": 0.0001997375927889494, "loss": 1.6742, "step": 24240 }, { "epoch": 2.918170878459687, "grad_norm": 6.453026294708252, "learning_rate": 0.0001997373173008184, "loss": 2.0943, "step": 24250 }, { "epoch": 2.9193742478941034, "grad_norm": 4.038445472717285, "learning_rate": 0.00019973704166834284, "loss": 1.8318, "step": 24260 }, { "epoch": 2.9205776173285196, "grad_norm": 7.606594562530518, "learning_rate": 0.00019973676589152316, "loss": 1.8344, "step": 24270 }, { "epoch": 2.9217809867629363, "grad_norm": 5.004695892333984, "learning_rate": 0.00019973648997035978, "loss": 1.7604, "step": 24280 }, { "epoch": 2.9229843561973525, "grad_norm": 3.423807144165039, "learning_rate": 0.00019973621390485303, "loss": 1.9921, "step": 24290 }, { "epoch": 2.9241877256317688, "grad_norm": 5.876102924346924, "learning_rate": 0.00019973593769500342, "loss": 1.8028, "step": 24300 }, { "epoch": 2.9253910950661854, "grad_norm": 5.172757625579834, "learning_rate": 0.00019973566134081123, "loss": 1.8126, "step": 24310 }, { "epoch": 2.9265944645006017, "grad_norm": 6.53843355178833, "learning_rate": 0.00019973538484227695, "loss": 1.7146, "step": 24320 }, { "epoch": 2.927797833935018, "grad_norm": 5.280789375305176, "learning_rate": 0.00019973510819940092, "loss": 1.8282, "step": 24330 }, { "epoch": 2.9290012033694346, "grad_norm": 2.917238473892212, "learning_rate": 0.00019973483141218356, "loss": 1.9238, "step": 24340 }, { "epoch": 2.930204572803851, "grad_norm": 6.673867225646973, "learning_rate": 0.00019973455448062528, "loss": 1.8187, "step": 24350 }, { "epoch": 2.931407942238267, "grad_norm": 4.365293979644775, "learning_rate": 0.0001997342774047265, "loss": 1.961, "step": 24360 }, { "epoch": 2.9326113116726837, "grad_norm": 7.935892105102539, "learning_rate": 0.00019973400018448757, "loss": 1.9479, "step": 24370 }, { "epoch": 2.9338146811071, "grad_norm": 4.404233455657959, "learning_rate": 0.00019973372281990894, "loss": 1.8156, "step": 24380 }, { "epoch": 2.935018050541516, "grad_norm": 3.4874532222747803, "learning_rate": 0.00019973344531099097, "loss": 2.0095, "step": 24390 }, { "epoch": 2.9362214199759324, "grad_norm": 5.68506383895874, "learning_rate": 0.00019973316765773408, "loss": 1.892, "step": 24400 }, { "epoch": 2.937424789410349, "grad_norm": 4.7850260734558105, "learning_rate": 0.00019973288986013866, "loss": 1.8653, "step": 24410 }, { "epoch": 2.9386281588447654, "grad_norm": 7.241191864013672, "learning_rate": 0.00019973261191820517, "loss": 1.9725, "step": 24420 }, { "epoch": 2.9398315282791816, "grad_norm": 5.28766393661499, "learning_rate": 0.00019973233383193396, "loss": 1.9111, "step": 24430 }, { "epoch": 2.941034897713598, "grad_norm": 3.953686475753784, "learning_rate": 0.0001997320556013254, "loss": 1.7484, "step": 24440 }, { "epoch": 2.9422382671480145, "grad_norm": 5.54633092880249, "learning_rate": 0.00019973177722637996, "loss": 1.8454, "step": 24450 }, { "epoch": 2.9434416365824307, "grad_norm": 4.033097267150879, "learning_rate": 0.00019973149870709803, "loss": 1.9392, "step": 24460 }, { "epoch": 2.944645006016847, "grad_norm": 6.493199348449707, "learning_rate": 0.00019973122004347997, "loss": 2.0715, "step": 24470 }, { "epoch": 2.9458483754512637, "grad_norm": 4.184035301208496, "learning_rate": 0.0001997309412355262, "loss": 1.8558, "step": 24480 }, { "epoch": 2.94705174488568, "grad_norm": 4.929866313934326, "learning_rate": 0.0001997306622832372, "loss": 1.8668, "step": 24490 }, { "epoch": 2.948255114320096, "grad_norm": 5.194972515106201, "learning_rate": 0.00019973038318661326, "loss": 1.9602, "step": 24500 }, { "epoch": 2.949458483754513, "grad_norm": 4.429738521575928, "learning_rate": 0.0001997301039456548, "loss": 1.8157, "step": 24510 }, { "epoch": 2.950661853188929, "grad_norm": 7.0151519775390625, "learning_rate": 0.00019972982456036232, "loss": 2.0124, "step": 24520 }, { "epoch": 2.9518652226233453, "grad_norm": 5.25596284866333, "learning_rate": 0.00019972954503073614, "loss": 1.9175, "step": 24530 }, { "epoch": 2.953068592057762, "grad_norm": 3.3625776767730713, "learning_rate": 0.00019972926535677667, "loss": 1.8197, "step": 24540 }, { "epoch": 2.954271961492178, "grad_norm": 5.699072360992432, "learning_rate": 0.00019972898553848433, "loss": 1.6902, "step": 24550 }, { "epoch": 2.9554753309265944, "grad_norm": 4.332197666168213, "learning_rate": 0.00019972870557585958, "loss": 1.7647, "step": 24560 }, { "epoch": 2.956678700361011, "grad_norm": 5.7750020027160645, "learning_rate": 0.00019972842546890269, "loss": 1.8058, "step": 24570 }, { "epoch": 2.9578820697954273, "grad_norm": 5.301845073699951, "learning_rate": 0.0001997281452176142, "loss": 1.8142, "step": 24580 }, { "epoch": 2.9590854392298436, "grad_norm": 3.2149767875671387, "learning_rate": 0.00019972786482199445, "loss": 1.8397, "step": 24590 }, { "epoch": 2.96028880866426, "grad_norm": 5.64735221862793, "learning_rate": 0.00019972758428204386, "loss": 1.744, "step": 24600 }, { "epoch": 2.961492178098676, "grad_norm": 3.4880824089050293, "learning_rate": 0.00019972730359776284, "loss": 1.8447, "step": 24610 }, { "epoch": 2.9626955475330927, "grad_norm": 6.680085182189941, "learning_rate": 0.00019972702276915176, "loss": 1.8838, "step": 24620 }, { "epoch": 2.963898916967509, "grad_norm": 5.064449787139893, "learning_rate": 0.00019972674179621108, "loss": 1.6851, "step": 24630 }, { "epoch": 2.965102286401925, "grad_norm": 3.714988946914673, "learning_rate": 0.00019972646067894115, "loss": 1.8391, "step": 24640 }, { "epoch": 2.966305655836342, "grad_norm": 6.406152248382568, "learning_rate": 0.00019972617941734245, "loss": 1.8827, "step": 24650 }, { "epoch": 2.967509025270758, "grad_norm": 4.480008125305176, "learning_rate": 0.00019972589801141535, "loss": 1.9606, "step": 24660 }, { "epoch": 2.9687123947051743, "grad_norm": 6.958746433258057, "learning_rate": 0.00019972561646116024, "loss": 2.0608, "step": 24670 }, { "epoch": 2.969915764139591, "grad_norm": 5.884395599365234, "learning_rate": 0.00019972533476657757, "loss": 1.7973, "step": 24680 }, { "epoch": 2.9711191335740073, "grad_norm": 4.368142127990723, "learning_rate": 0.00019972505292766766, "loss": 1.7885, "step": 24690 }, { "epoch": 2.9723225030084235, "grad_norm": 5.5579705238342285, "learning_rate": 0.00019972477094443102, "loss": 2.0045, "step": 24700 }, { "epoch": 2.97352587244284, "grad_norm": 4.20939826965332, "learning_rate": 0.00019972448881686798, "loss": 2.0522, "step": 24710 }, { "epoch": 2.9747292418772564, "grad_norm": 7.418403148651123, "learning_rate": 0.000199724206544979, "loss": 2.0363, "step": 24720 }, { "epoch": 2.9759326113116726, "grad_norm": 6.376554489135742, "learning_rate": 0.00019972392412876452, "loss": 1.9427, "step": 24730 }, { "epoch": 2.9771359807460893, "grad_norm": 3.917717933654785, "learning_rate": 0.00019972364156822483, "loss": 1.7764, "step": 24740 }, { "epoch": 2.9783393501805056, "grad_norm": 5.7326836585998535, "learning_rate": 0.00019972335886336043, "loss": 1.9515, "step": 24750 }, { "epoch": 2.979542719614922, "grad_norm": 3.9300084114074707, "learning_rate": 0.00019972307601417172, "loss": 1.9282, "step": 24760 }, { "epoch": 2.980746089049338, "grad_norm": 6.65960168838501, "learning_rate": 0.0001997227930206591, "loss": 1.9123, "step": 24770 }, { "epoch": 2.9819494584837543, "grad_norm": 5.051340579986572, "learning_rate": 0.00019972250988282297, "loss": 1.5143, "step": 24780 }, { "epoch": 2.983152827918171, "grad_norm": 3.710218906402588, "learning_rate": 0.00019972222660066377, "loss": 1.9307, "step": 24790 }, { "epoch": 2.984356197352587, "grad_norm": 5.473617076873779, "learning_rate": 0.00019972194317418186, "loss": 1.639, "step": 24800 }, { "epoch": 2.9855595667870034, "grad_norm": 3.464711904525757, "learning_rate": 0.00019972165960337768, "loss": 1.9858, "step": 24810 }, { "epoch": 2.98676293622142, "grad_norm": 7.262167453765869, "learning_rate": 0.00019972137588825163, "loss": 1.8256, "step": 24820 }, { "epoch": 2.9879663056558363, "grad_norm": 3.993345260620117, "learning_rate": 0.00019972109202880413, "loss": 1.4988, "step": 24830 }, { "epoch": 2.9891696750902526, "grad_norm": 3.6429708003997803, "learning_rate": 0.00019972080802503563, "loss": 1.8395, "step": 24840 }, { "epoch": 2.9903730445246692, "grad_norm": 6.208446979522705, "learning_rate": 0.00019972052387694645, "loss": 1.7543, "step": 24850 }, { "epoch": 2.9915764139590855, "grad_norm": 5.225845813751221, "learning_rate": 0.0001997202395845371, "loss": 1.827, "step": 24860 }, { "epoch": 2.9927797833935017, "grad_norm": 6.085515975952148, "learning_rate": 0.0001997199551478079, "loss": 1.921, "step": 24870 }, { "epoch": 2.9939831528279184, "grad_norm": 5.2673869132995605, "learning_rate": 0.00019971967056675932, "loss": 1.8041, "step": 24880 }, { "epoch": 2.9951865222623346, "grad_norm": 3.902085781097412, "learning_rate": 0.00019971938584139177, "loss": 2.168, "step": 24890 }, { "epoch": 2.996389891696751, "grad_norm": 5.707598686218262, "learning_rate": 0.00019971910097170564, "loss": 1.9218, "step": 24900 }, { "epoch": 2.9975932611311675, "grad_norm": 4.32947301864624, "learning_rate": 0.00019971881595770135, "loss": 1.9337, "step": 24910 }, { "epoch": 2.9987966305655838, "grad_norm": 6.908236503601074, "learning_rate": 0.0001997185307993793, "loss": 1.9814, "step": 24920 }, { "epoch": 3.0, "grad_norm": 6.15101957321167, "learning_rate": 0.00019971824549673992, "loss": 1.9834, "step": 24930 }, { "epoch": 3.0, "eval_loss": 1.8243311643600464, "eval_runtime": 119.5201, "eval_samples_per_second": 61.806, "eval_steps_per_second": 7.731, "step": 24930 }, { "epoch": 3.0012033694344162, "grad_norm": 5.2833147048950195, "learning_rate": 0.00019971796004978365, "loss": 1.7781, "step": 24940 }, { "epoch": 3.002406738868833, "grad_norm": 3.5444066524505615, "learning_rate": 0.00019971767445851085, "loss": 1.8386, "step": 24950 }, { "epoch": 3.003610108303249, "grad_norm": 5.110971927642822, "learning_rate": 0.00019971738872292193, "loss": 1.843, "step": 24960 }, { "epoch": 3.0048134777376654, "grad_norm": 3.896223545074463, "learning_rate": 0.00019971710284301736, "loss": 1.579, "step": 24970 }, { "epoch": 3.0060168471720816, "grad_norm": 5.891197204589844, "learning_rate": 0.0001997168168187975, "loss": 1.912, "step": 24980 }, { "epoch": 3.0072202166064983, "grad_norm": 4.517705917358398, "learning_rate": 0.0001997165306502628, "loss": 1.5632, "step": 24990 }, { "epoch": 3.0084235860409145, "grad_norm": 3.676591157913208, "learning_rate": 0.0001997162443374137, "loss": 1.6171, "step": 25000 }, { "epoch": 3.0096269554753308, "grad_norm": 5.9294610023498535, "learning_rate": 0.00019971595788025054, "loss": 2.0759, "step": 25010 }, { "epoch": 3.0108303249097474, "grad_norm": 4.007317066192627, "learning_rate": 0.00019971567127877375, "loss": 1.6752, "step": 25020 }, { "epoch": 3.0120336943441637, "grad_norm": 6.3257646560668945, "learning_rate": 0.00019971538453298376, "loss": 1.6235, "step": 25030 }, { "epoch": 3.01323706377858, "grad_norm": 4.893363952636719, "learning_rate": 0.00019971509764288103, "loss": 1.7263, "step": 25040 }, { "epoch": 3.0144404332129966, "grad_norm": 3.142510414123535, "learning_rate": 0.00019971481060846594, "loss": 1.6694, "step": 25050 }, { "epoch": 3.015643802647413, "grad_norm": 5.823273658752441, "learning_rate": 0.00019971452342973888, "loss": 1.7035, "step": 25060 }, { "epoch": 3.016847172081829, "grad_norm": 3.8851661682128906, "learning_rate": 0.00019971423610670028, "loss": 1.6738, "step": 25070 }, { "epoch": 3.0180505415162453, "grad_norm": 6.197877407073975, "learning_rate": 0.00019971394863935055, "loss": 1.5583, "step": 25080 }, { "epoch": 3.019253910950662, "grad_norm": 5.340153217315674, "learning_rate": 0.00019971366102769012, "loss": 1.48, "step": 25090 }, { "epoch": 3.020457280385078, "grad_norm": 3.749476909637451, "learning_rate": 0.00019971337327171943, "loss": 1.5379, "step": 25100 }, { "epoch": 3.0216606498194944, "grad_norm": 4.925633430480957, "learning_rate": 0.00019971308537143886, "loss": 1.5849, "step": 25110 }, { "epoch": 3.022864019253911, "grad_norm": 4.040213108062744, "learning_rate": 0.00019971279732684883, "loss": 1.5022, "step": 25120 }, { "epoch": 3.0240673886883274, "grad_norm": 7.1975297927856445, "learning_rate": 0.0001997125091379498, "loss": 1.7892, "step": 25130 }, { "epoch": 3.0252707581227436, "grad_norm": 4.804437637329102, "learning_rate": 0.0001997122208047421, "loss": 1.6166, "step": 25140 }, { "epoch": 3.02647412755716, "grad_norm": 4.398497104644775, "learning_rate": 0.00019971193232722623, "loss": 1.7888, "step": 25150 }, { "epoch": 3.0276774969915765, "grad_norm": 6.2576375007629395, "learning_rate": 0.00019971164370540255, "loss": 1.6956, "step": 25160 }, { "epoch": 3.0288808664259927, "grad_norm": 4.663084030151367, "learning_rate": 0.00019971135493927154, "loss": 1.6047, "step": 25170 }, { "epoch": 3.030084235860409, "grad_norm": 6.627993106842041, "learning_rate": 0.00019971106602883354, "loss": 1.6708, "step": 25180 }, { "epoch": 3.0312876052948257, "grad_norm": 4.21818733215332, "learning_rate": 0.00019971077697408904, "loss": 1.6286, "step": 25190 }, { "epoch": 3.032490974729242, "grad_norm": 4.1206135749816895, "learning_rate": 0.00019971048777503843, "loss": 1.7903, "step": 25200 }, { "epoch": 3.033694344163658, "grad_norm": 6.09546422958374, "learning_rate": 0.0001997101984316821, "loss": 1.7904, "step": 25210 }, { "epoch": 3.034897713598075, "grad_norm": 4.578518390655518, "learning_rate": 0.0001997099089440205, "loss": 1.6353, "step": 25220 }, { "epoch": 3.036101083032491, "grad_norm": 6.658674240112305, "learning_rate": 0.00019970961931205404, "loss": 1.7366, "step": 25230 }, { "epoch": 3.0373044524669073, "grad_norm": 5.015432834625244, "learning_rate": 0.0001997093295357832, "loss": 1.6397, "step": 25240 }, { "epoch": 3.0385078219013235, "grad_norm": 4.643667697906494, "learning_rate": 0.00019970903961520828, "loss": 1.5767, "step": 25250 }, { "epoch": 3.03971119133574, "grad_norm": 6.313589572906494, "learning_rate": 0.00019970874955032978, "loss": 1.6188, "step": 25260 }, { "epoch": 3.0409145607701564, "grad_norm": 3.5082130432128906, "learning_rate": 0.0001997084593411481, "loss": 1.7944, "step": 25270 }, { "epoch": 3.0421179302045727, "grad_norm": 7.634500026702881, "learning_rate": 0.0001997081689876637, "loss": 1.7638, "step": 25280 }, { "epoch": 3.0433212996389893, "grad_norm": 5.156243324279785, "learning_rate": 0.00019970787848987688, "loss": 1.75, "step": 25290 }, { "epoch": 3.0445246690734056, "grad_norm": 3.355189800262451, "learning_rate": 0.00019970758784778822, "loss": 1.662, "step": 25300 }, { "epoch": 3.045728038507822, "grad_norm": 5.785003185272217, "learning_rate": 0.00019970729706139803, "loss": 1.5954, "step": 25310 }, { "epoch": 3.046931407942238, "grad_norm": 4.599120616912842, "learning_rate": 0.00019970700613070676, "loss": 1.6176, "step": 25320 }, { "epoch": 3.0481347773766547, "grad_norm": 6.337950706481934, "learning_rate": 0.00019970671505571485, "loss": 1.5627, "step": 25330 }, { "epoch": 3.049338146811071, "grad_norm": 5.127763271331787, "learning_rate": 0.0001997064238364227, "loss": 1.6074, "step": 25340 }, { "epoch": 3.050541516245487, "grad_norm": 3.8004400730133057, "learning_rate": 0.00019970613247283073, "loss": 1.6712, "step": 25350 }, { "epoch": 3.051744885679904, "grad_norm": 6.495005130767822, "learning_rate": 0.0001997058409649394, "loss": 1.6494, "step": 25360 }, { "epoch": 3.05294825511432, "grad_norm": 3.9298477172851562, "learning_rate": 0.00019970554931274907, "loss": 1.5972, "step": 25370 }, { "epoch": 3.0541516245487363, "grad_norm": 6.946953773498535, "learning_rate": 0.00019970525751626021, "loss": 1.7613, "step": 25380 }, { "epoch": 3.055354993983153, "grad_norm": 4.861685276031494, "learning_rate": 0.0001997049655754732, "loss": 1.7822, "step": 25390 }, { "epoch": 3.0565583634175693, "grad_norm": 3.3610851764678955, "learning_rate": 0.0001997046734903885, "loss": 1.6958, "step": 25400 }, { "epoch": 3.0577617328519855, "grad_norm": 4.182857990264893, "learning_rate": 0.00019970438126100655, "loss": 1.6831, "step": 25410 }, { "epoch": 3.0589651022864017, "grad_norm": 4.551860809326172, "learning_rate": 0.0001997040888873277, "loss": 1.7159, "step": 25420 }, { "epoch": 3.0601684717208184, "grad_norm": 7.149112224578857, "learning_rate": 0.00019970379636935246, "loss": 1.5726, "step": 25430 }, { "epoch": 3.0613718411552346, "grad_norm": 5.928719997406006, "learning_rate": 0.0001997035037070812, "loss": 1.7758, "step": 25440 }, { "epoch": 3.062575210589651, "grad_norm": 3.2937114238739014, "learning_rate": 0.00019970321090051433, "loss": 1.5158, "step": 25450 }, { "epoch": 3.0637785800240676, "grad_norm": 5.856424808502197, "learning_rate": 0.00019970291794965233, "loss": 1.6228, "step": 25460 }, { "epoch": 3.064981949458484, "grad_norm": 4.503580093383789, "learning_rate": 0.00019970262485449557, "loss": 1.7698, "step": 25470 }, { "epoch": 3.0661853188929, "grad_norm": 7.763538360595703, "learning_rate": 0.0001997023316150445, "loss": 1.7132, "step": 25480 }, { "epoch": 3.0673886883273163, "grad_norm": 4.79904317855835, "learning_rate": 0.00019970203823129953, "loss": 1.6942, "step": 25490 }, { "epoch": 3.068592057761733, "grad_norm": 3.115844488143921, "learning_rate": 0.0001997017447032611, "loss": 1.587, "step": 25500 }, { "epoch": 3.069795427196149, "grad_norm": 6.3336944580078125, "learning_rate": 0.00019970145103092966, "loss": 1.7281, "step": 25510 }, { "epoch": 3.0709987966305654, "grad_norm": 3.8646318912506104, "learning_rate": 0.00019970115721430558, "loss": 1.6836, "step": 25520 }, { "epoch": 3.072202166064982, "grad_norm": 6.718202114105225, "learning_rate": 0.0001997008632533893, "loss": 1.6268, "step": 25530 }, { "epoch": 3.0734055354993983, "grad_norm": 4.689975738525391, "learning_rate": 0.00019970056914818128, "loss": 1.7044, "step": 25540 }, { "epoch": 3.0746089049338146, "grad_norm": 4.25233793258667, "learning_rate": 0.00019970027489868192, "loss": 1.6907, "step": 25550 }, { "epoch": 3.0758122743682312, "grad_norm": 7.111978530883789, "learning_rate": 0.00019969998050489162, "loss": 1.7677, "step": 25560 }, { "epoch": 3.0770156438026475, "grad_norm": 4.082468509674072, "learning_rate": 0.00019969968596681085, "loss": 1.7532, "step": 25570 }, { "epoch": 3.0782190132370637, "grad_norm": 6.769084930419922, "learning_rate": 0.00019969939128444, "loss": 1.5383, "step": 25580 }, { "epoch": 3.07942238267148, "grad_norm": 5.465221881866455, "learning_rate": 0.00019969909645777952, "loss": 1.4176, "step": 25590 }, { "epoch": 3.0806257521058966, "grad_norm": 4.117949962615967, "learning_rate": 0.00019969880148682986, "loss": 1.8286, "step": 25600 }, { "epoch": 3.081829121540313, "grad_norm": 4.822063446044922, "learning_rate": 0.0001996985063715914, "loss": 1.7715, "step": 25610 }, { "epoch": 3.083032490974729, "grad_norm": 4.072781562805176, "learning_rate": 0.00019969821111206462, "loss": 1.8508, "step": 25620 }, { "epoch": 3.0842358604091458, "grad_norm": 7.279651165008545, "learning_rate": 0.00019969791570824988, "loss": 1.7172, "step": 25630 }, { "epoch": 3.085439229843562, "grad_norm": 4.747053146362305, "learning_rate": 0.00019969762016014764, "loss": 1.7487, "step": 25640 }, { "epoch": 3.0866425992779782, "grad_norm": 3.4259531497955322, "learning_rate": 0.00019969732446775833, "loss": 1.6637, "step": 25650 }, { "epoch": 3.087845968712395, "grad_norm": 6.953102111816406, "learning_rate": 0.00019969702863108238, "loss": 1.6367, "step": 25660 }, { "epoch": 3.089049338146811, "grad_norm": 4.110683917999268, "learning_rate": 0.00019969673265012023, "loss": 1.695, "step": 25670 }, { "epoch": 3.0902527075812274, "grad_norm": 7.62086820602417, "learning_rate": 0.00019969643652487226, "loss": 1.7798, "step": 25680 }, { "epoch": 3.0914560770156436, "grad_norm": 4.716604709625244, "learning_rate": 0.00019969614025533897, "loss": 1.8736, "step": 25690 }, { "epoch": 3.0926594464500603, "grad_norm": 3.269044876098633, "learning_rate": 0.00019969584384152073, "loss": 1.9083, "step": 25700 }, { "epoch": 3.0938628158844765, "grad_norm": 7.067758083343506, "learning_rate": 0.000199695547283418, "loss": 1.9578, "step": 25710 }, { "epoch": 3.0950661853188928, "grad_norm": 4.73967170715332, "learning_rate": 0.0001996952505810312, "loss": 1.7247, "step": 25720 }, { "epoch": 3.0962695547533094, "grad_norm": 7.015458106994629, "learning_rate": 0.00019969495373436074, "loss": 1.5704, "step": 25730 }, { "epoch": 3.0974729241877257, "grad_norm": 4.821101188659668, "learning_rate": 0.00019969465674340707, "loss": 1.8243, "step": 25740 }, { "epoch": 3.098676293622142, "grad_norm": 4.758875846862793, "learning_rate": 0.00019969435960817062, "loss": 1.6514, "step": 25750 }, { "epoch": 3.099879663056558, "grad_norm": 5.373219966888428, "learning_rate": 0.00019969406232865183, "loss": 1.6879, "step": 25760 }, { "epoch": 3.101083032490975, "grad_norm": 4.321028709411621, "learning_rate": 0.00019969376490485116, "loss": 1.6593, "step": 25770 }, { "epoch": 3.102286401925391, "grad_norm": 6.139832973480225, "learning_rate": 0.00019969346733676892, "loss": 1.8688, "step": 25780 }, { "epoch": 3.1034897713598073, "grad_norm": 5.178373336791992, "learning_rate": 0.00019969316962440564, "loss": 1.7524, "step": 25790 }, { "epoch": 3.104693140794224, "grad_norm": 4.018850326538086, "learning_rate": 0.00019969287176776175, "loss": 1.8681, "step": 25800 }, { "epoch": 3.10589651022864, "grad_norm": 6.357236862182617, "learning_rate": 0.00019969257376683766, "loss": 1.7481, "step": 25810 }, { "epoch": 3.1070998796630565, "grad_norm": 4.7395243644714355, "learning_rate": 0.0001996922756216338, "loss": 1.8221, "step": 25820 }, { "epoch": 3.108303249097473, "grad_norm": 7.194039821624756, "learning_rate": 0.0001996919773321506, "loss": 1.9351, "step": 25830 }, { "epoch": 3.1095066185318894, "grad_norm": 5.321618556976318, "learning_rate": 0.00019969167889838848, "loss": 1.7784, "step": 25840 }, { "epoch": 3.1107099879663056, "grad_norm": 3.5895071029663086, "learning_rate": 0.0001996913803203479, "loss": 1.5046, "step": 25850 }, { "epoch": 3.111913357400722, "grad_norm": 6.43202018737793, "learning_rate": 0.0001996910815980293, "loss": 1.573, "step": 25860 }, { "epoch": 3.1131167268351385, "grad_norm": 3.8133111000061035, "learning_rate": 0.00019969078273143306, "loss": 1.8075, "step": 25870 }, { "epoch": 3.1143200962695547, "grad_norm": 6.6105194091796875, "learning_rate": 0.00019969048372055968, "loss": 1.8469, "step": 25880 }, { "epoch": 3.115523465703971, "grad_norm": 5.352079391479492, "learning_rate": 0.0001996901845654095, "loss": 1.7894, "step": 25890 }, { "epoch": 3.1167268351383877, "grad_norm": 4.465076923370361, "learning_rate": 0.00019968988526598307, "loss": 2.003, "step": 25900 }, { "epoch": 3.117930204572804, "grad_norm": 5.748092174530029, "learning_rate": 0.00019968958582228073, "loss": 1.6668, "step": 25910 }, { "epoch": 3.11913357400722, "grad_norm": 4.764005184173584, "learning_rate": 0.00019968928623430296, "loss": 1.7606, "step": 25920 }, { "epoch": 3.1203369434416364, "grad_norm": 7.332487106323242, "learning_rate": 0.0001996889865020502, "loss": 1.5667, "step": 25930 }, { "epoch": 3.121540312876053, "grad_norm": 4.77926778793335, "learning_rate": 0.00019968868662552283, "loss": 1.8115, "step": 25940 }, { "epoch": 3.1227436823104693, "grad_norm": 3.5133652687072754, "learning_rate": 0.0001996883866047213, "loss": 1.8562, "step": 25950 }, { "epoch": 3.1239470517448855, "grad_norm": 6.457540035247803, "learning_rate": 0.0001996880864396461, "loss": 1.523, "step": 25960 }, { "epoch": 3.125150421179302, "grad_norm": 3.8357396125793457, "learning_rate": 0.00019968778613029762, "loss": 1.4882, "step": 25970 }, { "epoch": 3.1263537906137184, "grad_norm": 6.633978843688965, "learning_rate": 0.0001996874856766763, "loss": 1.632, "step": 25980 }, { "epoch": 3.1275571600481347, "grad_norm": 5.3305792808532715, "learning_rate": 0.00019968718507878256, "loss": 1.6133, "step": 25990 }, { "epoch": 3.1287605294825513, "grad_norm": 3.532611608505249, "learning_rate": 0.00019968688433661687, "loss": 1.5845, "step": 26000 }, { "epoch": 3.1299638989169676, "grad_norm": 6.723804473876953, "learning_rate": 0.0001996865834501797, "loss": 1.609, "step": 26010 }, { "epoch": 3.131167268351384, "grad_norm": 5.00460147857666, "learning_rate": 0.00019968628241947133, "loss": 1.6016, "step": 26020 }, { "epoch": 3.1323706377858, "grad_norm": 7.103140354156494, "learning_rate": 0.00019968598124449235, "loss": 1.9265, "step": 26030 }, { "epoch": 3.1335740072202167, "grad_norm": 5.142113208770752, "learning_rate": 0.00019968567992524315, "loss": 1.7629, "step": 26040 }, { "epoch": 3.134777376654633, "grad_norm": 4.044442653656006, "learning_rate": 0.00019968537846172413, "loss": 1.8743, "step": 26050 }, { "epoch": 3.135980746089049, "grad_norm": 4.6342949867248535, "learning_rate": 0.00019968507685393577, "loss": 1.7952, "step": 26060 }, { "epoch": 3.137184115523466, "grad_norm": 4.4024786949157715, "learning_rate": 0.00019968477510187848, "loss": 1.7952, "step": 26070 }, { "epoch": 3.138387484957882, "grad_norm": 6.709743976593018, "learning_rate": 0.00019968447320555275, "loss": 1.8065, "step": 26080 }, { "epoch": 3.1395908543922983, "grad_norm": 5.030803680419922, "learning_rate": 0.00019968417116495894, "loss": 1.5954, "step": 26090 }, { "epoch": 3.140794223826715, "grad_norm": 3.494459629058838, "learning_rate": 0.00019968386898009755, "loss": 1.7154, "step": 26100 }, { "epoch": 3.1419975932611313, "grad_norm": 5.076336860656738, "learning_rate": 0.00019968356665096896, "loss": 1.7257, "step": 26110 }, { "epoch": 3.1432009626955475, "grad_norm": 4.133296012878418, "learning_rate": 0.00019968326417757365, "loss": 1.5624, "step": 26120 }, { "epoch": 3.1444043321299637, "grad_norm": 6.244276523590088, "learning_rate": 0.00019968296155991203, "loss": 1.603, "step": 26130 }, { "epoch": 3.1456077015643804, "grad_norm": 4.406643390655518, "learning_rate": 0.00019968265879798458, "loss": 1.4633, "step": 26140 }, { "epoch": 3.1468110709987966, "grad_norm": 3.8273041248321533, "learning_rate": 0.0001996823558917917, "loss": 1.8377, "step": 26150 }, { "epoch": 3.148014440433213, "grad_norm": 6.194026470184326, "learning_rate": 0.00019968205284133386, "loss": 1.5792, "step": 26160 }, { "epoch": 3.1492178098676296, "grad_norm": 4.927667617797852, "learning_rate": 0.00019968174964661147, "loss": 1.6037, "step": 26170 }, { "epoch": 3.150421179302046, "grad_norm": 7.056860446929932, "learning_rate": 0.00019968144630762495, "loss": 1.9054, "step": 26180 }, { "epoch": 3.151624548736462, "grad_norm": 4.70307731628418, "learning_rate": 0.00019968114282437478, "loss": 1.5009, "step": 26190 }, { "epoch": 3.1528279181708783, "grad_norm": 3.752289295196533, "learning_rate": 0.00019968083919686137, "loss": 1.8344, "step": 26200 }, { "epoch": 3.154031287605295, "grad_norm": 5.970653057098389, "learning_rate": 0.00019968053542508522, "loss": 1.7478, "step": 26210 }, { "epoch": 3.155234657039711, "grad_norm": 4.398066520690918, "learning_rate": 0.0001996802315090467, "loss": 1.8283, "step": 26220 }, { "epoch": 3.1564380264741274, "grad_norm": 7.221063137054443, "learning_rate": 0.0001996799274487463, "loss": 1.7113, "step": 26230 }, { "epoch": 3.157641395908544, "grad_norm": 4.993231773376465, "learning_rate": 0.0001996796232441844, "loss": 1.8439, "step": 26240 }, { "epoch": 3.1588447653429603, "grad_norm": 4.482081413269043, "learning_rate": 0.00019967931889536148, "loss": 1.7393, "step": 26250 }, { "epoch": 3.1600481347773766, "grad_norm": 6.5946221351623535, "learning_rate": 0.00019967901440227798, "loss": 1.7978, "step": 26260 }, { "epoch": 3.1612515042117932, "grad_norm": 5.204263687133789, "learning_rate": 0.00019967870976493433, "loss": 1.6644, "step": 26270 }, { "epoch": 3.1624548736462095, "grad_norm": 6.908845901489258, "learning_rate": 0.000199678404983331, "loss": 1.8345, "step": 26280 }, { "epoch": 3.1636582430806257, "grad_norm": 5.161684989929199, "learning_rate": 0.0001996781000574684, "loss": 1.5343, "step": 26290 }, { "epoch": 3.164861612515042, "grad_norm": 3.900618553161621, "learning_rate": 0.00019967779498734697, "loss": 1.7535, "step": 26300 }, { "epoch": 3.1660649819494586, "grad_norm": 5.767590522766113, "learning_rate": 0.00019967748977296713, "loss": 1.6541, "step": 26310 }, { "epoch": 3.167268351383875, "grad_norm": 3.6571078300476074, "learning_rate": 0.0001996771844143294, "loss": 1.6133, "step": 26320 }, { "epoch": 3.168471720818291, "grad_norm": 7.802633285522461, "learning_rate": 0.00019967687891143417, "loss": 1.5927, "step": 26330 }, { "epoch": 3.1696750902527078, "grad_norm": 5.249373435974121, "learning_rate": 0.0001996765732642819, "loss": 1.5744, "step": 26340 }, { "epoch": 3.170878459687124, "grad_norm": 4.153386116027832, "learning_rate": 0.00019967626747287298, "loss": 1.8146, "step": 26350 }, { "epoch": 3.1720818291215402, "grad_norm": 6.111057758331299, "learning_rate": 0.0001996759615372079, "loss": 1.7322, "step": 26360 }, { "epoch": 3.1732851985559565, "grad_norm": 4.793424129486084, "learning_rate": 0.0001996756554572871, "loss": 1.8235, "step": 26370 }, { "epoch": 3.174488567990373, "grad_norm": 6.269195556640625, "learning_rate": 0.00019967534923311103, "loss": 1.8598, "step": 26380 }, { "epoch": 3.1756919374247894, "grad_norm": 4.551454544067383, "learning_rate": 0.00019967504286468008, "loss": 1.7414, "step": 26390 }, { "epoch": 3.1768953068592056, "grad_norm": 2.942845106124878, "learning_rate": 0.00019967473635199479, "loss": 1.8039, "step": 26400 }, { "epoch": 3.1780986762936223, "grad_norm": 5.321710109710693, "learning_rate": 0.00019967442969505552, "loss": 1.7472, "step": 26410 }, { "epoch": 3.1793020457280385, "grad_norm": 4.249934673309326, "learning_rate": 0.00019967412289386273, "loss": 1.8743, "step": 26420 }, { "epoch": 3.1805054151624548, "grad_norm": 6.844578266143799, "learning_rate": 0.0001996738159484169, "loss": 1.8235, "step": 26430 }, { "epoch": 3.1817087845968715, "grad_norm": 4.887540340423584, "learning_rate": 0.00019967350885871844, "loss": 1.886, "step": 26440 }, { "epoch": 3.1829121540312877, "grad_norm": 4.189870357513428, "learning_rate": 0.0001996732016247678, "loss": 1.8164, "step": 26450 }, { "epoch": 3.184115523465704, "grad_norm": 5.997005939483643, "learning_rate": 0.00019967289424656544, "loss": 1.7669, "step": 26460 }, { "epoch": 3.18531889290012, "grad_norm": 5.44852876663208, "learning_rate": 0.00019967258672411178, "loss": 1.6729, "step": 26470 }, { "epoch": 3.186522262334537, "grad_norm": 7.447607040405273, "learning_rate": 0.00019967227905740728, "loss": 1.8408, "step": 26480 }, { "epoch": 3.187725631768953, "grad_norm": 5.315221309661865, "learning_rate": 0.0001996719712464524, "loss": 1.6769, "step": 26490 }, { "epoch": 3.1889290012033693, "grad_norm": 5.13824987411499, "learning_rate": 0.00019967166329124756, "loss": 1.5989, "step": 26500 }, { "epoch": 3.190132370637786, "grad_norm": 6.396768569946289, "learning_rate": 0.00019967135519179322, "loss": 1.5882, "step": 26510 }, { "epoch": 3.191335740072202, "grad_norm": 4.440583229064941, "learning_rate": 0.0001996710469480898, "loss": 1.657, "step": 26520 }, { "epoch": 3.1925391095066185, "grad_norm": 6.554316997528076, "learning_rate": 0.0001996707385601378, "loss": 1.9095, "step": 26530 }, { "epoch": 3.1937424789410347, "grad_norm": 4.493051528930664, "learning_rate": 0.0001996704300279376, "loss": 1.7128, "step": 26540 }, { "epoch": 3.1949458483754514, "grad_norm": 4.213508129119873, "learning_rate": 0.0001996701213514897, "loss": 1.7449, "step": 26550 }, { "epoch": 3.1961492178098676, "grad_norm": 6.3384833335876465, "learning_rate": 0.0001996698125307945, "loss": 1.5784, "step": 26560 }, { "epoch": 3.197352587244284, "grad_norm": 4.621504783630371, "learning_rate": 0.0001996695035658525, "loss": 1.5697, "step": 26570 }, { "epoch": 3.1985559566787005, "grad_norm": 6.80503511428833, "learning_rate": 0.00019966919445666412, "loss": 1.877, "step": 26580 }, { "epoch": 3.1997593261131168, "grad_norm": 5.692870616912842, "learning_rate": 0.0001996688852032298, "loss": 1.8058, "step": 26590 }, { "epoch": 3.200962695547533, "grad_norm": 3.311182737350464, "learning_rate": 0.00019966857580555, "loss": 1.8477, "step": 26600 }, { "epoch": 3.2021660649819497, "grad_norm": 5.926977634429932, "learning_rate": 0.00019966826626362515, "loss": 1.6738, "step": 26610 }, { "epoch": 3.203369434416366, "grad_norm": 3.704402208328247, "learning_rate": 0.00019966795657745573, "loss": 1.6778, "step": 26620 }, { "epoch": 3.204572803850782, "grad_norm": 8.318079948425293, "learning_rate": 0.00019966764674704215, "loss": 1.9835, "step": 26630 }, { "epoch": 3.2057761732851984, "grad_norm": 5.058340549468994, "learning_rate": 0.0001996673367723849, "loss": 1.6039, "step": 26640 }, { "epoch": 3.206979542719615, "grad_norm": 4.320777893066406, "learning_rate": 0.0001996670266534844, "loss": 1.7686, "step": 26650 }, { "epoch": 3.2081829121540313, "grad_norm": 5.931827068328857, "learning_rate": 0.0001996667163903411, "loss": 1.8661, "step": 26660 }, { "epoch": 3.2093862815884475, "grad_norm": 4.6182050704956055, "learning_rate": 0.00019966640598295546, "loss": 1.8591, "step": 26670 }, { "epoch": 3.210589651022864, "grad_norm": 7.831111431121826, "learning_rate": 0.00019966609543132792, "loss": 1.816, "step": 26680 }, { "epoch": 3.2117930204572804, "grad_norm": 5.658038139343262, "learning_rate": 0.00019966578473545893, "loss": 1.5925, "step": 26690 }, { "epoch": 3.2129963898916967, "grad_norm": 3.685478448867798, "learning_rate": 0.00019966547389534895, "loss": 1.739, "step": 26700 }, { "epoch": 3.214199759326113, "grad_norm": 5.0785956382751465, "learning_rate": 0.0001996651629109984, "loss": 1.5135, "step": 26710 }, { "epoch": 3.2154031287605296, "grad_norm": 4.372645854949951, "learning_rate": 0.0001996648517824078, "loss": 1.7063, "step": 26720 }, { "epoch": 3.216606498194946, "grad_norm": 7.1049628257751465, "learning_rate": 0.00019966454050957753, "loss": 1.5647, "step": 26730 }, { "epoch": 3.217809867629362, "grad_norm": 4.710076808929443, "learning_rate": 0.00019966422909250807, "loss": 1.6602, "step": 26740 }, { "epoch": 3.2190132370637787, "grad_norm": 4.299315929412842, "learning_rate": 0.00019966391753119985, "loss": 1.7935, "step": 26750 }, { "epoch": 3.220216606498195, "grad_norm": 7.561284065246582, "learning_rate": 0.0001996636058256533, "loss": 1.6962, "step": 26760 }, { "epoch": 3.221419975932611, "grad_norm": 4.574403285980225, "learning_rate": 0.00019966329397586895, "loss": 1.925, "step": 26770 }, { "epoch": 3.222623345367028, "grad_norm": 7.978187561035156, "learning_rate": 0.00019966298198184724, "loss": 1.7282, "step": 26780 }, { "epoch": 3.223826714801444, "grad_norm": 4.1126627922058105, "learning_rate": 0.00019966266984358853, "loss": 1.4681, "step": 26790 }, { "epoch": 3.2250300842358604, "grad_norm": 4.2365617752075195, "learning_rate": 0.00019966235756109334, "loss": 1.6335, "step": 26800 }, { "epoch": 3.2262334536702766, "grad_norm": 5.861513137817383, "learning_rate": 0.00019966204513436212, "loss": 1.7006, "step": 26810 }, { "epoch": 3.2274368231046933, "grad_norm": 4.411993980407715, "learning_rate": 0.00019966173256339532, "loss": 1.8046, "step": 26820 }, { "epoch": 3.2286401925391095, "grad_norm": 8.419879913330078, "learning_rate": 0.0001996614198481934, "loss": 1.833, "step": 26830 }, { "epoch": 3.2298435619735257, "grad_norm": 4.881294250488281, "learning_rate": 0.0001996611069887568, "loss": 1.774, "step": 26840 }, { "epoch": 3.2310469314079424, "grad_norm": 3.6665778160095215, "learning_rate": 0.00019966079398508595, "loss": 1.7573, "step": 26850 }, { "epoch": 3.2322503008423586, "grad_norm": 6.3349528312683105, "learning_rate": 0.00019966048083718132, "loss": 1.8857, "step": 26860 }, { "epoch": 3.233453670276775, "grad_norm": 4.5454511642456055, "learning_rate": 0.0001996601675450434, "loss": 1.8435, "step": 26870 }, { "epoch": 3.234657039711191, "grad_norm": 7.010152339935303, "learning_rate": 0.00019965985410867258, "loss": 1.7535, "step": 26880 }, { "epoch": 3.235860409145608, "grad_norm": 5.6580071449279785, "learning_rate": 0.00019965954052806936, "loss": 1.6308, "step": 26890 }, { "epoch": 3.237063778580024, "grad_norm": 3.9693939685821533, "learning_rate": 0.0001996592268032342, "loss": 1.9586, "step": 26900 }, { "epoch": 3.2382671480144403, "grad_norm": 5.903779029846191, "learning_rate": 0.00019965891293416752, "loss": 1.8053, "step": 26910 }, { "epoch": 3.239470517448857, "grad_norm": 4.855864524841309, "learning_rate": 0.00019965859892086979, "loss": 1.8226, "step": 26920 }, { "epoch": 3.240673886883273, "grad_norm": 6.977510452270508, "learning_rate": 0.00019965828476334143, "loss": 1.666, "step": 26930 }, { "epoch": 3.2418772563176894, "grad_norm": 5.6782612800598145, "learning_rate": 0.00019965797046158298, "loss": 1.7855, "step": 26940 }, { "epoch": 3.243080625752106, "grad_norm": 3.3649098873138428, "learning_rate": 0.0001996576560155948, "loss": 1.8999, "step": 26950 }, { "epoch": 3.2442839951865223, "grad_norm": 6.129069805145264, "learning_rate": 0.00019965734142537742, "loss": 1.8824, "step": 26960 }, { "epoch": 3.2454873646209386, "grad_norm": 4.5676984786987305, "learning_rate": 0.00019965702669093123, "loss": 1.7751, "step": 26970 }, { "epoch": 3.246690734055355, "grad_norm": 6.600085735321045, "learning_rate": 0.00019965671181225675, "loss": 1.8884, "step": 26980 }, { "epoch": 3.2478941034897715, "grad_norm": 5.7984395027160645, "learning_rate": 0.00019965639678935437, "loss": 1.6576, "step": 26990 }, { "epoch": 3.2490974729241877, "grad_norm": 3.6901161670684814, "learning_rate": 0.0001996560816222246, "loss": 1.9137, "step": 27000 }, { "epoch": 3.250300842358604, "grad_norm": 6.228165149688721, "learning_rate": 0.00019965576631086787, "loss": 1.8833, "step": 27010 }, { "epoch": 3.2515042117930206, "grad_norm": 3.7460849285125732, "learning_rate": 0.00019965545085528464, "loss": 1.6934, "step": 27020 }, { "epoch": 3.252707581227437, "grad_norm": 6.766097068786621, "learning_rate": 0.00019965513525547536, "loss": 1.8627, "step": 27030 }, { "epoch": 3.253910950661853, "grad_norm": 5.582569599151611, "learning_rate": 0.00019965481951144052, "loss": 1.7107, "step": 27040 }, { "epoch": 3.2551143200962693, "grad_norm": 3.7459499835968018, "learning_rate": 0.00019965450362318055, "loss": 1.9077, "step": 27050 }, { "epoch": 3.256317689530686, "grad_norm": 5.6414031982421875, "learning_rate": 0.00019965418759069586, "loss": 1.6465, "step": 27060 }, { "epoch": 3.2575210589651022, "grad_norm": 4.878627777099609, "learning_rate": 0.00019965387141398702, "loss": 1.7643, "step": 27070 }, { "epoch": 3.2587244283995185, "grad_norm": 6.926384449005127, "learning_rate": 0.00019965355509305442, "loss": 1.925, "step": 27080 }, { "epoch": 3.259927797833935, "grad_norm": 5.348418712615967, "learning_rate": 0.00019965323862789847, "loss": 1.55, "step": 27090 }, { "epoch": 3.2611311672683514, "grad_norm": 4.101518154144287, "learning_rate": 0.0001996529220185197, "loss": 1.6806, "step": 27100 }, { "epoch": 3.2623345367027676, "grad_norm": 6.153325080871582, "learning_rate": 0.00019965260526491857, "loss": 1.8083, "step": 27110 }, { "epoch": 3.2635379061371843, "grad_norm": 4.059078216552734, "learning_rate": 0.00019965228836709553, "loss": 1.6787, "step": 27120 }, { "epoch": 3.2647412755716005, "grad_norm": 6.101953983306885, "learning_rate": 0.000199651971325051, "loss": 1.6913, "step": 27130 }, { "epoch": 3.2659446450060168, "grad_norm": 4.951807498931885, "learning_rate": 0.00019965165413878547, "loss": 1.5629, "step": 27140 }, { "epoch": 3.2671480144404335, "grad_norm": 4.110108375549316, "learning_rate": 0.00019965133680829937, "loss": 1.9686, "step": 27150 }, { "epoch": 3.2683513838748497, "grad_norm": 6.574784278869629, "learning_rate": 0.0001996510193335932, "loss": 1.7583, "step": 27160 }, { "epoch": 3.269554753309266, "grad_norm": 4.252917766571045, "learning_rate": 0.0001996507017146674, "loss": 1.6502, "step": 27170 }, { "epoch": 3.270758122743682, "grad_norm": 6.667531490325928, "learning_rate": 0.00019965038395152246, "loss": 1.7054, "step": 27180 }, { "epoch": 3.271961492178099, "grad_norm": 5.201156139373779, "learning_rate": 0.00019965006604415878, "loss": 1.6824, "step": 27190 }, { "epoch": 3.273164861612515, "grad_norm": 3.730102062225342, "learning_rate": 0.00019964974799257685, "loss": 1.6422, "step": 27200 }, { "epoch": 3.2743682310469313, "grad_norm": 5.933589935302734, "learning_rate": 0.00019964942979677715, "loss": 1.6453, "step": 27210 }, { "epoch": 3.2755716004813475, "grad_norm": 4.717698097229004, "learning_rate": 0.00019964911145676012, "loss": 1.5944, "step": 27220 }, { "epoch": 3.2767749699157642, "grad_norm": 6.802528381347656, "learning_rate": 0.00019964879297252618, "loss": 2.118, "step": 27230 }, { "epoch": 3.2779783393501805, "grad_norm": 4.5299506187438965, "learning_rate": 0.0001996484743440759, "loss": 1.9077, "step": 27240 }, { "epoch": 3.2791817087845967, "grad_norm": 4.457561016082764, "learning_rate": 0.00019964815557140965, "loss": 1.813, "step": 27250 }, { "epoch": 3.2803850782190134, "grad_norm": 6.079787731170654, "learning_rate": 0.0001996478366545279, "loss": 1.7428, "step": 27260 }, { "epoch": 3.2815884476534296, "grad_norm": 4.322761535644531, "learning_rate": 0.00019964751759343115, "loss": 1.5646, "step": 27270 }, { "epoch": 3.282791817087846, "grad_norm": 6.566214084625244, "learning_rate": 0.00019964719838811982, "loss": 1.8035, "step": 27280 }, { "epoch": 3.2839951865222625, "grad_norm": 5.867137908935547, "learning_rate": 0.0001996468790385944, "loss": 1.7421, "step": 27290 }, { "epoch": 3.2851985559566788, "grad_norm": 3.765441417694092, "learning_rate": 0.00019964655954485535, "loss": 1.8022, "step": 27300 }, { "epoch": 3.286401925391095, "grad_norm": 6.182241916656494, "learning_rate": 0.00019964623990690312, "loss": 1.7031, "step": 27310 }, { "epoch": 3.2876052948255117, "grad_norm": 4.88283634185791, "learning_rate": 0.00019964592012473818, "loss": 1.5069, "step": 27320 }, { "epoch": 3.288808664259928, "grad_norm": 6.951658725738525, "learning_rate": 0.000199645600198361, "loss": 1.9607, "step": 27330 }, { "epoch": 3.290012033694344, "grad_norm": 5.33024787902832, "learning_rate": 0.00019964528012777203, "loss": 1.8222, "step": 27340 }, { "epoch": 3.2912154031287604, "grad_norm": 4.643368244171143, "learning_rate": 0.00019964495991297172, "loss": 1.6245, "step": 27350 }, { "epoch": 3.292418772563177, "grad_norm": 5.759073257446289, "learning_rate": 0.0001996446395539606, "loss": 1.7253, "step": 27360 }, { "epoch": 3.2936221419975933, "grad_norm": 4.402641773223877, "learning_rate": 0.00019964431905073905, "loss": 1.7703, "step": 27370 }, { "epoch": 3.2948255114320095, "grad_norm": 5.987547874450684, "learning_rate": 0.00019964399840330756, "loss": 1.8539, "step": 27380 }, { "epoch": 3.2960288808664258, "grad_norm": 5.379052639007568, "learning_rate": 0.00019964367761166662, "loss": 1.7614, "step": 27390 }, { "epoch": 3.2972322503008424, "grad_norm": 3.9488117694854736, "learning_rate": 0.00019964335667581668, "loss": 1.6899, "step": 27400 }, { "epoch": 3.2984356197352587, "grad_norm": 5.289420127868652, "learning_rate": 0.0001996430355957582, "loss": 1.6323, "step": 27410 }, { "epoch": 3.299638989169675, "grad_norm": 4.561107635498047, "learning_rate": 0.00019964271437149163, "loss": 1.605, "step": 27420 }, { "epoch": 3.3008423586040916, "grad_norm": 7.212870121002197, "learning_rate": 0.00019964239300301743, "loss": 1.6594, "step": 27430 }, { "epoch": 3.302045728038508, "grad_norm": 5.4261040687561035, "learning_rate": 0.00019964207149033612, "loss": 1.562, "step": 27440 }, { "epoch": 3.303249097472924, "grad_norm": 3.1123127937316895, "learning_rate": 0.00019964174983344815, "loss": 1.8834, "step": 27450 }, { "epoch": 3.3044524669073407, "grad_norm": 6.524343013763428, "learning_rate": 0.00019964142803235394, "loss": 1.6465, "step": 27460 }, { "epoch": 3.305655836341757, "grad_norm": 4.955365180969238, "learning_rate": 0.00019964110608705399, "loss": 1.7944, "step": 27470 }, { "epoch": 3.306859205776173, "grad_norm": 7.838707447052002, "learning_rate": 0.00019964078399754875, "loss": 1.9876, "step": 27480 }, { "epoch": 3.30806257521059, "grad_norm": 4.1487555503845215, "learning_rate": 0.00019964046176383873, "loss": 1.6249, "step": 27490 }, { "epoch": 3.309265944645006, "grad_norm": 3.6820261478424072, "learning_rate": 0.00019964013938592433, "loss": 1.773, "step": 27500 }, { "epoch": 3.3104693140794224, "grad_norm": 4.7243194580078125, "learning_rate": 0.00019963981686380605, "loss": 1.6834, "step": 27510 }, { "epoch": 3.3116726835138386, "grad_norm": 4.824052333831787, "learning_rate": 0.00019963949419748436, "loss": 1.7135, "step": 27520 }, { "epoch": 3.3128760529482553, "grad_norm": 7.697936058044434, "learning_rate": 0.0001996391713869597, "loss": 1.7224, "step": 27530 }, { "epoch": 3.3140794223826715, "grad_norm": 6.1690850257873535, "learning_rate": 0.0001996388484322326, "loss": 1.728, "step": 27540 }, { "epoch": 3.3152827918170877, "grad_norm": 4.328515529632568, "learning_rate": 0.00019963852533330344, "loss": 1.8722, "step": 27550 }, { "epoch": 3.316486161251504, "grad_norm": 6.749997615814209, "learning_rate": 0.00019963820209017278, "loss": 1.7789, "step": 27560 }, { "epoch": 3.3176895306859207, "grad_norm": 5.340113162994385, "learning_rate": 0.000199637878702841, "loss": 1.9433, "step": 27570 }, { "epoch": 3.318892900120337, "grad_norm": 7.050694465637207, "learning_rate": 0.00019963755517130865, "loss": 1.7184, "step": 27580 }, { "epoch": 3.320096269554753, "grad_norm": 4.519673824310303, "learning_rate": 0.0001996372314955761, "loss": 1.8321, "step": 27590 }, { "epoch": 3.32129963898917, "grad_norm": 3.892613649368286, "learning_rate": 0.00019963690767564393, "loss": 1.87, "step": 27600 }, { "epoch": 3.322503008423586, "grad_norm": 5.246593952178955, "learning_rate": 0.00019963658371151255, "loss": 1.6093, "step": 27610 }, { "epoch": 3.3237063778580023, "grad_norm": 4.326037883758545, "learning_rate": 0.00019963625960318243, "loss": 1.6772, "step": 27620 }, { "epoch": 3.324909747292419, "grad_norm": 6.822893142700195, "learning_rate": 0.000199635935350654, "loss": 1.9091, "step": 27630 }, { "epoch": 3.326113116726835, "grad_norm": 5.5352253913879395, "learning_rate": 0.0001996356109539278, "loss": 1.5584, "step": 27640 }, { "epoch": 3.3273164861612514, "grad_norm": 4.748112201690674, "learning_rate": 0.0001996352864130043, "loss": 1.7727, "step": 27650 }, { "epoch": 3.328519855595668, "grad_norm": 6.762351036071777, "learning_rate": 0.00019963496172788393, "loss": 1.8328, "step": 27660 }, { "epoch": 3.3297232250300843, "grad_norm": 3.778073787689209, "learning_rate": 0.0001996346368985672, "loss": 1.7227, "step": 27670 }, { "epoch": 3.3309265944645006, "grad_norm": 6.242730617523193, "learning_rate": 0.0001996343119250545, "loss": 1.7442, "step": 27680 }, { "epoch": 3.332129963898917, "grad_norm": 5.287921905517578, "learning_rate": 0.00019963398680734634, "loss": 1.7142, "step": 27690 }, { "epoch": 3.3333333333333335, "grad_norm": 4.432364463806152, "learning_rate": 0.00019963366154544324, "loss": 1.7966, "step": 27700 }, { "epoch": 3.3345367027677497, "grad_norm": 5.745725154876709, "learning_rate": 0.0001996333361393456, "loss": 1.7246, "step": 27710 }, { "epoch": 3.335740072202166, "grad_norm": 4.296098232269287, "learning_rate": 0.00019963301058905397, "loss": 1.6841, "step": 27720 }, { "epoch": 3.336943441636582, "grad_norm": 7.974771976470947, "learning_rate": 0.00019963268489456875, "loss": 1.723, "step": 27730 }, { "epoch": 3.338146811070999, "grad_norm": 4.985914707183838, "learning_rate": 0.00019963235905589045, "loss": 1.7322, "step": 27740 }, { "epoch": 3.339350180505415, "grad_norm": 4.049668788909912, "learning_rate": 0.0001996320330730195, "loss": 1.881, "step": 27750 }, { "epoch": 3.3405535499398313, "grad_norm": 6.071661472320557, "learning_rate": 0.00019963170694595642, "loss": 1.9796, "step": 27760 }, { "epoch": 3.341756919374248, "grad_norm": 4.73226261138916, "learning_rate": 0.00019963138067470167, "loss": 1.898, "step": 27770 }, { "epoch": 3.3429602888086642, "grad_norm": 6.587105751037598, "learning_rate": 0.00019963105425925567, "loss": 1.8603, "step": 27780 }, { "epoch": 3.3441636582430805, "grad_norm": 5.582319259643555, "learning_rate": 0.000199630727699619, "loss": 1.832, "step": 27790 }, { "epoch": 3.345367027677497, "grad_norm": 4.537518501281738, "learning_rate": 0.00019963040099579203, "loss": 1.7574, "step": 27800 }, { "epoch": 3.3465703971119134, "grad_norm": 6.402120113372803, "learning_rate": 0.00019963007414777525, "loss": 1.8413, "step": 27810 }, { "epoch": 3.3477737665463296, "grad_norm": 4.006775379180908, "learning_rate": 0.00019962974715556918, "loss": 1.8227, "step": 27820 }, { "epoch": 3.3489771359807463, "grad_norm": 7.730136394500732, "learning_rate": 0.00019962942001917428, "loss": 1.9199, "step": 27830 }, { "epoch": 3.3501805054151625, "grad_norm": 4.88864803314209, "learning_rate": 0.000199629092738591, "loss": 1.7746, "step": 27840 }, { "epoch": 3.351383874849579, "grad_norm": 4.529849529266357, "learning_rate": 0.00019962876531381984, "loss": 1.7188, "step": 27850 }, { "epoch": 3.352587244283995, "grad_norm": 6.139866352081299, "learning_rate": 0.00019962843774486123, "loss": 1.7252, "step": 27860 }, { "epoch": 3.3537906137184117, "grad_norm": 5.090317249298096, "learning_rate": 0.0001996281100317157, "loss": 1.573, "step": 27870 }, { "epoch": 3.354993983152828, "grad_norm": 7.82069730758667, "learning_rate": 0.00019962778217438366, "loss": 1.7227, "step": 27880 }, { "epoch": 3.356197352587244, "grad_norm": 6.4739813804626465, "learning_rate": 0.00019962745417286568, "loss": 1.7838, "step": 27890 }, { "epoch": 3.357400722021661, "grad_norm": 4.006805896759033, "learning_rate": 0.00019962712602716214, "loss": 1.8307, "step": 27900 }, { "epoch": 3.358604091456077, "grad_norm": 5.227746486663818, "learning_rate": 0.00019962679773727354, "loss": 1.8486, "step": 27910 }, { "epoch": 3.3598074608904933, "grad_norm": 4.264801025390625, "learning_rate": 0.0001996264693032004, "loss": 1.8223, "step": 27920 }, { "epoch": 3.3610108303249095, "grad_norm": 8.17780876159668, "learning_rate": 0.00019962614072494312, "loss": 1.8582, "step": 27930 }, { "epoch": 3.3622141997593262, "grad_norm": 4.795365333557129, "learning_rate": 0.00019962581200250225, "loss": 1.7779, "step": 27940 }, { "epoch": 3.3634175691937425, "grad_norm": 4.197148323059082, "learning_rate": 0.00019962548313587824, "loss": 1.8181, "step": 27950 }, { "epoch": 3.3646209386281587, "grad_norm": 5.381394386291504, "learning_rate": 0.00019962515412507155, "loss": 1.7951, "step": 27960 }, { "epoch": 3.3658243080625754, "grad_norm": 4.752811431884766, "learning_rate": 0.00019962482497008264, "loss": 1.7309, "step": 27970 }, { "epoch": 3.3670276774969916, "grad_norm": 6.7400407791137695, "learning_rate": 0.00019962449567091203, "loss": 1.6858, "step": 27980 }, { "epoch": 3.368231046931408, "grad_norm": 5.006594181060791, "learning_rate": 0.0001996241662275602, "loss": 1.8097, "step": 27990 }, { "epoch": 3.3694344163658245, "grad_norm": 3.310363292694092, "learning_rate": 0.00019962383664002758, "loss": 1.6574, "step": 28000 }, { "epoch": 3.3706377858002408, "grad_norm": 7.438520431518555, "learning_rate": 0.00019962350690831468, "loss": 1.6039, "step": 28010 }, { "epoch": 3.371841155234657, "grad_norm": 4.770726680755615, "learning_rate": 0.00019962317703242196, "loss": 1.8445, "step": 28020 }, { "epoch": 3.3730445246690732, "grad_norm": 7.135133266448975, "learning_rate": 0.00019962284701234993, "loss": 1.7904, "step": 28030 }, { "epoch": 3.37424789410349, "grad_norm": 4.703583240509033, "learning_rate": 0.000199622516848099, "loss": 1.6413, "step": 28040 }, { "epoch": 3.375451263537906, "grad_norm": 3.2322211265563965, "learning_rate": 0.00019962218653966972, "loss": 1.779, "step": 28050 }, { "epoch": 3.3766546329723224, "grad_norm": 7.324565410614014, "learning_rate": 0.00019962185608706255, "loss": 1.7111, "step": 28060 }, { "epoch": 3.377858002406739, "grad_norm": 5.59927225112915, "learning_rate": 0.00019962152549027795, "loss": 1.8828, "step": 28070 }, { "epoch": 3.3790613718411553, "grad_norm": 9.892529487609863, "learning_rate": 0.00019962119474931642, "loss": 1.7942, "step": 28080 }, { "epoch": 3.3802647412755715, "grad_norm": 5.770024299621582, "learning_rate": 0.0001996208638641784, "loss": 1.8067, "step": 28090 }, { "epoch": 3.3814681107099878, "grad_norm": 3.9775357246398926, "learning_rate": 0.0001996205328348644, "loss": 1.6265, "step": 28100 }, { "epoch": 3.3826714801444044, "grad_norm": 5.100301265716553, "learning_rate": 0.0001996202016613749, "loss": 2.0109, "step": 28110 }, { "epoch": 3.3838748495788207, "grad_norm": 4.932925701141357, "learning_rate": 0.00019961987034371038, "loss": 1.6067, "step": 28120 }, { "epoch": 3.385078219013237, "grad_norm": 6.15216064453125, "learning_rate": 0.0001996195388818713, "loss": 1.7458, "step": 28130 }, { "epoch": 3.3862815884476536, "grad_norm": 4.748860836029053, "learning_rate": 0.00019961920727585815, "loss": 1.7924, "step": 28140 }, { "epoch": 3.38748495788207, "grad_norm": 3.8551394939422607, "learning_rate": 0.00019961887552567142, "loss": 1.9233, "step": 28150 }, { "epoch": 3.388688327316486, "grad_norm": 5.9154534339904785, "learning_rate": 0.00019961854363131158, "loss": 1.7453, "step": 28160 }, { "epoch": 3.3898916967509027, "grad_norm": 4.891790390014648, "learning_rate": 0.00019961821159277912, "loss": 1.9713, "step": 28170 }, { "epoch": 3.391095066185319, "grad_norm": 7.742791652679443, "learning_rate": 0.0001996178794100745, "loss": 1.8933, "step": 28180 }, { "epoch": 3.392298435619735, "grad_norm": 4.845676422119141, "learning_rate": 0.00019961754708319818, "loss": 1.7511, "step": 28190 }, { "epoch": 3.3935018050541514, "grad_norm": 3.511476755142212, "learning_rate": 0.00019961721461215075, "loss": 1.7201, "step": 28200 }, { "epoch": 3.394705174488568, "grad_norm": 7.694055557250977, "learning_rate": 0.00019961688199693258, "loss": 1.7666, "step": 28210 }, { "epoch": 3.3959085439229844, "grad_norm": 4.1438164710998535, "learning_rate": 0.00019961654923754418, "loss": 1.5868, "step": 28220 }, { "epoch": 3.3971119133574006, "grad_norm": 7.416901588439941, "learning_rate": 0.00019961621633398605, "loss": 1.892, "step": 28230 }, { "epoch": 3.3983152827918173, "grad_norm": 5.698300361633301, "learning_rate": 0.00019961588328625863, "loss": 1.7046, "step": 28240 }, { "epoch": 3.3995186522262335, "grad_norm": 4.730085849761963, "learning_rate": 0.00019961555009436247, "loss": 1.8509, "step": 28250 }, { "epoch": 3.4007220216606497, "grad_norm": 7.788651943206787, "learning_rate": 0.000199615216758298, "loss": 1.7517, "step": 28260 }, { "epoch": 3.401925391095066, "grad_norm": 4.645796775817871, "learning_rate": 0.0001996148832780657, "loss": 1.8645, "step": 28270 }, { "epoch": 3.4031287605294827, "grad_norm": 6.676630973815918, "learning_rate": 0.00019961454965366612, "loss": 2.0621, "step": 28280 }, { "epoch": 3.404332129963899, "grad_norm": 4.801552772521973, "learning_rate": 0.00019961421588509965, "loss": 1.6914, "step": 28290 }, { "epoch": 3.405535499398315, "grad_norm": 3.3285067081451416, "learning_rate": 0.00019961388197236684, "loss": 1.8584, "step": 28300 }, { "epoch": 3.406738868832732, "grad_norm": 4.599587917327881, "learning_rate": 0.00019961354791546814, "loss": 1.788, "step": 28310 }, { "epoch": 3.407942238267148, "grad_norm": 5.92603874206543, "learning_rate": 0.00019961321371440404, "loss": 1.8698, "step": 28320 }, { "epoch": 3.4091456077015643, "grad_norm": 7.2303147315979, "learning_rate": 0.00019961287936917502, "loss": 1.8969, "step": 28330 }, { "epoch": 3.410348977135981, "grad_norm": 5.587429523468018, "learning_rate": 0.00019961254487978157, "loss": 1.7323, "step": 28340 }, { "epoch": 3.411552346570397, "grad_norm": 4.460331916809082, "learning_rate": 0.00019961221024622418, "loss": 1.8817, "step": 28350 }, { "epoch": 3.4127557160048134, "grad_norm": 6.216007232666016, "learning_rate": 0.00019961187546850334, "loss": 1.9292, "step": 28360 }, { "epoch": 3.4139590854392297, "grad_norm": 3.744415283203125, "learning_rate": 0.0001996115405466195, "loss": 1.7755, "step": 28370 }, { "epoch": 3.4151624548736463, "grad_norm": 7.921230792999268, "learning_rate": 0.00019961120548057316, "loss": 1.8186, "step": 28380 }, { "epoch": 3.4163658243080626, "grad_norm": 5.462467670440674, "learning_rate": 0.00019961087027036485, "loss": 1.787, "step": 28390 }, { "epoch": 3.417569193742479, "grad_norm": 4.054779052734375, "learning_rate": 0.00019961053491599498, "loss": 1.8242, "step": 28400 }, { "epoch": 3.4187725631768955, "grad_norm": 5.601592540740967, "learning_rate": 0.00019961019941746412, "loss": 1.9354, "step": 28410 }, { "epoch": 3.4199759326113117, "grad_norm": 4.777987480163574, "learning_rate": 0.00019960986377477266, "loss": 1.8717, "step": 28420 }, { "epoch": 3.421179302045728, "grad_norm": 7.961142063140869, "learning_rate": 0.00019960952798792113, "loss": 1.7858, "step": 28430 }, { "epoch": 3.422382671480144, "grad_norm": 4.813782691955566, "learning_rate": 0.00019960919205691007, "loss": 1.8777, "step": 28440 }, { "epoch": 3.423586040914561, "grad_norm": 3.7830121517181396, "learning_rate": 0.00019960885598173988, "loss": 1.7667, "step": 28450 }, { "epoch": 3.424789410348977, "grad_norm": 7.098151206970215, "learning_rate": 0.00019960851976241108, "loss": 1.8785, "step": 28460 }, { "epoch": 3.4259927797833933, "grad_norm": 3.914496660232544, "learning_rate": 0.0001996081833989242, "loss": 1.6083, "step": 28470 }, { "epoch": 3.42719614921781, "grad_norm": 6.490516662597656, "learning_rate": 0.00019960784689127965, "loss": 1.8888, "step": 28480 }, { "epoch": 3.4283995186522263, "grad_norm": 5.446444034576416, "learning_rate": 0.00019960751023947798, "loss": 1.6972, "step": 28490 }, { "epoch": 3.4296028880866425, "grad_norm": 4.287990570068359, "learning_rate": 0.0001996071734435196, "loss": 1.7736, "step": 28500 }, { "epoch": 3.430806257521059, "grad_norm": 5.645087242126465, "learning_rate": 0.00019960683650340508, "loss": 1.7633, "step": 28510 }, { "epoch": 3.4320096269554754, "grad_norm": 5.143428325653076, "learning_rate": 0.00019960649941913489, "loss": 1.6987, "step": 28520 }, { "epoch": 3.4332129963898916, "grad_norm": 6.8210859298706055, "learning_rate": 0.0001996061621907095, "loss": 1.8576, "step": 28530 }, { "epoch": 3.434416365824308, "grad_norm": 5.176815986633301, "learning_rate": 0.00019960582481812938, "loss": 1.6781, "step": 28540 }, { "epoch": 3.4356197352587245, "grad_norm": 3.825925588607788, "learning_rate": 0.00019960548730139506, "loss": 1.8528, "step": 28550 }, { "epoch": 3.436823104693141, "grad_norm": 5.517031192779541, "learning_rate": 0.000199605149640507, "loss": 1.5867, "step": 28560 }, { "epoch": 3.438026474127557, "grad_norm": 4.681079864501953, "learning_rate": 0.0001996048118354657, "loss": 1.6254, "step": 28570 }, { "epoch": 3.4392298435619737, "grad_norm": 8.500269889831543, "learning_rate": 0.00019960447388627162, "loss": 1.868, "step": 28580 }, { "epoch": 3.44043321299639, "grad_norm": 4.874539375305176, "learning_rate": 0.0001996041357929253, "loss": 1.7744, "step": 28590 }, { "epoch": 3.441636582430806, "grad_norm": 3.704907178878784, "learning_rate": 0.00019960379755542718, "loss": 1.8901, "step": 28600 }, { "epoch": 3.4428399518652224, "grad_norm": 8.360466003417969, "learning_rate": 0.0001996034591737778, "loss": 1.9189, "step": 28610 }, { "epoch": 3.444043321299639, "grad_norm": 4.338244915008545, "learning_rate": 0.0001996031206479776, "loss": 1.6906, "step": 28620 }, { "epoch": 3.4452466907340553, "grad_norm": 7.269326210021973, "learning_rate": 0.00019960278197802712, "loss": 1.9446, "step": 28630 }, { "epoch": 3.4464500601684716, "grad_norm": 5.376789093017578, "learning_rate": 0.0001996024431639268, "loss": 1.8607, "step": 28640 }, { "epoch": 3.4476534296028882, "grad_norm": 4.045756816864014, "learning_rate": 0.00019960210420567719, "loss": 1.7736, "step": 28650 }, { "epoch": 3.4488567990373045, "grad_norm": 6.43265962600708, "learning_rate": 0.0001996017651032787, "loss": 1.7796, "step": 28660 }, { "epoch": 3.4500601684717207, "grad_norm": 4.935984134674072, "learning_rate": 0.0001996014258567319, "loss": 1.8473, "step": 28670 }, { "epoch": 3.4512635379061374, "grad_norm": 5.484255313873291, "learning_rate": 0.00019960108646603723, "loss": 1.7414, "step": 28680 }, { "epoch": 3.4524669073405536, "grad_norm": 4.610409736633301, "learning_rate": 0.0001996007469311952, "loss": 1.8739, "step": 28690 }, { "epoch": 3.45367027677497, "grad_norm": 5.166720867156982, "learning_rate": 0.00019960040725220627, "loss": 1.7138, "step": 28700 }, { "epoch": 3.4548736462093865, "grad_norm": 6.070875644683838, "learning_rate": 0.000199600067429071, "loss": 1.9382, "step": 28710 }, { "epoch": 3.4560770156438028, "grad_norm": 5.008320331573486, "learning_rate": 0.00019959972746178982, "loss": 1.6997, "step": 28720 }, { "epoch": 3.457280385078219, "grad_norm": 5.522164344787598, "learning_rate": 0.00019959938735036325, "loss": 1.7085, "step": 28730 }, { "epoch": 3.4584837545126352, "grad_norm": 4.9618916511535645, "learning_rate": 0.0001995990470947918, "loss": 1.7234, "step": 28740 }, { "epoch": 3.459687123947052, "grad_norm": 3.4637858867645264, "learning_rate": 0.0001995987066950759, "loss": 1.805, "step": 28750 }, { "epoch": 3.460890493381468, "grad_norm": 6.136050701141357, "learning_rate": 0.00019959836615121609, "loss": 2.0882, "step": 28760 }, { "epoch": 3.4620938628158844, "grad_norm": 4.310469150543213, "learning_rate": 0.00019959802546321286, "loss": 1.853, "step": 28770 }, { "epoch": 3.4632972322503006, "grad_norm": 6.130943298339844, "learning_rate": 0.00019959768463106672, "loss": 1.5557, "step": 28780 }, { "epoch": 3.4645006016847173, "grad_norm": 4.595365524291992, "learning_rate": 0.00019959734365477814, "loss": 1.7351, "step": 28790 }, { "epoch": 3.4657039711191335, "grad_norm": 4.329321384429932, "learning_rate": 0.00019959700253434758, "loss": 1.67, "step": 28800 }, { "epoch": 3.4669073405535498, "grad_norm": 6.03887939453125, "learning_rate": 0.00019959666126977558, "loss": 1.628, "step": 28810 }, { "epoch": 3.4681107099879664, "grad_norm": 4.7212677001953125, "learning_rate": 0.00019959631986106262, "loss": 1.8574, "step": 28820 }, { "epoch": 3.4693140794223827, "grad_norm": 9.018230438232422, "learning_rate": 0.00019959597830820919, "loss": 1.8657, "step": 28830 }, { "epoch": 3.470517448856799, "grad_norm": 4.717865467071533, "learning_rate": 0.0001995956366112158, "loss": 1.8667, "step": 28840 }, { "epoch": 3.4717208182912156, "grad_norm": 3.674168586730957, "learning_rate": 0.00019959529477008294, "loss": 1.7367, "step": 28850 }, { "epoch": 3.472924187725632, "grad_norm": 5.015222549438477, "learning_rate": 0.0001995949527848111, "loss": 1.7573, "step": 28860 }, { "epoch": 3.474127557160048, "grad_norm": 5.016926288604736, "learning_rate": 0.00019959461065540074, "loss": 1.7272, "step": 28870 }, { "epoch": 3.4753309265944647, "grad_norm": 6.085020542144775, "learning_rate": 0.00019959426838185245, "loss": 1.8565, "step": 28880 }, { "epoch": 3.476534296028881, "grad_norm": 5.02958345413208, "learning_rate": 0.00019959392596416661, "loss": 1.8379, "step": 28890 }, { "epoch": 3.477737665463297, "grad_norm": 3.3129117488861084, "learning_rate": 0.0001995935834023438, "loss": 1.7124, "step": 28900 }, { "epoch": 3.4789410348977134, "grad_norm": 6.748370170593262, "learning_rate": 0.00019959324069638448, "loss": 1.7598, "step": 28910 }, { "epoch": 3.48014440433213, "grad_norm": 4.98140287399292, "learning_rate": 0.00019959289784628913, "loss": 1.6866, "step": 28920 }, { "epoch": 3.4813477737665464, "grad_norm": 8.136743545532227, "learning_rate": 0.0001995925548520583, "loss": 1.6813, "step": 28930 }, { "epoch": 3.4825511432009626, "grad_norm": 5.873473167419434, "learning_rate": 0.00019959221171369246, "loss": 1.7566, "step": 28940 }, { "epoch": 3.483754512635379, "grad_norm": 3.825920343399048, "learning_rate": 0.00019959186843119205, "loss": 1.708, "step": 28950 }, { "epoch": 3.4849578820697955, "grad_norm": 5.885858535766602, "learning_rate": 0.00019959152500455767, "loss": 1.702, "step": 28960 }, { "epoch": 3.4861612515042117, "grad_norm": 5.659242153167725, "learning_rate": 0.00019959118143378975, "loss": 1.6004, "step": 28970 }, { "epoch": 3.487364620938628, "grad_norm": 8.56484317779541, "learning_rate": 0.00019959083771888878, "loss": 1.8466, "step": 28980 }, { "epoch": 3.4885679903730447, "grad_norm": 5.031448841094971, "learning_rate": 0.0001995904938598553, "loss": 1.6605, "step": 28990 }, { "epoch": 3.489771359807461, "grad_norm": 3.6741833686828613, "learning_rate": 0.0001995901498566898, "loss": 2.004, "step": 29000 }, { "epoch": 3.490974729241877, "grad_norm": 5.330018997192383, "learning_rate": 0.00019958980570939275, "loss": 1.8313, "step": 29010 }, { "epoch": 3.492178098676294, "grad_norm": 5.911968231201172, "learning_rate": 0.00019958946141796467, "loss": 1.7885, "step": 29020 }, { "epoch": 3.49338146811071, "grad_norm": 7.138945579528809, "learning_rate": 0.00019958911698240604, "loss": 1.7559, "step": 29030 }, { "epoch": 3.4945848375451263, "grad_norm": 4.7268967628479, "learning_rate": 0.00019958877240271736, "loss": 1.6986, "step": 29040 }, { "epoch": 3.495788206979543, "grad_norm": 3.9076035022735596, "learning_rate": 0.00019958842767889916, "loss": 1.8307, "step": 29050 }, { "epoch": 3.496991576413959, "grad_norm": 6.532130718231201, "learning_rate": 0.0001995880828109519, "loss": 1.8026, "step": 29060 }, { "epoch": 3.4981949458483754, "grad_norm": 3.987971544265747, "learning_rate": 0.00019958773779887612, "loss": 1.6147, "step": 29070 }, { "epoch": 3.4993983152827917, "grad_norm": 7.269247531890869, "learning_rate": 0.00019958739264267227, "loss": 1.8936, "step": 29080 }, { "epoch": 3.5006016847172083, "grad_norm": 5.082418441772461, "learning_rate": 0.00019958704734234092, "loss": 1.8007, "step": 29090 }, { "epoch": 3.5018050541516246, "grad_norm": 5.6328535079956055, "learning_rate": 0.00019958670189788248, "loss": 1.8599, "step": 29100 }, { "epoch": 3.503008423586041, "grad_norm": 5.192950248718262, "learning_rate": 0.0001995863563092975, "loss": 1.9142, "step": 29110 }, { "epoch": 3.504211793020457, "grad_norm": 4.52918815612793, "learning_rate": 0.00019958601057658648, "loss": 1.6936, "step": 29120 }, { "epoch": 3.5054151624548737, "grad_norm": 8.50179386138916, "learning_rate": 0.00019958566469974992, "loss": 1.7705, "step": 29130 }, { "epoch": 3.50661853188929, "grad_norm": 4.8946213722229, "learning_rate": 0.00019958531867878834, "loss": 1.7076, "step": 29140 }, { "epoch": 3.507821901323706, "grad_norm": 3.867093324661255, "learning_rate": 0.00019958497251370218, "loss": 1.9257, "step": 29150 }, { "epoch": 3.509025270758123, "grad_norm": 5.413902759552002, "learning_rate": 0.00019958462620449198, "loss": 1.5908, "step": 29160 }, { "epoch": 3.510228640192539, "grad_norm": 4.138807773590088, "learning_rate": 0.00019958427975115827, "loss": 1.9396, "step": 29170 }, { "epoch": 3.5114320096269553, "grad_norm": 6.781573295593262, "learning_rate": 0.0001995839331537015, "loss": 1.5823, "step": 29180 }, { "epoch": 3.512635379061372, "grad_norm": 6.4733500480651855, "learning_rate": 0.00019958358641212218, "loss": 1.8299, "step": 29190 }, { "epoch": 3.5138387484957883, "grad_norm": 3.607250928878784, "learning_rate": 0.00019958323952642084, "loss": 1.7479, "step": 29200 }, { "epoch": 3.5150421179302045, "grad_norm": 7.795487880706787, "learning_rate": 0.00019958289249659796, "loss": 1.6971, "step": 29210 }, { "epoch": 3.516245487364621, "grad_norm": 4.45479679107666, "learning_rate": 0.00019958254532265405, "loss": 1.6372, "step": 29220 }, { "epoch": 3.5174488567990374, "grad_norm": 7.399784564971924, "learning_rate": 0.00019958219800458963, "loss": 1.9275, "step": 29230 }, { "epoch": 3.5186522262334536, "grad_norm": 5.853253364562988, "learning_rate": 0.00019958185054240514, "loss": 1.8425, "step": 29240 }, { "epoch": 3.51985559566787, "grad_norm": 3.379256248474121, "learning_rate": 0.00019958150293610118, "loss": 1.6625, "step": 29250 }, { "epoch": 3.5210589651022866, "grad_norm": 5.286121845245361, "learning_rate": 0.00019958115518567814, "loss": 1.6161, "step": 29260 }, { "epoch": 3.522262334536703, "grad_norm": 4.732770919799805, "learning_rate": 0.00019958080729113665, "loss": 1.7531, "step": 29270 }, { "epoch": 3.523465703971119, "grad_norm": 6.605644226074219, "learning_rate": 0.00019958045925247707, "loss": 1.8319, "step": 29280 }, { "epoch": 3.5246690734055353, "grad_norm": 6.168133735656738, "learning_rate": 0.00019958011106970005, "loss": 1.8519, "step": 29290 }, { "epoch": 3.525872442839952, "grad_norm": 3.8957347869873047, "learning_rate": 0.00019957976274280598, "loss": 1.825, "step": 29300 }, { "epoch": 3.527075812274368, "grad_norm": 5.978893280029297, "learning_rate": 0.00019957941427179544, "loss": 1.7849, "step": 29310 }, { "epoch": 3.5282791817087844, "grad_norm": 4.665668487548828, "learning_rate": 0.00019957906565666887, "loss": 1.6495, "step": 29320 }, { "epoch": 3.529482551143201, "grad_norm": 8.054835319519043, "learning_rate": 0.00019957871689742684, "loss": 1.8495, "step": 29330 }, { "epoch": 3.5306859205776173, "grad_norm": 5.350250244140625, "learning_rate": 0.0001995783679940698, "loss": 1.7535, "step": 29340 }, { "epoch": 3.5318892900120336, "grad_norm": 3.2606489658355713, "learning_rate": 0.00019957801894659826, "loss": 1.9664, "step": 29350 }, { "epoch": 3.5330926594464502, "grad_norm": 7.022886276245117, "learning_rate": 0.00019957766975501278, "loss": 1.7611, "step": 29360 }, { "epoch": 3.5342960288808665, "grad_norm": 4.701299667358398, "learning_rate": 0.0001995773204193138, "loss": 1.7028, "step": 29370 }, { "epoch": 3.5354993983152827, "grad_norm": 7.65661096572876, "learning_rate": 0.00019957697093950184, "loss": 1.8365, "step": 29380 }, { "epoch": 3.5367027677496994, "grad_norm": 5.906957626342773, "learning_rate": 0.00019957662131557747, "loss": 1.7081, "step": 29390 }, { "epoch": 3.5379061371841156, "grad_norm": 3.471609592437744, "learning_rate": 0.0001995762715475411, "loss": 1.9223, "step": 29400 }, { "epoch": 3.539109506618532, "grad_norm": 6.040035247802734, "learning_rate": 0.0001995759216353933, "loss": 1.9884, "step": 29410 }, { "epoch": 3.5403128760529485, "grad_norm": 4.192653179168701, "learning_rate": 0.00019957557157913454, "loss": 1.9343, "step": 29420 }, { "epoch": 3.5415162454873648, "grad_norm": 7.143955707550049, "learning_rate": 0.00019957522137876535, "loss": 1.8719, "step": 29430 }, { "epoch": 3.542719614921781, "grad_norm": 5.790853023529053, "learning_rate": 0.00019957487103428623, "loss": 1.7103, "step": 29440 }, { "epoch": 3.5439229843561972, "grad_norm": 3.4837894439697266, "learning_rate": 0.00019957452054569772, "loss": 1.9471, "step": 29450 }, { "epoch": 3.5451263537906135, "grad_norm": 6.555818557739258, "learning_rate": 0.00019957416991300025, "loss": 1.8474, "step": 29460 }, { "epoch": 3.54632972322503, "grad_norm": 4.449881553649902, "learning_rate": 0.0001995738191361944, "loss": 1.9303, "step": 29470 }, { "epoch": 3.5475330926594464, "grad_norm": 6.920135021209717, "learning_rate": 0.00019957346821528063, "loss": 1.7256, "step": 29480 }, { "epoch": 3.5487364620938626, "grad_norm": 4.741700172424316, "learning_rate": 0.00019957311715025945, "loss": 1.7866, "step": 29490 }, { "epoch": 3.5499398315282793, "grad_norm": 3.9805476665496826, "learning_rate": 0.00019957276594113142, "loss": 1.7484, "step": 29500 }, { "epoch": 3.5511432009626955, "grad_norm": 6.7731170654296875, "learning_rate": 0.00019957241458789702, "loss": 1.9032, "step": 29510 }, { "epoch": 3.5523465703971118, "grad_norm": 5.218935012817383, "learning_rate": 0.00019957206309055672, "loss": 1.8034, "step": 29520 }, { "epoch": 3.5535499398315284, "grad_norm": 7.347984313964844, "learning_rate": 0.00019957171144911108, "loss": 1.9651, "step": 29530 }, { "epoch": 3.5547533092659447, "grad_norm": 5.54328727722168, "learning_rate": 0.0001995713596635606, "loss": 1.5477, "step": 29540 }, { "epoch": 3.555956678700361, "grad_norm": 3.764723539352417, "learning_rate": 0.00019957100773390578, "loss": 1.7769, "step": 29550 }, { "epoch": 3.5571600481347776, "grad_norm": 6.686862945556641, "learning_rate": 0.0001995706556601471, "loss": 1.8664, "step": 29560 }, { "epoch": 3.558363417569194, "grad_norm": 4.710370063781738, "learning_rate": 0.00019957030344228512, "loss": 1.7083, "step": 29570 }, { "epoch": 3.55956678700361, "grad_norm": 7.936213493347168, "learning_rate": 0.00019956995108032032, "loss": 1.825, "step": 29580 }, { "epoch": 3.5607701564380267, "grad_norm": 4.937076091766357, "learning_rate": 0.0001995695985742532, "loss": 1.7293, "step": 29590 }, { "epoch": 3.561973525872443, "grad_norm": 4.393558502197266, "learning_rate": 0.00019956924592408432, "loss": 1.8649, "step": 29600 }, { "epoch": 3.563176895306859, "grad_norm": 5.727424144744873, "learning_rate": 0.00019956889312981418, "loss": 1.733, "step": 29610 }, { "epoch": 3.5643802647412755, "grad_norm": 5.007108688354492, "learning_rate": 0.0001995685401914432, "loss": 2.0541, "step": 29620 }, { "epoch": 3.5655836341756917, "grad_norm": 7.5373969078063965, "learning_rate": 0.00019956818710897202, "loss": 1.9797, "step": 29630 }, { "epoch": 3.5667870036101084, "grad_norm": 5.30107307434082, "learning_rate": 0.00019956783388240106, "loss": 1.8703, "step": 29640 }, { "epoch": 3.5679903730445246, "grad_norm": 4.112225532531738, "learning_rate": 0.00019956748051173088, "loss": 1.8334, "step": 29650 }, { "epoch": 3.569193742478941, "grad_norm": 5.8449602127075195, "learning_rate": 0.00019956712699696193, "loss": 1.8945, "step": 29660 }, { "epoch": 3.5703971119133575, "grad_norm": 4.755184173583984, "learning_rate": 0.00019956677333809482, "loss": 1.9676, "step": 29670 }, { "epoch": 3.5716004813477737, "grad_norm": 7.043939113616943, "learning_rate": 0.00019956641953512998, "loss": 1.8171, "step": 29680 }, { "epoch": 3.57280385078219, "grad_norm": 5.145519733428955, "learning_rate": 0.00019956606558806794, "loss": 1.805, "step": 29690 }, { "epoch": 3.5740072202166067, "grad_norm": 4.182318210601807, "learning_rate": 0.00019956571149690923, "loss": 1.81, "step": 29700 }, { "epoch": 3.575210589651023, "grad_norm": 5.767271518707275, "learning_rate": 0.00019956535726165436, "loss": 1.765, "step": 29710 }, { "epoch": 3.576413959085439, "grad_norm": 4.717093467712402, "learning_rate": 0.00019956500288230382, "loss": 1.9329, "step": 29720 }, { "epoch": 3.577617328519856, "grad_norm": 6.811041831970215, "learning_rate": 0.00019956464835885814, "loss": 1.6949, "step": 29730 }, { "epoch": 3.578820697954272, "grad_norm": 6.049142837524414, "learning_rate": 0.00019956429369131785, "loss": 1.7861, "step": 29740 }, { "epoch": 3.5800240673886883, "grad_norm": 3.6417489051818848, "learning_rate": 0.00019956393887968346, "loss": 1.9231, "step": 29750 }, { "epoch": 3.581227436823105, "grad_norm": 5.419465065002441, "learning_rate": 0.00019956358392395545, "loss": 1.7896, "step": 29760 }, { "epoch": 3.582430806257521, "grad_norm": 4.61770486831665, "learning_rate": 0.00019956322882413435, "loss": 1.8287, "step": 29770 }, { "epoch": 3.5836341756919374, "grad_norm": 7.098788738250732, "learning_rate": 0.00019956287358022066, "loss": 1.9132, "step": 29780 }, { "epoch": 3.5848375451263537, "grad_norm": 5.410339832305908, "learning_rate": 0.00019956251819221494, "loss": 1.6021, "step": 29790 }, { "epoch": 3.58604091456077, "grad_norm": 3.4524776935577393, "learning_rate": 0.00019956216266011766, "loss": 1.7949, "step": 29800 }, { "epoch": 3.5872442839951866, "grad_norm": 5.46954870223999, "learning_rate": 0.00019956180698392934, "loss": 1.9543, "step": 29810 }, { "epoch": 3.588447653429603, "grad_norm": 5.387228488922119, "learning_rate": 0.00019956145116365053, "loss": 1.9277, "step": 29820 }, { "epoch": 3.589651022864019, "grad_norm": 9.30466079711914, "learning_rate": 0.0001995610951992817, "loss": 1.8404, "step": 29830 }, { "epoch": 3.5908543922984357, "grad_norm": 7.823917865753174, "learning_rate": 0.0001995607390908234, "loss": 1.8102, "step": 29840 }, { "epoch": 3.592057761732852, "grad_norm": 3.8454854488372803, "learning_rate": 0.00019956038283827613, "loss": 1.9939, "step": 29850 }, { "epoch": 3.593261131167268, "grad_norm": 6.494553089141846, "learning_rate": 0.00019956002644164035, "loss": 2.0794, "step": 29860 }, { "epoch": 3.594464500601685, "grad_norm": 3.75325345993042, "learning_rate": 0.0001995596699009167, "loss": 1.6462, "step": 29870 }, { "epoch": 3.595667870036101, "grad_norm": 9.10042953491211, "learning_rate": 0.0001995593132161056, "loss": 1.748, "step": 29880 }, { "epoch": 3.5968712394705173, "grad_norm": 5.1772308349609375, "learning_rate": 0.00019955895638720762, "loss": 1.5885, "step": 29890 }, { "epoch": 3.598074608904934, "grad_norm": 3.6802306175231934, "learning_rate": 0.0001995585994142232, "loss": 1.9793, "step": 29900 }, { "epoch": 3.5992779783393503, "grad_norm": 5.864447116851807, "learning_rate": 0.00019955824229715293, "loss": 1.8902, "step": 29910 }, { "epoch": 3.6004813477737665, "grad_norm": 5.455096244812012, "learning_rate": 0.00019955788503599732, "loss": 1.9929, "step": 29920 }, { "epoch": 3.601684717208183, "grad_norm": 8.159369468688965, "learning_rate": 0.00019955752763075685, "loss": 1.8808, "step": 29930 }, { "epoch": 3.6028880866425994, "grad_norm": 4.807581901550293, "learning_rate": 0.00019955717008143204, "loss": 1.7032, "step": 29940 }, { "epoch": 3.6040914560770156, "grad_norm": 4.044142723083496, "learning_rate": 0.00019955681238802345, "loss": 1.8653, "step": 29950 }, { "epoch": 3.605294825511432, "grad_norm": 6.339328765869141, "learning_rate": 0.0001995564545505316, "loss": 1.859, "step": 29960 }, { "epoch": 3.606498194945848, "grad_norm": 4.411223888397217, "learning_rate": 0.00019955609656895694, "loss": 1.7288, "step": 29970 }, { "epoch": 3.607701564380265, "grad_norm": 7.663251876831055, "learning_rate": 0.00019955573844330005, "loss": 1.7244, "step": 29980 }, { "epoch": 3.608904933814681, "grad_norm": 5.699988842010498, "learning_rate": 0.0001995553801735614, "loss": 1.6705, "step": 29990 }, { "epoch": 3.6101083032490973, "grad_norm": 4.149991989135742, "learning_rate": 0.00019955502175974157, "loss": 1.8152, "step": 30000 }, { "epoch": 3.611311672683514, "grad_norm": 6.543876647949219, "learning_rate": 0.00019955466320184103, "loss": 1.7618, "step": 30010 }, { "epoch": 3.61251504211793, "grad_norm": 4.540354251861572, "learning_rate": 0.0001995543044998603, "loss": 1.9189, "step": 30020 }, { "epoch": 3.6137184115523464, "grad_norm": 8.781855583190918, "learning_rate": 0.00019955394565379991, "loss": 1.7893, "step": 30030 }, { "epoch": 3.614921780986763, "grad_norm": 4.790679454803467, "learning_rate": 0.0001995535866636604, "loss": 1.7217, "step": 30040 }, { "epoch": 3.6161251504211793, "grad_norm": 4.0239577293396, "learning_rate": 0.00019955322752944227, "loss": 1.8524, "step": 30050 }, { "epoch": 3.6173285198555956, "grad_norm": 5.290555000305176, "learning_rate": 0.00019955286825114606, "loss": 1.616, "step": 30060 }, { "epoch": 3.6185318892900122, "grad_norm": 4.982362747192383, "learning_rate": 0.00019955250882877222, "loss": 1.7095, "step": 30070 }, { "epoch": 3.6197352587244285, "grad_norm": 6.728891849517822, "learning_rate": 0.00019955214926232136, "loss": 1.4373, "step": 30080 }, { "epoch": 3.6209386281588447, "grad_norm": 4.579793453216553, "learning_rate": 0.00019955178955179394, "loss": 1.962, "step": 30090 }, { "epoch": 3.6221419975932614, "grad_norm": 4.157637596130371, "learning_rate": 0.00019955142969719052, "loss": 1.8267, "step": 30100 }, { "epoch": 3.6233453670276776, "grad_norm": 5.86622953414917, "learning_rate": 0.00019955106969851157, "loss": 1.6419, "step": 30110 }, { "epoch": 3.624548736462094, "grad_norm": 4.5456862449646, "learning_rate": 0.0001995507095557577, "loss": 1.7266, "step": 30120 }, { "epoch": 3.62575210589651, "grad_norm": 9.584850311279297, "learning_rate": 0.00019955034926892932, "loss": 1.8276, "step": 30130 }, { "epoch": 3.6269554753309263, "grad_norm": 5.99807596206665, "learning_rate": 0.00019954998883802704, "loss": 1.7605, "step": 30140 }, { "epoch": 3.628158844765343, "grad_norm": 4.4949421882629395, "learning_rate": 0.00019954962826305135, "loss": 1.9129, "step": 30150 }, { "epoch": 3.6293622141997592, "grad_norm": 6.591169834136963, "learning_rate": 0.00019954926754400275, "loss": 1.8907, "step": 30160 }, { "epoch": 3.6305655836341755, "grad_norm": 5.168471813201904, "learning_rate": 0.00019954890668088176, "loss": 1.6874, "step": 30170 }, { "epoch": 3.631768953068592, "grad_norm": 6.996469974517822, "learning_rate": 0.00019954854567368898, "loss": 1.7469, "step": 30180 }, { "epoch": 3.6329723225030084, "grad_norm": 5.0457048416137695, "learning_rate": 0.00019954818452242484, "loss": 1.7064, "step": 30190 }, { "epoch": 3.6341756919374246, "grad_norm": 4.4163055419921875, "learning_rate": 0.0001995478232270899, "loss": 1.7586, "step": 30200 }, { "epoch": 3.6353790613718413, "grad_norm": 7.42832612991333, "learning_rate": 0.0001995474617876847, "loss": 1.8413, "step": 30210 }, { "epoch": 3.6365824308062575, "grad_norm": 4.629641056060791, "learning_rate": 0.00019954710020420974, "loss": 1.7846, "step": 30220 }, { "epoch": 3.6377858002406738, "grad_norm": 7.234807968139648, "learning_rate": 0.00019954673847666555, "loss": 1.7984, "step": 30230 }, { "epoch": 3.6389891696750905, "grad_norm": 5.725372314453125, "learning_rate": 0.00019954637660505263, "loss": 1.7409, "step": 30240 }, { "epoch": 3.6401925391095067, "grad_norm": 3.353891134262085, "learning_rate": 0.00019954601458937156, "loss": 1.5833, "step": 30250 }, { "epoch": 3.641395908543923, "grad_norm": 6.6229753494262695, "learning_rate": 0.0001995456524296228, "loss": 1.6474, "step": 30260 }, { "epoch": 3.6425992779783396, "grad_norm": 4.420374393463135, "learning_rate": 0.00019954529012580692, "loss": 1.7998, "step": 30270 }, { "epoch": 3.643802647412756, "grad_norm": 7.485829830169678, "learning_rate": 0.00019954492767792443, "loss": 1.8961, "step": 30280 }, { "epoch": 3.645006016847172, "grad_norm": 4.7207465171813965, "learning_rate": 0.00019954456508597585, "loss": 1.4601, "step": 30290 }, { "epoch": 3.6462093862815883, "grad_norm": 4.8101606369018555, "learning_rate": 0.00019954420234996172, "loss": 1.8445, "step": 30300 }, { "epoch": 3.6474127557160045, "grad_norm": 5.383038520812988, "learning_rate": 0.00019954383946988252, "loss": 1.91, "step": 30310 }, { "epoch": 3.648616125150421, "grad_norm": 4.799562454223633, "learning_rate": 0.00019954347644573884, "loss": 1.8472, "step": 30320 }, { "epoch": 3.6498194945848375, "grad_norm": 8.128114700317383, "learning_rate": 0.00019954311327753115, "loss": 1.7467, "step": 30330 }, { "epoch": 3.6510228640192537, "grad_norm": 5.222129821777344, "learning_rate": 0.00019954274996526001, "loss": 1.6014, "step": 30340 }, { "epoch": 3.6522262334536704, "grad_norm": 4.164952278137207, "learning_rate": 0.00019954238650892592, "loss": 1.7273, "step": 30350 }, { "epoch": 3.6534296028880866, "grad_norm": 6.184064865112305, "learning_rate": 0.00019954202290852947, "loss": 2.0273, "step": 30360 }, { "epoch": 3.654632972322503, "grad_norm": 4.962084770202637, "learning_rate": 0.00019954165916407108, "loss": 1.6934, "step": 30370 }, { "epoch": 3.6558363417569195, "grad_norm": 7.644177436828613, "learning_rate": 0.00019954129527555134, "loss": 1.8554, "step": 30380 }, { "epoch": 3.6570397111913358, "grad_norm": 5.701297283172607, "learning_rate": 0.0001995409312429708, "loss": 1.7282, "step": 30390 }, { "epoch": 3.658243080625752, "grad_norm": 3.3246676921844482, "learning_rate": 0.00019954056706632993, "loss": 2.1334, "step": 30400 }, { "epoch": 3.6594464500601687, "grad_norm": 7.3835859298706055, "learning_rate": 0.0001995402027456293, "loss": 1.8398, "step": 30410 }, { "epoch": 3.660649819494585, "grad_norm": 4.011591911315918, "learning_rate": 0.0001995398382808694, "loss": 1.6743, "step": 30420 }, { "epoch": 3.661853188929001, "grad_norm": 5.709569454193115, "learning_rate": 0.0001995394736720508, "loss": 1.7296, "step": 30430 }, { "epoch": 3.663056558363418, "grad_norm": 4.6262712478637695, "learning_rate": 0.000199539108919174, "loss": 1.8377, "step": 30440 }, { "epoch": 3.664259927797834, "grad_norm": 4.076691627502441, "learning_rate": 0.00019953874402223954, "loss": 1.6846, "step": 30450 }, { "epoch": 3.6654632972322503, "grad_norm": 5.821899890899658, "learning_rate": 0.0001995383789812479, "loss": 2.0932, "step": 30460 }, { "epoch": 3.6666666666666665, "grad_norm": 4.39022159576416, "learning_rate": 0.0001995380137961997, "loss": 1.7241, "step": 30470 }, { "epoch": 3.667870036101083, "grad_norm": 6.937319755554199, "learning_rate": 0.0001995376484670954, "loss": 1.8848, "step": 30480 }, { "epoch": 3.6690734055354994, "grad_norm": 6.681645393371582, "learning_rate": 0.00019953728299393556, "loss": 1.7457, "step": 30490 }, { "epoch": 3.6702767749699157, "grad_norm": 4.0746750831604, "learning_rate": 0.0001995369173767207, "loss": 1.7131, "step": 30500 }, { "epoch": 3.671480144404332, "grad_norm": 5.25900411605835, "learning_rate": 0.0001995365516154513, "loss": 1.8158, "step": 30510 }, { "epoch": 3.6726835138387486, "grad_norm": 4.574855804443359, "learning_rate": 0.000199536185710128, "loss": 1.7533, "step": 30520 }, { "epoch": 3.673886883273165, "grad_norm": 7.876123905181885, "learning_rate": 0.0001995358196607512, "loss": 1.7022, "step": 30530 }, { "epoch": 3.675090252707581, "grad_norm": 6.505460739135742, "learning_rate": 0.00019953545346732153, "loss": 1.7364, "step": 30540 }, { "epoch": 3.6762936221419977, "grad_norm": 3.9832680225372314, "learning_rate": 0.0001995350871298395, "loss": 1.7994, "step": 30550 }, { "epoch": 3.677496991576414, "grad_norm": 6.498528480529785, "learning_rate": 0.00019953472064830561, "loss": 1.6826, "step": 30560 }, { "epoch": 3.67870036101083, "grad_norm": 5.466087341308594, "learning_rate": 0.00019953435402272042, "loss": 1.6335, "step": 30570 }, { "epoch": 3.679903730445247, "grad_norm": 8.307892799377441, "learning_rate": 0.0001995339872530844, "loss": 1.7924, "step": 30580 }, { "epoch": 3.681107099879663, "grad_norm": 5.839925289154053, "learning_rate": 0.00019953362033939818, "loss": 1.6798, "step": 30590 }, { "epoch": 3.6823104693140793, "grad_norm": 3.620140314102173, "learning_rate": 0.0001995332532816622, "loss": 1.7933, "step": 30600 }, { "epoch": 3.683513838748496, "grad_norm": 7.285762310028076, "learning_rate": 0.00019953288607987705, "loss": 1.7459, "step": 30610 }, { "epoch": 3.6847172081829123, "grad_norm": 3.719926357269287, "learning_rate": 0.0001995325187340432, "loss": 1.7365, "step": 30620 }, { "epoch": 3.6859205776173285, "grad_norm": 9.426250457763672, "learning_rate": 0.00019953215124416125, "loss": 2.0286, "step": 30630 }, { "epoch": 3.6871239470517447, "grad_norm": 5.778400897979736, "learning_rate": 0.0001995317836102317, "loss": 1.9127, "step": 30640 }, { "epoch": 3.6883273164861614, "grad_norm": 3.7114202976226807, "learning_rate": 0.0001995314158322551, "loss": 1.6488, "step": 30650 }, { "epoch": 3.6895306859205776, "grad_norm": 5.503793239593506, "learning_rate": 0.00019953104791023196, "loss": 1.6922, "step": 30660 }, { "epoch": 3.690734055354994, "grad_norm": 5.489040374755859, "learning_rate": 0.00019953067984416283, "loss": 1.787, "step": 30670 }, { "epoch": 3.69193742478941, "grad_norm": 7.977235317230225, "learning_rate": 0.0001995303116340482, "loss": 2.02, "step": 30680 }, { "epoch": 3.693140794223827, "grad_norm": 5.784267425537109, "learning_rate": 0.00019952994327988868, "loss": 1.9409, "step": 30690 }, { "epoch": 3.694344163658243, "grad_norm": 3.913222551345825, "learning_rate": 0.00019952957478168472, "loss": 1.8691, "step": 30700 }, { "epoch": 3.6955475330926593, "grad_norm": 8.454694747924805, "learning_rate": 0.00019952920613943692, "loss": 1.7532, "step": 30710 }, { "epoch": 3.696750902527076, "grad_norm": 5.654797554016113, "learning_rate": 0.00019952883735314576, "loss": 1.7633, "step": 30720 }, { "epoch": 3.697954271961492, "grad_norm": 10.2830171585083, "learning_rate": 0.0001995284684228118, "loss": 1.9116, "step": 30730 }, { "epoch": 3.6991576413959084, "grad_norm": 8.618124961853027, "learning_rate": 0.0001995280993484356, "loss": 1.8392, "step": 30740 }, { "epoch": 3.700361010830325, "grad_norm": 4.4848480224609375, "learning_rate": 0.00019952773013001766, "loss": 1.8059, "step": 30750 }, { "epoch": 3.7015643802647413, "grad_norm": 7.338475227355957, "learning_rate": 0.0001995273607675585, "loss": 1.9288, "step": 30760 }, { "epoch": 3.7027677496991576, "grad_norm": 5.886021614074707, "learning_rate": 0.0001995269912610587, "loss": 1.8206, "step": 30770 }, { "epoch": 3.7039711191335742, "grad_norm": 7.312557697296143, "learning_rate": 0.00019952662161051876, "loss": 2.0556, "step": 30780 }, { "epoch": 3.7051744885679905, "grad_norm": 5.307499885559082, "learning_rate": 0.00019952625181593922, "loss": 1.8527, "step": 30790 }, { "epoch": 3.7063778580024067, "grad_norm": 4.026417255401611, "learning_rate": 0.00019952588187732064, "loss": 1.6672, "step": 30800 }, { "epoch": 3.707581227436823, "grad_norm": 8.716653823852539, "learning_rate": 0.00019952551179466352, "loss": 1.7917, "step": 30810 }, { "epoch": 3.7087845968712396, "grad_norm": 4.396418571472168, "learning_rate": 0.00019952514156796843, "loss": 2.0722, "step": 30820 }, { "epoch": 3.709987966305656, "grad_norm": 8.340319633483887, "learning_rate": 0.00019952477119723587, "loss": 1.9754, "step": 30830 }, { "epoch": 3.711191335740072, "grad_norm": 6.628535747528076, "learning_rate": 0.0001995244006824664, "loss": 1.8696, "step": 30840 }, { "epoch": 3.7123947051744883, "grad_norm": 3.324138879776001, "learning_rate": 0.00019952403002366056, "loss": 1.6401, "step": 30850 }, { "epoch": 3.713598074608905, "grad_norm": 7.406821250915527, "learning_rate": 0.00019952365922081886, "loss": 1.7917, "step": 30860 }, { "epoch": 3.7148014440433212, "grad_norm": 4.478430271148682, "learning_rate": 0.00019952328827394186, "loss": 1.7915, "step": 30870 }, { "epoch": 3.7160048134777375, "grad_norm": 7.9550089836120605, "learning_rate": 0.00019952291718303012, "loss": 1.9478, "step": 30880 }, { "epoch": 3.717208182912154, "grad_norm": 6.367208957672119, "learning_rate": 0.0001995225459480841, "loss": 1.8459, "step": 30890 }, { "epoch": 3.7184115523465704, "grad_norm": 4.044086933135986, "learning_rate": 0.00019952217456910442, "loss": 1.8967, "step": 30900 }, { "epoch": 3.7196149217809866, "grad_norm": 6.0452375411987305, "learning_rate": 0.00019952180304609157, "loss": 2.1581, "step": 30910 }, { "epoch": 3.7208182912154033, "grad_norm": 4.80734395980835, "learning_rate": 0.0001995214313790461, "loss": 1.827, "step": 30920 }, { "epoch": 3.7220216606498195, "grad_norm": 8.116764068603516, "learning_rate": 0.00019952105956796853, "loss": 1.9225, "step": 30930 }, { "epoch": 3.7232250300842358, "grad_norm": 5.5072526931762695, "learning_rate": 0.00019952068761285943, "loss": 1.7358, "step": 30940 }, { "epoch": 3.7244283995186525, "grad_norm": 3.0707244873046875, "learning_rate": 0.00019952031551371933, "loss": 1.9061, "step": 30950 }, { "epoch": 3.7256317689530687, "grad_norm": 5.494694709777832, "learning_rate": 0.00019951994327054875, "loss": 1.9599, "step": 30960 }, { "epoch": 3.726835138387485, "grad_norm": 4.31592321395874, "learning_rate": 0.00019951957088334824, "loss": 1.8695, "step": 30970 }, { "epoch": 3.728038507821901, "grad_norm": 7.593077182769775, "learning_rate": 0.00019951919835211837, "loss": 2.0167, "step": 30980 }, { "epoch": 3.729241877256318, "grad_norm": 5.866640567779541, "learning_rate": 0.00019951882567685963, "loss": 1.8846, "step": 30990 }, { "epoch": 3.730445246690734, "grad_norm": 5.204739570617676, "learning_rate": 0.00019951845285757255, "loss": 1.7558, "step": 31000 }, { "epoch": 3.7316486161251503, "grad_norm": 6.713199615478516, "learning_rate": 0.00019951807989425775, "loss": 1.8385, "step": 31010 }, { "epoch": 3.7328519855595665, "grad_norm": 4.8455939292907715, "learning_rate": 0.0001995177067869157, "loss": 2.009, "step": 31020 }, { "epoch": 3.7340553549939832, "grad_norm": 8.778068542480469, "learning_rate": 0.00019951733353554697, "loss": 1.8584, "step": 31030 }, { "epoch": 3.7352587244283995, "grad_norm": 7.0389180183410645, "learning_rate": 0.00019951696014015204, "loss": 1.7476, "step": 31040 }, { "epoch": 3.7364620938628157, "grad_norm": 3.5129287242889404, "learning_rate": 0.00019951658660073154, "loss": 1.8695, "step": 31050 }, { "epoch": 3.7376654632972324, "grad_norm": 7.025370121002197, "learning_rate": 0.00019951621291728597, "loss": 1.8708, "step": 31060 }, { "epoch": 3.7388688327316486, "grad_norm": 5.776852607727051, "learning_rate": 0.00019951583908981585, "loss": 1.7144, "step": 31070 }, { "epoch": 3.740072202166065, "grad_norm": 8.288241386413574, "learning_rate": 0.00019951546511832175, "loss": 1.8474, "step": 31080 }, { "epoch": 3.7412755716004815, "grad_norm": 5.879149436950684, "learning_rate": 0.0001995150910028042, "loss": 1.7656, "step": 31090 }, { "epoch": 3.7424789410348978, "grad_norm": 3.3691365718841553, "learning_rate": 0.00019951471674326375, "loss": 1.7449, "step": 31100 }, { "epoch": 3.743682310469314, "grad_norm": 7.728209972381592, "learning_rate": 0.0001995143423397009, "loss": 1.868, "step": 31110 }, { "epoch": 3.7448856799037307, "grad_norm": 4.389208793640137, "learning_rate": 0.0001995139677921163, "loss": 1.7713, "step": 31120 }, { "epoch": 3.746089049338147, "grad_norm": 8.326992988586426, "learning_rate": 0.00019951359310051037, "loss": 2.0934, "step": 31130 }, { "epoch": 3.747292418772563, "grad_norm": 5.7215657234191895, "learning_rate": 0.0001995132182648837, "loss": 1.6712, "step": 31140 }, { "epoch": 3.74849578820698, "grad_norm": 4.79075813293457, "learning_rate": 0.00019951284328523686, "loss": 1.7695, "step": 31150 }, { "epoch": 3.749699157641396, "grad_norm": 6.803378105163574, "learning_rate": 0.00019951246816157035, "loss": 2.0639, "step": 31160 }, { "epoch": 3.7509025270758123, "grad_norm": 7.559670925140381, "learning_rate": 0.00019951209289388472, "loss": 1.8505, "step": 31170 }, { "epoch": 3.7521058965102285, "grad_norm": 7.970536231994629, "learning_rate": 0.00019951171748218053, "loss": 1.7489, "step": 31180 }, { "epoch": 3.7533092659446448, "grad_norm": 5.386301040649414, "learning_rate": 0.00019951134192645832, "loss": 1.8248, "step": 31190 }, { "epoch": 3.7545126353790614, "grad_norm": 4.138087749481201, "learning_rate": 0.00019951096622671864, "loss": 1.8383, "step": 31200 }, { "epoch": 3.7557160048134777, "grad_norm": 6.622817516326904, "learning_rate": 0.00019951059038296202, "loss": 1.9262, "step": 31210 }, { "epoch": 3.756919374247894, "grad_norm": 4.338325023651123, "learning_rate": 0.00019951021439518899, "loss": 1.5994, "step": 31220 }, { "epoch": 3.7581227436823106, "grad_norm": 7.29240608215332, "learning_rate": 0.00019950983826340013, "loss": 1.7926, "step": 31230 }, { "epoch": 3.759326113116727, "grad_norm": 5.991332054138184, "learning_rate": 0.00019950946198759594, "loss": 1.7988, "step": 31240 }, { "epoch": 3.760529482551143, "grad_norm": 3.630941152572632, "learning_rate": 0.00019950908556777704, "loss": 1.8122, "step": 31250 }, { "epoch": 3.7617328519855597, "grad_norm": 6.232128620147705, "learning_rate": 0.0001995087090039439, "loss": 2.0054, "step": 31260 }, { "epoch": 3.762936221419976, "grad_norm": 4.277485370635986, "learning_rate": 0.00019950833229609707, "loss": 1.8732, "step": 31270 }, { "epoch": 3.764139590854392, "grad_norm": 7.671878814697266, "learning_rate": 0.00019950795544423713, "loss": 1.8111, "step": 31280 }, { "epoch": 3.765342960288809, "grad_norm": 5.963305950164795, "learning_rate": 0.00019950757844836462, "loss": 1.7443, "step": 31290 }, { "epoch": 3.766546329723225, "grad_norm": 3.8424763679504395, "learning_rate": 0.00019950720130848003, "loss": 1.8689, "step": 31300 }, { "epoch": 3.7677496991576414, "grad_norm": 7.679384708404541, "learning_rate": 0.000199506824024584, "loss": 1.4353, "step": 31310 }, { "epoch": 3.768953068592058, "grad_norm": 4.970163822174072, "learning_rate": 0.00019950644659667702, "loss": 1.7932, "step": 31320 }, { "epoch": 3.7701564380264743, "grad_norm": 7.572193145751953, "learning_rate": 0.00019950606902475966, "loss": 2.0589, "step": 31330 }, { "epoch": 3.7713598074608905, "grad_norm": 5.745700836181641, "learning_rate": 0.0001995056913088324, "loss": 1.8469, "step": 31340 }, { "epoch": 3.7725631768953067, "grad_norm": 4.959266185760498, "learning_rate": 0.00019950531344889588, "loss": 1.7171, "step": 31350 }, { "epoch": 3.773766546329723, "grad_norm": 6.040401935577393, "learning_rate": 0.00019950493544495062, "loss": 1.8227, "step": 31360 }, { "epoch": 3.7749699157641396, "grad_norm": 5.258152008056641, "learning_rate": 0.0001995045572969971, "loss": 1.8235, "step": 31370 }, { "epoch": 3.776173285198556, "grad_norm": 7.423575401306152, "learning_rate": 0.00019950417900503595, "loss": 1.9273, "step": 31380 }, { "epoch": 3.777376654632972, "grad_norm": 6.263308048248291, "learning_rate": 0.00019950380056906768, "loss": 1.8643, "step": 31390 }, { "epoch": 3.778580024067389, "grad_norm": 4.115621089935303, "learning_rate": 0.00019950342198909283, "loss": 1.8648, "step": 31400 }, { "epoch": 3.779783393501805, "grad_norm": 6.392474174499512, "learning_rate": 0.00019950304326511197, "loss": 1.6777, "step": 31410 }, { "epoch": 3.7809867629362213, "grad_norm": 6.091777324676514, "learning_rate": 0.00019950266439712567, "loss": 1.8839, "step": 31420 }, { "epoch": 3.782190132370638, "grad_norm": 7.052060127258301, "learning_rate": 0.00019950228538513442, "loss": 1.8583, "step": 31430 }, { "epoch": 3.783393501805054, "grad_norm": 5.742568016052246, "learning_rate": 0.0001995019062291388, "loss": 1.8722, "step": 31440 }, { "epoch": 3.7845968712394704, "grad_norm": 3.930863380432129, "learning_rate": 0.00019950152692913936, "loss": 1.8836, "step": 31450 }, { "epoch": 3.785800240673887, "grad_norm": 9.043641090393066, "learning_rate": 0.00019950114748513664, "loss": 1.7099, "step": 31460 }, { "epoch": 3.7870036101083033, "grad_norm": 6.021909713745117, "learning_rate": 0.0001995007678971312, "loss": 1.8497, "step": 31470 }, { "epoch": 3.7882069795427196, "grad_norm": 8.166851043701172, "learning_rate": 0.00019950038816512358, "loss": 1.8112, "step": 31480 }, { "epoch": 3.7894103489771362, "grad_norm": 5.161540985107422, "learning_rate": 0.0001995000082891143, "loss": 1.5989, "step": 31490 }, { "epoch": 3.7906137184115525, "grad_norm": 4.264873027801514, "learning_rate": 0.000199499628269104, "loss": 1.8338, "step": 31500 }, { "epoch": 3.7918170878459687, "grad_norm": 7.2630181312561035, "learning_rate": 0.00019949924810509315, "loss": 1.6914, "step": 31510 }, { "epoch": 3.793020457280385, "grad_norm": 5.075514316558838, "learning_rate": 0.0001994988677970823, "loss": 1.8512, "step": 31520 }, { "epoch": 3.794223826714801, "grad_norm": 10.236590385437012, "learning_rate": 0.00019949848734507203, "loss": 1.8832, "step": 31530 }, { "epoch": 3.795427196149218, "grad_norm": 5.291720390319824, "learning_rate": 0.0001994981067490629, "loss": 1.7179, "step": 31540 }, { "epoch": 3.796630565583634, "grad_norm": 3.2387924194335938, "learning_rate": 0.00019949772600905545, "loss": 1.8682, "step": 31550 }, { "epoch": 3.7978339350180503, "grad_norm": 6.901940822601318, "learning_rate": 0.0001994973451250502, "loss": 1.9452, "step": 31560 }, { "epoch": 3.799037304452467, "grad_norm": 5.423299312591553, "learning_rate": 0.00019949696409704774, "loss": 1.9803, "step": 31570 }, { "epoch": 3.8002406738868832, "grad_norm": 8.527501106262207, "learning_rate": 0.0001994965829250486, "loss": 2.028, "step": 31580 }, { "epoch": 3.8014440433212995, "grad_norm": 5.666082859039307, "learning_rate": 0.00019949620160905336, "loss": 1.9733, "step": 31590 }, { "epoch": 3.802647412755716, "grad_norm": 3.896209239959717, "learning_rate": 0.00019949582014906251, "loss": 1.7397, "step": 31600 }, { "epoch": 3.8038507821901324, "grad_norm": 5.688864231109619, "learning_rate": 0.0001994954385450767, "loss": 1.999, "step": 31610 }, { "epoch": 3.8050541516245486, "grad_norm": 4.99901819229126, "learning_rate": 0.0001994950567970964, "loss": 1.8133, "step": 31620 }, { "epoch": 3.8062575210589653, "grad_norm": 9.179929733276367, "learning_rate": 0.00019949467490512217, "loss": 1.9697, "step": 31630 }, { "epoch": 3.8074608904933815, "grad_norm": 5.502438545227051, "learning_rate": 0.00019949429286915458, "loss": 1.7058, "step": 31640 }, { "epoch": 3.808664259927798, "grad_norm": 4.436057090759277, "learning_rate": 0.00019949391068919422, "loss": 1.6759, "step": 31650 }, { "epoch": 3.8098676293622145, "grad_norm": 5.866961479187012, "learning_rate": 0.00019949352836524158, "loss": 1.963, "step": 31660 }, { "epoch": 3.8110709987966307, "grad_norm": 5.318753719329834, "learning_rate": 0.00019949314589729725, "loss": 1.8176, "step": 31670 }, { "epoch": 3.812274368231047, "grad_norm": 8.733996391296387, "learning_rate": 0.00019949276328536176, "loss": 1.7677, "step": 31680 }, { "epoch": 3.813477737665463, "grad_norm": 4.764383792877197, "learning_rate": 0.00019949238052943571, "loss": 1.7102, "step": 31690 }, { "epoch": 3.8146811070998794, "grad_norm": 4.100124359130859, "learning_rate": 0.0001994919976295196, "loss": 1.9146, "step": 31700 }, { "epoch": 3.815884476534296, "grad_norm": 5.41672945022583, "learning_rate": 0.000199491614585614, "loss": 1.7753, "step": 31710 }, { "epoch": 3.8170878459687123, "grad_norm": 4.436059951782227, "learning_rate": 0.00019949123139771947, "loss": 1.6387, "step": 31720 }, { "epoch": 3.8182912154031285, "grad_norm": 7.260838031768799, "learning_rate": 0.00019949084806583658, "loss": 1.8845, "step": 31730 }, { "epoch": 3.8194945848375452, "grad_norm": 5.721841812133789, "learning_rate": 0.00019949046458996587, "loss": 1.7196, "step": 31740 }, { "epoch": 3.8206979542719615, "grad_norm": 3.769033908843994, "learning_rate": 0.00019949008097010787, "loss": 1.8924, "step": 31750 }, { "epoch": 3.8219013237063777, "grad_norm": 7.916036605834961, "learning_rate": 0.00019948969720626317, "loss": 1.8181, "step": 31760 }, { "epoch": 3.8231046931407944, "grad_norm": 5.001548767089844, "learning_rate": 0.00019948931329843232, "loss": 1.8001, "step": 31770 }, { "epoch": 3.8243080625752106, "grad_norm": 7.248650074005127, "learning_rate": 0.00019948892924661585, "loss": 1.9166, "step": 31780 }, { "epoch": 3.825511432009627, "grad_norm": 4.9142913818359375, "learning_rate": 0.00019948854505081438, "loss": 1.779, "step": 31790 }, { "epoch": 3.8267148014440435, "grad_norm": 4.243422985076904, "learning_rate": 0.00019948816071102839, "loss": 1.8412, "step": 31800 }, { "epoch": 3.8279181708784598, "grad_norm": 5.534666061401367, "learning_rate": 0.0001994877762272585, "loss": 1.9305, "step": 31810 }, { "epoch": 3.829121540312876, "grad_norm": 5.762303352355957, "learning_rate": 0.0001994873915995052, "loss": 1.7955, "step": 31820 }, { "epoch": 3.8303249097472927, "grad_norm": 8.31263256072998, "learning_rate": 0.00019948700682776907, "loss": 2.0227, "step": 31830 }, { "epoch": 3.831528279181709, "grad_norm": 6.127068996429443, "learning_rate": 0.00019948662191205071, "loss": 2.045, "step": 31840 }, { "epoch": 3.832731648616125, "grad_norm": 4.620586395263672, "learning_rate": 0.00019948623685235063, "loss": 1.9066, "step": 31850 }, { "epoch": 3.8339350180505414, "grad_norm": 6.38811731338501, "learning_rate": 0.00019948585164866942, "loss": 1.778, "step": 31860 }, { "epoch": 3.8351383874849576, "grad_norm": 4.971603870391846, "learning_rate": 0.0001994854663010076, "loss": 1.789, "step": 31870 }, { "epoch": 3.8363417569193743, "grad_norm": 6.25272798538208, "learning_rate": 0.0001994850808093658, "loss": 2.0515, "step": 31880 }, { "epoch": 3.8375451263537905, "grad_norm": 7.055475234985352, "learning_rate": 0.00019948469517374446, "loss": 1.7579, "step": 31890 }, { "epoch": 3.8387484957882068, "grad_norm": 4.127619743347168, "learning_rate": 0.00019948430939414422, "loss": 1.9073, "step": 31900 }, { "epoch": 3.8399518652226234, "grad_norm": 6.342318534851074, "learning_rate": 0.00019948392347056567, "loss": 1.6187, "step": 31910 }, { "epoch": 3.8411552346570397, "grad_norm": 4.762055397033691, "learning_rate": 0.00019948353740300925, "loss": 1.8488, "step": 31920 }, { "epoch": 3.842358604091456, "grad_norm": 7.973830223083496, "learning_rate": 0.00019948315119147565, "loss": 1.7017, "step": 31930 }, { "epoch": 3.8435619735258726, "grad_norm": 5.887857437133789, "learning_rate": 0.0001994827648359653, "loss": 1.9345, "step": 31940 }, { "epoch": 3.844765342960289, "grad_norm": 4.972349166870117, "learning_rate": 0.00019948237833647888, "loss": 1.9818, "step": 31950 }, { "epoch": 3.845968712394705, "grad_norm": 5.498665809631348, "learning_rate": 0.0001994819916930169, "loss": 1.75, "step": 31960 }, { "epoch": 3.8471720818291217, "grad_norm": 4.5480637550354, "learning_rate": 0.0001994816049055799, "loss": 1.4933, "step": 31970 }, { "epoch": 3.848375451263538, "grad_norm": 7.472859859466553, "learning_rate": 0.00019948121797416844, "loss": 1.9189, "step": 31980 }, { "epoch": 3.849578820697954, "grad_norm": 6.643512725830078, "learning_rate": 0.00019948083089878313, "loss": 2.0863, "step": 31990 }, { "epoch": 3.850782190132371, "grad_norm": 4.6981635093688965, "learning_rate": 0.00019948044367942448, "loss": 2.0633, "step": 32000 }, { "epoch": 3.851985559566787, "grad_norm": 6.2199177742004395, "learning_rate": 0.00019948005631609307, "loss": 1.9438, "step": 32010 }, { "epoch": 3.8531889290012034, "grad_norm": 6.112263202667236, "learning_rate": 0.00019947966880878948, "loss": 1.9433, "step": 32020 }, { "epoch": 3.8543922984356196, "grad_norm": 7.201168060302734, "learning_rate": 0.00019947928115751423, "loss": 1.9611, "step": 32030 }, { "epoch": 3.855595667870036, "grad_norm": 5.691432952880859, "learning_rate": 0.00019947889336226787, "loss": 1.6684, "step": 32040 }, { "epoch": 3.8567990373044525, "grad_norm": 4.065805435180664, "learning_rate": 0.00019947850542305102, "loss": 1.7571, "step": 32050 }, { "epoch": 3.8580024067388687, "grad_norm": 6.2115702629089355, "learning_rate": 0.0001994781173398642, "loss": 1.9781, "step": 32060 }, { "epoch": 3.859205776173285, "grad_norm": 4.214781284332275, "learning_rate": 0.000199477729112708, "loss": 1.8943, "step": 32070 }, { "epoch": 3.8604091456077017, "grad_norm": 9.22004222869873, "learning_rate": 0.00019947734074158294, "loss": 1.7834, "step": 32080 }, { "epoch": 3.861612515042118, "grad_norm": 5.543321132659912, "learning_rate": 0.00019947695222648966, "loss": 1.9621, "step": 32090 }, { "epoch": 3.862815884476534, "grad_norm": 4.949936389923096, "learning_rate": 0.00019947656356742864, "loss": 1.9948, "step": 32100 }, { "epoch": 3.864019253910951, "grad_norm": 7.064183235168457, "learning_rate": 0.00019947617476440046, "loss": 2.0271, "step": 32110 }, { "epoch": 3.865222623345367, "grad_norm": 6.2395758628845215, "learning_rate": 0.0001994757858174057, "loss": 1.789, "step": 32120 }, { "epoch": 3.8664259927797833, "grad_norm": 7.911624431610107, "learning_rate": 0.00019947539672644492, "loss": 1.8299, "step": 32130 }, { "epoch": 3.8676293622142, "grad_norm": 5.5708770751953125, "learning_rate": 0.00019947500749151867, "loss": 1.6924, "step": 32140 }, { "epoch": 3.868832731648616, "grad_norm": 3.6271274089813232, "learning_rate": 0.00019947461811262756, "loss": 1.753, "step": 32150 }, { "epoch": 3.8700361010830324, "grad_norm": 5.830916404724121, "learning_rate": 0.00019947422858977208, "loss": 1.6217, "step": 32160 }, { "epoch": 3.871239470517449, "grad_norm": 4.291306972503662, "learning_rate": 0.00019947383892295284, "loss": 1.7472, "step": 32170 }, { "epoch": 3.8724428399518653, "grad_norm": 7.956405162811279, "learning_rate": 0.00019947344911217042, "loss": 1.6967, "step": 32180 }, { "epoch": 3.8736462093862816, "grad_norm": 5.54548454284668, "learning_rate": 0.00019947305915742533, "loss": 1.8514, "step": 32190 }, { "epoch": 3.874849578820698, "grad_norm": 4.318104267120361, "learning_rate": 0.00019947266905871817, "loss": 1.8138, "step": 32200 }, { "epoch": 3.8760529482551145, "grad_norm": 7.319823741912842, "learning_rate": 0.0001994722788160495, "loss": 1.8414, "step": 32210 }, { "epoch": 3.8772563176895307, "grad_norm": 4.983766078948975, "learning_rate": 0.0001994718884294199, "loss": 1.814, "step": 32220 }, { "epoch": 3.878459687123947, "grad_norm": 7.071497440338135, "learning_rate": 0.00019947149789882991, "loss": 2.0585, "step": 32230 }, { "epoch": 3.879663056558363, "grad_norm": 6.389927864074707, "learning_rate": 0.00019947110722428007, "loss": 1.6976, "step": 32240 }, { "epoch": 3.88086642599278, "grad_norm": 3.7990963459014893, "learning_rate": 0.00019947071640577104, "loss": 1.9019, "step": 32250 }, { "epoch": 3.882069795427196, "grad_norm": 6.669363498687744, "learning_rate": 0.0001994703254433033, "loss": 1.8831, "step": 32260 }, { "epoch": 3.8832731648616123, "grad_norm": 3.8320515155792236, "learning_rate": 0.00019946993433687742, "loss": 1.9731, "step": 32270 }, { "epoch": 3.884476534296029, "grad_norm": 8.193338394165039, "learning_rate": 0.000199469543086494, "loss": 1.8038, "step": 32280 }, { "epoch": 3.8856799037304453, "grad_norm": 4.97507905960083, "learning_rate": 0.0001994691516921536, "loss": 1.7103, "step": 32290 }, { "epoch": 3.8868832731648615, "grad_norm": 4.077975749969482, "learning_rate": 0.00019946876015385678, "loss": 1.7446, "step": 32300 }, { "epoch": 3.888086642599278, "grad_norm": 6.96763801574707, "learning_rate": 0.0001994683684716041, "loss": 1.7259, "step": 32310 }, { "epoch": 3.8892900120336944, "grad_norm": 4.485548496246338, "learning_rate": 0.00019946797664539614, "loss": 1.8251, "step": 32320 }, { "epoch": 3.8904933814681106, "grad_norm": 7.93310546875, "learning_rate": 0.00019946758467523346, "loss": 1.7753, "step": 32330 }, { "epoch": 3.8916967509025273, "grad_norm": 5.288084983825684, "learning_rate": 0.00019946719256111662, "loss": 1.9221, "step": 32340 }, { "epoch": 3.8929001203369435, "grad_norm": 4.369314193725586, "learning_rate": 0.0001994668003030462, "loss": 1.6287, "step": 32350 }, { "epoch": 3.89410348977136, "grad_norm": 5.8581390380859375, "learning_rate": 0.00019946640790102275, "loss": 1.7752, "step": 32360 }, { "epoch": 3.895306859205776, "grad_norm": 4.442620754241943, "learning_rate": 0.00019946601535504686, "loss": 1.8486, "step": 32370 }, { "epoch": 3.8965102286401927, "grad_norm": 8.320189476013184, "learning_rate": 0.00019946562266511908, "loss": 1.9129, "step": 32380 }, { "epoch": 3.897713598074609, "grad_norm": 5.990233898162842, "learning_rate": 0.00019946522983124002, "loss": 1.9838, "step": 32390 }, { "epoch": 3.898916967509025, "grad_norm": 5.078760623931885, "learning_rate": 0.00019946483685341016, "loss": 1.6586, "step": 32400 }, { "epoch": 3.9001203369434414, "grad_norm": 6.948725700378418, "learning_rate": 0.00019946444373163016, "loss": 1.8155, "step": 32410 }, { "epoch": 3.901323706377858, "grad_norm": 4.908626079559326, "learning_rate": 0.00019946405046590056, "loss": 1.7441, "step": 32420 }, { "epoch": 3.9025270758122743, "grad_norm": 7.935828685760498, "learning_rate": 0.00019946365705622192, "loss": 2.0591, "step": 32430 }, { "epoch": 3.9037304452466906, "grad_norm": 5.426856994628906, "learning_rate": 0.00019946326350259478, "loss": 1.7126, "step": 32440 }, { "epoch": 3.9049338146811072, "grad_norm": 3.5557751655578613, "learning_rate": 0.00019946286980501978, "loss": 1.756, "step": 32450 }, { "epoch": 3.9061371841155235, "grad_norm": 7.084009647369385, "learning_rate": 0.0001994624759634974, "loss": 1.8829, "step": 32460 }, { "epoch": 3.9073405535499397, "grad_norm": 5.085668563842773, "learning_rate": 0.0001994620819780283, "loss": 1.7786, "step": 32470 }, { "epoch": 3.9085439229843564, "grad_norm": 8.720808982849121, "learning_rate": 0.000199461687848613, "loss": 1.9302, "step": 32480 }, { "epoch": 3.9097472924187726, "grad_norm": 6.374059200286865, "learning_rate": 0.0001994612935752521, "loss": 1.9572, "step": 32490 }, { "epoch": 3.910950661853189, "grad_norm": 4.396307468414307, "learning_rate": 0.00019946089915794612, "loss": 1.9987, "step": 32500 }, { "epoch": 3.9121540312876055, "grad_norm": 7.3977203369140625, "learning_rate": 0.0001994605045966957, "loss": 1.9049, "step": 32510 }, { "epoch": 3.9133574007220218, "grad_norm": 4.642151355743408, "learning_rate": 0.00019946010989150135, "loss": 1.7011, "step": 32520 }, { "epoch": 3.914560770156438, "grad_norm": 7.033011436462402, "learning_rate": 0.00019945971504236367, "loss": 2.227, "step": 32530 }, { "epoch": 3.9157641395908542, "grad_norm": 6.167843341827393, "learning_rate": 0.00019945932004928322, "loss": 1.8048, "step": 32540 }, { "epoch": 3.916967509025271, "grad_norm": 5.016860485076904, "learning_rate": 0.0001994589249122606, "loss": 1.6254, "step": 32550 }, { "epoch": 3.918170878459687, "grad_norm": 6.315822124481201, "learning_rate": 0.00019945852963129635, "loss": 1.6375, "step": 32560 }, { "epoch": 3.9193742478941034, "grad_norm": 4.872376441955566, "learning_rate": 0.00019945813420639102, "loss": 1.7956, "step": 32570 }, { "epoch": 3.9205776173285196, "grad_norm": 7.527656555175781, "learning_rate": 0.00019945773863754525, "loss": 1.8707, "step": 32580 }, { "epoch": 3.9217809867629363, "grad_norm": 4.640732765197754, "learning_rate": 0.00019945734292475958, "loss": 1.7628, "step": 32590 }, { "epoch": 3.9229843561973525, "grad_norm": 3.8397715091705322, "learning_rate": 0.00019945694706803455, "loss": 1.894, "step": 32600 }, { "epoch": 3.9241877256317688, "grad_norm": 6.057290077209473, "learning_rate": 0.00019945655106737078, "loss": 1.8933, "step": 32610 }, { "epoch": 3.9253910950661854, "grad_norm": 4.693942546844482, "learning_rate": 0.00019945615492276882, "loss": 1.867, "step": 32620 }, { "epoch": 3.9265944645006017, "grad_norm": 7.134082794189453, "learning_rate": 0.00019945575863422924, "loss": 1.6996, "step": 32630 }, { "epoch": 3.927797833935018, "grad_norm": 6.2517900466918945, "learning_rate": 0.00019945536220175266, "loss": 1.9585, "step": 32640 }, { "epoch": 3.9290012033694346, "grad_norm": 3.822658061981201, "learning_rate": 0.0001994549656253396, "loss": 1.7689, "step": 32650 }, { "epoch": 3.930204572803851, "grad_norm": 8.290013313293457, "learning_rate": 0.0001994545689049906, "loss": 1.6267, "step": 32660 }, { "epoch": 3.931407942238267, "grad_norm": 5.099239826202393, "learning_rate": 0.00019945417204070637, "loss": 1.8065, "step": 32670 }, { "epoch": 3.9326113116726837, "grad_norm": 7.6024861335754395, "learning_rate": 0.00019945377503248731, "loss": 1.982, "step": 32680 }, { "epoch": 3.9338146811071, "grad_norm": 4.7708659172058105, "learning_rate": 0.00019945337788033416, "loss": 1.8551, "step": 32690 }, { "epoch": 3.935018050541516, "grad_norm": 4.781246185302734, "learning_rate": 0.00019945298058424737, "loss": 1.9533, "step": 32700 }, { "epoch": 3.9362214199759324, "grad_norm": 5.206472396850586, "learning_rate": 0.0001994525831442276, "loss": 1.8055, "step": 32710 }, { "epoch": 3.937424789410349, "grad_norm": 4.313125133514404, "learning_rate": 0.00019945218556027537, "loss": 1.8589, "step": 32720 }, { "epoch": 3.9386281588447654, "grad_norm": 10.546980857849121, "learning_rate": 0.00019945178783239124, "loss": 1.9307, "step": 32730 }, { "epoch": 3.9398315282791816, "grad_norm": 5.398140907287598, "learning_rate": 0.00019945138996057586, "loss": 1.8189, "step": 32740 }, { "epoch": 3.941034897713598, "grad_norm": 4.577977180480957, "learning_rate": 0.00019945099194482977, "loss": 1.9049, "step": 32750 }, { "epoch": 3.9422382671480145, "grad_norm": 6.033322334289551, "learning_rate": 0.00019945059378515353, "loss": 1.771, "step": 32760 }, { "epoch": 3.9434416365824307, "grad_norm": 5.217099189758301, "learning_rate": 0.00019945019548154772, "loss": 1.7072, "step": 32770 }, { "epoch": 3.944645006016847, "grad_norm": 9.646574974060059, "learning_rate": 0.0001994497970340129, "loss": 1.7588, "step": 32780 }, { "epoch": 3.9458483754512637, "grad_norm": 13.237478256225586, "learning_rate": 0.0001994493984425497, "loss": 1.8727, "step": 32790 }, { "epoch": 3.94705174488568, "grad_norm": 4.134167671203613, "learning_rate": 0.00019944899970715868, "loss": 1.9761, "step": 32800 }, { "epoch": 3.948255114320096, "grad_norm": 6.274007797241211, "learning_rate": 0.0001994486008278404, "loss": 1.9178, "step": 32810 }, { "epoch": 3.949458483754513, "grad_norm": 4.518835544586182, "learning_rate": 0.00019944820180459543, "loss": 1.7662, "step": 32820 }, { "epoch": 3.950661853188929, "grad_norm": 8.868928909301758, "learning_rate": 0.00019944780263742436, "loss": 1.9949, "step": 32830 }, { "epoch": 3.9518652226233453, "grad_norm": 5.864382743835449, "learning_rate": 0.00019944740332632774, "loss": 1.9119, "step": 32840 }, { "epoch": 3.953068592057762, "grad_norm": 4.512370586395264, "learning_rate": 0.00019944700387130622, "loss": 1.7882, "step": 32850 }, { "epoch": 3.954271961492178, "grad_norm": 5.2216715812683105, "learning_rate": 0.0001994466042723603, "loss": 1.9608, "step": 32860 }, { "epoch": 3.9554753309265944, "grad_norm": 5.1193461418151855, "learning_rate": 0.0001994462045294906, "loss": 1.8013, "step": 32870 }, { "epoch": 3.956678700361011, "grad_norm": 9.372982025146484, "learning_rate": 0.0001994458046426977, "loss": 1.7889, "step": 32880 }, { "epoch": 3.9578820697954273, "grad_norm": 6.510072231292725, "learning_rate": 0.00019944540461198216, "loss": 1.6052, "step": 32890 }, { "epoch": 3.9590854392298436, "grad_norm": 4.168869495391846, "learning_rate": 0.00019944500443734456, "loss": 1.6398, "step": 32900 }, { "epoch": 3.96028880866426, "grad_norm": 6.725529193878174, "learning_rate": 0.00019944460411878552, "loss": 1.9307, "step": 32910 }, { "epoch": 3.961492178098676, "grad_norm": 8.65323543548584, "learning_rate": 0.00019944420365630556, "loss": 1.6071, "step": 32920 }, { "epoch": 3.9626955475330927, "grad_norm": 6.908558368682861, "learning_rate": 0.0001994438030499053, "loss": 2.0262, "step": 32930 }, { "epoch": 3.963898916967509, "grad_norm": 5.8963398933410645, "learning_rate": 0.00019944340229958527, "loss": 1.6929, "step": 32940 }, { "epoch": 3.965102286401925, "grad_norm": 4.276820659637451, "learning_rate": 0.0001994430014053461, "loss": 1.9331, "step": 32950 }, { "epoch": 3.966305655836342, "grad_norm": 7.858771324157715, "learning_rate": 0.00019944260036718837, "loss": 1.7647, "step": 32960 }, { "epoch": 3.967509025270758, "grad_norm": 5.020040035247803, "learning_rate": 0.00019944219918511264, "loss": 1.7347, "step": 32970 }, { "epoch": 3.9687123947051743, "grad_norm": 8.482172966003418, "learning_rate": 0.0001994417978591195, "loss": 1.8629, "step": 32980 }, { "epoch": 3.969915764139591, "grad_norm": 4.87166166305542, "learning_rate": 0.0001994413963892095, "loss": 1.8508, "step": 32990 }, { "epoch": 3.9711191335740073, "grad_norm": 4.907790184020996, "learning_rate": 0.0001994409947753833, "loss": 1.8536, "step": 33000 }, { "epoch": 3.9723225030084235, "grad_norm": 6.560929298400879, "learning_rate": 0.00019944059301764137, "loss": 1.9947, "step": 33010 }, { "epoch": 3.97352587244284, "grad_norm": 4.404400825500488, "learning_rate": 0.0001994401911159844, "loss": 1.9147, "step": 33020 }, { "epoch": 3.9747292418772564, "grad_norm": 8.753676414489746, "learning_rate": 0.0001994397890704129, "loss": 1.8392, "step": 33030 }, { "epoch": 3.9759326113116726, "grad_norm": 5.625767707824707, "learning_rate": 0.00019943938688092748, "loss": 1.7824, "step": 33040 }, { "epoch": 3.9771359807460893, "grad_norm": 4.419516563415527, "learning_rate": 0.0001994389845475287, "loss": 1.8224, "step": 33050 }, { "epoch": 3.9783393501805056, "grad_norm": 6.591396331787109, "learning_rate": 0.00019943858207021716, "loss": 1.9277, "step": 33060 }, { "epoch": 3.979542719614922, "grad_norm": 5.4560866355896, "learning_rate": 0.00019943817944899345, "loss": 2.0471, "step": 33070 }, { "epoch": 3.980746089049338, "grad_norm": 10.069039344787598, "learning_rate": 0.00019943777668385816, "loss": 1.9399, "step": 33080 }, { "epoch": 3.9819494584837543, "grad_norm": 6.393735408782959, "learning_rate": 0.00019943737377481186, "loss": 1.9805, "step": 33090 }, { "epoch": 3.983152827918171, "grad_norm": 5.489099502563477, "learning_rate": 0.0001994369707218551, "loss": 1.8353, "step": 33100 }, { "epoch": 3.984356197352587, "grad_norm": 6.359148025512695, "learning_rate": 0.0001994365675249885, "loss": 1.9273, "step": 33110 }, { "epoch": 3.9855595667870034, "grad_norm": 4.177426338195801, "learning_rate": 0.00019943616418421266, "loss": 1.8576, "step": 33120 }, { "epoch": 3.98676293622142, "grad_norm": 9.475194931030273, "learning_rate": 0.0001994357606995281, "loss": 1.9668, "step": 33130 }, { "epoch": 3.9879663056558363, "grad_norm": 8.060712814331055, "learning_rate": 0.00019943535707093549, "loss": 1.894, "step": 33140 }, { "epoch": 3.9891696750902526, "grad_norm": 3.6547257900238037, "learning_rate": 0.00019943495329843532, "loss": 1.9538, "step": 33150 }, { "epoch": 3.9903730445246692, "grad_norm": 7.021988391876221, "learning_rate": 0.00019943454938202825, "loss": 1.8346, "step": 33160 }, { "epoch": 3.9915764139590855, "grad_norm": 5.056446552276611, "learning_rate": 0.00019943414532171488, "loss": 1.8549, "step": 33170 }, { "epoch": 3.9927797833935017, "grad_norm": 10.918617248535156, "learning_rate": 0.0001994337411174957, "loss": 1.8216, "step": 33180 }, { "epoch": 3.9939831528279184, "grad_norm": 5.906192302703857, "learning_rate": 0.00019943333676937134, "loss": 1.8366, "step": 33190 }, { "epoch": 3.9951865222623346, "grad_norm": 3.4675395488739014, "learning_rate": 0.0001994329322773424, "loss": 1.9512, "step": 33200 }, { "epoch": 3.996389891696751, "grad_norm": 6.853846549987793, "learning_rate": 0.00019943252764140947, "loss": 1.8738, "step": 33210 }, { "epoch": 3.9975932611311675, "grad_norm": 4.8922648429870605, "learning_rate": 0.0001994321228615731, "loss": 1.7699, "step": 33220 }, { "epoch": 3.9987966305655838, "grad_norm": 6.658156871795654, "learning_rate": 0.00019943171793783396, "loss": 1.9007, "step": 33230 }, { "epoch": 4.0, "grad_norm": 6.3400421142578125, "learning_rate": 0.00019943131287019253, "loss": 1.5921, "step": 33240 }, { "epoch": 4.0, "eval_loss": 1.8273708820343018, "eval_runtime": 120.2867, "eval_samples_per_second": 61.412, "eval_steps_per_second": 7.682, "step": 33240 }, { "epoch": 4.001203369434417, "grad_norm": 6.284422874450684, "learning_rate": 0.00019943090765864945, "loss": 1.7291, "step": 33250 }, { "epoch": 4.0024067388688325, "grad_norm": 4.046846389770508, "learning_rate": 0.00019943050230320528, "loss": 1.8244, "step": 33260 }, { "epoch": 4.003610108303249, "grad_norm": 6.676296710968018, "learning_rate": 0.00019943009680386062, "loss": 1.7329, "step": 33270 }, { "epoch": 4.004813477737666, "grad_norm": 4.912373065948486, "learning_rate": 0.0001994296911606161, "loss": 1.4706, "step": 33280 }, { "epoch": 4.006016847172082, "grad_norm": 7.784985542297363, "learning_rate": 0.00019942928537347224, "loss": 1.6247, "step": 33290 }, { "epoch": 4.007220216606498, "grad_norm": 6.531484603881836, "learning_rate": 0.00019942887944242966, "loss": 1.6704, "step": 33300 }, { "epoch": 4.008423586040915, "grad_norm": 4.292130947113037, "learning_rate": 0.00019942847336748895, "loss": 1.5558, "step": 33310 }, { "epoch": 4.009626955475331, "grad_norm": 6.882789134979248, "learning_rate": 0.0001994280671486507, "loss": 1.9312, "step": 33320 }, { "epoch": 4.0108303249097474, "grad_norm": 5.988173961639404, "learning_rate": 0.00019942766078591547, "loss": 1.6523, "step": 33330 }, { "epoch": 4.012033694344163, "grad_norm": 11.171236038208008, "learning_rate": 0.0001994272542792839, "loss": 1.7468, "step": 33340 }, { "epoch": 4.01323706377858, "grad_norm": 4.555514812469482, "learning_rate": 0.0001994268476287565, "loss": 1.5814, "step": 33350 }, { "epoch": 4.014440433212997, "grad_norm": 4.666054725646973, "learning_rate": 0.0001994264408343339, "loss": 1.5923, "step": 33360 }, { "epoch": 4.015643802647412, "grad_norm": 6.363561630249023, "learning_rate": 0.00019942603389601671, "loss": 1.9146, "step": 33370 }, { "epoch": 4.016847172081829, "grad_norm": 5.004948139190674, "learning_rate": 0.00019942562681380554, "loss": 1.7598, "step": 33380 }, { "epoch": 4.018050541516246, "grad_norm": 9.599754333496094, "learning_rate": 0.0001994252195877009, "loss": 1.8804, "step": 33390 }, { "epoch": 4.0192539109506615, "grad_norm": 5.644397735595703, "learning_rate": 0.0001994248122177034, "loss": 1.6345, "step": 33400 }, { "epoch": 4.020457280385078, "grad_norm": 7.430975437164307, "learning_rate": 0.00019942440470381372, "loss": 1.9448, "step": 33410 }, { "epoch": 4.021660649819495, "grad_norm": 10.672667503356934, "learning_rate": 0.00019942399704603233, "loss": 1.8085, "step": 33420 }, { "epoch": 4.022864019253911, "grad_norm": 5.232245922088623, "learning_rate": 0.00019942358924435985, "loss": 1.5639, "step": 33430 }, { "epoch": 4.024067388688327, "grad_norm": 7.8057355880737305, "learning_rate": 0.00019942318129879694, "loss": 1.7672, "step": 33440 }, { "epoch": 4.025270758122744, "grad_norm": 8.06316089630127, "learning_rate": 0.00019942277320934412, "loss": 1.616, "step": 33450 }, { "epoch": 4.02647412755716, "grad_norm": 7.106293678283691, "learning_rate": 0.000199422364976002, "loss": 1.698, "step": 33460 }, { "epoch": 4.0276774969915765, "grad_norm": 7.68731689453125, "learning_rate": 0.00019942195659877114, "loss": 1.8209, "step": 33470 }, { "epoch": 4.028880866425993, "grad_norm": 5.684768199920654, "learning_rate": 0.0001994215480776522, "loss": 1.5607, "step": 33480 }, { "epoch": 4.030084235860409, "grad_norm": 8.300897598266602, "learning_rate": 0.00019942113941264573, "loss": 1.6034, "step": 33490 }, { "epoch": 4.031287605294826, "grad_norm": 6.523977756500244, "learning_rate": 0.0001994207306037523, "loss": 1.6377, "step": 33500 }, { "epoch": 4.0324909747292415, "grad_norm": 4.503218650817871, "learning_rate": 0.00019942032165097255, "loss": 1.7049, "step": 33510 }, { "epoch": 4.033694344163658, "grad_norm": 10.025880813598633, "learning_rate": 0.00019941991255430702, "loss": 1.608, "step": 33520 }, { "epoch": 4.034897713598075, "grad_norm": 4.286937236785889, "learning_rate": 0.0001994195033137564, "loss": 1.7023, "step": 33530 }, { "epoch": 4.036101083032491, "grad_norm": 9.422300338745117, "learning_rate": 0.00019941909392932113, "loss": 1.7398, "step": 33540 }, { "epoch": 4.037304452466907, "grad_norm": 7.26862907409668, "learning_rate": 0.00019941868440100194, "loss": 1.8983, "step": 33550 }, { "epoch": 4.038507821901324, "grad_norm": 4.133803367614746, "learning_rate": 0.00019941827472879934, "loss": 1.4949, "step": 33560 }, { "epoch": 4.03971119133574, "grad_norm": 7.196639537811279, "learning_rate": 0.00019941786491271395, "loss": 1.7609, "step": 33570 }, { "epoch": 4.040914560770156, "grad_norm": 4.263759613037109, "learning_rate": 0.00019941745495274637, "loss": 1.4943, "step": 33580 }, { "epoch": 4.042117930204573, "grad_norm": 8.759932518005371, "learning_rate": 0.0001994170448488972, "loss": 1.7811, "step": 33590 }, { "epoch": 4.043321299638989, "grad_norm": 4.935215473175049, "learning_rate": 0.00019941663460116703, "loss": 1.698, "step": 33600 }, { "epoch": 4.044524669073406, "grad_norm": 5.62227725982666, "learning_rate": 0.00019941622420955644, "loss": 1.5783, "step": 33610 }, { "epoch": 4.045728038507822, "grad_norm": 5.987107276916504, "learning_rate": 0.000199415813674066, "loss": 1.7969, "step": 33620 }, { "epoch": 4.046931407942238, "grad_norm": 6.0525360107421875, "learning_rate": 0.00019941540299469633, "loss": 1.7639, "step": 33630 }, { "epoch": 4.048134777376655, "grad_norm": 9.477378845214844, "learning_rate": 0.00019941499217144805, "loss": 1.5933, "step": 33640 }, { "epoch": 4.049338146811071, "grad_norm": 6.63507080078125, "learning_rate": 0.00019941458120432173, "loss": 1.4568, "step": 33650 }, { "epoch": 4.050541516245487, "grad_norm": 4.016391754150391, "learning_rate": 0.00019941417009331796, "loss": 1.6234, "step": 33660 }, { "epoch": 4.051744885679904, "grad_norm": 8.432229995727539, "learning_rate": 0.00019941375883843736, "loss": 1.5321, "step": 33670 }, { "epoch": 4.05294825511432, "grad_norm": 6.387386322021484, "learning_rate": 0.00019941334743968047, "loss": 1.6318, "step": 33680 }, { "epoch": 4.054151624548736, "grad_norm": 9.522067070007324, "learning_rate": 0.00019941293589704798, "loss": 1.8896, "step": 33690 }, { "epoch": 4.055354993983153, "grad_norm": 6.084255695343018, "learning_rate": 0.00019941252421054037, "loss": 1.6872, "step": 33700 }, { "epoch": 4.056558363417569, "grad_norm": 4.2610955238342285, "learning_rate": 0.00019941211238015831, "loss": 1.513, "step": 33710 }, { "epoch": 4.0577617328519855, "grad_norm": 6.165771961212158, "learning_rate": 0.0001994117004059024, "loss": 1.8259, "step": 33720 }, { "epoch": 4.058965102286402, "grad_norm": 6.202225685119629, "learning_rate": 0.0001994112882877732, "loss": 1.3925, "step": 33730 }, { "epoch": 4.060168471720818, "grad_norm": 6.500694751739502, "learning_rate": 0.0001994108760257713, "loss": 1.6529, "step": 33740 }, { "epoch": 4.061371841155235, "grad_norm": 4.840877532958984, "learning_rate": 0.00019941046361989732, "loss": 1.6744, "step": 33750 }, { "epoch": 4.062575210589651, "grad_norm": 6.332531452178955, "learning_rate": 0.0001994100510701519, "loss": 1.9234, "step": 33760 }, { "epoch": 4.063778580024067, "grad_norm": 5.797792911529541, "learning_rate": 0.00019940963837653555, "loss": 1.5065, "step": 33770 }, { "epoch": 4.064981949458484, "grad_norm": 5.591522693634033, "learning_rate": 0.00019940922553904895, "loss": 1.6815, "step": 33780 }, { "epoch": 4.0661853188929005, "grad_norm": 9.232203483581543, "learning_rate": 0.00019940881255769264, "loss": 1.9034, "step": 33790 }, { "epoch": 4.067388688327316, "grad_norm": 7.283100128173828, "learning_rate": 0.0001994083994324672, "loss": 1.5855, "step": 33800 }, { "epoch": 4.068592057761733, "grad_norm": 3.7003464698791504, "learning_rate": 0.00019940798616337332, "loss": 1.6843, "step": 33810 }, { "epoch": 4.06979542719615, "grad_norm": 5.79300594329834, "learning_rate": 0.0001994075727504115, "loss": 1.8349, "step": 33820 }, { "epoch": 4.070998796630565, "grad_norm": 5.004201889038086, "learning_rate": 0.00019940715919358239, "loss": 1.8159, "step": 33830 }, { "epoch": 4.072202166064982, "grad_norm": 6.606564521789551, "learning_rate": 0.0001994067454928866, "loss": 1.8115, "step": 33840 }, { "epoch": 4.073405535499398, "grad_norm": 4.895559310913086, "learning_rate": 0.00019940633164832464, "loss": 1.8092, "step": 33850 }, { "epoch": 4.074608904933815, "grad_norm": 3.183698892593384, "learning_rate": 0.00019940591765989725, "loss": 1.7114, "step": 33860 }, { "epoch": 4.075812274368231, "grad_norm": 6.283841609954834, "learning_rate": 0.0001994055035276049, "loss": 1.5288, "step": 33870 }, { "epoch": 4.077015643802647, "grad_norm": 3.9874267578125, "learning_rate": 0.0001994050892514483, "loss": 1.8218, "step": 33880 }, { "epoch": 4.078219013237064, "grad_norm": 8.361854553222656, "learning_rate": 0.00019940467483142794, "loss": 1.7652, "step": 33890 }, { "epoch": 4.07942238267148, "grad_norm": 6.726755142211914, "learning_rate": 0.0001994042602675445, "loss": 1.6765, "step": 33900 }, { "epoch": 4.080625752105896, "grad_norm": 4.079782009124756, "learning_rate": 0.00019940384555979857, "loss": 1.7714, "step": 33910 }, { "epoch": 4.081829121540313, "grad_norm": 7.918765544891357, "learning_rate": 0.00019940343070819073, "loss": 1.7044, "step": 33920 }, { "epoch": 4.0830324909747295, "grad_norm": 5.073817253112793, "learning_rate": 0.00019940301571272156, "loss": 1.8873, "step": 33930 }, { "epoch": 4.084235860409145, "grad_norm": 7.854415416717529, "learning_rate": 0.0001994026005733917, "loss": 1.8234, "step": 33940 }, { "epoch": 4.085439229843562, "grad_norm": 9.638435363769531, "learning_rate": 0.00019940218529020172, "loss": 1.8032, "step": 33950 }, { "epoch": 4.086642599277979, "grad_norm": 5.930091857910156, "learning_rate": 0.00019940176986315224, "loss": 1.7172, "step": 33960 }, { "epoch": 4.0878459687123945, "grad_norm": 7.531289100646973, "learning_rate": 0.00019940135429224386, "loss": 1.9137, "step": 33970 }, { "epoch": 4.089049338146811, "grad_norm": 5.307153224945068, "learning_rate": 0.00019940093857747718, "loss": 1.832, "step": 33980 }, { "epoch": 4.090252707581228, "grad_norm": 7.0909905433654785, "learning_rate": 0.00019940052271885282, "loss": 1.6379, "step": 33990 }, { "epoch": 4.091456077015644, "grad_norm": 5.340932846069336, "learning_rate": 0.00019940010671637133, "loss": 1.6154, "step": 34000 }, { "epoch": 4.09265944645006, "grad_norm": 5.438321113586426, "learning_rate": 0.00019939969057003335, "loss": 1.9888, "step": 34010 }, { "epoch": 4.093862815884476, "grad_norm": 5.239195346832275, "learning_rate": 0.0001993992742798395, "loss": 1.4993, "step": 34020 }, { "epoch": 4.095066185318893, "grad_norm": 4.424549102783203, "learning_rate": 0.00019939885784579032, "loss": 1.7697, "step": 34030 }, { "epoch": 4.0962695547533094, "grad_norm": 8.237585067749023, "learning_rate": 0.00019939844126788648, "loss": 1.8262, "step": 34040 }, { "epoch": 4.097472924187725, "grad_norm": 6.078985214233398, "learning_rate": 0.00019939802454612858, "loss": 1.6547, "step": 34050 }, { "epoch": 4.098676293622142, "grad_norm": 4.0947394371032715, "learning_rate": 0.00019939760768051713, "loss": 1.769, "step": 34060 }, { "epoch": 4.099879663056559, "grad_norm": 7.274417877197266, "learning_rate": 0.00019939719067105287, "loss": 1.663, "step": 34070 }, { "epoch": 4.101083032490974, "grad_norm": 4.327084064483643, "learning_rate": 0.00019939677351773629, "loss": 1.7496, "step": 34080 }, { "epoch": 4.102286401925391, "grad_norm": 8.57921314239502, "learning_rate": 0.00019939635622056806, "loss": 1.6565, "step": 34090 }, { "epoch": 4.103489771359808, "grad_norm": 5.8311028480529785, "learning_rate": 0.00019939593877954875, "loss": 1.6551, "step": 34100 }, { "epoch": 4.1046931407942235, "grad_norm": 4.009025573730469, "learning_rate": 0.00019939552119467897, "loss": 1.8955, "step": 34110 }, { "epoch": 4.10589651022864, "grad_norm": 6.618770599365234, "learning_rate": 0.00019939510346595935, "loss": 1.5865, "step": 34120 }, { "epoch": 4.107099879663057, "grad_norm": 3.555020332336426, "learning_rate": 0.00019939468559339047, "loss": 1.6269, "step": 34130 }, { "epoch": 4.108303249097473, "grad_norm": 7.425025463104248, "learning_rate": 0.00019939426757697293, "loss": 1.7053, "step": 34140 }, { "epoch": 4.109506618531889, "grad_norm": 6.488142967224121, "learning_rate": 0.00019939384941670735, "loss": 1.8724, "step": 34150 }, { "epoch": 4.110709987966306, "grad_norm": 3.57918643951416, "learning_rate": 0.00019939343111259435, "loss": 1.6016, "step": 34160 }, { "epoch": 4.111913357400722, "grad_norm": 6.3030924797058105, "learning_rate": 0.00019939301266463447, "loss": 1.5915, "step": 34170 }, { "epoch": 4.1131167268351385, "grad_norm": 4.9697418212890625, "learning_rate": 0.00019939259407282842, "loss": 1.6447, "step": 34180 }, { "epoch": 4.114320096269555, "grad_norm": 7.776681900024414, "learning_rate": 0.00019939217533717672, "loss": 1.8343, "step": 34190 }, { "epoch": 4.115523465703971, "grad_norm": 4.591990947723389, "learning_rate": 0.00019939175645768, "loss": 1.4935, "step": 34200 }, { "epoch": 4.116726835138388, "grad_norm": 4.370880603790283, "learning_rate": 0.00019939133743433887, "loss": 1.8305, "step": 34210 }, { "epoch": 4.1179302045728035, "grad_norm": 6.70311975479126, "learning_rate": 0.00019939091826715394, "loss": 1.5805, "step": 34220 }, { "epoch": 4.11913357400722, "grad_norm": 5.470571994781494, "learning_rate": 0.0001993904989561258, "loss": 1.6937, "step": 34230 }, { "epoch": 4.120336943441637, "grad_norm": 7.2369160652160645, "learning_rate": 0.00019939007950125508, "loss": 1.8691, "step": 34240 }, { "epoch": 4.121540312876053, "grad_norm": 5.476625919342041, "learning_rate": 0.0001993896599025424, "loss": 1.7346, "step": 34250 }, { "epoch": 4.122743682310469, "grad_norm": 3.4230034351348877, "learning_rate": 0.0001993892401599883, "loss": 2.0286, "step": 34260 }, { "epoch": 4.123947051744886, "grad_norm": 6.428805351257324, "learning_rate": 0.00019938882027359346, "loss": 1.7459, "step": 34270 }, { "epoch": 4.125150421179302, "grad_norm": 5.378191947937012, "learning_rate": 0.00019938840024335844, "loss": 1.7601, "step": 34280 }, { "epoch": 4.126353790613718, "grad_norm": 11.22222900390625, "learning_rate": 0.00019938798006928385, "loss": 1.7118, "step": 34290 }, { "epoch": 4.127557160048135, "grad_norm": 21.66954803466797, "learning_rate": 0.00019938755975137034, "loss": 1.7575, "step": 34300 }, { "epoch": 4.128760529482551, "grad_norm": 4.3829026222229, "learning_rate": 0.0001993871392896185, "loss": 1.903, "step": 34310 }, { "epoch": 4.129963898916968, "grad_norm": 6.230332374572754, "learning_rate": 0.00019938671868402895, "loss": 1.6153, "step": 34320 }, { "epoch": 4.131167268351384, "grad_norm": 4.583105564117432, "learning_rate": 0.00019938629793460225, "loss": 1.7303, "step": 34330 }, { "epoch": 4.1323706377858, "grad_norm": 7.515890598297119, "learning_rate": 0.00019938587704133905, "loss": 1.6951, "step": 34340 }, { "epoch": 4.133574007220217, "grad_norm": 4.609417915344238, "learning_rate": 0.00019938545600423993, "loss": 1.6442, "step": 34350 }, { "epoch": 4.1347773766546325, "grad_norm": 4.643413066864014, "learning_rate": 0.00019938503482330554, "loss": 1.6237, "step": 34360 }, { "epoch": 4.135980746089049, "grad_norm": 5.234893798828125, "learning_rate": 0.00019938461349853644, "loss": 1.7295, "step": 34370 }, { "epoch": 4.137184115523466, "grad_norm": 4.667137145996094, "learning_rate": 0.00019938419202993328, "loss": 1.6193, "step": 34380 }, { "epoch": 4.138387484957882, "grad_norm": 9.850410461425781, "learning_rate": 0.00019938377041749664, "loss": 1.787, "step": 34390 }, { "epoch": 4.139590854392298, "grad_norm": 4.949896335601807, "learning_rate": 0.0001993833486612272, "loss": 1.5774, "step": 34400 }, { "epoch": 4.140794223826715, "grad_norm": 4.389410495758057, "learning_rate": 0.00019938292676112548, "loss": 1.9383, "step": 34410 }, { "epoch": 4.141997593261131, "grad_norm": 6.364817142486572, "learning_rate": 0.00019938250471719212, "loss": 1.7452, "step": 34420 }, { "epoch": 4.1432009626955475, "grad_norm": 5.512551307678223, "learning_rate": 0.00019938208252942777, "loss": 1.8827, "step": 34430 }, { "epoch": 4.144404332129964, "grad_norm": 7.060027599334717, "learning_rate": 0.00019938166019783297, "loss": 1.6919, "step": 34440 }, { "epoch": 4.14560770156438, "grad_norm": 10.936914443969727, "learning_rate": 0.00019938123772240843, "loss": 1.6053, "step": 34450 }, { "epoch": 4.146811070998797, "grad_norm": 5.33294677734375, "learning_rate": 0.00019938081510315466, "loss": 1.8714, "step": 34460 }, { "epoch": 4.148014440433213, "grad_norm": 7.198515892028809, "learning_rate": 0.0001993803923400723, "loss": 1.8982, "step": 34470 }, { "epoch": 4.149217809867629, "grad_norm": 5.331077575683594, "learning_rate": 0.000199379969433162, "loss": 1.5221, "step": 34480 }, { "epoch": 4.150421179302046, "grad_norm": 9.042343139648438, "learning_rate": 0.00019937954638242435, "loss": 1.8686, "step": 34490 }, { "epoch": 4.1516245487364625, "grad_norm": 5.276646137237549, "learning_rate": 0.00019937912318785994, "loss": 1.5271, "step": 34500 }, { "epoch": 4.152827918170878, "grad_norm": 3.9702565670013428, "learning_rate": 0.00019937869984946942, "loss": 1.644, "step": 34510 }, { "epoch": 4.154031287605295, "grad_norm": 7.433538436889648, "learning_rate": 0.00019937827636725338, "loss": 1.6832, "step": 34520 }, { "epoch": 4.155234657039712, "grad_norm": 7.242001533508301, "learning_rate": 0.00019937785274121245, "loss": 1.6214, "step": 34530 }, { "epoch": 4.156438026474127, "grad_norm": 8.90034294128418, "learning_rate": 0.00019937742897134723, "loss": 1.6325, "step": 34540 }, { "epoch": 4.157641395908544, "grad_norm": 6.180896759033203, "learning_rate": 0.00019937700505765832, "loss": 1.7317, "step": 34550 }, { "epoch": 4.15884476534296, "grad_norm": 5.105179786682129, "learning_rate": 0.00019937658100014635, "loss": 1.8098, "step": 34560 }, { "epoch": 4.160048134777377, "grad_norm": 8.221943855285645, "learning_rate": 0.00019937615679881194, "loss": 1.7314, "step": 34570 }, { "epoch": 4.161251504211793, "grad_norm": 5.7328877449035645, "learning_rate": 0.0001993757324536557, "loss": 1.7452, "step": 34580 }, { "epoch": 4.162454873646209, "grad_norm": 12.69345760345459, "learning_rate": 0.00019937530796467824, "loss": 1.7706, "step": 34590 }, { "epoch": 4.163658243080626, "grad_norm": 6.551113128662109, "learning_rate": 0.00019937488333188018, "loss": 1.844, "step": 34600 }, { "epoch": 4.164861612515042, "grad_norm": 3.889042615890503, "learning_rate": 0.0001993744585552621, "loss": 1.6707, "step": 34610 }, { "epoch": 4.166064981949458, "grad_norm": 6.84438419342041, "learning_rate": 0.00019937403363482465, "loss": 1.6215, "step": 34620 }, { "epoch": 4.167268351383875, "grad_norm": 4.140308380126953, "learning_rate": 0.00019937360857056845, "loss": 1.8281, "step": 34630 }, { "epoch": 4.1684717208182915, "grad_norm": 9.280033111572266, "learning_rate": 0.0001993731833624941, "loss": 1.8555, "step": 34640 }, { "epoch": 4.169675090252707, "grad_norm": 5.941489219665527, "learning_rate": 0.00019937275801060225, "loss": 1.6224, "step": 34650 }, { "epoch": 4.170878459687124, "grad_norm": 3.7432074546813965, "learning_rate": 0.00019937233251489346, "loss": 1.7454, "step": 34660 }, { "epoch": 4.172081829121541, "grad_norm": 6.81998872756958, "learning_rate": 0.00019937190687536837, "loss": 1.5517, "step": 34670 }, { "epoch": 4.1732851985559565, "grad_norm": 6.194040775299072, "learning_rate": 0.0001993714810920276, "loss": 1.8116, "step": 34680 }, { "epoch": 4.174488567990373, "grad_norm": 9.579302787780762, "learning_rate": 0.00019937105516487175, "loss": 1.9583, "step": 34690 }, { "epoch": 4.17569193742479, "grad_norm": 6.3991522789001465, "learning_rate": 0.00019937062909390144, "loss": 1.7154, "step": 34700 }, { "epoch": 4.176895306859206, "grad_norm": 4.774811744689941, "learning_rate": 0.00019937020287911735, "loss": 1.8765, "step": 34710 }, { "epoch": 4.178098676293622, "grad_norm": 7.51531982421875, "learning_rate": 0.00019936977652051997, "loss": 1.7964, "step": 34720 }, { "epoch": 4.179302045728038, "grad_norm": 6.015110492706299, "learning_rate": 0.00019936935001811003, "loss": 1.6662, "step": 34730 }, { "epoch": 4.180505415162455, "grad_norm": 7.868860721588135, "learning_rate": 0.0001993689233718881, "loss": 1.9115, "step": 34740 }, { "epoch": 4.1817087845968715, "grad_norm": 5.770344257354736, "learning_rate": 0.00019936849658185483, "loss": 1.9144, "step": 34750 }, { "epoch": 4.182912154031287, "grad_norm": 4.498798370361328, "learning_rate": 0.00019936806964801078, "loss": 1.9393, "step": 34760 }, { "epoch": 4.184115523465704, "grad_norm": 7.16002893447876, "learning_rate": 0.00019936764257035661, "loss": 1.6565, "step": 34770 }, { "epoch": 4.185318892900121, "grad_norm": 5.324651718139648, "learning_rate": 0.00019936721534889292, "loss": 1.7747, "step": 34780 }, { "epoch": 4.186522262334536, "grad_norm": 8.296109199523926, "learning_rate": 0.00019936678798362033, "loss": 2.0047, "step": 34790 }, { "epoch": 4.187725631768953, "grad_norm": 7.719017028808594, "learning_rate": 0.0001993663604745395, "loss": 1.6695, "step": 34800 }, { "epoch": 4.18892900120337, "grad_norm": 6.086599349975586, "learning_rate": 0.000199365932821651, "loss": 1.8302, "step": 34810 }, { "epoch": 4.1901323706377855, "grad_norm": 7.158595561981201, "learning_rate": 0.00019936550502495543, "loss": 1.9011, "step": 34820 }, { "epoch": 4.191335740072202, "grad_norm": 5.211520195007324, "learning_rate": 0.00019936507708445347, "loss": 1.732, "step": 34830 }, { "epoch": 4.192539109506619, "grad_norm": 7.698371887207031, "learning_rate": 0.00019936464900014572, "loss": 1.9333, "step": 34840 }, { "epoch": 4.193742478941035, "grad_norm": 6.089698314666748, "learning_rate": 0.00019936422077203276, "loss": 1.8643, "step": 34850 }, { "epoch": 4.194945848375451, "grad_norm": 5.273772716522217, "learning_rate": 0.00019936379240011526, "loss": 1.8143, "step": 34860 }, { "epoch": 4.196149217809868, "grad_norm": 6.72157621383667, "learning_rate": 0.00019936336388439382, "loss": 1.6795, "step": 34870 }, { "epoch": 4.197352587244284, "grad_norm": 5.646254062652588, "learning_rate": 0.00019936293522486906, "loss": 1.8294, "step": 34880 }, { "epoch": 4.1985559566787005, "grad_norm": 9.14498519897461, "learning_rate": 0.00019936250642154163, "loss": 1.8915, "step": 34890 }, { "epoch": 4.199759326113116, "grad_norm": 5.023692607879639, "learning_rate": 0.00019936207747441208, "loss": 1.6755, "step": 34900 }, { "epoch": 4.200962695547533, "grad_norm": 4.519577980041504, "learning_rate": 0.00019936164838348108, "loss": 1.6403, "step": 34910 }, { "epoch": 4.20216606498195, "grad_norm": 7.0812482833862305, "learning_rate": 0.00019936121914874923, "loss": 1.6059, "step": 34920 }, { "epoch": 4.2033694344163655, "grad_norm": 5.809424877166748, "learning_rate": 0.0001993607897702172, "loss": 1.7562, "step": 34930 }, { "epoch": 4.204572803850782, "grad_norm": 12.280158042907715, "learning_rate": 0.00019936036024788555, "loss": 1.6649, "step": 34940 }, { "epoch": 4.205776173285199, "grad_norm": 7.3353657722473145, "learning_rate": 0.00019935993058175494, "loss": 1.7374, "step": 34950 }, { "epoch": 4.206979542719615, "grad_norm": 5.186786651611328, "learning_rate": 0.00019935950077182597, "loss": 2.0647, "step": 34960 }, { "epoch": 4.208182912154031, "grad_norm": 6.030091762542725, "learning_rate": 0.00019935907081809927, "loss": 1.4583, "step": 34970 }, { "epoch": 4.209386281588448, "grad_norm": 5.420872688293457, "learning_rate": 0.00019935864072057546, "loss": 1.8147, "step": 34980 }, { "epoch": 4.210589651022864, "grad_norm": 11.73915958404541, "learning_rate": 0.0001993582104792552, "loss": 1.8084, "step": 34990 }, { "epoch": 4.21179302045728, "grad_norm": 6.029935359954834, "learning_rate": 0.00019935778009413905, "loss": 1.8731, "step": 35000 }, { "epoch": 4.212996389891697, "grad_norm": 6.571990013122559, "learning_rate": 0.00019935734956522767, "loss": 1.7011, "step": 35010 }, { "epoch": 4.214199759326113, "grad_norm": 9.478644371032715, "learning_rate": 0.00019935691889252165, "loss": 1.78, "step": 35020 }, { "epoch": 4.21540312876053, "grad_norm": 5.048871994018555, "learning_rate": 0.00019935648807602166, "loss": 1.6787, "step": 35030 }, { "epoch": 4.216606498194946, "grad_norm": 15.470974922180176, "learning_rate": 0.00019935605711572828, "loss": 1.8724, "step": 35040 }, { "epoch": 4.217809867629362, "grad_norm": 6.290297985076904, "learning_rate": 0.00019935562601164223, "loss": 1.8578, "step": 35050 }, { "epoch": 4.219013237063779, "grad_norm": 4.576003551483154, "learning_rate": 0.00019935519476376397, "loss": 1.9149, "step": 35060 }, { "epoch": 4.2202166064981945, "grad_norm": 6.172026634216309, "learning_rate": 0.00019935476337209424, "loss": 1.7833, "step": 35070 }, { "epoch": 4.221419975932611, "grad_norm": 4.892474174499512, "learning_rate": 0.00019935433183663367, "loss": 1.7075, "step": 35080 }, { "epoch": 4.222623345367028, "grad_norm": 13.470946311950684, "learning_rate": 0.0001993539001573828, "loss": 2.0174, "step": 35090 }, { "epoch": 4.223826714801444, "grad_norm": 5.695079326629639, "learning_rate": 0.00019935346833434235, "loss": 1.5365, "step": 35100 }, { "epoch": 4.22503008423586, "grad_norm": 3.428987503051758, "learning_rate": 0.00019935303636751288, "loss": 1.7887, "step": 35110 }, { "epoch": 4.226233453670277, "grad_norm": 6.879944801330566, "learning_rate": 0.00019935260425689503, "loss": 1.8709, "step": 35120 }, { "epoch": 4.227436823104693, "grad_norm": 7.473792552947998, "learning_rate": 0.00019935217200248945, "loss": 1.806, "step": 35130 }, { "epoch": 4.2286401925391095, "grad_norm": 6.771570205688477, "learning_rate": 0.00019935173960429673, "loss": 1.8248, "step": 35140 }, { "epoch": 4.229843561973526, "grad_norm": 5.999017238616943, "learning_rate": 0.00019935130706231756, "loss": 1.6427, "step": 35150 }, { "epoch": 4.231046931407942, "grad_norm": 4.775561332702637, "learning_rate": 0.00019935087437655247, "loss": 1.8977, "step": 35160 }, { "epoch": 4.232250300842359, "grad_norm": 7.566339015960693, "learning_rate": 0.00019935044154700213, "loss": 1.7255, "step": 35170 }, { "epoch": 4.233453670276775, "grad_norm": 5.545777320861816, "learning_rate": 0.00019935000857366721, "loss": 1.5561, "step": 35180 }, { "epoch": 4.234657039711191, "grad_norm": 7.945019245147705, "learning_rate": 0.00019934957545654828, "loss": 1.846, "step": 35190 }, { "epoch": 4.235860409145608, "grad_norm": 7.428624153137207, "learning_rate": 0.00019934914219564597, "loss": 1.9585, "step": 35200 }, { "epoch": 4.2370637785800245, "grad_norm": 5.946137428283691, "learning_rate": 0.00019934870879096094, "loss": 1.7993, "step": 35210 }, { "epoch": 4.23826714801444, "grad_norm": 5.7132039070129395, "learning_rate": 0.00019934827524249383, "loss": 1.7853, "step": 35220 }, { "epoch": 4.239470517448857, "grad_norm": 5.234225273132324, "learning_rate": 0.0001993478415502452, "loss": 1.735, "step": 35230 }, { "epoch": 4.240673886883273, "grad_norm": 7.272106170654297, "learning_rate": 0.00019934740771421572, "loss": 2.1715, "step": 35240 }, { "epoch": 4.241877256317689, "grad_norm": 5.432416915893555, "learning_rate": 0.000199346973734406, "loss": 1.7854, "step": 35250 }, { "epoch": 4.243080625752106, "grad_norm": 3.8165786266326904, "learning_rate": 0.0001993465396108167, "loss": 2.0691, "step": 35260 }, { "epoch": 4.244283995186522, "grad_norm": 6.915083885192871, "learning_rate": 0.00019934610534344844, "loss": 1.6476, "step": 35270 }, { "epoch": 4.245487364620939, "grad_norm": 6.73002815246582, "learning_rate": 0.00019934567093230182, "loss": 1.9744, "step": 35280 }, { "epoch": 4.246690734055355, "grad_norm": 10.002744674682617, "learning_rate": 0.00019934523637737747, "loss": 2.1234, "step": 35290 }, { "epoch": 4.247894103489771, "grad_norm": 5.658259391784668, "learning_rate": 0.0001993448016786761, "loss": 1.6212, "step": 35300 }, { "epoch": 4.249097472924188, "grad_norm": 5.008306503295898, "learning_rate": 0.0001993443668361982, "loss": 1.8222, "step": 35310 }, { "epoch": 4.250300842358604, "grad_norm": 7.853512287139893, "learning_rate": 0.0001993439318499445, "loss": 1.6649, "step": 35320 }, { "epoch": 4.25150421179302, "grad_norm": 6.960813999176025, "learning_rate": 0.00019934349671991563, "loss": 1.7147, "step": 35330 }, { "epoch": 4.252707581227437, "grad_norm": 9.682868003845215, "learning_rate": 0.00019934306144611216, "loss": 1.7582, "step": 35340 }, { "epoch": 4.2539109506618535, "grad_norm": 6.54448938369751, "learning_rate": 0.00019934262602853476, "loss": 1.7474, "step": 35350 }, { "epoch": 4.255114320096269, "grad_norm": 4.013662338256836, "learning_rate": 0.00019934219046718405, "loss": 1.8111, "step": 35360 }, { "epoch": 4.256317689530686, "grad_norm": 7.412820339202881, "learning_rate": 0.00019934175476206067, "loss": 1.8062, "step": 35370 }, { "epoch": 4.257521058965103, "grad_norm": 5.398044109344482, "learning_rate": 0.00019934131891316526, "loss": 1.989, "step": 35380 }, { "epoch": 4.2587244283995185, "grad_norm": 21.605239868164062, "learning_rate": 0.0001993408829204984, "loss": 1.9033, "step": 35390 }, { "epoch": 4.259927797833935, "grad_norm": 9.701786041259766, "learning_rate": 0.00019934044678406078, "loss": 1.7739, "step": 35400 }, { "epoch": 4.261131167268351, "grad_norm": 4.230984210968018, "learning_rate": 0.00019934001050385302, "loss": 1.6612, "step": 35410 }, { "epoch": 4.262334536702768, "grad_norm": 6.254148483276367, "learning_rate": 0.00019933957407987572, "loss": 1.5458, "step": 35420 }, { "epoch": 4.263537906137184, "grad_norm": 3.7436411380767822, "learning_rate": 0.00019933913751212954, "loss": 1.9107, "step": 35430 }, { "epoch": 4.2647412755716, "grad_norm": 8.439335823059082, "learning_rate": 0.00019933870080061507, "loss": 1.9602, "step": 35440 }, { "epoch": 4.265944645006017, "grad_norm": 6.327473163604736, "learning_rate": 0.000199338263945333, "loss": 1.6398, "step": 35450 }, { "epoch": 4.2671480144404335, "grad_norm": 3.532308578491211, "learning_rate": 0.00019933782694628394, "loss": 1.9751, "step": 35460 }, { "epoch": 4.268351383874849, "grad_norm": 11.994088172912598, "learning_rate": 0.0001993373898034685, "loss": 1.7779, "step": 35470 }, { "epoch": 4.269554753309266, "grad_norm": 4.058684349060059, "learning_rate": 0.00019933695251688735, "loss": 1.8762, "step": 35480 }, { "epoch": 4.270758122743683, "grad_norm": 10.060203552246094, "learning_rate": 0.0001993365150865411, "loss": 1.6106, "step": 35490 }, { "epoch": 4.271961492178098, "grad_norm": 4.538617134094238, "learning_rate": 0.00019933607751243038, "loss": 1.4892, "step": 35500 }, { "epoch": 4.273164861612515, "grad_norm": 5.000890254974365, "learning_rate": 0.00019933563979455582, "loss": 1.73, "step": 35510 }, { "epoch": 4.274368231046932, "grad_norm": 6.338278770446777, "learning_rate": 0.00019933520193291812, "loss": 1.7313, "step": 35520 }, { "epoch": 4.2755716004813475, "grad_norm": 4.403758525848389, "learning_rate": 0.0001993347639275178, "loss": 1.7735, "step": 35530 }, { "epoch": 4.276774969915764, "grad_norm": 8.195080757141113, "learning_rate": 0.00019933432577835559, "loss": 1.7672, "step": 35540 }, { "epoch": 4.277978339350181, "grad_norm": 5.781729221343994, "learning_rate": 0.00019933388748543204, "loss": 1.6646, "step": 35550 }, { "epoch": 4.279181708784597, "grad_norm": 3.9311957359313965, "learning_rate": 0.00019933344904874786, "loss": 1.707, "step": 35560 }, { "epoch": 4.280385078219013, "grad_norm": 6.707176685333252, "learning_rate": 0.00019933301046830363, "loss": 1.8034, "step": 35570 }, { "epoch": 4.28158844765343, "grad_norm": 5.253597259521484, "learning_rate": 0.00019933257174410004, "loss": 1.6822, "step": 35580 }, { "epoch": 4.282791817087846, "grad_norm": 7.279968738555908, "learning_rate": 0.00019933213287613767, "loss": 1.7282, "step": 35590 }, { "epoch": 4.2839951865222625, "grad_norm": 5.6604390144348145, "learning_rate": 0.00019933169386441722, "loss": 1.7609, "step": 35600 }, { "epoch": 4.285198555956678, "grad_norm": 3.6584994792938232, "learning_rate": 0.00019933125470893926, "loss": 1.7549, "step": 35610 }, { "epoch": 4.286401925391095, "grad_norm": 5.592550754547119, "learning_rate": 0.00019933081540970444, "loss": 1.449, "step": 35620 }, { "epoch": 4.287605294825512, "grad_norm": 4.747725963592529, "learning_rate": 0.00019933037596671341, "loss": 1.8285, "step": 35630 }, { "epoch": 4.2888086642599275, "grad_norm": 8.285834312438965, "learning_rate": 0.00019932993637996683, "loss": 1.986, "step": 35640 }, { "epoch": 4.290012033694344, "grad_norm": 6.873829364776611, "learning_rate": 0.00019932949664946525, "loss": 1.6899, "step": 35650 }, { "epoch": 4.291215403128761, "grad_norm": 4.187936305999756, "learning_rate": 0.00019932905677520941, "loss": 1.7355, "step": 35660 }, { "epoch": 4.292418772563177, "grad_norm": 5.897096157073975, "learning_rate": 0.0001993286167571999, "loss": 1.8627, "step": 35670 }, { "epoch": 4.293622141997593, "grad_norm": 4.470292568206787, "learning_rate": 0.00019932817659543736, "loss": 1.6871, "step": 35680 }, { "epoch": 4.29482551143201, "grad_norm": 11.189642906188965, "learning_rate": 0.0001993277362899224, "loss": 1.7162, "step": 35690 }, { "epoch": 4.296028880866426, "grad_norm": 5.609859943389893, "learning_rate": 0.0001993272958406557, "loss": 1.844, "step": 35700 }, { "epoch": 4.297232250300842, "grad_norm": 4.639491558074951, "learning_rate": 0.00019932685524763787, "loss": 1.8034, "step": 35710 }, { "epoch": 4.298435619735259, "grad_norm": 6.2805352210998535, "learning_rate": 0.00019932641451086955, "loss": 1.6152, "step": 35720 }, { "epoch": 4.299638989169675, "grad_norm": 5.58298397064209, "learning_rate": 0.0001993259736303514, "loss": 1.8664, "step": 35730 }, { "epoch": 4.300842358604092, "grad_norm": 7.744947910308838, "learning_rate": 0.00019932553260608405, "loss": 1.6432, "step": 35740 }, { "epoch": 4.302045728038507, "grad_norm": 5.18134069442749, "learning_rate": 0.00019932509143806812, "loss": 1.8218, "step": 35750 }, { "epoch": 4.303249097472924, "grad_norm": 4.261021614074707, "learning_rate": 0.0001993246501263043, "loss": 1.947, "step": 35760 }, { "epoch": 4.304452466907341, "grad_norm": 13.76388168334961, "learning_rate": 0.00019932420867079312, "loss": 1.7549, "step": 35770 }, { "epoch": 4.3056558363417565, "grad_norm": 4.586688041687012, "learning_rate": 0.00019932376707153533, "loss": 1.8343, "step": 35780 }, { "epoch": 4.306859205776173, "grad_norm": 8.211957931518555, "learning_rate": 0.0001993233253285315, "loss": 1.8815, "step": 35790 }, { "epoch": 4.30806257521059, "grad_norm": 6.673455715179443, "learning_rate": 0.0001993228834417823, "loss": 1.7281, "step": 35800 }, { "epoch": 4.309265944645006, "grad_norm": 5.128195285797119, "learning_rate": 0.0001993224414112884, "loss": 1.7434, "step": 35810 }, { "epoch": 4.310469314079422, "grad_norm": 15.036243438720703, "learning_rate": 0.0001993219992370504, "loss": 1.6523, "step": 35820 }, { "epoch": 4.311672683513839, "grad_norm": 5.425032138824463, "learning_rate": 0.0001993215569190689, "loss": 1.8124, "step": 35830 }, { "epoch": 4.312876052948255, "grad_norm": 8.083535194396973, "learning_rate": 0.00019932111445734461, "loss": 1.9315, "step": 35840 }, { "epoch": 4.3140794223826715, "grad_norm": 6.526995658874512, "learning_rate": 0.00019932067185187818, "loss": 1.857, "step": 35850 }, { "epoch": 4.315282791817088, "grad_norm": 3.849255323410034, "learning_rate": 0.00019932022910267017, "loss": 1.7268, "step": 35860 }, { "epoch": 4.316486161251504, "grad_norm": 7.1441450119018555, "learning_rate": 0.00019931978620972126, "loss": 1.7334, "step": 35870 }, { "epoch": 4.317689530685921, "grad_norm": 5.974358558654785, "learning_rate": 0.00019931934317303217, "loss": 1.827, "step": 35880 }, { "epoch": 4.318892900120337, "grad_norm": 9.186847686767578, "learning_rate": 0.0001993188999926034, "loss": 1.8247, "step": 35890 }, { "epoch": 4.320096269554753, "grad_norm": 5.240936756134033, "learning_rate": 0.00019931845666843568, "loss": 1.6278, "step": 35900 }, { "epoch": 4.32129963898917, "grad_norm": 4.584122180938721, "learning_rate": 0.00019931801320052962, "loss": 1.8463, "step": 35910 }, { "epoch": 4.3225030084235865, "grad_norm": 15.641439437866211, "learning_rate": 0.0001993175695888859, "loss": 1.7596, "step": 35920 }, { "epoch": 4.323706377858002, "grad_norm": 5.413671016693115, "learning_rate": 0.00019931712583350512, "loss": 1.8015, "step": 35930 }, { "epoch": 4.324909747292419, "grad_norm": 9.118616104125977, "learning_rate": 0.00019931668193438795, "loss": 1.7768, "step": 35940 }, { "epoch": 4.326113116726835, "grad_norm": 5.554944038391113, "learning_rate": 0.00019931623789153503, "loss": 1.7296, "step": 35950 }, { "epoch": 4.327316486161251, "grad_norm": 3.5678491592407227, "learning_rate": 0.00019931579370494697, "loss": 1.9177, "step": 35960 }, { "epoch": 4.328519855595668, "grad_norm": 7.3495869636535645, "learning_rate": 0.00019931534937462443, "loss": 1.7876, "step": 35970 }, { "epoch": 4.329723225030084, "grad_norm": 4.591203212738037, "learning_rate": 0.00019931490490056807, "loss": 1.9446, "step": 35980 }, { "epoch": 4.330926594464501, "grad_norm": 9.50680160522461, "learning_rate": 0.00019931446028277852, "loss": 2.0639, "step": 35990 }, { "epoch": 4.332129963898917, "grad_norm": 5.03529167175293, "learning_rate": 0.00019931401552125645, "loss": 1.9546, "step": 36000 }, { "epoch": 4.333333333333333, "grad_norm": 5.492129325866699, "learning_rate": 0.00019931357061600245, "loss": 1.8102, "step": 36010 }, { "epoch": 4.33453670276775, "grad_norm": 6.360865592956543, "learning_rate": 0.00019931312556701721, "loss": 1.7551, "step": 36020 }, { "epoch": 4.335740072202166, "grad_norm": 4.650359630584717, "learning_rate": 0.00019931268037430132, "loss": 1.7854, "step": 36030 }, { "epoch": 4.336943441636582, "grad_norm": 10.43637752532959, "learning_rate": 0.00019931223503785552, "loss": 1.9163, "step": 36040 }, { "epoch": 4.338146811070999, "grad_norm": 8.118700981140137, "learning_rate": 0.00019931178955768036, "loss": 1.9775, "step": 36050 }, { "epoch": 4.3393501805054155, "grad_norm": 9.109759330749512, "learning_rate": 0.00019931134393377656, "loss": 1.8744, "step": 36060 }, { "epoch": 4.340553549939831, "grad_norm": 7.326494216918945, "learning_rate": 0.0001993108981661447, "loss": 1.9723, "step": 36070 }, { "epoch": 4.341756919374248, "grad_norm": 6.213112831115723, "learning_rate": 0.00019931045225478542, "loss": 1.7883, "step": 36080 }, { "epoch": 4.342960288808664, "grad_norm": 6.885368347167969, "learning_rate": 0.00019931000619969943, "loss": 1.9124, "step": 36090 }, { "epoch": 4.3441636582430805, "grad_norm": 5.544731616973877, "learning_rate": 0.00019930956000088735, "loss": 1.7551, "step": 36100 }, { "epoch": 4.345367027677497, "grad_norm": 3.515679359436035, "learning_rate": 0.00019930911365834982, "loss": 1.8179, "step": 36110 }, { "epoch": 4.346570397111913, "grad_norm": 6.745066165924072, "learning_rate": 0.00019930866717208744, "loss": 1.9082, "step": 36120 }, { "epoch": 4.34777376654633, "grad_norm": 4.27610969543457, "learning_rate": 0.00019930822054210095, "loss": 1.8479, "step": 36130 }, { "epoch": 4.348977135980746, "grad_norm": 6.727752685546875, "learning_rate": 0.0001993077737683909, "loss": 1.5559, "step": 36140 }, { "epoch": 4.350180505415162, "grad_norm": 5.7208027839660645, "learning_rate": 0.00019930732685095802, "loss": 1.7292, "step": 36150 }, { "epoch": 4.351383874849579, "grad_norm": 4.691730976104736, "learning_rate": 0.00019930687978980292, "loss": 1.8678, "step": 36160 }, { "epoch": 4.3525872442839955, "grad_norm": 6.359548568725586, "learning_rate": 0.00019930643258492622, "loss": 1.931, "step": 36170 }, { "epoch": 4.353790613718411, "grad_norm": 6.833027362823486, "learning_rate": 0.00019930598523632863, "loss": 1.7092, "step": 36180 }, { "epoch": 4.354993983152828, "grad_norm": 7.761911869049072, "learning_rate": 0.00019930553774401074, "loss": 1.7483, "step": 36190 }, { "epoch": 4.356197352587245, "grad_norm": 5.729844093322754, "learning_rate": 0.0001993050901079732, "loss": 1.7814, "step": 36200 }, { "epoch": 4.35740072202166, "grad_norm": 4.560784339904785, "learning_rate": 0.0001993046423282167, "loss": 1.7795, "step": 36210 }, { "epoch": 4.358604091456077, "grad_norm": 7.4706244468688965, "learning_rate": 0.00019930419440474187, "loss": 1.6964, "step": 36220 }, { "epoch": 4.359807460890494, "grad_norm": 11.191123962402344, "learning_rate": 0.00019930374633754934, "loss": 1.6069, "step": 36230 }, { "epoch": 4.3610108303249095, "grad_norm": 7.026401996612549, "learning_rate": 0.0001993032981266398, "loss": 1.7645, "step": 36240 }, { "epoch": 4.362214199759326, "grad_norm": 7.390120983123779, "learning_rate": 0.00019930284977201382, "loss": 1.7388, "step": 36250 }, { "epoch": 4.363417569193743, "grad_norm": 4.75221061706543, "learning_rate": 0.00019930240127367214, "loss": 1.6189, "step": 36260 }, { "epoch": 4.364620938628159, "grad_norm": 8.297945022583008, "learning_rate": 0.00019930195263161536, "loss": 1.8325, "step": 36270 }, { "epoch": 4.365824308062575, "grad_norm": 5.3272600173950195, "learning_rate": 0.00019930150384584412, "loss": 1.8726, "step": 36280 }, { "epoch": 4.367027677496991, "grad_norm": 7.332561492919922, "learning_rate": 0.00019930105491635912, "loss": 1.8565, "step": 36290 }, { "epoch": 4.368231046931408, "grad_norm": 5.097908020019531, "learning_rate": 0.00019930060584316095, "loss": 1.6533, "step": 36300 }, { "epoch": 4.3694344163658245, "grad_norm": 6.119940280914307, "learning_rate": 0.00019930015662625026, "loss": 2.0051, "step": 36310 }, { "epoch": 4.37063778580024, "grad_norm": 6.721015930175781, "learning_rate": 0.0001992997072656278, "loss": 1.7732, "step": 36320 }, { "epoch": 4.371841155234657, "grad_norm": 5.655241966247559, "learning_rate": 0.0001992992577612941, "loss": 1.7963, "step": 36330 }, { "epoch": 4.373044524669074, "grad_norm": 7.532368183135986, "learning_rate": 0.00019929880811324986, "loss": 2.1093, "step": 36340 }, { "epoch": 4.3742478941034895, "grad_norm": 7.15523624420166, "learning_rate": 0.00019929835832149575, "loss": 1.7397, "step": 36350 }, { "epoch": 4.375451263537906, "grad_norm": 5.214695930480957, "learning_rate": 0.00019929790838603238, "loss": 1.732, "step": 36360 }, { "epoch": 4.376654632972323, "grad_norm": 6.590765953063965, "learning_rate": 0.00019929745830686045, "loss": 1.8307, "step": 36370 }, { "epoch": 4.377858002406739, "grad_norm": 7.025494575500488, "learning_rate": 0.00019929700808398053, "loss": 1.955, "step": 36380 }, { "epoch": 4.379061371841155, "grad_norm": 8.426220893859863, "learning_rate": 0.00019929655771739336, "loss": 1.9521, "step": 36390 }, { "epoch": 4.380264741275572, "grad_norm": 6.4095048904418945, "learning_rate": 0.00019929610720709955, "loss": 1.682, "step": 36400 }, { "epoch": 4.381468110709988, "grad_norm": 3.4742069244384766, "learning_rate": 0.00019929565655309975, "loss": 1.7903, "step": 36410 }, { "epoch": 4.382671480144404, "grad_norm": 7.236454010009766, "learning_rate": 0.00019929520575539462, "loss": 1.752, "step": 36420 }, { "epoch": 4.38387484957882, "grad_norm": 6.319152355194092, "learning_rate": 0.00019929475481398484, "loss": 1.9301, "step": 36430 }, { "epoch": 4.385078219013237, "grad_norm": 9.045572280883789, "learning_rate": 0.000199294303728871, "loss": 2.134, "step": 36440 }, { "epoch": 4.386281588447654, "grad_norm": 5.281524658203125, "learning_rate": 0.0001992938525000538, "loss": 1.6693, "step": 36450 }, { "epoch": 4.387484957882069, "grad_norm": 7.266637802124023, "learning_rate": 0.00019929340112753385, "loss": 1.9864, "step": 36460 }, { "epoch": 4.388688327316486, "grad_norm": 5.887492656707764, "learning_rate": 0.0001992929496113119, "loss": 1.9689, "step": 36470 }, { "epoch": 4.389891696750903, "grad_norm": 6.876590728759766, "learning_rate": 0.00019929249795138846, "loss": 1.757, "step": 36480 }, { "epoch": 4.3910950661853185, "grad_norm": 7.290596008300781, "learning_rate": 0.00019929204614776432, "loss": 1.8331, "step": 36490 }, { "epoch": 4.392298435619735, "grad_norm": 6.67741060256958, "learning_rate": 0.00019929159420044003, "loss": 1.689, "step": 36500 }, { "epoch": 4.393501805054152, "grad_norm": 3.9267070293426514, "learning_rate": 0.00019929114210941634, "loss": 1.9893, "step": 36510 }, { "epoch": 4.394705174488568, "grad_norm": 18.108749389648438, "learning_rate": 0.00019929068987469383, "loss": 1.6952, "step": 36520 }, { "epoch": 4.395908543922984, "grad_norm": 5.565483093261719, "learning_rate": 0.00019929023749627316, "loss": 1.8553, "step": 36530 }, { "epoch": 4.397111913357401, "grad_norm": 7.668908596038818, "learning_rate": 0.00019928978497415502, "loss": 1.9182, "step": 36540 }, { "epoch": 4.398315282791817, "grad_norm": 5.820605754852295, "learning_rate": 0.00019928933230834006, "loss": 1.7105, "step": 36550 }, { "epoch": 4.3995186522262335, "grad_norm": 4.420670986175537, "learning_rate": 0.00019928887949882887, "loss": 1.8847, "step": 36560 }, { "epoch": 4.40072202166065, "grad_norm": 18.569854736328125, "learning_rate": 0.00019928842654562222, "loss": 1.881, "step": 36570 }, { "epoch": 4.401925391095066, "grad_norm": 9.885371208190918, "learning_rate": 0.00019928797344872067, "loss": 2.0918, "step": 36580 }, { "epoch": 4.403128760529483, "grad_norm": 19.252552032470703, "learning_rate": 0.00019928752020812493, "loss": 1.9647, "step": 36590 }, { "epoch": 4.404332129963899, "grad_norm": 11.048675537109375, "learning_rate": 0.00019928706682383561, "loss": 1.7183, "step": 36600 }, { "epoch": 4.405535499398315, "grad_norm": 8.772344589233398, "learning_rate": 0.00019928661329585344, "loss": 2.0971, "step": 36610 }, { "epoch": 4.406738868832732, "grad_norm": 7.278609752655029, "learning_rate": 0.000199286159624179, "loss": 1.8773, "step": 36620 }, { "epoch": 4.4079422382671485, "grad_norm": 4.808311939239502, "learning_rate": 0.00019928570580881295, "loss": 1.9369, "step": 36630 }, { "epoch": 4.409145607701564, "grad_norm": 12.39403247833252, "learning_rate": 0.00019928525184975598, "loss": 1.6563, "step": 36640 }, { "epoch": 4.410348977135981, "grad_norm": 5.27571439743042, "learning_rate": 0.00019928479774700874, "loss": 1.8594, "step": 36650 }, { "epoch": 4.411552346570397, "grad_norm": 7.558592796325684, "learning_rate": 0.00019928434350057188, "loss": 1.8163, "step": 36660 }, { "epoch": 4.412755716004813, "grad_norm": 8.557501792907715, "learning_rate": 0.00019928388911044607, "loss": 1.9418, "step": 36670 }, { "epoch": 4.41395908543923, "grad_norm": 6.074558258056641, "learning_rate": 0.00019928343457663195, "loss": 2.0838, "step": 36680 }, { "epoch": 4.415162454873646, "grad_norm": 11.97942066192627, "learning_rate": 0.00019928297989913022, "loss": 1.9122, "step": 36690 }, { "epoch": 4.416365824308063, "grad_norm": 6.416935443878174, "learning_rate": 0.00019928252507794147, "loss": 1.756, "step": 36700 }, { "epoch": 4.417569193742479, "grad_norm": 6.5072221755981445, "learning_rate": 0.0001992820701130664, "loss": 2.0011, "step": 36710 }, { "epoch": 4.418772563176895, "grad_norm": 24.140560150146484, "learning_rate": 0.00019928161500450567, "loss": 2.118, "step": 36720 }, { "epoch": 4.419975932611312, "grad_norm": 6.745471000671387, "learning_rate": 0.00019928115975225997, "loss": 1.5276, "step": 36730 }, { "epoch": 4.421179302045728, "grad_norm": 15.678532600402832, "learning_rate": 0.00019928070435632986, "loss": 1.8905, "step": 36740 }, { "epoch": 4.422382671480144, "grad_norm": 6.229355335235596, "learning_rate": 0.00019928024881671607, "loss": 1.8864, "step": 36750 }, { "epoch": 4.423586040914561, "grad_norm": 4.903979301452637, "learning_rate": 0.00019927979313341928, "loss": 2.031, "step": 36760 }, { "epoch": 4.4247894103489775, "grad_norm": 11.69352912902832, "learning_rate": 0.00019927933730644012, "loss": 1.8753, "step": 36770 }, { "epoch": 4.425992779783393, "grad_norm": 8.421667098999023, "learning_rate": 0.00019927888133577918, "loss": 1.8974, "step": 36780 }, { "epoch": 4.42719614921781, "grad_norm": 9.678336143493652, "learning_rate": 0.00019927842522143724, "loss": 2.2166, "step": 36790 }, { "epoch": 4.428399518652226, "grad_norm": 11.020577430725098, "learning_rate": 0.0001992779689634149, "loss": 2.0467, "step": 36800 }, { "epoch": 4.4296028880866425, "grad_norm": 8.437112808227539, "learning_rate": 0.00019927751256171284, "loss": 2.0048, "step": 36810 }, { "epoch": 4.430806257521059, "grad_norm": 10.001437187194824, "learning_rate": 0.0001992770560163317, "loss": 1.8926, "step": 36820 }, { "epoch": 4.432009626955475, "grad_norm": 6.060479640960693, "learning_rate": 0.00019927659932727213, "loss": 1.6919, "step": 36830 }, { "epoch": 4.433212996389892, "grad_norm": 7.661810874938965, "learning_rate": 0.00019927614249453486, "loss": 2.0966, "step": 36840 }, { "epoch": 4.434416365824308, "grad_norm": 6.644181251525879, "learning_rate": 0.00019927568551812046, "loss": 1.8284, "step": 36850 }, { "epoch": 4.435619735258724, "grad_norm": 4.807556629180908, "learning_rate": 0.00019927522839802963, "loss": 1.8337, "step": 36860 }, { "epoch": 4.436823104693141, "grad_norm": 8.891944885253906, "learning_rate": 0.00019927477113426304, "loss": 2.0124, "step": 36870 }, { "epoch": 4.4380264741275575, "grad_norm": 21.717252731323242, "learning_rate": 0.00019927431372682137, "loss": 1.847, "step": 36880 }, { "epoch": 4.439229843561973, "grad_norm": 7.565113067626953, "learning_rate": 0.00019927385617570523, "loss": 1.9442, "step": 36890 }, { "epoch": 4.44043321299639, "grad_norm": 5.530343055725098, "learning_rate": 0.00019927339848091534, "loss": 1.8794, "step": 36900 }, { "epoch": 4.441636582430807, "grad_norm": 5.1620635986328125, "learning_rate": 0.0001992729406424523, "loss": 1.9209, "step": 36910 }, { "epoch": 4.442839951865222, "grad_norm": 33.19838333129883, "learning_rate": 0.0001992724826603168, "loss": 1.8482, "step": 36920 }, { "epoch": 4.444043321299639, "grad_norm": 11.720840454101562, "learning_rate": 0.00019927202453450953, "loss": 1.952, "step": 36930 }, { "epoch": 4.445246690734056, "grad_norm": 8.067503929138184, "learning_rate": 0.00019927156626503115, "loss": 1.9282, "step": 36940 }, { "epoch": 4.4464500601684716, "grad_norm": 6.697203159332275, "learning_rate": 0.00019927110785188226, "loss": 1.8177, "step": 36950 }, { "epoch": 4.447653429602888, "grad_norm": 5.20427131652832, "learning_rate": 0.0001992706492950636, "loss": 1.9685, "step": 36960 }, { "epoch": 4.448856799037305, "grad_norm": 7.286891460418701, "learning_rate": 0.0001992701905945758, "loss": 1.9866, "step": 36970 }, { "epoch": 4.450060168471721, "grad_norm": 6.762622833251953, "learning_rate": 0.0001992697317504195, "loss": 1.9211, "step": 36980 }, { "epoch": 4.451263537906137, "grad_norm": 10.337411880493164, "learning_rate": 0.0001992692727625954, "loss": 2.1011, "step": 36990 }, { "epoch": 4.452466907340553, "grad_norm": 9.371976852416992, "learning_rate": 0.00019926881363110417, "loss": 1.8377, "step": 37000 }, { "epoch": 4.45367027677497, "grad_norm": 6.596066474914551, "learning_rate": 0.00019926835435594645, "loss": 2.0199, "step": 37010 }, { "epoch": 4.4548736462093865, "grad_norm": 11.738470077514648, "learning_rate": 0.0001992678949371229, "loss": 2.0109, "step": 37020 }, { "epoch": 4.456077015643802, "grad_norm": 8.737201690673828, "learning_rate": 0.00019926743537463423, "loss": 2.0224, "step": 37030 }, { "epoch": 4.457280385078219, "grad_norm": 8.715923309326172, "learning_rate": 0.00019926697566848104, "loss": 2.058, "step": 37040 }, { "epoch": 4.458483754512636, "grad_norm": 19.24488067626953, "learning_rate": 0.000199266515818664, "loss": 1.927, "step": 37050 }, { "epoch": 4.4596871239470515, "grad_norm": 7.10844087600708, "learning_rate": 0.00019926605582518386, "loss": 1.9459, "step": 37060 }, { "epoch": 4.460890493381468, "grad_norm": 7.398124694824219, "learning_rate": 0.0001992655956880412, "loss": 1.8815, "step": 37070 }, { "epoch": 4.462093862815885, "grad_norm": 5.712508201599121, "learning_rate": 0.00019926513540723676, "loss": 2.0228, "step": 37080 }, { "epoch": 4.463297232250301, "grad_norm": 14.620765686035156, "learning_rate": 0.00019926467498277111, "loss": 1.7733, "step": 37090 }, { "epoch": 4.464500601684717, "grad_norm": 6.207784175872803, "learning_rate": 0.00019926421441464497, "loss": 1.8915, "step": 37100 }, { "epoch": 4.465703971119134, "grad_norm": 5.447126388549805, "learning_rate": 0.00019926375370285903, "loss": 2.0654, "step": 37110 }, { "epoch": 4.46690734055355, "grad_norm": 25.362512588500977, "learning_rate": 0.0001992632928474139, "loss": 1.9623, "step": 37120 }, { "epoch": 4.4681107099879664, "grad_norm": 5.799778938293457, "learning_rate": 0.0001992628318483103, "loss": 1.993, "step": 37130 }, { "epoch": 4.469314079422382, "grad_norm": 8.49531078338623, "learning_rate": 0.0001992623707055489, "loss": 1.9349, "step": 37140 }, { "epoch": 4.470517448856799, "grad_norm": 12.713287353515625, "learning_rate": 0.0001992619094191303, "loss": 1.9627, "step": 37150 }, { "epoch": 4.471720818291216, "grad_norm": 6.367859363555908, "learning_rate": 0.00019926144798905522, "loss": 1.888, "step": 37160 }, { "epoch": 4.472924187725631, "grad_norm": 8.403925895690918, "learning_rate": 0.0001992609864153243, "loss": 1.9791, "step": 37170 }, { "epoch": 4.474127557160048, "grad_norm": 9.251457214355469, "learning_rate": 0.00019926052469793827, "loss": 1.8487, "step": 37180 }, { "epoch": 4.475330926594465, "grad_norm": 12.146878242492676, "learning_rate": 0.00019926006283689773, "loss": 1.9635, "step": 37190 }, { "epoch": 4.4765342960288805, "grad_norm": 6.026196002960205, "learning_rate": 0.00019925960083220335, "loss": 1.6327, "step": 37200 }, { "epoch": 4.477737665463297, "grad_norm": 7.230823993682861, "learning_rate": 0.00019925913868385587, "loss": 1.9194, "step": 37210 }, { "epoch": 4.478941034897714, "grad_norm": 6.682619571685791, "learning_rate": 0.0001992586763918559, "loss": 1.9382, "step": 37220 }, { "epoch": 4.48014440433213, "grad_norm": 7.898355484008789, "learning_rate": 0.0001992582139562041, "loss": 2.1301, "step": 37230 }, { "epoch": 4.481347773766546, "grad_norm": 16.16246223449707, "learning_rate": 0.00019925775137690118, "loss": 1.9163, "step": 37240 }, { "epoch": 4.482551143200963, "grad_norm": 8.502813339233398, "learning_rate": 0.00019925728865394774, "loss": 1.8153, "step": 37250 }, { "epoch": 4.483754512635379, "grad_norm": 6.817388534545898, "learning_rate": 0.00019925682578734452, "loss": 2.0369, "step": 37260 }, { "epoch": 4.4849578820697955, "grad_norm": 7.099721431732178, "learning_rate": 0.0001992563627770922, "loss": 1.9944, "step": 37270 }, { "epoch": 4.486161251504212, "grad_norm": 4.773013114929199, "learning_rate": 0.0001992558996231914, "loss": 1.8088, "step": 37280 }, { "epoch": 4.487364620938628, "grad_norm": 16.34334945678711, "learning_rate": 0.0001992554363256428, "loss": 1.9865, "step": 37290 }, { "epoch": 4.488567990373045, "grad_norm": 7.298641681671143, "learning_rate": 0.00019925497288444705, "loss": 2.083, "step": 37300 }, { "epoch": 4.489771359807461, "grad_norm": 5.195982933044434, "learning_rate": 0.0001992545092996049, "loss": 2.1238, "step": 37310 }, { "epoch": 4.490974729241877, "grad_norm": 8.375102996826172, "learning_rate": 0.00019925404557111692, "loss": 1.9985, "step": 37320 }, { "epoch": 4.492178098676294, "grad_norm": 6.089810371398926, "learning_rate": 0.0001992535816989839, "loss": 2.0661, "step": 37330 }, { "epoch": 4.49338146811071, "grad_norm": 7.642695903778076, "learning_rate": 0.00019925311768320637, "loss": 1.8794, "step": 37340 }, { "epoch": 4.494584837545126, "grad_norm": 6.981215953826904, "learning_rate": 0.00019925265352378513, "loss": 1.8602, "step": 37350 }, { "epoch": 4.495788206979543, "grad_norm": 4.471064567565918, "learning_rate": 0.00019925218922072077, "loss": 1.947, "step": 37360 }, { "epoch": 4.496991576413959, "grad_norm": 6.985415458679199, "learning_rate": 0.000199251724774014, "loss": 1.8577, "step": 37370 }, { "epoch": 4.498194945848375, "grad_norm": 7.6307549476623535, "learning_rate": 0.00019925126018366545, "loss": 1.8119, "step": 37380 }, { "epoch": 4.499398315282792, "grad_norm": 13.285627365112305, "learning_rate": 0.00019925079544967585, "loss": 1.8662, "step": 37390 }, { "epoch": 4.500601684717208, "grad_norm": 6.505211353302002, "learning_rate": 0.00019925033057204583, "loss": 1.9064, "step": 37400 }, { "epoch": 4.501805054151625, "grad_norm": 9.097518920898438, "learning_rate": 0.0001992498655507761, "loss": 1.8405, "step": 37410 }, { "epoch": 4.503008423586041, "grad_norm": 7.4252424240112305, "learning_rate": 0.00019924940038586729, "loss": 2.1371, "step": 37420 }, { "epoch": 4.504211793020457, "grad_norm": 7.798403739929199, "learning_rate": 0.00019924893507732009, "loss": 1.9207, "step": 37430 }, { "epoch": 4.505415162454874, "grad_norm": 7.5146164894104, "learning_rate": 0.00019924846962513518, "loss": 2.0236, "step": 37440 }, { "epoch": 4.5066185318892895, "grad_norm": 6.379922866821289, "learning_rate": 0.00019924800402931325, "loss": 1.869, "step": 37450 }, { "epoch": 4.507821901323706, "grad_norm": 6.786048412322998, "learning_rate": 0.0001992475382898549, "loss": 2.0661, "step": 37460 }, { "epoch": 4.509025270758123, "grad_norm": 8.681519508361816, "learning_rate": 0.00019924707240676092, "loss": 1.8231, "step": 37470 }, { "epoch": 4.510228640192539, "grad_norm": 4.627808570861816, "learning_rate": 0.00019924660638003192, "loss": 1.9974, "step": 37480 }, { "epoch": 4.511432009626955, "grad_norm": 8.16715145111084, "learning_rate": 0.00019924614020966854, "loss": 2.0107, "step": 37490 }, { "epoch": 4.512635379061372, "grad_norm": 7.240932941436768, "learning_rate": 0.00019924567389567152, "loss": 1.9919, "step": 37500 }, { "epoch": 4.513838748495788, "grad_norm": 4.046568870544434, "learning_rate": 0.00019924520743804148, "loss": 1.8022, "step": 37510 }, { "epoch": 4.5150421179302045, "grad_norm": 8.555015563964844, "learning_rate": 0.00019924474083677916, "loss": 1.8356, "step": 37520 }, { "epoch": 4.516245487364621, "grad_norm": 5.146360397338867, "learning_rate": 0.00019924427409188518, "loss": 1.9993, "step": 37530 }, { "epoch": 4.517448856799037, "grad_norm": 11.656143188476562, "learning_rate": 0.00019924380720336021, "loss": 2.1149, "step": 37540 }, { "epoch": 4.518652226233454, "grad_norm": 12.554299354553223, "learning_rate": 0.000199243340171205, "loss": 1.801, "step": 37550 }, { "epoch": 4.51985559566787, "grad_norm": 7.014607906341553, "learning_rate": 0.00019924287299542012, "loss": 1.7003, "step": 37560 }, { "epoch": 4.521058965102286, "grad_norm": 7.310118675231934, "learning_rate": 0.00019924240567600635, "loss": 1.9469, "step": 37570 }, { "epoch": 4.522262334536703, "grad_norm": 8.424588203430176, "learning_rate": 0.00019924193821296425, "loss": 1.9928, "step": 37580 }, { "epoch": 4.5234657039711195, "grad_norm": 7.748364448547363, "learning_rate": 0.00019924147060629464, "loss": 2.1891, "step": 37590 }, { "epoch": 4.524669073405535, "grad_norm": 7.200401782989502, "learning_rate": 0.00019924100285599808, "loss": 1.7539, "step": 37600 }, { "epoch": 4.525872442839952, "grad_norm": 12.594355583190918, "learning_rate": 0.00019924053496207529, "loss": 1.9731, "step": 37610 }, { "epoch": 4.527075812274369, "grad_norm": 10.205979347229004, "learning_rate": 0.00019924006692452695, "loss": 1.6851, "step": 37620 }, { "epoch": 4.528279181708784, "grad_norm": 6.131598949432373, "learning_rate": 0.0001992395987433537, "loss": 1.7934, "step": 37630 }, { "epoch": 4.529482551143201, "grad_norm": 6.139014720916748, "learning_rate": 0.0001992391304185563, "loss": 1.8358, "step": 37640 }, { "epoch": 4.530685920577618, "grad_norm": 8.550439834594727, "learning_rate": 0.00019923866195013535, "loss": 1.9257, "step": 37650 }, { "epoch": 4.5318892900120336, "grad_norm": 4.324347972869873, "learning_rate": 0.00019923819333809154, "loss": 1.9961, "step": 37660 }, { "epoch": 4.53309265944645, "grad_norm": 8.383240699768066, "learning_rate": 0.0001992377245824256, "loss": 1.8115, "step": 37670 }, { "epoch": 4.534296028880867, "grad_norm": 6.141906261444092, "learning_rate": 0.00019923725568313816, "loss": 2.1539, "step": 37680 }, { "epoch": 4.535499398315283, "grad_norm": 7.333021640777588, "learning_rate": 0.00019923678664022989, "loss": 2.143, "step": 37690 }, { "epoch": 4.536702767749699, "grad_norm": 5.740495204925537, "learning_rate": 0.0001992363174537015, "loss": 1.9818, "step": 37700 }, { "epoch": 4.537906137184115, "grad_norm": 6.611884117126465, "learning_rate": 0.00019923584812355367, "loss": 2.067, "step": 37710 }, { "epoch": 4.539109506618532, "grad_norm": 5.949376583099365, "learning_rate": 0.00019923537864978706, "loss": 1.825, "step": 37720 }, { "epoch": 4.5403128760529485, "grad_norm": 15.834717750549316, "learning_rate": 0.00019923490903240234, "loss": 1.7445, "step": 37730 }, { "epoch": 4.541516245487364, "grad_norm": 10.180578231811523, "learning_rate": 0.00019923443927140022, "loss": 2.1226, "step": 37740 }, { "epoch": 4.542719614921781, "grad_norm": 9.784039497375488, "learning_rate": 0.0001992339693667814, "loss": 1.6301, "step": 37750 }, { "epoch": 4.543922984356198, "grad_norm": 4.267033100128174, "learning_rate": 0.00019923349931854647, "loss": 1.7469, "step": 37760 }, { "epoch": 4.5451263537906135, "grad_norm": 8.615670204162598, "learning_rate": 0.00019923302912669617, "loss": 2.0589, "step": 37770 }, { "epoch": 4.54632972322503, "grad_norm": 4.914862632751465, "learning_rate": 0.0001992325587912312, "loss": 2.0424, "step": 37780 }, { "epoch": 4.547533092659447, "grad_norm": 7.973679065704346, "learning_rate": 0.00019923208831215222, "loss": 1.9179, "step": 37790 }, { "epoch": 4.548736462093863, "grad_norm": 5.358487129211426, "learning_rate": 0.0001992316176894599, "loss": 1.8285, "step": 37800 }, { "epoch": 4.549939831528279, "grad_norm": 4.255433082580566, "learning_rate": 0.00019923114692315492, "loss": 2.0838, "step": 37810 }, { "epoch": 4.551143200962695, "grad_norm": 9.114831924438477, "learning_rate": 0.00019923067601323798, "loss": 1.904, "step": 37820 }, { "epoch": 4.552346570397112, "grad_norm": 5.057202339172363, "learning_rate": 0.00019923020495970977, "loss": 1.9548, "step": 37830 }, { "epoch": 4.5535499398315284, "grad_norm": 11.94949722290039, "learning_rate": 0.00019922973376257095, "loss": 2.0864, "step": 37840 }, { "epoch": 4.554753309265944, "grad_norm": 6.582622528076172, "learning_rate": 0.0001992292624218222, "loss": 2.0782, "step": 37850 }, { "epoch": 4.555956678700361, "grad_norm": 5.160717964172363, "learning_rate": 0.0001992287909374642, "loss": 1.9943, "step": 37860 }, { "epoch": 4.557160048134778, "grad_norm": 8.315377235412598, "learning_rate": 0.00019922831930949762, "loss": 1.8425, "step": 37870 }, { "epoch": 4.558363417569193, "grad_norm": 6.430552959442139, "learning_rate": 0.0001992278475379232, "loss": 1.8823, "step": 37880 }, { "epoch": 4.55956678700361, "grad_norm": 10.638143539428711, "learning_rate": 0.00019922737562274158, "loss": 2.0202, "step": 37890 }, { "epoch": 4.560770156438027, "grad_norm": 7.622200965881348, "learning_rate": 0.00019922690356395344, "loss": 1.7269, "step": 37900 }, { "epoch": 4.5619735258724425, "grad_norm": 5.80261754989624, "learning_rate": 0.0001992264313615595, "loss": 1.9202, "step": 37910 }, { "epoch": 4.563176895306859, "grad_norm": 7.089993953704834, "learning_rate": 0.00019922595901556038, "loss": 1.9562, "step": 37920 }, { "epoch": 4.564380264741276, "grad_norm": 7.092113018035889, "learning_rate": 0.00019922548652595681, "loss": 2.0324, "step": 37930 }, { "epoch": 4.565583634175692, "grad_norm": 16.311973571777344, "learning_rate": 0.00019922501389274945, "loss": 1.856, "step": 37940 }, { "epoch": 4.566787003610108, "grad_norm": 9.96202278137207, "learning_rate": 0.00019922454111593902, "loss": 1.986, "step": 37950 }, { "epoch": 4.567990373044525, "grad_norm": 5.622683048248291, "learning_rate": 0.00019922406819552617, "loss": 1.7719, "step": 37960 }, { "epoch": 4.569193742478941, "grad_norm": 6.116504192352295, "learning_rate": 0.00019922359513151162, "loss": 1.9339, "step": 37970 }, { "epoch": 4.5703971119133575, "grad_norm": 10.435121536254883, "learning_rate": 0.00019922312192389597, "loss": 1.9793, "step": 37980 }, { "epoch": 4.571600481347774, "grad_norm": 8.617453575134277, "learning_rate": 0.00019922264857268003, "loss": 1.8636, "step": 37990 }, { "epoch": 4.57280385078219, "grad_norm": 10.9584379196167, "learning_rate": 0.00019922217507786438, "loss": 1.8736, "step": 38000 }, { "epoch": 4.574007220216607, "grad_norm": 14.49172306060791, "learning_rate": 0.00019922170143944976, "loss": 1.7835, "step": 38010 }, { "epoch": 4.575210589651023, "grad_norm": 7.032172203063965, "learning_rate": 0.00019922122765743682, "loss": 1.9243, "step": 38020 }, { "epoch": 4.576413959085439, "grad_norm": 8.402040481567383, "learning_rate": 0.00019922075373182628, "loss": 1.9136, "step": 38030 }, { "epoch": 4.577617328519856, "grad_norm": 8.604158401489258, "learning_rate": 0.00019922027966261883, "loss": 2.0815, "step": 38040 }, { "epoch": 4.578820697954272, "grad_norm": 5.890580177307129, "learning_rate": 0.00019921980544981515, "loss": 2.0205, "step": 38050 }, { "epoch": 4.580024067388688, "grad_norm": 11.066864967346191, "learning_rate": 0.00019921933109341589, "loss": 1.8682, "step": 38060 }, { "epoch": 4.581227436823105, "grad_norm": 8.122532844543457, "learning_rate": 0.00019921885659342174, "loss": 2.0424, "step": 38070 }, { "epoch": 4.582430806257521, "grad_norm": 8.963947296142578, "learning_rate": 0.00019921838194983344, "loss": 1.7892, "step": 38080 }, { "epoch": 4.583634175691937, "grad_norm": 6.899969577789307, "learning_rate": 0.00019921790716265165, "loss": 2.0055, "step": 38090 }, { "epoch": 4.584837545126354, "grad_norm": 8.297788619995117, "learning_rate": 0.00019921743223187704, "loss": 1.945, "step": 38100 }, { "epoch": 4.58604091456077, "grad_norm": 36.4146842956543, "learning_rate": 0.00019921695715751029, "loss": 2.0239, "step": 38110 }, { "epoch": 4.587244283995187, "grad_norm": 6.881565570831299, "learning_rate": 0.00019921648193955215, "loss": 2.0318, "step": 38120 }, { "epoch": 4.588447653429603, "grad_norm": 6.308768272399902, "learning_rate": 0.00019921600657800322, "loss": 1.8649, "step": 38130 }, { "epoch": 4.589651022864019, "grad_norm": 9.94020938873291, "learning_rate": 0.00019921553107286427, "loss": 1.809, "step": 38140 }, { "epoch": 4.590854392298436, "grad_norm": 6.146694660186768, "learning_rate": 0.0001992150554241359, "loss": 1.9957, "step": 38150 }, { "epoch": 4.5920577617328515, "grad_norm": 8.65699291229248, "learning_rate": 0.0001992145796318189, "loss": 1.8722, "step": 38160 }, { "epoch": 4.593261131167268, "grad_norm": 8.434221267700195, "learning_rate": 0.0001992141036959139, "loss": 2.0671, "step": 38170 }, { "epoch": 4.594464500601685, "grad_norm": 13.314875602722168, "learning_rate": 0.0001992136276164216, "loss": 1.8473, "step": 38180 }, { "epoch": 4.595667870036101, "grad_norm": 9.386907577514648, "learning_rate": 0.00019921315139334266, "loss": 2.1, "step": 38190 }, { "epoch": 4.596871239470517, "grad_norm": 5.242396354675293, "learning_rate": 0.00019921267502667783, "loss": 1.9444, "step": 38200 }, { "epoch": 4.598074608904934, "grad_norm": 4.145884990692139, "learning_rate": 0.00019921219851642773, "loss": 1.947, "step": 38210 }, { "epoch": 4.59927797833935, "grad_norm": 6.597233772277832, "learning_rate": 0.0001992117218625931, "loss": 1.8639, "step": 38220 }, { "epoch": 4.6004813477737665, "grad_norm": 5.497170448303223, "learning_rate": 0.00019921124506517462, "loss": 1.9504, "step": 38230 }, { "epoch": 4.601684717208183, "grad_norm": 7.455959796905518, "learning_rate": 0.00019921076812417298, "loss": 2.058, "step": 38240 }, { "epoch": 4.602888086642599, "grad_norm": 5.906527996063232, "learning_rate": 0.00019921029103958882, "loss": 1.7151, "step": 38250 }, { "epoch": 4.604091456077016, "grad_norm": 4.899425983428955, "learning_rate": 0.00019920981381142292, "loss": 2.1783, "step": 38260 }, { "epoch": 4.605294825511432, "grad_norm": 7.4382100105285645, "learning_rate": 0.00019920933643967592, "loss": 1.9446, "step": 38270 }, { "epoch": 4.606498194945848, "grad_norm": 5.704405784606934, "learning_rate": 0.0001992088589243485, "loss": 1.9866, "step": 38280 }, { "epoch": 4.607701564380265, "grad_norm": 18.863250732421875, "learning_rate": 0.00019920838126544137, "loss": 1.9098, "step": 38290 }, { "epoch": 4.6089049338146815, "grad_norm": 6.587116241455078, "learning_rate": 0.0001992079034629552, "loss": 1.8921, "step": 38300 }, { "epoch": 4.610108303249097, "grad_norm": 5.117888450622559, "learning_rate": 0.00019920742551689073, "loss": 2.1543, "step": 38310 }, { "epoch": 4.611311672683514, "grad_norm": 9.064175605773926, "learning_rate": 0.0001992069474272486, "loss": 1.9865, "step": 38320 }, { "epoch": 4.612515042117931, "grad_norm": 5.347578525543213, "learning_rate": 0.00019920646919402958, "loss": 2.0254, "step": 38330 }, { "epoch": 4.613718411552346, "grad_norm": 7.340710639953613, "learning_rate": 0.00019920599081723426, "loss": 2.1486, "step": 38340 }, { "epoch": 4.614921780986763, "grad_norm": 6.411147594451904, "learning_rate": 0.00019920551229686337, "loss": 2.0321, "step": 38350 }, { "epoch": 4.61612515042118, "grad_norm": 8.462505340576172, "learning_rate": 0.0001992050336329176, "loss": 2.2851, "step": 38360 }, { "epoch": 4.617328519855596, "grad_norm": 14.334405899047852, "learning_rate": 0.0001992045548253977, "loss": 2.1559, "step": 38370 }, { "epoch": 4.618531889290012, "grad_norm": 5.688232898712158, "learning_rate": 0.0001992040758743043, "loss": 2.036, "step": 38380 }, { "epoch": 4.619735258724428, "grad_norm": 7.32138204574585, "learning_rate": 0.0001992035967796381, "loss": 2.0482, "step": 38390 }, { "epoch": 4.620938628158845, "grad_norm": 9.042555809020996, "learning_rate": 0.0001992031175413998, "loss": 1.8062, "step": 38400 }, { "epoch": 4.622141997593261, "grad_norm": 5.123806953430176, "learning_rate": 0.00019920263815959008, "loss": 2.0217, "step": 38410 }, { "epoch": 4.623345367027677, "grad_norm": 7.053982734680176, "learning_rate": 0.00019920215863420969, "loss": 2.0229, "step": 38420 }, { "epoch": 4.624548736462094, "grad_norm": 8.836836814880371, "learning_rate": 0.00019920167896525927, "loss": 1.9295, "step": 38430 }, { "epoch": 4.6257521058965105, "grad_norm": 7.976781368255615, "learning_rate": 0.00019920119915273953, "loss": 1.9316, "step": 38440 }, { "epoch": 4.626955475330926, "grad_norm": 6.514922142028809, "learning_rate": 0.00019920071919665114, "loss": 1.9108, "step": 38450 }, { "epoch": 4.628158844765343, "grad_norm": 5.870746612548828, "learning_rate": 0.00019920023909699482, "loss": 1.8907, "step": 38460 }, { "epoch": 4.62936221419976, "grad_norm": 7.147825717926025, "learning_rate": 0.0001991997588537713, "loss": 2.0451, "step": 38470 }, { "epoch": 4.6305655836341755, "grad_norm": 15.836055755615234, "learning_rate": 0.00019919927846698122, "loss": 1.8581, "step": 38480 }, { "epoch": 4.631768953068592, "grad_norm": 8.948719024658203, "learning_rate": 0.00019919879793662526, "loss": 1.9407, "step": 38490 }, { "epoch": 4.632972322503008, "grad_norm": 13.253113746643066, "learning_rate": 0.00019919831726270419, "loss": 2.1827, "step": 38500 }, { "epoch": 4.634175691937425, "grad_norm": 5.759546756744385, "learning_rate": 0.00019919783644521864, "loss": 1.9114, "step": 38510 }, { "epoch": 4.635379061371841, "grad_norm": 9.337198257446289, "learning_rate": 0.00019919735548416933, "loss": 1.9372, "step": 38520 }, { "epoch": 4.636582430806257, "grad_norm": 9.921058654785156, "learning_rate": 0.00019919687437955697, "loss": 1.9241, "step": 38530 }, { "epoch": 4.637785800240674, "grad_norm": 10.45384407043457, "learning_rate": 0.00019919639313138222, "loss": 2.0231, "step": 38540 }, { "epoch": 4.6389891696750905, "grad_norm": 8.505849838256836, "learning_rate": 0.00019919591173964582, "loss": 1.7725, "step": 38550 }, { "epoch": 4.640192539109506, "grad_norm": 7.61210823059082, "learning_rate": 0.00019919543020434843, "loss": 1.8658, "step": 38560 }, { "epoch": 4.641395908543923, "grad_norm": 6.48394250869751, "learning_rate": 0.0001991949485254908, "loss": 1.9097, "step": 38570 }, { "epoch": 4.64259927797834, "grad_norm": 7.609060287475586, "learning_rate": 0.00019919446670307354, "loss": 1.8554, "step": 38580 }, { "epoch": 4.643802647412755, "grad_norm": 8.147257804870605, "learning_rate": 0.00019919398473709744, "loss": 1.9777, "step": 38590 }, { "epoch": 4.645006016847172, "grad_norm": 10.474865913391113, "learning_rate": 0.00019919350262756312, "loss": 1.768, "step": 38600 }, { "epoch": 4.646209386281589, "grad_norm": 5.379399299621582, "learning_rate": 0.0001991930203744713, "loss": 2.0159, "step": 38610 }, { "epoch": 4.6474127557160045, "grad_norm": 6.453421592712402, "learning_rate": 0.00019919253797782274, "loss": 1.8079, "step": 38620 }, { "epoch": 4.648616125150421, "grad_norm": 4.304332733154297, "learning_rate": 0.00019919205543761806, "loss": 2.0221, "step": 38630 }, { "epoch": 4.649819494584838, "grad_norm": 11.97105884552002, "learning_rate": 0.00019919157275385797, "loss": 1.8953, "step": 38640 }, { "epoch": 4.651022864019254, "grad_norm": 6.9134297370910645, "learning_rate": 0.0001991910899265432, "loss": 2.0701, "step": 38650 }, { "epoch": 4.65222623345367, "grad_norm": 5.144277572631836, "learning_rate": 0.0001991906069556744, "loss": 2.0459, "step": 38660 }, { "epoch": 4.653429602888087, "grad_norm": 13.8529634475708, "learning_rate": 0.00019919012384125235, "loss": 2.0014, "step": 38670 }, { "epoch": 4.654632972322503, "grad_norm": 4.78397274017334, "learning_rate": 0.0001991896405832777, "loss": 2.0482, "step": 38680 }, { "epoch": 4.6558363417569195, "grad_norm": 12.549687385559082, "learning_rate": 0.00019918915718175113, "loss": 1.9872, "step": 38690 }, { "epoch": 4.657039711191336, "grad_norm": 5.540886402130127, "learning_rate": 0.00019918867363667337, "loss": 2.0857, "step": 38700 }, { "epoch": 4.658243080625752, "grad_norm": 12.373787879943848, "learning_rate": 0.0001991881899480451, "loss": 2.0306, "step": 38710 }, { "epoch": 4.659446450060169, "grad_norm": 11.565423965454102, "learning_rate": 0.00019918770611586703, "loss": 1.8649, "step": 38720 }, { "epoch": 4.6606498194945845, "grad_norm": 5.739513397216797, "learning_rate": 0.0001991872221401399, "loss": 2.0518, "step": 38730 }, { "epoch": 4.661853188929001, "grad_norm": 15.935868263244629, "learning_rate": 0.00019918673802086435, "loss": 2.1491, "step": 38740 }, { "epoch": 4.663056558363418, "grad_norm": 5.813896656036377, "learning_rate": 0.00019918625375804107, "loss": 1.8102, "step": 38750 }, { "epoch": 4.664259927797834, "grad_norm": 8.6066312789917, "learning_rate": 0.0001991857693516708, "loss": 2.1178, "step": 38760 }, { "epoch": 4.66546329723225, "grad_norm": 13.907723426818848, "learning_rate": 0.00019918528480175428, "loss": 1.9506, "step": 38770 }, { "epoch": 4.666666666666667, "grad_norm": 4.396481513977051, "learning_rate": 0.0001991848001082921, "loss": 1.8585, "step": 38780 }, { "epoch": 4.667870036101083, "grad_norm": 12.915420532226562, "learning_rate": 0.00019918431527128508, "loss": 1.9045, "step": 38790 }, { "epoch": 4.669073405535499, "grad_norm": 7.082880973815918, "learning_rate": 0.00019918383029073384, "loss": 1.8275, "step": 38800 }, { "epoch": 4.670276774969916, "grad_norm": 8.76838207244873, "learning_rate": 0.00019918334516663913, "loss": 1.9593, "step": 38810 }, { "epoch": 4.671480144404332, "grad_norm": 7.5392231941223145, "learning_rate": 0.00019918285989900162, "loss": 2.0901, "step": 38820 }, { "epoch": 4.672683513838749, "grad_norm": 9.134536743164062, "learning_rate": 0.00019918237448782202, "loss": 1.8903, "step": 38830 }, { "epoch": 4.673886883273164, "grad_norm": 10.274060249328613, "learning_rate": 0.000199181888933101, "loss": 1.8829, "step": 38840 }, { "epoch": 4.675090252707581, "grad_norm": 6.194350719451904, "learning_rate": 0.00019918140323483936, "loss": 1.9095, "step": 38850 }, { "epoch": 4.676293622141998, "grad_norm": 5.424354553222656, "learning_rate": 0.00019918091739303773, "loss": 1.8485, "step": 38860 }, { "epoch": 4.6774969915764135, "grad_norm": 13.536675453186035, "learning_rate": 0.0001991804314076968, "loss": 1.8535, "step": 38870 }, { "epoch": 4.67870036101083, "grad_norm": 5.463991641998291, "learning_rate": 0.00019917994527881734, "loss": 1.7198, "step": 38880 }, { "epoch": 4.679903730445247, "grad_norm": 8.189309120178223, "learning_rate": 0.00019917945900639999, "loss": 2.0346, "step": 38890 }, { "epoch": 4.681107099879663, "grad_norm": 6.468610763549805, "learning_rate": 0.00019917897259044546, "loss": 1.8921, "step": 38900 }, { "epoch": 4.682310469314079, "grad_norm": 3.821913242340088, "learning_rate": 0.0001991784860309545, "loss": 1.9713, "step": 38910 }, { "epoch": 4.683513838748496, "grad_norm": 5.608883857727051, "learning_rate": 0.00019917799932792776, "loss": 2.2028, "step": 38920 }, { "epoch": 4.684717208182912, "grad_norm": 21.48900604248047, "learning_rate": 0.00019917751248136597, "loss": 1.8266, "step": 38930 }, { "epoch": 4.6859205776173285, "grad_norm": 8.551660537719727, "learning_rate": 0.00019917702549126983, "loss": 1.8584, "step": 38940 }, { "epoch": 4.687123947051745, "grad_norm": 6.141820907592773, "learning_rate": 0.0001991765383576401, "loss": 1.8196, "step": 38950 }, { "epoch": 4.688327316486161, "grad_norm": 6.991450309753418, "learning_rate": 0.00019917605108047738, "loss": 1.8017, "step": 38960 }, { "epoch": 4.689530685920578, "grad_norm": 8.62569808959961, "learning_rate": 0.00019917556365978243, "loss": 1.9349, "step": 38970 }, { "epoch": 4.690734055354994, "grad_norm": 5.824770927429199, "learning_rate": 0.00019917507609555597, "loss": 1.9892, "step": 38980 }, { "epoch": 4.69193742478941, "grad_norm": 10.973548889160156, "learning_rate": 0.00019917458838779868, "loss": 2.1287, "step": 38990 }, { "epoch": 4.693140794223827, "grad_norm": 8.601175308227539, "learning_rate": 0.00019917410053651126, "loss": 1.9457, "step": 39000 }, { "epoch": 4.6943441636582435, "grad_norm": 6.339250564575195, "learning_rate": 0.00019917361254169448, "loss": 1.9717, "step": 39010 }, { "epoch": 4.695547533092659, "grad_norm": 10.602438926696777, "learning_rate": 0.00019917312440334898, "loss": 1.9831, "step": 39020 }, { "epoch": 4.696750902527076, "grad_norm": 4.155811786651611, "learning_rate": 0.00019917263612147546, "loss": 1.7227, "step": 39030 }, { "epoch": 4.697954271961493, "grad_norm": 9.204297065734863, "learning_rate": 0.00019917214769607467, "loss": 1.9866, "step": 39040 }, { "epoch": 4.699157641395908, "grad_norm": 10.78105640411377, "learning_rate": 0.00019917165912714725, "loss": 2.1162, "step": 39050 }, { "epoch": 4.700361010830325, "grad_norm": 5.022622585296631, "learning_rate": 0.00019917117041469402, "loss": 1.9013, "step": 39060 }, { "epoch": 4.701564380264742, "grad_norm": 8.880290031433105, "learning_rate": 0.00019917068155871559, "loss": 1.7069, "step": 39070 }, { "epoch": 4.702767749699158, "grad_norm": 6.2118377685546875, "learning_rate": 0.0001991701925592127, "loss": 1.9699, "step": 39080 }, { "epoch": 4.703971119133574, "grad_norm": 7.420884609222412, "learning_rate": 0.00019916970341618607, "loss": 2.1992, "step": 39090 }, { "epoch": 4.70517448856799, "grad_norm": 6.072713375091553, "learning_rate": 0.0001991692141296364, "loss": 1.9586, "step": 39100 }, { "epoch": 4.706377858002407, "grad_norm": 4.617374897003174, "learning_rate": 0.00019916872469956438, "loss": 1.9509, "step": 39110 }, { "epoch": 4.707581227436823, "grad_norm": 5.9688334465026855, "learning_rate": 0.00019916823512597072, "loss": 2.0487, "step": 39120 }, { "epoch": 4.708784596871239, "grad_norm": 9.01282787322998, "learning_rate": 0.00019916774540885615, "loss": 1.9915, "step": 39130 }, { "epoch": 4.709987966305656, "grad_norm": 8.382208824157715, "learning_rate": 0.00019916725554822138, "loss": 1.943, "step": 39140 }, { "epoch": 4.7111913357400725, "grad_norm": 6.859579563140869, "learning_rate": 0.00019916676554406707, "loss": 1.8657, "step": 39150 }, { "epoch": 4.712394705174488, "grad_norm": 10.082171440124512, "learning_rate": 0.000199166275396394, "loss": 1.9387, "step": 39160 }, { "epoch": 4.713598074608905, "grad_norm": 5.954122066497803, "learning_rate": 0.00019916578510520286, "loss": 1.9521, "step": 39170 }, { "epoch": 4.714801444043322, "grad_norm": 8.008260726928711, "learning_rate": 0.0001991652946704943, "loss": 2.1352, "step": 39180 }, { "epoch": 4.7160048134777375, "grad_norm": 9.707684516906738, "learning_rate": 0.0001991648040922691, "loss": 1.7714, "step": 39190 }, { "epoch": 4.717208182912154, "grad_norm": 5.941680908203125, "learning_rate": 0.00019916431337052798, "loss": 2.0328, "step": 39200 }, { "epoch": 4.71841155234657, "grad_norm": 4.283074378967285, "learning_rate": 0.00019916382250527158, "loss": 2.0465, "step": 39210 }, { "epoch": 4.719614921780987, "grad_norm": 5.88446569442749, "learning_rate": 0.00019916333149650064, "loss": 2.009, "step": 39220 }, { "epoch": 4.720818291215403, "grad_norm": 14.850969314575195, "learning_rate": 0.0001991628403442159, "loss": 2.07, "step": 39230 }, { "epoch": 4.722021660649819, "grad_norm": 7.3254804611206055, "learning_rate": 0.000199162349048418, "loss": 1.8446, "step": 39240 }, { "epoch": 4.723225030084236, "grad_norm": 10.557574272155762, "learning_rate": 0.00019916185760910775, "loss": 2.0767, "step": 39250 }, { "epoch": 4.7244283995186525, "grad_norm": 6.993002891540527, "learning_rate": 0.0001991613660262858, "loss": 2.117, "step": 39260 }, { "epoch": 4.725631768953068, "grad_norm": 16.8297176361084, "learning_rate": 0.00019916087429995287, "loss": 2.1953, "step": 39270 }, { "epoch": 4.726835138387485, "grad_norm": 5.601232528686523, "learning_rate": 0.00019916038243010967, "loss": 1.9631, "step": 39280 }, { "epoch": 4.728038507821902, "grad_norm": 7.897584438323975, "learning_rate": 0.00019915989041675692, "loss": 2.111, "step": 39290 }, { "epoch": 4.729241877256317, "grad_norm": 16.734622955322266, "learning_rate": 0.0001991593982598953, "loss": 2.097, "step": 39300 }, { "epoch": 4.730445246690734, "grad_norm": 10.241059303283691, "learning_rate": 0.00019915890595952559, "loss": 2.124, "step": 39310 }, { "epoch": 4.731648616125151, "grad_norm": 8.126265525817871, "learning_rate": 0.00019915841351564843, "loss": 1.8868, "step": 39320 }, { "epoch": 4.7328519855595665, "grad_norm": 8.909639358520508, "learning_rate": 0.00019915792092826457, "loss": 1.9609, "step": 39330 }, { "epoch": 4.734055354993983, "grad_norm": 8.8421049118042, "learning_rate": 0.0001991574281973747, "loss": 2.2227, "step": 39340 }, { "epoch": 4.7352587244284, "grad_norm": 10.465092658996582, "learning_rate": 0.00019915693532297958, "loss": 1.8981, "step": 39350 }, { "epoch": 4.736462093862816, "grad_norm": 6.22850227355957, "learning_rate": 0.00019915644230507988, "loss": 1.9493, "step": 39360 }, { "epoch": 4.737665463297232, "grad_norm": 7.862131118774414, "learning_rate": 0.00019915594914367635, "loss": 1.8518, "step": 39370 }, { "epoch": 4.738868832731649, "grad_norm": 5.224724769592285, "learning_rate": 0.00019915545583876966, "loss": 2.1733, "step": 39380 }, { "epoch": 4.740072202166065, "grad_norm": 7.3520917892456055, "learning_rate": 0.00019915496239036054, "loss": 1.781, "step": 39390 }, { "epoch": 4.7412755716004815, "grad_norm": 13.000665664672852, "learning_rate": 0.0001991544687984497, "loss": 1.8869, "step": 39400 }, { "epoch": 4.742478941034898, "grad_norm": 6.38198184967041, "learning_rate": 0.0001991539750630379, "loss": 1.9835, "step": 39410 }, { "epoch": 4.743682310469314, "grad_norm": 8.315906524658203, "learning_rate": 0.00019915348118412578, "loss": 1.8297, "step": 39420 }, { "epoch": 4.744885679903731, "grad_norm": 8.645343780517578, "learning_rate": 0.0001991529871617141, "loss": 1.8701, "step": 39430 }, { "epoch": 4.7460890493381465, "grad_norm": 7.214653491973877, "learning_rate": 0.0001991524929958036, "loss": 2.0204, "step": 39440 }, { "epoch": 4.747292418772563, "grad_norm": 6.126239776611328, "learning_rate": 0.00019915199868639493, "loss": 2.0778, "step": 39450 }, { "epoch": 4.74849578820698, "grad_norm": 6.310642242431641, "learning_rate": 0.00019915150423348882, "loss": 2.0158, "step": 39460 }, { "epoch": 4.749699157641396, "grad_norm": 6.208978652954102, "learning_rate": 0.00019915100963708606, "loss": 1.801, "step": 39470 }, { "epoch": 4.750902527075812, "grad_norm": 4.734537601470947, "learning_rate": 0.00019915051489718727, "loss": 1.965, "step": 39480 }, { "epoch": 4.752105896510229, "grad_norm": 7.569093227386475, "learning_rate": 0.0001991500200137932, "loss": 2.1077, "step": 39490 }, { "epoch": 4.753309265944645, "grad_norm": 12.631699562072754, "learning_rate": 0.0001991495249869046, "loss": 1.9071, "step": 39500 }, { "epoch": 4.754512635379061, "grad_norm": 5.180163383483887, "learning_rate": 0.00019914902981652214, "loss": 2.1518, "step": 39510 }, { "epoch": 4.755716004813478, "grad_norm": 14.124245643615723, "learning_rate": 0.00019914853450264655, "loss": 2.0155, "step": 39520 }, { "epoch": 4.756919374247894, "grad_norm": 4.914310932159424, "learning_rate": 0.00019914803904527858, "loss": 2.0019, "step": 39530 }, { "epoch": 4.758122743682311, "grad_norm": 8.773962020874023, "learning_rate": 0.0001991475434444189, "loss": 2.1427, "step": 39540 }, { "epoch": 4.759326113116726, "grad_norm": 6.844643592834473, "learning_rate": 0.00019914704770006826, "loss": 1.9901, "step": 39550 }, { "epoch": 4.760529482551143, "grad_norm": 5.214836597442627, "learning_rate": 0.00019914655181222735, "loss": 2.2465, "step": 39560 }, { "epoch": 4.76173285198556, "grad_norm": 7.27862024307251, "learning_rate": 0.0001991460557808969, "loss": 2.0193, "step": 39570 }, { "epoch": 4.7629362214199755, "grad_norm": 6.350466728210449, "learning_rate": 0.00019914555960607761, "loss": 1.9277, "step": 39580 }, { "epoch": 4.764139590854392, "grad_norm": 8.133624076843262, "learning_rate": 0.00019914506328777023, "loss": 1.8919, "step": 39590 }, { "epoch": 4.765342960288809, "grad_norm": 5.769651412963867, "learning_rate": 0.00019914456682597548, "loss": 1.8878, "step": 39600 }, { "epoch": 4.766546329723225, "grad_norm": 7.046748161315918, "learning_rate": 0.00019914407022069406, "loss": 2.029, "step": 39610 }, { "epoch": 4.767749699157641, "grad_norm": 14.47966194152832, "learning_rate": 0.0001991435734719267, "loss": 2.1774, "step": 39620 }, { "epoch": 4.768953068592058, "grad_norm": 7.438046455383301, "learning_rate": 0.0001991430765796741, "loss": 2.0883, "step": 39630 }, { "epoch": 4.770156438026474, "grad_norm": 8.632655143737793, "learning_rate": 0.00019914257954393695, "loss": 2.1434, "step": 39640 }, { "epoch": 4.7713598074608905, "grad_norm": 5.609837055206299, "learning_rate": 0.00019914208236471607, "loss": 1.8814, "step": 39650 }, { "epoch": 4.772563176895307, "grad_norm": 4.805448055267334, "learning_rate": 0.0001991415850420121, "loss": 2.1829, "step": 39660 }, { "epoch": 4.773766546329723, "grad_norm": 11.303045272827148, "learning_rate": 0.00019914108757582578, "loss": 1.9826, "step": 39670 }, { "epoch": 4.77496991576414, "grad_norm": 8.081250190734863, "learning_rate": 0.00019914058996615784, "loss": 2.1111, "step": 39680 }, { "epoch": 4.776173285198556, "grad_norm": 12.083354949951172, "learning_rate": 0.00019914009221300898, "loss": 1.934, "step": 39690 }, { "epoch": 4.777376654632972, "grad_norm": 6.836496829986572, "learning_rate": 0.0001991395943163799, "loss": 1.9859, "step": 39700 }, { "epoch": 4.778580024067389, "grad_norm": 5.122470855712891, "learning_rate": 0.00019913909627627138, "loss": 2.0142, "step": 39710 }, { "epoch": 4.7797833935018055, "grad_norm": 7.628266334533691, "learning_rate": 0.00019913859809268412, "loss": 1.9407, "step": 39720 }, { "epoch": 4.780986762936221, "grad_norm": 11.423534393310547, "learning_rate": 0.00019913809976561882, "loss": 2.0385, "step": 39730 }, { "epoch": 4.782190132370638, "grad_norm": 15.587311744689941, "learning_rate": 0.0001991376012950762, "loss": 2.1869, "step": 39740 }, { "epoch": 4.783393501805055, "grad_norm": 5.586488246917725, "learning_rate": 0.00019913710268105702, "loss": 1.9371, "step": 39750 }, { "epoch": 4.78459687123947, "grad_norm": 10.33002758026123, "learning_rate": 0.00019913660392356194, "loss": 2.0048, "step": 39760 }, { "epoch": 4.785800240673887, "grad_norm": 13.47502613067627, "learning_rate": 0.00019913610502259176, "loss": 1.9103, "step": 39770 }, { "epoch": 4.787003610108303, "grad_norm": 11.487744331359863, "learning_rate": 0.00019913560597814716, "loss": 1.9125, "step": 39780 }, { "epoch": 4.78820697954272, "grad_norm": 20.302907943725586, "learning_rate": 0.0001991351067902288, "loss": 2.0052, "step": 39790 }, { "epoch": 4.789410348977136, "grad_norm": 7.953364849090576, "learning_rate": 0.00019913460745883755, "loss": 1.8549, "step": 39800 }, { "epoch": 4.790613718411552, "grad_norm": 6.035819053649902, "learning_rate": 0.000199134107983974, "loss": 2.0331, "step": 39810 }, { "epoch": 4.791817087845969, "grad_norm": 13.217386245727539, "learning_rate": 0.00019913360836563892, "loss": 1.9468, "step": 39820 }, { "epoch": 4.793020457280385, "grad_norm": 6.257696628570557, "learning_rate": 0.00019913310860383306, "loss": 2.05, "step": 39830 }, { "epoch": 4.794223826714801, "grad_norm": 8.752163887023926, "learning_rate": 0.00019913260869855708, "loss": 2.0791, "step": 39840 }, { "epoch": 4.795427196149218, "grad_norm": 8.14365291595459, "learning_rate": 0.00019913210864981175, "loss": 2.0613, "step": 39850 }, { "epoch": 4.7966305655836345, "grad_norm": 4.922191619873047, "learning_rate": 0.0001991316084575978, "loss": 2.0093, "step": 39860 }, { "epoch": 4.79783393501805, "grad_norm": 8.692961692810059, "learning_rate": 0.00019913110812191592, "loss": 2.2555, "step": 39870 }, { "epoch": 4.799037304452467, "grad_norm": 5.166471481323242, "learning_rate": 0.00019913060764276686, "loss": 1.9626, "step": 39880 }, { "epoch": 4.800240673886883, "grad_norm": 9.108636856079102, "learning_rate": 0.00019913010702015137, "loss": 1.9603, "step": 39890 }, { "epoch": 4.8014440433212995, "grad_norm": 10.726107597351074, "learning_rate": 0.0001991296062540701, "loss": 1.9157, "step": 39900 }, { "epoch": 4.802647412755716, "grad_norm": 6.966701984405518, "learning_rate": 0.0001991291053445238, "loss": 1.919, "step": 39910 }, { "epoch": 4.803850782190132, "grad_norm": 28.718181610107422, "learning_rate": 0.00019912860429151325, "loss": 1.9522, "step": 39920 }, { "epoch": 4.805054151624549, "grad_norm": 4.801002502441406, "learning_rate": 0.00019912810309503912, "loss": 1.91, "step": 39930 }, { "epoch": 4.806257521058965, "grad_norm": 10.285552978515625, "learning_rate": 0.00019912760175510213, "loss": 2.0466, "step": 39940 }, { "epoch": 4.807460890493381, "grad_norm": 6.975771427154541, "learning_rate": 0.00019912710027170306, "loss": 1.7739, "step": 39950 }, { "epoch": 4.808664259927798, "grad_norm": 5.462837219238281, "learning_rate": 0.00019912659864484256, "loss": 1.9376, "step": 39960 }, { "epoch": 4.8098676293622145, "grad_norm": 6.584494113922119, "learning_rate": 0.00019912609687452144, "loss": 2.0732, "step": 39970 }, { "epoch": 4.81107099879663, "grad_norm": 9.6201171875, "learning_rate": 0.00019912559496074037, "loss": 1.9854, "step": 39980 }, { "epoch": 4.812274368231047, "grad_norm": 10.575060844421387, "learning_rate": 0.0001991250929035001, "loss": 1.964, "step": 39990 }, { "epoch": 4.813477737665464, "grad_norm": 6.967158317565918, "learning_rate": 0.00019912459070280133, "loss": 1.8992, "step": 40000 }, { "epoch": 4.814681107099879, "grad_norm": 5.697733402252197, "learning_rate": 0.00019912408835864482, "loss": 1.96, "step": 40010 }, { "epoch": 4.815884476534296, "grad_norm": 8.761931419372559, "learning_rate": 0.0001991235858710313, "loss": 2.086, "step": 40020 }, { "epoch": 4.817087845968713, "grad_norm": 8.758502006530762, "learning_rate": 0.00019912308323996142, "loss": 1.8458, "step": 40030 }, { "epoch": 4.8182912154031285, "grad_norm": 13.713561058044434, "learning_rate": 0.000199122580465436, "loss": 2.0011, "step": 40040 }, { "epoch": 4.819494584837545, "grad_norm": 5.9470534324646, "learning_rate": 0.00019912207754745574, "loss": 1.8996, "step": 40050 }, { "epoch": 4.820697954271962, "grad_norm": 8.278471946716309, "learning_rate": 0.00019912157448602136, "loss": 1.9504, "step": 40060 }, { "epoch": 4.821901323706378, "grad_norm": 7.7857346534729, "learning_rate": 0.00019912107128113358, "loss": 1.8783, "step": 40070 }, { "epoch": 4.823104693140794, "grad_norm": 5.615408420562744, "learning_rate": 0.00019912056793279314, "loss": 2.1982, "step": 40080 }, { "epoch": 4.824308062575211, "grad_norm": 9.775245666503906, "learning_rate": 0.00019912006444100077, "loss": 2.0322, "step": 40090 }, { "epoch": 4.825511432009627, "grad_norm": 5.027626037597656, "learning_rate": 0.00019911956080575718, "loss": 1.9425, "step": 40100 }, { "epoch": 4.8267148014440435, "grad_norm": 5.311755180358887, "learning_rate": 0.00019911905702706313, "loss": 2.1683, "step": 40110 }, { "epoch": 4.827918170878459, "grad_norm": 6.125672817230225, "learning_rate": 0.00019911855310491934, "loss": 1.982, "step": 40120 }, { "epoch": 4.829121540312876, "grad_norm": 8.904145240783691, "learning_rate": 0.00019911804903932653, "loss": 1.9801, "step": 40130 }, { "epoch": 4.830324909747293, "grad_norm": 7.311500072479248, "learning_rate": 0.00019911754483028542, "loss": 2.0804, "step": 40140 }, { "epoch": 4.8315282791817085, "grad_norm": 6.035048961639404, "learning_rate": 0.00019911704047779674, "loss": 2.0655, "step": 40150 }, { "epoch": 4.832731648616125, "grad_norm": 7.495687484741211, "learning_rate": 0.00019911653598186124, "loss": 2.0579, "step": 40160 }, { "epoch": 4.833935018050542, "grad_norm": 8.822660446166992, "learning_rate": 0.00019911603134247966, "loss": 1.9424, "step": 40170 }, { "epoch": 4.835138387484958, "grad_norm": 5.27180290222168, "learning_rate": 0.00019911552655965269, "loss": 2.0385, "step": 40180 }, { "epoch": 4.836341756919374, "grad_norm": 8.535886764526367, "learning_rate": 0.00019911502163338107, "loss": 1.9898, "step": 40190 }, { "epoch": 4.837545126353791, "grad_norm": 7.249156951904297, "learning_rate": 0.00019911451656366559, "loss": 1.6896, "step": 40200 }, { "epoch": 4.838748495788207, "grad_norm": 6.474113941192627, "learning_rate": 0.0001991140113505069, "loss": 2.0244, "step": 40210 }, { "epoch": 4.839951865222623, "grad_norm": 6.112491607666016, "learning_rate": 0.00019911350599390577, "loss": 2.0344, "step": 40220 }, { "epoch": 4.841155234657039, "grad_norm": 4.396851062774658, "learning_rate": 0.00019911300049386293, "loss": 1.8916, "step": 40230 }, { "epoch": 4.842358604091456, "grad_norm": 7.9455342292785645, "learning_rate": 0.0001991124948503791, "loss": 2.2096, "step": 40240 }, { "epoch": 4.843561973525873, "grad_norm": 6.555493354797363, "learning_rate": 0.00019911198906345502, "loss": 1.8789, "step": 40250 }, { "epoch": 4.844765342960288, "grad_norm": 4.341710567474365, "learning_rate": 0.00019911148313309145, "loss": 2.1128, "step": 40260 }, { "epoch": 4.845968712394705, "grad_norm": 7.658411502838135, "learning_rate": 0.00019911097705928906, "loss": 1.7827, "step": 40270 }, { "epoch": 4.847172081829122, "grad_norm": 6.984907150268555, "learning_rate": 0.00019911047084204863, "loss": 1.9475, "step": 40280 }, { "epoch": 4.8483754512635375, "grad_norm": 8.7039155960083, "learning_rate": 0.00019910996448137087, "loss": 2.2006, "step": 40290 }, { "epoch": 4.849578820697954, "grad_norm": 6.686063289642334, "learning_rate": 0.00019910945797725654, "loss": 2.1521, "step": 40300 }, { "epoch": 4.850782190132371, "grad_norm": 4.801222801208496, "learning_rate": 0.00019910895132970634, "loss": 2.0962, "step": 40310 }, { "epoch": 4.851985559566787, "grad_norm": 10.462652206420898, "learning_rate": 0.00019910844453872102, "loss": 2.0403, "step": 40320 }, { "epoch": 4.853188929001203, "grad_norm": 8.48914909362793, "learning_rate": 0.00019910793760430134, "loss": 1.9896, "step": 40330 }, { "epoch": 4.85439229843562, "grad_norm": 8.941189765930176, "learning_rate": 0.00019910743052644795, "loss": 2.2719, "step": 40340 }, { "epoch": 4.855595667870036, "grad_norm": 6.038933753967285, "learning_rate": 0.00019910692330516168, "loss": 1.8343, "step": 40350 }, { "epoch": 4.8567990373044525, "grad_norm": 5.75877046585083, "learning_rate": 0.00019910641594044322, "loss": 1.86, "step": 40360 }, { "epoch": 4.858002406738869, "grad_norm": 6.664186000823975, "learning_rate": 0.0001991059084322933, "loss": 2.0437, "step": 40370 }, { "epoch": 4.859205776173285, "grad_norm": 5.103994369506836, "learning_rate": 0.00019910540078071264, "loss": 1.9842, "step": 40380 }, { "epoch": 4.860409145607702, "grad_norm": 8.606149673461914, "learning_rate": 0.00019910489298570204, "loss": 2.1473, "step": 40390 }, { "epoch": 4.861612515042118, "grad_norm": 5.822866439819336, "learning_rate": 0.00019910438504726216, "loss": 1.7962, "step": 40400 }, { "epoch": 4.862815884476534, "grad_norm": 4.304443836212158, "learning_rate": 0.00019910387696539376, "loss": 1.8914, "step": 40410 }, { "epoch": 4.864019253910951, "grad_norm": 6.2228593826293945, "learning_rate": 0.0001991033687400976, "loss": 1.9029, "step": 40420 }, { "epoch": 4.8652226233453675, "grad_norm": 6.532971382141113, "learning_rate": 0.0001991028603713744, "loss": 2.2502, "step": 40430 }, { "epoch": 4.866425992779783, "grad_norm": 7.718857765197754, "learning_rate": 0.0001991023518592249, "loss": 2.063, "step": 40440 }, { "epoch": 4.8676293622142, "grad_norm": 4.780737400054932, "learning_rate": 0.0001991018432036498, "loss": 2.0256, "step": 40450 }, { "epoch": 4.868832731648616, "grad_norm": 4.792891979217529, "learning_rate": 0.00019910133440464992, "loss": 1.886, "step": 40460 }, { "epoch": 4.870036101083032, "grad_norm": 8.56484317779541, "learning_rate": 0.00019910082546222588, "loss": 2.0183, "step": 40470 }, { "epoch": 4.871239470517449, "grad_norm": 5.709110736846924, "learning_rate": 0.0001991003163763785, "loss": 1.916, "step": 40480 }, { "epoch": 4.872442839951865, "grad_norm": 10.676185607910156, "learning_rate": 0.0001990998071471085, "loss": 1.8557, "step": 40490 }, { "epoch": 4.873646209386282, "grad_norm": 8.244970321655273, "learning_rate": 0.00019909929777441661, "loss": 2.0351, "step": 40500 }, { "epoch": 4.874849578820698, "grad_norm": 5.744730472564697, "learning_rate": 0.00019909878825830358, "loss": 1.9496, "step": 40510 }, { "epoch": 4.876052948255114, "grad_norm": 9.303240776062012, "learning_rate": 0.00019909827859877015, "loss": 1.9293, "step": 40520 }, { "epoch": 4.877256317689531, "grad_norm": 8.385213851928711, "learning_rate": 0.00019909776879581701, "loss": 1.8325, "step": 40530 }, { "epoch": 4.878459687123947, "grad_norm": 9.659674644470215, "learning_rate": 0.00019909725884944494, "loss": 2.056, "step": 40540 }, { "epoch": 4.879663056558363, "grad_norm": 7.0810089111328125, "learning_rate": 0.00019909674875965468, "loss": 1.8832, "step": 40550 }, { "epoch": 4.88086642599278, "grad_norm": 4.187409400939941, "learning_rate": 0.00019909623852644695, "loss": 1.8321, "step": 40560 }, { "epoch": 4.882069795427196, "grad_norm": 7.634274005889893, "learning_rate": 0.00019909572814982252, "loss": 1.959, "step": 40570 }, { "epoch": 4.883273164861612, "grad_norm": 5.2782745361328125, "learning_rate": 0.00019909521762978208, "loss": 2.0625, "step": 40580 }, { "epoch": 4.884476534296029, "grad_norm": 7.979857444763184, "learning_rate": 0.00019909470696632638, "loss": 2.0411, "step": 40590 }, { "epoch": 4.885679903730445, "grad_norm": 4.926553249359131, "learning_rate": 0.00019909419615945622, "loss": 1.9154, "step": 40600 }, { "epoch": 4.8868832731648615, "grad_norm": 8.534509658813477, "learning_rate": 0.00019909368520917224, "loss": 2.051, "step": 40610 }, { "epoch": 4.888086642599278, "grad_norm": 10.87283992767334, "learning_rate": 0.00019909317411547525, "loss": 1.9651, "step": 40620 }, { "epoch": 4.889290012033694, "grad_norm": 10.513968467712402, "learning_rate": 0.000199092662878366, "loss": 2.195, "step": 40630 }, { "epoch": 4.890493381468111, "grad_norm": 7.238644123077393, "learning_rate": 0.00019909215149784516, "loss": 2.0604, "step": 40640 }, { "epoch": 4.891696750902527, "grad_norm": 8.014089584350586, "learning_rate": 0.00019909163997391355, "loss": 1.7281, "step": 40650 }, { "epoch": 4.892900120336943, "grad_norm": 12.775314331054688, "learning_rate": 0.00019909112830657186, "loss": 1.9691, "step": 40660 }, { "epoch": 4.89410348977136, "grad_norm": 7.021448135375977, "learning_rate": 0.00019909061649582083, "loss": 1.9499, "step": 40670 }, { "epoch": 4.8953068592057765, "grad_norm": 6.132105350494385, "learning_rate": 0.0001990901045416612, "loss": 2.0732, "step": 40680 }, { "epoch": 4.896510228640192, "grad_norm": 17.236846923828125, "learning_rate": 0.0001990895924440938, "loss": 1.8141, "step": 40690 }, { "epoch": 4.897713598074609, "grad_norm": 8.221872329711914, "learning_rate": 0.00019908908020311922, "loss": 1.8637, "step": 40700 }, { "epoch": 4.898916967509026, "grad_norm": 4.113970756530762, "learning_rate": 0.00019908856781873828, "loss": 2.1051, "step": 40710 }, { "epoch": 4.900120336943441, "grad_norm": 9.410972595214844, "learning_rate": 0.00019908805529095174, "loss": 1.9666, "step": 40720 }, { "epoch": 4.901323706377858, "grad_norm": 5.489748477935791, "learning_rate": 0.0001990875426197603, "loss": 1.8994, "step": 40730 }, { "epoch": 4.902527075812275, "grad_norm": 9.212996482849121, "learning_rate": 0.00019908702980516477, "loss": 2.1255, "step": 40740 }, { "epoch": 4.9037304452466906, "grad_norm": 11.50767707824707, "learning_rate": 0.0001990865168471658, "loss": 1.996, "step": 40750 }, { "epoch": 4.904933814681107, "grad_norm": 4.1979475021362305, "learning_rate": 0.00019908600374576415, "loss": 1.9806, "step": 40760 }, { "epoch": 4.906137184115524, "grad_norm": 5.681236743927002, "learning_rate": 0.00019908549050096067, "loss": 1.9222, "step": 40770 }, { "epoch": 4.90734055354994, "grad_norm": 6.431387901306152, "learning_rate": 0.00019908497711275595, "loss": 1.7968, "step": 40780 }, { "epoch": 4.908543922984356, "grad_norm": 8.566272735595703, "learning_rate": 0.00019908446358115085, "loss": 2.0927, "step": 40790 }, { "epoch": 4.909747292418773, "grad_norm": 5.922589302062988, "learning_rate": 0.00019908394990614606, "loss": 2.1326, "step": 40800 }, { "epoch": 4.910950661853189, "grad_norm": 6.080644607543945, "learning_rate": 0.00019908343608774232, "loss": 2.1035, "step": 40810 }, { "epoch": 4.9121540312876055, "grad_norm": 6.855172634124756, "learning_rate": 0.0001990829221259404, "loss": 1.8538, "step": 40820 }, { "epoch": 4.913357400722021, "grad_norm": 12.062527656555176, "learning_rate": 0.00019908240802074098, "loss": 1.9515, "step": 40830 }, { "epoch": 4.914560770156438, "grad_norm": 8.997509956359863, "learning_rate": 0.0001990818937721449, "loss": 1.8753, "step": 40840 }, { "epoch": 4.915764139590855, "grad_norm": 5.290858268737793, "learning_rate": 0.00019908137938015282, "loss": 1.8015, "step": 40850 }, { "epoch": 4.9169675090252705, "grad_norm": 5.533619403839111, "learning_rate": 0.00019908086484476557, "loss": 2.2231, "step": 40860 }, { "epoch": 4.918170878459687, "grad_norm": 6.96120548248291, "learning_rate": 0.00019908035016598383, "loss": 1.8157, "step": 40870 }, { "epoch": 4.919374247894104, "grad_norm": 11.576964378356934, "learning_rate": 0.00019907983534380838, "loss": 2.0325, "step": 40880 }, { "epoch": 4.92057761732852, "grad_norm": 10.48487663269043, "learning_rate": 0.0001990793203782399, "loss": 2.2996, "step": 40890 }, { "epoch": 4.921780986762936, "grad_norm": 6.319563388824463, "learning_rate": 0.0001990788052692792, "loss": 1.8925, "step": 40900 }, { "epoch": 4.922984356197353, "grad_norm": 18.32505989074707, "learning_rate": 0.00019907829001692703, "loss": 2.0216, "step": 40910 }, { "epoch": 4.924187725631769, "grad_norm": 5.896711826324463, "learning_rate": 0.00019907777462118407, "loss": 1.9168, "step": 40920 }, { "epoch": 4.925391095066185, "grad_norm": 10.592049598693848, "learning_rate": 0.00019907725908205112, "loss": 1.8219, "step": 40930 }, { "epoch": 4.926594464500601, "grad_norm": 17.71052360534668, "learning_rate": 0.00019907674339952894, "loss": 2.0158, "step": 40940 }, { "epoch": 4.927797833935018, "grad_norm": 6.570394992828369, "learning_rate": 0.00019907622757361822, "loss": 2.038, "step": 40950 }, { "epoch": 4.929001203369435, "grad_norm": 7.129170894622803, "learning_rate": 0.00019907571160431977, "loss": 2.0614, "step": 40960 }, { "epoch": 4.93020457280385, "grad_norm": 5.932984828948975, "learning_rate": 0.0001990751954916343, "loss": 2.1398, "step": 40970 }, { "epoch": 4.931407942238267, "grad_norm": 5.241306781768799, "learning_rate": 0.00019907467923556253, "loss": 2.0994, "step": 40980 }, { "epoch": 4.932611311672684, "grad_norm": 8.4271821975708, "learning_rate": 0.00019907416283610526, "loss": 2.0999, "step": 40990 }, { "epoch": 4.9338146811070995, "grad_norm": 20.040708541870117, "learning_rate": 0.0001990736462932632, "loss": 2.1312, "step": 41000 }, { "epoch": 4.935018050541516, "grad_norm": 4.027037143707275, "learning_rate": 0.00019907312960703715, "loss": 1.7862, "step": 41010 }, { "epoch": 4.936221419975933, "grad_norm": 7.310624122619629, "learning_rate": 0.0001990726127774278, "loss": 1.8866, "step": 41020 }, { "epoch": 4.937424789410349, "grad_norm": 18.408870697021484, "learning_rate": 0.0001990720958044359, "loss": 1.8095, "step": 41030 }, { "epoch": 4.938628158844765, "grad_norm": 8.137670516967773, "learning_rate": 0.00019907157868806225, "loss": 1.9862, "step": 41040 }, { "epoch": 4.939831528279182, "grad_norm": 7.439457416534424, "learning_rate": 0.00019907106142830756, "loss": 2.1416, "step": 41050 }, { "epoch": 4.941034897713598, "grad_norm": 10.49796199798584, "learning_rate": 0.00019907054402517254, "loss": 2.075, "step": 41060 }, { "epoch": 4.9422382671480145, "grad_norm": 10.617677688598633, "learning_rate": 0.00019907002647865806, "loss": 2.0176, "step": 41070 }, { "epoch": 4.943441636582431, "grad_norm": 10.57815933227539, "learning_rate": 0.00019906950878876474, "loss": 1.8455, "step": 41080 }, { "epoch": 4.944645006016847, "grad_norm": 11.920673370361328, "learning_rate": 0.0001990689909554934, "loss": 1.879, "step": 41090 }, { "epoch": 4.945848375451264, "grad_norm": 16.81657600402832, "learning_rate": 0.00019906847297884476, "loss": 1.935, "step": 41100 }, { "epoch": 4.94705174488568, "grad_norm": 5.1014323234558105, "learning_rate": 0.00019906795485881957, "loss": 1.8974, "step": 41110 }, { "epoch": 4.948255114320096, "grad_norm": 7.044492244720459, "learning_rate": 0.0001990674365954186, "loss": 1.8451, "step": 41120 }, { "epoch": 4.949458483754513, "grad_norm": 11.909997940063477, "learning_rate": 0.0001990669181886426, "loss": 2.1763, "step": 41130 }, { "epoch": 4.9506618531889295, "grad_norm": 7.357934951782227, "learning_rate": 0.00019906639963849232, "loss": 2.0923, "step": 41140 }, { "epoch": 4.951865222623345, "grad_norm": 9.347342491149902, "learning_rate": 0.00019906588094496848, "loss": 2.0536, "step": 41150 }, { "epoch": 4.953068592057762, "grad_norm": 8.337024688720703, "learning_rate": 0.00019906536210807186, "loss": 2.1872, "step": 41160 }, { "epoch": 4.954271961492178, "grad_norm": 8.844291687011719, "learning_rate": 0.0001990648431278032, "loss": 1.9118, "step": 41170 }, { "epoch": 4.955475330926594, "grad_norm": 5.540093898773193, "learning_rate": 0.00019906432400416325, "loss": 2.1319, "step": 41180 }, { "epoch": 4.956678700361011, "grad_norm": 18.86222267150879, "learning_rate": 0.00019906380473715276, "loss": 2.0275, "step": 41190 }, { "epoch": 4.957882069795427, "grad_norm": 6.521647930145264, "learning_rate": 0.00019906328532677246, "loss": 1.885, "step": 41200 }, { "epoch": 4.959085439229844, "grad_norm": 6.3068437576293945, "learning_rate": 0.0001990627657730232, "loss": 2.165, "step": 41210 }, { "epoch": 4.96028880866426, "grad_norm": 12.06706714630127, "learning_rate": 0.0001990622460759056, "loss": 2.2039, "step": 41220 }, { "epoch": 4.961492178098676, "grad_norm": 8.219376564025879, "learning_rate": 0.00019906172623542048, "loss": 2.0524, "step": 41230 }, { "epoch": 4.962695547533093, "grad_norm": 7.559345245361328, "learning_rate": 0.00019906120625156864, "loss": 1.7926, "step": 41240 }, { "epoch": 4.963898916967509, "grad_norm": 9.300811767578125, "learning_rate": 0.00019906068612435073, "loss": 1.985, "step": 41250 }, { "epoch": 4.965102286401925, "grad_norm": 5.420833110809326, "learning_rate": 0.00019906016585376755, "loss": 2.1751, "step": 41260 }, { "epoch": 4.966305655836342, "grad_norm": 9.66323471069336, "learning_rate": 0.00019905964543981988, "loss": 2.0754, "step": 41270 }, { "epoch": 4.967509025270758, "grad_norm": 7.217255592346191, "learning_rate": 0.0001990591248825084, "loss": 2.0565, "step": 41280 }, { "epoch": 4.968712394705174, "grad_norm": 8.194851875305176, "learning_rate": 0.00019905860418183396, "loss": 1.9816, "step": 41290 }, { "epoch": 4.969915764139591, "grad_norm": 5.018765926361084, "learning_rate": 0.0001990580833377972, "loss": 2.1554, "step": 41300 }, { "epoch": 4.971119133574007, "grad_norm": 5.87050724029541, "learning_rate": 0.000199057562350399, "loss": 2.1388, "step": 41310 }, { "epoch": 4.9723225030084235, "grad_norm": 8.783880233764648, "learning_rate": 0.00019905704121964002, "loss": 1.9649, "step": 41320 }, { "epoch": 4.97352587244284, "grad_norm": 7.442572116851807, "learning_rate": 0.00019905651994552105, "loss": 2.0547, "step": 41330 }, { "epoch": 4.974729241877256, "grad_norm": 20.498619079589844, "learning_rate": 0.00019905599852804284, "loss": 2.2141, "step": 41340 }, { "epoch": 4.975932611311673, "grad_norm": 30.7430362701416, "learning_rate": 0.00019905547696720614, "loss": 2.3052, "step": 41350 }, { "epoch": 4.977135980746089, "grad_norm": 6.986667156219482, "learning_rate": 0.00019905495526301173, "loss": 2.0343, "step": 41360 }, { "epoch": 4.978339350180505, "grad_norm": 8.35204792022705, "learning_rate": 0.0001990544334154603, "loss": 2.0233, "step": 41370 }, { "epoch": 4.979542719614922, "grad_norm": 4.920589447021484, "learning_rate": 0.0001990539114245527, "loss": 1.9176, "step": 41380 }, { "epoch": 4.9807460890493385, "grad_norm": 13.045265197753906, "learning_rate": 0.00019905338929028957, "loss": 2.0661, "step": 41390 }, { "epoch": 4.981949458483754, "grad_norm": 8.587635040283203, "learning_rate": 0.0001990528670126718, "loss": 2.1709, "step": 41400 }, { "epoch": 4.983152827918171, "grad_norm": 9.253944396972656, "learning_rate": 0.00019905234459170003, "loss": 1.8809, "step": 41410 }, { "epoch": 4.984356197352588, "grad_norm": 11.270646095275879, "learning_rate": 0.0001990518220273751, "loss": 2.0994, "step": 41420 }, { "epoch": 4.985559566787003, "grad_norm": 10.309446334838867, "learning_rate": 0.0001990512993196977, "loss": 2.0687, "step": 41430 }, { "epoch": 4.98676293622142, "grad_norm": 64.89546966552734, "learning_rate": 0.00019905077646866862, "loss": 2.2817, "step": 41440 }, { "epoch": 4.987966305655837, "grad_norm": 259.8202209472656, "learning_rate": 0.00019905025347428864, "loss": 2.873, "step": 41450 }, { "epoch": 4.9891696750902526, "grad_norm": 181.38555908203125, "learning_rate": 0.00019904973033655843, "loss": 4.1569, "step": 41460 }, { "epoch": 4.990373044524669, "grad_norm": 199.8432159423828, "learning_rate": 0.00019904920705547885, "loss": 3.7375, "step": 41470 }, { "epoch": 4.991576413959086, "grad_norm": 152.0066680908203, "learning_rate": 0.0001990486836310506, "loss": 4.5042, "step": 41480 }, { "epoch": 4.992779783393502, "grad_norm": 89.15633392333984, "learning_rate": 0.00019904816006327444, "loss": 3.5483, "step": 41490 }, { "epoch": 4.993983152827918, "grad_norm": 111.64482879638672, "learning_rate": 0.00019904763635215117, "loss": 3.1912, "step": 41500 }, { "epoch": 4.995186522262334, "grad_norm": 97.60633087158203, "learning_rate": 0.00019904711249768147, "loss": 3.2804, "step": 41510 }, { "epoch": 4.996389891696751, "grad_norm": 699.0576171875, "learning_rate": 0.0001990465884998662, "loss": 5.176, "step": 41520 }, { "epoch": 4.9975932611311675, "grad_norm": 52.12234878540039, "learning_rate": 0.000199046064358706, "loss": 7.5459, "step": 41530 }, { "epoch": 4.998796630565583, "grad_norm": 246.80284118652344, "learning_rate": 0.00019904554007420174, "loss": 7.5954, "step": 41540 }, { "epoch": 5.0, "grad_norm": 46.441307067871094, "learning_rate": 0.0001990450156463541, "loss": 7.6823, "step": 41550 }, { "epoch": 5.0, "eval_loss": 6.835190296173096, "eval_runtime": 119.0343, "eval_samples_per_second": 62.058, "eval_steps_per_second": 7.762, "step": 41550 }, { "epoch": 5.001203369434417, "grad_norm": 188.66549682617188, "learning_rate": 0.00019904449107516388, "loss": 6.5096, "step": 41560 }, { "epoch": 5.0024067388688325, "grad_norm": 26.782245635986328, "learning_rate": 0.00019904396636063183, "loss": 6.2541, "step": 41570 }, { "epoch": 5.003610108303249, "grad_norm": 90.96510314941406, "learning_rate": 0.0001990434415027587, "loss": 5.5302, "step": 41580 }, { "epoch": 5.004813477737666, "grad_norm": 107.69341278076172, "learning_rate": 0.00019904291650154528, "loss": 5.2539, "step": 41590 }, { "epoch": 5.006016847172082, "grad_norm": 110.74876403808594, "learning_rate": 0.0001990423913569923, "loss": 6.1839, "step": 41600 }, { "epoch": 5.007220216606498, "grad_norm": 84.80052185058594, "learning_rate": 0.0001990418660691005, "loss": 5.2663, "step": 41610 }, { "epoch": 5.008423586040915, "grad_norm": 163.00718688964844, "learning_rate": 0.0001990413406378707, "loss": 5.6696, "step": 41620 }, { "epoch": 5.009626955475331, "grad_norm": 52.932132720947266, "learning_rate": 0.0001990408150633036, "loss": 6.2381, "step": 41630 }, { "epoch": 5.0108303249097474, "grad_norm": 172.20726013183594, "learning_rate": 0.00019904028934540003, "loss": 6.0336, "step": 41640 }, { "epoch": 5.012033694344163, "grad_norm": 148.40406799316406, "learning_rate": 0.00019903976348416066, "loss": 7.1318, "step": 41650 }, { "epoch": 5.01323706377858, "grad_norm": 58.285980224609375, "learning_rate": 0.00019903923747958632, "loss": 7.3503, "step": 41660 }, { "epoch": 5.014440433212997, "grad_norm": 9.146730422973633, "learning_rate": 0.00019903871133167774, "loss": 7.6519, "step": 41670 }, { "epoch": 5.015643802647412, "grad_norm": 390.4382019042969, "learning_rate": 0.00019903818504043573, "loss": 8.1563, "step": 41680 }, { "epoch": 5.016847172081829, "grad_norm": 405.6904296875, "learning_rate": 0.00019903765860586098, "loss": 18.0557, "step": 41690 }, { "epoch": 5.018050541516246, "grad_norm": 552.901611328125, "learning_rate": 0.0001990371320279543, "loss": 8.5311, "step": 41700 }, { "epoch": 5.0192539109506615, "grad_norm": 14.789624214172363, "learning_rate": 0.00019903660530671644, "loss": 8.0326, "step": 41710 }, { "epoch": 5.020457280385078, "grad_norm": 77.91343688964844, "learning_rate": 0.00019903607844214816, "loss": 7.5159, "step": 41720 }, { "epoch": 5.021660649819495, "grad_norm": 217.33673095703125, "learning_rate": 0.0001990355514342502, "loss": 8.4779, "step": 41730 }, { "epoch": 5.022864019253911, "grad_norm": 24.981643676757812, "learning_rate": 0.00019903502428302337, "loss": 7.9218, "step": 41740 }, { "epoch": 5.024067388688327, "grad_norm": 19.880374908447266, "learning_rate": 0.00019903449698846842, "loss": 8.2566, "step": 41750 }, { "epoch": 5.025270758122744, "grad_norm": 9.139537811279297, "learning_rate": 0.00019903396955058607, "loss": 7.8532, "step": 41760 }, { "epoch": 5.02647412755716, "grad_norm": 31.869863510131836, "learning_rate": 0.00019903344196937713, "loss": 7.8622, "step": 41770 }, { "epoch": 5.0276774969915765, "grad_norm": 20.091535568237305, "learning_rate": 0.00019903291424484235, "loss": 7.8264, "step": 41780 }, { "epoch": 5.028880866425993, "grad_norm": 33.73579788208008, "learning_rate": 0.0001990323863769825, "loss": 7.6968, "step": 41790 }, { "epoch": 5.030084235860409, "grad_norm": 208.64324951171875, "learning_rate": 0.00019903185836579832, "loss": 8.0207, "step": 41800 }, { "epoch": 5.031287605294826, "grad_norm": 8.617533683776855, "learning_rate": 0.0001990313302112906, "loss": 8.0253, "step": 41810 }, { "epoch": 5.0324909747292415, "grad_norm": 53.43470764160156, "learning_rate": 0.00019903080191346008, "loss": 7.7598, "step": 41820 }, { "epoch": 5.033694344163658, "grad_norm": 7.555762767791748, "learning_rate": 0.00019903027347230757, "loss": 7.6914, "step": 41830 }, { "epoch": 5.034897713598075, "grad_norm": 10.855253219604492, "learning_rate": 0.0001990297448878338, "loss": 7.8153, "step": 41840 }, { "epoch": 5.036101083032491, "grad_norm": 15.845646858215332, "learning_rate": 0.0001990292161600395, "loss": 7.7012, "step": 41850 }, { "epoch": 5.037304452466907, "grad_norm": 8.219528198242188, "learning_rate": 0.00019902868728892553, "loss": 7.6016, "step": 41860 }, { "epoch": 5.038507821901324, "grad_norm": 16.66413116455078, "learning_rate": 0.00019902815827449257, "loss": 7.435, "step": 41870 }, { "epoch": 5.03971119133574, "grad_norm": 40.152835845947266, "learning_rate": 0.0001990276291167414, "loss": 7.9836, "step": 41880 }, { "epoch": 5.040914560770156, "grad_norm": 16.253328323364258, "learning_rate": 0.00019902709981567282, "loss": 7.6251, "step": 41890 }, { "epoch": 5.042117930204573, "grad_norm": 16.718053817749023, "learning_rate": 0.0001990265703712876, "loss": 7.5786, "step": 41900 }, { "epoch": 5.043321299638989, "grad_norm": 27.236248016357422, "learning_rate": 0.00019902604078358643, "loss": 7.4927, "step": 41910 }, { "epoch": 5.044524669073406, "grad_norm": 9.109397888183594, "learning_rate": 0.0001990255110525702, "loss": 7.4796, "step": 41920 }, { "epoch": 5.045728038507822, "grad_norm": 30.82512855529785, "learning_rate": 0.00019902498117823955, "loss": 7.5358, "step": 41930 }, { "epoch": 5.046931407942238, "grad_norm": 19.24618148803711, "learning_rate": 0.00019902445116059533, "loss": 7.5374, "step": 41940 }, { "epoch": 5.048134777376655, "grad_norm": 8.26912784576416, "learning_rate": 0.00019902392099963826, "loss": 7.5509, "step": 41950 }, { "epoch": 5.049338146811071, "grad_norm": 15.063525199890137, "learning_rate": 0.00019902339069536917, "loss": 7.505, "step": 41960 }, { "epoch": 5.050541516245487, "grad_norm": 6.537913799285889, "learning_rate": 0.00019902286024778873, "loss": 7.538, "step": 41970 }, { "epoch": 5.051744885679904, "grad_norm": 13.247450828552246, "learning_rate": 0.0001990223296568978, "loss": 7.4489, "step": 41980 }, { "epoch": 5.05294825511432, "grad_norm": 88.28585815429688, "learning_rate": 0.0001990217989226971, "loss": 7.6089, "step": 41990 }, { "epoch": 5.054151624548736, "grad_norm": 12.32447624206543, "learning_rate": 0.0001990212680451874, "loss": 7.5718, "step": 42000 }, { "epoch": 5.055354993983153, "grad_norm": 22.057758331298828, "learning_rate": 0.00019902073702436952, "loss": 7.6186, "step": 42010 }, { "epoch": 5.056558363417569, "grad_norm": 12.481575965881348, "learning_rate": 0.00019902020586024414, "loss": 7.5671, "step": 42020 }, { "epoch": 5.0577617328519855, "grad_norm": 16.605792999267578, "learning_rate": 0.00019901967455281209, "loss": 7.4652, "step": 42030 }, { "epoch": 5.058965102286402, "grad_norm": 43.19899368286133, "learning_rate": 0.00019901914310207412, "loss": 7.4846, "step": 42040 }, { "epoch": 5.060168471720818, "grad_norm": 13.148046493530273, "learning_rate": 0.000199018611508031, "loss": 7.4514, "step": 42050 }, { "epoch": 5.061371841155235, "grad_norm": 64.03712463378906, "learning_rate": 0.00019901807977068352, "loss": 7.6252, "step": 42060 }, { "epoch": 5.062575210589651, "grad_norm": 25.0727481842041, "learning_rate": 0.00019901754789003242, "loss": 7.7015, "step": 42070 }, { "epoch": 5.063778580024067, "grad_norm": 18.119224548339844, "learning_rate": 0.00019901701586607848, "loss": 7.7114, "step": 42080 }, { "epoch": 5.064981949458484, "grad_norm": 7.71126127243042, "learning_rate": 0.00019901648369882247, "loss": 7.6586, "step": 42090 }, { "epoch": 5.0661853188929005, "grad_norm": 13.048935890197754, "learning_rate": 0.00019901595138826516, "loss": 7.9082, "step": 42100 }, { "epoch": 5.067388688327316, "grad_norm": 5.803314685821533, "learning_rate": 0.00019901541893440735, "loss": 7.5679, "step": 42110 }, { "epoch": 5.068592057761733, "grad_norm": 4.404675006866455, "learning_rate": 0.00019901488633724976, "loss": 7.6752, "step": 42120 }, { "epoch": 5.06979542719615, "grad_norm": 22.034263610839844, "learning_rate": 0.00019901435359679316, "loss": 7.8712, "step": 42130 }, { "epoch": 5.070998796630565, "grad_norm": 8.816366195678711, "learning_rate": 0.00019901382071303838, "loss": 7.7475, "step": 42140 }, { "epoch": 5.072202166064982, "grad_norm": 5.255917072296143, "learning_rate": 0.00019901328768598612, "loss": 7.6526, "step": 42150 }, { "epoch": 5.073405535499398, "grad_norm": 7.4424543380737305, "learning_rate": 0.00019901275451563724, "loss": 7.5585, "step": 42160 }, { "epoch": 5.074608904933815, "grad_norm": 5.376596450805664, "learning_rate": 0.00019901222120199244, "loss": 7.4951, "step": 42170 }, { "epoch": 5.075812274368231, "grad_norm": 9.995231628417969, "learning_rate": 0.00019901168774505247, "loss": 7.271, "step": 42180 }, { "epoch": 5.077015643802647, "grad_norm": 9.695402145385742, "learning_rate": 0.00019901115414481816, "loss": 7.312, "step": 42190 }, { "epoch": 5.078219013237064, "grad_norm": 14.717586517333984, "learning_rate": 0.0001990106204012903, "loss": 7.44, "step": 42200 }, { "epoch": 5.07942238267148, "grad_norm": 8.688631057739258, "learning_rate": 0.00019901008651446959, "loss": 7.2563, "step": 42210 }, { "epoch": 5.080625752105896, "grad_norm": 6.643031120300293, "learning_rate": 0.0001990095524843569, "loss": 7.275, "step": 42220 }, { "epoch": 5.081829121540313, "grad_norm": 12.19149398803711, "learning_rate": 0.00019900901831095287, "loss": 7.2457, "step": 42230 }, { "epoch": 5.0830324909747295, "grad_norm": 9.875896453857422, "learning_rate": 0.00019900848399425838, "loss": 7.2449, "step": 42240 }, { "epoch": 5.084235860409145, "grad_norm": 8.756649017333984, "learning_rate": 0.00019900794953427414, "loss": 7.1868, "step": 42250 }, { "epoch": 5.085439229843562, "grad_norm": 45.32009506225586, "learning_rate": 0.000199007414931001, "loss": 7.1647, "step": 42260 }, { "epoch": 5.086642599277979, "grad_norm": 10.563359260559082, "learning_rate": 0.00019900688018443968, "loss": 7.2773, "step": 42270 }, { "epoch": 5.0878459687123945, "grad_norm": 5.818143367767334, "learning_rate": 0.0001990063452945909, "loss": 7.0704, "step": 42280 }, { "epoch": 5.089049338146811, "grad_norm": 6.811473369598389, "learning_rate": 0.00019900581026145554, "loss": 6.9841, "step": 42290 }, { "epoch": 5.090252707581228, "grad_norm": 8.492681503295898, "learning_rate": 0.00019900527508503432, "loss": 7.0224, "step": 42300 }, { "epoch": 5.091456077015644, "grad_norm": 6.135493278503418, "learning_rate": 0.00019900473976532806, "loss": 6.8552, "step": 42310 }, { "epoch": 5.09265944645006, "grad_norm": 31.769546508789062, "learning_rate": 0.00019900420430233747, "loss": 7.4176, "step": 42320 }, { "epoch": 5.093862815884476, "grad_norm": 15.368035316467285, "learning_rate": 0.00019900366869606332, "loss": 7.6599, "step": 42330 }, { "epoch": 5.095066185318893, "grad_norm": 20.432085037231445, "learning_rate": 0.00019900313294650648, "loss": 7.6455, "step": 42340 }, { "epoch": 5.0962695547533094, "grad_norm": 8.058140754699707, "learning_rate": 0.00019900259705366762, "loss": 7.5279, "step": 42350 }, { "epoch": 5.097472924187725, "grad_norm": 14.024980545043945, "learning_rate": 0.00019900206101754756, "loss": 7.2253, "step": 42360 }, { "epoch": 5.098676293622142, "grad_norm": 4.082953453063965, "learning_rate": 0.0001990015248381471, "loss": 7.2084, "step": 42370 }, { "epoch": 5.099879663056559, "grad_norm": 10.778286933898926, "learning_rate": 0.00019900098851546698, "loss": 7.2656, "step": 42380 }, { "epoch": 5.101083032490974, "grad_norm": 4.0808491706848145, "learning_rate": 0.000199000452049508, "loss": 7.0583, "step": 42390 }, { "epoch": 5.102286401925391, "grad_norm": 8.811480522155762, "learning_rate": 0.0001989999154402709, "loss": 7.0453, "step": 42400 }, { "epoch": 5.103489771359808, "grad_norm": 14.220723152160645, "learning_rate": 0.00019899937868775647, "loss": 6.9582, "step": 42410 }, { "epoch": 5.1046931407942235, "grad_norm": 6.13651180267334, "learning_rate": 0.00019899884179196554, "loss": 6.9618, "step": 42420 }, { "epoch": 5.10589651022864, "grad_norm": 13.354302406311035, "learning_rate": 0.00019899830475289881, "loss": 6.9541, "step": 42430 }, { "epoch": 5.107099879663057, "grad_norm": 6.9171576499938965, "learning_rate": 0.00019899776757055707, "loss": 6.9258, "step": 42440 }, { "epoch": 5.108303249097473, "grad_norm": 27.351240158081055, "learning_rate": 0.00019899723024494116, "loss": 6.8234, "step": 42450 }, { "epoch": 5.109506618531889, "grad_norm": 39.086952209472656, "learning_rate": 0.0001989966927760518, "loss": 7.5022, "step": 42460 }, { "epoch": 5.110709987966306, "grad_norm": 6.120758056640625, "learning_rate": 0.0001989961551638898, "loss": 7.5716, "step": 42470 }, { "epoch": 5.111913357400722, "grad_norm": 11.316590309143066, "learning_rate": 0.00019899561740845592, "loss": 7.2036, "step": 42480 }, { "epoch": 5.1131167268351385, "grad_norm": 22.515880584716797, "learning_rate": 0.0001989950795097509, "loss": 6.9729, "step": 42490 }, { "epoch": 5.114320096269555, "grad_norm": 26.785188674926758, "learning_rate": 0.00019899454146777562, "loss": 7.164, "step": 42500 }, { "epoch": 5.115523465703971, "grad_norm": 93.84312438964844, "learning_rate": 0.00019899400328253074, "loss": 7.1441, "step": 42510 }, { "epoch": 5.116726835138388, "grad_norm": 59.40557861328125, "learning_rate": 0.00019899346495401714, "loss": 7.0329, "step": 42520 }, { "epoch": 5.1179302045728035, "grad_norm": 15.077173233032227, "learning_rate": 0.00019899292648223554, "loss": 7.3618, "step": 42530 }, { "epoch": 5.11913357400722, "grad_norm": 14.304474830627441, "learning_rate": 0.00019899238786718671, "loss": 7.0873, "step": 42540 }, { "epoch": 5.120336943441637, "grad_norm": 13.122438430786133, "learning_rate": 0.0001989918491088715, "loss": 7.2357, "step": 42550 }, { "epoch": 5.121540312876053, "grad_norm": 58.743526458740234, "learning_rate": 0.00019899131020729063, "loss": 7.1096, "step": 42560 }, { "epoch": 5.122743682310469, "grad_norm": 11.453507423400879, "learning_rate": 0.0001989907711624449, "loss": 7.2142, "step": 42570 }, { "epoch": 5.123947051744886, "grad_norm": 24.48541259765625, "learning_rate": 0.00019899023197433505, "loss": 7.0047, "step": 42580 }, { "epoch": 5.125150421179302, "grad_norm": 18.91883087158203, "learning_rate": 0.00019898969264296192, "loss": 6.9835, "step": 42590 }, { "epoch": 5.126353790613718, "grad_norm": 24.201261520385742, "learning_rate": 0.00019898915316832625, "loss": 7.1037, "step": 42600 }, { "epoch": 5.127557160048135, "grad_norm": 7.234903812408447, "learning_rate": 0.00019898861355042885, "loss": 6.9852, "step": 42610 }, { "epoch": 5.128760529482551, "grad_norm": 9.424560546875, "learning_rate": 0.00019898807378927047, "loss": 6.9081, "step": 42620 }, { "epoch": 5.129963898916968, "grad_norm": 21.928312301635742, "learning_rate": 0.0001989875338848519, "loss": 6.9391, "step": 42630 }, { "epoch": 5.131167268351384, "grad_norm": 14.327727317810059, "learning_rate": 0.000198986993837174, "loss": 6.9213, "step": 42640 }, { "epoch": 5.1323706377858, "grad_norm": 294.3425598144531, "learning_rate": 0.0001989864536462374, "loss": 6.962, "step": 42650 }, { "epoch": 5.133574007220217, "grad_norm": 30.27318572998047, "learning_rate": 0.00019898591331204298, "loss": 7.1761, "step": 42660 }, { "epoch": 5.1347773766546325, "grad_norm": 13.551473617553711, "learning_rate": 0.00019898537283459153, "loss": 7.1354, "step": 42670 }, { "epoch": 5.135980746089049, "grad_norm": 17.305870056152344, "learning_rate": 0.00019898483221388383, "loss": 7.0328, "step": 42680 }, { "epoch": 5.137184115523466, "grad_norm": 462.5865478515625, "learning_rate": 0.00019898429144992057, "loss": 6.912, "step": 42690 }, { "epoch": 5.138387484957882, "grad_norm": 283.6278076171875, "learning_rate": 0.00019898375054270263, "loss": 7.2048, "step": 42700 }, { "epoch": 5.139590854392298, "grad_norm": 13.909567832946777, "learning_rate": 0.00019898320949223076, "loss": 7.026, "step": 42710 }, { "epoch": 5.140794223826715, "grad_norm": 54.53024673461914, "learning_rate": 0.00019898266829850578, "loss": 7.0217, "step": 42720 }, { "epoch": 5.141997593261131, "grad_norm": 82.68240356445312, "learning_rate": 0.0001989821269615284, "loss": 7.0224, "step": 42730 }, { "epoch": 5.1432009626955475, "grad_norm": 41.48139953613281, "learning_rate": 0.0001989815854812995, "loss": 6.9344, "step": 42740 }, { "epoch": 5.144404332129964, "grad_norm": 21.438369750976562, "learning_rate": 0.00019898104385781976, "loss": 7.1404, "step": 42750 }, { "epoch": 5.14560770156438, "grad_norm": 32.261722564697266, "learning_rate": 0.00019898050209109003, "loss": 7.0036, "step": 42760 }, { "epoch": 5.146811070998797, "grad_norm": 28.524431228637695, "learning_rate": 0.00019897996018111106, "loss": 7.0866, "step": 42770 }, { "epoch": 5.148014440433213, "grad_norm": 82.64258575439453, "learning_rate": 0.00019897941812788364, "loss": 7.144, "step": 42780 }, { "epoch": 5.149217809867629, "grad_norm": 13.357208251953125, "learning_rate": 0.00019897887593140861, "loss": 7.4019, "step": 42790 }, { "epoch": 5.150421179302046, "grad_norm": 6.039828777313232, "learning_rate": 0.00019897833359168668, "loss": 7.2395, "step": 42800 }, { "epoch": 5.1516245487364625, "grad_norm": 21.577228546142578, "learning_rate": 0.00019897779110871866, "loss": 7.0816, "step": 42810 }, { "epoch": 5.152827918170878, "grad_norm": 27.31770133972168, "learning_rate": 0.00019897724848250535, "loss": 6.9184, "step": 42820 }, { "epoch": 5.154031287605295, "grad_norm": 24.126413345336914, "learning_rate": 0.00019897670571304753, "loss": 6.9494, "step": 42830 }, { "epoch": 5.155234657039712, "grad_norm": 17.958532333374023, "learning_rate": 0.00019897616280034597, "loss": 7.1084, "step": 42840 }, { "epoch": 5.156438026474127, "grad_norm": 11.24973201751709, "learning_rate": 0.00019897561974440144, "loss": 7.0399, "step": 42850 }, { "epoch": 5.157641395908544, "grad_norm": 7.521182537078857, "learning_rate": 0.0001989750765452148, "loss": 6.9667, "step": 42860 }, { "epoch": 5.15884476534296, "grad_norm": 7.6414642333984375, "learning_rate": 0.00019897453320278676, "loss": 6.9056, "step": 42870 }, { "epoch": 5.160048134777377, "grad_norm": 4.558192253112793, "learning_rate": 0.00019897398971711815, "loss": 6.8337, "step": 42880 }, { "epoch": 5.161251504211793, "grad_norm": 6.217107772827148, "learning_rate": 0.00019897344608820974, "loss": 6.8912, "step": 42890 }, { "epoch": 5.162454873646209, "grad_norm": 8.388609886169434, "learning_rate": 0.0001989729023160623, "loss": 6.7866, "step": 42900 }, { "epoch": 5.163658243080626, "grad_norm": 4.573419570922852, "learning_rate": 0.00019897235840067665, "loss": 6.8123, "step": 42910 }, { "epoch": 5.164861612515042, "grad_norm": 8.137279510498047, "learning_rate": 0.00019897181434205358, "loss": 6.7084, "step": 42920 }, { "epoch": 5.166064981949458, "grad_norm": 9.95292854309082, "learning_rate": 0.00019897127014019384, "loss": 6.8599, "step": 42930 }, { "epoch": 5.167268351383875, "grad_norm": 18.444242477416992, "learning_rate": 0.0001989707257950982, "loss": 6.7955, "step": 42940 }, { "epoch": 5.1684717208182915, "grad_norm": 13.281579971313477, "learning_rate": 0.00019897018130676753, "loss": 6.7585, "step": 42950 }, { "epoch": 5.169675090252707, "grad_norm": 11.055079460144043, "learning_rate": 0.00019896963667520256, "loss": 6.8016, "step": 42960 }, { "epoch": 5.170878459687124, "grad_norm": 5.975857257843018, "learning_rate": 0.0001989690919004041, "loss": 6.8301, "step": 42970 }, { "epoch": 5.172081829121541, "grad_norm": 8.398676872253418, "learning_rate": 0.0001989685469823729, "loss": 6.8602, "step": 42980 }, { "epoch": 5.1732851985559565, "grad_norm": 14.473787307739258, "learning_rate": 0.0001989680019211098, "loss": 6.9072, "step": 42990 }, { "epoch": 5.174488567990373, "grad_norm": 182.6295928955078, "learning_rate": 0.0001989674567166156, "loss": 6.7887, "step": 43000 }, { "epoch": 5.17569193742479, "grad_norm": 13.410714149475098, "learning_rate": 0.00019896691136889102, "loss": 6.8109, "step": 43010 }, { "epoch": 5.176895306859206, "grad_norm": 33.2347526550293, "learning_rate": 0.00019896636587793685, "loss": 6.9216, "step": 43020 }, { "epoch": 5.178098676293622, "grad_norm": 4.994111061096191, "learning_rate": 0.00019896582024375397, "loss": 7.3826, "step": 43030 }, { "epoch": 5.179302045728038, "grad_norm": 10.641875267028809, "learning_rate": 0.0001989652744663431, "loss": 7.2316, "step": 43040 }, { "epoch": 5.180505415162455, "grad_norm": 50.609764099121094, "learning_rate": 0.00019896472854570503, "loss": 7.1885, "step": 43050 }, { "epoch": 5.1817087845968715, "grad_norm": 16.520709991455078, "learning_rate": 0.00019896418248184058, "loss": 6.9104, "step": 43060 }, { "epoch": 5.182912154031287, "grad_norm": 15.513710021972656, "learning_rate": 0.00019896363627475052, "loss": 6.9581, "step": 43070 }, { "epoch": 5.184115523465704, "grad_norm": 16.634061813354492, "learning_rate": 0.00019896308992443565, "loss": 6.8967, "step": 43080 }, { "epoch": 5.185318892900121, "grad_norm": 12.569376945495605, "learning_rate": 0.00019896254343089676, "loss": 7.1891, "step": 43090 }, { "epoch": 5.186522262334536, "grad_norm": 9.628414154052734, "learning_rate": 0.00019896199679413461, "loss": 7.0908, "step": 43100 }, { "epoch": 5.187725631768953, "grad_norm": 16.586896896362305, "learning_rate": 0.00019896145001415004, "loss": 6.8478, "step": 43110 }, { "epoch": 5.18892900120337, "grad_norm": 19.666330337524414, "learning_rate": 0.00019896090309094383, "loss": 6.9485, "step": 43120 }, { "epoch": 5.1901323706377855, "grad_norm": 27.628435134887695, "learning_rate": 0.00019896035602451673, "loss": 6.7844, "step": 43130 }, { "epoch": 5.191335740072202, "grad_norm": 41.259517669677734, "learning_rate": 0.00019895980881486956, "loss": 6.8364, "step": 43140 }, { "epoch": 5.192539109506619, "grad_norm": 25.46467399597168, "learning_rate": 0.00019895926146200313, "loss": 6.7461, "step": 43150 }, { "epoch": 5.193742478941035, "grad_norm": 19.08307456970215, "learning_rate": 0.00019895871396591823, "loss": 6.8297, "step": 43160 }, { "epoch": 5.194945848375451, "grad_norm": 18.73707389831543, "learning_rate": 0.00019895816632661562, "loss": 6.8379, "step": 43170 }, { "epoch": 5.196149217809868, "grad_norm": 11.898574829101562, "learning_rate": 0.00019895761854409614, "loss": 6.7958, "step": 43180 }, { "epoch": 5.197352587244284, "grad_norm": 10.803080558776855, "learning_rate": 0.0001989570706183605, "loss": 6.8844, "step": 43190 }, { "epoch": 5.1985559566787005, "grad_norm": 7.940894603729248, "learning_rate": 0.00019895652254940958, "loss": 6.7806, "step": 43200 }, { "epoch": 5.199759326113116, "grad_norm": 9.130962371826172, "learning_rate": 0.00019895597433724417, "loss": 6.7556, "step": 43210 }, { "epoch": 5.200962695547533, "grad_norm": 6.850533962249756, "learning_rate": 0.000198955425981865, "loss": 6.5902, "step": 43220 }, { "epoch": 5.20216606498195, "grad_norm": 5.628232002258301, "learning_rate": 0.00019895487748327288, "loss": 6.812, "step": 43230 }, { "epoch": 5.2033694344163655, "grad_norm": 17.266263961791992, "learning_rate": 0.00019895432884146864, "loss": 6.7475, "step": 43240 }, { "epoch": 5.204572803850782, "grad_norm": 20.48459243774414, "learning_rate": 0.00019895378005645306, "loss": 6.7628, "step": 43250 }, { "epoch": 5.205776173285199, "grad_norm": 7.41314697265625, "learning_rate": 0.0001989532311282269, "loss": 6.6927, "step": 43260 }, { "epoch": 5.206979542719615, "grad_norm": 7.002894878387451, "learning_rate": 0.00019895268205679103, "loss": 6.8254, "step": 43270 }, { "epoch": 5.208182912154031, "grad_norm": 11.923335075378418, "learning_rate": 0.00019895213284214618, "loss": 6.6697, "step": 43280 }, { "epoch": 5.209386281588448, "grad_norm": 11.946123123168945, "learning_rate": 0.00019895158348429316, "loss": 6.7241, "step": 43290 }, { "epoch": 5.210589651022864, "grad_norm": 13.587684631347656, "learning_rate": 0.00019895103398323279, "loss": 6.7099, "step": 43300 }, { "epoch": 5.21179302045728, "grad_norm": 7.899662017822266, "learning_rate": 0.00019895048433896578, "loss": 6.6365, "step": 43310 }, { "epoch": 5.212996389891697, "grad_norm": 129.24859619140625, "learning_rate": 0.00019894993455149305, "loss": 6.8448, "step": 43320 }, { "epoch": 5.214199759326113, "grad_norm": 29.2088680267334, "learning_rate": 0.0001989493846208153, "loss": 6.9342, "step": 43330 }, { "epoch": 5.21540312876053, "grad_norm": 15.965337753295898, "learning_rate": 0.00019894883454693338, "loss": 6.7689, "step": 43340 }, { "epoch": 5.216606498194946, "grad_norm": 10.934747695922852, "learning_rate": 0.00019894828432984806, "loss": 6.7079, "step": 43350 }, { "epoch": 5.217809867629362, "grad_norm": 8.470879554748535, "learning_rate": 0.00019894773396956017, "loss": 6.8349, "step": 43360 }, { "epoch": 5.219013237063779, "grad_norm": 21.610458374023438, "learning_rate": 0.00019894718346607045, "loss": 6.8553, "step": 43370 }, { "epoch": 5.2202166064981945, "grad_norm": 7.955434322357178, "learning_rate": 0.00019894663281937972, "loss": 6.707, "step": 43380 }, { "epoch": 5.221419975932611, "grad_norm": 24.681379318237305, "learning_rate": 0.0001989460820294888, "loss": 6.7217, "step": 43390 }, { "epoch": 5.222623345367028, "grad_norm": 42.13639831542969, "learning_rate": 0.00019894553109639848, "loss": 6.994, "step": 43400 }, { "epoch": 5.223826714801444, "grad_norm": 91.69528198242188, "learning_rate": 0.00019894498002010952, "loss": 7.0314, "step": 43410 }, { "epoch": 5.22503008423586, "grad_norm": 13.20634651184082, "learning_rate": 0.00019894442880062277, "loss": 7.0666, "step": 43420 }, { "epoch": 5.226233453670277, "grad_norm": 18.00299072265625, "learning_rate": 0.000198943877437939, "loss": 6.9771, "step": 43430 }, { "epoch": 5.227436823104693, "grad_norm": 9.667646408081055, "learning_rate": 0.000198943325932059, "loss": 6.9375, "step": 43440 }, { "epoch": 5.2286401925391095, "grad_norm": 10.863765716552734, "learning_rate": 0.0001989427742829836, "loss": 6.7448, "step": 43450 }, { "epoch": 5.229843561973526, "grad_norm": 13.755781173706055, "learning_rate": 0.00019894222249071354, "loss": 6.8791, "step": 43460 }, { "epoch": 5.231046931407942, "grad_norm": 6.535577774047852, "learning_rate": 0.00019894167055524968, "loss": 6.8609, "step": 43470 }, { "epoch": 5.232250300842359, "grad_norm": 12.226031303405762, "learning_rate": 0.00019894111847659277, "loss": 6.8005, "step": 43480 }, { "epoch": 5.233453670276775, "grad_norm": 9.054750442504883, "learning_rate": 0.00019894056625474366, "loss": 6.7917, "step": 43490 }, { "epoch": 5.234657039711191, "grad_norm": 6.2871904373168945, "learning_rate": 0.00019894001388970312, "loss": 6.8346, "step": 43500 }, { "epoch": 5.235860409145608, "grad_norm": 7.824136734008789, "learning_rate": 0.00019893946138147196, "loss": 6.75, "step": 43510 }, { "epoch": 5.2370637785800245, "grad_norm": 10.756155967712402, "learning_rate": 0.00019893890873005096, "loss": 6.6822, "step": 43520 }, { "epoch": 5.23826714801444, "grad_norm": 17.44888687133789, "learning_rate": 0.00019893835593544094, "loss": 6.6595, "step": 43530 }, { "epoch": 5.239470517448857, "grad_norm": 13.10835075378418, "learning_rate": 0.00019893780299764266, "loss": 6.8248, "step": 43540 }, { "epoch": 5.240673886883273, "grad_norm": 18.595415115356445, "learning_rate": 0.000198937249916657, "loss": 6.6696, "step": 43550 }, { "epoch": 5.241877256317689, "grad_norm": 6.757090091705322, "learning_rate": 0.0001989366966924847, "loss": 6.7951, "step": 43560 }, { "epoch": 5.243080625752106, "grad_norm": 24.652469635009766, "learning_rate": 0.00019893614332512654, "loss": 6.6635, "step": 43570 }, { "epoch": 5.244283995186522, "grad_norm": 8.90617847442627, "learning_rate": 0.00019893558981458337, "loss": 6.6297, "step": 43580 }, { "epoch": 5.245487364620939, "grad_norm": 9.917733192443848, "learning_rate": 0.000198935036160856, "loss": 6.6716, "step": 43590 }, { "epoch": 5.246690734055355, "grad_norm": 18.178878784179688, "learning_rate": 0.00019893448236394518, "loss": 6.6969, "step": 43600 }, { "epoch": 5.247894103489771, "grad_norm": 47.34915542602539, "learning_rate": 0.00019893392842385176, "loss": 6.6517, "step": 43610 }, { "epoch": 5.249097472924188, "grad_norm": 29.6829833984375, "learning_rate": 0.00019893337434057648, "loss": 6.5945, "step": 43620 }, { "epoch": 5.250300842358604, "grad_norm": 15.29988956451416, "learning_rate": 0.0001989328201141202, "loss": 6.6756, "step": 43630 }, { "epoch": 5.25150421179302, "grad_norm": 10.747675895690918, "learning_rate": 0.00019893226574448374, "loss": 6.8092, "step": 43640 }, { "epoch": 5.252707581227437, "grad_norm": 7.114383697509766, "learning_rate": 0.00019893171123166784, "loss": 6.7703, "step": 43650 }, { "epoch": 5.2539109506618535, "grad_norm": 8.199907302856445, "learning_rate": 0.00019893115657567328, "loss": 6.6793, "step": 43660 }, { "epoch": 5.255114320096269, "grad_norm": 7.84580659866333, "learning_rate": 0.000198930601776501, "loss": 6.754, "step": 43670 }, { "epoch": 5.256317689530686, "grad_norm": 10.778278350830078, "learning_rate": 0.00019893004683415165, "loss": 6.6663, "step": 43680 }, { "epoch": 5.257521058965103, "grad_norm": 12.549016952514648, "learning_rate": 0.00019892949174862612, "loss": 6.6602, "step": 43690 }, { "epoch": 5.2587244283995185, "grad_norm": 7.637269496917725, "learning_rate": 0.00019892893651992518, "loss": 6.7702, "step": 43700 }, { "epoch": 5.259927797833935, "grad_norm": 29.3295841217041, "learning_rate": 0.00019892838114804966, "loss": 6.7947, "step": 43710 }, { "epoch": 5.261131167268351, "grad_norm": 53.47911834716797, "learning_rate": 0.00019892782563300034, "loss": 6.887, "step": 43720 }, { "epoch": 5.262334536702768, "grad_norm": 16.587194442749023, "learning_rate": 0.00019892726997477805, "loss": 7.2881, "step": 43730 }, { "epoch": 5.263537906137184, "grad_norm": 18.45558738708496, "learning_rate": 0.00019892671417338353, "loss": 7.2121, "step": 43740 }, { "epoch": 5.2647412755716, "grad_norm": 86.3219223022461, "learning_rate": 0.00019892615822881767, "loss": 7.0734, "step": 43750 }, { "epoch": 5.265944645006017, "grad_norm": 132.38186645507812, "learning_rate": 0.00019892560214108122, "loss": 7.3361, "step": 43760 }, { "epoch": 5.2671480144404335, "grad_norm": 24.015554428100586, "learning_rate": 0.00019892504591017502, "loss": 6.9493, "step": 43770 }, { "epoch": 5.268351383874849, "grad_norm": 44.07176208496094, "learning_rate": 0.00019892448953609982, "loss": 7.0007, "step": 43780 }, { "epoch": 5.269554753309266, "grad_norm": 69.94461822509766, "learning_rate": 0.00019892393301885648, "loss": 7.1794, "step": 43790 }, { "epoch": 5.270758122743683, "grad_norm": 158.1248321533203, "learning_rate": 0.0001989233763584458, "loss": 7.2272, "step": 43800 }, { "epoch": 5.271961492178098, "grad_norm": 26.2042179107666, "learning_rate": 0.00019892281955486853, "loss": 7.123, "step": 43810 }, { "epoch": 5.273164861612515, "grad_norm": 9.133247375488281, "learning_rate": 0.00019892226260812557, "loss": 7.2685, "step": 43820 }, { "epoch": 5.274368231046932, "grad_norm": 21.261714935302734, "learning_rate": 0.00019892170551821762, "loss": 7.2168, "step": 43830 }, { "epoch": 5.2755716004813475, "grad_norm": 135.9453887939453, "learning_rate": 0.00019892114828514557, "loss": 7.1195, "step": 43840 }, { "epoch": 5.276774969915764, "grad_norm": 27.17777442932129, "learning_rate": 0.0001989205909089102, "loss": 7.2869, "step": 43850 }, { "epoch": 5.277978339350181, "grad_norm": 72.9548110961914, "learning_rate": 0.0001989200333895123, "loss": 6.9865, "step": 43860 }, { "epoch": 5.279181708784597, "grad_norm": 47.44607162475586, "learning_rate": 0.00019891947572695266, "loss": 7.1993, "step": 43870 }, { "epoch": 5.280385078219013, "grad_norm": 30.02273941040039, "learning_rate": 0.00019891891792123216, "loss": 7.2468, "step": 43880 }, { "epoch": 5.28158844765343, "grad_norm": 85.73828125, "learning_rate": 0.00019891835997235156, "loss": 7.0706, "step": 43890 }, { "epoch": 5.282791817087846, "grad_norm": 133.78439331054688, "learning_rate": 0.00019891780188031164, "loss": 7.1245, "step": 43900 }, { "epoch": 5.2839951865222625, "grad_norm": 31.00410270690918, "learning_rate": 0.00019891724364511326, "loss": 7.2328, "step": 43910 }, { "epoch": 5.285198555956678, "grad_norm": 26.69734764099121, "learning_rate": 0.00019891668526675723, "loss": 7.0371, "step": 43920 }, { "epoch": 5.286401925391095, "grad_norm": 23.31447410583496, "learning_rate": 0.0001989161267452443, "loss": 7.2138, "step": 43930 }, { "epoch": 5.287605294825512, "grad_norm": 42.201351165771484, "learning_rate": 0.0001989155680805753, "loss": 7.1619, "step": 43940 }, { "epoch": 5.2888086642599275, "grad_norm": 23.718971252441406, "learning_rate": 0.0001989150092727511, "loss": 7.0938, "step": 43950 }, { "epoch": 5.290012033694344, "grad_norm": 43.38616943359375, "learning_rate": 0.00019891445032177243, "loss": 7.0059, "step": 43960 }, { "epoch": 5.291215403128761, "grad_norm": 37.879234313964844, "learning_rate": 0.00019891389122764013, "loss": 6.9589, "step": 43970 }, { "epoch": 5.292418772563177, "grad_norm": 16.111597061157227, "learning_rate": 0.00019891333199035502, "loss": 6.936, "step": 43980 }, { "epoch": 5.293622141997593, "grad_norm": 63.2092170715332, "learning_rate": 0.0001989127726099179, "loss": 6.9534, "step": 43990 }, { "epoch": 5.29482551143201, "grad_norm": 15.879876136779785, "learning_rate": 0.00019891221308632954, "loss": 6.9656, "step": 44000 }, { "epoch": 5.296028880866426, "grad_norm": 28.71221160888672, "learning_rate": 0.0001989116534195908, "loss": 7.0509, "step": 44010 }, { "epoch": 5.297232250300842, "grad_norm": 108.84678649902344, "learning_rate": 0.00019891109360970253, "loss": 7.1132, "step": 44020 }, { "epoch": 5.298435619735259, "grad_norm": 101.40082550048828, "learning_rate": 0.00019891053365666547, "loss": 7.069, "step": 44030 }, { "epoch": 5.299638989169675, "grad_norm": 140.65460205078125, "learning_rate": 0.0001989099735604804, "loss": 7.01, "step": 44040 }, { "epoch": 5.300842358604092, "grad_norm": 76.26039123535156, "learning_rate": 0.0001989094133211482, "loss": 6.9123, "step": 44050 }, { "epoch": 5.302045728038507, "grad_norm": 92.19999694824219, "learning_rate": 0.00019890885293866964, "loss": 6.8988, "step": 44060 }, { "epoch": 5.303249097472924, "grad_norm": 59.36953353881836, "learning_rate": 0.0001989082924130456, "loss": 7.0809, "step": 44070 }, { "epoch": 5.304452466907341, "grad_norm": 205.3117218017578, "learning_rate": 0.0001989077317442768, "loss": 7.0318, "step": 44080 }, { "epoch": 5.3056558363417565, "grad_norm": 60.94562530517578, "learning_rate": 0.00019890717093236414, "loss": 6.9655, "step": 44090 }, { "epoch": 5.306859205776173, "grad_norm": 82.89745330810547, "learning_rate": 0.00019890660997730835, "loss": 7.3218, "step": 44100 }, { "epoch": 5.30806257521059, "grad_norm": 111.86553192138672, "learning_rate": 0.00019890604887911026, "loss": 7.1645, "step": 44110 }, { "epoch": 5.309265944645006, "grad_norm": 97.93233489990234, "learning_rate": 0.00019890548763777072, "loss": 7.0804, "step": 44120 }, { "epoch": 5.310469314079422, "grad_norm": 27.88705825805664, "learning_rate": 0.0001989049262532905, "loss": 6.9342, "step": 44130 }, { "epoch": 5.311672683513839, "grad_norm": 21.621044158935547, "learning_rate": 0.00019890436472567047, "loss": 7.0262, "step": 44140 }, { "epoch": 5.312876052948255, "grad_norm": 32.33016586303711, "learning_rate": 0.0001989038030549114, "loss": 6.9299, "step": 44150 }, { "epoch": 5.3140794223826715, "grad_norm": 70.36555480957031, "learning_rate": 0.00019890324124101407, "loss": 7.001, "step": 44160 }, { "epoch": 5.315282791817088, "grad_norm": 42.8079719543457, "learning_rate": 0.00019890267928397934, "loss": 6.8388, "step": 44170 }, { "epoch": 5.316486161251504, "grad_norm": 33.48402404785156, "learning_rate": 0.00019890211718380805, "loss": 6.9514, "step": 44180 }, { "epoch": 5.317689530685921, "grad_norm": 14.187588691711426, "learning_rate": 0.00019890155494050097, "loss": 6.885, "step": 44190 }, { "epoch": 5.318892900120337, "grad_norm": 13.851027488708496, "learning_rate": 0.0001989009925540589, "loss": 7.1243, "step": 44200 }, { "epoch": 5.320096269554753, "grad_norm": 5.626720905303955, "learning_rate": 0.0001989004300244827, "loss": 7.2875, "step": 44210 }, { "epoch": 5.32129963898917, "grad_norm": 15.217040061950684, "learning_rate": 0.00019889986735177313, "loss": 7.4021, "step": 44220 }, { "epoch": 5.3225030084235865, "grad_norm": 33.85987091064453, "learning_rate": 0.00019889930453593105, "loss": 7.3542, "step": 44230 }, { "epoch": 5.323706377858002, "grad_norm": 56.41918182373047, "learning_rate": 0.00019889874157695724, "loss": 7.4216, "step": 44240 }, { "epoch": 5.324909747292419, "grad_norm": 37.51495361328125, "learning_rate": 0.00019889817847485258, "loss": 7.5273, "step": 44250 }, { "epoch": 5.326113116726835, "grad_norm": 56.5140266418457, "learning_rate": 0.0001988976152296178, "loss": 7.4221, "step": 44260 }, { "epoch": 5.327316486161251, "grad_norm": 90.66345977783203, "learning_rate": 0.00019889705184125375, "loss": 7.3977, "step": 44270 }, { "epoch": 5.328519855595668, "grad_norm": 12.699202537536621, "learning_rate": 0.00019889648830976125, "loss": 7.3602, "step": 44280 }, { "epoch": 5.329723225030084, "grad_norm": 7.04261589050293, "learning_rate": 0.00019889592463514117, "loss": 7.3357, "step": 44290 }, { "epoch": 5.330926594464501, "grad_norm": 26.140731811523438, "learning_rate": 0.0001988953608173942, "loss": 7.358, "step": 44300 }, { "epoch": 5.332129963898917, "grad_norm": 16.69830322265625, "learning_rate": 0.00019889479685652125, "loss": 7.4231, "step": 44310 }, { "epoch": 5.333333333333333, "grad_norm": 13.26341438293457, "learning_rate": 0.00019889423275252314, "loss": 7.1647, "step": 44320 }, { "epoch": 5.33453670276775, "grad_norm": 78.64390563964844, "learning_rate": 0.00019889366850540062, "loss": 7.2275, "step": 44330 }, { "epoch": 5.335740072202166, "grad_norm": 71.00178527832031, "learning_rate": 0.00019889310411515458, "loss": 7.1559, "step": 44340 }, { "epoch": 5.336943441636582, "grad_norm": 14.184823989868164, "learning_rate": 0.00019889253958178577, "loss": 7.0014, "step": 44350 }, { "epoch": 5.338146811070999, "grad_norm": 12.006002426147461, "learning_rate": 0.0001988919749052951, "loss": 7.0046, "step": 44360 }, { "epoch": 5.3393501805054155, "grad_norm": 18.308382034301758, "learning_rate": 0.00019889141008568327, "loss": 6.9466, "step": 44370 }, { "epoch": 5.340553549939831, "grad_norm": 11.394606590270996, "learning_rate": 0.00019889084512295116, "loss": 6.8929, "step": 44380 }, { "epoch": 5.341756919374248, "grad_norm": 25.32445526123047, "learning_rate": 0.00019889028001709958, "loss": 6.8786, "step": 44390 }, { "epoch": 5.342960288808664, "grad_norm": 35.181915283203125, "learning_rate": 0.00019888971476812937, "loss": 7.1647, "step": 44400 }, { "epoch": 5.3441636582430805, "grad_norm": 30.413166046142578, "learning_rate": 0.0001988891493760413, "loss": 7.2058, "step": 44410 }, { "epoch": 5.345367027677497, "grad_norm": 49.28425979614258, "learning_rate": 0.00019888858384083623, "loss": 7.1434, "step": 44420 }, { "epoch": 5.346570397111913, "grad_norm": 13.06767749786377, "learning_rate": 0.00019888801816251496, "loss": 7.0828, "step": 44430 }, { "epoch": 5.34777376654633, "grad_norm": 17.158748626708984, "learning_rate": 0.00019888745234107833, "loss": 7.2581, "step": 44440 }, { "epoch": 5.348977135980746, "grad_norm": 18.183988571166992, "learning_rate": 0.00019888688637652711, "loss": 7.2694, "step": 44450 }, { "epoch": 5.350180505415162, "grad_norm": 36.71931838989258, "learning_rate": 0.0001988863202688622, "loss": 7.136, "step": 44460 }, { "epoch": 5.351383874849579, "grad_norm": 8.497072219848633, "learning_rate": 0.00019888575401808432, "loss": 7.0786, "step": 44470 }, { "epoch": 5.3525872442839955, "grad_norm": 11.919191360473633, "learning_rate": 0.00019888518762419436, "loss": 6.9378, "step": 44480 }, { "epoch": 5.353790613718411, "grad_norm": 9.162808418273926, "learning_rate": 0.00019888462108719313, "loss": 7.0053, "step": 44490 }, { "epoch": 5.354993983152828, "grad_norm": 15.718364715576172, "learning_rate": 0.0001988840544070814, "loss": 6.8903, "step": 44500 }, { "epoch": 5.356197352587245, "grad_norm": 14.616697311401367, "learning_rate": 0.00019888348758386007, "loss": 6.9972, "step": 44510 }, { "epoch": 5.35740072202166, "grad_norm": 15.98686408996582, "learning_rate": 0.0001988829206175299, "loss": 6.9958, "step": 44520 }, { "epoch": 5.358604091456077, "grad_norm": 31.18436622619629, "learning_rate": 0.00019888235350809177, "loss": 6.8708, "step": 44530 }, { "epoch": 5.359807460890494, "grad_norm": 5.7120361328125, "learning_rate": 0.00019888178625554644, "loss": 6.8127, "step": 44540 }, { "epoch": 5.3610108303249095, "grad_norm": 11.021726608276367, "learning_rate": 0.00019888121885989473, "loss": 6.8173, "step": 44550 }, { "epoch": 5.362214199759326, "grad_norm": 20.15838623046875, "learning_rate": 0.0001988806513211375, "loss": 6.737, "step": 44560 }, { "epoch": 5.363417569193743, "grad_norm": 9.004701614379883, "learning_rate": 0.00019888008363927554, "loss": 6.9287, "step": 44570 }, { "epoch": 5.364620938628159, "grad_norm": 13.968864440917969, "learning_rate": 0.0001988795158143097, "loss": 6.815, "step": 44580 }, { "epoch": 5.365824308062575, "grad_norm": 4.209850788116455, "learning_rate": 0.0001988789478462408, "loss": 6.713, "step": 44590 }, { "epoch": 5.367027677496991, "grad_norm": 8.388750076293945, "learning_rate": 0.00019887837973506963, "loss": 6.7283, "step": 44600 }, { "epoch": 5.368231046931408, "grad_norm": 10.483491897583008, "learning_rate": 0.00019887781148079703, "loss": 6.7655, "step": 44610 }, { "epoch": 5.3694344163658245, "grad_norm": 7.144164562225342, "learning_rate": 0.00019887724308342383, "loss": 6.7459, "step": 44620 }, { "epoch": 5.37063778580024, "grad_norm": 12.199914932250977, "learning_rate": 0.00019887667454295086, "loss": 6.6878, "step": 44630 }, { "epoch": 5.371841155234657, "grad_norm": 5.8632426261901855, "learning_rate": 0.00019887610585937892, "loss": 6.617, "step": 44640 }, { "epoch": 5.373044524669074, "grad_norm": 12.842899322509766, "learning_rate": 0.00019887553703270885, "loss": 6.7336, "step": 44650 }, { "epoch": 5.3742478941034895, "grad_norm": 9.459039688110352, "learning_rate": 0.00019887496806294145, "loss": 6.7469, "step": 44660 }, { "epoch": 5.375451263537906, "grad_norm": 26.74758529663086, "learning_rate": 0.00019887439895007754, "loss": 6.7122, "step": 44670 }, { "epoch": 5.376654632972323, "grad_norm": 7.502065181732178, "learning_rate": 0.00019887382969411803, "loss": 6.6749, "step": 44680 }, { "epoch": 5.377858002406739, "grad_norm": 4.629199504852295, "learning_rate": 0.00019887326029506363, "loss": 6.7005, "step": 44690 }, { "epoch": 5.379061371841155, "grad_norm": 5.4511284828186035, "learning_rate": 0.0001988726907529152, "loss": 6.6812, "step": 44700 }, { "epoch": 5.380264741275572, "grad_norm": 6.781862735748291, "learning_rate": 0.0001988721210676736, "loss": 6.6241, "step": 44710 }, { "epoch": 5.381468110709988, "grad_norm": 15.501797676086426, "learning_rate": 0.00019887155123933964, "loss": 6.594, "step": 44720 }, { "epoch": 5.382671480144404, "grad_norm": 11.129393577575684, "learning_rate": 0.00019887098126791412, "loss": 6.6791, "step": 44730 }, { "epoch": 5.38387484957882, "grad_norm": 15.134982109069824, "learning_rate": 0.0001988704111533979, "loss": 6.7703, "step": 44740 }, { "epoch": 5.385078219013237, "grad_norm": 23.988197326660156, "learning_rate": 0.00019886984089579178, "loss": 6.6215, "step": 44750 }, { "epoch": 5.386281588447654, "grad_norm": 5.169701099395752, "learning_rate": 0.00019886927049509657, "loss": 6.6566, "step": 44760 }, { "epoch": 5.387484957882069, "grad_norm": 75.0279541015625, "learning_rate": 0.0001988686999513131, "loss": 6.6738, "step": 44770 }, { "epoch": 5.388688327316486, "grad_norm": 9.8955717086792, "learning_rate": 0.00019886812926444222, "loss": 6.5693, "step": 44780 }, { "epoch": 5.389891696750903, "grad_norm": 9.198053359985352, "learning_rate": 0.0001988675584344848, "loss": 6.6713, "step": 44790 }, { "epoch": 5.3910950661853185, "grad_norm": 67.88687896728516, "learning_rate": 0.00019886698746144154, "loss": 6.6615, "step": 44800 }, { "epoch": 5.392298435619735, "grad_norm": 85.84774780273438, "learning_rate": 0.00019886641634531339, "loss": 6.8457, "step": 44810 }, { "epoch": 5.393501805054152, "grad_norm": 8.06728744506836, "learning_rate": 0.0001988658450861011, "loss": 6.7166, "step": 44820 }, { "epoch": 5.394705174488568, "grad_norm": 10.970793724060059, "learning_rate": 0.00019886527368380555, "loss": 6.7579, "step": 44830 }, { "epoch": 5.395908543922984, "grad_norm": 8.422438621520996, "learning_rate": 0.00019886470213842752, "loss": 6.5816, "step": 44840 }, { "epoch": 5.397111913357401, "grad_norm": 6.012128829956055, "learning_rate": 0.00019886413044996784, "loss": 6.6582, "step": 44850 }, { "epoch": 5.398315282791817, "grad_norm": 7.007898330688477, "learning_rate": 0.0001988635586184274, "loss": 6.6602, "step": 44860 }, { "epoch": 5.3995186522262335, "grad_norm": 19.37340545654297, "learning_rate": 0.00019886298664380695, "loss": 6.7178, "step": 44870 }, { "epoch": 5.40072202166065, "grad_norm": 12.28745174407959, "learning_rate": 0.00019886241452610735, "loss": 6.6701, "step": 44880 }, { "epoch": 5.401925391095066, "grad_norm": 23.0762996673584, "learning_rate": 0.00019886184226532941, "loss": 6.5621, "step": 44890 }, { "epoch": 5.403128760529483, "grad_norm": 89.00927734375, "learning_rate": 0.000198861269861474, "loss": 6.7886, "step": 44900 }, { "epoch": 5.404332129963899, "grad_norm": 42.586124420166016, "learning_rate": 0.00019886069731454194, "loss": 6.7432, "step": 44910 }, { "epoch": 5.405535499398315, "grad_norm": 10.239490509033203, "learning_rate": 0.00019886012462453403, "loss": 7.2723, "step": 44920 }, { "epoch": 5.406738868832732, "grad_norm": 9.26880168914795, "learning_rate": 0.00019885955179145111, "loss": 7.4963, "step": 44930 }, { "epoch": 5.4079422382671485, "grad_norm": 19.099369049072266, "learning_rate": 0.000198858978815294, "loss": 7.0627, "step": 44940 }, { "epoch": 5.409145607701564, "grad_norm": 15.05772876739502, "learning_rate": 0.00019885840569606355, "loss": 6.9925, "step": 44950 }, { "epoch": 5.410348977135981, "grad_norm": 13.30691909790039, "learning_rate": 0.0001988578324337606, "loss": 6.7894, "step": 44960 }, { "epoch": 5.411552346570397, "grad_norm": 34.78816604614258, "learning_rate": 0.0001988572590283859, "loss": 6.6909, "step": 44970 }, { "epoch": 5.412755716004813, "grad_norm": 14.33220100402832, "learning_rate": 0.0001988566854799404, "loss": 6.8136, "step": 44980 }, { "epoch": 5.41395908543923, "grad_norm": 11.890724182128906, "learning_rate": 0.00019885611178842484, "loss": 6.785, "step": 44990 }, { "epoch": 5.415162454873646, "grad_norm": 7.02847146987915, "learning_rate": 0.00019885553795384012, "loss": 6.6332, "step": 45000 }, { "epoch": 5.416365824308063, "grad_norm": 7.15176248550415, "learning_rate": 0.00019885496397618698, "loss": 6.6381, "step": 45010 }, { "epoch": 5.417569193742479, "grad_norm": 6.993597984313965, "learning_rate": 0.00019885438985546631, "loss": 6.7677, "step": 45020 }, { "epoch": 5.418772563176895, "grad_norm": 5.475958347320557, "learning_rate": 0.00019885381559167894, "loss": 6.6059, "step": 45030 }, { "epoch": 5.419975932611312, "grad_norm": 7.7009596824646, "learning_rate": 0.0001988532411848257, "loss": 6.5585, "step": 45040 }, { "epoch": 5.421179302045728, "grad_norm": 13.65756893157959, "learning_rate": 0.0001988526666349074, "loss": 6.642, "step": 45050 }, { "epoch": 5.422382671480144, "grad_norm": 43.964698791503906, "learning_rate": 0.0001988520919419249, "loss": 6.5914, "step": 45060 }, { "epoch": 5.423586040914561, "grad_norm": 16.52776336669922, "learning_rate": 0.000198851517105879, "loss": 6.6282, "step": 45070 }, { "epoch": 5.4247894103489775, "grad_norm": 7.894533157348633, "learning_rate": 0.00019885094212677057, "loss": 6.5094, "step": 45080 }, { "epoch": 5.425992779783393, "grad_norm": 3.9156060218811035, "learning_rate": 0.00019885036700460038, "loss": 6.5818, "step": 45090 }, { "epoch": 5.42719614921781, "grad_norm": 13.460526466369629, "learning_rate": 0.00019884979173936934, "loss": 6.5725, "step": 45100 }, { "epoch": 5.428399518652226, "grad_norm": 7.966900825500488, "learning_rate": 0.00019884921633107823, "loss": 6.6553, "step": 45110 }, { "epoch": 5.4296028880866425, "grad_norm": 9.780647277832031, "learning_rate": 0.00019884864077972792, "loss": 6.6097, "step": 45120 }, { "epoch": 5.430806257521059, "grad_norm": 6.164175510406494, "learning_rate": 0.00019884806508531922, "loss": 6.4171, "step": 45130 }, { "epoch": 5.432009626955475, "grad_norm": 6.212482929229736, "learning_rate": 0.00019884748924785294, "loss": 6.5031, "step": 45140 }, { "epoch": 5.433212996389892, "grad_norm": 4.868154525756836, "learning_rate": 0.00019884691326732995, "loss": 6.5662, "step": 45150 }, { "epoch": 5.434416365824308, "grad_norm": 12.111865043640137, "learning_rate": 0.0001988463371437511, "loss": 6.4908, "step": 45160 }, { "epoch": 5.435619735258724, "grad_norm": 3.300665855407715, "learning_rate": 0.00019884576087711716, "loss": 6.3347, "step": 45170 }, { "epoch": 5.436823104693141, "grad_norm": 24.62493896484375, "learning_rate": 0.000198845184467429, "loss": 6.4454, "step": 45180 }, { "epoch": 5.4380264741275575, "grad_norm": 3.6107540130615234, "learning_rate": 0.00019884460791468747, "loss": 6.5296, "step": 45190 }, { "epoch": 5.439229843561973, "grad_norm": 5.2915940284729, "learning_rate": 0.00019884403121889338, "loss": 6.3876, "step": 45200 }, { "epoch": 5.44043321299639, "grad_norm": 14.972261428833008, "learning_rate": 0.00019884345438004756, "loss": 6.5571, "step": 45210 }, { "epoch": 5.441636582430807, "grad_norm": 20.201309204101562, "learning_rate": 0.0001988428773981509, "loss": 6.5529, "step": 45220 }, { "epoch": 5.442839951865222, "grad_norm": 6.401875972747803, "learning_rate": 0.00019884230027320413, "loss": 6.5248, "step": 45230 }, { "epoch": 5.444043321299639, "grad_norm": 14.680062294006348, "learning_rate": 0.0001988417230052082, "loss": 6.4735, "step": 45240 }, { "epoch": 5.445246690734056, "grad_norm": 13.246824264526367, "learning_rate": 0.0001988411455941639, "loss": 6.4265, "step": 45250 }, { "epoch": 5.4464500601684716, "grad_norm": 17.640949249267578, "learning_rate": 0.000198840568040072, "loss": 6.4548, "step": 45260 }, { "epoch": 5.447653429602888, "grad_norm": 13.611530303955078, "learning_rate": 0.00019883999034293344, "loss": 6.4113, "step": 45270 }, { "epoch": 5.448856799037305, "grad_norm": 4.311097621917725, "learning_rate": 0.00019883941250274903, "loss": 6.5528, "step": 45280 }, { "epoch": 5.450060168471721, "grad_norm": 5.291025161743164, "learning_rate": 0.00019883883451951955, "loss": 6.4416, "step": 45290 }, { "epoch": 5.451263537906137, "grad_norm": 5.119104385375977, "learning_rate": 0.00019883825639324586, "loss": 6.4439, "step": 45300 }, { "epoch": 5.452466907340553, "grad_norm": 4.42533016204834, "learning_rate": 0.00019883767812392886, "loss": 6.497, "step": 45310 }, { "epoch": 5.45367027677497, "grad_norm": 5.035144805908203, "learning_rate": 0.0001988370997115693, "loss": 6.386, "step": 45320 }, { "epoch": 5.4548736462093865, "grad_norm": 7.269014835357666, "learning_rate": 0.00019883652115616806, "loss": 6.4401, "step": 45330 }, { "epoch": 5.456077015643802, "grad_norm": 7.293306350708008, "learning_rate": 0.000198835942457726, "loss": 6.5438, "step": 45340 }, { "epoch": 5.457280385078219, "grad_norm": 11.02853012084961, "learning_rate": 0.0001988353636162439, "loss": 6.4328, "step": 45350 }, { "epoch": 5.458483754512636, "grad_norm": 10.555423736572266, "learning_rate": 0.00019883478463172263, "loss": 6.4596, "step": 45360 }, { "epoch": 5.4596871239470515, "grad_norm": 14.491849899291992, "learning_rate": 0.00019883420550416303, "loss": 6.423, "step": 45370 }, { "epoch": 5.460890493381468, "grad_norm": 10.816365242004395, "learning_rate": 0.00019883362623356594, "loss": 6.4074, "step": 45380 }, { "epoch": 5.462093862815885, "grad_norm": 3.7623682022094727, "learning_rate": 0.0001988330468199322, "loss": 6.5103, "step": 45390 }, { "epoch": 5.463297232250301, "grad_norm": 4.828091144561768, "learning_rate": 0.0001988324672632626, "loss": 6.4944, "step": 45400 }, { "epoch": 5.464500601684717, "grad_norm": 7.3919525146484375, "learning_rate": 0.00019883188756355806, "loss": 6.5367, "step": 45410 }, { "epoch": 5.465703971119134, "grad_norm": 12.38614559173584, "learning_rate": 0.00019883130772081937, "loss": 6.4677, "step": 45420 }, { "epoch": 5.46690734055355, "grad_norm": 22.366512298583984, "learning_rate": 0.00019883072773504737, "loss": 6.485, "step": 45430 }, { "epoch": 5.4681107099879664, "grad_norm": 5.2813920974731445, "learning_rate": 0.0001988301476062429, "loss": 6.4001, "step": 45440 }, { "epoch": 5.469314079422382, "grad_norm": 6.833184242248535, "learning_rate": 0.0001988295673344068, "loss": 6.4086, "step": 45450 }, { "epoch": 5.470517448856799, "grad_norm": 10.400935173034668, "learning_rate": 0.00019882898691953992, "loss": 6.4306, "step": 45460 }, { "epoch": 5.471720818291216, "grad_norm": 9.844170570373535, "learning_rate": 0.00019882840636164313, "loss": 6.4404, "step": 45470 }, { "epoch": 5.472924187725631, "grad_norm": 4.941709041595459, "learning_rate": 0.0001988278256607172, "loss": 6.4654, "step": 45480 }, { "epoch": 5.474127557160048, "grad_norm": 10.349566459655762, "learning_rate": 0.00019882724481676303, "loss": 6.4547, "step": 45490 }, { "epoch": 5.475330926594465, "grad_norm": 31.910966873168945, "learning_rate": 0.0001988266638297814, "loss": 6.5674, "step": 45500 }, { "epoch": 5.4765342960288805, "grad_norm": 17.867481231689453, "learning_rate": 0.00019882608269977323, "loss": 6.9163, "step": 45510 }, { "epoch": 5.477737665463297, "grad_norm": 6.678061008453369, "learning_rate": 0.0001988255014267393, "loss": 6.7346, "step": 45520 }, { "epoch": 5.478941034897714, "grad_norm": 23.98747444152832, "learning_rate": 0.00019882492001068046, "loss": 6.627, "step": 45530 }, { "epoch": 5.48014440433213, "grad_norm": 7.337192535400391, "learning_rate": 0.0001988243384515976, "loss": 6.5027, "step": 45540 }, { "epoch": 5.481347773766546, "grad_norm": 8.542789459228516, "learning_rate": 0.00019882375674949148, "loss": 6.427, "step": 45550 }, { "epoch": 5.482551143200963, "grad_norm": 7.10697603225708, "learning_rate": 0.000198823174904363, "loss": 6.4406, "step": 45560 }, { "epoch": 5.483754512635379, "grad_norm": 6.624485492706299, "learning_rate": 0.00019882259291621298, "loss": 6.3173, "step": 45570 }, { "epoch": 5.4849578820697955, "grad_norm": 74.85208892822266, "learning_rate": 0.0001988220107850423, "loss": 6.3787, "step": 45580 }, { "epoch": 5.486161251504212, "grad_norm": 15.59478759765625, "learning_rate": 0.00019882142851085176, "loss": 6.558, "step": 45590 }, { "epoch": 5.487364620938628, "grad_norm": 20.38960838317871, "learning_rate": 0.00019882084609364217, "loss": 6.5889, "step": 45600 }, { "epoch": 5.488567990373045, "grad_norm": 8.961256980895996, "learning_rate": 0.00019882026353341446, "loss": 6.5569, "step": 45610 }, { "epoch": 5.489771359807461, "grad_norm": 9.782734870910645, "learning_rate": 0.00019881968083016943, "loss": 6.48, "step": 45620 }, { "epoch": 5.490974729241877, "grad_norm": 16.865764617919922, "learning_rate": 0.00019881909798390792, "loss": 6.3723, "step": 45630 }, { "epoch": 5.492178098676294, "grad_norm": 13.1566162109375, "learning_rate": 0.00019881851499463075, "loss": 6.5305, "step": 45640 }, { "epoch": 5.49338146811071, "grad_norm": 6.949885845184326, "learning_rate": 0.00019881793186233884, "loss": 6.4292, "step": 45650 }, { "epoch": 5.494584837545126, "grad_norm": 6.953880310058594, "learning_rate": 0.00019881734858703297, "loss": 6.3776, "step": 45660 }, { "epoch": 5.495788206979543, "grad_norm": 13.188807487487793, "learning_rate": 0.00019881676516871397, "loss": 6.4843, "step": 45670 }, { "epoch": 5.496991576413959, "grad_norm": 8.368364334106445, "learning_rate": 0.00019881618160738277, "loss": 6.4333, "step": 45680 }, { "epoch": 5.498194945848375, "grad_norm": 8.661624908447266, "learning_rate": 0.0001988155979030401, "loss": 6.4573, "step": 45690 }, { "epoch": 5.499398315282792, "grad_norm": 7.819737434387207, "learning_rate": 0.00019881501405568687, "loss": 6.4315, "step": 45700 }, { "epoch": 5.500601684717208, "grad_norm": 17.464813232421875, "learning_rate": 0.00019881443006532394, "loss": 6.4108, "step": 45710 }, { "epoch": 5.501805054151625, "grad_norm": 5.219578742980957, "learning_rate": 0.00019881384593195214, "loss": 6.2546, "step": 45720 }, { "epoch": 5.503008423586041, "grad_norm": 17.478546142578125, "learning_rate": 0.00019881326165557229, "loss": 6.3163, "step": 45730 }, { "epoch": 5.504211793020457, "grad_norm": 8.404958724975586, "learning_rate": 0.00019881267723618527, "loss": 6.3914, "step": 45740 }, { "epoch": 5.505415162454874, "grad_norm": 13.10112476348877, "learning_rate": 0.00019881209267379188, "loss": 6.2438, "step": 45750 }, { "epoch": 5.5066185318892895, "grad_norm": 7.838147163391113, "learning_rate": 0.00019881150796839303, "loss": 6.3691, "step": 45760 }, { "epoch": 5.507821901323706, "grad_norm": 18.114526748657227, "learning_rate": 0.0001988109231199895, "loss": 6.3613, "step": 45770 }, { "epoch": 5.509025270758123, "grad_norm": 34.72455978393555, "learning_rate": 0.00019881033812858218, "loss": 6.4407, "step": 45780 }, { "epoch": 5.510228640192539, "grad_norm": 15.340691566467285, "learning_rate": 0.00019880975299417194, "loss": 6.4729, "step": 45790 }, { "epoch": 5.511432009626955, "grad_norm": 10.677668571472168, "learning_rate": 0.00019880916771675955, "loss": 6.5155, "step": 45800 }, { "epoch": 5.512635379061372, "grad_norm": 12.764081954956055, "learning_rate": 0.0001988085822963459, "loss": 6.4162, "step": 45810 }, { "epoch": 5.513838748495788, "grad_norm": 7.726113319396973, "learning_rate": 0.00019880799673293185, "loss": 6.394, "step": 45820 }, { "epoch": 5.5150421179302045, "grad_norm": 9.001998901367188, "learning_rate": 0.00019880741102651825, "loss": 6.2998, "step": 45830 }, { "epoch": 5.516245487364621, "grad_norm": 7.153882026672363, "learning_rate": 0.0001988068251771059, "loss": 6.35, "step": 45840 }, { "epoch": 5.517448856799037, "grad_norm": 21.886831283569336, "learning_rate": 0.0001988062391846957, "loss": 6.3185, "step": 45850 }, { "epoch": 5.518652226233454, "grad_norm": 15.019268989562988, "learning_rate": 0.00019880565304928845, "loss": 6.8131, "step": 45860 }, { "epoch": 5.51985559566787, "grad_norm": 8.751230239868164, "learning_rate": 0.00019880506677088508, "loss": 6.5887, "step": 45870 }, { "epoch": 5.521058965102286, "grad_norm": 11.072755813598633, "learning_rate": 0.00019880448034948636, "loss": 6.432, "step": 45880 }, { "epoch": 5.522262334536703, "grad_norm": 28.181812286376953, "learning_rate": 0.0001988038937850931, "loss": 6.5608, "step": 45890 }, { "epoch": 5.5234657039711195, "grad_norm": 14.7490816116333, "learning_rate": 0.0001988033070777063, "loss": 6.3919, "step": 45900 }, { "epoch": 5.524669073405535, "grad_norm": 6.526397228240967, "learning_rate": 0.00019880272022732668, "loss": 6.4002, "step": 45910 }, { "epoch": 5.525872442839952, "grad_norm": 9.350018501281738, "learning_rate": 0.00019880213323395513, "loss": 6.4645, "step": 45920 }, { "epoch": 5.527075812274369, "grad_norm": 6.043956756591797, "learning_rate": 0.00019880154609759252, "loss": 6.3516, "step": 45930 }, { "epoch": 5.528279181708784, "grad_norm": 7.510612487792969, "learning_rate": 0.00019880095881823966, "loss": 6.4044, "step": 45940 }, { "epoch": 5.529482551143201, "grad_norm": 11.649624824523926, "learning_rate": 0.00019880037139589741, "loss": 6.3728, "step": 45950 }, { "epoch": 5.530685920577618, "grad_norm": 4.744845390319824, "learning_rate": 0.0001987997838305667, "loss": 6.2978, "step": 45960 }, { "epoch": 5.5318892900120336, "grad_norm": 13.25458812713623, "learning_rate": 0.00019879919612224823, "loss": 6.3112, "step": 45970 }, { "epoch": 5.53309265944645, "grad_norm": 3.9331250190734863, "learning_rate": 0.00019879860827094295, "loss": 6.271, "step": 45980 }, { "epoch": 5.534296028880867, "grad_norm": 6.454897403717041, "learning_rate": 0.0001987980202766517, "loss": 6.2212, "step": 45990 }, { "epoch": 5.535499398315283, "grad_norm": 7.229743957519531, "learning_rate": 0.00019879743213937532, "loss": 6.3656, "step": 46000 }, { "epoch": 5.536702767749699, "grad_norm": 11.61962890625, "learning_rate": 0.00019879684385911466, "loss": 6.3485, "step": 46010 }, { "epoch": 5.537906137184115, "grad_norm": 6.350016117095947, "learning_rate": 0.00019879625543587056, "loss": 6.2877, "step": 46020 }, { "epoch": 5.539109506618532, "grad_norm": 4.834826946258545, "learning_rate": 0.00019879566686964393, "loss": 6.093, "step": 46030 }, { "epoch": 5.5403128760529485, "grad_norm": 9.270788192749023, "learning_rate": 0.00019879507816043554, "loss": 6.2919, "step": 46040 }, { "epoch": 5.541516245487364, "grad_norm": 11.917572975158691, "learning_rate": 0.00019879448930824629, "loss": 6.259, "step": 46050 }, { "epoch": 5.542719614921781, "grad_norm": 5.778326511383057, "learning_rate": 0.00019879390031307703, "loss": 6.3041, "step": 46060 }, { "epoch": 5.543922984356198, "grad_norm": 11.949620246887207, "learning_rate": 0.00019879331117492858, "loss": 6.3024, "step": 46070 }, { "epoch": 5.5451263537906135, "grad_norm": 8.083394050598145, "learning_rate": 0.00019879272189380186, "loss": 6.3281, "step": 46080 }, { "epoch": 5.54632972322503, "grad_norm": 6.177634239196777, "learning_rate": 0.00019879213246969766, "loss": 6.4272, "step": 46090 }, { "epoch": 5.547533092659447, "grad_norm": 6.1454057693481445, "learning_rate": 0.00019879154290261685, "loss": 6.2131, "step": 46100 }, { "epoch": 5.548736462093863, "grad_norm": 6.939477920532227, "learning_rate": 0.0001987909531925603, "loss": 6.2413, "step": 46110 }, { "epoch": 5.549939831528279, "grad_norm": 4.128360271453857, "learning_rate": 0.0001987903633395288, "loss": 6.3218, "step": 46120 }, { "epoch": 5.551143200962695, "grad_norm": 6.849950790405273, "learning_rate": 0.00019878977334352328, "loss": 6.3315, "step": 46130 }, { "epoch": 5.552346570397112, "grad_norm": 6.320056438446045, "learning_rate": 0.0001987891832045446, "loss": 6.2866, "step": 46140 }, { "epoch": 5.5535499398315284, "grad_norm": 7.691494464874268, "learning_rate": 0.00019878859292259357, "loss": 6.2612, "step": 46150 }, { "epoch": 5.554753309265944, "grad_norm": 6.219451904296875, "learning_rate": 0.00019878800249767105, "loss": 6.3023, "step": 46160 }, { "epoch": 5.555956678700361, "grad_norm": 4.319249153137207, "learning_rate": 0.0001987874119297779, "loss": 6.2107, "step": 46170 }, { "epoch": 5.557160048134778, "grad_norm": 7.467607021331787, "learning_rate": 0.00019878682121891498, "loss": 6.3902, "step": 46180 }, { "epoch": 5.558363417569193, "grad_norm": 39.97356033325195, "learning_rate": 0.0001987862303650831, "loss": 6.2473, "step": 46190 }, { "epoch": 5.55956678700361, "grad_norm": 11.547635078430176, "learning_rate": 0.0001987856393682832, "loss": 6.3006, "step": 46200 }, { "epoch": 5.560770156438027, "grad_norm": 5.582194805145264, "learning_rate": 0.00019878504822851606, "loss": 6.2777, "step": 46210 }, { "epoch": 5.5619735258724425, "grad_norm": 5.818282127380371, "learning_rate": 0.0001987844569457826, "loss": 6.1256, "step": 46220 }, { "epoch": 5.563176895306859, "grad_norm": 10.998391151428223, "learning_rate": 0.00019878386552008365, "loss": 6.2739, "step": 46230 }, { "epoch": 5.564380264741276, "grad_norm": 6.707700252532959, "learning_rate": 0.00019878327395142, "loss": 6.2491, "step": 46240 }, { "epoch": 5.565583634175692, "grad_norm": 17.052385330200195, "learning_rate": 0.0001987826822397926, "loss": 6.2402, "step": 46250 }, { "epoch": 5.566787003610108, "grad_norm": 9.396872520446777, "learning_rate": 0.00019878209038520225, "loss": 6.4235, "step": 46260 }, { "epoch": 5.567990373044525, "grad_norm": 8.486729621887207, "learning_rate": 0.00019878149838764987, "loss": 6.3466, "step": 46270 }, { "epoch": 5.569193742478941, "grad_norm": 9.036606788635254, "learning_rate": 0.00019878090624713624, "loss": 6.3939, "step": 46280 }, { "epoch": 5.5703971119133575, "grad_norm": 8.542448997497559, "learning_rate": 0.00019878031396366224, "loss": 6.4378, "step": 46290 }, { "epoch": 5.571600481347774, "grad_norm": 13.90946102142334, "learning_rate": 0.00019877972153722876, "loss": 6.3125, "step": 46300 }, { "epoch": 5.57280385078219, "grad_norm": 46.43960189819336, "learning_rate": 0.00019877912896783663, "loss": 6.3317, "step": 46310 }, { "epoch": 5.574007220216607, "grad_norm": 33.592288970947266, "learning_rate": 0.00019877853625548673, "loss": 6.3732, "step": 46320 }, { "epoch": 5.575210589651023, "grad_norm": 105.87428283691406, "learning_rate": 0.0001987779434001799, "loss": 6.5342, "step": 46330 }, { "epoch": 5.576413959085439, "grad_norm": 9.815814971923828, "learning_rate": 0.00019877735040191695, "loss": 6.5159, "step": 46340 }, { "epoch": 5.577617328519856, "grad_norm": 24.510147094726562, "learning_rate": 0.00019877675726069882, "loss": 6.5053, "step": 46350 }, { "epoch": 5.578820697954272, "grad_norm": 23.83465003967285, "learning_rate": 0.00019877616397652635, "loss": 6.5839, "step": 46360 }, { "epoch": 5.580024067388688, "grad_norm": 34.723670959472656, "learning_rate": 0.00019877557054940033, "loss": 6.843, "step": 46370 }, { "epoch": 5.581227436823105, "grad_norm": 16.495534896850586, "learning_rate": 0.00019877497697932172, "loss": 6.4301, "step": 46380 }, { "epoch": 5.582430806257521, "grad_norm": 16.811086654663086, "learning_rate": 0.00019877438326629132, "loss": 6.5386, "step": 46390 }, { "epoch": 5.583634175691937, "grad_norm": 17.09915542602539, "learning_rate": 0.00019877378941031003, "loss": 6.5213, "step": 46400 }, { "epoch": 5.584837545126354, "grad_norm": 7.495748996734619, "learning_rate": 0.0001987731954113786, "loss": 6.4153, "step": 46410 }, { "epoch": 5.58604091456077, "grad_norm": 5.768733978271484, "learning_rate": 0.00019877260126949806, "loss": 6.3213, "step": 46420 }, { "epoch": 5.587244283995187, "grad_norm": 9.990574836730957, "learning_rate": 0.0001987720069846691, "loss": 6.2859, "step": 46430 }, { "epoch": 5.588447653429603, "grad_norm": 6.547935485839844, "learning_rate": 0.00019877141255689274, "loss": 6.3208, "step": 46440 }, { "epoch": 5.589651022864019, "grad_norm": 6.238420009613037, "learning_rate": 0.00019877081798616973, "loss": 6.2468, "step": 46450 }, { "epoch": 5.590854392298436, "grad_norm": 28.083812713623047, "learning_rate": 0.00019877022327250093, "loss": 6.2582, "step": 46460 }, { "epoch": 5.5920577617328515, "grad_norm": 12.323185920715332, "learning_rate": 0.00019876962841588726, "loss": 6.2916, "step": 46470 }, { "epoch": 5.593261131167268, "grad_norm": 9.366296768188477, "learning_rate": 0.00019876903341632955, "loss": 6.2955, "step": 46480 }, { "epoch": 5.594464500601685, "grad_norm": 19.47185707092285, "learning_rate": 0.00019876843827382865, "loss": 6.3823, "step": 46490 }, { "epoch": 5.595667870036101, "grad_norm": 152.86924743652344, "learning_rate": 0.00019876784298838545, "loss": 6.3114, "step": 46500 }, { "epoch": 5.596871239470517, "grad_norm": 26.83968162536621, "learning_rate": 0.00019876724756000077, "loss": 7.0908, "step": 46510 }, { "epoch": 5.598074608904934, "grad_norm": 11.151542663574219, "learning_rate": 0.00019876665198867555, "loss": 7.0573, "step": 46520 }, { "epoch": 5.59927797833935, "grad_norm": 11.737648963928223, "learning_rate": 0.00019876605627441057, "loss": 6.849, "step": 46530 }, { "epoch": 5.6004813477737665, "grad_norm": 27.343643188476562, "learning_rate": 0.0001987654604172067, "loss": 6.6941, "step": 46540 }, { "epoch": 5.601684717208183, "grad_norm": 18.024578094482422, "learning_rate": 0.00019876486441706485, "loss": 6.7262, "step": 46550 }, { "epoch": 5.602888086642599, "grad_norm": 22.21957015991211, "learning_rate": 0.00019876426827398585, "loss": 6.6684, "step": 46560 }, { "epoch": 5.604091456077016, "grad_norm": 38.372962951660156, "learning_rate": 0.00019876367198797055, "loss": 6.4944, "step": 46570 }, { "epoch": 5.605294825511432, "grad_norm": 33.13584899902344, "learning_rate": 0.00019876307555901986, "loss": 6.4411, "step": 46580 }, { "epoch": 5.606498194945848, "grad_norm": 16.798213958740234, "learning_rate": 0.0001987624789871346, "loss": 6.4011, "step": 46590 }, { "epoch": 5.607701564380265, "grad_norm": 28.71711540222168, "learning_rate": 0.00019876188227231566, "loss": 6.3832, "step": 46600 }, { "epoch": 5.6089049338146815, "grad_norm": 38.26422882080078, "learning_rate": 0.0001987612854145639, "loss": 6.6683, "step": 46610 }, { "epoch": 5.610108303249097, "grad_norm": 36.17931365966797, "learning_rate": 0.00019876068841388018, "loss": 6.7922, "step": 46620 }, { "epoch": 5.611311672683514, "grad_norm": 56.7069091796875, "learning_rate": 0.00019876009127026533, "loss": 6.6921, "step": 46630 }, { "epoch": 5.612515042117931, "grad_norm": 57.587615966796875, "learning_rate": 0.00019875949398372026, "loss": 6.6318, "step": 46640 }, { "epoch": 5.613718411552346, "grad_norm": 51.06390380859375, "learning_rate": 0.00019875889655424583, "loss": 6.7319, "step": 46650 }, { "epoch": 5.614921780986763, "grad_norm": 23.352514266967773, "learning_rate": 0.0001987582989818429, "loss": 6.5627, "step": 46660 }, { "epoch": 5.61612515042118, "grad_norm": 42.385128021240234, "learning_rate": 0.00019875770126651232, "loss": 6.6787, "step": 46670 }, { "epoch": 5.617328519855596, "grad_norm": 13.203081130981445, "learning_rate": 0.00019875710340825492, "loss": 6.5287, "step": 46680 }, { "epoch": 5.618531889290012, "grad_norm": 19.75714111328125, "learning_rate": 0.00019875650540707168, "loss": 6.5904, "step": 46690 }, { "epoch": 5.619735258724428, "grad_norm": 50.870399475097656, "learning_rate": 0.00019875590726296336, "loss": 6.7852, "step": 46700 }, { "epoch": 5.620938628158845, "grad_norm": 1110.648681640625, "learning_rate": 0.00019875530897593086, "loss": 6.8428, "step": 46710 }, { "epoch": 5.622141997593261, "grad_norm": 187.42201232910156, "learning_rate": 0.00019875471054597505, "loss": 7.1965, "step": 46720 }, { "epoch": 5.623345367027677, "grad_norm": 11.785093307495117, "learning_rate": 0.0001987541119730968, "loss": 7.4633, "step": 46730 }, { "epoch": 5.624548736462094, "grad_norm": 4.544264793395996, "learning_rate": 0.00019875351325729693, "loss": 7.4169, "step": 46740 }, { "epoch": 5.6257521058965105, "grad_norm": 11.500255584716797, "learning_rate": 0.00019875291439857637, "loss": 6.9042, "step": 46750 }, { "epoch": 5.626955475330926, "grad_norm": 22.129554748535156, "learning_rate": 0.00019875231539693598, "loss": 6.5528, "step": 46760 }, { "epoch": 5.628158844765343, "grad_norm": 13.432845115661621, "learning_rate": 0.0001987517162523766, "loss": 6.516, "step": 46770 }, { "epoch": 5.62936221419976, "grad_norm": 25.117473602294922, "learning_rate": 0.0001987511169648991, "loss": 6.6237, "step": 46780 }, { "epoch": 5.6305655836341755, "grad_norm": 15.665063858032227, "learning_rate": 0.00019875051753450435, "loss": 6.5602, "step": 46790 }, { "epoch": 5.631768953068592, "grad_norm": 7.756671905517578, "learning_rate": 0.00019874991796119321, "loss": 6.7064, "step": 46800 }, { "epoch": 5.632972322503008, "grad_norm": 16.361539840698242, "learning_rate": 0.00019874931824496658, "loss": 6.7826, "step": 46810 }, { "epoch": 5.634175691937425, "grad_norm": 22.501079559326172, "learning_rate": 0.0001987487183858253, "loss": 6.6066, "step": 46820 }, { "epoch": 5.635379061371841, "grad_norm": 87.65595245361328, "learning_rate": 0.00019874811838377024, "loss": 6.5869, "step": 46830 }, { "epoch": 5.636582430806257, "grad_norm": 113.44232177734375, "learning_rate": 0.00019874751823880228, "loss": 6.6955, "step": 46840 }, { "epoch": 5.637785800240674, "grad_norm": 74.71009063720703, "learning_rate": 0.00019874691795092225, "loss": 6.8955, "step": 46850 }, { "epoch": 5.6389891696750905, "grad_norm": 264.922119140625, "learning_rate": 0.0001987463175201311, "loss": 6.6997, "step": 46860 }, { "epoch": 5.640192539109506, "grad_norm": 64.98933410644531, "learning_rate": 0.00019874571694642961, "loss": 6.9005, "step": 46870 }, { "epoch": 5.641395908543923, "grad_norm": 457.2339782714844, "learning_rate": 0.0001987451162298187, "loss": 7.1698, "step": 46880 }, { "epoch": 5.64259927797834, "grad_norm": 226.53030395507812, "learning_rate": 0.00019874451537029925, "loss": 6.9027, "step": 46890 }, { "epoch": 5.643802647412755, "grad_norm": 60.34992218017578, "learning_rate": 0.00019874391436787209, "loss": 6.8031, "step": 46900 }, { "epoch": 5.645006016847172, "grad_norm": 39.77439498901367, "learning_rate": 0.00019874331322253808, "loss": 6.7717, "step": 46910 }, { "epoch": 5.646209386281589, "grad_norm": 878.1384887695312, "learning_rate": 0.00019874271193429815, "loss": 6.9093, "step": 46920 }, { "epoch": 5.6474127557160045, "grad_norm": 34.1122932434082, "learning_rate": 0.0001987421105031531, "loss": 6.8495, "step": 46930 }, { "epoch": 5.648616125150421, "grad_norm": 90.69535827636719, "learning_rate": 0.0001987415089291039, "loss": 6.8004, "step": 46940 }, { "epoch": 5.649819494584838, "grad_norm": 131.42843627929688, "learning_rate": 0.00019874090721215133, "loss": 7.014, "step": 46950 }, { "epoch": 5.651022864019254, "grad_norm": 52.892433166503906, "learning_rate": 0.00019874030535229628, "loss": 7.0594, "step": 46960 }, { "epoch": 5.65222623345367, "grad_norm": 400.1392517089844, "learning_rate": 0.00019873970334953963, "loss": 6.9954, "step": 46970 }, { "epoch": 5.653429602888087, "grad_norm": 106.0921401977539, "learning_rate": 0.00019873910120388224, "loss": 7.1363, "step": 46980 }, { "epoch": 5.654632972322503, "grad_norm": 36.46430206298828, "learning_rate": 0.00019873849891532502, "loss": 7.3101, "step": 46990 }, { "epoch": 5.6558363417569195, "grad_norm": 5.877980709075928, "learning_rate": 0.00019873789648386877, "loss": 7.4363, "step": 47000 }, { "epoch": 5.657039711191336, "grad_norm": 9.77474308013916, "learning_rate": 0.00019873729390951445, "loss": 7.2457, "step": 47010 }, { "epoch": 5.658243080625752, "grad_norm": 24.199386596679688, "learning_rate": 0.00019873669119226288, "loss": 7.056, "step": 47020 }, { "epoch": 5.659446450060169, "grad_norm": 15.595993041992188, "learning_rate": 0.00019873608833211493, "loss": 6.8808, "step": 47030 }, { "epoch": 5.6606498194945845, "grad_norm": 9.290364265441895, "learning_rate": 0.0001987354853290715, "loss": 6.7269, "step": 47040 }, { "epoch": 5.661853188929001, "grad_norm": 28.148937225341797, "learning_rate": 0.0001987348821831334, "loss": 6.6595, "step": 47050 }, { "epoch": 5.663056558363418, "grad_norm": 52.12007522583008, "learning_rate": 0.00019873427889430159, "loss": 6.5249, "step": 47060 }, { "epoch": 5.664259927797834, "grad_norm": 116.76371002197266, "learning_rate": 0.0001987336754625769, "loss": 6.7813, "step": 47070 }, { "epoch": 5.66546329723225, "grad_norm": 57.85456848144531, "learning_rate": 0.0001987330718879602, "loss": 6.6487, "step": 47080 }, { "epoch": 5.666666666666667, "grad_norm": 174.3936004638672, "learning_rate": 0.00019873246817045238, "loss": 6.6781, "step": 47090 }, { "epoch": 5.667870036101083, "grad_norm": 157.48199462890625, "learning_rate": 0.00019873186431005428, "loss": 6.9296, "step": 47100 }, { "epoch": 5.669073405535499, "grad_norm": 82.49263763427734, "learning_rate": 0.00019873126030676682, "loss": 6.8377, "step": 47110 }, { "epoch": 5.670276774969916, "grad_norm": 85.44804382324219, "learning_rate": 0.00019873065616059083, "loss": 6.9748, "step": 47120 }, { "epoch": 5.671480144404332, "grad_norm": 89.8354721069336, "learning_rate": 0.0001987300518715272, "loss": 6.7323, "step": 47130 }, { "epoch": 5.672683513838749, "grad_norm": 54.78339767456055, "learning_rate": 0.00019872944743957682, "loss": 6.4902, "step": 47140 }, { "epoch": 5.673886883273164, "grad_norm": 46.192203521728516, "learning_rate": 0.00019872884286474055, "loss": 6.5638, "step": 47150 }, { "epoch": 5.675090252707581, "grad_norm": 59.51628494262695, "learning_rate": 0.0001987282381470193, "loss": 6.476, "step": 47160 }, { "epoch": 5.676293622141998, "grad_norm": 252.3123321533203, "learning_rate": 0.00019872763328641387, "loss": 6.6872, "step": 47170 }, { "epoch": 5.6774969915764135, "grad_norm": 12.463716506958008, "learning_rate": 0.0001987270282829252, "loss": 7.0556, "step": 47180 }, { "epoch": 5.67870036101083, "grad_norm": 12.086864471435547, "learning_rate": 0.00019872642313655415, "loss": 6.9903, "step": 47190 }, { "epoch": 5.679903730445247, "grad_norm": 23.978946685791016, "learning_rate": 0.00019872581784730157, "loss": 6.9209, "step": 47200 }, { "epoch": 5.681107099879663, "grad_norm": 25.509599685668945, "learning_rate": 0.00019872521241516837, "loss": 6.7212, "step": 47210 }, { "epoch": 5.682310469314079, "grad_norm": 15.048419952392578, "learning_rate": 0.00019872460684015541, "loss": 6.6035, "step": 47220 }, { "epoch": 5.683513838748496, "grad_norm": 31.152725219726562, "learning_rate": 0.00019872400112226358, "loss": 6.6196, "step": 47230 }, { "epoch": 5.684717208182912, "grad_norm": 22.092208862304688, "learning_rate": 0.00019872339526149374, "loss": 6.8158, "step": 47240 }, { "epoch": 5.6859205776173285, "grad_norm": 46.9203987121582, "learning_rate": 0.00019872278925784674, "loss": 7.0549, "step": 47250 }, { "epoch": 5.687123947051745, "grad_norm": 12.98798942565918, "learning_rate": 0.00019872218311132355, "loss": 7.1165, "step": 47260 }, { "epoch": 5.688327316486161, "grad_norm": 7.375025749206543, "learning_rate": 0.00019872157682192495, "loss": 6.7941, "step": 47270 }, { "epoch": 5.689530685920578, "grad_norm": 11.47173023223877, "learning_rate": 0.00019872097038965187, "loss": 6.5747, "step": 47280 }, { "epoch": 5.690734055354994, "grad_norm": 35.984779357910156, "learning_rate": 0.00019872036381450516, "loss": 6.6348, "step": 47290 }, { "epoch": 5.69193742478941, "grad_norm": 9.054459571838379, "learning_rate": 0.00019871975709648574, "loss": 6.5689, "step": 47300 }, { "epoch": 5.693140794223827, "grad_norm": 8.606990814208984, "learning_rate": 0.00019871915023559442, "loss": 6.5106, "step": 47310 }, { "epoch": 5.6943441636582435, "grad_norm": 7.831537246704102, "learning_rate": 0.00019871854323183215, "loss": 6.5651, "step": 47320 }, { "epoch": 5.695547533092659, "grad_norm": 8.106771469116211, "learning_rate": 0.00019871793608519975, "loss": 6.6654, "step": 47330 }, { "epoch": 5.696750902527076, "grad_norm": 11.992792129516602, "learning_rate": 0.00019871732879569813, "loss": 6.5571, "step": 47340 }, { "epoch": 5.697954271961493, "grad_norm": 8.731738090515137, "learning_rate": 0.00019871672136332816, "loss": 6.496, "step": 47350 }, { "epoch": 5.699157641395908, "grad_norm": 14.152222633361816, "learning_rate": 0.00019871611378809073, "loss": 6.4799, "step": 47360 }, { "epoch": 5.700361010830325, "grad_norm": 6.566752910614014, "learning_rate": 0.0001987155060699867, "loss": 6.5309, "step": 47370 }, { "epoch": 5.701564380264742, "grad_norm": 9.424914360046387, "learning_rate": 0.00019871489820901697, "loss": 6.3605, "step": 47380 }, { "epoch": 5.702767749699158, "grad_norm": 14.224370002746582, "learning_rate": 0.0001987142902051824, "loss": 6.4596, "step": 47390 }, { "epoch": 5.703971119133574, "grad_norm": 11.61376953125, "learning_rate": 0.00019871368205848388, "loss": 6.4664, "step": 47400 }, { "epoch": 5.70517448856799, "grad_norm": 26.754058837890625, "learning_rate": 0.00019871307376892233, "loss": 6.509, "step": 47410 }, { "epoch": 5.706377858002407, "grad_norm": 30.677982330322266, "learning_rate": 0.00019871246533649853, "loss": 6.6542, "step": 47420 }, { "epoch": 5.707581227436823, "grad_norm": 64.0774154663086, "learning_rate": 0.00019871185676121345, "loss": 6.5922, "step": 47430 }, { "epoch": 5.708784596871239, "grad_norm": 34.35859680175781, "learning_rate": 0.00019871124804306795, "loss": 6.6286, "step": 47440 }, { "epoch": 5.709987966305656, "grad_norm": 22.575218200683594, "learning_rate": 0.0001987106391820629, "loss": 6.5684, "step": 47450 }, { "epoch": 5.7111913357400725, "grad_norm": 23.261507034301758, "learning_rate": 0.0001987100301781992, "loss": 6.4519, "step": 47460 }, { "epoch": 5.712394705174488, "grad_norm": 31.767271041870117, "learning_rate": 0.00019870942103147768, "loss": 6.5099, "step": 47470 }, { "epoch": 5.713598074608905, "grad_norm": 96.95401763916016, "learning_rate": 0.00019870881174189928, "loss": 6.5382, "step": 47480 }, { "epoch": 5.714801444043322, "grad_norm": 10.955039978027344, "learning_rate": 0.00019870820230946486, "loss": 6.615, "step": 47490 }, { "epoch": 5.7160048134777375, "grad_norm": 21.750797271728516, "learning_rate": 0.00019870759273417528, "loss": 6.324, "step": 47500 }, { "epoch": 5.717208182912154, "grad_norm": 59.43056869506836, "learning_rate": 0.00019870698301603144, "loss": 6.7201, "step": 47510 }, { "epoch": 5.71841155234657, "grad_norm": 36.652042388916016, "learning_rate": 0.00019870637315503426, "loss": 6.7443, "step": 47520 }, { "epoch": 5.719614921780987, "grad_norm": 50.06197738647461, "learning_rate": 0.00019870576315118456, "loss": 6.6402, "step": 47530 }, { "epoch": 5.720818291215403, "grad_norm": 28.871110916137695, "learning_rate": 0.00019870515300448327, "loss": 6.5999, "step": 47540 }, { "epoch": 5.722021660649819, "grad_norm": 7.493163108825684, "learning_rate": 0.00019870454271493122, "loss": 6.4686, "step": 47550 }, { "epoch": 5.723225030084236, "grad_norm": 25.0213565826416, "learning_rate": 0.00019870393228252936, "loss": 6.5981, "step": 47560 }, { "epoch": 5.7244283995186525, "grad_norm": 41.69451141357422, "learning_rate": 0.00019870332170727854, "loss": 6.5718, "step": 47570 }, { "epoch": 5.725631768953068, "grad_norm": 11.689252853393555, "learning_rate": 0.00019870271098917965, "loss": 6.6932, "step": 47580 }, { "epoch": 5.726835138387485, "grad_norm": 17.256437301635742, "learning_rate": 0.00019870210012823354, "loss": 6.4876, "step": 47590 }, { "epoch": 5.728038507821902, "grad_norm": 9.13249683380127, "learning_rate": 0.00019870148912444117, "loss": 6.4415, "step": 47600 }, { "epoch": 5.729241877256317, "grad_norm": 108.54875946044922, "learning_rate": 0.0001987008779778033, "loss": 6.3982, "step": 47610 }, { "epoch": 5.730445246690734, "grad_norm": 20.35406494140625, "learning_rate": 0.00019870026668832095, "loss": 6.409, "step": 47620 }, { "epoch": 5.731648616125151, "grad_norm": 18.480241775512695, "learning_rate": 0.00019869965525599493, "loss": 6.4184, "step": 47630 }, { "epoch": 5.7328519855595665, "grad_norm": 21.412052154541016, "learning_rate": 0.00019869904368082614, "loss": 6.4053, "step": 47640 }, { "epoch": 5.734055354993983, "grad_norm": 90.26350402832031, "learning_rate": 0.00019869843196281548, "loss": 6.4532, "step": 47650 }, { "epoch": 5.7352587244284, "grad_norm": 13.593746185302734, "learning_rate": 0.0001986978201019638, "loss": 6.5318, "step": 47660 }, { "epoch": 5.736462093862816, "grad_norm": 13.714799880981445, "learning_rate": 0.00019869720809827202, "loss": 6.4598, "step": 47670 }, { "epoch": 5.737665463297232, "grad_norm": 21.631376266479492, "learning_rate": 0.00019869659595174102, "loss": 6.5457, "step": 47680 }, { "epoch": 5.738868832731649, "grad_norm": 65.28184509277344, "learning_rate": 0.00019869598366237164, "loss": 6.4729, "step": 47690 }, { "epoch": 5.740072202166065, "grad_norm": 32.66143798828125, "learning_rate": 0.00019869537123016483, "loss": 6.6336, "step": 47700 }, { "epoch": 5.7412755716004815, "grad_norm": 53.060203552246094, "learning_rate": 0.00019869475865512146, "loss": 6.3433, "step": 47710 }, { "epoch": 5.742478941034898, "grad_norm": 42.53267288208008, "learning_rate": 0.0001986941459372424, "loss": 6.436, "step": 47720 }, { "epoch": 5.743682310469314, "grad_norm": 31.560754776000977, "learning_rate": 0.00019869353307652853, "loss": 6.3752, "step": 47730 }, { "epoch": 5.744885679903731, "grad_norm": 25.02374267578125, "learning_rate": 0.0001986929200729808, "loss": 6.384, "step": 47740 }, { "epoch": 5.7460890493381465, "grad_norm": 28.127748489379883, "learning_rate": 0.00019869230692659998, "loss": 6.3578, "step": 47750 }, { "epoch": 5.747292418772563, "grad_norm": 92.90540313720703, "learning_rate": 0.00019869169363738706, "loss": 6.3625, "step": 47760 }, { "epoch": 5.74849578820698, "grad_norm": 48.94447708129883, "learning_rate": 0.0001986910802053429, "loss": 6.4407, "step": 47770 }, { "epoch": 5.749699157641396, "grad_norm": 37.32133865356445, "learning_rate": 0.00019869046663046838, "loss": 6.3567, "step": 47780 }, { "epoch": 5.750902527075812, "grad_norm": 42.19178009033203, "learning_rate": 0.00019868985291276436, "loss": 6.6661, "step": 47790 }, { "epoch": 5.752105896510229, "grad_norm": 136.2880096435547, "learning_rate": 0.00019868923905223178, "loss": 6.5138, "step": 47800 }, { "epoch": 5.753309265944645, "grad_norm": 68.13109588623047, "learning_rate": 0.00019868862504887153, "loss": 6.3546, "step": 47810 }, { "epoch": 5.754512635379061, "grad_norm": 159.12734985351562, "learning_rate": 0.00019868801090268444, "loss": 6.4623, "step": 47820 }, { "epoch": 5.755716004813478, "grad_norm": 179.521240234375, "learning_rate": 0.00019868739661367145, "loss": 6.7468, "step": 47830 }, { "epoch": 5.756919374247894, "grad_norm": 665.4715576171875, "learning_rate": 0.0001986867821818334, "loss": 6.9277, "step": 47840 }, { "epoch": 5.758122743682311, "grad_norm": 59.42085647583008, "learning_rate": 0.00019868616760717123, "loss": 6.5894, "step": 47850 }, { "epoch": 5.759326113116726, "grad_norm": 226.77992248535156, "learning_rate": 0.00019868555288968582, "loss": 6.5083, "step": 47860 }, { "epoch": 5.760529482551143, "grad_norm": 62.35588455200195, "learning_rate": 0.00019868493802937805, "loss": 6.5179, "step": 47870 }, { "epoch": 5.76173285198556, "grad_norm": 129.4388885498047, "learning_rate": 0.00019868432302624879, "loss": 6.3476, "step": 47880 }, { "epoch": 5.7629362214199755, "grad_norm": 76.39713287353516, "learning_rate": 0.00019868370788029898, "loss": 6.4871, "step": 47890 }, { "epoch": 5.764139590854392, "grad_norm": 71.60661315917969, "learning_rate": 0.00019868309259152945, "loss": 6.5476, "step": 47900 }, { "epoch": 5.765342960288809, "grad_norm": 70.02320098876953, "learning_rate": 0.00019868247715994114, "loss": 6.5925, "step": 47910 }, { "epoch": 5.766546329723225, "grad_norm": 32.23582077026367, "learning_rate": 0.00019868186158553494, "loss": 6.741, "step": 47920 }, { "epoch": 5.767749699157641, "grad_norm": 52.133811950683594, "learning_rate": 0.0001986812458683117, "loss": 6.4957, "step": 47930 }, { "epoch": 5.768953068592058, "grad_norm": 33.83395767211914, "learning_rate": 0.00019868063000827232, "loss": 6.5662, "step": 47940 }, { "epoch": 5.770156438026474, "grad_norm": 27.215024948120117, "learning_rate": 0.00019868001400541772, "loss": 6.5178, "step": 47950 }, { "epoch": 5.7713598074608905, "grad_norm": 42.432456970214844, "learning_rate": 0.00019867939785974875, "loss": 6.411, "step": 47960 }, { "epoch": 5.772563176895307, "grad_norm": 61.703800201416016, "learning_rate": 0.00019867878157126638, "loss": 6.5101, "step": 47970 }, { "epoch": 5.773766546329723, "grad_norm": 46.41755294799805, "learning_rate": 0.00019867816513997142, "loss": 6.3952, "step": 47980 }, { "epoch": 5.77496991576414, "grad_norm": 61.51991653442383, "learning_rate": 0.00019867754856586477, "loss": 6.454, "step": 47990 }, { "epoch": 5.776173285198556, "grad_norm": 75.86617279052734, "learning_rate": 0.00019867693184894738, "loss": 6.5089, "step": 48000 }, { "epoch": 5.777376654632972, "grad_norm": 116.02661895751953, "learning_rate": 0.0001986763149892201, "loss": 6.5398, "step": 48010 }, { "epoch": 5.778580024067389, "grad_norm": 35.84797668457031, "learning_rate": 0.00019867569798668382, "loss": 6.586, "step": 48020 }, { "epoch": 5.7797833935018055, "grad_norm": 106.19542694091797, "learning_rate": 0.00019867508084133947, "loss": 6.8426, "step": 48030 }, { "epoch": 5.780986762936221, "grad_norm": 81.0821304321289, "learning_rate": 0.0001986744635531879, "loss": 6.8483, "step": 48040 }, { "epoch": 5.782190132370638, "grad_norm": 59.08488845825195, "learning_rate": 0.00019867384612223, "loss": 7.0026, "step": 48050 }, { "epoch": 5.783393501805055, "grad_norm": 101.26488494873047, "learning_rate": 0.0001986732285484667, "loss": 6.98, "step": 48060 }, { "epoch": 5.78459687123947, "grad_norm": 37.543392181396484, "learning_rate": 0.00019867261083189887, "loss": 6.7258, "step": 48070 }, { "epoch": 5.785800240673887, "grad_norm": 25.11439323425293, "learning_rate": 0.0001986719929725274, "loss": 6.663, "step": 48080 }, { "epoch": 5.787003610108303, "grad_norm": 130.6200714111328, "learning_rate": 0.00019867137497035323, "loss": 6.7303, "step": 48090 }, { "epoch": 5.78820697954272, "grad_norm": 5723.11767578125, "learning_rate": 0.0001986707568253772, "loss": 7.0327, "step": 48100 }, { "epoch": 5.789410348977136, "grad_norm": 14.471563339233398, "learning_rate": 0.00019867013853760025, "loss": 7.2823, "step": 48110 }, { "epoch": 5.790613718411552, "grad_norm": 17.63632583618164, "learning_rate": 0.0001986695201070232, "loss": 7.1626, "step": 48120 }, { "epoch": 5.791817087845969, "grad_norm": 65.20819854736328, "learning_rate": 0.00019866890153364701, "loss": 6.791, "step": 48130 }, { "epoch": 5.793020457280385, "grad_norm": 82.6868896484375, "learning_rate": 0.00019866828281747256, "loss": 6.8117, "step": 48140 }, { "epoch": 5.794223826714801, "grad_norm": 53.12080001831055, "learning_rate": 0.00019866766395850073, "loss": 6.8404, "step": 48150 }, { "epoch": 5.795427196149218, "grad_norm": 72.31776428222656, "learning_rate": 0.00019866704495673248, "loss": 6.7715, "step": 48160 }, { "epoch": 5.7966305655836345, "grad_norm": 86.35231018066406, "learning_rate": 0.0001986664258121686, "loss": 6.8323, "step": 48170 }, { "epoch": 5.79783393501805, "grad_norm": 76.43995666503906, "learning_rate": 0.0001986658065248101, "loss": 6.8802, "step": 48180 }, { "epoch": 5.799037304452467, "grad_norm": 52.722164154052734, "learning_rate": 0.00019866518709465777, "loss": 6.7984, "step": 48190 }, { "epoch": 5.800240673886883, "grad_norm": 39.93399429321289, "learning_rate": 0.00019866456752171257, "loss": 7.1838, "step": 48200 }, { "epoch": 5.8014440433212995, "grad_norm": 9.672844886779785, "learning_rate": 0.00019866394780597538, "loss": 7.2161, "step": 48210 }, { "epoch": 5.802647412755716, "grad_norm": 15.779073715209961, "learning_rate": 0.0001986633279474471, "loss": 7.0148, "step": 48220 }, { "epoch": 5.803850782190132, "grad_norm": 15.048151016235352, "learning_rate": 0.00019866270794612862, "loss": 6.8137, "step": 48230 }, { "epoch": 5.805054151624549, "grad_norm": 7.421164512634277, "learning_rate": 0.00019866208780202084, "loss": 6.6582, "step": 48240 }, { "epoch": 5.806257521058965, "grad_norm": 12.760626792907715, "learning_rate": 0.0001986614675151247, "loss": 6.5762, "step": 48250 }, { "epoch": 5.807460890493381, "grad_norm": 46.091712951660156, "learning_rate": 0.000198660847085441, "loss": 6.5812, "step": 48260 }, { "epoch": 5.808664259927798, "grad_norm": 52.30769348144531, "learning_rate": 0.0001986602265129707, "loss": 6.7902, "step": 48270 }, { "epoch": 5.8098676293622145, "grad_norm": 20.084352493286133, "learning_rate": 0.0001986596057977147, "loss": 6.9179, "step": 48280 }, { "epoch": 5.81107099879663, "grad_norm": 8.5487699508667, "learning_rate": 0.00019865898493967392, "loss": 7.0288, "step": 48290 }, { "epoch": 5.812274368231047, "grad_norm": 53.21567916870117, "learning_rate": 0.00019865836393884924, "loss": 6.9843, "step": 48300 }, { "epoch": 5.813477737665464, "grad_norm": 26.6538028717041, "learning_rate": 0.00019865774279524153, "loss": 6.8132, "step": 48310 }, { "epoch": 5.814681107099879, "grad_norm": 29.152055740356445, "learning_rate": 0.0001986571215088517, "loss": 6.838, "step": 48320 }, { "epoch": 5.815884476534296, "grad_norm": 11.783438682556152, "learning_rate": 0.00019865650007968064, "loss": 7.1387, "step": 48330 }, { "epoch": 5.817087845968713, "grad_norm": 124.16717529296875, "learning_rate": 0.00019865587850772932, "loss": 6.9774, "step": 48340 }, { "epoch": 5.8182912154031285, "grad_norm": 43.25951385498047, "learning_rate": 0.00019865525679299854, "loss": 7.0962, "step": 48350 }, { "epoch": 5.819494584837545, "grad_norm": 28.44440460205078, "learning_rate": 0.00019865463493548926, "loss": 6.9566, "step": 48360 }, { "epoch": 5.820697954271962, "grad_norm": 8.934304237365723, "learning_rate": 0.00019865401293520237, "loss": 6.7227, "step": 48370 }, { "epoch": 5.821901323706378, "grad_norm": 8.736367225646973, "learning_rate": 0.00019865339079213877, "loss": 6.5421, "step": 48380 }, { "epoch": 5.823104693140794, "grad_norm": 13.938597679138184, "learning_rate": 0.00019865276850629935, "loss": 6.7676, "step": 48390 }, { "epoch": 5.824308062575211, "grad_norm": 15.920125007629395, "learning_rate": 0.000198652146077685, "loss": 6.6048, "step": 48400 }, { "epoch": 5.825511432009627, "grad_norm": 39.54819107055664, "learning_rate": 0.00019865152350629668, "loss": 6.5634, "step": 48410 }, { "epoch": 5.8267148014440435, "grad_norm": 360.4687805175781, "learning_rate": 0.00019865090079213522, "loss": 6.6545, "step": 48420 }, { "epoch": 5.827918170878459, "grad_norm": 67.05709838867188, "learning_rate": 0.00019865027793520158, "loss": 6.8256, "step": 48430 }, { "epoch": 5.829121540312876, "grad_norm": 14.328038215637207, "learning_rate": 0.00019864965493549658, "loss": 6.9099, "step": 48440 }, { "epoch": 5.830324909747293, "grad_norm": 25.57486915588379, "learning_rate": 0.0001986490317930212, "loss": 6.7648, "step": 48450 }, { "epoch": 5.8315282791817085, "grad_norm": 17.249244689941406, "learning_rate": 0.0001986484085077763, "loss": 6.8231, "step": 48460 }, { "epoch": 5.832731648616125, "grad_norm": 18.224124908447266, "learning_rate": 0.00019864778507976282, "loss": 6.8162, "step": 48470 }, { "epoch": 5.833935018050542, "grad_norm": 62.611942291259766, "learning_rate": 0.00019864716150898163, "loss": 6.6653, "step": 48480 }, { "epoch": 5.835138387484958, "grad_norm": 53.798423767089844, "learning_rate": 0.00019864653779543367, "loss": 6.5726, "step": 48490 }, { "epoch": 5.836341756919374, "grad_norm": 60.845558166503906, "learning_rate": 0.00019864591393911978, "loss": 6.535, "step": 48500 }, { "epoch": 5.837545126353791, "grad_norm": 317.6461486816406, "learning_rate": 0.00019864528994004092, "loss": 6.6664, "step": 48510 }, { "epoch": 5.838748495788207, "grad_norm": 59.18181610107422, "learning_rate": 0.00019864466579819795, "loss": 7.3048, "step": 48520 }, { "epoch": 5.839951865222623, "grad_norm": 23.103687286376953, "learning_rate": 0.00019864404151359182, "loss": 7.0544, "step": 48530 }, { "epoch": 5.841155234657039, "grad_norm": 13.91545581817627, "learning_rate": 0.0001986434170862234, "loss": 6.8467, "step": 48540 }, { "epoch": 5.842358604091456, "grad_norm": 35.511844635009766, "learning_rate": 0.00019864279251609356, "loss": 6.8236, "step": 48550 }, { "epoch": 5.843561973525873, "grad_norm": 37.969356536865234, "learning_rate": 0.00019864216780320329, "loss": 6.8571, "step": 48560 }, { "epoch": 5.844765342960288, "grad_norm": 19.357770919799805, "learning_rate": 0.00019864154294755345, "loss": 6.8503, "step": 48570 }, { "epoch": 5.845968712394705, "grad_norm": 82.99191284179688, "learning_rate": 0.0001986409179491449, "loss": 6.8761, "step": 48580 }, { "epoch": 5.847172081829122, "grad_norm": 57.941837310791016, "learning_rate": 0.0001986402928079786, "loss": 6.9824, "step": 48590 }, { "epoch": 5.8483754512635375, "grad_norm": 108.87654876708984, "learning_rate": 0.00019863966752405547, "loss": 6.987, "step": 48600 }, { "epoch": 5.849578820697954, "grad_norm": 85.42047882080078, "learning_rate": 0.00019863904209737641, "loss": 7.0367, "step": 48610 }, { "epoch": 5.850782190132371, "grad_norm": 48.762367248535156, "learning_rate": 0.00019863841652794227, "loss": 6.9113, "step": 48620 }, { "epoch": 5.851985559566787, "grad_norm": 65.24446868896484, "learning_rate": 0.000198637790815754, "loss": 6.9669, "step": 48630 }, { "epoch": 5.853188929001203, "grad_norm": 29.887727737426758, "learning_rate": 0.00019863716496081247, "loss": 7.2278, "step": 48640 }, { "epoch": 5.85439229843562, "grad_norm": 43.98811721801758, "learning_rate": 0.00019863653896311863, "loss": 7.1588, "step": 48650 }, { "epoch": 5.855595667870036, "grad_norm": 55.22576904296875, "learning_rate": 0.00019863591282267336, "loss": 7.1078, "step": 48660 }, { "epoch": 5.8567990373044525, "grad_norm": 67.01032257080078, "learning_rate": 0.00019863528653947757, "loss": 6.8984, "step": 48670 }, { "epoch": 5.858002406738869, "grad_norm": 19.69286346435547, "learning_rate": 0.00019863466011353214, "loss": 6.7313, "step": 48680 }, { "epoch": 5.859205776173285, "grad_norm": 31.302631378173828, "learning_rate": 0.00019863403354483805, "loss": 6.915, "step": 48690 }, { "epoch": 5.860409145607702, "grad_norm": 50.290992736816406, "learning_rate": 0.00019863340683339616, "loss": 6.9377, "step": 48700 }, { "epoch": 5.861612515042118, "grad_norm": 15.987544059753418, "learning_rate": 0.00019863277997920733, "loss": 6.9527, "step": 48710 }, { "epoch": 5.862815884476534, "grad_norm": 26.964595794677734, "learning_rate": 0.00019863215298227257, "loss": 6.7991, "step": 48720 }, { "epoch": 5.864019253910951, "grad_norm": 30.559459686279297, "learning_rate": 0.0001986315258425927, "loss": 6.7525, "step": 48730 }, { "epoch": 5.8652226233453675, "grad_norm": 9.94189739227295, "learning_rate": 0.00019863089856016867, "loss": 6.6273, "step": 48740 }, { "epoch": 5.866425992779783, "grad_norm": 20.493711471557617, "learning_rate": 0.0001986302711350014, "loss": 6.7033, "step": 48750 }, { "epoch": 5.8676293622142, "grad_norm": 96.32124328613281, "learning_rate": 0.00019862964356709176, "loss": 6.6239, "step": 48760 }, { "epoch": 5.868832731648616, "grad_norm": 47.75698471069336, "learning_rate": 0.00019862901585644064, "loss": 6.6847, "step": 48770 }, { "epoch": 5.870036101083032, "grad_norm": 218.04502868652344, "learning_rate": 0.00019862838800304902, "loss": 6.6552, "step": 48780 }, { "epoch": 5.871239470517449, "grad_norm": 11.607353210449219, "learning_rate": 0.00019862776000691777, "loss": 6.8557, "step": 48790 }, { "epoch": 5.872442839951865, "grad_norm": 17.712568283081055, "learning_rate": 0.0001986271318680478, "loss": 6.8654, "step": 48800 }, { "epoch": 5.873646209386282, "grad_norm": 7.156992435455322, "learning_rate": 0.00019862650358644005, "loss": 6.7159, "step": 48810 }, { "epoch": 5.874849578820698, "grad_norm": 22.025548934936523, "learning_rate": 0.00019862587516209534, "loss": 6.6292, "step": 48820 }, { "epoch": 5.876052948255114, "grad_norm": 185.76177978515625, "learning_rate": 0.00019862524659501466, "loss": 6.6061, "step": 48830 }, { "epoch": 5.877256317689531, "grad_norm": 24.29646110534668, "learning_rate": 0.00019862461788519893, "loss": 6.635, "step": 48840 }, { "epoch": 5.878459687123947, "grad_norm": 215.8610076904297, "learning_rate": 0.00019862398903264898, "loss": 6.5595, "step": 48850 }, { "epoch": 5.879663056558363, "grad_norm": 96.23988342285156, "learning_rate": 0.0001986233600373658, "loss": 6.4922, "step": 48860 }, { "epoch": 5.88086642599278, "grad_norm": 36.56184768676758, "learning_rate": 0.00019862273089935026, "loss": 6.6663, "step": 48870 }, { "epoch": 5.882069795427196, "grad_norm": 32.58598709106445, "learning_rate": 0.00019862210161860329, "loss": 6.6444, "step": 48880 }, { "epoch": 5.883273164861612, "grad_norm": 17.777141571044922, "learning_rate": 0.00019862147219512578, "loss": 6.8146, "step": 48890 }, { "epoch": 5.884476534296029, "grad_norm": 77.6886978149414, "learning_rate": 0.00019862084262891865, "loss": 6.6615, "step": 48900 }, { "epoch": 5.885679903730445, "grad_norm": 34.8476676940918, "learning_rate": 0.0001986202129199828, "loss": 6.7911, "step": 48910 }, { "epoch": 5.8868832731648615, "grad_norm": 52.74646759033203, "learning_rate": 0.00019861958306831916, "loss": 6.6983, "step": 48920 }, { "epoch": 5.888086642599278, "grad_norm": 50.859527587890625, "learning_rate": 0.00019861895307392866, "loss": 6.6, "step": 48930 }, { "epoch": 5.889290012033694, "grad_norm": 133.7000732421875, "learning_rate": 0.00019861832293681215, "loss": 6.6405, "step": 48940 }, { "epoch": 5.890493381468111, "grad_norm": 37.14678955078125, "learning_rate": 0.0001986176926569706, "loss": 6.5676, "step": 48950 }, { "epoch": 5.891696750902527, "grad_norm": 55.223114013671875, "learning_rate": 0.0001986170622344049, "loss": 6.6399, "step": 48960 }, { "epoch": 5.892900120336943, "grad_norm": 15.49814224243164, "learning_rate": 0.00019861643166911596, "loss": 6.7189, "step": 48970 }, { "epoch": 5.89410348977136, "grad_norm": 8.983789443969727, "learning_rate": 0.0001986158009611047, "loss": 6.6321, "step": 48980 }, { "epoch": 5.8953068592057765, "grad_norm": 10.405715942382812, "learning_rate": 0.00019861517011037202, "loss": 6.6703, "step": 48990 }, { "epoch": 5.896510228640192, "grad_norm": 6.647861957550049, "learning_rate": 0.00019861453911691887, "loss": 6.6884, "step": 49000 }, { "epoch": 5.897713598074609, "grad_norm": 29.058887481689453, "learning_rate": 0.00019861390798074608, "loss": 6.5258, "step": 49010 }, { "epoch": 5.898916967509026, "grad_norm": 22.239561080932617, "learning_rate": 0.00019861327670185464, "loss": 6.6583, "step": 49020 }, { "epoch": 5.900120336943441, "grad_norm": 154.3556671142578, "learning_rate": 0.00019861264528024547, "loss": 6.6688, "step": 49030 }, { "epoch": 5.901323706377858, "grad_norm": 21.16986083984375, "learning_rate": 0.00019861201371591943, "loss": 6.7152, "step": 49040 }, { "epoch": 5.902527075812275, "grad_norm": 90.42473602294922, "learning_rate": 0.00019861138200887748, "loss": 6.6219, "step": 49050 }, { "epoch": 5.9037304452466906, "grad_norm": 17.86294174194336, "learning_rate": 0.0001986107501591205, "loss": 6.6203, "step": 49060 }, { "epoch": 5.904933814681107, "grad_norm": 14.435193061828613, "learning_rate": 0.0001986101181666494, "loss": 6.5803, "step": 49070 }, { "epoch": 5.906137184115524, "grad_norm": 8.926048278808594, "learning_rate": 0.00019860948603146514, "loss": 6.4409, "step": 49080 }, { "epoch": 5.90734055354994, "grad_norm": 43.382713317871094, "learning_rate": 0.0001986088537535686, "loss": 6.5406, "step": 49090 }, { "epoch": 5.908543922984356, "grad_norm": 18.887489318847656, "learning_rate": 0.00019860822133296071, "loss": 6.5015, "step": 49100 }, { "epoch": 5.909747292418773, "grad_norm": 49.34949493408203, "learning_rate": 0.00019860758876964237, "loss": 6.6104, "step": 49110 }, { "epoch": 5.910950661853189, "grad_norm": 13.59429931640625, "learning_rate": 0.0001986069560636145, "loss": 6.7589, "step": 49120 }, { "epoch": 5.9121540312876055, "grad_norm": 33.782684326171875, "learning_rate": 0.00019860632321487803, "loss": 6.686, "step": 49130 }, { "epoch": 5.913357400722021, "grad_norm": 9.934277534484863, "learning_rate": 0.00019860569022343386, "loss": 6.7796, "step": 49140 }, { "epoch": 5.914560770156438, "grad_norm": 27.600479125976562, "learning_rate": 0.0001986050570892829, "loss": 6.5805, "step": 49150 }, { "epoch": 5.915764139590855, "grad_norm": 20.450551986694336, "learning_rate": 0.0001986044238124261, "loss": 6.6734, "step": 49160 }, { "epoch": 5.9169675090252705, "grad_norm": 10.983992576599121, "learning_rate": 0.00019860379039286433, "loss": 6.5625, "step": 49170 }, { "epoch": 5.918170878459687, "grad_norm": 22.772666931152344, "learning_rate": 0.00019860315683059853, "loss": 6.5695, "step": 49180 }, { "epoch": 5.919374247894104, "grad_norm": 88.57467651367188, "learning_rate": 0.00019860252312562963, "loss": 6.5864, "step": 49190 }, { "epoch": 5.92057761732852, "grad_norm": 200.0468292236328, "learning_rate": 0.00019860188927795855, "loss": 6.6279, "step": 49200 }, { "epoch": 5.921780986762936, "grad_norm": 57.41989517211914, "learning_rate": 0.00019860125528758617, "loss": 6.7301, "step": 49210 }, { "epoch": 5.922984356197353, "grad_norm": 29.92462158203125, "learning_rate": 0.00019860062115451343, "loss": 6.7402, "step": 49220 }, { "epoch": 5.924187725631769, "grad_norm": 71.5514144897461, "learning_rate": 0.00019859998687874126, "loss": 6.6112, "step": 49230 }, { "epoch": 5.925391095066185, "grad_norm": 18.483539581298828, "learning_rate": 0.00019859935246027054, "loss": 6.6185, "step": 49240 }, { "epoch": 5.926594464500601, "grad_norm": 11.909351348876953, "learning_rate": 0.00019859871789910228, "loss": 6.5832, "step": 49250 }, { "epoch": 5.927797833935018, "grad_norm": 11.49924373626709, "learning_rate": 0.00019859808319523724, "loss": 6.6664, "step": 49260 }, { "epoch": 5.929001203369435, "grad_norm": 17.225391387939453, "learning_rate": 0.0001985974483486765, "loss": 6.5571, "step": 49270 }, { "epoch": 5.93020457280385, "grad_norm": 39.27605056762695, "learning_rate": 0.00019859681335942088, "loss": 6.7195, "step": 49280 }, { "epoch": 5.931407942238267, "grad_norm": 33.30513381958008, "learning_rate": 0.00019859617822747132, "loss": 6.7147, "step": 49290 }, { "epoch": 5.932611311672684, "grad_norm": 28.122390747070312, "learning_rate": 0.00019859554295282878, "loss": 6.6049, "step": 49300 }, { "epoch": 5.9338146811070995, "grad_norm": 122.17819213867188, "learning_rate": 0.00019859490753549412, "loss": 6.657, "step": 49310 }, { "epoch": 5.935018050541516, "grad_norm": 199.0071563720703, "learning_rate": 0.00019859427197546829, "loss": 6.8036, "step": 49320 }, { "epoch": 5.936221419975933, "grad_norm": 35.690677642822266, "learning_rate": 0.0001985936362727522, "loss": 6.7938, "step": 49330 }, { "epoch": 5.937424789410349, "grad_norm": 43.040279388427734, "learning_rate": 0.0001985930004273468, "loss": 6.863, "step": 49340 }, { "epoch": 5.938628158844765, "grad_norm": 41.64094161987305, "learning_rate": 0.00019859236443925298, "loss": 6.6813, "step": 49350 }, { "epoch": 5.939831528279182, "grad_norm": 66.19989776611328, "learning_rate": 0.00019859172830847166, "loss": 6.6297, "step": 49360 }, { "epoch": 5.941034897713598, "grad_norm": 142.6940460205078, "learning_rate": 0.00019859109203500377, "loss": 6.6443, "step": 49370 }, { "epoch": 5.9422382671480145, "grad_norm": 77.23341369628906, "learning_rate": 0.00019859045561885022, "loss": 6.6949, "step": 49380 }, { "epoch": 5.943441636582431, "grad_norm": 21.141132354736328, "learning_rate": 0.00019858981906001194, "loss": 6.6877, "step": 49390 }, { "epoch": 5.944645006016847, "grad_norm": 14.902994155883789, "learning_rate": 0.00019858918235848987, "loss": 6.6521, "step": 49400 }, { "epoch": 5.945848375451264, "grad_norm": 42.90838623046875, "learning_rate": 0.0001985885455142849, "loss": 6.5217, "step": 49410 }, { "epoch": 5.94705174488568, "grad_norm": 46.79252243041992, "learning_rate": 0.00019858790852739797, "loss": 6.5498, "step": 49420 }, { "epoch": 5.948255114320096, "grad_norm": 200.27420043945312, "learning_rate": 0.00019858727139782998, "loss": 6.8862, "step": 49430 }, { "epoch": 5.949458483754513, "grad_norm": 21.510629653930664, "learning_rate": 0.00019858663412558188, "loss": 6.9137, "step": 49440 }, { "epoch": 5.9506618531889295, "grad_norm": 240.3212432861328, "learning_rate": 0.00019858599671065456, "loss": 6.655, "step": 49450 }, { "epoch": 5.951865222623345, "grad_norm": 53.579219818115234, "learning_rate": 0.000198585359153049, "loss": 6.7256, "step": 49460 }, { "epoch": 5.953068592057762, "grad_norm": 263.5654602050781, "learning_rate": 0.00019858472145276607, "loss": 6.8495, "step": 49470 }, { "epoch": 5.954271961492178, "grad_norm": 56.07474899291992, "learning_rate": 0.00019858408360980668, "loss": 7.0469, "step": 49480 }, { "epoch": 5.955475330926594, "grad_norm": 9.706421852111816, "learning_rate": 0.0001985834456241718, "loss": 7.0029, "step": 49490 }, { "epoch": 5.956678700361011, "grad_norm": 21.957490921020508, "learning_rate": 0.00019858280749586235, "loss": 6.8408, "step": 49500 }, { "epoch": 5.957882069795427, "grad_norm": 28.383865356445312, "learning_rate": 0.00019858216922487922, "loss": 6.9464, "step": 49510 }, { "epoch": 5.959085439229844, "grad_norm": 38.81053161621094, "learning_rate": 0.00019858153081122337, "loss": 6.9604, "step": 49520 }, { "epoch": 5.96028880866426, "grad_norm": 42.365753173828125, "learning_rate": 0.00019858089225489569, "loss": 6.874, "step": 49530 }, { "epoch": 5.961492178098676, "grad_norm": 85.8763656616211, "learning_rate": 0.0001985802535558971, "loss": 6.7034, "step": 49540 }, { "epoch": 5.962695547533093, "grad_norm": 37.90697479248047, "learning_rate": 0.0001985796147142286, "loss": 6.8336, "step": 49550 }, { "epoch": 5.963898916967509, "grad_norm": 43.58564376831055, "learning_rate": 0.00019857897572989104, "loss": 6.8717, "step": 49560 }, { "epoch": 5.965102286401925, "grad_norm": 60.161529541015625, "learning_rate": 0.00019857833660288533, "loss": 6.8313, "step": 49570 }, { "epoch": 5.966305655836342, "grad_norm": 68.186767578125, "learning_rate": 0.00019857769733321246, "loss": 6.7628, "step": 49580 }, { "epoch": 5.967509025270758, "grad_norm": 444.3592529296875, "learning_rate": 0.0001985770579208733, "loss": 6.7427, "step": 49590 }, { "epoch": 5.968712394705174, "grad_norm": 143.87109375, "learning_rate": 0.0001985764183658688, "loss": 6.8694, "step": 49600 }, { "epoch": 5.969915764139591, "grad_norm": 364.8321838378906, "learning_rate": 0.00019857577866819992, "loss": 6.8645, "step": 49610 }, { "epoch": 5.971119133574007, "grad_norm": 589.8414916992188, "learning_rate": 0.00019857513882786754, "loss": 7.0478, "step": 49620 }, { "epoch": 5.9723225030084235, "grad_norm": 1590.6165771484375, "learning_rate": 0.00019857449884487258, "loss": 7.4665, "step": 49630 }, { "epoch": 5.97352587244284, "grad_norm": 58.226348876953125, "learning_rate": 0.000198573858719216, "loss": 7.8009, "step": 49640 }, { "epoch": 5.974729241877256, "grad_norm": 287.2764892578125, "learning_rate": 0.00019857321845089868, "loss": 7.7155, "step": 49650 }, { "epoch": 5.975932611311673, "grad_norm": 37.035423278808594, "learning_rate": 0.0001985725780399216, "loss": 7.5602, "step": 49660 }, { "epoch": 5.977135980746089, "grad_norm": 8.563516616821289, "learning_rate": 0.00019857193748628566, "loss": 7.5437, "step": 49670 }, { "epoch": 5.978339350180505, "grad_norm": 6.727244853973389, "learning_rate": 0.00019857129678999181, "loss": 7.6304, "step": 49680 }, { "epoch": 5.979542719614922, "grad_norm": 17.694631576538086, "learning_rate": 0.00019857065595104093, "loss": 7.7271, "step": 49690 }, { "epoch": 5.9807460890493385, "grad_norm": 17.664844512939453, "learning_rate": 0.00019857001496943397, "loss": 7.6097, "step": 49700 }, { "epoch": 5.981949458483754, "grad_norm": 26.714693069458008, "learning_rate": 0.00019856937384517186, "loss": 7.4509, "step": 49710 }, { "epoch": 5.983152827918171, "grad_norm": 8.991143226623535, "learning_rate": 0.00019856873257825556, "loss": 7.3315, "step": 49720 }, { "epoch": 5.984356197352588, "grad_norm": 6.968205451965332, "learning_rate": 0.00019856809116868592, "loss": 7.6477, "step": 49730 }, { "epoch": 5.985559566787003, "grad_norm": 14.680328369140625, "learning_rate": 0.00019856744961646396, "loss": 7.6382, "step": 49740 }, { "epoch": 5.98676293622142, "grad_norm": 29.047746658325195, "learning_rate": 0.00019856680792159056, "loss": 7.4855, "step": 49750 }, { "epoch": 5.987966305655837, "grad_norm": 5.303946018218994, "learning_rate": 0.00019856616608406665, "loss": 7.6234, "step": 49760 }, { "epoch": 5.9891696750902526, "grad_norm": 3.9366724491119385, "learning_rate": 0.00019856552410389315, "loss": 7.4582, "step": 49770 }, { "epoch": 5.990373044524669, "grad_norm": 5.2910919189453125, "learning_rate": 0.000198564881981071, "loss": 7.3924, "step": 49780 }, { "epoch": 5.991576413959086, "grad_norm": 28.09467124938965, "learning_rate": 0.00019856423971560113, "loss": 7.3908, "step": 49790 }, { "epoch": 5.992779783393502, "grad_norm": 1442.5479736328125, "learning_rate": 0.0001985635973074845, "loss": 7.6586, "step": 49800 }, { "epoch": 5.993983152827918, "grad_norm": 34.08098220825195, "learning_rate": 0.000198562954756722, "loss": 7.5497, "step": 49810 }, { "epoch": 5.995186522262334, "grad_norm": 94.67636108398438, "learning_rate": 0.00019856231206331456, "loss": 7.4068, "step": 49820 }, { "epoch": 5.996389891696751, "grad_norm": 40.775840759277344, "learning_rate": 0.0001985616692272631, "loss": 7.2559, "step": 49830 }, { "epoch": 5.9975932611311675, "grad_norm": 149.15672302246094, "learning_rate": 0.0001985610262485686, "loss": 7.144, "step": 49840 }, { "epoch": 5.998796630565583, "grad_norm": 458.82275390625, "learning_rate": 0.00019856038312723192, "loss": 8.0931, "step": 49850 }, { "epoch": 6.0, "grad_norm": 65.29741668701172, "learning_rate": 0.00019855973986325408, "loss": 7.7783, "step": 49860 }, { "epoch": 6.0, "eval_loss": 8.033663749694824, "eval_runtime": 119.4338, "eval_samples_per_second": 61.85, "eval_steps_per_second": 7.737, "step": 49860 }, { "epoch": 6.001203369434417, "grad_norm": 41.65032958984375, "learning_rate": 0.00019855909645663595, "loss": 7.717, "step": 49870 }, { "epoch": 6.0024067388688325, "grad_norm": 168.38162231445312, "learning_rate": 0.00019855845290737848, "loss": 7.577, "step": 49880 }, { "epoch": 6.003610108303249, "grad_norm": 43.669158935546875, "learning_rate": 0.0001985578092154826, "loss": 7.3283, "step": 49890 }, { "epoch": 6.004813477737666, "grad_norm": 128.16262817382812, "learning_rate": 0.00019855716538094923, "loss": 7.3191, "step": 49900 }, { "epoch": 6.006016847172082, "grad_norm": 82.70264434814453, "learning_rate": 0.00019855652140377927, "loss": 7.3742, "step": 49910 }, { "epoch": 6.007220216606498, "grad_norm": 107.43938446044922, "learning_rate": 0.00019855587728397374, "loss": 7.2678, "step": 49920 }, { "epoch": 6.008423586040915, "grad_norm": 199.47250366210938, "learning_rate": 0.0001985552330215335, "loss": 7.0853, "step": 49930 }, { "epoch": 6.009626955475331, "grad_norm": 44.25736618041992, "learning_rate": 0.0001985545886164595, "loss": 7.1751, "step": 49940 }, { "epoch": 6.0108303249097474, "grad_norm": 29.525846481323242, "learning_rate": 0.0001985539440687527, "loss": 7.16, "step": 49950 }, { "epoch": 6.012033694344163, "grad_norm": 35.5792350769043, "learning_rate": 0.000198553299378414, "loss": 7.0683, "step": 49960 }, { "epoch": 6.01323706377858, "grad_norm": 155.37547302246094, "learning_rate": 0.00019855265454544433, "loss": 7.0179, "step": 49970 }, { "epoch": 6.014440433212997, "grad_norm": 24.824987411499023, "learning_rate": 0.00019855200956984465, "loss": 6.9366, "step": 49980 }, { "epoch": 6.015643802647412, "grad_norm": 33.48954772949219, "learning_rate": 0.00019855136445161588, "loss": 7.1316, "step": 49990 }, { "epoch": 6.016847172081829, "grad_norm": 51.579593658447266, "learning_rate": 0.00019855071919075895, "loss": 6.9995, "step": 50000 }, { "epoch": 6.018050541516246, "grad_norm": 21.164052963256836, "learning_rate": 0.00019855007378727482, "loss": 6.96, "step": 50010 }, { "epoch": 6.0192539109506615, "grad_norm": 26.542234420776367, "learning_rate": 0.00019854942824116436, "loss": 6.9492, "step": 50020 }, { "epoch": 6.020457280385078, "grad_norm": 21.792335510253906, "learning_rate": 0.0001985487825524286, "loss": 6.8295, "step": 50030 }, { "epoch": 6.021660649819495, "grad_norm": 83.90550231933594, "learning_rate": 0.00019854813672106838, "loss": 6.746, "step": 50040 }, { "epoch": 6.022864019253911, "grad_norm": 28.177583694458008, "learning_rate": 0.0001985474907470847, "loss": 6.6812, "step": 50050 }, { "epoch": 6.024067388688327, "grad_norm": 100.0069808959961, "learning_rate": 0.00019854684463047843, "loss": 6.6394, "step": 50060 }, { "epoch": 6.025270758122744, "grad_norm": 17.259765625, "learning_rate": 0.00019854619837125057, "loss": 6.7483, "step": 50070 }, { "epoch": 6.02647412755716, "grad_norm": 41.67375564575195, "learning_rate": 0.00019854555196940202, "loss": 6.8623, "step": 50080 }, { "epoch": 6.0276774969915765, "grad_norm": 38.76065444946289, "learning_rate": 0.00019854490542493375, "loss": 6.8325, "step": 50090 }, { "epoch": 6.028880866425993, "grad_norm": 20.695940017700195, "learning_rate": 0.00019854425873784667, "loss": 7.3002, "step": 50100 }, { "epoch": 6.030084235860409, "grad_norm": 34.97782516479492, "learning_rate": 0.00019854361190814172, "loss": 7.2257, "step": 50110 }, { "epoch": 6.031287605294826, "grad_norm": 43.32956314086914, "learning_rate": 0.00019854296493581984, "loss": 7.3992, "step": 50120 }, { "epoch": 6.0324909747292415, "grad_norm": 124.94662475585938, "learning_rate": 0.00019854231782088193, "loss": 6.9233, "step": 50130 }, { "epoch": 6.033694344163658, "grad_norm": 356.6945495605469, "learning_rate": 0.00019854167056332895, "loss": 7.1386, "step": 50140 }, { "epoch": 6.034897713598075, "grad_norm": 1409.9876708984375, "learning_rate": 0.00019854102316316187, "loss": 7.4419, "step": 50150 }, { "epoch": 6.036101083032491, "grad_norm": 456.3817138671875, "learning_rate": 0.0001985403756203816, "loss": 7.7544, "step": 50160 }, { "epoch": 6.037304452466907, "grad_norm": 134.14495849609375, "learning_rate": 0.00019853972793498908, "loss": 8.5643, "step": 50170 }, { "epoch": 6.038507821901324, "grad_norm": 103.23419189453125, "learning_rate": 0.00019853908010698522, "loss": 7.7766, "step": 50180 }, { "epoch": 6.03971119133574, "grad_norm": 145.68394470214844, "learning_rate": 0.00019853843213637101, "loss": 7.4989, "step": 50190 }, { "epoch": 6.040914560770156, "grad_norm": 10.152735710144043, "learning_rate": 0.00019853778402314736, "loss": 7.5882, "step": 50200 }, { "epoch": 6.042117930204573, "grad_norm": 15.388716697692871, "learning_rate": 0.00019853713576731518, "loss": 7.5852, "step": 50210 }, { "epoch": 6.043321299638989, "grad_norm": 10.143814086914062, "learning_rate": 0.00019853648736887545, "loss": 7.5448, "step": 50220 }, { "epoch": 6.044524669073406, "grad_norm": 76.52864837646484, "learning_rate": 0.00019853583882782912, "loss": 7.5189, "step": 50230 }, { "epoch": 6.045728038507822, "grad_norm": 675.9168701171875, "learning_rate": 0.00019853519014417708, "loss": 7.5494, "step": 50240 }, { "epoch": 6.046931407942238, "grad_norm": 22.926326751708984, "learning_rate": 0.00019853454131792028, "loss": 7.8772, "step": 50250 }, { "epoch": 6.048134777376655, "grad_norm": 57.346290588378906, "learning_rate": 0.0001985338923490597, "loss": 7.6679, "step": 50260 }, { "epoch": 6.049338146811071, "grad_norm": 14.811667442321777, "learning_rate": 0.00019853324323759622, "loss": 7.5137, "step": 50270 }, { "epoch": 6.050541516245487, "grad_norm": 4.689547061920166, "learning_rate": 0.00019853259398353083, "loss": 7.5534, "step": 50280 }, { "epoch": 6.051744885679904, "grad_norm": 27.728551864624023, "learning_rate": 0.00019853194458686446, "loss": 7.3619, "step": 50290 }, { "epoch": 6.05294825511432, "grad_norm": 30.655689239501953, "learning_rate": 0.000198531295047598, "loss": 7.3617, "step": 50300 }, { "epoch": 6.054151624548736, "grad_norm": 48.561012268066406, "learning_rate": 0.00019853064536573246, "loss": 7.4353, "step": 50310 }, { "epoch": 6.055354993983153, "grad_norm": 155.85556030273438, "learning_rate": 0.00019852999554126878, "loss": 7.241, "step": 50320 }, { "epoch": 6.056558363417569, "grad_norm": 73.67689514160156, "learning_rate": 0.0001985293455742078, "loss": 7.2979, "step": 50330 }, { "epoch": 6.0577617328519855, "grad_norm": 90.59698486328125, "learning_rate": 0.00019852869546455057, "loss": 7.9339, "step": 50340 }, { "epoch": 6.058965102286402, "grad_norm": 415.32452392578125, "learning_rate": 0.000198528045212298, "loss": 7.5561, "step": 50350 }, { "epoch": 6.060168471720818, "grad_norm": 161.16436767578125, "learning_rate": 0.00019852739481745102, "loss": 7.2972, "step": 50360 }, { "epoch": 6.061371841155235, "grad_norm": 313.7480163574219, "learning_rate": 0.00019852674428001054, "loss": 7.3981, "step": 50370 }, { "epoch": 6.062575210589651, "grad_norm": 105.19052124023438, "learning_rate": 0.00019852609359997757, "loss": 7.4124, "step": 50380 }, { "epoch": 6.063778580024067, "grad_norm": 53.43791198730469, "learning_rate": 0.00019852544277735305, "loss": 7.2467, "step": 50390 }, { "epoch": 6.064981949458484, "grad_norm": 1362.7615966796875, "learning_rate": 0.0001985247918121378, "loss": 7.4411, "step": 50400 }, { "epoch": 6.0661853188929005, "grad_norm": 50.751976013183594, "learning_rate": 0.00019852414070433292, "loss": 7.5127, "step": 50410 }, { "epoch": 6.067388688327316, "grad_norm": 124.1102523803711, "learning_rate": 0.00019852348945393925, "loss": 7.4167, "step": 50420 }, { "epoch": 6.068592057761733, "grad_norm": 63.0703125, "learning_rate": 0.00019852283806095778, "loss": 7.4491, "step": 50430 }, { "epoch": 6.06979542719615, "grad_norm": 9.66748332977295, "learning_rate": 0.00019852218652538945, "loss": 7.4479, "step": 50440 }, { "epoch": 6.070998796630565, "grad_norm": 4.118216514587402, "learning_rate": 0.00019852153484723516, "loss": 7.5609, "step": 50450 }, { "epoch": 6.072202166064982, "grad_norm": 7.636483192443848, "learning_rate": 0.0001985208830264959, "loss": 7.5435, "step": 50460 }, { "epoch": 6.073405535499398, "grad_norm": 10.706920623779297, "learning_rate": 0.0001985202310631726, "loss": 7.2436, "step": 50470 }, { "epoch": 6.074608904933815, "grad_norm": 5.278939247131348, "learning_rate": 0.00019851957895726622, "loss": 7.2387, "step": 50480 }, { "epoch": 6.075812274368231, "grad_norm": 18.34695816040039, "learning_rate": 0.00019851892670877765, "loss": 7.0852, "step": 50490 }, { "epoch": 6.077015643802647, "grad_norm": 266.5223693847656, "learning_rate": 0.0001985182743177079, "loss": 7.1159, "step": 50500 }, { "epoch": 6.078219013237064, "grad_norm": 18.489425659179688, "learning_rate": 0.00019851762178405786, "loss": 7.0828, "step": 50510 }, { "epoch": 6.07942238267148, "grad_norm": 8.026512145996094, "learning_rate": 0.00019851696910782853, "loss": 6.9848, "step": 50520 }, { "epoch": 6.080625752105896, "grad_norm": 7.6296610832214355, "learning_rate": 0.0001985163162890208, "loss": 6.9406, "step": 50530 }, { "epoch": 6.081829121540313, "grad_norm": 8.11501693725586, "learning_rate": 0.00019851566332763562, "loss": 6.8414, "step": 50540 }, { "epoch": 6.0830324909747295, "grad_norm": 15.515129089355469, "learning_rate": 0.000198515010223674, "loss": 6.9176, "step": 50550 }, { "epoch": 6.084235860409145, "grad_norm": 5.048215866088867, "learning_rate": 0.0001985143569771368, "loss": 6.7867, "step": 50560 }, { "epoch": 6.085439229843562, "grad_norm": 12.029462814331055, "learning_rate": 0.00019851370358802502, "loss": 6.7815, "step": 50570 }, { "epoch": 6.086642599277979, "grad_norm": 11.015639305114746, "learning_rate": 0.00019851305005633957, "loss": 6.6205, "step": 50580 }, { "epoch": 6.0878459687123945, "grad_norm": 9.940922737121582, "learning_rate": 0.00019851239638208143, "loss": 6.6384, "step": 50590 }, { "epoch": 6.089049338146811, "grad_norm": 12.857138633728027, "learning_rate": 0.00019851174256525154, "loss": 6.7911, "step": 50600 }, { "epoch": 6.090252707581228, "grad_norm": 14.697220802307129, "learning_rate": 0.0001985110886058508, "loss": 6.6931, "step": 50610 }, { "epoch": 6.091456077015644, "grad_norm": 6.209646224975586, "learning_rate": 0.00019851043450388022, "loss": 6.722, "step": 50620 }, { "epoch": 6.09265944645006, "grad_norm": 4.238023281097412, "learning_rate": 0.00019850978025934073, "loss": 6.6767, "step": 50630 }, { "epoch": 6.093862815884476, "grad_norm": 12.614048957824707, "learning_rate": 0.00019850912587223324, "loss": 6.633, "step": 50640 }, { "epoch": 6.095066185318893, "grad_norm": 6.149070739746094, "learning_rate": 0.00019850847134255875, "loss": 6.5404, "step": 50650 }, { "epoch": 6.0962695547533094, "grad_norm": 7.979625701904297, "learning_rate": 0.00019850781667031818, "loss": 6.5466, "step": 50660 }, { "epoch": 6.097472924187725, "grad_norm": 7.836061477661133, "learning_rate": 0.00019850716185551245, "loss": 6.4531, "step": 50670 }, { "epoch": 6.098676293622142, "grad_norm": 9.258145332336426, "learning_rate": 0.00019850650689814256, "loss": 6.6148, "step": 50680 }, { "epoch": 6.099879663056559, "grad_norm": 7.844076156616211, "learning_rate": 0.0001985058517982094, "loss": 6.694, "step": 50690 }, { "epoch": 6.101083032490974, "grad_norm": 25.579376220703125, "learning_rate": 0.000198505196555714, "loss": 6.5015, "step": 50700 }, { "epoch": 6.102286401925391, "grad_norm": 42.913421630859375, "learning_rate": 0.00019850454117065723, "loss": 6.5634, "step": 50710 }, { "epoch": 6.103489771359808, "grad_norm": 29.641809463500977, "learning_rate": 0.00019850388564304007, "loss": 6.5993, "step": 50720 }, { "epoch": 6.1046931407942235, "grad_norm": 169.4796142578125, "learning_rate": 0.00019850322997286347, "loss": 6.5776, "step": 50730 }, { "epoch": 6.10589651022864, "grad_norm": 235.8825225830078, "learning_rate": 0.0001985025741601284, "loss": 7.1905, "step": 50740 }, { "epoch": 6.107099879663057, "grad_norm": 408.1968078613281, "learning_rate": 0.00019850191820483578, "loss": 7.678, "step": 50750 }, { "epoch": 6.108303249097473, "grad_norm": 86.42868041992188, "learning_rate": 0.00019850126210698658, "loss": 7.5839, "step": 50760 }, { "epoch": 6.109506618531889, "grad_norm": 27.87179183959961, "learning_rate": 0.00019850060586658168, "loss": 7.1222, "step": 50770 }, { "epoch": 6.110709987966306, "grad_norm": 18.759296417236328, "learning_rate": 0.0001984999494836221, "loss": 6.8401, "step": 50780 }, { "epoch": 6.111913357400722, "grad_norm": 23.341739654541016, "learning_rate": 0.0001984992929581088, "loss": 6.6808, "step": 50790 }, { "epoch": 6.1131167268351385, "grad_norm": 12.954388618469238, "learning_rate": 0.00019849863629004268, "loss": 6.6503, "step": 50800 }, { "epoch": 6.114320096269555, "grad_norm": 9.701177597045898, "learning_rate": 0.00019849797947942474, "loss": 6.613, "step": 50810 }, { "epoch": 6.115523465703971, "grad_norm": 8.269774436950684, "learning_rate": 0.00019849732252625587, "loss": 6.5937, "step": 50820 }, { "epoch": 6.116726835138388, "grad_norm": 87.7800521850586, "learning_rate": 0.00019849666543053708, "loss": 6.5692, "step": 50830 }, { "epoch": 6.1179302045728035, "grad_norm": 752.7943115234375, "learning_rate": 0.0001984960081922693, "loss": 6.763, "step": 50840 }, { "epoch": 6.11913357400722, "grad_norm": 102.5551528930664, "learning_rate": 0.00019849535081145348, "loss": 6.7909, "step": 50850 }, { "epoch": 6.120336943441637, "grad_norm": 83.97057342529297, "learning_rate": 0.00019849469328809055, "loss": 6.5917, "step": 50860 }, { "epoch": 6.121540312876053, "grad_norm": 73.70748901367188, "learning_rate": 0.00019849403562218152, "loss": 6.5882, "step": 50870 }, { "epoch": 6.122743682310469, "grad_norm": 50.015472412109375, "learning_rate": 0.00019849337781372724, "loss": 6.65, "step": 50880 }, { "epoch": 6.123947051744886, "grad_norm": 69.71155548095703, "learning_rate": 0.0001984927198627288, "loss": 6.6931, "step": 50890 }, { "epoch": 6.125150421179302, "grad_norm": 150.67527770996094, "learning_rate": 0.00019849206176918702, "loss": 6.8684, "step": 50900 }, { "epoch": 6.126353790613718, "grad_norm": 216.06704711914062, "learning_rate": 0.00019849140353310292, "loss": 7.6943, "step": 50910 }, { "epoch": 6.127557160048135, "grad_norm": 6.53950309753418, "learning_rate": 0.00019849074515447746, "loss": 7.7072, "step": 50920 }, { "epoch": 6.128760529482551, "grad_norm": 6.397497653961182, "learning_rate": 0.00019849008663331156, "loss": 7.4985, "step": 50930 }, { "epoch": 6.129963898916968, "grad_norm": 15.183296203613281, "learning_rate": 0.00019848942796960622, "loss": 7.2878, "step": 50940 }, { "epoch": 6.131167268351384, "grad_norm": 3.0410451889038086, "learning_rate": 0.0001984887691633623, "loss": 7.2639, "step": 50950 }, { "epoch": 6.1323706377858, "grad_norm": 1423.0872802734375, "learning_rate": 0.00019848811021458087, "loss": 7.554, "step": 50960 }, { "epoch": 6.133574007220217, "grad_norm": 202.90269470214844, "learning_rate": 0.00019848745112326278, "loss": 7.8729, "step": 50970 }, { "epoch": 6.1347773766546325, "grad_norm": 72.13835144042969, "learning_rate": 0.00019848679188940908, "loss": 7.7909, "step": 50980 }, { "epoch": 6.135980746089049, "grad_norm": 779.5035400390625, "learning_rate": 0.00019848613251302065, "loss": 7.6156, "step": 50990 }, { "epoch": 6.137184115523466, "grad_norm": 31.454639434814453, "learning_rate": 0.00019848547299409849, "loss": 7.65, "step": 51000 }, { "epoch": 6.138387484957882, "grad_norm": 47.65021896362305, "learning_rate": 0.00019848481333264352, "loss": 7.5042, "step": 51010 }, { "epoch": 6.139590854392298, "grad_norm": 10.336441993713379, "learning_rate": 0.0001984841535286567, "loss": 7.4727, "step": 51020 }, { "epoch": 6.140794223826715, "grad_norm": 4.083159446716309, "learning_rate": 0.00019848349358213906, "loss": 7.2821, "step": 51030 }, { "epoch": 6.141997593261131, "grad_norm": 11.308076858520508, "learning_rate": 0.0001984828334930914, "loss": 7.1853, "step": 51040 }, { "epoch": 6.1432009626955475, "grad_norm": 18.26197624206543, "learning_rate": 0.00019848217326151482, "loss": 7.2388, "step": 51050 }, { "epoch": 6.144404332129964, "grad_norm": 2207.310791015625, "learning_rate": 0.0001984815128874102, "loss": 7.2516, "step": 51060 }, { "epoch": 6.14560770156438, "grad_norm": 49.99858856201172, "learning_rate": 0.00019848085237077854, "loss": 7.4258, "step": 51070 }, { "epoch": 6.146811070998797, "grad_norm": 151.1298828125, "learning_rate": 0.00019848019171162074, "loss": 7.5135, "step": 51080 }, { "epoch": 6.148014440433213, "grad_norm": 11.325151443481445, "learning_rate": 0.00019847953090993778, "loss": 7.8672, "step": 51090 }, { "epoch": 6.149217809867629, "grad_norm": 9.93419075012207, "learning_rate": 0.00019847886996573067, "loss": 7.4075, "step": 51100 }, { "epoch": 6.150421179302046, "grad_norm": 11.671231269836426, "learning_rate": 0.0001984782088790003, "loss": 7.2521, "step": 51110 }, { "epoch": 6.1516245487364625, "grad_norm": 8.38558292388916, "learning_rate": 0.00019847754764974764, "loss": 7.0416, "step": 51120 }, { "epoch": 6.152827918170878, "grad_norm": 10.745943069458008, "learning_rate": 0.00019847688627797367, "loss": 7.0191, "step": 51130 }, { "epoch": 6.154031287605295, "grad_norm": 9.660490989685059, "learning_rate": 0.00019847622476367932, "loss": 6.8097, "step": 51140 }, { "epoch": 6.155234657039712, "grad_norm": 7.7755045890808105, "learning_rate": 0.00019847556310686557, "loss": 6.7182, "step": 51150 }, { "epoch": 6.156438026474127, "grad_norm": 374.6153869628906, "learning_rate": 0.00019847490130753338, "loss": 6.8375, "step": 51160 }, { "epoch": 6.157641395908544, "grad_norm": 253.876953125, "learning_rate": 0.00019847423936568366, "loss": 7.1427, "step": 51170 }, { "epoch": 6.15884476534296, "grad_norm": 111.31069946289062, "learning_rate": 0.0001984735772813174, "loss": 7.0288, "step": 51180 }, { "epoch": 6.160048134777377, "grad_norm": 57.93216323852539, "learning_rate": 0.0001984729150544356, "loss": 7.2709, "step": 51190 }, { "epoch": 6.161251504211793, "grad_norm": 9.835779190063477, "learning_rate": 0.00019847225268503915, "loss": 7.0617, "step": 51200 }, { "epoch": 6.162454873646209, "grad_norm": 49.00759506225586, "learning_rate": 0.00019847159017312907, "loss": 6.8884, "step": 51210 }, { "epoch": 6.163658243080626, "grad_norm": 98.64542388916016, "learning_rate": 0.00019847092751870625, "loss": 7.3326, "step": 51220 }, { "epoch": 6.164861612515042, "grad_norm": 27.575584411621094, "learning_rate": 0.00019847026472177173, "loss": 7.4802, "step": 51230 }, { "epoch": 6.166064981949458, "grad_norm": 10.02219295501709, "learning_rate": 0.0001984696017823264, "loss": 7.3218, "step": 51240 }, { "epoch": 6.167268351383875, "grad_norm": 18.55671501159668, "learning_rate": 0.00019846893870037122, "loss": 7.2338, "step": 51250 }, { "epoch": 6.1684717208182915, "grad_norm": 14.191424369812012, "learning_rate": 0.0001984682754759072, "loss": 7.009, "step": 51260 }, { "epoch": 6.169675090252707, "grad_norm": 8.056644439697266, "learning_rate": 0.00019846761210893528, "loss": 6.9959, "step": 51270 }, { "epoch": 6.170878459687124, "grad_norm": 54.10923767089844, "learning_rate": 0.0001984669485994564, "loss": 6.816, "step": 51280 }, { "epoch": 6.172081829121541, "grad_norm": 29.607463836669922, "learning_rate": 0.00019846628494747155, "loss": 6.8708, "step": 51290 }, { "epoch": 6.1732851985559565, "grad_norm": 137.29750061035156, "learning_rate": 0.00019846562115298166, "loss": 6.7867, "step": 51300 }, { "epoch": 6.174488567990373, "grad_norm": 175.81932067871094, "learning_rate": 0.0001984649572159877, "loss": 7.2438, "step": 51310 }, { "epoch": 6.17569193742479, "grad_norm": 91.62136840820312, "learning_rate": 0.00019846429313649067, "loss": 7.5339, "step": 51320 }, { "epoch": 6.176895306859206, "grad_norm": 129.83792114257812, "learning_rate": 0.0001984636289144915, "loss": 7.4754, "step": 51330 }, { "epoch": 6.178098676293622, "grad_norm": 205.80770874023438, "learning_rate": 0.0001984629645499911, "loss": 7.3861, "step": 51340 }, { "epoch": 6.179302045728038, "grad_norm": 145.01470947265625, "learning_rate": 0.0001984623000429905, "loss": 7.3334, "step": 51350 }, { "epoch": 6.180505415162455, "grad_norm": 780.2528686523438, "learning_rate": 0.00019846163539349065, "loss": 7.5486, "step": 51360 }, { "epoch": 6.1817087845968715, "grad_norm": 12.88818359375, "learning_rate": 0.0001984609706014925, "loss": 8.5478, "step": 51370 }, { "epoch": 6.182912154031287, "grad_norm": 8.204535484313965, "learning_rate": 0.00019846030566699702, "loss": 7.3626, "step": 51380 }, { "epoch": 6.184115523465704, "grad_norm": 50.635101318359375, "learning_rate": 0.00019845964059000517, "loss": 7.0688, "step": 51390 }, { "epoch": 6.185318892900121, "grad_norm": 84.38154602050781, "learning_rate": 0.00019845897537051793, "loss": 7.188, "step": 51400 }, { "epoch": 6.186522262334536, "grad_norm": 31.963542938232422, "learning_rate": 0.00019845831000853622, "loss": 7.0432, "step": 51410 }, { "epoch": 6.187725631768953, "grad_norm": 14.625631332397461, "learning_rate": 0.00019845764450406104, "loss": 7.028, "step": 51420 }, { "epoch": 6.18892900120337, "grad_norm": 19.176286697387695, "learning_rate": 0.00019845697885709332, "loss": 7.1465, "step": 51430 }, { "epoch": 6.1901323706377855, "grad_norm": 198.12120056152344, "learning_rate": 0.00019845631306763406, "loss": 7.3459, "step": 51440 }, { "epoch": 6.191335740072202, "grad_norm": 8.463104248046875, "learning_rate": 0.00019845564713568417, "loss": 7.2657, "step": 51450 }, { "epoch": 6.192539109506619, "grad_norm": 28.150205612182617, "learning_rate": 0.0001984549810612447, "loss": 7.0837, "step": 51460 }, { "epoch": 6.193742478941035, "grad_norm": 53.76273727416992, "learning_rate": 0.00019845431484431653, "loss": 7.1347, "step": 51470 }, { "epoch": 6.194945848375451, "grad_norm": 21.740747451782227, "learning_rate": 0.0001984536484849007, "loss": 7.0647, "step": 51480 }, { "epoch": 6.196149217809868, "grad_norm": 328.05584716796875, "learning_rate": 0.00019845298198299812, "loss": 7.0085, "step": 51490 }, { "epoch": 6.197352587244284, "grad_norm": 184.19383239746094, "learning_rate": 0.00019845231533860972, "loss": 7.113, "step": 51500 }, { "epoch": 6.1985559566787005, "grad_norm": 62.600467681884766, "learning_rate": 0.00019845164855173658, "loss": 7.0953, "step": 51510 }, { "epoch": 6.199759326113116, "grad_norm": 14.029105186462402, "learning_rate": 0.00019845098162237954, "loss": 7.2352, "step": 51520 }, { "epoch": 6.200962695547533, "grad_norm": 25.74992561340332, "learning_rate": 0.00019845031455053964, "loss": 7.0544, "step": 51530 }, { "epoch": 6.20216606498195, "grad_norm": 59.89943313598633, "learning_rate": 0.00019844964733621784, "loss": 7.0761, "step": 51540 }, { "epoch": 6.2033694344163655, "grad_norm": 33.79304504394531, "learning_rate": 0.0001984489799794151, "loss": 7.1421, "step": 51550 }, { "epoch": 6.204572803850782, "grad_norm": 53.30209732055664, "learning_rate": 0.00019844831248013237, "loss": 7.0327, "step": 51560 }, { "epoch": 6.205776173285199, "grad_norm": 52.294410705566406, "learning_rate": 0.0001984476448383706, "loss": 7.0361, "step": 51570 }, { "epoch": 6.206979542719615, "grad_norm": 38.50944137573242, "learning_rate": 0.00019844697705413084, "loss": 7.0574, "step": 51580 }, { "epoch": 6.208182912154031, "grad_norm": 103.11620330810547, "learning_rate": 0.00019844630912741398, "loss": 7.074, "step": 51590 }, { "epoch": 6.209386281588448, "grad_norm": 30.791276931762695, "learning_rate": 0.000198445641058221, "loss": 7.0055, "step": 51600 }, { "epoch": 6.210589651022864, "grad_norm": 54.68841552734375, "learning_rate": 0.00019844497284655287, "loss": 6.9821, "step": 51610 }, { "epoch": 6.21179302045728, "grad_norm": 62.289852142333984, "learning_rate": 0.00019844430449241052, "loss": 6.8426, "step": 51620 }, { "epoch": 6.212996389891697, "grad_norm": 11.764166831970215, "learning_rate": 0.00019844363599579498, "loss": 6.9789, "step": 51630 }, { "epoch": 6.214199759326113, "grad_norm": 15.638115882873535, "learning_rate": 0.00019844296735670722, "loss": 7.0158, "step": 51640 }, { "epoch": 6.21540312876053, "grad_norm": 38.82330322265625, "learning_rate": 0.00019844229857514818, "loss": 6.9124, "step": 51650 }, { "epoch": 6.216606498194946, "grad_norm": 29.755327224731445, "learning_rate": 0.0001984416296511188, "loss": 6.969, "step": 51660 }, { "epoch": 6.217809867629362, "grad_norm": 20.518600463867188, "learning_rate": 0.00019844096058462007, "loss": 6.834, "step": 51670 }, { "epoch": 6.219013237063779, "grad_norm": 22.286468505859375, "learning_rate": 0.000198440291375653, "loss": 6.8331, "step": 51680 }, { "epoch": 6.2202166064981945, "grad_norm": 18.754711151123047, "learning_rate": 0.00019843962202421853, "loss": 6.8883, "step": 51690 }, { "epoch": 6.221419975932611, "grad_norm": 14.924210548400879, "learning_rate": 0.0001984389525303176, "loss": 6.9185, "step": 51700 }, { "epoch": 6.222623345367028, "grad_norm": 267.7951354980469, "learning_rate": 0.00019843828289395118, "loss": 7.0813, "step": 51710 }, { "epoch": 6.223826714801444, "grad_norm": 46.562496185302734, "learning_rate": 0.0001984376131151203, "loss": 6.8518, "step": 51720 }, { "epoch": 6.22503008423586, "grad_norm": 32.995338439941406, "learning_rate": 0.00019843694319382585, "loss": 6.8675, "step": 51730 }, { "epoch": 6.226233453670277, "grad_norm": 33.15148162841797, "learning_rate": 0.0001984362731300689, "loss": 6.7144, "step": 51740 }, { "epoch": 6.227436823104693, "grad_norm": 103.49129486083984, "learning_rate": 0.00019843560292385034, "loss": 6.6703, "step": 51750 }, { "epoch": 6.2286401925391095, "grad_norm": 23.396648406982422, "learning_rate": 0.00019843493257517113, "loss": 6.8905, "step": 51760 }, { "epoch": 6.229843561973526, "grad_norm": 66.21638488769531, "learning_rate": 0.00019843426208403228, "loss": 6.8329, "step": 51770 }, { "epoch": 6.231046931407942, "grad_norm": 25.776018142700195, "learning_rate": 0.00019843359145043477, "loss": 6.7987, "step": 51780 }, { "epoch": 6.232250300842359, "grad_norm": 74.73793029785156, "learning_rate": 0.00019843292067437952, "loss": 6.7562, "step": 51790 }, { "epoch": 6.233453670276775, "grad_norm": 20.916358947753906, "learning_rate": 0.00019843224975586756, "loss": 6.8506, "step": 51800 }, { "epoch": 6.234657039711191, "grad_norm": 35.76206970214844, "learning_rate": 0.0001984315786948998, "loss": 6.6572, "step": 51810 }, { "epoch": 6.235860409145608, "grad_norm": 20.380142211914062, "learning_rate": 0.0001984309074914773, "loss": 6.746, "step": 51820 }, { "epoch": 6.2370637785800245, "grad_norm": 60.54458236694336, "learning_rate": 0.0001984302361456009, "loss": 6.7675, "step": 51830 }, { "epoch": 6.23826714801444, "grad_norm": 103.00148010253906, "learning_rate": 0.00019842956465727173, "loss": 6.8724, "step": 51840 }, { "epoch": 6.239470517448857, "grad_norm": 105.30757141113281, "learning_rate": 0.00019842889302649062, "loss": 6.7432, "step": 51850 }, { "epoch": 6.240673886883273, "grad_norm": 44.128936767578125, "learning_rate": 0.0001984282212532586, "loss": 6.7083, "step": 51860 }, { "epoch": 6.241877256317689, "grad_norm": 561.1841430664062, "learning_rate": 0.0001984275493375767, "loss": 6.9873, "step": 51870 }, { "epoch": 6.243080625752106, "grad_norm": 41.606361389160156, "learning_rate": 0.00019842687727944576, "loss": 6.873, "step": 51880 }, { "epoch": 6.244283995186522, "grad_norm": 67.96810150146484, "learning_rate": 0.00019842620507886687, "loss": 6.9031, "step": 51890 }, { "epoch": 6.245487364620939, "grad_norm": 48.38755416870117, "learning_rate": 0.00019842553273584094, "loss": 6.9124, "step": 51900 }, { "epoch": 6.246690734055355, "grad_norm": 79.95443725585938, "learning_rate": 0.00019842486025036898, "loss": 6.8622, "step": 51910 }, { "epoch": 6.247894103489771, "grad_norm": 110.63379669189453, "learning_rate": 0.00019842418762245194, "loss": 6.773, "step": 51920 }, { "epoch": 6.249097472924188, "grad_norm": 295.70623779296875, "learning_rate": 0.00019842351485209082, "loss": 6.977, "step": 51930 }, { "epoch": 6.250300842358604, "grad_norm": 324.2843017578125, "learning_rate": 0.00019842284193928654, "loss": 7.1088, "step": 51940 }, { "epoch": 6.25150421179302, "grad_norm": 128.91664123535156, "learning_rate": 0.00019842216888404013, "loss": 6.9703, "step": 51950 }, { "epoch": 6.252707581227437, "grad_norm": 90.00405883789062, "learning_rate": 0.00019842149568635254, "loss": 7.0504, "step": 51960 }, { "epoch": 6.2539109506618535, "grad_norm": 28.90699577331543, "learning_rate": 0.0001984208223462247, "loss": 7.0963, "step": 51970 }, { "epoch": 6.255114320096269, "grad_norm": 170.06979370117188, "learning_rate": 0.00019842014886365772, "loss": 7.0379, "step": 51980 }, { "epoch": 6.256317689530686, "grad_norm": 73.8154525756836, "learning_rate": 0.00019841947523865244, "loss": 7.0657, "step": 51990 }, { "epoch": 6.257521058965103, "grad_norm": 47.21464538574219, "learning_rate": 0.00019841880147120988, "loss": 6.8675, "step": 52000 }, { "epoch": 6.2587244283995185, "grad_norm": 62.993038177490234, "learning_rate": 0.000198418127561331, "loss": 6.7525, "step": 52010 }, { "epoch": 6.259927797833935, "grad_norm": 70.13294982910156, "learning_rate": 0.00019841745350901682, "loss": 6.8223, "step": 52020 }, { "epoch": 6.261131167268351, "grad_norm": 97.21419525146484, "learning_rate": 0.00019841677931426826, "loss": 6.7882, "step": 52030 }, { "epoch": 6.262334536702768, "grad_norm": 1414.242431640625, "learning_rate": 0.00019841610497708633, "loss": 7.2025, "step": 52040 }, { "epoch": 6.263537906137184, "grad_norm": 261.9495544433594, "learning_rate": 0.000198415430497472, "loss": 7.3967, "step": 52050 }, { "epoch": 6.2647412755716, "grad_norm": 248.5710906982422, "learning_rate": 0.00019841475587542627, "loss": 7.1057, "step": 52060 }, { "epoch": 6.265944645006017, "grad_norm": 253.21038818359375, "learning_rate": 0.00019841408111095004, "loss": 7.2014, "step": 52070 }, { "epoch": 6.2671480144404335, "grad_norm": 277.64862060546875, "learning_rate": 0.00019841340620404437, "loss": 7.2097, "step": 52080 }, { "epoch": 6.268351383874849, "grad_norm": 501.5820007324219, "learning_rate": 0.0001984127311547102, "loss": 7.2405, "step": 52090 }, { "epoch": 6.269554753309266, "grad_norm": 36.811588287353516, "learning_rate": 0.0001984120559629485, "loss": 7.3904, "step": 52100 }, { "epoch": 6.270758122743683, "grad_norm": 1088.7659912109375, "learning_rate": 0.0001984113806287603, "loss": 7.2532, "step": 52110 }, { "epoch": 6.271961492178098, "grad_norm": 114.97406768798828, "learning_rate": 0.0001984107051521465, "loss": 7.7797, "step": 52120 }, { "epoch": 6.273164861612515, "grad_norm": 13.400031089782715, "learning_rate": 0.00019841002953310808, "loss": 7.5005, "step": 52130 }, { "epoch": 6.274368231046932, "grad_norm": 9.023686408996582, "learning_rate": 0.0001984093537716461, "loss": 7.6065, "step": 52140 }, { "epoch": 6.2755716004813475, "grad_norm": 35.21834182739258, "learning_rate": 0.00019840867786776145, "loss": 7.6507, "step": 52150 }, { "epoch": 6.276774969915764, "grad_norm": 10.762063026428223, "learning_rate": 0.0001984080018214552, "loss": 7.7764, "step": 52160 }, { "epoch": 6.277978339350181, "grad_norm": 9.541496276855469, "learning_rate": 0.00019840732563272822, "loss": 7.4684, "step": 52170 }, { "epoch": 6.279181708784597, "grad_norm": 26.444568634033203, "learning_rate": 0.00019840664930158156, "loss": 7.5107, "step": 52180 }, { "epoch": 6.280385078219013, "grad_norm": 18.916412353515625, "learning_rate": 0.00019840597282801622, "loss": 7.5227, "step": 52190 }, { "epoch": 6.28158844765343, "grad_norm": 12.912468910217285, "learning_rate": 0.0001984052962120331, "loss": 7.4949, "step": 52200 }, { "epoch": 6.282791817087846, "grad_norm": 9.408341407775879, "learning_rate": 0.00019840461945363322, "loss": 7.2832, "step": 52210 }, { "epoch": 6.2839951865222625, "grad_norm": 27.467823028564453, "learning_rate": 0.0001984039425528176, "loss": 7.2377, "step": 52220 }, { "epoch": 6.285198555956678, "grad_norm": 32.02880096435547, "learning_rate": 0.00019840326550958715, "loss": 7.3939, "step": 52230 }, { "epoch": 6.286401925391095, "grad_norm": 15.904004096984863, "learning_rate": 0.00019840258832394287, "loss": 7.4093, "step": 52240 }, { "epoch": 6.287605294825512, "grad_norm": 30.023584365844727, "learning_rate": 0.00019840191099588576, "loss": 7.3163, "step": 52250 }, { "epoch": 6.2888086642599275, "grad_norm": 28.092247009277344, "learning_rate": 0.0001984012335254168, "loss": 7.447, "step": 52260 }, { "epoch": 6.290012033694344, "grad_norm": 6.808090686798096, "learning_rate": 0.00019840055591253698, "loss": 7.4688, "step": 52270 }, { "epoch": 6.291215403128761, "grad_norm": 38.90576934814453, "learning_rate": 0.00019839987815724723, "loss": 7.4105, "step": 52280 }, { "epoch": 6.292418772563177, "grad_norm": 14.360461235046387, "learning_rate": 0.00019839920025954854, "loss": 7.4389, "step": 52290 }, { "epoch": 6.293622141997593, "grad_norm": 31.73518180847168, "learning_rate": 0.00019839852221944195, "loss": 7.3699, "step": 52300 }, { "epoch": 6.29482551143201, "grad_norm": 9.294116973876953, "learning_rate": 0.0001983978440369284, "loss": 7.3123, "step": 52310 }, { "epoch": 6.296028880866426, "grad_norm": 10.44865608215332, "learning_rate": 0.00019839716571200887, "loss": 7.1166, "step": 52320 }, { "epoch": 6.297232250300842, "grad_norm": 12.122678756713867, "learning_rate": 0.00019839648724468437, "loss": 7.1422, "step": 52330 }, { "epoch": 6.298435619735259, "grad_norm": 22.11392593383789, "learning_rate": 0.00019839580863495582, "loss": 6.9034, "step": 52340 }, { "epoch": 6.299638989169675, "grad_norm": 46.20428466796875, "learning_rate": 0.00019839512988282428, "loss": 7.1029, "step": 52350 }, { "epoch": 6.300842358604092, "grad_norm": 425.1414794921875, "learning_rate": 0.00019839445098829067, "loss": 7.1642, "step": 52360 }, { "epoch": 6.302045728038507, "grad_norm": 61.78722381591797, "learning_rate": 0.000198393771951356, "loss": 7.6449, "step": 52370 }, { "epoch": 6.303249097472924, "grad_norm": 54.612648010253906, "learning_rate": 0.00019839309277202126, "loss": 7.4344, "step": 52380 }, { "epoch": 6.304452466907341, "grad_norm": 52.32176208496094, "learning_rate": 0.0001983924134502874, "loss": 7.1869, "step": 52390 }, { "epoch": 6.3056558363417565, "grad_norm": 137.26119995117188, "learning_rate": 0.00019839173398615546, "loss": 6.9554, "step": 52400 }, { "epoch": 6.306859205776173, "grad_norm": 83.4488754272461, "learning_rate": 0.00019839105437962636, "loss": 6.9961, "step": 52410 }, { "epoch": 6.30806257521059, "grad_norm": 300.5815734863281, "learning_rate": 0.0001983903746307011, "loss": 7.0563, "step": 52420 }, { "epoch": 6.309265944645006, "grad_norm": 119.7309341430664, "learning_rate": 0.0001983896947393807, "loss": 7.0218, "step": 52430 }, { "epoch": 6.310469314079422, "grad_norm": 143.92747497558594, "learning_rate": 0.00019838901470566614, "loss": 7.1164, "step": 52440 }, { "epoch": 6.311672683513839, "grad_norm": 181.8562469482422, "learning_rate": 0.00019838833452955835, "loss": 7.1949, "step": 52450 }, { "epoch": 6.312876052948255, "grad_norm": 62.823307037353516, "learning_rate": 0.00019838765421105836, "loss": 7.2763, "step": 52460 }, { "epoch": 6.3140794223826715, "grad_norm": 345.1018371582031, "learning_rate": 0.00019838697375016714, "loss": 7.2706, "step": 52470 }, { "epoch": 6.315282791817088, "grad_norm": 41.3524055480957, "learning_rate": 0.00019838629314688567, "loss": 7.2011, "step": 52480 }, { "epoch": 6.316486161251504, "grad_norm": 87.23824310302734, "learning_rate": 0.00019838561240121495, "loss": 7.1037, "step": 52490 }, { "epoch": 6.317689530685921, "grad_norm": 92.90904235839844, "learning_rate": 0.000198384931513156, "loss": 6.9603, "step": 52500 }, { "epoch": 6.318892900120337, "grad_norm": 78.8731460571289, "learning_rate": 0.00019838425048270968, "loss": 6.9291, "step": 52510 }, { "epoch": 6.320096269554753, "grad_norm": 27.37542724609375, "learning_rate": 0.00019838356930987713, "loss": 6.9328, "step": 52520 }, { "epoch": 6.32129963898917, "grad_norm": 15.593992233276367, "learning_rate": 0.00019838288799465924, "loss": 7.1231, "step": 52530 }, { "epoch": 6.3225030084235865, "grad_norm": 78.6000747680664, "learning_rate": 0.000198382206537057, "loss": 7.2843, "step": 52540 }, { "epoch": 6.323706377858002, "grad_norm": 165.88641357421875, "learning_rate": 0.00019838152493707146, "loss": 7.2011, "step": 52550 }, { "epoch": 6.324909747292419, "grad_norm": 32.2805061340332, "learning_rate": 0.00019838084319470355, "loss": 7.118, "step": 52560 }, { "epoch": 6.326113116726835, "grad_norm": 29.490070343017578, "learning_rate": 0.00019838016130995425, "loss": 7.1597, "step": 52570 }, { "epoch": 6.327316486161251, "grad_norm": 17.017850875854492, "learning_rate": 0.00019837947928282458, "loss": 6.9764, "step": 52580 }, { "epoch": 6.328519855595668, "grad_norm": 35.79988098144531, "learning_rate": 0.00019837879711331552, "loss": 7.027, "step": 52590 }, { "epoch": 6.329723225030084, "grad_norm": 452.3404541015625, "learning_rate": 0.00019837811480142802, "loss": 6.9008, "step": 52600 }, { "epoch": 6.330926594464501, "grad_norm": 958.8521728515625, "learning_rate": 0.00019837743234716313, "loss": 7.2571, "step": 52610 }, { "epoch": 6.332129963898917, "grad_norm": 33.366722106933594, "learning_rate": 0.0001983767497505218, "loss": 7.408, "step": 52620 }, { "epoch": 6.333333333333333, "grad_norm": 43.752098083496094, "learning_rate": 0.00019837606701150504, "loss": 7.3278, "step": 52630 }, { "epoch": 6.33453670276775, "grad_norm": 51.28098678588867, "learning_rate": 0.0001983753841301138, "loss": 7.2685, "step": 52640 }, { "epoch": 6.335740072202166, "grad_norm": 76.79080200195312, "learning_rate": 0.0001983747011063491, "loss": 7.1186, "step": 52650 }, { "epoch": 6.336943441636582, "grad_norm": 329.1863098144531, "learning_rate": 0.00019837401794021192, "loss": 7.3061, "step": 52660 }, { "epoch": 6.338146811070999, "grad_norm": 201.25904846191406, "learning_rate": 0.00019837333463170324, "loss": 7.2656, "step": 52670 }, { "epoch": 6.3393501805054155, "grad_norm": 122.38607025146484, "learning_rate": 0.00019837265118082407, "loss": 7.1808, "step": 52680 }, { "epoch": 6.340553549939831, "grad_norm": 494.30841064453125, "learning_rate": 0.00019837196758757539, "loss": 7.2204, "step": 52690 }, { "epoch": 6.341756919374248, "grad_norm": 160.0564727783203, "learning_rate": 0.0001983712838519582, "loss": 7.0689, "step": 52700 }, { "epoch": 6.342960288808664, "grad_norm": 244.82846069335938, "learning_rate": 0.00019837059997397343, "loss": 7.0221, "step": 52710 }, { "epoch": 6.3441636582430805, "grad_norm": 69.26526641845703, "learning_rate": 0.00019836991595362214, "loss": 7.0907, "step": 52720 }, { "epoch": 6.345367027677497, "grad_norm": 32.76903533935547, "learning_rate": 0.0001983692317909053, "loss": 7.0811, "step": 52730 }, { "epoch": 6.346570397111913, "grad_norm": 91.3707275390625, "learning_rate": 0.00019836854748582388, "loss": 7.0809, "step": 52740 }, { "epoch": 6.34777376654633, "grad_norm": 92.4900131225586, "learning_rate": 0.00019836786303837887, "loss": 6.9811, "step": 52750 }, { "epoch": 6.348977135980746, "grad_norm": 985.42431640625, "learning_rate": 0.00019836717844857132, "loss": 7.1548, "step": 52760 }, { "epoch": 6.350180505415162, "grad_norm": 387.5338134765625, "learning_rate": 0.00019836649371640216, "loss": 7.2645, "step": 52770 }, { "epoch": 6.351383874849579, "grad_norm": 188.41053771972656, "learning_rate": 0.00019836580884187237, "loss": 6.9158, "step": 52780 }, { "epoch": 6.3525872442839955, "grad_norm": 460.3943176269531, "learning_rate": 0.000198365123824983, "loss": 7.4076, "step": 52790 }, { "epoch": 6.353790613718411, "grad_norm": 145.7770538330078, "learning_rate": 0.00019836443866573503, "loss": 7.3129, "step": 52800 }, { "epoch": 6.354993983152828, "grad_norm": 84.12643432617188, "learning_rate": 0.0001983637533641294, "loss": 7.2679, "step": 52810 }, { "epoch": 6.356197352587245, "grad_norm": 121.4907455444336, "learning_rate": 0.00019836306792016711, "loss": 7.2408, "step": 52820 }, { "epoch": 6.35740072202166, "grad_norm": 286.1339416503906, "learning_rate": 0.00019836238233384923, "loss": 7.182, "step": 52830 }, { "epoch": 6.358604091456077, "grad_norm": 987.18017578125, "learning_rate": 0.00019836169660517665, "loss": 7.2193, "step": 52840 }, { "epoch": 6.359807460890494, "grad_norm": 411.76727294921875, "learning_rate": 0.00019836101073415042, "loss": 7.2532, "step": 52850 }, { "epoch": 6.3610108303249095, "grad_norm": 118.34429931640625, "learning_rate": 0.00019836032472077156, "loss": 7.1956, "step": 52860 }, { "epoch": 6.362214199759326, "grad_norm": 623.2942504882812, "learning_rate": 0.000198359638565041, "loss": 7.1516, "step": 52870 }, { "epoch": 6.363417569193743, "grad_norm": 792.5098266601562, "learning_rate": 0.00019835895226695974, "loss": 7.0995, "step": 52880 }, { "epoch": 6.364620938628159, "grad_norm": 325.6565856933594, "learning_rate": 0.00019835826582652883, "loss": 7.1832, "step": 52890 }, { "epoch": 6.365824308062575, "grad_norm": 204.7774658203125, "learning_rate": 0.00019835757924374918, "loss": 7.1555, "step": 52900 }, { "epoch": 6.367027677496991, "grad_norm": 157.6756134033203, "learning_rate": 0.00019835689251862187, "loss": 7.1083, "step": 52910 }, { "epoch": 6.368231046931408, "grad_norm": 182.59307861328125, "learning_rate": 0.00019835620565114784, "loss": 7.1596, "step": 52920 }, { "epoch": 6.3694344163658245, "grad_norm": 414.76959228515625, "learning_rate": 0.0001983555186413281, "loss": 7.1812, "step": 52930 }, { "epoch": 6.37063778580024, "grad_norm": 375.0384216308594, "learning_rate": 0.00019835483148916363, "loss": 7.141, "step": 52940 }, { "epoch": 6.371841155234657, "grad_norm": 2651.772705078125, "learning_rate": 0.00019835414419465543, "loss": 7.3201, "step": 52950 }, { "epoch": 6.373044524669074, "grad_norm": 199.78663635253906, "learning_rate": 0.00019835345675780453, "loss": 7.2571, "step": 52960 }, { "epoch": 6.3742478941034895, "grad_norm": 822.8274536132812, "learning_rate": 0.00019835276917861186, "loss": 7.2299, "step": 52970 }, { "epoch": 6.375451263537906, "grad_norm": 485.8435974121094, "learning_rate": 0.0001983520814570785, "loss": 7.2557, "step": 52980 }, { "epoch": 6.376654632972323, "grad_norm": 396.6460876464844, "learning_rate": 0.00019835139359320535, "loss": 7.1102, "step": 52990 }, { "epoch": 6.377858002406739, "grad_norm": 855.5442504882812, "learning_rate": 0.00019835070558699348, "loss": 7.1931, "step": 53000 }, { "epoch": 6.379061371841155, "grad_norm": 376.45330810546875, "learning_rate": 0.00019835001743844383, "loss": 7.2304, "step": 53010 }, { "epoch": 6.380264741275572, "grad_norm": 98.51732635498047, "learning_rate": 0.00019834932914755744, "loss": 7.0782, "step": 53020 }, { "epoch": 6.381468110709988, "grad_norm": 69.67068481445312, "learning_rate": 0.0001983486407143353, "loss": 7.5735, "step": 53030 }, { "epoch": 6.382671480144404, "grad_norm": 201.29722595214844, "learning_rate": 0.00019834795213877836, "loss": 7.4532, "step": 53040 }, { "epoch": 6.38387484957882, "grad_norm": 345.9329833984375, "learning_rate": 0.00019834726342088767, "loss": 7.3236, "step": 53050 }, { "epoch": 6.385078219013237, "grad_norm": 118.62815856933594, "learning_rate": 0.0001983465745606642, "loss": 7.3724, "step": 53060 }, { "epoch": 6.386281588447654, "grad_norm": 44.16498565673828, "learning_rate": 0.000198345885558109, "loss": 7.4937, "step": 53070 }, { "epoch": 6.387484957882069, "grad_norm": 6.8613080978393555, "learning_rate": 0.00019834519641322298, "loss": 7.4436, "step": 53080 }, { "epoch": 6.388688327316486, "grad_norm": 62.453189849853516, "learning_rate": 0.0001983445071260072, "loss": 7.5031, "step": 53090 }, { "epoch": 6.389891696750903, "grad_norm": 29.752826690673828, "learning_rate": 0.00019834381769646262, "loss": 7.3467, "step": 53100 }, { "epoch": 6.3910950661853185, "grad_norm": 8.138988494873047, "learning_rate": 0.00019834312812459025, "loss": 7.4467, "step": 53110 }, { "epoch": 6.392298435619735, "grad_norm": 15.982645034790039, "learning_rate": 0.0001983424384103911, "loss": 7.5223, "step": 53120 }, { "epoch": 6.393501805054152, "grad_norm": 45.11640548706055, "learning_rate": 0.00019834174855386618, "loss": 7.2417, "step": 53130 }, { "epoch": 6.394705174488568, "grad_norm": 17.564626693725586, "learning_rate": 0.00019834105855501646, "loss": 7.2857, "step": 53140 }, { "epoch": 6.395908543922984, "grad_norm": 551.6398315429688, "learning_rate": 0.00019834036841384295, "loss": 7.6144, "step": 53150 }, { "epoch": 6.397111913357401, "grad_norm": 124.64905548095703, "learning_rate": 0.00019833967813034665, "loss": 7.5345, "step": 53160 }, { "epoch": 6.398315282791817, "grad_norm": 25.615991592407227, "learning_rate": 0.00019833898770452851, "loss": 7.3548, "step": 53170 }, { "epoch": 6.3995186522262335, "grad_norm": 24.818967819213867, "learning_rate": 0.00019833829713638963, "loss": 7.2835, "step": 53180 }, { "epoch": 6.40072202166065, "grad_norm": 160.3653564453125, "learning_rate": 0.00019833760642593096, "loss": 7.2268, "step": 53190 }, { "epoch": 6.401925391095066, "grad_norm": 2392.0791015625, "learning_rate": 0.00019833691557315345, "loss": 7.5182, "step": 53200 }, { "epoch": 6.403128760529483, "grad_norm": 674.7615356445312, "learning_rate": 0.00019833622457805818, "loss": 7.7121, "step": 53210 }, { "epoch": 6.404332129963899, "grad_norm": 326.4076843261719, "learning_rate": 0.00019833553344064612, "loss": 7.3973, "step": 53220 }, { "epoch": 6.405535499398315, "grad_norm": 266.168701171875, "learning_rate": 0.00019833484216091825, "loss": 7.2008, "step": 53230 }, { "epoch": 6.406738868832732, "grad_norm": 2855.228271484375, "learning_rate": 0.00019833415073887558, "loss": 7.2671, "step": 53240 }, { "epoch": 6.4079422382671485, "grad_norm": 908.1167602539062, "learning_rate": 0.00019833345917451913, "loss": 7.0994, "step": 53250 }, { "epoch": 6.409145607701564, "grad_norm": 250.6652374267578, "learning_rate": 0.0001983327674678499, "loss": 7.1249, "step": 53260 }, { "epoch": 6.410348977135981, "grad_norm": 185.2032928466797, "learning_rate": 0.00019833207561886882, "loss": 7.1267, "step": 53270 }, { "epoch": 6.411552346570397, "grad_norm": 173.52169799804688, "learning_rate": 0.000198331383627577, "loss": 8.2331, "step": 53280 }, { "epoch": 6.412755716004813, "grad_norm": 103.46623229980469, "learning_rate": 0.00019833069149397537, "loss": 8.351, "step": 53290 }, { "epoch": 6.41395908543923, "grad_norm": 54.158424377441406, "learning_rate": 0.00019832999921806497, "loss": 7.7284, "step": 53300 }, { "epoch": 6.415162454873646, "grad_norm": 28.00765037536621, "learning_rate": 0.00019832930679984675, "loss": 7.6515, "step": 53310 }, { "epoch": 6.416365824308063, "grad_norm": 15.205865859985352, "learning_rate": 0.0001983286142393218, "loss": 7.5569, "step": 53320 }, { "epoch": 6.417569193742479, "grad_norm": 17.263912200927734, "learning_rate": 0.000198327921536491, "loss": 7.6979, "step": 53330 }, { "epoch": 6.418772563176895, "grad_norm": 21.40951156616211, "learning_rate": 0.00019832722869135547, "loss": 7.7632, "step": 53340 }, { "epoch": 6.419975932611312, "grad_norm": 7.949574947357178, "learning_rate": 0.00019832653570391618, "loss": 7.572, "step": 53350 }, { "epoch": 6.421179302045728, "grad_norm": 39.62449645996094, "learning_rate": 0.00019832584257417407, "loss": 7.5378, "step": 53360 }, { "epoch": 6.422382671480144, "grad_norm": 29.13290023803711, "learning_rate": 0.0001983251493021302, "loss": 7.4672, "step": 53370 }, { "epoch": 6.423586040914561, "grad_norm": 29.931182861328125, "learning_rate": 0.00019832445588778558, "loss": 7.2516, "step": 53380 }, { "epoch": 6.4247894103489775, "grad_norm": 55.71449661254883, "learning_rate": 0.00019832376233114117, "loss": 7.3434, "step": 53390 }, { "epoch": 6.425992779783393, "grad_norm": 96.51713562011719, "learning_rate": 0.00019832306863219805, "loss": 7.3206, "step": 53400 }, { "epoch": 6.42719614921781, "grad_norm": 1127.621826171875, "learning_rate": 0.00019832237479095716, "loss": 7.3673, "step": 53410 }, { "epoch": 6.428399518652226, "grad_norm": 79.29362487792969, "learning_rate": 0.00019832168080741947, "loss": 7.3262, "step": 53420 }, { "epoch": 6.4296028880866425, "grad_norm": 78.43046569824219, "learning_rate": 0.00019832098668158608, "loss": 7.3669, "step": 53430 }, { "epoch": 6.430806257521059, "grad_norm": 79.61399841308594, "learning_rate": 0.00019832029241345793, "loss": 7.8748, "step": 53440 }, { "epoch": 6.432009626955475, "grad_norm": 55.962799072265625, "learning_rate": 0.00019831959800303603, "loss": 7.9114, "step": 53450 }, { "epoch": 6.433212996389892, "grad_norm": 25.874568939208984, "learning_rate": 0.00019831890345032145, "loss": 7.4856, "step": 53460 }, { "epoch": 6.434416365824308, "grad_norm": 30.580080032348633, "learning_rate": 0.00019831820875531507, "loss": 7.5684, "step": 53470 }, { "epoch": 6.435619735258724, "grad_norm": 26.631053924560547, "learning_rate": 0.000198317513918018, "loss": 7.5614, "step": 53480 }, { "epoch": 6.436823104693141, "grad_norm": 4.994171142578125, "learning_rate": 0.00019831681893843123, "loss": 7.4872, "step": 53490 }, { "epoch": 6.4380264741275575, "grad_norm": 11.913789749145508, "learning_rate": 0.00019831612381655575, "loss": 7.5216, "step": 53500 }, { "epoch": 6.439229843561973, "grad_norm": 256.5828857421875, "learning_rate": 0.00019831542855239254, "loss": 7.4161, "step": 53510 }, { "epoch": 6.44043321299639, "grad_norm": 9.101548194885254, "learning_rate": 0.00019831473314594264, "loss": 7.5105, "step": 53520 }, { "epoch": 6.441636582430807, "grad_norm": 3.497997760772705, "learning_rate": 0.00019831403759720706, "loss": 7.3792, "step": 53530 }, { "epoch": 6.442839951865222, "grad_norm": 15.275854110717773, "learning_rate": 0.0001983133419061868, "loss": 7.4054, "step": 53540 }, { "epoch": 6.444043321299639, "grad_norm": 4.371145248413086, "learning_rate": 0.00019831264607288285, "loss": 7.2603, "step": 53550 }, { "epoch": 6.445246690734056, "grad_norm": 21.592967987060547, "learning_rate": 0.00019831195009729623, "loss": 7.1969, "step": 53560 }, { "epoch": 6.4464500601684716, "grad_norm": 7.722259521484375, "learning_rate": 0.00019831125397942794, "loss": 7.1575, "step": 53570 }, { "epoch": 6.447653429602888, "grad_norm": 17.183794021606445, "learning_rate": 0.00019831055771927898, "loss": 7.1835, "step": 53580 }, { "epoch": 6.448856799037305, "grad_norm": 18.575815200805664, "learning_rate": 0.0001983098613168504, "loss": 7.1085, "step": 53590 }, { "epoch": 6.450060168471721, "grad_norm": 427.5148620605469, "learning_rate": 0.00019830916477214317, "loss": 7.3198, "step": 53600 }, { "epoch": 6.451263537906137, "grad_norm": 80.92161560058594, "learning_rate": 0.0001983084680851583, "loss": 7.7243, "step": 53610 }, { "epoch": 6.452466907340553, "grad_norm": 183.42733764648438, "learning_rate": 0.00019830777125589682, "loss": 7.6895, "step": 53620 }, { "epoch": 6.45367027677497, "grad_norm": 109.63179779052734, "learning_rate": 0.0001983070742843597, "loss": 7.3347, "step": 53630 }, { "epoch": 6.4548736462093865, "grad_norm": 58.21371841430664, "learning_rate": 0.00019830637717054798, "loss": 7.3218, "step": 53640 }, { "epoch": 6.456077015643802, "grad_norm": 30.193782806396484, "learning_rate": 0.00019830567991446264, "loss": 7.2972, "step": 53650 }, { "epoch": 6.457280385078219, "grad_norm": 33.620445251464844, "learning_rate": 0.00019830498251610475, "loss": 7.4496, "step": 53660 }, { "epoch": 6.458483754512636, "grad_norm": 27.148279190063477, "learning_rate": 0.00019830428497547527, "loss": 7.4207, "step": 53670 }, { "epoch": 6.4596871239470515, "grad_norm": 72.65086364746094, "learning_rate": 0.0001983035872925752, "loss": 7.2944, "step": 53680 }, { "epoch": 6.460890493381468, "grad_norm": 72.13494110107422, "learning_rate": 0.00019830288946740557, "loss": 7.3604, "step": 53690 }, { "epoch": 6.462093862815885, "grad_norm": 50.20564270019531, "learning_rate": 0.00019830219149996744, "loss": 7.4214, "step": 53700 }, { "epoch": 6.463297232250301, "grad_norm": 63.062278747558594, "learning_rate": 0.00019830149339026168, "loss": 7.2255, "step": 53710 }, { "epoch": 6.464500601684717, "grad_norm": 104.42791748046875, "learning_rate": 0.00019830079513828946, "loss": 7.2095, "step": 53720 }, { "epoch": 6.465703971119134, "grad_norm": 93.70234680175781, "learning_rate": 0.00019830009674405173, "loss": 7.1209, "step": 53730 }, { "epoch": 6.46690734055355, "grad_norm": 36.293392181396484, "learning_rate": 0.00019829939820754943, "loss": 7.1147, "step": 53740 }, { "epoch": 6.4681107099879664, "grad_norm": 78.2708740234375, "learning_rate": 0.00019829869952878366, "loss": 7.04, "step": 53750 }, { "epoch": 6.469314079422382, "grad_norm": 34.36333084106445, "learning_rate": 0.0001982980007077554, "loss": 7.0723, "step": 53760 }, { "epoch": 6.470517448856799, "grad_norm": 102.16293334960938, "learning_rate": 0.00019829730174446568, "loss": 7.1478, "step": 53770 }, { "epoch": 6.471720818291216, "grad_norm": 29.0257568359375, "learning_rate": 0.00019829660263891548, "loss": 7.3182, "step": 53780 }, { "epoch": 6.472924187725631, "grad_norm": 45.708526611328125, "learning_rate": 0.00019829590339110582, "loss": 7.2223, "step": 53790 }, { "epoch": 6.474127557160048, "grad_norm": 67.09626007080078, "learning_rate": 0.00019829520400103774, "loss": 7.2573, "step": 53800 }, { "epoch": 6.475330926594465, "grad_norm": 347.1438903808594, "learning_rate": 0.0001982945044687122, "loss": 7.094, "step": 53810 }, { "epoch": 6.4765342960288805, "grad_norm": 562.5654907226562, "learning_rate": 0.00019829380479413028, "loss": 7.1589, "step": 53820 }, { "epoch": 6.477737665463297, "grad_norm": 67.89314270019531, "learning_rate": 0.00019829310497729294, "loss": 7.1008, "step": 53830 }, { "epoch": 6.478941034897714, "grad_norm": 203.4894256591797, "learning_rate": 0.00019829240501820122, "loss": 7.276, "step": 53840 }, { "epoch": 6.48014440433213, "grad_norm": 27.177310943603516, "learning_rate": 0.0001982917049168561, "loss": 7.4379, "step": 53850 }, { "epoch": 6.481347773766546, "grad_norm": 18.682811737060547, "learning_rate": 0.00019829100467325862, "loss": 7.2928, "step": 53860 }, { "epoch": 6.482551143200963, "grad_norm": 110.0114974975586, "learning_rate": 0.00019829030428740983, "loss": 7.2596, "step": 53870 }, { "epoch": 6.483754512635379, "grad_norm": 217.84617614746094, "learning_rate": 0.00019828960375931068, "loss": 7.3008, "step": 53880 }, { "epoch": 6.4849578820697955, "grad_norm": 243.9082794189453, "learning_rate": 0.00019828890308896217, "loss": 7.78, "step": 53890 }, { "epoch": 6.486161251504212, "grad_norm": 9.155352592468262, "learning_rate": 0.00019828820227636539, "loss": 7.7512, "step": 53900 }, { "epoch": 6.487364620938628, "grad_norm": 10.2932767868042, "learning_rate": 0.0001982875013215213, "loss": 7.4618, "step": 53910 }, { "epoch": 6.488567990373045, "grad_norm": 17.100906372070312, "learning_rate": 0.0001982868002244309, "loss": 7.5332, "step": 53920 }, { "epoch": 6.489771359807461, "grad_norm": 12.623067855834961, "learning_rate": 0.00019828609898509529, "loss": 7.4229, "step": 53930 }, { "epoch": 6.490974729241877, "grad_norm": 50.046966552734375, "learning_rate": 0.0001982853976035154, "loss": 7.4933, "step": 53940 }, { "epoch": 6.492178098676294, "grad_norm": 21.30010414123535, "learning_rate": 0.0001982846960796923, "loss": 7.3344, "step": 53950 }, { "epoch": 6.49338146811071, "grad_norm": 183.37937927246094, "learning_rate": 0.00019828399441362696, "loss": 7.47, "step": 53960 }, { "epoch": 6.494584837545126, "grad_norm": 26.571096420288086, "learning_rate": 0.0001982832926053204, "loss": 7.4587, "step": 53970 }, { "epoch": 6.495788206979543, "grad_norm": 35.904136657714844, "learning_rate": 0.00019828259065477366, "loss": 7.6075, "step": 53980 }, { "epoch": 6.496991576413959, "grad_norm": 13.69642448425293, "learning_rate": 0.00019828188856198776, "loss": 7.5772, "step": 53990 }, { "epoch": 6.498194945848375, "grad_norm": 15.006000518798828, "learning_rate": 0.00019828118632696367, "loss": 7.5455, "step": 54000 }, { "epoch": 6.499398315282792, "grad_norm": 11.823795318603516, "learning_rate": 0.00019828048394970248, "loss": 7.4746, "step": 54010 }, { "epoch": 6.500601684717208, "grad_norm": 8.034140586853027, "learning_rate": 0.00019827978143020512, "loss": 7.3793, "step": 54020 }, { "epoch": 6.501805054151625, "grad_norm": 18.2496280670166, "learning_rate": 0.00019827907876847266, "loss": 7.4656, "step": 54030 }, { "epoch": 6.503008423586041, "grad_norm": 6.516838073730469, "learning_rate": 0.00019827837596450614, "loss": 7.4192, "step": 54040 }, { "epoch": 6.504211793020457, "grad_norm": 10.125895500183105, "learning_rate": 0.00019827767301830654, "loss": 7.314, "step": 54050 }, { "epoch": 6.505415162454874, "grad_norm": 5.568592071533203, "learning_rate": 0.00019827696992987486, "loss": 7.3927, "step": 54060 }, { "epoch": 6.5066185318892895, "grad_norm": 6.277139186859131, "learning_rate": 0.00019827626669921216, "loss": 7.2616, "step": 54070 }, { "epoch": 6.507821901323706, "grad_norm": 8.435803413391113, "learning_rate": 0.00019827556332631944, "loss": 7.3458, "step": 54080 }, { "epoch": 6.509025270758123, "grad_norm": 6.075100898742676, "learning_rate": 0.00019827485981119768, "loss": 7.2864, "step": 54090 }, { "epoch": 6.510228640192539, "grad_norm": 8.41025447845459, "learning_rate": 0.00019827415615384796, "loss": 7.3008, "step": 54100 }, { "epoch": 6.511432009626955, "grad_norm": 3.908461570739746, "learning_rate": 0.00019827345235427131, "loss": 7.2405, "step": 54110 }, { "epoch": 6.512635379061372, "grad_norm": 7.4334845542907715, "learning_rate": 0.00019827274841246865, "loss": 7.2647, "step": 54120 }, { "epoch": 6.513838748495788, "grad_norm": 7.621320724487305, "learning_rate": 0.0001982720443284411, "loss": 7.1673, "step": 54130 }, { "epoch": 6.5150421179302045, "grad_norm": 7.19697380065918, "learning_rate": 0.00019827134010218963, "loss": 7.1529, "step": 54140 }, { "epoch": 6.516245487364621, "grad_norm": 326.74365234375, "learning_rate": 0.00019827063573371527, "loss": 7.296, "step": 54150 }, { "epoch": 6.517448856799037, "grad_norm": 172.8400115966797, "learning_rate": 0.00019826993122301902, "loss": 7.8805, "step": 54160 }, { "epoch": 6.518652226233454, "grad_norm": 595.4959106445312, "learning_rate": 0.00019826922657010192, "loss": 7.9992, "step": 54170 }, { "epoch": 6.51985559566787, "grad_norm": 26.08918571472168, "learning_rate": 0.000198268521774965, "loss": 10.1198, "step": 54180 }, { "epoch": 6.521058965102286, "grad_norm": 11.50631046295166, "learning_rate": 0.00019826781683760926, "loss": 8.0815, "step": 54190 }, { "epoch": 6.522262334536703, "grad_norm": 10.769640922546387, "learning_rate": 0.00019826711175803573, "loss": 7.8827, "step": 54200 }, { "epoch": 6.5234657039711195, "grad_norm": 308.4264221191406, "learning_rate": 0.00019826640653624545, "loss": 7.5268, "step": 54210 }, { "epoch": 6.524669073405535, "grad_norm": 10.110445976257324, "learning_rate": 0.0001982657011722394, "loss": 7.5027, "step": 54220 }, { "epoch": 6.525872442839952, "grad_norm": 16.747638702392578, "learning_rate": 0.00019826499566601862, "loss": 7.4019, "step": 54230 }, { "epoch": 6.527075812274369, "grad_norm": 16.85960578918457, "learning_rate": 0.00019826429001758414, "loss": 7.4817, "step": 54240 }, { "epoch": 6.528279181708784, "grad_norm": 18.80135154724121, "learning_rate": 0.00019826358422693697, "loss": 7.4287, "step": 54250 }, { "epoch": 6.529482551143201, "grad_norm": 24.110595703125, "learning_rate": 0.0001982628782940781, "loss": 7.3289, "step": 54260 }, { "epoch": 6.530685920577618, "grad_norm": 39.86370849609375, "learning_rate": 0.0001982621722190086, "loss": 7.2644, "step": 54270 }, { "epoch": 6.5318892900120336, "grad_norm": 13.390047073364258, "learning_rate": 0.0001982614660017295, "loss": 7.4229, "step": 54280 }, { "epoch": 6.53309265944645, "grad_norm": 13.057512283325195, "learning_rate": 0.00019826075964224179, "loss": 7.381, "step": 54290 }, { "epoch": 6.534296028880867, "grad_norm": 13.116692543029785, "learning_rate": 0.00019826005314054648, "loss": 7.3792, "step": 54300 }, { "epoch": 6.535499398315283, "grad_norm": 17.642637252807617, "learning_rate": 0.00019825934649664464, "loss": 7.2654, "step": 54310 }, { "epoch": 6.536702767749699, "grad_norm": 21.632186889648438, "learning_rate": 0.00019825863971053724, "loss": 7.3399, "step": 54320 }, { "epoch": 6.537906137184115, "grad_norm": 34.379337310791016, "learning_rate": 0.00019825793278222534, "loss": 7.1887, "step": 54330 }, { "epoch": 6.539109506618532, "grad_norm": 20.180341720581055, "learning_rate": 0.00019825722571170998, "loss": 7.3595, "step": 54340 }, { "epoch": 6.5403128760529485, "grad_norm": 30.102216720581055, "learning_rate": 0.00019825651849899212, "loss": 7.1705, "step": 54350 }, { "epoch": 6.541516245487364, "grad_norm": 28.891704559326172, "learning_rate": 0.00019825581114407283, "loss": 7.4064, "step": 54360 }, { "epoch": 6.542719614921781, "grad_norm": 58.338565826416016, "learning_rate": 0.00019825510364695313, "loss": 7.1886, "step": 54370 }, { "epoch": 6.543922984356198, "grad_norm": 74.21753692626953, "learning_rate": 0.00019825439600763402, "loss": 7.2705, "step": 54380 }, { "epoch": 6.5451263537906135, "grad_norm": 92.1325454711914, "learning_rate": 0.00019825368822611657, "loss": 7.1965, "step": 54390 }, { "epoch": 6.54632972322503, "grad_norm": 27.837526321411133, "learning_rate": 0.00019825298030240173, "loss": 7.1966, "step": 54400 }, { "epoch": 6.547533092659447, "grad_norm": 25.98700523376465, "learning_rate": 0.00019825227223649062, "loss": 7.2939, "step": 54410 }, { "epoch": 6.548736462093863, "grad_norm": 45.24747848510742, "learning_rate": 0.00019825156402838417, "loss": 7.1676, "step": 54420 }, { "epoch": 6.549939831528279, "grad_norm": 14.43458080291748, "learning_rate": 0.00019825085567808345, "loss": 7.2205, "step": 54430 }, { "epoch": 6.551143200962695, "grad_norm": 14.4967041015625, "learning_rate": 0.00019825014718558953, "loss": 7.1258, "step": 54440 }, { "epoch": 6.552346570397112, "grad_norm": 9.712308883666992, "learning_rate": 0.00019824943855090336, "loss": 7.1183, "step": 54450 }, { "epoch": 6.5535499398315284, "grad_norm": 11.7882661819458, "learning_rate": 0.000198248729774026, "loss": 7.1117, "step": 54460 }, { "epoch": 6.554753309265944, "grad_norm": 24.683195114135742, "learning_rate": 0.00019824802085495846, "loss": 7.0663, "step": 54470 }, { "epoch": 6.555956678700361, "grad_norm": 26.272802352905273, "learning_rate": 0.0001982473117937018, "loss": 7.1405, "step": 54480 }, { "epoch": 6.557160048134778, "grad_norm": 15.428451538085938, "learning_rate": 0.000198246602590257, "loss": 7.117, "step": 54490 }, { "epoch": 6.558363417569193, "grad_norm": 20.457744598388672, "learning_rate": 0.00019824589324462513, "loss": 7.2366, "step": 54500 }, { "epoch": 6.55956678700361, "grad_norm": 6.069239616394043, "learning_rate": 0.00019824518375680716, "loss": 7.1005, "step": 54510 }, { "epoch": 6.560770156438027, "grad_norm": 8.31113052368164, "learning_rate": 0.0001982444741268042, "loss": 7.0395, "step": 54520 }, { "epoch": 6.5619735258724425, "grad_norm": 45.817283630371094, "learning_rate": 0.0001982437643546172, "loss": 7.11, "step": 54530 }, { "epoch": 6.563176895306859, "grad_norm": 46.114253997802734, "learning_rate": 0.00019824305444024725, "loss": 7.0689, "step": 54540 }, { "epoch": 6.564380264741276, "grad_norm": 190.5920867919922, "learning_rate": 0.0001982423443836953, "loss": 7.2594, "step": 54550 }, { "epoch": 6.565583634175692, "grad_norm": 215.85943603515625, "learning_rate": 0.00019824163418496245, "loss": 7.1505, "step": 54560 }, { "epoch": 6.566787003610108, "grad_norm": 284.28448486328125, "learning_rate": 0.00019824092384404968, "loss": 7.109, "step": 54570 }, { "epoch": 6.567990373044525, "grad_norm": 51.15251159667969, "learning_rate": 0.00019824021336095808, "loss": 7.1543, "step": 54580 }, { "epoch": 6.569193742478941, "grad_norm": 1200.6505126953125, "learning_rate": 0.00019823950273568862, "loss": 7.0203, "step": 54590 }, { "epoch": 6.5703971119133575, "grad_norm": 210.41134643554688, "learning_rate": 0.0001982387919682423, "loss": 7.1171, "step": 54600 }, { "epoch": 6.571600481347774, "grad_norm": 97.78995513916016, "learning_rate": 0.00019823808105862023, "loss": 7.2416, "step": 54610 }, { "epoch": 6.57280385078219, "grad_norm": 53.442588806152344, "learning_rate": 0.00019823737000682342, "loss": 7.1407, "step": 54620 }, { "epoch": 6.574007220216607, "grad_norm": 25.63939094543457, "learning_rate": 0.00019823665881285284, "loss": 7.4223, "step": 54630 }, { "epoch": 6.575210589651023, "grad_norm": 40.05807113647461, "learning_rate": 0.00019823594747670959, "loss": 7.5493, "step": 54640 }, { "epoch": 6.576413959085439, "grad_norm": 25.501604080200195, "learning_rate": 0.00019823523599839466, "loss": 7.4365, "step": 54650 }, { "epoch": 6.577617328519856, "grad_norm": 18.687335968017578, "learning_rate": 0.0001982345243779091, "loss": 7.6404, "step": 54660 }, { "epoch": 6.578820697954272, "grad_norm": 13.596409797668457, "learning_rate": 0.00019823381261525392, "loss": 7.5357, "step": 54670 }, { "epoch": 6.580024067388688, "grad_norm": 32.96208953857422, "learning_rate": 0.00019823310071043015, "loss": 7.6519, "step": 54680 }, { "epoch": 6.581227436823105, "grad_norm": 16.275880813598633, "learning_rate": 0.00019823238866343887, "loss": 7.5876, "step": 54690 }, { "epoch": 6.582430806257521, "grad_norm": 16.6007137298584, "learning_rate": 0.00019823167647428104, "loss": 7.5432, "step": 54700 }, { "epoch": 6.583634175691937, "grad_norm": 5.2712297439575195, "learning_rate": 0.00019823096414295772, "loss": 7.5932, "step": 54710 }, { "epoch": 6.584837545126354, "grad_norm": 6.463351249694824, "learning_rate": 0.00019823025166946994, "loss": 7.4931, "step": 54720 }, { "epoch": 6.58604091456077, "grad_norm": 34.08944320678711, "learning_rate": 0.00019822953905381877, "loss": 7.5064, "step": 54730 }, { "epoch": 6.587244283995187, "grad_norm": 171.5081787109375, "learning_rate": 0.00019822882629600517, "loss": 7.7336, "step": 54740 }, { "epoch": 6.588447653429603, "grad_norm": 23.387256622314453, "learning_rate": 0.0001982281133960302, "loss": 7.7548, "step": 54750 }, { "epoch": 6.589651022864019, "grad_norm": 7.655777931213379, "learning_rate": 0.0001982274003538949, "loss": 7.7193, "step": 54760 }, { "epoch": 6.590854392298436, "grad_norm": 27.844728469848633, "learning_rate": 0.00019822668716960032, "loss": 7.6539, "step": 54770 }, { "epoch": 6.5920577617328515, "grad_norm": 7.127274513244629, "learning_rate": 0.00019822597384314746, "loss": 7.4987, "step": 54780 }, { "epoch": 6.593261131167268, "grad_norm": 73.10989379882812, "learning_rate": 0.00019822526037453737, "loss": 7.3229, "step": 54790 }, { "epoch": 6.594464500601685, "grad_norm": 63.49520492553711, "learning_rate": 0.00019822454676377108, "loss": 7.2696, "step": 54800 }, { "epoch": 6.595667870036101, "grad_norm": 672.5573120117188, "learning_rate": 0.00019822383301084965, "loss": 7.5097, "step": 54810 }, { "epoch": 6.596871239470517, "grad_norm": 62.491119384765625, "learning_rate": 0.000198223119115774, "loss": 8.6739, "step": 54820 }, { "epoch": 6.598074608904934, "grad_norm": 61.918418884277344, "learning_rate": 0.00019822240507854532, "loss": 8.3629, "step": 54830 }, { "epoch": 6.59927797833935, "grad_norm": 15.558662414550781, "learning_rate": 0.00019822169089916454, "loss": 7.7627, "step": 54840 }, { "epoch": 6.6004813477737665, "grad_norm": 15.076722145080566, "learning_rate": 0.00019822097657763274, "loss": 7.6219, "step": 54850 }, { "epoch": 6.601684717208183, "grad_norm": 80.84474182128906, "learning_rate": 0.00019822026211395092, "loss": 7.5262, "step": 54860 }, { "epoch": 6.602888086642599, "grad_norm": 226.2410888671875, "learning_rate": 0.00019821954750812013, "loss": 7.389, "step": 54870 }, { "epoch": 6.604091456077016, "grad_norm": 663.0928955078125, "learning_rate": 0.0001982188327601414, "loss": 7.3833, "step": 54880 }, { "epoch": 6.605294825511432, "grad_norm": 57.77903747558594, "learning_rate": 0.00019821811787001578, "loss": 7.4233, "step": 54890 }, { "epoch": 6.606498194945848, "grad_norm": 1358.3236083984375, "learning_rate": 0.0001982174028377443, "loss": 7.1157, "step": 54900 }, { "epoch": 6.607701564380265, "grad_norm": 316.8460998535156, "learning_rate": 0.00019821668766332797, "loss": 7.3714, "step": 54910 }, { "epoch": 6.6089049338146815, "grad_norm": 197.40431213378906, "learning_rate": 0.00019821597234676787, "loss": 7.3758, "step": 54920 }, { "epoch": 6.610108303249097, "grad_norm": 28.91938591003418, "learning_rate": 0.000198215256888065, "loss": 7.4244, "step": 54930 }, { "epoch": 6.611311672683514, "grad_norm": 206.230224609375, "learning_rate": 0.00019821454128722042, "loss": 7.4257, "step": 54940 }, { "epoch": 6.612515042117931, "grad_norm": 34.72187042236328, "learning_rate": 0.0001982138255442351, "loss": 7.4846, "step": 54950 }, { "epoch": 6.613718411552346, "grad_norm": 26.24178123474121, "learning_rate": 0.00019821310965911017, "loss": 7.5512, "step": 54960 }, { "epoch": 6.614921780986763, "grad_norm": 81.10030364990234, "learning_rate": 0.0001982123936318466, "loss": 7.2925, "step": 54970 }, { "epoch": 6.61612515042118, "grad_norm": 23.236541748046875, "learning_rate": 0.00019821167746244547, "loss": 7.2629, "step": 54980 }, { "epoch": 6.617328519855596, "grad_norm": 228.65492248535156, "learning_rate": 0.0001982109611509078, "loss": 7.2236, "step": 54990 }, { "epoch": 6.618531889290012, "grad_norm": 12.98916244506836, "learning_rate": 0.00019821024469723458, "loss": 7.2756, "step": 55000 }, { "epoch": 6.619735258724428, "grad_norm": 94.37308502197266, "learning_rate": 0.00019820952810142693, "loss": 7.1736, "step": 55010 }, { "epoch": 6.620938628158845, "grad_norm": 80.3369140625, "learning_rate": 0.00019820881136348582, "loss": 7.2707, "step": 55020 }, { "epoch": 6.622141997593261, "grad_norm": 118.46678924560547, "learning_rate": 0.00019820809448341234, "loss": 7.1903, "step": 55030 }, { "epoch": 6.623345367027677, "grad_norm": 115.43425750732422, "learning_rate": 0.00019820737746120747, "loss": 7.1323, "step": 55040 }, { "epoch": 6.624548736462094, "grad_norm": 32.32550048828125, "learning_rate": 0.00019820666029687228, "loss": 7.3732, "step": 55050 }, { "epoch": 6.6257521058965105, "grad_norm": 361.280517578125, "learning_rate": 0.0001982059429904078, "loss": 7.4074, "step": 55060 }, { "epoch": 6.626955475330926, "grad_norm": 57.889488220214844, "learning_rate": 0.00019820522554181511, "loss": 7.3973, "step": 55070 }, { "epoch": 6.628158844765343, "grad_norm": 23.05805778503418, "learning_rate": 0.0001982045079510952, "loss": 7.301, "step": 55080 }, { "epoch": 6.62936221419976, "grad_norm": 41.13179397583008, "learning_rate": 0.0001982037902182491, "loss": 7.24, "step": 55090 }, { "epoch": 6.6305655836341755, "grad_norm": 12.71737003326416, "learning_rate": 0.00019820307234327792, "loss": 7.3102, "step": 55100 }, { "epoch": 6.631768953068592, "grad_norm": 25.804162979125977, "learning_rate": 0.0001982023543261826, "loss": 7.2828, "step": 55110 }, { "epoch": 6.632972322503008, "grad_norm": 18.82998275756836, "learning_rate": 0.00019820163616696422, "loss": 7.5072, "step": 55120 }, { "epoch": 6.634175691937425, "grad_norm": 58.981834411621094, "learning_rate": 0.00019820091786562386, "loss": 7.5733, "step": 55130 }, { "epoch": 6.635379061371841, "grad_norm": 102.47163391113281, "learning_rate": 0.0001982001994221625, "loss": 7.5286, "step": 55140 }, { "epoch": 6.636582430806257, "grad_norm": 173.20286560058594, "learning_rate": 0.00019819948083658125, "loss": 7.5001, "step": 55150 }, { "epoch": 6.637785800240674, "grad_norm": 59.042510986328125, "learning_rate": 0.00019819876210888106, "loss": 7.3809, "step": 55160 }, { "epoch": 6.6389891696750905, "grad_norm": 17.95256233215332, "learning_rate": 0.00019819804323906306, "loss": 7.3043, "step": 55170 }, { "epoch": 6.640192539109506, "grad_norm": 135.5763397216797, "learning_rate": 0.00019819732422712822, "loss": 7.3822, "step": 55180 }, { "epoch": 6.641395908543923, "grad_norm": 9.904345512390137, "learning_rate": 0.0001981966050730776, "loss": 7.3074, "step": 55190 }, { "epoch": 6.64259927797834, "grad_norm": 10.047538757324219, "learning_rate": 0.00019819588577691226, "loss": 7.1864, "step": 55200 }, { "epoch": 6.643802647412755, "grad_norm": 8.307875633239746, "learning_rate": 0.0001981951663386332, "loss": 7.1257, "step": 55210 }, { "epoch": 6.645006016847172, "grad_norm": 10.847360610961914, "learning_rate": 0.00019819444675824155, "loss": 7.1619, "step": 55220 }, { "epoch": 6.646209386281589, "grad_norm": 8.394771575927734, "learning_rate": 0.00019819372703573827, "loss": 7.3264, "step": 55230 }, { "epoch": 6.6474127557160045, "grad_norm": 26.60249900817871, "learning_rate": 0.0001981930071711244, "loss": 7.1023, "step": 55240 }, { "epoch": 6.648616125150421, "grad_norm": 103.9189453125, "learning_rate": 0.00019819228716440104, "loss": 7.2382, "step": 55250 }, { "epoch": 6.649819494584838, "grad_norm": 217.11172485351562, "learning_rate": 0.00019819156701556917, "loss": 7.2982, "step": 55260 }, { "epoch": 6.651022864019254, "grad_norm": 112.395751953125, "learning_rate": 0.00019819084672462986, "loss": 7.4447, "step": 55270 }, { "epoch": 6.65222623345367, "grad_norm": 36.52063751220703, "learning_rate": 0.00019819012629158415, "loss": 7.3282, "step": 55280 }, { "epoch": 6.653429602888087, "grad_norm": 1100.7315673828125, "learning_rate": 0.00019818940571643312, "loss": 7.2989, "step": 55290 }, { "epoch": 6.654632972322503, "grad_norm": 5.3416748046875, "learning_rate": 0.00019818868499917774, "loss": 7.3448, "step": 55300 }, { "epoch": 6.6558363417569195, "grad_norm": 3.8963637351989746, "learning_rate": 0.0001981879641398191, "loss": 7.393, "step": 55310 }, { "epoch": 6.657039711191336, "grad_norm": 20.22577667236328, "learning_rate": 0.00019818724313835825, "loss": 7.1391, "step": 55320 }, { "epoch": 6.658243080625752, "grad_norm": 23.58957862854004, "learning_rate": 0.0001981865219947962, "loss": 7.1837, "step": 55330 }, { "epoch": 6.659446450060169, "grad_norm": 6.533280372619629, "learning_rate": 0.000198185800709134, "loss": 7.359, "step": 55340 }, { "epoch": 6.6606498194945845, "grad_norm": 10.857223510742188, "learning_rate": 0.00019818507928137273, "loss": 7.2454, "step": 55350 }, { "epoch": 6.661853188929001, "grad_norm": 14.95755386352539, "learning_rate": 0.0001981843577115134, "loss": 7.1278, "step": 55360 }, { "epoch": 6.663056558363418, "grad_norm": 40.79299545288086, "learning_rate": 0.00019818363599955708, "loss": 7.4723, "step": 55370 }, { "epoch": 6.664259927797834, "grad_norm": 45.469818115234375, "learning_rate": 0.00019818291414550477, "loss": 7.2531, "step": 55380 }, { "epoch": 6.66546329723225, "grad_norm": 17.84055519104004, "learning_rate": 0.00019818219214935755, "loss": 7.3298, "step": 55390 }, { "epoch": 6.666666666666667, "grad_norm": 5.900567531585693, "learning_rate": 0.00019818147001111647, "loss": 7.31, "step": 55400 }, { "epoch": 6.667870036101083, "grad_norm": 5.828828811645508, "learning_rate": 0.00019818074773078257, "loss": 7.254, "step": 55410 }, { "epoch": 6.669073405535499, "grad_norm": 7.5560736656188965, "learning_rate": 0.00019818002530835687, "loss": 7.0643, "step": 55420 }, { "epoch": 6.670276774969916, "grad_norm": 7.557502269744873, "learning_rate": 0.00019817930274384043, "loss": 7.1738, "step": 55430 }, { "epoch": 6.671480144404332, "grad_norm": 81.7336196899414, "learning_rate": 0.0001981785800372343, "loss": 7.1299, "step": 55440 }, { "epoch": 6.672683513838749, "grad_norm": 5.938595294952393, "learning_rate": 0.00019817785718853953, "loss": 7.1194, "step": 55450 }, { "epoch": 6.673886883273164, "grad_norm": 12.312219619750977, "learning_rate": 0.00019817713419775717, "loss": 7.1877, "step": 55460 }, { "epoch": 6.675090252707581, "grad_norm": 47.27899932861328, "learning_rate": 0.00019817641106488826, "loss": 7.2036, "step": 55470 }, { "epoch": 6.676293622141998, "grad_norm": 77.00691986083984, "learning_rate": 0.00019817568778993382, "loss": 7.2, "step": 55480 }, { "epoch": 6.6774969915764135, "grad_norm": 35.118560791015625, "learning_rate": 0.00019817496437289493, "loss": 7.3714, "step": 55490 }, { "epoch": 6.67870036101083, "grad_norm": 23.781373977661133, "learning_rate": 0.00019817424081377264, "loss": 7.2999, "step": 55500 }, { "epoch": 6.679903730445247, "grad_norm": 605.828857421875, "learning_rate": 0.00019817351711256796, "loss": 7.4063, "step": 55510 }, { "epoch": 6.681107099879663, "grad_norm": 120.28981018066406, "learning_rate": 0.000198172793269282, "loss": 7.3056, "step": 55520 }, { "epoch": 6.682310469314079, "grad_norm": 14.749140739440918, "learning_rate": 0.00019817206928391575, "loss": 7.1653, "step": 55530 }, { "epoch": 6.683513838748496, "grad_norm": 19.87139129638672, "learning_rate": 0.00019817134515647027, "loss": 7.1294, "step": 55540 }, { "epoch": 6.684717208182912, "grad_norm": 51.131141662597656, "learning_rate": 0.00019817062088694663, "loss": 7.0697, "step": 55550 }, { "epoch": 6.6859205776173285, "grad_norm": 19.66100311279297, "learning_rate": 0.00019816989647534588, "loss": 7.2484, "step": 55560 }, { "epoch": 6.687123947051745, "grad_norm": 59.18501663208008, "learning_rate": 0.000198169171921669, "loss": 7.1588, "step": 55570 }, { "epoch": 6.688327316486161, "grad_norm": 12.373442649841309, "learning_rate": 0.00019816844722591714, "loss": 7.129, "step": 55580 }, { "epoch": 6.689530685920578, "grad_norm": 46.0589485168457, "learning_rate": 0.00019816772238809128, "loss": 7.0885, "step": 55590 }, { "epoch": 6.690734055354994, "grad_norm": 35.988494873046875, "learning_rate": 0.00019816699740819247, "loss": 7.2061, "step": 55600 }, { "epoch": 6.69193742478941, "grad_norm": 30.199434280395508, "learning_rate": 0.0001981662722862218, "loss": 7.152, "step": 55610 }, { "epoch": 6.693140794223827, "grad_norm": 26.043407440185547, "learning_rate": 0.0001981655470221803, "loss": 7.0291, "step": 55620 }, { "epoch": 6.6943441636582435, "grad_norm": 101.2144546508789, "learning_rate": 0.000198164821616069, "loss": 7.2461, "step": 55630 }, { "epoch": 6.695547533092659, "grad_norm": 17.652393341064453, "learning_rate": 0.000198164096067889, "loss": 7.1416, "step": 55640 }, { "epoch": 6.696750902527076, "grad_norm": 42.61686325073242, "learning_rate": 0.0001981633703776413, "loss": 7.1776, "step": 55650 }, { "epoch": 6.697954271961493, "grad_norm": 151.83102416992188, "learning_rate": 0.000198162644545327, "loss": 7.1747, "step": 55660 }, { "epoch": 6.699157641395908, "grad_norm": 26.311147689819336, "learning_rate": 0.00019816191857094704, "loss": 6.9852, "step": 55670 }, { "epoch": 6.700361010830325, "grad_norm": 34.56694793701172, "learning_rate": 0.00019816119245450259, "loss": 6.9377, "step": 55680 }, { "epoch": 6.701564380264742, "grad_norm": 38.924007415771484, "learning_rate": 0.00019816046619599466, "loss": 7.0062, "step": 55690 }, { "epoch": 6.702767749699158, "grad_norm": 89.71614074707031, "learning_rate": 0.00019815973979542433, "loss": 7.1952, "step": 55700 }, { "epoch": 6.703971119133574, "grad_norm": 95.2943115234375, "learning_rate": 0.00019815901325279258, "loss": 7.1886, "step": 55710 }, { "epoch": 6.70517448856799, "grad_norm": 88.1927719116211, "learning_rate": 0.0001981582865681005, "loss": 7.0989, "step": 55720 }, { "epoch": 6.706377858002407, "grad_norm": 95.01557922363281, "learning_rate": 0.00019815755974134918, "loss": 7.063, "step": 55730 }, { "epoch": 6.707581227436823, "grad_norm": 15.927534103393555, "learning_rate": 0.0001981568327725396, "loss": 7.0364, "step": 55740 }, { "epoch": 6.708784596871239, "grad_norm": 19.66518211364746, "learning_rate": 0.00019815610566167288, "loss": 7.1141, "step": 55750 }, { "epoch": 6.709987966305656, "grad_norm": 108.02747344970703, "learning_rate": 0.00019815537840875002, "loss": 7.1128, "step": 55760 }, { "epoch": 6.7111913357400725, "grad_norm": 20.291255950927734, "learning_rate": 0.00019815465101377208, "loss": 7.1718, "step": 55770 }, { "epoch": 6.712394705174488, "grad_norm": 67.08749389648438, "learning_rate": 0.00019815392347674019, "loss": 7.0628, "step": 55780 }, { "epoch": 6.713598074608905, "grad_norm": 53.44804000854492, "learning_rate": 0.00019815319579765528, "loss": 6.9924, "step": 55790 }, { "epoch": 6.714801444043322, "grad_norm": 42.22039794921875, "learning_rate": 0.00019815246797651848, "loss": 7.0726, "step": 55800 }, { "epoch": 6.7160048134777375, "grad_norm": 77.47686767578125, "learning_rate": 0.0001981517400133308, "loss": 7.0795, "step": 55810 }, { "epoch": 6.717208182912154, "grad_norm": 125.3265380859375, "learning_rate": 0.0001981510119080934, "loss": 7.2251, "step": 55820 }, { "epoch": 6.71841155234657, "grad_norm": 70.47822570800781, "learning_rate": 0.00019815028366080716, "loss": 7.2634, "step": 55830 }, { "epoch": 6.719614921780987, "grad_norm": 96.91410064697266, "learning_rate": 0.00019814955527147325, "loss": 6.9081, "step": 55840 }, { "epoch": 6.720818291215403, "grad_norm": 14.184112548828125, "learning_rate": 0.00019814882674009274, "loss": 6.9851, "step": 55850 }, { "epoch": 6.722021660649819, "grad_norm": 24.29714012145996, "learning_rate": 0.00019814809806666662, "loss": 6.9962, "step": 55860 }, { "epoch": 6.723225030084236, "grad_norm": 12.130228042602539, "learning_rate": 0.00019814736925119597, "loss": 6.9878, "step": 55870 }, { "epoch": 6.7244283995186525, "grad_norm": 127.275634765625, "learning_rate": 0.00019814664029368183, "loss": 6.9878, "step": 55880 }, { "epoch": 6.725631768953068, "grad_norm": 60.52707290649414, "learning_rate": 0.0001981459111941253, "loss": 7.0435, "step": 55890 }, { "epoch": 6.726835138387485, "grad_norm": 33.796531677246094, "learning_rate": 0.0001981451819525274, "loss": 7.0629, "step": 55900 }, { "epoch": 6.728038507821902, "grad_norm": 24.30449676513672, "learning_rate": 0.0001981444525688892, "loss": 7.0058, "step": 55910 }, { "epoch": 6.729241877256317, "grad_norm": 5.832573413848877, "learning_rate": 0.0001981437230432117, "loss": 7.1164, "step": 55920 }, { "epoch": 6.730445246690734, "grad_norm": 10.127008438110352, "learning_rate": 0.000198142993375496, "loss": 6.9855, "step": 55930 }, { "epoch": 6.731648616125151, "grad_norm": 14.363208770751953, "learning_rate": 0.0001981422635657432, "loss": 6.9767, "step": 55940 }, { "epoch": 6.7328519855595665, "grad_norm": 12.817060470581055, "learning_rate": 0.0001981415336139543, "loss": 6.9231, "step": 55950 }, { "epoch": 6.734055354993983, "grad_norm": 14.756559371948242, "learning_rate": 0.0001981408035201304, "loss": 7.0664, "step": 55960 }, { "epoch": 6.7352587244284, "grad_norm": 31.317472457885742, "learning_rate": 0.0001981400732842725, "loss": 7.2129, "step": 55970 }, { "epoch": 6.736462093862816, "grad_norm": 17.89832305908203, "learning_rate": 0.00019813934290638168, "loss": 7.0897, "step": 55980 }, { "epoch": 6.737665463297232, "grad_norm": 13.871992111206055, "learning_rate": 0.000198138612386459, "loss": 6.9262, "step": 55990 }, { "epoch": 6.738868832731649, "grad_norm": 40.8659553527832, "learning_rate": 0.0001981378817245055, "loss": 6.9915, "step": 56000 }, { "epoch": 6.740072202166065, "grad_norm": 18.634611129760742, "learning_rate": 0.00019813715092052229, "loss": 7.0372, "step": 56010 }, { "epoch": 6.7412755716004815, "grad_norm": 56.851539611816406, "learning_rate": 0.00019813641997451037, "loss": 6.9743, "step": 56020 }, { "epoch": 6.742478941034898, "grad_norm": 13.186022758483887, "learning_rate": 0.00019813568888647084, "loss": 6.9053, "step": 56030 }, { "epoch": 6.743682310469314, "grad_norm": 12.31736946105957, "learning_rate": 0.00019813495765640474, "loss": 6.7843, "step": 56040 }, { "epoch": 6.744885679903731, "grad_norm": 38.10343551635742, "learning_rate": 0.0001981342262843131, "loss": 6.8849, "step": 56050 }, { "epoch": 6.7460890493381465, "grad_norm": 12.185663223266602, "learning_rate": 0.00019813349477019703, "loss": 6.8924, "step": 56060 }, { "epoch": 6.747292418772563, "grad_norm": 202.5308380126953, "learning_rate": 0.00019813276311405757, "loss": 6.9407, "step": 56070 }, { "epoch": 6.74849578820698, "grad_norm": 31.466867446899414, "learning_rate": 0.00019813203131589577, "loss": 6.9976, "step": 56080 }, { "epoch": 6.749699157641396, "grad_norm": 36.996185302734375, "learning_rate": 0.0001981312993757127, "loss": 6.8978, "step": 56090 }, { "epoch": 6.750902527075812, "grad_norm": 14.816555976867676, "learning_rate": 0.00019813056729350937, "loss": 6.8933, "step": 56100 }, { "epoch": 6.752105896510229, "grad_norm": 8.242816925048828, "learning_rate": 0.00019812983506928693, "loss": 6.9547, "step": 56110 }, { "epoch": 6.753309265944645, "grad_norm": 11.20868968963623, "learning_rate": 0.00019812910270304633, "loss": 6.7677, "step": 56120 }, { "epoch": 6.754512635379061, "grad_norm": 13.01804256439209, "learning_rate": 0.00019812837019478875, "loss": 6.8561, "step": 56130 }, { "epoch": 6.755716004813478, "grad_norm": 12.331233978271484, "learning_rate": 0.00019812763754451518, "loss": 6.8899, "step": 56140 }, { "epoch": 6.756919374247894, "grad_norm": 10.18593978881836, "learning_rate": 0.0001981269047522267, "loss": 6.8891, "step": 56150 }, { "epoch": 6.758122743682311, "grad_norm": 15.321721076965332, "learning_rate": 0.00019812617181792433, "loss": 7.0075, "step": 56160 }, { "epoch": 6.759326113116726, "grad_norm": 20.115995407104492, "learning_rate": 0.00019812543874160918, "loss": 6.9075, "step": 56170 }, { "epoch": 6.760529482551143, "grad_norm": 25.264480590820312, "learning_rate": 0.0001981247055232823, "loss": 6.9353, "step": 56180 }, { "epoch": 6.76173285198556, "grad_norm": 9.208985328674316, "learning_rate": 0.00019812397216294474, "loss": 7.0104, "step": 56190 }, { "epoch": 6.7629362214199755, "grad_norm": 24.12775421142578, "learning_rate": 0.00019812323866059758, "loss": 6.9505, "step": 56200 }, { "epoch": 6.764139590854392, "grad_norm": 25.862085342407227, "learning_rate": 0.00019812250501624184, "loss": 6.9637, "step": 56210 }, { "epoch": 6.765342960288809, "grad_norm": 108.80167388916016, "learning_rate": 0.0001981217712298786, "loss": 7.3221, "step": 56220 }, { "epoch": 6.766546329723225, "grad_norm": 33.10988998413086, "learning_rate": 0.00019812103730150894, "loss": 7.0247, "step": 56230 }, { "epoch": 6.767749699157641, "grad_norm": 16.83145523071289, "learning_rate": 0.00019812030323113395, "loss": 6.9151, "step": 56240 }, { "epoch": 6.768953068592058, "grad_norm": 13.506775856018066, "learning_rate": 0.00019811956901875463, "loss": 6.9167, "step": 56250 }, { "epoch": 6.770156438026474, "grad_norm": 16.81162452697754, "learning_rate": 0.0001981188346643721, "loss": 6.9284, "step": 56260 }, { "epoch": 6.7713598074608905, "grad_norm": 10.348248481750488, "learning_rate": 0.00019811810016798735, "loss": 6.943, "step": 56270 }, { "epoch": 6.772563176895307, "grad_norm": 8.620027542114258, "learning_rate": 0.0001981173655296015, "loss": 6.8358, "step": 56280 }, { "epoch": 6.773766546329723, "grad_norm": 9.598176956176758, "learning_rate": 0.00019811663074921558, "loss": 6.8134, "step": 56290 }, { "epoch": 6.77496991576414, "grad_norm": 28.866487503051758, "learning_rate": 0.00019811589582683067, "loss": 6.8745, "step": 56300 }, { "epoch": 6.776173285198556, "grad_norm": 90.2942886352539, "learning_rate": 0.00019811516076244788, "loss": 6.8092, "step": 56310 }, { "epoch": 6.777376654632972, "grad_norm": 42.91451644897461, "learning_rate": 0.0001981144255560682, "loss": 7.0118, "step": 56320 }, { "epoch": 6.778580024067389, "grad_norm": 48.719337463378906, "learning_rate": 0.00019811369020769274, "loss": 7.3572, "step": 56330 }, { "epoch": 6.7797833935018055, "grad_norm": 9.14430046081543, "learning_rate": 0.0001981129547173225, "loss": 7.5251, "step": 56340 }, { "epoch": 6.780986762936221, "grad_norm": 4.427257061004639, "learning_rate": 0.0001981122190849586, "loss": 7.3696, "step": 56350 }, { "epoch": 6.782190132370638, "grad_norm": 3.3529253005981445, "learning_rate": 0.00019811148331060215, "loss": 7.3322, "step": 56360 }, { "epoch": 6.783393501805055, "grad_norm": 8.207467079162598, "learning_rate": 0.0001981107473942541, "loss": 7.1657, "step": 56370 }, { "epoch": 6.78459687123947, "grad_norm": 7.812586784362793, "learning_rate": 0.0001981100113359156, "loss": 7.1968, "step": 56380 }, { "epoch": 6.785800240673887, "grad_norm": 4.500979423522949, "learning_rate": 0.0001981092751355877, "loss": 7.0436, "step": 56390 }, { "epoch": 6.787003610108303, "grad_norm": 6.167128086090088, "learning_rate": 0.00019810853879327148, "loss": 7.0426, "step": 56400 }, { "epoch": 6.78820697954272, "grad_norm": 21.189769744873047, "learning_rate": 0.00019810780230896793, "loss": 7.011, "step": 56410 }, { "epoch": 6.789410348977136, "grad_norm": 8.848641395568848, "learning_rate": 0.00019810706568267817, "loss": 6.946, "step": 56420 }, { "epoch": 6.790613718411552, "grad_norm": 23.617721557617188, "learning_rate": 0.0001981063289144033, "loss": 7.0302, "step": 56430 }, { "epoch": 6.791817087845969, "grad_norm": 12.344385147094727, "learning_rate": 0.00019810559200414437, "loss": 6.9754, "step": 56440 }, { "epoch": 6.793020457280385, "grad_norm": 46.22349548339844, "learning_rate": 0.00019810485495190235, "loss": 6.967, "step": 56450 }, { "epoch": 6.794223826714801, "grad_norm": 36.92741394042969, "learning_rate": 0.00019810411775767842, "loss": 7.0277, "step": 56460 }, { "epoch": 6.795427196149218, "grad_norm": 40.07979965209961, "learning_rate": 0.00019810338042147363, "loss": 7.062, "step": 56470 }, { "epoch": 6.7966305655836345, "grad_norm": 10.478665351867676, "learning_rate": 0.00019810264294328902, "loss": 7.1715, "step": 56480 }, { "epoch": 6.79783393501805, "grad_norm": 13.519403457641602, "learning_rate": 0.00019810190532312566, "loss": 7.2005, "step": 56490 }, { "epoch": 6.799037304452467, "grad_norm": 61.327674865722656, "learning_rate": 0.0001981011675609846, "loss": 7.0186, "step": 56500 }, { "epoch": 6.800240673886883, "grad_norm": 24.989665985107422, "learning_rate": 0.00019810042965686696, "loss": 6.8714, "step": 56510 }, { "epoch": 6.8014440433212995, "grad_norm": 44.213470458984375, "learning_rate": 0.00019809969161077378, "loss": 6.8833, "step": 56520 }, { "epoch": 6.802647412755716, "grad_norm": 10.676111221313477, "learning_rate": 0.00019809895342270611, "loss": 6.9762, "step": 56530 }, { "epoch": 6.803850782190132, "grad_norm": 45.14767837524414, "learning_rate": 0.00019809821509266505, "loss": 6.9556, "step": 56540 }, { "epoch": 6.805054151624549, "grad_norm": 43.18658447265625, "learning_rate": 0.00019809747662065163, "loss": 6.8845, "step": 56550 }, { "epoch": 6.806257521058965, "grad_norm": 166.0917205810547, "learning_rate": 0.00019809673800666694, "loss": 7.1932, "step": 56560 }, { "epoch": 6.807460890493381, "grad_norm": 133.87234497070312, "learning_rate": 0.0001980959992507121, "loss": 7.0549, "step": 56570 }, { "epoch": 6.808664259927798, "grad_norm": 27.237041473388672, "learning_rate": 0.0001980952603527881, "loss": 7.2864, "step": 56580 }, { "epoch": 6.8098676293622145, "grad_norm": 9.687499046325684, "learning_rate": 0.000198094521312896, "loss": 7.1425, "step": 56590 }, { "epoch": 6.81107099879663, "grad_norm": 19.088077545166016, "learning_rate": 0.00019809378213103695, "loss": 7.0598, "step": 56600 }, { "epoch": 6.812274368231047, "grad_norm": 26.874658584594727, "learning_rate": 0.000198093042807212, "loss": 7.3514, "step": 56610 }, { "epoch": 6.813477737665464, "grad_norm": 29.382793426513672, "learning_rate": 0.00019809230334142216, "loss": 7.3406, "step": 56620 }, { "epoch": 6.814681107099879, "grad_norm": 20.1160831451416, "learning_rate": 0.00019809156373366854, "loss": 7.106, "step": 56630 }, { "epoch": 6.815884476534296, "grad_norm": 93.99827575683594, "learning_rate": 0.00019809082398395222, "loss": 7.0595, "step": 56640 }, { "epoch": 6.817087845968713, "grad_norm": 64.36432647705078, "learning_rate": 0.00019809008409227426, "loss": 6.985, "step": 56650 }, { "epoch": 6.8182912154031285, "grad_norm": 63.12652587890625, "learning_rate": 0.00019808934405863573, "loss": 7.1129, "step": 56660 }, { "epoch": 6.819494584837545, "grad_norm": 273.1567687988281, "learning_rate": 0.00019808860388303772, "loss": 6.8513, "step": 56670 }, { "epoch": 6.820697954271962, "grad_norm": 54.955684661865234, "learning_rate": 0.00019808786356548125, "loss": 7.033, "step": 56680 }, { "epoch": 6.821901323706378, "grad_norm": 179.6001739501953, "learning_rate": 0.00019808712310596743, "loss": 6.8798, "step": 56690 }, { "epoch": 6.823104693140794, "grad_norm": 24.781766891479492, "learning_rate": 0.00019808638250449733, "loss": 7.0504, "step": 56700 }, { "epoch": 6.824308062575211, "grad_norm": 96.25439453125, "learning_rate": 0.00019808564176107202, "loss": 7.2775, "step": 56710 }, { "epoch": 6.825511432009627, "grad_norm": 60.37154006958008, "learning_rate": 0.00019808490087569258, "loss": 7.2759, "step": 56720 }, { "epoch": 6.8267148014440435, "grad_norm": 52.00590515136719, "learning_rate": 0.00019808415984836006, "loss": 7.2158, "step": 56730 }, { "epoch": 6.827918170878459, "grad_norm": 25.168285369873047, "learning_rate": 0.00019808341867907556, "loss": 7.1152, "step": 56740 }, { "epoch": 6.829121540312876, "grad_norm": 47.336814880371094, "learning_rate": 0.00019808267736784013, "loss": 7.064, "step": 56750 }, { "epoch": 6.830324909747293, "grad_norm": 127.8853759765625, "learning_rate": 0.00019808193591465485, "loss": 7.0396, "step": 56760 }, { "epoch": 6.8315282791817085, "grad_norm": 48.18959426879883, "learning_rate": 0.00019808119431952078, "loss": 7.1016, "step": 56770 }, { "epoch": 6.832731648616125, "grad_norm": 117.4593276977539, "learning_rate": 0.000198080452582439, "loss": 7.0179, "step": 56780 }, { "epoch": 6.833935018050542, "grad_norm": 19.329599380493164, "learning_rate": 0.0001980797107034106, "loss": 7.0359, "step": 56790 }, { "epoch": 6.835138387484958, "grad_norm": 32.706485748291016, "learning_rate": 0.00019807896868243664, "loss": 6.9465, "step": 56800 }, { "epoch": 6.836341756919374, "grad_norm": 29.41881561279297, "learning_rate": 0.00019807822651951823, "loss": 7.0714, "step": 56810 }, { "epoch": 6.837545126353791, "grad_norm": 65.36539459228516, "learning_rate": 0.0001980774842146564, "loss": 7.1706, "step": 56820 }, { "epoch": 6.838748495788207, "grad_norm": 152.0336151123047, "learning_rate": 0.00019807674176785224, "loss": 7.2673, "step": 56830 }, { "epoch": 6.839951865222623, "grad_norm": 45.911563873291016, "learning_rate": 0.00019807599917910678, "loss": 7.3042, "step": 56840 }, { "epoch": 6.841155234657039, "grad_norm": 168.76766967773438, "learning_rate": 0.00019807525644842115, "loss": 7.5659, "step": 56850 }, { "epoch": 6.842358604091456, "grad_norm": 210.3383331298828, "learning_rate": 0.00019807451357579643, "loss": 7.4778, "step": 56860 }, { "epoch": 6.843561973525873, "grad_norm": 13.706093788146973, "learning_rate": 0.00019807377056123366, "loss": 7.4884, "step": 56870 }, { "epoch": 6.844765342960288, "grad_norm": 18.30304718017578, "learning_rate": 0.00019807302740473392, "loss": 7.5125, "step": 56880 }, { "epoch": 6.845968712394705, "grad_norm": 9.22196102142334, "learning_rate": 0.0001980722841062983, "loss": 7.4489, "step": 56890 }, { "epoch": 6.847172081829122, "grad_norm": 6.009471893310547, "learning_rate": 0.0001980715406659279, "loss": 7.3385, "step": 56900 }, { "epoch": 6.8483754512635375, "grad_norm": 17.32166290283203, "learning_rate": 0.00019807079708362375, "loss": 7.2548, "step": 56910 }, { "epoch": 6.849578820697954, "grad_norm": 33.649078369140625, "learning_rate": 0.00019807005335938693, "loss": 7.0738, "step": 56920 }, { "epoch": 6.850782190132371, "grad_norm": 45.04438781738281, "learning_rate": 0.00019806930949321856, "loss": 7.0602, "step": 56930 }, { "epoch": 6.851985559566787, "grad_norm": 36.69521713256836, "learning_rate": 0.00019806856548511967, "loss": 7.1064, "step": 56940 }, { "epoch": 6.853188929001203, "grad_norm": 29.256868362426758, "learning_rate": 0.00019806782133509134, "loss": 6.97, "step": 56950 }, { "epoch": 6.85439229843562, "grad_norm": 64.79109191894531, "learning_rate": 0.00019806707704313466, "loss": 6.985, "step": 56960 }, { "epoch": 6.855595667870036, "grad_norm": 525.453369140625, "learning_rate": 0.00019806633260925074, "loss": 6.9575, "step": 56970 }, { "epoch": 6.8567990373044525, "grad_norm": 277.41180419921875, "learning_rate": 0.0001980655880334406, "loss": 6.8746, "step": 56980 }, { "epoch": 6.858002406738869, "grad_norm": 606.5435791015625, "learning_rate": 0.00019806484331570535, "loss": 6.9853, "step": 56990 }, { "epoch": 6.859205776173285, "grad_norm": 713.16845703125, "learning_rate": 0.00019806409845604604, "loss": 7.0665, "step": 57000 }, { "epoch": 6.860409145607702, "grad_norm": 927.540771484375, "learning_rate": 0.0001980633534544638, "loss": 7.1799, "step": 57010 }, { "epoch": 6.861612515042118, "grad_norm": 360.078369140625, "learning_rate": 0.00019806260831095967, "loss": 7.2209, "step": 57020 }, { "epoch": 6.862815884476534, "grad_norm": 242.3676300048828, "learning_rate": 0.00019806186302553474, "loss": 7.3714, "step": 57030 }, { "epoch": 6.864019253910951, "grad_norm": 987.6715698242188, "learning_rate": 0.00019806111759819006, "loss": 7.2273, "step": 57040 }, { "epoch": 6.8652226233453675, "grad_norm": 413.6085205078125, "learning_rate": 0.00019806037202892675, "loss": 7.3345, "step": 57050 }, { "epoch": 6.866425992779783, "grad_norm": 55.95791244506836, "learning_rate": 0.00019805962631774587, "loss": 7.07, "step": 57060 }, { "epoch": 6.8676293622142, "grad_norm": 25.23629379272461, "learning_rate": 0.0001980588804646485, "loss": 6.9322, "step": 57070 }, { "epoch": 6.868832731648616, "grad_norm": 67.85858154296875, "learning_rate": 0.0001980581344696357, "loss": 7.1859, "step": 57080 }, { "epoch": 6.870036101083032, "grad_norm": 9.694706916809082, "learning_rate": 0.0001980573883327086, "loss": 6.9408, "step": 57090 }, { "epoch": 6.871239470517449, "grad_norm": 123.4223861694336, "learning_rate": 0.00019805664205386822, "loss": 7.0047, "step": 57100 }, { "epoch": 6.872442839951865, "grad_norm": 136.64669799804688, "learning_rate": 0.00019805589563311567, "loss": 6.9801, "step": 57110 }, { "epoch": 6.873646209386282, "grad_norm": 58.5440559387207, "learning_rate": 0.00019805514907045207, "loss": 6.8749, "step": 57120 }, { "epoch": 6.874849578820698, "grad_norm": 1241.7928466796875, "learning_rate": 0.00019805440236587843, "loss": 7.0498, "step": 57130 }, { "epoch": 6.876052948255114, "grad_norm": 118.74279022216797, "learning_rate": 0.00019805365551939588, "loss": 7.0736, "step": 57140 }, { "epoch": 6.877256317689531, "grad_norm": 10.805257797241211, "learning_rate": 0.00019805290853100544, "loss": 7.3568, "step": 57150 }, { "epoch": 6.878459687123947, "grad_norm": 43.8604736328125, "learning_rate": 0.00019805216140070825, "loss": 7.0794, "step": 57160 }, { "epoch": 6.879663056558363, "grad_norm": 54.924869537353516, "learning_rate": 0.00019805141412850542, "loss": 7.0036, "step": 57170 }, { "epoch": 6.88086642599278, "grad_norm": 17.287311553955078, "learning_rate": 0.00019805066671439793, "loss": 7.0393, "step": 57180 }, { "epoch": 6.882069795427196, "grad_norm": 139.8007049560547, "learning_rate": 0.00019804991915838694, "loss": 6.8933, "step": 57190 }, { "epoch": 6.883273164861612, "grad_norm": 20.29083824157715, "learning_rate": 0.0001980491714604735, "loss": 6.921, "step": 57200 }, { "epoch": 6.884476534296029, "grad_norm": 17.311073303222656, "learning_rate": 0.00019804842362065872, "loss": 6.9645, "step": 57210 }, { "epoch": 6.885679903730445, "grad_norm": 29.02623176574707, "learning_rate": 0.00019804767563894363, "loss": 6.8939, "step": 57220 }, { "epoch": 6.8868832731648615, "grad_norm": 33.343406677246094, "learning_rate": 0.0001980469275153294, "loss": 6.8359, "step": 57230 }, { "epoch": 6.888086642599278, "grad_norm": 141.8105926513672, "learning_rate": 0.000198046179249817, "loss": 7.1161, "step": 57240 }, { "epoch": 6.889290012033694, "grad_norm": 44.20021438598633, "learning_rate": 0.00019804543084240761, "loss": 6.9137, "step": 57250 }, { "epoch": 6.890493381468111, "grad_norm": 233.2378692626953, "learning_rate": 0.00019804468229310226, "loss": 6.9329, "step": 57260 }, { "epoch": 6.891696750902527, "grad_norm": 57.86533737182617, "learning_rate": 0.00019804393360190205, "loss": 6.8827, "step": 57270 }, { "epoch": 6.892900120336943, "grad_norm": 55.147926330566406, "learning_rate": 0.00019804318476880804, "loss": 7.0093, "step": 57280 }, { "epoch": 6.89410348977136, "grad_norm": 826.686767578125, "learning_rate": 0.0001980424357938214, "loss": 6.9077, "step": 57290 }, { "epoch": 6.8953068592057765, "grad_norm": 30.803756713867188, "learning_rate": 0.0001980416866769431, "loss": 6.9971, "step": 57300 }, { "epoch": 6.896510228640192, "grad_norm": 52.69157791137695, "learning_rate": 0.00019804093741817428, "loss": 6.9022, "step": 57310 }, { "epoch": 6.897713598074609, "grad_norm": 80.60667419433594, "learning_rate": 0.000198040188017516, "loss": 6.9859, "step": 57320 }, { "epoch": 6.898916967509026, "grad_norm": 31.60869598388672, "learning_rate": 0.0001980394384749694, "loss": 6.8363, "step": 57330 }, { "epoch": 6.900120336943441, "grad_norm": 84.88731384277344, "learning_rate": 0.0001980386887905355, "loss": 6.8107, "step": 57340 }, { "epoch": 6.901323706377858, "grad_norm": 52.038761138916016, "learning_rate": 0.00019803793896421544, "loss": 6.8244, "step": 57350 }, { "epoch": 6.902527075812275, "grad_norm": 518.0714721679688, "learning_rate": 0.00019803718899601025, "loss": 6.8211, "step": 57360 }, { "epoch": 6.9037304452466906, "grad_norm": 144.3697509765625, "learning_rate": 0.00019803643888592105, "loss": 6.9075, "step": 57370 }, { "epoch": 6.904933814681107, "grad_norm": 61.14954376220703, "learning_rate": 0.00019803568863394893, "loss": 6.8461, "step": 57380 }, { "epoch": 6.906137184115524, "grad_norm": 192.9075164794922, "learning_rate": 0.00019803493824009495, "loss": 6.9015, "step": 57390 }, { "epoch": 6.90734055354994, "grad_norm": 318.1616516113281, "learning_rate": 0.00019803418770436023, "loss": 6.8928, "step": 57400 }, { "epoch": 6.908543922984356, "grad_norm": 36.558746337890625, "learning_rate": 0.0001980334370267458, "loss": 6.8422, "step": 57410 }, { "epoch": 6.909747292418773, "grad_norm": 41.883949279785156, "learning_rate": 0.0001980326862072528, "loss": 6.8808, "step": 57420 }, { "epoch": 6.910950661853189, "grad_norm": 162.16358947753906, "learning_rate": 0.00019803193524588233, "loss": 6.7901, "step": 57430 }, { "epoch": 6.9121540312876055, "grad_norm": 31.169635772705078, "learning_rate": 0.00019803118414263543, "loss": 6.8673, "step": 57440 }, { "epoch": 6.913357400722021, "grad_norm": 1550.4622802734375, "learning_rate": 0.0001980304328975132, "loss": 6.9876, "step": 57450 }, { "epoch": 6.914560770156438, "grad_norm": 32.81758117675781, "learning_rate": 0.0001980296815105167, "loss": 7.0469, "step": 57460 }, { "epoch": 6.915764139590855, "grad_norm": 42.625938415527344, "learning_rate": 0.00019802892998164708, "loss": 6.9365, "step": 57470 }, { "epoch": 6.9169675090252705, "grad_norm": 66.55526733398438, "learning_rate": 0.0001980281783109054, "loss": 6.8728, "step": 57480 }, { "epoch": 6.918170878459687, "grad_norm": 26.685131072998047, "learning_rate": 0.00019802742649829274, "loss": 6.8764, "step": 57490 }, { "epoch": 6.919374247894104, "grad_norm": 24.899927139282227, "learning_rate": 0.00019802667454381018, "loss": 6.9962, "step": 57500 }, { "epoch": 6.92057761732852, "grad_norm": 458.8593444824219, "learning_rate": 0.0001980259224474588, "loss": 6.8451, "step": 57510 }, { "epoch": 6.921780986762936, "grad_norm": 248.4935302734375, "learning_rate": 0.00019802517020923977, "loss": 6.7186, "step": 57520 }, { "epoch": 6.922984356197353, "grad_norm": 62.006656646728516, "learning_rate": 0.00019802441782915407, "loss": 6.7663, "step": 57530 }, { "epoch": 6.924187725631769, "grad_norm": 87.1061782836914, "learning_rate": 0.00019802366530720283, "loss": 6.9221, "step": 57540 }, { "epoch": 6.925391095066185, "grad_norm": 238.37020874023438, "learning_rate": 0.00019802291264338717, "loss": 6.9728, "step": 57550 }, { "epoch": 6.926594464500601, "grad_norm": 59.73338317871094, "learning_rate": 0.00019802215983770816, "loss": 7.0036, "step": 57560 }, { "epoch": 6.927797833935018, "grad_norm": 571.4495239257812, "learning_rate": 0.00019802140689016687, "loss": 7.0787, "step": 57570 }, { "epoch": 6.929001203369435, "grad_norm": 52.87287521362305, "learning_rate": 0.00019802065380076437, "loss": 7.0749, "step": 57580 }, { "epoch": 6.93020457280385, "grad_norm": 46.382179260253906, "learning_rate": 0.00019801990056950183, "loss": 6.9606, "step": 57590 }, { "epoch": 6.931407942238267, "grad_norm": 1761.7576904296875, "learning_rate": 0.0001980191471963803, "loss": 7.0838, "step": 57600 }, { "epoch": 6.932611311672684, "grad_norm": 43.45762634277344, "learning_rate": 0.00019801839368140082, "loss": 7.0395, "step": 57610 }, { "epoch": 6.9338146811070995, "grad_norm": 53.85858917236328, "learning_rate": 0.00019801764002456455, "loss": 7.0695, "step": 57620 }, { "epoch": 6.935018050541516, "grad_norm": 200.94857788085938, "learning_rate": 0.00019801688622587255, "loss": 6.9848, "step": 57630 }, { "epoch": 6.936221419975933, "grad_norm": 733.8641967773438, "learning_rate": 0.00019801613228532592, "loss": 7.012, "step": 57640 }, { "epoch": 6.937424789410349, "grad_norm": 194.35470581054688, "learning_rate": 0.00019801537820292574, "loss": 7.2865, "step": 57650 }, { "epoch": 6.938628158844765, "grad_norm": 385.124755859375, "learning_rate": 0.00019801462397867314, "loss": 7.8188, "step": 57660 }, { "epoch": 6.939831528279182, "grad_norm": 57.436866760253906, "learning_rate": 0.00019801386961256916, "loss": 7.6012, "step": 57670 }, { "epoch": 6.941034897713598, "grad_norm": 58.678226470947266, "learning_rate": 0.0001980131151046149, "loss": 7.3455, "step": 57680 }, { "epoch": 6.9422382671480145, "grad_norm": 477.7326354980469, "learning_rate": 0.00019801236045481147, "loss": 7.2111, "step": 57690 }, { "epoch": 6.943441636582431, "grad_norm": 4454.03466796875, "learning_rate": 0.00019801160566315995, "loss": 7.019, "step": 57700 }, { "epoch": 6.944645006016847, "grad_norm": 380.172607421875, "learning_rate": 0.00019801085072966145, "loss": 7.4128, "step": 57710 }, { "epoch": 6.945848375451264, "grad_norm": 283.2594909667969, "learning_rate": 0.00019801009565431702, "loss": 7.0336, "step": 57720 }, { "epoch": 6.94705174488568, "grad_norm": 701.2532958984375, "learning_rate": 0.00019800934043712784, "loss": 7.1736, "step": 57730 }, { "epoch": 6.948255114320096, "grad_norm": 433.7130432128906, "learning_rate": 0.0001980085850780949, "loss": 7.0403, "step": 57740 }, { "epoch": 6.949458483754513, "grad_norm": 132.9949951171875, "learning_rate": 0.00019800782957721938, "loss": 7.1357, "step": 57750 }, { "epoch": 6.9506618531889295, "grad_norm": 323.5587158203125, "learning_rate": 0.00019800707393450228, "loss": 7.0936, "step": 57760 }, { "epoch": 6.951865222623345, "grad_norm": 207.72708129882812, "learning_rate": 0.00019800631814994479, "loss": 6.9885, "step": 57770 }, { "epoch": 6.953068592057762, "grad_norm": 24.468793869018555, "learning_rate": 0.00019800556222354795, "loss": 7.1302, "step": 57780 }, { "epoch": 6.954271961492178, "grad_norm": 51.776973724365234, "learning_rate": 0.00019800480615531286, "loss": 6.9301, "step": 57790 }, { "epoch": 6.955475330926594, "grad_norm": 77.13422393798828, "learning_rate": 0.0001980040499452406, "loss": 7.0983, "step": 57800 }, { "epoch": 6.956678700361011, "grad_norm": 280.56658935546875, "learning_rate": 0.00019800329359333233, "loss": 7.1142, "step": 57810 }, { "epoch": 6.957882069795427, "grad_norm": 233.68243408203125, "learning_rate": 0.00019800253709958905, "loss": 7.1303, "step": 57820 }, { "epoch": 6.959085439229844, "grad_norm": 2115.4267578125, "learning_rate": 0.00019800178046401192, "loss": 7.1151, "step": 57830 }, { "epoch": 6.96028880866426, "grad_norm": 178.87379455566406, "learning_rate": 0.00019800102368660203, "loss": 7.133, "step": 57840 }, { "epoch": 6.961492178098676, "grad_norm": 312.19281005859375, "learning_rate": 0.00019800026676736047, "loss": 7.3959, "step": 57850 }, { "epoch": 6.962695547533093, "grad_norm": 80.78385162353516, "learning_rate": 0.0001979995097062883, "loss": 7.7487, "step": 57860 }, { "epoch": 6.963898916967509, "grad_norm": 57.2595100402832, "learning_rate": 0.00019799875250338665, "loss": 7.9043, "step": 57870 }, { "epoch": 6.965102286401925, "grad_norm": 11.961369514465332, "learning_rate": 0.00019799799515865662, "loss": 7.5828, "step": 57880 }, { "epoch": 6.966305655836342, "grad_norm": 41.18761444091797, "learning_rate": 0.00019799723767209934, "loss": 7.5542, "step": 57890 }, { "epoch": 6.967509025270758, "grad_norm": 29.269014358520508, "learning_rate": 0.00019799648004371578, "loss": 7.4847, "step": 57900 }, { "epoch": 6.968712394705174, "grad_norm": 59.71797180175781, "learning_rate": 0.00019799572227350716, "loss": 7.5432, "step": 57910 }, { "epoch": 6.969915764139591, "grad_norm": 40.786354064941406, "learning_rate": 0.00019799496436147452, "loss": 7.5736, "step": 57920 }, { "epoch": 6.971119133574007, "grad_norm": 44.69639205932617, "learning_rate": 0.000197994206307619, "loss": 7.4789, "step": 57930 }, { "epoch": 6.9723225030084235, "grad_norm": 51.41767501831055, "learning_rate": 0.00019799344811194164, "loss": 7.4996, "step": 57940 }, { "epoch": 6.97352587244284, "grad_norm": 37.17129898071289, "learning_rate": 0.0001979926897744436, "loss": 7.5562, "step": 57950 }, { "epoch": 6.974729241877256, "grad_norm": 62.11970138549805, "learning_rate": 0.0001979919312951259, "loss": 7.5705, "step": 57960 }, { "epoch": 6.975932611311673, "grad_norm": 55.383914947509766, "learning_rate": 0.00019799117267398973, "loss": 7.5302, "step": 57970 }, { "epoch": 6.977135980746089, "grad_norm": 50.30924987792969, "learning_rate": 0.0001979904139110361, "loss": 7.6065, "step": 57980 }, { "epoch": 6.978339350180505, "grad_norm": 217.25390625, "learning_rate": 0.00019798965500626616, "loss": 7.4346, "step": 57990 }, { "epoch": 6.979542719614922, "grad_norm": 58.953006744384766, "learning_rate": 0.000197988895959681, "loss": 7.5839, "step": 58000 }, { "epoch": 6.9807460890493385, "grad_norm": 4133.89306640625, "learning_rate": 0.0001979881367712817, "loss": 7.6053, "step": 58010 }, { "epoch": 6.981949458483754, "grad_norm": 4566.65966796875, "learning_rate": 0.00019798737744106942, "loss": 7.7741, "step": 58020 }, { "epoch": 6.983152827918171, "grad_norm": 27203.322265625, "learning_rate": 0.0001979866179690452, "loss": 8.7773, "step": 58030 }, { "epoch": 6.984356197352588, "grad_norm": 381.32415771484375, "learning_rate": 0.00019798585835521012, "loss": 9.6103, "step": 58040 }, { "epoch": 6.985559566787003, "grad_norm": 80.57240295410156, "learning_rate": 0.0001979850985995653, "loss": 8.8075, "step": 58050 }, { "epoch": 6.98676293622142, "grad_norm": 55.19731140136719, "learning_rate": 0.00019798433870211188, "loss": 8.158, "step": 58060 }, { "epoch": 6.987966305655837, "grad_norm": 184.60914611816406, "learning_rate": 0.00019798357866285092, "loss": 7.5799, "step": 58070 }, { "epoch": 6.9891696750902526, "grad_norm": 350.00628662109375, "learning_rate": 0.00019798281848178352, "loss": 7.7077, "step": 58080 }, { "epoch": 6.990373044524669, "grad_norm": 3972.277099609375, "learning_rate": 0.00019798205815891082, "loss": 7.5186, "step": 58090 }, { "epoch": 6.991576413959086, "grad_norm": 3121.091552734375, "learning_rate": 0.0001979812976942339, "loss": 7.5143, "step": 58100 }, { "epoch": 6.992779783393502, "grad_norm": 646.8971557617188, "learning_rate": 0.00019798053708775383, "loss": 7.6622, "step": 58110 }, { "epoch": 6.993983152827918, "grad_norm": 2629.6357421875, "learning_rate": 0.00019797977633947172, "loss": 8.0694, "step": 58120 }, { "epoch": 6.995186522262334, "grad_norm": 683.5379638671875, "learning_rate": 0.00019797901544938869, "loss": 7.9134, "step": 58130 }, { "epoch": 6.996389891696751, "grad_norm": 96.93624877929688, "learning_rate": 0.00019797825441750583, "loss": 7.559, "step": 58140 }, { "epoch": 6.9975932611311675, "grad_norm": 23.396915435791016, "learning_rate": 0.00019797749324382426, "loss": 7.5456, "step": 58150 }, { "epoch": 6.998796630565583, "grad_norm": 19.570301055908203, "learning_rate": 0.00019797673192834507, "loss": 7.5895, "step": 58160 }, { "epoch": 7.0, "grad_norm": 20.815324783325195, "learning_rate": 0.00019797597047106934, "loss": 7.5813, "step": 58170 }, { "epoch": 7.0, "eval_loss": 7.590388774871826, "eval_runtime": 119.8697, "eval_samples_per_second": 61.625, "eval_steps_per_second": 7.708, "step": 58170 }, { "epoch": 7.001203369434417, "grad_norm": 6.156953811645508, "learning_rate": 0.0001979752088719982, "loss": 7.5375, "step": 58180 }, { "epoch": 7.0024067388688325, "grad_norm": 11.347652435302734, "learning_rate": 0.00019797444713113275, "loss": 7.6103, "step": 58190 }, { "epoch": 7.003610108303249, "grad_norm": 14.72617244720459, "learning_rate": 0.00019797368524847406, "loss": 7.7062, "step": 58200 }, { "epoch": 7.004813477737666, "grad_norm": 12.33837604522705, "learning_rate": 0.00019797292322402328, "loss": 7.5682, "step": 58210 }, { "epoch": 7.006016847172082, "grad_norm": 9.066107749938965, "learning_rate": 0.0001979721610577815, "loss": 7.5672, "step": 58220 }, { "epoch": 7.007220216606498, "grad_norm": 14.04942512512207, "learning_rate": 0.0001979713987497498, "loss": 7.512, "step": 58230 }, { "epoch": 7.008423586040915, "grad_norm": 3.176405906677246, "learning_rate": 0.0001979706362999293, "loss": 7.5756, "step": 58240 }, { "epoch": 7.009626955475331, "grad_norm": 12.903924942016602, "learning_rate": 0.00019796987370832112, "loss": 7.6313, "step": 58250 }, { "epoch": 7.0108303249097474, "grad_norm": 6.505326747894287, "learning_rate": 0.0001979691109749263, "loss": 7.6186, "step": 58260 }, { "epoch": 7.012033694344163, "grad_norm": 16.74981689453125, "learning_rate": 0.00019796834809974602, "loss": 7.5558, "step": 58270 }, { "epoch": 7.01323706377858, "grad_norm": 9.05154800415039, "learning_rate": 0.00019796758508278136, "loss": 7.5815, "step": 58280 }, { "epoch": 7.014440433212997, "grad_norm": 16.661067962646484, "learning_rate": 0.0001979668219240334, "loss": 7.6323, "step": 58290 }, { "epoch": 7.015643802647412, "grad_norm": 8.390691757202148, "learning_rate": 0.0001979660586235033, "loss": 7.6556, "step": 58300 }, { "epoch": 7.016847172081829, "grad_norm": 6.765058994293213, "learning_rate": 0.00019796529518119208, "loss": 7.6327, "step": 58310 }, { "epoch": 7.018050541516246, "grad_norm": 9.859917640686035, "learning_rate": 0.0001979645315971009, "loss": 7.5974, "step": 58320 }, { "epoch": 7.0192539109506615, "grad_norm": 5.7368998527526855, "learning_rate": 0.00019796376787123085, "loss": 7.5915, "step": 58330 }, { "epoch": 7.020457280385078, "grad_norm": 4.969225883483887, "learning_rate": 0.00019796300400358306, "loss": 7.6415, "step": 58340 }, { "epoch": 7.021660649819495, "grad_norm": 7.187626838684082, "learning_rate": 0.00019796223999415862, "loss": 7.5956, "step": 58350 }, { "epoch": 7.022864019253911, "grad_norm": 6.633703708648682, "learning_rate": 0.00019796147584295862, "loss": 7.5451, "step": 58360 }, { "epoch": 7.024067388688327, "grad_norm": 7.89232873916626, "learning_rate": 0.0001979607115499842, "loss": 7.6214, "step": 58370 }, { "epoch": 7.025270758122744, "grad_norm": 5.816625118255615, "learning_rate": 0.0001979599471152364, "loss": 7.5487, "step": 58380 }, { "epoch": 7.02647412755716, "grad_norm": 3.71209716796875, "learning_rate": 0.00019795918253871642, "loss": 7.5717, "step": 58390 }, { "epoch": 7.0276774969915765, "grad_norm": 9.613621711730957, "learning_rate": 0.0001979584178204253, "loss": 7.609, "step": 58400 }, { "epoch": 7.028880866425993, "grad_norm": 3.8423612117767334, "learning_rate": 0.00019795765296036413, "loss": 7.4974, "step": 58410 }, { "epoch": 7.030084235860409, "grad_norm": 4.460007667541504, "learning_rate": 0.0001979568879585341, "loss": 7.5948, "step": 58420 }, { "epoch": 7.031287605294826, "grad_norm": 4.862419128417969, "learning_rate": 0.00019795612281493625, "loss": 7.5215, "step": 58430 }, { "epoch": 7.0324909747292415, "grad_norm": 5.015869617462158, "learning_rate": 0.00019795535752957172, "loss": 7.6274, "step": 58440 }, { "epoch": 7.033694344163658, "grad_norm": 6.971224784851074, "learning_rate": 0.00019795459210244156, "loss": 7.7244, "step": 58450 }, { "epoch": 7.034897713598075, "grad_norm": 4.386664867401123, "learning_rate": 0.00019795382653354696, "loss": 7.6078, "step": 58460 }, { "epoch": 7.036101083032491, "grad_norm": 1728.6195068359375, "learning_rate": 0.00019795306082288897, "loss": 7.5955, "step": 58470 }, { "epoch": 7.037304452466907, "grad_norm": 14.76229190826416, "learning_rate": 0.00019795229497046872, "loss": 7.5277, "step": 58480 }, { "epoch": 7.038507821901324, "grad_norm": 13.176336288452148, "learning_rate": 0.0001979515289762873, "loss": 7.6122, "step": 58490 }, { "epoch": 7.03971119133574, "grad_norm": 7.656918048858643, "learning_rate": 0.00019795076284034586, "loss": 7.5423, "step": 58500 }, { "epoch": 7.040914560770156, "grad_norm": 5.574862957000732, "learning_rate": 0.0001979499965626455, "loss": 7.4972, "step": 58510 }, { "epoch": 7.042117930204573, "grad_norm": 8.460332870483398, "learning_rate": 0.0001979492301431873, "loss": 7.501, "step": 58520 }, { "epoch": 7.043321299638989, "grad_norm": 8.483569145202637, "learning_rate": 0.00019794846358197235, "loss": 7.555, "step": 58530 }, { "epoch": 7.044524669073406, "grad_norm": 5.071497917175293, "learning_rate": 0.00019794769687900182, "loss": 7.5449, "step": 58540 }, { "epoch": 7.045728038507822, "grad_norm": 5.318825721740723, "learning_rate": 0.00019794693003427676, "loss": 7.4687, "step": 58550 }, { "epoch": 7.046931407942238, "grad_norm": 7.241866111755371, "learning_rate": 0.00019794616304779833, "loss": 7.5028, "step": 58560 }, { "epoch": 7.048134777376655, "grad_norm": 7.500179767608643, "learning_rate": 0.00019794539591956763, "loss": 7.4961, "step": 58570 }, { "epoch": 7.049338146811071, "grad_norm": 15.585549354553223, "learning_rate": 0.00019794462864958576, "loss": 7.5545, "step": 58580 }, { "epoch": 7.050541516245487, "grad_norm": 20.122377395629883, "learning_rate": 0.00019794386123785383, "loss": 7.5008, "step": 58590 }, { "epoch": 7.051744885679904, "grad_norm": 13.105525016784668, "learning_rate": 0.0001979430936843729, "loss": 7.5519, "step": 58600 }, { "epoch": 7.05294825511432, "grad_norm": 6.437312602996826, "learning_rate": 0.0001979423259891442, "loss": 7.6007, "step": 58610 }, { "epoch": 7.054151624548736, "grad_norm": 141.77891540527344, "learning_rate": 0.00019794155815216872, "loss": 7.5104, "step": 58620 }, { "epoch": 7.055354993983153, "grad_norm": 5.608367919921875, "learning_rate": 0.00019794079017344766, "loss": 7.5207, "step": 58630 }, { "epoch": 7.056558363417569, "grad_norm": 6.196149826049805, "learning_rate": 0.0001979400220529821, "loss": 7.5716, "step": 58640 }, { "epoch": 7.0577617328519855, "grad_norm": 39.432212829589844, "learning_rate": 0.00019793925379077315, "loss": 7.5621, "step": 58650 }, { "epoch": 7.058965102286402, "grad_norm": 6.417609691619873, "learning_rate": 0.0001979384853868219, "loss": 7.507, "step": 58660 }, { "epoch": 7.060168471720818, "grad_norm": 7.268493175506592, "learning_rate": 0.00019793771684112948, "loss": 7.6224, "step": 58670 }, { "epoch": 7.061371841155235, "grad_norm": 6.067602157592773, "learning_rate": 0.000197936948153697, "loss": 7.5457, "step": 58680 }, { "epoch": 7.062575210589651, "grad_norm": 3.903944730758667, "learning_rate": 0.0001979361793245256, "loss": 7.5416, "step": 58690 }, { "epoch": 7.063778580024067, "grad_norm": 14.14567756652832, "learning_rate": 0.00019793541035361637, "loss": 7.5404, "step": 58700 }, { "epoch": 7.064981949458484, "grad_norm": 5.904326438903809, "learning_rate": 0.00019793464124097038, "loss": 7.5542, "step": 58710 }, { "epoch": 7.0661853188929005, "grad_norm": 13.580793380737305, "learning_rate": 0.00019793387198658883, "loss": 7.6201, "step": 58720 }, { "epoch": 7.067388688327316, "grad_norm": 9.535094261169434, "learning_rate": 0.00019793310259047276, "loss": 7.5076, "step": 58730 }, { "epoch": 7.068592057761733, "grad_norm": 13.778815269470215, "learning_rate": 0.0001979323330526233, "loss": 7.5298, "step": 58740 }, { "epoch": 7.06979542719615, "grad_norm": 8.662415504455566, "learning_rate": 0.00019793156337304162, "loss": 7.4654, "step": 58750 }, { "epoch": 7.070998796630565, "grad_norm": 4.333553791046143, "learning_rate": 0.00019793079355172877, "loss": 7.5594, "step": 58760 }, { "epoch": 7.072202166064982, "grad_norm": 61.4433708190918, "learning_rate": 0.00019793002358868585, "loss": 7.5706, "step": 58770 }, { "epoch": 7.073405535499398, "grad_norm": 27.061952590942383, "learning_rate": 0.00019792925348391407, "loss": 7.5555, "step": 58780 }, { "epoch": 7.074608904933815, "grad_norm": 4.7676849365234375, "learning_rate": 0.00019792848323741443, "loss": 7.5421, "step": 58790 }, { "epoch": 7.075812274368231, "grad_norm": 8.331336975097656, "learning_rate": 0.00019792771284918813, "loss": 7.5539, "step": 58800 }, { "epoch": 7.077015643802647, "grad_norm": 3.2121715545654297, "learning_rate": 0.00019792694231923622, "loss": 7.5369, "step": 58810 }, { "epoch": 7.078219013237064, "grad_norm": 18.14316749572754, "learning_rate": 0.00019792617164755987, "loss": 7.4792, "step": 58820 }, { "epoch": 7.07942238267148, "grad_norm": 5.8300395011901855, "learning_rate": 0.0001979254008341602, "loss": 7.4685, "step": 58830 }, { "epoch": 7.080625752105896, "grad_norm": 7.983304500579834, "learning_rate": 0.00019792462987903823, "loss": 7.523, "step": 58840 }, { "epoch": 7.081829121540313, "grad_norm": 8.311152458190918, "learning_rate": 0.00019792385878219517, "loss": 7.559, "step": 58850 }, { "epoch": 7.0830324909747295, "grad_norm": 174.98464965820312, "learning_rate": 0.00019792308754363214, "loss": 7.7203, "step": 58860 }, { "epoch": 7.084235860409145, "grad_norm": 38.772727966308594, "learning_rate": 0.0001979223161633502, "loss": 7.9951, "step": 58870 }, { "epoch": 7.085439229843562, "grad_norm": 46.782081604003906, "learning_rate": 0.00019792154464135048, "loss": 8.0025, "step": 58880 }, { "epoch": 7.086642599277979, "grad_norm": 11.872475624084473, "learning_rate": 0.00019792077297763413, "loss": 7.9058, "step": 58890 }, { "epoch": 7.0878459687123945, "grad_norm": 13.866183280944824, "learning_rate": 0.00019792000117220226, "loss": 7.6373, "step": 58900 }, { "epoch": 7.089049338146811, "grad_norm": 19.396339416503906, "learning_rate": 0.00019791922922505594, "loss": 7.6002, "step": 58910 }, { "epoch": 7.090252707581228, "grad_norm": 61.84298324584961, "learning_rate": 0.00019791845713619635, "loss": 7.6912, "step": 58920 }, { "epoch": 7.091456077015644, "grad_norm": 22.066791534423828, "learning_rate": 0.00019791768490562457, "loss": 7.5435, "step": 58930 }, { "epoch": 7.09265944645006, "grad_norm": 16.87318992614746, "learning_rate": 0.0001979169125333417, "loss": 7.6051, "step": 58940 }, { "epoch": 7.093862815884476, "grad_norm": 7.5266008377075195, "learning_rate": 0.0001979161400193489, "loss": 7.5309, "step": 58950 }, { "epoch": 7.095066185318893, "grad_norm": 9.935173034667969, "learning_rate": 0.00019791536736364727, "loss": 7.5462, "step": 58960 }, { "epoch": 7.0962695547533094, "grad_norm": 48.00737380981445, "learning_rate": 0.00019791459456623792, "loss": 7.6156, "step": 58970 }, { "epoch": 7.097472924187725, "grad_norm": 63.169559478759766, "learning_rate": 0.000197913821627122, "loss": 7.5662, "step": 58980 }, { "epoch": 7.098676293622142, "grad_norm": 38.13153076171875, "learning_rate": 0.0001979130485463006, "loss": 7.5425, "step": 58990 }, { "epoch": 7.099879663056559, "grad_norm": 10.797197341918945, "learning_rate": 0.0001979122753237748, "loss": 7.6917, "step": 59000 }, { "epoch": 7.101083032490974, "grad_norm": 7.944900035858154, "learning_rate": 0.0001979115019595458, "loss": 7.6342, "step": 59010 }, { "epoch": 7.102286401925391, "grad_norm": 5.772441864013672, "learning_rate": 0.0001979107284536147, "loss": 7.5624, "step": 59020 }, { "epoch": 7.103489771359808, "grad_norm": 3.6065547466278076, "learning_rate": 0.0001979099548059826, "loss": 7.5784, "step": 59030 }, { "epoch": 7.1046931407942235, "grad_norm": 20.077173233032227, "learning_rate": 0.00019790918101665058, "loss": 7.6786, "step": 59040 }, { "epoch": 7.10589651022864, "grad_norm": 6.177094459533691, "learning_rate": 0.00019790840708561986, "loss": 7.6383, "step": 59050 }, { "epoch": 7.107099879663057, "grad_norm": 9.216825485229492, "learning_rate": 0.00019790763301289147, "loss": 7.6239, "step": 59060 }, { "epoch": 7.108303249097473, "grad_norm": 23.94330596923828, "learning_rate": 0.00019790685879846656, "loss": 7.7322, "step": 59070 }, { "epoch": 7.109506618531889, "grad_norm": 5.786437511444092, "learning_rate": 0.00019790608444234623, "loss": 7.5141, "step": 59080 }, { "epoch": 7.110709987966306, "grad_norm": 3.0996527671813965, "learning_rate": 0.0001979053099445317, "loss": 7.6377, "step": 59090 }, { "epoch": 7.111913357400722, "grad_norm": 8.763830184936523, "learning_rate": 0.00019790453530502396, "loss": 7.5446, "step": 59100 }, { "epoch": 7.1131167268351385, "grad_norm": 2.4363465309143066, "learning_rate": 0.00019790376052382418, "loss": 7.6016, "step": 59110 }, { "epoch": 7.114320096269555, "grad_norm": 3.958065986633301, "learning_rate": 0.00019790298560093349, "loss": 7.6158, "step": 59120 }, { "epoch": 7.115523465703971, "grad_norm": 9.829353332519531, "learning_rate": 0.00019790221053635302, "loss": 7.557, "step": 59130 }, { "epoch": 7.116726835138388, "grad_norm": 7.361923694610596, "learning_rate": 0.00019790143533008389, "loss": 7.5716, "step": 59140 }, { "epoch": 7.1179302045728035, "grad_norm": 5.595799446105957, "learning_rate": 0.0001979006599821272, "loss": 7.5141, "step": 59150 }, { "epoch": 7.11913357400722, "grad_norm": 6.777280807495117, "learning_rate": 0.00019789988449248408, "loss": 7.6255, "step": 59160 }, { "epoch": 7.120336943441637, "grad_norm": 13.410460472106934, "learning_rate": 0.00019789910886115566, "loss": 7.5592, "step": 59170 }, { "epoch": 7.121540312876053, "grad_norm": 8.098434448242188, "learning_rate": 0.00019789833308814304, "loss": 7.5126, "step": 59180 }, { "epoch": 7.122743682310469, "grad_norm": 3.511575937271118, "learning_rate": 0.00019789755717344737, "loss": 7.5427, "step": 59190 }, { "epoch": 7.123947051744886, "grad_norm": 5.895703315734863, "learning_rate": 0.00019789678111706977, "loss": 7.4838, "step": 59200 }, { "epoch": 7.125150421179302, "grad_norm": 584.3899536132812, "learning_rate": 0.00019789600491901136, "loss": 7.6162, "step": 59210 }, { "epoch": 7.126353790613718, "grad_norm": 7.532637119293213, "learning_rate": 0.00019789522857927328, "loss": 7.6417, "step": 59220 }, { "epoch": 7.127557160048135, "grad_norm": 4.052803993225098, "learning_rate": 0.00019789445209785663, "loss": 7.5396, "step": 59230 }, { "epoch": 7.128760529482551, "grad_norm": 4.805096626281738, "learning_rate": 0.0001978936754747625, "loss": 7.499, "step": 59240 }, { "epoch": 7.129963898916968, "grad_norm": 6.047024726867676, "learning_rate": 0.0001978928987099921, "loss": 7.4485, "step": 59250 }, { "epoch": 7.131167268351384, "grad_norm": 427.5868835449219, "learning_rate": 0.0001978921218035465, "loss": 7.5272, "step": 59260 }, { "epoch": 7.1323706377858, "grad_norm": 65.29325103759766, "learning_rate": 0.0001978913447554268, "loss": 8.2954, "step": 59270 }, { "epoch": 7.133574007220217, "grad_norm": 5.001557350158691, "learning_rate": 0.00019789056756563418, "loss": 8.0184, "step": 59280 }, { "epoch": 7.1347773766546325, "grad_norm": 13.563468933105469, "learning_rate": 0.00019788979023416974, "loss": 7.7271, "step": 59290 }, { "epoch": 7.135980746089049, "grad_norm": 4.750330924987793, "learning_rate": 0.00019788901276103458, "loss": 7.6179, "step": 59300 }, { "epoch": 7.137184115523466, "grad_norm": 3.3980605602264404, "learning_rate": 0.00019788823514622987, "loss": 7.5529, "step": 59310 }, { "epoch": 7.138387484957882, "grad_norm": 5.319908618927002, "learning_rate": 0.0001978874573897567, "loss": 7.514, "step": 59320 }, { "epoch": 7.139590854392298, "grad_norm": 3.2248036861419678, "learning_rate": 0.00019788667949161622, "loss": 7.5623, "step": 59330 }, { "epoch": 7.140794223826715, "grad_norm": 5.58474588394165, "learning_rate": 0.00019788590145180957, "loss": 7.6767, "step": 59340 }, { "epoch": 7.141997593261131, "grad_norm": 2.9544930458068848, "learning_rate": 0.00019788512327033786, "loss": 7.727, "step": 59350 }, { "epoch": 7.1432009626955475, "grad_norm": 1.7140352725982666, "learning_rate": 0.00019788434494720218, "loss": 7.4993, "step": 59360 }, { "epoch": 7.144404332129964, "grad_norm": 3.823256731033325, "learning_rate": 0.0001978835664824037, "loss": 7.538, "step": 59370 }, { "epoch": 7.14560770156438, "grad_norm": 4.857741832733154, "learning_rate": 0.0001978827878759435, "loss": 7.4998, "step": 59380 }, { "epoch": 7.146811070998797, "grad_norm": 22.899444580078125, "learning_rate": 0.00019788200912782277, "loss": 7.6122, "step": 59390 }, { "epoch": 7.148014440433213, "grad_norm": 5.2488837242126465, "learning_rate": 0.00019788123023804262, "loss": 7.5971, "step": 59400 }, { "epoch": 7.149217809867629, "grad_norm": 2.129460334777832, "learning_rate": 0.0001978804512066041, "loss": 7.5568, "step": 59410 }, { "epoch": 7.150421179302046, "grad_norm": 4.204578876495361, "learning_rate": 0.00019787967203350847, "loss": 7.5412, "step": 59420 }, { "epoch": 7.1516245487364625, "grad_norm": 3.908076763153076, "learning_rate": 0.00019787889271875677, "loss": 7.4903, "step": 59430 }, { "epoch": 7.152827918170878, "grad_norm": 3.013484001159668, "learning_rate": 0.00019787811326235012, "loss": 7.5484, "step": 59440 }, { "epoch": 7.154031287605295, "grad_norm": 11.153820991516113, "learning_rate": 0.00019787733366428972, "loss": 7.4296, "step": 59450 }, { "epoch": 7.155234657039712, "grad_norm": 4.014166831970215, "learning_rate": 0.00019787655392457663, "loss": 7.4667, "step": 59460 }, { "epoch": 7.156438026474127, "grad_norm": 5.698665142059326, "learning_rate": 0.000197875774043212, "loss": 7.529, "step": 59470 }, { "epoch": 7.157641395908544, "grad_norm": 12.536340713500977, "learning_rate": 0.00019787499402019695, "loss": 7.512, "step": 59480 }, { "epoch": 7.15884476534296, "grad_norm": 4.035852909088135, "learning_rate": 0.00019787421385553263, "loss": 7.4643, "step": 59490 }, { "epoch": 7.160048134777377, "grad_norm": 4.346449375152588, "learning_rate": 0.00019787343354922017, "loss": 7.4415, "step": 59500 }, { "epoch": 7.161251504211793, "grad_norm": 4.9524736404418945, "learning_rate": 0.00019787265310126066, "loss": 7.4527, "step": 59510 }, { "epoch": 7.162454873646209, "grad_norm": 8.1602144241333, "learning_rate": 0.00019787187251165528, "loss": 7.5317, "step": 59520 }, { "epoch": 7.163658243080626, "grad_norm": 21.558897018432617, "learning_rate": 0.00019787109178040512, "loss": 7.6129, "step": 59530 }, { "epoch": 7.164861612515042, "grad_norm": 4.18744421005249, "learning_rate": 0.00019787031090751133, "loss": 7.6206, "step": 59540 }, { "epoch": 7.166064981949458, "grad_norm": 4.385098934173584, "learning_rate": 0.00019786952989297505, "loss": 7.4972, "step": 59550 }, { "epoch": 7.167268351383875, "grad_norm": 7.401921272277832, "learning_rate": 0.00019786874873679739, "loss": 7.4209, "step": 59560 }, { "epoch": 7.1684717208182915, "grad_norm": 761.7949829101562, "learning_rate": 0.0001978679674389795, "loss": 7.4231, "step": 59570 }, { "epoch": 7.169675090252707, "grad_norm": 569.0382690429688, "learning_rate": 0.0001978671859995225, "loss": 7.4035, "step": 59580 }, { "epoch": 7.170878459687124, "grad_norm": 289.3324890136719, "learning_rate": 0.00019786640441842748, "loss": 7.9095, "step": 59590 }, { "epoch": 7.172081829121541, "grad_norm": 3.3403782844543457, "learning_rate": 0.00019786562269569563, "loss": 7.854, "step": 59600 }, { "epoch": 7.1732851985559565, "grad_norm": 207.3368682861328, "learning_rate": 0.00019786484083132808, "loss": 7.621, "step": 59610 }, { "epoch": 7.174488567990373, "grad_norm": 6.522215843200684, "learning_rate": 0.00019786405882532592, "loss": 7.569, "step": 59620 }, { "epoch": 7.17569193742479, "grad_norm": 556.5330810546875, "learning_rate": 0.0001978632766776903, "loss": 7.5669, "step": 59630 }, { "epoch": 7.176895306859206, "grad_norm": 3.6186258792877197, "learning_rate": 0.00019786249438842238, "loss": 7.4964, "step": 59640 }, { "epoch": 7.178098676293622, "grad_norm": 5.747904300689697, "learning_rate": 0.00019786171195752329, "loss": 7.5989, "step": 59650 }, { "epoch": 7.179302045728038, "grad_norm": 17.137287139892578, "learning_rate": 0.0001978609293849941, "loss": 7.498, "step": 59660 }, { "epoch": 7.180505415162455, "grad_norm": 3.883235216140747, "learning_rate": 0.000197860146670836, "loss": 7.5472, "step": 59670 }, { "epoch": 7.1817087845968715, "grad_norm": 4.300616264343262, "learning_rate": 0.0001978593638150501, "loss": 7.5632, "step": 59680 }, { "epoch": 7.182912154031287, "grad_norm": 1.7276138067245483, "learning_rate": 0.00019785858081763756, "loss": 7.6019, "step": 59690 }, { "epoch": 7.184115523465704, "grad_norm": 5.939334392547607, "learning_rate": 0.00019785779767859946, "loss": 7.5971, "step": 59700 }, { "epoch": 7.185318892900121, "grad_norm": 6.778968334197998, "learning_rate": 0.00019785701439793698, "loss": 7.5578, "step": 59710 }, { "epoch": 7.186522262334536, "grad_norm": 7.693382263183594, "learning_rate": 0.00019785623097565125, "loss": 7.6227, "step": 59720 }, { "epoch": 7.187725631768953, "grad_norm": 5.100826263427734, "learning_rate": 0.00019785544741174338, "loss": 7.4925, "step": 59730 }, { "epoch": 7.18892900120337, "grad_norm": 3.846651554107666, "learning_rate": 0.00019785466370621452, "loss": 7.5773, "step": 59740 }, { "epoch": 7.1901323706377855, "grad_norm": 6.780317783355713, "learning_rate": 0.00019785387985906582, "loss": 7.5043, "step": 59750 }, { "epoch": 7.191335740072202, "grad_norm": 6.981686115264893, "learning_rate": 0.00019785309587029839, "loss": 7.4389, "step": 59760 }, { "epoch": 7.192539109506619, "grad_norm": 16.288442611694336, "learning_rate": 0.00019785231173991336, "loss": 7.4425, "step": 59770 }, { "epoch": 7.193742478941035, "grad_norm": 26.31230926513672, "learning_rate": 0.00019785152746791188, "loss": 7.4794, "step": 59780 }, { "epoch": 7.194945848375451, "grad_norm": 19.139209747314453, "learning_rate": 0.00019785074305429508, "loss": 7.5565, "step": 59790 }, { "epoch": 7.196149217809868, "grad_norm": 67.08979797363281, "learning_rate": 0.0001978499584990641, "loss": 7.5342, "step": 59800 }, { "epoch": 7.197352587244284, "grad_norm": 17.288860321044922, "learning_rate": 0.0001978491738022201, "loss": 7.5384, "step": 59810 }, { "epoch": 7.1985559566787005, "grad_norm": 74.42042541503906, "learning_rate": 0.00019784838896376412, "loss": 7.4855, "step": 59820 }, { "epoch": 7.199759326113116, "grad_norm": 15.520068168640137, "learning_rate": 0.00019784760398369738, "loss": 7.4961, "step": 59830 }, { "epoch": 7.200962695547533, "grad_norm": 12.522257804870605, "learning_rate": 0.00019784681886202105, "loss": 7.4992, "step": 59840 }, { "epoch": 7.20216606498195, "grad_norm": 29.99500274658203, "learning_rate": 0.00019784603359873618, "loss": 7.5273, "step": 59850 }, { "epoch": 7.2033694344163655, "grad_norm": 51.02986526489258, "learning_rate": 0.00019784524819384394, "loss": 7.4981, "step": 59860 }, { "epoch": 7.204572803850782, "grad_norm": 30.489192962646484, "learning_rate": 0.00019784446264734545, "loss": 7.4909, "step": 59870 }, { "epoch": 7.205776173285199, "grad_norm": 33.188053131103516, "learning_rate": 0.0001978436769592419, "loss": 7.4641, "step": 59880 }, { "epoch": 7.206979542719615, "grad_norm": 19.068748474121094, "learning_rate": 0.00019784289112953442, "loss": 7.5546, "step": 59890 }, { "epoch": 7.208182912154031, "grad_norm": 30.090360641479492, "learning_rate": 0.00019784210515822408, "loss": 7.4671, "step": 59900 }, { "epoch": 7.209386281588448, "grad_norm": 29.924680709838867, "learning_rate": 0.00019784131904531204, "loss": 7.402, "step": 59910 }, { "epoch": 7.210589651022864, "grad_norm": 37.59492111206055, "learning_rate": 0.00019784053279079948, "loss": 7.4358, "step": 59920 }, { "epoch": 7.21179302045728, "grad_norm": 55.84379196166992, "learning_rate": 0.00019783974639468749, "loss": 7.5398, "step": 59930 }, { "epoch": 7.212996389891697, "grad_norm": 82.36194610595703, "learning_rate": 0.00019783895985697728, "loss": 7.4889, "step": 59940 }, { "epoch": 7.214199759326113, "grad_norm": 32.53596115112305, "learning_rate": 0.00019783817317766992, "loss": 7.4392, "step": 59950 }, { "epoch": 7.21540312876053, "grad_norm": 46.800846099853516, "learning_rate": 0.00019783738635676654, "loss": 7.5065, "step": 59960 }, { "epoch": 7.216606498194946, "grad_norm": 114.05599212646484, "learning_rate": 0.0001978365993942683, "loss": 7.4845, "step": 59970 }, { "epoch": 7.217809867629362, "grad_norm": 48.833648681640625, "learning_rate": 0.0001978358122901764, "loss": 7.4846, "step": 59980 }, { "epoch": 7.219013237063779, "grad_norm": 27.91322135925293, "learning_rate": 0.0001978350250444919, "loss": 7.4814, "step": 59990 }, { "epoch": 7.2202166064981945, "grad_norm": 42.391719818115234, "learning_rate": 0.00019783423765721593, "loss": 7.4513, "step": 60000 }, { "epoch": 7.221419975932611, "grad_norm": 27.37371253967285, "learning_rate": 0.0001978334501283497, "loss": 7.5, "step": 60010 }, { "epoch": 7.222623345367028, "grad_norm": 64.112548828125, "learning_rate": 0.0001978326624578943, "loss": 7.4711, "step": 60020 }, { "epoch": 7.223826714801444, "grad_norm": 85.39812469482422, "learning_rate": 0.0001978318746458509, "loss": 7.5022, "step": 60030 }, { "epoch": 7.22503008423586, "grad_norm": 17.54473876953125, "learning_rate": 0.0001978310866922206, "loss": 7.4464, "step": 60040 }, { "epoch": 7.226233453670277, "grad_norm": 46.40385818481445, "learning_rate": 0.0001978302985970046, "loss": 7.4783, "step": 60050 }, { "epoch": 7.227436823104693, "grad_norm": 23.35736656188965, "learning_rate": 0.00019782951036020397, "loss": 7.4859, "step": 60060 }, { "epoch": 7.2286401925391095, "grad_norm": 36.57982635498047, "learning_rate": 0.0001978287219818199, "loss": 7.5214, "step": 60070 }, { "epoch": 7.229843561973526, "grad_norm": 75.2618179321289, "learning_rate": 0.0001978279334618535, "loss": 7.3987, "step": 60080 }, { "epoch": 7.231046931407942, "grad_norm": 44.916038513183594, "learning_rate": 0.00019782714480030594, "loss": 7.4643, "step": 60090 }, { "epoch": 7.232250300842359, "grad_norm": 57.88205337524414, "learning_rate": 0.00019782635599717836, "loss": 7.4747, "step": 60100 }, { "epoch": 7.233453670276775, "grad_norm": 77.02476501464844, "learning_rate": 0.00019782556705247187, "loss": 7.5516, "step": 60110 }, { "epoch": 7.234657039711191, "grad_norm": 278.16485595703125, "learning_rate": 0.00019782477796618765, "loss": 7.4994, "step": 60120 }, { "epoch": 7.235860409145608, "grad_norm": 102.18984985351562, "learning_rate": 0.00019782398873832683, "loss": 7.3611, "step": 60130 }, { "epoch": 7.2370637785800245, "grad_norm": 136.10565185546875, "learning_rate": 0.0001978231993688905, "loss": 7.5627, "step": 60140 }, { "epoch": 7.23826714801444, "grad_norm": 67.03374481201172, "learning_rate": 0.0001978224098578799, "loss": 7.4815, "step": 60150 }, { "epoch": 7.239470517448857, "grad_norm": 155.654541015625, "learning_rate": 0.0001978216202052961, "loss": 7.5172, "step": 60160 }, { "epoch": 7.240673886883273, "grad_norm": 115.03605651855469, "learning_rate": 0.00019782083041114027, "loss": 7.5528, "step": 60170 }, { "epoch": 7.241877256317689, "grad_norm": 104.7567138671875, "learning_rate": 0.00019782004047541355, "loss": 7.5031, "step": 60180 }, { "epoch": 7.243080625752106, "grad_norm": 42.97132873535156, "learning_rate": 0.00019781925039811707, "loss": 7.477, "step": 60190 }, { "epoch": 7.244283995186522, "grad_norm": 48.480133056640625, "learning_rate": 0.00019781846017925196, "loss": 7.5043, "step": 60200 }, { "epoch": 7.245487364620939, "grad_norm": 42.670719146728516, "learning_rate": 0.00019781766981881945, "loss": 7.49, "step": 60210 }, { "epoch": 7.246690734055355, "grad_norm": 72.44229125976562, "learning_rate": 0.00019781687931682055, "loss": 7.5018, "step": 60220 }, { "epoch": 7.247894103489771, "grad_norm": 96.29901885986328, "learning_rate": 0.00019781608867325653, "loss": 7.3994, "step": 60230 }, { "epoch": 7.249097472924188, "grad_norm": 29.10788917541504, "learning_rate": 0.00019781529788812845, "loss": 7.5485, "step": 60240 }, { "epoch": 7.250300842358604, "grad_norm": 52.739845275878906, "learning_rate": 0.0001978145069614375, "loss": 7.4365, "step": 60250 }, { "epoch": 7.25150421179302, "grad_norm": 64.79094696044922, "learning_rate": 0.0001978137158931848, "loss": 7.5318, "step": 60260 }, { "epoch": 7.252707581227437, "grad_norm": 53.2910270690918, "learning_rate": 0.0001978129246833715, "loss": 7.5018, "step": 60270 }, { "epoch": 7.2539109506618535, "grad_norm": 33.322811126708984, "learning_rate": 0.00019781213333199874, "loss": 7.4412, "step": 60280 }, { "epoch": 7.255114320096269, "grad_norm": 45.863426208496094, "learning_rate": 0.0001978113418390677, "loss": 7.4449, "step": 60290 }, { "epoch": 7.256317689530686, "grad_norm": 61.004669189453125, "learning_rate": 0.0001978105502045795, "loss": 7.438, "step": 60300 }, { "epoch": 7.257521058965103, "grad_norm": 38.47679901123047, "learning_rate": 0.00019780975842853527, "loss": 7.5001, "step": 60310 }, { "epoch": 7.2587244283995185, "grad_norm": 135.99485778808594, "learning_rate": 0.0001978089665109362, "loss": 7.518, "step": 60320 }, { "epoch": 7.259927797833935, "grad_norm": 31.63204574584961, "learning_rate": 0.00019780817445178337, "loss": 7.5673, "step": 60330 }, { "epoch": 7.261131167268351, "grad_norm": 28.619653701782227, "learning_rate": 0.00019780738225107797, "loss": 7.5921, "step": 60340 }, { "epoch": 7.262334536702768, "grad_norm": 56.345943450927734, "learning_rate": 0.00019780658990882114, "loss": 7.4882, "step": 60350 }, { "epoch": 7.263537906137184, "grad_norm": 49.259464263916016, "learning_rate": 0.00019780579742501405, "loss": 7.5011, "step": 60360 }, { "epoch": 7.2647412755716, "grad_norm": 42.734397888183594, "learning_rate": 0.0001978050047996578, "loss": 7.4518, "step": 60370 }, { "epoch": 7.265944645006017, "grad_norm": 55.917720794677734, "learning_rate": 0.00019780421203275357, "loss": 7.3579, "step": 60380 }, { "epoch": 7.2671480144404335, "grad_norm": 44.459991455078125, "learning_rate": 0.0001978034191243025, "loss": 7.5129, "step": 60390 }, { "epoch": 7.268351383874849, "grad_norm": 58.6427116394043, "learning_rate": 0.00019780262607430572, "loss": 7.4381, "step": 60400 }, { "epoch": 7.269554753309266, "grad_norm": 100.82861328125, "learning_rate": 0.0001978018328827644, "loss": 7.4171, "step": 60410 }, { "epoch": 7.270758122743683, "grad_norm": 54.6920051574707, "learning_rate": 0.0001978010395496797, "loss": 7.4001, "step": 60420 }, { "epoch": 7.271961492178098, "grad_norm": 71.27852630615234, "learning_rate": 0.00019780024607505273, "loss": 7.4267, "step": 60430 }, { "epoch": 7.273164861612515, "grad_norm": 61.42023849487305, "learning_rate": 0.00019779945245888467, "loss": 7.4185, "step": 60440 }, { "epoch": 7.274368231046932, "grad_norm": 60.58082962036133, "learning_rate": 0.00019779865870117664, "loss": 7.4937, "step": 60450 }, { "epoch": 7.2755716004813475, "grad_norm": 57.872398376464844, "learning_rate": 0.00019779786480192982, "loss": 7.4687, "step": 60460 }, { "epoch": 7.276774969915764, "grad_norm": 78.12239074707031, "learning_rate": 0.00019779707076114536, "loss": 7.4062, "step": 60470 }, { "epoch": 7.277978339350181, "grad_norm": 53.899505615234375, "learning_rate": 0.00019779627657882435, "loss": 7.4423, "step": 60480 }, { "epoch": 7.279181708784597, "grad_norm": 70.50180053710938, "learning_rate": 0.000197795482254968, "loss": 7.4284, "step": 60490 }, { "epoch": 7.280385078219013, "grad_norm": 74.60441589355469, "learning_rate": 0.00019779468778957746, "loss": 7.4769, "step": 60500 }, { "epoch": 7.28158844765343, "grad_norm": 85.81940460205078, "learning_rate": 0.00019779389318265385, "loss": 7.476, "step": 60510 }, { "epoch": 7.282791817087846, "grad_norm": 88.97525024414062, "learning_rate": 0.00019779309843419833, "loss": 7.4764, "step": 60520 }, { "epoch": 7.2839951865222625, "grad_norm": 108.99325561523438, "learning_rate": 0.00019779230354421206, "loss": 7.4186, "step": 60530 }, { "epoch": 7.285198555956678, "grad_norm": 56.356689453125, "learning_rate": 0.00019779150851269618, "loss": 7.4352, "step": 60540 }, { "epoch": 7.286401925391095, "grad_norm": 128.97547912597656, "learning_rate": 0.00019779071333965185, "loss": 7.3784, "step": 60550 }, { "epoch": 7.287605294825512, "grad_norm": 103.07829284667969, "learning_rate": 0.0001977899180250802, "loss": 7.4154, "step": 60560 }, { "epoch": 7.2888086642599275, "grad_norm": 198.7111053466797, "learning_rate": 0.0001977891225689824, "loss": 7.4376, "step": 60570 }, { "epoch": 7.290012033694344, "grad_norm": 384.5240173339844, "learning_rate": 0.00019778832697135959, "loss": 7.4949, "step": 60580 }, { "epoch": 7.291215403128761, "grad_norm": 413.0855407714844, "learning_rate": 0.00019778753123221292, "loss": 7.4138, "step": 60590 }, { "epoch": 7.292418772563177, "grad_norm": 143.67127990722656, "learning_rate": 0.00019778673535154355, "loss": 7.4467, "step": 60600 }, { "epoch": 7.293622141997593, "grad_norm": 300.8535461425781, "learning_rate": 0.00019778593932935265, "loss": 7.4996, "step": 60610 }, { "epoch": 7.29482551143201, "grad_norm": 280.2632141113281, "learning_rate": 0.00019778514316564136, "loss": 7.3682, "step": 60620 }, { "epoch": 7.296028880866426, "grad_norm": 215.53408813476562, "learning_rate": 0.0001977843468604108, "loss": 7.6457, "step": 60630 }, { "epoch": 7.297232250300842, "grad_norm": 88.64797973632812, "learning_rate": 0.00019778355041366217, "loss": 9.2716, "step": 60640 }, { "epoch": 7.298435619735259, "grad_norm": 400.6197204589844, "learning_rate": 0.0001977827538253966, "loss": 8.1227, "step": 60650 }, { "epoch": 7.299638989169675, "grad_norm": 204.63186645507812, "learning_rate": 0.00019778195709561523, "loss": 7.9466, "step": 60660 }, { "epoch": 7.300842358604092, "grad_norm": 142.33651733398438, "learning_rate": 0.00019778116022431923, "loss": 7.8356, "step": 60670 }, { "epoch": 7.302045728038507, "grad_norm": 355.62103271484375, "learning_rate": 0.00019778036321150974, "loss": 8.2918, "step": 60680 }, { "epoch": 7.303249097472924, "grad_norm": 39.75912857055664, "learning_rate": 0.00019777956605718794, "loss": 8.4857, "step": 60690 }, { "epoch": 7.304452466907341, "grad_norm": 118.32836151123047, "learning_rate": 0.00019777876876135494, "loss": 7.6227, "step": 60700 }, { "epoch": 7.3056558363417565, "grad_norm": 36.87958908081055, "learning_rate": 0.00019777797132401195, "loss": 7.594, "step": 60710 }, { "epoch": 7.306859205776173, "grad_norm": 28.497652053833008, "learning_rate": 0.0001977771737451601, "loss": 7.4891, "step": 60720 }, { "epoch": 7.30806257521059, "grad_norm": 28.90912628173828, "learning_rate": 0.0001977763760248005, "loss": 7.5382, "step": 60730 }, { "epoch": 7.309265944645006, "grad_norm": 24.26543617248535, "learning_rate": 0.00019777557816293438, "loss": 7.4958, "step": 60740 }, { "epoch": 7.310469314079422, "grad_norm": 38.23131561279297, "learning_rate": 0.00019777478015956285, "loss": 7.4338, "step": 60750 }, { "epoch": 7.311672683513839, "grad_norm": 25.858304977416992, "learning_rate": 0.00019777398201468706, "loss": 7.4713, "step": 60760 }, { "epoch": 7.312876052948255, "grad_norm": 44.17701721191406, "learning_rate": 0.0001977731837283082, "loss": 7.5925, "step": 60770 }, { "epoch": 7.3140794223826715, "grad_norm": 63.03474426269531, "learning_rate": 0.00019777238530042742, "loss": 7.4646, "step": 60780 }, { "epoch": 7.315282791817088, "grad_norm": 72.42682647705078, "learning_rate": 0.00019777158673104582, "loss": 7.5022, "step": 60790 }, { "epoch": 7.316486161251504, "grad_norm": 64.22356414794922, "learning_rate": 0.0001977707880201646, "loss": 7.5057, "step": 60800 }, { "epoch": 7.317689530685921, "grad_norm": 38.25808334350586, "learning_rate": 0.00019776998916778494, "loss": 7.4618, "step": 60810 }, { "epoch": 7.318892900120337, "grad_norm": 143.3399200439453, "learning_rate": 0.0001977691901739079, "loss": 7.4346, "step": 60820 }, { "epoch": 7.320096269554753, "grad_norm": 87.22521209716797, "learning_rate": 0.00019776839103853477, "loss": 7.4397, "step": 60830 }, { "epoch": 7.32129963898917, "grad_norm": 66.5234603881836, "learning_rate": 0.0001977675917616666, "loss": 7.4557, "step": 60840 }, { "epoch": 7.3225030084235865, "grad_norm": 62.35301971435547, "learning_rate": 0.00019776679234330462, "loss": 7.4729, "step": 60850 }, { "epoch": 7.323706377858002, "grad_norm": 69.43376159667969, "learning_rate": 0.00019776599278344993, "loss": 7.4977, "step": 60860 }, { "epoch": 7.324909747292419, "grad_norm": 86.56552124023438, "learning_rate": 0.00019776519308210372, "loss": 7.4436, "step": 60870 }, { "epoch": 7.326113116726835, "grad_norm": 94.27783203125, "learning_rate": 0.00019776439323926717, "loss": 7.4941, "step": 60880 }, { "epoch": 7.327316486161251, "grad_norm": 61.91367721557617, "learning_rate": 0.00019776359325494137, "loss": 7.4784, "step": 60890 }, { "epoch": 7.328519855595668, "grad_norm": 97.73733520507812, "learning_rate": 0.0001977627931291275, "loss": 7.4395, "step": 60900 }, { "epoch": 7.329723225030084, "grad_norm": 66.14411163330078, "learning_rate": 0.00019776199286182676, "loss": 7.4584, "step": 60910 }, { "epoch": 7.330926594464501, "grad_norm": 65.3894271850586, "learning_rate": 0.00019776119245304026, "loss": 7.5061, "step": 60920 }, { "epoch": 7.332129963898917, "grad_norm": 69.93453979492188, "learning_rate": 0.0001977603919027692, "loss": 7.3627, "step": 60930 }, { "epoch": 7.333333333333333, "grad_norm": 63.80093765258789, "learning_rate": 0.00019775959121101468, "loss": 7.5226, "step": 60940 }, { "epoch": 7.33453670276775, "grad_norm": 106.47486114501953, "learning_rate": 0.0001977587903777779, "loss": 7.4226, "step": 60950 }, { "epoch": 7.335740072202166, "grad_norm": 314.5417785644531, "learning_rate": 0.00019775798940306006, "loss": 7.4833, "step": 60960 }, { "epoch": 7.336943441636582, "grad_norm": 137.12054443359375, "learning_rate": 0.00019775718828686224, "loss": 7.4836, "step": 60970 }, { "epoch": 7.338146811070999, "grad_norm": 115.8062973022461, "learning_rate": 0.00019775638702918563, "loss": 7.3838, "step": 60980 }, { "epoch": 7.3393501805054155, "grad_norm": 128.8003387451172, "learning_rate": 0.0001977555856300314, "loss": 7.5599, "step": 60990 }, { "epoch": 7.340553549939831, "grad_norm": 103.74386596679688, "learning_rate": 0.0001977547840894007, "loss": 7.4646, "step": 61000 }, { "epoch": 7.341756919374248, "grad_norm": 101.12113952636719, "learning_rate": 0.0001977539824072947, "loss": 7.4665, "step": 61010 }, { "epoch": 7.342960288808664, "grad_norm": 116.65435028076172, "learning_rate": 0.00019775318058371457, "loss": 7.476, "step": 61020 }, { "epoch": 7.3441636582430805, "grad_norm": 67.12080383300781, "learning_rate": 0.00019775237861866142, "loss": 7.4221, "step": 61030 }, { "epoch": 7.345367027677497, "grad_norm": 93.72984313964844, "learning_rate": 0.00019775157651213645, "loss": 7.418, "step": 61040 }, { "epoch": 7.346570397111913, "grad_norm": 130.2222442626953, "learning_rate": 0.00019775077426414085, "loss": 7.412, "step": 61050 }, { "epoch": 7.34777376654633, "grad_norm": 134.0852813720703, "learning_rate": 0.00019774997187467573, "loss": 7.37, "step": 61060 }, { "epoch": 7.348977135980746, "grad_norm": 255.88497924804688, "learning_rate": 0.00019774916934374223, "loss": 7.463, "step": 61070 }, { "epoch": 7.350180505415162, "grad_norm": 97.97000122070312, "learning_rate": 0.00019774836667134157, "loss": 7.436, "step": 61080 }, { "epoch": 7.351383874849579, "grad_norm": 79.88434600830078, "learning_rate": 0.0001977475638574749, "loss": 7.4323, "step": 61090 }, { "epoch": 7.3525872442839955, "grad_norm": 204.21705627441406, "learning_rate": 0.00019774676090214338, "loss": 7.2863, "step": 61100 }, { "epoch": 7.353790613718411, "grad_norm": 111.7914047241211, "learning_rate": 0.00019774595780534812, "loss": 7.4165, "step": 61110 }, { "epoch": 7.354993983152828, "grad_norm": 159.9964599609375, "learning_rate": 0.00019774515456709034, "loss": 7.3932, "step": 61120 }, { "epoch": 7.356197352587245, "grad_norm": 174.8877716064453, "learning_rate": 0.00019774435118737125, "loss": 7.488, "step": 61130 }, { "epoch": 7.35740072202166, "grad_norm": 81.28499603271484, "learning_rate": 0.00019774354766619186, "loss": 7.4149, "step": 61140 }, { "epoch": 7.358604091456077, "grad_norm": 166.77029418945312, "learning_rate": 0.00019774274400355347, "loss": 7.3922, "step": 61150 }, { "epoch": 7.359807460890494, "grad_norm": 98.10020446777344, "learning_rate": 0.00019774194019945722, "loss": 7.4174, "step": 61160 }, { "epoch": 7.3610108303249095, "grad_norm": 199.62445068359375, "learning_rate": 0.00019774113625390424, "loss": 7.4153, "step": 61170 }, { "epoch": 7.362214199759326, "grad_norm": 248.86231994628906, "learning_rate": 0.0001977403321668957, "loss": 7.3757, "step": 61180 }, { "epoch": 7.363417569193743, "grad_norm": 169.08200073242188, "learning_rate": 0.00019773952793843274, "loss": 7.4045, "step": 61190 }, { "epoch": 7.364620938628159, "grad_norm": 237.693359375, "learning_rate": 0.00019773872356851658, "loss": 7.3645, "step": 61200 }, { "epoch": 7.365824308062575, "grad_norm": 181.377685546875, "learning_rate": 0.00019773791905714836, "loss": 7.4149, "step": 61210 }, { "epoch": 7.367027677496991, "grad_norm": 570.8060302734375, "learning_rate": 0.00019773711440432924, "loss": 7.482, "step": 61220 }, { "epoch": 7.368231046931408, "grad_norm": 692.4110107421875, "learning_rate": 0.00019773630961006036, "loss": 7.43, "step": 61230 }, { "epoch": 7.3694344163658245, "grad_norm": 145.51316833496094, "learning_rate": 0.00019773550467434291, "loss": 7.4216, "step": 61240 }, { "epoch": 7.37063778580024, "grad_norm": 515.2535400390625, "learning_rate": 0.00019773469959717807, "loss": 7.3935, "step": 61250 }, { "epoch": 7.371841155234657, "grad_norm": 300.3969421386719, "learning_rate": 0.000197733894378567, "loss": 7.4292, "step": 61260 }, { "epoch": 7.373044524669074, "grad_norm": 316.3150634765625, "learning_rate": 0.00019773308901851084, "loss": 7.4575, "step": 61270 }, { "epoch": 7.3742478941034895, "grad_norm": 779.3385009765625, "learning_rate": 0.00019773228351701078, "loss": 7.5211, "step": 61280 }, { "epoch": 7.375451263537906, "grad_norm": 161.46609497070312, "learning_rate": 0.00019773147787406796, "loss": 7.4393, "step": 61290 }, { "epoch": 7.376654632972323, "grad_norm": 468.7433166503906, "learning_rate": 0.00019773067208968357, "loss": 7.3958, "step": 61300 }, { "epoch": 7.377858002406739, "grad_norm": 550.7386474609375, "learning_rate": 0.00019772986616385877, "loss": 7.4006, "step": 61310 }, { "epoch": 7.379061371841155, "grad_norm": 420.22088623046875, "learning_rate": 0.00019772906009659472, "loss": 7.4031, "step": 61320 }, { "epoch": 7.380264741275572, "grad_norm": 334.4451599121094, "learning_rate": 0.00019772825388789263, "loss": 7.5428, "step": 61330 }, { "epoch": 7.381468110709988, "grad_norm": 465.0753173828125, "learning_rate": 0.00019772744753775357, "loss": 7.583, "step": 61340 }, { "epoch": 7.382671480144404, "grad_norm": 882.8710327148438, "learning_rate": 0.0001977266410461788, "loss": 7.404, "step": 61350 }, { "epoch": 7.38387484957882, "grad_norm": 407.3564453125, "learning_rate": 0.00019772583441316944, "loss": 7.6056, "step": 61360 }, { "epoch": 7.385078219013237, "grad_norm": 195.94752502441406, "learning_rate": 0.00019772502763872667, "loss": 7.565, "step": 61370 }, { "epoch": 7.386281588447654, "grad_norm": 758.3787231445312, "learning_rate": 0.00019772422072285165, "loss": 7.4727, "step": 61380 }, { "epoch": 7.387484957882069, "grad_norm": 115.20805358886719, "learning_rate": 0.00019772341366554557, "loss": 7.6022, "step": 61390 }, { "epoch": 7.388688327316486, "grad_norm": 900.2510986328125, "learning_rate": 0.0001977226064668096, "loss": 7.4808, "step": 61400 }, { "epoch": 7.389891696750903, "grad_norm": 91.54452514648438, "learning_rate": 0.00019772179912664486, "loss": 7.5298, "step": 61410 }, { "epoch": 7.3910950661853185, "grad_norm": 98.03681182861328, "learning_rate": 0.00019772099164505258, "loss": 7.7436, "step": 61420 }, { "epoch": 7.392298435619735, "grad_norm": 71757.5078125, "learning_rate": 0.00019772018402203388, "loss": 7.7631, "step": 61430 }, { "epoch": 7.393501805054152, "grad_norm": 6845.66796875, "learning_rate": 0.00019771937625758993, "loss": 8.3932, "step": 61440 }, { "epoch": 7.394705174488568, "grad_norm": 3221.813232421875, "learning_rate": 0.00019771856835172193, "loss": 8.2226, "step": 61450 }, { "epoch": 7.395908543922984, "grad_norm": 7.67422342300415, "learning_rate": 0.00019771776030443104, "loss": 8.0501, "step": 61460 }, { "epoch": 7.397111913357401, "grad_norm": 7.596240520477295, "learning_rate": 0.00019771695211571845, "loss": 7.731, "step": 61470 }, { "epoch": 7.398315282791817, "grad_norm": 2.8515572547912598, "learning_rate": 0.00019771614378558526, "loss": 7.6125, "step": 61480 }, { "epoch": 7.3995186522262335, "grad_norm": 4.044766426086426, "learning_rate": 0.00019771533531403273, "loss": 7.5527, "step": 61490 }, { "epoch": 7.40072202166065, "grad_norm": 2.84025239944458, "learning_rate": 0.00019771452670106195, "loss": 7.6032, "step": 61500 }, { "epoch": 7.401925391095066, "grad_norm": 6.143985271453857, "learning_rate": 0.00019771371794667414, "loss": 7.5401, "step": 61510 }, { "epoch": 7.403128760529483, "grad_norm": 3.8663153648376465, "learning_rate": 0.00019771290905087042, "loss": 7.5409, "step": 61520 }, { "epoch": 7.404332129963899, "grad_norm": 3.899678945541382, "learning_rate": 0.00019771210001365204, "loss": 7.6064, "step": 61530 }, { "epoch": 7.405535499398315, "grad_norm": 9.156954765319824, "learning_rate": 0.00019771129083502012, "loss": 7.6168, "step": 61540 }, { "epoch": 7.406738868832732, "grad_norm": 7.387956619262695, "learning_rate": 0.00019771048151497582, "loss": 7.5788, "step": 61550 }, { "epoch": 7.4079422382671485, "grad_norm": 5.763236999511719, "learning_rate": 0.00019770967205352035, "loss": 7.5211, "step": 61560 }, { "epoch": 7.409145607701564, "grad_norm": 2.982830047607422, "learning_rate": 0.00019770886245065486, "loss": 7.6556, "step": 61570 }, { "epoch": 7.410348977135981, "grad_norm": 5.544549942016602, "learning_rate": 0.00019770805270638052, "loss": 7.5588, "step": 61580 }, { "epoch": 7.411552346570397, "grad_norm": 8.750646591186523, "learning_rate": 0.00019770724282069848, "loss": 7.652, "step": 61590 }, { "epoch": 7.412755716004813, "grad_norm": 4.386682033538818, "learning_rate": 0.00019770643279360997, "loss": 7.591, "step": 61600 }, { "epoch": 7.41395908543923, "grad_norm": 6.326420783996582, "learning_rate": 0.0001977056226251161, "loss": 7.6151, "step": 61610 }, { "epoch": 7.415162454873646, "grad_norm": 4.076996326446533, "learning_rate": 0.0001977048123152181, "loss": 7.5841, "step": 61620 }, { "epoch": 7.416365824308063, "grad_norm": 2.429680824279785, "learning_rate": 0.00019770400186391708, "loss": 7.5782, "step": 61630 }, { "epoch": 7.417569193742479, "grad_norm": 3.590900421142578, "learning_rate": 0.00019770319127121425, "loss": 7.5815, "step": 61640 }, { "epoch": 7.418772563176895, "grad_norm": 5.025684833526611, "learning_rate": 0.0001977023805371108, "loss": 7.5659, "step": 61650 }, { "epoch": 7.419975932611312, "grad_norm": 1.833849549293518, "learning_rate": 0.0001977015696616079, "loss": 7.5136, "step": 61660 }, { "epoch": 7.421179302045728, "grad_norm": 2.585576057434082, "learning_rate": 0.00019770075864470666, "loss": 7.6369, "step": 61670 }, { "epoch": 7.422382671480144, "grad_norm": 1.9070616960525513, "learning_rate": 0.00019769994748640833, "loss": 7.6164, "step": 61680 }, { "epoch": 7.423586040914561, "grad_norm": 8.042707443237305, "learning_rate": 0.00019769913618671405, "loss": 7.6009, "step": 61690 }, { "epoch": 7.4247894103489775, "grad_norm": 1.3596677780151367, "learning_rate": 0.000197698324745625, "loss": 7.5459, "step": 61700 }, { "epoch": 7.425992779783393, "grad_norm": 3.9862680435180664, "learning_rate": 0.00019769751316314237, "loss": 7.6482, "step": 61710 }, { "epoch": 7.42719614921781, "grad_norm": 9.697206497192383, "learning_rate": 0.0001976967014392673, "loss": 7.7043, "step": 61720 }, { "epoch": 7.428399518652226, "grad_norm": 6.146006107330322, "learning_rate": 0.00019769588957400095, "loss": 7.6642, "step": 61730 }, { "epoch": 7.4296028880866425, "grad_norm": 7.936337471008301, "learning_rate": 0.00019769507756734457, "loss": 7.6674, "step": 61740 }, { "epoch": 7.430806257521059, "grad_norm": 3.8579227924346924, "learning_rate": 0.0001976942654192993, "loss": 7.5317, "step": 61750 }, { "epoch": 7.432009626955475, "grad_norm": 1.0830788612365723, "learning_rate": 0.0001976934531298663, "loss": 7.5707, "step": 61760 }, { "epoch": 7.433212996389892, "grad_norm": 4.701448917388916, "learning_rate": 0.00019769264069904673, "loss": 7.5479, "step": 61770 }, { "epoch": 7.434416365824308, "grad_norm": 5.61253547668457, "learning_rate": 0.0001976918281268418, "loss": 7.5542, "step": 61780 }, { "epoch": 7.435619735258724, "grad_norm": 1.6608448028564453, "learning_rate": 0.00019769101541325268, "loss": 7.5943, "step": 61790 }, { "epoch": 7.436823104693141, "grad_norm": 2.3284783363342285, "learning_rate": 0.00019769020255828055, "loss": 7.6296, "step": 61800 }, { "epoch": 7.4380264741275575, "grad_norm": 1.209886908531189, "learning_rate": 0.00019768938956192655, "loss": 7.5939, "step": 61810 }, { "epoch": 7.439229843561973, "grad_norm": 6.6313958168029785, "learning_rate": 0.00019768857642419192, "loss": 7.589, "step": 61820 }, { "epoch": 7.44043321299639, "grad_norm": 2.033318281173706, "learning_rate": 0.00019768776314507778, "loss": 7.5264, "step": 61830 }, { "epoch": 7.441636582430807, "grad_norm": 3.16302490234375, "learning_rate": 0.00019768694972458533, "loss": 7.6112, "step": 61840 }, { "epoch": 7.442839951865222, "grad_norm": 2.6318798065185547, "learning_rate": 0.00019768613616271576, "loss": 7.5498, "step": 61850 }, { "epoch": 7.444043321299639, "grad_norm": 3.1773931980133057, "learning_rate": 0.00019768532245947022, "loss": 7.5234, "step": 61860 }, { "epoch": 7.445246690734056, "grad_norm": 4.611644268035889, "learning_rate": 0.00019768450861484992, "loss": 7.5847, "step": 61870 }, { "epoch": 7.4464500601684716, "grad_norm": 3.9233875274658203, "learning_rate": 0.00019768369462885597, "loss": 7.508, "step": 61880 }, { "epoch": 7.447653429602888, "grad_norm": 3.1735899448394775, "learning_rate": 0.00019768288050148964, "loss": 7.5514, "step": 61890 }, { "epoch": 7.448856799037305, "grad_norm": 1.643195390701294, "learning_rate": 0.00019768206623275205, "loss": 7.6471, "step": 61900 }, { "epoch": 7.450060168471721, "grad_norm": 1.9416474103927612, "learning_rate": 0.0001976812518226444, "loss": 7.6117, "step": 61910 }, { "epoch": 7.451263537906137, "grad_norm": 5.642084121704102, "learning_rate": 0.00019768043727116786, "loss": 7.6559, "step": 61920 }, { "epoch": 7.452466907340553, "grad_norm": 2.461118221282959, "learning_rate": 0.0001976796225783236, "loss": 7.6176, "step": 61930 }, { "epoch": 7.45367027677497, "grad_norm": 1.362603783607483, "learning_rate": 0.00019767880774411282, "loss": 7.5749, "step": 61940 }, { "epoch": 7.4548736462093865, "grad_norm": 1.9335479736328125, "learning_rate": 0.0001976779927685367, "loss": 7.5407, "step": 61950 }, { "epoch": 7.456077015643802, "grad_norm": 27.64781379699707, "learning_rate": 0.0001976771776515964, "loss": 8.4723, "step": 61960 }, { "epoch": 7.457280385078219, "grad_norm": 3.2767229080200195, "learning_rate": 0.0001976763623932931, "loss": 7.6751, "step": 61970 }, { "epoch": 7.458483754512636, "grad_norm": 4.256045818328857, "learning_rate": 0.00019767554699362798, "loss": 7.6952, "step": 61980 }, { "epoch": 7.4596871239470515, "grad_norm": 4.228376865386963, "learning_rate": 0.00019767473145260221, "loss": 7.5736, "step": 61990 }, { "epoch": 7.460890493381468, "grad_norm": 3.365741491317749, "learning_rate": 0.00019767391577021703, "loss": 7.5411, "step": 62000 }, { "epoch": 7.462093862815885, "grad_norm": 3.4032626152038574, "learning_rate": 0.00019767309994647359, "loss": 7.5595, "step": 62010 }, { "epoch": 7.463297232250301, "grad_norm": 4.338688373565674, "learning_rate": 0.00019767228398137303, "loss": 7.6592, "step": 62020 }, { "epoch": 7.464500601684717, "grad_norm": 2.9457218647003174, "learning_rate": 0.00019767146787491658, "loss": 7.5986, "step": 62030 }, { "epoch": 7.465703971119134, "grad_norm": 3.9267494678497314, "learning_rate": 0.00019767065162710535, "loss": 7.5062, "step": 62040 }, { "epoch": 7.46690734055355, "grad_norm": 3.50807785987854, "learning_rate": 0.00019766983523794064, "loss": 7.5115, "step": 62050 }, { "epoch": 7.4681107099879664, "grad_norm": 2.1032209396362305, "learning_rate": 0.00019766901870742353, "loss": 7.6017, "step": 62060 }, { "epoch": 7.469314079422382, "grad_norm": 2.7084898948669434, "learning_rate": 0.00019766820203555524, "loss": 7.5257, "step": 62070 }, { "epoch": 7.470517448856799, "grad_norm": 2.000941514968872, "learning_rate": 0.00019766738522233697, "loss": 7.5845, "step": 62080 }, { "epoch": 7.471720818291216, "grad_norm": 1.2094730138778687, "learning_rate": 0.00019766656826776985, "loss": 7.5537, "step": 62090 }, { "epoch": 7.472924187725631, "grad_norm": 5.074915409088135, "learning_rate": 0.0001976657511718551, "loss": 7.6043, "step": 62100 }, { "epoch": 7.474127557160048, "grad_norm": 4.014949798583984, "learning_rate": 0.00019766493393459392, "loss": 7.6621, "step": 62110 }, { "epoch": 7.475330926594465, "grad_norm": 3.9575960636138916, "learning_rate": 0.00019766411655598742, "loss": 7.6231, "step": 62120 }, { "epoch": 7.4765342960288805, "grad_norm": 6.740754127502441, "learning_rate": 0.0001976632990360369, "loss": 7.5467, "step": 62130 }, { "epoch": 7.477737665463297, "grad_norm": 2.403111219406128, "learning_rate": 0.0001976624813747434, "loss": 7.6174, "step": 62140 }, { "epoch": 7.478941034897714, "grad_norm": 4.091782093048096, "learning_rate": 0.0001976616635721082, "loss": 7.494, "step": 62150 }, { "epoch": 7.48014440433213, "grad_norm": 2.9781603813171387, "learning_rate": 0.0001976608456281325, "loss": 7.5424, "step": 62160 }, { "epoch": 7.481347773766546, "grad_norm": 1.9330450296401978, "learning_rate": 0.0001976600275428174, "loss": 7.5204, "step": 62170 }, { "epoch": 7.482551143200963, "grad_norm": 1.7493013143539429, "learning_rate": 0.00019765920931616413, "loss": 7.6254, "step": 62180 }, { "epoch": 7.483754512635379, "grad_norm": 4.262869358062744, "learning_rate": 0.00019765839094817392, "loss": 7.5192, "step": 62190 }, { "epoch": 7.4849578820697955, "grad_norm": 1.4905524253845215, "learning_rate": 0.00019765757243884785, "loss": 7.5038, "step": 62200 }, { "epoch": 7.486161251504212, "grad_norm": 2.74172306060791, "learning_rate": 0.0001976567537881872, "loss": 7.5698, "step": 62210 }, { "epoch": 7.487364620938628, "grad_norm": 4.381186485290527, "learning_rate": 0.0001976559349961931, "loss": 7.6151, "step": 62220 }, { "epoch": 7.488567990373045, "grad_norm": 3.1179680824279785, "learning_rate": 0.00019765511606286676, "loss": 7.5753, "step": 62230 }, { "epoch": 7.489771359807461, "grad_norm": 2.6024482250213623, "learning_rate": 0.00019765429698820935, "loss": 7.4814, "step": 62240 }, { "epoch": 7.490974729241877, "grad_norm": 2.894402503967285, "learning_rate": 0.00019765347777222204, "loss": 7.5792, "step": 62250 }, { "epoch": 7.492178098676294, "grad_norm": 1.1348998546600342, "learning_rate": 0.00019765265841490608, "loss": 7.5516, "step": 62260 }, { "epoch": 7.49338146811071, "grad_norm": 3.59531569480896, "learning_rate": 0.00019765183891626257, "loss": 7.5775, "step": 62270 }, { "epoch": 7.494584837545126, "grad_norm": 5.755243301391602, "learning_rate": 0.00019765101927629277, "loss": 7.5668, "step": 62280 }, { "epoch": 7.495788206979543, "grad_norm": 1.2266857624053955, "learning_rate": 0.00019765019949499784, "loss": 7.5431, "step": 62290 }, { "epoch": 7.496991576413959, "grad_norm": 8.78464412689209, "learning_rate": 0.00019764937957237897, "loss": 7.5258, "step": 62300 }, { "epoch": 7.498194945848375, "grad_norm": 4.158756732940674, "learning_rate": 0.0001976485595084373, "loss": 7.5491, "step": 62310 }, { "epoch": 7.499398315282792, "grad_norm": 4.620486736297607, "learning_rate": 0.00019764773930317409, "loss": 7.6269, "step": 62320 }, { "epoch": 7.500601684717208, "grad_norm": 2.0696299076080322, "learning_rate": 0.00019764691895659045, "loss": 7.5648, "step": 62330 }, { "epoch": 7.501805054151625, "grad_norm": 5.2726826667785645, "learning_rate": 0.00019764609846868766, "loss": 7.6527, "step": 62340 }, { "epoch": 7.503008423586041, "grad_norm": 1.4752849340438843, "learning_rate": 0.00019764527783946686, "loss": 7.5715, "step": 62350 }, { "epoch": 7.504211793020457, "grad_norm": 3.716099262237549, "learning_rate": 0.00019764445706892917, "loss": 7.4312, "step": 62360 }, { "epoch": 7.505415162454874, "grad_norm": 2.872321605682373, "learning_rate": 0.00019764363615707592, "loss": 7.5898, "step": 62370 }, { "epoch": 7.5066185318892895, "grad_norm": 2.2229344844818115, "learning_rate": 0.00019764281510390816, "loss": 7.6523, "step": 62380 }, { "epoch": 7.507821901323706, "grad_norm": 3.699789047241211, "learning_rate": 0.00019764199390942717, "loss": 7.4637, "step": 62390 }, { "epoch": 7.509025270758123, "grad_norm": 4.786972999572754, "learning_rate": 0.0001976411725736341, "loss": 7.6154, "step": 62400 }, { "epoch": 7.510228640192539, "grad_norm": 2.555964946746826, "learning_rate": 0.00019764035109653017, "loss": 7.5778, "step": 62410 }, { "epoch": 7.511432009626955, "grad_norm": 1.9561940431594849, "learning_rate": 0.0001976395294781165, "loss": 7.5444, "step": 62420 }, { "epoch": 7.512635379061372, "grad_norm": 5.055975914001465, "learning_rate": 0.00019763870771839437, "loss": 7.5536, "step": 62430 }, { "epoch": 7.513838748495788, "grad_norm": 1.9030122756958008, "learning_rate": 0.0001976378858173649, "loss": 7.5497, "step": 62440 }, { "epoch": 7.5150421179302045, "grad_norm": 1.4870508909225464, "learning_rate": 0.0001976370637750293, "loss": 7.6542, "step": 62450 }, { "epoch": 7.516245487364621, "grad_norm": 1.8990765810012817, "learning_rate": 0.00019763624159138877, "loss": 7.6043, "step": 62460 }, { "epoch": 7.517448856799037, "grad_norm": 1.8807646036148071, "learning_rate": 0.00019763541926644452, "loss": 7.6038, "step": 62470 }, { "epoch": 7.518652226233454, "grad_norm": 1.7417924404144287, "learning_rate": 0.00019763459680019767, "loss": 7.5579, "step": 62480 }, { "epoch": 7.51985559566787, "grad_norm": 3.7538490295410156, "learning_rate": 0.00019763377419264945, "loss": 7.5665, "step": 62490 }, { "epoch": 7.521058965102286, "grad_norm": 3.280259132385254, "learning_rate": 0.00019763295144380107, "loss": 7.6211, "step": 62500 }, { "epoch": 7.522262334536703, "grad_norm": 2.8466787338256836, "learning_rate": 0.00019763212855365372, "loss": 7.5859, "step": 62510 }, { "epoch": 7.5234657039711195, "grad_norm": 2.8797900676727295, "learning_rate": 0.00019763130552220856, "loss": 7.6329, "step": 62520 }, { "epoch": 7.524669073405535, "grad_norm": 1.699373483657837, "learning_rate": 0.00019763048234946678, "loss": 7.5808, "step": 62530 }, { "epoch": 7.525872442839952, "grad_norm": 1.7151769399642944, "learning_rate": 0.00019762965903542964, "loss": 7.5738, "step": 62540 }, { "epoch": 7.527075812274369, "grad_norm": 2.4995622634887695, "learning_rate": 0.00019762883558009824, "loss": 7.539, "step": 62550 }, { "epoch": 7.528279181708784, "grad_norm": 1.6077799797058105, "learning_rate": 0.0001976280119834738, "loss": 7.5272, "step": 62560 }, { "epoch": 7.529482551143201, "grad_norm": 3.734455108642578, "learning_rate": 0.00019762718824555754, "loss": 7.5363, "step": 62570 }, { "epoch": 7.530685920577618, "grad_norm": 2.447986125946045, "learning_rate": 0.00019762636436635066, "loss": 7.5188, "step": 62580 }, { "epoch": 7.5318892900120336, "grad_norm": 5.249312877655029, "learning_rate": 0.0001976255403458543, "loss": 7.6377, "step": 62590 }, { "epoch": 7.53309265944645, "grad_norm": 2.261216640472412, "learning_rate": 0.00019762471618406968, "loss": 7.5725, "step": 62600 }, { "epoch": 7.534296028880867, "grad_norm": 3.281552314758301, "learning_rate": 0.00019762389188099797, "loss": 7.5659, "step": 62610 }, { "epoch": 7.535499398315283, "grad_norm": 2.4530956745147705, "learning_rate": 0.00019762306743664043, "loss": 7.5436, "step": 62620 }, { "epoch": 7.536702767749699, "grad_norm": 2.429037094116211, "learning_rate": 0.00019762224285099818, "loss": 7.5066, "step": 62630 }, { "epoch": 7.537906137184115, "grad_norm": 1.5216857194900513, "learning_rate": 0.00019762141812407246, "loss": 7.5222, "step": 62640 }, { "epoch": 7.539109506618532, "grad_norm": 4.295212268829346, "learning_rate": 0.00019762059325586443, "loss": 7.5354, "step": 62650 }, { "epoch": 7.5403128760529485, "grad_norm": 1.930651307106018, "learning_rate": 0.00019761976824637532, "loss": 7.5968, "step": 62660 }, { "epoch": 7.541516245487364, "grad_norm": 3.3710813522338867, "learning_rate": 0.0001976189430956063, "loss": 7.5271, "step": 62670 }, { "epoch": 7.542719614921781, "grad_norm": 6.041106700897217, "learning_rate": 0.00019761811780355854, "loss": 7.502, "step": 62680 }, { "epoch": 7.543922984356198, "grad_norm": 1.3379738330841064, "learning_rate": 0.0001976172923702333, "loss": 7.54, "step": 62690 }, { "epoch": 7.5451263537906135, "grad_norm": 6.153784275054932, "learning_rate": 0.00019761646679563173, "loss": 7.542, "step": 62700 }, { "epoch": 7.54632972322503, "grad_norm": 3.5414633750915527, "learning_rate": 0.000197615641079755, "loss": 7.5418, "step": 62710 }, { "epoch": 7.547533092659447, "grad_norm": 3.5140304565429688, "learning_rate": 0.00019761481522260436, "loss": 7.5398, "step": 62720 }, { "epoch": 7.548736462093863, "grad_norm": 3.721635341644287, "learning_rate": 0.00019761398922418097, "loss": 7.5333, "step": 62730 }, { "epoch": 7.549939831528279, "grad_norm": 2.585458755493164, "learning_rate": 0.00019761316308448604, "loss": 7.5586, "step": 62740 }, { "epoch": 7.551143200962695, "grad_norm": 2.3863401412963867, "learning_rate": 0.00019761233680352081, "loss": 7.5191, "step": 62750 }, { "epoch": 7.552346570397112, "grad_norm": 1.8514198064804077, "learning_rate": 0.00019761151038128638, "loss": 7.5751, "step": 62760 }, { "epoch": 7.5535499398315284, "grad_norm": 2.957806348800659, "learning_rate": 0.000197610683817784, "loss": 7.5367, "step": 62770 }, { "epoch": 7.554753309265944, "grad_norm": 2.435565233230591, "learning_rate": 0.00019760985711301488, "loss": 7.6302, "step": 62780 }, { "epoch": 7.555956678700361, "grad_norm": 2.6678693294525146, "learning_rate": 0.00019760903026698018, "loss": 7.5704, "step": 62790 }, { "epoch": 7.557160048134778, "grad_norm": 2.53086256980896, "learning_rate": 0.00019760820327968112, "loss": 7.4994, "step": 62800 }, { "epoch": 7.558363417569193, "grad_norm": 4.167178630828857, "learning_rate": 0.00019760737615111889, "loss": 7.5181, "step": 62810 }, { "epoch": 7.55956678700361, "grad_norm": 1.563988447189331, "learning_rate": 0.00019760654888129468, "loss": 7.6593, "step": 62820 }, { "epoch": 7.560770156438027, "grad_norm": 2.448904514312744, "learning_rate": 0.0001976057214702097, "loss": 7.6433, "step": 62830 }, { "epoch": 7.5619735258724425, "grad_norm": 5.7234206199646, "learning_rate": 0.00019760489391786515, "loss": 7.6568, "step": 62840 }, { "epoch": 7.563176895306859, "grad_norm": 1.2108515501022339, "learning_rate": 0.00019760406622426223, "loss": 7.5729, "step": 62850 }, { "epoch": 7.564380264741276, "grad_norm": 4.4809041023254395, "learning_rate": 0.0001976032383894021, "loss": 7.5406, "step": 62860 }, { "epoch": 7.565583634175692, "grad_norm": 4.5882086753845215, "learning_rate": 0.000197602410413286, "loss": 7.7044, "step": 62870 }, { "epoch": 7.566787003610108, "grad_norm": 1.8069469928741455, "learning_rate": 0.00019760158229591512, "loss": 7.5452, "step": 62880 }, { "epoch": 7.567990373044525, "grad_norm": 1.049312949180603, "learning_rate": 0.00019760075403729065, "loss": 7.5368, "step": 62890 }, { "epoch": 7.569193742478941, "grad_norm": 1.602185845375061, "learning_rate": 0.0001975999256374138, "loss": 7.5801, "step": 62900 }, { "epoch": 7.5703971119133575, "grad_norm": 1.6173040866851807, "learning_rate": 0.00019759909709628578, "loss": 7.6503, "step": 62910 }, { "epoch": 7.571600481347774, "grad_norm": 4.716909885406494, "learning_rate": 0.00019759826841390772, "loss": 7.53, "step": 62920 }, { "epoch": 7.57280385078219, "grad_norm": 2.9156434535980225, "learning_rate": 0.00019759743959028088, "loss": 7.5201, "step": 62930 }, { "epoch": 7.574007220216607, "grad_norm": 1.4834566116333008, "learning_rate": 0.0001975966106254065, "loss": 7.4602, "step": 62940 }, { "epoch": 7.575210589651023, "grad_norm": 1.8014588356018066, "learning_rate": 0.00019759578151928566, "loss": 7.581, "step": 62950 }, { "epoch": 7.576413959085439, "grad_norm": 4.3229451179504395, "learning_rate": 0.00019759495227191968, "loss": 7.6452, "step": 62960 }, { "epoch": 7.577617328519856, "grad_norm": 2.332765817642212, "learning_rate": 0.00019759412288330967, "loss": 7.6667, "step": 62970 }, { "epoch": 7.578820697954272, "grad_norm": 2.9040307998657227, "learning_rate": 0.0001975932933534569, "loss": 7.5922, "step": 62980 }, { "epoch": 7.580024067388688, "grad_norm": 3.8108394145965576, "learning_rate": 0.0001975924636823625, "loss": 7.5505, "step": 62990 }, { "epoch": 7.581227436823105, "grad_norm": 2.6067097187042236, "learning_rate": 0.00019759163387002775, "loss": 7.5408, "step": 63000 }, { "epoch": 7.582430806257521, "grad_norm": 2.1119534969329834, "learning_rate": 0.0001975908039164538, "loss": 7.5641, "step": 63010 }, { "epoch": 7.583634175691937, "grad_norm": 2.8722622394561768, "learning_rate": 0.00019758997382164186, "loss": 7.5431, "step": 63020 }, { "epoch": 7.584837545126354, "grad_norm": 2.3188719749450684, "learning_rate": 0.00019758914358559312, "loss": 7.5942, "step": 63030 }, { "epoch": 7.58604091456077, "grad_norm": 8.449291229248047, "learning_rate": 0.00019758831320830882, "loss": 7.546, "step": 63040 }, { "epoch": 7.587244283995187, "grad_norm": 2.0346593856811523, "learning_rate": 0.00019758748268979012, "loss": 7.5198, "step": 63050 }, { "epoch": 7.588447653429603, "grad_norm": 4.518615245819092, "learning_rate": 0.00019758665203003823, "loss": 7.6062, "step": 63060 }, { "epoch": 7.589651022864019, "grad_norm": 2.3619871139526367, "learning_rate": 0.0001975858212290544, "loss": 7.5481, "step": 63070 }, { "epoch": 7.590854392298436, "grad_norm": 1.6649744510650635, "learning_rate": 0.00019758499028683978, "loss": 7.5576, "step": 63080 }, { "epoch": 7.5920577617328515, "grad_norm": 7.858250141143799, "learning_rate": 0.00019758415920339558, "loss": 7.566, "step": 63090 }, { "epoch": 7.593261131167268, "grad_norm": 1.6704298257827759, "learning_rate": 0.000197583327978723, "loss": 7.5065, "step": 63100 }, { "epoch": 7.594464500601685, "grad_norm": 1.2919920682907104, "learning_rate": 0.00019758249661282323, "loss": 7.5531, "step": 63110 }, { "epoch": 7.595667870036101, "grad_norm": 4.33117151260376, "learning_rate": 0.00019758166510569755, "loss": 7.5211, "step": 63120 }, { "epoch": 7.596871239470517, "grad_norm": 1.717166781425476, "learning_rate": 0.00019758083345734707, "loss": 7.5163, "step": 63130 }, { "epoch": 7.598074608904934, "grad_norm": 6.342723369598389, "learning_rate": 0.00019758000166777303, "loss": 7.5172, "step": 63140 }, { "epoch": 7.59927797833935, "grad_norm": 5.0491461753845215, "learning_rate": 0.00019757916973697664, "loss": 7.4563, "step": 63150 }, { "epoch": 7.6004813477737665, "grad_norm": 1.950960636138916, "learning_rate": 0.00019757833766495913, "loss": 7.6231, "step": 63160 }, { "epoch": 7.601684717208183, "grad_norm": 3.9927687644958496, "learning_rate": 0.00019757750545172165, "loss": 7.5158, "step": 63170 }, { "epoch": 7.602888086642599, "grad_norm": 1.4569436311721802, "learning_rate": 0.00019757667309726543, "loss": 7.4586, "step": 63180 }, { "epoch": 7.604091456077016, "grad_norm": 7.387356758117676, "learning_rate": 0.00019757584060159166, "loss": 7.5484, "step": 63190 }, { "epoch": 7.605294825511432, "grad_norm": 2.513106346130371, "learning_rate": 0.0001975750079647016, "loss": 7.5849, "step": 63200 }, { "epoch": 7.606498194945848, "grad_norm": 2.2404375076293945, "learning_rate": 0.00019757417518659635, "loss": 7.619, "step": 63210 }, { "epoch": 7.607701564380265, "grad_norm": 3.4064767360687256, "learning_rate": 0.00019757334226727725, "loss": 7.5106, "step": 63220 }, { "epoch": 7.6089049338146815, "grad_norm": 1.926140308380127, "learning_rate": 0.00019757250920674537, "loss": 7.5097, "step": 63230 }, { "epoch": 7.610108303249097, "grad_norm": 1.4428722858428955, "learning_rate": 0.000197571676005002, "loss": 7.6128, "step": 63240 }, { "epoch": 7.611311672683514, "grad_norm": 3.848318099975586, "learning_rate": 0.00019757084266204834, "loss": 7.5696, "step": 63250 }, { "epoch": 7.612515042117931, "grad_norm": 2.6820499897003174, "learning_rate": 0.00019757000917788558, "loss": 7.561, "step": 63260 }, { "epoch": 7.613718411552346, "grad_norm": 2.538931131362915, "learning_rate": 0.0001975691755525149, "loss": 7.6254, "step": 63270 }, { "epoch": 7.614921780986763, "grad_norm": 1.9670612812042236, "learning_rate": 0.00019756834178593757, "loss": 7.5691, "step": 63280 }, { "epoch": 7.61612515042118, "grad_norm": 1.9742215871810913, "learning_rate": 0.00019756750787815473, "loss": 7.6541, "step": 63290 }, { "epoch": 7.617328519855596, "grad_norm": 2.971890926361084, "learning_rate": 0.00019756667382916763, "loss": 7.5045, "step": 63300 }, { "epoch": 7.618531889290012, "grad_norm": 1.8174625635147095, "learning_rate": 0.00019756583963897748, "loss": 7.5149, "step": 63310 }, { "epoch": 7.619735258724428, "grad_norm": 4.023271083831787, "learning_rate": 0.00019756500530758545, "loss": 7.573, "step": 63320 }, { "epoch": 7.620938628158845, "grad_norm": 1.5273938179016113, "learning_rate": 0.00019756417083499278, "loss": 7.5163, "step": 63330 }, { "epoch": 7.622141997593261, "grad_norm": 2.6752381324768066, "learning_rate": 0.00019756333622120067, "loss": 7.5582, "step": 63340 }, { "epoch": 7.623345367027677, "grad_norm": 1.7386105060577393, "learning_rate": 0.0001975625014662103, "loss": 7.554, "step": 63350 }, { "epoch": 7.624548736462094, "grad_norm": 2.0145838260650635, "learning_rate": 0.00019756166657002293, "loss": 7.4902, "step": 63360 }, { "epoch": 7.6257521058965105, "grad_norm": 2.0998079776763916, "learning_rate": 0.00019756083153263973, "loss": 7.5331, "step": 63370 }, { "epoch": 7.626955475330926, "grad_norm": 2.288391590118408, "learning_rate": 0.00019755999635406193, "loss": 7.543, "step": 63380 }, { "epoch": 7.628158844765343, "grad_norm": 1.6026840209960938, "learning_rate": 0.0001975591610342907, "loss": 7.517, "step": 63390 }, { "epoch": 7.62936221419976, "grad_norm": 2.1817140579223633, "learning_rate": 0.00019755832557332733, "loss": 7.4646, "step": 63400 }, { "epoch": 7.6305655836341755, "grad_norm": 2.484788179397583, "learning_rate": 0.00019755748997117293, "loss": 7.5459, "step": 63410 }, { "epoch": 7.631768953068592, "grad_norm": 2.66873836517334, "learning_rate": 0.00019755665422782878, "loss": 7.5803, "step": 63420 }, { "epoch": 7.632972322503008, "grad_norm": 1.8474595546722412, "learning_rate": 0.00019755581834329604, "loss": 7.582, "step": 63430 }, { "epoch": 7.634175691937425, "grad_norm": 6.507787227630615, "learning_rate": 0.00019755498231757595, "loss": 7.5455, "step": 63440 }, { "epoch": 7.635379061371841, "grad_norm": 2.517765760421753, "learning_rate": 0.00019755414615066976, "loss": 7.4921, "step": 63450 }, { "epoch": 7.636582430806257, "grad_norm": 8.62722110748291, "learning_rate": 0.0001975533098425786, "loss": 7.6468, "step": 63460 }, { "epoch": 7.637785800240674, "grad_norm": 3.2575252056121826, "learning_rate": 0.0001975524733933037, "loss": 7.5935, "step": 63470 }, { "epoch": 7.6389891696750905, "grad_norm": 5.830880165100098, "learning_rate": 0.00019755163680284631, "loss": 7.6109, "step": 63480 }, { "epoch": 7.640192539109506, "grad_norm": 5.177492141723633, "learning_rate": 0.00019755080007120764, "loss": 7.4877, "step": 63490 }, { "epoch": 7.641395908543923, "grad_norm": 1.9223870038986206, "learning_rate": 0.00019754996319838882, "loss": 7.5029, "step": 63500 }, { "epoch": 7.64259927797834, "grad_norm": 2.336559295654297, "learning_rate": 0.00019754912618439113, "loss": 7.5298, "step": 63510 }, { "epoch": 7.643802647412755, "grad_norm": 1.6006437540054321, "learning_rate": 0.00019754828902921582, "loss": 7.5539, "step": 63520 }, { "epoch": 7.645006016847172, "grad_norm": 2.190847396850586, "learning_rate": 0.00019754745173286402, "loss": 7.4398, "step": 63530 }, { "epoch": 7.646209386281589, "grad_norm": 3.2800686359405518, "learning_rate": 0.00019754661429533695, "loss": 7.5672, "step": 63540 }, { "epoch": 7.6474127557160045, "grad_norm": 2.1784653663635254, "learning_rate": 0.00019754577671663587, "loss": 7.4409, "step": 63550 }, { "epoch": 7.648616125150421, "grad_norm": 1.5865652561187744, "learning_rate": 0.00019754493899676198, "loss": 7.5773, "step": 63560 }, { "epoch": 7.649819494584838, "grad_norm": 3.0933616161346436, "learning_rate": 0.00019754410113571646, "loss": 7.5312, "step": 63570 }, { "epoch": 7.651022864019254, "grad_norm": 2.3897340297698975, "learning_rate": 0.00019754326313350056, "loss": 7.4591, "step": 63580 }, { "epoch": 7.65222623345367, "grad_norm": 3.7813637256622314, "learning_rate": 0.00019754242499011546, "loss": 7.5851, "step": 63590 }, { "epoch": 7.653429602888087, "grad_norm": 6.147426605224609, "learning_rate": 0.00019754158670556239, "loss": 7.4976, "step": 63600 }, { "epoch": 7.654632972322503, "grad_norm": 3.5563721656799316, "learning_rate": 0.00019754074827984256, "loss": 7.4387, "step": 63610 }, { "epoch": 7.6558363417569195, "grad_norm": 9.913110733032227, "learning_rate": 0.0001975399097129572, "loss": 7.4511, "step": 63620 }, { "epoch": 7.657039711191336, "grad_norm": 3.6053640842437744, "learning_rate": 0.0001975390710049075, "loss": 7.5784, "step": 63630 }, { "epoch": 7.658243080625752, "grad_norm": 1.8616575002670288, "learning_rate": 0.00019753823215569466, "loss": 7.636, "step": 63640 }, { "epoch": 7.659446450060169, "grad_norm": 2.616544485092163, "learning_rate": 0.00019753739316531995, "loss": 7.5859, "step": 63650 }, { "epoch": 7.6606498194945845, "grad_norm": 2.7370352745056152, "learning_rate": 0.00019753655403378454, "loss": 7.4884, "step": 63660 }, { "epoch": 7.661853188929001, "grad_norm": 6.263876438140869, "learning_rate": 0.00019753571476108968, "loss": 7.6124, "step": 63670 }, { "epoch": 7.663056558363418, "grad_norm": 5.364309787750244, "learning_rate": 0.0001975348753472365, "loss": 7.545, "step": 63680 }, { "epoch": 7.664259927797834, "grad_norm": 3.396055221557617, "learning_rate": 0.0001975340357922263, "loss": 7.5016, "step": 63690 }, { "epoch": 7.66546329723225, "grad_norm": 2.0077784061431885, "learning_rate": 0.0001975331960960603, "loss": 7.4803, "step": 63700 }, { "epoch": 7.666666666666667, "grad_norm": 3.4992411136627197, "learning_rate": 0.00019753235625873968, "loss": 7.5829, "step": 63710 }, { "epoch": 7.667870036101083, "grad_norm": 5.172300338745117, "learning_rate": 0.00019753151628026564, "loss": 7.4763, "step": 63720 }, { "epoch": 7.669073405535499, "grad_norm": 4.38728666305542, "learning_rate": 0.00019753067616063943, "loss": 7.609, "step": 63730 }, { "epoch": 7.670276774969916, "grad_norm": 6.026226997375488, "learning_rate": 0.0001975298358998622, "loss": 7.5913, "step": 63740 }, { "epoch": 7.671480144404332, "grad_norm": 37.50835418701172, "learning_rate": 0.0001975289954979353, "loss": 7.6434, "step": 63750 }, { "epoch": 7.672683513838749, "grad_norm": 4.4342498779296875, "learning_rate": 0.0001975281549548598, "loss": 7.5253, "step": 63760 }, { "epoch": 7.673886883273164, "grad_norm": 2.3639578819274902, "learning_rate": 0.00019752731427063702, "loss": 7.613, "step": 63770 }, { "epoch": 7.675090252707581, "grad_norm": 3.7852845191955566, "learning_rate": 0.0001975264734452681, "loss": 7.5704, "step": 63780 }, { "epoch": 7.676293622141998, "grad_norm": 2.4387924671173096, "learning_rate": 0.00019752563247875436, "loss": 7.4976, "step": 63790 }, { "epoch": 7.6774969915764135, "grad_norm": 6.428036212921143, "learning_rate": 0.00019752479137109688, "loss": 7.4921, "step": 63800 }, { "epoch": 7.67870036101083, "grad_norm": 116.8272476196289, "learning_rate": 0.00019752395012229701, "loss": 7.6181, "step": 63810 }, { "epoch": 7.679903730445247, "grad_norm": 418.4561462402344, "learning_rate": 0.00019752310873235587, "loss": 7.8285, "step": 63820 }, { "epoch": 7.681107099879663, "grad_norm": 5.228960990905762, "learning_rate": 0.00019752226720127472, "loss": 7.58, "step": 63830 }, { "epoch": 7.682310469314079, "grad_norm": 3.4253652095794678, "learning_rate": 0.00019752142552905476, "loss": 7.5709, "step": 63840 }, { "epoch": 7.683513838748496, "grad_norm": 2.6173439025878906, "learning_rate": 0.00019752058371569724, "loss": 7.5608, "step": 63850 }, { "epoch": 7.684717208182912, "grad_norm": 6.814177513122559, "learning_rate": 0.00019751974176120335, "loss": 7.6076, "step": 63860 }, { "epoch": 7.6859205776173285, "grad_norm": 1.6845054626464844, "learning_rate": 0.0001975188996655743, "loss": 7.5737, "step": 63870 }, { "epoch": 7.687123947051745, "grad_norm": 1.6492327451705933, "learning_rate": 0.00019751805742881134, "loss": 7.5565, "step": 63880 }, { "epoch": 7.688327316486161, "grad_norm": 4.752699375152588, "learning_rate": 0.0001975172150509157, "loss": 7.6199, "step": 63890 }, { "epoch": 7.689530685920578, "grad_norm": 1.4198944568634033, "learning_rate": 0.00019751637253188854, "loss": 7.5747, "step": 63900 }, { "epoch": 7.690734055354994, "grad_norm": 6.435874938964844, "learning_rate": 0.00019751552987173112, "loss": 7.5798, "step": 63910 }, { "epoch": 7.69193742478941, "grad_norm": 2.5221967697143555, "learning_rate": 0.00019751468707044465, "loss": 7.542, "step": 63920 }, { "epoch": 7.693140794223827, "grad_norm": 2.549466848373413, "learning_rate": 0.00019751384412803038, "loss": 7.4978, "step": 63930 }, { "epoch": 7.6943441636582435, "grad_norm": 0.9417816400527954, "learning_rate": 0.00019751300104448946, "loss": 7.6134, "step": 63940 }, { "epoch": 7.695547533092659, "grad_norm": 1.1502593755722046, "learning_rate": 0.00019751215781982318, "loss": 7.5314, "step": 63950 }, { "epoch": 7.696750902527076, "grad_norm": 1.5023001432418823, "learning_rate": 0.00019751131445403273, "loss": 7.5336, "step": 63960 }, { "epoch": 7.697954271961493, "grad_norm": 2.0197274684906006, "learning_rate": 0.00019751047094711933, "loss": 7.5932, "step": 63970 }, { "epoch": 7.699157641395908, "grad_norm": 2.939283847808838, "learning_rate": 0.0001975096272990842, "loss": 7.4884, "step": 63980 }, { "epoch": 7.700361010830325, "grad_norm": 4.106808185577393, "learning_rate": 0.00019750878350992856, "loss": 7.4562, "step": 63990 }, { "epoch": 7.701564380264742, "grad_norm": 2.5515804290771484, "learning_rate": 0.00019750793957965364, "loss": 7.589, "step": 64000 }, { "epoch": 7.702767749699158, "grad_norm": 2.34568190574646, "learning_rate": 0.00019750709550826065, "loss": 7.5463, "step": 64010 }, { "epoch": 7.703971119133574, "grad_norm": 7.649059295654297, "learning_rate": 0.00019750625129575085, "loss": 7.6698, "step": 64020 }, { "epoch": 7.70517448856799, "grad_norm": 2.6196110248565674, "learning_rate": 0.0001975054069421254, "loss": 7.5876, "step": 64030 }, { "epoch": 7.706377858002407, "grad_norm": 3.7890024185180664, "learning_rate": 0.00019750456244738555, "loss": 7.5667, "step": 64040 }, { "epoch": 7.707581227436823, "grad_norm": 1.9010287523269653, "learning_rate": 0.00019750371781153257, "loss": 7.5407, "step": 64050 }, { "epoch": 7.708784596871239, "grad_norm": 2.4469101428985596, "learning_rate": 0.00019750287303456758, "loss": 7.5519, "step": 64060 }, { "epoch": 7.709987966305656, "grad_norm": 3.000920295715332, "learning_rate": 0.0001975020281164919, "loss": 7.5291, "step": 64070 }, { "epoch": 7.7111913357400725, "grad_norm": 3.5249977111816406, "learning_rate": 0.0001975011830573067, "loss": 7.5339, "step": 64080 }, { "epoch": 7.712394705174488, "grad_norm": 6.092122554779053, "learning_rate": 0.0001975003378570132, "loss": 7.5936, "step": 64090 }, { "epoch": 7.713598074608905, "grad_norm": 4.037398338317871, "learning_rate": 0.00019749949251561267, "loss": 7.6532, "step": 64100 }, { "epoch": 7.714801444043322, "grad_norm": 1.274417757987976, "learning_rate": 0.00019749864703310626, "loss": 7.5724, "step": 64110 }, { "epoch": 7.7160048134777375, "grad_norm": 2.7037127017974854, "learning_rate": 0.00019749780140949528, "loss": 7.5893, "step": 64120 }, { "epoch": 7.717208182912154, "grad_norm": 5.475039958953857, "learning_rate": 0.00019749695564478091, "loss": 7.5529, "step": 64130 }, { "epoch": 7.71841155234657, "grad_norm": 7.58928108215332, "learning_rate": 0.00019749610973896433, "loss": 7.6414, "step": 64140 }, { "epoch": 7.719614921780987, "grad_norm": 1.3528416156768799, "learning_rate": 0.00019749526369204685, "loss": 7.6468, "step": 64150 }, { "epoch": 7.720818291215403, "grad_norm": 2.443830728530884, "learning_rate": 0.00019749441750402963, "loss": 7.5163, "step": 64160 }, { "epoch": 7.722021660649819, "grad_norm": 5.909456729888916, "learning_rate": 0.00019749357117491393, "loss": 7.5757, "step": 64170 }, { "epoch": 7.723225030084236, "grad_norm": 1.958285927772522, "learning_rate": 0.00019749272470470094, "loss": 7.4558, "step": 64180 }, { "epoch": 7.7244283995186525, "grad_norm": 5.574573516845703, "learning_rate": 0.0001974918780933919, "loss": 7.4697, "step": 64190 }, { "epoch": 7.725631768953068, "grad_norm": 2.4745922088623047, "learning_rate": 0.0001974910313409881, "loss": 7.6013, "step": 64200 }, { "epoch": 7.726835138387485, "grad_norm": 1.5560861825942993, "learning_rate": 0.00019749018444749066, "loss": 7.6324, "step": 64210 }, { "epoch": 7.728038507821902, "grad_norm": 1.7659121751785278, "learning_rate": 0.0001974893374129009, "loss": 7.5592, "step": 64220 }, { "epoch": 7.729241877256317, "grad_norm": 1.9414066076278687, "learning_rate": 0.00019748849023721996, "loss": 7.5265, "step": 64230 }, { "epoch": 7.730445246690734, "grad_norm": 4.28660249710083, "learning_rate": 0.00019748764292044907, "loss": 7.5062, "step": 64240 }, { "epoch": 7.731648616125151, "grad_norm": 2.162717342376709, "learning_rate": 0.00019748679546258954, "loss": 7.6001, "step": 64250 }, { "epoch": 7.7328519855595665, "grad_norm": 1.2348604202270508, "learning_rate": 0.00019748594786364257, "loss": 7.5055, "step": 64260 }, { "epoch": 7.734055354993983, "grad_norm": 4.4819722175598145, "learning_rate": 0.0001974851001236093, "loss": 7.5513, "step": 64270 }, { "epoch": 7.7352587244284, "grad_norm": 2.6607470512390137, "learning_rate": 0.00019748425224249108, "loss": 7.5627, "step": 64280 }, { "epoch": 7.736462093862816, "grad_norm": 1.8192871809005737, "learning_rate": 0.00019748340422028906, "loss": 7.7036, "step": 64290 }, { "epoch": 7.737665463297232, "grad_norm": 1.4009596109390259, "learning_rate": 0.00019748255605700447, "loss": 7.4962, "step": 64300 }, { "epoch": 7.738868832731649, "grad_norm": 1.0280046463012695, "learning_rate": 0.0001974817077526386, "loss": 7.5562, "step": 64310 }, { "epoch": 7.740072202166065, "grad_norm": 1.7691929340362549, "learning_rate": 0.00019748085930719258, "loss": 7.5742, "step": 64320 }, { "epoch": 7.7412755716004815, "grad_norm": 3.197049856185913, "learning_rate": 0.00019748001072066772, "loss": 7.5702, "step": 64330 }, { "epoch": 7.742478941034898, "grad_norm": 1.4524508714675903, "learning_rate": 0.0001974791619930652, "loss": 7.6947, "step": 64340 }, { "epoch": 7.743682310469314, "grad_norm": 1.4267940521240234, "learning_rate": 0.00019747831312438626, "loss": 7.5172, "step": 64350 }, { "epoch": 7.744885679903731, "grad_norm": 2.4507248401641846, "learning_rate": 0.00019747746411463214, "loss": 7.5614, "step": 64360 }, { "epoch": 7.7460890493381465, "grad_norm": 5.619117736816406, "learning_rate": 0.0001974766149638041, "loss": 7.6002, "step": 64370 }, { "epoch": 7.747292418772563, "grad_norm": 1.2406738996505737, "learning_rate": 0.00019747576567190328, "loss": 7.4875, "step": 64380 }, { "epoch": 7.74849578820698, "grad_norm": 1.8460060358047485, "learning_rate": 0.000197474916238931, "loss": 7.593, "step": 64390 }, { "epoch": 7.749699157641396, "grad_norm": 5.018037796020508, "learning_rate": 0.00019747406666488843, "loss": 7.4923, "step": 64400 }, { "epoch": 7.750902527075812, "grad_norm": 1.853564977645874, "learning_rate": 0.00019747321694977684, "loss": 7.5799, "step": 64410 }, { "epoch": 7.752105896510229, "grad_norm": 3.093491554260254, "learning_rate": 0.0001974723670935974, "loss": 7.5675, "step": 64420 }, { "epoch": 7.753309265944645, "grad_norm": 2.326819658279419, "learning_rate": 0.0001974715170963514, "loss": 7.5225, "step": 64430 }, { "epoch": 7.754512635379061, "grad_norm": 4.111430644989014, "learning_rate": 0.00019747066695804003, "loss": 7.6652, "step": 64440 }, { "epoch": 7.755716004813478, "grad_norm": 1.2467533349990845, "learning_rate": 0.0001974698166786646, "loss": 7.5544, "step": 64450 }, { "epoch": 7.756919374247894, "grad_norm": 4.279912948608398, "learning_rate": 0.00019746896625822622, "loss": 7.5619, "step": 64460 }, { "epoch": 7.758122743682311, "grad_norm": 4.139098644256592, "learning_rate": 0.00019746811569672622, "loss": 7.5576, "step": 64470 }, { "epoch": 7.759326113116726, "grad_norm": 1.6513266563415527, "learning_rate": 0.00019746726499416577, "loss": 7.6236, "step": 64480 }, { "epoch": 7.760529482551143, "grad_norm": 2.026365280151367, "learning_rate": 0.00019746641415054614, "loss": 7.5971, "step": 64490 }, { "epoch": 7.76173285198556, "grad_norm": 3.4769864082336426, "learning_rate": 0.00019746556316586853, "loss": 7.5674, "step": 64500 }, { "epoch": 7.7629362214199755, "grad_norm": 1.7914206981658936, "learning_rate": 0.0001974647120401342, "loss": 7.6313, "step": 64510 }, { "epoch": 7.764139590854392, "grad_norm": 7.555795192718506, "learning_rate": 0.00019746386077334433, "loss": 7.5473, "step": 64520 }, { "epoch": 7.765342960288809, "grad_norm": 2.8244681358337402, "learning_rate": 0.00019746300936550023, "loss": 7.5729, "step": 64530 }, { "epoch": 7.766546329723225, "grad_norm": 4.239053726196289, "learning_rate": 0.00019746215781660308, "loss": 7.5083, "step": 64540 }, { "epoch": 7.767749699157641, "grad_norm": 2.4355297088623047, "learning_rate": 0.00019746130612665415, "loss": 7.583, "step": 64550 }, { "epoch": 7.768953068592058, "grad_norm": 1.4362925291061401, "learning_rate": 0.00019746045429565464, "loss": 7.5604, "step": 64560 }, { "epoch": 7.770156438026474, "grad_norm": 7.067419528961182, "learning_rate": 0.00019745960232360577, "loss": 7.5351, "step": 64570 }, { "epoch": 7.7713598074608905, "grad_norm": 1.3868975639343262, "learning_rate": 0.00019745875021050878, "loss": 7.476, "step": 64580 }, { "epoch": 7.772563176895307, "grad_norm": 5.19750452041626, "learning_rate": 0.00019745789795636492, "loss": 7.5075, "step": 64590 }, { "epoch": 7.773766546329723, "grad_norm": 1.595211148262024, "learning_rate": 0.00019745704556117547, "loss": 7.5509, "step": 64600 }, { "epoch": 7.77496991576414, "grad_norm": 1.366173505783081, "learning_rate": 0.00019745619302494159, "loss": 7.5437, "step": 64610 }, { "epoch": 7.776173285198556, "grad_norm": 4.056373596191406, "learning_rate": 0.00019745534034766452, "loss": 7.6292, "step": 64620 }, { "epoch": 7.777376654632972, "grad_norm": 3.093219757080078, "learning_rate": 0.0001974544875293455, "loss": 7.5332, "step": 64630 }, { "epoch": 7.778580024067389, "grad_norm": 2.3698809146881104, "learning_rate": 0.0001974536345699858, "loss": 7.4866, "step": 64640 }, { "epoch": 7.7797833935018055, "grad_norm": 1.3150427341461182, "learning_rate": 0.0001974527814695866, "loss": 7.52, "step": 64650 }, { "epoch": 7.780986762936221, "grad_norm": 2.1142914295196533, "learning_rate": 0.00019745192822814923, "loss": 7.6069, "step": 64660 }, { "epoch": 7.782190132370638, "grad_norm": 3.0181469917297363, "learning_rate": 0.00019745107484567481, "loss": 7.6176, "step": 64670 }, { "epoch": 7.783393501805055, "grad_norm": 1.8113892078399658, "learning_rate": 0.00019745022132216464, "loss": 7.5021, "step": 64680 }, { "epoch": 7.78459687123947, "grad_norm": 0.9960190057754517, "learning_rate": 0.0001974493676576199, "loss": 7.6028, "step": 64690 }, { "epoch": 7.785800240673887, "grad_norm": 1.6795947551727295, "learning_rate": 0.00019744851385204191, "loss": 7.547, "step": 64700 }, { "epoch": 7.787003610108303, "grad_norm": 2.323227882385254, "learning_rate": 0.00019744765990543188, "loss": 7.5919, "step": 64710 }, { "epoch": 7.78820697954272, "grad_norm": 1.5074779987335205, "learning_rate": 0.00019744680581779101, "loss": 7.5274, "step": 64720 }, { "epoch": 7.789410348977136, "grad_norm": 2.540370464324951, "learning_rate": 0.00019744595158912052, "loss": 7.568, "step": 64730 }, { "epoch": 7.790613718411552, "grad_norm": 1.8090728521347046, "learning_rate": 0.0001974450972194217, "loss": 7.5752, "step": 64740 }, { "epoch": 7.791817087845969, "grad_norm": 1.3719005584716797, "learning_rate": 0.00019744424270869577, "loss": 7.5466, "step": 64750 }, { "epoch": 7.793020457280385, "grad_norm": 1.5757919549942017, "learning_rate": 0.00019744338805694395, "loss": 7.5842, "step": 64760 }, { "epoch": 7.794223826714801, "grad_norm": 4.796267986297607, "learning_rate": 0.0001974425332641675, "loss": 7.535, "step": 64770 }, { "epoch": 7.795427196149218, "grad_norm": 3.333724021911621, "learning_rate": 0.00019744167833036765, "loss": 7.5415, "step": 64780 }, { "epoch": 7.7966305655836345, "grad_norm": 7.976353168487549, "learning_rate": 0.00019744082325554564, "loss": 7.582, "step": 64790 }, { "epoch": 7.79783393501805, "grad_norm": 3.6790027618408203, "learning_rate": 0.00019743996803970267, "loss": 7.6207, "step": 64800 }, { "epoch": 7.799037304452467, "grad_norm": 4.515024185180664, "learning_rate": 0.00019743911268284005, "loss": 7.5974, "step": 64810 }, { "epoch": 7.800240673886883, "grad_norm": 2.5830047130584717, "learning_rate": 0.00019743825718495893, "loss": 7.6111, "step": 64820 }, { "epoch": 7.8014440433212995, "grad_norm": 2.3890299797058105, "learning_rate": 0.00019743740154606063, "loss": 7.5557, "step": 64830 }, { "epoch": 7.802647412755716, "grad_norm": 2.4947264194488525, "learning_rate": 0.00019743654576614636, "loss": 7.5119, "step": 64840 }, { "epoch": 7.803850782190132, "grad_norm": 1.304914116859436, "learning_rate": 0.00019743568984521734, "loss": 7.5988, "step": 64850 }, { "epoch": 7.805054151624549, "grad_norm": 1.2074496746063232, "learning_rate": 0.0001974348337832748, "loss": 7.5569, "step": 64860 }, { "epoch": 7.806257521058965, "grad_norm": 1.843406319618225, "learning_rate": 0.00019743397758032006, "loss": 7.5541, "step": 64870 }, { "epoch": 7.807460890493381, "grad_norm": 1.682096004486084, "learning_rate": 0.0001974331212363542, "loss": 7.5764, "step": 64880 }, { "epoch": 7.808664259927798, "grad_norm": 5.8746018409729, "learning_rate": 0.00019743226475137866, "loss": 7.6384, "step": 64890 }, { "epoch": 7.8098676293622145, "grad_norm": 2.056720495223999, "learning_rate": 0.0001974314081253945, "loss": 7.4991, "step": 64900 }, { "epoch": 7.81107099879663, "grad_norm": 2.306234836578369, "learning_rate": 0.0001974305513584031, "loss": 7.4743, "step": 64910 }, { "epoch": 7.812274368231047, "grad_norm": 3.863231658935547, "learning_rate": 0.0001974296944504056, "loss": 7.4782, "step": 64920 }, { "epoch": 7.813477737665464, "grad_norm": 0.9819413423538208, "learning_rate": 0.00019742883740140327, "loss": 7.5229, "step": 64930 }, { "epoch": 7.814681107099879, "grad_norm": 3.1213436126708984, "learning_rate": 0.0001974279802113974, "loss": 7.5257, "step": 64940 }, { "epoch": 7.815884476534296, "grad_norm": 4.330084800720215, "learning_rate": 0.00019742712288038912, "loss": 7.5378, "step": 64950 }, { "epoch": 7.817087845968713, "grad_norm": 1.4419766664505005, "learning_rate": 0.00019742626540837982, "loss": 7.5202, "step": 64960 }, { "epoch": 7.8182912154031285, "grad_norm": 4.496762275695801, "learning_rate": 0.0001974254077953706, "loss": 7.5922, "step": 64970 }, { "epoch": 7.819494584837545, "grad_norm": 1.9259644746780396, "learning_rate": 0.0001974245500413628, "loss": 7.5476, "step": 64980 }, { "epoch": 7.820697954271962, "grad_norm": 5.210505962371826, "learning_rate": 0.0001974236921463576, "loss": 7.5164, "step": 64990 }, { "epoch": 7.821901323706378, "grad_norm": 1.6667044162750244, "learning_rate": 0.00019742283411035627, "loss": 7.5339, "step": 65000 }, { "epoch": 7.823104693140794, "grad_norm": 2.4324257373809814, "learning_rate": 0.00019742197593336004, "loss": 7.6013, "step": 65010 }, { "epoch": 7.824308062575211, "grad_norm": 4.153875827789307, "learning_rate": 0.00019742111761537017, "loss": 7.6247, "step": 65020 }, { "epoch": 7.825511432009627, "grad_norm": 1.1132521629333496, "learning_rate": 0.00019742025915638788, "loss": 7.4552, "step": 65030 }, { "epoch": 7.8267148014440435, "grad_norm": 2.402911901473999, "learning_rate": 0.0001974194005564144, "loss": 7.5211, "step": 65040 }, { "epoch": 7.827918170878459, "grad_norm": 2.978116035461426, "learning_rate": 0.00019741854181545105, "loss": 7.5419, "step": 65050 }, { "epoch": 7.829121540312876, "grad_norm": 3.3875086307525635, "learning_rate": 0.000197417682933499, "loss": 7.5099, "step": 65060 }, { "epoch": 7.830324909747293, "grad_norm": 1.9529696702957153, "learning_rate": 0.0001974168239105595, "loss": 7.556, "step": 65070 }, { "epoch": 7.8315282791817085, "grad_norm": 2.051300287246704, "learning_rate": 0.0001974159647466338, "loss": 7.5171, "step": 65080 }, { "epoch": 7.832731648616125, "grad_norm": 3.8050243854522705, "learning_rate": 0.00019741510544172314, "loss": 7.5617, "step": 65090 }, { "epoch": 7.833935018050542, "grad_norm": 1.7248845100402832, "learning_rate": 0.0001974142459958288, "loss": 7.4871, "step": 65100 }, { "epoch": 7.835138387484958, "grad_norm": 1.7705193758010864, "learning_rate": 0.00019741338640895197, "loss": 7.5426, "step": 65110 }, { "epoch": 7.836341756919374, "grad_norm": 2.031907796859741, "learning_rate": 0.00019741252668109395, "loss": 7.4904, "step": 65120 }, { "epoch": 7.837545126353791, "grad_norm": 2.721560001373291, "learning_rate": 0.00019741166681225592, "loss": 7.5527, "step": 65130 }, { "epoch": 7.838748495788207, "grad_norm": 5.362534999847412, "learning_rate": 0.00019741080680243917, "loss": 7.4884, "step": 65140 }, { "epoch": 7.839951865222623, "grad_norm": 3.95763897895813, "learning_rate": 0.00019740994665164494, "loss": 7.5728, "step": 65150 }, { "epoch": 7.841155234657039, "grad_norm": 3.410754919052124, "learning_rate": 0.00019740908635987445, "loss": 7.6117, "step": 65160 }, { "epoch": 7.842358604091456, "grad_norm": 2.575103998184204, "learning_rate": 0.00019740822592712897, "loss": 7.5994, "step": 65170 }, { "epoch": 7.843561973525873, "grad_norm": 1.800849199295044, "learning_rate": 0.00019740736535340975, "loss": 7.5598, "step": 65180 }, { "epoch": 7.844765342960288, "grad_norm": 1.6818286180496216, "learning_rate": 0.000197406504638718, "loss": 7.5488, "step": 65190 }, { "epoch": 7.845968712394705, "grad_norm": 1.826019525527954, "learning_rate": 0.00019740564378305502, "loss": 7.5313, "step": 65200 }, { "epoch": 7.847172081829122, "grad_norm": 0.9545811414718628, "learning_rate": 0.000197404782786422, "loss": 7.6071, "step": 65210 }, { "epoch": 7.8483754512635375, "grad_norm": 2.7919225692749023, "learning_rate": 0.00019740392164882022, "loss": 7.5058, "step": 65220 }, { "epoch": 7.849578820697954, "grad_norm": 2.8619964122772217, "learning_rate": 0.00019740306037025092, "loss": 7.4722, "step": 65230 }, { "epoch": 7.850782190132371, "grad_norm": 3.166888952255249, "learning_rate": 0.0001974021989507153, "loss": 7.5576, "step": 65240 }, { "epoch": 7.851985559566787, "grad_norm": 3.817974805831909, "learning_rate": 0.00019740133739021474, "loss": 7.5461, "step": 65250 }, { "epoch": 7.853188929001203, "grad_norm": 2.8046810626983643, "learning_rate": 0.00019740047568875033, "loss": 7.5449, "step": 65260 }, { "epoch": 7.85439229843562, "grad_norm": 1.7972290515899658, "learning_rate": 0.0001973996138463234, "loss": 7.5372, "step": 65270 }, { "epoch": 7.855595667870036, "grad_norm": 3.015254497528076, "learning_rate": 0.0001973987518629352, "loss": 7.5555, "step": 65280 }, { "epoch": 7.8567990373044525, "grad_norm": 4.372307300567627, "learning_rate": 0.00019739788973858693, "loss": 7.5718, "step": 65290 }, { "epoch": 7.858002406738869, "grad_norm": 2.7161402702331543, "learning_rate": 0.00019739702747327987, "loss": 7.5227, "step": 65300 }, { "epoch": 7.859205776173285, "grad_norm": 1.8533118963241577, "learning_rate": 0.00019739616506701528, "loss": 7.4359, "step": 65310 }, { "epoch": 7.860409145607702, "grad_norm": 1.7904753684997559, "learning_rate": 0.0001973953025197944, "loss": 7.5533, "step": 65320 }, { "epoch": 7.861612515042118, "grad_norm": 2.156414270401001, "learning_rate": 0.00019739443983161846, "loss": 7.5456, "step": 65330 }, { "epoch": 7.862815884476534, "grad_norm": 3.3818130493164062, "learning_rate": 0.0001973935770024887, "loss": 7.5957, "step": 65340 }, { "epoch": 7.864019253910951, "grad_norm": 4.580965042114258, "learning_rate": 0.00019739271403240643, "loss": 7.5773, "step": 65350 }, { "epoch": 7.8652226233453675, "grad_norm": 2.1996190547943115, "learning_rate": 0.0001973918509213728, "loss": 7.4879, "step": 65360 }, { "epoch": 7.866425992779783, "grad_norm": 5.844728946685791, "learning_rate": 0.00019739098766938918, "loss": 7.5364, "step": 65370 }, { "epoch": 7.8676293622142, "grad_norm": 1.9474704265594482, "learning_rate": 0.0001973901242764567, "loss": 7.5071, "step": 65380 }, { "epoch": 7.868832731648616, "grad_norm": 2.776552438735962, "learning_rate": 0.0001973892607425767, "loss": 7.4707, "step": 65390 }, { "epoch": 7.870036101083032, "grad_norm": 2.329772472381592, "learning_rate": 0.0001973883970677504, "loss": 7.5491, "step": 65400 }, { "epoch": 7.871239470517449, "grad_norm": 1.808611273765564, "learning_rate": 0.00019738753325197902, "loss": 7.4872, "step": 65410 }, { "epoch": 7.872442839951865, "grad_norm": 2.5463521480560303, "learning_rate": 0.00019738666929526385, "loss": 7.6258, "step": 65420 }, { "epoch": 7.873646209386282, "grad_norm": 1.359895944595337, "learning_rate": 0.0001973858051976061, "loss": 7.5124, "step": 65430 }, { "epoch": 7.874849578820698, "grad_norm": 1.0344352722167969, "learning_rate": 0.00019738494095900707, "loss": 7.6403, "step": 65440 }, { "epoch": 7.876052948255114, "grad_norm": 4.382324695587158, "learning_rate": 0.000197384076579468, "loss": 7.5588, "step": 65450 }, { "epoch": 7.877256317689531, "grad_norm": 1.6695517301559448, "learning_rate": 0.0001973832120589901, "loss": 7.6111, "step": 65460 }, { "epoch": 7.878459687123947, "grad_norm": 1.7769498825073242, "learning_rate": 0.00019738234739757467, "loss": 7.5345, "step": 65470 }, { "epoch": 7.879663056558363, "grad_norm": 2.275113821029663, "learning_rate": 0.00019738148259522293, "loss": 7.5103, "step": 65480 }, { "epoch": 7.88086642599278, "grad_norm": 2.661782741546631, "learning_rate": 0.00019738061765193615, "loss": 7.564, "step": 65490 }, { "epoch": 7.882069795427196, "grad_norm": 1.5649412870407104, "learning_rate": 0.00019737975256771555, "loss": 7.5496, "step": 65500 }, { "epoch": 7.883273164861612, "grad_norm": 1.8353400230407715, "learning_rate": 0.00019737888734256242, "loss": 7.6234, "step": 65510 }, { "epoch": 7.884476534296029, "grad_norm": 3.566084623336792, "learning_rate": 0.00019737802197647802, "loss": 7.5368, "step": 65520 }, { "epoch": 7.885679903730445, "grad_norm": 2.392169713973999, "learning_rate": 0.00019737715646946354, "loss": 7.4623, "step": 65530 }, { "epoch": 7.8868832731648615, "grad_norm": 1.8639453649520874, "learning_rate": 0.0001973762908215203, "loss": 7.5273, "step": 65540 }, { "epoch": 7.888086642599278, "grad_norm": 3.9149012565612793, "learning_rate": 0.00019737542503264952, "loss": 7.5995, "step": 65550 }, { "epoch": 7.889290012033694, "grad_norm": 1.9269750118255615, "learning_rate": 0.00019737455910285245, "loss": 7.5675, "step": 65560 }, { "epoch": 7.890493381468111, "grad_norm": 7.425387859344482, "learning_rate": 0.0001973736930321304, "loss": 7.5878, "step": 65570 }, { "epoch": 7.891696750902527, "grad_norm": 1.50200355052948, "learning_rate": 0.0001973728268204845, "loss": 7.5821, "step": 65580 }, { "epoch": 7.892900120336943, "grad_norm": 1.8554785251617432, "learning_rate": 0.00019737196046791612, "loss": 7.5473, "step": 65590 }, { "epoch": 7.89410348977136, "grad_norm": 1.7223453521728516, "learning_rate": 0.00019737109397442647, "loss": 7.4737, "step": 65600 }, { "epoch": 7.8953068592057765, "grad_norm": 2.594144105911255, "learning_rate": 0.0001973702273400168, "loss": 7.5732, "step": 65610 }, { "epoch": 7.896510228640192, "grad_norm": 3.629218101501465, "learning_rate": 0.00019736936056468838, "loss": 7.5807, "step": 65620 }, { "epoch": 7.897713598074609, "grad_norm": 1.520920753479004, "learning_rate": 0.00019736849364844245, "loss": 7.5864, "step": 65630 }, { "epoch": 7.898916967509026, "grad_norm": 3.9262914657592773, "learning_rate": 0.00019736762659128027, "loss": 7.4949, "step": 65640 }, { "epoch": 7.900120336943441, "grad_norm": 2.1921727657318115, "learning_rate": 0.0001973667593932031, "loss": 7.5909, "step": 65650 }, { "epoch": 7.901323706377858, "grad_norm": 2.235055446624756, "learning_rate": 0.00019736589205421217, "loss": 7.5696, "step": 65660 }, { "epoch": 7.902527075812275, "grad_norm": 4.1180315017700195, "learning_rate": 0.00019736502457430878, "loss": 7.5985, "step": 65670 }, { "epoch": 7.9037304452466906, "grad_norm": 3.2192816734313965, "learning_rate": 0.00019736415695349418, "loss": 7.4839, "step": 65680 }, { "epoch": 7.904933814681107, "grad_norm": 2.169773578643799, "learning_rate": 0.00019736328919176956, "loss": 7.5479, "step": 65690 }, { "epoch": 7.906137184115524, "grad_norm": 5.835244655609131, "learning_rate": 0.00019736242128913625, "loss": 7.5532, "step": 65700 }, { "epoch": 7.90734055354994, "grad_norm": 3.51247239112854, "learning_rate": 0.00019736155324559548, "loss": 7.5576, "step": 65710 }, { "epoch": 7.908543922984356, "grad_norm": 2.271472215652466, "learning_rate": 0.00019736068506114848, "loss": 7.5475, "step": 65720 }, { "epoch": 7.909747292418773, "grad_norm": 5.35723876953125, "learning_rate": 0.00019735981673579654, "loss": 7.57, "step": 65730 }, { "epoch": 7.910950661853189, "grad_norm": 4.704807758331299, "learning_rate": 0.00019735894826954092, "loss": 7.6706, "step": 65740 }, { "epoch": 7.9121540312876055, "grad_norm": 1.3137952089309692, "learning_rate": 0.00019735807966238285, "loss": 7.5887, "step": 65750 }, { "epoch": 7.913357400722021, "grad_norm": 2.429102897644043, "learning_rate": 0.00019735721091432363, "loss": 7.5469, "step": 65760 }, { "epoch": 7.914560770156438, "grad_norm": 2.188586950302124, "learning_rate": 0.00019735634202536445, "loss": 7.5864, "step": 65770 }, { "epoch": 7.915764139590855, "grad_norm": 2.7870771884918213, "learning_rate": 0.00019735547299550664, "loss": 7.6143, "step": 65780 }, { "epoch": 7.9169675090252705, "grad_norm": 2.501793384552002, "learning_rate": 0.00019735460382475138, "loss": 7.5295, "step": 65790 }, { "epoch": 7.918170878459687, "grad_norm": 2.051754951477051, "learning_rate": 0.00019735373451310002, "loss": 7.6133, "step": 65800 }, { "epoch": 7.919374247894104, "grad_norm": 0.946588397026062, "learning_rate": 0.00019735286506055374, "loss": 7.5772, "step": 65810 }, { "epoch": 7.92057761732852, "grad_norm": 4.108164310455322, "learning_rate": 0.00019735199546711387, "loss": 7.5376, "step": 65820 }, { "epoch": 7.921780986762936, "grad_norm": 1.4005546569824219, "learning_rate": 0.0001973511257327816, "loss": 7.5352, "step": 65830 }, { "epoch": 7.922984356197353, "grad_norm": 2.1767354011535645, "learning_rate": 0.00019735025585755823, "loss": 7.606, "step": 65840 }, { "epoch": 7.924187725631769, "grad_norm": 2.0439133644104004, "learning_rate": 0.000197349385841445, "loss": 7.4631, "step": 65850 }, { "epoch": 7.925391095066185, "grad_norm": 0.9920651316642761, "learning_rate": 0.0001973485156844431, "loss": 7.5778, "step": 65860 }, { "epoch": 7.926594464500601, "grad_norm": 1.7963697910308838, "learning_rate": 0.00019734764538655394, "loss": 7.47, "step": 65870 }, { "epoch": 7.927797833935018, "grad_norm": 1.4925529956817627, "learning_rate": 0.00019734677494777868, "loss": 7.5808, "step": 65880 }, { "epoch": 7.929001203369435, "grad_norm": 1.299932599067688, "learning_rate": 0.00019734590436811864, "loss": 7.5486, "step": 65890 }, { "epoch": 7.93020457280385, "grad_norm": 2.4741785526275635, "learning_rate": 0.00019734503364757502, "loss": 7.5412, "step": 65900 }, { "epoch": 7.931407942238267, "grad_norm": 2.414104700088501, "learning_rate": 0.00019734416278614908, "loss": 7.6205, "step": 65910 }, { "epoch": 7.932611311672684, "grad_norm": 4.809906005859375, "learning_rate": 0.00019734329178384212, "loss": 7.6088, "step": 65920 }, { "epoch": 7.9338146811070995, "grad_norm": 2.123011350631714, "learning_rate": 0.00019734242064065537, "loss": 7.4841, "step": 65930 }, { "epoch": 7.935018050541516, "grad_norm": 1.3958371877670288, "learning_rate": 0.00019734154935659013, "loss": 7.6147, "step": 65940 }, { "epoch": 7.936221419975933, "grad_norm": 3.714866876602173, "learning_rate": 0.00019734067793164762, "loss": 7.5309, "step": 65950 }, { "epoch": 7.937424789410349, "grad_norm": 2.742020845413208, "learning_rate": 0.0001973398063658291, "loss": 7.4426, "step": 65960 }, { "epoch": 7.938628158844765, "grad_norm": 7.133040428161621, "learning_rate": 0.0001973389346591359, "loss": 7.4584, "step": 65970 }, { "epoch": 7.939831528279182, "grad_norm": 2.097890853881836, "learning_rate": 0.00019733806281156918, "loss": 7.5057, "step": 65980 }, { "epoch": 7.941034897713598, "grad_norm": 45.25676345825195, "learning_rate": 0.00019733719082313026, "loss": 7.3778, "step": 65990 }, { "epoch": 7.9422382671480145, "grad_norm": 1.7738686800003052, "learning_rate": 0.0001973363186938204, "loss": 7.5804, "step": 66000 }, { "epoch": 7.943441636582431, "grad_norm": 6.599555015563965, "learning_rate": 0.00019733544642364085, "loss": 7.551, "step": 66010 }, { "epoch": 7.944645006016847, "grad_norm": 19.756610870361328, "learning_rate": 0.0001973345740125929, "loss": 7.572, "step": 66020 }, { "epoch": 7.945848375451264, "grad_norm": 5.177730083465576, "learning_rate": 0.00019733370146067777, "loss": 7.5903, "step": 66030 }, { "epoch": 7.94705174488568, "grad_norm": 2.7433736324310303, "learning_rate": 0.00019733282876789674, "loss": 7.5059, "step": 66040 }, { "epoch": 7.948255114320096, "grad_norm": 11.886504173278809, "learning_rate": 0.00019733195593425108, "loss": 7.4819, "step": 66050 }, { "epoch": 7.949458483754513, "grad_norm": 2.1322600841522217, "learning_rate": 0.00019733108295974207, "loss": 7.6008, "step": 66060 }, { "epoch": 7.9506618531889295, "grad_norm": 18.446819305419922, "learning_rate": 0.00019733020984437092, "loss": 7.6633, "step": 66070 }, { "epoch": 7.951865222623345, "grad_norm": 2.9190242290496826, "learning_rate": 0.00019732933658813895, "loss": 7.526, "step": 66080 }, { "epoch": 7.953068592057762, "grad_norm": 6.180509090423584, "learning_rate": 0.0001973284631910474, "loss": 7.5624, "step": 66090 }, { "epoch": 7.954271961492178, "grad_norm": 3.771601676940918, "learning_rate": 0.00019732758965309752, "loss": 7.5549, "step": 66100 }, { "epoch": 7.955475330926594, "grad_norm": 5.095956325531006, "learning_rate": 0.0001973267159742906, "loss": 7.6696, "step": 66110 }, { "epoch": 7.956678700361011, "grad_norm": 3.6988613605499268, "learning_rate": 0.0001973258421546279, "loss": 7.5666, "step": 66120 }, { "epoch": 7.957882069795427, "grad_norm": 3.2755191326141357, "learning_rate": 0.00019732496819411063, "loss": 7.5828, "step": 66130 }, { "epoch": 7.959085439229844, "grad_norm": 4.143715858459473, "learning_rate": 0.00019732409409274016, "loss": 7.5146, "step": 66140 }, { "epoch": 7.96028880866426, "grad_norm": 1.505613923072815, "learning_rate": 0.00019732321985051768, "loss": 7.6425, "step": 66150 }, { "epoch": 7.961492178098676, "grad_norm": 2.0005664825439453, "learning_rate": 0.00019732234546744444, "loss": 7.5653, "step": 66160 }, { "epoch": 7.962695547533093, "grad_norm": 7.554701805114746, "learning_rate": 0.00019732147094352179, "loss": 7.4561, "step": 66170 }, { "epoch": 7.963898916967509, "grad_norm": 2.0083789825439453, "learning_rate": 0.0001973205962787509, "loss": 7.6, "step": 66180 }, { "epoch": 7.965102286401925, "grad_norm": 3.126695156097412, "learning_rate": 0.00019731972147313308, "loss": 7.581, "step": 66190 }, { "epoch": 7.966305655836342, "grad_norm": 2.4581592082977295, "learning_rate": 0.00019731884652666963, "loss": 7.6485, "step": 66200 }, { "epoch": 7.967509025270758, "grad_norm": 3.34667706489563, "learning_rate": 0.00019731797143936178, "loss": 7.4706, "step": 66210 }, { "epoch": 7.968712394705174, "grad_norm": 9.819497108459473, "learning_rate": 0.00019731709621121076, "loss": 7.4327, "step": 66220 }, { "epoch": 7.969915764139591, "grad_norm": 256.25848388671875, "learning_rate": 0.0001973162208422179, "loss": 7.5515, "step": 66230 }, { "epoch": 7.971119133574007, "grad_norm": 4.266899585723877, "learning_rate": 0.00019731534533238442, "loss": 7.4488, "step": 66240 }, { "epoch": 7.9723225030084235, "grad_norm": 3.634145975112915, "learning_rate": 0.00019731446968171166, "loss": 7.6172, "step": 66250 }, { "epoch": 7.97352587244284, "grad_norm": 1.9419814348220825, "learning_rate": 0.00019731359389020078, "loss": 7.5243, "step": 66260 }, { "epoch": 7.974729241877256, "grad_norm": 2.2661983966827393, "learning_rate": 0.00019731271795785316, "loss": 7.5668, "step": 66270 }, { "epoch": 7.975932611311673, "grad_norm": 5.704434394836426, "learning_rate": 0.00019731184188466995, "loss": 7.5363, "step": 66280 }, { "epoch": 7.977135980746089, "grad_norm": 3.7606465816497803, "learning_rate": 0.00019731096567065254, "loss": 7.5769, "step": 66290 }, { "epoch": 7.978339350180505, "grad_norm": 3.810349464416504, "learning_rate": 0.0001973100893158021, "loss": 7.5451, "step": 66300 }, { "epoch": 7.979542719614922, "grad_norm": 3.087070941925049, "learning_rate": 0.00019730921282011993, "loss": 7.6326, "step": 66310 }, { "epoch": 7.9807460890493385, "grad_norm": 2.7472946643829346, "learning_rate": 0.0001973083361836073, "loss": 7.6323, "step": 66320 }, { "epoch": 7.981949458483754, "grad_norm": 4.732410907745361, "learning_rate": 0.00019730745940626553, "loss": 7.573, "step": 66330 }, { "epoch": 7.983152827918171, "grad_norm": 3.2656383514404297, "learning_rate": 0.0001973065824880958, "loss": 7.5678, "step": 66340 }, { "epoch": 7.984356197352588, "grad_norm": 2.0426857471466064, "learning_rate": 0.00019730570542909943, "loss": 7.5306, "step": 66350 }, { "epoch": 7.985559566787003, "grad_norm": 5.330846786499023, "learning_rate": 0.00019730482822927768, "loss": 7.6089, "step": 66360 }, { "epoch": 7.98676293622142, "grad_norm": 2.1498169898986816, "learning_rate": 0.0001973039508886318, "loss": 7.5299, "step": 66370 }, { "epoch": 7.987966305655837, "grad_norm": 2.945660352706909, "learning_rate": 0.00019730307340716312, "loss": 7.5255, "step": 66380 }, { "epoch": 7.9891696750902526, "grad_norm": 2.038069725036621, "learning_rate": 0.00019730219578487285, "loss": 7.6004, "step": 66390 }, { "epoch": 7.990373044524669, "grad_norm": 2.870570659637451, "learning_rate": 0.00019730131802176229, "loss": 7.5769, "step": 66400 }, { "epoch": 7.991576413959086, "grad_norm": 4.0290985107421875, "learning_rate": 0.0001973004401178327, "loss": 7.5337, "step": 66410 }, { "epoch": 7.992779783393502, "grad_norm": 5.626413822174072, "learning_rate": 0.0001972995620730853, "loss": 7.591, "step": 66420 }, { "epoch": 7.993983152827918, "grad_norm": 2.0646297931671143, "learning_rate": 0.00019729868388752148, "loss": 7.5542, "step": 66430 }, { "epoch": 7.995186522262334, "grad_norm": 2.8770172595977783, "learning_rate": 0.0001972978055611424, "loss": 7.441, "step": 66440 }, { "epoch": 7.996389891696751, "grad_norm": 2.1362249851226807, "learning_rate": 0.0001972969270939494, "loss": 7.5164, "step": 66450 }, { "epoch": 7.9975932611311675, "grad_norm": 1.335581660270691, "learning_rate": 0.0001972960484859437, "loss": 7.6045, "step": 66460 }, { "epoch": 7.998796630565583, "grad_norm": 3.3891594409942627, "learning_rate": 0.00019729516973712662, "loss": 7.5193, "step": 66470 }, { "epoch": 8.0, "grad_norm": 3.9859588146209717, "learning_rate": 0.0001972942908474994, "loss": 7.4777, "step": 66480 }, { "epoch": 8.0, "eval_loss": 7.555968284606934, "eval_runtime": 119.8679, "eval_samples_per_second": 61.626, "eval_steps_per_second": 7.708, "step": 66480 }, { "epoch": 8.001203369434416, "grad_norm": 1.7889467477798462, "learning_rate": 0.0001972934118170633, "loss": 7.4666, "step": 66490 }, { "epoch": 8.002406738868833, "grad_norm": 2.1631128787994385, "learning_rate": 0.00019729253264581963, "loss": 7.6277, "step": 66500 }, { "epoch": 8.00361010830325, "grad_norm": 2.403017282485962, "learning_rate": 0.00019729165333376964, "loss": 7.5817, "step": 66510 }, { "epoch": 8.004813477737665, "grad_norm": 4.499189376831055, "learning_rate": 0.00019729077388091462, "loss": 7.4501, "step": 66520 }, { "epoch": 8.006016847172083, "grad_norm": 2.48382306098938, "learning_rate": 0.0001972898942872558, "loss": 7.5931, "step": 66530 }, { "epoch": 8.007220216606498, "grad_norm": 1.6500462293624878, "learning_rate": 0.00019728901455279453, "loss": 7.587, "step": 66540 }, { "epoch": 8.008423586040914, "grad_norm": 3.5017309188842773, "learning_rate": 0.000197288134677532, "loss": 7.5509, "step": 66550 }, { "epoch": 8.009626955475332, "grad_norm": 2.3547470569610596, "learning_rate": 0.00019728725466146952, "loss": 7.4834, "step": 66560 }, { "epoch": 8.010830324909747, "grad_norm": 1.6275417804718018, "learning_rate": 0.0001972863745046084, "loss": 7.4582, "step": 66570 }, { "epoch": 8.012033694344163, "grad_norm": 1.9797002077102661, "learning_rate": 0.00019728549420694984, "loss": 7.5016, "step": 66580 }, { "epoch": 8.01323706377858, "grad_norm": 3.043384075164795, "learning_rate": 0.00019728461376849517, "loss": 7.5484, "step": 66590 }, { "epoch": 8.014440433212997, "grad_norm": 2.2602577209472656, "learning_rate": 0.0001972837331892456, "loss": 7.592, "step": 66600 }, { "epoch": 8.015643802647412, "grad_norm": 4.684236526489258, "learning_rate": 0.00019728285246920247, "loss": 7.5489, "step": 66610 }, { "epoch": 8.01684717208183, "grad_norm": 1.8694789409637451, "learning_rate": 0.00019728197160836708, "loss": 7.6464, "step": 66620 }, { "epoch": 8.018050541516246, "grad_norm": 3.5975682735443115, "learning_rate": 0.0001972810906067406, "loss": 7.6482, "step": 66630 }, { "epoch": 8.019253910950662, "grad_norm": 1.554791808128357, "learning_rate": 0.00019728020946432442, "loss": 7.5689, "step": 66640 }, { "epoch": 8.020457280385079, "grad_norm": 3.4765594005584717, "learning_rate": 0.00019727932818111975, "loss": 7.5757, "step": 66650 }, { "epoch": 8.021660649819495, "grad_norm": 1.4756439924240112, "learning_rate": 0.00019727844675712782, "loss": 7.4566, "step": 66660 }, { "epoch": 8.02286401925391, "grad_norm": 2.2486424446105957, "learning_rate": 0.00019727756519235002, "loss": 7.6053, "step": 66670 }, { "epoch": 8.024067388688326, "grad_norm": 3.7347216606140137, "learning_rate": 0.0001972766834867875, "loss": 7.6275, "step": 66680 }, { "epoch": 8.025270758122744, "grad_norm": 1.8567224740982056, "learning_rate": 0.00019727580164044165, "loss": 7.4503, "step": 66690 }, { "epoch": 8.02647412755716, "grad_norm": 1.1227103471755981, "learning_rate": 0.0001972749196533137, "loss": 7.5952, "step": 66700 }, { "epoch": 8.027677496991576, "grad_norm": 3.2187764644622803, "learning_rate": 0.00019727403752540493, "loss": 7.5426, "step": 66710 }, { "epoch": 8.028880866425993, "grad_norm": 4.35701322555542, "learning_rate": 0.0001972731552567166, "loss": 7.5884, "step": 66720 }, { "epoch": 8.030084235860409, "grad_norm": 3.640974283218384, "learning_rate": 0.00019727227284725, "loss": 7.5374, "step": 66730 }, { "epoch": 8.031287605294825, "grad_norm": 1.4204896688461304, "learning_rate": 0.00019727139029700639, "loss": 7.6199, "step": 66740 }, { "epoch": 8.032490974729242, "grad_norm": 1.9910739660263062, "learning_rate": 0.0001972705076059871, "loss": 7.3852, "step": 66750 }, { "epoch": 8.033694344163658, "grad_norm": 1.7718479633331299, "learning_rate": 0.00019726962477419332, "loss": 7.5771, "step": 66760 }, { "epoch": 8.034897713598074, "grad_norm": 1.0193895101547241, "learning_rate": 0.0001972687418016264, "loss": 7.5511, "step": 66770 }, { "epoch": 8.036101083032491, "grad_norm": 3.0317351818084717, "learning_rate": 0.0001972678586882876, "loss": 7.5749, "step": 66780 }, { "epoch": 8.037304452466907, "grad_norm": 2.6694209575653076, "learning_rate": 0.00019726697543417818, "loss": 7.5776, "step": 66790 }, { "epoch": 8.038507821901323, "grad_norm": 1.310793161392212, "learning_rate": 0.00019726609203929947, "loss": 7.6521, "step": 66800 }, { "epoch": 8.03971119133574, "grad_norm": 2.222972869873047, "learning_rate": 0.00019726520850365267, "loss": 7.5005, "step": 66810 }, { "epoch": 8.040914560770156, "grad_norm": 1.9121289253234863, "learning_rate": 0.0001972643248272391, "loss": 7.5079, "step": 66820 }, { "epoch": 8.042117930204572, "grad_norm": 4.8868584632873535, "learning_rate": 0.00019726344101006005, "loss": 7.5309, "step": 66830 }, { "epoch": 8.04332129963899, "grad_norm": 1.1957162618637085, "learning_rate": 0.0001972625570521168, "loss": 7.5178, "step": 66840 }, { "epoch": 8.044524669073406, "grad_norm": 2.1886768341064453, "learning_rate": 0.00019726167295341061, "loss": 7.5092, "step": 66850 }, { "epoch": 8.045728038507821, "grad_norm": 2.4477555751800537, "learning_rate": 0.00019726078871394275, "loss": 7.4921, "step": 66860 }, { "epoch": 8.046931407942239, "grad_norm": 1.644256591796875, "learning_rate": 0.00019725990433371452, "loss": 7.499, "step": 66870 }, { "epoch": 8.048134777376655, "grad_norm": 2.111665964126587, "learning_rate": 0.00019725901981272723, "loss": 7.5616, "step": 66880 }, { "epoch": 8.04933814681107, "grad_norm": 1.5597729682922363, "learning_rate": 0.00019725813515098208, "loss": 7.5732, "step": 66890 }, { "epoch": 8.050541516245488, "grad_norm": 5.364047050476074, "learning_rate": 0.00019725725034848042, "loss": 7.5655, "step": 66900 }, { "epoch": 8.051744885679904, "grad_norm": 2.172454833984375, "learning_rate": 0.00019725636540522353, "loss": 7.5581, "step": 66910 }, { "epoch": 8.05294825511432, "grad_norm": 1.8547550439834595, "learning_rate": 0.00019725548032121262, "loss": 7.5491, "step": 66920 }, { "epoch": 8.054151624548737, "grad_norm": 1.9544776678085327, "learning_rate": 0.00019725459509644904, "loss": 7.5166, "step": 66930 }, { "epoch": 8.055354993983153, "grad_norm": 3.6576926708221436, "learning_rate": 0.00019725370973093403, "loss": 7.5342, "step": 66940 }, { "epoch": 8.056558363417569, "grad_norm": 4.2791428565979, "learning_rate": 0.0001972528242246689, "loss": 7.5077, "step": 66950 }, { "epoch": 8.057761732851986, "grad_norm": 3.432894229888916, "learning_rate": 0.00019725193857765495, "loss": 7.609, "step": 66960 }, { "epoch": 8.058965102286402, "grad_norm": 3.0376970767974854, "learning_rate": 0.00019725105278989343, "loss": 7.5796, "step": 66970 }, { "epoch": 8.060168471720818, "grad_norm": 1.7038400173187256, "learning_rate": 0.0001972501668613856, "loss": 7.5954, "step": 66980 }, { "epoch": 8.061371841155236, "grad_norm": 2.2276458740234375, "learning_rate": 0.00019724928079213277, "loss": 7.4984, "step": 66990 }, { "epoch": 8.062575210589651, "grad_norm": 1.1704379320144653, "learning_rate": 0.00019724839458213624, "loss": 7.5318, "step": 67000 }, { "epoch": 8.063778580024067, "grad_norm": 3.316469430923462, "learning_rate": 0.00019724750823139727, "loss": 7.5661, "step": 67010 }, { "epoch": 8.064981949458483, "grad_norm": 2.5439579486846924, "learning_rate": 0.00019724662173991716, "loss": 7.5363, "step": 67020 }, { "epoch": 8.0661853188929, "grad_norm": 3.5956685543060303, "learning_rate": 0.00019724573510769713, "loss": 7.5097, "step": 67030 }, { "epoch": 8.067388688327316, "grad_norm": 3.3855502605438232, "learning_rate": 0.00019724484833473854, "loss": 7.5059, "step": 67040 }, { "epoch": 8.068592057761732, "grad_norm": 2.211360454559326, "learning_rate": 0.00019724396142104265, "loss": 7.5294, "step": 67050 }, { "epoch": 8.06979542719615, "grad_norm": 2.3276491165161133, "learning_rate": 0.0001972430743666107, "loss": 7.6116, "step": 67060 }, { "epoch": 8.070998796630565, "grad_norm": 1.7261228561401367, "learning_rate": 0.0001972421871714441, "loss": 7.5458, "step": 67070 }, { "epoch": 8.072202166064981, "grad_norm": 4.647141933441162, "learning_rate": 0.00019724129983554397, "loss": 7.6452, "step": 67080 }, { "epoch": 8.073405535499399, "grad_norm": 3.790757179260254, "learning_rate": 0.0001972404123589117, "loss": 7.5392, "step": 67090 }, { "epoch": 8.074608904933815, "grad_norm": 3.0562901496887207, "learning_rate": 0.00019723952474154854, "loss": 7.5011, "step": 67100 }, { "epoch": 8.07581227436823, "grad_norm": 3.311819314956665, "learning_rate": 0.0001972386369834558, "loss": 7.6471, "step": 67110 }, { "epoch": 8.077015643802648, "grad_norm": 2.8634488582611084, "learning_rate": 0.00019723774908463472, "loss": 7.5742, "step": 67120 }, { "epoch": 8.078219013237064, "grad_norm": 5.867244243621826, "learning_rate": 0.0001972368610450866, "loss": 7.6673, "step": 67130 }, { "epoch": 8.07942238267148, "grad_norm": 1.945067048072815, "learning_rate": 0.00019723597286481278, "loss": 7.5856, "step": 67140 }, { "epoch": 8.080625752105897, "grad_norm": 3.4768807888031006, "learning_rate": 0.00019723508454381445, "loss": 7.4726, "step": 67150 }, { "epoch": 8.081829121540313, "grad_norm": 4.385050296783447, "learning_rate": 0.00019723419608209296, "loss": 7.4804, "step": 67160 }, { "epoch": 8.083032490974729, "grad_norm": 2.0109615325927734, "learning_rate": 0.0001972333074796496, "loss": 7.6148, "step": 67170 }, { "epoch": 8.084235860409146, "grad_norm": 4.329806804656982, "learning_rate": 0.0001972324187364856, "loss": 7.5095, "step": 67180 }, { "epoch": 8.085439229843562, "grad_norm": 2.5663046836853027, "learning_rate": 0.00019723152985260233, "loss": 7.5293, "step": 67190 }, { "epoch": 8.086642599277978, "grad_norm": 3.8938119411468506, "learning_rate": 0.000197230640828001, "loss": 7.6064, "step": 67200 }, { "epoch": 8.087845968712395, "grad_norm": 3.559451103210449, "learning_rate": 0.00019722975166268296, "loss": 7.5777, "step": 67210 }, { "epoch": 8.089049338146811, "grad_norm": 4.946438789367676, "learning_rate": 0.00019722886235664944, "loss": 7.5046, "step": 67220 }, { "epoch": 8.090252707581227, "grad_norm": 4.496367454528809, "learning_rate": 0.00019722797290990177, "loss": 7.5286, "step": 67230 }, { "epoch": 8.091456077015645, "grad_norm": 2.403841257095337, "learning_rate": 0.00019722708332244116, "loss": 7.5267, "step": 67240 }, { "epoch": 8.09265944645006, "grad_norm": 3.0910139083862305, "learning_rate": 0.000197226193594269, "loss": 7.5893, "step": 67250 }, { "epoch": 8.093862815884476, "grad_norm": 2.407782793045044, "learning_rate": 0.00019722530372538657, "loss": 7.5826, "step": 67260 }, { "epoch": 8.095066185318894, "grad_norm": 4.66002082824707, "learning_rate": 0.00019722441371579507, "loss": 7.5305, "step": 67270 }, { "epoch": 8.09626955475331, "grad_norm": 8.19322395324707, "learning_rate": 0.00019722352356549584, "loss": 7.6439, "step": 67280 }, { "epoch": 8.097472924187725, "grad_norm": 2.7666609287261963, "learning_rate": 0.0001972226332744902, "loss": 7.5423, "step": 67290 }, { "epoch": 8.098676293622143, "grad_norm": 2.804218053817749, "learning_rate": 0.0001972217428427794, "loss": 7.569, "step": 67300 }, { "epoch": 8.099879663056559, "grad_norm": 3.691422462463379, "learning_rate": 0.0001972208522703647, "loss": 7.6215, "step": 67310 }, { "epoch": 8.101083032490974, "grad_norm": 2.050039768218994, "learning_rate": 0.00019721996155724745, "loss": 7.522, "step": 67320 }, { "epoch": 8.102286401925392, "grad_norm": 3.7799856662750244, "learning_rate": 0.0001972190707034289, "loss": 7.5668, "step": 67330 }, { "epoch": 8.103489771359808, "grad_norm": 2.6276156902313232, "learning_rate": 0.00019721817970891036, "loss": 7.5166, "step": 67340 }, { "epoch": 8.104693140794224, "grad_norm": 1.7708622217178345, "learning_rate": 0.0001972172885736931, "loss": 7.5256, "step": 67350 }, { "epoch": 8.10589651022864, "grad_norm": 3.777482271194458, "learning_rate": 0.00019721639729777842, "loss": 7.568, "step": 67360 }, { "epoch": 8.107099879663057, "grad_norm": 3.338593006134033, "learning_rate": 0.00019721550588116765, "loss": 7.5303, "step": 67370 }, { "epoch": 8.108303249097473, "grad_norm": 2.4056456089019775, "learning_rate": 0.00019721461432386202, "loss": 7.5602, "step": 67380 }, { "epoch": 8.109506618531888, "grad_norm": 2.8567841053009033, "learning_rate": 0.00019721372262586283, "loss": 7.5319, "step": 67390 }, { "epoch": 8.110709987966306, "grad_norm": 3.788039445877075, "learning_rate": 0.00019721283078717142, "loss": 7.5456, "step": 67400 }, { "epoch": 8.111913357400722, "grad_norm": 3.1496784687042236, "learning_rate": 0.00019721193880778897, "loss": 7.5476, "step": 67410 }, { "epoch": 8.113116726835138, "grad_norm": 5.800292491912842, "learning_rate": 0.0001972110466877169, "loss": 7.4844, "step": 67420 }, { "epoch": 8.114320096269555, "grad_norm": 5.42637825012207, "learning_rate": 0.00019721015442695643, "loss": 7.5015, "step": 67430 }, { "epoch": 8.115523465703971, "grad_norm": 5.717906951904297, "learning_rate": 0.00019720926202550888, "loss": 7.6371, "step": 67440 }, { "epoch": 8.116726835138387, "grad_norm": 6.923117637634277, "learning_rate": 0.00019720836948337552, "loss": 7.5295, "step": 67450 }, { "epoch": 8.117930204572804, "grad_norm": 4.9793548583984375, "learning_rate": 0.0001972074768005576, "loss": 7.5399, "step": 67460 }, { "epoch": 8.11913357400722, "grad_norm": 2.6926350593566895, "learning_rate": 0.00019720658397705653, "loss": 7.4808, "step": 67470 }, { "epoch": 8.120336943441636, "grad_norm": 4.779403209686279, "learning_rate": 0.0001972056910128735, "loss": 7.5939, "step": 67480 }, { "epoch": 8.121540312876053, "grad_norm": 5.075592517852783, "learning_rate": 0.00019720479790800985, "loss": 7.5442, "step": 67490 }, { "epoch": 8.12274368231047, "grad_norm": 3.9302806854248047, "learning_rate": 0.00019720390466246686, "loss": 7.5373, "step": 67500 }, { "epoch": 8.123947051744885, "grad_norm": 4.288082122802734, "learning_rate": 0.0001972030112762458, "loss": 7.4558, "step": 67510 }, { "epoch": 8.125150421179303, "grad_norm": 3.6908655166625977, "learning_rate": 0.000197202117749348, "loss": 7.5385, "step": 67520 }, { "epoch": 8.126353790613718, "grad_norm": 5.126953125, "learning_rate": 0.00019720122408177474, "loss": 7.5608, "step": 67530 }, { "epoch": 8.127557160048134, "grad_norm": 74.11157989501953, "learning_rate": 0.0001972003302735273, "loss": 7.5073, "step": 67540 }, { "epoch": 8.128760529482552, "grad_norm": 3.187445640563965, "learning_rate": 0.000197199436324607, "loss": 7.6169, "step": 67550 }, { "epoch": 8.129963898916968, "grad_norm": 10.13817310333252, "learning_rate": 0.0001971985422350151, "loss": 7.5623, "step": 67560 }, { "epoch": 8.131167268351383, "grad_norm": 3.3027138710021973, "learning_rate": 0.00019719764800475291, "loss": 7.4782, "step": 67570 }, { "epoch": 8.132370637785801, "grad_norm": 7.6401448249816895, "learning_rate": 0.00019719675363382176, "loss": 7.5925, "step": 67580 }, { "epoch": 8.133574007220217, "grad_norm": 6.250296115875244, "learning_rate": 0.00019719585912222288, "loss": 7.537, "step": 67590 }, { "epoch": 8.134777376654633, "grad_norm": 4.511796474456787, "learning_rate": 0.0001971949644699576, "loss": 7.5018, "step": 67600 }, { "epoch": 8.13598074608905, "grad_norm": 5.038487911224365, "learning_rate": 0.0001971940696770272, "loss": 7.5311, "step": 67610 }, { "epoch": 8.137184115523466, "grad_norm": 2.808227062225342, "learning_rate": 0.00019719317474343301, "loss": 7.6221, "step": 67620 }, { "epoch": 8.138387484957882, "grad_norm": 6.342968940734863, "learning_rate": 0.00019719227966917625, "loss": 7.5024, "step": 67630 }, { "epoch": 8.1395908543923, "grad_norm": 4.256033420562744, "learning_rate": 0.00019719138445425832, "loss": 7.5954, "step": 67640 }, { "epoch": 8.140794223826715, "grad_norm": 2.9034934043884277, "learning_rate": 0.00019719048909868045, "loss": 7.6146, "step": 67650 }, { "epoch": 8.14199759326113, "grad_norm": 4.3751702308654785, "learning_rate": 0.00019718959360244393, "loss": 7.4616, "step": 67660 }, { "epoch": 8.143200962695548, "grad_norm": 4.708079814910889, "learning_rate": 0.00019718869796555007, "loss": 7.5555, "step": 67670 }, { "epoch": 8.144404332129964, "grad_norm": 10.061758995056152, "learning_rate": 0.0001971878021880002, "loss": 7.5387, "step": 67680 }, { "epoch": 8.14560770156438, "grad_norm": 3.6461355686187744, "learning_rate": 0.00019718690626979556, "loss": 7.5192, "step": 67690 }, { "epoch": 8.146811070998796, "grad_norm": 2.372333526611328, "learning_rate": 0.00019718601021093748, "loss": 7.5778, "step": 67700 }, { "epoch": 8.148014440433213, "grad_norm": 3.2085626125335693, "learning_rate": 0.00019718511401142723, "loss": 7.6006, "step": 67710 }, { "epoch": 8.14921780986763, "grad_norm": 3.4295706748962402, "learning_rate": 0.00019718421767126614, "loss": 7.5677, "step": 67720 }, { "epoch": 8.150421179302045, "grad_norm": 4.437617778778076, "learning_rate": 0.0001971833211904555, "loss": 7.5051, "step": 67730 }, { "epoch": 8.151624548736462, "grad_norm": 3.899390935897827, "learning_rate": 0.00019718242456899662, "loss": 7.4893, "step": 67740 }, { "epoch": 8.152827918170878, "grad_norm": 4.297066688537598, "learning_rate": 0.00019718152780689074, "loss": 7.5436, "step": 67750 }, { "epoch": 8.154031287605294, "grad_norm": 4.00190544128418, "learning_rate": 0.0001971806309041392, "loss": 7.5472, "step": 67760 }, { "epoch": 8.155234657039712, "grad_norm": 3.7725093364715576, "learning_rate": 0.00019717973386074332, "loss": 7.4599, "step": 67770 }, { "epoch": 8.156438026474127, "grad_norm": 4.101502895355225, "learning_rate": 0.00019717883667670435, "loss": 7.4917, "step": 67780 }, { "epoch": 8.157641395908543, "grad_norm": 4.189589023590088, "learning_rate": 0.00019717793935202361, "loss": 7.5229, "step": 67790 }, { "epoch": 8.15884476534296, "grad_norm": 4.835085868835449, "learning_rate": 0.0001971770418867024, "loss": 7.5586, "step": 67800 }, { "epoch": 8.160048134777377, "grad_norm": 3.7349352836608887, "learning_rate": 0.00019717614428074203, "loss": 7.5528, "step": 67810 }, { "epoch": 8.161251504211792, "grad_norm": 12.529157638549805, "learning_rate": 0.00019717524653414378, "loss": 7.5559, "step": 67820 }, { "epoch": 8.16245487364621, "grad_norm": 6.1256279945373535, "learning_rate": 0.00019717434864690897, "loss": 7.5865, "step": 67830 }, { "epoch": 8.163658243080626, "grad_norm": 2.5910558700561523, "learning_rate": 0.00019717345061903886, "loss": 7.5505, "step": 67840 }, { "epoch": 8.164861612515042, "grad_norm": 9.59126091003418, "learning_rate": 0.0001971725524505348, "loss": 7.6519, "step": 67850 }, { "epoch": 8.166064981949459, "grad_norm": 2.7174177169799805, "learning_rate": 0.00019717165414139803, "loss": 7.6099, "step": 67860 }, { "epoch": 8.167268351383875, "grad_norm": 2.9353551864624023, "learning_rate": 0.00019717075569162992, "loss": 7.5707, "step": 67870 }, { "epoch": 8.16847172081829, "grad_norm": 2.3061249256134033, "learning_rate": 0.00019716985710123173, "loss": 7.4742, "step": 67880 }, { "epoch": 8.169675090252708, "grad_norm": 2.0400211811065674, "learning_rate": 0.00019716895837020476, "loss": 7.4908, "step": 67890 }, { "epoch": 8.170878459687124, "grad_norm": 5.309356689453125, "learning_rate": 0.00019716805949855033, "loss": 7.5348, "step": 67900 }, { "epoch": 8.17208182912154, "grad_norm": 2.3623602390289307, "learning_rate": 0.00019716716048626972, "loss": 7.5545, "step": 67910 }, { "epoch": 8.173285198555957, "grad_norm": 2.786099910736084, "learning_rate": 0.0001971662613333642, "loss": 7.5822, "step": 67920 }, { "epoch": 8.174488567990373, "grad_norm": 2.34267258644104, "learning_rate": 0.0001971653620398352, "loss": 7.5849, "step": 67930 }, { "epoch": 8.175691937424789, "grad_norm": 1.97996187210083, "learning_rate": 0.00019716446260568385, "loss": 7.6152, "step": 67940 }, { "epoch": 8.176895306859207, "grad_norm": 2.22361421585083, "learning_rate": 0.00019716356303091156, "loss": 7.65, "step": 67950 }, { "epoch": 8.178098676293622, "grad_norm": 3.01961088180542, "learning_rate": 0.0001971626633155196, "loss": 7.617, "step": 67960 }, { "epoch": 8.179302045728038, "grad_norm": 2.3614389896392822, "learning_rate": 0.00019716176345950927, "loss": 7.5211, "step": 67970 }, { "epoch": 8.180505415162456, "grad_norm": 2.727503538131714, "learning_rate": 0.0001971608634628819, "loss": 7.5328, "step": 67980 }, { "epoch": 8.181708784596871, "grad_norm": 3.683574914932251, "learning_rate": 0.00019715996332563877, "loss": 7.6351, "step": 67990 }, { "epoch": 8.182912154031287, "grad_norm": 0.9345389604568481, "learning_rate": 0.0001971590630477812, "loss": 7.6196, "step": 68000 }, { "epoch": 8.184115523465705, "grad_norm": 1.5343817472457886, "learning_rate": 0.00019715816262931045, "loss": 7.5788, "step": 68010 }, { "epoch": 8.18531889290012, "grad_norm": 1.9073002338409424, "learning_rate": 0.00019715726207022787, "loss": 7.5732, "step": 68020 }, { "epoch": 8.186522262334536, "grad_norm": 3.421137809753418, "learning_rate": 0.00019715636137053471, "loss": 7.5483, "step": 68030 }, { "epoch": 8.187725631768952, "grad_norm": 2.6363823413848877, "learning_rate": 0.00019715546053023232, "loss": 7.5479, "step": 68040 }, { "epoch": 8.18892900120337, "grad_norm": 2.0040910243988037, "learning_rate": 0.00019715455954932203, "loss": 7.5816, "step": 68050 }, { "epoch": 8.190132370637786, "grad_norm": 1.6898119449615479, "learning_rate": 0.00019715365842780508, "loss": 7.5438, "step": 68060 }, { "epoch": 8.191335740072201, "grad_norm": 3.1103663444519043, "learning_rate": 0.0001971527571656828, "loss": 7.52, "step": 68070 }, { "epoch": 8.192539109506619, "grad_norm": 3.5032927989959717, "learning_rate": 0.0001971518557629565, "loss": 7.5365, "step": 68080 }, { "epoch": 8.193742478941035, "grad_norm": 2.230195999145508, "learning_rate": 0.00019715095421962746, "loss": 7.4769, "step": 68090 }, { "epoch": 8.19494584837545, "grad_norm": 5.8746161460876465, "learning_rate": 0.00019715005253569703, "loss": 7.6013, "step": 68100 }, { "epoch": 8.196149217809868, "grad_norm": 1.3629354238510132, "learning_rate": 0.00019714915071116645, "loss": 7.5615, "step": 68110 }, { "epoch": 8.197352587244284, "grad_norm": 7.240510940551758, "learning_rate": 0.0001971482487460371, "loss": 7.5066, "step": 68120 }, { "epoch": 8.1985559566787, "grad_norm": 1.6762794256210327, "learning_rate": 0.00019714734664031026, "loss": 7.5589, "step": 68130 }, { "epoch": 8.199759326113117, "grad_norm": 1.8678590059280396, "learning_rate": 0.00019714644439398717, "loss": 7.4935, "step": 68140 }, { "epoch": 8.200962695547533, "grad_norm": 3.108381509780884, "learning_rate": 0.00019714554200706921, "loss": 7.5725, "step": 68150 }, { "epoch": 8.202166064981949, "grad_norm": 3.1506729125976562, "learning_rate": 0.0001971446394795577, "loss": 7.5115, "step": 68160 }, { "epoch": 8.203369434416366, "grad_norm": 3.2303717136383057, "learning_rate": 0.00019714373681145388, "loss": 7.5672, "step": 68170 }, { "epoch": 8.204572803850782, "grad_norm": 3.26997971534729, "learning_rate": 0.0001971428340027591, "loss": 7.4824, "step": 68180 }, { "epoch": 8.205776173285198, "grad_norm": 1.6657923460006714, "learning_rate": 0.00019714193105347468, "loss": 7.5582, "step": 68190 }, { "epoch": 8.206979542719615, "grad_norm": 1.4390580654144287, "learning_rate": 0.00019714102796360185, "loss": 7.5975, "step": 68200 }, { "epoch": 8.208182912154031, "grad_norm": 2.1655938625335693, "learning_rate": 0.000197140124733142, "loss": 7.5292, "step": 68210 }, { "epoch": 8.209386281588447, "grad_norm": 1.6513818502426147, "learning_rate": 0.00019713922136209642, "loss": 7.5303, "step": 68220 }, { "epoch": 8.210589651022865, "grad_norm": 1.6579651832580566, "learning_rate": 0.0001971383178504664, "loss": 7.5422, "step": 68230 }, { "epoch": 8.21179302045728, "grad_norm": 1.0994912385940552, "learning_rate": 0.0001971374141982532, "loss": 7.5492, "step": 68240 }, { "epoch": 8.212996389891696, "grad_norm": 4.455509662628174, "learning_rate": 0.00019713651040545823, "loss": 7.5579, "step": 68250 }, { "epoch": 8.214199759326114, "grad_norm": 2.238157272338867, "learning_rate": 0.00019713560647208273, "loss": 7.5954, "step": 68260 }, { "epoch": 8.21540312876053, "grad_norm": 1.090720534324646, "learning_rate": 0.00019713470239812804, "loss": 7.4525, "step": 68270 }, { "epoch": 8.216606498194945, "grad_norm": 3.328411340713501, "learning_rate": 0.00019713379818359544, "loss": 7.5188, "step": 68280 }, { "epoch": 8.217809867629363, "grad_norm": 1.9475572109222412, "learning_rate": 0.00019713289382848625, "loss": 7.6525, "step": 68290 }, { "epoch": 8.219013237063779, "grad_norm": 1.8784102201461792, "learning_rate": 0.0001971319893328018, "loss": 7.5081, "step": 68300 }, { "epoch": 8.220216606498195, "grad_norm": 2.0379552841186523, "learning_rate": 0.00019713108469654336, "loss": 7.5691, "step": 68310 }, { "epoch": 8.221419975932612, "grad_norm": 4.128599166870117, "learning_rate": 0.00019713017991971227, "loss": 7.5606, "step": 68320 }, { "epoch": 8.222623345367028, "grad_norm": 3.831167697906494, "learning_rate": 0.0001971292750023098, "loss": 7.5611, "step": 68330 }, { "epoch": 8.223826714801444, "grad_norm": 2.0155131816864014, "learning_rate": 0.00019712836994433732, "loss": 7.6011, "step": 68340 }, { "epoch": 8.225030084235861, "grad_norm": 0.9491598606109619, "learning_rate": 0.00019712746474579614, "loss": 7.6215, "step": 68350 }, { "epoch": 8.226233453670277, "grad_norm": 2.5010132789611816, "learning_rate": 0.00019712655940668747, "loss": 7.5869, "step": 68360 }, { "epoch": 8.227436823104693, "grad_norm": 4.692080974578857, "learning_rate": 0.00019712565392701273, "loss": 7.5292, "step": 68370 }, { "epoch": 8.22864019253911, "grad_norm": 7.14740514755249, "learning_rate": 0.00019712474830677317, "loss": 7.6161, "step": 68380 }, { "epoch": 8.229843561973526, "grad_norm": 2.1906774044036865, "learning_rate": 0.00019712384254597014, "loss": 7.5704, "step": 68390 }, { "epoch": 8.231046931407942, "grad_norm": 2.0275540351867676, "learning_rate": 0.00019712293664460494, "loss": 7.4773, "step": 68400 }, { "epoch": 8.232250300842358, "grad_norm": 1.672620177268982, "learning_rate": 0.00019712203060267883, "loss": 7.4938, "step": 68410 }, { "epoch": 8.233453670276775, "grad_norm": 1.1433289051055908, "learning_rate": 0.0001971211244201932, "loss": 7.6208, "step": 68420 }, { "epoch": 8.234657039711191, "grad_norm": 3.5545032024383545, "learning_rate": 0.0001971202180971493, "loss": 7.6019, "step": 68430 }, { "epoch": 8.235860409145607, "grad_norm": 3.189627170562744, "learning_rate": 0.0001971193116335485, "loss": 7.5796, "step": 68440 }, { "epoch": 8.237063778580024, "grad_norm": 1.6809436082839966, "learning_rate": 0.00019711840502939203, "loss": 7.5374, "step": 68450 }, { "epoch": 8.23826714801444, "grad_norm": 3.4670281410217285, "learning_rate": 0.00019711749828468127, "loss": 7.6031, "step": 68460 }, { "epoch": 8.239470517448856, "grad_norm": 2.8628058433532715, "learning_rate": 0.0001971165913994175, "loss": 7.5635, "step": 68470 }, { "epoch": 8.240673886883274, "grad_norm": 4.718408584594727, "learning_rate": 0.00019711568437360207, "loss": 7.5076, "step": 68480 }, { "epoch": 8.24187725631769, "grad_norm": 1.6630380153656006, "learning_rate": 0.00019711477720723628, "loss": 7.5079, "step": 68490 }, { "epoch": 8.243080625752105, "grad_norm": 5.847504138946533, "learning_rate": 0.00019711386990032138, "loss": 7.5828, "step": 68500 }, { "epoch": 8.244283995186523, "grad_norm": 2.3594069480895996, "learning_rate": 0.0001971129624528588, "loss": 7.5139, "step": 68510 }, { "epoch": 8.245487364620939, "grad_norm": 1.472467064857483, "learning_rate": 0.00019711205486484974, "loss": 7.4547, "step": 68520 }, { "epoch": 8.246690734055354, "grad_norm": 1.9103935956954956, "learning_rate": 0.00019711114713629556, "loss": 7.663, "step": 68530 }, { "epoch": 8.247894103489772, "grad_norm": 1.3938982486724854, "learning_rate": 0.0001971102392671976, "loss": 7.523, "step": 68540 }, { "epoch": 8.249097472924188, "grad_norm": 1.1023763418197632, "learning_rate": 0.0001971093312575571, "loss": 7.5299, "step": 68550 }, { "epoch": 8.250300842358604, "grad_norm": 2.892200231552124, "learning_rate": 0.00019710842310737548, "loss": 7.5397, "step": 68560 }, { "epoch": 8.251504211793021, "grad_norm": 1.197816252708435, "learning_rate": 0.00019710751481665398, "loss": 7.5108, "step": 68570 }, { "epoch": 8.252707581227437, "grad_norm": 3.8547701835632324, "learning_rate": 0.00019710660638539392, "loss": 7.5239, "step": 68580 }, { "epoch": 8.253910950661853, "grad_norm": 2.4872729778289795, "learning_rate": 0.0001971056978135966, "loss": 7.6455, "step": 68590 }, { "epoch": 8.25511432009627, "grad_norm": 4.06846809387207, "learning_rate": 0.0001971047891012634, "loss": 7.5681, "step": 68600 }, { "epoch": 8.256317689530686, "grad_norm": 2.2631430625915527, "learning_rate": 0.0001971038802483956, "loss": 7.5589, "step": 68610 }, { "epoch": 8.257521058965102, "grad_norm": 3.6923489570617676, "learning_rate": 0.00019710297125499448, "loss": 7.5747, "step": 68620 }, { "epoch": 8.25872442839952, "grad_norm": 5.725185394287109, "learning_rate": 0.00019710206212106139, "loss": 7.5944, "step": 68630 }, { "epoch": 8.259927797833935, "grad_norm": 3.2183244228363037, "learning_rate": 0.00019710115284659767, "loss": 7.5221, "step": 68640 }, { "epoch": 8.261131167268351, "grad_norm": 1.4189926385879517, "learning_rate": 0.0001971002434316046, "loss": 7.543, "step": 68650 }, { "epoch": 8.262334536702769, "grad_norm": 1.2534085512161255, "learning_rate": 0.0001970993338760835, "loss": 7.4875, "step": 68660 }, { "epoch": 8.263537906137184, "grad_norm": 2.4593253135681152, "learning_rate": 0.00019709842418003568, "loss": 7.4666, "step": 68670 }, { "epoch": 8.2647412755716, "grad_norm": 2.0745296478271484, "learning_rate": 0.0001970975143434625, "loss": 7.6271, "step": 68680 }, { "epoch": 8.265944645006018, "grad_norm": 3.8010599613189697, "learning_rate": 0.0001970966043663652, "loss": 7.5012, "step": 68690 }, { "epoch": 8.267148014440433, "grad_norm": 1.0840163230895996, "learning_rate": 0.00019709569424874513, "loss": 7.5432, "step": 68700 }, { "epoch": 8.26835138387485, "grad_norm": 1.1043710708618164, "learning_rate": 0.00019709478399060367, "loss": 7.6078, "step": 68710 }, { "epoch": 8.269554753309265, "grad_norm": 2.4659457206726074, "learning_rate": 0.00019709387359194208, "loss": 7.5378, "step": 68720 }, { "epoch": 8.270758122743683, "grad_norm": 5.003413200378418, "learning_rate": 0.00019709296305276165, "loss": 7.6184, "step": 68730 }, { "epoch": 8.271961492178098, "grad_norm": 1.0706757307052612, "learning_rate": 0.00019709205237306376, "loss": 7.4712, "step": 68740 }, { "epoch": 8.273164861612514, "grad_norm": 3.0960686206817627, "learning_rate": 0.0001970911415528497, "loss": 7.5741, "step": 68750 }, { "epoch": 8.274368231046932, "grad_norm": 1.5803383588790894, "learning_rate": 0.00019709023059212078, "loss": 7.6581, "step": 68760 }, { "epoch": 8.275571600481348, "grad_norm": 4.492832183837891, "learning_rate": 0.0001970893194908783, "loss": 7.5397, "step": 68770 }, { "epoch": 8.276774969915763, "grad_norm": 3.1308069229125977, "learning_rate": 0.00019708840824912363, "loss": 7.5709, "step": 68780 }, { "epoch": 8.277978339350181, "grad_norm": 1.3884894847869873, "learning_rate": 0.00019708749686685805, "loss": 7.513, "step": 68790 }, { "epoch": 8.279181708784597, "grad_norm": 4.107443809509277, "learning_rate": 0.0001970865853440829, "loss": 7.5936, "step": 68800 }, { "epoch": 8.280385078219012, "grad_norm": 1.3624972105026245, "learning_rate": 0.00019708567368079951, "loss": 7.577, "step": 68810 }, { "epoch": 8.28158844765343, "grad_norm": 4.004277229309082, "learning_rate": 0.00019708476187700914, "loss": 7.5543, "step": 68820 }, { "epoch": 8.282791817087846, "grad_norm": 5.335235595703125, "learning_rate": 0.00019708384993271318, "loss": 7.5473, "step": 68830 }, { "epoch": 8.283995186522262, "grad_norm": 4.383147239685059, "learning_rate": 0.00019708293784791292, "loss": 7.5184, "step": 68840 }, { "epoch": 8.28519855595668, "grad_norm": 2.059014081954956, "learning_rate": 0.00019708202562260967, "loss": 7.571, "step": 68850 }, { "epoch": 8.286401925391095, "grad_norm": 4.604721546173096, "learning_rate": 0.00019708111325680475, "loss": 7.5548, "step": 68860 }, { "epoch": 8.28760529482551, "grad_norm": 0.9158333539962769, "learning_rate": 0.00019708020075049948, "loss": 7.6148, "step": 68870 }, { "epoch": 8.288808664259928, "grad_norm": 3.3548567295074463, "learning_rate": 0.0001970792881036952, "loss": 7.59, "step": 68880 }, { "epoch": 8.290012033694344, "grad_norm": 4.503845691680908, "learning_rate": 0.00019707837531639325, "loss": 7.5678, "step": 68890 }, { "epoch": 8.29121540312876, "grad_norm": 1.3565207719802856, "learning_rate": 0.00019707746238859493, "loss": 7.5724, "step": 68900 }, { "epoch": 8.292418772563177, "grad_norm": 1.3087491989135742, "learning_rate": 0.00019707654932030153, "loss": 7.5304, "step": 68910 }, { "epoch": 8.293622141997593, "grad_norm": 4.389442443847656, "learning_rate": 0.00019707563611151438, "loss": 7.5141, "step": 68920 }, { "epoch": 8.294825511432009, "grad_norm": 3.10077166557312, "learning_rate": 0.00019707472276223484, "loss": 7.6327, "step": 68930 }, { "epoch": 8.296028880866427, "grad_norm": 1.7084767818450928, "learning_rate": 0.00019707380927246423, "loss": 7.4655, "step": 68940 }, { "epoch": 8.297232250300842, "grad_norm": 5.375579357147217, "learning_rate": 0.0001970728956422038, "loss": 7.5407, "step": 68950 }, { "epoch": 8.298435619735258, "grad_norm": 1.3260475397109985, "learning_rate": 0.00019707198187145496, "loss": 7.6192, "step": 68960 }, { "epoch": 8.299638989169676, "grad_norm": 4.735003471374512, "learning_rate": 0.000197071067960219, "loss": 7.5451, "step": 68970 }, { "epoch": 8.300842358604092, "grad_norm": 2.3451125621795654, "learning_rate": 0.0001970701539084972, "loss": 7.533, "step": 68980 }, { "epoch": 8.302045728038507, "grad_norm": 4.834249496459961, "learning_rate": 0.00019706923971629097, "loss": 7.4932, "step": 68990 }, { "epoch": 8.303249097472925, "grad_norm": 1.004472017288208, "learning_rate": 0.00019706832538360155, "loss": 7.5187, "step": 69000 }, { "epoch": 8.30445246690734, "grad_norm": 3.006849527359009, "learning_rate": 0.00019706741091043032, "loss": 7.4423, "step": 69010 }, { "epoch": 8.305655836341757, "grad_norm": 1.71808922290802, "learning_rate": 0.00019706649629677855, "loss": 7.5339, "step": 69020 }, { "epoch": 8.306859205776174, "grad_norm": 1.9772635698318481, "learning_rate": 0.0001970655815426476, "loss": 7.6016, "step": 69030 }, { "epoch": 8.30806257521059, "grad_norm": 1.686840295791626, "learning_rate": 0.00019706466664803882, "loss": 7.544, "step": 69040 }, { "epoch": 8.309265944645006, "grad_norm": 2.325416088104248, "learning_rate": 0.00019706375161295347, "loss": 7.5321, "step": 69050 }, { "epoch": 8.310469314079423, "grad_norm": 1.6634761095046997, "learning_rate": 0.00019706283643739295, "loss": 7.4735, "step": 69060 }, { "epoch": 8.311672683513839, "grad_norm": 2.6000471115112305, "learning_rate": 0.0001970619211213585, "loss": 7.5945, "step": 69070 }, { "epoch": 8.312876052948255, "grad_norm": 3.5839436054229736, "learning_rate": 0.0001970610056648515, "loss": 7.5622, "step": 69080 }, { "epoch": 8.314079422382672, "grad_norm": 1.9345314502716064, "learning_rate": 0.00019706009006787326, "loss": 7.5756, "step": 69090 }, { "epoch": 8.315282791817088, "grad_norm": 2.8428375720977783, "learning_rate": 0.00019705917433042512, "loss": 7.4851, "step": 69100 }, { "epoch": 8.316486161251504, "grad_norm": 1.6608713865280151, "learning_rate": 0.00019705825845250837, "loss": 7.5587, "step": 69110 }, { "epoch": 8.31768953068592, "grad_norm": 1.403356909751892, "learning_rate": 0.00019705734243412437, "loss": 7.5413, "step": 69120 }, { "epoch": 8.318892900120337, "grad_norm": 4.303885459899902, "learning_rate": 0.00019705642627527444, "loss": 7.5151, "step": 69130 }, { "epoch": 8.320096269554753, "grad_norm": 2.9453518390655518, "learning_rate": 0.0001970555099759599, "loss": 7.7009, "step": 69140 }, { "epoch": 8.321299638989169, "grad_norm": 1.3282310962677002, "learning_rate": 0.00019705459353618206, "loss": 7.5429, "step": 69150 }, { "epoch": 8.322503008423586, "grad_norm": 1.5983847379684448, "learning_rate": 0.00019705367695594227, "loss": 7.5995, "step": 69160 }, { "epoch": 8.323706377858002, "grad_norm": 4.7585320472717285, "learning_rate": 0.0001970527602352418, "loss": 7.5006, "step": 69170 }, { "epoch": 8.324909747292418, "grad_norm": 2.053856372833252, "learning_rate": 0.00019705184337408209, "loss": 7.5291, "step": 69180 }, { "epoch": 8.326113116726836, "grad_norm": 1.6657274961471558, "learning_rate": 0.00019705092637246438, "loss": 7.5205, "step": 69190 }, { "epoch": 8.327316486161251, "grad_norm": 1.5674819946289062, "learning_rate": 0.00019705000923039002, "loss": 7.56, "step": 69200 }, { "epoch": 8.328519855595667, "grad_norm": 3.1776132583618164, "learning_rate": 0.00019704909194786033, "loss": 7.5724, "step": 69210 }, { "epoch": 8.329723225030085, "grad_norm": 0.933430016040802, "learning_rate": 0.00019704817452487663, "loss": 7.5107, "step": 69220 }, { "epoch": 8.3309265944645, "grad_norm": 2.722656011581421, "learning_rate": 0.0001970472569614403, "loss": 7.5993, "step": 69230 }, { "epoch": 8.332129963898916, "grad_norm": 2.6975135803222656, "learning_rate": 0.00019704633925755262, "loss": 7.5472, "step": 69240 }, { "epoch": 8.333333333333334, "grad_norm": 2.1026878356933594, "learning_rate": 0.00019704542141321487, "loss": 7.5919, "step": 69250 }, { "epoch": 8.33453670276775, "grad_norm": 1.7851208448410034, "learning_rate": 0.00019704450342842848, "loss": 7.5893, "step": 69260 }, { "epoch": 8.335740072202166, "grad_norm": 1.280189037322998, "learning_rate": 0.00019704358530319476, "loss": 7.5784, "step": 69270 }, { "epoch": 8.336943441636583, "grad_norm": 1.7406482696533203, "learning_rate": 0.00019704266703751496, "loss": 7.5389, "step": 69280 }, { "epoch": 8.338146811070999, "grad_norm": 2.391625165939331, "learning_rate": 0.00019704174863139048, "loss": 7.562, "step": 69290 }, { "epoch": 8.339350180505415, "grad_norm": 4.894526958465576, "learning_rate": 0.00019704083008482263, "loss": 7.5398, "step": 69300 }, { "epoch": 8.340553549939832, "grad_norm": 3.314066171646118, "learning_rate": 0.00019703991139781278, "loss": 7.5561, "step": 69310 }, { "epoch": 8.341756919374248, "grad_norm": 4.265951633453369, "learning_rate": 0.00019703899257036218, "loss": 7.5035, "step": 69320 }, { "epoch": 8.342960288808664, "grad_norm": 2.081082582473755, "learning_rate": 0.00019703807360247223, "loss": 7.5066, "step": 69330 }, { "epoch": 8.344163658243081, "grad_norm": 1.7546112537384033, "learning_rate": 0.0001970371544941442, "loss": 7.5573, "step": 69340 }, { "epoch": 8.345367027677497, "grad_norm": 2.2228806018829346, "learning_rate": 0.00019703623524537945, "loss": 7.5062, "step": 69350 }, { "epoch": 8.346570397111913, "grad_norm": 1.3018989562988281, "learning_rate": 0.0001970353158561793, "loss": 7.4785, "step": 69360 }, { "epoch": 8.34777376654633, "grad_norm": 1.700270652770996, "learning_rate": 0.00019703439632654513, "loss": 7.5577, "step": 69370 }, { "epoch": 8.348977135980746, "grad_norm": 5.086368083953857, "learning_rate": 0.00019703347665647817, "loss": 7.577, "step": 69380 }, { "epoch": 8.350180505415162, "grad_norm": 2.2030587196350098, "learning_rate": 0.00019703255684597987, "loss": 7.5343, "step": 69390 }, { "epoch": 8.35138387484958, "grad_norm": 2.14054012298584, "learning_rate": 0.00019703163689505147, "loss": 7.5883, "step": 69400 }, { "epoch": 8.352587244283995, "grad_norm": 1.624119758605957, "learning_rate": 0.00019703071680369437, "loss": 7.6223, "step": 69410 }, { "epoch": 8.353790613718411, "grad_norm": 2.0320706367492676, "learning_rate": 0.00019702979657190985, "loss": 7.5987, "step": 69420 }, { "epoch": 8.354993983152827, "grad_norm": 3.275172710418701, "learning_rate": 0.00019702887619969922, "loss": 7.5904, "step": 69430 }, { "epoch": 8.356197352587245, "grad_norm": 2.4781830310821533, "learning_rate": 0.00019702795568706392, "loss": 7.51, "step": 69440 }, { "epoch": 8.35740072202166, "grad_norm": 2.811317205429077, "learning_rate": 0.00019702703503400517, "loss": 7.5035, "step": 69450 }, { "epoch": 8.358604091456076, "grad_norm": 2.913123846054077, "learning_rate": 0.00019702611424052434, "loss": 7.5816, "step": 69460 }, { "epoch": 8.359807460890494, "grad_norm": 3.629225492477417, "learning_rate": 0.0001970251933066228, "loss": 7.5186, "step": 69470 }, { "epoch": 8.36101083032491, "grad_norm": 2.3426499366760254, "learning_rate": 0.00019702427223230182, "loss": 7.611, "step": 69480 }, { "epoch": 8.362214199759325, "grad_norm": 2.5738203525543213, "learning_rate": 0.00019702335101756278, "loss": 7.5928, "step": 69490 }, { "epoch": 8.363417569193743, "grad_norm": 4.000921249389648, "learning_rate": 0.00019702242966240698, "loss": 7.6396, "step": 69500 }, { "epoch": 8.364620938628159, "grad_norm": 2.1955952644348145, "learning_rate": 0.0001970215081668358, "loss": 7.5869, "step": 69510 }, { "epoch": 8.365824308062574, "grad_norm": 2.526052236557007, "learning_rate": 0.0001970205865308505, "loss": 7.5259, "step": 69520 }, { "epoch": 8.367027677496992, "grad_norm": 2.636810541152954, "learning_rate": 0.0001970196647544525, "loss": 7.4302, "step": 69530 }, { "epoch": 8.368231046931408, "grad_norm": 1.7989444732666016, "learning_rate": 0.00019701874283764307, "loss": 7.5755, "step": 69540 }, { "epoch": 8.369434416365824, "grad_norm": 3.394885301589966, "learning_rate": 0.0001970178207804236, "loss": 7.5249, "step": 69550 }, { "epoch": 8.370637785800241, "grad_norm": 3.081509828567505, "learning_rate": 0.00019701689858279533, "loss": 7.5408, "step": 69560 }, { "epoch": 8.371841155234657, "grad_norm": 1.3034275770187378, "learning_rate": 0.00019701597624475968, "loss": 7.586, "step": 69570 }, { "epoch": 8.373044524669073, "grad_norm": 3.7937939167022705, "learning_rate": 0.00019701505376631796, "loss": 7.6025, "step": 69580 }, { "epoch": 8.37424789410349, "grad_norm": 0.9847928285598755, "learning_rate": 0.00019701413114747154, "loss": 7.6243, "step": 69590 }, { "epoch": 8.375451263537906, "grad_norm": 2.6686904430389404, "learning_rate": 0.00019701320838822167, "loss": 7.5863, "step": 69600 }, { "epoch": 8.376654632972322, "grad_norm": 3.7679669857025146, "learning_rate": 0.00019701228548856979, "loss": 7.4367, "step": 69610 }, { "epoch": 8.37785800240674, "grad_norm": 1.0155153274536133, "learning_rate": 0.0001970113624485171, "loss": 7.5095, "step": 69620 }, { "epoch": 8.379061371841155, "grad_norm": 6.318257808685303, "learning_rate": 0.0001970104392680651, "loss": 7.6557, "step": 69630 }, { "epoch": 8.380264741275571, "grad_norm": 1.2143404483795166, "learning_rate": 0.000197009515947215, "loss": 7.4981, "step": 69640 }, { "epoch": 8.381468110709989, "grad_norm": 2.227907180786133, "learning_rate": 0.0001970085924859682, "loss": 7.5467, "step": 69650 }, { "epoch": 8.382671480144404, "grad_norm": 3.4254238605499268, "learning_rate": 0.000197007668884326, "loss": 7.6048, "step": 69660 }, { "epoch": 8.38387484957882, "grad_norm": 1.2634449005126953, "learning_rate": 0.00019700674514228975, "loss": 7.5963, "step": 69670 }, { "epoch": 8.385078219013238, "grad_norm": 3.4352493286132812, "learning_rate": 0.0001970058212598608, "loss": 7.5584, "step": 69680 }, { "epoch": 8.386281588447654, "grad_norm": 2.7097935676574707, "learning_rate": 0.0001970048972370405, "loss": 7.5435, "step": 69690 }, { "epoch": 8.38748495788207, "grad_norm": 1.6629337072372437, "learning_rate": 0.00019700397307383013, "loss": 7.6898, "step": 69700 }, { "epoch": 8.388688327316487, "grad_norm": 2.548401117324829, "learning_rate": 0.00019700304877023106, "loss": 7.5624, "step": 69710 }, { "epoch": 8.389891696750903, "grad_norm": 1.9017082452774048, "learning_rate": 0.00019700212432624463, "loss": 7.4993, "step": 69720 }, { "epoch": 8.391095066185319, "grad_norm": 1.875321626663208, "learning_rate": 0.0001970011997418722, "loss": 7.4741, "step": 69730 }, { "epoch": 8.392298435619736, "grad_norm": 2.2850897312164307, "learning_rate": 0.00019700027501711506, "loss": 7.5867, "step": 69740 }, { "epoch": 8.393501805054152, "grad_norm": 1.5570414066314697, "learning_rate": 0.00019699935015197458, "loss": 7.6479, "step": 69750 }, { "epoch": 8.394705174488568, "grad_norm": 1.8163044452667236, "learning_rate": 0.0001969984251464521, "loss": 7.5948, "step": 69760 }, { "epoch": 8.395908543922985, "grad_norm": 1.5942484140396118, "learning_rate": 0.00019699750000054896, "loss": 7.4422, "step": 69770 }, { "epoch": 8.397111913357401, "grad_norm": 4.415695667266846, "learning_rate": 0.00019699657471426647, "loss": 7.6148, "step": 69780 }, { "epoch": 8.398315282791817, "grad_norm": 1.3548928499221802, "learning_rate": 0.000196995649287606, "loss": 7.56, "step": 69790 }, { "epoch": 8.399518652226233, "grad_norm": 2.413607358932495, "learning_rate": 0.00019699472372056886, "loss": 7.5521, "step": 69800 }, { "epoch": 8.40072202166065, "grad_norm": 2.9080374240875244, "learning_rate": 0.00019699379801315644, "loss": 7.4956, "step": 69810 }, { "epoch": 8.401925391095066, "grad_norm": 4.320467472076416, "learning_rate": 0.00019699287216537, "loss": 7.5482, "step": 69820 }, { "epoch": 8.403128760529482, "grad_norm": 6.3931450843811035, "learning_rate": 0.00019699194617721097, "loss": 7.614, "step": 69830 }, { "epoch": 8.4043321299639, "grad_norm": 1.192388653755188, "learning_rate": 0.00019699102004868064, "loss": 7.6063, "step": 69840 }, { "epoch": 8.405535499398315, "grad_norm": 2.541799783706665, "learning_rate": 0.00019699009377978035, "loss": 7.6549, "step": 69850 }, { "epoch": 8.406738868832731, "grad_norm": 1.4081885814666748, "learning_rate": 0.00019698916737051145, "loss": 7.5446, "step": 69860 }, { "epoch": 8.407942238267148, "grad_norm": 2.5232038497924805, "learning_rate": 0.00019698824082087528, "loss": 7.5346, "step": 69870 }, { "epoch": 8.409145607701564, "grad_norm": 2.2329695224761963, "learning_rate": 0.00019698731413087317, "loss": 7.4967, "step": 69880 }, { "epoch": 8.41034897713598, "grad_norm": 1.0750991106033325, "learning_rate": 0.00019698638730050649, "loss": 7.5455, "step": 69890 }, { "epoch": 8.411552346570398, "grad_norm": 3.1521692276000977, "learning_rate": 0.00019698546032977655, "loss": 7.5921, "step": 69900 }, { "epoch": 8.412755716004813, "grad_norm": 1.1493871212005615, "learning_rate": 0.00019698453321868468, "loss": 7.4912, "step": 69910 }, { "epoch": 8.41395908543923, "grad_norm": 1.6853001117706299, "learning_rate": 0.00019698360596723228, "loss": 7.5831, "step": 69920 }, { "epoch": 8.415162454873647, "grad_norm": 2.5247533321380615, "learning_rate": 0.00019698267857542066, "loss": 7.5224, "step": 69930 }, { "epoch": 8.416365824308063, "grad_norm": 1.6904728412628174, "learning_rate": 0.00019698175104325113, "loss": 7.5352, "step": 69940 }, { "epoch": 8.417569193742478, "grad_norm": 1.9604597091674805, "learning_rate": 0.0001969808233707251, "loss": 7.5345, "step": 69950 }, { "epoch": 8.418772563176896, "grad_norm": 3.829887628555298, "learning_rate": 0.00019697989555784385, "loss": 7.6028, "step": 69960 }, { "epoch": 8.419975932611312, "grad_norm": 1.9152189493179321, "learning_rate": 0.00019697896760460874, "loss": 7.4951, "step": 69970 }, { "epoch": 8.421179302045728, "grad_norm": 4.484202861785889, "learning_rate": 0.00019697803951102113, "loss": 7.5123, "step": 69980 }, { "epoch": 8.422382671480145, "grad_norm": 2.0393359661102295, "learning_rate": 0.00019697711127708236, "loss": 7.5755, "step": 69990 }, { "epoch": 8.42358604091456, "grad_norm": 2.936713457107544, "learning_rate": 0.00019697618290279376, "loss": 7.5164, "step": 70000 }, { "epoch": 8.424789410348977, "grad_norm": 1.60394287109375, "learning_rate": 0.0001969752543881567, "loss": 7.5921, "step": 70010 }, { "epoch": 8.425992779783394, "grad_norm": 1.1328998804092407, "learning_rate": 0.00019697432573317247, "loss": 7.5357, "step": 70020 }, { "epoch": 8.42719614921781, "grad_norm": 3.7305259704589844, "learning_rate": 0.00019697339693784248, "loss": 7.5175, "step": 70030 }, { "epoch": 8.428399518652226, "grad_norm": 1.4460114240646362, "learning_rate": 0.000196972468002168, "loss": 7.4581, "step": 70040 }, { "epoch": 8.429602888086643, "grad_norm": 2.8230886459350586, "learning_rate": 0.00019697153892615047, "loss": 7.5622, "step": 70050 }, { "epoch": 8.43080625752106, "grad_norm": 1.8971261978149414, "learning_rate": 0.00019697060970979115, "loss": 7.5077, "step": 70060 }, { "epoch": 8.432009626955475, "grad_norm": 1.3763706684112549, "learning_rate": 0.00019696968035309146, "loss": 7.5594, "step": 70070 }, { "epoch": 8.433212996389893, "grad_norm": 4.566793918609619, "learning_rate": 0.00019696875085605266, "loss": 7.5197, "step": 70080 }, { "epoch": 8.434416365824308, "grad_norm": 3.3624672889709473, "learning_rate": 0.00019696782121867614, "loss": 7.5901, "step": 70090 }, { "epoch": 8.435619735258724, "grad_norm": 3.586397647857666, "learning_rate": 0.00019696689144096325, "loss": 7.5154, "step": 70100 }, { "epoch": 8.43682310469314, "grad_norm": 1.238344430923462, "learning_rate": 0.0001969659615229153, "loss": 7.5655, "step": 70110 }, { "epoch": 8.438026474127557, "grad_norm": 1.6360137462615967, "learning_rate": 0.0001969650314645337, "loss": 7.6414, "step": 70120 }, { "epoch": 8.439229843561973, "grad_norm": 6.115720272064209, "learning_rate": 0.00019696410126581975, "loss": 7.5798, "step": 70130 }, { "epoch": 8.440433212996389, "grad_norm": 3.8817498683929443, "learning_rate": 0.0001969631709267748, "loss": 7.5112, "step": 70140 }, { "epoch": 8.441636582430807, "grad_norm": 2.3776049613952637, "learning_rate": 0.0001969622404474002, "loss": 7.5668, "step": 70150 }, { "epoch": 8.442839951865222, "grad_norm": 1.765113115310669, "learning_rate": 0.00019696130982769733, "loss": 7.5458, "step": 70160 }, { "epoch": 8.444043321299638, "grad_norm": 1.173095703125, "learning_rate": 0.00019696037906766746, "loss": 7.5786, "step": 70170 }, { "epoch": 8.445246690734056, "grad_norm": 3.3456172943115234, "learning_rate": 0.000196959448167312, "loss": 7.6441, "step": 70180 }, { "epoch": 8.446450060168472, "grad_norm": 2.850996971130371, "learning_rate": 0.0001969585171266323, "loss": 7.5242, "step": 70190 }, { "epoch": 8.447653429602887, "grad_norm": 1.7878836393356323, "learning_rate": 0.00019695758594562965, "loss": 7.5496, "step": 70200 }, { "epoch": 8.448856799037305, "grad_norm": 2.1548261642456055, "learning_rate": 0.00019695665462430545, "loss": 7.5587, "step": 70210 }, { "epoch": 8.45006016847172, "grad_norm": 1.7706656455993652, "learning_rate": 0.00019695572316266103, "loss": 7.5371, "step": 70220 }, { "epoch": 8.451263537906136, "grad_norm": 3.8058576583862305, "learning_rate": 0.00019695479156069776, "loss": 7.563, "step": 70230 }, { "epoch": 8.452466907340554, "grad_norm": 2.564669370651245, "learning_rate": 0.00019695385981841697, "loss": 7.6053, "step": 70240 }, { "epoch": 8.45367027677497, "grad_norm": 1.0102994441986084, "learning_rate": 0.00019695292793581997, "loss": 7.6064, "step": 70250 }, { "epoch": 8.454873646209386, "grad_norm": 1.3062176704406738, "learning_rate": 0.00019695199591290822, "loss": 7.5409, "step": 70260 }, { "epoch": 8.456077015643803, "grad_norm": 0.8895031809806824, "learning_rate": 0.00019695106374968292, "loss": 7.5031, "step": 70270 }, { "epoch": 8.457280385078219, "grad_norm": 1.7016993761062622, "learning_rate": 0.00019695013144614552, "loss": 7.5381, "step": 70280 }, { "epoch": 8.458483754512635, "grad_norm": 1.497953176498413, "learning_rate": 0.00019694919900229736, "loss": 7.559, "step": 70290 }, { "epoch": 8.459687123947052, "grad_norm": 2.9686248302459717, "learning_rate": 0.00019694826641813973, "loss": 7.5928, "step": 70300 }, { "epoch": 8.460890493381468, "grad_norm": 1.3379900455474854, "learning_rate": 0.00019694733369367406, "loss": 7.5338, "step": 70310 }, { "epoch": 8.462093862815884, "grad_norm": 5.383237838745117, "learning_rate": 0.00019694640082890165, "loss": 7.481, "step": 70320 }, { "epoch": 8.463297232250302, "grad_norm": 2.661841630935669, "learning_rate": 0.0001969454678238239, "loss": 7.5019, "step": 70330 }, { "epoch": 8.464500601684717, "grad_norm": 3.2884738445281982, "learning_rate": 0.00019694453467844207, "loss": 7.5538, "step": 70340 }, { "epoch": 8.465703971119133, "grad_norm": 2.750004529953003, "learning_rate": 0.00019694360139275755, "loss": 7.5315, "step": 70350 }, { "epoch": 8.46690734055355, "grad_norm": 2.629000663757324, "learning_rate": 0.00019694266796677176, "loss": 7.4495, "step": 70360 }, { "epoch": 8.468110709987966, "grad_norm": 4.275704383850098, "learning_rate": 0.00019694173440048596, "loss": 7.473, "step": 70370 }, { "epoch": 8.469314079422382, "grad_norm": 1.3949379920959473, "learning_rate": 0.00019694080069390158, "loss": 7.5222, "step": 70380 }, { "epoch": 8.4705174488568, "grad_norm": 2.105985641479492, "learning_rate": 0.00019693986684701987, "loss": 7.6076, "step": 70390 }, { "epoch": 8.471720818291216, "grad_norm": 4.698059558868408, "learning_rate": 0.0001969389328598423, "loss": 7.5532, "step": 70400 }, { "epoch": 8.472924187725631, "grad_norm": 1.465378999710083, "learning_rate": 0.0001969379987323701, "loss": 7.5151, "step": 70410 }, { "epoch": 8.474127557160049, "grad_norm": 1.6236846446990967, "learning_rate": 0.0001969370644646047, "loss": 7.4925, "step": 70420 }, { "epoch": 8.475330926594465, "grad_norm": 5.72561502456665, "learning_rate": 0.00019693613005654744, "loss": 7.5585, "step": 70430 }, { "epoch": 8.47653429602888, "grad_norm": 2.293527364730835, "learning_rate": 0.0001969351955081997, "loss": 7.5193, "step": 70440 }, { "epoch": 8.477737665463298, "grad_norm": 5.550730228424072, "learning_rate": 0.00019693426081956273, "loss": 7.5706, "step": 70450 }, { "epoch": 8.478941034897714, "grad_norm": 1.1612584590911865, "learning_rate": 0.00019693332599063801, "loss": 7.6691, "step": 70460 }, { "epoch": 8.48014440433213, "grad_norm": 1.364365577697754, "learning_rate": 0.00019693239102142683, "loss": 7.5741, "step": 70470 }, { "epoch": 8.481347773766545, "grad_norm": 3.126809597015381, "learning_rate": 0.0001969314559119305, "loss": 7.6143, "step": 70480 }, { "epoch": 8.482551143200963, "grad_norm": 2.3165807723999023, "learning_rate": 0.00019693052066215047, "loss": 7.5971, "step": 70490 }, { "epoch": 8.483754512635379, "grad_norm": 1.6918388605117798, "learning_rate": 0.00019692958527208805, "loss": 7.5596, "step": 70500 }, { "epoch": 8.484957882069795, "grad_norm": 2.422433614730835, "learning_rate": 0.00019692864974174456, "loss": 7.4805, "step": 70510 }, { "epoch": 8.486161251504212, "grad_norm": 1.60307776927948, "learning_rate": 0.00019692771407112139, "loss": 7.5824, "step": 70520 }, { "epoch": 8.487364620938628, "grad_norm": 4.616024494171143, "learning_rate": 0.0001969267782602199, "loss": 7.4813, "step": 70530 }, { "epoch": 8.488567990373044, "grad_norm": 1.1546640396118164, "learning_rate": 0.00019692584230904142, "loss": 7.4846, "step": 70540 }, { "epoch": 8.489771359807461, "grad_norm": 2.1627120971679688, "learning_rate": 0.0001969249062175873, "loss": 7.5749, "step": 70550 }, { "epoch": 8.490974729241877, "grad_norm": 1.7128040790557861, "learning_rate": 0.00019692396998585894, "loss": 7.499, "step": 70560 }, { "epoch": 8.492178098676293, "grad_norm": 1.1143193244934082, "learning_rate": 0.00019692303361385765, "loss": 7.598, "step": 70570 }, { "epoch": 8.49338146811071, "grad_norm": 6.3489155769348145, "learning_rate": 0.0001969220971015848, "loss": 7.6215, "step": 70580 }, { "epoch": 8.494584837545126, "grad_norm": 2.653006076812744, "learning_rate": 0.00019692116044904177, "loss": 7.493, "step": 70590 }, { "epoch": 8.495788206979542, "grad_norm": 1.5156558752059937, "learning_rate": 0.00019692022365622988, "loss": 7.5372, "step": 70600 }, { "epoch": 8.49699157641396, "grad_norm": 2.8469014167785645, "learning_rate": 0.00019691928672315047, "loss": 7.5831, "step": 70610 }, { "epoch": 8.498194945848375, "grad_norm": 1.1309077739715576, "learning_rate": 0.00019691834964980496, "loss": 7.5292, "step": 70620 }, { "epoch": 8.499398315282791, "grad_norm": 1.6052037477493286, "learning_rate": 0.00019691741243619464, "loss": 7.6114, "step": 70630 }, { "epoch": 8.500601684717209, "grad_norm": 1.7786142826080322, "learning_rate": 0.00019691647508232092, "loss": 7.5592, "step": 70640 }, { "epoch": 8.501805054151625, "grad_norm": 2.218583583831787, "learning_rate": 0.00019691553758818513, "loss": 7.5153, "step": 70650 }, { "epoch": 8.50300842358604, "grad_norm": 2.2946512699127197, "learning_rate": 0.0001969145999537886, "loss": 7.5829, "step": 70660 }, { "epoch": 8.504211793020458, "grad_norm": 1.7440555095672607, "learning_rate": 0.00019691366217913276, "loss": 7.5762, "step": 70670 }, { "epoch": 8.505415162454874, "grad_norm": 2.6018712520599365, "learning_rate": 0.0001969127242642189, "loss": 7.5387, "step": 70680 }, { "epoch": 8.50661853188929, "grad_norm": 1.3647104501724243, "learning_rate": 0.0001969117862090484, "loss": 7.508, "step": 70690 }, { "epoch": 8.507821901323707, "grad_norm": 1.5754398107528687, "learning_rate": 0.00019691084801362263, "loss": 7.5032, "step": 70700 }, { "epoch": 8.509025270758123, "grad_norm": 2.110182762145996, "learning_rate": 0.00019690990967794294, "loss": 7.4783, "step": 70710 }, { "epoch": 8.510228640192539, "grad_norm": 10.546307563781738, "learning_rate": 0.00019690897120201064, "loss": 7.5894, "step": 70720 }, { "epoch": 8.511432009626956, "grad_norm": 874.0382690429688, "learning_rate": 0.0001969080325858272, "loss": 7.5401, "step": 70730 }, { "epoch": 8.512635379061372, "grad_norm": 1.38412344455719, "learning_rate": 0.00019690709382939384, "loss": 7.7281, "step": 70740 }, { "epoch": 8.513838748495788, "grad_norm": 9.912545204162598, "learning_rate": 0.00019690615493271203, "loss": 7.6684, "step": 70750 }, { "epoch": 8.515042117930205, "grad_norm": 4.450211048126221, "learning_rate": 0.0001969052158957831, "loss": 7.4833, "step": 70760 }, { "epoch": 8.516245487364621, "grad_norm": 5.637786388397217, "learning_rate": 0.0001969042767186084, "loss": 7.5644, "step": 70770 }, { "epoch": 8.517448856799037, "grad_norm": 3.0561916828155518, "learning_rate": 0.00019690333740118924, "loss": 7.5679, "step": 70780 }, { "epoch": 8.518652226233453, "grad_norm": 2.1943979263305664, "learning_rate": 0.00019690239794352708, "loss": 7.5543, "step": 70790 }, { "epoch": 8.51985559566787, "grad_norm": 3.0288281440734863, "learning_rate": 0.00019690145834562318, "loss": 7.548, "step": 70800 }, { "epoch": 8.521058965102286, "grad_norm": 1.6482231616973877, "learning_rate": 0.00019690051860747894, "loss": 7.5413, "step": 70810 }, { "epoch": 8.522262334536702, "grad_norm": 3.06941556930542, "learning_rate": 0.0001968995787290958, "loss": 7.5749, "step": 70820 }, { "epoch": 8.52346570397112, "grad_norm": 3.1419012546539307, "learning_rate": 0.00019689863871047498, "loss": 7.5293, "step": 70830 }, { "epoch": 8.524669073405535, "grad_norm": 1.204333782196045, "learning_rate": 0.0001968976985516179, "loss": 7.4705, "step": 70840 }, { "epoch": 8.525872442839951, "grad_norm": 2.838078498840332, "learning_rate": 0.000196896758252526, "loss": 7.5857, "step": 70850 }, { "epoch": 8.527075812274369, "grad_norm": 4.2524800300598145, "learning_rate": 0.0001968958178132005, "loss": 7.572, "step": 70860 }, { "epoch": 8.528279181708784, "grad_norm": 2.989532232284546, "learning_rate": 0.00019689487723364283, "loss": 7.4776, "step": 70870 }, { "epoch": 8.5294825511432, "grad_norm": 3.1052374839782715, "learning_rate": 0.00019689393651385438, "loss": 7.5736, "step": 70880 }, { "epoch": 8.530685920577618, "grad_norm": 2.4602415561676025, "learning_rate": 0.00019689299565383648, "loss": 7.5589, "step": 70890 }, { "epoch": 8.531889290012034, "grad_norm": 1.5469058752059937, "learning_rate": 0.0001968920546535905, "loss": 7.5763, "step": 70900 }, { "epoch": 8.53309265944645, "grad_norm": 1.3221018314361572, "learning_rate": 0.00019689111351311775, "loss": 7.5067, "step": 70910 }, { "epoch": 8.534296028880867, "grad_norm": 1.084427833557129, "learning_rate": 0.00019689017223241967, "loss": 7.523, "step": 70920 }, { "epoch": 8.535499398315283, "grad_norm": 2.507516622543335, "learning_rate": 0.00019688923081149758, "loss": 7.5407, "step": 70930 }, { "epoch": 8.536702767749698, "grad_norm": 1.2151966094970703, "learning_rate": 0.00019688828925035285, "loss": 7.5362, "step": 70940 }, { "epoch": 8.537906137184116, "grad_norm": 3.324131965637207, "learning_rate": 0.00019688734754898685, "loss": 7.542, "step": 70950 }, { "epoch": 8.539109506618532, "grad_norm": 2.180873394012451, "learning_rate": 0.00019688640570740095, "loss": 7.614, "step": 70960 }, { "epoch": 8.540312876052948, "grad_norm": 1.2994470596313477, "learning_rate": 0.0001968854637255965, "loss": 7.5134, "step": 70970 }, { "epoch": 8.541516245487365, "grad_norm": 2.6376659870147705, "learning_rate": 0.00019688452160357485, "loss": 7.5046, "step": 70980 }, { "epoch": 8.542719614921781, "grad_norm": 1.9583570957183838, "learning_rate": 0.00019688357934133737, "loss": 7.5359, "step": 70990 }, { "epoch": 8.543922984356197, "grad_norm": 1.505137324333191, "learning_rate": 0.00019688263693888543, "loss": 7.5449, "step": 71000 }, { "epoch": 8.545126353790614, "grad_norm": 3.4148805141448975, "learning_rate": 0.0001968816943962204, "loss": 7.5003, "step": 71010 }, { "epoch": 8.54632972322503, "grad_norm": 1.5637755393981934, "learning_rate": 0.00019688075171334367, "loss": 7.4096, "step": 71020 }, { "epoch": 8.547533092659446, "grad_norm": 4.581235885620117, "learning_rate": 0.00019687980889025652, "loss": 7.3817, "step": 71030 }, { "epoch": 8.548736462093864, "grad_norm": 1.3608286380767822, "learning_rate": 0.0001968788659269604, "loss": 7.4101, "step": 71040 }, { "epoch": 8.54993983152828, "grad_norm": 2.594306707382202, "learning_rate": 0.00019687792282345665, "loss": 7.4077, "step": 71050 }, { "epoch": 8.551143200962695, "grad_norm": 4.229619026184082, "learning_rate": 0.0001968769795797466, "loss": 7.3315, "step": 71060 }, { "epoch": 8.552346570397113, "grad_norm": 5.648379802703857, "learning_rate": 0.00019687603619583167, "loss": 7.4154, "step": 71070 }, { "epoch": 8.553549939831528, "grad_norm": 3.063415288925171, "learning_rate": 0.00019687509267171316, "loss": 7.3526, "step": 71080 }, { "epoch": 8.554753309265944, "grad_norm": 2.3927276134490967, "learning_rate": 0.0001968741490073925, "loss": 7.3678, "step": 71090 }, { "epoch": 8.555956678700362, "grad_norm": 4.117802143096924, "learning_rate": 0.00019687320520287105, "loss": 7.3824, "step": 71100 }, { "epoch": 8.557160048134778, "grad_norm": 3.0834388732910156, "learning_rate": 0.00019687226125815013, "loss": 7.3224, "step": 71110 }, { "epoch": 8.558363417569193, "grad_norm": 3.2998011112213135, "learning_rate": 0.00019687131717323114, "loss": 7.4092, "step": 71120 }, { "epoch": 8.559566787003611, "grad_norm": 3.658404588699341, "learning_rate": 0.0001968703729481154, "loss": 7.3639, "step": 71130 }, { "epoch": 8.560770156438027, "grad_norm": 2.8112010955810547, "learning_rate": 0.00019686942858280435, "loss": 7.2514, "step": 71140 }, { "epoch": 8.561973525872443, "grad_norm": 3.114164352416992, "learning_rate": 0.00019686848407729932, "loss": 7.4219, "step": 71150 }, { "epoch": 8.56317689530686, "grad_norm": 7.612882614135742, "learning_rate": 0.00019686753943160167, "loss": 7.309, "step": 71160 }, { "epoch": 8.564380264741276, "grad_norm": 3.221165418624878, "learning_rate": 0.00019686659464571277, "loss": 7.418, "step": 71170 }, { "epoch": 8.565583634175692, "grad_norm": 3.081568717956543, "learning_rate": 0.000196865649719634, "loss": 7.3208, "step": 71180 }, { "epoch": 8.566787003610107, "grad_norm": 2.8525428771972656, "learning_rate": 0.00019686470465336673, "loss": 7.2933, "step": 71190 }, { "epoch": 8.567990373044525, "grad_norm": 3.2281641960144043, "learning_rate": 0.00019686375944691231, "loss": 7.4252, "step": 71200 }, { "epoch": 8.56919374247894, "grad_norm": 2.482732057571411, "learning_rate": 0.0001968628141002721, "loss": 7.2929, "step": 71210 }, { "epoch": 8.570397111913357, "grad_norm": 2.630949020385742, "learning_rate": 0.0001968618686134475, "loss": 7.3732, "step": 71220 }, { "epoch": 8.571600481347774, "grad_norm": 3.7805721759796143, "learning_rate": 0.00019686092298643986, "loss": 7.3923, "step": 71230 }, { "epoch": 8.57280385078219, "grad_norm": 7.153574466705322, "learning_rate": 0.00019685997721925053, "loss": 7.2577, "step": 71240 }, { "epoch": 8.574007220216606, "grad_norm": 4.336424827575684, "learning_rate": 0.0001968590313118809, "loss": 7.4687, "step": 71250 }, { "epoch": 8.575210589651023, "grad_norm": 3.935218572616577, "learning_rate": 0.00019685808526433238, "loss": 7.221, "step": 71260 }, { "epoch": 8.57641395908544, "grad_norm": 7.177575588226318, "learning_rate": 0.00019685713907660626, "loss": 7.3643, "step": 71270 }, { "epoch": 8.577617328519855, "grad_norm": 4.28988790512085, "learning_rate": 0.00019685619274870396, "loss": 7.3117, "step": 71280 }, { "epoch": 8.578820697954272, "grad_norm": 5.960178375244141, "learning_rate": 0.00019685524628062684, "loss": 7.248, "step": 71290 }, { "epoch": 8.580024067388688, "grad_norm": 6.8065643310546875, "learning_rate": 0.00019685429967237627, "loss": 7.2714, "step": 71300 }, { "epoch": 8.581227436823104, "grad_norm": 7.6462788581848145, "learning_rate": 0.00019685335292395358, "loss": 7.2764, "step": 71310 }, { "epoch": 8.582430806257522, "grad_norm": 10.469433784484863, "learning_rate": 0.0001968524060353602, "loss": 7.2509, "step": 71320 }, { "epoch": 8.583634175691937, "grad_norm": 6.646592140197754, "learning_rate": 0.00019685145900659746, "loss": 7.3452, "step": 71330 }, { "epoch": 8.584837545126353, "grad_norm": 7.079794406890869, "learning_rate": 0.00019685051183766677, "loss": 7.3314, "step": 71340 }, { "epoch": 8.58604091456077, "grad_norm": 3.4195752143859863, "learning_rate": 0.00019684956452856945, "loss": 7.2803, "step": 71350 }, { "epoch": 8.587244283995187, "grad_norm": 9.062658309936523, "learning_rate": 0.00019684861707930696, "loss": 7.2947, "step": 71360 }, { "epoch": 8.588447653429602, "grad_norm": 6.66270112991333, "learning_rate": 0.00019684766948988055, "loss": 7.18, "step": 71370 }, { "epoch": 8.58965102286402, "grad_norm": 8.045583724975586, "learning_rate": 0.00019684672176029168, "loss": 7.3799, "step": 71380 }, { "epoch": 8.590854392298436, "grad_norm": 9.857762336730957, "learning_rate": 0.00019684577389054167, "loss": 7.2175, "step": 71390 }, { "epoch": 8.592057761732852, "grad_norm": 13.152425765991211, "learning_rate": 0.00019684482588063192, "loss": 7.2631, "step": 71400 }, { "epoch": 8.593261131167269, "grad_norm": 23.076932907104492, "learning_rate": 0.0001968438777305638, "loss": 7.1899, "step": 71410 }, { "epoch": 8.594464500601685, "grad_norm": 24.89419937133789, "learning_rate": 0.0001968429294403387, "loss": 7.1728, "step": 71420 }, { "epoch": 8.5956678700361, "grad_norm": 33.4057502746582, "learning_rate": 0.00019684198100995795, "loss": 7.3295, "step": 71430 }, { "epoch": 8.596871239470518, "grad_norm": 14.6361083984375, "learning_rate": 0.00019684103243942296, "loss": 7.2593, "step": 71440 }, { "epoch": 8.598074608904934, "grad_norm": 22.988183975219727, "learning_rate": 0.0001968400837287351, "loss": 7.2897, "step": 71450 }, { "epoch": 8.59927797833935, "grad_norm": 23.829151153564453, "learning_rate": 0.0001968391348778957, "loss": 7.3138, "step": 71460 }, { "epoch": 8.600481347773766, "grad_norm": 46.30461883544922, "learning_rate": 0.00019683818588690617, "loss": 7.2216, "step": 71470 }, { "epoch": 8.601684717208183, "grad_norm": 51.192527770996094, "learning_rate": 0.00019683723675576785, "loss": 7.2637, "step": 71480 }, { "epoch": 8.602888086642599, "grad_norm": 20.90011215209961, "learning_rate": 0.00019683628748448217, "loss": 7.1765, "step": 71490 }, { "epoch": 8.604091456077015, "grad_norm": 20.127248764038086, "learning_rate": 0.00019683533807305045, "loss": 7.2282, "step": 71500 }, { "epoch": 8.605294825511432, "grad_norm": 21.1735782623291, "learning_rate": 0.00019683438852147414, "loss": 7.2744, "step": 71510 }, { "epoch": 8.606498194945848, "grad_norm": 14.524378776550293, "learning_rate": 0.00019683343882975453, "loss": 7.2047, "step": 71520 }, { "epoch": 8.607701564380264, "grad_norm": 27.374757766723633, "learning_rate": 0.00019683248899789302, "loss": 7.2963, "step": 71530 }, { "epoch": 8.608904933814681, "grad_norm": 45.3835563659668, "learning_rate": 0.000196831539025891, "loss": 7.1218, "step": 71540 }, { "epoch": 8.610108303249097, "grad_norm": 30.102523803710938, "learning_rate": 0.00019683058891374985, "loss": 7.2178, "step": 71550 }, { "epoch": 8.611311672683513, "grad_norm": 398.0894470214844, "learning_rate": 0.00019682963866147089, "loss": 7.1868, "step": 71560 }, { "epoch": 8.61251504211793, "grad_norm": 55.32802200317383, "learning_rate": 0.00019682868826905557, "loss": 7.2882, "step": 71570 }, { "epoch": 8.613718411552346, "grad_norm": 56.809810638427734, "learning_rate": 0.00019682773773650522, "loss": 7.2094, "step": 71580 }, { "epoch": 8.614921780986762, "grad_norm": 52.83161926269531, "learning_rate": 0.00019682678706382122, "loss": 7.1541, "step": 71590 }, { "epoch": 8.61612515042118, "grad_norm": 46.8318977355957, "learning_rate": 0.00019682583625100497, "loss": 7.3496, "step": 71600 }, { "epoch": 8.617328519855596, "grad_norm": 92.50406646728516, "learning_rate": 0.00019682488529805783, "loss": 7.268, "step": 71610 }, { "epoch": 8.618531889290011, "grad_norm": 54.915550231933594, "learning_rate": 0.00019682393420498116, "loss": 7.2467, "step": 71620 }, { "epoch": 8.619735258724429, "grad_norm": 101.87727355957031, "learning_rate": 0.00019682298297177636, "loss": 7.2832, "step": 71630 }, { "epoch": 8.620938628158845, "grad_norm": 1934.8565673828125, "learning_rate": 0.00019682203159844482, "loss": 7.1457, "step": 71640 }, { "epoch": 8.62214199759326, "grad_norm": 86.66309356689453, "learning_rate": 0.00019682108008498784, "loss": 7.2731, "step": 71650 }, { "epoch": 8.623345367027678, "grad_norm": 125.84171295166016, "learning_rate": 0.0001968201284314069, "loss": 7.2701, "step": 71660 }, { "epoch": 8.624548736462094, "grad_norm": 69.844482421875, "learning_rate": 0.00019681917663770334, "loss": 7.3399, "step": 71670 }, { "epoch": 8.62575210589651, "grad_norm": 258.81988525390625, "learning_rate": 0.0001968182247038785, "loss": 7.3469, "step": 71680 }, { "epoch": 8.626955475330927, "grad_norm": 140.39862060546875, "learning_rate": 0.00019681727262993377, "loss": 7.2338, "step": 71690 }, { "epoch": 8.628158844765343, "grad_norm": 65.96165466308594, "learning_rate": 0.00019681632041587056, "loss": 7.2687, "step": 71700 }, { "epoch": 8.629362214199759, "grad_norm": 89.50534057617188, "learning_rate": 0.00019681536806169026, "loss": 7.2222, "step": 71710 }, { "epoch": 8.630565583634176, "grad_norm": 48.90932083129883, "learning_rate": 0.0001968144155673942, "loss": 7.2026, "step": 71720 }, { "epoch": 8.631768953068592, "grad_norm": 53.440757751464844, "learning_rate": 0.00019681346293298378, "loss": 7.3175, "step": 71730 }, { "epoch": 8.632972322503008, "grad_norm": 70.87751770019531, "learning_rate": 0.00019681251015846036, "loss": 7.293, "step": 71740 }, { "epoch": 8.634175691937426, "grad_norm": 62.420082092285156, "learning_rate": 0.00019681155724382535, "loss": 7.345, "step": 71750 }, { "epoch": 8.635379061371841, "grad_norm": 110.84393310546875, "learning_rate": 0.0001968106041890801, "loss": 7.2736, "step": 71760 }, { "epoch": 8.636582430806257, "grad_norm": 66.7536849975586, "learning_rate": 0.00019680965099422602, "loss": 7.2736, "step": 71770 }, { "epoch": 8.637785800240675, "grad_norm": 106.4581069946289, "learning_rate": 0.00019680869765926446, "loss": 7.2642, "step": 71780 }, { "epoch": 8.63898916967509, "grad_norm": 164.11788940429688, "learning_rate": 0.0001968077441841968, "loss": 7.2527, "step": 71790 }, { "epoch": 8.640192539109506, "grad_norm": 130.2183837890625, "learning_rate": 0.00019680679056902447, "loss": 7.3717, "step": 71800 }, { "epoch": 8.641395908543924, "grad_norm": 3453.31494140625, "learning_rate": 0.00019680583681374878, "loss": 7.2511, "step": 71810 }, { "epoch": 8.64259927797834, "grad_norm": 577.374755859375, "learning_rate": 0.00019680488291837117, "loss": 7.3618, "step": 71820 }, { "epoch": 8.643802647412755, "grad_norm": 172.49139404296875, "learning_rate": 0.00019680392888289297, "loss": 7.3405, "step": 71830 }, { "epoch": 8.645006016847173, "grad_norm": 82.99141693115234, "learning_rate": 0.0001968029747073156, "loss": 7.2138, "step": 71840 }, { "epoch": 8.646209386281589, "grad_norm": 97.69009399414062, "learning_rate": 0.00019680202039164043, "loss": 7.3318, "step": 71850 }, { "epoch": 8.647412755716005, "grad_norm": 138.60333251953125, "learning_rate": 0.00019680106593586885, "loss": 7.1722, "step": 71860 }, { "epoch": 8.648616125150422, "grad_norm": 101.49639892578125, "learning_rate": 0.0001968001113400022, "loss": 7.2632, "step": 71870 }, { "epoch": 8.649819494584838, "grad_norm": 165.77996826171875, "learning_rate": 0.0001967991566040419, "loss": 7.2798, "step": 71880 }, { "epoch": 8.651022864019254, "grad_norm": 126.64981079101562, "learning_rate": 0.00019679820172798933, "loss": 7.2899, "step": 71890 }, { "epoch": 8.65222623345367, "grad_norm": 103.47054290771484, "learning_rate": 0.00019679724671184585, "loss": 7.2584, "step": 71900 }, { "epoch": 8.653429602888087, "grad_norm": 120.01701354980469, "learning_rate": 0.00019679629155561285, "loss": 7.2726, "step": 71910 }, { "epoch": 8.654632972322503, "grad_norm": 50.30412673950195, "learning_rate": 0.00019679533625929175, "loss": 7.4641, "step": 71920 }, { "epoch": 8.655836341756919, "grad_norm": 157.47386169433594, "learning_rate": 0.00019679438082288386, "loss": 7.2505, "step": 71930 }, { "epoch": 8.657039711191336, "grad_norm": 155.23707580566406, "learning_rate": 0.00019679342524639063, "loss": 7.2249, "step": 71940 }, { "epoch": 8.658243080625752, "grad_norm": 224.6519317626953, "learning_rate": 0.0001967924695298134, "loss": 7.3257, "step": 71950 }, { "epoch": 8.659446450060168, "grad_norm": 211.5713653564453, "learning_rate": 0.00019679151367315358, "loss": 7.3759, "step": 71960 }, { "epoch": 8.660649819494585, "grad_norm": 126.58041381835938, "learning_rate": 0.00019679055767641255, "loss": 7.2925, "step": 71970 }, { "epoch": 8.661853188929001, "grad_norm": 227.46240234375, "learning_rate": 0.00019678960153959166, "loss": 7.2896, "step": 71980 }, { "epoch": 8.663056558363417, "grad_norm": 116.68350219726562, "learning_rate": 0.00019678864526269233, "loss": 7.3468, "step": 71990 }, { "epoch": 8.664259927797834, "grad_norm": 219.05282592773438, "learning_rate": 0.00019678768884571595, "loss": 7.2951, "step": 72000 }, { "epoch": 8.66546329723225, "grad_norm": 250.28330993652344, "learning_rate": 0.00019678673228866388, "loss": 7.3772, "step": 72010 }, { "epoch": 8.666666666666666, "grad_norm": 133.87220764160156, "learning_rate": 0.00019678577559153752, "loss": 7.2715, "step": 72020 }, { "epoch": 8.667870036101084, "grad_norm": 175.6766815185547, "learning_rate": 0.00019678481875433823, "loss": 7.3487, "step": 72030 }, { "epoch": 8.6690734055355, "grad_norm": 366.0092468261719, "learning_rate": 0.00019678386177706742, "loss": 7.4083, "step": 72040 }, { "epoch": 8.670276774969915, "grad_norm": 76.88314056396484, "learning_rate": 0.00019678290465972647, "loss": 7.352, "step": 72050 }, { "epoch": 8.671480144404333, "grad_norm": 79.90327453613281, "learning_rate": 0.00019678194740231674, "loss": 7.3391, "step": 72060 }, { "epoch": 8.672683513838749, "grad_norm": 78.71815490722656, "learning_rate": 0.00019678099000483966, "loss": 7.3185, "step": 72070 }, { "epoch": 8.673886883273164, "grad_norm": 309.57965087890625, "learning_rate": 0.0001967800324672966, "loss": 7.2959, "step": 72080 }, { "epoch": 8.675090252707582, "grad_norm": 109.93051147460938, "learning_rate": 0.00019677907478968894, "loss": 7.2478, "step": 72090 }, { "epoch": 8.676293622141998, "grad_norm": 211.07933044433594, "learning_rate": 0.00019677811697201803, "loss": 7.3607, "step": 72100 }, { "epoch": 8.677496991576414, "grad_norm": 356.20892333984375, "learning_rate": 0.00019677715901428533, "loss": 7.3882, "step": 72110 }, { "epoch": 8.678700361010831, "grad_norm": 163.8538818359375, "learning_rate": 0.00019677620091649217, "loss": 7.278, "step": 72120 }, { "epoch": 8.679903730445247, "grad_norm": 541.84228515625, "learning_rate": 0.00019677524267863996, "loss": 7.36, "step": 72130 }, { "epoch": 8.681107099879663, "grad_norm": 260.95477294921875, "learning_rate": 0.00019677428430073005, "loss": 7.2536, "step": 72140 }, { "epoch": 8.68231046931408, "grad_norm": 83.66963195800781, "learning_rate": 0.00019677332578276388, "loss": 7.4829, "step": 72150 }, { "epoch": 8.683513838748496, "grad_norm": 202.00425720214844, "learning_rate": 0.00019677236712474283, "loss": 7.3373, "step": 72160 }, { "epoch": 8.684717208182912, "grad_norm": 401.5888366699219, "learning_rate": 0.00019677140832666826, "loss": 7.3372, "step": 72170 }, { "epoch": 8.685920577617328, "grad_norm": 379.677490234375, "learning_rate": 0.00019677044938854157, "loss": 7.3094, "step": 72180 }, { "epoch": 8.687123947051745, "grad_norm": 384.5784912109375, "learning_rate": 0.00019676949031036415, "loss": 7.369, "step": 72190 }, { "epoch": 8.688327316486161, "grad_norm": 180.51034545898438, "learning_rate": 0.0001967685310921374, "loss": 7.3727, "step": 72200 }, { "epoch": 8.689530685920577, "grad_norm": 93.56525421142578, "learning_rate": 0.00019676757173386265, "loss": 7.3642, "step": 72210 }, { "epoch": 8.690734055354994, "grad_norm": 165.45242309570312, "learning_rate": 0.0001967666122355414, "loss": 7.4359, "step": 72220 }, { "epoch": 8.69193742478941, "grad_norm": 252.12612915039062, "learning_rate": 0.00019676565259717493, "loss": 7.3754, "step": 72230 }, { "epoch": 8.693140794223826, "grad_norm": 267.654541015625, "learning_rate": 0.00019676469281876467, "loss": 7.3834, "step": 72240 }, { "epoch": 8.694344163658243, "grad_norm": 359.25933837890625, "learning_rate": 0.00019676373290031197, "loss": 7.3656, "step": 72250 }, { "epoch": 8.69554753309266, "grad_norm": 960.4390869140625, "learning_rate": 0.00019676277284181833, "loss": 7.5518, "step": 72260 }, { "epoch": 8.696750902527075, "grad_norm": 373.3681640625, "learning_rate": 0.00019676181264328504, "loss": 7.3583, "step": 72270 }, { "epoch": 8.697954271961493, "grad_norm": 796.1593627929688, "learning_rate": 0.0001967608523047135, "loss": 7.467, "step": 72280 }, { "epoch": 8.699157641395908, "grad_norm": 479.3305358886719, "learning_rate": 0.00019675989182610514, "loss": 7.5587, "step": 72290 }, { "epoch": 8.700361010830324, "grad_norm": 585.9391479492188, "learning_rate": 0.00019675893120746132, "loss": 7.4181, "step": 72300 }, { "epoch": 8.701564380264742, "grad_norm": 971.416748046875, "learning_rate": 0.00019675797044878344, "loss": 7.4307, "step": 72310 }, { "epoch": 8.702767749699158, "grad_norm": 298.24786376953125, "learning_rate": 0.00019675700955007288, "loss": 7.416, "step": 72320 }, { "epoch": 8.703971119133573, "grad_norm": 321.9739990234375, "learning_rate": 0.00019675604851133105, "loss": 7.3523, "step": 72330 }, { "epoch": 8.705174488567991, "grad_norm": 280.7763366699219, "learning_rate": 0.00019675508733255933, "loss": 7.4245, "step": 72340 }, { "epoch": 8.706377858002407, "grad_norm": 551.8917236328125, "learning_rate": 0.0001967541260137591, "loss": 7.4216, "step": 72350 }, { "epoch": 8.707581227436823, "grad_norm": 135.53799438476562, "learning_rate": 0.00019675316455493175, "loss": 7.4067, "step": 72360 }, { "epoch": 8.70878459687124, "grad_norm": 256.7081604003906, "learning_rate": 0.0001967522029560787, "loss": 7.4115, "step": 72370 }, { "epoch": 8.709987966305656, "grad_norm": 167.232177734375, "learning_rate": 0.00019675124121720132, "loss": 7.412, "step": 72380 }, { "epoch": 8.711191335740072, "grad_norm": 239.28404235839844, "learning_rate": 0.000196750279338301, "loss": 7.3593, "step": 72390 }, { "epoch": 8.71239470517449, "grad_norm": 119.85040283203125, "learning_rate": 0.00019674931731937916, "loss": 7.4017, "step": 72400 }, { "epoch": 8.713598074608905, "grad_norm": 196.09478759765625, "learning_rate": 0.00019674835516043717, "loss": 7.3264, "step": 72410 }, { "epoch": 8.71480144404332, "grad_norm": 523.9920043945312, "learning_rate": 0.00019674739286147644, "loss": 7.3477, "step": 72420 }, { "epoch": 8.716004813477738, "grad_norm": 475.8111877441406, "learning_rate": 0.0001967464304224983, "loss": 7.4119, "step": 72430 }, { "epoch": 8.717208182912154, "grad_norm": 76.03440856933594, "learning_rate": 0.0001967454678435042, "loss": 7.2692, "step": 72440 }, { "epoch": 8.71841155234657, "grad_norm": 122.36022186279297, "learning_rate": 0.00019674450512449553, "loss": 7.3012, "step": 72450 }, { "epoch": 8.719614921780988, "grad_norm": 89.64482879638672, "learning_rate": 0.0001967435422654737, "loss": 7.2409, "step": 72460 }, { "epoch": 8.720818291215403, "grad_norm": 41.002525329589844, "learning_rate": 0.00019674257926644004, "loss": 7.239, "step": 72470 }, { "epoch": 8.722021660649819, "grad_norm": 114.916259765625, "learning_rate": 0.00019674161612739602, "loss": 7.3411, "step": 72480 }, { "epoch": 8.723225030084237, "grad_norm": 131.14639282226562, "learning_rate": 0.00019674065284834295, "loss": 7.3203, "step": 72490 }, { "epoch": 8.724428399518652, "grad_norm": 40.085140228271484, "learning_rate": 0.00019673968942928233, "loss": 7.2774, "step": 72500 }, { "epoch": 8.725631768953068, "grad_norm": 69.3632583618164, "learning_rate": 0.00019673872587021546, "loss": 7.2698, "step": 72510 }, { "epoch": 8.726835138387486, "grad_norm": 52.196964263916016, "learning_rate": 0.00019673776217114374, "loss": 7.2324, "step": 72520 }, { "epoch": 8.728038507821902, "grad_norm": 99.98628234863281, "learning_rate": 0.00019673679833206865, "loss": 7.3338, "step": 72530 }, { "epoch": 8.729241877256317, "grad_norm": 90.21719360351562, "learning_rate": 0.0001967358343529915, "loss": 7.2763, "step": 72540 }, { "epoch": 8.730445246690735, "grad_norm": 56.11993408203125, "learning_rate": 0.00019673487023391372, "loss": 7.191, "step": 72550 }, { "epoch": 8.73164861612515, "grad_norm": 102.80853271484375, "learning_rate": 0.0001967339059748367, "loss": 7.2479, "step": 72560 }, { "epoch": 8.732851985559567, "grad_norm": 64.7136001586914, "learning_rate": 0.00019673294157576183, "loss": 7.1636, "step": 72570 }, { "epoch": 8.734055354993982, "grad_norm": 85.17577362060547, "learning_rate": 0.00019673197703669052, "loss": 7.2703, "step": 72580 }, { "epoch": 8.7352587244284, "grad_norm": 34.33113098144531, "learning_rate": 0.00019673101235762416, "loss": 7.2237, "step": 72590 }, { "epoch": 8.736462093862816, "grad_norm": 11.690998077392578, "learning_rate": 0.00019673004753856414, "loss": 7.2095, "step": 72600 }, { "epoch": 8.737665463297231, "grad_norm": 35.17302703857422, "learning_rate": 0.00019672908257951184, "loss": 7.1793, "step": 72610 }, { "epoch": 8.738868832731649, "grad_norm": 25.318931579589844, "learning_rate": 0.0001967281174804687, "loss": 7.2641, "step": 72620 }, { "epoch": 8.740072202166065, "grad_norm": 84.61393737792969, "learning_rate": 0.0001967271522414361, "loss": 7.3209, "step": 72630 }, { "epoch": 8.74127557160048, "grad_norm": 69.09017181396484, "learning_rate": 0.0001967261868624154, "loss": 7.2556, "step": 72640 }, { "epoch": 8.742478941034898, "grad_norm": 48.37390899658203, "learning_rate": 0.00019672522134340806, "loss": 7.2947, "step": 72650 }, { "epoch": 8.743682310469314, "grad_norm": 45.822628021240234, "learning_rate": 0.0001967242556844154, "loss": 7.2985, "step": 72660 }, { "epoch": 8.74488567990373, "grad_norm": 247.39044189453125, "learning_rate": 0.0001967232898854389, "loss": 7.2706, "step": 72670 }, { "epoch": 8.746089049338147, "grad_norm": 81.0790023803711, "learning_rate": 0.0001967223239464799, "loss": 7.3618, "step": 72680 }, { "epoch": 8.747292418772563, "grad_norm": 58.9289665222168, "learning_rate": 0.0001967213578675398, "loss": 7.2608, "step": 72690 }, { "epoch": 8.748495788206979, "grad_norm": 72.7082290649414, "learning_rate": 0.00019672039164862003, "loss": 7.1639, "step": 72700 }, { "epoch": 8.749699157641396, "grad_norm": 51.148956298828125, "learning_rate": 0.00019671942528972202, "loss": 7.1739, "step": 72710 }, { "epoch": 8.750902527075812, "grad_norm": 51.62891387939453, "learning_rate": 0.00019671845879084708, "loss": 7.2465, "step": 72720 }, { "epoch": 8.752105896510228, "grad_norm": 109.6996841430664, "learning_rate": 0.00019671749215199665, "loss": 7.2561, "step": 72730 }, { "epoch": 8.753309265944646, "grad_norm": 97.04100036621094, "learning_rate": 0.00019671652537317212, "loss": 7.2702, "step": 72740 }, { "epoch": 8.754512635379061, "grad_norm": 63.299251556396484, "learning_rate": 0.00019671555845437492, "loss": 7.255, "step": 72750 }, { "epoch": 8.755716004813477, "grad_norm": 119.66692352294922, "learning_rate": 0.0001967145913956064, "loss": 7.2322, "step": 72760 }, { "epoch": 8.756919374247895, "grad_norm": 52.720359802246094, "learning_rate": 0.00019671362419686802, "loss": 7.2204, "step": 72770 }, { "epoch": 8.75812274368231, "grad_norm": 166.690185546875, "learning_rate": 0.00019671265685816116, "loss": 7.3432, "step": 72780 }, { "epoch": 8.759326113116726, "grad_norm": 54.056705474853516, "learning_rate": 0.0001967116893794872, "loss": 7.1537, "step": 72790 }, { "epoch": 8.760529482551144, "grad_norm": 35.2452278137207, "learning_rate": 0.00019671072176084752, "loss": 7.2601, "step": 72800 }, { "epoch": 8.76173285198556, "grad_norm": 86.97594451904297, "learning_rate": 0.00019670975400224355, "loss": 7.1536, "step": 72810 }, { "epoch": 8.762936221419976, "grad_norm": 84.7102279663086, "learning_rate": 0.0001967087861036767, "loss": 7.1861, "step": 72820 }, { "epoch": 8.764139590854393, "grad_norm": 84.19380950927734, "learning_rate": 0.00019670781806514838, "loss": 7.3394, "step": 72830 }, { "epoch": 8.765342960288809, "grad_norm": 125.45092010498047, "learning_rate": 0.00019670684988665996, "loss": 7.1695, "step": 72840 }, { "epoch": 8.766546329723225, "grad_norm": 88.8373794555664, "learning_rate": 0.00019670588156821283, "loss": 7.2827, "step": 72850 }, { "epoch": 8.76774969915764, "grad_norm": 135.4306182861328, "learning_rate": 0.00019670491310980843, "loss": 7.2219, "step": 72860 }, { "epoch": 8.768953068592058, "grad_norm": 83.65979766845703, "learning_rate": 0.00019670394451144817, "loss": 7.191, "step": 72870 }, { "epoch": 8.770156438026474, "grad_norm": 116.87435913085938, "learning_rate": 0.0001967029757731334, "loss": 7.273, "step": 72880 }, { "epoch": 8.77135980746089, "grad_norm": 132.07652282714844, "learning_rate": 0.00019670200689486553, "loss": 7.2315, "step": 72890 }, { "epoch": 8.772563176895307, "grad_norm": 102.33387756347656, "learning_rate": 0.00019670103787664602, "loss": 7.3263, "step": 72900 }, { "epoch": 8.773766546329723, "grad_norm": 267.4897155761719, "learning_rate": 0.0001967000687184762, "loss": 7.269, "step": 72910 }, { "epoch": 8.774969915764139, "grad_norm": 113.48148345947266, "learning_rate": 0.00019669909942035752, "loss": 7.2553, "step": 72920 }, { "epoch": 8.776173285198556, "grad_norm": 244.1143798828125, "learning_rate": 0.00019669812998229138, "loss": 7.3231, "step": 72930 }, { "epoch": 8.777376654632972, "grad_norm": 165.69198608398438, "learning_rate": 0.00019669716040427915, "loss": 7.3271, "step": 72940 }, { "epoch": 8.778580024067388, "grad_norm": 140.86488342285156, "learning_rate": 0.00019669619068632228, "loss": 7.1785, "step": 72950 }, { "epoch": 8.779783393501805, "grad_norm": 232.5905303955078, "learning_rate": 0.0001966952208284221, "loss": 7.2789, "step": 72960 }, { "epoch": 8.780986762936221, "grad_norm": 151.40029907226562, "learning_rate": 0.0001966942508305801, "loss": 7.3207, "step": 72970 }, { "epoch": 8.782190132370637, "grad_norm": 425.86578369140625, "learning_rate": 0.00019669328069279765, "loss": 7.3771, "step": 72980 }, { "epoch": 8.783393501805055, "grad_norm": 977.79296875, "learning_rate": 0.00019669231041507615, "loss": 7.3079, "step": 72990 }, { "epoch": 8.78459687123947, "grad_norm": 410.035888671875, "learning_rate": 0.000196691339997417, "loss": 7.3176, "step": 73000 }, { "epoch": 8.785800240673886, "grad_norm": 10397.0966796875, "learning_rate": 0.00019669036943982163, "loss": 7.4211, "step": 73010 }, { "epoch": 8.787003610108304, "grad_norm": 72771.4921875, "learning_rate": 0.0001966893987422914, "loss": 7.6, "step": 73020 }, { "epoch": 8.78820697954272, "grad_norm": 440206.6875, "learning_rate": 0.00019668842790482774, "loss": 7.4552, "step": 73030 }, { "epoch": 8.789410348977135, "grad_norm": 180011.390625, "learning_rate": 0.00019668745692743206, "loss": 7.6077, "step": 73040 }, { "epoch": 8.790613718411553, "grad_norm": 134450.203125, "learning_rate": 0.00019668648581010572, "loss": 7.5985, "step": 73050 }, { "epoch": 8.791817087845969, "grad_norm": 611143.25, "learning_rate": 0.00019668551455285022, "loss": 7.807, "step": 73060 }, { "epoch": 8.793020457280385, "grad_norm": 46172.26953125, "learning_rate": 0.00019668454315566685, "loss": 7.7018, "step": 73070 }, { "epoch": 8.794223826714802, "grad_norm": 94033.984375, "learning_rate": 0.0001966835716185571, "loss": 7.8388, "step": 73080 }, { "epoch": 8.795427196149218, "grad_norm": 73787.5625, "learning_rate": 0.00019668259994152235, "loss": 7.9338, "step": 73090 }, { "epoch": 8.796630565583634, "grad_norm": 77311.5546875, "learning_rate": 0.00019668162812456404, "loss": 7.9655, "step": 73100 }, { "epoch": 8.797833935018051, "grad_norm": 66708.921875, "learning_rate": 0.00019668065616768355, "loss": 8.0024, "step": 73110 }, { "epoch": 8.799037304452467, "grad_norm": 77981.0390625, "learning_rate": 0.00019667968407088222, "loss": 7.9471, "step": 73120 }, { "epoch": 8.800240673886883, "grad_norm": 42229.23828125, "learning_rate": 0.00019667871183416156, "loss": 7.7234, "step": 73130 }, { "epoch": 8.8014440433213, "grad_norm": 74030.171875, "learning_rate": 0.00019667773945752294, "loss": 7.6446, "step": 73140 }, { "epoch": 8.802647412755716, "grad_norm": 42493.125, "learning_rate": 0.00019667676694096775, "loss": 7.5837, "step": 73150 }, { "epoch": 8.803850782190132, "grad_norm": 113942.7265625, "learning_rate": 0.0001966757942844974, "loss": 7.6676, "step": 73160 }, { "epoch": 8.80505415162455, "grad_norm": 301.4753723144531, "learning_rate": 0.0001966748214881133, "loss": 7.4604, "step": 73170 }, { "epoch": 8.806257521058965, "grad_norm": 209.85147094726562, "learning_rate": 0.00019667384855181688, "loss": 7.5237, "step": 73180 }, { "epoch": 8.807460890493381, "grad_norm": 152.94668579101562, "learning_rate": 0.00019667287547560952, "loss": 7.2736, "step": 73190 }, { "epoch": 8.808664259927799, "grad_norm": 568.5696411132812, "learning_rate": 0.0001966719022594927, "loss": 7.504, "step": 73200 }, { "epoch": 8.809867629362214, "grad_norm": 154.757568359375, "learning_rate": 0.0001966709289034677, "loss": 7.3922, "step": 73210 }, { "epoch": 8.81107099879663, "grad_norm": 198.43405151367188, "learning_rate": 0.000196669955407536, "loss": 7.3032, "step": 73220 }, { "epoch": 8.812274368231048, "grad_norm": 1810.9339599609375, "learning_rate": 0.00019666898177169903, "loss": 7.3786, "step": 73230 }, { "epoch": 8.813477737665464, "grad_norm": 4194.4169921875, "learning_rate": 0.0001966680079959582, "loss": 7.5287, "step": 73240 }, { "epoch": 8.81468110709988, "grad_norm": 310.0009460449219, "learning_rate": 0.00019666703408031486, "loss": 7.4477, "step": 73250 }, { "epoch": 8.815884476534297, "grad_norm": 145.40956115722656, "learning_rate": 0.00019666606002477046, "loss": 7.3341, "step": 73260 }, { "epoch": 8.817087845968713, "grad_norm": 63.58095169067383, "learning_rate": 0.00019666508582932642, "loss": 7.411, "step": 73270 }, { "epoch": 8.818291215403129, "grad_norm": 148.66078186035156, "learning_rate": 0.00019666411149398414, "loss": 7.351, "step": 73280 }, { "epoch": 8.819494584837544, "grad_norm": 200.17071533203125, "learning_rate": 0.000196663137018745, "loss": 7.2373, "step": 73290 }, { "epoch": 8.820697954271962, "grad_norm": 148.67649841308594, "learning_rate": 0.00019666216240361045, "loss": 7.2577, "step": 73300 }, { "epoch": 8.821901323706378, "grad_norm": 254.94749450683594, "learning_rate": 0.0001966611876485819, "loss": 7.4477, "step": 73310 }, { "epoch": 8.823104693140793, "grad_norm": 355.9664306640625, "learning_rate": 0.00019666021275366072, "loss": 7.3074, "step": 73320 }, { "epoch": 8.824308062575211, "grad_norm": 1004.6361694335938, "learning_rate": 0.00019665923771884837, "loss": 7.3727, "step": 73330 }, { "epoch": 8.825511432009627, "grad_norm": 292.1346435546875, "learning_rate": 0.0001966582625441462, "loss": 7.2991, "step": 73340 }, { "epoch": 8.826714801444043, "grad_norm": 318.120361328125, "learning_rate": 0.0001966572872295557, "loss": 7.4043, "step": 73350 }, { "epoch": 8.82791817087846, "grad_norm": 274.3759460449219, "learning_rate": 0.00019665631177507824, "loss": 7.2219, "step": 73360 }, { "epoch": 8.829121540312876, "grad_norm": 264.17401123046875, "learning_rate": 0.00019665533618071522, "loss": 7.433, "step": 73370 }, { "epoch": 8.830324909747292, "grad_norm": 420.0724182128906, "learning_rate": 0.00019665436044646805, "loss": 7.291, "step": 73380 }, { "epoch": 8.83152827918171, "grad_norm": 235.51654052734375, "learning_rate": 0.00019665338457233817, "loss": 7.3185, "step": 73390 }, { "epoch": 8.832731648616125, "grad_norm": 459.8696594238281, "learning_rate": 0.000196652408558327, "loss": 7.3206, "step": 73400 }, { "epoch": 8.833935018050541, "grad_norm": 1089.8634033203125, "learning_rate": 0.0001966514324044359, "loss": 7.3873, "step": 73410 }, { "epoch": 8.835138387484958, "grad_norm": 503.56817626953125, "learning_rate": 0.00019665045611066634, "loss": 7.462, "step": 73420 }, { "epoch": 8.836341756919374, "grad_norm": 1028.46923828125, "learning_rate": 0.0001966494796770197, "loss": 7.5007, "step": 73430 }, { "epoch": 8.83754512635379, "grad_norm": 1281.8826904296875, "learning_rate": 0.00019664850310349737, "loss": 7.5903, "step": 73440 }, { "epoch": 8.838748495788208, "grad_norm": 577.5121459960938, "learning_rate": 0.00019664752639010085, "loss": 7.617, "step": 73450 }, { "epoch": 8.839951865222623, "grad_norm": 695.2171020507812, "learning_rate": 0.00019664654953683146, "loss": 7.4535, "step": 73460 }, { "epoch": 8.84115523465704, "grad_norm": 687.0921020507812, "learning_rate": 0.00019664557254369067, "loss": 7.524, "step": 73470 }, { "epoch": 8.842358604091457, "grad_norm": 1461.2998046875, "learning_rate": 0.00019664459541067988, "loss": 7.5276, "step": 73480 }, { "epoch": 8.843561973525873, "grad_norm": 1477.6116943359375, "learning_rate": 0.00019664361813780045, "loss": 7.4721, "step": 73490 }, { "epoch": 8.844765342960288, "grad_norm": 384.4921875, "learning_rate": 0.0001966426407250539, "loss": 7.7023, "step": 73500 }, { "epoch": 8.845968712394706, "grad_norm": 439.4667663574219, "learning_rate": 0.00019664166317244158, "loss": 7.5249, "step": 73510 }, { "epoch": 8.847172081829122, "grad_norm": 439.1540832519531, "learning_rate": 0.0001966406854799649, "loss": 7.5037, "step": 73520 }, { "epoch": 8.848375451263538, "grad_norm": 309.0791015625, "learning_rate": 0.00019663970764762528, "loss": 7.5758, "step": 73530 }, { "epoch": 8.849578820697955, "grad_norm": 356.8844909667969, "learning_rate": 0.00019663872967542415, "loss": 7.4662, "step": 73540 }, { "epoch": 8.85078219013237, "grad_norm": 275.45648193359375, "learning_rate": 0.00019663775156336292, "loss": 7.5084, "step": 73550 }, { "epoch": 8.851985559566787, "grad_norm": 379.85504150390625, "learning_rate": 0.00019663677331144302, "loss": 7.5011, "step": 73560 }, { "epoch": 8.853188929001202, "grad_norm": 300.57647705078125, "learning_rate": 0.00019663579491966584, "loss": 7.45, "step": 73570 }, { "epoch": 8.85439229843562, "grad_norm": 642.899169921875, "learning_rate": 0.0001966348163880328, "loss": 7.5217, "step": 73580 }, { "epoch": 8.855595667870036, "grad_norm": 531.8223876953125, "learning_rate": 0.00019663383771654534, "loss": 7.3338, "step": 73590 }, { "epoch": 8.856799037304452, "grad_norm": 331.4888916015625, "learning_rate": 0.00019663285890520485, "loss": 7.4976, "step": 73600 }, { "epoch": 8.85800240673887, "grad_norm": 1119.4039306640625, "learning_rate": 0.00019663187995401274, "loss": 7.4766, "step": 73610 }, { "epoch": 8.859205776173285, "grad_norm": 741.74267578125, "learning_rate": 0.00019663090086297045, "loss": 7.3044, "step": 73620 }, { "epoch": 8.8604091456077, "grad_norm": 772.233642578125, "learning_rate": 0.00019662992163207941, "loss": 7.3415, "step": 73630 }, { "epoch": 8.861612515042118, "grad_norm": 451.1818542480469, "learning_rate": 0.000196628942261341, "loss": 7.3441, "step": 73640 }, { "epoch": 8.862815884476534, "grad_norm": 143.37416076660156, "learning_rate": 0.00019662796275075667, "loss": 7.2599, "step": 73650 }, { "epoch": 8.86401925391095, "grad_norm": 178.45677185058594, "learning_rate": 0.0001966269831003278, "loss": 7.3787, "step": 73660 }, { "epoch": 8.865222623345367, "grad_norm": 147.97024536132812, "learning_rate": 0.0001966260033100558, "loss": 7.438, "step": 73670 }, { "epoch": 8.866425992779783, "grad_norm": 420.7137756347656, "learning_rate": 0.00019662502337994216, "loss": 7.4596, "step": 73680 }, { "epoch": 8.867629362214199, "grad_norm": 453.4184265136719, "learning_rate": 0.00019662404330998823, "loss": 7.3101, "step": 73690 }, { "epoch": 8.868832731648617, "grad_norm": 302.7304382324219, "learning_rate": 0.00019662306310019547, "loss": 7.2601, "step": 73700 }, { "epoch": 8.870036101083032, "grad_norm": 398.3207092285156, "learning_rate": 0.00019662208275056527, "loss": 7.3412, "step": 73710 }, { "epoch": 8.871239470517448, "grad_norm": 319.54083251953125, "learning_rate": 0.0001966211022610991, "loss": 7.3469, "step": 73720 }, { "epoch": 8.872442839951866, "grad_norm": 407.7167663574219, "learning_rate": 0.0001966201216317983, "loss": 7.3415, "step": 73730 }, { "epoch": 8.873646209386282, "grad_norm": 118.96154022216797, "learning_rate": 0.00019661914086266434, "loss": 7.316, "step": 73740 }, { "epoch": 8.874849578820697, "grad_norm": 111.42121887207031, "learning_rate": 0.00019661815995369864, "loss": 7.2347, "step": 73750 }, { "epoch": 8.876052948255115, "grad_norm": 199.5009307861328, "learning_rate": 0.0001966171789049026, "loss": 7.2554, "step": 73760 }, { "epoch": 8.87725631768953, "grad_norm": 349.4769287109375, "learning_rate": 0.00019661619771627764, "loss": 7.4234, "step": 73770 }, { "epoch": 8.878459687123947, "grad_norm": 535.7664184570312, "learning_rate": 0.0001966152163878252, "loss": 7.3354, "step": 73780 }, { "epoch": 8.879663056558364, "grad_norm": 404.7473449707031, "learning_rate": 0.00019661423491954666, "loss": 7.2791, "step": 73790 }, { "epoch": 8.88086642599278, "grad_norm": 498.8824768066406, "learning_rate": 0.00019661325331144349, "loss": 7.2278, "step": 73800 }, { "epoch": 8.882069795427196, "grad_norm": 1814.0084228515625, "learning_rate": 0.00019661227156351712, "loss": 7.3612, "step": 73810 }, { "epoch": 8.883273164861613, "grad_norm": 280.3747253417969, "learning_rate": 0.0001966112896757689, "loss": 7.2753, "step": 73820 }, { "epoch": 8.884476534296029, "grad_norm": 771.80810546875, "learning_rate": 0.0001966103076482003, "loss": 7.3609, "step": 73830 }, { "epoch": 8.885679903730445, "grad_norm": 454.1147766113281, "learning_rate": 0.00019660932548081275, "loss": 7.3298, "step": 73840 }, { "epoch": 8.886883273164862, "grad_norm": 322.5664367675781, "learning_rate": 0.0001966083431736076, "loss": 7.2804, "step": 73850 }, { "epoch": 8.888086642599278, "grad_norm": 793.1326293945312, "learning_rate": 0.0001966073607265864, "loss": 7.2871, "step": 73860 }, { "epoch": 8.889290012033694, "grad_norm": 383.89630126953125, "learning_rate": 0.00019660637813975044, "loss": 7.2863, "step": 73870 }, { "epoch": 8.890493381468112, "grad_norm": 704.010498046875, "learning_rate": 0.00019660539541310122, "loss": 7.3659, "step": 73880 }, { "epoch": 8.891696750902527, "grad_norm": 416.768798828125, "learning_rate": 0.00019660441254664015, "loss": 7.3856, "step": 73890 }, { "epoch": 8.892900120336943, "grad_norm": 987.7268676757812, "learning_rate": 0.00019660342954036863, "loss": 7.2553, "step": 73900 }, { "epoch": 8.89410348977136, "grad_norm": 310.909423828125, "learning_rate": 0.0001966024463942881, "loss": 7.3154, "step": 73910 }, { "epoch": 8.895306859205776, "grad_norm": 2970.412841796875, "learning_rate": 0.0001966014631084, "loss": 7.3766, "step": 73920 }, { "epoch": 8.896510228640192, "grad_norm": 28545.12890625, "learning_rate": 0.00019660047968270573, "loss": 7.8155, "step": 73930 }, { "epoch": 8.89771359807461, "grad_norm": 26139.6875, "learning_rate": 0.00019659949611720666, "loss": 8.5855, "step": 73940 }, { "epoch": 8.898916967509026, "grad_norm": 15616.185546875, "learning_rate": 0.00019659851241190433, "loss": 8.8785, "step": 73950 }, { "epoch": 8.900120336943441, "grad_norm": 13234.80859375, "learning_rate": 0.0001965975285668001, "loss": 8.0822, "step": 73960 }, { "epoch": 8.901323706377857, "grad_norm": 3385.303955078125, "learning_rate": 0.00019659654458189537, "loss": 7.8559, "step": 73970 }, { "epoch": 8.902527075812275, "grad_norm": 4327.6064453125, "learning_rate": 0.0001965955604571916, "loss": 7.7036, "step": 73980 }, { "epoch": 8.90373044524669, "grad_norm": 4876.15087890625, "learning_rate": 0.0001965945761926902, "loss": 7.644, "step": 73990 }, { "epoch": 8.904933814681106, "grad_norm": 12927.3759765625, "learning_rate": 0.00019659359178839263, "loss": 7.9037, "step": 74000 }, { "epoch": 8.906137184115524, "grad_norm": 18909.4921875, "learning_rate": 0.00019659260724430024, "loss": 7.592, "step": 74010 }, { "epoch": 8.90734055354994, "grad_norm": 3630.268798828125, "learning_rate": 0.00019659162256041452, "loss": 7.5257, "step": 74020 }, { "epoch": 8.908543922984355, "grad_norm": 5463.34375, "learning_rate": 0.0001965906377367369, "loss": 7.4377, "step": 74030 }, { "epoch": 8.909747292418773, "grad_norm": 2743.06494140625, "learning_rate": 0.00019658965277326876, "loss": 7.3535, "step": 74040 }, { "epoch": 8.910950661853189, "grad_norm": 7481.822265625, "learning_rate": 0.00019658866767001155, "loss": 7.4033, "step": 74050 }, { "epoch": 8.912154031287605, "grad_norm": 1642.2529296875, "learning_rate": 0.00019658768242696665, "loss": 7.3684, "step": 74060 }, { "epoch": 8.913357400722022, "grad_norm": 755.1593017578125, "learning_rate": 0.00019658669704413558, "loss": 7.2633, "step": 74070 }, { "epoch": 8.914560770156438, "grad_norm": 4966.22705078125, "learning_rate": 0.00019658571152151967, "loss": 7.3571, "step": 74080 }, { "epoch": 8.915764139590854, "grad_norm": 2784.0341796875, "learning_rate": 0.00019658472585912042, "loss": 7.4542, "step": 74090 }, { "epoch": 8.916967509025271, "grad_norm": 4800.8154296875, "learning_rate": 0.00019658374005693922, "loss": 7.501, "step": 74100 }, { "epoch": 8.918170878459687, "grad_norm": 5794.126953125, "learning_rate": 0.00019658275411497752, "loss": 7.5696, "step": 74110 }, { "epoch": 8.919374247894103, "grad_norm": 7495.03125, "learning_rate": 0.00019658176803323668, "loss": 7.5415, "step": 74120 }, { "epoch": 8.92057761732852, "grad_norm": 5297.14990234375, "learning_rate": 0.00019658078181171822, "loss": 7.5821, "step": 74130 }, { "epoch": 8.921780986762936, "grad_norm": 1676.0108642578125, "learning_rate": 0.00019657979545042352, "loss": 7.5723, "step": 74140 }, { "epoch": 8.922984356197352, "grad_norm": 3404.173828125, "learning_rate": 0.000196578808949354, "loss": 7.4392, "step": 74150 }, { "epoch": 8.92418772563177, "grad_norm": 1335.2464599609375, "learning_rate": 0.00019657782230851108, "loss": 7.6545, "step": 74160 }, { "epoch": 8.925391095066185, "grad_norm": 1651.59033203125, "learning_rate": 0.00019657683552789623, "loss": 7.5228, "step": 74170 }, { "epoch": 8.926594464500601, "grad_norm": 3013.30322265625, "learning_rate": 0.00019657584860751083, "loss": 7.6587, "step": 74180 }, { "epoch": 8.927797833935019, "grad_norm": 798.3281860351562, "learning_rate": 0.00019657486154735638, "loss": 7.6309, "step": 74190 }, { "epoch": 8.929001203369435, "grad_norm": 755.2177734375, "learning_rate": 0.0001965738743474342, "loss": 7.6763, "step": 74200 }, { "epoch": 8.93020457280385, "grad_norm": 1988.45458984375, "learning_rate": 0.00019657288700774585, "loss": 7.568, "step": 74210 }, { "epoch": 8.931407942238268, "grad_norm": 1843.828369140625, "learning_rate": 0.00019657189952829266, "loss": 7.5603, "step": 74220 }, { "epoch": 8.932611311672684, "grad_norm": 2340.12548828125, "learning_rate": 0.00019657091190907608, "loss": 7.6733, "step": 74230 }, { "epoch": 8.9338146811071, "grad_norm": 5211.78955078125, "learning_rate": 0.00019656992415009757, "loss": 7.6235, "step": 74240 }, { "epoch": 8.935018050541515, "grad_norm": 660.459228515625, "learning_rate": 0.0001965689362513585, "loss": 7.546, "step": 74250 }, { "epoch": 8.936221419975933, "grad_norm": 1261.160888671875, "learning_rate": 0.00019656794821286037, "loss": 7.5232, "step": 74260 }, { "epoch": 8.937424789410349, "grad_norm": 425.0375671386719, "learning_rate": 0.00019656696003460456, "loss": 7.4593, "step": 74270 }, { "epoch": 8.938628158844764, "grad_norm": 2253.50537109375, "learning_rate": 0.0001965659717165925, "loss": 7.4774, "step": 74280 }, { "epoch": 8.939831528279182, "grad_norm": 6685.58447265625, "learning_rate": 0.00019656498325882566, "loss": 7.5481, "step": 74290 }, { "epoch": 8.941034897713598, "grad_norm": 18517.52734375, "learning_rate": 0.00019656399466130546, "loss": 7.6617, "step": 74300 }, { "epoch": 8.942238267148014, "grad_norm": 17794.576171875, "learning_rate": 0.0001965630059240333, "loss": 7.5496, "step": 74310 }, { "epoch": 8.943441636582431, "grad_norm": 12111.9296875, "learning_rate": 0.00019656201704701065, "loss": 7.5412, "step": 74320 }, { "epoch": 8.944645006016847, "grad_norm": 63324.16796875, "learning_rate": 0.00019656102803023887, "loss": 7.4649, "step": 74330 }, { "epoch": 8.945848375451263, "grad_norm": 34897.78125, "learning_rate": 0.0001965600388737195, "loss": 7.343, "step": 74340 }, { "epoch": 8.94705174488568, "grad_norm": 58111.921875, "learning_rate": 0.0001965590495774539, "loss": 7.4078, "step": 74350 }, { "epoch": 8.948255114320096, "grad_norm": 124278.6640625, "learning_rate": 0.0001965580601414435, "loss": 7.5357, "step": 74360 }, { "epoch": 8.949458483754512, "grad_norm": 155516.234375, "learning_rate": 0.00019655707056568975, "loss": 7.6066, "step": 74370 }, { "epoch": 8.95066185318893, "grad_norm": 53974.58203125, "learning_rate": 0.0001965560808501941, "loss": 7.4978, "step": 74380 }, { "epoch": 8.951865222623345, "grad_norm": 28184.689453125, "learning_rate": 0.0001965550909949579, "loss": 7.5567, "step": 74390 }, { "epoch": 8.953068592057761, "grad_norm": 91322.3828125, "learning_rate": 0.00019655410099998273, "loss": 7.8956, "step": 74400 }, { "epoch": 8.954271961492179, "grad_norm": 71777.9375, "learning_rate": 0.00019655311086526987, "loss": 7.735, "step": 74410 }, { "epoch": 8.955475330926594, "grad_norm": 234126.375, "learning_rate": 0.00019655212059082087, "loss": 7.511, "step": 74420 }, { "epoch": 8.95667870036101, "grad_norm": 533.6937866210938, "learning_rate": 0.0001965511301766371, "loss": 7.4187, "step": 74430 }, { "epoch": 8.957882069795428, "grad_norm": 752.577880859375, "learning_rate": 0.00019655013962272002, "loss": 7.3956, "step": 74440 }, { "epoch": 8.959085439229844, "grad_norm": 1881.1563720703125, "learning_rate": 0.00019654914892907103, "loss": 7.4691, "step": 74450 }, { "epoch": 8.96028880866426, "grad_norm": 6551.96875, "learning_rate": 0.00019654815809569157, "loss": 7.4811, "step": 74460 }, { "epoch": 8.961492178098677, "grad_norm": 4642.265625, "learning_rate": 0.0001965471671225831, "loss": 7.3208, "step": 74470 }, { "epoch": 8.962695547533093, "grad_norm": 7261.7666015625, "learning_rate": 0.00019654617600974706, "loss": 7.4772, "step": 74480 }, { "epoch": 8.963898916967509, "grad_norm": 62814.953125, "learning_rate": 0.00019654518475718485, "loss": 8.1708, "step": 74490 }, { "epoch": 8.965102286401926, "grad_norm": 5553.8818359375, "learning_rate": 0.0001965441933648979, "loss": 9.1301, "step": 74500 }, { "epoch": 8.966305655836342, "grad_norm": 441.4981384277344, "learning_rate": 0.0001965432018328877, "loss": 7.7538, "step": 74510 }, { "epoch": 8.967509025270758, "grad_norm": 291.3560485839844, "learning_rate": 0.00019654221016115564, "loss": 7.7199, "step": 74520 }, { "epoch": 8.968712394705175, "grad_norm": 1062.9539794921875, "learning_rate": 0.00019654121834970315, "loss": 7.7126, "step": 74530 }, { "epoch": 8.969915764139591, "grad_norm": 1585.7899169921875, "learning_rate": 0.00019654022639853174, "loss": 7.4453, "step": 74540 }, { "epoch": 8.971119133574007, "grad_norm": 267.0061340332031, "learning_rate": 0.00019653923430764273, "loss": 7.4073, "step": 74550 }, { "epoch": 8.972322503008424, "grad_norm": 15941.5517578125, "learning_rate": 0.00019653824207703765, "loss": 7.4764, "step": 74560 }, { "epoch": 8.97352587244284, "grad_norm": 35514.24609375, "learning_rate": 0.00019653724970671786, "loss": 7.6851, "step": 74570 }, { "epoch": 8.974729241877256, "grad_norm": 55953.18359375, "learning_rate": 0.00019653625719668485, "loss": 7.544, "step": 74580 }, { "epoch": 8.975932611311674, "grad_norm": 61923.56640625, "learning_rate": 0.00019653526454694003, "loss": 7.8099, "step": 74590 }, { "epoch": 8.97713598074609, "grad_norm": 490680.71875, "learning_rate": 0.0001965342717574849, "loss": 7.7647, "step": 74600 }, { "epoch": 8.978339350180505, "grad_norm": 1914434.125, "learning_rate": 0.0001965332788283208, "loss": 8.1098, "step": 74610 }, { "epoch": 8.979542719614923, "grad_norm": 1534326.875, "learning_rate": 0.00019653228575944922, "loss": 8.1381, "step": 74620 }, { "epoch": 8.980746089049338, "grad_norm": 744071.5, "learning_rate": 0.00019653129255087158, "loss": 7.9991, "step": 74630 }, { "epoch": 8.981949458483754, "grad_norm": 1192951.5, "learning_rate": 0.00019653029920258934, "loss": 7.9607, "step": 74640 }, { "epoch": 8.98315282791817, "grad_norm": 206944.8125, "learning_rate": 0.00019652930571460394, "loss": 7.5681, "step": 74650 }, { "epoch": 8.984356197352588, "grad_norm": 389405.21875, "learning_rate": 0.0001965283120869168, "loss": 7.5574, "step": 74660 }, { "epoch": 8.985559566787003, "grad_norm": 28191.26953125, "learning_rate": 0.00019652731831952934, "loss": 7.4444, "step": 74670 }, { "epoch": 8.98676293622142, "grad_norm": 530025.0625, "learning_rate": 0.00019652632441244302, "loss": 7.6509, "step": 74680 }, { "epoch": 8.987966305655837, "grad_norm": 1986.7574462890625, "learning_rate": 0.00019652533036565927, "loss": 7.7447, "step": 74690 }, { "epoch": 8.989169675090253, "grad_norm": 773.9241943359375, "learning_rate": 0.00019652433617917952, "loss": 7.6749, "step": 74700 }, { "epoch": 8.990373044524668, "grad_norm": 1008.8065185546875, "learning_rate": 0.00019652334185300528, "loss": 7.5409, "step": 74710 }, { "epoch": 8.991576413959086, "grad_norm": 652.8068237304688, "learning_rate": 0.0001965223473871379, "loss": 7.4327, "step": 74720 }, { "epoch": 8.992779783393502, "grad_norm": 426.9729309082031, "learning_rate": 0.00019652135278157886, "loss": 7.4154, "step": 74730 }, { "epoch": 8.993983152827917, "grad_norm": 1204.4501953125, "learning_rate": 0.0001965203580363296, "loss": 7.2929, "step": 74740 }, { "epoch": 8.995186522262335, "grad_norm": 493.05084228515625, "learning_rate": 0.00019651936315139153, "loss": 7.361, "step": 74750 }, { "epoch": 8.99638989169675, "grad_norm": 527.6356201171875, "learning_rate": 0.00019651836812676612, "loss": 7.3862, "step": 74760 }, { "epoch": 8.997593261131167, "grad_norm": 1537.05517578125, "learning_rate": 0.0001965173729624548, "loss": 7.3668, "step": 74770 }, { "epoch": 8.998796630565584, "grad_norm": 255.41732788085938, "learning_rate": 0.00019651637765845902, "loss": 7.5919, "step": 74780 }, { "epoch": 9.0, "grad_norm": 1466.275390625, "learning_rate": 0.00019651538221478022, "loss": 7.5058, "step": 74790 }, { "epoch": 9.0, "eval_loss": 7.465070724487305, "eval_runtime": 119.5011, "eval_samples_per_second": 61.815, "eval_steps_per_second": 7.732, "step": 74790 }, { "epoch": 9.001203369434416, "grad_norm": 597.3857421875, "learning_rate": 0.00019651438663141981, "loss": 7.3772, "step": 74800 }, { "epoch": 9.002406738868833, "grad_norm": 1271.1337890625, "learning_rate": 0.00019651339090837928, "loss": 7.4259, "step": 74810 }, { "epoch": 9.00361010830325, "grad_norm": 827.50537109375, "learning_rate": 0.00019651239504566003, "loss": 7.4705, "step": 74820 }, { "epoch": 9.004813477737665, "grad_norm": 242.7447967529297, "learning_rate": 0.0001965113990432635, "loss": 7.4658, "step": 74830 }, { "epoch": 9.006016847172083, "grad_norm": 425.5442199707031, "learning_rate": 0.00019651040290119117, "loss": 7.5238, "step": 74840 }, { "epoch": 9.007220216606498, "grad_norm": 3862.75830078125, "learning_rate": 0.00019650940661944448, "loss": 7.2356, "step": 74850 }, { "epoch": 9.008423586040914, "grad_norm": 6282.27685546875, "learning_rate": 0.00019650841019802482, "loss": 7.3926, "step": 74860 }, { "epoch": 9.009626955475332, "grad_norm": 2767.276611328125, "learning_rate": 0.00019650741363693369, "loss": 7.5318, "step": 74870 }, { "epoch": 9.010830324909747, "grad_norm": 1277.268310546875, "learning_rate": 0.00019650641693617246, "loss": 7.477, "step": 74880 }, { "epoch": 9.012033694344163, "grad_norm": 10262.96875, "learning_rate": 0.00019650542009574265, "loss": 7.6104, "step": 74890 }, { "epoch": 9.01323706377858, "grad_norm": 5794.29736328125, "learning_rate": 0.00019650442311564564, "loss": 7.5953, "step": 74900 }, { "epoch": 9.014440433212997, "grad_norm": 3976.21826171875, "learning_rate": 0.00019650342599588294, "loss": 7.5964, "step": 74910 }, { "epoch": 9.015643802647412, "grad_norm": 4755.27099609375, "learning_rate": 0.00019650242873645593, "loss": 7.57, "step": 74920 }, { "epoch": 9.01684717208183, "grad_norm": 5756.71533203125, "learning_rate": 0.00019650143133736613, "loss": 7.8332, "step": 74930 }, { "epoch": 9.018050541516246, "grad_norm": 3540.826416015625, "learning_rate": 0.0001965004337986149, "loss": 7.6195, "step": 74940 }, { "epoch": 9.019253910950662, "grad_norm": 1619.6551513671875, "learning_rate": 0.0001964994361202037, "loss": 7.4998, "step": 74950 }, { "epoch": 9.020457280385079, "grad_norm": 1862.5577392578125, "learning_rate": 0.00019649843830213398, "loss": 7.393, "step": 74960 }, { "epoch": 9.021660649819495, "grad_norm": 2073.433349609375, "learning_rate": 0.00019649744034440722, "loss": 7.3996, "step": 74970 }, { "epoch": 9.02286401925391, "grad_norm": 3002.72265625, "learning_rate": 0.00019649644224702486, "loss": 7.4384, "step": 74980 }, { "epoch": 9.024067388688326, "grad_norm": 2637.7998046875, "learning_rate": 0.0001964954440099883, "loss": 7.4376, "step": 74990 }, { "epoch": 9.025270758122744, "grad_norm": 2617.635498046875, "learning_rate": 0.00019649444563329903, "loss": 7.366, "step": 75000 }, { "epoch": 9.02647412755716, "grad_norm": 3600.6943359375, "learning_rate": 0.00019649344711695842, "loss": 7.3583, "step": 75010 }, { "epoch": 9.027677496991576, "grad_norm": 3394.476806640625, "learning_rate": 0.00019649244846096803, "loss": 7.3795, "step": 75020 }, { "epoch": 9.028880866425993, "grad_norm": 3939.307373046875, "learning_rate": 0.00019649144966532922, "loss": 7.477, "step": 75030 }, { "epoch": 9.030084235860409, "grad_norm": 6318.85693359375, "learning_rate": 0.00019649045073004344, "loss": 7.5165, "step": 75040 }, { "epoch": 9.031287605294825, "grad_norm": 3790.567626953125, "learning_rate": 0.00019648945165511218, "loss": 7.6241, "step": 75050 }, { "epoch": 9.032490974729242, "grad_norm": 2998.84228515625, "learning_rate": 0.00019648845244053687, "loss": 7.6216, "step": 75060 }, { "epoch": 9.033694344163658, "grad_norm": 4382.93505859375, "learning_rate": 0.00019648745308631892, "loss": 7.6728, "step": 75070 }, { "epoch": 9.034897713598074, "grad_norm": 3263.46142578125, "learning_rate": 0.00019648645359245982, "loss": 7.7167, "step": 75080 }, { "epoch": 9.036101083032491, "grad_norm": 4775.517578125, "learning_rate": 0.00019648545395896098, "loss": 7.7483, "step": 75090 }, { "epoch": 9.037304452466907, "grad_norm": 2728.435302734375, "learning_rate": 0.00019648445418582388, "loss": 7.6134, "step": 75100 }, { "epoch": 9.038507821901323, "grad_norm": 8050.763671875, "learning_rate": 0.00019648345427304996, "loss": 7.5364, "step": 75110 }, { "epoch": 9.03971119133574, "grad_norm": 5224.39111328125, "learning_rate": 0.00019648245422064067, "loss": 7.7044, "step": 75120 }, { "epoch": 9.040914560770156, "grad_norm": 5393.15966796875, "learning_rate": 0.0001964814540285974, "loss": 7.6266, "step": 75130 }, { "epoch": 9.042117930204572, "grad_norm": 4150.26416015625, "learning_rate": 0.0001964804536969217, "loss": 7.6594, "step": 75140 }, { "epoch": 9.04332129963899, "grad_norm": 3149.526611328125, "learning_rate": 0.00019647945322561494, "loss": 7.6063, "step": 75150 }, { "epoch": 9.044524669073406, "grad_norm": 2388.351318359375, "learning_rate": 0.00019647845261467858, "loss": 7.5508, "step": 75160 }, { "epoch": 9.045728038507821, "grad_norm": 4836.52001953125, "learning_rate": 0.0001964774518641141, "loss": 7.5867, "step": 75170 }, { "epoch": 9.046931407942239, "grad_norm": 1788.6094970703125, "learning_rate": 0.00019647645097392293, "loss": 7.5901, "step": 75180 }, { "epoch": 9.048134777376655, "grad_norm": 7338.75634765625, "learning_rate": 0.0001964754499441065, "loss": 7.5614, "step": 75190 }, { "epoch": 9.04933814681107, "grad_norm": 3098.3955078125, "learning_rate": 0.00019647444877466628, "loss": 7.495, "step": 75200 }, { "epoch": 9.050541516245488, "grad_norm": 2653.363037109375, "learning_rate": 0.00019647344746560373, "loss": 7.6479, "step": 75210 }, { "epoch": 9.051744885679904, "grad_norm": 6382.98974609375, "learning_rate": 0.00019647244601692025, "loss": 7.5038, "step": 75220 }, { "epoch": 9.05294825511432, "grad_norm": 2274.214599609375, "learning_rate": 0.00019647144442861734, "loss": 7.5092, "step": 75230 }, { "epoch": 9.054151624548737, "grad_norm": 5597.10009765625, "learning_rate": 0.00019647044270069644, "loss": 7.5237, "step": 75240 }, { "epoch": 9.055354993983153, "grad_norm": 8310.5517578125, "learning_rate": 0.00019646944083315896, "loss": 7.572, "step": 75250 }, { "epoch": 9.056558363417569, "grad_norm": 2285.239501953125, "learning_rate": 0.00019646843882600642, "loss": 7.5238, "step": 75260 }, { "epoch": 9.057761732851986, "grad_norm": 9253.587890625, "learning_rate": 0.00019646743667924021, "loss": 7.5775, "step": 75270 }, { "epoch": 9.058965102286402, "grad_norm": 2495.662109375, "learning_rate": 0.0001964664343928618, "loss": 7.4708, "step": 75280 }, { "epoch": 9.060168471720818, "grad_norm": 3574.8486328125, "learning_rate": 0.00019646543196687267, "loss": 7.4904, "step": 75290 }, { "epoch": 9.061371841155236, "grad_norm": 3205.84912109375, "learning_rate": 0.0001964644294012742, "loss": 7.4509, "step": 75300 }, { "epoch": 9.062575210589651, "grad_norm": 4271.21533203125, "learning_rate": 0.00019646342669606793, "loss": 7.4857, "step": 75310 }, { "epoch": 9.063778580024067, "grad_norm": 2383.53515625, "learning_rate": 0.00019646242385125523, "loss": 7.2864, "step": 75320 }, { "epoch": 9.064981949458483, "grad_norm": 1065.3895263671875, "learning_rate": 0.0001964614208668376, "loss": 7.3884, "step": 75330 }, { "epoch": 9.0661853188929, "grad_norm": 2439.354736328125, "learning_rate": 0.0001964604177428165, "loss": 7.5093, "step": 75340 }, { "epoch": 9.067388688327316, "grad_norm": 1850.3184814453125, "learning_rate": 0.00019645941447919333, "loss": 7.3219, "step": 75350 }, { "epoch": 9.068592057761732, "grad_norm": 1488.564697265625, "learning_rate": 0.0001964584110759696, "loss": 7.3398, "step": 75360 }, { "epoch": 9.06979542719615, "grad_norm": 3440.6279296875, "learning_rate": 0.00019645740753314672, "loss": 7.4433, "step": 75370 }, { "epoch": 9.070998796630565, "grad_norm": 2456.555419921875, "learning_rate": 0.00019645640385072613, "loss": 7.473, "step": 75380 }, { "epoch": 9.072202166064981, "grad_norm": 11485.5634765625, "learning_rate": 0.00019645540002870935, "loss": 7.4575, "step": 75390 }, { "epoch": 9.073405535499399, "grad_norm": 4019.61669921875, "learning_rate": 0.00019645439606709778, "loss": 7.3776, "step": 75400 }, { "epoch": 9.074608904933815, "grad_norm": 2951.8466796875, "learning_rate": 0.00019645339196589285, "loss": 7.4738, "step": 75410 }, { "epoch": 9.07581227436823, "grad_norm": 2534.1728515625, "learning_rate": 0.0001964523877250961, "loss": 7.5318, "step": 75420 }, { "epoch": 9.077015643802648, "grad_norm": 5971.91552734375, "learning_rate": 0.0001964513833447089, "loss": 7.6485, "step": 75430 }, { "epoch": 9.078219013237064, "grad_norm": 7832.55810546875, "learning_rate": 0.00019645037882473276, "loss": 7.6937, "step": 75440 }, { "epoch": 9.07942238267148, "grad_norm": 2537.312744140625, "learning_rate": 0.00019644937416516908, "loss": 7.4627, "step": 75450 }, { "epoch": 9.080625752105897, "grad_norm": 2070.791015625, "learning_rate": 0.00019644836936601935, "loss": 7.5414, "step": 75460 }, { "epoch": 9.081829121540313, "grad_norm": 5187.1982421875, "learning_rate": 0.00019644736442728505, "loss": 7.4998, "step": 75470 }, { "epoch": 9.083032490974729, "grad_norm": 2618.777587890625, "learning_rate": 0.00019644635934896757, "loss": 7.5091, "step": 75480 }, { "epoch": 9.084235860409146, "grad_norm": 3503.40966796875, "learning_rate": 0.0001964453541310684, "loss": 7.5392, "step": 75490 }, { "epoch": 9.085439229843562, "grad_norm": 2291.1005859375, "learning_rate": 0.000196444348773589, "loss": 7.4334, "step": 75500 }, { "epoch": 9.086642599277978, "grad_norm": 4875.828125, "learning_rate": 0.0001964433432765308, "loss": 7.3775, "step": 75510 }, { "epoch": 9.087845968712395, "grad_norm": 4910.685546875, "learning_rate": 0.00019644233763989528, "loss": 7.3955, "step": 75520 }, { "epoch": 9.089049338146811, "grad_norm": 1899.69091796875, "learning_rate": 0.00019644133186368388, "loss": 7.2959, "step": 75530 }, { "epoch": 9.090252707581227, "grad_norm": 5022.66796875, "learning_rate": 0.00019644032594789808, "loss": 7.3931, "step": 75540 }, { "epoch": 9.091456077015645, "grad_norm": 2716.967529296875, "learning_rate": 0.0001964393198925393, "loss": 7.4728, "step": 75550 }, { "epoch": 9.09265944645006, "grad_norm": 4830.9619140625, "learning_rate": 0.00019643831369760905, "loss": 7.5441, "step": 75560 }, { "epoch": 9.093862815884476, "grad_norm": 3634.146240234375, "learning_rate": 0.00019643730736310867, "loss": 7.4721, "step": 75570 }, { "epoch": 9.095066185318894, "grad_norm": 4341.45166015625, "learning_rate": 0.00019643630088903976, "loss": 7.5323, "step": 75580 }, { "epoch": 9.09626955475331, "grad_norm": 2593.2265625, "learning_rate": 0.0001964352942754037, "loss": 7.4874, "step": 75590 }, { "epoch": 9.097472924187725, "grad_norm": 2320.23876953125, "learning_rate": 0.00019643428752220195, "loss": 7.3997, "step": 75600 }, { "epoch": 9.098676293622143, "grad_norm": 2152.728271484375, "learning_rate": 0.000196433280629436, "loss": 7.3698, "step": 75610 }, { "epoch": 9.099879663056559, "grad_norm": 2708.65625, "learning_rate": 0.00019643227359710728, "loss": 7.4336, "step": 75620 }, { "epoch": 9.101083032490974, "grad_norm": 2264.180908203125, "learning_rate": 0.00019643126642521726, "loss": 7.4608, "step": 75630 }, { "epoch": 9.102286401925392, "grad_norm": 2932.721435546875, "learning_rate": 0.00019643025911376735, "loss": 7.4104, "step": 75640 }, { "epoch": 9.103489771359808, "grad_norm": 3209.4423828125, "learning_rate": 0.0001964292516627591, "loss": 7.354, "step": 75650 }, { "epoch": 9.104693140794224, "grad_norm": 1590.328857421875, "learning_rate": 0.00019642824407219386, "loss": 7.3862, "step": 75660 }, { "epoch": 9.10589651022864, "grad_norm": 4043.4326171875, "learning_rate": 0.00019642723634207319, "loss": 7.3696, "step": 75670 }, { "epoch": 9.107099879663057, "grad_norm": 1901.3751220703125, "learning_rate": 0.00019642622847239848, "loss": 7.3036, "step": 75680 }, { "epoch": 9.108303249097473, "grad_norm": 2234.895751953125, "learning_rate": 0.0001964252204631712, "loss": 7.3256, "step": 75690 }, { "epoch": 9.109506618531888, "grad_norm": 1258.59130859375, "learning_rate": 0.00019642421231439285, "loss": 7.2707, "step": 75700 }, { "epoch": 9.110709987966306, "grad_norm": 1214.1578369140625, "learning_rate": 0.00019642320402606483, "loss": 7.291, "step": 75710 }, { "epoch": 9.111913357400722, "grad_norm": 1541.5499267578125, "learning_rate": 0.00019642219559818863, "loss": 7.2909, "step": 75720 }, { "epoch": 9.113116726835138, "grad_norm": 5279.283203125, "learning_rate": 0.00019642118703076572, "loss": 7.3405, "step": 75730 }, { "epoch": 9.114320096269555, "grad_norm": 2948.894287109375, "learning_rate": 0.00019642017832379756, "loss": 7.3548, "step": 75740 }, { "epoch": 9.115523465703971, "grad_norm": 1446.7877197265625, "learning_rate": 0.00019641916947728556, "loss": 7.2449, "step": 75750 }, { "epoch": 9.116726835138387, "grad_norm": 3335.70849609375, "learning_rate": 0.00019641816049123124, "loss": 7.2542, "step": 75760 }, { "epoch": 9.117930204572804, "grad_norm": 2856.826904296875, "learning_rate": 0.000196417151365636, "loss": 7.3918, "step": 75770 }, { "epoch": 9.11913357400722, "grad_norm": 830.1557006835938, "learning_rate": 0.00019641614210050138, "loss": 7.2572, "step": 75780 }, { "epoch": 9.120336943441636, "grad_norm": 3191.406982421875, "learning_rate": 0.0001964151326958288, "loss": 7.4104, "step": 75790 }, { "epoch": 9.121540312876053, "grad_norm": 2908.66015625, "learning_rate": 0.00019641412315161971, "loss": 7.3513, "step": 75800 }, { "epoch": 9.12274368231047, "grad_norm": 727.5166015625, "learning_rate": 0.00019641311346787558, "loss": 7.3175, "step": 75810 }, { "epoch": 9.123947051744885, "grad_norm": 2046.9010009765625, "learning_rate": 0.00019641210364459784, "loss": 7.2328, "step": 75820 }, { "epoch": 9.125150421179303, "grad_norm": 1243.1632080078125, "learning_rate": 0.00019641109368178804, "loss": 7.33, "step": 75830 }, { "epoch": 9.126353790613718, "grad_norm": 1148.9654541015625, "learning_rate": 0.00019641008357944753, "loss": 7.3078, "step": 75840 }, { "epoch": 9.127557160048134, "grad_norm": 969.3536987304688, "learning_rate": 0.00019640907333757785, "loss": 7.331, "step": 75850 }, { "epoch": 9.128760529482552, "grad_norm": 1206.808837890625, "learning_rate": 0.00019640806295618046, "loss": 7.3609, "step": 75860 }, { "epoch": 9.129963898916968, "grad_norm": 1089.9910888671875, "learning_rate": 0.00019640705243525677, "loss": 7.2742, "step": 75870 }, { "epoch": 9.131167268351383, "grad_norm": 1392.9652099609375, "learning_rate": 0.0001964060417748083, "loss": 7.3062, "step": 75880 }, { "epoch": 9.132370637785801, "grad_norm": 2660.32568359375, "learning_rate": 0.00019640503097483644, "loss": 7.4094, "step": 75890 }, { "epoch": 9.133574007220217, "grad_norm": 2828.388916015625, "learning_rate": 0.00019640402003534275, "loss": 7.2343, "step": 75900 }, { "epoch": 9.134777376654633, "grad_norm": 798.2307739257812, "learning_rate": 0.0001964030089563286, "loss": 7.333, "step": 75910 }, { "epoch": 9.13598074608905, "grad_norm": 2811.5615234375, "learning_rate": 0.0001964019977377955, "loss": 7.3057, "step": 75920 }, { "epoch": 9.137184115523466, "grad_norm": 1283.5013427734375, "learning_rate": 0.00019640098637974495, "loss": 7.2729, "step": 75930 }, { "epoch": 9.138387484957882, "grad_norm": 602.6644287109375, "learning_rate": 0.0001963999748821783, "loss": 7.2362, "step": 75940 }, { "epoch": 9.1395908543923, "grad_norm": 846.1353149414062, "learning_rate": 0.00019639896324509716, "loss": 7.2874, "step": 75950 }, { "epoch": 9.140794223826715, "grad_norm": 719.5494995117188, "learning_rate": 0.0001963979514685029, "loss": 7.3526, "step": 75960 }, { "epoch": 9.14199759326113, "grad_norm": 882.4271240234375, "learning_rate": 0.00019639693955239698, "loss": 7.2699, "step": 75970 }, { "epoch": 9.143200962695548, "grad_norm": 416.4239196777344, "learning_rate": 0.0001963959274967809, "loss": 7.3739, "step": 75980 }, { "epoch": 9.144404332129964, "grad_norm": 3346.033935546875, "learning_rate": 0.00019639491530165613, "loss": 7.2879, "step": 75990 }, { "epoch": 9.14560770156438, "grad_norm": 1748.701171875, "learning_rate": 0.0001963939029670241, "loss": 7.2639, "step": 76000 }, { "epoch": 9.146811070998796, "grad_norm": 967.0650634765625, "learning_rate": 0.00019639289049288628, "loss": 7.3475, "step": 76010 }, { "epoch": 9.148014440433213, "grad_norm": 925.6656494140625, "learning_rate": 0.00019639187787924418, "loss": 7.3854, "step": 76020 }, { "epoch": 9.14921780986763, "grad_norm": 1009.5439453125, "learning_rate": 0.00019639086512609924, "loss": 7.214, "step": 76030 }, { "epoch": 9.150421179302045, "grad_norm": 719.2240600585938, "learning_rate": 0.0001963898522334529, "loss": 7.4178, "step": 76040 }, { "epoch": 9.151624548736462, "grad_norm": 1386.6724853515625, "learning_rate": 0.00019638883920130665, "loss": 7.2835, "step": 76050 }, { "epoch": 9.152827918170878, "grad_norm": 523.4008178710938, "learning_rate": 0.00019638782602966192, "loss": 7.2417, "step": 76060 }, { "epoch": 9.154031287605294, "grad_norm": 1144.1290283203125, "learning_rate": 0.00019638681271852025, "loss": 7.1999, "step": 76070 }, { "epoch": 9.155234657039712, "grad_norm": 1325.0841064453125, "learning_rate": 0.00019638579926788306, "loss": 7.2872, "step": 76080 }, { "epoch": 9.156438026474127, "grad_norm": 2343.41845703125, "learning_rate": 0.0001963847856777518, "loss": 7.2956, "step": 76090 }, { "epoch": 9.157641395908543, "grad_norm": 606.81494140625, "learning_rate": 0.00019638377194812799, "loss": 7.438, "step": 76100 }, { "epoch": 9.15884476534296, "grad_norm": 415.5561828613281, "learning_rate": 0.00019638275807901302, "loss": 7.2043, "step": 76110 }, { "epoch": 9.160048134777377, "grad_norm": 832.2674560546875, "learning_rate": 0.00019638174407040845, "loss": 7.2921, "step": 76120 }, { "epoch": 9.161251504211792, "grad_norm": 376.2272644042969, "learning_rate": 0.00019638072992231566, "loss": 7.2569, "step": 76130 }, { "epoch": 9.16245487364621, "grad_norm": 1342.66357421875, "learning_rate": 0.00019637971563473617, "loss": 7.2735, "step": 76140 }, { "epoch": 9.163658243080626, "grad_norm": 2213.8486328125, "learning_rate": 0.00019637870120767147, "loss": 7.2528, "step": 76150 }, { "epoch": 9.164861612515042, "grad_norm": 456.4389343261719, "learning_rate": 0.00019637768664112297, "loss": 7.2569, "step": 76160 }, { "epoch": 9.166064981949459, "grad_norm": 1014.8824462890625, "learning_rate": 0.00019637667193509218, "loss": 7.2242, "step": 76170 }, { "epoch": 9.167268351383875, "grad_norm": 1248.652587890625, "learning_rate": 0.0001963756570895805, "loss": 7.3643, "step": 76180 }, { "epoch": 9.16847172081829, "grad_norm": 844.365478515625, "learning_rate": 0.00019637464210458952, "loss": 7.2705, "step": 76190 }, { "epoch": 9.169675090252708, "grad_norm": 3494.603515625, "learning_rate": 0.00019637362698012058, "loss": 7.1682, "step": 76200 }, { "epoch": 9.170878459687124, "grad_norm": 851.4253540039062, "learning_rate": 0.00019637261171617521, "loss": 7.2478, "step": 76210 }, { "epoch": 9.17208182912154, "grad_norm": 1071.74169921875, "learning_rate": 0.0001963715963127549, "loss": 7.3127, "step": 76220 }, { "epoch": 9.173285198555957, "grad_norm": 780.7906494140625, "learning_rate": 0.00019637058076986112, "loss": 7.1742, "step": 76230 }, { "epoch": 9.174488567990373, "grad_norm": 2235.20556640625, "learning_rate": 0.00019636956508749527, "loss": 7.2388, "step": 76240 }, { "epoch": 9.175691937424789, "grad_norm": 1000.28662109375, "learning_rate": 0.00019636854926565888, "loss": 7.2958, "step": 76250 }, { "epoch": 9.176895306859207, "grad_norm": 591.0308837890625, "learning_rate": 0.00019636753330435345, "loss": 7.223, "step": 76260 }, { "epoch": 9.178098676293622, "grad_norm": 830.2313232421875, "learning_rate": 0.00019636651720358035, "loss": 7.3221, "step": 76270 }, { "epoch": 9.179302045728038, "grad_norm": 445.8819580078125, "learning_rate": 0.00019636550096334112, "loss": 7.2662, "step": 76280 }, { "epoch": 9.180505415162456, "grad_norm": 994.1788330078125, "learning_rate": 0.00019636448458363722, "loss": 7.2521, "step": 76290 }, { "epoch": 9.181708784596871, "grad_norm": 809.820068359375, "learning_rate": 0.00019636346806447015, "loss": 7.3052, "step": 76300 }, { "epoch": 9.182912154031287, "grad_norm": 587.2872314453125, "learning_rate": 0.0001963624514058413, "loss": 7.2471, "step": 76310 }, { "epoch": 9.184115523465705, "grad_norm": 1231.2158203125, "learning_rate": 0.00019636143460775222, "loss": 7.2079, "step": 76320 }, { "epoch": 9.18531889290012, "grad_norm": 1350.02978515625, "learning_rate": 0.00019636041767020435, "loss": 7.1862, "step": 76330 }, { "epoch": 9.186522262334536, "grad_norm": 2103.52294921875, "learning_rate": 0.00019635940059319916, "loss": 7.319, "step": 76340 }, { "epoch": 9.187725631768952, "grad_norm": 2919.232666015625, "learning_rate": 0.00019635838337673814, "loss": 7.0929, "step": 76350 }, { "epoch": 9.18892900120337, "grad_norm": 1127.0545654296875, "learning_rate": 0.00019635736602082276, "loss": 7.2528, "step": 76360 }, { "epoch": 9.190132370637786, "grad_norm": 3849.189208984375, "learning_rate": 0.00019635634852545447, "loss": 7.2087, "step": 76370 }, { "epoch": 9.191335740072201, "grad_norm": 2272.21728515625, "learning_rate": 0.00019635533089063474, "loss": 7.2832, "step": 76380 }, { "epoch": 9.192539109506619, "grad_norm": 4344.23828125, "learning_rate": 0.00019635431311636504, "loss": 7.3083, "step": 76390 }, { "epoch": 9.193742478941035, "grad_norm": 2692.489013671875, "learning_rate": 0.0001963532952026469, "loss": 7.2261, "step": 76400 }, { "epoch": 9.19494584837545, "grad_norm": 2531.884033203125, "learning_rate": 0.00019635227714948173, "loss": 7.3983, "step": 76410 }, { "epoch": 9.196149217809868, "grad_norm": 3592.314453125, "learning_rate": 0.00019635125895687105, "loss": 7.2888, "step": 76420 }, { "epoch": 9.197352587244284, "grad_norm": 2629.387451171875, "learning_rate": 0.00019635024062481627, "loss": 7.2016, "step": 76430 }, { "epoch": 9.1985559566787, "grad_norm": 1859.731689453125, "learning_rate": 0.00019634922215331892, "loss": 7.3499, "step": 76440 }, { "epoch": 9.199759326113117, "grad_norm": 2324.42138671875, "learning_rate": 0.00019634820354238048, "loss": 7.2116, "step": 76450 }, { "epoch": 9.200962695547533, "grad_norm": 2354.31103515625, "learning_rate": 0.00019634718479200237, "loss": 7.2484, "step": 76460 }, { "epoch": 9.202166064981949, "grad_norm": 2572.722900390625, "learning_rate": 0.00019634616590218613, "loss": 7.2555, "step": 76470 }, { "epoch": 9.203369434416366, "grad_norm": 1928.0738525390625, "learning_rate": 0.0001963451468729332, "loss": 7.2725, "step": 76480 }, { "epoch": 9.204572803850782, "grad_norm": 2449.217529296875, "learning_rate": 0.00019634412770424504, "loss": 7.2963, "step": 76490 }, { "epoch": 9.205776173285198, "grad_norm": 2320.357177734375, "learning_rate": 0.00019634310839612313, "loss": 7.272, "step": 76500 }, { "epoch": 9.206979542719615, "grad_norm": 1060.3175048828125, "learning_rate": 0.00019634208894856895, "loss": 7.3014, "step": 76510 }, { "epoch": 9.208182912154031, "grad_norm": 2839.287353515625, "learning_rate": 0.00019634106936158402, "loss": 7.3121, "step": 76520 }, { "epoch": 9.209386281588447, "grad_norm": 3156.68115234375, "learning_rate": 0.00019634004963516973, "loss": 7.2479, "step": 76530 }, { "epoch": 9.210589651022865, "grad_norm": 1798.8192138671875, "learning_rate": 0.00019633902976932762, "loss": 7.2339, "step": 76540 }, { "epoch": 9.21179302045728, "grad_norm": 1643.83740234375, "learning_rate": 0.00019633800976405917, "loss": 7.1835, "step": 76550 }, { "epoch": 9.212996389891696, "grad_norm": 1385.792724609375, "learning_rate": 0.0001963369896193658, "loss": 7.3373, "step": 76560 }, { "epoch": 9.214199759326114, "grad_norm": 3337.798583984375, "learning_rate": 0.00019633596933524907, "loss": 7.2332, "step": 76570 }, { "epoch": 9.21540312876053, "grad_norm": 1712.10400390625, "learning_rate": 0.00019633494891171038, "loss": 7.3596, "step": 76580 }, { "epoch": 9.216606498194945, "grad_norm": 3498.635498046875, "learning_rate": 0.0001963339283487512, "loss": 7.268, "step": 76590 }, { "epoch": 9.217809867629363, "grad_norm": 3339.609375, "learning_rate": 0.0001963329076463731, "loss": 7.1177, "step": 76600 }, { "epoch": 9.219013237063779, "grad_norm": 1963.313232421875, "learning_rate": 0.00019633188680457744, "loss": 7.2571, "step": 76610 }, { "epoch": 9.220216606498195, "grad_norm": 1857.890625, "learning_rate": 0.00019633086582336582, "loss": 7.2689, "step": 76620 }, { "epoch": 9.221419975932612, "grad_norm": 1188.5067138671875, "learning_rate": 0.0001963298447027396, "loss": 7.2145, "step": 76630 }, { "epoch": 9.222623345367028, "grad_norm": 786.3170776367188, "learning_rate": 0.00019632882344270037, "loss": 7.2828, "step": 76640 }, { "epoch": 9.223826714801444, "grad_norm": 2681.61865234375, "learning_rate": 0.0001963278020432495, "loss": 7.3441, "step": 76650 }, { "epoch": 9.225030084235861, "grad_norm": 787.3405151367188, "learning_rate": 0.00019632678050438853, "loss": 7.2636, "step": 76660 }, { "epoch": 9.226233453670277, "grad_norm": 832.1189575195312, "learning_rate": 0.00019632575882611893, "loss": 7.2567, "step": 76670 }, { "epoch": 9.227436823104693, "grad_norm": 919.2532348632812, "learning_rate": 0.00019632473700844218, "loss": 7.1968, "step": 76680 }, { "epoch": 9.22864019253911, "grad_norm": 2160.03271484375, "learning_rate": 0.00019632371505135977, "loss": 7.2358, "step": 76690 }, { "epoch": 9.229843561973526, "grad_norm": 1070.5589599609375, "learning_rate": 0.0001963226929548731, "loss": 7.2749, "step": 76700 }, { "epoch": 9.231046931407942, "grad_norm": 1854.709716796875, "learning_rate": 0.00019632167071898375, "loss": 7.2184, "step": 76710 }, { "epoch": 9.232250300842358, "grad_norm": 2756.621337890625, "learning_rate": 0.0001963206483436932, "loss": 7.234, "step": 76720 }, { "epoch": 9.233453670276775, "grad_norm": 2554.576416015625, "learning_rate": 0.00019631962582900285, "loss": 7.2721, "step": 76730 }, { "epoch": 9.234657039711191, "grad_norm": 1862.0181884765625, "learning_rate": 0.00019631860317491422, "loss": 7.2227, "step": 76740 }, { "epoch": 9.235860409145607, "grad_norm": 1320.4754638671875, "learning_rate": 0.00019631758038142879, "loss": 7.2316, "step": 76750 }, { "epoch": 9.237063778580024, "grad_norm": 1313.3798828125, "learning_rate": 0.00019631655744854805, "loss": 7.3046, "step": 76760 }, { "epoch": 9.23826714801444, "grad_norm": 1622.3133544921875, "learning_rate": 0.00019631553437627348, "loss": 7.2199, "step": 76770 }, { "epoch": 9.239470517448856, "grad_norm": 2560.140380859375, "learning_rate": 0.00019631451116460652, "loss": 7.3148, "step": 76780 }, { "epoch": 9.240673886883274, "grad_norm": 8938.111328125, "learning_rate": 0.0001963134878135487, "loss": 7.2599, "step": 76790 }, { "epoch": 9.24187725631769, "grad_norm": 3016.765380859375, "learning_rate": 0.00019631246432310148, "loss": 7.2187, "step": 76800 }, { "epoch": 9.243080625752105, "grad_norm": 1779.9124755859375, "learning_rate": 0.00019631144069326637, "loss": 7.2957, "step": 76810 }, { "epoch": 9.244283995186523, "grad_norm": 1240.015625, "learning_rate": 0.00019631041692404482, "loss": 7.3554, "step": 76820 }, { "epoch": 9.245487364620939, "grad_norm": 2843.078857421875, "learning_rate": 0.00019630939301543832, "loss": 7.2836, "step": 76830 }, { "epoch": 9.246690734055354, "grad_norm": 2494.840576171875, "learning_rate": 0.00019630836896744833, "loss": 7.2299, "step": 76840 }, { "epoch": 9.247894103489772, "grad_norm": 6024.77099609375, "learning_rate": 0.00019630734478007637, "loss": 7.2111, "step": 76850 }, { "epoch": 9.249097472924188, "grad_norm": 4501.42041015625, "learning_rate": 0.00019630632045332388, "loss": 7.177, "step": 76860 }, { "epoch": 9.250300842358604, "grad_norm": 14304.2392578125, "learning_rate": 0.00019630529598719238, "loss": 7.2481, "step": 76870 }, { "epoch": 9.251504211793021, "grad_norm": 9051.515625, "learning_rate": 0.00019630427138168336, "loss": 7.3745, "step": 76880 }, { "epoch": 9.252707581227437, "grad_norm": 4632.99365234375, "learning_rate": 0.00019630324663679827, "loss": 7.3549, "step": 76890 }, { "epoch": 9.253910950661853, "grad_norm": 8207.3876953125, "learning_rate": 0.0001963022217525386, "loss": 7.3544, "step": 76900 }, { "epoch": 9.25511432009627, "grad_norm": 4218.57666015625, "learning_rate": 0.0001963011967289058, "loss": 7.2332, "step": 76910 }, { "epoch": 9.256317689530686, "grad_norm": 3511.77783203125, "learning_rate": 0.00019630017156590146, "loss": 7.2147, "step": 76920 }, { "epoch": 9.257521058965102, "grad_norm": 1797.2041015625, "learning_rate": 0.00019629914626352696, "loss": 7.234, "step": 76930 }, { "epoch": 9.25872442839952, "grad_norm": 4163.26806640625, "learning_rate": 0.00019629812082178384, "loss": 7.1906, "step": 76940 }, { "epoch": 9.259927797833935, "grad_norm": 6068.142578125, "learning_rate": 0.00019629709524067356, "loss": 7.2345, "step": 76950 }, { "epoch": 9.261131167268351, "grad_norm": 4087.80517578125, "learning_rate": 0.0001962960695201976, "loss": 7.2311, "step": 76960 }, { "epoch": 9.262334536702769, "grad_norm": 4219.29150390625, "learning_rate": 0.00019629504366035745, "loss": 7.1954, "step": 76970 }, { "epoch": 9.263537906137184, "grad_norm": 3657.016357421875, "learning_rate": 0.00019629401766115462, "loss": 7.3137, "step": 76980 }, { "epoch": 9.2647412755716, "grad_norm": 4069.469482421875, "learning_rate": 0.00019629299152259052, "loss": 7.302, "step": 76990 }, { "epoch": 9.265944645006018, "grad_norm": 2477.508544921875, "learning_rate": 0.00019629196524466674, "loss": 7.2541, "step": 77000 }, { "epoch": 9.267148014440433, "grad_norm": 3701.619140625, "learning_rate": 0.00019629093882738468, "loss": 7.3844, "step": 77010 }, { "epoch": 9.26835138387485, "grad_norm": 4865.80029296875, "learning_rate": 0.00019628991227074586, "loss": 7.2111, "step": 77020 }, { "epoch": 9.269554753309265, "grad_norm": 2069.574951171875, "learning_rate": 0.00019628888557475177, "loss": 7.2608, "step": 77030 }, { "epoch": 9.270758122743683, "grad_norm": 6883.31787109375, "learning_rate": 0.00019628785873940393, "loss": 7.3075, "step": 77040 }, { "epoch": 9.271961492178098, "grad_norm": 2148.96533203125, "learning_rate": 0.00019628683176470371, "loss": 7.1896, "step": 77050 }, { "epoch": 9.273164861612514, "grad_norm": 1338.982421875, "learning_rate": 0.00019628580465065273, "loss": 7.2983, "step": 77060 }, { "epoch": 9.274368231046932, "grad_norm": 6684.205078125, "learning_rate": 0.00019628477739725239, "loss": 7.112, "step": 77070 }, { "epoch": 9.275571600481348, "grad_norm": 1727.3746337890625, "learning_rate": 0.00019628375000450423, "loss": 7.0922, "step": 77080 }, { "epoch": 9.276774969915763, "grad_norm": 3326.0224609375, "learning_rate": 0.0001962827224724097, "loss": 7.1227, "step": 77090 }, { "epoch": 9.277978339350181, "grad_norm": 3629.23828125, "learning_rate": 0.00019628169480097028, "loss": 7.2084, "step": 77100 }, { "epoch": 9.279181708784597, "grad_norm": 2339.129150390625, "learning_rate": 0.0001962806669901875, "loss": 7.2363, "step": 77110 }, { "epoch": 9.280385078219012, "grad_norm": 3463.510986328125, "learning_rate": 0.00019627963904006283, "loss": 7.2249, "step": 77120 }, { "epoch": 9.28158844765343, "grad_norm": 3700.374755859375, "learning_rate": 0.0001962786109505977, "loss": 7.2272, "step": 77130 }, { "epoch": 9.282791817087846, "grad_norm": 9186.841796875, "learning_rate": 0.00019627758272179372, "loss": 7.1917, "step": 77140 }, { "epoch": 9.283995186522262, "grad_norm": 4616.56201171875, "learning_rate": 0.00019627655435365226, "loss": 7.3142, "step": 77150 }, { "epoch": 9.28519855595668, "grad_norm": 2341.943115234375, "learning_rate": 0.0001962755258461749, "loss": 7.289, "step": 77160 }, { "epoch": 9.286401925391095, "grad_norm": 4500.80615234375, "learning_rate": 0.00019627449719936303, "loss": 7.2633, "step": 77170 }, { "epoch": 9.28760529482551, "grad_norm": 2312.70654296875, "learning_rate": 0.00019627346841321824, "loss": 7.2438, "step": 77180 }, { "epoch": 9.288808664259928, "grad_norm": 4723.912109375, "learning_rate": 0.00019627243948774196, "loss": 7.2204, "step": 77190 }, { "epoch": 9.290012033694344, "grad_norm": 6762.6513671875, "learning_rate": 0.00019627141042293566, "loss": 7.2854, "step": 77200 }, { "epoch": 9.29121540312876, "grad_norm": 2534.995849609375, "learning_rate": 0.00019627038121880087, "loss": 7.2793, "step": 77210 }, { "epoch": 9.292418772563177, "grad_norm": 10283.8681640625, "learning_rate": 0.00019626935187533912, "loss": 7.1858, "step": 77220 }, { "epoch": 9.293622141997593, "grad_norm": 4278.6865234375, "learning_rate": 0.0001962683223925518, "loss": 7.2183, "step": 77230 }, { "epoch": 9.294825511432009, "grad_norm": 12018.0615234375, "learning_rate": 0.00019626729277044046, "loss": 7.2766, "step": 77240 }, { "epoch": 9.296028880866427, "grad_norm": 5349.31640625, "learning_rate": 0.0001962662630090066, "loss": 7.2706, "step": 77250 }, { "epoch": 9.297232250300842, "grad_norm": 3850.95068359375, "learning_rate": 0.00019626523310825163, "loss": 7.2422, "step": 77260 }, { "epoch": 9.298435619735258, "grad_norm": 4175.66015625, "learning_rate": 0.00019626420306817716, "loss": 7.2801, "step": 77270 }, { "epoch": 9.299638989169676, "grad_norm": 2477.304931640625, "learning_rate": 0.00019626317288878463, "loss": 7.2748, "step": 77280 }, { "epoch": 9.300842358604092, "grad_norm": 8570.326171875, "learning_rate": 0.00019626214257007548, "loss": 7.3768, "step": 77290 }, { "epoch": 9.302045728038507, "grad_norm": 5402.13232421875, "learning_rate": 0.00019626111211205126, "loss": 7.3386, "step": 77300 }, { "epoch": 9.303249097472925, "grad_norm": 3276.2119140625, "learning_rate": 0.00019626008151471346, "loss": 7.4104, "step": 77310 }, { "epoch": 9.30445246690734, "grad_norm": 4396.63525390625, "learning_rate": 0.00019625905077806354, "loss": 7.2342, "step": 77320 }, { "epoch": 9.305655836341757, "grad_norm": 3854.626220703125, "learning_rate": 0.00019625801990210299, "loss": 7.2903, "step": 77330 }, { "epoch": 9.306859205776174, "grad_norm": 8129.89404296875, "learning_rate": 0.00019625698888683335, "loss": 7.3334, "step": 77340 }, { "epoch": 9.30806257521059, "grad_norm": 6008.79248046875, "learning_rate": 0.00019625595773225607, "loss": 7.2288, "step": 77350 }, { "epoch": 9.309265944645006, "grad_norm": 6686.689453125, "learning_rate": 0.00019625492643837264, "loss": 7.2692, "step": 77360 }, { "epoch": 9.310469314079423, "grad_norm": 4833.21923828125, "learning_rate": 0.0001962538950051846, "loss": 7.2991, "step": 77370 }, { "epoch": 9.311672683513839, "grad_norm": 3234.591064453125, "learning_rate": 0.00019625286343269338, "loss": 7.172, "step": 77380 }, { "epoch": 9.312876052948255, "grad_norm": 5472.28125, "learning_rate": 0.00019625183172090053, "loss": 7.2136, "step": 77390 }, { "epoch": 9.314079422382672, "grad_norm": 8887.9833984375, "learning_rate": 0.0001962507998698075, "loss": 7.2837, "step": 77400 }, { "epoch": 9.315282791817088, "grad_norm": 3831.244384765625, "learning_rate": 0.00019624976787941577, "loss": 7.2685, "step": 77410 }, { "epoch": 9.316486161251504, "grad_norm": 2643.47705078125, "learning_rate": 0.0001962487357497269, "loss": 7.3147, "step": 77420 }, { "epoch": 9.31768953068592, "grad_norm": 4234.41015625, "learning_rate": 0.00019624770348074236, "loss": 7.405, "step": 77430 }, { "epoch": 9.318892900120337, "grad_norm": 9308.5439453125, "learning_rate": 0.00019624667107246357, "loss": 7.1495, "step": 77440 }, { "epoch": 9.320096269554753, "grad_norm": 8027.15185546875, "learning_rate": 0.00019624563852489212, "loss": 7.3376, "step": 77450 }, { "epoch": 9.321299638989169, "grad_norm": 6350.17236328125, "learning_rate": 0.00019624460583802947, "loss": 7.4338, "step": 77460 }, { "epoch": 9.322503008423586, "grad_norm": 11119.685546875, "learning_rate": 0.00019624357301187715, "loss": 7.3742, "step": 77470 }, { "epoch": 9.323706377858002, "grad_norm": 4849.087890625, "learning_rate": 0.00019624254004643656, "loss": 7.4504, "step": 77480 }, { "epoch": 9.324909747292418, "grad_norm": 7170.11572265625, "learning_rate": 0.00019624150694170925, "loss": 7.6526, "step": 77490 }, { "epoch": 9.326113116726836, "grad_norm": 4548.6494140625, "learning_rate": 0.00019624047369769677, "loss": 7.4069, "step": 77500 }, { "epoch": 9.327316486161251, "grad_norm": 3030.703125, "learning_rate": 0.00019623944031440054, "loss": 7.3581, "step": 77510 }, { "epoch": 9.328519855595667, "grad_norm": 3149.457763671875, "learning_rate": 0.00019623840679182207, "loss": 7.3331, "step": 77520 }, { "epoch": 9.329723225030085, "grad_norm": 8242.6728515625, "learning_rate": 0.0001962373731299629, "loss": 7.3794, "step": 77530 }, { "epoch": 9.3309265944645, "grad_norm": 4847.974609375, "learning_rate": 0.00019623633932882445, "loss": 7.442, "step": 77540 }, { "epoch": 9.332129963898916, "grad_norm": 2945.122802734375, "learning_rate": 0.00019623530538840825, "loss": 7.2981, "step": 77550 }, { "epoch": 9.333333333333334, "grad_norm": 2150.29541015625, "learning_rate": 0.00019623427130871583, "loss": 7.3586, "step": 77560 }, { "epoch": 9.33453670276775, "grad_norm": 3948.70947265625, "learning_rate": 0.00019623323708974866, "loss": 7.1776, "step": 77570 }, { "epoch": 9.335740072202166, "grad_norm": 8347.814453125, "learning_rate": 0.00019623220273150822, "loss": 7.3039, "step": 77580 }, { "epoch": 9.336943441636583, "grad_norm": 5592.7275390625, "learning_rate": 0.00019623116823399604, "loss": 7.3945, "step": 77590 }, { "epoch": 9.338146811070999, "grad_norm": 4071.8798828125, "learning_rate": 0.00019623013359721357, "loss": 7.2104, "step": 77600 }, { "epoch": 9.339350180505415, "grad_norm": 3863.02099609375, "learning_rate": 0.00019622909882116237, "loss": 7.4239, "step": 77610 }, { "epoch": 9.340553549939832, "grad_norm": 8054.66162109375, "learning_rate": 0.0001962280639058439, "loss": 7.3631, "step": 77620 }, { "epoch": 9.341756919374248, "grad_norm": 4694.1416015625, "learning_rate": 0.00019622702885125967, "loss": 7.3631, "step": 77630 }, { "epoch": 9.342960288808664, "grad_norm": 6013.3544921875, "learning_rate": 0.00019622599365741117, "loss": 7.3621, "step": 77640 }, { "epoch": 9.344163658243081, "grad_norm": 4333.583984375, "learning_rate": 0.00019622495832429988, "loss": 7.2826, "step": 77650 }, { "epoch": 9.345367027677497, "grad_norm": 2671.57763671875, "learning_rate": 0.00019622392285192732, "loss": 7.2949, "step": 77660 }, { "epoch": 9.346570397111913, "grad_norm": 4898.59716796875, "learning_rate": 0.000196222887240295, "loss": 7.3193, "step": 77670 }, { "epoch": 9.34777376654633, "grad_norm": 5069.6416015625, "learning_rate": 0.0001962218514894044, "loss": 7.2262, "step": 77680 }, { "epoch": 9.348977135980746, "grad_norm": 6325.212890625, "learning_rate": 0.00019622081559925704, "loss": 7.2731, "step": 77690 }, { "epoch": 9.350180505415162, "grad_norm": 4033.055419921875, "learning_rate": 0.0001962197795698544, "loss": 7.2964, "step": 77700 }, { "epoch": 9.35138387484958, "grad_norm": 3727.21875, "learning_rate": 0.00019621874340119798, "loss": 7.2466, "step": 77710 }, { "epoch": 9.352587244283995, "grad_norm": 3326.578857421875, "learning_rate": 0.00019621770709328927, "loss": 7.2284, "step": 77720 }, { "epoch": 9.353790613718411, "grad_norm": 3845.493408203125, "learning_rate": 0.00019621667064612978, "loss": 7.2972, "step": 77730 }, { "epoch": 9.354993983152827, "grad_norm": 6615.982421875, "learning_rate": 0.00019621563405972103, "loss": 7.2749, "step": 77740 }, { "epoch": 9.356197352587245, "grad_norm": 5356.36328125, "learning_rate": 0.00019621459733406448, "loss": 7.3435, "step": 77750 }, { "epoch": 9.35740072202166, "grad_norm": 3604.121826171875, "learning_rate": 0.00019621356046916168, "loss": 7.2066, "step": 77760 }, { "epoch": 9.358604091456076, "grad_norm": 5260.88037109375, "learning_rate": 0.0001962125234650141, "loss": 7.23, "step": 77770 }, { "epoch": 9.359807460890494, "grad_norm": 1868.055908203125, "learning_rate": 0.0001962114863216232, "loss": 7.2848, "step": 77780 }, { "epoch": 9.36101083032491, "grad_norm": 2795.3427734375, "learning_rate": 0.00019621044903899058, "loss": 7.2138, "step": 77790 }, { "epoch": 9.362214199759325, "grad_norm": 8162.1064453125, "learning_rate": 0.00019620941161711767, "loss": 7.2871, "step": 77800 }, { "epoch": 9.363417569193743, "grad_norm": 1776.7362060546875, "learning_rate": 0.000196208374056006, "loss": 7.2871, "step": 77810 }, { "epoch": 9.364620938628159, "grad_norm": 5059.310546875, "learning_rate": 0.00019620733635565703, "loss": 7.2553, "step": 77820 }, { "epoch": 9.365824308062574, "grad_norm": 2266.853759765625, "learning_rate": 0.0001962062985160723, "loss": 7.3381, "step": 77830 }, { "epoch": 9.367027677496992, "grad_norm": 2640.204833984375, "learning_rate": 0.0001962052605372533, "loss": 7.2424, "step": 77840 }, { "epoch": 9.368231046931408, "grad_norm": 1352.280517578125, "learning_rate": 0.00019620422241920155, "loss": 7.1813, "step": 77850 }, { "epoch": 9.369434416365824, "grad_norm": 2594.178955078125, "learning_rate": 0.00019620318416191854, "loss": 7.233, "step": 77860 }, { "epoch": 9.370637785800241, "grad_norm": 5957.42529296875, "learning_rate": 0.00019620214576540577, "loss": 7.2733, "step": 77870 }, { "epoch": 9.371841155234657, "grad_norm": 3715.390625, "learning_rate": 0.00019620110722966475, "loss": 7.3269, "step": 77880 }, { "epoch": 9.373044524669073, "grad_norm": 2783.47607421875, "learning_rate": 0.00019620006855469694, "loss": 7.258, "step": 77890 }, { "epoch": 9.37424789410349, "grad_norm": 4849.11376953125, "learning_rate": 0.0001961990297405039, "loss": 7.1817, "step": 77900 }, { "epoch": 9.375451263537906, "grad_norm": 3400.09326171875, "learning_rate": 0.00019619799078708715, "loss": 7.2494, "step": 77910 }, { "epoch": 9.376654632972322, "grad_norm": 5464.45556640625, "learning_rate": 0.0001961969516944481, "loss": 7.1859, "step": 77920 }, { "epoch": 9.37785800240674, "grad_norm": 2846.5673828125, "learning_rate": 0.00019619591246258834, "loss": 7.3879, "step": 77930 }, { "epoch": 9.379061371841155, "grad_norm": 3701.296630859375, "learning_rate": 0.00019619487309150934, "loss": 7.2437, "step": 77940 }, { "epoch": 9.380264741275571, "grad_norm": 2765.167724609375, "learning_rate": 0.00019619383358121262, "loss": 7.2886, "step": 77950 }, { "epoch": 9.381468110709989, "grad_norm": 2480.395751953125, "learning_rate": 0.00019619279393169966, "loss": 7.2509, "step": 77960 }, { "epoch": 9.382671480144404, "grad_norm": 1947.8099365234375, "learning_rate": 0.00019619175414297197, "loss": 7.3437, "step": 77970 }, { "epoch": 9.38387484957882, "grad_norm": 2265.4072265625, "learning_rate": 0.0001961907142150311, "loss": 7.1724, "step": 77980 }, { "epoch": 9.385078219013238, "grad_norm": 6581.12255859375, "learning_rate": 0.00019618967414787846, "loss": 7.3022, "step": 77990 }, { "epoch": 9.386281588447654, "grad_norm": 3810.63330078125, "learning_rate": 0.00019618863394151566, "loss": 7.259, "step": 78000 }, { "epoch": 9.38748495788207, "grad_norm": 4691.70166015625, "learning_rate": 0.0001961875935959441, "loss": 7.2887, "step": 78010 }, { "epoch": 9.388688327316487, "grad_norm": 2855.01025390625, "learning_rate": 0.00019618655311116543, "loss": 7.245, "step": 78020 }, { "epoch": 9.389891696750903, "grad_norm": 3010.580078125, "learning_rate": 0.00019618551248718104, "loss": 7.2509, "step": 78030 }, { "epoch": 9.391095066185319, "grad_norm": 4552.91162109375, "learning_rate": 0.0001961844717239924, "loss": 7.3897, "step": 78040 }, { "epoch": 9.392298435619736, "grad_norm": 10382.8818359375, "learning_rate": 0.00019618343082160116, "loss": 7.2379, "step": 78050 }, { "epoch": 9.393501805054152, "grad_norm": 10426.2216796875, "learning_rate": 0.0001961823897800087, "loss": 7.245, "step": 78060 }, { "epoch": 9.394705174488568, "grad_norm": 6853.20068359375, "learning_rate": 0.00019618134859921663, "loss": 7.3681, "step": 78070 }, { "epoch": 9.395908543922985, "grad_norm": 3953.904541015625, "learning_rate": 0.00019618030727922635, "loss": 7.4511, "step": 78080 }, { "epoch": 9.397111913357401, "grad_norm": 4179.099609375, "learning_rate": 0.00019617926582003945, "loss": 7.24, "step": 78090 }, { "epoch": 9.398315282791817, "grad_norm": 4626.548828125, "learning_rate": 0.00019617822422165739, "loss": 7.199, "step": 78100 }, { "epoch": 9.399518652226233, "grad_norm": 3491.727294921875, "learning_rate": 0.00019617718248408174, "loss": 7.2903, "step": 78110 }, { "epoch": 9.40072202166065, "grad_norm": 1451.7998046875, "learning_rate": 0.00019617614060731388, "loss": 7.2262, "step": 78120 }, { "epoch": 9.401925391095066, "grad_norm": 1606.47802734375, "learning_rate": 0.00019617509859135544, "loss": 7.317, "step": 78130 }, { "epoch": 9.403128760529482, "grad_norm": 1304.86572265625, "learning_rate": 0.0001961740564362079, "loss": 7.1825, "step": 78140 }, { "epoch": 9.4043321299639, "grad_norm": 1305.392578125, "learning_rate": 0.00019617301414187274, "loss": 7.2438, "step": 78150 }, { "epoch": 9.405535499398315, "grad_norm": 461.7162780761719, "learning_rate": 0.00019617197170835148, "loss": 7.3259, "step": 78160 }, { "epoch": 9.406738868832731, "grad_norm": 835.4100952148438, "learning_rate": 0.00019617092913564566, "loss": 7.2617, "step": 78170 }, { "epoch": 9.407942238267148, "grad_norm": 2162.604248046875, "learning_rate": 0.00019616988642375674, "loss": 7.3059, "step": 78180 }, { "epoch": 9.409145607701564, "grad_norm": 5187.7822265625, "learning_rate": 0.00019616884357268627, "loss": 7.2772, "step": 78190 }, { "epoch": 9.41034897713598, "grad_norm": 2610.12841796875, "learning_rate": 0.0001961678005824357, "loss": 7.3588, "step": 78200 }, { "epoch": 9.411552346570398, "grad_norm": 2318.993408203125, "learning_rate": 0.0001961667574530066, "loss": 7.4081, "step": 78210 }, { "epoch": 9.412755716004813, "grad_norm": 5456.6708984375, "learning_rate": 0.0001961657141844005, "loss": 7.2809, "step": 78220 }, { "epoch": 9.41395908543923, "grad_norm": 1491.068115234375, "learning_rate": 0.0001961646707766188, "loss": 7.2779, "step": 78230 }, { "epoch": 9.415162454873647, "grad_norm": 2164.580322265625, "learning_rate": 0.0001961636272296631, "loss": 7.1812, "step": 78240 }, { "epoch": 9.416365824308063, "grad_norm": 3099.32275390625, "learning_rate": 0.00019616258354353491, "loss": 7.3556, "step": 78250 }, { "epoch": 9.417569193742478, "grad_norm": 1073.731201171875, "learning_rate": 0.0001961615397182357, "loss": 7.3093, "step": 78260 }, { "epoch": 9.418772563176896, "grad_norm": 1488.4786376953125, "learning_rate": 0.00019616049575376702, "loss": 7.1745, "step": 78270 }, { "epoch": 9.419975932611312, "grad_norm": 835.6521606445312, "learning_rate": 0.00019615945165013033, "loss": 7.0926, "step": 78280 }, { "epoch": 9.421179302045728, "grad_norm": 1333.96142578125, "learning_rate": 0.00019615840740732724, "loss": 7.1712, "step": 78290 }, { "epoch": 9.422382671480145, "grad_norm": 2048.01416015625, "learning_rate": 0.00019615736302535912, "loss": 7.2467, "step": 78300 }, { "epoch": 9.42358604091456, "grad_norm": 759.7511596679688, "learning_rate": 0.0001961563185042276, "loss": 7.2253, "step": 78310 }, { "epoch": 9.424789410348977, "grad_norm": 1097.372314453125, "learning_rate": 0.0001961552738439341, "loss": 7.2261, "step": 78320 }, { "epoch": 9.425992779783394, "grad_norm": 602.5449829101562, "learning_rate": 0.00019615422904448022, "loss": 7.269, "step": 78330 }, { "epoch": 9.42719614921781, "grad_norm": 2424.92041015625, "learning_rate": 0.00019615318410586742, "loss": 7.2529, "step": 78340 }, { "epoch": 9.428399518652226, "grad_norm": 1713.84716796875, "learning_rate": 0.00019615213902809725, "loss": 7.1447, "step": 78350 }, { "epoch": 9.429602888086643, "grad_norm": 1154.8934326171875, "learning_rate": 0.00019615109381117117, "loss": 7.2958, "step": 78360 }, { "epoch": 9.43080625752106, "grad_norm": 3433.16455078125, "learning_rate": 0.0001961500484550907, "loss": 7.2972, "step": 78370 }, { "epoch": 9.432009626955475, "grad_norm": 2607.750732421875, "learning_rate": 0.00019614900295985738, "loss": 7.2306, "step": 78380 }, { "epoch": 9.433212996389893, "grad_norm": 2142.672119140625, "learning_rate": 0.00019614795732547272, "loss": 7.3421, "step": 78390 }, { "epoch": 9.434416365824308, "grad_norm": 2936.884033203125, "learning_rate": 0.00019614691155193822, "loss": 7.3912, "step": 78400 }, { "epoch": 9.435619735258724, "grad_norm": 326.2511901855469, "learning_rate": 0.00019614586563925543, "loss": 7.6074, "step": 78410 }, { "epoch": 9.43682310469314, "grad_norm": 962.3978271484375, "learning_rate": 0.0001961448195874258, "loss": 7.2849, "step": 78420 }, { "epoch": 9.438026474127557, "grad_norm": 540.7434692382812, "learning_rate": 0.0001961437733964509, "loss": 7.433, "step": 78430 }, { "epoch": 9.439229843561973, "grad_norm": 472.0730285644531, "learning_rate": 0.00019614272706633222, "loss": 7.4856, "step": 78440 }, { "epoch": 9.440433212996389, "grad_norm": 399.0505676269531, "learning_rate": 0.00019614168059707129, "loss": 7.5461, "step": 78450 }, { "epoch": 9.441636582430807, "grad_norm": 245.52577209472656, "learning_rate": 0.0001961406339886696, "loss": 7.5329, "step": 78460 }, { "epoch": 9.442839951865222, "grad_norm": 398.97039794921875, "learning_rate": 0.00019613958724112868, "loss": 7.4528, "step": 78470 }, { "epoch": 9.444043321299638, "grad_norm": 395.4848937988281, "learning_rate": 0.00019613854035445, "loss": 7.4018, "step": 78480 }, { "epoch": 9.445246690734056, "grad_norm": 275.9483642578125, "learning_rate": 0.00019613749332863517, "loss": 7.417, "step": 78490 }, { "epoch": 9.446450060168472, "grad_norm": 313.7587890625, "learning_rate": 0.00019613644616368563, "loss": 7.4118, "step": 78500 }, { "epoch": 9.447653429602887, "grad_norm": 440.3321838378906, "learning_rate": 0.0001961353988596029, "loss": 7.4619, "step": 78510 }, { "epoch": 9.448856799037305, "grad_norm": 371.33453369140625, "learning_rate": 0.00019613435141638855, "loss": 7.3197, "step": 78520 }, { "epoch": 9.45006016847172, "grad_norm": 146.487060546875, "learning_rate": 0.00019613330383404405, "loss": 7.3658, "step": 78530 }, { "epoch": 9.451263537906136, "grad_norm": 385.2295837402344, "learning_rate": 0.00019613225611257091, "loss": 7.3773, "step": 78540 }, { "epoch": 9.452466907340554, "grad_norm": 462.59490966796875, "learning_rate": 0.00019613120825197068, "loss": 7.3651, "step": 78550 }, { "epoch": 9.45367027677497, "grad_norm": 204.2476043701172, "learning_rate": 0.00019613016025224483, "loss": 7.3405, "step": 78560 }, { "epoch": 9.454873646209386, "grad_norm": 172.2794189453125, "learning_rate": 0.00019612911211339492, "loss": 7.2601, "step": 78570 }, { "epoch": 9.456077015643803, "grad_norm": 129.8483428955078, "learning_rate": 0.0001961280638354224, "loss": 7.2698, "step": 78580 }, { "epoch": 9.457280385078219, "grad_norm": 154.7813720703125, "learning_rate": 0.00019612701541832893, "loss": 7.3033, "step": 78590 }, { "epoch": 9.458483754512635, "grad_norm": 82.32933044433594, "learning_rate": 0.00019612596686211588, "loss": 7.3932, "step": 78600 }, { "epoch": 9.459687123947052, "grad_norm": 388.26068115234375, "learning_rate": 0.00019612491816678484, "loss": 7.3569, "step": 78610 }, { "epoch": 9.460890493381468, "grad_norm": 2829.243896484375, "learning_rate": 0.0001961238693323373, "loss": 7.684, "step": 78620 }, { "epoch": 9.462093862815884, "grad_norm": 3014.725341796875, "learning_rate": 0.00019612282035877477, "loss": 7.4296, "step": 78630 }, { "epoch": 9.463297232250302, "grad_norm": 22669.455078125, "learning_rate": 0.00019612177124609886, "loss": 8.0225, "step": 78640 }, { "epoch": 9.464500601684717, "grad_norm": 16556.046875, "learning_rate": 0.00019612072199431095, "loss": 8.7562, "step": 78650 }, { "epoch": 9.465703971119133, "grad_norm": 2059.367431640625, "learning_rate": 0.00019611967260341263, "loss": 7.8364, "step": 78660 }, { "epoch": 9.46690734055355, "grad_norm": 7453.05517578125, "learning_rate": 0.00019611862307340543, "loss": 8.4579, "step": 78670 }, { "epoch": 9.468110709987966, "grad_norm": 98882.0390625, "learning_rate": 0.00019611757340429087, "loss": 10.7681, "step": 78680 }, { "epoch": 9.469314079422382, "grad_norm": 5717.16796875, "learning_rate": 0.00019611652359607042, "loss": 9.2477, "step": 78690 }, { "epoch": 9.4705174488568, "grad_norm": 423.6444091796875, "learning_rate": 0.00019611547364874562, "loss": 9.2902, "step": 78700 }, { "epoch": 9.471720818291216, "grad_norm": 2988.169921875, "learning_rate": 0.000196114423562318, "loss": 8.2041, "step": 78710 }, { "epoch": 9.472924187725631, "grad_norm": 14273.8759765625, "learning_rate": 0.00019611337333678912, "loss": 8.4192, "step": 78720 }, { "epoch": 9.474127557160049, "grad_norm": 10774.318359375, "learning_rate": 0.00019611232297216043, "loss": 8.6146, "step": 78730 }, { "epoch": 9.475330926594465, "grad_norm": 18324.392578125, "learning_rate": 0.00019611127246843348, "loss": 8.4233, "step": 78740 }, { "epoch": 9.47653429602888, "grad_norm": 4517.78857421875, "learning_rate": 0.00019611022182560982, "loss": 7.8533, "step": 78750 }, { "epoch": 9.477737665463298, "grad_norm": 1231.1884765625, "learning_rate": 0.0001961091710436909, "loss": 7.6132, "step": 78760 }, { "epoch": 9.478941034897714, "grad_norm": 442.2496032714844, "learning_rate": 0.00019610812012267828, "loss": 7.6836, "step": 78770 }, { "epoch": 9.48014440433213, "grad_norm": 4462.33056640625, "learning_rate": 0.00019610706906257353, "loss": 7.512, "step": 78780 }, { "epoch": 9.481347773766545, "grad_norm": 87.68080139160156, "learning_rate": 0.00019610601786337805, "loss": 7.657, "step": 78790 }, { "epoch": 9.482551143200963, "grad_norm": 68.20985412597656, "learning_rate": 0.0001961049665250935, "loss": 7.55, "step": 78800 }, { "epoch": 9.483754512635379, "grad_norm": 16.40642738342285, "learning_rate": 0.00019610391504772129, "loss": 7.5414, "step": 78810 }, { "epoch": 9.484957882069795, "grad_norm": 29.987028121948242, "learning_rate": 0.00019610286343126304, "loss": 7.6145, "step": 78820 }, { "epoch": 9.486161251504212, "grad_norm": 14.566014289855957, "learning_rate": 0.00019610181167572014, "loss": 7.6461, "step": 78830 }, { "epoch": 9.487364620938628, "grad_norm": 12.040142059326172, "learning_rate": 0.00019610075978109426, "loss": 7.6969, "step": 78840 }, { "epoch": 9.488567990373044, "grad_norm": 6.859003067016602, "learning_rate": 0.00019609970774738683, "loss": 7.5266, "step": 78850 }, { "epoch": 9.489771359807461, "grad_norm": 6.0998358726501465, "learning_rate": 0.0001960986555745994, "loss": 7.5581, "step": 78860 }, { "epoch": 9.490974729241877, "grad_norm": 6.660614967346191, "learning_rate": 0.0001960976032627335, "loss": 7.5943, "step": 78870 }, { "epoch": 9.492178098676293, "grad_norm": 9.384659767150879, "learning_rate": 0.00019609655081179064, "loss": 7.5543, "step": 78880 }, { "epoch": 9.49338146811071, "grad_norm": 19.02987289428711, "learning_rate": 0.00019609549822177232, "loss": 7.6117, "step": 78890 }, { "epoch": 9.494584837545126, "grad_norm": 13.510137557983398, "learning_rate": 0.00019609444549268014, "loss": 7.664, "step": 78900 }, { "epoch": 9.495788206979542, "grad_norm": 105.62084197998047, "learning_rate": 0.00019609339262451552, "loss": 7.4671, "step": 78910 }, { "epoch": 9.49699157641396, "grad_norm": 29.44380760192871, "learning_rate": 0.00019609233961728005, "loss": 7.6215, "step": 78920 }, { "epoch": 9.498194945848375, "grad_norm": 43.94749069213867, "learning_rate": 0.00019609128647097524, "loss": 7.464, "step": 78930 }, { "epoch": 9.499398315282791, "grad_norm": 12.527983665466309, "learning_rate": 0.0001960902331856026, "loss": 7.5784, "step": 78940 }, { "epoch": 9.500601684717209, "grad_norm": 28.495134353637695, "learning_rate": 0.0001960891797611637, "loss": 7.5494, "step": 78950 }, { "epoch": 9.501805054151625, "grad_norm": 10.201322555541992, "learning_rate": 0.00019608812619766, "loss": 7.5191, "step": 78960 }, { "epoch": 9.50300842358604, "grad_norm": 27.35040283203125, "learning_rate": 0.00019608707249509308, "loss": 7.5623, "step": 78970 }, { "epoch": 9.504211793020458, "grad_norm": 8.544074058532715, "learning_rate": 0.00019608601865346445, "loss": 7.5798, "step": 78980 }, { "epoch": 9.505415162454874, "grad_norm": 34.04827117919922, "learning_rate": 0.00019608496467277565, "loss": 7.459, "step": 78990 }, { "epoch": 9.50661853188929, "grad_norm": 12.461686134338379, "learning_rate": 0.00019608391055302813, "loss": 7.6613, "step": 79000 }, { "epoch": 9.507821901323707, "grad_norm": 11.033659934997559, "learning_rate": 0.0001960828562942235, "loss": 7.5834, "step": 79010 }, { "epoch": 9.509025270758123, "grad_norm": 15.903694152832031, "learning_rate": 0.00019608180189636322, "loss": 7.5608, "step": 79020 }, { "epoch": 9.510228640192539, "grad_norm": 10.244565963745117, "learning_rate": 0.0001960807473594489, "loss": 7.6276, "step": 79030 }, { "epoch": 9.511432009626956, "grad_norm": 26.45344352722168, "learning_rate": 0.000196079692683482, "loss": 7.6082, "step": 79040 }, { "epoch": 9.512635379061372, "grad_norm": 14.464168548583984, "learning_rate": 0.00019607863786846405, "loss": 7.5861, "step": 79050 }, { "epoch": 9.513838748495788, "grad_norm": 12.145282745361328, "learning_rate": 0.0001960775829143966, "loss": 7.5265, "step": 79060 }, { "epoch": 9.515042117930205, "grad_norm": 23.147916793823242, "learning_rate": 0.00019607652782128117, "loss": 7.6055, "step": 79070 }, { "epoch": 9.516245487364621, "grad_norm": 19.37364959716797, "learning_rate": 0.00019607547258911927, "loss": 7.5509, "step": 79080 }, { "epoch": 9.517448856799037, "grad_norm": 45.35286331176758, "learning_rate": 0.00019607441721791245, "loss": 7.6603, "step": 79090 }, { "epoch": 9.518652226233453, "grad_norm": 37.83170700073242, "learning_rate": 0.00019607336170766223, "loss": 7.5165, "step": 79100 }, { "epoch": 9.51985559566787, "grad_norm": 15.612932205200195, "learning_rate": 0.00019607230605837015, "loss": 7.5318, "step": 79110 }, { "epoch": 9.521058965102286, "grad_norm": 30.468122482299805, "learning_rate": 0.00019607125027003769, "loss": 7.5491, "step": 79120 }, { "epoch": 9.522262334536702, "grad_norm": 7.987763404846191, "learning_rate": 0.00019607019434266643, "loss": 7.546, "step": 79130 }, { "epoch": 9.52346570397112, "grad_norm": 13.636329650878906, "learning_rate": 0.00019606913827625787, "loss": 7.575, "step": 79140 }, { "epoch": 9.524669073405535, "grad_norm": 14.846640586853027, "learning_rate": 0.00019606808207081359, "loss": 7.5855, "step": 79150 }, { "epoch": 9.525872442839951, "grad_norm": 19.705636978149414, "learning_rate": 0.000196067025726335, "loss": 7.4977, "step": 79160 }, { "epoch": 9.527075812274369, "grad_norm": 26.944072723388672, "learning_rate": 0.00019606596924282376, "loss": 7.6127, "step": 79170 }, { "epoch": 9.528279181708784, "grad_norm": 39.36925506591797, "learning_rate": 0.00019606491262028132, "loss": 7.4681, "step": 79180 }, { "epoch": 9.5294825511432, "grad_norm": 75.97850036621094, "learning_rate": 0.00019606385585870924, "loss": 7.6021, "step": 79190 }, { "epoch": 9.530685920577618, "grad_norm": 34.29001998901367, "learning_rate": 0.00019606279895810908, "loss": 7.4164, "step": 79200 }, { "epoch": 9.531889290012034, "grad_norm": 36.16572952270508, "learning_rate": 0.0001960617419184823, "loss": 7.3311, "step": 79210 }, { "epoch": 9.53309265944645, "grad_norm": 77.1339340209961, "learning_rate": 0.00019606068473983047, "loss": 7.4225, "step": 79220 }, { "epoch": 9.534296028880867, "grad_norm": 92.11058044433594, "learning_rate": 0.0001960596274221551, "loss": 7.4033, "step": 79230 }, { "epoch": 9.535499398315283, "grad_norm": 374.5948791503906, "learning_rate": 0.00019605856996545774, "loss": 7.4027, "step": 79240 }, { "epoch": 9.536702767749698, "grad_norm": 194.46343994140625, "learning_rate": 0.00019605751236973992, "loss": 7.351, "step": 79250 }, { "epoch": 9.537906137184116, "grad_norm": 121.67532348632812, "learning_rate": 0.00019605645463500318, "loss": 7.2614, "step": 79260 }, { "epoch": 9.539109506618532, "grad_norm": 104.19454956054688, "learning_rate": 0.000196055396761249, "loss": 7.3343, "step": 79270 }, { "epoch": 9.540312876052948, "grad_norm": 159.23812866210938, "learning_rate": 0.00019605433874847895, "loss": 7.4227, "step": 79280 }, { "epoch": 9.541516245487365, "grad_norm": 169.9329833984375, "learning_rate": 0.00019605328059669458, "loss": 7.2454, "step": 79290 }, { "epoch": 9.542719614921781, "grad_norm": 72.37603759765625, "learning_rate": 0.00019605222230589738, "loss": 7.2949, "step": 79300 }, { "epoch": 9.543922984356197, "grad_norm": 53.81119155883789, "learning_rate": 0.0001960511638760889, "loss": 7.4175, "step": 79310 }, { "epoch": 9.545126353790614, "grad_norm": 242.05604553222656, "learning_rate": 0.00019605010530727066, "loss": 7.3465, "step": 79320 }, { "epoch": 9.54632972322503, "grad_norm": 216.44239807128906, "learning_rate": 0.00019604904659944423, "loss": 7.2111, "step": 79330 }, { "epoch": 9.547533092659446, "grad_norm": 130.51792907714844, "learning_rate": 0.0001960479877526111, "loss": 7.2793, "step": 79340 }, { "epoch": 9.548736462093864, "grad_norm": 121.87419128417969, "learning_rate": 0.0001960469287667728, "loss": 7.3841, "step": 79350 }, { "epoch": 9.54993983152828, "grad_norm": 158.7510528564453, "learning_rate": 0.00019604586964193095, "loss": 7.3366, "step": 79360 }, { "epoch": 9.551143200962695, "grad_norm": 159.97264099121094, "learning_rate": 0.00019604481037808696, "loss": 7.1752, "step": 79370 }, { "epoch": 9.552346570397113, "grad_norm": 153.2975616455078, "learning_rate": 0.0001960437509752424, "loss": 7.2265, "step": 79380 }, { "epoch": 9.553549939831528, "grad_norm": 160.29055786132812, "learning_rate": 0.00019604269143339885, "loss": 7.2581, "step": 79390 }, { "epoch": 9.554753309265944, "grad_norm": 271.9485168457031, "learning_rate": 0.00019604163175255783, "loss": 7.2888, "step": 79400 }, { "epoch": 9.555956678700362, "grad_norm": 233.46905517578125, "learning_rate": 0.00019604057193272083, "loss": 7.3562, "step": 79410 }, { "epoch": 9.557160048134778, "grad_norm": 452.61572265625, "learning_rate": 0.0001960395119738894, "loss": 7.3663, "step": 79420 }, { "epoch": 9.558363417569193, "grad_norm": 157.4434356689453, "learning_rate": 0.0001960384518760651, "loss": 7.27, "step": 79430 }, { "epoch": 9.559566787003611, "grad_norm": 405.47259521484375, "learning_rate": 0.00019603739163924947, "loss": 7.2544, "step": 79440 }, { "epoch": 9.560770156438027, "grad_norm": 102.44945526123047, "learning_rate": 0.000196036331263444, "loss": 7.1992, "step": 79450 }, { "epoch": 9.561973525872443, "grad_norm": 143.56369018554688, "learning_rate": 0.00019603527074865023, "loss": 7.2965, "step": 79460 }, { "epoch": 9.56317689530686, "grad_norm": 190.5968475341797, "learning_rate": 0.00019603421009486974, "loss": 7.2631, "step": 79470 }, { "epoch": 9.564380264741276, "grad_norm": 133.09567260742188, "learning_rate": 0.00019603314930210402, "loss": 7.2565, "step": 79480 }, { "epoch": 9.565583634175692, "grad_norm": 73.62224578857422, "learning_rate": 0.00019603208837035464, "loss": 7.2491, "step": 79490 }, { "epoch": 9.566787003610107, "grad_norm": 157.73590087890625, "learning_rate": 0.00019603102729962311, "loss": 7.2924, "step": 79500 }, { "epoch": 9.567990373044525, "grad_norm": 1052.1524658203125, "learning_rate": 0.00019602996608991097, "loss": 7.3758, "step": 79510 }, { "epoch": 9.56919374247894, "grad_norm": 10060.75390625, "learning_rate": 0.00019602890474121977, "loss": 7.3981, "step": 79520 }, { "epoch": 9.570397111913357, "grad_norm": 30700.2890625, "learning_rate": 0.00019602784325355104, "loss": 7.7364, "step": 79530 }, { "epoch": 9.571600481347774, "grad_norm": 282.262451171875, "learning_rate": 0.00019602678162690632, "loss": 7.9551, "step": 79540 }, { "epoch": 9.57280385078219, "grad_norm": 11075.5908203125, "learning_rate": 0.0001960257198612871, "loss": 7.7967, "step": 79550 }, { "epoch": 9.574007220216606, "grad_norm": 39592.89453125, "learning_rate": 0.00019602465795669498, "loss": 7.5915, "step": 79560 }, { "epoch": 9.575210589651023, "grad_norm": 951.6878662109375, "learning_rate": 0.00019602359591313148, "loss": 7.6229, "step": 79570 }, { "epoch": 9.57641395908544, "grad_norm": 2799.107177734375, "learning_rate": 0.00019602253373059814, "loss": 7.7397, "step": 79580 }, { "epoch": 9.577617328519855, "grad_norm": 1263.59521484375, "learning_rate": 0.00019602147140909644, "loss": 7.7926, "step": 79590 }, { "epoch": 9.578820697954272, "grad_norm": 13267.4990234375, "learning_rate": 0.00019602040894862798, "loss": 7.8047, "step": 79600 }, { "epoch": 9.580024067388688, "grad_norm": 5765.8056640625, "learning_rate": 0.0001960193463491943, "loss": 7.9076, "step": 79610 }, { "epoch": 9.581227436823104, "grad_norm": 46395.68359375, "learning_rate": 0.00019601828361079694, "loss": 7.9171, "step": 79620 }, { "epoch": 9.582430806257522, "grad_norm": 42442.5625, "learning_rate": 0.00019601722073343738, "loss": 7.8046, "step": 79630 }, { "epoch": 9.583634175691937, "grad_norm": 14398.544921875, "learning_rate": 0.0001960161577171172, "loss": 7.7582, "step": 79640 }, { "epoch": 9.584837545126353, "grad_norm": 319.1088562011719, "learning_rate": 0.00019601509456183792, "loss": 7.7495, "step": 79650 }, { "epoch": 9.58604091456077, "grad_norm": 2.814500570297241, "learning_rate": 0.00019601403126760114, "loss": 7.5919, "step": 79660 }, { "epoch": 9.587244283995187, "grad_norm": 2.316964626312256, "learning_rate": 0.0001960129678344083, "loss": 7.6064, "step": 79670 }, { "epoch": 9.588447653429602, "grad_norm": 2.642634868621826, "learning_rate": 0.00019601190426226105, "loss": 7.6044, "step": 79680 }, { "epoch": 9.58965102286402, "grad_norm": 6.033572196960449, "learning_rate": 0.00019601084055116084, "loss": 7.5677, "step": 79690 }, { "epoch": 9.590854392298436, "grad_norm": 4.895756244659424, "learning_rate": 0.00019600977670110922, "loss": 7.5958, "step": 79700 }, { "epoch": 9.592057761732852, "grad_norm": 5.3907470703125, "learning_rate": 0.00019600871271210774, "loss": 7.5904, "step": 79710 }, { "epoch": 9.593261131167269, "grad_norm": 3.6726460456848145, "learning_rate": 0.00019600764858415797, "loss": 7.5695, "step": 79720 }, { "epoch": 9.594464500601685, "grad_norm": 3.873385190963745, "learning_rate": 0.00019600658431726144, "loss": 7.5725, "step": 79730 }, { "epoch": 9.5956678700361, "grad_norm": 4.151281833648682, "learning_rate": 0.00019600551991141968, "loss": 7.6107, "step": 79740 }, { "epoch": 9.596871239470518, "grad_norm": 9.565984725952148, "learning_rate": 0.00019600445536663421, "loss": 7.6013, "step": 79750 }, { "epoch": 9.598074608904934, "grad_norm": 16.456396102905273, "learning_rate": 0.0001960033906829066, "loss": 7.5136, "step": 79760 }, { "epoch": 9.59927797833935, "grad_norm": 3.6445560455322266, "learning_rate": 0.0001960023258602384, "loss": 7.5437, "step": 79770 }, { "epoch": 9.600481347773766, "grad_norm": 6.523273468017578, "learning_rate": 0.0001960012608986311, "loss": 7.6049, "step": 79780 }, { "epoch": 9.601684717208183, "grad_norm": 947.410888671875, "learning_rate": 0.0001960001957980863, "loss": 7.4322, "step": 79790 }, { "epoch": 9.602888086642599, "grad_norm": 4.064530849456787, "learning_rate": 0.0001959991305586055, "loss": 7.4855, "step": 79800 }, { "epoch": 9.604091456077015, "grad_norm": 11.220860481262207, "learning_rate": 0.00019599806518019025, "loss": 7.5319, "step": 79810 }, { "epoch": 9.605294825511432, "grad_norm": 18.553869247436523, "learning_rate": 0.00019599699966284213, "loss": 7.5367, "step": 79820 }, { "epoch": 9.606498194945848, "grad_norm": 10.639506340026855, "learning_rate": 0.0001959959340065626, "loss": 7.554, "step": 79830 }, { "epoch": 9.607701564380264, "grad_norm": 28.654579162597656, "learning_rate": 0.0001959948682113533, "loss": 7.5017, "step": 79840 }, { "epoch": 9.608904933814681, "grad_norm": 21.98432731628418, "learning_rate": 0.00019599380227721568, "loss": 7.3128, "step": 79850 }, { "epoch": 9.610108303249097, "grad_norm": 22.034852981567383, "learning_rate": 0.00019599273620415138, "loss": 7.307, "step": 79860 }, { "epoch": 9.611311672683513, "grad_norm": 21.325101852416992, "learning_rate": 0.00019599166999216186, "loss": 7.4465, "step": 79870 }, { "epoch": 9.61251504211793, "grad_norm": 42.87892150878906, "learning_rate": 0.0001959906036412487, "loss": 7.3836, "step": 79880 }, { "epoch": 9.613718411552346, "grad_norm": 31.864187240600586, "learning_rate": 0.00019598953715141345, "loss": 7.4534, "step": 79890 }, { "epoch": 9.614921780986762, "grad_norm": 26.607751846313477, "learning_rate": 0.00019598847052265761, "loss": 7.3211, "step": 79900 }, { "epoch": 9.61612515042118, "grad_norm": 59.20132064819336, "learning_rate": 0.00019598740375498277, "loss": 7.3567, "step": 79910 }, { "epoch": 9.617328519855596, "grad_norm": 41.33769607543945, "learning_rate": 0.00019598633684839048, "loss": 7.441, "step": 79920 }, { "epoch": 9.618531889290011, "grad_norm": 20.586872100830078, "learning_rate": 0.0001959852698028822, "loss": 7.3002, "step": 79930 }, { "epoch": 9.619735258724429, "grad_norm": 11.548286437988281, "learning_rate": 0.0001959842026184596, "loss": 7.3633, "step": 79940 }, { "epoch": 9.620938628158845, "grad_norm": 84.25369262695312, "learning_rate": 0.00019598313529512414, "loss": 7.3129, "step": 79950 }, { "epoch": 9.62214199759326, "grad_norm": 17.92951774597168, "learning_rate": 0.00019598206783287737, "loss": 7.2589, "step": 79960 }, { "epoch": 9.623345367027678, "grad_norm": 14.395515441894531, "learning_rate": 0.00019598100023172089, "loss": 7.2708, "step": 79970 }, { "epoch": 9.624548736462094, "grad_norm": 24.21454429626465, "learning_rate": 0.00019597993249165617, "loss": 7.2915, "step": 79980 }, { "epoch": 9.62575210589651, "grad_norm": 72.24156951904297, "learning_rate": 0.00019597886461268483, "loss": 7.306, "step": 79990 }, { "epoch": 9.626955475330927, "grad_norm": 30.216567993164062, "learning_rate": 0.00019597779659480833, "loss": 7.3008, "step": 80000 }, { "epoch": 9.628158844765343, "grad_norm": 38.20553970336914, "learning_rate": 0.0001959767284380283, "loss": 7.2473, "step": 80010 }, { "epoch": 9.629362214199759, "grad_norm": 54.55497741699219, "learning_rate": 0.0001959756601423462, "loss": 7.3417, "step": 80020 }, { "epoch": 9.630565583634176, "grad_norm": 20.382944107055664, "learning_rate": 0.00019597459170776367, "loss": 7.147, "step": 80030 }, { "epoch": 9.631768953068592, "grad_norm": 46.73661422729492, "learning_rate": 0.00019597352313428218, "loss": 7.3668, "step": 80040 }, { "epoch": 9.632972322503008, "grad_norm": 18.51910972595215, "learning_rate": 0.00019597245442190333, "loss": 7.2688, "step": 80050 }, { "epoch": 9.634175691937426, "grad_norm": 36.11069869995117, "learning_rate": 0.00019597138557062862, "loss": 7.2074, "step": 80060 }, { "epoch": 9.635379061371841, "grad_norm": 30.255470275878906, "learning_rate": 0.00019597031658045966, "loss": 7.2938, "step": 80070 }, { "epoch": 9.636582430806257, "grad_norm": 10.197362899780273, "learning_rate": 0.00019596924745139793, "loss": 7.4057, "step": 80080 }, { "epoch": 9.637785800240675, "grad_norm": 23.368932723999023, "learning_rate": 0.000195968178183445, "loss": 7.2778, "step": 80090 }, { "epoch": 9.63898916967509, "grad_norm": 19.56130599975586, "learning_rate": 0.00019596710877660243, "loss": 7.3187, "step": 80100 }, { "epoch": 9.640192539109506, "grad_norm": 24.682722091674805, "learning_rate": 0.00019596603923087175, "loss": 7.3107, "step": 80110 }, { "epoch": 9.641395908543924, "grad_norm": 43.84751510620117, "learning_rate": 0.00019596496954625453, "loss": 7.3186, "step": 80120 }, { "epoch": 9.64259927797834, "grad_norm": 15.692676544189453, "learning_rate": 0.00019596389972275234, "loss": 7.2783, "step": 80130 }, { "epoch": 9.643802647412755, "grad_norm": 6.348845958709717, "learning_rate": 0.0001959628297603666, "loss": 7.3805, "step": 80140 }, { "epoch": 9.645006016847173, "grad_norm": 2.7350292205810547, "learning_rate": 0.00019596175965909903, "loss": 7.2617, "step": 80150 }, { "epoch": 9.646209386281589, "grad_norm": 7.285604476928711, "learning_rate": 0.0001959606894189511, "loss": 7.4061, "step": 80160 }, { "epoch": 9.647412755716005, "grad_norm": 10.559433937072754, "learning_rate": 0.00019595961903992435, "loss": 7.3202, "step": 80170 }, { "epoch": 9.648616125150422, "grad_norm": 4.576714515686035, "learning_rate": 0.00019595854852202032, "loss": 7.3242, "step": 80180 }, { "epoch": 9.649819494584838, "grad_norm": 3.536768913269043, "learning_rate": 0.0001959574778652406, "loss": 7.2417, "step": 80190 }, { "epoch": 9.651022864019254, "grad_norm": 8.18758487701416, "learning_rate": 0.0001959564070695867, "loss": 7.1858, "step": 80200 }, { "epoch": 9.65222623345367, "grad_norm": 4.7222700119018555, "learning_rate": 0.0001959553361350602, "loss": 7.3187, "step": 80210 }, { "epoch": 9.653429602888087, "grad_norm": 42.41435241699219, "learning_rate": 0.00019595426506166264, "loss": 7.3391, "step": 80220 }, { "epoch": 9.654632972322503, "grad_norm": 15.976512908935547, "learning_rate": 0.00019595319384939556, "loss": 7.2505, "step": 80230 }, { "epoch": 9.655836341756919, "grad_norm": 6.87771463394165, "learning_rate": 0.00019595212249826053, "loss": 7.271, "step": 80240 }, { "epoch": 9.657039711191336, "grad_norm": 15.128507614135742, "learning_rate": 0.00019595105100825907, "loss": 7.2773, "step": 80250 }, { "epoch": 9.658243080625752, "grad_norm": 3.9212698936462402, "learning_rate": 0.00019594997937939277, "loss": 7.3445, "step": 80260 }, { "epoch": 9.659446450060168, "grad_norm": 6.612792015075684, "learning_rate": 0.00019594890761166317, "loss": 7.3511, "step": 80270 }, { "epoch": 9.660649819494585, "grad_norm": 9.729209899902344, "learning_rate": 0.00019594783570507177, "loss": 7.2522, "step": 80280 }, { "epoch": 9.661853188929001, "grad_norm": 25.043237686157227, "learning_rate": 0.0001959467636596202, "loss": 7.3197, "step": 80290 }, { "epoch": 9.663056558363417, "grad_norm": 189.68516540527344, "learning_rate": 0.00019594569147530995, "loss": 7.2938, "step": 80300 }, { "epoch": 9.664259927797834, "grad_norm": 301.1114807128906, "learning_rate": 0.00019594461915214262, "loss": 7.2735, "step": 80310 }, { "epoch": 9.66546329723225, "grad_norm": 61.184085845947266, "learning_rate": 0.00019594354669011973, "loss": 7.2298, "step": 80320 }, { "epoch": 9.666666666666666, "grad_norm": 14.759530067443848, "learning_rate": 0.00019594247408924283, "loss": 7.2912, "step": 80330 }, { "epoch": 9.667870036101084, "grad_norm": 32.6790657043457, "learning_rate": 0.00019594140134951348, "loss": 7.2896, "step": 80340 }, { "epoch": 9.6690734055355, "grad_norm": 10.808216094970703, "learning_rate": 0.00019594032847093326, "loss": 7.3244, "step": 80350 }, { "epoch": 9.670276774969915, "grad_norm": 6.880530834197998, "learning_rate": 0.00019593925545350366, "loss": 7.3362, "step": 80360 }, { "epoch": 9.671480144404333, "grad_norm": 10.589981079101562, "learning_rate": 0.0001959381822972263, "loss": 7.3277, "step": 80370 }, { "epoch": 9.672683513838749, "grad_norm": 17.26545524597168, "learning_rate": 0.0001959371090021027, "loss": 7.2339, "step": 80380 }, { "epoch": 9.673886883273164, "grad_norm": 30.79890251159668, "learning_rate": 0.00019593603556813442, "loss": 7.3052, "step": 80390 }, { "epoch": 9.675090252707582, "grad_norm": 9.122573852539062, "learning_rate": 0.00019593496199532299, "loss": 7.3575, "step": 80400 }, { "epoch": 9.676293622141998, "grad_norm": 10.84290885925293, "learning_rate": 0.00019593388828367, "loss": 7.3456, "step": 80410 }, { "epoch": 9.677496991576414, "grad_norm": 7.429494380950928, "learning_rate": 0.000195932814433177, "loss": 7.3102, "step": 80420 }, { "epoch": 9.678700361010831, "grad_norm": 7.6119303703308105, "learning_rate": 0.00019593174044384552, "loss": 7.2874, "step": 80430 }, { "epoch": 9.679903730445247, "grad_norm": 11.422502517700195, "learning_rate": 0.00019593066631567712, "loss": 7.2702, "step": 80440 }, { "epoch": 9.681107099879663, "grad_norm": 11.697199821472168, "learning_rate": 0.00019592959204867337, "loss": 7.2839, "step": 80450 }, { "epoch": 9.68231046931408, "grad_norm": 14.388079643249512, "learning_rate": 0.00019592851764283582, "loss": 7.3083, "step": 80460 }, { "epoch": 9.683513838748496, "grad_norm": 28.95720672607422, "learning_rate": 0.000195927443098166, "loss": 7.2277, "step": 80470 }, { "epoch": 9.684717208182912, "grad_norm": 8.118637084960938, "learning_rate": 0.0001959263684146655, "loss": 7.2757, "step": 80480 }, { "epoch": 9.685920577617328, "grad_norm": 20.11807632446289, "learning_rate": 0.00019592529359233586, "loss": 7.3231, "step": 80490 }, { "epoch": 9.687123947051745, "grad_norm": 7.092824459075928, "learning_rate": 0.00019592421863117863, "loss": 7.2876, "step": 80500 }, { "epoch": 9.688327316486161, "grad_norm": 5.425356864929199, "learning_rate": 0.00019592314353119542, "loss": 7.3649, "step": 80510 }, { "epoch": 9.689530685920577, "grad_norm": 5.126730918884277, "learning_rate": 0.00019592206829238768, "loss": 7.46, "step": 80520 }, { "epoch": 9.690734055354994, "grad_norm": 15.088762283325195, "learning_rate": 0.00019592099291475704, "loss": 7.3103, "step": 80530 }, { "epoch": 9.69193742478941, "grad_norm": 4.680660724639893, "learning_rate": 0.00019591991739830505, "loss": 7.2495, "step": 80540 }, { "epoch": 9.693140794223826, "grad_norm": 7.10062837600708, "learning_rate": 0.00019591884174303324, "loss": 7.2549, "step": 80550 }, { "epoch": 9.694344163658243, "grad_norm": 5.300331115722656, "learning_rate": 0.0001959177659489432, "loss": 7.1931, "step": 80560 }, { "epoch": 9.69554753309266, "grad_norm": 5.892553329467773, "learning_rate": 0.00019591669001603647, "loss": 7.3507, "step": 80570 }, { "epoch": 9.696750902527075, "grad_norm": 8.73146915435791, "learning_rate": 0.00019591561394431461, "loss": 7.266, "step": 80580 }, { "epoch": 9.697954271961493, "grad_norm": 11.49372386932373, "learning_rate": 0.00019591453773377916, "loss": 7.2896, "step": 80590 }, { "epoch": 9.699157641395908, "grad_norm": 4.181190490722656, "learning_rate": 0.00019591346138443175, "loss": 7.2576, "step": 80600 }, { "epoch": 9.700361010830324, "grad_norm": 1.6814228296279907, "learning_rate": 0.0001959123848962738, "loss": 7.2934, "step": 80610 }, { "epoch": 9.701564380264742, "grad_norm": 6.716777324676514, "learning_rate": 0.000195911308269307, "loss": 7.2646, "step": 80620 }, { "epoch": 9.702767749699158, "grad_norm": 1.973939061164856, "learning_rate": 0.00019591023150353286, "loss": 7.3238, "step": 80630 }, { "epoch": 9.703971119133573, "grad_norm": 1.9682577848434448, "learning_rate": 0.0001959091545989529, "loss": 7.2739, "step": 80640 }, { "epoch": 9.705174488567991, "grad_norm": 3.713451385498047, "learning_rate": 0.00019590807755556875, "loss": 7.3343, "step": 80650 }, { "epoch": 9.706377858002407, "grad_norm": 4.571086883544922, "learning_rate": 0.00019590700037338193, "loss": 7.2604, "step": 80660 }, { "epoch": 9.707581227436823, "grad_norm": 3.9480361938476562, "learning_rate": 0.00019590592305239398, "loss": 7.3839, "step": 80670 }, { "epoch": 9.70878459687124, "grad_norm": 5.151997089385986, "learning_rate": 0.00019590484559260646, "loss": 7.3549, "step": 80680 }, { "epoch": 9.709987966305656, "grad_norm": 5.794747352600098, "learning_rate": 0.00019590376799402098, "loss": 7.2307, "step": 80690 }, { "epoch": 9.711191335740072, "grad_norm": 10.18275260925293, "learning_rate": 0.0001959026902566391, "loss": 7.1675, "step": 80700 }, { "epoch": 9.71239470517449, "grad_norm": 3.547149896621704, "learning_rate": 0.00019590161238046232, "loss": 7.2141, "step": 80710 }, { "epoch": 9.713598074608905, "grad_norm": 3.9798226356506348, "learning_rate": 0.00019590053436549222, "loss": 7.2735, "step": 80720 }, { "epoch": 9.71480144404332, "grad_norm": 4.546346187591553, "learning_rate": 0.00019589945621173037, "loss": 7.3026, "step": 80730 }, { "epoch": 9.716004813477738, "grad_norm": 6.8329386711120605, "learning_rate": 0.00019589837791917836, "loss": 7.2736, "step": 80740 }, { "epoch": 9.717208182912154, "grad_norm": 6.138765811920166, "learning_rate": 0.00019589729948783769, "loss": 7.2282, "step": 80750 }, { "epoch": 9.71841155234657, "grad_norm": 4.7079691886901855, "learning_rate": 0.00019589622091770994, "loss": 7.1251, "step": 80760 }, { "epoch": 9.719614921780988, "grad_norm": 12.007296562194824, "learning_rate": 0.00019589514220879673, "loss": 7.2584, "step": 80770 }, { "epoch": 9.720818291215403, "grad_norm": 5.398209095001221, "learning_rate": 0.00019589406336109955, "loss": 7.1358, "step": 80780 }, { "epoch": 9.722021660649819, "grad_norm": 4.695461750030518, "learning_rate": 0.00019589298437461996, "loss": 7.2449, "step": 80790 }, { "epoch": 9.723225030084237, "grad_norm": 4.134794235229492, "learning_rate": 0.00019589190524935956, "loss": 7.1649, "step": 80800 }, { "epoch": 9.724428399518652, "grad_norm": 3.4911704063415527, "learning_rate": 0.0001958908259853199, "loss": 7.2282, "step": 80810 }, { "epoch": 9.725631768953068, "grad_norm": 7.523705005645752, "learning_rate": 0.00019588974658250256, "loss": 7.3258, "step": 80820 }, { "epoch": 9.726835138387486, "grad_norm": 5.453453540802002, "learning_rate": 0.00019588866704090907, "loss": 7.2819, "step": 80830 }, { "epoch": 9.728038507821902, "grad_norm": 13.083098411560059, "learning_rate": 0.000195887587360541, "loss": 7.2137, "step": 80840 }, { "epoch": 9.729241877256317, "grad_norm": 5.979798316955566, "learning_rate": 0.00019588650754139995, "loss": 7.156, "step": 80850 }, { "epoch": 9.730445246690735, "grad_norm": 3.339381694793701, "learning_rate": 0.00019588542758348741, "loss": 7.2267, "step": 80860 }, { "epoch": 9.73164861612515, "grad_norm": 3.9839985370635986, "learning_rate": 0.00019588434748680497, "loss": 7.1336, "step": 80870 }, { "epoch": 9.732851985559567, "grad_norm": 3.4965097904205322, "learning_rate": 0.00019588326725135423, "loss": 7.2444, "step": 80880 }, { "epoch": 9.734055354993982, "grad_norm": 4.205904006958008, "learning_rate": 0.00019588218687713674, "loss": 7.2232, "step": 80890 }, { "epoch": 9.7352587244284, "grad_norm": 5.698105335235596, "learning_rate": 0.00019588110636415405, "loss": 7.2114, "step": 80900 }, { "epoch": 9.736462093862816, "grad_norm": 12.120505332946777, "learning_rate": 0.0001958800257124077, "loss": 7.2099, "step": 80910 }, { "epoch": 9.737665463297231, "grad_norm": 5.2960662841796875, "learning_rate": 0.00019587894492189932, "loss": 7.2902, "step": 80920 }, { "epoch": 9.738868832731649, "grad_norm": 3.8109209537506104, "learning_rate": 0.0001958778639926304, "loss": 7.1597, "step": 80930 }, { "epoch": 9.740072202166065, "grad_norm": 5.949132919311523, "learning_rate": 0.00019587678292460256, "loss": 7.1693, "step": 80940 }, { "epoch": 9.74127557160048, "grad_norm": 6.312081336975098, "learning_rate": 0.00019587570171781735, "loss": 7.2273, "step": 80950 }, { "epoch": 9.742478941034898, "grad_norm": 13.132644653320312, "learning_rate": 0.0001958746203722763, "loss": 7.1698, "step": 80960 }, { "epoch": 9.743682310469314, "grad_norm": 10.809191703796387, "learning_rate": 0.000195873538887981, "loss": 7.1982, "step": 80970 }, { "epoch": 9.74488567990373, "grad_norm": 19.389211654663086, "learning_rate": 0.00019587245726493307, "loss": 7.1589, "step": 80980 }, { "epoch": 9.746089049338147, "grad_norm": 15.189022064208984, "learning_rate": 0.00019587137550313398, "loss": 7.1406, "step": 80990 }, { "epoch": 9.747292418772563, "grad_norm": 14.81157398223877, "learning_rate": 0.00019587029360258537, "loss": 7.1925, "step": 81000 }, { "epoch": 9.748495788206979, "grad_norm": 8.749092102050781, "learning_rate": 0.00019586921156328874, "loss": 7.0593, "step": 81010 }, { "epoch": 9.749699157641396, "grad_norm": 24.159252166748047, "learning_rate": 0.0001958681293852457, "loss": 7.2597, "step": 81020 }, { "epoch": 9.750902527075812, "grad_norm": 21.47060775756836, "learning_rate": 0.00019586704706845781, "loss": 7.2261, "step": 81030 }, { "epoch": 9.752105896510228, "grad_norm": 23.625019073486328, "learning_rate": 0.00019586596461292665, "loss": 7.2175, "step": 81040 }, { "epoch": 9.753309265944646, "grad_norm": 19.579246520996094, "learning_rate": 0.00019586488201865377, "loss": 7.1089, "step": 81050 }, { "epoch": 9.754512635379061, "grad_norm": 18.162458419799805, "learning_rate": 0.00019586379928564074, "loss": 7.1443, "step": 81060 }, { "epoch": 9.755716004813477, "grad_norm": 36.512001037597656, "learning_rate": 0.00019586271641388908, "loss": 7.0819, "step": 81070 }, { "epoch": 9.756919374247895, "grad_norm": 29.0833740234375, "learning_rate": 0.00019586163340340048, "loss": 7.2203, "step": 81080 }, { "epoch": 9.75812274368231, "grad_norm": 53.947410583496094, "learning_rate": 0.00019586055025417638, "loss": 7.2633, "step": 81090 }, { "epoch": 9.759326113116726, "grad_norm": 28.762466430664062, "learning_rate": 0.00019585946696621837, "loss": 7.1178, "step": 81100 }, { "epoch": 9.760529482551144, "grad_norm": 21.785465240478516, "learning_rate": 0.0001958583835395281, "loss": 7.1446, "step": 81110 }, { "epoch": 9.76173285198556, "grad_norm": 28.514663696289062, "learning_rate": 0.00019585729997410707, "loss": 7.1186, "step": 81120 }, { "epoch": 9.762936221419976, "grad_norm": 44.98613357543945, "learning_rate": 0.00019585621626995683, "loss": 7.1605, "step": 81130 }, { "epoch": 9.764139590854393, "grad_norm": 24.573091506958008, "learning_rate": 0.000195855132427079, "loss": 7.2074, "step": 81140 }, { "epoch": 9.765342960288809, "grad_norm": 47.555606842041016, "learning_rate": 0.00019585404844547512, "loss": 7.2037, "step": 81150 }, { "epoch": 9.766546329723225, "grad_norm": 22.620088577270508, "learning_rate": 0.00019585296432514677, "loss": 7.1592, "step": 81160 }, { "epoch": 9.76774969915764, "grad_norm": 18.58357048034668, "learning_rate": 0.00019585188006609553, "loss": 7.1873, "step": 81170 }, { "epoch": 9.768953068592058, "grad_norm": 26.78301239013672, "learning_rate": 0.0001958507956683229, "loss": 7.1344, "step": 81180 }, { "epoch": 9.770156438026474, "grad_norm": 55.00008773803711, "learning_rate": 0.00019584971113183056, "loss": 7.1803, "step": 81190 }, { "epoch": 9.77135980746089, "grad_norm": 63.540374755859375, "learning_rate": 0.00019584862645662004, "loss": 7.1767, "step": 81200 }, { "epoch": 9.772563176895307, "grad_norm": 27.377532958984375, "learning_rate": 0.00019584754164269285, "loss": 7.1682, "step": 81210 }, { "epoch": 9.773766546329723, "grad_norm": 42.744606018066406, "learning_rate": 0.0001958464566900506, "loss": 7.0821, "step": 81220 }, { "epoch": 9.774969915764139, "grad_norm": 65.01910400390625, "learning_rate": 0.0001958453715986949, "loss": 7.1563, "step": 81230 }, { "epoch": 9.776173285198556, "grad_norm": 95.84552001953125, "learning_rate": 0.00019584428636862727, "loss": 7.1289, "step": 81240 }, { "epoch": 9.777376654632972, "grad_norm": 39.06284713745117, "learning_rate": 0.00019584320099984924, "loss": 7.2058, "step": 81250 }, { "epoch": 9.778580024067388, "grad_norm": 28.717742919921875, "learning_rate": 0.0001958421154923625, "loss": 7.2044, "step": 81260 }, { "epoch": 9.779783393501805, "grad_norm": 37.450199127197266, "learning_rate": 0.00019584102984616854, "loss": 7.2618, "step": 81270 }, { "epoch": 9.780986762936221, "grad_norm": 24.722015380859375, "learning_rate": 0.00019583994406126894, "loss": 7.1449, "step": 81280 }, { "epoch": 9.782190132370637, "grad_norm": 34.97111892700195, "learning_rate": 0.0001958388581376653, "loss": 7.2478, "step": 81290 }, { "epoch": 9.783393501805055, "grad_norm": 15.848621368408203, "learning_rate": 0.00019583777207535916, "loss": 7.0592, "step": 81300 }, { "epoch": 9.78459687123947, "grad_norm": 14.618273735046387, "learning_rate": 0.0001958366858743521, "loss": 7.1619, "step": 81310 }, { "epoch": 9.785800240673886, "grad_norm": 46.58594512939453, "learning_rate": 0.0001958355995346457, "loss": 7.0839, "step": 81320 }, { "epoch": 9.787003610108304, "grad_norm": 14.210058212280273, "learning_rate": 0.0001958345130562415, "loss": 7.1581, "step": 81330 }, { "epoch": 9.78820697954272, "grad_norm": 55.863956451416016, "learning_rate": 0.0001958334264391411, "loss": 7.1962, "step": 81340 }, { "epoch": 9.789410348977135, "grad_norm": 16.850446701049805, "learning_rate": 0.0001958323396833461, "loss": 7.1214, "step": 81350 }, { "epoch": 9.790613718411553, "grad_norm": 31.345497131347656, "learning_rate": 0.00019583125278885803, "loss": 7.1457, "step": 81360 }, { "epoch": 9.791817087845969, "grad_norm": 54.03544235229492, "learning_rate": 0.00019583016575567847, "loss": 7.1623, "step": 81370 }, { "epoch": 9.793020457280385, "grad_norm": 40.371238708496094, "learning_rate": 0.00019582907858380901, "loss": 7.0905, "step": 81380 }, { "epoch": 9.794223826714802, "grad_norm": 46.57606887817383, "learning_rate": 0.00019582799127325123, "loss": 7.0522, "step": 81390 }, { "epoch": 9.795427196149218, "grad_norm": 24.272066116333008, "learning_rate": 0.00019582690382400668, "loss": 7.1489, "step": 81400 }, { "epoch": 9.796630565583634, "grad_norm": 33.30632781982422, "learning_rate": 0.00019582581623607691, "loss": 7.1172, "step": 81410 }, { "epoch": 9.797833935018051, "grad_norm": 49.117000579833984, "learning_rate": 0.00019582472850946357, "loss": 7.14, "step": 81420 }, { "epoch": 9.799037304452467, "grad_norm": 30.63140869140625, "learning_rate": 0.00019582364064416817, "loss": 7.1226, "step": 81430 }, { "epoch": 9.800240673886883, "grad_norm": 83.77889251708984, "learning_rate": 0.0001958225526401923, "loss": 7.1363, "step": 81440 }, { "epoch": 9.8014440433213, "grad_norm": 21.450422286987305, "learning_rate": 0.00019582146449753754, "loss": 7.2253, "step": 81450 }, { "epoch": 9.802647412755716, "grad_norm": 16.15947151184082, "learning_rate": 0.00019582037621620547, "loss": 7.1707, "step": 81460 }, { "epoch": 9.803850782190132, "grad_norm": 65.86964416503906, "learning_rate": 0.00019581928779619762, "loss": 7.1195, "step": 81470 }, { "epoch": 9.80505415162455, "grad_norm": 37.578853607177734, "learning_rate": 0.00019581819923751565, "loss": 7.2109, "step": 81480 }, { "epoch": 9.806257521058965, "grad_norm": 203.12808227539062, "learning_rate": 0.00019581711054016108, "loss": 7.1567, "step": 81490 }, { "epoch": 9.807460890493381, "grad_norm": 141.58651733398438, "learning_rate": 0.00019581602170413547, "loss": 7.1059, "step": 81500 }, { "epoch": 9.808664259927799, "grad_norm": 177.87869262695312, "learning_rate": 0.00019581493272944044, "loss": 7.1007, "step": 81510 }, { "epoch": 9.809867629362214, "grad_norm": 148.8583526611328, "learning_rate": 0.00019581384361607755, "loss": 7.1554, "step": 81520 }, { "epoch": 9.81107099879663, "grad_norm": 221.38961791992188, "learning_rate": 0.00019581275436404835, "loss": 7.1346, "step": 81530 }, { "epoch": 9.812274368231048, "grad_norm": 363.3433837890625, "learning_rate": 0.00019581166497335448, "loss": 7.2178, "step": 81540 }, { "epoch": 9.813477737665464, "grad_norm": 288.44677734375, "learning_rate": 0.00019581057544399742, "loss": 7.2879, "step": 81550 }, { "epoch": 9.81468110709988, "grad_norm": 140.5692596435547, "learning_rate": 0.00019580948577597887, "loss": 7.1986, "step": 81560 }, { "epoch": 9.815884476534297, "grad_norm": 222.69528198242188, "learning_rate": 0.0001958083959693003, "loss": 7.0938, "step": 81570 }, { "epoch": 9.817087845968713, "grad_norm": 306.8842468261719, "learning_rate": 0.00019580730602396332, "loss": 7.1569, "step": 81580 }, { "epoch": 9.818291215403129, "grad_norm": 356.0410461425781, "learning_rate": 0.00019580621593996952, "loss": 7.2226, "step": 81590 }, { "epoch": 9.819494584837544, "grad_norm": 398.6214904785156, "learning_rate": 0.0001958051257173205, "loss": 7.3989, "step": 81600 }, { "epoch": 9.820697954271962, "grad_norm": 401.3757629394531, "learning_rate": 0.00019580403535601778, "loss": 7.2843, "step": 81610 }, { "epoch": 9.821901323706378, "grad_norm": 134.86692810058594, "learning_rate": 0.00019580294485606298, "loss": 7.233, "step": 81620 }, { "epoch": 9.823104693140793, "grad_norm": 211.73191833496094, "learning_rate": 0.00019580185421745765, "loss": 7.2583, "step": 81630 }, { "epoch": 9.824308062575211, "grad_norm": 269.29052734375, "learning_rate": 0.0001958007634402034, "loss": 7.1652, "step": 81640 }, { "epoch": 9.825511432009627, "grad_norm": 118.49571990966797, "learning_rate": 0.0001957996725243018, "loss": 7.1154, "step": 81650 }, { "epoch": 9.826714801444043, "grad_norm": 44.02663040161133, "learning_rate": 0.0001957985814697544, "loss": 7.2774, "step": 81660 }, { "epoch": 9.82791817087846, "grad_norm": 79.97306823730469, "learning_rate": 0.0001957974902765628, "loss": 7.246, "step": 81670 }, { "epoch": 9.829121540312876, "grad_norm": 77.45560455322266, "learning_rate": 0.00019579639894472858, "loss": 7.193, "step": 81680 }, { "epoch": 9.830324909747292, "grad_norm": 126.63589477539062, "learning_rate": 0.00019579530747425335, "loss": 7.238, "step": 81690 }, { "epoch": 9.83152827918171, "grad_norm": 89.69837951660156, "learning_rate": 0.00019579421586513862, "loss": 7.1709, "step": 81700 }, { "epoch": 9.832731648616125, "grad_norm": 33.425048828125, "learning_rate": 0.00019579312411738603, "loss": 7.2799, "step": 81710 }, { "epoch": 9.833935018050541, "grad_norm": 16.653369903564453, "learning_rate": 0.00019579203223099714, "loss": 7.2248, "step": 81720 }, { "epoch": 9.835138387484958, "grad_norm": 117.22438049316406, "learning_rate": 0.00019579094020597354, "loss": 7.2314, "step": 81730 }, { "epoch": 9.836341756919374, "grad_norm": 163.76300048828125, "learning_rate": 0.00019578984804231677, "loss": 7.4453, "step": 81740 }, { "epoch": 9.83754512635379, "grad_norm": 53.07315444946289, "learning_rate": 0.00019578875574002846, "loss": 7.2892, "step": 81750 }, { "epoch": 9.838748495788208, "grad_norm": 31.876468658447266, "learning_rate": 0.00019578766329911016, "loss": 7.2551, "step": 81760 }, { "epoch": 9.839951865222623, "grad_norm": 204.01930236816406, "learning_rate": 0.00019578657071956344, "loss": 7.1353, "step": 81770 }, { "epoch": 9.84115523465704, "grad_norm": 329.9084777832031, "learning_rate": 0.00019578547800138996, "loss": 7.3476, "step": 81780 }, { "epoch": 9.842358604091457, "grad_norm": 284.59259033203125, "learning_rate": 0.00019578438514459122, "loss": 7.3247, "step": 81790 }, { "epoch": 9.843561973525873, "grad_norm": 308.4414367675781, "learning_rate": 0.0001957832921491688, "loss": 7.5106, "step": 81800 }, { "epoch": 9.844765342960288, "grad_norm": 245.64149475097656, "learning_rate": 0.00019578219901512434, "loss": 7.5745, "step": 81810 }, { "epoch": 9.845968712394706, "grad_norm": 287.5294494628906, "learning_rate": 0.00019578110574245935, "loss": 7.4791, "step": 81820 }, { "epoch": 9.847172081829122, "grad_norm": 127.05554962158203, "learning_rate": 0.00019578001233117552, "loss": 7.2435, "step": 81830 }, { "epoch": 9.848375451263538, "grad_norm": 230.76974487304688, "learning_rate": 0.00019577891878127434, "loss": 7.2437, "step": 81840 }, { "epoch": 9.849578820697955, "grad_norm": 102.9111099243164, "learning_rate": 0.00019577782509275736, "loss": 7.2524, "step": 81850 }, { "epoch": 9.85078219013237, "grad_norm": 74.16764068603516, "learning_rate": 0.00019577673126562628, "loss": 7.2241, "step": 81860 }, { "epoch": 9.851985559566787, "grad_norm": 60.01537322998047, "learning_rate": 0.0001957756372998826, "loss": 7.1939, "step": 81870 }, { "epoch": 9.853188929001202, "grad_norm": 49.157100677490234, "learning_rate": 0.00019577454319552796, "loss": 7.2806, "step": 81880 }, { "epoch": 9.85439229843562, "grad_norm": 11.818543434143066, "learning_rate": 0.00019577344895256387, "loss": 7.377, "step": 81890 }, { "epoch": 9.855595667870036, "grad_norm": 23.056699752807617, "learning_rate": 0.00019577235457099196, "loss": 7.307, "step": 81900 }, { "epoch": 9.856799037304452, "grad_norm": 29.7425594329834, "learning_rate": 0.00019577126005081383, "loss": 7.3352, "step": 81910 }, { "epoch": 9.85800240673887, "grad_norm": 95.12638092041016, "learning_rate": 0.00019577016539203102, "loss": 7.3838, "step": 81920 }, { "epoch": 9.859205776173285, "grad_norm": 11.68164348602295, "learning_rate": 0.00019576907059464516, "loss": 7.3004, "step": 81930 }, { "epoch": 9.8604091456077, "grad_norm": 24.45294761657715, "learning_rate": 0.00019576797565865777, "loss": 7.3887, "step": 81940 }, { "epoch": 9.861612515042118, "grad_norm": 30.26659393310547, "learning_rate": 0.0001957668805840705, "loss": 7.3831, "step": 81950 }, { "epoch": 9.862815884476534, "grad_norm": 14.567951202392578, "learning_rate": 0.00019576578537088492, "loss": 7.3683, "step": 81960 }, { "epoch": 9.86401925391095, "grad_norm": 34.78707504272461, "learning_rate": 0.00019576469001910258, "loss": 7.3319, "step": 81970 }, { "epoch": 9.865222623345367, "grad_norm": 13.080894470214844, "learning_rate": 0.0001957635945287251, "loss": 7.3395, "step": 81980 }, { "epoch": 9.866425992779783, "grad_norm": 21.612865447998047, "learning_rate": 0.00019576249889975407, "loss": 7.3189, "step": 81990 }, { "epoch": 9.867629362214199, "grad_norm": 20.334867477416992, "learning_rate": 0.00019576140313219104, "loss": 7.2705, "step": 82000 }, { "epoch": 9.868832731648617, "grad_norm": 12.033313751220703, "learning_rate": 0.00019576030722603762, "loss": 7.3791, "step": 82010 }, { "epoch": 9.870036101083032, "grad_norm": 40.773624420166016, "learning_rate": 0.0001957592111812954, "loss": 7.2292, "step": 82020 }, { "epoch": 9.871239470517448, "grad_norm": 196.13064575195312, "learning_rate": 0.00019575811499796596, "loss": 7.398, "step": 82030 }, { "epoch": 9.872442839951866, "grad_norm": 344.34698486328125, "learning_rate": 0.0001957570186760509, "loss": 7.4888, "step": 82040 }, { "epoch": 9.873646209386282, "grad_norm": 19.01952362060547, "learning_rate": 0.00019575592221555177, "loss": 7.2922, "step": 82050 }, { "epoch": 9.874849578820697, "grad_norm": 85.31414031982422, "learning_rate": 0.00019575482561647018, "loss": 7.433, "step": 82060 }, { "epoch": 9.876052948255115, "grad_norm": 98.12935638427734, "learning_rate": 0.00019575372887880774, "loss": 7.5917, "step": 82070 }, { "epoch": 9.87725631768953, "grad_norm": 9.710317611694336, "learning_rate": 0.000195752632002566, "loss": 7.6765, "step": 82080 }, { "epoch": 9.878459687123947, "grad_norm": 8.849117279052734, "learning_rate": 0.00019575153498774652, "loss": 7.5691, "step": 82090 }, { "epoch": 9.879663056558364, "grad_norm": 9.453598022460938, "learning_rate": 0.00019575043783435098, "loss": 7.5222, "step": 82100 }, { "epoch": 9.88086642599278, "grad_norm": 9.823592185974121, "learning_rate": 0.0001957493405423809, "loss": 7.5208, "step": 82110 }, { "epoch": 9.882069795427196, "grad_norm": 9.096379280090332, "learning_rate": 0.0001957482431118379, "loss": 7.4863, "step": 82120 }, { "epoch": 9.883273164861613, "grad_norm": 17.901180267333984, "learning_rate": 0.0001957471455427235, "loss": 7.6243, "step": 82130 }, { "epoch": 9.884476534296029, "grad_norm": 101.95468139648438, "learning_rate": 0.0001957460478350394, "loss": 7.566, "step": 82140 }, { "epoch": 9.885679903730445, "grad_norm": 39.395477294921875, "learning_rate": 0.00019574494998878713, "loss": 7.6857, "step": 82150 }, { "epoch": 9.886883273164862, "grad_norm": 11.01801872253418, "learning_rate": 0.00019574385200396823, "loss": 7.6153, "step": 82160 }, { "epoch": 9.888086642599278, "grad_norm": 8.296485900878906, "learning_rate": 0.0001957427538805844, "loss": 7.6091, "step": 82170 }, { "epoch": 9.889290012033694, "grad_norm": 11.651066780090332, "learning_rate": 0.0001957416556186371, "loss": 7.5882, "step": 82180 }, { "epoch": 9.890493381468112, "grad_norm": 15.031363487243652, "learning_rate": 0.00019574055721812802, "loss": 7.4771, "step": 82190 }, { "epoch": 9.891696750902527, "grad_norm": 22.049480438232422, "learning_rate": 0.00019573945867905871, "loss": 7.5367, "step": 82200 }, { "epoch": 9.892900120336943, "grad_norm": 6.620582103729248, "learning_rate": 0.00019573836000143078, "loss": 7.4201, "step": 82210 }, { "epoch": 9.89410348977136, "grad_norm": 44.3409423828125, "learning_rate": 0.0001957372611852458, "loss": 7.4448, "step": 82220 }, { "epoch": 9.895306859205776, "grad_norm": 11.555047988891602, "learning_rate": 0.00019573616223050538, "loss": 7.4677, "step": 82230 }, { "epoch": 9.896510228640192, "grad_norm": 42.511558532714844, "learning_rate": 0.00019573506313721106, "loss": 7.382, "step": 82240 }, { "epoch": 9.89771359807461, "grad_norm": 33.44620132446289, "learning_rate": 0.00019573396390536448, "loss": 7.4196, "step": 82250 }, { "epoch": 9.898916967509026, "grad_norm": 35.21254348754883, "learning_rate": 0.00019573286453496725, "loss": 7.3664, "step": 82260 }, { "epoch": 9.900120336943441, "grad_norm": 167.31251525878906, "learning_rate": 0.0001957317650260209, "loss": 7.4636, "step": 82270 }, { "epoch": 9.901323706377857, "grad_norm": 279.4736633300781, "learning_rate": 0.00019573066537852704, "loss": 7.5109, "step": 82280 }, { "epoch": 9.902527075812275, "grad_norm": 152.21102905273438, "learning_rate": 0.0001957295655924873, "loss": 7.4334, "step": 82290 }, { "epoch": 9.90373044524669, "grad_norm": 26.969270706176758, "learning_rate": 0.00019572846566790324, "loss": 7.3984, "step": 82300 }, { "epoch": 9.904933814681106, "grad_norm": 9.886170387268066, "learning_rate": 0.00019572736560477644, "loss": 7.4776, "step": 82310 }, { "epoch": 9.906137184115524, "grad_norm": 100.31951904296875, "learning_rate": 0.00019572626540310852, "loss": 7.3959, "step": 82320 }, { "epoch": 9.90734055354994, "grad_norm": 36.880149841308594, "learning_rate": 0.00019572516506290106, "loss": 7.4326, "step": 82330 }, { "epoch": 9.908543922984355, "grad_norm": 96.23030090332031, "learning_rate": 0.00019572406458415564, "loss": 7.3163, "step": 82340 }, { "epoch": 9.909747292418773, "grad_norm": 64.73321533203125, "learning_rate": 0.0001957229639668739, "loss": 7.3193, "step": 82350 }, { "epoch": 9.910950661853189, "grad_norm": 221.0384063720703, "learning_rate": 0.00019572186321105734, "loss": 7.3878, "step": 82360 }, { "epoch": 9.912154031287605, "grad_norm": 77.85171508789062, "learning_rate": 0.00019572076231670767, "loss": 7.4973, "step": 82370 }, { "epoch": 9.913357400722022, "grad_norm": 47.297149658203125, "learning_rate": 0.00019571966128382638, "loss": 7.4137, "step": 82380 }, { "epoch": 9.914560770156438, "grad_norm": 204.392822265625, "learning_rate": 0.0001957185601124151, "loss": 7.4217, "step": 82390 }, { "epoch": 9.915764139590854, "grad_norm": 27.453489303588867, "learning_rate": 0.00019571745880247546, "loss": 7.511, "step": 82400 }, { "epoch": 9.916967509025271, "grad_norm": 23.24348258972168, "learning_rate": 0.000195716357354009, "loss": 7.5252, "step": 82410 }, { "epoch": 9.918170878459687, "grad_norm": 9.649572372436523, "learning_rate": 0.00019571525576701738, "loss": 7.3736, "step": 82420 }, { "epoch": 9.919374247894103, "grad_norm": 10.647658348083496, "learning_rate": 0.00019571415404150212, "loss": 7.5302, "step": 82430 }, { "epoch": 9.92057761732852, "grad_norm": 28.23257064819336, "learning_rate": 0.00019571305217746488, "loss": 7.4453, "step": 82440 }, { "epoch": 9.921780986762936, "grad_norm": 16.840059280395508, "learning_rate": 0.00019571195017490723, "loss": 7.3965, "step": 82450 }, { "epoch": 9.922984356197352, "grad_norm": 9.777364730834961, "learning_rate": 0.0001957108480338307, "loss": 7.4309, "step": 82460 }, { "epoch": 9.92418772563177, "grad_norm": 21.885236740112305, "learning_rate": 0.000195709745754237, "loss": 7.4862, "step": 82470 }, { "epoch": 9.925391095066185, "grad_norm": 17.263158798217773, "learning_rate": 0.00019570864333612762, "loss": 7.4125, "step": 82480 }, { "epoch": 9.926594464500601, "grad_norm": 16.948944091796875, "learning_rate": 0.00019570754077950423, "loss": 7.4226, "step": 82490 }, { "epoch": 9.927797833935019, "grad_norm": 16.359371185302734, "learning_rate": 0.0001957064380843684, "loss": 7.4369, "step": 82500 }, { "epoch": 9.929001203369435, "grad_norm": 12.681251525878906, "learning_rate": 0.0001957053352507217, "loss": 7.3728, "step": 82510 }, { "epoch": 9.93020457280385, "grad_norm": 17.827796936035156, "learning_rate": 0.0001957042322785658, "loss": 7.3548, "step": 82520 }, { "epoch": 9.931407942238268, "grad_norm": 41.84023666381836, "learning_rate": 0.0001957031291679022, "loss": 7.3884, "step": 82530 }, { "epoch": 9.932611311672684, "grad_norm": 34.2298583984375, "learning_rate": 0.00019570202591873254, "loss": 7.3613, "step": 82540 }, { "epoch": 9.9338146811071, "grad_norm": 26.370372772216797, "learning_rate": 0.00019570092253105848, "loss": 7.3187, "step": 82550 }, { "epoch": 9.935018050541515, "grad_norm": 158.3146209716797, "learning_rate": 0.0001956998190048815, "loss": 7.8905, "step": 82560 }, { "epoch": 9.936221419975933, "grad_norm": 83.25333404541016, "learning_rate": 0.0001956987153402033, "loss": 8.5049, "step": 82570 }, { "epoch": 9.937424789410349, "grad_norm": 67.73261260986328, "learning_rate": 0.00019569761153702539, "loss": 7.752, "step": 82580 }, { "epoch": 9.938628158844764, "grad_norm": 41.0427360534668, "learning_rate": 0.0001956965075953494, "loss": 9.0304, "step": 82590 }, { "epoch": 9.939831528279182, "grad_norm": 28.815858840942383, "learning_rate": 0.00019569540351517696, "loss": 8.2543, "step": 82600 }, { "epoch": 9.941034897713598, "grad_norm": 28.119356155395508, "learning_rate": 0.00019569429929650967, "loss": 7.6497, "step": 82610 }, { "epoch": 9.942238267148014, "grad_norm": 30.308202743530273, "learning_rate": 0.00019569319493934904, "loss": 7.5888, "step": 82620 }, { "epoch": 9.943441636582431, "grad_norm": 15.763078689575195, "learning_rate": 0.00019569209044369677, "loss": 7.3972, "step": 82630 }, { "epoch": 9.944645006016847, "grad_norm": 35.42869567871094, "learning_rate": 0.00019569098580955445, "loss": 7.5268, "step": 82640 }, { "epoch": 9.945848375451263, "grad_norm": 30.2524356842041, "learning_rate": 0.0001956898810369236, "loss": 7.501, "step": 82650 }, { "epoch": 9.94705174488568, "grad_norm": 16.35939598083496, "learning_rate": 0.0001956887761258059, "loss": 7.4544, "step": 82660 }, { "epoch": 9.948255114320096, "grad_norm": 13.832306861877441, "learning_rate": 0.0001956876710762029, "loss": 7.4556, "step": 82670 }, { "epoch": 9.949458483754512, "grad_norm": 26.814851760864258, "learning_rate": 0.00019568656588811622, "loss": 7.5283, "step": 82680 }, { "epoch": 9.95066185318893, "grad_norm": 33.63003921508789, "learning_rate": 0.00019568546056154744, "loss": 7.517, "step": 82690 }, { "epoch": 9.951865222623345, "grad_norm": 15.94406795501709, "learning_rate": 0.0001956843550964982, "loss": 7.5016, "step": 82700 }, { "epoch": 9.953068592057761, "grad_norm": 9.614724159240723, "learning_rate": 0.00019568324949297003, "loss": 7.5599, "step": 82710 }, { "epoch": 9.954271961492179, "grad_norm": 11.35432243347168, "learning_rate": 0.0001956821437509646, "loss": 7.4455, "step": 82720 }, { "epoch": 9.955475330926594, "grad_norm": 28.24508285522461, "learning_rate": 0.0001956810378704835, "loss": 7.5293, "step": 82730 }, { "epoch": 9.95667870036101, "grad_norm": 16.813451766967773, "learning_rate": 0.00019567993185152832, "loss": 7.4443, "step": 82740 }, { "epoch": 9.957882069795428, "grad_norm": 14.316716194152832, "learning_rate": 0.00019567882569410062, "loss": 7.4105, "step": 82750 }, { "epoch": 9.959085439229844, "grad_norm": 11.617361068725586, "learning_rate": 0.00019567771939820206, "loss": 7.4074, "step": 82760 }, { "epoch": 9.96028880866426, "grad_norm": 16.331735610961914, "learning_rate": 0.00019567661296383425, "loss": 7.4777, "step": 82770 }, { "epoch": 9.961492178098677, "grad_norm": 14.681106567382812, "learning_rate": 0.00019567550639099869, "loss": 7.4397, "step": 82780 }, { "epoch": 9.962695547533093, "grad_norm": 29.191822052001953, "learning_rate": 0.0001956743996796971, "loss": 7.408, "step": 82790 }, { "epoch": 9.963898916967509, "grad_norm": 137.0829620361328, "learning_rate": 0.000195673292829931, "loss": 7.4947, "step": 82800 }, { "epoch": 9.965102286401926, "grad_norm": 11.389457702636719, "learning_rate": 0.00019567218584170206, "loss": 7.3979, "step": 82810 }, { "epoch": 9.966305655836342, "grad_norm": 54.594200134277344, "learning_rate": 0.00019567107871501182, "loss": 7.5069, "step": 82820 }, { "epoch": 9.967509025270758, "grad_norm": 13.628167152404785, "learning_rate": 0.00019566997144986192, "loss": 7.3462, "step": 82830 }, { "epoch": 9.968712394705175, "grad_norm": 50.4848518371582, "learning_rate": 0.00019566886404625395, "loss": 7.4765, "step": 82840 }, { "epoch": 9.969915764139591, "grad_norm": 163.96688842773438, "learning_rate": 0.00019566775650418951, "loss": 7.693, "step": 82850 }, { "epoch": 9.971119133574007, "grad_norm": 20.04106330871582, "learning_rate": 0.0001956666488236702, "loss": 7.5473, "step": 82860 }, { "epoch": 9.972322503008424, "grad_norm": 53.01713562011719, "learning_rate": 0.00019566554100469764, "loss": 7.5402, "step": 82870 }, { "epoch": 9.97352587244284, "grad_norm": 7.600004196166992, "learning_rate": 0.00019566443304727343, "loss": 7.4574, "step": 82880 }, { "epoch": 9.974729241877256, "grad_norm": 14.555046081542969, "learning_rate": 0.00019566332495139915, "loss": 7.3451, "step": 82890 }, { "epoch": 9.975932611311674, "grad_norm": 235.01226806640625, "learning_rate": 0.00019566221671707642, "loss": 7.3825, "step": 82900 }, { "epoch": 9.97713598074609, "grad_norm": 12.984586715698242, "learning_rate": 0.00019566110834430686, "loss": 7.3919, "step": 82910 }, { "epoch": 9.978339350180505, "grad_norm": 187.3184051513672, "learning_rate": 0.00019565999983309207, "loss": 7.3823, "step": 82920 }, { "epoch": 9.979542719614923, "grad_norm": 12.903731346130371, "learning_rate": 0.0001956588911834336, "loss": 7.3833, "step": 82930 }, { "epoch": 9.980746089049338, "grad_norm": 10.085701942443848, "learning_rate": 0.00019565778239533313, "loss": 7.3357, "step": 82940 }, { "epoch": 9.981949458483754, "grad_norm": 6.894649982452393, "learning_rate": 0.0001956566734687922, "loss": 7.2906, "step": 82950 }, { "epoch": 9.98315282791817, "grad_norm": 5.988856315612793, "learning_rate": 0.0001956555644038125, "loss": 7.3891, "step": 82960 }, { "epoch": 9.984356197352588, "grad_norm": 6.759174823760986, "learning_rate": 0.00019565445520039555, "loss": 7.3043, "step": 82970 }, { "epoch": 9.985559566787003, "grad_norm": 11.749421119689941, "learning_rate": 0.000195653345858543, "loss": 7.2955, "step": 82980 }, { "epoch": 9.98676293622142, "grad_norm": 9.498119354248047, "learning_rate": 0.00019565223637825642, "loss": 7.4558, "step": 82990 }, { "epoch": 9.987966305655837, "grad_norm": 6.589890480041504, "learning_rate": 0.00019565112675953744, "loss": 7.388, "step": 83000 }, { "epoch": 9.989169675090253, "grad_norm": 103.37834167480469, "learning_rate": 0.00019565001700238768, "loss": 7.4097, "step": 83010 }, { "epoch": 9.990373044524668, "grad_norm": 2.0575625896453857, "learning_rate": 0.00019564890710680873, "loss": 7.2956, "step": 83020 }, { "epoch": 9.991576413959086, "grad_norm": 3.804295778274536, "learning_rate": 0.00019564779707280218, "loss": 7.3008, "step": 83030 }, { "epoch": 9.992779783393502, "grad_norm": 56.05696105957031, "learning_rate": 0.00019564668690036965, "loss": 7.3577, "step": 83040 }, { "epoch": 9.993983152827917, "grad_norm": 8.73436450958252, "learning_rate": 0.00019564557658951278, "loss": 7.2376, "step": 83050 }, { "epoch": 9.995186522262335, "grad_norm": 7.325547695159912, "learning_rate": 0.0001956444661402331, "loss": 7.3478, "step": 83060 }, { "epoch": 9.99638989169675, "grad_norm": 30.338605880737305, "learning_rate": 0.00019564335555253233, "loss": 7.3733, "step": 83070 }, { "epoch": 9.997593261131167, "grad_norm": 113.83546447753906, "learning_rate": 0.00019564224482641196, "loss": 7.3178, "step": 83080 }, { "epoch": 9.998796630565584, "grad_norm": 54.39371871948242, "learning_rate": 0.00019564113396187364, "loss": 7.1751, "step": 83090 }, { "epoch": 10.0, "grad_norm": 7.800882339477539, "learning_rate": 0.00019564002295891904, "loss": 7.4801, "step": 83100 }, { "epoch": 10.0, "eval_loss": 7.518892765045166, "eval_runtime": 119.6337, "eval_samples_per_second": 61.747, "eval_steps_per_second": 7.724, "step": 83100 }, { "epoch": 10.001203369434416, "grad_norm": 544.3108520507812, "learning_rate": 0.00019563891181754967, "loss": 7.4537, "step": 83110 }, { "epoch": 10.002406738868833, "grad_norm": 45.86470031738281, "learning_rate": 0.0001956378005377672, "loss": 7.4266, "step": 83120 }, { "epoch": 10.00361010830325, "grad_norm": 23.418373107910156, "learning_rate": 0.0001956366891195732, "loss": 7.293, "step": 83130 }, { "epoch": 10.004813477737665, "grad_norm": 9.664619445800781, "learning_rate": 0.00019563557756296934, "loss": 7.3395, "step": 83140 }, { "epoch": 10.006016847172083, "grad_norm": 28.96971893310547, "learning_rate": 0.00019563446586795714, "loss": 7.3513, "step": 83150 }, { "epoch": 10.007220216606498, "grad_norm": 100.30049133300781, "learning_rate": 0.00019563335403453827, "loss": 7.368, "step": 83160 }, { "epoch": 10.008423586040914, "grad_norm": 61.96688461303711, "learning_rate": 0.00019563224206271436, "loss": 7.3882, "step": 83170 }, { "epoch": 10.009626955475332, "grad_norm": 56.330135345458984, "learning_rate": 0.00019563112995248693, "loss": 7.5772, "step": 83180 }, { "epoch": 10.010830324909747, "grad_norm": 564.260986328125, "learning_rate": 0.0001956300177038577, "loss": 7.4997, "step": 83190 }, { "epoch": 10.012033694344163, "grad_norm": 201.80166625976562, "learning_rate": 0.0001956289053168282, "loss": 7.4236, "step": 83200 }, { "epoch": 10.01323706377858, "grad_norm": 410.536376953125, "learning_rate": 0.00019562779279140005, "loss": 7.5006, "step": 83210 }, { "epoch": 10.014440433212997, "grad_norm": 3044.130126953125, "learning_rate": 0.00019562668012757488, "loss": 8.1232, "step": 83220 }, { "epoch": 10.015643802647412, "grad_norm": 36.56355285644531, "learning_rate": 0.00019562556732535428, "loss": 7.7171, "step": 83230 }, { "epoch": 10.01684717208183, "grad_norm": 79.8092269897461, "learning_rate": 0.0001956244543847399, "loss": 7.6449, "step": 83240 }, { "epoch": 10.018050541516246, "grad_norm": 17.27240753173828, "learning_rate": 0.00019562334130573332, "loss": 7.5587, "step": 83250 }, { "epoch": 10.019253910950662, "grad_norm": 18.181196212768555, "learning_rate": 0.00019562222808833615, "loss": 7.4222, "step": 83260 }, { "epoch": 10.020457280385079, "grad_norm": 103.67266845703125, "learning_rate": 0.00019562111473255, "loss": 7.3783, "step": 83270 }, { "epoch": 10.021660649819495, "grad_norm": 207.486572265625, "learning_rate": 0.0001956200012383765, "loss": 7.3435, "step": 83280 }, { "epoch": 10.02286401925391, "grad_norm": 7375.67578125, "learning_rate": 0.0001956188876058173, "loss": 7.4211, "step": 83290 }, { "epoch": 10.024067388688326, "grad_norm": 365.6496276855469, "learning_rate": 0.0001956177738348739, "loss": 7.5252, "step": 83300 }, { "epoch": 10.025270758122744, "grad_norm": 54.60540771484375, "learning_rate": 0.00019561665992554796, "loss": 7.4325, "step": 83310 }, { "epoch": 10.02647412755716, "grad_norm": 28.61625099182129, "learning_rate": 0.00019561554587784114, "loss": 7.4601, "step": 83320 }, { "epoch": 10.027677496991576, "grad_norm": 50.83510208129883, "learning_rate": 0.00019561443169175503, "loss": 7.4433, "step": 83330 }, { "epoch": 10.028880866425993, "grad_norm": 57.21589279174805, "learning_rate": 0.0001956133173672912, "loss": 7.4556, "step": 83340 }, { "epoch": 10.030084235860409, "grad_norm": 981.710205078125, "learning_rate": 0.00019561220290445133, "loss": 7.36, "step": 83350 }, { "epoch": 10.031287605294825, "grad_norm": 75.56356048583984, "learning_rate": 0.00019561108830323695, "loss": 7.3356, "step": 83360 }, { "epoch": 10.032490974729242, "grad_norm": 70.26111602783203, "learning_rate": 0.00019560997356364977, "loss": 7.4164, "step": 83370 }, { "epoch": 10.033694344163658, "grad_norm": 535.666748046875, "learning_rate": 0.00019560885868569132, "loss": 7.402, "step": 83380 }, { "epoch": 10.034897713598074, "grad_norm": 127.51241302490234, "learning_rate": 0.00019560774366936324, "loss": 7.4143, "step": 83390 }, { "epoch": 10.036101083032491, "grad_norm": 307.8184814453125, "learning_rate": 0.00019560662851466715, "loss": 7.4348, "step": 83400 }, { "epoch": 10.037304452466907, "grad_norm": 9.060751914978027, "learning_rate": 0.00019560551322160468, "loss": 7.4283, "step": 83410 }, { "epoch": 10.038507821901323, "grad_norm": 9.682147026062012, "learning_rate": 0.00019560439779017742, "loss": 7.3959, "step": 83420 }, { "epoch": 10.03971119133574, "grad_norm": 8.636005401611328, "learning_rate": 0.00019560328222038698, "loss": 7.4284, "step": 83430 }, { "epoch": 10.040914560770156, "grad_norm": 9.469674110412598, "learning_rate": 0.00019560216651223503, "loss": 7.2697, "step": 83440 }, { "epoch": 10.042117930204572, "grad_norm": 19.751909255981445, "learning_rate": 0.0001956010506657231, "loss": 7.418, "step": 83450 }, { "epoch": 10.04332129963899, "grad_norm": 166.0884552001953, "learning_rate": 0.0001955999346808529, "loss": 7.3375, "step": 83460 }, { "epoch": 10.044524669073406, "grad_norm": 5969.85107421875, "learning_rate": 0.00019559881855762593, "loss": 7.4188, "step": 83470 }, { "epoch": 10.045728038507821, "grad_norm": 115.87480163574219, "learning_rate": 0.0001955977022960439, "loss": 7.3576, "step": 83480 }, { "epoch": 10.046931407942239, "grad_norm": 5.944371700286865, "learning_rate": 0.00019559658589610838, "loss": 7.413, "step": 83490 }, { "epoch": 10.048134777376655, "grad_norm": 481.37255859375, "learning_rate": 0.00019559546935782101, "loss": 7.3182, "step": 83500 }, { "epoch": 10.04933814681107, "grad_norm": 40.134395599365234, "learning_rate": 0.00019559435268118338, "loss": 7.2567, "step": 83510 }, { "epoch": 10.050541516245488, "grad_norm": 183.8351593017578, "learning_rate": 0.00019559323586619713, "loss": 7.3474, "step": 83520 }, { "epoch": 10.051744885679904, "grad_norm": 118.63419342041016, "learning_rate": 0.00019559211891286387, "loss": 7.2396, "step": 83530 }, { "epoch": 10.05294825511432, "grad_norm": 27.949094772338867, "learning_rate": 0.0001955910018211852, "loss": 7.3118, "step": 83540 }, { "epoch": 10.054151624548737, "grad_norm": 102.37348175048828, "learning_rate": 0.00019558988459116274, "loss": 7.3392, "step": 83550 }, { "epoch": 10.055354993983153, "grad_norm": 71.63182067871094, "learning_rate": 0.00019558876722279816, "loss": 7.3955, "step": 83560 }, { "epoch": 10.056558363417569, "grad_norm": 1578.4404296875, "learning_rate": 0.000195587649716093, "loss": 7.4133, "step": 83570 }, { "epoch": 10.057761732851986, "grad_norm": 124.20844268798828, "learning_rate": 0.0001955865320710489, "loss": 7.3468, "step": 83580 }, { "epoch": 10.058965102286402, "grad_norm": 40.077510833740234, "learning_rate": 0.0001955854142876675, "loss": 7.2826, "step": 83590 }, { "epoch": 10.060168471720818, "grad_norm": 33.752506256103516, "learning_rate": 0.00019558429636595045, "loss": 7.3967, "step": 83600 }, { "epoch": 10.061371841155236, "grad_norm": 52.681339263916016, "learning_rate": 0.00019558317830589928, "loss": 7.3475, "step": 83610 }, { "epoch": 10.062575210589651, "grad_norm": 312.16448974609375, "learning_rate": 0.00019558206010751564, "loss": 7.2011, "step": 83620 }, { "epoch": 10.063778580024067, "grad_norm": 10.673030853271484, "learning_rate": 0.00019558094177080118, "loss": 7.265, "step": 83630 }, { "epoch": 10.064981949458483, "grad_norm": 60.73483657836914, "learning_rate": 0.00019557982329575747, "loss": 7.2372, "step": 83640 }, { "epoch": 10.0661853188929, "grad_norm": 30.778305053710938, "learning_rate": 0.00019557870468238622, "loss": 7.3471, "step": 83650 }, { "epoch": 10.067388688327316, "grad_norm": 69.0111312866211, "learning_rate": 0.00019557758593068892, "loss": 7.3048, "step": 83660 }, { "epoch": 10.068592057761732, "grad_norm": 30.080812454223633, "learning_rate": 0.00019557646704066733, "loss": 7.4109, "step": 83670 }, { "epoch": 10.06979542719615, "grad_norm": 32.3433723449707, "learning_rate": 0.00019557534801232292, "loss": 7.365, "step": 83680 }, { "epoch": 10.070998796630565, "grad_norm": 71.39273834228516, "learning_rate": 0.0001955742288456574, "loss": 7.408, "step": 83690 }, { "epoch": 10.072202166064981, "grad_norm": 133.87762451171875, "learning_rate": 0.00019557310954067242, "loss": 7.389, "step": 83700 }, { "epoch": 10.073405535499399, "grad_norm": 63.1323356628418, "learning_rate": 0.00019557199009736953, "loss": 7.4022, "step": 83710 }, { "epoch": 10.074608904933815, "grad_norm": 81.40836334228516, "learning_rate": 0.00019557087051575036, "loss": 7.2864, "step": 83720 }, { "epoch": 10.07581227436823, "grad_norm": 37.00311279296875, "learning_rate": 0.00019556975079581656, "loss": 7.2, "step": 83730 }, { "epoch": 10.077015643802648, "grad_norm": 32.62196350097656, "learning_rate": 0.0001955686309375697, "loss": 7.3818, "step": 83740 }, { "epoch": 10.078219013237064, "grad_norm": 931.1118774414062, "learning_rate": 0.0001955675109410115, "loss": 7.3582, "step": 83750 }, { "epoch": 10.07942238267148, "grad_norm": 74.43765258789062, "learning_rate": 0.0001955663908061435, "loss": 7.3079, "step": 83760 }, { "epoch": 10.080625752105897, "grad_norm": 387.19573974609375, "learning_rate": 0.00019556527053296731, "loss": 7.3001, "step": 83770 }, { "epoch": 10.081829121540313, "grad_norm": 535.4677734375, "learning_rate": 0.0001955641501214846, "loss": 7.3723, "step": 83780 }, { "epoch": 10.083032490974729, "grad_norm": 440.4124450683594, "learning_rate": 0.00019556302957169695, "loss": 7.3969, "step": 83790 }, { "epoch": 10.084235860409146, "grad_norm": 26.299827575683594, "learning_rate": 0.000195561908883606, "loss": 7.3512, "step": 83800 }, { "epoch": 10.085439229843562, "grad_norm": 9.611572265625, "learning_rate": 0.0001955607880572134, "loss": 7.3437, "step": 83810 }, { "epoch": 10.086642599277978, "grad_norm": 10106.060546875, "learning_rate": 0.00019555966709252072, "loss": 7.4588, "step": 83820 }, { "epoch": 10.087845968712395, "grad_norm": 51.20191955566406, "learning_rate": 0.00019555854598952963, "loss": 7.4455, "step": 83830 }, { "epoch": 10.089049338146811, "grad_norm": 49.935585021972656, "learning_rate": 0.00019555742474824172, "loss": 7.3975, "step": 83840 }, { "epoch": 10.090252707581227, "grad_norm": 672.3780517578125, "learning_rate": 0.00019555630336865864, "loss": 7.4609, "step": 83850 }, { "epoch": 10.091456077015645, "grad_norm": 791.9589233398438, "learning_rate": 0.000195555181850782, "loss": 7.4114, "step": 83860 }, { "epoch": 10.09265944645006, "grad_norm": 96690.9765625, "learning_rate": 0.0001955540601946134, "loss": 7.4866, "step": 83870 }, { "epoch": 10.093862815884476, "grad_norm": 169.25511169433594, "learning_rate": 0.00019555293840015447, "loss": 7.5957, "step": 83880 }, { "epoch": 10.095066185318894, "grad_norm": 217.37278747558594, "learning_rate": 0.00019555181646740687, "loss": 7.5802, "step": 83890 }, { "epoch": 10.09626955475331, "grad_norm": 143.0635986328125, "learning_rate": 0.00019555069439637223, "loss": 7.6048, "step": 83900 }, { "epoch": 10.097472924187725, "grad_norm": 792.082763671875, "learning_rate": 0.00019554957218705207, "loss": 7.4206, "step": 83910 }, { "epoch": 10.098676293622143, "grad_norm": 181.4624786376953, "learning_rate": 0.00019554844983944817, "loss": 7.4301, "step": 83920 }, { "epoch": 10.099879663056559, "grad_norm": 1086.484375, "learning_rate": 0.000195547327353562, "loss": 7.5052, "step": 83930 }, { "epoch": 10.101083032490974, "grad_norm": 216.3699951171875, "learning_rate": 0.0001955462047293953, "loss": 7.4086, "step": 83940 }, { "epoch": 10.102286401925392, "grad_norm": 159.45480346679688, "learning_rate": 0.00019554508196694962, "loss": 7.3906, "step": 83950 }, { "epoch": 10.103489771359808, "grad_norm": 36455.9140625, "learning_rate": 0.00019554395906622664, "loss": 7.4926, "step": 83960 }, { "epoch": 10.104693140794224, "grad_norm": 6384.41796875, "learning_rate": 0.000195542836027228, "loss": 7.4972, "step": 83970 }, { "epoch": 10.10589651022864, "grad_norm": 6696.1494140625, "learning_rate": 0.00019554171284995523, "loss": 7.3718, "step": 83980 }, { "epoch": 10.107099879663057, "grad_norm": 799.7194213867188, "learning_rate": 0.00019554058953441005, "loss": 7.4873, "step": 83990 }, { "epoch": 10.108303249097473, "grad_norm": 1213.58837890625, "learning_rate": 0.000195539466080594, "loss": 7.5494, "step": 84000 }, { "epoch": 10.109506618531888, "grad_norm": 3183014.75, "learning_rate": 0.0001955383424885088, "loss": 7.5762, "step": 84010 }, { "epoch": 10.110709987966306, "grad_norm": 44857.57421875, "learning_rate": 0.00019553721875815603, "loss": 7.493, "step": 84020 }, { "epoch": 10.111913357400722, "grad_norm": 1882.9136962890625, "learning_rate": 0.0001955360948895373, "loss": 7.4567, "step": 84030 }, { "epoch": 10.113116726835138, "grad_norm": 500.9793395996094, "learning_rate": 0.00019553497088265425, "loss": 7.4522, "step": 84040 }, { "epoch": 10.114320096269555, "grad_norm": 417.8712158203125, "learning_rate": 0.00019553384673750852, "loss": 7.4704, "step": 84050 }, { "epoch": 10.115523465703971, "grad_norm": 1876.11083984375, "learning_rate": 0.0001955327224541017, "loss": 7.4824, "step": 84060 }, { "epoch": 10.116726835138387, "grad_norm": 299.41705322265625, "learning_rate": 0.00019553159803243548, "loss": 7.4902, "step": 84070 }, { "epoch": 10.117930204572804, "grad_norm": 82.94864654541016, "learning_rate": 0.00019553047347251146, "loss": 7.3832, "step": 84080 }, { "epoch": 10.11913357400722, "grad_norm": 36297.7109375, "learning_rate": 0.00019552934877433122, "loss": 7.3803, "step": 84090 }, { "epoch": 10.120336943441636, "grad_norm": 783.7518310546875, "learning_rate": 0.00019552822393789645, "loss": 7.3939, "step": 84100 }, { "epoch": 10.121540312876053, "grad_norm": 3914.262939453125, "learning_rate": 0.00019552709896320875, "loss": 7.3988, "step": 84110 }, { "epoch": 10.12274368231047, "grad_norm": 494.4291076660156, "learning_rate": 0.00019552597385026976, "loss": 7.4736, "step": 84120 }, { "epoch": 10.123947051744885, "grad_norm": 3709.756591796875, "learning_rate": 0.0001955248485990811, "loss": 7.481, "step": 84130 }, { "epoch": 10.125150421179303, "grad_norm": 670.2003173828125, "learning_rate": 0.00019552372320964437, "loss": 7.4812, "step": 84140 }, { "epoch": 10.126353790613718, "grad_norm": 566.203125, "learning_rate": 0.00019552259768196127, "loss": 7.6785, "step": 84150 }, { "epoch": 10.127557160048134, "grad_norm": 552.3314208984375, "learning_rate": 0.00019552147201603337, "loss": 7.461, "step": 84160 }, { "epoch": 10.128760529482552, "grad_norm": 42738.7734375, "learning_rate": 0.0001955203462118623, "loss": 7.5014, "step": 84170 }, { "epoch": 10.129963898916968, "grad_norm": 1573.9173583984375, "learning_rate": 0.00019551922026944974, "loss": 7.5085, "step": 84180 }, { "epoch": 10.131167268351383, "grad_norm": 1960.232421875, "learning_rate": 0.00019551809418879727, "loss": 7.4702, "step": 84190 }, { "epoch": 10.132370637785801, "grad_norm": 2213.644287109375, "learning_rate": 0.00019551696796990653, "loss": 7.5749, "step": 84200 }, { "epoch": 10.133574007220217, "grad_norm": 541.3018188476562, "learning_rate": 0.00019551584161277917, "loss": 7.5032, "step": 84210 }, { "epoch": 10.134777376654633, "grad_norm": 43037.62109375, "learning_rate": 0.0001955147151174168, "loss": 7.664, "step": 84220 }, { "epoch": 10.13598074608905, "grad_norm": 5107.90185546875, "learning_rate": 0.00019551358848382105, "loss": 7.6627, "step": 84230 }, { "epoch": 10.137184115523466, "grad_norm": 8360.072265625, "learning_rate": 0.00019551246171199354, "loss": 7.6046, "step": 84240 }, { "epoch": 10.138387484957882, "grad_norm": 2013.773681640625, "learning_rate": 0.00019551133480193592, "loss": 7.6087, "step": 84250 }, { "epoch": 10.1395908543923, "grad_norm": 10944.0859375, "learning_rate": 0.00019551020775364984, "loss": 7.7149, "step": 84260 }, { "epoch": 10.140794223826715, "grad_norm": 1577.2322998046875, "learning_rate": 0.0001955090805671369, "loss": 7.6496, "step": 84270 }, { "epoch": 10.14199759326113, "grad_norm": 38615.421875, "learning_rate": 0.00019550795324239874, "loss": 7.8053, "step": 84280 }, { "epoch": 10.143200962695548, "grad_norm": 17308.25, "learning_rate": 0.00019550682577943699, "loss": 7.7949, "step": 84290 }, { "epoch": 10.144404332129964, "grad_norm": 21218.119140625, "learning_rate": 0.00019550569817825328, "loss": 7.818, "step": 84300 }, { "epoch": 10.14560770156438, "grad_norm": 7600.408203125, "learning_rate": 0.00019550457043884928, "loss": 7.8349, "step": 84310 }, { "epoch": 10.146811070998796, "grad_norm": 4755.52734375, "learning_rate": 0.00019550344256122652, "loss": 7.8572, "step": 84320 }, { "epoch": 10.148014440433213, "grad_norm": 11302.9296875, "learning_rate": 0.00019550231454538674, "loss": 7.8566, "step": 84330 }, { "epoch": 10.14921780986763, "grad_norm": 9766.044921875, "learning_rate": 0.00019550118639133154, "loss": 7.8228, "step": 84340 }, { "epoch": 10.150421179302045, "grad_norm": 2017.5308837890625, "learning_rate": 0.00019550005809906252, "loss": 7.8841, "step": 84350 }, { "epoch": 10.151624548736462, "grad_norm": 2552.633056640625, "learning_rate": 0.00019549892966858135, "loss": 7.7728, "step": 84360 }, { "epoch": 10.152827918170878, "grad_norm": 21814.564453125, "learning_rate": 0.00019549780109988965, "loss": 7.8626, "step": 84370 }, { "epoch": 10.154031287605294, "grad_norm": 34714.97265625, "learning_rate": 0.00019549667239298905, "loss": 7.8497, "step": 84380 }, { "epoch": 10.155234657039712, "grad_norm": 31947.96484375, "learning_rate": 0.0001954955435478812, "loss": 7.911, "step": 84390 }, { "epoch": 10.156438026474127, "grad_norm": 494912.46875, "learning_rate": 0.00019549441456456773, "loss": 7.8158, "step": 84400 }, { "epoch": 10.157641395908543, "grad_norm": 47175.609375, "learning_rate": 0.00019549328544305023, "loss": 7.8153, "step": 84410 }, { "epoch": 10.15884476534296, "grad_norm": 51441.97265625, "learning_rate": 0.0001954921561833304, "loss": 7.8902, "step": 84420 }, { "epoch": 10.160048134777377, "grad_norm": 2050.4072265625, "learning_rate": 0.0001954910267854098, "loss": 7.7684, "step": 84430 }, { "epoch": 10.161251504211792, "grad_norm": 4532.08056640625, "learning_rate": 0.00019548989724929014, "loss": 7.8678, "step": 84440 }, { "epoch": 10.16245487364621, "grad_norm": 3253.48046875, "learning_rate": 0.00019548876757497302, "loss": 7.8032, "step": 84450 }, { "epoch": 10.163658243080626, "grad_norm": 1844.1534423828125, "learning_rate": 0.0001954876377624601, "loss": 7.6659, "step": 84460 }, { "epoch": 10.164861612515042, "grad_norm": 6227.05126953125, "learning_rate": 0.00019548650781175297, "loss": 7.7648, "step": 84470 }, { "epoch": 10.166064981949459, "grad_norm": 3526.93798828125, "learning_rate": 0.0001954853777228533, "loss": 7.6573, "step": 84480 }, { "epoch": 10.167268351383875, "grad_norm": 437.5130615234375, "learning_rate": 0.0001954842474957627, "loss": 7.6103, "step": 84490 }, { "epoch": 10.16847172081829, "grad_norm": 290.9523620605469, "learning_rate": 0.00019548311713048282, "loss": 7.5856, "step": 84500 }, { "epoch": 10.169675090252708, "grad_norm": 184.8275146484375, "learning_rate": 0.0001954819866270153, "loss": 7.693, "step": 84510 }, { "epoch": 10.170878459687124, "grad_norm": 15172.384765625, "learning_rate": 0.0001954808559853618, "loss": 7.7003, "step": 84520 }, { "epoch": 10.17208182912154, "grad_norm": 2435.298583984375, "learning_rate": 0.0001954797252055239, "loss": 7.7421, "step": 84530 }, { "epoch": 10.173285198555957, "grad_norm": 1336.593505859375, "learning_rate": 0.00019547859428750326, "loss": 7.6244, "step": 84540 }, { "epoch": 10.174488567990373, "grad_norm": 1790.1878662109375, "learning_rate": 0.00019547746323130152, "loss": 7.6178, "step": 84550 }, { "epoch": 10.175691937424789, "grad_norm": 996.5396118164062, "learning_rate": 0.0001954763320369203, "loss": 7.5489, "step": 84560 }, { "epoch": 10.176895306859207, "grad_norm": 304.2719421386719, "learning_rate": 0.0001954752007043613, "loss": 7.5244, "step": 84570 }, { "epoch": 10.178098676293622, "grad_norm": 1636.525634765625, "learning_rate": 0.0001954740692336261, "loss": 7.6303, "step": 84580 }, { "epoch": 10.179302045728038, "grad_norm": 1122.403076171875, "learning_rate": 0.00019547293762471631, "loss": 7.646, "step": 84590 }, { "epoch": 10.180505415162456, "grad_norm": 3601.208251953125, "learning_rate": 0.00019547180587763365, "loss": 7.6218, "step": 84600 }, { "epoch": 10.181708784596871, "grad_norm": 224.7150115966797, "learning_rate": 0.00019547067399237973, "loss": 7.576, "step": 84610 }, { "epoch": 10.182912154031287, "grad_norm": 5668.404296875, "learning_rate": 0.00019546954196895616, "loss": 7.592, "step": 84620 }, { "epoch": 10.184115523465705, "grad_norm": 2218.216796875, "learning_rate": 0.00019546840980736455, "loss": 7.6003, "step": 84630 }, { "epoch": 10.18531889290012, "grad_norm": 3921.322998046875, "learning_rate": 0.00019546727750760663, "loss": 7.517, "step": 84640 }, { "epoch": 10.186522262334536, "grad_norm": 186.01300048828125, "learning_rate": 0.00019546614506968398, "loss": 7.6106, "step": 84650 }, { "epoch": 10.187725631768952, "grad_norm": 771.4862060546875, "learning_rate": 0.00019546501249359822, "loss": 7.5792, "step": 84660 }, { "epoch": 10.18892900120337, "grad_norm": 283.5538635253906, "learning_rate": 0.00019546387977935106, "loss": 7.5603, "step": 84670 }, { "epoch": 10.190132370637786, "grad_norm": 736.301513671875, "learning_rate": 0.00019546274692694408, "loss": 7.5591, "step": 84680 }, { "epoch": 10.191335740072201, "grad_norm": 1668.3681640625, "learning_rate": 0.0001954616139363789, "loss": 7.6162, "step": 84690 }, { "epoch": 10.192539109506619, "grad_norm": 12183.900390625, "learning_rate": 0.00019546048080765723, "loss": 7.4798, "step": 84700 }, { "epoch": 10.193742478941035, "grad_norm": 1348.5616455078125, "learning_rate": 0.00019545934754078067, "loss": 7.5387, "step": 84710 }, { "epoch": 10.19494584837545, "grad_norm": 3114.279052734375, "learning_rate": 0.00019545821413575088, "loss": 7.4717, "step": 84720 }, { "epoch": 10.196149217809868, "grad_norm": 4764.4140625, "learning_rate": 0.00019545708059256947, "loss": 7.4346, "step": 84730 }, { "epoch": 10.197352587244284, "grad_norm": 8097.38720703125, "learning_rate": 0.0001954559469112381, "loss": 7.3994, "step": 84740 }, { "epoch": 10.1985559566787, "grad_norm": 4141.76318359375, "learning_rate": 0.00019545481309175838, "loss": 7.4519, "step": 84750 }, { "epoch": 10.199759326113117, "grad_norm": 464.6991882324219, "learning_rate": 0.00019545367913413204, "loss": 7.404, "step": 84760 }, { "epoch": 10.200962695547533, "grad_norm": 194.63821411132812, "learning_rate": 0.00019545254503836065, "loss": 7.4688, "step": 84770 }, { "epoch": 10.202166064981949, "grad_norm": 3374.51318359375, "learning_rate": 0.0001954514108044458, "loss": 7.463, "step": 84780 }, { "epoch": 10.203369434416366, "grad_norm": 3136.606689453125, "learning_rate": 0.00019545027643238925, "loss": 7.4433, "step": 84790 }, { "epoch": 10.204572803850782, "grad_norm": 2281.577880859375, "learning_rate": 0.00019544914192219255, "loss": 7.5009, "step": 84800 }, { "epoch": 10.205776173285198, "grad_norm": 558.43212890625, "learning_rate": 0.00019544800727385738, "loss": 7.5724, "step": 84810 }, { "epoch": 10.206979542719615, "grad_norm": 4649.240234375, "learning_rate": 0.0001954468724873854, "loss": 7.5039, "step": 84820 }, { "epoch": 10.208182912154031, "grad_norm": 12884.7939453125, "learning_rate": 0.00019544573756277822, "loss": 7.5177, "step": 84830 }, { "epoch": 10.209386281588447, "grad_norm": 264.2885437011719, "learning_rate": 0.00019544460250003749, "loss": 7.439, "step": 84840 }, { "epoch": 10.210589651022865, "grad_norm": 546.5271606445312, "learning_rate": 0.00019544346729916486, "loss": 7.563, "step": 84850 }, { "epoch": 10.21179302045728, "grad_norm": 61.41135025024414, "learning_rate": 0.00019544233196016199, "loss": 7.4349, "step": 84860 }, { "epoch": 10.212996389891696, "grad_norm": 11019.5673828125, "learning_rate": 0.00019544119648303045, "loss": 7.4064, "step": 84870 }, { "epoch": 10.214199759326114, "grad_norm": 91.89659118652344, "learning_rate": 0.00019544006086777196, "loss": 7.4024, "step": 84880 }, { "epoch": 10.21540312876053, "grad_norm": 1944.5679931640625, "learning_rate": 0.00019543892511438813, "loss": 7.4678, "step": 84890 }, { "epoch": 10.216606498194945, "grad_norm": 2484.067626953125, "learning_rate": 0.00019543778922288064, "loss": 7.5887, "step": 84900 }, { "epoch": 10.217809867629363, "grad_norm": 2496.830810546875, "learning_rate": 0.0001954366531932511, "loss": 7.4322, "step": 84910 }, { "epoch": 10.219013237063779, "grad_norm": 35778.47265625, "learning_rate": 0.00019543551702550115, "loss": 7.5733, "step": 84920 }, { "epoch": 10.220216606498195, "grad_norm": 329.30694580078125, "learning_rate": 0.00019543438071963241, "loss": 7.5197, "step": 84930 }, { "epoch": 10.221419975932612, "grad_norm": 501.95611572265625, "learning_rate": 0.00019543324427564662, "loss": 7.4337, "step": 84940 }, { "epoch": 10.222623345367028, "grad_norm": 1292.3909912109375, "learning_rate": 0.00019543210769354535, "loss": 7.499, "step": 84950 }, { "epoch": 10.223826714801444, "grad_norm": 24684.328125, "learning_rate": 0.00019543097097333024, "loss": 7.5541, "step": 84960 }, { "epoch": 10.225030084235861, "grad_norm": 1482.8800048828125, "learning_rate": 0.00019542983411500293, "loss": 7.4271, "step": 84970 }, { "epoch": 10.226233453670277, "grad_norm": 2366.8427734375, "learning_rate": 0.00019542869711856512, "loss": 7.5985, "step": 84980 }, { "epoch": 10.227436823104693, "grad_norm": 2164.325439453125, "learning_rate": 0.00019542755998401844, "loss": 7.5098, "step": 84990 }, { "epoch": 10.22864019253911, "grad_norm": 1001.6786499023438, "learning_rate": 0.0001954264227113645, "loss": 7.4615, "step": 85000 }, { "epoch": 10.229843561973526, "grad_norm": 262.1122131347656, "learning_rate": 0.00019542528530060497, "loss": 7.5176, "step": 85010 }, { "epoch": 10.231046931407942, "grad_norm": 130.0224151611328, "learning_rate": 0.00019542414775174148, "loss": 7.3711, "step": 85020 }, { "epoch": 10.232250300842358, "grad_norm": 402.8792419433594, "learning_rate": 0.00019542301006477572, "loss": 7.4132, "step": 85030 }, { "epoch": 10.233453670276775, "grad_norm": 49.24036407470703, "learning_rate": 0.0001954218722397093, "loss": 7.4445, "step": 85040 }, { "epoch": 10.234657039711191, "grad_norm": 931.08984375, "learning_rate": 0.00019542073427654383, "loss": 7.3981, "step": 85050 }, { "epoch": 10.235860409145607, "grad_norm": 81.37020874023438, "learning_rate": 0.00019541959617528102, "loss": 7.3585, "step": 85060 }, { "epoch": 10.237063778580024, "grad_norm": 288.3209228515625, "learning_rate": 0.00019541845793592252, "loss": 7.3375, "step": 85070 }, { "epoch": 10.23826714801444, "grad_norm": 81.19265747070312, "learning_rate": 0.00019541731955846994, "loss": 7.3465, "step": 85080 }, { "epoch": 10.239470517448856, "grad_norm": 68.54808807373047, "learning_rate": 0.00019541618104292494, "loss": 7.4104, "step": 85090 }, { "epoch": 10.240673886883274, "grad_norm": 103.49225616455078, "learning_rate": 0.00019541504238928915, "loss": 7.4004, "step": 85100 }, { "epoch": 10.24187725631769, "grad_norm": 142.51296997070312, "learning_rate": 0.00019541390359756425, "loss": 7.3708, "step": 85110 }, { "epoch": 10.243080625752105, "grad_norm": 6.464770793914795, "learning_rate": 0.0001954127646677519, "loss": 7.497, "step": 85120 }, { "epoch": 10.244283995186523, "grad_norm": 33.73357009887695, "learning_rate": 0.00019541162559985368, "loss": 7.4298, "step": 85130 }, { "epoch": 10.245487364620939, "grad_norm": 22.592464447021484, "learning_rate": 0.0001954104863938713, "loss": 7.3442, "step": 85140 }, { "epoch": 10.246690734055354, "grad_norm": 17.11105728149414, "learning_rate": 0.0001954093470498064, "loss": 7.522, "step": 85150 }, { "epoch": 10.247894103489772, "grad_norm": 20.17689323425293, "learning_rate": 0.0001954082075676606, "loss": 7.3856, "step": 85160 }, { "epoch": 10.249097472924188, "grad_norm": 30.028621673583984, "learning_rate": 0.0001954070679474356, "loss": 7.3863, "step": 85170 }, { "epoch": 10.250300842358604, "grad_norm": 21.267133712768555, "learning_rate": 0.00019540592818913298, "loss": 7.4415, "step": 85180 }, { "epoch": 10.251504211793021, "grad_norm": 19.69413185119629, "learning_rate": 0.00019540478829275447, "loss": 7.5127, "step": 85190 }, { "epoch": 10.252707581227437, "grad_norm": 3.984941244125366, "learning_rate": 0.0001954036482583016, "loss": 7.5265, "step": 85200 }, { "epoch": 10.253910950661853, "grad_norm": 2.579620599746704, "learning_rate": 0.00019540250808577618, "loss": 7.4825, "step": 85210 }, { "epoch": 10.25511432009627, "grad_norm": 6.304272174835205, "learning_rate": 0.00019540136777517974, "loss": 7.5466, "step": 85220 }, { "epoch": 10.256317689530686, "grad_norm": 4.438540458679199, "learning_rate": 0.00019540022732651398, "loss": 7.5504, "step": 85230 }, { "epoch": 10.257521058965102, "grad_norm": 2.3285071849823, "learning_rate": 0.00019539908673978052, "loss": 7.5308, "step": 85240 }, { "epoch": 10.25872442839952, "grad_norm": 7.048343658447266, "learning_rate": 0.00019539794601498102, "loss": 7.4571, "step": 85250 }, { "epoch": 10.259927797833935, "grad_norm": 3.191307306289673, "learning_rate": 0.00019539680515211715, "loss": 7.479, "step": 85260 }, { "epoch": 10.261131167268351, "grad_norm": 5.28131628036499, "learning_rate": 0.00019539566415119057, "loss": 7.4891, "step": 85270 }, { "epoch": 10.262334536702769, "grad_norm": 4.179459095001221, "learning_rate": 0.0001953945230122029, "loss": 7.4649, "step": 85280 }, { "epoch": 10.263537906137184, "grad_norm": 3.379351854324341, "learning_rate": 0.0001953933817351558, "loss": 7.4909, "step": 85290 }, { "epoch": 10.2647412755716, "grad_norm": 9.762781143188477, "learning_rate": 0.00019539224032005093, "loss": 7.4819, "step": 85300 }, { "epoch": 10.265944645006018, "grad_norm": 8.950075149536133, "learning_rate": 0.00019539109876688993, "loss": 7.5743, "step": 85310 }, { "epoch": 10.267148014440433, "grad_norm": 9.266764640808105, "learning_rate": 0.00019538995707567446, "loss": 7.493, "step": 85320 }, { "epoch": 10.26835138387485, "grad_norm": 2.029279947280884, "learning_rate": 0.00019538881524640614, "loss": 7.4323, "step": 85330 }, { "epoch": 10.269554753309265, "grad_norm": 1.764323115348816, "learning_rate": 0.00019538767327908672, "loss": 7.3986, "step": 85340 }, { "epoch": 10.270758122743683, "grad_norm": 3.6983046531677246, "learning_rate": 0.00019538653117371775, "loss": 7.4004, "step": 85350 }, { "epoch": 10.271961492178098, "grad_norm": 1.9362916946411133, "learning_rate": 0.00019538538893030093, "loss": 7.4334, "step": 85360 }, { "epoch": 10.273164861612514, "grad_norm": 3.71360445022583, "learning_rate": 0.0001953842465488379, "loss": 7.5023, "step": 85370 }, { "epoch": 10.274368231046932, "grad_norm": 3.131895065307617, "learning_rate": 0.0001953831040293303, "loss": 7.5417, "step": 85380 }, { "epoch": 10.275571600481348, "grad_norm": 1.9646403789520264, "learning_rate": 0.0001953819613717798, "loss": 7.4674, "step": 85390 }, { "epoch": 10.276774969915763, "grad_norm": 2.816831111907959, "learning_rate": 0.00019538081857618805, "loss": 7.4984, "step": 85400 }, { "epoch": 10.277978339350181, "grad_norm": 1.4591765403747559, "learning_rate": 0.00019537967564255671, "loss": 7.4286, "step": 85410 }, { "epoch": 10.279181708784597, "grad_norm": 7.126994609832764, "learning_rate": 0.00019537853257088744, "loss": 7.4799, "step": 85420 }, { "epoch": 10.280385078219012, "grad_norm": 1.5111916065216064, "learning_rate": 0.00019537738936118188, "loss": 7.4561, "step": 85430 }, { "epoch": 10.28158844765343, "grad_norm": 3.422874927520752, "learning_rate": 0.00019537624601344172, "loss": 7.5421, "step": 85440 }, { "epoch": 10.282791817087846, "grad_norm": 8.737654685974121, "learning_rate": 0.00019537510252766853, "loss": 7.4657, "step": 85450 }, { "epoch": 10.283995186522262, "grad_norm": 2.050358533859253, "learning_rate": 0.00019537395890386404, "loss": 7.5599, "step": 85460 }, { "epoch": 10.28519855595668, "grad_norm": 5.419197082519531, "learning_rate": 0.0001953728151420299, "loss": 7.3527, "step": 85470 }, { "epoch": 10.286401925391095, "grad_norm": 4.262731552124023, "learning_rate": 0.00019537167124216774, "loss": 7.5019, "step": 85480 }, { "epoch": 10.28760529482551, "grad_norm": 1.5936861038208008, "learning_rate": 0.00019537052720427922, "loss": 7.5104, "step": 85490 }, { "epoch": 10.288808664259928, "grad_norm": 6.046593189239502, "learning_rate": 0.000195369383028366, "loss": 7.4177, "step": 85500 }, { "epoch": 10.290012033694344, "grad_norm": 4.11210298538208, "learning_rate": 0.00019536823871442978, "loss": 7.4079, "step": 85510 }, { "epoch": 10.29121540312876, "grad_norm": 4.220069885253906, "learning_rate": 0.00019536709426247212, "loss": 7.4925, "step": 85520 }, { "epoch": 10.292418772563177, "grad_norm": 2.521742105484009, "learning_rate": 0.00019536594967249475, "loss": 7.4716, "step": 85530 }, { "epoch": 10.293622141997593, "grad_norm": 2.1066694259643555, "learning_rate": 0.0001953648049444993, "loss": 7.495, "step": 85540 }, { "epoch": 10.294825511432009, "grad_norm": 4.6667656898498535, "learning_rate": 0.00019536366007848744, "loss": 7.4612, "step": 85550 }, { "epoch": 10.296028880866427, "grad_norm": 1.8581546545028687, "learning_rate": 0.00019536251507446082, "loss": 7.479, "step": 85560 }, { "epoch": 10.297232250300842, "grad_norm": 2.111947536468506, "learning_rate": 0.00019536136993242106, "loss": 7.4875, "step": 85570 }, { "epoch": 10.298435619735258, "grad_norm": 3.2716856002807617, "learning_rate": 0.0001953602246523699, "loss": 7.5409, "step": 85580 }, { "epoch": 10.299638989169676, "grad_norm": 3.554607629776001, "learning_rate": 0.0001953590792343089, "loss": 7.5228, "step": 85590 }, { "epoch": 10.300842358604092, "grad_norm": 3.1566975116729736, "learning_rate": 0.00019535793367823985, "loss": 7.4526, "step": 85600 }, { "epoch": 10.302045728038507, "grad_norm": 2.891200542449951, "learning_rate": 0.0001953567879841643, "loss": 7.4107, "step": 85610 }, { "epoch": 10.303249097472925, "grad_norm": 3.0826222896575928, "learning_rate": 0.00019535564215208387, "loss": 7.4472, "step": 85620 }, { "epoch": 10.30445246690734, "grad_norm": 4.875820159912109, "learning_rate": 0.0001953544961820003, "loss": 7.5037, "step": 85630 }, { "epoch": 10.305655836341757, "grad_norm": 1.9762930870056152, "learning_rate": 0.0001953533500739153, "loss": 7.388, "step": 85640 }, { "epoch": 10.306859205776174, "grad_norm": 2.4597346782684326, "learning_rate": 0.0001953522038278304, "loss": 7.5856, "step": 85650 }, { "epoch": 10.30806257521059, "grad_norm": 1.838729739189148, "learning_rate": 0.00019535105744374734, "loss": 7.545, "step": 85660 }, { "epoch": 10.309265944645006, "grad_norm": 1.7955926656723022, "learning_rate": 0.00019534991092166778, "loss": 7.5446, "step": 85670 }, { "epoch": 10.310469314079423, "grad_norm": 2.9095957279205322, "learning_rate": 0.00019534876426159333, "loss": 7.414, "step": 85680 }, { "epoch": 10.311672683513839, "grad_norm": 1.6297860145568848, "learning_rate": 0.0001953476174635257, "loss": 7.4953, "step": 85690 }, { "epoch": 10.312876052948255, "grad_norm": 3.330533504486084, "learning_rate": 0.0001953464705274665, "loss": 7.4244, "step": 85700 }, { "epoch": 10.314079422382672, "grad_norm": 2.861637830734253, "learning_rate": 0.00019534532345341743, "loss": 7.4212, "step": 85710 }, { "epoch": 10.315282791817088, "grad_norm": 2507.746826171875, "learning_rate": 0.00019534417624138012, "loss": 7.4614, "step": 85720 }, { "epoch": 10.316486161251504, "grad_norm": 7.7929301261901855, "learning_rate": 0.00019534302889135627, "loss": 7.409, "step": 85730 }, { "epoch": 10.31768953068592, "grad_norm": 6.382420063018799, "learning_rate": 0.0001953418814033475, "loss": 7.4432, "step": 85740 }, { "epoch": 10.318892900120337, "grad_norm": 19.105998992919922, "learning_rate": 0.0001953407337773555, "loss": 7.4761, "step": 85750 }, { "epoch": 10.320096269554753, "grad_norm": 5.199395179748535, "learning_rate": 0.00019533958601338194, "loss": 7.3907, "step": 85760 }, { "epoch": 10.321299638989169, "grad_norm": 5.394845485687256, "learning_rate": 0.00019533843811142843, "loss": 7.4173, "step": 85770 }, { "epoch": 10.322503008423586, "grad_norm": 6.040703773498535, "learning_rate": 0.00019533729007149665, "loss": 7.4228, "step": 85780 }, { "epoch": 10.323706377858002, "grad_norm": 3.328591823577881, "learning_rate": 0.0001953361418935883, "loss": 7.4293, "step": 85790 }, { "epoch": 10.324909747292418, "grad_norm": 2.576733112335205, "learning_rate": 0.00019533499357770503, "loss": 7.3564, "step": 85800 }, { "epoch": 10.326113116726836, "grad_norm": 85.13755798339844, "learning_rate": 0.00019533384512384843, "loss": 7.4474, "step": 85810 }, { "epoch": 10.327316486161251, "grad_norm": 8.398889541625977, "learning_rate": 0.00019533269653202026, "loss": 7.4373, "step": 85820 }, { "epoch": 10.328519855595667, "grad_norm": 4.141637802124023, "learning_rate": 0.00019533154780222213, "loss": 7.4746, "step": 85830 }, { "epoch": 10.329723225030085, "grad_norm": 5.044012546539307, "learning_rate": 0.00019533039893445572, "loss": 7.4292, "step": 85840 }, { "epoch": 10.3309265944645, "grad_norm": 5.562819480895996, "learning_rate": 0.0001953292499287227, "loss": 7.4262, "step": 85850 }, { "epoch": 10.332129963898916, "grad_norm": 3.3015358448028564, "learning_rate": 0.0001953281007850247, "loss": 7.3704, "step": 85860 }, { "epoch": 10.333333333333334, "grad_norm": 1.2157748937606812, "learning_rate": 0.0001953269515033634, "loss": 7.4286, "step": 85870 }, { "epoch": 10.33453670276775, "grad_norm": 31.767892837524414, "learning_rate": 0.00019532580208374048, "loss": 7.3897, "step": 85880 }, { "epoch": 10.335740072202166, "grad_norm": 8.570412635803223, "learning_rate": 0.00019532465252615757, "loss": 7.4112, "step": 85890 }, { "epoch": 10.336943441636583, "grad_norm": 6.177369594573975, "learning_rate": 0.00019532350283061638, "loss": 7.4998, "step": 85900 }, { "epoch": 10.338146811070999, "grad_norm": 1302.7275390625, "learning_rate": 0.0001953223529971185, "loss": 7.2976, "step": 85910 }, { "epoch": 10.339350180505415, "grad_norm": 469.8651123046875, "learning_rate": 0.0001953212030256657, "loss": 7.4372, "step": 85920 }, { "epoch": 10.340553549939832, "grad_norm": 4.476493835449219, "learning_rate": 0.00019532005291625956, "loss": 7.448, "step": 85930 }, { "epoch": 10.341756919374248, "grad_norm": 2.6381514072418213, "learning_rate": 0.00019531890266890177, "loss": 7.533, "step": 85940 }, { "epoch": 10.342960288808664, "grad_norm": 8.27548885345459, "learning_rate": 0.00019531775228359398, "loss": 7.381, "step": 85950 }, { "epoch": 10.344163658243081, "grad_norm": 6.770925998687744, "learning_rate": 0.0001953166017603379, "loss": 7.4246, "step": 85960 }, { "epoch": 10.345367027677497, "grad_norm": 6.874700546264648, "learning_rate": 0.00019531545109913512, "loss": 7.4873, "step": 85970 }, { "epoch": 10.346570397111913, "grad_norm": 2.744999885559082, "learning_rate": 0.00019531430029998739, "loss": 7.3974, "step": 85980 }, { "epoch": 10.34777376654633, "grad_norm": 29.037128448486328, "learning_rate": 0.0001953131493628963, "loss": 7.3437, "step": 85990 }, { "epoch": 10.348977135980746, "grad_norm": 22.944522857666016, "learning_rate": 0.0001953119982878636, "loss": 7.3648, "step": 86000 }, { "epoch": 10.350180505415162, "grad_norm": 4.095037937164307, "learning_rate": 0.00019531084707489086, "loss": 7.3007, "step": 86010 }, { "epoch": 10.35138387484958, "grad_norm": 5.845776081085205, "learning_rate": 0.00019530969572397982, "loss": 7.3914, "step": 86020 }, { "epoch": 10.352587244283995, "grad_norm": 206.785888671875, "learning_rate": 0.0001953085442351321, "loss": 7.3753, "step": 86030 }, { "epoch": 10.353790613718411, "grad_norm": 2.7486515045166016, "learning_rate": 0.00019530739260834937, "loss": 7.4032, "step": 86040 }, { "epoch": 10.354993983152827, "grad_norm": 5.12001895904541, "learning_rate": 0.00019530624084363336, "loss": 7.4674, "step": 86050 }, { "epoch": 10.356197352587245, "grad_norm": 4.1538262367248535, "learning_rate": 0.00019530508894098568, "loss": 7.3124, "step": 86060 }, { "epoch": 10.35740072202166, "grad_norm": 26.622573852539062, "learning_rate": 0.00019530393690040797, "loss": 7.3205, "step": 86070 }, { "epoch": 10.358604091456076, "grad_norm": 24.624265670776367, "learning_rate": 0.00019530278472190194, "loss": 7.356, "step": 86080 }, { "epoch": 10.359807460890494, "grad_norm": 1126.248046875, "learning_rate": 0.0001953016324054693, "loss": 7.3292, "step": 86090 }, { "epoch": 10.36101083032491, "grad_norm": 12.730032920837402, "learning_rate": 0.00019530047995111163, "loss": 7.3581, "step": 86100 }, { "epoch": 10.362214199759325, "grad_norm": 2211.71435546875, "learning_rate": 0.00019529932735883065, "loss": 7.4205, "step": 86110 }, { "epoch": 10.363417569193743, "grad_norm": 1.5781265497207642, "learning_rate": 0.000195298174628628, "loss": 7.4111, "step": 86120 }, { "epoch": 10.364620938628159, "grad_norm": 3.420382261276245, "learning_rate": 0.00019529702176050538, "loss": 7.3086, "step": 86130 }, { "epoch": 10.365824308062574, "grad_norm": 4.60121488571167, "learning_rate": 0.00019529586875446442, "loss": 7.298, "step": 86140 }, { "epoch": 10.367027677496992, "grad_norm": 6.83242130279541, "learning_rate": 0.0001952947156105068, "loss": 7.3454, "step": 86150 }, { "epoch": 10.368231046931408, "grad_norm": 50.682098388671875, "learning_rate": 0.00019529356232863423, "loss": 7.39, "step": 86160 }, { "epoch": 10.369434416365824, "grad_norm": 2.5132851600646973, "learning_rate": 0.00019529240890884834, "loss": 7.4204, "step": 86170 }, { "epoch": 10.370637785800241, "grad_norm": 6.81795072555542, "learning_rate": 0.00019529125535115084, "loss": 7.3806, "step": 86180 }, { "epoch": 10.371841155234657, "grad_norm": 2.5079987049102783, "learning_rate": 0.00019529010165554329, "loss": 7.2356, "step": 86190 }, { "epoch": 10.373044524669073, "grad_norm": 75.7482681274414, "learning_rate": 0.0001952889478220275, "loss": 7.3059, "step": 86200 }, { "epoch": 10.37424789410349, "grad_norm": 6.17049503326416, "learning_rate": 0.00019528779385060506, "loss": 7.3547, "step": 86210 }, { "epoch": 10.375451263537906, "grad_norm": 110.1769027709961, "learning_rate": 0.00019528663974127764, "loss": 7.204, "step": 86220 }, { "epoch": 10.376654632972322, "grad_norm": 3.8423500061035156, "learning_rate": 0.00019528548549404694, "loss": 7.3803, "step": 86230 }, { "epoch": 10.37785800240674, "grad_norm": 680.0330810546875, "learning_rate": 0.00019528433110891461, "loss": 7.363, "step": 86240 }, { "epoch": 10.379061371841155, "grad_norm": 2.896757125854492, "learning_rate": 0.00019528317658588234, "loss": 7.2612, "step": 86250 }, { "epoch": 10.380264741275571, "grad_norm": 2.5943007469177246, "learning_rate": 0.00019528202192495178, "loss": 7.3528, "step": 86260 }, { "epoch": 10.381468110709989, "grad_norm": 10.391607284545898, "learning_rate": 0.0001952808671261246, "loss": 7.3012, "step": 86270 }, { "epoch": 10.382671480144404, "grad_norm": 4.298302173614502, "learning_rate": 0.0001952797121894025, "loss": 7.3386, "step": 86280 }, { "epoch": 10.38387484957882, "grad_norm": 22.879108428955078, "learning_rate": 0.00019527855711478712, "loss": 7.3371, "step": 86290 }, { "epoch": 10.385078219013238, "grad_norm": 4.031415939331055, "learning_rate": 0.00019527740190228017, "loss": 7.3839, "step": 86300 }, { "epoch": 10.386281588447654, "grad_norm": 3.8353116512298584, "learning_rate": 0.00019527624655188327, "loss": 7.3102, "step": 86310 }, { "epoch": 10.38748495788207, "grad_norm": 28.881017684936523, "learning_rate": 0.00019527509106359814, "loss": 7.2641, "step": 86320 }, { "epoch": 10.388688327316487, "grad_norm": 3.1833651065826416, "learning_rate": 0.0001952739354374264, "loss": 7.3326, "step": 86330 }, { "epoch": 10.389891696750903, "grad_norm": 4.188170909881592, "learning_rate": 0.00019527277967336975, "loss": 7.334, "step": 86340 }, { "epoch": 10.391095066185319, "grad_norm": 101.34941864013672, "learning_rate": 0.00019527162377142992, "loss": 7.2211, "step": 86350 }, { "epoch": 10.392298435619736, "grad_norm": 89.14458465576172, "learning_rate": 0.00019527046773160848, "loss": 7.3642, "step": 86360 }, { "epoch": 10.393501805054152, "grad_norm": 11.451932907104492, "learning_rate": 0.0001952693115539072, "loss": 7.3821, "step": 86370 }, { "epoch": 10.394705174488568, "grad_norm": 102.76563262939453, "learning_rate": 0.00019526815523832763, "loss": 7.391, "step": 86380 }, { "epoch": 10.395908543922985, "grad_norm": 9.27910327911377, "learning_rate": 0.00019526699878487155, "loss": 7.3022, "step": 86390 }, { "epoch": 10.397111913357401, "grad_norm": 12.482718467712402, "learning_rate": 0.00019526584219354062, "loss": 7.2903, "step": 86400 }, { "epoch": 10.398315282791817, "grad_norm": 5.972568511962891, "learning_rate": 0.0001952646854643365, "loss": 7.3264, "step": 86410 }, { "epoch": 10.399518652226233, "grad_norm": 6.8508076667785645, "learning_rate": 0.00019526352859726082, "loss": 7.3016, "step": 86420 }, { "epoch": 10.40072202166065, "grad_norm": 2.6398284435272217, "learning_rate": 0.00019526237159231532, "loss": 7.3302, "step": 86430 }, { "epoch": 10.401925391095066, "grad_norm": 24.66536521911621, "learning_rate": 0.00019526121444950168, "loss": 7.31, "step": 86440 }, { "epoch": 10.403128760529482, "grad_norm": 9.506525993347168, "learning_rate": 0.00019526005716882149, "loss": 7.2653, "step": 86450 }, { "epoch": 10.4043321299639, "grad_norm": 91.26265716552734, "learning_rate": 0.0001952588997502765, "loss": 7.3002, "step": 86460 }, { "epoch": 10.405535499398315, "grad_norm": 11.353433609008789, "learning_rate": 0.00019525774219386838, "loss": 7.4245, "step": 86470 }, { "epoch": 10.406738868832731, "grad_norm": 12.2548828125, "learning_rate": 0.00019525658449959878, "loss": 7.2518, "step": 86480 }, { "epoch": 10.407942238267148, "grad_norm": 13.156693458557129, "learning_rate": 0.00019525542666746937, "loss": 7.2892, "step": 86490 }, { "epoch": 10.409145607701564, "grad_norm": 127.03163146972656, "learning_rate": 0.00019525426869748184, "loss": 7.1979, "step": 86500 }, { "epoch": 10.41034897713598, "grad_norm": 387.25146484375, "learning_rate": 0.0001952531105896379, "loss": 7.3386, "step": 86510 }, { "epoch": 10.411552346570398, "grad_norm": 134.3849639892578, "learning_rate": 0.00019525195234393916, "loss": 7.4107, "step": 86520 }, { "epoch": 10.412755716004813, "grad_norm": 57.4593505859375, "learning_rate": 0.00019525079396038732, "loss": 7.3787, "step": 86530 }, { "epoch": 10.41395908543923, "grad_norm": 528.2086791992188, "learning_rate": 0.00019524963543898408, "loss": 7.3134, "step": 86540 }, { "epoch": 10.415162454873647, "grad_norm": 8.248697280883789, "learning_rate": 0.0001952484767797311, "loss": 7.4736, "step": 86550 }, { "epoch": 10.416365824308063, "grad_norm": 26.072460174560547, "learning_rate": 0.00019524731798263006, "loss": 7.3903, "step": 86560 }, { "epoch": 10.417569193742478, "grad_norm": 339.4084777832031, "learning_rate": 0.00019524615904768262, "loss": 7.3795, "step": 86570 }, { "epoch": 10.418772563176896, "grad_norm": 2.1741859912872314, "learning_rate": 0.00019524499997489048, "loss": 7.3697, "step": 86580 }, { "epoch": 10.419975932611312, "grad_norm": 14.492084503173828, "learning_rate": 0.00019524384076425534, "loss": 7.267, "step": 86590 }, { "epoch": 10.421179302045728, "grad_norm": 9.385497093200684, "learning_rate": 0.0001952426814157788, "loss": 7.3738, "step": 86600 }, { "epoch": 10.422382671480145, "grad_norm": 138.29335021972656, "learning_rate": 0.00019524152192946262, "loss": 7.3135, "step": 86610 }, { "epoch": 10.42358604091456, "grad_norm": 11.764824867248535, "learning_rate": 0.00019524036230530843, "loss": 7.4418, "step": 86620 }, { "epoch": 10.424789410348977, "grad_norm": 203.98117065429688, "learning_rate": 0.0001952392025433179, "loss": 7.388, "step": 86630 }, { "epoch": 10.425992779783394, "grad_norm": 40.192779541015625, "learning_rate": 0.00019523804264349277, "loss": 7.3609, "step": 86640 }, { "epoch": 10.42719614921781, "grad_norm": 426.7164306640625, "learning_rate": 0.00019523688260583466, "loss": 7.4305, "step": 86650 }, { "epoch": 10.428399518652226, "grad_norm": 1.9205007553100586, "learning_rate": 0.00019523572243034524, "loss": 7.3843, "step": 86660 }, { "epoch": 10.429602888086643, "grad_norm": 39.38263702392578, "learning_rate": 0.00019523456211702626, "loss": 7.3838, "step": 86670 }, { "epoch": 10.43080625752106, "grad_norm": 16.812620162963867, "learning_rate": 0.00019523340166587936, "loss": 7.3231, "step": 86680 }, { "epoch": 10.432009626955475, "grad_norm": 6.904186248779297, "learning_rate": 0.00019523224107690618, "loss": 7.3075, "step": 86690 }, { "epoch": 10.433212996389893, "grad_norm": 41.70980453491211, "learning_rate": 0.00019523108035010846, "loss": 7.4046, "step": 86700 }, { "epoch": 10.434416365824308, "grad_norm": 685.0161743164062, "learning_rate": 0.00019522991948548785, "loss": 7.2371, "step": 86710 }, { "epoch": 10.435619735258724, "grad_norm": 13.445799827575684, "learning_rate": 0.00019522875848304602, "loss": 7.2452, "step": 86720 }, { "epoch": 10.43682310469314, "grad_norm": 168.27622985839844, "learning_rate": 0.00019522759734278466, "loss": 7.2095, "step": 86730 }, { "epoch": 10.438026474127557, "grad_norm": 24.310766220092773, "learning_rate": 0.0001952264360647055, "loss": 7.2637, "step": 86740 }, { "epoch": 10.439229843561973, "grad_norm": 31.537172317504883, "learning_rate": 0.00019522527464881014, "loss": 7.3222, "step": 86750 }, { "epoch": 10.440433212996389, "grad_norm": 9.848044395446777, "learning_rate": 0.0001952241130951003, "loss": 7.3228, "step": 86760 }, { "epoch": 10.441636582430807, "grad_norm": 2950.752197265625, "learning_rate": 0.00019522295140357766, "loss": 7.2449, "step": 86770 }, { "epoch": 10.442839951865222, "grad_norm": 8.810911178588867, "learning_rate": 0.0001952217895742439, "loss": 7.3124, "step": 86780 }, { "epoch": 10.444043321299638, "grad_norm": 169.3764190673828, "learning_rate": 0.00019522062760710072, "loss": 7.2767, "step": 86790 }, { "epoch": 10.445246690734056, "grad_norm": 8.374594688415527, "learning_rate": 0.00019521946550214977, "loss": 7.3505, "step": 86800 }, { "epoch": 10.446450060168472, "grad_norm": 29.65797233581543, "learning_rate": 0.00019521830325939274, "loss": 7.3024, "step": 86810 }, { "epoch": 10.447653429602887, "grad_norm": 2913.634765625, "learning_rate": 0.0001952171408788313, "loss": 7.311, "step": 86820 }, { "epoch": 10.448856799037305, "grad_norm": 94.82404327392578, "learning_rate": 0.00019521597836046718, "loss": 7.2979, "step": 86830 }, { "epoch": 10.45006016847172, "grad_norm": 16.484973907470703, "learning_rate": 0.00019521481570430201, "loss": 7.3126, "step": 86840 }, { "epoch": 10.451263537906136, "grad_norm": 9.324202537536621, "learning_rate": 0.00019521365291033752, "loss": 7.361, "step": 86850 }, { "epoch": 10.452466907340554, "grad_norm": 236.1169891357422, "learning_rate": 0.00019521248997857535, "loss": 7.2888, "step": 86860 }, { "epoch": 10.45367027677497, "grad_norm": 42.557838439941406, "learning_rate": 0.00019521132690901718, "loss": 7.2597, "step": 86870 }, { "epoch": 10.454873646209386, "grad_norm": 63.07026672363281, "learning_rate": 0.00019521016370166474, "loss": 7.385, "step": 86880 }, { "epoch": 10.456077015643803, "grad_norm": 9.713822364807129, "learning_rate": 0.00019520900035651967, "loss": 7.399, "step": 86890 }, { "epoch": 10.457280385078219, "grad_norm": 24.991436004638672, "learning_rate": 0.00019520783687358367, "loss": 7.458, "step": 86900 }, { "epoch": 10.458483754512635, "grad_norm": 71.0694580078125, "learning_rate": 0.00019520667325285845, "loss": 7.4802, "step": 86910 }, { "epoch": 10.459687123947052, "grad_norm": 257.9768371582031, "learning_rate": 0.00019520550949434564, "loss": 7.5513, "step": 86920 }, { "epoch": 10.460890493381468, "grad_norm": 10389.453125, "learning_rate": 0.00019520434559804696, "loss": 7.5223, "step": 86930 }, { "epoch": 10.462093862815884, "grad_norm": 16.677762985229492, "learning_rate": 0.0001952031815639641, "loss": 7.5917, "step": 86940 }, { "epoch": 10.463297232250302, "grad_norm": 26.780977249145508, "learning_rate": 0.00019520201739209873, "loss": 7.5718, "step": 86950 }, { "epoch": 10.464500601684717, "grad_norm": 261.8631896972656, "learning_rate": 0.0001952008530824525, "loss": 7.4584, "step": 86960 }, { "epoch": 10.465703971119133, "grad_norm": 106.22891235351562, "learning_rate": 0.0001951996886350272, "loss": 7.5674, "step": 86970 }, { "epoch": 10.46690734055355, "grad_norm": 335.2868347167969, "learning_rate": 0.00019519852404982437, "loss": 7.4891, "step": 86980 }, { "epoch": 10.468110709987966, "grad_norm": 185.00413513183594, "learning_rate": 0.00019519735932684582, "loss": 7.4306, "step": 86990 }, { "epoch": 10.469314079422382, "grad_norm": 385.6830749511719, "learning_rate": 0.00019519619446609315, "loss": 7.5965, "step": 87000 }, { "epoch": 10.4705174488568, "grad_norm": 3.527390480041504, "learning_rate": 0.00019519502946756815, "loss": 7.5177, "step": 87010 }, { "epoch": 10.471720818291216, "grad_norm": 157.47364807128906, "learning_rate": 0.00019519386433127238, "loss": 7.5277, "step": 87020 }, { "epoch": 10.472924187725631, "grad_norm": 158.25482177734375, "learning_rate": 0.0001951926990572076, "loss": 7.4535, "step": 87030 }, { "epoch": 10.474127557160049, "grad_norm": 120.78103637695312, "learning_rate": 0.00019519153364537547, "loss": 7.5546, "step": 87040 }, { "epoch": 10.475330926594465, "grad_norm": 13.872295379638672, "learning_rate": 0.00019519036809577771, "loss": 7.5583, "step": 87050 }, { "epoch": 10.47653429602888, "grad_norm": 187.38462829589844, "learning_rate": 0.000195189202408416, "loss": 7.4062, "step": 87060 }, { "epoch": 10.477737665463298, "grad_norm": 152.8062286376953, "learning_rate": 0.00019518803658329198, "loss": 7.405, "step": 87070 }, { "epoch": 10.478941034897714, "grad_norm": 9.428537368774414, "learning_rate": 0.0001951868706204074, "loss": 7.4634, "step": 87080 }, { "epoch": 10.48014440433213, "grad_norm": 55.365570068359375, "learning_rate": 0.0001951857045197639, "loss": 7.4388, "step": 87090 }, { "epoch": 10.481347773766545, "grad_norm": 57.88100814819336, "learning_rate": 0.00019518453828136316, "loss": 7.5701, "step": 87100 }, { "epoch": 10.482551143200963, "grad_norm": 34.78044891357422, "learning_rate": 0.0001951833719052069, "loss": 7.4406, "step": 87110 }, { "epoch": 10.483754512635379, "grad_norm": 40.717681884765625, "learning_rate": 0.00019518220539129684, "loss": 7.4336, "step": 87120 }, { "epoch": 10.484957882069795, "grad_norm": 694.4647827148438, "learning_rate": 0.00019518103873963463, "loss": 7.4763, "step": 87130 }, { "epoch": 10.486161251504212, "grad_norm": 79.98706817626953, "learning_rate": 0.00019517987195022193, "loss": 7.4129, "step": 87140 }, { "epoch": 10.487364620938628, "grad_norm": 8.193269729614258, "learning_rate": 0.00019517870502306045, "loss": 7.4447, "step": 87150 }, { "epoch": 10.488567990373044, "grad_norm": 51494.640625, "learning_rate": 0.0001951775379581519, "loss": 7.5182, "step": 87160 }, { "epoch": 10.489771359807461, "grad_norm": 180.87831115722656, "learning_rate": 0.00019517637075549795, "loss": 7.4129, "step": 87170 }, { "epoch": 10.490974729241877, "grad_norm": 63.636260986328125, "learning_rate": 0.0001951752034151003, "loss": 7.5, "step": 87180 }, { "epoch": 10.492178098676293, "grad_norm": 14.191164016723633, "learning_rate": 0.00019517403593696063, "loss": 7.5157, "step": 87190 }, { "epoch": 10.49338146811071, "grad_norm": 28.7744140625, "learning_rate": 0.00019517286832108061, "loss": 7.445, "step": 87200 }, { "epoch": 10.494584837545126, "grad_norm": 37.393409729003906, "learning_rate": 0.00019517170056746196, "loss": 7.5317, "step": 87210 }, { "epoch": 10.495788206979542, "grad_norm": 6.052454471588135, "learning_rate": 0.0001951705326761064, "loss": 7.497, "step": 87220 }, { "epoch": 10.49699157641396, "grad_norm": 41.5262565612793, "learning_rate": 0.00019516936464701555, "loss": 7.4961, "step": 87230 }, { "epoch": 10.498194945848375, "grad_norm": 15.388359069824219, "learning_rate": 0.00019516819648019112, "loss": 7.4264, "step": 87240 }, { "epoch": 10.499398315282791, "grad_norm": 191.34458923339844, "learning_rate": 0.00019516702817563484, "loss": 7.4435, "step": 87250 }, { "epoch": 10.500601684717209, "grad_norm": 18.088212966918945, "learning_rate": 0.0001951658597333484, "loss": 7.5107, "step": 87260 }, { "epoch": 10.501805054151625, "grad_norm": 787.8485717773438, "learning_rate": 0.0001951646911533334, "loss": 7.5042, "step": 87270 }, { "epoch": 10.50300842358604, "grad_norm": 35.14695358276367, "learning_rate": 0.00019516352243559163, "loss": 7.5672, "step": 87280 }, { "epoch": 10.504211793020458, "grad_norm": 1208.171875, "learning_rate": 0.00019516235358012473, "loss": 7.4827, "step": 87290 }, { "epoch": 10.505415162454874, "grad_norm": 93.63135528564453, "learning_rate": 0.00019516118458693443, "loss": 7.5129, "step": 87300 }, { "epoch": 10.50661853188929, "grad_norm": 111.2813491821289, "learning_rate": 0.0001951600154560224, "loss": 7.5537, "step": 87310 }, { "epoch": 10.507821901323707, "grad_norm": 30.587112426757812, "learning_rate": 0.00019515884618739033, "loss": 7.5448, "step": 87320 }, { "epoch": 10.509025270758123, "grad_norm": 18.9459285736084, "learning_rate": 0.0001951576767810399, "loss": 7.4557, "step": 87330 }, { "epoch": 10.510228640192539, "grad_norm": 148.022216796875, "learning_rate": 0.00019515650723697284, "loss": 7.5728, "step": 87340 }, { "epoch": 10.511432009626956, "grad_norm": 8.671709060668945, "learning_rate": 0.00019515533755519083, "loss": 7.5929, "step": 87350 }, { "epoch": 10.512635379061372, "grad_norm": 99.92613220214844, "learning_rate": 0.0001951541677356955, "loss": 7.458, "step": 87360 }, { "epoch": 10.513838748495788, "grad_norm": 51.72817611694336, "learning_rate": 0.00019515299777848866, "loss": 7.5404, "step": 87370 }, { "epoch": 10.515042117930205, "grad_norm": 20.989768981933594, "learning_rate": 0.0001951518276835719, "loss": 7.4775, "step": 87380 }, { "epoch": 10.516245487364621, "grad_norm": 384.83258056640625, "learning_rate": 0.00019515065745094696, "loss": 7.4001, "step": 87390 }, { "epoch": 10.517448856799037, "grad_norm": 25.202699661254883, "learning_rate": 0.00019514948708061554, "loss": 7.4639, "step": 87400 }, { "epoch": 10.518652226233453, "grad_norm": 14.15613842010498, "learning_rate": 0.00019514831657257932, "loss": 7.4721, "step": 87410 }, { "epoch": 10.51985559566787, "grad_norm": 8.060870170593262, "learning_rate": 0.00019514714592684, "loss": 7.5325, "step": 87420 }, { "epoch": 10.521058965102286, "grad_norm": 41.303680419921875, "learning_rate": 0.00019514597514339925, "loss": 7.5139, "step": 87430 }, { "epoch": 10.522262334536702, "grad_norm": 2.9415969848632812, "learning_rate": 0.0001951448042222588, "loss": 7.5952, "step": 87440 }, { "epoch": 10.52346570397112, "grad_norm": 6.626253604888916, "learning_rate": 0.0001951436331634203, "loss": 7.5419, "step": 87450 }, { "epoch": 10.524669073405535, "grad_norm": 7.343200206756592, "learning_rate": 0.0001951424619668855, "loss": 7.4667, "step": 87460 }, { "epoch": 10.525872442839951, "grad_norm": 11.2632417678833, "learning_rate": 0.00019514129063265604, "loss": 7.4708, "step": 87470 }, { "epoch": 10.527075812274369, "grad_norm": 5.937346935272217, "learning_rate": 0.00019514011916073367, "loss": 7.5314, "step": 87480 }, { "epoch": 10.528279181708784, "grad_norm": 9.836857795715332, "learning_rate": 0.00019513894755112006, "loss": 7.5127, "step": 87490 }, { "epoch": 10.5294825511432, "grad_norm": 4.930632591247559, "learning_rate": 0.00019513777580381687, "loss": 7.5156, "step": 87500 }, { "epoch": 10.530685920577618, "grad_norm": 27.229860305786133, "learning_rate": 0.00019513660391882586, "loss": 7.6346, "step": 87510 }, { "epoch": 10.531889290012034, "grad_norm": 7.525158882141113, "learning_rate": 0.00019513543189614868, "loss": 7.5316, "step": 87520 }, { "epoch": 10.53309265944645, "grad_norm": 6.16154146194458, "learning_rate": 0.00019513425973578707, "loss": 7.4763, "step": 87530 }, { "epoch": 10.534296028880867, "grad_norm": 3.742701768875122, "learning_rate": 0.00019513308743774265, "loss": 7.5409, "step": 87540 }, { "epoch": 10.535499398315283, "grad_norm": 7.226905822753906, "learning_rate": 0.00019513191500201721, "loss": 7.5202, "step": 87550 }, { "epoch": 10.536702767749698, "grad_norm": 9.411243438720703, "learning_rate": 0.00019513074242861241, "loss": 7.4987, "step": 87560 }, { "epoch": 10.537906137184116, "grad_norm": 6.339433670043945, "learning_rate": 0.0001951295697175299, "loss": 7.5569, "step": 87570 }, { "epoch": 10.539109506618532, "grad_norm": 3.9573373794555664, "learning_rate": 0.0001951283968687714, "loss": 7.4227, "step": 87580 }, { "epoch": 10.540312876052948, "grad_norm": 6.60520601272583, "learning_rate": 0.00019512722388233868, "loss": 7.2681, "step": 87590 }, { "epoch": 10.541516245487365, "grad_norm": 9.820063591003418, "learning_rate": 0.00019512605075823337, "loss": 7.411, "step": 87600 }, { "epoch": 10.542719614921781, "grad_norm": 18.547088623046875, "learning_rate": 0.00019512487749645714, "loss": 7.3299, "step": 87610 }, { "epoch": 10.543922984356197, "grad_norm": 3.7740328311920166, "learning_rate": 0.00019512370409701174, "loss": 7.2751, "step": 87620 }, { "epoch": 10.545126353790614, "grad_norm": 39.1048698425293, "learning_rate": 0.00019512253055989888, "loss": 7.367, "step": 87630 }, { "epoch": 10.54632972322503, "grad_norm": 12.924285888671875, "learning_rate": 0.0001951213568851202, "loss": 7.3617, "step": 87640 }, { "epoch": 10.547533092659446, "grad_norm": 11.680988311767578, "learning_rate": 0.00019512018307267748, "loss": 7.3535, "step": 87650 }, { "epoch": 10.548736462093864, "grad_norm": 5.430835247039795, "learning_rate": 0.00019511900912257233, "loss": 7.3529, "step": 87660 }, { "epoch": 10.54993983152828, "grad_norm": 5.599913120269775, "learning_rate": 0.0001951178350348065, "loss": 7.387, "step": 87670 }, { "epoch": 10.551143200962695, "grad_norm": 9.078004837036133, "learning_rate": 0.00019511666080938167, "loss": 7.2856, "step": 87680 }, { "epoch": 10.552346570397113, "grad_norm": 7.466926097869873, "learning_rate": 0.00019511548644629955, "loss": 7.355, "step": 87690 }, { "epoch": 10.553549939831528, "grad_norm": 29.83484649658203, "learning_rate": 0.00019511431194556183, "loss": 7.4152, "step": 87700 }, { "epoch": 10.554753309265944, "grad_norm": 7.700797080993652, "learning_rate": 0.00019511313730717026, "loss": 7.308, "step": 87710 }, { "epoch": 10.555956678700362, "grad_norm": 10.354599952697754, "learning_rate": 0.00019511196253112645, "loss": 7.3433, "step": 87720 }, { "epoch": 10.557160048134778, "grad_norm": 13.304309844970703, "learning_rate": 0.00019511078761743218, "loss": 7.3175, "step": 87730 }, { "epoch": 10.558363417569193, "grad_norm": 11.474148750305176, "learning_rate": 0.0001951096125660891, "loss": 7.4401, "step": 87740 }, { "epoch": 10.559566787003611, "grad_norm": 33.384464263916016, "learning_rate": 0.00019510843737709894, "loss": 7.385, "step": 87750 }, { "epoch": 10.560770156438027, "grad_norm": 22.092769622802734, "learning_rate": 0.0001951072620504634, "loss": 7.3322, "step": 87760 }, { "epoch": 10.561973525872443, "grad_norm": 8.763447761535645, "learning_rate": 0.00019510608658618414, "loss": 7.3348, "step": 87770 }, { "epoch": 10.56317689530686, "grad_norm": 22.685483932495117, "learning_rate": 0.0001951049109842629, "loss": 7.3653, "step": 87780 }, { "epoch": 10.564380264741276, "grad_norm": 45.91816329956055, "learning_rate": 0.00019510373524470138, "loss": 7.3695, "step": 87790 }, { "epoch": 10.565583634175692, "grad_norm": 30.154470443725586, "learning_rate": 0.00019510255936750127, "loss": 7.3982, "step": 87800 }, { "epoch": 10.566787003610107, "grad_norm": 16.189109802246094, "learning_rate": 0.0001951013833526643, "loss": 7.3565, "step": 87810 }, { "epoch": 10.567990373044525, "grad_norm": 8.944977760314941, "learning_rate": 0.00019510020720019214, "loss": 7.4081, "step": 87820 }, { "epoch": 10.56919374247894, "grad_norm": 8.774030685424805, "learning_rate": 0.00019509903091008646, "loss": 7.2951, "step": 87830 }, { "epoch": 10.570397111913357, "grad_norm": 6.231395244598389, "learning_rate": 0.00019509785448234906, "loss": 7.3018, "step": 87840 }, { "epoch": 10.571600481347774, "grad_norm": 10.9324951171875, "learning_rate": 0.00019509667791698155, "loss": 7.3708, "step": 87850 }, { "epoch": 10.57280385078219, "grad_norm": 5.3968329429626465, "learning_rate": 0.00019509550121398568, "loss": 7.2815, "step": 87860 }, { "epoch": 10.574007220216606, "grad_norm": 5.910791873931885, "learning_rate": 0.00019509432437336312, "loss": 7.2696, "step": 87870 }, { "epoch": 10.575210589651023, "grad_norm": 12.487360954284668, "learning_rate": 0.00019509314739511564, "loss": 7.3683, "step": 87880 }, { "epoch": 10.57641395908544, "grad_norm": 10.230481147766113, "learning_rate": 0.00019509197027924485, "loss": 7.3155, "step": 87890 }, { "epoch": 10.577617328519855, "grad_norm": 20.70260238647461, "learning_rate": 0.00019509079302575252, "loss": 7.2053, "step": 87900 }, { "epoch": 10.578820697954272, "grad_norm": 5.648168563842773, "learning_rate": 0.00019508961563464034, "loss": 7.3536, "step": 87910 }, { "epoch": 10.580024067388688, "grad_norm": 11.615484237670898, "learning_rate": 0.00019508843810591003, "loss": 7.3168, "step": 87920 }, { "epoch": 10.581227436823104, "grad_norm": 44.52412033081055, "learning_rate": 0.00019508726043956324, "loss": 7.3478, "step": 87930 }, { "epoch": 10.582430806257522, "grad_norm": 15.564360618591309, "learning_rate": 0.0001950860826356017, "loss": 7.344, "step": 87940 }, { "epoch": 10.583634175691937, "grad_norm": 14.57740592956543, "learning_rate": 0.00019508490469402715, "loss": 7.4948, "step": 87950 }, { "epoch": 10.584837545126353, "grad_norm": 37.10041046142578, "learning_rate": 0.00019508372661484127, "loss": 7.3218, "step": 87960 }, { "epoch": 10.58604091456077, "grad_norm": 20.117599487304688, "learning_rate": 0.00019508254839804577, "loss": 7.4361, "step": 87970 }, { "epoch": 10.587244283995187, "grad_norm": 15.298394203186035, "learning_rate": 0.0001950813700436423, "loss": 7.335, "step": 87980 }, { "epoch": 10.588447653429602, "grad_norm": 10.638603210449219, "learning_rate": 0.00019508019155163264, "loss": 7.3783, "step": 87990 }, { "epoch": 10.58965102286402, "grad_norm": 31.82084083557129, "learning_rate": 0.00019507901292201848, "loss": 7.4785, "step": 88000 }, { "epoch": 10.590854392298436, "grad_norm": 27.33327293395996, "learning_rate": 0.0001950778341548015, "loss": 7.2916, "step": 88010 }, { "epoch": 10.592057761732852, "grad_norm": 9.71547794342041, "learning_rate": 0.00019507665524998342, "loss": 7.4436, "step": 88020 }, { "epoch": 10.593261131167269, "grad_norm": 18.68053436279297, "learning_rate": 0.00019507547620756593, "loss": 7.364, "step": 88030 }, { "epoch": 10.594464500601685, "grad_norm": 15.65990924835205, "learning_rate": 0.0001950742970275508, "loss": 7.3381, "step": 88040 }, { "epoch": 10.5956678700361, "grad_norm": 14.456318855285645, "learning_rate": 0.00019507311770993965, "loss": 7.3626, "step": 88050 }, { "epoch": 10.596871239470518, "grad_norm": 15.827197074890137, "learning_rate": 0.00019507193825473424, "loss": 7.308, "step": 88060 }, { "epoch": 10.598074608904934, "grad_norm": 13.547784805297852, "learning_rate": 0.00019507075866193624, "loss": 7.3595, "step": 88070 }, { "epoch": 10.59927797833935, "grad_norm": 25.571096420288086, "learning_rate": 0.0001950695789315474, "loss": 7.279, "step": 88080 }, { "epoch": 10.600481347773766, "grad_norm": 23.713600158691406, "learning_rate": 0.00019506839906356942, "loss": 7.35, "step": 88090 }, { "epoch": 10.601684717208183, "grad_norm": 40.97940444946289, "learning_rate": 0.000195067219058004, "loss": 7.4039, "step": 88100 }, { "epoch": 10.602888086642599, "grad_norm": 14.471735000610352, "learning_rate": 0.00019506603891485282, "loss": 7.4411, "step": 88110 }, { "epoch": 10.604091456077015, "grad_norm": 174.0249481201172, "learning_rate": 0.0001950648586341176, "loss": 7.4085, "step": 88120 }, { "epoch": 10.605294825511432, "grad_norm": 22.778398513793945, "learning_rate": 0.0001950636782158001, "loss": 7.4301, "step": 88130 }, { "epoch": 10.606498194945848, "grad_norm": 21.839618682861328, "learning_rate": 0.00019506249765990195, "loss": 7.3172, "step": 88140 }, { "epoch": 10.607701564380264, "grad_norm": 25.78067398071289, "learning_rate": 0.0001950613169664249, "loss": 7.3865, "step": 88150 }, { "epoch": 10.608904933814681, "grad_norm": 37.15629959106445, "learning_rate": 0.00019506013613537066, "loss": 7.2948, "step": 88160 }, { "epoch": 10.610108303249097, "grad_norm": 36.7580680847168, "learning_rate": 0.00019505895516674096, "loss": 7.3327, "step": 88170 }, { "epoch": 10.611311672683513, "grad_norm": 75.67523193359375, "learning_rate": 0.00019505777406053745, "loss": 7.2686, "step": 88180 }, { "epoch": 10.61251504211793, "grad_norm": 56.04183578491211, "learning_rate": 0.00019505659281676186, "loss": 7.3916, "step": 88190 }, { "epoch": 10.613718411552346, "grad_norm": 55.43891906738281, "learning_rate": 0.00019505541143541595, "loss": 7.2491, "step": 88200 }, { "epoch": 10.614921780986762, "grad_norm": 50.380245208740234, "learning_rate": 0.00019505422991650137, "loss": 7.4029, "step": 88210 }, { "epoch": 10.61612515042118, "grad_norm": 35.543277740478516, "learning_rate": 0.00019505304826001983, "loss": 7.3614, "step": 88220 }, { "epoch": 10.617328519855596, "grad_norm": 61.23712158203125, "learning_rate": 0.00019505186646597312, "loss": 7.4214, "step": 88230 }, { "epoch": 10.618531889290011, "grad_norm": 26.799434661865234, "learning_rate": 0.00019505068453436284, "loss": 7.324, "step": 88240 }, { "epoch": 10.619735258724429, "grad_norm": 43.014469146728516, "learning_rate": 0.00019504950246519075, "loss": 7.4568, "step": 88250 }, { "epoch": 10.620938628158845, "grad_norm": 268.6820983886719, "learning_rate": 0.00019504832025845858, "loss": 7.3789, "step": 88260 }, { "epoch": 10.62214199759326, "grad_norm": 48.661808013916016, "learning_rate": 0.00019504713791416805, "loss": 7.4168, "step": 88270 }, { "epoch": 10.623345367027678, "grad_norm": 72.69803619384766, "learning_rate": 0.00019504595543232082, "loss": 7.3598, "step": 88280 }, { "epoch": 10.624548736462094, "grad_norm": 101.9991455078125, "learning_rate": 0.00019504477281291862, "loss": 7.3906, "step": 88290 }, { "epoch": 10.62575210589651, "grad_norm": 204.7308349609375, "learning_rate": 0.00019504359005596315, "loss": 7.3497, "step": 88300 }, { "epoch": 10.626955475330927, "grad_norm": 112.8714599609375, "learning_rate": 0.0001950424071614562, "loss": 7.2988, "step": 88310 }, { "epoch": 10.628158844765343, "grad_norm": 229.57504272460938, "learning_rate": 0.00019504122412939938, "loss": 7.2327, "step": 88320 }, { "epoch": 10.629362214199759, "grad_norm": 247.52845764160156, "learning_rate": 0.00019504004095979445, "loss": 7.3221, "step": 88330 }, { "epoch": 10.630565583634176, "grad_norm": 152.36831665039062, "learning_rate": 0.0001950388576526431, "loss": 7.2489, "step": 88340 }, { "epoch": 10.631768953068592, "grad_norm": 147.5357208251953, "learning_rate": 0.00019503767420794708, "loss": 7.3202, "step": 88350 }, { "epoch": 10.632972322503008, "grad_norm": 181.00552368164062, "learning_rate": 0.00019503649062570808, "loss": 7.3037, "step": 88360 }, { "epoch": 10.634175691937426, "grad_norm": 67.28950500488281, "learning_rate": 0.00019503530690592781, "loss": 7.3349, "step": 88370 }, { "epoch": 10.635379061371841, "grad_norm": 208.2025909423828, "learning_rate": 0.000195034123048608, "loss": 7.3797, "step": 88380 }, { "epoch": 10.636582430806257, "grad_norm": 123.33839416503906, "learning_rate": 0.00019503293905375033, "loss": 7.3294, "step": 88390 }, { "epoch": 10.637785800240675, "grad_norm": 141.5004119873047, "learning_rate": 0.00019503175492135654, "loss": 7.2263, "step": 88400 }, { "epoch": 10.63898916967509, "grad_norm": 173.01364135742188, "learning_rate": 0.00019503057065142833, "loss": 7.2706, "step": 88410 }, { "epoch": 10.640192539109506, "grad_norm": 66.8580093383789, "learning_rate": 0.00019502938624396744, "loss": 7.438, "step": 88420 }, { "epoch": 10.641395908543924, "grad_norm": 209.61973571777344, "learning_rate": 0.00019502820169897556, "loss": 7.3632, "step": 88430 }, { "epoch": 10.64259927797834, "grad_norm": 132.6012420654297, "learning_rate": 0.0001950270170164544, "loss": 7.3851, "step": 88440 }, { "epoch": 10.643802647412755, "grad_norm": 149.63917541503906, "learning_rate": 0.0001950258321964057, "loss": 7.4109, "step": 88450 }, { "epoch": 10.645006016847173, "grad_norm": 141.02175903320312, "learning_rate": 0.00019502464723883116, "loss": 7.3332, "step": 88460 }, { "epoch": 10.646209386281589, "grad_norm": 161.88961791992188, "learning_rate": 0.0001950234621437325, "loss": 7.3588, "step": 88470 }, { "epoch": 10.647412755716005, "grad_norm": 250.7991180419922, "learning_rate": 0.0001950222769111114, "loss": 7.2971, "step": 88480 }, { "epoch": 10.648616125150422, "grad_norm": 95.60113525390625, "learning_rate": 0.00019502109154096963, "loss": 7.3376, "step": 88490 }, { "epoch": 10.649819494584838, "grad_norm": 187.06863403320312, "learning_rate": 0.0001950199060333089, "loss": 7.2651, "step": 88500 }, { "epoch": 10.651022864019254, "grad_norm": 115.3002700805664, "learning_rate": 0.00019501872038813086, "loss": 7.286, "step": 88510 }, { "epoch": 10.65222623345367, "grad_norm": 66.42501831054688, "learning_rate": 0.0001950175346054373, "loss": 7.3652, "step": 88520 }, { "epoch": 10.653429602888087, "grad_norm": 140.0968780517578, "learning_rate": 0.0001950163486852299, "loss": 7.2641, "step": 88530 }, { "epoch": 10.654632972322503, "grad_norm": 129.69400024414062, "learning_rate": 0.00019501516262751038, "loss": 7.3465, "step": 88540 }, { "epoch": 10.655836341756919, "grad_norm": 166.33387756347656, "learning_rate": 0.00019501397643228047, "loss": 7.3526, "step": 88550 }, { "epoch": 10.657039711191336, "grad_norm": 69.1812515258789, "learning_rate": 0.00019501279009954186, "loss": 7.286, "step": 88560 }, { "epoch": 10.658243080625752, "grad_norm": 142.8148651123047, "learning_rate": 0.00019501160362929632, "loss": 7.2852, "step": 88570 }, { "epoch": 10.659446450060168, "grad_norm": 141.66908264160156, "learning_rate": 0.0001950104170215455, "loss": 7.2608, "step": 88580 }, { "epoch": 10.660649819494585, "grad_norm": 51.66764450073242, "learning_rate": 0.00019500923027629118, "loss": 7.2969, "step": 88590 }, { "epoch": 10.661853188929001, "grad_norm": 109.10205078125, "learning_rate": 0.00019500804339353504, "loss": 7.3724, "step": 88600 }, { "epoch": 10.663056558363417, "grad_norm": 93.02632141113281, "learning_rate": 0.00019500685637327875, "loss": 7.2457, "step": 88610 }, { "epoch": 10.664259927797834, "grad_norm": 73.33341979980469, "learning_rate": 0.00019500566921552414, "loss": 7.3102, "step": 88620 }, { "epoch": 10.66546329723225, "grad_norm": 102.7935791015625, "learning_rate": 0.00019500448192027284, "loss": 7.3332, "step": 88630 }, { "epoch": 10.666666666666666, "grad_norm": 108.53228759765625, "learning_rate": 0.00019500329448752664, "loss": 7.3152, "step": 88640 }, { "epoch": 10.667870036101084, "grad_norm": 103.6209716796875, "learning_rate": 0.00019500210691728717, "loss": 7.2561, "step": 88650 }, { "epoch": 10.6690734055355, "grad_norm": 99.7047119140625, "learning_rate": 0.00019500091920955623, "loss": 7.2516, "step": 88660 }, { "epoch": 10.670276774969915, "grad_norm": 62.162837982177734, "learning_rate": 0.0001949997313643355, "loss": 7.2708, "step": 88670 }, { "epoch": 10.671480144404333, "grad_norm": 84.71681213378906, "learning_rate": 0.0001949985433816267, "loss": 7.2769, "step": 88680 }, { "epoch": 10.672683513838749, "grad_norm": 69.47581481933594, "learning_rate": 0.00019499735526143152, "loss": 7.2773, "step": 88690 }, { "epoch": 10.673886883273164, "grad_norm": 210.3521270751953, "learning_rate": 0.00019499616700375177, "loss": 7.3398, "step": 88700 }, { "epoch": 10.675090252707582, "grad_norm": 79.78470611572266, "learning_rate": 0.00019499497860858909, "loss": 7.1656, "step": 88710 }, { "epoch": 10.676293622141998, "grad_norm": 62.30799865722656, "learning_rate": 0.00019499379007594517, "loss": 7.2361, "step": 88720 }, { "epoch": 10.677496991576414, "grad_norm": 91.21034240722656, "learning_rate": 0.00019499260140582188, "loss": 7.3132, "step": 88730 }, { "epoch": 10.678700361010831, "grad_norm": 43.911380767822266, "learning_rate": 0.00019499141259822077, "loss": 7.3459, "step": 88740 }, { "epoch": 10.679903730445247, "grad_norm": 105.96215057373047, "learning_rate": 0.00019499022365314367, "loss": 7.2649, "step": 88750 }, { "epoch": 10.681107099879663, "grad_norm": 248.10601806640625, "learning_rate": 0.00019498903457059222, "loss": 7.2417, "step": 88760 }, { "epoch": 10.68231046931408, "grad_norm": 38.80183410644531, "learning_rate": 0.00019498784535056825, "loss": 7.2187, "step": 88770 }, { "epoch": 10.683513838748496, "grad_norm": 59.740116119384766, "learning_rate": 0.00019498665599307336, "loss": 7.2648, "step": 88780 }, { "epoch": 10.684717208182912, "grad_norm": 26.129091262817383, "learning_rate": 0.00019498546649810935, "loss": 7.2915, "step": 88790 }, { "epoch": 10.685920577617328, "grad_norm": 56.78028869628906, "learning_rate": 0.00019498427686567791, "loss": 7.3406, "step": 88800 }, { "epoch": 10.687123947051745, "grad_norm": 64.2637939453125, "learning_rate": 0.00019498308709578078, "loss": 7.3238, "step": 88810 }, { "epoch": 10.688327316486161, "grad_norm": 36.57625961303711, "learning_rate": 0.0001949818971884197, "loss": 7.3383, "step": 88820 }, { "epoch": 10.689530685920577, "grad_norm": 265.4883117675781, "learning_rate": 0.00019498070714359632, "loss": 7.3633, "step": 88830 }, { "epoch": 10.690734055354994, "grad_norm": 93.23102569580078, "learning_rate": 0.0001949795169613124, "loss": 7.3179, "step": 88840 }, { "epoch": 10.69193742478941, "grad_norm": 205.05117797851562, "learning_rate": 0.00019497832664156972, "loss": 7.4237, "step": 88850 }, { "epoch": 10.693140794223826, "grad_norm": 145.75830078125, "learning_rate": 0.00019497713618436992, "loss": 7.3747, "step": 88860 }, { "epoch": 10.694344163658243, "grad_norm": 68.23468017578125, "learning_rate": 0.00019497594558971475, "loss": 7.393, "step": 88870 }, { "epoch": 10.69554753309266, "grad_norm": 116.01805114746094, "learning_rate": 0.00019497475485760594, "loss": 7.4107, "step": 88880 }, { "epoch": 10.696750902527075, "grad_norm": 62.533447265625, "learning_rate": 0.00019497356398804524, "loss": 7.3809, "step": 88890 }, { "epoch": 10.697954271961493, "grad_norm": 168.87171936035156, "learning_rate": 0.0001949723729810343, "loss": 7.4853, "step": 88900 }, { "epoch": 10.699157641395908, "grad_norm": 166.12615966796875, "learning_rate": 0.00019497118183657493, "loss": 7.5827, "step": 88910 }, { "epoch": 10.700361010830324, "grad_norm": 50.531700134277344, "learning_rate": 0.00019496999055466878, "loss": 7.4548, "step": 88920 }, { "epoch": 10.701564380264742, "grad_norm": 149.77862548828125, "learning_rate": 0.0001949687991353176, "loss": 7.426, "step": 88930 }, { "epoch": 10.702767749699158, "grad_norm": 94.55686950683594, "learning_rate": 0.00019496760757852316, "loss": 7.4463, "step": 88940 }, { "epoch": 10.703971119133573, "grad_norm": 62.388206481933594, "learning_rate": 0.00019496641588428714, "loss": 7.4493, "step": 88950 }, { "epoch": 10.705174488567991, "grad_norm": 66.34426879882812, "learning_rate": 0.00019496522405261125, "loss": 7.3723, "step": 88960 }, { "epoch": 10.706377858002407, "grad_norm": 40.63010787963867, "learning_rate": 0.0001949640320834972, "loss": 7.3687, "step": 88970 }, { "epoch": 10.707581227436823, "grad_norm": 97.47332763671875, "learning_rate": 0.0001949628399769468, "loss": 7.3967, "step": 88980 }, { "epoch": 10.70878459687124, "grad_norm": 52.6695556640625, "learning_rate": 0.0001949616477329617, "loss": 7.3656, "step": 88990 }, { "epoch": 10.709987966305656, "grad_norm": 87.3397445678711, "learning_rate": 0.00019496045535154368, "loss": 7.3819, "step": 89000 }, { "epoch": 10.711191335740072, "grad_norm": 225.9004669189453, "learning_rate": 0.00019495926283269444, "loss": 7.3577, "step": 89010 }, { "epoch": 10.71239470517449, "grad_norm": 85.98999786376953, "learning_rate": 0.00019495807017641566, "loss": 7.3868, "step": 89020 }, { "epoch": 10.713598074608905, "grad_norm": 143.83712768554688, "learning_rate": 0.00019495687738270914, "loss": 7.3105, "step": 89030 }, { "epoch": 10.71480144404332, "grad_norm": 78.22279357910156, "learning_rate": 0.00019495568445157657, "loss": 7.2924, "step": 89040 }, { "epoch": 10.716004813477738, "grad_norm": 248.8448944091797, "learning_rate": 0.00019495449138301967, "loss": 7.3742, "step": 89050 }, { "epoch": 10.717208182912154, "grad_norm": 63.11293411254883, "learning_rate": 0.00019495329817704018, "loss": 7.369, "step": 89060 }, { "epoch": 10.71841155234657, "grad_norm": 67.78326416015625, "learning_rate": 0.00019495210483363984, "loss": 7.343, "step": 89070 }, { "epoch": 10.719614921780988, "grad_norm": 101.28987121582031, "learning_rate": 0.00019495091135282033, "loss": 7.3267, "step": 89080 }, { "epoch": 10.720818291215403, "grad_norm": 61.997962951660156, "learning_rate": 0.00019494971773458342, "loss": 7.3417, "step": 89090 }, { "epoch": 10.722021660649819, "grad_norm": 349.41046142578125, "learning_rate": 0.00019494852397893085, "loss": 7.3135, "step": 89100 }, { "epoch": 10.723225030084237, "grad_norm": 105.47533416748047, "learning_rate": 0.0001949473300858643, "loss": 7.3145, "step": 89110 }, { "epoch": 10.724428399518652, "grad_norm": 106.47731018066406, "learning_rate": 0.00019494613605538553, "loss": 7.3453, "step": 89120 }, { "epoch": 10.725631768953068, "grad_norm": 189.0004425048828, "learning_rate": 0.00019494494188749626, "loss": 7.3526, "step": 89130 }, { "epoch": 10.726835138387486, "grad_norm": 245.7454071044922, "learning_rate": 0.0001949437475821982, "loss": 7.3629, "step": 89140 }, { "epoch": 10.728038507821902, "grad_norm": 319.6542053222656, "learning_rate": 0.00019494255313949312, "loss": 7.4202, "step": 89150 }, { "epoch": 10.729241877256317, "grad_norm": 200.45176696777344, "learning_rate": 0.00019494135855938272, "loss": 7.3789, "step": 89160 }, { "epoch": 10.730445246690735, "grad_norm": 246.00425720214844, "learning_rate": 0.00019494016384186873, "loss": 7.3848, "step": 89170 }, { "epoch": 10.73164861612515, "grad_norm": 587.3739624023438, "learning_rate": 0.00019493896898695287, "loss": 7.3729, "step": 89180 }, { "epoch": 10.732851985559567, "grad_norm": 169.81912231445312, "learning_rate": 0.00019493777399463692, "loss": 7.4477, "step": 89190 }, { "epoch": 10.734055354993982, "grad_norm": 132.12127685546875, "learning_rate": 0.00019493657886492254, "loss": 7.3375, "step": 89200 }, { "epoch": 10.7352587244284, "grad_norm": 114.60580444335938, "learning_rate": 0.0001949353835978115, "loss": 7.384, "step": 89210 }, { "epoch": 10.736462093862816, "grad_norm": 85.80974578857422, "learning_rate": 0.00019493418819330552, "loss": 7.4829, "step": 89220 }, { "epoch": 10.737665463297231, "grad_norm": 102.380126953125, "learning_rate": 0.00019493299265140634, "loss": 7.3055, "step": 89230 }, { "epoch": 10.738868832731649, "grad_norm": 155.44085693359375, "learning_rate": 0.00019493179697211566, "loss": 7.3606, "step": 89240 }, { "epoch": 10.740072202166065, "grad_norm": 312.52703857421875, "learning_rate": 0.00019493060115543528, "loss": 7.4591, "step": 89250 }, { "epoch": 10.74127557160048, "grad_norm": 82.09359741210938, "learning_rate": 0.00019492940520136683, "loss": 7.2659, "step": 89260 }, { "epoch": 10.742478941034898, "grad_norm": 33.064884185791016, "learning_rate": 0.00019492820910991209, "loss": 7.4064, "step": 89270 }, { "epoch": 10.743682310469314, "grad_norm": 230.91871643066406, "learning_rate": 0.00019492701288107282, "loss": 7.3389, "step": 89280 }, { "epoch": 10.74488567990373, "grad_norm": 211.80934143066406, "learning_rate": 0.00019492581651485072, "loss": 7.4542, "step": 89290 }, { "epoch": 10.746089049338147, "grad_norm": 206.44613647460938, "learning_rate": 0.00019492462001124755, "loss": 7.2896, "step": 89300 }, { "epoch": 10.747292418772563, "grad_norm": 56.54222869873047, "learning_rate": 0.00019492342337026498, "loss": 7.3159, "step": 89310 }, { "epoch": 10.748495788206979, "grad_norm": 46.79085922241211, "learning_rate": 0.0001949222265919048, "loss": 7.5051, "step": 89320 }, { "epoch": 10.749699157641396, "grad_norm": 68.00275421142578, "learning_rate": 0.00019492102967616872, "loss": 7.4421, "step": 89330 }, { "epoch": 10.750902527075812, "grad_norm": 23.74716567993164, "learning_rate": 0.0001949198326230585, "loss": 7.3478, "step": 89340 }, { "epoch": 10.752105896510228, "grad_norm": 83.93447875976562, "learning_rate": 0.00019491863543257579, "loss": 7.3559, "step": 89350 }, { "epoch": 10.753309265944646, "grad_norm": 33.42729187011719, "learning_rate": 0.00019491743810472244, "loss": 7.3887, "step": 89360 }, { "epoch": 10.754512635379061, "grad_norm": 50.58473205566406, "learning_rate": 0.00019491624063950008, "loss": 7.3563, "step": 89370 }, { "epoch": 10.755716004813477, "grad_norm": 51.0051155090332, "learning_rate": 0.00019491504303691047, "loss": 7.3379, "step": 89380 }, { "epoch": 10.756919374247895, "grad_norm": 22.731534957885742, "learning_rate": 0.0001949138452969554, "loss": 7.4411, "step": 89390 }, { "epoch": 10.75812274368231, "grad_norm": 11.859912872314453, "learning_rate": 0.00019491264741963656, "loss": 7.405, "step": 89400 }, { "epoch": 10.759326113116726, "grad_norm": 23.532920837402344, "learning_rate": 0.00019491144940495566, "loss": 7.3864, "step": 89410 }, { "epoch": 10.760529482551144, "grad_norm": 27.584901809692383, "learning_rate": 0.00019491025125291449, "loss": 7.419, "step": 89420 }, { "epoch": 10.76173285198556, "grad_norm": 44.958309173583984, "learning_rate": 0.00019490905296351473, "loss": 7.3588, "step": 89430 }, { "epoch": 10.762936221419976, "grad_norm": 32.18195724487305, "learning_rate": 0.00019490785453675816, "loss": 7.382, "step": 89440 }, { "epoch": 10.764139590854393, "grad_norm": 32.33243179321289, "learning_rate": 0.0001949066559726465, "loss": 7.4039, "step": 89450 }, { "epoch": 10.765342960288809, "grad_norm": 63.45510482788086, "learning_rate": 0.00019490545727118144, "loss": 7.4506, "step": 89460 }, { "epoch": 10.766546329723225, "grad_norm": 211.78750610351562, "learning_rate": 0.00019490425843236478, "loss": 7.459, "step": 89470 }, { "epoch": 10.76774969915764, "grad_norm": 1398.822509765625, "learning_rate": 0.00019490305945619823, "loss": 7.3793, "step": 89480 }, { "epoch": 10.768953068592058, "grad_norm": 277.9879455566406, "learning_rate": 0.0001949018603426835, "loss": 7.4114, "step": 89490 }, { "epoch": 10.770156438026474, "grad_norm": 571.7463989257812, "learning_rate": 0.00019490066109182234, "loss": 7.5029, "step": 89500 }, { "epoch": 10.77135980746089, "grad_norm": 409.6896057128906, "learning_rate": 0.00019489946170361653, "loss": 7.4473, "step": 89510 }, { "epoch": 10.772563176895307, "grad_norm": 375.6978759765625, "learning_rate": 0.00019489826217806774, "loss": 7.5655, "step": 89520 }, { "epoch": 10.773766546329723, "grad_norm": 652.9776000976562, "learning_rate": 0.00019489706251517777, "loss": 7.4944, "step": 89530 }, { "epoch": 10.774969915764139, "grad_norm": 430.00433349609375, "learning_rate": 0.00019489586271494828, "loss": 7.5482, "step": 89540 }, { "epoch": 10.776173285198556, "grad_norm": 24.84916114807129, "learning_rate": 0.00019489466277738105, "loss": 7.5287, "step": 89550 }, { "epoch": 10.777376654632972, "grad_norm": 27.26685905456543, "learning_rate": 0.00019489346270247783, "loss": 7.4141, "step": 89560 }, { "epoch": 10.778580024067388, "grad_norm": 44.02090072631836, "learning_rate": 0.00019489226249024035, "loss": 7.3955, "step": 89570 }, { "epoch": 10.779783393501805, "grad_norm": 64.54180908203125, "learning_rate": 0.0001948910621406703, "loss": 7.4451, "step": 89580 }, { "epoch": 10.780986762936221, "grad_norm": 53.844871520996094, "learning_rate": 0.0001948898616537695, "loss": 7.391, "step": 89590 }, { "epoch": 10.782190132370637, "grad_norm": 243.5305938720703, "learning_rate": 0.00019488866102953964, "loss": 7.4442, "step": 89600 }, { "epoch": 10.783393501805055, "grad_norm": 215.44296264648438, "learning_rate": 0.00019488746026798242, "loss": 7.3934, "step": 89610 }, { "epoch": 10.78459687123947, "grad_norm": 541.2142333984375, "learning_rate": 0.00019488625936909965, "loss": 7.4049, "step": 89620 }, { "epoch": 10.785800240673886, "grad_norm": 407.396484375, "learning_rate": 0.00019488505833289304, "loss": 7.3971, "step": 89630 }, { "epoch": 10.787003610108304, "grad_norm": 39.224212646484375, "learning_rate": 0.0001948838571593643, "loss": 7.3513, "step": 89640 }, { "epoch": 10.78820697954272, "grad_norm": 41.66058349609375, "learning_rate": 0.00019488265584851523, "loss": 7.352, "step": 89650 }, { "epoch": 10.789410348977135, "grad_norm": 74.9732666015625, "learning_rate": 0.00019488145440034747, "loss": 7.3182, "step": 89660 }, { "epoch": 10.790613718411553, "grad_norm": 168.19195556640625, "learning_rate": 0.00019488025281486288, "loss": 7.366, "step": 89670 }, { "epoch": 10.791817087845969, "grad_norm": 243.7516632080078, "learning_rate": 0.00019487905109206313, "loss": 7.428, "step": 89680 }, { "epoch": 10.793020457280385, "grad_norm": 474.8160705566406, "learning_rate": 0.00019487784923194994, "loss": 7.4098, "step": 89690 }, { "epoch": 10.794223826714802, "grad_norm": 416.8306884765625, "learning_rate": 0.0001948766472345251, "loss": 7.4483, "step": 89700 }, { "epoch": 10.795427196149218, "grad_norm": 85.0167236328125, "learning_rate": 0.0001948754450997903, "loss": 7.3781, "step": 89710 }, { "epoch": 10.796630565583634, "grad_norm": 142.3661651611328, "learning_rate": 0.00019487424282774732, "loss": 7.42, "step": 89720 }, { "epoch": 10.797833935018051, "grad_norm": 393.02716064453125, "learning_rate": 0.0001948730404183979, "loss": 7.3965, "step": 89730 }, { "epoch": 10.799037304452467, "grad_norm": 447.46881103515625, "learning_rate": 0.0001948718378717438, "loss": 7.4281, "step": 89740 }, { "epoch": 10.800240673886883, "grad_norm": 14.403511047363281, "learning_rate": 0.00019487063518778666, "loss": 7.3942, "step": 89750 }, { "epoch": 10.8014440433213, "grad_norm": 171.69520568847656, "learning_rate": 0.00019486943236652832, "loss": 7.4459, "step": 89760 }, { "epoch": 10.802647412755716, "grad_norm": 7661.4140625, "learning_rate": 0.0001948682294079705, "loss": 7.3851, "step": 89770 }, { "epoch": 10.803850782190132, "grad_norm": 2992.94482421875, "learning_rate": 0.00019486702631211488, "loss": 7.4832, "step": 89780 }, { "epoch": 10.80505415162455, "grad_norm": 295.5586853027344, "learning_rate": 0.00019486582307896331, "loss": 7.4184, "step": 89790 }, { "epoch": 10.806257521058965, "grad_norm": 355.16094970703125, "learning_rate": 0.00019486461970851746, "loss": 7.4807, "step": 89800 }, { "epoch": 10.807460890493381, "grad_norm": 10914.90625, "learning_rate": 0.00019486341620077906, "loss": 7.536, "step": 89810 }, { "epoch": 10.808664259927799, "grad_norm": 585.2021484375, "learning_rate": 0.0001948622125557499, "loss": 7.5429, "step": 89820 }, { "epoch": 10.809867629362214, "grad_norm": 1284.7327880859375, "learning_rate": 0.00019486100877343166, "loss": 7.4352, "step": 89830 }, { "epoch": 10.81107099879663, "grad_norm": 234.9237518310547, "learning_rate": 0.00019485980485382616, "loss": 7.3521, "step": 89840 }, { "epoch": 10.812274368231048, "grad_norm": 157.05502319335938, "learning_rate": 0.00019485860079693508, "loss": 7.4848, "step": 89850 }, { "epoch": 10.813477737665464, "grad_norm": 179.12625122070312, "learning_rate": 0.0001948573966027602, "loss": 7.4622, "step": 89860 }, { "epoch": 10.81468110709988, "grad_norm": 183.79322814941406, "learning_rate": 0.00019485619227130324, "loss": 7.3719, "step": 89870 }, { "epoch": 10.815884476534297, "grad_norm": 1028.42724609375, "learning_rate": 0.00019485498780256596, "loss": 7.3825, "step": 89880 }, { "epoch": 10.817087845968713, "grad_norm": 970.3973999023438, "learning_rate": 0.00019485378319655005, "loss": 7.4842, "step": 89890 }, { "epoch": 10.818291215403129, "grad_norm": 310.7757263183594, "learning_rate": 0.00019485257845325735, "loss": 7.4399, "step": 89900 }, { "epoch": 10.819494584837544, "grad_norm": 134.2033233642578, "learning_rate": 0.00019485137357268953, "loss": 7.3357, "step": 89910 }, { "epoch": 10.820697954271962, "grad_norm": 143.18923950195312, "learning_rate": 0.00019485016855484838, "loss": 7.4102, "step": 89920 }, { "epoch": 10.821901323706378, "grad_norm": 786.7396850585938, "learning_rate": 0.00019484896339973557, "loss": 7.3891, "step": 89930 }, { "epoch": 10.823104693140793, "grad_norm": 105.39263153076172, "learning_rate": 0.0001948477581073529, "loss": 7.4931, "step": 89940 }, { "epoch": 10.824308062575211, "grad_norm": 79.65034484863281, "learning_rate": 0.00019484655267770214, "loss": 7.4024, "step": 89950 }, { "epoch": 10.825511432009627, "grad_norm": 207.6728515625, "learning_rate": 0.000194845347110785, "loss": 7.408, "step": 89960 }, { "epoch": 10.826714801444043, "grad_norm": 291.0705261230469, "learning_rate": 0.0001948441414066032, "loss": 7.3783, "step": 89970 }, { "epoch": 10.82791817087846, "grad_norm": 257.7458190917969, "learning_rate": 0.00019484293556515852, "loss": 7.4086, "step": 89980 }, { "epoch": 10.829121540312876, "grad_norm": 600.5701904296875, "learning_rate": 0.0001948417295864527, "loss": 7.3214, "step": 89990 }, { "epoch": 10.830324909747292, "grad_norm": 126.4278564453125, "learning_rate": 0.0001948405234704875, "loss": 7.3987, "step": 90000 }, { "epoch": 10.83152827918171, "grad_norm": 197.2976837158203, "learning_rate": 0.00019483931721726462, "loss": 7.3567, "step": 90010 }, { "epoch": 10.832731648616125, "grad_norm": 1042.79052734375, "learning_rate": 0.0001948381108267858, "loss": 7.3748, "step": 90020 }, { "epoch": 10.833935018050541, "grad_norm": 205.5945587158203, "learning_rate": 0.0001948369042990529, "loss": 7.3638, "step": 90030 }, { "epoch": 10.835138387484958, "grad_norm": 491.60516357421875, "learning_rate": 0.00019483569763406754, "loss": 7.3998, "step": 90040 }, { "epoch": 10.836341756919374, "grad_norm": 111.6520004272461, "learning_rate": 0.00019483449083183152, "loss": 7.3486, "step": 90050 }, { "epoch": 10.83754512635379, "grad_norm": 99.41780090332031, "learning_rate": 0.00019483328389234656, "loss": 7.4199, "step": 90060 }, { "epoch": 10.838748495788208, "grad_norm": 171.64212036132812, "learning_rate": 0.00019483207681561445, "loss": 7.3448, "step": 90070 }, { "epoch": 10.839951865222623, "grad_norm": 199.79847717285156, "learning_rate": 0.0001948308696016369, "loss": 7.4762, "step": 90080 }, { "epoch": 10.84115523465704, "grad_norm": 270.7060241699219, "learning_rate": 0.00019482966225041568, "loss": 7.4044, "step": 90090 }, { "epoch": 10.842358604091457, "grad_norm": 739.8450927734375, "learning_rate": 0.0001948284547619525, "loss": 7.4325, "step": 90100 }, { "epoch": 10.843561973525873, "grad_norm": 216.46514892578125, "learning_rate": 0.00019482724713624917, "loss": 7.363, "step": 90110 }, { "epoch": 10.844765342960288, "grad_norm": 110.07520294189453, "learning_rate": 0.00019482603937330737, "loss": 7.2864, "step": 90120 }, { "epoch": 10.845968712394706, "grad_norm": 158.97850036621094, "learning_rate": 0.00019482483147312886, "loss": 7.4325, "step": 90130 }, { "epoch": 10.847172081829122, "grad_norm": 111.11646270751953, "learning_rate": 0.00019482362343571546, "loss": 7.3309, "step": 90140 }, { "epoch": 10.848375451263538, "grad_norm": 58.595584869384766, "learning_rate": 0.00019482241526106884, "loss": 7.5218, "step": 90150 }, { "epoch": 10.849578820697955, "grad_norm": 233.2845458984375, "learning_rate": 0.00019482120694919077, "loss": 7.4931, "step": 90160 }, { "epoch": 10.85078219013237, "grad_norm": 456.6014709472656, "learning_rate": 0.000194819998500083, "loss": 7.4513, "step": 90170 }, { "epoch": 10.851985559566787, "grad_norm": 902.7557373046875, "learning_rate": 0.0001948187899137473, "loss": 7.383, "step": 90180 }, { "epoch": 10.853188929001202, "grad_norm": 72.69772338867188, "learning_rate": 0.00019481758119018538, "loss": 7.4418, "step": 90190 }, { "epoch": 10.85439229843562, "grad_norm": 60.02667999267578, "learning_rate": 0.00019481637232939903, "loss": 7.3432, "step": 90200 }, { "epoch": 10.855595667870036, "grad_norm": 18.164566040039062, "learning_rate": 0.00019481516333138998, "loss": 7.3786, "step": 90210 }, { "epoch": 10.856799037304452, "grad_norm": 11.206013679504395, "learning_rate": 0.00019481395419615994, "loss": 7.3362, "step": 90220 }, { "epoch": 10.85800240673887, "grad_norm": 8.665871620178223, "learning_rate": 0.00019481274492371076, "loss": 7.3161, "step": 90230 }, { "epoch": 10.859205776173285, "grad_norm": 2.5264387130737305, "learning_rate": 0.0001948115355140441, "loss": 7.3466, "step": 90240 }, { "epoch": 10.8604091456077, "grad_norm": 20.93906021118164, "learning_rate": 0.00019481032596716172, "loss": 7.3447, "step": 90250 }, { "epoch": 10.861612515042118, "grad_norm": 6.975417613983154, "learning_rate": 0.00019480911628306542, "loss": 7.5147, "step": 90260 }, { "epoch": 10.862815884476534, "grad_norm": 2.2918078899383545, "learning_rate": 0.0001948079064617569, "loss": 7.3855, "step": 90270 }, { "epoch": 10.86401925391095, "grad_norm": 9.021227836608887, "learning_rate": 0.00019480669650323798, "loss": 7.3852, "step": 90280 }, { "epoch": 10.865222623345367, "grad_norm": 5.137051582336426, "learning_rate": 0.00019480548640751031, "loss": 7.5135, "step": 90290 }, { "epoch": 10.866425992779783, "grad_norm": 2.2990434169769287, "learning_rate": 0.00019480427617457572, "loss": 7.4849, "step": 90300 }, { "epoch": 10.867629362214199, "grad_norm": 24.73467445373535, "learning_rate": 0.00019480306580443591, "loss": 7.3778, "step": 90310 }, { "epoch": 10.868832731648617, "grad_norm": 5.181796073913574, "learning_rate": 0.0001948018552970927, "loss": 7.4085, "step": 90320 }, { "epoch": 10.870036101083032, "grad_norm": 4.720545768737793, "learning_rate": 0.00019480064465254778, "loss": 7.309, "step": 90330 }, { "epoch": 10.871239470517448, "grad_norm": 1.8978151082992554, "learning_rate": 0.0001947994338708029, "loss": 7.3723, "step": 90340 }, { "epoch": 10.872442839951866, "grad_norm": 6.551721096038818, "learning_rate": 0.00019479822295185984, "loss": 7.4731, "step": 90350 }, { "epoch": 10.873646209386282, "grad_norm": 2.412587881088257, "learning_rate": 0.00019479701189572038, "loss": 7.4649, "step": 90360 }, { "epoch": 10.874849578820697, "grad_norm": 4.760385513305664, "learning_rate": 0.0001947958007023862, "loss": 7.4933, "step": 90370 }, { "epoch": 10.876052948255115, "grad_norm": 5.1067585945129395, "learning_rate": 0.0001947945893718591, "loss": 7.4522, "step": 90380 }, { "epoch": 10.87725631768953, "grad_norm": 2.405296802520752, "learning_rate": 0.00019479337790414086, "loss": 7.4288, "step": 90390 }, { "epoch": 10.878459687123947, "grad_norm": 4.424656867980957, "learning_rate": 0.00019479216629923315, "loss": 7.3689, "step": 90400 }, { "epoch": 10.879663056558364, "grad_norm": 4.283668041229248, "learning_rate": 0.0001947909545571378, "loss": 7.417, "step": 90410 }, { "epoch": 10.88086642599278, "grad_norm": 2.591250419616699, "learning_rate": 0.00019478974267785653, "loss": 7.4594, "step": 90420 }, { "epoch": 10.882069795427196, "grad_norm": 3.5165891647338867, "learning_rate": 0.00019478853066139112, "loss": 7.5014, "step": 90430 }, { "epoch": 10.883273164861613, "grad_norm": 4.126473426818848, "learning_rate": 0.00019478731850774324, "loss": 7.4986, "step": 90440 }, { "epoch": 10.884476534296029, "grad_norm": 8.768253326416016, "learning_rate": 0.00019478610621691478, "loss": 7.4554, "step": 90450 }, { "epoch": 10.885679903730445, "grad_norm": 4.767611503601074, "learning_rate": 0.0001947848937889074, "loss": 7.3456, "step": 90460 }, { "epoch": 10.886883273164862, "grad_norm": 3.0895988941192627, "learning_rate": 0.00019478368122372285, "loss": 7.4833, "step": 90470 }, { "epoch": 10.888086642599278, "grad_norm": 4.713790416717529, "learning_rate": 0.00019478246852136293, "loss": 7.3336, "step": 90480 }, { "epoch": 10.889290012033694, "grad_norm": 2.3233847618103027, "learning_rate": 0.00019478125568182938, "loss": 7.4725, "step": 90490 }, { "epoch": 10.890493381468112, "grad_norm": 3.5966014862060547, "learning_rate": 0.00019478004270512394, "loss": 7.4315, "step": 90500 }, { "epoch": 10.891696750902527, "grad_norm": 3.502847909927368, "learning_rate": 0.0001947788295912484, "loss": 7.4436, "step": 90510 }, { "epoch": 10.892900120336943, "grad_norm": 5.073575496673584, "learning_rate": 0.0001947776163402045, "loss": 7.3108, "step": 90520 }, { "epoch": 10.89410348977136, "grad_norm": 2.306201457977295, "learning_rate": 0.00019477640295199398, "loss": 7.3425, "step": 90530 }, { "epoch": 10.895306859205776, "grad_norm": 1.9735738039016724, "learning_rate": 0.00019477518942661857, "loss": 7.3234, "step": 90540 }, { "epoch": 10.896510228640192, "grad_norm": 4.6471638679504395, "learning_rate": 0.0001947739757640801, "loss": 7.4421, "step": 90550 }, { "epoch": 10.89771359807461, "grad_norm": 2.2199184894561768, "learning_rate": 0.00019477276196438027, "loss": 7.3728, "step": 90560 }, { "epoch": 10.898916967509026, "grad_norm": 2.72428822517395, "learning_rate": 0.00019477154802752087, "loss": 7.3408, "step": 90570 }, { "epoch": 10.900120336943441, "grad_norm": 3.1605260372161865, "learning_rate": 0.00019477033395350364, "loss": 7.3783, "step": 90580 }, { "epoch": 10.901323706377857, "grad_norm": 2.2442264556884766, "learning_rate": 0.00019476911974233033, "loss": 7.3796, "step": 90590 }, { "epoch": 10.902527075812275, "grad_norm": 5.595448017120361, "learning_rate": 0.0001947679053940027, "loss": 7.3417, "step": 90600 }, { "epoch": 10.90373044524669, "grad_norm": 3.555079936981201, "learning_rate": 0.00019476669090852253, "loss": 7.3649, "step": 90610 }, { "epoch": 10.904933814681106, "grad_norm": 3.3862462043762207, "learning_rate": 0.00019476547628589154, "loss": 7.3889, "step": 90620 }, { "epoch": 10.906137184115524, "grad_norm": 3.648141622543335, "learning_rate": 0.00019476426152611152, "loss": 7.3131, "step": 90630 }, { "epoch": 10.90734055354994, "grad_norm": 3.0125887393951416, "learning_rate": 0.00019476304662918424, "loss": 7.3405, "step": 90640 }, { "epoch": 10.908543922984355, "grad_norm": 13.056954383850098, "learning_rate": 0.0001947618315951114, "loss": 7.3561, "step": 90650 }, { "epoch": 10.909747292418773, "grad_norm": 4.784685134887695, "learning_rate": 0.0001947606164238948, "loss": 7.4375, "step": 90660 }, { "epoch": 10.910950661853189, "grad_norm": 2.6147332191467285, "learning_rate": 0.0001947594011155362, "loss": 7.3464, "step": 90670 }, { "epoch": 10.912154031287605, "grad_norm": 7.623844623565674, "learning_rate": 0.00019475818567003735, "loss": 7.3462, "step": 90680 }, { "epoch": 10.913357400722022, "grad_norm": 4.65337610244751, "learning_rate": 0.0001947569700874, "loss": 7.3075, "step": 90690 }, { "epoch": 10.914560770156438, "grad_norm": 7.81140661239624, "learning_rate": 0.00019475575436762594, "loss": 7.385, "step": 90700 }, { "epoch": 10.915764139590854, "grad_norm": 7.787203311920166, "learning_rate": 0.00019475453851071692, "loss": 7.4105, "step": 90710 }, { "epoch": 10.916967509025271, "grad_norm": 9.700875282287598, "learning_rate": 0.00019475332251667467, "loss": 7.2566, "step": 90720 }, { "epoch": 10.918170878459687, "grad_norm": 5.274430751800537, "learning_rate": 0.00019475210638550094, "loss": 7.3838, "step": 90730 }, { "epoch": 10.919374247894103, "grad_norm": 5.20965576171875, "learning_rate": 0.00019475089011719755, "loss": 7.3184, "step": 90740 }, { "epoch": 10.92057761732852, "grad_norm": 20.575435638427734, "learning_rate": 0.00019474967371176623, "loss": 7.2909, "step": 90750 }, { "epoch": 10.921780986762936, "grad_norm": 4.916385173797607, "learning_rate": 0.00019474845716920873, "loss": 7.3813, "step": 90760 }, { "epoch": 10.922984356197352, "grad_norm": 5.026564598083496, "learning_rate": 0.00019474724048952684, "loss": 7.2943, "step": 90770 }, { "epoch": 10.92418772563177, "grad_norm": 9.916465759277344, "learning_rate": 0.00019474602367272227, "loss": 7.2248, "step": 90780 }, { "epoch": 10.925391095066185, "grad_norm": 6.919652938842773, "learning_rate": 0.00019474480671879683, "loss": 7.3152, "step": 90790 }, { "epoch": 10.926594464500601, "grad_norm": 8.888089179992676, "learning_rate": 0.00019474358962775227, "loss": 7.2641, "step": 90800 }, { "epoch": 10.927797833935019, "grad_norm": 18.766050338745117, "learning_rate": 0.00019474237239959033, "loss": 7.3283, "step": 90810 }, { "epoch": 10.929001203369435, "grad_norm": 14.107617378234863, "learning_rate": 0.0001947411550343128, "loss": 7.2375, "step": 90820 }, { "epoch": 10.93020457280385, "grad_norm": 18.338733673095703, "learning_rate": 0.00019473993753192138, "loss": 7.1868, "step": 90830 }, { "epoch": 10.931407942238268, "grad_norm": 9.514047622680664, "learning_rate": 0.00019473871989241794, "loss": 7.3133, "step": 90840 }, { "epoch": 10.932611311672684, "grad_norm": 11.194342613220215, "learning_rate": 0.00019473750211580418, "loss": 7.2377, "step": 90850 }, { "epoch": 10.9338146811071, "grad_norm": 11.65888500213623, "learning_rate": 0.0001947362842020818, "loss": 7.183, "step": 90860 }, { "epoch": 10.935018050541515, "grad_norm": 12.573893547058105, "learning_rate": 0.0001947350661512527, "loss": 7.2272, "step": 90870 }, { "epoch": 10.936221419975933, "grad_norm": 18.41805648803711, "learning_rate": 0.00019473384796331853, "loss": 7.2195, "step": 90880 }, { "epoch": 10.937424789410349, "grad_norm": 10.399399757385254, "learning_rate": 0.0001947326296382811, "loss": 7.1829, "step": 90890 }, { "epoch": 10.938628158844764, "grad_norm": 27.217512130737305, "learning_rate": 0.00019473141117614217, "loss": 7.2757, "step": 90900 }, { "epoch": 10.939831528279182, "grad_norm": 24.504770278930664, "learning_rate": 0.0001947301925769035, "loss": 7.2734, "step": 90910 }, { "epoch": 10.941034897713598, "grad_norm": 15.4624662399292, "learning_rate": 0.00019472897384056686, "loss": 7.267, "step": 90920 }, { "epoch": 10.942238267148014, "grad_norm": 23.55158233642578, "learning_rate": 0.00019472775496713398, "loss": 7.154, "step": 90930 }, { "epoch": 10.943441636582431, "grad_norm": 23.233240127563477, "learning_rate": 0.00019472653595660667, "loss": 7.3416, "step": 90940 }, { "epoch": 10.944645006016847, "grad_norm": 21.710960388183594, "learning_rate": 0.00019472531680898668, "loss": 7.1898, "step": 90950 }, { "epoch": 10.945848375451263, "grad_norm": 17.23990821838379, "learning_rate": 0.00019472409752427578, "loss": 7.2277, "step": 90960 }, { "epoch": 10.94705174488568, "grad_norm": 8.576688766479492, "learning_rate": 0.00019472287810247573, "loss": 7.2173, "step": 90970 }, { "epoch": 10.948255114320096, "grad_norm": 22.814760208129883, "learning_rate": 0.00019472165854358825, "loss": 7.3606, "step": 90980 }, { "epoch": 10.949458483754512, "grad_norm": 13.988065719604492, "learning_rate": 0.00019472043884761515, "loss": 7.1819, "step": 90990 }, { "epoch": 10.95066185318893, "grad_norm": 22.70740509033203, "learning_rate": 0.00019471921901455822, "loss": 7.3582, "step": 91000 }, { "epoch": 10.951865222623345, "grad_norm": 19.532262802124023, "learning_rate": 0.00019471799904441917, "loss": 7.1927, "step": 91010 }, { "epoch": 10.953068592057761, "grad_norm": 14.020984649658203, "learning_rate": 0.00019471677893719979, "loss": 7.3016, "step": 91020 }, { "epoch": 10.954271961492179, "grad_norm": 13.995887756347656, "learning_rate": 0.00019471555869290185, "loss": 7.1971, "step": 91030 }, { "epoch": 10.955475330926594, "grad_norm": 19.562938690185547, "learning_rate": 0.00019471433831152711, "loss": 7.2804, "step": 91040 }, { "epoch": 10.95667870036101, "grad_norm": 18.86977767944336, "learning_rate": 0.00019471311779307735, "loss": 7.3369, "step": 91050 }, { "epoch": 10.957882069795428, "grad_norm": 27.458768844604492, "learning_rate": 0.00019471189713755432, "loss": 7.2666, "step": 91060 }, { "epoch": 10.959085439229844, "grad_norm": 7.772292137145996, "learning_rate": 0.0001947106763449598, "loss": 7.2552, "step": 91070 }, { "epoch": 10.96028880866426, "grad_norm": 29.27229118347168, "learning_rate": 0.00019470945541529556, "loss": 7.2156, "step": 91080 }, { "epoch": 10.961492178098677, "grad_norm": 9.601334571838379, "learning_rate": 0.0001947082343485633, "loss": 7.1959, "step": 91090 }, { "epoch": 10.962695547533093, "grad_norm": 82.26222229003906, "learning_rate": 0.0001947070131447649, "loss": 7.2415, "step": 91100 }, { "epoch": 10.963898916967509, "grad_norm": 16.596284866333008, "learning_rate": 0.00019470579180390203, "loss": 7.2281, "step": 91110 }, { "epoch": 10.965102286401926, "grad_norm": 12.260496139526367, "learning_rate": 0.00019470457032597652, "loss": 7.2486, "step": 91120 }, { "epoch": 10.966305655836342, "grad_norm": 19.594247817993164, "learning_rate": 0.00019470334871099008, "loss": 7.1312, "step": 91130 }, { "epoch": 10.967509025270758, "grad_norm": 29.621244430541992, "learning_rate": 0.00019470212695894454, "loss": 7.1589, "step": 91140 }, { "epoch": 10.968712394705175, "grad_norm": 52.26381301879883, "learning_rate": 0.00019470090506984165, "loss": 7.269, "step": 91150 }, { "epoch": 10.969915764139591, "grad_norm": 44.431034088134766, "learning_rate": 0.00019469968304368317, "loss": 7.2556, "step": 91160 }, { "epoch": 10.971119133574007, "grad_norm": 10.630620002746582, "learning_rate": 0.00019469846088047084, "loss": 7.2296, "step": 91170 }, { "epoch": 10.972322503008424, "grad_norm": 50.92340850830078, "learning_rate": 0.00019469723858020648, "loss": 7.1534, "step": 91180 }, { "epoch": 10.97352587244284, "grad_norm": 14.209988594055176, "learning_rate": 0.00019469601614289183, "loss": 7.3142, "step": 91190 }, { "epoch": 10.974729241877256, "grad_norm": 36.25379943847656, "learning_rate": 0.00019469479356852866, "loss": 7.2598, "step": 91200 }, { "epoch": 10.975932611311674, "grad_norm": 36.42372131347656, "learning_rate": 0.00019469357085711875, "loss": 7.1667, "step": 91210 }, { "epoch": 10.97713598074609, "grad_norm": 15.982621192932129, "learning_rate": 0.00019469234800866388, "loss": 7.3051, "step": 91220 }, { "epoch": 10.978339350180505, "grad_norm": 35.26732635498047, "learning_rate": 0.00019469112502316577, "loss": 7.2107, "step": 91230 }, { "epoch": 10.979542719614923, "grad_norm": 110.99844360351562, "learning_rate": 0.00019468990190062626, "loss": 7.2238, "step": 91240 }, { "epoch": 10.980746089049338, "grad_norm": 76.19429016113281, "learning_rate": 0.00019468867864104705, "loss": 7.1451, "step": 91250 }, { "epoch": 10.981949458483754, "grad_norm": 59.430763244628906, "learning_rate": 0.00019468745524442996, "loss": 7.2429, "step": 91260 }, { "epoch": 10.98315282791817, "grad_norm": 42.25979995727539, "learning_rate": 0.00019468623171077674, "loss": 7.1314, "step": 91270 }, { "epoch": 10.984356197352588, "grad_norm": 90.5514144897461, "learning_rate": 0.00019468500804008918, "loss": 7.2055, "step": 91280 }, { "epoch": 10.985559566787003, "grad_norm": 17.978731155395508, "learning_rate": 0.00019468378423236903, "loss": 7.3261, "step": 91290 }, { "epoch": 10.98676293622142, "grad_norm": 53.58927917480469, "learning_rate": 0.00019468256028761806, "loss": 7.1953, "step": 91300 }, { "epoch": 10.987966305655837, "grad_norm": 53.382686614990234, "learning_rate": 0.00019468133620583803, "loss": 7.1197, "step": 91310 }, { "epoch": 10.989169675090253, "grad_norm": 16.70414161682129, "learning_rate": 0.00019468011198703079, "loss": 7.3798, "step": 91320 }, { "epoch": 10.990373044524668, "grad_norm": 54.25605392456055, "learning_rate": 0.000194678887631198, "loss": 7.2005, "step": 91330 }, { "epoch": 10.991576413959086, "grad_norm": 36.17827224731445, "learning_rate": 0.00019467766313834147, "loss": 7.1786, "step": 91340 }, { "epoch": 10.992779783393502, "grad_norm": 197.1502227783203, "learning_rate": 0.00019467643850846304, "loss": 7.1718, "step": 91350 }, { "epoch": 10.993983152827917, "grad_norm": 31.615577697753906, "learning_rate": 0.00019467521374156438, "loss": 7.2492, "step": 91360 }, { "epoch": 10.995186522262335, "grad_norm": 27.217073440551758, "learning_rate": 0.00019467398883764734, "loss": 7.1934, "step": 91370 }, { "epoch": 10.99638989169675, "grad_norm": 32.41168212890625, "learning_rate": 0.00019467276379671366, "loss": 7.0922, "step": 91380 }, { "epoch": 10.997593261131167, "grad_norm": 35.65819549560547, "learning_rate": 0.00019467153861876512, "loss": 7.1939, "step": 91390 }, { "epoch": 10.998796630565584, "grad_norm": 68.97451782226562, "learning_rate": 0.00019467031330380348, "loss": 7.203, "step": 91400 }, { "epoch": 11.0, "grad_norm": 52.4002685546875, "learning_rate": 0.00019466908785183056, "loss": 7.1735, "step": 91410 }, { "epoch": 11.0, "eval_loss": 7.210310459136963, "eval_runtime": 119.6216, "eval_samples_per_second": 61.753, "eval_steps_per_second": 7.724, "step": 91410 }, { "epoch": 11.001203369434416, "grad_norm": 88.97228240966797, "learning_rate": 0.00019466786226284802, "loss": 7.0587, "step": 91420 }, { "epoch": 11.002406738868833, "grad_norm": 31.14643096923828, "learning_rate": 0.00019466663653685778, "loss": 7.1077, "step": 91430 }, { "epoch": 11.00361010830325, "grad_norm": 57.134159088134766, "learning_rate": 0.00019466541067386152, "loss": 7.1847, "step": 91440 }, { "epoch": 11.004813477737665, "grad_norm": 23.970157623291016, "learning_rate": 0.00019466418467386105, "loss": 7.1429, "step": 91450 }, { "epoch": 11.006016847172083, "grad_norm": 87.09337615966797, "learning_rate": 0.0001946629585368581, "loss": 7.2082, "step": 91460 }, { "epoch": 11.007220216606498, "grad_norm": 78.77494049072266, "learning_rate": 0.00019466173226285452, "loss": 7.1348, "step": 91470 }, { "epoch": 11.008423586040914, "grad_norm": 43.038944244384766, "learning_rate": 0.00019466050585185203, "loss": 7.2466, "step": 91480 }, { "epoch": 11.009626955475332, "grad_norm": 19.317781448364258, "learning_rate": 0.0001946592793038524, "loss": 7.1249, "step": 91490 }, { "epoch": 11.010830324909747, "grad_norm": 47.15719223022461, "learning_rate": 0.0001946580526188574, "loss": 7.1508, "step": 91500 }, { "epoch": 11.012033694344163, "grad_norm": 45.80705642700195, "learning_rate": 0.00019465682579686888, "loss": 7.1475, "step": 91510 }, { "epoch": 11.01323706377858, "grad_norm": 34.837158203125, "learning_rate": 0.00019465559883788855, "loss": 7.2702, "step": 91520 }, { "epoch": 11.014440433212997, "grad_norm": 24.252323150634766, "learning_rate": 0.0001946543717419182, "loss": 7.1592, "step": 91530 }, { "epoch": 11.015643802647412, "grad_norm": 108.92929077148438, "learning_rate": 0.00019465314450895958, "loss": 7.1675, "step": 91540 }, { "epoch": 11.01684717208183, "grad_norm": 58.81249237060547, "learning_rate": 0.0001946519171390145, "loss": 7.1503, "step": 91550 }, { "epoch": 11.018050541516246, "grad_norm": 92.31916809082031, "learning_rate": 0.00019465068963208472, "loss": 7.2044, "step": 91560 }, { "epoch": 11.019253910950662, "grad_norm": 60.861717224121094, "learning_rate": 0.00019464946198817206, "loss": 7.28, "step": 91570 }, { "epoch": 11.020457280385079, "grad_norm": 34.50305938720703, "learning_rate": 0.00019464823420727823, "loss": 7.198, "step": 91580 }, { "epoch": 11.021660649819495, "grad_norm": 37.99361801147461, "learning_rate": 0.00019464700628940505, "loss": 7.2525, "step": 91590 }, { "epoch": 11.02286401925391, "grad_norm": 63.08331298828125, "learning_rate": 0.00019464577823455426, "loss": 7.1535, "step": 91600 }, { "epoch": 11.024067388688326, "grad_norm": 106.97830963134766, "learning_rate": 0.00019464455004272768, "loss": 7.1971, "step": 91610 }, { "epoch": 11.025270758122744, "grad_norm": 58.077816009521484, "learning_rate": 0.0001946433217139271, "loss": 7.1119, "step": 91620 }, { "epoch": 11.02647412755716, "grad_norm": 89.77117919921875, "learning_rate": 0.0001946420932481542, "loss": 7.1836, "step": 91630 }, { "epoch": 11.027677496991576, "grad_norm": 107.27037048339844, "learning_rate": 0.0001946408646454109, "loss": 7.1823, "step": 91640 }, { "epoch": 11.028880866425993, "grad_norm": 124.76302337646484, "learning_rate": 0.00019463963590569884, "loss": 7.2215, "step": 91650 }, { "epoch": 11.030084235860409, "grad_norm": 1184.3741455078125, "learning_rate": 0.00019463840702901988, "loss": 7.2242, "step": 91660 }, { "epoch": 11.031287605294825, "grad_norm": 253.3821258544922, "learning_rate": 0.00019463717801537577, "loss": 7.3282, "step": 91670 }, { "epoch": 11.032490974729242, "grad_norm": 155.58058166503906, "learning_rate": 0.00019463594886476836, "loss": 7.2619, "step": 91680 }, { "epoch": 11.033694344163658, "grad_norm": 281.6895751953125, "learning_rate": 0.00019463471957719927, "loss": 7.1515, "step": 91690 }, { "epoch": 11.034897713598074, "grad_norm": 182.16835021972656, "learning_rate": 0.00019463349015267045, "loss": 7.2305, "step": 91700 }, { "epoch": 11.036101083032491, "grad_norm": 102.86280822753906, "learning_rate": 0.0001946322605911836, "loss": 7.1943, "step": 91710 }, { "epoch": 11.037304452466907, "grad_norm": 72.52764892578125, "learning_rate": 0.00019463103089274047, "loss": 7.212, "step": 91720 }, { "epoch": 11.038507821901323, "grad_norm": 187.4462890625, "learning_rate": 0.0001946298010573429, "loss": 7.1589, "step": 91730 }, { "epoch": 11.03971119133574, "grad_norm": 91.68645477294922, "learning_rate": 0.00019462857108499263, "loss": 7.095, "step": 91740 }, { "epoch": 11.040914560770156, "grad_norm": 64.60287475585938, "learning_rate": 0.00019462734097569145, "loss": 7.1291, "step": 91750 }, { "epoch": 11.042117930204572, "grad_norm": 104.13249206542969, "learning_rate": 0.00019462611072944115, "loss": 7.1589, "step": 91760 }, { "epoch": 11.04332129963899, "grad_norm": 51.590476989746094, "learning_rate": 0.0001946248803462435, "loss": 7.1153, "step": 91770 }, { "epoch": 11.044524669073406, "grad_norm": 89.51387023925781, "learning_rate": 0.00019462364982610032, "loss": 7.2722, "step": 91780 }, { "epoch": 11.045728038507821, "grad_norm": 149.9066619873047, "learning_rate": 0.0001946224191690133, "loss": 7.1074, "step": 91790 }, { "epoch": 11.046931407942239, "grad_norm": 85.96741485595703, "learning_rate": 0.00019462118837498433, "loss": 7.165, "step": 91800 }, { "epoch": 11.048134777376655, "grad_norm": 389.3432312011719, "learning_rate": 0.00019461995744401512, "loss": 7.2343, "step": 91810 }, { "epoch": 11.04933814681107, "grad_norm": 109.79691314697266, "learning_rate": 0.00019461872637610748, "loss": 7.2251, "step": 91820 }, { "epoch": 11.050541516245488, "grad_norm": 67.6067123413086, "learning_rate": 0.00019461749517126316, "loss": 7.2376, "step": 91830 }, { "epoch": 11.051744885679904, "grad_norm": 136.72100830078125, "learning_rate": 0.000194616263829484, "loss": 7.2126, "step": 91840 }, { "epoch": 11.05294825511432, "grad_norm": 104.37311553955078, "learning_rate": 0.0001946150323507717, "loss": 7.204, "step": 91850 }, { "epoch": 11.054151624548737, "grad_norm": 229.95697021484375, "learning_rate": 0.00019461380073512813, "loss": 7.2275, "step": 91860 }, { "epoch": 11.055354993983153, "grad_norm": 110.85474395751953, "learning_rate": 0.000194612568982555, "loss": 7.2068, "step": 91870 }, { "epoch": 11.056558363417569, "grad_norm": 154.43856811523438, "learning_rate": 0.00019461133709305415, "loss": 7.1856, "step": 91880 }, { "epoch": 11.057761732851986, "grad_norm": 83.03573608398438, "learning_rate": 0.0001946101050666273, "loss": 7.1952, "step": 91890 }, { "epoch": 11.058965102286402, "grad_norm": 63.278724670410156, "learning_rate": 0.00019460887290327633, "loss": 7.2036, "step": 91900 }, { "epoch": 11.060168471720818, "grad_norm": 326.2492370605469, "learning_rate": 0.0001946076406030029, "loss": 7.209, "step": 91910 }, { "epoch": 11.061371841155236, "grad_norm": 133.4531707763672, "learning_rate": 0.00019460640816580888, "loss": 7.1174, "step": 91920 }, { "epoch": 11.062575210589651, "grad_norm": 59.3718147277832, "learning_rate": 0.00019460517559169603, "loss": 7.1888, "step": 91930 }, { "epoch": 11.063778580024067, "grad_norm": 418.29290771484375, "learning_rate": 0.00019460394288066613, "loss": 7.147, "step": 91940 }, { "epoch": 11.064981949458483, "grad_norm": 51.344417572021484, "learning_rate": 0.00019460271003272098, "loss": 7.1793, "step": 91950 }, { "epoch": 11.0661853188929, "grad_norm": 123.91556549072266, "learning_rate": 0.00019460147704786237, "loss": 7.2084, "step": 91960 }, { "epoch": 11.067388688327316, "grad_norm": 121.50829315185547, "learning_rate": 0.000194600243926092, "loss": 7.1258, "step": 91970 }, { "epoch": 11.068592057761732, "grad_norm": 125.03963470458984, "learning_rate": 0.00019459901066741178, "loss": 7.2939, "step": 91980 }, { "epoch": 11.06979542719615, "grad_norm": 142.17739868164062, "learning_rate": 0.00019459777727182342, "loss": 7.2213, "step": 91990 }, { "epoch": 11.070998796630565, "grad_norm": 94.00983428955078, "learning_rate": 0.00019459654373932871, "loss": 7.3204, "step": 92000 }, { "epoch": 11.072202166064981, "grad_norm": 88.48886108398438, "learning_rate": 0.00019459531006992947, "loss": 7.1866, "step": 92010 }, { "epoch": 11.073405535499399, "grad_norm": 127.03926849365234, "learning_rate": 0.00019459407626362744, "loss": 7.1556, "step": 92020 }, { "epoch": 11.074608904933815, "grad_norm": 45.86471176147461, "learning_rate": 0.00019459284232042443, "loss": 7.2194, "step": 92030 }, { "epoch": 11.07581227436823, "grad_norm": 164.46554565429688, "learning_rate": 0.0001945916082403222, "loss": 7.1532, "step": 92040 }, { "epoch": 11.077015643802648, "grad_norm": 212.33151245117188, "learning_rate": 0.00019459037402332261, "loss": 7.2713, "step": 92050 }, { "epoch": 11.078219013237064, "grad_norm": 58.923831939697266, "learning_rate": 0.00019458913966942737, "loss": 7.2077, "step": 92060 }, { "epoch": 11.07942238267148, "grad_norm": 66.29691314697266, "learning_rate": 0.0001945879051786383, "loss": 7.1387, "step": 92070 }, { "epoch": 11.080625752105897, "grad_norm": 48.843936920166016, "learning_rate": 0.00019458667055095716, "loss": 7.2679, "step": 92080 }, { "epoch": 11.081829121540313, "grad_norm": 42.04731369018555, "learning_rate": 0.00019458543578638579, "loss": 7.1973, "step": 92090 }, { "epoch": 11.083032490974729, "grad_norm": 94.57808685302734, "learning_rate": 0.00019458420088492591, "loss": 7.1809, "step": 92100 }, { "epoch": 11.084235860409146, "grad_norm": 196.99717712402344, "learning_rate": 0.00019458296584657934, "loss": 7.1951, "step": 92110 }, { "epoch": 11.085439229843562, "grad_norm": 124.04299926757812, "learning_rate": 0.00019458173067134787, "loss": 7.1782, "step": 92120 }, { "epoch": 11.086642599277978, "grad_norm": 51.40081024169922, "learning_rate": 0.00019458049535923328, "loss": 7.2055, "step": 92130 }, { "epoch": 11.087845968712395, "grad_norm": 96.16692352294922, "learning_rate": 0.00019457925991023736, "loss": 7.124, "step": 92140 }, { "epoch": 11.089049338146811, "grad_norm": 64.81087493896484, "learning_rate": 0.0001945780243243619, "loss": 7.1218, "step": 92150 }, { "epoch": 11.090252707581227, "grad_norm": 60.572288513183594, "learning_rate": 0.00019457678860160873, "loss": 7.2589, "step": 92160 }, { "epoch": 11.091456077015645, "grad_norm": 50.32078170776367, "learning_rate": 0.00019457555274197957, "loss": 7.1335, "step": 92170 }, { "epoch": 11.09265944645006, "grad_norm": 27.044208526611328, "learning_rate": 0.0001945743167454762, "loss": 7.219, "step": 92180 }, { "epoch": 11.093862815884476, "grad_norm": 138.8689422607422, "learning_rate": 0.00019457308061210049, "loss": 7.2221, "step": 92190 }, { "epoch": 11.095066185318894, "grad_norm": 25.8515682220459, "learning_rate": 0.00019457184434185416, "loss": 7.0875, "step": 92200 }, { "epoch": 11.09626955475331, "grad_norm": 50.15380859375, "learning_rate": 0.00019457060793473904, "loss": 7.2845, "step": 92210 }, { "epoch": 11.097472924187725, "grad_norm": 64.73393249511719, "learning_rate": 0.0001945693713907569, "loss": 7.1922, "step": 92220 }, { "epoch": 11.098676293622143, "grad_norm": 32.96710205078125, "learning_rate": 0.00019456813470990948, "loss": 7.1761, "step": 92230 }, { "epoch": 11.099879663056559, "grad_norm": 26.588193893432617, "learning_rate": 0.00019456689789219868, "loss": 7.2422, "step": 92240 }, { "epoch": 11.101083032490974, "grad_norm": 87.43901824951172, "learning_rate": 0.0001945656609376262, "loss": 7.243, "step": 92250 }, { "epoch": 11.102286401925392, "grad_norm": 43.802608489990234, "learning_rate": 0.0001945644238461939, "loss": 7.2159, "step": 92260 }, { "epoch": 11.103489771359808, "grad_norm": 79.26888275146484, "learning_rate": 0.0001945631866179035, "loss": 7.1176, "step": 92270 }, { "epoch": 11.104693140794224, "grad_norm": 61.184104919433594, "learning_rate": 0.00019456194925275683, "loss": 7.1025, "step": 92280 }, { "epoch": 11.10589651022864, "grad_norm": 90.06178283691406, "learning_rate": 0.00019456071175075564, "loss": 7.2312, "step": 92290 }, { "epoch": 11.107099879663057, "grad_norm": 78.83863830566406, "learning_rate": 0.0001945594741119018, "loss": 7.1369, "step": 92300 }, { "epoch": 11.108303249097473, "grad_norm": 249.10198974609375, "learning_rate": 0.00019455823633619706, "loss": 7.1614, "step": 92310 }, { "epoch": 11.109506618531888, "grad_norm": 81.26526641845703, "learning_rate": 0.00019455699842364319, "loss": 7.1176, "step": 92320 }, { "epoch": 11.110709987966306, "grad_norm": 56.093475341796875, "learning_rate": 0.00019455576037424196, "loss": 7.151, "step": 92330 }, { "epoch": 11.111913357400722, "grad_norm": 108.1319351196289, "learning_rate": 0.00019455452218799526, "loss": 7.1621, "step": 92340 }, { "epoch": 11.113116726835138, "grad_norm": 46.65833282470703, "learning_rate": 0.0001945532838649048, "loss": 7.1636, "step": 92350 }, { "epoch": 11.114320096269555, "grad_norm": 56.3868522644043, "learning_rate": 0.0001945520454049724, "loss": 7.1053, "step": 92360 }, { "epoch": 11.115523465703971, "grad_norm": 46.710933685302734, "learning_rate": 0.00019455080680819982, "loss": 7.1899, "step": 92370 }, { "epoch": 11.116726835138387, "grad_norm": 29.239248275756836, "learning_rate": 0.00019454956807458889, "loss": 7.1444, "step": 92380 }, { "epoch": 11.117930204572804, "grad_norm": 51.71601486206055, "learning_rate": 0.0001945483292041414, "loss": 7.1056, "step": 92390 }, { "epoch": 11.11913357400722, "grad_norm": 18.059215545654297, "learning_rate": 0.00019454709019685912, "loss": 7.1753, "step": 92400 }, { "epoch": 11.120336943441636, "grad_norm": 510.7772216796875, "learning_rate": 0.00019454585105274386, "loss": 7.2051, "step": 92410 }, { "epoch": 11.121540312876053, "grad_norm": 86.14214324951172, "learning_rate": 0.00019454461177179745, "loss": 7.2663, "step": 92420 }, { "epoch": 11.12274368231047, "grad_norm": 55.65878677368164, "learning_rate": 0.00019454337235402157, "loss": 7.2085, "step": 92430 }, { "epoch": 11.123947051744885, "grad_norm": 55.23828887939453, "learning_rate": 0.00019454213279941816, "loss": 7.1932, "step": 92440 }, { "epoch": 11.125150421179303, "grad_norm": 38.14577102661133, "learning_rate": 0.00019454089310798892, "loss": 7.2278, "step": 92450 }, { "epoch": 11.126353790613718, "grad_norm": 64.26361083984375, "learning_rate": 0.00019453965327973565, "loss": 7.0654, "step": 92460 }, { "epoch": 11.127557160048134, "grad_norm": 38.1277961730957, "learning_rate": 0.00019453841331466015, "loss": 7.2261, "step": 92470 }, { "epoch": 11.128760529482552, "grad_norm": 25.92961883544922, "learning_rate": 0.00019453717321276428, "loss": 7.19, "step": 92480 }, { "epoch": 11.129963898916968, "grad_norm": 30.9171085357666, "learning_rate": 0.00019453593297404974, "loss": 7.1756, "step": 92490 }, { "epoch": 11.131167268351383, "grad_norm": 23.84162712097168, "learning_rate": 0.00019453469259851836, "loss": 7.2164, "step": 92500 }, { "epoch": 11.132370637785801, "grad_norm": 64.93234252929688, "learning_rate": 0.00019453345208617195, "loss": 7.3184, "step": 92510 }, { "epoch": 11.133574007220217, "grad_norm": 39.70873260498047, "learning_rate": 0.00019453221143701227, "loss": 7.1402, "step": 92520 }, { "epoch": 11.134777376654633, "grad_norm": 53.39655303955078, "learning_rate": 0.0001945309706510412, "loss": 7.2207, "step": 92530 }, { "epoch": 11.13598074608905, "grad_norm": 41.56190872192383, "learning_rate": 0.00019452972972826044, "loss": 7.1307, "step": 92540 }, { "epoch": 11.137184115523466, "grad_norm": 57.11745071411133, "learning_rate": 0.00019452848866867182, "loss": 7.239, "step": 92550 }, { "epoch": 11.138387484957882, "grad_norm": 115.2613525390625, "learning_rate": 0.00019452724747227716, "loss": 7.1385, "step": 92560 }, { "epoch": 11.1395908543923, "grad_norm": 123.32693481445312, "learning_rate": 0.00019452600613907822, "loss": 7.1638, "step": 92570 }, { "epoch": 11.140794223826715, "grad_norm": 164.2078399658203, "learning_rate": 0.0001945247646690768, "loss": 7.2597, "step": 92580 }, { "epoch": 11.14199759326113, "grad_norm": 78.80862426757812, "learning_rate": 0.0001945235230622747, "loss": 7.2637, "step": 92590 }, { "epoch": 11.143200962695548, "grad_norm": 38.976924896240234, "learning_rate": 0.00019452228131867374, "loss": 7.2155, "step": 92600 }, { "epoch": 11.144404332129964, "grad_norm": 69.75531768798828, "learning_rate": 0.00019452103943827571, "loss": 7.2173, "step": 92610 }, { "epoch": 11.14560770156438, "grad_norm": 82.06482696533203, "learning_rate": 0.00019451979742108238, "loss": 7.1878, "step": 92620 }, { "epoch": 11.146811070998796, "grad_norm": 30.522260665893555, "learning_rate": 0.00019451855526709562, "loss": 7.1819, "step": 92630 }, { "epoch": 11.148014440433213, "grad_norm": 93.49755859375, "learning_rate": 0.0001945173129763171, "loss": 7.1299, "step": 92640 }, { "epoch": 11.14921780986763, "grad_norm": 29.52436637878418, "learning_rate": 0.00019451607054874875, "loss": 7.1796, "step": 92650 }, { "epoch": 11.150421179302045, "grad_norm": 143.0687713623047, "learning_rate": 0.0001945148279843923, "loss": 7.1951, "step": 92660 }, { "epoch": 11.151624548736462, "grad_norm": 59.0098762512207, "learning_rate": 0.00019451358528324955, "loss": 7.2471, "step": 92670 }, { "epoch": 11.152827918170878, "grad_norm": 39.90213394165039, "learning_rate": 0.00019451234244532227, "loss": 7.1435, "step": 92680 }, { "epoch": 11.154031287605294, "grad_norm": 80.75251007080078, "learning_rate": 0.00019451109947061235, "loss": 7.2116, "step": 92690 }, { "epoch": 11.155234657039712, "grad_norm": 37.435218811035156, "learning_rate": 0.00019450985635912152, "loss": 7.2105, "step": 92700 }, { "epoch": 11.156438026474127, "grad_norm": 73.86651611328125, "learning_rate": 0.00019450861311085161, "loss": 7.1904, "step": 92710 }, { "epoch": 11.157641395908543, "grad_norm": 50.3310546875, "learning_rate": 0.00019450736972580437, "loss": 7.0439, "step": 92720 }, { "epoch": 11.15884476534296, "grad_norm": 21.93097496032715, "learning_rate": 0.00019450612620398164, "loss": 7.1834, "step": 92730 }, { "epoch": 11.160048134777377, "grad_norm": 33.05723190307617, "learning_rate": 0.0001945048825453852, "loss": 7.178, "step": 92740 }, { "epoch": 11.161251504211792, "grad_norm": 25.512727737426758, "learning_rate": 0.0001945036387500169, "loss": 7.1151, "step": 92750 }, { "epoch": 11.16245487364621, "grad_norm": 40.864253997802734, "learning_rate": 0.0001945023948178785, "loss": 7.2379, "step": 92760 }, { "epoch": 11.163658243080626, "grad_norm": 25.75570297241211, "learning_rate": 0.00019450115074897176, "loss": 7.2645, "step": 92770 }, { "epoch": 11.164861612515042, "grad_norm": 13.679675102233887, "learning_rate": 0.00019449990654329854, "loss": 7.166, "step": 92780 }, { "epoch": 11.166064981949459, "grad_norm": 35.34743881225586, "learning_rate": 0.00019449866220086067, "loss": 7.1737, "step": 92790 }, { "epoch": 11.167268351383875, "grad_norm": 9.715303421020508, "learning_rate": 0.00019449741772165985, "loss": 7.282, "step": 92800 }, { "epoch": 11.16847172081829, "grad_norm": 18.321287155151367, "learning_rate": 0.00019449617310569796, "loss": 7.2068, "step": 92810 }, { "epoch": 11.169675090252708, "grad_norm": 20.16455841064453, "learning_rate": 0.0001944949283529768, "loss": 7.2858, "step": 92820 }, { "epoch": 11.170878459687124, "grad_norm": 12.81849479675293, "learning_rate": 0.0001944936834634981, "loss": 7.2756, "step": 92830 }, { "epoch": 11.17208182912154, "grad_norm": 51.767635345458984, "learning_rate": 0.00019449243843726373, "loss": 7.1577, "step": 92840 }, { "epoch": 11.173285198555957, "grad_norm": 31.77743911743164, "learning_rate": 0.00019449119327427547, "loss": 7.2155, "step": 92850 }, { "epoch": 11.174488567990373, "grad_norm": 26.9172420501709, "learning_rate": 0.00019448994797453513, "loss": 7.1221, "step": 92860 }, { "epoch": 11.175691937424789, "grad_norm": 129.29295349121094, "learning_rate": 0.0001944887025380445, "loss": 7.1493, "step": 92870 }, { "epoch": 11.176895306859207, "grad_norm": 38.593170166015625, "learning_rate": 0.0001944874569648054, "loss": 7.2299, "step": 92880 }, { "epoch": 11.178098676293622, "grad_norm": 65.74227905273438, "learning_rate": 0.00019448621125481963, "loss": 7.1211, "step": 92890 }, { "epoch": 11.179302045728038, "grad_norm": 39.74928283691406, "learning_rate": 0.00019448496540808895, "loss": 7.1957, "step": 92900 }, { "epoch": 11.180505415162456, "grad_norm": 29.60835838317871, "learning_rate": 0.00019448371942461522, "loss": 7.1436, "step": 92910 }, { "epoch": 11.181708784596871, "grad_norm": 28.809106826782227, "learning_rate": 0.0001944824733044002, "loss": 7.0376, "step": 92920 }, { "epoch": 11.182912154031287, "grad_norm": 33.554931640625, "learning_rate": 0.00019448122704744574, "loss": 7.2977, "step": 92930 }, { "epoch": 11.184115523465705, "grad_norm": 41.13526916503906, "learning_rate": 0.0001944799806537536, "loss": 7.1977, "step": 92940 }, { "epoch": 11.18531889290012, "grad_norm": 46.14151382446289, "learning_rate": 0.00019447873412332562, "loss": 7.1166, "step": 92950 }, { "epoch": 11.186522262334536, "grad_norm": 26.503915786743164, "learning_rate": 0.0001944774874561636, "loss": 7.2647, "step": 92960 }, { "epoch": 11.187725631768952, "grad_norm": 19.187028884887695, "learning_rate": 0.00019447624065226928, "loss": 7.1581, "step": 92970 }, { "epoch": 11.18892900120337, "grad_norm": 13.668183326721191, "learning_rate": 0.00019447499371164458, "loss": 7.2296, "step": 92980 }, { "epoch": 11.190132370637786, "grad_norm": 39.912803649902344, "learning_rate": 0.0001944737466342912, "loss": 7.1242, "step": 92990 }, { "epoch": 11.191335740072201, "grad_norm": 26.953168869018555, "learning_rate": 0.00019447249942021095, "loss": 7.1519, "step": 93000 }, { "epoch": 11.192539109506619, "grad_norm": 49.079139709472656, "learning_rate": 0.00019447125206940573, "loss": 7.1099, "step": 93010 }, { "epoch": 11.193742478941035, "grad_norm": 51.37486267089844, "learning_rate": 0.00019447000458187725, "loss": 7.0384, "step": 93020 }, { "epoch": 11.19494584837545, "grad_norm": 31.320575714111328, "learning_rate": 0.00019446875695762735, "loss": 7.2404, "step": 93030 }, { "epoch": 11.196149217809868, "grad_norm": 407.4568176269531, "learning_rate": 0.00019446750919665783, "loss": 7.1589, "step": 93040 }, { "epoch": 11.197352587244284, "grad_norm": 45.0624885559082, "learning_rate": 0.00019446626129897052, "loss": 7.2945, "step": 93050 }, { "epoch": 11.1985559566787, "grad_norm": 32.37380599975586, "learning_rate": 0.0001944650132645672, "loss": 7.1759, "step": 93060 }, { "epoch": 11.199759326113117, "grad_norm": 33.439395904541016, "learning_rate": 0.00019446376509344967, "loss": 7.2295, "step": 93070 }, { "epoch": 11.200962695547533, "grad_norm": 32.01109313964844, "learning_rate": 0.00019446251678561979, "loss": 7.1219, "step": 93080 }, { "epoch": 11.202166064981949, "grad_norm": 39.83729934692383, "learning_rate": 0.00019446126834107928, "loss": 7.1233, "step": 93090 }, { "epoch": 11.203369434416366, "grad_norm": 31.020111083984375, "learning_rate": 0.00019446001975983, "loss": 7.2814, "step": 93100 }, { "epoch": 11.204572803850782, "grad_norm": 74.25709533691406, "learning_rate": 0.00019445877104187377, "loss": 7.1696, "step": 93110 }, { "epoch": 11.205776173285198, "grad_norm": 50.41210174560547, "learning_rate": 0.00019445752218721237, "loss": 7.1742, "step": 93120 }, { "epoch": 11.206979542719615, "grad_norm": 13.669230461120605, "learning_rate": 0.00019445627319584761, "loss": 7.2549, "step": 93130 }, { "epoch": 11.208182912154031, "grad_norm": 56.16986846923828, "learning_rate": 0.0001944550240677813, "loss": 7.306, "step": 93140 }, { "epoch": 11.209386281588447, "grad_norm": 45.588157653808594, "learning_rate": 0.00019445377480301526, "loss": 6.9637, "step": 93150 }, { "epoch": 11.210589651022865, "grad_norm": 83.8860855102539, "learning_rate": 0.00019445252540155127, "loss": 7.1724, "step": 93160 }, { "epoch": 11.21179302045728, "grad_norm": 70.70026397705078, "learning_rate": 0.00019445127586339118, "loss": 7.1555, "step": 93170 }, { "epoch": 11.212996389891696, "grad_norm": 83.21636199951172, "learning_rate": 0.00019445002618853678, "loss": 7.0629, "step": 93180 }, { "epoch": 11.214199759326114, "grad_norm": 65.31185150146484, "learning_rate": 0.00019444877637698986, "loss": 7.2527, "step": 93190 }, { "epoch": 11.21540312876053, "grad_norm": 40.810035705566406, "learning_rate": 0.00019444752642875222, "loss": 7.1287, "step": 93200 }, { "epoch": 11.216606498194945, "grad_norm": 66.74649810791016, "learning_rate": 0.00019444627634382573, "loss": 7.2442, "step": 93210 }, { "epoch": 11.217809867629363, "grad_norm": 47.640777587890625, "learning_rate": 0.00019444502612221213, "loss": 7.1871, "step": 93220 }, { "epoch": 11.219013237063779, "grad_norm": 25.909526824951172, "learning_rate": 0.00019444377576391327, "loss": 7.0653, "step": 93230 }, { "epoch": 11.220216606498195, "grad_norm": 32.09661102294922, "learning_rate": 0.00019444252526893094, "loss": 7.0846, "step": 93240 }, { "epoch": 11.221419975932612, "grad_norm": 21.584688186645508, "learning_rate": 0.00019444127463726698, "loss": 7.123, "step": 93250 }, { "epoch": 11.222623345367028, "grad_norm": 42.743892669677734, "learning_rate": 0.00019444002386892316, "loss": 7.067, "step": 93260 }, { "epoch": 11.223826714801444, "grad_norm": 29.378700256347656, "learning_rate": 0.0001944387729639013, "loss": 7.1114, "step": 93270 }, { "epoch": 11.225030084235861, "grad_norm": 15.080081939697266, "learning_rate": 0.00019443752192220326, "loss": 7.174, "step": 93280 }, { "epoch": 11.226233453670277, "grad_norm": 21.681062698364258, "learning_rate": 0.00019443627074383076, "loss": 7.1851, "step": 93290 }, { "epoch": 11.227436823104693, "grad_norm": 19.04930877685547, "learning_rate": 0.0001944350194287857, "loss": 7.1523, "step": 93300 }, { "epoch": 11.22864019253911, "grad_norm": 52.01959228515625, "learning_rate": 0.00019443376797706984, "loss": 7.2094, "step": 93310 }, { "epoch": 11.229843561973526, "grad_norm": 57.18655014038086, "learning_rate": 0.000194432516388685, "loss": 7.1087, "step": 93320 }, { "epoch": 11.231046931407942, "grad_norm": 26.65498924255371, "learning_rate": 0.000194431264663633, "loss": 7.1064, "step": 93330 }, { "epoch": 11.232250300842358, "grad_norm": 79.77294158935547, "learning_rate": 0.00019443001280191564, "loss": 7.1444, "step": 93340 }, { "epoch": 11.233453670276775, "grad_norm": 26.291479110717773, "learning_rate": 0.00019442876080353472, "loss": 7.203, "step": 93350 }, { "epoch": 11.234657039711191, "grad_norm": 45.38020706176758, "learning_rate": 0.0001944275086684921, "loss": 7.1146, "step": 93360 }, { "epoch": 11.235860409145607, "grad_norm": 56.406150817871094, "learning_rate": 0.00019442625639678953, "loss": 7.1607, "step": 93370 }, { "epoch": 11.237063778580024, "grad_norm": 15.539941787719727, "learning_rate": 0.00019442500398842886, "loss": 7.1901, "step": 93380 }, { "epoch": 11.23826714801444, "grad_norm": 30.330307006835938, "learning_rate": 0.0001944237514434119, "loss": 7.1586, "step": 93390 }, { "epoch": 11.239470517448856, "grad_norm": 40.90454864501953, "learning_rate": 0.0001944224987617405, "loss": 7.2042, "step": 93400 }, { "epoch": 11.240673886883274, "grad_norm": 87.12039184570312, "learning_rate": 0.0001944212459434164, "loss": 7.1879, "step": 93410 }, { "epoch": 11.24187725631769, "grad_norm": 105.44353485107422, "learning_rate": 0.00019441999298844143, "loss": 7.0583, "step": 93420 }, { "epoch": 11.243080625752105, "grad_norm": 27.090097427368164, "learning_rate": 0.00019441873989681745, "loss": 7.1794, "step": 93430 }, { "epoch": 11.244283995186523, "grad_norm": 88.40074157714844, "learning_rate": 0.00019441748666854623, "loss": 7.1585, "step": 93440 }, { "epoch": 11.245487364620939, "grad_norm": 31.46632194519043, "learning_rate": 0.00019441623330362958, "loss": 7.1761, "step": 93450 }, { "epoch": 11.246690734055354, "grad_norm": 88.53533172607422, "learning_rate": 0.00019441497980206935, "loss": 7.1753, "step": 93460 }, { "epoch": 11.247894103489772, "grad_norm": 95.59032440185547, "learning_rate": 0.00019441372616386732, "loss": 7.1662, "step": 93470 }, { "epoch": 11.249097472924188, "grad_norm": 37.85075378417969, "learning_rate": 0.0001944124723890253, "loss": 7.2929, "step": 93480 }, { "epoch": 11.250300842358604, "grad_norm": 60.5465202331543, "learning_rate": 0.00019441121847754516, "loss": 7.2749, "step": 93490 }, { "epoch": 11.251504211793021, "grad_norm": 32.9837532043457, "learning_rate": 0.00019440996442942867, "loss": 7.0887, "step": 93500 }, { "epoch": 11.252707581227437, "grad_norm": 85.40311431884766, "learning_rate": 0.00019440871024467763, "loss": 7.0611, "step": 93510 }, { "epoch": 11.253910950661853, "grad_norm": 34.23643112182617, "learning_rate": 0.0001944074559232939, "loss": 7.1397, "step": 93520 }, { "epoch": 11.25511432009627, "grad_norm": 30.25879669189453, "learning_rate": 0.00019440620146527927, "loss": 7.1157, "step": 93530 }, { "epoch": 11.256317689530686, "grad_norm": 104.1557846069336, "learning_rate": 0.00019440494687063557, "loss": 7.0913, "step": 93540 }, { "epoch": 11.257521058965102, "grad_norm": 115.49676513671875, "learning_rate": 0.00019440369213936457, "loss": 7.141, "step": 93550 }, { "epoch": 11.25872442839952, "grad_norm": 47.08007049560547, "learning_rate": 0.00019440243727146815, "loss": 7.1111, "step": 93560 }, { "epoch": 11.259927797833935, "grad_norm": 102.65309143066406, "learning_rate": 0.0001944011822669481, "loss": 7.1391, "step": 93570 }, { "epoch": 11.261131167268351, "grad_norm": 121.46197509765625, "learning_rate": 0.0001943999271258062, "loss": 7.1205, "step": 93580 }, { "epoch": 11.262334536702769, "grad_norm": 747.8783569335938, "learning_rate": 0.0001943986718480443, "loss": 7.2145, "step": 93590 }, { "epoch": 11.263537906137184, "grad_norm": 416.4867858886719, "learning_rate": 0.00019439741643366426, "loss": 7.2525, "step": 93600 }, { "epoch": 11.2647412755716, "grad_norm": 273.11578369140625, "learning_rate": 0.0001943961608826678, "loss": 7.1684, "step": 93610 }, { "epoch": 11.265944645006018, "grad_norm": 452.9868469238281, "learning_rate": 0.00019439490519505682, "loss": 7.0907, "step": 93620 }, { "epoch": 11.267148014440433, "grad_norm": 214.64022827148438, "learning_rate": 0.0001943936493708331, "loss": 7.1935, "step": 93630 }, { "epoch": 11.26835138387485, "grad_norm": 475.05279541015625, "learning_rate": 0.00019439239340999847, "loss": 7.0981, "step": 93640 }, { "epoch": 11.269554753309265, "grad_norm": 269.6962890625, "learning_rate": 0.00019439113731255475, "loss": 7.2454, "step": 93650 }, { "epoch": 11.270758122743683, "grad_norm": 329.68585205078125, "learning_rate": 0.00019438988107850372, "loss": 7.1289, "step": 93660 }, { "epoch": 11.271961492178098, "grad_norm": 132.30197143554688, "learning_rate": 0.00019438862470784726, "loss": 7.211, "step": 93670 }, { "epoch": 11.273164861612514, "grad_norm": 134.85240173339844, "learning_rate": 0.00019438736820058715, "loss": 7.1663, "step": 93680 }, { "epoch": 11.274368231046932, "grad_norm": 176.08334350585938, "learning_rate": 0.00019438611155672518, "loss": 7.144, "step": 93690 }, { "epoch": 11.275571600481348, "grad_norm": 187.31689453125, "learning_rate": 0.00019438485477626323, "loss": 7.1344, "step": 93700 }, { "epoch": 11.276774969915763, "grad_norm": 184.18238830566406, "learning_rate": 0.0001943835978592031, "loss": 7.2313, "step": 93710 }, { "epoch": 11.277978339350181, "grad_norm": 214.82232666015625, "learning_rate": 0.00019438234080554657, "loss": 7.1117, "step": 93720 }, { "epoch": 11.279181708784597, "grad_norm": 186.7197265625, "learning_rate": 0.00019438108361529552, "loss": 7.0882, "step": 93730 }, { "epoch": 11.280385078219012, "grad_norm": 293.4224853515625, "learning_rate": 0.00019437982628845175, "loss": 7.26, "step": 93740 }, { "epoch": 11.28158844765343, "grad_norm": 258.84698486328125, "learning_rate": 0.00019437856882501702, "loss": 7.1738, "step": 93750 }, { "epoch": 11.282791817087846, "grad_norm": 309.6853332519531, "learning_rate": 0.00019437731122499326, "loss": 7.2102, "step": 93760 }, { "epoch": 11.283995186522262, "grad_norm": 645.7169189453125, "learning_rate": 0.0001943760534883822, "loss": 7.1867, "step": 93770 }, { "epoch": 11.28519855595668, "grad_norm": 587.5661010742188, "learning_rate": 0.0001943747956151857, "loss": 7.2186, "step": 93780 }, { "epoch": 11.286401925391095, "grad_norm": 586.8329467773438, "learning_rate": 0.00019437353760540555, "loss": 7.1943, "step": 93790 }, { "epoch": 11.28760529482551, "grad_norm": 709.7696533203125, "learning_rate": 0.00019437227945904358, "loss": 7.2064, "step": 93800 }, { "epoch": 11.288808664259928, "grad_norm": 522.105712890625, "learning_rate": 0.00019437102117610166, "loss": 7.2035, "step": 93810 }, { "epoch": 11.290012033694344, "grad_norm": 935.1792602539062, "learning_rate": 0.00019436976275658156, "loss": 7.2011, "step": 93820 }, { "epoch": 11.29121540312876, "grad_norm": 419.7920837402344, "learning_rate": 0.00019436850420048512, "loss": 7.1943, "step": 93830 }, { "epoch": 11.292418772563177, "grad_norm": 908.8665161132812, "learning_rate": 0.00019436724550781414, "loss": 7.2612, "step": 93840 }, { "epoch": 11.293622141997593, "grad_norm": 483.0259094238281, "learning_rate": 0.0001943659866785705, "loss": 7.3596, "step": 93850 }, { "epoch": 11.294825511432009, "grad_norm": 633.0369262695312, "learning_rate": 0.00019436472771275592, "loss": 7.2362, "step": 93860 }, { "epoch": 11.296028880866427, "grad_norm": 333.7626037597656, "learning_rate": 0.00019436346861037232, "loss": 7.2016, "step": 93870 }, { "epoch": 11.297232250300842, "grad_norm": 398.3389892578125, "learning_rate": 0.00019436220937142147, "loss": 7.2256, "step": 93880 }, { "epoch": 11.298435619735258, "grad_norm": 1115.1990966796875, "learning_rate": 0.0001943609499959052, "loss": 7.2736, "step": 93890 }, { "epoch": 11.299638989169676, "grad_norm": 346.9666748046875, "learning_rate": 0.00019435969048382532, "loss": 7.1469, "step": 93900 }, { "epoch": 11.300842358604092, "grad_norm": 2024.689453125, "learning_rate": 0.0001943584308351837, "loss": 7.0714, "step": 93910 }, { "epoch": 11.302045728038507, "grad_norm": 333.5049133300781, "learning_rate": 0.00019435717104998213, "loss": 7.0776, "step": 93920 }, { "epoch": 11.303249097472925, "grad_norm": 368.3515625, "learning_rate": 0.00019435591112822244, "loss": 7.0975, "step": 93930 }, { "epoch": 11.30445246690734, "grad_norm": 329.6982421875, "learning_rate": 0.00019435465106990645, "loss": 7.2583, "step": 93940 }, { "epoch": 11.305655836341757, "grad_norm": 252.72410583496094, "learning_rate": 0.00019435339087503598, "loss": 7.2367, "step": 93950 }, { "epoch": 11.306859205776174, "grad_norm": 304.77587890625, "learning_rate": 0.00019435213054361285, "loss": 7.1695, "step": 93960 }, { "epoch": 11.30806257521059, "grad_norm": 115.09303283691406, "learning_rate": 0.0001943508700756389, "loss": 7.2383, "step": 93970 }, { "epoch": 11.309265944645006, "grad_norm": 300.5790100097656, "learning_rate": 0.00019434960947111596, "loss": 7.1671, "step": 93980 }, { "epoch": 11.310469314079423, "grad_norm": 187.3275604248047, "learning_rate": 0.00019434834873004585, "loss": 7.1337, "step": 93990 }, { "epoch": 11.311672683513839, "grad_norm": 401.3988952636719, "learning_rate": 0.00019434708785243031, "loss": 7.1607, "step": 94000 }, { "epoch": 11.312876052948255, "grad_norm": 240.0775146484375, "learning_rate": 0.0001943458268382713, "loss": 7.1282, "step": 94010 }, { "epoch": 11.314079422382672, "grad_norm": 64.38675689697266, "learning_rate": 0.0001943445656875706, "loss": 7.1417, "step": 94020 }, { "epoch": 11.315282791817088, "grad_norm": 82.42398834228516, "learning_rate": 0.00019434330440033, "loss": 7.2038, "step": 94030 }, { "epoch": 11.316486161251504, "grad_norm": 88.60486602783203, "learning_rate": 0.00019434204297655137, "loss": 7.1203, "step": 94040 }, { "epoch": 11.31768953068592, "grad_norm": 91.94270324707031, "learning_rate": 0.00019434078141623647, "loss": 7.1417, "step": 94050 }, { "epoch": 11.318892900120337, "grad_norm": 465.58831787109375, "learning_rate": 0.00019433951971938718, "loss": 7.1706, "step": 94060 }, { "epoch": 11.320096269554753, "grad_norm": 281.7317810058594, "learning_rate": 0.00019433825788600534, "loss": 7.1066, "step": 94070 }, { "epoch": 11.321299638989169, "grad_norm": 342.6062316894531, "learning_rate": 0.0001943369959160927, "loss": 7.1243, "step": 94080 }, { "epoch": 11.322503008423586, "grad_norm": 2160.69140625, "learning_rate": 0.0001943357338096512, "loss": 7.1727, "step": 94090 }, { "epoch": 11.323706377858002, "grad_norm": 133.9144287109375, "learning_rate": 0.00019433447156668254, "loss": 7.1418, "step": 94100 }, { "epoch": 11.324909747292418, "grad_norm": 910.3568115234375, "learning_rate": 0.00019433320918718866, "loss": 7.2835, "step": 94110 }, { "epoch": 11.326113116726836, "grad_norm": 56116.01171875, "learning_rate": 0.00019433194667117128, "loss": 7.2748, "step": 94120 }, { "epoch": 11.327316486161251, "grad_norm": 2441.263671875, "learning_rate": 0.00019433068401863234, "loss": 7.2558, "step": 94130 }, { "epoch": 11.328519855595667, "grad_norm": 269.61370849609375, "learning_rate": 0.00019432942122957355, "loss": 7.2334, "step": 94140 }, { "epoch": 11.329723225030085, "grad_norm": 462.24847412109375, "learning_rate": 0.00019432815830399688, "loss": 7.1521, "step": 94150 }, { "epoch": 11.3309265944645, "grad_norm": 132.6151580810547, "learning_rate": 0.000194326895241904, "loss": 7.3506, "step": 94160 }, { "epoch": 11.332129963898916, "grad_norm": 253.41500854492188, "learning_rate": 0.00019432563204329684, "loss": 7.168, "step": 94170 }, { "epoch": 11.333333333333334, "grad_norm": 141.18609619140625, "learning_rate": 0.00019432436870817722, "loss": 7.3042, "step": 94180 }, { "epoch": 11.33453670276775, "grad_norm": 180.30938720703125, "learning_rate": 0.00019432310523654692, "loss": 7.092, "step": 94190 }, { "epoch": 11.335740072202166, "grad_norm": 721.1651611328125, "learning_rate": 0.00019432184162840782, "loss": 7.1546, "step": 94200 }, { "epoch": 11.336943441636583, "grad_norm": 254.870361328125, "learning_rate": 0.00019432057788376175, "loss": 7.1647, "step": 94210 }, { "epoch": 11.338146811070999, "grad_norm": 457.938232421875, "learning_rate": 0.0001943193140026105, "loss": 7.1007, "step": 94220 }, { "epoch": 11.339350180505415, "grad_norm": 87.42581939697266, "learning_rate": 0.0001943180499849559, "loss": 7.1604, "step": 94230 }, { "epoch": 11.340553549939832, "grad_norm": 91.93179321289062, "learning_rate": 0.0001943167858307998, "loss": 7.2931, "step": 94240 }, { "epoch": 11.341756919374248, "grad_norm": 85.75534057617188, "learning_rate": 0.00019431552154014402, "loss": 7.1948, "step": 94250 }, { "epoch": 11.342960288808664, "grad_norm": 154.4004364013672, "learning_rate": 0.0001943142571129904, "loss": 7.2252, "step": 94260 }, { "epoch": 11.344163658243081, "grad_norm": 84.31697082519531, "learning_rate": 0.00019431299254934076, "loss": 7.1564, "step": 94270 }, { "epoch": 11.345367027677497, "grad_norm": 127.80133819580078, "learning_rate": 0.00019431172784919695, "loss": 7.1801, "step": 94280 }, { "epoch": 11.346570397111913, "grad_norm": 113.86201477050781, "learning_rate": 0.00019431046301256077, "loss": 7.1007, "step": 94290 }, { "epoch": 11.34777376654633, "grad_norm": 289.74700927734375, "learning_rate": 0.0001943091980394341, "loss": 7.1574, "step": 94300 }, { "epoch": 11.348977135980746, "grad_norm": 104.62680053710938, "learning_rate": 0.0001943079329298187, "loss": 7.1413, "step": 94310 }, { "epoch": 11.350180505415162, "grad_norm": 136.5205841064453, "learning_rate": 0.0001943066676837164, "loss": 7.0677, "step": 94320 }, { "epoch": 11.35138387484958, "grad_norm": 109.92561340332031, "learning_rate": 0.00019430540230112913, "loss": 7.1886, "step": 94330 }, { "epoch": 11.352587244283995, "grad_norm": 150.95932006835938, "learning_rate": 0.00019430413678205865, "loss": 7.0972, "step": 94340 }, { "epoch": 11.353790613718411, "grad_norm": 191.3978271484375, "learning_rate": 0.00019430287112650677, "loss": 7.2288, "step": 94350 }, { "epoch": 11.354993983152827, "grad_norm": 7439.27978515625, "learning_rate": 0.0001943016053344754, "loss": 7.1949, "step": 94360 }, { "epoch": 11.356197352587245, "grad_norm": 14434.5625, "learning_rate": 0.0001943003394059663, "loss": 7.1926, "step": 94370 }, { "epoch": 11.35740072202166, "grad_norm": 7459.72607421875, "learning_rate": 0.00019429907334098134, "loss": 7.3478, "step": 94380 }, { "epoch": 11.358604091456076, "grad_norm": 47360.546875, "learning_rate": 0.00019429780713952233, "loss": 7.1775, "step": 94390 }, { "epoch": 11.359807460890494, "grad_norm": 6103.2099609375, "learning_rate": 0.0001942965408015911, "loss": 7.1571, "step": 94400 }, { "epoch": 11.36101083032491, "grad_norm": 5248.42041015625, "learning_rate": 0.0001942952743271895, "loss": 7.3057, "step": 94410 }, { "epoch": 11.362214199759325, "grad_norm": 8737.953125, "learning_rate": 0.00019429400771631936, "loss": 7.2353, "step": 94420 }, { "epoch": 11.363417569193743, "grad_norm": 15249.4755859375, "learning_rate": 0.0001942927409689825, "loss": 7.1192, "step": 94430 }, { "epoch": 11.364620938628159, "grad_norm": 24800.455078125, "learning_rate": 0.0001942914740851808, "loss": 7.1431, "step": 94440 }, { "epoch": 11.365824308062574, "grad_norm": 7497.79052734375, "learning_rate": 0.000194290207064916, "loss": 7.218, "step": 94450 }, { "epoch": 11.367027677496992, "grad_norm": 6490.017578125, "learning_rate": 0.00019428893990819005, "loss": 7.2677, "step": 94460 }, { "epoch": 11.368231046931408, "grad_norm": 12166.8515625, "learning_rate": 0.0001942876726150047, "loss": 7.2949, "step": 94470 }, { "epoch": 11.369434416365824, "grad_norm": 12129.544921875, "learning_rate": 0.0001942864051853618, "loss": 7.2637, "step": 94480 }, { "epoch": 11.370637785800241, "grad_norm": 10249.13671875, "learning_rate": 0.0001942851376192632, "loss": 7.3413, "step": 94490 }, { "epoch": 11.371841155234657, "grad_norm": 57884.32421875, "learning_rate": 0.00019428386991671072, "loss": 7.2272, "step": 94500 }, { "epoch": 11.373044524669073, "grad_norm": 16126.486328125, "learning_rate": 0.0001942826020777062, "loss": 7.4479, "step": 94510 }, { "epoch": 11.37424789410349, "grad_norm": 711.2689208984375, "learning_rate": 0.00019428133410225149, "loss": 7.1287, "step": 94520 }, { "epoch": 11.375451263537906, "grad_norm": 83732.4921875, "learning_rate": 0.00019428006599034843, "loss": 7.2605, "step": 94530 }, { "epoch": 11.376654632972322, "grad_norm": 235717.21875, "learning_rate": 0.00019427879774199883, "loss": 7.3628, "step": 94540 }, { "epoch": 11.37785800240674, "grad_norm": 5930.115234375, "learning_rate": 0.00019427752935720452, "loss": 7.3277, "step": 94550 }, { "epoch": 11.379061371841155, "grad_norm": 12438.8486328125, "learning_rate": 0.00019427626083596734, "loss": 7.2204, "step": 94560 }, { "epoch": 11.380264741275571, "grad_norm": 8256.85546875, "learning_rate": 0.00019427499217828913, "loss": 7.199, "step": 94570 }, { "epoch": 11.381468110709989, "grad_norm": 139658.09375, "learning_rate": 0.00019427372338417172, "loss": 7.182, "step": 94580 }, { "epoch": 11.382671480144404, "grad_norm": 771027.3125, "learning_rate": 0.00019427245445361702, "loss": 7.2765, "step": 94590 }, { "epoch": 11.38387484957882, "grad_norm": 4082.875, "learning_rate": 0.00019427118538662677, "loss": 7.2696, "step": 94600 }, { "epoch": 11.385078219013238, "grad_norm": 1302.040283203125, "learning_rate": 0.0001942699161832028, "loss": 7.3377, "step": 94610 }, { "epoch": 11.386281588447654, "grad_norm": 7915.7470703125, "learning_rate": 0.00019426864684334705, "loss": 7.1266, "step": 94620 }, { "epoch": 11.38748495788207, "grad_norm": 9391.2744140625, "learning_rate": 0.00019426737736706124, "loss": 7.1875, "step": 94630 }, { "epoch": 11.388688327316487, "grad_norm": 506.64593505859375, "learning_rate": 0.0001942661077543473, "loss": 7.1783, "step": 94640 }, { "epoch": 11.389891696750903, "grad_norm": 245.23056030273438, "learning_rate": 0.000194264838005207, "loss": 7.2504, "step": 94650 }, { "epoch": 11.391095066185319, "grad_norm": 223.31689453125, "learning_rate": 0.00019426356811964223, "loss": 7.2597, "step": 94660 }, { "epoch": 11.392298435619736, "grad_norm": 502.1722412109375, "learning_rate": 0.0001942622980976548, "loss": 7.2559, "step": 94670 }, { "epoch": 11.393501805054152, "grad_norm": 740.5872802734375, "learning_rate": 0.00019426102793924655, "loss": 7.2675, "step": 94680 }, { "epoch": 11.394705174488568, "grad_norm": 2435.196044921875, "learning_rate": 0.0001942597576444193, "loss": 7.1764, "step": 94690 }, { "epoch": 11.395908543922985, "grad_norm": 1092.825439453125, "learning_rate": 0.0001942584872131749, "loss": 7.4649, "step": 94700 }, { "epoch": 11.397111913357401, "grad_norm": 2141.05078125, "learning_rate": 0.00019425721664551526, "loss": 7.3611, "step": 94710 }, { "epoch": 11.398315282791817, "grad_norm": 468.9147644042969, "learning_rate": 0.0001942559459414421, "loss": 7.331, "step": 94720 }, { "epoch": 11.399518652226233, "grad_norm": 129.21363830566406, "learning_rate": 0.00019425467510095733, "loss": 7.2282, "step": 94730 }, { "epoch": 11.40072202166065, "grad_norm": 76.4381103515625, "learning_rate": 0.0001942534041240628, "loss": 7.2006, "step": 94740 }, { "epoch": 11.401925391095066, "grad_norm": 88.59284973144531, "learning_rate": 0.0001942521330107603, "loss": 7.1181, "step": 94750 }, { "epoch": 11.403128760529482, "grad_norm": 68.23381042480469, "learning_rate": 0.00019425086176105168, "loss": 7.1686, "step": 94760 }, { "epoch": 11.4043321299639, "grad_norm": 251.13429260253906, "learning_rate": 0.0001942495903749388, "loss": 7.0901, "step": 94770 }, { "epoch": 11.405535499398315, "grad_norm": 491.6206359863281, "learning_rate": 0.00019424831885242352, "loss": 7.2668, "step": 94780 }, { "epoch": 11.406738868832731, "grad_norm": 219.69728088378906, "learning_rate": 0.00019424704719350763, "loss": 7.239, "step": 94790 }, { "epoch": 11.407942238267148, "grad_norm": 483.8101806640625, "learning_rate": 0.000194245775398193, "loss": 7.1924, "step": 94800 }, { "epoch": 11.409145607701564, "grad_norm": 277.7901611328125, "learning_rate": 0.00019424450346648147, "loss": 7.2086, "step": 94810 }, { "epoch": 11.41034897713598, "grad_norm": 82.76998138427734, "learning_rate": 0.00019424323139837486, "loss": 7.2416, "step": 94820 }, { "epoch": 11.411552346570398, "grad_norm": 132.4668731689453, "learning_rate": 0.00019424195919387504, "loss": 7.2132, "step": 94830 }, { "epoch": 11.412755716004813, "grad_norm": 209.8936309814453, "learning_rate": 0.00019424068685298384, "loss": 7.1452, "step": 94840 }, { "epoch": 11.41395908543923, "grad_norm": 88.13241577148438, "learning_rate": 0.0001942394143757031, "loss": 7.1729, "step": 94850 }, { "epoch": 11.415162454873647, "grad_norm": 445.6883850097656, "learning_rate": 0.00019423814176203465, "loss": 7.1684, "step": 94860 }, { "epoch": 11.416365824308063, "grad_norm": 278.3733215332031, "learning_rate": 0.00019423686901198032, "loss": 7.0922, "step": 94870 }, { "epoch": 11.417569193742478, "grad_norm": 369.18939208984375, "learning_rate": 0.000194235596125542, "loss": 7.2681, "step": 94880 }, { "epoch": 11.418772563176896, "grad_norm": 1085.650634765625, "learning_rate": 0.00019423432310272152, "loss": 7.1861, "step": 94890 }, { "epoch": 11.419975932611312, "grad_norm": 620.5313110351562, "learning_rate": 0.00019423304994352068, "loss": 7.2098, "step": 94900 }, { "epoch": 11.421179302045728, "grad_norm": 240.4861297607422, "learning_rate": 0.00019423177664794138, "loss": 7.2149, "step": 94910 }, { "epoch": 11.422382671480145, "grad_norm": 105.56778717041016, "learning_rate": 0.00019423050321598542, "loss": 7.1391, "step": 94920 }, { "epoch": 11.42358604091456, "grad_norm": 49.94228744506836, "learning_rate": 0.00019422922964765465, "loss": 7.2401, "step": 94930 }, { "epoch": 11.424789410348977, "grad_norm": 541.0204467773438, "learning_rate": 0.0001942279559429509, "loss": 7.2546, "step": 94940 }, { "epoch": 11.425992779783394, "grad_norm": 1016.9796752929688, "learning_rate": 0.0001942266821018761, "loss": 7.2312, "step": 94950 }, { "epoch": 11.42719614921781, "grad_norm": 1361.0908203125, "learning_rate": 0.00019422540812443197, "loss": 7.2369, "step": 94960 }, { "epoch": 11.428399518652226, "grad_norm": 2959.397216796875, "learning_rate": 0.00019422413401062042, "loss": 7.1497, "step": 94970 }, { "epoch": 11.429602888086643, "grad_norm": 896.3115844726562, "learning_rate": 0.0001942228597604433, "loss": 7.259, "step": 94980 }, { "epoch": 11.43080625752106, "grad_norm": 622.7590942382812, "learning_rate": 0.00019422158537390244, "loss": 7.2424, "step": 94990 }, { "epoch": 11.432009626955475, "grad_norm": 891.4320678710938, "learning_rate": 0.00019422031085099967, "loss": 7.1982, "step": 95000 }, { "epoch": 11.433212996389893, "grad_norm": 184.54701232910156, "learning_rate": 0.00019421903619173687, "loss": 7.1738, "step": 95010 }, { "epoch": 11.434416365824308, "grad_norm": 208.2530975341797, "learning_rate": 0.00019421776139611583, "loss": 7.307, "step": 95020 }, { "epoch": 11.435619735258724, "grad_norm": 240.05751037597656, "learning_rate": 0.00019421648646413843, "loss": 7.1369, "step": 95030 }, { "epoch": 11.43682310469314, "grad_norm": 969.1519775390625, "learning_rate": 0.00019421521139580654, "loss": 7.2613, "step": 95040 }, { "epoch": 11.438026474127557, "grad_norm": 282.9772033691406, "learning_rate": 0.00019421393619112199, "loss": 7.227, "step": 95050 }, { "epoch": 11.439229843561973, "grad_norm": 157.27987670898438, "learning_rate": 0.00019421266085008657, "loss": 7.2907, "step": 95060 }, { "epoch": 11.440433212996389, "grad_norm": 596.8047485351562, "learning_rate": 0.0001942113853727022, "loss": 7.1922, "step": 95070 }, { "epoch": 11.441636582430807, "grad_norm": 307.0189208984375, "learning_rate": 0.00019421010975897068, "loss": 7.2553, "step": 95080 }, { "epoch": 11.442839951865222, "grad_norm": 469.5777893066406, "learning_rate": 0.00019420883400889384, "loss": 7.2223, "step": 95090 }, { "epoch": 11.444043321299638, "grad_norm": 287.67572021484375, "learning_rate": 0.0001942075581224736, "loss": 7.1608, "step": 95100 }, { "epoch": 11.445246690734056, "grad_norm": 19147.453125, "learning_rate": 0.00019420628209971177, "loss": 7.1531, "step": 95110 }, { "epoch": 11.446450060168472, "grad_norm": 323.05120849609375, "learning_rate": 0.00019420500594061017, "loss": 7.1275, "step": 95120 }, { "epoch": 11.447653429602887, "grad_norm": 121.5462646484375, "learning_rate": 0.0001942037296451707, "loss": 7.2484, "step": 95130 }, { "epoch": 11.448856799037305, "grad_norm": 149.90122985839844, "learning_rate": 0.00019420245321339513, "loss": 7.1881, "step": 95140 }, { "epoch": 11.45006016847172, "grad_norm": 189.19300842285156, "learning_rate": 0.00019420117664528536, "loss": 7.1153, "step": 95150 }, { "epoch": 11.451263537906136, "grad_norm": 119.75334930419922, "learning_rate": 0.00019419989994084324, "loss": 7.1951, "step": 95160 }, { "epoch": 11.452466907340554, "grad_norm": 118.05400085449219, "learning_rate": 0.00019419862310007062, "loss": 7.1437, "step": 95170 }, { "epoch": 11.45367027677497, "grad_norm": 37.96413040161133, "learning_rate": 0.00019419734612296933, "loss": 7.0621, "step": 95180 }, { "epoch": 11.454873646209386, "grad_norm": 267.73529052734375, "learning_rate": 0.00019419606900954124, "loss": 7.1646, "step": 95190 }, { "epoch": 11.456077015643803, "grad_norm": 177.4149169921875, "learning_rate": 0.00019419479175978818, "loss": 7.1765, "step": 95200 }, { "epoch": 11.457280385078219, "grad_norm": 141.97894287109375, "learning_rate": 0.00019419351437371195, "loss": 7.124, "step": 95210 }, { "epoch": 11.458483754512635, "grad_norm": 296.4188232421875, "learning_rate": 0.00019419223685131448, "loss": 7.1648, "step": 95220 }, { "epoch": 11.459687123947052, "grad_norm": 195.08131408691406, "learning_rate": 0.00019419095919259756, "loss": 7.1814, "step": 95230 }, { "epoch": 11.460890493381468, "grad_norm": 455.2322082519531, "learning_rate": 0.00019418968139756312, "loss": 7.2621, "step": 95240 }, { "epoch": 11.462093862815884, "grad_norm": 163.94717407226562, "learning_rate": 0.00019418840346621295, "loss": 7.2217, "step": 95250 }, { "epoch": 11.463297232250302, "grad_norm": 97.32848358154297, "learning_rate": 0.00019418712539854887, "loss": 7.2922, "step": 95260 }, { "epoch": 11.464500601684717, "grad_norm": 183.0526580810547, "learning_rate": 0.00019418584719457278, "loss": 7.1507, "step": 95270 }, { "epoch": 11.465703971119133, "grad_norm": 32.27583312988281, "learning_rate": 0.00019418456885428653, "loss": 7.1735, "step": 95280 }, { "epoch": 11.46690734055355, "grad_norm": 98.2454605102539, "learning_rate": 0.00019418329037769195, "loss": 7.1715, "step": 95290 }, { "epoch": 11.468110709987966, "grad_norm": 139.91546630859375, "learning_rate": 0.00019418201176479087, "loss": 7.0475, "step": 95300 }, { "epoch": 11.469314079422382, "grad_norm": 183.52671813964844, "learning_rate": 0.0001941807330155852, "loss": 7.2947, "step": 95310 }, { "epoch": 11.4705174488568, "grad_norm": 275.2720947265625, "learning_rate": 0.00019417945413007675, "loss": 7.1885, "step": 95320 }, { "epoch": 11.471720818291216, "grad_norm": 218.15499877929688, "learning_rate": 0.00019417817510826736, "loss": 7.2804, "step": 95330 }, { "epoch": 11.472924187725631, "grad_norm": 71.13890838623047, "learning_rate": 0.0001941768959501589, "loss": 7.2736, "step": 95340 }, { "epoch": 11.474127557160049, "grad_norm": 68.12261962890625, "learning_rate": 0.00019417561665575325, "loss": 7.2383, "step": 95350 }, { "epoch": 11.475330926594465, "grad_norm": 101.7570571899414, "learning_rate": 0.0001941743372250522, "loss": 7.2648, "step": 95360 }, { "epoch": 11.47653429602888, "grad_norm": 293.007080078125, "learning_rate": 0.00019417305765805765, "loss": 7.1892, "step": 95370 }, { "epoch": 11.477737665463298, "grad_norm": 74.36763000488281, "learning_rate": 0.0001941717779547714, "loss": 7.145, "step": 95380 }, { "epoch": 11.478941034897714, "grad_norm": 29.627981185913086, "learning_rate": 0.0001941704981151954, "loss": 7.2334, "step": 95390 }, { "epoch": 11.48014440433213, "grad_norm": 37.16349411010742, "learning_rate": 0.0001941692181393314, "loss": 7.2144, "step": 95400 }, { "epoch": 11.481347773766545, "grad_norm": 96.40779113769531, "learning_rate": 0.0001941679380271813, "loss": 7.3325, "step": 95410 }, { "epoch": 11.482551143200963, "grad_norm": 22.974285125732422, "learning_rate": 0.00019416665777874693, "loss": 7.3191, "step": 95420 }, { "epoch": 11.483754512635379, "grad_norm": 50.43960189819336, "learning_rate": 0.00019416537739403017, "loss": 7.23, "step": 95430 }, { "epoch": 11.484957882069795, "grad_norm": 23.416912078857422, "learning_rate": 0.00019416409687303285, "loss": 7.2831, "step": 95440 }, { "epoch": 11.486161251504212, "grad_norm": 207.31910705566406, "learning_rate": 0.00019416281621575685, "loss": 7.1682, "step": 95450 }, { "epoch": 11.487364620938628, "grad_norm": 25.098697662353516, "learning_rate": 0.000194161535422204, "loss": 7.3127, "step": 95460 }, { "epoch": 11.488567990373044, "grad_norm": 333.8545227050781, "learning_rate": 0.00019416025449237617, "loss": 7.1712, "step": 95470 }, { "epoch": 11.489771359807461, "grad_norm": 70.67992401123047, "learning_rate": 0.0001941589734262752, "loss": 7.1905, "step": 95480 }, { "epoch": 11.490974729241877, "grad_norm": 44.74904251098633, "learning_rate": 0.00019415769222390293, "loss": 7.2526, "step": 95490 }, { "epoch": 11.492178098676293, "grad_norm": 164.74118041992188, "learning_rate": 0.00019415641088526124, "loss": 7.201, "step": 95500 }, { "epoch": 11.49338146811071, "grad_norm": 30.1320743560791, "learning_rate": 0.00019415512941035198, "loss": 7.2024, "step": 95510 }, { "epoch": 11.494584837545126, "grad_norm": 2078.109375, "learning_rate": 0.00019415384779917702, "loss": 7.2331, "step": 95520 }, { "epoch": 11.495788206979542, "grad_norm": 5247.37939453125, "learning_rate": 0.00019415256605173818, "loss": 7.2696, "step": 95530 }, { "epoch": 11.49699157641396, "grad_norm": 19246.5703125, "learning_rate": 0.00019415128416803735, "loss": 7.3531, "step": 95540 }, { "epoch": 11.498194945848375, "grad_norm": 6615.9482421875, "learning_rate": 0.00019415000214807632, "loss": 7.3087, "step": 95550 }, { "epoch": 11.499398315282791, "grad_norm": 344.5341491699219, "learning_rate": 0.000194148719991857, "loss": 7.2626, "step": 95560 }, { "epoch": 11.500601684717209, "grad_norm": 391.76593017578125, "learning_rate": 0.00019414743769938128, "loss": 7.1514, "step": 95570 }, { "epoch": 11.501805054151625, "grad_norm": 4885.11474609375, "learning_rate": 0.00019414615527065093, "loss": 7.3166, "step": 95580 }, { "epoch": 11.50300842358604, "grad_norm": 3420.669921875, "learning_rate": 0.0001941448727056679, "loss": 7.259, "step": 95590 }, { "epoch": 11.504211793020458, "grad_norm": 4549.19873046875, "learning_rate": 0.00019414359000443395, "loss": 7.2412, "step": 95600 }, { "epoch": 11.505415162454874, "grad_norm": 162.44728088378906, "learning_rate": 0.000194142307166951, "loss": 7.2549, "step": 95610 }, { "epoch": 11.50661853188929, "grad_norm": 646.1400146484375, "learning_rate": 0.00019414102419322088, "loss": 7.3019, "step": 95620 }, { "epoch": 11.507821901323707, "grad_norm": 660.2918701171875, "learning_rate": 0.0001941397410832455, "loss": 7.2229, "step": 95630 }, { "epoch": 11.509025270758123, "grad_norm": 108.86157989501953, "learning_rate": 0.00019413845783702662, "loss": 7.1886, "step": 95640 }, { "epoch": 11.510228640192539, "grad_norm": 685.5956420898438, "learning_rate": 0.00019413717445456615, "loss": 7.212, "step": 95650 }, { "epoch": 11.511432009626956, "grad_norm": 1988.319580078125, "learning_rate": 0.00019413589093586597, "loss": 7.2999, "step": 95660 }, { "epoch": 11.512635379061372, "grad_norm": 2318.384033203125, "learning_rate": 0.0001941346072809279, "loss": 7.1294, "step": 95670 }, { "epoch": 11.513838748495788, "grad_norm": 308.8346252441406, "learning_rate": 0.00019413332348975382, "loss": 7.2871, "step": 95680 }, { "epoch": 11.515042117930205, "grad_norm": 178.97772216796875, "learning_rate": 0.00019413203956234558, "loss": 7.204, "step": 95690 }, { "epoch": 11.516245487364621, "grad_norm": 5297.19921875, "learning_rate": 0.00019413075549870503, "loss": 7.2808, "step": 95700 }, { "epoch": 11.517448856799037, "grad_norm": 1700379.125, "learning_rate": 0.00019412947129883405, "loss": 7.282, "step": 95710 }, { "epoch": 11.518652226233453, "grad_norm": 7690.8837890625, "learning_rate": 0.0001941281869627345, "loss": 7.2673, "step": 95720 }, { "epoch": 11.51985559566787, "grad_norm": 3003.178955078125, "learning_rate": 0.00019412690249040818, "loss": 7.2425, "step": 95730 }, { "epoch": 11.521058965102286, "grad_norm": 509.0173034667969, "learning_rate": 0.00019412561788185702, "loss": 7.2437, "step": 95740 }, { "epoch": 11.522262334536702, "grad_norm": 385.6251220703125, "learning_rate": 0.00019412433313708283, "loss": 7.3698, "step": 95750 }, { "epoch": 11.52346570397112, "grad_norm": 5005.37109375, "learning_rate": 0.0001941230482560875, "loss": 7.4122, "step": 95760 }, { "epoch": 11.524669073405535, "grad_norm": 13992.76171875, "learning_rate": 0.0001941217632388729, "loss": 7.4523, "step": 95770 }, { "epoch": 11.525872442839951, "grad_norm": 617.8475341796875, "learning_rate": 0.00019412047808544088, "loss": 7.4705, "step": 95780 }, { "epoch": 11.527075812274369, "grad_norm": 1462.038818359375, "learning_rate": 0.00019411919279579328, "loss": 7.4208, "step": 95790 }, { "epoch": 11.528279181708784, "grad_norm": 3715.39892578125, "learning_rate": 0.00019411790736993193, "loss": 7.3372, "step": 95800 }, { "epoch": 11.5294825511432, "grad_norm": 2403.631103515625, "learning_rate": 0.00019411662180785877, "loss": 7.3452, "step": 95810 }, { "epoch": 11.530685920577618, "grad_norm": 791.82958984375, "learning_rate": 0.00019411533610957563, "loss": 7.4349, "step": 95820 }, { "epoch": 11.531889290012034, "grad_norm": 471.4733581542969, "learning_rate": 0.00019411405027508434, "loss": 7.5038, "step": 95830 }, { "epoch": 11.53309265944645, "grad_norm": 306.58074951171875, "learning_rate": 0.0001941127643043868, "loss": 7.4147, "step": 95840 }, { "epoch": 11.534296028880867, "grad_norm": 707.7532958984375, "learning_rate": 0.00019411147819748482, "loss": 7.4396, "step": 95850 }, { "epoch": 11.535499398315283, "grad_norm": 649.7509765625, "learning_rate": 0.0001941101919543803, "loss": 7.3674, "step": 95860 }, { "epoch": 11.536702767749698, "grad_norm": 121.48490142822266, "learning_rate": 0.00019410890557507515, "loss": 7.3876, "step": 95870 }, { "epoch": 11.537906137184116, "grad_norm": 56.903682708740234, "learning_rate": 0.00019410761905957113, "loss": 7.4034, "step": 95880 }, { "epoch": 11.539109506618532, "grad_norm": 768.2057495117188, "learning_rate": 0.00019410633240787018, "loss": 7.4109, "step": 95890 }, { "epoch": 11.540312876052948, "grad_norm": 297.0628356933594, "learning_rate": 0.0001941050456199741, "loss": 7.3977, "step": 95900 }, { "epoch": 11.541516245487365, "grad_norm": 496.7565002441406, "learning_rate": 0.0001941037586958848, "loss": 7.5049, "step": 95910 }, { "epoch": 11.542719614921781, "grad_norm": 729.7393188476562, "learning_rate": 0.00019410247163560414, "loss": 7.3943, "step": 95920 }, { "epoch": 11.543922984356197, "grad_norm": 62.40972900390625, "learning_rate": 0.00019410118443913396, "loss": 7.4299, "step": 95930 }, { "epoch": 11.545126353790614, "grad_norm": 454.97454833984375, "learning_rate": 0.00019409989710647617, "loss": 7.4787, "step": 95940 }, { "epoch": 11.54632972322503, "grad_norm": 209.5741729736328, "learning_rate": 0.00019409860963763253, "loss": 7.4322, "step": 95950 }, { "epoch": 11.547533092659446, "grad_norm": 968.0933227539062, "learning_rate": 0.000194097322032605, "loss": 7.4145, "step": 95960 }, { "epoch": 11.548736462093864, "grad_norm": 4711.44921875, "learning_rate": 0.00019409603429139543, "loss": 7.9339, "step": 95970 }, { "epoch": 11.54993983152828, "grad_norm": 4932.09619140625, "learning_rate": 0.00019409474641400565, "loss": 8.1511, "step": 95980 }, { "epoch": 11.551143200962695, "grad_norm": 1032.34716796875, "learning_rate": 0.00019409345840043754, "loss": 8.1253, "step": 95990 }, { "epoch": 11.552346570397113, "grad_norm": 1271.8768310546875, "learning_rate": 0.00019409217025069295, "loss": 7.858, "step": 96000 }, { "epoch": 11.553549939831528, "grad_norm": 2195.34326171875, "learning_rate": 0.0001940908819647738, "loss": 7.6339, "step": 96010 }, { "epoch": 11.554753309265944, "grad_norm": 874.3984375, "learning_rate": 0.00019408959354268187, "loss": 7.4137, "step": 96020 }, { "epoch": 11.555956678700362, "grad_norm": 886.2830810546875, "learning_rate": 0.0001940883049844191, "loss": 7.4869, "step": 96030 }, { "epoch": 11.557160048134778, "grad_norm": 1172.5394287109375, "learning_rate": 0.0001940870162899873, "loss": 7.4175, "step": 96040 }, { "epoch": 11.558363417569193, "grad_norm": 815.3179321289062, "learning_rate": 0.00019408572745938836, "loss": 7.4108, "step": 96050 }, { "epoch": 11.559566787003611, "grad_norm": 748.6189575195312, "learning_rate": 0.00019408443849262418, "loss": 7.2286, "step": 96060 }, { "epoch": 11.560770156438027, "grad_norm": 2582.579833984375, "learning_rate": 0.00019408314938969653, "loss": 7.3017, "step": 96070 }, { "epoch": 11.561973525872443, "grad_norm": 1788.675537109375, "learning_rate": 0.0001940818601506074, "loss": 7.3826, "step": 96080 }, { "epoch": 11.56317689530686, "grad_norm": 710.4325561523438, "learning_rate": 0.00019408057077535852, "loss": 7.3341, "step": 96090 }, { "epoch": 11.564380264741276, "grad_norm": 6406.4833984375, "learning_rate": 0.0001940792812639519, "loss": 7.3452, "step": 96100 }, { "epoch": 11.565583634175692, "grad_norm": 539.2433471679688, "learning_rate": 0.00019407799161638928, "loss": 7.3412, "step": 96110 }, { "epoch": 11.566787003610107, "grad_norm": 505.794189453125, "learning_rate": 0.0001940767018326726, "loss": 7.3656, "step": 96120 }, { "epoch": 11.567990373044525, "grad_norm": 2025.591064453125, "learning_rate": 0.00019407541191280369, "loss": 7.3555, "step": 96130 }, { "epoch": 11.56919374247894, "grad_norm": 1910.954833984375, "learning_rate": 0.00019407412185678443, "loss": 7.3506, "step": 96140 }, { "epoch": 11.570397111913357, "grad_norm": 3230.227294921875, "learning_rate": 0.00019407283166461673, "loss": 7.3829, "step": 96150 }, { "epoch": 11.571600481347774, "grad_norm": 5494.11181640625, "learning_rate": 0.0001940715413363024, "loss": 7.4108, "step": 96160 }, { "epoch": 11.57280385078219, "grad_norm": 692.5964965820312, "learning_rate": 0.00019407025087184328, "loss": 7.4228, "step": 96170 }, { "epoch": 11.574007220216606, "grad_norm": 102.73624420166016, "learning_rate": 0.00019406896027124133, "loss": 7.3413, "step": 96180 }, { "epoch": 11.575210589651023, "grad_norm": 40.074222564697266, "learning_rate": 0.00019406766953449834, "loss": 7.3512, "step": 96190 }, { "epoch": 11.57641395908544, "grad_norm": 61.32003402709961, "learning_rate": 0.00019406637866161623, "loss": 7.2595, "step": 96200 }, { "epoch": 11.577617328519855, "grad_norm": 109.75365447998047, "learning_rate": 0.00019406508765259684, "loss": 7.4006, "step": 96210 }, { "epoch": 11.578820697954272, "grad_norm": 111.11814880371094, "learning_rate": 0.00019406379650744205, "loss": 7.3753, "step": 96220 }, { "epoch": 11.580024067388688, "grad_norm": 67.07972717285156, "learning_rate": 0.0001940625052261537, "loss": 7.2511, "step": 96230 }, { "epoch": 11.581227436823104, "grad_norm": 214.6697235107422, "learning_rate": 0.0001940612138087337, "loss": 7.3164, "step": 96240 }, { "epoch": 11.582430806257522, "grad_norm": 71.9437484741211, "learning_rate": 0.0001940599222551839, "loss": 7.3436, "step": 96250 }, { "epoch": 11.583634175691937, "grad_norm": 48.210391998291016, "learning_rate": 0.00019405863056550616, "loss": 7.3452, "step": 96260 }, { "epoch": 11.584837545126353, "grad_norm": 179.23489379882812, "learning_rate": 0.00019405733873970236, "loss": 7.2433, "step": 96270 }, { "epoch": 11.58604091456077, "grad_norm": 57.60596466064453, "learning_rate": 0.00019405604677777438, "loss": 7.2694, "step": 96280 }, { "epoch": 11.587244283995187, "grad_norm": 72.04112243652344, "learning_rate": 0.00019405475467972408, "loss": 7.2567, "step": 96290 }, { "epoch": 11.588447653429602, "grad_norm": 80.52898406982422, "learning_rate": 0.0001940534624455533, "loss": 7.2869, "step": 96300 }, { "epoch": 11.58965102286402, "grad_norm": 179.2220001220703, "learning_rate": 0.000194052170075264, "loss": 7.295, "step": 96310 }, { "epoch": 11.590854392298436, "grad_norm": 469.15692138671875, "learning_rate": 0.00019405087756885791, "loss": 7.2984, "step": 96320 }, { "epoch": 11.592057761732852, "grad_norm": 278.82354736328125, "learning_rate": 0.00019404958492633703, "loss": 7.2517, "step": 96330 }, { "epoch": 11.593261131167269, "grad_norm": 615.7554321289062, "learning_rate": 0.0001940482921477032, "loss": 7.3759, "step": 96340 }, { "epoch": 11.594464500601685, "grad_norm": 141.1912841796875, "learning_rate": 0.00019404699923295825, "loss": 7.3484, "step": 96350 }, { "epoch": 11.5956678700361, "grad_norm": 206.26466369628906, "learning_rate": 0.00019404570618210403, "loss": 7.2216, "step": 96360 }, { "epoch": 11.596871239470518, "grad_norm": 51.306732177734375, "learning_rate": 0.00019404441299514252, "loss": 7.225, "step": 96370 }, { "epoch": 11.598074608904934, "grad_norm": 50.382686614990234, "learning_rate": 0.00019404311967207547, "loss": 7.2798, "step": 96380 }, { "epoch": 11.59927797833935, "grad_norm": 76.64035034179688, "learning_rate": 0.00019404182621290484, "loss": 7.3731, "step": 96390 }, { "epoch": 11.600481347773766, "grad_norm": 70.0130615234375, "learning_rate": 0.00019404053261763243, "loss": 7.3046, "step": 96400 }, { "epoch": 11.601684717208183, "grad_norm": 44.97519302368164, "learning_rate": 0.00019403923888626018, "loss": 7.2776, "step": 96410 }, { "epoch": 11.602888086642599, "grad_norm": 79.91014862060547, "learning_rate": 0.00019403794501878996, "loss": 7.2753, "step": 96420 }, { "epoch": 11.604091456077015, "grad_norm": 88.37982177734375, "learning_rate": 0.00019403665101522358, "loss": 7.2956, "step": 96430 }, { "epoch": 11.605294825511432, "grad_norm": 53.483333587646484, "learning_rate": 0.000194035356875563, "loss": 7.2445, "step": 96440 }, { "epoch": 11.606498194945848, "grad_norm": 46.7051887512207, "learning_rate": 0.00019403406259980996, "loss": 7.2775, "step": 96450 }, { "epoch": 11.607701564380264, "grad_norm": 167.72657775878906, "learning_rate": 0.0001940327681879665, "loss": 7.2743, "step": 96460 }, { "epoch": 11.608904933814681, "grad_norm": 63.070072174072266, "learning_rate": 0.00019403147364003434, "loss": 7.2868, "step": 96470 }, { "epoch": 11.610108303249097, "grad_norm": 104.51396179199219, "learning_rate": 0.00019403017895601546, "loss": 7.2208, "step": 96480 }, { "epoch": 11.611311672683513, "grad_norm": 199.52304077148438, "learning_rate": 0.00019402888413591168, "loss": 7.2697, "step": 96490 }, { "epoch": 11.61251504211793, "grad_norm": 88.67973327636719, "learning_rate": 0.00019402758917972488, "loss": 7.3066, "step": 96500 }, { "epoch": 11.613718411552346, "grad_norm": 62.45795440673828, "learning_rate": 0.00019402629408745697, "loss": 7.281, "step": 96510 }, { "epoch": 11.614921780986762, "grad_norm": 64.20278930664062, "learning_rate": 0.0001940249988591098, "loss": 7.3778, "step": 96520 }, { "epoch": 11.61612515042118, "grad_norm": 196.82647705078125, "learning_rate": 0.0001940237034946852, "loss": 7.2641, "step": 96530 }, { "epoch": 11.617328519855596, "grad_norm": 95.7018051147461, "learning_rate": 0.00019402240799418511, "loss": 7.2731, "step": 96540 }, { "epoch": 11.618531889290011, "grad_norm": 155.0595245361328, "learning_rate": 0.0001940211123576114, "loss": 7.1003, "step": 96550 }, { "epoch": 11.619735258724429, "grad_norm": 117.70222473144531, "learning_rate": 0.0001940198165849659, "loss": 7.1122, "step": 96560 }, { "epoch": 11.620938628158845, "grad_norm": 295.2709045410156, "learning_rate": 0.00019401852067625055, "loss": 7.2431, "step": 96570 }, { "epoch": 11.62214199759326, "grad_norm": 125.61164855957031, "learning_rate": 0.00019401722463146718, "loss": 7.1774, "step": 96580 }, { "epoch": 11.623345367027678, "grad_norm": 274.9051208496094, "learning_rate": 0.00019401592845061765, "loss": 7.181, "step": 96590 }, { "epoch": 11.624548736462094, "grad_norm": 170.54946899414062, "learning_rate": 0.00019401463213370386, "loss": 7.3159, "step": 96600 }, { "epoch": 11.62575210589651, "grad_norm": 1383.941162109375, "learning_rate": 0.0001940133356807277, "loss": 7.4157, "step": 96610 }, { "epoch": 11.626955475330927, "grad_norm": 312.02886962890625, "learning_rate": 0.00019401203909169104, "loss": 7.2948, "step": 96620 }, { "epoch": 11.628158844765343, "grad_norm": 293.3307800292969, "learning_rate": 0.00019401074236659573, "loss": 7.2833, "step": 96630 }, { "epoch": 11.629362214199759, "grad_norm": 329.22216796875, "learning_rate": 0.00019400944550544366, "loss": 7.1939, "step": 96640 }, { "epoch": 11.630565583634176, "grad_norm": 107.05670166015625, "learning_rate": 0.00019400814850823673, "loss": 7.426, "step": 96650 }, { "epoch": 11.631768953068592, "grad_norm": 210.57260131835938, "learning_rate": 0.0001940068513749768, "loss": 7.3313, "step": 96660 }, { "epoch": 11.632972322503008, "grad_norm": 202.15170288085938, "learning_rate": 0.00019400555410566575, "loss": 7.2733, "step": 96670 }, { "epoch": 11.634175691937426, "grad_norm": 318.3314208984375, "learning_rate": 0.00019400425670030544, "loss": 7.268, "step": 96680 }, { "epoch": 11.635379061371841, "grad_norm": 259.2880554199219, "learning_rate": 0.00019400295915889774, "loss": 7.1044, "step": 96690 }, { "epoch": 11.636582430806257, "grad_norm": 308.6136779785156, "learning_rate": 0.0001940016614814446, "loss": 7.3616, "step": 96700 }, { "epoch": 11.637785800240675, "grad_norm": 105.45771789550781, "learning_rate": 0.0001940003636679478, "loss": 7.3933, "step": 96710 }, { "epoch": 11.63898916967509, "grad_norm": 121.55425262451172, "learning_rate": 0.00019399906571840932, "loss": 7.3401, "step": 96720 }, { "epoch": 11.640192539109506, "grad_norm": 191.5036163330078, "learning_rate": 0.00019399776763283092, "loss": 7.2856, "step": 96730 }, { "epoch": 11.641395908543924, "grad_norm": 176.63304138183594, "learning_rate": 0.0001939964694112146, "loss": 7.1382, "step": 96740 }, { "epoch": 11.64259927797834, "grad_norm": 114.03582000732422, "learning_rate": 0.00019399517105356216, "loss": 7.1845, "step": 96750 }, { "epoch": 11.643802647412755, "grad_norm": 489.2210998535156, "learning_rate": 0.00019399387255987548, "loss": 7.3281, "step": 96760 }, { "epoch": 11.645006016847173, "grad_norm": 102.50401306152344, "learning_rate": 0.0001939925739301565, "loss": 7.1657, "step": 96770 }, { "epoch": 11.646209386281589, "grad_norm": 61.25798034667969, "learning_rate": 0.000193991275164407, "loss": 7.3006, "step": 96780 }, { "epoch": 11.647412755716005, "grad_norm": 84.86211395263672, "learning_rate": 0.00019398997626262897, "loss": 7.232, "step": 96790 }, { "epoch": 11.648616125150422, "grad_norm": 103.94781494140625, "learning_rate": 0.00019398867722482423, "loss": 7.2801, "step": 96800 }, { "epoch": 11.649819494584838, "grad_norm": 139.27731323242188, "learning_rate": 0.00019398737805099468, "loss": 7.1559, "step": 96810 }, { "epoch": 11.651022864019254, "grad_norm": 275.8272705078125, "learning_rate": 0.00019398607874114218, "loss": 7.2544, "step": 96820 }, { "epoch": 11.65222623345367, "grad_norm": 184.69407653808594, "learning_rate": 0.0001939847792952686, "loss": 7.2527, "step": 96830 }, { "epoch": 11.653429602888087, "grad_norm": 66.01103973388672, "learning_rate": 0.00019398347971337585, "loss": 7.2175, "step": 96840 }, { "epoch": 11.654632972322503, "grad_norm": 142.17613220214844, "learning_rate": 0.0001939821799954658, "loss": 7.247, "step": 96850 }, { "epoch": 11.655836341756919, "grad_norm": 215.30686950683594, "learning_rate": 0.00019398088014154036, "loss": 7.1864, "step": 96860 }, { "epoch": 11.657039711191336, "grad_norm": 69.89537811279297, "learning_rate": 0.00019397958015160137, "loss": 7.2055, "step": 96870 }, { "epoch": 11.658243080625752, "grad_norm": 200.43955993652344, "learning_rate": 0.0001939782800256507, "loss": 7.2436, "step": 96880 }, { "epoch": 11.659446450060168, "grad_norm": 146.6250762939453, "learning_rate": 0.00019397697976369026, "loss": 7.1778, "step": 96890 }, { "epoch": 11.660649819494585, "grad_norm": 66.77310943603516, "learning_rate": 0.00019397567936572198, "loss": 7.2166, "step": 96900 }, { "epoch": 11.661853188929001, "grad_norm": 341.8200378417969, "learning_rate": 0.00019397437883174763, "loss": 7.2666, "step": 96910 }, { "epoch": 11.663056558363417, "grad_norm": 851.1067504882812, "learning_rate": 0.00019397307816176917, "loss": 7.2638, "step": 96920 }, { "epoch": 11.664259927797834, "grad_norm": 226.024658203125, "learning_rate": 0.00019397177735578847, "loss": 7.3104, "step": 96930 }, { "epoch": 11.66546329723225, "grad_norm": 277.4878845214844, "learning_rate": 0.0001939704764138074, "loss": 7.2567, "step": 96940 }, { "epoch": 11.666666666666666, "grad_norm": 306.32806396484375, "learning_rate": 0.00019396917533582783, "loss": 7.2011, "step": 96950 }, { "epoch": 11.667870036101084, "grad_norm": 230.68382263183594, "learning_rate": 0.00019396787412185172, "loss": 7.3227, "step": 96960 }, { "epoch": 11.6690734055355, "grad_norm": 606.9068603515625, "learning_rate": 0.00019396657277188087, "loss": 7.1708, "step": 96970 }, { "epoch": 11.670276774969915, "grad_norm": 732.1396484375, "learning_rate": 0.00019396527128591717, "loss": 7.2709, "step": 96980 }, { "epoch": 11.671480144404333, "grad_norm": 315.5829162597656, "learning_rate": 0.00019396396966396253, "loss": 7.1769, "step": 96990 }, { "epoch": 11.672683513838749, "grad_norm": 602.1509399414062, "learning_rate": 0.00019396266790601882, "loss": 7.1419, "step": 97000 }, { "epoch": 11.673886883273164, "grad_norm": 233.7982940673828, "learning_rate": 0.00019396136601208797, "loss": 7.249, "step": 97010 }, { "epoch": 11.675090252707582, "grad_norm": 234.9595184326172, "learning_rate": 0.0001939600639821718, "loss": 7.2539, "step": 97020 }, { "epoch": 11.676293622141998, "grad_norm": 443.2006530761719, "learning_rate": 0.00019395876181627222, "loss": 7.2811, "step": 97030 }, { "epoch": 11.677496991576414, "grad_norm": 337.4075927734375, "learning_rate": 0.0001939574595143911, "loss": 7.1453, "step": 97040 }, { "epoch": 11.678700361010831, "grad_norm": 519.0325927734375, "learning_rate": 0.00019395615707653034, "loss": 7.1625, "step": 97050 }, { "epoch": 11.679903730445247, "grad_norm": 447.83380126953125, "learning_rate": 0.00019395485450269186, "loss": 7.1652, "step": 97060 }, { "epoch": 11.681107099879663, "grad_norm": 695.3973999023438, "learning_rate": 0.0001939535517928775, "loss": 7.2403, "step": 97070 }, { "epoch": 11.68231046931408, "grad_norm": 718.5765380859375, "learning_rate": 0.00019395224894708913, "loss": 7.226, "step": 97080 }, { "epoch": 11.683513838748496, "grad_norm": 324.5961608886719, "learning_rate": 0.00019395094596532864, "loss": 7.2867, "step": 97090 }, { "epoch": 11.684717208182912, "grad_norm": 608.708984375, "learning_rate": 0.00019394964284759797, "loss": 7.2458, "step": 97100 }, { "epoch": 11.685920577617328, "grad_norm": 3069.66796875, "learning_rate": 0.00019394833959389897, "loss": 7.2379, "step": 97110 }, { "epoch": 11.687123947051745, "grad_norm": 1491.3551025390625, "learning_rate": 0.00019394703620423355, "loss": 7.2741, "step": 97120 }, { "epoch": 11.688327316486161, "grad_norm": 933.3352661132812, "learning_rate": 0.00019394573267860353, "loss": 7.2918, "step": 97130 }, { "epoch": 11.689530685920577, "grad_norm": 2231.718505859375, "learning_rate": 0.00019394442901701084, "loss": 7.1884, "step": 97140 }, { "epoch": 11.690734055354994, "grad_norm": 1130.867919921875, "learning_rate": 0.00019394312521945739, "loss": 7.2346, "step": 97150 }, { "epoch": 11.69193742478941, "grad_norm": 1030.5931396484375, "learning_rate": 0.00019394182128594506, "loss": 7.2024, "step": 97160 }, { "epoch": 11.693140794223826, "grad_norm": 627.8846435546875, "learning_rate": 0.00019394051721647565, "loss": 7.2279, "step": 97170 }, { "epoch": 11.694344163658243, "grad_norm": 735.602783203125, "learning_rate": 0.0001939392130110512, "loss": 7.347, "step": 97180 }, { "epoch": 11.69554753309266, "grad_norm": 1029.7615966796875, "learning_rate": 0.00019393790866967348, "loss": 7.1682, "step": 97190 }, { "epoch": 11.696750902527075, "grad_norm": 956.34716796875, "learning_rate": 0.0001939366041923444, "loss": 7.2551, "step": 97200 }, { "epoch": 11.697954271961493, "grad_norm": 1820.06103515625, "learning_rate": 0.00019393529957906588, "loss": 7.1317, "step": 97210 }, { "epoch": 11.699157641395908, "grad_norm": 957.2282104492188, "learning_rate": 0.0001939339948298398, "loss": 7.224, "step": 97220 }, { "epoch": 11.700361010830324, "grad_norm": 2351.061279296875, "learning_rate": 0.00019393268994466803, "loss": 7.1606, "step": 97230 }, { "epoch": 11.701564380264742, "grad_norm": 1216.29638671875, "learning_rate": 0.00019393138492355245, "loss": 7.269, "step": 97240 }, { "epoch": 11.702767749699158, "grad_norm": 1314.3826904296875, "learning_rate": 0.00019393007976649499, "loss": 7.2536, "step": 97250 }, { "epoch": 11.703971119133573, "grad_norm": 1006.3629760742188, "learning_rate": 0.0001939287744734975, "loss": 7.3008, "step": 97260 }, { "epoch": 11.705174488567991, "grad_norm": 2198.923828125, "learning_rate": 0.00019392746904456186, "loss": 7.2474, "step": 97270 }, { "epoch": 11.706377858002407, "grad_norm": 1371.2738037109375, "learning_rate": 0.00019392616347969002, "loss": 7.3, "step": 97280 }, { "epoch": 11.707581227436823, "grad_norm": 1478.193359375, "learning_rate": 0.00019392485777888383, "loss": 7.2453, "step": 97290 }, { "epoch": 11.70878459687124, "grad_norm": 2130.078369140625, "learning_rate": 0.00019392355194214518, "loss": 7.2719, "step": 97300 }, { "epoch": 11.709987966305656, "grad_norm": 572.8602294921875, "learning_rate": 0.00019392224596947592, "loss": 7.219, "step": 97310 }, { "epoch": 11.711191335740072, "grad_norm": 1772.4779052734375, "learning_rate": 0.00019392093986087802, "loss": 7.2102, "step": 97320 }, { "epoch": 11.71239470517449, "grad_norm": 2705.216064453125, "learning_rate": 0.00019391963361635334, "loss": 7.3427, "step": 97330 }, { "epoch": 11.713598074608905, "grad_norm": 824.8469848632812, "learning_rate": 0.00019391832723590375, "loss": 7.2595, "step": 97340 }, { "epoch": 11.71480144404332, "grad_norm": 357.5631103515625, "learning_rate": 0.00019391702071953115, "loss": 7.1763, "step": 97350 }, { "epoch": 11.716004813477738, "grad_norm": 1005.9691772460938, "learning_rate": 0.00019391571406723743, "loss": 7.3828, "step": 97360 }, { "epoch": 11.717208182912154, "grad_norm": 1013.7130126953125, "learning_rate": 0.00019391440727902447, "loss": 7.326, "step": 97370 }, { "epoch": 11.71841155234657, "grad_norm": 757.0460205078125, "learning_rate": 0.0001939131003548942, "loss": 7.3788, "step": 97380 }, { "epoch": 11.719614921780988, "grad_norm": 2146.37744140625, "learning_rate": 0.00019391179329484847, "loss": 7.2004, "step": 97390 }, { "epoch": 11.720818291215403, "grad_norm": 559.243408203125, "learning_rate": 0.00019391048609888921, "loss": 7.264, "step": 97400 }, { "epoch": 11.722021660649819, "grad_norm": 1754.53759765625, "learning_rate": 0.00019390917876701828, "loss": 7.5141, "step": 97410 }, { "epoch": 11.723225030084237, "grad_norm": 1784.84375, "learning_rate": 0.0001939078712992376, "loss": 7.4901, "step": 97420 }, { "epoch": 11.724428399518652, "grad_norm": 826.5912475585938, "learning_rate": 0.00019390656369554902, "loss": 7.263, "step": 97430 }, { "epoch": 11.725631768953068, "grad_norm": 1254.98583984375, "learning_rate": 0.00019390525595595448, "loss": 7.327, "step": 97440 }, { "epoch": 11.726835138387486, "grad_norm": 1729.592041015625, "learning_rate": 0.00019390394808045582, "loss": 7.3413, "step": 97450 }, { "epoch": 11.728038507821902, "grad_norm": 763.759765625, "learning_rate": 0.000193902640069055, "loss": 7.3255, "step": 97460 }, { "epoch": 11.729241877256317, "grad_norm": 629.5181884765625, "learning_rate": 0.00019390133192175385, "loss": 7.3258, "step": 97470 }, { "epoch": 11.730445246690735, "grad_norm": 1008.3265380859375, "learning_rate": 0.00019390002363855427, "loss": 7.2641, "step": 97480 }, { "epoch": 11.73164861612515, "grad_norm": 1309.145751953125, "learning_rate": 0.00019389871521945822, "loss": 7.2143, "step": 97490 }, { "epoch": 11.732851985559567, "grad_norm": 554.0267944335938, "learning_rate": 0.0001938974066644675, "loss": 7.1891, "step": 97500 }, { "epoch": 11.734055354993982, "grad_norm": 882.9625244140625, "learning_rate": 0.00019389609797358407, "loss": 7.3307, "step": 97510 }, { "epoch": 11.7352587244284, "grad_norm": 433.39190673828125, "learning_rate": 0.00019389478914680984, "loss": 7.2294, "step": 97520 }, { "epoch": 11.736462093862816, "grad_norm": 580.0017700195312, "learning_rate": 0.00019389348018414664, "loss": 7.2665, "step": 97530 }, { "epoch": 11.737665463297231, "grad_norm": 698.08837890625, "learning_rate": 0.00019389217108559635, "loss": 7.169, "step": 97540 }, { "epoch": 11.738868832731649, "grad_norm": 1450.7735595703125, "learning_rate": 0.00019389086185116097, "loss": 7.2314, "step": 97550 }, { "epoch": 11.740072202166065, "grad_norm": 343.9694519042969, "learning_rate": 0.0001938895524808423, "loss": 7.1547, "step": 97560 }, { "epoch": 11.74127557160048, "grad_norm": 2592.584228515625, "learning_rate": 0.0001938882429746423, "loss": 7.1933, "step": 97570 }, { "epoch": 11.742478941034898, "grad_norm": 623.029541015625, "learning_rate": 0.0001938869333325628, "loss": 7.2451, "step": 97580 }, { "epoch": 11.743682310469314, "grad_norm": 246.89083862304688, "learning_rate": 0.00019388562355460574, "loss": 7.2712, "step": 97590 }, { "epoch": 11.74488567990373, "grad_norm": 270.40576171875, "learning_rate": 0.000193884313640773, "loss": 7.2872, "step": 97600 }, { "epoch": 11.746089049338147, "grad_norm": 292.736572265625, "learning_rate": 0.00019388300359106646, "loss": 7.2095, "step": 97610 }, { "epoch": 11.747292418772563, "grad_norm": 374.883544921875, "learning_rate": 0.00019388169340548805, "loss": 7.2559, "step": 97620 }, { "epoch": 11.748495788206979, "grad_norm": 348.9725341796875, "learning_rate": 0.00019388038308403968, "loss": 7.174, "step": 97630 }, { "epoch": 11.749699157641396, "grad_norm": 557.6121826171875, "learning_rate": 0.00019387907262672318, "loss": 7.2309, "step": 97640 }, { "epoch": 11.750902527075812, "grad_norm": 592.2030029296875, "learning_rate": 0.00019387776203354048, "loss": 7.2064, "step": 97650 }, { "epoch": 11.752105896510228, "grad_norm": 807.6978149414062, "learning_rate": 0.0001938764513044935, "loss": 7.2876, "step": 97660 }, { "epoch": 11.753309265944646, "grad_norm": 578.1055297851562, "learning_rate": 0.00019387514043958413, "loss": 7.13, "step": 97670 }, { "epoch": 11.754512635379061, "grad_norm": 199.2294464111328, "learning_rate": 0.00019387382943881421, "loss": 7.2102, "step": 97680 }, { "epoch": 11.755716004813477, "grad_norm": 209.4580078125, "learning_rate": 0.00019387251830218572, "loss": 7.3086, "step": 97690 }, { "epoch": 11.756919374247895, "grad_norm": 212.68966674804688, "learning_rate": 0.00019387120702970049, "loss": 7.2825, "step": 97700 }, { "epoch": 11.75812274368231, "grad_norm": 342.11566162109375, "learning_rate": 0.0001938698956213605, "loss": 7.3014, "step": 97710 }, { "epoch": 11.759326113116726, "grad_norm": 373.3837890625, "learning_rate": 0.00019386858407716755, "loss": 7.2352, "step": 97720 }, { "epoch": 11.760529482551144, "grad_norm": 219.68666076660156, "learning_rate": 0.0001938672723971236, "loss": 7.2783, "step": 97730 }, { "epoch": 11.76173285198556, "grad_norm": 330.65557861328125, "learning_rate": 0.0001938659605812305, "loss": 7.2933, "step": 97740 }, { "epoch": 11.762936221419976, "grad_norm": 2246.142333984375, "learning_rate": 0.0001938646486294902, "loss": 7.3261, "step": 97750 }, { "epoch": 11.764139590854393, "grad_norm": 323.64886474609375, "learning_rate": 0.0001938633365419046, "loss": 7.2726, "step": 97760 }, { "epoch": 11.765342960288809, "grad_norm": 308.22125244140625, "learning_rate": 0.00019386202431847559, "loss": 7.1803, "step": 97770 }, { "epoch": 11.766546329723225, "grad_norm": 252.5232696533203, "learning_rate": 0.00019386071195920501, "loss": 7.2194, "step": 97780 }, { "epoch": 11.76774969915764, "grad_norm": 1257.066162109375, "learning_rate": 0.00019385939946409483, "loss": 7.2427, "step": 97790 }, { "epoch": 11.768953068592058, "grad_norm": 349.7895202636719, "learning_rate": 0.00019385808683314691, "loss": 7.2151, "step": 97800 }, { "epoch": 11.770156438026474, "grad_norm": 283.97357177734375, "learning_rate": 0.00019385677406636318, "loss": 7.2866, "step": 97810 }, { "epoch": 11.77135980746089, "grad_norm": 242.33619689941406, "learning_rate": 0.00019385546116374553, "loss": 7.2093, "step": 97820 }, { "epoch": 11.772563176895307, "grad_norm": 719.7299194335938, "learning_rate": 0.00019385414812529586, "loss": 7.2735, "step": 97830 }, { "epoch": 11.773766546329723, "grad_norm": 822.5176391601562, "learning_rate": 0.00019385283495101604, "loss": 7.246, "step": 97840 }, { "epoch": 11.774969915764139, "grad_norm": 784.7896728515625, "learning_rate": 0.00019385152164090802, "loss": 7.2197, "step": 97850 }, { "epoch": 11.776173285198556, "grad_norm": 191.56918334960938, "learning_rate": 0.00019385020819497365, "loss": 7.2684, "step": 97860 }, { "epoch": 11.777376654632972, "grad_norm": 508.599365234375, "learning_rate": 0.0001938488946132149, "loss": 7.2816, "step": 97870 }, { "epoch": 11.778580024067388, "grad_norm": 226.9486083984375, "learning_rate": 0.0001938475808956336, "loss": 7.2735, "step": 97880 }, { "epoch": 11.779783393501805, "grad_norm": 347.66680908203125, "learning_rate": 0.00019384626704223168, "loss": 7.1585, "step": 97890 }, { "epoch": 11.780986762936221, "grad_norm": 882.27490234375, "learning_rate": 0.00019384495305301106, "loss": 7.2276, "step": 97900 }, { "epoch": 11.782190132370637, "grad_norm": 262.778564453125, "learning_rate": 0.0001938436389279736, "loss": 7.2892, "step": 97910 }, { "epoch": 11.783393501805055, "grad_norm": 308.4969787597656, "learning_rate": 0.00019384232466712124, "loss": 7.2309, "step": 97920 }, { "epoch": 11.78459687123947, "grad_norm": 954.2713623046875, "learning_rate": 0.00019384101027045583, "loss": 7.273, "step": 97930 }, { "epoch": 11.785800240673886, "grad_norm": 184.59352111816406, "learning_rate": 0.00019383969573797937, "loss": 7.2294, "step": 97940 }, { "epoch": 11.787003610108304, "grad_norm": 824.8224487304688, "learning_rate": 0.00019383838106969367, "loss": 7.2572, "step": 97950 }, { "epoch": 11.78820697954272, "grad_norm": 721.7014770507812, "learning_rate": 0.00019383706626560068, "loss": 7.3616, "step": 97960 }, { "epoch": 11.789410348977135, "grad_norm": 201.83432006835938, "learning_rate": 0.00019383575132570226, "loss": 7.354, "step": 97970 }, { "epoch": 11.790613718411553, "grad_norm": 629.5089111328125, "learning_rate": 0.00019383443625000039, "loss": 7.2131, "step": 97980 }, { "epoch": 11.791817087845969, "grad_norm": 758.6402587890625, "learning_rate": 0.00019383312103849688, "loss": 7.2023, "step": 97990 }, { "epoch": 11.793020457280385, "grad_norm": 193.6214599609375, "learning_rate": 0.0001938318056911937, "loss": 7.2367, "step": 98000 }, { "epoch": 11.794223826714802, "grad_norm": 479.528076171875, "learning_rate": 0.00019383049020809268, "loss": 7.3421, "step": 98010 }, { "epoch": 11.795427196149218, "grad_norm": 507.7732238769531, "learning_rate": 0.00019382917458919585, "loss": 7.1746, "step": 98020 }, { "epoch": 11.796630565583634, "grad_norm": 186.1522674560547, "learning_rate": 0.00019382785883450502, "loss": 7.278, "step": 98030 }, { "epoch": 11.797833935018051, "grad_norm": 185.91578674316406, "learning_rate": 0.00019382654294402207, "loss": 7.2305, "step": 98040 }, { "epoch": 11.799037304452467, "grad_norm": 372.1086730957031, "learning_rate": 0.00019382522691774895, "loss": 7.1682, "step": 98050 }, { "epoch": 11.800240673886883, "grad_norm": 651.1045532226562, "learning_rate": 0.00019382391075568759, "loss": 7.2985, "step": 98060 }, { "epoch": 11.8014440433213, "grad_norm": 262.8721008300781, "learning_rate": 0.00019382259445783988, "loss": 7.202, "step": 98070 }, { "epoch": 11.802647412755716, "grad_norm": 188.15924072265625, "learning_rate": 0.0001938212780242077, "loss": 7.1035, "step": 98080 }, { "epoch": 11.803850782190132, "grad_norm": 330.3931579589844, "learning_rate": 0.00019381996145479295, "loss": 7.3127, "step": 98090 }, { "epoch": 11.80505415162455, "grad_norm": 227.94102478027344, "learning_rate": 0.00019381864474959758, "loss": 7.3039, "step": 98100 }, { "epoch": 11.806257521058965, "grad_norm": 140.84469604492188, "learning_rate": 0.00019381732790862345, "loss": 7.3337, "step": 98110 }, { "epoch": 11.807460890493381, "grad_norm": 340.9453430175781, "learning_rate": 0.00019381601093187246, "loss": 7.1657, "step": 98120 }, { "epoch": 11.808664259927799, "grad_norm": 183.7386932373047, "learning_rate": 0.0001938146938193466, "loss": 7.3006, "step": 98130 }, { "epoch": 11.809867629362214, "grad_norm": 779.0843505859375, "learning_rate": 0.00019381337657104766, "loss": 7.2035, "step": 98140 }, { "epoch": 11.81107099879663, "grad_norm": 237.23541259765625, "learning_rate": 0.0001938120591869776, "loss": 7.2857, "step": 98150 }, { "epoch": 11.812274368231048, "grad_norm": 181.39215087890625, "learning_rate": 0.00019381074166713837, "loss": 7.3467, "step": 98160 }, { "epoch": 11.813477737665464, "grad_norm": 236.60719299316406, "learning_rate": 0.0001938094240115318, "loss": 7.3021, "step": 98170 }, { "epoch": 11.81468110709988, "grad_norm": 175.98077392578125, "learning_rate": 0.00019380810622015985, "loss": 7.3032, "step": 98180 }, { "epoch": 11.815884476534297, "grad_norm": 531.7383422851562, "learning_rate": 0.00019380678829302442, "loss": 7.1955, "step": 98190 }, { "epoch": 11.817087845968713, "grad_norm": 257.8716735839844, "learning_rate": 0.0001938054702301274, "loss": 7.2312, "step": 98200 }, { "epoch": 11.818291215403129, "grad_norm": 214.31576538085938, "learning_rate": 0.00019380415203147072, "loss": 7.2404, "step": 98210 }, { "epoch": 11.819494584837544, "grad_norm": 1384.1571044921875, "learning_rate": 0.00019380283369705624, "loss": 7.2254, "step": 98220 }, { "epoch": 11.820697954271962, "grad_norm": 354.4250793457031, "learning_rate": 0.0001938015152268859, "loss": 7.3571, "step": 98230 }, { "epoch": 11.821901323706378, "grad_norm": 496.17333984375, "learning_rate": 0.00019380019662096164, "loss": 7.2166, "step": 98240 }, { "epoch": 11.823104693140793, "grad_norm": 468.5306091308594, "learning_rate": 0.00019379887787928531, "loss": 7.166, "step": 98250 }, { "epoch": 11.824308062575211, "grad_norm": 839.9148559570312, "learning_rate": 0.00019379755900185885, "loss": 7.2568, "step": 98260 }, { "epoch": 11.825511432009627, "grad_norm": 393.55718994140625, "learning_rate": 0.00019379623998868416, "loss": 7.2669, "step": 98270 }, { "epoch": 11.826714801444043, "grad_norm": 608.7523193359375, "learning_rate": 0.00019379492083976317, "loss": 7.2602, "step": 98280 }, { "epoch": 11.82791817087846, "grad_norm": 1107.749755859375, "learning_rate": 0.00019379360155509777, "loss": 7.1704, "step": 98290 }, { "epoch": 11.829121540312876, "grad_norm": 515.56591796875, "learning_rate": 0.00019379228213468988, "loss": 7.2147, "step": 98300 }, { "epoch": 11.830324909747292, "grad_norm": 355.8526306152344, "learning_rate": 0.0001937909625785414, "loss": 7.2304, "step": 98310 }, { "epoch": 11.83152827918171, "grad_norm": 751.2402954101562, "learning_rate": 0.00019378964288665421, "loss": 7.1255, "step": 98320 }, { "epoch": 11.832731648616125, "grad_norm": 215.8268585205078, "learning_rate": 0.0001937883230590303, "loss": 7.2136, "step": 98330 }, { "epoch": 11.833935018050541, "grad_norm": 377.1159973144531, "learning_rate": 0.00019378700309567149, "loss": 7.2863, "step": 98340 }, { "epoch": 11.835138387484958, "grad_norm": 536.8948364257812, "learning_rate": 0.00019378568299657973, "loss": 7.3514, "step": 98350 }, { "epoch": 11.836341756919374, "grad_norm": 192.38772583007812, "learning_rate": 0.00019378436276175695, "loss": 7.2993, "step": 98360 }, { "epoch": 11.83754512635379, "grad_norm": 391.0587463378906, "learning_rate": 0.00019378304239120505, "loss": 7.3251, "step": 98370 }, { "epoch": 11.838748495788208, "grad_norm": 128.91867065429688, "learning_rate": 0.00019378172188492588, "loss": 7.3763, "step": 98380 }, { "epoch": 11.839951865222623, "grad_norm": 147.3256378173828, "learning_rate": 0.00019378040124292146, "loss": 7.3197, "step": 98390 }, { "epoch": 11.84115523465704, "grad_norm": 141.4853973388672, "learning_rate": 0.00019377908046519363, "loss": 7.2579, "step": 98400 }, { "epoch": 11.842358604091457, "grad_norm": 309.0194091796875, "learning_rate": 0.00019377775955174432, "loss": 7.2703, "step": 98410 }, { "epoch": 11.843561973525873, "grad_norm": 235.58509826660156, "learning_rate": 0.00019377643850257545, "loss": 7.2928, "step": 98420 }, { "epoch": 11.844765342960288, "grad_norm": 116.53980255126953, "learning_rate": 0.00019377511731768888, "loss": 7.2744, "step": 98430 }, { "epoch": 11.845968712394706, "grad_norm": 85.77718353271484, "learning_rate": 0.0001937737959970866, "loss": 7.364, "step": 98440 }, { "epoch": 11.847172081829122, "grad_norm": 350.1137390136719, "learning_rate": 0.0001937724745407705, "loss": 7.3654, "step": 98450 }, { "epoch": 11.848375451263538, "grad_norm": 151.2226104736328, "learning_rate": 0.00019377115294874243, "loss": 7.3418, "step": 98460 }, { "epoch": 11.849578820697955, "grad_norm": 262.7079162597656, "learning_rate": 0.00019376983122100437, "loss": 7.2046, "step": 98470 }, { "epoch": 11.85078219013237, "grad_norm": 143.4476318359375, "learning_rate": 0.00019376850935755823, "loss": 7.3117, "step": 98480 }, { "epoch": 11.851985559566787, "grad_norm": 169.92245483398438, "learning_rate": 0.00019376718735840588, "loss": 7.3094, "step": 98490 }, { "epoch": 11.853188929001202, "grad_norm": 117.85218811035156, "learning_rate": 0.00019376586522354926, "loss": 7.3461, "step": 98500 }, { "epoch": 11.85439229843562, "grad_norm": 352.0733337402344, "learning_rate": 0.00019376454295299032, "loss": 7.1903, "step": 98510 }, { "epoch": 11.855595667870036, "grad_norm": 227.34487915039062, "learning_rate": 0.0001937632205467309, "loss": 7.2136, "step": 98520 }, { "epoch": 11.856799037304452, "grad_norm": 134.1216278076172, "learning_rate": 0.00019376189800477298, "loss": 7.4062, "step": 98530 }, { "epoch": 11.85800240673887, "grad_norm": 395.313232421875, "learning_rate": 0.00019376057532711843, "loss": 7.3388, "step": 98540 }, { "epoch": 11.859205776173285, "grad_norm": 257.0782775878906, "learning_rate": 0.00019375925251376917, "loss": 7.3002, "step": 98550 }, { "epoch": 11.8604091456077, "grad_norm": 271.50762939453125, "learning_rate": 0.0001937579295647271, "loss": 7.2972, "step": 98560 }, { "epoch": 11.861612515042118, "grad_norm": 158.8642120361328, "learning_rate": 0.00019375660647999422, "loss": 7.1253, "step": 98570 }, { "epoch": 11.862815884476534, "grad_norm": 130.6636505126953, "learning_rate": 0.00019375528325957234, "loss": 7.252, "step": 98580 }, { "epoch": 11.86401925391095, "grad_norm": 195.52801513671875, "learning_rate": 0.00019375395990346342, "loss": 7.3208, "step": 98590 }, { "epoch": 11.865222623345367, "grad_norm": 101.48219299316406, "learning_rate": 0.00019375263641166937, "loss": 7.2842, "step": 98600 }, { "epoch": 11.866425992779783, "grad_norm": 241.259765625, "learning_rate": 0.0001937513127841921, "loss": 7.3199, "step": 98610 }, { "epoch": 11.867629362214199, "grad_norm": 81.8074722290039, "learning_rate": 0.00019374998902103359, "loss": 7.326, "step": 98620 }, { "epoch": 11.868832731648617, "grad_norm": 143.58274841308594, "learning_rate": 0.00019374866512219564, "loss": 7.2454, "step": 98630 }, { "epoch": 11.870036101083032, "grad_norm": 199.72824096679688, "learning_rate": 0.00019374734108768026, "loss": 7.429, "step": 98640 }, { "epoch": 11.871239470517448, "grad_norm": 99.01521301269531, "learning_rate": 0.00019374601691748934, "loss": 7.4567, "step": 98650 }, { "epoch": 11.872442839951866, "grad_norm": 119.11502838134766, "learning_rate": 0.00019374469261162474, "loss": 7.5071, "step": 98660 }, { "epoch": 11.873646209386282, "grad_norm": 141.5318145751953, "learning_rate": 0.00019374336817008846, "loss": 7.2169, "step": 98670 }, { "epoch": 11.874849578820697, "grad_norm": 214.09669494628906, "learning_rate": 0.00019374204359288237, "loss": 7.3049, "step": 98680 }, { "epoch": 11.876052948255115, "grad_norm": 143.29026794433594, "learning_rate": 0.0001937407188800084, "loss": 7.2945, "step": 98690 }, { "epoch": 11.87725631768953, "grad_norm": 86.36083221435547, "learning_rate": 0.0001937393940314685, "loss": 7.2817, "step": 98700 }, { "epoch": 11.878459687123947, "grad_norm": 148.66464233398438, "learning_rate": 0.0001937380690472645, "loss": 7.3459, "step": 98710 }, { "epoch": 11.879663056558364, "grad_norm": 178.42825317382812, "learning_rate": 0.0001937367439273984, "loss": 7.3013, "step": 98720 }, { "epoch": 11.88086642599278, "grad_norm": 55.61753845214844, "learning_rate": 0.00019373541867187207, "loss": 7.3156, "step": 98730 }, { "epoch": 11.882069795427196, "grad_norm": 48.33180618286133, "learning_rate": 0.00019373409328068747, "loss": 7.28, "step": 98740 }, { "epoch": 11.883273164861613, "grad_norm": 83.62631225585938, "learning_rate": 0.00019373276775384648, "loss": 7.2481, "step": 98750 }, { "epoch": 11.884476534296029, "grad_norm": 98.91661834716797, "learning_rate": 0.00019373144209135107, "loss": 7.4269, "step": 98760 }, { "epoch": 11.885679903730445, "grad_norm": 201.1520538330078, "learning_rate": 0.00019373011629320304, "loss": 7.2154, "step": 98770 }, { "epoch": 11.886883273164862, "grad_norm": 70.19197082519531, "learning_rate": 0.00019372879035940444, "loss": 7.336, "step": 98780 }, { "epoch": 11.888086642599278, "grad_norm": 109.77552795410156, "learning_rate": 0.00019372746428995716, "loss": 7.348, "step": 98790 }, { "epoch": 11.889290012033694, "grad_norm": 116.01335906982422, "learning_rate": 0.0001937261380848631, "loss": 7.2029, "step": 98800 }, { "epoch": 11.890493381468112, "grad_norm": 1028.3544921875, "learning_rate": 0.00019372481174412414, "loss": 7.2875, "step": 98810 }, { "epoch": 11.891696750902527, "grad_norm": 103.023681640625, "learning_rate": 0.00019372348526774225, "loss": 7.3296, "step": 98820 }, { "epoch": 11.892900120336943, "grad_norm": 121.654052734375, "learning_rate": 0.00019372215865571933, "loss": 7.3729, "step": 98830 }, { "epoch": 11.89410348977136, "grad_norm": 69.78581237792969, "learning_rate": 0.00019372083190805732, "loss": 7.2538, "step": 98840 }, { "epoch": 11.895306859205776, "grad_norm": 74.58787536621094, "learning_rate": 0.00019371950502475812, "loss": 7.4778, "step": 98850 }, { "epoch": 11.896510228640192, "grad_norm": 164.1248016357422, "learning_rate": 0.00019371817800582365, "loss": 7.2292, "step": 98860 }, { "epoch": 11.89771359807461, "grad_norm": 118.36473846435547, "learning_rate": 0.00019371685085125585, "loss": 7.2678, "step": 98870 }, { "epoch": 11.898916967509026, "grad_norm": 113.45089721679688, "learning_rate": 0.00019371552356105664, "loss": 7.2654, "step": 98880 }, { "epoch": 11.900120336943441, "grad_norm": 152.16529846191406, "learning_rate": 0.00019371419613522793, "loss": 7.3207, "step": 98890 }, { "epoch": 11.901323706377857, "grad_norm": 128.1909942626953, "learning_rate": 0.00019371286857377165, "loss": 7.1912, "step": 98900 }, { "epoch": 11.902527075812275, "grad_norm": 126.4127197265625, "learning_rate": 0.00019371154087668968, "loss": 7.2401, "step": 98910 }, { "epoch": 11.90373044524669, "grad_norm": 92.52333068847656, "learning_rate": 0.00019371021304398397, "loss": 7.2159, "step": 98920 }, { "epoch": 11.904933814681106, "grad_norm": 97.5126953125, "learning_rate": 0.00019370888507565646, "loss": 7.3579, "step": 98930 }, { "epoch": 11.906137184115524, "grad_norm": 205.54931640625, "learning_rate": 0.00019370755697170906, "loss": 7.2454, "step": 98940 }, { "epoch": 11.90734055354994, "grad_norm": 161.52279663085938, "learning_rate": 0.0001937062287321437, "loss": 7.2965, "step": 98950 }, { "epoch": 11.908543922984355, "grad_norm": 141.312744140625, "learning_rate": 0.00019370490035696227, "loss": 7.2946, "step": 98960 }, { "epoch": 11.909747292418773, "grad_norm": 136.45433044433594, "learning_rate": 0.00019370357184616673, "loss": 7.2291, "step": 98970 }, { "epoch": 11.910950661853189, "grad_norm": 68.9397201538086, "learning_rate": 0.000193702243199759, "loss": 7.2818, "step": 98980 }, { "epoch": 11.912154031287605, "grad_norm": 148.0553436279297, "learning_rate": 0.00019370091441774095, "loss": 7.2469, "step": 98990 }, { "epoch": 11.913357400722022, "grad_norm": 115.45630645751953, "learning_rate": 0.00019369958550011457, "loss": 7.2465, "step": 99000 }, { "epoch": 11.914560770156438, "grad_norm": 190.38906860351562, "learning_rate": 0.00019369825644688176, "loss": 7.2955, "step": 99010 }, { "epoch": 11.915764139590854, "grad_norm": 52.35891342163086, "learning_rate": 0.0001936969272580444, "loss": 7.3681, "step": 99020 }, { "epoch": 11.916967509025271, "grad_norm": 60.705562591552734, "learning_rate": 0.0001936955979336045, "loss": 7.4014, "step": 99030 }, { "epoch": 11.918170878459687, "grad_norm": 150.88270568847656, "learning_rate": 0.00019369426847356392, "loss": 7.2546, "step": 99040 }, { "epoch": 11.919374247894103, "grad_norm": 72.51187896728516, "learning_rate": 0.0001936929388779246, "loss": 7.2991, "step": 99050 }, { "epoch": 11.92057761732852, "grad_norm": 157.69631958007812, "learning_rate": 0.00019369160914668845, "loss": 7.2478, "step": 99060 }, { "epoch": 11.921780986762936, "grad_norm": 106.79886627197266, "learning_rate": 0.00019369027927985743, "loss": 7.2584, "step": 99070 }, { "epoch": 11.922984356197352, "grad_norm": 94.18775939941406, "learning_rate": 0.00019368894927743345, "loss": 7.2719, "step": 99080 }, { "epoch": 11.92418772563177, "grad_norm": 180.44630432128906, "learning_rate": 0.00019368761913941842, "loss": 7.3039, "step": 99090 }, { "epoch": 11.925391095066185, "grad_norm": 116.6115951538086, "learning_rate": 0.00019368628886581429, "loss": 7.2578, "step": 99100 }, { "epoch": 11.926594464500601, "grad_norm": 189.6122283935547, "learning_rate": 0.00019368495845662294, "loss": 7.2478, "step": 99110 }, { "epoch": 11.927797833935019, "grad_norm": 122.07473754882812, "learning_rate": 0.00019368362791184632, "loss": 7.2349, "step": 99120 }, { "epoch": 11.929001203369435, "grad_norm": 53.1552734375, "learning_rate": 0.00019368229723148642, "loss": 7.2924, "step": 99130 }, { "epoch": 11.93020457280385, "grad_norm": 325.55657958984375, "learning_rate": 0.00019368096641554506, "loss": 7.2807, "step": 99140 }, { "epoch": 11.931407942238268, "grad_norm": 172.39874267578125, "learning_rate": 0.0001936796354640242, "loss": 7.2303, "step": 99150 }, { "epoch": 11.932611311672684, "grad_norm": 174.95828247070312, "learning_rate": 0.0001936783043769258, "loss": 7.3568, "step": 99160 }, { "epoch": 11.9338146811071, "grad_norm": 138.89820861816406, "learning_rate": 0.00019367697315425178, "loss": 7.2486, "step": 99170 }, { "epoch": 11.935018050541515, "grad_norm": 112.64092254638672, "learning_rate": 0.00019367564179600401, "loss": 7.3451, "step": 99180 }, { "epoch": 11.936221419975933, "grad_norm": 163.35719299316406, "learning_rate": 0.0001936743103021845, "loss": 7.4022, "step": 99190 }, { "epoch": 11.937424789410349, "grad_norm": 79.35734558105469, "learning_rate": 0.0001936729786727951, "loss": 7.228, "step": 99200 }, { "epoch": 11.938628158844764, "grad_norm": 171.12986755371094, "learning_rate": 0.0001936716469078378, "loss": 7.2654, "step": 99210 }, { "epoch": 11.939831528279182, "grad_norm": 117.14237213134766, "learning_rate": 0.00019367031500731448, "loss": 7.204, "step": 99220 }, { "epoch": 11.941034897713598, "grad_norm": 126.36323547363281, "learning_rate": 0.0001936689829712271, "loss": 7.3549, "step": 99230 }, { "epoch": 11.942238267148014, "grad_norm": 232.3984832763672, "learning_rate": 0.00019366765079957757, "loss": 7.327, "step": 99240 }, { "epoch": 11.943441636582431, "grad_norm": 150.30252075195312, "learning_rate": 0.00019366631849236784, "loss": 7.3605, "step": 99250 }, { "epoch": 11.944645006016847, "grad_norm": 161.02719116210938, "learning_rate": 0.00019366498604959982, "loss": 7.283, "step": 99260 }, { "epoch": 11.945848375451263, "grad_norm": 154.76730346679688, "learning_rate": 0.00019366365347127542, "loss": 7.2007, "step": 99270 }, { "epoch": 11.94705174488568, "grad_norm": 68.1380615234375, "learning_rate": 0.0001936623207573966, "loss": 7.1996, "step": 99280 }, { "epoch": 11.948255114320096, "grad_norm": 158.46258544921875, "learning_rate": 0.00019366098790796524, "loss": 7.2811, "step": 99290 }, { "epoch": 11.949458483754512, "grad_norm": 198.72340393066406, "learning_rate": 0.00019365965492298338, "loss": 7.2066, "step": 99300 }, { "epoch": 11.95066185318893, "grad_norm": 136.68829345703125, "learning_rate": 0.00019365832180245283, "loss": 7.2305, "step": 99310 }, { "epoch": 11.951865222623345, "grad_norm": 174.20272827148438, "learning_rate": 0.00019365698854637557, "loss": 7.1636, "step": 99320 }, { "epoch": 11.953068592057761, "grad_norm": 220.20640563964844, "learning_rate": 0.00019365565515475352, "loss": 7.2696, "step": 99330 }, { "epoch": 11.954271961492179, "grad_norm": 179.85169982910156, "learning_rate": 0.00019365432162758863, "loss": 7.2715, "step": 99340 }, { "epoch": 11.955475330926594, "grad_norm": 143.8240203857422, "learning_rate": 0.0001936529879648828, "loss": 7.2089, "step": 99350 }, { "epoch": 11.95667870036101, "grad_norm": 132.84027099609375, "learning_rate": 0.000193651654166638, "loss": 7.3723, "step": 99360 }, { "epoch": 11.957882069795428, "grad_norm": 75.92465209960938, "learning_rate": 0.0001936503202328561, "loss": 7.2857, "step": 99370 }, { "epoch": 11.959085439229844, "grad_norm": 108.4561538696289, "learning_rate": 0.00019364898616353905, "loss": 7.3754, "step": 99380 }, { "epoch": 11.96028880866426, "grad_norm": 236.8972625732422, "learning_rate": 0.00019364765195868886, "loss": 7.3046, "step": 99390 }, { "epoch": 11.961492178098677, "grad_norm": 74.1569595336914, "learning_rate": 0.00019364631761830734, "loss": 7.2137, "step": 99400 }, { "epoch": 11.962695547533093, "grad_norm": 178.64630126953125, "learning_rate": 0.0001936449831423965, "loss": 7.2537, "step": 99410 }, { "epoch": 11.963898916967509, "grad_norm": 95.76781463623047, "learning_rate": 0.00019364364853095825, "loss": 7.222, "step": 99420 }, { "epoch": 11.965102286401926, "grad_norm": 76.22486114501953, "learning_rate": 0.00019364231378399453, "loss": 7.2069, "step": 99430 }, { "epoch": 11.966305655836342, "grad_norm": 254.67642211914062, "learning_rate": 0.00019364097890150724, "loss": 7.2831, "step": 99440 }, { "epoch": 11.967509025270758, "grad_norm": 164.2748565673828, "learning_rate": 0.00019363964388349837, "loss": 7.275, "step": 99450 }, { "epoch": 11.968712394705175, "grad_norm": 57.14277648925781, "learning_rate": 0.00019363830872996978, "loss": 7.1939, "step": 99460 }, { "epoch": 11.969915764139591, "grad_norm": 68.4383316040039, "learning_rate": 0.00019363697344092345, "loss": 7.1941, "step": 99470 }, { "epoch": 11.971119133574007, "grad_norm": 69.10216522216797, "learning_rate": 0.0001936356380163613, "loss": 7.1493, "step": 99480 }, { "epoch": 11.972322503008424, "grad_norm": 105.06266021728516, "learning_rate": 0.00019363430245628526, "loss": 7.277, "step": 99490 }, { "epoch": 11.97352587244284, "grad_norm": 176.16969299316406, "learning_rate": 0.0001936329667606973, "loss": 7.3413, "step": 99500 }, { "epoch": 11.974729241877256, "grad_norm": 179.69448852539062, "learning_rate": 0.00019363163092959927, "loss": 7.2555, "step": 99510 }, { "epoch": 11.975932611311674, "grad_norm": 101.14362335205078, "learning_rate": 0.00019363029496299319, "loss": 7.179, "step": 99520 }, { "epoch": 11.97713598074609, "grad_norm": 46.72248458862305, "learning_rate": 0.00019362895886088094, "loss": 7.3154, "step": 99530 }, { "epoch": 11.978339350180505, "grad_norm": 100.50609588623047, "learning_rate": 0.00019362762262326445, "loss": 7.2748, "step": 99540 }, { "epoch": 11.979542719614923, "grad_norm": 209.98532104492188, "learning_rate": 0.0001936262862501457, "loss": 7.3112, "step": 99550 }, { "epoch": 11.980746089049338, "grad_norm": 172.79397583007812, "learning_rate": 0.0001936249497415266, "loss": 7.3209, "step": 99560 }, { "epoch": 11.981949458483754, "grad_norm": 171.19419860839844, "learning_rate": 0.00019362361309740908, "loss": 7.277, "step": 99570 }, { "epoch": 11.98315282791817, "grad_norm": 179.21743774414062, "learning_rate": 0.00019362227631779507, "loss": 7.3091, "step": 99580 }, { "epoch": 11.984356197352588, "grad_norm": 114.75621032714844, "learning_rate": 0.00019362093940268653, "loss": 7.1811, "step": 99590 }, { "epoch": 11.985559566787003, "grad_norm": 131.30796813964844, "learning_rate": 0.00019361960235208536, "loss": 7.196, "step": 99600 }, { "epoch": 11.98676293622142, "grad_norm": 104.64381408691406, "learning_rate": 0.0001936182651659935, "loss": 7.2974, "step": 99610 }, { "epoch": 11.987966305655837, "grad_norm": 113.8354263305664, "learning_rate": 0.00019361692784441293, "loss": 7.2838, "step": 99620 }, { "epoch": 11.989169675090253, "grad_norm": 88.38764953613281, "learning_rate": 0.00019361559038734552, "loss": 7.2407, "step": 99630 }, { "epoch": 11.990373044524668, "grad_norm": 144.893310546875, "learning_rate": 0.00019361425279479326, "loss": 7.1158, "step": 99640 }, { "epoch": 11.991576413959086, "grad_norm": 143.81150817871094, "learning_rate": 0.00019361291506675806, "loss": 7.2788, "step": 99650 }, { "epoch": 11.992779783393502, "grad_norm": 200.5011444091797, "learning_rate": 0.00019361157720324185, "loss": 7.2902, "step": 99660 }, { "epoch": 11.993983152827917, "grad_norm": 173.5289306640625, "learning_rate": 0.00019361023920424656, "loss": 7.1614, "step": 99670 }, { "epoch": 11.995186522262335, "grad_norm": 98.81207275390625, "learning_rate": 0.0001936089010697742, "loss": 7.1996, "step": 99680 }, { "epoch": 11.99638989169675, "grad_norm": 152.58604431152344, "learning_rate": 0.0001936075627998266, "loss": 7.2278, "step": 99690 }, { "epoch": 11.997593261131167, "grad_norm": 150.1853485107422, "learning_rate": 0.00019360622439440576, "loss": 7.2378, "step": 99700 }, { "epoch": 11.998796630565584, "grad_norm": 271.13079833984375, "learning_rate": 0.0001936048858535136, "loss": 7.2016, "step": 99710 }, { "epoch": 12.0, "grad_norm": 173.873779296875, "learning_rate": 0.00019360354717715207, "loss": 7.3304, "step": 99720 }, { "epoch": 12.0, "eval_loss": 7.209784507751465, "eval_runtime": 120.2166, "eval_samples_per_second": 61.447, "eval_steps_per_second": 7.686, "step": 99720 }, { "epoch": 12.001203369434416, "grad_norm": 187.25718688964844, "learning_rate": 0.00019360220836532306, "loss": 7.1957, "step": 99730 }, { "epoch": 12.002406738868833, "grad_norm": 103.36690521240234, "learning_rate": 0.00019360086941802858, "loss": 7.1717, "step": 99740 }, { "epoch": 12.00361010830325, "grad_norm": 268.524658203125, "learning_rate": 0.00019359953033527052, "loss": 7.3818, "step": 99750 }, { "epoch": 12.004813477737665, "grad_norm": 867.9302368164062, "learning_rate": 0.00019359819111705085, "loss": 7.1713, "step": 99760 }, { "epoch": 12.006016847172083, "grad_norm": 649.4356689453125, "learning_rate": 0.00019359685176337146, "loss": 7.2287, "step": 99770 }, { "epoch": 12.007220216606498, "grad_norm": 979.7839965820312, "learning_rate": 0.00019359551227423434, "loss": 7.256, "step": 99780 }, { "epoch": 12.008423586040914, "grad_norm": 512.5393676757812, "learning_rate": 0.00019359417264964139, "loss": 7.1805, "step": 99790 }, { "epoch": 12.009626955475332, "grad_norm": 587.2205810546875, "learning_rate": 0.00019359283288959459, "loss": 7.3021, "step": 99800 }, { "epoch": 12.010830324909747, "grad_norm": 1852.1031494140625, "learning_rate": 0.0001935914929940958, "loss": 7.1615, "step": 99810 }, { "epoch": 12.012033694344163, "grad_norm": 312.02886962890625, "learning_rate": 0.00019359015296314705, "loss": 7.1362, "step": 99820 }, { "epoch": 12.01323706377858, "grad_norm": 599.1365966796875, "learning_rate": 0.00019358881279675022, "loss": 7.2562, "step": 99830 }, { "epoch": 12.014440433212997, "grad_norm": 576.3143310546875, "learning_rate": 0.00019358747249490732, "loss": 7.2728, "step": 99840 }, { "epoch": 12.015643802647412, "grad_norm": 1972.549560546875, "learning_rate": 0.00019358613205762022, "loss": 7.334, "step": 99850 }, { "epoch": 12.01684717208183, "grad_norm": 861.9588012695312, "learning_rate": 0.00019358479148489085, "loss": 7.2409, "step": 99860 }, { "epoch": 12.018050541516246, "grad_norm": 539.2259521484375, "learning_rate": 0.00019358345077672123, "loss": 7.1917, "step": 99870 }, { "epoch": 12.019253910950662, "grad_norm": 384.87164306640625, "learning_rate": 0.0001935821099331132, "loss": 7.2393, "step": 99880 }, { "epoch": 12.020457280385079, "grad_norm": 732.364013671875, "learning_rate": 0.00019358076895406877, "loss": 7.297, "step": 99890 }, { "epoch": 12.021660649819495, "grad_norm": 1079.362060546875, "learning_rate": 0.00019357942783958988, "loss": 7.2927, "step": 99900 }, { "epoch": 12.02286401925391, "grad_norm": 1249.7060546875, "learning_rate": 0.00019357808658967843, "loss": 7.2572, "step": 99910 }, { "epoch": 12.024067388688326, "grad_norm": 925.38818359375, "learning_rate": 0.00019357674520433644, "loss": 7.3016, "step": 99920 }, { "epoch": 12.025270758122744, "grad_norm": 1079.5240478515625, "learning_rate": 0.00019357540368356574, "loss": 7.2744, "step": 99930 }, { "epoch": 12.02647412755716, "grad_norm": 1068.26806640625, "learning_rate": 0.00019357406202736836, "loss": 7.1954, "step": 99940 }, { "epoch": 12.027677496991576, "grad_norm": 489.1487731933594, "learning_rate": 0.0001935727202357462, "loss": 7.2742, "step": 99950 }, { "epoch": 12.028880866425993, "grad_norm": 2681.85498046875, "learning_rate": 0.0001935713783087012, "loss": 7.336, "step": 99960 }, { "epoch": 12.030084235860409, "grad_norm": 936.4141235351562, "learning_rate": 0.00019357003624623532, "loss": 7.215, "step": 99970 }, { "epoch": 12.031287605294825, "grad_norm": 440.8946228027344, "learning_rate": 0.0001935686940483505, "loss": 7.2278, "step": 99980 }, { "epoch": 12.032490974729242, "grad_norm": 240.0230255126953, "learning_rate": 0.00019356735171504864, "loss": 7.1908, "step": 99990 }, { "epoch": 12.033694344163658, "grad_norm": 1004.1493530273438, "learning_rate": 0.00019356600924633178, "loss": 7.2427, "step": 100000 }, { "epoch": 12.034897713598074, "grad_norm": 221.97073364257812, "learning_rate": 0.00019356466664220177, "loss": 7.2748, "step": 100010 }, { "epoch": 12.036101083032491, "grad_norm": 1387.8858642578125, "learning_rate": 0.00019356332390266058, "loss": 7.2564, "step": 100020 }, { "epoch": 12.037304452466907, "grad_norm": 332.2581787109375, "learning_rate": 0.00019356198102771018, "loss": 7.2014, "step": 100030 }, { "epoch": 12.038507821901323, "grad_norm": 72.22911071777344, "learning_rate": 0.0001935606380173525, "loss": 7.2356, "step": 100040 }, { "epoch": 12.03971119133574, "grad_norm": 109.0367202758789, "learning_rate": 0.00019355929487158946, "loss": 7.2605, "step": 100050 }, { "epoch": 12.040914560770156, "grad_norm": 100.6845703125, "learning_rate": 0.000193557951590423, "loss": 7.2765, "step": 100060 }, { "epoch": 12.042117930204572, "grad_norm": 136.1052703857422, "learning_rate": 0.00019355660817385514, "loss": 7.196, "step": 100070 }, { "epoch": 12.04332129963899, "grad_norm": 159.7888641357422, "learning_rate": 0.00019355526462188776, "loss": 7.2828, "step": 100080 }, { "epoch": 12.044524669073406, "grad_norm": 85.20870208740234, "learning_rate": 0.00019355392093452277, "loss": 7.118, "step": 100090 }, { "epoch": 12.045728038507821, "grad_norm": 94.80852508544922, "learning_rate": 0.0001935525771117622, "loss": 7.2206, "step": 100100 }, { "epoch": 12.046931407942239, "grad_norm": 150.93528747558594, "learning_rate": 0.00019355123315360792, "loss": 7.1637, "step": 100110 }, { "epoch": 12.048134777376655, "grad_norm": 135.34988403320312, "learning_rate": 0.00019354988906006195, "loss": 7.2427, "step": 100120 }, { "epoch": 12.04933814681107, "grad_norm": 148.75509643554688, "learning_rate": 0.00019354854483112616, "loss": 7.2306, "step": 100130 }, { "epoch": 12.050541516245488, "grad_norm": 80.59191131591797, "learning_rate": 0.00019354720046680255, "loss": 7.2597, "step": 100140 }, { "epoch": 12.051744885679904, "grad_norm": 373.69921875, "learning_rate": 0.000193545855967093, "loss": 7.2687, "step": 100150 }, { "epoch": 12.05294825511432, "grad_norm": 255.21372985839844, "learning_rate": 0.00019354451133199958, "loss": 7.2234, "step": 100160 }, { "epoch": 12.054151624548737, "grad_norm": 236.16403198242188, "learning_rate": 0.0001935431665615241, "loss": 7.1656, "step": 100170 }, { "epoch": 12.055354993983153, "grad_norm": 212.81130981445312, "learning_rate": 0.0001935418216556686, "loss": 7.1653, "step": 100180 }, { "epoch": 12.056558363417569, "grad_norm": 103.7719497680664, "learning_rate": 0.00019354047661443495, "loss": 7.2399, "step": 100190 }, { "epoch": 12.057761732851986, "grad_norm": 243.9726104736328, "learning_rate": 0.00019353913143782517, "loss": 7.3054, "step": 100200 }, { "epoch": 12.058965102286402, "grad_norm": 116.71979522705078, "learning_rate": 0.00019353778612584113, "loss": 7.2362, "step": 100210 }, { "epoch": 12.060168471720818, "grad_norm": 432.28131103515625, "learning_rate": 0.00019353644067848484, "loss": 7.2501, "step": 100220 }, { "epoch": 12.061371841155236, "grad_norm": 178.4130859375, "learning_rate": 0.00019353509509575822, "loss": 7.1561, "step": 100230 }, { "epoch": 12.062575210589651, "grad_norm": 277.750732421875, "learning_rate": 0.00019353374937766327, "loss": 7.1189, "step": 100240 }, { "epoch": 12.063778580024067, "grad_norm": 190.09104919433594, "learning_rate": 0.00019353240352420182, "loss": 7.2278, "step": 100250 }, { "epoch": 12.064981949458483, "grad_norm": 275.77935791015625, "learning_rate": 0.00019353105753537595, "loss": 7.1461, "step": 100260 }, { "epoch": 12.0661853188929, "grad_norm": 272.4902648925781, "learning_rate": 0.0001935297114111875, "loss": 7.1968, "step": 100270 }, { "epoch": 12.067388688327316, "grad_norm": 303.45550537109375, "learning_rate": 0.0001935283651516385, "loss": 7.1336, "step": 100280 }, { "epoch": 12.068592057761732, "grad_norm": 273.66064453125, "learning_rate": 0.00019352701875673085, "loss": 7.2506, "step": 100290 }, { "epoch": 12.06979542719615, "grad_norm": 228.72760009765625, "learning_rate": 0.00019352567222646655, "loss": 7.2459, "step": 100300 }, { "epoch": 12.070998796630565, "grad_norm": 195.4521026611328, "learning_rate": 0.00019352432556084747, "loss": 7.1851, "step": 100310 }, { "epoch": 12.072202166064981, "grad_norm": 322.5349426269531, "learning_rate": 0.00019352297875987562, "loss": 7.2004, "step": 100320 }, { "epoch": 12.073405535499399, "grad_norm": 181.39096069335938, "learning_rate": 0.0001935216318235529, "loss": 7.2689, "step": 100330 }, { "epoch": 12.074608904933815, "grad_norm": 185.29345703125, "learning_rate": 0.0001935202847518813, "loss": 7.2695, "step": 100340 }, { "epoch": 12.07581227436823, "grad_norm": 216.049560546875, "learning_rate": 0.00019351893754486276, "loss": 7.2004, "step": 100350 }, { "epoch": 12.077015643802648, "grad_norm": 200.36883544921875, "learning_rate": 0.00019351759020249924, "loss": 7.1085, "step": 100360 }, { "epoch": 12.078219013237064, "grad_norm": 336.3588562011719, "learning_rate": 0.0001935162427247927, "loss": 7.2686, "step": 100370 }, { "epoch": 12.07942238267148, "grad_norm": 160.20558166503906, "learning_rate": 0.000193514895111745, "loss": 7.1704, "step": 100380 }, { "epoch": 12.080625752105897, "grad_norm": 234.55931091308594, "learning_rate": 0.00019351354736335823, "loss": 7.1785, "step": 100390 }, { "epoch": 12.081829121540313, "grad_norm": 168.11973571777344, "learning_rate": 0.00019351219947963424, "loss": 7.2997, "step": 100400 }, { "epoch": 12.083032490974729, "grad_norm": 309.0727233886719, "learning_rate": 0.00019351085146057502, "loss": 7.2163, "step": 100410 }, { "epoch": 12.084235860409146, "grad_norm": 311.2032165527344, "learning_rate": 0.0001935095033061825, "loss": 7.2075, "step": 100420 }, { "epoch": 12.085439229843562, "grad_norm": 341.4127197265625, "learning_rate": 0.00019350815501645863, "loss": 7.2174, "step": 100430 }, { "epoch": 12.086642599277978, "grad_norm": 175.08729553222656, "learning_rate": 0.00019350680659140537, "loss": 7.1565, "step": 100440 }, { "epoch": 12.087845968712395, "grad_norm": 221.8728485107422, "learning_rate": 0.00019350545803102471, "loss": 7.1756, "step": 100450 }, { "epoch": 12.089049338146811, "grad_norm": 121.81482696533203, "learning_rate": 0.00019350410933531852, "loss": 7.2155, "step": 100460 }, { "epoch": 12.090252707581227, "grad_norm": 347.6531066894531, "learning_rate": 0.00019350276050428885, "loss": 7.2554, "step": 100470 }, { "epoch": 12.091456077015645, "grad_norm": 175.2806396484375, "learning_rate": 0.00019350141153793757, "loss": 7.2311, "step": 100480 }, { "epoch": 12.09265944645006, "grad_norm": 457.0815124511719, "learning_rate": 0.00019350006243626665, "loss": 7.1906, "step": 100490 }, { "epoch": 12.093862815884476, "grad_norm": 398.2767639160156, "learning_rate": 0.00019349871319927807, "loss": 7.1875, "step": 100500 }, { "epoch": 12.095066185318894, "grad_norm": 233.47964477539062, "learning_rate": 0.00019349736382697378, "loss": 7.2554, "step": 100510 }, { "epoch": 12.09626955475331, "grad_norm": 416.8374938964844, "learning_rate": 0.00019349601431935574, "loss": 7.1829, "step": 100520 }, { "epoch": 12.097472924187725, "grad_norm": 489.1484680175781, "learning_rate": 0.00019349466467642582, "loss": 7.2226, "step": 100530 }, { "epoch": 12.098676293622143, "grad_norm": 625.876220703125, "learning_rate": 0.00019349331489818607, "loss": 7.2729, "step": 100540 }, { "epoch": 12.099879663056559, "grad_norm": 301.9091491699219, "learning_rate": 0.00019349196498463843, "loss": 7.2179, "step": 100550 }, { "epoch": 12.101083032490974, "grad_norm": 621.7656860351562, "learning_rate": 0.00019349061493578482, "loss": 7.2261, "step": 100560 }, { "epoch": 12.102286401925392, "grad_norm": 604.3579711914062, "learning_rate": 0.0001934892647516272, "loss": 7.2722, "step": 100570 }, { "epoch": 12.103489771359808, "grad_norm": 460.4038391113281, "learning_rate": 0.00019348791443216757, "loss": 7.1147, "step": 100580 }, { "epoch": 12.104693140794224, "grad_norm": 158.8830108642578, "learning_rate": 0.00019348656397740778, "loss": 7.1657, "step": 100590 }, { "epoch": 12.10589651022864, "grad_norm": 313.7085876464844, "learning_rate": 0.00019348521338734989, "loss": 7.2749, "step": 100600 }, { "epoch": 12.107099879663057, "grad_norm": 282.0453186035156, "learning_rate": 0.00019348386266199584, "loss": 7.2465, "step": 100610 }, { "epoch": 12.108303249097473, "grad_norm": 323.4155578613281, "learning_rate": 0.0001934825118013475, "loss": 7.252, "step": 100620 }, { "epoch": 12.109506618531888, "grad_norm": 138.65968322753906, "learning_rate": 0.00019348116080540693, "loss": 7.222, "step": 100630 }, { "epoch": 12.110709987966306, "grad_norm": 186.51385498046875, "learning_rate": 0.00019347980967417602, "loss": 7.2035, "step": 100640 }, { "epoch": 12.111913357400722, "grad_norm": 289.2260437011719, "learning_rate": 0.00019347845840765677, "loss": 7.3423, "step": 100650 }, { "epoch": 12.113116726835138, "grad_norm": 274.5934753417969, "learning_rate": 0.00019347710700585107, "loss": 7.2769, "step": 100660 }, { "epoch": 12.114320096269555, "grad_norm": 499.8416442871094, "learning_rate": 0.00019347575546876095, "loss": 7.1917, "step": 100670 }, { "epoch": 12.115523465703971, "grad_norm": 274.7139892578125, "learning_rate": 0.00019347440379638832, "loss": 7.1797, "step": 100680 }, { "epoch": 12.116726835138387, "grad_norm": 140.1739501953125, "learning_rate": 0.00019347305198873514, "loss": 7.2113, "step": 100690 }, { "epoch": 12.117930204572804, "grad_norm": 273.6724548339844, "learning_rate": 0.0001934717000458034, "loss": 7.3191, "step": 100700 }, { "epoch": 12.11913357400722, "grad_norm": 349.076171875, "learning_rate": 0.00019347034796759503, "loss": 7.1546, "step": 100710 }, { "epoch": 12.120336943441636, "grad_norm": 627.3411865234375, "learning_rate": 0.000193468995754112, "loss": 7.2496, "step": 100720 }, { "epoch": 12.121540312876053, "grad_norm": 437.5904235839844, "learning_rate": 0.0001934676434053562, "loss": 7.1129, "step": 100730 }, { "epoch": 12.12274368231047, "grad_norm": 351.3270263671875, "learning_rate": 0.00019346629092132972, "loss": 7.2853, "step": 100740 }, { "epoch": 12.123947051744885, "grad_norm": 334.68109130859375, "learning_rate": 0.0001934649383020344, "loss": 7.2624, "step": 100750 }, { "epoch": 12.125150421179303, "grad_norm": 448.0818786621094, "learning_rate": 0.00019346358554747225, "loss": 7.1592, "step": 100760 }, { "epoch": 12.126353790613718, "grad_norm": 298.28668212890625, "learning_rate": 0.0001934622326576452, "loss": 7.2455, "step": 100770 }, { "epoch": 12.127557160048134, "grad_norm": 426.3834228515625, "learning_rate": 0.00019346087963255523, "loss": 7.266, "step": 100780 }, { "epoch": 12.128760529482552, "grad_norm": 598.8390502929688, "learning_rate": 0.00019345952647220428, "loss": 7.2482, "step": 100790 }, { "epoch": 12.129963898916968, "grad_norm": 531.2196044921875, "learning_rate": 0.00019345817317659435, "loss": 7.2136, "step": 100800 }, { "epoch": 12.131167268351383, "grad_norm": 497.2467041015625, "learning_rate": 0.00019345681974572735, "loss": 7.2716, "step": 100810 }, { "epoch": 12.132370637785801, "grad_norm": 453.32135009765625, "learning_rate": 0.00019345546617960526, "loss": 7.3065, "step": 100820 }, { "epoch": 12.133574007220217, "grad_norm": 289.6233825683594, "learning_rate": 0.00019345411247823004, "loss": 7.3029, "step": 100830 }, { "epoch": 12.134777376654633, "grad_norm": 155.31907653808594, "learning_rate": 0.00019345275864160366, "loss": 7.2986, "step": 100840 }, { "epoch": 12.13598074608905, "grad_norm": 772.6607055664062, "learning_rate": 0.00019345140466972804, "loss": 7.2191, "step": 100850 }, { "epoch": 12.137184115523466, "grad_norm": 429.3636474609375, "learning_rate": 0.0001934500505626052, "loss": 7.2323, "step": 100860 }, { "epoch": 12.138387484957882, "grad_norm": 345.03570556640625, "learning_rate": 0.00019344869632023702, "loss": 7.2898, "step": 100870 }, { "epoch": 12.1395908543923, "grad_norm": 236.43984985351562, "learning_rate": 0.00019344734194262552, "loss": 7.2594, "step": 100880 }, { "epoch": 12.140794223826715, "grad_norm": 84.82162475585938, "learning_rate": 0.00019344598742977267, "loss": 7.1923, "step": 100890 }, { "epoch": 12.14199759326113, "grad_norm": 192.3656768798828, "learning_rate": 0.00019344463278168038, "loss": 7.2424, "step": 100900 }, { "epoch": 12.143200962695548, "grad_norm": 189.2291259765625, "learning_rate": 0.00019344327799835064, "loss": 7.2318, "step": 100910 }, { "epoch": 12.144404332129964, "grad_norm": 371.7607116699219, "learning_rate": 0.00019344192307978544, "loss": 7.2003, "step": 100920 }, { "epoch": 12.14560770156438, "grad_norm": 241.46026611328125, "learning_rate": 0.00019344056802598667, "loss": 7.1725, "step": 100930 }, { "epoch": 12.146811070998796, "grad_norm": 252.74256896972656, "learning_rate": 0.00019343921283695631, "loss": 7.2456, "step": 100940 }, { "epoch": 12.148014440433213, "grad_norm": 334.9472351074219, "learning_rate": 0.00019343785751269637, "loss": 7.1778, "step": 100950 }, { "epoch": 12.14921780986763, "grad_norm": 311.9128723144531, "learning_rate": 0.00019343650205320878, "loss": 7.1967, "step": 100960 }, { "epoch": 12.150421179302045, "grad_norm": 291.40325927734375, "learning_rate": 0.0001934351464584955, "loss": 7.2297, "step": 100970 }, { "epoch": 12.151624548736462, "grad_norm": 414.8866271972656, "learning_rate": 0.0001934337907285585, "loss": 7.2476, "step": 100980 }, { "epoch": 12.152827918170878, "grad_norm": 274.6656494140625, "learning_rate": 0.0001934324348633997, "loss": 7.205, "step": 100990 }, { "epoch": 12.154031287605294, "grad_norm": 248.60829162597656, "learning_rate": 0.00019343107886302113, "loss": 7.1317, "step": 101000 }, { "epoch": 12.155234657039712, "grad_norm": 367.2349853515625, "learning_rate": 0.00019342972272742474, "loss": 7.1926, "step": 101010 }, { "epoch": 12.156438026474127, "grad_norm": 451.37103271484375, "learning_rate": 0.00019342836645661247, "loss": 7.1502, "step": 101020 }, { "epoch": 12.157641395908543, "grad_norm": 340.48223876953125, "learning_rate": 0.00019342701005058626, "loss": 7.1377, "step": 101030 }, { "epoch": 12.15884476534296, "grad_norm": 185.7396240234375, "learning_rate": 0.00019342565350934812, "loss": 7.2292, "step": 101040 }, { "epoch": 12.160048134777377, "grad_norm": 409.4861755371094, "learning_rate": 0.00019342429683289999, "loss": 7.0853, "step": 101050 }, { "epoch": 12.161251504211792, "grad_norm": 263.13800048828125, "learning_rate": 0.00019342294002124382, "loss": 7.2936, "step": 101060 }, { "epoch": 12.16245487364621, "grad_norm": 302.5745544433594, "learning_rate": 0.0001934215830743816, "loss": 7.2643, "step": 101070 }, { "epoch": 12.163658243080626, "grad_norm": 307.70635986328125, "learning_rate": 0.00019342022599231528, "loss": 7.1645, "step": 101080 }, { "epoch": 12.164861612515042, "grad_norm": 89.23799133300781, "learning_rate": 0.00019341886877504685, "loss": 7.2784, "step": 101090 }, { "epoch": 12.166064981949459, "grad_norm": 333.6704406738281, "learning_rate": 0.00019341751142257823, "loss": 7.2502, "step": 101100 }, { "epoch": 12.167268351383875, "grad_norm": 210.12353515625, "learning_rate": 0.0001934161539349114, "loss": 7.2462, "step": 101110 }, { "epoch": 12.16847172081829, "grad_norm": 221.42861938476562, "learning_rate": 0.00019341479631204835, "loss": 7.2906, "step": 101120 }, { "epoch": 12.169675090252708, "grad_norm": 214.8041229248047, "learning_rate": 0.000193413438553991, "loss": 7.2193, "step": 101130 }, { "epoch": 12.170878459687124, "grad_norm": 155.70309448242188, "learning_rate": 0.0001934120806607414, "loss": 7.1688, "step": 101140 }, { "epoch": 12.17208182912154, "grad_norm": 303.963623046875, "learning_rate": 0.00019341072263230142, "loss": 7.1714, "step": 101150 }, { "epoch": 12.173285198555957, "grad_norm": 256.7359619140625, "learning_rate": 0.00019340936446867306, "loss": 7.1868, "step": 101160 }, { "epoch": 12.174488567990373, "grad_norm": 306.3113708496094, "learning_rate": 0.00019340800616985827, "loss": 7.2854, "step": 101170 }, { "epoch": 12.175691937424789, "grad_norm": 240.64564514160156, "learning_rate": 0.00019340664773585906, "loss": 7.3645, "step": 101180 }, { "epoch": 12.176895306859207, "grad_norm": 235.72113037109375, "learning_rate": 0.00019340528916667737, "loss": 7.2431, "step": 101190 }, { "epoch": 12.178098676293622, "grad_norm": 256.3206787109375, "learning_rate": 0.00019340393046231517, "loss": 7.3116, "step": 101200 }, { "epoch": 12.179302045728038, "grad_norm": 387.47802734375, "learning_rate": 0.00019340257162277439, "loss": 7.2167, "step": 101210 }, { "epoch": 12.180505415162456, "grad_norm": 675.2747192382812, "learning_rate": 0.00019340121264805707, "loss": 7.162, "step": 101220 }, { "epoch": 12.181708784596871, "grad_norm": 583.4373168945312, "learning_rate": 0.0001933998535381651, "loss": 7.1362, "step": 101230 }, { "epoch": 12.182912154031287, "grad_norm": 471.8766174316406, "learning_rate": 0.0001933984942931005, "loss": 7.1989, "step": 101240 }, { "epoch": 12.184115523465705, "grad_norm": 375.63250732421875, "learning_rate": 0.0001933971349128652, "loss": 7.189, "step": 101250 }, { "epoch": 12.18531889290012, "grad_norm": 297.8193664550781, "learning_rate": 0.0001933957753974612, "loss": 7.2216, "step": 101260 }, { "epoch": 12.186522262334536, "grad_norm": 402.51312255859375, "learning_rate": 0.00019339441574689044, "loss": 7.2672, "step": 101270 }, { "epoch": 12.187725631768952, "grad_norm": 308.5905456542969, "learning_rate": 0.00019339305596115494, "loss": 7.2918, "step": 101280 }, { "epoch": 12.18892900120337, "grad_norm": 390.3688049316406, "learning_rate": 0.0001933916960402566, "loss": 7.1727, "step": 101290 }, { "epoch": 12.190132370637786, "grad_norm": 352.4194030761719, "learning_rate": 0.00019339033598419746, "loss": 7.2127, "step": 101300 }, { "epoch": 12.191335740072201, "grad_norm": 315.3782043457031, "learning_rate": 0.0001933889757929794, "loss": 7.2809, "step": 101310 }, { "epoch": 12.192539109506619, "grad_norm": 755.7091064453125, "learning_rate": 0.0001933876154666044, "loss": 7.3368, "step": 101320 }, { "epoch": 12.193742478941035, "grad_norm": 426.1267395019531, "learning_rate": 0.00019338625500507455, "loss": 7.2162, "step": 101330 }, { "epoch": 12.19494584837545, "grad_norm": 403.9010009765625, "learning_rate": 0.0001933848944083917, "loss": 7.2365, "step": 101340 }, { "epoch": 12.196149217809868, "grad_norm": 528.127197265625, "learning_rate": 0.00019338353367655785, "loss": 7.2191, "step": 101350 }, { "epoch": 12.197352587244284, "grad_norm": 413.1864013671875, "learning_rate": 0.00019338217280957494, "loss": 7.1863, "step": 101360 }, { "epoch": 12.1985559566787, "grad_norm": 758.5889892578125, "learning_rate": 0.00019338081180744502, "loss": 7.2286, "step": 101370 }, { "epoch": 12.199759326113117, "grad_norm": 689.4074096679688, "learning_rate": 0.00019337945067016997, "loss": 7.3487, "step": 101380 }, { "epoch": 12.200962695547533, "grad_norm": 675.9899291992188, "learning_rate": 0.00019337808939775183, "loss": 7.3461, "step": 101390 }, { "epoch": 12.202166064981949, "grad_norm": 1049.57373046875, "learning_rate": 0.00019337672799019253, "loss": 7.2437, "step": 101400 }, { "epoch": 12.203369434416366, "grad_norm": 855.0003662109375, "learning_rate": 0.00019337536644749404, "loss": 7.3677, "step": 101410 }, { "epoch": 12.204572803850782, "grad_norm": 713.3817749023438, "learning_rate": 0.00019337400476965834, "loss": 7.3115, "step": 101420 }, { "epoch": 12.205776173285198, "grad_norm": 505.4274597167969, "learning_rate": 0.00019337264295668743, "loss": 7.3864, "step": 101430 }, { "epoch": 12.206979542719615, "grad_norm": 361.9706726074219, "learning_rate": 0.00019337128100858323, "loss": 7.2781, "step": 101440 }, { "epoch": 12.208182912154031, "grad_norm": 553.4201049804688, "learning_rate": 0.00019336991892534772, "loss": 7.2834, "step": 101450 }, { "epoch": 12.209386281588447, "grad_norm": 306.5463562011719, "learning_rate": 0.00019336855670698292, "loss": 7.1579, "step": 101460 }, { "epoch": 12.210589651022865, "grad_norm": 526.7699584960938, "learning_rate": 0.00019336719435349075, "loss": 7.3386, "step": 101470 }, { "epoch": 12.21179302045728, "grad_norm": 292.1778869628906, "learning_rate": 0.0001933658318648732, "loss": 7.2308, "step": 101480 }, { "epoch": 12.212996389891696, "grad_norm": 220.00936889648438, "learning_rate": 0.00019336446924113224, "loss": 7.2684, "step": 101490 }, { "epoch": 12.214199759326114, "grad_norm": 407.5308837890625, "learning_rate": 0.00019336310648226986, "loss": 7.2218, "step": 101500 }, { "epoch": 12.21540312876053, "grad_norm": 273.404052734375, "learning_rate": 0.00019336174358828797, "loss": 7.256, "step": 101510 }, { "epoch": 12.216606498194945, "grad_norm": 380.9325866699219, "learning_rate": 0.00019336038055918863, "loss": 7.1894, "step": 101520 }, { "epoch": 12.217809867629363, "grad_norm": 295.1746826171875, "learning_rate": 0.00019335901739497374, "loss": 7.1889, "step": 101530 }, { "epoch": 12.219013237063779, "grad_norm": 235.63473510742188, "learning_rate": 0.0001933576540956453, "loss": 7.2149, "step": 101540 }, { "epoch": 12.220216606498195, "grad_norm": 471.75274658203125, "learning_rate": 0.00019335629066120533, "loss": 7.0959, "step": 101550 }, { "epoch": 12.221419975932612, "grad_norm": 499.1694030761719, "learning_rate": 0.00019335492709165572, "loss": 7.2216, "step": 101560 }, { "epoch": 12.222623345367028, "grad_norm": 415.7610168457031, "learning_rate": 0.0001933535633869985, "loss": 7.2801, "step": 101570 }, { "epoch": 12.223826714801444, "grad_norm": 603.1621704101562, "learning_rate": 0.0001933521995472356, "loss": 7.2372, "step": 101580 }, { "epoch": 12.225030084235861, "grad_norm": 375.14453125, "learning_rate": 0.00019335083557236904, "loss": 7.3356, "step": 101590 }, { "epoch": 12.226233453670277, "grad_norm": 581.4219970703125, "learning_rate": 0.00019334947146240077, "loss": 7.2836, "step": 101600 }, { "epoch": 12.227436823104693, "grad_norm": 443.1958312988281, "learning_rate": 0.00019334810721733273, "loss": 7.2507, "step": 101610 }, { "epoch": 12.22864019253911, "grad_norm": 709.861572265625, "learning_rate": 0.00019334674283716697, "loss": 7.1872, "step": 101620 }, { "epoch": 12.229843561973526, "grad_norm": 319.0143127441406, "learning_rate": 0.00019334537832190545, "loss": 7.1973, "step": 101630 }, { "epoch": 12.231046931407942, "grad_norm": 544.3501586914062, "learning_rate": 0.00019334401367155008, "loss": 7.2598, "step": 101640 }, { "epoch": 12.232250300842358, "grad_norm": 923.862548828125, "learning_rate": 0.00019334264888610288, "loss": 7.2714, "step": 101650 }, { "epoch": 12.233453670276775, "grad_norm": 584.833251953125, "learning_rate": 0.00019334128396556586, "loss": 7.2273, "step": 101660 }, { "epoch": 12.234657039711191, "grad_norm": 1729.77978515625, "learning_rate": 0.00019333991890994092, "loss": 7.3817, "step": 101670 }, { "epoch": 12.235860409145607, "grad_norm": 1678.5914306640625, "learning_rate": 0.0001933385537192301, "loss": 7.1393, "step": 101680 }, { "epoch": 12.237063778580024, "grad_norm": 598.308349609375, "learning_rate": 0.00019333718839343531, "loss": 7.2668, "step": 101690 }, { "epoch": 12.23826714801444, "grad_norm": 1732.816650390625, "learning_rate": 0.0001933358229325586, "loss": 7.2058, "step": 101700 }, { "epoch": 12.239470517448856, "grad_norm": 798.1570434570312, "learning_rate": 0.00019333445733660186, "loss": 7.3182, "step": 101710 }, { "epoch": 12.240673886883274, "grad_norm": 1631.985107421875, "learning_rate": 0.00019333309160556717, "loss": 7.3232, "step": 101720 }, { "epoch": 12.24187725631769, "grad_norm": 1210.0718994140625, "learning_rate": 0.00019333172573945644, "loss": 7.2411, "step": 101730 }, { "epoch": 12.243080625752105, "grad_norm": 449.6768493652344, "learning_rate": 0.00019333035973827164, "loss": 7.2817, "step": 101740 }, { "epoch": 12.244283995186523, "grad_norm": 648.34130859375, "learning_rate": 0.0001933289936020148, "loss": 7.1525, "step": 101750 }, { "epoch": 12.245487364620939, "grad_norm": 1073.5108642578125, "learning_rate": 0.00019332762733068783, "loss": 7.1361, "step": 101760 }, { "epoch": 12.246690734055354, "grad_norm": 2393.293701171875, "learning_rate": 0.0001933262609242928, "loss": 7.1835, "step": 101770 }, { "epoch": 12.247894103489772, "grad_norm": 778.3978881835938, "learning_rate": 0.00019332489438283156, "loss": 7.2649, "step": 101780 }, { "epoch": 12.249097472924188, "grad_norm": 674.7791137695312, "learning_rate": 0.00019332352770630616, "loss": 7.1656, "step": 101790 }, { "epoch": 12.250300842358604, "grad_norm": 1137.3094482421875, "learning_rate": 0.0001933221608947186, "loss": 7.248, "step": 101800 }, { "epoch": 12.251504211793021, "grad_norm": 695.7918090820312, "learning_rate": 0.00019332079394807083, "loss": 7.2165, "step": 101810 }, { "epoch": 12.252707581227437, "grad_norm": 1119.053955078125, "learning_rate": 0.00019331942686636483, "loss": 7.2189, "step": 101820 }, { "epoch": 12.253910950661853, "grad_norm": 1177.5511474609375, "learning_rate": 0.00019331805964960258, "loss": 7.195, "step": 101830 }, { "epoch": 12.25511432009627, "grad_norm": 691.4961547851562, "learning_rate": 0.0001933166922977861, "loss": 7.1818, "step": 101840 }, { "epoch": 12.256317689530686, "grad_norm": 742.1613159179688, "learning_rate": 0.00019331532481091726, "loss": 7.2151, "step": 101850 }, { "epoch": 12.257521058965102, "grad_norm": 1167.755859375, "learning_rate": 0.00019331395718899814, "loss": 7.2807, "step": 101860 }, { "epoch": 12.25872442839952, "grad_norm": 894.4489135742188, "learning_rate": 0.00019331258943203067, "loss": 7.18, "step": 101870 }, { "epoch": 12.259927797833935, "grad_norm": 713.478271484375, "learning_rate": 0.00019331122154001685, "loss": 7.3303, "step": 101880 }, { "epoch": 12.261131167268351, "grad_norm": 774.05419921875, "learning_rate": 0.00019330985351295865, "loss": 7.2863, "step": 101890 }, { "epoch": 12.262334536702769, "grad_norm": 800.697265625, "learning_rate": 0.00019330848535085806, "loss": 7.3505, "step": 101900 }, { "epoch": 12.263537906137184, "grad_norm": 600.5453491210938, "learning_rate": 0.00019330711705371707, "loss": 7.3349, "step": 101910 }, { "epoch": 12.2647412755716, "grad_norm": 1391.663818359375, "learning_rate": 0.00019330574862153762, "loss": 7.3021, "step": 101920 }, { "epoch": 12.265944645006018, "grad_norm": 1171.2723388671875, "learning_rate": 0.00019330438005432172, "loss": 7.2055, "step": 101930 }, { "epoch": 12.267148014440433, "grad_norm": 942.2772827148438, "learning_rate": 0.00019330301135207136, "loss": 7.189, "step": 101940 }, { "epoch": 12.26835138387485, "grad_norm": 712.9710083007812, "learning_rate": 0.0001933016425147885, "loss": 7.3585, "step": 101950 }, { "epoch": 12.269554753309265, "grad_norm": 568.570556640625, "learning_rate": 0.0001933002735424751, "loss": 7.2893, "step": 101960 }, { "epoch": 12.270758122743683, "grad_norm": 841.6047973632812, "learning_rate": 0.0001932989044351332, "loss": 7.2358, "step": 101970 }, { "epoch": 12.271961492178098, "grad_norm": 508.7265319824219, "learning_rate": 0.00019329753519276477, "loss": 7.2913, "step": 101980 }, { "epoch": 12.273164861612514, "grad_norm": 491.8988342285156, "learning_rate": 0.00019329616581537174, "loss": 7.2009, "step": 101990 }, { "epoch": 12.274368231046932, "grad_norm": 633.57861328125, "learning_rate": 0.00019329479630295616, "loss": 7.2609, "step": 102000 }, { "epoch": 12.275571600481348, "grad_norm": 722.3191528320312, "learning_rate": 0.00019329342665551992, "loss": 7.2123, "step": 102010 }, { "epoch": 12.276774969915763, "grad_norm": 1048.0599365234375, "learning_rate": 0.0001932920568730651, "loss": 7.2431, "step": 102020 }, { "epoch": 12.277978339350181, "grad_norm": 657.6622924804688, "learning_rate": 0.00019329068695559363, "loss": 7.2258, "step": 102030 }, { "epoch": 12.279181708784597, "grad_norm": 400.900634765625, "learning_rate": 0.0001932893169031075, "loss": 7.1728, "step": 102040 }, { "epoch": 12.280385078219012, "grad_norm": 454.8036193847656, "learning_rate": 0.00019328794671560868, "loss": 7.2718, "step": 102050 }, { "epoch": 12.28158844765343, "grad_norm": 300.41357421875, "learning_rate": 0.0001932865763930992, "loss": 7.2075, "step": 102060 }, { "epoch": 12.282791817087846, "grad_norm": 546.9795532226562, "learning_rate": 0.000193285205935581, "loss": 7.3232, "step": 102070 }, { "epoch": 12.283995186522262, "grad_norm": 420.5072326660156, "learning_rate": 0.00019328383534305608, "loss": 7.2453, "step": 102080 }, { "epoch": 12.28519855595668, "grad_norm": 276.8692932128906, "learning_rate": 0.0001932824646155264, "loss": 7.1813, "step": 102090 }, { "epoch": 12.286401925391095, "grad_norm": 634.6013793945312, "learning_rate": 0.00019328109375299396, "loss": 7.245, "step": 102100 }, { "epoch": 12.28760529482551, "grad_norm": 369.4891052246094, "learning_rate": 0.00019327972275546076, "loss": 7.1738, "step": 102110 }, { "epoch": 12.288808664259928, "grad_norm": 1377.2781982421875, "learning_rate": 0.0001932783516229288, "loss": 7.1727, "step": 102120 }, { "epoch": 12.290012033694344, "grad_norm": 358.9952087402344, "learning_rate": 0.00019327698035539998, "loss": 7.2774, "step": 102130 }, { "epoch": 12.29121540312876, "grad_norm": 245.9854278564453, "learning_rate": 0.0001932756089528764, "loss": 7.2062, "step": 102140 }, { "epoch": 12.292418772563177, "grad_norm": 559.222900390625, "learning_rate": 0.00019327423741535995, "loss": 7.2896, "step": 102150 }, { "epoch": 12.293622141997593, "grad_norm": 480.6968688964844, "learning_rate": 0.00019327286574285263, "loss": 7.1975, "step": 102160 }, { "epoch": 12.294825511432009, "grad_norm": 815.69580078125, "learning_rate": 0.0001932714939353565, "loss": 7.3247, "step": 102170 }, { "epoch": 12.296028880866427, "grad_norm": 522.746337890625, "learning_rate": 0.00019327012199287346, "loss": 7.2509, "step": 102180 }, { "epoch": 12.297232250300842, "grad_norm": 238.93533325195312, "learning_rate": 0.0001932687499154055, "loss": 7.2088, "step": 102190 }, { "epoch": 12.298435619735258, "grad_norm": 459.73004150390625, "learning_rate": 0.00019326737770295466, "loss": 7.1601, "step": 102200 }, { "epoch": 12.299638989169676, "grad_norm": 394.89263916015625, "learning_rate": 0.0001932660053555229, "loss": 7.2681, "step": 102210 }, { "epoch": 12.300842358604092, "grad_norm": 660.4417724609375, "learning_rate": 0.00019326463287311222, "loss": 7.2479, "step": 102220 }, { "epoch": 12.302045728038507, "grad_norm": 1098.3001708984375, "learning_rate": 0.00019326326025572455, "loss": 7.1862, "step": 102230 }, { "epoch": 12.303249097472925, "grad_norm": 410.9057312011719, "learning_rate": 0.00019326188750336192, "loss": 7.3611, "step": 102240 }, { "epoch": 12.30445246690734, "grad_norm": 919.73193359375, "learning_rate": 0.0001932605146160263, "loss": 7.2923, "step": 102250 }, { "epoch": 12.305655836341757, "grad_norm": 616.4190673828125, "learning_rate": 0.00019325914159371975, "loss": 7.2314, "step": 102260 }, { "epoch": 12.306859205776174, "grad_norm": 473.75244140625, "learning_rate": 0.00019325776843644418, "loss": 7.2387, "step": 102270 }, { "epoch": 12.30806257521059, "grad_norm": 595.4457397460938, "learning_rate": 0.0001932563951442016, "loss": 7.3135, "step": 102280 }, { "epoch": 12.309265944645006, "grad_norm": 315.10076904296875, "learning_rate": 0.00019325502171699393, "loss": 7.2885, "step": 102290 }, { "epoch": 12.310469314079423, "grad_norm": 889.8563232421875, "learning_rate": 0.00019325364815482327, "loss": 7.2235, "step": 102300 }, { "epoch": 12.311672683513839, "grad_norm": 542.8992309570312, "learning_rate": 0.00019325227445769153, "loss": 7.1828, "step": 102310 }, { "epoch": 12.312876052948255, "grad_norm": 529.4866943359375, "learning_rate": 0.00019325090062560077, "loss": 7.2545, "step": 102320 }, { "epoch": 12.314079422382672, "grad_norm": 463.8544616699219, "learning_rate": 0.00019324952665855292, "loss": 7.2028, "step": 102330 }, { "epoch": 12.315282791817088, "grad_norm": 737.2639770507812, "learning_rate": 0.00019324815255654994, "loss": 7.2929, "step": 102340 }, { "epoch": 12.316486161251504, "grad_norm": 253.27159118652344, "learning_rate": 0.00019324677831959391, "loss": 7.2794, "step": 102350 }, { "epoch": 12.31768953068592, "grad_norm": 407.6042175292969, "learning_rate": 0.00019324540394768675, "loss": 7.338, "step": 102360 }, { "epoch": 12.318892900120337, "grad_norm": 563.1797485351562, "learning_rate": 0.00019324402944083047, "loss": 7.2382, "step": 102370 }, { "epoch": 12.320096269554753, "grad_norm": 697.7218017578125, "learning_rate": 0.00019324265479902706, "loss": 7.225, "step": 102380 }, { "epoch": 12.321299638989169, "grad_norm": 351.2344055175781, "learning_rate": 0.0001932412800222785, "loss": 7.2622, "step": 102390 }, { "epoch": 12.322503008423586, "grad_norm": 358.47821044921875, "learning_rate": 0.0001932399051105868, "loss": 7.3145, "step": 102400 }, { "epoch": 12.323706377858002, "grad_norm": 526.3197631835938, "learning_rate": 0.00019323853006395392, "loss": 7.3057, "step": 102410 }, { "epoch": 12.324909747292418, "grad_norm": 659.393310546875, "learning_rate": 0.00019323715488238187, "loss": 7.2431, "step": 102420 }, { "epoch": 12.326113116726836, "grad_norm": 350.7294616699219, "learning_rate": 0.00019323577956587267, "loss": 7.2883, "step": 102430 }, { "epoch": 12.327316486161251, "grad_norm": 313.63714599609375, "learning_rate": 0.00019323440411442823, "loss": 7.236, "step": 102440 }, { "epoch": 12.328519855595667, "grad_norm": 428.5990295410156, "learning_rate": 0.00019323302852805062, "loss": 7.1618, "step": 102450 }, { "epoch": 12.329723225030085, "grad_norm": 334.8892517089844, "learning_rate": 0.0001932316528067418, "loss": 7.1929, "step": 102460 }, { "epoch": 12.3309265944645, "grad_norm": 439.4962158203125, "learning_rate": 0.00019323027695050375, "loss": 7.161, "step": 102470 }, { "epoch": 12.332129963898916, "grad_norm": 531.44140625, "learning_rate": 0.00019322890095933848, "loss": 7.1904, "step": 102480 }, { "epoch": 12.333333333333334, "grad_norm": 258.1150207519531, "learning_rate": 0.00019322752483324796, "loss": 7.2999, "step": 102490 }, { "epoch": 12.33453670276775, "grad_norm": 538.8759155273438, "learning_rate": 0.0001932261485722342, "loss": 7.2119, "step": 102500 }, { "epoch": 12.335740072202166, "grad_norm": 449.08526611328125, "learning_rate": 0.00019322477217629917, "loss": 7.1645, "step": 102510 }, { "epoch": 12.336943441636583, "grad_norm": 315.3735656738281, "learning_rate": 0.0001932233956454449, "loss": 7.3677, "step": 102520 }, { "epoch": 12.338146811070999, "grad_norm": 227.24887084960938, "learning_rate": 0.00019322201897967336, "loss": 7.2955, "step": 102530 }, { "epoch": 12.339350180505415, "grad_norm": 391.2577209472656, "learning_rate": 0.00019322064217898652, "loss": 7.2652, "step": 102540 }, { "epoch": 12.340553549939832, "grad_norm": 557.8499755859375, "learning_rate": 0.0001932192652433864, "loss": 7.3406, "step": 102550 }, { "epoch": 12.341756919374248, "grad_norm": 406.92327880859375, "learning_rate": 0.000193217888172875, "loss": 7.2197, "step": 102560 }, { "epoch": 12.342960288808664, "grad_norm": 759.5283813476562, "learning_rate": 0.00019321651096745432, "loss": 7.3051, "step": 102570 }, { "epoch": 12.344163658243081, "grad_norm": 615.8949584960938, "learning_rate": 0.00019321513362712632, "loss": 7.3135, "step": 102580 }, { "epoch": 12.345367027677497, "grad_norm": 174.0048370361328, "learning_rate": 0.00019321375615189302, "loss": 7.3849, "step": 102590 }, { "epoch": 12.346570397111913, "grad_norm": 479.3607482910156, "learning_rate": 0.00019321237854175638, "loss": 7.1912, "step": 102600 }, { "epoch": 12.34777376654633, "grad_norm": 296.47662353515625, "learning_rate": 0.00019321100079671842, "loss": 7.2004, "step": 102610 }, { "epoch": 12.348977135980746, "grad_norm": 891.5411987304688, "learning_rate": 0.0001932096229167811, "loss": 7.2093, "step": 102620 }, { "epoch": 12.350180505415162, "grad_norm": 608.9300537109375, "learning_rate": 0.00019320824490194646, "loss": 7.2044, "step": 102630 }, { "epoch": 12.35138387484958, "grad_norm": 472.94873046875, "learning_rate": 0.00019320686675221651, "loss": 7.2476, "step": 102640 }, { "epoch": 12.352587244283995, "grad_norm": 593.6053466796875, "learning_rate": 0.0001932054884675932, "loss": 7.1844, "step": 102650 }, { "epoch": 12.353790613718411, "grad_norm": 395.6983642578125, "learning_rate": 0.0001932041100480785, "loss": 7.2608, "step": 102660 }, { "epoch": 12.354993983152827, "grad_norm": 480.7305603027344, "learning_rate": 0.00019320273149367448, "loss": 7.3589, "step": 102670 }, { "epoch": 12.356197352587245, "grad_norm": 636.570556640625, "learning_rate": 0.0001932013528043831, "loss": 7.297, "step": 102680 }, { "epoch": 12.35740072202166, "grad_norm": 383.88409423828125, "learning_rate": 0.00019319997398020632, "loss": 7.2024, "step": 102690 }, { "epoch": 12.358604091456076, "grad_norm": 790.86181640625, "learning_rate": 0.00019319859502114618, "loss": 7.2429, "step": 102700 }, { "epoch": 12.359807460890494, "grad_norm": 771.8855590820312, "learning_rate": 0.00019319721592720468, "loss": 7.2434, "step": 102710 }, { "epoch": 12.36101083032491, "grad_norm": 784.812744140625, "learning_rate": 0.0001931958366983838, "loss": 7.2969, "step": 102720 }, { "epoch": 12.362214199759325, "grad_norm": 528.169677734375, "learning_rate": 0.0001931944573346855, "loss": 7.17, "step": 102730 }, { "epoch": 12.363417569193743, "grad_norm": 669.7982788085938, "learning_rate": 0.00019319307783611183, "loss": 7.3552, "step": 102740 }, { "epoch": 12.364620938628159, "grad_norm": 1540.96484375, "learning_rate": 0.0001931916982026648, "loss": 7.2878, "step": 102750 }, { "epoch": 12.365824308062574, "grad_norm": 625.98974609375, "learning_rate": 0.0001931903184343463, "loss": 7.1922, "step": 102760 }, { "epoch": 12.367027677496992, "grad_norm": 4020.57275390625, "learning_rate": 0.00019318893853115845, "loss": 7.1787, "step": 102770 }, { "epoch": 12.368231046931408, "grad_norm": 863.9078979492188, "learning_rate": 0.00019318755849310318, "loss": 7.2502, "step": 102780 }, { "epoch": 12.369434416365824, "grad_norm": 522.5355224609375, "learning_rate": 0.00019318617832018254, "loss": 7.2442, "step": 102790 }, { "epoch": 12.370637785800241, "grad_norm": 1180.03515625, "learning_rate": 0.00019318479801239846, "loss": 7.206, "step": 102800 }, { "epoch": 12.371841155234657, "grad_norm": 842.2874145507812, "learning_rate": 0.000193183417569753, "loss": 7.2195, "step": 102810 }, { "epoch": 12.373044524669073, "grad_norm": 1174.523193359375, "learning_rate": 0.0001931820369922481, "loss": 7.2102, "step": 102820 }, { "epoch": 12.37424789410349, "grad_norm": 1294.15966796875, "learning_rate": 0.0001931806562798858, "loss": 7.2565, "step": 102830 }, { "epoch": 12.375451263537906, "grad_norm": 615.298828125, "learning_rate": 0.0001931792754326681, "loss": 7.2556, "step": 102840 }, { "epoch": 12.376654632972322, "grad_norm": 622.953125, "learning_rate": 0.00019317789445059695, "loss": 7.2275, "step": 102850 }, { "epoch": 12.37785800240674, "grad_norm": 941.1856079101562, "learning_rate": 0.0001931765133336744, "loss": 7.2496, "step": 102860 }, { "epoch": 12.379061371841155, "grad_norm": 527.05712890625, "learning_rate": 0.00019317513208190243, "loss": 7.2863, "step": 102870 }, { "epoch": 12.380264741275571, "grad_norm": 848.1106567382812, "learning_rate": 0.00019317375069528305, "loss": 7.3015, "step": 102880 }, { "epoch": 12.381468110709989, "grad_norm": 1050.827880859375, "learning_rate": 0.00019317236917381822, "loss": 7.3324, "step": 102890 }, { "epoch": 12.382671480144404, "grad_norm": 1029.7017822265625, "learning_rate": 0.00019317098751750998, "loss": 7.2734, "step": 102900 }, { "epoch": 12.38387484957882, "grad_norm": 531.5472412109375, "learning_rate": 0.00019316960572636033, "loss": 7.3096, "step": 102910 }, { "epoch": 12.385078219013238, "grad_norm": 997.7796630859375, "learning_rate": 0.00019316822380037124, "loss": 7.2518, "step": 102920 }, { "epoch": 12.386281588447654, "grad_norm": 685.7658081054688, "learning_rate": 0.00019316684173954475, "loss": 7.2804, "step": 102930 }, { "epoch": 12.38748495788207, "grad_norm": 733.6983642578125, "learning_rate": 0.00019316545954388284, "loss": 7.1888, "step": 102940 }, { "epoch": 12.388688327316487, "grad_norm": 650.7233276367188, "learning_rate": 0.0001931640772133875, "loss": 7.2076, "step": 102950 }, { "epoch": 12.389891696750903, "grad_norm": 778.4987182617188, "learning_rate": 0.00019316269474806074, "loss": 7.1991, "step": 102960 }, { "epoch": 12.391095066185319, "grad_norm": 1474.708740234375, "learning_rate": 0.00019316131214790452, "loss": 7.1736, "step": 102970 }, { "epoch": 12.392298435619736, "grad_norm": 842.0376586914062, "learning_rate": 0.00019315992941292093, "loss": 7.1433, "step": 102980 }, { "epoch": 12.393501805054152, "grad_norm": 567.975341796875, "learning_rate": 0.0001931585465431119, "loss": 7.2068, "step": 102990 }, { "epoch": 12.394705174488568, "grad_norm": 362.0715637207031, "learning_rate": 0.00019315716353847945, "loss": 7.2032, "step": 103000 }, { "epoch": 12.395908543922985, "grad_norm": 372.596923828125, "learning_rate": 0.0001931557803990256, "loss": 7.2915, "step": 103010 }, { "epoch": 12.397111913357401, "grad_norm": 757.5379638671875, "learning_rate": 0.00019315439712475234, "loss": 7.1542, "step": 103020 }, { "epoch": 12.398315282791817, "grad_norm": 518.6209716796875, "learning_rate": 0.00019315301371566163, "loss": 7.2141, "step": 103030 }, { "epoch": 12.399518652226233, "grad_norm": 845.45947265625, "learning_rate": 0.00019315163017175557, "loss": 7.2308, "step": 103040 }, { "epoch": 12.40072202166065, "grad_norm": 781.7288208007812, "learning_rate": 0.00019315024649303606, "loss": 7.2957, "step": 103050 }, { "epoch": 12.401925391095066, "grad_norm": 515.6735229492188, "learning_rate": 0.00019314886267950514, "loss": 7.2819, "step": 103060 }, { "epoch": 12.403128760529482, "grad_norm": 637.827392578125, "learning_rate": 0.00019314747873116486, "loss": 7.2471, "step": 103070 }, { "epoch": 12.4043321299639, "grad_norm": 947.3207397460938, "learning_rate": 0.00019314609464801715, "loss": 7.1896, "step": 103080 }, { "epoch": 12.405535499398315, "grad_norm": 634.7215576171875, "learning_rate": 0.00019314471043006405, "loss": 7.2115, "step": 103090 }, { "epoch": 12.406738868832731, "grad_norm": 304.4258117675781, "learning_rate": 0.00019314332607730755, "loss": 7.2377, "step": 103100 }, { "epoch": 12.407942238267148, "grad_norm": 465.19647216796875, "learning_rate": 0.00019314194158974966, "loss": 7.2257, "step": 103110 }, { "epoch": 12.409145607701564, "grad_norm": 611.6575317382812, "learning_rate": 0.00019314055696739238, "loss": 7.2662, "step": 103120 }, { "epoch": 12.41034897713598, "grad_norm": 615.4658813476562, "learning_rate": 0.00019313917221023776, "loss": 7.1679, "step": 103130 }, { "epoch": 12.411552346570398, "grad_norm": 315.9479675292969, "learning_rate": 0.0001931377873182877, "loss": 7.1305, "step": 103140 }, { "epoch": 12.412755716004813, "grad_norm": 859.2803955078125, "learning_rate": 0.00019313640229154432, "loss": 7.1801, "step": 103150 }, { "epoch": 12.41395908543923, "grad_norm": 311.7611389160156, "learning_rate": 0.00019313501713000956, "loss": 7.3273, "step": 103160 }, { "epoch": 12.415162454873647, "grad_norm": 421.8462829589844, "learning_rate": 0.00019313363183368542, "loss": 7.2462, "step": 103170 }, { "epoch": 12.416365824308063, "grad_norm": 573.8762817382812, "learning_rate": 0.00019313224640257393, "loss": 7.2063, "step": 103180 }, { "epoch": 12.417569193742478, "grad_norm": 419.0594482421875, "learning_rate": 0.00019313086083667708, "loss": 7.2863, "step": 103190 }, { "epoch": 12.418772563176896, "grad_norm": 545.3675537109375, "learning_rate": 0.00019312947513599687, "loss": 7.3011, "step": 103200 }, { "epoch": 12.419975932611312, "grad_norm": 540.0492553710938, "learning_rate": 0.00019312808930053535, "loss": 7.1821, "step": 103210 }, { "epoch": 12.421179302045728, "grad_norm": 625.4568481445312, "learning_rate": 0.00019312670333029446, "loss": 7.3009, "step": 103220 }, { "epoch": 12.422382671480145, "grad_norm": 500.1476135253906, "learning_rate": 0.00019312531722527626, "loss": 7.2518, "step": 103230 }, { "epoch": 12.42358604091456, "grad_norm": 470.7789001464844, "learning_rate": 0.0001931239309854827, "loss": 7.2698, "step": 103240 }, { "epoch": 12.424789410348977, "grad_norm": 505.5385437011719, "learning_rate": 0.00019312254461091585, "loss": 7.3202, "step": 103250 }, { "epoch": 12.425992779783394, "grad_norm": 788.8880004882812, "learning_rate": 0.00019312115810157768, "loss": 7.2149, "step": 103260 }, { "epoch": 12.42719614921781, "grad_norm": 1218.793212890625, "learning_rate": 0.00019311977145747023, "loss": 7.206, "step": 103270 }, { "epoch": 12.428399518652226, "grad_norm": 405.4271240234375, "learning_rate": 0.00019311838467859545, "loss": 7.241, "step": 103280 }, { "epoch": 12.429602888086643, "grad_norm": 570.1797485351562, "learning_rate": 0.0001931169977649554, "loss": 7.2073, "step": 103290 }, { "epoch": 12.43080625752106, "grad_norm": 1340.482421875, "learning_rate": 0.00019311561071655203, "loss": 7.2762, "step": 103300 }, { "epoch": 12.432009626955475, "grad_norm": 872.6629028320312, "learning_rate": 0.0001931142235333874, "loss": 7.1806, "step": 103310 }, { "epoch": 12.433212996389893, "grad_norm": 923.6234130859375, "learning_rate": 0.0001931128362154635, "loss": 7.2311, "step": 103320 }, { "epoch": 12.434416365824308, "grad_norm": 1062.56591796875, "learning_rate": 0.00019311144876278232, "loss": 7.255, "step": 103330 }, { "epoch": 12.435619735258724, "grad_norm": 936.6660766601562, "learning_rate": 0.0001931100611753459, "loss": 7.2829, "step": 103340 }, { "epoch": 12.43682310469314, "grad_norm": 2858.281982421875, "learning_rate": 0.00019310867345315625, "loss": 7.2651, "step": 103350 }, { "epoch": 12.438026474127557, "grad_norm": 916.9486083984375, "learning_rate": 0.00019310728559621533, "loss": 7.2414, "step": 103360 }, { "epoch": 12.439229843561973, "grad_norm": 588.5291137695312, "learning_rate": 0.0001931058976045252, "loss": 7.2024, "step": 103370 }, { "epoch": 12.440433212996389, "grad_norm": 2572.08203125, "learning_rate": 0.00019310450947808785, "loss": 7.2622, "step": 103380 }, { "epoch": 12.441636582430807, "grad_norm": 749.9818115234375, "learning_rate": 0.00019310312121690527, "loss": 7.2346, "step": 103390 }, { "epoch": 12.442839951865222, "grad_norm": 857.43359375, "learning_rate": 0.0001931017328209795, "loss": 7.2685, "step": 103400 }, { "epoch": 12.444043321299638, "grad_norm": 1062.347412109375, "learning_rate": 0.00019310034429031255, "loss": 7.1894, "step": 103410 }, { "epoch": 12.445246690734056, "grad_norm": 978.841064453125, "learning_rate": 0.0001930989556249064, "loss": 7.2423, "step": 103420 }, { "epoch": 12.446450060168472, "grad_norm": 3225.428466796875, "learning_rate": 0.00019309756682476308, "loss": 7.1619, "step": 103430 }, { "epoch": 12.447653429602887, "grad_norm": 729.1671752929688, "learning_rate": 0.0001930961778898846, "loss": 7.3156, "step": 103440 }, { "epoch": 12.448856799037305, "grad_norm": 2013.3564453125, "learning_rate": 0.00019309478882027295, "loss": 7.2963, "step": 103450 }, { "epoch": 12.45006016847172, "grad_norm": 1602.622802734375, "learning_rate": 0.0001930933996159302, "loss": 7.3292, "step": 103460 }, { "epoch": 12.451263537906136, "grad_norm": 1088.54345703125, "learning_rate": 0.00019309201027685826, "loss": 7.2578, "step": 103470 }, { "epoch": 12.452466907340554, "grad_norm": 1920.3013916015625, "learning_rate": 0.00019309062080305922, "loss": 7.2533, "step": 103480 }, { "epoch": 12.45367027677497, "grad_norm": 1126.5413818359375, "learning_rate": 0.00019308923119453505, "loss": 7.1836, "step": 103490 }, { "epoch": 12.454873646209386, "grad_norm": 1206.23974609375, "learning_rate": 0.00019308784145128781, "loss": 7.2413, "step": 103500 }, { "epoch": 12.456077015643803, "grad_norm": 611.6064453125, "learning_rate": 0.00019308645157331948, "loss": 7.278, "step": 103510 }, { "epoch": 12.457280385078219, "grad_norm": 1355.9659423828125, "learning_rate": 0.00019308506156063207, "loss": 7.2932, "step": 103520 }, { "epoch": 12.458483754512635, "grad_norm": 1553.713134765625, "learning_rate": 0.0001930836714132276, "loss": 7.2575, "step": 103530 }, { "epoch": 12.459687123947052, "grad_norm": 876.7474975585938, "learning_rate": 0.00019308228113110802, "loss": 7.3791, "step": 103540 }, { "epoch": 12.460890493381468, "grad_norm": 2083.907470703125, "learning_rate": 0.00019308089071427546, "loss": 7.1998, "step": 103550 }, { "epoch": 12.462093862815884, "grad_norm": 1367.4674072265625, "learning_rate": 0.00019307950016273184, "loss": 7.2563, "step": 103560 }, { "epoch": 12.463297232250302, "grad_norm": 2078.96923828125, "learning_rate": 0.00019307810947647922, "loss": 7.2632, "step": 103570 }, { "epoch": 12.464500601684717, "grad_norm": 1232.34814453125, "learning_rate": 0.00019307671865551957, "loss": 7.2679, "step": 103580 }, { "epoch": 12.465703971119133, "grad_norm": 737.5986328125, "learning_rate": 0.00019307532769985498, "loss": 7.2929, "step": 103590 }, { "epoch": 12.46690734055355, "grad_norm": 1198.1258544921875, "learning_rate": 0.00019307393660948735, "loss": 7.2022, "step": 103600 }, { "epoch": 12.468110709987966, "grad_norm": 1645.317626953125, "learning_rate": 0.0001930725453844188, "loss": 7.2737, "step": 103610 }, { "epoch": 12.469314079422382, "grad_norm": 2021.9385986328125, "learning_rate": 0.00019307115402465125, "loss": 7.2181, "step": 103620 }, { "epoch": 12.4705174488568, "grad_norm": 1294.0675048828125, "learning_rate": 0.0001930697625301868, "loss": 7.299, "step": 103630 }, { "epoch": 12.471720818291216, "grad_norm": 2241.862548828125, "learning_rate": 0.00019306837090102744, "loss": 7.2652, "step": 103640 }, { "epoch": 12.472924187725631, "grad_norm": 5705.3740234375, "learning_rate": 0.00019306697913717513, "loss": 7.2608, "step": 103650 }, { "epoch": 12.474127557160049, "grad_norm": 3235.27734375, "learning_rate": 0.00019306558723863196, "loss": 7.2397, "step": 103660 }, { "epoch": 12.475330926594465, "grad_norm": 7455.26025390625, "learning_rate": 0.00019306419520539987, "loss": 7.35, "step": 103670 }, { "epoch": 12.47653429602888, "grad_norm": 6502.79052734375, "learning_rate": 0.00019306280303748094, "loss": 7.2179, "step": 103680 }, { "epoch": 12.477737665463298, "grad_norm": 7488.34228515625, "learning_rate": 0.00019306141073487714, "loss": 7.2402, "step": 103690 }, { "epoch": 12.478941034897714, "grad_norm": 18683.474609375, "learning_rate": 0.00019306001829759052, "loss": 7.3177, "step": 103700 }, { "epoch": 12.48014440433213, "grad_norm": 20434.6953125, "learning_rate": 0.00019305862572562306, "loss": 7.2047, "step": 103710 }, { "epoch": 12.481347773766545, "grad_norm": 7194.01513671875, "learning_rate": 0.0001930572330189768, "loss": 7.2306, "step": 103720 }, { "epoch": 12.482551143200963, "grad_norm": 8411.98046875, "learning_rate": 0.00019305584017765374, "loss": 7.2336, "step": 103730 }, { "epoch": 12.483754512635379, "grad_norm": 3715.859375, "learning_rate": 0.00019305444720165593, "loss": 7.3076, "step": 103740 }, { "epoch": 12.484957882069795, "grad_norm": 4028.9384765625, "learning_rate": 0.00019305305409098535, "loss": 7.2069, "step": 103750 }, { "epoch": 12.486161251504212, "grad_norm": 5455.505859375, "learning_rate": 0.00019305166084564401, "loss": 7.2597, "step": 103760 }, { "epoch": 12.487364620938628, "grad_norm": 1815.1351318359375, "learning_rate": 0.00019305026746563395, "loss": 7.3252, "step": 103770 }, { "epoch": 12.488567990373044, "grad_norm": 8051.52294921875, "learning_rate": 0.00019304887395095717, "loss": 7.2213, "step": 103780 }, { "epoch": 12.489771359807461, "grad_norm": 7441.04150390625, "learning_rate": 0.00019304748030161574, "loss": 7.1658, "step": 103790 }, { "epoch": 12.490974729241877, "grad_norm": 1232.86962890625, "learning_rate": 0.0001930460865176116, "loss": 7.3066, "step": 103800 }, { "epoch": 12.492178098676293, "grad_norm": 7903.53271484375, "learning_rate": 0.00019304469259894683, "loss": 7.2759, "step": 103810 }, { "epoch": 12.49338146811071, "grad_norm": 2103.48046875, "learning_rate": 0.00019304329854562336, "loss": 7.3269, "step": 103820 }, { "epoch": 12.494584837545126, "grad_norm": 1905.847900390625, "learning_rate": 0.00019304190435764332, "loss": 7.4008, "step": 103830 }, { "epoch": 12.495788206979542, "grad_norm": 1053.1986083984375, "learning_rate": 0.00019304051003500865, "loss": 7.2427, "step": 103840 }, { "epoch": 12.49699157641396, "grad_norm": 1760.149658203125, "learning_rate": 0.00019303911557772135, "loss": 7.4051, "step": 103850 }, { "epoch": 12.498194945848375, "grad_norm": 1173.190185546875, "learning_rate": 0.00019303772098578357, "loss": 7.2239, "step": 103860 }, { "epoch": 12.499398315282791, "grad_norm": 3636.263916015625, "learning_rate": 0.00019303632625919718, "loss": 7.2492, "step": 103870 }, { "epoch": 12.500601684717209, "grad_norm": 1471.09033203125, "learning_rate": 0.00019303493139796426, "loss": 7.2483, "step": 103880 }, { "epoch": 12.501805054151625, "grad_norm": 1033.12451171875, "learning_rate": 0.00019303353640208682, "loss": 7.1177, "step": 103890 }, { "epoch": 12.50300842358604, "grad_norm": 1721.246826171875, "learning_rate": 0.0001930321412715669, "loss": 7.1704, "step": 103900 }, { "epoch": 12.504211793020458, "grad_norm": 907.1747436523438, "learning_rate": 0.00019303074600640648, "loss": 7.3458, "step": 103910 }, { "epoch": 12.505415162454874, "grad_norm": 1607.6934814453125, "learning_rate": 0.00019302935060660763, "loss": 7.2169, "step": 103920 }, { "epoch": 12.50661853188929, "grad_norm": 1713.159423828125, "learning_rate": 0.00019302795507217233, "loss": 7.2595, "step": 103930 }, { "epoch": 12.507821901323707, "grad_norm": 784.2911376953125, "learning_rate": 0.00019302655940310262, "loss": 7.158, "step": 103940 }, { "epoch": 12.509025270758123, "grad_norm": 1609.2664794921875, "learning_rate": 0.00019302516359940048, "loss": 7.287, "step": 103950 }, { "epoch": 12.510228640192539, "grad_norm": 936.5604248046875, "learning_rate": 0.000193023767661068, "loss": 7.1432, "step": 103960 }, { "epoch": 12.511432009626956, "grad_norm": 1297.2315673828125, "learning_rate": 0.00019302237158810715, "loss": 7.1673, "step": 103970 }, { "epoch": 12.512635379061372, "grad_norm": 1857.13427734375, "learning_rate": 0.00019302097538051994, "loss": 7.2295, "step": 103980 }, { "epoch": 12.513838748495788, "grad_norm": 1274.822998046875, "learning_rate": 0.00019301957903830843, "loss": 7.3262, "step": 103990 }, { "epoch": 12.515042117930205, "grad_norm": 1373.2264404296875, "learning_rate": 0.0001930181825614746, "loss": 7.2863, "step": 104000 }, { "epoch": 12.516245487364621, "grad_norm": 1836.18359375, "learning_rate": 0.00019301678595002052, "loss": 7.1068, "step": 104010 }, { "epoch": 12.517448856799037, "grad_norm": 5573.10791015625, "learning_rate": 0.00019301538920394823, "loss": 7.3332, "step": 104020 }, { "epoch": 12.518652226233453, "grad_norm": 2214.633544921875, "learning_rate": 0.00019301399232325963, "loss": 7.1898, "step": 104030 }, { "epoch": 12.51985559566787, "grad_norm": 846.73681640625, "learning_rate": 0.00019301259530795685, "loss": 7.2471, "step": 104040 }, { "epoch": 12.521058965102286, "grad_norm": 2576.935546875, "learning_rate": 0.00019301119815804188, "loss": 7.269, "step": 104050 }, { "epoch": 12.522262334536702, "grad_norm": 982.31884765625, "learning_rate": 0.00019300980087351674, "loss": 7.1956, "step": 104060 }, { "epoch": 12.52346570397112, "grad_norm": 1140.760498046875, "learning_rate": 0.00019300840345438345, "loss": 7.2854, "step": 104070 }, { "epoch": 12.524669073405535, "grad_norm": 2254.50390625, "learning_rate": 0.00019300700590064405, "loss": 7.2001, "step": 104080 }, { "epoch": 12.525872442839951, "grad_norm": 1678.689697265625, "learning_rate": 0.00019300560821230054, "loss": 7.2017, "step": 104090 }, { "epoch": 12.527075812274369, "grad_norm": 1556.8857421875, "learning_rate": 0.00019300421038935496, "loss": 7.2397, "step": 104100 }, { "epoch": 12.528279181708784, "grad_norm": 1105.6611328125, "learning_rate": 0.0001930028124318093, "loss": 7.1864, "step": 104110 }, { "epoch": 12.5294825511432, "grad_norm": 2138.776123046875, "learning_rate": 0.00019300141433966562, "loss": 7.3185, "step": 104120 }, { "epoch": 12.530685920577618, "grad_norm": 1353.1348876953125, "learning_rate": 0.00019300001611292596, "loss": 7.1513, "step": 104130 }, { "epoch": 12.531889290012034, "grad_norm": 757.1256103515625, "learning_rate": 0.00019299861775159228, "loss": 7.1477, "step": 104140 }, { "epoch": 12.53309265944645, "grad_norm": 2442.3095703125, "learning_rate": 0.00019299721925566666, "loss": 7.2617, "step": 104150 }, { "epoch": 12.534296028880867, "grad_norm": 1530.1597900390625, "learning_rate": 0.00019299582062515112, "loss": 7.3253, "step": 104160 }, { "epoch": 12.535499398315283, "grad_norm": 2273.302001953125, "learning_rate": 0.00019299442186004764, "loss": 7.1853, "step": 104170 }, { "epoch": 12.536702767749698, "grad_norm": 2154.60400390625, "learning_rate": 0.00019299302296035828, "loss": 7.2701, "step": 104180 }, { "epoch": 12.537906137184116, "grad_norm": 808.8679809570312, "learning_rate": 0.00019299162392608506, "loss": 7.2159, "step": 104190 }, { "epoch": 12.539109506618532, "grad_norm": 1620.1002197265625, "learning_rate": 0.00019299022475722998, "loss": 7.3142, "step": 104200 }, { "epoch": 12.540312876052948, "grad_norm": 898.46875, "learning_rate": 0.00019298882545379514, "loss": 7.2186, "step": 104210 }, { "epoch": 12.541516245487365, "grad_norm": 1303.6656494140625, "learning_rate": 0.00019298742601578245, "loss": 7.2974, "step": 104220 }, { "epoch": 12.542719614921781, "grad_norm": 852.11962890625, "learning_rate": 0.00019298602644319401, "loss": 7.2035, "step": 104230 }, { "epoch": 12.543922984356197, "grad_norm": 1093.3443603515625, "learning_rate": 0.00019298462673603185, "loss": 7.2162, "step": 104240 }, { "epoch": 12.545126353790614, "grad_norm": 2158.495849609375, "learning_rate": 0.00019298322689429798, "loss": 7.1457, "step": 104250 }, { "epoch": 12.54632972322503, "grad_norm": 1533.84423828125, "learning_rate": 0.00019298182691799442, "loss": 7.2129, "step": 104260 }, { "epoch": 12.547533092659446, "grad_norm": 1018.7616577148438, "learning_rate": 0.00019298042680712318, "loss": 7.1869, "step": 104270 }, { "epoch": 12.548736462093864, "grad_norm": 1718.45947265625, "learning_rate": 0.00019297902656168633, "loss": 7.2355, "step": 104280 }, { "epoch": 12.54993983152828, "grad_norm": 925.1983032226562, "learning_rate": 0.00019297762618168587, "loss": 7.2473, "step": 104290 }, { "epoch": 12.551143200962695, "grad_norm": 1960.1783447265625, "learning_rate": 0.00019297622566712383, "loss": 7.3286, "step": 104300 }, { "epoch": 12.552346570397113, "grad_norm": 1889.014404296875, "learning_rate": 0.0001929748250180022, "loss": 7.2456, "step": 104310 }, { "epoch": 12.553549939831528, "grad_norm": 1755.573974609375, "learning_rate": 0.0001929734242343231, "loss": 7.2467, "step": 104320 }, { "epoch": 12.554753309265944, "grad_norm": 1215.699951171875, "learning_rate": 0.00019297202331608845, "loss": 7.2599, "step": 104330 }, { "epoch": 12.555956678700362, "grad_norm": 1160.3714599609375, "learning_rate": 0.00019297062226330037, "loss": 7.101, "step": 104340 }, { "epoch": 12.557160048134778, "grad_norm": 1271.2740478515625, "learning_rate": 0.00019296922107596082, "loss": 7.2709, "step": 104350 }, { "epoch": 12.558363417569193, "grad_norm": 619.2705078125, "learning_rate": 0.00019296781975407189, "loss": 7.1826, "step": 104360 }, { "epoch": 12.559566787003611, "grad_norm": 739.95654296875, "learning_rate": 0.00019296641829763552, "loss": 7.1994, "step": 104370 }, { "epoch": 12.560770156438027, "grad_norm": 1045.4051513671875, "learning_rate": 0.00019296501670665382, "loss": 7.2761, "step": 104380 }, { "epoch": 12.561973525872443, "grad_norm": 870.4738159179688, "learning_rate": 0.00019296361498112878, "loss": 7.1822, "step": 104390 }, { "epoch": 12.56317689530686, "grad_norm": 959.38623046875, "learning_rate": 0.00019296221312106244, "loss": 7.2169, "step": 104400 }, { "epoch": 12.564380264741276, "grad_norm": 885.3505859375, "learning_rate": 0.00019296081112645682, "loss": 7.1756, "step": 104410 }, { "epoch": 12.565583634175692, "grad_norm": 988.3435668945312, "learning_rate": 0.00019295940899731397, "loss": 7.2232, "step": 104420 }, { "epoch": 12.566787003610107, "grad_norm": 777.8799438476562, "learning_rate": 0.00019295800673363588, "loss": 7.1958, "step": 104430 }, { "epoch": 12.567990373044525, "grad_norm": 521.8305053710938, "learning_rate": 0.00019295660433542465, "loss": 7.3149, "step": 104440 }, { "epoch": 12.56919374247894, "grad_norm": 587.714111328125, "learning_rate": 0.0001929552018026822, "loss": 7.2738, "step": 104450 }, { "epoch": 12.570397111913357, "grad_norm": 717.1371459960938, "learning_rate": 0.00019295379913541067, "loss": 7.2753, "step": 104460 }, { "epoch": 12.571600481347774, "grad_norm": 1174.6915283203125, "learning_rate": 0.00019295239633361204, "loss": 7.1544, "step": 104470 }, { "epoch": 12.57280385078219, "grad_norm": 1600.67236328125, "learning_rate": 0.00019295099339728834, "loss": 7.2503, "step": 104480 }, { "epoch": 12.574007220216606, "grad_norm": 1140.218994140625, "learning_rate": 0.0001929495903264416, "loss": 7.2818, "step": 104490 }, { "epoch": 12.575210589651023, "grad_norm": 426.3902282714844, "learning_rate": 0.00019294818712107384, "loss": 7.192, "step": 104500 }, { "epoch": 12.57641395908544, "grad_norm": 553.1784057617188, "learning_rate": 0.0001929467837811871, "loss": 7.1825, "step": 104510 }, { "epoch": 12.577617328519855, "grad_norm": 1348.63671875, "learning_rate": 0.00019294538030678344, "loss": 7.2816, "step": 104520 }, { "epoch": 12.578820697954272, "grad_norm": 550.9815063476562, "learning_rate": 0.00019294397669786486, "loss": 7.2044, "step": 104530 }, { "epoch": 12.580024067388688, "grad_norm": 659.5072021484375, "learning_rate": 0.00019294257295443342, "loss": 7.2081, "step": 104540 }, { "epoch": 12.581227436823104, "grad_norm": 1185.632080078125, "learning_rate": 0.00019294116907649106, "loss": 7.174, "step": 104550 }, { "epoch": 12.582430806257522, "grad_norm": 668.927734375, "learning_rate": 0.00019293976506403994, "loss": 7.1641, "step": 104560 }, { "epoch": 12.583634175691937, "grad_norm": 335.18792724609375, "learning_rate": 0.00019293836091708203, "loss": 7.1545, "step": 104570 }, { "epoch": 12.584837545126353, "grad_norm": 526.4817504882812, "learning_rate": 0.00019293695663561935, "loss": 7.1394, "step": 104580 }, { "epoch": 12.58604091456077, "grad_norm": 451.1551818847656, "learning_rate": 0.00019293555221965396, "loss": 7.1956, "step": 104590 }, { "epoch": 12.587244283995187, "grad_norm": 378.8392333984375, "learning_rate": 0.0001929341476691879, "loss": 7.2655, "step": 104600 }, { "epoch": 12.588447653429602, "grad_norm": 864.7603149414062, "learning_rate": 0.00019293274298422315, "loss": 7.2422, "step": 104610 }, { "epoch": 12.58965102286402, "grad_norm": 1140.2222900390625, "learning_rate": 0.0001929313381647618, "loss": 7.2524, "step": 104620 }, { "epoch": 12.590854392298436, "grad_norm": 321.66156005859375, "learning_rate": 0.00019292993321080583, "loss": 7.261, "step": 104630 }, { "epoch": 12.592057761732852, "grad_norm": 260.8164978027344, "learning_rate": 0.00019292852812235732, "loss": 7.2221, "step": 104640 }, { "epoch": 12.593261131167269, "grad_norm": 399.3897705078125, "learning_rate": 0.00019292712289941828, "loss": 7.2745, "step": 104650 }, { "epoch": 12.594464500601685, "grad_norm": 671.0757446289062, "learning_rate": 0.0001929257175419908, "loss": 7.2062, "step": 104660 }, { "epoch": 12.5956678700361, "grad_norm": 741.1159057617188, "learning_rate": 0.0001929243120500768, "loss": 7.2583, "step": 104670 }, { "epoch": 12.596871239470518, "grad_norm": 679.1481323242188, "learning_rate": 0.00019292290642367838, "loss": 7.1983, "step": 104680 }, { "epoch": 12.598074608904934, "grad_norm": 378.79522705078125, "learning_rate": 0.00019292150066279763, "loss": 7.3115, "step": 104690 }, { "epoch": 12.59927797833935, "grad_norm": 686.9435424804688, "learning_rate": 0.00019292009476743645, "loss": 7.1957, "step": 104700 }, { "epoch": 12.600481347773766, "grad_norm": 463.1709289550781, "learning_rate": 0.00019291868873759702, "loss": 7.2124, "step": 104710 }, { "epoch": 12.601684717208183, "grad_norm": 753.973876953125, "learning_rate": 0.00019291728257328128, "loss": 7.2216, "step": 104720 }, { "epoch": 12.602888086642599, "grad_norm": 511.06182861328125, "learning_rate": 0.00019291587627449127, "loss": 7.1613, "step": 104730 }, { "epoch": 12.604091456077015, "grad_norm": 201.756103515625, "learning_rate": 0.00019291446984122908, "loss": 7.1919, "step": 104740 }, { "epoch": 12.605294825511432, "grad_norm": 546.0944213867188, "learning_rate": 0.0001929130632734967, "loss": 7.2802, "step": 104750 }, { "epoch": 12.606498194945848, "grad_norm": 257.6075439453125, "learning_rate": 0.00019291165657129615, "loss": 7.2511, "step": 104760 }, { "epoch": 12.607701564380264, "grad_norm": 505.5705871582031, "learning_rate": 0.0001929102497346295, "loss": 7.2642, "step": 104770 }, { "epoch": 12.608904933814681, "grad_norm": 505.3547668457031, "learning_rate": 0.00019290884276349884, "loss": 7.1775, "step": 104780 }, { "epoch": 12.610108303249097, "grad_norm": 350.80865478515625, "learning_rate": 0.0001929074356579061, "loss": 7.2038, "step": 104790 }, { "epoch": 12.611311672683513, "grad_norm": 426.80780029296875, "learning_rate": 0.00019290602841785337, "loss": 7.2106, "step": 104800 }, { "epoch": 12.61251504211793, "grad_norm": 414.563232421875, "learning_rate": 0.00019290462104334268, "loss": 7.1431, "step": 104810 }, { "epoch": 12.613718411552346, "grad_norm": 480.8339538574219, "learning_rate": 0.00019290321353437605, "loss": 7.2138, "step": 104820 }, { "epoch": 12.614921780986762, "grad_norm": 470.5589599609375, "learning_rate": 0.00019290180589095557, "loss": 7.2506, "step": 104830 }, { "epoch": 12.61612515042118, "grad_norm": 571.3959350585938, "learning_rate": 0.0001929003981130832, "loss": 7.2327, "step": 104840 }, { "epoch": 12.617328519855596, "grad_norm": 1099.705810546875, "learning_rate": 0.00019289899020076104, "loss": 7.2126, "step": 104850 }, { "epoch": 12.618531889290011, "grad_norm": 587.909423828125, "learning_rate": 0.00019289758215399106, "loss": 7.233, "step": 104860 }, { "epoch": 12.619735258724429, "grad_norm": 929.6836547851562, "learning_rate": 0.00019289617397277539, "loss": 7.1646, "step": 104870 }, { "epoch": 12.620938628158845, "grad_norm": 789.2809448242188, "learning_rate": 0.00019289476565711604, "loss": 7.2197, "step": 104880 }, { "epoch": 12.62214199759326, "grad_norm": 526.9064331054688, "learning_rate": 0.00019289335720701497, "loss": 7.1982, "step": 104890 }, { "epoch": 12.623345367027678, "grad_norm": 775.0690307617188, "learning_rate": 0.00019289194862247432, "loss": 7.3218, "step": 104900 }, { "epoch": 12.624548736462094, "grad_norm": 640.3621215820312, "learning_rate": 0.0001928905399034961, "loss": 7.22, "step": 104910 }, { "epoch": 12.62575210589651, "grad_norm": 918.5899658203125, "learning_rate": 0.00019288913105008231, "loss": 7.2355, "step": 104920 }, { "epoch": 12.626955475330927, "grad_norm": 644.662109375, "learning_rate": 0.00019288772206223502, "loss": 7.1904, "step": 104930 }, { "epoch": 12.628158844765343, "grad_norm": 442.900146484375, "learning_rate": 0.00019288631293995625, "loss": 7.1846, "step": 104940 }, { "epoch": 12.629362214199759, "grad_norm": 376.1397399902344, "learning_rate": 0.00019288490368324804, "loss": 7.2564, "step": 104950 }, { "epoch": 12.630565583634176, "grad_norm": 332.4659118652344, "learning_rate": 0.00019288349429211248, "loss": 7.2008, "step": 104960 }, { "epoch": 12.631768953068592, "grad_norm": 511.4075622558594, "learning_rate": 0.00019288208476655157, "loss": 7.2184, "step": 104970 }, { "epoch": 12.632972322503008, "grad_norm": 344.4521484375, "learning_rate": 0.0001928806751065673, "loss": 7.1591, "step": 104980 }, { "epoch": 12.634175691937426, "grad_norm": 397.17681884765625, "learning_rate": 0.0001928792653121618, "loss": 7.2057, "step": 104990 }, { "epoch": 12.635379061371841, "grad_norm": 436.052490234375, "learning_rate": 0.00019287785538333707, "loss": 7.1754, "step": 105000 }, { "epoch": 12.636582430806257, "grad_norm": 525.3685913085938, "learning_rate": 0.00019287644532009517, "loss": 7.1748, "step": 105010 }, { "epoch": 12.637785800240675, "grad_norm": 969.0641479492188, "learning_rate": 0.00019287503512243807, "loss": 7.231, "step": 105020 }, { "epoch": 12.63898916967509, "grad_norm": 754.4326782226562, "learning_rate": 0.00019287362479036792, "loss": 7.1445, "step": 105030 }, { "epoch": 12.640192539109506, "grad_norm": 291.4302978515625, "learning_rate": 0.00019287221432388667, "loss": 7.1647, "step": 105040 }, { "epoch": 12.641395908543924, "grad_norm": 686.8651733398438, "learning_rate": 0.00019287080372299643, "loss": 7.3095, "step": 105050 }, { "epoch": 12.64259927797834, "grad_norm": 501.2530212402344, "learning_rate": 0.00019286939298769919, "loss": 7.2156, "step": 105060 }, { "epoch": 12.643802647412755, "grad_norm": 726.4334716796875, "learning_rate": 0.000192867982117997, "loss": 7.1564, "step": 105070 }, { "epoch": 12.645006016847173, "grad_norm": 427.7022705078125, "learning_rate": 0.00019286657111389192, "loss": 7.1946, "step": 105080 }, { "epoch": 12.646209386281589, "grad_norm": 403.608154296875, "learning_rate": 0.00019286515997538598, "loss": 7.176, "step": 105090 }, { "epoch": 12.647412755716005, "grad_norm": 723.849853515625, "learning_rate": 0.00019286374870248124, "loss": 7.1783, "step": 105100 }, { "epoch": 12.648616125150422, "grad_norm": 1141.3013916015625, "learning_rate": 0.0001928623372951797, "loss": 7.2682, "step": 105110 }, { "epoch": 12.649819494584838, "grad_norm": 1122.19580078125, "learning_rate": 0.00019286092575348344, "loss": 7.2814, "step": 105120 }, { "epoch": 12.651022864019254, "grad_norm": 1010.5200805664062, "learning_rate": 0.0001928595140773945, "loss": 7.3321, "step": 105130 }, { "epoch": 12.65222623345367, "grad_norm": 524.7896118164062, "learning_rate": 0.00019285810226691492, "loss": 7.3271, "step": 105140 }, { "epoch": 12.653429602888087, "grad_norm": 1147.3658447265625, "learning_rate": 0.00019285669032204674, "loss": 7.2735, "step": 105150 }, { "epoch": 12.654632972322503, "grad_norm": 14950.4248046875, "learning_rate": 0.000192855278242792, "loss": 7.2107, "step": 105160 }, { "epoch": 12.655836341756919, "grad_norm": 1618.273681640625, "learning_rate": 0.00019285386602915273, "loss": 7.2349, "step": 105170 }, { "epoch": 12.657039711191336, "grad_norm": 1051.3424072265625, "learning_rate": 0.00019285245368113102, "loss": 7.1108, "step": 105180 }, { "epoch": 12.658243080625752, "grad_norm": 1006.135009765625, "learning_rate": 0.0001928510411987289, "loss": 7.1417, "step": 105190 }, { "epoch": 12.659446450060168, "grad_norm": 886.9829711914062, "learning_rate": 0.00019284962858194838, "loss": 7.2302, "step": 105200 }, { "epoch": 12.660649819494585, "grad_norm": 1091.4029541015625, "learning_rate": 0.00019284821583079152, "loss": 7.2558, "step": 105210 }, { "epoch": 12.661853188929001, "grad_norm": 1300.4217529296875, "learning_rate": 0.00019284680294526035, "loss": 7.2654, "step": 105220 }, { "epoch": 12.663056558363417, "grad_norm": 915.7379150390625, "learning_rate": 0.00019284538992535695, "loss": 7.1368, "step": 105230 }, { "epoch": 12.664259927797834, "grad_norm": 637.56396484375, "learning_rate": 0.00019284397677108334, "loss": 7.1809, "step": 105240 }, { "epoch": 12.66546329723225, "grad_norm": 1624.16455078125, "learning_rate": 0.0001928425634824416, "loss": 7.245, "step": 105250 }, { "epoch": 12.666666666666666, "grad_norm": 1183.37451171875, "learning_rate": 0.0001928411500594337, "loss": 7.2306, "step": 105260 }, { "epoch": 12.667870036101084, "grad_norm": 1145.4989013671875, "learning_rate": 0.00019283973650206177, "loss": 7.2561, "step": 105270 }, { "epoch": 12.6690734055355, "grad_norm": 1351.666748046875, "learning_rate": 0.0001928383228103278, "loss": 7.2238, "step": 105280 }, { "epoch": 12.670276774969915, "grad_norm": 717.142822265625, "learning_rate": 0.00019283690898423388, "loss": 7.2542, "step": 105290 }, { "epoch": 12.671480144404333, "grad_norm": 2237.205322265625, "learning_rate": 0.000192835495023782, "loss": 7.2325, "step": 105300 }, { "epoch": 12.672683513838749, "grad_norm": 1041.7303466796875, "learning_rate": 0.0001928340809289743, "loss": 7.2826, "step": 105310 }, { "epoch": 12.673886883273164, "grad_norm": 1921.87646484375, "learning_rate": 0.0001928326666998127, "loss": 7.2943, "step": 105320 }, { "epoch": 12.675090252707582, "grad_norm": 1226.3826904296875, "learning_rate": 0.00019283125233629934, "loss": 7.1878, "step": 105330 }, { "epoch": 12.676293622141998, "grad_norm": 1008.4230346679688, "learning_rate": 0.00019282983783843624, "loss": 7.2731, "step": 105340 }, { "epoch": 12.677496991576414, "grad_norm": 2094.8876953125, "learning_rate": 0.00019282842320622544, "loss": 7.3077, "step": 105350 }, { "epoch": 12.678700361010831, "grad_norm": 1533.685302734375, "learning_rate": 0.00019282700843966898, "loss": 7.3553, "step": 105360 }, { "epoch": 12.679903730445247, "grad_norm": 2363.348388671875, "learning_rate": 0.00019282559353876895, "loss": 7.3194, "step": 105370 }, { "epoch": 12.681107099879663, "grad_norm": 1206.56005859375, "learning_rate": 0.00019282417850352733, "loss": 7.2537, "step": 105380 }, { "epoch": 12.68231046931408, "grad_norm": 1111.983642578125, "learning_rate": 0.00019282276333394625, "loss": 7.2669, "step": 105390 }, { "epoch": 12.683513838748496, "grad_norm": 1521.7628173828125, "learning_rate": 0.0001928213480300277, "loss": 7.2765, "step": 105400 }, { "epoch": 12.684717208182912, "grad_norm": 1030.261962890625, "learning_rate": 0.00019281993259177371, "loss": 7.2829, "step": 105410 }, { "epoch": 12.685920577617328, "grad_norm": 2103.438720703125, "learning_rate": 0.0001928185170191864, "loss": 7.1891, "step": 105420 }, { "epoch": 12.687123947051745, "grad_norm": 1855.0755615234375, "learning_rate": 0.00019281710131226776, "loss": 7.2707, "step": 105430 }, { "epoch": 12.688327316486161, "grad_norm": 1581.441162109375, "learning_rate": 0.00019281568547101986, "loss": 7.2825, "step": 105440 }, { "epoch": 12.689530685920577, "grad_norm": 1499.044921875, "learning_rate": 0.00019281426949544473, "loss": 7.2123, "step": 105450 }, { "epoch": 12.690734055354994, "grad_norm": 2257.961181640625, "learning_rate": 0.0001928128533855445, "loss": 7.2567, "step": 105460 }, { "epoch": 12.69193742478941, "grad_norm": 1788.625244140625, "learning_rate": 0.0001928114371413211, "loss": 7.2857, "step": 105470 }, { "epoch": 12.693140794223826, "grad_norm": 1475.48486328125, "learning_rate": 0.00019281002076277664, "loss": 7.0721, "step": 105480 }, { "epoch": 12.694344163658243, "grad_norm": 1593.0787353515625, "learning_rate": 0.0001928086042499132, "loss": 7.3759, "step": 105490 }, { "epoch": 12.69554753309266, "grad_norm": 1411.5899658203125, "learning_rate": 0.00019280718760273275, "loss": 7.2307, "step": 105500 }, { "epoch": 12.696750902527075, "grad_norm": 1971.3603515625, "learning_rate": 0.0001928057708212374, "loss": 7.3171, "step": 105510 }, { "epoch": 12.697954271961493, "grad_norm": 1886.3671875, "learning_rate": 0.00019280435390542922, "loss": 7.29, "step": 105520 }, { "epoch": 12.699157641395908, "grad_norm": 1795.4234619140625, "learning_rate": 0.00019280293685531018, "loss": 7.3123, "step": 105530 }, { "epoch": 12.700361010830324, "grad_norm": 2612.332763671875, "learning_rate": 0.0001928015196708824, "loss": 7.3679, "step": 105540 }, { "epoch": 12.701564380264742, "grad_norm": 2954.5986328125, "learning_rate": 0.0001928001023521479, "loss": 7.2931, "step": 105550 }, { "epoch": 12.702767749699158, "grad_norm": 1535.2064208984375, "learning_rate": 0.00019279868489910877, "loss": 7.3075, "step": 105560 }, { "epoch": 12.703971119133573, "grad_norm": 2086.252197265625, "learning_rate": 0.00019279726731176702, "loss": 7.2187, "step": 105570 }, { "epoch": 12.705174488567991, "grad_norm": 2196.475830078125, "learning_rate": 0.0001927958495901247, "loss": 7.3416, "step": 105580 }, { "epoch": 12.706377858002407, "grad_norm": 1500.63232421875, "learning_rate": 0.00019279443173418387, "loss": 7.2577, "step": 105590 }, { "epoch": 12.707581227436823, "grad_norm": 2814.72607421875, "learning_rate": 0.0001927930137439466, "loss": 7.3348, "step": 105600 }, { "epoch": 12.70878459687124, "grad_norm": 1926.3851318359375, "learning_rate": 0.00019279159561941495, "loss": 7.3492, "step": 105610 }, { "epoch": 12.709987966305656, "grad_norm": 2156.497314453125, "learning_rate": 0.0001927901773605909, "loss": 7.2432, "step": 105620 }, { "epoch": 12.711191335740072, "grad_norm": 3704.6865234375, "learning_rate": 0.0001927887589674766, "loss": 7.1535, "step": 105630 }, { "epoch": 12.71239470517449, "grad_norm": 1651.7120361328125, "learning_rate": 0.00019278734044007404, "loss": 7.2742, "step": 105640 }, { "epoch": 12.713598074608905, "grad_norm": 3680.71875, "learning_rate": 0.00019278592177838526, "loss": 7.2761, "step": 105650 }, { "epoch": 12.71480144404332, "grad_norm": 2541.768310546875, "learning_rate": 0.00019278450298241238, "loss": 7.2813, "step": 105660 }, { "epoch": 12.716004813477738, "grad_norm": 2693.757080078125, "learning_rate": 0.0001927830840521574, "loss": 7.2498, "step": 105670 }, { "epoch": 12.717208182912154, "grad_norm": 1763.8775634765625, "learning_rate": 0.0001927816649876224, "loss": 7.2485, "step": 105680 }, { "epoch": 12.71841155234657, "grad_norm": 1683.4327392578125, "learning_rate": 0.00019278024578880945, "loss": 7.1773, "step": 105690 }, { "epoch": 12.719614921780988, "grad_norm": 2728.01611328125, "learning_rate": 0.00019277882645572054, "loss": 7.3171, "step": 105700 }, { "epoch": 12.720818291215403, "grad_norm": 1501.5855712890625, "learning_rate": 0.00019277740698835774, "loss": 7.2085, "step": 105710 }, { "epoch": 12.722021660649819, "grad_norm": 2660.85302734375, "learning_rate": 0.0001927759873867232, "loss": 7.3015, "step": 105720 }, { "epoch": 12.723225030084237, "grad_norm": 1528.508544921875, "learning_rate": 0.00019277456765081884, "loss": 7.3208, "step": 105730 }, { "epoch": 12.724428399518652, "grad_norm": 1237.6644287109375, "learning_rate": 0.0001927731477806468, "loss": 7.2579, "step": 105740 }, { "epoch": 12.725631768953068, "grad_norm": 1447.3583984375, "learning_rate": 0.00019277172777620912, "loss": 7.2394, "step": 105750 }, { "epoch": 12.726835138387486, "grad_norm": 1225.0308837890625, "learning_rate": 0.00019277030763750782, "loss": 7.2694, "step": 105760 }, { "epoch": 12.728038507821902, "grad_norm": 2391.46533203125, "learning_rate": 0.00019276888736454497, "loss": 7.2474, "step": 105770 }, { "epoch": 12.729241877256317, "grad_norm": 1786.611328125, "learning_rate": 0.00019276746695732268, "loss": 7.1899, "step": 105780 }, { "epoch": 12.730445246690735, "grad_norm": 1737.0618896484375, "learning_rate": 0.00019276604641584293, "loss": 7.2423, "step": 105790 }, { "epoch": 12.73164861612515, "grad_norm": 4655.181640625, "learning_rate": 0.00019276462574010781, "loss": 7.2267, "step": 105800 }, { "epoch": 12.732851985559567, "grad_norm": 1269.7513427734375, "learning_rate": 0.00019276320493011938, "loss": 7.2454, "step": 105810 }, { "epoch": 12.734055354993982, "grad_norm": 1343.595458984375, "learning_rate": 0.0001927617839858797, "loss": 7.248, "step": 105820 }, { "epoch": 12.7352587244284, "grad_norm": 1270.7225341796875, "learning_rate": 0.00019276036290739082, "loss": 7.2976, "step": 105830 }, { "epoch": 12.736462093862816, "grad_norm": 1309.854736328125, "learning_rate": 0.00019275894169465475, "loss": 7.1258, "step": 105840 }, { "epoch": 12.737665463297231, "grad_norm": 1100.3641357421875, "learning_rate": 0.00019275752034767364, "loss": 7.2197, "step": 105850 }, { "epoch": 12.738868832731649, "grad_norm": 753.73974609375, "learning_rate": 0.00019275609886644946, "loss": 7.1745, "step": 105860 }, { "epoch": 12.740072202166065, "grad_norm": 741.9046020507812, "learning_rate": 0.00019275467725098433, "loss": 7.2588, "step": 105870 }, { "epoch": 12.74127557160048, "grad_norm": 1046.6353759765625, "learning_rate": 0.00019275325550128028, "loss": 7.2604, "step": 105880 }, { "epoch": 12.742478941034898, "grad_norm": 1046.2142333984375, "learning_rate": 0.00019275183361733934, "loss": 7.3747, "step": 105890 }, { "epoch": 12.743682310469314, "grad_norm": 1912.5325927734375, "learning_rate": 0.00019275041159916363, "loss": 7.2507, "step": 105900 }, { "epoch": 12.74488567990373, "grad_norm": 1162.363037109375, "learning_rate": 0.00019274898944675515, "loss": 7.2649, "step": 105910 }, { "epoch": 12.746089049338147, "grad_norm": 1675.916015625, "learning_rate": 0.000192747567160116, "loss": 7.3377, "step": 105920 }, { "epoch": 12.747292418772563, "grad_norm": 1460.8504638671875, "learning_rate": 0.00019274614473924822, "loss": 7.1886, "step": 105930 }, { "epoch": 12.748495788206979, "grad_norm": 566.094482421875, "learning_rate": 0.00019274472218415385, "loss": 7.2528, "step": 105940 }, { "epoch": 12.749699157641396, "grad_norm": 681.6715087890625, "learning_rate": 0.00019274329949483501, "loss": 7.284, "step": 105950 }, { "epoch": 12.750902527075812, "grad_norm": 891.517822265625, "learning_rate": 0.00019274187667129368, "loss": 7.3392, "step": 105960 }, { "epoch": 12.752105896510228, "grad_norm": 1218.25390625, "learning_rate": 0.00019274045371353197, "loss": 7.1866, "step": 105970 }, { "epoch": 12.753309265944646, "grad_norm": 1238.319580078125, "learning_rate": 0.0001927390306215519, "loss": 7.3285, "step": 105980 }, { "epoch": 12.754512635379061, "grad_norm": 529.3389282226562, "learning_rate": 0.00019273760739535562, "loss": 7.2722, "step": 105990 }, { "epoch": 12.755716004813477, "grad_norm": 1807.80615234375, "learning_rate": 0.0001927361840349451, "loss": 7.3601, "step": 106000 }, { "epoch": 12.756919374247895, "grad_norm": 506.76751708984375, "learning_rate": 0.0001927347605403224, "loss": 7.2907, "step": 106010 }, { "epoch": 12.75812274368231, "grad_norm": 1444.1904296875, "learning_rate": 0.00019273333691148965, "loss": 7.2053, "step": 106020 }, { "epoch": 12.759326113116726, "grad_norm": 771.5220947265625, "learning_rate": 0.0001927319131484488, "loss": 7.2297, "step": 106030 }, { "epoch": 12.760529482551144, "grad_norm": 797.7838134765625, "learning_rate": 0.00019273048925120203, "loss": 7.2448, "step": 106040 }, { "epoch": 12.76173285198556, "grad_norm": 795.5725708007812, "learning_rate": 0.00019272906521975133, "loss": 7.1719, "step": 106050 }, { "epoch": 12.762936221419976, "grad_norm": 946.47216796875, "learning_rate": 0.0001927276410540988, "loss": 7.2363, "step": 106060 }, { "epoch": 12.764139590854393, "grad_norm": 1349.8101806640625, "learning_rate": 0.00019272621675424647, "loss": 7.2639, "step": 106070 }, { "epoch": 12.765342960288809, "grad_norm": 911.401611328125, "learning_rate": 0.00019272479232019637, "loss": 7.2361, "step": 106080 }, { "epoch": 12.766546329723225, "grad_norm": 820.5132446289062, "learning_rate": 0.00019272336775195067, "loss": 7.2526, "step": 106090 }, { "epoch": 12.76774969915764, "grad_norm": 1301.506591796875, "learning_rate": 0.0001927219430495113, "loss": 7.2908, "step": 106100 }, { "epoch": 12.768953068592058, "grad_norm": 391.9560546875, "learning_rate": 0.00019272051821288044, "loss": 7.131, "step": 106110 }, { "epoch": 12.770156438026474, "grad_norm": 898.4758911132812, "learning_rate": 0.00019271909324206006, "loss": 7.2503, "step": 106120 }, { "epoch": 12.77135980746089, "grad_norm": 616.5748901367188, "learning_rate": 0.00019271766813705227, "loss": 7.1981, "step": 106130 }, { "epoch": 12.772563176895307, "grad_norm": 451.3939208984375, "learning_rate": 0.00019271624289785914, "loss": 7.2903, "step": 106140 }, { "epoch": 12.773766546329723, "grad_norm": 863.1996459960938, "learning_rate": 0.0001927148175244827, "loss": 7.1782, "step": 106150 }, { "epoch": 12.774969915764139, "grad_norm": 399.4698181152344, "learning_rate": 0.000192713392016925, "loss": 7.2379, "step": 106160 }, { "epoch": 12.776173285198556, "grad_norm": 548.5631713867188, "learning_rate": 0.00019271196637518818, "loss": 7.2288, "step": 106170 }, { "epoch": 12.777376654632972, "grad_norm": 600.9078979492188, "learning_rate": 0.00019271054059927424, "loss": 7.139, "step": 106180 }, { "epoch": 12.778580024067388, "grad_norm": 303.4968566894531, "learning_rate": 0.00019270911468918526, "loss": 7.279, "step": 106190 }, { "epoch": 12.779783393501805, "grad_norm": 1112.216796875, "learning_rate": 0.0001927076886449233, "loss": 7.2589, "step": 106200 }, { "epoch": 12.780986762936221, "grad_norm": 928.738037109375, "learning_rate": 0.00019270626246649042, "loss": 7.2635, "step": 106210 }, { "epoch": 12.782190132370637, "grad_norm": 775.9215698242188, "learning_rate": 0.0001927048361538887, "loss": 7.1795, "step": 106220 }, { "epoch": 12.783393501805055, "grad_norm": 621.3668823242188, "learning_rate": 0.0001927034097071202, "loss": 7.271, "step": 106230 }, { "epoch": 12.78459687123947, "grad_norm": 463.00164794921875, "learning_rate": 0.00019270198312618697, "loss": 7.236, "step": 106240 }, { "epoch": 12.785800240673886, "grad_norm": 1090.231689453125, "learning_rate": 0.00019270055641109108, "loss": 7.3159, "step": 106250 }, { "epoch": 12.787003610108304, "grad_norm": 494.4300537109375, "learning_rate": 0.00019269912956183457, "loss": 7.2348, "step": 106260 }, { "epoch": 12.78820697954272, "grad_norm": 773.3367919921875, "learning_rate": 0.00019269770257841957, "loss": 7.3009, "step": 106270 }, { "epoch": 12.789410348977135, "grad_norm": 508.0045166015625, "learning_rate": 0.0001926962754608481, "loss": 7.259, "step": 106280 }, { "epoch": 12.790613718411553, "grad_norm": 497.3860778808594, "learning_rate": 0.00019269484820912225, "loss": 7.2769, "step": 106290 }, { "epoch": 12.791817087845969, "grad_norm": 914.2775268554688, "learning_rate": 0.00019269342082324403, "loss": 7.2492, "step": 106300 }, { "epoch": 12.793020457280385, "grad_norm": 297.7761535644531, "learning_rate": 0.00019269199330321557, "loss": 7.1343, "step": 106310 }, { "epoch": 12.794223826714802, "grad_norm": 1316.914794921875, "learning_rate": 0.0001926905656490389, "loss": 7.1067, "step": 106320 }, { "epoch": 12.795427196149218, "grad_norm": 536.9522705078125, "learning_rate": 0.00019268913786071613, "loss": 7.2784, "step": 106330 }, { "epoch": 12.796630565583634, "grad_norm": 504.66485595703125, "learning_rate": 0.00019268770993824923, "loss": 7.3043, "step": 106340 }, { "epoch": 12.797833935018051, "grad_norm": 834.7509155273438, "learning_rate": 0.00019268628188164036, "loss": 7.2565, "step": 106350 }, { "epoch": 12.799037304452467, "grad_norm": 791.4818115234375, "learning_rate": 0.00019268485369089156, "loss": 7.2705, "step": 106360 }, { "epoch": 12.800240673886883, "grad_norm": 1112.0408935546875, "learning_rate": 0.00019268342536600492, "loss": 7.2604, "step": 106370 }, { "epoch": 12.8014440433213, "grad_norm": 687.0901489257812, "learning_rate": 0.00019268199690698244, "loss": 7.3188, "step": 106380 }, { "epoch": 12.802647412755716, "grad_norm": 517.177734375, "learning_rate": 0.00019268056831382626, "loss": 7.2657, "step": 106390 }, { "epoch": 12.803850782190132, "grad_norm": 1602.3511962890625, "learning_rate": 0.0001926791395865384, "loss": 7.1756, "step": 106400 }, { "epoch": 12.80505415162455, "grad_norm": 652.8690185546875, "learning_rate": 0.00019267771072512094, "loss": 7.3113, "step": 106410 }, { "epoch": 12.806257521058965, "grad_norm": 1281.319580078125, "learning_rate": 0.00019267628172957597, "loss": 7.2228, "step": 106420 }, { "epoch": 12.807460890493381, "grad_norm": 848.3667602539062, "learning_rate": 0.0001926748525999055, "loss": 7.3012, "step": 106430 }, { "epoch": 12.808664259927799, "grad_norm": 713.812255859375, "learning_rate": 0.00019267342333611167, "loss": 7.202, "step": 106440 }, { "epoch": 12.809867629362214, "grad_norm": 896.2886352539062, "learning_rate": 0.0001926719939381965, "loss": 7.27, "step": 106450 }, { "epoch": 12.81107099879663, "grad_norm": 929.7326049804688, "learning_rate": 0.0001926705644061621, "loss": 7.171, "step": 106460 }, { "epoch": 12.812274368231048, "grad_norm": 1868.1378173828125, "learning_rate": 0.00019266913474001048, "loss": 7.2998, "step": 106470 }, { "epoch": 12.813477737665464, "grad_norm": 1212.1624755859375, "learning_rate": 0.00019266770493974378, "loss": 7.122, "step": 106480 }, { "epoch": 12.81468110709988, "grad_norm": 1813.029541015625, "learning_rate": 0.000192666275005364, "loss": 7.2796, "step": 106490 }, { "epoch": 12.815884476534297, "grad_norm": 1303.044677734375, "learning_rate": 0.00019266484493687326, "loss": 7.2759, "step": 106500 }, { "epoch": 12.817087845968713, "grad_norm": 732.6553344726562, "learning_rate": 0.00019266341473427357, "loss": 7.2141, "step": 106510 }, { "epoch": 12.818291215403129, "grad_norm": 2084.2119140625, "learning_rate": 0.0001926619843975671, "loss": 7.4055, "step": 106520 }, { "epoch": 12.819494584837544, "grad_norm": 1265.0379638671875, "learning_rate": 0.00019266055392675583, "loss": 7.292, "step": 106530 }, { "epoch": 12.820697954271962, "grad_norm": 743.56884765625, "learning_rate": 0.00019265912332184186, "loss": 7.3046, "step": 106540 }, { "epoch": 12.821901323706378, "grad_norm": 1051.9405517578125, "learning_rate": 0.0001926576925828273, "loss": 7.2897, "step": 106550 }, { "epoch": 12.823104693140793, "grad_norm": 870.7982177734375, "learning_rate": 0.00019265626170971413, "loss": 7.2685, "step": 106560 }, { "epoch": 12.824308062575211, "grad_norm": 930.4187622070312, "learning_rate": 0.0001926548307025045, "loss": 7.2323, "step": 106570 }, { "epoch": 12.825511432009627, "grad_norm": 598.90869140625, "learning_rate": 0.00019265339956120046, "loss": 7.1915, "step": 106580 }, { "epoch": 12.826714801444043, "grad_norm": 783.8077392578125, "learning_rate": 0.00019265196828580406, "loss": 7.3256, "step": 106590 }, { "epoch": 12.82791817087846, "grad_norm": 1008.6307983398438, "learning_rate": 0.0001926505368763174, "loss": 7.3393, "step": 106600 }, { "epoch": 12.829121540312876, "grad_norm": 788.7634887695312, "learning_rate": 0.0001926491053327425, "loss": 7.2716, "step": 106610 }, { "epoch": 12.830324909747292, "grad_norm": 885.7045288085938, "learning_rate": 0.0001926476736550815, "loss": 7.252, "step": 106620 }, { "epoch": 12.83152827918171, "grad_norm": 793.7576904296875, "learning_rate": 0.00019264624184333647, "loss": 7.2465, "step": 106630 }, { "epoch": 12.832731648616125, "grad_norm": 1067.9049072265625, "learning_rate": 0.00019264480989750942, "loss": 7.2384, "step": 106640 }, { "epoch": 12.833935018050541, "grad_norm": 546.8511352539062, "learning_rate": 0.00019264337781760245, "loss": 7.187, "step": 106650 }, { "epoch": 12.835138387484958, "grad_norm": 746.3120727539062, "learning_rate": 0.00019264194560361767, "loss": 7.2499, "step": 106660 }, { "epoch": 12.836341756919374, "grad_norm": 852.8392944335938, "learning_rate": 0.00019264051325555709, "loss": 7.2268, "step": 106670 }, { "epoch": 12.83754512635379, "grad_norm": 671.7431640625, "learning_rate": 0.00019263908077342285, "loss": 7.3018, "step": 106680 }, { "epoch": 12.838748495788208, "grad_norm": 670.9478149414062, "learning_rate": 0.00019263764815721698, "loss": 7.1596, "step": 106690 }, { "epoch": 12.839951865222623, "grad_norm": 919.4144897460938, "learning_rate": 0.00019263621540694151, "loss": 7.2458, "step": 106700 }, { "epoch": 12.84115523465704, "grad_norm": 1038.4052734375, "learning_rate": 0.0001926347825225986, "loss": 7.19, "step": 106710 }, { "epoch": 12.842358604091457, "grad_norm": 798.1454467773438, "learning_rate": 0.00019263334950419032, "loss": 7.2504, "step": 106720 }, { "epoch": 12.843561973525873, "grad_norm": 849.4981079101562, "learning_rate": 0.0001926319163517187, "loss": 7.2519, "step": 106730 }, { "epoch": 12.844765342960288, "grad_norm": 380.7121887207031, "learning_rate": 0.0001926304830651858, "loss": 7.2666, "step": 106740 }, { "epoch": 12.845968712394706, "grad_norm": 838.4188842773438, "learning_rate": 0.0001926290496445937, "loss": 7.248, "step": 106750 }, { "epoch": 12.847172081829122, "grad_norm": 1152.8275146484375, "learning_rate": 0.00019262761608994456, "loss": 7.2972, "step": 106760 }, { "epoch": 12.848375451263538, "grad_norm": 1013.2494506835938, "learning_rate": 0.00019262618240124035, "loss": 7.271, "step": 106770 }, { "epoch": 12.849578820697955, "grad_norm": 621.736083984375, "learning_rate": 0.0001926247485784832, "loss": 7.2184, "step": 106780 }, { "epoch": 12.85078219013237, "grad_norm": 1140.8487548828125, "learning_rate": 0.00019262331462167518, "loss": 7.2565, "step": 106790 }, { "epoch": 12.851985559566787, "grad_norm": 1151.6893310546875, "learning_rate": 0.00019262188053081834, "loss": 7.2355, "step": 106800 }, { "epoch": 12.853188929001202, "grad_norm": 901.2284545898438, "learning_rate": 0.0001926204463059148, "loss": 7.2758, "step": 106810 }, { "epoch": 12.85439229843562, "grad_norm": 1705.5452880859375, "learning_rate": 0.00019261901194696655, "loss": 7.2953, "step": 106820 }, { "epoch": 12.855595667870036, "grad_norm": 1389.90869140625, "learning_rate": 0.00019261757745397574, "loss": 7.2572, "step": 106830 }, { "epoch": 12.856799037304452, "grad_norm": 848.8786010742188, "learning_rate": 0.00019261614282694444, "loss": 7.1925, "step": 106840 }, { "epoch": 12.85800240673887, "grad_norm": 1665.571044921875, "learning_rate": 0.00019261470806587473, "loss": 7.2532, "step": 106850 }, { "epoch": 12.859205776173285, "grad_norm": 1021.503662109375, "learning_rate": 0.00019261327317076867, "loss": 7.3014, "step": 106860 }, { "epoch": 12.8604091456077, "grad_norm": 1767.217529296875, "learning_rate": 0.00019261183814162834, "loss": 7.2869, "step": 106870 }, { "epoch": 12.861612515042118, "grad_norm": 1115.7940673828125, "learning_rate": 0.0001926104029784558, "loss": 7.387, "step": 106880 }, { "epoch": 12.862815884476534, "grad_norm": 815.3505249023438, "learning_rate": 0.00019260896768125314, "loss": 7.2325, "step": 106890 }, { "epoch": 12.86401925391095, "grad_norm": 3455.2578125, "learning_rate": 0.00019260753225002242, "loss": 7.3216, "step": 106900 }, { "epoch": 12.865222623345367, "grad_norm": 1894.0472412109375, "learning_rate": 0.00019260609668476578, "loss": 7.2956, "step": 106910 }, { "epoch": 12.866425992779783, "grad_norm": 2075.967529296875, "learning_rate": 0.00019260466098548523, "loss": 7.3369, "step": 106920 }, { "epoch": 12.867629362214199, "grad_norm": 1722.5640869140625, "learning_rate": 0.0001926032251521829, "loss": 7.2593, "step": 106930 }, { "epoch": 12.868832731648617, "grad_norm": 1012.143310546875, "learning_rate": 0.0001926017891848608, "loss": 7.2522, "step": 106940 }, { "epoch": 12.870036101083032, "grad_norm": 2170.95361328125, "learning_rate": 0.00019260035308352107, "loss": 7.2281, "step": 106950 }, { "epoch": 12.871239470517448, "grad_norm": 1574.2196044921875, "learning_rate": 0.00019259891684816576, "loss": 7.2903, "step": 106960 }, { "epoch": 12.872442839951866, "grad_norm": 4061.8955078125, "learning_rate": 0.00019259748047879697, "loss": 7.395, "step": 106970 }, { "epoch": 12.873646209386282, "grad_norm": 2118.121337890625, "learning_rate": 0.00019259604397541674, "loss": 7.3463, "step": 106980 }, { "epoch": 12.874849578820697, "grad_norm": 1463.2431640625, "learning_rate": 0.00019259460733802718, "loss": 7.4214, "step": 106990 }, { "epoch": 12.876052948255115, "grad_norm": 3004.537353515625, "learning_rate": 0.0001925931705666304, "loss": 7.1198, "step": 107000 }, { "epoch": 12.87725631768953, "grad_norm": 2270.33154296875, "learning_rate": 0.0001925917336612284, "loss": 7.2168, "step": 107010 }, { "epoch": 12.878459687123947, "grad_norm": 1603.310791015625, "learning_rate": 0.00019259029662182332, "loss": 7.3277, "step": 107020 }, { "epoch": 12.879663056558364, "grad_norm": 1873.32275390625, "learning_rate": 0.00019258885944841719, "loss": 7.2713, "step": 107030 }, { "epoch": 12.88086642599278, "grad_norm": 1999.0050048828125, "learning_rate": 0.00019258742214101215, "loss": 7.2877, "step": 107040 }, { "epoch": 12.882069795427196, "grad_norm": 2248.563232421875, "learning_rate": 0.00019258598469961024, "loss": 7.267, "step": 107050 }, { "epoch": 12.883273164861613, "grad_norm": 1957.7547607421875, "learning_rate": 0.00019258454712421354, "loss": 7.2537, "step": 107060 }, { "epoch": 12.884476534296029, "grad_norm": 2966.833740234375, "learning_rate": 0.00019258310941482417, "loss": 7.275, "step": 107070 }, { "epoch": 12.885679903730445, "grad_norm": 2127.40966796875, "learning_rate": 0.00019258167157144417, "loss": 7.1983, "step": 107080 }, { "epoch": 12.886883273164862, "grad_norm": 1940.19677734375, "learning_rate": 0.00019258023359407564, "loss": 7.3223, "step": 107090 }, { "epoch": 12.888086642599278, "grad_norm": 1240.020263671875, "learning_rate": 0.00019257879548272065, "loss": 7.2978, "step": 107100 }, { "epoch": 12.889290012033694, "grad_norm": 1843.514892578125, "learning_rate": 0.0001925773572373813, "loss": 7.2528, "step": 107110 }, { "epoch": 12.890493381468112, "grad_norm": 2873.846435546875, "learning_rate": 0.00019257591885805963, "loss": 7.2442, "step": 107120 }, { "epoch": 12.891696750902527, "grad_norm": 1514.1368408203125, "learning_rate": 0.00019257448034475775, "loss": 7.2172, "step": 107130 }, { "epoch": 12.892900120336943, "grad_norm": 1113.5157470703125, "learning_rate": 0.00019257304169747777, "loss": 7.2373, "step": 107140 }, { "epoch": 12.89410348977136, "grad_norm": 1617.1826171875, "learning_rate": 0.00019257160291622172, "loss": 7.1976, "step": 107150 }, { "epoch": 12.895306859205776, "grad_norm": 1258.438232421875, "learning_rate": 0.0001925701640009917, "loss": 7.1783, "step": 107160 }, { "epoch": 12.896510228640192, "grad_norm": 2233.916015625, "learning_rate": 0.00019256872495178982, "loss": 7.0803, "step": 107170 }, { "epoch": 12.89771359807461, "grad_norm": 1309.814697265625, "learning_rate": 0.00019256728576861812, "loss": 7.2011, "step": 107180 }, { "epoch": 12.898916967509026, "grad_norm": 922.4718627929688, "learning_rate": 0.00019256584645147872, "loss": 7.2048, "step": 107190 }, { "epoch": 12.900120336943441, "grad_norm": 1547.85302734375, "learning_rate": 0.0001925644070003737, "loss": 7.0259, "step": 107200 }, { "epoch": 12.901323706377857, "grad_norm": 1193.3726806640625, "learning_rate": 0.00019256296741530508, "loss": 7.2371, "step": 107210 }, { "epoch": 12.902527075812275, "grad_norm": 1601.9708251953125, "learning_rate": 0.00019256152769627502, "loss": 7.2353, "step": 107220 }, { "epoch": 12.90373044524669, "grad_norm": 2380.31591796875, "learning_rate": 0.00019256008784328558, "loss": 7.1135, "step": 107230 }, { "epoch": 12.904933814681106, "grad_norm": 1205.4659423828125, "learning_rate": 0.00019255864785633887, "loss": 7.105, "step": 107240 }, { "epoch": 12.906137184115524, "grad_norm": 4662.0458984375, "learning_rate": 0.0001925572077354369, "loss": 7.2564, "step": 107250 }, { "epoch": 12.90734055354994, "grad_norm": 2169.8310546875, "learning_rate": 0.0001925557674805818, "loss": 7.2418, "step": 107260 }, { "epoch": 12.908543922984355, "grad_norm": 8630.0146484375, "learning_rate": 0.00019255432709177565, "loss": 7.1595, "step": 107270 }, { "epoch": 12.909747292418773, "grad_norm": 3923.92919921875, "learning_rate": 0.00019255288656902059, "loss": 7.2695, "step": 107280 }, { "epoch": 12.910950661853189, "grad_norm": 2810.24267578125, "learning_rate": 0.0001925514459123186, "loss": 7.2671, "step": 107290 }, { "epoch": 12.912154031287605, "grad_norm": 5679.7978515625, "learning_rate": 0.00019255000512167182, "loss": 7.2389, "step": 107300 }, { "epoch": 12.913357400722022, "grad_norm": 2105.82763671875, "learning_rate": 0.00019254856419708235, "loss": 7.2707, "step": 107310 }, { "epoch": 12.914560770156438, "grad_norm": 2365.353515625, "learning_rate": 0.00019254712313855225, "loss": 7.218, "step": 107320 }, { "epoch": 12.915764139590854, "grad_norm": 1215.5013427734375, "learning_rate": 0.00019254568194608364, "loss": 7.2295, "step": 107330 }, { "epoch": 12.916967509025271, "grad_norm": 999.1481323242188, "learning_rate": 0.00019254424061967854, "loss": 7.2461, "step": 107340 }, { "epoch": 12.918170878459687, "grad_norm": 1257.3634033203125, "learning_rate": 0.0001925427991593391, "loss": 7.3096, "step": 107350 }, { "epoch": 12.919374247894103, "grad_norm": 857.899658203125, "learning_rate": 0.00019254135756506738, "loss": 7.3217, "step": 107360 }, { "epoch": 12.92057761732852, "grad_norm": 3409.077392578125, "learning_rate": 0.00019253991583686545, "loss": 7.3375, "step": 107370 }, { "epoch": 12.921780986762936, "grad_norm": 2719.557861328125, "learning_rate": 0.00019253847397473543, "loss": 7.333, "step": 107380 }, { "epoch": 12.922984356197352, "grad_norm": 3630.5634765625, "learning_rate": 0.0001925370319786794, "loss": 7.2898, "step": 107390 }, { "epoch": 12.92418772563177, "grad_norm": 4198.22705078125, "learning_rate": 0.0001925355898486994, "loss": 7.1538, "step": 107400 }, { "epoch": 12.925391095066185, "grad_norm": 3447.393310546875, "learning_rate": 0.00019253414758479758, "loss": 7.2984, "step": 107410 }, { "epoch": 12.926594464500601, "grad_norm": 2648.252685546875, "learning_rate": 0.000192532705186976, "loss": 7.2454, "step": 107420 }, { "epoch": 12.927797833935019, "grad_norm": 5186.08154296875, "learning_rate": 0.00019253126265523677, "loss": 7.3981, "step": 107430 }, { "epoch": 12.929001203369435, "grad_norm": 11985.9013671875, "learning_rate": 0.00019252981998958192, "loss": 7.4484, "step": 107440 }, { "epoch": 12.93020457280385, "grad_norm": 11013.3349609375, "learning_rate": 0.00019252837719001362, "loss": 7.3707, "step": 107450 }, { "epoch": 12.931407942238268, "grad_norm": 5295.51708984375, "learning_rate": 0.0001925269342565339, "loss": 7.3675, "step": 107460 }, { "epoch": 12.932611311672684, "grad_norm": 1148.791259765625, "learning_rate": 0.00019252549118914484, "loss": 7.3161, "step": 107470 }, { "epoch": 12.9338146811071, "grad_norm": 4863.2080078125, "learning_rate": 0.00019252404798784855, "loss": 7.2384, "step": 107480 }, { "epoch": 12.935018050541515, "grad_norm": 1197.8677978515625, "learning_rate": 0.00019252260465264716, "loss": 7.2969, "step": 107490 }, { "epoch": 12.936221419975933, "grad_norm": 3278.40185546875, "learning_rate": 0.00019252116118354266, "loss": 7.2178, "step": 107500 }, { "epoch": 12.937424789410349, "grad_norm": 1501.00390625, "learning_rate": 0.00019251971758053724, "loss": 7.232, "step": 107510 }, { "epoch": 12.938628158844764, "grad_norm": 2276.19580078125, "learning_rate": 0.00019251827384363294, "loss": 7.2032, "step": 107520 }, { "epoch": 12.939831528279182, "grad_norm": 1260.967041015625, "learning_rate": 0.00019251682997283186, "loss": 7.1716, "step": 107530 }, { "epoch": 12.941034897713598, "grad_norm": 2488.078125, "learning_rate": 0.00019251538596813604, "loss": 7.3369, "step": 107540 }, { "epoch": 12.942238267148014, "grad_norm": 1627.5501708984375, "learning_rate": 0.00019251394182954768, "loss": 7.1778, "step": 107550 }, { "epoch": 12.943441636582431, "grad_norm": 1441.1497802734375, "learning_rate": 0.00019251249755706878, "loss": 7.2897, "step": 107560 }, { "epoch": 12.944645006016847, "grad_norm": 2034.059326171875, "learning_rate": 0.00019251105315070142, "loss": 7.3516, "step": 107570 }, { "epoch": 12.945848375451263, "grad_norm": 2380.8642578125, "learning_rate": 0.0001925096086104478, "loss": 7.2541, "step": 107580 }, { "epoch": 12.94705174488568, "grad_norm": 1377.181884765625, "learning_rate": 0.00019250816393630985, "loss": 7.2594, "step": 107590 }, { "epoch": 12.948255114320096, "grad_norm": 2636.31005859375, "learning_rate": 0.0001925067191282898, "loss": 7.2899, "step": 107600 }, { "epoch": 12.949458483754512, "grad_norm": 904.372314453125, "learning_rate": 0.00019250527418638967, "loss": 7.2828, "step": 107610 }, { "epoch": 12.95066185318893, "grad_norm": 2921.962646484375, "learning_rate": 0.00019250382911061158, "loss": 7.3664, "step": 107620 }, { "epoch": 12.951865222623345, "grad_norm": 6842.02099609375, "learning_rate": 0.0001925023839009576, "loss": 7.3008, "step": 107630 }, { "epoch": 12.953068592057761, "grad_norm": 2014.3673095703125, "learning_rate": 0.00019250093855742984, "loss": 7.3621, "step": 107640 }, { "epoch": 12.954271961492179, "grad_norm": 1539.3853759765625, "learning_rate": 0.0001924994930800304, "loss": 7.2573, "step": 107650 }, { "epoch": 12.955475330926594, "grad_norm": 1351.1629638671875, "learning_rate": 0.00019249804746876136, "loss": 7.3223, "step": 107660 }, { "epoch": 12.95667870036101, "grad_norm": 2710.90478515625, "learning_rate": 0.00019249660172362477, "loss": 7.3821, "step": 107670 }, { "epoch": 12.957882069795428, "grad_norm": 1781.0064697265625, "learning_rate": 0.00019249515584462276, "loss": 7.285, "step": 107680 }, { "epoch": 12.959085439229844, "grad_norm": 1480.45654296875, "learning_rate": 0.00019249370983175746, "loss": 7.3548, "step": 107690 }, { "epoch": 12.96028880866426, "grad_norm": 3168.09130859375, "learning_rate": 0.0001924922636850309, "loss": 7.2683, "step": 107700 }, { "epoch": 12.961492178098677, "grad_norm": 1672.503173828125, "learning_rate": 0.0001924908174044452, "loss": 7.2961, "step": 107710 }, { "epoch": 12.962695547533093, "grad_norm": 3248.193115234375, "learning_rate": 0.00019248937099000245, "loss": 7.4289, "step": 107720 }, { "epoch": 12.963898916967509, "grad_norm": 2357.509521484375, "learning_rate": 0.00019248792444170477, "loss": 7.3711, "step": 107730 }, { "epoch": 12.965102286401926, "grad_norm": 4426.1787109375, "learning_rate": 0.0001924864777595542, "loss": 7.4848, "step": 107740 }, { "epoch": 12.966305655836342, "grad_norm": 3416.599365234375, "learning_rate": 0.0001924850309435529, "loss": 7.3628, "step": 107750 }, { "epoch": 12.967509025270758, "grad_norm": 2189.18115234375, "learning_rate": 0.0001924835839937029, "loss": 7.3817, "step": 107760 }, { "epoch": 12.968712394705175, "grad_norm": 3998.75048828125, "learning_rate": 0.0001924821369100063, "loss": 7.4406, "step": 107770 }, { "epoch": 12.969915764139591, "grad_norm": 4420.34423828125, "learning_rate": 0.00019248068969246528, "loss": 7.4438, "step": 107780 }, { "epoch": 12.971119133574007, "grad_norm": 15506.3779296875, "learning_rate": 0.0001924792423410818, "loss": 7.434, "step": 107790 }, { "epoch": 12.972322503008424, "grad_norm": 14481.087890625, "learning_rate": 0.00019247779485585806, "loss": 7.4374, "step": 107800 }, { "epoch": 12.97352587244284, "grad_norm": 18825.759765625, "learning_rate": 0.0001924763472367961, "loss": 7.6036, "step": 107810 }, { "epoch": 12.974729241877256, "grad_norm": 23447.19140625, "learning_rate": 0.00019247489948389805, "loss": 7.6959, "step": 107820 }, { "epoch": 12.975932611311674, "grad_norm": 8680.255859375, "learning_rate": 0.000192473451597166, "loss": 7.7892, "step": 107830 }, { "epoch": 12.97713598074609, "grad_norm": 8544.8154296875, "learning_rate": 0.000192472003576602, "loss": 7.6692, "step": 107840 }, { "epoch": 12.978339350180505, "grad_norm": 10029.8583984375, "learning_rate": 0.0001924705554222082, "loss": 7.4473, "step": 107850 }, { "epoch": 12.979542719614923, "grad_norm": 5717.8291015625, "learning_rate": 0.00019246910713398668, "loss": 7.4975, "step": 107860 }, { "epoch": 12.980746089049338, "grad_norm": 59259.328125, "learning_rate": 0.00019246765871193953, "loss": 7.5435, "step": 107870 }, { "epoch": 12.981949458483754, "grad_norm": 11020.2177734375, "learning_rate": 0.00019246621015606884, "loss": 7.362, "step": 107880 }, { "epoch": 12.98315282791817, "grad_norm": 8665.4013671875, "learning_rate": 0.00019246476146637673, "loss": 7.4455, "step": 107890 }, { "epoch": 12.984356197352588, "grad_norm": 26855.755859375, "learning_rate": 0.00019246331264286527, "loss": 7.4505, "step": 107900 }, { "epoch": 12.985559566787003, "grad_norm": 28965.345703125, "learning_rate": 0.00019246186368553655, "loss": 7.3503, "step": 107910 }, { "epoch": 12.98676293622142, "grad_norm": 15996.9560546875, "learning_rate": 0.00019246041459439274, "loss": 7.4335, "step": 107920 }, { "epoch": 12.987966305655837, "grad_norm": 31387.595703125, "learning_rate": 0.00019245896536943582, "loss": 7.3639, "step": 107930 }, { "epoch": 12.989169675090253, "grad_norm": 11455.1064453125, "learning_rate": 0.00019245751601066797, "loss": 7.5035, "step": 107940 }, { "epoch": 12.990373044524668, "grad_norm": 20698.126953125, "learning_rate": 0.00019245606651809128, "loss": 7.6349, "step": 107950 }, { "epoch": 12.991576413959086, "grad_norm": 4148.4462890625, "learning_rate": 0.0001924546168917078, "loss": 7.5947, "step": 107960 }, { "epoch": 12.992779783393502, "grad_norm": 5056.8125, "learning_rate": 0.00019245316713151973, "loss": 7.6983, "step": 107970 }, { "epoch": 12.993983152827917, "grad_norm": 3186.49609375, "learning_rate": 0.00019245171723752903, "loss": 7.6045, "step": 107980 }, { "epoch": 12.995186522262335, "grad_norm": 1980.4691162109375, "learning_rate": 0.0001924502672097379, "loss": 7.5403, "step": 107990 }, { "epoch": 12.99638989169675, "grad_norm": 4110.58984375, "learning_rate": 0.00019244881704814844, "loss": 7.5637, "step": 108000 }, { "epoch": 12.997593261131167, "grad_norm": 21176.83203125, "learning_rate": 0.0001924473667527627, "loss": 7.5598, "step": 108010 }, { "epoch": 12.998796630565584, "grad_norm": 39221.96875, "learning_rate": 0.00019244591632358276, "loss": 7.5556, "step": 108020 }, { "epoch": 13.0, "grad_norm": 8619.4814453125, "learning_rate": 0.00019244446576061077, "loss": 7.5775, "step": 108030 }, { "epoch": 13.0, "eval_loss": 7.555514335632324, "eval_runtime": 120.208, "eval_samples_per_second": 61.452, "eval_steps_per_second": 7.687, "step": 108030 }, { "epoch": 13.001203369434416, "grad_norm": 15074.41015625, "learning_rate": 0.00019244301506384884, "loss": 7.5985, "step": 108040 }, { "epoch": 13.002406738868833, "grad_norm": 12033.8212890625, "learning_rate": 0.000192441564233299, "loss": 7.4701, "step": 108050 }, { "epoch": 13.00361010830325, "grad_norm": 15881.751953125, "learning_rate": 0.00019244011326896343, "loss": 7.5534, "step": 108060 }, { "epoch": 13.004813477737665, "grad_norm": 25642.037109375, "learning_rate": 0.0001924386621708442, "loss": 7.6858, "step": 108070 }, { "epoch": 13.006016847172083, "grad_norm": 5778.74951171875, "learning_rate": 0.00019243721093894337, "loss": 7.5327, "step": 108080 }, { "epoch": 13.007220216606498, "grad_norm": 12361.5380859375, "learning_rate": 0.0001924357595732631, "loss": 7.5847, "step": 108090 }, { "epoch": 13.008423586040914, "grad_norm": 7474.11962890625, "learning_rate": 0.00019243430807380546, "loss": 7.5545, "step": 108100 }, { "epoch": 13.009626955475332, "grad_norm": 35978.73046875, "learning_rate": 0.00019243285644057254, "loss": 7.5959, "step": 108110 }, { "epoch": 13.010830324909747, "grad_norm": 4408.5224609375, "learning_rate": 0.00019243140467356645, "loss": 7.5043, "step": 108120 }, { "epoch": 13.012033694344163, "grad_norm": 10178.572265625, "learning_rate": 0.00019242995277278933, "loss": 7.5143, "step": 108130 }, { "epoch": 13.01323706377858, "grad_norm": 12298.19921875, "learning_rate": 0.00019242850073824324, "loss": 7.4687, "step": 108140 }, { "epoch": 13.014440433212997, "grad_norm": 4582.2744140625, "learning_rate": 0.00019242704856993026, "loss": 7.5413, "step": 108150 }, { "epoch": 13.015643802647412, "grad_norm": 21745.123046875, "learning_rate": 0.00019242559626785256, "loss": 7.4886, "step": 108160 }, { "epoch": 13.01684717208183, "grad_norm": 13244.1435546875, "learning_rate": 0.00019242414383201215, "loss": 7.597, "step": 108170 }, { "epoch": 13.018050541516246, "grad_norm": 3957.949462890625, "learning_rate": 0.00019242269126241123, "loss": 7.6059, "step": 108180 }, { "epoch": 13.019253910950662, "grad_norm": 3317.078369140625, "learning_rate": 0.00019242123855905185, "loss": 7.4801, "step": 108190 }, { "epoch": 13.020457280385079, "grad_norm": 2736.24072265625, "learning_rate": 0.00019241978572193612, "loss": 7.5018, "step": 108200 }, { "epoch": 13.021660649819495, "grad_norm": 2209.564208984375, "learning_rate": 0.00019241833275106612, "loss": 7.5323, "step": 108210 }, { "epoch": 13.02286401925391, "grad_norm": 3912.98974609375, "learning_rate": 0.000192416879646444, "loss": 7.4479, "step": 108220 }, { "epoch": 13.024067388688326, "grad_norm": 9202.1728515625, "learning_rate": 0.00019241542640807184, "loss": 7.5079, "step": 108230 }, { "epoch": 13.025270758122744, "grad_norm": 4686.619140625, "learning_rate": 0.00019241397303595173, "loss": 7.547, "step": 108240 }, { "epoch": 13.02647412755716, "grad_norm": 7604.71435546875, "learning_rate": 0.0001924125195300858, "loss": 7.5015, "step": 108250 }, { "epoch": 13.027677496991576, "grad_norm": 10323.5732421875, "learning_rate": 0.00019241106589047612, "loss": 7.4948, "step": 108260 }, { "epoch": 13.028880866425993, "grad_norm": 5556.9228515625, "learning_rate": 0.0001924096121171248, "loss": 7.6201, "step": 108270 }, { "epoch": 13.030084235860409, "grad_norm": 1971.686767578125, "learning_rate": 0.00019240815821003397, "loss": 7.6097, "step": 108280 }, { "epoch": 13.031287605294825, "grad_norm": 4989.94921875, "learning_rate": 0.00019240670416920575, "loss": 7.4809, "step": 108290 }, { "epoch": 13.032490974729242, "grad_norm": 6852.31494140625, "learning_rate": 0.0001924052499946422, "loss": 7.6369, "step": 108300 }, { "epoch": 13.033694344163658, "grad_norm": 21070.107421875, "learning_rate": 0.0001924037956863454, "loss": 7.4799, "step": 108310 }, { "epoch": 13.034897713598074, "grad_norm": 4888.40478515625, "learning_rate": 0.00019240234124431752, "loss": 7.4902, "step": 108320 }, { "epoch": 13.036101083032491, "grad_norm": 5162.17578125, "learning_rate": 0.00019240088666856066, "loss": 7.5887, "step": 108330 }, { "epoch": 13.037304452466907, "grad_norm": 2335.8583984375, "learning_rate": 0.00019239943195907685, "loss": 7.4685, "step": 108340 }, { "epoch": 13.038507821901323, "grad_norm": 2831.58935546875, "learning_rate": 0.0001923979771158683, "loss": 7.4267, "step": 108350 }, { "epoch": 13.03971119133574, "grad_norm": 4768.30517578125, "learning_rate": 0.00019239652213893705, "loss": 7.4714, "step": 108360 }, { "epoch": 13.040914560770156, "grad_norm": 2200.756103515625, "learning_rate": 0.00019239506702828523, "loss": 7.451, "step": 108370 }, { "epoch": 13.042117930204572, "grad_norm": 4867.78369140625, "learning_rate": 0.0001923936117839149, "loss": 7.4994, "step": 108380 }, { "epoch": 13.04332129963899, "grad_norm": 7237.81396484375, "learning_rate": 0.00019239215640582823, "loss": 7.4377, "step": 108390 }, { "epoch": 13.044524669073406, "grad_norm": 2619.4345703125, "learning_rate": 0.00019239070089402733, "loss": 7.521, "step": 108400 }, { "epoch": 13.045728038507821, "grad_norm": 7368.5830078125, "learning_rate": 0.00019238924524851424, "loss": 7.537, "step": 108410 }, { "epoch": 13.046931407942239, "grad_norm": 4348.572265625, "learning_rate": 0.0001923877894692911, "loss": 7.4566, "step": 108420 }, { "epoch": 13.048134777376655, "grad_norm": 5014.33154296875, "learning_rate": 0.00019238633355636, "loss": 7.5919, "step": 108430 }, { "epoch": 13.04933814681107, "grad_norm": 4388.39697265625, "learning_rate": 0.0001923848775097231, "loss": 7.5423, "step": 108440 }, { "epoch": 13.050541516245488, "grad_norm": 8234.9833984375, "learning_rate": 0.00019238342132938248, "loss": 7.6377, "step": 108450 }, { "epoch": 13.051744885679904, "grad_norm": 14437.505859375, "learning_rate": 0.0001923819650153402, "loss": 7.5116, "step": 108460 }, { "epoch": 13.05294825511432, "grad_norm": 10338.55078125, "learning_rate": 0.00019238050856759843, "loss": 7.5523, "step": 108470 }, { "epoch": 13.054151624548737, "grad_norm": 29504.134765625, "learning_rate": 0.00019237905198615926, "loss": 7.5164, "step": 108480 }, { "epoch": 13.055354993983153, "grad_norm": 10035.2490234375, "learning_rate": 0.0001923775952710248, "loss": 7.5909, "step": 108490 }, { "epoch": 13.056558363417569, "grad_norm": 6084.50146484375, "learning_rate": 0.00019237613842219714, "loss": 7.5066, "step": 108500 }, { "epoch": 13.057761732851986, "grad_norm": 8544.9130859375, "learning_rate": 0.0001923746814396784, "loss": 7.3935, "step": 108510 }, { "epoch": 13.058965102286402, "grad_norm": 5593.34765625, "learning_rate": 0.00019237322432347068, "loss": 7.4424, "step": 108520 }, { "epoch": 13.060168471720818, "grad_norm": 12463.994140625, "learning_rate": 0.00019237176707357612, "loss": 7.5306, "step": 108530 }, { "epoch": 13.061371841155236, "grad_norm": 7964.5947265625, "learning_rate": 0.00019237030968999682, "loss": 7.4792, "step": 108540 }, { "epoch": 13.062575210589651, "grad_norm": 8108.3310546875, "learning_rate": 0.00019236885217273483, "loss": 7.4017, "step": 108550 }, { "epoch": 13.063778580024067, "grad_norm": 11556.1376953125, "learning_rate": 0.00019236739452179236, "loss": 7.443, "step": 108560 }, { "epoch": 13.064981949458483, "grad_norm": 10719.9599609375, "learning_rate": 0.00019236593673717142, "loss": 7.4535, "step": 108570 }, { "epoch": 13.0661853188929, "grad_norm": 8270.806640625, "learning_rate": 0.0001923644788188742, "loss": 7.4508, "step": 108580 }, { "epoch": 13.067388688327316, "grad_norm": 9861.3232421875, "learning_rate": 0.00019236302076690277, "loss": 7.487, "step": 108590 }, { "epoch": 13.068592057761732, "grad_norm": 16110.0537109375, "learning_rate": 0.0001923615625812592, "loss": 7.4765, "step": 108600 }, { "epoch": 13.06979542719615, "grad_norm": 25546.724609375, "learning_rate": 0.00019236010426194568, "loss": 7.3592, "step": 108610 }, { "epoch": 13.070998796630565, "grad_norm": 13784.078125, "learning_rate": 0.0001923586458089643, "loss": 7.4452, "step": 108620 }, { "epoch": 13.072202166064981, "grad_norm": 5039.65673828125, "learning_rate": 0.00019235718722231712, "loss": 7.5369, "step": 108630 }, { "epoch": 13.073405535499399, "grad_norm": 17726.580078125, "learning_rate": 0.00019235572850200632, "loss": 7.5048, "step": 108640 }, { "epoch": 13.074608904933815, "grad_norm": 24905.537109375, "learning_rate": 0.00019235426964803396, "loss": 7.4813, "step": 108650 }, { "epoch": 13.07581227436823, "grad_norm": 20696.16015625, "learning_rate": 0.0001923528106604022, "loss": 7.4328, "step": 108660 }, { "epoch": 13.077015643802648, "grad_norm": 14115.568359375, "learning_rate": 0.0001923513515391131, "loss": 7.4074, "step": 108670 }, { "epoch": 13.078219013237064, "grad_norm": 37487.0390625, "learning_rate": 0.00019234989228416878, "loss": 7.4354, "step": 108680 }, { "epoch": 13.07942238267148, "grad_norm": 7489.62646484375, "learning_rate": 0.00019234843289557137, "loss": 7.3867, "step": 108690 }, { "epoch": 13.080625752105897, "grad_norm": 4249.4619140625, "learning_rate": 0.000192346973373323, "loss": 7.4146, "step": 108700 }, { "epoch": 13.081829121540313, "grad_norm": 10376.52734375, "learning_rate": 0.00019234551371742574, "loss": 7.4164, "step": 108710 }, { "epoch": 13.083032490974729, "grad_norm": 22127.861328125, "learning_rate": 0.00019234405392788172, "loss": 7.3075, "step": 108720 }, { "epoch": 13.084235860409146, "grad_norm": 2832.487060546875, "learning_rate": 0.00019234259400469306, "loss": 7.3684, "step": 108730 }, { "epoch": 13.085439229843562, "grad_norm": 7236.1982421875, "learning_rate": 0.0001923411339478619, "loss": 7.3607, "step": 108740 }, { "epoch": 13.086642599277978, "grad_norm": 1548.1239013671875, "learning_rate": 0.00019233967375739027, "loss": 7.2633, "step": 108750 }, { "epoch": 13.087845968712395, "grad_norm": 1546.273193359375, "learning_rate": 0.00019233821343328037, "loss": 7.4365, "step": 108760 }, { "epoch": 13.089049338146811, "grad_norm": 2337.670654296875, "learning_rate": 0.00019233675297553426, "loss": 7.4905, "step": 108770 }, { "epoch": 13.090252707581227, "grad_norm": 1819.724609375, "learning_rate": 0.00019233529238415407, "loss": 7.5188, "step": 108780 }, { "epoch": 13.091456077015645, "grad_norm": 1907.8560791015625, "learning_rate": 0.0001923338316591419, "loss": 7.3461, "step": 108790 }, { "epoch": 13.09265944645006, "grad_norm": 1162.979736328125, "learning_rate": 0.0001923323708004999, "loss": 7.38, "step": 108800 }, { "epoch": 13.093862815884476, "grad_norm": 1256.3233642578125, "learning_rate": 0.00019233090980823016, "loss": 7.4264, "step": 108810 }, { "epoch": 13.095066185318894, "grad_norm": 1566.36962890625, "learning_rate": 0.0001923294486823348, "loss": 7.4459, "step": 108820 }, { "epoch": 13.09626955475331, "grad_norm": 1580.012451171875, "learning_rate": 0.00019232798742281596, "loss": 7.4325, "step": 108830 }, { "epoch": 13.097472924187725, "grad_norm": 1637.6971435546875, "learning_rate": 0.0001923265260296757, "loss": 7.618, "step": 108840 }, { "epoch": 13.098676293622143, "grad_norm": 1655.1015625, "learning_rate": 0.00019232506450291616, "loss": 7.5967, "step": 108850 }, { "epoch": 13.099879663056559, "grad_norm": 1030.7801513671875, "learning_rate": 0.00019232360284253942, "loss": 7.5514, "step": 108860 }, { "epoch": 13.101083032490974, "grad_norm": 2341.02392578125, "learning_rate": 0.0001923221410485477, "loss": 7.4855, "step": 108870 }, { "epoch": 13.102286401925392, "grad_norm": 2831.236572265625, "learning_rate": 0.000192320679120943, "loss": 7.5784, "step": 108880 }, { "epoch": 13.103489771359808, "grad_norm": 1810.7940673828125, "learning_rate": 0.00019231921705972752, "loss": 7.5255, "step": 108890 }, { "epoch": 13.104693140794224, "grad_norm": 1143.26904296875, "learning_rate": 0.00019231775486490334, "loss": 7.5552, "step": 108900 }, { "epoch": 13.10589651022864, "grad_norm": 2173.7587890625, "learning_rate": 0.00019231629253647255, "loss": 7.59, "step": 108910 }, { "epoch": 13.107099879663057, "grad_norm": 3471.625244140625, "learning_rate": 0.0001923148300744373, "loss": 7.5861, "step": 108920 }, { "epoch": 13.108303249097473, "grad_norm": 1325.9033203125, "learning_rate": 0.00019231336747879972, "loss": 7.5344, "step": 108930 }, { "epoch": 13.109506618531888, "grad_norm": 1272.190673828125, "learning_rate": 0.00019231190474956186, "loss": 7.5464, "step": 108940 }, { "epoch": 13.110709987966306, "grad_norm": 1664.0906982421875, "learning_rate": 0.00019231044188672592, "loss": 7.5406, "step": 108950 }, { "epoch": 13.111913357400722, "grad_norm": 2486.56689453125, "learning_rate": 0.00019230897889029397, "loss": 7.642, "step": 108960 }, { "epoch": 13.113116726835138, "grad_norm": 854.6378784179688, "learning_rate": 0.00019230751576026817, "loss": 7.5285, "step": 108970 }, { "epoch": 13.114320096269555, "grad_norm": 1075.2103271484375, "learning_rate": 0.00019230605249665055, "loss": 7.6098, "step": 108980 }, { "epoch": 13.115523465703971, "grad_norm": 1015.71630859375, "learning_rate": 0.00019230458909944332, "loss": 7.4277, "step": 108990 }, { "epoch": 13.116726835138387, "grad_norm": 1571.4444580078125, "learning_rate": 0.00019230312556864853, "loss": 7.473, "step": 109000 }, { "epoch": 13.117930204572804, "grad_norm": 1367.8033447265625, "learning_rate": 0.00019230166190426835, "loss": 7.5912, "step": 109010 }, { "epoch": 13.11913357400722, "grad_norm": 520.5321655273438, "learning_rate": 0.00019230019810630484, "loss": 7.5585, "step": 109020 }, { "epoch": 13.120336943441636, "grad_norm": 888.4727172851562, "learning_rate": 0.0001922987341747602, "loss": 7.5432, "step": 109030 }, { "epoch": 13.121540312876053, "grad_norm": 1325.9951171875, "learning_rate": 0.00019229727010963647, "loss": 7.583, "step": 109040 }, { "epoch": 13.12274368231047, "grad_norm": 1547.339111328125, "learning_rate": 0.00019229580591093585, "loss": 7.5742, "step": 109050 }, { "epoch": 13.123947051744885, "grad_norm": 3020.97998046875, "learning_rate": 0.00019229434157866038, "loss": 7.5616, "step": 109060 }, { "epoch": 13.125150421179303, "grad_norm": 1091.6090087890625, "learning_rate": 0.0001922928771128122, "loss": 7.5141, "step": 109070 }, { "epoch": 13.126353790613718, "grad_norm": 1790.71728515625, "learning_rate": 0.00019229141251339346, "loss": 7.4928, "step": 109080 }, { "epoch": 13.127557160048134, "grad_norm": 1158.9244384765625, "learning_rate": 0.00019228994778040624, "loss": 7.4885, "step": 109090 }, { "epoch": 13.128760529482552, "grad_norm": 1233.3555908203125, "learning_rate": 0.00019228848291385272, "loss": 7.465, "step": 109100 }, { "epoch": 13.129963898916968, "grad_norm": 1639.1177978515625, "learning_rate": 0.00019228701791373495, "loss": 7.4926, "step": 109110 }, { "epoch": 13.131167268351383, "grad_norm": 808.8528442382812, "learning_rate": 0.0001922855527800551, "loss": 7.584, "step": 109120 }, { "epoch": 13.132370637785801, "grad_norm": 2141.6220703125, "learning_rate": 0.00019228408751281525, "loss": 7.4634, "step": 109130 }, { "epoch": 13.133574007220217, "grad_norm": 1800.8280029296875, "learning_rate": 0.00019228262211201753, "loss": 7.4745, "step": 109140 }, { "epoch": 13.134777376654633, "grad_norm": 1815.041748046875, "learning_rate": 0.0001922811565776641, "loss": 7.5555, "step": 109150 }, { "epoch": 13.13598074608905, "grad_norm": 5168.802734375, "learning_rate": 0.00019227969090975704, "loss": 7.4652, "step": 109160 }, { "epoch": 13.137184115523466, "grad_norm": 4394.5966796875, "learning_rate": 0.0001922782251082985, "loss": 7.3334, "step": 109170 }, { "epoch": 13.138387484957882, "grad_norm": 1770.221923828125, "learning_rate": 0.00019227675917329054, "loss": 7.4452, "step": 109180 }, { "epoch": 13.1395908543923, "grad_norm": 2380.8955078125, "learning_rate": 0.00019227529310473539, "loss": 7.3853, "step": 109190 }, { "epoch": 13.140794223826715, "grad_norm": 5343.65087890625, "learning_rate": 0.00019227382690263504, "loss": 7.4249, "step": 109200 }, { "epoch": 13.14199759326113, "grad_norm": 8766.2265625, "learning_rate": 0.00019227236056699174, "loss": 7.3178, "step": 109210 }, { "epoch": 13.143200962695548, "grad_norm": 18961.572265625, "learning_rate": 0.0001922708940978075, "loss": 7.2531, "step": 109220 }, { "epoch": 13.144404332129964, "grad_norm": 6364.12939453125, "learning_rate": 0.00019226942749508456, "loss": 7.3507, "step": 109230 }, { "epoch": 13.14560770156438, "grad_norm": 10984.939453125, "learning_rate": 0.00019226796075882494, "loss": 7.2775, "step": 109240 }, { "epoch": 13.146811070998796, "grad_norm": 9057.078125, "learning_rate": 0.0001922664938890308, "loss": 7.3545, "step": 109250 }, { "epoch": 13.148014440433213, "grad_norm": 16218.24609375, "learning_rate": 0.00019226502688570428, "loss": 7.3798, "step": 109260 }, { "epoch": 13.14921780986763, "grad_norm": 8272.513671875, "learning_rate": 0.00019226355974884748, "loss": 7.4395, "step": 109270 }, { "epoch": 13.150421179302045, "grad_norm": 5024.8720703125, "learning_rate": 0.0001922620924784625, "loss": 7.3874, "step": 109280 }, { "epoch": 13.151624548736462, "grad_norm": 4208.353515625, "learning_rate": 0.0001922606250745515, "loss": 7.3606, "step": 109290 }, { "epoch": 13.152827918170878, "grad_norm": 5596.2080078125, "learning_rate": 0.00019225915753711664, "loss": 7.3807, "step": 109300 }, { "epoch": 13.154031287605294, "grad_norm": 3162.18603515625, "learning_rate": 0.00019225768986615998, "loss": 7.3208, "step": 109310 }, { "epoch": 13.155234657039712, "grad_norm": 2683.37890625, "learning_rate": 0.00019225622206168365, "loss": 7.3207, "step": 109320 }, { "epoch": 13.156438026474127, "grad_norm": 3708.72216796875, "learning_rate": 0.00019225475412368984, "loss": 7.1981, "step": 109330 }, { "epoch": 13.157641395908543, "grad_norm": 7212.5322265625, "learning_rate": 0.00019225328605218056, "loss": 7.255, "step": 109340 }, { "epoch": 13.15884476534296, "grad_norm": 1769.2431640625, "learning_rate": 0.00019225181784715802, "loss": 7.4158, "step": 109350 }, { "epoch": 13.160048134777377, "grad_norm": 1829.830810546875, "learning_rate": 0.00019225034950862434, "loss": 7.2682, "step": 109360 }, { "epoch": 13.161251504211792, "grad_norm": 1371.1326904296875, "learning_rate": 0.0001922488810365816, "loss": 7.3645, "step": 109370 }, { "epoch": 13.16245487364621, "grad_norm": 3305.714111328125, "learning_rate": 0.00019224741243103197, "loss": 7.3996, "step": 109380 }, { "epoch": 13.163658243080626, "grad_norm": 1587.72119140625, "learning_rate": 0.00019224594369197755, "loss": 7.328, "step": 109390 }, { "epoch": 13.164861612515042, "grad_norm": 2476.06298828125, "learning_rate": 0.0001922444748194205, "loss": 7.3211, "step": 109400 }, { "epoch": 13.166064981949459, "grad_norm": 3601.108642578125, "learning_rate": 0.0001922430058133629, "loss": 7.27, "step": 109410 }, { "epoch": 13.167268351383875, "grad_norm": 1661.8583984375, "learning_rate": 0.00019224153667380688, "loss": 7.322, "step": 109420 }, { "epoch": 13.16847172081829, "grad_norm": 2693.05419921875, "learning_rate": 0.00019224006740075462, "loss": 7.4785, "step": 109430 }, { "epoch": 13.169675090252708, "grad_norm": 1941.9661865234375, "learning_rate": 0.00019223859799420817, "loss": 7.3159, "step": 109440 }, { "epoch": 13.170878459687124, "grad_norm": 1575.8074951171875, "learning_rate": 0.00019223712845416974, "loss": 7.2925, "step": 109450 }, { "epoch": 13.17208182912154, "grad_norm": 7722.078125, "learning_rate": 0.00019223565878064139, "loss": 7.2437, "step": 109460 }, { "epoch": 13.173285198555957, "grad_norm": 2231.897705078125, "learning_rate": 0.00019223418897362527, "loss": 7.3116, "step": 109470 }, { "epoch": 13.174488567990373, "grad_norm": 5219.56787109375, "learning_rate": 0.00019223271903312349, "loss": 7.3412, "step": 109480 }, { "epoch": 13.175691937424789, "grad_norm": 2755.5380859375, "learning_rate": 0.0001922312489591382, "loss": 7.27, "step": 109490 }, { "epoch": 13.176895306859207, "grad_norm": 18974.353515625, "learning_rate": 0.00019222977875167155, "loss": 7.2698, "step": 109500 }, { "epoch": 13.178098676293622, "grad_norm": 2879.1416015625, "learning_rate": 0.0001922283084107256, "loss": 7.3253, "step": 109510 }, { "epoch": 13.179302045728038, "grad_norm": 1610.7001953125, "learning_rate": 0.00019222683793630255, "loss": 7.3353, "step": 109520 }, { "epoch": 13.180505415162456, "grad_norm": 2331.283935546875, "learning_rate": 0.0001922253673284045, "loss": 7.2884, "step": 109530 }, { "epoch": 13.181708784596871, "grad_norm": 1315.8941650390625, "learning_rate": 0.00019222389658703356, "loss": 7.4174, "step": 109540 }, { "epoch": 13.182912154031287, "grad_norm": 1656.6331787109375, "learning_rate": 0.00019222242571219185, "loss": 7.2555, "step": 109550 }, { "epoch": 13.184115523465705, "grad_norm": 1284.9022216796875, "learning_rate": 0.00019222095470388152, "loss": 7.3754, "step": 109560 }, { "epoch": 13.18531889290012, "grad_norm": 5103.41259765625, "learning_rate": 0.00019221948356210473, "loss": 7.294, "step": 109570 }, { "epoch": 13.186522262334536, "grad_norm": 10232.8310546875, "learning_rate": 0.00019221801228686358, "loss": 7.4663, "step": 109580 }, { "epoch": 13.187725631768952, "grad_norm": 13162.70703125, "learning_rate": 0.00019221654087816018, "loss": 7.3562, "step": 109590 }, { "epoch": 13.18892900120337, "grad_norm": 5513.1787109375, "learning_rate": 0.00019221506933599672, "loss": 7.4971, "step": 109600 }, { "epoch": 13.190132370637786, "grad_norm": 5229.48681640625, "learning_rate": 0.00019221359766037523, "loss": 7.5271, "step": 109610 }, { "epoch": 13.191335740072201, "grad_norm": 7014.1669921875, "learning_rate": 0.00019221212585129792, "loss": 7.5002, "step": 109620 }, { "epoch": 13.192539109506619, "grad_norm": 2516.60205078125, "learning_rate": 0.0001922106539087669, "loss": 7.5269, "step": 109630 }, { "epoch": 13.193742478941035, "grad_norm": 3174.449462890625, "learning_rate": 0.00019220918183278428, "loss": 7.3531, "step": 109640 }, { "epoch": 13.19494584837545, "grad_norm": 5468.00048828125, "learning_rate": 0.00019220770962335225, "loss": 7.3733, "step": 109650 }, { "epoch": 13.196149217809868, "grad_norm": 10192.5185546875, "learning_rate": 0.00019220623728047285, "loss": 7.4507, "step": 109660 }, { "epoch": 13.197352587244284, "grad_norm": 5235.59423828125, "learning_rate": 0.00019220476480414828, "loss": 7.4021, "step": 109670 }, { "epoch": 13.1985559566787, "grad_norm": 25926.591796875, "learning_rate": 0.00019220329219438067, "loss": 7.6072, "step": 109680 }, { "epoch": 13.199759326113117, "grad_norm": 6222.349609375, "learning_rate": 0.0001922018194511721, "loss": 7.466, "step": 109690 }, { "epoch": 13.200962695547533, "grad_norm": 5446.16162109375, "learning_rate": 0.00019220034657452476, "loss": 7.3945, "step": 109700 }, { "epoch": 13.202166064981949, "grad_norm": 5760.71044921875, "learning_rate": 0.00019219887356444076, "loss": 7.3501, "step": 109710 }, { "epoch": 13.203369434416366, "grad_norm": 5378.56689453125, "learning_rate": 0.0001921974004209222, "loss": 7.4397, "step": 109720 }, { "epoch": 13.204572803850782, "grad_norm": 3318.55029296875, "learning_rate": 0.00019219592714397123, "loss": 7.4323, "step": 109730 }, { "epoch": 13.205776173285198, "grad_norm": 13129.595703125, "learning_rate": 0.00019219445373359003, "loss": 7.3708, "step": 109740 }, { "epoch": 13.206979542719615, "grad_norm": 8539.7080078125, "learning_rate": 0.00019219298018978065, "loss": 7.4311, "step": 109750 }, { "epoch": 13.208182912154031, "grad_norm": 3719.565673828125, "learning_rate": 0.0001921915065125453, "loss": 7.2877, "step": 109760 }, { "epoch": 13.209386281588447, "grad_norm": 20094.443359375, "learning_rate": 0.00019219003270188608, "loss": 7.4069, "step": 109770 }, { "epoch": 13.210589651022865, "grad_norm": 3689.990234375, "learning_rate": 0.0001921885587578051, "loss": 7.3464, "step": 109780 }, { "epoch": 13.21179302045728, "grad_norm": 5972.1689453125, "learning_rate": 0.00019218708468030454, "loss": 7.2594, "step": 109790 }, { "epoch": 13.212996389891696, "grad_norm": 4884.939453125, "learning_rate": 0.00019218561046938648, "loss": 7.3278, "step": 109800 }, { "epoch": 13.214199759326114, "grad_norm": 13642.798828125, "learning_rate": 0.0001921841361250531, "loss": 7.2183, "step": 109810 }, { "epoch": 13.21540312876053, "grad_norm": 6999.47900390625, "learning_rate": 0.0001921826616473065, "loss": 7.2478, "step": 109820 }, { "epoch": 13.216606498194945, "grad_norm": 8606.3095703125, "learning_rate": 0.0001921811870361488, "loss": 7.4009, "step": 109830 }, { "epoch": 13.217809867629363, "grad_norm": 5449.6181640625, "learning_rate": 0.00019217971229158222, "loss": 7.3963, "step": 109840 }, { "epoch": 13.219013237063779, "grad_norm": 7508.28466796875, "learning_rate": 0.00019217823741360883, "loss": 7.3927, "step": 109850 }, { "epoch": 13.220216606498195, "grad_norm": 2850.408447265625, "learning_rate": 0.00019217676240223077, "loss": 7.2463, "step": 109860 }, { "epoch": 13.221419975932612, "grad_norm": 1229.3658447265625, "learning_rate": 0.00019217528725745016, "loss": 7.3286, "step": 109870 }, { "epoch": 13.222623345367028, "grad_norm": 1293.100830078125, "learning_rate": 0.00019217381197926919, "loss": 7.3012, "step": 109880 }, { "epoch": 13.223826714801444, "grad_norm": 3768.919189453125, "learning_rate": 0.00019217233656768993, "loss": 7.3584, "step": 109890 }, { "epoch": 13.225030084235861, "grad_norm": 1835.58447265625, "learning_rate": 0.00019217086102271452, "loss": 7.3239, "step": 109900 }, { "epoch": 13.226233453670277, "grad_norm": 4949.21826171875, "learning_rate": 0.00019216938534434515, "loss": 7.3144, "step": 109910 }, { "epoch": 13.227436823104693, "grad_norm": 3693.646484375, "learning_rate": 0.0001921679095325839, "loss": 7.2849, "step": 109920 }, { "epoch": 13.22864019253911, "grad_norm": 2925.319091796875, "learning_rate": 0.00019216643358743296, "loss": 7.3033, "step": 109930 }, { "epoch": 13.229843561973526, "grad_norm": 8444.2021484375, "learning_rate": 0.0001921649575088944, "loss": 7.2008, "step": 109940 }, { "epoch": 13.231046931407942, "grad_norm": 7526.40966796875, "learning_rate": 0.00019216348129697043, "loss": 7.385, "step": 109950 }, { "epoch": 13.232250300842358, "grad_norm": 10518.126953125, "learning_rate": 0.0001921620049516631, "loss": 7.3023, "step": 109960 }, { "epoch": 13.233453670276775, "grad_norm": 5515.12255859375, "learning_rate": 0.00019216052847297464, "loss": 7.3565, "step": 109970 }, { "epoch": 13.234657039711191, "grad_norm": 4395.20166015625, "learning_rate": 0.00019215905186090713, "loss": 7.3707, "step": 109980 }, { "epoch": 13.235860409145607, "grad_norm": 3720.353515625, "learning_rate": 0.0001921575751154627, "loss": 7.3712, "step": 109990 }, { "epoch": 13.237063778580024, "grad_norm": 2171.3388671875, "learning_rate": 0.00019215609823664353, "loss": 7.3181, "step": 110000 }, { "epoch": 13.23826714801444, "grad_norm": 3307.187744140625, "learning_rate": 0.00019215462122445173, "loss": 7.3334, "step": 110010 }, { "epoch": 13.239470517448856, "grad_norm": 3297.223388671875, "learning_rate": 0.00019215314407888942, "loss": 7.379, "step": 110020 }, { "epoch": 13.240673886883274, "grad_norm": 2438.4609375, "learning_rate": 0.00019215166679995878, "loss": 7.4286, "step": 110030 }, { "epoch": 13.24187725631769, "grad_norm": 2479.4765625, "learning_rate": 0.00019215018938766192, "loss": 7.3698, "step": 110040 }, { "epoch": 13.243080625752105, "grad_norm": 3579.760986328125, "learning_rate": 0.00019214871184200098, "loss": 7.3216, "step": 110050 }, { "epoch": 13.244283995186523, "grad_norm": 14938.8564453125, "learning_rate": 0.0001921472341629781, "loss": 7.3142, "step": 110060 }, { "epoch": 13.245487364620939, "grad_norm": 7436.0517578125, "learning_rate": 0.00019214575635059542, "loss": 7.2936, "step": 110070 }, { "epoch": 13.246690734055354, "grad_norm": 467193.0, "learning_rate": 0.0001921442784048551, "loss": 7.356, "step": 110080 }, { "epoch": 13.247894103489772, "grad_norm": 347034.1875, "learning_rate": 0.00019214280032575924, "loss": 7.3351, "step": 110090 }, { "epoch": 13.249097472924188, "grad_norm": 376508.1875, "learning_rate": 0.00019214132211331, "loss": 7.3825, "step": 110100 }, { "epoch": 13.250300842358604, "grad_norm": 199689.65625, "learning_rate": 0.00019213984376750953, "loss": 7.2715, "step": 110110 }, { "epoch": 13.251504211793021, "grad_norm": 187651.8125, "learning_rate": 0.00019213836528835995, "loss": 7.4711, "step": 110120 }, { "epoch": 13.252707581227437, "grad_norm": 221268.75, "learning_rate": 0.0001921368866758634, "loss": 7.5331, "step": 110130 }, { "epoch": 13.253910950661853, "grad_norm": 123181.625, "learning_rate": 0.00019213540793002205, "loss": 7.725, "step": 110140 }, { "epoch": 13.25511432009627, "grad_norm": 198592.984375, "learning_rate": 0.000192133929050838, "loss": 7.5445, "step": 110150 }, { "epoch": 13.256317689530686, "grad_norm": 532332.25, "learning_rate": 0.0001921324500383134, "loss": 7.9431, "step": 110160 }, { "epoch": 13.257521058965102, "grad_norm": 371910.6875, "learning_rate": 0.0001921309708924504, "loss": 9.2739, "step": 110170 }, { "epoch": 13.25872442839952, "grad_norm": 1474643.0, "learning_rate": 0.00019212949161325117, "loss": 9.9371, "step": 110180 }, { "epoch": 13.259927797833935, "grad_norm": 3103805.0, "learning_rate": 0.0001921280122007178, "loss": 9.8847, "step": 110190 }, { "epoch": 13.261131167268351, "grad_norm": 3739055.0, "learning_rate": 0.00019212653265485242, "loss": 10.0419, "step": 110200 }, { "epoch": 13.262334536702769, "grad_norm": 818148.875, "learning_rate": 0.00019212505297565725, "loss": 9.682, "step": 110210 }, { "epoch": 13.263537906137184, "grad_norm": 441856.84375, "learning_rate": 0.00019212357316313438, "loss": 8.9333, "step": 110220 }, { "epoch": 13.2647412755716, "grad_norm": 351513.40625, "learning_rate": 0.00019212209321728592, "loss": 8.4022, "step": 110230 }, { "epoch": 13.265944645006018, "grad_norm": 222066.546875, "learning_rate": 0.00019212061313811406, "loss": 8.3944, "step": 110240 }, { "epoch": 13.267148014440433, "grad_norm": 523585.0625, "learning_rate": 0.00019211913292562093, "loss": 8.3361, "step": 110250 }, { "epoch": 13.26835138387485, "grad_norm": 152219.90625, "learning_rate": 0.0001921176525798087, "loss": 8.4178, "step": 110260 }, { "epoch": 13.269554753309265, "grad_norm": 168611.453125, "learning_rate": 0.00019211617210067945, "loss": 8.2823, "step": 110270 }, { "epoch": 13.270758122743683, "grad_norm": 168448.765625, "learning_rate": 0.00019211469148823536, "loss": 8.0514, "step": 110280 }, { "epoch": 13.271961492178098, "grad_norm": 289148.90625, "learning_rate": 0.00019211321074247856, "loss": 7.751, "step": 110290 }, { "epoch": 13.273164861612514, "grad_norm": 235207.921875, "learning_rate": 0.00019211172986341124, "loss": 7.7347, "step": 110300 }, { "epoch": 13.274368231046932, "grad_norm": 175265.390625, "learning_rate": 0.00019211024885103548, "loss": 7.8259, "step": 110310 }, { "epoch": 13.275571600481348, "grad_norm": 92630.265625, "learning_rate": 0.00019210876770535344, "loss": 7.6984, "step": 110320 }, { "epoch": 13.276774969915763, "grad_norm": 192967.0, "learning_rate": 0.00019210728642636728, "loss": 7.769, "step": 110330 }, { "epoch": 13.277978339350181, "grad_norm": 93588.078125, "learning_rate": 0.00019210580501407916, "loss": 7.6669, "step": 110340 }, { "epoch": 13.279181708784597, "grad_norm": 44452.171875, "learning_rate": 0.00019210432346849116, "loss": 7.6325, "step": 110350 }, { "epoch": 13.280385078219012, "grad_norm": 52937.7578125, "learning_rate": 0.00019210284178960548, "loss": 7.6443, "step": 110360 }, { "epoch": 13.28158844765343, "grad_norm": 123180.0234375, "learning_rate": 0.00019210135997742428, "loss": 7.4855, "step": 110370 }, { "epoch": 13.282791817087846, "grad_norm": 186309.625, "learning_rate": 0.00019209987803194963, "loss": 7.6418, "step": 110380 }, { "epoch": 13.283995186522262, "grad_norm": 144785.140625, "learning_rate": 0.00019209839595318374, "loss": 7.5307, "step": 110390 }, { "epoch": 13.28519855595668, "grad_norm": 57480.515625, "learning_rate": 0.00019209691374112868, "loss": 7.4946, "step": 110400 }, { "epoch": 13.286401925391095, "grad_norm": 87574.5078125, "learning_rate": 0.0001920954313957867, "loss": 7.4765, "step": 110410 }, { "epoch": 13.28760529482551, "grad_norm": 223329.765625, "learning_rate": 0.00019209394891715991, "loss": 7.5298, "step": 110420 }, { "epoch": 13.288808664259928, "grad_norm": 367378.78125, "learning_rate": 0.0001920924663052504, "loss": 7.5591, "step": 110430 }, { "epoch": 13.290012033694344, "grad_norm": 141020.21875, "learning_rate": 0.00019209098356006037, "loss": 7.5163, "step": 110440 }, { "epoch": 13.29121540312876, "grad_norm": 101079.3203125, "learning_rate": 0.00019208950068159194, "loss": 7.6223, "step": 110450 }, { "epoch": 13.292418772563177, "grad_norm": 41622.0078125, "learning_rate": 0.00019208801766984728, "loss": 7.572, "step": 110460 }, { "epoch": 13.293622141997593, "grad_norm": 124686.2734375, "learning_rate": 0.0001920865345248285, "loss": 7.6329, "step": 110470 }, { "epoch": 13.294825511432009, "grad_norm": 259451.703125, "learning_rate": 0.0001920850512465378, "loss": 7.7731, "step": 110480 }, { "epoch": 13.296028880866427, "grad_norm": 108624.3125, "learning_rate": 0.00019208356783497726, "loss": 7.5644, "step": 110490 }, { "epoch": 13.297232250300842, "grad_norm": 101247.6875, "learning_rate": 0.0001920820842901491, "loss": 7.4012, "step": 110500 }, { "epoch": 13.298435619735258, "grad_norm": 193138.0625, "learning_rate": 0.00019208060061205542, "loss": 7.5445, "step": 110510 }, { "epoch": 13.299638989169676, "grad_norm": 167540.546875, "learning_rate": 0.0001920791168006984, "loss": 7.4594, "step": 110520 }, { "epoch": 13.300842358604092, "grad_norm": 55895.1484375, "learning_rate": 0.00019207763285608012, "loss": 7.4328, "step": 110530 }, { "epoch": 13.302045728038507, "grad_norm": 197037.609375, "learning_rate": 0.0001920761487782028, "loss": 7.3331, "step": 110540 }, { "epoch": 13.303249097472925, "grad_norm": 66672.2578125, "learning_rate": 0.00019207466456706854, "loss": 7.4436, "step": 110550 }, { "epoch": 13.30445246690734, "grad_norm": 230686.03125, "learning_rate": 0.00019207318022267952, "loss": 7.4088, "step": 110560 }, { "epoch": 13.305655836341757, "grad_norm": 91527.140625, "learning_rate": 0.00019207169574503787, "loss": 7.3945, "step": 110570 }, { "epoch": 13.306859205776174, "grad_norm": 306368.1875, "learning_rate": 0.00019207021113414573, "loss": 7.4848, "step": 110580 }, { "epoch": 13.30806257521059, "grad_norm": 96212.015625, "learning_rate": 0.0001920687263900053, "loss": 7.4783, "step": 110590 }, { "epoch": 13.309265944645006, "grad_norm": 148426.03125, "learning_rate": 0.00019206724151261866, "loss": 7.4722, "step": 110600 }, { "epoch": 13.310469314079423, "grad_norm": 515727.4375, "learning_rate": 0.000192065756501988, "loss": 7.4529, "step": 110610 }, { "epoch": 13.311672683513839, "grad_norm": 91867.6796875, "learning_rate": 0.00019206427135811546, "loss": 7.4579, "step": 110620 }, { "epoch": 13.312876052948255, "grad_norm": 179382.359375, "learning_rate": 0.0001920627860810032, "loss": 7.426, "step": 110630 }, { "epoch": 13.314079422382672, "grad_norm": 162694.515625, "learning_rate": 0.00019206130067065335, "loss": 7.3658, "step": 110640 }, { "epoch": 13.315282791817088, "grad_norm": 34279.328125, "learning_rate": 0.00019205981512706804, "loss": 7.464, "step": 110650 }, { "epoch": 13.316486161251504, "grad_norm": 81591.9140625, "learning_rate": 0.00019205832945024952, "loss": 7.4596, "step": 110660 }, { "epoch": 13.31768953068592, "grad_norm": 125334.453125, "learning_rate": 0.0001920568436401998, "loss": 7.434, "step": 110670 }, { "epoch": 13.318892900120337, "grad_norm": 174087.625, "learning_rate": 0.0001920553576969211, "loss": 7.3845, "step": 110680 }, { "epoch": 13.320096269554753, "grad_norm": 73718.3359375, "learning_rate": 0.00019205387162041563, "loss": 7.3848, "step": 110690 }, { "epoch": 13.321299638989169, "grad_norm": 68589.8359375, "learning_rate": 0.00019205238541068545, "loss": 7.3469, "step": 110700 }, { "epoch": 13.322503008423586, "grad_norm": 1782040.875, "learning_rate": 0.00019205089906773273, "loss": 7.4777, "step": 110710 }, { "epoch": 13.323706377858002, "grad_norm": 95544.1953125, "learning_rate": 0.00019204941259155965, "loss": 7.5217, "step": 110720 }, { "epoch": 13.324909747292418, "grad_norm": 1522073.375, "learning_rate": 0.00019204792598216832, "loss": 8.0024, "step": 110730 }, { "epoch": 13.326113116726836, "grad_norm": 324821.5625, "learning_rate": 0.00019204643923956093, "loss": 8.1135, "step": 110740 }, { "epoch": 13.327316486161251, "grad_norm": 203893.78125, "learning_rate": 0.00019204495236373962, "loss": 8.0937, "step": 110750 }, { "epoch": 13.328519855595667, "grad_norm": 2383935.75, "learning_rate": 0.00019204346535470652, "loss": 8.3211, "step": 110760 }, { "epoch": 13.329723225030085, "grad_norm": 1821319.625, "learning_rate": 0.00019204197821246383, "loss": 8.4965, "step": 110770 }, { "epoch": 13.3309265944645, "grad_norm": 334876.78125, "learning_rate": 0.00019204049093701366, "loss": 9.1062, "step": 110780 }, { "epoch": 13.332129963898916, "grad_norm": 1427789.0, "learning_rate": 0.00019203900352835818, "loss": 9.3586, "step": 110790 }, { "epoch": 13.333333333333334, "grad_norm": 910.2598266601562, "learning_rate": 0.0001920375159864995, "loss": 9.1072, "step": 110800 }, { "epoch": 13.33453670276775, "grad_norm": 2685.69775390625, "learning_rate": 0.00019203602831143986, "loss": 8.4476, "step": 110810 }, { "epoch": 13.335740072202166, "grad_norm": 604.9318237304688, "learning_rate": 0.00019203454050318135, "loss": 8.3137, "step": 110820 }, { "epoch": 13.336943441636583, "grad_norm": 1511.6759033203125, "learning_rate": 0.0001920330525617261, "loss": 8.0222, "step": 110830 }, { "epoch": 13.338146811070999, "grad_norm": 182.51419067382812, "learning_rate": 0.00019203156448707636, "loss": 7.6407, "step": 110840 }, { "epoch": 13.339350180505415, "grad_norm": 157.84323120117188, "learning_rate": 0.00019203007627923422, "loss": 7.8411, "step": 110850 }, { "epoch": 13.340553549939832, "grad_norm": 200.6873321533203, "learning_rate": 0.00019202858793820182, "loss": 7.7679, "step": 110860 }, { "epoch": 13.341756919374248, "grad_norm": 167.75271606445312, "learning_rate": 0.00019202709946398132, "loss": 7.5541, "step": 110870 }, { "epoch": 13.342960288808664, "grad_norm": 471.394775390625, "learning_rate": 0.0001920256108565749, "loss": 7.6278, "step": 110880 }, { "epoch": 13.344163658243081, "grad_norm": 273.3186340332031, "learning_rate": 0.00019202412211598467, "loss": 7.5743, "step": 110890 }, { "epoch": 13.345367027677497, "grad_norm": 202.0496826171875, "learning_rate": 0.00019202263324221287, "loss": 7.6374, "step": 110900 }, { "epoch": 13.346570397111913, "grad_norm": 247.0529327392578, "learning_rate": 0.00019202114423526158, "loss": 7.6715, "step": 110910 }, { "epoch": 13.34777376654633, "grad_norm": 96.9984130859375, "learning_rate": 0.00019201965509513296, "loss": 7.5571, "step": 110920 }, { "epoch": 13.348977135980746, "grad_norm": 844.7772827148438, "learning_rate": 0.00019201816582182919, "loss": 7.6822, "step": 110930 }, { "epoch": 13.350180505415162, "grad_norm": 1959.7215576171875, "learning_rate": 0.00019201667641535243, "loss": 7.6507, "step": 110940 }, { "epoch": 13.35138387484958, "grad_norm": 387.8429870605469, "learning_rate": 0.0001920151868757048, "loss": 7.7109, "step": 110950 }, { "epoch": 13.352587244283995, "grad_norm": 142.9046630859375, "learning_rate": 0.00019201369720288843, "loss": 7.534, "step": 110960 }, { "epoch": 13.353790613718411, "grad_norm": 35.90800094604492, "learning_rate": 0.0001920122073969056, "loss": 7.6489, "step": 110970 }, { "epoch": 13.354993983152827, "grad_norm": 46.38391876220703, "learning_rate": 0.00019201071745775835, "loss": 7.7431, "step": 110980 }, { "epoch": 13.356197352587245, "grad_norm": 31.96817398071289, "learning_rate": 0.0001920092273854489, "loss": 7.742, "step": 110990 }, { "epoch": 13.35740072202166, "grad_norm": 27.3577880859375, "learning_rate": 0.00019200773717997937, "loss": 7.6949, "step": 111000 }, { "epoch": 13.358604091456076, "grad_norm": 131.8791961669922, "learning_rate": 0.00019200624684135193, "loss": 7.5673, "step": 111010 }, { "epoch": 13.359807460890494, "grad_norm": 125.35891723632812, "learning_rate": 0.00019200475636956874, "loss": 7.5904, "step": 111020 }, { "epoch": 13.36101083032491, "grad_norm": 845.0186767578125, "learning_rate": 0.00019200326576463192, "loss": 7.6362, "step": 111030 }, { "epoch": 13.362214199759325, "grad_norm": 556.4752197265625, "learning_rate": 0.0001920017750265437, "loss": 7.5838, "step": 111040 }, { "epoch": 13.363417569193743, "grad_norm": 97.72586059570312, "learning_rate": 0.00019200028415530617, "loss": 7.4948, "step": 111050 }, { "epoch": 13.364620938628159, "grad_norm": 263.8709411621094, "learning_rate": 0.00019199879315092154, "loss": 7.5156, "step": 111060 }, { "epoch": 13.365824308062574, "grad_norm": 67.794921875, "learning_rate": 0.00019199730201339193, "loss": 7.5486, "step": 111070 }, { "epoch": 13.367027677496992, "grad_norm": 224.88714599609375, "learning_rate": 0.0001919958107427195, "loss": 7.6127, "step": 111080 }, { "epoch": 13.368231046931408, "grad_norm": 96.0893783569336, "learning_rate": 0.00019199431933890644, "loss": 7.5068, "step": 111090 }, { "epoch": 13.369434416365824, "grad_norm": 71.08943176269531, "learning_rate": 0.00019199282780195487, "loss": 7.6105, "step": 111100 }, { "epoch": 13.370637785800241, "grad_norm": 113.27473449707031, "learning_rate": 0.00019199133613186694, "loss": 7.4517, "step": 111110 }, { "epoch": 13.371841155234657, "grad_norm": 148.3744659423828, "learning_rate": 0.0001919898443286449, "loss": 7.5555, "step": 111120 }, { "epoch": 13.373044524669073, "grad_norm": 294.1316833496094, "learning_rate": 0.0001919883523922908, "loss": 7.5281, "step": 111130 }, { "epoch": 13.37424789410349, "grad_norm": 236.04330444335938, "learning_rate": 0.00019198686032280687, "loss": 7.4634, "step": 111140 }, { "epoch": 13.375451263537906, "grad_norm": 174.94091796875, "learning_rate": 0.0001919853681201952, "loss": 7.4227, "step": 111150 }, { "epoch": 13.376654632972322, "grad_norm": 337.4954528808594, "learning_rate": 0.000191983875784458, "loss": 7.4553, "step": 111160 }, { "epoch": 13.37785800240674, "grad_norm": 214.25624084472656, "learning_rate": 0.00019198238331559746, "loss": 7.4461, "step": 111170 }, { "epoch": 13.379061371841155, "grad_norm": 605.7977294921875, "learning_rate": 0.00019198089071361567, "loss": 7.5649, "step": 111180 }, { "epoch": 13.380264741275571, "grad_norm": 407.91314697265625, "learning_rate": 0.00019197939797851486, "loss": 7.4917, "step": 111190 }, { "epoch": 13.381468110709989, "grad_norm": 456.61358642578125, "learning_rate": 0.0001919779051102971, "loss": 7.4187, "step": 111200 }, { "epoch": 13.382671480144404, "grad_norm": 1044.808349609375, "learning_rate": 0.0001919764121089646, "loss": 7.5112, "step": 111210 }, { "epoch": 13.38387484957882, "grad_norm": 1758.309326171875, "learning_rate": 0.00019197491897451958, "loss": 7.4937, "step": 111220 }, { "epoch": 13.385078219013238, "grad_norm": 2519.55224609375, "learning_rate": 0.00019197342570696413, "loss": 7.4275, "step": 111230 }, { "epoch": 13.386281588447654, "grad_norm": 1460.6614990234375, "learning_rate": 0.0001919719323063004, "loss": 7.458, "step": 111240 }, { "epoch": 13.38748495788207, "grad_norm": 1221.3485107421875, "learning_rate": 0.0001919704387725306, "loss": 7.4563, "step": 111250 }, { "epoch": 13.388688327316487, "grad_norm": 1648.83203125, "learning_rate": 0.00019196894510565684, "loss": 7.4569, "step": 111260 }, { "epoch": 13.389891696750903, "grad_norm": 1689.3546142578125, "learning_rate": 0.00019196745130568133, "loss": 7.442, "step": 111270 }, { "epoch": 13.391095066185319, "grad_norm": 2027.291748046875, "learning_rate": 0.0001919659573726062, "loss": 7.447, "step": 111280 }, { "epoch": 13.392298435619736, "grad_norm": 1643.912841796875, "learning_rate": 0.00019196446330643362, "loss": 7.3493, "step": 111290 }, { "epoch": 13.393501805054152, "grad_norm": 580.79345703125, "learning_rate": 0.00019196296910716578, "loss": 7.4129, "step": 111300 }, { "epoch": 13.394705174488568, "grad_norm": 1131.4515380859375, "learning_rate": 0.00019196147477480483, "loss": 7.3378, "step": 111310 }, { "epoch": 13.395908543922985, "grad_norm": 915.0283203125, "learning_rate": 0.00019195998030935288, "loss": 7.3997, "step": 111320 }, { "epoch": 13.397111913357401, "grad_norm": 3438.982177734375, "learning_rate": 0.00019195848571081214, "loss": 7.3809, "step": 111330 }, { "epoch": 13.398315282791817, "grad_norm": 2424.0498046875, "learning_rate": 0.00019195699097918477, "loss": 7.4452, "step": 111340 }, { "epoch": 13.399518652226233, "grad_norm": 1284.0897216796875, "learning_rate": 0.00019195549611447294, "loss": 7.468, "step": 111350 }, { "epoch": 13.40072202166065, "grad_norm": 1931.1319580078125, "learning_rate": 0.0001919540011166788, "loss": 7.6212, "step": 111360 }, { "epoch": 13.401925391095066, "grad_norm": 10985.525390625, "learning_rate": 0.00019195250598580453, "loss": 7.4169, "step": 111370 }, { "epoch": 13.403128760529482, "grad_norm": 2739.75927734375, "learning_rate": 0.0001919510107218523, "loss": 7.4372, "step": 111380 }, { "epoch": 13.4043321299639, "grad_norm": 2312.317138671875, "learning_rate": 0.00019194951532482422, "loss": 7.434, "step": 111390 }, { "epoch": 13.405535499398315, "grad_norm": 810.5154418945312, "learning_rate": 0.0001919480197947225, "loss": 7.4419, "step": 111400 }, { "epoch": 13.406738868832731, "grad_norm": 2130.062744140625, "learning_rate": 0.00019194652413154927, "loss": 7.4068, "step": 111410 }, { "epoch": 13.407942238267148, "grad_norm": 570.1299438476562, "learning_rate": 0.00019194502833530676, "loss": 7.4127, "step": 111420 }, { "epoch": 13.409145607701564, "grad_norm": 2961.287841796875, "learning_rate": 0.00019194353240599706, "loss": 7.4312, "step": 111430 }, { "epoch": 13.41034897713598, "grad_norm": 1396.0400390625, "learning_rate": 0.0001919420363436224, "loss": 7.3678, "step": 111440 }, { "epoch": 13.411552346570398, "grad_norm": 470.570556640625, "learning_rate": 0.00019194054014818487, "loss": 7.3416, "step": 111450 }, { "epoch": 13.412755716004813, "grad_norm": 940.7001342773438, "learning_rate": 0.00019193904381968672, "loss": 7.5045, "step": 111460 }, { "epoch": 13.41395908543923, "grad_norm": 336.78741455078125, "learning_rate": 0.00019193754735813006, "loss": 7.3757, "step": 111470 }, { "epoch": 13.415162454873647, "grad_norm": 1625.3330078125, "learning_rate": 0.00019193605076351706, "loss": 7.3302, "step": 111480 }, { "epoch": 13.416365824308063, "grad_norm": 641.3401489257812, "learning_rate": 0.00019193455403584994, "loss": 7.4026, "step": 111490 }, { "epoch": 13.417569193742478, "grad_norm": 847.099853515625, "learning_rate": 0.00019193305717513077, "loss": 7.5379, "step": 111500 }, { "epoch": 13.418772563176896, "grad_norm": 1427.4117431640625, "learning_rate": 0.00019193156018136178, "loss": 7.5689, "step": 111510 }, { "epoch": 13.419975932611312, "grad_norm": 986.011962890625, "learning_rate": 0.00019193006305454513, "loss": 7.5583, "step": 111520 }, { "epoch": 13.421179302045728, "grad_norm": 1134.5452880859375, "learning_rate": 0.00019192856579468298, "loss": 7.4819, "step": 111530 }, { "epoch": 13.422382671480145, "grad_norm": 962.0313110351562, "learning_rate": 0.0001919270684017775, "loss": 7.5342, "step": 111540 }, { "epoch": 13.42358604091456, "grad_norm": 445.82147216796875, "learning_rate": 0.00019192557087583086, "loss": 7.5181, "step": 111550 }, { "epoch": 13.424789410348977, "grad_norm": 1068.888671875, "learning_rate": 0.00019192407321684524, "loss": 7.4808, "step": 111560 }, { "epoch": 13.425992779783394, "grad_norm": 613.2035522460938, "learning_rate": 0.00019192257542482278, "loss": 7.5092, "step": 111570 }, { "epoch": 13.42719614921781, "grad_norm": 1166.583740234375, "learning_rate": 0.00019192107749976565, "loss": 7.5399, "step": 111580 }, { "epoch": 13.428399518652226, "grad_norm": 837.2355346679688, "learning_rate": 0.00019191957944167601, "loss": 7.5028, "step": 111590 }, { "epoch": 13.429602888086643, "grad_norm": 297.49188232421875, "learning_rate": 0.00019191808125055608, "loss": 7.4061, "step": 111600 }, { "epoch": 13.43080625752106, "grad_norm": 1156.8074951171875, "learning_rate": 0.00019191658292640797, "loss": 7.3911, "step": 111610 }, { "epoch": 13.432009626955475, "grad_norm": 2390.753662109375, "learning_rate": 0.00019191508446923388, "loss": 7.396, "step": 111620 }, { "epoch": 13.433212996389893, "grad_norm": 26288.8203125, "learning_rate": 0.00019191358587903595, "loss": 7.4404, "step": 111630 }, { "epoch": 13.434416365824308, "grad_norm": 91465.8203125, "learning_rate": 0.0001919120871558164, "loss": 7.3824, "step": 111640 }, { "epoch": 13.435619735258724, "grad_norm": 514493.34375, "learning_rate": 0.00019191058829957734, "loss": 7.4483, "step": 111650 }, { "epoch": 13.43682310469314, "grad_norm": 30260.517578125, "learning_rate": 0.000191909089310321, "loss": 7.6983, "step": 111660 }, { "epoch": 13.438026474127557, "grad_norm": 298628.625, "learning_rate": 0.00019190759018804948, "loss": 7.3443, "step": 111670 }, { "epoch": 13.439229843561973, "grad_norm": 345494.8125, "learning_rate": 0.00019190609093276502, "loss": 7.423, "step": 111680 }, { "epoch": 13.440433212996389, "grad_norm": 60102.81640625, "learning_rate": 0.00019190459154446972, "loss": 7.6221, "step": 111690 }, { "epoch": 13.441636582430807, "grad_norm": 13738.0263671875, "learning_rate": 0.0001919030920231658, "loss": 7.6564, "step": 111700 }, { "epoch": 13.442839951865222, "grad_norm": 36233.39453125, "learning_rate": 0.00019190159236885543, "loss": 7.4552, "step": 111710 }, { "epoch": 13.444043321299638, "grad_norm": 27194894.0, "learning_rate": 0.00019190009258154077, "loss": 7.6795, "step": 111720 }, { "epoch": 13.445246690734056, "grad_norm": 431800.4375, "learning_rate": 0.00019189859266122397, "loss": 8.6444, "step": 111730 }, { "epoch": 13.446450060168472, "grad_norm": 3136103.0, "learning_rate": 0.0001918970926079072, "loss": 9.2939, "step": 111740 }, { "epoch": 13.447653429602887, "grad_norm": 6191241.0, "learning_rate": 0.00019189559242159266, "loss": 9.0118, "step": 111750 }, { "epoch": 13.448856799037305, "grad_norm": 2778721.5, "learning_rate": 0.00019189409210228252, "loss": 8.9635, "step": 111760 }, { "epoch": 13.45006016847172, "grad_norm": 2456772.25, "learning_rate": 0.0001918925916499789, "loss": 9.1585, "step": 111770 }, { "epoch": 13.451263537906136, "grad_norm": 125611504.0, "learning_rate": 0.00019189109106468408, "loss": 8.8731, "step": 111780 }, { "epoch": 13.452466907340554, "grad_norm": 1990498.75, "learning_rate": 0.00019188959034640015, "loss": 8.7496, "step": 111790 }, { "epoch": 13.45367027677497, "grad_norm": 11776526.0, "learning_rate": 0.00019188808949512927, "loss": 8.527, "step": 111800 }, { "epoch": 13.454873646209386, "grad_norm": 1119731.625, "learning_rate": 0.00019188658851087363, "loss": 8.5386, "step": 111810 }, { "epoch": 13.456077015643803, "grad_norm": 9190955.0, "learning_rate": 0.00019188508739363544, "loss": 8.6631, "step": 111820 }, { "epoch": 13.457280385078219, "grad_norm": 926190.8125, "learning_rate": 0.00019188358614341682, "loss": 8.5332, "step": 111830 }, { "epoch": 13.458483754512635, "grad_norm": 2818753.5, "learning_rate": 0.00019188208476022, "loss": 8.2075, "step": 111840 }, { "epoch": 13.459687123947052, "grad_norm": 3279688.75, "learning_rate": 0.00019188058324404707, "loss": 8.1707, "step": 111850 }, { "epoch": 13.460890493381468, "grad_norm": 3798531.75, "learning_rate": 0.0001918790815949003, "loss": 8.2591, "step": 111860 }, { "epoch": 13.462093862815884, "grad_norm": 3740412.5, "learning_rate": 0.0001918775798127818, "loss": 8.6526, "step": 111870 }, { "epoch": 13.463297232250302, "grad_norm": 4504479.5, "learning_rate": 0.00019187607789769372, "loss": 9.0411, "step": 111880 }, { "epoch": 13.464500601684717, "grad_norm": 6027376.0, "learning_rate": 0.00019187457584963832, "loss": 8.9381, "step": 111890 }, { "epoch": 13.465703971119133, "grad_norm": 16627999.0, "learning_rate": 0.00019187307366861769, "loss": 8.9557, "step": 111900 }, { "epoch": 13.46690734055355, "grad_norm": 68241200.0, "learning_rate": 0.00019187157135463405, "loss": 9.025, "step": 111910 }, { "epoch": 13.468110709987966, "grad_norm": 6391292.5, "learning_rate": 0.00019187006890768958, "loss": 8.7331, "step": 111920 }, { "epoch": 13.469314079422382, "grad_norm": 1137912.875, "learning_rate": 0.00019186856632778644, "loss": 8.2902, "step": 111930 }, { "epoch": 13.4705174488568, "grad_norm": 5648679.0, "learning_rate": 0.00019186706361492677, "loss": 8.3575, "step": 111940 }, { "epoch": 13.471720818291216, "grad_norm": 2155786.25, "learning_rate": 0.0001918655607691128, "loss": 8.7709, "step": 111950 }, { "epoch": 13.472924187725631, "grad_norm": 4373881.0, "learning_rate": 0.00019186405779034668, "loss": 8.9272, "step": 111960 }, { "epoch": 13.474127557160049, "grad_norm": 8810573.0, "learning_rate": 0.00019186255467863058, "loss": 8.9189, "step": 111970 }, { "epoch": 13.475330926594465, "grad_norm": 116230408.0, "learning_rate": 0.00019186105143396672, "loss": 8.6474, "step": 111980 }, { "epoch": 13.47653429602888, "grad_norm": 1655787.125, "learning_rate": 0.0001918595480563572, "loss": 8.5065, "step": 111990 }, { "epoch": 13.477737665463298, "grad_norm": 868290.6875, "learning_rate": 0.00019185804454580424, "loss": 8.2941, "step": 112000 }, { "epoch": 13.478941034897714, "grad_norm": 847596.375, "learning_rate": 0.00019185654090231002, "loss": 8.1891, "step": 112010 }, { "epoch": 13.48014440433213, "grad_norm": 837659.0625, "learning_rate": 0.00019185503712587673, "loss": 8.1248, "step": 112020 }, { "epoch": 13.481347773766545, "grad_norm": 1641022.0, "learning_rate": 0.00019185353321650648, "loss": 8.0722, "step": 112030 }, { "epoch": 13.482551143200963, "grad_norm": 621701.4375, "learning_rate": 0.0001918520291742015, "loss": 8.183, "step": 112040 }, { "epoch": 13.483754512635379, "grad_norm": 562886.3125, "learning_rate": 0.00019185052499896395, "loss": 8.0529, "step": 112050 }, { "epoch": 13.484957882069795, "grad_norm": 1825469.875, "learning_rate": 0.00019184902069079604, "loss": 8.073, "step": 112060 }, { "epoch": 13.486161251504212, "grad_norm": 1084234.25, "learning_rate": 0.0001918475162496999, "loss": 8.0609, "step": 112070 }, { "epoch": 13.487364620938628, "grad_norm": 429773.15625, "learning_rate": 0.00019184601167567772, "loss": 8.099, "step": 112080 }, { "epoch": 13.488567990373044, "grad_norm": 349600.46875, "learning_rate": 0.0001918445069687317, "loss": 7.9518, "step": 112090 }, { "epoch": 13.489771359807461, "grad_norm": 463701.96875, "learning_rate": 0.000191843002128864, "loss": 7.974, "step": 112100 }, { "epoch": 13.490974729241877, "grad_norm": 622646528.0, "learning_rate": 0.0001918414971560768, "loss": 8.7608, "step": 112110 }, { "epoch": 13.492178098676293, "grad_norm": 2490747392.0, "learning_rate": 0.0001918399920503723, "loss": 9.3857, "step": 112120 }, { "epoch": 13.49338146811071, "grad_norm": 1269773184.0, "learning_rate": 0.00019183848681175261, "loss": 9.4415, "step": 112130 }, { "epoch": 13.494584837545126, "grad_norm": 115942656.0, "learning_rate": 0.00019183698144022, "loss": 9.3894, "step": 112140 }, { "epoch": 13.495788206979542, "grad_norm": 3249985792.0, "learning_rate": 0.00019183547593577658, "loss": 8.934, "step": 112150 }, { "epoch": 13.49699157641396, "grad_norm": 330774.625, "learning_rate": 0.00019183397029842457, "loss": 8.6737, "step": 112160 }, { "epoch": 13.498194945848375, "grad_norm": 557643.8125, "learning_rate": 0.0001918324645281661, "loss": 9.1788, "step": 112170 }, { "epoch": 13.499398315282791, "grad_norm": 2705413.5, "learning_rate": 0.00019183095862500342, "loss": 9.439, "step": 112180 }, { "epoch": 13.500601684717209, "grad_norm": 1079514.25, "learning_rate": 0.00019182945258893863, "loss": 9.5887, "step": 112190 }, { "epoch": 13.501805054151625, "grad_norm": 3137683.5, "learning_rate": 0.00019182794641997396, "loss": 9.093, "step": 112200 }, { "epoch": 13.50300842358604, "grad_norm": 4004074.5, "learning_rate": 0.0001918264401181116, "loss": 8.9457, "step": 112210 }, { "epoch": 13.504211793020458, "grad_norm": 682983.3125, "learning_rate": 0.00019182493368335372, "loss": 8.7, "step": 112220 }, { "epoch": 13.505415162454874, "grad_norm": 1760792.375, "learning_rate": 0.00019182342711570246, "loss": 8.5762, "step": 112230 }, { "epoch": 13.50661853188929, "grad_norm": 919176.3125, "learning_rate": 0.00019182192041516008, "loss": 8.562, "step": 112240 }, { "epoch": 13.507821901323707, "grad_norm": 881351.125, "learning_rate": 0.00019182041358172867, "loss": 8.4858, "step": 112250 }, { "epoch": 13.509025270758123, "grad_norm": 3324286.75, "learning_rate": 0.00019181890661541044, "loss": 8.4438, "step": 112260 }, { "epoch": 13.510228640192539, "grad_norm": 1041977.375, "learning_rate": 0.00019181739951620758, "loss": 8.4676, "step": 112270 }, { "epoch": 13.511432009626956, "grad_norm": 633026.8125, "learning_rate": 0.00019181589228412231, "loss": 8.3156, "step": 112280 }, { "epoch": 13.512635379061372, "grad_norm": 1533722.25, "learning_rate": 0.00019181438491915675, "loss": 8.3899, "step": 112290 }, { "epoch": 13.513838748495788, "grad_norm": 200570.53125, "learning_rate": 0.00019181287742131312, "loss": 8.2837, "step": 112300 }, { "epoch": 13.515042117930205, "grad_norm": 362316.71875, "learning_rate": 0.00019181136979059358, "loss": 8.3123, "step": 112310 }, { "epoch": 13.516245487364621, "grad_norm": 1786150.75, "learning_rate": 0.00019180986202700032, "loss": 8.1862, "step": 112320 }, { "epoch": 13.517448856799037, "grad_norm": 608451.375, "learning_rate": 0.00019180835413053551, "loss": 8.182, "step": 112330 }, { "epoch": 13.518652226233453, "grad_norm": 736070.6875, "learning_rate": 0.00019180684610120138, "loss": 8.2476, "step": 112340 }, { "epoch": 13.51985559566787, "grad_norm": 435168.6875, "learning_rate": 0.00019180533793900005, "loss": 8.3545, "step": 112350 }, { "epoch": 13.521058965102286, "grad_norm": 333991.34375, "learning_rate": 0.00019180382964393376, "loss": 8.2604, "step": 112360 }, { "epoch": 13.522262334536702, "grad_norm": 128969.703125, "learning_rate": 0.00019180232121600465, "loss": 7.995, "step": 112370 }, { "epoch": 13.52346570397112, "grad_norm": 303082.875, "learning_rate": 0.00019180081265521488, "loss": 8.0015, "step": 112380 }, { "epoch": 13.524669073405535, "grad_norm": 776147.625, "learning_rate": 0.0001917993039615667, "loss": 8.0454, "step": 112390 }, { "epoch": 13.525872442839951, "grad_norm": 380622.28125, "learning_rate": 0.00019179779513506226, "loss": 8.0309, "step": 112400 }, { "epoch": 13.527075812274369, "grad_norm": 335196.6875, "learning_rate": 0.00019179628617570376, "loss": 8.0057, "step": 112410 }, { "epoch": 13.528279181708784, "grad_norm": 147368.046875, "learning_rate": 0.00019179477708349335, "loss": 8.0296, "step": 112420 }, { "epoch": 13.5294825511432, "grad_norm": 1779401.5, "learning_rate": 0.00019179326785843324, "loss": 8.0364, "step": 112430 }, { "epoch": 13.530685920577618, "grad_norm": 574697.5, "learning_rate": 0.0001917917585005256, "loss": 7.9876, "step": 112440 }, { "epoch": 13.531889290012034, "grad_norm": 971916.1875, "learning_rate": 0.00019179024900977263, "loss": 7.9857, "step": 112450 }, { "epoch": 13.53309265944645, "grad_norm": 2491465.5, "learning_rate": 0.0001917887393861765, "loss": 7.9302, "step": 112460 }, { "epoch": 13.534296028880867, "grad_norm": 1735155.625, "learning_rate": 0.00019178722962973942, "loss": 7.858, "step": 112470 }, { "epoch": 13.535499398315283, "grad_norm": 1431859.375, "learning_rate": 0.00019178571974046355, "loss": 7.872, "step": 112480 }, { "epoch": 13.536702767749698, "grad_norm": 370191.375, "learning_rate": 0.0001917842097183511, "loss": 7.8488, "step": 112490 }, { "epoch": 13.537906137184116, "grad_norm": 745230.4375, "learning_rate": 0.0001917826995634042, "loss": 7.9449, "step": 112500 }, { "epoch": 13.539109506618532, "grad_norm": 698564.4375, "learning_rate": 0.00019178118927562507, "loss": 7.8787, "step": 112510 }, { "epoch": 13.540312876052948, "grad_norm": 492286.28125, "learning_rate": 0.00019177967885501595, "loss": 7.9291, "step": 112520 }, { "epoch": 13.541516245487365, "grad_norm": 299212.96875, "learning_rate": 0.00019177816830157892, "loss": 7.9905, "step": 112530 }, { "epoch": 13.542719614921781, "grad_norm": 533764.8125, "learning_rate": 0.00019177665761531623, "loss": 8.0556, "step": 112540 }, { "epoch": 13.543922984356197, "grad_norm": 572797.0, "learning_rate": 0.0001917751467962301, "loss": 8.0768, "step": 112550 }, { "epoch": 13.545126353790614, "grad_norm": 784374.0, "learning_rate": 0.00019177363584432265, "loss": 8.1136, "step": 112560 }, { "epoch": 13.54632972322503, "grad_norm": 310809.3125, "learning_rate": 0.0001917721247595961, "loss": 8.3211, "step": 112570 }, { "epoch": 13.547533092659446, "grad_norm": 709662.75, "learning_rate": 0.0001917706135420526, "loss": 8.2857, "step": 112580 }, { "epoch": 13.548736462093864, "grad_norm": 612764.25, "learning_rate": 0.0001917691021916944, "loss": 8.1536, "step": 112590 }, { "epoch": 13.54993983152828, "grad_norm": 1159474.0, "learning_rate": 0.00019176759070852362, "loss": 8.3407, "step": 112600 }, { "epoch": 13.551143200962695, "grad_norm": 1587835.375, "learning_rate": 0.0001917660790925425, "loss": 8.6622, "step": 112610 }, { "epoch": 13.552346570397113, "grad_norm": 470549.40625, "learning_rate": 0.0001917645673437532, "loss": 9.0315, "step": 112620 }, { "epoch": 13.553549939831528, "grad_norm": 335479.0, "learning_rate": 0.00019176305546215788, "loss": 9.6403, "step": 112630 }, { "epoch": 13.554753309265944, "grad_norm": 1567790.375, "learning_rate": 0.00019176154344775884, "loss": 9.6195, "step": 112640 }, { "epoch": 13.555956678700362, "grad_norm": 169770.359375, "learning_rate": 0.00019176003130055814, "loss": 8.9873, "step": 112650 }, { "epoch": 13.557160048134778, "grad_norm": 142464.234375, "learning_rate": 0.00019175851902055802, "loss": 8.1042, "step": 112660 }, { "epoch": 13.558363417569193, "grad_norm": 17675.6640625, "learning_rate": 0.00019175700660776067, "loss": 7.9578, "step": 112670 }, { "epoch": 13.559566787003611, "grad_norm": 162659.609375, "learning_rate": 0.0001917554940621683, "loss": 7.8636, "step": 112680 }, { "epoch": 13.560770156438027, "grad_norm": 26223.21875, "learning_rate": 0.00019175398138378307, "loss": 7.9636, "step": 112690 }, { "epoch": 13.561973525872443, "grad_norm": 198680.3125, "learning_rate": 0.00019175246857260715, "loss": 7.9908, "step": 112700 }, { "epoch": 13.56317689530686, "grad_norm": 181411.140625, "learning_rate": 0.00019175095562864279, "loss": 8.0474, "step": 112710 }, { "epoch": 13.564380264741276, "grad_norm": 142860.9375, "learning_rate": 0.00019174944255189212, "loss": 8.0895, "step": 112720 }, { "epoch": 13.565583634175692, "grad_norm": 298246.0625, "learning_rate": 0.00019174792934235736, "loss": 7.9235, "step": 112730 }, { "epoch": 13.566787003610107, "grad_norm": 609628.5625, "learning_rate": 0.00019174641600004072, "loss": 7.9122, "step": 112740 }, { "epoch": 13.567990373044525, "grad_norm": 172673.015625, "learning_rate": 0.0001917449025249443, "loss": 7.9096, "step": 112750 }, { "epoch": 13.56919374247894, "grad_norm": 156694.53125, "learning_rate": 0.00019174338891707042, "loss": 7.8797, "step": 112760 }, { "epoch": 13.570397111913357, "grad_norm": 398692.3125, "learning_rate": 0.00019174187517642118, "loss": 7.9444, "step": 112770 }, { "epoch": 13.571600481347774, "grad_norm": 148620.859375, "learning_rate": 0.0001917403613029988, "loss": 8.0473, "step": 112780 }, { "epoch": 13.57280385078219, "grad_norm": 147429.3125, "learning_rate": 0.00019173884729680544, "loss": 7.9269, "step": 112790 }, { "epoch": 13.574007220216606, "grad_norm": 72187.125, "learning_rate": 0.00019173733315784336, "loss": 7.9013, "step": 112800 }, { "epoch": 13.575210589651023, "grad_norm": 189529.171875, "learning_rate": 0.00019173581888611469, "loss": 7.886, "step": 112810 }, { "epoch": 13.57641395908544, "grad_norm": 381347.59375, "learning_rate": 0.00019173430448162163, "loss": 7.9838, "step": 112820 }, { "epoch": 13.577617328519855, "grad_norm": 395882.90625, "learning_rate": 0.00019173278994436642, "loss": 7.9936, "step": 112830 }, { "epoch": 13.578820697954272, "grad_norm": 143029.53125, "learning_rate": 0.0001917312752743512, "loss": 7.8788, "step": 112840 }, { "epoch": 13.580024067388688, "grad_norm": 146582.78125, "learning_rate": 0.00019172976047157815, "loss": 7.8896, "step": 112850 }, { "epoch": 13.581227436823104, "grad_norm": 513577.9375, "learning_rate": 0.0001917282455360495, "loss": 7.8231, "step": 112860 }, { "epoch": 13.582430806257522, "grad_norm": 313743.125, "learning_rate": 0.00019172673046776746, "loss": 7.9447, "step": 112870 }, { "epoch": 13.583634175691937, "grad_norm": 1286934.0, "learning_rate": 0.00019172521526673414, "loss": 7.9083, "step": 112880 }, { "epoch": 13.584837545126353, "grad_norm": 816960.625, "learning_rate": 0.00019172369993295183, "loss": 7.9728, "step": 112890 }, { "epoch": 13.58604091456077, "grad_norm": 271149.84375, "learning_rate": 0.00019172218446642268, "loss": 7.8854, "step": 112900 }, { "epoch": 13.587244283995187, "grad_norm": 416902.96875, "learning_rate": 0.00019172066886714888, "loss": 7.8649, "step": 112910 }, { "epoch": 13.588447653429602, "grad_norm": 338114.21875, "learning_rate": 0.0001917191531351326, "loss": 7.8158, "step": 112920 }, { "epoch": 13.58965102286402, "grad_norm": 249585.328125, "learning_rate": 0.0001917176372703761, "loss": 7.9567, "step": 112930 }, { "epoch": 13.590854392298436, "grad_norm": 621158.875, "learning_rate": 0.00019171612127288153, "loss": 7.9621, "step": 112940 }, { "epoch": 13.592057761732852, "grad_norm": 276559.34375, "learning_rate": 0.00019171460514265108, "loss": 8.0345, "step": 112950 }, { "epoch": 13.593261131167269, "grad_norm": 377018.6875, "learning_rate": 0.00019171308887968693, "loss": 8.1667, "step": 112960 }, { "epoch": 13.594464500601685, "grad_norm": 168727.6875, "learning_rate": 0.00019171157248399132, "loss": 7.9832, "step": 112970 }, { "epoch": 13.5956678700361, "grad_norm": 882711.375, "learning_rate": 0.00019171005595556643, "loss": 8.0051, "step": 112980 }, { "epoch": 13.596871239470518, "grad_norm": 329854.875, "learning_rate": 0.00019170853929441442, "loss": 7.9144, "step": 112990 }, { "epoch": 13.598074608904934, "grad_norm": 416692.65625, "learning_rate": 0.0001917070225005375, "loss": 7.8785, "step": 113000 }, { "epoch": 13.59927797833935, "grad_norm": 130376.7265625, "learning_rate": 0.00019170550557393793, "loss": 7.8839, "step": 113010 }, { "epoch": 13.600481347773766, "grad_norm": 184268.09375, "learning_rate": 0.0001917039885146178, "loss": 7.8423, "step": 113020 }, { "epoch": 13.601684717208183, "grad_norm": 384845.84375, "learning_rate": 0.0001917024713225794, "loss": 7.8866, "step": 113030 }, { "epoch": 13.602888086642599, "grad_norm": 130104.640625, "learning_rate": 0.00019170095399782485, "loss": 7.8785, "step": 113040 }, { "epoch": 13.604091456077015, "grad_norm": 120734.328125, "learning_rate": 0.00019169943654035642, "loss": 7.7937, "step": 113050 }, { "epoch": 13.605294825511432, "grad_norm": 255052.390625, "learning_rate": 0.00019169791895017624, "loss": 7.7194, "step": 113060 }, { "epoch": 13.606498194945848, "grad_norm": 124873.4765625, "learning_rate": 0.00019169640122728655, "loss": 7.7463, "step": 113070 }, { "epoch": 13.607701564380264, "grad_norm": 98920.671875, "learning_rate": 0.00019169488337168948, "loss": 7.8596, "step": 113080 }, { "epoch": 13.608904933814681, "grad_norm": 42414.5078125, "learning_rate": 0.0001916933653833873, "loss": 7.7009, "step": 113090 }, { "epoch": 13.610108303249097, "grad_norm": 14710.7763671875, "learning_rate": 0.0001916918472623822, "loss": 7.81, "step": 113100 }, { "epoch": 13.611311672683513, "grad_norm": 27084.69921875, "learning_rate": 0.00019169032900867637, "loss": 7.8299, "step": 113110 }, { "epoch": 13.61251504211793, "grad_norm": 9788.5009765625, "learning_rate": 0.00019168881062227194, "loss": 7.707, "step": 113120 }, { "epoch": 13.613718411552346, "grad_norm": 25628.39453125, "learning_rate": 0.0001916872921031712, "loss": 7.8188, "step": 113130 }, { "epoch": 13.614921780986762, "grad_norm": 134475.671875, "learning_rate": 0.0001916857734513763, "loss": 7.7533, "step": 113140 }, { "epoch": 13.61612515042118, "grad_norm": 11464.96484375, "learning_rate": 0.00019168425466688945, "loss": 7.7125, "step": 113150 }, { "epoch": 13.617328519855596, "grad_norm": 18534.837890625, "learning_rate": 0.0001916827357497129, "loss": 7.658, "step": 113160 }, { "epoch": 13.618531889290011, "grad_norm": 15738.4111328125, "learning_rate": 0.00019168121669984874, "loss": 7.6425, "step": 113170 }, { "epoch": 13.619735258724429, "grad_norm": 66036.796875, "learning_rate": 0.00019167969751729923, "loss": 7.6592, "step": 113180 }, { "epoch": 13.620938628158845, "grad_norm": 6695.29541015625, "learning_rate": 0.00019167817820206655, "loss": 7.4731, "step": 113190 }, { "epoch": 13.62214199759326, "grad_norm": 8706.462890625, "learning_rate": 0.00019167665875415292, "loss": 7.7305, "step": 113200 }, { "epoch": 13.623345367027678, "grad_norm": 17654.146484375, "learning_rate": 0.00019167513917356054, "loss": 7.6584, "step": 113210 }, { "epoch": 13.624548736462094, "grad_norm": 21089.666015625, "learning_rate": 0.00019167361946029163, "loss": 7.6102, "step": 113220 }, { "epoch": 13.62575210589651, "grad_norm": 37830.56640625, "learning_rate": 0.0001916720996143483, "loss": 7.6818, "step": 113230 }, { "epoch": 13.626955475330927, "grad_norm": 18966.365234375, "learning_rate": 0.00019167057963573284, "loss": 7.6295, "step": 113240 }, { "epoch": 13.628158844765343, "grad_norm": 14995.208984375, "learning_rate": 0.00019166905952444743, "loss": 7.6847, "step": 113250 }, { "epoch": 13.629362214199759, "grad_norm": 96365.71875, "learning_rate": 0.00019166753928049425, "loss": 7.6703, "step": 113260 }, { "epoch": 13.630565583634176, "grad_norm": 15477.8720703125, "learning_rate": 0.0001916660189038755, "loss": 7.6484, "step": 113270 }, { "epoch": 13.631768953068592, "grad_norm": 93922.109375, "learning_rate": 0.0001916644983945934, "loss": 7.611, "step": 113280 }, { "epoch": 13.632972322503008, "grad_norm": 79700.671875, "learning_rate": 0.0001916629777526501, "loss": 7.6818, "step": 113290 }, { "epoch": 13.634175691937426, "grad_norm": 35336.71875, "learning_rate": 0.00019166145697804788, "loss": 7.7733, "step": 113300 }, { "epoch": 13.635379061371841, "grad_norm": 45406.3515625, "learning_rate": 0.00019165993607078892, "loss": 7.7625, "step": 113310 }, { "epoch": 13.636582430806257, "grad_norm": 50273.1015625, "learning_rate": 0.00019165841503087535, "loss": 7.7031, "step": 113320 }, { "epoch": 13.637785800240675, "grad_norm": 22194.9296875, "learning_rate": 0.00019165689385830948, "loss": 7.6699, "step": 113330 }, { "epoch": 13.63898916967509, "grad_norm": 18881.408203125, "learning_rate": 0.0001916553725530934, "loss": 7.5607, "step": 113340 }, { "epoch": 13.640192539109506, "grad_norm": 8510.826171875, "learning_rate": 0.0001916538511152294, "loss": 7.6266, "step": 113350 }, { "epoch": 13.641395908543924, "grad_norm": 6942.92138671875, "learning_rate": 0.00019165232954471964, "loss": 7.5286, "step": 113360 }, { "epoch": 13.64259927797834, "grad_norm": 6047.9248046875, "learning_rate": 0.00019165080784156632, "loss": 7.4819, "step": 113370 }, { "epoch": 13.643802647412755, "grad_norm": 13872.33984375, "learning_rate": 0.00019164928600577167, "loss": 7.6924, "step": 113380 }, { "epoch": 13.645006016847173, "grad_norm": 23888.787109375, "learning_rate": 0.00019164776403733788, "loss": 7.7363, "step": 113390 }, { "epoch": 13.646209386281589, "grad_norm": 23308.720703125, "learning_rate": 0.00019164624193626712, "loss": 7.7116, "step": 113400 }, { "epoch": 13.647412755716005, "grad_norm": 2657.74462890625, "learning_rate": 0.00019164471970256166, "loss": 7.7341, "step": 113410 }, { "epoch": 13.648616125150422, "grad_norm": 520.08154296875, "learning_rate": 0.00019164319733622364, "loss": 7.6925, "step": 113420 }, { "epoch": 13.649819494584838, "grad_norm": 530.255126953125, "learning_rate": 0.00019164167483725527, "loss": 7.511, "step": 113430 }, { "epoch": 13.651022864019254, "grad_norm": 288.64007568359375, "learning_rate": 0.0001916401522056588, "loss": 7.5524, "step": 113440 }, { "epoch": 13.65222623345367, "grad_norm": 462.2852478027344, "learning_rate": 0.0001916386294414364, "loss": 7.4896, "step": 113450 }, { "epoch": 13.653429602888087, "grad_norm": 277.65899658203125, "learning_rate": 0.00019163710654459027, "loss": 7.5753, "step": 113460 }, { "epoch": 13.654632972322503, "grad_norm": 100.58748626708984, "learning_rate": 0.00019163558351512262, "loss": 7.4679, "step": 113470 }, { "epoch": 13.655836341756919, "grad_norm": 245.92759704589844, "learning_rate": 0.00019163406035303568, "loss": 7.5388, "step": 113480 }, { "epoch": 13.657039711191336, "grad_norm": 321.40283203125, "learning_rate": 0.00019163253705833163, "loss": 7.5197, "step": 113490 }, { "epoch": 13.658243080625752, "grad_norm": 293.3218994140625, "learning_rate": 0.00019163101363101266, "loss": 7.577, "step": 113500 }, { "epoch": 13.659446450060168, "grad_norm": 46.85881805419922, "learning_rate": 0.00019162949007108098, "loss": 7.5217, "step": 113510 }, { "epoch": 13.660649819494585, "grad_norm": 29.74823570251465, "learning_rate": 0.0001916279663785388, "loss": 7.6511, "step": 113520 }, { "epoch": 13.661853188929001, "grad_norm": 25.847501754760742, "learning_rate": 0.00019162644255338836, "loss": 7.6641, "step": 113530 }, { "epoch": 13.663056558363417, "grad_norm": 24.84147834777832, "learning_rate": 0.00019162491859563182, "loss": 7.5375, "step": 113540 }, { "epoch": 13.664259927797834, "grad_norm": 13.050982475280762, "learning_rate": 0.0001916233945052714, "loss": 7.6707, "step": 113550 }, { "epoch": 13.66546329723225, "grad_norm": 149.4770965576172, "learning_rate": 0.00019162187028230933, "loss": 7.5944, "step": 113560 }, { "epoch": 13.666666666666666, "grad_norm": 92.45903015136719, "learning_rate": 0.00019162034592674776, "loss": 7.6368, "step": 113570 }, { "epoch": 13.667870036101084, "grad_norm": 135.0058135986328, "learning_rate": 0.00019161882143858892, "loss": 7.681, "step": 113580 }, { "epoch": 13.6690734055355, "grad_norm": 29.964786529541016, "learning_rate": 0.00019161729681783506, "loss": 7.5944, "step": 113590 }, { "epoch": 13.670276774969915, "grad_norm": 65.10269165039062, "learning_rate": 0.00019161577206448836, "loss": 7.6076, "step": 113600 }, { "epoch": 13.671480144404333, "grad_norm": 21.363481521606445, "learning_rate": 0.00019161424717855098, "loss": 7.6193, "step": 113610 }, { "epoch": 13.672683513838749, "grad_norm": 11.367424964904785, "learning_rate": 0.0001916127221600252, "loss": 7.6415, "step": 113620 }, { "epoch": 13.673886883273164, "grad_norm": 32.594451904296875, "learning_rate": 0.00019161119700891318, "loss": 7.6179, "step": 113630 }, { "epoch": 13.675090252707582, "grad_norm": 61.654598236083984, "learning_rate": 0.00019160967172521712, "loss": 7.5527, "step": 113640 }, { "epoch": 13.676293622141998, "grad_norm": 33.25052261352539, "learning_rate": 0.00019160814630893926, "loss": 7.5598, "step": 113650 }, { "epoch": 13.677496991576414, "grad_norm": 22.977548599243164, "learning_rate": 0.0001916066207600818, "loss": 7.5235, "step": 113660 }, { "epoch": 13.678700361010831, "grad_norm": 18.656898498535156, "learning_rate": 0.00019160509507864693, "loss": 7.6432, "step": 113670 }, { "epoch": 13.679903730445247, "grad_norm": 61.93690872192383, "learning_rate": 0.00019160356926463688, "loss": 7.6145, "step": 113680 }, { "epoch": 13.681107099879663, "grad_norm": 121.2342300415039, "learning_rate": 0.00019160204331805387, "loss": 7.5778, "step": 113690 }, { "epoch": 13.68231046931408, "grad_norm": 40.884281158447266, "learning_rate": 0.00019160051723890006, "loss": 7.6093, "step": 113700 }, { "epoch": 13.683513838748496, "grad_norm": 108.30182647705078, "learning_rate": 0.0001915989910271777, "loss": 7.573, "step": 113710 }, { "epoch": 13.684717208182912, "grad_norm": 122.6958999633789, "learning_rate": 0.00019159746468288899, "loss": 7.5289, "step": 113720 }, { "epoch": 13.685920577617328, "grad_norm": 58.39677429199219, "learning_rate": 0.0001915959382060361, "loss": 7.629, "step": 113730 }, { "epoch": 13.687123947051745, "grad_norm": 35.375911712646484, "learning_rate": 0.00019159441159662132, "loss": 7.4844, "step": 113740 }, { "epoch": 13.688327316486161, "grad_norm": 45.86326217651367, "learning_rate": 0.00019159288485464678, "loss": 7.5992, "step": 113750 }, { "epoch": 13.689530685920577, "grad_norm": 835.2339477539062, "learning_rate": 0.00019159135798011475, "loss": 7.5361, "step": 113760 }, { "epoch": 13.690734055354994, "grad_norm": 714.7633666992188, "learning_rate": 0.00019158983097302738, "loss": 7.5489, "step": 113770 }, { "epoch": 13.69193742478941, "grad_norm": 3187.551025390625, "learning_rate": 0.00019158830383338694, "loss": 7.4862, "step": 113780 }, { "epoch": 13.693140794223826, "grad_norm": 2072.183349609375, "learning_rate": 0.00019158677656119557, "loss": 7.5505, "step": 113790 }, { "epoch": 13.694344163658243, "grad_norm": 1155.9842529296875, "learning_rate": 0.00019158524915645556, "loss": 7.4884, "step": 113800 }, { "epoch": 13.69554753309266, "grad_norm": 8257.2919921875, "learning_rate": 0.00019158372161916907, "loss": 7.5835, "step": 113810 }, { "epoch": 13.696750902527075, "grad_norm": 3663.29638671875, "learning_rate": 0.00019158219394933834, "loss": 7.5884, "step": 113820 }, { "epoch": 13.697954271961493, "grad_norm": 2061.91650390625, "learning_rate": 0.00019158066614696556, "loss": 7.5723, "step": 113830 }, { "epoch": 13.699157641395908, "grad_norm": 1974.542724609375, "learning_rate": 0.0001915791382120529, "loss": 7.561, "step": 113840 }, { "epoch": 13.700361010830324, "grad_norm": 1014.097412109375, "learning_rate": 0.00019157761014460266, "loss": 7.5404, "step": 113850 }, { "epoch": 13.701564380264742, "grad_norm": 5040.04150390625, "learning_rate": 0.00019157608194461702, "loss": 7.5522, "step": 113860 }, { "epoch": 13.702767749699158, "grad_norm": 4629.54736328125, "learning_rate": 0.00019157455361209816, "loss": 7.5626, "step": 113870 }, { "epoch": 13.703971119133573, "grad_norm": 3905.244140625, "learning_rate": 0.00019157302514704832, "loss": 7.5662, "step": 113880 }, { "epoch": 13.705174488567991, "grad_norm": 2505.708251953125, "learning_rate": 0.00019157149654946967, "loss": 7.4498, "step": 113890 }, { "epoch": 13.706377858002407, "grad_norm": 2081.571044921875, "learning_rate": 0.0001915699678193645, "loss": 7.6297, "step": 113900 }, { "epoch": 13.707581227436823, "grad_norm": 5621.26806640625, "learning_rate": 0.00019156843895673498, "loss": 7.5617, "step": 113910 }, { "epoch": 13.70878459687124, "grad_norm": 2439.014404296875, "learning_rate": 0.00019156690996158327, "loss": 7.5824, "step": 113920 }, { "epoch": 13.709987966305656, "grad_norm": 289.02557373046875, "learning_rate": 0.0001915653808339117, "loss": 7.561, "step": 113930 }, { "epoch": 13.711191335740072, "grad_norm": 971.802978515625, "learning_rate": 0.00019156385157372235, "loss": 7.5163, "step": 113940 }, { "epoch": 13.71239470517449, "grad_norm": 1331.414306640625, "learning_rate": 0.00019156232218101756, "loss": 7.4725, "step": 113950 }, { "epoch": 13.713598074608905, "grad_norm": 449.71319580078125, "learning_rate": 0.00019156079265579944, "loss": 7.5403, "step": 113960 }, { "epoch": 13.71480144404332, "grad_norm": 2657.580322265625, "learning_rate": 0.00019155926299807026, "loss": 7.521, "step": 113970 }, { "epoch": 13.716004813477738, "grad_norm": 2584.49560546875, "learning_rate": 0.00019155773320783223, "loss": 7.6036, "step": 113980 }, { "epoch": 13.717208182912154, "grad_norm": 381.4786376953125, "learning_rate": 0.00019155620328508756, "loss": 7.4513, "step": 113990 }, { "epoch": 13.71841155234657, "grad_norm": 219.59832763671875, "learning_rate": 0.00019155467322983847, "loss": 7.5127, "step": 114000 }, { "epoch": 13.719614921780988, "grad_norm": 1460.5084228515625, "learning_rate": 0.00019155314304208711, "loss": 7.621, "step": 114010 }, { "epoch": 13.720818291215403, "grad_norm": 521.77099609375, "learning_rate": 0.0001915516127218358, "loss": 7.5442, "step": 114020 }, { "epoch": 13.722021660649819, "grad_norm": 272.19732666015625, "learning_rate": 0.0001915500822690867, "loss": 7.5867, "step": 114030 }, { "epoch": 13.723225030084237, "grad_norm": 953.1600341796875, "learning_rate": 0.000191548551683842, "loss": 7.5439, "step": 114040 }, { "epoch": 13.724428399518652, "grad_norm": 1730.5391845703125, "learning_rate": 0.00019154702096610396, "loss": 7.5285, "step": 114050 }, { "epoch": 13.725631768953068, "grad_norm": 3452.368896484375, "learning_rate": 0.00019154549011587477, "loss": 7.5289, "step": 114060 }, { "epoch": 13.726835138387486, "grad_norm": 1592.57275390625, "learning_rate": 0.00019154395913315665, "loss": 7.5607, "step": 114070 }, { "epoch": 13.728038507821902, "grad_norm": 3693.570068359375, "learning_rate": 0.00019154242801795187, "loss": 7.6056, "step": 114080 }, { "epoch": 13.729241877256317, "grad_norm": 3637.03564453125, "learning_rate": 0.00019154089677026254, "loss": 7.5423, "step": 114090 }, { "epoch": 13.730445246690735, "grad_norm": 1049.1793212890625, "learning_rate": 0.00019153936539009094, "loss": 7.5813, "step": 114100 }, { "epoch": 13.73164861612515, "grad_norm": 2903.21484375, "learning_rate": 0.0001915378338774393, "loss": 7.5154, "step": 114110 }, { "epoch": 13.732851985559567, "grad_norm": 3569.20703125, "learning_rate": 0.00019153630223230978, "loss": 7.558, "step": 114120 }, { "epoch": 13.734055354993982, "grad_norm": 2672.241455078125, "learning_rate": 0.00019153477045470466, "loss": 7.544, "step": 114130 }, { "epoch": 13.7352587244284, "grad_norm": 991.072509765625, "learning_rate": 0.0001915332385446261, "loss": 7.6041, "step": 114140 }, { "epoch": 13.736462093862816, "grad_norm": 881.470947265625, "learning_rate": 0.0001915317065020764, "loss": 7.4928, "step": 114150 }, { "epoch": 13.737665463297231, "grad_norm": 504.5856018066406, "learning_rate": 0.00019153017432705768, "loss": 7.6152, "step": 114160 }, { "epoch": 13.738868832731649, "grad_norm": 155.9636688232422, "learning_rate": 0.0001915286420195722, "loss": 7.6489, "step": 114170 }, { "epoch": 13.740072202166065, "grad_norm": 163597.453125, "learning_rate": 0.00019152710957962216, "loss": 7.5399, "step": 114180 }, { "epoch": 13.74127557160048, "grad_norm": 1000418.0, "learning_rate": 0.00019152557700720984, "loss": 7.5673, "step": 114190 }, { "epoch": 13.742478941034898, "grad_norm": 664559.9375, "learning_rate": 0.0001915240443023374, "loss": 7.5518, "step": 114200 }, { "epoch": 13.743682310469314, "grad_norm": 857.570068359375, "learning_rate": 0.00019152251146500703, "loss": 7.6212, "step": 114210 }, { "epoch": 13.74488567990373, "grad_norm": 463.45947265625, "learning_rate": 0.00019152097849522103, "loss": 7.5444, "step": 114220 }, { "epoch": 13.746089049338147, "grad_norm": 629.0192260742188, "learning_rate": 0.00019151944539298158, "loss": 7.5369, "step": 114230 }, { "epoch": 13.747292418772563, "grad_norm": 354.37371826171875, "learning_rate": 0.0001915179121582909, "loss": 7.5589, "step": 114240 }, { "epoch": 13.748495788206979, "grad_norm": 378.7722473144531, "learning_rate": 0.0001915163787911512, "loss": 7.5461, "step": 114250 }, { "epoch": 13.749699157641396, "grad_norm": 288.4242248535156, "learning_rate": 0.00019151484529156466, "loss": 7.4922, "step": 114260 }, { "epoch": 13.750902527075812, "grad_norm": 1222.2769775390625, "learning_rate": 0.0001915133116595336, "loss": 7.6345, "step": 114270 }, { "epoch": 13.752105896510228, "grad_norm": 1110.3900146484375, "learning_rate": 0.00019151177789506016, "loss": 7.4965, "step": 114280 }, { "epoch": 13.753309265944646, "grad_norm": 1740.095947265625, "learning_rate": 0.0001915102439981466, "loss": 7.6424, "step": 114290 }, { "epoch": 13.754512635379061, "grad_norm": 2457.517822265625, "learning_rate": 0.0001915087099687951, "loss": 7.479, "step": 114300 }, { "epoch": 13.755716004813477, "grad_norm": 521.0994262695312, "learning_rate": 0.00019150717580700794, "loss": 7.5619, "step": 114310 }, { "epoch": 13.756919374247895, "grad_norm": 742.9874877929688, "learning_rate": 0.00019150564151278727, "loss": 7.5571, "step": 114320 }, { "epoch": 13.75812274368231, "grad_norm": 497.9338684082031, "learning_rate": 0.00019150410708613537, "loss": 7.6108, "step": 114330 }, { "epoch": 13.759326113116726, "grad_norm": 247.5394744873047, "learning_rate": 0.0001915025725270544, "loss": 7.5334, "step": 114340 }, { "epoch": 13.760529482551144, "grad_norm": 554.1671752929688, "learning_rate": 0.00019150103783554666, "loss": 7.5137, "step": 114350 }, { "epoch": 13.76173285198556, "grad_norm": 335.91912841796875, "learning_rate": 0.0001914995030116143, "loss": 7.5443, "step": 114360 }, { "epoch": 13.762936221419976, "grad_norm": 219.8412628173828, "learning_rate": 0.0001914979680552596, "loss": 7.5378, "step": 114370 }, { "epoch": 13.764139590854393, "grad_norm": 204.30767822265625, "learning_rate": 0.0001914964329664847, "loss": 7.6104, "step": 114380 }, { "epoch": 13.765342960288809, "grad_norm": 1333.4427490234375, "learning_rate": 0.0001914948977452919, "loss": 7.5312, "step": 114390 }, { "epoch": 13.766546329723225, "grad_norm": 1243.517822265625, "learning_rate": 0.00019149336239168338, "loss": 7.4446, "step": 114400 }, { "epoch": 13.76774969915764, "grad_norm": 274.4715881347656, "learning_rate": 0.00019149182690566142, "loss": 7.5787, "step": 114410 }, { "epoch": 13.768953068592058, "grad_norm": 319.8570251464844, "learning_rate": 0.00019149029128722814, "loss": 7.5531, "step": 114420 }, { "epoch": 13.770156438026474, "grad_norm": 184.08749389648438, "learning_rate": 0.00019148875553638586, "loss": 7.6061, "step": 114430 }, { "epoch": 13.77135980746089, "grad_norm": 981.418212890625, "learning_rate": 0.00019148721965313675, "loss": 7.555, "step": 114440 }, { "epoch": 13.772563176895307, "grad_norm": 1492.6351318359375, "learning_rate": 0.00019148568363748302, "loss": 7.6056, "step": 114450 }, { "epoch": 13.773766546329723, "grad_norm": 186.94186401367188, "learning_rate": 0.00019148414748942696, "loss": 7.5459, "step": 114460 }, { "epoch": 13.774969915764139, "grad_norm": 2787.349365234375, "learning_rate": 0.00019148261120897073, "loss": 7.4659, "step": 114470 }, { "epoch": 13.776173285198556, "grad_norm": 651.6773681640625, "learning_rate": 0.00019148107479611656, "loss": 7.5889, "step": 114480 }, { "epoch": 13.777376654632972, "grad_norm": 680.3724975585938, "learning_rate": 0.00019147953825086674, "loss": 7.6009, "step": 114490 }, { "epoch": 13.778580024067388, "grad_norm": 5606.00048828125, "learning_rate": 0.0001914780015732234, "loss": 7.6182, "step": 114500 }, { "epoch": 13.779783393501805, "grad_norm": 10631.271484375, "learning_rate": 0.0001914764647631888, "loss": 7.5175, "step": 114510 }, { "epoch": 13.780986762936221, "grad_norm": 1343.87890625, "learning_rate": 0.00019147492782076522, "loss": 7.5946, "step": 114520 }, { "epoch": 13.782190132370637, "grad_norm": 9865.0048828125, "learning_rate": 0.00019147339074595477, "loss": 7.5153, "step": 114530 }, { "epoch": 13.783393501805055, "grad_norm": 3485.634033203125, "learning_rate": 0.0001914718535387598, "loss": 7.5806, "step": 114540 }, { "epoch": 13.78459687123947, "grad_norm": 2424.786376953125, "learning_rate": 0.00019147031619918243, "loss": 7.5224, "step": 114550 }, { "epoch": 13.785800240673886, "grad_norm": 1291.46875, "learning_rate": 0.00019146877872722496, "loss": 7.4227, "step": 114560 }, { "epoch": 13.787003610108304, "grad_norm": 2239.263427734375, "learning_rate": 0.00019146724112288957, "loss": 7.5755, "step": 114570 }, { "epoch": 13.78820697954272, "grad_norm": 20331.4296875, "learning_rate": 0.0001914657033861785, "loss": 7.5735, "step": 114580 }, { "epoch": 13.789410348977135, "grad_norm": 10993.927734375, "learning_rate": 0.00019146416551709398, "loss": 7.5049, "step": 114590 }, { "epoch": 13.790613718411553, "grad_norm": 10267.681640625, "learning_rate": 0.0001914626275156382, "loss": 7.6153, "step": 114600 }, { "epoch": 13.791817087845969, "grad_norm": 21958.6171875, "learning_rate": 0.00019146108938181345, "loss": 7.562, "step": 114610 }, { "epoch": 13.793020457280385, "grad_norm": 8434.3837890625, "learning_rate": 0.0001914595511156219, "loss": 7.4472, "step": 114620 }, { "epoch": 13.794223826714802, "grad_norm": 2945.95263671875, "learning_rate": 0.00019145801271706585, "loss": 7.4919, "step": 114630 }, { "epoch": 13.795427196149218, "grad_norm": 1485.851318359375, "learning_rate": 0.00019145647418614744, "loss": 7.5163, "step": 114640 }, { "epoch": 13.796630565583634, "grad_norm": 897.9228515625, "learning_rate": 0.00019145493552286892, "loss": 7.4508, "step": 114650 }, { "epoch": 13.797833935018051, "grad_norm": 2290.776611328125, "learning_rate": 0.00019145339672723254, "loss": 7.5969, "step": 114660 }, { "epoch": 13.799037304452467, "grad_norm": 709.6951904296875, "learning_rate": 0.0001914518577992405, "loss": 7.4705, "step": 114670 }, { "epoch": 13.800240673886883, "grad_norm": 2647.744384765625, "learning_rate": 0.00019145031873889508, "loss": 7.6015, "step": 114680 }, { "epoch": 13.8014440433213, "grad_norm": 4433.423828125, "learning_rate": 0.00019144877954619844, "loss": 7.5197, "step": 114690 }, { "epoch": 13.802647412755716, "grad_norm": 992.7537231445312, "learning_rate": 0.00019144724022115286, "loss": 7.5472, "step": 114700 }, { "epoch": 13.803850782190132, "grad_norm": 3379.71044921875, "learning_rate": 0.00019144570076376052, "loss": 7.7093, "step": 114710 }, { "epoch": 13.80505415162455, "grad_norm": 2955.573974609375, "learning_rate": 0.00019144416117402368, "loss": 7.4811, "step": 114720 }, { "epoch": 13.806257521058965, "grad_norm": 1883.2606201171875, "learning_rate": 0.00019144262145194457, "loss": 7.591, "step": 114730 }, { "epoch": 13.807460890493381, "grad_norm": 1220.7481689453125, "learning_rate": 0.00019144108159752543, "loss": 7.4673, "step": 114740 }, { "epoch": 13.808664259927799, "grad_norm": 714.267333984375, "learning_rate": 0.00019143954161076842, "loss": 7.5356, "step": 114750 }, { "epoch": 13.809867629362214, "grad_norm": 1203.8299560546875, "learning_rate": 0.00019143800149167586, "loss": 7.6293, "step": 114760 }, { "epoch": 13.81107099879663, "grad_norm": 352.29852294921875, "learning_rate": 0.00019143646124024992, "loss": 7.575, "step": 114770 }, { "epoch": 13.812274368231048, "grad_norm": 176.0352020263672, "learning_rate": 0.00019143492085649285, "loss": 7.5293, "step": 114780 }, { "epoch": 13.813477737665464, "grad_norm": 75.21821594238281, "learning_rate": 0.00019143338034040687, "loss": 7.5387, "step": 114790 }, { "epoch": 13.81468110709988, "grad_norm": 54.11989212036133, "learning_rate": 0.0001914318396919942, "loss": 7.5445, "step": 114800 }, { "epoch": 13.815884476534297, "grad_norm": 66.36608123779297, "learning_rate": 0.00019143029891125713, "loss": 7.5884, "step": 114810 }, { "epoch": 13.817087845968713, "grad_norm": 59.86701965332031, "learning_rate": 0.0001914287579981978, "loss": 7.5834, "step": 114820 }, { "epoch": 13.818291215403129, "grad_norm": 149.61434936523438, "learning_rate": 0.00019142721695281849, "loss": 7.6047, "step": 114830 }, { "epoch": 13.819494584837544, "grad_norm": 139.30702209472656, "learning_rate": 0.00019142567577512144, "loss": 7.5179, "step": 114840 }, { "epoch": 13.820697954271962, "grad_norm": 547.0332641601562, "learning_rate": 0.00019142413446510888, "loss": 7.446, "step": 114850 }, { "epoch": 13.821901323706378, "grad_norm": 131.2542266845703, "learning_rate": 0.000191422593022783, "loss": 7.5526, "step": 114860 }, { "epoch": 13.823104693140793, "grad_norm": 49.47675323486328, "learning_rate": 0.00019142105144814603, "loss": 7.5435, "step": 114870 }, { "epoch": 13.824308062575211, "grad_norm": 311121.65625, "learning_rate": 0.00019141950974120028, "loss": 7.5234, "step": 114880 }, { "epoch": 13.825511432009627, "grad_norm": 1522005.75, "learning_rate": 0.0001914179679019479, "loss": 7.5947, "step": 114890 }, { "epoch": 13.826714801444043, "grad_norm": 277930.375, "learning_rate": 0.00019141642593039116, "loss": 7.7185, "step": 114900 }, { "epoch": 13.82791817087846, "grad_norm": 365228.09375, "learning_rate": 0.00019141488382653227, "loss": 7.6736, "step": 114910 }, { "epoch": 13.829121540312876, "grad_norm": 777802.75, "learning_rate": 0.0001914133415903735, "loss": 7.6472, "step": 114920 }, { "epoch": 13.830324909747292, "grad_norm": 40078.71875, "learning_rate": 0.00019141179922191703, "loss": 7.7741, "step": 114930 }, { "epoch": 13.83152827918171, "grad_norm": 392143.34375, "learning_rate": 0.00019141025672116512, "loss": 7.701, "step": 114940 }, { "epoch": 13.832731648616125, "grad_norm": 375842.65625, "learning_rate": 0.00019140871408812, "loss": 7.7547, "step": 114950 }, { "epoch": 13.833935018050541, "grad_norm": 1742897.0, "learning_rate": 0.00019140717132278391, "loss": 7.6706, "step": 114960 }, { "epoch": 13.835138387484958, "grad_norm": 363935.90625, "learning_rate": 0.00019140562842515907, "loss": 7.7104, "step": 114970 }, { "epoch": 13.836341756919374, "grad_norm": 650831.125, "learning_rate": 0.00019140408539524773, "loss": 7.7497, "step": 114980 }, { "epoch": 13.83754512635379, "grad_norm": 386689.5, "learning_rate": 0.0001914025422330521, "loss": 7.5964, "step": 114990 }, { "epoch": 13.838748495788208, "grad_norm": 383262.0, "learning_rate": 0.00019140099893857443, "loss": 7.6103, "step": 115000 }, { "epoch": 13.839951865222623, "grad_norm": 1188258.5, "learning_rate": 0.00019139945551181696, "loss": 7.7531, "step": 115010 }, { "epoch": 13.84115523465704, "grad_norm": 383898.875, "learning_rate": 0.0001913979119527819, "loss": 7.7964, "step": 115020 }, { "epoch": 13.842358604091457, "grad_norm": 1139049.25, "learning_rate": 0.00019139636826147148, "loss": 7.7614, "step": 115030 }, { "epoch": 13.843561973525873, "grad_norm": 1116222.25, "learning_rate": 0.000191394824437888, "loss": 7.6898, "step": 115040 }, { "epoch": 13.844765342960288, "grad_norm": 327987.90625, "learning_rate": 0.00019139328048203358, "loss": 7.6877, "step": 115050 }, { "epoch": 13.845968712394706, "grad_norm": 590856.0625, "learning_rate": 0.00019139173639391054, "loss": 7.6416, "step": 115060 }, { "epoch": 13.847172081829122, "grad_norm": 109.004638671875, "learning_rate": 0.0001913901921735211, "loss": 7.6984, "step": 115070 }, { "epoch": 13.848375451263538, "grad_norm": 123.90349578857422, "learning_rate": 0.0001913886478208675, "loss": 7.5357, "step": 115080 }, { "epoch": 13.849578820697955, "grad_norm": 151.775634765625, "learning_rate": 0.00019138710333595197, "loss": 7.5502, "step": 115090 }, { "epoch": 13.85078219013237, "grad_norm": 162.43109130859375, "learning_rate": 0.0001913855587187767, "loss": 7.56, "step": 115100 }, { "epoch": 13.851985559566787, "grad_norm": 177.98968505859375, "learning_rate": 0.000191384013969344, "loss": 7.571, "step": 115110 }, { "epoch": 13.853188929001202, "grad_norm": 435.82769775390625, "learning_rate": 0.00019138246908765604, "loss": 7.4176, "step": 115120 }, { "epoch": 13.85439229843562, "grad_norm": 114.03434753417969, "learning_rate": 0.00019138092407371512, "loss": 7.6695, "step": 115130 }, { "epoch": 13.855595667870036, "grad_norm": 912.5336303710938, "learning_rate": 0.00019137937892752343, "loss": 7.6092, "step": 115140 }, { "epoch": 13.856799037304452, "grad_norm": 618.1643676757812, "learning_rate": 0.0001913778336490832, "loss": 7.5304, "step": 115150 }, { "epoch": 13.85800240673887, "grad_norm": 1248.12353515625, "learning_rate": 0.00019137628823839668, "loss": 7.5448, "step": 115160 }, { "epoch": 13.859205776173285, "grad_norm": 2512.384521484375, "learning_rate": 0.00019137474269546612, "loss": 7.5656, "step": 115170 }, { "epoch": 13.8604091456077, "grad_norm": 1786.19482421875, "learning_rate": 0.00019137319702029377, "loss": 7.5498, "step": 115180 }, { "epoch": 13.861612515042118, "grad_norm": 3869.5947265625, "learning_rate": 0.00019137165121288182, "loss": 7.4658, "step": 115190 }, { "epoch": 13.862815884476534, "grad_norm": 1402.50390625, "learning_rate": 0.00019137010527323255, "loss": 7.6734, "step": 115200 }, { "epoch": 13.86401925391095, "grad_norm": 3173.96923828125, "learning_rate": 0.00019136855920134815, "loss": 7.5028, "step": 115210 }, { "epoch": 13.865222623345367, "grad_norm": 2175.4541015625, "learning_rate": 0.0001913670129972309, "loss": 7.5096, "step": 115220 }, { "epoch": 13.866425992779783, "grad_norm": 4703.65869140625, "learning_rate": 0.00019136546666088307, "loss": 7.4453, "step": 115230 }, { "epoch": 13.867629362214199, "grad_norm": 2822.893798828125, "learning_rate": 0.0001913639201923068, "loss": 7.5953, "step": 115240 }, { "epoch": 13.868832731648617, "grad_norm": 1708.392333984375, "learning_rate": 0.0001913623735915044, "loss": 7.5144, "step": 115250 }, { "epoch": 13.870036101083032, "grad_norm": 362.4938659667969, "learning_rate": 0.00019136082685847807, "loss": 7.6342, "step": 115260 }, { "epoch": 13.871239470517448, "grad_norm": 78.56669616699219, "learning_rate": 0.0001913592799932301, "loss": 7.5313, "step": 115270 }, { "epoch": 13.872442839951866, "grad_norm": 878.3430786132812, "learning_rate": 0.00019135773299576268, "loss": 7.5766, "step": 115280 }, { "epoch": 13.873646209386282, "grad_norm": 604.4127197265625, "learning_rate": 0.00019135618586607804, "loss": 7.4555, "step": 115290 }, { "epoch": 13.874849578820697, "grad_norm": 322.37274169921875, "learning_rate": 0.00019135463860417846, "loss": 7.4844, "step": 115300 }, { "epoch": 13.876052948255115, "grad_norm": 1616.5966796875, "learning_rate": 0.00019135309121006617, "loss": 7.5152, "step": 115310 }, { "epoch": 13.87725631768953, "grad_norm": 684.5773315429688, "learning_rate": 0.0001913515436837434, "loss": 7.5278, "step": 115320 }, { "epoch": 13.878459687123947, "grad_norm": 454.45574951171875, "learning_rate": 0.0001913499960252124, "loss": 7.5819, "step": 115330 }, { "epoch": 13.879663056558364, "grad_norm": 1045.1630859375, "learning_rate": 0.0001913484482344754, "loss": 7.6103, "step": 115340 }, { "epoch": 13.88086642599278, "grad_norm": 768.3417358398438, "learning_rate": 0.00019134690031153464, "loss": 7.5073, "step": 115350 }, { "epoch": 13.882069795427196, "grad_norm": 1216.54150390625, "learning_rate": 0.00019134535225639234, "loss": 7.65, "step": 115360 }, { "epoch": 13.883273164861613, "grad_norm": 2441.576171875, "learning_rate": 0.00019134380406905078, "loss": 7.5967, "step": 115370 }, { "epoch": 13.884476534296029, "grad_norm": 726.3614501953125, "learning_rate": 0.0001913422557495122, "loss": 7.6242, "step": 115380 }, { "epoch": 13.885679903730445, "grad_norm": 946.9081420898438, "learning_rate": 0.0001913407072977788, "loss": 7.4688, "step": 115390 }, { "epoch": 13.886883273164862, "grad_norm": 415.5739440917969, "learning_rate": 0.00019133915871385283, "loss": 7.5077, "step": 115400 }, { "epoch": 13.888086642599278, "grad_norm": 1068.4891357421875, "learning_rate": 0.0001913376099977366, "loss": 7.4925, "step": 115410 }, { "epoch": 13.889290012033694, "grad_norm": 582.84326171875, "learning_rate": 0.00019133606114943226, "loss": 7.4997, "step": 115420 }, { "epoch": 13.890493381468112, "grad_norm": 614.4970703125, "learning_rate": 0.00019133451216894209, "loss": 7.5631, "step": 115430 }, { "epoch": 13.891696750902527, "grad_norm": 226.0276336669922, "learning_rate": 0.00019133296305626836, "loss": 7.4974, "step": 115440 }, { "epoch": 13.892900120336943, "grad_norm": 241.5906982421875, "learning_rate": 0.00019133141381141324, "loss": 7.5471, "step": 115450 }, { "epoch": 13.89410348977136, "grad_norm": 314.6260070800781, "learning_rate": 0.00019132986443437907, "loss": 7.3999, "step": 115460 }, { "epoch": 13.895306859205776, "grad_norm": 388.9868469238281, "learning_rate": 0.000191328314925168, "loss": 7.3893, "step": 115470 }, { "epoch": 13.896510228640192, "grad_norm": 722.2169189453125, "learning_rate": 0.0001913267652837823, "loss": 7.3604, "step": 115480 }, { "epoch": 13.89771359807461, "grad_norm": 557.0263061523438, "learning_rate": 0.00019132521551022425, "loss": 7.3636, "step": 115490 }, { "epoch": 13.898916967509026, "grad_norm": 252.8494873046875, "learning_rate": 0.00019132366560449605, "loss": 7.4054, "step": 115500 }, { "epoch": 13.900120336943441, "grad_norm": 1011.9021606445312, "learning_rate": 0.00019132211556659995, "loss": 7.3604, "step": 115510 }, { "epoch": 13.901323706377857, "grad_norm": 579.222412109375, "learning_rate": 0.0001913205653965382, "loss": 7.3747, "step": 115520 }, { "epoch": 13.902527075812275, "grad_norm": 1107.37353515625, "learning_rate": 0.00019131901509431308, "loss": 7.3347, "step": 115530 }, { "epoch": 13.90373044524669, "grad_norm": 1647.1949462890625, "learning_rate": 0.00019131746465992676, "loss": 7.2693, "step": 115540 }, { "epoch": 13.904933814681106, "grad_norm": 1018.6296997070312, "learning_rate": 0.00019131591409338156, "loss": 7.3063, "step": 115550 }, { "epoch": 13.906137184115524, "grad_norm": 1787.6634521484375, "learning_rate": 0.00019131436339467968, "loss": 7.3459, "step": 115560 }, { "epoch": 13.90734055354994, "grad_norm": 663.8572998046875, "learning_rate": 0.00019131281256382337, "loss": 7.2402, "step": 115570 }, { "epoch": 13.908543922984355, "grad_norm": 1712.150634765625, "learning_rate": 0.00019131126160081485, "loss": 7.3927, "step": 115580 }, { "epoch": 13.909747292418773, "grad_norm": 1224.6893310546875, "learning_rate": 0.0001913097105056564, "loss": 7.3018, "step": 115590 }, { "epoch": 13.910950661853189, "grad_norm": 624.0105590820312, "learning_rate": 0.00019130815927835028, "loss": 7.2882, "step": 115600 }, { "epoch": 13.912154031287605, "grad_norm": 935.8486938476562, "learning_rate": 0.00019130660791889868, "loss": 7.424, "step": 115610 }, { "epoch": 13.913357400722022, "grad_norm": 2166.057373046875, "learning_rate": 0.0001913050564273039, "loss": 7.3981, "step": 115620 }, { "epoch": 13.914560770156438, "grad_norm": 1311.1768798828125, "learning_rate": 0.00019130350480356815, "loss": 7.279, "step": 115630 }, { "epoch": 13.915764139590854, "grad_norm": 623.0220947265625, "learning_rate": 0.0001913019530476937, "loss": 7.2919, "step": 115640 }, { "epoch": 13.916967509025271, "grad_norm": 7811.54296875, "learning_rate": 0.00019130040115968274, "loss": 7.3636, "step": 115650 }, { "epoch": 13.918170878459687, "grad_norm": 4457.65869140625, "learning_rate": 0.00019129884913953758, "loss": 7.3314, "step": 115660 }, { "epoch": 13.919374247894103, "grad_norm": 741759.125, "learning_rate": 0.00019129729698726046, "loss": 7.3351, "step": 115670 }, { "epoch": 13.92057761732852, "grad_norm": 44654788.0, "learning_rate": 0.0001912957447028536, "loss": 7.4368, "step": 115680 }, { "epoch": 13.921780986762936, "grad_norm": 220440.046875, "learning_rate": 0.00019129419228631927, "loss": 7.5926, "step": 115690 }, { "epoch": 13.922984356197352, "grad_norm": 183853.40625, "learning_rate": 0.00019129263973765968, "loss": 7.4413, "step": 115700 }, { "epoch": 13.92418772563177, "grad_norm": 120519.4140625, "learning_rate": 0.00019129108705687712, "loss": 7.392, "step": 115710 }, { "epoch": 13.925391095066185, "grad_norm": 148056.796875, "learning_rate": 0.00019128953424397383, "loss": 7.4608, "step": 115720 }, { "epoch": 13.926594464500601, "grad_norm": 71594.375, "learning_rate": 0.000191287981298952, "loss": 7.4792, "step": 115730 }, { "epoch": 13.927797833935019, "grad_norm": 110738.96875, "learning_rate": 0.00019128642822181395, "loss": 7.4789, "step": 115740 }, { "epoch": 13.929001203369435, "grad_norm": 477835.84375, "learning_rate": 0.0001912848750125619, "loss": 7.449, "step": 115750 }, { "epoch": 13.93020457280385, "grad_norm": 73396.6875, "learning_rate": 0.0001912833216711981, "loss": 7.4704, "step": 115760 }, { "epoch": 13.931407942238268, "grad_norm": 3557499.75, "learning_rate": 0.00019128176819772482, "loss": 7.4118, "step": 115770 }, { "epoch": 13.932611311672684, "grad_norm": 213500.859375, "learning_rate": 0.00019128021459214424, "loss": 7.5003, "step": 115780 }, { "epoch": 13.9338146811071, "grad_norm": 258351.4375, "learning_rate": 0.00019127866085445866, "loss": 7.4014, "step": 115790 }, { "epoch": 13.935018050541515, "grad_norm": 38978.03515625, "learning_rate": 0.00019127710698467034, "loss": 7.3462, "step": 115800 }, { "epoch": 13.936221419975933, "grad_norm": 306324.0, "learning_rate": 0.00019127555298278153, "loss": 7.3709, "step": 115810 }, { "epoch": 13.937424789410349, "grad_norm": 87539.3515625, "learning_rate": 0.00019127399884879442, "loss": 7.2118, "step": 115820 }, { "epoch": 13.938628158844764, "grad_norm": 101658.6015625, "learning_rate": 0.00019127244458271128, "loss": 7.3367, "step": 115830 }, { "epoch": 13.939831528279182, "grad_norm": 237027.84375, "learning_rate": 0.00019127089018453446, "loss": 7.3105, "step": 115840 }, { "epoch": 13.941034897713598, "grad_norm": 491532.09375, "learning_rate": 0.00019126933565426606, "loss": 7.4238, "step": 115850 }, { "epoch": 13.942238267148014, "grad_norm": 306450.78125, "learning_rate": 0.00019126778099190842, "loss": 7.4432, "step": 115860 }, { "epoch": 13.943441636582431, "grad_norm": 176997.78125, "learning_rate": 0.00019126622619746378, "loss": 7.4159, "step": 115870 }, { "epoch": 13.944645006016847, "grad_norm": 339021.1875, "learning_rate": 0.00019126467127093434, "loss": 7.3195, "step": 115880 }, { "epoch": 13.945848375451263, "grad_norm": 1361273.0, "learning_rate": 0.0001912631162123224, "loss": 7.3144, "step": 115890 }, { "epoch": 13.94705174488568, "grad_norm": 1290467.125, "learning_rate": 0.00019126156102163022, "loss": 7.4041, "step": 115900 }, { "epoch": 13.948255114320096, "grad_norm": 747153.25, "learning_rate": 0.00019126000569886, "loss": 7.4325, "step": 115910 }, { "epoch": 13.949458483754512, "grad_norm": 86327400.0, "learning_rate": 0.00019125845024401406, "loss": 7.5069, "step": 115920 }, { "epoch": 13.95066185318893, "grad_norm": 736983.8125, "learning_rate": 0.0001912568946570946, "loss": 7.4164, "step": 115930 }, { "epoch": 13.951865222623345, "grad_norm": 736444.875, "learning_rate": 0.00019125533893810386, "loss": 7.4977, "step": 115940 }, { "epoch": 13.953068592057761, "grad_norm": 425886.09375, "learning_rate": 0.00019125378308704414, "loss": 7.448, "step": 115950 }, { "epoch": 13.954271961492179, "grad_norm": 210765.0625, "learning_rate": 0.00019125222710391767, "loss": 7.391, "step": 115960 }, { "epoch": 13.955475330926594, "grad_norm": 164297.4375, "learning_rate": 0.00019125067098872664, "loss": 7.2417, "step": 115970 }, { "epoch": 13.95667870036101, "grad_norm": 416952.03125, "learning_rate": 0.00019124911474147343, "loss": 7.3721, "step": 115980 }, { "epoch": 13.957882069795428, "grad_norm": 180548.59375, "learning_rate": 0.00019124755836216019, "loss": 7.3208, "step": 115990 }, { "epoch": 13.959085439229844, "grad_norm": 125330.5546875, "learning_rate": 0.0001912460018507892, "loss": 7.4576, "step": 116000 }, { "epoch": 13.96028880866426, "grad_norm": 109254.2578125, "learning_rate": 0.00019124444520736273, "loss": 7.3524, "step": 116010 }, { "epoch": 13.961492178098677, "grad_norm": 110185.8359375, "learning_rate": 0.00019124288843188302, "loss": 7.3794, "step": 116020 }, { "epoch": 13.962695547533093, "grad_norm": 297316.21875, "learning_rate": 0.00019124133152435233, "loss": 7.455, "step": 116030 }, { "epoch": 13.963898916967509, "grad_norm": 335070.09375, "learning_rate": 0.0001912397744847729, "loss": 7.2952, "step": 116040 }, { "epoch": 13.965102286401926, "grad_norm": 1116348.75, "learning_rate": 0.00019123821731314698, "loss": 7.4031, "step": 116050 }, { "epoch": 13.966305655836342, "grad_norm": 90652.84375, "learning_rate": 0.00019123666000947682, "loss": 7.3023, "step": 116060 }, { "epoch": 13.967509025270758, "grad_norm": 1236206.125, "learning_rate": 0.00019123510257376472, "loss": 7.4441, "step": 116070 }, { "epoch": 13.968712394705175, "grad_norm": 518104.8125, "learning_rate": 0.00019123354500601288, "loss": 7.3606, "step": 116080 }, { "epoch": 13.969915764139591, "grad_norm": 57724.90234375, "learning_rate": 0.0001912319873062236, "loss": 7.4245, "step": 116090 }, { "epoch": 13.971119133574007, "grad_norm": 1966861.125, "learning_rate": 0.00019123042947439907, "loss": 7.3486, "step": 116100 }, { "epoch": 13.972322503008424, "grad_norm": 207407.09375, "learning_rate": 0.00019122887151054162, "loss": 7.4521, "step": 116110 }, { "epoch": 13.97352587244284, "grad_norm": 1851230.375, "learning_rate": 0.00019122731341465347, "loss": 7.3208, "step": 116120 }, { "epoch": 13.974729241877256, "grad_norm": 99642.921875, "learning_rate": 0.00019122575518673687, "loss": 7.2795, "step": 116130 }, { "epoch": 13.975932611311674, "grad_norm": 169405.84375, "learning_rate": 0.00019122419682679405, "loss": 7.4647, "step": 116140 }, { "epoch": 13.97713598074609, "grad_norm": 683880.6875, "learning_rate": 0.00019122263833482733, "loss": 7.399, "step": 116150 }, { "epoch": 13.978339350180505, "grad_norm": 93851.8515625, "learning_rate": 0.0001912210797108389, "loss": 7.3239, "step": 116160 }, { "epoch": 13.979542719614923, "grad_norm": 630520.75, "learning_rate": 0.00019121952095483107, "loss": 7.3098, "step": 116170 }, { "epoch": 13.980746089049338, "grad_norm": 58073.99609375, "learning_rate": 0.00019121796206680606, "loss": 7.4691, "step": 116180 }, { "epoch": 13.981949458483754, "grad_norm": 264698.6875, "learning_rate": 0.00019121640304676615, "loss": 7.3899, "step": 116190 }, { "epoch": 13.98315282791817, "grad_norm": 612826.6875, "learning_rate": 0.0001912148438947136, "loss": 7.3602, "step": 116200 }, { "epoch": 13.984356197352588, "grad_norm": 274209.15625, "learning_rate": 0.0001912132846106506, "loss": 7.3731, "step": 116210 }, { "epoch": 13.985559566787003, "grad_norm": 411674.96875, "learning_rate": 0.00019121172519457949, "loss": 7.3124, "step": 116220 }, { "epoch": 13.98676293622142, "grad_norm": 573140.625, "learning_rate": 0.0001912101656465025, "loss": 7.371, "step": 116230 }, { "epoch": 13.987966305655837, "grad_norm": 275947.03125, "learning_rate": 0.00019120860596642185, "loss": 7.396, "step": 116240 }, { "epoch": 13.989169675090253, "grad_norm": 1235720.25, "learning_rate": 0.00019120704615433983, "loss": 7.3123, "step": 116250 }, { "epoch": 13.990373044524668, "grad_norm": 270274.875, "learning_rate": 0.00019120548621025872, "loss": 7.2705, "step": 116260 }, { "epoch": 13.991576413959086, "grad_norm": 1281890.75, "learning_rate": 0.00019120392613418074, "loss": 7.4206, "step": 116270 }, { "epoch": 13.992779783393502, "grad_norm": 128000.0078125, "learning_rate": 0.0001912023659261082, "loss": 7.381, "step": 116280 }, { "epoch": 13.993983152827917, "grad_norm": 445059.4375, "learning_rate": 0.00019120080558604328, "loss": 7.3548, "step": 116290 }, { "epoch": 13.995186522262335, "grad_norm": 12816711.0, "learning_rate": 0.00019119924511398827, "loss": 7.385, "step": 116300 }, { "epoch": 13.99638989169675, "grad_norm": 733852.875, "learning_rate": 0.00019119768450994546, "loss": 7.2864, "step": 116310 }, { "epoch": 13.997593261131167, "grad_norm": 254457.140625, "learning_rate": 0.0001911961237739171, "loss": 7.3542, "step": 116320 }, { "epoch": 13.998796630565584, "grad_norm": 250920.65625, "learning_rate": 0.00019119456290590537, "loss": 7.501, "step": 116330 }, { "epoch": 14.0, "grad_norm": 757592.3125, "learning_rate": 0.00019119300190591264, "loss": 7.4478, "step": 116340 }, { "epoch": 14.0, "eval_loss": 7.410001277923584, "eval_runtime": 119.0415, "eval_samples_per_second": 62.054, "eval_steps_per_second": 7.762, "step": 116340 }, { "epoch": 14.001203369434416, "grad_norm": 803210.9375, "learning_rate": 0.00019119144077394112, "loss": 7.4929, "step": 116350 }, { "epoch": 14.002406738868833, "grad_norm": 492713.84375, "learning_rate": 0.00019118987950999308, "loss": 7.3895, "step": 116360 }, { "epoch": 14.00361010830325, "grad_norm": 211750.4375, "learning_rate": 0.00019118831811407074, "loss": 7.3983, "step": 116370 }, { "epoch": 14.004813477737665, "grad_norm": 1076696.375, "learning_rate": 0.00019118675658617642, "loss": 7.4769, "step": 116380 }, { "epoch": 14.006016847172083, "grad_norm": 326243.5, "learning_rate": 0.00019118519492631233, "loss": 7.4759, "step": 116390 }, { "epoch": 14.007220216606498, "grad_norm": 404986.9375, "learning_rate": 0.00019118363313448073, "loss": 7.3638, "step": 116400 }, { "epoch": 14.008423586040914, "grad_norm": 1638931.375, "learning_rate": 0.00019118207121068394, "loss": 7.3761, "step": 116410 }, { "epoch": 14.009626955475332, "grad_norm": 356602.65625, "learning_rate": 0.00019118050915492416, "loss": 7.3779, "step": 116420 }, { "epoch": 14.010830324909747, "grad_norm": 879317.875, "learning_rate": 0.00019117894696720367, "loss": 7.3589, "step": 116430 }, { "epoch": 14.012033694344163, "grad_norm": 176502.515625, "learning_rate": 0.00019117738464752474, "loss": 7.3981, "step": 116440 }, { "epoch": 14.01323706377858, "grad_norm": 1440828.375, "learning_rate": 0.00019117582219588963, "loss": 7.3956, "step": 116450 }, { "epoch": 14.014440433212997, "grad_norm": 368090.78125, "learning_rate": 0.0001911742596123006, "loss": 7.4667, "step": 116460 }, { "epoch": 14.015643802647412, "grad_norm": 837236.625, "learning_rate": 0.0001911726968967599, "loss": 7.3579, "step": 116470 }, { "epoch": 14.01684717208183, "grad_norm": 1544272.375, "learning_rate": 0.00019117113404926978, "loss": 7.3264, "step": 116480 }, { "epoch": 14.018050541516246, "grad_norm": 377169.53125, "learning_rate": 0.00019116957106983252, "loss": 7.3992, "step": 116490 }, { "epoch": 14.019253910950662, "grad_norm": 212784.9375, "learning_rate": 0.0001911680079584504, "loss": 7.4495, "step": 116500 }, { "epoch": 14.020457280385079, "grad_norm": 1739744.375, "learning_rate": 0.00019116644471512567, "loss": 7.4042, "step": 116510 }, { "epoch": 14.021660649819495, "grad_norm": 1461934.0, "learning_rate": 0.00019116488133986054, "loss": 7.4079, "step": 116520 }, { "epoch": 14.02286401925391, "grad_norm": 71283.9375, "learning_rate": 0.00019116331783265738, "loss": 7.5439, "step": 116530 }, { "epoch": 14.024067388688326, "grad_norm": 401461.125, "learning_rate": 0.00019116175419351837, "loss": 7.3666, "step": 116540 }, { "epoch": 14.025270758122744, "grad_norm": 8769769.0, "learning_rate": 0.00019116019042244578, "loss": 7.2829, "step": 116550 }, { "epoch": 14.02647412755716, "grad_norm": 962703.1875, "learning_rate": 0.00019115862651944188, "loss": 7.4646, "step": 116560 }, { "epoch": 14.027677496991576, "grad_norm": 628370.5625, "learning_rate": 0.00019115706248450898, "loss": 7.4724, "step": 116570 }, { "epoch": 14.028880866425993, "grad_norm": 1131946.875, "learning_rate": 0.00019115549831764925, "loss": 7.4846, "step": 116580 }, { "epoch": 14.030084235860409, "grad_norm": 205639.09375, "learning_rate": 0.00019115393401886505, "loss": 7.6485, "step": 116590 }, { "epoch": 14.031287605294825, "grad_norm": 695927.1875, "learning_rate": 0.0001911523695881586, "loss": 7.4882, "step": 116600 }, { "epoch": 14.032490974729242, "grad_norm": 1315748.25, "learning_rate": 0.00019115080502553214, "loss": 7.4432, "step": 116610 }, { "epoch": 14.033694344163658, "grad_norm": 1552780.75, "learning_rate": 0.000191149240330988, "loss": 7.4332, "step": 116620 }, { "epoch": 14.034897713598074, "grad_norm": 1138868.875, "learning_rate": 0.00019114767550452836, "loss": 7.5017, "step": 116630 }, { "epoch": 14.036101083032491, "grad_norm": 63607.9453125, "learning_rate": 0.00019114611054615557, "loss": 7.5391, "step": 116640 }, { "epoch": 14.037304452466907, "grad_norm": 779430.625, "learning_rate": 0.00019114454545587185, "loss": 7.5726, "step": 116650 }, { "epoch": 14.038507821901323, "grad_norm": 8863215.0, "learning_rate": 0.00019114298023367946, "loss": 7.3929, "step": 116660 }, { "epoch": 14.03971119133574, "grad_norm": 305410.1875, "learning_rate": 0.00019114141487958067, "loss": 7.3741, "step": 116670 }, { "epoch": 14.040914560770156, "grad_norm": 5025197.5, "learning_rate": 0.00019113984939357774, "loss": 7.5043, "step": 116680 }, { "epoch": 14.042117930204572, "grad_norm": 183266.390625, "learning_rate": 0.00019113828377567295, "loss": 7.4493, "step": 116690 }, { "epoch": 14.04332129963899, "grad_norm": 1260241.125, "learning_rate": 0.0001911367180258686, "loss": 7.3701, "step": 116700 }, { "epoch": 14.044524669073406, "grad_norm": 1585346.75, "learning_rate": 0.00019113515214416689, "loss": 7.6099, "step": 116710 }, { "epoch": 14.045728038507821, "grad_norm": 483882.375, "learning_rate": 0.00019113358613057012, "loss": 7.4502, "step": 116720 }, { "epoch": 14.046931407942239, "grad_norm": 4439001.0, "learning_rate": 0.0001911320199850805, "loss": 7.5441, "step": 116730 }, { "epoch": 14.048134777376655, "grad_norm": 598037.875, "learning_rate": 0.00019113045370770042, "loss": 7.4766, "step": 116740 }, { "epoch": 14.04933814681107, "grad_norm": 267443.03125, "learning_rate": 0.00019112888729843204, "loss": 7.5264, "step": 116750 }, { "epoch": 14.050541516245488, "grad_norm": 2042944.75, "learning_rate": 0.00019112732075727767, "loss": 7.4372, "step": 116760 }, { "epoch": 14.051744885679904, "grad_norm": 8590250.0, "learning_rate": 0.00019112575408423957, "loss": 7.3768, "step": 116770 }, { "epoch": 14.05294825511432, "grad_norm": 778564.25, "learning_rate": 0.00019112418727932, "loss": 7.4346, "step": 116780 }, { "epoch": 14.054151624548737, "grad_norm": 438709.09375, "learning_rate": 0.00019112262034252125, "loss": 7.4166, "step": 116790 }, { "epoch": 14.055354993983153, "grad_norm": 168011.28125, "learning_rate": 0.00019112105327384556, "loss": 7.4851, "step": 116800 }, { "epoch": 14.056558363417569, "grad_norm": 5476587.0, "learning_rate": 0.0001911194860732952, "loss": 7.4535, "step": 116810 }, { "epoch": 14.057761732851986, "grad_norm": 350191.34375, "learning_rate": 0.00019111791874087245, "loss": 7.4771, "step": 116820 }, { "epoch": 14.058965102286402, "grad_norm": 518645.3125, "learning_rate": 0.0001911163512765796, "loss": 7.5144, "step": 116830 }, { "epoch": 14.060168471720818, "grad_norm": 966435.1875, "learning_rate": 0.00019111478368041883, "loss": 7.5035, "step": 116840 }, { "epoch": 14.061371841155236, "grad_norm": 763140.375, "learning_rate": 0.00019111321595239252, "loss": 7.4865, "step": 116850 }, { "epoch": 14.062575210589651, "grad_norm": 1528244.625, "learning_rate": 0.00019111164809250292, "loss": 7.425, "step": 116860 }, { "epoch": 14.063778580024067, "grad_norm": 937681.5, "learning_rate": 0.0001911100801007522, "loss": 7.5801, "step": 116870 }, { "epoch": 14.064981949458483, "grad_norm": 632960.375, "learning_rate": 0.00019110851197714276, "loss": 7.5401, "step": 116880 }, { "epoch": 14.0661853188929, "grad_norm": 430858.0625, "learning_rate": 0.00019110694372167676, "loss": 7.5046, "step": 116890 }, { "epoch": 14.067388688327316, "grad_norm": 567233.125, "learning_rate": 0.00019110537533435654, "loss": 7.4265, "step": 116900 }, { "epoch": 14.068592057761732, "grad_norm": 732851.25, "learning_rate": 0.00019110380681518438, "loss": 7.3545, "step": 116910 }, { "epoch": 14.06979542719615, "grad_norm": 460917.28125, "learning_rate": 0.00019110223816416248, "loss": 7.3706, "step": 116920 }, { "epoch": 14.070998796630565, "grad_norm": 186629.015625, "learning_rate": 0.00019110066938129316, "loss": 7.4142, "step": 116930 }, { "epoch": 14.072202166064981, "grad_norm": 269234.15625, "learning_rate": 0.00019109910046657865, "loss": 7.5336, "step": 116940 }, { "epoch": 14.073405535499399, "grad_norm": 1003651.625, "learning_rate": 0.0001910975314200213, "loss": 7.5019, "step": 116950 }, { "epoch": 14.074608904933815, "grad_norm": 52844284.0, "learning_rate": 0.0001910959622416233, "loss": 7.5406, "step": 116960 }, { "epoch": 14.07581227436823, "grad_norm": 703983.3125, "learning_rate": 0.00019109439293138697, "loss": 7.3688, "step": 116970 }, { "epoch": 14.077015643802648, "grad_norm": 379927.46875, "learning_rate": 0.00019109282348931457, "loss": 7.5265, "step": 116980 }, { "epoch": 14.078219013237064, "grad_norm": 590824.375, "learning_rate": 0.00019109125391540834, "loss": 7.5557, "step": 116990 }, { "epoch": 14.07942238267148, "grad_norm": 223986.796875, "learning_rate": 0.00019108968420967058, "loss": 7.5759, "step": 117000 }, { "epoch": 14.080625752105897, "grad_norm": 5330962.0, "learning_rate": 0.00019108811437210357, "loss": 7.6068, "step": 117010 }, { "epoch": 14.081829121540313, "grad_norm": 4413083.0, "learning_rate": 0.00019108654440270958, "loss": 7.6588, "step": 117020 }, { "epoch": 14.083032490974729, "grad_norm": 2584871.25, "learning_rate": 0.00019108497430149083, "loss": 7.6518, "step": 117030 }, { "epoch": 14.084235860409146, "grad_norm": 779955.9375, "learning_rate": 0.00019108340406844967, "loss": 7.705, "step": 117040 }, { "epoch": 14.085439229843562, "grad_norm": 5292306.5, "learning_rate": 0.00019108183370358833, "loss": 7.71, "step": 117050 }, { "epoch": 14.086642599277978, "grad_norm": 250159.6875, "learning_rate": 0.0001910802632069091, "loss": 7.714, "step": 117060 }, { "epoch": 14.087845968712395, "grad_norm": 1570434.375, "learning_rate": 0.0001910786925784142, "loss": 7.7099, "step": 117070 }, { "epoch": 14.089049338146811, "grad_norm": 3275709.0, "learning_rate": 0.00019107712181810595, "loss": 7.7099, "step": 117080 }, { "epoch": 14.090252707581227, "grad_norm": 613445.9375, "learning_rate": 0.00019107555092598666, "loss": 7.7203, "step": 117090 }, { "epoch": 14.091456077015645, "grad_norm": 10168207.0, "learning_rate": 0.00019107397990205852, "loss": 7.7154, "step": 117100 }, { "epoch": 14.09265944645006, "grad_norm": 296258.9375, "learning_rate": 0.00019107240874632387, "loss": 7.6818, "step": 117110 }, { "epoch": 14.093862815884476, "grad_norm": 8713289.0, "learning_rate": 0.000191070837458785, "loss": 7.7367, "step": 117120 }, { "epoch": 14.095066185318894, "grad_norm": 448273.75, "learning_rate": 0.00019106926603944407, "loss": 7.7558, "step": 117130 }, { "epoch": 14.09626955475331, "grad_norm": 2434391.75, "learning_rate": 0.00019106769448830346, "loss": 7.665, "step": 117140 }, { "epoch": 14.097472924187725, "grad_norm": 1744259.0, "learning_rate": 0.0001910661228053654, "loss": 7.7213, "step": 117150 }, { "epoch": 14.098676293622143, "grad_norm": 870823.75, "learning_rate": 0.00019106455099063217, "loss": 7.7306, "step": 117160 }, { "epoch": 14.099879663056559, "grad_norm": 268853.03125, "learning_rate": 0.00019106297904410608, "loss": 7.7649, "step": 117170 }, { "epoch": 14.101083032490974, "grad_norm": 316112.84375, "learning_rate": 0.00019106140696578938, "loss": 7.7016, "step": 117180 }, { "epoch": 14.102286401925392, "grad_norm": 855629.75, "learning_rate": 0.00019105983475568432, "loss": 7.7779, "step": 117190 }, { "epoch": 14.103489771359808, "grad_norm": 11373207.0, "learning_rate": 0.00019105826241379322, "loss": 7.7289, "step": 117200 }, { "epoch": 14.104693140794224, "grad_norm": 547612.75, "learning_rate": 0.00019105668994011832, "loss": 7.7554, "step": 117210 }, { "epoch": 14.10589651022864, "grad_norm": 7882332.0, "learning_rate": 0.00019105511733466188, "loss": 7.7567, "step": 117220 }, { "epoch": 14.107099879663057, "grad_norm": 1851580.25, "learning_rate": 0.00019105354459742624, "loss": 7.7383, "step": 117230 }, { "epoch": 14.108303249097473, "grad_norm": 7409177.5, "learning_rate": 0.00019105197172841366, "loss": 7.841, "step": 117240 }, { "epoch": 14.109506618531888, "grad_norm": 926833.1875, "learning_rate": 0.00019105039872762634, "loss": 7.7346, "step": 117250 }, { "epoch": 14.110709987966306, "grad_norm": 916795.875, "learning_rate": 0.00019104882559506665, "loss": 7.7149, "step": 117260 }, { "epoch": 14.111913357400722, "grad_norm": 2947412.25, "learning_rate": 0.0001910472523307368, "loss": 7.7044, "step": 117270 }, { "epoch": 14.113116726835138, "grad_norm": 2368365.75, "learning_rate": 0.00019104567893463915, "loss": 7.7467, "step": 117280 }, { "epoch": 14.114320096269555, "grad_norm": 257529.765625, "learning_rate": 0.0001910441054067759, "loss": 7.7203, "step": 117290 }, { "epoch": 14.115523465703971, "grad_norm": 7703754.0, "learning_rate": 0.00019104253174714932, "loss": 7.762, "step": 117300 }, { "epoch": 14.116726835138387, "grad_norm": 1125570.375, "learning_rate": 0.00019104095795576174, "loss": 7.7003, "step": 117310 }, { "epoch": 14.117930204572804, "grad_norm": 4180567.75, "learning_rate": 0.0001910393840326154, "loss": 7.748, "step": 117320 }, { "epoch": 14.11913357400722, "grad_norm": 15315138.0, "learning_rate": 0.00019103780997771264, "loss": 7.7816, "step": 117330 }, { "epoch": 14.120336943441636, "grad_norm": 770472.75, "learning_rate": 0.00019103623579105566, "loss": 7.7377, "step": 117340 }, { "epoch": 14.121540312876053, "grad_norm": 2066392.125, "learning_rate": 0.00019103466147264676, "loss": 7.7626, "step": 117350 }, { "epoch": 14.12274368231047, "grad_norm": 2314799.0, "learning_rate": 0.00019103308702248827, "loss": 7.7147, "step": 117360 }, { "epoch": 14.123947051744885, "grad_norm": 3735358.75, "learning_rate": 0.0001910315124405824, "loss": 7.6771, "step": 117370 }, { "epoch": 14.125150421179303, "grad_norm": 2781628.5, "learning_rate": 0.00019102993772693145, "loss": 7.7394, "step": 117380 }, { "epoch": 14.126353790613718, "grad_norm": 167057.25, "learning_rate": 0.0001910283628815377, "loss": 7.7124, "step": 117390 }, { "epoch": 14.127557160048134, "grad_norm": 13759909.0, "learning_rate": 0.00019102678790440346, "loss": 7.6901, "step": 117400 }, { "epoch": 14.128760529482552, "grad_norm": 3111263.5, "learning_rate": 0.00019102521279553097, "loss": 7.6916, "step": 117410 }, { "epoch": 14.129963898916968, "grad_norm": 302472.0, "learning_rate": 0.00019102363755492253, "loss": 7.6121, "step": 117420 }, { "epoch": 14.131167268351383, "grad_norm": 3485300.0, "learning_rate": 0.00019102206218258043, "loss": 7.6994, "step": 117430 }, { "epoch": 14.132370637785801, "grad_norm": 5885566.5, "learning_rate": 0.0001910204866785069, "loss": 7.6486, "step": 117440 }, { "epoch": 14.133574007220217, "grad_norm": 1157274.125, "learning_rate": 0.00019101891104270427, "loss": 7.6777, "step": 117450 }, { "epoch": 14.134777376654633, "grad_norm": 1057398.0, "learning_rate": 0.00019101733527517481, "loss": 7.6866, "step": 117460 }, { "epoch": 14.13598074608905, "grad_norm": 1628617.25, "learning_rate": 0.0001910157593759208, "loss": 7.6428, "step": 117470 }, { "epoch": 14.137184115523466, "grad_norm": 919845.0625, "learning_rate": 0.0001910141833449445, "loss": 7.7262, "step": 117480 }, { "epoch": 14.138387484957882, "grad_norm": 32634906.0, "learning_rate": 0.00019101260718224817, "loss": 7.6889, "step": 117490 }, { "epoch": 14.1395908543923, "grad_norm": 1332457.375, "learning_rate": 0.0001910110308878342, "loss": 7.7526, "step": 117500 }, { "epoch": 14.140794223826715, "grad_norm": 688719.75, "learning_rate": 0.00019100945446170475, "loss": 7.7073, "step": 117510 }, { "epoch": 14.14199759326113, "grad_norm": 3635474.25, "learning_rate": 0.00019100787790386216, "loss": 7.6832, "step": 117520 }, { "epoch": 14.143200962695548, "grad_norm": 3694973.5, "learning_rate": 0.00019100630121430872, "loss": 7.7528, "step": 117530 }, { "epoch": 14.144404332129964, "grad_norm": 612005.6875, "learning_rate": 0.00019100472439304668, "loss": 7.6511, "step": 117540 }, { "epoch": 14.14560770156438, "grad_norm": 310124.40625, "learning_rate": 0.00019100314744007832, "loss": 7.6766, "step": 117550 }, { "epoch": 14.146811070998796, "grad_norm": 472426.40625, "learning_rate": 0.00019100157035540597, "loss": 7.6752, "step": 117560 }, { "epoch": 14.148014440433213, "grad_norm": 1072434.125, "learning_rate": 0.00019099999313903183, "loss": 7.6697, "step": 117570 }, { "epoch": 14.14921780986763, "grad_norm": 390492.5625, "learning_rate": 0.00019099841579095828, "loss": 7.7356, "step": 117580 }, { "epoch": 14.150421179302045, "grad_norm": 500926.78125, "learning_rate": 0.00019099683831118753, "loss": 7.7001, "step": 117590 }, { "epoch": 14.151624548736462, "grad_norm": 617192.5, "learning_rate": 0.0001909952606997219, "loss": 7.6278, "step": 117600 }, { "epoch": 14.152827918170878, "grad_norm": 1324842.375, "learning_rate": 0.00019099368295656368, "loss": 7.595, "step": 117610 }, { "epoch": 14.154031287605294, "grad_norm": 308666.59375, "learning_rate": 0.0001909921050817151, "loss": 7.6461, "step": 117620 }, { "epoch": 14.155234657039712, "grad_norm": 2842516.75, "learning_rate": 0.0001909905270751785, "loss": 7.5856, "step": 117630 }, { "epoch": 14.156438026474127, "grad_norm": 84979.5625, "learning_rate": 0.00019098894893695615, "loss": 7.6255, "step": 117640 }, { "epoch": 14.157641395908543, "grad_norm": 841324.625, "learning_rate": 0.00019098737066705027, "loss": 7.5809, "step": 117650 }, { "epoch": 14.15884476534296, "grad_norm": 1796559.0, "learning_rate": 0.00019098579226546326, "loss": 7.6321, "step": 117660 }, { "epoch": 14.160048134777377, "grad_norm": 532219.5, "learning_rate": 0.00019098421373219733, "loss": 7.5754, "step": 117670 }, { "epoch": 14.161251504211792, "grad_norm": 1030640.5, "learning_rate": 0.00019098263506725477, "loss": 7.6161, "step": 117680 }, { "epoch": 14.16245487364621, "grad_norm": 60111.03125, "learning_rate": 0.0001909810562706379, "loss": 7.682, "step": 117690 }, { "epoch": 14.163658243080626, "grad_norm": 1406366.875, "learning_rate": 0.00019097947734234895, "loss": 7.695, "step": 117700 }, { "epoch": 14.164861612515042, "grad_norm": 1101489.5, "learning_rate": 0.00019097789828239026, "loss": 7.6959, "step": 117710 }, { "epoch": 14.166064981949459, "grad_norm": 325357.65625, "learning_rate": 0.00019097631909076407, "loss": 7.6261, "step": 117720 }, { "epoch": 14.167268351383875, "grad_norm": 229152.828125, "learning_rate": 0.00019097473976747268, "loss": 7.5822, "step": 117730 }, { "epoch": 14.16847172081829, "grad_norm": 55116.89453125, "learning_rate": 0.0001909731603125184, "loss": 7.569, "step": 117740 }, { "epoch": 14.169675090252708, "grad_norm": 276046.9375, "learning_rate": 0.0001909715807259035, "loss": 7.6175, "step": 117750 }, { "epoch": 14.170878459687124, "grad_norm": 2432009.5, "learning_rate": 0.00019097000100763025, "loss": 7.6564, "step": 117760 }, { "epoch": 14.17208182912154, "grad_norm": 5763792.0, "learning_rate": 0.00019096842115770094, "loss": 7.5945, "step": 117770 }, { "epoch": 14.173285198555957, "grad_norm": 715934.125, "learning_rate": 0.00019096684117611787, "loss": 7.6371, "step": 117780 }, { "epoch": 14.174488567990373, "grad_norm": 104462.046875, "learning_rate": 0.00019096526106288333, "loss": 7.5974, "step": 117790 }, { "epoch": 14.175691937424789, "grad_norm": 172284.640625, "learning_rate": 0.00019096368081799957, "loss": 7.5797, "step": 117800 }, { "epoch": 14.176895306859207, "grad_norm": 839931.125, "learning_rate": 0.00019096210044146895, "loss": 7.6661, "step": 117810 }, { "epoch": 14.178098676293622, "grad_norm": 103322.25, "learning_rate": 0.00019096051993329369, "loss": 7.5413, "step": 117820 }, { "epoch": 14.179302045728038, "grad_norm": 347017.21875, "learning_rate": 0.0001909589392934761, "loss": 7.6953, "step": 117830 }, { "epoch": 14.180505415162456, "grad_norm": 82645.4453125, "learning_rate": 0.00019095735852201848, "loss": 7.5759, "step": 117840 }, { "epoch": 14.181708784596871, "grad_norm": 741268.0625, "learning_rate": 0.00019095577761892306, "loss": 7.6107, "step": 117850 }, { "epoch": 14.182912154031287, "grad_norm": 1132008.0, "learning_rate": 0.00019095419658419225, "loss": 7.6534, "step": 117860 }, { "epoch": 14.184115523465705, "grad_norm": 7227692.5, "learning_rate": 0.0001909526154178282, "loss": 7.6384, "step": 117870 }, { "epoch": 14.18531889290012, "grad_norm": 2999032.0, "learning_rate": 0.00019095103411983328, "loss": 7.6321, "step": 117880 }, { "epoch": 14.186522262334536, "grad_norm": 352842.65625, "learning_rate": 0.00019094945269020977, "loss": 7.5941, "step": 117890 }, { "epoch": 14.187725631768952, "grad_norm": 1475735.375, "learning_rate": 0.00019094787112895994, "loss": 7.5975, "step": 117900 }, { "epoch": 14.18892900120337, "grad_norm": 1614576.5, "learning_rate": 0.00019094628943608607, "loss": 7.6226, "step": 117910 }, { "epoch": 14.190132370637786, "grad_norm": 1642992.375, "learning_rate": 0.0001909447076115905, "loss": 7.7208, "step": 117920 }, { "epoch": 14.191335740072201, "grad_norm": 136119.203125, "learning_rate": 0.00019094312565547545, "loss": 7.6313, "step": 117930 }, { "epoch": 14.192539109506619, "grad_norm": 83723.78125, "learning_rate": 0.00019094154356774325, "loss": 7.667, "step": 117940 }, { "epoch": 14.193742478941035, "grad_norm": 142090.59375, "learning_rate": 0.0001909399613483962, "loss": 7.6637, "step": 117950 }, { "epoch": 14.19494584837545, "grad_norm": 1624011.75, "learning_rate": 0.00019093837899743655, "loss": 7.5476, "step": 117960 }, { "epoch": 14.196149217809868, "grad_norm": 165817.359375, "learning_rate": 0.00019093679651486665, "loss": 7.6582, "step": 117970 }, { "epoch": 14.197352587244284, "grad_norm": 205383.234375, "learning_rate": 0.0001909352139006887, "loss": 7.6367, "step": 117980 }, { "epoch": 14.1985559566787, "grad_norm": 78171.171875, "learning_rate": 0.00019093363115490507, "loss": 7.5957, "step": 117990 }, { "epoch": 14.199759326113117, "grad_norm": 116647.828125, "learning_rate": 0.00019093204827751805, "loss": 7.5047, "step": 118000 }, { "epoch": 14.200962695547533, "grad_norm": 291826.875, "learning_rate": 0.00019093046526852988, "loss": 7.5897, "step": 118010 }, { "epoch": 14.202166064981949, "grad_norm": 178334.6875, "learning_rate": 0.00019092888212794292, "loss": 7.5844, "step": 118020 }, { "epoch": 14.203369434416366, "grad_norm": 109844.4140625, "learning_rate": 0.00019092729885575936, "loss": 7.5493, "step": 118030 }, { "epoch": 14.204572803850782, "grad_norm": 92265.140625, "learning_rate": 0.00019092571545198156, "loss": 7.6348, "step": 118040 }, { "epoch": 14.205776173285198, "grad_norm": 487097.03125, "learning_rate": 0.00019092413191661183, "loss": 7.5966, "step": 118050 }, { "epoch": 14.206979542719615, "grad_norm": 344763.875, "learning_rate": 0.0001909225482496524, "loss": 7.6262, "step": 118060 }, { "epoch": 14.208182912154031, "grad_norm": 224788.859375, "learning_rate": 0.00019092096445110562, "loss": 7.5832, "step": 118070 }, { "epoch": 14.209386281588447, "grad_norm": 72774.234375, "learning_rate": 0.00019091938052097373, "loss": 7.6131, "step": 118080 }, { "epoch": 14.210589651022865, "grad_norm": 148898.328125, "learning_rate": 0.0001909177964592591, "loss": 7.6359, "step": 118090 }, { "epoch": 14.21179302045728, "grad_norm": 136293.4375, "learning_rate": 0.00019091621226596393, "loss": 7.6048, "step": 118100 }, { "epoch": 14.212996389891696, "grad_norm": 124320.875, "learning_rate": 0.00019091462794109055, "loss": 7.6171, "step": 118110 }, { "epoch": 14.214199759326114, "grad_norm": 119047.1328125, "learning_rate": 0.00019091304348464126, "loss": 7.5196, "step": 118120 }, { "epoch": 14.21540312876053, "grad_norm": 725180.8125, "learning_rate": 0.00019091145889661837, "loss": 7.6205, "step": 118130 }, { "epoch": 14.216606498194945, "grad_norm": 100127.4296875, "learning_rate": 0.00019090987417702413, "loss": 7.5324, "step": 118140 }, { "epoch": 14.217809867629363, "grad_norm": 185144.703125, "learning_rate": 0.00019090828932586087, "loss": 7.6019, "step": 118150 }, { "epoch": 14.219013237063779, "grad_norm": 3687808.5, "learning_rate": 0.00019090670434313088, "loss": 7.6518, "step": 118160 }, { "epoch": 14.220216606498195, "grad_norm": 104892.1953125, "learning_rate": 0.00019090511922883641, "loss": 7.5703, "step": 118170 }, { "epoch": 14.221419975932612, "grad_norm": 37905.5546875, "learning_rate": 0.00019090353398297985, "loss": 7.6244, "step": 118180 }, { "epoch": 14.222623345367028, "grad_norm": 34009.3671875, "learning_rate": 0.0001909019486055634, "loss": 7.6278, "step": 118190 }, { "epoch": 14.223826714801444, "grad_norm": 143581.0, "learning_rate": 0.00019090036309658937, "loss": 7.5578, "step": 118200 }, { "epoch": 14.225030084235861, "grad_norm": 6221499.0, "learning_rate": 0.0001908987774560601, "loss": 7.5906, "step": 118210 }, { "epoch": 14.226233453670277, "grad_norm": 75237.9140625, "learning_rate": 0.0001908971916839778, "loss": 7.5605, "step": 118220 }, { "epoch": 14.227436823104693, "grad_norm": 1089059.5, "learning_rate": 0.0001908956057803449, "loss": 7.6115, "step": 118230 }, { "epoch": 14.22864019253911, "grad_norm": 412480.71875, "learning_rate": 0.00019089401974516356, "loss": 7.6311, "step": 118240 }, { "epoch": 14.229843561973526, "grad_norm": 482569.3125, "learning_rate": 0.00019089243357843616, "loss": 7.6057, "step": 118250 }, { "epoch": 14.231046931407942, "grad_norm": 226829.71875, "learning_rate": 0.00019089084728016498, "loss": 7.6026, "step": 118260 }, { "epoch": 14.232250300842358, "grad_norm": 164830.5, "learning_rate": 0.00019088926085035225, "loss": 7.639, "step": 118270 }, { "epoch": 14.233453670276775, "grad_norm": 5754952.5, "learning_rate": 0.00019088767428900036, "loss": 7.5471, "step": 118280 }, { "epoch": 14.234657039711191, "grad_norm": 191127.46875, "learning_rate": 0.00019088608759611157, "loss": 7.7126, "step": 118290 }, { "epoch": 14.235860409145607, "grad_norm": 4961701.0, "learning_rate": 0.0001908845007716882, "loss": 7.6296, "step": 118300 }, { "epoch": 14.237063778580024, "grad_norm": 250321.875, "learning_rate": 0.00019088291381573247, "loss": 7.6384, "step": 118310 }, { "epoch": 14.23826714801444, "grad_norm": 125380.984375, "learning_rate": 0.00019088132672824673, "loss": 7.6243, "step": 118320 }, { "epoch": 14.239470517448856, "grad_norm": 346098.59375, "learning_rate": 0.00019087973950923325, "loss": 7.5883, "step": 118330 }, { "epoch": 14.240673886883274, "grad_norm": 197186.375, "learning_rate": 0.0001908781521586944, "loss": 7.6825, "step": 118340 }, { "epoch": 14.24187725631769, "grad_norm": 198040.796875, "learning_rate": 0.0001908765646766324, "loss": 7.6941, "step": 118350 }, { "epoch": 14.243080625752105, "grad_norm": 398055.09375, "learning_rate": 0.00019087497706304958, "loss": 7.58, "step": 118360 }, { "epoch": 14.244283995186523, "grad_norm": 284740.65625, "learning_rate": 0.00019087338931794823, "loss": 7.4917, "step": 118370 }, { "epoch": 14.245487364620939, "grad_norm": 151272.890625, "learning_rate": 0.00019087180144133065, "loss": 7.6321, "step": 118380 }, { "epoch": 14.246690734055354, "grad_norm": 427842.3125, "learning_rate": 0.00019087021343319914, "loss": 7.5916, "step": 118390 }, { "epoch": 14.247894103489772, "grad_norm": 1271051.125, "learning_rate": 0.000190868625293556, "loss": 7.5778, "step": 118400 }, { "epoch": 14.249097472924188, "grad_norm": 1155579.125, "learning_rate": 0.00019086703702240354, "loss": 7.5796, "step": 118410 }, { "epoch": 14.250300842358604, "grad_norm": 688704.5, "learning_rate": 0.000190865448619744, "loss": 7.5713, "step": 118420 }, { "epoch": 14.251504211793021, "grad_norm": 2906671.5, "learning_rate": 0.00019086386008557976, "loss": 7.6762, "step": 118430 }, { "epoch": 14.252707581227437, "grad_norm": 2189865.0, "learning_rate": 0.00019086227141991305, "loss": 7.6628, "step": 118440 }, { "epoch": 14.253910950661853, "grad_norm": 262524.40625, "learning_rate": 0.00019086068262274622, "loss": 7.5942, "step": 118450 }, { "epoch": 14.25511432009627, "grad_norm": 2990643.5, "learning_rate": 0.00019085909369408156, "loss": 7.6678, "step": 118460 }, { "epoch": 14.256317689530686, "grad_norm": 769595.125, "learning_rate": 0.00019085750463392133, "loss": 7.6162, "step": 118470 }, { "epoch": 14.257521058965102, "grad_norm": 525261.9375, "learning_rate": 0.0001908559154422679, "loss": 7.6189, "step": 118480 }, { "epoch": 14.25872442839952, "grad_norm": 268710.96875, "learning_rate": 0.00019085432611912347, "loss": 7.6095, "step": 118490 }, { "epoch": 14.259927797833935, "grad_norm": 555324.25, "learning_rate": 0.00019085273666449046, "loss": 7.6909, "step": 118500 }, { "epoch": 14.261131167268351, "grad_norm": 384020.65625, "learning_rate": 0.00019085114707837107, "loss": 7.5423, "step": 118510 }, { "epoch": 14.262334536702769, "grad_norm": 3127925.0, "learning_rate": 0.00019084955736076763, "loss": 7.596, "step": 118520 }, { "epoch": 14.263537906137184, "grad_norm": 8664014.0, "learning_rate": 0.0001908479675116825, "loss": 7.6043, "step": 118530 }, { "epoch": 14.2647412755716, "grad_norm": 192653.078125, "learning_rate": 0.00019084637753111788, "loss": 7.5048, "step": 118540 }, { "epoch": 14.265944645006018, "grad_norm": 480439.0, "learning_rate": 0.00019084478741907614, "loss": 7.6826, "step": 118550 }, { "epoch": 14.267148014440433, "grad_norm": 1125248.0, "learning_rate": 0.00019084319717555957, "loss": 7.6078, "step": 118560 }, { "epoch": 14.26835138387485, "grad_norm": 2509802.5, "learning_rate": 0.00019084160680057048, "loss": 7.5447, "step": 118570 }, { "epoch": 14.269554753309265, "grad_norm": 1077061.875, "learning_rate": 0.00019084001629411114, "loss": 7.5356, "step": 118580 }, { "epoch": 14.270758122743683, "grad_norm": 182195.296875, "learning_rate": 0.0001908384256561839, "loss": 7.5576, "step": 118590 }, { "epoch": 14.271961492178098, "grad_norm": 2173710.75, "learning_rate": 0.00019083683488679097, "loss": 7.5693, "step": 118600 }, { "epoch": 14.273164861612514, "grad_norm": 3091036.0, "learning_rate": 0.00019083524398593477, "loss": 7.5223, "step": 118610 }, { "epoch": 14.274368231046932, "grad_norm": 1276458.625, "learning_rate": 0.00019083365295361748, "loss": 7.6413, "step": 118620 }, { "epoch": 14.275571600481348, "grad_norm": 657346.9375, "learning_rate": 0.00019083206178984152, "loss": 7.487, "step": 118630 }, { "epoch": 14.276774969915763, "grad_norm": 271066.5, "learning_rate": 0.0001908304704946091, "loss": 7.5473, "step": 118640 }, { "epoch": 14.277978339350181, "grad_norm": 1240744.5, "learning_rate": 0.0001908288790679226, "loss": 7.5293, "step": 118650 }, { "epoch": 14.279181708784597, "grad_norm": 2022690.625, "learning_rate": 0.00019082728750978429, "loss": 7.6134, "step": 118660 }, { "epoch": 14.280385078219012, "grad_norm": 432702.25, "learning_rate": 0.00019082569582019648, "loss": 7.605, "step": 118670 }, { "epoch": 14.28158844765343, "grad_norm": 2926700.25, "learning_rate": 0.00019082410399916144, "loss": 7.5629, "step": 118680 }, { "epoch": 14.282791817087846, "grad_norm": 207177.15625, "learning_rate": 0.0001908225120466815, "loss": 7.587, "step": 118690 }, { "epoch": 14.283995186522262, "grad_norm": 1329514.0, "learning_rate": 0.00019082091996275896, "loss": 7.5791, "step": 118700 }, { "epoch": 14.28519855595668, "grad_norm": 2284441.75, "learning_rate": 0.00019081932774739612, "loss": 7.6031, "step": 118710 }, { "epoch": 14.286401925391095, "grad_norm": 594683.125, "learning_rate": 0.0001908177354005953, "loss": 7.584, "step": 118720 }, { "epoch": 14.28760529482551, "grad_norm": 1597930.875, "learning_rate": 0.0001908161429223588, "loss": 7.6239, "step": 118730 }, { "epoch": 14.288808664259928, "grad_norm": 108540.125, "learning_rate": 0.0001908145503126889, "loss": 7.5947, "step": 118740 }, { "epoch": 14.290012033694344, "grad_norm": 17705258.0, "learning_rate": 0.00019081295757158794, "loss": 7.5908, "step": 118750 }, { "epoch": 14.29121540312876, "grad_norm": 1176973.0, "learning_rate": 0.00019081136469905824, "loss": 7.5876, "step": 118760 }, { "epoch": 14.292418772563177, "grad_norm": 1378779.5, "learning_rate": 0.000190809771695102, "loss": 7.5621, "step": 118770 }, { "epoch": 14.293622141997593, "grad_norm": 680724.9375, "learning_rate": 0.00019080817855972164, "loss": 7.5074, "step": 118780 }, { "epoch": 14.294825511432009, "grad_norm": 521541.3125, "learning_rate": 0.00019080658529291943, "loss": 7.6662, "step": 118790 }, { "epoch": 14.296028880866427, "grad_norm": 754615.75, "learning_rate": 0.00019080499189469768, "loss": 7.5437, "step": 118800 }, { "epoch": 14.297232250300842, "grad_norm": 500591.46875, "learning_rate": 0.00019080339836505868, "loss": 7.5233, "step": 118810 }, { "epoch": 14.298435619735258, "grad_norm": 1726237.75, "learning_rate": 0.00019080180470400473, "loss": 7.6256, "step": 118820 }, { "epoch": 14.299638989169676, "grad_norm": 2642596.25, "learning_rate": 0.00019080021091153816, "loss": 7.5715, "step": 118830 }, { "epoch": 14.300842358604092, "grad_norm": 89558.421875, "learning_rate": 0.00019079861698766126, "loss": 7.6221, "step": 118840 }, { "epoch": 14.302045728038507, "grad_norm": 1838512.75, "learning_rate": 0.00019079702293237639, "loss": 7.5489, "step": 118850 }, { "epoch": 14.303249097472925, "grad_norm": 500271.28125, "learning_rate": 0.00019079542874568574, "loss": 7.5761, "step": 118860 }, { "epoch": 14.30445246690734, "grad_norm": 425299.125, "learning_rate": 0.00019079383442759172, "loss": 7.5812, "step": 118870 }, { "epoch": 14.305655836341757, "grad_norm": 181704.6875, "learning_rate": 0.0001907922399780966, "loss": 7.6447, "step": 118880 }, { "epoch": 14.306859205776174, "grad_norm": 908107.375, "learning_rate": 0.0001907906453972027, "loss": 7.45, "step": 118890 }, { "epoch": 14.30806257521059, "grad_norm": 325110.0, "learning_rate": 0.00019078905068491234, "loss": 7.5869, "step": 118900 }, { "epoch": 14.309265944645006, "grad_norm": 1366996.25, "learning_rate": 0.0001907874558412278, "loss": 7.5595, "step": 118910 }, { "epoch": 14.310469314079423, "grad_norm": 400215.5625, "learning_rate": 0.00019078586086615138, "loss": 7.639, "step": 118920 }, { "epoch": 14.311672683513839, "grad_norm": 97284.28125, "learning_rate": 0.0001907842657596854, "loss": 7.5516, "step": 118930 }, { "epoch": 14.312876052948255, "grad_norm": 736925.9375, "learning_rate": 0.00019078267052183217, "loss": 7.5797, "step": 118940 }, { "epoch": 14.314079422382672, "grad_norm": 3637919.25, "learning_rate": 0.00019078107515259403, "loss": 7.6444, "step": 118950 }, { "epoch": 14.315282791817088, "grad_norm": 223660.40625, "learning_rate": 0.00019077947965197325, "loss": 7.6054, "step": 118960 }, { "epoch": 14.316486161251504, "grad_norm": 1043933.5625, "learning_rate": 0.00019077788401997215, "loss": 7.5201, "step": 118970 }, { "epoch": 14.31768953068592, "grad_norm": 298401.21875, "learning_rate": 0.00019077628825659302, "loss": 7.5479, "step": 118980 }, { "epoch": 14.318892900120337, "grad_norm": 295401.25, "learning_rate": 0.0001907746923618382, "loss": 7.5631, "step": 118990 }, { "epoch": 14.320096269554753, "grad_norm": 2421919.25, "learning_rate": 0.00019077309633570998, "loss": 7.6099, "step": 119000 }, { "epoch": 14.321299638989169, "grad_norm": 1209339.875, "learning_rate": 0.0001907715001782107, "loss": 7.5773, "step": 119010 }, { "epoch": 14.322503008423586, "grad_norm": 158389.015625, "learning_rate": 0.00019076990388934264, "loss": 7.5278, "step": 119020 }, { "epoch": 14.323706377858002, "grad_norm": 141192.375, "learning_rate": 0.00019076830746910813, "loss": 7.6169, "step": 119030 }, { "epoch": 14.324909747292418, "grad_norm": 71631.265625, "learning_rate": 0.00019076671091750944, "loss": 7.5682, "step": 119040 }, { "epoch": 14.326113116726836, "grad_norm": 174709.296875, "learning_rate": 0.00019076511423454895, "loss": 7.5451, "step": 119050 }, { "epoch": 14.327316486161251, "grad_norm": 228096.328125, "learning_rate": 0.0001907635174202289, "loss": 7.6053, "step": 119060 }, { "epoch": 14.328519855595667, "grad_norm": 202864.921875, "learning_rate": 0.00019076192047455163, "loss": 7.6161, "step": 119070 }, { "epoch": 14.329723225030085, "grad_norm": 255449.78125, "learning_rate": 0.00019076032339751948, "loss": 7.5969, "step": 119080 }, { "epoch": 14.3309265944645, "grad_norm": 870253.25, "learning_rate": 0.0001907587261891347, "loss": 7.5154, "step": 119090 }, { "epoch": 14.332129963898916, "grad_norm": 311781.03125, "learning_rate": 0.00019075712884939966, "loss": 7.5915, "step": 119100 }, { "epoch": 14.333333333333334, "grad_norm": 782481.75, "learning_rate": 0.00019075553137831665, "loss": 7.6192, "step": 119110 }, { "epoch": 14.33453670276775, "grad_norm": 1564174.0, "learning_rate": 0.00019075393377588796, "loss": 7.5312, "step": 119120 }, { "epoch": 14.335740072202166, "grad_norm": 99209.40625, "learning_rate": 0.00019075233604211593, "loss": 7.595, "step": 119130 }, { "epoch": 14.336943441636583, "grad_norm": 31667.69140625, "learning_rate": 0.00019075073817700287, "loss": 7.6006, "step": 119140 }, { "epoch": 14.338146811070999, "grad_norm": 8151985.0, "learning_rate": 0.00019074914018055107, "loss": 7.5678, "step": 119150 }, { "epoch": 14.339350180505415, "grad_norm": 2709748.75, "learning_rate": 0.00019074754205276287, "loss": 7.574, "step": 119160 }, { "epoch": 14.340553549939832, "grad_norm": 1194706.875, "learning_rate": 0.00019074594379364057, "loss": 7.5266, "step": 119170 }, { "epoch": 14.341756919374248, "grad_norm": 1466402.625, "learning_rate": 0.00019074434540318648, "loss": 7.5475, "step": 119180 }, { "epoch": 14.342960288808664, "grad_norm": 4018239.25, "learning_rate": 0.00019074274688140293, "loss": 7.5761, "step": 119190 }, { "epoch": 14.344163658243081, "grad_norm": 109044.6484375, "learning_rate": 0.00019074114822829224, "loss": 7.5674, "step": 119200 }, { "epoch": 14.345367027677497, "grad_norm": 430628.3125, "learning_rate": 0.00019073954944385667, "loss": 7.6097, "step": 119210 }, { "epoch": 14.346570397111913, "grad_norm": 133542.625, "learning_rate": 0.00019073795052809857, "loss": 7.5421, "step": 119220 }, { "epoch": 14.34777376654633, "grad_norm": 69039.859375, "learning_rate": 0.00019073635148102026, "loss": 7.6095, "step": 119230 }, { "epoch": 14.348977135980746, "grad_norm": 19254.169921875, "learning_rate": 0.00019073475230262405, "loss": 7.5676, "step": 119240 }, { "epoch": 14.350180505415162, "grad_norm": 23850.453125, "learning_rate": 0.00019073315299291226, "loss": 7.5821, "step": 119250 }, { "epoch": 14.35138387484958, "grad_norm": 2005147.25, "learning_rate": 0.0001907315535518872, "loss": 7.5356, "step": 119260 }, { "epoch": 14.352587244283995, "grad_norm": 62143.3046875, "learning_rate": 0.00019072995397955117, "loss": 7.5563, "step": 119270 }, { "epoch": 14.353790613718411, "grad_norm": 132732.40625, "learning_rate": 0.00019072835427590647, "loss": 7.5333, "step": 119280 }, { "epoch": 14.354993983152827, "grad_norm": 409001.1875, "learning_rate": 0.00019072675444095549, "loss": 7.5244, "step": 119290 }, { "epoch": 14.356197352587245, "grad_norm": 612200.6875, "learning_rate": 0.00019072515447470046, "loss": 7.6445, "step": 119300 }, { "epoch": 14.35740072202166, "grad_norm": 60181.74609375, "learning_rate": 0.00019072355437714373, "loss": 7.6717, "step": 119310 }, { "epoch": 14.358604091456076, "grad_norm": 206314.375, "learning_rate": 0.00019072195414828765, "loss": 7.6011, "step": 119320 }, { "epoch": 14.359807460890494, "grad_norm": 59793.54296875, "learning_rate": 0.0001907203537881345, "loss": 7.565, "step": 119330 }, { "epoch": 14.36101083032491, "grad_norm": 13689.9794921875, "learning_rate": 0.00019071875329668657, "loss": 7.5052, "step": 119340 }, { "epoch": 14.362214199759325, "grad_norm": 71346.2109375, "learning_rate": 0.00019071715267394624, "loss": 7.5693, "step": 119350 }, { "epoch": 14.363417569193743, "grad_norm": 139831.796875, "learning_rate": 0.00019071555191991574, "loss": 7.5218, "step": 119360 }, { "epoch": 14.364620938628159, "grad_norm": 329947.125, "learning_rate": 0.00019071395103459747, "loss": 7.6216, "step": 119370 }, { "epoch": 14.365824308062574, "grad_norm": 574487.875, "learning_rate": 0.00019071235001799376, "loss": 7.6086, "step": 119380 }, { "epoch": 14.367027677496992, "grad_norm": 21661.830078125, "learning_rate": 0.00019071074887010684, "loss": 7.5997, "step": 119390 }, { "epoch": 14.368231046931408, "grad_norm": 12835.291015625, "learning_rate": 0.00019070914759093907, "loss": 7.563, "step": 119400 }, { "epoch": 14.369434416365824, "grad_norm": 560535.125, "learning_rate": 0.0001907075461804928, "loss": 7.5462, "step": 119410 }, { "epoch": 14.370637785800241, "grad_norm": 17859.89453125, "learning_rate": 0.00019070594463877028, "loss": 7.5947, "step": 119420 }, { "epoch": 14.371841155234657, "grad_norm": 153670.6875, "learning_rate": 0.00019070434296577387, "loss": 7.5563, "step": 119430 }, { "epoch": 14.373044524669073, "grad_norm": 51824.6796875, "learning_rate": 0.00019070274116150587, "loss": 7.5882, "step": 119440 }, { "epoch": 14.37424789410349, "grad_norm": 70184.84375, "learning_rate": 0.00019070113922596863, "loss": 7.5978, "step": 119450 }, { "epoch": 14.375451263537906, "grad_norm": 26208.642578125, "learning_rate": 0.00019069953715916444, "loss": 7.6352, "step": 119460 }, { "epoch": 14.376654632972322, "grad_norm": 6632.07373046875, "learning_rate": 0.0001906979349610956, "loss": 7.5943, "step": 119470 }, { "epoch": 14.37785800240674, "grad_norm": 21841.8359375, "learning_rate": 0.00019069633263176452, "loss": 7.6114, "step": 119480 }, { "epoch": 14.379061371841155, "grad_norm": 3858.576904296875, "learning_rate": 0.0001906947301711734, "loss": 7.5277, "step": 119490 }, { "epoch": 14.380264741275571, "grad_norm": 4729.509765625, "learning_rate": 0.00019069312757932465, "loss": 7.4972, "step": 119500 }, { "epoch": 14.381468110709989, "grad_norm": 19417.59375, "learning_rate": 0.00019069152485622052, "loss": 7.609, "step": 119510 }, { "epoch": 14.382671480144404, "grad_norm": 17170.166015625, "learning_rate": 0.0001906899220018634, "loss": 7.6092, "step": 119520 }, { "epoch": 14.38387484957882, "grad_norm": 3339.51953125, "learning_rate": 0.00019068831901625555, "loss": 7.5038, "step": 119530 }, { "epoch": 14.385078219013238, "grad_norm": 5656.55078125, "learning_rate": 0.0001906867158993993, "loss": 7.5383, "step": 119540 }, { "epoch": 14.386281588447654, "grad_norm": 5327.35791015625, "learning_rate": 0.00019068511265129702, "loss": 7.533, "step": 119550 }, { "epoch": 14.38748495788207, "grad_norm": 5790.3759765625, "learning_rate": 0.00019068350927195095, "loss": 7.5724, "step": 119560 }, { "epoch": 14.388688327316487, "grad_norm": 3798.71435546875, "learning_rate": 0.00019068190576136345, "loss": 7.5458, "step": 119570 }, { "epoch": 14.389891696750903, "grad_norm": 6133.72314453125, "learning_rate": 0.0001906803021195369, "loss": 7.5392, "step": 119580 }, { "epoch": 14.391095066185319, "grad_norm": 2831.714111328125, "learning_rate": 0.0001906786983464735, "loss": 7.4844, "step": 119590 }, { "epoch": 14.392298435619736, "grad_norm": 1254.361083984375, "learning_rate": 0.00019067709444217567, "loss": 7.592, "step": 119600 }, { "epoch": 14.393501805054152, "grad_norm": 156516.84375, "learning_rate": 0.0001906754904066457, "loss": 7.5673, "step": 119610 }, { "epoch": 14.394705174488568, "grad_norm": 870.5654907226562, "learning_rate": 0.0001906738862398859, "loss": 7.4808, "step": 119620 }, { "epoch": 14.395908543922985, "grad_norm": 9966.8095703125, "learning_rate": 0.00019067228194189859, "loss": 7.4807, "step": 119630 }, { "epoch": 14.397111913357401, "grad_norm": 2594.8515625, "learning_rate": 0.00019067067751268613, "loss": 7.505, "step": 119640 }, { "epoch": 14.398315282791817, "grad_norm": 12772.3779296875, "learning_rate": 0.0001906690729522508, "loss": 7.5407, "step": 119650 }, { "epoch": 14.399518652226233, "grad_norm": 58876.48046875, "learning_rate": 0.00019066746826059493, "loss": 7.5641, "step": 119660 }, { "epoch": 14.40072202166065, "grad_norm": 11685.4697265625, "learning_rate": 0.00019066586343772084, "loss": 7.5972, "step": 119670 }, { "epoch": 14.401925391095066, "grad_norm": 17592.52734375, "learning_rate": 0.00019066425848363088, "loss": 7.5208, "step": 119680 }, { "epoch": 14.403128760529482, "grad_norm": 16205.251953125, "learning_rate": 0.00019066265339832733, "loss": 7.5903, "step": 119690 }, { "epoch": 14.4043321299639, "grad_norm": 20633.93359375, "learning_rate": 0.00019066104818181255, "loss": 7.5114, "step": 119700 }, { "epoch": 14.405535499398315, "grad_norm": 38214.70703125, "learning_rate": 0.00019065944283408885, "loss": 7.5092, "step": 119710 }, { "epoch": 14.406738868832731, "grad_norm": 8815.671875, "learning_rate": 0.00019065783735515857, "loss": 7.4565, "step": 119720 }, { "epoch": 14.407942238267148, "grad_norm": 8079.5439453125, "learning_rate": 0.000190656231745024, "loss": 7.553, "step": 119730 }, { "epoch": 14.409145607701564, "grad_norm": 9402.7861328125, "learning_rate": 0.00019065462600368748, "loss": 7.5981, "step": 119740 }, { "epoch": 14.41034897713598, "grad_norm": 26985.255859375, "learning_rate": 0.00019065302013115135, "loss": 7.5289, "step": 119750 }, { "epoch": 14.411552346570398, "grad_norm": 15124.28125, "learning_rate": 0.00019065141412741787, "loss": 7.5674, "step": 119760 }, { "epoch": 14.412755716004813, "grad_norm": 17110.228515625, "learning_rate": 0.00019064980799248947, "loss": 7.5469, "step": 119770 }, { "epoch": 14.41395908543923, "grad_norm": 9252.294921875, "learning_rate": 0.00019064820172636838, "loss": 7.6501, "step": 119780 }, { "epoch": 14.415162454873647, "grad_norm": 11879.595703125, "learning_rate": 0.00019064659532905698, "loss": 7.5487, "step": 119790 }, { "epoch": 14.416365824308063, "grad_norm": 20099.056640625, "learning_rate": 0.00019064498880055756, "loss": 7.5602, "step": 119800 }, { "epoch": 14.417569193742478, "grad_norm": 34142.984375, "learning_rate": 0.0001906433821408725, "loss": 7.6255, "step": 119810 }, { "epoch": 14.418772563176896, "grad_norm": 6366.70751953125, "learning_rate": 0.00019064177535000406, "loss": 7.5919, "step": 119820 }, { "epoch": 14.419975932611312, "grad_norm": 25508.58984375, "learning_rate": 0.00019064016842795458, "loss": 7.6212, "step": 119830 }, { "epoch": 14.421179302045728, "grad_norm": 6327.87158203125, "learning_rate": 0.0001906385613747264, "loss": 7.5441, "step": 119840 }, { "epoch": 14.422382671480145, "grad_norm": 7255.55322265625, "learning_rate": 0.00019063695419032186, "loss": 7.5433, "step": 119850 }, { "epoch": 14.42358604091456, "grad_norm": 5273.5888671875, "learning_rate": 0.00019063534687474326, "loss": 7.5321, "step": 119860 }, { "epoch": 14.424789410348977, "grad_norm": 2021.651123046875, "learning_rate": 0.00019063373942799293, "loss": 7.5048, "step": 119870 }, { "epoch": 14.425992779783394, "grad_norm": 1237.4359130859375, "learning_rate": 0.00019063213185007322, "loss": 7.622, "step": 119880 }, { "epoch": 14.42719614921781, "grad_norm": 2026.2740478515625, "learning_rate": 0.00019063052414098643, "loss": 7.5603, "step": 119890 }, { "epoch": 14.428399518652226, "grad_norm": 4687.3798828125, "learning_rate": 0.00019062891630073492, "loss": 7.4505, "step": 119900 }, { "epoch": 14.429602888086643, "grad_norm": 3074.654052734375, "learning_rate": 0.00019062730832932097, "loss": 7.5527, "step": 119910 }, { "epoch": 14.43080625752106, "grad_norm": 1889.7685546875, "learning_rate": 0.0001906257002267469, "loss": 7.566, "step": 119920 }, { "epoch": 14.432009626955475, "grad_norm": 3460.92236328125, "learning_rate": 0.00019062409199301512, "loss": 7.5607, "step": 119930 }, { "epoch": 14.433212996389893, "grad_norm": 3774.3955078125, "learning_rate": 0.00019062248362812788, "loss": 7.5595, "step": 119940 }, { "epoch": 14.434416365824308, "grad_norm": 3955.254150390625, "learning_rate": 0.00019062087513208753, "loss": 7.4998, "step": 119950 }, { "epoch": 14.435619735258724, "grad_norm": 65530.7109375, "learning_rate": 0.0001906192665048964, "loss": 7.5635, "step": 119960 }, { "epoch": 14.43682310469314, "grad_norm": 2011.41845703125, "learning_rate": 0.0001906176577465568, "loss": 7.5687, "step": 119970 }, { "epoch": 14.438026474127557, "grad_norm": 3497.224853515625, "learning_rate": 0.00019061604885707115, "loss": 7.568, "step": 119980 }, { "epoch": 14.439229843561973, "grad_norm": 2236.07373046875, "learning_rate": 0.00019061443983644163, "loss": 7.6512, "step": 119990 }, { "epoch": 14.440433212996389, "grad_norm": 3275.374755859375, "learning_rate": 0.00019061283068467065, "loss": 7.5944, "step": 120000 }, { "epoch": 14.441636582430807, "grad_norm": 2291.748779296875, "learning_rate": 0.00019061122140176057, "loss": 7.5395, "step": 120010 }, { "epoch": 14.442839951865222, "grad_norm": 3766.966064453125, "learning_rate": 0.00019060961198771366, "loss": 7.5335, "step": 120020 }, { "epoch": 14.444043321299638, "grad_norm": 1537.174560546875, "learning_rate": 0.00019060800244253225, "loss": 7.6213, "step": 120030 }, { "epoch": 14.445246690734056, "grad_norm": 1786.9052734375, "learning_rate": 0.00019060639276621873, "loss": 7.5077, "step": 120040 }, { "epoch": 14.446450060168472, "grad_norm": 4729.9560546875, "learning_rate": 0.00019060478295877538, "loss": 7.467, "step": 120050 }, { "epoch": 14.447653429602887, "grad_norm": 70892.3671875, "learning_rate": 0.00019060317302020452, "loss": 7.5656, "step": 120060 }, { "epoch": 14.448856799037305, "grad_norm": 11207.615234375, "learning_rate": 0.00019060156295050852, "loss": 7.509, "step": 120070 }, { "epoch": 14.45006016847172, "grad_norm": 25483.3671875, "learning_rate": 0.00019059995274968964, "loss": 7.4925, "step": 120080 }, { "epoch": 14.451263537906136, "grad_norm": 9200.310546875, "learning_rate": 0.00019059834241775032, "loss": 7.5617, "step": 120090 }, { "epoch": 14.452466907340554, "grad_norm": 5665.3662109375, "learning_rate": 0.0001905967319546928, "loss": 7.5211, "step": 120100 }, { "epoch": 14.45367027677497, "grad_norm": 5527.3173828125, "learning_rate": 0.00019059512136051945, "loss": 7.5932, "step": 120110 }, { "epoch": 14.454873646209386, "grad_norm": 11837.158203125, "learning_rate": 0.0001905935106352326, "loss": 7.6151, "step": 120120 }, { "epoch": 14.456077015643803, "grad_norm": 20097.900390625, "learning_rate": 0.00019059189977883455, "loss": 7.5961, "step": 120130 }, { "epoch": 14.457280385078219, "grad_norm": 10696.216796875, "learning_rate": 0.0001905902887913277, "loss": 7.5447, "step": 120140 }, { "epoch": 14.458483754512635, "grad_norm": 13723.712890625, "learning_rate": 0.00019058867767271428, "loss": 7.5686, "step": 120150 }, { "epoch": 14.459687123947052, "grad_norm": 26544.7734375, "learning_rate": 0.00019058706642299672, "loss": 7.5666, "step": 120160 }, { "epoch": 14.460890493381468, "grad_norm": 11483.302734375, "learning_rate": 0.00019058545504217728, "loss": 7.6048, "step": 120170 }, { "epoch": 14.462093862815884, "grad_norm": 24804.689453125, "learning_rate": 0.00019058384353025837, "loss": 7.6328, "step": 120180 }, { "epoch": 14.463297232250302, "grad_norm": 11350.1083984375, "learning_rate": 0.00019058223188724222, "loss": 7.5416, "step": 120190 }, { "epoch": 14.464500601684717, "grad_norm": 7787.87548828125, "learning_rate": 0.00019058062011313124, "loss": 7.5303, "step": 120200 }, { "epoch": 14.465703971119133, "grad_norm": 129329.0703125, "learning_rate": 0.00019057900820792774, "loss": 7.5924, "step": 120210 }, { "epoch": 14.46690734055355, "grad_norm": 23617.984375, "learning_rate": 0.00019057739617163403, "loss": 7.5598, "step": 120220 }, { "epoch": 14.468110709987966, "grad_norm": 13259.3095703125, "learning_rate": 0.00019057578400425248, "loss": 7.54, "step": 120230 }, { "epoch": 14.469314079422382, "grad_norm": 8982.37109375, "learning_rate": 0.0001905741717057854, "loss": 7.4943, "step": 120240 }, { "epoch": 14.4705174488568, "grad_norm": 9860.9189453125, "learning_rate": 0.00019057255927623515, "loss": 7.593, "step": 120250 }, { "epoch": 14.471720818291216, "grad_norm": 33967.8984375, "learning_rate": 0.00019057094671560404, "loss": 7.5468, "step": 120260 }, { "epoch": 14.472924187725631, "grad_norm": 9480.3759765625, "learning_rate": 0.0001905693340238944, "loss": 7.6913, "step": 120270 }, { "epoch": 14.474127557160049, "grad_norm": 11778.146484375, "learning_rate": 0.0001905677212011086, "loss": 7.5879, "step": 120280 }, { "epoch": 14.475330926594465, "grad_norm": 10220.3251953125, "learning_rate": 0.00019056610824724892, "loss": 7.6608, "step": 120290 }, { "epoch": 14.47653429602888, "grad_norm": 6985.7392578125, "learning_rate": 0.00019056449516231772, "loss": 7.5828, "step": 120300 }, { "epoch": 14.477737665463298, "grad_norm": 29242.396484375, "learning_rate": 0.00019056288194631735, "loss": 7.564, "step": 120310 }, { "epoch": 14.478941034897714, "grad_norm": 8868.1533203125, "learning_rate": 0.0001905612685992501, "loss": 7.5048, "step": 120320 }, { "epoch": 14.48014440433213, "grad_norm": 5269.60400390625, "learning_rate": 0.0001905596551211184, "loss": 7.5593, "step": 120330 }, { "epoch": 14.481347773766545, "grad_norm": 37762.30859375, "learning_rate": 0.00019055804151192447, "loss": 7.4889, "step": 120340 }, { "epoch": 14.482551143200963, "grad_norm": 41898.69921875, "learning_rate": 0.00019055642777167072, "loss": 7.5272, "step": 120350 }, { "epoch": 14.483754512635379, "grad_norm": 15239.4765625, "learning_rate": 0.00019055481390035944, "loss": 7.6083, "step": 120360 }, { "epoch": 14.484957882069795, "grad_norm": 6722.9072265625, "learning_rate": 0.00019055319989799298, "loss": 7.5189, "step": 120370 }, { "epoch": 14.486161251504212, "grad_norm": 4932.267578125, "learning_rate": 0.0001905515857645737, "loss": 7.5753, "step": 120380 }, { "epoch": 14.487364620938628, "grad_norm": 5543.306640625, "learning_rate": 0.00019054997150010392, "loss": 7.5521, "step": 120390 }, { "epoch": 14.488567990373044, "grad_norm": 24199.45703125, "learning_rate": 0.000190548357104586, "loss": 7.5241, "step": 120400 }, { "epoch": 14.489771359807461, "grad_norm": 145155.375, "learning_rate": 0.0001905467425780222, "loss": 7.6052, "step": 120410 }, { "epoch": 14.490974729241877, "grad_norm": 35324.0234375, "learning_rate": 0.00019054512792041493, "loss": 7.4526, "step": 120420 }, { "epoch": 14.492178098676293, "grad_norm": 17105.146484375, "learning_rate": 0.0001905435131317665, "loss": 7.5068, "step": 120430 }, { "epoch": 14.49338146811071, "grad_norm": 12936.751953125, "learning_rate": 0.00019054189821207925, "loss": 7.5293, "step": 120440 }, { "epoch": 14.494584837545126, "grad_norm": 4785.9609375, "learning_rate": 0.00019054028316135556, "loss": 7.5097, "step": 120450 }, { "epoch": 14.495788206979542, "grad_norm": 2668.955810546875, "learning_rate": 0.00019053866797959766, "loss": 7.4817, "step": 120460 }, { "epoch": 14.49699157641396, "grad_norm": 4131.68310546875, "learning_rate": 0.000190537052666808, "loss": 7.5655, "step": 120470 }, { "epoch": 14.498194945848375, "grad_norm": 915.166015625, "learning_rate": 0.00019053543722298886, "loss": 7.4781, "step": 120480 }, { "epoch": 14.499398315282791, "grad_norm": 3766.930908203125, "learning_rate": 0.00019053382164814256, "loss": 7.6013, "step": 120490 }, { "epoch": 14.500601684717209, "grad_norm": 598.1075439453125, "learning_rate": 0.00019053220594227152, "loss": 7.4729, "step": 120500 }, { "epoch": 14.501805054151625, "grad_norm": 94520.7109375, "learning_rate": 0.00019053059010537798, "loss": 7.6828, "step": 120510 }, { "epoch": 14.50300842358604, "grad_norm": 2203.223876953125, "learning_rate": 0.00019052897413746438, "loss": 7.5277, "step": 120520 }, { "epoch": 14.504211793020458, "grad_norm": 4880.93896484375, "learning_rate": 0.00019052735803853297, "loss": 7.7061, "step": 120530 }, { "epoch": 14.505415162454874, "grad_norm": 7914.91259765625, "learning_rate": 0.00019052574180858614, "loss": 7.617, "step": 120540 }, { "epoch": 14.50661853188929, "grad_norm": 3421.029541015625, "learning_rate": 0.00019052412544762616, "loss": 7.5142, "step": 120550 }, { "epoch": 14.507821901323707, "grad_norm": 12922.5166015625, "learning_rate": 0.0001905225089556555, "loss": 7.447, "step": 120560 }, { "epoch": 14.509025270758123, "grad_norm": 1617.4580078125, "learning_rate": 0.00019052089233267635, "loss": 7.5523, "step": 120570 }, { "epoch": 14.510228640192539, "grad_norm": 8773.5224609375, "learning_rate": 0.00019051927557869113, "loss": 7.5148, "step": 120580 }, { "epoch": 14.511432009626956, "grad_norm": 5880.3486328125, "learning_rate": 0.00019051765869370222, "loss": 7.5477, "step": 120590 }, { "epoch": 14.512635379061372, "grad_norm": 2873.053466796875, "learning_rate": 0.00019051604167771188, "loss": 7.549, "step": 120600 }, { "epoch": 14.513838748495788, "grad_norm": 22872.376953125, "learning_rate": 0.00019051442453072245, "loss": 7.4861, "step": 120610 }, { "epoch": 14.515042117930205, "grad_norm": 4340.60498046875, "learning_rate": 0.00019051280725273633, "loss": 7.5089, "step": 120620 }, { "epoch": 14.516245487364621, "grad_norm": 3579.42626953125, "learning_rate": 0.00019051118984375585, "loss": 7.5295, "step": 120630 }, { "epoch": 14.517448856799037, "grad_norm": 4048.6767578125, "learning_rate": 0.0001905095723037833, "loss": 7.4164, "step": 120640 }, { "epoch": 14.518652226233453, "grad_norm": 4062.582275390625, "learning_rate": 0.00019050795463282106, "loss": 7.4519, "step": 120650 }, { "epoch": 14.51985559566787, "grad_norm": 3027.69189453125, "learning_rate": 0.00019050633683087147, "loss": 7.5603, "step": 120660 }, { "epoch": 14.521058965102286, "grad_norm": 9249.0751953125, "learning_rate": 0.00019050471889793684, "loss": 7.5184, "step": 120670 }, { "epoch": 14.522262334536702, "grad_norm": 7461.44189453125, "learning_rate": 0.0001905031008340196, "loss": 7.5324, "step": 120680 }, { "epoch": 14.52346570397112, "grad_norm": 4505.70166015625, "learning_rate": 0.00019050148263912196, "loss": 7.5441, "step": 120690 }, { "epoch": 14.524669073405535, "grad_norm": 1619.5029296875, "learning_rate": 0.00019049986431324636, "loss": 7.5592, "step": 120700 }, { "epoch": 14.525872442839951, "grad_norm": 4418.81591796875, "learning_rate": 0.0001904982458563951, "loss": 7.5768, "step": 120710 }, { "epoch": 14.527075812274369, "grad_norm": 2280.780029296875, "learning_rate": 0.00019049662726857057, "loss": 7.4688, "step": 120720 }, { "epoch": 14.528279181708784, "grad_norm": 1257.7911376953125, "learning_rate": 0.00019049500854977504, "loss": 7.4916, "step": 120730 }, { "epoch": 14.5294825511432, "grad_norm": 8332.865234375, "learning_rate": 0.00019049338970001092, "loss": 7.524, "step": 120740 }, { "epoch": 14.530685920577618, "grad_norm": 7059.63037109375, "learning_rate": 0.0001904917707192805, "loss": 7.5303, "step": 120750 }, { "epoch": 14.531889290012034, "grad_norm": 1096.492919921875, "learning_rate": 0.00019049015160758613, "loss": 7.4917, "step": 120760 }, { "epoch": 14.53309265944645, "grad_norm": 869.6895141601562, "learning_rate": 0.0001904885323649302, "loss": 7.5133, "step": 120770 }, { "epoch": 14.534296028880867, "grad_norm": 2343.33544921875, "learning_rate": 0.00019048691299131503, "loss": 7.6167, "step": 120780 }, { "epoch": 14.535499398315283, "grad_norm": 3463.762939453125, "learning_rate": 0.00019048529348674294, "loss": 7.6773, "step": 120790 }, { "epoch": 14.536702767749698, "grad_norm": 1520.4482421875, "learning_rate": 0.00019048367385121628, "loss": 7.7014, "step": 120800 }, { "epoch": 14.537906137184116, "grad_norm": 1333.6510009765625, "learning_rate": 0.0001904820540847374, "loss": 7.6907, "step": 120810 }, { "epoch": 14.539109506618532, "grad_norm": 671.68505859375, "learning_rate": 0.00019048043418730867, "loss": 7.5611, "step": 120820 }, { "epoch": 14.540312876052948, "grad_norm": 1106.837158203125, "learning_rate": 0.00019047881415893244, "loss": 7.5656, "step": 120830 }, { "epoch": 14.541516245487365, "grad_norm": 3465.642822265625, "learning_rate": 0.00019047719399961096, "loss": 7.6345, "step": 120840 }, { "epoch": 14.542719614921781, "grad_norm": 710.587890625, "learning_rate": 0.0001904755737093467, "loss": 7.5027, "step": 120850 }, { "epoch": 14.543922984356197, "grad_norm": 510.3826904296875, "learning_rate": 0.0001904739532881419, "loss": 7.8557, "step": 120860 }, { "epoch": 14.545126353790614, "grad_norm": 1216.6356201171875, "learning_rate": 0.00019047233273599898, "loss": 7.6288, "step": 120870 }, { "epoch": 14.54632972322503, "grad_norm": 320.2340393066406, "learning_rate": 0.00019047071205292025, "loss": 7.5113, "step": 120880 }, { "epoch": 14.547533092659446, "grad_norm": 1047.402099609375, "learning_rate": 0.00019046909123890808, "loss": 7.5235, "step": 120890 }, { "epoch": 14.548736462093864, "grad_norm": 507.4156188964844, "learning_rate": 0.0001904674702939648, "loss": 7.5328, "step": 120900 }, { "epoch": 14.54993983152828, "grad_norm": 3783.83837890625, "learning_rate": 0.00019046584921809272, "loss": 7.5166, "step": 120910 }, { "epoch": 14.551143200962695, "grad_norm": 516.8233642578125, "learning_rate": 0.00019046422801129425, "loss": 7.5846, "step": 120920 }, { "epoch": 14.552346570397113, "grad_norm": 513927.84375, "learning_rate": 0.0001904626066735717, "loss": 7.5438, "step": 120930 }, { "epoch": 14.553549939831528, "grad_norm": 1209770.0, "learning_rate": 0.00019046098520492742, "loss": 7.6522, "step": 120940 }, { "epoch": 14.554753309265944, "grad_norm": 1310284.0, "learning_rate": 0.00019045936360536375, "loss": 7.5744, "step": 120950 }, { "epoch": 14.555956678700362, "grad_norm": 5488427.5, "learning_rate": 0.00019045774187488305, "loss": 7.5469, "step": 120960 }, { "epoch": 14.557160048134778, "grad_norm": 2373651.0, "learning_rate": 0.00019045612001348768, "loss": 7.4876, "step": 120970 }, { "epoch": 14.558363417569193, "grad_norm": 467700.625, "learning_rate": 0.00019045449802118, "loss": 7.5919, "step": 120980 }, { "epoch": 14.559566787003611, "grad_norm": 779538.6875, "learning_rate": 0.00019045287589796226, "loss": 7.5552, "step": 120990 }, { "epoch": 14.560770156438027, "grad_norm": 2413643.5, "learning_rate": 0.0001904512536438369, "loss": 7.3939, "step": 121000 }, { "epoch": 14.561973525872443, "grad_norm": 2278255.5, "learning_rate": 0.00019044963125880627, "loss": 7.5379, "step": 121010 }, { "epoch": 14.56317689530686, "grad_norm": 639653.75, "learning_rate": 0.00019044800874287268, "loss": 7.6289, "step": 121020 }, { "epoch": 14.564380264741276, "grad_norm": 168440.578125, "learning_rate": 0.00019044638609603845, "loss": 7.5448, "step": 121030 }, { "epoch": 14.565583634175692, "grad_norm": 222035.640625, "learning_rate": 0.00019044476331830606, "loss": 7.6814, "step": 121040 }, { "epoch": 14.566787003610107, "grad_norm": 582913.375, "learning_rate": 0.00019044314040967767, "loss": 7.5201, "step": 121050 }, { "epoch": 14.567990373044525, "grad_norm": 35121.67578125, "learning_rate": 0.0001904415173701558, "loss": 7.5092, "step": 121060 }, { "epoch": 14.56919374247894, "grad_norm": 107542.0390625, "learning_rate": 0.0001904398941997427, "loss": 7.5003, "step": 121070 }, { "epoch": 14.570397111913357, "grad_norm": 314492.96875, "learning_rate": 0.00019043827089844072, "loss": 7.5012, "step": 121080 }, { "epoch": 14.571600481347774, "grad_norm": 84995.375, "learning_rate": 0.00019043664746625226, "loss": 7.6043, "step": 121090 }, { "epoch": 14.57280385078219, "grad_norm": 100890.8125, "learning_rate": 0.00019043502390317962, "loss": 7.5499, "step": 121100 }, { "epoch": 14.574007220216606, "grad_norm": 246601.78125, "learning_rate": 0.0001904334002092252, "loss": 7.5159, "step": 121110 }, { "epoch": 14.575210589651023, "grad_norm": 45118.39453125, "learning_rate": 0.0001904317763843913, "loss": 7.5484, "step": 121120 }, { "epoch": 14.57641395908544, "grad_norm": 303728.75, "learning_rate": 0.00019043015242868033, "loss": 7.6251, "step": 121130 }, { "epoch": 14.577617328519855, "grad_norm": 88892.3359375, "learning_rate": 0.00019042852834209458, "loss": 7.5392, "step": 121140 }, { "epoch": 14.578820697954272, "grad_norm": 35062.8203125, "learning_rate": 0.0001904269041246364, "loss": 7.5078, "step": 121150 }, { "epoch": 14.580024067388688, "grad_norm": 78435.4609375, "learning_rate": 0.0001904252797763082, "loss": 7.4399, "step": 121160 }, { "epoch": 14.581227436823104, "grad_norm": 203744.515625, "learning_rate": 0.00019042365529711228, "loss": 7.6061, "step": 121170 }, { "epoch": 14.582430806257522, "grad_norm": 49817.32421875, "learning_rate": 0.000190422030687051, "loss": 7.5163, "step": 121180 }, { "epoch": 14.583634175691937, "grad_norm": 73829.75, "learning_rate": 0.0001904204059461267, "loss": 7.5219, "step": 121190 }, { "epoch": 14.584837545126353, "grad_norm": 46404.96875, "learning_rate": 0.00019041878107434177, "loss": 7.6124, "step": 121200 }, { "epoch": 14.58604091456077, "grad_norm": 110398.5234375, "learning_rate": 0.00019041715607169858, "loss": 7.5484, "step": 121210 }, { "epoch": 14.587244283995187, "grad_norm": 339699.03125, "learning_rate": 0.00019041553093819937, "loss": 7.5913, "step": 121220 }, { "epoch": 14.588447653429602, "grad_norm": 32041.23046875, "learning_rate": 0.00019041390567384658, "loss": 7.6101, "step": 121230 }, { "epoch": 14.58965102286402, "grad_norm": 432425.84375, "learning_rate": 0.0001904122802786426, "loss": 7.5679, "step": 121240 }, { "epoch": 14.590854392298436, "grad_norm": 2484734.25, "learning_rate": 0.00019041065475258967, "loss": 7.48, "step": 121250 }, { "epoch": 14.592057761732852, "grad_norm": 3148994.25, "learning_rate": 0.00019040902909569024, "loss": 7.5393, "step": 121260 }, { "epoch": 14.593261131167269, "grad_norm": 5117872.0, "learning_rate": 0.0001904074033079466, "loss": 7.7275, "step": 121270 }, { "epoch": 14.594464500601685, "grad_norm": 1033.40185546875, "learning_rate": 0.00019040577738936113, "loss": 7.7333, "step": 121280 }, { "epoch": 14.5956678700361, "grad_norm": 1416.0164794921875, "learning_rate": 0.00019040415133993616, "loss": 7.639, "step": 121290 }, { "epoch": 14.596871239470518, "grad_norm": 1054.4320068359375, "learning_rate": 0.0001904025251596741, "loss": 7.6402, "step": 121300 }, { "epoch": 14.598074608904934, "grad_norm": 2897.607421875, "learning_rate": 0.00019040089884857727, "loss": 7.6028, "step": 121310 }, { "epoch": 14.59927797833935, "grad_norm": 1400.635498046875, "learning_rate": 0.000190399272406648, "loss": 7.6035, "step": 121320 }, { "epoch": 14.600481347773766, "grad_norm": 11341.0, "learning_rate": 0.00019039764583388865, "loss": 7.5357, "step": 121330 }, { "epoch": 14.601684717208183, "grad_norm": 1359.5899658203125, "learning_rate": 0.00019039601913030158, "loss": 7.5346, "step": 121340 }, { "epoch": 14.602888086642599, "grad_norm": 5838.73681640625, "learning_rate": 0.0001903943922958892, "loss": 7.5998, "step": 121350 }, { "epoch": 14.604091456077015, "grad_norm": 8564.9609375, "learning_rate": 0.00019039276533065378, "loss": 7.5731, "step": 121360 }, { "epoch": 14.605294825511432, "grad_norm": 9540.6455078125, "learning_rate": 0.00019039113823459772, "loss": 7.5266, "step": 121370 }, { "epoch": 14.606498194945848, "grad_norm": 20517.2109375, "learning_rate": 0.00019038951100772337, "loss": 7.6627, "step": 121380 }, { "epoch": 14.607701564380264, "grad_norm": 4013.63671875, "learning_rate": 0.0001903878836500331, "loss": 7.4729, "step": 121390 }, { "epoch": 14.608904933814681, "grad_norm": 11828.546875, "learning_rate": 0.00019038625616152923, "loss": 7.5315, "step": 121400 }, { "epoch": 14.610108303249097, "grad_norm": 10380.02734375, "learning_rate": 0.0001903846285422141, "loss": 7.5812, "step": 121410 }, { "epoch": 14.611311672683513, "grad_norm": 9079.84375, "learning_rate": 0.00019038300079209016, "loss": 7.629, "step": 121420 }, { "epoch": 14.61251504211793, "grad_norm": 25292.38671875, "learning_rate": 0.00019038137291115965, "loss": 7.6044, "step": 121430 }, { "epoch": 14.613718411552346, "grad_norm": 15534.5, "learning_rate": 0.00019037974489942498, "loss": 7.5668, "step": 121440 }, { "epoch": 14.614921780986762, "grad_norm": 364.77783203125, "learning_rate": 0.00019037811675688855, "loss": 7.5552, "step": 121450 }, { "epoch": 14.61612515042118, "grad_norm": 91.00682830810547, "learning_rate": 0.00019037648848355262, "loss": 8.0238, "step": 121460 }, { "epoch": 14.617328519855596, "grad_norm": 171.2364501953125, "learning_rate": 0.00019037486007941965, "loss": 8.359, "step": 121470 }, { "epoch": 14.618531889290011, "grad_norm": 85.66725158691406, "learning_rate": 0.0001903732315444919, "loss": 7.9111, "step": 121480 }, { "epoch": 14.619735258724429, "grad_norm": 252.13705444335938, "learning_rate": 0.00019037160287877178, "loss": 7.7282, "step": 121490 }, { "epoch": 14.620938628158845, "grad_norm": 53.998146057128906, "learning_rate": 0.00019036997408226168, "loss": 7.8245, "step": 121500 }, { "epoch": 14.62214199759326, "grad_norm": 43.97968292236328, "learning_rate": 0.00019036834515496386, "loss": 7.6265, "step": 121510 }, { "epoch": 14.623345367027678, "grad_norm": 280.2363586425781, "learning_rate": 0.00019036671609688074, "loss": 7.7256, "step": 121520 }, { "epoch": 14.624548736462094, "grad_norm": 77.4672622680664, "learning_rate": 0.00019036508690801468, "loss": 7.6647, "step": 121530 }, { "epoch": 14.62575210589651, "grad_norm": 126.6371078491211, "learning_rate": 0.00019036345758836806, "loss": 7.6212, "step": 121540 }, { "epoch": 14.626955475330927, "grad_norm": 327.92376708984375, "learning_rate": 0.00019036182813794318, "loss": 7.5839, "step": 121550 }, { "epoch": 14.628158844765343, "grad_norm": 93.58558654785156, "learning_rate": 0.00019036019855674243, "loss": 7.5643, "step": 121560 }, { "epoch": 14.629362214199759, "grad_norm": 1769.89013671875, "learning_rate": 0.00019035856884476817, "loss": 7.5869, "step": 121570 }, { "epoch": 14.630565583634176, "grad_norm": 18687.419921875, "learning_rate": 0.00019035693900202274, "loss": 7.535, "step": 121580 }, { "epoch": 14.631768953068592, "grad_norm": 13755.064453125, "learning_rate": 0.0001903553090285085, "loss": 7.5482, "step": 121590 }, { "epoch": 14.632972322503008, "grad_norm": 6644.853515625, "learning_rate": 0.00019035367892422783, "loss": 7.433, "step": 121600 }, { "epoch": 14.634175691937426, "grad_norm": 6483.65673828125, "learning_rate": 0.00019035204868918308, "loss": 7.5786, "step": 121610 }, { "epoch": 14.635379061371841, "grad_norm": 18597.09375, "learning_rate": 0.00019035041832337664, "loss": 7.5576, "step": 121620 }, { "epoch": 14.636582430806257, "grad_norm": 19700.56640625, "learning_rate": 0.00019034878782681077, "loss": 7.5908, "step": 121630 }, { "epoch": 14.637785800240675, "grad_norm": 5978.9208984375, "learning_rate": 0.00019034715719948796, "loss": 7.5647, "step": 121640 }, { "epoch": 14.63898916967509, "grad_norm": 15456.568359375, "learning_rate": 0.00019034552644141048, "loss": 7.513, "step": 121650 }, { "epoch": 14.640192539109506, "grad_norm": 12615.375, "learning_rate": 0.00019034389555258073, "loss": 7.5143, "step": 121660 }, { "epoch": 14.641395908543924, "grad_norm": 14405.9013671875, "learning_rate": 0.00019034226453300103, "loss": 7.5429, "step": 121670 }, { "epoch": 14.64259927797834, "grad_norm": 11867.283203125, "learning_rate": 0.0001903406333826738, "loss": 7.5883, "step": 121680 }, { "epoch": 14.643802647412755, "grad_norm": 23185.244140625, "learning_rate": 0.00019033900210160137, "loss": 7.5519, "step": 121690 }, { "epoch": 14.645006016847173, "grad_norm": 7817.38134765625, "learning_rate": 0.00019033737068978608, "loss": 7.6064, "step": 121700 }, { "epoch": 14.646209386281589, "grad_norm": 8757.47265625, "learning_rate": 0.00019033573914723032, "loss": 7.4861, "step": 121710 }, { "epoch": 14.647412755716005, "grad_norm": 30533.787109375, "learning_rate": 0.00019033410747393642, "loss": 7.579, "step": 121720 }, { "epoch": 14.648616125150422, "grad_norm": 12339.4365234375, "learning_rate": 0.0001903324756699068, "loss": 7.5106, "step": 121730 }, { "epoch": 14.649819494584838, "grad_norm": 11227.1005859375, "learning_rate": 0.0001903308437351438, "loss": 7.6391, "step": 121740 }, { "epoch": 14.651022864019254, "grad_norm": 10967.966796875, "learning_rate": 0.0001903292116696497, "loss": 7.582, "step": 121750 }, { "epoch": 14.65222623345367, "grad_norm": 16365.79296875, "learning_rate": 0.000190327579473427, "loss": 7.5544, "step": 121760 }, { "epoch": 14.653429602888087, "grad_norm": 26277.84765625, "learning_rate": 0.00019032594714647792, "loss": 7.6138, "step": 121770 }, { "epoch": 14.654632972322503, "grad_norm": 10200.408203125, "learning_rate": 0.00019032431468880495, "loss": 7.5447, "step": 121780 }, { "epoch": 14.655836341756919, "grad_norm": 29018.0078125, "learning_rate": 0.00019032268210041035, "loss": 7.4466, "step": 121790 }, { "epoch": 14.657039711191336, "grad_norm": 9675.4892578125, "learning_rate": 0.00019032104938129657, "loss": 7.5781, "step": 121800 }, { "epoch": 14.658243080625752, "grad_norm": 13699.0322265625, "learning_rate": 0.0001903194165314659, "loss": 7.6039, "step": 121810 }, { "epoch": 14.659446450060168, "grad_norm": 23537.6328125, "learning_rate": 0.00019031778355092078, "loss": 7.5662, "step": 121820 }, { "epoch": 14.660649819494585, "grad_norm": 5751.908203125, "learning_rate": 0.00019031615043966352, "loss": 7.6278, "step": 121830 }, { "epoch": 14.661853188929001, "grad_norm": 30593.689453125, "learning_rate": 0.00019031451719769648, "loss": 7.5632, "step": 121840 }, { "epoch": 14.663056558363417, "grad_norm": 6277.75244140625, "learning_rate": 0.000190312883825022, "loss": 7.5859, "step": 121850 }, { "epoch": 14.664259927797834, "grad_norm": 18195.185546875, "learning_rate": 0.0001903112503216425, "loss": 7.5196, "step": 121860 }, { "epoch": 14.66546329723225, "grad_norm": 45156.15625, "learning_rate": 0.00019030961668756034, "loss": 7.553, "step": 121870 }, { "epoch": 14.666666666666666, "grad_norm": 11432.529296875, "learning_rate": 0.00019030798292277788, "loss": 7.5454, "step": 121880 }, { "epoch": 14.667870036101084, "grad_norm": 10046.5576171875, "learning_rate": 0.00019030634902729744, "loss": 7.6115, "step": 121890 }, { "epoch": 14.6690734055355, "grad_norm": 26655.6875, "learning_rate": 0.00019030471500112143, "loss": 7.5587, "step": 121900 }, { "epoch": 14.670276774969915, "grad_norm": 14060.3193359375, "learning_rate": 0.0001903030808442522, "loss": 7.6529, "step": 121910 }, { "epoch": 14.671480144404333, "grad_norm": 2856.0517578125, "learning_rate": 0.00019030144655669213, "loss": 7.5603, "step": 121920 }, { "epoch": 14.672683513838749, "grad_norm": 8363.81640625, "learning_rate": 0.00019029981213844357, "loss": 7.4968, "step": 121930 }, { "epoch": 14.673886883273164, "grad_norm": 1043.9971923828125, "learning_rate": 0.00019029817758950887, "loss": 7.6176, "step": 121940 }, { "epoch": 14.675090252707582, "grad_norm": 824.0234375, "learning_rate": 0.00019029654290989042, "loss": 7.4648, "step": 121950 }, { "epoch": 14.676293622141998, "grad_norm": 12782.1513671875, "learning_rate": 0.00019029490809959056, "loss": 7.5828, "step": 121960 }, { "epoch": 14.677496991576414, "grad_norm": 11996.8740234375, "learning_rate": 0.0001902932731586117, "loss": 7.5493, "step": 121970 }, { "epoch": 14.678700361010831, "grad_norm": 7799.84716796875, "learning_rate": 0.0001902916380869562, "loss": 7.586, "step": 121980 }, { "epoch": 14.679903730445247, "grad_norm": 15158.6318359375, "learning_rate": 0.00019029000288462637, "loss": 7.5302, "step": 121990 }, { "epoch": 14.681107099879663, "grad_norm": 11101.453125, "learning_rate": 0.00019028836755162463, "loss": 7.4953, "step": 122000 }, { "epoch": 14.68231046931408, "grad_norm": 11440.546875, "learning_rate": 0.00019028673208795334, "loss": 7.6015, "step": 122010 }, { "epoch": 14.683513838748496, "grad_norm": 9446.1123046875, "learning_rate": 0.00019028509649361483, "loss": 7.4875, "step": 122020 }, { "epoch": 14.684717208182912, "grad_norm": 4520.98828125, "learning_rate": 0.00019028346076861152, "loss": 7.5343, "step": 122030 }, { "epoch": 14.685920577617328, "grad_norm": 29015.2890625, "learning_rate": 0.00019028182491294575, "loss": 7.6106, "step": 122040 }, { "epoch": 14.687123947051745, "grad_norm": 5525.0478515625, "learning_rate": 0.00019028018892661987, "loss": 7.5342, "step": 122050 }, { "epoch": 14.688327316486161, "grad_norm": 6324.21142578125, "learning_rate": 0.00019027855280963628, "loss": 7.637, "step": 122060 }, { "epoch": 14.689530685920577, "grad_norm": 30115.294921875, "learning_rate": 0.00019027691656199735, "loss": 7.5702, "step": 122070 }, { "epoch": 14.690734055354994, "grad_norm": 9524.673828125, "learning_rate": 0.00019027528018370542, "loss": 7.596, "step": 122080 }, { "epoch": 14.69193742478941, "grad_norm": 12366.0908203125, "learning_rate": 0.00019027364367476285, "loss": 7.6289, "step": 122090 }, { "epoch": 14.693140794223826, "grad_norm": 21210.775390625, "learning_rate": 0.00019027200703517204, "loss": 7.5118, "step": 122100 }, { "epoch": 14.694344163658243, "grad_norm": 13548.4619140625, "learning_rate": 0.00019027037026493536, "loss": 7.6133, "step": 122110 }, { "epoch": 14.69554753309266, "grad_norm": 11168.794921875, "learning_rate": 0.00019026873336405517, "loss": 7.6082, "step": 122120 }, { "epoch": 14.696750902527075, "grad_norm": 50473.8046875, "learning_rate": 0.00019026709633253384, "loss": 7.4926, "step": 122130 }, { "epoch": 14.697954271961493, "grad_norm": 30738.1171875, "learning_rate": 0.00019026545917037372, "loss": 7.6124, "step": 122140 }, { "epoch": 14.699157641395908, "grad_norm": 8852.40234375, "learning_rate": 0.0001902638218775772, "loss": 7.4859, "step": 122150 }, { "epoch": 14.700361010830324, "grad_norm": 4939.70166015625, "learning_rate": 0.00019026218445414666, "loss": 7.4696, "step": 122160 }, { "epoch": 14.701564380264742, "grad_norm": 21111.3046875, "learning_rate": 0.00019026054690008445, "loss": 7.5586, "step": 122170 }, { "epoch": 14.702767749699158, "grad_norm": 17790.203125, "learning_rate": 0.00019025890921539292, "loss": 7.5835, "step": 122180 }, { "epoch": 14.703971119133573, "grad_norm": 10519.9755859375, "learning_rate": 0.00019025727140007447, "loss": 7.5517, "step": 122190 }, { "epoch": 14.705174488567991, "grad_norm": 14167.900390625, "learning_rate": 0.00019025563345413148, "loss": 7.5415, "step": 122200 }, { "epoch": 14.706377858002407, "grad_norm": 2617.821533203125, "learning_rate": 0.0001902539953775663, "loss": 7.5788, "step": 122210 }, { "epoch": 14.707581227436823, "grad_norm": 23949.509765625, "learning_rate": 0.0001902523571703813, "loss": 7.5168, "step": 122220 }, { "epoch": 14.70878459687124, "grad_norm": 35940.30859375, "learning_rate": 0.00019025071883257885, "loss": 7.5534, "step": 122230 }, { "epoch": 14.709987966305656, "grad_norm": 55285.6875, "learning_rate": 0.00019024908036416134, "loss": 7.5196, "step": 122240 }, { "epoch": 14.711191335740072, "grad_norm": 9608.3037109375, "learning_rate": 0.00019024744176513112, "loss": 7.5453, "step": 122250 }, { "epoch": 14.71239470517449, "grad_norm": 6294.59912109375, "learning_rate": 0.0001902458030354906, "loss": 7.504, "step": 122260 }, { "epoch": 14.713598074608905, "grad_norm": 7079.0673828125, "learning_rate": 0.00019024416417524206, "loss": 7.5439, "step": 122270 }, { "epoch": 14.71480144404332, "grad_norm": 4539.9521484375, "learning_rate": 0.000190242525184388, "loss": 7.5062, "step": 122280 }, { "epoch": 14.716004813477738, "grad_norm": 6838.11767578125, "learning_rate": 0.00019024088606293068, "loss": 7.4395, "step": 122290 }, { "epoch": 14.717208182912154, "grad_norm": 17486.287109375, "learning_rate": 0.00019023924681087255, "loss": 7.5497, "step": 122300 }, { "epoch": 14.71841155234657, "grad_norm": 7714.32958984375, "learning_rate": 0.00019023760742821594, "loss": 7.5659, "step": 122310 }, { "epoch": 14.719614921780988, "grad_norm": 19279.427734375, "learning_rate": 0.00019023596791496324, "loss": 7.502, "step": 122320 }, { "epoch": 14.720818291215403, "grad_norm": 4168.82861328125, "learning_rate": 0.00019023432827111681, "loss": 7.6056, "step": 122330 }, { "epoch": 14.722021660649819, "grad_norm": 11071.7626953125, "learning_rate": 0.00019023268849667903, "loss": 7.5612, "step": 122340 }, { "epoch": 14.723225030084237, "grad_norm": 4132.884765625, "learning_rate": 0.00019023104859165223, "loss": 7.4394, "step": 122350 }, { "epoch": 14.724428399518652, "grad_norm": 486.6014404296875, "learning_rate": 0.0001902294085560389, "loss": 7.6269, "step": 122360 }, { "epoch": 14.725631768953068, "grad_norm": 1091.424072265625, "learning_rate": 0.0001902277683898413, "loss": 7.5794, "step": 122370 }, { "epoch": 14.726835138387486, "grad_norm": 82.9935073852539, "learning_rate": 0.0001902261280930618, "loss": 7.523, "step": 122380 }, { "epoch": 14.728038507821902, "grad_norm": 547.2711791992188, "learning_rate": 0.0001902244876657029, "loss": 7.555, "step": 122390 }, { "epoch": 14.729241877256317, "grad_norm": 233.89939880371094, "learning_rate": 0.00019022284710776684, "loss": 7.5725, "step": 122400 }, { "epoch": 14.730445246690735, "grad_norm": 1547.3026123046875, "learning_rate": 0.00019022120641925606, "loss": 7.5564, "step": 122410 }, { "epoch": 14.73164861612515, "grad_norm": 264.3048400878906, "learning_rate": 0.0001902195656001729, "loss": 7.4457, "step": 122420 }, { "epoch": 14.732851985559567, "grad_norm": 8613589.0, "learning_rate": 0.00019021792465051976, "loss": 7.5391, "step": 122430 }, { "epoch": 14.734055354993982, "grad_norm": 515.7554321289062, "learning_rate": 0.00019021628357029902, "loss": 7.5001, "step": 122440 }, { "epoch": 14.7352587244284, "grad_norm": 5424.0078125, "learning_rate": 0.00019021464235951304, "loss": 7.4824, "step": 122450 }, { "epoch": 14.736462093862816, "grad_norm": 405.7532653808594, "learning_rate": 0.0001902130010181642, "loss": 7.5818, "step": 122460 }, { "epoch": 14.737665463297231, "grad_norm": 4180.47998046875, "learning_rate": 0.00019021135954625488, "loss": 7.6316, "step": 122470 }, { "epoch": 14.738868832731649, "grad_norm": 3046.92431640625, "learning_rate": 0.00019020971794378745, "loss": 7.5179, "step": 122480 }, { "epoch": 14.740072202166065, "grad_norm": 5158.8134765625, "learning_rate": 0.0001902080762107643, "loss": 7.5426, "step": 122490 }, { "epoch": 14.74127557160048, "grad_norm": 7532.86669921875, "learning_rate": 0.00019020643434718775, "loss": 7.5693, "step": 122500 }, { "epoch": 14.742478941034898, "grad_norm": 5827.73486328125, "learning_rate": 0.00019020479235306025, "loss": 7.6041, "step": 122510 }, { "epoch": 14.743682310469314, "grad_norm": 6403.52685546875, "learning_rate": 0.00019020315022838413, "loss": 7.56, "step": 122520 }, { "epoch": 14.74488567990373, "grad_norm": 6356.53955078125, "learning_rate": 0.00019020150797316177, "loss": 7.5203, "step": 122530 }, { "epoch": 14.746089049338147, "grad_norm": 155.31362915039062, "learning_rate": 0.0001901998655873956, "loss": 7.5035, "step": 122540 }, { "epoch": 14.747292418772563, "grad_norm": 28.775062561035156, "learning_rate": 0.00019019822307108793, "loss": 7.5288, "step": 122550 }, { "epoch": 14.748495788206979, "grad_norm": 49.77052307128906, "learning_rate": 0.00019019658042424117, "loss": 7.5085, "step": 122560 }, { "epoch": 14.749699157641396, "grad_norm": 58.89057922363281, "learning_rate": 0.0001901949376468577, "loss": 7.6573, "step": 122570 }, { "epoch": 14.750902527075812, "grad_norm": 37.69395446777344, "learning_rate": 0.00019019329473893987, "loss": 7.524, "step": 122580 }, { "epoch": 14.752105896510228, "grad_norm": 132.66085815429688, "learning_rate": 0.00019019165170049008, "loss": 7.4882, "step": 122590 }, { "epoch": 14.753309265944646, "grad_norm": 153.02452087402344, "learning_rate": 0.00019019000853151067, "loss": 7.5685, "step": 122600 }, { "epoch": 14.754512635379061, "grad_norm": 53.82183074951172, "learning_rate": 0.0001901883652320041, "loss": 7.5153, "step": 122610 }, { "epoch": 14.755716004813477, "grad_norm": 133.5232391357422, "learning_rate": 0.00019018672180197267, "loss": 7.4755, "step": 122620 }, { "epoch": 14.756919374247895, "grad_norm": 37.387062072753906, "learning_rate": 0.0001901850782414188, "loss": 7.6259, "step": 122630 }, { "epoch": 14.75812274368231, "grad_norm": 112.40479278564453, "learning_rate": 0.0001901834345503448, "loss": 7.4955, "step": 122640 }, { "epoch": 14.759326113116726, "grad_norm": 57.584815979003906, "learning_rate": 0.00019018179072875316, "loss": 7.4981, "step": 122650 }, { "epoch": 14.760529482551144, "grad_norm": 55.004913330078125, "learning_rate": 0.00019018014677664623, "loss": 7.4647, "step": 122660 }, { "epoch": 14.76173285198556, "grad_norm": 55.24393081665039, "learning_rate": 0.00019017850269402632, "loss": 7.5824, "step": 122670 }, { "epoch": 14.762936221419976, "grad_norm": 158.1719970703125, "learning_rate": 0.00019017685848089587, "loss": 7.5682, "step": 122680 }, { "epoch": 14.764139590854393, "grad_norm": 41.28356170654297, "learning_rate": 0.00019017521413725722, "loss": 7.5727, "step": 122690 }, { "epoch": 14.765342960288809, "grad_norm": 33.73509216308594, "learning_rate": 0.0001901735696631128, "loss": 7.5237, "step": 122700 }, { "epoch": 14.766546329723225, "grad_norm": 53.68198776245117, "learning_rate": 0.00019017192505846496, "loss": 7.501, "step": 122710 }, { "epoch": 14.76774969915764, "grad_norm": 91.94418334960938, "learning_rate": 0.00019017028032331602, "loss": 7.5169, "step": 122720 }, { "epoch": 14.768953068592058, "grad_norm": 34.075313568115234, "learning_rate": 0.0001901686354576685, "loss": 7.5402, "step": 122730 }, { "epoch": 14.770156438026474, "grad_norm": 85.29915618896484, "learning_rate": 0.00019016699046152467, "loss": 7.5127, "step": 122740 }, { "epoch": 14.77135980746089, "grad_norm": 68.47270965576172, "learning_rate": 0.00019016534533488694, "loss": 7.5958, "step": 122750 }, { "epoch": 14.772563176895307, "grad_norm": 33.10287857055664, "learning_rate": 0.0001901637000777577, "loss": 7.5908, "step": 122760 }, { "epoch": 14.773766546329723, "grad_norm": 101.12022399902344, "learning_rate": 0.0001901620546901393, "loss": 7.4801, "step": 122770 }, { "epoch": 14.774969915764139, "grad_norm": 61.49049377441406, "learning_rate": 0.00019016040917203418, "loss": 7.609, "step": 122780 }, { "epoch": 14.776173285198556, "grad_norm": 159.17449951171875, "learning_rate": 0.0001901587635234447, "loss": 7.5843, "step": 122790 }, { "epoch": 14.777376654632972, "grad_norm": 135.71945190429688, "learning_rate": 0.00019015711774437321, "loss": 7.4823, "step": 122800 }, { "epoch": 14.778580024067388, "grad_norm": 120.03449249267578, "learning_rate": 0.00019015547183482212, "loss": 7.4968, "step": 122810 }, { "epoch": 14.779783393501805, "grad_norm": 195.04974365234375, "learning_rate": 0.0001901538257947938, "loss": 7.5601, "step": 122820 }, { "epoch": 14.780986762936221, "grad_norm": 165.19677734375, "learning_rate": 0.0001901521796242906, "loss": 7.486, "step": 122830 }, { "epoch": 14.782190132370637, "grad_norm": 152.77638244628906, "learning_rate": 0.000190150533323315, "loss": 7.4967, "step": 122840 }, { "epoch": 14.783393501805055, "grad_norm": 78.87261962890625, "learning_rate": 0.00019014888689186928, "loss": 7.564, "step": 122850 }, { "epoch": 14.78459687123947, "grad_norm": 178.48228454589844, "learning_rate": 0.00019014724032995588, "loss": 7.5698, "step": 122860 }, { "epoch": 14.785800240673886, "grad_norm": 244.239501953125, "learning_rate": 0.00019014559363757715, "loss": 7.521, "step": 122870 }, { "epoch": 14.787003610108304, "grad_norm": 193.8568572998047, "learning_rate": 0.0001901439468147355, "loss": 7.598, "step": 122880 }, { "epoch": 14.78820697954272, "grad_norm": 220.69061279296875, "learning_rate": 0.00019014229986143331, "loss": 7.5123, "step": 122890 }, { "epoch": 14.789410348977135, "grad_norm": 331.3968200683594, "learning_rate": 0.00019014065277767295, "loss": 7.4906, "step": 122900 }, { "epoch": 14.790613718411553, "grad_norm": 604.4608154296875, "learning_rate": 0.00019013900556345682, "loss": 7.4675, "step": 122910 }, { "epoch": 14.791817087845969, "grad_norm": 231.6101531982422, "learning_rate": 0.00019013735821878726, "loss": 7.4957, "step": 122920 }, { "epoch": 14.793020457280385, "grad_norm": 300.3955383300781, "learning_rate": 0.00019013571074366672, "loss": 7.4884, "step": 122930 }, { "epoch": 14.794223826714802, "grad_norm": 274.4205322265625, "learning_rate": 0.00019013406313809757, "loss": 7.5254, "step": 122940 }, { "epoch": 14.795427196149218, "grad_norm": 591.6887817382812, "learning_rate": 0.00019013241540208212, "loss": 7.4035, "step": 122950 }, { "epoch": 14.796630565583634, "grad_norm": 427.78155517578125, "learning_rate": 0.00019013076753562287, "loss": 7.5022, "step": 122960 }, { "epoch": 14.797833935018051, "grad_norm": 392.9446105957031, "learning_rate": 0.0001901291195387221, "loss": 7.4787, "step": 122970 }, { "epoch": 14.799037304452467, "grad_norm": 508.8036193847656, "learning_rate": 0.00019012747141138227, "loss": 7.4821, "step": 122980 }, { "epoch": 14.800240673886883, "grad_norm": 471.197265625, "learning_rate": 0.00019012582315360574, "loss": 7.5001, "step": 122990 }, { "epoch": 14.8014440433213, "grad_norm": 411.7552490234375, "learning_rate": 0.00019012417476539488, "loss": 7.4503, "step": 123000 }, { "epoch": 14.802647412755716, "grad_norm": 167.80752563476562, "learning_rate": 0.00019012252624675207, "loss": 7.508, "step": 123010 }, { "epoch": 14.803850782190132, "grad_norm": 489.46759033203125, "learning_rate": 0.00019012087759767975, "loss": 7.4661, "step": 123020 }, { "epoch": 14.80505415162455, "grad_norm": 495.8715515136719, "learning_rate": 0.00019011922881818026, "loss": 7.5215, "step": 123030 }, { "epoch": 14.806257521058965, "grad_norm": 316.3471984863281, "learning_rate": 0.000190117579908256, "loss": 7.4855, "step": 123040 }, { "epoch": 14.807460890493381, "grad_norm": 1219.4678955078125, "learning_rate": 0.00019011593086790934, "loss": 7.4835, "step": 123050 }, { "epoch": 14.808664259927799, "grad_norm": 726.0543823242188, "learning_rate": 0.0001901142816971427, "loss": 7.5142, "step": 123060 }, { "epoch": 14.809867629362214, "grad_norm": 244.98731994628906, "learning_rate": 0.00019011263239595842, "loss": 7.4415, "step": 123070 }, { "epoch": 14.81107099879663, "grad_norm": 278.6109619140625, "learning_rate": 0.00019011098296435892, "loss": 7.429, "step": 123080 }, { "epoch": 14.812274368231048, "grad_norm": 236.35256958007812, "learning_rate": 0.00019010933340234658, "loss": 7.4471, "step": 123090 }, { "epoch": 14.813477737665464, "grad_norm": 1024.350830078125, "learning_rate": 0.0001901076837099238, "loss": 7.5124, "step": 123100 }, { "epoch": 14.81468110709988, "grad_norm": 249.34996032714844, "learning_rate": 0.00019010603388709295, "loss": 7.5433, "step": 123110 }, { "epoch": 14.815884476534297, "grad_norm": 288.5848083496094, "learning_rate": 0.0001901043839338564, "loss": 7.4241, "step": 123120 }, { "epoch": 14.817087845968713, "grad_norm": 552.9564208984375, "learning_rate": 0.0001901027338502166, "loss": 7.4428, "step": 123130 }, { "epoch": 14.818291215403129, "grad_norm": 517.490966796875, "learning_rate": 0.00019010108363617584, "loss": 7.4797, "step": 123140 }, { "epoch": 14.819494584837544, "grad_norm": 1194.057373046875, "learning_rate": 0.0001900994332917366, "loss": 7.5048, "step": 123150 }, { "epoch": 14.820697954271962, "grad_norm": 290.843505859375, "learning_rate": 0.00019009778281690127, "loss": 7.4732, "step": 123160 }, { "epoch": 14.821901323706378, "grad_norm": 631.5757446289062, "learning_rate": 0.00019009613221167214, "loss": 7.4528, "step": 123170 }, { "epoch": 14.823104693140793, "grad_norm": 631.85107421875, "learning_rate": 0.0001900944814760517, "loss": 7.4764, "step": 123180 }, { "epoch": 14.824308062575211, "grad_norm": 532.7399291992188, "learning_rate": 0.00019009283061004233, "loss": 7.4767, "step": 123190 }, { "epoch": 14.825511432009627, "grad_norm": 604.7129516601562, "learning_rate": 0.00019009117961364634, "loss": 7.3929, "step": 123200 }, { "epoch": 14.826714801444043, "grad_norm": 5542.78515625, "learning_rate": 0.0001900895284868662, "loss": 7.4148, "step": 123210 }, { "epoch": 14.82791817087846, "grad_norm": 1393.5919189453125, "learning_rate": 0.00019008787722970424, "loss": 7.4536, "step": 123220 }, { "epoch": 14.829121540312876, "grad_norm": 1129.9512939453125, "learning_rate": 0.0001900862258421629, "loss": 7.5028, "step": 123230 }, { "epoch": 14.830324909747292, "grad_norm": 419.77069091796875, "learning_rate": 0.00019008457432424453, "loss": 7.4902, "step": 123240 }, { "epoch": 14.83152827918171, "grad_norm": 1766.2742919921875, "learning_rate": 0.0001900829226759516, "loss": 7.4646, "step": 123250 }, { "epoch": 14.832731648616125, "grad_norm": 596.582275390625, "learning_rate": 0.00019008127089728639, "loss": 7.4202, "step": 123260 }, { "epoch": 14.833935018050541, "grad_norm": 1147.4884033203125, "learning_rate": 0.00019007961898825134, "loss": 7.4772, "step": 123270 }, { "epoch": 14.835138387484958, "grad_norm": 646.4959716796875, "learning_rate": 0.00019007796694884882, "loss": 7.5686, "step": 123280 }, { "epoch": 14.836341756919374, "grad_norm": 753.332763671875, "learning_rate": 0.0001900763147790813, "loss": 7.4752, "step": 123290 }, { "epoch": 14.83754512635379, "grad_norm": 317.5640869140625, "learning_rate": 0.00019007466247895107, "loss": 7.4868, "step": 123300 }, { "epoch": 14.838748495788208, "grad_norm": 258.5713806152344, "learning_rate": 0.0001900730100484606, "loss": 7.4888, "step": 123310 }, { "epoch": 14.839951865222623, "grad_norm": 394.9584045410156, "learning_rate": 0.0001900713574876122, "loss": 7.5689, "step": 123320 }, { "epoch": 14.84115523465704, "grad_norm": 613.81689453125, "learning_rate": 0.00019006970479640834, "loss": 7.5011, "step": 123330 }, { "epoch": 14.842358604091457, "grad_norm": 834.0196533203125, "learning_rate": 0.00019006805197485137, "loss": 7.4924, "step": 123340 }, { "epoch": 14.843561973525873, "grad_norm": 323.1937561035156, "learning_rate": 0.0001900663990229437, "loss": 7.4384, "step": 123350 }, { "epoch": 14.844765342960288, "grad_norm": 575.8717041015625, "learning_rate": 0.0001900647459406877, "loss": 7.3928, "step": 123360 }, { "epoch": 14.845968712394706, "grad_norm": 664.209228515625, "learning_rate": 0.0001900630927280858, "loss": 7.4093, "step": 123370 }, { "epoch": 14.847172081829122, "grad_norm": 885.2428588867188, "learning_rate": 0.00019006143938514033, "loss": 7.4908, "step": 123380 }, { "epoch": 14.848375451263538, "grad_norm": 2975.583984375, "learning_rate": 0.00019005978591185376, "loss": 7.37, "step": 123390 }, { "epoch": 14.849578820697955, "grad_norm": 548.8884887695312, "learning_rate": 0.00019005813230822845, "loss": 7.3167, "step": 123400 }, { "epoch": 14.85078219013237, "grad_norm": 523.2142333984375, "learning_rate": 0.00019005647857426675, "loss": 7.4647, "step": 123410 }, { "epoch": 14.851985559566787, "grad_norm": 1123.9825439453125, "learning_rate": 0.00019005482470997112, "loss": 7.4199, "step": 123420 }, { "epoch": 14.853188929001202, "grad_norm": 382.41973876953125, "learning_rate": 0.0001900531707153439, "loss": 7.433, "step": 123430 }, { "epoch": 14.85439229843562, "grad_norm": 781.0701293945312, "learning_rate": 0.00019005151659038752, "loss": 7.3746, "step": 123440 }, { "epoch": 14.855595667870036, "grad_norm": 3915.824951171875, "learning_rate": 0.00019004986233510436, "loss": 7.4461, "step": 123450 }, { "epoch": 14.856799037304452, "grad_norm": 821.8715209960938, "learning_rate": 0.00019004820794949682, "loss": 7.5793, "step": 123460 }, { "epoch": 14.85800240673887, "grad_norm": 17140.52734375, "learning_rate": 0.0001900465534335673, "loss": 7.6551, "step": 123470 }, { "epoch": 14.859205776173285, "grad_norm": 10236.552734375, "learning_rate": 0.00019004489878731817, "loss": 7.5299, "step": 123480 }, { "epoch": 14.8604091456077, "grad_norm": 3837.97607421875, "learning_rate": 0.00019004324401075185, "loss": 7.4197, "step": 123490 }, { "epoch": 14.861612515042118, "grad_norm": 592.1118774414062, "learning_rate": 0.00019004158910387073, "loss": 7.4037, "step": 123500 }, { "epoch": 14.862815884476534, "grad_norm": 2852.675537109375, "learning_rate": 0.0001900399340666772, "loss": 7.4502, "step": 123510 }, { "epoch": 14.86401925391095, "grad_norm": 1133.818359375, "learning_rate": 0.00019003827889917365, "loss": 7.4708, "step": 123520 }, { "epoch": 14.865222623345367, "grad_norm": 1052.6903076171875, "learning_rate": 0.00019003662360136248, "loss": 7.4866, "step": 123530 }, { "epoch": 14.866425992779783, "grad_norm": 317.1869812011719, "learning_rate": 0.00019003496817324606, "loss": 7.5149, "step": 123540 }, { "epoch": 14.867629362214199, "grad_norm": 1005.131103515625, "learning_rate": 0.00019003331261482686, "loss": 7.4323, "step": 123550 }, { "epoch": 14.868832731648617, "grad_norm": 814.2579345703125, "learning_rate": 0.0001900316569261072, "loss": 7.4878, "step": 123560 }, { "epoch": 14.870036101083032, "grad_norm": 246.00350952148438, "learning_rate": 0.0001900300011070895, "loss": 7.4267, "step": 123570 }, { "epoch": 14.871239470517448, "grad_norm": 546.3649291992188, "learning_rate": 0.00019002834515777618, "loss": 7.3432, "step": 123580 }, { "epoch": 14.872442839951866, "grad_norm": 2349.000732421875, "learning_rate": 0.00019002668907816962, "loss": 7.4714, "step": 123590 }, { "epoch": 14.873646209386282, "grad_norm": 2980.470703125, "learning_rate": 0.00019002503286827217, "loss": 7.3986, "step": 123600 }, { "epoch": 14.874849578820697, "grad_norm": 724.5343627929688, "learning_rate": 0.00019002337652808632, "loss": 7.4567, "step": 123610 }, { "epoch": 14.876052948255115, "grad_norm": 4227.9267578125, "learning_rate": 0.0001900217200576144, "loss": 7.5499, "step": 123620 }, { "epoch": 14.87725631768953, "grad_norm": 1813.550537109375, "learning_rate": 0.00019002006345685883, "loss": 7.5066, "step": 123630 }, { "epoch": 14.878459687123947, "grad_norm": 3634.90966796875, "learning_rate": 0.000190018406725822, "loss": 7.4871, "step": 123640 }, { "epoch": 14.879663056558364, "grad_norm": 1739.0926513671875, "learning_rate": 0.0001900167498645063, "loss": 7.4252, "step": 123650 }, { "epoch": 14.88086642599278, "grad_norm": 1023.6815795898438, "learning_rate": 0.00019001509287291417, "loss": 7.4732, "step": 123660 }, { "epoch": 14.882069795427196, "grad_norm": 920.9478149414062, "learning_rate": 0.00019001343575104795, "loss": 7.4512, "step": 123670 }, { "epoch": 14.883273164861613, "grad_norm": 3632.940185546875, "learning_rate": 0.00019001177849891008, "loss": 7.4817, "step": 123680 }, { "epoch": 14.884476534296029, "grad_norm": 3801.51318359375, "learning_rate": 0.00019001012111650293, "loss": 7.3781, "step": 123690 }, { "epoch": 14.885679903730445, "grad_norm": 2609.939697265625, "learning_rate": 0.00019000846360382888, "loss": 7.3926, "step": 123700 }, { "epoch": 14.886883273164862, "grad_norm": 638.1207275390625, "learning_rate": 0.00019000680596089042, "loss": 7.4244, "step": 123710 }, { "epoch": 14.888086642599278, "grad_norm": 1193.732666015625, "learning_rate": 0.00019000514818768986, "loss": 7.4428, "step": 123720 }, { "epoch": 14.889290012033694, "grad_norm": 1122.576171875, "learning_rate": 0.00019000349028422964, "loss": 7.4545, "step": 123730 }, { "epoch": 14.890493381468112, "grad_norm": 4699.59033203125, "learning_rate": 0.00019000183225051213, "loss": 7.4537, "step": 123740 }, { "epoch": 14.891696750902527, "grad_norm": 5353.478515625, "learning_rate": 0.00019000017408653976, "loss": 7.334, "step": 123750 }, { "epoch": 14.892900120336943, "grad_norm": 1078.3751220703125, "learning_rate": 0.00018999851579231493, "loss": 7.4892, "step": 123760 }, { "epoch": 14.89410348977136, "grad_norm": 1494.1815185546875, "learning_rate": 0.00018999685736784003, "loss": 7.4796, "step": 123770 }, { "epoch": 14.895306859205776, "grad_norm": 2306.15380859375, "learning_rate": 0.00018999519881311746, "loss": 7.6021, "step": 123780 }, { "epoch": 14.896510228640192, "grad_norm": 6194.015625, "learning_rate": 0.00018999354012814957, "loss": 7.4274, "step": 123790 }, { "epoch": 14.89771359807461, "grad_norm": 659.0177612304688, "learning_rate": 0.00018999188131293884, "loss": 7.3333, "step": 123800 }, { "epoch": 14.898916967509026, "grad_norm": 408.4515380859375, "learning_rate": 0.00018999022236748764, "loss": 7.3332, "step": 123810 }, { "epoch": 14.900120336943441, "grad_norm": 362.9180908203125, "learning_rate": 0.00018998856329179836, "loss": 7.4628, "step": 123820 }, { "epoch": 14.901323706377857, "grad_norm": 586.2081298828125, "learning_rate": 0.00018998690408587345, "loss": 7.4983, "step": 123830 }, { "epoch": 14.902527075812275, "grad_norm": 986.05419921875, "learning_rate": 0.00018998524474971525, "loss": 7.3288, "step": 123840 }, { "epoch": 14.90373044524669, "grad_norm": 456.545654296875, "learning_rate": 0.00018998358528332619, "loss": 7.4967, "step": 123850 }, { "epoch": 14.904933814681106, "grad_norm": 218.33396911621094, "learning_rate": 0.00018998192568670864, "loss": 7.4225, "step": 123860 }, { "epoch": 14.906137184115524, "grad_norm": 914.9017944335938, "learning_rate": 0.00018998026595986502, "loss": 7.4261, "step": 123870 }, { "epoch": 14.90734055354994, "grad_norm": 942.8607788085938, "learning_rate": 0.0001899786061027978, "loss": 7.4334, "step": 123880 }, { "epoch": 14.908543922984355, "grad_norm": 588.4299926757812, "learning_rate": 0.00018997694611550927, "loss": 7.4604, "step": 123890 }, { "epoch": 14.909747292418773, "grad_norm": 175.57318115234375, "learning_rate": 0.0001899752859980019, "loss": 7.4446, "step": 123900 }, { "epoch": 14.910950661853189, "grad_norm": 643.1263427734375, "learning_rate": 0.00018997362575027808, "loss": 7.4946, "step": 123910 }, { "epoch": 14.912154031287605, "grad_norm": 616.7755737304688, "learning_rate": 0.0001899719653723402, "loss": 7.2834, "step": 123920 }, { "epoch": 14.913357400722022, "grad_norm": 164.27496337890625, "learning_rate": 0.00018997030486419067, "loss": 7.3486, "step": 123930 }, { "epoch": 14.914560770156438, "grad_norm": 619.3140258789062, "learning_rate": 0.0001899686442258319, "loss": 7.2992, "step": 123940 }, { "epoch": 14.915764139590854, "grad_norm": 183.71238708496094, "learning_rate": 0.00018996698345726632, "loss": 7.3425, "step": 123950 }, { "epoch": 14.916967509025271, "grad_norm": 113.04483032226562, "learning_rate": 0.00018996532255849627, "loss": 7.263, "step": 123960 }, { "epoch": 14.918170878459687, "grad_norm": 142.35398864746094, "learning_rate": 0.00018996366152952422, "loss": 7.3863, "step": 123970 }, { "epoch": 14.919374247894103, "grad_norm": 96.34976959228516, "learning_rate": 0.00018996200037035247, "loss": 7.4361, "step": 123980 }, { "epoch": 14.92057761732852, "grad_norm": 408.62603759765625, "learning_rate": 0.00018996033908098356, "loss": 7.3373, "step": 123990 }, { "epoch": 14.921780986762936, "grad_norm": 147.46519470214844, "learning_rate": 0.00018995867766141982, "loss": 7.3312, "step": 124000 }, { "epoch": 14.922984356197352, "grad_norm": 178.59030151367188, "learning_rate": 0.00018995701611166364, "loss": 7.3884, "step": 124010 }, { "epoch": 14.92418772563177, "grad_norm": 722.7620239257812, "learning_rate": 0.00018995535443171746, "loss": 7.293, "step": 124020 }, { "epoch": 14.925391095066185, "grad_norm": 154.78280639648438, "learning_rate": 0.00018995369262158368, "loss": 7.4163, "step": 124030 }, { "epoch": 14.926594464500601, "grad_norm": 142.9695587158203, "learning_rate": 0.00018995203068126467, "loss": 7.3729, "step": 124040 }, { "epoch": 14.927797833935019, "grad_norm": 449.0110778808594, "learning_rate": 0.00018995036861076288, "loss": 7.3684, "step": 124050 }, { "epoch": 14.929001203369435, "grad_norm": 150.65570068359375, "learning_rate": 0.00018994870641008072, "loss": 7.302, "step": 124060 }, { "epoch": 14.93020457280385, "grad_norm": 751.1250610351562, "learning_rate": 0.00018994704407922054, "loss": 7.3357, "step": 124070 }, { "epoch": 14.931407942238268, "grad_norm": 525.4304809570312, "learning_rate": 0.0001899453816181848, "loss": 7.2665, "step": 124080 }, { "epoch": 14.932611311672684, "grad_norm": 292.63311767578125, "learning_rate": 0.00018994371902697588, "loss": 7.4467, "step": 124090 }, { "epoch": 14.9338146811071, "grad_norm": 164.7617645263672, "learning_rate": 0.00018994205630559622, "loss": 7.2848, "step": 124100 }, { "epoch": 14.935018050541515, "grad_norm": 789.33349609375, "learning_rate": 0.00018994039345404814, "loss": 7.3403, "step": 124110 }, { "epoch": 14.936221419975933, "grad_norm": 412.9779357910156, "learning_rate": 0.00018993873047233416, "loss": 7.3437, "step": 124120 }, { "epoch": 14.937424789410349, "grad_norm": 211.40426635742188, "learning_rate": 0.0001899370673604566, "loss": 7.2826, "step": 124130 }, { "epoch": 14.938628158844764, "grad_norm": 798.9019775390625, "learning_rate": 0.0001899354041184179, "loss": 7.3176, "step": 124140 }, { "epoch": 14.939831528279182, "grad_norm": 475.837890625, "learning_rate": 0.00018993374074622048, "loss": 7.1957, "step": 124150 }, { "epoch": 14.941034897713598, "grad_norm": 414.1307678222656, "learning_rate": 0.00018993207724386674, "loss": 7.3616, "step": 124160 }, { "epoch": 14.942238267148014, "grad_norm": 243.13662719726562, "learning_rate": 0.00018993041361135905, "loss": 7.3629, "step": 124170 }, { "epoch": 14.943441636582431, "grad_norm": 89.33318328857422, "learning_rate": 0.00018992874984869986, "loss": 7.2794, "step": 124180 }, { "epoch": 14.944645006016847, "grad_norm": 486.8153381347656, "learning_rate": 0.00018992708595589157, "loss": 7.2819, "step": 124190 }, { "epoch": 14.945848375451263, "grad_norm": 128.45579528808594, "learning_rate": 0.00018992542193293658, "loss": 7.279, "step": 124200 }, { "epoch": 14.94705174488568, "grad_norm": 136.5570831298828, "learning_rate": 0.00018992375777983732, "loss": 7.2401, "step": 124210 }, { "epoch": 14.948255114320096, "grad_norm": 544.4719848632812, "learning_rate": 0.00018992209349659618, "loss": 7.2996, "step": 124220 }, { "epoch": 14.949458483754512, "grad_norm": 807.21044921875, "learning_rate": 0.00018992042908321554, "loss": 7.3783, "step": 124230 }, { "epoch": 14.95066185318893, "grad_norm": 732.8168334960938, "learning_rate": 0.00018991876453969787, "loss": 7.3139, "step": 124240 }, { "epoch": 14.951865222623345, "grad_norm": 722.955322265625, "learning_rate": 0.00018991709986604556, "loss": 7.3174, "step": 124250 }, { "epoch": 14.953068592057761, "grad_norm": 178.8930206298828, "learning_rate": 0.00018991543506226098, "loss": 7.3048, "step": 124260 }, { "epoch": 14.954271961492179, "grad_norm": 419.10394287109375, "learning_rate": 0.00018991377012834656, "loss": 7.2753, "step": 124270 }, { "epoch": 14.955475330926594, "grad_norm": 396.1692199707031, "learning_rate": 0.00018991210506430473, "loss": 7.3006, "step": 124280 }, { "epoch": 14.95667870036101, "grad_norm": 255.86659240722656, "learning_rate": 0.00018991043987013785, "loss": 7.2862, "step": 124290 }, { "epoch": 14.957882069795428, "grad_norm": 118.07201385498047, "learning_rate": 0.0001899087745458484, "loss": 7.2756, "step": 124300 }, { "epoch": 14.959085439229844, "grad_norm": 149.11395263671875, "learning_rate": 0.00018990710909143877, "loss": 7.2947, "step": 124310 }, { "epoch": 14.96028880866426, "grad_norm": 4135.94873046875, "learning_rate": 0.00018990544350691132, "loss": 7.2493, "step": 124320 }, { "epoch": 14.961492178098677, "grad_norm": 2144.12060546875, "learning_rate": 0.00018990377779226853, "loss": 7.4241, "step": 124330 }, { "epoch": 14.962695547533093, "grad_norm": 1090.0350341796875, "learning_rate": 0.00018990211194751274, "loss": 7.29, "step": 124340 }, { "epoch": 14.963898916967509, "grad_norm": 300.70428466796875, "learning_rate": 0.00018990044597264643, "loss": 7.2437, "step": 124350 }, { "epoch": 14.965102286401926, "grad_norm": 395.0734558105469, "learning_rate": 0.00018989877986767196, "loss": 7.297, "step": 124360 }, { "epoch": 14.966305655836342, "grad_norm": 218.81089782714844, "learning_rate": 0.00018989711363259177, "loss": 7.1761, "step": 124370 }, { "epoch": 14.967509025270758, "grad_norm": 116.31067657470703, "learning_rate": 0.00018989544726740826, "loss": 7.2163, "step": 124380 }, { "epoch": 14.968712394705175, "grad_norm": 694.2081909179688, "learning_rate": 0.00018989378077212385, "loss": 7.133, "step": 124390 }, { "epoch": 14.969915764139591, "grad_norm": 417.2356262207031, "learning_rate": 0.00018989211414674096, "loss": 7.385, "step": 124400 }, { "epoch": 14.971119133574007, "grad_norm": 157.76431274414062, "learning_rate": 0.00018989044739126194, "loss": 7.2909, "step": 124410 }, { "epoch": 14.972322503008424, "grad_norm": 244.60145568847656, "learning_rate": 0.00018988878050568928, "loss": 7.2631, "step": 124420 }, { "epoch": 14.97352587244284, "grad_norm": 106.88508605957031, "learning_rate": 0.00018988711349002534, "loss": 7.3083, "step": 124430 }, { "epoch": 14.974729241877256, "grad_norm": 333.1745300292969, "learning_rate": 0.00018988544634427257, "loss": 7.3695, "step": 124440 }, { "epoch": 14.975932611311674, "grad_norm": 2485.152099609375, "learning_rate": 0.00018988377906843337, "loss": 7.3444, "step": 124450 }, { "epoch": 14.97713598074609, "grad_norm": 5089.16796875, "learning_rate": 0.00018988211166251015, "loss": 7.3382, "step": 124460 }, { "epoch": 14.978339350180505, "grad_norm": 5350.455078125, "learning_rate": 0.0001898804441265053, "loss": 7.2943, "step": 124470 }, { "epoch": 14.979542719614923, "grad_norm": 15196.720703125, "learning_rate": 0.00018987877646042128, "loss": 7.3234, "step": 124480 }, { "epoch": 14.980746089049338, "grad_norm": 11147.7783203125, "learning_rate": 0.0001898771086642605, "loss": 7.3459, "step": 124490 }, { "epoch": 14.981949458483754, "grad_norm": 760.6731567382812, "learning_rate": 0.00018987544073802533, "loss": 7.4078, "step": 124500 }, { "epoch": 14.98315282791817, "grad_norm": 145.38235473632812, "learning_rate": 0.0001898737726817182, "loss": 7.5813, "step": 124510 }, { "epoch": 14.984356197352588, "grad_norm": 398.988525390625, "learning_rate": 0.00018987210449534155, "loss": 7.4103, "step": 124520 }, { "epoch": 14.985559566787003, "grad_norm": 181.80856323242188, "learning_rate": 0.00018987043617889777, "loss": 7.4287, "step": 124530 }, { "epoch": 14.98676293622142, "grad_norm": 9107.564453125, "learning_rate": 0.0001898687677323893, "loss": 7.3466, "step": 124540 }, { "epoch": 14.987966305655837, "grad_norm": 9322.19140625, "learning_rate": 0.00018986709915581851, "loss": 7.2363, "step": 124550 }, { "epoch": 14.989169675090253, "grad_norm": 1166.1513671875, "learning_rate": 0.00018986543044918782, "loss": 7.3837, "step": 124560 }, { "epoch": 14.990373044524668, "grad_norm": 337.0623779296875, "learning_rate": 0.00018986376161249968, "loss": 7.3834, "step": 124570 }, { "epoch": 14.991576413959086, "grad_norm": 270.37750244140625, "learning_rate": 0.00018986209264575652, "loss": 7.3116, "step": 124580 }, { "epoch": 14.992779783393502, "grad_norm": 167.70469665527344, "learning_rate": 0.0001898604235489607, "loss": 7.4191, "step": 124590 }, { "epoch": 14.993983152827917, "grad_norm": 221.2666778564453, "learning_rate": 0.00018985875432211467, "loss": 7.3555, "step": 124600 }, { "epoch": 14.995186522262335, "grad_norm": 138.39874267578125, "learning_rate": 0.00018985708496522082, "loss": 7.4054, "step": 124610 }, { "epoch": 14.99638989169675, "grad_norm": 1753.5211181640625, "learning_rate": 0.00018985541547828158, "loss": 7.3489, "step": 124620 }, { "epoch": 14.997593261131167, "grad_norm": 1907.968017578125, "learning_rate": 0.0001898537458612994, "loss": 7.2738, "step": 124630 }, { "epoch": 14.998796630565584, "grad_norm": 353.17498779296875, "learning_rate": 0.00018985207611427668, "loss": 7.2584, "step": 124640 }, { "epoch": 15.0, "grad_norm": 1369.762451171875, "learning_rate": 0.00018985040623721578, "loss": 7.367, "step": 124650 }, { "epoch": 15.0, "eval_loss": 7.337878227233887, "eval_runtime": 118.6149, "eval_samples_per_second": 62.277, "eval_steps_per_second": 7.79, "step": 124650 }, { "epoch": 15.001203369434416, "grad_norm": 576.1705932617188, "learning_rate": 0.00018984873623011916, "loss": 7.3064, "step": 124660 }, { "epoch": 15.002406738868833, "grad_norm": 588.5545654296875, "learning_rate": 0.00018984706609298922, "loss": 7.319, "step": 124670 }, { "epoch": 15.00361010830325, "grad_norm": 5784.7509765625, "learning_rate": 0.00018984539582582842, "loss": 7.4283, "step": 124680 }, { "epoch": 15.004813477737665, "grad_norm": 3700.33544921875, "learning_rate": 0.00018984372542863917, "loss": 7.3646, "step": 124690 }, { "epoch": 15.006016847172083, "grad_norm": 6456.92431640625, "learning_rate": 0.00018984205490142384, "loss": 7.382, "step": 124700 }, { "epoch": 15.007220216606498, "grad_norm": 5785.44287109375, "learning_rate": 0.00018984038424418488, "loss": 7.3675, "step": 124710 }, { "epoch": 15.008423586040914, "grad_norm": 6390.42431640625, "learning_rate": 0.0001898387134569247, "loss": 7.4846, "step": 124720 }, { "epoch": 15.009626955475332, "grad_norm": 95375.96875, "learning_rate": 0.00018983704253964573, "loss": 7.4069, "step": 124730 }, { "epoch": 15.010830324909747, "grad_norm": 157604.671875, "learning_rate": 0.00018983537149235038, "loss": 7.3716, "step": 124740 }, { "epoch": 15.012033694344163, "grad_norm": 20311.841796875, "learning_rate": 0.00018983370031504108, "loss": 7.402, "step": 124750 }, { "epoch": 15.01323706377858, "grad_norm": 159118.9375, "learning_rate": 0.0001898320290077202, "loss": 7.3826, "step": 124760 }, { "epoch": 15.014440433212997, "grad_norm": 42688.9296875, "learning_rate": 0.00018983035757039022, "loss": 7.5085, "step": 124770 }, { "epoch": 15.015643802647412, "grad_norm": 203326.390625, "learning_rate": 0.00018982868600305356, "loss": 7.614, "step": 124780 }, { "epoch": 15.01684717208183, "grad_norm": 71533.9453125, "learning_rate": 0.00018982701430571254, "loss": 7.6447, "step": 124790 }, { "epoch": 15.018050541516246, "grad_norm": 381853.0, "learning_rate": 0.0001898253424783697, "loss": 7.4278, "step": 124800 }, { "epoch": 15.019253910950662, "grad_norm": 345558.40625, "learning_rate": 0.00018982367052102742, "loss": 7.4888, "step": 124810 }, { "epoch": 15.020457280385079, "grad_norm": 30896.861328125, "learning_rate": 0.00018982199843368811, "loss": 7.4484, "step": 124820 }, { "epoch": 15.021660649819495, "grad_norm": 27468.91015625, "learning_rate": 0.00018982032621635416, "loss": 7.5939, "step": 124830 }, { "epoch": 15.02286401925391, "grad_norm": 88471.640625, "learning_rate": 0.00018981865386902806, "loss": 7.5852, "step": 124840 }, { "epoch": 15.024067388688326, "grad_norm": 121201.0859375, "learning_rate": 0.00018981698139171219, "loss": 7.6381, "step": 124850 }, { "epoch": 15.025270758122744, "grad_norm": 268526.6875, "learning_rate": 0.00018981530878440893, "loss": 7.5442, "step": 124860 }, { "epoch": 15.02647412755716, "grad_norm": 38136.21875, "learning_rate": 0.00018981363604712078, "loss": 7.6097, "step": 124870 }, { "epoch": 15.027677496991576, "grad_norm": 92757.8203125, "learning_rate": 0.00018981196317985013, "loss": 7.5753, "step": 124880 }, { "epoch": 15.028880866425993, "grad_norm": 46574.66015625, "learning_rate": 0.00018981029018259936, "loss": 7.5345, "step": 124890 }, { "epoch": 15.030084235860409, "grad_norm": 23084.111328125, "learning_rate": 0.00018980861705537096, "loss": 7.6102, "step": 124900 }, { "epoch": 15.031287605294825, "grad_norm": 11719.412109375, "learning_rate": 0.00018980694379816732, "loss": 7.5387, "step": 124910 }, { "epoch": 15.032490974729242, "grad_norm": 17841.978515625, "learning_rate": 0.00018980527041099086, "loss": 7.6421, "step": 124920 }, { "epoch": 15.033694344163658, "grad_norm": 3395.980712890625, "learning_rate": 0.00018980359689384397, "loss": 7.6997, "step": 124930 }, { "epoch": 15.034897713598074, "grad_norm": 40489.2734375, "learning_rate": 0.00018980192324672912, "loss": 7.6845, "step": 124940 }, { "epoch": 15.036101083032491, "grad_norm": 31771.244140625, "learning_rate": 0.00018980024946964872, "loss": 7.5917, "step": 124950 }, { "epoch": 15.037304452466907, "grad_norm": 15742.474609375, "learning_rate": 0.00018979857556260517, "loss": 7.5517, "step": 124960 }, { "epoch": 15.038507821901323, "grad_norm": 25554.30078125, "learning_rate": 0.00018979690152560094, "loss": 7.595, "step": 124970 }, { "epoch": 15.03971119133574, "grad_norm": 10457.7578125, "learning_rate": 0.0001897952273586384, "loss": 7.5729, "step": 124980 }, { "epoch": 15.040914560770156, "grad_norm": 52429.6484375, "learning_rate": 0.00018979355306172001, "loss": 7.5176, "step": 124990 }, { "epoch": 15.042117930204572, "grad_norm": 15750.4541015625, "learning_rate": 0.00018979187863484817, "loss": 7.5973, "step": 125000 }, { "epoch": 15.04332129963899, "grad_norm": 46397.9453125, "learning_rate": 0.00018979020407802532, "loss": 7.5389, "step": 125010 }, { "epoch": 15.044524669073406, "grad_norm": 5923.57275390625, "learning_rate": 0.00018978852939125388, "loss": 7.6101, "step": 125020 }, { "epoch": 15.045728038507821, "grad_norm": 5345.36767578125, "learning_rate": 0.00018978685457453627, "loss": 7.6017, "step": 125030 }, { "epoch": 15.046931407942239, "grad_norm": 26302.05078125, "learning_rate": 0.0001897851796278749, "loss": 7.5601, "step": 125040 }, { "epoch": 15.048134777376655, "grad_norm": 9897.3740234375, "learning_rate": 0.00018978350455127222, "loss": 7.5551, "step": 125050 }, { "epoch": 15.04933814681107, "grad_norm": 19237822.0, "learning_rate": 0.00018978182934473064, "loss": 7.5993, "step": 125060 }, { "epoch": 15.050541516245488, "grad_norm": 6017680.5, "learning_rate": 0.00018978015400825256, "loss": 7.5672, "step": 125070 }, { "epoch": 15.051744885679904, "grad_norm": 42625164.0, "learning_rate": 0.00018977847854184047, "loss": 7.6082, "step": 125080 }, { "epoch": 15.05294825511432, "grad_norm": 41006348.0, "learning_rate": 0.00018977680294549675, "loss": 7.5719, "step": 125090 }, { "epoch": 15.054151624548737, "grad_norm": 6016879.5, "learning_rate": 0.0001897751272192238, "loss": 7.6385, "step": 125100 }, { "epoch": 15.055354993983153, "grad_norm": 5245343.5, "learning_rate": 0.00018977345136302415, "loss": 7.6785, "step": 125110 }, { "epoch": 15.056558363417569, "grad_norm": 1546680.5, "learning_rate": 0.00018977177537690007, "loss": 7.6086, "step": 125120 }, { "epoch": 15.057761732851986, "grad_norm": 3506114.75, "learning_rate": 0.00018977009926085411, "loss": 7.7258, "step": 125130 }, { "epoch": 15.058965102286402, "grad_norm": 13028783.0, "learning_rate": 0.00018976842301488865, "loss": 7.6379, "step": 125140 }, { "epoch": 15.060168471720818, "grad_norm": 38439512.0, "learning_rate": 0.0001897667466390061, "loss": 7.6399, "step": 125150 }, { "epoch": 15.061371841155236, "grad_norm": 5304360.5, "learning_rate": 0.0001897650701332089, "loss": 7.6453, "step": 125160 }, { "epoch": 15.062575210589651, "grad_norm": 114621320.0, "learning_rate": 0.0001897633934974995, "loss": 7.6827, "step": 125170 }, { "epoch": 15.063778580024067, "grad_norm": 38171608.0, "learning_rate": 0.0001897617167318803, "loss": 7.6337, "step": 125180 }, { "epoch": 15.064981949458483, "grad_norm": 56016476.0, "learning_rate": 0.00018976003983635375, "loss": 7.63, "step": 125190 }, { "epoch": 15.0661853188929, "grad_norm": 30925518.0, "learning_rate": 0.00018975836281092225, "loss": 7.7324, "step": 125200 }, { "epoch": 15.067388688327316, "grad_norm": 68823728.0, "learning_rate": 0.0001897566856555882, "loss": 7.6188, "step": 125210 }, { "epoch": 15.068592057761732, "grad_norm": 3008264448.0, "learning_rate": 0.00018975500837035413, "loss": 7.6448, "step": 125220 }, { "epoch": 15.06979542719615, "grad_norm": 61769200.0, "learning_rate": 0.00018975333095522237, "loss": 7.5929, "step": 125230 }, { "epoch": 15.070998796630565, "grad_norm": 33129292.0, "learning_rate": 0.00018975165341019538, "loss": 7.468, "step": 125240 }, { "epoch": 15.072202166064981, "grad_norm": 403965568.0, "learning_rate": 0.0001897499757352756, "loss": 7.5362, "step": 125250 }, { "epoch": 15.073405535499399, "grad_norm": 67818064.0, "learning_rate": 0.00018974829793046545, "loss": 7.4803, "step": 125260 }, { "epoch": 15.074608904933815, "grad_norm": 91769424.0, "learning_rate": 0.00018974661999576734, "loss": 7.5615, "step": 125270 }, { "epoch": 15.07581227436823, "grad_norm": 11050623.0, "learning_rate": 0.00018974494193118373, "loss": 7.5256, "step": 125280 }, { "epoch": 15.077015643802648, "grad_norm": 124145520.0, "learning_rate": 0.000189743263736717, "loss": 7.5175, "step": 125290 }, { "epoch": 15.078219013237064, "grad_norm": 265549664.0, "learning_rate": 0.00018974158541236965, "loss": 7.4579, "step": 125300 }, { "epoch": 15.07942238267148, "grad_norm": 617909696.0, "learning_rate": 0.00018973990695814405, "loss": 7.5923, "step": 125310 }, { "epoch": 15.080625752105897, "grad_norm": 245739968.0, "learning_rate": 0.00018973822837404264, "loss": 7.5682, "step": 125320 }, { "epoch": 15.081829121540313, "grad_norm": 87915488.0, "learning_rate": 0.00018973654966006787, "loss": 7.6359, "step": 125330 }, { "epoch": 15.083032490974729, "grad_norm": 55169520.0, "learning_rate": 0.00018973487081622216, "loss": 7.5563, "step": 125340 }, { "epoch": 15.084235860409146, "grad_norm": 916735040.0, "learning_rate": 0.00018973319184250795, "loss": 7.5715, "step": 125350 }, { "epoch": 15.085439229843562, "grad_norm": 11494315.0, "learning_rate": 0.00018973151273892763, "loss": 7.459, "step": 125360 }, { "epoch": 15.086642599277978, "grad_norm": 362630496.0, "learning_rate": 0.00018972983350548365, "loss": 7.5424, "step": 125370 }, { "epoch": 15.087845968712395, "grad_norm": 256943984.0, "learning_rate": 0.00018972815414217847, "loss": 7.5612, "step": 125380 }, { "epoch": 15.089049338146811, "grad_norm": 39302696.0, "learning_rate": 0.0001897264746490145, "loss": 7.5001, "step": 125390 }, { "epoch": 15.090252707581227, "grad_norm": 50235272.0, "learning_rate": 0.00018972479502599415, "loss": 7.485, "step": 125400 }, { "epoch": 15.091456077015645, "grad_norm": 2478366.5, "learning_rate": 0.00018972311527311987, "loss": 7.4595, "step": 125410 }, { "epoch": 15.09265944645006, "grad_norm": 25049776.0, "learning_rate": 0.00018972143539039413, "loss": 7.4178, "step": 125420 }, { "epoch": 15.093862815884476, "grad_norm": 44553648.0, "learning_rate": 0.00018971975537781927, "loss": 7.3734, "step": 125430 }, { "epoch": 15.095066185318894, "grad_norm": 25317616.0, "learning_rate": 0.0001897180752353978, "loss": 7.3609, "step": 125440 }, { "epoch": 15.09626955475331, "grad_norm": 43974652.0, "learning_rate": 0.00018971639496313212, "loss": 7.321, "step": 125450 }, { "epoch": 15.097472924187725, "grad_norm": 76693800.0, "learning_rate": 0.00018971471456102464, "loss": 7.452, "step": 125460 }, { "epoch": 15.098676293622143, "grad_norm": 48645816.0, "learning_rate": 0.00018971303402907786, "loss": 7.3009, "step": 125470 }, { "epoch": 15.099879663056559, "grad_norm": 185690368.0, "learning_rate": 0.00018971135336729415, "loss": 7.4491, "step": 125480 }, { "epoch": 15.101083032490974, "grad_norm": 32901132.0, "learning_rate": 0.00018970967257567596, "loss": 7.4528, "step": 125490 }, { "epoch": 15.102286401925392, "grad_norm": 124543952.0, "learning_rate": 0.00018970799165422575, "loss": 7.4281, "step": 125500 }, { "epoch": 15.103489771359808, "grad_norm": 51466192.0, "learning_rate": 0.0001897063106029459, "loss": 7.3982, "step": 125510 }, { "epoch": 15.104693140794224, "grad_norm": 66691996.0, "learning_rate": 0.00018970462942183888, "loss": 7.5264, "step": 125520 }, { "epoch": 15.10589651022864, "grad_norm": 123348960.0, "learning_rate": 0.00018970294811090712, "loss": 7.3115, "step": 125530 }, { "epoch": 15.107099879663057, "grad_norm": 127885784.0, "learning_rate": 0.00018970126667015302, "loss": 7.5694, "step": 125540 }, { "epoch": 15.108303249097473, "grad_norm": 100609040.0, "learning_rate": 0.00018969958509957907, "loss": 7.3544, "step": 125550 }, { "epoch": 15.109506618531888, "grad_norm": 95557008.0, "learning_rate": 0.00018969790339918766, "loss": 7.4631, "step": 125560 }, { "epoch": 15.110709987966306, "grad_norm": 20857170.0, "learning_rate": 0.00018969622156898126, "loss": 7.4274, "step": 125570 }, { "epoch": 15.111913357400722, "grad_norm": 24841854.0, "learning_rate": 0.00018969453960896227, "loss": 7.3387, "step": 125580 }, { "epoch": 15.113116726835138, "grad_norm": 75238168.0, "learning_rate": 0.00018969285751913313, "loss": 7.4576, "step": 125590 }, { "epoch": 15.114320096269555, "grad_norm": 4234657.5, "learning_rate": 0.00018969117529949627, "loss": 7.5172, "step": 125600 }, { "epoch": 15.115523465703971, "grad_norm": 12823890.0, "learning_rate": 0.00018968949295005417, "loss": 7.4722, "step": 125610 }, { "epoch": 15.116726835138387, "grad_norm": 89470008.0, "learning_rate": 0.0001896878104708092, "loss": 7.5282, "step": 125620 }, { "epoch": 15.117930204572804, "grad_norm": 10607944.0, "learning_rate": 0.00018968612786176382, "loss": 7.6361, "step": 125630 }, { "epoch": 15.11913357400722, "grad_norm": 6230450.5, "learning_rate": 0.0001896844451229205, "loss": 7.6768, "step": 125640 }, { "epoch": 15.120336943441636, "grad_norm": 113595872.0, "learning_rate": 0.0001896827622542816, "loss": 7.6624, "step": 125650 }, { "epoch": 15.121540312876053, "grad_norm": 25938984.0, "learning_rate": 0.00018968107925584966, "loss": 7.4947, "step": 125660 }, { "epoch": 15.12274368231047, "grad_norm": 35611652.0, "learning_rate": 0.000189679396127627, "loss": 7.5701, "step": 125670 }, { "epoch": 15.123947051744885, "grad_norm": 33624144.0, "learning_rate": 0.00018967771286961614, "loss": 7.6416, "step": 125680 }, { "epoch": 15.125150421179303, "grad_norm": 92578088.0, "learning_rate": 0.0001896760294818195, "loss": 7.6108, "step": 125690 }, { "epoch": 15.126353790613718, "grad_norm": 8069914.5, "learning_rate": 0.00018967434596423946, "loss": 7.4654, "step": 125700 }, { "epoch": 15.127557160048134, "grad_norm": 98823008.0, "learning_rate": 0.00018967266231687856, "loss": 7.4354, "step": 125710 }, { "epoch": 15.128760529482552, "grad_norm": 238709248.0, "learning_rate": 0.00018967097853973917, "loss": 7.4948, "step": 125720 }, { "epoch": 15.129963898916968, "grad_norm": 194297568.0, "learning_rate": 0.0001896692946328237, "loss": 7.4762, "step": 125730 }, { "epoch": 15.131167268351383, "grad_norm": 198150320.0, "learning_rate": 0.00018966761059613466, "loss": 7.5298, "step": 125740 }, { "epoch": 15.132370637785801, "grad_norm": 2562157.25, "learning_rate": 0.00018966592642967442, "loss": 7.6936, "step": 125750 }, { "epoch": 15.133574007220217, "grad_norm": 496502944.0, "learning_rate": 0.00018966424213344543, "loss": 7.6467, "step": 125760 }, { "epoch": 15.134777376654633, "grad_norm": 4172826.25, "learning_rate": 0.00018966255770745018, "loss": 7.6105, "step": 125770 }, { "epoch": 15.13598074608905, "grad_norm": 3981231.75, "learning_rate": 0.00018966087315169108, "loss": 7.6741, "step": 125780 }, { "epoch": 15.137184115523466, "grad_norm": 15576460.0, "learning_rate": 0.00018965918846617052, "loss": 7.5744, "step": 125790 }, { "epoch": 15.138387484957882, "grad_norm": 1738140.375, "learning_rate": 0.00018965750365089102, "loss": 7.6001, "step": 125800 }, { "epoch": 15.1395908543923, "grad_norm": 17191982.0, "learning_rate": 0.00018965581870585493, "loss": 7.6273, "step": 125810 }, { "epoch": 15.140794223826715, "grad_norm": 91394256.0, "learning_rate": 0.0001896541336310648, "loss": 7.6291, "step": 125820 }, { "epoch": 15.14199759326113, "grad_norm": 9186479.0, "learning_rate": 0.00018965244842652294, "loss": 7.6816, "step": 125830 }, { "epoch": 15.143200962695548, "grad_norm": 20866930.0, "learning_rate": 0.00018965076309223188, "loss": 7.6359, "step": 125840 }, { "epoch": 15.144404332129964, "grad_norm": 1660808.25, "learning_rate": 0.00018964907762819403, "loss": 7.63, "step": 125850 }, { "epoch": 15.14560770156438, "grad_norm": 2959039.5, "learning_rate": 0.00018964739203441183, "loss": 7.7153, "step": 125860 }, { "epoch": 15.146811070998796, "grad_norm": 56636160.0, "learning_rate": 0.0001896457063108877, "loss": 7.7589, "step": 125870 }, { "epoch": 15.148014440433213, "grad_norm": 880030.6875, "learning_rate": 0.00018964402045762416, "loss": 7.6986, "step": 125880 }, { "epoch": 15.14921780986763, "grad_norm": 2088721.625, "learning_rate": 0.00018964233447462355, "loss": 7.6734, "step": 125890 }, { "epoch": 15.150421179302045, "grad_norm": 282244.65625, "learning_rate": 0.00018964064836188835, "loss": 7.5968, "step": 125900 }, { "epoch": 15.151624548736462, "grad_norm": 1778353.75, "learning_rate": 0.000189638962119421, "loss": 7.5881, "step": 125910 }, { "epoch": 15.152827918170878, "grad_norm": 81155880.0, "learning_rate": 0.00018963727574722392, "loss": 7.5837, "step": 125920 }, { "epoch": 15.154031287605294, "grad_norm": 1443482.0, "learning_rate": 0.00018963558924529962, "loss": 7.6416, "step": 125930 }, { "epoch": 15.155234657039712, "grad_norm": 371085.125, "learning_rate": 0.00018963390261365044, "loss": 7.6189, "step": 125940 }, { "epoch": 15.156438026474127, "grad_norm": 1904537.25, "learning_rate": 0.00018963221585227894, "loss": 7.6868, "step": 125950 }, { "epoch": 15.157641395908543, "grad_norm": 398652.34375, "learning_rate": 0.00018963052896118742, "loss": 7.7223, "step": 125960 }, { "epoch": 15.15884476534296, "grad_norm": 7054542.0, "learning_rate": 0.00018962884194037845, "loss": 7.5471, "step": 125970 }, { "epoch": 15.160048134777377, "grad_norm": 548993.375, "learning_rate": 0.00018962715478985437, "loss": 7.6031, "step": 125980 }, { "epoch": 15.161251504211792, "grad_norm": 6219262.0, "learning_rate": 0.0001896254675096177, "loss": 7.6288, "step": 125990 }, { "epoch": 15.16245487364621, "grad_norm": 13239046.0, "learning_rate": 0.00018962378009967085, "loss": 7.6348, "step": 126000 }, { "epoch": 15.163658243080626, "grad_norm": 3554142.5, "learning_rate": 0.00018962209256001623, "loss": 7.6106, "step": 126010 }, { "epoch": 15.164861612515042, "grad_norm": 6099709.5, "learning_rate": 0.00018962040489065637, "loss": 7.5621, "step": 126020 }, { "epoch": 15.166064981949459, "grad_norm": 7998756.5, "learning_rate": 0.00018961871709159364, "loss": 7.6405, "step": 126030 }, { "epoch": 15.167268351383875, "grad_norm": 3567050.75, "learning_rate": 0.0001896170291628305, "loss": 7.6215, "step": 126040 }, { "epoch": 15.16847172081829, "grad_norm": 6089238.0, "learning_rate": 0.00018961534110436938, "loss": 7.5543, "step": 126050 }, { "epoch": 15.169675090252708, "grad_norm": 2044088.625, "learning_rate": 0.00018961365291621273, "loss": 7.6081, "step": 126060 }, { "epoch": 15.170878459687124, "grad_norm": 3017435.0, "learning_rate": 0.000189611964598363, "loss": 7.625, "step": 126070 }, { "epoch": 15.17208182912154, "grad_norm": 765081.5, "learning_rate": 0.00018961027615082266, "loss": 7.7568, "step": 126080 }, { "epoch": 15.173285198555957, "grad_norm": 1368187.875, "learning_rate": 0.0001896085875735941, "loss": 7.5899, "step": 126090 }, { "epoch": 15.174488567990373, "grad_norm": 3677093.25, "learning_rate": 0.0001896068988666798, "loss": 7.6117, "step": 126100 }, { "epoch": 15.175691937424789, "grad_norm": 51560796.0, "learning_rate": 0.0001896052100300822, "loss": 7.6965, "step": 126110 }, { "epoch": 15.176895306859207, "grad_norm": 29220676.0, "learning_rate": 0.00018960352106380372, "loss": 7.6417, "step": 126120 }, { "epoch": 15.178098676293622, "grad_norm": 115824040.0, "learning_rate": 0.00018960183196784684, "loss": 7.6137, "step": 126130 }, { "epoch": 15.179302045728038, "grad_norm": 67169680.0, "learning_rate": 0.00018960014274221396, "loss": 7.6809, "step": 126140 }, { "epoch": 15.180505415162456, "grad_norm": 52605844.0, "learning_rate": 0.00018959845338690758, "loss": 7.6398, "step": 126150 }, { "epoch": 15.181708784596871, "grad_norm": 51805612.0, "learning_rate": 0.00018959676390193012, "loss": 7.5682, "step": 126160 }, { "epoch": 15.182912154031287, "grad_norm": 121982488.0, "learning_rate": 0.000189595074287284, "loss": 7.6199, "step": 126170 }, { "epoch": 15.184115523465705, "grad_norm": 17981904.0, "learning_rate": 0.0001895933845429717, "loss": 7.7416, "step": 126180 }, { "epoch": 15.18531889290012, "grad_norm": 165518880.0, "learning_rate": 0.00018959169466899563, "loss": 7.4758, "step": 126190 }, { "epoch": 15.186522262334536, "grad_norm": 61010684.0, "learning_rate": 0.0001895900046653583, "loss": 7.6222, "step": 126200 }, { "epoch": 15.187725631768952, "grad_norm": 86460704.0, "learning_rate": 0.0001895883145320621, "loss": 7.6164, "step": 126210 }, { "epoch": 15.18892900120337, "grad_norm": 150151440.0, "learning_rate": 0.00018958662426910947, "loss": 7.6218, "step": 126220 }, { "epoch": 15.190132370637786, "grad_norm": 54037864.0, "learning_rate": 0.00018958493387650292, "loss": 7.6237, "step": 126230 }, { "epoch": 15.191335740072201, "grad_norm": 43249288.0, "learning_rate": 0.0001895832433542448, "loss": 7.6659, "step": 126240 }, { "epoch": 15.192539109506619, "grad_norm": 571147008.0, "learning_rate": 0.00018958155270233765, "loss": 7.6348, "step": 126250 }, { "epoch": 15.193742478941035, "grad_norm": 90332880.0, "learning_rate": 0.00018957986192078385, "loss": 7.6374, "step": 126260 }, { "epoch": 15.19494584837545, "grad_norm": 32835612.0, "learning_rate": 0.0001895781710095859, "loss": 7.6133, "step": 126270 }, { "epoch": 15.196149217809868, "grad_norm": 45888972.0, "learning_rate": 0.0001895764799687462, "loss": 7.655, "step": 126280 }, { "epoch": 15.197352587244284, "grad_norm": 211303104.0, "learning_rate": 0.0001895747887982672, "loss": 7.7339, "step": 126290 }, { "epoch": 15.1985559566787, "grad_norm": 15292181.0, "learning_rate": 0.00018957309749815138, "loss": 7.6366, "step": 126300 }, { "epoch": 15.199759326113117, "grad_norm": 227544224.0, "learning_rate": 0.0001895714060684012, "loss": 7.6085, "step": 126310 }, { "epoch": 15.200962695547533, "grad_norm": 140794672.0, "learning_rate": 0.00018956971450901904, "loss": 7.6487, "step": 126320 }, { "epoch": 15.202166064981949, "grad_norm": 278590528.0, "learning_rate": 0.0001895680228200074, "loss": 7.6761, "step": 126330 }, { "epoch": 15.203369434416366, "grad_norm": 299309792.0, "learning_rate": 0.00018956633100136874, "loss": 7.6734, "step": 126340 }, { "epoch": 15.204572803850782, "grad_norm": 52255880.0, "learning_rate": 0.0001895646390531055, "loss": 7.6526, "step": 126350 }, { "epoch": 15.205776173285198, "grad_norm": 25389330.0, "learning_rate": 0.00018956294697522005, "loss": 7.6007, "step": 126360 }, { "epoch": 15.206979542719615, "grad_norm": 62796488.0, "learning_rate": 0.00018956125476771494, "loss": 7.6383, "step": 126370 }, { "epoch": 15.208182912154031, "grad_norm": 22353084.0, "learning_rate": 0.00018955956243059258, "loss": 7.5952, "step": 126380 }, { "epoch": 15.209386281588447, "grad_norm": 62999420.0, "learning_rate": 0.00018955786996385543, "loss": 7.5965, "step": 126390 }, { "epoch": 15.210589651022865, "grad_norm": 5550995.0, "learning_rate": 0.00018955617736750592, "loss": 7.6796, "step": 126400 }, { "epoch": 15.21179302045728, "grad_norm": 198051696.0, "learning_rate": 0.00018955448464154652, "loss": 7.603, "step": 126410 }, { "epoch": 15.212996389891696, "grad_norm": 142097216.0, "learning_rate": 0.00018955279178597967, "loss": 7.706, "step": 126420 }, { "epoch": 15.214199759326114, "grad_norm": 152284144.0, "learning_rate": 0.0001895510988008078, "loss": 7.5868, "step": 126430 }, { "epoch": 15.21540312876053, "grad_norm": 81759360.0, "learning_rate": 0.00018954940568603342, "loss": 7.6344, "step": 126440 }, { "epoch": 15.216606498194945, "grad_norm": 108964240.0, "learning_rate": 0.00018954771244165892, "loss": 7.6994, "step": 126450 }, { "epoch": 15.217809867629363, "grad_norm": 180551568.0, "learning_rate": 0.00018954601906768676, "loss": 7.5111, "step": 126460 }, { "epoch": 15.219013237063779, "grad_norm": 62206476.0, "learning_rate": 0.00018954432556411943, "loss": 7.6055, "step": 126470 }, { "epoch": 15.220216606498195, "grad_norm": 66520216.0, "learning_rate": 0.0001895426319309593, "loss": 7.5669, "step": 126480 }, { "epoch": 15.221419975932612, "grad_norm": 427407872.0, "learning_rate": 0.00018954093816820892, "loss": 7.5638, "step": 126490 }, { "epoch": 15.222623345367028, "grad_norm": 51074128.0, "learning_rate": 0.00018953924427587068, "loss": 7.7063, "step": 126500 }, { "epoch": 15.223826714801444, "grad_norm": 97677584.0, "learning_rate": 0.00018953755025394707, "loss": 7.4796, "step": 126510 }, { "epoch": 15.225030084235861, "grad_norm": 85726640.0, "learning_rate": 0.0001895358561024405, "loss": 7.6195, "step": 126520 }, { "epoch": 15.226233453670277, "grad_norm": 89334944.0, "learning_rate": 0.00018953416182135343, "loss": 7.6106, "step": 126530 }, { "epoch": 15.227436823104693, "grad_norm": 138687424.0, "learning_rate": 0.00018953246741068832, "loss": 7.6594, "step": 126540 }, { "epoch": 15.22864019253911, "grad_norm": 139341248.0, "learning_rate": 0.00018953077287044766, "loss": 7.5984, "step": 126550 }, { "epoch": 15.229843561973526, "grad_norm": 11463859.0, "learning_rate": 0.00018952907820063387, "loss": 7.5688, "step": 126560 }, { "epoch": 15.231046931407942, "grad_norm": 198660160.0, "learning_rate": 0.00018952738340124932, "loss": 7.6082, "step": 126570 }, { "epoch": 15.232250300842358, "grad_norm": 138143184.0, "learning_rate": 0.00018952568847229663, "loss": 7.6043, "step": 126580 }, { "epoch": 15.233453670276775, "grad_norm": 40182768.0, "learning_rate": 0.0001895239934137781, "loss": 7.6157, "step": 126590 }, { "epoch": 15.234657039711191, "grad_norm": 7230538.0, "learning_rate": 0.0001895222982256963, "loss": 7.6768, "step": 126600 }, { "epoch": 15.235860409145607, "grad_norm": 99642928.0, "learning_rate": 0.00018952060290805363, "loss": 7.7098, "step": 126610 }, { "epoch": 15.237063778580024, "grad_norm": 144365696.0, "learning_rate": 0.0001895189074608525, "loss": 7.6389, "step": 126620 }, { "epoch": 15.23826714801444, "grad_norm": 46300184.0, "learning_rate": 0.00018951721188409547, "loss": 7.5654, "step": 126630 }, { "epoch": 15.239470517448856, "grad_norm": 36155432.0, "learning_rate": 0.00018951551617778489, "loss": 7.539, "step": 126640 }, { "epoch": 15.240673886883274, "grad_norm": 118549752.0, "learning_rate": 0.00018951382034192328, "loss": 7.6694, "step": 126650 }, { "epoch": 15.24187725631769, "grad_norm": 202069824.0, "learning_rate": 0.00018951212437651305, "loss": 7.6656, "step": 126660 }, { "epoch": 15.243080625752105, "grad_norm": 165244304.0, "learning_rate": 0.0001895104282815567, "loss": 7.6104, "step": 126670 }, { "epoch": 15.244283995186523, "grad_norm": 61142300.0, "learning_rate": 0.00018950873205705665, "loss": 7.6946, "step": 126680 }, { "epoch": 15.245487364620939, "grad_norm": 40359160.0, "learning_rate": 0.00018950703570301536, "loss": 7.5679, "step": 126690 }, { "epoch": 15.246690734055354, "grad_norm": 32732284.0, "learning_rate": 0.0001895053392194353, "loss": 7.6494, "step": 126700 }, { "epoch": 15.247894103489772, "grad_norm": 8230265.5, "learning_rate": 0.0001895036426063189, "loss": 7.6714, "step": 126710 }, { "epoch": 15.249097472924188, "grad_norm": 193257040.0, "learning_rate": 0.00018950194586366865, "loss": 7.6969, "step": 126720 }, { "epoch": 15.250300842358604, "grad_norm": 29375584.0, "learning_rate": 0.00018950024899148696, "loss": 7.6407, "step": 126730 }, { "epoch": 15.251504211793021, "grad_norm": 83153360.0, "learning_rate": 0.00018949855198977636, "loss": 7.6445, "step": 126740 }, { "epoch": 15.252707581227437, "grad_norm": 211772352.0, "learning_rate": 0.00018949685485853922, "loss": 7.5814, "step": 126750 }, { "epoch": 15.253910950661853, "grad_norm": 50353728.0, "learning_rate": 0.00018949515759777806, "loss": 7.66, "step": 126760 }, { "epoch": 15.25511432009627, "grad_norm": 14870720.0, "learning_rate": 0.00018949346020749526, "loss": 7.5546, "step": 126770 }, { "epoch": 15.256317689530686, "grad_norm": 62450724.0, "learning_rate": 0.0001894917626876934, "loss": 7.5365, "step": 126780 }, { "epoch": 15.257521058965102, "grad_norm": 76659632.0, "learning_rate": 0.0001894900650383748, "loss": 7.5909, "step": 126790 }, { "epoch": 15.25872442839952, "grad_norm": 125586104.0, "learning_rate": 0.00018948836725954203, "loss": 7.6316, "step": 126800 }, { "epoch": 15.259927797833935, "grad_norm": 18692324.0, "learning_rate": 0.00018948666935119748, "loss": 7.5532, "step": 126810 }, { "epoch": 15.261131167268351, "grad_norm": 36259796.0, "learning_rate": 0.00018948497131334363, "loss": 7.5931, "step": 126820 }, { "epoch": 15.262334536702769, "grad_norm": 29815936.0, "learning_rate": 0.00018948327314598294, "loss": 7.5578, "step": 126830 }, { "epoch": 15.263537906137184, "grad_norm": 27945626.0, "learning_rate": 0.00018948157484911785, "loss": 7.5871, "step": 126840 }, { "epoch": 15.2647412755716, "grad_norm": 49709876.0, "learning_rate": 0.00018947987642275083, "loss": 7.6183, "step": 126850 }, { "epoch": 15.265944645006018, "grad_norm": 22007600.0, "learning_rate": 0.00018947817786688434, "loss": 7.5946, "step": 126860 }, { "epoch": 15.267148014440433, "grad_norm": 122649344.0, "learning_rate": 0.0001894764791815208, "loss": 7.6612, "step": 126870 }, { "epoch": 15.26835138387485, "grad_norm": 23468648.0, "learning_rate": 0.00018947478036666275, "loss": 7.6735, "step": 126880 }, { "epoch": 15.269554753309265, "grad_norm": 130342416.0, "learning_rate": 0.00018947308142231258, "loss": 7.6228, "step": 126890 }, { "epoch": 15.270758122743683, "grad_norm": 50178788.0, "learning_rate": 0.0001894713823484728, "loss": 7.6629, "step": 126900 }, { "epoch": 15.271961492178098, "grad_norm": 18792854.0, "learning_rate": 0.0001894696831451458, "loss": 7.5434, "step": 126910 }, { "epoch": 15.273164861612514, "grad_norm": 136559232.0, "learning_rate": 0.0001894679838123341, "loss": 7.6019, "step": 126920 }, { "epoch": 15.274368231046932, "grad_norm": 75619864.0, "learning_rate": 0.00018946628435004016, "loss": 7.631, "step": 126930 }, { "epoch": 15.275571600481348, "grad_norm": 54553956.0, "learning_rate": 0.00018946458475826639, "loss": 7.6471, "step": 126940 }, { "epoch": 15.276774969915763, "grad_norm": 143264192.0, "learning_rate": 0.00018946288503701527, "loss": 7.6097, "step": 126950 }, { "epoch": 15.277978339350181, "grad_norm": 17656644.0, "learning_rate": 0.00018946118518628927, "loss": 7.6484, "step": 126960 }, { "epoch": 15.279181708784597, "grad_norm": 39571772.0, "learning_rate": 0.00018945948520609086, "loss": 7.5435, "step": 126970 }, { "epoch": 15.280385078219012, "grad_norm": 18116824.0, "learning_rate": 0.0001894577850964225, "loss": 7.6014, "step": 126980 }, { "epoch": 15.28158844765343, "grad_norm": 35965420.0, "learning_rate": 0.0001894560848572866, "loss": 7.6067, "step": 126990 }, { "epoch": 15.282791817087846, "grad_norm": 7533833.0, "learning_rate": 0.0001894543844886857, "loss": 7.6144, "step": 127000 }, { "epoch": 15.283995186522262, "grad_norm": 13021408.0, "learning_rate": 0.0001894526839906222, "loss": 7.5807, "step": 127010 }, { "epoch": 15.28519855595668, "grad_norm": 6979057.5, "learning_rate": 0.00018945098336309858, "loss": 7.5642, "step": 127020 }, { "epoch": 15.286401925391095, "grad_norm": 27475448.0, "learning_rate": 0.0001894492826061173, "loss": 7.6356, "step": 127030 }, { "epoch": 15.28760529482551, "grad_norm": 32513116.0, "learning_rate": 0.00018944758171968084, "loss": 7.6351, "step": 127040 }, { "epoch": 15.288808664259928, "grad_norm": 27836212.0, "learning_rate": 0.00018944588070379163, "loss": 7.6687, "step": 127050 }, { "epoch": 15.290012033694344, "grad_norm": 23459752.0, "learning_rate": 0.00018944417955845216, "loss": 7.6519, "step": 127060 }, { "epoch": 15.29121540312876, "grad_norm": 32514826.0, "learning_rate": 0.00018944247828366488, "loss": 7.6131, "step": 127070 }, { "epoch": 15.292418772563177, "grad_norm": 13687643.0, "learning_rate": 0.00018944077687943223, "loss": 7.5811, "step": 127080 }, { "epoch": 15.293622141997593, "grad_norm": 12742487.0, "learning_rate": 0.00018943907534575672, "loss": 7.6129, "step": 127090 }, { "epoch": 15.294825511432009, "grad_norm": 56113888.0, "learning_rate": 0.00018943737368264078, "loss": 7.6463, "step": 127100 }, { "epoch": 15.296028880866427, "grad_norm": 67657248.0, "learning_rate": 0.00018943567189008684, "loss": 7.6239, "step": 127110 }, { "epoch": 15.297232250300842, "grad_norm": 20921442.0, "learning_rate": 0.00018943396996809746, "loss": 7.6075, "step": 127120 }, { "epoch": 15.298435619735258, "grad_norm": 7145852.5, "learning_rate": 0.00018943226791667502, "loss": 7.6048, "step": 127130 }, { "epoch": 15.299638989169676, "grad_norm": 9042083.0, "learning_rate": 0.00018943056573582202, "loss": 7.5822, "step": 127140 }, { "epoch": 15.300842358604092, "grad_norm": 42575684.0, "learning_rate": 0.00018942886342554087, "loss": 7.642, "step": 127150 }, { "epoch": 15.302045728038507, "grad_norm": 27993732.0, "learning_rate": 0.0001894271609858341, "loss": 7.5568, "step": 127160 }, { "epoch": 15.303249097472925, "grad_norm": 23491200.0, "learning_rate": 0.00018942545841670416, "loss": 7.5542, "step": 127170 }, { "epoch": 15.30445246690734, "grad_norm": 44398652.0, "learning_rate": 0.0001894237557181535, "loss": 7.6108, "step": 127180 }, { "epoch": 15.305655836341757, "grad_norm": 14377230.0, "learning_rate": 0.00018942205289018457, "loss": 7.5937, "step": 127190 }, { "epoch": 15.306859205776174, "grad_norm": 45386232.0, "learning_rate": 0.00018942034993279987, "loss": 7.6048, "step": 127200 }, { "epoch": 15.30806257521059, "grad_norm": 13268388.0, "learning_rate": 0.00018941864684600185, "loss": 7.5048, "step": 127210 }, { "epoch": 15.309265944645006, "grad_norm": 22896958.0, "learning_rate": 0.00018941694362979296, "loss": 7.6437, "step": 127220 }, { "epoch": 15.310469314079423, "grad_norm": 12896894.0, "learning_rate": 0.00018941524028417568, "loss": 7.6753, "step": 127230 }, { "epoch": 15.311672683513839, "grad_norm": 33137488.0, "learning_rate": 0.00018941353680915248, "loss": 7.7099, "step": 127240 }, { "epoch": 15.312876052948255, "grad_norm": 28891676.0, "learning_rate": 0.0001894118332047258, "loss": 7.6516, "step": 127250 }, { "epoch": 15.314079422382672, "grad_norm": 46271324.0, "learning_rate": 0.00018941012947089813, "loss": 7.583, "step": 127260 }, { "epoch": 15.315282791817088, "grad_norm": 62318632.0, "learning_rate": 0.00018940842560767195, "loss": 7.6459, "step": 127270 }, { "epoch": 15.316486161251504, "grad_norm": 33538558.0, "learning_rate": 0.00018940672161504965, "loss": 7.6399, "step": 127280 }, { "epoch": 15.31768953068592, "grad_norm": 22386000.0, "learning_rate": 0.0001894050174930338, "loss": 7.5709, "step": 127290 }, { "epoch": 15.318892900120337, "grad_norm": 19844442.0, "learning_rate": 0.0001894033132416268, "loss": 7.6348, "step": 127300 }, { "epoch": 15.320096269554753, "grad_norm": 78055824.0, "learning_rate": 0.00018940160886083112, "loss": 7.6563, "step": 127310 }, { "epoch": 15.321299638989169, "grad_norm": 32610438.0, "learning_rate": 0.00018939990435064928, "loss": 7.7332, "step": 127320 }, { "epoch": 15.322503008423586, "grad_norm": 22330076.0, "learning_rate": 0.00018939819971108363, "loss": 7.6675, "step": 127330 }, { "epoch": 15.323706377858002, "grad_norm": 75460280.0, "learning_rate": 0.00018939649494213677, "loss": 7.7185, "step": 127340 }, { "epoch": 15.324909747292418, "grad_norm": 75515872.0, "learning_rate": 0.00018939479004381112, "loss": 7.6141, "step": 127350 }, { "epoch": 15.326113116726836, "grad_norm": 178747952.0, "learning_rate": 0.0001893930850161091, "loss": 7.6413, "step": 127360 }, { "epoch": 15.327316486161251, "grad_norm": 57875368.0, "learning_rate": 0.00018939137985903327, "loss": 7.6971, "step": 127370 }, { "epoch": 15.328519855595667, "grad_norm": 1167977.25, "learning_rate": 0.00018938967457258598, "loss": 7.6191, "step": 127380 }, { "epoch": 15.329723225030085, "grad_norm": 51320640.0, "learning_rate": 0.0001893879691567698, "loss": 7.6472, "step": 127390 }, { "epoch": 15.3309265944645, "grad_norm": 106197.671875, "learning_rate": 0.00018938626361158714, "loss": 7.6916, "step": 127400 }, { "epoch": 15.332129963898916, "grad_norm": 1414810435584.0, "learning_rate": 0.0001893845579370405, "loss": 7.5789, "step": 127410 }, { "epoch": 15.333333333333334, "grad_norm": 2504443035648.0, "learning_rate": 0.00018938285213313232, "loss": 7.8044, "step": 127420 }, { "epoch": 15.33453670276775, "grad_norm": 3258781532160.0, "learning_rate": 0.0001893811461998651, "loss": 7.7567, "step": 127430 }, { "epoch": 15.335740072202166, "grad_norm": 1773208600576.0, "learning_rate": 0.0001893794401372413, "loss": 7.9074, "step": 127440 }, { "epoch": 15.336943441636583, "grad_norm": 3807304744960.0, "learning_rate": 0.00018937773394526336, "loss": 7.8283, "step": 127450 }, { "epoch": 15.338146811070999, "grad_norm": 1970687705088.0, "learning_rate": 0.00018937602762393382, "loss": 7.7362, "step": 127460 }, { "epoch": 15.339350180505415, "grad_norm": 2991477489664.0, "learning_rate": 0.00018937432117325503, "loss": 7.8072, "step": 127470 }, { "epoch": 15.340553549939832, "grad_norm": 8188653469696.0, "learning_rate": 0.0001893726145932296, "loss": 7.8177, "step": 127480 }, { "epoch": 15.341756919374248, "grad_norm": 3692498518016.0, "learning_rate": 0.0001893709078838599, "loss": 7.7759, "step": 127490 }, { "epoch": 15.342960288808664, "grad_norm": 813133398016.0, "learning_rate": 0.00018936920104514843, "loss": 7.8943, "step": 127500 }, { "epoch": 15.344163658243081, "grad_norm": 2547295977472.0, "learning_rate": 0.00018936749407709767, "loss": 7.8693, "step": 127510 }, { "epoch": 15.345367027677497, "grad_norm": 1573141.875, "learning_rate": 0.0001893657869797101, "loss": 7.908, "step": 127520 }, { "epoch": 15.346570397111913, "grad_norm": 9042798.0, "learning_rate": 0.00018936407975298812, "loss": 7.9104, "step": 127530 }, { "epoch": 15.34777376654633, "grad_norm": 19842218.0, "learning_rate": 0.00018936237239693427, "loss": 7.9035, "step": 127540 }, { "epoch": 15.348977135980746, "grad_norm": 19952302.0, "learning_rate": 0.00018936066491155102, "loss": 7.9823, "step": 127550 }, { "epoch": 15.350180505415162, "grad_norm": 7535808.0, "learning_rate": 0.00018935895729684082, "loss": 7.9795, "step": 127560 }, { "epoch": 15.35138387484958, "grad_norm": 2834054.75, "learning_rate": 0.00018935724955280615, "loss": 7.8004, "step": 127570 }, { "epoch": 15.352587244283995, "grad_norm": 7588425.0, "learning_rate": 0.00018935554167944947, "loss": 7.9261, "step": 127580 }, { "epoch": 15.353790613718411, "grad_norm": 10952721.0, "learning_rate": 0.00018935383367677327, "loss": 7.9108, "step": 127590 }, { "epoch": 15.354993983152827, "grad_norm": 2211648.25, "learning_rate": 0.00018935212554478, "loss": 7.9316, "step": 127600 }, { "epoch": 15.356197352587245, "grad_norm": 1134086.625, "learning_rate": 0.00018935041728347218, "loss": 7.8866, "step": 127610 }, { "epoch": 15.35740072202166, "grad_norm": 6018494.5, "learning_rate": 0.0001893487088928522, "loss": 7.8616, "step": 127620 }, { "epoch": 15.358604091456076, "grad_norm": 6723438.5, "learning_rate": 0.00018934700037292261, "loss": 7.8405, "step": 127630 }, { "epoch": 15.359807460890494, "grad_norm": 5646049.5, "learning_rate": 0.00018934529172368584, "loss": 7.8521, "step": 127640 }, { "epoch": 15.36101083032491, "grad_norm": 1505657.25, "learning_rate": 0.00018934358294514438, "loss": 7.8557, "step": 127650 }, { "epoch": 15.362214199759325, "grad_norm": 8570042.0, "learning_rate": 0.0001893418740373007, "loss": 7.7837, "step": 127660 }, { "epoch": 15.363417569193743, "grad_norm": 926838.625, "learning_rate": 0.00018934016500015725, "loss": 7.8622, "step": 127670 }, { "epoch": 15.364620938628159, "grad_norm": 4178495.0, "learning_rate": 0.00018933845583371653, "loss": 7.8453, "step": 127680 }, { "epoch": 15.365824308062574, "grad_norm": 5984799.0, "learning_rate": 0.000189336746537981, "loss": 7.868, "step": 127690 }, { "epoch": 15.367027677496992, "grad_norm": 13215079.0, "learning_rate": 0.00018933503711295315, "loss": 7.9283, "step": 127700 }, { "epoch": 15.368231046931408, "grad_norm": 8529610.0, "learning_rate": 0.00018933332755863546, "loss": 7.8827, "step": 127710 }, { "epoch": 15.369434416365824, "grad_norm": 11834520.0, "learning_rate": 0.00018933161787503038, "loss": 7.9105, "step": 127720 }, { "epoch": 15.370637785800241, "grad_norm": 20368342.0, "learning_rate": 0.0001893299080621404, "loss": 7.896, "step": 127730 }, { "epoch": 15.371841155234657, "grad_norm": 11510814.0, "learning_rate": 0.00018932819811996797, "loss": 7.9498, "step": 127740 }, { "epoch": 15.373044524669073, "grad_norm": 6029462.5, "learning_rate": 0.00018932648804851558, "loss": 7.9647, "step": 127750 }, { "epoch": 15.37424789410349, "grad_norm": 62824980.0, "learning_rate": 0.00018932477784778574, "loss": 7.8644, "step": 127760 }, { "epoch": 15.375451263537906, "grad_norm": 6200466.0, "learning_rate": 0.0001893230675177809, "loss": 7.9306, "step": 127770 }, { "epoch": 15.376654632972322, "grad_norm": 31674432.0, "learning_rate": 0.00018932135705850348, "loss": 7.8448, "step": 127780 }, { "epoch": 15.37785800240674, "grad_norm": 17501450.0, "learning_rate": 0.000189319646469956, "loss": 7.9711, "step": 127790 }, { "epoch": 15.379061371841155, "grad_norm": 6270009.5, "learning_rate": 0.000189317935752141, "loss": 7.9291, "step": 127800 }, { "epoch": 15.380264741275571, "grad_norm": 15273657.0, "learning_rate": 0.00018931622490506086, "loss": 7.9161, "step": 127810 }, { "epoch": 15.381468110709989, "grad_norm": 9113166.0, "learning_rate": 0.00018931451392871807, "loss": 7.9603, "step": 127820 }, { "epoch": 15.382671480144404, "grad_norm": 11408706.0, "learning_rate": 0.00018931280282311516, "loss": 7.9122, "step": 127830 }, { "epoch": 15.38387484957882, "grad_norm": 32239620.0, "learning_rate": 0.00018931109158825456, "loss": 7.9983, "step": 127840 }, { "epoch": 15.385078219013238, "grad_norm": 7070946.0, "learning_rate": 0.00018930938022413877, "loss": 7.8207, "step": 127850 }, { "epoch": 15.386281588447654, "grad_norm": 29044062.0, "learning_rate": 0.00018930766873077024, "loss": 8.0038, "step": 127860 }, { "epoch": 15.38748495788207, "grad_norm": 18711484.0, "learning_rate": 0.00018930595710815146, "loss": 7.9539, "step": 127870 }, { "epoch": 15.388688327316487, "grad_norm": 24030608.0, "learning_rate": 0.00018930424535628496, "loss": 7.9431, "step": 127880 }, { "epoch": 15.389891696750903, "grad_norm": 20224566.0, "learning_rate": 0.0001893025334751731, "loss": 8.0645, "step": 127890 }, { "epoch": 15.391095066185319, "grad_norm": 18130146.0, "learning_rate": 0.00018930082146481847, "loss": 7.9791, "step": 127900 }, { "epoch": 15.392298435619736, "grad_norm": 16162494.0, "learning_rate": 0.00018929910932522347, "loss": 7.9798, "step": 127910 }, { "epoch": 15.393501805054152, "grad_norm": 11518512.0, "learning_rate": 0.00018929739705639067, "loss": 7.9174, "step": 127920 }, { "epoch": 15.394705174488568, "grad_norm": 10792011.0, "learning_rate": 0.00018929568465832244, "loss": 7.9288, "step": 127930 }, { "epoch": 15.395908543922985, "grad_norm": 4750258.0, "learning_rate": 0.00018929397213102132, "loss": 8.0279, "step": 127940 }, { "epoch": 15.397111913357401, "grad_norm": 10598107.0, "learning_rate": 0.00018929225947448978, "loss": 7.8804, "step": 127950 }, { "epoch": 15.398315282791817, "grad_norm": 21077748.0, "learning_rate": 0.0001892905466887303, "loss": 7.974, "step": 127960 }, { "epoch": 15.399518652226233, "grad_norm": 6635778.5, "learning_rate": 0.00018928883377374533, "loss": 7.803, "step": 127970 }, { "epoch": 15.40072202166065, "grad_norm": 16265248.0, "learning_rate": 0.0001892871207295374, "loss": 7.9542, "step": 127980 }, { "epoch": 15.401925391095066, "grad_norm": 11509844.0, "learning_rate": 0.00018928540755610895, "loss": 7.8735, "step": 127990 }, { "epoch": 15.403128760529482, "grad_norm": 2122466.75, "learning_rate": 0.00018928369425346247, "loss": 7.8465, "step": 128000 }, { "epoch": 15.4043321299639, "grad_norm": 7207854.0, "learning_rate": 0.00018928198082160044, "loss": 7.9605, "step": 128010 }, { "epoch": 15.405535499398315, "grad_norm": 28157966.0, "learning_rate": 0.00018928026726052535, "loss": 8.047, "step": 128020 }, { "epoch": 15.406738868832731, "grad_norm": 45350984.0, "learning_rate": 0.00018927855357023967, "loss": 7.9673, "step": 128030 }, { "epoch": 15.407942238267148, "grad_norm": 70852912.0, "learning_rate": 0.00018927683975074588, "loss": 7.9313, "step": 128040 }, { "epoch": 15.409145607701564, "grad_norm": 9853865.0, "learning_rate": 0.00018927512580204644, "loss": 7.9559, "step": 128050 }, { "epoch": 15.41034897713598, "grad_norm": 17501476.0, "learning_rate": 0.00018927341172414388, "loss": 7.9305, "step": 128060 }, { "epoch": 15.411552346570398, "grad_norm": 23052134.0, "learning_rate": 0.00018927169751704062, "loss": 7.9331, "step": 128070 }, { "epoch": 15.412755716004813, "grad_norm": 56401612.0, "learning_rate": 0.00018926998318073924, "loss": 7.9313, "step": 128080 }, { "epoch": 15.41395908543923, "grad_norm": 30189430.0, "learning_rate": 0.0001892682687152421, "loss": 8.0057, "step": 128090 }, { "epoch": 15.415162454873647, "grad_norm": 84842288.0, "learning_rate": 0.00018926655412055174, "loss": 7.9918, "step": 128100 }, { "epoch": 15.416365824308063, "grad_norm": 58338020.0, "learning_rate": 0.00018926483939667063, "loss": 8.0324, "step": 128110 }, { "epoch": 15.417569193742478, "grad_norm": 33805.7890625, "learning_rate": 0.00018926312454360127, "loss": 8.0076, "step": 128120 }, { "epoch": 15.418772563176896, "grad_norm": 71840.484375, "learning_rate": 0.00018926140956134613, "loss": 8.0435, "step": 128130 }, { "epoch": 15.419975932611312, "grad_norm": 43471.79296875, "learning_rate": 0.0001892596944499077, "loss": 7.7596, "step": 128140 }, { "epoch": 15.421179302045728, "grad_norm": 51212.53125, "learning_rate": 0.00018925797920928844, "loss": 7.7364, "step": 128150 }, { "epoch": 15.422382671480145, "grad_norm": 39187.37890625, "learning_rate": 0.00018925626383949084, "loss": 7.728, "step": 128160 }, { "epoch": 15.42358604091456, "grad_norm": 14409.4521484375, "learning_rate": 0.00018925454834051744, "loss": 7.7351, "step": 128170 }, { "epoch": 15.424789410348977, "grad_norm": 116253.59375, "learning_rate": 0.00018925283271237063, "loss": 7.6738, "step": 128180 }, { "epoch": 15.425992779783394, "grad_norm": 95810.2578125, "learning_rate": 0.00018925111695505294, "loss": 7.7214, "step": 128190 }, { "epoch": 15.42719614921781, "grad_norm": 106774.734375, "learning_rate": 0.00018924940106856684, "loss": 7.6666, "step": 128200 }, { "epoch": 15.428399518652226, "grad_norm": 44819.26171875, "learning_rate": 0.00018924768505291484, "loss": 7.686, "step": 128210 }, { "epoch": 15.429602888086643, "grad_norm": 32586.5625, "learning_rate": 0.0001892459689080994, "loss": 7.5878, "step": 128220 }, { "epoch": 15.43080625752106, "grad_norm": 62129.98828125, "learning_rate": 0.000189244252634123, "loss": 7.6324, "step": 128230 }, { "epoch": 15.432009626955475, "grad_norm": 40304.99609375, "learning_rate": 0.00018924253623098814, "loss": 7.6888, "step": 128240 }, { "epoch": 15.433212996389893, "grad_norm": 52973.91796875, "learning_rate": 0.0001892408196986973, "loss": 7.6634, "step": 128250 }, { "epoch": 15.434416365824308, "grad_norm": 69154.9140625, "learning_rate": 0.00018923910303725295, "loss": 7.6284, "step": 128260 }, { "epoch": 15.435619735258724, "grad_norm": 37306.50390625, "learning_rate": 0.0001892373862466576, "loss": 7.6516, "step": 128270 }, { "epoch": 15.43682310469314, "grad_norm": 129092.2578125, "learning_rate": 0.0001892356693269137, "loss": 7.5421, "step": 128280 }, { "epoch": 15.438026474127557, "grad_norm": 6688697.5, "learning_rate": 0.00018923395227802377, "loss": 7.6009, "step": 128290 }, { "epoch": 15.439229843561973, "grad_norm": 20396596.0, "learning_rate": 0.00018923223509999026, "loss": 7.6637, "step": 128300 }, { "epoch": 15.440433212996389, "grad_norm": 30236666.0, "learning_rate": 0.0001892305177928157, "loss": 7.5875, "step": 128310 }, { "epoch": 15.441636582430807, "grad_norm": 7551369.0, "learning_rate": 0.00018922880035650253, "loss": 7.5586, "step": 128320 }, { "epoch": 15.442839951865222, "grad_norm": 57299056.0, "learning_rate": 0.0001892270827910533, "loss": 7.6573, "step": 128330 }, { "epoch": 15.444043321299638, "grad_norm": 14870585.0, "learning_rate": 0.00018922536509647042, "loss": 7.565, "step": 128340 }, { "epoch": 15.445246690734056, "grad_norm": 33727032.0, "learning_rate": 0.0001892236472727564, "loss": 7.7003, "step": 128350 }, { "epoch": 15.446450060168472, "grad_norm": 6558484.5, "learning_rate": 0.00018922192931991374, "loss": 7.6156, "step": 128360 }, { "epoch": 15.447653429602887, "grad_norm": 49388.5, "learning_rate": 0.00018922021123794492, "loss": 7.589, "step": 128370 }, { "epoch": 15.448856799037305, "grad_norm": 70256.1796875, "learning_rate": 0.00018921849302685245, "loss": 7.5735, "step": 128380 }, { "epoch": 15.45006016847172, "grad_norm": 40877.8828125, "learning_rate": 0.00018921677468663876, "loss": 7.6634, "step": 128390 }, { "epoch": 15.451263537906136, "grad_norm": 109990.078125, "learning_rate": 0.00018921505621730637, "loss": 7.608, "step": 128400 }, { "epoch": 15.452466907340554, "grad_norm": 63311.01171875, "learning_rate": 0.0001892133376188578, "loss": 7.6643, "step": 128410 }, { "epoch": 15.45367027677497, "grad_norm": 17717.41015625, "learning_rate": 0.00018921161889129547, "loss": 7.5901, "step": 128420 }, { "epoch": 15.454873646209386, "grad_norm": 65983.3984375, "learning_rate": 0.00018920990003462192, "loss": 7.6224, "step": 128430 }, { "epoch": 15.456077015643803, "grad_norm": 37821.62890625, "learning_rate": 0.0001892081810488396, "loss": 7.5199, "step": 128440 }, { "epoch": 15.457280385078219, "grad_norm": 65781.1484375, "learning_rate": 0.00018920646193395103, "loss": 7.5692, "step": 128450 }, { "epoch": 15.458483754512635, "grad_norm": 75545.5078125, "learning_rate": 0.00018920474268995869, "loss": 7.5532, "step": 128460 }, { "epoch": 15.459687123947052, "grad_norm": 20159.916015625, "learning_rate": 0.00018920302331686508, "loss": 7.5708, "step": 128470 }, { "epoch": 15.460890493381468, "grad_norm": 65203.3515625, "learning_rate": 0.00018920130381467262, "loss": 7.5549, "step": 128480 }, { "epoch": 15.462093862815884, "grad_norm": 24130.107421875, "learning_rate": 0.0001891995841833839, "loss": 7.5831, "step": 128490 }, { "epoch": 15.463297232250302, "grad_norm": 13654795.0, "learning_rate": 0.00018919786442300133, "loss": 7.5671, "step": 128500 }, { "epoch": 15.464500601684717, "grad_norm": 21652528.0, "learning_rate": 0.00018919614453352746, "loss": 7.6134, "step": 128510 }, { "epoch": 15.465703971119133, "grad_norm": 7533261.5, "learning_rate": 0.00018919442451496472, "loss": 7.5694, "step": 128520 }, { "epoch": 15.46690734055355, "grad_norm": 18992542.0, "learning_rate": 0.00018919270436731561, "loss": 7.6579, "step": 128530 }, { "epoch": 15.468110709987966, "grad_norm": 6445664.5, "learning_rate": 0.0001891909840905827, "loss": 7.5958, "step": 128540 }, { "epoch": 15.469314079422382, "grad_norm": 41615272.0, "learning_rate": 0.00018918926368476834, "loss": 7.6084, "step": 128550 }, { "epoch": 15.4705174488568, "grad_norm": 8817272.0, "learning_rate": 0.00018918754314987515, "loss": 7.5406, "step": 128560 }, { "epoch": 15.471720818291216, "grad_norm": 4685589.5, "learning_rate": 0.00018918582248590555, "loss": 7.5583, "step": 128570 }, { "epoch": 15.472924187725631, "grad_norm": 1491868.625, "learning_rate": 0.00018918410169286201, "loss": 7.6372, "step": 128580 }, { "epoch": 15.474127557160049, "grad_norm": 12772666.0, "learning_rate": 0.0001891823807707471, "loss": 7.6059, "step": 128590 }, { "epoch": 15.475330926594465, "grad_norm": 4786470.5, "learning_rate": 0.00018918065971956324, "loss": 7.6642, "step": 128600 }, { "epoch": 15.47653429602888, "grad_norm": 2048514.625, "learning_rate": 0.00018917893853931297, "loss": 7.6305, "step": 128610 }, { "epoch": 15.477737665463298, "grad_norm": 4387916.0, "learning_rate": 0.00018917721722999878, "loss": 7.6003, "step": 128620 }, { "epoch": 15.478941034897714, "grad_norm": 3760863.5, "learning_rate": 0.0001891754957916231, "loss": 7.6605, "step": 128630 }, { "epoch": 15.48014440433213, "grad_norm": 9387811.0, "learning_rate": 0.00018917377422418845, "loss": 7.6452, "step": 128640 }, { "epoch": 15.481347773766545, "grad_norm": 3026744.25, "learning_rate": 0.00018917205252769737, "loss": 7.6199, "step": 128650 }, { "epoch": 15.482551143200963, "grad_norm": 8218311.0, "learning_rate": 0.0001891703307021523, "loss": 7.6202, "step": 128660 }, { "epoch": 15.483754512635379, "grad_norm": 10987606.0, "learning_rate": 0.00018916860874755573, "loss": 7.6613, "step": 128670 }, { "epoch": 15.484957882069795, "grad_norm": 13410976.0, "learning_rate": 0.00018916688666391018, "loss": 7.544, "step": 128680 }, { "epoch": 15.486161251504212, "grad_norm": 4956526.5, "learning_rate": 0.00018916516445121811, "loss": 7.5892, "step": 128690 }, { "epoch": 15.487364620938628, "grad_norm": 11888376.0, "learning_rate": 0.00018916344210948206, "loss": 7.6252, "step": 128700 }, { "epoch": 15.488567990373044, "grad_norm": 10391656.0, "learning_rate": 0.0001891617196387045, "loss": 7.5488, "step": 128710 }, { "epoch": 15.489771359807461, "grad_norm": 11783991.0, "learning_rate": 0.0001891599970388879, "loss": 7.5685, "step": 128720 }, { "epoch": 15.490974729241877, "grad_norm": 30776322.0, "learning_rate": 0.0001891582743100348, "loss": 7.6083, "step": 128730 }, { "epoch": 15.492178098676293, "grad_norm": 8654180.0, "learning_rate": 0.00018915655145214765, "loss": 7.5361, "step": 128740 }, { "epoch": 15.49338146811071, "grad_norm": 4086228.5, "learning_rate": 0.00018915482846522893, "loss": 7.6417, "step": 128750 }, { "epoch": 15.494584837545126, "grad_norm": 13028856.0, "learning_rate": 0.00018915310534928117, "loss": 7.6083, "step": 128760 }, { "epoch": 15.495788206979542, "grad_norm": 7764.36767578125, "learning_rate": 0.0001891513821043069, "loss": 7.5509, "step": 128770 }, { "epoch": 15.49699157641396, "grad_norm": 23228.30859375, "learning_rate": 0.00018914965873030853, "loss": 7.5357, "step": 128780 }, { "epoch": 15.498194945848375, "grad_norm": 14338.8427734375, "learning_rate": 0.0001891479352272886, "loss": 7.5521, "step": 128790 }, { "epoch": 15.499398315282791, "grad_norm": 84327.7890625, "learning_rate": 0.00018914621159524958, "loss": 7.682, "step": 128800 }, { "epoch": 15.500601684717209, "grad_norm": 9639.7568359375, "learning_rate": 0.000189144487834194, "loss": 7.6499, "step": 128810 }, { "epoch": 15.501805054151625, "grad_norm": 7120.83203125, "learning_rate": 0.00018914276394412432, "loss": 7.574, "step": 128820 }, { "epoch": 15.50300842358604, "grad_norm": 16379.93359375, "learning_rate": 0.0001891410399250431, "loss": 7.6814, "step": 128830 }, { "epoch": 15.504211793020458, "grad_norm": 5513.794921875, "learning_rate": 0.00018913931577695277, "loss": 7.6113, "step": 128840 }, { "epoch": 15.505415162454874, "grad_norm": 24535.748046875, "learning_rate": 0.00018913759149985582, "loss": 7.4889, "step": 128850 }, { "epoch": 15.50661853188929, "grad_norm": 7957.34375, "learning_rate": 0.00018913586709375476, "loss": 7.5974, "step": 128860 }, { "epoch": 15.507821901323707, "grad_norm": 10026.1513671875, "learning_rate": 0.00018913414255865212, "loss": 7.6244, "step": 128870 }, { "epoch": 15.509025270758123, "grad_norm": 21741.625, "learning_rate": 0.00018913241789455035, "loss": 7.5943, "step": 128880 }, { "epoch": 15.510228640192539, "grad_norm": 21506.59375, "learning_rate": 0.000189130693101452, "loss": 7.4769, "step": 128890 }, { "epoch": 15.511432009626956, "grad_norm": 14295.814453125, "learning_rate": 0.0001891289681793595, "loss": 7.6081, "step": 128900 }, { "epoch": 15.512635379061372, "grad_norm": 5405.8916015625, "learning_rate": 0.00018912724312827539, "loss": 7.5695, "step": 128910 }, { "epoch": 15.513838748495788, "grad_norm": 10172.646484375, "learning_rate": 0.00018912551794820215, "loss": 7.5735, "step": 128920 }, { "epoch": 15.515042117930205, "grad_norm": 12082.6923828125, "learning_rate": 0.00018912379263914225, "loss": 7.4674, "step": 128930 }, { "epoch": 15.516245487364621, "grad_norm": 9141.341796875, "learning_rate": 0.00018912206720109827, "loss": 7.5755, "step": 128940 }, { "epoch": 15.517448856799037, "grad_norm": 19292.23828125, "learning_rate": 0.00018912034163407263, "loss": 7.5052, "step": 128950 }, { "epoch": 15.518652226233453, "grad_norm": 10430.6025390625, "learning_rate": 0.00018911861593806786, "loss": 7.5322, "step": 128960 }, { "epoch": 15.51985559566787, "grad_norm": 17314.849609375, "learning_rate": 0.00018911689011308642, "loss": 7.5471, "step": 128970 }, { "epoch": 15.521058965102286, "grad_norm": 6586.01025390625, "learning_rate": 0.00018911516415913088, "loss": 7.5709, "step": 128980 }, { "epoch": 15.522262334536702, "grad_norm": 6877.32568359375, "learning_rate": 0.00018911343807620368, "loss": 7.5245, "step": 128990 }, { "epoch": 15.52346570397112, "grad_norm": 4980.56982421875, "learning_rate": 0.00018911171186430733, "loss": 7.57, "step": 129000 }, { "epoch": 15.524669073405535, "grad_norm": 5770.44287109375, "learning_rate": 0.00018910998552344434, "loss": 7.5478, "step": 129010 }, { "epoch": 15.525872442839951, "grad_norm": 4519.296875, "learning_rate": 0.0001891082590536172, "loss": 7.4952, "step": 129020 }, { "epoch": 15.527075812274369, "grad_norm": 16958.93359375, "learning_rate": 0.00018910653245482839, "loss": 7.5302, "step": 129030 }, { "epoch": 15.528279181708784, "grad_norm": 25330.244140625, "learning_rate": 0.00018910480572708046, "loss": 7.5386, "step": 129040 }, { "epoch": 15.5294825511432, "grad_norm": 5226.640625, "learning_rate": 0.00018910307887037587, "loss": 7.5301, "step": 129050 }, { "epoch": 15.530685920577618, "grad_norm": 9514.9990234375, "learning_rate": 0.0001891013518847171, "loss": 7.4962, "step": 129060 }, { "epoch": 15.531889290012034, "grad_norm": 3317.2431640625, "learning_rate": 0.0001890996247701067, "loss": 7.5716, "step": 129070 }, { "epoch": 15.53309265944645, "grad_norm": 8102.52880859375, "learning_rate": 0.0001890978975265471, "loss": 7.5251, "step": 129080 }, { "epoch": 15.534296028880867, "grad_norm": 6283.49169921875, "learning_rate": 0.00018909617015404087, "loss": 7.5218, "step": 129090 }, { "epoch": 15.535499398315283, "grad_norm": 3155.3505859375, "learning_rate": 0.0001890944426525905, "loss": 7.5427, "step": 129100 }, { "epoch": 15.536702767749698, "grad_norm": 2341.677978515625, "learning_rate": 0.00018909271502219848, "loss": 7.5057, "step": 129110 }, { "epoch": 15.537906137184116, "grad_norm": 15337.583984375, "learning_rate": 0.00018909098726286732, "loss": 7.5648, "step": 129120 }, { "epoch": 15.539109506618532, "grad_norm": 25096.537109375, "learning_rate": 0.00018908925937459947, "loss": 7.5205, "step": 129130 }, { "epoch": 15.540312876052948, "grad_norm": 7579.16015625, "learning_rate": 0.00018908753135739748, "loss": 7.5733, "step": 129140 }, { "epoch": 15.541516245487365, "grad_norm": 17752.521484375, "learning_rate": 0.0001890858032112638, "loss": 7.5869, "step": 129150 }, { "epoch": 15.542719614921781, "grad_norm": 8619.4423828125, "learning_rate": 0.00018908407493620102, "loss": 7.4921, "step": 129160 }, { "epoch": 15.543922984356197, "grad_norm": 302.3937683105469, "learning_rate": 0.00018908234653221157, "loss": 7.5666, "step": 129170 }, { "epoch": 15.545126353790614, "grad_norm": 673.1851806640625, "learning_rate": 0.00018908061799929798, "loss": 7.6795, "step": 129180 }, { "epoch": 15.54632972322503, "grad_norm": 344.5409851074219, "learning_rate": 0.00018907888933746274, "loss": 7.707, "step": 129190 }, { "epoch": 15.547533092659446, "grad_norm": 573.9432373046875, "learning_rate": 0.00018907716054670834, "loss": 7.5703, "step": 129200 }, { "epoch": 15.548736462093864, "grad_norm": 312.3635559082031, "learning_rate": 0.0001890754316270373, "loss": 7.7668, "step": 129210 }, { "epoch": 15.54993983152828, "grad_norm": 326.55804443359375, "learning_rate": 0.00018907370257845215, "loss": 7.5852, "step": 129220 }, { "epoch": 15.551143200962695, "grad_norm": 838.5820922851562, "learning_rate": 0.0001890719734009553, "loss": 7.6044, "step": 129230 }, { "epoch": 15.552346570397113, "grad_norm": 297.2216796875, "learning_rate": 0.00018907024409454937, "loss": 7.6065, "step": 129240 }, { "epoch": 15.553549939831528, "grad_norm": 314.0319519042969, "learning_rate": 0.00018906851465923675, "loss": 7.594, "step": 129250 }, { "epoch": 15.554753309265944, "grad_norm": 51.431846618652344, "learning_rate": 0.00018906678509502008, "loss": 7.5334, "step": 129260 }, { "epoch": 15.555956678700362, "grad_norm": 77.34065246582031, "learning_rate": 0.00018906505540190171, "loss": 7.6404, "step": 129270 }, { "epoch": 15.557160048134778, "grad_norm": 138.8236846923828, "learning_rate": 0.00018906332557988424, "loss": 7.704, "step": 129280 }, { "epoch": 15.558363417569193, "grad_norm": 44.31373596191406, "learning_rate": 0.00018906159562897016, "loss": 7.5354, "step": 129290 }, { "epoch": 15.559566787003611, "grad_norm": 50.43186950683594, "learning_rate": 0.00018905986554916196, "loss": 7.6112, "step": 129300 }, { "epoch": 15.560770156438027, "grad_norm": 54.885650634765625, "learning_rate": 0.00018905813534046212, "loss": 7.4474, "step": 129310 }, { "epoch": 15.561973525872443, "grad_norm": 24.321998596191406, "learning_rate": 0.00018905640500287317, "loss": 7.5927, "step": 129320 }, { "epoch": 15.56317689530686, "grad_norm": 46.89988327026367, "learning_rate": 0.00018905467453639765, "loss": 7.597, "step": 129330 }, { "epoch": 15.564380264741276, "grad_norm": 23.91341209411621, "learning_rate": 0.000189052943941038, "loss": 7.6583, "step": 129340 }, { "epoch": 15.565583634175692, "grad_norm": 24.96449089050293, "learning_rate": 0.0001890512132167968, "loss": 7.6456, "step": 129350 }, { "epoch": 15.566787003610107, "grad_norm": 21.215200424194336, "learning_rate": 0.00018904948236367645, "loss": 7.5274, "step": 129360 }, { "epoch": 15.567990373044525, "grad_norm": 20.48535919189453, "learning_rate": 0.00018904775138167953, "loss": 7.6008, "step": 129370 }, { "epoch": 15.56919374247894, "grad_norm": 39.43841552734375, "learning_rate": 0.00018904602027080856, "loss": 7.6322, "step": 129380 }, { "epoch": 15.570397111913357, "grad_norm": 37.140445709228516, "learning_rate": 0.00018904428903106598, "loss": 7.5993, "step": 129390 }, { "epoch": 15.571600481347774, "grad_norm": 46.02839279174805, "learning_rate": 0.00018904255766245432, "loss": 7.5191, "step": 129400 }, { "epoch": 15.57280385078219, "grad_norm": 44.908042907714844, "learning_rate": 0.00018904082616497613, "loss": 7.4756, "step": 129410 }, { "epoch": 15.574007220216606, "grad_norm": 32.054500579833984, "learning_rate": 0.00018903909453863387, "loss": 7.4928, "step": 129420 }, { "epoch": 15.575210589651023, "grad_norm": 64.82672119140625, "learning_rate": 0.00018903736278343006, "loss": 7.5339, "step": 129430 }, { "epoch": 15.57641395908544, "grad_norm": 29.251670837402344, "learning_rate": 0.00018903563089936717, "loss": 7.5726, "step": 129440 }, { "epoch": 15.577617328519855, "grad_norm": 109.84314727783203, "learning_rate": 0.00018903389888644777, "loss": 7.6259, "step": 129450 }, { "epoch": 15.578820697954272, "grad_norm": 46.81904983520508, "learning_rate": 0.00018903216674467433, "loss": 7.6572, "step": 129460 }, { "epoch": 15.580024067388688, "grad_norm": 1282.04541015625, "learning_rate": 0.00018903043447404937, "loss": 7.5675, "step": 129470 }, { "epoch": 15.581227436823104, "grad_norm": 706.8040771484375, "learning_rate": 0.0001890287020745754, "loss": 7.5486, "step": 129480 }, { "epoch": 15.582430806257522, "grad_norm": 740.2406005859375, "learning_rate": 0.0001890269695462549, "loss": 7.617, "step": 129490 }, { "epoch": 15.583634175691937, "grad_norm": 295.137939453125, "learning_rate": 0.0001890252368890904, "loss": 7.5754, "step": 129500 }, { "epoch": 15.584837545126353, "grad_norm": 116.53155517578125, "learning_rate": 0.0001890235041030844, "loss": 7.4607, "step": 129510 }, { "epoch": 15.58604091456077, "grad_norm": 641.5328369140625, "learning_rate": 0.00018902177118823943, "loss": 7.5049, "step": 129520 }, { "epoch": 15.587244283995187, "grad_norm": 1330.26416015625, "learning_rate": 0.00018902003814455794, "loss": 7.4866, "step": 129530 }, { "epoch": 15.588447653429602, "grad_norm": 317.4263916015625, "learning_rate": 0.00018901830497204252, "loss": 7.5133, "step": 129540 }, { "epoch": 15.58965102286402, "grad_norm": 3190.15087890625, "learning_rate": 0.00018901657167069558, "loss": 7.4912, "step": 129550 }, { "epoch": 15.590854392298436, "grad_norm": 581.2105102539062, "learning_rate": 0.00018901483824051974, "loss": 7.3774, "step": 129560 }, { "epoch": 15.592057761732852, "grad_norm": 403.0574035644531, "learning_rate": 0.00018901310468151742, "loss": 7.65, "step": 129570 }, { "epoch": 15.593261131167269, "grad_norm": 646.2369384765625, "learning_rate": 0.00018901137099369114, "loss": 7.4794, "step": 129580 }, { "epoch": 15.594464500601685, "grad_norm": 116.70044708251953, "learning_rate": 0.0001890096371770435, "loss": 7.4795, "step": 129590 }, { "epoch": 15.5956678700361, "grad_norm": 284.9908447265625, "learning_rate": 0.0001890079032315769, "loss": 7.5945, "step": 129600 }, { "epoch": 15.596871239470518, "grad_norm": 1824.6612548828125, "learning_rate": 0.00018900616915729388, "loss": 7.6665, "step": 129610 }, { "epoch": 15.598074608904934, "grad_norm": 61.20644760131836, "learning_rate": 0.00018900443495419695, "loss": 7.5661, "step": 129620 }, { "epoch": 15.59927797833935, "grad_norm": 75.7002944946289, "learning_rate": 0.00018900270062228864, "loss": 7.4868, "step": 129630 }, { "epoch": 15.600481347773766, "grad_norm": 52.21888732910156, "learning_rate": 0.00018900096616157144, "loss": 7.5386, "step": 129640 }, { "epoch": 15.601684717208183, "grad_norm": 97.52935791015625, "learning_rate": 0.00018899923157204788, "loss": 7.387, "step": 129650 }, { "epoch": 15.602888086642599, "grad_norm": 92.35443878173828, "learning_rate": 0.00018899749685372049, "loss": 7.5255, "step": 129660 }, { "epoch": 15.604091456077015, "grad_norm": 95.04878997802734, "learning_rate": 0.00018899576200659168, "loss": 7.4623, "step": 129670 }, { "epoch": 15.605294825511432, "grad_norm": 196.79928588867188, "learning_rate": 0.00018899402703066408, "loss": 7.4881, "step": 129680 }, { "epoch": 15.606498194945848, "grad_norm": 178.7354736328125, "learning_rate": 0.00018899229192594015, "loss": 7.5186, "step": 129690 }, { "epoch": 15.607701564380264, "grad_norm": 1128.239501953125, "learning_rate": 0.00018899055669242238, "loss": 7.4829, "step": 129700 }, { "epoch": 15.608904933814681, "grad_norm": 316.3932800292969, "learning_rate": 0.0001889888213301133, "loss": 7.5183, "step": 129710 }, { "epoch": 15.610108303249097, "grad_norm": 203.64212036132812, "learning_rate": 0.00018898708583901542, "loss": 7.4503, "step": 129720 }, { "epoch": 15.611311672683513, "grad_norm": 636.5574951171875, "learning_rate": 0.0001889853502191313, "loss": 7.5411, "step": 129730 }, { "epoch": 15.61251504211793, "grad_norm": 1181.5462646484375, "learning_rate": 0.0001889836144704634, "loss": 7.4925, "step": 129740 }, { "epoch": 15.613718411552346, "grad_norm": 218.50665283203125, "learning_rate": 0.0001889818785930142, "loss": 7.5558, "step": 129750 }, { "epoch": 15.614921780986762, "grad_norm": 116.60340118408203, "learning_rate": 0.00018898014258678626, "loss": 7.5185, "step": 129760 }, { "epoch": 15.61612515042118, "grad_norm": 131.9979705810547, "learning_rate": 0.00018897840645178214, "loss": 7.4183, "step": 129770 }, { "epoch": 15.617328519855596, "grad_norm": 318.1040954589844, "learning_rate": 0.00018897667018800426, "loss": 7.4749, "step": 129780 }, { "epoch": 15.618531889290011, "grad_norm": 216.2886962890625, "learning_rate": 0.00018897493379545517, "loss": 7.4415, "step": 129790 }, { "epoch": 15.619735258724429, "grad_norm": 5524.82861328125, "learning_rate": 0.00018897319727413737, "loss": 7.3992, "step": 129800 }, { "epoch": 15.620938628158845, "grad_norm": 3024.58349609375, "learning_rate": 0.0001889714606240534, "loss": 7.4304, "step": 129810 }, { "epoch": 15.62214199759326, "grad_norm": 462.4556884765625, "learning_rate": 0.00018896972384520577, "loss": 7.4408, "step": 129820 }, { "epoch": 15.623345367027678, "grad_norm": 363.1320495605469, "learning_rate": 0.00018896798693759695, "loss": 7.4105, "step": 129830 }, { "epoch": 15.624548736462094, "grad_norm": 505.0877990722656, "learning_rate": 0.00018896624990122953, "loss": 7.4509, "step": 129840 }, { "epoch": 15.62575210589651, "grad_norm": 347.98187255859375, "learning_rate": 0.00018896451273610595, "loss": 7.4938, "step": 129850 }, { "epoch": 15.626955475330927, "grad_norm": 400.0885314941406, "learning_rate": 0.00018896277544222878, "loss": 7.4287, "step": 129860 }, { "epoch": 15.628158844765343, "grad_norm": 325.1827087402344, "learning_rate": 0.0001889610380196005, "loss": 7.3676, "step": 129870 }, { "epoch": 15.629362214199759, "grad_norm": 237.39175415039062, "learning_rate": 0.00018895930046822364, "loss": 7.4048, "step": 129880 }, { "epoch": 15.630565583634176, "grad_norm": 135.3329620361328, "learning_rate": 0.0001889575627881007, "loss": 7.4627, "step": 129890 }, { "epoch": 15.631768953068592, "grad_norm": 311.0946350097656, "learning_rate": 0.00018895582497923421, "loss": 7.375, "step": 129900 }, { "epoch": 15.632972322503008, "grad_norm": 204.72337341308594, "learning_rate": 0.00018895408704162664, "loss": 7.4442, "step": 129910 }, { "epoch": 15.634175691937426, "grad_norm": 301.5276184082031, "learning_rate": 0.0001889523489752806, "loss": 7.3823, "step": 129920 }, { "epoch": 15.635379061371841, "grad_norm": 360.1788635253906, "learning_rate": 0.0001889506107801985, "loss": 7.4466, "step": 129930 }, { "epoch": 15.636582430806257, "grad_norm": 267.43511962890625, "learning_rate": 0.00018894887245638294, "loss": 7.4446, "step": 129940 }, { "epoch": 15.637785800240675, "grad_norm": 401.21533203125, "learning_rate": 0.00018894713400383642, "loss": 7.4477, "step": 129950 }, { "epoch": 15.63898916967509, "grad_norm": 379.4913635253906, "learning_rate": 0.0001889453954225614, "loss": 7.4691, "step": 129960 }, { "epoch": 15.640192539109506, "grad_norm": 447.0564270019531, "learning_rate": 0.00018894365671256045, "loss": 7.4469, "step": 129970 }, { "epoch": 15.641395908543924, "grad_norm": 616.8658447265625, "learning_rate": 0.00018894191787383603, "loss": 7.3652, "step": 129980 }, { "epoch": 15.64259927797834, "grad_norm": 367.65478515625, "learning_rate": 0.00018894017890639075, "loss": 7.393, "step": 129990 }, { "epoch": 15.643802647412755, "grad_norm": 820.666259765625, "learning_rate": 0.00018893843981022704, "loss": 7.4179, "step": 130000 }, { "epoch": 15.645006016847173, "grad_norm": 387.88372802734375, "learning_rate": 0.00018893670058534747, "loss": 7.4676, "step": 130010 }, { "epoch": 15.646209386281589, "grad_norm": 443.2305908203125, "learning_rate": 0.00018893496123175454, "loss": 7.4074, "step": 130020 }, { "epoch": 15.647412755716005, "grad_norm": 919.445556640625, "learning_rate": 0.00018893322174945074, "loss": 7.4859, "step": 130030 }, { "epoch": 15.648616125150422, "grad_norm": 524.7247924804688, "learning_rate": 0.00018893148213843861, "loss": 7.4252, "step": 130040 }, { "epoch": 15.649819494584838, "grad_norm": 730.0946044921875, "learning_rate": 0.00018892974239872068, "loss": 7.4358, "step": 130050 }, { "epoch": 15.651022864019254, "grad_norm": 849.2393798828125, "learning_rate": 0.00018892800253029947, "loss": 7.4084, "step": 130060 }, { "epoch": 15.65222623345367, "grad_norm": 1203.624267578125, "learning_rate": 0.00018892626253317748, "loss": 7.3498, "step": 130070 }, { "epoch": 15.653429602888087, "grad_norm": 819.9821166992188, "learning_rate": 0.0001889245224073572, "loss": 7.4228, "step": 130080 }, { "epoch": 15.654632972322503, "grad_norm": 465.1644592285156, "learning_rate": 0.0001889227821528412, "loss": 7.4745, "step": 130090 }, { "epoch": 15.655836341756919, "grad_norm": 1157.712158203125, "learning_rate": 0.000188921041769632, "loss": 7.4576, "step": 130100 }, { "epoch": 15.657039711191336, "grad_norm": 1139.73876953125, "learning_rate": 0.00018891930125773207, "loss": 7.3787, "step": 130110 }, { "epoch": 15.658243080625752, "grad_norm": 817.1405639648438, "learning_rate": 0.000188917560617144, "loss": 7.4338, "step": 130120 }, { "epoch": 15.659446450060168, "grad_norm": 750.9105834960938, "learning_rate": 0.0001889158198478702, "loss": 7.4513, "step": 130130 }, { "epoch": 15.660649819494585, "grad_norm": 550.6473999023438, "learning_rate": 0.0001889140789499133, "loss": 7.4223, "step": 130140 }, { "epoch": 15.661853188929001, "grad_norm": 494.1597900390625, "learning_rate": 0.0001889123379232758, "loss": 7.4624, "step": 130150 }, { "epoch": 15.663056558363417, "grad_norm": 545.972412109375, "learning_rate": 0.00018891059676796017, "loss": 7.4263, "step": 130160 }, { "epoch": 15.664259927797834, "grad_norm": 373.7424621582031, "learning_rate": 0.00018890885548396896, "loss": 7.3634, "step": 130170 }, { "epoch": 15.66546329723225, "grad_norm": 1047.8740234375, "learning_rate": 0.00018890711407130466, "loss": 7.4613, "step": 130180 }, { "epoch": 15.666666666666666, "grad_norm": 503.9117126464844, "learning_rate": 0.00018890537252996983, "loss": 7.4797, "step": 130190 }, { "epoch": 15.667870036101084, "grad_norm": 813.9482421875, "learning_rate": 0.00018890363085996697, "loss": 7.5782, "step": 130200 }, { "epoch": 15.6690734055355, "grad_norm": 656.7713623046875, "learning_rate": 0.00018890188906129862, "loss": 7.482, "step": 130210 }, { "epoch": 15.670276774969915, "grad_norm": 551.8267822265625, "learning_rate": 0.00018890014713396727, "loss": 7.4858, "step": 130220 }, { "epoch": 15.671480144404333, "grad_norm": 595.2601318359375, "learning_rate": 0.00018889840507797548, "loss": 7.4328, "step": 130230 }, { "epoch": 15.672683513838749, "grad_norm": 447.2945861816406, "learning_rate": 0.00018889666289332574, "loss": 7.419, "step": 130240 }, { "epoch": 15.673886883273164, "grad_norm": 1281.6627197265625, "learning_rate": 0.00018889492058002055, "loss": 7.4653, "step": 130250 }, { "epoch": 15.675090252707582, "grad_norm": 1769.7283935546875, "learning_rate": 0.0001888931781380625, "loss": 7.524, "step": 130260 }, { "epoch": 15.676293622141998, "grad_norm": 768.0513305664062, "learning_rate": 0.00018889143556745407, "loss": 7.4309, "step": 130270 }, { "epoch": 15.677496991576414, "grad_norm": 1498.276123046875, "learning_rate": 0.0001888896928681978, "loss": 7.3979, "step": 130280 }, { "epoch": 15.678700361010831, "grad_norm": 626.4533081054688, "learning_rate": 0.00018888795004029618, "loss": 7.4299, "step": 130290 }, { "epoch": 15.679903730445247, "grad_norm": 1042.447265625, "learning_rate": 0.00018888620708375177, "loss": 7.3081, "step": 130300 }, { "epoch": 15.681107099879663, "grad_norm": 898.1484375, "learning_rate": 0.00018888446399856703, "loss": 7.3163, "step": 130310 }, { "epoch": 15.68231046931408, "grad_norm": 586.4484252929688, "learning_rate": 0.00018888272078474455, "loss": 7.3992, "step": 130320 }, { "epoch": 15.683513838748496, "grad_norm": 432.5578308105469, "learning_rate": 0.00018888097744228683, "loss": 7.4685, "step": 130330 }, { "epoch": 15.684717208182912, "grad_norm": 705.9630737304688, "learning_rate": 0.0001888792339711964, "loss": 7.4727, "step": 130340 }, { "epoch": 15.685920577617328, "grad_norm": 1393.160400390625, "learning_rate": 0.00018887749037147574, "loss": 7.3867, "step": 130350 }, { "epoch": 15.687123947051745, "grad_norm": 931.7964477539062, "learning_rate": 0.00018887574664312745, "loss": 7.4873, "step": 130360 }, { "epoch": 15.688327316486161, "grad_norm": 2567.56201171875, "learning_rate": 0.00018887400278615402, "loss": 7.4288, "step": 130370 }, { "epoch": 15.689530685920577, "grad_norm": 1022.0443725585938, "learning_rate": 0.00018887225880055792, "loss": 7.3856, "step": 130380 }, { "epoch": 15.690734055354994, "grad_norm": 602.54736328125, "learning_rate": 0.00018887051468634175, "loss": 7.3619, "step": 130390 }, { "epoch": 15.69193742478941, "grad_norm": 816.98828125, "learning_rate": 0.000188868770443508, "loss": 7.392, "step": 130400 }, { "epoch": 15.693140794223826, "grad_norm": 587.8736572265625, "learning_rate": 0.00018886702607205918, "loss": 7.3641, "step": 130410 }, { "epoch": 15.694344163658243, "grad_norm": 735.8976440429688, "learning_rate": 0.00018886528157199784, "loss": 7.4425, "step": 130420 }, { "epoch": 15.69554753309266, "grad_norm": 2796.738525390625, "learning_rate": 0.00018886353694332652, "loss": 7.3884, "step": 130430 }, { "epoch": 15.696750902527075, "grad_norm": 1038.78125, "learning_rate": 0.00018886179218604768, "loss": 7.4072, "step": 130440 }, { "epoch": 15.697954271961493, "grad_norm": 1287.992919921875, "learning_rate": 0.00018886004730016393, "loss": 7.413, "step": 130450 }, { "epoch": 15.699157641395908, "grad_norm": 485.7673034667969, "learning_rate": 0.00018885830228567774, "loss": 7.4314, "step": 130460 }, { "epoch": 15.700361010830324, "grad_norm": 653.8545532226562, "learning_rate": 0.00018885655714259163, "loss": 7.4078, "step": 130470 }, { "epoch": 15.701564380264742, "grad_norm": 942.43017578125, "learning_rate": 0.00018885481187090817, "loss": 7.5279, "step": 130480 }, { "epoch": 15.702767749699158, "grad_norm": 1374.56298828125, "learning_rate": 0.00018885306647062984, "loss": 7.4221, "step": 130490 }, { "epoch": 15.703971119133573, "grad_norm": 1813.4957275390625, "learning_rate": 0.0001888513209417592, "loss": 7.5479, "step": 130500 }, { "epoch": 15.705174488567991, "grad_norm": 1242.5025634765625, "learning_rate": 0.00018884957528429873, "loss": 7.4474, "step": 130510 }, { "epoch": 15.706377858002407, "grad_norm": 841.1098022460938, "learning_rate": 0.00018884782949825103, "loss": 7.5788, "step": 130520 }, { "epoch": 15.707581227436823, "grad_norm": 750.1177978515625, "learning_rate": 0.00018884608358361857, "loss": 7.539, "step": 130530 }, { "epoch": 15.70878459687124, "grad_norm": 818.6088256835938, "learning_rate": 0.00018884433754040388, "loss": 7.4332, "step": 130540 }, { "epoch": 15.709987966305656, "grad_norm": 978.2344360351562, "learning_rate": 0.0001888425913686095, "loss": 7.3983, "step": 130550 }, { "epoch": 15.711191335740072, "grad_norm": 875.69482421875, "learning_rate": 0.000188840845068238, "loss": 7.4338, "step": 130560 }, { "epoch": 15.71239470517449, "grad_norm": 1089.465576171875, "learning_rate": 0.0001888390986392918, "loss": 7.3953, "step": 130570 }, { "epoch": 15.713598074608905, "grad_norm": 782.388916015625, "learning_rate": 0.00018883735208177353, "loss": 7.345, "step": 130580 }, { "epoch": 15.71480144404332, "grad_norm": 1078.1539306640625, "learning_rate": 0.00018883560539568567, "loss": 7.3685, "step": 130590 }, { "epoch": 15.716004813477738, "grad_norm": 1036.6904296875, "learning_rate": 0.00018883385858103075, "loss": 7.4876, "step": 130600 }, { "epoch": 15.717208182912154, "grad_norm": 854.8037109375, "learning_rate": 0.00018883211163781132, "loss": 7.2753, "step": 130610 }, { "epoch": 15.71841155234657, "grad_norm": 704.491943359375, "learning_rate": 0.00018883036456602986, "loss": 7.3449, "step": 130620 }, { "epoch": 15.719614921780988, "grad_norm": 2854.128173828125, "learning_rate": 0.00018882861736568897, "loss": 7.3086, "step": 130630 }, { "epoch": 15.720818291215403, "grad_norm": 1014.7636108398438, "learning_rate": 0.0001888268700367911, "loss": 7.3472, "step": 130640 }, { "epoch": 15.722021660649819, "grad_norm": 1005.1486206054688, "learning_rate": 0.00018882512257933885, "loss": 7.4366, "step": 130650 }, { "epoch": 15.723225030084237, "grad_norm": 940.9915771484375, "learning_rate": 0.00018882337499333472, "loss": 7.4396, "step": 130660 }, { "epoch": 15.724428399518652, "grad_norm": 690.8624267578125, "learning_rate": 0.00018882162727878118, "loss": 7.4627, "step": 130670 }, { "epoch": 15.725631768953068, "grad_norm": 647.682373046875, "learning_rate": 0.0001888198794356809, "loss": 7.2562, "step": 130680 }, { "epoch": 15.726835138387486, "grad_norm": 552.2103271484375, "learning_rate": 0.00018881813146403627, "loss": 7.3659, "step": 130690 }, { "epoch": 15.728038507821902, "grad_norm": 1565.328125, "learning_rate": 0.00018881638336384988, "loss": 7.4248, "step": 130700 }, { "epoch": 15.729241877256317, "grad_norm": 410.8542175292969, "learning_rate": 0.00018881463513512427, "loss": 7.3311, "step": 130710 }, { "epoch": 15.730445246690735, "grad_norm": 1234.8740234375, "learning_rate": 0.00018881288677786195, "loss": 7.3848, "step": 130720 }, { "epoch": 15.73164861612515, "grad_norm": 660.88720703125, "learning_rate": 0.00018881113829206548, "loss": 7.3114, "step": 130730 }, { "epoch": 15.732851985559567, "grad_norm": 337.2177734375, "learning_rate": 0.0001888093896777373, "loss": 7.4448, "step": 130740 }, { "epoch": 15.734055354993982, "grad_norm": 876.92431640625, "learning_rate": 0.00018880764093488005, "loss": 7.4242, "step": 130750 }, { "epoch": 15.7352587244284, "grad_norm": 1161.06689453125, "learning_rate": 0.00018880589206349624, "loss": 7.3932, "step": 130760 }, { "epoch": 15.736462093862816, "grad_norm": 919.97998046875, "learning_rate": 0.00018880414306358834, "loss": 7.4249, "step": 130770 }, { "epoch": 15.737665463297231, "grad_norm": 701.520263671875, "learning_rate": 0.00018880239393515895, "loss": 7.4571, "step": 130780 }, { "epoch": 15.738868832731649, "grad_norm": 633.79443359375, "learning_rate": 0.00018880064467821054, "loss": 7.4091, "step": 130790 }, { "epoch": 15.740072202166065, "grad_norm": 925.3358764648438, "learning_rate": 0.0001887988952927457, "loss": 7.3466, "step": 130800 }, { "epoch": 15.74127557160048, "grad_norm": 2348.89013671875, "learning_rate": 0.00018879714577876692, "loss": 7.2597, "step": 130810 }, { "epoch": 15.742478941034898, "grad_norm": 1451.4468994140625, "learning_rate": 0.00018879539613627676, "loss": 7.2688, "step": 130820 }, { "epoch": 15.743682310469314, "grad_norm": 519.1000366210938, "learning_rate": 0.00018879364636527771, "loss": 7.2851, "step": 130830 }, { "epoch": 15.74488567990373, "grad_norm": 489.55072021484375, "learning_rate": 0.00018879189646577236, "loss": 7.2179, "step": 130840 }, { "epoch": 15.746089049338147, "grad_norm": 962.341796875, "learning_rate": 0.00018879014643776321, "loss": 7.3851, "step": 130850 }, { "epoch": 15.747292418772563, "grad_norm": 1485.290771484375, "learning_rate": 0.0001887883962812528, "loss": 7.1963, "step": 130860 }, { "epoch": 15.748495788206979, "grad_norm": 507.83209228515625, "learning_rate": 0.00018878664599624364, "loss": 7.325, "step": 130870 }, { "epoch": 15.749699157641396, "grad_norm": 1357.3214111328125, "learning_rate": 0.0001887848955827383, "loss": 7.3346, "step": 130880 }, { "epoch": 15.750902527075812, "grad_norm": 641.6832275390625, "learning_rate": 0.00018878314504073928, "loss": 7.4197, "step": 130890 }, { "epoch": 15.752105896510228, "grad_norm": 1423.2657470703125, "learning_rate": 0.00018878139437024914, "loss": 7.3629, "step": 130900 }, { "epoch": 15.753309265944646, "grad_norm": 818.1052856445312, "learning_rate": 0.00018877964357127042, "loss": 7.3241, "step": 130910 }, { "epoch": 15.754512635379061, "grad_norm": 841.8325805664062, "learning_rate": 0.0001887778926438056, "loss": 7.3025, "step": 130920 }, { "epoch": 15.755716004813477, "grad_norm": 1068.5340576171875, "learning_rate": 0.00018877614158785728, "loss": 7.3987, "step": 130930 }, { "epoch": 15.756919374247895, "grad_norm": 1043.1324462890625, "learning_rate": 0.00018877439040342793, "loss": 7.3959, "step": 130940 }, { "epoch": 15.75812274368231, "grad_norm": 1346.5728759765625, "learning_rate": 0.00018877263909052014, "loss": 7.3352, "step": 130950 }, { "epoch": 15.759326113116726, "grad_norm": 1050.9190673828125, "learning_rate": 0.00018877088764913643, "loss": 7.2268, "step": 130960 }, { "epoch": 15.760529482551144, "grad_norm": 1043.7625732421875, "learning_rate": 0.00018876913607927934, "loss": 7.4861, "step": 130970 }, { "epoch": 15.76173285198556, "grad_norm": 1170.1287841796875, "learning_rate": 0.0001887673843809514, "loss": 7.3023, "step": 130980 }, { "epoch": 15.762936221419976, "grad_norm": 1080.5343017578125, "learning_rate": 0.0001887656325541551, "loss": 7.3434, "step": 130990 }, { "epoch": 15.764139590854393, "grad_norm": 1283.1015625, "learning_rate": 0.00018876388059889303, "loss": 7.2833, "step": 131000 }, { "epoch": 15.765342960288809, "grad_norm": 1127.6925048828125, "learning_rate": 0.0001887621285151677, "loss": 7.4026, "step": 131010 }, { "epoch": 15.766546329723225, "grad_norm": 1644.163818359375, "learning_rate": 0.00018876037630298167, "loss": 7.4056, "step": 131020 }, { "epoch": 15.76774969915764, "grad_norm": 1265.3726806640625, "learning_rate": 0.00018875862396233747, "loss": 7.2092, "step": 131030 }, { "epoch": 15.768953068592058, "grad_norm": 746.2817993164062, "learning_rate": 0.0001887568714932376, "loss": 7.3911, "step": 131040 }, { "epoch": 15.770156438026474, "grad_norm": 1599.3319091796875, "learning_rate": 0.00018875511889568464, "loss": 7.2268, "step": 131050 }, { "epoch": 15.77135980746089, "grad_norm": 1164.397216796875, "learning_rate": 0.0001887533661696811, "loss": 7.3815, "step": 131060 }, { "epoch": 15.772563176895307, "grad_norm": 900.9363403320312, "learning_rate": 0.00018875161331522957, "loss": 7.3425, "step": 131070 }, { "epoch": 15.773766546329723, "grad_norm": 1702.6241455078125, "learning_rate": 0.0001887498603323325, "loss": 7.3555, "step": 131080 }, { "epoch": 15.774969915764139, "grad_norm": 949.272705078125, "learning_rate": 0.00018874810722099248, "loss": 7.357, "step": 131090 }, { "epoch": 15.776173285198556, "grad_norm": 864.026123046875, "learning_rate": 0.00018874635398121204, "loss": 7.2819, "step": 131100 }, { "epoch": 15.777376654632972, "grad_norm": 1625.66162109375, "learning_rate": 0.00018874460061299371, "loss": 7.355, "step": 131110 }, { "epoch": 15.778580024067388, "grad_norm": 792.3026733398438, "learning_rate": 0.00018874284711634004, "loss": 7.3485, "step": 131120 }, { "epoch": 15.779783393501805, "grad_norm": 2529.9794921875, "learning_rate": 0.00018874109349125356, "loss": 7.2638, "step": 131130 }, { "epoch": 15.780986762936221, "grad_norm": 732.0496826171875, "learning_rate": 0.00018873933973773677, "loss": 7.3166, "step": 131140 }, { "epoch": 15.782190132370637, "grad_norm": 2237.266357421875, "learning_rate": 0.0001887375858557923, "loss": 7.3734, "step": 131150 }, { "epoch": 15.783393501805055, "grad_norm": 905.0309448242188, "learning_rate": 0.0001887358318454226, "loss": 7.4116, "step": 131160 }, { "epoch": 15.78459687123947, "grad_norm": 2812.263916015625, "learning_rate": 0.00018873407770663027, "loss": 7.3888, "step": 131170 }, { "epoch": 15.785800240673886, "grad_norm": 1823.0867919921875, "learning_rate": 0.00018873232343941782, "loss": 7.3182, "step": 131180 }, { "epoch": 15.787003610108304, "grad_norm": 1092.5263671875, "learning_rate": 0.00018873056904378778, "loss": 7.3221, "step": 131190 }, { "epoch": 15.78820697954272, "grad_norm": 1448.7960205078125, "learning_rate": 0.00018872881451974268, "loss": 7.3607, "step": 131200 }, { "epoch": 15.789410348977135, "grad_norm": 1206.72265625, "learning_rate": 0.00018872705986728512, "loss": 7.3801, "step": 131210 }, { "epoch": 15.790613718411553, "grad_norm": 1181.626708984375, "learning_rate": 0.00018872530508641758, "loss": 7.3637, "step": 131220 }, { "epoch": 15.791817087845969, "grad_norm": 522.133056640625, "learning_rate": 0.0001887235501771426, "loss": 7.3381, "step": 131230 }, { "epoch": 15.793020457280385, "grad_norm": 629.7036743164062, "learning_rate": 0.00018872179513946277, "loss": 7.2743, "step": 131240 }, { "epoch": 15.794223826714802, "grad_norm": 908.8665771484375, "learning_rate": 0.00018872003997338056, "loss": 7.3811, "step": 131250 }, { "epoch": 15.795427196149218, "grad_norm": 1087.2618408203125, "learning_rate": 0.00018871828467889858, "loss": 7.3002, "step": 131260 }, { "epoch": 15.796630565583634, "grad_norm": 652.273193359375, "learning_rate": 0.00018871652925601934, "loss": 7.2868, "step": 131270 }, { "epoch": 15.797833935018051, "grad_norm": 1055.8106689453125, "learning_rate": 0.00018871477370474535, "loss": 7.409, "step": 131280 }, { "epoch": 15.799037304452467, "grad_norm": 789.350830078125, "learning_rate": 0.00018871301802507923, "loss": 7.2364, "step": 131290 }, { "epoch": 15.800240673886883, "grad_norm": 1013.0385131835938, "learning_rate": 0.00018871126221702342, "loss": 7.346, "step": 131300 }, { "epoch": 15.8014440433213, "grad_norm": 527.7053833007812, "learning_rate": 0.00018870950628058051, "loss": 7.3471, "step": 131310 }, { "epoch": 15.802647412755716, "grad_norm": 586.3790893554688, "learning_rate": 0.00018870775021575305, "loss": 7.4011, "step": 131320 }, { "epoch": 15.803850782190132, "grad_norm": 776.3505859375, "learning_rate": 0.00018870599402254361, "loss": 7.3638, "step": 131330 }, { "epoch": 15.80505415162455, "grad_norm": 705.1693115234375, "learning_rate": 0.00018870423770095466, "loss": 7.2738, "step": 131340 }, { "epoch": 15.806257521058965, "grad_norm": 775.1263427734375, "learning_rate": 0.0001887024812509888, "loss": 7.4197, "step": 131350 }, { "epoch": 15.807460890493381, "grad_norm": 693.7694702148438, "learning_rate": 0.00018870072467264853, "loss": 7.288, "step": 131360 }, { "epoch": 15.808664259927799, "grad_norm": 1427.586181640625, "learning_rate": 0.00018869896796593644, "loss": 7.2597, "step": 131370 }, { "epoch": 15.809867629362214, "grad_norm": 505.89019775390625, "learning_rate": 0.000188697211130855, "loss": 7.2885, "step": 131380 }, { "epoch": 15.81107099879663, "grad_norm": 730.5121459960938, "learning_rate": 0.00018869545416740682, "loss": 7.2721, "step": 131390 }, { "epoch": 15.812274368231048, "grad_norm": 2388.80615234375, "learning_rate": 0.0001886936970755944, "loss": 7.2726, "step": 131400 }, { "epoch": 15.813477737665464, "grad_norm": 749.6900024414062, "learning_rate": 0.00018869193985542032, "loss": 7.3316, "step": 131410 }, { "epoch": 15.81468110709988, "grad_norm": 702.511962890625, "learning_rate": 0.00018869018250688714, "loss": 7.31, "step": 131420 }, { "epoch": 15.815884476534297, "grad_norm": 1237.361572265625, "learning_rate": 0.00018868842502999734, "loss": 7.3249, "step": 131430 }, { "epoch": 15.817087845968713, "grad_norm": 646.3370361328125, "learning_rate": 0.00018868666742475346, "loss": 7.4005, "step": 131440 }, { "epoch": 15.818291215403129, "grad_norm": 962.4224853515625, "learning_rate": 0.00018868490969115812, "loss": 7.2904, "step": 131450 }, { "epoch": 15.819494584837544, "grad_norm": 473.9652099609375, "learning_rate": 0.00018868315182921382, "loss": 7.2729, "step": 131460 }, { "epoch": 15.820697954271962, "grad_norm": 574.8343505859375, "learning_rate": 0.0001886813938389231, "loss": 7.2934, "step": 131470 }, { "epoch": 15.821901323706378, "grad_norm": 1261.2105712890625, "learning_rate": 0.00018867963572028848, "loss": 7.3167, "step": 131480 }, { "epoch": 15.823104693140793, "grad_norm": 284.2709655761719, "learning_rate": 0.00018867787747331257, "loss": 7.3167, "step": 131490 }, { "epoch": 15.824308062575211, "grad_norm": 543.37841796875, "learning_rate": 0.00018867611909799786, "loss": 7.3328, "step": 131500 }, { "epoch": 15.825511432009627, "grad_norm": 600.607666015625, "learning_rate": 0.0001886743605943469, "loss": 7.2612, "step": 131510 }, { "epoch": 15.826714801444043, "grad_norm": 246.95547485351562, "learning_rate": 0.00018867260196236228, "loss": 7.3524, "step": 131520 }, { "epoch": 15.82791817087846, "grad_norm": 356.9399719238281, "learning_rate": 0.00018867084320204646, "loss": 7.296, "step": 131530 }, { "epoch": 15.829121540312876, "grad_norm": 476.8534240722656, "learning_rate": 0.00018866908431340208, "loss": 7.3902, "step": 131540 }, { "epoch": 15.830324909747292, "grad_norm": 718.119140625, "learning_rate": 0.00018866732529643164, "loss": 7.3336, "step": 131550 }, { "epoch": 15.83152827918171, "grad_norm": 462.7386474609375, "learning_rate": 0.00018866556615113766, "loss": 7.3429, "step": 131560 }, { "epoch": 15.832731648616125, "grad_norm": 726.5955200195312, "learning_rate": 0.00018866380687752274, "loss": 7.2506, "step": 131570 }, { "epoch": 15.833935018050541, "grad_norm": 323.6458435058594, "learning_rate": 0.0001886620474755894, "loss": 7.2381, "step": 131580 }, { "epoch": 15.835138387484958, "grad_norm": 387.4329833984375, "learning_rate": 0.00018866028794534018, "loss": 7.3101, "step": 131590 }, { "epoch": 15.836341756919374, "grad_norm": 296.24725341796875, "learning_rate": 0.00018865852828677765, "loss": 7.2715, "step": 131600 }, { "epoch": 15.83754512635379, "grad_norm": 454.244140625, "learning_rate": 0.00018865676849990433, "loss": 7.268, "step": 131610 }, { "epoch": 15.838748495788208, "grad_norm": 597.741943359375, "learning_rate": 0.00018865500858472278, "loss": 7.3178, "step": 131620 }, { "epoch": 15.839951865222623, "grad_norm": 1367.4615478515625, "learning_rate": 0.00018865324854123552, "loss": 7.3464, "step": 131630 }, { "epoch": 15.84115523465704, "grad_norm": 437.6692199707031, "learning_rate": 0.00018865148836944514, "loss": 7.367, "step": 131640 }, { "epoch": 15.842358604091457, "grad_norm": 985.8402099609375, "learning_rate": 0.00018864972806935417, "loss": 7.3601, "step": 131650 }, { "epoch": 15.843561973525873, "grad_norm": 2324.87744140625, "learning_rate": 0.00018864796764096517, "loss": 7.4486, "step": 131660 }, { "epoch": 15.844765342960288, "grad_norm": 389.7665100097656, "learning_rate": 0.00018864620708428068, "loss": 7.3407, "step": 131670 }, { "epoch": 15.845968712394706, "grad_norm": 615.8221435546875, "learning_rate": 0.0001886444463993032, "loss": 7.3344, "step": 131680 }, { "epoch": 15.847172081829122, "grad_norm": 5037.34521484375, "learning_rate": 0.00018864268558603536, "loss": 7.3313, "step": 131690 }, { "epoch": 15.848375451263538, "grad_norm": 1342.1685791015625, "learning_rate": 0.00018864092464447967, "loss": 7.3743, "step": 131700 }, { "epoch": 15.849578820697955, "grad_norm": 1030.5870361328125, "learning_rate": 0.00018863916357463864, "loss": 7.4617, "step": 131710 }, { "epoch": 15.85078219013237, "grad_norm": 529.4223022460938, "learning_rate": 0.00018863740237651488, "loss": 7.3907, "step": 131720 }, { "epoch": 15.851985559566787, "grad_norm": 1138.220947265625, "learning_rate": 0.0001886356410501109, "loss": 7.494, "step": 131730 }, { "epoch": 15.853188929001202, "grad_norm": 851.51904296875, "learning_rate": 0.0001886338795954293, "loss": 7.4549, "step": 131740 }, { "epoch": 15.85439229843562, "grad_norm": 2602.716796875, "learning_rate": 0.00018863211801247256, "loss": 7.4268, "step": 131750 }, { "epoch": 15.855595667870036, "grad_norm": 1488.7198486328125, "learning_rate": 0.00018863035630124327, "loss": 7.432, "step": 131760 }, { "epoch": 15.856799037304452, "grad_norm": 1121.366943359375, "learning_rate": 0.00018862859446174397, "loss": 7.4475, "step": 131770 }, { "epoch": 15.85800240673887, "grad_norm": 3571.143798828125, "learning_rate": 0.00018862683249397722, "loss": 7.5597, "step": 131780 }, { "epoch": 15.859205776173285, "grad_norm": 676.1463012695312, "learning_rate": 0.00018862507039794557, "loss": 7.4271, "step": 131790 }, { "epoch": 15.8604091456077, "grad_norm": 1601.666748046875, "learning_rate": 0.00018862330817365151, "loss": 7.4623, "step": 131800 }, { "epoch": 15.861612515042118, "grad_norm": 1726.900634765625, "learning_rate": 0.00018862154582109772, "loss": 7.4037, "step": 131810 }, { "epoch": 15.862815884476534, "grad_norm": 1261.962890625, "learning_rate": 0.00018861978334028663, "loss": 7.4488, "step": 131820 }, { "epoch": 15.86401925391095, "grad_norm": 1210.049072265625, "learning_rate": 0.00018861802073122085, "loss": 7.4747, "step": 131830 }, { "epoch": 15.865222623345367, "grad_norm": 1083.919189453125, "learning_rate": 0.0001886162579939029, "loss": 7.4403, "step": 131840 }, { "epoch": 15.866425992779783, "grad_norm": 1613.10986328125, "learning_rate": 0.00018861449512833534, "loss": 7.5355, "step": 131850 }, { "epoch": 15.867629362214199, "grad_norm": 987.13037109375, "learning_rate": 0.00018861273213452076, "loss": 7.4347, "step": 131860 }, { "epoch": 15.868832731648617, "grad_norm": 601.0751342773438, "learning_rate": 0.00018861096901246166, "loss": 7.5164, "step": 131870 }, { "epoch": 15.870036101083032, "grad_norm": 685.5590209960938, "learning_rate": 0.0001886092057621606, "loss": 7.4458, "step": 131880 }, { "epoch": 15.871239470517448, "grad_norm": 419.60888671875, "learning_rate": 0.00018860744238362014, "loss": 7.3614, "step": 131890 }, { "epoch": 15.872442839951866, "grad_norm": 2338.96533203125, "learning_rate": 0.00018860567887684284, "loss": 7.3878, "step": 131900 }, { "epoch": 15.873646209386282, "grad_norm": 510.9063415527344, "learning_rate": 0.00018860391524183124, "loss": 7.3111, "step": 131910 }, { "epoch": 15.874849578820697, "grad_norm": 800.651611328125, "learning_rate": 0.00018860215147858793, "loss": 7.4052, "step": 131920 }, { "epoch": 15.876052948255115, "grad_norm": 814.5459594726562, "learning_rate": 0.0001886003875871154, "loss": 7.3182, "step": 131930 }, { "epoch": 15.87725631768953, "grad_norm": 552.2872314453125, "learning_rate": 0.00018859862356741623, "loss": 7.416, "step": 131940 }, { "epoch": 15.878459687123947, "grad_norm": 1093.0543212890625, "learning_rate": 0.00018859685941949303, "loss": 7.3964, "step": 131950 }, { "epoch": 15.879663056558364, "grad_norm": 928.7797241210938, "learning_rate": 0.00018859509514334826, "loss": 7.3422, "step": 131960 }, { "epoch": 15.88086642599278, "grad_norm": 1768.70849609375, "learning_rate": 0.00018859333073898451, "loss": 7.342, "step": 131970 }, { "epoch": 15.882069795427196, "grad_norm": 736.9295043945312, "learning_rate": 0.00018859156620640435, "loss": 7.3474, "step": 131980 }, { "epoch": 15.883273164861613, "grad_norm": 869.2301025390625, "learning_rate": 0.00018858980154561031, "loss": 7.4479, "step": 131990 }, { "epoch": 15.884476534296029, "grad_norm": 2636.7568359375, "learning_rate": 0.00018858803675660498, "loss": 7.3574, "step": 132000 }, { "epoch": 15.885679903730445, "grad_norm": 2663.193359375, "learning_rate": 0.00018858627183939087, "loss": 7.4161, "step": 132010 }, { "epoch": 15.886883273164862, "grad_norm": 742.95166015625, "learning_rate": 0.00018858450679397053, "loss": 7.3578, "step": 132020 }, { "epoch": 15.888086642599278, "grad_norm": 1010.35009765625, "learning_rate": 0.00018858274162034656, "loss": 7.3829, "step": 132030 }, { "epoch": 15.889290012033694, "grad_norm": 1248.4332275390625, "learning_rate": 0.0001885809763185215, "loss": 7.3209, "step": 132040 }, { "epoch": 15.890493381468112, "grad_norm": 1663.683837890625, "learning_rate": 0.0001885792108884979, "loss": 7.3999, "step": 132050 }, { "epoch": 15.891696750902527, "grad_norm": 973.1458740234375, "learning_rate": 0.0001885774453302783, "loss": 7.4251, "step": 132060 }, { "epoch": 15.892900120336943, "grad_norm": 896.4718627929688, "learning_rate": 0.00018857567964386526, "loss": 7.4159, "step": 132070 }, { "epoch": 15.89410348977136, "grad_norm": 1221.7650146484375, "learning_rate": 0.00018857391382926138, "loss": 7.2595, "step": 132080 }, { "epoch": 15.895306859205776, "grad_norm": 449.065185546875, "learning_rate": 0.00018857214788646915, "loss": 7.2464, "step": 132090 }, { "epoch": 15.896510228640192, "grad_norm": 703.9978637695312, "learning_rate": 0.00018857038181549116, "loss": 7.3952, "step": 132100 }, { "epoch": 15.89771359807461, "grad_norm": 903.7862548828125, "learning_rate": 0.00018856861561632995, "loss": 7.3301, "step": 132110 }, { "epoch": 15.898916967509026, "grad_norm": 1145.3795166015625, "learning_rate": 0.00018856684928898808, "loss": 7.2957, "step": 132120 }, { "epoch": 15.900120336943441, "grad_norm": 827.79736328125, "learning_rate": 0.00018856508283346813, "loss": 7.3222, "step": 132130 }, { "epoch": 15.901323706377857, "grad_norm": 1203.126220703125, "learning_rate": 0.00018856331624977264, "loss": 7.3951, "step": 132140 }, { "epoch": 15.902527075812275, "grad_norm": 1375.4981689453125, "learning_rate": 0.00018856154953790418, "loss": 7.4578, "step": 132150 }, { "epoch": 15.90373044524669, "grad_norm": 860.630126953125, "learning_rate": 0.00018855978269786527, "loss": 7.1981, "step": 132160 }, { "epoch": 15.904933814681106, "grad_norm": 597.31298828125, "learning_rate": 0.0001885580157296585, "loss": 7.2839, "step": 132170 }, { "epoch": 15.906137184115524, "grad_norm": 977.681396484375, "learning_rate": 0.0001885562486332864, "loss": 7.3767, "step": 132180 }, { "epoch": 15.90734055354994, "grad_norm": 1293.9266357421875, "learning_rate": 0.00018855448140875155, "loss": 7.2727, "step": 132190 }, { "epoch": 15.908543922984355, "grad_norm": 1996.4329833984375, "learning_rate": 0.00018855271405605653, "loss": 7.3439, "step": 132200 }, { "epoch": 15.909747292418773, "grad_norm": 1326.6815185546875, "learning_rate": 0.00018855094657520383, "loss": 7.3878, "step": 132210 }, { "epoch": 15.910950661853189, "grad_norm": 683.3563842773438, "learning_rate": 0.00018854917896619607, "loss": 7.4467, "step": 132220 }, { "epoch": 15.912154031287605, "grad_norm": 926.5361328125, "learning_rate": 0.00018854741122903578, "loss": 7.43, "step": 132230 }, { "epoch": 15.913357400722022, "grad_norm": 657.5689086914062, "learning_rate": 0.00018854564336372553, "loss": 7.2433, "step": 132240 }, { "epoch": 15.914560770156438, "grad_norm": 1846.4720458984375, "learning_rate": 0.00018854387537026786, "loss": 7.3929, "step": 132250 }, { "epoch": 15.915764139590854, "grad_norm": 900.9047241210938, "learning_rate": 0.00018854210724866536, "loss": 7.4495, "step": 132260 }, { "epoch": 15.916967509025271, "grad_norm": 1205.8076171875, "learning_rate": 0.00018854033899892055, "loss": 7.4043, "step": 132270 }, { "epoch": 15.918170878459687, "grad_norm": 572.4059448242188, "learning_rate": 0.00018853857062103605, "loss": 7.3304, "step": 132280 }, { "epoch": 15.919374247894103, "grad_norm": 760.0726318359375, "learning_rate": 0.00018853680211501433, "loss": 7.4405, "step": 132290 }, { "epoch": 15.92057761732852, "grad_norm": 1020.7083740234375, "learning_rate": 0.00018853503348085802, "loss": 7.3819, "step": 132300 }, { "epoch": 15.921780986762936, "grad_norm": 627.8305053710938, "learning_rate": 0.00018853326471856963, "loss": 7.3409, "step": 132310 }, { "epoch": 15.922984356197352, "grad_norm": 643.7003173828125, "learning_rate": 0.00018853149582815179, "loss": 7.3013, "step": 132320 }, { "epoch": 15.92418772563177, "grad_norm": 790.8099365234375, "learning_rate": 0.00018852972680960702, "loss": 7.3324, "step": 132330 }, { "epoch": 15.925391095066185, "grad_norm": 565.606201171875, "learning_rate": 0.00018852795766293785, "loss": 7.31, "step": 132340 }, { "epoch": 15.926594464500601, "grad_norm": 553.124267578125, "learning_rate": 0.00018852618838814688, "loss": 7.3832, "step": 132350 }, { "epoch": 15.927797833935019, "grad_norm": 779.17041015625, "learning_rate": 0.00018852441898523664, "loss": 7.3317, "step": 132360 }, { "epoch": 15.929001203369435, "grad_norm": 894.0552368164062, "learning_rate": 0.00018852264945420975, "loss": 7.2975, "step": 132370 }, { "epoch": 15.93020457280385, "grad_norm": 375.84625244140625, "learning_rate": 0.00018852087979506868, "loss": 7.2296, "step": 132380 }, { "epoch": 15.931407942238268, "grad_norm": 917.9942016601562, "learning_rate": 0.0001885191100078161, "loss": 7.2822, "step": 132390 }, { "epoch": 15.932611311672684, "grad_norm": 2386.097412109375, "learning_rate": 0.00018851734009245447, "loss": 7.2722, "step": 132400 }, { "epoch": 15.9338146811071, "grad_norm": 698.42333984375, "learning_rate": 0.00018851557004898642, "loss": 7.3083, "step": 132410 }, { "epoch": 15.935018050541515, "grad_norm": 1420.1732177734375, "learning_rate": 0.00018851379987741446, "loss": 7.3846, "step": 132420 }, { "epoch": 15.936221419975933, "grad_norm": 1176.1473388671875, "learning_rate": 0.00018851202957774116, "loss": 7.2335, "step": 132430 }, { "epoch": 15.937424789410349, "grad_norm": 1198.45166015625, "learning_rate": 0.00018851025914996914, "loss": 7.3123, "step": 132440 }, { "epoch": 15.938628158844764, "grad_norm": 788.9474487304688, "learning_rate": 0.00018850848859410093, "loss": 7.2493, "step": 132450 }, { "epoch": 15.939831528279182, "grad_norm": 759.3018188476562, "learning_rate": 0.00018850671791013907, "loss": 7.3103, "step": 132460 }, { "epoch": 15.941034897713598, "grad_norm": 1486.5074462890625, "learning_rate": 0.00018850494709808614, "loss": 7.2904, "step": 132470 }, { "epoch": 15.942238267148014, "grad_norm": 1557.888916015625, "learning_rate": 0.00018850317615794467, "loss": 7.2453, "step": 132480 }, { "epoch": 15.943441636582431, "grad_norm": 383.0955810546875, "learning_rate": 0.0001885014050897173, "loss": 7.3304, "step": 132490 }, { "epoch": 15.944645006016847, "grad_norm": 1168.932373046875, "learning_rate": 0.00018849963389340653, "loss": 7.4245, "step": 132500 }, { "epoch": 15.945848375451263, "grad_norm": 783.9414672851562, "learning_rate": 0.00018849786256901495, "loss": 7.324, "step": 132510 }, { "epoch": 15.94705174488568, "grad_norm": 1027.5152587890625, "learning_rate": 0.00018849609111654507, "loss": 7.3045, "step": 132520 }, { "epoch": 15.948255114320096, "grad_norm": 1178.9649658203125, "learning_rate": 0.00018849431953599954, "loss": 7.4402, "step": 132530 }, { "epoch": 15.949458483754512, "grad_norm": 1348.3472900390625, "learning_rate": 0.00018849254782738085, "loss": 7.3948, "step": 132540 }, { "epoch": 15.95066185318893, "grad_norm": 3098.52587890625, "learning_rate": 0.00018849077599069163, "loss": 7.2963, "step": 132550 }, { "epoch": 15.951865222623345, "grad_norm": 605.2205810546875, "learning_rate": 0.0001884890040259344, "loss": 7.211, "step": 132560 }, { "epoch": 15.953068592057761, "grad_norm": 1400.8870849609375, "learning_rate": 0.00018848723193311173, "loss": 7.233, "step": 132570 }, { "epoch": 15.954271961492179, "grad_norm": 2605.5439453125, "learning_rate": 0.00018848545971222617, "loss": 7.3758, "step": 132580 }, { "epoch": 15.955475330926594, "grad_norm": 981.1021728515625, "learning_rate": 0.00018848368736328034, "loss": 7.372, "step": 132590 }, { "epoch": 15.95667870036101, "grad_norm": 1613.8927001953125, "learning_rate": 0.00018848191488627674, "loss": 7.3922, "step": 132600 }, { "epoch": 15.957882069795428, "grad_norm": 537.8240356445312, "learning_rate": 0.00018848014228121796, "loss": 7.3488, "step": 132610 }, { "epoch": 15.959085439229844, "grad_norm": 1165.123046875, "learning_rate": 0.0001884783695481066, "loss": 7.2414, "step": 132620 }, { "epoch": 15.96028880866426, "grad_norm": 914.7545166015625, "learning_rate": 0.00018847659668694516, "loss": 7.2546, "step": 132630 }, { "epoch": 15.961492178098677, "grad_norm": 686.739501953125, "learning_rate": 0.00018847482369773626, "loss": 7.3288, "step": 132640 }, { "epoch": 15.962695547533093, "grad_norm": 449.0668640136719, "learning_rate": 0.00018847305058048244, "loss": 7.4283, "step": 132650 }, { "epoch": 15.963898916967509, "grad_norm": 449.6669921875, "learning_rate": 0.00018847127733518626, "loss": 7.2542, "step": 132660 }, { "epoch": 15.965102286401926, "grad_norm": 1582.1160888671875, "learning_rate": 0.00018846950396185033, "loss": 7.3047, "step": 132670 }, { "epoch": 15.966305655836342, "grad_norm": 729.3178100585938, "learning_rate": 0.00018846773046047718, "loss": 7.4101, "step": 132680 }, { "epoch": 15.967509025270758, "grad_norm": 610.6448974609375, "learning_rate": 0.00018846595683106937, "loss": 7.3058, "step": 132690 }, { "epoch": 15.968712394705175, "grad_norm": 852.5591430664062, "learning_rate": 0.00018846418307362947, "loss": 7.2494, "step": 132700 }, { "epoch": 15.969915764139591, "grad_norm": 894.6786499023438, "learning_rate": 0.00018846240918816008, "loss": 7.3286, "step": 132710 }, { "epoch": 15.971119133574007, "grad_norm": 933.6132202148438, "learning_rate": 0.0001884606351746637, "loss": 7.3031, "step": 132720 }, { "epoch": 15.972322503008424, "grad_norm": 1170.6556396484375, "learning_rate": 0.000188458861033143, "loss": 7.3357, "step": 132730 }, { "epoch": 15.97352587244284, "grad_norm": 695.614013671875, "learning_rate": 0.00018845708676360043, "loss": 7.353, "step": 132740 }, { "epoch": 15.974729241877256, "grad_norm": 1078.06298828125, "learning_rate": 0.00018845531236603863, "loss": 7.2228, "step": 132750 }, { "epoch": 15.975932611311674, "grad_norm": 1467.5294189453125, "learning_rate": 0.00018845353784046018, "loss": 7.2914, "step": 132760 }, { "epoch": 15.97713598074609, "grad_norm": 650.1083984375, "learning_rate": 0.00018845176318686761, "loss": 7.339, "step": 132770 }, { "epoch": 15.978339350180505, "grad_norm": 566.7533569335938, "learning_rate": 0.00018844998840526352, "loss": 7.2865, "step": 132780 }, { "epoch": 15.979542719614923, "grad_norm": 524.9175415039062, "learning_rate": 0.00018844821349565043, "loss": 7.2933, "step": 132790 }, { "epoch": 15.980746089049338, "grad_norm": 1949.8116455078125, "learning_rate": 0.00018844643845803094, "loss": 7.2193, "step": 132800 }, { "epoch": 15.981949458483754, "grad_norm": 491.0684509277344, "learning_rate": 0.00018844466329240764, "loss": 7.3184, "step": 132810 }, { "epoch": 15.98315282791817, "grad_norm": 1322.3477783203125, "learning_rate": 0.00018844288799878305, "loss": 7.2984, "step": 132820 }, { "epoch": 15.984356197352588, "grad_norm": 1918.1973876953125, "learning_rate": 0.00018844111257715977, "loss": 7.341, "step": 132830 }, { "epoch": 15.985559566787003, "grad_norm": 690.3482666015625, "learning_rate": 0.0001884393370275404, "loss": 7.3013, "step": 132840 }, { "epoch": 15.98676293622142, "grad_norm": 2622.135986328125, "learning_rate": 0.0001884375613499274, "loss": 7.2816, "step": 132850 }, { "epoch": 15.987966305655837, "grad_norm": 849.4652099609375, "learning_rate": 0.00018843578554432347, "loss": 7.3266, "step": 132860 }, { "epoch": 15.989169675090253, "grad_norm": 673.3524780273438, "learning_rate": 0.00018843400961073112, "loss": 7.2121, "step": 132870 }, { "epoch": 15.990373044524668, "grad_norm": 1238.9793701171875, "learning_rate": 0.00018843223354915293, "loss": 7.3043, "step": 132880 }, { "epoch": 15.991576413959086, "grad_norm": 964.6995239257812, "learning_rate": 0.00018843045735959143, "loss": 7.3414, "step": 132890 }, { "epoch": 15.992779783393502, "grad_norm": 1358.2208251953125, "learning_rate": 0.00018842868104204926, "loss": 7.2966, "step": 132900 }, { "epoch": 15.993983152827917, "grad_norm": 1238.5880126953125, "learning_rate": 0.00018842690459652895, "loss": 7.3556, "step": 132910 }, { "epoch": 15.995186522262335, "grad_norm": 753.2740478515625, "learning_rate": 0.00018842512802303308, "loss": 7.3793, "step": 132920 }, { "epoch": 15.99638989169675, "grad_norm": 3753.060546875, "learning_rate": 0.00018842335132156422, "loss": 7.3637, "step": 132930 }, { "epoch": 15.997593261131167, "grad_norm": 1965.354248046875, "learning_rate": 0.0001884215744921249, "loss": 7.4331, "step": 132940 }, { "epoch": 15.998796630565584, "grad_norm": 4114.978515625, "learning_rate": 0.00018841979753471776, "loss": 7.4812, "step": 132950 }, { "epoch": 16.0, "grad_norm": 2308.511962890625, "learning_rate": 0.00018841802044934535, "loss": 7.5405, "step": 132960 }, { "epoch": 16.0, "eval_loss": 7.461447238922119, "eval_runtime": 121.0916, "eval_samples_per_second": 61.003, "eval_steps_per_second": 7.631, "step": 132960 }, { "epoch": 16.001203369434418, "grad_norm": 2153.32568359375, "learning_rate": 0.00018841624323601022, "loss": 7.4752, "step": 132970 }, { "epoch": 16.00240673886883, "grad_norm": 889.717041015625, "learning_rate": 0.00018841446589471498, "loss": 7.4672, "step": 132980 }, { "epoch": 16.00361010830325, "grad_norm": 962.61572265625, "learning_rate": 0.00018841268842546217, "loss": 7.399, "step": 132990 }, { "epoch": 16.004813477737667, "grad_norm": 1718.97802734375, "learning_rate": 0.00018841091082825434, "loss": 7.3364, "step": 133000 }, { "epoch": 16.00601684717208, "grad_norm": 1896.02978515625, "learning_rate": 0.00018840913310309412, "loss": 7.3895, "step": 133010 }, { "epoch": 16.0072202166065, "grad_norm": 1826.3760986328125, "learning_rate": 0.00018840735524998405, "loss": 7.4432, "step": 133020 }, { "epoch": 16.008423586040916, "grad_norm": 1622.966552734375, "learning_rate": 0.00018840557726892675, "loss": 7.4268, "step": 133030 }, { "epoch": 16.00962695547533, "grad_norm": 3895.788330078125, "learning_rate": 0.0001884037991599247, "loss": 7.4765, "step": 133040 }, { "epoch": 16.010830324909747, "grad_norm": 4022.718994140625, "learning_rate": 0.00018840202092298055, "loss": 7.4086, "step": 133050 }, { "epoch": 16.012033694344165, "grad_norm": 2445.8984375, "learning_rate": 0.00018840024255809684, "loss": 7.4304, "step": 133060 }, { "epoch": 16.01323706377858, "grad_norm": 1398.728271484375, "learning_rate": 0.00018839846406527614, "loss": 7.3882, "step": 133070 }, { "epoch": 16.014440433212997, "grad_norm": 1955.9288330078125, "learning_rate": 0.00018839668544452107, "loss": 7.5115, "step": 133080 }, { "epoch": 16.015643802647414, "grad_norm": 1860.2630615234375, "learning_rate": 0.00018839490669583418, "loss": 7.383, "step": 133090 }, { "epoch": 16.016847172081828, "grad_norm": 4438.74755859375, "learning_rate": 0.00018839312781921802, "loss": 7.4305, "step": 133100 }, { "epoch": 16.018050541516246, "grad_norm": 2396.3515625, "learning_rate": 0.00018839134881467516, "loss": 7.5489, "step": 133110 }, { "epoch": 16.019253910950663, "grad_norm": 1607.8743896484375, "learning_rate": 0.0001883895696822082, "loss": 7.43, "step": 133120 }, { "epoch": 16.020457280385077, "grad_norm": 1343.34521484375, "learning_rate": 0.00018838779042181972, "loss": 7.4046, "step": 133130 }, { "epoch": 16.021660649819495, "grad_norm": 1568.716552734375, "learning_rate": 0.0001883860110335123, "loss": 7.4269, "step": 133140 }, { "epoch": 16.022864019253912, "grad_norm": 1527.429443359375, "learning_rate": 0.00018838423151728849, "loss": 7.4202, "step": 133150 }, { "epoch": 16.024067388688326, "grad_norm": 2880.157958984375, "learning_rate": 0.00018838245187315086, "loss": 7.5203, "step": 133160 }, { "epoch": 16.025270758122744, "grad_norm": 1841.1431884765625, "learning_rate": 0.00018838067210110202, "loss": 7.3747, "step": 133170 }, { "epoch": 16.02647412755716, "grad_norm": 2396.6181640625, "learning_rate": 0.0001883788922011445, "loss": 7.4847, "step": 133180 }, { "epoch": 16.027677496991576, "grad_norm": 1560.1546630859375, "learning_rate": 0.00018837711217328093, "loss": 7.5355, "step": 133190 }, { "epoch": 16.028880866425993, "grad_norm": 2516.204345703125, "learning_rate": 0.00018837533201751386, "loss": 7.4528, "step": 133200 }, { "epoch": 16.03008423586041, "grad_norm": 2364.503662109375, "learning_rate": 0.00018837355173384585, "loss": 7.382, "step": 133210 }, { "epoch": 16.031287605294825, "grad_norm": 2570.381103515625, "learning_rate": 0.0001883717713222795, "loss": 7.4052, "step": 133220 }, { "epoch": 16.032490974729242, "grad_norm": 1330.7001953125, "learning_rate": 0.00018836999078281736, "loss": 7.5175, "step": 133230 }, { "epoch": 16.03369434416366, "grad_norm": 1902.0858154296875, "learning_rate": 0.00018836821011546207, "loss": 7.381, "step": 133240 }, { "epoch": 16.034897713598074, "grad_norm": 3191.408447265625, "learning_rate": 0.00018836642932021613, "loss": 7.4846, "step": 133250 }, { "epoch": 16.03610108303249, "grad_norm": 1880.0172119140625, "learning_rate": 0.00018836464839708215, "loss": 7.329, "step": 133260 }, { "epoch": 16.03730445246691, "grad_norm": 2109.16162109375, "learning_rate": 0.00018836286734606273, "loss": 7.344, "step": 133270 }, { "epoch": 16.038507821901323, "grad_norm": 1009.3477172851562, "learning_rate": 0.00018836108616716038, "loss": 7.4741, "step": 133280 }, { "epoch": 16.03971119133574, "grad_norm": 1296.6412353515625, "learning_rate": 0.00018835930486037775, "loss": 7.4144, "step": 133290 }, { "epoch": 16.040914560770158, "grad_norm": 2100.59033203125, "learning_rate": 0.0001883575234257174, "loss": 7.4383, "step": 133300 }, { "epoch": 16.042117930204572, "grad_norm": 1190.213623046875, "learning_rate": 0.00018835574186318188, "loss": 7.4075, "step": 133310 }, { "epoch": 16.04332129963899, "grad_norm": 687.1910400390625, "learning_rate": 0.0001883539601727738, "loss": 7.4325, "step": 133320 }, { "epoch": 16.044524669073404, "grad_norm": 1521.695068359375, "learning_rate": 0.00018835217835449572, "loss": 7.3603, "step": 133330 }, { "epoch": 16.04572803850782, "grad_norm": 1121.0152587890625, "learning_rate": 0.00018835039640835023, "loss": 7.3836, "step": 133340 }, { "epoch": 16.04693140794224, "grad_norm": 558.4403686523438, "learning_rate": 0.0001883486143343399, "loss": 7.3389, "step": 133350 }, { "epoch": 16.048134777376653, "grad_norm": 1114.6622314453125, "learning_rate": 0.00018834683213246733, "loss": 7.3147, "step": 133360 }, { "epoch": 16.04933814681107, "grad_norm": 1681.1033935546875, "learning_rate": 0.00018834504980273504, "loss": 7.345, "step": 133370 }, { "epoch": 16.050541516245488, "grad_norm": 1570.2076416015625, "learning_rate": 0.0001883432673451457, "loss": 7.3738, "step": 133380 }, { "epoch": 16.051744885679902, "grad_norm": 2327.41357421875, "learning_rate": 0.00018834148475970183, "loss": 7.4091, "step": 133390 }, { "epoch": 16.05294825511432, "grad_norm": 3199.50390625, "learning_rate": 0.000188339702046406, "loss": 7.4601, "step": 133400 }, { "epoch": 16.054151624548737, "grad_norm": 1199.08984375, "learning_rate": 0.0001883379192052608, "loss": 7.5126, "step": 133410 }, { "epoch": 16.05535499398315, "grad_norm": 1048.759765625, "learning_rate": 0.00018833613623626884, "loss": 7.4348, "step": 133420 }, { "epoch": 16.05655836341757, "grad_norm": 1395.2281494140625, "learning_rate": 0.0001883343531394327, "loss": 7.3279, "step": 133430 }, { "epoch": 16.057761732851986, "grad_norm": 1758.9266357421875, "learning_rate": 0.0001883325699147549, "loss": 7.3411, "step": 133440 }, { "epoch": 16.0589651022864, "grad_norm": 2204.992431640625, "learning_rate": 0.0001883307865622381, "loss": 7.4047, "step": 133450 }, { "epoch": 16.060168471720818, "grad_norm": 1060.8265380859375, "learning_rate": 0.0001883290030818848, "loss": 7.3041, "step": 133460 }, { "epoch": 16.061371841155236, "grad_norm": 597.0863037109375, "learning_rate": 0.00018832721947369767, "loss": 7.3442, "step": 133470 }, { "epoch": 16.06257521058965, "grad_norm": 988.7749633789062, "learning_rate": 0.0001883254357376792, "loss": 7.3666, "step": 133480 }, { "epoch": 16.063778580024067, "grad_norm": 953.1688232421875, "learning_rate": 0.00018832365187383207, "loss": 7.309, "step": 133490 }, { "epoch": 16.064981949458485, "grad_norm": 483.6090087890625, "learning_rate": 0.0001883218678821588, "loss": 7.3684, "step": 133500 }, { "epoch": 16.0661853188929, "grad_norm": 692.1166381835938, "learning_rate": 0.00018832008376266197, "loss": 7.3841, "step": 133510 }, { "epoch": 16.067388688327316, "grad_norm": 654.5657958984375, "learning_rate": 0.00018831829951534418, "loss": 7.2664, "step": 133520 }, { "epoch": 16.068592057761734, "grad_norm": 796.6243896484375, "learning_rate": 0.00018831651514020797, "loss": 7.295, "step": 133530 }, { "epoch": 16.069795427196148, "grad_norm": 1008.3521118164062, "learning_rate": 0.000188314730637256, "loss": 7.3321, "step": 133540 }, { "epoch": 16.070998796630565, "grad_norm": 1003.3748168945312, "learning_rate": 0.00018831294600649078, "loss": 7.231, "step": 133550 }, { "epoch": 16.072202166064983, "grad_norm": 1938.42724609375, "learning_rate": 0.00018831116124791496, "loss": 7.3993, "step": 133560 }, { "epoch": 16.073405535499397, "grad_norm": 797.2125854492188, "learning_rate": 0.00018830937636153105, "loss": 7.3484, "step": 133570 }, { "epoch": 16.074608904933815, "grad_norm": 1339.31689453125, "learning_rate": 0.00018830759134734167, "loss": 7.4835, "step": 133580 }, { "epoch": 16.075812274368232, "grad_norm": 989.5545654296875, "learning_rate": 0.00018830580620534946, "loss": 7.2828, "step": 133590 }, { "epoch": 16.077015643802646, "grad_norm": 4392.4541015625, "learning_rate": 0.00018830402093555687, "loss": 7.352, "step": 133600 }, { "epoch": 16.078219013237064, "grad_norm": 3030.227294921875, "learning_rate": 0.0001883022355379666, "loss": 7.2869, "step": 133610 }, { "epoch": 16.07942238267148, "grad_norm": 3117.56884765625, "learning_rate": 0.0001883004500125812, "loss": 7.3385, "step": 133620 }, { "epoch": 16.080625752105895, "grad_norm": 2173.417236328125, "learning_rate": 0.00018829866435940323, "loss": 7.3315, "step": 133630 }, { "epoch": 16.081829121540313, "grad_norm": 3442.108642578125, "learning_rate": 0.0001882968785784353, "loss": 7.2368, "step": 133640 }, { "epoch": 16.08303249097473, "grad_norm": 1160.810546875, "learning_rate": 0.00018829509266968, "loss": 7.3203, "step": 133650 }, { "epoch": 16.084235860409144, "grad_norm": 1445.635009765625, "learning_rate": 0.00018829330663313985, "loss": 7.1463, "step": 133660 }, { "epoch": 16.085439229843562, "grad_norm": 814.6618041992188, "learning_rate": 0.00018829152046881756, "loss": 7.3323, "step": 133670 }, { "epoch": 16.08664259927798, "grad_norm": 1322.2098388671875, "learning_rate": 0.00018828973417671558, "loss": 7.2867, "step": 133680 }, { "epoch": 16.087845968712394, "grad_norm": 1727.97314453125, "learning_rate": 0.0001882879477568366, "loss": 7.2514, "step": 133690 }, { "epoch": 16.08904933814681, "grad_norm": 1164.9935302734375, "learning_rate": 0.00018828616120918312, "loss": 7.3356, "step": 133700 }, { "epoch": 16.09025270758123, "grad_norm": 1758.5335693359375, "learning_rate": 0.0001882843745337578, "loss": 7.1983, "step": 133710 }, { "epoch": 16.091456077015643, "grad_norm": 915.6506958007812, "learning_rate": 0.00018828258773056318, "loss": 7.3296, "step": 133720 }, { "epoch": 16.09265944645006, "grad_norm": 1868.530517578125, "learning_rate": 0.00018828080079960188, "loss": 7.3555, "step": 133730 }, { "epoch": 16.093862815884478, "grad_norm": 1778.7662353515625, "learning_rate": 0.00018827901374087643, "loss": 7.2858, "step": 133740 }, { "epoch": 16.095066185318892, "grad_norm": 1313.4185791015625, "learning_rate": 0.0001882772265543895, "loss": 7.2891, "step": 133750 }, { "epoch": 16.09626955475331, "grad_norm": 1100.2462158203125, "learning_rate": 0.00018827543924014356, "loss": 7.2885, "step": 133760 }, { "epoch": 16.097472924187727, "grad_norm": 3137.552001953125, "learning_rate": 0.00018827365179814134, "loss": 7.4447, "step": 133770 }, { "epoch": 16.09867629362214, "grad_norm": 2594.281982421875, "learning_rate": 0.0001882718642283853, "loss": 7.3153, "step": 133780 }, { "epoch": 16.09987966305656, "grad_norm": 1250.010009765625, "learning_rate": 0.0001882700765308781, "loss": 7.3383, "step": 133790 }, { "epoch": 16.101083032490976, "grad_norm": 747.5228881835938, "learning_rate": 0.0001882682887056223, "loss": 7.3911, "step": 133800 }, { "epoch": 16.10228640192539, "grad_norm": 913.8907470703125, "learning_rate": 0.0001882665007526205, "loss": 7.2911, "step": 133810 }, { "epoch": 16.103489771359808, "grad_norm": 2426.7666015625, "learning_rate": 0.00018826471267187526, "loss": 7.4006, "step": 133820 }, { "epoch": 16.104693140794225, "grad_norm": 924.7741088867188, "learning_rate": 0.0001882629244633892, "loss": 7.375, "step": 133830 }, { "epoch": 16.10589651022864, "grad_norm": 1016.3461303710938, "learning_rate": 0.0001882611361271649, "loss": 7.3458, "step": 133840 }, { "epoch": 16.107099879663057, "grad_norm": 1101.467041015625, "learning_rate": 0.00018825934766320498, "loss": 7.2704, "step": 133850 }, { "epoch": 16.108303249097474, "grad_norm": 2128.217041015625, "learning_rate": 0.00018825755907151193, "loss": 7.3006, "step": 133860 }, { "epoch": 16.10950661853189, "grad_norm": 3141.464111328125, "learning_rate": 0.00018825577035208845, "loss": 7.4474, "step": 133870 }, { "epoch": 16.110709987966306, "grad_norm": 1540.0906982421875, "learning_rate": 0.00018825398150493705, "loss": 7.459, "step": 133880 }, { "epoch": 16.111913357400724, "grad_norm": 4305.9921875, "learning_rate": 0.00018825219253006036, "loss": 7.3037, "step": 133890 }, { "epoch": 16.113116726835138, "grad_norm": 3027.341552734375, "learning_rate": 0.00018825040342746097, "loss": 7.4396, "step": 133900 }, { "epoch": 16.114320096269555, "grad_norm": 8410.6162109375, "learning_rate": 0.00018824861419714143, "loss": 7.4259, "step": 133910 }, { "epoch": 16.115523465703973, "grad_norm": 6526.33056640625, "learning_rate": 0.0001882468248391044, "loss": 7.3705, "step": 133920 }, { "epoch": 16.116726835138387, "grad_norm": 2232.2724609375, "learning_rate": 0.00018824503535335238, "loss": 7.4946, "step": 133930 }, { "epoch": 16.117930204572804, "grad_norm": 3278.3681640625, "learning_rate": 0.00018824324573988804, "loss": 7.382, "step": 133940 }, { "epoch": 16.119133574007222, "grad_norm": 2583.0341796875, "learning_rate": 0.00018824145599871393, "loss": 7.4288, "step": 133950 }, { "epoch": 16.120336943441636, "grad_norm": 2351.92724609375, "learning_rate": 0.00018823966612983265, "loss": 7.3954, "step": 133960 }, { "epoch": 16.121540312876053, "grad_norm": 2978.618896484375, "learning_rate": 0.00018823787613324677, "loss": 7.3464, "step": 133970 }, { "epoch": 16.12274368231047, "grad_norm": 2466.6240234375, "learning_rate": 0.00018823608600895893, "loss": 7.3458, "step": 133980 }, { "epoch": 16.123947051744885, "grad_norm": 2047.6640625, "learning_rate": 0.00018823429575697163, "loss": 7.3782, "step": 133990 }, { "epoch": 16.125150421179303, "grad_norm": 2854.529052734375, "learning_rate": 0.00018823250537728758, "loss": 7.3129, "step": 134000 }, { "epoch": 16.126353790613717, "grad_norm": 4302.376953125, "learning_rate": 0.00018823071486990929, "loss": 7.4665, "step": 134010 }, { "epoch": 16.127557160048134, "grad_norm": 4303.015625, "learning_rate": 0.00018822892423483935, "loss": 7.3908, "step": 134020 }, { "epoch": 16.128760529482552, "grad_norm": 4873.20361328125, "learning_rate": 0.0001882271334720804, "loss": 7.4954, "step": 134030 }, { "epoch": 16.129963898916966, "grad_norm": 2545.898681640625, "learning_rate": 0.00018822534258163502, "loss": 7.4327, "step": 134040 }, { "epoch": 16.131167268351383, "grad_norm": 3953.999755859375, "learning_rate": 0.00018822355156350576, "loss": 7.4307, "step": 134050 }, { "epoch": 16.1323706377858, "grad_norm": 4592.9296875, "learning_rate": 0.00018822176041769523, "loss": 7.3481, "step": 134060 }, { "epoch": 16.133574007220215, "grad_norm": 3657.686767578125, "learning_rate": 0.00018821996914420604, "loss": 7.4564, "step": 134070 }, { "epoch": 16.134777376654633, "grad_norm": 4818.3388671875, "learning_rate": 0.0001882181777430408, "loss": 7.5084, "step": 134080 }, { "epoch": 16.13598074608905, "grad_norm": 5988.2705078125, "learning_rate": 0.00018821638621420206, "loss": 7.5656, "step": 134090 }, { "epoch": 16.137184115523464, "grad_norm": 3293.47265625, "learning_rate": 0.00018821459455769244, "loss": 7.501, "step": 134100 }, { "epoch": 16.13838748495788, "grad_norm": 3640.26416015625, "learning_rate": 0.00018821280277351449, "loss": 7.5136, "step": 134110 }, { "epoch": 16.1395908543923, "grad_norm": 4979.64794921875, "learning_rate": 0.00018821101086167086, "loss": 7.484, "step": 134120 }, { "epoch": 16.140794223826713, "grad_norm": 4171.54931640625, "learning_rate": 0.0001882092188221641, "loss": 7.4495, "step": 134130 }, { "epoch": 16.14199759326113, "grad_norm": 9866.3525390625, "learning_rate": 0.00018820742665499684, "loss": 7.3779, "step": 134140 }, { "epoch": 16.14320096269555, "grad_norm": 4566.8125, "learning_rate": 0.00018820563436017164, "loss": 7.5587, "step": 134150 }, { "epoch": 16.144404332129962, "grad_norm": 7476.89599609375, "learning_rate": 0.00018820384193769113, "loss": 7.5376, "step": 134160 }, { "epoch": 16.14560770156438, "grad_norm": 4836.7578125, "learning_rate": 0.00018820204938755787, "loss": 7.4673, "step": 134170 }, { "epoch": 16.146811070998798, "grad_norm": 3187.313720703125, "learning_rate": 0.00018820025670977447, "loss": 7.5112, "step": 134180 }, { "epoch": 16.14801444043321, "grad_norm": 3993.05615234375, "learning_rate": 0.00018819846390434353, "loss": 7.5605, "step": 134190 }, { "epoch": 16.14921780986763, "grad_norm": 3113.9814453125, "learning_rate": 0.00018819667097126765, "loss": 7.4392, "step": 134200 }, { "epoch": 16.150421179302047, "grad_norm": 2835.6923828125, "learning_rate": 0.0001881948779105494, "loss": 7.4144, "step": 134210 }, { "epoch": 16.15162454873646, "grad_norm": 5039.08984375, "learning_rate": 0.00018819308472219136, "loss": 7.4577, "step": 134220 }, { "epoch": 16.15282791817088, "grad_norm": 2328.653076171875, "learning_rate": 0.0001881912914061962, "loss": 7.3902, "step": 134230 }, { "epoch": 16.154031287605296, "grad_norm": 3229.591064453125, "learning_rate": 0.00018818949796256643, "loss": 7.4442, "step": 134240 }, { "epoch": 16.15523465703971, "grad_norm": 2118.358154296875, "learning_rate": 0.00018818770439130468, "loss": 7.4533, "step": 134250 }, { "epoch": 16.156438026474127, "grad_norm": 2109.6953125, "learning_rate": 0.00018818591069241357, "loss": 7.418, "step": 134260 }, { "epoch": 16.157641395908545, "grad_norm": 1547.218505859375, "learning_rate": 0.0001881841168658957, "loss": 7.3236, "step": 134270 }, { "epoch": 16.15884476534296, "grad_norm": 1125.2373046875, "learning_rate": 0.0001881823229117536, "loss": 7.4228, "step": 134280 }, { "epoch": 16.160048134777377, "grad_norm": 1350.6263427734375, "learning_rate": 0.00018818052882998992, "loss": 7.315, "step": 134290 }, { "epoch": 16.161251504211794, "grad_norm": 2960.20849609375, "learning_rate": 0.0001881787346206072, "loss": 7.355, "step": 134300 }, { "epoch": 16.162454873646208, "grad_norm": 2167.13720703125, "learning_rate": 0.00018817694028360813, "loss": 7.364, "step": 134310 }, { "epoch": 16.163658243080626, "grad_norm": 3908.79052734375, "learning_rate": 0.00018817514581899526, "loss": 7.3271, "step": 134320 }, { "epoch": 16.164861612515043, "grad_norm": 5377.28076171875, "learning_rate": 0.00018817335122677117, "loss": 7.4126, "step": 134330 }, { "epoch": 16.166064981949457, "grad_norm": 1815.341796875, "learning_rate": 0.00018817155650693848, "loss": 7.3638, "step": 134340 }, { "epoch": 16.167268351383875, "grad_norm": 1928.3946533203125, "learning_rate": 0.0001881697616594998, "loss": 7.3946, "step": 134350 }, { "epoch": 16.168471720818292, "grad_norm": 1802.7713623046875, "learning_rate": 0.00018816796668445765, "loss": 7.3332, "step": 134360 }, { "epoch": 16.169675090252706, "grad_norm": 1804.3046875, "learning_rate": 0.0001881661715818147, "loss": 7.2565, "step": 134370 }, { "epoch": 16.170878459687124, "grad_norm": 635.908203125, "learning_rate": 0.00018816437635157355, "loss": 7.3992, "step": 134380 }, { "epoch": 16.17208182912154, "grad_norm": 653.279052734375, "learning_rate": 0.00018816258099373678, "loss": 7.361, "step": 134390 }, { "epoch": 16.173285198555956, "grad_norm": 1067.9378662109375, "learning_rate": 0.00018816078550830701, "loss": 7.3135, "step": 134400 }, { "epoch": 16.174488567990373, "grad_norm": 1257.174560546875, "learning_rate": 0.00018815898989528676, "loss": 7.3672, "step": 134410 }, { "epoch": 16.17569193742479, "grad_norm": 4849.39453125, "learning_rate": 0.00018815719415467874, "loss": 7.3568, "step": 134420 }, { "epoch": 16.176895306859205, "grad_norm": 9125.1962890625, "learning_rate": 0.00018815539828648546, "loss": 7.3637, "step": 134430 }, { "epoch": 16.178098676293622, "grad_norm": 7759.28271484375, "learning_rate": 0.00018815360229070956, "loss": 7.4134, "step": 134440 }, { "epoch": 16.17930204572804, "grad_norm": 3989.203125, "learning_rate": 0.00018815180616735364, "loss": 7.5372, "step": 134450 }, { "epoch": 16.180505415162454, "grad_norm": 8492.6474609375, "learning_rate": 0.0001881500099164203, "loss": 7.501, "step": 134460 }, { "epoch": 16.18170878459687, "grad_norm": 1717.7882080078125, "learning_rate": 0.0001881482135379121, "loss": 7.4058, "step": 134470 }, { "epoch": 16.18291215403129, "grad_norm": 2014.2508544921875, "learning_rate": 0.0001881464170318317, "loss": 7.4289, "step": 134480 }, { "epoch": 16.184115523465703, "grad_norm": 2031.71826171875, "learning_rate": 0.00018814462039818167, "loss": 7.2289, "step": 134490 }, { "epoch": 16.18531889290012, "grad_norm": 2361.267333984375, "learning_rate": 0.00018814282363696462, "loss": 7.3201, "step": 134500 }, { "epoch": 16.186522262334538, "grad_norm": 4718.13818359375, "learning_rate": 0.00018814102674818314, "loss": 7.1876, "step": 134510 }, { "epoch": 16.187725631768952, "grad_norm": 1611.455810546875, "learning_rate": 0.00018813922973183982, "loss": 7.4207, "step": 134520 }, { "epoch": 16.18892900120337, "grad_norm": 1906.565673828125, "learning_rate": 0.0001881374325879373, "loss": 7.3867, "step": 134530 }, { "epoch": 16.190132370637787, "grad_norm": 2264.2587890625, "learning_rate": 0.0001881356353164781, "loss": 7.2454, "step": 134540 }, { "epoch": 16.1913357400722, "grad_norm": 816.5081176757812, "learning_rate": 0.00018813383791746493, "loss": 7.3905, "step": 134550 }, { "epoch": 16.19253910950662, "grad_norm": 1701.131591796875, "learning_rate": 0.00018813204039090032, "loss": 7.3097, "step": 134560 }, { "epoch": 16.193742478941036, "grad_norm": 1724.3487548828125, "learning_rate": 0.00018813024273678686, "loss": 7.2824, "step": 134570 }, { "epoch": 16.19494584837545, "grad_norm": 2758.356689453125, "learning_rate": 0.0001881284449551272, "loss": 7.3388, "step": 134580 }, { "epoch": 16.196149217809868, "grad_norm": 982.5479125976562, "learning_rate": 0.00018812664704592394, "loss": 7.3162, "step": 134590 }, { "epoch": 16.197352587244286, "grad_norm": 1717.4306640625, "learning_rate": 0.00018812484900917965, "loss": 7.3075, "step": 134600 }, { "epoch": 16.1985559566787, "grad_norm": 2894.6103515625, "learning_rate": 0.00018812305084489695, "loss": 7.4182, "step": 134610 }, { "epoch": 16.199759326113117, "grad_norm": 1259.333251953125, "learning_rate": 0.00018812125255307844, "loss": 7.347, "step": 134620 }, { "epoch": 16.200962695547535, "grad_norm": 1081.9444580078125, "learning_rate": 0.0001881194541337267, "loss": 7.383, "step": 134630 }, { "epoch": 16.20216606498195, "grad_norm": 1318.0164794921875, "learning_rate": 0.00018811765558684438, "loss": 7.352, "step": 134640 }, { "epoch": 16.203369434416366, "grad_norm": 553.3134765625, "learning_rate": 0.00018811585691243406, "loss": 7.2723, "step": 134650 }, { "epoch": 16.204572803850784, "grad_norm": 959.5357666015625, "learning_rate": 0.00018811405811049831, "loss": 7.285, "step": 134660 }, { "epoch": 16.205776173285198, "grad_norm": 404.1699523925781, "learning_rate": 0.0001881122591810398, "loss": 7.2958, "step": 134670 }, { "epoch": 16.206979542719615, "grad_norm": 1347.3458251953125, "learning_rate": 0.00018811046012406107, "loss": 7.308, "step": 134680 }, { "epoch": 16.20818291215403, "grad_norm": 1214.8868408203125, "learning_rate": 0.00018810866093956475, "loss": 7.3352, "step": 134690 }, { "epoch": 16.209386281588447, "grad_norm": 871.9038696289062, "learning_rate": 0.00018810686162755346, "loss": 7.2537, "step": 134700 }, { "epoch": 16.210589651022865, "grad_norm": 528.3934326171875, "learning_rate": 0.00018810506218802978, "loss": 7.283, "step": 134710 }, { "epoch": 16.21179302045728, "grad_norm": 710.7511596679688, "learning_rate": 0.00018810326262099633, "loss": 7.3367, "step": 134720 }, { "epoch": 16.212996389891696, "grad_norm": 1877.6536865234375, "learning_rate": 0.0001881014629264557, "loss": 7.329, "step": 134730 }, { "epoch": 16.214199759326114, "grad_norm": 813.703857421875, "learning_rate": 0.00018809966310441047, "loss": 7.2091, "step": 134740 }, { "epoch": 16.215403128760528, "grad_norm": 707.8218383789062, "learning_rate": 0.00018809786315486334, "loss": 7.2575, "step": 134750 }, { "epoch": 16.216606498194945, "grad_norm": 3227.122314453125, "learning_rate": 0.0001880960630778168, "loss": 7.2496, "step": 134760 }, { "epoch": 16.217809867629363, "grad_norm": 268.9632263183594, "learning_rate": 0.00018809426287327353, "loss": 7.3143, "step": 134770 }, { "epoch": 16.219013237063777, "grad_norm": 1045.4754638671875, "learning_rate": 0.0001880924625412361, "loss": 7.4153, "step": 134780 }, { "epoch": 16.220216606498195, "grad_norm": 1444.0511474609375, "learning_rate": 0.00018809066208170716, "loss": 7.2603, "step": 134790 }, { "epoch": 16.221419975932612, "grad_norm": 556.0645141601562, "learning_rate": 0.00018808886149468924, "loss": 7.3158, "step": 134800 }, { "epoch": 16.222623345367026, "grad_norm": 485.6466979980469, "learning_rate": 0.000188087060780185, "loss": 7.2585, "step": 134810 }, { "epoch": 16.223826714801444, "grad_norm": 863.2359619140625, "learning_rate": 0.00018808525993819705, "loss": 7.4184, "step": 134820 }, { "epoch": 16.22503008423586, "grad_norm": 10132.2978515625, "learning_rate": 0.00018808345896872795, "loss": 7.3077, "step": 134830 }, { "epoch": 16.226233453670275, "grad_norm": 814.1869506835938, "learning_rate": 0.00018808165787178037, "loss": 7.2195, "step": 134840 }, { "epoch": 16.227436823104693, "grad_norm": 854.6044311523438, "learning_rate": 0.0001880798566473569, "loss": 7.3092, "step": 134850 }, { "epoch": 16.22864019253911, "grad_norm": 3079.143798828125, "learning_rate": 0.0001880780552954601, "loss": 7.4242, "step": 134860 }, { "epoch": 16.229843561973524, "grad_norm": 98335.5859375, "learning_rate": 0.00018807625381609262, "loss": 7.5575, "step": 134870 }, { "epoch": 16.231046931407942, "grad_norm": 68654.34375, "learning_rate": 0.00018807445220925706, "loss": 7.468, "step": 134880 }, { "epoch": 16.23225030084236, "grad_norm": 19752.994140625, "learning_rate": 0.000188072650474956, "loss": 7.5652, "step": 134890 }, { "epoch": 16.233453670276774, "grad_norm": 28384.818359375, "learning_rate": 0.0001880708486131921, "loss": 7.831, "step": 134900 }, { "epoch": 16.23465703971119, "grad_norm": 5448.560546875, "learning_rate": 0.0001880690466239679, "loss": 7.8397, "step": 134910 }, { "epoch": 16.23586040914561, "grad_norm": 3974.45751953125, "learning_rate": 0.00018806724450728607, "loss": 7.71, "step": 134920 }, { "epoch": 16.237063778580023, "grad_norm": 2246.431884765625, "learning_rate": 0.00018806544226314918, "loss": 7.5296, "step": 134930 }, { "epoch": 16.23826714801444, "grad_norm": 833.052490234375, "learning_rate": 0.00018806363989155988, "loss": 7.4504, "step": 134940 }, { "epoch": 16.239470517448858, "grad_norm": 1213.1649169921875, "learning_rate": 0.00018806183739252075, "loss": 7.5472, "step": 134950 }, { "epoch": 16.240673886883272, "grad_norm": 1169.3134765625, "learning_rate": 0.00018806003476603436, "loss": 7.4958, "step": 134960 }, { "epoch": 16.24187725631769, "grad_norm": 322.09368896484375, "learning_rate": 0.00018805823201210338, "loss": 7.3395, "step": 134970 }, { "epoch": 16.243080625752107, "grad_norm": 1242.49169921875, "learning_rate": 0.00018805642913073044, "loss": 7.5151, "step": 134980 }, { "epoch": 16.24428399518652, "grad_norm": 598.2134399414062, "learning_rate": 0.00018805462612191804, "loss": 7.3753, "step": 134990 }, { "epoch": 16.24548736462094, "grad_norm": 1171.3634033203125, "learning_rate": 0.0001880528229856689, "loss": 7.3291, "step": 135000 }, { "epoch": 16.246690734055356, "grad_norm": 987.525146484375, "learning_rate": 0.00018805101972198555, "loss": 7.4213, "step": 135010 }, { "epoch": 16.24789410348977, "grad_norm": 1815.230712890625, "learning_rate": 0.00018804921633087064, "loss": 7.3551, "step": 135020 }, { "epoch": 16.249097472924188, "grad_norm": 3428.69921875, "learning_rate": 0.0001880474128123268, "loss": 7.4784, "step": 135030 }, { "epoch": 16.250300842358605, "grad_norm": 26357.70703125, "learning_rate": 0.00018804560916635662, "loss": 7.4469, "step": 135040 }, { "epoch": 16.25150421179302, "grad_norm": 8461.775390625, "learning_rate": 0.00018804380539296268, "loss": 7.5533, "step": 135050 }, { "epoch": 16.252707581227437, "grad_norm": 38504.7265625, "learning_rate": 0.0001880420014921476, "loss": 7.4803, "step": 135060 }, { "epoch": 16.253910950661854, "grad_norm": 2633.317626953125, "learning_rate": 0.00018804019746391406, "loss": 7.4323, "step": 135070 }, { "epoch": 16.25511432009627, "grad_norm": 3646.68212890625, "learning_rate": 0.0001880383933082646, "loss": 7.3077, "step": 135080 }, { "epoch": 16.256317689530686, "grad_norm": 3053.300537109375, "learning_rate": 0.00018803658902520184, "loss": 7.4784, "step": 135090 }, { "epoch": 16.257521058965104, "grad_norm": 8867.451171875, "learning_rate": 0.0001880347846147284, "loss": 7.4092, "step": 135100 }, { "epoch": 16.258724428399518, "grad_norm": 3264.137939453125, "learning_rate": 0.0001880329800768469, "loss": 7.4702, "step": 135110 }, { "epoch": 16.259927797833935, "grad_norm": 3088.422119140625, "learning_rate": 0.00018803117541155991, "loss": 7.3454, "step": 135120 }, { "epoch": 16.261131167268353, "grad_norm": 5243.46923828125, "learning_rate": 0.0001880293706188701, "loss": 7.3988, "step": 135130 }, { "epoch": 16.262334536702767, "grad_norm": 430.4847412109375, "learning_rate": 0.00018802756569878006, "loss": 7.4231, "step": 135140 }, { "epoch": 16.263537906137184, "grad_norm": 1054.1220703125, "learning_rate": 0.00018802576065129242, "loss": 7.3821, "step": 135150 }, { "epoch": 16.264741275571602, "grad_norm": 359.6756896972656, "learning_rate": 0.00018802395547640974, "loss": 7.4069, "step": 135160 }, { "epoch": 16.265944645006016, "grad_norm": 357.1447448730469, "learning_rate": 0.00018802215017413464, "loss": 7.3935, "step": 135170 }, { "epoch": 16.267148014440433, "grad_norm": 6698.04736328125, "learning_rate": 0.00018802034474446982, "loss": 7.468, "step": 135180 }, { "epoch": 16.26835138387485, "grad_norm": 1396.1961669921875, "learning_rate": 0.00018801853918741778, "loss": 7.3965, "step": 135190 }, { "epoch": 16.269554753309265, "grad_norm": 710.7750854492188, "learning_rate": 0.00018801673350298122, "loss": 7.5052, "step": 135200 }, { "epoch": 16.270758122743683, "grad_norm": 2006.60986328125, "learning_rate": 0.00018801492769116268, "loss": 7.4554, "step": 135210 }, { "epoch": 16.2719614921781, "grad_norm": 487.1640930175781, "learning_rate": 0.00018801312175196482, "loss": 7.4907, "step": 135220 }, { "epoch": 16.273164861612514, "grad_norm": 12558.9658203125, "learning_rate": 0.00018801131568539026, "loss": 7.3869, "step": 135230 }, { "epoch": 16.27436823104693, "grad_norm": 1464.945068359375, "learning_rate": 0.00018800950949144157, "loss": 7.3742, "step": 135240 }, { "epoch": 16.27557160048135, "grad_norm": 1422.491943359375, "learning_rate": 0.00018800770317012143, "loss": 7.4689, "step": 135250 }, { "epoch": 16.276774969915763, "grad_norm": 3618.24072265625, "learning_rate": 0.0001880058967214324, "loss": 7.3944, "step": 135260 }, { "epoch": 16.27797833935018, "grad_norm": 1279.1890869140625, "learning_rate": 0.0001880040901453771, "loss": 7.3848, "step": 135270 }, { "epoch": 16.2791817087846, "grad_norm": 6718681.5, "learning_rate": 0.00018800228344195813, "loss": 7.3688, "step": 135280 }, { "epoch": 16.280385078219012, "grad_norm": 3917.640380859375, "learning_rate": 0.00018800047661117817, "loss": 7.4604, "step": 135290 }, { "epoch": 16.28158844765343, "grad_norm": 7963.70556640625, "learning_rate": 0.0001879986696530398, "loss": 7.4116, "step": 135300 }, { "epoch": 16.282791817087848, "grad_norm": 1286.34375, "learning_rate": 0.00018799686256754558, "loss": 7.3811, "step": 135310 }, { "epoch": 16.28399518652226, "grad_norm": 3045.016845703125, "learning_rate": 0.00018799505535469822, "loss": 7.3988, "step": 135320 }, { "epoch": 16.28519855595668, "grad_norm": 6178.04296875, "learning_rate": 0.00018799324801450025, "loss": 7.4496, "step": 135330 }, { "epoch": 16.286401925391097, "grad_norm": 1294.6044921875, "learning_rate": 0.00018799144054695437, "loss": 7.3696, "step": 135340 }, { "epoch": 16.28760529482551, "grad_norm": 5328.47265625, "learning_rate": 0.0001879896329520631, "loss": 7.354, "step": 135350 }, { "epoch": 16.28880866425993, "grad_norm": 2091.859375, "learning_rate": 0.00018798782522982913, "loss": 7.3048, "step": 135360 }, { "epoch": 16.290012033694346, "grad_norm": 3573.5048828125, "learning_rate": 0.00018798601738025507, "loss": 7.4095, "step": 135370 }, { "epoch": 16.29121540312876, "grad_norm": 5019.990234375, "learning_rate": 0.00018798420940334352, "loss": 7.4838, "step": 135380 }, { "epoch": 16.292418772563177, "grad_norm": 4849.22509765625, "learning_rate": 0.00018798240129909706, "loss": 7.337, "step": 135390 }, { "epoch": 16.29362214199759, "grad_norm": 1907.13330078125, "learning_rate": 0.00018798059306751838, "loss": 7.4253, "step": 135400 }, { "epoch": 16.29482551143201, "grad_norm": 6181.5908203125, "learning_rate": 0.00018797878470861004, "loss": 7.3597, "step": 135410 }, { "epoch": 16.296028880866427, "grad_norm": 837.5048217773438, "learning_rate": 0.00018797697622237468, "loss": 7.3881, "step": 135420 }, { "epoch": 16.29723225030084, "grad_norm": 4801.70703125, "learning_rate": 0.00018797516760881493, "loss": 7.4965, "step": 135430 }, { "epoch": 16.29843561973526, "grad_norm": 814.237548828125, "learning_rate": 0.0001879733588679334, "loss": 7.3403, "step": 135440 }, { "epoch": 16.299638989169676, "grad_norm": 4937.7548828125, "learning_rate": 0.00018797154999973268, "loss": 7.4315, "step": 135450 }, { "epoch": 16.30084235860409, "grad_norm": 1484.304931640625, "learning_rate": 0.00018796974100421541, "loss": 7.3327, "step": 135460 }, { "epoch": 16.302045728038507, "grad_norm": 16406.900390625, "learning_rate": 0.0001879679318813842, "loss": 7.3749, "step": 135470 }, { "epoch": 16.303249097472925, "grad_norm": 4597.98828125, "learning_rate": 0.00018796612263124166, "loss": 7.4448, "step": 135480 }, { "epoch": 16.30445246690734, "grad_norm": 1915.1119384765625, "learning_rate": 0.00018796431325379044, "loss": 7.3674, "step": 135490 }, { "epoch": 16.305655836341757, "grad_norm": 4350.32421875, "learning_rate": 0.00018796250374903317, "loss": 7.3967, "step": 135500 }, { "epoch": 16.306859205776174, "grad_norm": 35623.3515625, "learning_rate": 0.00018796069411697238, "loss": 7.3241, "step": 135510 }, { "epoch": 16.308062575210588, "grad_norm": 1764.968017578125, "learning_rate": 0.00018795888435761083, "loss": 7.3164, "step": 135520 }, { "epoch": 16.309265944645006, "grad_norm": 2249.67822265625, "learning_rate": 0.000187957074470951, "loss": 7.5236, "step": 135530 }, { "epoch": 16.310469314079423, "grad_norm": 3995.468017578125, "learning_rate": 0.00018795526445699557, "loss": 7.3412, "step": 135540 }, { "epoch": 16.311672683513837, "grad_norm": 1048.600830078125, "learning_rate": 0.00018795345431574717, "loss": 7.42, "step": 135550 }, { "epoch": 16.312876052948255, "grad_norm": 8585.6337890625, "learning_rate": 0.0001879516440472084, "loss": 7.4167, "step": 135560 }, { "epoch": 16.314079422382672, "grad_norm": 8284.8955078125, "learning_rate": 0.00018794983365138187, "loss": 7.469, "step": 135570 }, { "epoch": 16.315282791817086, "grad_norm": 12807.7412109375, "learning_rate": 0.00018794802312827025, "loss": 7.352, "step": 135580 }, { "epoch": 16.316486161251504, "grad_norm": 7363.87109375, "learning_rate": 0.0001879462124778761, "loss": 7.4129, "step": 135590 }, { "epoch": 16.31768953068592, "grad_norm": 3202.677001953125, "learning_rate": 0.00018794440170020207, "loss": 7.3481, "step": 135600 }, { "epoch": 16.318892900120336, "grad_norm": 6564.0791015625, "learning_rate": 0.00018794259079525077, "loss": 7.4588, "step": 135610 }, { "epoch": 16.320096269554753, "grad_norm": 1761.1639404296875, "learning_rate": 0.00018794077976302487, "loss": 7.4876, "step": 135620 }, { "epoch": 16.32129963898917, "grad_norm": 3432.889892578125, "learning_rate": 0.0001879389686035269, "loss": 7.3906, "step": 135630 }, { "epoch": 16.322503008423585, "grad_norm": 7886.70703125, "learning_rate": 0.00018793715731675955, "loss": 7.3919, "step": 135640 }, { "epoch": 16.323706377858002, "grad_norm": 4607.04638671875, "learning_rate": 0.00018793534590272543, "loss": 7.3867, "step": 135650 }, { "epoch": 16.32490974729242, "grad_norm": 4718.65380859375, "learning_rate": 0.00018793353436142717, "loss": 7.496, "step": 135660 }, { "epoch": 16.326113116726834, "grad_norm": 3534.19384765625, "learning_rate": 0.00018793172269286737, "loss": 7.3163, "step": 135670 }, { "epoch": 16.32731648616125, "grad_norm": 4739.6005859375, "learning_rate": 0.0001879299108970486, "loss": 7.3221, "step": 135680 }, { "epoch": 16.32851985559567, "grad_norm": 13892.4052734375, "learning_rate": 0.00018792809897397362, "loss": 7.3012, "step": 135690 }, { "epoch": 16.329723225030083, "grad_norm": 1643.28515625, "learning_rate": 0.00018792628692364493, "loss": 7.3831, "step": 135700 }, { "epoch": 16.3309265944645, "grad_norm": 2262.72021484375, "learning_rate": 0.0001879244747460652, "loss": 7.4484, "step": 135710 }, { "epoch": 16.332129963898918, "grad_norm": 1647.235595703125, "learning_rate": 0.00018792266244123705, "loss": 7.317, "step": 135720 }, { "epoch": 16.333333333333332, "grad_norm": 4474.38720703125, "learning_rate": 0.00018792085000916308, "loss": 7.3673, "step": 135730 }, { "epoch": 16.33453670276775, "grad_norm": 13284.1533203125, "learning_rate": 0.00018791903744984595, "loss": 7.4562, "step": 135740 }, { "epoch": 16.335740072202167, "grad_norm": 1539.72802734375, "learning_rate": 0.0001879172247632883, "loss": 7.2861, "step": 135750 }, { "epoch": 16.33694344163658, "grad_norm": 1045.3782958984375, "learning_rate": 0.00018791541194949267, "loss": 7.2342, "step": 135760 }, { "epoch": 16.338146811071, "grad_norm": 3884.170166015625, "learning_rate": 0.00018791359900846176, "loss": 7.4265, "step": 135770 }, { "epoch": 16.339350180505416, "grad_norm": 7942.37353515625, "learning_rate": 0.00018791178594019815, "loss": 7.3579, "step": 135780 }, { "epoch": 16.34055354993983, "grad_norm": 1561.6995849609375, "learning_rate": 0.0001879099727447045, "loss": 7.3824, "step": 135790 }, { "epoch": 16.341756919374248, "grad_norm": 5925.3896484375, "learning_rate": 0.00018790815942198338, "loss": 7.4067, "step": 135800 }, { "epoch": 16.342960288808666, "grad_norm": 1170.9866943359375, "learning_rate": 0.00018790634597203748, "loss": 7.3359, "step": 135810 }, { "epoch": 16.34416365824308, "grad_norm": 1370.93994140625, "learning_rate": 0.00018790453239486937, "loss": 7.3038, "step": 135820 }, { "epoch": 16.345367027677497, "grad_norm": 7989.8427734375, "learning_rate": 0.00018790271869048174, "loss": 7.3582, "step": 135830 }, { "epoch": 16.346570397111915, "grad_norm": 1201.0194091796875, "learning_rate": 0.00018790090485887713, "loss": 7.4095, "step": 135840 }, { "epoch": 16.34777376654633, "grad_norm": 5560.896484375, "learning_rate": 0.00018789909090005823, "loss": 7.4376, "step": 135850 }, { "epoch": 16.348977135980746, "grad_norm": 2773.318115234375, "learning_rate": 0.00018789727681402764, "loss": 7.3307, "step": 135860 }, { "epoch": 16.350180505415164, "grad_norm": 15386.08203125, "learning_rate": 0.00018789546260078801, "loss": 7.3922, "step": 135870 }, { "epoch": 16.351383874849578, "grad_norm": 2575.066650390625, "learning_rate": 0.0001878936482603419, "loss": 7.437, "step": 135880 }, { "epoch": 16.352587244283995, "grad_norm": 2834.9453125, "learning_rate": 0.00018789183379269204, "loss": 7.4451, "step": 135890 }, { "epoch": 16.353790613718413, "grad_norm": 12504.13671875, "learning_rate": 0.00018789001919784095, "loss": 7.3121, "step": 135900 }, { "epoch": 16.354993983152827, "grad_norm": 5930.501953125, "learning_rate": 0.00018788820447579132, "loss": 7.4703, "step": 135910 }, { "epoch": 16.356197352587245, "grad_norm": 4534.30615234375, "learning_rate": 0.00018788638962654575, "loss": 7.3633, "step": 135920 }, { "epoch": 16.357400722021662, "grad_norm": 3038.640869140625, "learning_rate": 0.00018788457465010688, "loss": 7.375, "step": 135930 }, { "epoch": 16.358604091456076, "grad_norm": 5623.09423828125, "learning_rate": 0.00018788275954647734, "loss": 7.3456, "step": 135940 }, { "epoch": 16.359807460890494, "grad_norm": 29531.099609375, "learning_rate": 0.00018788094431565974, "loss": 7.443, "step": 135950 }, { "epoch": 16.36101083032491, "grad_norm": 22576.966796875, "learning_rate": 0.0001878791289576567, "loss": 7.4488, "step": 135960 }, { "epoch": 16.362214199759325, "grad_norm": 378.20758056640625, "learning_rate": 0.0001878773134724709, "loss": 7.3549, "step": 135970 }, { "epoch": 16.363417569193743, "grad_norm": 3222.92822265625, "learning_rate": 0.0001878754978601049, "loss": 7.2736, "step": 135980 }, { "epoch": 16.36462093862816, "grad_norm": 422.771240234375, "learning_rate": 0.0001878736821205614, "loss": 7.4024, "step": 135990 }, { "epoch": 16.365824308062574, "grad_norm": 940.5438232421875, "learning_rate": 0.00018787186625384295, "loss": 7.4342, "step": 136000 }, { "epoch": 16.367027677496992, "grad_norm": 835.6951904296875, "learning_rate": 0.00018787005025995224, "loss": 7.3894, "step": 136010 }, { "epoch": 16.36823104693141, "grad_norm": 552.2232055664062, "learning_rate": 0.00018786823413889184, "loss": 7.3654, "step": 136020 }, { "epoch": 16.369434416365824, "grad_norm": 827.7374877929688, "learning_rate": 0.00018786641789066445, "loss": 7.423, "step": 136030 }, { "epoch": 16.37063778580024, "grad_norm": 880.3196411132812, "learning_rate": 0.0001878646015152726, "loss": 7.5086, "step": 136040 }, { "epoch": 16.37184115523466, "grad_norm": 1384.047607421875, "learning_rate": 0.00018786278501271904, "loss": 7.4775, "step": 136050 }, { "epoch": 16.373044524669073, "grad_norm": 575.7612915039062, "learning_rate": 0.0001878609683830063, "loss": 7.2752, "step": 136060 }, { "epoch": 16.37424789410349, "grad_norm": 518.8995971679688, "learning_rate": 0.00018785915162613707, "loss": 7.488, "step": 136070 }, { "epoch": 16.375451263537904, "grad_norm": 15413.3115234375, "learning_rate": 0.00018785733474211392, "loss": 7.5608, "step": 136080 }, { "epoch": 16.376654632972322, "grad_norm": 653.1942138671875, "learning_rate": 0.00018785551773093956, "loss": 7.3276, "step": 136090 }, { "epoch": 16.37785800240674, "grad_norm": 589.8455200195312, "learning_rate": 0.00018785370059261656, "loss": 7.4827, "step": 136100 }, { "epoch": 16.379061371841154, "grad_norm": 1137.1182861328125, "learning_rate": 0.00018785188332714757, "loss": 7.3475, "step": 136110 }, { "epoch": 16.38026474127557, "grad_norm": 5282.48681640625, "learning_rate": 0.00018785006593453521, "loss": 7.4058, "step": 136120 }, { "epoch": 16.38146811070999, "grad_norm": 6450.40625, "learning_rate": 0.0001878482484147821, "loss": 7.2965, "step": 136130 }, { "epoch": 16.382671480144403, "grad_norm": 30703.484375, "learning_rate": 0.0001878464307678909, "loss": 7.253, "step": 136140 }, { "epoch": 16.38387484957882, "grad_norm": 12553.6572265625, "learning_rate": 0.00018784461299386425, "loss": 7.5897, "step": 136150 }, { "epoch": 16.385078219013238, "grad_norm": 43169.89453125, "learning_rate": 0.00018784279509270472, "loss": 7.594, "step": 136160 }, { "epoch": 16.386281588447652, "grad_norm": 11119.8896484375, "learning_rate": 0.000187840977064415, "loss": 7.6198, "step": 136170 }, { "epoch": 16.38748495788207, "grad_norm": 8511.7587890625, "learning_rate": 0.00018783915890899767, "loss": 7.4384, "step": 136180 }, { "epoch": 16.388688327316487, "grad_norm": 5969.4306640625, "learning_rate": 0.0001878373406264554, "loss": 7.4835, "step": 136190 }, { "epoch": 16.3898916967509, "grad_norm": 6323.908203125, "learning_rate": 0.00018783552221679083, "loss": 7.4741, "step": 136200 }, { "epoch": 16.39109506618532, "grad_norm": 70771.2734375, "learning_rate": 0.00018783370368000653, "loss": 7.4753, "step": 136210 }, { "epoch": 16.392298435619736, "grad_norm": 696879.0625, "learning_rate": 0.00018783188501610525, "loss": 7.4855, "step": 136220 }, { "epoch": 16.39350180505415, "grad_norm": 14489.6826171875, "learning_rate": 0.0001878300662250895, "loss": 7.3957, "step": 136230 }, { "epoch": 16.394705174488568, "grad_norm": 14740.2939453125, "learning_rate": 0.00018782824730696196, "loss": 7.4846, "step": 136240 }, { "epoch": 16.395908543922985, "grad_norm": 39206.94140625, "learning_rate": 0.00018782642826172528, "loss": 7.3847, "step": 136250 }, { "epoch": 16.3971119133574, "grad_norm": 25458.478515625, "learning_rate": 0.00018782460908938205, "loss": 7.3715, "step": 136260 }, { "epoch": 16.398315282791817, "grad_norm": 1424.4176025390625, "learning_rate": 0.00018782278978993495, "loss": 7.3292, "step": 136270 }, { "epoch": 16.399518652226234, "grad_norm": 91076.3984375, "learning_rate": 0.00018782097036338657, "loss": 7.3388, "step": 136280 }, { "epoch": 16.40072202166065, "grad_norm": 25471.916015625, "learning_rate": 0.00018781915080973957, "loss": 7.3743, "step": 136290 }, { "epoch": 16.401925391095066, "grad_norm": 5020.595703125, "learning_rate": 0.00018781733112899657, "loss": 7.4635, "step": 136300 }, { "epoch": 16.403128760529484, "grad_norm": 44687.38671875, "learning_rate": 0.00018781551132116025, "loss": 7.531, "step": 136310 }, { "epoch": 16.404332129963898, "grad_norm": 107524.359375, "learning_rate": 0.00018781369138623318, "loss": 7.4699, "step": 136320 }, { "epoch": 16.405535499398315, "grad_norm": 5184.20849609375, "learning_rate": 0.00018781187132421799, "loss": 7.4869, "step": 136330 }, { "epoch": 16.406738868832733, "grad_norm": 18225.78515625, "learning_rate": 0.0001878100511351174, "loss": 7.5691, "step": 136340 }, { "epoch": 16.407942238267147, "grad_norm": 15367.7236328125, "learning_rate": 0.0001878082308189339, "loss": 7.4852, "step": 136350 }, { "epoch": 16.409145607701564, "grad_norm": 47433.625, "learning_rate": 0.0001878064103756703, "loss": 7.4459, "step": 136360 }, { "epoch": 16.410348977135982, "grad_norm": 16086.1826171875, "learning_rate": 0.00018780458980532908, "loss": 7.3971, "step": 136370 }, { "epoch": 16.411552346570396, "grad_norm": 10373.1318359375, "learning_rate": 0.00018780276910791298, "loss": 7.4956, "step": 136380 }, { "epoch": 16.412755716004813, "grad_norm": 40921.32421875, "learning_rate": 0.0001878009482834246, "loss": 7.4732, "step": 136390 }, { "epoch": 16.41395908543923, "grad_norm": 44686.60546875, "learning_rate": 0.00018779912733186654, "loss": 7.4912, "step": 136400 }, { "epoch": 16.415162454873645, "grad_norm": 68040.78125, "learning_rate": 0.00018779730625324148, "loss": 7.485, "step": 136410 }, { "epoch": 16.416365824308063, "grad_norm": 25117.5, "learning_rate": 0.00018779548504755205, "loss": 7.523, "step": 136420 }, { "epoch": 16.41756919374248, "grad_norm": 459473.1875, "learning_rate": 0.00018779366371480087, "loss": 7.5368, "step": 136430 }, { "epoch": 16.418772563176894, "grad_norm": 48309.23046875, "learning_rate": 0.00018779184225499056, "loss": 7.508, "step": 136440 }, { "epoch": 16.41997593261131, "grad_norm": 100739.703125, "learning_rate": 0.0001877900206681238, "loss": 7.411, "step": 136450 }, { "epoch": 16.42117930204573, "grad_norm": 13916.9072265625, "learning_rate": 0.00018778819895420322, "loss": 7.53, "step": 136460 }, { "epoch": 16.422382671480143, "grad_norm": 13331.6533203125, "learning_rate": 0.0001877863771132314, "loss": 7.4388, "step": 136470 }, { "epoch": 16.42358604091456, "grad_norm": 9611.49609375, "learning_rate": 0.00018778455514521108, "loss": 7.4856, "step": 136480 }, { "epoch": 16.42478941034898, "grad_norm": 114058.515625, "learning_rate": 0.0001877827330501448, "loss": 7.3812, "step": 136490 }, { "epoch": 16.425992779783392, "grad_norm": 6379.4580078125, "learning_rate": 0.00018778091082803525, "loss": 7.5142, "step": 136500 }, { "epoch": 16.42719614921781, "grad_norm": 48772.484375, "learning_rate": 0.000187779088478885, "loss": 7.5297, "step": 136510 }, { "epoch": 16.428399518652228, "grad_norm": 38506.64453125, "learning_rate": 0.00018777726600269679, "loss": 7.4822, "step": 136520 }, { "epoch": 16.42960288808664, "grad_norm": 18935.1640625, "learning_rate": 0.00018777544339947317, "loss": 7.41, "step": 136530 }, { "epoch": 16.43080625752106, "grad_norm": 23718.6171875, "learning_rate": 0.00018777362066921683, "loss": 7.4754, "step": 136540 }, { "epoch": 16.432009626955477, "grad_norm": 47956.45703125, "learning_rate": 0.00018777179781193039, "loss": 7.4338, "step": 136550 }, { "epoch": 16.43321299638989, "grad_norm": 38971.03515625, "learning_rate": 0.00018776997482761648, "loss": 7.5148, "step": 136560 }, { "epoch": 16.43441636582431, "grad_norm": 8915.9169921875, "learning_rate": 0.00018776815171627777, "loss": 7.4471, "step": 136570 }, { "epoch": 16.435619735258726, "grad_norm": 4852.09716796875, "learning_rate": 0.00018776632847791687, "loss": 7.471, "step": 136580 }, { "epoch": 16.43682310469314, "grad_norm": 19770.119140625, "learning_rate": 0.0001877645051125364, "loss": 7.4327, "step": 136590 }, { "epoch": 16.438026474127557, "grad_norm": 12467.087890625, "learning_rate": 0.00018776268162013903, "loss": 7.4982, "step": 136600 }, { "epoch": 16.439229843561975, "grad_norm": 12908.8193359375, "learning_rate": 0.0001877608580007274, "loss": 7.3856, "step": 136610 }, { "epoch": 16.44043321299639, "grad_norm": 10286.375, "learning_rate": 0.00018775903425430413, "loss": 7.3419, "step": 136620 }, { "epoch": 16.441636582430807, "grad_norm": 31747.52734375, "learning_rate": 0.0001877572103808719, "loss": 7.4967, "step": 136630 }, { "epoch": 16.442839951865224, "grad_norm": 6905.46337890625, "learning_rate": 0.0001877553863804333, "loss": 7.4347, "step": 136640 }, { "epoch": 16.444043321299638, "grad_norm": 466.9425354003906, "learning_rate": 0.000187753562252991, "loss": 7.4828, "step": 136650 }, { "epoch": 16.445246690734056, "grad_norm": 738.4029541015625, "learning_rate": 0.0001877517379985476, "loss": 7.4964, "step": 136660 }, { "epoch": 16.446450060168473, "grad_norm": 1243.8538818359375, "learning_rate": 0.0001877499136171058, "loss": 7.4728, "step": 136670 }, { "epoch": 16.447653429602887, "grad_norm": 1072.4976806640625, "learning_rate": 0.00018774808910866818, "loss": 7.538, "step": 136680 }, { "epoch": 16.448856799037305, "grad_norm": 1277.53857421875, "learning_rate": 0.00018774626447323742, "loss": 7.447, "step": 136690 }, { "epoch": 16.450060168471722, "grad_norm": 585.4208374023438, "learning_rate": 0.00018774443971081618, "loss": 7.5286, "step": 136700 }, { "epoch": 16.451263537906136, "grad_norm": 598.9535522460938, "learning_rate": 0.00018774261482140705, "loss": 7.5233, "step": 136710 }, { "epoch": 16.452466907340554, "grad_norm": 517.5478515625, "learning_rate": 0.00018774078980501272, "loss": 7.4624, "step": 136720 }, { "epoch": 16.45367027677497, "grad_norm": 509.94024658203125, "learning_rate": 0.00018773896466163574, "loss": 7.4874, "step": 136730 }, { "epoch": 16.454873646209386, "grad_norm": 923.160888671875, "learning_rate": 0.00018773713939127887, "loss": 7.5426, "step": 136740 }, { "epoch": 16.456077015643803, "grad_norm": 312.4687194824219, "learning_rate": 0.0001877353139939447, "loss": 7.4742, "step": 136750 }, { "epoch": 16.45728038507822, "grad_norm": 355.81109619140625, "learning_rate": 0.00018773348846963582, "loss": 7.4346, "step": 136760 }, { "epoch": 16.458483754512635, "grad_norm": 2573.20849609375, "learning_rate": 0.00018773166281835498, "loss": 7.4636, "step": 136770 }, { "epoch": 16.459687123947052, "grad_norm": 1106.6849365234375, "learning_rate": 0.00018772983704010474, "loss": 7.3676, "step": 136780 }, { "epoch": 16.460890493381466, "grad_norm": 329.6548156738281, "learning_rate": 0.00018772801113488776, "loss": 7.4621, "step": 136790 }, { "epoch": 16.462093862815884, "grad_norm": 7716.13525390625, "learning_rate": 0.0001877261851027067, "loss": 7.5916, "step": 136800 }, { "epoch": 16.4632972322503, "grad_norm": 1859.53857421875, "learning_rate": 0.00018772435894356417, "loss": 7.5168, "step": 136810 }, { "epoch": 16.464500601684716, "grad_norm": 501.2537841796875, "learning_rate": 0.00018772253265746284, "loss": 7.3892, "step": 136820 }, { "epoch": 16.465703971119133, "grad_norm": 3446.930419921875, "learning_rate": 0.00018772070624440538, "loss": 7.3543, "step": 136830 }, { "epoch": 16.46690734055355, "grad_norm": 831.8310546875, "learning_rate": 0.00018771887970439437, "loss": 7.4879, "step": 136840 }, { "epoch": 16.468110709987965, "grad_norm": 4313.0439453125, "learning_rate": 0.0001877170530374325, "loss": 7.394, "step": 136850 }, { "epoch": 16.469314079422382, "grad_norm": 992.0599975585938, "learning_rate": 0.00018771522624352237, "loss": 7.4155, "step": 136860 }, { "epoch": 16.4705174488568, "grad_norm": 458.1968078613281, "learning_rate": 0.0001877133993226667, "loss": 7.2855, "step": 136870 }, { "epoch": 16.471720818291214, "grad_norm": 1216.3865966796875, "learning_rate": 0.00018771157227486803, "loss": 7.3575, "step": 136880 }, { "epoch": 16.47292418772563, "grad_norm": 378.1925354003906, "learning_rate": 0.0001877097451001291, "loss": 7.3525, "step": 136890 }, { "epoch": 16.47412755716005, "grad_norm": 624.5488891601562, "learning_rate": 0.00018770791779845248, "loss": 7.4068, "step": 136900 }, { "epoch": 16.475330926594463, "grad_norm": 556.9076538085938, "learning_rate": 0.0001877060903698409, "loss": 7.3626, "step": 136910 }, { "epoch": 16.47653429602888, "grad_norm": 518.5603637695312, "learning_rate": 0.00018770426281429693, "loss": 7.3611, "step": 136920 }, { "epoch": 16.477737665463298, "grad_norm": 622.8832397460938, "learning_rate": 0.0001877024351318232, "loss": 7.3894, "step": 136930 }, { "epoch": 16.478941034897712, "grad_norm": 326.3026123046875, "learning_rate": 0.00018770060732242242, "loss": 7.3806, "step": 136940 }, { "epoch": 16.48014440433213, "grad_norm": 9292.4755859375, "learning_rate": 0.00018769877938609722, "loss": 7.2729, "step": 136950 }, { "epoch": 16.481347773766547, "grad_norm": 344.7122497558594, "learning_rate": 0.00018769695132285022, "loss": 7.3035, "step": 136960 }, { "epoch": 16.48255114320096, "grad_norm": 554.872802734375, "learning_rate": 0.0001876951231326841, "loss": 7.3936, "step": 136970 }, { "epoch": 16.48375451263538, "grad_norm": 516.7557983398438, "learning_rate": 0.00018769329481560146, "loss": 7.3623, "step": 136980 }, { "epoch": 16.484957882069796, "grad_norm": 229.0831298828125, "learning_rate": 0.000187691466371605, "loss": 7.3547, "step": 136990 }, { "epoch": 16.48616125150421, "grad_norm": 198.1752471923828, "learning_rate": 0.0001876896378006973, "loss": 7.3364, "step": 137000 }, { "epoch": 16.487364620938628, "grad_norm": 532.3231201171875, "learning_rate": 0.0001876878091028811, "loss": 7.3512, "step": 137010 }, { "epoch": 16.488567990373046, "grad_norm": 366.5466003417969, "learning_rate": 0.00018768598027815893, "loss": 7.3665, "step": 137020 }, { "epoch": 16.48977135980746, "grad_norm": 9869.470703125, "learning_rate": 0.00018768415132653353, "loss": 7.3486, "step": 137030 }, { "epoch": 16.490974729241877, "grad_norm": 518.7772827148438, "learning_rate": 0.0001876823222480075, "loss": 7.3746, "step": 137040 }, { "epoch": 16.492178098676295, "grad_norm": 16230.76953125, "learning_rate": 0.0001876804930425835, "loss": 7.407, "step": 137050 }, { "epoch": 16.49338146811071, "grad_norm": 141.1302032470703, "learning_rate": 0.0001876786637102642, "loss": 7.6212, "step": 137060 }, { "epoch": 16.494584837545126, "grad_norm": 199.4281463623047, "learning_rate": 0.0001876768342510522, "loss": 7.5952, "step": 137070 }, { "epoch": 16.495788206979544, "grad_norm": 13421.2236328125, "learning_rate": 0.0001876750046649502, "loss": 7.6444, "step": 137080 }, { "epoch": 16.496991576413958, "grad_norm": 194.61570739746094, "learning_rate": 0.00018767317495196079, "loss": 7.7355, "step": 137090 }, { "epoch": 16.498194945848375, "grad_norm": 3275.883544921875, "learning_rate": 0.00018767134511208668, "loss": 7.5391, "step": 137100 }, { "epoch": 16.499398315282793, "grad_norm": 1565.52685546875, "learning_rate": 0.00018766951514533048, "loss": 7.3521, "step": 137110 }, { "epoch": 16.500601684717207, "grad_norm": 528.7994384765625, "learning_rate": 0.00018766768505169482, "loss": 7.4239, "step": 137120 }, { "epoch": 16.501805054151625, "grad_norm": 413.8816833496094, "learning_rate": 0.00018766585483118243, "loss": 7.421, "step": 137130 }, { "epoch": 16.503008423586042, "grad_norm": 172.19161987304688, "learning_rate": 0.0001876640244837959, "loss": 7.4868, "step": 137140 }, { "epoch": 16.504211793020456, "grad_norm": 133.32363891601562, "learning_rate": 0.00018766219400953783, "loss": 7.3561, "step": 137150 }, { "epoch": 16.505415162454874, "grad_norm": 171.27767944335938, "learning_rate": 0.00018766036340841095, "loss": 7.3943, "step": 137160 }, { "epoch": 16.50661853188929, "grad_norm": 94.91278076171875, "learning_rate": 0.0001876585326804179, "loss": 7.4905, "step": 137170 }, { "epoch": 16.507821901323705, "grad_norm": 366.7793884277344, "learning_rate": 0.00018765670182556128, "loss": 7.4743, "step": 137180 }, { "epoch": 16.509025270758123, "grad_norm": 145.08108520507812, "learning_rate": 0.00018765487084384378, "loss": 7.3883, "step": 137190 }, { "epoch": 16.51022864019254, "grad_norm": 180.25515747070312, "learning_rate": 0.00018765303973526805, "loss": 7.4795, "step": 137200 }, { "epoch": 16.511432009626954, "grad_norm": 110.56512451171875, "learning_rate": 0.0001876512084998367, "loss": 7.3867, "step": 137210 }, { "epoch": 16.512635379061372, "grad_norm": 98.25045013427734, "learning_rate": 0.00018764937713755243, "loss": 7.348, "step": 137220 }, { "epoch": 16.51383874849579, "grad_norm": 147.30294799804688, "learning_rate": 0.0001876475456484179, "loss": 7.3438, "step": 137230 }, { "epoch": 16.515042117930204, "grad_norm": 64.69267272949219, "learning_rate": 0.00018764571403243573, "loss": 7.4821, "step": 137240 }, { "epoch": 16.51624548736462, "grad_norm": 418.8008728027344, "learning_rate": 0.00018764388228960854, "loss": 7.3345, "step": 137250 }, { "epoch": 16.51744885679904, "grad_norm": 94.74871826171875, "learning_rate": 0.00018764205041993902, "loss": 7.296, "step": 137260 }, { "epoch": 16.518652226233453, "grad_norm": 85.00372314453125, "learning_rate": 0.00018764021842342983, "loss": 7.36, "step": 137270 }, { "epoch": 16.51985559566787, "grad_norm": 345.0772705078125, "learning_rate": 0.0001876383863000836, "loss": 7.3627, "step": 137280 }, { "epoch": 16.521058965102288, "grad_norm": 95.23140716552734, "learning_rate": 0.00018763655404990297, "loss": 7.3586, "step": 137290 }, { "epoch": 16.522262334536702, "grad_norm": 275.6534118652344, "learning_rate": 0.00018763472167289064, "loss": 7.3728, "step": 137300 }, { "epoch": 16.52346570397112, "grad_norm": 44.374481201171875, "learning_rate": 0.0001876328891690492, "loss": 7.3683, "step": 137310 }, { "epoch": 16.524669073405537, "grad_norm": 73.00506591796875, "learning_rate": 0.00018763105653838132, "loss": 7.4326, "step": 137320 }, { "epoch": 16.52587244283995, "grad_norm": 234.020263671875, "learning_rate": 0.0001876292237808897, "loss": 7.3152, "step": 137330 }, { "epoch": 16.52707581227437, "grad_norm": 45.43013000488281, "learning_rate": 0.00018762739089657696, "loss": 7.3923, "step": 137340 }, { "epoch": 16.528279181708786, "grad_norm": 186.88645935058594, "learning_rate": 0.00018762555788544576, "loss": 7.4001, "step": 137350 }, { "epoch": 16.5294825511432, "grad_norm": 40.556644439697266, "learning_rate": 0.00018762372474749874, "loss": 7.3879, "step": 137360 }, { "epoch": 16.530685920577618, "grad_norm": 38.50154113769531, "learning_rate": 0.00018762189148273853, "loss": 7.367, "step": 137370 }, { "epoch": 16.531889290012035, "grad_norm": 128.13572692871094, "learning_rate": 0.00018762005809116783, "loss": 7.3556, "step": 137380 }, { "epoch": 16.53309265944645, "grad_norm": 50.316162109375, "learning_rate": 0.00018761822457278926, "loss": 7.4299, "step": 137390 }, { "epoch": 16.534296028880867, "grad_norm": 26.99710464477539, "learning_rate": 0.00018761639092760551, "loss": 7.3604, "step": 137400 }, { "epoch": 16.535499398315284, "grad_norm": 51.31673812866211, "learning_rate": 0.0001876145571556192, "loss": 7.303, "step": 137410 }, { "epoch": 16.5367027677497, "grad_norm": 218.17044067382812, "learning_rate": 0.00018761272325683296, "loss": 7.4029, "step": 137420 }, { "epoch": 16.537906137184116, "grad_norm": 143.8904266357422, "learning_rate": 0.0001876108892312495, "loss": 7.4381, "step": 137430 }, { "epoch": 16.53910950661853, "grad_norm": 90.31390380859375, "learning_rate": 0.0001876090550788715, "loss": 7.3692, "step": 137440 }, { "epoch": 16.540312876052948, "grad_norm": 120.62361907958984, "learning_rate": 0.0001876072207997015, "loss": 7.4033, "step": 137450 }, { "epoch": 16.541516245487365, "grad_norm": 54.00306701660156, "learning_rate": 0.00018760538639374226, "loss": 7.3523, "step": 137460 }, { "epoch": 16.54271961492178, "grad_norm": 84.41741180419922, "learning_rate": 0.00018760355186099638, "loss": 7.3048, "step": 137470 }, { "epoch": 16.543922984356197, "grad_norm": 120.92346954345703, "learning_rate": 0.00018760171720146653, "loss": 7.2617, "step": 137480 }, { "epoch": 16.545126353790614, "grad_norm": 116.1357421875, "learning_rate": 0.0001875998824151554, "loss": 7.3082, "step": 137490 }, { "epoch": 16.54632972322503, "grad_norm": 92.02224731445312, "learning_rate": 0.00018759804750206557, "loss": 7.232, "step": 137500 }, { "epoch": 16.547533092659446, "grad_norm": 75.33924102783203, "learning_rate": 0.00018759621246219977, "loss": 7.3646, "step": 137510 }, { "epoch": 16.548736462093864, "grad_norm": 232.46670532226562, "learning_rate": 0.0001875943772955606, "loss": 7.3562, "step": 137520 }, { "epoch": 16.549939831528278, "grad_norm": 98.42095184326172, "learning_rate": 0.00018759254200215075, "loss": 7.2942, "step": 137530 }, { "epoch": 16.551143200962695, "grad_norm": 141.63809204101562, "learning_rate": 0.00018759070658197287, "loss": 7.3499, "step": 137540 }, { "epoch": 16.552346570397113, "grad_norm": 92.96308135986328, "learning_rate": 0.0001875888710350296, "loss": 7.279, "step": 137550 }, { "epoch": 16.553549939831527, "grad_norm": 153.2523651123047, "learning_rate": 0.00018758703536132362, "loss": 7.3607, "step": 137560 }, { "epoch": 16.554753309265944, "grad_norm": 101.3573989868164, "learning_rate": 0.00018758519956085757, "loss": 7.2989, "step": 137570 }, { "epoch": 16.555956678700362, "grad_norm": 138.0144500732422, "learning_rate": 0.00018758336363363412, "loss": 7.2876, "step": 137580 }, { "epoch": 16.557160048134776, "grad_norm": 431.8025207519531, "learning_rate": 0.0001875815275796559, "loss": 7.2583, "step": 137590 }, { "epoch": 16.558363417569193, "grad_norm": 174.82595825195312, "learning_rate": 0.0001875796913989256, "loss": 7.3036, "step": 137600 }, { "epoch": 16.55956678700361, "grad_norm": 116.15555572509766, "learning_rate": 0.00018757785509144586, "loss": 7.3912, "step": 137610 }, { "epoch": 16.560770156438025, "grad_norm": 151.6611785888672, "learning_rate": 0.00018757601865721933, "loss": 7.2205, "step": 137620 }, { "epoch": 16.561973525872443, "grad_norm": 97.33600616455078, "learning_rate": 0.0001875741820962487, "loss": 7.3213, "step": 137630 }, { "epoch": 16.56317689530686, "grad_norm": 180.7718048095703, "learning_rate": 0.0001875723454085366, "loss": 7.4124, "step": 137640 }, { "epoch": 16.564380264741274, "grad_norm": 145.56834411621094, "learning_rate": 0.0001875705085940857, "loss": 7.335, "step": 137650 }, { "epoch": 16.56558363417569, "grad_norm": 205.83348083496094, "learning_rate": 0.00018756867165289865, "loss": 7.2019, "step": 137660 }, { "epoch": 16.56678700361011, "grad_norm": 195.31544494628906, "learning_rate": 0.00018756683458497813, "loss": 7.2055, "step": 137670 }, { "epoch": 16.567990373044523, "grad_norm": 244.1363983154297, "learning_rate": 0.00018756499739032675, "loss": 7.2788, "step": 137680 }, { "epoch": 16.56919374247894, "grad_norm": 231.9351348876953, "learning_rate": 0.00018756316006894724, "loss": 7.233, "step": 137690 }, { "epoch": 16.57039711191336, "grad_norm": 168.2899932861328, "learning_rate": 0.0001875613226208422, "loss": 7.2498, "step": 137700 }, { "epoch": 16.571600481347772, "grad_norm": 181.37579345703125, "learning_rate": 0.00018755948504601427, "loss": 7.2848, "step": 137710 }, { "epoch": 16.57280385078219, "grad_norm": 73.8011474609375, "learning_rate": 0.00018755764734446618, "loss": 7.4079, "step": 137720 }, { "epoch": 16.574007220216608, "grad_norm": 167.99313354492188, "learning_rate": 0.00018755580951620052, "loss": 7.2984, "step": 137730 }, { "epoch": 16.57521058965102, "grad_norm": 744.4867553710938, "learning_rate": 0.00018755397156122006, "loss": 7.2441, "step": 137740 }, { "epoch": 16.57641395908544, "grad_norm": 165.22177124023438, "learning_rate": 0.00018755213347952733, "loss": 7.3445, "step": 137750 }, { "epoch": 16.577617328519857, "grad_norm": 99.83592987060547, "learning_rate": 0.00018755029527112507, "loss": 7.272, "step": 137760 }, { "epoch": 16.57882069795427, "grad_norm": 94.53739166259766, "learning_rate": 0.0001875484569360159, "loss": 7.3411, "step": 137770 }, { "epoch": 16.58002406738869, "grad_norm": 119.02742767333984, "learning_rate": 0.0001875466184742025, "loss": 7.3016, "step": 137780 }, { "epoch": 16.581227436823106, "grad_norm": 99.3770751953125, "learning_rate": 0.00018754477988568754, "loss": 7.2913, "step": 137790 }, { "epoch": 16.58243080625752, "grad_norm": 314.3997802734375, "learning_rate": 0.00018754294117047366, "loss": 7.3973, "step": 137800 }, { "epoch": 16.583634175691937, "grad_norm": 180.70556640625, "learning_rate": 0.00018754110232856354, "loss": 7.3071, "step": 137810 }, { "epoch": 16.584837545126355, "grad_norm": 74.38983917236328, "learning_rate": 0.00018753926335995984, "loss": 7.3638, "step": 137820 }, { "epoch": 16.58604091456077, "grad_norm": 249.79318237304688, "learning_rate": 0.00018753742426466522, "loss": 7.3613, "step": 137830 }, { "epoch": 16.587244283995187, "grad_norm": 109.54365539550781, "learning_rate": 0.0001875355850426823, "loss": 7.3336, "step": 137840 }, { "epoch": 16.588447653429604, "grad_norm": 135.13758850097656, "learning_rate": 0.0001875337456940138, "loss": 7.2774, "step": 137850 }, { "epoch": 16.589651022864018, "grad_norm": 89.30496215820312, "learning_rate": 0.00018753190621866234, "loss": 7.3111, "step": 137860 }, { "epoch": 16.590854392298436, "grad_norm": 212.9723663330078, "learning_rate": 0.00018753006661663063, "loss": 7.2669, "step": 137870 }, { "epoch": 16.592057761732853, "grad_norm": 73.3120346069336, "learning_rate": 0.00018752822688792128, "loss": 7.347, "step": 137880 }, { "epoch": 16.593261131167267, "grad_norm": 48.44794845581055, "learning_rate": 0.000187526387032537, "loss": 7.3232, "step": 137890 }, { "epoch": 16.594464500601685, "grad_norm": 62.431514739990234, "learning_rate": 0.00018752454705048038, "loss": 7.2734, "step": 137900 }, { "epoch": 16.595667870036102, "grad_norm": 89.42121887207031, "learning_rate": 0.00018752270694175418, "loss": 7.3049, "step": 137910 }, { "epoch": 16.596871239470516, "grad_norm": 113.3212890625, "learning_rate": 0.000187520866706361, "loss": 7.3334, "step": 137920 }, { "epoch": 16.598074608904934, "grad_norm": 205.4388427734375, "learning_rate": 0.0001875190263443035, "loss": 7.3752, "step": 137930 }, { "epoch": 16.59927797833935, "grad_norm": 171.0076904296875, "learning_rate": 0.00018751718585558437, "loss": 7.2799, "step": 137940 }, { "epoch": 16.600481347773766, "grad_norm": 128.2551727294922, "learning_rate": 0.00018751534524020627, "loss": 7.3735, "step": 137950 }, { "epoch": 16.601684717208183, "grad_norm": 56.19858932495117, "learning_rate": 0.00018751350449817185, "loss": 7.3556, "step": 137960 }, { "epoch": 16.6028880866426, "grad_norm": 50.323246002197266, "learning_rate": 0.0001875116636294838, "loss": 7.2606, "step": 137970 }, { "epoch": 16.604091456077015, "grad_norm": 72.1199722290039, "learning_rate": 0.00018750982263414476, "loss": 7.2897, "step": 137980 }, { "epoch": 16.605294825511432, "grad_norm": 134.43150329589844, "learning_rate": 0.00018750798151215742, "loss": 7.3223, "step": 137990 }, { "epoch": 16.60649819494585, "grad_norm": 86.16520690917969, "learning_rate": 0.00018750614026352436, "loss": 7.4149, "step": 138000 }, { "epoch": 16.607701564380264, "grad_norm": 73.39897918701172, "learning_rate": 0.00018750429888824837, "loss": 7.4453, "step": 138010 }, { "epoch": 16.60890493381468, "grad_norm": 53.49974060058594, "learning_rate": 0.00018750245738633205, "loss": 7.3445, "step": 138020 }, { "epoch": 16.6101083032491, "grad_norm": 108.8686752319336, "learning_rate": 0.00018750061575777808, "loss": 7.2752, "step": 138030 }, { "epoch": 16.611311672683513, "grad_norm": 109.46723937988281, "learning_rate": 0.00018749877400258907, "loss": 7.3848, "step": 138040 }, { "epoch": 16.61251504211793, "grad_norm": 159.08604431152344, "learning_rate": 0.00018749693212076775, "loss": 7.3612, "step": 138050 }, { "epoch": 16.613718411552348, "grad_norm": 75.06536102294922, "learning_rate": 0.00018749509011231678, "loss": 7.2738, "step": 138060 }, { "epoch": 16.614921780986762, "grad_norm": 118.68558502197266, "learning_rate": 0.00018749324797723882, "loss": 7.3183, "step": 138070 }, { "epoch": 16.61612515042118, "grad_norm": 103.16372680664062, "learning_rate": 0.0001874914057155365, "loss": 7.3244, "step": 138080 }, { "epoch": 16.617328519855597, "grad_norm": 106.84732055664062, "learning_rate": 0.00018748956332721253, "loss": 7.2596, "step": 138090 }, { "epoch": 16.61853188929001, "grad_norm": 110.65084075927734, "learning_rate": 0.00018748772081226956, "loss": 7.2644, "step": 138100 }, { "epoch": 16.61973525872443, "grad_norm": 78.4281234741211, "learning_rate": 0.0001874858781707103, "loss": 7.3037, "step": 138110 }, { "epoch": 16.620938628158846, "grad_norm": 70.9840087890625, "learning_rate": 0.00018748403540253732, "loss": 7.199, "step": 138120 }, { "epoch": 16.62214199759326, "grad_norm": 96.12006378173828, "learning_rate": 0.00018748219250775335, "loss": 7.3796, "step": 138130 }, { "epoch": 16.623345367027678, "grad_norm": 102.58830261230469, "learning_rate": 0.00018748034948636106, "loss": 7.2708, "step": 138140 }, { "epoch": 16.624548736462096, "grad_norm": 164.28958129882812, "learning_rate": 0.0001874785063383631, "loss": 7.308, "step": 138150 }, { "epoch": 16.62575210589651, "grad_norm": 135.41746520996094, "learning_rate": 0.00018747666306376216, "loss": 7.2117, "step": 138160 }, { "epoch": 16.626955475330927, "grad_norm": 127.80256652832031, "learning_rate": 0.00018747481966256086, "loss": 7.2532, "step": 138170 }, { "epoch": 16.628158844765345, "grad_norm": 120.26117706298828, "learning_rate": 0.0001874729761347619, "loss": 7.3886, "step": 138180 }, { "epoch": 16.62936221419976, "grad_norm": 111.0550765991211, "learning_rate": 0.00018747113248036797, "loss": 7.4187, "step": 138190 }, { "epoch": 16.630565583634176, "grad_norm": 134.6328582763672, "learning_rate": 0.0001874692886993817, "loss": 7.3399, "step": 138200 }, { "epoch": 16.63176895306859, "grad_norm": 209.6514892578125, "learning_rate": 0.0001874674447918058, "loss": 7.2014, "step": 138210 }, { "epoch": 16.632972322503008, "grad_norm": 130.1690673828125, "learning_rate": 0.00018746560075764287, "loss": 7.195, "step": 138220 }, { "epoch": 16.634175691937426, "grad_norm": 85.39344787597656, "learning_rate": 0.00018746375659689567, "loss": 7.1786, "step": 138230 }, { "epoch": 16.63537906137184, "grad_norm": 199.42642211914062, "learning_rate": 0.0001874619123095668, "loss": 7.322, "step": 138240 }, { "epoch": 16.636582430806257, "grad_norm": 114.37007904052734, "learning_rate": 0.00018746006789565893, "loss": 7.3211, "step": 138250 }, { "epoch": 16.637785800240675, "grad_norm": 54.989017486572266, "learning_rate": 0.00018745822335517476, "loss": 7.31, "step": 138260 }, { "epoch": 16.63898916967509, "grad_norm": 56.570369720458984, "learning_rate": 0.00018745637868811696, "loss": 7.3263, "step": 138270 }, { "epoch": 16.640192539109506, "grad_norm": 115.92820739746094, "learning_rate": 0.0001874545338944882, "loss": 7.3327, "step": 138280 }, { "epoch": 16.641395908543924, "grad_norm": 224.2471160888672, "learning_rate": 0.0001874526889742911, "loss": 7.2967, "step": 138290 }, { "epoch": 16.642599277978338, "grad_norm": 79.9986801147461, "learning_rate": 0.0001874508439275284, "loss": 7.2535, "step": 138300 }, { "epoch": 16.643802647412755, "grad_norm": 53.839508056640625, "learning_rate": 0.00018744899875420268, "loss": 7.3326, "step": 138310 }, { "epoch": 16.645006016847173, "grad_norm": 139.7207794189453, "learning_rate": 0.00018744715345431673, "loss": 7.2236, "step": 138320 }, { "epoch": 16.646209386281587, "grad_norm": 141.90843200683594, "learning_rate": 0.00018744530802787315, "loss": 7.2426, "step": 138330 }, { "epoch": 16.647412755716005, "grad_norm": 133.6788787841797, "learning_rate": 0.0001874434624748746, "loss": 7.287, "step": 138340 }, { "epoch": 16.648616125150422, "grad_norm": 81.70609283447266, "learning_rate": 0.0001874416167953238, "loss": 7.2922, "step": 138350 }, { "epoch": 16.649819494584836, "grad_norm": 92.37747192382812, "learning_rate": 0.00018743977098922336, "loss": 7.1355, "step": 138360 }, { "epoch": 16.651022864019254, "grad_norm": 94.87637329101562, "learning_rate": 0.000187437925056576, "loss": 7.2832, "step": 138370 }, { "epoch": 16.65222623345367, "grad_norm": 256.49774169921875, "learning_rate": 0.00018743607899738434, "loss": 7.3185, "step": 138380 }, { "epoch": 16.653429602888085, "grad_norm": 99.84320068359375, "learning_rate": 0.00018743423281165114, "loss": 7.2878, "step": 138390 }, { "epoch": 16.654632972322503, "grad_norm": 172.68467712402344, "learning_rate": 0.000187432386499379, "loss": 7.3679, "step": 138400 }, { "epoch": 16.65583634175692, "grad_norm": 136.68402099609375, "learning_rate": 0.0001874305400605706, "loss": 7.3502, "step": 138410 }, { "epoch": 16.657039711191334, "grad_norm": 198.687744140625, "learning_rate": 0.00018742869349522862, "loss": 7.3141, "step": 138420 }, { "epoch": 16.658243080625752, "grad_norm": 106.51041412353516, "learning_rate": 0.00018742684680335577, "loss": 7.3389, "step": 138430 }, { "epoch": 16.65944645006017, "grad_norm": 94.6019058227539, "learning_rate": 0.00018742499998495465, "loss": 7.3798, "step": 138440 }, { "epoch": 16.660649819494584, "grad_norm": 108.6557846069336, "learning_rate": 0.00018742315304002798, "loss": 7.4042, "step": 138450 }, { "epoch": 16.661853188929, "grad_norm": 111.88874816894531, "learning_rate": 0.00018742130596857842, "loss": 7.2755, "step": 138460 }, { "epoch": 16.66305655836342, "grad_norm": 73.07344055175781, "learning_rate": 0.00018741945877060864, "loss": 7.2987, "step": 138470 }, { "epoch": 16.664259927797833, "grad_norm": 107.49629211425781, "learning_rate": 0.00018741761144612133, "loss": 7.2433, "step": 138480 }, { "epoch": 16.66546329723225, "grad_norm": 87.2340316772461, "learning_rate": 0.00018741576399511914, "loss": 7.2603, "step": 138490 }, { "epoch": 16.666666666666668, "grad_norm": 47.034210205078125, "learning_rate": 0.0001874139164176048, "loss": 7.3143, "step": 138500 }, { "epoch": 16.667870036101082, "grad_norm": 458.09979248046875, "learning_rate": 0.00018741206871358091, "loss": 7.2992, "step": 138510 }, { "epoch": 16.6690734055355, "grad_norm": 197.75437927246094, "learning_rate": 0.00018741022088305018, "loss": 7.277, "step": 138520 }, { "epoch": 16.670276774969917, "grad_norm": 106.57793426513672, "learning_rate": 0.0001874083729260153, "loss": 7.2749, "step": 138530 }, { "epoch": 16.67148014440433, "grad_norm": 248.29006958007812, "learning_rate": 0.00018740652484247887, "loss": 7.2091, "step": 138540 }, { "epoch": 16.67268351383875, "grad_norm": 44.29582595825195, "learning_rate": 0.0001874046766324437, "loss": 7.194, "step": 138550 }, { "epoch": 16.673886883273166, "grad_norm": 127.11905670166016, "learning_rate": 0.00018740282829591228, "loss": 7.2697, "step": 138560 }, { "epoch": 16.67509025270758, "grad_norm": 105.27174377441406, "learning_rate": 0.00018740097983288746, "loss": 7.3125, "step": 138570 }, { "epoch": 16.676293622141998, "grad_norm": 94.6681137084961, "learning_rate": 0.00018739913124337181, "loss": 7.1667, "step": 138580 }, { "epoch": 16.677496991576415, "grad_norm": 113.34056854248047, "learning_rate": 0.00018739728252736807, "loss": 7.2745, "step": 138590 }, { "epoch": 16.67870036101083, "grad_norm": 109.09080505371094, "learning_rate": 0.00018739543368487887, "loss": 7.2274, "step": 138600 }, { "epoch": 16.679903730445247, "grad_norm": 135.49853515625, "learning_rate": 0.00018739358471590688, "loss": 7.2697, "step": 138610 }, { "epoch": 16.681107099879664, "grad_norm": 73.32743835449219, "learning_rate": 0.00018739173562045482, "loss": 7.2995, "step": 138620 }, { "epoch": 16.68231046931408, "grad_norm": 89.45295715332031, "learning_rate": 0.00018738988639852534, "loss": 7.4163, "step": 138630 }, { "epoch": 16.683513838748496, "grad_norm": 218.6761016845703, "learning_rate": 0.00018738803705012114, "loss": 7.2111, "step": 138640 }, { "epoch": 16.684717208182914, "grad_norm": 124.73004150390625, "learning_rate": 0.00018738618757524483, "loss": 7.2773, "step": 138650 }, { "epoch": 16.685920577617328, "grad_norm": 302.06170654296875, "learning_rate": 0.00018738433797389913, "loss": 7.3111, "step": 138660 }, { "epoch": 16.687123947051745, "grad_norm": 119.40280151367188, "learning_rate": 0.00018738248824608675, "loss": 7.3417, "step": 138670 }, { "epoch": 16.688327316486163, "grad_norm": 96.8246078491211, "learning_rate": 0.00018738063839181035, "loss": 7.2684, "step": 138680 }, { "epoch": 16.689530685920577, "grad_norm": 236.0146026611328, "learning_rate": 0.00018737878841107257, "loss": 7.2409, "step": 138690 }, { "epoch": 16.690734055354994, "grad_norm": 137.79690551757812, "learning_rate": 0.00018737693830387611, "loss": 7.4255, "step": 138700 }, { "epoch": 16.691937424789412, "grad_norm": 174.46075439453125, "learning_rate": 0.00018737508807022363, "loss": 7.2236, "step": 138710 }, { "epoch": 16.693140794223826, "grad_norm": 180.52059936523438, "learning_rate": 0.00018737323771011785, "loss": 7.2406, "step": 138720 }, { "epoch": 16.694344163658243, "grad_norm": 130.89598083496094, "learning_rate": 0.00018737138722356144, "loss": 7.2732, "step": 138730 }, { "epoch": 16.69554753309266, "grad_norm": 271.3072509765625, "learning_rate": 0.00018736953661055702, "loss": 7.2451, "step": 138740 }, { "epoch": 16.696750902527075, "grad_norm": 176.72035217285156, "learning_rate": 0.0001873676858711073, "loss": 7.2727, "step": 138750 }, { "epoch": 16.697954271961493, "grad_norm": 143.26882934570312, "learning_rate": 0.00018736583500521503, "loss": 7.328, "step": 138760 }, { "epoch": 16.69915764139591, "grad_norm": 245.9073028564453, "learning_rate": 0.00018736398401288277, "loss": 7.246, "step": 138770 }, { "epoch": 16.700361010830324, "grad_norm": 103.39151763916016, "learning_rate": 0.0001873621328941133, "loss": 7.134, "step": 138780 }, { "epoch": 16.70156438026474, "grad_norm": 170.048828125, "learning_rate": 0.00018736028164890921, "loss": 7.2998, "step": 138790 }, { "epoch": 16.70276774969916, "grad_norm": 202.63087463378906, "learning_rate": 0.00018735843027727325, "loss": 7.2928, "step": 138800 }, { "epoch": 16.703971119133573, "grad_norm": 163.97991943359375, "learning_rate": 0.00018735657877920806, "loss": 7.3331, "step": 138810 }, { "epoch": 16.70517448856799, "grad_norm": 186.88934326171875, "learning_rate": 0.00018735472715471636, "loss": 7.2235, "step": 138820 }, { "epoch": 16.706377858002405, "grad_norm": 173.6659698486328, "learning_rate": 0.0001873528754038008, "loss": 7.3578, "step": 138830 }, { "epoch": 16.707581227436823, "grad_norm": 143.7120361328125, "learning_rate": 0.00018735102352646402, "loss": 7.4287, "step": 138840 }, { "epoch": 16.70878459687124, "grad_norm": 141.8917999267578, "learning_rate": 0.00018734917152270878, "loss": 7.302, "step": 138850 }, { "epoch": 16.709987966305654, "grad_norm": 252.0840301513672, "learning_rate": 0.0001873473193925377, "loss": 7.3326, "step": 138860 }, { "epoch": 16.71119133574007, "grad_norm": 306.8090515136719, "learning_rate": 0.0001873454671359535, "loss": 7.2765, "step": 138870 }, { "epoch": 16.71239470517449, "grad_norm": 259.6101379394531, "learning_rate": 0.00018734361475295885, "loss": 7.2738, "step": 138880 }, { "epoch": 16.713598074608903, "grad_norm": 221.7580108642578, "learning_rate": 0.0001873417622435564, "loss": 7.2437, "step": 138890 }, { "epoch": 16.71480144404332, "grad_norm": 161.8363494873047, "learning_rate": 0.00018733990960774887, "loss": 7.4207, "step": 138900 }, { "epoch": 16.71600481347774, "grad_norm": 275.39947509765625, "learning_rate": 0.00018733805684553896, "loss": 7.3731, "step": 138910 }, { "epoch": 16.717208182912152, "grad_norm": 107.85295867919922, "learning_rate": 0.00018733620395692928, "loss": 7.2704, "step": 138920 }, { "epoch": 16.71841155234657, "grad_norm": 67.6468734741211, "learning_rate": 0.00018733435094192254, "loss": 7.1798, "step": 138930 }, { "epoch": 16.719614921780988, "grad_norm": 93.60075378417969, "learning_rate": 0.00018733249780052144, "loss": 7.2046, "step": 138940 }, { "epoch": 16.7208182912154, "grad_norm": 109.11235809326172, "learning_rate": 0.00018733064453272869, "loss": 7.2176, "step": 138950 }, { "epoch": 16.72202166064982, "grad_norm": 86.03272247314453, "learning_rate": 0.0001873287911385469, "loss": 7.3452, "step": 138960 }, { "epoch": 16.723225030084237, "grad_norm": 100.9232177734375, "learning_rate": 0.00018732693761797878, "loss": 7.2866, "step": 138970 }, { "epoch": 16.72442839951865, "grad_norm": 48.10356521606445, "learning_rate": 0.00018732508397102709, "loss": 7.2352, "step": 138980 }, { "epoch": 16.72563176895307, "grad_norm": 30.741086959838867, "learning_rate": 0.00018732323019769438, "loss": 7.3659, "step": 138990 }, { "epoch": 16.726835138387486, "grad_norm": 194.4628448486328, "learning_rate": 0.0001873213762979834, "loss": 7.28, "step": 139000 }, { "epoch": 16.7280385078219, "grad_norm": 110.37733459472656, "learning_rate": 0.00018731952227189685, "loss": 7.3293, "step": 139010 }, { "epoch": 16.729241877256317, "grad_norm": 104.7942886352539, "learning_rate": 0.00018731766811943737, "loss": 7.2848, "step": 139020 }, { "epoch": 16.730445246690735, "grad_norm": 48.08375549316406, "learning_rate": 0.0001873158138406077, "loss": 7.3379, "step": 139030 }, { "epoch": 16.73164861612515, "grad_norm": 53.7038459777832, "learning_rate": 0.00018731395943541045, "loss": 7.2115, "step": 139040 }, { "epoch": 16.732851985559567, "grad_norm": 54.888431549072266, "learning_rate": 0.00018731210490384833, "loss": 7.2915, "step": 139050 }, { "epoch": 16.734055354993984, "grad_norm": 54.29003143310547, "learning_rate": 0.00018731025024592407, "loss": 7.279, "step": 139060 }, { "epoch": 16.735258724428398, "grad_norm": 130.731201171875, "learning_rate": 0.00018730839546164034, "loss": 7.3312, "step": 139070 }, { "epoch": 16.736462093862816, "grad_norm": 126.02831268310547, "learning_rate": 0.00018730654055099978, "loss": 7.2774, "step": 139080 }, { "epoch": 16.737665463297233, "grad_norm": 126.5333480834961, "learning_rate": 0.00018730468551400513, "loss": 7.2487, "step": 139090 }, { "epoch": 16.738868832731647, "grad_norm": 106.4551773071289, "learning_rate": 0.000187302830350659, "loss": 7.2209, "step": 139100 }, { "epoch": 16.740072202166065, "grad_norm": 185.82272338867188, "learning_rate": 0.00018730097506096415, "loss": 7.3364, "step": 139110 }, { "epoch": 16.741275571600482, "grad_norm": 519.2950439453125, "learning_rate": 0.00018729911964492324, "loss": 7.2096, "step": 139120 }, { "epoch": 16.742478941034896, "grad_norm": 224.7523956298828, "learning_rate": 0.00018729726410253892, "loss": 7.2224, "step": 139130 }, { "epoch": 16.743682310469314, "grad_norm": 362.90472412109375, "learning_rate": 0.00018729540843381393, "loss": 7.2313, "step": 139140 }, { "epoch": 16.74488567990373, "grad_norm": 104.93167877197266, "learning_rate": 0.00018729355263875092, "loss": 7.2703, "step": 139150 }, { "epoch": 16.746089049338146, "grad_norm": 91.54993438720703, "learning_rate": 0.0001872916967173526, "loss": 7.1763, "step": 139160 }, { "epoch": 16.747292418772563, "grad_norm": 162.8675537109375, "learning_rate": 0.00018728984066962166, "loss": 7.2305, "step": 139170 }, { "epoch": 16.74849578820698, "grad_norm": 52.1177978515625, "learning_rate": 0.00018728798449556074, "loss": 7.199, "step": 139180 }, { "epoch": 16.749699157641395, "grad_norm": 346.7183837890625, "learning_rate": 0.00018728612819517254, "loss": 7.2614, "step": 139190 }, { "epoch": 16.750902527075812, "grad_norm": 133.7086944580078, "learning_rate": 0.0001872842717684598, "loss": 7.2071, "step": 139200 }, { "epoch": 16.75210589651023, "grad_norm": 108.26351928710938, "learning_rate": 0.00018728241521542515, "loss": 7.3074, "step": 139210 }, { "epoch": 16.753309265944644, "grad_norm": 87.5799789428711, "learning_rate": 0.0001872805585360713, "loss": 7.2241, "step": 139220 }, { "epoch": 16.75451263537906, "grad_norm": 65.84819793701172, "learning_rate": 0.00018727870173040092, "loss": 7.183, "step": 139230 }, { "epoch": 16.75571600481348, "grad_norm": 164.9320831298828, "learning_rate": 0.00018727684479841674, "loss": 7.141, "step": 139240 }, { "epoch": 16.756919374247893, "grad_norm": 196.724853515625, "learning_rate": 0.00018727498774012138, "loss": 7.234, "step": 139250 }, { "epoch": 16.75812274368231, "grad_norm": 358.0588684082031, "learning_rate": 0.00018727313055551758, "loss": 7.1495, "step": 139260 }, { "epoch": 16.759326113116728, "grad_norm": 203.6941680908203, "learning_rate": 0.00018727127324460801, "loss": 7.272, "step": 139270 }, { "epoch": 16.760529482551142, "grad_norm": 203.03640747070312, "learning_rate": 0.00018726941580739538, "loss": 7.1698, "step": 139280 }, { "epoch": 16.76173285198556, "grad_norm": 260.54595947265625, "learning_rate": 0.00018726755824388235, "loss": 7.2256, "step": 139290 }, { "epoch": 16.762936221419977, "grad_norm": 148.79818725585938, "learning_rate": 0.0001872657005540716, "loss": 7.3323, "step": 139300 }, { "epoch": 16.76413959085439, "grad_norm": 848.3822021484375, "learning_rate": 0.00018726384273796588, "loss": 7.2515, "step": 139310 }, { "epoch": 16.76534296028881, "grad_norm": 136.0645751953125, "learning_rate": 0.0001872619847955678, "loss": 7.2265, "step": 139320 }, { "epoch": 16.766546329723226, "grad_norm": 145.14022827148438, "learning_rate": 0.0001872601267268801, "loss": 7.2299, "step": 139330 }, { "epoch": 16.76774969915764, "grad_norm": 69.01983642578125, "learning_rate": 0.0001872582685319054, "loss": 7.2612, "step": 139340 }, { "epoch": 16.768953068592058, "grad_norm": 110.63954162597656, "learning_rate": 0.0001872564102106465, "loss": 7.3256, "step": 139350 }, { "epoch": 16.770156438026476, "grad_norm": 931.7457885742188, "learning_rate": 0.00018725455176310602, "loss": 7.2788, "step": 139360 }, { "epoch": 16.77135980746089, "grad_norm": 197.388916015625, "learning_rate": 0.00018725269318928665, "loss": 7.1983, "step": 139370 }, { "epoch": 16.772563176895307, "grad_norm": 70.93016052246094, "learning_rate": 0.0001872508344891911, "loss": 7.2098, "step": 139380 }, { "epoch": 16.773766546329725, "grad_norm": 75.49091339111328, "learning_rate": 0.00018724897566282207, "loss": 7.1266, "step": 139390 }, { "epoch": 16.77496991576414, "grad_norm": 201.10296630859375, "learning_rate": 0.0001872471167101822, "loss": 7.2378, "step": 139400 }, { "epoch": 16.776173285198556, "grad_norm": 341.234375, "learning_rate": 0.00018724525763127422, "loss": 7.3339, "step": 139410 }, { "epoch": 16.777376654632974, "grad_norm": 684.5357055664062, "learning_rate": 0.0001872433984261008, "loss": 7.276, "step": 139420 }, { "epoch": 16.778580024067388, "grad_norm": 444.3345642089844, "learning_rate": 0.00018724153909466468, "loss": 7.3303, "step": 139430 }, { "epoch": 16.779783393501805, "grad_norm": 502.9717712402344, "learning_rate": 0.0001872396796369685, "loss": 7.2057, "step": 139440 }, { "epoch": 16.780986762936223, "grad_norm": 772.0226440429688, "learning_rate": 0.00018723782005301492, "loss": 7.1651, "step": 139450 }, { "epoch": 16.782190132370637, "grad_norm": 416.9214782714844, "learning_rate": 0.00018723596034280675, "loss": 7.2508, "step": 139460 }, { "epoch": 16.783393501805055, "grad_norm": 205.43426513671875, "learning_rate": 0.00018723410050634653, "loss": 7.23, "step": 139470 }, { "epoch": 16.784596871239472, "grad_norm": 383.4138488769531, "learning_rate": 0.00018723224054363708, "loss": 7.145, "step": 139480 }, { "epoch": 16.785800240673886, "grad_norm": 317.0182800292969, "learning_rate": 0.00018723038045468103, "loss": 7.2024, "step": 139490 }, { "epoch": 16.787003610108304, "grad_norm": 689.5606689453125, "learning_rate": 0.0001872285202394811, "loss": 7.2854, "step": 139500 }, { "epoch": 16.78820697954272, "grad_norm": 777.2088012695312, "learning_rate": 0.00018722665989803994, "loss": 7.2215, "step": 139510 }, { "epoch": 16.789410348977135, "grad_norm": 290.6332702636719, "learning_rate": 0.00018722479943036026, "loss": 7.27, "step": 139520 }, { "epoch": 16.790613718411553, "grad_norm": 259.62860107421875, "learning_rate": 0.00018722293883644477, "loss": 7.2449, "step": 139530 }, { "epoch": 16.79181708784597, "grad_norm": 732.4400024414062, "learning_rate": 0.00018722107811629618, "loss": 7.2064, "step": 139540 }, { "epoch": 16.793020457280385, "grad_norm": 387.3924865722656, "learning_rate": 0.00018721921726991712, "loss": 7.415, "step": 139550 }, { "epoch": 16.794223826714802, "grad_norm": 299.3282165527344, "learning_rate": 0.00018721735629731033, "loss": 7.2283, "step": 139560 }, { "epoch": 16.79542719614922, "grad_norm": 1383.3133544921875, "learning_rate": 0.00018721549519847848, "loss": 7.2772, "step": 139570 }, { "epoch": 16.796630565583634, "grad_norm": 468.2200012207031, "learning_rate": 0.0001872136339734243, "loss": 7.2919, "step": 139580 }, { "epoch": 16.79783393501805, "grad_norm": 412.17950439453125, "learning_rate": 0.00018721177262215046, "loss": 7.1286, "step": 139590 }, { "epoch": 16.799037304452465, "grad_norm": 303.6209716796875, "learning_rate": 0.00018720991114465964, "loss": 7.3256, "step": 139600 }, { "epoch": 16.800240673886883, "grad_norm": 245.2219696044922, "learning_rate": 0.00018720804954095455, "loss": 7.214, "step": 139610 }, { "epoch": 16.8014440433213, "grad_norm": 377.19915771484375, "learning_rate": 0.00018720618781103786, "loss": 7.196, "step": 139620 }, { "epoch": 16.802647412755714, "grad_norm": 278.16094970703125, "learning_rate": 0.00018720432595491232, "loss": 7.2349, "step": 139630 }, { "epoch": 16.803850782190132, "grad_norm": 664.1884765625, "learning_rate": 0.0001872024639725806, "loss": 7.1976, "step": 139640 }, { "epoch": 16.80505415162455, "grad_norm": 215.2874298095703, "learning_rate": 0.00018720060186404536, "loss": 7.103, "step": 139650 }, { "epoch": 16.806257521058964, "grad_norm": 231.60414123535156, "learning_rate": 0.00018719873962930932, "loss": 7.2328, "step": 139660 }, { "epoch": 16.80746089049338, "grad_norm": 236.93356323242188, "learning_rate": 0.00018719687726837515, "loss": 7.15, "step": 139670 }, { "epoch": 16.8086642599278, "grad_norm": 231.94137573242188, "learning_rate": 0.00018719501478124562, "loss": 7.3687, "step": 139680 }, { "epoch": 16.809867629362213, "grad_norm": 216.42724609375, "learning_rate": 0.00018719315216792336, "loss": 7.2421, "step": 139690 }, { "epoch": 16.81107099879663, "grad_norm": 218.34716796875, "learning_rate": 0.00018719128942841108, "loss": 7.1991, "step": 139700 }, { "epoch": 16.812274368231048, "grad_norm": 251.12322998046875, "learning_rate": 0.00018718942656271146, "loss": 7.2053, "step": 139710 }, { "epoch": 16.813477737665462, "grad_norm": 118.98465728759766, "learning_rate": 0.0001871875635708272, "loss": 7.2229, "step": 139720 }, { "epoch": 16.81468110709988, "grad_norm": 229.80421447753906, "learning_rate": 0.00018718570045276104, "loss": 7.3189, "step": 139730 }, { "epoch": 16.815884476534297, "grad_norm": 333.42333984375, "learning_rate": 0.00018718383720851565, "loss": 7.3007, "step": 139740 }, { "epoch": 16.81708784596871, "grad_norm": 175.28524780273438, "learning_rate": 0.00018718197383809374, "loss": 7.2219, "step": 139750 }, { "epoch": 16.81829121540313, "grad_norm": 857.9053955078125, "learning_rate": 0.00018718011034149794, "loss": 7.2849, "step": 139760 }, { "epoch": 16.819494584837546, "grad_norm": 595.9356689453125, "learning_rate": 0.00018717824671873103, "loss": 7.2844, "step": 139770 }, { "epoch": 16.82069795427196, "grad_norm": 616.8970947265625, "learning_rate": 0.00018717638296979566, "loss": 7.2565, "step": 139780 }, { "epoch": 16.821901323706378, "grad_norm": 323.5877990722656, "learning_rate": 0.00018717451909469455, "loss": 7.2226, "step": 139790 }, { "epoch": 16.823104693140795, "grad_norm": 301.0235290527344, "learning_rate": 0.0001871726550934304, "loss": 7.2297, "step": 139800 }, { "epoch": 16.82430806257521, "grad_norm": 476.3625793457031, "learning_rate": 0.00018717079096600584, "loss": 7.253, "step": 139810 }, { "epoch": 16.825511432009627, "grad_norm": 681.467041015625, "learning_rate": 0.00018716892671242368, "loss": 7.2507, "step": 139820 }, { "epoch": 16.826714801444044, "grad_norm": 567.54150390625, "learning_rate": 0.00018716706233268653, "loss": 7.2234, "step": 139830 }, { "epoch": 16.82791817087846, "grad_norm": 454.80859375, "learning_rate": 0.00018716519782679714, "loss": 7.1869, "step": 139840 }, { "epoch": 16.829121540312876, "grad_norm": 407.3795471191406, "learning_rate": 0.00018716333319475817, "loss": 7.188, "step": 139850 }, { "epoch": 16.830324909747294, "grad_norm": 660.4546508789062, "learning_rate": 0.00018716146843657233, "loss": 7.3038, "step": 139860 }, { "epoch": 16.831528279181708, "grad_norm": 380.2582092285156, "learning_rate": 0.00018715960355224237, "loss": 7.2803, "step": 139870 }, { "epoch": 16.832731648616125, "grad_norm": 585.8861694335938, "learning_rate": 0.00018715773854177092, "loss": 7.2756, "step": 139880 }, { "epoch": 16.833935018050543, "grad_norm": 1130.9735107421875, "learning_rate": 0.0001871558734051607, "loss": 7.2597, "step": 139890 }, { "epoch": 16.835138387484957, "grad_norm": 790.6238403320312, "learning_rate": 0.00018715400814241443, "loss": 7.2254, "step": 139900 }, { "epoch": 16.836341756919374, "grad_norm": 508.5173645019531, "learning_rate": 0.00018715214275353477, "loss": 7.3063, "step": 139910 }, { "epoch": 16.837545126353792, "grad_norm": 1146.9901123046875, "learning_rate": 0.00018715027723852442, "loss": 7.257, "step": 139920 }, { "epoch": 16.838748495788206, "grad_norm": 397.6241455078125, "learning_rate": 0.00018714841159738613, "loss": 7.1613, "step": 139930 }, { "epoch": 16.839951865222623, "grad_norm": 611.0847778320312, "learning_rate": 0.00018714654583012262, "loss": 7.1651, "step": 139940 }, { "epoch": 16.84115523465704, "grad_norm": 371.8877868652344, "learning_rate": 0.00018714467993673645, "loss": 7.2738, "step": 139950 }, { "epoch": 16.842358604091455, "grad_norm": 366.5208435058594, "learning_rate": 0.00018714281391723047, "loss": 7.3467, "step": 139960 }, { "epoch": 16.843561973525873, "grad_norm": 253.11341857910156, "learning_rate": 0.00018714094777160734, "loss": 7.2335, "step": 139970 }, { "epoch": 16.84476534296029, "grad_norm": 254.44203186035156, "learning_rate": 0.0001871390814998697, "loss": 7.2224, "step": 139980 }, { "epoch": 16.845968712394704, "grad_norm": 408.25006103515625, "learning_rate": 0.00018713721510202032, "loss": 7.2601, "step": 139990 }, { "epoch": 16.84717208182912, "grad_norm": 105.78641510009766, "learning_rate": 0.00018713534857806184, "loss": 7.2055, "step": 140000 }, { "epoch": 16.84837545126354, "grad_norm": 169.25010681152344, "learning_rate": 0.00018713348192799707, "loss": 7.2599, "step": 140010 }, { "epoch": 16.849578820697953, "grad_norm": 163.738525390625, "learning_rate": 0.00018713161515182859, "loss": 7.1699, "step": 140020 }, { "epoch": 16.85078219013237, "grad_norm": 118.00897216796875, "learning_rate": 0.00018712974824955915, "loss": 7.2125, "step": 140030 }, { "epoch": 16.85198555956679, "grad_norm": 179.82626342773438, "learning_rate": 0.00018712788122119144, "loss": 7.2152, "step": 140040 }, { "epoch": 16.853188929001202, "grad_norm": 230.5179901123047, "learning_rate": 0.0001871260140667282, "loss": 7.1367, "step": 140050 }, { "epoch": 16.85439229843562, "grad_norm": 117.68388366699219, "learning_rate": 0.0001871241467861721, "loss": 7.2819, "step": 140060 }, { "epoch": 16.855595667870038, "grad_norm": 102.51017761230469, "learning_rate": 0.00018712227937952585, "loss": 7.1079, "step": 140070 }, { "epoch": 16.85679903730445, "grad_norm": 91.59984588623047, "learning_rate": 0.00018712041184679214, "loss": 7.3432, "step": 140080 }, { "epoch": 16.85800240673887, "grad_norm": 132.9410400390625, "learning_rate": 0.00018711854418797372, "loss": 7.2687, "step": 140090 }, { "epoch": 16.859205776173287, "grad_norm": 122.43665313720703, "learning_rate": 0.00018711667640307323, "loss": 7.203, "step": 140100 }, { "epoch": 16.8604091456077, "grad_norm": 102.4945297241211, "learning_rate": 0.00018711480849209342, "loss": 7.2202, "step": 140110 }, { "epoch": 16.86161251504212, "grad_norm": 74.01268768310547, "learning_rate": 0.00018711294045503695, "loss": 7.2334, "step": 140120 }, { "epoch": 16.862815884476536, "grad_norm": 93.73290252685547, "learning_rate": 0.00018711107229190654, "loss": 7.3383, "step": 140130 }, { "epoch": 16.86401925391095, "grad_norm": 130.47085571289062, "learning_rate": 0.00018710920400270492, "loss": 7.298, "step": 140140 }, { "epoch": 16.865222623345367, "grad_norm": 130.94932556152344, "learning_rate": 0.0001871073355874348, "loss": 7.2934, "step": 140150 }, { "epoch": 16.866425992779785, "grad_norm": 430.466796875, "learning_rate": 0.0001871054670460988, "loss": 7.2353, "step": 140160 }, { "epoch": 16.8676293622142, "grad_norm": 130.47991943359375, "learning_rate": 0.00018710359837869974, "loss": 7.298, "step": 140170 }, { "epoch": 16.868832731648617, "grad_norm": 232.81826782226562, "learning_rate": 0.00018710172958524024, "loss": 7.2429, "step": 140180 }, { "epoch": 16.870036101083034, "grad_norm": 210.23997497558594, "learning_rate": 0.00018709986066572302, "loss": 7.196, "step": 140190 }, { "epoch": 16.871239470517448, "grad_norm": 78.10020446777344, "learning_rate": 0.00018709799162015082, "loss": 7.2458, "step": 140200 }, { "epoch": 16.872442839951866, "grad_norm": 191.275146484375, "learning_rate": 0.00018709612244852632, "loss": 7.1331, "step": 140210 }, { "epoch": 16.87364620938628, "grad_norm": 203.09738159179688, "learning_rate": 0.00018709425315085223, "loss": 7.2239, "step": 140220 }, { "epoch": 16.874849578820697, "grad_norm": 193.85629272460938, "learning_rate": 0.00018709238372713124, "loss": 7.3018, "step": 140230 }, { "epoch": 16.876052948255115, "grad_norm": 244.79351806640625, "learning_rate": 0.00018709051417736608, "loss": 7.1613, "step": 140240 }, { "epoch": 16.87725631768953, "grad_norm": 158.73377990722656, "learning_rate": 0.00018708864450155943, "loss": 7.3027, "step": 140250 }, { "epoch": 16.878459687123947, "grad_norm": 129.6427459716797, "learning_rate": 0.00018708677469971404, "loss": 7.2607, "step": 140260 }, { "epoch": 16.879663056558364, "grad_norm": 462.04388427734375, "learning_rate": 0.00018708490477183256, "loss": 7.2748, "step": 140270 }, { "epoch": 16.880866425992778, "grad_norm": 512.9693603515625, "learning_rate": 0.00018708303471791772, "loss": 7.2684, "step": 140280 }, { "epoch": 16.882069795427196, "grad_norm": 284.9821472167969, "learning_rate": 0.00018708116453797224, "loss": 7.2547, "step": 140290 }, { "epoch": 16.883273164861613, "grad_norm": 129.4783477783203, "learning_rate": 0.0001870792942319988, "loss": 7.2807, "step": 140300 }, { "epoch": 16.884476534296027, "grad_norm": 135.47842407226562, "learning_rate": 0.00018707742380000015, "loss": 7.3678, "step": 140310 }, { "epoch": 16.885679903730445, "grad_norm": 238.98765563964844, "learning_rate": 0.00018707555324197893, "loss": 7.2005, "step": 140320 }, { "epoch": 16.886883273164862, "grad_norm": 338.9595031738281, "learning_rate": 0.0001870736825579379, "loss": 7.2527, "step": 140330 }, { "epoch": 16.888086642599276, "grad_norm": 392.41009521484375, "learning_rate": 0.00018707181174787977, "loss": 7.2183, "step": 140340 }, { "epoch": 16.889290012033694, "grad_norm": 114.52117156982422, "learning_rate": 0.00018706994081180725, "loss": 7.3447, "step": 140350 }, { "epoch": 16.89049338146811, "grad_norm": 264.9649963378906, "learning_rate": 0.00018706806974972298, "loss": 7.2551, "step": 140360 }, { "epoch": 16.891696750902526, "grad_norm": 103.57659912109375, "learning_rate": 0.00018706619856162976, "loss": 7.2541, "step": 140370 }, { "epoch": 16.892900120336943, "grad_norm": 78.29728698730469, "learning_rate": 0.00018706432724753023, "loss": 7.3447, "step": 140380 }, { "epoch": 16.89410348977136, "grad_norm": 44.53462600708008, "learning_rate": 0.0001870624558074271, "loss": 7.3042, "step": 140390 }, { "epoch": 16.895306859205775, "grad_norm": 114.2386245727539, "learning_rate": 0.00018706058424132317, "loss": 7.2138, "step": 140400 }, { "epoch": 16.896510228640192, "grad_norm": 271.8332824707031, "learning_rate": 0.000187058712549221, "loss": 7.2662, "step": 140410 }, { "epoch": 16.89771359807461, "grad_norm": 101.64602661132812, "learning_rate": 0.00018705684073112342, "loss": 7.2041, "step": 140420 }, { "epoch": 16.898916967509024, "grad_norm": 197.1746063232422, "learning_rate": 0.0001870549687870331, "loss": 7.1597, "step": 140430 }, { "epoch": 16.90012033694344, "grad_norm": 119.66224670410156, "learning_rate": 0.00018705309671695274, "loss": 7.3107, "step": 140440 }, { "epoch": 16.90132370637786, "grad_norm": 107.16404724121094, "learning_rate": 0.00018705122452088505, "loss": 7.3012, "step": 140450 }, { "epoch": 16.902527075812273, "grad_norm": 112.4430160522461, "learning_rate": 0.00018704935219883274, "loss": 7.3344, "step": 140460 }, { "epoch": 16.90373044524669, "grad_norm": 147.1563262939453, "learning_rate": 0.00018704747975079858, "loss": 7.306, "step": 140470 }, { "epoch": 16.904933814681108, "grad_norm": 190.45547485351562, "learning_rate": 0.00018704560717678516, "loss": 7.2292, "step": 140480 }, { "epoch": 16.906137184115522, "grad_norm": 197.283935546875, "learning_rate": 0.0001870437344767953, "loss": 7.1409, "step": 140490 }, { "epoch": 16.90734055354994, "grad_norm": 162.4819793701172, "learning_rate": 0.00018704186165083165, "loss": 7.2511, "step": 140500 }, { "epoch": 16.908543922984357, "grad_norm": 196.75277709960938, "learning_rate": 0.0001870399886988969, "loss": 7.357, "step": 140510 }, { "epoch": 16.90974729241877, "grad_norm": 253.63694763183594, "learning_rate": 0.00018703811562099384, "loss": 7.2279, "step": 140520 }, { "epoch": 16.91095066185319, "grad_norm": 140.5008087158203, "learning_rate": 0.00018703624241712513, "loss": 7.2603, "step": 140530 }, { "epoch": 16.912154031287606, "grad_norm": 159.239990234375, "learning_rate": 0.00018703436908729348, "loss": 7.3026, "step": 140540 }, { "epoch": 16.91335740072202, "grad_norm": 249.13462829589844, "learning_rate": 0.00018703249563150161, "loss": 7.3498, "step": 140550 }, { "epoch": 16.914560770156438, "grad_norm": 155.85171508789062, "learning_rate": 0.00018703062204975223, "loss": 7.2365, "step": 140560 }, { "epoch": 16.915764139590856, "grad_norm": 494.0457763671875, "learning_rate": 0.00018702874834204808, "loss": 7.3239, "step": 140570 }, { "epoch": 16.91696750902527, "grad_norm": 658.1851196289062, "learning_rate": 0.0001870268745083918, "loss": 7.3253, "step": 140580 }, { "epoch": 16.918170878459687, "grad_norm": 362.6427307128906, "learning_rate": 0.00018702500054878617, "loss": 7.3368, "step": 140590 }, { "epoch": 16.919374247894105, "grad_norm": 192.82269287109375, "learning_rate": 0.0001870231264632339, "loss": 7.2243, "step": 140600 }, { "epoch": 16.92057761732852, "grad_norm": 413.574951171875, "learning_rate": 0.00018702125225173764, "loss": 7.2197, "step": 140610 }, { "epoch": 16.921780986762936, "grad_norm": 347.7251892089844, "learning_rate": 0.0001870193779143002, "loss": 7.193, "step": 140620 }, { "epoch": 16.922984356197354, "grad_norm": 585.346923828125, "learning_rate": 0.0001870175034509242, "loss": 7.3094, "step": 140630 }, { "epoch": 16.924187725631768, "grad_norm": 1707.060302734375, "learning_rate": 0.00018701562886161237, "loss": 7.2115, "step": 140640 }, { "epoch": 16.925391095066185, "grad_norm": 464.30328369140625, "learning_rate": 0.00018701375414636746, "loss": 7.1449, "step": 140650 }, { "epoch": 16.926594464500603, "grad_norm": 334.6609191894531, "learning_rate": 0.00018701187930519214, "loss": 7.244, "step": 140660 }, { "epoch": 16.927797833935017, "grad_norm": 379.38336181640625, "learning_rate": 0.00018701000433808917, "loss": 7.2555, "step": 140670 }, { "epoch": 16.929001203369435, "grad_norm": 750.4374389648438, "learning_rate": 0.00018700812924506125, "loss": 7.249, "step": 140680 }, { "epoch": 16.930204572803852, "grad_norm": 1935.0679931640625, "learning_rate": 0.0001870062540261111, "loss": 7.2245, "step": 140690 }, { "epoch": 16.931407942238266, "grad_norm": 1020.1409912109375, "learning_rate": 0.0001870043786812414, "loss": 7.2246, "step": 140700 }, { "epoch": 16.932611311672684, "grad_norm": 2117.8369140625, "learning_rate": 0.00018700250321045486, "loss": 7.2156, "step": 140710 }, { "epoch": 16.9338146811071, "grad_norm": 507.71435546875, "learning_rate": 0.00018700062761375423, "loss": 7.1958, "step": 140720 }, { "epoch": 16.935018050541515, "grad_norm": 448.57574462890625, "learning_rate": 0.00018699875189114224, "loss": 7.2327, "step": 140730 }, { "epoch": 16.936221419975933, "grad_norm": 584.8023681640625, "learning_rate": 0.00018699687604262154, "loss": 7.2211, "step": 140740 }, { "epoch": 16.93742478941035, "grad_norm": 502.99395751953125, "learning_rate": 0.0001869950000681949, "loss": 7.2682, "step": 140750 }, { "epoch": 16.938628158844764, "grad_norm": 662.310302734375, "learning_rate": 0.00018699312396786502, "loss": 7.2251, "step": 140760 }, { "epoch": 16.939831528279182, "grad_norm": 1193.97314453125, "learning_rate": 0.00018699124774163462, "loss": 7.2697, "step": 140770 }, { "epoch": 16.9410348977136, "grad_norm": 692.0593872070312, "learning_rate": 0.0001869893713895064, "loss": 7.2434, "step": 140780 }, { "epoch": 16.942238267148014, "grad_norm": 2674.326171875, "learning_rate": 0.00018698749491148306, "loss": 7.2773, "step": 140790 }, { "epoch": 16.94344163658243, "grad_norm": 1171.132568359375, "learning_rate": 0.00018698561830756736, "loss": 7.2644, "step": 140800 }, { "epoch": 16.94464500601685, "grad_norm": 1005.2243041992188, "learning_rate": 0.000186983741577762, "loss": 7.3121, "step": 140810 }, { "epoch": 16.945848375451263, "grad_norm": 702.234375, "learning_rate": 0.00018698186472206966, "loss": 7.1652, "step": 140820 }, { "epoch": 16.94705174488568, "grad_norm": 1431.6015625, "learning_rate": 0.0001869799877404931, "loss": 7.3035, "step": 140830 }, { "epoch": 16.948255114320098, "grad_norm": 1350.1988525390625, "learning_rate": 0.00018697811063303504, "loss": 7.1727, "step": 140840 }, { "epoch": 16.949458483754512, "grad_norm": 988.340576171875, "learning_rate": 0.00018697623339969816, "loss": 7.314, "step": 140850 }, { "epoch": 16.95066185318893, "grad_norm": 1350.4931640625, "learning_rate": 0.00018697435604048522, "loss": 7.2395, "step": 140860 }, { "epoch": 16.951865222623347, "grad_norm": 612.1477661132812, "learning_rate": 0.00018697247855539888, "loss": 7.2238, "step": 140870 }, { "epoch": 16.95306859205776, "grad_norm": 1204.4171142578125, "learning_rate": 0.00018697060094444192, "loss": 7.2783, "step": 140880 }, { "epoch": 16.95427196149218, "grad_norm": 551.55859375, "learning_rate": 0.00018696872320761703, "loss": 7.2574, "step": 140890 }, { "epoch": 16.955475330926596, "grad_norm": 885.8955688476562, "learning_rate": 0.0001869668453449269, "loss": 7.2001, "step": 140900 }, { "epoch": 16.95667870036101, "grad_norm": 1194.371337890625, "learning_rate": 0.0001869649673563743, "loss": 7.2824, "step": 140910 }, { "epoch": 16.957882069795428, "grad_norm": 689.4669799804688, "learning_rate": 0.00018696308924196192, "loss": 7.3292, "step": 140920 }, { "epoch": 16.959085439229845, "grad_norm": 2157.862060546875, "learning_rate": 0.00018696121100169246, "loss": 7.3187, "step": 140930 }, { "epoch": 16.96028880866426, "grad_norm": 1226.73388671875, "learning_rate": 0.00018695933263556866, "loss": 7.3299, "step": 140940 }, { "epoch": 16.961492178098677, "grad_norm": 5642.2080078125, "learning_rate": 0.00018695745414359322, "loss": 7.3384, "step": 140950 }, { "epoch": 16.96269554753309, "grad_norm": 1071.24853515625, "learning_rate": 0.0001869555755257689, "loss": 7.3305, "step": 140960 }, { "epoch": 16.96389891696751, "grad_norm": 1440.128173828125, "learning_rate": 0.0001869536967820984, "loss": 7.2943, "step": 140970 }, { "epoch": 16.965102286401926, "grad_norm": 5736.08544921875, "learning_rate": 0.00018695181791258443, "loss": 7.1581, "step": 140980 }, { "epoch": 16.96630565583634, "grad_norm": 2490.807861328125, "learning_rate": 0.0001869499389172297, "loss": 7.226, "step": 140990 }, { "epoch": 16.967509025270758, "grad_norm": 3486.12890625, "learning_rate": 0.00018694805979603695, "loss": 7.3359, "step": 141000 }, { "epoch": 16.968712394705175, "grad_norm": 395.9966735839844, "learning_rate": 0.00018694618054900889, "loss": 7.3345, "step": 141010 }, { "epoch": 16.96991576413959, "grad_norm": 557.5656127929688, "learning_rate": 0.0001869443011761482, "loss": 7.3539, "step": 141020 }, { "epoch": 16.971119133574007, "grad_norm": 155.63076782226562, "learning_rate": 0.0001869424216774577, "loss": 7.2907, "step": 141030 }, { "epoch": 16.972322503008424, "grad_norm": 176.70492553710938, "learning_rate": 0.00018694054205294002, "loss": 7.2284, "step": 141040 }, { "epoch": 16.97352587244284, "grad_norm": 194.2889862060547, "learning_rate": 0.00018693866230259793, "loss": 7.2408, "step": 141050 }, { "epoch": 16.974729241877256, "grad_norm": 191.15797424316406, "learning_rate": 0.00018693678242643412, "loss": 7.3036, "step": 141060 }, { "epoch": 16.975932611311674, "grad_norm": 389.35052490234375, "learning_rate": 0.0001869349024244513, "loss": 7.2751, "step": 141070 }, { "epoch": 16.977135980746088, "grad_norm": 237.237060546875, "learning_rate": 0.00018693302229665223, "loss": 7.3091, "step": 141080 }, { "epoch": 16.978339350180505, "grad_norm": 371.0087890625, "learning_rate": 0.00018693114204303962, "loss": 7.3141, "step": 141090 }, { "epoch": 16.979542719614923, "grad_norm": 159.05126953125, "learning_rate": 0.0001869292616636162, "loss": 7.2645, "step": 141100 }, { "epoch": 16.980746089049337, "grad_norm": 359.671875, "learning_rate": 0.00018692738115838467, "loss": 7.2667, "step": 141110 }, { "epoch": 16.981949458483754, "grad_norm": 911.1619262695312, "learning_rate": 0.0001869255005273477, "loss": 7.3295, "step": 141120 }, { "epoch": 16.983152827918172, "grad_norm": 941.1460571289062, "learning_rate": 0.00018692361977050813, "loss": 7.2638, "step": 141130 }, { "epoch": 16.984356197352586, "grad_norm": 852.6581420898438, "learning_rate": 0.0001869217388878686, "loss": 7.2092, "step": 141140 }, { "epoch": 16.985559566787003, "grad_norm": 1084.5054931640625, "learning_rate": 0.00018691985787943187, "loss": 7.2554, "step": 141150 }, { "epoch": 16.98676293622142, "grad_norm": 3219.71240234375, "learning_rate": 0.00018691797674520063, "loss": 7.2092, "step": 141160 }, { "epoch": 16.987966305655835, "grad_norm": 15177.5029296875, "learning_rate": 0.00018691609548517762, "loss": 7.235, "step": 141170 }, { "epoch": 16.989169675090253, "grad_norm": 2970.762451171875, "learning_rate": 0.00018691421409936562, "loss": 7.1923, "step": 141180 }, { "epoch": 16.99037304452467, "grad_norm": 3648.067626953125, "learning_rate": 0.0001869123325877672, "loss": 7.2056, "step": 141190 }, { "epoch": 16.991576413959084, "grad_norm": 2022.618408203125, "learning_rate": 0.00018691045095038523, "loss": 7.3552, "step": 141200 }, { "epoch": 16.9927797833935, "grad_norm": 3119.40771484375, "learning_rate": 0.00018690856918722237, "loss": 7.2329, "step": 141210 }, { "epoch": 16.99398315282792, "grad_norm": 2291.0751953125, "learning_rate": 0.00018690668729828133, "loss": 7.2169, "step": 141220 }, { "epoch": 16.995186522262333, "grad_norm": 9763.142578125, "learning_rate": 0.00018690480528356487, "loss": 7.3825, "step": 141230 }, { "epoch": 16.99638989169675, "grad_norm": 1863.9749755859375, "learning_rate": 0.0001869029231430757, "loss": 7.2771, "step": 141240 }, { "epoch": 16.99759326113117, "grad_norm": 5123.5166015625, "learning_rate": 0.00018690104087681656, "loss": 7.3613, "step": 141250 }, { "epoch": 16.998796630565582, "grad_norm": 1767.199951171875, "learning_rate": 0.00018689915848479015, "loss": 7.3014, "step": 141260 }, { "epoch": 17.0, "grad_norm": 1562.28857421875, "learning_rate": 0.00018689727596699922, "loss": 7.3865, "step": 141270 }, { "epoch": 17.0, "eval_loss": 7.285952091217041, "eval_runtime": 119.1122, "eval_samples_per_second": 62.017, "eval_steps_per_second": 7.757, "step": 141270 }, { "epoch": 17.001203369434418, "grad_norm": 924.876708984375, "learning_rate": 0.00018689539332344645, "loss": 7.3318, "step": 141280 }, { "epoch": 17.00240673886883, "grad_norm": 1089.5457763671875, "learning_rate": 0.0001868935105541346, "loss": 7.3168, "step": 141290 }, { "epoch": 17.00361010830325, "grad_norm": 1816.0389404296875, "learning_rate": 0.0001868916276590664, "loss": 7.2017, "step": 141300 }, { "epoch": 17.004813477737667, "grad_norm": 1234.37158203125, "learning_rate": 0.00018688974463824454, "loss": 7.3102, "step": 141310 }, { "epoch": 17.00601684717208, "grad_norm": 1734.231201171875, "learning_rate": 0.00018688786149167178, "loss": 7.3916, "step": 141320 }, { "epoch": 17.0072202166065, "grad_norm": 1366.3143310546875, "learning_rate": 0.00018688597821935084, "loss": 7.326, "step": 141330 }, { "epoch": 17.008423586040916, "grad_norm": 4636.26123046875, "learning_rate": 0.00018688409482128443, "loss": 7.3423, "step": 141340 }, { "epoch": 17.00962695547533, "grad_norm": 10725.2548828125, "learning_rate": 0.0001868822112974753, "loss": 7.2779, "step": 141350 }, { "epoch": 17.010830324909747, "grad_norm": 24845.275390625, "learning_rate": 0.00018688032764792612, "loss": 7.4422, "step": 141360 }, { "epoch": 17.012033694344165, "grad_norm": 13669.8125, "learning_rate": 0.00018687844387263968, "loss": 7.4754, "step": 141370 }, { "epoch": 17.01323706377858, "grad_norm": 29731.974609375, "learning_rate": 0.00018687655997161868, "loss": 7.4477, "step": 141380 }, { "epoch": 17.014440433212997, "grad_norm": 9919.509765625, "learning_rate": 0.00018687467594486585, "loss": 7.4055, "step": 141390 }, { "epoch": 17.015643802647414, "grad_norm": 17093.2421875, "learning_rate": 0.0001868727917923839, "loss": 7.3542, "step": 141400 }, { "epoch": 17.016847172081828, "grad_norm": 2228155.75, "learning_rate": 0.0001868709075141756, "loss": 7.7627, "step": 141410 }, { "epoch": 17.018050541516246, "grad_norm": 207971.96875, "learning_rate": 0.00018686902311024363, "loss": 10.0114, "step": 141420 }, { "epoch": 17.019253910950663, "grad_norm": 2111354.75, "learning_rate": 0.00018686713858059075, "loss": 10.3712, "step": 141430 }, { "epoch": 17.020457280385077, "grad_norm": 1330060.75, "learning_rate": 0.00018686525392521967, "loss": 9.8709, "step": 141440 }, { "epoch": 17.021660649819495, "grad_norm": 29423556.0, "learning_rate": 0.00018686336914413312, "loss": 9.4304, "step": 141450 }, { "epoch": 17.022864019253912, "grad_norm": 67532224.0, "learning_rate": 0.00018686148423733382, "loss": 10.2345, "step": 141460 }, { "epoch": 17.024067388688326, "grad_norm": 309209.0, "learning_rate": 0.0001868595992048245, "loss": 10.6619, "step": 141470 }, { "epoch": 17.025270758122744, "grad_norm": 476131.34375, "learning_rate": 0.00018685771404660794, "loss": 10.8618, "step": 141480 }, { "epoch": 17.02647412755716, "grad_norm": 21831126.0, "learning_rate": 0.00018685582876268677, "loss": 11.2351, "step": 141490 }, { "epoch": 17.027677496991576, "grad_norm": 10083105.0, "learning_rate": 0.00018685394335306378, "loss": 10.2909, "step": 141500 }, { "epoch": 17.028880866425993, "grad_norm": 153905440.0, "learning_rate": 0.00018685205781774174, "loss": 11.0587, "step": 141510 }, { "epoch": 17.03008423586041, "grad_norm": 630525248.0, "learning_rate": 0.00018685017215672327, "loss": 11.0391, "step": 141520 }, { "epoch": 17.031287605294825, "grad_norm": 63822776.0, "learning_rate": 0.00018684828637001118, "loss": 10.3231, "step": 141530 }, { "epoch": 17.032490974729242, "grad_norm": 60172244.0, "learning_rate": 0.00018684640045760816, "loss": 9.7273, "step": 141540 }, { "epoch": 17.03369434416366, "grad_norm": 428727744.0, "learning_rate": 0.000186844514419517, "loss": 9.6713, "step": 141550 }, { "epoch": 17.034897713598074, "grad_norm": 103376992.0, "learning_rate": 0.00018684262825574037, "loss": 10.0265, "step": 141560 }, { "epoch": 17.03610108303249, "grad_norm": 149720096.0, "learning_rate": 0.000186840741966281, "loss": 11.8091, "step": 141570 }, { "epoch": 17.03730445246691, "grad_norm": 12849386.0, "learning_rate": 0.00018683885555114164, "loss": 10.4154, "step": 141580 }, { "epoch": 17.038507821901323, "grad_norm": 3311020.25, "learning_rate": 0.000186836969010325, "loss": 9.7652, "step": 141590 }, { "epoch": 17.03971119133574, "grad_norm": 18709520.0, "learning_rate": 0.00018683508234383387, "loss": 8.5957, "step": 141600 }, { "epoch": 17.040914560770158, "grad_norm": 3221425.25, "learning_rate": 0.0001868331955516709, "loss": 8.11, "step": 141610 }, { "epoch": 17.042117930204572, "grad_norm": 503154880.0, "learning_rate": 0.00018683130863383885, "loss": 8.398, "step": 141620 }, { "epoch": 17.04332129963899, "grad_norm": 5451205.5, "learning_rate": 0.0001868294215903405, "loss": 9.0374, "step": 141630 }, { "epoch": 17.044524669073404, "grad_norm": 12103708.0, "learning_rate": 0.00018682753442117854, "loss": 8.1284, "step": 141640 }, { "epoch": 17.04572803850782, "grad_norm": 2498935.5, "learning_rate": 0.00018682564712635565, "loss": 7.9074, "step": 141650 }, { "epoch": 17.04693140794224, "grad_norm": 30658082.0, "learning_rate": 0.00018682375970587468, "loss": 7.8347, "step": 141660 }, { "epoch": 17.048134777376653, "grad_norm": 4080987.5, "learning_rate": 0.00018682187215973822, "loss": 7.7735, "step": 141670 }, { "epoch": 17.04933814681107, "grad_norm": 3283819.25, "learning_rate": 0.0001868199844879491, "loss": 8.0627, "step": 141680 }, { "epoch": 17.050541516245488, "grad_norm": 38352371712.0, "learning_rate": 0.00018681809669051006, "loss": 9.5241, "step": 141690 }, { "epoch": 17.051744885679902, "grad_norm": 6710531072.0, "learning_rate": 0.00018681620876742375, "loss": 10.4074, "step": 141700 }, { "epoch": 17.05294825511432, "grad_norm": 490279072.0, "learning_rate": 0.000186814320718693, "loss": 10.0882, "step": 141710 }, { "epoch": 17.054151624548737, "grad_norm": 109023453184.0, "learning_rate": 0.00018681243254432047, "loss": 9.7834, "step": 141720 }, { "epoch": 17.05535499398315, "grad_norm": 496933824.0, "learning_rate": 0.00018681054424430892, "loss": 9.1749, "step": 141730 }, { "epoch": 17.05655836341757, "grad_norm": 3079289088.0, "learning_rate": 0.0001868086558186611, "loss": 10.0425, "step": 141740 }, { "epoch": 17.057761732851986, "grad_norm": 635064896.0, "learning_rate": 0.00018680676726737968, "loss": 10.9633, "step": 141750 }, { "epoch": 17.0589651022864, "grad_norm": 52752132.0, "learning_rate": 0.00018680487859046746, "loss": 11.445, "step": 141760 }, { "epoch": 17.060168471720818, "grad_norm": 86162898944.0, "learning_rate": 0.00018680298978792717, "loss": 10.9618, "step": 141770 }, { "epoch": 17.061371841155236, "grad_norm": 2763698176.0, "learning_rate": 0.0001868011008597615, "loss": 10.1048, "step": 141780 }, { "epoch": 17.06257521058965, "grad_norm": 16464600064.0, "learning_rate": 0.0001867992118059732, "loss": 9.5028, "step": 141790 }, { "epoch": 17.063778580024067, "grad_norm": 3073200128.0, "learning_rate": 0.00018679732262656502, "loss": 7.5215, "step": 141800 }, { "epoch": 17.064981949458485, "grad_norm": 306863.46875, "learning_rate": 0.0001867954333215397, "loss": 7.384, "step": 141810 }, { "epoch": 17.0661853188929, "grad_norm": 4970920.5, "learning_rate": 0.00018679354389089991, "loss": 7.6664, "step": 141820 }, { "epoch": 17.067388688327316, "grad_norm": 18504586.0, "learning_rate": 0.00018679165433464846, "loss": 7.7047, "step": 141830 }, { "epoch": 17.068592057761734, "grad_norm": 47235032.0, "learning_rate": 0.00018678976465278809, "loss": 7.7192, "step": 141840 }, { "epoch": 17.069795427196148, "grad_norm": 323516224.0, "learning_rate": 0.00018678787484532146, "loss": 7.7239, "step": 141850 }, { "epoch": 17.070998796630565, "grad_norm": 220470.0625, "learning_rate": 0.00018678598491225136, "loss": 7.6575, "step": 141860 }, { "epoch": 17.072202166064983, "grad_norm": 353980.90625, "learning_rate": 0.00018678409485358054, "loss": 7.7066, "step": 141870 }, { "epoch": 17.073405535499397, "grad_norm": 28216.509765625, "learning_rate": 0.00018678220466931167, "loss": 7.6509, "step": 141880 }, { "epoch": 17.074608904933815, "grad_norm": 64755.54296875, "learning_rate": 0.00018678031435944754, "loss": 7.6948, "step": 141890 }, { "epoch": 17.075812274368232, "grad_norm": 29359730.0, "learning_rate": 0.00018677842392399087, "loss": 7.5768, "step": 141900 }, { "epoch": 17.077015643802646, "grad_norm": 568256.25, "learning_rate": 0.00018677653336294438, "loss": 7.6167, "step": 141910 }, { "epoch": 17.078219013237064, "grad_norm": 175611.65625, "learning_rate": 0.00018677464267631083, "loss": 7.6359, "step": 141920 }, { "epoch": 17.07942238267148, "grad_norm": 165359.421875, "learning_rate": 0.00018677275186409296, "loss": 7.6493, "step": 141930 }, { "epoch": 17.080625752105895, "grad_norm": 101344.546875, "learning_rate": 0.00018677086092629347, "loss": 7.6918, "step": 141940 }, { "epoch": 17.081829121540313, "grad_norm": 145652.84375, "learning_rate": 0.00018676896986291516, "loss": 7.6174, "step": 141950 }, { "epoch": 17.08303249097473, "grad_norm": 77464.8125, "learning_rate": 0.0001867670786739607, "loss": 7.6242, "step": 141960 }, { "epoch": 17.084235860409144, "grad_norm": 316192.28125, "learning_rate": 0.00018676518735943288, "loss": 7.6159, "step": 141970 }, { "epoch": 17.085439229843562, "grad_norm": 723720.1875, "learning_rate": 0.0001867632959193344, "loss": 7.6689, "step": 141980 }, { "epoch": 17.08664259927798, "grad_norm": 1426618.0, "learning_rate": 0.00018676140435366798, "loss": 7.6359, "step": 141990 }, { "epoch": 17.087845968712394, "grad_norm": 327477.0, "learning_rate": 0.00018675951266243642, "loss": 7.5401, "step": 142000 }, { "epoch": 17.08904933814681, "grad_norm": 24021530.0, "learning_rate": 0.00018675762084564243, "loss": 7.5873, "step": 142010 }, { "epoch": 17.09025270758123, "grad_norm": 307043.25, "learning_rate": 0.00018675572890328873, "loss": 7.5659, "step": 142020 }, { "epoch": 17.091456077015643, "grad_norm": 885694.4375, "learning_rate": 0.00018675383683537807, "loss": 7.6518, "step": 142030 }, { "epoch": 17.09265944645006, "grad_norm": 524654.9375, "learning_rate": 0.00018675194464191317, "loss": 7.6359, "step": 142040 }, { "epoch": 17.093862815884478, "grad_norm": 72734.875, "learning_rate": 0.00018675005232289681, "loss": 7.6203, "step": 142050 }, { "epoch": 17.095066185318892, "grad_norm": 29165812.0, "learning_rate": 0.0001867481598783317, "loss": 7.5633, "step": 142060 }, { "epoch": 17.09626955475331, "grad_norm": 89524.546875, "learning_rate": 0.0001867462673082206, "loss": 7.5212, "step": 142070 }, { "epoch": 17.097472924187727, "grad_norm": 500923.21875, "learning_rate": 0.00018674437461256623, "loss": 7.7076, "step": 142080 }, { "epoch": 17.09867629362214, "grad_norm": 5271867.0, "learning_rate": 0.00018674248179137133, "loss": 7.6236, "step": 142090 }, { "epoch": 17.09987966305656, "grad_norm": 652417.75, "learning_rate": 0.00018674058884463862, "loss": 7.6297, "step": 142100 }, { "epoch": 17.101083032490976, "grad_norm": 22941.08203125, "learning_rate": 0.0001867386957723709, "loss": 7.6299, "step": 142110 }, { "epoch": 17.10228640192539, "grad_norm": 28353.453125, "learning_rate": 0.00018673680257457086, "loss": 7.5819, "step": 142120 }, { "epoch": 17.103489771359808, "grad_norm": 55700.875, "learning_rate": 0.00018673490925124125, "loss": 7.6439, "step": 142130 }, { "epoch": 17.104693140794225, "grad_norm": 615666.875, "learning_rate": 0.00018673301580238478, "loss": 7.5065, "step": 142140 }, { "epoch": 17.10589651022864, "grad_norm": 217125.90625, "learning_rate": 0.00018673112222800423, "loss": 7.5731, "step": 142150 }, { "epoch": 17.107099879663057, "grad_norm": 196665.125, "learning_rate": 0.00018672922852810237, "loss": 7.5202, "step": 142160 }, { "epoch": 17.108303249097474, "grad_norm": 166179.25, "learning_rate": 0.0001867273347026819, "loss": 7.6613, "step": 142170 }, { "epoch": 17.10950661853189, "grad_norm": 96824.5, "learning_rate": 0.00018672544075174554, "loss": 7.5422, "step": 142180 }, { "epoch": 17.110709987966306, "grad_norm": 441479.03125, "learning_rate": 0.0001867235466752961, "loss": 7.4653, "step": 142190 }, { "epoch": 17.111913357400724, "grad_norm": 50175.7890625, "learning_rate": 0.00018672165247333623, "loss": 7.5489, "step": 142200 }, { "epoch": 17.113116726835138, "grad_norm": 69269.8046875, "learning_rate": 0.00018671975814586872, "loss": 7.6343, "step": 142210 }, { "epoch": 17.114320096269555, "grad_norm": 192029.21875, "learning_rate": 0.00018671786369289632, "loss": 7.5305, "step": 142220 }, { "epoch": 17.115523465703973, "grad_norm": 54431.62890625, "learning_rate": 0.00018671596911442174, "loss": 7.5186, "step": 142230 }, { "epoch": 17.116726835138387, "grad_norm": 219534.40625, "learning_rate": 0.0001867140744104478, "loss": 7.4882, "step": 142240 }, { "epoch": 17.117930204572804, "grad_norm": 1309003.25, "learning_rate": 0.00018671217958097716, "loss": 7.5362, "step": 142250 }, { "epoch": 17.119133574007222, "grad_norm": 535729.8125, "learning_rate": 0.00018671028462601257, "loss": 7.6497, "step": 142260 }, { "epoch": 17.120336943441636, "grad_norm": 17054.576171875, "learning_rate": 0.0001867083895455568, "loss": 7.4007, "step": 142270 }, { "epoch": 17.121540312876053, "grad_norm": 18348.041015625, "learning_rate": 0.0001867064943396126, "loss": 7.4202, "step": 142280 }, { "epoch": 17.12274368231047, "grad_norm": 134746.65625, "learning_rate": 0.00018670459900818265, "loss": 7.5017, "step": 142290 }, { "epoch": 17.123947051744885, "grad_norm": 8078.41015625, "learning_rate": 0.00018670270355126978, "loss": 7.3104, "step": 142300 }, { "epoch": 17.125150421179303, "grad_norm": 767163.0, "learning_rate": 0.00018670080796887666, "loss": 7.4925, "step": 142310 }, { "epoch": 17.126353790613717, "grad_norm": 138308.53125, "learning_rate": 0.00018669891226100608, "loss": 7.4317, "step": 142320 }, { "epoch": 17.127557160048134, "grad_norm": 12479.5205078125, "learning_rate": 0.00018669701642766076, "loss": 7.3732, "step": 142330 }, { "epoch": 17.128760529482552, "grad_norm": 157744.296875, "learning_rate": 0.00018669512046884347, "loss": 7.3109, "step": 142340 }, { "epoch": 17.129963898916966, "grad_norm": 12371.865234375, "learning_rate": 0.00018669322438455695, "loss": 7.423, "step": 142350 }, { "epoch": 17.131167268351383, "grad_norm": 12458.7294921875, "learning_rate": 0.0001866913281748039, "loss": 7.3762, "step": 142360 }, { "epoch": 17.1323706377858, "grad_norm": 11302.03515625, "learning_rate": 0.0001866894318395871, "loss": 7.4035, "step": 142370 }, { "epoch": 17.133574007220215, "grad_norm": 16933.833984375, "learning_rate": 0.0001866875353789093, "loss": 7.4945, "step": 142380 }, { "epoch": 17.134777376654633, "grad_norm": 423122.34375, "learning_rate": 0.00018668563879277323, "loss": 7.34, "step": 142390 }, { "epoch": 17.13598074608905, "grad_norm": 14923.0068359375, "learning_rate": 0.00018668374208118165, "loss": 7.3102, "step": 142400 }, { "epoch": 17.137184115523464, "grad_norm": 11712.00390625, "learning_rate": 0.00018668184524413728, "loss": 7.376, "step": 142410 }, { "epoch": 17.13838748495788, "grad_norm": 62720.671875, "learning_rate": 0.00018667994828164286, "loss": 7.317, "step": 142420 }, { "epoch": 17.1395908543923, "grad_norm": 41738.1796875, "learning_rate": 0.00018667805119370118, "loss": 7.4232, "step": 142430 }, { "epoch": 17.140794223826713, "grad_norm": 38861.62890625, "learning_rate": 0.00018667615398031495, "loss": 7.3007, "step": 142440 }, { "epoch": 17.14199759326113, "grad_norm": 153266.109375, "learning_rate": 0.00018667425664148693, "loss": 7.4199, "step": 142450 }, { "epoch": 17.14320096269555, "grad_norm": 211282.78125, "learning_rate": 0.00018667235917721984, "loss": 7.2611, "step": 142460 }, { "epoch": 17.144404332129962, "grad_norm": 13568.48828125, "learning_rate": 0.0001866704615875165, "loss": 7.3037, "step": 142470 }, { "epoch": 17.14560770156438, "grad_norm": 10499.71875, "learning_rate": 0.00018666856387237956, "loss": 7.3668, "step": 142480 }, { "epoch": 17.146811070998798, "grad_norm": 11154.3515625, "learning_rate": 0.00018666666603181183, "loss": 7.3346, "step": 142490 }, { "epoch": 17.14801444043321, "grad_norm": 160310.6875, "learning_rate": 0.00018666476806581604, "loss": 7.3174, "step": 142500 }, { "epoch": 17.14921780986763, "grad_norm": 34515.3125, "learning_rate": 0.00018666286997439491, "loss": 7.4247, "step": 142510 }, { "epoch": 17.150421179302047, "grad_norm": 36640.3125, "learning_rate": 0.00018666097175755122, "loss": 7.3735, "step": 142520 }, { "epoch": 17.15162454873646, "grad_norm": 41794.875, "learning_rate": 0.00018665907341528773, "loss": 7.4383, "step": 142530 }, { "epoch": 17.15282791817088, "grad_norm": 21124.376953125, "learning_rate": 0.00018665717494760714, "loss": 7.4022, "step": 142540 }, { "epoch": 17.154031287605296, "grad_norm": 60762.08203125, "learning_rate": 0.00018665527635451223, "loss": 7.2942, "step": 142550 }, { "epoch": 17.15523465703971, "grad_norm": 42075.8984375, "learning_rate": 0.00018665337763600572, "loss": 7.3258, "step": 142560 }, { "epoch": 17.156438026474127, "grad_norm": 36340.90234375, "learning_rate": 0.00018665147879209042, "loss": 7.3362, "step": 142570 }, { "epoch": 17.157641395908545, "grad_norm": 15715.5380859375, "learning_rate": 0.00018664957982276898, "loss": 7.468, "step": 142580 }, { "epoch": 17.15884476534296, "grad_norm": 60864.4765625, "learning_rate": 0.00018664768072804426, "loss": 7.4349, "step": 142590 }, { "epoch": 17.160048134777377, "grad_norm": 104410.78125, "learning_rate": 0.00018664578150791894, "loss": 7.3642, "step": 142600 }, { "epoch": 17.161251504211794, "grad_norm": 72621.6328125, "learning_rate": 0.00018664388216239577, "loss": 7.297, "step": 142610 }, { "epoch": 17.162454873646208, "grad_norm": 35826.2421875, "learning_rate": 0.0001866419826914775, "loss": 7.3096, "step": 142620 }, { "epoch": 17.163658243080626, "grad_norm": 50612.3984375, "learning_rate": 0.0001866400830951669, "loss": 7.4833, "step": 142630 }, { "epoch": 17.164861612515043, "grad_norm": 36865.8203125, "learning_rate": 0.00018663818337346676, "loss": 7.5272, "step": 142640 }, { "epoch": 17.166064981949457, "grad_norm": 39109.46484375, "learning_rate": 0.0001866362835263797, "loss": 7.4837, "step": 142650 }, { "epoch": 17.167268351383875, "grad_norm": 137890.71875, "learning_rate": 0.0001866343835539086, "loss": 7.4907, "step": 142660 }, { "epoch": 17.168471720818292, "grad_norm": 41040.09375, "learning_rate": 0.00018663248345605614, "loss": 7.4056, "step": 142670 }, { "epoch": 17.169675090252706, "grad_norm": 16532.912109375, "learning_rate": 0.0001866305832328251, "loss": 7.5154, "step": 142680 }, { "epoch": 17.170878459687124, "grad_norm": 22060.61328125, "learning_rate": 0.00018662868288421818, "loss": 7.5773, "step": 142690 }, { "epoch": 17.17208182912154, "grad_norm": 59600.97265625, "learning_rate": 0.0001866267824102382, "loss": 7.5662, "step": 142700 }, { "epoch": 17.173285198555956, "grad_norm": 10623.1748046875, "learning_rate": 0.00018662488181088785, "loss": 7.5215, "step": 142710 }, { "epoch": 17.174488567990373, "grad_norm": 18329.470703125, "learning_rate": 0.00018662298108616996, "loss": 7.4264, "step": 142720 }, { "epoch": 17.17569193742479, "grad_norm": 40400.17578125, "learning_rate": 0.00018662108023608718, "loss": 7.3876, "step": 142730 }, { "epoch": 17.176895306859205, "grad_norm": 13407.1435546875, "learning_rate": 0.00018661917926064234, "loss": 7.3398, "step": 142740 }, { "epoch": 17.178098676293622, "grad_norm": 42986.66796875, "learning_rate": 0.00018661727815983816, "loss": 7.4152, "step": 142750 }, { "epoch": 17.17930204572804, "grad_norm": 10154.193359375, "learning_rate": 0.0001866153769336774, "loss": 7.3588, "step": 142760 }, { "epoch": 17.180505415162454, "grad_norm": 27231.228515625, "learning_rate": 0.0001866134755821628, "loss": 7.3424, "step": 142770 }, { "epoch": 17.18170878459687, "grad_norm": 70247.015625, "learning_rate": 0.0001866115741052971, "loss": 7.3623, "step": 142780 }, { "epoch": 17.18291215403129, "grad_norm": 25750.89453125, "learning_rate": 0.00018660967250308305, "loss": 7.3052, "step": 142790 }, { "epoch": 17.184115523465703, "grad_norm": 403852.15625, "learning_rate": 0.00018660777077552347, "loss": 7.2851, "step": 142800 }, { "epoch": 17.18531889290012, "grad_norm": 427347.15625, "learning_rate": 0.000186605868922621, "loss": 7.3909, "step": 142810 }, { "epoch": 17.186522262334538, "grad_norm": 11774.193359375, "learning_rate": 0.00018660396694437852, "loss": 7.3808, "step": 142820 }, { "epoch": 17.187725631768952, "grad_norm": 35684.640625, "learning_rate": 0.0001866020648407987, "loss": 7.3358, "step": 142830 }, { "epoch": 17.18892900120337, "grad_norm": 92285.578125, "learning_rate": 0.00018660016261188428, "loss": 7.3237, "step": 142840 }, { "epoch": 17.190132370637787, "grad_norm": 10759.0458984375, "learning_rate": 0.0001865982602576381, "loss": 7.3341, "step": 142850 }, { "epoch": 17.1913357400722, "grad_norm": 18348.951171875, "learning_rate": 0.00018659635777806282, "loss": 7.2642, "step": 142860 }, { "epoch": 17.19253910950662, "grad_norm": 44980.66015625, "learning_rate": 0.00018659445517316123, "loss": 7.2788, "step": 142870 }, { "epoch": 17.193742478941036, "grad_norm": 294009.71875, "learning_rate": 0.0001865925524429361, "loss": 7.2674, "step": 142880 }, { "epoch": 17.19494584837545, "grad_norm": 22624.0703125, "learning_rate": 0.00018659064958739013, "loss": 7.2889, "step": 142890 }, { "epoch": 17.196149217809868, "grad_norm": 360896.65625, "learning_rate": 0.00018658874660652615, "loss": 7.3536, "step": 142900 }, { "epoch": 17.197352587244286, "grad_norm": 190300.53125, "learning_rate": 0.0001865868435003469, "loss": 7.3024, "step": 142910 }, { "epoch": 17.1985559566787, "grad_norm": 261780.9375, "learning_rate": 0.00018658494026885505, "loss": 7.3493, "step": 142920 }, { "epoch": 17.199759326113117, "grad_norm": 65406.859375, "learning_rate": 0.00018658303691205342, "loss": 7.2996, "step": 142930 }, { "epoch": 17.200962695547535, "grad_norm": 572977.0, "learning_rate": 0.00018658113342994478, "loss": 7.411, "step": 142940 }, { "epoch": 17.20216606498195, "grad_norm": 128455.8671875, "learning_rate": 0.00018657922982253186, "loss": 7.4283, "step": 142950 }, { "epoch": 17.203369434416366, "grad_norm": 11471.7587890625, "learning_rate": 0.00018657732608981742, "loss": 7.3861, "step": 142960 }, { "epoch": 17.204572803850784, "grad_norm": 11657.0908203125, "learning_rate": 0.00018657542223180423, "loss": 7.4415, "step": 142970 }, { "epoch": 17.205776173285198, "grad_norm": 38225.0859375, "learning_rate": 0.00018657351824849497, "loss": 7.3639, "step": 142980 }, { "epoch": 17.206979542719615, "grad_norm": 15534.462890625, "learning_rate": 0.00018657161413989252, "loss": 7.2942, "step": 142990 }, { "epoch": 17.20818291215403, "grad_norm": 14562.5517578125, "learning_rate": 0.00018656970990599955, "loss": 7.3218, "step": 143000 }, { "epoch": 17.209386281588447, "grad_norm": 10495.0380859375, "learning_rate": 0.00018656780554681884, "loss": 7.3931, "step": 143010 }, { "epoch": 17.210589651022865, "grad_norm": 16439.080078125, "learning_rate": 0.00018656590106235315, "loss": 7.3797, "step": 143020 }, { "epoch": 17.21179302045728, "grad_norm": 14018.064453125, "learning_rate": 0.0001865639964526052, "loss": 7.5922, "step": 143030 }, { "epoch": 17.212996389891696, "grad_norm": 5792.22119140625, "learning_rate": 0.0001865620917175778, "loss": 7.6399, "step": 143040 }, { "epoch": 17.214199759326114, "grad_norm": 12662.169921875, "learning_rate": 0.0001865601868572737, "loss": 7.3904, "step": 143050 }, { "epoch": 17.215403128760528, "grad_norm": 6172.56201171875, "learning_rate": 0.00018655828187169558, "loss": 7.3667, "step": 143060 }, { "epoch": 17.216606498194945, "grad_norm": 7061.3076171875, "learning_rate": 0.00018655637676084628, "loss": 7.3954, "step": 143070 }, { "epoch": 17.217809867629363, "grad_norm": 32888.35546875, "learning_rate": 0.00018655447152472854, "loss": 7.3878, "step": 143080 }, { "epoch": 17.219013237063777, "grad_norm": 13680.1015625, "learning_rate": 0.00018655256616334511, "loss": 7.3361, "step": 143090 }, { "epoch": 17.220216606498195, "grad_norm": 13983.4951171875, "learning_rate": 0.00018655066067669874, "loss": 7.3062, "step": 143100 }, { "epoch": 17.221419975932612, "grad_norm": 29049.599609375, "learning_rate": 0.00018654875506479222, "loss": 7.2493, "step": 143110 }, { "epoch": 17.222623345367026, "grad_norm": 21413.96484375, "learning_rate": 0.00018654684932762825, "loss": 7.4469, "step": 143120 }, { "epoch": 17.223826714801444, "grad_norm": 28884.884765625, "learning_rate": 0.00018654494346520965, "loss": 7.4074, "step": 143130 }, { "epoch": 17.22503008423586, "grad_norm": 39515.76953125, "learning_rate": 0.00018654303747753912, "loss": 7.455, "step": 143140 }, { "epoch": 17.226233453670275, "grad_norm": 30641.70703125, "learning_rate": 0.0001865411313646195, "loss": 7.3797, "step": 143150 }, { "epoch": 17.227436823104693, "grad_norm": 23947.599609375, "learning_rate": 0.00018653922512645345, "loss": 7.4171, "step": 143160 }, { "epoch": 17.22864019253911, "grad_norm": 161175.59375, "learning_rate": 0.00018653731876304379, "loss": 7.3796, "step": 143170 }, { "epoch": 17.229843561973524, "grad_norm": 31299.63671875, "learning_rate": 0.00018653541227439324, "loss": 7.3659, "step": 143180 }, { "epoch": 17.231046931407942, "grad_norm": 8280.3173828125, "learning_rate": 0.0001865335056605046, "loss": 7.3586, "step": 143190 }, { "epoch": 17.23225030084236, "grad_norm": 14721.0703125, "learning_rate": 0.00018653159892138062, "loss": 7.2953, "step": 143200 }, { "epoch": 17.233453670276774, "grad_norm": 12474.8017578125, "learning_rate": 0.00018652969205702405, "loss": 7.357, "step": 143210 }, { "epoch": 17.23465703971119, "grad_norm": 26803.2578125, "learning_rate": 0.00018652778506743768, "loss": 7.4322, "step": 143220 }, { "epoch": 17.23586040914561, "grad_norm": 4982.1591796875, "learning_rate": 0.0001865258779526242, "loss": 7.2978, "step": 143230 }, { "epoch": 17.237063778580023, "grad_norm": 11828.568359375, "learning_rate": 0.0001865239707125864, "loss": 7.3405, "step": 143240 }, { "epoch": 17.23826714801444, "grad_norm": 3795.12255859375, "learning_rate": 0.00018652206334732709, "loss": 7.3372, "step": 143250 }, { "epoch": 17.239470517448858, "grad_norm": 6387.31787109375, "learning_rate": 0.000186520155856849, "loss": 7.2956, "step": 143260 }, { "epoch": 17.240673886883272, "grad_norm": 8069.97216796875, "learning_rate": 0.00018651824824115484, "loss": 7.414, "step": 143270 }, { "epoch": 17.24187725631769, "grad_norm": 5350.2783203125, "learning_rate": 0.00018651634050024745, "loss": 7.2934, "step": 143280 }, { "epoch": 17.243080625752107, "grad_norm": 5435.71875, "learning_rate": 0.00018651443263412954, "loss": 7.3264, "step": 143290 }, { "epoch": 17.24428399518652, "grad_norm": 10408.421875, "learning_rate": 0.0001865125246428039, "loss": 7.2753, "step": 143300 }, { "epoch": 17.24548736462094, "grad_norm": 4312.27587890625, "learning_rate": 0.00018651061652627326, "loss": 7.406, "step": 143310 }, { "epoch": 17.246690734055356, "grad_norm": 13353.0390625, "learning_rate": 0.0001865087082845404, "loss": 7.1902, "step": 143320 }, { "epoch": 17.24789410348977, "grad_norm": 84529.4609375, "learning_rate": 0.00018650679991760808, "loss": 7.2451, "step": 143330 }, { "epoch": 17.249097472924188, "grad_norm": 5085.16650390625, "learning_rate": 0.0001865048914254791, "loss": 7.396, "step": 143340 }, { "epoch": 17.250300842358605, "grad_norm": 4344.40771484375, "learning_rate": 0.00018650298280815616, "loss": 7.267, "step": 143350 }, { "epoch": 17.25150421179302, "grad_norm": 19860.373046875, "learning_rate": 0.00018650107406564202, "loss": 7.3526, "step": 143360 }, { "epoch": 17.252707581227437, "grad_norm": 3494.416015625, "learning_rate": 0.0001864991651979395, "loss": 7.3546, "step": 143370 }, { "epoch": 17.253910950661854, "grad_norm": 7269.1943359375, "learning_rate": 0.00018649725620505133, "loss": 7.388, "step": 143380 }, { "epoch": 17.25511432009627, "grad_norm": 3985.4990234375, "learning_rate": 0.00018649534708698026, "loss": 7.4202, "step": 143390 }, { "epoch": 17.256317689530686, "grad_norm": 6875.625, "learning_rate": 0.00018649343784372908, "loss": 7.3016, "step": 143400 }, { "epoch": 17.257521058965104, "grad_norm": 47567.93359375, "learning_rate": 0.00018649152847530052, "loss": 7.5067, "step": 143410 }, { "epoch": 17.258724428399518, "grad_norm": 34000.109375, "learning_rate": 0.00018648961898169738, "loss": 7.4025, "step": 143420 }, { "epoch": 17.259927797833935, "grad_norm": 9943.953125, "learning_rate": 0.00018648770936292242, "loss": 7.3339, "step": 143430 }, { "epoch": 17.261131167268353, "grad_norm": 3363.312744140625, "learning_rate": 0.00018648579961897839, "loss": 7.3725, "step": 143440 }, { "epoch": 17.262334536702767, "grad_norm": 9100.0615234375, "learning_rate": 0.000186483889749868, "loss": 7.3353, "step": 143450 }, { "epoch": 17.263537906137184, "grad_norm": 6549.49658203125, "learning_rate": 0.00018648197975559415, "loss": 7.4322, "step": 143460 }, { "epoch": 17.264741275571602, "grad_norm": 15612.9072265625, "learning_rate": 0.00018648006963615946, "loss": 7.4149, "step": 143470 }, { "epoch": 17.265944645006016, "grad_norm": 5912.58935546875, "learning_rate": 0.0001864781593915668, "loss": 7.3577, "step": 143480 }, { "epoch": 17.267148014440433, "grad_norm": 16263.78515625, "learning_rate": 0.00018647624902181888, "loss": 7.3115, "step": 143490 }, { "epoch": 17.26835138387485, "grad_norm": 8480.08203125, "learning_rate": 0.0001864743385269185, "loss": 7.3549, "step": 143500 }, { "epoch": 17.269554753309265, "grad_norm": 4502.17822265625, "learning_rate": 0.00018647242790686836, "loss": 7.3199, "step": 143510 }, { "epoch": 17.270758122743683, "grad_norm": 5547.29052734375, "learning_rate": 0.00018647051716167128, "loss": 7.2781, "step": 143520 }, { "epoch": 17.2719614921781, "grad_norm": 6042.51806640625, "learning_rate": 0.00018646860629133004, "loss": 7.3389, "step": 143530 }, { "epoch": 17.273164861612514, "grad_norm": 8290.95703125, "learning_rate": 0.00018646669529584733, "loss": 7.3993, "step": 143540 }, { "epoch": 17.27436823104693, "grad_norm": 13297.8583984375, "learning_rate": 0.000186464784175226, "loss": 7.4346, "step": 143550 }, { "epoch": 17.27557160048135, "grad_norm": 2856.671875, "learning_rate": 0.00018646287292946876, "loss": 7.3942, "step": 143560 }, { "epoch": 17.276774969915763, "grad_norm": 17264.79296875, "learning_rate": 0.00018646096155857842, "loss": 7.2047, "step": 143570 }, { "epoch": 17.27797833935018, "grad_norm": 9158.6005859375, "learning_rate": 0.0001864590500625577, "loss": 7.4385, "step": 143580 }, { "epoch": 17.2791817087846, "grad_norm": 8827.74609375, "learning_rate": 0.0001864571384414094, "loss": 7.444, "step": 143590 }, { "epoch": 17.280385078219012, "grad_norm": 8712.2607421875, "learning_rate": 0.00018645522669513627, "loss": 7.3713, "step": 143600 }, { "epoch": 17.28158844765343, "grad_norm": 29853.666015625, "learning_rate": 0.0001864533148237411, "loss": 7.3908, "step": 143610 }, { "epoch": 17.282791817087848, "grad_norm": 4064.296630859375, "learning_rate": 0.00018645140282722662, "loss": 7.5226, "step": 143620 }, { "epoch": 17.28399518652226, "grad_norm": 10876.7958984375, "learning_rate": 0.00018644949070559564, "loss": 7.3226, "step": 143630 }, { "epoch": 17.28519855595668, "grad_norm": 6016.1044921875, "learning_rate": 0.00018644757845885085, "loss": 7.2681, "step": 143640 }, { "epoch": 17.286401925391097, "grad_norm": 7666.17919921875, "learning_rate": 0.00018644566608699508, "loss": 7.3294, "step": 143650 }, { "epoch": 17.28760529482551, "grad_norm": 3795.66455078125, "learning_rate": 0.00018644375359003115, "loss": 7.3469, "step": 143660 }, { "epoch": 17.28880866425993, "grad_norm": 12368.240234375, "learning_rate": 0.00018644184096796172, "loss": 7.4265, "step": 143670 }, { "epoch": 17.290012033694346, "grad_norm": 188565.40625, "learning_rate": 0.0001864399282207896, "loss": 7.3465, "step": 143680 }, { "epoch": 17.29121540312876, "grad_norm": 6490.89013671875, "learning_rate": 0.0001864380153485176, "loss": 7.2713, "step": 143690 }, { "epoch": 17.292418772563177, "grad_norm": 9034.33984375, "learning_rate": 0.00018643610235114842, "loss": 7.2496, "step": 143700 }, { "epoch": 17.29362214199759, "grad_norm": 6057.49267578125, "learning_rate": 0.00018643418922868486, "loss": 7.3932, "step": 143710 }, { "epoch": 17.29482551143201, "grad_norm": 2169.48876953125, "learning_rate": 0.00018643227598112968, "loss": 7.4326, "step": 143720 }, { "epoch": 17.296028880866427, "grad_norm": 2845.04541015625, "learning_rate": 0.00018643036260848568, "loss": 7.3755, "step": 143730 }, { "epoch": 17.29723225030084, "grad_norm": 4065.597412109375, "learning_rate": 0.00018642844911075562, "loss": 7.3622, "step": 143740 }, { "epoch": 17.29843561973526, "grad_norm": 2476.890380859375, "learning_rate": 0.0001864265354879422, "loss": 7.3002, "step": 143750 }, { "epoch": 17.299638989169676, "grad_norm": 2537.3583984375, "learning_rate": 0.00018642462174004828, "loss": 7.4122, "step": 143760 }, { "epoch": 17.30084235860409, "grad_norm": 16140.46484375, "learning_rate": 0.0001864227078670766, "loss": 7.3327, "step": 143770 }, { "epoch": 17.302045728038507, "grad_norm": 4514.66748046875, "learning_rate": 0.00018642079386902994, "loss": 7.377, "step": 143780 }, { "epoch": 17.303249097472925, "grad_norm": 4736.56689453125, "learning_rate": 0.000186418879745911, "loss": 7.3443, "step": 143790 }, { "epoch": 17.30445246690734, "grad_norm": 1771.5638427734375, "learning_rate": 0.00018641696549772264, "loss": 7.308, "step": 143800 }, { "epoch": 17.305655836341757, "grad_norm": 2567.355712890625, "learning_rate": 0.0001864150511244676, "loss": 7.3158, "step": 143810 }, { "epoch": 17.306859205776174, "grad_norm": 3582.007080078125, "learning_rate": 0.00018641313662614862, "loss": 7.3372, "step": 143820 }, { "epoch": 17.308062575210588, "grad_norm": 2451.7158203125, "learning_rate": 0.00018641122200276853, "loss": 7.4579, "step": 143830 }, { "epoch": 17.309265944645006, "grad_norm": 3274.827392578125, "learning_rate": 0.00018640930725433006, "loss": 7.3478, "step": 143840 }, { "epoch": 17.310469314079423, "grad_norm": 1747.725830078125, "learning_rate": 0.00018640739238083595, "loss": 7.318, "step": 143850 }, { "epoch": 17.311672683513837, "grad_norm": 1676.380126953125, "learning_rate": 0.00018640547738228906, "loss": 7.3239, "step": 143860 }, { "epoch": 17.312876052948255, "grad_norm": 4575.64453125, "learning_rate": 0.00018640356225869208, "loss": 7.2099, "step": 143870 }, { "epoch": 17.314079422382672, "grad_norm": 3088.161376953125, "learning_rate": 0.00018640164701004783, "loss": 7.301, "step": 143880 }, { "epoch": 17.315282791817086, "grad_norm": 2872.845947265625, "learning_rate": 0.00018639973163635907, "loss": 7.3168, "step": 143890 }, { "epoch": 17.316486161251504, "grad_norm": 2950.113525390625, "learning_rate": 0.00018639781613762855, "loss": 7.3247, "step": 143900 }, { "epoch": 17.31768953068592, "grad_norm": 1746.21435546875, "learning_rate": 0.00018639590051385907, "loss": 7.4187, "step": 143910 }, { "epoch": 17.318892900120336, "grad_norm": 4907.98779296875, "learning_rate": 0.00018639398476505336, "loss": 7.3506, "step": 143920 }, { "epoch": 17.320096269554753, "grad_norm": 1487.94384765625, "learning_rate": 0.00018639206889121428, "loss": 7.3712, "step": 143930 }, { "epoch": 17.32129963898917, "grad_norm": 4368.30908203125, "learning_rate": 0.00018639015289234452, "loss": 7.2957, "step": 143940 }, { "epoch": 17.322503008423585, "grad_norm": 2257.970703125, "learning_rate": 0.00018638823676844687, "loss": 7.2696, "step": 143950 }, { "epoch": 17.323706377858002, "grad_norm": 3080.18603515625, "learning_rate": 0.00018638632051952412, "loss": 7.3628, "step": 143960 }, { "epoch": 17.32490974729242, "grad_norm": 1503.36767578125, "learning_rate": 0.00018638440414557903, "loss": 7.4014, "step": 143970 }, { "epoch": 17.326113116726834, "grad_norm": 1938.1185302734375, "learning_rate": 0.00018638248764661438, "loss": 7.2477, "step": 143980 }, { "epoch": 17.32731648616125, "grad_norm": 1879.9444580078125, "learning_rate": 0.00018638057102263295, "loss": 7.2318, "step": 143990 }, { "epoch": 17.32851985559567, "grad_norm": 2374.004150390625, "learning_rate": 0.00018637865427363753, "loss": 7.4004, "step": 144000 }, { "epoch": 17.329723225030083, "grad_norm": 819.7803955078125, "learning_rate": 0.00018637673739963084, "loss": 7.3483, "step": 144010 }, { "epoch": 17.3309265944645, "grad_norm": 1696.6414794921875, "learning_rate": 0.00018637482040061572, "loss": 7.2798, "step": 144020 }, { "epoch": 17.332129963898918, "grad_norm": 1228.54443359375, "learning_rate": 0.00018637290327659485, "loss": 7.2843, "step": 144030 }, { "epoch": 17.333333333333332, "grad_norm": 1032.5482177734375, "learning_rate": 0.0001863709860275711, "loss": 7.3488, "step": 144040 }, { "epoch": 17.33453670276775, "grad_norm": 1954.9874267578125, "learning_rate": 0.0001863690686535472, "loss": 7.2892, "step": 144050 }, { "epoch": 17.335740072202167, "grad_norm": 1351.595947265625, "learning_rate": 0.00018636715115452596, "loss": 7.3964, "step": 144060 }, { "epoch": 17.33694344163658, "grad_norm": 5936.81787109375, "learning_rate": 0.0001863652335305101, "loss": 7.2856, "step": 144070 }, { "epoch": 17.338146811071, "grad_norm": 2107.088623046875, "learning_rate": 0.00018636331578150245, "loss": 7.3662, "step": 144080 }, { "epoch": 17.339350180505416, "grad_norm": 644.792724609375, "learning_rate": 0.0001863613979075057, "loss": 7.4152, "step": 144090 }, { "epoch": 17.34055354993983, "grad_norm": 1492.9910888671875, "learning_rate": 0.00018635947990852273, "loss": 7.2137, "step": 144100 }, { "epoch": 17.341756919374248, "grad_norm": 769.9312744140625, "learning_rate": 0.00018635756178455627, "loss": 7.3288, "step": 144110 }, { "epoch": 17.342960288808666, "grad_norm": 1779.5491943359375, "learning_rate": 0.00018635564353560913, "loss": 7.3602, "step": 144120 }, { "epoch": 17.34416365824308, "grad_norm": 8701.2197265625, "learning_rate": 0.00018635372516168398, "loss": 7.3381, "step": 144130 }, { "epoch": 17.345367027677497, "grad_norm": 2692.812255859375, "learning_rate": 0.00018635180666278374, "loss": 7.3142, "step": 144140 }, { "epoch": 17.346570397111915, "grad_norm": 810.0578002929688, "learning_rate": 0.00018634988803891106, "loss": 7.2351, "step": 144150 }, { "epoch": 17.34777376654633, "grad_norm": 1202.1480712890625, "learning_rate": 0.00018634796929006882, "loss": 7.2064, "step": 144160 }, { "epoch": 17.348977135980746, "grad_norm": 1173.7559814453125, "learning_rate": 0.00018634605041625973, "loss": 7.224, "step": 144170 }, { "epoch": 17.350180505415164, "grad_norm": 1695.92919921875, "learning_rate": 0.00018634413141748656, "loss": 7.2897, "step": 144180 }, { "epoch": 17.351383874849578, "grad_norm": 982.7464599609375, "learning_rate": 0.00018634221229375214, "loss": 7.303, "step": 144190 }, { "epoch": 17.352587244283995, "grad_norm": 1036.66796875, "learning_rate": 0.00018634029304505926, "loss": 7.3062, "step": 144200 }, { "epoch": 17.353790613718413, "grad_norm": 973.714111328125, "learning_rate": 0.0001863383736714106, "loss": 7.4222, "step": 144210 }, { "epoch": 17.354993983152827, "grad_norm": 1261.516845703125, "learning_rate": 0.00018633645417280903, "loss": 7.383, "step": 144220 }, { "epoch": 17.356197352587245, "grad_norm": 1612.299560546875, "learning_rate": 0.00018633453454925726, "loss": 7.2298, "step": 144230 }, { "epoch": 17.357400722021662, "grad_norm": 2724.57666015625, "learning_rate": 0.00018633261480075812, "loss": 7.3926, "step": 144240 }, { "epoch": 17.358604091456076, "grad_norm": 1550.954345703125, "learning_rate": 0.00018633069492731438, "loss": 7.2332, "step": 144250 }, { "epoch": 17.359807460890494, "grad_norm": 1035.0948486328125, "learning_rate": 0.00018632877492892882, "loss": 7.265, "step": 144260 }, { "epoch": 17.36101083032491, "grad_norm": 2157.24462890625, "learning_rate": 0.00018632685480560418, "loss": 7.2548, "step": 144270 }, { "epoch": 17.362214199759325, "grad_norm": 1274.3280029296875, "learning_rate": 0.00018632493455734327, "loss": 7.3577, "step": 144280 }, { "epoch": 17.363417569193743, "grad_norm": 874.005126953125, "learning_rate": 0.0001863230141841489, "loss": 7.1592, "step": 144290 }, { "epoch": 17.36462093862816, "grad_norm": 2269.83203125, "learning_rate": 0.0001863210936860238, "loss": 7.3021, "step": 144300 }, { "epoch": 17.365824308062574, "grad_norm": 527.2459106445312, "learning_rate": 0.00018631917306297075, "loss": 7.403, "step": 144310 }, { "epoch": 17.367027677496992, "grad_norm": 9902.6416015625, "learning_rate": 0.00018631725231499257, "loss": 7.3602, "step": 144320 }, { "epoch": 17.36823104693141, "grad_norm": 1581.8699951171875, "learning_rate": 0.000186315331442092, "loss": 7.3606, "step": 144330 }, { "epoch": 17.369434416365824, "grad_norm": 542.272216796875, "learning_rate": 0.00018631341044427182, "loss": 7.3011, "step": 144340 }, { "epoch": 17.37063778580024, "grad_norm": 1121.8876953125, "learning_rate": 0.00018631148932153483, "loss": 7.3544, "step": 144350 }, { "epoch": 17.37184115523466, "grad_norm": 341.2218322753906, "learning_rate": 0.0001863095680738838, "loss": 7.3572, "step": 144360 }, { "epoch": 17.373044524669073, "grad_norm": 1032.7603759765625, "learning_rate": 0.00018630764670132157, "loss": 7.2174, "step": 144370 }, { "epoch": 17.37424789410349, "grad_norm": 552.34228515625, "learning_rate": 0.00018630572520385083, "loss": 7.3507, "step": 144380 }, { "epoch": 17.375451263537904, "grad_norm": 969.8330688476562, "learning_rate": 0.0001863038035814744, "loss": 7.3362, "step": 144390 }, { "epoch": 17.376654632972322, "grad_norm": 651.5182495117188, "learning_rate": 0.00018630188183419507, "loss": 7.2821, "step": 144400 }, { "epoch": 17.37785800240674, "grad_norm": 1053.3939208984375, "learning_rate": 0.00018629995996201557, "loss": 7.3523, "step": 144410 }, { "epoch": 17.379061371841154, "grad_norm": 3322.80517578125, "learning_rate": 0.00018629803796493875, "loss": 7.2868, "step": 144420 }, { "epoch": 17.38026474127557, "grad_norm": 924.3411865234375, "learning_rate": 0.00018629611584296735, "loss": 7.2399, "step": 144430 }, { "epoch": 17.38146811070999, "grad_norm": 1255.8531494140625, "learning_rate": 0.00018629419359610417, "loss": 7.3268, "step": 144440 }, { "epoch": 17.382671480144403, "grad_norm": 435.2577209472656, "learning_rate": 0.000186292271224352, "loss": 7.2386, "step": 144450 }, { "epoch": 17.38387484957882, "grad_norm": 446.9579162597656, "learning_rate": 0.00018629034872771357, "loss": 7.31, "step": 144460 }, { "epoch": 17.385078219013238, "grad_norm": 1047.120849609375, "learning_rate": 0.00018628842610619178, "loss": 7.2474, "step": 144470 }, { "epoch": 17.386281588447652, "grad_norm": 427.711181640625, "learning_rate": 0.00018628650335978926, "loss": 7.3688, "step": 144480 }, { "epoch": 17.38748495788207, "grad_norm": 523.4109497070312, "learning_rate": 0.0001862845804885089, "loss": 7.3032, "step": 144490 }, { "epoch": 17.388688327316487, "grad_norm": 1037.880859375, "learning_rate": 0.00018628265749235344, "loss": 7.2783, "step": 144500 }, { "epoch": 17.3898916967509, "grad_norm": 817.0455932617188, "learning_rate": 0.00018628073437132567, "loss": 7.3336, "step": 144510 }, { "epoch": 17.39109506618532, "grad_norm": 970.6292114257812, "learning_rate": 0.00018627881112542836, "loss": 7.2893, "step": 144520 }, { "epoch": 17.392298435619736, "grad_norm": 600.50537109375, "learning_rate": 0.00018627688775466433, "loss": 7.3085, "step": 144530 }, { "epoch": 17.39350180505415, "grad_norm": 567.3610229492188, "learning_rate": 0.00018627496425903633, "loss": 7.3421, "step": 144540 }, { "epoch": 17.394705174488568, "grad_norm": 446.5660095214844, "learning_rate": 0.00018627304063854717, "loss": 7.3091, "step": 144550 }, { "epoch": 17.395908543922985, "grad_norm": 757.7503051757812, "learning_rate": 0.0001862711168931996, "loss": 7.321, "step": 144560 }, { "epoch": 17.3971119133574, "grad_norm": 1131.69384765625, "learning_rate": 0.00018626919302299646, "loss": 7.2824, "step": 144570 }, { "epoch": 17.398315282791817, "grad_norm": 236.73899841308594, "learning_rate": 0.00018626726902794044, "loss": 7.3372, "step": 144580 }, { "epoch": 17.399518652226234, "grad_norm": 689.5936889648438, "learning_rate": 0.00018626534490803444, "loss": 7.3674, "step": 144590 }, { "epoch": 17.40072202166065, "grad_norm": 1243.590087890625, "learning_rate": 0.00018626342066328116, "loss": 7.2967, "step": 144600 }, { "epoch": 17.401925391095066, "grad_norm": 508.0067443847656, "learning_rate": 0.0001862614962936834, "loss": 7.4296, "step": 144610 }, { "epoch": 17.403128760529484, "grad_norm": 74773.5234375, "learning_rate": 0.000186259571799244, "loss": 7.2877, "step": 144620 }, { "epoch": 17.404332129963898, "grad_norm": 352.3360290527344, "learning_rate": 0.00018625764717996568, "loss": 7.3145, "step": 144630 }, { "epoch": 17.405535499398315, "grad_norm": 371.5287170410156, "learning_rate": 0.00018625572243585127, "loss": 7.391, "step": 144640 }, { "epoch": 17.406738868832733, "grad_norm": 424.0631408691406, "learning_rate": 0.0001862537975669035, "loss": 7.298, "step": 144650 }, { "epoch": 17.407942238267147, "grad_norm": 328.6737365722656, "learning_rate": 0.00018625187257312518, "loss": 7.3303, "step": 144660 }, { "epoch": 17.409145607701564, "grad_norm": 603.6110229492188, "learning_rate": 0.00018624994745451915, "loss": 7.3787, "step": 144670 }, { "epoch": 17.410348977135982, "grad_norm": 829.3782958984375, "learning_rate": 0.0001862480222110881, "loss": 7.3476, "step": 144680 }, { "epoch": 17.411552346570396, "grad_norm": 814.5963134765625, "learning_rate": 0.0001862460968428349, "loss": 7.3706, "step": 144690 }, { "epoch": 17.412755716004813, "grad_norm": 514.9297485351562, "learning_rate": 0.0001862441713497623, "loss": 7.3761, "step": 144700 }, { "epoch": 17.41395908543923, "grad_norm": 416.16796875, "learning_rate": 0.0001862422457318731, "loss": 7.3654, "step": 144710 }, { "epoch": 17.415162454873645, "grad_norm": 1018.8207397460938, "learning_rate": 0.00018624031998917007, "loss": 7.3595, "step": 144720 }, { "epoch": 17.416365824308063, "grad_norm": 398.9307861328125, "learning_rate": 0.00018623839412165597, "loss": 7.2842, "step": 144730 }, { "epoch": 17.41756919374248, "grad_norm": 243.2138671875, "learning_rate": 0.0001862364681293337, "loss": 7.24, "step": 144740 }, { "epoch": 17.418772563176894, "grad_norm": 424.7333679199219, "learning_rate": 0.0001862345420122059, "loss": 7.413, "step": 144750 }, { "epoch": 17.41997593261131, "grad_norm": 668.6207275390625, "learning_rate": 0.00018623261577027547, "loss": 7.2815, "step": 144760 }, { "epoch": 17.42117930204573, "grad_norm": 602.9308471679688, "learning_rate": 0.00018623068940354514, "loss": 7.3633, "step": 144770 }, { "epoch": 17.422382671480143, "grad_norm": 511.3483581542969, "learning_rate": 0.0001862287629120177, "loss": 7.268, "step": 144780 }, { "epoch": 17.42358604091456, "grad_norm": 649.0414428710938, "learning_rate": 0.00018622683629569594, "loss": 7.3628, "step": 144790 }, { "epoch": 17.42478941034898, "grad_norm": 1002.4896850585938, "learning_rate": 0.0001862249095545827, "loss": 7.3551, "step": 144800 }, { "epoch": 17.425992779783392, "grad_norm": 337.2454528808594, "learning_rate": 0.0001862229826886807, "loss": 7.4289, "step": 144810 }, { "epoch": 17.42719614921781, "grad_norm": 919.8434448242188, "learning_rate": 0.00018622105569799277, "loss": 7.3102, "step": 144820 }, { "epoch": 17.428399518652228, "grad_norm": 1118.0283203125, "learning_rate": 0.0001862191285825217, "loss": 7.2126, "step": 144830 }, { "epoch": 17.42960288808664, "grad_norm": 469.995849609375, "learning_rate": 0.00018621720134227022, "loss": 7.3796, "step": 144840 }, { "epoch": 17.43080625752106, "grad_norm": 744.5451049804688, "learning_rate": 0.0001862152739772412, "loss": 7.3315, "step": 144850 }, { "epoch": 17.432009626955477, "grad_norm": 914.6758422851562, "learning_rate": 0.0001862133464874374, "loss": 7.3838, "step": 144860 }, { "epoch": 17.43321299638989, "grad_norm": 541.7366333007812, "learning_rate": 0.00018621141887286156, "loss": 7.3604, "step": 144870 }, { "epoch": 17.43441636582431, "grad_norm": 920.416015625, "learning_rate": 0.0001862094911335165, "loss": 7.2882, "step": 144880 }, { "epoch": 17.435619735258726, "grad_norm": 725.6030883789062, "learning_rate": 0.0001862075632694051, "loss": 7.2507, "step": 144890 }, { "epoch": 17.43682310469314, "grad_norm": 808.6946411132812, "learning_rate": 0.00018620563528053, "loss": 7.3138, "step": 144900 }, { "epoch": 17.438026474127557, "grad_norm": 1240.8717041015625, "learning_rate": 0.0001862037071668941, "loss": 7.3813, "step": 144910 }, { "epoch": 17.439229843561975, "grad_norm": 2129.42333984375, "learning_rate": 0.00018620177892850014, "loss": 7.4193, "step": 144920 }, { "epoch": 17.44043321299639, "grad_norm": 1639.1021728515625, "learning_rate": 0.0001861998505653509, "loss": 7.3415, "step": 144930 }, { "epoch": 17.441636582430807, "grad_norm": 975.15283203125, "learning_rate": 0.00018619792207744923, "loss": 7.3347, "step": 144940 }, { "epoch": 17.442839951865224, "grad_norm": 417.9345703125, "learning_rate": 0.00018619599346479787, "loss": 7.3501, "step": 144950 }, { "epoch": 17.444043321299638, "grad_norm": 636.4627685546875, "learning_rate": 0.00018619406472739964, "loss": 7.2403, "step": 144960 }, { "epoch": 17.445246690734056, "grad_norm": 475.5944519042969, "learning_rate": 0.0001861921358652573, "loss": 7.3205, "step": 144970 }, { "epoch": 17.446450060168473, "grad_norm": 935.9007568359375, "learning_rate": 0.00018619020687837368, "loss": 7.2482, "step": 144980 }, { "epoch": 17.447653429602887, "grad_norm": 513.8063354492188, "learning_rate": 0.00018618827776675155, "loss": 7.3805, "step": 144990 }, { "epoch": 17.448856799037305, "grad_norm": 279.8896789550781, "learning_rate": 0.00018618634853039367, "loss": 7.366, "step": 145000 }, { "epoch": 17.450060168471722, "grad_norm": 1896.1181640625, "learning_rate": 0.0001861844191693029, "loss": 7.3362, "step": 145010 }, { "epoch": 17.451263537906136, "grad_norm": 624.3501586914062, "learning_rate": 0.00018618248968348198, "loss": 7.2464, "step": 145020 }, { "epoch": 17.452466907340554, "grad_norm": 486.2643127441406, "learning_rate": 0.00018618056007293373, "loss": 7.2967, "step": 145030 }, { "epoch": 17.45367027677497, "grad_norm": 560.977783203125, "learning_rate": 0.00018617863033766092, "loss": 7.2483, "step": 145040 }, { "epoch": 17.454873646209386, "grad_norm": 274.99261474609375, "learning_rate": 0.00018617670047766638, "loss": 7.3979, "step": 145050 }, { "epoch": 17.456077015643803, "grad_norm": 266.1700439453125, "learning_rate": 0.00018617477049295286, "loss": 7.2552, "step": 145060 }, { "epoch": 17.45728038507822, "grad_norm": 430.4427185058594, "learning_rate": 0.00018617284038352315, "loss": 7.2915, "step": 145070 }, { "epoch": 17.458483754512635, "grad_norm": 258.2168273925781, "learning_rate": 0.0001861709101493801, "loss": 7.26, "step": 145080 }, { "epoch": 17.459687123947052, "grad_norm": 1040.547119140625, "learning_rate": 0.00018616897979052645, "loss": 7.3294, "step": 145090 }, { "epoch": 17.460890493381466, "grad_norm": 893.6184692382812, "learning_rate": 0.00018616704930696503, "loss": 7.4598, "step": 145100 }, { "epoch": 17.462093862815884, "grad_norm": 657.4937744140625, "learning_rate": 0.00018616511869869857, "loss": 7.4161, "step": 145110 }, { "epoch": 17.4632972322503, "grad_norm": 1353.1165771484375, "learning_rate": 0.00018616318796572996, "loss": 7.3446, "step": 145120 }, { "epoch": 17.464500601684716, "grad_norm": 274.4250793457031, "learning_rate": 0.00018616125710806193, "loss": 7.2106, "step": 145130 }, { "epoch": 17.465703971119133, "grad_norm": 441.3121032714844, "learning_rate": 0.00018615932612569725, "loss": 7.2857, "step": 145140 }, { "epoch": 17.46690734055355, "grad_norm": 950.4795532226562, "learning_rate": 0.0001861573950186388, "loss": 7.3201, "step": 145150 }, { "epoch": 17.468110709987965, "grad_norm": 439.0350036621094, "learning_rate": 0.00018615546378688934, "loss": 7.2845, "step": 145160 }, { "epoch": 17.469314079422382, "grad_norm": 509.9036560058594, "learning_rate": 0.0001861535324304516, "loss": 7.4241, "step": 145170 }, { "epoch": 17.4705174488568, "grad_norm": 536.127685546875, "learning_rate": 0.00018615160094932845, "loss": 7.3482, "step": 145180 }, { "epoch": 17.471720818291214, "grad_norm": 871.3175048828125, "learning_rate": 0.0001861496693435227, "loss": 7.3592, "step": 145190 }, { "epoch": 17.47292418772563, "grad_norm": 510.48870849609375, "learning_rate": 0.0001861477376130371, "loss": 7.3678, "step": 145200 }, { "epoch": 17.47412755716005, "grad_norm": 561.6532592773438, "learning_rate": 0.0001861458057578744, "loss": 7.2998, "step": 145210 }, { "epoch": 17.475330926594463, "grad_norm": 1668.0245361328125, "learning_rate": 0.00018614387377803747, "loss": 7.4022, "step": 145220 }, { "epoch": 17.47653429602888, "grad_norm": 418.8941345214844, "learning_rate": 0.0001861419416735291, "loss": 7.3786, "step": 145230 }, { "epoch": 17.477737665463298, "grad_norm": 1009.04248046875, "learning_rate": 0.00018614000944435207, "loss": 7.3459, "step": 145240 }, { "epoch": 17.478941034897712, "grad_norm": 1637.6072998046875, "learning_rate": 0.0001861380770905092, "loss": 7.3276, "step": 145250 }, { "epoch": 17.48014440433213, "grad_norm": 470.1253967285156, "learning_rate": 0.00018613614461200325, "loss": 7.2656, "step": 145260 }, { "epoch": 17.481347773766547, "grad_norm": 372.2898254394531, "learning_rate": 0.00018613421200883702, "loss": 7.2537, "step": 145270 }, { "epoch": 17.48255114320096, "grad_norm": 331.7706298828125, "learning_rate": 0.0001861322792810133, "loss": 7.3372, "step": 145280 }, { "epoch": 17.48375451263538, "grad_norm": 998.35546875, "learning_rate": 0.00018613034642853496, "loss": 7.353, "step": 145290 }, { "epoch": 17.484957882069796, "grad_norm": 663.9705810546875, "learning_rate": 0.0001861284134514047, "loss": 7.3066, "step": 145300 }, { "epoch": 17.48616125150421, "grad_norm": 432.5865173339844, "learning_rate": 0.0001861264803496254, "loss": 7.3771, "step": 145310 }, { "epoch": 17.487364620938628, "grad_norm": 1278.626953125, "learning_rate": 0.00018612454712319977, "loss": 7.3352, "step": 145320 }, { "epoch": 17.488567990373046, "grad_norm": 1087.6787109375, "learning_rate": 0.0001861226137721307, "loss": 7.2944, "step": 145330 }, { "epoch": 17.48977135980746, "grad_norm": 949.75, "learning_rate": 0.00018612068029642094, "loss": 7.2802, "step": 145340 }, { "epoch": 17.490974729241877, "grad_norm": 493.3676452636719, "learning_rate": 0.00018611874669607328, "loss": 7.302, "step": 145350 }, { "epoch": 17.492178098676295, "grad_norm": 830.3001098632812, "learning_rate": 0.00018611681297109054, "loss": 7.3606, "step": 145360 }, { "epoch": 17.49338146811071, "grad_norm": 596.345458984375, "learning_rate": 0.0001861148791214755, "loss": 7.3008, "step": 145370 }, { "epoch": 17.494584837545126, "grad_norm": 275.3224792480469, "learning_rate": 0.00018611294514723096, "loss": 7.2747, "step": 145380 }, { "epoch": 17.495788206979544, "grad_norm": 686.6500854492188, "learning_rate": 0.00018611101104835974, "loss": 7.2889, "step": 145390 }, { "epoch": 17.496991576413958, "grad_norm": 705.9725341796875, "learning_rate": 0.00018610907682486464, "loss": 7.3085, "step": 145400 }, { "epoch": 17.498194945848375, "grad_norm": 543.0433349609375, "learning_rate": 0.00018610714247674844, "loss": 7.3319, "step": 145410 }, { "epoch": 17.499398315282793, "grad_norm": 2264.55517578125, "learning_rate": 0.0001861052080040139, "loss": 7.3781, "step": 145420 }, { "epoch": 17.500601684717207, "grad_norm": 712.26416015625, "learning_rate": 0.00018610327340666394, "loss": 7.3167, "step": 145430 }, { "epoch": 17.501805054151625, "grad_norm": 924.663818359375, "learning_rate": 0.00018610133868470126, "loss": 7.36, "step": 145440 }, { "epoch": 17.503008423586042, "grad_norm": 425.78948974609375, "learning_rate": 0.00018609940383812866, "loss": 7.3095, "step": 145450 }, { "epoch": 17.504211793020456, "grad_norm": 501.5254821777344, "learning_rate": 0.00018609746886694896, "loss": 7.3466, "step": 145460 }, { "epoch": 17.505415162454874, "grad_norm": 677.3394165039062, "learning_rate": 0.000186095533771165, "loss": 7.3262, "step": 145470 }, { "epoch": 17.50661853188929, "grad_norm": 571.8798828125, "learning_rate": 0.00018609359855077955, "loss": 7.2988, "step": 145480 }, { "epoch": 17.507821901323705, "grad_norm": 2618.664794921875, "learning_rate": 0.00018609166320579537, "loss": 7.3308, "step": 145490 }, { "epoch": 17.509025270758123, "grad_norm": 889.0499267578125, "learning_rate": 0.00018608972773621532, "loss": 7.3257, "step": 145500 }, { "epoch": 17.51022864019254, "grad_norm": 574.9404907226562, "learning_rate": 0.00018608779214204218, "loss": 7.3327, "step": 145510 }, { "epoch": 17.511432009626954, "grad_norm": 1075.8699951171875, "learning_rate": 0.00018608585642327876, "loss": 7.3292, "step": 145520 }, { "epoch": 17.512635379061372, "grad_norm": 484.9434509277344, "learning_rate": 0.00018608392057992783, "loss": 7.3285, "step": 145530 }, { "epoch": 17.51383874849579, "grad_norm": 1054.6104736328125, "learning_rate": 0.00018608198461199222, "loss": 7.4039, "step": 145540 }, { "epoch": 17.515042117930204, "grad_norm": 809.6917114257812, "learning_rate": 0.00018608004851947474, "loss": 7.2362, "step": 145550 }, { "epoch": 17.51624548736462, "grad_norm": 2787.838623046875, "learning_rate": 0.00018607811230237817, "loss": 7.3033, "step": 145560 }, { "epoch": 17.51744885679904, "grad_norm": 928.9354248046875, "learning_rate": 0.0001860761759607053, "loss": 7.26, "step": 145570 }, { "epoch": 17.518652226233453, "grad_norm": 2730.526611328125, "learning_rate": 0.00018607423949445898, "loss": 7.3684, "step": 145580 }, { "epoch": 17.51985559566787, "grad_norm": 927.2328491210938, "learning_rate": 0.00018607230290364196, "loss": 7.2313, "step": 145590 }, { "epoch": 17.521058965102288, "grad_norm": 1870.129150390625, "learning_rate": 0.00018607036618825708, "loss": 7.2646, "step": 145600 }, { "epoch": 17.522262334536702, "grad_norm": 768.214111328125, "learning_rate": 0.00018606842934830713, "loss": 7.3782, "step": 145610 }, { "epoch": 17.52346570397112, "grad_norm": 1572.7806396484375, "learning_rate": 0.00018606649238379493, "loss": 7.3783, "step": 145620 }, { "epoch": 17.524669073405537, "grad_norm": 774.7906494140625, "learning_rate": 0.0001860645552947232, "loss": 7.3898, "step": 145630 }, { "epoch": 17.52587244283995, "grad_norm": 1090.316162109375, "learning_rate": 0.0001860626180810949, "loss": 7.4188, "step": 145640 }, { "epoch": 17.52707581227437, "grad_norm": 552.47119140625, "learning_rate": 0.0001860606807429127, "loss": 7.3743, "step": 145650 }, { "epoch": 17.528279181708786, "grad_norm": 1903.089599609375, "learning_rate": 0.00018605874328017942, "loss": 7.304, "step": 145660 }, { "epoch": 17.5294825511432, "grad_norm": 820.9869995117188, "learning_rate": 0.0001860568056928979, "loss": 7.3379, "step": 145670 }, { "epoch": 17.530685920577618, "grad_norm": 1438.8450927734375, "learning_rate": 0.000186054867981071, "loss": 7.2836, "step": 145680 }, { "epoch": 17.531889290012035, "grad_norm": 903.9391479492188, "learning_rate": 0.00018605293014470141, "loss": 7.342, "step": 145690 }, { "epoch": 17.53309265944645, "grad_norm": 1641.6419677734375, "learning_rate": 0.000186050992183792, "loss": 7.3934, "step": 145700 }, { "epoch": 17.534296028880867, "grad_norm": 1341.2841796875, "learning_rate": 0.00018604905409834555, "loss": 7.3568, "step": 145710 }, { "epoch": 17.535499398315284, "grad_norm": 1363.6627197265625, "learning_rate": 0.00018604711588836485, "loss": 7.3389, "step": 145720 }, { "epoch": 17.5367027677497, "grad_norm": 738.7965087890625, "learning_rate": 0.00018604517755385275, "loss": 7.2598, "step": 145730 }, { "epoch": 17.537906137184116, "grad_norm": 1507.6448974609375, "learning_rate": 0.00018604323909481206, "loss": 7.3141, "step": 145740 }, { "epoch": 17.53910950661853, "grad_norm": 618.2645874023438, "learning_rate": 0.00018604130051124554, "loss": 7.4364, "step": 145750 }, { "epoch": 17.540312876052948, "grad_norm": 572.6903076171875, "learning_rate": 0.000186039361803156, "loss": 7.3705, "step": 145760 }, { "epoch": 17.541516245487365, "grad_norm": 771.5133056640625, "learning_rate": 0.00018603742297054624, "loss": 7.426, "step": 145770 }, { "epoch": 17.54271961492178, "grad_norm": 390.690673828125, "learning_rate": 0.00018603548401341914, "loss": 7.3433, "step": 145780 }, { "epoch": 17.543922984356197, "grad_norm": 443.82257080078125, "learning_rate": 0.00018603354493177744, "loss": 7.3345, "step": 145790 }, { "epoch": 17.545126353790614, "grad_norm": 869.7037353515625, "learning_rate": 0.00018603160572562395, "loss": 7.2968, "step": 145800 }, { "epoch": 17.54632972322503, "grad_norm": 541.100341796875, "learning_rate": 0.0001860296663949615, "loss": 7.2972, "step": 145810 }, { "epoch": 17.547533092659446, "grad_norm": 598.3197021484375, "learning_rate": 0.00018602772693979285, "loss": 7.3979, "step": 145820 }, { "epoch": 17.548736462093864, "grad_norm": 358.25927734375, "learning_rate": 0.00018602578736012085, "loss": 7.386, "step": 145830 }, { "epoch": 17.549939831528278, "grad_norm": 543.0917358398438, "learning_rate": 0.0001860238476559483, "loss": 7.2739, "step": 145840 }, { "epoch": 17.551143200962695, "grad_norm": 549.5787353515625, "learning_rate": 0.00018602190782727803, "loss": 7.315, "step": 145850 }, { "epoch": 17.552346570397113, "grad_norm": 262.8681640625, "learning_rate": 0.0001860199678741128, "loss": 7.2264, "step": 145860 }, { "epoch": 17.553549939831527, "grad_norm": 539.4453125, "learning_rate": 0.00018601802779645544, "loss": 7.3807, "step": 145870 }, { "epoch": 17.554753309265944, "grad_norm": 877.7745361328125, "learning_rate": 0.00018601608759430876, "loss": 7.314, "step": 145880 }, { "epoch": 17.555956678700362, "grad_norm": 343.7127990722656, "learning_rate": 0.00018601414726767554, "loss": 7.3161, "step": 145890 }, { "epoch": 17.557160048134776, "grad_norm": 285.62847900390625, "learning_rate": 0.0001860122068165586, "loss": 7.3357, "step": 145900 }, { "epoch": 17.558363417569193, "grad_norm": 420.1763916015625, "learning_rate": 0.00018601026624096085, "loss": 7.3248, "step": 145910 }, { "epoch": 17.55956678700361, "grad_norm": 544.7677001953125, "learning_rate": 0.00018600832554088492, "loss": 7.3562, "step": 145920 }, { "epoch": 17.560770156438025, "grad_norm": 451.8050842285156, "learning_rate": 0.00018600638471633375, "loss": 7.267, "step": 145930 }, { "epoch": 17.561973525872443, "grad_norm": 773.5860595703125, "learning_rate": 0.00018600444376731012, "loss": 7.4023, "step": 145940 }, { "epoch": 17.56317689530686, "grad_norm": 448.1594543457031, "learning_rate": 0.0001860025026938168, "loss": 7.3144, "step": 145950 }, { "epoch": 17.564380264741274, "grad_norm": 313.5295104980469, "learning_rate": 0.00018600056149585664, "loss": 7.3236, "step": 145960 }, { "epoch": 17.56558363417569, "grad_norm": 538.1412353515625, "learning_rate": 0.0001859986201734324, "loss": 7.3641, "step": 145970 }, { "epoch": 17.56678700361011, "grad_norm": 349.06317138671875, "learning_rate": 0.00018599667872654696, "loss": 7.3648, "step": 145980 }, { "epoch": 17.567990373044523, "grad_norm": 716.6544799804688, "learning_rate": 0.0001859947371552031, "loss": 7.356, "step": 145990 }, { "epoch": 17.56919374247894, "grad_norm": 228.37562561035156, "learning_rate": 0.0001859927954594036, "loss": 7.3501, "step": 146000 }, { "epoch": 17.57039711191336, "grad_norm": 1217.5029296875, "learning_rate": 0.0001859908536391513, "loss": 7.3097, "step": 146010 }, { "epoch": 17.571600481347772, "grad_norm": 986.5830078125, "learning_rate": 0.00018598891169444902, "loss": 7.307, "step": 146020 }, { "epoch": 17.57280385078219, "grad_norm": 187.19424438476562, "learning_rate": 0.0001859869696252995, "loss": 7.3093, "step": 146030 }, { "epoch": 17.574007220216608, "grad_norm": 1073.8994140625, "learning_rate": 0.00018598502743170567, "loss": 7.2891, "step": 146040 }, { "epoch": 17.57521058965102, "grad_norm": 403.73626708984375, "learning_rate": 0.00018598308511367026, "loss": 7.2462, "step": 146050 }, { "epoch": 17.57641395908544, "grad_norm": 4176.06298828125, "learning_rate": 0.00018598114267119609, "loss": 7.3187, "step": 146060 }, { "epoch": 17.577617328519857, "grad_norm": 377.9479064941406, "learning_rate": 0.00018597920010428597, "loss": 7.2604, "step": 146070 }, { "epoch": 17.57882069795427, "grad_norm": 1296.099365234375, "learning_rate": 0.00018597725741294273, "loss": 7.2657, "step": 146080 }, { "epoch": 17.58002406738869, "grad_norm": 1034.092529296875, "learning_rate": 0.00018597531459716918, "loss": 7.3045, "step": 146090 }, { "epoch": 17.581227436823106, "grad_norm": 636.8875122070312, "learning_rate": 0.0001859733716569681, "loss": 7.3015, "step": 146100 }, { "epoch": 17.58243080625752, "grad_norm": 305.8636474609375, "learning_rate": 0.00018597142859234233, "loss": 7.2248, "step": 146110 }, { "epoch": 17.583634175691937, "grad_norm": 666.8740234375, "learning_rate": 0.00018596948540329467, "loss": 7.3088, "step": 146120 }, { "epoch": 17.584837545126355, "grad_norm": 248.62120056152344, "learning_rate": 0.00018596754208982796, "loss": 7.2785, "step": 146130 }, { "epoch": 17.58604091456077, "grad_norm": 5054.96044921875, "learning_rate": 0.000185965598651945, "loss": 7.1947, "step": 146140 }, { "epoch": 17.587244283995187, "grad_norm": 366.7696228027344, "learning_rate": 0.0001859636550896486, "loss": 7.2768, "step": 146150 }, { "epoch": 17.588447653429604, "grad_norm": 322.9616394042969, "learning_rate": 0.00018596171140294155, "loss": 7.1709, "step": 146160 }, { "epoch": 17.589651022864018, "grad_norm": 1128.99169921875, "learning_rate": 0.00018595976759182666, "loss": 7.2795, "step": 146170 }, { "epoch": 17.590854392298436, "grad_norm": 435.0795593261719, "learning_rate": 0.00018595782365630678, "loss": 7.246, "step": 146180 }, { "epoch": 17.592057761732853, "grad_norm": 2431923.25, "learning_rate": 0.0001859558795963847, "loss": 7.2314, "step": 146190 }, { "epoch": 17.593261131167267, "grad_norm": 4149.2431640625, "learning_rate": 0.00018595393541206325, "loss": 7.3353, "step": 146200 }, { "epoch": 17.594464500601685, "grad_norm": 705.9428100585938, "learning_rate": 0.00018595199110334524, "loss": 7.2372, "step": 146210 }, { "epoch": 17.595667870036102, "grad_norm": 697.0997314453125, "learning_rate": 0.00018595004667023348, "loss": 7.4221, "step": 146220 }, { "epoch": 17.596871239470516, "grad_norm": 11594.4716796875, "learning_rate": 0.00018594810211273078, "loss": 7.2416, "step": 146230 }, { "epoch": 17.598074608904934, "grad_norm": 5237.2587890625, "learning_rate": 0.00018594615743083994, "loss": 7.2553, "step": 146240 }, { "epoch": 17.59927797833935, "grad_norm": 3798.73388671875, "learning_rate": 0.0001859442126245638, "loss": 7.1613, "step": 146250 }, { "epoch": 17.600481347773766, "grad_norm": 3670.008056640625, "learning_rate": 0.00018594226769390516, "loss": 7.3628, "step": 146260 }, { "epoch": 17.601684717208183, "grad_norm": 537.2471313476562, "learning_rate": 0.00018594032263886685, "loss": 7.3301, "step": 146270 }, { "epoch": 17.6028880866426, "grad_norm": 23133.5234375, "learning_rate": 0.00018593837745945168, "loss": 7.2543, "step": 146280 }, { "epoch": 17.604091456077015, "grad_norm": 907.0701904296875, "learning_rate": 0.00018593643215566243, "loss": 7.3081, "step": 146290 }, { "epoch": 17.605294825511432, "grad_norm": 13418.8427734375, "learning_rate": 0.00018593448672750198, "loss": 7.2941, "step": 146300 }, { "epoch": 17.60649819494585, "grad_norm": 16172.681640625, "learning_rate": 0.0001859325411749731, "loss": 7.2694, "step": 146310 }, { "epoch": 17.607701564380264, "grad_norm": 874.1754760742188, "learning_rate": 0.0001859305954980786, "loss": 7.3033, "step": 146320 }, { "epoch": 17.60890493381468, "grad_norm": 3372.183349609375, "learning_rate": 0.00018592864969682133, "loss": 7.2456, "step": 146330 }, { "epoch": 17.6101083032491, "grad_norm": 3202.759521484375, "learning_rate": 0.0001859267037712041, "loss": 7.2307, "step": 146340 }, { "epoch": 17.611311672683513, "grad_norm": 1959.9033203125, "learning_rate": 0.0001859247577212297, "loss": 7.3286, "step": 146350 }, { "epoch": 17.61251504211793, "grad_norm": 6251.66015625, "learning_rate": 0.00018592281154690096, "loss": 7.2637, "step": 146360 }, { "epoch": 17.613718411552348, "grad_norm": 2906.01513671875, "learning_rate": 0.0001859208652482207, "loss": 7.2898, "step": 146370 }, { "epoch": 17.614921780986762, "grad_norm": 6713.6220703125, "learning_rate": 0.00018591891882519174, "loss": 7.2699, "step": 146380 }, { "epoch": 17.61612515042118, "grad_norm": 2991.421142578125, "learning_rate": 0.0001859169722778169, "loss": 7.2957, "step": 146390 }, { "epoch": 17.617328519855597, "grad_norm": 13018.111328125, "learning_rate": 0.00018591502560609896, "loss": 7.3497, "step": 146400 }, { "epoch": 17.61853188929001, "grad_norm": 184216.515625, "learning_rate": 0.00018591307881004078, "loss": 7.4097, "step": 146410 }, { "epoch": 17.61973525872443, "grad_norm": 265990.875, "learning_rate": 0.00018591113188964518, "loss": 7.2602, "step": 146420 }, { "epoch": 17.620938628158846, "grad_norm": 110093.6328125, "learning_rate": 0.00018590918484491495, "loss": 7.3023, "step": 146430 }, { "epoch": 17.62214199759326, "grad_norm": 35025.1484375, "learning_rate": 0.00018590723767585292, "loss": 7.336, "step": 146440 }, { "epoch": 17.623345367027678, "grad_norm": 3899.88623046875, "learning_rate": 0.00018590529038246188, "loss": 7.3167, "step": 146450 }, { "epoch": 17.624548736462096, "grad_norm": 10622.6640625, "learning_rate": 0.00018590334296474472, "loss": 7.3151, "step": 146460 }, { "epoch": 17.62575210589651, "grad_norm": 16361.875, "learning_rate": 0.0001859013954227042, "loss": 7.345, "step": 146470 }, { "epoch": 17.626955475330927, "grad_norm": 7798.11865234375, "learning_rate": 0.00018589944775634313, "loss": 7.3355, "step": 146480 }, { "epoch": 17.628158844765345, "grad_norm": 11577.7197265625, "learning_rate": 0.00018589749996566438, "loss": 7.2456, "step": 146490 }, { "epoch": 17.62936221419976, "grad_norm": 4457.5791015625, "learning_rate": 0.00018589555205067072, "loss": 7.3605, "step": 146500 }, { "epoch": 17.630565583634176, "grad_norm": 879.6420288085938, "learning_rate": 0.000185893604011365, "loss": 7.2893, "step": 146510 }, { "epoch": 17.63176895306859, "grad_norm": 4148.8310546875, "learning_rate": 0.00018589165584775, "loss": 7.2695, "step": 146520 }, { "epoch": 17.632972322503008, "grad_norm": 3638.1328125, "learning_rate": 0.0001858897075598286, "loss": 7.3228, "step": 146530 }, { "epoch": 17.634175691937426, "grad_norm": 48776.03125, "learning_rate": 0.0001858877591476036, "loss": 7.3248, "step": 146540 }, { "epoch": 17.63537906137184, "grad_norm": 19751.966796875, "learning_rate": 0.00018588581061107777, "loss": 7.2489, "step": 146550 }, { "epoch": 17.636582430806257, "grad_norm": 19402.21484375, "learning_rate": 0.000185883861950254, "loss": 7.2607, "step": 146560 }, { "epoch": 17.637785800240675, "grad_norm": 1876.0487060546875, "learning_rate": 0.00018588191316513508, "loss": 7.2645, "step": 146570 }, { "epoch": 17.63898916967509, "grad_norm": 2103.383544921875, "learning_rate": 0.0001858799642557238, "loss": 7.2497, "step": 146580 }, { "epoch": 17.640192539109506, "grad_norm": 1838.4342041015625, "learning_rate": 0.00018587801522202302, "loss": 7.3714, "step": 146590 }, { "epoch": 17.641395908543924, "grad_norm": 969.3784790039062, "learning_rate": 0.00018587606606403556, "loss": 7.3377, "step": 146600 }, { "epoch": 17.642599277978338, "grad_norm": 2345024.0, "learning_rate": 0.0001858741167817642, "loss": 7.2874, "step": 146610 }, { "epoch": 17.643802647412755, "grad_norm": 1383.1986083984375, "learning_rate": 0.00018587216737521182, "loss": 7.2152, "step": 146620 }, { "epoch": 17.645006016847173, "grad_norm": 104735.765625, "learning_rate": 0.00018587021784438119, "loss": 7.294, "step": 146630 }, { "epoch": 17.646209386281587, "grad_norm": 8677.5244140625, "learning_rate": 0.00018586826818927518, "loss": 7.3194, "step": 146640 }, { "epoch": 17.647412755716005, "grad_norm": 2759.0703125, "learning_rate": 0.0001858663184098966, "loss": 7.2547, "step": 146650 }, { "epoch": 17.648616125150422, "grad_norm": 6347.619140625, "learning_rate": 0.00018586436850624822, "loss": 7.2436, "step": 146660 }, { "epoch": 17.649819494584836, "grad_norm": 958.448974609375, "learning_rate": 0.0001858624184783329, "loss": 7.2641, "step": 146670 }, { "epoch": 17.651022864019254, "grad_norm": 4275.88330078125, "learning_rate": 0.00018586046832615348, "loss": 7.3091, "step": 146680 }, { "epoch": 17.65222623345367, "grad_norm": 76840.453125, "learning_rate": 0.00018585851804971278, "loss": 7.3178, "step": 146690 }, { "epoch": 17.653429602888085, "grad_norm": 19024.9375, "learning_rate": 0.00018585656764901355, "loss": 7.3103, "step": 146700 }, { "epoch": 17.654632972322503, "grad_norm": 2096.9912109375, "learning_rate": 0.00018585461712405872, "loss": 7.2463, "step": 146710 }, { "epoch": 17.65583634175692, "grad_norm": 2235.46044921875, "learning_rate": 0.00018585266647485105, "loss": 7.3411, "step": 146720 }, { "epoch": 17.657039711191334, "grad_norm": 4100.783203125, "learning_rate": 0.00018585071570139335, "loss": 7.3523, "step": 146730 }, { "epoch": 17.658243080625752, "grad_norm": 570.4086303710938, "learning_rate": 0.0001858487648036885, "loss": 7.2843, "step": 146740 }, { "epoch": 17.65944645006017, "grad_norm": 755.4594116210938, "learning_rate": 0.0001858468137817393, "loss": 7.2183, "step": 146750 }, { "epoch": 17.660649819494584, "grad_norm": 2245.36328125, "learning_rate": 0.00018584486263554853, "loss": 7.3899, "step": 146760 }, { "epoch": 17.661853188929, "grad_norm": 46659.6796875, "learning_rate": 0.00018584291136511903, "loss": 7.3418, "step": 146770 }, { "epoch": 17.66305655836342, "grad_norm": 1615.165283203125, "learning_rate": 0.0001858409599704537, "loss": 7.143, "step": 146780 }, { "epoch": 17.664259927797833, "grad_norm": 1254.905517578125, "learning_rate": 0.00018583900845155527, "loss": 7.2075, "step": 146790 }, { "epoch": 17.66546329723225, "grad_norm": 849.8490600585938, "learning_rate": 0.00018583705680842665, "loss": 7.3228, "step": 146800 }, { "epoch": 17.666666666666668, "grad_norm": 367898.90625, "learning_rate": 0.00018583510504107057, "loss": 7.2608, "step": 146810 }, { "epoch": 17.667870036101082, "grad_norm": 1528.6759033203125, "learning_rate": 0.00018583315314948994, "loss": 7.3309, "step": 146820 }, { "epoch": 17.6690734055355, "grad_norm": 645.6251831054688, "learning_rate": 0.0001858312011336875, "loss": 7.2909, "step": 146830 }, { "epoch": 17.670276774969917, "grad_norm": 1087.1163330078125, "learning_rate": 0.00018582924899366617, "loss": 7.2677, "step": 146840 }, { "epoch": 17.67148014440433, "grad_norm": 606.63818359375, "learning_rate": 0.0001858272967294287, "loss": 7.3886, "step": 146850 }, { "epoch": 17.67268351383875, "grad_norm": 806.1345825195312, "learning_rate": 0.0001858253443409779, "loss": 7.2328, "step": 146860 }, { "epoch": 17.673886883273166, "grad_norm": 312233.5625, "learning_rate": 0.0001858233918283167, "loss": 7.2273, "step": 146870 }, { "epoch": 17.67509025270758, "grad_norm": 2824082.75, "learning_rate": 0.00018582143919144783, "loss": 7.3743, "step": 146880 }, { "epoch": 17.676293622141998, "grad_norm": 92454.453125, "learning_rate": 0.00018581948643037416, "loss": 7.3626, "step": 146890 }, { "epoch": 17.677496991576415, "grad_norm": 1404.9002685546875, "learning_rate": 0.0001858175335450985, "loss": 7.2826, "step": 146900 }, { "epoch": 17.67870036101083, "grad_norm": 2343.916259765625, "learning_rate": 0.00018581558053562368, "loss": 7.2826, "step": 146910 }, { "epoch": 17.679903730445247, "grad_norm": 828.0574340820312, "learning_rate": 0.00018581362740195253, "loss": 7.2656, "step": 146920 }, { "epoch": 17.681107099879664, "grad_norm": 1012.4451293945312, "learning_rate": 0.00018581167414408788, "loss": 7.3018, "step": 146930 }, { "epoch": 17.68231046931408, "grad_norm": 3416.289306640625, "learning_rate": 0.00018580972076203257, "loss": 7.3325, "step": 146940 }, { "epoch": 17.683513838748496, "grad_norm": 762.9553833007812, "learning_rate": 0.00018580776725578938, "loss": 7.2268, "step": 146950 }, { "epoch": 17.684717208182914, "grad_norm": 700855.4375, "learning_rate": 0.0001858058136253612, "loss": 7.1951, "step": 146960 }, { "epoch": 17.685920577617328, "grad_norm": 890.2206420898438, "learning_rate": 0.00018580385987075078, "loss": 7.3135, "step": 146970 }, { "epoch": 17.687123947051745, "grad_norm": 894.7973022460938, "learning_rate": 0.00018580190599196098, "loss": 7.306, "step": 146980 }, { "epoch": 17.688327316486163, "grad_norm": 2258.790283203125, "learning_rate": 0.00018579995198899466, "loss": 7.2905, "step": 146990 }, { "epoch": 17.689530685920577, "grad_norm": 2671.80126953125, "learning_rate": 0.00018579799786185467, "loss": 7.211, "step": 147000 }, { "epoch": 17.690734055354994, "grad_norm": 574.3573608398438, "learning_rate": 0.00018579604361054376, "loss": 7.2214, "step": 147010 }, { "epoch": 17.691937424789412, "grad_norm": 2397.772705078125, "learning_rate": 0.0001857940892350648, "loss": 7.2681, "step": 147020 }, { "epoch": 17.693140794223826, "grad_norm": 44892.40234375, "learning_rate": 0.00018579213473542057, "loss": 7.216, "step": 147030 }, { "epoch": 17.694344163658243, "grad_norm": 3331.72216796875, "learning_rate": 0.00018579018011161399, "loss": 7.3013, "step": 147040 }, { "epoch": 17.69554753309266, "grad_norm": 6074.6884765625, "learning_rate": 0.0001857882253636478, "loss": 7.3451, "step": 147050 }, { "epoch": 17.696750902527075, "grad_norm": 404.1683349609375, "learning_rate": 0.0001857862704915249, "loss": 7.3689, "step": 147060 }, { "epoch": 17.697954271961493, "grad_norm": 705.8678588867188, "learning_rate": 0.00018578431549524807, "loss": 7.282, "step": 147070 }, { "epoch": 17.69915764139591, "grad_norm": 1980.671142578125, "learning_rate": 0.00018578236037482016, "loss": 7.3181, "step": 147080 }, { "epoch": 17.700361010830324, "grad_norm": 6872.78466796875, "learning_rate": 0.00018578040513024394, "loss": 7.2432, "step": 147090 }, { "epoch": 17.70156438026474, "grad_norm": 927.7977294921875, "learning_rate": 0.0001857784497615224, "loss": 7.2748, "step": 147100 }, { "epoch": 17.70276774969916, "grad_norm": 654.0418090820312, "learning_rate": 0.00018577649426865817, "loss": 7.1842, "step": 147110 }, { "epoch": 17.703971119133573, "grad_norm": 718.8326416015625, "learning_rate": 0.00018577453865165425, "loss": 7.3581, "step": 147120 }, { "epoch": 17.70517448856799, "grad_norm": 2901.184326171875, "learning_rate": 0.00018577258291051332, "loss": 7.2567, "step": 147130 }, { "epoch": 17.706377858002405, "grad_norm": 4967.12158203125, "learning_rate": 0.00018577062704523836, "loss": 7.27, "step": 147140 }, { "epoch": 17.707581227436823, "grad_norm": 547.86865234375, "learning_rate": 0.00018576867105583208, "loss": 7.1951, "step": 147150 }, { "epoch": 17.70878459687124, "grad_norm": 3806.975830078125, "learning_rate": 0.0001857667149422974, "loss": 7.2073, "step": 147160 }, { "epoch": 17.709987966305654, "grad_norm": 254.1750030517578, "learning_rate": 0.00018576475870463709, "loss": 7.1362, "step": 147170 }, { "epoch": 17.71119133574007, "grad_norm": 352.9841613769531, "learning_rate": 0.00018576280234285398, "loss": 7.3163, "step": 147180 }, { "epoch": 17.71239470517449, "grad_norm": 1729.2803955078125, "learning_rate": 0.00018576084585695093, "loss": 7.2933, "step": 147190 }, { "epoch": 17.713598074608903, "grad_norm": 986.765625, "learning_rate": 0.00018575888924693076, "loss": 7.2552, "step": 147200 }, { "epoch": 17.71480144404332, "grad_norm": 13549.70703125, "learning_rate": 0.0001857569325127963, "loss": 7.2425, "step": 147210 }, { "epoch": 17.71600481347774, "grad_norm": 488.2624816894531, "learning_rate": 0.0001857549756545504, "loss": 7.2478, "step": 147220 }, { "epoch": 17.717208182912152, "grad_norm": 1665.0059814453125, "learning_rate": 0.00018575301867219588, "loss": 7.2689, "step": 147230 }, { "epoch": 17.71841155234657, "grad_norm": 35308.27734375, "learning_rate": 0.00018575106156573556, "loss": 7.3196, "step": 147240 }, { "epoch": 17.719614921780988, "grad_norm": 230.5156707763672, "learning_rate": 0.00018574910433517228, "loss": 7.2919, "step": 147250 }, { "epoch": 17.7208182912154, "grad_norm": 156.07965087890625, "learning_rate": 0.00018574714698050887, "loss": 7.4263, "step": 147260 }, { "epoch": 17.72202166064982, "grad_norm": 11104.1474609375, "learning_rate": 0.0001857451895017482, "loss": 7.294, "step": 147270 }, { "epoch": 17.723225030084237, "grad_norm": 493.3023986816406, "learning_rate": 0.00018574323189889303, "loss": 7.2137, "step": 147280 }, { "epoch": 17.72442839951865, "grad_norm": 722.85986328125, "learning_rate": 0.00018574127417194628, "loss": 7.2701, "step": 147290 }, { "epoch": 17.72563176895307, "grad_norm": 236.43310546875, "learning_rate": 0.0001857393163209107, "loss": 7.2207, "step": 147300 }, { "epoch": 17.726835138387486, "grad_norm": 3539.54931640625, "learning_rate": 0.00018573735834578919, "loss": 7.2915, "step": 147310 }, { "epoch": 17.7280385078219, "grad_norm": 3622.7412109375, "learning_rate": 0.00018573540024658454, "loss": 7.2717, "step": 147320 }, { "epoch": 17.729241877256317, "grad_norm": 3338.32080078125, "learning_rate": 0.00018573344202329957, "loss": 7.2582, "step": 147330 }, { "epoch": 17.730445246690735, "grad_norm": 68588.2421875, "learning_rate": 0.00018573148367593718, "loss": 7.2161, "step": 147340 }, { "epoch": 17.73164861612515, "grad_norm": 18673.337890625, "learning_rate": 0.00018572952520450015, "loss": 7.2857, "step": 147350 }, { "epoch": 17.732851985559567, "grad_norm": 18418.140625, "learning_rate": 0.00018572756660899137, "loss": 7.2765, "step": 147360 }, { "epoch": 17.734055354993984, "grad_norm": 47369.05859375, "learning_rate": 0.00018572560788941359, "loss": 7.2487, "step": 147370 }, { "epoch": 17.735258724428398, "grad_norm": 7611.26123046875, "learning_rate": 0.00018572364904576965, "loss": 7.2968, "step": 147380 }, { "epoch": 17.736462093862816, "grad_norm": 544.062255859375, "learning_rate": 0.0001857216900780625, "loss": 7.3254, "step": 147390 }, { "epoch": 17.737665463297233, "grad_norm": 365.38739013671875, "learning_rate": 0.0001857197309862949, "loss": 7.1947, "step": 147400 }, { "epoch": 17.738868832731647, "grad_norm": 1484.4580078125, "learning_rate": 0.00018571777177046965, "loss": 7.2881, "step": 147410 }, { "epoch": 17.740072202166065, "grad_norm": 45483.5859375, "learning_rate": 0.00018571581243058963, "loss": 7.423, "step": 147420 }, { "epoch": 17.741275571600482, "grad_norm": 836.2890014648438, "learning_rate": 0.00018571385296665768, "loss": 7.2216, "step": 147430 }, { "epoch": 17.742478941034896, "grad_norm": 1123.537841796875, "learning_rate": 0.00018571189337867662, "loss": 7.2905, "step": 147440 }, { "epoch": 17.743682310469314, "grad_norm": 200.18394470214844, "learning_rate": 0.00018570993366664928, "loss": 7.2276, "step": 147450 }, { "epoch": 17.74488567990373, "grad_norm": 69.5604019165039, "learning_rate": 0.0001857079738305785, "loss": 7.551, "step": 147460 }, { "epoch": 17.746089049338146, "grad_norm": 1939.722900390625, "learning_rate": 0.0001857060138704671, "loss": 7.4713, "step": 147470 }, { "epoch": 17.747292418772563, "grad_norm": 462.474853515625, "learning_rate": 0.00018570405378631798, "loss": 7.4653, "step": 147480 }, { "epoch": 17.74849578820698, "grad_norm": 766.5291748046875, "learning_rate": 0.00018570209357813392, "loss": 7.3912, "step": 147490 }, { "epoch": 17.749699157641395, "grad_norm": 414.2393798828125, "learning_rate": 0.00018570013324591779, "loss": 7.2489, "step": 147500 }, { "epoch": 17.750902527075812, "grad_norm": 356.94097900390625, "learning_rate": 0.0001856981727896724, "loss": 7.3119, "step": 147510 }, { "epoch": 17.75210589651023, "grad_norm": 314.382080078125, "learning_rate": 0.00018569621220940058, "loss": 7.2416, "step": 147520 }, { "epoch": 17.753309265944644, "grad_norm": 644.947021484375, "learning_rate": 0.00018569425150510518, "loss": 7.3448, "step": 147530 }, { "epoch": 17.75451263537906, "grad_norm": 244.98236083984375, "learning_rate": 0.00018569229067678906, "loss": 7.3027, "step": 147540 }, { "epoch": 17.75571600481348, "grad_norm": 2374.938720703125, "learning_rate": 0.00018569032972445503, "loss": 7.2604, "step": 147550 }, { "epoch": 17.756919374247893, "grad_norm": 39953.62890625, "learning_rate": 0.00018568836864810595, "loss": 7.2572, "step": 147560 }, { "epoch": 17.75812274368231, "grad_norm": 963.0919799804688, "learning_rate": 0.0001856864074477446, "loss": 7.3877, "step": 147570 }, { "epoch": 17.759326113116728, "grad_norm": 1925.2265625, "learning_rate": 0.0001856844461233739, "loss": 7.1947, "step": 147580 }, { "epoch": 17.760529482551142, "grad_norm": 3750.310546875, "learning_rate": 0.00018568248467499667, "loss": 7.2778, "step": 147590 }, { "epoch": 17.76173285198556, "grad_norm": 1256.3724365234375, "learning_rate": 0.0001856805231026157, "loss": 7.2844, "step": 147600 }, { "epoch": 17.762936221419977, "grad_norm": 2453.166015625, "learning_rate": 0.00018567856140623386, "loss": 7.3343, "step": 147610 }, { "epoch": 17.76413959085439, "grad_norm": 961.9985961914062, "learning_rate": 0.000185676599585854, "loss": 7.1797, "step": 147620 }, { "epoch": 17.76534296028881, "grad_norm": 810.9354858398438, "learning_rate": 0.00018567463764147894, "loss": 7.271, "step": 147630 }, { "epoch": 17.766546329723226, "grad_norm": 3024.932861328125, "learning_rate": 0.0001856726755731115, "loss": 7.2779, "step": 147640 }, { "epoch": 17.76774969915764, "grad_norm": 2884.401611328125, "learning_rate": 0.0001856707133807546, "loss": 7.2561, "step": 147650 }, { "epoch": 17.768953068592058, "grad_norm": 213.5396270751953, "learning_rate": 0.000185668751064411, "loss": 7.2765, "step": 147660 }, { "epoch": 17.770156438026476, "grad_norm": 26260.826171875, "learning_rate": 0.00018566678862408357, "loss": 7.3276, "step": 147670 }, { "epoch": 17.77135980746089, "grad_norm": 800.679931640625, "learning_rate": 0.00018566482605977512, "loss": 7.1884, "step": 147680 }, { "epoch": 17.772563176895307, "grad_norm": 784.98583984375, "learning_rate": 0.00018566286337148857, "loss": 7.2952, "step": 147690 }, { "epoch": 17.773766546329725, "grad_norm": 631.4320068359375, "learning_rate": 0.00018566090055922667, "loss": 7.2861, "step": 147700 }, { "epoch": 17.77496991576414, "grad_norm": 4246.47021484375, "learning_rate": 0.0001856589376229923, "loss": 7.2888, "step": 147710 }, { "epoch": 17.776173285198556, "grad_norm": 1353.4666748046875, "learning_rate": 0.0001856569745627883, "loss": 7.3849, "step": 147720 }, { "epoch": 17.777376654632974, "grad_norm": 599.0125732421875, "learning_rate": 0.00018565501137861751, "loss": 7.2951, "step": 147730 }, { "epoch": 17.778580024067388, "grad_norm": 856.3153076171875, "learning_rate": 0.0001856530480704828, "loss": 7.4197, "step": 147740 }, { "epoch": 17.779783393501805, "grad_norm": 337.82281494140625, "learning_rate": 0.00018565108463838693, "loss": 7.3237, "step": 147750 }, { "epoch": 17.780986762936223, "grad_norm": 1585.96533203125, "learning_rate": 0.00018564912108233282, "loss": 7.3863, "step": 147760 }, { "epoch": 17.782190132370637, "grad_norm": 6684.890625, "learning_rate": 0.0001856471574023233, "loss": 7.3317, "step": 147770 }, { "epoch": 17.783393501805055, "grad_norm": 795.8919067382812, "learning_rate": 0.00018564519359836118, "loss": 7.2081, "step": 147780 }, { "epoch": 17.784596871239472, "grad_norm": 411.29083251953125, "learning_rate": 0.00018564322967044934, "loss": 7.2962, "step": 147790 }, { "epoch": 17.785800240673886, "grad_norm": 1413.302978515625, "learning_rate": 0.00018564126561859058, "loss": 7.3072, "step": 147800 }, { "epoch": 17.787003610108304, "grad_norm": 2164.65185546875, "learning_rate": 0.00018563930144278778, "loss": 7.3282, "step": 147810 }, { "epoch": 17.78820697954272, "grad_norm": 408.6491394042969, "learning_rate": 0.00018563733714304376, "loss": 7.3427, "step": 147820 }, { "epoch": 17.789410348977135, "grad_norm": 632.2469482421875, "learning_rate": 0.00018563537271936136, "loss": 7.2408, "step": 147830 }, { "epoch": 17.790613718411553, "grad_norm": 320.8076477050781, "learning_rate": 0.00018563340817174344, "loss": 7.3619, "step": 147840 }, { "epoch": 17.79181708784597, "grad_norm": 1550.425537109375, "learning_rate": 0.00018563144350019283, "loss": 7.3577, "step": 147850 }, { "epoch": 17.793020457280385, "grad_norm": 145.00375366210938, "learning_rate": 0.0001856294787047124, "loss": 7.283, "step": 147860 }, { "epoch": 17.794223826714802, "grad_norm": 266.2540588378906, "learning_rate": 0.00018562751378530497, "loss": 7.4418, "step": 147870 }, { "epoch": 17.79542719614922, "grad_norm": 363.5646667480469, "learning_rate": 0.00018562554874197335, "loss": 7.3938, "step": 147880 }, { "epoch": 17.796630565583634, "grad_norm": 182.42591857910156, "learning_rate": 0.00018562358357472047, "loss": 7.4441, "step": 147890 }, { "epoch": 17.79783393501805, "grad_norm": 307.95489501953125, "learning_rate": 0.0001856216182835491, "loss": 7.458, "step": 147900 }, { "epoch": 17.799037304452465, "grad_norm": 797.8807983398438, "learning_rate": 0.0001856196528684621, "loss": 7.3688, "step": 147910 }, { "epoch": 17.800240673886883, "grad_norm": 314.898681640625, "learning_rate": 0.00018561768732946234, "loss": 7.4573, "step": 147920 }, { "epoch": 17.8014440433213, "grad_norm": 267.087158203125, "learning_rate": 0.00018561572166655267, "loss": 7.433, "step": 147930 }, { "epoch": 17.802647412755714, "grad_norm": 1739.5926513671875, "learning_rate": 0.00018561375587973586, "loss": 7.407, "step": 147940 }, { "epoch": 17.803850782190132, "grad_norm": 3493.8916015625, "learning_rate": 0.00018561178996901484, "loss": 7.3938, "step": 147950 }, { "epoch": 17.80505415162455, "grad_norm": 384.19293212890625, "learning_rate": 0.0001856098239343924, "loss": 7.382, "step": 147960 }, { "epoch": 17.806257521058964, "grad_norm": 3504.621337890625, "learning_rate": 0.00018560785777587145, "loss": 7.455, "step": 147970 }, { "epoch": 17.80746089049338, "grad_norm": 631.4364624023438, "learning_rate": 0.00018560589149345475, "loss": 7.4463, "step": 147980 }, { "epoch": 17.8086642599278, "grad_norm": 334.4178771972656, "learning_rate": 0.0001856039250871452, "loss": 7.3855, "step": 147990 }, { "epoch": 17.809867629362213, "grad_norm": 1590.579345703125, "learning_rate": 0.00018560195855694565, "loss": 7.4281, "step": 148000 }, { "epoch": 17.81107099879663, "grad_norm": 219.71746826171875, "learning_rate": 0.00018559999190285894, "loss": 7.4253, "step": 148010 }, { "epoch": 17.812274368231048, "grad_norm": 156.32762145996094, "learning_rate": 0.00018559802512488788, "loss": 7.4864, "step": 148020 }, { "epoch": 17.813477737665462, "grad_norm": 744.1489868164062, "learning_rate": 0.0001855960582230354, "loss": 7.3859, "step": 148030 }, { "epoch": 17.81468110709988, "grad_norm": 266.0203552246094, "learning_rate": 0.0001855940911973042, "loss": 7.4715, "step": 148040 }, { "epoch": 17.815884476534297, "grad_norm": 357.6445617675781, "learning_rate": 0.0001855921240476973, "loss": 7.4881, "step": 148050 }, { "epoch": 17.81708784596871, "grad_norm": 134.54136657714844, "learning_rate": 0.0001855901567742174, "loss": 7.5326, "step": 148060 }, { "epoch": 17.81829121540313, "grad_norm": 476.8880920410156, "learning_rate": 0.00018558818937686744, "loss": 7.6585, "step": 148070 }, { "epoch": 17.819494584837546, "grad_norm": 976.9490356445312, "learning_rate": 0.00018558622185565023, "loss": 7.5011, "step": 148080 }, { "epoch": 17.82069795427196, "grad_norm": 250.97000122070312, "learning_rate": 0.00018558425421056867, "loss": 7.4828, "step": 148090 }, { "epoch": 17.821901323706378, "grad_norm": 202.69407653808594, "learning_rate": 0.00018558228644162552, "loss": 7.5045, "step": 148100 }, { "epoch": 17.823104693140795, "grad_norm": 265.7468566894531, "learning_rate": 0.00018558031854882368, "loss": 7.47, "step": 148110 }, { "epoch": 17.82430806257521, "grad_norm": 211.81085205078125, "learning_rate": 0.000185578350532166, "loss": 7.561, "step": 148120 }, { "epoch": 17.825511432009627, "grad_norm": 53.499542236328125, "learning_rate": 0.0001855763823916553, "loss": 7.5194, "step": 148130 }, { "epoch": 17.826714801444044, "grad_norm": 73.63157653808594, "learning_rate": 0.00018557441412729445, "loss": 7.5213, "step": 148140 }, { "epoch": 17.82791817087846, "grad_norm": 136.89990234375, "learning_rate": 0.00018557244573908633, "loss": 7.4758, "step": 148150 }, { "epoch": 17.829121540312876, "grad_norm": 66.81103515625, "learning_rate": 0.00018557047722703374, "loss": 7.4555, "step": 148160 }, { "epoch": 17.830324909747294, "grad_norm": 237.77450561523438, "learning_rate": 0.00018556850859113952, "loss": 7.5492, "step": 148170 }, { "epoch": 17.831528279181708, "grad_norm": 530.3276977539062, "learning_rate": 0.00018556653983140654, "loss": 7.4046, "step": 148180 }, { "epoch": 17.832731648616125, "grad_norm": 108.76467895507812, "learning_rate": 0.00018556457094783766, "loss": 7.4829, "step": 148190 }, { "epoch": 17.833935018050543, "grad_norm": 234.10568237304688, "learning_rate": 0.00018556260194043574, "loss": 7.4189, "step": 148200 }, { "epoch": 17.835138387484957, "grad_norm": 62.14372253417969, "learning_rate": 0.0001855606328092036, "loss": 7.5113, "step": 148210 }, { "epoch": 17.836341756919374, "grad_norm": 113.21151733398438, "learning_rate": 0.0001855586635541441, "loss": 7.4875, "step": 148220 }, { "epoch": 17.837545126353792, "grad_norm": 221.73915100097656, "learning_rate": 0.00018555669417526008, "loss": 7.3501, "step": 148230 }, { "epoch": 17.838748495788206, "grad_norm": 52.65148162841797, "learning_rate": 0.0001855547246725544, "loss": 7.4315, "step": 148240 }, { "epoch": 17.839951865222623, "grad_norm": 52.15606689453125, "learning_rate": 0.00018555275504602992, "loss": 7.3908, "step": 148250 }, { "epoch": 17.84115523465704, "grad_norm": 476.1154479980469, "learning_rate": 0.00018555078529568946, "loss": 7.4344, "step": 148260 }, { "epoch": 17.842358604091455, "grad_norm": 309.08978271484375, "learning_rate": 0.0001855488154215359, "loss": 7.3377, "step": 148270 }, { "epoch": 17.843561973525873, "grad_norm": 74.35446166992188, "learning_rate": 0.0001855468454235721, "loss": 7.4406, "step": 148280 }, { "epoch": 17.84476534296029, "grad_norm": 119.46814727783203, "learning_rate": 0.0001855448753018009, "loss": 7.4396, "step": 148290 }, { "epoch": 17.845968712394704, "grad_norm": 101.33686828613281, "learning_rate": 0.0001855429050562251, "loss": 7.4416, "step": 148300 }, { "epoch": 17.84717208182912, "grad_norm": 126.79830169677734, "learning_rate": 0.00018554093468684766, "loss": 7.5306, "step": 148310 }, { "epoch": 17.84837545126354, "grad_norm": 118.94538879394531, "learning_rate": 0.00018553896419367131, "loss": 7.4806, "step": 148320 }, { "epoch": 17.849578820697953, "grad_norm": 185.35955810546875, "learning_rate": 0.00018553699357669898, "loss": 7.3903, "step": 148330 }, { "epoch": 17.85078219013237, "grad_norm": 46.131656646728516, "learning_rate": 0.00018553502283593352, "loss": 7.467, "step": 148340 }, { "epoch": 17.85198555956679, "grad_norm": 51.31923294067383, "learning_rate": 0.00018553305197137774, "loss": 7.4876, "step": 148350 }, { "epoch": 17.853188929001202, "grad_norm": 47.45521926879883, "learning_rate": 0.00018553108098303452, "loss": 7.5148, "step": 148360 }, { "epoch": 17.85439229843562, "grad_norm": 86.61430358886719, "learning_rate": 0.00018552910987090676, "loss": 7.4264, "step": 148370 }, { "epoch": 17.855595667870038, "grad_norm": 339.5720520019531, "learning_rate": 0.00018552713863499717, "loss": 7.4492, "step": 148380 }, { "epoch": 17.85679903730445, "grad_norm": 159.4058837890625, "learning_rate": 0.00018552516727530875, "loss": 7.4231, "step": 148390 }, { "epoch": 17.85800240673887, "grad_norm": 103.97511291503906, "learning_rate": 0.0001855231957918443, "loss": 7.3985, "step": 148400 }, { "epoch": 17.859205776173287, "grad_norm": 77.51788330078125, "learning_rate": 0.00018552122418460666, "loss": 7.402, "step": 148410 }, { "epoch": 17.8604091456077, "grad_norm": 75.72032165527344, "learning_rate": 0.0001855192524535987, "loss": 7.3733, "step": 148420 }, { "epoch": 17.86161251504212, "grad_norm": 45.09842300415039, "learning_rate": 0.00018551728059882326, "loss": 7.42, "step": 148430 }, { "epoch": 17.862815884476536, "grad_norm": 49.23476791381836, "learning_rate": 0.00018551530862028322, "loss": 7.4076, "step": 148440 }, { "epoch": 17.86401925391095, "grad_norm": 76.97235870361328, "learning_rate": 0.00018551333651798137, "loss": 7.4252, "step": 148450 }, { "epoch": 17.865222623345367, "grad_norm": 53.90160369873047, "learning_rate": 0.00018551136429192065, "loss": 7.4592, "step": 148460 }, { "epoch": 17.866425992779785, "grad_norm": 233.33580017089844, "learning_rate": 0.00018550939194210387, "loss": 7.4445, "step": 148470 }, { "epoch": 17.8676293622142, "grad_norm": 161.20196533203125, "learning_rate": 0.00018550741946853387, "loss": 7.4449, "step": 148480 }, { "epoch": 17.868832731648617, "grad_norm": 53.023536682128906, "learning_rate": 0.00018550544687121354, "loss": 7.4052, "step": 148490 }, { "epoch": 17.870036101083034, "grad_norm": 174.26846313476562, "learning_rate": 0.00018550347415014572, "loss": 7.4383, "step": 148500 }, { "epoch": 17.871239470517448, "grad_norm": 148.2725372314453, "learning_rate": 0.0001855015013053332, "loss": 7.4779, "step": 148510 }, { "epoch": 17.872442839951866, "grad_norm": 237.47178649902344, "learning_rate": 0.000185499528336779, "loss": 7.4529, "step": 148520 }, { "epoch": 17.87364620938628, "grad_norm": 80.06126403808594, "learning_rate": 0.0001854975552444858, "loss": 7.3896, "step": 148530 }, { "epoch": 17.874849578820697, "grad_norm": 115.9737548828125, "learning_rate": 0.00018549558202845656, "loss": 7.4874, "step": 148540 }, { "epoch": 17.876052948255115, "grad_norm": 50.52712631225586, "learning_rate": 0.0001854936086886941, "loss": 7.4149, "step": 148550 }, { "epoch": 17.87725631768953, "grad_norm": 176.19664001464844, "learning_rate": 0.00018549163522520127, "loss": 7.4091, "step": 148560 }, { "epoch": 17.878459687123947, "grad_norm": 108.81830596923828, "learning_rate": 0.00018548966163798095, "loss": 7.4882, "step": 148570 }, { "epoch": 17.879663056558364, "grad_norm": 27.57801628112793, "learning_rate": 0.00018548768792703597, "loss": 7.3848, "step": 148580 }, { "epoch": 17.880866425992778, "grad_norm": 170.5851593017578, "learning_rate": 0.00018548571409236924, "loss": 7.3669, "step": 148590 }, { "epoch": 17.882069795427196, "grad_norm": 79.25787353515625, "learning_rate": 0.00018548374013398352, "loss": 7.5217, "step": 148600 }, { "epoch": 17.883273164861613, "grad_norm": 579.5789184570312, "learning_rate": 0.00018548176605188177, "loss": 7.4688, "step": 148610 }, { "epoch": 17.884476534296027, "grad_norm": 213.94871520996094, "learning_rate": 0.00018547979184606678, "loss": 7.4313, "step": 148620 }, { "epoch": 17.885679903730445, "grad_norm": 105.76802062988281, "learning_rate": 0.0001854778175165414, "loss": 7.4935, "step": 148630 }, { "epoch": 17.886883273164862, "grad_norm": 140.32843017578125, "learning_rate": 0.00018547584306330853, "loss": 7.326, "step": 148640 }, { "epoch": 17.888086642599276, "grad_norm": 113.10185241699219, "learning_rate": 0.00018547386848637103, "loss": 7.3528, "step": 148650 }, { "epoch": 17.889290012033694, "grad_norm": 75.72783660888672, "learning_rate": 0.00018547189378573172, "loss": 7.4721, "step": 148660 }, { "epoch": 17.89049338146811, "grad_norm": 96.25650024414062, "learning_rate": 0.0001854699189613935, "loss": 7.4121, "step": 148670 }, { "epoch": 17.891696750902526, "grad_norm": 40.19290542602539, "learning_rate": 0.0001854679440133592, "loss": 7.4876, "step": 148680 }, { "epoch": 17.892900120336943, "grad_norm": 160.92198181152344, "learning_rate": 0.00018546596894163166, "loss": 7.4104, "step": 148690 }, { "epoch": 17.89410348977136, "grad_norm": 163.02830505371094, "learning_rate": 0.00018546399374621376, "loss": 7.4879, "step": 148700 }, { "epoch": 17.895306859205775, "grad_norm": 95.01387786865234, "learning_rate": 0.0001854620184271084, "loss": 7.434, "step": 148710 }, { "epoch": 17.896510228640192, "grad_norm": 135.92922973632812, "learning_rate": 0.0001854600429843184, "loss": 7.5638, "step": 148720 }, { "epoch": 17.89771359807461, "grad_norm": 50.186397552490234, "learning_rate": 0.00018545806741784656, "loss": 7.4918, "step": 148730 }, { "epoch": 17.898916967509024, "grad_norm": 60.19594955444336, "learning_rate": 0.00018545609172769583, "loss": 7.4068, "step": 148740 }, { "epoch": 17.90012033694344, "grad_norm": 111.28684997558594, "learning_rate": 0.00018545411591386904, "loss": 7.4894, "step": 148750 }, { "epoch": 17.90132370637786, "grad_norm": 409.53533935546875, "learning_rate": 0.00018545213997636905, "loss": 7.4667, "step": 148760 }, { "epoch": 17.902527075812273, "grad_norm": 522.3593139648438, "learning_rate": 0.00018545016391519872, "loss": 7.3409, "step": 148770 }, { "epoch": 17.90373044524669, "grad_norm": 115.39041900634766, "learning_rate": 0.00018544818773036088, "loss": 7.4288, "step": 148780 }, { "epoch": 17.904933814681108, "grad_norm": 156.78968811035156, "learning_rate": 0.00018544621142185845, "loss": 7.3581, "step": 148790 }, { "epoch": 17.906137184115522, "grad_norm": 84.30831909179688, "learning_rate": 0.00018544423498969423, "loss": 7.3875, "step": 148800 }, { "epoch": 17.90734055354994, "grad_norm": 66.62500762939453, "learning_rate": 0.00018544225843387114, "loss": 7.3363, "step": 148810 }, { "epoch": 17.908543922984357, "grad_norm": 93.46416473388672, "learning_rate": 0.00018544028175439197, "loss": 7.3188, "step": 148820 }, { "epoch": 17.90974729241877, "grad_norm": 115.00357055664062, "learning_rate": 0.00018543830495125962, "loss": 7.2761, "step": 148830 }, { "epoch": 17.91095066185319, "grad_norm": 396.2091979980469, "learning_rate": 0.00018543632802447697, "loss": 7.358, "step": 148840 }, { "epoch": 17.912154031287606, "grad_norm": 521.9418334960938, "learning_rate": 0.00018543435097404686, "loss": 7.3077, "step": 148850 }, { "epoch": 17.91335740072202, "grad_norm": 278.07623291015625, "learning_rate": 0.00018543237379997215, "loss": 7.3496, "step": 148860 }, { "epoch": 17.914560770156438, "grad_norm": 134.51907348632812, "learning_rate": 0.0001854303965022557, "loss": 7.2756, "step": 148870 }, { "epoch": 17.915764139590856, "grad_norm": 229.17855834960938, "learning_rate": 0.00018542841908090034, "loss": 7.4308, "step": 148880 }, { "epoch": 17.91696750902527, "grad_norm": 352.9532165527344, "learning_rate": 0.000185426441535909, "loss": 7.3495, "step": 148890 }, { "epoch": 17.918170878459687, "grad_norm": 75.5967788696289, "learning_rate": 0.0001854244638672845, "loss": 7.413, "step": 148900 }, { "epoch": 17.919374247894105, "grad_norm": 97.91011047363281, "learning_rate": 0.00018542248607502973, "loss": 7.3611, "step": 148910 }, { "epoch": 17.92057761732852, "grad_norm": 69.5083236694336, "learning_rate": 0.0001854205081591475, "loss": 7.4491, "step": 148920 }, { "epoch": 17.921780986762936, "grad_norm": 92.9021224975586, "learning_rate": 0.00018541853011964072, "loss": 7.3431, "step": 148930 }, { "epoch": 17.922984356197354, "grad_norm": 851.4580078125, "learning_rate": 0.00018541655195651222, "loss": 7.4176, "step": 148940 }, { "epoch": 17.924187725631768, "grad_norm": 592.9284057617188, "learning_rate": 0.00018541457366976491, "loss": 7.3375, "step": 148950 }, { "epoch": 17.925391095066185, "grad_norm": 570.1676025390625, "learning_rate": 0.0001854125952594016, "loss": 7.3668, "step": 148960 }, { "epoch": 17.926594464500603, "grad_norm": 923.6080322265625, "learning_rate": 0.0001854106167254252, "loss": 7.4887, "step": 148970 }, { "epoch": 17.927797833935017, "grad_norm": 110.47050476074219, "learning_rate": 0.00018540863806783855, "loss": 7.4161, "step": 148980 }, { "epoch": 17.929001203369435, "grad_norm": 4797.53125, "learning_rate": 0.0001854066592866445, "loss": 7.3715, "step": 148990 }, { "epoch": 17.930204572803852, "grad_norm": 92.46206665039062, "learning_rate": 0.00018540468038184593, "loss": 7.4468, "step": 149000 }, { "epoch": 17.931407942238266, "grad_norm": 92.92508697509766, "learning_rate": 0.0001854027013534457, "loss": 7.4184, "step": 149010 }, { "epoch": 17.932611311672684, "grad_norm": 831.9789428710938, "learning_rate": 0.00018540072220144666, "loss": 7.5265, "step": 149020 }, { "epoch": 17.9338146811071, "grad_norm": 65246.26953125, "learning_rate": 0.00018539874292585173, "loss": 7.3995, "step": 149030 }, { "epoch": 17.935018050541515, "grad_norm": 169.46783447265625, "learning_rate": 0.0001853967635266637, "loss": 7.4446, "step": 149040 }, { "epoch": 17.936221419975933, "grad_norm": 31.190034866333008, "learning_rate": 0.0001853947840038855, "loss": 7.4576, "step": 149050 }, { "epoch": 17.93742478941035, "grad_norm": 708.36279296875, "learning_rate": 0.00018539280435751992, "loss": 7.5075, "step": 149060 }, { "epoch": 17.938628158844764, "grad_norm": 759.9669189453125, "learning_rate": 0.00018539082458756992, "loss": 7.6613, "step": 149070 }, { "epoch": 17.939831528279182, "grad_norm": 32.73354721069336, "learning_rate": 0.0001853888446940383, "loss": 7.4739, "step": 149080 }, { "epoch": 17.9410348977136, "grad_norm": 138.23373413085938, "learning_rate": 0.0001853868646769279, "loss": 7.5144, "step": 149090 }, { "epoch": 17.942238267148014, "grad_norm": 60.550025939941406, "learning_rate": 0.00018538488453624167, "loss": 7.481, "step": 149100 }, { "epoch": 17.94344163658243, "grad_norm": 77.27680206298828, "learning_rate": 0.00018538290427198243, "loss": 7.3966, "step": 149110 }, { "epoch": 17.94464500601685, "grad_norm": 349.0970153808594, "learning_rate": 0.00018538092388415304, "loss": 7.4954, "step": 149120 }, { "epoch": 17.945848375451263, "grad_norm": 326.36029052734375, "learning_rate": 0.00018537894337275636, "loss": 7.457, "step": 149130 }, { "epoch": 17.94705174488568, "grad_norm": 529.8981323242188, "learning_rate": 0.00018537696273779528, "loss": 7.4533, "step": 149140 }, { "epoch": 17.948255114320098, "grad_norm": 54.149330139160156, "learning_rate": 0.00018537498197927264, "loss": 7.4147, "step": 149150 }, { "epoch": 17.949458483754512, "grad_norm": 63.90738296508789, "learning_rate": 0.00018537300109719134, "loss": 7.423, "step": 149160 }, { "epoch": 17.95066185318893, "grad_norm": 33.161537170410156, "learning_rate": 0.00018537102009155422, "loss": 7.4976, "step": 149170 }, { "epoch": 17.951865222623347, "grad_norm": 109.87821960449219, "learning_rate": 0.00018536903896236418, "loss": 7.4773, "step": 149180 }, { "epoch": 17.95306859205776, "grad_norm": 50.217193603515625, "learning_rate": 0.00018536705770962403, "loss": 7.4163, "step": 149190 }, { "epoch": 17.95427196149218, "grad_norm": 282.6112976074219, "learning_rate": 0.00018536507633333668, "loss": 7.3643, "step": 149200 }, { "epoch": 17.955475330926596, "grad_norm": 242.79551696777344, "learning_rate": 0.000185363094833505, "loss": 7.424, "step": 149210 }, { "epoch": 17.95667870036101, "grad_norm": 258.24517822265625, "learning_rate": 0.00018536111321013185, "loss": 7.339, "step": 149220 }, { "epoch": 17.957882069795428, "grad_norm": 73.15386199951172, "learning_rate": 0.0001853591314632201, "loss": 7.3714, "step": 149230 }, { "epoch": 17.959085439229845, "grad_norm": 90.18331909179688, "learning_rate": 0.00018535714959277258, "loss": 7.4321, "step": 149240 }, { "epoch": 17.96028880866426, "grad_norm": 178.28907775878906, "learning_rate": 0.00018535516759879222, "loss": 7.4123, "step": 149250 }, { "epoch": 17.961492178098677, "grad_norm": 41.904869079589844, "learning_rate": 0.00018535318548128184, "loss": 7.3803, "step": 149260 }, { "epoch": 17.96269554753309, "grad_norm": 189.78482055664062, "learning_rate": 0.00018535120324024434, "loss": 7.4245, "step": 149270 }, { "epoch": 17.96389891696751, "grad_norm": 117.08611297607422, "learning_rate": 0.0001853492208756826, "loss": 7.4566, "step": 149280 }, { "epoch": 17.965102286401926, "grad_norm": 96.96859741210938, "learning_rate": 0.00018534723838759941, "loss": 7.4392, "step": 149290 }, { "epoch": 17.96630565583634, "grad_norm": 653.5653076171875, "learning_rate": 0.00018534525577599772, "loss": 7.3919, "step": 149300 }, { "epoch": 17.967509025270758, "grad_norm": 4584.6845703125, "learning_rate": 0.00018534327304088038, "loss": 7.3663, "step": 149310 }, { "epoch": 17.968712394705175, "grad_norm": 607.0430297851562, "learning_rate": 0.00018534129018225027, "loss": 7.4837, "step": 149320 }, { "epoch": 17.96991576413959, "grad_norm": 380.6038818359375, "learning_rate": 0.0001853393072001102, "loss": 7.3697, "step": 149330 }, { "epoch": 17.971119133574007, "grad_norm": 394.68377685546875, "learning_rate": 0.0001853373240944631, "loss": 7.3179, "step": 149340 }, { "epoch": 17.972322503008424, "grad_norm": 542.9088134765625, "learning_rate": 0.00018533534086531185, "loss": 7.4092, "step": 149350 }, { "epoch": 17.97352587244284, "grad_norm": 1883.626220703125, "learning_rate": 0.00018533335751265924, "loss": 7.2972, "step": 149360 }, { "epoch": 17.974729241877256, "grad_norm": 1593.1341552734375, "learning_rate": 0.00018533137403650823, "loss": 7.3951, "step": 149370 }, { "epoch": 17.975932611311674, "grad_norm": 1005.785888671875, "learning_rate": 0.00018532939043686163, "loss": 7.3128, "step": 149380 }, { "epoch": 17.977135980746088, "grad_norm": 2285.49072265625, "learning_rate": 0.00018532740671372238, "loss": 7.4595, "step": 149390 }, { "epoch": 17.978339350180505, "grad_norm": 361.1526184082031, "learning_rate": 0.00018532542286709328, "loss": 7.4691, "step": 149400 }, { "epoch": 17.979542719614923, "grad_norm": 7603.2060546875, "learning_rate": 0.0001853234388969772, "loss": 7.3966, "step": 149410 }, { "epoch": 17.980746089049337, "grad_norm": 26465.263671875, "learning_rate": 0.00018532145480337707, "loss": 7.3971, "step": 149420 }, { "epoch": 17.981949458483754, "grad_norm": 1138.2674560546875, "learning_rate": 0.00018531947058629574, "loss": 7.2141, "step": 149430 }, { "epoch": 17.983152827918172, "grad_norm": 430.54022216796875, "learning_rate": 0.00018531748624573603, "loss": 7.4032, "step": 149440 }, { "epoch": 17.984356197352586, "grad_norm": 340.5308837890625, "learning_rate": 0.0001853155017817009, "loss": 7.4153, "step": 149450 }, { "epoch": 17.985559566787003, "grad_norm": 539.905029296875, "learning_rate": 0.00018531351719419317, "loss": 7.3884, "step": 149460 }, { "epoch": 17.98676293622142, "grad_norm": 515.404541015625, "learning_rate": 0.00018531153248321568, "loss": 7.3655, "step": 149470 }, { "epoch": 17.987966305655835, "grad_norm": 1004.9083862304688, "learning_rate": 0.00018530954764877137, "loss": 7.4721, "step": 149480 }, { "epoch": 17.989169675090253, "grad_norm": 64222.359375, "learning_rate": 0.00018530756269086307, "loss": 7.3745, "step": 149490 }, { "epoch": 17.99037304452467, "grad_norm": 117.87918090820312, "learning_rate": 0.00018530557760949368, "loss": 7.3749, "step": 149500 }, { "epoch": 17.991576413959084, "grad_norm": 6933.921875, "learning_rate": 0.00018530359240466604, "loss": 7.3617, "step": 149510 }, { "epoch": 17.9927797833935, "grad_norm": 84.59420013427734, "learning_rate": 0.00018530160707638305, "loss": 7.3928, "step": 149520 }, { "epoch": 17.99398315282792, "grad_norm": 408.6295471191406, "learning_rate": 0.00018529962162464755, "loss": 7.3956, "step": 149530 }, { "epoch": 17.995186522262333, "grad_norm": 19631.50390625, "learning_rate": 0.0001852976360494625, "loss": 7.4022, "step": 149540 }, { "epoch": 17.99638989169675, "grad_norm": 111.64810180664062, "learning_rate": 0.00018529565035083065, "loss": 7.4755, "step": 149550 }, { "epoch": 17.99759326113117, "grad_norm": 568.51025390625, "learning_rate": 0.00018529366452875494, "loss": 7.4421, "step": 149560 }, { "epoch": 17.998796630565582, "grad_norm": 84.73117065429688, "learning_rate": 0.0001852916785832383, "loss": 7.4819, "step": 149570 }, { "epoch": 18.0, "grad_norm": 111.88549041748047, "learning_rate": 0.0001852896925142835, "loss": 7.362, "step": 149580 }, { "epoch": 18.0, "eval_loss": 7.366647720336914, "eval_runtime": 118.9567, "eval_samples_per_second": 62.098, "eval_steps_per_second": 7.768, "step": 149580 }, { "epoch": 18.001203369434418, "grad_norm": 2082.1474609375, "learning_rate": 0.0001852877063218934, "loss": 7.3862, "step": 149590 }, { "epoch": 18.00240673886883, "grad_norm": 151.06271362304688, "learning_rate": 0.000185285720006071, "loss": 7.3733, "step": 149600 }, { "epoch": 18.00361010830325, "grad_norm": 338.38525390625, "learning_rate": 0.0001852837335668191, "loss": 7.3697, "step": 149610 }, { "epoch": 18.004813477737667, "grad_norm": 305.02740478515625, "learning_rate": 0.0001852817470041406, "loss": 7.3517, "step": 149620 }, { "epoch": 18.00601684717208, "grad_norm": 64.78070068359375, "learning_rate": 0.00018527976031803832, "loss": 7.3814, "step": 149630 }, { "epoch": 18.0072202166065, "grad_norm": 525.9762573242188, "learning_rate": 0.00018527777350851516, "loss": 7.3542, "step": 149640 }, { "epoch": 18.008423586040916, "grad_norm": 1789.5118408203125, "learning_rate": 0.00018527578657557406, "loss": 7.3802, "step": 149650 }, { "epoch": 18.00962695547533, "grad_norm": 172.57984924316406, "learning_rate": 0.0001852737995192178, "loss": 7.4317, "step": 149660 }, { "epoch": 18.010830324909747, "grad_norm": 3822.205078125, "learning_rate": 0.00018527181233944932, "loss": 7.2932, "step": 149670 }, { "epoch": 18.012033694344165, "grad_norm": 304.72161865234375, "learning_rate": 0.00018526982503627148, "loss": 7.334, "step": 149680 }, { "epoch": 18.01323706377858, "grad_norm": 103.07905578613281, "learning_rate": 0.00018526783760968712, "loss": 7.3166, "step": 149690 }, { "epoch": 18.014440433212997, "grad_norm": 111.76778411865234, "learning_rate": 0.0001852658500596992, "loss": 7.3012, "step": 149700 }, { "epoch": 18.015643802647414, "grad_norm": 173.53530883789062, "learning_rate": 0.0001852638623863105, "loss": 7.2736, "step": 149710 }, { "epoch": 18.016847172081828, "grad_norm": 1576.4041748046875, "learning_rate": 0.00018526187458952394, "loss": 7.398, "step": 149720 }, { "epoch": 18.018050541516246, "grad_norm": 1148.1187744140625, "learning_rate": 0.0001852598866693424, "loss": 7.3654, "step": 149730 }, { "epoch": 18.019253910950663, "grad_norm": 123.3941879272461, "learning_rate": 0.00018525789862576875, "loss": 7.4426, "step": 149740 }, { "epoch": 18.020457280385077, "grad_norm": 771.9910278320312, "learning_rate": 0.0001852559104588059, "loss": 7.3355, "step": 149750 }, { "epoch": 18.021660649819495, "grad_norm": 1503.333740234375, "learning_rate": 0.0001852539221684567, "loss": 7.3849, "step": 149760 }, { "epoch": 18.022864019253912, "grad_norm": 1722.6297607421875, "learning_rate": 0.00018525193375472398, "loss": 7.371, "step": 149770 }, { "epoch": 18.024067388688326, "grad_norm": 2357.4091796875, "learning_rate": 0.00018524994521761069, "loss": 7.3561, "step": 149780 }, { "epoch": 18.025270758122744, "grad_norm": 460.76104736328125, "learning_rate": 0.00018524795655711968, "loss": 7.3813, "step": 149790 }, { "epoch": 18.02647412755716, "grad_norm": 1920.828125, "learning_rate": 0.00018524596777325382, "loss": 7.4556, "step": 149800 }, { "epoch": 18.027677496991576, "grad_norm": 9213.4033203125, "learning_rate": 0.00018524397886601602, "loss": 7.3546, "step": 149810 }, { "epoch": 18.028880866425993, "grad_norm": 9927.3623046875, "learning_rate": 0.00018524198983540914, "loss": 7.3267, "step": 149820 }, { "epoch": 18.03008423586041, "grad_norm": 18445.11328125, "learning_rate": 0.00018524000068143604, "loss": 7.4944, "step": 149830 }, { "epoch": 18.031287605294825, "grad_norm": 8243.80078125, "learning_rate": 0.0001852380114040996, "loss": 7.5059, "step": 149840 }, { "epoch": 18.032490974729242, "grad_norm": 1434.2047119140625, "learning_rate": 0.00018523602200340276, "loss": 7.4293, "step": 149850 }, { "epoch": 18.03369434416366, "grad_norm": 983.7843017578125, "learning_rate": 0.0001852340324793483, "loss": 7.4002, "step": 149860 }, { "epoch": 18.034897713598074, "grad_norm": 3585.263427734375, "learning_rate": 0.00018523204283193919, "loss": 7.3998, "step": 149870 }, { "epoch": 18.03610108303249, "grad_norm": 3442.3017578125, "learning_rate": 0.0001852300530611782, "loss": 7.4419, "step": 149880 }, { "epoch": 18.03730445246691, "grad_norm": 743.7374267578125, "learning_rate": 0.00018522806316706837, "loss": 7.4102, "step": 149890 }, { "epoch": 18.038507821901323, "grad_norm": 1866.08251953125, "learning_rate": 0.00018522607314961246, "loss": 7.501, "step": 149900 }, { "epoch": 18.03971119133574, "grad_norm": 405.5949401855469, "learning_rate": 0.00018522408300881337, "loss": 7.3552, "step": 149910 }, { "epoch": 18.040914560770158, "grad_norm": 4595.52783203125, "learning_rate": 0.000185222092744674, "loss": 7.4292, "step": 149920 }, { "epoch": 18.042117930204572, "grad_norm": 388.3169250488281, "learning_rate": 0.0001852201023571972, "loss": 7.3558, "step": 149930 }, { "epoch": 18.04332129963899, "grad_norm": 1000.1346435546875, "learning_rate": 0.0001852181118463859, "loss": 7.3738, "step": 149940 }, { "epoch": 18.044524669073404, "grad_norm": 7603.63720703125, "learning_rate": 0.00018521612121224294, "loss": 7.4176, "step": 149950 }, { "epoch": 18.04572803850782, "grad_norm": 334.28271484375, "learning_rate": 0.0001852141304547712, "loss": 7.4004, "step": 149960 }, { "epoch": 18.04693140794224, "grad_norm": 1391.88037109375, "learning_rate": 0.0001852121395739736, "loss": 7.3678, "step": 149970 }, { "epoch": 18.048134777376653, "grad_norm": 27133.986328125, "learning_rate": 0.000185210148569853, "loss": 7.4173, "step": 149980 }, { "epoch": 18.04933814681107, "grad_norm": 654.1278076171875, "learning_rate": 0.00018520815744241226, "loss": 7.4461, "step": 149990 }, { "epoch": 18.050541516245488, "grad_norm": 653.0599365234375, "learning_rate": 0.00018520616619165429, "loss": 7.4052, "step": 150000 }, { "epoch": 18.051744885679902, "grad_norm": 17197.240234375, "learning_rate": 0.00018520417481758194, "loss": 7.4436, "step": 150010 }, { "epoch": 18.05294825511432, "grad_norm": 991.9915771484375, "learning_rate": 0.0001852021833201981, "loss": 7.4564, "step": 150020 }, { "epoch": 18.054151624548737, "grad_norm": 2691.764892578125, "learning_rate": 0.00018520019169950572, "loss": 7.4842, "step": 150030 }, { "epoch": 18.05535499398315, "grad_norm": 6338.83544921875, "learning_rate": 0.0001851981999555076, "loss": 7.3698, "step": 150040 }, { "epoch": 18.05655836341757, "grad_norm": 185.8012237548828, "learning_rate": 0.00018519620808820663, "loss": 7.2721, "step": 150050 }, { "epoch": 18.057761732851986, "grad_norm": 339.1905212402344, "learning_rate": 0.00018519421609760573, "loss": 7.3596, "step": 150060 }, { "epoch": 18.0589651022864, "grad_norm": 10055.3291015625, "learning_rate": 0.00018519222398370776, "loss": 7.4733, "step": 150070 }, { "epoch": 18.060168471720818, "grad_norm": 384.69122314453125, "learning_rate": 0.00018519023174651562, "loss": 7.4054, "step": 150080 }, { "epoch": 18.061371841155236, "grad_norm": 1484.3526611328125, "learning_rate": 0.00018518823938603219, "loss": 7.4336, "step": 150090 }, { "epoch": 18.06257521058965, "grad_norm": 655.484130859375, "learning_rate": 0.00018518624690226033, "loss": 7.409, "step": 150100 }, { "epoch": 18.063778580024067, "grad_norm": 399.81085205078125, "learning_rate": 0.00018518425429520292, "loss": 7.4643, "step": 150110 }, { "epoch": 18.064981949458485, "grad_norm": 777.1300659179688, "learning_rate": 0.00018518226156486288, "loss": 7.4887, "step": 150120 }, { "epoch": 18.0661853188929, "grad_norm": 1444.622314453125, "learning_rate": 0.00018518026871124307, "loss": 7.357, "step": 150130 }, { "epoch": 18.067388688327316, "grad_norm": 2199.203125, "learning_rate": 0.00018517827573434642, "loss": 7.3952, "step": 150140 }, { "epoch": 18.068592057761734, "grad_norm": 921.2860107421875, "learning_rate": 0.00018517628263417572, "loss": 7.4424, "step": 150150 }, { "epoch": 18.069795427196148, "grad_norm": 1011.6319580078125, "learning_rate": 0.00018517428941073396, "loss": 7.4433, "step": 150160 }, { "epoch": 18.070998796630565, "grad_norm": 355.4660339355469, "learning_rate": 0.00018517229606402393, "loss": 7.4623, "step": 150170 }, { "epoch": 18.072202166064983, "grad_norm": 336.6348876953125, "learning_rate": 0.0001851703025940486, "loss": 7.4286, "step": 150180 }, { "epoch": 18.073405535499397, "grad_norm": 1935.4490966796875, "learning_rate": 0.00018516830900081076, "loss": 7.3342, "step": 150190 }, { "epoch": 18.074608904933815, "grad_norm": 1970.3548583984375, "learning_rate": 0.0001851663152843134, "loss": 7.4098, "step": 150200 }, { "epoch": 18.075812274368232, "grad_norm": 10640.7919921875, "learning_rate": 0.00018516432144455934, "loss": 7.4296, "step": 150210 }, { "epoch": 18.077015643802646, "grad_norm": 5139.22802734375, "learning_rate": 0.00018516232748155148, "loss": 7.4374, "step": 150220 }, { "epoch": 18.078219013237064, "grad_norm": 6197.271484375, "learning_rate": 0.0001851603333952927, "loss": 7.4404, "step": 150230 }, { "epoch": 18.07942238267148, "grad_norm": 814.4199829101562, "learning_rate": 0.0001851583391857859, "loss": 7.4299, "step": 150240 }, { "epoch": 18.080625752105895, "grad_norm": 7303.916015625, "learning_rate": 0.00018515634485303395, "loss": 7.3515, "step": 150250 }, { "epoch": 18.081829121540313, "grad_norm": 37418.73046875, "learning_rate": 0.00018515435039703974, "loss": 7.4337, "step": 150260 }, { "epoch": 18.08303249097473, "grad_norm": 2385.81640625, "learning_rate": 0.0001851523558178062, "loss": 7.393, "step": 150270 }, { "epoch": 18.084235860409144, "grad_norm": 365.7031555175781, "learning_rate": 0.00018515036111533612, "loss": 7.4232, "step": 150280 }, { "epoch": 18.085439229843562, "grad_norm": 4888.36572265625, "learning_rate": 0.0001851483662896325, "loss": 7.3289, "step": 150290 }, { "epoch": 18.08664259927798, "grad_norm": 719.6381225585938, "learning_rate": 0.00018514637134069812, "loss": 7.4972, "step": 150300 }, { "epoch": 18.087845968712394, "grad_norm": 4957.48974609375, "learning_rate": 0.00018514437626853596, "loss": 7.557, "step": 150310 }, { "epoch": 18.08904933814681, "grad_norm": 5745.91552734375, "learning_rate": 0.00018514238107314882, "loss": 7.4551, "step": 150320 }, { "epoch": 18.09025270758123, "grad_norm": 3058.720703125, "learning_rate": 0.0001851403857545397, "loss": 7.4735, "step": 150330 }, { "epoch": 18.091456077015643, "grad_norm": 983.1807250976562, "learning_rate": 0.00018513839031271135, "loss": 7.4255, "step": 150340 }, { "epoch": 18.09265944645006, "grad_norm": 190.457763671875, "learning_rate": 0.00018513639474766676, "loss": 7.4275, "step": 150350 }, { "epoch": 18.093862815884478, "grad_norm": 136.7671661376953, "learning_rate": 0.0001851343990594088, "loss": 7.518, "step": 150360 }, { "epoch": 18.095066185318892, "grad_norm": 62.439186096191406, "learning_rate": 0.00018513240324794034, "loss": 7.5152, "step": 150370 }, { "epoch": 18.09626955475331, "grad_norm": 53.048553466796875, "learning_rate": 0.00018513040731326426, "loss": 7.5674, "step": 150380 }, { "epoch": 18.097472924187727, "grad_norm": 30.955257415771484, "learning_rate": 0.0001851284112553835, "loss": 7.5671, "step": 150390 }, { "epoch": 18.09867629362214, "grad_norm": 35.14524459838867, "learning_rate": 0.00018512641507430086, "loss": 7.5236, "step": 150400 }, { "epoch": 18.09987966305656, "grad_norm": 105.49443817138672, "learning_rate": 0.0001851244187700193, "loss": 7.53, "step": 150410 }, { "epoch": 18.101083032490976, "grad_norm": 26.84381103515625, "learning_rate": 0.0001851224223425417, "loss": 7.5037, "step": 150420 }, { "epoch": 18.10228640192539, "grad_norm": 55.635597229003906, "learning_rate": 0.00018512042579187094, "loss": 7.4922, "step": 150430 }, { "epoch": 18.103489771359808, "grad_norm": 66.30591583251953, "learning_rate": 0.00018511842911800988, "loss": 7.5733, "step": 150440 }, { "epoch": 18.104693140794225, "grad_norm": 111.55799102783203, "learning_rate": 0.00018511643232096148, "loss": 7.5171, "step": 150450 }, { "epoch": 18.10589651022864, "grad_norm": 203.03775024414062, "learning_rate": 0.00018511443540072855, "loss": 7.5406, "step": 150460 }, { "epoch": 18.107099879663057, "grad_norm": 65.03297424316406, "learning_rate": 0.00018511243835731404, "loss": 7.3924, "step": 150470 }, { "epoch": 18.108303249097474, "grad_norm": 247.0765380859375, "learning_rate": 0.0001851104411907208, "loss": 7.5306, "step": 150480 }, { "epoch": 18.10950661853189, "grad_norm": 105.78290557861328, "learning_rate": 0.00018510844390095177, "loss": 7.4942, "step": 150490 }, { "epoch": 18.110709987966306, "grad_norm": 82.8394775390625, "learning_rate": 0.00018510644648800977, "loss": 7.5812, "step": 150500 }, { "epoch": 18.111913357400724, "grad_norm": 185.19444274902344, "learning_rate": 0.00018510444895189777, "loss": 7.3928, "step": 150510 }, { "epoch": 18.113116726835138, "grad_norm": 318.8232421875, "learning_rate": 0.0001851024512926186, "loss": 7.3806, "step": 150520 }, { "epoch": 18.114320096269555, "grad_norm": 199.4155731201172, "learning_rate": 0.00018510045351017516, "loss": 7.4369, "step": 150530 }, { "epoch": 18.115523465703973, "grad_norm": 200.80133056640625, "learning_rate": 0.00018509845560457037, "loss": 7.4884, "step": 150540 }, { "epoch": 18.116726835138387, "grad_norm": 68.57654571533203, "learning_rate": 0.0001850964575758071, "loss": 7.3612, "step": 150550 }, { "epoch": 18.117930204572804, "grad_norm": 155.61082458496094, "learning_rate": 0.00018509445942388824, "loss": 7.3387, "step": 150560 }, { "epoch": 18.119133574007222, "grad_norm": 89.49139404296875, "learning_rate": 0.0001850924611488167, "loss": 7.355, "step": 150570 }, { "epoch": 18.120336943441636, "grad_norm": 200.8621826171875, "learning_rate": 0.00018509046275059536, "loss": 7.3875, "step": 150580 }, { "epoch": 18.121540312876053, "grad_norm": 156.1785888671875, "learning_rate": 0.0001850884642292271, "loss": 7.2877, "step": 150590 }, { "epoch": 18.12274368231047, "grad_norm": 99.13287353515625, "learning_rate": 0.00018508646558471482, "loss": 7.4628, "step": 150600 }, { "epoch": 18.123947051744885, "grad_norm": 241.97088623046875, "learning_rate": 0.00018508446681706145, "loss": 7.3007, "step": 150610 }, { "epoch": 18.125150421179303, "grad_norm": 103.4607162475586, "learning_rate": 0.00018508246792626985, "loss": 7.4595, "step": 150620 }, { "epoch": 18.126353790613717, "grad_norm": 279.5429992675781, "learning_rate": 0.00018508046891234288, "loss": 7.3618, "step": 150630 }, { "epoch": 18.127557160048134, "grad_norm": 137.11724853515625, "learning_rate": 0.00018507846977528346, "loss": 7.3999, "step": 150640 }, { "epoch": 18.128760529482552, "grad_norm": 111.09395599365234, "learning_rate": 0.00018507647051509453, "loss": 7.4563, "step": 150650 }, { "epoch": 18.129963898916966, "grad_norm": 143.97569274902344, "learning_rate": 0.0001850744711317789, "loss": 7.3328, "step": 150660 }, { "epoch": 18.131167268351383, "grad_norm": 85.04871368408203, "learning_rate": 0.00018507247162533955, "loss": 7.4279, "step": 150670 }, { "epoch": 18.1323706377858, "grad_norm": 270.6744689941406, "learning_rate": 0.00018507047199577928, "loss": 7.3959, "step": 150680 }, { "epoch": 18.133574007220215, "grad_norm": 67.41458129882812, "learning_rate": 0.00018506847224310105, "loss": 7.3664, "step": 150690 }, { "epoch": 18.134777376654633, "grad_norm": 134.37625122070312, "learning_rate": 0.00018506647236730774, "loss": 7.3415, "step": 150700 }, { "epoch": 18.13598074608905, "grad_norm": 253.5409393310547, "learning_rate": 0.00018506447236840226, "loss": 7.3253, "step": 150710 }, { "epoch": 18.137184115523464, "grad_norm": 107.16642761230469, "learning_rate": 0.00018506247224638747, "loss": 7.3727, "step": 150720 }, { "epoch": 18.13838748495788, "grad_norm": 203.11355590820312, "learning_rate": 0.0001850604720012663, "loss": 7.4762, "step": 150730 }, { "epoch": 18.1395908543923, "grad_norm": 211.16729736328125, "learning_rate": 0.00018505847163304158, "loss": 7.4892, "step": 150740 }, { "epoch": 18.140794223826713, "grad_norm": 92.10486602783203, "learning_rate": 0.0001850564711417163, "loss": 7.3412, "step": 150750 }, { "epoch": 18.14199759326113, "grad_norm": 149.5958709716797, "learning_rate": 0.0001850544705272933, "loss": 7.3792, "step": 150760 }, { "epoch": 18.14320096269555, "grad_norm": 145.82102966308594, "learning_rate": 0.0001850524697897755, "loss": 7.406, "step": 150770 }, { "epoch": 18.144404332129962, "grad_norm": 198.8045654296875, "learning_rate": 0.0001850504689291657, "loss": 7.3768, "step": 150780 }, { "epoch": 18.14560770156438, "grad_norm": 115.61459350585938, "learning_rate": 0.00018504846794546693, "loss": 7.3904, "step": 150790 }, { "epoch": 18.146811070998798, "grad_norm": 98.6722183227539, "learning_rate": 0.00018504646683868204, "loss": 7.3789, "step": 150800 }, { "epoch": 18.14801444043321, "grad_norm": 156.54734802246094, "learning_rate": 0.0001850444656088139, "loss": 7.2916, "step": 150810 }, { "epoch": 18.14921780986763, "grad_norm": 105.44517517089844, "learning_rate": 0.00018504246425586542, "loss": 7.3621, "step": 150820 }, { "epoch": 18.150421179302047, "grad_norm": 177.7098388671875, "learning_rate": 0.0001850404627798395, "loss": 7.4214, "step": 150830 }, { "epoch": 18.15162454873646, "grad_norm": 88.13648223876953, "learning_rate": 0.00018503846118073902, "loss": 7.4682, "step": 150840 }, { "epoch": 18.15282791817088, "grad_norm": 93.81919860839844, "learning_rate": 0.00018503645945856692, "loss": 7.3496, "step": 150850 }, { "epoch": 18.154031287605296, "grad_norm": 137.61151123046875, "learning_rate": 0.00018503445761332605, "loss": 7.4063, "step": 150860 }, { "epoch": 18.15523465703971, "grad_norm": 57.4403076171875, "learning_rate": 0.00018503245564501931, "loss": 7.4349, "step": 150870 }, { "epoch": 18.156438026474127, "grad_norm": 138.23391723632812, "learning_rate": 0.00018503045355364964, "loss": 7.3763, "step": 150880 }, { "epoch": 18.157641395908545, "grad_norm": 151.458740234375, "learning_rate": 0.00018502845133921988, "loss": 7.35, "step": 150890 }, { "epoch": 18.15884476534296, "grad_norm": 106.25843811035156, "learning_rate": 0.000185026449001733, "loss": 7.3819, "step": 150900 }, { "epoch": 18.160048134777377, "grad_norm": 126.08541107177734, "learning_rate": 0.00018502444654119182, "loss": 7.467, "step": 150910 }, { "epoch": 18.161251504211794, "grad_norm": 122.78411102294922, "learning_rate": 0.0001850224439575993, "loss": 7.3777, "step": 150920 }, { "epoch": 18.162454873646208, "grad_norm": 255.74432373046875, "learning_rate": 0.00018502044125095828, "loss": 7.3988, "step": 150930 }, { "epoch": 18.163658243080626, "grad_norm": 167.97303771972656, "learning_rate": 0.00018501843842127174, "loss": 7.523, "step": 150940 }, { "epoch": 18.164861612515043, "grad_norm": 78.77965545654297, "learning_rate": 0.00018501643546854247, "loss": 7.369, "step": 150950 }, { "epoch": 18.166064981949457, "grad_norm": 172.5324249267578, "learning_rate": 0.00018501443239277347, "loss": 7.457, "step": 150960 }, { "epoch": 18.167268351383875, "grad_norm": 69.45236206054688, "learning_rate": 0.00018501242919396758, "loss": 7.3798, "step": 150970 }, { "epoch": 18.168471720818292, "grad_norm": 66.4688491821289, "learning_rate": 0.00018501042587212773, "loss": 7.5096, "step": 150980 }, { "epoch": 18.169675090252706, "grad_norm": 102.49665832519531, "learning_rate": 0.0001850084224272568, "loss": 7.4028, "step": 150990 }, { "epoch": 18.170878459687124, "grad_norm": 169.3874053955078, "learning_rate": 0.0001850064188593577, "loss": 7.3443, "step": 151000 }, { "epoch": 18.17208182912154, "grad_norm": 119.93611145019531, "learning_rate": 0.0001850044151684333, "loss": 7.3586, "step": 151010 }, { "epoch": 18.173285198555956, "grad_norm": 111.78881072998047, "learning_rate": 0.00018500241135448655, "loss": 7.3727, "step": 151020 }, { "epoch": 18.174488567990373, "grad_norm": 209.1993408203125, "learning_rate": 0.0001850004074175203, "loss": 7.3423, "step": 151030 }, { "epoch": 18.17569193742479, "grad_norm": 129.2776641845703, "learning_rate": 0.0001849984033575375, "loss": 7.3181, "step": 151040 }, { "epoch": 18.176895306859205, "grad_norm": 51.54475784301758, "learning_rate": 0.000184996399174541, "loss": 7.5765, "step": 151050 }, { "epoch": 18.178098676293622, "grad_norm": 85.18266296386719, "learning_rate": 0.00018499439486853374, "loss": 7.4474, "step": 151060 }, { "epoch": 18.17930204572804, "grad_norm": 49.51419448852539, "learning_rate": 0.00018499239043951861, "loss": 7.5092, "step": 151070 }, { "epoch": 18.180505415162454, "grad_norm": 89.0058364868164, "learning_rate": 0.0001849903858874985, "loss": 7.5768, "step": 151080 }, { "epoch": 18.18170878459687, "grad_norm": 558.0090942382812, "learning_rate": 0.00018498838121247632, "loss": 7.5288, "step": 151090 }, { "epoch": 18.18291215403129, "grad_norm": 78.90757751464844, "learning_rate": 0.00018498637641445497, "loss": 7.4631, "step": 151100 }, { "epoch": 18.184115523465703, "grad_norm": 510.8673095703125, "learning_rate": 0.00018498437149343735, "loss": 7.5264, "step": 151110 }, { "epoch": 18.18531889290012, "grad_norm": 73.83006286621094, "learning_rate": 0.00018498236644942634, "loss": 7.4912, "step": 151120 }, { "epoch": 18.186522262334538, "grad_norm": 104.17164611816406, "learning_rate": 0.00018498036128242494, "loss": 7.5573, "step": 151130 }, { "epoch": 18.187725631768952, "grad_norm": 66.8059310913086, "learning_rate": 0.0001849783559924359, "loss": 7.4829, "step": 151140 }, { "epoch": 18.18892900120337, "grad_norm": 45.53932571411133, "learning_rate": 0.00018497635057946222, "loss": 7.5827, "step": 151150 }, { "epoch": 18.190132370637787, "grad_norm": 52.43602752685547, "learning_rate": 0.00018497434504350678, "loss": 7.5176, "step": 151160 }, { "epoch": 18.1913357400722, "grad_norm": 1033.6990966796875, "learning_rate": 0.0001849723393845725, "loss": 7.492, "step": 151170 }, { "epoch": 18.19253910950662, "grad_norm": 188.4258270263672, "learning_rate": 0.00018497033360266224, "loss": 7.5684, "step": 151180 }, { "epoch": 18.193742478941036, "grad_norm": 136.79278564453125, "learning_rate": 0.00018496832769777894, "loss": 7.5707, "step": 151190 }, { "epoch": 18.19494584837545, "grad_norm": 56.89272689819336, "learning_rate": 0.00018496632166992548, "loss": 7.5106, "step": 151200 }, { "epoch": 18.196149217809868, "grad_norm": 208.19151306152344, "learning_rate": 0.0001849643155191048, "loss": 7.5228, "step": 151210 }, { "epoch": 18.197352587244286, "grad_norm": 1972.562255859375, "learning_rate": 0.00018496230924531978, "loss": 7.537, "step": 151220 }, { "epoch": 18.1985559566787, "grad_norm": 236.2059783935547, "learning_rate": 0.0001849603028485733, "loss": 7.5807, "step": 151230 }, { "epoch": 18.199759326113117, "grad_norm": 155.88121032714844, "learning_rate": 0.0001849582963288683, "loss": 7.4712, "step": 151240 }, { "epoch": 18.200962695547535, "grad_norm": 190.3043975830078, "learning_rate": 0.00018495628968620768, "loss": 7.4649, "step": 151250 }, { "epoch": 18.20216606498195, "grad_norm": 122.1622543334961, "learning_rate": 0.0001849542829205943, "loss": 7.5207, "step": 151260 }, { "epoch": 18.203369434416366, "grad_norm": 83.27071380615234, "learning_rate": 0.0001849522760320311, "loss": 7.5285, "step": 151270 }, { "epoch": 18.204572803850784, "grad_norm": 162.8627166748047, "learning_rate": 0.00018495026902052103, "loss": 7.4729, "step": 151280 }, { "epoch": 18.205776173285198, "grad_norm": 149.04122924804688, "learning_rate": 0.00018494826188606691, "loss": 7.4415, "step": 151290 }, { "epoch": 18.206979542719615, "grad_norm": 49.33675765991211, "learning_rate": 0.00018494625462867168, "loss": 7.4724, "step": 151300 }, { "epoch": 18.20818291215403, "grad_norm": 220.3988800048828, "learning_rate": 0.00018494424724833824, "loss": 7.412, "step": 151310 }, { "epoch": 18.209386281588447, "grad_norm": 114.99543762207031, "learning_rate": 0.00018494223974506953, "loss": 7.3662, "step": 151320 }, { "epoch": 18.210589651022865, "grad_norm": 143.4963836669922, "learning_rate": 0.00018494023211886843, "loss": 7.499, "step": 151330 }, { "epoch": 18.21179302045728, "grad_norm": 90.40047454833984, "learning_rate": 0.0001849382243697378, "loss": 7.5558, "step": 151340 }, { "epoch": 18.212996389891696, "grad_norm": 107.75560760498047, "learning_rate": 0.00018493621649768062, "loss": 7.4405, "step": 151350 }, { "epoch": 18.214199759326114, "grad_norm": 184.58242797851562, "learning_rate": 0.00018493420850269977, "loss": 7.4225, "step": 151360 }, { "epoch": 18.215403128760528, "grad_norm": 112.13935852050781, "learning_rate": 0.00018493220038479812, "loss": 7.348, "step": 151370 }, { "epoch": 18.216606498194945, "grad_norm": 130.02120971679688, "learning_rate": 0.00018493019214397863, "loss": 7.5202, "step": 151380 }, { "epoch": 18.217809867629363, "grad_norm": 308.7380676269531, "learning_rate": 0.0001849281837802442, "loss": 7.3169, "step": 151390 }, { "epoch": 18.219013237063777, "grad_norm": 235.38868713378906, "learning_rate": 0.0001849261752935977, "loss": 7.3293, "step": 151400 }, { "epoch": 18.220216606498195, "grad_norm": 723.6695556640625, "learning_rate": 0.00018492416668404205, "loss": 7.3796, "step": 151410 }, { "epoch": 18.221419975932612, "grad_norm": 285.28717041015625, "learning_rate": 0.00018492215795158016, "loss": 7.3958, "step": 151420 }, { "epoch": 18.222623345367026, "grad_norm": 628.6517944335938, "learning_rate": 0.00018492014909621496, "loss": 7.4207, "step": 151430 }, { "epoch": 18.223826714801444, "grad_norm": 779.4058227539062, "learning_rate": 0.00018491814011794934, "loss": 7.3709, "step": 151440 }, { "epoch": 18.22503008423586, "grad_norm": 401.85968017578125, "learning_rate": 0.0001849161310167862, "loss": 7.3898, "step": 151450 }, { "epoch": 18.226233453670275, "grad_norm": 797.9706420898438, "learning_rate": 0.00018491412179272843, "loss": 7.3811, "step": 151460 }, { "epoch": 18.227436823104693, "grad_norm": 615.8762817382812, "learning_rate": 0.000184912112445779, "loss": 7.455, "step": 151470 }, { "epoch": 18.22864019253911, "grad_norm": 1646.37353515625, "learning_rate": 0.00018491010297594075, "loss": 7.389, "step": 151480 }, { "epoch": 18.229843561973524, "grad_norm": 1196.743408203125, "learning_rate": 0.00018490809338321662, "loss": 7.4918, "step": 151490 }, { "epoch": 18.231046931407942, "grad_norm": 1348.6378173828125, "learning_rate": 0.00018490608366760952, "loss": 7.4224, "step": 151500 }, { "epoch": 18.23225030084236, "grad_norm": 1069.326904296875, "learning_rate": 0.00018490407382912237, "loss": 7.4181, "step": 151510 }, { "epoch": 18.233453670276774, "grad_norm": 920.8336181640625, "learning_rate": 0.00018490206386775806, "loss": 7.4824, "step": 151520 }, { "epoch": 18.23465703971119, "grad_norm": 1177.33984375, "learning_rate": 0.0001849000537835195, "loss": 7.4369, "step": 151530 }, { "epoch": 18.23586040914561, "grad_norm": 1107.93359375, "learning_rate": 0.0001848980435764096, "loss": 7.3992, "step": 151540 }, { "epoch": 18.237063778580023, "grad_norm": 671.354736328125, "learning_rate": 0.00018489603324643126, "loss": 7.4526, "step": 151550 }, { "epoch": 18.23826714801444, "grad_norm": 713.6513061523438, "learning_rate": 0.0001848940227935874, "loss": 7.4403, "step": 151560 }, { "epoch": 18.239470517448858, "grad_norm": 826.3712768554688, "learning_rate": 0.0001848920122178809, "loss": 7.3983, "step": 151570 }, { "epoch": 18.240673886883272, "grad_norm": 1284.66650390625, "learning_rate": 0.00018489000151931476, "loss": 7.4668, "step": 151580 }, { "epoch": 18.24187725631769, "grad_norm": 699.0711059570312, "learning_rate": 0.0001848879906978918, "loss": 7.4623, "step": 151590 }, { "epoch": 18.243080625752107, "grad_norm": 404.0067443847656, "learning_rate": 0.00018488597975361496, "loss": 7.4273, "step": 151600 }, { "epoch": 18.24428399518652, "grad_norm": 987.5184326171875, "learning_rate": 0.00018488396868648717, "loss": 7.4349, "step": 151610 }, { "epoch": 18.24548736462094, "grad_norm": 851.533447265625, "learning_rate": 0.00018488195749651127, "loss": 7.329, "step": 151620 }, { "epoch": 18.246690734055356, "grad_norm": 971.6378784179688, "learning_rate": 0.00018487994618369028, "loss": 7.3766, "step": 151630 }, { "epoch": 18.24789410348977, "grad_norm": 675.4795532226562, "learning_rate": 0.000184877934748027, "loss": 7.3764, "step": 151640 }, { "epoch": 18.249097472924188, "grad_norm": 861.827392578125, "learning_rate": 0.00018487592318952443, "loss": 7.3667, "step": 151650 }, { "epoch": 18.250300842358605, "grad_norm": 1099.5947265625, "learning_rate": 0.0001848739115081854, "loss": 7.3711, "step": 151660 }, { "epoch": 18.25150421179302, "grad_norm": 358.1723937988281, "learning_rate": 0.0001848718997040129, "loss": 7.4148, "step": 151670 }, { "epoch": 18.252707581227437, "grad_norm": 2679.742431640625, "learning_rate": 0.00018486988777700983, "loss": 7.3917, "step": 151680 }, { "epoch": 18.253910950661854, "grad_norm": 1006.3916015625, "learning_rate": 0.00018486787572717902, "loss": 7.444, "step": 151690 }, { "epoch": 18.25511432009627, "grad_norm": 767.1204223632812, "learning_rate": 0.00018486586355452348, "loss": 7.3879, "step": 151700 }, { "epoch": 18.256317689530686, "grad_norm": 916.1561889648438, "learning_rate": 0.0001848638512590461, "loss": 7.3784, "step": 151710 }, { "epoch": 18.257521058965104, "grad_norm": 365.0423889160156, "learning_rate": 0.00018486183884074971, "loss": 7.3472, "step": 151720 }, { "epoch": 18.258724428399518, "grad_norm": 466.4209289550781, "learning_rate": 0.00018485982629963736, "loss": 7.3599, "step": 151730 }, { "epoch": 18.259927797833935, "grad_norm": 301.1725769042969, "learning_rate": 0.00018485781363571183, "loss": 7.407, "step": 151740 }, { "epoch": 18.261131167268353, "grad_norm": 360.5256652832031, "learning_rate": 0.00018485580084897613, "loss": 7.321, "step": 151750 }, { "epoch": 18.262334536702767, "grad_norm": 867.6375732421875, "learning_rate": 0.00018485378793943311, "loss": 7.3021, "step": 151760 }, { "epoch": 18.263537906137184, "grad_norm": 422.22113037109375, "learning_rate": 0.00018485177490708575, "loss": 7.3276, "step": 151770 }, { "epoch": 18.264741275571602, "grad_norm": 544.160400390625, "learning_rate": 0.0001848497617519369, "loss": 7.3177, "step": 151780 }, { "epoch": 18.265944645006016, "grad_norm": 496.56494140625, "learning_rate": 0.0001848477484739895, "loss": 7.3398, "step": 151790 }, { "epoch": 18.267148014440433, "grad_norm": 417.65289306640625, "learning_rate": 0.00018484573507324644, "loss": 7.3748, "step": 151800 }, { "epoch": 18.26835138387485, "grad_norm": 883.1669921875, "learning_rate": 0.00018484372154971065, "loss": 7.3655, "step": 151810 }, { "epoch": 18.269554753309265, "grad_norm": 639.7716674804688, "learning_rate": 0.00018484170790338507, "loss": 7.4151, "step": 151820 }, { "epoch": 18.270758122743683, "grad_norm": 910.5934448242188, "learning_rate": 0.00018483969413427259, "loss": 7.3394, "step": 151830 }, { "epoch": 18.2719614921781, "grad_norm": 489.7258605957031, "learning_rate": 0.00018483768024237613, "loss": 7.3665, "step": 151840 }, { "epoch": 18.273164861612514, "grad_norm": 431.5764465332031, "learning_rate": 0.00018483566622769858, "loss": 7.344, "step": 151850 }, { "epoch": 18.27436823104693, "grad_norm": 542.9868774414062, "learning_rate": 0.0001848336520902429, "loss": 7.3436, "step": 151860 }, { "epoch": 18.27557160048135, "grad_norm": 371.8831481933594, "learning_rate": 0.00018483163783001197, "loss": 7.3787, "step": 151870 }, { "epoch": 18.276774969915763, "grad_norm": 570.6926879882812, "learning_rate": 0.00018482962344700871, "loss": 7.4664, "step": 151880 }, { "epoch": 18.27797833935018, "grad_norm": 425.23931884765625, "learning_rate": 0.00018482760894123604, "loss": 7.2684, "step": 151890 }, { "epoch": 18.2791817087846, "grad_norm": 390.46575927734375, "learning_rate": 0.0001848255943126969, "loss": 7.349, "step": 151900 }, { "epoch": 18.280385078219012, "grad_norm": 474.7912902832031, "learning_rate": 0.00018482357956139416, "loss": 7.4784, "step": 151910 }, { "epoch": 18.28158844765343, "grad_norm": 497.7284240722656, "learning_rate": 0.00018482156468733077, "loss": 7.42, "step": 151920 }, { "epoch": 18.282791817087848, "grad_norm": 377.16796875, "learning_rate": 0.00018481954969050963, "loss": 7.3588, "step": 151930 }, { "epoch": 18.28399518652226, "grad_norm": 509.6619873046875, "learning_rate": 0.00018481753457093365, "loss": 7.4345, "step": 151940 }, { "epoch": 18.28519855595668, "grad_norm": 505.9047546386719, "learning_rate": 0.00018481551932860577, "loss": 7.3872, "step": 151950 }, { "epoch": 18.286401925391097, "grad_norm": 504.4636535644531, "learning_rate": 0.0001848135039635289, "loss": 7.3197, "step": 151960 }, { "epoch": 18.28760529482551, "grad_norm": 331.6162109375, "learning_rate": 0.0001848114884757059, "loss": 7.4205, "step": 151970 }, { "epoch": 18.28880866425993, "grad_norm": 499.6607971191406, "learning_rate": 0.0001848094728651398, "loss": 7.4117, "step": 151980 }, { "epoch": 18.290012033694346, "grad_norm": 602.674072265625, "learning_rate": 0.00018480745713183344, "loss": 7.3608, "step": 151990 }, { "epoch": 18.29121540312876, "grad_norm": 75.54895782470703, "learning_rate": 0.00018480544127578972, "loss": 7.4137, "step": 152000 }, { "epoch": 18.292418772563177, "grad_norm": 104.66126251220703, "learning_rate": 0.00018480342529701162, "loss": 7.3288, "step": 152010 }, { "epoch": 18.29362214199759, "grad_norm": 343.62664794921875, "learning_rate": 0.000184801409195502, "loss": 7.3711, "step": 152020 }, { "epoch": 18.29482551143201, "grad_norm": 523.1060791015625, "learning_rate": 0.00018479939297126382, "loss": 7.4172, "step": 152030 }, { "epoch": 18.296028880866427, "grad_norm": 326.9176025390625, "learning_rate": 0.0001847973766243, "loss": 7.4349, "step": 152040 }, { "epoch": 18.29723225030084, "grad_norm": 264.3764343261719, "learning_rate": 0.0001847953601546134, "loss": 7.3919, "step": 152050 }, { "epoch": 18.29843561973526, "grad_norm": 459.1297607421875, "learning_rate": 0.00018479334356220702, "loss": 7.3893, "step": 152060 }, { "epoch": 18.299638989169676, "grad_norm": 290.79315185546875, "learning_rate": 0.0001847913268470837, "loss": 7.415, "step": 152070 }, { "epoch": 18.30084235860409, "grad_norm": 597.1858520507812, "learning_rate": 0.00018478931000924642, "loss": 7.3745, "step": 152080 }, { "epoch": 18.302045728038507, "grad_norm": 850.1022338867188, "learning_rate": 0.00018478729304869805, "loss": 7.3653, "step": 152090 }, { "epoch": 18.303249097472925, "grad_norm": 237.01907348632812, "learning_rate": 0.00018478527596544158, "loss": 7.3974, "step": 152100 }, { "epoch": 18.30445246690734, "grad_norm": 964.3274536132812, "learning_rate": 0.00018478325875947985, "loss": 7.3103, "step": 152110 }, { "epoch": 18.305655836341757, "grad_norm": 365.0823974609375, "learning_rate": 0.00018478124143081581, "loss": 7.3759, "step": 152120 }, { "epoch": 18.306859205776174, "grad_norm": 540.3898315429688, "learning_rate": 0.0001847792239794524, "loss": 7.3724, "step": 152130 }, { "epoch": 18.308062575210588, "grad_norm": 206.74462890625, "learning_rate": 0.0001847772064053925, "loss": 7.3672, "step": 152140 }, { "epoch": 18.309265944645006, "grad_norm": 756.8995361328125, "learning_rate": 0.00018477518870863905, "loss": 7.4172, "step": 152150 }, { "epoch": 18.310469314079423, "grad_norm": 619.6227416992188, "learning_rate": 0.00018477317088919502, "loss": 7.3557, "step": 152160 }, { "epoch": 18.311672683513837, "grad_norm": 206.07252502441406, "learning_rate": 0.00018477115294706321, "loss": 7.3845, "step": 152170 }, { "epoch": 18.312876052948255, "grad_norm": 206.29991149902344, "learning_rate": 0.00018476913488224667, "loss": 7.408, "step": 152180 }, { "epoch": 18.314079422382672, "grad_norm": 166.0495147705078, "learning_rate": 0.00018476711669474823, "loss": 7.3012, "step": 152190 }, { "epoch": 18.315282791817086, "grad_norm": 161.13961791992188, "learning_rate": 0.00018476509838457085, "loss": 7.3856, "step": 152200 }, { "epoch": 18.316486161251504, "grad_norm": 127.53560638427734, "learning_rate": 0.00018476307995171745, "loss": 7.2802, "step": 152210 }, { "epoch": 18.31768953068592, "grad_norm": 232.245361328125, "learning_rate": 0.00018476106139619095, "loss": 7.3252, "step": 152220 }, { "epoch": 18.318892900120336, "grad_norm": 189.64749145507812, "learning_rate": 0.00018475904271799423, "loss": 7.2922, "step": 152230 }, { "epoch": 18.320096269554753, "grad_norm": 135.0006103515625, "learning_rate": 0.00018475702391713031, "loss": 7.332, "step": 152240 }, { "epoch": 18.32129963898917, "grad_norm": 372.1553649902344, "learning_rate": 0.00018475500499360204, "loss": 7.35, "step": 152250 }, { "epoch": 18.322503008423585, "grad_norm": 514.6927490234375, "learning_rate": 0.00018475298594741233, "loss": 7.4239, "step": 152260 }, { "epoch": 18.323706377858002, "grad_norm": 163.652587890625, "learning_rate": 0.00018475096677856412, "loss": 7.4147, "step": 152270 }, { "epoch": 18.32490974729242, "grad_norm": 248.01438903808594, "learning_rate": 0.00018474894748706034, "loss": 7.3285, "step": 152280 }, { "epoch": 18.326113116726834, "grad_norm": 275.4211120605469, "learning_rate": 0.0001847469280729039, "loss": 7.3398, "step": 152290 }, { "epoch": 18.32731648616125, "grad_norm": 271.49530029296875, "learning_rate": 0.00018474490853609776, "loss": 7.4121, "step": 152300 }, { "epoch": 18.32851985559567, "grad_norm": 275.52764892578125, "learning_rate": 0.0001847428888766448, "loss": 7.3025, "step": 152310 }, { "epoch": 18.329723225030083, "grad_norm": 815.0440673828125, "learning_rate": 0.00018474086909454795, "loss": 7.3067, "step": 152320 }, { "epoch": 18.3309265944645, "grad_norm": 557.1624755859375, "learning_rate": 0.00018473884918981015, "loss": 7.4209, "step": 152330 }, { "epoch": 18.332129963898918, "grad_norm": 283.4029541015625, "learning_rate": 0.00018473682916243434, "loss": 7.3618, "step": 152340 }, { "epoch": 18.333333333333332, "grad_norm": 284.2005615234375, "learning_rate": 0.00018473480901242336, "loss": 7.4374, "step": 152350 }, { "epoch": 18.33453670276775, "grad_norm": 181.1649169921875, "learning_rate": 0.00018473278873978023, "loss": 7.4721, "step": 152360 }, { "epoch": 18.335740072202167, "grad_norm": 380.21990966796875, "learning_rate": 0.0001847307683445078, "loss": 7.3412, "step": 152370 }, { "epoch": 18.33694344163658, "grad_norm": 96.97758483886719, "learning_rate": 0.0001847287478266091, "loss": 7.3578, "step": 152380 }, { "epoch": 18.338146811071, "grad_norm": 197.025390625, "learning_rate": 0.00018472672718608692, "loss": 7.3366, "step": 152390 }, { "epoch": 18.339350180505416, "grad_norm": 153.88284301757812, "learning_rate": 0.00018472470642294427, "loss": 7.4359, "step": 152400 }, { "epoch": 18.34055354993983, "grad_norm": 74.3664321899414, "learning_rate": 0.00018472268553718405, "loss": 7.3939, "step": 152410 }, { "epoch": 18.341756919374248, "grad_norm": 118.60506439208984, "learning_rate": 0.00018472066452880918, "loss": 7.3732, "step": 152420 }, { "epoch": 18.342960288808666, "grad_norm": 351.7347412109375, "learning_rate": 0.0001847186433978226, "loss": 7.3547, "step": 152430 }, { "epoch": 18.34416365824308, "grad_norm": 98.69527435302734, "learning_rate": 0.00018471662214422722, "loss": 7.4385, "step": 152440 }, { "epoch": 18.345367027677497, "grad_norm": 116.80021667480469, "learning_rate": 0.00018471460076802595, "loss": 7.3886, "step": 152450 }, { "epoch": 18.346570397111915, "grad_norm": 311.6566162109375, "learning_rate": 0.0001847125792692218, "loss": 7.3342, "step": 152460 }, { "epoch": 18.34777376654633, "grad_norm": 245.01870727539062, "learning_rate": 0.00018471055764781758, "loss": 7.446, "step": 152470 }, { "epoch": 18.348977135980746, "grad_norm": 699.9287719726562, "learning_rate": 0.00018470853590381626, "loss": 7.3242, "step": 152480 }, { "epoch": 18.350180505415164, "grad_norm": 325.8674621582031, "learning_rate": 0.0001847065140372208, "loss": 7.262, "step": 152490 }, { "epoch": 18.351383874849578, "grad_norm": 309.1668701171875, "learning_rate": 0.0001847044920480341, "loss": 7.4257, "step": 152500 }, { "epoch": 18.352587244283995, "grad_norm": 352.16363525390625, "learning_rate": 0.0001847024699362591, "loss": 7.3365, "step": 152510 }, { "epoch": 18.353790613718413, "grad_norm": 669.510498046875, "learning_rate": 0.00018470044770189868, "loss": 7.2026, "step": 152520 }, { "epoch": 18.354993983152827, "grad_norm": 6747.38037109375, "learning_rate": 0.00018469842534495583, "loss": 7.2472, "step": 152530 }, { "epoch": 18.356197352587245, "grad_norm": 752.1255493164062, "learning_rate": 0.00018469640286543343, "loss": 7.3093, "step": 152540 }, { "epoch": 18.357400722021662, "grad_norm": 637.0225219726562, "learning_rate": 0.00018469438026333444, "loss": 7.3485, "step": 152550 }, { "epoch": 18.358604091456076, "grad_norm": 1597.137939453125, "learning_rate": 0.00018469235753866176, "loss": 7.3601, "step": 152560 }, { "epoch": 18.359807460890494, "grad_norm": 805.3880004882812, "learning_rate": 0.00018469033469141835, "loss": 7.4051, "step": 152570 }, { "epoch": 18.36101083032491, "grad_norm": 409.3124694824219, "learning_rate": 0.00018468831172160707, "loss": 7.2234, "step": 152580 }, { "epoch": 18.362214199759325, "grad_norm": 406.9848327636719, "learning_rate": 0.00018468628862923094, "loss": 7.3235, "step": 152590 }, { "epoch": 18.363417569193743, "grad_norm": 1156.8260498046875, "learning_rate": 0.00018468426541429282, "loss": 7.2933, "step": 152600 }, { "epoch": 18.36462093862816, "grad_norm": 831.1348266601562, "learning_rate": 0.00018468224207679565, "loss": 7.3392, "step": 152610 }, { "epoch": 18.365824308062574, "grad_norm": 961.0281372070312, "learning_rate": 0.0001846802186167424, "loss": 7.3274, "step": 152620 }, { "epoch": 18.367027677496992, "grad_norm": 15979.4677734375, "learning_rate": 0.00018467819503413595, "loss": 7.423, "step": 152630 }, { "epoch": 18.36823104693141, "grad_norm": 4876.44580078125, "learning_rate": 0.00018467617132897926, "loss": 7.399, "step": 152640 }, { "epoch": 18.369434416365824, "grad_norm": 1651.61083984375, "learning_rate": 0.0001846741475012752, "loss": 7.4468, "step": 152650 }, { "epoch": 18.37063778580024, "grad_norm": 1993.109375, "learning_rate": 0.0001846721235510268, "loss": 7.415, "step": 152660 }, { "epoch": 18.37184115523466, "grad_norm": 1994.6533203125, "learning_rate": 0.00018467009947823688, "loss": 7.4098, "step": 152670 }, { "epoch": 18.373044524669073, "grad_norm": 1205.272216796875, "learning_rate": 0.00018466807528290846, "loss": 7.3834, "step": 152680 }, { "epoch": 18.37424789410349, "grad_norm": 813.9103393554688, "learning_rate": 0.0001846660509650444, "loss": 7.3982, "step": 152690 }, { "epoch": 18.375451263537904, "grad_norm": 276.84539794921875, "learning_rate": 0.00018466402652464768, "loss": 7.2824, "step": 152700 }, { "epoch": 18.376654632972322, "grad_norm": 239.27444458007812, "learning_rate": 0.00018466200196172122, "loss": 7.3645, "step": 152710 }, { "epoch": 18.37785800240674, "grad_norm": 449.86865234375, "learning_rate": 0.0001846599772762679, "loss": 7.2669, "step": 152720 }, { "epoch": 18.379061371841154, "grad_norm": 473.189453125, "learning_rate": 0.00018465795246829075, "loss": 7.3682, "step": 152730 }, { "epoch": 18.38026474127557, "grad_norm": 981.4517211914062, "learning_rate": 0.0001846559275377926, "loss": 7.4094, "step": 152740 }, { "epoch": 18.38146811070999, "grad_norm": 671.7831420898438, "learning_rate": 0.00018465390248477643, "loss": 7.275, "step": 152750 }, { "epoch": 18.382671480144403, "grad_norm": 440.99591064453125, "learning_rate": 0.00018465187730924517, "loss": 7.3697, "step": 152760 }, { "epoch": 18.38387484957882, "grad_norm": 352.1147155761719, "learning_rate": 0.00018464985201120174, "loss": 7.2749, "step": 152770 }, { "epoch": 18.385078219013238, "grad_norm": 406.75775146484375, "learning_rate": 0.0001846478265906491, "loss": 7.4258, "step": 152780 }, { "epoch": 18.386281588447652, "grad_norm": 633.501220703125, "learning_rate": 0.0001846458010475901, "loss": 7.4049, "step": 152790 }, { "epoch": 18.38748495788207, "grad_norm": 458.1518249511719, "learning_rate": 0.00018464377538202778, "loss": 7.415, "step": 152800 }, { "epoch": 18.388688327316487, "grad_norm": 672.3319091796875, "learning_rate": 0.00018464174959396498, "loss": 7.4147, "step": 152810 }, { "epoch": 18.3898916967509, "grad_norm": 273.4700927734375, "learning_rate": 0.00018463972368340468, "loss": 7.273, "step": 152820 }, { "epoch": 18.39109506618532, "grad_norm": 239.07583618164062, "learning_rate": 0.00018463769765034982, "loss": 7.3782, "step": 152830 }, { "epoch": 18.392298435619736, "grad_norm": 638.8527221679688, "learning_rate": 0.0001846356714948033, "loss": 7.2381, "step": 152840 }, { "epoch": 18.39350180505415, "grad_norm": 776.33447265625, "learning_rate": 0.00018463364521676808, "loss": 7.4281, "step": 152850 }, { "epoch": 18.394705174488568, "grad_norm": 343.4711608886719, "learning_rate": 0.0001846316188162471, "loss": 7.3826, "step": 152860 }, { "epoch": 18.395908543922985, "grad_norm": 725.744873046875, "learning_rate": 0.00018462959229324324, "loss": 7.4091, "step": 152870 }, { "epoch": 18.3971119133574, "grad_norm": 1224.29443359375, "learning_rate": 0.00018462756564775946, "loss": 7.4163, "step": 152880 }, { "epoch": 18.398315282791817, "grad_norm": 503.4244384765625, "learning_rate": 0.0001846255388797987, "loss": 7.2828, "step": 152890 }, { "epoch": 18.399518652226234, "grad_norm": 343.1127014160156, "learning_rate": 0.0001846235119893639, "loss": 7.2975, "step": 152900 }, { "epoch": 18.40072202166065, "grad_norm": 399.8078308105469, "learning_rate": 0.000184621484976458, "loss": 7.336, "step": 152910 }, { "epoch": 18.401925391095066, "grad_norm": 283.1817321777344, "learning_rate": 0.0001846194578410839, "loss": 7.2593, "step": 152920 }, { "epoch": 18.403128760529484, "grad_norm": 149.43238830566406, "learning_rate": 0.00018461743058324455, "loss": 7.2923, "step": 152930 }, { "epoch": 18.404332129963898, "grad_norm": 882.1889038085938, "learning_rate": 0.0001846154032029429, "loss": 7.3169, "step": 152940 }, { "epoch": 18.405535499398315, "grad_norm": 232.15188598632812, "learning_rate": 0.00018461337570018188, "loss": 7.4182, "step": 152950 }, { "epoch": 18.406738868832733, "grad_norm": 810.0722045898438, "learning_rate": 0.0001846113480749644, "loss": 7.43, "step": 152960 }, { "epoch": 18.407942238267147, "grad_norm": 460.2532043457031, "learning_rate": 0.0001846093203272934, "loss": 7.4534, "step": 152970 }, { "epoch": 18.409145607701564, "grad_norm": 652.58154296875, "learning_rate": 0.00018460729245717183, "loss": 7.3855, "step": 152980 }, { "epoch": 18.410348977135982, "grad_norm": 340.3279113769531, "learning_rate": 0.00018460526446460263, "loss": 7.4736, "step": 152990 }, { "epoch": 18.411552346570396, "grad_norm": 878.8419189453125, "learning_rate": 0.00018460323634958873, "loss": 7.4742, "step": 153000 }, { "epoch": 18.412755716004813, "grad_norm": 810.9647216796875, "learning_rate": 0.00018460120811213305, "loss": 7.3896, "step": 153010 }, { "epoch": 18.41395908543923, "grad_norm": 1688.00244140625, "learning_rate": 0.00018459917975223854, "loss": 7.3993, "step": 153020 }, { "epoch": 18.415162454873645, "grad_norm": 430.4827575683594, "learning_rate": 0.0001845971512699081, "loss": 7.4924, "step": 153030 }, { "epoch": 18.416365824308063, "grad_norm": 85.9831771850586, "learning_rate": 0.00018459512266514474, "loss": 7.5139, "step": 153040 }, { "epoch": 18.41756919374248, "grad_norm": 51.568939208984375, "learning_rate": 0.00018459309393795132, "loss": 7.481, "step": 153050 }, { "epoch": 18.418772563176894, "grad_norm": 54.81188201904297, "learning_rate": 0.0001845910650883308, "loss": 7.5253, "step": 153060 }, { "epoch": 18.41997593261131, "grad_norm": 74.08306884765625, "learning_rate": 0.00018458903611628615, "loss": 7.5278, "step": 153070 }, { "epoch": 18.42117930204573, "grad_norm": 147.89413452148438, "learning_rate": 0.00018458700702182027, "loss": 7.5596, "step": 153080 }, { "epoch": 18.422382671480143, "grad_norm": 692.1793823242188, "learning_rate": 0.00018458497780493612, "loss": 7.5143, "step": 153090 }, { "epoch": 18.42358604091456, "grad_norm": 40.994956970214844, "learning_rate": 0.0001845829484656366, "loss": 7.5969, "step": 153100 }, { "epoch": 18.42478941034898, "grad_norm": 9.625137329101562, "learning_rate": 0.00018458091900392466, "loss": 7.7019, "step": 153110 }, { "epoch": 18.425992779783392, "grad_norm": 12.490511894226074, "learning_rate": 0.0001845788894198033, "loss": 7.6813, "step": 153120 }, { "epoch": 18.42719614921781, "grad_norm": 8.232507705688477, "learning_rate": 0.00018457685971327536, "loss": 7.5214, "step": 153130 }, { "epoch": 18.428399518652228, "grad_norm": 34.846214294433594, "learning_rate": 0.00018457482988434385, "loss": 7.4904, "step": 153140 }, { "epoch": 18.42960288808664, "grad_norm": 413.77508544921875, "learning_rate": 0.00018457279993301166, "loss": 7.5764, "step": 153150 }, { "epoch": 18.43080625752106, "grad_norm": 290.9382629394531, "learning_rate": 0.00018457076985928177, "loss": 7.7764, "step": 153160 }, { "epoch": 18.432009626955477, "grad_norm": 1067.5765380859375, "learning_rate": 0.00018456873966315708, "loss": 7.7062, "step": 153170 }, { "epoch": 18.43321299638989, "grad_norm": 1371.8919677734375, "learning_rate": 0.00018456670934464056, "loss": 7.6137, "step": 153180 }, { "epoch": 18.43441636582431, "grad_norm": 526.41357421875, "learning_rate": 0.0001845646789037351, "loss": 7.5463, "step": 153190 }, { "epoch": 18.435619735258726, "grad_norm": 35714.6171875, "learning_rate": 0.0001845626483404437, "loss": 7.6021, "step": 153200 }, { "epoch": 18.43682310469314, "grad_norm": 107209.7421875, "learning_rate": 0.00018456061765476927, "loss": 7.9262, "step": 153210 }, { "epoch": 18.438026474127557, "grad_norm": 290236160.0, "learning_rate": 0.00018455858684671475, "loss": 8.4073, "step": 153220 }, { "epoch": 18.439229843561975, "grad_norm": 49486820.0, "learning_rate": 0.00018455655591628306, "loss": 8.7149, "step": 153230 }, { "epoch": 18.44043321299639, "grad_norm": 354431008.0, "learning_rate": 0.00018455452486347714, "loss": 9.1406, "step": 153240 }, { "epoch": 18.441636582430807, "grad_norm": 952564928.0, "learning_rate": 0.00018455249368829998, "loss": 9.4997, "step": 153250 }, { "epoch": 18.442839951865224, "grad_norm": 343560960.0, "learning_rate": 0.0001845504623907545, "loss": 9.6089, "step": 153260 }, { "epoch": 18.444043321299638, "grad_norm": 907001152.0, "learning_rate": 0.0001845484309708436, "loss": 9.5405, "step": 153270 }, { "epoch": 18.445246690734056, "grad_norm": 1099533184.0, "learning_rate": 0.00018454639942857025, "loss": 9.7289, "step": 153280 }, { "epoch": 18.446450060168473, "grad_norm": 401878048.0, "learning_rate": 0.00018454436776393739, "loss": 9.5961, "step": 153290 }, { "epoch": 18.447653429602887, "grad_norm": 393140160.0, "learning_rate": 0.00018454233597694795, "loss": 9.6738, "step": 153300 }, { "epoch": 18.448856799037305, "grad_norm": 110440016.0, "learning_rate": 0.00018454030406760488, "loss": 9.7425, "step": 153310 }, { "epoch": 18.450060168471722, "grad_norm": 227057136.0, "learning_rate": 0.0001845382720359111, "loss": 9.9498, "step": 153320 }, { "epoch": 18.451263537906136, "grad_norm": 189533648.0, "learning_rate": 0.0001845362398818696, "loss": 10.2202, "step": 153330 }, { "epoch": 18.452466907340554, "grad_norm": 144564608.0, "learning_rate": 0.00018453420760548327, "loss": 10.5441, "step": 153340 }, { "epoch": 18.45367027677497, "grad_norm": 158687680.0, "learning_rate": 0.00018453217520675508, "loss": 10.4903, "step": 153350 }, { "epoch": 18.454873646209386, "grad_norm": 84126192.0, "learning_rate": 0.00018453014268568797, "loss": 10.4288, "step": 153360 }, { "epoch": 18.456077015643803, "grad_norm": 347420544.0, "learning_rate": 0.00018452811004228485, "loss": 10.4237, "step": 153370 }, { "epoch": 18.45728038507822, "grad_norm": 705729536.0, "learning_rate": 0.0001845260772765487, "loss": 10.4202, "step": 153380 }, { "epoch": 18.458483754512635, "grad_norm": 447884192.0, "learning_rate": 0.00018452404438848246, "loss": 10.3371, "step": 153390 }, { "epoch": 18.459687123947052, "grad_norm": 319394624.0, "learning_rate": 0.00018452201137808904, "loss": 10.5163, "step": 153400 }, { "epoch": 18.460890493381466, "grad_norm": 307767616.0, "learning_rate": 0.00018451997824537139, "loss": 10.3893, "step": 153410 }, { "epoch": 18.462093862815884, "grad_norm": 18004549632.0, "learning_rate": 0.00018451794499033247, "loss": 10.6187, "step": 153420 }, { "epoch": 18.4632972322503, "grad_norm": 2387283456.0, "learning_rate": 0.00018451591161297524, "loss": 10.5182, "step": 153430 }, { "epoch": 18.464500601684716, "grad_norm": 256399568.0, "learning_rate": 0.00018451387811330264, "loss": 10.6427, "step": 153440 }, { "epoch": 18.465703971119133, "grad_norm": 226842784.0, "learning_rate": 0.00018451184449131756, "loss": 10.4203, "step": 153450 }, { "epoch": 18.46690734055355, "grad_norm": 15965370.0, "learning_rate": 0.00018450981074702295, "loss": 10.6075, "step": 153460 }, { "epoch": 18.468110709987965, "grad_norm": 38252532.0, "learning_rate": 0.0001845077768804218, "loss": 10.7387, "step": 153470 }, { "epoch": 18.469314079422382, "grad_norm": 34838764.0, "learning_rate": 0.00018450574289151706, "loss": 10.9605, "step": 153480 }, { "epoch": 18.4705174488568, "grad_norm": 62735660.0, "learning_rate": 0.0001845037087803116, "loss": 10.692, "step": 153490 }, { "epoch": 18.471720818291214, "grad_norm": 30423376.0, "learning_rate": 0.00018450167454680844, "loss": 10.3002, "step": 153500 }, { "epoch": 18.47292418772563, "grad_norm": 1632257.75, "learning_rate": 0.0001844996401910105, "loss": 10.3527, "step": 153510 }, { "epoch": 18.47412755716005, "grad_norm": 277744.25, "learning_rate": 0.00018449760571292073, "loss": 10.359, "step": 153520 }, { "epoch": 18.475330926594463, "grad_norm": 1733779.25, "learning_rate": 0.000184495571112542, "loss": 10.1695, "step": 153530 }, { "epoch": 18.47653429602888, "grad_norm": 903284.5, "learning_rate": 0.00018449353638987738, "loss": 9.9837, "step": 153540 }, { "epoch": 18.477737665463298, "grad_norm": 1033326.5, "learning_rate": 0.00018449150154492973, "loss": 10.4487, "step": 153550 }, { "epoch": 18.478941034897712, "grad_norm": 686108.9375, "learning_rate": 0.00018448946657770199, "loss": 10.8624, "step": 153560 }, { "epoch": 18.48014440433213, "grad_norm": 272341.71875, "learning_rate": 0.00018448743148819717, "loss": 10.9326, "step": 153570 }, { "epoch": 18.481347773766547, "grad_norm": 7521502232576.0, "learning_rate": 0.00018448539627641817, "loss": 10.9476, "step": 153580 }, { "epoch": 18.48255114320096, "grad_norm": 5581615661056.0, "learning_rate": 0.00018448336094236792, "loss": 10.5312, "step": 153590 }, { "epoch": 18.48375451263538, "grad_norm": 35045565792256.0, "learning_rate": 0.00018448132548604942, "loss": 10.2854, "step": 153600 }, { "epoch": 18.484957882069796, "grad_norm": 4497267490816.0, "learning_rate": 0.00018447928990746558, "loss": 10.3303, "step": 153610 }, { "epoch": 18.48616125150421, "grad_norm": 19079746289664.0, "learning_rate": 0.00018447725420661933, "loss": 10.5231, "step": 153620 }, { "epoch": 18.487364620938628, "grad_norm": 16816830152704.0, "learning_rate": 0.00018447521838351364, "loss": 10.6547, "step": 153630 }, { "epoch": 18.488567990373046, "grad_norm": 34880473792512.0, "learning_rate": 0.00018447318243815147, "loss": 10.7887, "step": 153640 }, { "epoch": 18.48977135980746, "grad_norm": 25699289088.0, "learning_rate": 0.00018447114637053572, "loss": 10.8947, "step": 153650 }, { "epoch": 18.490974729241877, "grad_norm": 49615843328.0, "learning_rate": 0.0001844691101806694, "loss": 10.9466, "step": 153660 }, { "epoch": 18.492178098676295, "grad_norm": 63240937472.0, "learning_rate": 0.0001844670738685554, "loss": 11.0833, "step": 153670 }, { "epoch": 18.49338146811071, "grad_norm": Infinity, "learning_rate": 0.0001844650374341967, "loss": 11.1395, "step": 153680 }, { "epoch": 18.494584837545126, "grad_norm": Infinity, "learning_rate": 0.00018446300087759623, "loss": 11.1156, "step": 153690 }, { "epoch": 18.495788206979544, "grad_norm": Infinity, "learning_rate": 0.00018446096419875695, "loss": 11.1737, "step": 153700 }, { "epoch": 18.496991576413958, "grad_norm": Infinity, "learning_rate": 0.00018445892739768182, "loss": 11.1832, "step": 153710 }, { "epoch": 18.498194945848375, "grad_norm": Infinity, "learning_rate": 0.00018445689047437376, "loss": 11.184, "step": 153720 }, { "epoch": 18.499398315282793, "grad_norm": Infinity, "learning_rate": 0.0001844548534288357, "loss": 11.2043, "step": 153730 }, { "epoch": 18.500601684717207, "grad_norm": Infinity, "learning_rate": 0.00018445281626107063, "loss": 11.0292, "step": 153740 }, { "epoch": 18.501805054151625, "grad_norm": Infinity, "learning_rate": 0.0001844507789710815, "loss": 11.0998, "step": 153750 }, { "epoch": 18.503008423586042, "grad_norm": Infinity, "learning_rate": 0.00018444874155887123, "loss": 11.1909, "step": 153760 }, { "epoch": 18.504211793020456, "grad_norm": Infinity, "learning_rate": 0.00018444670402444278, "loss": 11.1862, "step": 153770 }, { "epoch": 18.505415162454874, "grad_norm": Infinity, "learning_rate": 0.0001844446663677991, "loss": 11.2138, "step": 153780 }, { "epoch": 18.50661853188929, "grad_norm": Infinity, "learning_rate": 0.00018444262858894312, "loss": 11.1951, "step": 153790 }, { "epoch": 18.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00018444059068787786, "loss": 11.0419, "step": 153800 }, { "epoch": 18.509025270758123, "grad_norm": Infinity, "learning_rate": 0.0001844385526646062, "loss": 11.0918, "step": 153810 }, { "epoch": 18.51022864019254, "grad_norm": Infinity, "learning_rate": 0.00018443651451913108, "loss": 11.1833, "step": 153820 }, { "epoch": 18.511432009626954, "grad_norm": Infinity, "learning_rate": 0.00018443447625145548, "loss": 11.2276, "step": 153830 }, { "epoch": 18.512635379061372, "grad_norm": Infinity, "learning_rate": 0.00018443243786158236, "loss": 11.2327, "step": 153840 }, { "epoch": 18.51383874849579, "grad_norm": Infinity, "learning_rate": 0.00018443039934951466, "loss": 11.0014, "step": 153850 }, { "epoch": 18.515042117930204, "grad_norm": Infinity, "learning_rate": 0.00018442836071525532, "loss": 11.2101, "step": 153860 }, { "epoch": 18.51624548736462, "grad_norm": Infinity, "learning_rate": 0.00018442632195880728, "loss": 11.1144, "step": 153870 }, { "epoch": 18.51744885679904, "grad_norm": Infinity, "learning_rate": 0.00018442428308017354, "loss": 11.1024, "step": 153880 }, { "epoch": 18.518652226233453, "grad_norm": Infinity, "learning_rate": 0.000184422244079357, "loss": 11.1752, "step": 153890 }, { "epoch": 18.51985559566787, "grad_norm": Infinity, "learning_rate": 0.00018442020495636067, "loss": 11.108, "step": 153900 }, { "epoch": 18.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00018441816571118742, "loss": 11.1124, "step": 153910 }, { "epoch": 18.522262334536702, "grad_norm": Infinity, "learning_rate": 0.00018441612634384025, "loss": 11.1661, "step": 153920 }, { "epoch": 18.52346570397112, "grad_norm": Infinity, "learning_rate": 0.0001844140868543221, "loss": 11.1174, "step": 153930 }, { "epoch": 18.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00018441204724263592, "loss": 11.0932, "step": 153940 }, { "epoch": 18.52587244283995, "grad_norm": Infinity, "learning_rate": 0.00018441000750878467, "loss": 11.1473, "step": 153950 }, { "epoch": 18.52707581227437, "grad_norm": Infinity, "learning_rate": 0.0001844079676527713, "loss": 11.1022, "step": 153960 }, { "epoch": 18.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00018440592767459877, "loss": 11.0514, "step": 153970 }, { "epoch": 18.5294825511432, "grad_norm": Infinity, "learning_rate": 0.00018440388757427, "loss": 11.1552, "step": 153980 }, { "epoch": 18.530685920577618, "grad_norm": Infinity, "learning_rate": 0.000184401847351788, "loss": 11.0548, "step": 153990 }, { "epoch": 18.531889290012035, "grad_norm": Infinity, "learning_rate": 0.00018439980700715568, "loss": 11.1378, "step": 154000 }, { "epoch": 18.53309265944645, "grad_norm": Infinity, "learning_rate": 0.000184397766540376, "loss": 11.1906, "step": 154010 }, { "epoch": 18.534296028880867, "grad_norm": Infinity, "learning_rate": 0.0001843957259514519, "loss": 11.0453, "step": 154020 }, { "epoch": 18.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00018439368524038633, "loss": 11.2196, "step": 154030 }, { "epoch": 18.5367027677497, "grad_norm": Infinity, "learning_rate": 0.0001843916444071823, "loss": 11.2259, "step": 154040 }, { "epoch": 18.537906137184116, "grad_norm": Infinity, "learning_rate": 0.00018438960345184272, "loss": 11.239, "step": 154050 }, { "epoch": 18.53910950661853, "grad_norm": Infinity, "learning_rate": 0.00018438756237437054, "loss": 11.0352, "step": 154060 }, { "epoch": 18.540312876052948, "grad_norm": Infinity, "learning_rate": 0.00018438552117476872, "loss": 11.2338, "step": 154070 }, { "epoch": 18.541516245487365, "grad_norm": Infinity, "learning_rate": 0.00018438347985304024, "loss": 11.1626, "step": 154080 }, { "epoch": 18.54271961492178, "grad_norm": Infinity, "learning_rate": 0.00018438143840918798, "loss": 11.1056, "step": 154090 }, { "epoch": 18.543922984356197, "grad_norm": Infinity, "learning_rate": 0.00018437939684321498, "loss": 11.297, "step": 154100 }, { "epoch": 18.545126353790614, "grad_norm": Infinity, "learning_rate": 0.00018437735515512417, "loss": 11.2027, "step": 154110 }, { "epoch": 18.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00018437531334491848, "loss": 11.1862, "step": 154120 }, { "epoch": 18.547533092659446, "grad_norm": Infinity, "learning_rate": 0.00018437327141260088, "loss": 11.2691, "step": 154130 }, { "epoch": 18.548736462093864, "grad_norm": Infinity, "learning_rate": 0.00018437122935817434, "loss": 11.0923, "step": 154140 }, { "epoch": 18.549939831528278, "grad_norm": Infinity, "learning_rate": 0.0001843691871816418, "loss": 11.1205, "step": 154150 }, { "epoch": 18.551143200962695, "grad_norm": Infinity, "learning_rate": 0.0001843671448830062, "loss": 11.164, "step": 154160 }, { "epoch": 18.552346570397113, "grad_norm": Infinity, "learning_rate": 0.0001843651024622705, "loss": 11.1202, "step": 154170 }, { "epoch": 18.553549939831527, "grad_norm": Infinity, "learning_rate": 0.0001843630599194377, "loss": 11.1886, "step": 154180 }, { "epoch": 18.554753309265944, "grad_norm": Infinity, "learning_rate": 0.0001843610172545107, "loss": 11.1563, "step": 154190 }, { "epoch": 18.555956678700362, "grad_norm": Infinity, "learning_rate": 0.00018435897446749248, "loss": 11.1706, "step": 154200 }, { "epoch": 18.557160048134776, "grad_norm": Infinity, "learning_rate": 0.000184356931558386, "loss": 11.0745, "step": 154210 }, { "epoch": 18.558363417569193, "grad_norm": Infinity, "learning_rate": 0.00018435488852719422, "loss": 11.1546, "step": 154220 }, { "epoch": 18.55956678700361, "grad_norm": Infinity, "learning_rate": 0.0001843528453739201, "loss": 11.2963, "step": 154230 }, { "epoch": 18.560770156438025, "grad_norm": Infinity, "learning_rate": 0.00018435080209856656, "loss": 11.1517, "step": 154240 }, { "epoch": 18.561973525872443, "grad_norm": Infinity, "learning_rate": 0.0001843487587011366, "loss": 11.1994, "step": 154250 }, { "epoch": 18.56317689530686, "grad_norm": Infinity, "learning_rate": 0.00018434671518163315, "loss": 11.0373, "step": 154260 }, { "epoch": 18.564380264741274, "grad_norm": Infinity, "learning_rate": 0.00018434467154005918, "loss": 11.1424, "step": 154270 }, { "epoch": 18.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00018434262777641767, "loss": 11.2854, "step": 154280 }, { "epoch": 18.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00018434058389071154, "loss": 11.1538, "step": 154290 }, { "epoch": 18.567990373044523, "grad_norm": Infinity, "learning_rate": 0.00018433853988294375, "loss": 11.1253, "step": 154300 }, { "epoch": 18.56919374247894, "grad_norm": Infinity, "learning_rate": 0.0001843364957531173, "loss": 11.1735, "step": 154310 }, { "epoch": 18.57039711191336, "grad_norm": Infinity, "learning_rate": 0.00018433445150123508, "loss": 11.0829, "step": 154320 }, { "epoch": 18.571600481347772, "grad_norm": Infinity, "learning_rate": 0.0001843324071273001, "loss": 11.2586, "step": 154330 }, { "epoch": 18.57280385078219, "grad_norm": Infinity, "learning_rate": 0.00018433036263131532, "loss": 11.1248, "step": 154340 }, { "epoch": 18.574007220216608, "grad_norm": Infinity, "learning_rate": 0.00018432831801328366, "loss": 11.111, "step": 154350 }, { "epoch": 18.57521058965102, "grad_norm": Infinity, "learning_rate": 0.00018432627327320816, "loss": 11.0498, "step": 154360 }, { "epoch": 18.57641395908544, "grad_norm": Infinity, "learning_rate": 0.00018432422841109167, "loss": 11.2345, "step": 154370 }, { "epoch": 18.577617328519857, "grad_norm": Infinity, "learning_rate": 0.00018432218342693721, "loss": 11.1254, "step": 154380 }, { "epoch": 18.57882069795427, "grad_norm": Infinity, "learning_rate": 0.00018432013832074776, "loss": 11.2464, "step": 154390 }, { "epoch": 18.58002406738869, "grad_norm": Infinity, "learning_rate": 0.00018431809309252622, "loss": 11.1221, "step": 154400 }, { "epoch": 18.581227436823106, "grad_norm": Infinity, "learning_rate": 0.0001843160477422756, "loss": 11.194, "step": 154410 }, { "epoch": 18.58243080625752, "grad_norm": Infinity, "learning_rate": 0.00018431400226999882, "loss": 11.2833, "step": 154420 }, { "epoch": 18.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00018431195667569888, "loss": 11.1216, "step": 154430 }, { "epoch": 18.584837545126355, "grad_norm": Infinity, "learning_rate": 0.00018430991095937872, "loss": 11.0644, "step": 154440 }, { "epoch": 18.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00018430786512104127, "loss": 11.1347, "step": 154450 }, { "epoch": 18.587244283995187, "grad_norm": Infinity, "learning_rate": 0.0001843058191606896, "loss": 11.1396, "step": 154460 }, { "epoch": 18.588447653429604, "grad_norm": Infinity, "learning_rate": 0.00018430377307832653, "loss": 11.1106, "step": 154470 }, { "epoch": 18.589651022864018, "grad_norm": Infinity, "learning_rate": 0.0001843017268739551, "loss": 11.1095, "step": 154480 }, { "epoch": 18.590854392298436, "grad_norm": Infinity, "learning_rate": 0.00018429968054757827, "loss": 11.1176, "step": 154490 }, { "epoch": 18.592057761732853, "grad_norm": Infinity, "learning_rate": 0.00018429763409919897, "loss": 11.0243, "step": 154500 }, { "epoch": 18.593261131167267, "grad_norm": Infinity, "learning_rate": 0.0001842955875288202, "loss": 11.0968, "step": 154510 }, { "epoch": 18.594464500601685, "grad_norm": Infinity, "learning_rate": 0.0001842935408364449, "loss": 11.139, "step": 154520 }, { "epoch": 18.595667870036102, "grad_norm": Infinity, "learning_rate": 0.00018429149402207604, "loss": 11.2446, "step": 154530 }, { "epoch": 18.596871239470516, "grad_norm": Infinity, "learning_rate": 0.00018428944708571656, "loss": 11.1045, "step": 154540 }, { "epoch": 18.598074608904934, "grad_norm": Infinity, "learning_rate": 0.00018428740002736946, "loss": 11.2959, "step": 154550 }, { "epoch": 18.59927797833935, "grad_norm": Infinity, "learning_rate": 0.00018428535284703768, "loss": 11.1471, "step": 154560 }, { "epoch": 18.600481347773766, "grad_norm": Infinity, "learning_rate": 0.00018428330554472416, "loss": 11.1321, "step": 154570 }, { "epoch": 18.601684717208183, "grad_norm": Infinity, "learning_rate": 0.00018428125812043194, "loss": 11.1653, "step": 154580 }, { "epoch": 18.6028880866426, "grad_norm": Infinity, "learning_rate": 0.00018427921057416388, "loss": 11.0967, "step": 154590 }, { "epoch": 18.604091456077015, "grad_norm": Infinity, "learning_rate": 0.00018427716290592302, "loss": 11.0111, "step": 154600 }, { "epoch": 18.605294825511432, "grad_norm": Infinity, "learning_rate": 0.00018427511511571228, "loss": 11.1689, "step": 154610 }, { "epoch": 18.60649819494585, "grad_norm": Infinity, "learning_rate": 0.00018427306720353466, "loss": 11.1187, "step": 154620 }, { "epoch": 18.607701564380264, "grad_norm": Infinity, "learning_rate": 0.00018427101916939308, "loss": 11.2486, "step": 154630 }, { "epoch": 18.60890493381468, "grad_norm": Infinity, "learning_rate": 0.00018426897101329054, "loss": 11.0974, "step": 154640 }, { "epoch": 18.6101083032491, "grad_norm": Infinity, "learning_rate": 0.00018426692273523, "loss": 11.228, "step": 154650 }, { "epoch": 18.611311672683513, "grad_norm": Infinity, "learning_rate": 0.00018426487433521445, "loss": 11.1379, "step": 154660 }, { "epoch": 18.61251504211793, "grad_norm": Infinity, "learning_rate": 0.00018426282581324678, "loss": 11.1701, "step": 154670 }, { "epoch": 18.613718411552348, "grad_norm": Infinity, "learning_rate": 0.00018426077716932998, "loss": 11.1545, "step": 154680 }, { "epoch": 18.614921780986762, "grad_norm": Infinity, "learning_rate": 0.00018425872840346707, "loss": 11.1638, "step": 154690 }, { "epoch": 18.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00018425667951566096, "loss": 11.1699, "step": 154700 }, { "epoch": 18.617328519855597, "grad_norm": Infinity, "learning_rate": 0.00018425463050591464, "loss": 11.2327, "step": 154710 }, { "epoch": 18.61853188929001, "grad_norm": Infinity, "learning_rate": 0.00018425258137423108, "loss": 11.1129, "step": 154720 }, { "epoch": 18.61973525872443, "grad_norm": Infinity, "learning_rate": 0.0001842505321206132, "loss": 11.1449, "step": 154730 }, { "epoch": 18.620938628158846, "grad_norm": Infinity, "learning_rate": 0.00018424848274506403, "loss": 11.019, "step": 154740 }, { "epoch": 18.62214199759326, "grad_norm": Infinity, "learning_rate": 0.0001842464332475865, "loss": 11.1304, "step": 154750 }, { "epoch": 18.623345367027678, "grad_norm": Infinity, "learning_rate": 0.00018424438362818358, "loss": 11.1931, "step": 154760 }, { "epoch": 18.624548736462096, "grad_norm": Infinity, "learning_rate": 0.00018424233388685823, "loss": 11.0813, "step": 154770 }, { "epoch": 18.62575210589651, "grad_norm": Infinity, "learning_rate": 0.00018424028402361345, "loss": 11.1179, "step": 154780 }, { "epoch": 18.626955475330927, "grad_norm": Infinity, "learning_rate": 0.00018423823403845212, "loss": 11.2252, "step": 154790 }, { "epoch": 18.628158844765345, "grad_norm": Infinity, "learning_rate": 0.00018423618393137732, "loss": 11.2128, "step": 154800 }, { "epoch": 18.62936221419976, "grad_norm": Infinity, "learning_rate": 0.00018423413370239198, "loss": 11.1383, "step": 154810 }, { "epoch": 18.630565583634176, "grad_norm": Infinity, "learning_rate": 0.000184232083351499, "loss": 11.1558, "step": 154820 }, { "epoch": 18.63176895306859, "grad_norm": Infinity, "learning_rate": 0.00018423003287870143, "loss": 11.1106, "step": 154830 }, { "epoch": 18.632972322503008, "grad_norm": Infinity, "learning_rate": 0.0001842279822840022, "loss": 11.2044, "step": 154840 }, { "epoch": 18.634175691937426, "grad_norm": Infinity, "learning_rate": 0.0001842259315674043, "loss": 11.0732, "step": 154850 }, { "epoch": 18.63537906137184, "grad_norm": Infinity, "learning_rate": 0.00018422388072891063, "loss": 11.2463, "step": 154860 }, { "epoch": 18.636582430806257, "grad_norm": Infinity, "learning_rate": 0.00018422182976852427, "loss": 11.208, "step": 154870 }, { "epoch": 18.637785800240675, "grad_norm": Infinity, "learning_rate": 0.00018421977868624808, "loss": 11.0892, "step": 154880 }, { "epoch": 18.63898916967509, "grad_norm": Infinity, "learning_rate": 0.0001842177274820851, "loss": 11.0983, "step": 154890 }, { "epoch": 18.640192539109506, "grad_norm": Infinity, "learning_rate": 0.0001842156761560383, "loss": 11.0362, "step": 154900 }, { "epoch": 18.641395908543924, "grad_norm": Infinity, "learning_rate": 0.0001842136247081106, "loss": 11.2536, "step": 154910 }, { "epoch": 18.642599277978338, "grad_norm": Infinity, "learning_rate": 0.000184211573138305, "loss": 11.1086, "step": 154920 }, { "epoch": 18.643802647412755, "grad_norm": Infinity, "learning_rate": 0.00018420952144662444, "loss": 11.1801, "step": 154930 }, { "epoch": 18.645006016847173, "grad_norm": Infinity, "learning_rate": 0.0001842074696330719, "loss": 11.1866, "step": 154940 }, { "epoch": 18.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00018420541769765042, "loss": 11.1017, "step": 154950 }, { "epoch": 18.647412755716005, "grad_norm": Infinity, "learning_rate": 0.00018420336564036287, "loss": 11.2785, "step": 154960 }, { "epoch": 18.648616125150422, "grad_norm": Infinity, "learning_rate": 0.0001842013134612123, "loss": 11.0446, "step": 154970 }, { "epoch": 18.649819494584836, "grad_norm": Infinity, "learning_rate": 0.0001841992611602016, "loss": 11.1732, "step": 154980 }, { "epoch": 18.651022864019254, "grad_norm": Infinity, "learning_rate": 0.0001841972087373338, "loss": 11.136, "step": 154990 }, { "epoch": 18.65222623345367, "grad_norm": Infinity, "learning_rate": 0.00018419515619261186, "loss": 11.0919, "step": 155000 }, { "epoch": 18.653429602888085, "grad_norm": Infinity, "learning_rate": 0.00018419310352603868, "loss": 11.1219, "step": 155010 }, { "epoch": 18.654632972322503, "grad_norm": Infinity, "learning_rate": 0.00018419105073761737, "loss": 11.1603, "step": 155020 }, { "epoch": 18.65583634175692, "grad_norm": Infinity, "learning_rate": 0.00018418899782735078, "loss": 11.1119, "step": 155030 }, { "epoch": 18.657039711191334, "grad_norm": Infinity, "learning_rate": 0.00018418694479524193, "loss": 11.2415, "step": 155040 }, { "epoch": 18.658243080625752, "grad_norm": Infinity, "learning_rate": 0.0001841848916412938, "loss": 11.1529, "step": 155050 }, { "epoch": 18.65944645006017, "grad_norm": Infinity, "learning_rate": 0.00018418283836550936, "loss": 11.0478, "step": 155060 }, { "epoch": 18.660649819494584, "grad_norm": Infinity, "learning_rate": 0.00018418078496789154, "loss": 11.1712, "step": 155070 }, { "epoch": 18.661853188929, "grad_norm": Infinity, "learning_rate": 0.00018417873144844333, "loss": 11.0886, "step": 155080 }, { "epoch": 18.66305655836342, "grad_norm": Infinity, "learning_rate": 0.00018417667780716777, "loss": 11.0901, "step": 155090 }, { "epoch": 18.664259927797833, "grad_norm": Infinity, "learning_rate": 0.00018417462404406773, "loss": 11.1937, "step": 155100 }, { "epoch": 18.66546329723225, "grad_norm": Infinity, "learning_rate": 0.0001841725701591462, "loss": 11.0931, "step": 155110 }, { "epoch": 18.666666666666668, "grad_norm": Infinity, "learning_rate": 0.00018417051615240624, "loss": 11.0515, "step": 155120 }, { "epoch": 18.667870036101082, "grad_norm": Infinity, "learning_rate": 0.00018416846202385073, "loss": 11.2758, "step": 155130 }, { "epoch": 18.6690734055355, "grad_norm": Infinity, "learning_rate": 0.0001841664077734827, "loss": 11.1772, "step": 155140 }, { "epoch": 18.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00018416435340130508, "loss": 11.0956, "step": 155150 }, { "epoch": 18.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00018416229890732083, "loss": 11.0273, "step": 155160 }, { "epoch": 18.67268351383875, "grad_norm": Infinity, "learning_rate": 0.000184160244291533, "loss": 11.207, "step": 155170 }, { "epoch": 18.673886883273166, "grad_norm": Infinity, "learning_rate": 0.0001841581895539445, "loss": 11.1246, "step": 155180 }, { "epoch": 18.67509025270758, "grad_norm": Infinity, "learning_rate": 0.0001841561346945583, "loss": 11.2164, "step": 155190 }, { "epoch": 18.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00018415407971337743, "loss": 11.1303, "step": 155200 }, { "epoch": 18.677496991576415, "grad_norm": Infinity, "learning_rate": 0.00018415202461040484, "loss": 11.0508, "step": 155210 }, { "epoch": 18.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00018414996938564347, "loss": 11.1942, "step": 155220 }, { "epoch": 18.679903730445247, "grad_norm": Infinity, "learning_rate": 0.0001841479140390963, "loss": 11.1507, "step": 155230 }, { "epoch": 18.681107099879664, "grad_norm": Infinity, "learning_rate": 0.00018414585857076635, "loss": 11.094, "step": 155240 }, { "epoch": 18.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00018414380298065658, "loss": 11.3475, "step": 155250 }, { "epoch": 18.683513838748496, "grad_norm": Infinity, "learning_rate": 0.00018414174726876992, "loss": 11.2109, "step": 155260 }, { "epoch": 18.684717208182914, "grad_norm": Infinity, "learning_rate": 0.0001841396914351094, "loss": 11.328, "step": 155270 }, { "epoch": 18.685920577617328, "grad_norm": Infinity, "learning_rate": 0.00018413763547967796, "loss": 11.2317, "step": 155280 }, { "epoch": 18.687123947051745, "grad_norm": Infinity, "learning_rate": 0.00018413557940247862, "loss": 11.0034, "step": 155290 }, { "epoch": 18.688327316486163, "grad_norm": Infinity, "learning_rate": 0.00018413352320351427, "loss": 11.2417, "step": 155300 }, { "epoch": 18.689530685920577, "grad_norm": Infinity, "learning_rate": 0.00018413146688278797, "loss": 10.9798, "step": 155310 }, { "epoch": 18.690734055354994, "grad_norm": Infinity, "learning_rate": 0.00018412941044030267, "loss": 11.2268, "step": 155320 }, { "epoch": 18.691937424789412, "grad_norm": Infinity, "learning_rate": 0.00018412735387606131, "loss": 11.1689, "step": 155330 }, { "epoch": 18.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00018412529719006695, "loss": 11.1137, "step": 155340 }, { "epoch": 18.694344163658243, "grad_norm": Infinity, "learning_rate": 0.00018412324038232249, "loss": 11.1095, "step": 155350 }, { "epoch": 18.69554753309266, "grad_norm": Infinity, "learning_rate": 0.00018412118345283092, "loss": 10.9851, "step": 155360 }, { "epoch": 18.696750902527075, "grad_norm": Infinity, "learning_rate": 0.00018411912640159525, "loss": 11.0877, "step": 155370 }, { "epoch": 18.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00018411706922861844, "loss": 11.1739, "step": 155380 }, { "epoch": 18.69915764139591, "grad_norm": Infinity, "learning_rate": 0.00018411501193390346, "loss": 11.1279, "step": 155390 }, { "epoch": 18.700361010830324, "grad_norm": Infinity, "learning_rate": 0.00018411295451745328, "loss": 11.1035, "step": 155400 }, { "epoch": 18.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00018411089697927086, "loss": 11.1095, "step": 155410 }, { "epoch": 18.70276774969916, "grad_norm": Infinity, "learning_rate": 0.00018410883931935926, "loss": 11.1272, "step": 155420 }, { "epoch": 18.703971119133573, "grad_norm": Infinity, "learning_rate": 0.00018410678153772136, "loss": 11.1291, "step": 155430 }, { "epoch": 18.70517448856799, "grad_norm": Infinity, "learning_rate": 0.0001841047236343602, "loss": 11.1143, "step": 155440 }, { "epoch": 18.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00018410266560927875, "loss": 11.081, "step": 155450 }, { "epoch": 18.707581227436823, "grad_norm": Infinity, "learning_rate": 0.00018410060746247995, "loss": 11.1918, "step": 155460 }, { "epoch": 18.70878459687124, "grad_norm": Infinity, "learning_rate": 0.00018409854919396686, "loss": 11.1112, "step": 155470 }, { "epoch": 18.709987966305654, "grad_norm": Infinity, "learning_rate": 0.00018409649080374237, "loss": 11.1154, "step": 155480 }, { "epoch": 18.71119133574007, "grad_norm": Infinity, "learning_rate": 0.00018409443229180946, "loss": 11.1244, "step": 155490 }, { "epoch": 18.71239470517449, "grad_norm": Infinity, "learning_rate": 0.00018409237365817118, "loss": 11.1785, "step": 155500 }, { "epoch": 18.713598074608903, "grad_norm": Infinity, "learning_rate": 0.0001840903149028305, "loss": 11.0254, "step": 155510 }, { "epoch": 18.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00018408825602579033, "loss": 11.096, "step": 155520 }, { "epoch": 18.71600481347774, "grad_norm": Infinity, "learning_rate": 0.00018408619702705372, "loss": 11.1088, "step": 155530 }, { "epoch": 18.717208182912152, "grad_norm": Infinity, "learning_rate": 0.0001840841379066236, "loss": 11.1197, "step": 155540 }, { "epoch": 18.71841155234657, "grad_norm": Infinity, "learning_rate": 0.000184082078664503, "loss": 11.043, "step": 155550 }, { "epoch": 18.719614921780988, "grad_norm": Infinity, "learning_rate": 0.00018408001930069484, "loss": 11.0468, "step": 155560 }, { "epoch": 18.7208182912154, "grad_norm": Infinity, "learning_rate": 0.00018407795981520216, "loss": 11.0612, "step": 155570 }, { "epoch": 18.72202166064982, "grad_norm": Infinity, "learning_rate": 0.00018407590020802793, "loss": 11.2637, "step": 155580 }, { "epoch": 18.723225030084237, "grad_norm": Infinity, "learning_rate": 0.0001840738404791751, "loss": 11.1286, "step": 155590 }, { "epoch": 18.72442839951865, "grad_norm": Infinity, "learning_rate": 0.00018407178062864664, "loss": 10.9937, "step": 155600 }, { "epoch": 18.72563176895307, "grad_norm": Infinity, "learning_rate": 0.00018406972065644557, "loss": 11.0324, "step": 155610 }, { "epoch": 18.726835138387486, "grad_norm": Infinity, "learning_rate": 0.00018406766056257485, "loss": 11.0811, "step": 155620 }, { "epoch": 18.7280385078219, "grad_norm": Infinity, "learning_rate": 0.00018406560034703748, "loss": 11.1048, "step": 155630 }, { "epoch": 18.729241877256317, "grad_norm": Infinity, "learning_rate": 0.00018406354000983642, "loss": 10.9592, "step": 155640 }, { "epoch": 18.730445246690735, "grad_norm": Infinity, "learning_rate": 0.0001840614795509747, "loss": 11.0839, "step": 155650 }, { "epoch": 18.73164861612515, "grad_norm": Infinity, "learning_rate": 0.00018405941897045523, "loss": 11.1915, "step": 155660 }, { "epoch": 18.732851985559567, "grad_norm": Infinity, "learning_rate": 0.00018405735826828107, "loss": 11.0202, "step": 155670 }, { "epoch": 18.734055354993984, "grad_norm": Infinity, "learning_rate": 0.0001840552974444551, "loss": 11.1132, "step": 155680 }, { "epoch": 18.735258724428398, "grad_norm": Infinity, "learning_rate": 0.0001840532364989804, "loss": 11.2307, "step": 155690 }, { "epoch": 18.736462093862816, "grad_norm": Infinity, "learning_rate": 0.0001840511754318599, "loss": 11.1407, "step": 155700 }, { "epoch": 18.737665463297233, "grad_norm": Infinity, "learning_rate": 0.00018404911424309658, "loss": 11.1428, "step": 155710 }, { "epoch": 18.738868832731647, "grad_norm": Infinity, "learning_rate": 0.00018404705293269347, "loss": 11.1013, "step": 155720 }, { "epoch": 18.740072202166065, "grad_norm": Infinity, "learning_rate": 0.00018404499150065351, "loss": 11.1795, "step": 155730 }, { "epoch": 18.741275571600482, "grad_norm": Infinity, "learning_rate": 0.0001840429299469797, "loss": 11.1118, "step": 155740 }, { "epoch": 18.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00018404086827167502, "loss": 11.0965, "step": 155750 }, { "epoch": 18.743682310469314, "grad_norm": Infinity, "learning_rate": 0.00018403880647474249, "loss": 11.1054, "step": 155760 }, { "epoch": 18.74488567990373, "grad_norm": Infinity, "learning_rate": 0.00018403674455618501, "loss": 11.0522, "step": 155770 }, { "epoch": 18.746089049338146, "grad_norm": Infinity, "learning_rate": 0.00018403468251600562, "loss": 10.9737, "step": 155780 }, { "epoch": 18.747292418772563, "grad_norm": Infinity, "learning_rate": 0.0001840326203542073, "loss": 11.2099, "step": 155790 }, { "epoch": 18.74849578820698, "grad_norm": Infinity, "learning_rate": 0.00018403055807079307, "loss": 11.174, "step": 155800 }, { "epoch": 18.749699157641395, "grad_norm": Infinity, "learning_rate": 0.00018402849566576582, "loss": 11.0626, "step": 155810 }, { "epoch": 18.750902527075812, "grad_norm": Infinity, "learning_rate": 0.00018402643313912862, "loss": 11.1572, "step": 155820 }, { "epoch": 18.75210589651023, "grad_norm": Infinity, "learning_rate": 0.00018402437049088444, "loss": 11.0243, "step": 155830 }, { "epoch": 18.753309265944644, "grad_norm": Infinity, "learning_rate": 0.0001840223077210362, "loss": 11.0665, "step": 155840 }, { "epoch": 18.75451263537906, "grad_norm": Infinity, "learning_rate": 0.000184020244829587, "loss": 11.1575, "step": 155850 }, { "epoch": 18.75571600481348, "grad_norm": Infinity, "learning_rate": 0.0001840181818165397, "loss": 11.2205, "step": 155860 }, { "epoch": 18.756919374247893, "grad_norm": Infinity, "learning_rate": 0.00018401611868189738, "loss": 11.2051, "step": 155870 }, { "epoch": 18.75812274368231, "grad_norm": Infinity, "learning_rate": 0.00018401405542566298, "loss": 11.1057, "step": 155880 }, { "epoch": 18.759326113116728, "grad_norm": Infinity, "learning_rate": 0.00018401199204783953, "loss": 11.094, "step": 155890 }, { "epoch": 18.760529482551142, "grad_norm": Infinity, "learning_rate": 0.00018400992854842997, "loss": 11.0583, "step": 155900 }, { "epoch": 18.76173285198556, "grad_norm": Infinity, "learning_rate": 0.0001840078649274373, "loss": 11.1038, "step": 155910 }, { "epoch": 18.762936221419977, "grad_norm": Infinity, "learning_rate": 0.0001840058011848645, "loss": 11.3338, "step": 155920 }, { "epoch": 18.76413959085439, "grad_norm": Infinity, "learning_rate": 0.00018400373732071457, "loss": 11.0459, "step": 155930 }, { "epoch": 18.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00018400167333499048, "loss": 11.2301, "step": 155940 }, { "epoch": 18.766546329723226, "grad_norm": Infinity, "learning_rate": 0.00018399960922769527, "loss": 11.1998, "step": 155950 }, { "epoch": 18.76774969915764, "grad_norm": Infinity, "learning_rate": 0.00018399754499883185, "loss": 11.096, "step": 155960 }, { "epoch": 18.768953068592058, "grad_norm": Infinity, "learning_rate": 0.00018399548064840322, "loss": 11.1265, "step": 155970 }, { "epoch": 18.770156438026476, "grad_norm": Infinity, "learning_rate": 0.00018399341617641245, "loss": 11.1074, "step": 155980 }, { "epoch": 18.77135980746089, "grad_norm": Infinity, "learning_rate": 0.00018399135158286245, "loss": 11.1348, "step": 155990 }, { "epoch": 18.772563176895307, "grad_norm": Infinity, "learning_rate": 0.0001839892868677562, "loss": 11.0085, "step": 156000 }, { "epoch": 18.773766546329725, "grad_norm": Infinity, "learning_rate": 0.00018398722203109675, "loss": 11.0944, "step": 156010 }, { "epoch": 18.77496991576414, "grad_norm": Infinity, "learning_rate": 0.00018398515707288704, "loss": 11.1064, "step": 156020 }, { "epoch": 18.776173285198556, "grad_norm": Infinity, "learning_rate": 0.00018398309199313008, "loss": 11.202, "step": 156030 }, { "epoch": 18.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00018398102679182885, "loss": 11.0866, "step": 156040 }, { "epoch": 18.778580024067388, "grad_norm": Infinity, "learning_rate": 0.00018397896146898633, "loss": 11.1692, "step": 156050 }, { "epoch": 18.779783393501805, "grad_norm": Infinity, "learning_rate": 0.0001839768960246055, "loss": 11.1189, "step": 156060 }, { "epoch": 18.780986762936223, "grad_norm": Infinity, "learning_rate": 0.00018397483045868942, "loss": 11.1767, "step": 156070 }, { "epoch": 18.782190132370637, "grad_norm": Infinity, "learning_rate": 0.00018397276477124098, "loss": 11.1687, "step": 156080 }, { "epoch": 18.783393501805055, "grad_norm": Infinity, "learning_rate": 0.00018397069896226325, "loss": 11.1573, "step": 156090 }, { "epoch": 18.784596871239472, "grad_norm": Infinity, "learning_rate": 0.00018396863303175918, "loss": 11.0969, "step": 156100 }, { "epoch": 18.785800240673886, "grad_norm": Infinity, "learning_rate": 0.00018396656697973175, "loss": 11.0836, "step": 156110 }, { "epoch": 18.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00018396450080618396, "loss": 11.1231, "step": 156120 }, { "epoch": 18.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00018396243451111884, "loss": 11.0268, "step": 156130 }, { "epoch": 18.789410348977135, "grad_norm": Infinity, "learning_rate": 0.00018396036809453933, "loss": 11.2009, "step": 156140 }, { "epoch": 18.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00018395830155644845, "loss": 11.106, "step": 156150 }, { "epoch": 18.79181708784597, "grad_norm": Infinity, "learning_rate": 0.00018395623489684916, "loss": 11.1531, "step": 156160 }, { "epoch": 18.793020457280385, "grad_norm": Infinity, "learning_rate": 0.00018395416811574448, "loss": 11.1239, "step": 156170 }, { "epoch": 18.794223826714802, "grad_norm": Infinity, "learning_rate": 0.0001839521012131374, "loss": 11.0848, "step": 156180 }, { "epoch": 18.79542719614922, "grad_norm": Infinity, "learning_rate": 0.00018395003418903086, "loss": 11.0738, "step": 156190 }, { "epoch": 18.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00018394796704342795, "loss": 11.1085, "step": 156200 }, { "epoch": 18.79783393501805, "grad_norm": Infinity, "learning_rate": 0.0001839458997763316, "loss": 11.1598, "step": 156210 }, { "epoch": 18.799037304452465, "grad_norm": Infinity, "learning_rate": 0.00018394383238774477, "loss": 11.1524, "step": 156220 }, { "epoch": 18.800240673886883, "grad_norm": Infinity, "learning_rate": 0.00018394176487767052, "loss": 11.0619, "step": 156230 }, { "epoch": 18.8014440433213, "grad_norm": Infinity, "learning_rate": 0.0001839396972461118, "loss": 11.0887, "step": 156240 }, { "epoch": 18.802647412755714, "grad_norm": Infinity, "learning_rate": 0.0001839376294930716, "loss": 11.1339, "step": 156250 }, { "epoch": 18.803850782190132, "grad_norm": Infinity, "learning_rate": 0.00018393556161855297, "loss": 11.2568, "step": 156260 }, { "epoch": 18.80505415162455, "grad_norm": Infinity, "learning_rate": 0.00018393349362255882, "loss": 11.2225, "step": 156270 }, { "epoch": 18.806257521058964, "grad_norm": Infinity, "learning_rate": 0.0001839314255050922, "loss": 11.1804, "step": 156280 }, { "epoch": 18.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00018392935726615608, "loss": 11.0418, "step": 156290 }, { "epoch": 18.8086642599278, "grad_norm": Infinity, "learning_rate": 0.00018392728890575345, "loss": 11.169, "step": 156300 }, { "epoch": 18.809867629362213, "grad_norm": Infinity, "learning_rate": 0.00018392522042388733, "loss": 11.1228, "step": 156310 }, { "epoch": 18.81107099879663, "grad_norm": Infinity, "learning_rate": 0.0001839231518205607, "loss": 11.2595, "step": 156320 }, { "epoch": 18.812274368231048, "grad_norm": Infinity, "learning_rate": 0.00018392108309577652, "loss": 11.2344, "step": 156330 }, { "epoch": 18.813477737665462, "grad_norm": Infinity, "learning_rate": 0.0001839190142495378, "loss": 11.0632, "step": 156340 }, { "epoch": 18.81468110709988, "grad_norm": Infinity, "learning_rate": 0.00018391694528184757, "loss": 11.1457, "step": 156350 }, { "epoch": 18.815884476534297, "grad_norm": Infinity, "learning_rate": 0.00018391487619270883, "loss": 11.2568, "step": 156360 }, { "epoch": 18.81708784596871, "grad_norm": Infinity, "learning_rate": 0.0001839128069821245, "loss": 11.1566, "step": 156370 }, { "epoch": 18.81829121540313, "grad_norm": Infinity, "learning_rate": 0.00018391073765009765, "loss": 11.3208, "step": 156380 }, { "epoch": 18.819494584837546, "grad_norm": Infinity, "learning_rate": 0.00018390866819663123, "loss": 11.101, "step": 156390 }, { "epoch": 18.82069795427196, "grad_norm": Infinity, "learning_rate": 0.0001839065986217283, "loss": 11.1122, "step": 156400 }, { "epoch": 18.821901323706378, "grad_norm": Infinity, "learning_rate": 0.00018390452892539175, "loss": 11.1221, "step": 156410 }, { "epoch": 18.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00018390245910762464, "loss": 11.084, "step": 156420 }, { "epoch": 18.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00018390038916842998, "loss": 11.1787, "step": 156430 }, { "epoch": 18.825511432009627, "grad_norm": Infinity, "learning_rate": 0.00018389831910781073, "loss": 11.1843, "step": 156440 }, { "epoch": 18.826714801444044, "grad_norm": Infinity, "learning_rate": 0.00018389624892576988, "loss": 11.1098, "step": 156450 }, { "epoch": 18.82791817087846, "grad_norm": Infinity, "learning_rate": 0.00018389417862231048, "loss": 11.1299, "step": 156460 }, { "epoch": 18.829121540312876, "grad_norm": Infinity, "learning_rate": 0.00018389210819743545, "loss": 11.0784, "step": 156470 }, { "epoch": 18.830324909747294, "grad_norm": Infinity, "learning_rate": 0.00018389003765114785, "loss": 11.1469, "step": 156480 }, { "epoch": 18.831528279181708, "grad_norm": Infinity, "learning_rate": 0.00018388796698345065, "loss": 11.1581, "step": 156490 }, { "epoch": 18.832731648616125, "grad_norm": Infinity, "learning_rate": 0.00018388589619434687, "loss": 11.0997, "step": 156500 }, { "epoch": 18.833935018050543, "grad_norm": Infinity, "learning_rate": 0.00018388382528383948, "loss": 11.1977, "step": 156510 }, { "epoch": 18.835138387484957, "grad_norm": Infinity, "learning_rate": 0.00018388175425193147, "loss": 11.1397, "step": 156520 }, { "epoch": 18.836341756919374, "grad_norm": Infinity, "learning_rate": 0.00018387968309862583, "loss": 11.3134, "step": 156530 }, { "epoch": 18.837545126353792, "grad_norm": Infinity, "learning_rate": 0.0001838776118239256, "loss": 11.0607, "step": 156540 }, { "epoch": 18.838748495788206, "grad_norm": Infinity, "learning_rate": 0.00018387554042783376, "loss": 11.0607, "step": 156550 }, { "epoch": 18.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00018387346891035332, "loss": 11.1197, "step": 156560 }, { "epoch": 18.84115523465704, "grad_norm": Infinity, "learning_rate": 0.00018387139727148725, "loss": 11.1981, "step": 156570 }, { "epoch": 18.842358604091455, "grad_norm": Infinity, "learning_rate": 0.00018386932551123857, "loss": 11.165, "step": 156580 }, { "epoch": 18.843561973525873, "grad_norm": Infinity, "learning_rate": 0.00018386725362961025, "loss": 11.1773, "step": 156590 }, { "epoch": 18.84476534296029, "grad_norm": Infinity, "learning_rate": 0.00018386518162660532, "loss": 11.1637, "step": 156600 }, { "epoch": 18.845968712394704, "grad_norm": Infinity, "learning_rate": 0.00018386310950222677, "loss": 11.1841, "step": 156610 }, { "epoch": 18.84717208182912, "grad_norm": Infinity, "learning_rate": 0.0001838610372564776, "loss": 11.2402, "step": 156620 }, { "epoch": 18.84837545126354, "grad_norm": Infinity, "learning_rate": 0.00018385896488936075, "loss": 11.1267, "step": 156630 }, { "epoch": 18.849578820697953, "grad_norm": Infinity, "learning_rate": 0.00018385689240087936, "loss": 11.2748, "step": 156640 }, { "epoch": 18.85078219013237, "grad_norm": Infinity, "learning_rate": 0.00018385481979103628, "loss": 11.2116, "step": 156650 }, { "epoch": 18.85198555956679, "grad_norm": Infinity, "learning_rate": 0.0001838527470598346, "loss": 11.1882, "step": 156660 }, { "epoch": 18.853188929001202, "grad_norm": Infinity, "learning_rate": 0.0001838506742072773, "loss": 11.1638, "step": 156670 }, { "epoch": 18.85439229843562, "grad_norm": Infinity, "learning_rate": 0.00018384860123336734, "loss": 11.208, "step": 156680 }, { "epoch": 18.855595667870038, "grad_norm": Infinity, "learning_rate": 0.00018384652813810778, "loss": 11.1221, "step": 156690 }, { "epoch": 18.85679903730445, "grad_norm": Infinity, "learning_rate": 0.00018384445492150158, "loss": 11.0525, "step": 156700 }, { "epoch": 18.85800240673887, "grad_norm": Infinity, "learning_rate": 0.00018384238158355176, "loss": 11.1971, "step": 156710 }, { "epoch": 18.859205776173287, "grad_norm": Infinity, "learning_rate": 0.00018384030812426132, "loss": 11.1792, "step": 156720 }, { "epoch": 18.8604091456077, "grad_norm": Infinity, "learning_rate": 0.00018383823454363324, "loss": 11.1946, "step": 156730 }, { "epoch": 18.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00018383616084167054, "loss": 11.064, "step": 156740 }, { "epoch": 18.862815884476536, "grad_norm": Infinity, "learning_rate": 0.00018383408701837622, "loss": 11.0675, "step": 156750 }, { "epoch": 18.86401925391095, "grad_norm": Infinity, "learning_rate": 0.00018383201307375327, "loss": 11.1382, "step": 156760 }, { "epoch": 18.865222623345367, "grad_norm": Infinity, "learning_rate": 0.0001838299390078047, "loss": 11.054, "step": 156770 }, { "epoch": 18.866425992779785, "grad_norm": Infinity, "learning_rate": 0.0001838278648205335, "loss": 11.105, "step": 156780 }, { "epoch": 18.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00018382579051194273, "loss": 11.1738, "step": 156790 }, { "epoch": 18.868832731648617, "grad_norm": Infinity, "learning_rate": 0.0001838237160820353, "loss": 11.0556, "step": 156800 }, { "epoch": 18.870036101083034, "grad_norm": Infinity, "learning_rate": 0.00018382164153081426, "loss": 11.1819, "step": 156810 }, { "epoch": 18.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00018381956685828264, "loss": 11.1954, "step": 156820 }, { "epoch": 18.872442839951866, "grad_norm": Infinity, "learning_rate": 0.0001838174920644434, "loss": 11.17, "step": 156830 }, { "epoch": 18.87364620938628, "grad_norm": Infinity, "learning_rate": 0.00018381541714929955, "loss": 11.2456, "step": 156840 }, { "epoch": 18.874849578820697, "grad_norm": Infinity, "learning_rate": 0.00018381334211285412, "loss": 11.1573, "step": 156850 }, { "epoch": 18.876052948255115, "grad_norm": Infinity, "learning_rate": 0.00018381126695511004, "loss": 11.1414, "step": 156860 }, { "epoch": 18.87725631768953, "grad_norm": Infinity, "learning_rate": 0.0001838091916760704, "loss": 11.2085, "step": 156870 }, { "epoch": 18.878459687123947, "grad_norm": Infinity, "learning_rate": 0.00018380711627573817, "loss": 11.1137, "step": 156880 }, { "epoch": 18.879663056558364, "grad_norm": Infinity, "learning_rate": 0.00018380504075411633, "loss": 11.1565, "step": 156890 }, { "epoch": 18.880866425992778, "grad_norm": Infinity, "learning_rate": 0.0001838029651112079, "loss": 11.1592, "step": 156900 }, { "epoch": 18.882069795427196, "grad_norm": Infinity, "learning_rate": 0.00018380088934701592, "loss": 11.0634, "step": 156910 }, { "epoch": 18.883273164861613, "grad_norm": Infinity, "learning_rate": 0.00018379881346154337, "loss": 11.1236, "step": 156920 }, { "epoch": 18.884476534296027, "grad_norm": Infinity, "learning_rate": 0.0001837967374547932, "loss": 11.0946, "step": 156930 }, { "epoch": 18.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00018379466132676853, "loss": 11.1062, "step": 156940 }, { "epoch": 18.886883273164862, "grad_norm": Infinity, "learning_rate": 0.00018379258507747223, "loss": 11.0293, "step": 156950 }, { "epoch": 18.888086642599276, "grad_norm": Infinity, "learning_rate": 0.00018379050870690738, "loss": 11.0878, "step": 156960 }, { "epoch": 18.889290012033694, "grad_norm": Infinity, "learning_rate": 0.000183788432215077, "loss": 10.9764, "step": 156970 }, { "epoch": 18.89049338146811, "grad_norm": Infinity, "learning_rate": 0.0001837863556019841, "loss": 11.0772, "step": 156980 }, { "epoch": 18.891696750902526, "grad_norm": Infinity, "learning_rate": 0.0001837842788676316, "loss": 11.1573, "step": 156990 }, { "epoch": 18.892900120336943, "grad_norm": Infinity, "learning_rate": 0.0001837822020120226, "loss": 11.0387, "step": 157000 }, { "epoch": 18.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00018378012503516007, "loss": 11.1678, "step": 157010 }, { "epoch": 18.895306859205775, "grad_norm": Infinity, "learning_rate": 0.000183778047937047, "loss": 11.1658, "step": 157020 }, { "epoch": 18.896510228640192, "grad_norm": Infinity, "learning_rate": 0.0001837759707176864, "loss": 11.1512, "step": 157030 }, { "epoch": 18.89771359807461, "grad_norm": Infinity, "learning_rate": 0.0001837738933770813, "loss": 11.0356, "step": 157040 }, { "epoch": 18.898916967509024, "grad_norm": Infinity, "learning_rate": 0.00018377181591523468, "loss": 11.2211, "step": 157050 }, { "epoch": 18.90012033694344, "grad_norm": Infinity, "learning_rate": 0.0001837697383321496, "loss": 11.0747, "step": 157060 }, { "epoch": 18.90132370637786, "grad_norm": Infinity, "learning_rate": 0.00018376766062782896, "loss": 11.206, "step": 157070 }, { "epoch": 18.902527075812273, "grad_norm": Infinity, "learning_rate": 0.00018376558280227588, "loss": 11.1095, "step": 157080 }, { "epoch": 18.90373044524669, "grad_norm": Infinity, "learning_rate": 0.00018376350485549333, "loss": 11.1608, "step": 157090 }, { "epoch": 18.904933814681108, "grad_norm": Infinity, "learning_rate": 0.00018376142678748426, "loss": 11.0815, "step": 157100 }, { "epoch": 18.906137184115522, "grad_norm": Infinity, "learning_rate": 0.00018375934859825177, "loss": 11.0471, "step": 157110 }, { "epoch": 18.90734055354994, "grad_norm": Infinity, "learning_rate": 0.0001837572702877988, "loss": 11.2041, "step": 157120 }, { "epoch": 18.908543922984357, "grad_norm": Infinity, "learning_rate": 0.0001837551918561284, "loss": 11.1298, "step": 157130 }, { "epoch": 18.90974729241877, "grad_norm": Infinity, "learning_rate": 0.00018375311330324356, "loss": 11.1328, "step": 157140 }, { "epoch": 18.91095066185319, "grad_norm": Infinity, "learning_rate": 0.00018375103462914727, "loss": 11.069, "step": 157150 }, { "epoch": 18.912154031287606, "grad_norm": Infinity, "learning_rate": 0.00018374895583384254, "loss": 11.1397, "step": 157160 }, { "epoch": 18.91335740072202, "grad_norm": Infinity, "learning_rate": 0.00018374687691733244, "loss": 11.1763, "step": 157170 }, { "epoch": 18.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00018374479787961989, "loss": 11.2508, "step": 157180 }, { "epoch": 18.915764139590856, "grad_norm": Infinity, "learning_rate": 0.00018374271872070795, "loss": 11.2083, "step": 157190 }, { "epoch": 18.91696750902527, "grad_norm": Infinity, "learning_rate": 0.00018374063944059964, "loss": 11.1163, "step": 157200 }, { "epoch": 18.918170878459687, "grad_norm": Infinity, "learning_rate": 0.00018373856003929794, "loss": 11.1841, "step": 157210 }, { "epoch": 18.919374247894105, "grad_norm": Infinity, "learning_rate": 0.0001837364805168059, "loss": 11.0735, "step": 157220 }, { "epoch": 18.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00018373440087312643, "loss": 11.1616, "step": 157230 }, { "epoch": 18.921780986762936, "grad_norm": Infinity, "learning_rate": 0.00018373232110826266, "loss": 11.1764, "step": 157240 }, { "epoch": 18.922984356197354, "grad_norm": Infinity, "learning_rate": 0.00018373024122221753, "loss": 11.16, "step": 157250 }, { "epoch": 18.924187725631768, "grad_norm": Infinity, "learning_rate": 0.00018372816121499405, "loss": 11.0687, "step": 157260 }, { "epoch": 18.925391095066185, "grad_norm": Infinity, "learning_rate": 0.00018372608108659528, "loss": 11.1236, "step": 157270 }, { "epoch": 18.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00018372400083702418, "loss": 11.1659, "step": 157280 }, { "epoch": 18.927797833935017, "grad_norm": Infinity, "learning_rate": 0.0001837219204662838, "loss": 11.0458, "step": 157290 }, { "epoch": 18.929001203369435, "grad_norm": Infinity, "learning_rate": 0.00018371983997437713, "loss": 11.0637, "step": 157300 }, { "epoch": 18.930204572803852, "grad_norm": Infinity, "learning_rate": 0.00018371775936130717, "loss": 11.1876, "step": 157310 }, { "epoch": 18.931407942238266, "grad_norm": Infinity, "learning_rate": 0.00018371567862707694, "loss": 11.2099, "step": 157320 }, { "epoch": 18.932611311672684, "grad_norm": Infinity, "learning_rate": 0.00018371359777168947, "loss": 11.1799, "step": 157330 }, { "epoch": 18.9338146811071, "grad_norm": Infinity, "learning_rate": 0.00018371151679514775, "loss": 11.0395, "step": 157340 }, { "epoch": 18.935018050541515, "grad_norm": Infinity, "learning_rate": 0.0001837094356974548, "loss": 11.1093, "step": 157350 }, { "epoch": 18.936221419975933, "grad_norm": Infinity, "learning_rate": 0.0001837073544786136, "loss": 11.1713, "step": 157360 }, { "epoch": 18.93742478941035, "grad_norm": Infinity, "learning_rate": 0.00018370527313862724, "loss": 11.2034, "step": 157370 }, { "epoch": 18.938628158844764, "grad_norm": Infinity, "learning_rate": 0.00018370319167749865, "loss": 11.1406, "step": 157380 }, { "epoch": 18.939831528279182, "grad_norm": Infinity, "learning_rate": 0.00018370111009523087, "loss": 11.2147, "step": 157390 }, { "epoch": 18.9410348977136, "grad_norm": Infinity, "learning_rate": 0.00018369902839182695, "loss": 11.2302, "step": 157400 }, { "epoch": 18.942238267148014, "grad_norm": Infinity, "learning_rate": 0.00018369694656728984, "loss": 11.1735, "step": 157410 }, { "epoch": 18.94344163658243, "grad_norm": Infinity, "learning_rate": 0.00018369486462162262, "loss": 11.1265, "step": 157420 }, { "epoch": 18.94464500601685, "grad_norm": Infinity, "learning_rate": 0.00018369278255482826, "loss": 11.1389, "step": 157430 }, { "epoch": 18.945848375451263, "grad_norm": Infinity, "learning_rate": 0.00018369070036690976, "loss": 11.1313, "step": 157440 }, { "epoch": 18.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00018368861805787018, "loss": 11.151, "step": 157450 }, { "epoch": 18.948255114320098, "grad_norm": Infinity, "learning_rate": 0.00018368653562771245, "loss": 11.1124, "step": 157460 }, { "epoch": 18.949458483754512, "grad_norm": Infinity, "learning_rate": 0.00018368445307643969, "loss": 11.1773, "step": 157470 }, { "epoch": 18.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00018368237040405485, "loss": 11.0857, "step": 157480 }, { "epoch": 18.951865222623347, "grad_norm": Infinity, "learning_rate": 0.00018368028761056094, "loss": 11.0975, "step": 157490 }, { "epoch": 18.95306859205776, "grad_norm": Infinity, "learning_rate": 0.00018367820469596105, "loss": 11.022, "step": 157500 }, { "epoch": 18.95427196149218, "grad_norm": Infinity, "learning_rate": 0.0001836761216602581, "loss": 11.2443, "step": 157510 }, { "epoch": 18.955475330926596, "grad_norm": Infinity, "learning_rate": 0.00018367403850345516, "loss": 11.1917, "step": 157520 }, { "epoch": 18.95667870036101, "grad_norm": Infinity, "learning_rate": 0.0001836719552255552, "loss": 11.022, "step": 157530 }, { "epoch": 18.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00018366987182656127, "loss": 11.2335, "step": 157540 }, { "epoch": 18.959085439229845, "grad_norm": Infinity, "learning_rate": 0.0001836677883064764, "loss": 11.2388, "step": 157550 }, { "epoch": 18.96028880866426, "grad_norm": Infinity, "learning_rate": 0.00018366570466530358, "loss": 11.0706, "step": 157560 }, { "epoch": 18.961492178098677, "grad_norm": Infinity, "learning_rate": 0.0001836636209030458, "loss": 11.1347, "step": 157570 }, { "epoch": 18.96269554753309, "grad_norm": Infinity, "learning_rate": 0.00018366153701970613, "loss": 11.1475, "step": 157580 }, { "epoch": 18.96389891696751, "grad_norm": Infinity, "learning_rate": 0.00018365945301528758, "loss": 11.1984, "step": 157590 }, { "epoch": 18.965102286401926, "grad_norm": Infinity, "learning_rate": 0.00018365736888979314, "loss": 11.1857, "step": 157600 }, { "epoch": 18.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00018365528464322578, "loss": 11.0506, "step": 157610 }, { "epoch": 18.967509025270758, "grad_norm": Infinity, "learning_rate": 0.0001836532002755886, "loss": 11.2283, "step": 157620 }, { "epoch": 18.968712394705175, "grad_norm": Infinity, "learning_rate": 0.0001836511157868846, "loss": 11.2945, "step": 157630 }, { "epoch": 18.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00018364903117711677, "loss": 11.0815, "step": 157640 }, { "epoch": 18.971119133574007, "grad_norm": Infinity, "learning_rate": 0.00018364694644628816, "loss": 11.174, "step": 157650 }, { "epoch": 18.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00018364486159440173, "loss": 11.0937, "step": 157660 }, { "epoch": 18.97352587244284, "grad_norm": Infinity, "learning_rate": 0.00018364277662146056, "loss": 11.1016, "step": 157670 }, { "epoch": 18.974729241877256, "grad_norm": Infinity, "learning_rate": 0.00018364069152746764, "loss": 11.2318, "step": 157680 }, { "epoch": 18.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00018363860631242599, "loss": 11.0833, "step": 157690 }, { "epoch": 18.977135980746088, "grad_norm": Infinity, "learning_rate": 0.0001836365209763386, "loss": 11.0869, "step": 157700 }, { "epoch": 18.978339350180505, "grad_norm": Infinity, "learning_rate": 0.00018363443551920857, "loss": 11.23, "step": 157710 }, { "epoch": 18.979542719614923, "grad_norm": Infinity, "learning_rate": 0.0001836323499410388, "loss": 11.1504, "step": 157720 }, { "epoch": 18.980746089049337, "grad_norm": Infinity, "learning_rate": 0.0001836302642418324, "loss": 11.1502, "step": 157730 }, { "epoch": 18.981949458483754, "grad_norm": Infinity, "learning_rate": 0.00018362817842159236, "loss": 11.1727, "step": 157740 }, { "epoch": 18.983152827918172, "grad_norm": Infinity, "learning_rate": 0.0001836260924803217, "loss": 11.0221, "step": 157750 }, { "epoch": 18.984356197352586, "grad_norm": Infinity, "learning_rate": 0.0001836240064180234, "loss": 11.1395, "step": 157760 }, { "epoch": 18.985559566787003, "grad_norm": Infinity, "learning_rate": 0.00018362192023470058, "loss": 11.0751, "step": 157770 }, { "epoch": 18.98676293622142, "grad_norm": Infinity, "learning_rate": 0.00018361983393035618, "loss": 11.1112, "step": 157780 }, { "epoch": 18.987966305655835, "grad_norm": Infinity, "learning_rate": 0.00018361774750499322, "loss": 11.0373, "step": 157790 }, { "epoch": 18.989169675090253, "grad_norm": Infinity, "learning_rate": 0.00018361566095861473, "loss": 11.0098, "step": 157800 }, { "epoch": 18.99037304452467, "grad_norm": Infinity, "learning_rate": 0.00018361357429122377, "loss": 11.1946, "step": 157810 }, { "epoch": 18.991576413959084, "grad_norm": Infinity, "learning_rate": 0.0001836114875028233, "loss": 11.1975, "step": 157820 }, { "epoch": 18.9927797833935, "grad_norm": Infinity, "learning_rate": 0.00018360940059341635, "loss": 11.2993, "step": 157830 }, { "epoch": 18.99398315282792, "grad_norm": Infinity, "learning_rate": 0.00018360731356300596, "loss": 11.138, "step": 157840 }, { "epoch": 18.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00018360522641159516, "loss": 11.2325, "step": 157850 }, { "epoch": 18.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00018360313913918694, "loss": 11.1623, "step": 157860 }, { "epoch": 18.99759326113117, "grad_norm": Infinity, "learning_rate": 0.00018360105174578437, "loss": 11.0993, "step": 157870 }, { "epoch": 18.998796630565582, "grad_norm": Infinity, "learning_rate": 0.0001835989642313904, "loss": 11.1284, "step": 157880 }, { "epoch": 19.0, "grad_norm": Infinity, "learning_rate": 0.0001835968765960081, "loss": 11.1654, "step": 157890 }, { "epoch": 19.0, "eval_loss": 11.139603614807129, "eval_runtime": 118.4351, "eval_samples_per_second": 62.372, "eval_steps_per_second": 7.802, "step": 157890 }, { "epoch": 19.001203369434418, "grad_norm": Infinity, "learning_rate": 0.0001835947888396405, "loss": 11.0563, "step": 157900 }, { "epoch": 19.00240673886883, "grad_norm": Infinity, "learning_rate": 0.00018359270096229057, "loss": 11.146, "step": 157910 }, { "epoch": 19.00361010830325, "grad_norm": Infinity, "learning_rate": 0.00018359061296396138, "loss": 11.1602, "step": 157920 }, { "epoch": 19.004813477737667, "grad_norm": Infinity, "learning_rate": 0.00018358852484465593, "loss": 11.0101, "step": 157930 }, { "epoch": 19.00601684717208, "grad_norm": Infinity, "learning_rate": 0.00018358643660437723, "loss": 11.1811, "step": 157940 }, { "epoch": 19.0072202166065, "grad_norm": Infinity, "learning_rate": 0.00018358434824312835, "loss": 11.19, "step": 157950 }, { "epoch": 19.008423586040916, "grad_norm": Infinity, "learning_rate": 0.00018358225976091228, "loss": 11.0729, "step": 157960 }, { "epoch": 19.00962695547533, "grad_norm": Infinity, "learning_rate": 0.00018358017115773203, "loss": 11.1656, "step": 157970 }, { "epoch": 19.010830324909747, "grad_norm": Infinity, "learning_rate": 0.00018357808243359066, "loss": 11.1126, "step": 157980 }, { "epoch": 19.012033694344165, "grad_norm": Infinity, "learning_rate": 0.00018357599358849114, "loss": 11.1051, "step": 157990 }, { "epoch": 19.01323706377858, "grad_norm": Infinity, "learning_rate": 0.00018357390462243653, "loss": 11.0959, "step": 158000 }, { "epoch": 19.014440433212997, "grad_norm": Infinity, "learning_rate": 0.00018357181553542985, "loss": 11.2075, "step": 158010 }, { "epoch": 19.015643802647414, "grad_norm": Infinity, "learning_rate": 0.00018356972632747412, "loss": 11.1011, "step": 158020 }, { "epoch": 19.016847172081828, "grad_norm": Infinity, "learning_rate": 0.0001835676369985724, "loss": 11.1666, "step": 158030 }, { "epoch": 19.018050541516246, "grad_norm": Infinity, "learning_rate": 0.0001835655475487276, "loss": 11.2899, "step": 158040 }, { "epoch": 19.019253910950663, "grad_norm": Infinity, "learning_rate": 0.00018356345797794287, "loss": 11.1406, "step": 158050 }, { "epoch": 19.020457280385077, "grad_norm": Infinity, "learning_rate": 0.00018356136828622118, "loss": 11.2048, "step": 158060 }, { "epoch": 19.021660649819495, "grad_norm": Infinity, "learning_rate": 0.00018355927847356557, "loss": 11.1747, "step": 158070 }, { "epoch": 19.022864019253912, "grad_norm": Infinity, "learning_rate": 0.00018355718853997903, "loss": 11.2221, "step": 158080 }, { "epoch": 19.024067388688326, "grad_norm": Infinity, "learning_rate": 0.00018355509848546464, "loss": 11.189, "step": 158090 }, { "epoch": 19.025270758122744, "grad_norm": Infinity, "learning_rate": 0.00018355300831002537, "loss": 11.1854, "step": 158100 }, { "epoch": 19.02647412755716, "grad_norm": Infinity, "learning_rate": 0.00018355091801366426, "loss": 11.2108, "step": 158110 }, { "epoch": 19.027677496991576, "grad_norm": Infinity, "learning_rate": 0.00018354882759638437, "loss": 11.1077, "step": 158120 }, { "epoch": 19.028880866425993, "grad_norm": Infinity, "learning_rate": 0.0001835467370581887, "loss": 11.1843, "step": 158130 }, { "epoch": 19.03008423586041, "grad_norm": Infinity, "learning_rate": 0.00018354464639908023, "loss": 11.0339, "step": 158140 }, { "epoch": 19.031287605294825, "grad_norm": Infinity, "learning_rate": 0.00018354255561906207, "loss": 11.1135, "step": 158150 }, { "epoch": 19.032490974729242, "grad_norm": Infinity, "learning_rate": 0.00018354046471813718, "loss": 11.1554, "step": 158160 }, { "epoch": 19.03369434416366, "grad_norm": Infinity, "learning_rate": 0.00018353837369630864, "loss": 11.0672, "step": 158170 }, { "epoch": 19.034897713598074, "grad_norm": Infinity, "learning_rate": 0.00018353628255357945, "loss": 11.1759, "step": 158180 }, { "epoch": 19.03610108303249, "grad_norm": Infinity, "learning_rate": 0.00018353419128995258, "loss": 11.0807, "step": 158190 }, { "epoch": 19.03730445246691, "grad_norm": Infinity, "learning_rate": 0.00018353209990543115, "loss": 11.143, "step": 158200 }, { "epoch": 19.038507821901323, "grad_norm": Infinity, "learning_rate": 0.00018353000840001818, "loss": 11.082, "step": 158210 }, { "epoch": 19.03971119133574, "grad_norm": Infinity, "learning_rate": 0.00018352791677371663, "loss": 11.1706, "step": 158220 }, { "epoch": 19.040914560770158, "grad_norm": Infinity, "learning_rate": 0.00018352582502652957, "loss": 11.036, "step": 158230 }, { "epoch": 19.042117930204572, "grad_norm": Infinity, "learning_rate": 0.00018352373315846002, "loss": 11.0549, "step": 158240 }, { "epoch": 19.04332129963899, "grad_norm": Infinity, "learning_rate": 0.00018352164116951098, "loss": 11.2535, "step": 158250 }, { "epoch": 19.044524669073404, "grad_norm": Infinity, "learning_rate": 0.00018351954905968554, "loss": 11.1804, "step": 158260 }, { "epoch": 19.04572803850782, "grad_norm": Infinity, "learning_rate": 0.00018351745682898668, "loss": 11.1223, "step": 158270 }, { "epoch": 19.04693140794224, "grad_norm": Infinity, "learning_rate": 0.00018351536447741744, "loss": 11.0699, "step": 158280 }, { "epoch": 19.048134777376653, "grad_norm": Infinity, "learning_rate": 0.00018351327200498085, "loss": 11.1097, "step": 158290 }, { "epoch": 19.04933814681107, "grad_norm": Infinity, "learning_rate": 0.00018351117941167993, "loss": 11.1484, "step": 158300 }, { "epoch": 19.050541516245488, "grad_norm": Infinity, "learning_rate": 0.00018350908669751773, "loss": 11.1152, "step": 158310 }, { "epoch": 19.051744885679902, "grad_norm": Infinity, "learning_rate": 0.00018350699386249724, "loss": 11.0663, "step": 158320 }, { "epoch": 19.05294825511432, "grad_norm": Infinity, "learning_rate": 0.00018350490090662152, "loss": 11.0921, "step": 158330 }, { "epoch": 19.054151624548737, "grad_norm": Infinity, "learning_rate": 0.0001835028078298936, "loss": 11.1356, "step": 158340 }, { "epoch": 19.05535499398315, "grad_norm": Infinity, "learning_rate": 0.0001835007146323165, "loss": 11.1691, "step": 158350 }, { "epoch": 19.05655836341757, "grad_norm": Infinity, "learning_rate": 0.00018349862131389325, "loss": 11.2317, "step": 158360 }, { "epoch": 19.057761732851986, "grad_norm": Infinity, "learning_rate": 0.00018349652787462688, "loss": 11.0429, "step": 158370 }, { "epoch": 19.0589651022864, "grad_norm": Infinity, "learning_rate": 0.0001834944343145204, "loss": 11.1741, "step": 158380 }, { "epoch": 19.060168471720818, "grad_norm": Infinity, "learning_rate": 0.0001834923406335769, "loss": 11.1619, "step": 158390 }, { "epoch": 19.061371841155236, "grad_norm": Infinity, "learning_rate": 0.00018349024683179935, "loss": 11.2042, "step": 158400 }, { "epoch": 19.06257521058965, "grad_norm": Infinity, "learning_rate": 0.0001834881529091908, "loss": 11.1238, "step": 158410 }, { "epoch": 19.063778580024067, "grad_norm": Infinity, "learning_rate": 0.00018348605886575427, "loss": 11.0707, "step": 158420 }, { "epoch": 19.064981949458485, "grad_norm": Infinity, "learning_rate": 0.0001834839647014928, "loss": 11.2027, "step": 158430 }, { "epoch": 19.0661853188929, "grad_norm": Infinity, "learning_rate": 0.00018348187041640943, "loss": 11.1844, "step": 158440 }, { "epoch": 19.067388688327316, "grad_norm": Infinity, "learning_rate": 0.0001834797760105072, "loss": 11.1456, "step": 158450 }, { "epoch": 19.068592057761734, "grad_norm": Infinity, "learning_rate": 0.00018347768148378908, "loss": 11.0727, "step": 158460 }, { "epoch": 19.069795427196148, "grad_norm": Infinity, "learning_rate": 0.0001834755868362582, "loss": 11.1388, "step": 158470 }, { "epoch": 19.070998796630565, "grad_norm": Infinity, "learning_rate": 0.0001834734920679175, "loss": 11.1525, "step": 158480 }, { "epoch": 19.072202166064983, "grad_norm": Infinity, "learning_rate": 0.00018347139717877004, "loss": 11.1697, "step": 158490 }, { "epoch": 19.073405535499397, "grad_norm": Infinity, "learning_rate": 0.00018346930216881887, "loss": 11.2195, "step": 158500 }, { "epoch": 19.074608904933815, "grad_norm": Infinity, "learning_rate": 0.000183467207038067, "loss": 11.0772, "step": 158510 }, { "epoch": 19.075812274368232, "grad_norm": Infinity, "learning_rate": 0.0001834651117865175, "loss": 11.2047, "step": 158520 }, { "epoch": 19.077015643802646, "grad_norm": Infinity, "learning_rate": 0.00018346301641417338, "loss": 11.0359, "step": 158530 }, { "epoch": 19.078219013237064, "grad_norm": Infinity, "learning_rate": 0.00018346092092103765, "loss": 11.1607, "step": 158540 }, { "epoch": 19.07942238267148, "grad_norm": Infinity, "learning_rate": 0.00018345882530711334, "loss": 11.087, "step": 158550 }, { "epoch": 19.080625752105895, "grad_norm": Infinity, "learning_rate": 0.00018345672957240358, "loss": 10.9896, "step": 158560 }, { "epoch": 19.081829121540313, "grad_norm": Infinity, "learning_rate": 0.00018345463371691127, "loss": 11.1302, "step": 158570 }, { "epoch": 19.08303249097473, "grad_norm": Infinity, "learning_rate": 0.0001834525377406395, "loss": 11.1366, "step": 158580 }, { "epoch": 19.084235860409144, "grad_norm": Infinity, "learning_rate": 0.00018345044164359134, "loss": 11.0495, "step": 158590 }, { "epoch": 19.085439229843562, "grad_norm": Infinity, "learning_rate": 0.00018344834542576973, "loss": 11.1212, "step": 158600 }, { "epoch": 19.08664259927798, "grad_norm": Infinity, "learning_rate": 0.0001834462490871778, "loss": 11.2575, "step": 158610 }, { "epoch": 19.087845968712394, "grad_norm": Infinity, "learning_rate": 0.00018344415262781856, "loss": 11.1387, "step": 158620 }, { "epoch": 19.08904933814681, "grad_norm": Infinity, "learning_rate": 0.000183442056047695, "loss": 11.0817, "step": 158630 }, { "epoch": 19.09025270758123, "grad_norm": Infinity, "learning_rate": 0.00018343995934681017, "loss": 11.0478, "step": 158640 }, { "epoch": 19.091456077015643, "grad_norm": Infinity, "learning_rate": 0.00018343786252516715, "loss": 11.1995, "step": 158650 }, { "epoch": 19.09265944645006, "grad_norm": Infinity, "learning_rate": 0.00018343576558276894, "loss": 11.1377, "step": 158660 }, { "epoch": 19.093862815884478, "grad_norm": Infinity, "learning_rate": 0.00018343366851961856, "loss": 11.1158, "step": 158670 }, { "epoch": 19.095066185318892, "grad_norm": Infinity, "learning_rate": 0.0001834315713357191, "loss": 11.1496, "step": 158680 }, { "epoch": 19.09626955475331, "grad_norm": Infinity, "learning_rate": 0.00018342947403107352, "loss": 11.0106, "step": 158690 }, { "epoch": 19.097472924187727, "grad_norm": Infinity, "learning_rate": 0.0001834273766056849, "loss": 11.1551, "step": 158700 }, { "epoch": 19.09867629362214, "grad_norm": Infinity, "learning_rate": 0.00018342527905955628, "loss": 11.1511, "step": 158710 }, { "epoch": 19.09987966305656, "grad_norm": Infinity, "learning_rate": 0.00018342318139269066, "loss": 11.2416, "step": 158720 }, { "epoch": 19.101083032490976, "grad_norm": Infinity, "learning_rate": 0.0001834210836050911, "loss": 11.0608, "step": 158730 }, { "epoch": 19.10228640192539, "grad_norm": Infinity, "learning_rate": 0.00018341898569676065, "loss": 11.0706, "step": 158740 }, { "epoch": 19.103489771359808, "grad_norm": Infinity, "learning_rate": 0.00018341688766770236, "loss": 11.2039, "step": 158750 }, { "epoch": 19.104693140794225, "grad_norm": Infinity, "learning_rate": 0.0001834147895179192, "loss": 11.127, "step": 158760 }, { "epoch": 19.10589651022864, "grad_norm": Infinity, "learning_rate": 0.00018341269124741427, "loss": 11.191, "step": 158770 }, { "epoch": 19.107099879663057, "grad_norm": Infinity, "learning_rate": 0.00018341059285619058, "loss": 11.231, "step": 158780 }, { "epoch": 19.108303249097474, "grad_norm": Infinity, "learning_rate": 0.00018340849434425117, "loss": 11.1613, "step": 158790 }, { "epoch": 19.10950661853189, "grad_norm": Infinity, "learning_rate": 0.00018340639571159907, "loss": 11.0705, "step": 158800 }, { "epoch": 19.110709987966306, "grad_norm": Infinity, "learning_rate": 0.0001834042969582373, "loss": 11.1376, "step": 158810 }, { "epoch": 19.111913357400724, "grad_norm": Infinity, "learning_rate": 0.00018340219808416893, "loss": 11.2207, "step": 158820 }, { "epoch": 19.113116726835138, "grad_norm": Infinity, "learning_rate": 0.000183400099089397, "loss": 11.2688, "step": 158830 }, { "epoch": 19.114320096269555, "grad_norm": Infinity, "learning_rate": 0.00018339799997392454, "loss": 11.2192, "step": 158840 }, { "epoch": 19.115523465703973, "grad_norm": Infinity, "learning_rate": 0.00018339590073775455, "loss": 11.1115, "step": 158850 }, { "epoch": 19.116726835138387, "grad_norm": Infinity, "learning_rate": 0.00018339380138089015, "loss": 11.1967, "step": 158860 }, { "epoch": 19.117930204572804, "grad_norm": Infinity, "learning_rate": 0.0001833917019033343, "loss": 11.1094, "step": 158870 }, { "epoch": 19.119133574007222, "grad_norm": Infinity, "learning_rate": 0.00018338960230509006, "loss": 11.2252, "step": 158880 }, { "epoch": 19.120336943441636, "grad_norm": Infinity, "learning_rate": 0.0001833875025861605, "loss": 11.1617, "step": 158890 }, { "epoch": 19.121540312876053, "grad_norm": Infinity, "learning_rate": 0.00018338540274654864, "loss": 11.1357, "step": 158900 }, { "epoch": 19.12274368231047, "grad_norm": Infinity, "learning_rate": 0.0001833833027862575, "loss": 11.2059, "step": 158910 }, { "epoch": 19.123947051744885, "grad_norm": Infinity, "learning_rate": 0.00018338120270529015, "loss": 11.2334, "step": 158920 }, { "epoch": 19.125150421179303, "grad_norm": Infinity, "learning_rate": 0.0001833791025036496, "loss": 11.1384, "step": 158930 }, { "epoch": 19.126353790613717, "grad_norm": Infinity, "learning_rate": 0.00018337700218133888, "loss": 11.2042, "step": 158940 }, { "epoch": 19.127557160048134, "grad_norm": Infinity, "learning_rate": 0.0001833749017383611, "loss": 11.139, "step": 158950 }, { "epoch": 19.128760529482552, "grad_norm": Infinity, "learning_rate": 0.00018337280117471924, "loss": 11.1242, "step": 158960 }, { "epoch": 19.129963898916966, "grad_norm": Infinity, "learning_rate": 0.00018337070049041632, "loss": 11.1422, "step": 158970 }, { "epoch": 19.131167268351383, "grad_norm": Infinity, "learning_rate": 0.00018336859968545545, "loss": 11.1218, "step": 158980 }, { "epoch": 19.1323706377858, "grad_norm": Infinity, "learning_rate": 0.00018336649875983963, "loss": 11.2146, "step": 158990 }, { "epoch": 19.133574007220215, "grad_norm": Infinity, "learning_rate": 0.0001833643977135719, "loss": 11.1259, "step": 159000 }, { "epoch": 19.134777376654633, "grad_norm": Infinity, "learning_rate": 0.00018336229654665527, "loss": 11.1867, "step": 159010 }, { "epoch": 19.13598074608905, "grad_norm": Infinity, "learning_rate": 0.00018336019525909284, "loss": 11.1998, "step": 159020 }, { "epoch": 19.137184115523464, "grad_norm": Infinity, "learning_rate": 0.00018335809385088765, "loss": 11.0415, "step": 159030 }, { "epoch": 19.13838748495788, "grad_norm": Infinity, "learning_rate": 0.0001833559923220427, "loss": 11.1145, "step": 159040 }, { "epoch": 19.1395908543923, "grad_norm": Infinity, "learning_rate": 0.00018335389067256102, "loss": 11.0816, "step": 159050 }, { "epoch": 19.140794223826713, "grad_norm": Infinity, "learning_rate": 0.00018335178890244573, "loss": 11.0793, "step": 159060 }, { "epoch": 19.14199759326113, "grad_norm": Infinity, "learning_rate": 0.00018334968701169978, "loss": 11.3079, "step": 159070 }, { "epoch": 19.14320096269555, "grad_norm": Infinity, "learning_rate": 0.0001833475850003263, "loss": 11.0181, "step": 159080 }, { "epoch": 19.144404332129962, "grad_norm": Infinity, "learning_rate": 0.00018334548286832824, "loss": 11.1338, "step": 159090 }, { "epoch": 19.14560770156438, "grad_norm": Infinity, "learning_rate": 0.00018334338061570872, "loss": 11.1068, "step": 159100 }, { "epoch": 19.146811070998798, "grad_norm": Infinity, "learning_rate": 0.00018334127824247074, "loss": 11.1185, "step": 159110 }, { "epoch": 19.14801444043321, "grad_norm": Infinity, "learning_rate": 0.00018333917574861737, "loss": 11.2902, "step": 159120 }, { "epoch": 19.14921780986763, "grad_norm": Infinity, "learning_rate": 0.00018333707313415162, "loss": 11.1842, "step": 159130 }, { "epoch": 19.150421179302047, "grad_norm": Infinity, "learning_rate": 0.00018333497039907655, "loss": 11.1929, "step": 159140 }, { "epoch": 19.15162454873646, "grad_norm": Infinity, "learning_rate": 0.0001833328675433952, "loss": 11.0934, "step": 159150 }, { "epoch": 19.15282791817088, "grad_norm": Infinity, "learning_rate": 0.0001833307645671106, "loss": 11.0951, "step": 159160 }, { "epoch": 19.154031287605296, "grad_norm": Infinity, "learning_rate": 0.00018332866147022586, "loss": 11.0999, "step": 159170 }, { "epoch": 19.15523465703971, "grad_norm": Infinity, "learning_rate": 0.00018332655825274393, "loss": 11.2071, "step": 159180 }, { "epoch": 19.156438026474127, "grad_norm": Infinity, "learning_rate": 0.00018332445491466793, "loss": 11.1583, "step": 159190 }, { "epoch": 19.157641395908545, "grad_norm": Infinity, "learning_rate": 0.00018332235145600084, "loss": 11.0031, "step": 159200 }, { "epoch": 19.15884476534296, "grad_norm": Infinity, "learning_rate": 0.00018332024787674577, "loss": 11.1038, "step": 159210 }, { "epoch": 19.160048134777377, "grad_norm": Infinity, "learning_rate": 0.00018331814417690568, "loss": 11.1046, "step": 159220 }, { "epoch": 19.161251504211794, "grad_norm": Infinity, "learning_rate": 0.00018331604035648372, "loss": 11.1936, "step": 159230 }, { "epoch": 19.162454873646208, "grad_norm": Infinity, "learning_rate": 0.00018331393641548283, "loss": 11.0831, "step": 159240 }, { "epoch": 19.163658243080626, "grad_norm": Infinity, "learning_rate": 0.00018331183235390614, "loss": 11.1405, "step": 159250 }, { "epoch": 19.164861612515043, "grad_norm": Infinity, "learning_rate": 0.00018330972817175663, "loss": 11.1274, "step": 159260 }, { "epoch": 19.166064981949457, "grad_norm": Infinity, "learning_rate": 0.00018330762386903742, "loss": 11.1068, "step": 159270 }, { "epoch": 19.167268351383875, "grad_norm": Infinity, "learning_rate": 0.00018330551944575148, "loss": 11.1133, "step": 159280 }, { "epoch": 19.168471720818292, "grad_norm": Infinity, "learning_rate": 0.0001833034149019019, "loss": 11.2081, "step": 159290 }, { "epoch": 19.169675090252706, "grad_norm": Infinity, "learning_rate": 0.00018330131023749167, "loss": 11.1845, "step": 159300 }, { "epoch": 19.170878459687124, "grad_norm": Infinity, "learning_rate": 0.00018329920545252394, "loss": 11.1335, "step": 159310 }, { "epoch": 19.17208182912154, "grad_norm": Infinity, "learning_rate": 0.00018329710054700168, "loss": 11.0865, "step": 159320 }, { "epoch": 19.173285198555956, "grad_norm": Infinity, "learning_rate": 0.0001832949955209279, "loss": 11.1383, "step": 159330 }, { "epoch": 19.174488567990373, "grad_norm": Infinity, "learning_rate": 0.00018329289037430572, "loss": 11.0965, "step": 159340 }, { "epoch": 19.17569193742479, "grad_norm": Infinity, "learning_rate": 0.0001832907851071382, "loss": 11.2015, "step": 159350 }, { "epoch": 19.176895306859205, "grad_norm": Infinity, "learning_rate": 0.00018328867971942832, "loss": 11.237, "step": 159360 }, { "epoch": 19.178098676293622, "grad_norm": Infinity, "learning_rate": 0.00018328657421117915, "loss": 11.2032, "step": 159370 }, { "epoch": 19.17930204572804, "grad_norm": Infinity, "learning_rate": 0.00018328446858239375, "loss": 11.1951, "step": 159380 }, { "epoch": 19.180505415162454, "grad_norm": Infinity, "learning_rate": 0.0001832823628330752, "loss": 11.0599, "step": 159390 }, { "epoch": 19.18170878459687, "grad_norm": Infinity, "learning_rate": 0.00018328025696322646, "loss": 11.0815, "step": 159400 }, { "epoch": 19.18291215403129, "grad_norm": Infinity, "learning_rate": 0.00018327815097285064, "loss": 11.1372, "step": 159410 }, { "epoch": 19.184115523465703, "grad_norm": Infinity, "learning_rate": 0.00018327604486195078, "loss": 11.1505, "step": 159420 }, { "epoch": 19.18531889290012, "grad_norm": Infinity, "learning_rate": 0.0001832739386305299, "loss": 11.233, "step": 159430 }, { "epoch": 19.186522262334538, "grad_norm": Infinity, "learning_rate": 0.0001832718322785911, "loss": 11.1522, "step": 159440 }, { "epoch": 19.187725631768952, "grad_norm": Infinity, "learning_rate": 0.0001832697258061374, "loss": 11.0415, "step": 159450 }, { "epoch": 19.18892900120337, "grad_norm": Infinity, "learning_rate": 0.00018326761921317185, "loss": 11.1992, "step": 159460 }, { "epoch": 19.190132370637787, "grad_norm": Infinity, "learning_rate": 0.00018326551249969747, "loss": 11.134, "step": 159470 }, { "epoch": 19.1913357400722, "grad_norm": Infinity, "learning_rate": 0.00018326340566571735, "loss": 11.0484, "step": 159480 }, { "epoch": 19.19253910950662, "grad_norm": Infinity, "learning_rate": 0.00018326129871123453, "loss": 11.146, "step": 159490 }, { "epoch": 19.193742478941036, "grad_norm": Infinity, "learning_rate": 0.00018325919163625205, "loss": 11.1345, "step": 159500 }, { "epoch": 19.19494584837545, "grad_norm": Infinity, "learning_rate": 0.00018325708444077296, "loss": 11.1604, "step": 159510 }, { "epoch": 19.196149217809868, "grad_norm": Infinity, "learning_rate": 0.0001832549771248003, "loss": 11.1589, "step": 159520 }, { "epoch": 19.197352587244286, "grad_norm": Infinity, "learning_rate": 0.00018325286968833717, "loss": 11.1412, "step": 159530 }, { "epoch": 19.1985559566787, "grad_norm": Infinity, "learning_rate": 0.00018325076213138657, "loss": 11.1329, "step": 159540 }, { "epoch": 19.199759326113117, "grad_norm": Infinity, "learning_rate": 0.00018324865445395157, "loss": 11.174, "step": 159550 }, { "epoch": 19.200962695547535, "grad_norm": Infinity, "learning_rate": 0.0001832465466560352, "loss": 11.0711, "step": 159560 }, { "epoch": 19.20216606498195, "grad_norm": Infinity, "learning_rate": 0.0001832444387376405, "loss": 10.9949, "step": 159570 }, { "epoch": 19.203369434416366, "grad_norm": Infinity, "learning_rate": 0.0001832423306987706, "loss": 11.13, "step": 159580 }, { "epoch": 19.204572803850784, "grad_norm": Infinity, "learning_rate": 0.00018324022253942844, "loss": 11.0963, "step": 159590 }, { "epoch": 19.205776173285198, "grad_norm": Infinity, "learning_rate": 0.00018323811425961716, "loss": 11.1516, "step": 159600 }, { "epoch": 19.206979542719615, "grad_norm": Infinity, "learning_rate": 0.00018323600585933977, "loss": 11.2114, "step": 159610 }, { "epoch": 19.20818291215403, "grad_norm": Infinity, "learning_rate": 0.00018323389733859933, "loss": 11.1687, "step": 159620 }, { "epoch": 19.209386281588447, "grad_norm": Infinity, "learning_rate": 0.0001832317886973989, "loss": 11.151, "step": 159630 }, { "epoch": 19.210589651022865, "grad_norm": Infinity, "learning_rate": 0.0001832296799357415, "loss": 11.2609, "step": 159640 }, { "epoch": 19.21179302045728, "grad_norm": Infinity, "learning_rate": 0.00018322757105363022, "loss": 11.2683, "step": 159650 }, { "epoch": 19.212996389891696, "grad_norm": Infinity, "learning_rate": 0.0001832254620510681, "loss": 11.1292, "step": 159660 }, { "epoch": 19.214199759326114, "grad_norm": Infinity, "learning_rate": 0.00018322335292805816, "loss": 11.1581, "step": 159670 }, { "epoch": 19.215403128760528, "grad_norm": Infinity, "learning_rate": 0.00018322124368460351, "loss": 11.1436, "step": 159680 }, { "epoch": 19.216606498194945, "grad_norm": Infinity, "learning_rate": 0.00018321913432070715, "loss": 11.1155, "step": 159690 }, { "epoch": 19.217809867629363, "grad_norm": Infinity, "learning_rate": 0.0001832170248363722, "loss": 11.2039, "step": 159700 }, { "epoch": 19.219013237063777, "grad_norm": Infinity, "learning_rate": 0.00018321491523160164, "loss": 11.1006, "step": 159710 }, { "epoch": 19.220216606498195, "grad_norm": Infinity, "learning_rate": 0.00018321280550639854, "loss": 11.2315, "step": 159720 }, { "epoch": 19.221419975932612, "grad_norm": Infinity, "learning_rate": 0.00018321069566076597, "loss": 11.1251, "step": 159730 }, { "epoch": 19.222623345367026, "grad_norm": Infinity, "learning_rate": 0.000183208585694707, "loss": 11.0394, "step": 159740 }, { "epoch": 19.223826714801444, "grad_norm": Infinity, "learning_rate": 0.00018320647560822467, "loss": 11.1559, "step": 159750 }, { "epoch": 19.22503008423586, "grad_norm": Infinity, "learning_rate": 0.000183204365401322, "loss": 11.1828, "step": 159760 }, { "epoch": 19.226233453670275, "grad_norm": Infinity, "learning_rate": 0.0001832022550740021, "loss": 11.2003, "step": 159770 }, { "epoch": 19.227436823104693, "grad_norm": Infinity, "learning_rate": 0.00018320014462626797, "loss": 11.2145, "step": 159780 }, { "epoch": 19.22864019253911, "grad_norm": Infinity, "learning_rate": 0.0001831980340581227, "loss": 11.1368, "step": 159790 }, { "epoch": 19.229843561973524, "grad_norm": Infinity, "learning_rate": 0.00018319592336956932, "loss": 11.0966, "step": 159800 }, { "epoch": 19.231046931407942, "grad_norm": Infinity, "learning_rate": 0.00018319381256061093, "loss": 11.1462, "step": 159810 }, { "epoch": 19.23225030084236, "grad_norm": Infinity, "learning_rate": 0.0001831917016312505, "loss": 11.1368, "step": 159820 }, { "epoch": 19.233453670276774, "grad_norm": Infinity, "learning_rate": 0.00018318959058149118, "loss": 11.1017, "step": 159830 }, { "epoch": 19.23465703971119, "grad_norm": Infinity, "learning_rate": 0.00018318747941133595, "loss": 11.0607, "step": 159840 }, { "epoch": 19.23586040914561, "grad_norm": Infinity, "learning_rate": 0.00018318536812078794, "loss": 11.2353, "step": 159850 }, { "epoch": 19.237063778580023, "grad_norm": Infinity, "learning_rate": 0.00018318325670985014, "loss": 11.2015, "step": 159860 }, { "epoch": 19.23826714801444, "grad_norm": Infinity, "learning_rate": 0.00018318114517852565, "loss": 11.1767, "step": 159870 }, { "epoch": 19.239470517448858, "grad_norm": Infinity, "learning_rate": 0.0001831790335268175, "loss": 11.1298, "step": 159880 }, { "epoch": 19.240673886883272, "grad_norm": Infinity, "learning_rate": 0.00018317692175472873, "loss": 11.2561, "step": 159890 }, { "epoch": 19.24187725631769, "grad_norm": Infinity, "learning_rate": 0.00018317480986226246, "loss": 11.0442, "step": 159900 }, { "epoch": 19.243080625752107, "grad_norm": Infinity, "learning_rate": 0.00018317269784942167, "loss": 11.143, "step": 159910 }, { "epoch": 19.24428399518652, "grad_norm": Infinity, "learning_rate": 0.00018317058571620947, "loss": 11.0817, "step": 159920 }, { "epoch": 19.24548736462094, "grad_norm": Infinity, "learning_rate": 0.00018316847346262888, "loss": 11.2806, "step": 159930 }, { "epoch": 19.246690734055356, "grad_norm": Infinity, "learning_rate": 0.00018316636108868298, "loss": 11.1808, "step": 159940 }, { "epoch": 19.24789410348977, "grad_norm": Infinity, "learning_rate": 0.00018316424859437483, "loss": 11.1272, "step": 159950 }, { "epoch": 19.249097472924188, "grad_norm": Infinity, "learning_rate": 0.00018316213597970745, "loss": 11.094, "step": 159960 }, { "epoch": 19.250300842358605, "grad_norm": Infinity, "learning_rate": 0.00018316002324468396, "loss": 11.2003, "step": 159970 }, { "epoch": 19.25150421179302, "grad_norm": Infinity, "learning_rate": 0.00018315791038930738, "loss": 11.1593, "step": 159980 }, { "epoch": 19.252707581227437, "grad_norm": Infinity, "learning_rate": 0.00018315579741358078, "loss": 11.0211, "step": 159990 }, { "epoch": 19.253910950661854, "grad_norm": Infinity, "learning_rate": 0.0001831536843175072, "loss": 11.2423, "step": 160000 }, { "epoch": 19.25511432009627, "grad_norm": Infinity, "learning_rate": 0.0001831515711010897, "loss": 11.1407, "step": 160010 }, { "epoch": 19.256317689530686, "grad_norm": Infinity, "learning_rate": 0.00018314945776433137, "loss": 11.1076, "step": 160020 }, { "epoch": 19.257521058965104, "grad_norm": Infinity, "learning_rate": 0.00018314734430723522, "loss": 11.2068, "step": 160030 }, { "epoch": 19.258724428399518, "grad_norm": Infinity, "learning_rate": 0.00018314523072980436, "loss": 11.113, "step": 160040 }, { "epoch": 19.259927797833935, "grad_norm": Infinity, "learning_rate": 0.0001831431170320418, "loss": 10.97, "step": 160050 }, { "epoch": 19.261131167268353, "grad_norm": Infinity, "learning_rate": 0.00018314100321395062, "loss": 11.1879, "step": 160060 }, { "epoch": 19.262334536702767, "grad_norm": Infinity, "learning_rate": 0.00018313888927553391, "loss": 11.1473, "step": 160070 }, { "epoch": 19.263537906137184, "grad_norm": Infinity, "learning_rate": 0.00018313677521679468, "loss": 11.171, "step": 160080 }, { "epoch": 19.264741275571602, "grad_norm": Infinity, "learning_rate": 0.00018313466103773602, "loss": 11.224, "step": 160090 }, { "epoch": 19.265944645006016, "grad_norm": Infinity, "learning_rate": 0.00018313254673836099, "loss": 11.2056, "step": 160100 }, { "epoch": 19.267148014440433, "grad_norm": Infinity, "learning_rate": 0.00018313043231867263, "loss": 11.0397, "step": 160110 }, { "epoch": 19.26835138387485, "grad_norm": Infinity, "learning_rate": 0.000183128317778674, "loss": 11.0851, "step": 160120 }, { "epoch": 19.269554753309265, "grad_norm": Infinity, "learning_rate": 0.00018312620311836816, "loss": 11.2034, "step": 160130 }, { "epoch": 19.270758122743683, "grad_norm": Infinity, "learning_rate": 0.0001831240883377582, "loss": 11.0784, "step": 160140 }, { "epoch": 19.2719614921781, "grad_norm": Infinity, "learning_rate": 0.00018312197343684717, "loss": 11.2138, "step": 160150 }, { "epoch": 19.273164861612514, "grad_norm": Infinity, "learning_rate": 0.0001831198584156381, "loss": 11.1714, "step": 160160 }, { "epoch": 19.27436823104693, "grad_norm": Infinity, "learning_rate": 0.00018311774327413408, "loss": 11.0458, "step": 160170 }, { "epoch": 19.27557160048135, "grad_norm": Infinity, "learning_rate": 0.00018311562801233818, "loss": 11.111, "step": 160180 }, { "epoch": 19.276774969915763, "grad_norm": Infinity, "learning_rate": 0.00018311351263025343, "loss": 11.0016, "step": 160190 }, { "epoch": 19.27797833935018, "grad_norm": Infinity, "learning_rate": 0.00018311139712788292, "loss": 11.117, "step": 160200 }, { "epoch": 19.2791817087846, "grad_norm": Infinity, "learning_rate": 0.00018310928150522967, "loss": 11.0723, "step": 160210 }, { "epoch": 19.280385078219012, "grad_norm": Infinity, "learning_rate": 0.00018310716576229677, "loss": 11.17, "step": 160220 }, { "epoch": 19.28158844765343, "grad_norm": Infinity, "learning_rate": 0.00018310504989908733, "loss": 11.1638, "step": 160230 }, { "epoch": 19.282791817087848, "grad_norm": Infinity, "learning_rate": 0.00018310293391560434, "loss": 11.1958, "step": 160240 }, { "epoch": 19.28399518652226, "grad_norm": Infinity, "learning_rate": 0.00018310081781185088, "loss": 11.1338, "step": 160250 }, { "epoch": 19.28519855595668, "grad_norm": Infinity, "learning_rate": 0.00018309870158783004, "loss": 11.0154, "step": 160260 }, { "epoch": 19.286401925391097, "grad_norm": Infinity, "learning_rate": 0.00018309658524354484, "loss": 11.1427, "step": 160270 }, { "epoch": 19.28760529482551, "grad_norm": Infinity, "learning_rate": 0.00018309446877899839, "loss": 11.0997, "step": 160280 }, { "epoch": 19.28880866425993, "grad_norm": Infinity, "learning_rate": 0.0001830923521941937, "loss": 11.1912, "step": 160290 }, { "epoch": 19.290012033694346, "grad_norm": Infinity, "learning_rate": 0.00018309023548913387, "loss": 11.2125, "step": 160300 }, { "epoch": 19.29121540312876, "grad_norm": Infinity, "learning_rate": 0.00018308811866382197, "loss": 11.1763, "step": 160310 }, { "epoch": 19.292418772563177, "grad_norm": Infinity, "learning_rate": 0.00018308600171826103, "loss": 11.1663, "step": 160320 }, { "epoch": 19.29362214199759, "grad_norm": Infinity, "learning_rate": 0.0001830838846524541, "loss": 11.1433, "step": 160330 }, { "epoch": 19.29482551143201, "grad_norm": Infinity, "learning_rate": 0.00018308176746640434, "loss": 11.1087, "step": 160340 }, { "epoch": 19.296028880866427, "grad_norm": Infinity, "learning_rate": 0.0001830796501601147, "loss": 11.1544, "step": 160350 }, { "epoch": 19.29723225030084, "grad_norm": Infinity, "learning_rate": 0.00018307753273358833, "loss": 11.0843, "step": 160360 }, { "epoch": 19.29843561973526, "grad_norm": Infinity, "learning_rate": 0.00018307541518682827, "loss": 11.2509, "step": 160370 }, { "epoch": 19.299638989169676, "grad_norm": Infinity, "learning_rate": 0.00018307329751983756, "loss": 11.211, "step": 160380 }, { "epoch": 19.30084235860409, "grad_norm": Infinity, "learning_rate": 0.00018307117973261926, "loss": 11.1481, "step": 160390 }, { "epoch": 19.302045728038507, "grad_norm": Infinity, "learning_rate": 0.00018306906182517647, "loss": 11.1643, "step": 160400 }, { "epoch": 19.303249097472925, "grad_norm": Infinity, "learning_rate": 0.00018306694379751224, "loss": 11.1352, "step": 160410 }, { "epoch": 19.30445246690734, "grad_norm": Infinity, "learning_rate": 0.00018306482564962962, "loss": 11.1276, "step": 160420 }, { "epoch": 19.305655836341757, "grad_norm": Infinity, "learning_rate": 0.0001830627073815317, "loss": 11.0039, "step": 160430 }, { "epoch": 19.306859205776174, "grad_norm": Infinity, "learning_rate": 0.00018306058899322152, "loss": 11.0804, "step": 160440 }, { "epoch": 19.308062575210588, "grad_norm": Infinity, "learning_rate": 0.00018305847048470216, "loss": 11.2549, "step": 160450 }, { "epoch": 19.309265944645006, "grad_norm": Infinity, "learning_rate": 0.0001830563518559767, "loss": 11.2358, "step": 160460 }, { "epoch": 19.310469314079423, "grad_norm": Infinity, "learning_rate": 0.0001830542331070482, "loss": 11.1776, "step": 160470 }, { "epoch": 19.311672683513837, "grad_norm": Infinity, "learning_rate": 0.0001830521142379197, "loss": 11.156, "step": 160480 }, { "epoch": 19.312876052948255, "grad_norm": Infinity, "learning_rate": 0.00018304999524859433, "loss": 11.1994, "step": 160490 }, { "epoch": 19.314079422382672, "grad_norm": Infinity, "learning_rate": 0.00018304787613907504, "loss": 11.1623, "step": 160500 }, { "epoch": 19.315282791817086, "grad_norm": Infinity, "learning_rate": 0.000183045756909365, "loss": 11.2263, "step": 160510 }, { "epoch": 19.316486161251504, "grad_norm": Infinity, "learning_rate": 0.00018304363755946727, "loss": 11.1237, "step": 160520 }, { "epoch": 19.31768953068592, "grad_norm": Infinity, "learning_rate": 0.00018304151808938487, "loss": 11.0652, "step": 160530 }, { "epoch": 19.318892900120336, "grad_norm": Infinity, "learning_rate": 0.0001830393984991209, "loss": 11.1131, "step": 160540 }, { "epoch": 19.320096269554753, "grad_norm": Infinity, "learning_rate": 0.0001830372787886784, "loss": 11.1326, "step": 160550 }, { "epoch": 19.32129963898917, "grad_norm": Infinity, "learning_rate": 0.00018303515895806048, "loss": 11.1641, "step": 160560 }, { "epoch": 19.322503008423585, "grad_norm": Infinity, "learning_rate": 0.00018303303900727019, "loss": 11.1123, "step": 160570 }, { "epoch": 19.323706377858002, "grad_norm": Infinity, "learning_rate": 0.0001830309189363106, "loss": 11.2257, "step": 160580 }, { "epoch": 19.32490974729242, "grad_norm": Infinity, "learning_rate": 0.00018302879874518472, "loss": 11.082, "step": 160590 }, { "epoch": 19.326113116726834, "grad_norm": Infinity, "learning_rate": 0.0001830266784338957, "loss": 11.0323, "step": 160600 }, { "epoch": 19.32731648616125, "grad_norm": Infinity, "learning_rate": 0.0001830245580024466, "loss": 11.0977, "step": 160610 }, { "epoch": 19.32851985559567, "grad_norm": Infinity, "learning_rate": 0.00018302243745084041, "loss": 11.2239, "step": 160620 }, { "epoch": 19.329723225030083, "grad_norm": Infinity, "learning_rate": 0.0001830203167790803, "loss": 11.1488, "step": 160630 }, { "epoch": 19.3309265944645, "grad_norm": Infinity, "learning_rate": 0.0001830181959871693, "loss": 11.2383, "step": 160640 }, { "epoch": 19.332129963898918, "grad_norm": Infinity, "learning_rate": 0.00018301607507511045, "loss": 11.1904, "step": 160650 }, { "epoch": 19.333333333333332, "grad_norm": Infinity, "learning_rate": 0.00018301395404290687, "loss": 11.1402, "step": 160660 }, { "epoch": 19.33453670276775, "grad_norm": Infinity, "learning_rate": 0.00018301183289056157, "loss": 11.2458, "step": 160670 }, { "epoch": 19.335740072202167, "grad_norm": Infinity, "learning_rate": 0.00018300971161807768, "loss": 11.1825, "step": 160680 }, { "epoch": 19.33694344163658, "grad_norm": Infinity, "learning_rate": 0.00018300759022545822, "loss": 11.3386, "step": 160690 }, { "epoch": 19.338146811071, "grad_norm": Infinity, "learning_rate": 0.0001830054687127063, "loss": 11.2002, "step": 160700 }, { "epoch": 19.339350180505416, "grad_norm": Infinity, "learning_rate": 0.00018300334707982497, "loss": 11.0794, "step": 160710 }, { "epoch": 19.34055354993983, "grad_norm": Infinity, "learning_rate": 0.00018300122532681728, "loss": 11.0661, "step": 160720 }, { "epoch": 19.341756919374248, "grad_norm": Infinity, "learning_rate": 0.00018299910345368633, "loss": 10.9984, "step": 160730 }, { "epoch": 19.342960288808666, "grad_norm": Infinity, "learning_rate": 0.00018299698146043522, "loss": 11.0773, "step": 160740 }, { "epoch": 19.34416365824308, "grad_norm": Infinity, "learning_rate": 0.00018299485934706697, "loss": 11.129, "step": 160750 }, { "epoch": 19.345367027677497, "grad_norm": Infinity, "learning_rate": 0.00018299273711358467, "loss": 11.1263, "step": 160760 }, { "epoch": 19.346570397111915, "grad_norm": Infinity, "learning_rate": 0.00018299061475999137, "loss": 11.1046, "step": 160770 }, { "epoch": 19.34777376654633, "grad_norm": Infinity, "learning_rate": 0.0001829884922862902, "loss": 11.057, "step": 160780 }, { "epoch": 19.348977135980746, "grad_norm": Infinity, "learning_rate": 0.00018298636969248414, "loss": 11.1086, "step": 160790 }, { "epoch": 19.350180505415164, "grad_norm": Infinity, "learning_rate": 0.00018298424697857634, "loss": 11.1199, "step": 160800 }, { "epoch": 19.351383874849578, "grad_norm": Infinity, "learning_rate": 0.00018298212414456986, "loss": 11.0532, "step": 160810 }, { "epoch": 19.352587244283995, "grad_norm": Infinity, "learning_rate": 0.00018298000119046774, "loss": 11.0792, "step": 160820 }, { "epoch": 19.353790613718413, "grad_norm": Infinity, "learning_rate": 0.00018297787811627307, "loss": 11.2113, "step": 160830 }, { "epoch": 19.354993983152827, "grad_norm": Infinity, "learning_rate": 0.00018297575492198895, "loss": 11.1289, "step": 160840 }, { "epoch": 19.356197352587245, "grad_norm": Infinity, "learning_rate": 0.0001829736316076184, "loss": 11.0859, "step": 160850 }, { "epoch": 19.357400722021662, "grad_norm": Infinity, "learning_rate": 0.00018297150817316453, "loss": 11.1366, "step": 160860 }, { "epoch": 19.358604091456076, "grad_norm": Infinity, "learning_rate": 0.0001829693846186304, "loss": 11.0457, "step": 160870 }, { "epoch": 19.359807460890494, "grad_norm": Infinity, "learning_rate": 0.00018296726094401908, "loss": 11.1258, "step": 160880 }, { "epoch": 19.36101083032491, "grad_norm": Infinity, "learning_rate": 0.00018296513714933369, "loss": 11.1406, "step": 160890 }, { "epoch": 19.362214199759325, "grad_norm": Infinity, "learning_rate": 0.00018296301323457721, "loss": 11.1511, "step": 160900 }, { "epoch": 19.363417569193743, "grad_norm": Infinity, "learning_rate": 0.00018296088919975278, "loss": 11.1093, "step": 160910 }, { "epoch": 19.36462093862816, "grad_norm": Infinity, "learning_rate": 0.00018295876504486347, "loss": 11.199, "step": 160920 }, { "epoch": 19.365824308062574, "grad_norm": Infinity, "learning_rate": 0.00018295664076991232, "loss": 11.1904, "step": 160930 }, { "epoch": 19.367027677496992, "grad_norm": Infinity, "learning_rate": 0.00018295451637490246, "loss": 11.2918, "step": 160940 }, { "epoch": 19.36823104693141, "grad_norm": Infinity, "learning_rate": 0.00018295239185983693, "loss": 11.1088, "step": 160950 }, { "epoch": 19.369434416365824, "grad_norm": Infinity, "learning_rate": 0.0001829502672247188, "loss": 11.1114, "step": 160960 }, { "epoch": 19.37063778580024, "grad_norm": Infinity, "learning_rate": 0.00018294814246955115, "loss": 11.2617, "step": 160970 }, { "epoch": 19.37184115523466, "grad_norm": Infinity, "learning_rate": 0.00018294601759433706, "loss": 11.1418, "step": 160980 }, { "epoch": 19.373044524669073, "grad_norm": Infinity, "learning_rate": 0.00018294389259907964, "loss": 11.2187, "step": 160990 }, { "epoch": 19.37424789410349, "grad_norm": Infinity, "learning_rate": 0.0001829417674837819, "loss": 11.0197, "step": 161000 }, { "epoch": 19.375451263537904, "grad_norm": Infinity, "learning_rate": 0.00018293964224844693, "loss": 11.1371, "step": 161010 }, { "epoch": 19.376654632972322, "grad_norm": Infinity, "learning_rate": 0.00018293751689307783, "loss": 11.1498, "step": 161020 }, { "epoch": 19.37785800240674, "grad_norm": Infinity, "learning_rate": 0.00018293539141767768, "loss": 11.0602, "step": 161030 }, { "epoch": 19.379061371841154, "grad_norm": Infinity, "learning_rate": 0.00018293326582224953, "loss": 11.1184, "step": 161040 }, { "epoch": 19.38026474127557, "grad_norm": Infinity, "learning_rate": 0.00018293114010679647, "loss": 11.1488, "step": 161050 }, { "epoch": 19.38146811070999, "grad_norm": Infinity, "learning_rate": 0.0001829290142713216, "loss": 11.1844, "step": 161060 }, { "epoch": 19.382671480144403, "grad_norm": Infinity, "learning_rate": 0.00018292688831582794, "loss": 11.0572, "step": 161070 }, { "epoch": 19.38387484957882, "grad_norm": Infinity, "learning_rate": 0.0001829247622403186, "loss": 11.0694, "step": 161080 }, { "epoch": 19.385078219013238, "grad_norm": Infinity, "learning_rate": 0.00018292263604479666, "loss": 11.1746, "step": 161090 }, { "epoch": 19.386281588447652, "grad_norm": Infinity, "learning_rate": 0.0001829205097292652, "loss": 11.0507, "step": 161100 }, { "epoch": 19.38748495788207, "grad_norm": Infinity, "learning_rate": 0.00018291838329372728, "loss": 11.167, "step": 161110 }, { "epoch": 19.388688327316487, "grad_norm": Infinity, "learning_rate": 0.000182916256738186, "loss": 11.107, "step": 161120 }, { "epoch": 19.3898916967509, "grad_norm": Infinity, "learning_rate": 0.00018291413006264442, "loss": 11.1558, "step": 161130 }, { "epoch": 19.39109506618532, "grad_norm": Infinity, "learning_rate": 0.00018291200326710563, "loss": 11.1019, "step": 161140 }, { "epoch": 19.392298435619736, "grad_norm": Infinity, "learning_rate": 0.0001829098763515727, "loss": 11.1393, "step": 161150 }, { "epoch": 19.39350180505415, "grad_norm": Infinity, "learning_rate": 0.0001829077493160487, "loss": 11.1555, "step": 161160 }, { "epoch": 19.394705174488568, "grad_norm": Infinity, "learning_rate": 0.0001829056221605367, "loss": 11.0415, "step": 161170 }, { "epoch": 19.395908543922985, "grad_norm": Infinity, "learning_rate": 0.00018290349488503982, "loss": 11.0785, "step": 161180 }, { "epoch": 19.3971119133574, "grad_norm": Infinity, "learning_rate": 0.00018290136748956112, "loss": 11.1671, "step": 161190 }, { "epoch": 19.398315282791817, "grad_norm": Infinity, "learning_rate": 0.00018289923997410364, "loss": 11.1875, "step": 161200 }, { "epoch": 19.399518652226234, "grad_norm": Infinity, "learning_rate": 0.00018289711233867052, "loss": 11.1483, "step": 161210 }, { "epoch": 19.40072202166065, "grad_norm": Infinity, "learning_rate": 0.0001828949845832648, "loss": 11.117, "step": 161220 }, { "epoch": 19.401925391095066, "grad_norm": Infinity, "learning_rate": 0.0001828928567078896, "loss": 10.9823, "step": 161230 }, { "epoch": 19.403128760529484, "grad_norm": Infinity, "learning_rate": 0.00018289072871254796, "loss": 11.2061, "step": 161240 }, { "epoch": 19.404332129963898, "grad_norm": Infinity, "learning_rate": 0.00018288860059724295, "loss": 11.2871, "step": 161250 }, { "epoch": 19.405535499398315, "grad_norm": Infinity, "learning_rate": 0.00018288647236197768, "loss": 11.2584, "step": 161260 }, { "epoch": 19.406738868832733, "grad_norm": Infinity, "learning_rate": 0.00018288434400675523, "loss": 11.1404, "step": 161270 }, { "epoch": 19.407942238267147, "grad_norm": Infinity, "learning_rate": 0.00018288221553157866, "loss": 11.0662, "step": 161280 }, { "epoch": 19.409145607701564, "grad_norm": Infinity, "learning_rate": 0.00018288008693645105, "loss": 11.1553, "step": 161290 }, { "epoch": 19.410348977135982, "grad_norm": Infinity, "learning_rate": 0.0001828779582213755, "loss": 11.049, "step": 161300 }, { "epoch": 19.411552346570396, "grad_norm": Infinity, "learning_rate": 0.0001828758293863551, "loss": 11.1687, "step": 161310 }, { "epoch": 19.412755716004813, "grad_norm": Infinity, "learning_rate": 0.0001828737004313929, "loss": 11.1317, "step": 161320 }, { "epoch": 19.41395908543923, "grad_norm": Infinity, "learning_rate": 0.000182871571356492, "loss": 11.097, "step": 161330 }, { "epoch": 19.415162454873645, "grad_norm": Infinity, "learning_rate": 0.00018286944216165546, "loss": 11.2208, "step": 161340 }, { "epoch": 19.416365824308063, "grad_norm": Infinity, "learning_rate": 0.00018286731284688639, "loss": 11.1941, "step": 161350 }, { "epoch": 19.41756919374248, "grad_norm": Infinity, "learning_rate": 0.00018286518341218785, "loss": 11.1709, "step": 161360 }, { "epoch": 19.418772563176894, "grad_norm": Infinity, "learning_rate": 0.00018286305385756295, "loss": 11.0176, "step": 161370 }, { "epoch": 19.41997593261131, "grad_norm": Infinity, "learning_rate": 0.00018286092418301472, "loss": 11.1106, "step": 161380 }, { "epoch": 19.42117930204573, "grad_norm": Infinity, "learning_rate": 0.0001828587943885463, "loss": 11.1976, "step": 161390 }, { "epoch": 19.422382671480143, "grad_norm": Infinity, "learning_rate": 0.00018285666447416077, "loss": 11.2034, "step": 161400 }, { "epoch": 19.42358604091456, "grad_norm": Infinity, "learning_rate": 0.00018285453443986116, "loss": 11.1639, "step": 161410 }, { "epoch": 19.42478941034898, "grad_norm": Infinity, "learning_rate": 0.0001828524042856506, "loss": 11.1199, "step": 161420 }, { "epoch": 19.425992779783392, "grad_norm": Infinity, "learning_rate": 0.00018285027401153213, "loss": 11.1309, "step": 161430 }, { "epoch": 19.42719614921781, "grad_norm": Infinity, "learning_rate": 0.00018284814361750886, "loss": 11.1051, "step": 161440 }, { "epoch": 19.428399518652228, "grad_norm": Infinity, "learning_rate": 0.00018284601310358388, "loss": 11.1438, "step": 161450 }, { "epoch": 19.42960288808664, "grad_norm": Infinity, "learning_rate": 0.00018284388246976027, "loss": 11.0844, "step": 161460 }, { "epoch": 19.43080625752106, "grad_norm": Infinity, "learning_rate": 0.0001828417517160411, "loss": 11.1385, "step": 161470 }, { "epoch": 19.432009626955477, "grad_norm": Infinity, "learning_rate": 0.00018283962084242948, "loss": 11.2327, "step": 161480 }, { "epoch": 19.43321299638989, "grad_norm": Infinity, "learning_rate": 0.00018283748984892847, "loss": 11.1238, "step": 161490 }, { "epoch": 19.43441636582431, "grad_norm": Infinity, "learning_rate": 0.00018283535873554114, "loss": 11.0821, "step": 161500 }, { "epoch": 19.435619735258726, "grad_norm": Infinity, "learning_rate": 0.0001828332275022706, "loss": 11.1408, "step": 161510 }, { "epoch": 19.43682310469314, "grad_norm": Infinity, "learning_rate": 0.00018283109614911994, "loss": 11.0691, "step": 161520 }, { "epoch": 19.438026474127557, "grad_norm": Infinity, "learning_rate": 0.00018282896467609224, "loss": 11.1385, "step": 161530 }, { "epoch": 19.439229843561975, "grad_norm": Infinity, "learning_rate": 0.00018282683308319057, "loss": 11.1372, "step": 161540 }, { "epoch": 19.44043321299639, "grad_norm": Infinity, "learning_rate": 0.00018282470137041804, "loss": 11.0371, "step": 161550 }, { "epoch": 19.441636582430807, "grad_norm": Infinity, "learning_rate": 0.0001828225695377777, "loss": 11.043, "step": 161560 }, { "epoch": 19.442839951865224, "grad_norm": Infinity, "learning_rate": 0.00018282043758527266, "loss": 11.1695, "step": 161570 }, { "epoch": 19.444043321299638, "grad_norm": Infinity, "learning_rate": 0.00018281830551290597, "loss": 11.2091, "step": 161580 }, { "epoch": 19.445246690734056, "grad_norm": Infinity, "learning_rate": 0.00018281617332068078, "loss": 11.0843, "step": 161590 }, { "epoch": 19.446450060168473, "grad_norm": Infinity, "learning_rate": 0.00018281404100860014, "loss": 11.1245, "step": 161600 }, { "epoch": 19.447653429602887, "grad_norm": Infinity, "learning_rate": 0.00018281190857666712, "loss": 11.2474, "step": 161610 }, { "epoch": 19.448856799037305, "grad_norm": Infinity, "learning_rate": 0.00018280977602488482, "loss": 11.132, "step": 161620 }, { "epoch": 19.450060168471722, "grad_norm": Infinity, "learning_rate": 0.00018280764335325636, "loss": 11.1568, "step": 161630 }, { "epoch": 19.451263537906136, "grad_norm": Infinity, "learning_rate": 0.00018280551056178474, "loss": 11.2903, "step": 161640 }, { "epoch": 19.452466907340554, "grad_norm": Infinity, "learning_rate": 0.00018280337765047314, "loss": 11.1696, "step": 161650 }, { "epoch": 19.45367027677497, "grad_norm": Infinity, "learning_rate": 0.00018280124461932462, "loss": 11.2114, "step": 161660 }, { "epoch": 19.454873646209386, "grad_norm": Infinity, "learning_rate": 0.00018279911146834224, "loss": 11.2059, "step": 161670 }, { "epoch": 19.456077015643803, "grad_norm": Infinity, "learning_rate": 0.00018279697819752907, "loss": 11.056, "step": 161680 }, { "epoch": 19.45728038507822, "grad_norm": Infinity, "learning_rate": 0.00018279484480688828, "loss": 11.1237, "step": 161690 }, { "epoch": 19.458483754512635, "grad_norm": Infinity, "learning_rate": 0.0001827927112964229, "loss": 11.2018, "step": 161700 }, { "epoch": 19.459687123947052, "grad_norm": Infinity, "learning_rate": 0.000182790577666136, "loss": 11.1812, "step": 161710 }, { "epoch": 19.460890493381466, "grad_norm": Infinity, "learning_rate": 0.00018278844391603075, "loss": 11.0153, "step": 161720 }, { "epoch": 19.462093862815884, "grad_norm": Infinity, "learning_rate": 0.00018278631004611015, "loss": 11.1073, "step": 161730 }, { "epoch": 19.4632972322503, "grad_norm": Infinity, "learning_rate": 0.00018278417605637728, "loss": 11.1949, "step": 161740 }, { "epoch": 19.464500601684716, "grad_norm": Infinity, "learning_rate": 0.00018278204194683531, "loss": 11.144, "step": 161750 }, { "epoch": 19.465703971119133, "grad_norm": Infinity, "learning_rate": 0.0001827799077174873, "loss": 11.1027, "step": 161760 }, { "epoch": 19.46690734055355, "grad_norm": Infinity, "learning_rate": 0.0001827777733683363, "loss": 11.1226, "step": 161770 }, { "epoch": 19.468110709987965, "grad_norm": Infinity, "learning_rate": 0.00018277563889938542, "loss": 11.1499, "step": 161780 }, { "epoch": 19.469314079422382, "grad_norm": Infinity, "learning_rate": 0.00018277350431063777, "loss": 11.0822, "step": 161790 }, { "epoch": 19.4705174488568, "grad_norm": Infinity, "learning_rate": 0.0001827713696020964, "loss": 11.1109, "step": 161800 }, { "epoch": 19.471720818291214, "grad_norm": Infinity, "learning_rate": 0.00018276923477376445, "loss": 11.1281, "step": 161810 }, { "epoch": 19.47292418772563, "grad_norm": Infinity, "learning_rate": 0.00018276709982564498, "loss": 11.1451, "step": 161820 }, { "epoch": 19.47412755716005, "grad_norm": Infinity, "learning_rate": 0.00018276496475774106, "loss": 11.127, "step": 161830 }, { "epoch": 19.475330926594463, "grad_norm": Infinity, "learning_rate": 0.00018276282957005582, "loss": 11.0422, "step": 161840 }, { "epoch": 19.47653429602888, "grad_norm": Infinity, "learning_rate": 0.00018276069426259234, "loss": 11.0957, "step": 161850 }, { "epoch": 19.477737665463298, "grad_norm": Infinity, "learning_rate": 0.00018275855883535368, "loss": 11.1305, "step": 161860 }, { "epoch": 19.478941034897712, "grad_norm": Infinity, "learning_rate": 0.00018275642328834297, "loss": 11.1308, "step": 161870 }, { "epoch": 19.48014440433213, "grad_norm": Infinity, "learning_rate": 0.0001827542876215633, "loss": 11.2344, "step": 161880 }, { "epoch": 19.481347773766547, "grad_norm": Infinity, "learning_rate": 0.0001827521518350177, "loss": 11.0885, "step": 161890 }, { "epoch": 19.48255114320096, "grad_norm": Infinity, "learning_rate": 0.00018275001592870933, "loss": 11.1191, "step": 161900 }, { "epoch": 19.48375451263538, "grad_norm": Infinity, "learning_rate": 0.00018274787990264123, "loss": 11.2096, "step": 161910 }, { "epoch": 19.484957882069796, "grad_norm": Infinity, "learning_rate": 0.00018274574375681658, "loss": 11.1793, "step": 161920 }, { "epoch": 19.48616125150421, "grad_norm": Infinity, "learning_rate": 0.00018274360749123835, "loss": 11.1446, "step": 161930 }, { "epoch": 19.487364620938628, "grad_norm": Infinity, "learning_rate": 0.0001827414711059097, "loss": 11.1112, "step": 161940 }, { "epoch": 19.488567990373046, "grad_norm": Infinity, "learning_rate": 0.00018273933460083373, "loss": 11.1483, "step": 161950 }, { "epoch": 19.48977135980746, "grad_norm": Infinity, "learning_rate": 0.00018273719797601352, "loss": 11.1544, "step": 161960 }, { "epoch": 19.490974729241877, "grad_norm": Infinity, "learning_rate": 0.00018273506123145215, "loss": 11.184, "step": 161970 }, { "epoch": 19.492178098676295, "grad_norm": Infinity, "learning_rate": 0.0001827329243671527, "loss": 11.1902, "step": 161980 }, { "epoch": 19.49338146811071, "grad_norm": Infinity, "learning_rate": 0.00018273078738311832, "loss": 11.1775, "step": 161990 }, { "epoch": 19.494584837545126, "grad_norm": Infinity, "learning_rate": 0.00018272865027935204, "loss": 11.1122, "step": 162000 }, { "epoch": 19.495788206979544, "grad_norm": Infinity, "learning_rate": 0.000182726513055857, "loss": 11.2597, "step": 162010 }, { "epoch": 19.496991576413958, "grad_norm": Infinity, "learning_rate": 0.00018272437571263624, "loss": 11.1053, "step": 162020 }, { "epoch": 19.498194945848375, "grad_norm": Infinity, "learning_rate": 0.0001827222382496929, "loss": 11.102, "step": 162030 }, { "epoch": 19.499398315282793, "grad_norm": Infinity, "learning_rate": 0.00018272010066703004, "loss": 11.1276, "step": 162040 }, { "epoch": 19.500601684717207, "grad_norm": Infinity, "learning_rate": 0.0001827179629646508, "loss": 11.1512, "step": 162050 }, { "epoch": 19.501805054151625, "grad_norm": Infinity, "learning_rate": 0.00018271582514255825, "loss": 11.1552, "step": 162060 }, { "epoch": 19.503008423586042, "grad_norm": Infinity, "learning_rate": 0.00018271368720075546, "loss": 11.1152, "step": 162070 }, { "epoch": 19.504211793020456, "grad_norm": Infinity, "learning_rate": 0.00018271154913924553, "loss": 10.966, "step": 162080 }, { "epoch": 19.505415162454874, "grad_norm": Infinity, "learning_rate": 0.00018270941095803158, "loss": 11.1908, "step": 162090 }, { "epoch": 19.50661853188929, "grad_norm": Infinity, "learning_rate": 0.00018270727265711672, "loss": 11.1515, "step": 162100 }, { "epoch": 19.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00018270513423650396, "loss": 11.1622, "step": 162110 }, { "epoch": 19.509025270758123, "grad_norm": Infinity, "learning_rate": 0.0001827029956961965, "loss": 11.2566, "step": 162120 }, { "epoch": 19.51022864019254, "grad_norm": Infinity, "learning_rate": 0.00018270085703619738, "loss": 11.1811, "step": 162130 }, { "epoch": 19.511432009626954, "grad_norm": Infinity, "learning_rate": 0.00018269871825650968, "loss": 11.1443, "step": 162140 }, { "epoch": 19.512635379061372, "grad_norm": Infinity, "learning_rate": 0.00018269657935713653, "loss": 11.1258, "step": 162150 }, { "epoch": 19.51383874849579, "grad_norm": Infinity, "learning_rate": 0.000182694440338081, "loss": 11.1851, "step": 162160 }, { "epoch": 19.515042117930204, "grad_norm": Infinity, "learning_rate": 0.0001826923011993462, "loss": 11.0742, "step": 162170 }, { "epoch": 19.51624548736462, "grad_norm": Infinity, "learning_rate": 0.00018269016194093527, "loss": 11.215, "step": 162180 }, { "epoch": 19.51744885679904, "grad_norm": Infinity, "learning_rate": 0.00018268802256285121, "loss": 11.2069, "step": 162190 }, { "epoch": 19.518652226233453, "grad_norm": Infinity, "learning_rate": 0.00018268588306509718, "loss": 11.1003, "step": 162200 }, { "epoch": 19.51985559566787, "grad_norm": Infinity, "learning_rate": 0.00018268374344767625, "loss": 11.0829, "step": 162210 }, { "epoch": 19.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00018268160371059155, "loss": 11.1549, "step": 162220 }, { "epoch": 19.522262334536702, "grad_norm": Infinity, "learning_rate": 0.00018267946385384617, "loss": 11.0657, "step": 162230 }, { "epoch": 19.52346570397112, "grad_norm": Infinity, "learning_rate": 0.00018267732387744316, "loss": 11.2486, "step": 162240 }, { "epoch": 19.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00018267518378138563, "loss": 11.1109, "step": 162250 }, { "epoch": 19.52587244283995, "grad_norm": Infinity, "learning_rate": 0.00018267304356567676, "loss": 10.9885, "step": 162260 }, { "epoch": 19.52707581227437, "grad_norm": Infinity, "learning_rate": 0.00018267090323031954, "loss": 11.1365, "step": 162270 }, { "epoch": 19.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00018266876277531715, "loss": 11.0086, "step": 162280 }, { "epoch": 19.5294825511432, "grad_norm": Infinity, "learning_rate": 0.00018266662220067258, "loss": 11.22, "step": 162290 }, { "epoch": 19.530685920577618, "grad_norm": Infinity, "learning_rate": 0.00018266448150638907, "loss": 11.1783, "step": 162300 }, { "epoch": 19.531889290012035, "grad_norm": Infinity, "learning_rate": 0.00018266234069246962, "loss": 11.1648, "step": 162310 }, { "epoch": 19.53309265944645, "grad_norm": Infinity, "learning_rate": 0.00018266019975891735, "loss": 11.1629, "step": 162320 }, { "epoch": 19.534296028880867, "grad_norm": Infinity, "learning_rate": 0.00018265805870573538, "loss": 11.1951, "step": 162330 }, { "epoch": 19.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00018265591753292677, "loss": 11.1585, "step": 162340 }, { "epoch": 19.5367027677497, "grad_norm": Infinity, "learning_rate": 0.00018265377624049464, "loss": 11.1936, "step": 162350 }, { "epoch": 19.537906137184116, "grad_norm": Infinity, "learning_rate": 0.0001826516348284421, "loss": 11.1752, "step": 162360 }, { "epoch": 19.53910950661853, "grad_norm": Infinity, "learning_rate": 0.00018264949329677222, "loss": 11.0277, "step": 162370 }, { "epoch": 19.540312876052948, "grad_norm": Infinity, "learning_rate": 0.00018264735164548817, "loss": 11.2411, "step": 162380 }, { "epoch": 19.541516245487365, "grad_norm": Infinity, "learning_rate": 0.00018264520987459296, "loss": 11.1114, "step": 162390 }, { "epoch": 19.54271961492178, "grad_norm": Infinity, "learning_rate": 0.0001826430679840897, "loss": 11.1428, "step": 162400 }, { "epoch": 19.543922984356197, "grad_norm": Infinity, "learning_rate": 0.00018264092597398155, "loss": 11.1593, "step": 162410 }, { "epoch": 19.545126353790614, "grad_norm": Infinity, "learning_rate": 0.00018263878384427158, "loss": 11.0661, "step": 162420 }, { "epoch": 19.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00018263664159496289, "loss": 11.0729, "step": 162430 }, { "epoch": 19.547533092659446, "grad_norm": Infinity, "learning_rate": 0.00018263449922605856, "loss": 11.0719, "step": 162440 }, { "epoch": 19.548736462093864, "grad_norm": Infinity, "learning_rate": 0.0001826323567375617, "loss": 11.1303, "step": 162450 }, { "epoch": 19.549939831528278, "grad_norm": Infinity, "learning_rate": 0.00018263021412947544, "loss": 11.2754, "step": 162460 }, { "epoch": 19.551143200962695, "grad_norm": Infinity, "learning_rate": 0.00018262807140180285, "loss": 11.2033, "step": 162470 }, { "epoch": 19.552346570397113, "grad_norm": Infinity, "learning_rate": 0.00018262592855454704, "loss": 11.1621, "step": 162480 }, { "epoch": 19.553549939831527, "grad_norm": Infinity, "learning_rate": 0.0001826237855877111, "loss": 11.1376, "step": 162490 }, { "epoch": 19.554753309265944, "grad_norm": Infinity, "learning_rate": 0.00018262164250129818, "loss": 11.0886, "step": 162500 }, { "epoch": 19.555956678700362, "grad_norm": Infinity, "learning_rate": 0.0001826194992953113, "loss": 11.1191, "step": 162510 }, { "epoch": 19.557160048134776, "grad_norm": Infinity, "learning_rate": 0.00018261735596975364, "loss": 11.1761, "step": 162520 }, { "epoch": 19.558363417569193, "grad_norm": Infinity, "learning_rate": 0.00018261521252462823, "loss": 11.1334, "step": 162530 }, { "epoch": 19.55956678700361, "grad_norm": Infinity, "learning_rate": 0.00018261306895993827, "loss": 11.1404, "step": 162540 }, { "epoch": 19.560770156438025, "grad_norm": Infinity, "learning_rate": 0.00018261092527568673, "loss": 11.1523, "step": 162550 }, { "epoch": 19.561973525872443, "grad_norm": Infinity, "learning_rate": 0.00018260878147187683, "loss": 11.1759, "step": 162560 }, { "epoch": 19.56317689530686, "grad_norm": Infinity, "learning_rate": 0.00018260663754851163, "loss": 11.0084, "step": 162570 }, { "epoch": 19.564380264741274, "grad_norm": Infinity, "learning_rate": 0.00018260449350559422, "loss": 11.0994, "step": 162580 }, { "epoch": 19.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00018260234934312772, "loss": 11.1455, "step": 162590 }, { "epoch": 19.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00018260020506111522, "loss": 11.1078, "step": 162600 }, { "epoch": 19.567990373044523, "grad_norm": Infinity, "learning_rate": 0.00018259806065955983, "loss": 11.2148, "step": 162610 }, { "epoch": 19.56919374247894, "grad_norm": Infinity, "learning_rate": 0.00018259591613846465, "loss": 11.1882, "step": 162620 }, { "epoch": 19.57039711191336, "grad_norm": Infinity, "learning_rate": 0.0001825937714978328, "loss": 11.0433, "step": 162630 }, { "epoch": 19.571600481347772, "grad_norm": Infinity, "learning_rate": 0.00018259162673766737, "loss": 11.1789, "step": 162640 }, { "epoch": 19.57280385078219, "grad_norm": Infinity, "learning_rate": 0.00018258948185797148, "loss": 11.1198, "step": 162650 }, { "epoch": 19.574007220216608, "grad_norm": Infinity, "learning_rate": 0.00018258733685874815, "loss": 11.1387, "step": 162660 }, { "epoch": 19.57521058965102, "grad_norm": Infinity, "learning_rate": 0.00018258519174000061, "loss": 11.1339, "step": 162670 }, { "epoch": 19.57641395908544, "grad_norm": Infinity, "learning_rate": 0.00018258304650173193, "loss": 11.1132, "step": 162680 }, { "epoch": 19.577617328519857, "grad_norm": Infinity, "learning_rate": 0.00018258090114394514, "loss": 11.151, "step": 162690 }, { "epoch": 19.57882069795427, "grad_norm": Infinity, "learning_rate": 0.0001825787556666434, "loss": 11.1796, "step": 162700 }, { "epoch": 19.58002406738869, "grad_norm": Infinity, "learning_rate": 0.00018257661006982985, "loss": 11.1099, "step": 162710 }, { "epoch": 19.581227436823106, "grad_norm": Infinity, "learning_rate": 0.00018257446435350754, "loss": 11.1478, "step": 162720 }, { "epoch": 19.58243080625752, "grad_norm": Infinity, "learning_rate": 0.00018257231851767958, "loss": 11.1537, "step": 162730 }, { "epoch": 19.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00018257017256234913, "loss": 11.2141, "step": 162740 }, { "epoch": 19.584837545126355, "grad_norm": Infinity, "learning_rate": 0.00018256802648751921, "loss": 11.0646, "step": 162750 }, { "epoch": 19.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00018256588029319298, "loss": 11.1072, "step": 162760 }, { "epoch": 19.587244283995187, "grad_norm": Infinity, "learning_rate": 0.00018256373397937354, "loss": 11.1924, "step": 162770 }, { "epoch": 19.588447653429604, "grad_norm": Infinity, "learning_rate": 0.000182561587546064, "loss": 11.1948, "step": 162780 }, { "epoch": 19.589651022864018, "grad_norm": Infinity, "learning_rate": 0.00018255944099326745, "loss": 11.1777, "step": 162790 }, { "epoch": 19.590854392298436, "grad_norm": Infinity, "learning_rate": 0.000182557294320987, "loss": 11.1685, "step": 162800 }, { "epoch": 19.592057761732853, "grad_norm": Infinity, "learning_rate": 0.0001825551475292258, "loss": 11.1947, "step": 162810 }, { "epoch": 19.593261131167267, "grad_norm": Infinity, "learning_rate": 0.00018255300061798687, "loss": 11.1744, "step": 162820 }, { "epoch": 19.594464500601685, "grad_norm": Infinity, "learning_rate": 0.0001825508535872734, "loss": 11.1296, "step": 162830 }, { "epoch": 19.595667870036102, "grad_norm": Infinity, "learning_rate": 0.00018254870643708845, "loss": 11.0999, "step": 162840 }, { "epoch": 19.596871239470516, "grad_norm": Infinity, "learning_rate": 0.00018254655916743513, "loss": 11.101, "step": 162850 }, { "epoch": 19.598074608904934, "grad_norm": Infinity, "learning_rate": 0.00018254441177831658, "loss": 11.1262, "step": 162860 }, { "epoch": 19.59927797833935, "grad_norm": Infinity, "learning_rate": 0.00018254226426973587, "loss": 11.1276, "step": 162870 }, { "epoch": 19.600481347773766, "grad_norm": Infinity, "learning_rate": 0.00018254011664169614, "loss": 11.0346, "step": 162880 }, { "epoch": 19.601684717208183, "grad_norm": Infinity, "learning_rate": 0.00018253796889420047, "loss": 11.2714, "step": 162890 }, { "epoch": 19.6028880866426, "grad_norm": Infinity, "learning_rate": 0.000182535821027252, "loss": 11.2405, "step": 162900 }, { "epoch": 19.604091456077015, "grad_norm": Infinity, "learning_rate": 0.00018253367304085378, "loss": 11.1912, "step": 162910 }, { "epoch": 19.605294825511432, "grad_norm": Infinity, "learning_rate": 0.000182531524935009, "loss": 11.0689, "step": 162920 }, { "epoch": 19.60649819494585, "grad_norm": Infinity, "learning_rate": 0.0001825293767097207, "loss": 11.0899, "step": 162930 }, { "epoch": 19.607701564380264, "grad_norm": Infinity, "learning_rate": 0.00018252722836499205, "loss": 11.1685, "step": 162940 }, { "epoch": 19.60890493381468, "grad_norm": Infinity, "learning_rate": 0.0001825250799008261, "loss": 11.1937, "step": 162950 }, { "epoch": 19.6101083032491, "grad_norm": Infinity, "learning_rate": 0.000182522931317226, "loss": 11.0739, "step": 162960 }, { "epoch": 19.611311672683513, "grad_norm": Infinity, "learning_rate": 0.0001825207826141948, "loss": 11.1413, "step": 162970 }, { "epoch": 19.61251504211793, "grad_norm": Infinity, "learning_rate": 0.00018251863379173568, "loss": 11.0881, "step": 162980 }, { "epoch": 19.613718411552348, "grad_norm": Infinity, "learning_rate": 0.00018251648484985173, "loss": 11.2409, "step": 162990 }, { "epoch": 19.614921780986762, "grad_norm": Infinity, "learning_rate": 0.00018251433578854605, "loss": 11.0716, "step": 163000 }, { "epoch": 19.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00018251218660782177, "loss": 11.1568, "step": 163010 }, { "epoch": 19.617328519855597, "grad_norm": Infinity, "learning_rate": 0.00018251003730768197, "loss": 11.1597, "step": 163020 }, { "epoch": 19.61853188929001, "grad_norm": Infinity, "learning_rate": 0.00018250788788812977, "loss": 11.1744, "step": 163030 }, { "epoch": 19.61973525872443, "grad_norm": Infinity, "learning_rate": 0.0001825057383491683, "loss": 11.1821, "step": 163040 }, { "epoch": 19.620938628158846, "grad_norm": Infinity, "learning_rate": 0.00018250358869080065, "loss": 11.1035, "step": 163050 }, { "epoch": 19.62214199759326, "grad_norm": Infinity, "learning_rate": 0.00018250143891302992, "loss": 11.1784, "step": 163060 }, { "epoch": 19.623345367027678, "grad_norm": Infinity, "learning_rate": 0.00018249928901585926, "loss": 11.1601, "step": 163070 }, { "epoch": 19.624548736462096, "grad_norm": Infinity, "learning_rate": 0.00018249713899929176, "loss": 11.1772, "step": 163080 }, { "epoch": 19.62575210589651, "grad_norm": Infinity, "learning_rate": 0.00018249498886333055, "loss": 11.1616, "step": 163090 }, { "epoch": 19.626955475330927, "grad_norm": Infinity, "learning_rate": 0.0001824928386079787, "loss": 11.2318, "step": 163100 }, { "epoch": 19.628158844765345, "grad_norm": Infinity, "learning_rate": 0.0001824906882332393, "loss": 11.1606, "step": 163110 }, { "epoch": 19.62936221419976, "grad_norm": Infinity, "learning_rate": 0.0001824885377391156, "loss": 11.118, "step": 163120 }, { "epoch": 19.630565583634176, "grad_norm": Infinity, "learning_rate": 0.00018248638712561055, "loss": 11.2344, "step": 163130 }, { "epoch": 19.63176895306859, "grad_norm": Infinity, "learning_rate": 0.00018248423639272737, "loss": 11.1924, "step": 163140 }, { "epoch": 19.632972322503008, "grad_norm": Infinity, "learning_rate": 0.0001824820855404691, "loss": 11.1809, "step": 163150 }, { "epoch": 19.634175691937426, "grad_norm": Infinity, "learning_rate": 0.00018247993456883892, "loss": 11.1535, "step": 163160 }, { "epoch": 19.63537906137184, "grad_norm": Infinity, "learning_rate": 0.0001824777834778399, "loss": 11.1904, "step": 163170 }, { "epoch": 19.636582430806257, "grad_norm": Infinity, "learning_rate": 0.00018247563226747517, "loss": 11.0601, "step": 163180 }, { "epoch": 19.637785800240675, "grad_norm": Infinity, "learning_rate": 0.00018247348093774782, "loss": 11.246, "step": 163190 }, { "epoch": 19.63898916967509, "grad_norm": Infinity, "learning_rate": 0.000182471329488661, "loss": 11.0633, "step": 163200 }, { "epoch": 19.640192539109506, "grad_norm": Infinity, "learning_rate": 0.0001824691779202178, "loss": 10.9845, "step": 163210 }, { "epoch": 19.641395908543924, "grad_norm": Infinity, "learning_rate": 0.00018246702623242133, "loss": 11.2375, "step": 163220 }, { "epoch": 19.642599277978338, "grad_norm": Infinity, "learning_rate": 0.00018246487442527472, "loss": 11.0849, "step": 163230 }, { "epoch": 19.643802647412755, "grad_norm": Infinity, "learning_rate": 0.00018246272249878106, "loss": 11.0735, "step": 163240 }, { "epoch": 19.645006016847173, "grad_norm": Infinity, "learning_rate": 0.00018246057045294352, "loss": 11.0803, "step": 163250 }, { "epoch": 19.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00018245841828776516, "loss": 11.1412, "step": 163260 }, { "epoch": 19.647412755716005, "grad_norm": Infinity, "learning_rate": 0.0001824562660032491, "loss": 11.1985, "step": 163270 }, { "epoch": 19.648616125150422, "grad_norm": Infinity, "learning_rate": 0.00018245411359939847, "loss": 11.1095, "step": 163280 }, { "epoch": 19.649819494584836, "grad_norm": Infinity, "learning_rate": 0.00018245196107621636, "loss": 11.2471, "step": 163290 }, { "epoch": 19.651022864019254, "grad_norm": Infinity, "learning_rate": 0.00018244980843370594, "loss": 11.1323, "step": 163300 }, { "epoch": 19.65222623345367, "grad_norm": Infinity, "learning_rate": 0.00018244765567187028, "loss": 11.2032, "step": 163310 }, { "epoch": 19.653429602888085, "grad_norm": Infinity, "learning_rate": 0.0001824455027907125, "loss": 11.0448, "step": 163320 }, { "epoch": 19.654632972322503, "grad_norm": Infinity, "learning_rate": 0.00018244334979023572, "loss": 11.1328, "step": 163330 }, { "epoch": 19.65583634175692, "grad_norm": Infinity, "learning_rate": 0.00018244119667044307, "loss": 11.1471, "step": 163340 }, { "epoch": 19.657039711191334, "grad_norm": Infinity, "learning_rate": 0.00018243904343133763, "loss": 11.0798, "step": 163350 }, { "epoch": 19.658243080625752, "grad_norm": Infinity, "learning_rate": 0.00018243689007292258, "loss": 11.1756, "step": 163360 }, { "epoch": 19.65944645006017, "grad_norm": Infinity, "learning_rate": 0.00018243473659520097, "loss": 11.1611, "step": 163370 }, { "epoch": 19.660649819494584, "grad_norm": Infinity, "learning_rate": 0.00018243258299817594, "loss": 11.0126, "step": 163380 }, { "epoch": 19.661853188929, "grad_norm": Infinity, "learning_rate": 0.00018243042928185063, "loss": 11.1627, "step": 163390 }, { "epoch": 19.66305655836342, "grad_norm": Infinity, "learning_rate": 0.00018242827544622813, "loss": 11.1748, "step": 163400 }, { "epoch": 19.664259927797833, "grad_norm": Infinity, "learning_rate": 0.00018242612149131155, "loss": 11.1755, "step": 163410 }, { "epoch": 19.66546329723225, "grad_norm": Infinity, "learning_rate": 0.00018242396741710402, "loss": 11.043, "step": 163420 }, { "epoch": 19.666666666666668, "grad_norm": Infinity, "learning_rate": 0.00018242181322360867, "loss": 11.1601, "step": 163430 }, { "epoch": 19.667870036101082, "grad_norm": Infinity, "learning_rate": 0.00018241965891082864, "loss": 11.2145, "step": 163440 }, { "epoch": 19.6690734055355, "grad_norm": Infinity, "learning_rate": 0.000182417504478767, "loss": 11.0432, "step": 163450 }, { "epoch": 19.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00018241534992742683, "loss": 11.1314, "step": 163460 }, { "epoch": 19.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00018241319525681135, "loss": 11.2229, "step": 163470 }, { "epoch": 19.67268351383875, "grad_norm": Infinity, "learning_rate": 0.00018241104046692362, "loss": 11.1409, "step": 163480 }, { "epoch": 19.673886883273166, "grad_norm": Infinity, "learning_rate": 0.00018240888555776677, "loss": 11.1248, "step": 163490 }, { "epoch": 19.67509025270758, "grad_norm": Infinity, "learning_rate": 0.0001824067305293439, "loss": 11.1087, "step": 163500 }, { "epoch": 19.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00018240457538165817, "loss": 11.1314, "step": 163510 }, { "epoch": 19.677496991576415, "grad_norm": Infinity, "learning_rate": 0.00018240242011471268, "loss": 11.0941, "step": 163520 }, { "epoch": 19.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00018240026472851048, "loss": 11.2069, "step": 163530 }, { "epoch": 19.679903730445247, "grad_norm": Infinity, "learning_rate": 0.0001823981092230548, "loss": 11.1347, "step": 163540 }, { "epoch": 19.681107099879664, "grad_norm": Infinity, "learning_rate": 0.00018239595359834874, "loss": 11.0686, "step": 163550 }, { "epoch": 19.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00018239379785439533, "loss": 11.029, "step": 163560 }, { "epoch": 19.683513838748496, "grad_norm": Infinity, "learning_rate": 0.00018239164199119782, "loss": 11.1197, "step": 163570 }, { "epoch": 19.684717208182914, "grad_norm": Infinity, "learning_rate": 0.0001823894860087592, "loss": 11.1517, "step": 163580 }, { "epoch": 19.685920577617328, "grad_norm": Infinity, "learning_rate": 0.0001823873299070827, "loss": 11.2753, "step": 163590 }, { "epoch": 19.687123947051745, "grad_norm": Infinity, "learning_rate": 0.00018238517368617137, "loss": 11.2353, "step": 163600 }, { "epoch": 19.688327316486163, "grad_norm": Infinity, "learning_rate": 0.00018238301734602836, "loss": 11.2015, "step": 163610 }, { "epoch": 19.689530685920577, "grad_norm": Infinity, "learning_rate": 0.00018238086088665675, "loss": 11.1367, "step": 163620 }, { "epoch": 19.690734055354994, "grad_norm": Infinity, "learning_rate": 0.00018237870430805976, "loss": 11.0568, "step": 163630 }, { "epoch": 19.691937424789412, "grad_norm": Infinity, "learning_rate": 0.0001823765476102404, "loss": 11.1073, "step": 163640 }, { "epoch": 19.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00018237439079320185, "loss": 11.1918, "step": 163650 }, { "epoch": 19.694344163658243, "grad_norm": Infinity, "learning_rate": 0.00018237223385694722, "loss": 11.1728, "step": 163660 }, { "epoch": 19.69554753309266, "grad_norm": Infinity, "learning_rate": 0.00018237007680147962, "loss": 11.1465, "step": 163670 }, { "epoch": 19.696750902527075, "grad_norm": Infinity, "learning_rate": 0.00018236791962680218, "loss": 11.07, "step": 163680 }, { "epoch": 19.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00018236576233291804, "loss": 11.1767, "step": 163690 }, { "epoch": 19.69915764139591, "grad_norm": Infinity, "learning_rate": 0.00018236360491983032, "loss": 11.0121, "step": 163700 }, { "epoch": 19.700361010830324, "grad_norm": Infinity, "learning_rate": 0.00018236144738754208, "loss": 10.9453, "step": 163710 }, { "epoch": 19.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00018235928973605654, "loss": 11.1796, "step": 163720 }, { "epoch": 19.70276774969916, "grad_norm": Infinity, "learning_rate": 0.00018235713196537674, "loss": 11.1494, "step": 163730 }, { "epoch": 19.703971119133573, "grad_norm": Infinity, "learning_rate": 0.00018235497407550585, "loss": 11.1771, "step": 163740 }, { "epoch": 19.70517448856799, "grad_norm": Infinity, "learning_rate": 0.000182352816066447, "loss": 11.0732, "step": 163750 }, { "epoch": 19.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00018235065793820328, "loss": 11.2246, "step": 163760 }, { "epoch": 19.707581227436823, "grad_norm": Infinity, "learning_rate": 0.00018234849969077783, "loss": 11.187, "step": 163770 }, { "epoch": 19.70878459687124, "grad_norm": Infinity, "learning_rate": 0.00018234634132417374, "loss": 11.1288, "step": 163780 }, { "epoch": 19.709987966305654, "grad_norm": Infinity, "learning_rate": 0.0001823441828383942, "loss": 11.0761, "step": 163790 }, { "epoch": 19.71119133574007, "grad_norm": Infinity, "learning_rate": 0.0001823420242334423, "loss": 11.1201, "step": 163800 }, { "epoch": 19.71239470517449, "grad_norm": Infinity, "learning_rate": 0.00018233986550932112, "loss": 11.1259, "step": 163810 }, { "epoch": 19.713598074608903, "grad_norm": Infinity, "learning_rate": 0.00018233770666603385, "loss": 10.9689, "step": 163820 }, { "epoch": 19.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00018233554770358357, "loss": 11.1964, "step": 163830 }, { "epoch": 19.71600481347774, "grad_norm": Infinity, "learning_rate": 0.00018233338862197343, "loss": 11.1857, "step": 163840 }, { "epoch": 19.717208182912152, "grad_norm": Infinity, "learning_rate": 0.0001823312294212066, "loss": 11.0965, "step": 163850 }, { "epoch": 19.71841155234657, "grad_norm": Infinity, "learning_rate": 0.0001823290701012861, "loss": 11.1701, "step": 163860 }, { "epoch": 19.719614921780988, "grad_norm": Infinity, "learning_rate": 0.0001823269106622151, "loss": 11.1337, "step": 163870 }, { "epoch": 19.7208182912154, "grad_norm": Infinity, "learning_rate": 0.00018232475110399677, "loss": 11.2088, "step": 163880 }, { "epoch": 19.72202166064982, "grad_norm": Infinity, "learning_rate": 0.0001823225914266342, "loss": 11.0902, "step": 163890 }, { "epoch": 19.723225030084237, "grad_norm": Infinity, "learning_rate": 0.0001823204316301305, "loss": 11.1607, "step": 163900 }, { "epoch": 19.72442839951865, "grad_norm": Infinity, "learning_rate": 0.0001823182717144888, "loss": 11.1018, "step": 163910 }, { "epoch": 19.72563176895307, "grad_norm": Infinity, "learning_rate": 0.00018231611167971227, "loss": 11.1827, "step": 163920 }, { "epoch": 19.726835138387486, "grad_norm": Infinity, "learning_rate": 0.000182313951525804, "loss": 11.0776, "step": 163930 }, { "epoch": 19.7280385078219, "grad_norm": Infinity, "learning_rate": 0.0001823117912527671, "loss": 10.9289, "step": 163940 }, { "epoch": 19.729241877256317, "grad_norm": Infinity, "learning_rate": 0.00018230963086060471, "loss": 11.241, "step": 163950 }, { "epoch": 19.730445246690735, "grad_norm": Infinity, "learning_rate": 0.00018230747034932, "loss": 11.1012, "step": 163960 }, { "epoch": 19.73164861612515, "grad_norm": Infinity, "learning_rate": 0.00018230530971891602, "loss": 11.0606, "step": 163970 }, { "epoch": 19.732851985559567, "grad_norm": Infinity, "learning_rate": 0.00018230314896939596, "loss": 11.1034, "step": 163980 }, { "epoch": 19.734055354993984, "grad_norm": Infinity, "learning_rate": 0.0001823009881007629, "loss": 11.1319, "step": 163990 }, { "epoch": 19.735258724428398, "grad_norm": Infinity, "learning_rate": 0.00018229882711302, "loss": 11.1511, "step": 164000 }, { "epoch": 19.736462093862816, "grad_norm": Infinity, "learning_rate": 0.00018229666600617038, "loss": 11.0793, "step": 164010 }, { "epoch": 19.737665463297233, "grad_norm": Infinity, "learning_rate": 0.0001822945047802172, "loss": 11.192, "step": 164020 }, { "epoch": 19.738868832731647, "grad_norm": Infinity, "learning_rate": 0.0001822923434351635, "loss": 11.1414, "step": 164030 }, { "epoch": 19.740072202166065, "grad_norm": Infinity, "learning_rate": 0.0001822901819710125, "loss": 11.1845, "step": 164040 }, { "epoch": 19.741275571600482, "grad_norm": Infinity, "learning_rate": 0.00018228802038776725, "loss": 11.1298, "step": 164050 }, { "epoch": 19.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00018228585868543097, "loss": 11.2415, "step": 164060 }, { "epoch": 19.743682310469314, "grad_norm": Infinity, "learning_rate": 0.0001822836968640067, "loss": 11.0748, "step": 164070 }, { "epoch": 19.74488567990373, "grad_norm": Infinity, "learning_rate": 0.0001822815349234976, "loss": 11.0964, "step": 164080 }, { "epoch": 19.746089049338146, "grad_norm": Infinity, "learning_rate": 0.0001822793728639068, "loss": 11.0841, "step": 164090 }, { "epoch": 19.747292418772563, "grad_norm": Infinity, "learning_rate": 0.00018227721068523746, "loss": 11.0719, "step": 164100 }, { "epoch": 19.74849578820698, "grad_norm": Infinity, "learning_rate": 0.0001822750483874927, "loss": 11.1493, "step": 164110 }, { "epoch": 19.749699157641395, "grad_norm": Infinity, "learning_rate": 0.0001822728859706756, "loss": 11.1783, "step": 164120 }, { "epoch": 19.750902527075812, "grad_norm": Infinity, "learning_rate": 0.00018227072343478932, "loss": 11.1892, "step": 164130 }, { "epoch": 19.75210589651023, "grad_norm": Infinity, "learning_rate": 0.000182268560779837, "loss": 11.0644, "step": 164140 }, { "epoch": 19.753309265944644, "grad_norm": Infinity, "learning_rate": 0.00018226639800582176, "loss": 11.1722, "step": 164150 }, { "epoch": 19.75451263537906, "grad_norm": Infinity, "learning_rate": 0.00018226423511274672, "loss": 11.0826, "step": 164160 }, { "epoch": 19.75571600481348, "grad_norm": Infinity, "learning_rate": 0.00018226207210061507, "loss": 11.1833, "step": 164170 }, { "epoch": 19.756919374247893, "grad_norm": Infinity, "learning_rate": 0.00018225990896942983, "loss": 11.0197, "step": 164180 }, { "epoch": 19.75812274368231, "grad_norm": Infinity, "learning_rate": 0.0001822577457191942, "loss": 11.2286, "step": 164190 }, { "epoch": 19.759326113116728, "grad_norm": Infinity, "learning_rate": 0.00018225558234991133, "loss": 11.1398, "step": 164200 }, { "epoch": 19.760529482551142, "grad_norm": Infinity, "learning_rate": 0.0001822534188615843, "loss": 11.154, "step": 164210 }, { "epoch": 19.76173285198556, "grad_norm": Infinity, "learning_rate": 0.00018225125525421628, "loss": 11.0459, "step": 164220 }, { "epoch": 19.762936221419977, "grad_norm": Infinity, "learning_rate": 0.00018224909152781042, "loss": 11.0892, "step": 164230 }, { "epoch": 19.76413959085439, "grad_norm": Infinity, "learning_rate": 0.00018224692768236978, "loss": 11.1035, "step": 164240 }, { "epoch": 19.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00018224476371789754, "loss": 11.0743, "step": 164250 }, { "epoch": 19.766546329723226, "grad_norm": Infinity, "learning_rate": 0.00018224259963439682, "loss": 11.0997, "step": 164260 }, { "epoch": 19.76774969915764, "grad_norm": Infinity, "learning_rate": 0.00018224043543187073, "loss": 11.0573, "step": 164270 }, { "epoch": 19.768953068592058, "grad_norm": Infinity, "learning_rate": 0.00018223827111032246, "loss": 11.1615, "step": 164280 }, { "epoch": 19.770156438026476, "grad_norm": Infinity, "learning_rate": 0.00018223610666975507, "loss": 11.2072, "step": 164290 }, { "epoch": 19.77135980746089, "grad_norm": Infinity, "learning_rate": 0.00018223394211017176, "loss": 11.0979, "step": 164300 }, { "epoch": 19.772563176895307, "grad_norm": Infinity, "learning_rate": 0.00018223177743157562, "loss": 11.1511, "step": 164310 }, { "epoch": 19.773766546329725, "grad_norm": Infinity, "learning_rate": 0.0001822296126339698, "loss": 11.1447, "step": 164320 }, { "epoch": 19.77496991576414, "grad_norm": Infinity, "learning_rate": 0.0001822274477173574, "loss": 11.2087, "step": 164330 }, { "epoch": 19.776173285198556, "grad_norm": Infinity, "learning_rate": 0.00018222528268174165, "loss": 11.1231, "step": 164340 }, { "epoch": 19.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00018222311752712558, "loss": 11.1377, "step": 164350 }, { "epoch": 19.778580024067388, "grad_norm": Infinity, "learning_rate": 0.00018222095225351234, "loss": 11.1396, "step": 164360 }, { "epoch": 19.779783393501805, "grad_norm": Infinity, "learning_rate": 0.0001822187868609051, "loss": 11.2541, "step": 164370 }, { "epoch": 19.780986762936223, "grad_norm": Infinity, "learning_rate": 0.00018221662134930696, "loss": 11.1231, "step": 164380 }, { "epoch": 19.782190132370637, "grad_norm": Infinity, "learning_rate": 0.0001822144557187211, "loss": 11.1657, "step": 164390 }, { "epoch": 19.783393501805055, "grad_norm": Infinity, "learning_rate": 0.00018221228996915062, "loss": 11.199, "step": 164400 }, { "epoch": 19.784596871239472, "grad_norm": Infinity, "learning_rate": 0.00018221012410059865, "loss": 11.2341, "step": 164410 }, { "epoch": 19.785800240673886, "grad_norm": Infinity, "learning_rate": 0.0001822079581130683, "loss": 11.1467, "step": 164420 }, { "epoch": 19.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00018220579200656277, "loss": 11.1569, "step": 164430 }, { "epoch": 19.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00018220362578108516, "loss": 11.0901, "step": 164440 }, { "epoch": 19.789410348977135, "grad_norm": Infinity, "learning_rate": 0.00018220145943663864, "loss": 11.1769, "step": 164450 }, { "epoch": 19.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00018219929297322627, "loss": 11.0786, "step": 164460 }, { "epoch": 19.79181708784597, "grad_norm": Infinity, "learning_rate": 0.00018219712639085123, "loss": 11.1887, "step": 164470 }, { "epoch": 19.793020457280385, "grad_norm": Infinity, "learning_rate": 0.00018219495968951668, "loss": 11.1999, "step": 164480 }, { "epoch": 19.794223826714802, "grad_norm": Infinity, "learning_rate": 0.0001821927928692257, "loss": 11.2034, "step": 164490 }, { "epoch": 19.79542719614922, "grad_norm": Infinity, "learning_rate": 0.00018219062592998147, "loss": 11.1517, "step": 164500 }, { "epoch": 19.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00018218845887178712, "loss": 11.1718, "step": 164510 }, { "epoch": 19.79783393501805, "grad_norm": Infinity, "learning_rate": 0.00018218629169464577, "loss": 11.1323, "step": 164520 }, { "epoch": 19.799037304452465, "grad_norm": Infinity, "learning_rate": 0.00018218412439856057, "loss": 11.2685, "step": 164530 }, { "epoch": 19.800240673886883, "grad_norm": Infinity, "learning_rate": 0.00018218195698353464, "loss": 11.199, "step": 164540 }, { "epoch": 19.8014440433213, "grad_norm": Infinity, "learning_rate": 0.00018217978944957115, "loss": 11.2102, "step": 164550 }, { "epoch": 19.802647412755714, "grad_norm": Infinity, "learning_rate": 0.00018217762179667318, "loss": 11.0861, "step": 164560 }, { "epoch": 19.803850782190132, "grad_norm": Infinity, "learning_rate": 0.00018217545402484392, "loss": 11.1543, "step": 164570 }, { "epoch": 19.80505415162455, "grad_norm": Infinity, "learning_rate": 0.0001821732861340865, "loss": 11.3454, "step": 164580 }, { "epoch": 19.806257521058964, "grad_norm": Infinity, "learning_rate": 0.00018217111812440404, "loss": 11.17, "step": 164590 }, { "epoch": 19.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00018216894999579969, "loss": 11.07, "step": 164600 }, { "epoch": 19.8086642599278, "grad_norm": Infinity, "learning_rate": 0.00018216678174827656, "loss": 11.1024, "step": 164610 }, { "epoch": 19.809867629362213, "grad_norm": Infinity, "learning_rate": 0.00018216461338183785, "loss": 11.1108, "step": 164620 }, { "epoch": 19.81107099879663, "grad_norm": Infinity, "learning_rate": 0.00018216244489648663, "loss": 11.1018, "step": 164630 }, { "epoch": 19.812274368231048, "grad_norm": Infinity, "learning_rate": 0.00018216027629222608, "loss": 11.2308, "step": 164640 }, { "epoch": 19.813477737665462, "grad_norm": Infinity, "learning_rate": 0.00018215810756905932, "loss": 10.9164, "step": 164650 }, { "epoch": 19.81468110709988, "grad_norm": Infinity, "learning_rate": 0.0001821559387269895, "loss": 11.0672, "step": 164660 }, { "epoch": 19.815884476534297, "grad_norm": Infinity, "learning_rate": 0.00018215376976601975, "loss": 11.1489, "step": 164670 }, { "epoch": 19.81708784596871, "grad_norm": Infinity, "learning_rate": 0.00018215160068615321, "loss": 11.0974, "step": 164680 }, { "epoch": 19.81829121540313, "grad_norm": Infinity, "learning_rate": 0.000182149431487393, "loss": 11.1122, "step": 164690 }, { "epoch": 19.819494584837546, "grad_norm": Infinity, "learning_rate": 0.00018214726216974229, "loss": 11.0882, "step": 164700 }, { "epoch": 19.82069795427196, "grad_norm": Infinity, "learning_rate": 0.00018214509273320424, "loss": 11.1044, "step": 164710 }, { "epoch": 19.821901323706378, "grad_norm": Infinity, "learning_rate": 0.00018214292317778193, "loss": 11.2734, "step": 164720 }, { "epoch": 19.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00018214075350347854, "loss": 11.1612, "step": 164730 }, { "epoch": 19.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00018213858371029723, "loss": 11.087, "step": 164740 }, { "epoch": 19.825511432009627, "grad_norm": Infinity, "learning_rate": 0.00018213641379824104, "loss": 11.0612, "step": 164750 }, { "epoch": 19.826714801444044, "grad_norm": Infinity, "learning_rate": 0.00018213424376731322, "loss": 11.096, "step": 164760 }, { "epoch": 19.82791817087846, "grad_norm": Infinity, "learning_rate": 0.0001821320736175169, "loss": 11.2982, "step": 164770 }, { "epoch": 19.829121540312876, "grad_norm": Infinity, "learning_rate": 0.00018212990334885514, "loss": 11.2142, "step": 164780 }, { "epoch": 19.830324909747294, "grad_norm": Infinity, "learning_rate": 0.00018212773296133117, "loss": 11.2466, "step": 164790 }, { "epoch": 19.831528279181708, "grad_norm": Infinity, "learning_rate": 0.00018212556245494806, "loss": 11.0668, "step": 164800 }, { "epoch": 19.832731648616125, "grad_norm": Infinity, "learning_rate": 0.00018212339182970902, "loss": 11.0883, "step": 164810 }, { "epoch": 19.833935018050543, "grad_norm": Infinity, "learning_rate": 0.00018212122108561714, "loss": 11.1541, "step": 164820 }, { "epoch": 19.835138387484957, "grad_norm": Infinity, "learning_rate": 0.00018211905022267558, "loss": 11.1137, "step": 164830 }, { "epoch": 19.836341756919374, "grad_norm": Infinity, "learning_rate": 0.00018211687924088747, "loss": 11.0337, "step": 164840 }, { "epoch": 19.837545126353792, "grad_norm": Infinity, "learning_rate": 0.00018211470814025597, "loss": 11.083, "step": 164850 }, { "epoch": 19.838748495788206, "grad_norm": Infinity, "learning_rate": 0.0001821125369207842, "loss": 11.278, "step": 164860 }, { "epoch": 19.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00018211036558247537, "loss": 11.2, "step": 164870 }, { "epoch": 19.84115523465704, "grad_norm": Infinity, "learning_rate": 0.0001821081941253325, "loss": 11.1798, "step": 164880 }, { "epoch": 19.842358604091455, "grad_norm": Infinity, "learning_rate": 0.00018210602254935886, "loss": 11.2188, "step": 164890 }, { "epoch": 19.843561973525873, "grad_norm": Infinity, "learning_rate": 0.00018210385085455747, "loss": 11.0565, "step": 164900 }, { "epoch": 19.84476534296029, "grad_norm": Infinity, "learning_rate": 0.00018210167904093158, "loss": 11.0445, "step": 164910 }, { "epoch": 19.845968712394704, "grad_norm": Infinity, "learning_rate": 0.0001820995071084843, "loss": 11.0802, "step": 164920 }, { "epoch": 19.84717208182912, "grad_norm": Infinity, "learning_rate": 0.00018209733505721872, "loss": 11.2569, "step": 164930 }, { "epoch": 19.84837545126354, "grad_norm": Infinity, "learning_rate": 0.00018209516288713806, "loss": 11.2663, "step": 164940 }, { "epoch": 19.849578820697953, "grad_norm": Infinity, "learning_rate": 0.00018209299059824543, "loss": 11.1048, "step": 164950 }, { "epoch": 19.85078219013237, "grad_norm": Infinity, "learning_rate": 0.00018209081819054395, "loss": 11.2474, "step": 164960 }, { "epoch": 19.85198555956679, "grad_norm": Infinity, "learning_rate": 0.0001820886456640368, "loss": 11.2436, "step": 164970 }, { "epoch": 19.853188929001202, "grad_norm": Infinity, "learning_rate": 0.00018208647301872714, "loss": 11.1079, "step": 164980 }, { "epoch": 19.85439229843562, "grad_norm": Infinity, "learning_rate": 0.00018208430025461807, "loss": 11.0617, "step": 164990 }, { "epoch": 19.855595667870038, "grad_norm": Infinity, "learning_rate": 0.00018208212737171273, "loss": 11.1149, "step": 165000 }, { "epoch": 19.85679903730445, "grad_norm": Infinity, "learning_rate": 0.0001820799543700143, "loss": 11.203, "step": 165010 }, { "epoch": 19.85800240673887, "grad_norm": Infinity, "learning_rate": 0.00018207778124952593, "loss": 11.2121, "step": 165020 }, { "epoch": 19.859205776173287, "grad_norm": Infinity, "learning_rate": 0.00018207560801025073, "loss": 11.0755, "step": 165030 }, { "epoch": 19.8604091456077, "grad_norm": Infinity, "learning_rate": 0.00018207343465219186, "loss": 11.1553, "step": 165040 }, { "epoch": 19.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00018207126117535246, "loss": 11.0546, "step": 165050 }, { "epoch": 19.862815884476536, "grad_norm": Infinity, "learning_rate": 0.00018206908757973568, "loss": 11.2138, "step": 165060 }, { "epoch": 19.86401925391095, "grad_norm": Infinity, "learning_rate": 0.0001820669138653447, "loss": 11.0992, "step": 165070 }, { "epoch": 19.865222623345367, "grad_norm": Infinity, "learning_rate": 0.00018206474003218263, "loss": 11.1009, "step": 165080 }, { "epoch": 19.866425992779785, "grad_norm": Infinity, "learning_rate": 0.00018206256608025258, "loss": 11.1346, "step": 165090 }, { "epoch": 19.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00018206039200955776, "loss": 11.1552, "step": 165100 }, { "epoch": 19.868832731648617, "grad_norm": Infinity, "learning_rate": 0.0001820582178201013, "loss": 11.1733, "step": 165110 }, { "epoch": 19.870036101083034, "grad_norm": Infinity, "learning_rate": 0.0001820560435118863, "loss": 11.1219, "step": 165120 }, { "epoch": 19.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00018205386908491599, "loss": 11.1515, "step": 165130 }, { "epoch": 19.872442839951866, "grad_norm": Infinity, "learning_rate": 0.00018205169453919346, "loss": 11.2341, "step": 165140 }, { "epoch": 19.87364620938628, "grad_norm": Infinity, "learning_rate": 0.00018204951987472188, "loss": 11.0963, "step": 165150 }, { "epoch": 19.874849578820697, "grad_norm": Infinity, "learning_rate": 0.00018204734509150438, "loss": 11.0325, "step": 165160 }, { "epoch": 19.876052948255115, "grad_norm": Infinity, "learning_rate": 0.00018204517018954413, "loss": 11.2082, "step": 165170 }, { "epoch": 19.87725631768953, "grad_norm": Infinity, "learning_rate": 0.0001820429951688442, "loss": 11.1202, "step": 165180 }, { "epoch": 19.878459687123947, "grad_norm": Infinity, "learning_rate": 0.0001820408200294079, "loss": 11.1797, "step": 165190 }, { "epoch": 19.879663056558364, "grad_norm": Infinity, "learning_rate": 0.0001820386447712382, "loss": 11.2218, "step": 165200 }, { "epoch": 19.880866425992778, "grad_norm": Infinity, "learning_rate": 0.00018203646939433834, "loss": 11.2033, "step": 165210 }, { "epoch": 19.882069795427196, "grad_norm": Infinity, "learning_rate": 0.00018203429389871146, "loss": 10.9957, "step": 165220 }, { "epoch": 19.883273164861613, "grad_norm": Infinity, "learning_rate": 0.0001820321182843607, "loss": 11.1282, "step": 165230 }, { "epoch": 19.884476534296027, "grad_norm": Infinity, "learning_rate": 0.00018202994255128924, "loss": 11.2358, "step": 165240 }, { "epoch": 19.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00018202776669950016, "loss": 11.0358, "step": 165250 }, { "epoch": 19.886883273164862, "grad_norm": Infinity, "learning_rate": 0.00018202559072899666, "loss": 11.1336, "step": 165260 }, { "epoch": 19.888086642599276, "grad_norm": Infinity, "learning_rate": 0.0001820234146397819, "loss": 11.1526, "step": 165270 }, { "epoch": 19.889290012033694, "grad_norm": Infinity, "learning_rate": 0.000182021238431859, "loss": 11.2606, "step": 165280 }, { "epoch": 19.89049338146811, "grad_norm": Infinity, "learning_rate": 0.00018201906210523112, "loss": 11.1386, "step": 165290 }, { "epoch": 19.891696750902526, "grad_norm": Infinity, "learning_rate": 0.00018201688565990142, "loss": 11.1824, "step": 165300 }, { "epoch": 19.892900120336943, "grad_norm": Infinity, "learning_rate": 0.00018201470909587298, "loss": 11.1232, "step": 165310 }, { "epoch": 19.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00018201253241314905, "loss": 11.0581, "step": 165320 }, { "epoch": 19.895306859205775, "grad_norm": Infinity, "learning_rate": 0.00018201035561173274, "loss": 11.0982, "step": 165330 }, { "epoch": 19.896510228640192, "grad_norm": Infinity, "learning_rate": 0.00018200817869162722, "loss": 11.1737, "step": 165340 }, { "epoch": 19.89771359807461, "grad_norm": Infinity, "learning_rate": 0.00018200600165283558, "loss": 11.0467, "step": 165350 }, { "epoch": 19.898916967509024, "grad_norm": Infinity, "learning_rate": 0.000182003824495361, "loss": 11.0762, "step": 165360 }, { "epoch": 19.90012033694344, "grad_norm": Infinity, "learning_rate": 0.00018200164721920666, "loss": 11.1541, "step": 165370 }, { "epoch": 19.90132370637786, "grad_norm": Infinity, "learning_rate": 0.0001819994698243757, "loss": 11.0707, "step": 165380 }, { "epoch": 19.902527075812273, "grad_norm": Infinity, "learning_rate": 0.00018199729231087124, "loss": 11.2656, "step": 165390 }, { "epoch": 19.90373044524669, "grad_norm": Infinity, "learning_rate": 0.00018199511467869645, "loss": 11.1462, "step": 165400 }, { "epoch": 19.904933814681108, "grad_norm": Infinity, "learning_rate": 0.00018199293692785452, "loss": 11.1108, "step": 165410 }, { "epoch": 19.906137184115522, "grad_norm": Infinity, "learning_rate": 0.00018199075905834855, "loss": 11.2118, "step": 165420 }, { "epoch": 19.90734055354994, "grad_norm": Infinity, "learning_rate": 0.0001819885810701817, "loss": 11.2167, "step": 165430 }, { "epoch": 19.908543922984357, "grad_norm": Infinity, "learning_rate": 0.00018198640296335713, "loss": 11.2291, "step": 165440 }, { "epoch": 19.90974729241877, "grad_norm": Infinity, "learning_rate": 0.000181984224737878, "loss": 11.2354, "step": 165450 }, { "epoch": 19.91095066185319, "grad_norm": Infinity, "learning_rate": 0.00018198204639374745, "loss": 11.2053, "step": 165460 }, { "epoch": 19.912154031287606, "grad_norm": Infinity, "learning_rate": 0.00018197986793096863, "loss": 11.1204, "step": 165470 }, { "epoch": 19.91335740072202, "grad_norm": Infinity, "learning_rate": 0.00018197768934954473, "loss": 11.146, "step": 165480 }, { "epoch": 19.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00018197551064947886, "loss": 11.1402, "step": 165490 }, { "epoch": 19.915764139590856, "grad_norm": Infinity, "learning_rate": 0.0001819733318307742, "loss": 11.1543, "step": 165500 }, { "epoch": 19.91696750902527, "grad_norm": Infinity, "learning_rate": 0.00018197115289343384, "loss": 11.1014, "step": 165510 }, { "epoch": 19.918170878459687, "grad_norm": Infinity, "learning_rate": 0.00018196897383746102, "loss": 11.1375, "step": 165520 }, { "epoch": 19.919374247894105, "grad_norm": Infinity, "learning_rate": 0.00018196679466285888, "loss": 11.2382, "step": 165530 }, { "epoch": 19.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00018196461536963053, "loss": 11.0677, "step": 165540 }, { "epoch": 19.921780986762936, "grad_norm": Infinity, "learning_rate": 0.00018196243595777913, "loss": 11.1583, "step": 165550 }, { "epoch": 19.922984356197354, "grad_norm": Infinity, "learning_rate": 0.00018196025642730788, "loss": 11.2589, "step": 165560 }, { "epoch": 19.924187725631768, "grad_norm": Infinity, "learning_rate": 0.00018195807677821987, "loss": 11.2583, "step": 165570 }, { "epoch": 19.925391095066185, "grad_norm": Infinity, "learning_rate": 0.00018195589701051833, "loss": 11.0953, "step": 165580 }, { "epoch": 19.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00018195371712420631, "loss": 11.2733, "step": 165590 }, { "epoch": 19.927797833935017, "grad_norm": Infinity, "learning_rate": 0.00018195153711928708, "loss": 11.1126, "step": 165600 }, { "epoch": 19.929001203369435, "grad_norm": Infinity, "learning_rate": 0.00018194935699576372, "loss": 11.2585, "step": 165610 }, { "epoch": 19.930204572803852, "grad_norm": Infinity, "learning_rate": 0.0001819471767536394, "loss": 11.1855, "step": 165620 }, { "epoch": 19.931407942238266, "grad_norm": Infinity, "learning_rate": 0.0001819449963929173, "loss": 11.1656, "step": 165630 }, { "epoch": 19.932611311672684, "grad_norm": Infinity, "learning_rate": 0.00018194281591360056, "loss": 11.1314, "step": 165640 }, { "epoch": 19.9338146811071, "grad_norm": Infinity, "learning_rate": 0.00018194063531569234, "loss": 11.1184, "step": 165650 }, { "epoch": 19.935018050541515, "grad_norm": Infinity, "learning_rate": 0.00018193845459919578, "loss": 11.184, "step": 165660 }, { "epoch": 19.936221419975933, "grad_norm": Infinity, "learning_rate": 0.00018193627376411403, "loss": 11.1132, "step": 165670 }, { "epoch": 19.93742478941035, "grad_norm": Infinity, "learning_rate": 0.0001819340928104503, "loss": 11.0606, "step": 165680 }, { "epoch": 19.938628158844764, "grad_norm": Infinity, "learning_rate": 0.00018193191173820767, "loss": 11.1, "step": 165690 }, { "epoch": 19.939831528279182, "grad_norm": Infinity, "learning_rate": 0.00018192973054738935, "loss": 11.0695, "step": 165700 }, { "epoch": 19.9410348977136, "grad_norm": Infinity, "learning_rate": 0.0001819275492379985, "loss": 11.0693, "step": 165710 }, { "epoch": 19.942238267148014, "grad_norm": Infinity, "learning_rate": 0.00018192536781003825, "loss": 11.1839, "step": 165720 }, { "epoch": 19.94344163658243, "grad_norm": Infinity, "learning_rate": 0.00018192318626351173, "loss": 11.1644, "step": 165730 }, { "epoch": 19.94464500601685, "grad_norm": Infinity, "learning_rate": 0.00018192100459842218, "loss": 11.2392, "step": 165740 }, { "epoch": 19.945848375451263, "grad_norm": Infinity, "learning_rate": 0.0001819188228147727, "loss": 11.0428, "step": 165750 }, { "epoch": 19.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00018191664091256647, "loss": 11.0952, "step": 165760 }, { "epoch": 19.948255114320098, "grad_norm": Infinity, "learning_rate": 0.00018191445889180661, "loss": 11.254, "step": 165770 }, { "epoch": 19.949458483754512, "grad_norm": Infinity, "learning_rate": 0.0001819122767524963, "loss": 11.123, "step": 165780 }, { "epoch": 19.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00018191009449463873, "loss": 11.2211, "step": 165790 }, { "epoch": 19.951865222623347, "grad_norm": Infinity, "learning_rate": 0.000181907912118237, "loss": 11.1385, "step": 165800 }, { "epoch": 19.95306859205776, "grad_norm": Infinity, "learning_rate": 0.00018190572962329432, "loss": 11.0776, "step": 165810 }, { "epoch": 19.95427196149218, "grad_norm": Infinity, "learning_rate": 0.00018190354700981382, "loss": 11.1519, "step": 165820 }, { "epoch": 19.955475330926596, "grad_norm": Infinity, "learning_rate": 0.00018190136427779867, "loss": 11.1281, "step": 165830 }, { "epoch": 19.95667870036101, "grad_norm": Infinity, "learning_rate": 0.000181899181427252, "loss": 11.1464, "step": 165840 }, { "epoch": 19.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00018189699845817705, "loss": 11.1463, "step": 165850 }, { "epoch": 19.959085439229845, "grad_norm": Infinity, "learning_rate": 0.0001818948153705769, "loss": 11.1786, "step": 165860 }, { "epoch": 19.96028880866426, "grad_norm": Infinity, "learning_rate": 0.0001818926321644547, "loss": 11.2121, "step": 165870 }, { "epoch": 19.961492178098677, "grad_norm": Infinity, "learning_rate": 0.00018189044883981366, "loss": 11.2523, "step": 165880 }, { "epoch": 19.96269554753309, "grad_norm": Infinity, "learning_rate": 0.00018188826539665694, "loss": 11.1515, "step": 165890 }, { "epoch": 19.96389891696751, "grad_norm": Infinity, "learning_rate": 0.00018188608183498768, "loss": 11.0813, "step": 165900 }, { "epoch": 19.965102286401926, "grad_norm": Infinity, "learning_rate": 0.00018188389815480902, "loss": 11.0842, "step": 165910 }, { "epoch": 19.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00018188171435612414, "loss": 11.2649, "step": 165920 }, { "epoch": 19.967509025270758, "grad_norm": Infinity, "learning_rate": 0.00018187953043893624, "loss": 11.0825, "step": 165930 }, { "epoch": 19.968712394705175, "grad_norm": Infinity, "learning_rate": 0.00018187734640324842, "loss": 11.1482, "step": 165940 }, { "epoch": 19.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00018187516224906386, "loss": 11.0872, "step": 165950 }, { "epoch": 19.971119133574007, "grad_norm": Infinity, "learning_rate": 0.00018187297797638574, "loss": 11.2438, "step": 165960 }, { "epoch": 19.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00018187079358521723, "loss": 11.0543, "step": 165970 }, { "epoch": 19.97352587244284, "grad_norm": Infinity, "learning_rate": 0.00018186860907556143, "loss": 11.1657, "step": 165980 }, { "epoch": 19.974729241877256, "grad_norm": Infinity, "learning_rate": 0.00018186642444742153, "loss": 11.2358, "step": 165990 }, { "epoch": 19.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00018186423970080074, "loss": 11.0724, "step": 166000 }, { "epoch": 19.977135980746088, "grad_norm": Infinity, "learning_rate": 0.00018186205483570214, "loss": 11.162, "step": 166010 }, { "epoch": 19.978339350180505, "grad_norm": Infinity, "learning_rate": 0.00018185986985212897, "loss": 11.1845, "step": 166020 }, { "epoch": 19.979542719614923, "grad_norm": Infinity, "learning_rate": 0.00018185768475008436, "loss": 11.2258, "step": 166030 }, { "epoch": 19.980746089049337, "grad_norm": Infinity, "learning_rate": 0.00018185549952957146, "loss": 11.2137, "step": 166040 }, { "epoch": 19.981949458483754, "grad_norm": Infinity, "learning_rate": 0.0001818533141905934, "loss": 11.196, "step": 166050 }, { "epoch": 19.983152827918172, "grad_norm": Infinity, "learning_rate": 0.00018185112873315347, "loss": 11.1218, "step": 166060 }, { "epoch": 19.984356197352586, "grad_norm": Infinity, "learning_rate": 0.00018184894315725469, "loss": 11.2698, "step": 166070 }, { "epoch": 19.985559566787003, "grad_norm": Infinity, "learning_rate": 0.00018184675746290028, "loss": 11.117, "step": 166080 }, { "epoch": 19.98676293622142, "grad_norm": Infinity, "learning_rate": 0.00018184457165009343, "loss": 11.1865, "step": 166090 }, { "epoch": 19.987966305655835, "grad_norm": Infinity, "learning_rate": 0.0001818423857188373, "loss": 11.182, "step": 166100 }, { "epoch": 19.989169675090253, "grad_norm": Infinity, "learning_rate": 0.00018184019966913497, "loss": 11.0618, "step": 166110 }, { "epoch": 19.99037304452467, "grad_norm": Infinity, "learning_rate": 0.0001818380135009897, "loss": 11.3382, "step": 166120 }, { "epoch": 19.991576413959084, "grad_norm": Infinity, "learning_rate": 0.00018183582721440462, "loss": 11.1709, "step": 166130 }, { "epoch": 19.9927797833935, "grad_norm": Infinity, "learning_rate": 0.0001818336408093829, "loss": 11.1113, "step": 166140 }, { "epoch": 19.99398315282792, "grad_norm": Infinity, "learning_rate": 0.00018183145428592767, "loss": 11.0988, "step": 166150 }, { "epoch": 19.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00018182926764404215, "loss": 11.1319, "step": 166160 }, { "epoch": 19.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00018182708088372945, "loss": 11.1248, "step": 166170 }, { "epoch": 19.99759326113117, "grad_norm": Infinity, "learning_rate": 0.00018182489400499278, "loss": 11.1114, "step": 166180 }, { "epoch": 19.998796630565582, "grad_norm": Infinity, "learning_rate": 0.00018182270700783527, "loss": 11.0689, "step": 166190 }, { "epoch": 20.0, "grad_norm": Infinity, "learning_rate": 0.00018182051989226015, "loss": 11.1714, "step": 166200 }, { "epoch": 20.0, "eval_loss": 11.139604568481445, "eval_runtime": 118.6697, "eval_samples_per_second": 62.248, "eval_steps_per_second": 7.786, "step": 166200 }, { "epoch": 20.001203369434418, "grad_norm": Infinity, "learning_rate": 0.0001818183326582705, "loss": 11.069, "step": 166210 }, { "epoch": 20.00240673886883, "grad_norm": Infinity, "learning_rate": 0.0001818161453058695, "loss": 11.1056, "step": 166220 }, { "epoch": 20.00361010830325, "grad_norm": Infinity, "learning_rate": 0.0001818139578350604, "loss": 11.0334, "step": 166230 }, { "epoch": 20.004813477737667, "grad_norm": Infinity, "learning_rate": 0.00018181177024584623, "loss": 11.2063, "step": 166240 }, { "epoch": 20.00601684717208, "grad_norm": Infinity, "learning_rate": 0.00018180958253823028, "loss": 11.1498, "step": 166250 }, { "epoch": 20.0072202166065, "grad_norm": Infinity, "learning_rate": 0.00018180739471221563, "loss": 11.19, "step": 166260 }, { "epoch": 20.008423586040916, "grad_norm": Infinity, "learning_rate": 0.0001818052067678055, "loss": 11.0813, "step": 166270 }, { "epoch": 20.00962695547533, "grad_norm": Infinity, "learning_rate": 0.00018180301870500304, "loss": 11.1756, "step": 166280 }, { "epoch": 20.010830324909747, "grad_norm": Infinity, "learning_rate": 0.00018180083052381143, "loss": 11.1816, "step": 166290 }, { "epoch": 20.012033694344165, "grad_norm": Infinity, "learning_rate": 0.0001817986422242338, "loss": 11.1397, "step": 166300 }, { "epoch": 20.01323706377858, "grad_norm": Infinity, "learning_rate": 0.00018179645380627337, "loss": 11.0872, "step": 166310 }, { "epoch": 20.014440433212997, "grad_norm": Infinity, "learning_rate": 0.00018179426526993326, "loss": 11.0543, "step": 166320 }, { "epoch": 20.015643802647414, "grad_norm": Infinity, "learning_rate": 0.00018179207661521665, "loss": 11.1851, "step": 166330 }, { "epoch": 20.016847172081828, "grad_norm": Infinity, "learning_rate": 0.00018178988784212669, "loss": 11.2014, "step": 166340 }, { "epoch": 20.018050541516246, "grad_norm": Infinity, "learning_rate": 0.00018178769895066662, "loss": 11.0478, "step": 166350 }, { "epoch": 20.019253910950663, "grad_norm": Infinity, "learning_rate": 0.0001817855099408395, "loss": 11.1208, "step": 166360 }, { "epoch": 20.020457280385077, "grad_norm": Infinity, "learning_rate": 0.00018178332081264859, "loss": 10.9829, "step": 166370 }, { "epoch": 20.021660649819495, "grad_norm": Infinity, "learning_rate": 0.00018178113156609702, "loss": 11.2705, "step": 166380 }, { "epoch": 20.022864019253912, "grad_norm": Infinity, "learning_rate": 0.00018177894220118795, "loss": 11.1191, "step": 166390 }, { "epoch": 20.024067388688326, "grad_norm": Infinity, "learning_rate": 0.0001817767527179246, "loss": 11.0681, "step": 166400 }, { "epoch": 20.025270758122744, "grad_norm": Infinity, "learning_rate": 0.00018177456311631004, "loss": 11.1591, "step": 166410 }, { "epoch": 20.02647412755716, "grad_norm": Infinity, "learning_rate": 0.00018177237339634753, "loss": 11.2164, "step": 166420 }, { "epoch": 20.027677496991576, "grad_norm": Infinity, "learning_rate": 0.00018177018355804018, "loss": 11.1491, "step": 166430 }, { "epoch": 20.028880866425993, "grad_norm": Infinity, "learning_rate": 0.00018176799360139124, "loss": 11.0098, "step": 166440 }, { "epoch": 20.03008423586041, "grad_norm": Infinity, "learning_rate": 0.00018176580352640377, "loss": 11.121, "step": 166450 }, { "epoch": 20.031287605294825, "grad_norm": Infinity, "learning_rate": 0.00018176361333308103, "loss": 11.1314, "step": 166460 }, { "epoch": 20.032490974729242, "grad_norm": Infinity, "learning_rate": 0.00018176142302142614, "loss": 11.0975, "step": 166470 }, { "epoch": 20.03369434416366, "grad_norm": Infinity, "learning_rate": 0.0001817592325914423, "loss": 11.2076, "step": 166480 }, { "epoch": 20.034897713598074, "grad_norm": Infinity, "learning_rate": 0.00018175704204313262, "loss": 11.1241, "step": 166490 }, { "epoch": 20.03610108303249, "grad_norm": Infinity, "learning_rate": 0.00018175485137650034, "loss": 11.1964, "step": 166500 }, { "epoch": 20.03730445246691, "grad_norm": Infinity, "learning_rate": 0.00018175266059154862, "loss": 11.1089, "step": 166510 }, { "epoch": 20.038507821901323, "grad_norm": Infinity, "learning_rate": 0.00018175046968828062, "loss": 11.2726, "step": 166520 }, { "epoch": 20.03971119133574, "grad_norm": Infinity, "learning_rate": 0.00018174827866669947, "loss": 11.1519, "step": 166530 }, { "epoch": 20.040914560770158, "grad_norm": Infinity, "learning_rate": 0.0001817460875268084, "loss": 11.2426, "step": 166540 }, { "epoch": 20.042117930204572, "grad_norm": Infinity, "learning_rate": 0.00018174389626861057, "loss": 11.2022, "step": 166550 }, { "epoch": 20.04332129963899, "grad_norm": Infinity, "learning_rate": 0.00018174170489210917, "loss": 11.0936, "step": 166560 }, { "epoch": 20.044524669073404, "grad_norm": Infinity, "learning_rate": 0.00018173951339730727, "loss": 11.1601, "step": 166570 }, { "epoch": 20.04572803850782, "grad_norm": Infinity, "learning_rate": 0.00018173732178420815, "loss": 11.1186, "step": 166580 }, { "epoch": 20.04693140794224, "grad_norm": Infinity, "learning_rate": 0.00018173513005281493, "loss": 11.1114, "step": 166590 }, { "epoch": 20.048134777376653, "grad_norm": Infinity, "learning_rate": 0.00018173293820313079, "loss": 11.1871, "step": 166600 }, { "epoch": 20.04933814681107, "grad_norm": Infinity, "learning_rate": 0.00018173074623515896, "loss": 11.0846, "step": 166610 }, { "epoch": 20.050541516245488, "grad_norm": Infinity, "learning_rate": 0.00018172855414890252, "loss": 11.1424, "step": 166620 }, { "epoch": 20.051744885679902, "grad_norm": Infinity, "learning_rate": 0.0001817263619443647, "loss": 11.1876, "step": 166630 }, { "epoch": 20.05294825511432, "grad_norm": Infinity, "learning_rate": 0.00018172416962154863, "loss": 11.0225, "step": 166640 }, { "epoch": 20.054151624548737, "grad_norm": Infinity, "learning_rate": 0.00018172197718045754, "loss": 11.1876, "step": 166650 }, { "epoch": 20.05535499398315, "grad_norm": Infinity, "learning_rate": 0.00018171978462109455, "loss": 11.1497, "step": 166660 }, { "epoch": 20.05655836341757, "grad_norm": Infinity, "learning_rate": 0.00018171759194346288, "loss": 11.0555, "step": 166670 }, { "epoch": 20.057761732851986, "grad_norm": Infinity, "learning_rate": 0.00018171539914756566, "loss": 11.1797, "step": 166680 }, { "epoch": 20.0589651022864, "grad_norm": Infinity, "learning_rate": 0.0001817132062334061, "loss": 11.1347, "step": 166690 }, { "epoch": 20.060168471720818, "grad_norm": Infinity, "learning_rate": 0.00018171101320098732, "loss": 11.1256, "step": 166700 }, { "epoch": 20.061371841155236, "grad_norm": Infinity, "learning_rate": 0.00018170882005031258, "loss": 11.0957, "step": 166710 }, { "epoch": 20.06257521058965, "grad_norm": Infinity, "learning_rate": 0.00018170662678138497, "loss": 11.1812, "step": 166720 }, { "epoch": 20.063778580024067, "grad_norm": Infinity, "learning_rate": 0.0001817044333942077, "loss": 11.1599, "step": 166730 }, { "epoch": 20.064981949458485, "grad_norm": Infinity, "learning_rate": 0.00018170223988878394, "loss": 11.2837, "step": 166740 }, { "epoch": 20.0661853188929, "grad_norm": Infinity, "learning_rate": 0.0001817000462651169, "loss": 11.1328, "step": 166750 }, { "epoch": 20.067388688327316, "grad_norm": Infinity, "learning_rate": 0.0001816978525232097, "loss": 11.075, "step": 166760 }, { "epoch": 20.068592057761734, "grad_norm": Infinity, "learning_rate": 0.00018169565866306552, "loss": 11.209, "step": 166770 }, { "epoch": 20.069795427196148, "grad_norm": Infinity, "learning_rate": 0.00018169346468468756, "loss": 11.2069, "step": 166780 }, { "epoch": 20.070998796630565, "grad_norm": Infinity, "learning_rate": 0.000181691270588079, "loss": 11.1312, "step": 166790 }, { "epoch": 20.072202166064983, "grad_norm": Infinity, "learning_rate": 0.000181689076373243, "loss": 11.0913, "step": 166800 }, { "epoch": 20.073405535499397, "grad_norm": Infinity, "learning_rate": 0.00018168688204018275, "loss": 11.017, "step": 166810 }, { "epoch": 20.074608904933815, "grad_norm": Infinity, "learning_rate": 0.0001816846875889014, "loss": 11.1794, "step": 166820 }, { "epoch": 20.075812274368232, "grad_norm": Infinity, "learning_rate": 0.00018168249301940215, "loss": 11.0485, "step": 166830 }, { "epoch": 20.077015643802646, "grad_norm": Infinity, "learning_rate": 0.00018168029833168814, "loss": 11.0515, "step": 166840 }, { "epoch": 20.078219013237064, "grad_norm": Infinity, "learning_rate": 0.0001816781035257626, "loss": 11.158, "step": 166850 }, { "epoch": 20.07942238267148, "grad_norm": Infinity, "learning_rate": 0.00018167590860162865, "loss": 11.1102, "step": 166860 }, { "epoch": 20.080625752105895, "grad_norm": Infinity, "learning_rate": 0.00018167371355928952, "loss": 11.1662, "step": 166870 }, { "epoch": 20.081829121540313, "grad_norm": Infinity, "learning_rate": 0.00018167151839874833, "loss": 11.0974, "step": 166880 }, { "epoch": 20.08303249097473, "grad_norm": Infinity, "learning_rate": 0.00018166932312000834, "loss": 11.1498, "step": 166890 }, { "epoch": 20.084235860409144, "grad_norm": Infinity, "learning_rate": 0.00018166712772307264, "loss": 11.2222, "step": 166900 }, { "epoch": 20.085439229843562, "grad_norm": Infinity, "learning_rate": 0.00018166493220794444, "loss": 11.0615, "step": 166910 }, { "epoch": 20.08664259927798, "grad_norm": Infinity, "learning_rate": 0.00018166273657462694, "loss": 11.0623, "step": 166920 }, { "epoch": 20.087845968712394, "grad_norm": Infinity, "learning_rate": 0.00018166054082312328, "loss": 11.1037, "step": 166930 }, { "epoch": 20.08904933814681, "grad_norm": Infinity, "learning_rate": 0.00018165834495343668, "loss": 11.1357, "step": 166940 }, { "epoch": 20.09025270758123, "grad_norm": Infinity, "learning_rate": 0.00018165614896557028, "loss": 11.1944, "step": 166950 }, { "epoch": 20.091456077015643, "grad_norm": Infinity, "learning_rate": 0.00018165395285952727, "loss": 11.2273, "step": 166960 }, { "epoch": 20.09265944645006, "grad_norm": Infinity, "learning_rate": 0.00018165175663531082, "loss": 11.1936, "step": 166970 }, { "epoch": 20.093862815884478, "grad_norm": Infinity, "learning_rate": 0.00018164956029292415, "loss": 11.1253, "step": 166980 }, { "epoch": 20.095066185318892, "grad_norm": Infinity, "learning_rate": 0.0001816473638323704, "loss": 11.1734, "step": 166990 }, { "epoch": 20.09626955475331, "grad_norm": Infinity, "learning_rate": 0.0001816451672536527, "loss": 11.1823, "step": 167000 }, { "epoch": 20.097472924187727, "grad_norm": Infinity, "learning_rate": 0.00018164297055677436, "loss": 11.1539, "step": 167010 }, { "epoch": 20.09867629362214, "grad_norm": Infinity, "learning_rate": 0.00018164077374173843, "loss": 11.1835, "step": 167020 }, { "epoch": 20.09987966305656, "grad_norm": Infinity, "learning_rate": 0.0001816385768085482, "loss": 11.0116, "step": 167030 }, { "epoch": 20.101083032490976, "grad_norm": Infinity, "learning_rate": 0.00018163637975720677, "loss": 11.2357, "step": 167040 }, { "epoch": 20.10228640192539, "grad_norm": Infinity, "learning_rate": 0.00018163418258771734, "loss": 11.1763, "step": 167050 }, { "epoch": 20.103489771359808, "grad_norm": Infinity, "learning_rate": 0.00018163198530008308, "loss": 11.0937, "step": 167060 }, { "epoch": 20.104693140794225, "grad_norm": Infinity, "learning_rate": 0.0001816297878943072, "loss": 11.1731, "step": 167070 }, { "epoch": 20.10589651022864, "grad_norm": Infinity, "learning_rate": 0.00018162759037039283, "loss": 11.0829, "step": 167080 }, { "epoch": 20.107099879663057, "grad_norm": Infinity, "learning_rate": 0.00018162539272834324, "loss": 11.0928, "step": 167090 }, { "epoch": 20.108303249097474, "grad_norm": Infinity, "learning_rate": 0.0001816231949681615, "loss": 11.1036, "step": 167100 }, { "epoch": 20.10950661853189, "grad_norm": Infinity, "learning_rate": 0.00018162099708985088, "loss": 11.2172, "step": 167110 }, { "epoch": 20.110709987966306, "grad_norm": Infinity, "learning_rate": 0.0001816187990934145, "loss": 11.1132, "step": 167120 }, { "epoch": 20.111913357400724, "grad_norm": Infinity, "learning_rate": 0.00018161660097885558, "loss": 11.1859, "step": 167130 }, { "epoch": 20.113116726835138, "grad_norm": Infinity, "learning_rate": 0.00018161440274617728, "loss": 11.2652, "step": 167140 }, { "epoch": 20.114320096269555, "grad_norm": Infinity, "learning_rate": 0.00018161220439538283, "loss": 11.2855, "step": 167150 }, { "epoch": 20.115523465703973, "grad_norm": Infinity, "learning_rate": 0.0001816100059264753, "loss": 11.0792, "step": 167160 }, { "epoch": 20.116726835138387, "grad_norm": Infinity, "learning_rate": 0.00018160780733945798, "loss": 11.1545, "step": 167170 }, { "epoch": 20.117930204572804, "grad_norm": Infinity, "learning_rate": 0.00018160560863433403, "loss": 11.1262, "step": 167180 }, { "epoch": 20.119133574007222, "grad_norm": Infinity, "learning_rate": 0.00018160340981110662, "loss": 11.1201, "step": 167190 }, { "epoch": 20.120336943441636, "grad_norm": Infinity, "learning_rate": 0.00018160121086977887, "loss": 11.0896, "step": 167200 }, { "epoch": 20.121540312876053, "grad_norm": Infinity, "learning_rate": 0.0001815990118103541, "loss": 11.007, "step": 167210 }, { "epoch": 20.12274368231047, "grad_norm": Infinity, "learning_rate": 0.00018159681263283533, "loss": 11.1382, "step": 167220 }, { "epoch": 20.123947051744885, "grad_norm": Infinity, "learning_rate": 0.0001815946133372259, "loss": 11.1126, "step": 167230 }, { "epoch": 20.125150421179303, "grad_norm": Infinity, "learning_rate": 0.0001815924139235289, "loss": 11.1613, "step": 167240 }, { "epoch": 20.126353790613717, "grad_norm": Infinity, "learning_rate": 0.00018159021439174752, "loss": 11.1604, "step": 167250 }, { "epoch": 20.127557160048134, "grad_norm": Infinity, "learning_rate": 0.00018158801474188496, "loss": 11.2473, "step": 167260 }, { "epoch": 20.128760529482552, "grad_norm": Infinity, "learning_rate": 0.0001815858149739444, "loss": 11.125, "step": 167270 }, { "epoch": 20.129963898916966, "grad_norm": Infinity, "learning_rate": 0.000181583615087929, "loss": 11.1334, "step": 167280 }, { "epoch": 20.131167268351383, "grad_norm": Infinity, "learning_rate": 0.00018158141508384202, "loss": 11.1533, "step": 167290 }, { "epoch": 20.1323706377858, "grad_norm": Infinity, "learning_rate": 0.00018157921496168654, "loss": 11.0755, "step": 167300 }, { "epoch": 20.133574007220215, "grad_norm": Infinity, "learning_rate": 0.0001815770147214658, "loss": 11.1248, "step": 167310 }, { "epoch": 20.134777376654633, "grad_norm": Infinity, "learning_rate": 0.00018157481436318303, "loss": 11.084, "step": 167320 }, { "epoch": 20.13598074608905, "grad_norm": Infinity, "learning_rate": 0.0001815726138868413, "loss": 11.0828, "step": 167330 }, { "epoch": 20.137184115523464, "grad_norm": Infinity, "learning_rate": 0.0001815704132924439, "loss": 11.1401, "step": 167340 }, { "epoch": 20.13838748495788, "grad_norm": Infinity, "learning_rate": 0.00018156821257999398, "loss": 11.1719, "step": 167350 }, { "epoch": 20.1395908543923, "grad_norm": Infinity, "learning_rate": 0.00018156601174949466, "loss": 11.2306, "step": 167360 }, { "epoch": 20.140794223826713, "grad_norm": Infinity, "learning_rate": 0.00018156381080094923, "loss": 11.0992, "step": 167370 }, { "epoch": 20.14199759326113, "grad_norm": Infinity, "learning_rate": 0.0001815616097343608, "loss": 11.1099, "step": 167380 }, { "epoch": 20.14320096269555, "grad_norm": Infinity, "learning_rate": 0.00018155940854973262, "loss": 11.085, "step": 167390 }, { "epoch": 20.144404332129962, "grad_norm": Infinity, "learning_rate": 0.00018155720724706778, "loss": 11.1913, "step": 167400 }, { "epoch": 20.14560770156438, "grad_norm": Infinity, "learning_rate": 0.0001815550058263696, "loss": 11.1644, "step": 167410 }, { "epoch": 20.146811070998798, "grad_norm": Infinity, "learning_rate": 0.00018155280428764115, "loss": 11.1332, "step": 167420 }, { "epoch": 20.14801444043321, "grad_norm": Infinity, "learning_rate": 0.00018155060263088564, "loss": 11.1433, "step": 167430 }, { "epoch": 20.14921780986763, "grad_norm": Infinity, "learning_rate": 0.00018154840085610633, "loss": 11.2023, "step": 167440 }, { "epoch": 20.150421179302047, "grad_norm": Infinity, "learning_rate": 0.00018154619896330632, "loss": 11.0908, "step": 167450 }, { "epoch": 20.15162454873646, "grad_norm": Infinity, "learning_rate": 0.0001815439969524888, "loss": 11.235, "step": 167460 }, { "epoch": 20.15282791817088, "grad_norm": Infinity, "learning_rate": 0.00018154179482365702, "loss": 11.1091, "step": 167470 }, { "epoch": 20.154031287605296, "grad_norm": Infinity, "learning_rate": 0.00018153959257681413, "loss": 11.1528, "step": 167480 }, { "epoch": 20.15523465703971, "grad_norm": Infinity, "learning_rate": 0.0001815373902119633, "loss": 11.0912, "step": 167490 }, { "epoch": 20.156438026474127, "grad_norm": Infinity, "learning_rate": 0.00018153518772910773, "loss": 11.1255, "step": 167500 }, { "epoch": 20.157641395908545, "grad_norm": Infinity, "learning_rate": 0.00018153298512825066, "loss": 11.0745, "step": 167510 }, { "epoch": 20.15884476534296, "grad_norm": Infinity, "learning_rate": 0.00018153078240939517, "loss": 11.2918, "step": 167520 }, { "epoch": 20.160048134777377, "grad_norm": Infinity, "learning_rate": 0.00018152857957254455, "loss": 11.1125, "step": 167530 }, { "epoch": 20.161251504211794, "grad_norm": Infinity, "learning_rate": 0.00018152637661770192, "loss": 11.1964, "step": 167540 }, { "epoch": 20.162454873646208, "grad_norm": Infinity, "learning_rate": 0.0001815241735448705, "loss": 11.2311, "step": 167550 }, { "epoch": 20.163658243080626, "grad_norm": Infinity, "learning_rate": 0.00018152197035405348, "loss": 11.0796, "step": 167560 }, { "epoch": 20.164861612515043, "grad_norm": Infinity, "learning_rate": 0.00018151976704525408, "loss": 11.1609, "step": 167570 }, { "epoch": 20.166064981949457, "grad_norm": Infinity, "learning_rate": 0.0001815175636184754, "loss": 11.0817, "step": 167580 }, { "epoch": 20.167268351383875, "grad_norm": Infinity, "learning_rate": 0.00018151536007372066, "loss": 11.1746, "step": 167590 }, { "epoch": 20.168471720818292, "grad_norm": Infinity, "learning_rate": 0.0001815131564109931, "loss": 11.1585, "step": 167600 }, { "epoch": 20.169675090252706, "grad_norm": Infinity, "learning_rate": 0.0001815109526302959, "loss": 11.0773, "step": 167610 }, { "epoch": 20.170878459687124, "grad_norm": Infinity, "learning_rate": 0.0001815087487316322, "loss": 11.0182, "step": 167620 }, { "epoch": 20.17208182912154, "grad_norm": Infinity, "learning_rate": 0.0001815065447150052, "loss": 11.113, "step": 167630 }, { "epoch": 20.173285198555956, "grad_norm": Infinity, "learning_rate": 0.00018150434058041815, "loss": 11.1181, "step": 167640 }, { "epoch": 20.174488567990373, "grad_norm": Infinity, "learning_rate": 0.00018150213632787417, "loss": 11.1721, "step": 167650 }, { "epoch": 20.17569193742479, "grad_norm": Infinity, "learning_rate": 0.00018149993195737646, "loss": 11.1159, "step": 167660 }, { "epoch": 20.176895306859205, "grad_norm": Infinity, "learning_rate": 0.00018149772746892828, "loss": 11.0641, "step": 167670 }, { "epoch": 20.178098676293622, "grad_norm": Infinity, "learning_rate": 0.0001814955228625327, "loss": 11.1873, "step": 167680 }, { "epoch": 20.17930204572804, "grad_norm": Infinity, "learning_rate": 0.00018149331813819303, "loss": 11.2447, "step": 167690 }, { "epoch": 20.180505415162454, "grad_norm": Infinity, "learning_rate": 0.0001814911132959124, "loss": 11.1306, "step": 167700 }, { "epoch": 20.18170878459687, "grad_norm": Infinity, "learning_rate": 0.000181488908335694, "loss": 11.0634, "step": 167710 }, { "epoch": 20.18291215403129, "grad_norm": Infinity, "learning_rate": 0.00018148670325754106, "loss": 11.0495, "step": 167720 }, { "epoch": 20.184115523465703, "grad_norm": Infinity, "learning_rate": 0.00018148449806145667, "loss": 11.0815, "step": 167730 }, { "epoch": 20.18531889290012, "grad_norm": Infinity, "learning_rate": 0.00018148229274744416, "loss": 11.1151, "step": 167740 }, { "epoch": 20.186522262334538, "grad_norm": Infinity, "learning_rate": 0.00018148008731550664, "loss": 11.3008, "step": 167750 }, { "epoch": 20.187725631768952, "grad_norm": Infinity, "learning_rate": 0.00018147788176564727, "loss": 11.0794, "step": 167760 }, { "epoch": 20.18892900120337, "grad_norm": Infinity, "learning_rate": 0.00018147567609786935, "loss": 11.1313, "step": 167770 }, { "epoch": 20.190132370637787, "grad_norm": Infinity, "learning_rate": 0.00018147347031217598, "loss": 11.2121, "step": 167780 }, { "epoch": 20.1913357400722, "grad_norm": Infinity, "learning_rate": 0.0001814712644085704, "loss": 11.079, "step": 167790 }, { "epoch": 20.19253910950662, "grad_norm": Infinity, "learning_rate": 0.0001814690583870558, "loss": 11.1556, "step": 167800 }, { "epoch": 20.193742478941036, "grad_norm": Infinity, "learning_rate": 0.0001814668522476353, "loss": 11.2027, "step": 167810 }, { "epoch": 20.19494584837545, "grad_norm": Infinity, "learning_rate": 0.0001814646459903122, "loss": 11.1316, "step": 167820 }, { "epoch": 20.196149217809868, "grad_norm": Infinity, "learning_rate": 0.00018146243961508963, "loss": 11.161, "step": 167830 }, { "epoch": 20.197352587244286, "grad_norm": Infinity, "learning_rate": 0.0001814602331219708, "loss": 11.2116, "step": 167840 }, { "epoch": 20.1985559566787, "grad_norm": Infinity, "learning_rate": 0.0001814580265109589, "loss": 11.1085, "step": 167850 }, { "epoch": 20.199759326113117, "grad_norm": Infinity, "learning_rate": 0.00018145581978205711, "loss": 11.1425, "step": 167860 }, { "epoch": 20.200962695547535, "grad_norm": Infinity, "learning_rate": 0.0001814536129352687, "loss": 11.0938, "step": 167870 }, { "epoch": 20.20216606498195, "grad_norm": Infinity, "learning_rate": 0.00018145140597059672, "loss": 11.1118, "step": 167880 }, { "epoch": 20.203369434416366, "grad_norm": Infinity, "learning_rate": 0.0001814491988880445, "loss": 11.0124, "step": 167890 }, { "epoch": 20.204572803850784, "grad_norm": Infinity, "learning_rate": 0.00018144699168761515, "loss": 11.1443, "step": 167900 }, { "epoch": 20.205776173285198, "grad_norm": Infinity, "learning_rate": 0.00018144478436931189, "loss": 11.0882, "step": 167910 }, { "epoch": 20.206979542719615, "grad_norm": Infinity, "learning_rate": 0.00018144257693313796, "loss": 11.2014, "step": 167920 }, { "epoch": 20.20818291215403, "grad_norm": Infinity, "learning_rate": 0.00018144036937909648, "loss": 11.1425, "step": 167930 }, { "epoch": 20.209386281588447, "grad_norm": Infinity, "learning_rate": 0.00018143816170719068, "loss": 11.1595, "step": 167940 }, { "epoch": 20.210589651022865, "grad_norm": Infinity, "learning_rate": 0.00018143595391742376, "loss": 11.0945, "step": 167950 }, { "epoch": 20.21179302045728, "grad_norm": Infinity, "learning_rate": 0.00018143374600979893, "loss": 11.0637, "step": 167960 }, { "epoch": 20.212996389891696, "grad_norm": Infinity, "learning_rate": 0.0001814315379843193, "loss": 11.2481, "step": 167970 }, { "epoch": 20.214199759326114, "grad_norm": Infinity, "learning_rate": 0.0001814293298409882, "loss": 11.1392, "step": 167980 }, { "epoch": 20.215403128760528, "grad_norm": Infinity, "learning_rate": 0.00018142712157980873, "loss": 11.0839, "step": 167990 }, { "epoch": 20.216606498194945, "grad_norm": Infinity, "learning_rate": 0.0001814249132007841, "loss": 11.1314, "step": 168000 }, { "epoch": 20.217809867629363, "grad_norm": Infinity, "learning_rate": 0.00018142270470391752, "loss": 11.1778, "step": 168010 }, { "epoch": 20.219013237063777, "grad_norm": Infinity, "learning_rate": 0.0001814204960892122, "loss": 11.1252, "step": 168020 }, { "epoch": 20.220216606498195, "grad_norm": Infinity, "learning_rate": 0.0001814182873566713, "loss": 11.1722, "step": 168030 }, { "epoch": 20.221419975932612, "grad_norm": Infinity, "learning_rate": 0.00018141607850629806, "loss": 11.2076, "step": 168040 }, { "epoch": 20.222623345367026, "grad_norm": Infinity, "learning_rate": 0.00018141386953809564, "loss": 10.9646, "step": 168050 }, { "epoch": 20.223826714801444, "grad_norm": Infinity, "learning_rate": 0.00018141166045206722, "loss": 11.1962, "step": 168060 }, { "epoch": 20.22503008423586, "grad_norm": Infinity, "learning_rate": 0.00018140945124821606, "loss": 11.1501, "step": 168070 }, { "epoch": 20.226233453670275, "grad_norm": Infinity, "learning_rate": 0.00018140724192654536, "loss": 11.1453, "step": 168080 }, { "epoch": 20.227436823104693, "grad_norm": Infinity, "learning_rate": 0.00018140503248705823, "loss": 11.1853, "step": 168090 }, { "epoch": 20.22864019253911, "grad_norm": Infinity, "learning_rate": 0.00018140282292975792, "loss": 11.3167, "step": 168100 }, { "epoch": 20.229843561973524, "grad_norm": Infinity, "learning_rate": 0.00018140061325464766, "loss": 11.099, "step": 168110 }, { "epoch": 20.231046931407942, "grad_norm": Infinity, "learning_rate": 0.0001813984034617306, "loss": 11.0346, "step": 168120 }, { "epoch": 20.23225030084236, "grad_norm": Infinity, "learning_rate": 0.00018139619355100995, "loss": 11.1847, "step": 168130 }, { "epoch": 20.233453670276774, "grad_norm": Infinity, "learning_rate": 0.00018139398352248892, "loss": 11.046, "step": 168140 }, { "epoch": 20.23465703971119, "grad_norm": Infinity, "learning_rate": 0.00018139177337617068, "loss": 11.2267, "step": 168150 }, { "epoch": 20.23586040914561, "grad_norm": Infinity, "learning_rate": 0.00018138956311205846, "loss": 11.1438, "step": 168160 }, { "epoch": 20.237063778580023, "grad_norm": Infinity, "learning_rate": 0.00018138735273015544, "loss": 11.1627, "step": 168170 }, { "epoch": 20.23826714801444, "grad_norm": Infinity, "learning_rate": 0.00018138514223046484, "loss": 11.0866, "step": 168180 }, { "epoch": 20.239470517448858, "grad_norm": Infinity, "learning_rate": 0.00018138293161298986, "loss": 11.2642, "step": 168190 }, { "epoch": 20.240673886883272, "grad_norm": Infinity, "learning_rate": 0.00018138072087773365, "loss": 11.1357, "step": 168200 }, { "epoch": 20.24187725631769, "grad_norm": Infinity, "learning_rate": 0.00018137851002469945, "loss": 11.162, "step": 168210 }, { "epoch": 20.243080625752107, "grad_norm": Infinity, "learning_rate": 0.00018137629905389048, "loss": 11.0163, "step": 168220 }, { "epoch": 20.24428399518652, "grad_norm": Infinity, "learning_rate": 0.0001813740879653099, "loss": 11.09, "step": 168230 }, { "epoch": 20.24548736462094, "grad_norm": Infinity, "learning_rate": 0.00018137187675896095, "loss": 11.1475, "step": 168240 }, { "epoch": 20.246690734055356, "grad_norm": Infinity, "learning_rate": 0.00018136966543484678, "loss": 11.1754, "step": 168250 }, { "epoch": 20.24789410348977, "grad_norm": Infinity, "learning_rate": 0.0001813674539929706, "loss": 11.0882, "step": 168260 }, { "epoch": 20.249097472924188, "grad_norm": Infinity, "learning_rate": 0.00018136524243333562, "loss": 11.1498, "step": 168270 }, { "epoch": 20.250300842358605, "grad_norm": Infinity, "learning_rate": 0.00018136303075594508, "loss": 11.1468, "step": 168280 }, { "epoch": 20.25150421179302, "grad_norm": Infinity, "learning_rate": 0.00018136081896080213, "loss": 11.2255, "step": 168290 }, { "epoch": 20.252707581227437, "grad_norm": Infinity, "learning_rate": 0.00018135860704791, "loss": 11.1239, "step": 168300 }, { "epoch": 20.253910950661854, "grad_norm": Infinity, "learning_rate": 0.00018135639501727185, "loss": 11.1368, "step": 168310 }, { "epoch": 20.25511432009627, "grad_norm": Infinity, "learning_rate": 0.00018135418286889095, "loss": 11.201, "step": 168320 }, { "epoch": 20.256317689530686, "grad_norm": Infinity, "learning_rate": 0.00018135197060277042, "loss": 11.1481, "step": 168330 }, { "epoch": 20.257521058965104, "grad_norm": Infinity, "learning_rate": 0.00018134975821891352, "loss": 11.1933, "step": 168340 }, { "epoch": 20.258724428399518, "grad_norm": Infinity, "learning_rate": 0.00018134754571732343, "loss": 11.1831, "step": 168350 }, { "epoch": 20.259927797833935, "grad_norm": Infinity, "learning_rate": 0.00018134533309800336, "loss": 11.2204, "step": 168360 }, { "epoch": 20.261131167268353, "grad_norm": Infinity, "learning_rate": 0.00018134312036095654, "loss": 11.1646, "step": 168370 }, { "epoch": 20.262334536702767, "grad_norm": Infinity, "learning_rate": 0.0001813409075061861, "loss": 11.1786, "step": 168380 }, { "epoch": 20.263537906137184, "grad_norm": Infinity, "learning_rate": 0.00018133869453369533, "loss": 11.088, "step": 168390 }, { "epoch": 20.264741275571602, "grad_norm": Infinity, "learning_rate": 0.00018133648144348733, "loss": 11.2518, "step": 168400 }, { "epoch": 20.265944645006016, "grad_norm": Infinity, "learning_rate": 0.0001813342682355654, "loss": 11.1197, "step": 168410 }, { "epoch": 20.267148014440433, "grad_norm": Infinity, "learning_rate": 0.0001813320549099327, "loss": 11.1416, "step": 168420 }, { "epoch": 20.26835138387485, "grad_norm": Infinity, "learning_rate": 0.0001813298414665924, "loss": 11.1635, "step": 168430 }, { "epoch": 20.269554753309265, "grad_norm": Infinity, "learning_rate": 0.00018132762790554777, "loss": 11.064, "step": 168440 }, { "epoch": 20.270758122743683, "grad_norm": Infinity, "learning_rate": 0.000181325414226802, "loss": 11.0857, "step": 168450 }, { "epoch": 20.2719614921781, "grad_norm": Infinity, "learning_rate": 0.00018132320043035826, "loss": 11.19, "step": 168460 }, { "epoch": 20.273164861612514, "grad_norm": Infinity, "learning_rate": 0.00018132098651621977, "loss": 11.064, "step": 168470 }, { "epoch": 20.27436823104693, "grad_norm": Infinity, "learning_rate": 0.00018131877248438972, "loss": 11.0883, "step": 168480 }, { "epoch": 20.27557160048135, "grad_norm": Infinity, "learning_rate": 0.00018131655833487136, "loss": 11.275, "step": 168490 }, { "epoch": 20.276774969915763, "grad_norm": Infinity, "learning_rate": 0.00018131434406766782, "loss": 11.2177, "step": 168500 }, { "epoch": 20.27797833935018, "grad_norm": Infinity, "learning_rate": 0.0001813121296827824, "loss": 11.2364, "step": 168510 }, { "epoch": 20.2791817087846, "grad_norm": Infinity, "learning_rate": 0.00018130991518021822, "loss": 11.1248, "step": 168520 }, { "epoch": 20.280385078219012, "grad_norm": Infinity, "learning_rate": 0.0001813077005599785, "loss": 11.1157, "step": 168530 }, { "epoch": 20.28158844765343, "grad_norm": Infinity, "learning_rate": 0.0001813054858220665, "loss": 11.116, "step": 168540 }, { "epoch": 20.282791817087848, "grad_norm": Infinity, "learning_rate": 0.0001813032709664854, "loss": 11.0292, "step": 168550 }, { "epoch": 20.28399518652226, "grad_norm": Infinity, "learning_rate": 0.00018130105599323837, "loss": 11.1008, "step": 168560 }, { "epoch": 20.28519855595668, "grad_norm": Infinity, "learning_rate": 0.00018129884090232863, "loss": 11.0948, "step": 168570 }, { "epoch": 20.286401925391097, "grad_norm": Infinity, "learning_rate": 0.00018129662569375943, "loss": 11.1765, "step": 168580 }, { "epoch": 20.28760529482551, "grad_norm": Infinity, "learning_rate": 0.0001812944103675339, "loss": 11.1143, "step": 168590 }, { "epoch": 20.28880866425993, "grad_norm": Infinity, "learning_rate": 0.0001812921949236553, "loss": 11.1647, "step": 168600 }, { "epoch": 20.290012033694346, "grad_norm": Infinity, "learning_rate": 0.00018128997936212683, "loss": 11.1665, "step": 168610 }, { "epoch": 20.29121540312876, "grad_norm": Infinity, "learning_rate": 0.00018128776368295168, "loss": 11.1361, "step": 168620 }, { "epoch": 20.292418772563177, "grad_norm": Infinity, "learning_rate": 0.00018128554788613308, "loss": 11.186, "step": 168630 }, { "epoch": 20.29362214199759, "grad_norm": Infinity, "learning_rate": 0.0001812833319716742, "loss": 11.2919, "step": 168640 }, { "epoch": 20.29482551143201, "grad_norm": Infinity, "learning_rate": 0.0001812811159395783, "loss": 11.1262, "step": 168650 }, { "epoch": 20.296028880866427, "grad_norm": Infinity, "learning_rate": 0.00018127889978984855, "loss": 11.1271, "step": 168660 }, { "epoch": 20.29723225030084, "grad_norm": Infinity, "learning_rate": 0.00018127668352248817, "loss": 11.0992, "step": 168670 }, { "epoch": 20.29843561973526, "grad_norm": Infinity, "learning_rate": 0.00018127446713750033, "loss": 11.1489, "step": 168680 }, { "epoch": 20.299638989169676, "grad_norm": Infinity, "learning_rate": 0.0001812722506348883, "loss": 11.1718, "step": 168690 }, { "epoch": 20.30084235860409, "grad_norm": Infinity, "learning_rate": 0.00018127003401465525, "loss": 11.0129, "step": 168700 }, { "epoch": 20.302045728038507, "grad_norm": Infinity, "learning_rate": 0.0001812678172768044, "loss": 11.1778, "step": 168710 }, { "epoch": 20.303249097472925, "grad_norm": Infinity, "learning_rate": 0.00018126560042133896, "loss": 11.1122, "step": 168720 }, { "epoch": 20.30445246690734, "grad_norm": Infinity, "learning_rate": 0.00018126338344826213, "loss": 11.1422, "step": 168730 }, { "epoch": 20.305655836341757, "grad_norm": Infinity, "learning_rate": 0.00018126116635757707, "loss": 11.036, "step": 168740 }, { "epoch": 20.306859205776174, "grad_norm": Infinity, "learning_rate": 0.0001812589491492871, "loss": 11.2096, "step": 168750 }, { "epoch": 20.308062575210588, "grad_norm": Infinity, "learning_rate": 0.00018125673182339534, "loss": 11.1884, "step": 168760 }, { "epoch": 20.309265944645006, "grad_norm": Infinity, "learning_rate": 0.00018125451437990507, "loss": 11.1084, "step": 168770 }, { "epoch": 20.310469314079423, "grad_norm": Infinity, "learning_rate": 0.0001812522968188194, "loss": 11.1523, "step": 168780 }, { "epoch": 20.311672683513837, "grad_norm": Infinity, "learning_rate": 0.00018125007914014164, "loss": 11.143, "step": 168790 }, { "epoch": 20.312876052948255, "grad_norm": Infinity, "learning_rate": 0.00018124786134387494, "loss": 11.1536, "step": 168800 }, { "epoch": 20.314079422382672, "grad_norm": Infinity, "learning_rate": 0.00018124564343002252, "loss": 11.1718, "step": 168810 }, { "epoch": 20.315282791817086, "grad_norm": Infinity, "learning_rate": 0.0001812434253985876, "loss": 11.1878, "step": 168820 }, { "epoch": 20.316486161251504, "grad_norm": Infinity, "learning_rate": 0.0001812412072495734, "loss": 11.1281, "step": 168830 }, { "epoch": 20.31768953068592, "grad_norm": Infinity, "learning_rate": 0.0001812389889829831, "loss": 11.2728, "step": 168840 }, { "epoch": 20.318892900120336, "grad_norm": Infinity, "learning_rate": 0.00018123677059881993, "loss": 11.2612, "step": 168850 }, { "epoch": 20.320096269554753, "grad_norm": Infinity, "learning_rate": 0.00018123455209708708, "loss": 11.1249, "step": 168860 }, { "epoch": 20.32129963898917, "grad_norm": Infinity, "learning_rate": 0.00018123233347778776, "loss": 11.0236, "step": 168870 }, { "epoch": 20.322503008423585, "grad_norm": Infinity, "learning_rate": 0.00018123011474092523, "loss": 11.0864, "step": 168880 }, { "epoch": 20.323706377858002, "grad_norm": Infinity, "learning_rate": 0.00018122789588650266, "loss": 11.0723, "step": 168890 }, { "epoch": 20.32490974729242, "grad_norm": Infinity, "learning_rate": 0.0001812256769145233, "loss": 11.1216, "step": 168900 }, { "epoch": 20.326113116726834, "grad_norm": Infinity, "learning_rate": 0.0001812234578249903, "loss": 11.1565, "step": 168910 }, { "epoch": 20.32731648616125, "grad_norm": Infinity, "learning_rate": 0.0001812212386179069, "loss": 11.2159, "step": 168920 }, { "epoch": 20.32851985559567, "grad_norm": Infinity, "learning_rate": 0.00018121901929327633, "loss": 11.0953, "step": 168930 }, { "epoch": 20.329723225030083, "grad_norm": Infinity, "learning_rate": 0.00018121679985110177, "loss": 11.0642, "step": 168940 }, { "epoch": 20.3309265944645, "grad_norm": Infinity, "learning_rate": 0.00018121458029138644, "loss": 11.2041, "step": 168950 }, { "epoch": 20.332129963898918, "grad_norm": Infinity, "learning_rate": 0.0001812123606141336, "loss": 11.1106, "step": 168960 }, { "epoch": 20.333333333333332, "grad_norm": Infinity, "learning_rate": 0.00018121014081934643, "loss": 11.197, "step": 168970 }, { "epoch": 20.33453670276775, "grad_norm": Infinity, "learning_rate": 0.00018120792090702809, "loss": 11.1328, "step": 168980 }, { "epoch": 20.335740072202167, "grad_norm": Infinity, "learning_rate": 0.00018120570087718188, "loss": 11.1587, "step": 168990 }, { "epoch": 20.33694344163658, "grad_norm": Infinity, "learning_rate": 0.00018120348072981092, "loss": 11.1417, "step": 169000 }, { "epoch": 20.338146811071, "grad_norm": Infinity, "learning_rate": 0.00018120126046491852, "loss": 11.0425, "step": 169010 }, { "epoch": 20.339350180505416, "grad_norm": Infinity, "learning_rate": 0.00018119904008250784, "loss": 11.2006, "step": 169020 }, { "epoch": 20.34055354993983, "grad_norm": Infinity, "learning_rate": 0.0001811968195825821, "loss": 11.2156, "step": 169030 }, { "epoch": 20.341756919374248, "grad_norm": Infinity, "learning_rate": 0.0001811945989651445, "loss": 11.0456, "step": 169040 }, { "epoch": 20.342960288808666, "grad_norm": Infinity, "learning_rate": 0.00018119237823019827, "loss": 11.2371, "step": 169050 }, { "epoch": 20.34416365824308, "grad_norm": Infinity, "learning_rate": 0.00018119015737774662, "loss": 11.1413, "step": 169060 }, { "epoch": 20.345367027677497, "grad_norm": Infinity, "learning_rate": 0.0001811879364077928, "loss": 11.0901, "step": 169070 }, { "epoch": 20.346570397111915, "grad_norm": Infinity, "learning_rate": 0.00018118571532033996, "loss": 11.1257, "step": 169080 }, { "epoch": 20.34777376654633, "grad_norm": Infinity, "learning_rate": 0.00018118349411539136, "loss": 11.1537, "step": 169090 }, { "epoch": 20.348977135980746, "grad_norm": Infinity, "learning_rate": 0.0001811812727929502, "loss": 11.0659, "step": 169100 }, { "epoch": 20.350180505415164, "grad_norm": Infinity, "learning_rate": 0.0001811790513530197, "loss": 11.2239, "step": 169110 }, { "epoch": 20.351383874849578, "grad_norm": Infinity, "learning_rate": 0.00018117682979560304, "loss": 11.141, "step": 169120 }, { "epoch": 20.352587244283995, "grad_norm": Infinity, "learning_rate": 0.0001811746081207035, "loss": 11.1378, "step": 169130 }, { "epoch": 20.353790613718413, "grad_norm": Infinity, "learning_rate": 0.00018117238632832426, "loss": 11.0068, "step": 169140 }, { "epoch": 20.354993983152827, "grad_norm": Infinity, "learning_rate": 0.0001811701644184685, "loss": 11.2291, "step": 169150 }, { "epoch": 20.356197352587245, "grad_norm": Infinity, "learning_rate": 0.0001811679423911395, "loss": 11.1351, "step": 169160 }, { "epoch": 20.357400722021662, "grad_norm": Infinity, "learning_rate": 0.00018116572024634044, "loss": 11.1022, "step": 169170 }, { "epoch": 20.358604091456076, "grad_norm": Infinity, "learning_rate": 0.00018116349798407458, "loss": 11.0519, "step": 169180 }, { "epoch": 20.359807460890494, "grad_norm": Infinity, "learning_rate": 0.00018116127560434503, "loss": 11.1231, "step": 169190 }, { "epoch": 20.36101083032491, "grad_norm": Infinity, "learning_rate": 0.0001811590531071551, "loss": 11.1083, "step": 169200 }, { "epoch": 20.362214199759325, "grad_norm": Infinity, "learning_rate": 0.00018115683049250801, "loss": 11.1163, "step": 169210 }, { "epoch": 20.363417569193743, "grad_norm": Infinity, "learning_rate": 0.00018115460776040694, "loss": 11.1834, "step": 169220 }, { "epoch": 20.36462093862816, "grad_norm": Infinity, "learning_rate": 0.00018115238491085513, "loss": 11.3076, "step": 169230 }, { "epoch": 20.365824308062574, "grad_norm": Infinity, "learning_rate": 0.00018115016194385577, "loss": 11.1896, "step": 169240 }, { "epoch": 20.367027677496992, "grad_norm": Infinity, "learning_rate": 0.00018114793885941208, "loss": 11.1026, "step": 169250 }, { "epoch": 20.36823104693141, "grad_norm": Infinity, "learning_rate": 0.00018114571565752726, "loss": 11.2092, "step": 169260 }, { "epoch": 20.369434416365824, "grad_norm": Infinity, "learning_rate": 0.0001811434923382046, "loss": 11.2338, "step": 169270 }, { "epoch": 20.37063778580024, "grad_norm": Infinity, "learning_rate": 0.00018114126890144726, "loss": 11.068, "step": 169280 }, { "epoch": 20.37184115523466, "grad_norm": Infinity, "learning_rate": 0.00018113904534725847, "loss": 11.2205, "step": 169290 }, { "epoch": 20.373044524669073, "grad_norm": Infinity, "learning_rate": 0.00018113682167564146, "loss": 11.2902, "step": 169300 }, { "epoch": 20.37424789410349, "grad_norm": Infinity, "learning_rate": 0.00018113459788659945, "loss": 11.0897, "step": 169310 }, { "epoch": 20.375451263537904, "grad_norm": Infinity, "learning_rate": 0.0001811323739801356, "loss": 11.0878, "step": 169320 }, { "epoch": 20.376654632972322, "grad_norm": Infinity, "learning_rate": 0.00018113014995625322, "loss": 11.1376, "step": 169330 }, { "epoch": 20.37785800240674, "grad_norm": Infinity, "learning_rate": 0.00018112792581495544, "loss": 11.1547, "step": 169340 }, { "epoch": 20.379061371841154, "grad_norm": Infinity, "learning_rate": 0.00018112570155624552, "loss": 11.0835, "step": 169350 }, { "epoch": 20.38026474127557, "grad_norm": Infinity, "learning_rate": 0.00018112347718012673, "loss": 11.1521, "step": 169360 }, { "epoch": 20.38146811070999, "grad_norm": Infinity, "learning_rate": 0.0001811212526866022, "loss": 11.1616, "step": 169370 }, { "epoch": 20.382671480144403, "grad_norm": Infinity, "learning_rate": 0.0001811190280756752, "loss": 11.0925, "step": 169380 }, { "epoch": 20.38387484957882, "grad_norm": Infinity, "learning_rate": 0.00018111680334734892, "loss": 11.1141, "step": 169390 }, { "epoch": 20.385078219013238, "grad_norm": Infinity, "learning_rate": 0.00018111457850162666, "loss": 11.0996, "step": 169400 }, { "epoch": 20.386281588447652, "grad_norm": Infinity, "learning_rate": 0.00018111235353851152, "loss": 11.0842, "step": 169410 }, { "epoch": 20.38748495788207, "grad_norm": Infinity, "learning_rate": 0.0001811101284580068, "loss": 11.1849, "step": 169420 }, { "epoch": 20.388688327316487, "grad_norm": Infinity, "learning_rate": 0.0001811079032601157, "loss": 11.0988, "step": 169430 }, { "epoch": 20.3898916967509, "grad_norm": Infinity, "learning_rate": 0.00018110567794484141, "loss": 11.2283, "step": 169440 }, { "epoch": 20.39109506618532, "grad_norm": Infinity, "learning_rate": 0.0001811034525121872, "loss": 11.1412, "step": 169450 }, { "epoch": 20.392298435619736, "grad_norm": Infinity, "learning_rate": 0.0001811012269621563, "loss": 11.136, "step": 169460 }, { "epoch": 20.39350180505415, "grad_norm": Infinity, "learning_rate": 0.00018109900129475184, "loss": 11.143, "step": 169470 }, { "epoch": 20.394705174488568, "grad_norm": Infinity, "learning_rate": 0.00018109677550997715, "loss": 11.0315, "step": 169480 }, { "epoch": 20.395908543922985, "grad_norm": Infinity, "learning_rate": 0.0001810945496078354, "loss": 11.158, "step": 169490 }, { "epoch": 20.3971119133574, "grad_norm": Infinity, "learning_rate": 0.0001810923235883298, "loss": 11.2196, "step": 169500 }, { "epoch": 20.398315282791817, "grad_norm": Infinity, "learning_rate": 0.00018109009745146358, "loss": 11.2157, "step": 169510 }, { "epoch": 20.399518652226234, "grad_norm": Infinity, "learning_rate": 0.00018108787119724, "loss": 11.1027, "step": 169520 }, { "epoch": 20.40072202166065, "grad_norm": Infinity, "learning_rate": 0.00018108564482566223, "loss": 11.1171, "step": 169530 }, { "epoch": 20.401925391095066, "grad_norm": Infinity, "learning_rate": 0.00018108341833673351, "loss": 11.014, "step": 169540 }, { "epoch": 20.403128760529484, "grad_norm": Infinity, "learning_rate": 0.00018108119173045707, "loss": 11.2019, "step": 169550 }, { "epoch": 20.404332129963898, "grad_norm": Infinity, "learning_rate": 0.00018107896500683612, "loss": 11.3003, "step": 169560 }, { "epoch": 20.405535499398315, "grad_norm": Infinity, "learning_rate": 0.0001810767381658739, "loss": 11.0753, "step": 169570 }, { "epoch": 20.406738868832733, "grad_norm": Infinity, "learning_rate": 0.00018107451120757362, "loss": 11.1968, "step": 169580 }, { "epoch": 20.407942238267147, "grad_norm": Infinity, "learning_rate": 0.0001810722841319385, "loss": 11.2494, "step": 169590 }, { "epoch": 20.409145607701564, "grad_norm": Infinity, "learning_rate": 0.00018107005693897174, "loss": 11.1288, "step": 169600 }, { "epoch": 20.410348977135982, "grad_norm": Infinity, "learning_rate": 0.00018106782962867666, "loss": 11.1042, "step": 169610 }, { "epoch": 20.411552346570396, "grad_norm": Infinity, "learning_rate": 0.00018106560220105638, "loss": 11.2676, "step": 169620 }, { "epoch": 20.412755716004813, "grad_norm": Infinity, "learning_rate": 0.00018106337465611415, "loss": 11.1516, "step": 169630 }, { "epoch": 20.41395908543923, "grad_norm": Infinity, "learning_rate": 0.00018106114699385323, "loss": 11.1647, "step": 169640 }, { "epoch": 20.415162454873645, "grad_norm": Infinity, "learning_rate": 0.00018105891921427677, "loss": 11.1637, "step": 169650 }, { "epoch": 20.416365824308063, "grad_norm": Infinity, "learning_rate": 0.00018105669131738807, "loss": 11.0832, "step": 169660 }, { "epoch": 20.41756919374248, "grad_norm": Infinity, "learning_rate": 0.00018105446330319035, "loss": 11.1426, "step": 169670 }, { "epoch": 20.418772563176894, "grad_norm": Infinity, "learning_rate": 0.00018105223517168677, "loss": 11.108, "step": 169680 }, { "epoch": 20.41997593261131, "grad_norm": Infinity, "learning_rate": 0.0001810500069228806, "loss": 11.1267, "step": 169690 }, { "epoch": 20.42117930204573, "grad_norm": Infinity, "learning_rate": 0.00018104777855677506, "loss": 11.1771, "step": 169700 }, { "epoch": 20.422382671480143, "grad_norm": Infinity, "learning_rate": 0.0001810455500733734, "loss": 11.0843, "step": 169710 }, { "epoch": 20.42358604091456, "grad_norm": Infinity, "learning_rate": 0.0001810433214726788, "loss": 11.2624, "step": 169720 }, { "epoch": 20.42478941034898, "grad_norm": Infinity, "learning_rate": 0.00018104109275469453, "loss": 11.1661, "step": 169730 }, { "epoch": 20.425992779783392, "grad_norm": Infinity, "learning_rate": 0.00018103886391942375, "loss": 11.178, "step": 169740 }, { "epoch": 20.42719614921781, "grad_norm": Infinity, "learning_rate": 0.00018103663496686973, "loss": 11.1344, "step": 169750 }, { "epoch": 20.428399518652228, "grad_norm": Infinity, "learning_rate": 0.00018103440589703574, "loss": 11.131, "step": 169760 }, { "epoch": 20.42960288808664, "grad_norm": Infinity, "learning_rate": 0.00018103217670992493, "loss": 11.1171, "step": 169770 }, { "epoch": 20.43080625752106, "grad_norm": Infinity, "learning_rate": 0.00018102994740554053, "loss": 11.242, "step": 169780 }, { "epoch": 20.432009626955477, "grad_norm": Infinity, "learning_rate": 0.0001810277179838858, "loss": 11.1073, "step": 169790 }, { "epoch": 20.43321299638989, "grad_norm": Infinity, "learning_rate": 0.00018102548844496397, "loss": 11.1494, "step": 169800 }, { "epoch": 20.43441636582431, "grad_norm": Infinity, "learning_rate": 0.00018102325878877827, "loss": 11.097, "step": 169810 }, { "epoch": 20.435619735258726, "grad_norm": Infinity, "learning_rate": 0.00018102102901533187, "loss": 11.1085, "step": 169820 }, { "epoch": 20.43682310469314, "grad_norm": Infinity, "learning_rate": 0.00018101879912462806, "loss": 11.0895, "step": 169830 }, { "epoch": 20.438026474127557, "grad_norm": Infinity, "learning_rate": 0.00018101656911667004, "loss": 11.0728, "step": 169840 }, { "epoch": 20.439229843561975, "grad_norm": Infinity, "learning_rate": 0.00018101433899146105, "loss": 11.1207, "step": 169850 }, { "epoch": 20.44043321299639, "grad_norm": Infinity, "learning_rate": 0.0001810121087490043, "loss": 11.1676, "step": 169860 }, { "epoch": 20.441636582430807, "grad_norm": Infinity, "learning_rate": 0.00018100987838930304, "loss": 11.1478, "step": 169870 }, { "epoch": 20.442839951865224, "grad_norm": Infinity, "learning_rate": 0.00018100764791236046, "loss": 11.2377, "step": 169880 }, { "epoch": 20.444043321299638, "grad_norm": Infinity, "learning_rate": 0.00018100541731817983, "loss": 11.1416, "step": 169890 }, { "epoch": 20.445246690734056, "grad_norm": Infinity, "learning_rate": 0.00018100318660676438, "loss": 11.0905, "step": 169900 }, { "epoch": 20.446450060168473, "grad_norm": Infinity, "learning_rate": 0.00018100095577811728, "loss": 11.3147, "step": 169910 }, { "epoch": 20.447653429602887, "grad_norm": Infinity, "learning_rate": 0.00018099872483224184, "loss": 11.1742, "step": 169920 }, { "epoch": 20.448856799037305, "grad_norm": Infinity, "learning_rate": 0.00018099649376914122, "loss": 11.0597, "step": 169930 }, { "epoch": 20.450060168471722, "grad_norm": Infinity, "learning_rate": 0.00018099426258881869, "loss": 11.1532, "step": 169940 }, { "epoch": 20.451263537906136, "grad_norm": Infinity, "learning_rate": 0.00018099203129127745, "loss": 11.1107, "step": 169950 }, { "epoch": 20.452466907340554, "grad_norm": Infinity, "learning_rate": 0.00018098979987652073, "loss": 11.1537, "step": 169960 }, { "epoch": 20.45367027677497, "grad_norm": Infinity, "learning_rate": 0.00018098756834455183, "loss": 11.0945, "step": 169970 }, { "epoch": 20.454873646209386, "grad_norm": Infinity, "learning_rate": 0.0001809853366953739, "loss": 11.2382, "step": 169980 }, { "epoch": 20.456077015643803, "grad_norm": Infinity, "learning_rate": 0.00018098310492899015, "loss": 11.1639, "step": 169990 }, { "epoch": 20.45728038507822, "grad_norm": Infinity, "learning_rate": 0.0001809808730454039, "loss": 11.147, "step": 170000 }, { "epoch": 20.458483754512635, "grad_norm": Infinity, "learning_rate": 0.00018097864104461834, "loss": 11.12, "step": 170010 }, { "epoch": 20.459687123947052, "grad_norm": Infinity, "learning_rate": 0.00018097640892663665, "loss": 11.1234, "step": 170020 }, { "epoch": 20.460890493381466, "grad_norm": Infinity, "learning_rate": 0.00018097417669146214, "loss": 11.184, "step": 170030 }, { "epoch": 20.462093862815884, "grad_norm": Infinity, "learning_rate": 0.00018097194433909798, "loss": 11.2513, "step": 170040 }, { "epoch": 20.4632972322503, "grad_norm": Infinity, "learning_rate": 0.00018096971186954742, "loss": 11.1851, "step": 170050 }, { "epoch": 20.464500601684716, "grad_norm": Infinity, "learning_rate": 0.00018096747928281372, "loss": 11.1678, "step": 170060 }, { "epoch": 20.465703971119133, "grad_norm": Infinity, "learning_rate": 0.0001809652465789001, "loss": 11.1197, "step": 170070 }, { "epoch": 20.46690734055355, "grad_norm": Infinity, "learning_rate": 0.00018096301375780975, "loss": 11.1274, "step": 170080 }, { "epoch": 20.468110709987965, "grad_norm": Infinity, "learning_rate": 0.00018096078081954593, "loss": 11.1493, "step": 170090 }, { "epoch": 20.469314079422382, "grad_norm": Infinity, "learning_rate": 0.0001809585477641119, "loss": 11.0888, "step": 170100 }, { "epoch": 20.4705174488568, "grad_norm": Infinity, "learning_rate": 0.00018095631459151082, "loss": 11.2197, "step": 170110 }, { "epoch": 20.471720818291214, "grad_norm": Infinity, "learning_rate": 0.000180954081301746, "loss": 11.113, "step": 170120 }, { "epoch": 20.47292418772563, "grad_norm": Infinity, "learning_rate": 0.00018095184789482062, "loss": 11.2568, "step": 170130 }, { "epoch": 20.47412755716005, "grad_norm": Infinity, "learning_rate": 0.00018094961437073795, "loss": 11.1379, "step": 170140 }, { "epoch": 20.475330926594463, "grad_norm": Infinity, "learning_rate": 0.0001809473807295012, "loss": 11.1916, "step": 170150 }, { "epoch": 20.47653429602888, "grad_norm": Infinity, "learning_rate": 0.00018094514697111357, "loss": 11.0953, "step": 170160 }, { "epoch": 20.477737665463298, "grad_norm": Infinity, "learning_rate": 0.00018094291309557836, "loss": 11.1652, "step": 170170 }, { "epoch": 20.478941034897712, "grad_norm": Infinity, "learning_rate": 0.00018094067910289876, "loss": 11.1372, "step": 170180 }, { "epoch": 20.48014440433213, "grad_norm": Infinity, "learning_rate": 0.000180938444993078, "loss": 11.1689, "step": 170190 }, { "epoch": 20.481347773766547, "grad_norm": Infinity, "learning_rate": 0.00018093621076611935, "loss": 11.2801, "step": 170200 }, { "epoch": 20.48255114320096, "grad_norm": Infinity, "learning_rate": 0.000180933976422026, "loss": 11.1455, "step": 170210 }, { "epoch": 20.48375451263538, "grad_norm": Infinity, "learning_rate": 0.00018093174196080124, "loss": 11.1376, "step": 170220 }, { "epoch": 20.484957882069796, "grad_norm": Infinity, "learning_rate": 0.00018092950738244823, "loss": 11.0514, "step": 170230 }, { "epoch": 20.48616125150421, "grad_norm": Infinity, "learning_rate": 0.00018092727268697028, "loss": 11.0518, "step": 170240 }, { "epoch": 20.487364620938628, "grad_norm": Infinity, "learning_rate": 0.00018092503787437054, "loss": 11.1309, "step": 170250 }, { "epoch": 20.488567990373046, "grad_norm": Infinity, "learning_rate": 0.0001809228029446523, "loss": 11.0402, "step": 170260 }, { "epoch": 20.48977135980746, "grad_norm": Infinity, "learning_rate": 0.00018092056789781883, "loss": 11.1491, "step": 170270 }, { "epoch": 20.490974729241877, "grad_norm": Infinity, "learning_rate": 0.0001809183327338733, "loss": 11.2244, "step": 170280 }, { "epoch": 20.492178098676295, "grad_norm": Infinity, "learning_rate": 0.0001809160974528189, "loss": 11.1063, "step": 170290 }, { "epoch": 20.49338146811071, "grad_norm": Infinity, "learning_rate": 0.00018091386205465901, "loss": 11.2281, "step": 170300 }, { "epoch": 20.494584837545126, "grad_norm": Infinity, "learning_rate": 0.00018091162653939677, "loss": 11.1546, "step": 170310 }, { "epoch": 20.495788206979544, "grad_norm": Infinity, "learning_rate": 0.0001809093909070354, "loss": 11.0695, "step": 170320 }, { "epoch": 20.496991576413958, "grad_norm": Infinity, "learning_rate": 0.0001809071551575782, "loss": 10.9658, "step": 170330 }, { "epoch": 20.498194945848375, "grad_norm": Infinity, "learning_rate": 0.00018090491929102833, "loss": 11.1502, "step": 170340 }, { "epoch": 20.499398315282793, "grad_norm": Infinity, "learning_rate": 0.00018090268330738912, "loss": 11.1716, "step": 170350 }, { "epoch": 20.500601684717207, "grad_norm": Infinity, "learning_rate": 0.00018090044720666372, "loss": 11.1951, "step": 170360 }, { "epoch": 20.501805054151625, "grad_norm": Infinity, "learning_rate": 0.00018089821098885538, "loss": 11.2606, "step": 170370 }, { "epoch": 20.503008423586042, "grad_norm": Infinity, "learning_rate": 0.0001808959746539674, "loss": 11.155, "step": 170380 }, { "epoch": 20.504211793020456, "grad_norm": Infinity, "learning_rate": 0.00018089373820200292, "loss": 11.0203, "step": 170390 }, { "epoch": 20.505415162454874, "grad_norm": Infinity, "learning_rate": 0.00018089150163296526, "loss": 11.0893, "step": 170400 }, { "epoch": 20.50661853188929, "grad_norm": Infinity, "learning_rate": 0.00018088926494685766, "loss": 11.052, "step": 170410 }, { "epoch": 20.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00018088702814368328, "loss": 11.1476, "step": 170420 }, { "epoch": 20.509025270758123, "grad_norm": Infinity, "learning_rate": 0.0001808847912234454, "loss": 11.0949, "step": 170430 }, { "epoch": 20.51022864019254, "grad_norm": Infinity, "learning_rate": 0.00018088255418614727, "loss": 11.1898, "step": 170440 }, { "epoch": 20.511432009626954, "grad_norm": Infinity, "learning_rate": 0.00018088031703179208, "loss": 11.2116, "step": 170450 }, { "epoch": 20.512635379061372, "grad_norm": Infinity, "learning_rate": 0.00018087807976038315, "loss": 11.143, "step": 170460 }, { "epoch": 20.51383874849579, "grad_norm": Infinity, "learning_rate": 0.00018087584237192364, "loss": 11.2086, "step": 170470 }, { "epoch": 20.515042117930204, "grad_norm": Infinity, "learning_rate": 0.00018087360486641684, "loss": 11.0903, "step": 170480 }, { "epoch": 20.51624548736462, "grad_norm": Infinity, "learning_rate": 0.00018087136724386594, "loss": 11.0871, "step": 170490 }, { "epoch": 20.51744885679904, "grad_norm": Infinity, "learning_rate": 0.00018086912950427423, "loss": 11.1235, "step": 170500 }, { "epoch": 20.518652226233453, "grad_norm": Infinity, "learning_rate": 0.00018086689164764492, "loss": 11.124, "step": 170510 }, { "epoch": 20.51985559566787, "grad_norm": Infinity, "learning_rate": 0.00018086465367398127, "loss": 11.1239, "step": 170520 }, { "epoch": 20.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00018086241558328648, "loss": 11.1464, "step": 170530 }, { "epoch": 20.522262334536702, "grad_norm": Infinity, "learning_rate": 0.0001808601773755638, "loss": 11.0619, "step": 170540 }, { "epoch": 20.52346570397112, "grad_norm": Infinity, "learning_rate": 0.0001808579390508165, "loss": 11.1267, "step": 170550 }, { "epoch": 20.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00018085570060904778, "loss": 11.1167, "step": 170560 }, { "epoch": 20.52587244283995, "grad_norm": Infinity, "learning_rate": 0.0001808534620502609, "loss": 11.1912, "step": 170570 }, { "epoch": 20.52707581227437, "grad_norm": Infinity, "learning_rate": 0.00018085122337445911, "loss": 11.0377, "step": 170580 }, { "epoch": 20.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00018084898458164562, "loss": 11.1624, "step": 170590 }, { "epoch": 20.5294825511432, "grad_norm": Infinity, "learning_rate": 0.0001808467456718237, "loss": 11.1101, "step": 170600 }, { "epoch": 20.530685920577618, "grad_norm": Infinity, "learning_rate": 0.0001808445066449966, "loss": 11.1022, "step": 170610 }, { "epoch": 20.531889290012035, "grad_norm": Infinity, "learning_rate": 0.00018084226750116753, "loss": 11.1836, "step": 170620 }, { "epoch": 20.53309265944645, "grad_norm": Infinity, "learning_rate": 0.0001808400282403397, "loss": 11.1366, "step": 170630 }, { "epoch": 20.534296028880867, "grad_norm": Infinity, "learning_rate": 0.00018083778886251642, "loss": 11.1776, "step": 170640 }, { "epoch": 20.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00018083554936770092, "loss": 11.1768, "step": 170650 }, { "epoch": 20.5367027677497, "grad_norm": Infinity, "learning_rate": 0.0001808333097558964, "loss": 11.202, "step": 170660 }, { "epoch": 20.537906137184116, "grad_norm": Infinity, "learning_rate": 0.00018083107002710613, "loss": 11.2465, "step": 170670 }, { "epoch": 20.53910950661853, "grad_norm": Infinity, "learning_rate": 0.00018082883018133333, "loss": 11.197, "step": 170680 }, { "epoch": 20.540312876052948, "grad_norm": Infinity, "learning_rate": 0.0001808265902185813, "loss": 11.1777, "step": 170690 }, { "epoch": 20.541516245487365, "grad_norm": Infinity, "learning_rate": 0.0001808243501388532, "loss": 11.2165, "step": 170700 }, { "epoch": 20.54271961492178, "grad_norm": Infinity, "learning_rate": 0.00018082210994215233, "loss": 11.1918, "step": 170710 }, { "epoch": 20.543922984356197, "grad_norm": Infinity, "learning_rate": 0.0001808198696284819, "loss": 11.1845, "step": 170720 }, { "epoch": 20.545126353790614, "grad_norm": Infinity, "learning_rate": 0.00018081762919784514, "loss": 11.026, "step": 170730 }, { "epoch": 20.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00018081538865024534, "loss": 11.1739, "step": 170740 }, { "epoch": 20.547533092659446, "grad_norm": Infinity, "learning_rate": 0.00018081314798568574, "loss": 11.2232, "step": 170750 }, { "epoch": 20.548736462093864, "grad_norm": Infinity, "learning_rate": 0.00018081090720416953, "loss": 11.1036, "step": 170760 }, { "epoch": 20.549939831528278, "grad_norm": Infinity, "learning_rate": 0.00018080866630569998, "loss": 11.2451, "step": 170770 }, { "epoch": 20.551143200962695, "grad_norm": Infinity, "learning_rate": 0.0001808064252902804, "loss": 11.0609, "step": 170780 }, { "epoch": 20.552346570397113, "grad_norm": Infinity, "learning_rate": 0.00018080418415791392, "loss": 11.1309, "step": 170790 }, { "epoch": 20.553549939831527, "grad_norm": Infinity, "learning_rate": 0.00018080194290860386, "loss": 11.1908, "step": 170800 }, { "epoch": 20.554753309265944, "grad_norm": Infinity, "learning_rate": 0.0001807997015423534, "loss": 11.1889, "step": 170810 }, { "epoch": 20.555956678700362, "grad_norm": Infinity, "learning_rate": 0.00018079746005916588, "loss": 11.1928, "step": 170820 }, { "epoch": 20.557160048134776, "grad_norm": Infinity, "learning_rate": 0.00018079521845904445, "loss": 11.2115, "step": 170830 }, { "epoch": 20.558363417569193, "grad_norm": Infinity, "learning_rate": 0.0001807929767419924, "loss": 11.0234, "step": 170840 }, { "epoch": 20.55956678700361, "grad_norm": Infinity, "learning_rate": 0.00018079073490801295, "loss": 11.1885, "step": 170850 }, { "epoch": 20.560770156438025, "grad_norm": Infinity, "learning_rate": 0.00018078849295710938, "loss": 11.0748, "step": 170860 }, { "epoch": 20.561973525872443, "grad_norm": Infinity, "learning_rate": 0.0001807862508892849, "loss": 11.0524, "step": 170870 }, { "epoch": 20.56317689530686, "grad_norm": Infinity, "learning_rate": 0.0001807840087045428, "loss": 11.0896, "step": 170880 }, { "epoch": 20.564380264741274, "grad_norm": Infinity, "learning_rate": 0.00018078176640288625, "loss": 11.225, "step": 170890 }, { "epoch": 20.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00018077952398431858, "loss": 11.1962, "step": 170900 }, { "epoch": 20.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00018077728144884296, "loss": 11.1384, "step": 170910 }, { "epoch": 20.567990373044523, "grad_norm": Infinity, "learning_rate": 0.0001807750387964627, "loss": 11.1526, "step": 170920 }, { "epoch": 20.56919374247894, "grad_norm": Infinity, "learning_rate": 0.000180772796027181, "loss": 11.1168, "step": 170930 }, { "epoch": 20.57039711191336, "grad_norm": Infinity, "learning_rate": 0.00018077055314100112, "loss": 11.0887, "step": 170940 }, { "epoch": 20.571600481347772, "grad_norm": Infinity, "learning_rate": 0.00018076831013792631, "loss": 11.2093, "step": 170950 }, { "epoch": 20.57280385078219, "grad_norm": Infinity, "learning_rate": 0.00018076606701795985, "loss": 11.085, "step": 170960 }, { "epoch": 20.574007220216608, "grad_norm": Infinity, "learning_rate": 0.0001807638237811049, "loss": 11.0918, "step": 170970 }, { "epoch": 20.57521058965102, "grad_norm": Infinity, "learning_rate": 0.00018076158042736477, "loss": 11.0611, "step": 170980 }, { "epoch": 20.57641395908544, "grad_norm": Infinity, "learning_rate": 0.0001807593369567427, "loss": 11.2597, "step": 170990 }, { "epoch": 20.577617328519857, "grad_norm": Infinity, "learning_rate": 0.00018075709336924195, "loss": 11.1704, "step": 171000 }, { "epoch": 20.57882069795427, "grad_norm": Infinity, "learning_rate": 0.0001807548496648657, "loss": 11.0639, "step": 171010 }, { "epoch": 20.58002406738869, "grad_norm": Infinity, "learning_rate": 0.00018075260584361728, "loss": 11.143, "step": 171020 }, { "epoch": 20.581227436823106, "grad_norm": Infinity, "learning_rate": 0.0001807503619054999, "loss": 11.1953, "step": 171030 }, { "epoch": 20.58243080625752, "grad_norm": Infinity, "learning_rate": 0.0001807481178505168, "loss": 11.1244, "step": 171040 }, { "epoch": 20.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00018074587367867127, "loss": 11.1587, "step": 171050 }, { "epoch": 20.584837545126355, "grad_norm": Infinity, "learning_rate": 0.00018074362938996647, "loss": 11.0961, "step": 171060 }, { "epoch": 20.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00018074138498440573, "loss": 11.0667, "step": 171070 }, { "epoch": 20.587244283995187, "grad_norm": Infinity, "learning_rate": 0.00018073914046199226, "loss": 11.1862, "step": 171080 }, { "epoch": 20.588447653429604, "grad_norm": Infinity, "learning_rate": 0.00018073689582272934, "loss": 11.1575, "step": 171090 }, { "epoch": 20.589651022864018, "grad_norm": Infinity, "learning_rate": 0.0001807346510666202, "loss": 11.1074, "step": 171100 }, { "epoch": 20.590854392298436, "grad_norm": Infinity, "learning_rate": 0.00018073240619366807, "loss": 11.1979, "step": 171110 }, { "epoch": 20.592057761732853, "grad_norm": Infinity, "learning_rate": 0.00018073016120387622, "loss": 11.1893, "step": 171120 }, { "epoch": 20.593261131167267, "grad_norm": Infinity, "learning_rate": 0.0001807279160972479, "loss": 11.2165, "step": 171130 }, { "epoch": 20.594464500601685, "grad_norm": Infinity, "learning_rate": 0.00018072567087378636, "loss": 11.2428, "step": 171140 }, { "epoch": 20.595667870036102, "grad_norm": Infinity, "learning_rate": 0.00018072342553349482, "loss": 11.1328, "step": 171150 }, { "epoch": 20.596871239470516, "grad_norm": Infinity, "learning_rate": 0.00018072118007637656, "loss": 11.1582, "step": 171160 }, { "epoch": 20.598074608904934, "grad_norm": Infinity, "learning_rate": 0.00018071893450243484, "loss": 11.1275, "step": 171170 }, { "epoch": 20.59927797833935, "grad_norm": Infinity, "learning_rate": 0.0001807166888116729, "loss": 11.1685, "step": 171180 }, { "epoch": 20.600481347773766, "grad_norm": Infinity, "learning_rate": 0.000180714443004094, "loss": 11.1814, "step": 171190 }, { "epoch": 20.601684717208183, "grad_norm": Infinity, "learning_rate": 0.0001807121970797013, "loss": 11.1649, "step": 171200 }, { "epoch": 20.6028880866426, "grad_norm": Infinity, "learning_rate": 0.0001807099510384982, "loss": 11.2236, "step": 171210 }, { "epoch": 20.604091456077015, "grad_norm": Infinity, "learning_rate": 0.0001807077048804878, "loss": 11.2317, "step": 171220 }, { "epoch": 20.605294825511432, "grad_norm": Infinity, "learning_rate": 0.00018070545860567348, "loss": 11.2011, "step": 171230 }, { "epoch": 20.60649819494585, "grad_norm": Infinity, "learning_rate": 0.00018070321221405844, "loss": 11.0179, "step": 171240 }, { "epoch": 20.607701564380264, "grad_norm": Infinity, "learning_rate": 0.0001807009657056459, "loss": 11.1776, "step": 171250 }, { "epoch": 20.60890493381468, "grad_norm": Infinity, "learning_rate": 0.00018069871908043915, "loss": 11.2294, "step": 171260 }, { "epoch": 20.6101083032491, "grad_norm": Infinity, "learning_rate": 0.0001806964723384414, "loss": 11.1094, "step": 171270 }, { "epoch": 20.611311672683513, "grad_norm": Infinity, "learning_rate": 0.00018069422547965598, "loss": 11.057, "step": 171280 }, { "epoch": 20.61251504211793, "grad_norm": Infinity, "learning_rate": 0.00018069197850408605, "loss": 11.1786, "step": 171290 }, { "epoch": 20.613718411552348, "grad_norm": Infinity, "learning_rate": 0.00018068973141173493, "loss": 11.0535, "step": 171300 }, { "epoch": 20.614921780986762, "grad_norm": Infinity, "learning_rate": 0.00018068748420260584, "loss": 11.1982, "step": 171310 }, { "epoch": 20.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00018068523687670205, "loss": 11.1507, "step": 171320 }, { "epoch": 20.617328519855597, "grad_norm": Infinity, "learning_rate": 0.00018068298943402678, "loss": 11.2291, "step": 171330 }, { "epoch": 20.61853188929001, "grad_norm": Infinity, "learning_rate": 0.00018068074187458332, "loss": 11.1811, "step": 171340 }, { "epoch": 20.61973525872443, "grad_norm": Infinity, "learning_rate": 0.0001806784941983749, "loss": 11.1592, "step": 171350 }, { "epoch": 20.620938628158846, "grad_norm": Infinity, "learning_rate": 0.0001806762464054048, "loss": 11.1216, "step": 171360 }, { "epoch": 20.62214199759326, "grad_norm": Infinity, "learning_rate": 0.00018067399849567623, "loss": 11.1326, "step": 171370 }, { "epoch": 20.623345367027678, "grad_norm": Infinity, "learning_rate": 0.00018067175046919246, "loss": 11.1273, "step": 171380 }, { "epoch": 20.624548736462096, "grad_norm": Infinity, "learning_rate": 0.0001806695023259568, "loss": 11.1192, "step": 171390 }, { "epoch": 20.62575210589651, "grad_norm": Infinity, "learning_rate": 0.0001806672540659724, "loss": 11.2, "step": 171400 }, { "epoch": 20.626955475330927, "grad_norm": Infinity, "learning_rate": 0.00018066500568924258, "loss": 11.087, "step": 171410 }, { "epoch": 20.628158844765345, "grad_norm": Infinity, "learning_rate": 0.0001806627571957706, "loss": 11.1723, "step": 171420 }, { "epoch": 20.62936221419976, "grad_norm": Infinity, "learning_rate": 0.00018066050858555967, "loss": 11.114, "step": 171430 }, { "epoch": 20.630565583634176, "grad_norm": Infinity, "learning_rate": 0.00018065825985861308, "loss": 11.0254, "step": 171440 }, { "epoch": 20.63176895306859, "grad_norm": Infinity, "learning_rate": 0.00018065601101493407, "loss": 11.2022, "step": 171450 }, { "epoch": 20.632972322503008, "grad_norm": Infinity, "learning_rate": 0.00018065376205452592, "loss": 11.153, "step": 171460 }, { "epoch": 20.634175691937426, "grad_norm": Infinity, "learning_rate": 0.00018065151297739184, "loss": 11.0004, "step": 171470 }, { "epoch": 20.63537906137184, "grad_norm": Infinity, "learning_rate": 0.0001806492637835351, "loss": 11.1548, "step": 171480 }, { "epoch": 20.636582430806257, "grad_norm": Infinity, "learning_rate": 0.00018064701447295897, "loss": 11.223, "step": 171490 }, { "epoch": 20.637785800240675, "grad_norm": Infinity, "learning_rate": 0.00018064476504566672, "loss": 11.1358, "step": 171500 }, { "epoch": 20.63898916967509, "grad_norm": Infinity, "learning_rate": 0.00018064251550166158, "loss": 11.1763, "step": 171510 }, { "epoch": 20.640192539109506, "grad_norm": Infinity, "learning_rate": 0.0001806402658409468, "loss": 11.2437, "step": 171520 }, { "epoch": 20.641395908543924, "grad_norm": Infinity, "learning_rate": 0.00018063801606352567, "loss": 11.0512, "step": 171530 }, { "epoch": 20.642599277978338, "grad_norm": Infinity, "learning_rate": 0.00018063576616940137, "loss": 11.046, "step": 171540 }, { "epoch": 20.643802647412755, "grad_norm": Infinity, "learning_rate": 0.00018063351615857725, "loss": 11.0892, "step": 171550 }, { "epoch": 20.645006016847173, "grad_norm": Infinity, "learning_rate": 0.0001806312660310565, "loss": 11.1015, "step": 171560 }, { "epoch": 20.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00018062901578684242, "loss": 11.0387, "step": 171570 }, { "epoch": 20.647412755716005, "grad_norm": Infinity, "learning_rate": 0.00018062676542593824, "loss": 11.2202, "step": 171580 }, { "epoch": 20.648616125150422, "grad_norm": Infinity, "learning_rate": 0.00018062451494834723, "loss": 11.2432, "step": 171590 }, { "epoch": 20.649819494584836, "grad_norm": Infinity, "learning_rate": 0.00018062226435407261, "loss": 11.0643, "step": 171600 }, { "epoch": 20.651022864019254, "grad_norm": Infinity, "learning_rate": 0.0001806200136431177, "loss": 11.1514, "step": 171610 }, { "epoch": 20.65222623345367, "grad_norm": Infinity, "learning_rate": 0.00018061776281548574, "loss": 11.2147, "step": 171620 }, { "epoch": 20.653429602888085, "grad_norm": Infinity, "learning_rate": 0.00018061551187117995, "loss": 11.0922, "step": 171630 }, { "epoch": 20.654632972322503, "grad_norm": Infinity, "learning_rate": 0.00018061326081020363, "loss": 11.2056, "step": 171640 }, { "epoch": 20.65583634175692, "grad_norm": Infinity, "learning_rate": 0.00018061100963256, "loss": 11.0293, "step": 171650 }, { "epoch": 20.657039711191334, "grad_norm": Infinity, "learning_rate": 0.00018060875833825232, "loss": 11.2296, "step": 171660 }, { "epoch": 20.658243080625752, "grad_norm": Infinity, "learning_rate": 0.00018060650692728388, "loss": 11.2358, "step": 171670 }, { "epoch": 20.65944645006017, "grad_norm": Infinity, "learning_rate": 0.00018060425539965795, "loss": 11.1237, "step": 171680 }, { "epoch": 20.660649819494584, "grad_norm": Infinity, "learning_rate": 0.0001806020037553777, "loss": 11.1111, "step": 171690 }, { "epoch": 20.661853188929, "grad_norm": Infinity, "learning_rate": 0.00018059975199444654, "loss": 11.1746, "step": 171700 }, { "epoch": 20.66305655836342, "grad_norm": Infinity, "learning_rate": 0.00018059750011686758, "loss": 11.0923, "step": 171710 }, { "epoch": 20.664259927797833, "grad_norm": Infinity, "learning_rate": 0.00018059524812264415, "loss": 11.0619, "step": 171720 }, { "epoch": 20.66546329723225, "grad_norm": Infinity, "learning_rate": 0.0001805929960117795, "loss": 11.2698, "step": 171730 }, { "epoch": 20.666666666666668, "grad_norm": Infinity, "learning_rate": 0.00018059074378427688, "loss": 11.1528, "step": 171740 }, { "epoch": 20.667870036101082, "grad_norm": Infinity, "learning_rate": 0.00018058849144013956, "loss": 11.1561, "step": 171750 }, { "epoch": 20.6690734055355, "grad_norm": Infinity, "learning_rate": 0.0001805862389793708, "loss": 11.1256, "step": 171760 }, { "epoch": 20.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00018058398640197386, "loss": 11.1578, "step": 171770 }, { "epoch": 20.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00018058173370795198, "loss": 11.0442, "step": 171780 }, { "epoch": 20.67268351383875, "grad_norm": Infinity, "learning_rate": 0.00018057948089730843, "loss": 11.1268, "step": 171790 }, { "epoch": 20.673886883273166, "grad_norm": Infinity, "learning_rate": 0.0001805772279700465, "loss": 11.0373, "step": 171800 }, { "epoch": 20.67509025270758, "grad_norm": Infinity, "learning_rate": 0.0001805749749261694, "loss": 11.1178, "step": 171810 }, { "epoch": 20.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00018057272176568047, "loss": 11.2226, "step": 171820 }, { "epoch": 20.677496991576415, "grad_norm": Infinity, "learning_rate": 0.00018057046848858288, "loss": 11.1206, "step": 171830 }, { "epoch": 20.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00018056821509487993, "loss": 11.0796, "step": 171840 }, { "epoch": 20.679903730445247, "grad_norm": Infinity, "learning_rate": 0.0001805659615845749, "loss": 11.0635, "step": 171850 }, { "epoch": 20.681107099879664, "grad_norm": Infinity, "learning_rate": 0.00018056370795767101, "loss": 11.1728, "step": 171860 }, { "epoch": 20.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00018056145421417155, "loss": 11.1653, "step": 171870 }, { "epoch": 20.683513838748496, "grad_norm": Infinity, "learning_rate": 0.0001805592003540798, "loss": 11.1041, "step": 171880 }, { "epoch": 20.684717208182914, "grad_norm": Infinity, "learning_rate": 0.00018055694637739898, "loss": 11.285, "step": 171890 }, { "epoch": 20.685920577617328, "grad_norm": Infinity, "learning_rate": 0.00018055469228413234, "loss": 11.2047, "step": 171900 }, { "epoch": 20.687123947051745, "grad_norm": Infinity, "learning_rate": 0.0001805524380742832, "loss": 11.1747, "step": 171910 }, { "epoch": 20.688327316486163, "grad_norm": Infinity, "learning_rate": 0.0001805501837478548, "loss": 11.1744, "step": 171920 }, { "epoch": 20.689530685920577, "grad_norm": Infinity, "learning_rate": 0.0001805479293048504, "loss": 11.276, "step": 171930 }, { "epoch": 20.690734055354994, "grad_norm": Infinity, "learning_rate": 0.00018054567474527325, "loss": 11.0996, "step": 171940 }, { "epoch": 20.691937424789412, "grad_norm": Infinity, "learning_rate": 0.00018054342006912664, "loss": 11.1789, "step": 171950 }, { "epoch": 20.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00018054116527641378, "loss": 11.0768, "step": 171960 }, { "epoch": 20.694344163658243, "grad_norm": Infinity, "learning_rate": 0.00018053891036713798, "loss": 11.0863, "step": 171970 }, { "epoch": 20.69554753309266, "grad_norm": Infinity, "learning_rate": 0.0001805366553413025, "loss": 11.0095, "step": 171980 }, { "epoch": 20.696750902527075, "grad_norm": Infinity, "learning_rate": 0.0001805344001989106, "loss": 11.1681, "step": 171990 }, { "epoch": 20.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00018053214493996553, "loss": 11.1088, "step": 172000 }, { "epoch": 20.69915764139591, "grad_norm": Infinity, "learning_rate": 0.00018052988956447058, "loss": 11.1341, "step": 172010 }, { "epoch": 20.700361010830324, "grad_norm": Infinity, "learning_rate": 0.000180527634072429, "loss": 11.1545, "step": 172020 }, { "epoch": 20.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00018052537846384401, "loss": 11.1379, "step": 172030 }, { "epoch": 20.70276774969916, "grad_norm": Infinity, "learning_rate": 0.00018052312273871893, "loss": 11.1307, "step": 172040 }, { "epoch": 20.703971119133573, "grad_norm": Infinity, "learning_rate": 0.00018052086689705705, "loss": 11.2065, "step": 172050 }, { "epoch": 20.70517448856799, "grad_norm": Infinity, "learning_rate": 0.00018051861093886155, "loss": 11.1411, "step": 172060 }, { "epoch": 20.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00018051635486413578, "loss": 11.169, "step": 172070 }, { "epoch": 20.707581227436823, "grad_norm": Infinity, "learning_rate": 0.00018051409867288293, "loss": 11.1897, "step": 172080 }, { "epoch": 20.70878459687124, "grad_norm": Infinity, "learning_rate": 0.00018051184236510633, "loss": 11.0845, "step": 172090 }, { "epoch": 20.709987966305654, "grad_norm": Infinity, "learning_rate": 0.0001805095859408092, "loss": 11.0914, "step": 172100 }, { "epoch": 20.71119133574007, "grad_norm": Infinity, "learning_rate": 0.00018050732939999483, "loss": 11.1459, "step": 172110 }, { "epoch": 20.71239470517449, "grad_norm": Infinity, "learning_rate": 0.00018050507274266648, "loss": 11.2425, "step": 172120 }, { "epoch": 20.713598074608903, "grad_norm": Infinity, "learning_rate": 0.0001805028159688274, "loss": 11.1846, "step": 172130 }, { "epoch": 20.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00018050055907848088, "loss": 11.0592, "step": 172140 }, { "epoch": 20.71600481347774, "grad_norm": Infinity, "learning_rate": 0.00018049830207163015, "loss": 11.1487, "step": 172150 }, { "epoch": 20.717208182912152, "grad_norm": Infinity, "learning_rate": 0.00018049604494827854, "loss": 11.1103, "step": 172160 }, { "epoch": 20.71841155234657, "grad_norm": Infinity, "learning_rate": 0.00018049378770842927, "loss": 11.1495, "step": 172170 }, { "epoch": 20.719614921780988, "grad_norm": Infinity, "learning_rate": 0.0001804915303520856, "loss": 11.1195, "step": 172180 }, { "epoch": 20.7208182912154, "grad_norm": Infinity, "learning_rate": 0.00018048927287925084, "loss": 11.0399, "step": 172190 }, { "epoch": 20.72202166064982, "grad_norm": Infinity, "learning_rate": 0.00018048701528992819, "loss": 11.1366, "step": 172200 }, { "epoch": 20.723225030084237, "grad_norm": Infinity, "learning_rate": 0.000180484757584121, "loss": 11.1213, "step": 172210 }, { "epoch": 20.72442839951865, "grad_norm": Infinity, "learning_rate": 0.00018048249976183246, "loss": 10.9934, "step": 172220 }, { "epoch": 20.72563176895307, "grad_norm": Infinity, "learning_rate": 0.00018048024182306592, "loss": 11.1397, "step": 172230 }, { "epoch": 20.726835138387486, "grad_norm": Infinity, "learning_rate": 0.00018047798376782454, "loss": 11.0982, "step": 172240 }, { "epoch": 20.7280385078219, "grad_norm": Infinity, "learning_rate": 0.0001804757255961117, "loss": 11.1895, "step": 172250 }, { "epoch": 20.729241877256317, "grad_norm": Infinity, "learning_rate": 0.00018047346730793058, "loss": 11.164, "step": 172260 }, { "epoch": 20.730445246690735, "grad_norm": Infinity, "learning_rate": 0.0001804712089032845, "loss": 11.2391, "step": 172270 }, { "epoch": 20.73164861612515, "grad_norm": Infinity, "learning_rate": 0.0001804689503821767, "loss": 11.1563, "step": 172280 }, { "epoch": 20.732851985559567, "grad_norm": Infinity, "learning_rate": 0.00018046669174461046, "loss": 11.1299, "step": 172290 }, { "epoch": 20.734055354993984, "grad_norm": Infinity, "learning_rate": 0.00018046443299058907, "loss": 11.1237, "step": 172300 }, { "epoch": 20.735258724428398, "grad_norm": Infinity, "learning_rate": 0.00018046217412011578, "loss": 11.1034, "step": 172310 }, { "epoch": 20.736462093862816, "grad_norm": Infinity, "learning_rate": 0.00018045991513319383, "loss": 11.186, "step": 172320 }, { "epoch": 20.737665463297233, "grad_norm": Infinity, "learning_rate": 0.00018045765602982657, "loss": 11.1002, "step": 172330 }, { "epoch": 20.738868832731647, "grad_norm": Infinity, "learning_rate": 0.00018045539681001718, "loss": 11.045, "step": 172340 }, { "epoch": 20.740072202166065, "grad_norm": Infinity, "learning_rate": 0.00018045313747376898, "loss": 11.2122, "step": 172350 }, { "epoch": 20.741275571600482, "grad_norm": Infinity, "learning_rate": 0.0001804508780210852, "loss": 11.0942, "step": 172360 }, { "epoch": 20.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00018044861845196915, "loss": 11.0911, "step": 172370 }, { "epoch": 20.743682310469314, "grad_norm": Infinity, "learning_rate": 0.0001804463587664241, "loss": 11.2691, "step": 172380 }, { "epoch": 20.74488567990373, "grad_norm": Infinity, "learning_rate": 0.0001804440989644533, "loss": 11.1032, "step": 172390 }, { "epoch": 20.746089049338146, "grad_norm": Infinity, "learning_rate": 0.00018044183904606005, "loss": 10.9427, "step": 172400 }, { "epoch": 20.747292418772563, "grad_norm": Infinity, "learning_rate": 0.00018043957901124754, "loss": 11.1419, "step": 172410 }, { "epoch": 20.74849578820698, "grad_norm": Infinity, "learning_rate": 0.00018043731886001915, "loss": 11.0311, "step": 172420 }, { "epoch": 20.749699157641395, "grad_norm": Infinity, "learning_rate": 0.0001804350585923781, "loss": 11.1091, "step": 172430 }, { "epoch": 20.750902527075812, "grad_norm": Infinity, "learning_rate": 0.00018043279820832762, "loss": 11.2184, "step": 172440 }, { "epoch": 20.75210589651023, "grad_norm": Infinity, "learning_rate": 0.00018043053770787107, "loss": 11.0518, "step": 172450 }, { "epoch": 20.753309265944644, "grad_norm": Infinity, "learning_rate": 0.00018042827709101165, "loss": 11.1686, "step": 172460 }, { "epoch": 20.75451263537906, "grad_norm": Infinity, "learning_rate": 0.00018042601635775266, "loss": 11.1105, "step": 172470 }, { "epoch": 20.75571600481348, "grad_norm": Infinity, "learning_rate": 0.00018042375550809738, "loss": 11.1936, "step": 172480 }, { "epoch": 20.756919374247893, "grad_norm": Infinity, "learning_rate": 0.00018042149454204905, "loss": 11.0519, "step": 172490 }, { "epoch": 20.75812274368231, "grad_norm": Infinity, "learning_rate": 0.00018041923345961096, "loss": 11.1132, "step": 172500 }, { "epoch": 20.759326113116728, "grad_norm": Infinity, "learning_rate": 0.0001804169722607864, "loss": 11.1417, "step": 172510 }, { "epoch": 20.760529482551142, "grad_norm": Infinity, "learning_rate": 0.00018041471094557864, "loss": 11.1195, "step": 172520 }, { "epoch": 20.76173285198556, "grad_norm": Infinity, "learning_rate": 0.0001804124495139909, "loss": 11.1559, "step": 172530 }, { "epoch": 20.762936221419977, "grad_norm": Infinity, "learning_rate": 0.00018041018796602652, "loss": 11.1252, "step": 172540 }, { "epoch": 20.76413959085439, "grad_norm": Infinity, "learning_rate": 0.00018040792630168875, "loss": 11.1344, "step": 172550 }, { "epoch": 20.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00018040566452098085, "loss": 11.1014, "step": 172560 }, { "epoch": 20.766546329723226, "grad_norm": Infinity, "learning_rate": 0.0001804034026239061, "loss": 11.2088, "step": 172570 }, { "epoch": 20.76774969915764, "grad_norm": Infinity, "learning_rate": 0.00018040114061046775, "loss": 11.0848, "step": 172580 }, { "epoch": 20.768953068592058, "grad_norm": Infinity, "learning_rate": 0.00018039887848066914, "loss": 11.1741, "step": 172590 }, { "epoch": 20.770156438026476, "grad_norm": Infinity, "learning_rate": 0.00018039661623451346, "loss": 11.0817, "step": 172600 }, { "epoch": 20.77135980746089, "grad_norm": Infinity, "learning_rate": 0.00018039435387200408, "loss": 11.1698, "step": 172610 }, { "epoch": 20.772563176895307, "grad_norm": Infinity, "learning_rate": 0.0001803920913931442, "loss": 11.2413, "step": 172620 }, { "epoch": 20.773766546329725, "grad_norm": Infinity, "learning_rate": 0.00018038982879793708, "loss": 11.2063, "step": 172630 }, { "epoch": 20.77496991576414, "grad_norm": Infinity, "learning_rate": 0.00018038756608638608, "loss": 11.2472, "step": 172640 }, { "epoch": 20.776173285198556, "grad_norm": Infinity, "learning_rate": 0.0001803853032584944, "loss": 11.2178, "step": 172650 }, { "epoch": 20.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00018038304031426535, "loss": 11.1067, "step": 172660 }, { "epoch": 20.778580024067388, "grad_norm": Infinity, "learning_rate": 0.0001803807772537022, "loss": 11.0927, "step": 172670 }, { "epoch": 20.779783393501805, "grad_norm": Infinity, "learning_rate": 0.00018037851407680822, "loss": 11.0596, "step": 172680 }, { "epoch": 20.780986762936223, "grad_norm": Infinity, "learning_rate": 0.00018037625078358666, "loss": 11.2017, "step": 172690 }, { "epoch": 20.782190132370637, "grad_norm": Infinity, "learning_rate": 0.00018037398737404082, "loss": 11.2352, "step": 172700 }, { "epoch": 20.783393501805055, "grad_norm": Infinity, "learning_rate": 0.000180371723848174, "loss": 11.1597, "step": 172710 }, { "epoch": 20.784596871239472, "grad_norm": Infinity, "learning_rate": 0.00018036946020598947, "loss": 11.1804, "step": 172720 }, { "epoch": 20.785800240673886, "grad_norm": Infinity, "learning_rate": 0.00018036719644749045, "loss": 11.2442, "step": 172730 }, { "epoch": 20.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00018036493257268026, "loss": 11.0907, "step": 172740 }, { "epoch": 20.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00018036266858156218, "loss": 11.0912, "step": 172750 }, { "epoch": 20.789410348977135, "grad_norm": Infinity, "learning_rate": 0.0001803604044741395, "loss": 11.0766, "step": 172760 }, { "epoch": 20.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00018035814025041543, "loss": 11.1466, "step": 172770 }, { "epoch": 20.79181708784597, "grad_norm": Infinity, "learning_rate": 0.00018035587591039333, "loss": 11.1713, "step": 172780 }, { "epoch": 20.793020457280385, "grad_norm": Infinity, "learning_rate": 0.0001803536114540764, "loss": 11.219, "step": 172790 }, { "epoch": 20.794223826714802, "grad_norm": Infinity, "learning_rate": 0.00018035134688146798, "loss": 11.1891, "step": 172800 }, { "epoch": 20.79542719614922, "grad_norm": Infinity, "learning_rate": 0.00018034908219257133, "loss": 11.193, "step": 172810 }, { "epoch": 20.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00018034681738738972, "loss": 11.1871, "step": 172820 }, { "epoch": 20.79783393501805, "grad_norm": Infinity, "learning_rate": 0.00018034455246592642, "loss": 11.232, "step": 172830 }, { "epoch": 20.799037304452465, "grad_norm": Infinity, "learning_rate": 0.0001803422874281847, "loss": 11.1497, "step": 172840 }, { "epoch": 20.800240673886883, "grad_norm": Infinity, "learning_rate": 0.0001803400222741679, "loss": 11.1892, "step": 172850 }, { "epoch": 20.8014440433213, "grad_norm": Infinity, "learning_rate": 0.00018033775700387918, "loss": 11.1423, "step": 172860 }, { "epoch": 20.802647412755714, "grad_norm": Infinity, "learning_rate": 0.00018033549161732194, "loss": 11.1968, "step": 172870 }, { "epoch": 20.803850782190132, "grad_norm": Infinity, "learning_rate": 0.00018033322611449938, "loss": 11.1266, "step": 172880 }, { "epoch": 20.80505415162455, "grad_norm": Infinity, "learning_rate": 0.00018033096049541484, "loss": 11.1276, "step": 172890 }, { "epoch": 20.806257521058964, "grad_norm": Infinity, "learning_rate": 0.00018032869476007158, "loss": 11.2131, "step": 172900 }, { "epoch": 20.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00018032642890847283, "loss": 11.1898, "step": 172910 }, { "epoch": 20.8086642599278, "grad_norm": Infinity, "learning_rate": 0.00018032416294062192, "loss": 11.0419, "step": 172920 }, { "epoch": 20.809867629362213, "grad_norm": Infinity, "learning_rate": 0.00018032189685652212, "loss": 11.1756, "step": 172930 }, { "epoch": 20.81107099879663, "grad_norm": Infinity, "learning_rate": 0.0001803196306561767, "loss": 11.1386, "step": 172940 }, { "epoch": 20.812274368231048, "grad_norm": Infinity, "learning_rate": 0.00018031736433958892, "loss": 11.1541, "step": 172950 }, { "epoch": 20.813477737665462, "grad_norm": Infinity, "learning_rate": 0.00018031509790676214, "loss": 11.2423, "step": 172960 }, { "epoch": 20.81468110709988, "grad_norm": Infinity, "learning_rate": 0.00018031283135769952, "loss": 11.0973, "step": 172970 }, { "epoch": 20.815884476534297, "grad_norm": Infinity, "learning_rate": 0.00018031056469240445, "loss": 11.1409, "step": 172980 }, { "epoch": 20.81708784596871, "grad_norm": Infinity, "learning_rate": 0.00018030829791088016, "loss": 11.023, "step": 172990 }, { "epoch": 20.81829121540313, "grad_norm": Infinity, "learning_rate": 0.0001803060310131299, "loss": 11.0497, "step": 173000 }, { "epoch": 20.819494584837546, "grad_norm": Infinity, "learning_rate": 0.00018030376399915703, "loss": 11.2143, "step": 173010 }, { "epoch": 20.82069795427196, "grad_norm": Infinity, "learning_rate": 0.00018030149686896476, "loss": 10.9892, "step": 173020 }, { "epoch": 20.821901323706378, "grad_norm": Infinity, "learning_rate": 0.0001802992296225564, "loss": 11.1598, "step": 173030 }, { "epoch": 20.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00018029696225993524, "loss": 11.2072, "step": 173040 }, { "epoch": 20.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00018029469478110453, "loss": 11.176, "step": 173050 }, { "epoch": 20.825511432009627, "grad_norm": Infinity, "learning_rate": 0.0001802924271860676, "loss": 10.9815, "step": 173060 }, { "epoch": 20.826714801444044, "grad_norm": Infinity, "learning_rate": 0.0001802901594748277, "loss": 11.1104, "step": 173070 }, { "epoch": 20.82791817087846, "grad_norm": Infinity, "learning_rate": 0.00018028789164738812, "loss": 11.1583, "step": 173080 }, { "epoch": 20.829121540312876, "grad_norm": Infinity, "learning_rate": 0.0001802856237037521, "loss": 11.2288, "step": 173090 }, { "epoch": 20.830324909747294, "grad_norm": Infinity, "learning_rate": 0.000180283355643923, "loss": 11.23, "step": 173100 }, { "epoch": 20.831528279181708, "grad_norm": Infinity, "learning_rate": 0.00018028108746790403, "loss": 11.2722, "step": 173110 }, { "epoch": 20.832731648616125, "grad_norm": Infinity, "learning_rate": 0.00018027881917569854, "loss": 11.1888, "step": 173120 }, { "epoch": 20.833935018050543, "grad_norm": Infinity, "learning_rate": 0.0001802765507673098, "loss": 11.0948, "step": 173130 }, { "epoch": 20.835138387484957, "grad_norm": Infinity, "learning_rate": 0.00018027428224274102, "loss": 11.1078, "step": 173140 }, { "epoch": 20.836341756919374, "grad_norm": Infinity, "learning_rate": 0.00018027201360199555, "loss": 11.2255, "step": 173150 }, { "epoch": 20.837545126353792, "grad_norm": Infinity, "learning_rate": 0.00018026974484507667, "loss": 11.0824, "step": 173160 }, { "epoch": 20.838748495788206, "grad_norm": Infinity, "learning_rate": 0.00018026747597198764, "loss": 11.1556, "step": 173170 }, { "epoch": 20.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00018026520698273175, "loss": 11.1401, "step": 173180 }, { "epoch": 20.84115523465704, "grad_norm": Infinity, "learning_rate": 0.0001802629378773123, "loss": 11.2589, "step": 173190 }, { "epoch": 20.842358604091455, "grad_norm": Infinity, "learning_rate": 0.00018026066865573255, "loss": 11.2058, "step": 173200 }, { "epoch": 20.843561973525873, "grad_norm": Infinity, "learning_rate": 0.0001802583993179958, "loss": 11.1996, "step": 173210 }, { "epoch": 20.84476534296029, "grad_norm": Infinity, "learning_rate": 0.00018025612986410534, "loss": 11.0883, "step": 173220 }, { "epoch": 20.845968712394704, "grad_norm": Infinity, "learning_rate": 0.00018025386029406443, "loss": 11.1341, "step": 173230 }, { "epoch": 20.84717208182912, "grad_norm": Infinity, "learning_rate": 0.00018025159060787639, "loss": 11.1927, "step": 173240 }, { "epoch": 20.84837545126354, "grad_norm": Infinity, "learning_rate": 0.00018024932080554446, "loss": 11.1739, "step": 173250 }, { "epoch": 20.849578820697953, "grad_norm": Infinity, "learning_rate": 0.00018024705088707198, "loss": 11.0904, "step": 173260 }, { "epoch": 20.85078219013237, "grad_norm": Infinity, "learning_rate": 0.0001802447808524622, "loss": 11.159, "step": 173270 }, { "epoch": 20.85198555956679, "grad_norm": Infinity, "learning_rate": 0.0001802425107017184, "loss": 11.0507, "step": 173280 }, { "epoch": 20.853188929001202, "grad_norm": Infinity, "learning_rate": 0.00018024024043484388, "loss": 11.1128, "step": 173290 }, { "epoch": 20.85439229843562, "grad_norm": Infinity, "learning_rate": 0.00018023797005184192, "loss": 11.2208, "step": 173300 }, { "epoch": 20.855595667870038, "grad_norm": Infinity, "learning_rate": 0.00018023569955271577, "loss": 11.1155, "step": 173310 }, { "epoch": 20.85679903730445, "grad_norm": Infinity, "learning_rate": 0.0001802334289374688, "loss": 11.174, "step": 173320 }, { "epoch": 20.85800240673887, "grad_norm": Infinity, "learning_rate": 0.00018023115820610427, "loss": 11.0918, "step": 173330 }, { "epoch": 20.859205776173287, "grad_norm": Infinity, "learning_rate": 0.0001802288873586254, "loss": 10.9758, "step": 173340 }, { "epoch": 20.8604091456077, "grad_norm": Infinity, "learning_rate": 0.0001802266163950355, "loss": 11.2076, "step": 173350 }, { "epoch": 20.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00018022434531533794, "loss": 11.1549, "step": 173360 }, { "epoch": 20.862815884476536, "grad_norm": Infinity, "learning_rate": 0.0001802220741195359, "loss": 11.2134, "step": 173370 }, { "epoch": 20.86401925391095, "grad_norm": Infinity, "learning_rate": 0.00018021980280763275, "loss": 11.2158, "step": 173380 }, { "epoch": 20.865222623345367, "grad_norm": Infinity, "learning_rate": 0.00018021753137963172, "loss": 11.0854, "step": 173390 }, { "epoch": 20.866425992779785, "grad_norm": Infinity, "learning_rate": 0.00018021525983553613, "loss": 11.2, "step": 173400 }, { "epoch": 20.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00018021298817534923, "loss": 11.0791, "step": 173410 }, { "epoch": 20.868832731648617, "grad_norm": Infinity, "learning_rate": 0.00018021071639907435, "loss": 11.0264, "step": 173420 }, { "epoch": 20.870036101083034, "grad_norm": Infinity, "learning_rate": 0.00018020844450671475, "loss": 11.1703, "step": 173430 }, { "epoch": 20.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00018020617249827375, "loss": 11.1173, "step": 173440 }, { "epoch": 20.872442839951866, "grad_norm": Infinity, "learning_rate": 0.0001802039003737546, "loss": 11.25, "step": 173450 }, { "epoch": 20.87364620938628, "grad_norm": Infinity, "learning_rate": 0.0001802016281331606, "loss": 11.1557, "step": 173460 }, { "epoch": 20.874849578820697, "grad_norm": Infinity, "learning_rate": 0.00018019935577649502, "loss": 11.0223, "step": 173470 }, { "epoch": 20.876052948255115, "grad_norm": Infinity, "learning_rate": 0.0001801970833037612, "loss": 11.1554, "step": 173480 }, { "epoch": 20.87725631768953, "grad_norm": Infinity, "learning_rate": 0.00018019481071496242, "loss": 11.0719, "step": 173490 }, { "epoch": 20.878459687123947, "grad_norm": Infinity, "learning_rate": 0.00018019253801010194, "loss": 11.2168, "step": 173500 }, { "epoch": 20.879663056558364, "grad_norm": Infinity, "learning_rate": 0.00018019026518918303, "loss": 11.0576, "step": 173510 }, { "epoch": 20.880866425992778, "grad_norm": Infinity, "learning_rate": 0.00018018799225220903, "loss": 11.0965, "step": 173520 }, { "epoch": 20.882069795427196, "grad_norm": Infinity, "learning_rate": 0.0001801857191991832, "loss": 11.0693, "step": 173530 }, { "epoch": 20.883273164861613, "grad_norm": Infinity, "learning_rate": 0.00018018344603010884, "loss": 11.1587, "step": 173540 }, { "epoch": 20.884476534296027, "grad_norm": Infinity, "learning_rate": 0.00018018117274498923, "loss": 11.2679, "step": 173550 }, { "epoch": 20.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00018017889934382768, "loss": 11.0494, "step": 173560 }, { "epoch": 20.886883273164862, "grad_norm": Infinity, "learning_rate": 0.00018017662582662744, "loss": 11.1472, "step": 173570 }, { "epoch": 20.888086642599276, "grad_norm": Infinity, "learning_rate": 0.00018017435219339187, "loss": 11.0953, "step": 173580 }, { "epoch": 20.889290012033694, "grad_norm": Infinity, "learning_rate": 0.0001801720784441242, "loss": 11.0539, "step": 173590 }, { "epoch": 20.89049338146811, "grad_norm": Infinity, "learning_rate": 0.00018016980457882773, "loss": 11.0687, "step": 173600 }, { "epoch": 20.891696750902526, "grad_norm": Infinity, "learning_rate": 0.00018016753059750578, "loss": 11.1559, "step": 173610 }, { "epoch": 20.892900120336943, "grad_norm": Infinity, "learning_rate": 0.0001801652565001616, "loss": 11.1049, "step": 173620 }, { "epoch": 20.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00018016298228679852, "loss": 11.1504, "step": 173630 }, { "epoch": 20.895306859205775, "grad_norm": Infinity, "learning_rate": 0.00018016070795741978, "loss": 11.1768, "step": 173640 }, { "epoch": 20.896510228640192, "grad_norm": Infinity, "learning_rate": 0.00018015843351202872, "loss": 11.1212, "step": 173650 }, { "epoch": 20.89771359807461, "grad_norm": Infinity, "learning_rate": 0.00018015615895062862, "loss": 11.1894, "step": 173660 }, { "epoch": 20.898916967509024, "grad_norm": Infinity, "learning_rate": 0.00018015388427322277, "loss": 11.0539, "step": 173670 }, { "epoch": 20.90012033694344, "grad_norm": Infinity, "learning_rate": 0.00018015160947981447, "loss": 11.114, "step": 173680 }, { "epoch": 20.90132370637786, "grad_norm": Infinity, "learning_rate": 0.00018014933457040697, "loss": 11.0948, "step": 173690 }, { "epoch": 20.902527075812273, "grad_norm": Infinity, "learning_rate": 0.0001801470595450036, "loss": 11.0909, "step": 173700 }, { "epoch": 20.90373044524669, "grad_norm": Infinity, "learning_rate": 0.0001801447844036077, "loss": 11.1265, "step": 173710 }, { "epoch": 20.904933814681108, "grad_norm": Infinity, "learning_rate": 0.00018014250914622247, "loss": 11.0683, "step": 173720 }, { "epoch": 20.906137184115522, "grad_norm": Infinity, "learning_rate": 0.00018014023377285122, "loss": 11.0873, "step": 173730 }, { "epoch": 20.90734055354994, "grad_norm": Infinity, "learning_rate": 0.0001801379582834973, "loss": 11.1706, "step": 173740 }, { "epoch": 20.908543922984357, "grad_norm": Infinity, "learning_rate": 0.00018013568267816397, "loss": 11.1743, "step": 173750 }, { "epoch": 20.90974729241877, "grad_norm": Infinity, "learning_rate": 0.0001801334069568545, "loss": 11.0408, "step": 173760 }, { "epoch": 20.91095066185319, "grad_norm": Infinity, "learning_rate": 0.00018013113111957222, "loss": 11.0911, "step": 173770 }, { "epoch": 20.912154031287606, "grad_norm": Infinity, "learning_rate": 0.0001801288551663204, "loss": 11.1863, "step": 173780 }, { "epoch": 20.91335740072202, "grad_norm": Infinity, "learning_rate": 0.00018012657909710237, "loss": 11.1792, "step": 173790 }, { "epoch": 20.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00018012430291192136, "loss": 11.2082, "step": 173800 }, { "epoch": 20.915764139590856, "grad_norm": Infinity, "learning_rate": 0.00018012202661078073, "loss": 11.1883, "step": 173810 }, { "epoch": 20.91696750902527, "grad_norm": Infinity, "learning_rate": 0.00018011975019368375, "loss": 11.0674, "step": 173820 }, { "epoch": 20.918170878459687, "grad_norm": Infinity, "learning_rate": 0.0001801174736606337, "loss": 11.1176, "step": 173830 }, { "epoch": 20.919374247894105, "grad_norm": Infinity, "learning_rate": 0.00018011519701163387, "loss": 11.1708, "step": 173840 }, { "epoch": 20.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00018011292024668757, "loss": 11.1329, "step": 173850 }, { "epoch": 20.921780986762936, "grad_norm": Infinity, "learning_rate": 0.0001801106433657981, "loss": 11.0115, "step": 173860 }, { "epoch": 20.922984356197354, "grad_norm": Infinity, "learning_rate": 0.0001801083663689688, "loss": 11.0324, "step": 173870 }, { "epoch": 20.924187725631768, "grad_norm": Infinity, "learning_rate": 0.00018010608925620285, "loss": 11.27, "step": 173880 }, { "epoch": 20.925391095066185, "grad_norm": Infinity, "learning_rate": 0.00018010381202750365, "loss": 11.1616, "step": 173890 }, { "epoch": 20.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00018010153468287442, "loss": 11.1951, "step": 173900 }, { "epoch": 20.927797833935017, "grad_norm": Infinity, "learning_rate": 0.00018009925722231853, "loss": 11.2645, "step": 173910 }, { "epoch": 20.929001203369435, "grad_norm": Infinity, "learning_rate": 0.0001800969796458392, "loss": 11.157, "step": 173920 }, { "epoch": 20.930204572803852, "grad_norm": Infinity, "learning_rate": 0.0001800947019534398, "loss": 11.1395, "step": 173930 }, { "epoch": 20.931407942238266, "grad_norm": Infinity, "learning_rate": 0.00018009242414512356, "loss": 11.1996, "step": 173940 }, { "epoch": 20.932611311672684, "grad_norm": Infinity, "learning_rate": 0.00018009014622089384, "loss": 11.1171, "step": 173950 }, { "epoch": 20.9338146811071, "grad_norm": Infinity, "learning_rate": 0.0001800878681807539, "loss": 11.2146, "step": 173960 }, { "epoch": 20.935018050541515, "grad_norm": Infinity, "learning_rate": 0.00018008559002470704, "loss": 11.1485, "step": 173970 }, { "epoch": 20.936221419975933, "grad_norm": Infinity, "learning_rate": 0.00018008331175275655, "loss": 11.1549, "step": 173980 }, { "epoch": 20.93742478941035, "grad_norm": Infinity, "learning_rate": 0.00018008103336490574, "loss": 11.1026, "step": 173990 }, { "epoch": 20.938628158844764, "grad_norm": Infinity, "learning_rate": 0.0001800787548611579, "loss": 11.1206, "step": 174000 }, { "epoch": 20.939831528279182, "grad_norm": Infinity, "learning_rate": 0.00018007647624151634, "loss": 11.0992, "step": 174010 }, { "epoch": 20.9410348977136, "grad_norm": Infinity, "learning_rate": 0.00018007419750598434, "loss": 11.1332, "step": 174020 }, { "epoch": 20.942238267148014, "grad_norm": Infinity, "learning_rate": 0.0001800719186545652, "loss": 11.1261, "step": 174030 }, { "epoch": 20.94344163658243, "grad_norm": Infinity, "learning_rate": 0.0001800696396872622, "loss": 11.1842, "step": 174040 }, { "epoch": 20.94464500601685, "grad_norm": Infinity, "learning_rate": 0.00018006736060407875, "loss": 11.1692, "step": 174050 }, { "epoch": 20.945848375451263, "grad_norm": Infinity, "learning_rate": 0.00018006508140501799, "loss": 11.1657, "step": 174060 }, { "epoch": 20.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00018006280209008333, "loss": 11.0909, "step": 174070 }, { "epoch": 20.948255114320098, "grad_norm": Infinity, "learning_rate": 0.00018006052265927798, "loss": 11.0538, "step": 174080 }, { "epoch": 20.949458483754512, "grad_norm": Infinity, "learning_rate": 0.0001800582431126053, "loss": 11.0875, "step": 174090 }, { "epoch": 20.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00018005596345006862, "loss": 11.1126, "step": 174100 }, { "epoch": 20.951865222623347, "grad_norm": Infinity, "learning_rate": 0.00018005368367167117, "loss": 11.1622, "step": 174110 }, { "epoch": 20.95306859205776, "grad_norm": Infinity, "learning_rate": 0.00018005140377741626, "loss": 11.1136, "step": 174120 }, { "epoch": 20.95427196149218, "grad_norm": Infinity, "learning_rate": 0.0001800491237673072, "loss": 11.2881, "step": 174130 }, { "epoch": 20.955475330926596, "grad_norm": Infinity, "learning_rate": 0.00018004684364134732, "loss": 11.0797, "step": 174140 }, { "epoch": 20.95667870036101, "grad_norm": Infinity, "learning_rate": 0.00018004456339953988, "loss": 11.1497, "step": 174150 }, { "epoch": 20.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00018004228304188818, "loss": 11.0825, "step": 174160 }, { "epoch": 20.959085439229845, "grad_norm": Infinity, "learning_rate": 0.00018004000256839557, "loss": 11.0937, "step": 174170 }, { "epoch": 20.96028880866426, "grad_norm": Infinity, "learning_rate": 0.0001800377219790653, "loss": 11.0773, "step": 174180 }, { "epoch": 20.961492178098677, "grad_norm": Infinity, "learning_rate": 0.0001800354412739007, "loss": 11.1334, "step": 174190 }, { "epoch": 20.96269554753309, "grad_norm": Infinity, "learning_rate": 0.00018003316045290505, "loss": 11.1376, "step": 174200 }, { "epoch": 20.96389891696751, "grad_norm": Infinity, "learning_rate": 0.00018003087951608162, "loss": 11.1124, "step": 174210 }, { "epoch": 20.965102286401926, "grad_norm": Infinity, "learning_rate": 0.00018002859846343378, "loss": 11.175, "step": 174220 }, { "epoch": 20.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00018002631729496478, "loss": 11.1262, "step": 174230 }, { "epoch": 20.967509025270758, "grad_norm": Infinity, "learning_rate": 0.00018002403601067796, "loss": 11.04, "step": 174240 }, { "epoch": 20.968712394705175, "grad_norm": Infinity, "learning_rate": 0.0001800217546105766, "loss": 11.2401, "step": 174250 }, { "epoch": 20.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00018001947309466402, "loss": 11.0645, "step": 174260 }, { "epoch": 20.971119133574007, "grad_norm": Infinity, "learning_rate": 0.00018001719146294346, "loss": 11.2181, "step": 174270 }, { "epoch": 20.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00018001490971541833, "loss": 11.1317, "step": 174280 }, { "epoch": 20.97352587244284, "grad_norm": Infinity, "learning_rate": 0.00018001262785209183, "loss": 11.0471, "step": 174290 }, { "epoch": 20.974729241877256, "grad_norm": Infinity, "learning_rate": 0.0001800103458729673, "loss": 11.2109, "step": 174300 }, { "epoch": 20.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00018000806377804806, "loss": 11.1007, "step": 174310 }, { "epoch": 20.977135980746088, "grad_norm": Infinity, "learning_rate": 0.00018000578156733741, "loss": 11.2041, "step": 174320 }, { "epoch": 20.978339350180505, "grad_norm": Infinity, "learning_rate": 0.00018000349924083862, "loss": 11.1238, "step": 174330 }, { "epoch": 20.979542719614923, "grad_norm": Infinity, "learning_rate": 0.00018000121679855502, "loss": 11.1583, "step": 174340 }, { "epoch": 20.980746089049337, "grad_norm": Infinity, "learning_rate": 0.00017999893424048994, "loss": 11.226, "step": 174350 }, { "epoch": 20.981949458483754, "grad_norm": Infinity, "learning_rate": 0.00017999665156664661, "loss": 11.1327, "step": 174360 }, { "epoch": 20.983152827918172, "grad_norm": Infinity, "learning_rate": 0.0001799943687770284, "loss": 11.1759, "step": 174370 }, { "epoch": 20.984356197352586, "grad_norm": Infinity, "learning_rate": 0.00017999208587163854, "loss": 11.2303, "step": 174380 }, { "epoch": 20.985559566787003, "grad_norm": Infinity, "learning_rate": 0.0001799898028504804, "loss": 11.2441, "step": 174390 }, { "epoch": 20.98676293622142, "grad_norm": Infinity, "learning_rate": 0.0001799875197135573, "loss": 11.1695, "step": 174400 }, { "epoch": 20.987966305655835, "grad_norm": Infinity, "learning_rate": 0.00017998523646087247, "loss": 10.9997, "step": 174410 }, { "epoch": 20.989169675090253, "grad_norm": Infinity, "learning_rate": 0.0001799829530924293, "loss": 11.2273, "step": 174420 }, { "epoch": 20.99037304452467, "grad_norm": Infinity, "learning_rate": 0.000179980669608231, "loss": 11.1485, "step": 174430 }, { "epoch": 20.991576413959084, "grad_norm": Infinity, "learning_rate": 0.00017997838600828097, "loss": 11.2727, "step": 174440 }, { "epoch": 20.9927797833935, "grad_norm": Infinity, "learning_rate": 0.00017997610229258246, "loss": 11.1697, "step": 174450 }, { "epoch": 20.99398315282792, "grad_norm": Infinity, "learning_rate": 0.00017997381846113875, "loss": 11.0973, "step": 174460 }, { "epoch": 20.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00017997153451395322, "loss": 11.1313, "step": 174470 }, { "epoch": 20.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00017996925045102912, "loss": 11.1774, "step": 174480 }, { "epoch": 20.99759326113117, "grad_norm": Infinity, "learning_rate": 0.00017996696627236978, "loss": 11.2222, "step": 174490 }, { "epoch": 20.998796630565582, "grad_norm": Infinity, "learning_rate": 0.00017996468197797847, "loss": 11.1186, "step": 174500 }, { "epoch": 21.0, "grad_norm": Infinity, "learning_rate": 0.00017996239756785855, "loss": 11.1338, "step": 174510 }, { "epoch": 21.0, "eval_loss": 11.139604568481445, "eval_runtime": 120.1616, "eval_samples_per_second": 61.476, "eval_steps_per_second": 7.69, "step": 174510 }, { "epoch": 21.001203369434418, "grad_norm": Infinity, "learning_rate": 0.0001799601130420133, "loss": 11.0037, "step": 174520 }, { "epoch": 21.00240673886883, "grad_norm": Infinity, "learning_rate": 0.000179957828400446, "loss": 11.0998, "step": 174530 }, { "epoch": 21.00361010830325, "grad_norm": Infinity, "learning_rate": 0.00017995554364316, "loss": 11.1011, "step": 174540 }, { "epoch": 21.004813477737667, "grad_norm": Infinity, "learning_rate": 0.00017995325877015857, "loss": 11.1358, "step": 174550 }, { "epoch": 21.00601684717208, "grad_norm": Infinity, "learning_rate": 0.00017995097378144506, "loss": 11.194, "step": 174560 }, { "epoch": 21.0072202166065, "grad_norm": Infinity, "learning_rate": 0.00017994868867702275, "loss": 11.1631, "step": 174570 }, { "epoch": 21.008423586040916, "grad_norm": Infinity, "learning_rate": 0.00017994640345689493, "loss": 11.2274, "step": 174580 }, { "epoch": 21.00962695547533, "grad_norm": Infinity, "learning_rate": 0.00017994411812106493, "loss": 11.0714, "step": 174590 }, { "epoch": 21.010830324909747, "grad_norm": Infinity, "learning_rate": 0.00017994183266953605, "loss": 11.0898, "step": 174600 }, { "epoch": 21.012033694344165, "grad_norm": Infinity, "learning_rate": 0.00017993954710231161, "loss": 11.0924, "step": 174610 }, { "epoch": 21.01323706377858, "grad_norm": Infinity, "learning_rate": 0.00017993726141939492, "loss": 11.2257, "step": 174620 }, { "epoch": 21.014440433212997, "grad_norm": Infinity, "learning_rate": 0.00017993497562078926, "loss": 11.1523, "step": 174630 }, { "epoch": 21.015643802647414, "grad_norm": Infinity, "learning_rate": 0.00017993268970649794, "loss": 11.0388, "step": 174640 }, { "epoch": 21.016847172081828, "grad_norm": Infinity, "learning_rate": 0.00017993040367652433, "loss": 11.1122, "step": 174650 }, { "epoch": 21.018050541516246, "grad_norm": Infinity, "learning_rate": 0.00017992811753087166, "loss": 11.162, "step": 174660 }, { "epoch": 21.019253910950663, "grad_norm": Infinity, "learning_rate": 0.00017992583126954327, "loss": 11.088, "step": 174670 }, { "epoch": 21.020457280385077, "grad_norm": Infinity, "learning_rate": 0.00017992354489254248, "loss": 11.2144, "step": 174680 }, { "epoch": 21.021660649819495, "grad_norm": Infinity, "learning_rate": 0.00017992125839987259, "loss": 11.1326, "step": 174690 }, { "epoch": 21.022864019253912, "grad_norm": Infinity, "learning_rate": 0.00017991897179153688, "loss": 11.204, "step": 174700 }, { "epoch": 21.024067388688326, "grad_norm": Infinity, "learning_rate": 0.0001799166850675387, "loss": 11.2448, "step": 174710 }, { "epoch": 21.025270758122744, "grad_norm": Infinity, "learning_rate": 0.00017991439822788137, "loss": 11.0121, "step": 174720 }, { "epoch": 21.02647412755716, "grad_norm": Infinity, "learning_rate": 0.00017991211127256818, "loss": 11.2258, "step": 174730 }, { "epoch": 21.027677496991576, "grad_norm": Infinity, "learning_rate": 0.00017990982420160238, "loss": 11.1731, "step": 174740 }, { "epoch": 21.028880866425993, "grad_norm": Infinity, "learning_rate": 0.00017990753701498737, "loss": 11.1214, "step": 174750 }, { "epoch": 21.03008423586041, "grad_norm": Infinity, "learning_rate": 0.00017990524971272644, "loss": 11.101, "step": 174760 }, { "epoch": 21.031287605294825, "grad_norm": Infinity, "learning_rate": 0.00017990296229482286, "loss": 11.0451, "step": 174770 }, { "epoch": 21.032490974729242, "grad_norm": Infinity, "learning_rate": 0.00017990067476127995, "loss": 11.0666, "step": 174780 }, { "epoch": 21.03369434416366, "grad_norm": Infinity, "learning_rate": 0.0001798983871121011, "loss": 11.1861, "step": 174790 }, { "epoch": 21.034897713598074, "grad_norm": Infinity, "learning_rate": 0.00017989609934728954, "loss": 11.2275, "step": 174800 }, { "epoch": 21.03610108303249, "grad_norm": Infinity, "learning_rate": 0.00017989381146684856, "loss": 11.0896, "step": 174810 }, { "epoch": 21.03730445246691, "grad_norm": Infinity, "learning_rate": 0.0001798915234707815, "loss": 11.0762, "step": 174820 }, { "epoch": 21.038507821901323, "grad_norm": Infinity, "learning_rate": 0.00017988923535909172, "loss": 11.1872, "step": 174830 }, { "epoch": 21.03971119133574, "grad_norm": Infinity, "learning_rate": 0.00017988694713178249, "loss": 11.1857, "step": 174840 }, { "epoch": 21.040914560770158, "grad_norm": Infinity, "learning_rate": 0.0001798846587888571, "loss": 11.2217, "step": 174850 }, { "epoch": 21.042117930204572, "grad_norm": Infinity, "learning_rate": 0.00017988237033031892, "loss": 11.2011, "step": 174860 }, { "epoch": 21.04332129963899, "grad_norm": Infinity, "learning_rate": 0.0001798800817561712, "loss": 11.1494, "step": 174870 }, { "epoch": 21.044524669073404, "grad_norm": Infinity, "learning_rate": 0.00017987779306641728, "loss": 11.15, "step": 174880 }, { "epoch": 21.04572803850782, "grad_norm": Infinity, "learning_rate": 0.00017987550426106047, "loss": 11.1517, "step": 174890 }, { "epoch": 21.04693140794224, "grad_norm": Infinity, "learning_rate": 0.0001798732153401041, "loss": 11.0835, "step": 174900 }, { "epoch": 21.048134777376653, "grad_norm": Infinity, "learning_rate": 0.0001798709263035515, "loss": 11.1482, "step": 174910 }, { "epoch": 21.04933814681107, "grad_norm": Infinity, "learning_rate": 0.0001798686371514059, "loss": 11.1244, "step": 174920 }, { "epoch": 21.050541516245488, "grad_norm": Infinity, "learning_rate": 0.00017986634788367067, "loss": 11.2063, "step": 174930 }, { "epoch": 21.051744885679902, "grad_norm": Infinity, "learning_rate": 0.00017986405850034913, "loss": 11.1859, "step": 174940 }, { "epoch": 21.05294825511432, "grad_norm": Infinity, "learning_rate": 0.00017986176900144457, "loss": 11.1427, "step": 174950 }, { "epoch": 21.054151624548737, "grad_norm": Infinity, "learning_rate": 0.00017985947938696032, "loss": 11.1569, "step": 174960 }, { "epoch": 21.05535499398315, "grad_norm": Infinity, "learning_rate": 0.00017985718965689968, "loss": 11.0558, "step": 174970 }, { "epoch": 21.05655836341757, "grad_norm": Infinity, "learning_rate": 0.00017985489981126596, "loss": 11.1052, "step": 174980 }, { "epoch": 21.057761732851986, "grad_norm": Infinity, "learning_rate": 0.0001798526098500625, "loss": 11.155, "step": 174990 }, { "epoch": 21.0589651022864, "grad_norm": Infinity, "learning_rate": 0.0001798503197732926, "loss": 11.1384, "step": 175000 }, { "epoch": 21.060168471720818, "grad_norm": Infinity, "learning_rate": 0.00017984802958095957, "loss": 10.9837, "step": 175010 }, { "epoch": 21.061371841155236, "grad_norm": Infinity, "learning_rate": 0.0001798457392730667, "loss": 11.1521, "step": 175020 }, { "epoch": 21.06257521058965, "grad_norm": Infinity, "learning_rate": 0.00017984344884961736, "loss": 11.0969, "step": 175030 }, { "epoch": 21.063778580024067, "grad_norm": Infinity, "learning_rate": 0.00017984115831061486, "loss": 11.0779, "step": 175040 }, { "epoch": 21.064981949458485, "grad_norm": Infinity, "learning_rate": 0.00017983886765606246, "loss": 11.042, "step": 175050 }, { "epoch": 21.0661853188929, "grad_norm": Infinity, "learning_rate": 0.0001798365768859635, "loss": 11.2205, "step": 175060 }, { "epoch": 21.067388688327316, "grad_norm": Infinity, "learning_rate": 0.0001798342860003213, "loss": 11.1827, "step": 175070 }, { "epoch": 21.068592057761734, "grad_norm": Infinity, "learning_rate": 0.0001798319949991392, "loss": 11.1997, "step": 175080 }, { "epoch": 21.069795427196148, "grad_norm": Infinity, "learning_rate": 0.0001798297038824205, "loss": 11.072, "step": 175090 }, { "epoch": 21.070998796630565, "grad_norm": Infinity, "learning_rate": 0.00017982741265016848, "loss": 11.169, "step": 175100 }, { "epoch": 21.072202166064983, "grad_norm": Infinity, "learning_rate": 0.00017982512130238653, "loss": 11.1379, "step": 175110 }, { "epoch": 21.073405535499397, "grad_norm": Infinity, "learning_rate": 0.00017982282983907788, "loss": 11.2083, "step": 175120 }, { "epoch": 21.074608904933815, "grad_norm": Infinity, "learning_rate": 0.0001798205382602459, "loss": 11.2237, "step": 175130 }, { "epoch": 21.075812274368232, "grad_norm": Infinity, "learning_rate": 0.0001798182465658939, "loss": 11.1737, "step": 175140 }, { "epoch": 21.077015643802646, "grad_norm": Infinity, "learning_rate": 0.0001798159547560252, "loss": 11.1462, "step": 175150 }, { "epoch": 21.078219013237064, "grad_norm": Infinity, "learning_rate": 0.0001798136628306431, "loss": 11.1439, "step": 175160 }, { "epoch": 21.07942238267148, "grad_norm": Infinity, "learning_rate": 0.0001798113707897509, "loss": 11.066, "step": 175170 }, { "epoch": 21.080625752105895, "grad_norm": Infinity, "learning_rate": 0.00017980907863335196, "loss": 11.1613, "step": 175180 }, { "epoch": 21.081829121540313, "grad_norm": Infinity, "learning_rate": 0.00017980678636144955, "loss": 11.0563, "step": 175190 }, { "epoch": 21.08303249097473, "grad_norm": Infinity, "learning_rate": 0.00017980449397404707, "loss": 11.1427, "step": 175200 }, { "epoch": 21.084235860409144, "grad_norm": Infinity, "learning_rate": 0.00017980220147114775, "loss": 11.1727, "step": 175210 }, { "epoch": 21.085439229843562, "grad_norm": Infinity, "learning_rate": 0.00017979990885275495, "loss": 11.2068, "step": 175220 }, { "epoch": 21.08664259927798, "grad_norm": Infinity, "learning_rate": 0.00017979761611887197, "loss": 11.1031, "step": 175230 }, { "epoch": 21.087845968712394, "grad_norm": Infinity, "learning_rate": 0.00017979532326950216, "loss": 11.1011, "step": 175240 }, { "epoch": 21.08904933814681, "grad_norm": Infinity, "learning_rate": 0.0001797930303046488, "loss": 11.0811, "step": 175250 }, { "epoch": 21.09025270758123, "grad_norm": Infinity, "learning_rate": 0.00017979073722431524, "loss": 11.1707, "step": 175260 }, { "epoch": 21.091456077015643, "grad_norm": Infinity, "learning_rate": 0.00017978844402850475, "loss": 11.164, "step": 175270 }, { "epoch": 21.09265944645006, "grad_norm": Infinity, "learning_rate": 0.00017978615071722072, "loss": 11.0354, "step": 175280 }, { "epoch": 21.093862815884478, "grad_norm": Infinity, "learning_rate": 0.00017978385729046644, "loss": 11.229, "step": 175290 }, { "epoch": 21.095066185318892, "grad_norm": Infinity, "learning_rate": 0.00017978156374824517, "loss": 10.9969, "step": 175300 }, { "epoch": 21.09626955475331, "grad_norm": Infinity, "learning_rate": 0.0001797792700905603, "loss": 11.1586, "step": 175310 }, { "epoch": 21.097472924187727, "grad_norm": Infinity, "learning_rate": 0.00017977697631741513, "loss": 11.0191, "step": 175320 }, { "epoch": 21.09867629362214, "grad_norm": Infinity, "learning_rate": 0.00017977468242881298, "loss": 11.0642, "step": 175330 }, { "epoch": 21.09987966305656, "grad_norm": Infinity, "learning_rate": 0.0001797723884247572, "loss": 11.118, "step": 175340 }, { "epoch": 21.101083032490976, "grad_norm": Infinity, "learning_rate": 0.00017977009430525105, "loss": 11.16, "step": 175350 }, { "epoch": 21.10228640192539, "grad_norm": Infinity, "learning_rate": 0.00017976780007029788, "loss": 11.0596, "step": 175360 }, { "epoch": 21.103489771359808, "grad_norm": Infinity, "learning_rate": 0.000179765505719901, "loss": 11.1998, "step": 175370 }, { "epoch": 21.104693140794225, "grad_norm": Infinity, "learning_rate": 0.00017976321125406374, "loss": 11.1866, "step": 175380 }, { "epoch": 21.10589651022864, "grad_norm": Infinity, "learning_rate": 0.00017976091667278946, "loss": 11.1864, "step": 175390 }, { "epoch": 21.107099879663057, "grad_norm": Infinity, "learning_rate": 0.0001797586219760814, "loss": 11.1361, "step": 175400 }, { "epoch": 21.108303249097474, "grad_norm": Infinity, "learning_rate": 0.00017975632716394296, "loss": 11.1497, "step": 175410 }, { "epoch": 21.10950661853189, "grad_norm": Infinity, "learning_rate": 0.00017975403223637737, "loss": 11.0481, "step": 175420 }, { "epoch": 21.110709987966306, "grad_norm": Infinity, "learning_rate": 0.00017975173719338803, "loss": 11.074, "step": 175430 }, { "epoch": 21.111913357400724, "grad_norm": Infinity, "learning_rate": 0.00017974944203497824, "loss": 11.1384, "step": 175440 }, { "epoch": 21.113116726835138, "grad_norm": Infinity, "learning_rate": 0.0001797471467611513, "loss": 11.1639, "step": 175450 }, { "epoch": 21.114320096269555, "grad_norm": Infinity, "learning_rate": 0.0001797448513719106, "loss": 11.0698, "step": 175460 }, { "epoch": 21.115523465703973, "grad_norm": Infinity, "learning_rate": 0.00017974255586725935, "loss": 11.1396, "step": 175470 }, { "epoch": 21.116726835138387, "grad_norm": Infinity, "learning_rate": 0.00017974026024720097, "loss": 11.1011, "step": 175480 }, { "epoch": 21.117930204572804, "grad_norm": Infinity, "learning_rate": 0.00017973796451173875, "loss": 11.0587, "step": 175490 }, { "epoch": 21.119133574007222, "grad_norm": Infinity, "learning_rate": 0.00017973566866087598, "loss": 11.1652, "step": 175500 }, { "epoch": 21.120336943441636, "grad_norm": Infinity, "learning_rate": 0.00017973337269461604, "loss": 11.2248, "step": 175510 }, { "epoch": 21.121540312876053, "grad_norm": Infinity, "learning_rate": 0.0001797310766129622, "loss": 11.0861, "step": 175520 }, { "epoch": 21.12274368231047, "grad_norm": Infinity, "learning_rate": 0.00017972878041591783, "loss": 11.1933, "step": 175530 }, { "epoch": 21.123947051744885, "grad_norm": Infinity, "learning_rate": 0.00017972648410348623, "loss": 11.0916, "step": 175540 }, { "epoch": 21.125150421179303, "grad_norm": Infinity, "learning_rate": 0.0001797241876756707, "loss": 11.1908, "step": 175550 }, { "epoch": 21.126353790613717, "grad_norm": Infinity, "learning_rate": 0.0001797218911324746, "loss": 11.1684, "step": 175560 }, { "epoch": 21.127557160048134, "grad_norm": Infinity, "learning_rate": 0.00017971959447390124, "loss": 11.2317, "step": 175570 }, { "epoch": 21.128760529482552, "grad_norm": Infinity, "learning_rate": 0.00017971729769995396, "loss": 11.1633, "step": 175580 }, { "epoch": 21.129963898916966, "grad_norm": Infinity, "learning_rate": 0.00017971500081063603, "loss": 11.0136, "step": 175590 }, { "epoch": 21.131167268351383, "grad_norm": Infinity, "learning_rate": 0.00017971270380595086, "loss": 11.105, "step": 175600 }, { "epoch": 21.1323706377858, "grad_norm": Infinity, "learning_rate": 0.00017971040668590172, "loss": 11.1389, "step": 175610 }, { "epoch": 21.133574007220215, "grad_norm": Infinity, "learning_rate": 0.0001797081094504919, "loss": 11.1323, "step": 175620 }, { "epoch": 21.134777376654633, "grad_norm": Infinity, "learning_rate": 0.00017970581209972482, "loss": 11.1936, "step": 175630 }, { "epoch": 21.13598074608905, "grad_norm": Infinity, "learning_rate": 0.00017970351463360371, "loss": 11.1362, "step": 175640 }, { "epoch": 21.137184115523464, "grad_norm": Infinity, "learning_rate": 0.00017970121705213193, "loss": 11.1278, "step": 175650 }, { "epoch": 21.13838748495788, "grad_norm": Infinity, "learning_rate": 0.00017969891935531285, "loss": 11.1496, "step": 175660 }, { "epoch": 21.1395908543923, "grad_norm": Infinity, "learning_rate": 0.00017969662154314973, "loss": 11.1811, "step": 175670 }, { "epoch": 21.140794223826713, "grad_norm": Infinity, "learning_rate": 0.00017969432361564594, "loss": 11.0146, "step": 175680 }, { "epoch": 21.14199759326113, "grad_norm": Infinity, "learning_rate": 0.00017969202557280477, "loss": 11.2045, "step": 175690 }, { "epoch": 21.14320096269555, "grad_norm": Infinity, "learning_rate": 0.00017968972741462958, "loss": 11.1125, "step": 175700 }, { "epoch": 21.144404332129962, "grad_norm": Infinity, "learning_rate": 0.00017968742914112367, "loss": 11.2293, "step": 175710 }, { "epoch": 21.14560770156438, "grad_norm": Infinity, "learning_rate": 0.00017968513075229038, "loss": 11.1283, "step": 175720 }, { "epoch": 21.146811070998798, "grad_norm": Infinity, "learning_rate": 0.00017968283224813303, "loss": 11.0612, "step": 175730 }, { "epoch": 21.14801444043321, "grad_norm": Infinity, "learning_rate": 0.00017968053362865492, "loss": 11.074, "step": 175740 }, { "epoch": 21.14921780986763, "grad_norm": Infinity, "learning_rate": 0.00017967823489385945, "loss": 11.164, "step": 175750 }, { "epoch": 21.150421179302047, "grad_norm": Infinity, "learning_rate": 0.00017967593604374988, "loss": 11.1004, "step": 175760 }, { "epoch": 21.15162454873646, "grad_norm": Infinity, "learning_rate": 0.00017967363707832958, "loss": 11.1457, "step": 175770 }, { "epoch": 21.15282791817088, "grad_norm": Infinity, "learning_rate": 0.00017967133799760184, "loss": 11.3109, "step": 175780 }, { "epoch": 21.154031287605296, "grad_norm": Infinity, "learning_rate": 0.00017966903880157, "loss": 11.103, "step": 175790 }, { "epoch": 21.15523465703971, "grad_norm": Infinity, "learning_rate": 0.00017966673949023737, "loss": 11.1572, "step": 175800 }, { "epoch": 21.156438026474127, "grad_norm": Infinity, "learning_rate": 0.00017966444006360733, "loss": 11.1813, "step": 175810 }, { "epoch": 21.157641395908545, "grad_norm": Infinity, "learning_rate": 0.00017966214052168319, "loss": 11.1746, "step": 175820 }, { "epoch": 21.15884476534296, "grad_norm": Infinity, "learning_rate": 0.00017965984086446826, "loss": 11.2223, "step": 175830 }, { "epoch": 21.160048134777377, "grad_norm": Infinity, "learning_rate": 0.00017965754109196586, "loss": 11.1993, "step": 175840 }, { "epoch": 21.161251504211794, "grad_norm": Infinity, "learning_rate": 0.00017965524120417932, "loss": 11.1044, "step": 175850 }, { "epoch": 21.162454873646208, "grad_norm": Infinity, "learning_rate": 0.000179652941201112, "loss": 11.2017, "step": 175860 }, { "epoch": 21.163658243080626, "grad_norm": Infinity, "learning_rate": 0.0001796506410827672, "loss": 11.2222, "step": 175870 }, { "epoch": 21.164861612515043, "grad_norm": Infinity, "learning_rate": 0.00017964834084914822, "loss": 11.198, "step": 175880 }, { "epoch": 21.166064981949457, "grad_norm": Infinity, "learning_rate": 0.0001796460405002585, "loss": 11.0992, "step": 175890 }, { "epoch": 21.167268351383875, "grad_norm": Infinity, "learning_rate": 0.00017964374003610125, "loss": 11.1333, "step": 175900 }, { "epoch": 21.168471720818292, "grad_norm": Infinity, "learning_rate": 0.00017964143945667984, "loss": 11.1504, "step": 175910 }, { "epoch": 21.169675090252706, "grad_norm": Infinity, "learning_rate": 0.00017963913876199763, "loss": 11.19, "step": 175920 }, { "epoch": 21.170878459687124, "grad_norm": Infinity, "learning_rate": 0.0001796368379520579, "loss": 11.1756, "step": 175930 }, { "epoch": 21.17208182912154, "grad_norm": Infinity, "learning_rate": 0.00017963453702686403, "loss": 11.1727, "step": 175940 }, { "epoch": 21.173285198555956, "grad_norm": Infinity, "learning_rate": 0.0001796322359864193, "loss": 11.113, "step": 175950 }, { "epoch": 21.174488567990373, "grad_norm": Infinity, "learning_rate": 0.00017962993483072704, "loss": 11.1965, "step": 175960 }, { "epoch": 21.17569193742479, "grad_norm": Infinity, "learning_rate": 0.00017962763355979067, "loss": 11.0987, "step": 175970 }, { "epoch": 21.176895306859205, "grad_norm": Infinity, "learning_rate": 0.00017962533217361342, "loss": 11.1025, "step": 175980 }, { "epoch": 21.178098676293622, "grad_norm": Infinity, "learning_rate": 0.00017962303067219865, "loss": 11.1558, "step": 175990 }, { "epoch": 21.17930204572804, "grad_norm": Infinity, "learning_rate": 0.0001796207290555497, "loss": 11.0474, "step": 176000 }, { "epoch": 21.180505415162454, "grad_norm": Infinity, "learning_rate": 0.0001796184273236699, "loss": 11.1678, "step": 176010 }, { "epoch": 21.18170878459687, "grad_norm": Infinity, "learning_rate": 0.00017961612547656258, "loss": 11.0584, "step": 176020 }, { "epoch": 21.18291215403129, "grad_norm": Infinity, "learning_rate": 0.00017961382351423105, "loss": 11.1787, "step": 176030 }, { "epoch": 21.184115523465703, "grad_norm": Infinity, "learning_rate": 0.0001796115214366787, "loss": 11.0706, "step": 176040 }, { "epoch": 21.18531889290012, "grad_norm": Infinity, "learning_rate": 0.00017960921924390882, "loss": 11.0195, "step": 176050 }, { "epoch": 21.186522262334538, "grad_norm": Infinity, "learning_rate": 0.00017960691693592472, "loss": 11.0011, "step": 176060 }, { "epoch": 21.187725631768952, "grad_norm": Infinity, "learning_rate": 0.00017960461451272977, "loss": 11.1685, "step": 176070 }, { "epoch": 21.18892900120337, "grad_norm": Infinity, "learning_rate": 0.00017960231197432726, "loss": 11.1919, "step": 176080 }, { "epoch": 21.190132370637787, "grad_norm": Infinity, "learning_rate": 0.00017960000932072058, "loss": 11.1221, "step": 176090 }, { "epoch": 21.1913357400722, "grad_norm": Infinity, "learning_rate": 0.00017959770655191304, "loss": 11.1551, "step": 176100 }, { "epoch": 21.19253910950662, "grad_norm": Infinity, "learning_rate": 0.00017959540366790794, "loss": 11.1241, "step": 176110 }, { "epoch": 21.193742478941036, "grad_norm": Infinity, "learning_rate": 0.0001795931006687087, "loss": 11.1233, "step": 176120 }, { "epoch": 21.19494584837545, "grad_norm": Infinity, "learning_rate": 0.00017959079755431852, "loss": 11.1798, "step": 176130 }, { "epoch": 21.196149217809868, "grad_norm": Infinity, "learning_rate": 0.00017958849432474084, "loss": 11.1839, "step": 176140 }, { "epoch": 21.197352587244286, "grad_norm": Infinity, "learning_rate": 0.00017958619097997893, "loss": 11.225, "step": 176150 }, { "epoch": 21.1985559566787, "grad_norm": Infinity, "learning_rate": 0.00017958388752003617, "loss": 11.1674, "step": 176160 }, { "epoch": 21.199759326113117, "grad_norm": Infinity, "learning_rate": 0.0001795815839449159, "loss": 11.2309, "step": 176170 }, { "epoch": 21.200962695547535, "grad_norm": Infinity, "learning_rate": 0.0001795792802546214, "loss": 11.1631, "step": 176180 }, { "epoch": 21.20216606498195, "grad_norm": Infinity, "learning_rate": 0.00017957697644915605, "loss": 11.1528, "step": 176190 }, { "epoch": 21.203369434416366, "grad_norm": Infinity, "learning_rate": 0.00017957467252852319, "loss": 11.1066, "step": 176200 }, { "epoch": 21.204572803850784, "grad_norm": Infinity, "learning_rate": 0.0001795723684927261, "loss": 11.1619, "step": 176210 }, { "epoch": 21.205776173285198, "grad_norm": Infinity, "learning_rate": 0.00017957006434176815, "loss": 11.0592, "step": 176220 }, { "epoch": 21.206979542719615, "grad_norm": Infinity, "learning_rate": 0.00017956776007565267, "loss": 11.1485, "step": 176230 }, { "epoch": 21.20818291215403, "grad_norm": Infinity, "learning_rate": 0.000179565455694383, "loss": 11.2799, "step": 176240 }, { "epoch": 21.209386281588447, "grad_norm": Infinity, "learning_rate": 0.00017956315119796248, "loss": 11.0851, "step": 176250 }, { "epoch": 21.210589651022865, "grad_norm": Infinity, "learning_rate": 0.0001795608465863944, "loss": 11.171, "step": 176260 }, { "epoch": 21.21179302045728, "grad_norm": Infinity, "learning_rate": 0.00017955854185968217, "loss": 11.1642, "step": 176270 }, { "epoch": 21.212996389891696, "grad_norm": Infinity, "learning_rate": 0.0001795562370178291, "loss": 11.2228, "step": 176280 }, { "epoch": 21.214199759326114, "grad_norm": Infinity, "learning_rate": 0.00017955393206083847, "loss": 11.1731, "step": 176290 }, { "epoch": 21.215403128760528, "grad_norm": Infinity, "learning_rate": 0.00017955162698871368, "loss": 11.1642, "step": 176300 }, { "epoch": 21.216606498194945, "grad_norm": Infinity, "learning_rate": 0.00017954932180145805, "loss": 11.1389, "step": 176310 }, { "epoch": 21.217809867629363, "grad_norm": Infinity, "learning_rate": 0.0001795470164990749, "loss": 11.2054, "step": 176320 }, { "epoch": 21.219013237063777, "grad_norm": Infinity, "learning_rate": 0.0001795447110815676, "loss": 11.1474, "step": 176330 }, { "epoch": 21.220216606498195, "grad_norm": Infinity, "learning_rate": 0.00017954240554893947, "loss": 11.0303, "step": 176340 }, { "epoch": 21.221419975932612, "grad_norm": Infinity, "learning_rate": 0.0001795400999011938, "loss": 11.2145, "step": 176350 }, { "epoch": 21.222623345367026, "grad_norm": Infinity, "learning_rate": 0.000179537794138334, "loss": 11.1101, "step": 176360 }, { "epoch": 21.223826714801444, "grad_norm": Infinity, "learning_rate": 0.00017953548826036336, "loss": 11.1943, "step": 176370 }, { "epoch": 21.22503008423586, "grad_norm": Infinity, "learning_rate": 0.00017953318226728523, "loss": 11.1527, "step": 176380 }, { "epoch": 21.226233453670275, "grad_norm": Infinity, "learning_rate": 0.00017953087615910294, "loss": 11.2004, "step": 176390 }, { "epoch": 21.227436823104693, "grad_norm": Infinity, "learning_rate": 0.00017952856993581984, "loss": 11.1601, "step": 176400 }, { "epoch": 21.22864019253911, "grad_norm": Infinity, "learning_rate": 0.0001795262635974393, "loss": 11.0547, "step": 176410 }, { "epoch": 21.229843561973524, "grad_norm": Infinity, "learning_rate": 0.0001795239571439646, "loss": 11.0258, "step": 176420 }, { "epoch": 21.231046931407942, "grad_norm": Infinity, "learning_rate": 0.00017952165057539906, "loss": 11.1601, "step": 176430 }, { "epoch": 21.23225030084236, "grad_norm": Infinity, "learning_rate": 0.0001795193438917461, "loss": 11.1631, "step": 176440 }, { "epoch": 21.233453670276774, "grad_norm": Infinity, "learning_rate": 0.00017951703709300903, "loss": 11.0708, "step": 176450 }, { "epoch": 21.23465703971119, "grad_norm": Infinity, "learning_rate": 0.00017951473017919115, "loss": 11.194, "step": 176460 }, { "epoch": 21.23586040914561, "grad_norm": Infinity, "learning_rate": 0.0001795124231502958, "loss": 11.1157, "step": 176470 }, { "epoch": 21.237063778580023, "grad_norm": Infinity, "learning_rate": 0.0001795101160063264, "loss": 11.2554, "step": 176480 }, { "epoch": 21.23826714801444, "grad_norm": Infinity, "learning_rate": 0.0001795078087472862, "loss": 11.1329, "step": 176490 }, { "epoch": 21.239470517448858, "grad_norm": Infinity, "learning_rate": 0.0001795055013731786, "loss": 11.1903, "step": 176500 }, { "epoch": 21.240673886883272, "grad_norm": Infinity, "learning_rate": 0.00017950319388400688, "loss": 11.1696, "step": 176510 }, { "epoch": 21.24187725631769, "grad_norm": Infinity, "learning_rate": 0.00017950088627977442, "loss": 11.0923, "step": 176520 }, { "epoch": 21.243080625752107, "grad_norm": Infinity, "learning_rate": 0.00017949857856048454, "loss": 11.1562, "step": 176530 }, { "epoch": 21.24428399518652, "grad_norm": Infinity, "learning_rate": 0.00017949627072614058, "loss": 11.1852, "step": 176540 }, { "epoch": 21.24548736462094, "grad_norm": Infinity, "learning_rate": 0.00017949396277674594, "loss": 11.2694, "step": 176550 }, { "epoch": 21.246690734055356, "grad_norm": Infinity, "learning_rate": 0.00017949165471230384, "loss": 11.1554, "step": 176560 }, { "epoch": 21.24789410348977, "grad_norm": Infinity, "learning_rate": 0.00017948934653281773, "loss": 11.081, "step": 176570 }, { "epoch": 21.249097472924188, "grad_norm": Infinity, "learning_rate": 0.00017948703823829088, "loss": 11.2633, "step": 176580 }, { "epoch": 21.250300842358605, "grad_norm": Infinity, "learning_rate": 0.00017948472982872671, "loss": 11.0338, "step": 176590 }, { "epoch": 21.25150421179302, "grad_norm": Infinity, "learning_rate": 0.00017948242130412848, "loss": 11.1355, "step": 176600 }, { "epoch": 21.252707581227437, "grad_norm": Infinity, "learning_rate": 0.00017948011266449957, "loss": 11.1979, "step": 176610 }, { "epoch": 21.253910950661854, "grad_norm": Infinity, "learning_rate": 0.0001794778039098433, "loss": 11.0544, "step": 176620 }, { "epoch": 21.25511432009627, "grad_norm": Infinity, "learning_rate": 0.00017947549504016306, "loss": 11.1873, "step": 176630 }, { "epoch": 21.256317689530686, "grad_norm": Infinity, "learning_rate": 0.00017947318605546211, "loss": 11.317, "step": 176640 }, { "epoch": 21.257521058965104, "grad_norm": Infinity, "learning_rate": 0.00017947087695574388, "loss": 11.2159, "step": 176650 }, { "epoch": 21.258724428399518, "grad_norm": Infinity, "learning_rate": 0.00017946856774101163, "loss": 11.1819, "step": 176660 }, { "epoch": 21.259927797833935, "grad_norm": Infinity, "learning_rate": 0.0001794662584112688, "loss": 11.0854, "step": 176670 }, { "epoch": 21.261131167268353, "grad_norm": Infinity, "learning_rate": 0.00017946394896651863, "loss": 11.2274, "step": 176680 }, { "epoch": 21.262334536702767, "grad_norm": Infinity, "learning_rate": 0.00017946163940676452, "loss": 11.2985, "step": 176690 }, { "epoch": 21.263537906137184, "grad_norm": Infinity, "learning_rate": 0.0001794593297320098, "loss": 11.1206, "step": 176700 }, { "epoch": 21.264741275571602, "grad_norm": Infinity, "learning_rate": 0.00017945701994225778, "loss": 11.2249, "step": 176710 }, { "epoch": 21.265944645006016, "grad_norm": Infinity, "learning_rate": 0.0001794547100375119, "loss": 11.0384, "step": 176720 }, { "epoch": 21.267148014440433, "grad_norm": Infinity, "learning_rate": 0.0001794524000177754, "loss": 11.1015, "step": 176730 }, { "epoch": 21.26835138387485, "grad_norm": Infinity, "learning_rate": 0.00017945008988305166, "loss": 11.1558, "step": 176740 }, { "epoch": 21.269554753309265, "grad_norm": Infinity, "learning_rate": 0.00017944777963334403, "loss": 11.1869, "step": 176750 }, { "epoch": 21.270758122743683, "grad_norm": Infinity, "learning_rate": 0.00017944546926865584, "loss": 11.0758, "step": 176760 }, { "epoch": 21.2719614921781, "grad_norm": Infinity, "learning_rate": 0.00017944315878899042, "loss": 11.134, "step": 176770 }, { "epoch": 21.273164861612514, "grad_norm": Infinity, "learning_rate": 0.00017944084819435118, "loss": 11.1136, "step": 176780 }, { "epoch": 21.27436823104693, "grad_norm": Infinity, "learning_rate": 0.0001794385374847414, "loss": 11.2003, "step": 176790 }, { "epoch": 21.27557160048135, "grad_norm": Infinity, "learning_rate": 0.00017943622666016445, "loss": 11.0783, "step": 176800 }, { "epoch": 21.276774969915763, "grad_norm": Infinity, "learning_rate": 0.00017943391572062365, "loss": 11.1038, "step": 176810 }, { "epoch": 21.27797833935018, "grad_norm": Infinity, "learning_rate": 0.00017943160466612237, "loss": 11.1468, "step": 176820 }, { "epoch": 21.2791817087846, "grad_norm": Infinity, "learning_rate": 0.00017942929349666398, "loss": 10.9171, "step": 176830 }, { "epoch": 21.280385078219012, "grad_norm": Infinity, "learning_rate": 0.00017942698221225174, "loss": 11.0567, "step": 176840 }, { "epoch": 21.28158844765343, "grad_norm": Infinity, "learning_rate": 0.00017942467081288906, "loss": 11.2039, "step": 176850 }, { "epoch": 21.282791817087848, "grad_norm": Infinity, "learning_rate": 0.0001794223592985793, "loss": 11.2548, "step": 176860 }, { "epoch": 21.28399518652226, "grad_norm": Infinity, "learning_rate": 0.00017942004766932575, "loss": 11.097, "step": 176870 }, { "epoch": 21.28519855595668, "grad_norm": Infinity, "learning_rate": 0.0001794177359251318, "loss": 11.2278, "step": 176880 }, { "epoch": 21.286401925391097, "grad_norm": Infinity, "learning_rate": 0.00017941542406600078, "loss": 11.1818, "step": 176890 }, { "epoch": 21.28760529482551, "grad_norm": Infinity, "learning_rate": 0.000179413112091936, "loss": 11.2058, "step": 176900 }, { "epoch": 21.28880866425993, "grad_norm": Infinity, "learning_rate": 0.0001794108000029409, "loss": 11.1192, "step": 176910 }, { "epoch": 21.290012033694346, "grad_norm": Infinity, "learning_rate": 0.00017940848779901872, "loss": 11.1827, "step": 176920 }, { "epoch": 21.29121540312876, "grad_norm": Infinity, "learning_rate": 0.00017940617548017286, "loss": 11.1723, "step": 176930 }, { "epoch": 21.292418772563177, "grad_norm": Infinity, "learning_rate": 0.00017940386304640666, "loss": 11.1733, "step": 176940 }, { "epoch": 21.29362214199759, "grad_norm": Infinity, "learning_rate": 0.00017940155049772348, "loss": 11.1362, "step": 176950 }, { "epoch": 21.29482551143201, "grad_norm": Infinity, "learning_rate": 0.00017939923783412661, "loss": 11.2565, "step": 176960 }, { "epoch": 21.296028880866427, "grad_norm": Infinity, "learning_rate": 0.00017939692505561952, "loss": 11.0689, "step": 176970 }, { "epoch": 21.29723225030084, "grad_norm": Infinity, "learning_rate": 0.0001793946121622054, "loss": 11.171, "step": 176980 }, { "epoch": 21.29843561973526, "grad_norm": Infinity, "learning_rate": 0.0001793922991538877, "loss": 11.1513, "step": 176990 }, { "epoch": 21.299638989169676, "grad_norm": Infinity, "learning_rate": 0.00017938998603066975, "loss": 11.0984, "step": 177000 }, { "epoch": 21.30084235860409, "grad_norm": Infinity, "learning_rate": 0.00017938767279255489, "loss": 11.1211, "step": 177010 }, { "epoch": 21.302045728038507, "grad_norm": Infinity, "learning_rate": 0.0001793853594395465, "loss": 11.1399, "step": 177020 }, { "epoch": 21.303249097472925, "grad_norm": Infinity, "learning_rate": 0.00017938304597164782, "loss": 11.0951, "step": 177030 }, { "epoch": 21.30445246690734, "grad_norm": Infinity, "learning_rate": 0.00017938073238886232, "loss": 11.1552, "step": 177040 }, { "epoch": 21.305655836341757, "grad_norm": Infinity, "learning_rate": 0.00017937841869119326, "loss": 11.0345, "step": 177050 }, { "epoch": 21.306859205776174, "grad_norm": Infinity, "learning_rate": 0.0001793761048786441, "loss": 11.1799, "step": 177060 }, { "epoch": 21.308062575210588, "grad_norm": Infinity, "learning_rate": 0.00017937379095121807, "loss": 11.1944, "step": 177070 }, { "epoch": 21.309265944645006, "grad_norm": Infinity, "learning_rate": 0.00017937147690891858, "loss": 11.043, "step": 177080 }, { "epoch": 21.310469314079423, "grad_norm": Infinity, "learning_rate": 0.00017936916275174895, "loss": 11.1842, "step": 177090 }, { "epoch": 21.311672683513837, "grad_norm": Infinity, "learning_rate": 0.0001793668484797126, "loss": 11.134, "step": 177100 }, { "epoch": 21.312876052948255, "grad_norm": Infinity, "learning_rate": 0.00017936453409281278, "loss": 11.159, "step": 177110 }, { "epoch": 21.314079422382672, "grad_norm": Infinity, "learning_rate": 0.00017936221959105288, "loss": 11.1535, "step": 177120 }, { "epoch": 21.315282791817086, "grad_norm": Infinity, "learning_rate": 0.0001793599049744363, "loss": 11.2275, "step": 177130 }, { "epoch": 21.316486161251504, "grad_norm": Infinity, "learning_rate": 0.0001793575902429663, "loss": 11.1148, "step": 177140 }, { "epoch": 21.31768953068592, "grad_norm": Infinity, "learning_rate": 0.00017935527539664626, "loss": 11.1367, "step": 177150 }, { "epoch": 21.318892900120336, "grad_norm": Infinity, "learning_rate": 0.0001793529604354796, "loss": 11.1976, "step": 177160 }, { "epoch": 21.320096269554753, "grad_norm": Infinity, "learning_rate": 0.00017935064535946955, "loss": 11.1856, "step": 177170 }, { "epoch": 21.32129963898917, "grad_norm": Infinity, "learning_rate": 0.0001793483301686196, "loss": 11.2488, "step": 177180 }, { "epoch": 21.322503008423585, "grad_norm": Infinity, "learning_rate": 0.00017934601486293295, "loss": 11.1916, "step": 177190 }, { "epoch": 21.323706377858002, "grad_norm": Infinity, "learning_rate": 0.00017934369944241308, "loss": 11.0954, "step": 177200 }, { "epoch": 21.32490974729242, "grad_norm": Infinity, "learning_rate": 0.00017934138390706327, "loss": 11.018, "step": 177210 }, { "epoch": 21.326113116726834, "grad_norm": Infinity, "learning_rate": 0.00017933906825688686, "loss": 11.0858, "step": 177220 }, { "epoch": 21.32731648616125, "grad_norm": Infinity, "learning_rate": 0.0001793367524918873, "loss": 11.1276, "step": 177230 }, { "epoch": 21.32851985559567, "grad_norm": Infinity, "learning_rate": 0.0001793344366120678, "loss": 11.0127, "step": 177240 }, { "epoch": 21.329723225030083, "grad_norm": Infinity, "learning_rate": 0.00017933212061743182, "loss": 11.1165, "step": 177250 }, { "epoch": 21.3309265944645, "grad_norm": Infinity, "learning_rate": 0.00017932980450798266, "loss": 11.1582, "step": 177260 }, { "epoch": 21.332129963898918, "grad_norm": Infinity, "learning_rate": 0.0001793274882837237, "loss": 11.1298, "step": 177270 }, { "epoch": 21.333333333333332, "grad_norm": Infinity, "learning_rate": 0.00017932517194465828, "loss": 11.2097, "step": 177280 }, { "epoch": 21.33453670276775, "grad_norm": Infinity, "learning_rate": 0.00017932285549078974, "loss": 11.2558, "step": 177290 }, { "epoch": 21.335740072202167, "grad_norm": Infinity, "learning_rate": 0.00017932053892212143, "loss": 11.2204, "step": 177300 }, { "epoch": 21.33694344163658, "grad_norm": Infinity, "learning_rate": 0.00017931822223865676, "loss": 11.1964, "step": 177310 }, { "epoch": 21.338146811071, "grad_norm": Infinity, "learning_rate": 0.000179315905440399, "loss": 11.0676, "step": 177320 }, { "epoch": 21.339350180505416, "grad_norm": Infinity, "learning_rate": 0.0001793135885273516, "loss": 11.0256, "step": 177330 }, { "epoch": 21.34055354993983, "grad_norm": Infinity, "learning_rate": 0.00017931127149951778, "loss": 11.1466, "step": 177340 }, { "epoch": 21.341756919374248, "grad_norm": Infinity, "learning_rate": 0.000179308954356901, "loss": 11.0819, "step": 177350 }, { "epoch": 21.342960288808666, "grad_norm": Infinity, "learning_rate": 0.0001793066370995046, "loss": 11.1377, "step": 177360 }, { "epoch": 21.34416365824308, "grad_norm": Infinity, "learning_rate": 0.0001793043197273319, "loss": 11.0592, "step": 177370 }, { "epoch": 21.345367027677497, "grad_norm": Infinity, "learning_rate": 0.00017930200224038626, "loss": 11.0842, "step": 177380 }, { "epoch": 21.346570397111915, "grad_norm": Infinity, "learning_rate": 0.00017929968463867107, "loss": 11.1002, "step": 177390 }, { "epoch": 21.34777376654633, "grad_norm": Infinity, "learning_rate": 0.0001792973669221896, "loss": 11.2114, "step": 177400 }, { "epoch": 21.348977135980746, "grad_norm": Infinity, "learning_rate": 0.00017929504909094534, "loss": 11.138, "step": 177410 }, { "epoch": 21.350180505415164, "grad_norm": Infinity, "learning_rate": 0.00017929273114494152, "loss": 11.0799, "step": 177420 }, { "epoch": 21.351383874849578, "grad_norm": Infinity, "learning_rate": 0.00017929041308418155, "loss": 11.2754, "step": 177430 }, { "epoch": 21.352587244283995, "grad_norm": Infinity, "learning_rate": 0.00017928809490866878, "loss": 11.1381, "step": 177440 }, { "epoch": 21.353790613718413, "grad_norm": Infinity, "learning_rate": 0.00017928577661840658, "loss": 11.1783, "step": 177450 }, { "epoch": 21.354993983152827, "grad_norm": Infinity, "learning_rate": 0.00017928345821339824, "loss": 11.158, "step": 177460 }, { "epoch": 21.356197352587245, "grad_norm": Infinity, "learning_rate": 0.0001792811396936472, "loss": 11.172, "step": 177470 }, { "epoch": 21.357400722021662, "grad_norm": Infinity, "learning_rate": 0.00017927882105915675, "loss": 11.2189, "step": 177480 }, { "epoch": 21.358604091456076, "grad_norm": Infinity, "learning_rate": 0.0001792765023099303, "loss": 11.0759, "step": 177490 }, { "epoch": 21.359807460890494, "grad_norm": Infinity, "learning_rate": 0.00017927418344597116, "loss": 11.0952, "step": 177500 }, { "epoch": 21.36101083032491, "grad_norm": Infinity, "learning_rate": 0.0001792718644672827, "loss": 11.2672, "step": 177510 }, { "epoch": 21.362214199759325, "grad_norm": Infinity, "learning_rate": 0.0001792695453738683, "loss": 11.1047, "step": 177520 }, { "epoch": 21.363417569193743, "grad_norm": Infinity, "learning_rate": 0.0001792672261657313, "loss": 11.1513, "step": 177530 }, { "epoch": 21.36462093862816, "grad_norm": Infinity, "learning_rate": 0.000179264906842875, "loss": 11.1857, "step": 177540 }, { "epoch": 21.365824308062574, "grad_norm": Infinity, "learning_rate": 0.00017926258740530288, "loss": 11.0515, "step": 177550 }, { "epoch": 21.367027677496992, "grad_norm": Infinity, "learning_rate": 0.00017926026785301818, "loss": 11.097, "step": 177560 }, { "epoch": 21.36823104693141, "grad_norm": Infinity, "learning_rate": 0.00017925794818602432, "loss": 11.0533, "step": 177570 }, { "epoch": 21.369434416365824, "grad_norm": Infinity, "learning_rate": 0.00017925562840432466, "loss": 11.1579, "step": 177580 }, { "epoch": 21.37063778580024, "grad_norm": Infinity, "learning_rate": 0.00017925330850792254, "loss": 11.0705, "step": 177590 }, { "epoch": 21.37184115523466, "grad_norm": Infinity, "learning_rate": 0.00017925098849682126, "loss": 11.2422, "step": 177600 }, { "epoch": 21.373044524669073, "grad_norm": Infinity, "learning_rate": 0.00017924866837102429, "loss": 11.1921, "step": 177610 }, { "epoch": 21.37424789410349, "grad_norm": Infinity, "learning_rate": 0.00017924634813053488, "loss": 11.2032, "step": 177620 }, { "epoch": 21.375451263537904, "grad_norm": Infinity, "learning_rate": 0.00017924402777535646, "loss": 11.1397, "step": 177630 }, { "epoch": 21.376654632972322, "grad_norm": Infinity, "learning_rate": 0.0001792417073054924, "loss": 11.1978, "step": 177640 }, { "epoch": 21.37785800240674, "grad_norm": Infinity, "learning_rate": 0.000179239386720946, "loss": 11.1372, "step": 177650 }, { "epoch": 21.379061371841154, "grad_norm": Infinity, "learning_rate": 0.00017923706602172065, "loss": 11.2596, "step": 177660 }, { "epoch": 21.38026474127557, "grad_norm": Infinity, "learning_rate": 0.0001792347452078197, "loss": 11.1096, "step": 177670 }, { "epoch": 21.38146811070999, "grad_norm": Infinity, "learning_rate": 0.00017923242427924648, "loss": 11.0652, "step": 177680 }, { "epoch": 21.382671480144403, "grad_norm": Infinity, "learning_rate": 0.0001792301032360044, "loss": 11.2321, "step": 177690 }, { "epoch": 21.38387484957882, "grad_norm": Infinity, "learning_rate": 0.0001792277820780968, "loss": 11.1908, "step": 177700 }, { "epoch": 21.385078219013238, "grad_norm": Infinity, "learning_rate": 0.00017922546080552707, "loss": 11.1872, "step": 177710 }, { "epoch": 21.386281588447652, "grad_norm": Infinity, "learning_rate": 0.00017922313941829852, "loss": 11.2141, "step": 177720 }, { "epoch": 21.38748495788207, "grad_norm": Infinity, "learning_rate": 0.0001792208179164145, "loss": 11.1941, "step": 177730 }, { "epoch": 21.388688327316487, "grad_norm": Infinity, "learning_rate": 0.00017921849629987842, "loss": 11.2062, "step": 177740 }, { "epoch": 21.3898916967509, "grad_norm": Infinity, "learning_rate": 0.00017921617456869365, "loss": 11.1188, "step": 177750 }, { "epoch": 21.39109506618532, "grad_norm": Infinity, "learning_rate": 0.00017921385272286346, "loss": 10.9906, "step": 177760 }, { "epoch": 21.392298435619736, "grad_norm": Infinity, "learning_rate": 0.0001792115307623913, "loss": 11.2406, "step": 177770 }, { "epoch": 21.39350180505415, "grad_norm": Infinity, "learning_rate": 0.00017920920868728048, "loss": 11.1204, "step": 177780 }, { "epoch": 21.394705174488568, "grad_norm": Infinity, "learning_rate": 0.00017920688649753438, "loss": 11.1523, "step": 177790 }, { "epoch": 21.395908543922985, "grad_norm": Infinity, "learning_rate": 0.0001792045641931564, "loss": 11.1775, "step": 177800 }, { "epoch": 21.3971119133574, "grad_norm": Infinity, "learning_rate": 0.00017920224177414982, "loss": 11.0372, "step": 177810 }, { "epoch": 21.398315282791817, "grad_norm": Infinity, "learning_rate": 0.00017919991924051804, "loss": 11.0467, "step": 177820 }, { "epoch": 21.399518652226234, "grad_norm": Infinity, "learning_rate": 0.00017919759659226444, "loss": 11.1863, "step": 177830 }, { "epoch": 21.40072202166065, "grad_norm": Infinity, "learning_rate": 0.00017919527382939237, "loss": 11.1306, "step": 177840 }, { "epoch": 21.401925391095066, "grad_norm": Infinity, "learning_rate": 0.00017919295095190517, "loss": 11.2199, "step": 177850 }, { "epoch": 21.403128760529484, "grad_norm": Infinity, "learning_rate": 0.00017919062795980622, "loss": 11.0467, "step": 177860 }, { "epoch": 21.404332129963898, "grad_norm": Infinity, "learning_rate": 0.0001791883048530989, "loss": 11.1701, "step": 177870 }, { "epoch": 21.405535499398315, "grad_norm": Infinity, "learning_rate": 0.00017918598163178652, "loss": 11.1886, "step": 177880 }, { "epoch": 21.406738868832733, "grad_norm": Infinity, "learning_rate": 0.00017918365829587248, "loss": 11.059, "step": 177890 }, { "epoch": 21.407942238267147, "grad_norm": Infinity, "learning_rate": 0.00017918133484536014, "loss": 11.1136, "step": 177900 }, { "epoch": 21.409145607701564, "grad_norm": Infinity, "learning_rate": 0.00017917901128025287, "loss": 11.146, "step": 177910 }, { "epoch": 21.410348977135982, "grad_norm": Infinity, "learning_rate": 0.00017917668760055402, "loss": 11.1497, "step": 177920 }, { "epoch": 21.411552346570396, "grad_norm": Infinity, "learning_rate": 0.00017917436380626695, "loss": 11.1289, "step": 177930 }, { "epoch": 21.412755716004813, "grad_norm": Infinity, "learning_rate": 0.00017917203989739505, "loss": 11.0559, "step": 177940 }, { "epoch": 21.41395908543923, "grad_norm": Infinity, "learning_rate": 0.00017916971587394163, "loss": 11.1901, "step": 177950 }, { "epoch": 21.415162454873645, "grad_norm": Infinity, "learning_rate": 0.0001791673917359101, "loss": 11.1796, "step": 177960 }, { "epoch": 21.416365824308063, "grad_norm": Infinity, "learning_rate": 0.00017916506748330379, "loss": 11.0309, "step": 177970 }, { "epoch": 21.41756919374248, "grad_norm": Infinity, "learning_rate": 0.0001791627431161261, "loss": 11.2446, "step": 177980 }, { "epoch": 21.418772563176894, "grad_norm": Infinity, "learning_rate": 0.00017916041863438037, "loss": 11.1847, "step": 177990 }, { "epoch": 21.41997593261131, "grad_norm": Infinity, "learning_rate": 0.00017915809403806998, "loss": 11.1351, "step": 178000 }, { "epoch": 21.42117930204573, "grad_norm": Infinity, "learning_rate": 0.00017915576932719826, "loss": 11.1855, "step": 178010 }, { "epoch": 21.422382671480143, "grad_norm": Infinity, "learning_rate": 0.00017915344450176863, "loss": 11.1336, "step": 178020 }, { "epoch": 21.42358604091456, "grad_norm": Infinity, "learning_rate": 0.00017915111956178442, "loss": 11.1641, "step": 178030 }, { "epoch": 21.42478941034898, "grad_norm": Infinity, "learning_rate": 0.00017914879450724899, "loss": 11.0796, "step": 178040 }, { "epoch": 21.425992779783392, "grad_norm": Infinity, "learning_rate": 0.0001791464693381657, "loss": 11.3461, "step": 178050 }, { "epoch": 21.42719614921781, "grad_norm": Infinity, "learning_rate": 0.00017914414405453792, "loss": 11.0922, "step": 178060 }, { "epoch": 21.428399518652228, "grad_norm": Infinity, "learning_rate": 0.00017914181865636905, "loss": 11.1003, "step": 178070 }, { "epoch": 21.42960288808664, "grad_norm": Infinity, "learning_rate": 0.00017913949314366244, "loss": 11.1272, "step": 178080 }, { "epoch": 21.43080625752106, "grad_norm": Infinity, "learning_rate": 0.00017913716751642142, "loss": 11.2766, "step": 178090 }, { "epoch": 21.432009626955477, "grad_norm": Infinity, "learning_rate": 0.0001791348417746494, "loss": 11.2385, "step": 178100 }, { "epoch": 21.43321299638989, "grad_norm": Infinity, "learning_rate": 0.0001791325159183497, "loss": 11.2543, "step": 178110 }, { "epoch": 21.43441636582431, "grad_norm": Infinity, "learning_rate": 0.00017913018994752573, "loss": 11.0706, "step": 178120 }, { "epoch": 21.435619735258726, "grad_norm": Infinity, "learning_rate": 0.0001791278638621808, "loss": 11.1407, "step": 178130 }, { "epoch": 21.43682310469314, "grad_norm": Infinity, "learning_rate": 0.00017912553766231837, "loss": 11.1637, "step": 178140 }, { "epoch": 21.438026474127557, "grad_norm": Infinity, "learning_rate": 0.00017912321134794172, "loss": 11.1057, "step": 178150 }, { "epoch": 21.439229843561975, "grad_norm": Infinity, "learning_rate": 0.00017912088491905425, "loss": 11.1744, "step": 178160 }, { "epoch": 21.44043321299639, "grad_norm": Infinity, "learning_rate": 0.00017911855837565935, "loss": 11.0698, "step": 178170 }, { "epoch": 21.441636582430807, "grad_norm": Infinity, "learning_rate": 0.00017911623171776032, "loss": 11.0919, "step": 178180 }, { "epoch": 21.442839951865224, "grad_norm": Infinity, "learning_rate": 0.00017911390494536062, "loss": 11.1396, "step": 178190 }, { "epoch": 21.444043321299638, "grad_norm": Infinity, "learning_rate": 0.00017911157805846354, "loss": 11.069, "step": 178200 }, { "epoch": 21.445246690734056, "grad_norm": Infinity, "learning_rate": 0.00017910925105707248, "loss": 11.2572, "step": 178210 }, { "epoch": 21.446450060168473, "grad_norm": Infinity, "learning_rate": 0.00017910692394119078, "loss": 11.1759, "step": 178220 }, { "epoch": 21.447653429602887, "grad_norm": Infinity, "learning_rate": 0.00017910459671082184, "loss": 11.1208, "step": 178230 }, { "epoch": 21.448856799037305, "grad_norm": Infinity, "learning_rate": 0.00017910226936596906, "loss": 11.133, "step": 178240 }, { "epoch": 21.450060168471722, "grad_norm": Infinity, "learning_rate": 0.00017909994190663572, "loss": 11.1646, "step": 178250 }, { "epoch": 21.451263537906136, "grad_norm": Infinity, "learning_rate": 0.00017909761433282525, "loss": 11.1254, "step": 178260 }, { "epoch": 21.452466907340554, "grad_norm": Infinity, "learning_rate": 0.000179095286644541, "loss": 11.0085, "step": 178270 }, { "epoch": 21.45367027677497, "grad_norm": Infinity, "learning_rate": 0.00017909295884178635, "loss": 11.1745, "step": 178280 }, { "epoch": 21.454873646209386, "grad_norm": Infinity, "learning_rate": 0.00017909063092456465, "loss": 11.0254, "step": 178290 }, { "epoch": 21.456077015643803, "grad_norm": Infinity, "learning_rate": 0.0001790883028928793, "loss": 11.1441, "step": 178300 }, { "epoch": 21.45728038507822, "grad_norm": Infinity, "learning_rate": 0.00017908597474673363, "loss": 10.9994, "step": 178310 }, { "epoch": 21.458483754512635, "grad_norm": Infinity, "learning_rate": 0.00017908364648613106, "loss": 11.1418, "step": 178320 }, { "epoch": 21.459687123947052, "grad_norm": Infinity, "learning_rate": 0.00017908131811107488, "loss": 11.0872, "step": 178330 }, { "epoch": 21.460890493381466, "grad_norm": Infinity, "learning_rate": 0.0001790789896215685, "loss": 10.9967, "step": 178340 }, { "epoch": 21.462093862815884, "grad_norm": Infinity, "learning_rate": 0.00017907666101761536, "loss": 11.186, "step": 178350 }, { "epoch": 21.4632972322503, "grad_norm": Infinity, "learning_rate": 0.00017907433229921873, "loss": 11.1498, "step": 178360 }, { "epoch": 21.464500601684716, "grad_norm": Infinity, "learning_rate": 0.00017907200346638203, "loss": 11.098, "step": 178370 }, { "epoch": 21.465703971119133, "grad_norm": Infinity, "learning_rate": 0.00017906967451910858, "loss": 11.1497, "step": 178380 }, { "epoch": 21.46690734055355, "grad_norm": Infinity, "learning_rate": 0.00017906734545740185, "loss": 11.0304, "step": 178390 }, { "epoch": 21.468110709987965, "grad_norm": Infinity, "learning_rate": 0.00017906501628126513, "loss": 11.1167, "step": 178400 }, { "epoch": 21.469314079422382, "grad_norm": Infinity, "learning_rate": 0.0001790626869907018, "loss": 11.2074, "step": 178410 }, { "epoch": 21.4705174488568, "grad_norm": Infinity, "learning_rate": 0.00017906035758571525, "loss": 11.1508, "step": 178420 }, { "epoch": 21.471720818291214, "grad_norm": Infinity, "learning_rate": 0.00017905802806630883, "loss": 11.1496, "step": 178430 }, { "epoch": 21.47292418772563, "grad_norm": Infinity, "learning_rate": 0.00017905569843248596, "loss": 11.1121, "step": 178440 }, { "epoch": 21.47412755716005, "grad_norm": Infinity, "learning_rate": 0.00017905336868424994, "loss": 11.1338, "step": 178450 }, { "epoch": 21.475330926594463, "grad_norm": Infinity, "learning_rate": 0.0001790510388216042, "loss": 11.0631, "step": 178460 }, { "epoch": 21.47653429602888, "grad_norm": Infinity, "learning_rate": 0.00017904870884455205, "loss": 11.1566, "step": 178470 }, { "epoch": 21.477737665463298, "grad_norm": Infinity, "learning_rate": 0.00017904637875309693, "loss": 11.1467, "step": 178480 }, { "epoch": 21.478941034897712, "grad_norm": Infinity, "learning_rate": 0.00017904404854724218, "loss": 11.0822, "step": 178490 }, { "epoch": 21.48014440433213, "grad_norm": Infinity, "learning_rate": 0.00017904171822699118, "loss": 11.1704, "step": 178500 }, { "epoch": 21.481347773766547, "grad_norm": Infinity, "learning_rate": 0.0001790393877923473, "loss": 11.1729, "step": 178510 }, { "epoch": 21.48255114320096, "grad_norm": Infinity, "learning_rate": 0.0001790370572433139, "loss": 11.1836, "step": 178520 }, { "epoch": 21.48375451263538, "grad_norm": Infinity, "learning_rate": 0.0001790347265798944, "loss": 11.2747, "step": 178530 }, { "epoch": 21.484957882069796, "grad_norm": Infinity, "learning_rate": 0.0001790323958020921, "loss": 11.1204, "step": 178540 }, { "epoch": 21.48616125150421, "grad_norm": Infinity, "learning_rate": 0.0001790300649099104, "loss": 11.1644, "step": 178550 }, { "epoch": 21.487364620938628, "grad_norm": Infinity, "learning_rate": 0.00017902773390335273, "loss": 11.1467, "step": 178560 }, { "epoch": 21.488567990373046, "grad_norm": Infinity, "learning_rate": 0.00017902540278242238, "loss": 11.1352, "step": 178570 }, { "epoch": 21.48977135980746, "grad_norm": Infinity, "learning_rate": 0.0001790230715471228, "loss": 11.1543, "step": 178580 }, { "epoch": 21.490974729241877, "grad_norm": Infinity, "learning_rate": 0.00017902074019745729, "loss": 11.1715, "step": 178590 }, { "epoch": 21.492178098676295, "grad_norm": Infinity, "learning_rate": 0.0001790184087334293, "loss": 11.1144, "step": 178600 }, { "epoch": 21.49338146811071, "grad_norm": Infinity, "learning_rate": 0.00017901607715504212, "loss": 11.1829, "step": 178610 }, { "epoch": 21.494584837545126, "grad_norm": Infinity, "learning_rate": 0.00017901374546229918, "loss": 11.2078, "step": 178620 }, { "epoch": 21.495788206979544, "grad_norm": Infinity, "learning_rate": 0.0001790114136552038, "loss": 11.0871, "step": 178630 }, { "epoch": 21.496991576413958, "grad_norm": Infinity, "learning_rate": 0.00017900908173375948, "loss": 11.0597, "step": 178640 }, { "epoch": 21.498194945848375, "grad_norm": Infinity, "learning_rate": 0.00017900674969796948, "loss": 11.1074, "step": 178650 }, { "epoch": 21.499398315282793, "grad_norm": Infinity, "learning_rate": 0.00017900441754783718, "loss": 11.1345, "step": 178660 }, { "epoch": 21.500601684717207, "grad_norm": Infinity, "learning_rate": 0.00017900208528336603, "loss": 11.2014, "step": 178670 }, { "epoch": 21.501805054151625, "grad_norm": Infinity, "learning_rate": 0.00017899975290455933, "loss": 11.1527, "step": 178680 }, { "epoch": 21.503008423586042, "grad_norm": Infinity, "learning_rate": 0.00017899742041142046, "loss": 11.2237, "step": 178690 }, { "epoch": 21.504211793020456, "grad_norm": Infinity, "learning_rate": 0.00017899508780395287, "loss": 11.1291, "step": 178700 }, { "epoch": 21.505415162454874, "grad_norm": Infinity, "learning_rate": 0.00017899275508215985, "loss": 11.0648, "step": 178710 }, { "epoch": 21.50661853188929, "grad_norm": Infinity, "learning_rate": 0.00017899042224604484, "loss": 11.0635, "step": 178720 }, { "epoch": 21.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00017898808929561115, "loss": 11.1899, "step": 178730 }, { "epoch": 21.509025270758123, "grad_norm": Infinity, "learning_rate": 0.0001789857562308622, "loss": 11.1596, "step": 178740 }, { "epoch": 21.51022864019254, "grad_norm": Infinity, "learning_rate": 0.00017898342305180134, "loss": 11.1185, "step": 178750 }, { "epoch": 21.511432009626954, "grad_norm": Infinity, "learning_rate": 0.000178981089758432, "loss": 11.0803, "step": 178760 }, { "epoch": 21.512635379061372, "grad_norm": Infinity, "learning_rate": 0.00017897875635075754, "loss": 11.0858, "step": 178770 }, { "epoch": 21.51383874849579, "grad_norm": Infinity, "learning_rate": 0.0001789764228287813, "loss": 11.2377, "step": 178780 }, { "epoch": 21.515042117930204, "grad_norm": Infinity, "learning_rate": 0.0001789740891925067, "loss": 11.1412, "step": 178790 }, { "epoch": 21.51624548736462, "grad_norm": Infinity, "learning_rate": 0.00017897175544193705, "loss": 11.0815, "step": 178800 }, { "epoch": 21.51744885679904, "grad_norm": Infinity, "learning_rate": 0.00017896942157707578, "loss": 11.1827, "step": 178810 }, { "epoch": 21.518652226233453, "grad_norm": Infinity, "learning_rate": 0.0001789670875979263, "loss": 11.2028, "step": 178820 }, { "epoch": 21.51985559566787, "grad_norm": Infinity, "learning_rate": 0.00017896475350449192, "loss": 11.1479, "step": 178830 }, { "epoch": 21.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00017896241929677605, "loss": 11.0077, "step": 178840 }, { "epoch": 21.522262334536702, "grad_norm": Infinity, "learning_rate": 0.00017896008497478205, "loss": 11.1626, "step": 178850 }, { "epoch": 21.52346570397112, "grad_norm": Infinity, "learning_rate": 0.00017895775053851333, "loss": 11.1765, "step": 178860 }, { "epoch": 21.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00017895541598797322, "loss": 11.1911, "step": 178870 }, { "epoch": 21.52587244283995, "grad_norm": Infinity, "learning_rate": 0.00017895308132316517, "loss": 11.2009, "step": 178880 }, { "epoch": 21.52707581227437, "grad_norm": Infinity, "learning_rate": 0.0001789507465440925, "loss": 11.1698, "step": 178890 }, { "epoch": 21.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00017894841165075863, "loss": 11.1596, "step": 178900 }, { "epoch": 21.5294825511432, "grad_norm": Infinity, "learning_rate": 0.0001789460766431669, "loss": 11.2234, "step": 178910 }, { "epoch": 21.530685920577618, "grad_norm": Infinity, "learning_rate": 0.0001789437415213207, "loss": 11.236, "step": 178920 }, { "epoch": 21.531889290012035, "grad_norm": Infinity, "learning_rate": 0.0001789414062852234, "loss": 11.016, "step": 178930 }, { "epoch": 21.53309265944645, "grad_norm": Infinity, "learning_rate": 0.00017893907093487842, "loss": 11.1852, "step": 178940 }, { "epoch": 21.534296028880867, "grad_norm": Infinity, "learning_rate": 0.00017893673547028912, "loss": 11.1889, "step": 178950 }, { "epoch": 21.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00017893439989145885, "loss": 11.1744, "step": 178960 }, { "epoch": 21.5367027677497, "grad_norm": Infinity, "learning_rate": 0.00017893206419839102, "loss": 11.0609, "step": 178970 }, { "epoch": 21.537906137184116, "grad_norm": Infinity, "learning_rate": 0.00017892972839108904, "loss": 11.164, "step": 178980 }, { "epoch": 21.53910950661853, "grad_norm": Infinity, "learning_rate": 0.0001789273924695562, "loss": 11.1089, "step": 178990 }, { "epoch": 21.540312876052948, "grad_norm": Infinity, "learning_rate": 0.00017892505643379595, "loss": 11.0832, "step": 179000 }, { "epoch": 21.541516245487365, "grad_norm": Infinity, "learning_rate": 0.00017892272028381169, "loss": 11.1279, "step": 179010 }, { "epoch": 21.54271961492178, "grad_norm": Infinity, "learning_rate": 0.00017892038401960675, "loss": 11.1563, "step": 179020 }, { "epoch": 21.543922984356197, "grad_norm": Infinity, "learning_rate": 0.00017891804764118453, "loss": 11.0521, "step": 179030 }, { "epoch": 21.545126353790614, "grad_norm": Infinity, "learning_rate": 0.00017891571114854842, "loss": 11.22, "step": 179040 }, { "epoch": 21.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00017891337454170177, "loss": 11.2028, "step": 179050 }, { "epoch": 21.547533092659446, "grad_norm": Infinity, "learning_rate": 0.000178911037820648, "loss": 11.1106, "step": 179060 }, { "epoch": 21.548736462093864, "grad_norm": Infinity, "learning_rate": 0.00017890870098539048, "loss": 11.2388, "step": 179070 }, { "epoch": 21.549939831528278, "grad_norm": Infinity, "learning_rate": 0.00017890636403593258, "loss": 11.2003, "step": 179080 }, { "epoch": 21.551143200962695, "grad_norm": Infinity, "learning_rate": 0.0001789040269722777, "loss": 11.1938, "step": 179090 }, { "epoch": 21.552346570397113, "grad_norm": Infinity, "learning_rate": 0.0001789016897944292, "loss": 11.1349, "step": 179100 }, { "epoch": 21.553549939831527, "grad_norm": Infinity, "learning_rate": 0.0001788993525023905, "loss": 11.1204, "step": 179110 }, { "epoch": 21.554753309265944, "grad_norm": Infinity, "learning_rate": 0.00017889701509616492, "loss": 11.0467, "step": 179120 }, { "epoch": 21.555956678700362, "grad_norm": Infinity, "learning_rate": 0.0001788946775757559, "loss": 11.1042, "step": 179130 }, { "epoch": 21.557160048134776, "grad_norm": Infinity, "learning_rate": 0.0001788923399411668, "loss": 11.2238, "step": 179140 }, { "epoch": 21.558363417569193, "grad_norm": Infinity, "learning_rate": 0.000178890002192401, "loss": 11.1167, "step": 179150 }, { "epoch": 21.55956678700361, "grad_norm": Infinity, "learning_rate": 0.0001788876643294619, "loss": 11.19, "step": 179160 }, { "epoch": 21.560770156438025, "grad_norm": Infinity, "learning_rate": 0.00017888532635235288, "loss": 11.1259, "step": 179170 }, { "epoch": 21.561973525872443, "grad_norm": Infinity, "learning_rate": 0.0001788829882610773, "loss": 11.1525, "step": 179180 }, { "epoch": 21.56317689530686, "grad_norm": Infinity, "learning_rate": 0.00017888065005563854, "loss": 11.1196, "step": 179190 }, { "epoch": 21.564380264741274, "grad_norm": Infinity, "learning_rate": 0.00017887831173604005, "loss": 11.2967, "step": 179200 }, { "epoch": 21.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00017887597330228517, "loss": 11.0438, "step": 179210 }, { "epoch": 21.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00017887363475437723, "loss": 11.1433, "step": 179220 }, { "epoch": 21.567990373044523, "grad_norm": Infinity, "learning_rate": 0.0001788712960923197, "loss": 11.162, "step": 179230 }, { "epoch": 21.56919374247894, "grad_norm": Infinity, "learning_rate": 0.00017886895731611591, "loss": 11.1669, "step": 179240 }, { "epoch": 21.57039711191336, "grad_norm": Infinity, "learning_rate": 0.0001788666184257693, "loss": 11.0664, "step": 179250 }, { "epoch": 21.571600481347772, "grad_norm": Infinity, "learning_rate": 0.00017886427942128324, "loss": 11.0326, "step": 179260 }, { "epoch": 21.57280385078219, "grad_norm": Infinity, "learning_rate": 0.00017886194030266103, "loss": 11.1657, "step": 179270 }, { "epoch": 21.574007220216608, "grad_norm": Infinity, "learning_rate": 0.00017885960106990619, "loss": 11.0824, "step": 179280 }, { "epoch": 21.57521058965102, "grad_norm": Infinity, "learning_rate": 0.00017885726172302198, "loss": 11.1467, "step": 179290 }, { "epoch": 21.57641395908544, "grad_norm": Infinity, "learning_rate": 0.00017885492226201187, "loss": 11.1381, "step": 179300 }, { "epoch": 21.577617328519857, "grad_norm": Infinity, "learning_rate": 0.0001788525826868792, "loss": 11.1116, "step": 179310 }, { "epoch": 21.57882069795427, "grad_norm": Infinity, "learning_rate": 0.0001788502429976274, "loss": 11.1916, "step": 179320 }, { "epoch": 21.58002406738869, "grad_norm": Infinity, "learning_rate": 0.00017884790319425982, "loss": 11.2377, "step": 179330 }, { "epoch": 21.581227436823106, "grad_norm": Infinity, "learning_rate": 0.00017884556327677986, "loss": 11.0818, "step": 179340 }, { "epoch": 21.58243080625752, "grad_norm": Infinity, "learning_rate": 0.00017884322324519092, "loss": 11.16, "step": 179350 }, { "epoch": 21.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00017884088309949635, "loss": 11.1542, "step": 179360 }, { "epoch": 21.584837545126355, "grad_norm": Infinity, "learning_rate": 0.00017883854283969955, "loss": 11.1207, "step": 179370 }, { "epoch": 21.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00017883620246580392, "loss": 11.012, "step": 179380 }, { "epoch": 21.587244283995187, "grad_norm": Infinity, "learning_rate": 0.00017883386197781284, "loss": 11.0529, "step": 179390 }, { "epoch": 21.588447653429604, "grad_norm": Infinity, "learning_rate": 0.0001788315213757297, "loss": 11.2252, "step": 179400 }, { "epoch": 21.589651022864018, "grad_norm": Infinity, "learning_rate": 0.0001788291806595579, "loss": 11.1763, "step": 179410 }, { "epoch": 21.590854392298436, "grad_norm": Infinity, "learning_rate": 0.0001788268398293008, "loss": 11.0922, "step": 179420 }, { "epoch": 21.592057761732853, "grad_norm": Infinity, "learning_rate": 0.0001788244988849618, "loss": 11.1546, "step": 179430 }, { "epoch": 21.593261131167267, "grad_norm": Infinity, "learning_rate": 0.0001788221578265443, "loss": 11.1087, "step": 179440 }, { "epoch": 21.594464500601685, "grad_norm": Infinity, "learning_rate": 0.00017881981665405166, "loss": 11.189, "step": 179450 }, { "epoch": 21.595667870036102, "grad_norm": Infinity, "learning_rate": 0.0001788174753674873, "loss": 10.9882, "step": 179460 }, { "epoch": 21.596871239470516, "grad_norm": Infinity, "learning_rate": 0.0001788151339668546, "loss": 11.1569, "step": 179470 }, { "epoch": 21.598074608904934, "grad_norm": Infinity, "learning_rate": 0.0001788127924521569, "loss": 11.1781, "step": 179480 }, { "epoch": 21.59927797833935, "grad_norm": Infinity, "learning_rate": 0.0001788104508233977, "loss": 11.0443, "step": 179490 }, { "epoch": 21.600481347773766, "grad_norm": Infinity, "learning_rate": 0.00017880810908058026, "loss": 11.0961, "step": 179500 }, { "epoch": 21.601684717208183, "grad_norm": Infinity, "learning_rate": 0.00017880576722370805, "loss": 11.0358, "step": 179510 }, { "epoch": 21.6028880866426, "grad_norm": Infinity, "learning_rate": 0.00017880342525278443, "loss": 11.198, "step": 179520 }, { "epoch": 21.604091456077015, "grad_norm": Infinity, "learning_rate": 0.0001788010831678128, "loss": 11.1522, "step": 179530 }, { "epoch": 21.605294825511432, "grad_norm": Infinity, "learning_rate": 0.00017879874096879655, "loss": 11.218, "step": 179540 }, { "epoch": 21.60649819494585, "grad_norm": Infinity, "learning_rate": 0.0001787963986557391, "loss": 11.1059, "step": 179550 }, { "epoch": 21.607701564380264, "grad_norm": Infinity, "learning_rate": 0.00017879405622864378, "loss": 11.0864, "step": 179560 }, { "epoch": 21.60890493381468, "grad_norm": Infinity, "learning_rate": 0.00017879171368751398, "loss": 11.1138, "step": 179570 }, { "epoch": 21.6101083032491, "grad_norm": Infinity, "learning_rate": 0.00017878937103235317, "loss": 11.0421, "step": 179580 }, { "epoch": 21.611311672683513, "grad_norm": Infinity, "learning_rate": 0.00017878702826316464, "loss": 11.164, "step": 179590 }, { "epoch": 21.61251504211793, "grad_norm": Infinity, "learning_rate": 0.00017878468537995185, "loss": 11.1227, "step": 179600 }, { "epoch": 21.613718411552348, "grad_norm": Infinity, "learning_rate": 0.00017878234238271818, "loss": 11.12, "step": 179610 }, { "epoch": 21.614921780986762, "grad_norm": Infinity, "learning_rate": 0.00017877999927146698, "loss": 11.1591, "step": 179620 }, { "epoch": 21.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00017877765604620168, "loss": 11.1408, "step": 179630 }, { "epoch": 21.617328519855597, "grad_norm": Infinity, "learning_rate": 0.0001787753127069257, "loss": 11.1705, "step": 179640 }, { "epoch": 21.61853188929001, "grad_norm": Infinity, "learning_rate": 0.00017877296925364236, "loss": 11.032, "step": 179650 }, { "epoch": 21.61973525872443, "grad_norm": Infinity, "learning_rate": 0.00017877062568635507, "loss": 11.2358, "step": 179660 }, { "epoch": 21.620938628158846, "grad_norm": Infinity, "learning_rate": 0.00017876828200506727, "loss": 11.1123, "step": 179670 }, { "epoch": 21.62214199759326, "grad_norm": Infinity, "learning_rate": 0.0001787659382097823, "loss": 11.2215, "step": 179680 }, { "epoch": 21.623345367027678, "grad_norm": Infinity, "learning_rate": 0.00017876359430050358, "loss": 11.1523, "step": 179690 }, { "epoch": 21.624548736462096, "grad_norm": Infinity, "learning_rate": 0.0001787612502772345, "loss": 11.1506, "step": 179700 }, { "epoch": 21.62575210589651, "grad_norm": Infinity, "learning_rate": 0.0001787589061399784, "loss": 11.1127, "step": 179710 }, { "epoch": 21.626955475330927, "grad_norm": Infinity, "learning_rate": 0.00017875656188873874, "loss": 11.0368, "step": 179720 }, { "epoch": 21.628158844765345, "grad_norm": Infinity, "learning_rate": 0.00017875421752351894, "loss": 11.042, "step": 179730 }, { "epoch": 21.62936221419976, "grad_norm": Infinity, "learning_rate": 0.00017875187304432227, "loss": 11.2149, "step": 179740 }, { "epoch": 21.630565583634176, "grad_norm": Infinity, "learning_rate": 0.00017874952845115224, "loss": 11.1029, "step": 179750 }, { "epoch": 21.63176895306859, "grad_norm": Infinity, "learning_rate": 0.0001787471837440122, "loss": 11.1455, "step": 179760 }, { "epoch": 21.632972322503008, "grad_norm": Infinity, "learning_rate": 0.00017874483892290553, "loss": 11.1065, "step": 179770 }, { "epoch": 21.634175691937426, "grad_norm": Infinity, "learning_rate": 0.00017874249398783565, "loss": 11.0886, "step": 179780 }, { "epoch": 21.63537906137184, "grad_norm": Infinity, "learning_rate": 0.00017874014893880594, "loss": 11.2223, "step": 179790 }, { "epoch": 21.636582430806257, "grad_norm": Infinity, "learning_rate": 0.0001787378037758198, "loss": 11.0646, "step": 179800 }, { "epoch": 21.637785800240675, "grad_norm": Infinity, "learning_rate": 0.0001787354584988806, "loss": 11.1551, "step": 179810 }, { "epoch": 21.63898916967509, "grad_norm": Infinity, "learning_rate": 0.00017873311310799173, "loss": 11.2092, "step": 179820 }, { "epoch": 21.640192539109506, "grad_norm": Infinity, "learning_rate": 0.00017873076760315665, "loss": 11.1924, "step": 179830 }, { "epoch": 21.641395908543924, "grad_norm": Infinity, "learning_rate": 0.0001787284219843787, "loss": 11.1122, "step": 179840 }, { "epoch": 21.642599277978338, "grad_norm": Infinity, "learning_rate": 0.00017872607625166124, "loss": 11.2104, "step": 179850 }, { "epoch": 21.643802647412755, "grad_norm": Infinity, "learning_rate": 0.0001787237304050078, "loss": 11.1106, "step": 179860 }, { "epoch": 21.645006016847173, "grad_norm": Infinity, "learning_rate": 0.0001787213844444216, "loss": 11.1625, "step": 179870 }, { "epoch": 21.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00017871903836990616, "loss": 11.2445, "step": 179880 }, { "epoch": 21.647412755716005, "grad_norm": Infinity, "learning_rate": 0.00017871669218146483, "loss": 11.0936, "step": 179890 }, { "epoch": 21.648616125150422, "grad_norm": Infinity, "learning_rate": 0.000178714345879101, "loss": 11.0613, "step": 179900 }, { "epoch": 21.649819494584836, "grad_norm": Infinity, "learning_rate": 0.0001787119994628181, "loss": 11.2723, "step": 179910 }, { "epoch": 21.651022864019254, "grad_norm": Infinity, "learning_rate": 0.0001787096529326195, "loss": 11.2057, "step": 179920 }, { "epoch": 21.65222623345367, "grad_norm": Infinity, "learning_rate": 0.0001787073062885086, "loss": 11.0551, "step": 179930 }, { "epoch": 21.653429602888085, "grad_norm": Infinity, "learning_rate": 0.0001787049595304888, "loss": 11.1519, "step": 179940 }, { "epoch": 21.654632972322503, "grad_norm": Infinity, "learning_rate": 0.0001787026126585635, "loss": 11.2491, "step": 179950 }, { "epoch": 21.65583634175692, "grad_norm": Infinity, "learning_rate": 0.00017870026567273607, "loss": 11.0742, "step": 179960 }, { "epoch": 21.657039711191334, "grad_norm": Infinity, "learning_rate": 0.0001786979185730099, "loss": 11.0986, "step": 179970 }, { "epoch": 21.658243080625752, "grad_norm": Infinity, "learning_rate": 0.00017869557135938844, "loss": 11.2278, "step": 179980 }, { "epoch": 21.65944645006017, "grad_norm": Infinity, "learning_rate": 0.00017869322403187506, "loss": 11.2291, "step": 179990 }, { "epoch": 21.660649819494584, "grad_norm": Infinity, "learning_rate": 0.00017869087659047315, "loss": 11.0964, "step": 180000 }, { "epoch": 21.661853188929, "grad_norm": Infinity, "learning_rate": 0.00017868852903518614, "loss": 11.2089, "step": 180010 }, { "epoch": 21.66305655836342, "grad_norm": Infinity, "learning_rate": 0.00017868618136601737, "loss": 11.0344, "step": 180020 }, { "epoch": 21.664259927797833, "grad_norm": Infinity, "learning_rate": 0.00017868383358297027, "loss": 11.1704, "step": 180030 }, { "epoch": 21.66546329723225, "grad_norm": Infinity, "learning_rate": 0.00017868148568604824, "loss": 11.1504, "step": 180040 }, { "epoch": 21.666666666666668, "grad_norm": Infinity, "learning_rate": 0.00017867913767525472, "loss": 11.1858, "step": 180050 }, { "epoch": 21.667870036101082, "grad_norm": Infinity, "learning_rate": 0.000178676789550593, "loss": 11.2171, "step": 180060 }, { "epoch": 21.6690734055355, "grad_norm": Infinity, "learning_rate": 0.00017867444131206654, "loss": 11.188, "step": 180070 }, { "epoch": 21.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00017867209295967877, "loss": 11.1513, "step": 180080 }, { "epoch": 21.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00017866974449343305, "loss": 11.108, "step": 180090 }, { "epoch": 21.67268351383875, "grad_norm": Infinity, "learning_rate": 0.0001786673959133328, "loss": 11.2216, "step": 180100 }, { "epoch": 21.673886883273166, "grad_norm": Infinity, "learning_rate": 0.00017866504721938137, "loss": 11.1627, "step": 180110 }, { "epoch": 21.67509025270758, "grad_norm": Infinity, "learning_rate": 0.00017866269841158223, "loss": 11.0329, "step": 180120 }, { "epoch": 21.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00017866034948993873, "loss": 11.1457, "step": 180130 }, { "epoch": 21.677496991576415, "grad_norm": Infinity, "learning_rate": 0.0001786580004544543, "loss": 11.1479, "step": 180140 }, { "epoch": 21.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00017865565130513228, "loss": 11.1805, "step": 180150 }, { "epoch": 21.679903730445247, "grad_norm": Infinity, "learning_rate": 0.00017865330204197615, "loss": 11.1952, "step": 180160 }, { "epoch": 21.681107099879664, "grad_norm": Infinity, "learning_rate": 0.00017865095266498925, "loss": 10.9904, "step": 180170 }, { "epoch": 21.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00017864860317417502, "loss": 11.1384, "step": 180180 }, { "epoch": 21.683513838748496, "grad_norm": Infinity, "learning_rate": 0.00017864625356953684, "loss": 11.1221, "step": 180190 }, { "epoch": 21.684717208182914, "grad_norm": Infinity, "learning_rate": 0.0001786439038510781, "loss": 11.2118, "step": 180200 }, { "epoch": 21.685920577617328, "grad_norm": Infinity, "learning_rate": 0.00017864155401880224, "loss": 11.0066, "step": 180210 }, { "epoch": 21.687123947051745, "grad_norm": Infinity, "learning_rate": 0.00017863920407271264, "loss": 11.1237, "step": 180220 }, { "epoch": 21.688327316486163, "grad_norm": Infinity, "learning_rate": 0.00017863685401281267, "loss": 11.2164, "step": 180230 }, { "epoch": 21.689530685920577, "grad_norm": Infinity, "learning_rate": 0.00017863450383910577, "loss": 11.2231, "step": 180240 }, { "epoch": 21.690734055354994, "grad_norm": Infinity, "learning_rate": 0.00017863215355159531, "loss": 11.0839, "step": 180250 }, { "epoch": 21.691937424789412, "grad_norm": Infinity, "learning_rate": 0.00017862980315028475, "loss": 11.1832, "step": 180260 }, { "epoch": 21.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00017862745263517744, "loss": 11.1609, "step": 180270 }, { "epoch": 21.694344163658243, "grad_norm": Infinity, "learning_rate": 0.00017862510200627676, "loss": 11.1112, "step": 180280 }, { "epoch": 21.69554753309266, "grad_norm": Infinity, "learning_rate": 0.00017862275126358617, "loss": 11.1118, "step": 180290 }, { "epoch": 21.696750902527075, "grad_norm": Infinity, "learning_rate": 0.00017862040040710905, "loss": 11.1723, "step": 180300 }, { "epoch": 21.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00017861804943684878, "loss": 11.2083, "step": 180310 }, { "epoch": 21.69915764139591, "grad_norm": Infinity, "learning_rate": 0.0001786156983528088, "loss": 11.2532, "step": 180320 }, { "epoch": 21.700361010830324, "grad_norm": Infinity, "learning_rate": 0.0001786133471549925, "loss": 11.1602, "step": 180330 }, { "epoch": 21.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00017861099584340326, "loss": 11.0888, "step": 180340 }, { "epoch": 21.70276774969916, "grad_norm": Infinity, "learning_rate": 0.0001786086444180445, "loss": 11.0899, "step": 180350 }, { "epoch": 21.703971119133573, "grad_norm": Infinity, "learning_rate": 0.00017860629287891965, "loss": 11.0778, "step": 180360 }, { "epoch": 21.70517448856799, "grad_norm": Infinity, "learning_rate": 0.0001786039412260321, "loss": 11.1506, "step": 180370 }, { "epoch": 21.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00017860158945938519, "loss": 11.1819, "step": 180380 }, { "epoch": 21.707581227436823, "grad_norm": Infinity, "learning_rate": 0.00017859923757898238, "loss": 11.1748, "step": 180390 }, { "epoch": 21.70878459687124, "grad_norm": Infinity, "learning_rate": 0.0001785968855848271, "loss": 11.1569, "step": 180400 }, { "epoch": 21.709987966305654, "grad_norm": Infinity, "learning_rate": 0.0001785945334769227, "loss": 11.1512, "step": 180410 }, { "epoch": 21.71119133574007, "grad_norm": Infinity, "learning_rate": 0.0001785921812552726, "loss": 11.1474, "step": 180420 }, { "epoch": 21.71239470517449, "grad_norm": Infinity, "learning_rate": 0.00017858982891988024, "loss": 11.1456, "step": 180430 }, { "epoch": 21.713598074608903, "grad_norm": Infinity, "learning_rate": 0.00017858747647074897, "loss": 11.1321, "step": 180440 }, { "epoch": 21.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00017858512390788223, "loss": 11.2342, "step": 180450 }, { "epoch": 21.71600481347774, "grad_norm": Infinity, "learning_rate": 0.0001785827712312834, "loss": 11.1463, "step": 180460 }, { "epoch": 21.717208182912152, "grad_norm": Infinity, "learning_rate": 0.0001785804184409559, "loss": 11.1173, "step": 180470 }, { "epoch": 21.71841155234657, "grad_norm": Infinity, "learning_rate": 0.00017857806553690314, "loss": 11.1319, "step": 180480 }, { "epoch": 21.719614921780988, "grad_norm": Infinity, "learning_rate": 0.00017857571251912851, "loss": 11.0869, "step": 180490 }, { "epoch": 21.7208182912154, "grad_norm": Infinity, "learning_rate": 0.00017857335938763544, "loss": 11.2537, "step": 180500 }, { "epoch": 21.72202166064982, "grad_norm": Infinity, "learning_rate": 0.00017857100614242731, "loss": 11.2086, "step": 180510 }, { "epoch": 21.723225030084237, "grad_norm": Infinity, "learning_rate": 0.00017856865278350755, "loss": 11.1466, "step": 180520 }, { "epoch": 21.72442839951865, "grad_norm": Infinity, "learning_rate": 0.0001785662993108795, "loss": 11.1545, "step": 180530 }, { "epoch": 21.72563176895307, "grad_norm": Infinity, "learning_rate": 0.00017856394572454665, "loss": 11.1046, "step": 180540 }, { "epoch": 21.726835138387486, "grad_norm": Infinity, "learning_rate": 0.0001785615920245124, "loss": 11.1605, "step": 180550 }, { "epoch": 21.7280385078219, "grad_norm": Infinity, "learning_rate": 0.00017855923821078013, "loss": 11.172, "step": 180560 }, { "epoch": 21.729241877256317, "grad_norm": Infinity, "learning_rate": 0.0001785568842833532, "loss": 11.0667, "step": 180570 }, { "epoch": 21.730445246690735, "grad_norm": Infinity, "learning_rate": 0.00017855453024223511, "loss": 11.1737, "step": 180580 }, { "epoch": 21.73164861612515, "grad_norm": Infinity, "learning_rate": 0.0001785521760874292, "loss": 11.0979, "step": 180590 }, { "epoch": 21.732851985559567, "grad_norm": Infinity, "learning_rate": 0.0001785498218189389, "loss": 11.164, "step": 180600 }, { "epoch": 21.734055354993984, "grad_norm": Infinity, "learning_rate": 0.00017854746743676763, "loss": 11.1741, "step": 180610 }, { "epoch": 21.735258724428398, "grad_norm": Infinity, "learning_rate": 0.00017854511294091875, "loss": 11.1297, "step": 180620 }, { "epoch": 21.736462093862816, "grad_norm": Infinity, "learning_rate": 0.0001785427583313957, "loss": 11.1815, "step": 180630 }, { "epoch": 21.737665463297233, "grad_norm": Infinity, "learning_rate": 0.0001785404036082019, "loss": 11.1617, "step": 180640 }, { "epoch": 21.738868832731647, "grad_norm": Infinity, "learning_rate": 0.00017853804877134076, "loss": 11.2375, "step": 180650 }, { "epoch": 21.740072202166065, "grad_norm": Infinity, "learning_rate": 0.00017853569382081564, "loss": 11.1206, "step": 180660 }, { "epoch": 21.741275571600482, "grad_norm": Infinity, "learning_rate": 0.00017853333875663, "loss": 11.2529, "step": 180670 }, { "epoch": 21.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00017853098357878725, "loss": 11.0324, "step": 180680 }, { "epoch": 21.743682310469314, "grad_norm": Infinity, "learning_rate": 0.00017852862828729075, "loss": 11.1769, "step": 180690 }, { "epoch": 21.74488567990373, "grad_norm": Infinity, "learning_rate": 0.00017852627288214397, "loss": 11.1054, "step": 180700 }, { "epoch": 21.746089049338146, "grad_norm": Infinity, "learning_rate": 0.00017852391736335025, "loss": 11.1726, "step": 180710 }, { "epoch": 21.747292418772563, "grad_norm": Infinity, "learning_rate": 0.00017852156173091304, "loss": 11.2157, "step": 180720 }, { "epoch": 21.74849578820698, "grad_norm": Infinity, "learning_rate": 0.00017851920598483575, "loss": 11.1392, "step": 180730 }, { "epoch": 21.749699157641395, "grad_norm": Infinity, "learning_rate": 0.00017851685012512178, "loss": 11.2039, "step": 180740 }, { "epoch": 21.750902527075812, "grad_norm": Infinity, "learning_rate": 0.00017851449415177455, "loss": 11.1707, "step": 180750 }, { "epoch": 21.75210589651023, "grad_norm": Infinity, "learning_rate": 0.00017851213806479744, "loss": 11.0881, "step": 180760 }, { "epoch": 21.753309265944644, "grad_norm": Infinity, "learning_rate": 0.00017850978186419392, "loss": 11.0546, "step": 180770 }, { "epoch": 21.75451263537906, "grad_norm": Infinity, "learning_rate": 0.00017850742554996734, "loss": 11.163, "step": 180780 }, { "epoch": 21.75571600481348, "grad_norm": Infinity, "learning_rate": 0.0001785050691221211, "loss": 11.1368, "step": 180790 }, { "epoch": 21.756919374247893, "grad_norm": Infinity, "learning_rate": 0.00017850271258065868, "loss": 11.1986, "step": 180800 }, { "epoch": 21.75812274368231, "grad_norm": Infinity, "learning_rate": 0.00017850035592558347, "loss": 11.121, "step": 180810 }, { "epoch": 21.759326113116728, "grad_norm": Infinity, "learning_rate": 0.00017849799915689886, "loss": 11.0966, "step": 180820 }, { "epoch": 21.760529482551142, "grad_norm": Infinity, "learning_rate": 0.00017849564227460823, "loss": 11.1748, "step": 180830 }, { "epoch": 21.76173285198556, "grad_norm": Infinity, "learning_rate": 0.00017849328527871504, "loss": 11.2282, "step": 180840 }, { "epoch": 21.762936221419977, "grad_norm": Infinity, "learning_rate": 0.0001784909281692227, "loss": 11.1932, "step": 180850 }, { "epoch": 21.76413959085439, "grad_norm": Infinity, "learning_rate": 0.00017848857094613458, "loss": 11.1459, "step": 180860 }, { "epoch": 21.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00017848621360945415, "loss": 11.2191, "step": 180870 }, { "epoch": 21.766546329723226, "grad_norm": Infinity, "learning_rate": 0.00017848385615918479, "loss": 10.993, "step": 180880 }, { "epoch": 21.76774969915764, "grad_norm": Infinity, "learning_rate": 0.0001784814985953299, "loss": 11.1873, "step": 180890 }, { "epoch": 21.768953068592058, "grad_norm": Infinity, "learning_rate": 0.0001784791409178929, "loss": 11.1758, "step": 180900 }, { "epoch": 21.770156438026476, "grad_norm": Infinity, "learning_rate": 0.0001784767831268772, "loss": 11.2121, "step": 180910 }, { "epoch": 21.77135980746089, "grad_norm": Infinity, "learning_rate": 0.00017847442522228626, "loss": 11.1558, "step": 180920 }, { "epoch": 21.772563176895307, "grad_norm": Infinity, "learning_rate": 0.00017847206720412345, "loss": 11.159, "step": 180930 }, { "epoch": 21.773766546329725, "grad_norm": Infinity, "learning_rate": 0.00017846970907239214, "loss": 11.1322, "step": 180940 }, { "epoch": 21.77496991576414, "grad_norm": Infinity, "learning_rate": 0.0001784673508270958, "loss": 11.047, "step": 180950 }, { "epoch": 21.776173285198556, "grad_norm": Infinity, "learning_rate": 0.00017846499246823786, "loss": 11.1279, "step": 180960 }, { "epoch": 21.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00017846263399582166, "loss": 11.1287, "step": 180970 }, { "epoch": 21.778580024067388, "grad_norm": Infinity, "learning_rate": 0.00017846027540985068, "loss": 11.0994, "step": 180980 }, { "epoch": 21.779783393501805, "grad_norm": Infinity, "learning_rate": 0.0001784579167103283, "loss": 11.031, "step": 180990 }, { "epoch": 21.780986762936223, "grad_norm": Infinity, "learning_rate": 0.00017845555789725796, "loss": 11.1557, "step": 181000 }, { "epoch": 21.782190132370637, "grad_norm": Infinity, "learning_rate": 0.00017845319897064304, "loss": 11.1921, "step": 181010 }, { "epoch": 21.783393501805055, "grad_norm": Infinity, "learning_rate": 0.00017845083993048698, "loss": 11.1343, "step": 181020 }, { "epoch": 21.784596871239472, "grad_norm": Infinity, "learning_rate": 0.0001784484807767932, "loss": 11.2043, "step": 181030 }, { "epoch": 21.785800240673886, "grad_norm": Infinity, "learning_rate": 0.0001784461215095651, "loss": 11.2302, "step": 181040 }, { "epoch": 21.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00017844376212880608, "loss": 11.1428, "step": 181050 }, { "epoch": 21.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00017844140263451958, "loss": 11.1244, "step": 181060 }, { "epoch": 21.789410348977135, "grad_norm": Infinity, "learning_rate": 0.00017843904302670898, "loss": 11.1617, "step": 181070 }, { "epoch": 21.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00017843668330537774, "loss": 11.1106, "step": 181080 }, { "epoch": 21.79181708784597, "grad_norm": Infinity, "learning_rate": 0.00017843432347052924, "loss": 11.2189, "step": 181090 }, { "epoch": 21.793020457280385, "grad_norm": Infinity, "learning_rate": 0.00017843196352216695, "loss": 11.0325, "step": 181100 }, { "epoch": 21.794223826714802, "grad_norm": Infinity, "learning_rate": 0.00017842960346029418, "loss": 11.2234, "step": 181110 }, { "epoch": 21.79542719614922, "grad_norm": Infinity, "learning_rate": 0.00017842724328491448, "loss": 11.1873, "step": 181120 }, { "epoch": 21.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00017842488299603116, "loss": 11.0861, "step": 181130 }, { "epoch": 21.79783393501805, "grad_norm": Infinity, "learning_rate": 0.00017842252259364766, "loss": 11.0352, "step": 181140 }, { "epoch": 21.799037304452465, "grad_norm": Infinity, "learning_rate": 0.00017842016207776743, "loss": 11.0851, "step": 181150 }, { "epoch": 21.800240673886883, "grad_norm": Infinity, "learning_rate": 0.00017841780144839384, "loss": 11.0293, "step": 181160 }, { "epoch": 21.8014440433213, "grad_norm": Infinity, "learning_rate": 0.00017841544070553033, "loss": 11.1587, "step": 181170 }, { "epoch": 21.802647412755714, "grad_norm": Infinity, "learning_rate": 0.00017841307984918032, "loss": 11.1068, "step": 181180 }, { "epoch": 21.803850782190132, "grad_norm": Infinity, "learning_rate": 0.00017841071887934727, "loss": 11.182, "step": 181190 }, { "epoch": 21.80505415162455, "grad_norm": Infinity, "learning_rate": 0.0001784083577960345, "loss": 11.1532, "step": 181200 }, { "epoch": 21.806257521058964, "grad_norm": Infinity, "learning_rate": 0.00017840599659924548, "loss": 11.0924, "step": 181210 }, { "epoch": 21.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00017840363528898366, "loss": 11.1952, "step": 181220 }, { "epoch": 21.8086642599278, "grad_norm": Infinity, "learning_rate": 0.00017840127386525238, "loss": 11.2437, "step": 181230 }, { "epoch": 21.809867629362213, "grad_norm": Infinity, "learning_rate": 0.0001783989123280551, "loss": 11.2601, "step": 181240 }, { "epoch": 21.81107099879663, "grad_norm": Infinity, "learning_rate": 0.0001783965506773953, "loss": 11.1212, "step": 181250 }, { "epoch": 21.812274368231048, "grad_norm": Infinity, "learning_rate": 0.00017839418891327626, "loss": 11.1074, "step": 181260 }, { "epoch": 21.813477737665462, "grad_norm": Infinity, "learning_rate": 0.00017839182703570153, "loss": 11.2556, "step": 181270 }, { "epoch": 21.81468110709988, "grad_norm": Infinity, "learning_rate": 0.00017838946504467444, "loss": 11.0453, "step": 181280 }, { "epoch": 21.815884476534297, "grad_norm": Infinity, "learning_rate": 0.00017838710294019845, "loss": 11.252, "step": 181290 }, { "epoch": 21.81708784596871, "grad_norm": Infinity, "learning_rate": 0.00017838474072227696, "loss": 11.1918, "step": 181300 }, { "epoch": 21.81829121540313, "grad_norm": Infinity, "learning_rate": 0.0001783823783909134, "loss": 11.1111, "step": 181310 }, { "epoch": 21.819494584837546, "grad_norm": Infinity, "learning_rate": 0.00017838001594611122, "loss": 10.9816, "step": 181320 }, { "epoch": 21.82069795427196, "grad_norm": Infinity, "learning_rate": 0.00017837765338787377, "loss": 11.1914, "step": 181330 }, { "epoch": 21.821901323706378, "grad_norm": Infinity, "learning_rate": 0.0001783752907162045, "loss": 11.1404, "step": 181340 }, { "epoch": 21.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00017837292793110686, "loss": 11.2338, "step": 181350 }, { "epoch": 21.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00017837056503258424, "loss": 11.1739, "step": 181360 }, { "epoch": 21.825511432009627, "grad_norm": Infinity, "learning_rate": 0.00017836820202064003, "loss": 11.0121, "step": 181370 }, { "epoch": 21.826714801444044, "grad_norm": Infinity, "learning_rate": 0.00017836583889527772, "loss": 11.0616, "step": 181380 }, { "epoch": 21.82791817087846, "grad_norm": Infinity, "learning_rate": 0.0001783634756565007, "loss": 11.1607, "step": 181390 }, { "epoch": 21.829121540312876, "grad_norm": Infinity, "learning_rate": 0.00017836111230431234, "loss": 11.1626, "step": 181400 }, { "epoch": 21.830324909747294, "grad_norm": Infinity, "learning_rate": 0.00017835874883871612, "loss": 11.2276, "step": 181410 }, { "epoch": 21.831528279181708, "grad_norm": Infinity, "learning_rate": 0.00017835638525971547, "loss": 11.1743, "step": 181420 }, { "epoch": 21.832731648616125, "grad_norm": Infinity, "learning_rate": 0.00017835402156731375, "loss": 11.1401, "step": 181430 }, { "epoch": 21.833935018050543, "grad_norm": Infinity, "learning_rate": 0.00017835165776151443, "loss": 11.1726, "step": 181440 }, { "epoch": 21.835138387484957, "grad_norm": Infinity, "learning_rate": 0.00017834929384232093, "loss": 11.1171, "step": 181450 }, { "epoch": 21.836341756919374, "grad_norm": Infinity, "learning_rate": 0.00017834692980973665, "loss": 11.1887, "step": 181460 }, { "epoch": 21.837545126353792, "grad_norm": Infinity, "learning_rate": 0.000178344565663765, "loss": 11.1097, "step": 181470 }, { "epoch": 21.838748495788206, "grad_norm": Infinity, "learning_rate": 0.00017834220140440946, "loss": 11.1601, "step": 181480 }, { "epoch": 21.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00017833983703167337, "loss": 11.1485, "step": 181490 }, { "epoch": 21.84115523465704, "grad_norm": Infinity, "learning_rate": 0.00017833747254556023, "loss": 11.1811, "step": 181500 }, { "epoch": 21.842358604091455, "grad_norm": Infinity, "learning_rate": 0.00017833510794607342, "loss": 11.1527, "step": 181510 }, { "epoch": 21.843561973525873, "grad_norm": Infinity, "learning_rate": 0.00017833274323321633, "loss": 11.104, "step": 181520 }, { "epoch": 21.84476534296029, "grad_norm": Infinity, "learning_rate": 0.00017833037840699246, "loss": 11.1038, "step": 181530 }, { "epoch": 21.845968712394704, "grad_norm": Infinity, "learning_rate": 0.0001783280134674052, "loss": 11.2436, "step": 181540 }, { "epoch": 21.84717208182912, "grad_norm": Infinity, "learning_rate": 0.00017832564841445793, "loss": 11.0612, "step": 181550 }, { "epoch": 21.84837545126354, "grad_norm": Infinity, "learning_rate": 0.00017832328324815416, "loss": 11.1391, "step": 181560 }, { "epoch": 21.849578820697953, "grad_norm": Infinity, "learning_rate": 0.0001783209179684972, "loss": 11.0787, "step": 181570 }, { "epoch": 21.85078219013237, "grad_norm": Infinity, "learning_rate": 0.00017831855257549058, "loss": 11.18, "step": 181580 }, { "epoch": 21.85198555956679, "grad_norm": Infinity, "learning_rate": 0.00017831618706913766, "loss": 11.065, "step": 181590 }, { "epoch": 21.853188929001202, "grad_norm": Infinity, "learning_rate": 0.0001783138214494419, "loss": 11.2187, "step": 181600 }, { "epoch": 21.85439229843562, "grad_norm": Infinity, "learning_rate": 0.00017831145571640668, "loss": 11.1288, "step": 181610 }, { "epoch": 21.855595667870038, "grad_norm": Infinity, "learning_rate": 0.00017830908987003546, "loss": 11.0917, "step": 181620 }, { "epoch": 21.85679903730445, "grad_norm": Infinity, "learning_rate": 0.00017830672391033165, "loss": 11.1151, "step": 181630 }, { "epoch": 21.85800240673887, "grad_norm": Infinity, "learning_rate": 0.00017830435783729867, "loss": 11.1632, "step": 181640 }, { "epoch": 21.859205776173287, "grad_norm": Infinity, "learning_rate": 0.00017830199165093998, "loss": 11.157, "step": 181650 }, { "epoch": 21.8604091456077, "grad_norm": Infinity, "learning_rate": 0.00017829962535125896, "loss": 11.1263, "step": 181660 }, { "epoch": 21.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00017829725893825905, "loss": 11.1513, "step": 181670 }, { "epoch": 21.862815884476536, "grad_norm": Infinity, "learning_rate": 0.00017829489241194367, "loss": 11.1217, "step": 181680 }, { "epoch": 21.86401925391095, "grad_norm": Infinity, "learning_rate": 0.00017829252577231626, "loss": 11.1202, "step": 181690 }, { "epoch": 21.865222623345367, "grad_norm": Infinity, "learning_rate": 0.0001782901590193802, "loss": 11.1383, "step": 181700 }, { "epoch": 21.866425992779785, "grad_norm": Infinity, "learning_rate": 0.000178287792153139, "loss": 11.2577, "step": 181710 }, { "epoch": 21.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00017828542517359602, "loss": 11.1333, "step": 181720 }, { "epoch": 21.868832731648617, "grad_norm": Infinity, "learning_rate": 0.00017828305808075467, "loss": 11.1896, "step": 181730 }, { "epoch": 21.870036101083034, "grad_norm": Infinity, "learning_rate": 0.00017828069087461845, "loss": 11.1563, "step": 181740 }, { "epoch": 21.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00017827832355519073, "loss": 11.1597, "step": 181750 }, { "epoch": 21.872442839951866, "grad_norm": Infinity, "learning_rate": 0.00017827595612247493, "loss": 11.1044, "step": 181760 }, { "epoch": 21.87364620938628, "grad_norm": Infinity, "learning_rate": 0.00017827358857647454, "loss": 11.1056, "step": 181770 }, { "epoch": 21.874849578820697, "grad_norm": Infinity, "learning_rate": 0.0001782712209171929, "loss": 11.2507, "step": 181780 }, { "epoch": 21.876052948255115, "grad_norm": Infinity, "learning_rate": 0.00017826885314463348, "loss": 11.1047, "step": 181790 }, { "epoch": 21.87725631768953, "grad_norm": Infinity, "learning_rate": 0.00017826648525879973, "loss": 11.06, "step": 181800 }, { "epoch": 21.878459687123947, "grad_norm": Infinity, "learning_rate": 0.00017826411725969506, "loss": 11.0438, "step": 181810 }, { "epoch": 21.879663056558364, "grad_norm": Infinity, "learning_rate": 0.00017826174914732285, "loss": 11.1146, "step": 181820 }, { "epoch": 21.880866425992778, "grad_norm": Infinity, "learning_rate": 0.00017825938092168658, "loss": 11.1669, "step": 181830 }, { "epoch": 21.882069795427196, "grad_norm": Infinity, "learning_rate": 0.00017825701258278968, "loss": 11.0667, "step": 181840 }, { "epoch": 21.883273164861613, "grad_norm": Infinity, "learning_rate": 0.00017825464413063558, "loss": 11.0749, "step": 181850 }, { "epoch": 21.884476534296027, "grad_norm": Infinity, "learning_rate": 0.00017825227556522765, "loss": 11.1729, "step": 181860 }, { "epoch": 21.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00017824990688656937, "loss": 11.0918, "step": 181870 }, { "epoch": 21.886883273164862, "grad_norm": Infinity, "learning_rate": 0.00017824753809466418, "loss": 11.0952, "step": 181880 }, { "epoch": 21.888086642599276, "grad_norm": Infinity, "learning_rate": 0.00017824516918951545, "loss": 11.2068, "step": 181890 }, { "epoch": 21.889290012033694, "grad_norm": Infinity, "learning_rate": 0.00017824280017112666, "loss": 11.0649, "step": 181900 }, { "epoch": 21.89049338146811, "grad_norm": Infinity, "learning_rate": 0.0001782404310395012, "loss": 11.036, "step": 181910 }, { "epoch": 21.891696750902526, "grad_norm": Infinity, "learning_rate": 0.00017823806179464253, "loss": 11.2186, "step": 181920 }, { "epoch": 21.892900120336943, "grad_norm": Infinity, "learning_rate": 0.00017823569243655406, "loss": 11.0482, "step": 181930 }, { "epoch": 21.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00017823332296523924, "loss": 11.2153, "step": 181940 }, { "epoch": 21.895306859205775, "grad_norm": Infinity, "learning_rate": 0.00017823095338070149, "loss": 11.073, "step": 181950 }, { "epoch": 21.896510228640192, "grad_norm": Infinity, "learning_rate": 0.00017822858368294424, "loss": 11.1148, "step": 181960 }, { "epoch": 21.89771359807461, "grad_norm": Infinity, "learning_rate": 0.00017822621387197088, "loss": 11.1757, "step": 181970 }, { "epoch": 21.898916967509024, "grad_norm": Infinity, "learning_rate": 0.00017822384394778492, "loss": 11.1131, "step": 181980 }, { "epoch": 21.90012033694344, "grad_norm": Infinity, "learning_rate": 0.00017822147391038975, "loss": 11.0978, "step": 181990 }, { "epoch": 21.90132370637786, "grad_norm": Infinity, "learning_rate": 0.00017821910375978876, "loss": 11.1209, "step": 182000 }, { "epoch": 21.902527075812273, "grad_norm": Infinity, "learning_rate": 0.00017821673349598542, "loss": 11.1225, "step": 182010 }, { "epoch": 21.90373044524669, "grad_norm": Infinity, "learning_rate": 0.00017821436311898315, "loss": 10.9861, "step": 182020 }, { "epoch": 21.904933814681108, "grad_norm": Infinity, "learning_rate": 0.0001782119926287854, "loss": 11.2235, "step": 182030 }, { "epoch": 21.906137184115522, "grad_norm": Infinity, "learning_rate": 0.0001782096220253956, "loss": 11.1922, "step": 182040 }, { "epoch": 21.90734055354994, "grad_norm": Infinity, "learning_rate": 0.00017820725130881715, "loss": 11.2977, "step": 182050 }, { "epoch": 21.908543922984357, "grad_norm": Infinity, "learning_rate": 0.0001782048804790535, "loss": 11.1831, "step": 182060 }, { "epoch": 21.90974729241877, "grad_norm": Infinity, "learning_rate": 0.00017820250953610808, "loss": 11.1861, "step": 182070 }, { "epoch": 21.91095066185319, "grad_norm": Infinity, "learning_rate": 0.0001782001384799843, "loss": 11.1354, "step": 182080 }, { "epoch": 21.912154031287606, "grad_norm": Infinity, "learning_rate": 0.00017819776731068564, "loss": 11.1224, "step": 182090 }, { "epoch": 21.91335740072202, "grad_norm": Infinity, "learning_rate": 0.00017819539602821548, "loss": 11.2226, "step": 182100 }, { "epoch": 21.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00017819302463257733, "loss": 11.2407, "step": 182110 }, { "epoch": 21.915764139590856, "grad_norm": Infinity, "learning_rate": 0.00017819065312377452, "loss": 11.1271, "step": 182120 }, { "epoch": 21.91696750902527, "grad_norm": Infinity, "learning_rate": 0.00017818828150181055, "loss": 11.1881, "step": 182130 }, { "epoch": 21.918170878459687, "grad_norm": Infinity, "learning_rate": 0.00017818590976668883, "loss": 11.2033, "step": 182140 }, { "epoch": 21.919374247894105, "grad_norm": Infinity, "learning_rate": 0.00017818353791841277, "loss": 11.1064, "step": 182150 }, { "epoch": 21.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00017818116595698586, "loss": 11.1049, "step": 182160 }, { "epoch": 21.921780986762936, "grad_norm": Infinity, "learning_rate": 0.0001781787938824115, "loss": 10.9041, "step": 182170 }, { "epoch": 21.922984356197354, "grad_norm": Infinity, "learning_rate": 0.0001781764216946931, "loss": 11.1451, "step": 182180 }, { "epoch": 21.924187725631768, "grad_norm": Infinity, "learning_rate": 0.00017817404939383415, "loss": 11.1456, "step": 182190 }, { "epoch": 21.925391095066185, "grad_norm": Infinity, "learning_rate": 0.00017817167697983802, "loss": 11.0745, "step": 182200 }, { "epoch": 21.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00017816930445270818, "loss": 11.1366, "step": 182210 }, { "epoch": 21.927797833935017, "grad_norm": Infinity, "learning_rate": 0.00017816693181244806, "loss": 11.0964, "step": 182220 }, { "epoch": 21.929001203369435, "grad_norm": Infinity, "learning_rate": 0.00017816455905906109, "loss": 11.2673, "step": 182230 }, { "epoch": 21.930204572803852, "grad_norm": Infinity, "learning_rate": 0.00017816218619255068, "loss": 11.0652, "step": 182240 }, { "epoch": 21.931407942238266, "grad_norm": Infinity, "learning_rate": 0.00017815981321292034, "loss": 11.153, "step": 182250 }, { "epoch": 21.932611311672684, "grad_norm": Infinity, "learning_rate": 0.0001781574401201734, "loss": 11.1923, "step": 182260 }, { "epoch": 21.9338146811071, "grad_norm": Infinity, "learning_rate": 0.0001781550669143134, "loss": 11.096, "step": 182270 }, { "epoch": 21.935018050541515, "grad_norm": Infinity, "learning_rate": 0.0001781526935953437, "loss": 11.1599, "step": 182280 }, { "epoch": 21.936221419975933, "grad_norm": Infinity, "learning_rate": 0.00017815032016326777, "loss": 11.1602, "step": 182290 }, { "epoch": 21.93742478941035, "grad_norm": Infinity, "learning_rate": 0.000178147946618089, "loss": 11.155, "step": 182300 }, { "epoch": 21.938628158844764, "grad_norm": Infinity, "learning_rate": 0.00017814557295981086, "loss": 11.0521, "step": 182310 }, { "epoch": 21.939831528279182, "grad_norm": Infinity, "learning_rate": 0.00017814319918843681, "loss": 11.3187, "step": 182320 }, { "epoch": 21.9410348977136, "grad_norm": Infinity, "learning_rate": 0.00017814082530397024, "loss": 11.224, "step": 182330 }, { "epoch": 21.942238267148014, "grad_norm": Infinity, "learning_rate": 0.0001781384513064146, "loss": 11.2031, "step": 182340 }, { "epoch": 21.94344163658243, "grad_norm": Infinity, "learning_rate": 0.00017813607719577334, "loss": 11.1313, "step": 182350 }, { "epoch": 21.94464500601685, "grad_norm": Infinity, "learning_rate": 0.00017813370297204987, "loss": 11.1111, "step": 182360 }, { "epoch": 21.945848375451263, "grad_norm": Infinity, "learning_rate": 0.0001781313286352477, "loss": 11.0944, "step": 182370 }, { "epoch": 21.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00017812895418537012, "loss": 11.0591, "step": 182380 }, { "epoch": 21.948255114320098, "grad_norm": Infinity, "learning_rate": 0.0001781265796224207, "loss": 11.1925, "step": 182390 }, { "epoch": 21.949458483754512, "grad_norm": Infinity, "learning_rate": 0.00017812420494640283, "loss": 11.1055, "step": 182400 }, { "epoch": 21.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00017812183015731995, "loss": 11.1512, "step": 182410 }, { "epoch": 21.951865222623347, "grad_norm": Infinity, "learning_rate": 0.00017811945525517548, "loss": 11.1603, "step": 182420 }, { "epoch": 21.95306859205776, "grad_norm": Infinity, "learning_rate": 0.00017811708023997289, "loss": 11.213, "step": 182430 }, { "epoch": 21.95427196149218, "grad_norm": Infinity, "learning_rate": 0.00017811470511171557, "loss": 11.0421, "step": 182440 }, { "epoch": 21.955475330926596, "grad_norm": Infinity, "learning_rate": 0.000178112329870407, "loss": 11.1248, "step": 182450 }, { "epoch": 21.95667870036101, "grad_norm": Infinity, "learning_rate": 0.00017810995451605062, "loss": 11.1249, "step": 182460 }, { "epoch": 21.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00017810757904864984, "loss": 11.1359, "step": 182470 }, { "epoch": 21.959085439229845, "grad_norm": Infinity, "learning_rate": 0.00017810520346820812, "loss": 11.1427, "step": 182480 }, { "epoch": 21.96028880866426, "grad_norm": Infinity, "learning_rate": 0.00017810282777472883, "loss": 11.1613, "step": 182490 }, { "epoch": 21.961492178098677, "grad_norm": Infinity, "learning_rate": 0.00017810045196821553, "loss": 11.0584, "step": 182500 }, { "epoch": 21.96269554753309, "grad_norm": Infinity, "learning_rate": 0.00017809807604867158, "loss": 11.183, "step": 182510 }, { "epoch": 21.96389891696751, "grad_norm": Infinity, "learning_rate": 0.00017809570001610042, "loss": 11.0937, "step": 182520 }, { "epoch": 21.965102286401926, "grad_norm": Infinity, "learning_rate": 0.0001780933238705055, "loss": 11.1415, "step": 182530 }, { "epoch": 21.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00017809094761189024, "loss": 11.06, "step": 182540 }, { "epoch": 21.967509025270758, "grad_norm": Infinity, "learning_rate": 0.00017808857124025816, "loss": 11.1841, "step": 182550 }, { "epoch": 21.968712394705175, "grad_norm": Infinity, "learning_rate": 0.00017808619475561259, "loss": 11.1151, "step": 182560 }, { "epoch": 21.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00017808381815795701, "loss": 11.069, "step": 182570 }, { "epoch": 21.971119133574007, "grad_norm": Infinity, "learning_rate": 0.0001780814414472949, "loss": 11.2328, "step": 182580 }, { "epoch": 21.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00017807906462362968, "loss": 11.0846, "step": 182590 }, { "epoch": 21.97352587244284, "grad_norm": Infinity, "learning_rate": 0.0001780766876869647, "loss": 11.0604, "step": 182600 }, { "epoch": 21.974729241877256, "grad_norm": Infinity, "learning_rate": 0.00017807431063730355, "loss": 11.1187, "step": 182610 }, { "epoch": 21.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00017807193347464957, "loss": 11.1575, "step": 182620 }, { "epoch": 21.977135980746088, "grad_norm": Infinity, "learning_rate": 0.00017806955619900622, "loss": 11.2411, "step": 182630 }, { "epoch": 21.978339350180505, "grad_norm": Infinity, "learning_rate": 0.00017806717881037695, "loss": 11.1222, "step": 182640 }, { "epoch": 21.979542719614923, "grad_norm": Infinity, "learning_rate": 0.0001780648013087652, "loss": 11.1311, "step": 182650 }, { "epoch": 21.980746089049337, "grad_norm": Infinity, "learning_rate": 0.0001780624236941744, "loss": 11.2069, "step": 182660 }, { "epoch": 21.981949458483754, "grad_norm": Infinity, "learning_rate": 0.00017806004596660803, "loss": 11.0799, "step": 182670 }, { "epoch": 21.983152827918172, "grad_norm": Infinity, "learning_rate": 0.0001780576681260695, "loss": 11.1148, "step": 182680 }, { "epoch": 21.984356197352586, "grad_norm": Infinity, "learning_rate": 0.00017805529017256224, "loss": 11.0971, "step": 182690 }, { "epoch": 21.985559566787003, "grad_norm": Infinity, "learning_rate": 0.0001780529121060897, "loss": 11.1542, "step": 182700 }, { "epoch": 21.98676293622142, "grad_norm": Infinity, "learning_rate": 0.0001780505339266553, "loss": 11.1696, "step": 182710 }, { "epoch": 21.987966305655835, "grad_norm": Infinity, "learning_rate": 0.00017804815563426254, "loss": 11.2658, "step": 182720 }, { "epoch": 21.989169675090253, "grad_norm": Infinity, "learning_rate": 0.00017804577722891482, "loss": 11.1954, "step": 182730 }, { "epoch": 21.99037304452467, "grad_norm": Infinity, "learning_rate": 0.0001780433987106156, "loss": 11.175, "step": 182740 }, { "epoch": 21.991576413959084, "grad_norm": Infinity, "learning_rate": 0.0001780410200793683, "loss": 11.0907, "step": 182750 }, { "epoch": 21.9927797833935, "grad_norm": Infinity, "learning_rate": 0.0001780386413351764, "loss": 11.2459, "step": 182760 }, { "epoch": 21.99398315282792, "grad_norm": Infinity, "learning_rate": 0.00017803626247804328, "loss": 11.0128, "step": 182770 }, { "epoch": 21.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00017803388350797245, "loss": 11.1205, "step": 182780 }, { "epoch": 21.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00017803150442496733, "loss": 11.1914, "step": 182790 }, { "epoch": 21.99759326113117, "grad_norm": Infinity, "learning_rate": 0.0001780291252290313, "loss": 11.149, "step": 182800 }, { "epoch": 21.998796630565582, "grad_norm": Infinity, "learning_rate": 0.00017802674592016794, "loss": 11.1617, "step": 182810 }, { "epoch": 22.0, "grad_norm": Infinity, "learning_rate": 0.0001780243664983806, "loss": 11.1725, "step": 182820 }, { "epoch": 22.0, "eval_loss": 11.139603614807129, "eval_runtime": 120.6846, "eval_samples_per_second": 61.209, "eval_steps_per_second": 7.656, "step": 182820 }, { "epoch": 22.001203369434418, "grad_norm": Infinity, "learning_rate": 0.0001780219869636727, "loss": 11.1769, "step": 182830 }, { "epoch": 22.00240673886883, "grad_norm": Infinity, "learning_rate": 0.00017801960731604773, "loss": 11.1089, "step": 182840 }, { "epoch": 22.00361010830325, "grad_norm": Infinity, "learning_rate": 0.00017801722755550917, "loss": 11.2128, "step": 182850 }, { "epoch": 22.004813477737667, "grad_norm": Infinity, "learning_rate": 0.00017801484768206036, "loss": 11.1146, "step": 182860 }, { "epoch": 22.00601684717208, "grad_norm": Infinity, "learning_rate": 0.00017801246769570485, "loss": 11.0829, "step": 182870 }, { "epoch": 22.0072202166065, "grad_norm": Infinity, "learning_rate": 0.000178010087596446, "loss": 11.1403, "step": 182880 }, { "epoch": 22.008423586040916, "grad_norm": Infinity, "learning_rate": 0.00017800770738428731, "loss": 11.0535, "step": 182890 }, { "epoch": 22.00962695547533, "grad_norm": Infinity, "learning_rate": 0.00017800532705923222, "loss": 11.0978, "step": 182900 }, { "epoch": 22.010830324909747, "grad_norm": Infinity, "learning_rate": 0.00017800294662128417, "loss": 11.0584, "step": 182910 }, { "epoch": 22.012033694344165, "grad_norm": Infinity, "learning_rate": 0.00017800056607044658, "loss": 11.1655, "step": 182920 }, { "epoch": 22.01323706377858, "grad_norm": Infinity, "learning_rate": 0.00017799818540672292, "loss": 11.1153, "step": 182930 }, { "epoch": 22.014440433212997, "grad_norm": Infinity, "learning_rate": 0.00017799580463011662, "loss": 11.016, "step": 182940 }, { "epoch": 22.015643802647414, "grad_norm": Infinity, "learning_rate": 0.0001779934237406311, "loss": 11.2378, "step": 182950 }, { "epoch": 22.016847172081828, "grad_norm": Infinity, "learning_rate": 0.0001779910427382699, "loss": 11.1098, "step": 182960 }, { "epoch": 22.018050541516246, "grad_norm": Infinity, "learning_rate": 0.0001779886616230364, "loss": 11.0959, "step": 182970 }, { "epoch": 22.019253910950663, "grad_norm": Infinity, "learning_rate": 0.000177986280394934, "loss": 11.143, "step": 182980 }, { "epoch": 22.020457280385077, "grad_norm": Infinity, "learning_rate": 0.00017798389905396625, "loss": 11.072, "step": 182990 }, { "epoch": 22.021660649819495, "grad_norm": Infinity, "learning_rate": 0.00017798151760013651, "loss": 11.2087, "step": 183000 }, { "epoch": 22.022864019253912, "grad_norm": Infinity, "learning_rate": 0.00017797913603344832, "loss": 11.1155, "step": 183010 }, { "epoch": 22.024067388688326, "grad_norm": Infinity, "learning_rate": 0.000177976754353905, "loss": 11.1776, "step": 183020 }, { "epoch": 22.025270758122744, "grad_norm": Infinity, "learning_rate": 0.0001779743725615101, "loss": 11.1987, "step": 183030 }, { "epoch": 22.02647412755716, "grad_norm": Infinity, "learning_rate": 0.00017797199065626703, "loss": 11.0672, "step": 183040 }, { "epoch": 22.027677496991576, "grad_norm": Infinity, "learning_rate": 0.00017796960863817923, "loss": 11.1907, "step": 183050 }, { "epoch": 22.028880866425993, "grad_norm": Infinity, "learning_rate": 0.00017796722650725014, "loss": 11.1238, "step": 183060 }, { "epoch": 22.03008423586041, "grad_norm": Infinity, "learning_rate": 0.00017796484426348325, "loss": 11.1398, "step": 183070 }, { "epoch": 22.031287605294825, "grad_norm": Infinity, "learning_rate": 0.000177962461906882, "loss": 11.2278, "step": 183080 }, { "epoch": 22.032490974729242, "grad_norm": Infinity, "learning_rate": 0.00017796007943744977, "loss": 11.1665, "step": 183090 }, { "epoch": 22.03369434416366, "grad_norm": Infinity, "learning_rate": 0.0001779576968551901, "loss": 11.1412, "step": 183100 }, { "epoch": 22.034897713598074, "grad_norm": Infinity, "learning_rate": 0.00017795531416010636, "loss": 11.0917, "step": 183110 }, { "epoch": 22.03610108303249, "grad_norm": Infinity, "learning_rate": 0.00017795293135220206, "loss": 11.2043, "step": 183120 }, { "epoch": 22.03730445246691, "grad_norm": Infinity, "learning_rate": 0.0001779505484314806, "loss": 11.1065, "step": 183130 }, { "epoch": 22.038507821901323, "grad_norm": Infinity, "learning_rate": 0.0001779481653979455, "loss": 11.1322, "step": 183140 }, { "epoch": 22.03971119133574, "grad_norm": Infinity, "learning_rate": 0.0001779457822516001, "loss": 11.1741, "step": 183150 }, { "epoch": 22.040914560770158, "grad_norm": Infinity, "learning_rate": 0.00017794339899244793, "loss": 11.1394, "step": 183160 }, { "epoch": 22.042117930204572, "grad_norm": Infinity, "learning_rate": 0.0001779410156204924, "loss": 11.1185, "step": 183170 }, { "epoch": 22.04332129963899, "grad_norm": Infinity, "learning_rate": 0.000177938632135737, "loss": 11.1088, "step": 183180 }, { "epoch": 22.044524669073404, "grad_norm": Infinity, "learning_rate": 0.00017793624853818516, "loss": 11.1863, "step": 183190 }, { "epoch": 22.04572803850782, "grad_norm": Infinity, "learning_rate": 0.0001779338648278403, "loss": 11.192, "step": 183200 }, { "epoch": 22.04693140794224, "grad_norm": Infinity, "learning_rate": 0.00017793148100470594, "loss": 11.1201, "step": 183210 }, { "epoch": 22.048134777376653, "grad_norm": Infinity, "learning_rate": 0.00017792909706878547, "loss": 11.3088, "step": 183220 }, { "epoch": 22.04933814681107, "grad_norm": Infinity, "learning_rate": 0.00017792671302008234, "loss": 11.151, "step": 183230 }, { "epoch": 22.050541516245488, "grad_norm": Infinity, "learning_rate": 0.00017792432885860003, "loss": 11.0786, "step": 183240 }, { "epoch": 22.051744885679902, "grad_norm": Infinity, "learning_rate": 0.00017792194458434197, "loss": 11.1415, "step": 183250 }, { "epoch": 22.05294825511432, "grad_norm": Infinity, "learning_rate": 0.00017791956019731162, "loss": 11.1922, "step": 183260 }, { "epoch": 22.054151624548737, "grad_norm": Infinity, "learning_rate": 0.00017791717569751243, "loss": 11.1008, "step": 183270 }, { "epoch": 22.05535499398315, "grad_norm": Infinity, "learning_rate": 0.00017791479108494787, "loss": 11.0372, "step": 183280 }, { "epoch": 22.05655836341757, "grad_norm": Infinity, "learning_rate": 0.0001779124063596213, "loss": 10.9654, "step": 183290 }, { "epoch": 22.057761732851986, "grad_norm": Infinity, "learning_rate": 0.0001779100215215363, "loss": 11.1651, "step": 183300 }, { "epoch": 22.0589651022864, "grad_norm": Infinity, "learning_rate": 0.0001779076365706963, "loss": 11.1447, "step": 183310 }, { "epoch": 22.060168471720818, "grad_norm": Infinity, "learning_rate": 0.00017790525150710465, "loss": 11.2377, "step": 183320 }, { "epoch": 22.061371841155236, "grad_norm": Infinity, "learning_rate": 0.0001779028663307649, "loss": 11.1066, "step": 183330 }, { "epoch": 22.06257521058965, "grad_norm": Infinity, "learning_rate": 0.00017790048104168043, "loss": 11.1419, "step": 183340 }, { "epoch": 22.063778580024067, "grad_norm": Infinity, "learning_rate": 0.00017789809563985477, "loss": 11.2599, "step": 183350 }, { "epoch": 22.064981949458485, "grad_norm": Infinity, "learning_rate": 0.00017789571012529135, "loss": 11.1508, "step": 183360 }, { "epoch": 22.0661853188929, "grad_norm": Infinity, "learning_rate": 0.00017789332449799354, "loss": 11.1465, "step": 183370 }, { "epoch": 22.067388688327316, "grad_norm": Infinity, "learning_rate": 0.0001778909387579649, "loss": 11.1886, "step": 183380 }, { "epoch": 22.068592057761734, "grad_norm": Infinity, "learning_rate": 0.00017788855290520883, "loss": 11.217, "step": 183390 }, { "epoch": 22.069795427196148, "grad_norm": Infinity, "learning_rate": 0.0001778861669397288, "loss": 11.1068, "step": 183400 }, { "epoch": 22.070998796630565, "grad_norm": Infinity, "learning_rate": 0.00017788378086152826, "loss": 11.1964, "step": 183410 }, { "epoch": 22.072202166064983, "grad_norm": Infinity, "learning_rate": 0.00017788139467061066, "loss": 11.2453, "step": 183420 }, { "epoch": 22.073405535499397, "grad_norm": Infinity, "learning_rate": 0.00017787900836697945, "loss": 11.1021, "step": 183430 }, { "epoch": 22.074608904933815, "grad_norm": Infinity, "learning_rate": 0.0001778766219506381, "loss": 11.1761, "step": 183440 }, { "epoch": 22.075812274368232, "grad_norm": Infinity, "learning_rate": 0.00017787423542159003, "loss": 11.1881, "step": 183450 }, { "epoch": 22.077015643802646, "grad_norm": Infinity, "learning_rate": 0.00017787184877983872, "loss": 11.2353, "step": 183460 }, { "epoch": 22.078219013237064, "grad_norm": Infinity, "learning_rate": 0.0001778694620253876, "loss": 11.1493, "step": 183470 }, { "epoch": 22.07942238267148, "grad_norm": Infinity, "learning_rate": 0.00017786707515824018, "loss": 11.1424, "step": 183480 }, { "epoch": 22.080625752105895, "grad_norm": Infinity, "learning_rate": 0.00017786468817839986, "loss": 11.0966, "step": 183490 }, { "epoch": 22.081829121540313, "grad_norm": Infinity, "learning_rate": 0.00017786230108587012, "loss": 11.0919, "step": 183500 }, { "epoch": 22.08303249097473, "grad_norm": Infinity, "learning_rate": 0.0001778599138806544, "loss": 11.1699, "step": 183510 }, { "epoch": 22.084235860409144, "grad_norm": Infinity, "learning_rate": 0.0001778575265627562, "loss": 11.1028, "step": 183520 }, { "epoch": 22.085439229843562, "grad_norm": Infinity, "learning_rate": 0.00017785513913217885, "loss": 11.1392, "step": 183530 }, { "epoch": 22.08664259927798, "grad_norm": Infinity, "learning_rate": 0.00017785275158892596, "loss": 11.2001, "step": 183540 }, { "epoch": 22.087845968712394, "grad_norm": Infinity, "learning_rate": 0.0001778503639330009, "loss": 11.0794, "step": 183550 }, { "epoch": 22.08904933814681, "grad_norm": Infinity, "learning_rate": 0.00017784797616440714, "loss": 11.14, "step": 183560 }, { "epoch": 22.09025270758123, "grad_norm": Infinity, "learning_rate": 0.00017784558828314815, "loss": 11.2234, "step": 183570 }, { "epoch": 22.091456077015643, "grad_norm": Infinity, "learning_rate": 0.00017784320028922735, "loss": 11.0392, "step": 183580 }, { "epoch": 22.09265944645006, "grad_norm": Infinity, "learning_rate": 0.00017784081218264825, "loss": 11.1166, "step": 183590 }, { "epoch": 22.093862815884478, "grad_norm": Infinity, "learning_rate": 0.00017783842396341425, "loss": 11.0499, "step": 183600 }, { "epoch": 22.095066185318892, "grad_norm": Infinity, "learning_rate": 0.00017783603563152885, "loss": 11.1152, "step": 183610 }, { "epoch": 22.09626955475331, "grad_norm": Infinity, "learning_rate": 0.0001778336471869955, "loss": 11.168, "step": 183620 }, { "epoch": 22.097472924187727, "grad_norm": Infinity, "learning_rate": 0.0001778312586298176, "loss": 11.1864, "step": 183630 }, { "epoch": 22.09867629362214, "grad_norm": Infinity, "learning_rate": 0.00017782886995999867, "loss": 11.1706, "step": 183640 }, { "epoch": 22.09987966305656, "grad_norm": Infinity, "learning_rate": 0.00017782648117754216, "loss": 11.0227, "step": 183650 }, { "epoch": 22.101083032490976, "grad_norm": Infinity, "learning_rate": 0.00017782409228245155, "loss": 11.0737, "step": 183660 }, { "epoch": 22.10228640192539, "grad_norm": Infinity, "learning_rate": 0.00017782170327473024, "loss": 11.1925, "step": 183670 }, { "epoch": 22.103489771359808, "grad_norm": Infinity, "learning_rate": 0.00017781931415438172, "loss": 11.0555, "step": 183680 }, { "epoch": 22.104693140794225, "grad_norm": Infinity, "learning_rate": 0.0001778169249214094, "loss": 11.2993, "step": 183690 }, { "epoch": 22.10589651022864, "grad_norm": Infinity, "learning_rate": 0.0001778145355758168, "loss": 11.1392, "step": 183700 }, { "epoch": 22.107099879663057, "grad_norm": Infinity, "learning_rate": 0.00017781214611760734, "loss": 11.1894, "step": 183710 }, { "epoch": 22.108303249097474, "grad_norm": Infinity, "learning_rate": 0.00017780975654678452, "loss": 11.152, "step": 183720 }, { "epoch": 22.10950661853189, "grad_norm": Infinity, "learning_rate": 0.00017780736686335178, "loss": 11.0664, "step": 183730 }, { "epoch": 22.110709987966306, "grad_norm": Infinity, "learning_rate": 0.00017780497706731255, "loss": 11.1938, "step": 183740 }, { "epoch": 22.111913357400724, "grad_norm": Infinity, "learning_rate": 0.0001778025871586703, "loss": 11.1373, "step": 183750 }, { "epoch": 22.113116726835138, "grad_norm": Infinity, "learning_rate": 0.00017780019713742852, "loss": 11.1037, "step": 183760 }, { "epoch": 22.114320096269555, "grad_norm": Infinity, "learning_rate": 0.00017779780700359063, "loss": 11.2328, "step": 183770 }, { "epoch": 22.115523465703973, "grad_norm": Infinity, "learning_rate": 0.00017779541675716013, "loss": 11.1739, "step": 183780 }, { "epoch": 22.116726835138387, "grad_norm": Infinity, "learning_rate": 0.00017779302639814044, "loss": 11.2937, "step": 183790 }, { "epoch": 22.117930204572804, "grad_norm": Infinity, "learning_rate": 0.00017779063592653503, "loss": 11.1634, "step": 183800 }, { "epoch": 22.119133574007222, "grad_norm": Infinity, "learning_rate": 0.00017778824534234734, "loss": 11.1178, "step": 183810 }, { "epoch": 22.120336943441636, "grad_norm": Infinity, "learning_rate": 0.0001777858546455809, "loss": 11.0466, "step": 183820 }, { "epoch": 22.121540312876053, "grad_norm": Infinity, "learning_rate": 0.0001777834638362391, "loss": 11.1625, "step": 183830 }, { "epoch": 22.12274368231047, "grad_norm": Infinity, "learning_rate": 0.00017778107291432539, "loss": 11.1816, "step": 183840 }, { "epoch": 22.123947051744885, "grad_norm": Infinity, "learning_rate": 0.0001777786818798433, "loss": 11.1585, "step": 183850 }, { "epoch": 22.125150421179303, "grad_norm": Infinity, "learning_rate": 0.00017777629073279628, "loss": 11.0821, "step": 183860 }, { "epoch": 22.126353790613717, "grad_norm": Infinity, "learning_rate": 0.0001777738994731877, "loss": 11.235, "step": 183870 }, { "epoch": 22.127557160048134, "grad_norm": Infinity, "learning_rate": 0.00017777150810102115, "loss": 11.1687, "step": 183880 }, { "epoch": 22.128760529482552, "grad_norm": Infinity, "learning_rate": 0.0001777691166163, "loss": 11.116, "step": 183890 }, { "epoch": 22.129963898916966, "grad_norm": Infinity, "learning_rate": 0.00017776672501902774, "loss": 11.144, "step": 183900 }, { "epoch": 22.131167268351383, "grad_norm": Infinity, "learning_rate": 0.00017776433330920783, "loss": 11.1895, "step": 183910 }, { "epoch": 22.1323706377858, "grad_norm": Infinity, "learning_rate": 0.00017776194148684372, "loss": 11.2596, "step": 183920 }, { "epoch": 22.133574007220215, "grad_norm": Infinity, "learning_rate": 0.00017775954955193888, "loss": 11.1265, "step": 183930 }, { "epoch": 22.134777376654633, "grad_norm": Infinity, "learning_rate": 0.0001777571575044968, "loss": 11.0285, "step": 183940 }, { "epoch": 22.13598074608905, "grad_norm": Infinity, "learning_rate": 0.00017775476534452088, "loss": 11.2185, "step": 183950 }, { "epoch": 22.137184115523464, "grad_norm": Infinity, "learning_rate": 0.00017775237307201468, "loss": 11.0788, "step": 183960 }, { "epoch": 22.13838748495788, "grad_norm": Infinity, "learning_rate": 0.00017774998068698151, "loss": 11.1457, "step": 183970 }, { "epoch": 22.1395908543923, "grad_norm": Infinity, "learning_rate": 0.00017774758818942497, "loss": 11.201, "step": 183980 }, { "epoch": 22.140794223826713, "grad_norm": Infinity, "learning_rate": 0.0001777451955793485, "loss": 11.0011, "step": 183990 }, { "epoch": 22.14199759326113, "grad_norm": Infinity, "learning_rate": 0.00017774280285675552, "loss": 11.165, "step": 184000 }, { "epoch": 22.14320096269555, "grad_norm": Infinity, "learning_rate": 0.0001777404100216495, "loss": 11.1346, "step": 184010 }, { "epoch": 22.144404332129962, "grad_norm": Infinity, "learning_rate": 0.00017773801707403388, "loss": 11.0519, "step": 184020 }, { "epoch": 22.14560770156438, "grad_norm": Infinity, "learning_rate": 0.0001777356240139122, "loss": 11.2038, "step": 184030 }, { "epoch": 22.146811070998798, "grad_norm": Infinity, "learning_rate": 0.00017773323084128789, "loss": 11.146, "step": 184040 }, { "epoch": 22.14801444043321, "grad_norm": Infinity, "learning_rate": 0.00017773083755616436, "loss": 11.2012, "step": 184050 }, { "epoch": 22.14921780986763, "grad_norm": Infinity, "learning_rate": 0.00017772844415854514, "loss": 11.147, "step": 184060 }, { "epoch": 22.150421179302047, "grad_norm": Infinity, "learning_rate": 0.00017772605064843367, "loss": 11.2395, "step": 184070 }, { "epoch": 22.15162454873646, "grad_norm": Infinity, "learning_rate": 0.00017772365702583342, "loss": 11.1637, "step": 184080 }, { "epoch": 22.15282791817088, "grad_norm": Infinity, "learning_rate": 0.00017772126329074786, "loss": 10.9943, "step": 184090 }, { "epoch": 22.154031287605296, "grad_norm": Infinity, "learning_rate": 0.0001777188694431804, "loss": 11.1741, "step": 184100 }, { "epoch": 22.15523465703971, "grad_norm": Infinity, "learning_rate": 0.0001777164754831346, "loss": 11.1082, "step": 184110 }, { "epoch": 22.156438026474127, "grad_norm": Infinity, "learning_rate": 0.00017771408141061383, "loss": 11.133, "step": 184120 }, { "epoch": 22.157641395908545, "grad_norm": Infinity, "learning_rate": 0.00017771168722562162, "loss": 11.1978, "step": 184130 }, { "epoch": 22.15884476534296, "grad_norm": Infinity, "learning_rate": 0.0001777092929281614, "loss": 11.0815, "step": 184140 }, { "epoch": 22.160048134777377, "grad_norm": Infinity, "learning_rate": 0.00017770689851823664, "loss": 11.1047, "step": 184150 }, { "epoch": 22.161251504211794, "grad_norm": Infinity, "learning_rate": 0.00017770450399585082, "loss": 11.1961, "step": 184160 }, { "epoch": 22.162454873646208, "grad_norm": Infinity, "learning_rate": 0.00017770210936100743, "loss": 11.0904, "step": 184170 }, { "epoch": 22.163658243080626, "grad_norm": Infinity, "learning_rate": 0.00017769971461370986, "loss": 11.1686, "step": 184180 }, { "epoch": 22.164861612515043, "grad_norm": Infinity, "learning_rate": 0.00017769731975396166, "loss": 11.0963, "step": 184190 }, { "epoch": 22.166064981949457, "grad_norm": Infinity, "learning_rate": 0.00017769492478176622, "loss": 11.1637, "step": 184200 }, { "epoch": 22.167268351383875, "grad_norm": Infinity, "learning_rate": 0.00017769252969712705, "loss": 11.1323, "step": 184210 }, { "epoch": 22.168471720818292, "grad_norm": Infinity, "learning_rate": 0.00017769013450004764, "loss": 11.0723, "step": 184220 }, { "epoch": 22.169675090252706, "grad_norm": Infinity, "learning_rate": 0.0001776877391905314, "loss": 11.1148, "step": 184230 }, { "epoch": 22.170878459687124, "grad_norm": Infinity, "learning_rate": 0.00017768534376858181, "loss": 11.1893, "step": 184240 }, { "epoch": 22.17208182912154, "grad_norm": Infinity, "learning_rate": 0.00017768294823420236, "loss": 11.0502, "step": 184250 }, { "epoch": 22.173285198555956, "grad_norm": Infinity, "learning_rate": 0.0001776805525873965, "loss": 11.2387, "step": 184260 }, { "epoch": 22.174488567990373, "grad_norm": Infinity, "learning_rate": 0.0001776781568281677, "loss": 11.1015, "step": 184270 }, { "epoch": 22.17569193742479, "grad_norm": Infinity, "learning_rate": 0.00017767576095651944, "loss": 11.1129, "step": 184280 }, { "epoch": 22.176895306859205, "grad_norm": Infinity, "learning_rate": 0.00017767336497245514, "loss": 11.2017, "step": 184290 }, { "epoch": 22.178098676293622, "grad_norm": Infinity, "learning_rate": 0.00017767096887597833, "loss": 11.1187, "step": 184300 }, { "epoch": 22.17930204572804, "grad_norm": Infinity, "learning_rate": 0.00017766857266709245, "loss": 11.1473, "step": 184310 }, { "epoch": 22.180505415162454, "grad_norm": Infinity, "learning_rate": 0.00017766617634580102, "loss": 11.0872, "step": 184320 }, { "epoch": 22.18170878459687, "grad_norm": Infinity, "learning_rate": 0.00017766377991210739, "loss": 11.2037, "step": 184330 }, { "epoch": 22.18291215403129, "grad_norm": Infinity, "learning_rate": 0.00017766138336601509, "loss": 11.0925, "step": 184340 }, { "epoch": 22.184115523465703, "grad_norm": Infinity, "learning_rate": 0.00017765898670752764, "loss": 11.1631, "step": 184350 }, { "epoch": 22.18531889290012, "grad_norm": Infinity, "learning_rate": 0.00017765658993664844, "loss": 11.1585, "step": 184360 }, { "epoch": 22.186522262334538, "grad_norm": Infinity, "learning_rate": 0.000177654193053381, "loss": 11.1756, "step": 184370 }, { "epoch": 22.187725631768952, "grad_norm": Infinity, "learning_rate": 0.00017765179605772874, "loss": 11.1538, "step": 184380 }, { "epoch": 22.18892900120337, "grad_norm": Infinity, "learning_rate": 0.00017764939894969519, "loss": 11.2333, "step": 184390 }, { "epoch": 22.190132370637787, "grad_norm": Infinity, "learning_rate": 0.00017764700172928378, "loss": 11.1714, "step": 184400 }, { "epoch": 22.1913357400722, "grad_norm": Infinity, "learning_rate": 0.00017764460439649796, "loss": 11.1207, "step": 184410 }, { "epoch": 22.19253910950662, "grad_norm": Infinity, "learning_rate": 0.00017764220695134125, "loss": 11.2096, "step": 184420 }, { "epoch": 22.193742478941036, "grad_norm": Infinity, "learning_rate": 0.0001776398093938171, "loss": 11.1228, "step": 184430 }, { "epoch": 22.19494584837545, "grad_norm": Infinity, "learning_rate": 0.00017763741172392895, "loss": 11.218, "step": 184440 }, { "epoch": 22.196149217809868, "grad_norm": Infinity, "learning_rate": 0.00017763501394168035, "loss": 11.1506, "step": 184450 }, { "epoch": 22.197352587244286, "grad_norm": Infinity, "learning_rate": 0.00017763261604707468, "loss": 11.0503, "step": 184460 }, { "epoch": 22.1985559566787, "grad_norm": Infinity, "learning_rate": 0.00017763021804011547, "loss": 11.0846, "step": 184470 }, { "epoch": 22.199759326113117, "grad_norm": Infinity, "learning_rate": 0.00017762781992080613, "loss": 11.1866, "step": 184480 }, { "epoch": 22.200962695547535, "grad_norm": Infinity, "learning_rate": 0.0001776254216891502, "loss": 11.128, "step": 184490 }, { "epoch": 22.20216606498195, "grad_norm": Infinity, "learning_rate": 0.0001776230233451511, "loss": 11.1469, "step": 184500 }, { "epoch": 22.203369434416366, "grad_norm": Infinity, "learning_rate": 0.00017762062488881235, "loss": 11.2191, "step": 184510 }, { "epoch": 22.204572803850784, "grad_norm": Infinity, "learning_rate": 0.00017761822632013737, "loss": 10.9944, "step": 184520 }, { "epoch": 22.205776173285198, "grad_norm": Infinity, "learning_rate": 0.00017761582763912967, "loss": 11.0869, "step": 184530 }, { "epoch": 22.206979542719615, "grad_norm": Infinity, "learning_rate": 0.0001776134288457927, "loss": 11.097, "step": 184540 }, { "epoch": 22.20818291215403, "grad_norm": Infinity, "learning_rate": 0.0001776110299401299, "loss": 11.2775, "step": 184550 }, { "epoch": 22.209386281588447, "grad_norm": Infinity, "learning_rate": 0.00017760863092214485, "loss": 11.1367, "step": 184560 }, { "epoch": 22.210589651022865, "grad_norm": Infinity, "learning_rate": 0.0001776062317918409, "loss": 11.0717, "step": 184570 }, { "epoch": 22.21179302045728, "grad_norm": Infinity, "learning_rate": 0.00017760383254922157, "loss": 11.1684, "step": 184580 }, { "epoch": 22.212996389891696, "grad_norm": Infinity, "learning_rate": 0.00017760143319429036, "loss": 11.1855, "step": 184590 }, { "epoch": 22.214199759326114, "grad_norm": Infinity, "learning_rate": 0.00017759903372705068, "loss": 11.0781, "step": 184600 }, { "epoch": 22.215403128760528, "grad_norm": Infinity, "learning_rate": 0.00017759663414750607, "loss": 11.1864, "step": 184610 }, { "epoch": 22.216606498194945, "grad_norm": Infinity, "learning_rate": 0.00017759423445565996, "loss": 11.1486, "step": 184620 }, { "epoch": 22.217809867629363, "grad_norm": Infinity, "learning_rate": 0.00017759183465151584, "loss": 11.0688, "step": 184630 }, { "epoch": 22.219013237063777, "grad_norm": Infinity, "learning_rate": 0.0001775894347350772, "loss": 11.1402, "step": 184640 }, { "epoch": 22.220216606498195, "grad_norm": Infinity, "learning_rate": 0.00017758703470634743, "loss": 11.1887, "step": 184650 }, { "epoch": 22.221419975932612, "grad_norm": Infinity, "learning_rate": 0.00017758463456533013, "loss": 11.1494, "step": 184660 }, { "epoch": 22.222623345367026, "grad_norm": Infinity, "learning_rate": 0.00017758223431202868, "loss": 11.1789, "step": 184670 }, { "epoch": 22.223826714801444, "grad_norm": Infinity, "learning_rate": 0.00017757983394644657, "loss": 11.166, "step": 184680 }, { "epoch": 22.22503008423586, "grad_norm": Infinity, "learning_rate": 0.0001775774334685873, "loss": 11.169, "step": 184690 }, { "epoch": 22.226233453670275, "grad_norm": Infinity, "learning_rate": 0.00017757503287845435, "loss": 11.1179, "step": 184700 }, { "epoch": 22.227436823104693, "grad_norm": Infinity, "learning_rate": 0.00017757263217605115, "loss": 11.1811, "step": 184710 }, { "epoch": 22.22864019253911, "grad_norm": Infinity, "learning_rate": 0.0001775702313613812, "loss": 11.283, "step": 184720 }, { "epoch": 22.229843561973524, "grad_norm": Infinity, "learning_rate": 0.000177567830434448, "loss": 11.1423, "step": 184730 }, { "epoch": 22.231046931407942, "grad_norm": Infinity, "learning_rate": 0.00017756542939525497, "loss": 11.1595, "step": 184740 }, { "epoch": 22.23225030084236, "grad_norm": Infinity, "learning_rate": 0.00017756302824380562, "loss": 11.193, "step": 184750 }, { "epoch": 22.233453670276774, "grad_norm": Infinity, "learning_rate": 0.00017756062698010342, "loss": 11.1113, "step": 184760 }, { "epoch": 22.23465703971119, "grad_norm": Infinity, "learning_rate": 0.00017755822560415184, "loss": 11.0974, "step": 184770 }, { "epoch": 22.23586040914561, "grad_norm": Infinity, "learning_rate": 0.00017755582411595438, "loss": 11.1278, "step": 184780 }, { "epoch": 22.237063778580023, "grad_norm": Infinity, "learning_rate": 0.0001775534225155145, "loss": 11.1743, "step": 184790 }, { "epoch": 22.23826714801444, "grad_norm": Infinity, "learning_rate": 0.0001775510208028356, "loss": 11.2146, "step": 184800 }, { "epoch": 22.239470517448858, "grad_norm": Infinity, "learning_rate": 0.00017754861897792127, "loss": 11.0847, "step": 184810 }, { "epoch": 22.240673886883272, "grad_norm": Infinity, "learning_rate": 0.00017754621704077497, "loss": 11.1111, "step": 184820 }, { "epoch": 22.24187725631769, "grad_norm": Infinity, "learning_rate": 0.0001775438149914001, "loss": 11.1007, "step": 184830 }, { "epoch": 22.243080625752107, "grad_norm": Infinity, "learning_rate": 0.0001775414128298002, "loss": 11.1749, "step": 184840 }, { "epoch": 22.24428399518652, "grad_norm": Infinity, "learning_rate": 0.00017753901055597878, "loss": 11.1378, "step": 184850 }, { "epoch": 22.24548736462094, "grad_norm": Infinity, "learning_rate": 0.00017753660816993922, "loss": 11.0832, "step": 184860 }, { "epoch": 22.246690734055356, "grad_norm": Infinity, "learning_rate": 0.00017753420567168506, "loss": 11.1766, "step": 184870 }, { "epoch": 22.24789410348977, "grad_norm": Infinity, "learning_rate": 0.00017753180306121974, "loss": 11.1564, "step": 184880 }, { "epoch": 22.249097472924188, "grad_norm": Infinity, "learning_rate": 0.00017752940033854676, "loss": 11.1274, "step": 184890 }, { "epoch": 22.250300842358605, "grad_norm": Infinity, "learning_rate": 0.00017752699750366962, "loss": 11.1084, "step": 184900 }, { "epoch": 22.25150421179302, "grad_norm": Infinity, "learning_rate": 0.0001775245945565918, "loss": 11.0816, "step": 184910 }, { "epoch": 22.252707581227437, "grad_norm": Infinity, "learning_rate": 0.0001775221914973167, "loss": 11.2123, "step": 184920 }, { "epoch": 22.253910950661854, "grad_norm": Infinity, "learning_rate": 0.00017751978832584786, "loss": 11.1126, "step": 184930 }, { "epoch": 22.25511432009627, "grad_norm": Infinity, "learning_rate": 0.00017751738504218875, "loss": 11.0857, "step": 184940 }, { "epoch": 22.256317689530686, "grad_norm": Infinity, "learning_rate": 0.00017751498164634283, "loss": 11.1787, "step": 184950 }, { "epoch": 22.257521058965104, "grad_norm": Infinity, "learning_rate": 0.00017751257813831362, "loss": 11.3094, "step": 184960 }, { "epoch": 22.258724428399518, "grad_norm": Infinity, "learning_rate": 0.00017751017451810456, "loss": 11.1281, "step": 184970 }, { "epoch": 22.259927797833935, "grad_norm": Infinity, "learning_rate": 0.00017750777078571917, "loss": 11.07, "step": 184980 }, { "epoch": 22.261131167268353, "grad_norm": Infinity, "learning_rate": 0.00017750536694116086, "loss": 11.1521, "step": 184990 }, { "epoch": 22.262334536702767, "grad_norm": Infinity, "learning_rate": 0.0001775029629844332, "loss": 11.1335, "step": 185000 }, { "epoch": 22.263537906137184, "grad_norm": Infinity, "learning_rate": 0.00017750055891553953, "loss": 11.103, "step": 185010 }, { "epoch": 22.264741275571602, "grad_norm": Infinity, "learning_rate": 0.00017749815473448347, "loss": 11.2661, "step": 185020 }, { "epoch": 22.265944645006016, "grad_norm": Infinity, "learning_rate": 0.00017749575044126846, "loss": 11.1043, "step": 185030 }, { "epoch": 22.267148014440433, "grad_norm": Infinity, "learning_rate": 0.00017749334603589796, "loss": 11.1832, "step": 185040 }, { "epoch": 22.26835138387485, "grad_norm": Infinity, "learning_rate": 0.00017749094151837545, "loss": 11.261, "step": 185050 }, { "epoch": 22.269554753309265, "grad_norm": Infinity, "learning_rate": 0.0001774885368887044, "loss": 11.1062, "step": 185060 }, { "epoch": 22.270758122743683, "grad_norm": Infinity, "learning_rate": 0.00017748613214688832, "loss": 11.1086, "step": 185070 }, { "epoch": 22.2719614921781, "grad_norm": Infinity, "learning_rate": 0.00017748372729293067, "loss": 11.1146, "step": 185080 }, { "epoch": 22.273164861612514, "grad_norm": Infinity, "learning_rate": 0.00017748132232683493, "loss": 10.9917, "step": 185090 }, { "epoch": 22.27436823104693, "grad_norm": Infinity, "learning_rate": 0.0001774789172486046, "loss": 11.199, "step": 185100 }, { "epoch": 22.27557160048135, "grad_norm": Infinity, "learning_rate": 0.00017747651205824317, "loss": 11.1472, "step": 185110 }, { "epoch": 22.276774969915763, "grad_norm": Infinity, "learning_rate": 0.00017747410675575406, "loss": 11.224, "step": 185120 }, { "epoch": 22.27797833935018, "grad_norm": Infinity, "learning_rate": 0.0001774717013411408, "loss": 10.9945, "step": 185130 }, { "epoch": 22.2791817087846, "grad_norm": Infinity, "learning_rate": 0.00017746929581440686, "loss": 11.1843, "step": 185140 }, { "epoch": 22.280385078219012, "grad_norm": Infinity, "learning_rate": 0.00017746689017555573, "loss": 11.1287, "step": 185150 }, { "epoch": 22.28158844765343, "grad_norm": Infinity, "learning_rate": 0.0001774644844245909, "loss": 11.0673, "step": 185160 }, { "epoch": 22.282791817087848, "grad_norm": Infinity, "learning_rate": 0.0001774620785615158, "loss": 11.2445, "step": 185170 }, { "epoch": 22.28399518652226, "grad_norm": Infinity, "learning_rate": 0.00017745967258633397, "loss": 11.1657, "step": 185180 }, { "epoch": 22.28519855595668, "grad_norm": Infinity, "learning_rate": 0.00017745726649904885, "loss": 11.1121, "step": 185190 }, { "epoch": 22.286401925391097, "grad_norm": Infinity, "learning_rate": 0.00017745486029966396, "loss": 11.1775, "step": 185200 }, { "epoch": 22.28760529482551, "grad_norm": Infinity, "learning_rate": 0.00017745245398818278, "loss": 11.2244, "step": 185210 }, { "epoch": 22.28880866425993, "grad_norm": Infinity, "learning_rate": 0.00017745004756460875, "loss": 11.0251, "step": 185220 }, { "epoch": 22.290012033694346, "grad_norm": Infinity, "learning_rate": 0.0001774476410289454, "loss": 11.1388, "step": 185230 }, { "epoch": 22.29121540312876, "grad_norm": Infinity, "learning_rate": 0.00017744523438119615, "loss": 11.2072, "step": 185240 }, { "epoch": 22.292418772563177, "grad_norm": Infinity, "learning_rate": 0.00017744282762136457, "loss": 11.1566, "step": 185250 }, { "epoch": 22.29362214199759, "grad_norm": Infinity, "learning_rate": 0.00017744042074945409, "loss": 11.132, "step": 185260 }, { "epoch": 22.29482551143201, "grad_norm": Infinity, "learning_rate": 0.00017743801376546818, "loss": 11.2767, "step": 185270 }, { "epoch": 22.296028880866427, "grad_norm": Infinity, "learning_rate": 0.00017743560666941036, "loss": 11.1979, "step": 185280 }, { "epoch": 22.29723225030084, "grad_norm": Infinity, "learning_rate": 0.00017743319946128408, "loss": 11.1081, "step": 185290 }, { "epoch": 22.29843561973526, "grad_norm": Infinity, "learning_rate": 0.0001774307921410929, "loss": 11.0549, "step": 185300 }, { "epoch": 22.299638989169676, "grad_norm": Infinity, "learning_rate": 0.00017742838470884018, "loss": 11.1552, "step": 185310 }, { "epoch": 22.30084235860409, "grad_norm": Infinity, "learning_rate": 0.0001774259771645295, "loss": 11.0598, "step": 185320 }, { "epoch": 22.302045728038507, "grad_norm": Infinity, "learning_rate": 0.00017742356950816433, "loss": 11.0117, "step": 185330 }, { "epoch": 22.303249097472925, "grad_norm": Infinity, "learning_rate": 0.0001774211617397481, "loss": 11.1422, "step": 185340 }, { "epoch": 22.30445246690734, "grad_norm": Infinity, "learning_rate": 0.00017741875385928437, "loss": 11.1606, "step": 185350 }, { "epoch": 22.305655836341757, "grad_norm": Infinity, "learning_rate": 0.00017741634586677658, "loss": 11.1669, "step": 185360 }, { "epoch": 22.306859205776174, "grad_norm": Infinity, "learning_rate": 0.0001774139377622282, "loss": 11.093, "step": 185370 }, { "epoch": 22.308062575210588, "grad_norm": Infinity, "learning_rate": 0.00017741152954564278, "loss": 11.0746, "step": 185380 }, { "epoch": 22.309265944645006, "grad_norm": Infinity, "learning_rate": 0.00017740912121702373, "loss": 11.0157, "step": 185390 }, { "epoch": 22.310469314079423, "grad_norm": Infinity, "learning_rate": 0.0001774067127763746, "loss": 11.237, "step": 185400 }, { "epoch": 22.311672683513837, "grad_norm": Infinity, "learning_rate": 0.00017740430422369882, "loss": 11.1536, "step": 185410 }, { "epoch": 22.312876052948255, "grad_norm": Infinity, "learning_rate": 0.00017740189555899991, "loss": 11.2214, "step": 185420 }, { "epoch": 22.314079422382672, "grad_norm": Infinity, "learning_rate": 0.00017739948678228133, "loss": 11.1147, "step": 185430 }, { "epoch": 22.315282791817086, "grad_norm": Infinity, "learning_rate": 0.00017739707789354663, "loss": 11.1594, "step": 185440 }, { "epoch": 22.316486161251504, "grad_norm": Infinity, "learning_rate": 0.0001773946688927992, "loss": 11.2079, "step": 185450 }, { "epoch": 22.31768953068592, "grad_norm": Infinity, "learning_rate": 0.00017739225978004262, "loss": 11.1436, "step": 185460 }, { "epoch": 22.318892900120336, "grad_norm": Infinity, "learning_rate": 0.00017738985055528028, "loss": 11.1081, "step": 185470 }, { "epoch": 22.320096269554753, "grad_norm": Infinity, "learning_rate": 0.00017738744121851576, "loss": 11.1852, "step": 185480 }, { "epoch": 22.32129963898917, "grad_norm": Infinity, "learning_rate": 0.0001773850317697525, "loss": 11.0762, "step": 185490 }, { "epoch": 22.322503008423585, "grad_norm": Infinity, "learning_rate": 0.000177382622208994, "loss": 11.2403, "step": 185500 }, { "epoch": 22.323706377858002, "grad_norm": Infinity, "learning_rate": 0.00017738021253624372, "loss": 11.1424, "step": 185510 }, { "epoch": 22.32490974729242, "grad_norm": Infinity, "learning_rate": 0.00017737780275150518, "loss": 11.1715, "step": 185520 }, { "epoch": 22.326113116726834, "grad_norm": Infinity, "learning_rate": 0.00017737539285478184, "loss": 11.1282, "step": 185530 }, { "epoch": 22.32731648616125, "grad_norm": Infinity, "learning_rate": 0.00017737298284607722, "loss": 11.1532, "step": 185540 }, { "epoch": 22.32851985559567, "grad_norm": Infinity, "learning_rate": 0.0001773705727253948, "loss": 11.2329, "step": 185550 }, { "epoch": 22.329723225030083, "grad_norm": Infinity, "learning_rate": 0.00017736816249273806, "loss": 11.1544, "step": 185560 }, { "epoch": 22.3309265944645, "grad_norm": Infinity, "learning_rate": 0.00017736575214811044, "loss": 11.1745, "step": 185570 }, { "epoch": 22.332129963898918, "grad_norm": Infinity, "learning_rate": 0.0001773633416915155, "loss": 11.2214, "step": 185580 }, { "epoch": 22.333333333333332, "grad_norm": Infinity, "learning_rate": 0.00017736093112295673, "loss": 11.1923, "step": 185590 }, { "epoch": 22.33453670276775, "grad_norm": Infinity, "learning_rate": 0.00017735852044243756, "loss": 11.1805, "step": 185600 }, { "epoch": 22.335740072202167, "grad_norm": Infinity, "learning_rate": 0.00017735610964996154, "loss": 11.102, "step": 185610 }, { "epoch": 22.33694344163658, "grad_norm": Infinity, "learning_rate": 0.0001773536987455321, "loss": 11.2309, "step": 185620 }, { "epoch": 22.338146811071, "grad_norm": Infinity, "learning_rate": 0.0001773512877291528, "loss": 11.2199, "step": 185630 }, { "epoch": 22.339350180505416, "grad_norm": Infinity, "learning_rate": 0.00017734887660082705, "loss": 11.3025, "step": 185640 }, { "epoch": 22.34055354993983, "grad_norm": Infinity, "learning_rate": 0.0001773464653605584, "loss": 11.1069, "step": 185650 }, { "epoch": 22.341756919374248, "grad_norm": Infinity, "learning_rate": 0.00017734405400835032, "loss": 11.0437, "step": 185660 }, { "epoch": 22.342960288808666, "grad_norm": Infinity, "learning_rate": 0.00017734164254420629, "loss": 11.1679, "step": 185670 }, { "epoch": 22.34416365824308, "grad_norm": Infinity, "learning_rate": 0.0001773392309681298, "loss": 11.2021, "step": 185680 }, { "epoch": 22.345367027677497, "grad_norm": Infinity, "learning_rate": 0.00017733681928012434, "loss": 11.2601, "step": 185690 }, { "epoch": 22.346570397111915, "grad_norm": Infinity, "learning_rate": 0.00017733440748019346, "loss": 11.1717, "step": 185700 }, { "epoch": 22.34777376654633, "grad_norm": Infinity, "learning_rate": 0.00017733199556834058, "loss": 11.0309, "step": 185710 }, { "epoch": 22.348977135980746, "grad_norm": Infinity, "learning_rate": 0.0001773295835445692, "loss": 11.128, "step": 185720 }, { "epoch": 22.350180505415164, "grad_norm": Infinity, "learning_rate": 0.0001773271714088828, "loss": 11.0878, "step": 185730 }, { "epoch": 22.351383874849578, "grad_norm": Infinity, "learning_rate": 0.00017732475916128491, "loss": 11.1033, "step": 185740 }, { "epoch": 22.352587244283995, "grad_norm": Infinity, "learning_rate": 0.00017732234680177902, "loss": 11.1801, "step": 185750 }, { "epoch": 22.353790613718413, "grad_norm": Infinity, "learning_rate": 0.00017731993433036855, "loss": 11.2125, "step": 185760 }, { "epoch": 22.354993983152827, "grad_norm": Infinity, "learning_rate": 0.0001773175217470571, "loss": 11.2979, "step": 185770 }, { "epoch": 22.356197352587245, "grad_norm": Infinity, "learning_rate": 0.00017731510905184808, "loss": 11.0352, "step": 185780 }, { "epoch": 22.357400722021662, "grad_norm": Infinity, "learning_rate": 0.00017731269624474502, "loss": 11.0381, "step": 185790 }, { "epoch": 22.358604091456076, "grad_norm": Infinity, "learning_rate": 0.0001773102833257514, "loss": 11.1038, "step": 185800 }, { "epoch": 22.359807460890494, "grad_norm": Infinity, "learning_rate": 0.0001773078702948707, "loss": 11.1792, "step": 185810 }, { "epoch": 22.36101083032491, "grad_norm": Infinity, "learning_rate": 0.00017730545715210645, "loss": 11.1818, "step": 185820 }, { "epoch": 22.362214199759325, "grad_norm": Infinity, "learning_rate": 0.00017730304389746207, "loss": 11.0299, "step": 185830 }, { "epoch": 22.363417569193743, "grad_norm": Infinity, "learning_rate": 0.00017730063053094115, "loss": 11.2224, "step": 185840 }, { "epoch": 22.36462093862816, "grad_norm": Infinity, "learning_rate": 0.0001772982170525471, "loss": 11.2642, "step": 185850 }, { "epoch": 22.365824308062574, "grad_norm": Infinity, "learning_rate": 0.00017729580346228347, "loss": 11.1858, "step": 185860 }, { "epoch": 22.367027677496992, "grad_norm": Infinity, "learning_rate": 0.00017729338976015373, "loss": 11.1779, "step": 185870 }, { "epoch": 22.36823104693141, "grad_norm": Infinity, "learning_rate": 0.00017729097594616135, "loss": 11.0451, "step": 185880 }, { "epoch": 22.369434416365824, "grad_norm": Infinity, "learning_rate": 0.00017728856202030988, "loss": 11.1909, "step": 185890 }, { "epoch": 22.37063778580024, "grad_norm": Infinity, "learning_rate": 0.00017728614798260273, "loss": 11.1536, "step": 185900 }, { "epoch": 22.37184115523466, "grad_norm": Infinity, "learning_rate": 0.0001772837338330435, "loss": 11.3102, "step": 185910 }, { "epoch": 22.373044524669073, "grad_norm": Infinity, "learning_rate": 0.00017728131957163556, "loss": 11.2002, "step": 185920 }, { "epoch": 22.37424789410349, "grad_norm": Infinity, "learning_rate": 0.00017727890519838254, "loss": 11.1803, "step": 185930 }, { "epoch": 22.375451263537904, "grad_norm": Infinity, "learning_rate": 0.0001772764907132878, "loss": 11.2243, "step": 185940 }, { "epoch": 22.376654632972322, "grad_norm": Infinity, "learning_rate": 0.00017727407611635494, "loss": 11.1154, "step": 185950 }, { "epoch": 22.37785800240674, "grad_norm": Infinity, "learning_rate": 0.0001772716614075874, "loss": 11.1048, "step": 185960 }, { "epoch": 22.379061371841154, "grad_norm": Infinity, "learning_rate": 0.0001772692465869887, "loss": 11.2587, "step": 185970 }, { "epoch": 22.38026474127557, "grad_norm": Infinity, "learning_rate": 0.0001772668316545623, "loss": 11.2595, "step": 185980 }, { "epoch": 22.38146811070999, "grad_norm": Infinity, "learning_rate": 0.00017726441661031173, "loss": 11.1677, "step": 185990 }, { "epoch": 22.382671480144403, "grad_norm": Infinity, "learning_rate": 0.00017726200145424045, "loss": 11.0761, "step": 186000 }, { "epoch": 22.38387484957882, "grad_norm": Infinity, "learning_rate": 0.000177259586186352, "loss": 11.1223, "step": 186010 }, { "epoch": 22.385078219013238, "grad_norm": Infinity, "learning_rate": 0.00017725717080664988, "loss": 11.1536, "step": 186020 }, { "epoch": 22.386281588447652, "grad_norm": Infinity, "learning_rate": 0.00017725475531513754, "loss": 11.032, "step": 186030 }, { "epoch": 22.38748495788207, "grad_norm": Infinity, "learning_rate": 0.00017725233971181847, "loss": 11.1847, "step": 186040 }, { "epoch": 22.388688327316487, "grad_norm": Infinity, "learning_rate": 0.00017724992399669622, "loss": 11.1611, "step": 186050 }, { "epoch": 22.3898916967509, "grad_norm": Infinity, "learning_rate": 0.00017724750816977425, "loss": 11.1265, "step": 186060 }, { "epoch": 22.39109506618532, "grad_norm": Infinity, "learning_rate": 0.00017724509223105605, "loss": 11.1782, "step": 186070 }, { "epoch": 22.392298435619736, "grad_norm": Infinity, "learning_rate": 0.00017724267618054516, "loss": 11.1578, "step": 186080 }, { "epoch": 22.39350180505415, "grad_norm": Infinity, "learning_rate": 0.00017724026001824506, "loss": 11.0779, "step": 186090 }, { "epoch": 22.394705174488568, "grad_norm": Infinity, "learning_rate": 0.00017723784374415916, "loss": 11.2025, "step": 186100 }, { "epoch": 22.395908543922985, "grad_norm": Infinity, "learning_rate": 0.0001772354273582911, "loss": 11.1966, "step": 186110 }, { "epoch": 22.3971119133574, "grad_norm": Infinity, "learning_rate": 0.00017723301086064428, "loss": 11.1856, "step": 186120 }, { "epoch": 22.398315282791817, "grad_norm": Infinity, "learning_rate": 0.00017723059425122224, "loss": 11.1591, "step": 186130 }, { "epoch": 22.399518652226234, "grad_norm": Infinity, "learning_rate": 0.00017722817753002844, "loss": 11.214, "step": 186140 }, { "epoch": 22.40072202166065, "grad_norm": Infinity, "learning_rate": 0.0001772257606970664, "loss": 11.1031, "step": 186150 }, { "epoch": 22.401925391095066, "grad_norm": Infinity, "learning_rate": 0.00017722334375233964, "loss": 11.1691, "step": 186160 }, { "epoch": 22.403128760529484, "grad_norm": Infinity, "learning_rate": 0.00017722092669585163, "loss": 11.1589, "step": 186170 }, { "epoch": 22.404332129963898, "grad_norm": Infinity, "learning_rate": 0.00017721850952760585, "loss": 11.175, "step": 186180 }, { "epoch": 22.405535499398315, "grad_norm": Infinity, "learning_rate": 0.00017721609224760588, "loss": 11.1472, "step": 186190 }, { "epoch": 22.406738868832733, "grad_norm": Infinity, "learning_rate": 0.0001772136748558551, "loss": 11.1941, "step": 186200 }, { "epoch": 22.407942238267147, "grad_norm": Infinity, "learning_rate": 0.0001772112573523571, "loss": 10.985, "step": 186210 }, { "epoch": 22.409145607701564, "grad_norm": Infinity, "learning_rate": 0.00017720883973711533, "loss": 11.2462, "step": 186220 }, { "epoch": 22.410348977135982, "grad_norm": Infinity, "learning_rate": 0.0001772064220101333, "loss": 11.1168, "step": 186230 }, { "epoch": 22.411552346570396, "grad_norm": Infinity, "learning_rate": 0.00017720400417141455, "loss": 11.1905, "step": 186240 }, { "epoch": 22.412755716004813, "grad_norm": Infinity, "learning_rate": 0.00017720158622096252, "loss": 11.0778, "step": 186250 }, { "epoch": 22.41395908543923, "grad_norm": Infinity, "learning_rate": 0.00017719916815878075, "loss": 11.1179, "step": 186260 }, { "epoch": 22.415162454873645, "grad_norm": Infinity, "learning_rate": 0.0001771967499848727, "loss": 11.2511, "step": 186270 }, { "epoch": 22.416365824308063, "grad_norm": Infinity, "learning_rate": 0.00017719433169924192, "loss": 11.1963, "step": 186280 }, { "epoch": 22.41756919374248, "grad_norm": Infinity, "learning_rate": 0.00017719191330189187, "loss": 11.1283, "step": 186290 }, { "epoch": 22.418772563176894, "grad_norm": Infinity, "learning_rate": 0.00017718949479282606, "loss": 11.1111, "step": 186300 }, { "epoch": 22.41997593261131, "grad_norm": Infinity, "learning_rate": 0.00017718707617204798, "loss": 11.0909, "step": 186310 }, { "epoch": 22.42117930204573, "grad_norm": Infinity, "learning_rate": 0.00017718465743956117, "loss": 11.1416, "step": 186320 }, { "epoch": 22.422382671480143, "grad_norm": Infinity, "learning_rate": 0.00017718223859536911, "loss": 11.1364, "step": 186330 }, { "epoch": 22.42358604091456, "grad_norm": Infinity, "learning_rate": 0.0001771798196394753, "loss": 11.1929, "step": 186340 }, { "epoch": 22.42478941034898, "grad_norm": Infinity, "learning_rate": 0.0001771774005718832, "loss": 11.2197, "step": 186350 }, { "epoch": 22.425992779783392, "grad_norm": Infinity, "learning_rate": 0.00017717498139259633, "loss": 11.0222, "step": 186360 }, { "epoch": 22.42719614921781, "grad_norm": Infinity, "learning_rate": 0.00017717256210161825, "loss": 11.1915, "step": 186370 }, { "epoch": 22.428399518652228, "grad_norm": Infinity, "learning_rate": 0.0001771701426989524, "loss": 11.1596, "step": 186380 }, { "epoch": 22.42960288808664, "grad_norm": Infinity, "learning_rate": 0.00017716772318460234, "loss": 11.2313, "step": 186390 }, { "epoch": 22.43080625752106, "grad_norm": Infinity, "learning_rate": 0.0001771653035585715, "loss": 11.0301, "step": 186400 }, { "epoch": 22.432009626955477, "grad_norm": Infinity, "learning_rate": 0.00017716288382086344, "loss": 11.1357, "step": 186410 }, { "epoch": 22.43321299638989, "grad_norm": Infinity, "learning_rate": 0.00017716046397148161, "loss": 11.1981, "step": 186420 }, { "epoch": 22.43441636582431, "grad_norm": Infinity, "learning_rate": 0.00017715804401042954, "loss": 11.0884, "step": 186430 }, { "epoch": 22.435619735258726, "grad_norm": Infinity, "learning_rate": 0.00017715562393771074, "loss": 11.221, "step": 186440 }, { "epoch": 22.43682310469314, "grad_norm": Infinity, "learning_rate": 0.00017715320375332868, "loss": 11.1002, "step": 186450 }, { "epoch": 22.438026474127557, "grad_norm": Infinity, "learning_rate": 0.00017715078345728692, "loss": 11.1157, "step": 186460 }, { "epoch": 22.439229843561975, "grad_norm": Infinity, "learning_rate": 0.0001771483630495889, "loss": 11.2329, "step": 186470 }, { "epoch": 22.44043321299639, "grad_norm": Infinity, "learning_rate": 0.00017714594253023816, "loss": 11.1669, "step": 186480 }, { "epoch": 22.441636582430807, "grad_norm": Infinity, "learning_rate": 0.0001771435218992382, "loss": 11.1054, "step": 186490 }, { "epoch": 22.442839951865224, "grad_norm": Infinity, "learning_rate": 0.0001771411011565925, "loss": 11.1214, "step": 186500 }, { "epoch": 22.444043321299638, "grad_norm": Infinity, "learning_rate": 0.0001771386803023046, "loss": 11.1538, "step": 186510 }, { "epoch": 22.445246690734056, "grad_norm": Infinity, "learning_rate": 0.000177136259336378, "loss": 11.0772, "step": 186520 }, { "epoch": 22.446450060168473, "grad_norm": Infinity, "learning_rate": 0.00017713383825881615, "loss": 11.1226, "step": 186530 }, { "epoch": 22.447653429602887, "grad_norm": Infinity, "learning_rate": 0.00017713141706962262, "loss": 11.1928, "step": 186540 }, { "epoch": 22.448856799037305, "grad_norm": Infinity, "learning_rate": 0.00017712899576880086, "loss": 11.0675, "step": 186550 }, { "epoch": 22.450060168471722, "grad_norm": Infinity, "learning_rate": 0.0001771265743563544, "loss": 11.0459, "step": 186560 }, { "epoch": 22.451263537906136, "grad_norm": Infinity, "learning_rate": 0.0001771241528322868, "loss": 11.237, "step": 186570 }, { "epoch": 22.452466907340554, "grad_norm": Infinity, "learning_rate": 0.00017712173119660145, "loss": 11.1534, "step": 186580 }, { "epoch": 22.45367027677497, "grad_norm": Infinity, "learning_rate": 0.00017711930944930195, "loss": 11.2796, "step": 186590 }, { "epoch": 22.454873646209386, "grad_norm": Infinity, "learning_rate": 0.00017711688759039176, "loss": 11.1256, "step": 186600 }, { "epoch": 22.456077015643803, "grad_norm": Infinity, "learning_rate": 0.0001771144656198744, "loss": 10.9658, "step": 186610 }, { "epoch": 22.45728038507822, "grad_norm": Infinity, "learning_rate": 0.00017711204353775334, "loss": 11.1732, "step": 186620 }, { "epoch": 22.458483754512635, "grad_norm": Infinity, "learning_rate": 0.00017710962134403216, "loss": 11.2053, "step": 186630 }, { "epoch": 22.459687123947052, "grad_norm": Infinity, "learning_rate": 0.00017710719903871427, "loss": 11.0598, "step": 186640 }, { "epoch": 22.460890493381466, "grad_norm": Infinity, "learning_rate": 0.00017710477662180325, "loss": 11.0909, "step": 186650 }, { "epoch": 22.462093862815884, "grad_norm": Infinity, "learning_rate": 0.0001771023540933026, "loss": 11.1607, "step": 186660 }, { "epoch": 22.4632972322503, "grad_norm": Infinity, "learning_rate": 0.0001770999314532158, "loss": 11.2159, "step": 186670 }, { "epoch": 22.464500601684716, "grad_norm": Infinity, "learning_rate": 0.00017709750870154633, "loss": 11.101, "step": 186680 }, { "epoch": 22.465703971119133, "grad_norm": Infinity, "learning_rate": 0.00017709508583829776, "loss": 11.196, "step": 186690 }, { "epoch": 22.46690734055355, "grad_norm": Infinity, "learning_rate": 0.00017709266286347356, "loss": 11.1604, "step": 186700 }, { "epoch": 22.468110709987965, "grad_norm": Infinity, "learning_rate": 0.00017709023977707724, "loss": 11.0374, "step": 186710 }, { "epoch": 22.469314079422382, "grad_norm": Infinity, "learning_rate": 0.0001770878165791123, "loss": 11.1322, "step": 186720 }, { "epoch": 22.4705174488568, "grad_norm": Infinity, "learning_rate": 0.00017708539326958227, "loss": 11.1592, "step": 186730 }, { "epoch": 22.471720818291214, "grad_norm": Infinity, "learning_rate": 0.00017708296984849064, "loss": 11.1446, "step": 186740 }, { "epoch": 22.47292418772563, "grad_norm": Infinity, "learning_rate": 0.00017708054631584093, "loss": 11.2014, "step": 186750 }, { "epoch": 22.47412755716005, "grad_norm": Infinity, "learning_rate": 0.00017707812267163663, "loss": 11.1451, "step": 186760 }, { "epoch": 22.475330926594463, "grad_norm": Infinity, "learning_rate": 0.00017707569891588126, "loss": 11.2231, "step": 186770 }, { "epoch": 22.47653429602888, "grad_norm": Infinity, "learning_rate": 0.00017707327504857834, "loss": 11.1823, "step": 186780 }, { "epoch": 22.477737665463298, "grad_norm": Infinity, "learning_rate": 0.00017707085106973133, "loss": 11.1684, "step": 186790 }, { "epoch": 22.478941034897712, "grad_norm": Infinity, "learning_rate": 0.00017706842697934381, "loss": 11.1713, "step": 186800 }, { "epoch": 22.48014440433213, "grad_norm": Infinity, "learning_rate": 0.00017706600277741922, "loss": 11.1839, "step": 186810 }, { "epoch": 22.481347773766547, "grad_norm": Infinity, "learning_rate": 0.00017706357846396112, "loss": 11.1706, "step": 186820 }, { "epoch": 22.48255114320096, "grad_norm": Infinity, "learning_rate": 0.00017706115403897296, "loss": 11.1758, "step": 186830 }, { "epoch": 22.48375451263538, "grad_norm": Infinity, "learning_rate": 0.0001770587295024583, "loss": 11.0375, "step": 186840 }, { "epoch": 22.484957882069796, "grad_norm": Infinity, "learning_rate": 0.00017705630485442066, "loss": 11.2197, "step": 186850 }, { "epoch": 22.48616125150421, "grad_norm": Infinity, "learning_rate": 0.00017705388009486348, "loss": 11.2333, "step": 186860 }, { "epoch": 22.487364620938628, "grad_norm": Infinity, "learning_rate": 0.00017705145522379034, "loss": 11.258, "step": 186870 }, { "epoch": 22.488567990373046, "grad_norm": Infinity, "learning_rate": 0.00017704903024120473, "loss": 11.1586, "step": 186880 }, { "epoch": 22.48977135980746, "grad_norm": Infinity, "learning_rate": 0.00017704660514711011, "loss": 11.2632, "step": 186890 }, { "epoch": 22.490974729241877, "grad_norm": Infinity, "learning_rate": 0.00017704417994151002, "loss": 11.1358, "step": 186900 }, { "epoch": 22.492178098676295, "grad_norm": Infinity, "learning_rate": 0.00017704175462440803, "loss": 11.0721, "step": 186910 }, { "epoch": 22.49338146811071, "grad_norm": Infinity, "learning_rate": 0.0001770393291958076, "loss": 11.0954, "step": 186920 }, { "epoch": 22.494584837545126, "grad_norm": Infinity, "learning_rate": 0.0001770369036557122, "loss": 11.1236, "step": 186930 }, { "epoch": 22.495788206979544, "grad_norm": Infinity, "learning_rate": 0.00017703447800412542, "loss": 11.1668, "step": 186940 }, { "epoch": 22.496991576413958, "grad_norm": Infinity, "learning_rate": 0.00017703205224105074, "loss": 11.0386, "step": 186950 }, { "epoch": 22.498194945848375, "grad_norm": Infinity, "learning_rate": 0.0001770296263664916, "loss": 11.2007, "step": 186960 }, { "epoch": 22.499398315282793, "grad_norm": Infinity, "learning_rate": 0.0001770272003804516, "loss": 11.1729, "step": 186970 }, { "epoch": 22.500601684717207, "grad_norm": Infinity, "learning_rate": 0.00017702477428293425, "loss": 11.2161, "step": 186980 }, { "epoch": 22.501805054151625, "grad_norm": Infinity, "learning_rate": 0.00017702234807394303, "loss": 11.2265, "step": 186990 }, { "epoch": 22.503008423586042, "grad_norm": Infinity, "learning_rate": 0.00017701992175348142, "loss": 11.0526, "step": 187000 }, { "epoch": 22.504211793020456, "grad_norm": Infinity, "learning_rate": 0.000177017495321553, "loss": 11.1864, "step": 187010 }, { "epoch": 22.505415162454874, "grad_norm": Infinity, "learning_rate": 0.00017701506877816127, "loss": 11.1147, "step": 187020 }, { "epoch": 22.50661853188929, "grad_norm": Infinity, "learning_rate": 0.00017701264212330968, "loss": 11.1837, "step": 187030 }, { "epoch": 22.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00017701021535700178, "loss": 11.168, "step": 187040 }, { "epoch": 22.509025270758123, "grad_norm": Infinity, "learning_rate": 0.00017700778847924112, "loss": 11.1252, "step": 187050 }, { "epoch": 22.51022864019254, "grad_norm": Infinity, "learning_rate": 0.00017700536149003115, "loss": 11.1354, "step": 187060 }, { "epoch": 22.511432009626954, "grad_norm": Infinity, "learning_rate": 0.0001770029343893754, "loss": 11.1933, "step": 187070 }, { "epoch": 22.512635379061372, "grad_norm": Infinity, "learning_rate": 0.0001770005071772774, "loss": 11.1285, "step": 187080 }, { "epoch": 22.51383874849579, "grad_norm": Infinity, "learning_rate": 0.0001769980798537407, "loss": 11.1466, "step": 187090 }, { "epoch": 22.515042117930204, "grad_norm": Infinity, "learning_rate": 0.00017699565241876872, "loss": 11.0605, "step": 187100 }, { "epoch": 22.51624548736462, "grad_norm": Infinity, "learning_rate": 0.00017699322487236502, "loss": 11.2709, "step": 187110 }, { "epoch": 22.51744885679904, "grad_norm": Infinity, "learning_rate": 0.00017699079721453313, "loss": 11.1914, "step": 187120 }, { "epoch": 22.518652226233453, "grad_norm": Infinity, "learning_rate": 0.00017698836944527656, "loss": 11.1101, "step": 187130 }, { "epoch": 22.51985559566787, "grad_norm": Infinity, "learning_rate": 0.00017698594156459878, "loss": 11.221, "step": 187140 }, { "epoch": 22.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00017698351357250337, "loss": 11.2434, "step": 187150 }, { "epoch": 22.522262334536702, "grad_norm": Infinity, "learning_rate": 0.00017698108546899377, "loss": 11.1105, "step": 187160 }, { "epoch": 22.52346570397112, "grad_norm": Infinity, "learning_rate": 0.00017697865725407357, "loss": 10.9253, "step": 187170 }, { "epoch": 22.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00017697622892774625, "loss": 11.0859, "step": 187180 }, { "epoch": 22.52587244283995, "grad_norm": Infinity, "learning_rate": 0.0001769738004900153, "loss": 11.2238, "step": 187190 }, { "epoch": 22.52707581227437, "grad_norm": Infinity, "learning_rate": 0.00017697137194088426, "loss": 11.16, "step": 187200 }, { "epoch": 22.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00017696894328035666, "loss": 11.0697, "step": 187210 }, { "epoch": 22.5294825511432, "grad_norm": Infinity, "learning_rate": 0.00017696651450843596, "loss": 11.181, "step": 187220 }, { "epoch": 22.530685920577618, "grad_norm": Infinity, "learning_rate": 0.00017696408562512575, "loss": 11.1725, "step": 187230 }, { "epoch": 22.531889290012035, "grad_norm": Infinity, "learning_rate": 0.00017696165663042948, "loss": 11.0334, "step": 187240 }, { "epoch": 22.53309265944645, "grad_norm": Infinity, "learning_rate": 0.0001769592275243507, "loss": 11.2224, "step": 187250 }, { "epoch": 22.534296028880867, "grad_norm": Infinity, "learning_rate": 0.0001769567983068929, "loss": 11.147, "step": 187260 }, { "epoch": 22.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00017695436897805964, "loss": 11.1405, "step": 187270 }, { "epoch": 22.5367027677497, "grad_norm": Infinity, "learning_rate": 0.0001769519395378544, "loss": 11.1163, "step": 187280 }, { "epoch": 22.537906137184116, "grad_norm": Infinity, "learning_rate": 0.0001769495099862807, "loss": 11.1465, "step": 187290 }, { "epoch": 22.53910950661853, "grad_norm": Infinity, "learning_rate": 0.00017694708032334205, "loss": 11.0935, "step": 187300 }, { "epoch": 22.540312876052948, "grad_norm": Infinity, "learning_rate": 0.000176944650549042, "loss": 11.0713, "step": 187310 }, { "epoch": 22.541516245487365, "grad_norm": Infinity, "learning_rate": 0.00017694222066338405, "loss": 11.1573, "step": 187320 }, { "epoch": 22.54271961492178, "grad_norm": Infinity, "learning_rate": 0.00017693979066637168, "loss": 11.094, "step": 187330 }, { "epoch": 22.543922984356197, "grad_norm": Infinity, "learning_rate": 0.00017693736055800845, "loss": 11.0219, "step": 187340 }, { "epoch": 22.545126353790614, "grad_norm": Infinity, "learning_rate": 0.00017693493033829784, "loss": 11.187, "step": 187350 }, { "epoch": 22.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00017693250000724343, "loss": 11.2475, "step": 187360 }, { "epoch": 22.547533092659446, "grad_norm": Infinity, "learning_rate": 0.00017693006956484867, "loss": 11.1387, "step": 187370 }, { "epoch": 22.548736462093864, "grad_norm": Infinity, "learning_rate": 0.00017692763901111714, "loss": 11.1853, "step": 187380 }, { "epoch": 22.549939831528278, "grad_norm": Infinity, "learning_rate": 0.00017692520834605227, "loss": 11.0701, "step": 187390 }, { "epoch": 22.551143200962695, "grad_norm": Infinity, "learning_rate": 0.0001769227775696577, "loss": 11.2376, "step": 187400 }, { "epoch": 22.552346570397113, "grad_norm": Infinity, "learning_rate": 0.00017692034668193683, "loss": 11.1516, "step": 187410 }, { "epoch": 22.553549939831527, "grad_norm": Infinity, "learning_rate": 0.00017691791568289324, "loss": 11.1306, "step": 187420 }, { "epoch": 22.554753309265944, "grad_norm": Infinity, "learning_rate": 0.00017691548457253044, "loss": 11.1122, "step": 187430 }, { "epoch": 22.555956678700362, "grad_norm": Infinity, "learning_rate": 0.00017691305335085195, "loss": 11.1582, "step": 187440 }, { "epoch": 22.557160048134776, "grad_norm": Infinity, "learning_rate": 0.00017691062201786125, "loss": 11.0114, "step": 187450 }, { "epoch": 22.558363417569193, "grad_norm": Infinity, "learning_rate": 0.00017690819057356193, "loss": 11.1611, "step": 187460 }, { "epoch": 22.55956678700361, "grad_norm": Infinity, "learning_rate": 0.00017690575901795747, "loss": 11.2243, "step": 187470 }, { "epoch": 22.560770156438025, "grad_norm": Infinity, "learning_rate": 0.0001769033273510514, "loss": 11.028, "step": 187480 }, { "epoch": 22.561973525872443, "grad_norm": Infinity, "learning_rate": 0.00017690089557284718, "loss": 11.195, "step": 187490 }, { "epoch": 22.56317689530686, "grad_norm": Infinity, "learning_rate": 0.0001768984636833484, "loss": 11.177, "step": 187500 }, { "epoch": 22.564380264741274, "grad_norm": Infinity, "learning_rate": 0.00017689603168255855, "loss": 11.0852, "step": 187510 }, { "epoch": 22.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00017689359957048117, "loss": 11.1243, "step": 187520 }, { "epoch": 22.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00017689116734711976, "loss": 11.1217, "step": 187530 }, { "epoch": 22.567990373044523, "grad_norm": Infinity, "learning_rate": 0.00017688873501247785, "loss": 11.1652, "step": 187540 }, { "epoch": 22.56919374247894, "grad_norm": Infinity, "learning_rate": 0.00017688630256655898, "loss": 11.1238, "step": 187550 }, { "epoch": 22.57039711191336, "grad_norm": Infinity, "learning_rate": 0.0001768838700093666, "loss": 11.1144, "step": 187560 }, { "epoch": 22.571600481347772, "grad_norm": Infinity, "learning_rate": 0.0001768814373409043, "loss": 11.1112, "step": 187570 }, { "epoch": 22.57280385078219, "grad_norm": Infinity, "learning_rate": 0.0001768790045611756, "loss": 11.0239, "step": 187580 }, { "epoch": 22.574007220216608, "grad_norm": Infinity, "learning_rate": 0.00017687657167018397, "loss": 11.1385, "step": 187590 }, { "epoch": 22.57521058965102, "grad_norm": Infinity, "learning_rate": 0.000176874138667933, "loss": 11.0655, "step": 187600 }, { "epoch": 22.57641395908544, "grad_norm": Infinity, "learning_rate": 0.0001768717055544261, "loss": 11.0885, "step": 187610 }, { "epoch": 22.577617328519857, "grad_norm": Infinity, "learning_rate": 0.00017686927232966692, "loss": 11.2164, "step": 187620 }, { "epoch": 22.57882069795427, "grad_norm": Infinity, "learning_rate": 0.00017686683899365891, "loss": 11.0837, "step": 187630 }, { "epoch": 22.58002406738869, "grad_norm": Infinity, "learning_rate": 0.0001768644055464056, "loss": 11.1123, "step": 187640 }, { "epoch": 22.581227436823106, "grad_norm": Infinity, "learning_rate": 0.00017686197198791054, "loss": 11.1777, "step": 187650 }, { "epoch": 22.58243080625752, "grad_norm": Infinity, "learning_rate": 0.00017685953831817722, "loss": 11.0424, "step": 187660 }, { "epoch": 22.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00017685710453720917, "loss": 11.173, "step": 187670 }, { "epoch": 22.584837545126355, "grad_norm": Infinity, "learning_rate": 0.0001768546706450099, "loss": 11.1405, "step": 187680 }, { "epoch": 22.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00017685223664158298, "loss": 11.1449, "step": 187690 }, { "epoch": 22.587244283995187, "grad_norm": Infinity, "learning_rate": 0.00017684980252693187, "loss": 11.0346, "step": 187700 }, { "epoch": 22.588447653429604, "grad_norm": Infinity, "learning_rate": 0.0001768473683010601, "loss": 11.1039, "step": 187710 }, { "epoch": 22.589651022864018, "grad_norm": Infinity, "learning_rate": 0.00017684493396397127, "loss": 11.1379, "step": 187720 }, { "epoch": 22.590854392298436, "grad_norm": Infinity, "learning_rate": 0.00017684249951566882, "loss": 11.1374, "step": 187730 }, { "epoch": 22.592057761732853, "grad_norm": Infinity, "learning_rate": 0.0001768400649561563, "loss": 11.1715, "step": 187740 }, { "epoch": 22.593261131167267, "grad_norm": Infinity, "learning_rate": 0.00017683763028543723, "loss": 11.146, "step": 187750 }, { "epoch": 22.594464500601685, "grad_norm": Infinity, "learning_rate": 0.00017683519550351515, "loss": 11.1752, "step": 187760 }, { "epoch": 22.595667870036102, "grad_norm": Infinity, "learning_rate": 0.00017683276061039357, "loss": 11.1637, "step": 187770 }, { "epoch": 22.596871239470516, "grad_norm": Infinity, "learning_rate": 0.000176830325606076, "loss": 11.0903, "step": 187780 }, { "epoch": 22.598074608904934, "grad_norm": Infinity, "learning_rate": 0.00017682789049056603, "loss": 11.1792, "step": 187790 }, { "epoch": 22.59927797833935, "grad_norm": Infinity, "learning_rate": 0.00017682545526386708, "loss": 11.1525, "step": 187800 }, { "epoch": 22.600481347773766, "grad_norm": Infinity, "learning_rate": 0.00017682301992598276, "loss": 11.21, "step": 187810 }, { "epoch": 22.601684717208183, "grad_norm": Infinity, "learning_rate": 0.00017682058447691655, "loss": 11.1317, "step": 187820 }, { "epoch": 22.6028880866426, "grad_norm": Infinity, "learning_rate": 0.000176818148916672, "loss": 11.0641, "step": 187830 }, { "epoch": 22.604091456077015, "grad_norm": Infinity, "learning_rate": 0.00017681571324525258, "loss": 11.0839, "step": 187840 }, { "epoch": 22.605294825511432, "grad_norm": Infinity, "learning_rate": 0.0001768132774626619, "loss": 11.2821, "step": 187850 }, { "epoch": 22.60649819494585, "grad_norm": Infinity, "learning_rate": 0.00017681084156890344, "loss": 11.1419, "step": 187860 }, { "epoch": 22.607701564380264, "grad_norm": Infinity, "learning_rate": 0.00017680840556398073, "loss": 11.1512, "step": 187870 }, { "epoch": 22.60890493381468, "grad_norm": Infinity, "learning_rate": 0.00017680596944789727, "loss": 11.158, "step": 187880 }, { "epoch": 22.6101083032491, "grad_norm": Infinity, "learning_rate": 0.00017680353322065663, "loss": 11.1676, "step": 187890 }, { "epoch": 22.611311672683513, "grad_norm": Infinity, "learning_rate": 0.0001768010968822623, "loss": 11.1941, "step": 187900 }, { "epoch": 22.61251504211793, "grad_norm": Infinity, "learning_rate": 0.00017679866043271786, "loss": 11.1175, "step": 187910 }, { "epoch": 22.613718411552348, "grad_norm": Infinity, "learning_rate": 0.00017679622387202674, "loss": 11.1983, "step": 187920 }, { "epoch": 22.614921780986762, "grad_norm": Infinity, "learning_rate": 0.00017679378720019258, "loss": 11.0447, "step": 187930 }, { "epoch": 22.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00017679135041721882, "loss": 11.2145, "step": 187940 }, { "epoch": 22.617328519855597, "grad_norm": Infinity, "learning_rate": 0.00017678891352310904, "loss": 11.1853, "step": 187950 }, { "epoch": 22.61853188929001, "grad_norm": Infinity, "learning_rate": 0.0001767864765178667, "loss": 11.1153, "step": 187960 }, { "epoch": 22.61973525872443, "grad_norm": Infinity, "learning_rate": 0.00017678403940149537, "loss": 11.1473, "step": 187970 }, { "epoch": 22.620938628158846, "grad_norm": Infinity, "learning_rate": 0.0001767816021739986, "loss": 11.2434, "step": 187980 }, { "epoch": 22.62214199759326, "grad_norm": Infinity, "learning_rate": 0.00017677916483537992, "loss": 11.206, "step": 187990 }, { "epoch": 22.623345367027678, "grad_norm": Infinity, "learning_rate": 0.00017677672738564278, "loss": 11.1623, "step": 188000 }, { "epoch": 22.624548736462096, "grad_norm": Infinity, "learning_rate": 0.0001767742898247908, "loss": 11.1728, "step": 188010 }, { "epoch": 22.62575210589651, "grad_norm": Infinity, "learning_rate": 0.00017677185215282746, "loss": 11.106, "step": 188020 }, { "epoch": 22.626955475330927, "grad_norm": Infinity, "learning_rate": 0.0001767694143697563, "loss": 10.9994, "step": 188030 }, { "epoch": 22.628158844765345, "grad_norm": Infinity, "learning_rate": 0.00017676697647558084, "loss": 11.1421, "step": 188040 }, { "epoch": 22.62936221419976, "grad_norm": Infinity, "learning_rate": 0.00017676453847030458, "loss": 11.2661, "step": 188050 }, { "epoch": 22.630565583634176, "grad_norm": Infinity, "learning_rate": 0.00017676210035393112, "loss": 11.1114, "step": 188060 }, { "epoch": 22.63176895306859, "grad_norm": Infinity, "learning_rate": 0.00017675966212646392, "loss": 11.1273, "step": 188070 }, { "epoch": 22.632972322503008, "grad_norm": Infinity, "learning_rate": 0.00017675722378790657, "loss": 11.1042, "step": 188080 }, { "epoch": 22.634175691937426, "grad_norm": Infinity, "learning_rate": 0.00017675478533826254, "loss": 11.1341, "step": 188090 }, { "epoch": 22.63537906137184, "grad_norm": Infinity, "learning_rate": 0.00017675234677753542, "loss": 11.1282, "step": 188100 }, { "epoch": 22.636582430806257, "grad_norm": Infinity, "learning_rate": 0.00017674990810572866, "loss": 11.075, "step": 188110 }, { "epoch": 22.637785800240675, "grad_norm": Infinity, "learning_rate": 0.00017674746932284588, "loss": 11.1286, "step": 188120 }, { "epoch": 22.63898916967509, "grad_norm": Infinity, "learning_rate": 0.00017674503042889052, "loss": 11.0815, "step": 188130 }, { "epoch": 22.640192539109506, "grad_norm": Infinity, "learning_rate": 0.00017674259142386618, "loss": 11.1622, "step": 188140 }, { "epoch": 22.641395908543924, "grad_norm": Infinity, "learning_rate": 0.00017674015230777635, "loss": 11.1906, "step": 188150 }, { "epoch": 22.642599277978338, "grad_norm": Infinity, "learning_rate": 0.00017673771308062458, "loss": 11.1892, "step": 188160 }, { "epoch": 22.643802647412755, "grad_norm": Infinity, "learning_rate": 0.0001767352737424144, "loss": 11.1136, "step": 188170 }, { "epoch": 22.645006016847173, "grad_norm": Infinity, "learning_rate": 0.0001767328342931493, "loss": 11.2288, "step": 188180 }, { "epoch": 22.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00017673039473283287, "loss": 11.1405, "step": 188190 }, { "epoch": 22.647412755716005, "grad_norm": Infinity, "learning_rate": 0.0001767279550614686, "loss": 11.1905, "step": 188200 }, { "epoch": 22.648616125150422, "grad_norm": Infinity, "learning_rate": 0.00017672551527906006, "loss": 11.183, "step": 188210 }, { "epoch": 22.649819494584836, "grad_norm": Infinity, "learning_rate": 0.00017672307538561075, "loss": 11.0984, "step": 188220 }, { "epoch": 22.651022864019254, "grad_norm": Infinity, "learning_rate": 0.0001767206353811242, "loss": 11.0781, "step": 188230 }, { "epoch": 22.65222623345367, "grad_norm": Infinity, "learning_rate": 0.00017671819526560395, "loss": 11.2771, "step": 188240 }, { "epoch": 22.653429602888085, "grad_norm": Infinity, "learning_rate": 0.0001767157550390535, "loss": 11.0902, "step": 188250 }, { "epoch": 22.654632972322503, "grad_norm": Infinity, "learning_rate": 0.00017671331470147647, "loss": 11.155, "step": 188260 }, { "epoch": 22.65583634175692, "grad_norm": Infinity, "learning_rate": 0.0001767108742528763, "loss": 11.2088, "step": 188270 }, { "epoch": 22.657039711191334, "grad_norm": Infinity, "learning_rate": 0.00017670843369325655, "loss": 11.0392, "step": 188280 }, { "epoch": 22.658243080625752, "grad_norm": Infinity, "learning_rate": 0.0001767059930226208, "loss": 11.163, "step": 188290 }, { "epoch": 22.65944645006017, "grad_norm": Infinity, "learning_rate": 0.0001767035522409725, "loss": 11.1256, "step": 188300 }, { "epoch": 22.660649819494584, "grad_norm": Infinity, "learning_rate": 0.00017670111134831525, "loss": 11.1662, "step": 188310 }, { "epoch": 22.661853188929, "grad_norm": Infinity, "learning_rate": 0.00017669867034465252, "loss": 11.0707, "step": 188320 }, { "epoch": 22.66305655836342, "grad_norm": Infinity, "learning_rate": 0.0001766962292299879, "loss": 11.0481, "step": 188330 }, { "epoch": 22.664259927797833, "grad_norm": Infinity, "learning_rate": 0.0001766937880043249, "loss": 11.1178, "step": 188340 }, { "epoch": 22.66546329723225, "grad_norm": Infinity, "learning_rate": 0.00017669134666766705, "loss": 11.1569, "step": 188350 }, { "epoch": 22.666666666666668, "grad_norm": Infinity, "learning_rate": 0.00017668890522001788, "loss": 11.1828, "step": 188360 }, { "epoch": 22.667870036101082, "grad_norm": Infinity, "learning_rate": 0.00017668646366138092, "loss": 11.1584, "step": 188370 }, { "epoch": 22.6690734055355, "grad_norm": Infinity, "learning_rate": 0.00017668402199175976, "loss": 11.1627, "step": 188380 }, { "epoch": 22.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00017668158021115783, "loss": 11.1631, "step": 188390 }, { "epoch": 22.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00017667913831957875, "loss": 11.077, "step": 188400 }, { "epoch": 22.67268351383875, "grad_norm": Infinity, "learning_rate": 0.00017667669631702605, "loss": 11.0115, "step": 188410 }, { "epoch": 22.673886883273166, "grad_norm": Infinity, "learning_rate": 0.0001766742542035032, "loss": 11.1311, "step": 188420 }, { "epoch": 22.67509025270758, "grad_norm": Infinity, "learning_rate": 0.0001766718119790138, "loss": 11.0342, "step": 188430 }, { "epoch": 22.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00017666936964356132, "loss": 11.1917, "step": 188440 }, { "epoch": 22.677496991576415, "grad_norm": Infinity, "learning_rate": 0.0001766669271971494, "loss": 11.1531, "step": 188450 }, { "epoch": 22.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00017666448463978145, "loss": 11.0852, "step": 188460 }, { "epoch": 22.679903730445247, "grad_norm": Infinity, "learning_rate": 0.00017666204197146107, "loss": 11.1602, "step": 188470 }, { "epoch": 22.681107099879664, "grad_norm": Infinity, "learning_rate": 0.0001766595991921918, "loss": 11.0548, "step": 188480 }, { "epoch": 22.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00017665715630197718, "loss": 10.9654, "step": 188490 }, { "epoch": 22.683513838748496, "grad_norm": Infinity, "learning_rate": 0.0001766547133008207, "loss": 11.1588, "step": 188500 }, { "epoch": 22.684717208182914, "grad_norm": Infinity, "learning_rate": 0.00017665227018872595, "loss": 11.0222, "step": 188510 }, { "epoch": 22.685920577617328, "grad_norm": Infinity, "learning_rate": 0.00017664982696569644, "loss": 11.0054, "step": 188520 }, { "epoch": 22.687123947051745, "grad_norm": Infinity, "learning_rate": 0.00017664738363173566, "loss": 10.9621, "step": 188530 }, { "epoch": 22.688327316486163, "grad_norm": Infinity, "learning_rate": 0.00017664494018684724, "loss": 11.2123, "step": 188540 }, { "epoch": 22.689530685920577, "grad_norm": Infinity, "learning_rate": 0.00017664249663103463, "loss": 11.2189, "step": 188550 }, { "epoch": 22.690734055354994, "grad_norm": Infinity, "learning_rate": 0.00017664005296430145, "loss": 11.0421, "step": 188560 }, { "epoch": 22.691937424789412, "grad_norm": Infinity, "learning_rate": 0.00017663760918665115, "loss": 11.2462, "step": 188570 }, { "epoch": 22.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00017663516529808733, "loss": 11.0542, "step": 188580 }, { "epoch": 22.694344163658243, "grad_norm": Infinity, "learning_rate": 0.0001766327212986135, "loss": 11.2431, "step": 188590 }, { "epoch": 22.69554753309266, "grad_norm": Infinity, "learning_rate": 0.00017663027718823322, "loss": 11.1883, "step": 188600 }, { "epoch": 22.696750902527075, "grad_norm": Infinity, "learning_rate": 0.00017662783296695, "loss": 11.1646, "step": 188610 }, { "epoch": 22.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00017662538863476738, "loss": 11.2338, "step": 188620 }, { "epoch": 22.69915764139591, "grad_norm": Infinity, "learning_rate": 0.0001766229441916889, "loss": 11.1293, "step": 188630 }, { "epoch": 22.700361010830324, "grad_norm": Infinity, "learning_rate": 0.0001766204996377181, "loss": 11.0257, "step": 188640 }, { "epoch": 22.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00017661805497285854, "loss": 11.1802, "step": 188650 }, { "epoch": 22.70276774969916, "grad_norm": Infinity, "learning_rate": 0.00017661561019711373, "loss": 11.1375, "step": 188660 }, { "epoch": 22.703971119133573, "grad_norm": Infinity, "learning_rate": 0.0001766131653104872, "loss": 11.1061, "step": 188670 }, { "epoch": 22.70517448856799, "grad_norm": Infinity, "learning_rate": 0.00017661072031298252, "loss": 11.0686, "step": 188680 }, { "epoch": 22.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00017660827520460322, "loss": 11.078, "step": 188690 }, { "epoch": 22.707581227436823, "grad_norm": Infinity, "learning_rate": 0.0001766058299853528, "loss": 11.1074, "step": 188700 }, { "epoch": 22.70878459687124, "grad_norm": Infinity, "learning_rate": 0.00017660338465523486, "loss": 11.1721, "step": 188710 }, { "epoch": 22.709987966305654, "grad_norm": Infinity, "learning_rate": 0.0001766009392142529, "loss": 11.1626, "step": 188720 }, { "epoch": 22.71119133574007, "grad_norm": Infinity, "learning_rate": 0.00017659849366241046, "loss": 11.1575, "step": 188730 }, { "epoch": 22.71239470517449, "grad_norm": Infinity, "learning_rate": 0.0001765960479997111, "loss": 11.1587, "step": 188740 }, { "epoch": 22.713598074608903, "grad_norm": Infinity, "learning_rate": 0.00017659360222615835, "loss": 11.2074, "step": 188750 }, { "epoch": 22.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00017659115634175576, "loss": 11.133, "step": 188760 }, { "epoch": 22.71600481347774, "grad_norm": Infinity, "learning_rate": 0.00017658871034650683, "loss": 11.107, "step": 188770 }, { "epoch": 22.717208182912152, "grad_norm": Infinity, "learning_rate": 0.00017658626424041515, "loss": 11.1735, "step": 188780 }, { "epoch": 22.71841155234657, "grad_norm": Infinity, "learning_rate": 0.00017658381802348422, "loss": 11.0705, "step": 188790 }, { "epoch": 22.719614921780988, "grad_norm": Infinity, "learning_rate": 0.0001765813716957176, "loss": 11.1287, "step": 188800 }, { "epoch": 22.7208182912154, "grad_norm": Infinity, "learning_rate": 0.00017657892525711882, "loss": 11.1294, "step": 188810 }, { "epoch": 22.72202166064982, "grad_norm": Infinity, "learning_rate": 0.00017657647870769145, "loss": 11.0942, "step": 188820 }, { "epoch": 22.723225030084237, "grad_norm": Infinity, "learning_rate": 0.00017657403204743898, "loss": 11.2001, "step": 188830 }, { "epoch": 22.72442839951865, "grad_norm": Infinity, "learning_rate": 0.000176571585276365, "loss": 11.2361, "step": 188840 }, { "epoch": 22.72563176895307, "grad_norm": Infinity, "learning_rate": 0.00017656913839447306, "loss": 11.1385, "step": 188850 }, { "epoch": 22.726835138387486, "grad_norm": Infinity, "learning_rate": 0.00017656669140176664, "loss": 11.0191, "step": 188860 }, { "epoch": 22.7280385078219, "grad_norm": Infinity, "learning_rate": 0.00017656424429824929, "loss": 11.0866, "step": 188870 }, { "epoch": 22.729241877256317, "grad_norm": Infinity, "learning_rate": 0.00017656179708392463, "loss": 11.0602, "step": 188880 }, { "epoch": 22.730445246690735, "grad_norm": Infinity, "learning_rate": 0.00017655934975879608, "loss": 11.1853, "step": 188890 }, { "epoch": 22.73164861612515, "grad_norm": Infinity, "learning_rate": 0.0001765569023228673, "loss": 11.0442, "step": 188900 }, { "epoch": 22.732851985559567, "grad_norm": Infinity, "learning_rate": 0.00017655445477614176, "loss": 11.1074, "step": 188910 }, { "epoch": 22.734055354993984, "grad_norm": Infinity, "learning_rate": 0.00017655200711862303, "loss": 11.2331, "step": 188920 }, { "epoch": 22.735258724428398, "grad_norm": Infinity, "learning_rate": 0.00017654955935031465, "loss": 11.1989, "step": 188930 }, { "epoch": 22.736462093862816, "grad_norm": Infinity, "learning_rate": 0.00017654711147122016, "loss": 11.1129, "step": 188940 }, { "epoch": 22.737665463297233, "grad_norm": Infinity, "learning_rate": 0.0001765446634813431, "loss": 11.0734, "step": 188950 }, { "epoch": 22.738868832731647, "grad_norm": Infinity, "learning_rate": 0.000176542215380687, "loss": 11.1119, "step": 188960 }, { "epoch": 22.740072202166065, "grad_norm": Infinity, "learning_rate": 0.00017653976716925542, "loss": 11.1422, "step": 188970 }, { "epoch": 22.741275571600482, "grad_norm": Infinity, "learning_rate": 0.00017653731884705192, "loss": 11.1614, "step": 188980 }, { "epoch": 22.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00017653487041408, "loss": 11.1652, "step": 188990 }, { "epoch": 22.743682310469314, "grad_norm": Infinity, "learning_rate": 0.00017653242187034323, "loss": 11.0447, "step": 189000 }, { "epoch": 22.74488567990373, "grad_norm": Infinity, "learning_rate": 0.00017652997321584516, "loss": 11.1534, "step": 189010 }, { "epoch": 22.746089049338146, "grad_norm": Infinity, "learning_rate": 0.00017652752445058932, "loss": 11.1196, "step": 189020 }, { "epoch": 22.747292418772563, "grad_norm": Infinity, "learning_rate": 0.00017652507557457928, "loss": 11.1514, "step": 189030 }, { "epoch": 22.74849578820698, "grad_norm": Infinity, "learning_rate": 0.00017652262658781852, "loss": 11.103, "step": 189040 }, { "epoch": 22.749699157641395, "grad_norm": Infinity, "learning_rate": 0.00017652017749031064, "loss": 11.2375, "step": 189050 }, { "epoch": 22.750902527075812, "grad_norm": Infinity, "learning_rate": 0.0001765177282820592, "loss": 11.2562, "step": 189060 }, { "epoch": 22.75210589651023, "grad_norm": Infinity, "learning_rate": 0.0001765152789630677, "loss": 11.1239, "step": 189070 }, { "epoch": 22.753309265944644, "grad_norm": Infinity, "learning_rate": 0.00017651282953333968, "loss": 11.0871, "step": 189080 }, { "epoch": 22.75451263537906, "grad_norm": Infinity, "learning_rate": 0.00017651037999287872, "loss": 11.1931, "step": 189090 }, { "epoch": 22.75571600481348, "grad_norm": Infinity, "learning_rate": 0.00017650793034168833, "loss": 11.2694, "step": 189100 }, { "epoch": 22.756919374247893, "grad_norm": Infinity, "learning_rate": 0.0001765054805797721, "loss": 11.1285, "step": 189110 }, { "epoch": 22.75812274368231, "grad_norm": Infinity, "learning_rate": 0.00017650303070713353, "loss": 11.1306, "step": 189120 }, { "epoch": 22.759326113116728, "grad_norm": Infinity, "learning_rate": 0.00017650058072377623, "loss": 11.1381, "step": 189130 }, { "epoch": 22.760529482551142, "grad_norm": Infinity, "learning_rate": 0.00017649813062970367, "loss": 11.164, "step": 189140 }, { "epoch": 22.76173285198556, "grad_norm": Infinity, "learning_rate": 0.00017649568042491943, "loss": 11.0868, "step": 189150 }, { "epoch": 22.762936221419977, "grad_norm": Infinity, "learning_rate": 0.00017649323010942707, "loss": 11.1251, "step": 189160 }, { "epoch": 22.76413959085439, "grad_norm": Infinity, "learning_rate": 0.0001764907796832301, "loss": 11.175, "step": 189170 }, { "epoch": 22.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00017648832914633208, "loss": 11.1305, "step": 189180 }, { "epoch": 22.766546329723226, "grad_norm": Infinity, "learning_rate": 0.0001764858784987366, "loss": 11.1068, "step": 189190 }, { "epoch": 22.76774969915764, "grad_norm": Infinity, "learning_rate": 0.00017648342774044716, "loss": 11.0022, "step": 189200 }, { "epoch": 22.768953068592058, "grad_norm": Infinity, "learning_rate": 0.0001764809768714673, "loss": 11.1103, "step": 189210 }, { "epoch": 22.770156438026476, "grad_norm": Infinity, "learning_rate": 0.00017647852589180058, "loss": 11.2056, "step": 189220 }, { "epoch": 22.77135980746089, "grad_norm": Infinity, "learning_rate": 0.00017647607480145055, "loss": 11.1104, "step": 189230 }, { "epoch": 22.772563176895307, "grad_norm": Infinity, "learning_rate": 0.0001764736236004208, "loss": 11.2225, "step": 189240 }, { "epoch": 22.773766546329725, "grad_norm": Infinity, "learning_rate": 0.00017647117228871477, "loss": 11.2761, "step": 189250 }, { "epoch": 22.77496991576414, "grad_norm": Infinity, "learning_rate": 0.00017646872086633612, "loss": 11.1101, "step": 189260 }, { "epoch": 22.776173285198556, "grad_norm": Infinity, "learning_rate": 0.00017646626933328833, "loss": 11.0977, "step": 189270 }, { "epoch": 22.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00017646381768957498, "loss": 11.1827, "step": 189280 }, { "epoch": 22.778580024067388, "grad_norm": Infinity, "learning_rate": 0.0001764613659351996, "loss": 11.0711, "step": 189290 }, { "epoch": 22.779783393501805, "grad_norm": Infinity, "learning_rate": 0.00017645891407016577, "loss": 11.1122, "step": 189300 }, { "epoch": 22.780986762936223, "grad_norm": Infinity, "learning_rate": 0.000176456462094477, "loss": 11.1671, "step": 189310 }, { "epoch": 22.782190132370637, "grad_norm": Infinity, "learning_rate": 0.00017645401000813683, "loss": 11.0999, "step": 189320 }, { "epoch": 22.783393501805055, "grad_norm": Infinity, "learning_rate": 0.00017645155781114886, "loss": 11.1402, "step": 189330 }, { "epoch": 22.784596871239472, "grad_norm": Infinity, "learning_rate": 0.00017644910550351659, "loss": 11.1618, "step": 189340 }, { "epoch": 22.785800240673886, "grad_norm": Infinity, "learning_rate": 0.00017644665308524362, "loss": 11.1845, "step": 189350 }, { "epoch": 22.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00017644420055633344, "loss": 11.2174, "step": 189360 }, { "epoch": 22.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00017644174791678964, "loss": 11.1245, "step": 189370 }, { "epoch": 22.789410348977135, "grad_norm": Infinity, "learning_rate": 0.00017643929516661575, "loss": 11.1825, "step": 189380 }, { "epoch": 22.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00017643684230581535, "loss": 11.1393, "step": 189390 }, { "epoch": 22.79181708784597, "grad_norm": Infinity, "learning_rate": 0.00017643438933439195, "loss": 10.9904, "step": 189400 }, { "epoch": 22.793020457280385, "grad_norm": Infinity, "learning_rate": 0.00017643193625234912, "loss": 11.1743, "step": 189410 }, { "epoch": 22.794223826714802, "grad_norm": Infinity, "learning_rate": 0.0001764294830596904, "loss": 11.2213, "step": 189420 }, { "epoch": 22.79542719614922, "grad_norm": Infinity, "learning_rate": 0.00017642702975641936, "loss": 11.2268, "step": 189430 }, { "epoch": 22.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00017642457634253953, "loss": 11.1792, "step": 189440 }, { "epoch": 22.79783393501805, "grad_norm": Infinity, "learning_rate": 0.00017642212281805447, "loss": 11.1201, "step": 189450 }, { "epoch": 22.799037304452465, "grad_norm": Infinity, "learning_rate": 0.00017641966918296773, "loss": 11.0136, "step": 189460 }, { "epoch": 22.800240673886883, "grad_norm": Infinity, "learning_rate": 0.00017641721543728287, "loss": 11.1602, "step": 189470 }, { "epoch": 22.8014440433213, "grad_norm": Infinity, "learning_rate": 0.00017641476158100343, "loss": 11.0964, "step": 189480 }, { "epoch": 22.802647412755714, "grad_norm": Infinity, "learning_rate": 0.00017641230761413296, "loss": 11.1713, "step": 189490 }, { "epoch": 22.803850782190132, "grad_norm": Infinity, "learning_rate": 0.00017640985353667502, "loss": 11.0016, "step": 189500 }, { "epoch": 22.80505415162455, "grad_norm": Infinity, "learning_rate": 0.00017640739934863314, "loss": 11.2487, "step": 189510 }, { "epoch": 22.806257521058964, "grad_norm": Infinity, "learning_rate": 0.00017640494505001092, "loss": 11.2146, "step": 189520 }, { "epoch": 22.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00017640249064081184, "loss": 11.1064, "step": 189530 }, { "epoch": 22.8086642599278, "grad_norm": Infinity, "learning_rate": 0.00017640003612103951, "loss": 11.117, "step": 189540 }, { "epoch": 22.809867629362213, "grad_norm": Infinity, "learning_rate": 0.00017639758149069748, "loss": 11.0608, "step": 189550 }, { "epoch": 22.81107099879663, "grad_norm": Infinity, "learning_rate": 0.00017639512674978927, "loss": 11.1272, "step": 189560 }, { "epoch": 22.812274368231048, "grad_norm": Infinity, "learning_rate": 0.00017639267189831846, "loss": 11.0979, "step": 189570 }, { "epoch": 22.813477737665462, "grad_norm": Infinity, "learning_rate": 0.00017639021693628858, "loss": 11.0264, "step": 189580 }, { "epoch": 22.81468110709988, "grad_norm": Infinity, "learning_rate": 0.00017638776186370322, "loss": 11.1974, "step": 189590 }, { "epoch": 22.815884476534297, "grad_norm": Infinity, "learning_rate": 0.0001763853066805659, "loss": 11.2255, "step": 189600 }, { "epoch": 22.81708784596871, "grad_norm": Infinity, "learning_rate": 0.0001763828513868802, "loss": 11.0818, "step": 189610 }, { "epoch": 22.81829121540313, "grad_norm": Infinity, "learning_rate": 0.00017638039598264963, "loss": 11.147, "step": 189620 }, { "epoch": 22.819494584837546, "grad_norm": Infinity, "learning_rate": 0.00017637794046787778, "loss": 11.1031, "step": 189630 }, { "epoch": 22.82069795427196, "grad_norm": Infinity, "learning_rate": 0.00017637548484256818, "loss": 11.2972, "step": 189640 }, { "epoch": 22.821901323706378, "grad_norm": Infinity, "learning_rate": 0.0001763730291067244, "loss": 11.165, "step": 189650 }, { "epoch": 22.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00017637057326035, "loss": 11.1457, "step": 189660 }, { "epoch": 22.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00017636811730344852, "loss": 11.2185, "step": 189670 }, { "epoch": 22.825511432009627, "grad_norm": Infinity, "learning_rate": 0.0001763656612360235, "loss": 11.1695, "step": 189680 }, { "epoch": 22.826714801444044, "grad_norm": Infinity, "learning_rate": 0.00017636320505807853, "loss": 11.1892, "step": 189690 }, { "epoch": 22.82791817087846, "grad_norm": Infinity, "learning_rate": 0.00017636074876961716, "loss": 11.0703, "step": 189700 }, { "epoch": 22.829121540312876, "grad_norm": Infinity, "learning_rate": 0.00017635829237064294, "loss": 11.1229, "step": 189710 }, { "epoch": 22.830324909747294, "grad_norm": Infinity, "learning_rate": 0.0001763558358611594, "loss": 11.1212, "step": 189720 }, { "epoch": 22.831528279181708, "grad_norm": Infinity, "learning_rate": 0.00017635337924117012, "loss": 11.0298, "step": 189730 }, { "epoch": 22.832731648616125, "grad_norm": Infinity, "learning_rate": 0.00017635092251067866, "loss": 11.0389, "step": 189740 }, { "epoch": 22.833935018050543, "grad_norm": Infinity, "learning_rate": 0.00017634846566968855, "loss": 11.1126, "step": 189750 }, { "epoch": 22.835138387484957, "grad_norm": Infinity, "learning_rate": 0.00017634600871820335, "loss": 11.0647, "step": 189760 }, { "epoch": 22.836341756919374, "grad_norm": Infinity, "learning_rate": 0.00017634355165622662, "loss": 11.1533, "step": 189770 }, { "epoch": 22.837545126353792, "grad_norm": Infinity, "learning_rate": 0.00017634109448376196, "loss": 11.0943, "step": 189780 }, { "epoch": 22.838748495788206, "grad_norm": Infinity, "learning_rate": 0.00017633863720081284, "loss": 11.1534, "step": 189790 }, { "epoch": 22.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00017633617980738293, "loss": 11.072, "step": 189800 }, { "epoch": 22.84115523465704, "grad_norm": Infinity, "learning_rate": 0.0001763337223034757, "loss": 11.2568, "step": 189810 }, { "epoch": 22.842358604091455, "grad_norm": Infinity, "learning_rate": 0.0001763312646890947, "loss": 11.2142, "step": 189820 }, { "epoch": 22.843561973525873, "grad_norm": Infinity, "learning_rate": 0.0001763288069642435, "loss": 11.1366, "step": 189830 }, { "epoch": 22.84476534296029, "grad_norm": Infinity, "learning_rate": 0.00017632634912892573, "loss": 11.0713, "step": 189840 }, { "epoch": 22.845968712394704, "grad_norm": Infinity, "learning_rate": 0.00017632389118314484, "loss": 11.2236, "step": 189850 }, { "epoch": 22.84717208182912, "grad_norm": Infinity, "learning_rate": 0.00017632143312690447, "loss": 11.1332, "step": 189860 }, { "epoch": 22.84837545126354, "grad_norm": Infinity, "learning_rate": 0.0001763189749602081, "loss": 11.1097, "step": 189870 }, { "epoch": 22.849578820697953, "grad_norm": Infinity, "learning_rate": 0.00017631651668305939, "loss": 10.9052, "step": 189880 }, { "epoch": 22.85078219013237, "grad_norm": Infinity, "learning_rate": 0.00017631405829546178, "loss": 11.1419, "step": 189890 }, { "epoch": 22.85198555956679, "grad_norm": Infinity, "learning_rate": 0.0001763115997974189, "loss": 11.1203, "step": 189900 }, { "epoch": 22.853188929001202, "grad_norm": Infinity, "learning_rate": 0.00017630914118893433, "loss": 11.1216, "step": 189910 }, { "epoch": 22.85439229843562, "grad_norm": Infinity, "learning_rate": 0.00017630668247001156, "loss": 11.1302, "step": 189920 }, { "epoch": 22.855595667870038, "grad_norm": Infinity, "learning_rate": 0.0001763042236406542, "loss": 11.1505, "step": 189930 }, { "epoch": 22.85679903730445, "grad_norm": Infinity, "learning_rate": 0.00017630176470086577, "loss": 11.1267, "step": 189940 }, { "epoch": 22.85800240673887, "grad_norm": Infinity, "learning_rate": 0.00017629930565064985, "loss": 11.14, "step": 189950 }, { "epoch": 22.859205776173287, "grad_norm": Infinity, "learning_rate": 0.00017629684649001003, "loss": 11.0138, "step": 189960 }, { "epoch": 22.8604091456077, "grad_norm": Infinity, "learning_rate": 0.0001762943872189498, "loss": 11.1343, "step": 189970 }, { "epoch": 22.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00017629192783747276, "loss": 11.0876, "step": 189980 }, { "epoch": 22.862815884476536, "grad_norm": Infinity, "learning_rate": 0.00017628946834558247, "loss": 11.1311, "step": 189990 }, { "epoch": 22.86401925391095, "grad_norm": Infinity, "learning_rate": 0.00017628700874328248, "loss": 11.0517, "step": 190000 }, { "epoch": 22.865222623345367, "grad_norm": Infinity, "learning_rate": 0.00017628454903057637, "loss": 11.1689, "step": 190010 }, { "epoch": 22.866425992779785, "grad_norm": Infinity, "learning_rate": 0.00017628208920746766, "loss": 11.2026, "step": 190020 }, { "epoch": 22.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00017627962927395995, "loss": 11.1512, "step": 190030 }, { "epoch": 22.868832731648617, "grad_norm": Infinity, "learning_rate": 0.00017627716923005677, "loss": 11.2832, "step": 190040 }, { "epoch": 22.870036101083034, "grad_norm": Infinity, "learning_rate": 0.0001762747090757617, "loss": 11.0216, "step": 190050 }, { "epoch": 22.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00017627224881107832, "loss": 11.0908, "step": 190060 }, { "epoch": 22.872442839951866, "grad_norm": Infinity, "learning_rate": 0.0001762697884360101, "loss": 11.1863, "step": 190070 }, { "epoch": 22.87364620938628, "grad_norm": Infinity, "learning_rate": 0.00017626732795056072, "loss": 11.1152, "step": 190080 }, { "epoch": 22.874849578820697, "grad_norm": Infinity, "learning_rate": 0.0001762648673547337, "loss": 11.2341, "step": 190090 }, { "epoch": 22.876052948255115, "grad_norm": Infinity, "learning_rate": 0.00017626240664853256, "loss": 11.1993, "step": 190100 }, { "epoch": 22.87725631768953, "grad_norm": Infinity, "learning_rate": 0.00017625994583196088, "loss": 11.2688, "step": 190110 }, { "epoch": 22.878459687123947, "grad_norm": Infinity, "learning_rate": 0.00017625748490502223, "loss": 11.0799, "step": 190120 }, { "epoch": 22.879663056558364, "grad_norm": Infinity, "learning_rate": 0.0001762550238677202, "loss": 11.1661, "step": 190130 }, { "epoch": 22.880866425992778, "grad_norm": Infinity, "learning_rate": 0.0001762525627200583, "loss": 11.1794, "step": 190140 }, { "epoch": 22.882069795427196, "grad_norm": Infinity, "learning_rate": 0.00017625010146204013, "loss": 11.2047, "step": 190150 }, { "epoch": 22.883273164861613, "grad_norm": Infinity, "learning_rate": 0.00017624764009366924, "loss": 11.1169, "step": 190160 }, { "epoch": 22.884476534296027, "grad_norm": Infinity, "learning_rate": 0.00017624517861494917, "loss": 11.0868, "step": 190170 }, { "epoch": 22.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00017624271702588348, "loss": 11.2496, "step": 190180 }, { "epoch": 22.886883273164862, "grad_norm": Infinity, "learning_rate": 0.0001762402553264758, "loss": 11.1704, "step": 190190 }, { "epoch": 22.888086642599276, "grad_norm": Infinity, "learning_rate": 0.00017623779351672963, "loss": 11.1896, "step": 190200 }, { "epoch": 22.889290012033694, "grad_norm": Infinity, "learning_rate": 0.00017623533159664856, "loss": 11.2113, "step": 190210 }, { "epoch": 22.89049338146811, "grad_norm": Infinity, "learning_rate": 0.00017623286956623612, "loss": 11.2825, "step": 190220 }, { "epoch": 22.891696750902526, "grad_norm": Infinity, "learning_rate": 0.00017623040742549594, "loss": 11.1869, "step": 190230 }, { "epoch": 22.892900120336943, "grad_norm": Infinity, "learning_rate": 0.0001762279451744315, "loss": 11.0981, "step": 190240 }, { "epoch": 22.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00017622548281304642, "loss": 11.1534, "step": 190250 }, { "epoch": 22.895306859205775, "grad_norm": Infinity, "learning_rate": 0.00017622302034134423, "loss": 11.2106, "step": 190260 }, { "epoch": 22.896510228640192, "grad_norm": Infinity, "learning_rate": 0.00017622055775932854, "loss": 11.115, "step": 190270 }, { "epoch": 22.89771359807461, "grad_norm": Infinity, "learning_rate": 0.0001762180950670029, "loss": 11.071, "step": 190280 }, { "epoch": 22.898916967509024, "grad_norm": Infinity, "learning_rate": 0.0001762156322643708, "loss": 11.1501, "step": 190290 }, { "epoch": 22.90012033694344, "grad_norm": Infinity, "learning_rate": 0.0001762131693514359, "loss": 11.2295, "step": 190300 }, { "epoch": 22.90132370637786, "grad_norm": Infinity, "learning_rate": 0.0001762107063282017, "loss": 11.108, "step": 190310 }, { "epoch": 22.902527075812273, "grad_norm": Infinity, "learning_rate": 0.00017620824319467183, "loss": 11.2575, "step": 190320 }, { "epoch": 22.90373044524669, "grad_norm": Infinity, "learning_rate": 0.00017620577995084978, "loss": 11.1869, "step": 190330 }, { "epoch": 22.904933814681108, "grad_norm": Infinity, "learning_rate": 0.0001762033165967392, "loss": 11.1661, "step": 190340 }, { "epoch": 22.906137184115522, "grad_norm": Infinity, "learning_rate": 0.00017620085313234355, "loss": 11.1495, "step": 190350 }, { "epoch": 22.90734055354994, "grad_norm": Infinity, "learning_rate": 0.00017619838955766648, "loss": 11.1451, "step": 190360 }, { "epoch": 22.908543922984357, "grad_norm": Infinity, "learning_rate": 0.00017619592587271154, "loss": 11.1432, "step": 190370 }, { "epoch": 22.90974729241877, "grad_norm": Infinity, "learning_rate": 0.00017619346207748226, "loss": 11.0582, "step": 190380 }, { "epoch": 22.91095066185319, "grad_norm": Infinity, "learning_rate": 0.00017619099817198225, "loss": 11.1593, "step": 190390 }, { "epoch": 22.912154031287606, "grad_norm": Infinity, "learning_rate": 0.00017618853415621503, "loss": 11.1102, "step": 190400 }, { "epoch": 22.91335740072202, "grad_norm": Infinity, "learning_rate": 0.0001761860700301842, "loss": 11.0924, "step": 190410 }, { "epoch": 22.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00017618360579389332, "loss": 10.9973, "step": 190420 }, { "epoch": 22.915764139590856, "grad_norm": Infinity, "learning_rate": 0.00017618114144734594, "loss": 11.1166, "step": 190430 }, { "epoch": 22.91696750902527, "grad_norm": Infinity, "learning_rate": 0.00017617867699054566, "loss": 11.1088, "step": 190440 }, { "epoch": 22.918170878459687, "grad_norm": Infinity, "learning_rate": 0.00017617621242349603, "loss": 11.1023, "step": 190450 }, { "epoch": 22.919374247894105, "grad_norm": Infinity, "learning_rate": 0.0001761737477462006, "loss": 11.0637, "step": 190460 }, { "epoch": 22.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00017617128295866293, "loss": 11.2578, "step": 190470 }, { "epoch": 22.921780986762936, "grad_norm": Infinity, "learning_rate": 0.00017616881806088665, "loss": 11.0875, "step": 190480 }, { "epoch": 22.922984356197354, "grad_norm": Infinity, "learning_rate": 0.00017616635305287527, "loss": 11.1683, "step": 190490 }, { "epoch": 22.924187725631768, "grad_norm": Infinity, "learning_rate": 0.00017616388793463238, "loss": 11.1869, "step": 190500 }, { "epoch": 22.925391095066185, "grad_norm": Infinity, "learning_rate": 0.0001761614227061615, "loss": 11.1292, "step": 190510 }, { "epoch": 22.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00017615895736746627, "loss": 11.251, "step": 190520 }, { "epoch": 22.927797833935017, "grad_norm": Infinity, "learning_rate": 0.00017615649191855023, "loss": 11.1146, "step": 190530 }, { "epoch": 22.929001203369435, "grad_norm": Infinity, "learning_rate": 0.00017615402635941694, "loss": 11.1738, "step": 190540 }, { "epoch": 22.930204572803852, "grad_norm": Infinity, "learning_rate": 0.00017615156069006996, "loss": 11.148, "step": 190550 }, { "epoch": 22.931407942238266, "grad_norm": Infinity, "learning_rate": 0.0001761490949105129, "loss": 11.1293, "step": 190560 }, { "epoch": 22.932611311672684, "grad_norm": Infinity, "learning_rate": 0.00017614662902074927, "loss": 11.2317, "step": 190570 }, { "epoch": 22.9338146811071, "grad_norm": Infinity, "learning_rate": 0.00017614416302078268, "loss": 11.2083, "step": 190580 }, { "epoch": 22.935018050541515, "grad_norm": Infinity, "learning_rate": 0.0001761416969106167, "loss": 11.1864, "step": 190590 }, { "epoch": 22.936221419975933, "grad_norm": Infinity, "learning_rate": 0.00017613923069025484, "loss": 11.0079, "step": 190600 }, { "epoch": 22.93742478941035, "grad_norm": Infinity, "learning_rate": 0.00017613676435970075, "loss": 11.093, "step": 190610 }, { "epoch": 22.938628158844764, "grad_norm": Infinity, "learning_rate": 0.00017613429791895794, "loss": 11.0738, "step": 190620 }, { "epoch": 22.939831528279182, "grad_norm": Infinity, "learning_rate": 0.00017613183136803003, "loss": 11.0428, "step": 190630 }, { "epoch": 22.9410348977136, "grad_norm": Infinity, "learning_rate": 0.00017612936470692055, "loss": 11.0331, "step": 190640 }, { "epoch": 22.942238267148014, "grad_norm": Infinity, "learning_rate": 0.0001761268979356331, "loss": 11.0988, "step": 190650 }, { "epoch": 22.94344163658243, "grad_norm": Infinity, "learning_rate": 0.0001761244310541712, "loss": 11.1847, "step": 190660 }, { "epoch": 22.94464500601685, "grad_norm": Infinity, "learning_rate": 0.0001761219640625385, "loss": 11.1531, "step": 190670 }, { "epoch": 22.945848375451263, "grad_norm": Infinity, "learning_rate": 0.00017611949696073848, "loss": 11.1233, "step": 190680 }, { "epoch": 22.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00017611702974877478, "loss": 11.2216, "step": 190690 }, { "epoch": 22.948255114320098, "grad_norm": Infinity, "learning_rate": 0.0001761145624266509, "loss": 11.1471, "step": 190700 }, { "epoch": 22.949458483754512, "grad_norm": Infinity, "learning_rate": 0.00017611209499437055, "loss": 11.2306, "step": 190710 }, { "epoch": 22.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00017610962745193715, "loss": 11.1139, "step": 190720 }, { "epoch": 22.951865222623347, "grad_norm": Infinity, "learning_rate": 0.0001761071597993543, "loss": 11.1324, "step": 190730 }, { "epoch": 22.95306859205776, "grad_norm": Infinity, "learning_rate": 0.00017610469203662566, "loss": 11.1031, "step": 190740 }, { "epoch": 22.95427196149218, "grad_norm": Infinity, "learning_rate": 0.0001761022241637547, "loss": 11.1528, "step": 190750 }, { "epoch": 22.955475330926596, "grad_norm": Infinity, "learning_rate": 0.00017609975618074506, "loss": 11.199, "step": 190760 }, { "epoch": 22.95667870036101, "grad_norm": Infinity, "learning_rate": 0.00017609728808760028, "loss": 11.1168, "step": 190770 }, { "epoch": 22.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00017609481988432388, "loss": 11.1278, "step": 190780 }, { "epoch": 22.959085439229845, "grad_norm": Infinity, "learning_rate": 0.00017609235157091955, "loss": 11.1338, "step": 190790 }, { "epoch": 22.96028880866426, "grad_norm": Infinity, "learning_rate": 0.00017608988314739076, "loss": 11.1349, "step": 190800 }, { "epoch": 22.961492178098677, "grad_norm": Infinity, "learning_rate": 0.00017608741461374116, "loss": 11.1423, "step": 190810 }, { "epoch": 22.96269554753309, "grad_norm": Infinity, "learning_rate": 0.00017608494596997427, "loss": 11.2185, "step": 190820 }, { "epoch": 22.96389891696751, "grad_norm": Infinity, "learning_rate": 0.0001760824772160937, "loss": 11.1235, "step": 190830 }, { "epoch": 22.965102286401926, "grad_norm": Infinity, "learning_rate": 0.00017608000835210298, "loss": 11.1686, "step": 190840 }, { "epoch": 22.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00017607753937800568, "loss": 11.0293, "step": 190850 }, { "epoch": 22.967509025270758, "grad_norm": Infinity, "learning_rate": 0.00017607507029380543, "loss": 11.149, "step": 190860 }, { "epoch": 22.968712394705175, "grad_norm": Infinity, "learning_rate": 0.00017607260109950577, "loss": 11.1118, "step": 190870 }, { "epoch": 22.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00017607013179511027, "loss": 11.0345, "step": 190880 }, { "epoch": 22.971119133574007, "grad_norm": Infinity, "learning_rate": 0.0001760676623806225, "loss": 11.0491, "step": 190890 }, { "epoch": 22.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00017606519285604606, "loss": 11.1346, "step": 190900 }, { "epoch": 22.97352587244284, "grad_norm": Infinity, "learning_rate": 0.0001760627232213845, "loss": 11.2246, "step": 190910 }, { "epoch": 22.974729241877256, "grad_norm": Infinity, "learning_rate": 0.0001760602534766414, "loss": 11.2425, "step": 190920 }, { "epoch": 22.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00017605778362182032, "loss": 11.1684, "step": 190930 }, { "epoch": 22.977135980746088, "grad_norm": Infinity, "learning_rate": 0.00017605531365692486, "loss": 11.2066, "step": 190940 }, { "epoch": 22.978339350180505, "grad_norm": Infinity, "learning_rate": 0.0001760528435819586, "loss": 11.1009, "step": 190950 }, { "epoch": 22.979542719614923, "grad_norm": Infinity, "learning_rate": 0.00017605037339692508, "loss": 11.1612, "step": 190960 }, { "epoch": 22.980746089049337, "grad_norm": Infinity, "learning_rate": 0.0001760479031018279, "loss": 11.1445, "step": 190970 }, { "epoch": 22.981949458483754, "grad_norm": Infinity, "learning_rate": 0.0001760454326966706, "loss": 10.9852, "step": 190980 }, { "epoch": 22.983152827918172, "grad_norm": Infinity, "learning_rate": 0.0001760429621814568, "loss": 11.0465, "step": 190990 }, { "epoch": 22.984356197352586, "grad_norm": Infinity, "learning_rate": 0.0001760404915561901, "loss": 11.1964, "step": 191000 }, { "epoch": 22.985559566787003, "grad_norm": Infinity, "learning_rate": 0.00017603802082087397, "loss": 11.0606, "step": 191010 }, { "epoch": 22.98676293622142, "grad_norm": Infinity, "learning_rate": 0.0001760355499755121, "loss": 11.2024, "step": 191020 }, { "epoch": 22.987966305655835, "grad_norm": Infinity, "learning_rate": 0.000176033079020108, "loss": 11.093, "step": 191030 }, { "epoch": 22.989169675090253, "grad_norm": Infinity, "learning_rate": 0.00017603060795466527, "loss": 11.0497, "step": 191040 }, { "epoch": 22.99037304452467, "grad_norm": Infinity, "learning_rate": 0.00017602813677918745, "loss": 11.118, "step": 191050 }, { "epoch": 22.991576413959084, "grad_norm": Infinity, "learning_rate": 0.00017602566549367819, "loss": 11.156, "step": 191060 }, { "epoch": 22.9927797833935, "grad_norm": Infinity, "learning_rate": 0.000176023194098141, "loss": 11.2422, "step": 191070 }, { "epoch": 22.99398315282792, "grad_norm": Infinity, "learning_rate": 0.00017602072259257945, "loss": 11.0094, "step": 191080 }, { "epoch": 22.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00017601825097699717, "loss": 11.1688, "step": 191090 }, { "epoch": 22.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00017601577925139772, "loss": 10.9977, "step": 191100 }, { "epoch": 22.99759326113117, "grad_norm": Infinity, "learning_rate": 0.00017601330741578468, "loss": 11.1709, "step": 191110 }, { "epoch": 22.998796630565582, "grad_norm": Infinity, "learning_rate": 0.0001760108354701616, "loss": 11.1603, "step": 191120 }, { "epoch": 23.0, "grad_norm": Infinity, "learning_rate": 0.00017600836341453207, "loss": 11.1038, "step": 191130 }, { "epoch": 23.0, "eval_loss": 11.139604568481445, "eval_runtime": 118.3041, "eval_samples_per_second": 62.441, "eval_steps_per_second": 7.81, "step": 191130 }, { "epoch": 23.001203369434418, "grad_norm": Infinity, "learning_rate": 0.00017600589124889968, "loss": 11.1964, "step": 191140 }, { "epoch": 23.00240673886883, "grad_norm": Infinity, "learning_rate": 0.000176003418973268, "loss": 11.0488, "step": 191150 }, { "epoch": 23.00361010830325, "grad_norm": Infinity, "learning_rate": 0.00017600094658764062, "loss": 11.2841, "step": 191160 }, { "epoch": 23.004813477737667, "grad_norm": Infinity, "learning_rate": 0.00017599847409202108, "loss": 11.2014, "step": 191170 }, { "epoch": 23.00601684717208, "grad_norm": Infinity, "learning_rate": 0.000175996001486413, "loss": 11.2959, "step": 191180 }, { "epoch": 23.0072202166065, "grad_norm": Infinity, "learning_rate": 0.0001759935287708199, "loss": 11.2211, "step": 191190 }, { "epoch": 23.008423586040916, "grad_norm": Infinity, "learning_rate": 0.00017599105594524547, "loss": 11.22, "step": 191200 }, { "epoch": 23.00962695547533, "grad_norm": Infinity, "learning_rate": 0.0001759885830096932, "loss": 11.158, "step": 191210 }, { "epoch": 23.010830324909747, "grad_norm": Infinity, "learning_rate": 0.0001759861099641667, "loss": 11.1659, "step": 191220 }, { "epoch": 23.012033694344165, "grad_norm": Infinity, "learning_rate": 0.0001759836368086695, "loss": 11.175, "step": 191230 }, { "epoch": 23.01323706377858, "grad_norm": Infinity, "learning_rate": 0.00017598116354320523, "loss": 11.1375, "step": 191240 }, { "epoch": 23.014440433212997, "grad_norm": Infinity, "learning_rate": 0.00017597869016777748, "loss": 11.1379, "step": 191250 }, { "epoch": 23.015643802647414, "grad_norm": Infinity, "learning_rate": 0.00017597621668238977, "loss": 11.0362, "step": 191260 }, { "epoch": 23.016847172081828, "grad_norm": Infinity, "learning_rate": 0.00017597374308704576, "loss": 11.1441, "step": 191270 }, { "epoch": 23.018050541516246, "grad_norm": Infinity, "learning_rate": 0.00017597126938174898, "loss": 11.2475, "step": 191280 }, { "epoch": 23.019253910950663, "grad_norm": Infinity, "learning_rate": 0.00017596879556650298, "loss": 11.225, "step": 191290 }, { "epoch": 23.020457280385077, "grad_norm": Infinity, "learning_rate": 0.0001759663216413114, "loss": 11.2133, "step": 191300 }, { "epoch": 23.021660649819495, "grad_norm": Infinity, "learning_rate": 0.0001759638476061778, "loss": 11.1681, "step": 191310 }, { "epoch": 23.022864019253912, "grad_norm": Infinity, "learning_rate": 0.00017596137346110574, "loss": 11.0869, "step": 191320 }, { "epoch": 23.024067388688326, "grad_norm": Infinity, "learning_rate": 0.00017595889920609886, "loss": 11.1001, "step": 191330 }, { "epoch": 23.025270758122744, "grad_norm": Infinity, "learning_rate": 0.00017595642484116066, "loss": 11.107, "step": 191340 }, { "epoch": 23.02647412755716, "grad_norm": Infinity, "learning_rate": 0.00017595395036629476, "loss": 11.2042, "step": 191350 }, { "epoch": 23.027677496991576, "grad_norm": Infinity, "learning_rate": 0.00017595147578150475, "loss": 11.0908, "step": 191360 }, { "epoch": 23.028880866425993, "grad_norm": Infinity, "learning_rate": 0.0001759490010867942, "loss": 11.1936, "step": 191370 }, { "epoch": 23.03008423586041, "grad_norm": Infinity, "learning_rate": 0.00017594652628216673, "loss": 11.1785, "step": 191380 }, { "epoch": 23.031287605294825, "grad_norm": Infinity, "learning_rate": 0.00017594405136762582, "loss": 11.1884, "step": 191390 }, { "epoch": 23.032490974729242, "grad_norm": Infinity, "learning_rate": 0.00017594157634317516, "loss": 11.0629, "step": 191400 }, { "epoch": 23.03369434416366, "grad_norm": Infinity, "learning_rate": 0.0001759391012088183, "loss": 11.0853, "step": 191410 }, { "epoch": 23.034897713598074, "grad_norm": Infinity, "learning_rate": 0.00017593662596455877, "loss": 11.2413, "step": 191420 }, { "epoch": 23.03610108303249, "grad_norm": Infinity, "learning_rate": 0.00017593415061040025, "loss": 11.2708, "step": 191430 }, { "epoch": 23.03730445246691, "grad_norm": Infinity, "learning_rate": 0.00017593167514634624, "loss": 10.9999, "step": 191440 }, { "epoch": 23.038507821901323, "grad_norm": Infinity, "learning_rate": 0.00017592919957240032, "loss": 11.0191, "step": 191450 }, { "epoch": 23.03971119133574, "grad_norm": Infinity, "learning_rate": 0.00017592672388856614, "loss": 11.1132, "step": 191460 }, { "epoch": 23.040914560770158, "grad_norm": Infinity, "learning_rate": 0.00017592424809484723, "loss": 11.1149, "step": 191470 }, { "epoch": 23.042117930204572, "grad_norm": Infinity, "learning_rate": 0.00017592177219124717, "loss": 11.11, "step": 191480 }, { "epoch": 23.04332129963899, "grad_norm": Infinity, "learning_rate": 0.0001759192961777696, "loss": 11.1361, "step": 191490 }, { "epoch": 23.044524669073404, "grad_norm": Infinity, "learning_rate": 0.00017591682005441804, "loss": 11.1655, "step": 191500 }, { "epoch": 23.04572803850782, "grad_norm": Infinity, "learning_rate": 0.0001759143438211961, "loss": 11.147, "step": 191510 }, { "epoch": 23.04693140794224, "grad_norm": Infinity, "learning_rate": 0.00017591186747810736, "loss": 11.1585, "step": 191520 }, { "epoch": 23.048134777376653, "grad_norm": Infinity, "learning_rate": 0.00017590939102515542, "loss": 11.0279, "step": 191530 }, { "epoch": 23.04933814681107, "grad_norm": Infinity, "learning_rate": 0.00017590691446234383, "loss": 11.0423, "step": 191540 }, { "epoch": 23.050541516245488, "grad_norm": Infinity, "learning_rate": 0.0001759044377896762, "loss": 11.1259, "step": 191550 }, { "epoch": 23.051744885679902, "grad_norm": Infinity, "learning_rate": 0.00017590196100715613, "loss": 11.2332, "step": 191560 }, { "epoch": 23.05294825511432, "grad_norm": Infinity, "learning_rate": 0.00017589948411478714, "loss": 11.0941, "step": 191570 }, { "epoch": 23.054151624548737, "grad_norm": Infinity, "learning_rate": 0.0001758970071125729, "loss": 11.1196, "step": 191580 }, { "epoch": 23.05535499398315, "grad_norm": Infinity, "learning_rate": 0.00017589453000051692, "loss": 11.1721, "step": 191590 }, { "epoch": 23.05655836341757, "grad_norm": Infinity, "learning_rate": 0.00017589205277862284, "loss": 11.0877, "step": 191600 }, { "epoch": 23.057761732851986, "grad_norm": Infinity, "learning_rate": 0.0001758895754468942, "loss": 11.1361, "step": 191610 }, { "epoch": 23.0589651022864, "grad_norm": Infinity, "learning_rate": 0.00017588709800533465, "loss": 11.1026, "step": 191620 }, { "epoch": 23.060168471720818, "grad_norm": Infinity, "learning_rate": 0.0001758846204539477, "loss": 11.1873, "step": 191630 }, { "epoch": 23.061371841155236, "grad_norm": Infinity, "learning_rate": 0.00017588214279273698, "loss": 10.9926, "step": 191640 }, { "epoch": 23.06257521058965, "grad_norm": Infinity, "learning_rate": 0.00017587966502170604, "loss": 11.1515, "step": 191650 }, { "epoch": 23.063778580024067, "grad_norm": Infinity, "learning_rate": 0.0001758771871408585, "loss": 11.1253, "step": 191660 }, { "epoch": 23.064981949458485, "grad_norm": Infinity, "learning_rate": 0.00017587470915019795, "loss": 11.2217, "step": 191670 }, { "epoch": 23.0661853188929, "grad_norm": Infinity, "learning_rate": 0.00017587223104972797, "loss": 11.0556, "step": 191680 }, { "epoch": 23.067388688327316, "grad_norm": Infinity, "learning_rate": 0.00017586975283945212, "loss": 11.1713, "step": 191690 }, { "epoch": 23.068592057761734, "grad_norm": Infinity, "learning_rate": 0.00017586727451937405, "loss": 11.1873, "step": 191700 }, { "epoch": 23.069795427196148, "grad_norm": Infinity, "learning_rate": 0.00017586479608949724, "loss": 11.2209, "step": 191710 }, { "epoch": 23.070998796630565, "grad_norm": Infinity, "learning_rate": 0.00017586231754982537, "loss": 11.0533, "step": 191720 }, { "epoch": 23.072202166064983, "grad_norm": Infinity, "learning_rate": 0.000175859838900362, "loss": 11.0896, "step": 191730 }, { "epoch": 23.073405535499397, "grad_norm": Infinity, "learning_rate": 0.0001758573601411107, "loss": 11.1186, "step": 191740 }, { "epoch": 23.074608904933815, "grad_norm": Infinity, "learning_rate": 0.0001758548812720751, "loss": 11.051, "step": 191750 }, { "epoch": 23.075812274368232, "grad_norm": Infinity, "learning_rate": 0.0001758524022932587, "loss": 10.9249, "step": 191760 }, { "epoch": 23.077015643802646, "grad_norm": Infinity, "learning_rate": 0.0001758499232046652, "loss": 11.1358, "step": 191770 }, { "epoch": 23.078219013237064, "grad_norm": Infinity, "learning_rate": 0.00017584744400629814, "loss": 11.2267, "step": 191780 }, { "epoch": 23.07942238267148, "grad_norm": Infinity, "learning_rate": 0.00017584496469816107, "loss": 11.1696, "step": 191790 }, { "epoch": 23.080625752105895, "grad_norm": Infinity, "learning_rate": 0.00017584248528025763, "loss": 11.1102, "step": 191800 }, { "epoch": 23.081829121540313, "grad_norm": Infinity, "learning_rate": 0.00017584000575259138, "loss": 11.1422, "step": 191810 }, { "epoch": 23.08303249097473, "grad_norm": Infinity, "learning_rate": 0.0001758375261151659, "loss": 11.1524, "step": 191820 }, { "epoch": 23.084235860409144, "grad_norm": Infinity, "learning_rate": 0.00017583504636798484, "loss": 11.0193, "step": 191830 }, { "epoch": 23.085439229843562, "grad_norm": Infinity, "learning_rate": 0.0001758325665110517, "loss": 11.0701, "step": 191840 }, { "epoch": 23.08664259927798, "grad_norm": Infinity, "learning_rate": 0.00017583008654437014, "loss": 11.1242, "step": 191850 }, { "epoch": 23.087845968712394, "grad_norm": Infinity, "learning_rate": 0.00017582760646794372, "loss": 11.1676, "step": 191860 }, { "epoch": 23.08904933814681, "grad_norm": Infinity, "learning_rate": 0.000175825126281776, "loss": 11.0349, "step": 191870 }, { "epoch": 23.09025270758123, "grad_norm": Infinity, "learning_rate": 0.00017582264598587066, "loss": 11.1606, "step": 191880 }, { "epoch": 23.091456077015643, "grad_norm": Infinity, "learning_rate": 0.00017582016558023117, "loss": 11.1211, "step": 191890 }, { "epoch": 23.09265944645006, "grad_norm": Infinity, "learning_rate": 0.00017581768506486122, "loss": 11.0956, "step": 191900 }, { "epoch": 23.093862815884478, "grad_norm": Infinity, "learning_rate": 0.0001758152044397644, "loss": 11.1547, "step": 191910 }, { "epoch": 23.095066185318892, "grad_norm": Infinity, "learning_rate": 0.0001758127237049442, "loss": 11.2541, "step": 191920 }, { "epoch": 23.09626955475331, "grad_norm": Infinity, "learning_rate": 0.00017581024286040427, "loss": 11.1044, "step": 191930 }, { "epoch": 23.097472924187727, "grad_norm": Infinity, "learning_rate": 0.00017580776190614822, "loss": 11.1746, "step": 191940 }, { "epoch": 23.09867629362214, "grad_norm": Infinity, "learning_rate": 0.00017580528084217963, "loss": 11.1385, "step": 191950 }, { "epoch": 23.09987966305656, "grad_norm": Infinity, "learning_rate": 0.00017580279966850207, "loss": 11.1221, "step": 191960 }, { "epoch": 23.101083032490976, "grad_norm": Infinity, "learning_rate": 0.00017580031838511912, "loss": 11.17, "step": 191970 }, { "epoch": 23.10228640192539, "grad_norm": Infinity, "learning_rate": 0.00017579783699203443, "loss": 11.1612, "step": 191980 }, { "epoch": 23.103489771359808, "grad_norm": Infinity, "learning_rate": 0.00017579535548925155, "loss": 11.0318, "step": 191990 }, { "epoch": 23.104693140794225, "grad_norm": Infinity, "learning_rate": 0.00017579287387677408, "loss": 11.1372, "step": 192000 }, { "epoch": 23.10589651022864, "grad_norm": Infinity, "learning_rate": 0.0001757903921546056, "loss": 11.177, "step": 192010 }, { "epoch": 23.107099879663057, "grad_norm": Infinity, "learning_rate": 0.0001757879103227497, "loss": 11.1301, "step": 192020 }, { "epoch": 23.108303249097474, "grad_norm": Infinity, "learning_rate": 0.00017578542838121, "loss": 11.2456, "step": 192030 }, { "epoch": 23.10950661853189, "grad_norm": Infinity, "learning_rate": 0.0001757829463299901, "loss": 11.0623, "step": 192040 }, { "epoch": 23.110709987966306, "grad_norm": Infinity, "learning_rate": 0.00017578046416909352, "loss": 10.9927, "step": 192050 }, { "epoch": 23.111913357400724, "grad_norm": Infinity, "learning_rate": 0.0001757779818985239, "loss": 11.0926, "step": 192060 }, { "epoch": 23.113116726835138, "grad_norm": Infinity, "learning_rate": 0.00017577549951828484, "loss": 11.2262, "step": 192070 }, { "epoch": 23.114320096269555, "grad_norm": Infinity, "learning_rate": 0.00017577301702837995, "loss": 11.2446, "step": 192080 }, { "epoch": 23.115523465703973, "grad_norm": Infinity, "learning_rate": 0.00017577053442881276, "loss": 11.0588, "step": 192090 }, { "epoch": 23.116726835138387, "grad_norm": Infinity, "learning_rate": 0.00017576805171958695, "loss": 11.1521, "step": 192100 }, { "epoch": 23.117930204572804, "grad_norm": Infinity, "learning_rate": 0.000175765568900706, "loss": 11.2255, "step": 192110 }, { "epoch": 23.119133574007222, "grad_norm": Infinity, "learning_rate": 0.00017576308597217358, "loss": 11.0934, "step": 192120 }, { "epoch": 23.120336943441636, "grad_norm": Infinity, "learning_rate": 0.00017576060293399333, "loss": 11.2551, "step": 192130 }, { "epoch": 23.121540312876053, "grad_norm": Infinity, "learning_rate": 0.00017575811978616873, "loss": 11.1774, "step": 192140 }, { "epoch": 23.12274368231047, "grad_norm": Infinity, "learning_rate": 0.0001757556365287034, "loss": 11.1375, "step": 192150 }, { "epoch": 23.123947051744885, "grad_norm": Infinity, "learning_rate": 0.000175753153161601, "loss": 11.2116, "step": 192160 }, { "epoch": 23.125150421179303, "grad_norm": Infinity, "learning_rate": 0.0001757506696848651, "loss": 11.1012, "step": 192170 }, { "epoch": 23.126353790613717, "grad_norm": Infinity, "learning_rate": 0.00017574818609849925, "loss": 11.2283, "step": 192180 }, { "epoch": 23.127557160048134, "grad_norm": Infinity, "learning_rate": 0.0001757457024025071, "loss": 11.2024, "step": 192190 }, { "epoch": 23.128760529482552, "grad_norm": Infinity, "learning_rate": 0.00017574321859689218, "loss": 11.0866, "step": 192200 }, { "epoch": 23.129963898916966, "grad_norm": Infinity, "learning_rate": 0.00017574073468165814, "loss": 11.2123, "step": 192210 }, { "epoch": 23.131167268351383, "grad_norm": Infinity, "learning_rate": 0.00017573825065680856, "loss": 11.1511, "step": 192220 }, { "epoch": 23.1323706377858, "grad_norm": Infinity, "learning_rate": 0.00017573576652234703, "loss": 11.1686, "step": 192230 }, { "epoch": 23.133574007220215, "grad_norm": Infinity, "learning_rate": 0.00017573328227827713, "loss": 11.2081, "step": 192240 }, { "epoch": 23.134777376654633, "grad_norm": Infinity, "learning_rate": 0.0001757307979246025, "loss": 11.1236, "step": 192250 }, { "epoch": 23.13598074608905, "grad_norm": Infinity, "learning_rate": 0.0001757283134613267, "loss": 11.1247, "step": 192260 }, { "epoch": 23.137184115523464, "grad_norm": Infinity, "learning_rate": 0.00017572582888845332, "loss": 11.2671, "step": 192270 }, { "epoch": 23.13838748495788, "grad_norm": Infinity, "learning_rate": 0.000175723344205986, "loss": 11.1052, "step": 192280 }, { "epoch": 23.1395908543923, "grad_norm": Infinity, "learning_rate": 0.0001757208594139283, "loss": 11.0528, "step": 192290 }, { "epoch": 23.140794223826713, "grad_norm": Infinity, "learning_rate": 0.0001757183745122838, "loss": 11.2499, "step": 192300 }, { "epoch": 23.14199759326113, "grad_norm": Infinity, "learning_rate": 0.00017571588950105613, "loss": 11.242, "step": 192310 }, { "epoch": 23.14320096269555, "grad_norm": Infinity, "learning_rate": 0.00017571340438024885, "loss": 11.0757, "step": 192320 }, { "epoch": 23.144404332129962, "grad_norm": Infinity, "learning_rate": 0.0001757109191498656, "loss": 11.1414, "step": 192330 }, { "epoch": 23.14560770156438, "grad_norm": Infinity, "learning_rate": 0.00017570843380990998, "loss": 11.1974, "step": 192340 }, { "epoch": 23.146811070998798, "grad_norm": Infinity, "learning_rate": 0.00017570594836038551, "loss": 11.2422, "step": 192350 }, { "epoch": 23.14801444043321, "grad_norm": Infinity, "learning_rate": 0.0001757034628012959, "loss": 11.1928, "step": 192360 }, { "epoch": 23.14921780986763, "grad_norm": Infinity, "learning_rate": 0.00017570097713264464, "loss": 11.1876, "step": 192370 }, { "epoch": 23.150421179302047, "grad_norm": Infinity, "learning_rate": 0.00017569849135443542, "loss": 11.1964, "step": 192380 }, { "epoch": 23.15162454873646, "grad_norm": Infinity, "learning_rate": 0.00017569600546667177, "loss": 11.0677, "step": 192390 }, { "epoch": 23.15282791817088, "grad_norm": Infinity, "learning_rate": 0.00017569351946935733, "loss": 11.1434, "step": 192400 }, { "epoch": 23.154031287605296, "grad_norm": Infinity, "learning_rate": 0.00017569103336249564, "loss": 11.1399, "step": 192410 }, { "epoch": 23.15523465703971, "grad_norm": Infinity, "learning_rate": 0.00017568854714609036, "loss": 11.0888, "step": 192420 }, { "epoch": 23.156438026474127, "grad_norm": Infinity, "learning_rate": 0.00017568606082014507, "loss": 11.2442, "step": 192430 }, { "epoch": 23.157641395908545, "grad_norm": Infinity, "learning_rate": 0.00017568357438466336, "loss": 11.1043, "step": 192440 }, { "epoch": 23.15884476534296, "grad_norm": Infinity, "learning_rate": 0.00017568108783964882, "loss": 11.1085, "step": 192450 }, { "epoch": 23.160048134777377, "grad_norm": Infinity, "learning_rate": 0.00017567860118510508, "loss": 11.1456, "step": 192460 }, { "epoch": 23.161251504211794, "grad_norm": Infinity, "learning_rate": 0.0001756761144210357, "loss": 11.1575, "step": 192470 }, { "epoch": 23.162454873646208, "grad_norm": Infinity, "learning_rate": 0.00017567362754744434, "loss": 11.2116, "step": 192480 }, { "epoch": 23.163658243080626, "grad_norm": Infinity, "learning_rate": 0.00017567114056433452, "loss": 11.1255, "step": 192490 }, { "epoch": 23.164861612515043, "grad_norm": Infinity, "learning_rate": 0.00017566865347170988, "loss": 11.0535, "step": 192500 }, { "epoch": 23.166064981949457, "grad_norm": Infinity, "learning_rate": 0.00017566616626957403, "loss": 11.178, "step": 192510 }, { "epoch": 23.167268351383875, "grad_norm": Infinity, "learning_rate": 0.00017566367895793054, "loss": 11.0545, "step": 192520 }, { "epoch": 23.168471720818292, "grad_norm": Infinity, "learning_rate": 0.00017566119153678304, "loss": 11.163, "step": 192530 }, { "epoch": 23.169675090252706, "grad_norm": Infinity, "learning_rate": 0.0001756587040061351, "loss": 11.058, "step": 192540 }, { "epoch": 23.170878459687124, "grad_norm": Infinity, "learning_rate": 0.00017565621636599036, "loss": 11.0659, "step": 192550 }, { "epoch": 23.17208182912154, "grad_norm": Infinity, "learning_rate": 0.0001756537286163524, "loss": 11.1096, "step": 192560 }, { "epoch": 23.173285198555956, "grad_norm": Infinity, "learning_rate": 0.00017565124075722476, "loss": 11.1551, "step": 192570 }, { "epoch": 23.174488567990373, "grad_norm": Infinity, "learning_rate": 0.00017564875278861116, "loss": 11.205, "step": 192580 }, { "epoch": 23.17569193742479, "grad_norm": Infinity, "learning_rate": 0.0001756462647105151, "loss": 11.3652, "step": 192590 }, { "epoch": 23.176895306859205, "grad_norm": Infinity, "learning_rate": 0.00017564377652294023, "loss": 11.2155, "step": 192600 }, { "epoch": 23.178098676293622, "grad_norm": Infinity, "learning_rate": 0.00017564128822589013, "loss": 11.1632, "step": 192610 }, { "epoch": 23.17930204572804, "grad_norm": Infinity, "learning_rate": 0.00017563879981936844, "loss": 11.1482, "step": 192620 }, { "epoch": 23.180505415162454, "grad_norm": Infinity, "learning_rate": 0.0001756363113033787, "loss": 11.1527, "step": 192630 }, { "epoch": 23.18170878459687, "grad_norm": Infinity, "learning_rate": 0.00017563382267792455, "loss": 10.9933, "step": 192640 }, { "epoch": 23.18291215403129, "grad_norm": Infinity, "learning_rate": 0.00017563133394300962, "loss": 11.1944, "step": 192650 }, { "epoch": 23.184115523465703, "grad_norm": Infinity, "learning_rate": 0.0001756288450986374, "loss": 11.1868, "step": 192660 }, { "epoch": 23.18531889290012, "grad_norm": Infinity, "learning_rate": 0.00017562635614481166, "loss": 11.1149, "step": 192670 }, { "epoch": 23.186522262334538, "grad_norm": Infinity, "learning_rate": 0.00017562386708153585, "loss": 11.227, "step": 192680 }, { "epoch": 23.187725631768952, "grad_norm": Infinity, "learning_rate": 0.00017562137790881366, "loss": 11.2388, "step": 192690 }, { "epoch": 23.18892900120337, "grad_norm": Infinity, "learning_rate": 0.00017561888862664866, "loss": 11.1108, "step": 192700 }, { "epoch": 23.190132370637787, "grad_norm": Infinity, "learning_rate": 0.00017561639923504446, "loss": 11.1981, "step": 192710 }, { "epoch": 23.1913357400722, "grad_norm": Infinity, "learning_rate": 0.00017561390973400465, "loss": 11.0734, "step": 192720 }, { "epoch": 23.19253910950662, "grad_norm": Infinity, "learning_rate": 0.00017561142012353286, "loss": 11.1146, "step": 192730 }, { "epoch": 23.193742478941036, "grad_norm": Infinity, "learning_rate": 0.00017560893040363267, "loss": 11.1375, "step": 192740 }, { "epoch": 23.19494584837545, "grad_norm": Infinity, "learning_rate": 0.0001756064405743077, "loss": 11.132, "step": 192750 }, { "epoch": 23.196149217809868, "grad_norm": Infinity, "learning_rate": 0.0001756039506355615, "loss": 11.1629, "step": 192760 }, { "epoch": 23.197352587244286, "grad_norm": Infinity, "learning_rate": 0.00017560146058739776, "loss": 11.1582, "step": 192770 }, { "epoch": 23.1985559566787, "grad_norm": Infinity, "learning_rate": 0.00017559897042982003, "loss": 11.1921, "step": 192780 }, { "epoch": 23.199759326113117, "grad_norm": Infinity, "learning_rate": 0.00017559648016283194, "loss": 11.1661, "step": 192790 }, { "epoch": 23.200962695547535, "grad_norm": Infinity, "learning_rate": 0.00017559398978643707, "loss": 11.139, "step": 192800 }, { "epoch": 23.20216606498195, "grad_norm": Infinity, "learning_rate": 0.00017559149930063904, "loss": 11.2258, "step": 192810 }, { "epoch": 23.203369434416366, "grad_norm": Infinity, "learning_rate": 0.00017558900870544143, "loss": 11.223, "step": 192820 }, { "epoch": 23.204572803850784, "grad_norm": Infinity, "learning_rate": 0.00017558651800084787, "loss": 11.0765, "step": 192830 }, { "epoch": 23.205776173285198, "grad_norm": Infinity, "learning_rate": 0.00017558402718686195, "loss": 11.1681, "step": 192840 }, { "epoch": 23.206979542719615, "grad_norm": Infinity, "learning_rate": 0.0001755815362634873, "loss": 11.0835, "step": 192850 }, { "epoch": 23.20818291215403, "grad_norm": Infinity, "learning_rate": 0.00017557904523072748, "loss": 11.1196, "step": 192860 }, { "epoch": 23.209386281588447, "grad_norm": Infinity, "learning_rate": 0.00017557655408858614, "loss": 11.2878, "step": 192870 }, { "epoch": 23.210589651022865, "grad_norm": Infinity, "learning_rate": 0.00017557406283706686, "loss": 11.2261, "step": 192880 }, { "epoch": 23.21179302045728, "grad_norm": Infinity, "learning_rate": 0.00017557157147617326, "loss": 11.2108, "step": 192890 }, { "epoch": 23.212996389891696, "grad_norm": Infinity, "learning_rate": 0.00017556908000590892, "loss": 11.223, "step": 192900 }, { "epoch": 23.214199759326114, "grad_norm": Infinity, "learning_rate": 0.00017556658842627748, "loss": 11.1965, "step": 192910 }, { "epoch": 23.215403128760528, "grad_norm": Infinity, "learning_rate": 0.00017556409673728253, "loss": 11.1263, "step": 192920 }, { "epoch": 23.216606498194945, "grad_norm": Infinity, "learning_rate": 0.00017556160493892767, "loss": 11.158, "step": 192930 }, { "epoch": 23.217809867629363, "grad_norm": Infinity, "learning_rate": 0.00017555911303121654, "loss": 11.2259, "step": 192940 }, { "epoch": 23.219013237063777, "grad_norm": Infinity, "learning_rate": 0.0001755566210141527, "loss": 11.2653, "step": 192950 }, { "epoch": 23.220216606498195, "grad_norm": Infinity, "learning_rate": 0.00017555412888773975, "loss": 11.2626, "step": 192960 }, { "epoch": 23.221419975932612, "grad_norm": Infinity, "learning_rate": 0.00017555163665198132, "loss": 11.1391, "step": 192970 }, { "epoch": 23.222623345367026, "grad_norm": Infinity, "learning_rate": 0.00017554914430688106, "loss": 11.1943, "step": 192980 }, { "epoch": 23.223826714801444, "grad_norm": Infinity, "learning_rate": 0.0001755466518524425, "loss": 11.1316, "step": 192990 }, { "epoch": 23.22503008423586, "grad_norm": Infinity, "learning_rate": 0.0001755441592886693, "loss": 11.153, "step": 193000 }, { "epoch": 23.226233453670275, "grad_norm": Infinity, "learning_rate": 0.00017554166661556505, "loss": 11.1116, "step": 193010 }, { "epoch": 23.227436823104693, "grad_norm": Infinity, "learning_rate": 0.00017553917383313335, "loss": 11.252, "step": 193020 }, { "epoch": 23.22864019253911, "grad_norm": Infinity, "learning_rate": 0.00017553668094137784, "loss": 11.1595, "step": 193030 }, { "epoch": 23.229843561973524, "grad_norm": Infinity, "learning_rate": 0.00017553418794030208, "loss": 11.0905, "step": 193040 }, { "epoch": 23.231046931407942, "grad_norm": Infinity, "learning_rate": 0.0001755316948299097, "loss": 11.1444, "step": 193050 }, { "epoch": 23.23225030084236, "grad_norm": Infinity, "learning_rate": 0.0001755292016102043, "loss": 11.064, "step": 193060 }, { "epoch": 23.233453670276774, "grad_norm": Infinity, "learning_rate": 0.00017552670828118952, "loss": 11.0443, "step": 193070 }, { "epoch": 23.23465703971119, "grad_norm": Infinity, "learning_rate": 0.00017552421484286893, "loss": 11.23, "step": 193080 }, { "epoch": 23.23586040914561, "grad_norm": Infinity, "learning_rate": 0.00017552172129524618, "loss": 11.2578, "step": 193090 }, { "epoch": 23.237063778580023, "grad_norm": Infinity, "learning_rate": 0.00017551922763832484, "loss": 11.1379, "step": 193100 }, { "epoch": 23.23826714801444, "grad_norm": Infinity, "learning_rate": 0.00017551673387210853, "loss": 11.1566, "step": 193110 }, { "epoch": 23.239470517448858, "grad_norm": Infinity, "learning_rate": 0.00017551423999660087, "loss": 11.1361, "step": 193120 }, { "epoch": 23.240673886883272, "grad_norm": Infinity, "learning_rate": 0.0001755117460118054, "loss": 11.1516, "step": 193130 }, { "epoch": 23.24187725631769, "grad_norm": Infinity, "learning_rate": 0.0001755092519177259, "loss": 11.1199, "step": 193140 }, { "epoch": 23.243080625752107, "grad_norm": Infinity, "learning_rate": 0.0001755067577143658, "loss": 11.1568, "step": 193150 }, { "epoch": 23.24428399518652, "grad_norm": Infinity, "learning_rate": 0.00017550426340172878, "loss": 11.1194, "step": 193160 }, { "epoch": 23.24548736462094, "grad_norm": Infinity, "learning_rate": 0.00017550176897981846, "loss": 11.1797, "step": 193170 }, { "epoch": 23.246690734055356, "grad_norm": Infinity, "learning_rate": 0.00017549927444863845, "loss": 11.1113, "step": 193180 }, { "epoch": 23.24789410348977, "grad_norm": Infinity, "learning_rate": 0.00017549677980819236, "loss": 11.1766, "step": 193190 }, { "epoch": 23.249097472924188, "grad_norm": Infinity, "learning_rate": 0.00017549428505848376, "loss": 11.1253, "step": 193200 }, { "epoch": 23.250300842358605, "grad_norm": Infinity, "learning_rate": 0.00017549179019951628, "loss": 11.0924, "step": 193210 }, { "epoch": 23.25150421179302, "grad_norm": Infinity, "learning_rate": 0.0001754892952312936, "loss": 11.2372, "step": 193220 }, { "epoch": 23.252707581227437, "grad_norm": Infinity, "learning_rate": 0.00017548680015381919, "loss": 11.136, "step": 193230 }, { "epoch": 23.253910950661854, "grad_norm": Infinity, "learning_rate": 0.0001754843049670968, "loss": 11.2663, "step": 193240 }, { "epoch": 23.25511432009627, "grad_norm": Infinity, "learning_rate": 0.00017548180967112998, "loss": 11.0405, "step": 193250 }, { "epoch": 23.256317689530686, "grad_norm": Infinity, "learning_rate": 0.00017547931426592235, "loss": 11.0678, "step": 193260 }, { "epoch": 23.257521058965104, "grad_norm": Infinity, "learning_rate": 0.0001754768187514775, "loss": 11.2024, "step": 193270 }, { "epoch": 23.258724428399518, "grad_norm": Infinity, "learning_rate": 0.00017547432312779905, "loss": 11.1946, "step": 193280 }, { "epoch": 23.259927797833935, "grad_norm": Infinity, "learning_rate": 0.00017547182739489063, "loss": 11.1317, "step": 193290 }, { "epoch": 23.261131167268353, "grad_norm": Infinity, "learning_rate": 0.00017546933155275585, "loss": 11.0937, "step": 193300 }, { "epoch": 23.262334536702767, "grad_norm": Infinity, "learning_rate": 0.00017546683560139829, "loss": 11.0807, "step": 193310 }, { "epoch": 23.263537906137184, "grad_norm": Infinity, "learning_rate": 0.00017546433954082163, "loss": 11.2003, "step": 193320 }, { "epoch": 23.264741275571602, "grad_norm": Infinity, "learning_rate": 0.00017546184337102942, "loss": 11.076, "step": 193330 }, { "epoch": 23.265944645006016, "grad_norm": Infinity, "learning_rate": 0.0001754593470920253, "loss": 11.0659, "step": 193340 }, { "epoch": 23.267148014440433, "grad_norm": Infinity, "learning_rate": 0.00017545685070381284, "loss": 11.1645, "step": 193350 }, { "epoch": 23.26835138387485, "grad_norm": Infinity, "learning_rate": 0.0001754543542063957, "loss": 11.1878, "step": 193360 }, { "epoch": 23.269554753309265, "grad_norm": Infinity, "learning_rate": 0.00017545185759977749, "loss": 11.2552, "step": 193370 }, { "epoch": 23.270758122743683, "grad_norm": Infinity, "learning_rate": 0.0001754493608839618, "loss": 11.2498, "step": 193380 }, { "epoch": 23.2719614921781, "grad_norm": Infinity, "learning_rate": 0.0001754468640589523, "loss": 11.0623, "step": 193390 }, { "epoch": 23.273164861612514, "grad_norm": Infinity, "learning_rate": 0.00017544436712475255, "loss": 11.1857, "step": 193400 }, { "epoch": 23.27436823104693, "grad_norm": Infinity, "learning_rate": 0.00017544187008136612, "loss": 11.0667, "step": 193410 }, { "epoch": 23.27557160048135, "grad_norm": Infinity, "learning_rate": 0.00017543937292879673, "loss": 11.0717, "step": 193420 }, { "epoch": 23.276774969915763, "grad_norm": Infinity, "learning_rate": 0.0001754368756670479, "loss": 11.0537, "step": 193430 }, { "epoch": 23.27797833935018, "grad_norm": Infinity, "learning_rate": 0.00017543437829612332, "loss": 11.0827, "step": 193440 }, { "epoch": 23.2791817087846, "grad_norm": Infinity, "learning_rate": 0.0001754318808160266, "loss": 11.1383, "step": 193450 }, { "epoch": 23.280385078219012, "grad_norm": Infinity, "learning_rate": 0.00017542938322676128, "loss": 11.2067, "step": 193460 }, { "epoch": 23.28158844765343, "grad_norm": Infinity, "learning_rate": 0.000175426885528331, "loss": 11.0624, "step": 193470 }, { "epoch": 23.282791817087848, "grad_norm": Infinity, "learning_rate": 0.00017542438772073945, "loss": 11.0763, "step": 193480 }, { "epoch": 23.28399518652226, "grad_norm": Infinity, "learning_rate": 0.00017542188980399016, "loss": 11.1682, "step": 193490 }, { "epoch": 23.28519855595668, "grad_norm": Infinity, "learning_rate": 0.00017541939177808676, "loss": 11.2145, "step": 193500 }, { "epoch": 23.286401925391097, "grad_norm": Infinity, "learning_rate": 0.00017541689364303288, "loss": 11.0859, "step": 193510 }, { "epoch": 23.28760529482551, "grad_norm": Infinity, "learning_rate": 0.00017541439539883215, "loss": 11.1552, "step": 193520 }, { "epoch": 23.28880866425993, "grad_norm": Infinity, "learning_rate": 0.0001754118970454882, "loss": 11.0117, "step": 193530 }, { "epoch": 23.290012033694346, "grad_norm": Infinity, "learning_rate": 0.00017540939858300456, "loss": 11.1963, "step": 193540 }, { "epoch": 23.29121540312876, "grad_norm": Infinity, "learning_rate": 0.00017540690001138494, "loss": 11.1663, "step": 193550 }, { "epoch": 23.292418772563177, "grad_norm": Infinity, "learning_rate": 0.0001754044013306329, "loss": 11.1226, "step": 193560 }, { "epoch": 23.29362214199759, "grad_norm": Infinity, "learning_rate": 0.0001754019025407521, "loss": 11.2278, "step": 193570 }, { "epoch": 23.29482551143201, "grad_norm": Infinity, "learning_rate": 0.0001753994036417461, "loss": 11.129, "step": 193580 }, { "epoch": 23.296028880866427, "grad_norm": Infinity, "learning_rate": 0.00017539690463361856, "loss": 11.0296, "step": 193590 }, { "epoch": 23.29723225030084, "grad_norm": Infinity, "learning_rate": 0.00017539440551637308, "loss": 11.1274, "step": 193600 }, { "epoch": 23.29843561973526, "grad_norm": Infinity, "learning_rate": 0.0001753919062900133, "loss": 11.1067, "step": 193610 }, { "epoch": 23.299638989169676, "grad_norm": Infinity, "learning_rate": 0.0001753894069545428, "loss": 11.1685, "step": 193620 }, { "epoch": 23.30084235860409, "grad_norm": Infinity, "learning_rate": 0.0001753869075099652, "loss": 11.019, "step": 193630 }, { "epoch": 23.302045728038507, "grad_norm": Infinity, "learning_rate": 0.00017538440795628418, "loss": 11.1925, "step": 193640 }, { "epoch": 23.303249097472925, "grad_norm": Infinity, "learning_rate": 0.00017538190829350327, "loss": 11.1634, "step": 193650 }, { "epoch": 23.30445246690734, "grad_norm": Infinity, "learning_rate": 0.00017537940852162614, "loss": 11.0511, "step": 193660 }, { "epoch": 23.305655836341757, "grad_norm": Infinity, "learning_rate": 0.00017537690864065638, "loss": 11.1921, "step": 193670 }, { "epoch": 23.306859205776174, "grad_norm": Infinity, "learning_rate": 0.00017537440865059763, "loss": 11.1834, "step": 193680 }, { "epoch": 23.308062575210588, "grad_norm": Infinity, "learning_rate": 0.0001753719085514535, "loss": 11.1029, "step": 193690 }, { "epoch": 23.309265944645006, "grad_norm": Infinity, "learning_rate": 0.00017536940834322762, "loss": 11.1336, "step": 193700 }, { "epoch": 23.310469314079423, "grad_norm": Infinity, "learning_rate": 0.00017536690802592362, "loss": 11.1643, "step": 193710 }, { "epoch": 23.311672683513837, "grad_norm": Infinity, "learning_rate": 0.00017536440759954506, "loss": 11.0536, "step": 193720 }, { "epoch": 23.312876052948255, "grad_norm": Infinity, "learning_rate": 0.0001753619070640956, "loss": 11.0405, "step": 193730 }, { "epoch": 23.314079422382672, "grad_norm": Infinity, "learning_rate": 0.00017535940641957883, "loss": 11.2063, "step": 193740 }, { "epoch": 23.315282791817086, "grad_norm": Infinity, "learning_rate": 0.00017535690566599846, "loss": 11.0488, "step": 193750 }, { "epoch": 23.316486161251504, "grad_norm": Infinity, "learning_rate": 0.000175354404803358, "loss": 11.0255, "step": 193760 }, { "epoch": 23.31768953068592, "grad_norm": Infinity, "learning_rate": 0.00017535190383166112, "loss": 11.1295, "step": 193770 }, { "epoch": 23.318892900120336, "grad_norm": Infinity, "learning_rate": 0.00017534940275091144, "loss": 11.1352, "step": 193780 }, { "epoch": 23.320096269554753, "grad_norm": Infinity, "learning_rate": 0.00017534690156111252, "loss": 11.0754, "step": 193790 }, { "epoch": 23.32129963898917, "grad_norm": Infinity, "learning_rate": 0.0001753444002622681, "loss": 11.1256, "step": 193800 }, { "epoch": 23.322503008423585, "grad_norm": Infinity, "learning_rate": 0.00017534189885438171, "loss": 11.0896, "step": 193810 }, { "epoch": 23.323706377858002, "grad_norm": Infinity, "learning_rate": 0.000175339397337457, "loss": 11.0594, "step": 193820 }, { "epoch": 23.32490974729242, "grad_norm": Infinity, "learning_rate": 0.00017533689571149754, "loss": 11.1942, "step": 193830 }, { "epoch": 23.326113116726834, "grad_norm": Infinity, "learning_rate": 0.00017533439397650702, "loss": 11.1883, "step": 193840 }, { "epoch": 23.32731648616125, "grad_norm": Infinity, "learning_rate": 0.00017533189213248904, "loss": 11.0362, "step": 193850 }, { "epoch": 23.32851985559567, "grad_norm": Infinity, "learning_rate": 0.00017532939017944718, "loss": 11.2057, "step": 193860 }, { "epoch": 23.329723225030083, "grad_norm": Infinity, "learning_rate": 0.00017532688811738514, "loss": 11.1193, "step": 193870 }, { "epoch": 23.3309265944645, "grad_norm": Infinity, "learning_rate": 0.00017532438594630647, "loss": 11.123, "step": 193880 }, { "epoch": 23.332129963898918, "grad_norm": Infinity, "learning_rate": 0.0001753218836662148, "loss": 11.1341, "step": 193890 }, { "epoch": 23.333333333333332, "grad_norm": Infinity, "learning_rate": 0.0001753193812771138, "loss": 11.0933, "step": 193900 }, { "epoch": 23.33453670276775, "grad_norm": Infinity, "learning_rate": 0.00017531687877900704, "loss": 11.1447, "step": 193910 }, { "epoch": 23.335740072202167, "grad_norm": Infinity, "learning_rate": 0.0001753143761718982, "loss": 11.1411, "step": 193920 }, { "epoch": 23.33694344163658, "grad_norm": Infinity, "learning_rate": 0.0001753118734557908, "loss": 11.1185, "step": 193930 }, { "epoch": 23.338146811071, "grad_norm": Infinity, "learning_rate": 0.00017530937063068859, "loss": 11.0155, "step": 193940 }, { "epoch": 23.339350180505416, "grad_norm": Infinity, "learning_rate": 0.00017530686769659507, "loss": 11.24, "step": 193950 }, { "epoch": 23.34055354993983, "grad_norm": Infinity, "learning_rate": 0.00017530436465351394, "loss": 11.1776, "step": 193960 }, { "epoch": 23.341756919374248, "grad_norm": Infinity, "learning_rate": 0.00017530186150144879, "loss": 11.1802, "step": 193970 }, { "epoch": 23.342960288808666, "grad_norm": Infinity, "learning_rate": 0.0001752993582404033, "loss": 11.1322, "step": 193980 }, { "epoch": 23.34416365824308, "grad_norm": Infinity, "learning_rate": 0.00017529685487038102, "loss": 11.0815, "step": 193990 }, { "epoch": 23.345367027677497, "grad_norm": Infinity, "learning_rate": 0.00017529435139138557, "loss": 11.1885, "step": 194000 }, { "epoch": 23.346570397111915, "grad_norm": Infinity, "learning_rate": 0.00017529184780342065, "loss": 11.2131, "step": 194010 }, { "epoch": 23.34777376654633, "grad_norm": Infinity, "learning_rate": 0.00017528934410648983, "loss": 11.248, "step": 194020 }, { "epoch": 23.348977135980746, "grad_norm": Infinity, "learning_rate": 0.0001752868403005967, "loss": 11.0511, "step": 194030 }, { "epoch": 23.350180505415164, "grad_norm": Infinity, "learning_rate": 0.00017528433638574497, "loss": 11.1162, "step": 194040 }, { "epoch": 23.351383874849578, "grad_norm": Infinity, "learning_rate": 0.0001752818323619382, "loss": 11.1686, "step": 194050 }, { "epoch": 23.352587244283995, "grad_norm": Infinity, "learning_rate": 0.00017527932822918003, "loss": 11.1534, "step": 194060 }, { "epoch": 23.353790613718413, "grad_norm": Infinity, "learning_rate": 0.00017527682398747413, "loss": 11.1434, "step": 194070 }, { "epoch": 23.354993983152827, "grad_norm": Infinity, "learning_rate": 0.000175274319636824, "loss": 11.0826, "step": 194080 }, { "epoch": 23.356197352587245, "grad_norm": Infinity, "learning_rate": 0.00017527181517723342, "loss": 11.1348, "step": 194090 }, { "epoch": 23.357400722021662, "grad_norm": Infinity, "learning_rate": 0.00017526931060870592, "loss": 11.1702, "step": 194100 }, { "epoch": 23.358604091456076, "grad_norm": Infinity, "learning_rate": 0.0001752668059312451, "loss": 11.1623, "step": 194110 }, { "epoch": 23.359807460890494, "grad_norm": Infinity, "learning_rate": 0.00017526430114485467, "loss": 11.0346, "step": 194120 }, { "epoch": 23.36101083032491, "grad_norm": Infinity, "learning_rate": 0.0001752617962495382, "loss": 11.1335, "step": 194130 }, { "epoch": 23.362214199759325, "grad_norm": Infinity, "learning_rate": 0.00017525929124529932, "loss": 11.0116, "step": 194140 }, { "epoch": 23.363417569193743, "grad_norm": Infinity, "learning_rate": 0.0001752567861321417, "loss": 11.1241, "step": 194150 }, { "epoch": 23.36462093862816, "grad_norm": Infinity, "learning_rate": 0.00017525428091006887, "loss": 11.286, "step": 194160 }, { "epoch": 23.365824308062574, "grad_norm": Infinity, "learning_rate": 0.00017525177557908456, "loss": 11.1952, "step": 194170 }, { "epoch": 23.367027677496992, "grad_norm": Infinity, "learning_rate": 0.00017524927013919234, "loss": 11.1059, "step": 194180 }, { "epoch": 23.36823104693141, "grad_norm": Infinity, "learning_rate": 0.00017524676459039587, "loss": 11.1561, "step": 194190 }, { "epoch": 23.369434416365824, "grad_norm": Infinity, "learning_rate": 0.00017524425893269873, "loss": 11.2982, "step": 194200 }, { "epoch": 23.37063778580024, "grad_norm": Infinity, "learning_rate": 0.0001752417531661046, "loss": 11.168, "step": 194210 }, { "epoch": 23.37184115523466, "grad_norm": Infinity, "learning_rate": 0.00017523924729061701, "loss": 11.1924, "step": 194220 }, { "epoch": 23.373044524669073, "grad_norm": Infinity, "learning_rate": 0.00017523674130623973, "loss": 11.1715, "step": 194230 }, { "epoch": 23.37424789410349, "grad_norm": Infinity, "learning_rate": 0.0001752342352129763, "loss": 11.1251, "step": 194240 }, { "epoch": 23.375451263537904, "grad_norm": Infinity, "learning_rate": 0.0001752317290108303, "loss": 11.0972, "step": 194250 }, { "epoch": 23.376654632972322, "grad_norm": Infinity, "learning_rate": 0.00017522922269980545, "loss": 11.1484, "step": 194260 }, { "epoch": 23.37785800240674, "grad_norm": Infinity, "learning_rate": 0.00017522671627990535, "loss": 11.1995, "step": 194270 }, { "epoch": 23.379061371841154, "grad_norm": Infinity, "learning_rate": 0.00017522420975113362, "loss": 11.1673, "step": 194280 }, { "epoch": 23.38026474127557, "grad_norm": Infinity, "learning_rate": 0.00017522170311349387, "loss": 11.124, "step": 194290 }, { "epoch": 23.38146811070999, "grad_norm": Infinity, "learning_rate": 0.00017521919636698973, "loss": 11.2001, "step": 194300 }, { "epoch": 23.382671480144403, "grad_norm": Infinity, "learning_rate": 0.00017521668951162486, "loss": 11.212, "step": 194310 }, { "epoch": 23.38387484957882, "grad_norm": Infinity, "learning_rate": 0.00017521418254740287, "loss": 11.0148, "step": 194320 }, { "epoch": 23.385078219013238, "grad_norm": Infinity, "learning_rate": 0.0001752116754743274, "loss": 11.1504, "step": 194330 }, { "epoch": 23.386281588447652, "grad_norm": Infinity, "learning_rate": 0.00017520916829240205, "loss": 11.1509, "step": 194340 }, { "epoch": 23.38748495788207, "grad_norm": Infinity, "learning_rate": 0.00017520666100163048, "loss": 11.0231, "step": 194350 }, { "epoch": 23.388688327316487, "grad_norm": Infinity, "learning_rate": 0.00017520415360201626, "loss": 11.1179, "step": 194360 }, { "epoch": 23.3898916967509, "grad_norm": Infinity, "learning_rate": 0.00017520164609356314, "loss": 11.2122, "step": 194370 }, { "epoch": 23.39109506618532, "grad_norm": Infinity, "learning_rate": 0.0001751991384762746, "loss": 11.2921, "step": 194380 }, { "epoch": 23.392298435619736, "grad_norm": Infinity, "learning_rate": 0.00017519663075015439, "loss": 11.2267, "step": 194390 }, { "epoch": 23.39350180505415, "grad_norm": Infinity, "learning_rate": 0.00017519412291520605, "loss": 11.097, "step": 194400 }, { "epoch": 23.394705174488568, "grad_norm": Infinity, "learning_rate": 0.00017519161497143326, "loss": 11.1614, "step": 194410 }, { "epoch": 23.395908543922985, "grad_norm": Infinity, "learning_rate": 0.00017518910691883966, "loss": 11.1221, "step": 194420 }, { "epoch": 23.3971119133574, "grad_norm": Infinity, "learning_rate": 0.0001751865987574288, "loss": 11.1046, "step": 194430 }, { "epoch": 23.398315282791817, "grad_norm": Infinity, "learning_rate": 0.00017518409048720445, "loss": 11.0423, "step": 194440 }, { "epoch": 23.399518652226234, "grad_norm": Infinity, "learning_rate": 0.0001751815821081701, "loss": 11.1207, "step": 194450 }, { "epoch": 23.40072202166065, "grad_norm": Infinity, "learning_rate": 0.00017517907362032948, "loss": 11.1068, "step": 194460 }, { "epoch": 23.401925391095066, "grad_norm": Infinity, "learning_rate": 0.00017517656502368616, "loss": 11.0872, "step": 194470 }, { "epoch": 23.403128760529484, "grad_norm": Infinity, "learning_rate": 0.0001751740563182438, "loss": 11.1543, "step": 194480 }, { "epoch": 23.404332129963898, "grad_norm": Infinity, "learning_rate": 0.000175171547504006, "loss": 11.2175, "step": 194490 }, { "epoch": 23.405535499398315, "grad_norm": Infinity, "learning_rate": 0.00017516903858097642, "loss": 11.2075, "step": 194500 }, { "epoch": 23.406738868832733, "grad_norm": Infinity, "learning_rate": 0.00017516652954915868, "loss": 10.9674, "step": 194510 }, { "epoch": 23.407942238267147, "grad_norm": Infinity, "learning_rate": 0.00017516402040855641, "loss": 11.05, "step": 194520 }, { "epoch": 23.409145607701564, "grad_norm": Infinity, "learning_rate": 0.00017516151115917324, "loss": 11.037, "step": 194530 }, { "epoch": 23.410348977135982, "grad_norm": Infinity, "learning_rate": 0.00017515900180101282, "loss": 11.1966, "step": 194540 }, { "epoch": 23.411552346570396, "grad_norm": Infinity, "learning_rate": 0.0001751564923340788, "loss": 11.2884, "step": 194550 }, { "epoch": 23.412755716004813, "grad_norm": Infinity, "learning_rate": 0.00017515398275837476, "loss": 11.2552, "step": 194560 }, { "epoch": 23.41395908543923, "grad_norm": Infinity, "learning_rate": 0.00017515147307390431, "loss": 11.1307, "step": 194570 }, { "epoch": 23.415162454873645, "grad_norm": Infinity, "learning_rate": 0.00017514896328067116, "loss": 11.0145, "step": 194580 }, { "epoch": 23.416365824308063, "grad_norm": Infinity, "learning_rate": 0.00017514645337867889, "loss": 11.1513, "step": 194590 }, { "epoch": 23.41756919374248, "grad_norm": Infinity, "learning_rate": 0.00017514394336793116, "loss": 11.2107, "step": 194600 }, { "epoch": 23.418772563176894, "grad_norm": Infinity, "learning_rate": 0.0001751414332484316, "loss": 11.3053, "step": 194610 }, { "epoch": 23.41997593261131, "grad_norm": Infinity, "learning_rate": 0.00017513892302018383, "loss": 11.0757, "step": 194620 }, { "epoch": 23.42117930204573, "grad_norm": Infinity, "learning_rate": 0.00017513641268319146, "loss": 11.1309, "step": 194630 }, { "epoch": 23.422382671480143, "grad_norm": Infinity, "learning_rate": 0.0001751339022374582, "loss": 11.0912, "step": 194640 }, { "epoch": 23.42358604091456, "grad_norm": Infinity, "learning_rate": 0.0001751313916829876, "loss": 11.2072, "step": 194650 }, { "epoch": 23.42478941034898, "grad_norm": Infinity, "learning_rate": 0.00017512888101978334, "loss": 11.1931, "step": 194660 }, { "epoch": 23.425992779783392, "grad_norm": Infinity, "learning_rate": 0.00017512637024784904, "loss": 11.2085, "step": 194670 }, { "epoch": 23.42719614921781, "grad_norm": Infinity, "learning_rate": 0.00017512385936718832, "loss": 11.1922, "step": 194680 }, { "epoch": 23.428399518652228, "grad_norm": Infinity, "learning_rate": 0.00017512134837780483, "loss": 11.1161, "step": 194690 }, { "epoch": 23.42960288808664, "grad_norm": Infinity, "learning_rate": 0.0001751188372797022, "loss": 11.1434, "step": 194700 }, { "epoch": 23.43080625752106, "grad_norm": Infinity, "learning_rate": 0.0001751163260728841, "loss": 11.0348, "step": 194710 }, { "epoch": 23.432009626955477, "grad_norm": Infinity, "learning_rate": 0.00017511381475735407, "loss": 11.1551, "step": 194720 }, { "epoch": 23.43321299638989, "grad_norm": Infinity, "learning_rate": 0.00017511130333311585, "loss": 11.1053, "step": 194730 }, { "epoch": 23.43441636582431, "grad_norm": Infinity, "learning_rate": 0.00017510879180017303, "loss": 11.1571, "step": 194740 }, { "epoch": 23.435619735258726, "grad_norm": Infinity, "learning_rate": 0.0001751062801585292, "loss": 11.049, "step": 194750 }, { "epoch": 23.43682310469314, "grad_norm": Infinity, "learning_rate": 0.00017510376840818812, "loss": 11.1627, "step": 194760 }, { "epoch": 23.438026474127557, "grad_norm": Infinity, "learning_rate": 0.00017510125654915328, "loss": 11.2024, "step": 194770 }, { "epoch": 23.439229843561975, "grad_norm": Infinity, "learning_rate": 0.0001750987445814284, "loss": 11.1581, "step": 194780 }, { "epoch": 23.44043321299639, "grad_norm": Infinity, "learning_rate": 0.0001750962325050171, "loss": 11.1818, "step": 194790 }, { "epoch": 23.441636582430807, "grad_norm": Infinity, "learning_rate": 0.00017509372031992298, "loss": 11.0888, "step": 194800 }, { "epoch": 23.442839951865224, "grad_norm": Infinity, "learning_rate": 0.00017509120802614977, "loss": 11.2685, "step": 194810 }, { "epoch": 23.444043321299638, "grad_norm": Infinity, "learning_rate": 0.00017508869562370098, "loss": 11.1844, "step": 194820 }, { "epoch": 23.445246690734056, "grad_norm": Infinity, "learning_rate": 0.00017508618311258033, "loss": 11.0798, "step": 194830 }, { "epoch": 23.446450060168473, "grad_norm": Infinity, "learning_rate": 0.00017508367049279145, "loss": 11.2141, "step": 194840 }, { "epoch": 23.447653429602887, "grad_norm": Infinity, "learning_rate": 0.00017508115776433794, "loss": 11.124, "step": 194850 }, { "epoch": 23.448856799037305, "grad_norm": Infinity, "learning_rate": 0.00017507864492722345, "loss": 11.2347, "step": 194860 }, { "epoch": 23.450060168471722, "grad_norm": Infinity, "learning_rate": 0.00017507613198145165, "loss": 11.1683, "step": 194870 }, { "epoch": 23.451263537906136, "grad_norm": Infinity, "learning_rate": 0.00017507361892702615, "loss": 11.1772, "step": 194880 }, { "epoch": 23.452466907340554, "grad_norm": Infinity, "learning_rate": 0.00017507110576395055, "loss": 11.2221, "step": 194890 }, { "epoch": 23.45367027677497, "grad_norm": Infinity, "learning_rate": 0.00017506859249222858, "loss": 11.0778, "step": 194900 }, { "epoch": 23.454873646209386, "grad_norm": Infinity, "learning_rate": 0.0001750660791118638, "loss": 11.1625, "step": 194910 }, { "epoch": 23.456077015643803, "grad_norm": Infinity, "learning_rate": 0.00017506356562285987, "loss": 11.1564, "step": 194920 }, { "epoch": 23.45728038507822, "grad_norm": Infinity, "learning_rate": 0.0001750610520252204, "loss": 11.2514, "step": 194930 }, { "epoch": 23.458483754512635, "grad_norm": Infinity, "learning_rate": 0.00017505853831894909, "loss": 11.1341, "step": 194940 }, { "epoch": 23.459687123947052, "grad_norm": Infinity, "learning_rate": 0.00017505602450404955, "loss": 11.0626, "step": 194950 }, { "epoch": 23.460890493381466, "grad_norm": Infinity, "learning_rate": 0.0001750535105805254, "loss": 11.1028, "step": 194960 }, { "epoch": 23.462093862815884, "grad_norm": Infinity, "learning_rate": 0.00017505099654838027, "loss": 11.1309, "step": 194970 }, { "epoch": 23.4632972322503, "grad_norm": Infinity, "learning_rate": 0.00017504848240761786, "loss": 11.224, "step": 194980 }, { "epoch": 23.464500601684716, "grad_norm": Infinity, "learning_rate": 0.00017504596815824176, "loss": 11.0937, "step": 194990 }, { "epoch": 23.465703971119133, "grad_norm": Infinity, "learning_rate": 0.00017504345380025556, "loss": 11.0216, "step": 195000 }, { "epoch": 23.46690734055355, "grad_norm": Infinity, "learning_rate": 0.000175040939333663, "loss": 11.1226, "step": 195010 }, { "epoch": 23.468110709987965, "grad_norm": Infinity, "learning_rate": 0.00017503842475846768, "loss": 11.1569, "step": 195020 }, { "epoch": 23.469314079422382, "grad_norm": Infinity, "learning_rate": 0.0001750359100746732, "loss": 11.1827, "step": 195030 }, { "epoch": 23.4705174488568, "grad_norm": Infinity, "learning_rate": 0.00017503339528228325, "loss": 11.1777, "step": 195040 }, { "epoch": 23.471720818291214, "grad_norm": Infinity, "learning_rate": 0.00017503088038130145, "loss": 11.007, "step": 195050 }, { "epoch": 23.47292418772563, "grad_norm": Infinity, "learning_rate": 0.00017502836537173146, "loss": 11.1805, "step": 195060 }, { "epoch": 23.47412755716005, "grad_norm": Infinity, "learning_rate": 0.0001750258502535769, "loss": 11.0962, "step": 195070 }, { "epoch": 23.475330926594463, "grad_norm": Infinity, "learning_rate": 0.00017502333502684137, "loss": 11.2764, "step": 195080 }, { "epoch": 23.47653429602888, "grad_norm": Infinity, "learning_rate": 0.00017502081969152862, "loss": 11.0084, "step": 195090 }, { "epoch": 23.477737665463298, "grad_norm": Infinity, "learning_rate": 0.00017501830424764217, "loss": 11.0039, "step": 195100 }, { "epoch": 23.478941034897712, "grad_norm": Infinity, "learning_rate": 0.0001750157886951857, "loss": 11.1472, "step": 195110 }, { "epoch": 23.48014440433213, "grad_norm": Infinity, "learning_rate": 0.00017501327303416292, "loss": 11.1366, "step": 195120 }, { "epoch": 23.481347773766547, "grad_norm": Infinity, "learning_rate": 0.00017501075726457737, "loss": 11.1748, "step": 195130 }, { "epoch": 23.48255114320096, "grad_norm": Infinity, "learning_rate": 0.00017500824138643275, "loss": 11.0998, "step": 195140 }, { "epoch": 23.48375451263538, "grad_norm": Infinity, "learning_rate": 0.00017500572539973268, "loss": 11.223, "step": 195150 }, { "epoch": 23.484957882069796, "grad_norm": Infinity, "learning_rate": 0.0001750032093044808, "loss": 10.9834, "step": 195160 }, { "epoch": 23.48616125150421, "grad_norm": Infinity, "learning_rate": 0.00017500069310068077, "loss": 11.2017, "step": 195170 }, { "epoch": 23.487364620938628, "grad_norm": Infinity, "learning_rate": 0.00017499817678833624, "loss": 11.1433, "step": 195180 }, { "epoch": 23.488567990373046, "grad_norm": Infinity, "learning_rate": 0.0001749956603674508, "loss": 11.1318, "step": 195190 }, { "epoch": 23.48977135980746, "grad_norm": Infinity, "learning_rate": 0.00017499314383802813, "loss": 11.2182, "step": 195200 }, { "epoch": 23.490974729241877, "grad_norm": Infinity, "learning_rate": 0.00017499062720007186, "loss": 11.1747, "step": 195210 }, { "epoch": 23.492178098676295, "grad_norm": Infinity, "learning_rate": 0.00017498811045358565, "loss": 11.312, "step": 195220 }, { "epoch": 23.49338146811071, "grad_norm": Infinity, "learning_rate": 0.00017498559359857313, "loss": 11.1799, "step": 195230 }, { "epoch": 23.494584837545126, "grad_norm": Infinity, "learning_rate": 0.00017498307663503794, "loss": 11.1734, "step": 195240 }, { "epoch": 23.495788206979544, "grad_norm": Infinity, "learning_rate": 0.00017498055956298374, "loss": 11.2187, "step": 195250 }, { "epoch": 23.496991576413958, "grad_norm": Infinity, "learning_rate": 0.0001749780423824141, "loss": 11.1922, "step": 195260 }, { "epoch": 23.498194945848375, "grad_norm": Infinity, "learning_rate": 0.00017497552509333279, "loss": 11.1969, "step": 195270 }, { "epoch": 23.499398315282793, "grad_norm": Infinity, "learning_rate": 0.00017497300769574333, "loss": 11.1444, "step": 195280 }, { "epoch": 23.500601684717207, "grad_norm": Infinity, "learning_rate": 0.0001749704901896495, "loss": 11.1288, "step": 195290 }, { "epoch": 23.501805054151625, "grad_norm": Infinity, "learning_rate": 0.00017496797257505477, "loss": 11.2265, "step": 195300 }, { "epoch": 23.503008423586042, "grad_norm": Infinity, "learning_rate": 0.00017496545485196291, "loss": 11.0784, "step": 195310 }, { "epoch": 23.504211793020456, "grad_norm": Infinity, "learning_rate": 0.00017496293702037755, "loss": 11.2303, "step": 195320 }, { "epoch": 23.505415162454874, "grad_norm": Infinity, "learning_rate": 0.0001749604190803023, "loss": 11.0919, "step": 195330 }, { "epoch": 23.50661853188929, "grad_norm": Infinity, "learning_rate": 0.00017495790103174078, "loss": 11.0806, "step": 195340 }, { "epoch": 23.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00017495538287469673, "loss": 11.1103, "step": 195350 }, { "epoch": 23.509025270758123, "grad_norm": Infinity, "learning_rate": 0.00017495286460917367, "loss": 11.0826, "step": 195360 }, { "epoch": 23.51022864019254, "grad_norm": Infinity, "learning_rate": 0.00017495034623517537, "loss": 11.068, "step": 195370 }, { "epoch": 23.511432009626954, "grad_norm": Infinity, "learning_rate": 0.00017494782775270539, "loss": 11.1516, "step": 195380 }, { "epoch": 23.512635379061372, "grad_norm": Infinity, "learning_rate": 0.0001749453091617674, "loss": 11.2196, "step": 195390 }, { "epoch": 23.51383874849579, "grad_norm": Infinity, "learning_rate": 0.00017494279046236503, "loss": 11.1573, "step": 195400 }, { "epoch": 23.515042117930204, "grad_norm": Infinity, "learning_rate": 0.00017494027165450196, "loss": 11.1239, "step": 195410 }, { "epoch": 23.51624548736462, "grad_norm": Infinity, "learning_rate": 0.0001749377527381818, "loss": 11.1621, "step": 195420 }, { "epoch": 23.51744885679904, "grad_norm": Infinity, "learning_rate": 0.0001749352337134082, "loss": 11.1501, "step": 195430 }, { "epoch": 23.518652226233453, "grad_norm": Infinity, "learning_rate": 0.00017493271458018486, "loss": 11.0687, "step": 195440 }, { "epoch": 23.51985559566787, "grad_norm": Infinity, "learning_rate": 0.00017493019533851536, "loss": 11.0757, "step": 195450 }, { "epoch": 23.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00017492767598840337, "loss": 11.1104, "step": 195460 }, { "epoch": 23.522262334536702, "grad_norm": Infinity, "learning_rate": 0.0001749251565298525, "loss": 11.0994, "step": 195470 }, { "epoch": 23.52346570397112, "grad_norm": Infinity, "learning_rate": 0.00017492263696286645, "loss": 11.145, "step": 195480 }, { "epoch": 23.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00017492011728744886, "loss": 11.1268, "step": 195490 }, { "epoch": 23.52587244283995, "grad_norm": Infinity, "learning_rate": 0.00017491759750360334, "loss": 11.0251, "step": 195500 }, { "epoch": 23.52707581227437, "grad_norm": Infinity, "learning_rate": 0.00017491507761133358, "loss": 11.0577, "step": 195510 }, { "epoch": 23.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00017491255761064323, "loss": 11.1736, "step": 195520 }, { "epoch": 23.5294825511432, "grad_norm": Infinity, "learning_rate": 0.00017491003750153588, "loss": 11.2155, "step": 195530 }, { "epoch": 23.530685920577618, "grad_norm": Infinity, "learning_rate": 0.0001749075172840152, "loss": 11.2012, "step": 195540 }, { "epoch": 23.531889290012035, "grad_norm": Infinity, "learning_rate": 0.00017490499695808488, "loss": 11.0345, "step": 195550 }, { "epoch": 23.53309265944645, "grad_norm": Infinity, "learning_rate": 0.00017490247652374852, "loss": 11.1722, "step": 195560 }, { "epoch": 23.534296028880867, "grad_norm": Infinity, "learning_rate": 0.0001748999559810098, "loss": 11.2242, "step": 195570 }, { "epoch": 23.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00017489743532987233, "loss": 11.0888, "step": 195580 }, { "epoch": 23.5367027677497, "grad_norm": Infinity, "learning_rate": 0.00017489491457033977, "loss": 11.2027, "step": 195590 }, { "epoch": 23.537906137184116, "grad_norm": Infinity, "learning_rate": 0.00017489239370241578, "loss": 11.2428, "step": 195600 }, { "epoch": 23.53910950661853, "grad_norm": Infinity, "learning_rate": 0.00017488987272610404, "loss": 11.0786, "step": 195610 }, { "epoch": 23.540312876052948, "grad_norm": Infinity, "learning_rate": 0.00017488735164140814, "loss": 11.0887, "step": 195620 }, { "epoch": 23.541516245487365, "grad_norm": Infinity, "learning_rate": 0.00017488483044833178, "loss": 11.1581, "step": 195630 }, { "epoch": 23.54271961492178, "grad_norm": Infinity, "learning_rate": 0.00017488230914687854, "loss": 11.0984, "step": 195640 }, { "epoch": 23.543922984356197, "grad_norm": Infinity, "learning_rate": 0.00017487978773705215, "loss": 11.1414, "step": 195650 }, { "epoch": 23.545126353790614, "grad_norm": Infinity, "learning_rate": 0.0001748772662188562, "loss": 11.1765, "step": 195660 }, { "epoch": 23.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00017487474459229434, "loss": 11.0267, "step": 195670 }, { "epoch": 23.547533092659446, "grad_norm": Infinity, "learning_rate": 0.00017487222285737027, "loss": 11.1396, "step": 195680 }, { "epoch": 23.548736462093864, "grad_norm": Infinity, "learning_rate": 0.00017486970101408757, "loss": 11.2185, "step": 195690 }, { "epoch": 23.549939831528278, "grad_norm": Infinity, "learning_rate": 0.00017486717906244997, "loss": 11.1946, "step": 195700 }, { "epoch": 23.551143200962695, "grad_norm": Infinity, "learning_rate": 0.00017486465700246106, "loss": 11.1448, "step": 195710 }, { "epoch": 23.552346570397113, "grad_norm": Infinity, "learning_rate": 0.0001748621348341245, "loss": 11.1367, "step": 195720 }, { "epoch": 23.553549939831527, "grad_norm": Infinity, "learning_rate": 0.00017485961255744395, "loss": 11.1005, "step": 195730 }, { "epoch": 23.554753309265944, "grad_norm": Infinity, "learning_rate": 0.0001748570901724231, "loss": 11.1457, "step": 195740 }, { "epoch": 23.555956678700362, "grad_norm": Infinity, "learning_rate": 0.00017485456767906552, "loss": 11.1066, "step": 195750 }, { "epoch": 23.557160048134776, "grad_norm": Infinity, "learning_rate": 0.00017485204507737488, "loss": 11.1473, "step": 195760 }, { "epoch": 23.558363417569193, "grad_norm": Infinity, "learning_rate": 0.0001748495223673549, "loss": 11.1539, "step": 195770 }, { "epoch": 23.55956678700361, "grad_norm": Infinity, "learning_rate": 0.00017484699954900914, "loss": 11.1721, "step": 195780 }, { "epoch": 23.560770156438025, "grad_norm": Infinity, "learning_rate": 0.00017484447662234132, "loss": 11.1648, "step": 195790 }, { "epoch": 23.561973525872443, "grad_norm": Infinity, "learning_rate": 0.00017484195358735504, "loss": 11.1405, "step": 195800 }, { "epoch": 23.56317689530686, "grad_norm": Infinity, "learning_rate": 0.00017483943044405397, "loss": 11.2171, "step": 195810 }, { "epoch": 23.564380264741274, "grad_norm": Infinity, "learning_rate": 0.0001748369071924418, "loss": 11.186, "step": 195820 }, { "epoch": 23.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00017483438383252215, "loss": 11.1565, "step": 195830 }, { "epoch": 23.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00017483186036429865, "loss": 11.0553, "step": 195840 }, { "epoch": 23.567990373044523, "grad_norm": Infinity, "learning_rate": 0.00017482933678777498, "loss": 11.3257, "step": 195850 }, { "epoch": 23.56919374247894, "grad_norm": Infinity, "learning_rate": 0.00017482681310295475, "loss": 11.1756, "step": 195860 }, { "epoch": 23.57039711191336, "grad_norm": Infinity, "learning_rate": 0.0001748242893098417, "loss": 11.0925, "step": 195870 }, { "epoch": 23.571600481347772, "grad_norm": Infinity, "learning_rate": 0.00017482176540843943, "loss": 11.1566, "step": 195880 }, { "epoch": 23.57280385078219, "grad_norm": Infinity, "learning_rate": 0.00017481924139875154, "loss": 11.2046, "step": 195890 }, { "epoch": 23.574007220216608, "grad_norm": Infinity, "learning_rate": 0.00017481671728078178, "loss": 11.3324, "step": 195900 }, { "epoch": 23.57521058965102, "grad_norm": Infinity, "learning_rate": 0.00017481419305453373, "loss": 11.0436, "step": 195910 }, { "epoch": 23.57641395908544, "grad_norm": Infinity, "learning_rate": 0.0001748116687200111, "loss": 11.1291, "step": 195920 }, { "epoch": 23.577617328519857, "grad_norm": Infinity, "learning_rate": 0.0001748091442772175, "loss": 11.2869, "step": 195930 }, { "epoch": 23.57882069795427, "grad_norm": Infinity, "learning_rate": 0.0001748066197261566, "loss": 11.2446, "step": 195940 }, { "epoch": 23.58002406738869, "grad_norm": Infinity, "learning_rate": 0.00017480409506683207, "loss": 11.1379, "step": 195950 }, { "epoch": 23.581227436823106, "grad_norm": Infinity, "learning_rate": 0.0001748015702992475, "loss": 11.2747, "step": 195960 }, { "epoch": 23.58243080625752, "grad_norm": Infinity, "learning_rate": 0.0001747990454234066, "loss": 10.9951, "step": 195970 }, { "epoch": 23.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00017479652043931308, "loss": 11.202, "step": 195980 }, { "epoch": 23.584837545126355, "grad_norm": Infinity, "learning_rate": 0.00017479399534697047, "loss": 11.2329, "step": 195990 }, { "epoch": 23.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00017479147014638248, "loss": 11.2264, "step": 196000 }, { "epoch": 23.587244283995187, "grad_norm": Infinity, "learning_rate": 0.00017478894483755278, "loss": 11.0912, "step": 196010 }, { "epoch": 23.588447653429604, "grad_norm": Infinity, "learning_rate": 0.000174786419420485, "loss": 11.2065, "step": 196020 }, { "epoch": 23.589651022864018, "grad_norm": Infinity, "learning_rate": 0.00017478389389518283, "loss": 11.1534, "step": 196030 }, { "epoch": 23.590854392298436, "grad_norm": Infinity, "learning_rate": 0.00017478136826164987, "loss": 11.0833, "step": 196040 }, { "epoch": 23.592057761732853, "grad_norm": Infinity, "learning_rate": 0.00017477884251988985, "loss": 11.0591, "step": 196050 }, { "epoch": 23.593261131167267, "grad_norm": Infinity, "learning_rate": 0.00017477631666990638, "loss": 11.1408, "step": 196060 }, { "epoch": 23.594464500601685, "grad_norm": Infinity, "learning_rate": 0.00017477379071170307, "loss": 11.1734, "step": 196070 }, { "epoch": 23.595667870036102, "grad_norm": Infinity, "learning_rate": 0.00017477126464528366, "loss": 11.0705, "step": 196080 }, { "epoch": 23.596871239470516, "grad_norm": Infinity, "learning_rate": 0.0001747687384706518, "loss": 11.1252, "step": 196090 }, { "epoch": 23.598074608904934, "grad_norm": Infinity, "learning_rate": 0.00017476621218781105, "loss": 11.1243, "step": 196100 }, { "epoch": 23.59927797833935, "grad_norm": Infinity, "learning_rate": 0.00017476368579676517, "loss": 11.0919, "step": 196110 }, { "epoch": 23.600481347773766, "grad_norm": Infinity, "learning_rate": 0.00017476115929751775, "loss": 11.0937, "step": 196120 }, { "epoch": 23.601684717208183, "grad_norm": Infinity, "learning_rate": 0.0001747586326900725, "loss": 11.1627, "step": 196130 }, { "epoch": 23.6028880866426, "grad_norm": Infinity, "learning_rate": 0.00017475610597443306, "loss": 10.9796, "step": 196140 }, { "epoch": 23.604091456077015, "grad_norm": Infinity, "learning_rate": 0.00017475357915060303, "loss": 11.0526, "step": 196150 }, { "epoch": 23.605294825511432, "grad_norm": Infinity, "learning_rate": 0.00017475105221858615, "loss": 11.1849, "step": 196160 }, { "epoch": 23.60649819494585, "grad_norm": Infinity, "learning_rate": 0.00017474852517838604, "loss": 11.0206, "step": 196170 }, { "epoch": 23.607701564380264, "grad_norm": Infinity, "learning_rate": 0.00017474599803000634, "loss": 11.2783, "step": 196180 }, { "epoch": 23.60890493381468, "grad_norm": Infinity, "learning_rate": 0.00017474347077345076, "loss": 11.1151, "step": 196190 }, { "epoch": 23.6101083032491, "grad_norm": Infinity, "learning_rate": 0.0001747409434087229, "loss": 11.1317, "step": 196200 }, { "epoch": 23.611311672683513, "grad_norm": Infinity, "learning_rate": 0.00017473841593582646, "loss": 11.1793, "step": 196210 }, { "epoch": 23.61251504211793, "grad_norm": Infinity, "learning_rate": 0.00017473588835476507, "loss": 11.1582, "step": 196220 }, { "epoch": 23.613718411552348, "grad_norm": Infinity, "learning_rate": 0.0001747333606655424, "loss": 11.192, "step": 196230 }, { "epoch": 23.614921780986762, "grad_norm": Infinity, "learning_rate": 0.0001747308328681621, "loss": 11.1467, "step": 196240 }, { "epoch": 23.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00017472830496262788, "loss": 11.0942, "step": 196250 }, { "epoch": 23.617328519855597, "grad_norm": Infinity, "learning_rate": 0.0001747257769489433, "loss": 11.078, "step": 196260 }, { "epoch": 23.61853188929001, "grad_norm": Infinity, "learning_rate": 0.00017472324882711207, "loss": 11.1349, "step": 196270 }, { "epoch": 23.61973525872443, "grad_norm": Infinity, "learning_rate": 0.00017472072059713788, "loss": 11.1673, "step": 196280 }, { "epoch": 23.620938628158846, "grad_norm": Infinity, "learning_rate": 0.00017471819225902432, "loss": 11.0936, "step": 196290 }, { "epoch": 23.62214199759326, "grad_norm": Infinity, "learning_rate": 0.0001747156638127751, "loss": 11.1077, "step": 196300 }, { "epoch": 23.623345367027678, "grad_norm": Infinity, "learning_rate": 0.0001747131352583939, "loss": 11.0793, "step": 196310 }, { "epoch": 23.624548736462096, "grad_norm": Infinity, "learning_rate": 0.00017471060659588432, "loss": 11.1016, "step": 196320 }, { "epoch": 23.62575210589651, "grad_norm": Infinity, "learning_rate": 0.00017470807782525008, "loss": 11.072, "step": 196330 }, { "epoch": 23.626955475330927, "grad_norm": Infinity, "learning_rate": 0.00017470554894649475, "loss": 11.1738, "step": 196340 }, { "epoch": 23.628158844765345, "grad_norm": Infinity, "learning_rate": 0.00017470301995962207, "loss": 11.1468, "step": 196350 }, { "epoch": 23.62936221419976, "grad_norm": Infinity, "learning_rate": 0.00017470049086463569, "loss": 11.106, "step": 196360 }, { "epoch": 23.630565583634176, "grad_norm": Infinity, "learning_rate": 0.00017469796166153924, "loss": 11.0997, "step": 196370 }, { "epoch": 23.63176895306859, "grad_norm": Infinity, "learning_rate": 0.00017469543235033643, "loss": 11.0373, "step": 196380 }, { "epoch": 23.632972322503008, "grad_norm": Infinity, "learning_rate": 0.00017469290293103083, "loss": 11.1763, "step": 196390 }, { "epoch": 23.634175691937426, "grad_norm": Infinity, "learning_rate": 0.0001746903734036262, "loss": 11.1654, "step": 196400 }, { "epoch": 23.63537906137184, "grad_norm": Infinity, "learning_rate": 0.00017468784376812617, "loss": 11.1513, "step": 196410 }, { "epoch": 23.636582430806257, "grad_norm": Infinity, "learning_rate": 0.00017468531402453437, "loss": 11.2204, "step": 196420 }, { "epoch": 23.637785800240675, "grad_norm": Infinity, "learning_rate": 0.00017468278417285448, "loss": 11.1224, "step": 196430 }, { "epoch": 23.63898916967509, "grad_norm": Infinity, "learning_rate": 0.00017468025421309017, "loss": 11.0977, "step": 196440 }, { "epoch": 23.640192539109506, "grad_norm": Infinity, "learning_rate": 0.00017467772414524508, "loss": 11.1476, "step": 196450 }, { "epoch": 23.641395908543924, "grad_norm": Infinity, "learning_rate": 0.00017467519396932292, "loss": 11.1794, "step": 196460 }, { "epoch": 23.642599277978338, "grad_norm": Infinity, "learning_rate": 0.00017467266368532727, "loss": 11.1345, "step": 196470 }, { "epoch": 23.643802647412755, "grad_norm": Infinity, "learning_rate": 0.00017467013329326188, "loss": 11.1471, "step": 196480 }, { "epoch": 23.645006016847173, "grad_norm": Infinity, "learning_rate": 0.00017466760279313034, "loss": 11.1729, "step": 196490 }, { "epoch": 23.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00017466507218493637, "loss": 11.1896, "step": 196500 }, { "epoch": 23.647412755716005, "grad_norm": Infinity, "learning_rate": 0.0001746625414686836, "loss": 11.2276, "step": 196510 }, { "epoch": 23.648616125150422, "grad_norm": Infinity, "learning_rate": 0.0001746600106443757, "loss": 11.1161, "step": 196520 }, { "epoch": 23.649819494584836, "grad_norm": Infinity, "learning_rate": 0.0001746574797120163, "loss": 11.0692, "step": 196530 }, { "epoch": 23.651022864019254, "grad_norm": Infinity, "learning_rate": 0.00017465494867160915, "loss": 11.0433, "step": 196540 }, { "epoch": 23.65222623345367, "grad_norm": Infinity, "learning_rate": 0.0001746524175231578, "loss": 11.1093, "step": 196550 }, { "epoch": 23.653429602888085, "grad_norm": Infinity, "learning_rate": 0.00017464988626666601, "loss": 11.0656, "step": 196560 }, { "epoch": 23.654632972322503, "grad_norm": Infinity, "learning_rate": 0.0001746473549021374, "loss": 11.2172, "step": 196570 }, { "epoch": 23.65583634175692, "grad_norm": Infinity, "learning_rate": 0.00017464482342957561, "loss": 11.099, "step": 196580 }, { "epoch": 23.657039711191334, "grad_norm": Infinity, "learning_rate": 0.00017464229184898438, "loss": 11.0274, "step": 196590 }, { "epoch": 23.658243080625752, "grad_norm": Infinity, "learning_rate": 0.00017463976016036728, "loss": 11.1531, "step": 196600 }, { "epoch": 23.65944645006017, "grad_norm": Infinity, "learning_rate": 0.00017463722836372805, "loss": 11.0268, "step": 196610 }, { "epoch": 23.660649819494584, "grad_norm": Infinity, "learning_rate": 0.0001746346964590703, "loss": 11.1401, "step": 196620 }, { "epoch": 23.661853188929, "grad_norm": Infinity, "learning_rate": 0.00017463216444639775, "loss": 11.1238, "step": 196630 }, { "epoch": 23.66305655836342, "grad_norm": Infinity, "learning_rate": 0.000174629632325714, "loss": 11.0019, "step": 196640 }, { "epoch": 23.664259927797833, "grad_norm": Infinity, "learning_rate": 0.00017462710009702275, "loss": 11.1642, "step": 196650 }, { "epoch": 23.66546329723225, "grad_norm": Infinity, "learning_rate": 0.00017462456776032767, "loss": 11.1533, "step": 196660 }, { "epoch": 23.666666666666668, "grad_norm": Infinity, "learning_rate": 0.0001746220353156324, "loss": 11.0911, "step": 196670 }, { "epoch": 23.667870036101082, "grad_norm": Infinity, "learning_rate": 0.00017461950276294064, "loss": 11.2071, "step": 196680 }, { "epoch": 23.6690734055355, "grad_norm": Infinity, "learning_rate": 0.000174616970102256, "loss": 11.1489, "step": 196690 }, { "epoch": 23.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00017461443733358224, "loss": 11.1009, "step": 196700 }, { "epoch": 23.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00017461190445692296, "loss": 11.268, "step": 196710 }, { "epoch": 23.67268351383875, "grad_norm": Infinity, "learning_rate": 0.0001746093714722818, "loss": 11.14, "step": 196720 }, { "epoch": 23.673886883273166, "grad_norm": Infinity, "learning_rate": 0.00017460683837966246, "loss": 11.1555, "step": 196730 }, { "epoch": 23.67509025270758, "grad_norm": Infinity, "learning_rate": 0.00017460430517906862, "loss": 11.0493, "step": 196740 }, { "epoch": 23.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00017460177187050393, "loss": 11.0805, "step": 196750 }, { "epoch": 23.677496991576415, "grad_norm": Infinity, "learning_rate": 0.00017459923845397206, "loss": 11.2198, "step": 196760 }, { "epoch": 23.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00017459670492947667, "loss": 11.041, "step": 196770 }, { "epoch": 23.679903730445247, "grad_norm": Infinity, "learning_rate": 0.00017459417129702144, "loss": 11.1872, "step": 196780 }, { "epoch": 23.681107099879664, "grad_norm": Infinity, "learning_rate": 0.00017459163755661, "loss": 11.0921, "step": 196790 }, { "epoch": 23.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00017458910370824607, "loss": 11.1232, "step": 196800 }, { "epoch": 23.683513838748496, "grad_norm": Infinity, "learning_rate": 0.00017458656975193328, "loss": 11.1452, "step": 196810 }, { "epoch": 23.684717208182914, "grad_norm": Infinity, "learning_rate": 0.0001745840356876753, "loss": 11.143, "step": 196820 }, { "epoch": 23.685920577617328, "grad_norm": Infinity, "learning_rate": 0.00017458150151547584, "loss": 11.1584, "step": 196830 }, { "epoch": 23.687123947051745, "grad_norm": Infinity, "learning_rate": 0.0001745789672353385, "loss": 11.0752, "step": 196840 }, { "epoch": 23.688327316486163, "grad_norm": Infinity, "learning_rate": 0.000174576432847267, "loss": 11.0889, "step": 196850 }, { "epoch": 23.689530685920577, "grad_norm": Infinity, "learning_rate": 0.00017457389835126498, "loss": 11.1347, "step": 196860 }, { "epoch": 23.690734055354994, "grad_norm": Infinity, "learning_rate": 0.00017457136374733612, "loss": 11.1619, "step": 196870 }, { "epoch": 23.691937424789412, "grad_norm": Infinity, "learning_rate": 0.00017456882903548412, "loss": 11.1127, "step": 196880 }, { "epoch": 23.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00017456629421571255, "loss": 11.0257, "step": 196890 }, { "epoch": 23.694344163658243, "grad_norm": Infinity, "learning_rate": 0.00017456375928802517, "loss": 11.1361, "step": 196900 }, { "epoch": 23.69554753309266, "grad_norm": Infinity, "learning_rate": 0.00017456122425242563, "loss": 11.1188, "step": 196910 }, { "epoch": 23.696750902527075, "grad_norm": Infinity, "learning_rate": 0.00017455868910891758, "loss": 11.0865, "step": 196920 }, { "epoch": 23.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00017455615385750473, "loss": 11.0205, "step": 196930 }, { "epoch": 23.69915764139591, "grad_norm": Infinity, "learning_rate": 0.00017455361849819069, "loss": 11.1067, "step": 196940 }, { "epoch": 23.700361010830324, "grad_norm": Infinity, "learning_rate": 0.00017455108303097912, "loss": 11.1015, "step": 196950 }, { "epoch": 23.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00017454854745587378, "loss": 11.0817, "step": 196960 }, { "epoch": 23.70276774969916, "grad_norm": Infinity, "learning_rate": 0.00017454601177287828, "loss": 11.1627, "step": 196970 }, { "epoch": 23.703971119133573, "grad_norm": Infinity, "learning_rate": 0.0001745434759819963, "loss": 11.1831, "step": 196980 }, { "epoch": 23.70517448856799, "grad_norm": Infinity, "learning_rate": 0.00017454094008323147, "loss": 11.1608, "step": 196990 }, { "epoch": 23.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00017453840407658753, "loss": 11.0917, "step": 197000 }, { "epoch": 23.707581227436823, "grad_norm": Infinity, "learning_rate": 0.0001745358679620681, "loss": 11.1608, "step": 197010 }, { "epoch": 23.70878459687124, "grad_norm": Infinity, "learning_rate": 0.00017453333173967687, "loss": 10.9933, "step": 197020 }, { "epoch": 23.709987966305654, "grad_norm": Infinity, "learning_rate": 0.0001745307954094175, "loss": 11.1136, "step": 197030 }, { "epoch": 23.71119133574007, "grad_norm": Infinity, "learning_rate": 0.00017452825897129368, "loss": 11.0798, "step": 197040 }, { "epoch": 23.71239470517449, "grad_norm": Infinity, "learning_rate": 0.00017452572242530908, "loss": 11.1815, "step": 197050 }, { "epoch": 23.713598074608903, "grad_norm": Infinity, "learning_rate": 0.00017452318577146735, "loss": 11.0755, "step": 197060 }, { "epoch": 23.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00017452064900977217, "loss": 11.1482, "step": 197070 }, { "epoch": 23.71600481347774, "grad_norm": Infinity, "learning_rate": 0.0001745181121402272, "loss": 11.1554, "step": 197080 }, { "epoch": 23.717208182912152, "grad_norm": Infinity, "learning_rate": 0.00017451557516283615, "loss": 11.2002, "step": 197090 }, { "epoch": 23.71841155234657, "grad_norm": Infinity, "learning_rate": 0.00017451303807760265, "loss": 11.1313, "step": 197100 }, { "epoch": 23.719614921780988, "grad_norm": Infinity, "learning_rate": 0.0001745105008845304, "loss": 11.1451, "step": 197110 }, { "epoch": 23.7208182912154, "grad_norm": Infinity, "learning_rate": 0.00017450796358362304, "loss": 11.1946, "step": 197120 }, { "epoch": 23.72202166064982, "grad_norm": Infinity, "learning_rate": 0.00017450542617488424, "loss": 11.2111, "step": 197130 }, { "epoch": 23.723225030084237, "grad_norm": Infinity, "learning_rate": 0.0001745028886583177, "loss": 11.0723, "step": 197140 }, { "epoch": 23.72442839951865, "grad_norm": Infinity, "learning_rate": 0.00017450035103392715, "loss": 11.3497, "step": 197150 }, { "epoch": 23.72563176895307, "grad_norm": Infinity, "learning_rate": 0.00017449781330171614, "loss": 11.1267, "step": 197160 }, { "epoch": 23.726835138387486, "grad_norm": Infinity, "learning_rate": 0.00017449527546168841, "loss": 11.1037, "step": 197170 }, { "epoch": 23.7280385078219, "grad_norm": Infinity, "learning_rate": 0.00017449273751384765, "loss": 11.1939, "step": 197180 }, { "epoch": 23.729241877256317, "grad_norm": Infinity, "learning_rate": 0.00017449019945819745, "loss": 11.2122, "step": 197190 }, { "epoch": 23.730445246690735, "grad_norm": Infinity, "learning_rate": 0.00017448766129474162, "loss": 11.1186, "step": 197200 }, { "epoch": 23.73164861612515, "grad_norm": Infinity, "learning_rate": 0.00017448512302348367, "loss": 11.0483, "step": 197210 }, { "epoch": 23.732851985559567, "grad_norm": Infinity, "learning_rate": 0.0001744825846444274, "loss": 11.142, "step": 197220 }, { "epoch": 23.734055354993984, "grad_norm": Infinity, "learning_rate": 0.00017448004615757646, "loss": 11.1176, "step": 197230 }, { "epoch": 23.735258724428398, "grad_norm": Infinity, "learning_rate": 0.00017447750756293448, "loss": 11.1994, "step": 197240 }, { "epoch": 23.736462093862816, "grad_norm": Infinity, "learning_rate": 0.00017447496886050515, "loss": 11.1696, "step": 197250 }, { "epoch": 23.737665463297233, "grad_norm": Infinity, "learning_rate": 0.0001744724300502922, "loss": 11.1203, "step": 197260 }, { "epoch": 23.738868832731647, "grad_norm": Infinity, "learning_rate": 0.00017446989113229918, "loss": 11.0952, "step": 197270 }, { "epoch": 23.740072202166065, "grad_norm": Infinity, "learning_rate": 0.00017446735210652992, "loss": 11.1079, "step": 197280 }, { "epoch": 23.741275571600482, "grad_norm": Infinity, "learning_rate": 0.00017446481297298796, "loss": 11.035, "step": 197290 }, { "epoch": 23.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00017446227373167704, "loss": 11.2231, "step": 197300 }, { "epoch": 23.743682310469314, "grad_norm": Infinity, "learning_rate": 0.00017445973438260086, "loss": 11.1426, "step": 197310 }, { "epoch": 23.74488567990373, "grad_norm": Infinity, "learning_rate": 0.00017445719492576304, "loss": 11.1374, "step": 197320 }, { "epoch": 23.746089049338146, "grad_norm": Infinity, "learning_rate": 0.00017445465536116726, "loss": 11.1727, "step": 197330 }, { "epoch": 23.747292418772563, "grad_norm": Infinity, "learning_rate": 0.00017445211568881724, "loss": 11.125, "step": 197340 }, { "epoch": 23.74849578820698, "grad_norm": Infinity, "learning_rate": 0.00017444957590871663, "loss": 11.1961, "step": 197350 }, { "epoch": 23.749699157641395, "grad_norm": Infinity, "learning_rate": 0.00017444703602086906, "loss": 11.1705, "step": 197360 }, { "epoch": 23.750902527075812, "grad_norm": Infinity, "learning_rate": 0.00017444449602527828, "loss": 11.0648, "step": 197370 }, { "epoch": 23.75210589651023, "grad_norm": Infinity, "learning_rate": 0.00017444195592194791, "loss": 11.1219, "step": 197380 }, { "epoch": 23.753309265944644, "grad_norm": Infinity, "learning_rate": 0.00017443941571088168, "loss": 11.072, "step": 197390 }, { "epoch": 23.75451263537906, "grad_norm": Infinity, "learning_rate": 0.0001744368753920832, "loss": 11.2714, "step": 197400 }, { "epoch": 23.75571600481348, "grad_norm": Infinity, "learning_rate": 0.00017443433496555625, "loss": 11.0549, "step": 197410 }, { "epoch": 23.756919374247893, "grad_norm": Infinity, "learning_rate": 0.0001744317944313044, "loss": 11.0843, "step": 197420 }, { "epoch": 23.75812274368231, "grad_norm": Infinity, "learning_rate": 0.00017442925378933135, "loss": 11.1402, "step": 197430 }, { "epoch": 23.759326113116728, "grad_norm": Infinity, "learning_rate": 0.00017442671303964083, "loss": 11.1855, "step": 197440 }, { "epoch": 23.760529482551142, "grad_norm": Infinity, "learning_rate": 0.00017442417218223644, "loss": 10.9913, "step": 197450 }, { "epoch": 23.76173285198556, "grad_norm": Infinity, "learning_rate": 0.0001744216312171219, "loss": 11.1436, "step": 197460 }, { "epoch": 23.762936221419977, "grad_norm": Infinity, "learning_rate": 0.00017441909014430094, "loss": 11.1198, "step": 197470 }, { "epoch": 23.76413959085439, "grad_norm": Infinity, "learning_rate": 0.00017441654896377712, "loss": 11.1991, "step": 197480 }, { "epoch": 23.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00017441400767555422, "loss": 11.088, "step": 197490 }, { "epoch": 23.766546329723226, "grad_norm": Infinity, "learning_rate": 0.00017441146627963588, "loss": 11.1659, "step": 197500 }, { "epoch": 23.76774969915764, "grad_norm": Infinity, "learning_rate": 0.00017440892477602575, "loss": 11.1147, "step": 197510 }, { "epoch": 23.768953068592058, "grad_norm": Infinity, "learning_rate": 0.00017440638316472754, "loss": 11.0198, "step": 197520 }, { "epoch": 23.770156438026476, "grad_norm": Infinity, "learning_rate": 0.00017440384144574495, "loss": 11.2263, "step": 197530 }, { "epoch": 23.77135980746089, "grad_norm": Infinity, "learning_rate": 0.0001744012996190816, "loss": 11.0934, "step": 197540 }, { "epoch": 23.772563176895307, "grad_norm": Infinity, "learning_rate": 0.00017439875768474124, "loss": 11.1262, "step": 197550 }, { "epoch": 23.773766546329725, "grad_norm": Infinity, "learning_rate": 0.00017439621564272745, "loss": 11.0539, "step": 197560 }, { "epoch": 23.77496991576414, "grad_norm": Infinity, "learning_rate": 0.00017439367349304403, "loss": 11.1165, "step": 197570 }, { "epoch": 23.776173285198556, "grad_norm": Infinity, "learning_rate": 0.00017439113123569454, "loss": 11.2575, "step": 197580 }, { "epoch": 23.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00017438858887068275, "loss": 11.1105, "step": 197590 }, { "epoch": 23.778580024067388, "grad_norm": Infinity, "learning_rate": 0.0001743860463980123, "loss": 11.1292, "step": 197600 }, { "epoch": 23.779783393501805, "grad_norm": Infinity, "learning_rate": 0.00017438350381768688, "loss": 11.1761, "step": 197610 }, { "epoch": 23.780986762936223, "grad_norm": Infinity, "learning_rate": 0.00017438096112971016, "loss": 11.1877, "step": 197620 }, { "epoch": 23.782190132370637, "grad_norm": Infinity, "learning_rate": 0.00017437841833408584, "loss": 11.1328, "step": 197630 }, { "epoch": 23.783393501805055, "grad_norm": Infinity, "learning_rate": 0.00017437587543081757, "loss": 10.9783, "step": 197640 }, { "epoch": 23.784596871239472, "grad_norm": Infinity, "learning_rate": 0.00017437333241990906, "loss": 11.0719, "step": 197650 }, { "epoch": 23.785800240673886, "grad_norm": Infinity, "learning_rate": 0.00017437078930136396, "loss": 11.1263, "step": 197660 }, { "epoch": 23.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00017436824607518597, "loss": 11.1569, "step": 197670 }, { "epoch": 23.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00017436570274137876, "loss": 11.203, "step": 197680 }, { "epoch": 23.789410348977135, "grad_norm": Infinity, "learning_rate": 0.000174363159299946, "loss": 11.1132, "step": 197690 }, { "epoch": 23.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00017436061575089141, "loss": 11.2019, "step": 197700 }, { "epoch": 23.79181708784597, "grad_norm": Infinity, "learning_rate": 0.00017435807209421867, "loss": 11.126, "step": 197710 }, { "epoch": 23.793020457280385, "grad_norm": Infinity, "learning_rate": 0.00017435552832993143, "loss": 11.1907, "step": 197720 }, { "epoch": 23.794223826714802, "grad_norm": Infinity, "learning_rate": 0.0001743529844580334, "loss": 11.1937, "step": 197730 }, { "epoch": 23.79542719614922, "grad_norm": Infinity, "learning_rate": 0.0001743504404785282, "loss": 11.1883, "step": 197740 }, { "epoch": 23.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00017434789639141958, "loss": 11.2438, "step": 197750 }, { "epoch": 23.79783393501805, "grad_norm": Infinity, "learning_rate": 0.0001743453521967112, "loss": 11.3198, "step": 197760 }, { "epoch": 23.799037304452465, "grad_norm": Infinity, "learning_rate": 0.00017434280789440672, "loss": 11.1848, "step": 197770 }, { "epoch": 23.800240673886883, "grad_norm": Infinity, "learning_rate": 0.00017434026348450986, "loss": 11.1922, "step": 197780 }, { "epoch": 23.8014440433213, "grad_norm": Infinity, "learning_rate": 0.00017433771896702428, "loss": 11.047, "step": 197790 }, { "epoch": 23.802647412755714, "grad_norm": Infinity, "learning_rate": 0.00017433517434195368, "loss": 11.1562, "step": 197800 }, { "epoch": 23.803850782190132, "grad_norm": Infinity, "learning_rate": 0.0001743326296093017, "loss": 11.1043, "step": 197810 }, { "epoch": 23.80505415162455, "grad_norm": Infinity, "learning_rate": 0.0001743300847690721, "loss": 11.2207, "step": 197820 }, { "epoch": 23.806257521058964, "grad_norm": Infinity, "learning_rate": 0.00017432753982126847, "loss": 11.1137, "step": 197830 }, { "epoch": 23.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00017432499476589456, "loss": 11.1706, "step": 197840 }, { "epoch": 23.8086642599278, "grad_norm": Infinity, "learning_rate": 0.00017432244960295402, "loss": 11.1371, "step": 197850 }, { "epoch": 23.809867629362213, "grad_norm": Infinity, "learning_rate": 0.00017431990433245057, "loss": 11.3131, "step": 197860 }, { "epoch": 23.81107099879663, "grad_norm": Infinity, "learning_rate": 0.00017431735895438787, "loss": 11.2776, "step": 197870 }, { "epoch": 23.812274368231048, "grad_norm": Infinity, "learning_rate": 0.0001743148134687696, "loss": 11.13, "step": 197880 }, { "epoch": 23.813477737665462, "grad_norm": Infinity, "learning_rate": 0.00017431226787559942, "loss": 11.2197, "step": 197890 }, { "epoch": 23.81468110709988, "grad_norm": Infinity, "learning_rate": 0.00017430972217488103, "loss": 11.1627, "step": 197900 }, { "epoch": 23.815884476534297, "grad_norm": Infinity, "learning_rate": 0.00017430717636661815, "loss": 11.0682, "step": 197910 }, { "epoch": 23.81708784596871, "grad_norm": Infinity, "learning_rate": 0.00017430463045081446, "loss": 11.2374, "step": 197920 }, { "epoch": 23.81829121540313, "grad_norm": Infinity, "learning_rate": 0.0001743020844274736, "loss": 11.1987, "step": 197930 }, { "epoch": 23.819494584837546, "grad_norm": Infinity, "learning_rate": 0.0001742995382965993, "loss": 11.0672, "step": 197940 }, { "epoch": 23.82069795427196, "grad_norm": Infinity, "learning_rate": 0.0001742969920581952, "loss": 11.2038, "step": 197950 }, { "epoch": 23.821901323706378, "grad_norm": Infinity, "learning_rate": 0.000174294445712265, "loss": 11.1895, "step": 197960 }, { "epoch": 23.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00017429189925881244, "loss": 11.2128, "step": 197970 }, { "epoch": 23.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00017428935269784112, "loss": 11.092, "step": 197980 }, { "epoch": 23.825511432009627, "grad_norm": Infinity, "learning_rate": 0.00017428680602935478, "loss": 11.1366, "step": 197990 }, { "epoch": 23.826714801444044, "grad_norm": Infinity, "learning_rate": 0.00017428425925335712, "loss": 11.2333, "step": 198000 }, { "epoch": 23.82791817087846, "grad_norm": Infinity, "learning_rate": 0.00017428171236985173, "loss": 11.0292, "step": 198010 }, { "epoch": 23.829121540312876, "grad_norm": Infinity, "learning_rate": 0.0001742791653788424, "loss": 11.1469, "step": 198020 }, { "epoch": 23.830324909747294, "grad_norm": Infinity, "learning_rate": 0.00017427661828033278, "loss": 11.1232, "step": 198030 }, { "epoch": 23.831528279181708, "grad_norm": Infinity, "learning_rate": 0.00017427407107432655, "loss": 11.1536, "step": 198040 }, { "epoch": 23.832731648616125, "grad_norm": Infinity, "learning_rate": 0.00017427152376082742, "loss": 11.16, "step": 198050 }, { "epoch": 23.833935018050543, "grad_norm": Infinity, "learning_rate": 0.00017426897633983902, "loss": 11.1687, "step": 198060 }, { "epoch": 23.835138387484957, "grad_norm": Infinity, "learning_rate": 0.00017426642881136512, "loss": 11.1294, "step": 198070 }, { "epoch": 23.836341756919374, "grad_norm": Infinity, "learning_rate": 0.00017426388117540935, "loss": 11.0826, "step": 198080 }, { "epoch": 23.837545126353792, "grad_norm": Infinity, "learning_rate": 0.00017426133343197538, "loss": 11.0565, "step": 198090 }, { "epoch": 23.838748495788206, "grad_norm": Infinity, "learning_rate": 0.00017425878558106694, "loss": 11.1567, "step": 198100 }, { "epoch": 23.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00017425623762268771, "loss": 11.0631, "step": 198110 }, { "epoch": 23.84115523465704, "grad_norm": Infinity, "learning_rate": 0.00017425368955684137, "loss": 11.221, "step": 198120 }, { "epoch": 23.842358604091455, "grad_norm": Infinity, "learning_rate": 0.00017425114138353161, "loss": 11.1611, "step": 198130 }, { "epoch": 23.843561973525873, "grad_norm": Infinity, "learning_rate": 0.0001742485931027621, "loss": 11.079, "step": 198140 }, { "epoch": 23.84476534296029, "grad_norm": Infinity, "learning_rate": 0.00017424604471453657, "loss": 11.0097, "step": 198150 }, { "epoch": 23.845968712394704, "grad_norm": Infinity, "learning_rate": 0.00017424349621885868, "loss": 11.1486, "step": 198160 }, { "epoch": 23.84717208182912, "grad_norm": Infinity, "learning_rate": 0.0001742409476157321, "loss": 11.0677, "step": 198170 }, { "epoch": 23.84837545126354, "grad_norm": Infinity, "learning_rate": 0.00017423839890516054, "loss": 11.1365, "step": 198180 }, { "epoch": 23.849578820697953, "grad_norm": Infinity, "learning_rate": 0.00017423585008714772, "loss": 11.1626, "step": 198190 }, { "epoch": 23.85078219013237, "grad_norm": Infinity, "learning_rate": 0.00017423330116169726, "loss": 11.1317, "step": 198200 }, { "epoch": 23.85198555956679, "grad_norm": Infinity, "learning_rate": 0.0001742307521288129, "loss": 11.1111, "step": 198210 }, { "epoch": 23.853188929001202, "grad_norm": Infinity, "learning_rate": 0.0001742282029884983, "loss": 11.2019, "step": 198220 }, { "epoch": 23.85439229843562, "grad_norm": Infinity, "learning_rate": 0.0001742256537407572, "loss": 11.1958, "step": 198230 }, { "epoch": 23.855595667870038, "grad_norm": Infinity, "learning_rate": 0.00017422310438559323, "loss": 11.1331, "step": 198240 }, { "epoch": 23.85679903730445, "grad_norm": Infinity, "learning_rate": 0.0001742205549230101, "loss": 11.1346, "step": 198250 }, { "epoch": 23.85800240673887, "grad_norm": Infinity, "learning_rate": 0.0001742180053530115, "loss": 11.0958, "step": 198260 }, { "epoch": 23.859205776173287, "grad_norm": Infinity, "learning_rate": 0.00017421545567560115, "loss": 11.1356, "step": 198270 }, { "epoch": 23.8604091456077, "grad_norm": Infinity, "learning_rate": 0.0001742129058907827, "loss": 11.134, "step": 198280 }, { "epoch": 23.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00017421035599855981, "loss": 11.1219, "step": 198290 }, { "epoch": 23.862815884476536, "grad_norm": Infinity, "learning_rate": 0.00017420780599893627, "loss": 11.2187, "step": 198300 }, { "epoch": 23.86401925391095, "grad_norm": Infinity, "learning_rate": 0.0001742052558919157, "loss": 11.1758, "step": 198310 }, { "epoch": 23.865222623345367, "grad_norm": Infinity, "learning_rate": 0.0001742027056775018, "loss": 11.1535, "step": 198320 }, { "epoch": 23.866425992779785, "grad_norm": Infinity, "learning_rate": 0.00017420015535569826, "loss": 10.9779, "step": 198330 }, { "epoch": 23.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00017419760492650877, "loss": 11.1329, "step": 198340 }, { "epoch": 23.868832731648617, "grad_norm": Infinity, "learning_rate": 0.000174195054389937, "loss": 11.2001, "step": 198350 }, { "epoch": 23.870036101083034, "grad_norm": Infinity, "learning_rate": 0.00017419250374598673, "loss": 11.1675, "step": 198360 }, { "epoch": 23.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00017418995299466156, "loss": 11.1709, "step": 198370 }, { "epoch": 23.872442839951866, "grad_norm": Infinity, "learning_rate": 0.0001741874021359652, "loss": 11.0995, "step": 198380 }, { "epoch": 23.87364620938628, "grad_norm": Infinity, "learning_rate": 0.00017418485116990135, "loss": 11.0929, "step": 198390 }, { "epoch": 23.874849578820697, "grad_norm": Infinity, "learning_rate": 0.00017418230009647373, "loss": 11.0156, "step": 198400 }, { "epoch": 23.876052948255115, "grad_norm": Infinity, "learning_rate": 0.000174179748915686, "loss": 11.1221, "step": 198410 }, { "epoch": 23.87725631768953, "grad_norm": Infinity, "learning_rate": 0.00017417719762754185, "loss": 11.1425, "step": 198420 }, { "epoch": 23.878459687123947, "grad_norm": Infinity, "learning_rate": 0.000174174646232045, "loss": 11.2075, "step": 198430 }, { "epoch": 23.879663056558364, "grad_norm": Infinity, "learning_rate": 0.00017417209472919912, "loss": 11.0732, "step": 198440 }, { "epoch": 23.880866425992778, "grad_norm": Infinity, "learning_rate": 0.0001741695431190079, "loss": 11.0609, "step": 198450 }, { "epoch": 23.882069795427196, "grad_norm": Infinity, "learning_rate": 0.000174166991401475, "loss": 10.9782, "step": 198460 }, { "epoch": 23.883273164861613, "grad_norm": Infinity, "learning_rate": 0.0001741644395766042, "loss": 11.2292, "step": 198470 }, { "epoch": 23.884476534296027, "grad_norm": Infinity, "learning_rate": 0.00017416188764439913, "loss": 11.1384, "step": 198480 }, { "epoch": 23.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00017415933560486352, "loss": 11.1514, "step": 198490 }, { "epoch": 23.886883273164862, "grad_norm": Infinity, "learning_rate": 0.00017415678345800102, "loss": 11.223, "step": 198500 }, { "epoch": 23.888086642599276, "grad_norm": Infinity, "learning_rate": 0.00017415423120381536, "loss": 10.9908, "step": 198510 }, { "epoch": 23.889290012033694, "grad_norm": Infinity, "learning_rate": 0.00017415167884231018, "loss": 11.1762, "step": 198520 }, { "epoch": 23.89049338146811, "grad_norm": Infinity, "learning_rate": 0.00017414912637348924, "loss": 11.1195, "step": 198530 }, { "epoch": 23.891696750902526, "grad_norm": Infinity, "learning_rate": 0.00017414657379735622, "loss": 11.0932, "step": 198540 }, { "epoch": 23.892900120336943, "grad_norm": Infinity, "learning_rate": 0.00017414402111391478, "loss": 11.0989, "step": 198550 }, { "epoch": 23.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00017414146832316866, "loss": 11.1422, "step": 198560 }, { "epoch": 23.895306859205775, "grad_norm": Infinity, "learning_rate": 0.0001741389154251215, "loss": 11.0829, "step": 198570 }, { "epoch": 23.896510228640192, "grad_norm": Infinity, "learning_rate": 0.00017413636241977708, "loss": 11.1815, "step": 198580 }, { "epoch": 23.89771359807461, "grad_norm": Infinity, "learning_rate": 0.00017413380930713897, "loss": 11.0756, "step": 198590 }, { "epoch": 23.898916967509024, "grad_norm": Infinity, "learning_rate": 0.00017413125608721096, "loss": 11.0634, "step": 198600 }, { "epoch": 23.90012033694344, "grad_norm": Infinity, "learning_rate": 0.00017412870275999675, "loss": 11.2037, "step": 198610 }, { "epoch": 23.90132370637786, "grad_norm": Infinity, "learning_rate": 0.00017412614932549997, "loss": 11.1078, "step": 198620 }, { "epoch": 23.902527075812273, "grad_norm": Infinity, "learning_rate": 0.00017412359578372437, "loss": 11.1544, "step": 198630 }, { "epoch": 23.90373044524669, "grad_norm": Infinity, "learning_rate": 0.0001741210421346736, "loss": 11.2907, "step": 198640 }, { "epoch": 23.904933814681108, "grad_norm": Infinity, "learning_rate": 0.00017411848837835142, "loss": 11.1491, "step": 198650 }, { "epoch": 23.906137184115522, "grad_norm": Infinity, "learning_rate": 0.00017411593451476147, "loss": 11.1781, "step": 198660 }, { "epoch": 23.90734055354994, "grad_norm": Infinity, "learning_rate": 0.00017411338054390747, "loss": 11.1698, "step": 198670 }, { "epoch": 23.908543922984357, "grad_norm": Infinity, "learning_rate": 0.0001741108264657931, "loss": 11.1405, "step": 198680 }, { "epoch": 23.90974729241877, "grad_norm": Infinity, "learning_rate": 0.00017410827228042205, "loss": 11.1147, "step": 198690 }, { "epoch": 23.91095066185319, "grad_norm": Infinity, "learning_rate": 0.00017410571798779807, "loss": 11.0971, "step": 198700 }, { "epoch": 23.912154031287606, "grad_norm": Infinity, "learning_rate": 0.0001741031635879248, "loss": 11.0397, "step": 198710 }, { "epoch": 23.91335740072202, "grad_norm": Infinity, "learning_rate": 0.00017410060908080598, "loss": 11.1268, "step": 198720 }, { "epoch": 23.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00017409805446644527, "loss": 11.1352, "step": 198730 }, { "epoch": 23.915764139590856, "grad_norm": Infinity, "learning_rate": 0.00017409549974484637, "loss": 11.1539, "step": 198740 }, { "epoch": 23.91696750902527, "grad_norm": Infinity, "learning_rate": 0.000174092944916013, "loss": 11.1898, "step": 198750 }, { "epoch": 23.918170878459687, "grad_norm": Infinity, "learning_rate": 0.00017409038997994881, "loss": 11.1078, "step": 198760 }, { "epoch": 23.919374247894105, "grad_norm": Infinity, "learning_rate": 0.00017408783493665756, "loss": 11.1321, "step": 198770 }, { "epoch": 23.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00017408527978614294, "loss": 11.1603, "step": 198780 }, { "epoch": 23.921780986762936, "grad_norm": Infinity, "learning_rate": 0.00017408272452840861, "loss": 11.1587, "step": 198790 }, { "epoch": 23.922984356197354, "grad_norm": Infinity, "learning_rate": 0.0001740801691634583, "loss": 11.2889, "step": 198800 }, { "epoch": 23.924187725631768, "grad_norm": Infinity, "learning_rate": 0.0001740776136912957, "loss": 11.1631, "step": 198810 }, { "epoch": 23.925391095066185, "grad_norm": Infinity, "learning_rate": 0.00017407505811192447, "loss": 11.1012, "step": 198820 }, { "epoch": 23.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00017407250242534836, "loss": 11.1792, "step": 198830 }, { "epoch": 23.927797833935017, "grad_norm": Infinity, "learning_rate": 0.00017406994663157107, "loss": 11.0976, "step": 198840 }, { "epoch": 23.929001203369435, "grad_norm": Infinity, "learning_rate": 0.00017406739073059626, "loss": 11.1242, "step": 198850 }, { "epoch": 23.930204572803852, "grad_norm": Infinity, "learning_rate": 0.00017406483472242763, "loss": 11.0359, "step": 198860 }, { "epoch": 23.931407942238266, "grad_norm": Infinity, "learning_rate": 0.00017406227860706892, "loss": 11.0949, "step": 198870 }, { "epoch": 23.932611311672684, "grad_norm": Infinity, "learning_rate": 0.00017405972238452382, "loss": 11.179, "step": 198880 }, { "epoch": 23.9338146811071, "grad_norm": Infinity, "learning_rate": 0.000174057166054796, "loss": 11.2209, "step": 198890 }, { "epoch": 23.935018050541515, "grad_norm": Infinity, "learning_rate": 0.00017405460961788917, "loss": 11.3049, "step": 198900 }, { "epoch": 23.936221419975933, "grad_norm": Infinity, "learning_rate": 0.00017405205307380705, "loss": 11.2463, "step": 198910 }, { "epoch": 23.93742478941035, "grad_norm": Infinity, "learning_rate": 0.00017404949642255333, "loss": 11.0641, "step": 198920 }, { "epoch": 23.938628158844764, "grad_norm": Infinity, "learning_rate": 0.0001740469396641317, "loss": 11.1371, "step": 198930 }, { "epoch": 23.939831528279182, "grad_norm": Infinity, "learning_rate": 0.0001740443827985459, "loss": 11.1618, "step": 198940 }, { "epoch": 23.9410348977136, "grad_norm": Infinity, "learning_rate": 0.00017404182582579953, "loss": 11.248, "step": 198950 }, { "epoch": 23.942238267148014, "grad_norm": Infinity, "learning_rate": 0.0001740392687458964, "loss": 11.1252, "step": 198960 }, { "epoch": 23.94344163658243, "grad_norm": Infinity, "learning_rate": 0.00017403671155884017, "loss": 11.0593, "step": 198970 }, { "epoch": 23.94464500601685, "grad_norm": Infinity, "learning_rate": 0.0001740341542646345, "loss": 11.062, "step": 198980 }, { "epoch": 23.945848375451263, "grad_norm": Infinity, "learning_rate": 0.00017403159686328316, "loss": 11.0267, "step": 198990 }, { "epoch": 23.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00017402903935478982, "loss": 10.9414, "step": 199000 }, { "epoch": 23.948255114320098, "grad_norm": Infinity, "learning_rate": 0.0001740264817391582, "loss": 11.092, "step": 199010 }, { "epoch": 23.949458483754512, "grad_norm": Infinity, "learning_rate": 0.00017402392401639198, "loss": 11.0984, "step": 199020 }, { "epoch": 23.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00017402136618649484, "loss": 11.2201, "step": 199030 }, { "epoch": 23.951865222623347, "grad_norm": Infinity, "learning_rate": 0.0001740188082494705, "loss": 11.2339, "step": 199040 }, { "epoch": 23.95306859205776, "grad_norm": Infinity, "learning_rate": 0.00017401625020532272, "loss": 11.2408, "step": 199050 }, { "epoch": 23.95427196149218, "grad_norm": Infinity, "learning_rate": 0.0001740136920540551, "loss": 11.1132, "step": 199060 }, { "epoch": 23.955475330926596, "grad_norm": Infinity, "learning_rate": 0.00017401113379567144, "loss": 11.1311, "step": 199070 }, { "epoch": 23.95667870036101, "grad_norm": Infinity, "learning_rate": 0.00017400857543017537, "loss": 11.2444, "step": 199080 }, { "epoch": 23.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00017400601695757064, "loss": 11.1296, "step": 199090 }, { "epoch": 23.959085439229845, "grad_norm": Infinity, "learning_rate": 0.00017400345837786092, "loss": 11.2244, "step": 199100 }, { "epoch": 23.96028880866426, "grad_norm": Infinity, "learning_rate": 0.0001740008996910499, "loss": 11.1838, "step": 199110 }, { "epoch": 23.961492178098677, "grad_norm": Infinity, "learning_rate": 0.00017399834089714132, "loss": 11.1187, "step": 199120 }, { "epoch": 23.96269554753309, "grad_norm": Infinity, "learning_rate": 0.0001739957819961389, "loss": 11.1451, "step": 199130 }, { "epoch": 23.96389891696751, "grad_norm": Infinity, "learning_rate": 0.0001739932229880463, "loss": 11.1041, "step": 199140 }, { "epoch": 23.965102286401926, "grad_norm": Infinity, "learning_rate": 0.00017399066387286722, "loss": 11.1991, "step": 199150 }, { "epoch": 23.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00017398810465060543, "loss": 11.2304, "step": 199160 }, { "epoch": 23.967509025270758, "grad_norm": Infinity, "learning_rate": 0.00017398554532126453, "loss": 11.1529, "step": 199170 }, { "epoch": 23.968712394705175, "grad_norm": Infinity, "learning_rate": 0.0001739829858848483, "loss": 11.2541, "step": 199180 }, { "epoch": 23.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00017398042634136042, "loss": 11.1097, "step": 199190 }, { "epoch": 23.971119133574007, "grad_norm": Infinity, "learning_rate": 0.0001739778666908046, "loss": 11.2848, "step": 199200 }, { "epoch": 23.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00017397530693318453, "loss": 11.1338, "step": 199210 }, { "epoch": 23.97352587244284, "grad_norm": Infinity, "learning_rate": 0.00017397274706850395, "loss": 11.2082, "step": 199220 }, { "epoch": 23.974729241877256, "grad_norm": Infinity, "learning_rate": 0.00017397018709676654, "loss": 11.1871, "step": 199230 }, { "epoch": 23.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00017396762701797598, "loss": 11.1451, "step": 199240 }, { "epoch": 23.977135980746088, "grad_norm": Infinity, "learning_rate": 0.00017396506683213604, "loss": 11.168, "step": 199250 }, { "epoch": 23.978339350180505, "grad_norm": Infinity, "learning_rate": 0.00017396250653925038, "loss": 11.2314, "step": 199260 }, { "epoch": 23.979542719614923, "grad_norm": Infinity, "learning_rate": 0.00017395994613932268, "loss": 11.0986, "step": 199270 }, { "epoch": 23.980746089049337, "grad_norm": Infinity, "learning_rate": 0.0001739573856323567, "loss": 11.2303, "step": 199280 }, { "epoch": 23.981949458483754, "grad_norm": Infinity, "learning_rate": 0.0001739548250183561, "loss": 11.0996, "step": 199290 }, { "epoch": 23.983152827918172, "grad_norm": Infinity, "learning_rate": 0.00017395226429732462, "loss": 11.165, "step": 199300 }, { "epoch": 23.984356197352586, "grad_norm": Infinity, "learning_rate": 0.000173949703469266, "loss": 11.1665, "step": 199310 }, { "epoch": 23.985559566787003, "grad_norm": Infinity, "learning_rate": 0.00017394714253418387, "loss": 11.0457, "step": 199320 }, { "epoch": 23.98676293622142, "grad_norm": Infinity, "learning_rate": 0.00017394458149208196, "loss": 11.1679, "step": 199330 }, { "epoch": 23.987966305655835, "grad_norm": Infinity, "learning_rate": 0.00017394202034296398, "loss": 11.1186, "step": 199340 }, { "epoch": 23.989169675090253, "grad_norm": Infinity, "learning_rate": 0.00017393945908683364, "loss": 11.1849, "step": 199350 }, { "epoch": 23.99037304452467, "grad_norm": Infinity, "learning_rate": 0.00017393689772369467, "loss": 11.1141, "step": 199360 }, { "epoch": 23.991576413959084, "grad_norm": Infinity, "learning_rate": 0.00017393433625355074, "loss": 11.1251, "step": 199370 }, { "epoch": 23.9927797833935, "grad_norm": Infinity, "learning_rate": 0.00017393177467640557, "loss": 11.302, "step": 199380 }, { "epoch": 23.99398315282792, "grad_norm": Infinity, "learning_rate": 0.00017392921299226287, "loss": 11.1394, "step": 199390 }, { "epoch": 23.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00017392665120112634, "loss": 11.102, "step": 199400 }, { "epoch": 23.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00017392408930299973, "loss": 11.0624, "step": 199410 }, { "epoch": 23.99759326113117, "grad_norm": Infinity, "learning_rate": 0.0001739215272978867, "loss": 11.2084, "step": 199420 }, { "epoch": 23.998796630565582, "grad_norm": Infinity, "learning_rate": 0.00017391896518579092, "loss": 11.1525, "step": 199430 }, { "epoch": 24.0, "grad_norm": Infinity, "learning_rate": 0.00017391640296671618, "loss": 10.9994, "step": 199440 }, { "epoch": 24.0, "eval_loss": 11.139605522155762, "eval_runtime": 120.0591, "eval_samples_per_second": 61.528, "eval_steps_per_second": 7.696, "step": 199440 }, { "epoch": 24.001203369434418, "grad_norm": Infinity, "learning_rate": 0.00017391384064066617, "loss": 11.0209, "step": 199450 }, { "epoch": 24.00240673886883, "grad_norm": Infinity, "learning_rate": 0.00017391127820764457, "loss": 11.2911, "step": 199460 }, { "epoch": 24.00361010830325, "grad_norm": Infinity, "learning_rate": 0.0001739087156676551, "loss": 11.2017, "step": 199470 }, { "epoch": 24.004813477737667, "grad_norm": Infinity, "learning_rate": 0.00017390615302070148, "loss": 11.1332, "step": 199480 }, { "epoch": 24.00601684717208, "grad_norm": Infinity, "learning_rate": 0.0001739035902667874, "loss": 11.189, "step": 199490 }, { "epoch": 24.0072202166065, "grad_norm": Infinity, "learning_rate": 0.00017390102740591658, "loss": 11.2744, "step": 199500 }, { "epoch": 24.008423586040916, "grad_norm": Infinity, "learning_rate": 0.00017389846443809271, "loss": 11.1867, "step": 199510 }, { "epoch": 24.00962695547533, "grad_norm": Infinity, "learning_rate": 0.00017389590136331954, "loss": 11.0673, "step": 199520 }, { "epoch": 24.010830324909747, "grad_norm": Infinity, "learning_rate": 0.00017389333818160078, "loss": 11.0787, "step": 199530 }, { "epoch": 24.012033694344165, "grad_norm": Infinity, "learning_rate": 0.00017389077489294007, "loss": 11.2569, "step": 199540 }, { "epoch": 24.01323706377858, "grad_norm": Infinity, "learning_rate": 0.0001738882114973412, "loss": 11.1808, "step": 199550 }, { "epoch": 24.014440433212997, "grad_norm": Infinity, "learning_rate": 0.00017388564799480783, "loss": 11.1974, "step": 199560 }, { "epoch": 24.015643802647414, "grad_norm": Infinity, "learning_rate": 0.00017388308438534369, "loss": 11.228, "step": 199570 }, { "epoch": 24.016847172081828, "grad_norm": Infinity, "learning_rate": 0.0001738805206689525, "loss": 11.0196, "step": 199580 }, { "epoch": 24.018050541516246, "grad_norm": Infinity, "learning_rate": 0.0001738779568456379, "loss": 11.1351, "step": 199590 }, { "epoch": 24.019253910950663, "grad_norm": Infinity, "learning_rate": 0.0001738753929154037, "loss": 11.1036, "step": 199600 }, { "epoch": 24.020457280385077, "grad_norm": Infinity, "learning_rate": 0.00017387282887825357, "loss": 11.0743, "step": 199610 }, { "epoch": 24.021660649819495, "grad_norm": Infinity, "learning_rate": 0.0001738702647341912, "loss": 11.2657, "step": 199620 }, { "epoch": 24.022864019253912, "grad_norm": Infinity, "learning_rate": 0.00017386770048322033, "loss": 11.1197, "step": 199630 }, { "epoch": 24.024067388688326, "grad_norm": Infinity, "learning_rate": 0.00017386513612534468, "loss": 11.1049, "step": 199640 }, { "epoch": 24.025270758122744, "grad_norm": Infinity, "learning_rate": 0.0001738625716605679, "loss": 11.0684, "step": 199650 }, { "epoch": 24.02647412755716, "grad_norm": Infinity, "learning_rate": 0.00017386000708889375, "loss": 11.21, "step": 199660 }, { "epoch": 24.027677496991576, "grad_norm": Infinity, "learning_rate": 0.00017385744241032596, "loss": 11.1931, "step": 199670 }, { "epoch": 24.028880866425993, "grad_norm": Infinity, "learning_rate": 0.0001738548776248682, "loss": 11.1807, "step": 199680 }, { "epoch": 24.03008423586041, "grad_norm": Infinity, "learning_rate": 0.0001738523127325242, "loss": 11.1758, "step": 199690 }, { "epoch": 24.031287605294825, "grad_norm": Infinity, "learning_rate": 0.00017384974773329768, "loss": 11.0785, "step": 199700 }, { "epoch": 24.032490974729242, "grad_norm": Infinity, "learning_rate": 0.00017384718262719232, "loss": 11.1566, "step": 199710 }, { "epoch": 24.03369434416366, "grad_norm": Infinity, "learning_rate": 0.00017384461741421186, "loss": 11.1568, "step": 199720 }, { "epoch": 24.034897713598074, "grad_norm": Infinity, "learning_rate": 0.00017384205209436003, "loss": 11.142, "step": 199730 }, { "epoch": 24.03610108303249, "grad_norm": Infinity, "learning_rate": 0.0001738394866676405, "loss": 11.1465, "step": 199740 }, { "epoch": 24.03730445246691, "grad_norm": Infinity, "learning_rate": 0.00017383692113405702, "loss": 11.1593, "step": 199750 }, { "epoch": 24.038507821901323, "grad_norm": Infinity, "learning_rate": 0.00017383435549361327, "loss": 11.1618, "step": 199760 }, { "epoch": 24.03971119133574, "grad_norm": Infinity, "learning_rate": 0.00017383178974631297, "loss": 11.1748, "step": 199770 }, { "epoch": 24.040914560770158, "grad_norm": Infinity, "learning_rate": 0.00017382922389215985, "loss": 11.2459, "step": 199780 }, { "epoch": 24.042117930204572, "grad_norm": Infinity, "learning_rate": 0.00017382665793115763, "loss": 11.1858, "step": 199790 }, { "epoch": 24.04332129963899, "grad_norm": Infinity, "learning_rate": 0.00017382409186331, "loss": 11.2074, "step": 199800 }, { "epoch": 24.044524669073404, "grad_norm": Infinity, "learning_rate": 0.00017382152568862067, "loss": 11.2532, "step": 199810 }, { "epoch": 24.04572803850782, "grad_norm": Infinity, "learning_rate": 0.00017381895940709336, "loss": 11.2359, "step": 199820 }, { "epoch": 24.04693140794224, "grad_norm": Infinity, "learning_rate": 0.00017381639301873182, "loss": 11.1503, "step": 199830 }, { "epoch": 24.048134777376653, "grad_norm": Infinity, "learning_rate": 0.00017381382652353973, "loss": 11.1877, "step": 199840 }, { "epoch": 24.04933814681107, "grad_norm": Infinity, "learning_rate": 0.00017381125992152078, "loss": 11.1675, "step": 199850 }, { "epoch": 24.050541516245488, "grad_norm": Infinity, "learning_rate": 0.00017380869321267873, "loss": 11.109, "step": 199860 }, { "epoch": 24.051744885679902, "grad_norm": Infinity, "learning_rate": 0.0001738061263970173, "loss": 11.2008, "step": 199870 }, { "epoch": 24.05294825511432, "grad_norm": Infinity, "learning_rate": 0.00017380355947454014, "loss": 11.0457, "step": 199880 }, { "epoch": 24.054151624548737, "grad_norm": Infinity, "learning_rate": 0.00017380099244525106, "loss": 11.141, "step": 199890 }, { "epoch": 24.05535499398315, "grad_norm": Infinity, "learning_rate": 0.00017379842530915366, "loss": 11.1414, "step": 199900 }, { "epoch": 24.05655836341757, "grad_norm": Infinity, "learning_rate": 0.00017379585806625176, "loss": 11.1288, "step": 199910 }, { "epoch": 24.057761732851986, "grad_norm": Infinity, "learning_rate": 0.00017379329071654903, "loss": 11.1121, "step": 199920 }, { "epoch": 24.0589651022864, "grad_norm": Infinity, "learning_rate": 0.00017379072326004918, "loss": 11.1482, "step": 199930 }, { "epoch": 24.060168471720818, "grad_norm": Infinity, "learning_rate": 0.00017378815569675594, "loss": 11.2987, "step": 199940 }, { "epoch": 24.061371841155236, "grad_norm": Infinity, "learning_rate": 0.00017378558802667304, "loss": 11.0505, "step": 199950 }, { "epoch": 24.06257521058965, "grad_norm": Infinity, "learning_rate": 0.00017378302024980414, "loss": 11.1084, "step": 199960 }, { "epoch": 24.063778580024067, "grad_norm": Infinity, "learning_rate": 0.00017378045236615303, "loss": 11.1455, "step": 199970 }, { "epoch": 24.064981949458485, "grad_norm": Infinity, "learning_rate": 0.00017377788437572338, "loss": 11.2559, "step": 199980 }, { "epoch": 24.0661853188929, "grad_norm": Infinity, "learning_rate": 0.0001737753162785189, "loss": 11.2525, "step": 199990 }, { "epoch": 24.067388688327316, "grad_norm": Infinity, "learning_rate": 0.00017377274807454333, "loss": 11.0958, "step": 200000 }, { "epoch": 24.068592057761734, "grad_norm": Infinity, "learning_rate": 0.00017377017976380037, "loss": 11.1051, "step": 200010 }, { "epoch": 24.069795427196148, "grad_norm": Infinity, "learning_rate": 0.00017376761134629378, "loss": 11.0837, "step": 200020 }, { "epoch": 24.070998796630565, "grad_norm": Infinity, "learning_rate": 0.00017376504282202722, "loss": 11.0301, "step": 200030 }, { "epoch": 24.072202166064983, "grad_norm": Infinity, "learning_rate": 0.0001737624741910044, "loss": 11.0521, "step": 200040 }, { "epoch": 24.073405535499397, "grad_norm": Infinity, "learning_rate": 0.0001737599054532291, "loss": 11.0391, "step": 200050 }, { "epoch": 24.074608904933815, "grad_norm": Infinity, "learning_rate": 0.00017375733660870504, "loss": 11.2403, "step": 200060 }, { "epoch": 24.075812274368232, "grad_norm": Infinity, "learning_rate": 0.00017375476765743586, "loss": 11.0647, "step": 200070 }, { "epoch": 24.077015643802646, "grad_norm": Infinity, "learning_rate": 0.00017375219859942534, "loss": 11.1189, "step": 200080 }, { "epoch": 24.078219013237064, "grad_norm": Infinity, "learning_rate": 0.00017374962943467716, "loss": 11.1609, "step": 200090 }, { "epoch": 24.07942238267148, "grad_norm": Infinity, "learning_rate": 0.00017374706016319507, "loss": 11.208, "step": 200100 }, { "epoch": 24.080625752105895, "grad_norm": Infinity, "learning_rate": 0.00017374449078498279, "loss": 11.1805, "step": 200110 }, { "epoch": 24.081829121540313, "grad_norm": Infinity, "learning_rate": 0.000173741921300044, "loss": 11.1044, "step": 200120 }, { "epoch": 24.08303249097473, "grad_norm": Infinity, "learning_rate": 0.00017373935170838248, "loss": 11.0474, "step": 200130 }, { "epoch": 24.084235860409144, "grad_norm": Infinity, "learning_rate": 0.00017373678201000186, "loss": 11.0857, "step": 200140 }, { "epoch": 24.085439229843562, "grad_norm": Infinity, "learning_rate": 0.00017373421220490592, "loss": 11.1374, "step": 200150 }, { "epoch": 24.08664259927798, "grad_norm": Infinity, "learning_rate": 0.0001737316422930984, "loss": 11.1366, "step": 200160 }, { "epoch": 24.087845968712394, "grad_norm": Infinity, "learning_rate": 0.00017372907227458298, "loss": 11.0926, "step": 200170 }, { "epoch": 24.08904933814681, "grad_norm": Infinity, "learning_rate": 0.0001737265021493634, "loss": 11.2044, "step": 200180 }, { "epoch": 24.09025270758123, "grad_norm": Infinity, "learning_rate": 0.00017372393191744334, "loss": 11.1494, "step": 200190 }, { "epoch": 24.091456077015643, "grad_norm": Infinity, "learning_rate": 0.00017372136157882658, "loss": 11.082, "step": 200200 }, { "epoch": 24.09265944645006, "grad_norm": Infinity, "learning_rate": 0.00017371879113351678, "loss": 11.1522, "step": 200210 }, { "epoch": 24.093862815884478, "grad_norm": Infinity, "learning_rate": 0.0001737162205815177, "loss": 11.0132, "step": 200220 }, { "epoch": 24.095066185318892, "grad_norm": Infinity, "learning_rate": 0.0001737136499228331, "loss": 11.0516, "step": 200230 }, { "epoch": 24.09626955475331, "grad_norm": Infinity, "learning_rate": 0.00017371107915746658, "loss": 11.0689, "step": 200240 }, { "epoch": 24.097472924187727, "grad_norm": Infinity, "learning_rate": 0.00017370850828542196, "loss": 11.2285, "step": 200250 }, { "epoch": 24.09867629362214, "grad_norm": Infinity, "learning_rate": 0.00017370593730670292, "loss": 11.1978, "step": 200260 }, { "epoch": 24.09987966305656, "grad_norm": Infinity, "learning_rate": 0.0001737033662213132, "loss": 11.0019, "step": 200270 }, { "epoch": 24.101083032490976, "grad_norm": Infinity, "learning_rate": 0.00017370079502925648, "loss": 11.1283, "step": 200280 }, { "epoch": 24.10228640192539, "grad_norm": Infinity, "learning_rate": 0.00017369822373053655, "loss": 11.1877, "step": 200290 }, { "epoch": 24.103489771359808, "grad_norm": Infinity, "learning_rate": 0.00017369565232515708, "loss": 11.0181, "step": 200300 }, { "epoch": 24.104693140794225, "grad_norm": Infinity, "learning_rate": 0.0001736930808131218, "loss": 11.2241, "step": 200310 }, { "epoch": 24.10589651022864, "grad_norm": Infinity, "learning_rate": 0.00017369050919443445, "loss": 11.1916, "step": 200320 }, { "epoch": 24.107099879663057, "grad_norm": Infinity, "learning_rate": 0.00017368793746909872, "loss": 11.096, "step": 200330 }, { "epoch": 24.108303249097474, "grad_norm": Infinity, "learning_rate": 0.00017368536563711838, "loss": 11.2538, "step": 200340 }, { "epoch": 24.10950661853189, "grad_norm": Infinity, "learning_rate": 0.00017368279369849713, "loss": 10.9995, "step": 200350 }, { "epoch": 24.110709987966306, "grad_norm": Infinity, "learning_rate": 0.00017368022165323868, "loss": 11.0518, "step": 200360 }, { "epoch": 24.111913357400724, "grad_norm": Infinity, "learning_rate": 0.0001736776495013467, "loss": 11.1917, "step": 200370 }, { "epoch": 24.113116726835138, "grad_norm": Infinity, "learning_rate": 0.00017367507724282504, "loss": 11.1399, "step": 200380 }, { "epoch": 24.114320096269555, "grad_norm": Infinity, "learning_rate": 0.00017367250487767733, "loss": 11.2126, "step": 200390 }, { "epoch": 24.115523465703973, "grad_norm": Infinity, "learning_rate": 0.00017366993240590733, "loss": 11.1419, "step": 200400 }, { "epoch": 24.116726835138387, "grad_norm": Infinity, "learning_rate": 0.00017366735982751874, "loss": 11.0349, "step": 200410 }, { "epoch": 24.117930204572804, "grad_norm": Infinity, "learning_rate": 0.00017366478714251525, "loss": 11.1437, "step": 200420 }, { "epoch": 24.119133574007222, "grad_norm": Infinity, "learning_rate": 0.00017366221435090068, "loss": 11.1644, "step": 200430 }, { "epoch": 24.120336943441636, "grad_norm": Infinity, "learning_rate": 0.00017365964145267868, "loss": 11.1133, "step": 200440 }, { "epoch": 24.121540312876053, "grad_norm": Infinity, "learning_rate": 0.00017365706844785298, "loss": 11.1151, "step": 200450 }, { "epoch": 24.12274368231047, "grad_norm": Infinity, "learning_rate": 0.00017365449533642737, "loss": 11.2123, "step": 200460 }, { "epoch": 24.123947051744885, "grad_norm": Infinity, "learning_rate": 0.00017365192211840546, "loss": 11.2145, "step": 200470 }, { "epoch": 24.125150421179303, "grad_norm": Infinity, "learning_rate": 0.00017364934879379105, "loss": 11.0472, "step": 200480 }, { "epoch": 24.126353790613717, "grad_norm": Infinity, "learning_rate": 0.0001736467753625879, "loss": 11.209, "step": 200490 }, { "epoch": 24.127557160048134, "grad_norm": Infinity, "learning_rate": 0.0001736442018247996, "loss": 11.1361, "step": 200500 }, { "epoch": 24.128760529482552, "grad_norm": Infinity, "learning_rate": 0.00017364162818043002, "loss": 11.0859, "step": 200510 }, { "epoch": 24.129963898916966, "grad_norm": Infinity, "learning_rate": 0.0001736390544294828, "loss": 11.1502, "step": 200520 }, { "epoch": 24.131167268351383, "grad_norm": Infinity, "learning_rate": 0.0001736364805719617, "loss": 11.2071, "step": 200530 }, { "epoch": 24.1323706377858, "grad_norm": Infinity, "learning_rate": 0.0001736339066078704, "loss": 11.225, "step": 200540 }, { "epoch": 24.133574007220215, "grad_norm": Infinity, "learning_rate": 0.00017363133253721266, "loss": 11.143, "step": 200550 }, { "epoch": 24.134777376654633, "grad_norm": Infinity, "learning_rate": 0.00017362875835999223, "loss": 11.1317, "step": 200560 }, { "epoch": 24.13598074608905, "grad_norm": Infinity, "learning_rate": 0.00017362618407621282, "loss": 11.1862, "step": 200570 }, { "epoch": 24.137184115523464, "grad_norm": Infinity, "learning_rate": 0.0001736236096858781, "loss": 11.1634, "step": 200580 }, { "epoch": 24.13838748495788, "grad_norm": Infinity, "learning_rate": 0.00017362103518899185, "loss": 11.0827, "step": 200590 }, { "epoch": 24.1395908543923, "grad_norm": Infinity, "learning_rate": 0.00017361846058555782, "loss": 11.1737, "step": 200600 }, { "epoch": 24.140794223826713, "grad_norm": Infinity, "learning_rate": 0.0001736158858755797, "loss": 11.0735, "step": 200610 }, { "epoch": 24.14199759326113, "grad_norm": Infinity, "learning_rate": 0.0001736133110590612, "loss": 11.1858, "step": 200620 }, { "epoch": 24.14320096269555, "grad_norm": Infinity, "learning_rate": 0.00017361073613600603, "loss": 11.052, "step": 200630 }, { "epoch": 24.144404332129962, "grad_norm": Infinity, "learning_rate": 0.00017360816110641798, "loss": 11.1186, "step": 200640 }, { "epoch": 24.14560770156438, "grad_norm": Infinity, "learning_rate": 0.00017360558597030075, "loss": 11.1394, "step": 200650 }, { "epoch": 24.146811070998798, "grad_norm": Infinity, "learning_rate": 0.00017360301072765809, "loss": 11.0977, "step": 200660 }, { "epoch": 24.14801444043321, "grad_norm": Infinity, "learning_rate": 0.00017360043537849367, "loss": 11.1179, "step": 200670 }, { "epoch": 24.14921780986763, "grad_norm": Infinity, "learning_rate": 0.00017359785992281126, "loss": 11.0894, "step": 200680 }, { "epoch": 24.150421179302047, "grad_norm": Infinity, "learning_rate": 0.00017359528436061456, "loss": 11.1359, "step": 200690 }, { "epoch": 24.15162454873646, "grad_norm": Infinity, "learning_rate": 0.00017359270869190732, "loss": 11.077, "step": 200700 }, { "epoch": 24.15282791817088, "grad_norm": Infinity, "learning_rate": 0.00017359013291669327, "loss": 11.1938, "step": 200710 }, { "epoch": 24.154031287605296, "grad_norm": Infinity, "learning_rate": 0.00017358755703497615, "loss": 11.1356, "step": 200720 }, { "epoch": 24.15523465703971, "grad_norm": Infinity, "learning_rate": 0.00017358498104675963, "loss": 11.0784, "step": 200730 }, { "epoch": 24.156438026474127, "grad_norm": Infinity, "learning_rate": 0.00017358240495204746, "loss": 11.0487, "step": 200740 }, { "epoch": 24.157641395908545, "grad_norm": Infinity, "learning_rate": 0.0001735798287508434, "loss": 11.1477, "step": 200750 }, { "epoch": 24.15884476534296, "grad_norm": Infinity, "learning_rate": 0.0001735772524431512, "loss": 11.2281, "step": 200760 }, { "epoch": 24.160048134777377, "grad_norm": Infinity, "learning_rate": 0.00017357467602897451, "loss": 11.082, "step": 200770 }, { "epoch": 24.161251504211794, "grad_norm": Infinity, "learning_rate": 0.00017357209950831712, "loss": 11.103, "step": 200780 }, { "epoch": 24.162454873646208, "grad_norm": Infinity, "learning_rate": 0.0001735695228811827, "loss": 11.1295, "step": 200790 }, { "epoch": 24.163658243080626, "grad_norm": Infinity, "learning_rate": 0.00017356694614757508, "loss": 11.0448, "step": 200800 }, { "epoch": 24.164861612515043, "grad_norm": Infinity, "learning_rate": 0.00017356436930749788, "loss": 11.2002, "step": 200810 }, { "epoch": 24.166064981949457, "grad_norm": Infinity, "learning_rate": 0.0001735617923609549, "loss": 11.1802, "step": 200820 }, { "epoch": 24.167268351383875, "grad_norm": Infinity, "learning_rate": 0.0001735592153079498, "loss": 11.1619, "step": 200830 }, { "epoch": 24.168471720818292, "grad_norm": Infinity, "learning_rate": 0.0001735566381484864, "loss": 11.2427, "step": 200840 }, { "epoch": 24.169675090252706, "grad_norm": Infinity, "learning_rate": 0.00017355406088256835, "loss": 11.0397, "step": 200850 }, { "epoch": 24.170878459687124, "grad_norm": Infinity, "learning_rate": 0.00017355148351019944, "loss": 11.0204, "step": 200860 }, { "epoch": 24.17208182912154, "grad_norm": Infinity, "learning_rate": 0.00017354890603138337, "loss": 11.1485, "step": 200870 }, { "epoch": 24.173285198555956, "grad_norm": Infinity, "learning_rate": 0.00017354632844612388, "loss": 11.1191, "step": 200880 }, { "epoch": 24.174488567990373, "grad_norm": Infinity, "learning_rate": 0.00017354375075442468, "loss": 11.2085, "step": 200890 }, { "epoch": 24.17569193742479, "grad_norm": Infinity, "learning_rate": 0.00017354117295628952, "loss": 11.3263, "step": 200900 }, { "epoch": 24.176895306859205, "grad_norm": Infinity, "learning_rate": 0.0001735385950517221, "loss": 11.2333, "step": 200910 }, { "epoch": 24.178098676293622, "grad_norm": Infinity, "learning_rate": 0.0001735360170407262, "loss": 11.0831, "step": 200920 }, { "epoch": 24.17930204572804, "grad_norm": Infinity, "learning_rate": 0.00017353343892330555, "loss": 11.1334, "step": 200930 }, { "epoch": 24.180505415162454, "grad_norm": Infinity, "learning_rate": 0.00017353086069946385, "loss": 11.2668, "step": 200940 }, { "epoch": 24.18170878459687, "grad_norm": Infinity, "learning_rate": 0.0001735282823692048, "loss": 11.1141, "step": 200950 }, { "epoch": 24.18291215403129, "grad_norm": Infinity, "learning_rate": 0.00017352570393253222, "loss": 11.0742, "step": 200960 }, { "epoch": 24.184115523465703, "grad_norm": Infinity, "learning_rate": 0.00017352312538944977, "loss": 11.2172, "step": 200970 }, { "epoch": 24.18531889290012, "grad_norm": Infinity, "learning_rate": 0.0001735205467399612, "loss": 11.0903, "step": 200980 }, { "epoch": 24.186522262334538, "grad_norm": Infinity, "learning_rate": 0.0001735179679840703, "loss": 11.1309, "step": 200990 }, { "epoch": 24.187725631768952, "grad_norm": Infinity, "learning_rate": 0.0001735153891217807, "loss": 11.1467, "step": 201000 }, { "epoch": 24.18892900120337, "grad_norm": Infinity, "learning_rate": 0.00017351281015309618, "loss": 11.2023, "step": 201010 }, { "epoch": 24.190132370637787, "grad_norm": Infinity, "learning_rate": 0.00017351023107802047, "loss": 11.1543, "step": 201020 }, { "epoch": 24.1913357400722, "grad_norm": Infinity, "learning_rate": 0.00017350765189655734, "loss": 11.1107, "step": 201030 }, { "epoch": 24.19253910950662, "grad_norm": Infinity, "learning_rate": 0.00017350507260871045, "loss": 11.2184, "step": 201040 }, { "epoch": 24.193742478941036, "grad_norm": Infinity, "learning_rate": 0.0001735024932144836, "loss": 11.2849, "step": 201050 }, { "epoch": 24.19494584837545, "grad_norm": Infinity, "learning_rate": 0.0001734999137138805, "loss": 11.1834, "step": 201060 }, { "epoch": 24.196149217809868, "grad_norm": Infinity, "learning_rate": 0.00017349733410690484, "loss": 11.2253, "step": 201070 }, { "epoch": 24.197352587244286, "grad_norm": Infinity, "learning_rate": 0.00017349475439356043, "loss": 11.1736, "step": 201080 }, { "epoch": 24.1985559566787, "grad_norm": Infinity, "learning_rate": 0.00017349217457385095, "loss": 11.222, "step": 201090 }, { "epoch": 24.199759326113117, "grad_norm": Infinity, "learning_rate": 0.00017348959464778013, "loss": 11.0669, "step": 201100 }, { "epoch": 24.200962695547535, "grad_norm": Infinity, "learning_rate": 0.00017348701461535176, "loss": 11.0729, "step": 201110 }, { "epoch": 24.20216606498195, "grad_norm": Infinity, "learning_rate": 0.00017348443447656956, "loss": 11.2728, "step": 201120 }, { "epoch": 24.203369434416366, "grad_norm": Infinity, "learning_rate": 0.00017348185423143717, "loss": 11.0998, "step": 201130 }, { "epoch": 24.204572803850784, "grad_norm": Infinity, "learning_rate": 0.00017347927387995844, "loss": 11.1196, "step": 201140 }, { "epoch": 24.205776173285198, "grad_norm": Infinity, "learning_rate": 0.000173476693422137, "loss": 11.1924, "step": 201150 }, { "epoch": 24.206979542719615, "grad_norm": Infinity, "learning_rate": 0.0001734741128579767, "loss": 11.1145, "step": 201160 }, { "epoch": 24.20818291215403, "grad_norm": Infinity, "learning_rate": 0.00017347153218748122, "loss": 11.0596, "step": 201170 }, { "epoch": 24.209386281588447, "grad_norm": Infinity, "learning_rate": 0.00017346895141065428, "loss": 11.1005, "step": 201180 }, { "epoch": 24.210589651022865, "grad_norm": Infinity, "learning_rate": 0.0001734663705274996, "loss": 11.1415, "step": 201190 }, { "epoch": 24.21179302045728, "grad_norm": Infinity, "learning_rate": 0.00017346378953802098, "loss": 11.1808, "step": 201200 }, { "epoch": 24.212996389891696, "grad_norm": Infinity, "learning_rate": 0.0001734612084422221, "loss": 11.2767, "step": 201210 }, { "epoch": 24.214199759326114, "grad_norm": Infinity, "learning_rate": 0.00017345862724010676, "loss": 11.1925, "step": 201220 }, { "epoch": 24.215403128760528, "grad_norm": Infinity, "learning_rate": 0.0001734560459316786, "loss": 11.135, "step": 201230 }, { "epoch": 24.216606498194945, "grad_norm": Infinity, "learning_rate": 0.00017345346451694144, "loss": 11.0607, "step": 201240 }, { "epoch": 24.217809867629363, "grad_norm": Infinity, "learning_rate": 0.00017345088299589896, "loss": 11.0846, "step": 201250 }, { "epoch": 24.219013237063777, "grad_norm": Infinity, "learning_rate": 0.00017344830136855493, "loss": 11.218, "step": 201260 }, { "epoch": 24.220216606498195, "grad_norm": Infinity, "learning_rate": 0.00017344571963491304, "loss": 11.1366, "step": 201270 }, { "epoch": 24.221419975932612, "grad_norm": Infinity, "learning_rate": 0.0001734431377949771, "loss": 11.113, "step": 201280 }, { "epoch": 24.222623345367026, "grad_norm": Infinity, "learning_rate": 0.00017344055584875078, "loss": 11.1381, "step": 201290 }, { "epoch": 24.223826714801444, "grad_norm": Infinity, "learning_rate": 0.00017343797379623785, "loss": 11.1473, "step": 201300 }, { "epoch": 24.22503008423586, "grad_norm": Infinity, "learning_rate": 0.00017343539163744204, "loss": 11.2727, "step": 201310 }, { "epoch": 24.226233453670275, "grad_norm": Infinity, "learning_rate": 0.00017343280937236712, "loss": 11.1165, "step": 201320 }, { "epoch": 24.227436823104693, "grad_norm": Infinity, "learning_rate": 0.00017343022700101678, "loss": 11.1562, "step": 201330 }, { "epoch": 24.22864019253911, "grad_norm": Infinity, "learning_rate": 0.00017342764452339476, "loss": 11.097, "step": 201340 }, { "epoch": 24.229843561973524, "grad_norm": Infinity, "learning_rate": 0.0001734250619395048, "loss": 11.0585, "step": 201350 }, { "epoch": 24.231046931407942, "grad_norm": Infinity, "learning_rate": 0.00017342247924935066, "loss": 11.1452, "step": 201360 }, { "epoch": 24.23225030084236, "grad_norm": Infinity, "learning_rate": 0.00017341989645293607, "loss": 11.0823, "step": 201370 }, { "epoch": 24.233453670276774, "grad_norm": Infinity, "learning_rate": 0.00017341731355026476, "loss": 11.1407, "step": 201380 }, { "epoch": 24.23465703971119, "grad_norm": Infinity, "learning_rate": 0.00017341473054134046, "loss": 11.0274, "step": 201390 }, { "epoch": 24.23586040914561, "grad_norm": Infinity, "learning_rate": 0.0001734121474261669, "loss": 11.0675, "step": 201400 }, { "epoch": 24.237063778580023, "grad_norm": Infinity, "learning_rate": 0.00017340956420474788, "loss": 11.1382, "step": 201410 }, { "epoch": 24.23826714801444, "grad_norm": Infinity, "learning_rate": 0.0001734069808770871, "loss": 11.1394, "step": 201420 }, { "epoch": 24.239470517448858, "grad_norm": Infinity, "learning_rate": 0.00017340439744318827, "loss": 11.1426, "step": 201430 }, { "epoch": 24.240673886883272, "grad_norm": Infinity, "learning_rate": 0.00017340181390305514, "loss": 11.102, "step": 201440 }, { "epoch": 24.24187725631769, "grad_norm": Infinity, "learning_rate": 0.00017339923025669148, "loss": 11.0605, "step": 201450 }, { "epoch": 24.243080625752107, "grad_norm": Infinity, "learning_rate": 0.00017339664650410105, "loss": 11.1254, "step": 201460 }, { "epoch": 24.24428399518652, "grad_norm": Infinity, "learning_rate": 0.0001733940626452875, "loss": 11.1687, "step": 201470 }, { "epoch": 24.24548736462094, "grad_norm": Infinity, "learning_rate": 0.00017339147868025462, "loss": 11.1794, "step": 201480 }, { "epoch": 24.246690734055356, "grad_norm": Infinity, "learning_rate": 0.0001733888946090062, "loss": 11.0631, "step": 201490 }, { "epoch": 24.24789410348977, "grad_norm": Infinity, "learning_rate": 0.00017338631043154588, "loss": 11.2065, "step": 201500 }, { "epoch": 24.249097472924188, "grad_norm": Infinity, "learning_rate": 0.00017338372614787747, "loss": 11.1384, "step": 201510 }, { "epoch": 24.250300842358605, "grad_norm": Infinity, "learning_rate": 0.00017338114175800468, "loss": 11.1357, "step": 201520 }, { "epoch": 24.25150421179302, "grad_norm": Infinity, "learning_rate": 0.00017337855726193125, "loss": 11.1712, "step": 201530 }, { "epoch": 24.252707581227437, "grad_norm": Infinity, "learning_rate": 0.00017337597265966096, "loss": 11.1226, "step": 201540 }, { "epoch": 24.253910950661854, "grad_norm": Infinity, "learning_rate": 0.00017337338795119753, "loss": 11.1215, "step": 201550 }, { "epoch": 24.25511432009627, "grad_norm": Infinity, "learning_rate": 0.00017337080313654468, "loss": 11.1206, "step": 201560 }, { "epoch": 24.256317689530686, "grad_norm": Infinity, "learning_rate": 0.00017336821821570614, "loss": 11.2056, "step": 201570 }, { "epoch": 24.257521058965104, "grad_norm": Infinity, "learning_rate": 0.0001733656331886857, "loss": 11.1376, "step": 201580 }, { "epoch": 24.258724428399518, "grad_norm": Infinity, "learning_rate": 0.00017336304805548708, "loss": 11.179, "step": 201590 }, { "epoch": 24.259927797833935, "grad_norm": Infinity, "learning_rate": 0.000173360462816114, "loss": 11.2423, "step": 201600 }, { "epoch": 24.261131167268353, "grad_norm": Infinity, "learning_rate": 0.00017335787747057023, "loss": 11.2844, "step": 201610 }, { "epoch": 24.262334536702767, "grad_norm": Infinity, "learning_rate": 0.00017335529201885946, "loss": 11.1613, "step": 201620 }, { "epoch": 24.263537906137184, "grad_norm": Infinity, "learning_rate": 0.00017335270646098554, "loss": 11.1405, "step": 201630 }, { "epoch": 24.264741275571602, "grad_norm": Infinity, "learning_rate": 0.0001733501207969521, "loss": 11.2263, "step": 201640 }, { "epoch": 24.265944645006016, "grad_norm": Infinity, "learning_rate": 0.00017334753502676293, "loss": 11.1083, "step": 201650 }, { "epoch": 24.267148014440433, "grad_norm": Infinity, "learning_rate": 0.00017334494915042178, "loss": 11.229, "step": 201660 }, { "epoch": 24.26835138387485, "grad_norm": Infinity, "learning_rate": 0.00017334236316793238, "loss": 11.2218, "step": 201670 }, { "epoch": 24.269554753309265, "grad_norm": Infinity, "learning_rate": 0.00017333977707929845, "loss": 11.0576, "step": 201680 }, { "epoch": 24.270758122743683, "grad_norm": Infinity, "learning_rate": 0.0001733371908845238, "loss": 11.008, "step": 201690 }, { "epoch": 24.2719614921781, "grad_norm": Infinity, "learning_rate": 0.00017333460458361207, "loss": 11.0592, "step": 201700 }, { "epoch": 24.273164861612514, "grad_norm": Infinity, "learning_rate": 0.0001733320181765671, "loss": 11.2051, "step": 201710 }, { "epoch": 24.27436823104693, "grad_norm": Infinity, "learning_rate": 0.00017332943166339256, "loss": 11.1676, "step": 201720 }, { "epoch": 24.27557160048135, "grad_norm": Infinity, "learning_rate": 0.00017332684504409228, "loss": 11.1241, "step": 201730 }, { "epoch": 24.276774969915763, "grad_norm": Infinity, "learning_rate": 0.00017332425831866993, "loss": 11.2482, "step": 201740 }, { "epoch": 24.27797833935018, "grad_norm": Infinity, "learning_rate": 0.00017332167148712926, "loss": 11.2021, "step": 201750 }, { "epoch": 24.2791817087846, "grad_norm": Infinity, "learning_rate": 0.00017331908454947404, "loss": 11.3209, "step": 201760 }, { "epoch": 24.280385078219012, "grad_norm": Infinity, "learning_rate": 0.000173316497505708, "loss": 11.2091, "step": 201770 }, { "epoch": 24.28158844765343, "grad_norm": Infinity, "learning_rate": 0.00017331391035583485, "loss": 11.2327, "step": 201780 }, { "epoch": 24.282791817087848, "grad_norm": Infinity, "learning_rate": 0.0001733113230998584, "loss": 11.1139, "step": 201790 }, { "epoch": 24.28399518652226, "grad_norm": Infinity, "learning_rate": 0.0001733087357377824, "loss": 11.1365, "step": 201800 }, { "epoch": 24.28519855595668, "grad_norm": Infinity, "learning_rate": 0.00017330614826961056, "loss": 11.1073, "step": 201810 }, { "epoch": 24.286401925391097, "grad_norm": Infinity, "learning_rate": 0.0001733035606953466, "loss": 11.066, "step": 201820 }, { "epoch": 24.28760529482551, "grad_norm": Infinity, "learning_rate": 0.00017330097301499427, "loss": 11.0502, "step": 201830 }, { "epoch": 24.28880866425993, "grad_norm": Infinity, "learning_rate": 0.00017329838522855734, "loss": 11.2217, "step": 201840 }, { "epoch": 24.290012033694346, "grad_norm": Infinity, "learning_rate": 0.00017329579733603957, "loss": 11.1092, "step": 201850 }, { "epoch": 24.29121540312876, "grad_norm": Infinity, "learning_rate": 0.00017329320933744469, "loss": 11.2223, "step": 201860 }, { "epoch": 24.292418772563177, "grad_norm": Infinity, "learning_rate": 0.00017329062123277642, "loss": 11.1845, "step": 201870 }, { "epoch": 24.29362214199759, "grad_norm": Infinity, "learning_rate": 0.00017328803302203854, "loss": 11.2092, "step": 201880 }, { "epoch": 24.29482551143201, "grad_norm": Infinity, "learning_rate": 0.0001732854447052348, "loss": 11.2188, "step": 201890 }, { "epoch": 24.296028880866427, "grad_norm": Infinity, "learning_rate": 0.00017328285628236888, "loss": 11.1232, "step": 201900 }, { "epoch": 24.29723225030084, "grad_norm": Infinity, "learning_rate": 0.0001732802677534446, "loss": 11.1396, "step": 201910 }, { "epoch": 24.29843561973526, "grad_norm": Infinity, "learning_rate": 0.00017327767911846567, "loss": 11.1998, "step": 201920 }, { "epoch": 24.299638989169676, "grad_norm": Infinity, "learning_rate": 0.00017327509037743587, "loss": 11.0563, "step": 201930 }, { "epoch": 24.30084235860409, "grad_norm": Infinity, "learning_rate": 0.00017327250153035889, "loss": 11.0986, "step": 201940 }, { "epoch": 24.302045728038507, "grad_norm": Infinity, "learning_rate": 0.00017326991257723856, "loss": 11.198, "step": 201950 }, { "epoch": 24.303249097472925, "grad_norm": Infinity, "learning_rate": 0.00017326732351807852, "loss": 11.1443, "step": 201960 }, { "epoch": 24.30445246690734, "grad_norm": Infinity, "learning_rate": 0.0001732647343528826, "loss": 11.2522, "step": 201970 }, { "epoch": 24.305655836341757, "grad_norm": Infinity, "learning_rate": 0.00017326214508165452, "loss": 11.0928, "step": 201980 }, { "epoch": 24.306859205776174, "grad_norm": Infinity, "learning_rate": 0.00017325955570439801, "loss": 11.0697, "step": 201990 }, { "epoch": 24.308062575210588, "grad_norm": Infinity, "learning_rate": 0.0001732569662211169, "loss": 11.0921, "step": 202000 }, { "epoch": 24.309265944645006, "grad_norm": Infinity, "learning_rate": 0.0001732543766318148, "loss": 11.1446, "step": 202010 }, { "epoch": 24.310469314079423, "grad_norm": Infinity, "learning_rate": 0.00017325178693649557, "loss": 11.068, "step": 202020 }, { "epoch": 24.311672683513837, "grad_norm": Infinity, "learning_rate": 0.0001732491971351629, "loss": 11.1512, "step": 202030 }, { "epoch": 24.312876052948255, "grad_norm": Infinity, "learning_rate": 0.00017324660722782057, "loss": 11.1301, "step": 202040 }, { "epoch": 24.314079422382672, "grad_norm": Infinity, "learning_rate": 0.0001732440172144723, "loss": 11.1322, "step": 202050 }, { "epoch": 24.315282791817086, "grad_norm": Infinity, "learning_rate": 0.00017324142709512189, "loss": 11.1576, "step": 202060 }, { "epoch": 24.316486161251504, "grad_norm": Infinity, "learning_rate": 0.000173238836869773, "loss": 11.1885, "step": 202070 }, { "epoch": 24.31768953068592, "grad_norm": Infinity, "learning_rate": 0.00017323624653842943, "loss": 11.1461, "step": 202080 }, { "epoch": 24.318892900120336, "grad_norm": Infinity, "learning_rate": 0.000173233656101095, "loss": 11.1913, "step": 202090 }, { "epoch": 24.320096269554753, "grad_norm": Infinity, "learning_rate": 0.00017323106555777333, "loss": 11.0499, "step": 202100 }, { "epoch": 24.32129963898917, "grad_norm": Infinity, "learning_rate": 0.00017322847490846823, "loss": 11.087, "step": 202110 }, { "epoch": 24.322503008423585, "grad_norm": Infinity, "learning_rate": 0.00017322588415318347, "loss": 11.0941, "step": 202120 }, { "epoch": 24.323706377858002, "grad_norm": Infinity, "learning_rate": 0.00017322329329192278, "loss": 11.1832, "step": 202130 }, { "epoch": 24.32490974729242, "grad_norm": Infinity, "learning_rate": 0.00017322070232468989, "loss": 11.0778, "step": 202140 }, { "epoch": 24.326113116726834, "grad_norm": Infinity, "learning_rate": 0.00017321811125148857, "loss": 11.2274, "step": 202150 }, { "epoch": 24.32731648616125, "grad_norm": Infinity, "learning_rate": 0.00017321552007232256, "loss": 11.0402, "step": 202160 }, { "epoch": 24.32851985559567, "grad_norm": Infinity, "learning_rate": 0.00017321292878719563, "loss": 11.1144, "step": 202170 }, { "epoch": 24.329723225030083, "grad_norm": Infinity, "learning_rate": 0.0001732103373961115, "loss": 11.0195, "step": 202180 }, { "epoch": 24.3309265944645, "grad_norm": Infinity, "learning_rate": 0.00017320774589907396, "loss": 11.1208, "step": 202190 }, { "epoch": 24.332129963898918, "grad_norm": Infinity, "learning_rate": 0.00017320515429608674, "loss": 11.2488, "step": 202200 }, { "epoch": 24.333333333333332, "grad_norm": Infinity, "learning_rate": 0.00017320256258715353, "loss": 11.2151, "step": 202210 }, { "epoch": 24.33453670276775, "grad_norm": Infinity, "learning_rate": 0.00017319997077227818, "loss": 11.1032, "step": 202220 }, { "epoch": 24.335740072202167, "grad_norm": Infinity, "learning_rate": 0.00017319737885146443, "loss": 11.1579, "step": 202230 }, { "epoch": 24.33694344163658, "grad_norm": Infinity, "learning_rate": 0.00017319478682471595, "loss": 11.0959, "step": 202240 }, { "epoch": 24.338146811071, "grad_norm": Infinity, "learning_rate": 0.00017319219469203655, "loss": 11.0999, "step": 202250 }, { "epoch": 24.339350180505416, "grad_norm": Infinity, "learning_rate": 0.00017318960245343, "loss": 11.199, "step": 202260 }, { "epoch": 24.34055354993983, "grad_norm": Infinity, "learning_rate": 0.0001731870101089, "loss": 11.1715, "step": 202270 }, { "epoch": 24.341756919374248, "grad_norm": Infinity, "learning_rate": 0.00017318441765845036, "loss": 11.1442, "step": 202280 }, { "epoch": 24.342960288808666, "grad_norm": Infinity, "learning_rate": 0.00017318182510208475, "loss": 11.089, "step": 202290 }, { "epoch": 24.34416365824308, "grad_norm": Infinity, "learning_rate": 0.00017317923243980702, "loss": 11.2527, "step": 202300 }, { "epoch": 24.345367027677497, "grad_norm": Infinity, "learning_rate": 0.00017317663967162083, "loss": 11.1116, "step": 202310 }, { "epoch": 24.346570397111915, "grad_norm": Infinity, "learning_rate": 0.00017317404679753002, "loss": 11.0977, "step": 202320 }, { "epoch": 24.34777376654633, "grad_norm": Infinity, "learning_rate": 0.00017317145381753828, "loss": 11.0884, "step": 202330 }, { "epoch": 24.348977135980746, "grad_norm": Infinity, "learning_rate": 0.00017316886073164939, "loss": 11.266, "step": 202340 }, { "epoch": 24.350180505415164, "grad_norm": Infinity, "learning_rate": 0.00017316626753986704, "loss": 11.0885, "step": 202350 }, { "epoch": 24.351383874849578, "grad_norm": Infinity, "learning_rate": 0.0001731636742421951, "loss": 10.9854, "step": 202360 }, { "epoch": 24.352587244283995, "grad_norm": Infinity, "learning_rate": 0.00017316108083863723, "loss": 11.1087, "step": 202370 }, { "epoch": 24.353790613718413, "grad_norm": Infinity, "learning_rate": 0.0001731584873291972, "loss": 11.1437, "step": 202380 }, { "epoch": 24.354993983152827, "grad_norm": Infinity, "learning_rate": 0.0001731558937138788, "loss": 11.1671, "step": 202390 }, { "epoch": 24.356197352587245, "grad_norm": Infinity, "learning_rate": 0.00017315329999268578, "loss": 11.1036, "step": 202400 }, { "epoch": 24.357400722021662, "grad_norm": Infinity, "learning_rate": 0.00017315070616562186, "loss": 11.1503, "step": 202410 }, { "epoch": 24.358604091456076, "grad_norm": Infinity, "learning_rate": 0.00017314811223269078, "loss": 11.2141, "step": 202420 }, { "epoch": 24.359807460890494, "grad_norm": Infinity, "learning_rate": 0.00017314551819389636, "loss": 11.1071, "step": 202430 }, { "epoch": 24.36101083032491, "grad_norm": Infinity, "learning_rate": 0.00017314292404924228, "loss": 11.127, "step": 202440 }, { "epoch": 24.362214199759325, "grad_norm": Infinity, "learning_rate": 0.00017314032979873236, "loss": 11.0803, "step": 202450 }, { "epoch": 24.363417569193743, "grad_norm": Infinity, "learning_rate": 0.0001731377354423703, "loss": 11.0004, "step": 202460 }, { "epoch": 24.36462093862816, "grad_norm": Infinity, "learning_rate": 0.0001731351409801599, "loss": 11.15, "step": 202470 }, { "epoch": 24.365824308062574, "grad_norm": Infinity, "learning_rate": 0.00017313254641210488, "loss": 11.1934, "step": 202480 }, { "epoch": 24.367027677496992, "grad_norm": Infinity, "learning_rate": 0.000173129951738209, "loss": 11.1057, "step": 202490 }, { "epoch": 24.36823104693141, "grad_norm": Infinity, "learning_rate": 0.00017312735695847607, "loss": 11.0572, "step": 202500 }, { "epoch": 24.369434416365824, "grad_norm": Infinity, "learning_rate": 0.00017312476207290975, "loss": 11.0955, "step": 202510 }, { "epoch": 24.37063778580024, "grad_norm": Infinity, "learning_rate": 0.00017312216708151388, "loss": 11.1555, "step": 202520 }, { "epoch": 24.37184115523466, "grad_norm": Infinity, "learning_rate": 0.00017311957198429213, "loss": 11.1744, "step": 202530 }, { "epoch": 24.373044524669073, "grad_norm": Infinity, "learning_rate": 0.00017311697678124837, "loss": 11.2549, "step": 202540 }, { "epoch": 24.37424789410349, "grad_norm": Infinity, "learning_rate": 0.00017311438147238627, "loss": 11.1706, "step": 202550 }, { "epoch": 24.375451263537904, "grad_norm": Infinity, "learning_rate": 0.0001731117860577096, "loss": 11.0597, "step": 202560 }, { "epoch": 24.376654632972322, "grad_norm": Infinity, "learning_rate": 0.00017310919053722213, "loss": 11.1124, "step": 202570 }, { "epoch": 24.37785800240674, "grad_norm": Infinity, "learning_rate": 0.00017310659491092761, "loss": 11.1403, "step": 202580 }, { "epoch": 24.379061371841154, "grad_norm": Infinity, "learning_rate": 0.0001731039991788298, "loss": 11.1213, "step": 202590 }, { "epoch": 24.38026474127557, "grad_norm": Infinity, "learning_rate": 0.00017310140334093243, "loss": 11.1974, "step": 202600 }, { "epoch": 24.38146811070999, "grad_norm": Infinity, "learning_rate": 0.00017309880739723932, "loss": 11.1309, "step": 202610 }, { "epoch": 24.382671480144403, "grad_norm": Infinity, "learning_rate": 0.00017309621134775418, "loss": 11.1348, "step": 202620 }, { "epoch": 24.38387484957882, "grad_norm": Infinity, "learning_rate": 0.00017309361519248078, "loss": 11.1128, "step": 202630 }, { "epoch": 24.385078219013238, "grad_norm": Infinity, "learning_rate": 0.00017309101893142286, "loss": 11.0654, "step": 202640 }, { "epoch": 24.386281588447652, "grad_norm": Infinity, "learning_rate": 0.00017308842256458416, "loss": 11.1745, "step": 202650 }, { "epoch": 24.38748495788207, "grad_norm": Infinity, "learning_rate": 0.0001730858260919685, "loss": 11.0448, "step": 202660 }, { "epoch": 24.388688327316487, "grad_norm": Infinity, "learning_rate": 0.00017308322951357962, "loss": 11.1019, "step": 202670 }, { "epoch": 24.3898916967509, "grad_norm": Infinity, "learning_rate": 0.00017308063282942123, "loss": 11.297, "step": 202680 }, { "epoch": 24.39109506618532, "grad_norm": Infinity, "learning_rate": 0.00017307803603949714, "loss": 11.1159, "step": 202690 }, { "epoch": 24.392298435619736, "grad_norm": Infinity, "learning_rate": 0.0001730754391438111, "loss": 11.0349, "step": 202700 }, { "epoch": 24.39350180505415, "grad_norm": Infinity, "learning_rate": 0.00017307284214236685, "loss": 11.0906, "step": 202710 }, { "epoch": 24.394705174488568, "grad_norm": Infinity, "learning_rate": 0.00017307024503516812, "loss": 11.1149, "step": 202720 }, { "epoch": 24.395908543922985, "grad_norm": Infinity, "learning_rate": 0.00017306764782221875, "loss": 11.2081, "step": 202730 }, { "epoch": 24.3971119133574, "grad_norm": Infinity, "learning_rate": 0.00017306505050352245, "loss": 11.0717, "step": 202740 }, { "epoch": 24.398315282791817, "grad_norm": Infinity, "learning_rate": 0.00017306245307908296, "loss": 11.116, "step": 202750 }, { "epoch": 24.399518652226234, "grad_norm": Infinity, "learning_rate": 0.00017305985554890406, "loss": 11.136, "step": 202760 }, { "epoch": 24.40072202166065, "grad_norm": Infinity, "learning_rate": 0.00017305725791298956, "loss": 11.1861, "step": 202770 }, { "epoch": 24.401925391095066, "grad_norm": Infinity, "learning_rate": 0.00017305466017134312, "loss": 10.9928, "step": 202780 }, { "epoch": 24.403128760529484, "grad_norm": Infinity, "learning_rate": 0.0001730520623239686, "loss": 11.1095, "step": 202790 }, { "epoch": 24.404332129963898, "grad_norm": Infinity, "learning_rate": 0.00017304946437086963, "loss": 11.0238, "step": 202800 }, { "epoch": 24.405535499398315, "grad_norm": Infinity, "learning_rate": 0.00017304686631205011, "loss": 11.1042, "step": 202810 }, { "epoch": 24.406738868832733, "grad_norm": Infinity, "learning_rate": 0.00017304426814751373, "loss": 11.1892, "step": 202820 }, { "epoch": 24.407942238267147, "grad_norm": Infinity, "learning_rate": 0.00017304166987726425, "loss": 11.2431, "step": 202830 }, { "epoch": 24.409145607701564, "grad_norm": Infinity, "learning_rate": 0.00017303907150130544, "loss": 11.1347, "step": 202840 }, { "epoch": 24.410348977135982, "grad_norm": Infinity, "learning_rate": 0.0001730364730196411, "loss": 11.1796, "step": 202850 }, { "epoch": 24.411552346570396, "grad_norm": Infinity, "learning_rate": 0.0001730338744322749, "loss": 11.2552, "step": 202860 }, { "epoch": 24.412755716004813, "grad_norm": Infinity, "learning_rate": 0.00017303127573921066, "loss": 11.2265, "step": 202870 }, { "epoch": 24.41395908543923, "grad_norm": Infinity, "learning_rate": 0.00017302867694045216, "loss": 11.2379, "step": 202880 }, { "epoch": 24.415162454873645, "grad_norm": Infinity, "learning_rate": 0.00017302607803600312, "loss": 11.1952, "step": 202890 }, { "epoch": 24.416365824308063, "grad_norm": Infinity, "learning_rate": 0.0001730234790258673, "loss": 11.1413, "step": 202900 }, { "epoch": 24.41756919374248, "grad_norm": Infinity, "learning_rate": 0.0001730208799100485, "loss": 11.0516, "step": 202910 }, { "epoch": 24.418772563176894, "grad_norm": Infinity, "learning_rate": 0.00017301828068855044, "loss": 11.1639, "step": 202920 }, { "epoch": 24.41997593261131, "grad_norm": Infinity, "learning_rate": 0.0001730156813613769, "loss": 11.1749, "step": 202930 }, { "epoch": 24.42117930204573, "grad_norm": Infinity, "learning_rate": 0.00017301308192853166, "loss": 11.1614, "step": 202940 }, { "epoch": 24.422382671480143, "grad_norm": Infinity, "learning_rate": 0.00017301048239001845, "loss": 11.1019, "step": 202950 }, { "epoch": 24.42358604091456, "grad_norm": Infinity, "learning_rate": 0.00017300788274584106, "loss": 11.1456, "step": 202960 }, { "epoch": 24.42478941034898, "grad_norm": Infinity, "learning_rate": 0.0001730052829960032, "loss": 10.9926, "step": 202970 }, { "epoch": 24.425992779783392, "grad_norm": Infinity, "learning_rate": 0.00017300268314050872, "loss": 11.2388, "step": 202980 }, { "epoch": 24.42719614921781, "grad_norm": Infinity, "learning_rate": 0.00017300008317936132, "loss": 11.1645, "step": 202990 }, { "epoch": 24.428399518652228, "grad_norm": Infinity, "learning_rate": 0.00017299748311256478, "loss": 11.2056, "step": 203000 }, { "epoch": 24.42960288808664, "grad_norm": Infinity, "learning_rate": 0.00017299488294012286, "loss": 11.1836, "step": 203010 }, { "epoch": 24.43080625752106, "grad_norm": Infinity, "learning_rate": 0.0001729922826620393, "loss": 11.1311, "step": 203020 }, { "epoch": 24.432009626955477, "grad_norm": Infinity, "learning_rate": 0.0001729896822783179, "loss": 11.0801, "step": 203030 }, { "epoch": 24.43321299638989, "grad_norm": Infinity, "learning_rate": 0.00017298708178896242, "loss": 11.2173, "step": 203040 }, { "epoch": 24.43441636582431, "grad_norm": Infinity, "learning_rate": 0.00017298448119397656, "loss": 11.023, "step": 203050 }, { "epoch": 24.435619735258726, "grad_norm": Infinity, "learning_rate": 0.0001729818804933642, "loss": 11.1907, "step": 203060 }, { "epoch": 24.43682310469314, "grad_norm": Infinity, "learning_rate": 0.000172979279687129, "loss": 11.1762, "step": 203070 }, { "epoch": 24.438026474127557, "grad_norm": Infinity, "learning_rate": 0.00017297667877527482, "loss": 11.1403, "step": 203080 }, { "epoch": 24.439229843561975, "grad_norm": Infinity, "learning_rate": 0.00017297407775780532, "loss": 11.1323, "step": 203090 }, { "epoch": 24.44043321299639, "grad_norm": Infinity, "learning_rate": 0.00017297147663472432, "loss": 11.0599, "step": 203100 }, { "epoch": 24.441636582430807, "grad_norm": Infinity, "learning_rate": 0.0001729688754060356, "loss": 11.1402, "step": 203110 }, { "epoch": 24.442839951865224, "grad_norm": Infinity, "learning_rate": 0.00017296627407174285, "loss": 11.1638, "step": 203120 }, { "epoch": 24.444043321299638, "grad_norm": Infinity, "learning_rate": 0.00017296367263184995, "loss": 11.2891, "step": 203130 }, { "epoch": 24.445246690734056, "grad_norm": Infinity, "learning_rate": 0.00017296107108636055, "loss": 10.9729, "step": 203140 }, { "epoch": 24.446450060168473, "grad_norm": Infinity, "learning_rate": 0.00017295846943527846, "loss": 11.0357, "step": 203150 }, { "epoch": 24.447653429602887, "grad_norm": Infinity, "learning_rate": 0.00017295586767860748, "loss": 11.0406, "step": 203160 }, { "epoch": 24.448856799037305, "grad_norm": Infinity, "learning_rate": 0.00017295326581635137, "loss": 11.0706, "step": 203170 }, { "epoch": 24.450060168471722, "grad_norm": Infinity, "learning_rate": 0.00017295066384851384, "loss": 11.1838, "step": 203180 }, { "epoch": 24.451263537906136, "grad_norm": Infinity, "learning_rate": 0.0001729480617750987, "loss": 11.198, "step": 203190 }, { "epoch": 24.452466907340554, "grad_norm": Infinity, "learning_rate": 0.0001729454595961097, "loss": 11.0865, "step": 203200 }, { "epoch": 24.45367027677497, "grad_norm": Infinity, "learning_rate": 0.00017294285731155062, "loss": 11.1278, "step": 203210 }, { "epoch": 24.454873646209386, "grad_norm": Infinity, "learning_rate": 0.0001729402549214252, "loss": 11.2212, "step": 203220 }, { "epoch": 24.456077015643803, "grad_norm": Infinity, "learning_rate": 0.00017293765242573726, "loss": 11.0992, "step": 203230 }, { "epoch": 24.45728038507822, "grad_norm": Infinity, "learning_rate": 0.00017293504982449046, "loss": 11.0595, "step": 203240 }, { "epoch": 24.458483754512635, "grad_norm": Infinity, "learning_rate": 0.0001729324471176887, "loss": 11.135, "step": 203250 }, { "epoch": 24.459687123947052, "grad_norm": Infinity, "learning_rate": 0.00017292984430533566, "loss": 11.2446, "step": 203260 }, { "epoch": 24.460890493381466, "grad_norm": Infinity, "learning_rate": 0.00017292724138743513, "loss": 11.1812, "step": 203270 }, { "epoch": 24.462093862815884, "grad_norm": Infinity, "learning_rate": 0.00017292463836399087, "loss": 11.1719, "step": 203280 }, { "epoch": 24.4632972322503, "grad_norm": Infinity, "learning_rate": 0.00017292203523500664, "loss": 11.1675, "step": 203290 }, { "epoch": 24.464500601684716, "grad_norm": Infinity, "learning_rate": 0.00017291943200048625, "loss": 11.186, "step": 203300 }, { "epoch": 24.465703971119133, "grad_norm": Infinity, "learning_rate": 0.00017291682866043344, "loss": 11.1694, "step": 203310 }, { "epoch": 24.46690734055355, "grad_norm": Infinity, "learning_rate": 0.00017291422521485195, "loss": 11.0342, "step": 203320 }, { "epoch": 24.468110709987965, "grad_norm": Infinity, "learning_rate": 0.00017291162166374559, "loss": 11.2214, "step": 203330 }, { "epoch": 24.469314079422382, "grad_norm": Infinity, "learning_rate": 0.00017290901800711813, "loss": 11.1639, "step": 203340 }, { "epoch": 24.4705174488568, "grad_norm": Infinity, "learning_rate": 0.0001729064142449733, "loss": 11.0322, "step": 203350 }, { "epoch": 24.471720818291214, "grad_norm": Infinity, "learning_rate": 0.0001729038103773149, "loss": 11.1796, "step": 203360 }, { "epoch": 24.47292418772563, "grad_norm": Infinity, "learning_rate": 0.00017290120640414664, "loss": 11.2121, "step": 203370 }, { "epoch": 24.47412755716005, "grad_norm": Infinity, "learning_rate": 0.0001728986023254724, "loss": 11.0983, "step": 203380 }, { "epoch": 24.475330926594463, "grad_norm": Infinity, "learning_rate": 0.00017289599814129586, "loss": 11.2589, "step": 203390 }, { "epoch": 24.47653429602888, "grad_norm": Infinity, "learning_rate": 0.0001728933938516208, "loss": 10.9977, "step": 203400 }, { "epoch": 24.477737665463298, "grad_norm": Infinity, "learning_rate": 0.000172890789456451, "loss": 11.1936, "step": 203410 }, { "epoch": 24.478941034897712, "grad_norm": Infinity, "learning_rate": 0.00017288818495579027, "loss": 11.2151, "step": 203420 }, { "epoch": 24.48014440433213, "grad_norm": Infinity, "learning_rate": 0.0001728855803496423, "loss": 11.0796, "step": 203430 }, { "epoch": 24.481347773766547, "grad_norm": Infinity, "learning_rate": 0.0001728829756380109, "loss": 11.1623, "step": 203440 }, { "epoch": 24.48255114320096, "grad_norm": Infinity, "learning_rate": 0.00017288037082089988, "loss": 10.9397, "step": 203450 }, { "epoch": 24.48375451263538, "grad_norm": Infinity, "learning_rate": 0.00017287776589831294, "loss": 11.2153, "step": 203460 }, { "epoch": 24.484957882069796, "grad_norm": Infinity, "learning_rate": 0.00017287516087025387, "loss": 11.0897, "step": 203470 }, { "epoch": 24.48616125150421, "grad_norm": Infinity, "learning_rate": 0.00017287255573672646, "loss": 11.2646, "step": 203480 }, { "epoch": 24.487364620938628, "grad_norm": Infinity, "learning_rate": 0.0001728699504977345, "loss": 11.2013, "step": 203490 }, { "epoch": 24.488567990373046, "grad_norm": Infinity, "learning_rate": 0.00017286734515328168, "loss": 11.1848, "step": 203500 }, { "epoch": 24.48977135980746, "grad_norm": Infinity, "learning_rate": 0.00017286473970337186, "loss": 11.0456, "step": 203510 }, { "epoch": 24.490974729241877, "grad_norm": Infinity, "learning_rate": 0.00017286213414800878, "loss": 11.0632, "step": 203520 }, { "epoch": 24.492178098676295, "grad_norm": Infinity, "learning_rate": 0.00017285952848719615, "loss": 11.1796, "step": 203530 }, { "epoch": 24.49338146811071, "grad_norm": Infinity, "learning_rate": 0.00017285692272093784, "loss": 11.1602, "step": 203540 }, { "epoch": 24.494584837545126, "grad_norm": Infinity, "learning_rate": 0.00017285431684923753, "loss": 11.2746, "step": 203550 }, { "epoch": 24.495788206979544, "grad_norm": Infinity, "learning_rate": 0.0001728517108720991, "loss": 11.0718, "step": 203560 }, { "epoch": 24.496991576413958, "grad_norm": Infinity, "learning_rate": 0.00017284910478952622, "loss": 11.0925, "step": 203570 }, { "epoch": 24.498194945848375, "grad_norm": Infinity, "learning_rate": 0.00017284649860152272, "loss": 11.1142, "step": 203580 }, { "epoch": 24.499398315282793, "grad_norm": Infinity, "learning_rate": 0.0001728438923080923, "loss": 11.2081, "step": 203590 }, { "epoch": 24.500601684717207, "grad_norm": Infinity, "learning_rate": 0.00017284128590923885, "loss": 11.1378, "step": 203600 }, { "epoch": 24.501805054151625, "grad_norm": Infinity, "learning_rate": 0.00017283867940496604, "loss": 11.1431, "step": 203610 }, { "epoch": 24.503008423586042, "grad_norm": Infinity, "learning_rate": 0.00017283607279527767, "loss": 11.1411, "step": 203620 }, { "epoch": 24.504211793020456, "grad_norm": Infinity, "learning_rate": 0.00017283346608017755, "loss": 11.0164, "step": 203630 }, { "epoch": 24.505415162454874, "grad_norm": Infinity, "learning_rate": 0.00017283085925966937, "loss": 11.1921, "step": 203640 }, { "epoch": 24.50661853188929, "grad_norm": Infinity, "learning_rate": 0.000172828252333757, "loss": 11.1421, "step": 203650 }, { "epoch": 24.507821901323705, "grad_norm": Infinity, "learning_rate": 0.00017282564530244417, "loss": 11.1104, "step": 203660 }, { "epoch": 24.509025270758123, "grad_norm": Infinity, "learning_rate": 0.00017282303816573463, "loss": 11.0402, "step": 203670 }, { "epoch": 24.51022864019254, "grad_norm": Infinity, "learning_rate": 0.0001728204309236322, "loss": 11.1243, "step": 203680 }, { "epoch": 24.511432009626954, "grad_norm": Infinity, "learning_rate": 0.0001728178235761406, "loss": 11.181, "step": 203690 }, { "epoch": 24.512635379061372, "grad_norm": Infinity, "learning_rate": 0.00017281521612326365, "loss": 10.9586, "step": 203700 }, { "epoch": 24.51383874849579, "grad_norm": Infinity, "learning_rate": 0.00017281260856500508, "loss": 11.109, "step": 203710 }, { "epoch": 24.515042117930204, "grad_norm": Infinity, "learning_rate": 0.0001728100009013687, "loss": 11.2251, "step": 203720 }, { "epoch": 24.51624548736462, "grad_norm": Infinity, "learning_rate": 0.0001728073931323583, "loss": 11.1211, "step": 203730 }, { "epoch": 24.51744885679904, "grad_norm": Infinity, "learning_rate": 0.00017280478525797757, "loss": 10.9843, "step": 203740 }, { "epoch": 24.518652226233453, "grad_norm": Infinity, "learning_rate": 0.0001728021772782304, "loss": 11.1796, "step": 203750 }, { "epoch": 24.51985559566787, "grad_norm": Infinity, "learning_rate": 0.0001727995691931205, "loss": 11.1097, "step": 203760 }, { "epoch": 24.521058965102288, "grad_norm": Infinity, "learning_rate": 0.00017279696100265163, "loss": 11.077, "step": 203770 }, { "epoch": 24.522262334536702, "grad_norm": Infinity, "learning_rate": 0.00017279435270682754, "loss": 11.1413, "step": 203780 }, { "epoch": 24.52346570397112, "grad_norm": Infinity, "learning_rate": 0.00017279174430565213, "loss": 11.1108, "step": 203790 }, { "epoch": 24.524669073405537, "grad_norm": Infinity, "learning_rate": 0.00017278913579912906, "loss": 11.1405, "step": 203800 }, { "epoch": 24.52587244283995, "grad_norm": Infinity, "learning_rate": 0.00017278652718726213, "loss": 11.2398, "step": 203810 }, { "epoch": 24.52707581227437, "grad_norm": Infinity, "learning_rate": 0.00017278391847005514, "loss": 11.2712, "step": 203820 }, { "epoch": 24.528279181708786, "grad_norm": Infinity, "learning_rate": 0.00017278130964751184, "loss": 11.123, "step": 203830 }, { "epoch": 24.5294825511432, "grad_norm": Infinity, "learning_rate": 0.00017277870071963602, "loss": 11.0383, "step": 203840 }, { "epoch": 24.530685920577618, "grad_norm": Infinity, "learning_rate": 0.00017277609168643142, "loss": 11.0184, "step": 203850 }, { "epoch": 24.531889290012035, "grad_norm": Infinity, "learning_rate": 0.0001727734825479019, "loss": 11.2418, "step": 203860 }, { "epoch": 24.53309265944645, "grad_norm": Infinity, "learning_rate": 0.00017277087330405119, "loss": 11.1657, "step": 203870 }, { "epoch": 24.534296028880867, "grad_norm": Infinity, "learning_rate": 0.00017276826395488303, "loss": 11.1063, "step": 203880 }, { "epoch": 24.535499398315284, "grad_norm": Infinity, "learning_rate": 0.00017276565450040119, "loss": 11.1911, "step": 203890 }, { "epoch": 24.5367027677497, "grad_norm": Infinity, "learning_rate": 0.00017276304494060954, "loss": 11.0958, "step": 203900 }, { "epoch": 24.537906137184116, "grad_norm": Infinity, "learning_rate": 0.00017276043527551175, "loss": 11.1619, "step": 203910 }, { "epoch": 24.53910950661853, "grad_norm": Infinity, "learning_rate": 0.0001727578255051117, "loss": 11.1901, "step": 203920 }, { "epoch": 24.540312876052948, "grad_norm": Infinity, "learning_rate": 0.00017275521562941308, "loss": 11.2114, "step": 203930 }, { "epoch": 24.541516245487365, "grad_norm": Infinity, "learning_rate": 0.0001727526056484197, "loss": 11.1755, "step": 203940 }, { "epoch": 24.54271961492178, "grad_norm": Infinity, "learning_rate": 0.00017274999556213537, "loss": 11.0446, "step": 203950 }, { "epoch": 24.543922984356197, "grad_norm": Infinity, "learning_rate": 0.0001727473853705638, "loss": 11.1149, "step": 203960 }, { "epoch": 24.545126353790614, "grad_norm": Infinity, "learning_rate": 0.00017274477507370884, "loss": 11.1213, "step": 203970 }, { "epoch": 24.54632972322503, "grad_norm": Infinity, "learning_rate": 0.00017274216467157418, "loss": 11.1316, "step": 203980 }, { "epoch": 24.547533092659446, "grad_norm": Infinity, "learning_rate": 0.00017273955416416368, "loss": 11.26, "step": 203990 }, { "epoch": 24.548736462093864, "grad_norm": Infinity, "learning_rate": 0.00017273694355148108, "loss": 11.1334, "step": 204000 }, { "epoch": 24.549939831528278, "grad_norm": Infinity, "learning_rate": 0.00017273433283353015, "loss": 11.157, "step": 204010 }, { "epoch": 24.551143200962695, "grad_norm": Infinity, "learning_rate": 0.0001727317220103147, "loss": 11.1606, "step": 204020 }, { "epoch": 24.552346570397113, "grad_norm": Infinity, "learning_rate": 0.0001727291110818385, "loss": 11.1745, "step": 204030 }, { "epoch": 24.553549939831527, "grad_norm": Infinity, "learning_rate": 0.0001727265000481053, "loss": 11.0241, "step": 204040 }, { "epoch": 24.554753309265944, "grad_norm": Infinity, "learning_rate": 0.0001727238889091189, "loss": 11.1493, "step": 204050 }, { "epoch": 24.555956678700362, "grad_norm": Infinity, "learning_rate": 0.00017272127766488309, "loss": 11.1397, "step": 204060 }, { "epoch": 24.557160048134776, "grad_norm": Infinity, "learning_rate": 0.00017271866631540163, "loss": 11.1514, "step": 204070 }, { "epoch": 24.558363417569193, "grad_norm": Infinity, "learning_rate": 0.00017271605486067831, "loss": 11.1527, "step": 204080 }, { "epoch": 24.55956678700361, "grad_norm": Infinity, "learning_rate": 0.0001727134433007169, "loss": 11.1628, "step": 204090 }, { "epoch": 24.560770156438025, "grad_norm": Infinity, "learning_rate": 0.00017271083163552118, "loss": 11.1046, "step": 204100 }, { "epoch": 24.561973525872443, "grad_norm": Infinity, "learning_rate": 0.00017270821986509492, "loss": 11.1496, "step": 204110 }, { "epoch": 24.56317689530686, "grad_norm": Infinity, "learning_rate": 0.00017270560798944196, "loss": 10.9967, "step": 204120 }, { "epoch": 24.564380264741274, "grad_norm": Infinity, "learning_rate": 0.00017270299600856604, "loss": 11.1739, "step": 204130 }, { "epoch": 24.56558363417569, "grad_norm": Infinity, "learning_rate": 0.00017270038392247088, "loss": 11.2443, "step": 204140 }, { "epoch": 24.56678700361011, "grad_norm": Infinity, "learning_rate": 0.00017269777173116034, "loss": 11.1713, "step": 204150 }, { "epoch": 24.567990373044523, "grad_norm": Infinity, "learning_rate": 0.00017269515943463818, "loss": 11.2142, "step": 204160 }, { "epoch": 24.56919374247894, "grad_norm": Infinity, "learning_rate": 0.0001726925470329082, "loss": 11.1771, "step": 204170 }, { "epoch": 24.57039711191336, "grad_norm": Infinity, "learning_rate": 0.00017268993452597412, "loss": 11.226, "step": 204180 }, { "epoch": 24.571600481347772, "grad_norm": Infinity, "learning_rate": 0.00017268732191383975, "loss": 11.2209, "step": 204190 }, { "epoch": 24.57280385078219, "grad_norm": Infinity, "learning_rate": 0.0001726847091965089, "loss": 11.0481, "step": 204200 }, { "epoch": 24.574007220216608, "grad_norm": Infinity, "learning_rate": 0.00017268209637398536, "loss": 11.1864, "step": 204210 }, { "epoch": 24.57521058965102, "grad_norm": Infinity, "learning_rate": 0.00017267948344627286, "loss": 11.0929, "step": 204220 }, { "epoch": 24.57641395908544, "grad_norm": Infinity, "learning_rate": 0.0001726768704133752, "loss": 11.0729, "step": 204230 }, { "epoch": 24.577617328519857, "grad_norm": Infinity, "learning_rate": 0.00017267425727529614, "loss": 11.1272, "step": 204240 }, { "epoch": 24.57882069795427, "grad_norm": Infinity, "learning_rate": 0.0001726716440320395, "loss": 11.0649, "step": 204250 }, { "epoch": 24.58002406738869, "grad_norm": Infinity, "learning_rate": 0.00017266903068360908, "loss": 11.2332, "step": 204260 }, { "epoch": 24.581227436823106, "grad_norm": Infinity, "learning_rate": 0.00017266641723000858, "loss": 11.1989, "step": 204270 }, { "epoch": 24.58243080625752, "grad_norm": Infinity, "learning_rate": 0.0001726638036712419, "loss": 11.1747, "step": 204280 }, { "epoch": 24.583634175691937, "grad_norm": Infinity, "learning_rate": 0.00017266119000731268, "loss": 11.2561, "step": 204290 }, { "epoch": 24.584837545126355, "grad_norm": Infinity, "learning_rate": 0.00017265857623822485, "loss": 11.2219, "step": 204300 }, { "epoch": 24.58604091456077, "grad_norm": Infinity, "learning_rate": 0.00017265596236398206, "loss": 11.2434, "step": 204310 }, { "epoch": 24.587244283995187, "grad_norm": Infinity, "learning_rate": 0.0001726533483845882, "loss": 11.0539, "step": 204320 }, { "epoch": 24.588447653429604, "grad_norm": Infinity, "learning_rate": 0.00017265073430004698, "loss": 11.2424, "step": 204330 }, { "epoch": 24.589651022864018, "grad_norm": Infinity, "learning_rate": 0.0001726481201103622, "loss": 11.2003, "step": 204340 }, { "epoch": 24.590854392298436, "grad_norm": Infinity, "learning_rate": 0.0001726455058155377, "loss": 11.1297, "step": 204350 }, { "epoch": 24.592057761732853, "grad_norm": Infinity, "learning_rate": 0.00017264289141557718, "loss": 11.1516, "step": 204360 }, { "epoch": 24.593261131167267, "grad_norm": Infinity, "learning_rate": 0.00017264027691048448, "loss": 11.1794, "step": 204370 }, { "epoch": 24.594464500601685, "grad_norm": Infinity, "learning_rate": 0.00017263766230026336, "loss": 11.1801, "step": 204380 }, { "epoch": 24.595667870036102, "grad_norm": Infinity, "learning_rate": 0.00017263504758491762, "loss": 11.1979, "step": 204390 }, { "epoch": 24.596871239470516, "grad_norm": Infinity, "learning_rate": 0.000172632432764451, "loss": 11.097, "step": 204400 }, { "epoch": 24.598074608904934, "grad_norm": Infinity, "learning_rate": 0.00017262981783886734, "loss": 11.2459, "step": 204410 }, { "epoch": 24.59927797833935, "grad_norm": Infinity, "learning_rate": 0.0001726272028081704, "loss": 11.1592, "step": 204420 }, { "epoch": 24.600481347773766, "grad_norm": Infinity, "learning_rate": 0.00017262458767236397, "loss": 11.2419, "step": 204430 }, { "epoch": 24.601684717208183, "grad_norm": Infinity, "learning_rate": 0.00017262197243145182, "loss": 11.2337, "step": 204440 }, { "epoch": 24.6028880866426, "grad_norm": Infinity, "learning_rate": 0.00017261935708543774, "loss": 11.0574, "step": 204450 }, { "epoch": 24.604091456077015, "grad_norm": Infinity, "learning_rate": 0.00017261674163432554, "loss": 11.1017, "step": 204460 }, { "epoch": 24.605294825511432, "grad_norm": Infinity, "learning_rate": 0.000172614126078119, "loss": 11.1663, "step": 204470 }, { "epoch": 24.60649819494585, "grad_norm": Infinity, "learning_rate": 0.00017261151041682188, "loss": 10.9653, "step": 204480 }, { "epoch": 24.607701564380264, "grad_norm": Infinity, "learning_rate": 0.00017260889465043795, "loss": 11.1901, "step": 204490 }, { "epoch": 24.60890493381468, "grad_norm": Infinity, "learning_rate": 0.00017260627877897105, "loss": 11.1646, "step": 204500 }, { "epoch": 24.6101083032491, "grad_norm": Infinity, "learning_rate": 0.00017260366280242494, "loss": 11.0836, "step": 204510 }, { "epoch": 24.611311672683513, "grad_norm": Infinity, "learning_rate": 0.00017260104672080337, "loss": 11.3492, "step": 204520 }, { "epoch": 24.61251504211793, "grad_norm": Infinity, "learning_rate": 0.0001725984305341102, "loss": 11.1114, "step": 204530 }, { "epoch": 24.613718411552348, "grad_norm": Infinity, "learning_rate": 0.00017259581424234916, "loss": 11.2224, "step": 204540 }, { "epoch": 24.614921780986762, "grad_norm": Infinity, "learning_rate": 0.00017259319784552405, "loss": 11.0706, "step": 204550 }, { "epoch": 24.61612515042118, "grad_norm": Infinity, "learning_rate": 0.00017259058134363867, "loss": 11.0646, "step": 204560 }, { "epoch": 24.617328519855597, "grad_norm": Infinity, "learning_rate": 0.0001725879647366968, "loss": 11.1819, "step": 204570 }, { "epoch": 24.61853188929001, "grad_norm": Infinity, "learning_rate": 0.0001725853480247022, "loss": 11.23, "step": 204580 }, { "epoch": 24.61973525872443, "grad_norm": Infinity, "learning_rate": 0.00017258273120765872, "loss": 11.0573, "step": 204590 }, { "epoch": 24.620938628158846, "grad_norm": Infinity, "learning_rate": 0.00017258011428557008, "loss": 11.2542, "step": 204600 }, { "epoch": 24.62214199759326, "grad_norm": Infinity, "learning_rate": 0.0001725774972584401, "loss": 11.1398, "step": 204610 }, { "epoch": 24.623345367027678, "grad_norm": Infinity, "learning_rate": 0.00017257488012627255, "loss": 11.1744, "step": 204620 }, { "epoch": 24.624548736462096, "grad_norm": Infinity, "learning_rate": 0.00017257226288907126, "loss": 11.0909, "step": 204630 }, { "epoch": 24.62575210589651, "grad_norm": Infinity, "learning_rate": 0.00017256964554683996, "loss": 11.1488, "step": 204640 }, { "epoch": 24.626955475330927, "grad_norm": Infinity, "learning_rate": 0.0001725670280995825, "loss": 11.1572, "step": 204650 }, { "epoch": 24.628158844765345, "grad_norm": Infinity, "learning_rate": 0.00017256441054730258, "loss": 11.065, "step": 204660 }, { "epoch": 24.62936221419976, "grad_norm": Infinity, "learning_rate": 0.0001725617928900041, "loss": 11.1162, "step": 204670 }, { "epoch": 24.630565583634176, "grad_norm": Infinity, "learning_rate": 0.00017255917512769075, "loss": 11.0603, "step": 204680 }, { "epoch": 24.63176895306859, "grad_norm": Infinity, "learning_rate": 0.00017255655726036636, "loss": 11.0942, "step": 204690 }, { "epoch": 24.632972322503008, "grad_norm": Infinity, "learning_rate": 0.00017255393928803473, "loss": 11.0301, "step": 204700 }, { "epoch": 24.634175691937426, "grad_norm": Infinity, "learning_rate": 0.00017255132121069968, "loss": 11.0594, "step": 204710 }, { "epoch": 24.63537906137184, "grad_norm": Infinity, "learning_rate": 0.0001725487030283649, "loss": 11.0392, "step": 204720 }, { "epoch": 24.636582430806257, "grad_norm": Infinity, "learning_rate": 0.00017254608474103424, "loss": 11.0384, "step": 204730 }, { "epoch": 24.637785800240675, "grad_norm": Infinity, "learning_rate": 0.0001725434663487115, "loss": 11.3137, "step": 204740 }, { "epoch": 24.63898916967509, "grad_norm": Infinity, "learning_rate": 0.00017254084785140047, "loss": 11.2037, "step": 204750 }, { "epoch": 24.640192539109506, "grad_norm": Infinity, "learning_rate": 0.0001725382292491049, "loss": 11.1143, "step": 204760 }, { "epoch": 24.641395908543924, "grad_norm": Infinity, "learning_rate": 0.00017253561054182857, "loss": 11.2488, "step": 204770 }, { "epoch": 24.642599277978338, "grad_norm": Infinity, "learning_rate": 0.00017253299172957537, "loss": 11.1119, "step": 204780 }, { "epoch": 24.643802647412755, "grad_norm": Infinity, "learning_rate": 0.00017253037281234897, "loss": 11.1652, "step": 204790 }, { "epoch": 24.645006016847173, "grad_norm": Infinity, "learning_rate": 0.00017252775379015324, "loss": 11.2104, "step": 204800 }, { "epoch": 24.646209386281587, "grad_norm": Infinity, "learning_rate": 0.00017252513466299197, "loss": 11.0869, "step": 204810 }, { "epoch": 24.647412755716005, "grad_norm": Infinity, "learning_rate": 0.00017252251543086887, "loss": 11.1367, "step": 204820 }, { "epoch": 24.648616125150422, "grad_norm": Infinity, "learning_rate": 0.00017251989609378782, "loss": 11.2158, "step": 204830 }, { "epoch": 24.649819494584836, "grad_norm": Infinity, "learning_rate": 0.00017251727665175255, "loss": 11.2252, "step": 204840 }, { "epoch": 24.651022864019254, "grad_norm": Infinity, "learning_rate": 0.0001725146571047669, "loss": 11.1481, "step": 204850 }, { "epoch": 24.65222623345367, "grad_norm": Infinity, "learning_rate": 0.0001725120374528346, "loss": 11.1409, "step": 204860 }, { "epoch": 24.653429602888085, "grad_norm": Infinity, "learning_rate": 0.00017250941769595955, "loss": 11.1508, "step": 204870 }, { "epoch": 24.654632972322503, "grad_norm": Infinity, "learning_rate": 0.0001725067978341454, "loss": 11.2665, "step": 204880 }, { "epoch": 24.65583634175692, "grad_norm": Infinity, "learning_rate": 0.00017250417786739605, "loss": 11.2422, "step": 204890 }, { "epoch": 24.657039711191334, "grad_norm": Infinity, "learning_rate": 0.00017250155779571525, "loss": 11.1574, "step": 204900 }, { "epoch": 24.658243080625752, "grad_norm": Infinity, "learning_rate": 0.00017249893761910677, "loss": 11.2169, "step": 204910 }, { "epoch": 24.65944645006017, "grad_norm": Infinity, "learning_rate": 0.00017249631733757444, "loss": 10.966, "step": 204920 }, { "epoch": 24.660649819494584, "grad_norm": Infinity, "learning_rate": 0.00017249369695112208, "loss": 11.1227, "step": 204930 }, { "epoch": 24.661853188929, "grad_norm": Infinity, "learning_rate": 0.0001724910764597534, "loss": 11.087, "step": 204940 }, { "epoch": 24.66305655836342, "grad_norm": Infinity, "learning_rate": 0.00017248845586347223, "loss": 11.2986, "step": 204950 }, { "epoch": 24.664259927797833, "grad_norm": Infinity, "learning_rate": 0.0001724858351622824, "loss": 11.0763, "step": 204960 }, { "epoch": 24.66546329723225, "grad_norm": Infinity, "learning_rate": 0.00017248321435618763, "loss": 11.0677, "step": 204970 }, { "epoch": 24.666666666666668, "grad_norm": Infinity, "learning_rate": 0.00017248059344519178, "loss": 11.077, "step": 204980 }, { "epoch": 24.667870036101082, "grad_norm": Infinity, "learning_rate": 0.00017247797242929863, "loss": 11.2168, "step": 204990 }, { "epoch": 24.6690734055355, "grad_norm": Infinity, "learning_rate": 0.00017247535130851194, "loss": 11.1452, "step": 205000 }, { "epoch": 24.670276774969917, "grad_norm": Infinity, "learning_rate": 0.00017247273008283553, "loss": 11.0917, "step": 205010 }, { "epoch": 24.67148014440433, "grad_norm": Infinity, "learning_rate": 0.00017247010875227317, "loss": 11.0937, "step": 205020 }, { "epoch": 24.67268351383875, "grad_norm": Infinity, "learning_rate": 0.0001724674873168287, "loss": 11.1087, "step": 205030 }, { "epoch": 24.673886883273166, "grad_norm": Infinity, "learning_rate": 0.00017246486577650587, "loss": 11.2592, "step": 205040 }, { "epoch": 24.67509025270758, "grad_norm": Infinity, "learning_rate": 0.00017246224413130845, "loss": 11.1693, "step": 205050 }, { "epoch": 24.676293622141998, "grad_norm": Infinity, "learning_rate": 0.00017245962238124035, "loss": 11.1687, "step": 205060 }, { "epoch": 24.677496991576415, "grad_norm": Infinity, "learning_rate": 0.00017245700052630523, "loss": 11.0862, "step": 205070 }, { "epoch": 24.67870036101083, "grad_norm": Infinity, "learning_rate": 0.00017245437856650694, "loss": 11.0493, "step": 205080 }, { "epoch": 24.679903730445247, "grad_norm": Infinity, "learning_rate": 0.00017245175650184932, "loss": 11.2379, "step": 205090 }, { "epoch": 24.681107099879664, "grad_norm": Infinity, "learning_rate": 0.00017244913433233606, "loss": 11.0883, "step": 205100 }, { "epoch": 24.68231046931408, "grad_norm": Infinity, "learning_rate": 0.00017244651205797107, "loss": 11.1097, "step": 205110 }, { "epoch": 24.683513838748496, "grad_norm": Infinity, "learning_rate": 0.00017244388967875804, "loss": 11.0599, "step": 205120 }, { "epoch": 24.684717208182914, "grad_norm": Infinity, "learning_rate": 0.00017244126719470084, "loss": 11.2164, "step": 205130 }, { "epoch": 24.685920577617328, "grad_norm": Infinity, "learning_rate": 0.00017243864460580327, "loss": 11.1807, "step": 205140 }, { "epoch": 24.687123947051745, "grad_norm": Infinity, "learning_rate": 0.00017243602191206904, "loss": 11.1286, "step": 205150 }, { "epoch": 24.688327316486163, "grad_norm": Infinity, "learning_rate": 0.00017243339911350203, "loss": 11.1287, "step": 205160 }, { "epoch": 24.689530685920577, "grad_norm": Infinity, "learning_rate": 0.000172430776210106, "loss": 11.126, "step": 205170 }, { "epoch": 24.690734055354994, "grad_norm": Infinity, "learning_rate": 0.0001724281532018848, "loss": 11.174, "step": 205180 }, { "epoch": 24.691937424789412, "grad_norm": Infinity, "learning_rate": 0.00017242553008884216, "loss": 11.2406, "step": 205190 }, { "epoch": 24.693140794223826, "grad_norm": Infinity, "learning_rate": 0.00017242290687098186, "loss": 11.1088, "step": 205200 }, { "epoch": 24.694344163658243, "grad_norm": Infinity, "learning_rate": 0.00017242028354830775, "loss": 11.1296, "step": 205210 }, { "epoch": 24.69554753309266, "grad_norm": Infinity, "learning_rate": 0.0001724176601208236, "loss": 11.2166, "step": 205220 }, { "epoch": 24.696750902527075, "grad_norm": Infinity, "learning_rate": 0.00017241503658853326, "loss": 11.1493, "step": 205230 }, { "epoch": 24.697954271961493, "grad_norm": Infinity, "learning_rate": 0.00017241241295144047, "loss": 11.2224, "step": 205240 }, { "epoch": 24.69915764139591, "grad_norm": Infinity, "learning_rate": 0.000172409789209549, "loss": 11.1077, "step": 205250 }, { "epoch": 24.700361010830324, "grad_norm": Infinity, "learning_rate": 0.00017240716536286272, "loss": 11.0537, "step": 205260 }, { "epoch": 24.70156438026474, "grad_norm": Infinity, "learning_rate": 0.00017240454141138537, "loss": 11.1137, "step": 205270 }, { "epoch": 24.70276774969916, "grad_norm": Infinity, "learning_rate": 0.00017240191735512083, "loss": 11.1246, "step": 205280 }, { "epoch": 24.703971119133573, "grad_norm": Infinity, "learning_rate": 0.0001723992931940728, "loss": 11.0609, "step": 205290 }, { "epoch": 24.70517448856799, "grad_norm": Infinity, "learning_rate": 0.0001723966689282451, "loss": 11.1098, "step": 205300 }, { "epoch": 24.706377858002405, "grad_norm": Infinity, "learning_rate": 0.00017239404455764157, "loss": 11.1885, "step": 205310 }, { "epoch": 24.707581227436823, "grad_norm": Infinity, "learning_rate": 0.000172391420082266, "loss": 11.1716, "step": 205320 }, { "epoch": 24.70878459687124, "grad_norm": Infinity, "learning_rate": 0.00017238879550212212, "loss": 11.1309, "step": 205330 }, { "epoch": 24.709987966305654, "grad_norm": Infinity, "learning_rate": 0.00017238617081721382, "loss": 11.1303, "step": 205340 }, { "epoch": 24.71119133574007, "grad_norm": Infinity, "learning_rate": 0.00017238354602754487, "loss": 11.1221, "step": 205350 }, { "epoch": 24.71239470517449, "grad_norm": Infinity, "learning_rate": 0.000172380921133119, "loss": 11.1669, "step": 205360 }, { "epoch": 24.713598074608903, "grad_norm": Infinity, "learning_rate": 0.00017237829613394015, "loss": 11.037, "step": 205370 }, { "epoch": 24.71480144404332, "grad_norm": Infinity, "learning_rate": 0.00017237567103001197, "loss": 11.1466, "step": 205380 }, { "epoch": 24.71600481347774, "grad_norm": Infinity, "learning_rate": 0.00017237304582133834, "loss": 11.2774, "step": 205390 }, { "epoch": 24.717208182912152, "grad_norm": Infinity, "learning_rate": 0.00017237042050792306, "loss": 11.186, "step": 205400 }, { "epoch": 24.71841155234657, "grad_norm": Infinity, "learning_rate": 0.0001723677950897699, "loss": 11.2295, "step": 205410 }, { "epoch": 24.719614921780988, "grad_norm": Infinity, "learning_rate": 0.00017236516956688268, "loss": 11.126, "step": 205420 }, { "epoch": 24.7208182912154, "grad_norm": Infinity, "learning_rate": 0.0001723625439392652, "loss": 11.0745, "step": 205430 }, { "epoch": 24.72202166064982, "grad_norm": Infinity, "learning_rate": 0.00017235991820692124, "loss": 11.1428, "step": 205440 }, { "epoch": 24.723225030084237, "grad_norm": Infinity, "learning_rate": 0.00017235729236985463, "loss": 11.1674, "step": 205450 }, { "epoch": 24.72442839951865, "grad_norm": Infinity, "learning_rate": 0.0001723546664280691, "loss": 11.2736, "step": 205460 }, { "epoch": 24.72563176895307, "grad_norm": Infinity, "learning_rate": 0.0001723520403815686, "loss": 10.9804, "step": 205470 }, { "epoch": 24.726835138387486, "grad_norm": Infinity, "learning_rate": 0.00017234941423035677, "loss": 11.2009, "step": 205480 }, { "epoch": 24.7280385078219, "grad_norm": Infinity, "learning_rate": 0.0001723467879744375, "loss": 11.1039, "step": 205490 }, { "epoch": 24.729241877256317, "grad_norm": Infinity, "learning_rate": 0.00017234416161381454, "loss": 11.1822, "step": 205500 }, { "epoch": 24.730445246690735, "grad_norm": Infinity, "learning_rate": 0.00017234153514849172, "loss": 11.2578, "step": 205510 }, { "epoch": 24.73164861612515, "grad_norm": Infinity, "learning_rate": 0.00017233890857847286, "loss": 11.1938, "step": 205520 }, { "epoch": 24.732851985559567, "grad_norm": Infinity, "learning_rate": 0.0001723362819037617, "loss": 11.1589, "step": 205530 }, { "epoch": 24.734055354993984, "grad_norm": Infinity, "learning_rate": 0.0001723336551243621, "loss": 11.1152, "step": 205540 }, { "epoch": 24.735258724428398, "grad_norm": Infinity, "learning_rate": 0.00017233102824027786, "loss": 11.0362, "step": 205550 }, { "epoch": 24.736462093862816, "grad_norm": Infinity, "learning_rate": 0.00017232840125151278, "loss": 11.0834, "step": 205560 }, { "epoch": 24.737665463297233, "grad_norm": Infinity, "learning_rate": 0.0001723257741580706, "loss": 11.1979, "step": 205570 }, { "epoch": 24.738868832731647, "grad_norm": Infinity, "learning_rate": 0.0001723231469599552, "loss": 11.1567, "step": 205580 }, { "epoch": 24.740072202166065, "grad_norm": Infinity, "learning_rate": 0.00017232051965717033, "loss": 11.1166, "step": 205590 }, { "epoch": 24.741275571600482, "grad_norm": Infinity, "learning_rate": 0.0001723178922497198, "loss": 11.0169, "step": 205600 }, { "epoch": 24.742478941034896, "grad_norm": Infinity, "learning_rate": 0.00017231526473760747, "loss": 11.1624, "step": 205610 }, { "epoch": 24.743682310469314, "grad_norm": Infinity, "learning_rate": 0.00017231263712083709, "loss": 11.0408, "step": 205620 }, { "epoch": 24.74488567990373, "grad_norm": Infinity, "learning_rate": 0.0001723100093994124, "loss": 11.2233, "step": 205630 }, { "epoch": 24.746089049338146, "grad_norm": Infinity, "learning_rate": 0.00017230738157333737, "loss": 11.1705, "step": 205640 }, { "epoch": 24.747292418772563, "grad_norm": Infinity, "learning_rate": 0.00017230475364261568, "loss": 11.0758, "step": 205650 }, { "epoch": 24.74849578820698, "grad_norm": Infinity, "learning_rate": 0.00017230212560725113, "loss": 11.023, "step": 205660 }, { "epoch": 24.749699157641395, "grad_norm": Infinity, "learning_rate": 0.00017229949746724757, "loss": 11.0784, "step": 205670 }, { "epoch": 24.750902527075812, "grad_norm": Infinity, "learning_rate": 0.00017229686922260877, "loss": 11.1199, "step": 205680 }, { "epoch": 24.75210589651023, "grad_norm": Infinity, "learning_rate": 0.00017229424087333856, "loss": 11.1881, "step": 205690 }, { "epoch": 24.753309265944644, "grad_norm": Infinity, "learning_rate": 0.00017229161241944074, "loss": 11.1362, "step": 205700 }, { "epoch": 24.75451263537906, "grad_norm": Infinity, "learning_rate": 0.00017228898386091913, "loss": 11.0152, "step": 205710 }, { "epoch": 24.75571600481348, "grad_norm": Infinity, "learning_rate": 0.0001722863551977775, "loss": 11.1821, "step": 205720 }, { "epoch": 24.756919374247893, "grad_norm": Infinity, "learning_rate": 0.00017228372643001964, "loss": 11.095, "step": 205730 }, { "epoch": 24.75812274368231, "grad_norm": Infinity, "learning_rate": 0.00017228109755764942, "loss": 11.2645, "step": 205740 }, { "epoch": 24.759326113116728, "grad_norm": Infinity, "learning_rate": 0.00017227846858067057, "loss": 11.0042, "step": 205750 }, { "epoch": 24.760529482551142, "grad_norm": Infinity, "learning_rate": 0.00017227583949908695, "loss": 11.0449, "step": 205760 }, { "epoch": 24.76173285198556, "grad_norm": Infinity, "learning_rate": 0.00017227321031290234, "loss": 11.2319, "step": 205770 }, { "epoch": 24.762936221419977, "grad_norm": Infinity, "learning_rate": 0.00017227058102212058, "loss": 11.1308, "step": 205780 }, { "epoch": 24.76413959085439, "grad_norm": Infinity, "learning_rate": 0.0001722679516267454, "loss": 11.1714, "step": 205790 }, { "epoch": 24.76534296028881, "grad_norm": Infinity, "learning_rate": 0.00017226532212678066, "loss": 11.0361, "step": 205800 }, { "epoch": 24.766546329723226, "grad_norm": Infinity, "learning_rate": 0.00017226269252223017, "loss": 11.2833, "step": 205810 }, { "epoch": 24.76774969915764, "grad_norm": Infinity, "learning_rate": 0.00017226006281309775, "loss": 11.0983, "step": 205820 }, { "epoch": 24.768953068592058, "grad_norm": Infinity, "learning_rate": 0.00017225743299938712, "loss": 11.1982, "step": 205830 }, { "epoch": 24.770156438026476, "grad_norm": Infinity, "learning_rate": 0.0001722548030811022, "loss": 11.2026, "step": 205840 }, { "epoch": 24.77135980746089, "grad_norm": Infinity, "learning_rate": 0.0001722521730582467, "loss": 11.1458, "step": 205850 }, { "epoch": 24.772563176895307, "grad_norm": Infinity, "learning_rate": 0.00017224954293082448, "loss": 11.2007, "step": 205860 }, { "epoch": 24.773766546329725, "grad_norm": Infinity, "learning_rate": 0.00017224691269883933, "loss": 11.1958, "step": 205870 }, { "epoch": 24.77496991576414, "grad_norm": Infinity, "learning_rate": 0.00017224428236229508, "loss": 11.1364, "step": 205880 }, { "epoch": 24.776173285198556, "grad_norm": Infinity, "learning_rate": 0.00017224165192119553, "loss": 11.0171, "step": 205890 }, { "epoch": 24.777376654632974, "grad_norm": Infinity, "learning_rate": 0.00017223902137554443, "loss": 11.0969, "step": 205900 }, { "epoch": 24.778580024067388, "grad_norm": Infinity, "learning_rate": 0.00017223639072534564, "loss": 11.071, "step": 205910 }, { "epoch": 24.779783393501805, "grad_norm": Infinity, "learning_rate": 0.00017223375997060298, "loss": 11.232, "step": 205920 }, { "epoch": 24.780986762936223, "grad_norm": Infinity, "learning_rate": 0.0001722311291113202, "loss": 11.0746, "step": 205930 }, { "epoch": 24.782190132370637, "grad_norm": Infinity, "learning_rate": 0.00017222849814750116, "loss": 11.0144, "step": 205940 }, { "epoch": 24.783393501805055, "grad_norm": Infinity, "learning_rate": 0.00017222586707914966, "loss": 11.1086, "step": 205950 }, { "epoch": 24.784596871239472, "grad_norm": Infinity, "learning_rate": 0.00017222323590626953, "loss": 11.2092, "step": 205960 }, { "epoch": 24.785800240673886, "grad_norm": Infinity, "learning_rate": 0.0001722206046288645, "loss": 11.1945, "step": 205970 }, { "epoch": 24.787003610108304, "grad_norm": Infinity, "learning_rate": 0.00017221797324693843, "loss": 11.0865, "step": 205980 }, { "epoch": 24.78820697954272, "grad_norm": Infinity, "learning_rate": 0.00017221534176049512, "loss": 11.1573, "step": 205990 }, { "epoch": 24.789410348977135, "grad_norm": Infinity, "learning_rate": 0.0001722127101695384, "loss": 11.1529, "step": 206000 }, { "epoch": 24.790613718411553, "grad_norm": Infinity, "learning_rate": 0.00017221007847407205, "loss": 11.1101, "step": 206010 }, { "epoch": 24.79181708784597, "grad_norm": Infinity, "learning_rate": 0.0001722074466740999, "loss": 11.0971, "step": 206020 }, { "epoch": 24.793020457280385, "grad_norm": Infinity, "learning_rate": 0.0001722048147696257, "loss": 11.2051, "step": 206030 }, { "epoch": 24.794223826714802, "grad_norm": Infinity, "learning_rate": 0.00017220218276065333, "loss": 11.1422, "step": 206040 }, { "epoch": 24.79542719614922, "grad_norm": Infinity, "learning_rate": 0.0001721995506471866, "loss": 11.1363, "step": 206050 }, { "epoch": 24.796630565583634, "grad_norm": Infinity, "learning_rate": 0.00017219691842922926, "loss": 11.1196, "step": 206060 }, { "epoch": 24.79783393501805, "grad_norm": Infinity, "learning_rate": 0.0001721942861067852, "loss": 11.1301, "step": 206070 }, { "epoch": 24.799037304452465, "grad_norm": Infinity, "learning_rate": 0.00017219165367985813, "loss": 11.0421, "step": 206080 }, { "epoch": 24.800240673886883, "grad_norm": Infinity, "learning_rate": 0.00017218902114845197, "loss": 11.1288, "step": 206090 }, { "epoch": 24.8014440433213, "grad_norm": Infinity, "learning_rate": 0.00017218638851257046, "loss": 11.0956, "step": 206100 }, { "epoch": 24.802647412755714, "grad_norm": Infinity, "learning_rate": 0.0001721837557722174, "loss": 11.2543, "step": 206110 }, { "epoch": 24.803850782190132, "grad_norm": Infinity, "learning_rate": 0.00017218112292739663, "loss": 11.1714, "step": 206120 }, { "epoch": 24.80505415162455, "grad_norm": Infinity, "learning_rate": 0.00017217848997811195, "loss": 11.1211, "step": 206130 }, { "epoch": 24.806257521058964, "grad_norm": Infinity, "learning_rate": 0.00017217585692436718, "loss": 11.044, "step": 206140 }, { "epoch": 24.80746089049338, "grad_norm": Infinity, "learning_rate": 0.00017217322376616613, "loss": 10.9925, "step": 206150 }, { "epoch": 24.8086642599278, "grad_norm": Infinity, "learning_rate": 0.0001721705905035126, "loss": 11.0859, "step": 206160 }, { "epoch": 24.809867629362213, "grad_norm": Infinity, "learning_rate": 0.00017216795713641042, "loss": 11.3012, "step": 206170 }, { "epoch": 24.81107099879663, "grad_norm": Infinity, "learning_rate": 0.0001721653236648634, "loss": 11.1771, "step": 206180 }, { "epoch": 24.812274368231048, "grad_norm": Infinity, "learning_rate": 0.00017216269008887532, "loss": 11.1421, "step": 206190 }, { "epoch": 24.813477737665462, "grad_norm": Infinity, "learning_rate": 0.00017216005640845, "loss": 11.0431, "step": 206200 }, { "epoch": 24.81468110709988, "grad_norm": Infinity, "learning_rate": 0.00017215742262359127, "loss": 11.1245, "step": 206210 }, { "epoch": 24.815884476534297, "grad_norm": Infinity, "learning_rate": 0.00017215478873430293, "loss": 11.143, "step": 206220 }, { "epoch": 24.81708784596871, "grad_norm": Infinity, "learning_rate": 0.0001721521547405888, "loss": 11.0964, "step": 206230 }, { "epoch": 24.81829121540313, "grad_norm": Infinity, "learning_rate": 0.0001721495206424527, "loss": 11.1142, "step": 206240 }, { "epoch": 24.819494584837546, "grad_norm": Infinity, "learning_rate": 0.00017214688643989843, "loss": 10.9466, "step": 206250 }, { "epoch": 24.82069795427196, "grad_norm": Infinity, "learning_rate": 0.00017214425213292977, "loss": 11.2159, "step": 206260 }, { "epoch": 24.821901323706378, "grad_norm": Infinity, "learning_rate": 0.0001721416177215506, "loss": 11.0813, "step": 206270 }, { "epoch": 24.823104693140795, "grad_norm": Infinity, "learning_rate": 0.00017213898320576467, "loss": 11.1662, "step": 206280 }, { "epoch": 24.82430806257521, "grad_norm": Infinity, "learning_rate": 0.00017213634858557586, "loss": 11.182, "step": 206290 }, { "epoch": 24.825511432009627, "grad_norm": Infinity, "learning_rate": 0.00017213371386098791, "loss": 11.1403, "step": 206300 }, { "epoch": 24.826714801444044, "grad_norm": Infinity, "learning_rate": 0.00017213107903200466, "loss": 11.1672, "step": 206310 }, { "epoch": 24.82791817087846, "grad_norm": Infinity, "learning_rate": 0.00017212844409862998, "loss": 11.102, "step": 206320 }, { "epoch": 24.829121540312876, "grad_norm": Infinity, "learning_rate": 0.0001721258090608676, "loss": 11.0374, "step": 206330 }, { "epoch": 24.830324909747294, "grad_norm": Infinity, "learning_rate": 0.00017212317391872138, "loss": 11.1039, "step": 206340 }, { "epoch": 24.831528279181708, "grad_norm": Infinity, "learning_rate": 0.0001721205386721951, "loss": 11.1424, "step": 206350 }, { "epoch": 24.832731648616125, "grad_norm": Infinity, "learning_rate": 0.0001721179033212926, "loss": 11.2306, "step": 206360 }, { "epoch": 24.833935018050543, "grad_norm": Infinity, "learning_rate": 0.0001721152678660177, "loss": 11.1831, "step": 206370 }, { "epoch": 24.835138387484957, "grad_norm": Infinity, "learning_rate": 0.0001721126323063742, "loss": 11.0905, "step": 206380 }, { "epoch": 24.836341756919374, "grad_norm": Infinity, "learning_rate": 0.0001721099966423659, "loss": 11.1135, "step": 206390 }, { "epoch": 24.837545126353792, "grad_norm": Infinity, "learning_rate": 0.00017210736087399666, "loss": 11.1013, "step": 206400 }, { "epoch": 24.838748495788206, "grad_norm": Infinity, "learning_rate": 0.00017210472500127025, "loss": 11.274, "step": 206410 }, { "epoch": 24.839951865222623, "grad_norm": Infinity, "learning_rate": 0.00017210208902419049, "loss": 11.1311, "step": 206420 }, { "epoch": 24.84115523465704, "grad_norm": Infinity, "learning_rate": 0.00017209945294276123, "loss": 11.1534, "step": 206430 }, { "epoch": 24.842358604091455, "grad_norm": Infinity, "learning_rate": 0.00017209681675698624, "loss": 11.1009, "step": 206440 }, { "epoch": 24.843561973525873, "grad_norm": Infinity, "learning_rate": 0.00017209418046686934, "loss": 11.2222, "step": 206450 }, { "epoch": 24.84476534296029, "grad_norm": Infinity, "learning_rate": 0.0001720915440724144, "loss": 11.178, "step": 206460 }, { "epoch": 24.845968712394704, "grad_norm": Infinity, "learning_rate": 0.00017208890757362518, "loss": 11.2219, "step": 206470 }, { "epoch": 24.84717208182912, "grad_norm": Infinity, "learning_rate": 0.0001720862709705055, "loss": 11.0898, "step": 206480 }, { "epoch": 24.84837545126354, "grad_norm": Infinity, "learning_rate": 0.00017208363426305921, "loss": 11.2325, "step": 206490 }, { "epoch": 24.849578820697953, "grad_norm": Infinity, "learning_rate": 0.00017208099745129012, "loss": 11.1899, "step": 206500 }, { "epoch": 24.85078219013237, "grad_norm": Infinity, "learning_rate": 0.000172078360535202, "loss": 11.2145, "step": 206510 }, { "epoch": 24.85198555956679, "grad_norm": Infinity, "learning_rate": 0.0001720757235147987, "loss": 11.1796, "step": 206520 }, { "epoch": 24.853188929001202, "grad_norm": Infinity, "learning_rate": 0.00017207308639008404, "loss": 11.1751, "step": 206530 }, { "epoch": 24.85439229843562, "grad_norm": Infinity, "learning_rate": 0.0001720704491610618, "loss": 11.2955, "step": 206540 }, { "epoch": 24.855595667870038, "grad_norm": Infinity, "learning_rate": 0.00017206781182773587, "loss": 11.1943, "step": 206550 }, { "epoch": 24.85679903730445, "grad_norm": Infinity, "learning_rate": 0.00017206517439011002, "loss": 11.1047, "step": 206560 }, { "epoch": 24.85800240673887, "grad_norm": Infinity, "learning_rate": 0.00017206253684818806, "loss": 11.1515, "step": 206570 }, { "epoch": 24.859205776173287, "grad_norm": Infinity, "learning_rate": 0.00017205989920197383, "loss": 11.0454, "step": 206580 }, { "epoch": 24.8604091456077, "grad_norm": Infinity, "learning_rate": 0.0001720572614514711, "loss": 11.1421, "step": 206590 }, { "epoch": 24.86161251504212, "grad_norm": Infinity, "learning_rate": 0.00017205462359668375, "loss": 11.1458, "step": 206600 }, { "epoch": 24.862815884476536, "grad_norm": Infinity, "learning_rate": 0.00017205198563761558, "loss": 11.0541, "step": 206610 }, { "epoch": 24.86401925391095, "grad_norm": Infinity, "learning_rate": 0.00017204934757427036, "loss": 11.0889, "step": 206620 }, { "epoch": 24.865222623345367, "grad_norm": Infinity, "learning_rate": 0.00017204670940665196, "loss": 11.2054, "step": 206630 }, { "epoch": 24.866425992779785, "grad_norm": Infinity, "learning_rate": 0.0001720440711347642, "loss": 11.1749, "step": 206640 }, { "epoch": 24.8676293622142, "grad_norm": Infinity, "learning_rate": 0.00017204143275861088, "loss": 11.1575, "step": 206650 }, { "epoch": 24.868832731648617, "grad_norm": Infinity, "learning_rate": 0.0001720387942781958, "loss": 11.0526, "step": 206660 }, { "epoch": 24.870036101083034, "grad_norm": Infinity, "learning_rate": 0.0001720361556935228, "loss": 11.1868, "step": 206670 }, { "epoch": 24.871239470517448, "grad_norm": Infinity, "learning_rate": 0.00017203351700459572, "loss": 11.2091, "step": 206680 }, { "epoch": 24.872442839951866, "grad_norm": Infinity, "learning_rate": 0.00017203087821141835, "loss": 11.0993, "step": 206690 }, { "epoch": 24.87364620938628, "grad_norm": Infinity, "learning_rate": 0.0001720282393139945, "loss": 11.0359, "step": 206700 }, { "epoch": 24.874849578820697, "grad_norm": Infinity, "learning_rate": 0.00017202560031232803, "loss": 11.096, "step": 206710 }, { "epoch": 24.876052948255115, "grad_norm": Infinity, "learning_rate": 0.0001720229612064227, "loss": 11.1901, "step": 206720 }, { "epoch": 24.87725631768953, "grad_norm": Infinity, "learning_rate": 0.00017202032199628236, "loss": 11.2294, "step": 206730 }, { "epoch": 24.878459687123947, "grad_norm": Infinity, "learning_rate": 0.00017201768268191087, "loss": 11.1205, "step": 206740 }, { "epoch": 24.879663056558364, "grad_norm": Infinity, "learning_rate": 0.00017201504326331199, "loss": 11.0705, "step": 206750 }, { "epoch": 24.880866425992778, "grad_norm": Infinity, "learning_rate": 0.0001720124037404896, "loss": 11.1353, "step": 206760 }, { "epoch": 24.882069795427196, "grad_norm": Infinity, "learning_rate": 0.00017200976411344744, "loss": 11.0561, "step": 206770 }, { "epoch": 24.883273164861613, "grad_norm": Infinity, "learning_rate": 0.00017200712438218937, "loss": 11.1002, "step": 206780 }, { "epoch": 24.884476534296027, "grad_norm": Infinity, "learning_rate": 0.00017200448454671925, "loss": 11.1291, "step": 206790 }, { "epoch": 24.885679903730445, "grad_norm": Infinity, "learning_rate": 0.00017200184460704082, "loss": 11.2082, "step": 206800 }, { "epoch": 24.886883273164862, "grad_norm": Infinity, "learning_rate": 0.000171999204563158, "loss": 11.1146, "step": 206810 }, { "epoch": 24.888086642599276, "grad_norm": Infinity, "learning_rate": 0.0001719965644150745, "loss": 11.1087, "step": 206820 }, { "epoch": 24.889290012033694, "grad_norm": Infinity, "learning_rate": 0.00017199392416279423, "loss": 11.2525, "step": 206830 }, { "epoch": 24.89049338146811, "grad_norm": Infinity, "learning_rate": 0.00017199128380632095, "loss": 11.1837, "step": 206840 }, { "epoch": 24.891696750902526, "grad_norm": Infinity, "learning_rate": 0.00017198864334565852, "loss": 11.148, "step": 206850 }, { "epoch": 24.892900120336943, "grad_norm": Infinity, "learning_rate": 0.00017198600278081077, "loss": 11.1606, "step": 206860 }, { "epoch": 24.89410348977136, "grad_norm": Infinity, "learning_rate": 0.00017198336211178146, "loss": 11.1392, "step": 206870 }, { "epoch": 24.895306859205775, "grad_norm": Infinity, "learning_rate": 0.00017198072133857445, "loss": 11.1581, "step": 206880 }, { "epoch": 24.896510228640192, "grad_norm": Infinity, "learning_rate": 0.0001719780804611936, "loss": 11.1352, "step": 206890 }, { "epoch": 24.89771359807461, "grad_norm": Infinity, "learning_rate": 0.0001719754394796427, "loss": 11.1719, "step": 206900 }, { "epoch": 24.898916967509024, "grad_norm": Infinity, "learning_rate": 0.00017197279839392557, "loss": 11.1446, "step": 206910 }, { "epoch": 24.90012033694344, "grad_norm": Infinity, "learning_rate": 0.000171970157204046, "loss": 11.0143, "step": 206920 }, { "epoch": 24.90132370637786, "grad_norm": Infinity, "learning_rate": 0.00017196751591000786, "loss": 11.2199, "step": 206930 }, { "epoch": 24.902527075812273, "grad_norm": Infinity, "learning_rate": 0.00017196487451181494, "loss": 11.2317, "step": 206940 }, { "epoch": 24.90373044524669, "grad_norm": Infinity, "learning_rate": 0.00017196223300947112, "loss": 11.1998, "step": 206950 }, { "epoch": 24.904933814681108, "grad_norm": Infinity, "learning_rate": 0.00017195959140298017, "loss": 11.2029, "step": 206960 }, { "epoch": 24.906137184115522, "grad_norm": Infinity, "learning_rate": 0.0001719569496923459, "loss": 11.12, "step": 206970 }, { "epoch": 24.90734055354994, "grad_norm": Infinity, "learning_rate": 0.0001719543078775722, "loss": 11.0668, "step": 206980 }, { "epoch": 24.908543922984357, "grad_norm": Infinity, "learning_rate": 0.00017195166595866278, "loss": 11.0521, "step": 206990 }, { "epoch": 24.90974729241877, "grad_norm": Infinity, "learning_rate": 0.00017194902393562157, "loss": 11.1313, "step": 207000 }, { "epoch": 24.91095066185319, "grad_norm": Infinity, "learning_rate": 0.0001719463818084524, "loss": 11.0913, "step": 207010 }, { "epoch": 24.912154031287606, "grad_norm": Infinity, "learning_rate": 0.00017194373957715899, "loss": 11.1522, "step": 207020 }, { "epoch": 24.91335740072202, "grad_norm": Infinity, "learning_rate": 0.00017194109724174528, "loss": 11.0622, "step": 207030 }, { "epoch": 24.914560770156438, "grad_norm": Infinity, "learning_rate": 0.00017193845480221502, "loss": 11.1755, "step": 207040 }, { "epoch": 24.915764139590856, "grad_norm": Infinity, "learning_rate": 0.00017193581225857205, "loss": 11.1641, "step": 207050 }, { "epoch": 24.91696750902527, "grad_norm": Infinity, "learning_rate": 0.0001719331696108202, "loss": 11.2042, "step": 207060 }, { "epoch": 24.918170878459687, "grad_norm": Infinity, "learning_rate": 0.00017193052685896327, "loss": 11.1643, "step": 207070 }, { "epoch": 24.919374247894105, "grad_norm": Infinity, "learning_rate": 0.00017192788400300515, "loss": 11.2261, "step": 207080 }, { "epoch": 24.92057761732852, "grad_norm": Infinity, "learning_rate": 0.00017192524104294962, "loss": 11.1804, "step": 207090 }, { "epoch": 24.921780986762936, "grad_norm": Infinity, "learning_rate": 0.00017192259797880048, "loss": 11.1159, "step": 207100 }, { "epoch": 24.922984356197354, "grad_norm": Infinity, "learning_rate": 0.00017191995481056157, "loss": 11.1054, "step": 207110 }, { "epoch": 24.924187725631768, "grad_norm": Infinity, "learning_rate": 0.00017191731153823678, "loss": 11.0783, "step": 207120 }, { "epoch": 24.925391095066185, "grad_norm": Infinity, "learning_rate": 0.00017191466816182985, "loss": 11.1987, "step": 207130 }, { "epoch": 24.926594464500603, "grad_norm": Infinity, "learning_rate": 0.00017191202468134467, "loss": 11.2727, "step": 207140 }, { "epoch": 24.927797833935017, "grad_norm": Infinity, "learning_rate": 0.000171909381096785, "loss": 11.1519, "step": 207150 }, { "epoch": 24.929001203369435, "grad_norm": Infinity, "learning_rate": 0.00017190673740815474, "loss": 11.1968, "step": 207160 }, { "epoch": 24.930204572803852, "grad_norm": Infinity, "learning_rate": 0.00017190409361545763, "loss": 11.1972, "step": 207170 }, { "epoch": 24.931407942238266, "grad_norm": Infinity, "learning_rate": 0.00017190144971869758, "loss": 11.0516, "step": 207180 }, { "epoch": 24.932611311672684, "grad_norm": Infinity, "learning_rate": 0.0001718988057178784, "loss": 11.0858, "step": 207190 }, { "epoch": 24.9338146811071, "grad_norm": Infinity, "learning_rate": 0.00017189616161300385, "loss": 11.1439, "step": 207200 }, { "epoch": 24.935018050541515, "grad_norm": Infinity, "learning_rate": 0.0001718935174040778, "loss": 11.3689, "step": 207210 }, { "epoch": 24.936221419975933, "grad_norm": Infinity, "learning_rate": 0.00017189087309110412, "loss": 11.0715, "step": 207220 }, { "epoch": 24.93742478941035, "grad_norm": Infinity, "learning_rate": 0.0001718882286740866, "loss": 11.164, "step": 207230 }, { "epoch": 24.938628158844764, "grad_norm": Infinity, "learning_rate": 0.00017188558415302902, "loss": 11.0828, "step": 207240 }, { "epoch": 24.939831528279182, "grad_norm": Infinity, "learning_rate": 0.0001718829395279353, "loss": 11.1284, "step": 207250 }, { "epoch": 24.9410348977136, "grad_norm": Infinity, "learning_rate": 0.00017188029479880918, "loss": 11.0516, "step": 207260 }, { "epoch": 24.942238267148014, "grad_norm": Infinity, "learning_rate": 0.0001718776499656545, "loss": 11.1271, "step": 207270 }, { "epoch": 24.94344163658243, "grad_norm": Infinity, "learning_rate": 0.0001718750050284752, "loss": 11.1813, "step": 207280 }, { "epoch": 24.94464500601685, "grad_norm": Infinity, "learning_rate": 0.00017187235998727499, "loss": 11.0852, "step": 207290 }, { "epoch": 24.945848375451263, "grad_norm": Infinity, "learning_rate": 0.0001718697148420577, "loss": 11.0204, "step": 207300 }, { "epoch": 24.94705174488568, "grad_norm": Infinity, "learning_rate": 0.00017186706959282721, "loss": 11.0849, "step": 207310 }, { "epoch": 24.948255114320098, "grad_norm": Infinity, "learning_rate": 0.00017186442423958731, "loss": 11.1137, "step": 207320 }, { "epoch": 24.949458483754512, "grad_norm": Infinity, "learning_rate": 0.00017186177878234188, "loss": 11.1212, "step": 207330 }, { "epoch": 24.95066185318893, "grad_norm": Infinity, "learning_rate": 0.00017185913322109467, "loss": 11.1826, "step": 207340 }, { "epoch": 24.951865222623347, "grad_norm": Infinity, "learning_rate": 0.00017185648755584958, "loss": 11.1985, "step": 207350 }, { "epoch": 24.95306859205776, "grad_norm": Infinity, "learning_rate": 0.0001718538417866104, "loss": 11.0842, "step": 207360 }, { "epoch": 24.95427196149218, "grad_norm": Infinity, "learning_rate": 0.000171851195913381, "loss": 11.138, "step": 207370 }, { "epoch": 24.955475330926596, "grad_norm": Infinity, "learning_rate": 0.00017184854993616515, "loss": 11.1469, "step": 207380 }, { "epoch": 24.95667870036101, "grad_norm": Infinity, "learning_rate": 0.00017184590385496673, "loss": 11.2099, "step": 207390 }, { "epoch": 24.957882069795428, "grad_norm": Infinity, "learning_rate": 0.00017184325766978953, "loss": 11.1662, "step": 207400 }, { "epoch": 24.959085439229845, "grad_norm": Infinity, "learning_rate": 0.0001718406113806374, "loss": 11.1903, "step": 207410 }, { "epoch": 24.96028880866426, "grad_norm": Infinity, "learning_rate": 0.0001718379649875142, "loss": 11.1444, "step": 207420 }, { "epoch": 24.961492178098677, "grad_norm": Infinity, "learning_rate": 0.0001718353184904237, "loss": 11.1144, "step": 207430 }, { "epoch": 24.96269554753309, "grad_norm": Infinity, "learning_rate": 0.00017183267188936973, "loss": 11.2589, "step": 207440 }, { "epoch": 24.96389891696751, "grad_norm": Infinity, "learning_rate": 0.0001718300251843562, "loss": 11.1186, "step": 207450 }, { "epoch": 24.965102286401926, "grad_norm": Infinity, "learning_rate": 0.00017182737837538684, "loss": 11.0918, "step": 207460 }, { "epoch": 24.96630565583634, "grad_norm": Infinity, "learning_rate": 0.00017182473146246558, "loss": 11.1233, "step": 207470 }, { "epoch": 24.967509025270758, "grad_norm": Infinity, "learning_rate": 0.00017182208444559618, "loss": 11.0938, "step": 207480 }, { "epoch": 24.968712394705175, "grad_norm": Infinity, "learning_rate": 0.00017181943732478248, "loss": 11.2075, "step": 207490 }, { "epoch": 24.96991576413959, "grad_norm": Infinity, "learning_rate": 0.00017181679010002835, "loss": 11.1433, "step": 207500 }, { "epoch": 24.971119133574007, "grad_norm": Infinity, "learning_rate": 0.00017181414277133756, "loss": 11.2439, "step": 207510 }, { "epoch": 24.972322503008424, "grad_norm": Infinity, "learning_rate": 0.00017181149533871401, "loss": 11.0801, "step": 207520 }, { "epoch": 24.97352587244284, "grad_norm": Infinity, "learning_rate": 0.00017180884780216147, "loss": 11.1113, "step": 207530 }, { "epoch": 24.974729241877256, "grad_norm": Infinity, "learning_rate": 0.00017180620016168382, "loss": 11.2429, "step": 207540 }, { "epoch": 24.975932611311674, "grad_norm": Infinity, "learning_rate": 0.00017180355241728484, "loss": 11.1875, "step": 207550 }, { "epoch": 24.977135980746088, "grad_norm": Infinity, "learning_rate": 0.0001718009045689684, "loss": 11.1739, "step": 207560 }, { "epoch": 24.978339350180505, "grad_norm": Infinity, "learning_rate": 0.00017179825661673833, "loss": 11.1301, "step": 207570 }, { "epoch": 24.979542719614923, "grad_norm": Infinity, "learning_rate": 0.00017179560856059846, "loss": 11.2103, "step": 207580 }, { "epoch": 24.980746089049337, "grad_norm": Infinity, "learning_rate": 0.00017179296040055264, "loss": 11.1949, "step": 207590 }, { "epoch": 24.981949458483754, "grad_norm": Infinity, "learning_rate": 0.00017179031213660466, "loss": 11.1181, "step": 207600 }, { "epoch": 24.983152827918172, "grad_norm": Infinity, "learning_rate": 0.00017178766376875837, "loss": 11.145, "step": 207610 }, { "epoch": 24.984356197352586, "grad_norm": Infinity, "learning_rate": 0.0001717850152970176, "loss": 11.1285, "step": 207620 }, { "epoch": 24.985559566787003, "grad_norm": Infinity, "learning_rate": 0.0001717823667213862, "loss": 11.1754, "step": 207630 }, { "epoch": 24.98676293622142, "grad_norm": Infinity, "learning_rate": 0.000171779718041868, "loss": 11.174, "step": 207640 }, { "epoch": 24.987966305655835, "grad_norm": Infinity, "learning_rate": 0.00017177706925846682, "loss": 11.1765, "step": 207650 }, { "epoch": 24.989169675090253, "grad_norm": Infinity, "learning_rate": 0.0001717744203711865, "loss": 11.0419, "step": 207660 }, { "epoch": 24.99037304452467, "grad_norm": Infinity, "learning_rate": 0.00017177177138003087, "loss": 11.1228, "step": 207670 }, { "epoch": 24.991576413959084, "grad_norm": Infinity, "learning_rate": 0.00017176912228500377, "loss": 11.2689, "step": 207680 }, { "epoch": 24.9927797833935, "grad_norm": Infinity, "learning_rate": 0.00017176647308610906, "loss": 11.2195, "step": 207690 }, { "epoch": 24.99398315282792, "grad_norm": Infinity, "learning_rate": 0.0001717638237833505, "loss": 11.0816, "step": 207700 }, { "epoch": 24.995186522262333, "grad_norm": Infinity, "learning_rate": 0.00017176117437673195, "loss": 11.1939, "step": 207710 }, { "epoch": 24.99638989169675, "grad_norm": Infinity, "learning_rate": 0.00017175852486625733, "loss": 11.0975, "step": 207720 }, { "epoch": 24.99759326113117, "grad_norm": Infinity, "learning_rate": 0.00017175587525193036, "loss": 11.0899, "step": 207730 }, { "epoch": 24.998796630565582, "grad_norm": Infinity, "learning_rate": 0.00017175322553375494, "loss": 11.1543, "step": 207740 }, { "epoch": 25.0, "grad_norm": Infinity, "learning_rate": 0.0001717505757117349, "loss": 11.0606, "step": 207750 }, { "epoch": 25.0, "eval_loss": 11.139603614807129, "eval_runtime": 119.5071, "eval_samples_per_second": 61.812, "eval_steps_per_second": 7.732, "step": 207750 } ], "logging_steps": 10, "max_steps": 831000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 4155, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.3680176217994035e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }