{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 25000, "global_step": 183345, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016362595107584062, "grad_norm": 9.97589111328125, "learning_rate": 5.4540496318516504e-09, "loss": 0.8674, "step": 10 }, { "epoch": 0.00032725190215168124, "grad_norm": 9.567214012145996, "learning_rate": 1.0908099263703301e-08, "loss": 0.8385, "step": 20 }, { "epoch": 0.0004908778532275219, "grad_norm": 11.185924530029297, "learning_rate": 1.636214889555495e-08, "loss": 0.8346, "step": 30 }, { "epoch": 0.0006545038043033625, "grad_norm": 7.626499652862549, "learning_rate": 2.1816198527406602e-08, "loss": 0.8504, "step": 40 }, { "epoch": 0.0008181297553792031, "grad_norm": 13.738503456115723, "learning_rate": 2.7270248159258253e-08, "loss": 0.902, "step": 50 }, { "epoch": 0.0009817557064550437, "grad_norm": 11.20395565032959, "learning_rate": 3.27242977911099e-08, "loss": 0.7915, "step": 60 }, { "epoch": 0.0011453816575308843, "grad_norm": 9.65567398071289, "learning_rate": 3.8178347422961555e-08, "loss": 0.8506, "step": 70 }, { "epoch": 0.001309007608606725, "grad_norm": 9.179412841796875, "learning_rate": 4.3632397054813203e-08, "loss": 0.8165, "step": 80 }, { "epoch": 0.0014726335596825656, "grad_norm": 9.80648136138916, "learning_rate": 4.908644668666486e-08, "loss": 0.8035, "step": 90 }, { "epoch": 0.0016362595107584062, "grad_norm": 11.501489639282227, "learning_rate": 5.4540496318516506e-08, "loss": 0.842, "step": 100 }, { "epoch": 0.0017998854618342468, "grad_norm": 10.568979263305664, "learning_rate": 5.999454595036815e-08, "loss": 0.8199, "step": 110 }, { "epoch": 0.0019635114129100874, "grad_norm": 10.183390617370605, "learning_rate": 6.54485955822198e-08, "loss": 0.8713, "step": 120 }, { "epoch": 0.002127137363985928, "grad_norm": 10.735555648803711, "learning_rate": 7.090264521407145e-08, "loss": 0.7941, "step": 130 }, { "epoch": 0.0022907633150617687, "grad_norm": 9.965042114257812, "learning_rate": 7.635669484592311e-08, "loss": 0.8091, "step": 140 }, { "epoch": 0.0024543892661376093, "grad_norm": 11.738472938537598, "learning_rate": 8.181074447777476e-08, "loss": 0.7949, "step": 150 }, { "epoch": 0.00261801521721345, "grad_norm": 10.930262565612793, "learning_rate": 8.726479410962641e-08, "loss": 0.8119, "step": 160 }, { "epoch": 0.0027816411682892906, "grad_norm": 7.951657772064209, "learning_rate": 9.271884374147805e-08, "loss": 0.7635, "step": 170 }, { "epoch": 0.002945267119365131, "grad_norm": 10.602276802062988, "learning_rate": 9.817289337332972e-08, "loss": 0.758, "step": 180 }, { "epoch": 0.003108893070440972, "grad_norm": 8.189801216125488, "learning_rate": 1.0362694300518136e-07, "loss": 0.7383, "step": 190 }, { "epoch": 0.0032725190215168124, "grad_norm": 9.993020057678223, "learning_rate": 1.0908099263703301e-07, "loss": 0.7053, "step": 200 }, { "epoch": 0.003436144972592653, "grad_norm": 8.978446960449219, "learning_rate": 1.1453504226888466e-07, "loss": 0.713, "step": 210 }, { "epoch": 0.0035997709236684937, "grad_norm": 7.887515068054199, "learning_rate": 1.199890919007363e-07, "loss": 0.6923, "step": 220 }, { "epoch": 0.0037633968747443343, "grad_norm": 7.2815632820129395, "learning_rate": 1.2544314153258796e-07, "loss": 0.6419, "step": 230 }, { "epoch": 0.003927022825820175, "grad_norm": 7.335437774658203, "learning_rate": 1.308971911644396e-07, "loss": 0.5948, "step": 240 }, { "epoch": 0.004090648776896016, "grad_norm": 4.979640960693359, "learning_rate": 1.3635124079629125e-07, "loss": 0.5236, "step": 250 }, { "epoch": 0.004254274727971856, "grad_norm": 5.992700099945068, "learning_rate": 1.418052904281429e-07, "loss": 0.5299, "step": 260 }, { "epoch": 0.004417900679047697, "grad_norm": 4.274743556976318, "learning_rate": 1.4725934005999455e-07, "loss": 0.479, "step": 270 }, { "epoch": 0.004581526630123537, "grad_norm": 4.86069917678833, "learning_rate": 1.5271338969184622e-07, "loss": 0.4442, "step": 280 }, { "epoch": 0.004745152581199378, "grad_norm": 4.358114719390869, "learning_rate": 1.5816743932369784e-07, "loss": 0.399, "step": 290 }, { "epoch": 0.004908778532275219, "grad_norm": 3.6695806980133057, "learning_rate": 1.6362148895554952e-07, "loss": 0.3686, "step": 300 }, { "epoch": 0.00507240448335106, "grad_norm": 3.6369130611419678, "learning_rate": 1.6907553858740117e-07, "loss": 0.3371, "step": 310 }, { "epoch": 0.0052360304344269, "grad_norm": 3.5566537380218506, "learning_rate": 1.7452958821925281e-07, "loss": 0.2841, "step": 320 }, { "epoch": 0.005399656385502741, "grad_norm": 2.6107022762298584, "learning_rate": 1.7998363785110446e-07, "loss": 0.2614, "step": 330 }, { "epoch": 0.005563282336578581, "grad_norm": 2.713139295578003, "learning_rate": 1.854376874829561e-07, "loss": 0.2089, "step": 340 }, { "epoch": 0.005726908287654422, "grad_norm": 2.8379340171813965, "learning_rate": 1.9089173711480776e-07, "loss": 0.231, "step": 350 }, { "epoch": 0.005890534238730262, "grad_norm": 2.4595868587493896, "learning_rate": 1.9634578674665943e-07, "loss": 0.2114, "step": 360 }, { "epoch": 0.006054160189806103, "grad_norm": 2.988358736038208, "learning_rate": 2.0179983637851105e-07, "loss": 0.2023, "step": 370 }, { "epoch": 0.006217786140881944, "grad_norm": 3.147031545639038, "learning_rate": 2.0725388601036273e-07, "loss": 0.1946, "step": 380 }, { "epoch": 0.006381412091957785, "grad_norm": 2.546103000640869, "learning_rate": 2.1270793564221435e-07, "loss": 0.1573, "step": 390 }, { "epoch": 0.006545038043033625, "grad_norm": 2.4364805221557617, "learning_rate": 2.1816198527406602e-07, "loss": 0.1953, "step": 400 }, { "epoch": 0.006708663994109466, "grad_norm": 2.171234130859375, "learning_rate": 2.2361603490591767e-07, "loss": 0.1712, "step": 410 }, { "epoch": 0.006872289945185306, "grad_norm": 2.1505839824676514, "learning_rate": 2.2907008453776932e-07, "loss": 0.1713, "step": 420 }, { "epoch": 0.007035915896261147, "grad_norm": 2.621340036392212, "learning_rate": 2.3452413416962097e-07, "loss": 0.1461, "step": 430 }, { "epoch": 0.007199541847336987, "grad_norm": 2.4786622524261475, "learning_rate": 2.399781838014726e-07, "loss": 0.1646, "step": 440 }, { "epoch": 0.007363167798412828, "grad_norm": 2.546891927719116, "learning_rate": 2.4543223343332426e-07, "loss": 0.1606, "step": 450 }, { "epoch": 0.0075267937494886685, "grad_norm": 2.1033828258514404, "learning_rate": 2.508862830651759e-07, "loss": 0.1464, "step": 460 }, { "epoch": 0.00769041970056451, "grad_norm": 2.094399929046631, "learning_rate": 2.5634033269702756e-07, "loss": 0.133, "step": 470 }, { "epoch": 0.00785404565164035, "grad_norm": 2.470501661300659, "learning_rate": 2.617943823288792e-07, "loss": 0.1537, "step": 480 }, { "epoch": 0.00801767160271619, "grad_norm": 2.9982750415802, "learning_rate": 2.6724843196073086e-07, "loss": 0.1262, "step": 490 }, { "epoch": 0.008181297553792032, "grad_norm": 2.8229923248291016, "learning_rate": 2.727024815925825e-07, "loss": 0.127, "step": 500 }, { "epoch": 0.008344923504867872, "grad_norm": 2.3655219078063965, "learning_rate": 2.781565312244342e-07, "loss": 0.1187, "step": 510 }, { "epoch": 0.008508549455943712, "grad_norm": 3.2591352462768555, "learning_rate": 2.836105808562858e-07, "loss": 0.1421, "step": 520 }, { "epoch": 0.008672175407019552, "grad_norm": 2.98222017288208, "learning_rate": 2.890646304881375e-07, "loss": 0.1271, "step": 530 }, { "epoch": 0.008835801358095394, "grad_norm": 1.7123140096664429, "learning_rate": 2.945186801199891e-07, "loss": 0.1121, "step": 540 }, { "epoch": 0.008999427309171235, "grad_norm": 2.169299602508545, "learning_rate": 2.9997272975184074e-07, "loss": 0.1065, "step": 550 }, { "epoch": 0.009163053260247075, "grad_norm": 2.1806955337524414, "learning_rate": 3.0542677938369244e-07, "loss": 0.1074, "step": 560 }, { "epoch": 0.009326679211322917, "grad_norm": 2.582362174987793, "learning_rate": 3.1088082901554404e-07, "loss": 0.0972, "step": 570 }, { "epoch": 0.009490305162398757, "grad_norm": 2.4557204246520996, "learning_rate": 3.163348786473957e-07, "loss": 0.1209, "step": 580 }, { "epoch": 0.009653931113474597, "grad_norm": 2.2875640392303467, "learning_rate": 3.217889282792474e-07, "loss": 0.1062, "step": 590 }, { "epoch": 0.009817557064550437, "grad_norm": 2.3615801334381104, "learning_rate": 3.2724297791109904e-07, "loss": 0.0952, "step": 600 }, { "epoch": 0.00998118301562628, "grad_norm": 2.200690746307373, "learning_rate": 3.3269702754295063e-07, "loss": 0.0958, "step": 610 }, { "epoch": 0.01014480896670212, "grad_norm": 2.3095052242279053, "learning_rate": 3.3815107717480233e-07, "loss": 0.0965, "step": 620 }, { "epoch": 0.01030843491777796, "grad_norm": 2.548759937286377, "learning_rate": 3.43605126806654e-07, "loss": 0.09, "step": 630 }, { "epoch": 0.0104720608688538, "grad_norm": 2.3012022972106934, "learning_rate": 3.4905917643850563e-07, "loss": 0.1115, "step": 640 }, { "epoch": 0.010635686819929642, "grad_norm": 2.221707582473755, "learning_rate": 3.545132260703572e-07, "loss": 0.0783, "step": 650 }, { "epoch": 0.010799312771005482, "grad_norm": 2.049013137817383, "learning_rate": 3.599672757022089e-07, "loss": 0.0842, "step": 660 }, { "epoch": 0.010962938722081322, "grad_norm": 2.9410202503204346, "learning_rate": 3.6542132533406057e-07, "loss": 0.0937, "step": 670 }, { "epoch": 0.011126564673157162, "grad_norm": 2.20444393157959, "learning_rate": 3.708753749659122e-07, "loss": 0.0988, "step": 680 }, { "epoch": 0.011290190624233004, "grad_norm": 1.746721863746643, "learning_rate": 3.7632942459776387e-07, "loss": 0.0854, "step": 690 }, { "epoch": 0.011453816575308844, "grad_norm": 2.119723320007324, "learning_rate": 3.817834742296155e-07, "loss": 0.0854, "step": 700 }, { "epoch": 0.011617442526384684, "grad_norm": 2.0076067447662354, "learning_rate": 3.8723752386146716e-07, "loss": 0.0821, "step": 710 }, { "epoch": 0.011781068477460525, "grad_norm": 2.034360647201538, "learning_rate": 3.9269157349331886e-07, "loss": 0.0891, "step": 720 }, { "epoch": 0.011944694428536367, "grad_norm": 2.2256293296813965, "learning_rate": 3.9814562312517046e-07, "loss": 0.0774, "step": 730 }, { "epoch": 0.012108320379612207, "grad_norm": 2.375505208969116, "learning_rate": 4.035996727570221e-07, "loss": 0.0891, "step": 740 }, { "epoch": 0.012271946330688047, "grad_norm": 2.013505458831787, "learning_rate": 4.0905372238887375e-07, "loss": 0.0904, "step": 750 }, { "epoch": 0.012435572281763887, "grad_norm": 2.3231635093688965, "learning_rate": 4.1450777202072546e-07, "loss": 0.0816, "step": 760 }, { "epoch": 0.012599198232839729, "grad_norm": 1.8532401323318481, "learning_rate": 4.1996182165257705e-07, "loss": 0.0987, "step": 770 }, { "epoch": 0.01276282418391557, "grad_norm": 1.8238180875778198, "learning_rate": 4.254158712844287e-07, "loss": 0.0807, "step": 780 }, { "epoch": 0.01292645013499141, "grad_norm": 2.107550859451294, "learning_rate": 4.308699209162804e-07, "loss": 0.0948, "step": 790 }, { "epoch": 0.01309007608606725, "grad_norm": 3.030282974243164, "learning_rate": 4.3632397054813205e-07, "loss": 0.0847, "step": 800 }, { "epoch": 0.013253702037143092, "grad_norm": 2.112107515335083, "learning_rate": 4.4177802017998364e-07, "loss": 0.0639, "step": 810 }, { "epoch": 0.013417327988218932, "grad_norm": 1.8931057453155518, "learning_rate": 4.4723206981183534e-07, "loss": 0.0842, "step": 820 }, { "epoch": 0.013580953939294772, "grad_norm": 2.5364418029785156, "learning_rate": 4.52686119443687e-07, "loss": 0.0791, "step": 830 }, { "epoch": 0.013744579890370612, "grad_norm": 1.8763220310211182, "learning_rate": 4.5814016907553864e-07, "loss": 0.0784, "step": 840 }, { "epoch": 0.013908205841446454, "grad_norm": 1.9687457084655762, "learning_rate": 4.6359421870739023e-07, "loss": 0.0688, "step": 850 }, { "epoch": 0.014071831792522294, "grad_norm": 2.191291570663452, "learning_rate": 4.6904826833924193e-07, "loss": 0.0728, "step": 860 }, { "epoch": 0.014235457743598134, "grad_norm": 2.8170077800750732, "learning_rate": 4.745023179710936e-07, "loss": 0.0672, "step": 870 }, { "epoch": 0.014399083694673975, "grad_norm": 2.217576265335083, "learning_rate": 4.799563676029452e-07, "loss": 0.0806, "step": 880 }, { "epoch": 0.014562709645749817, "grad_norm": 1.9850986003875732, "learning_rate": 4.854104172347969e-07, "loss": 0.0594, "step": 890 }, { "epoch": 0.014726335596825657, "grad_norm": 2.1633453369140625, "learning_rate": 4.908644668666485e-07, "loss": 0.0686, "step": 900 }, { "epoch": 0.014889961547901497, "grad_norm": 1.5597481727600098, "learning_rate": 4.963185164985001e-07, "loss": 0.0588, "step": 910 }, { "epoch": 0.015053587498977337, "grad_norm": 2.029129981994629, "learning_rate": 5.017725661303518e-07, "loss": 0.0626, "step": 920 }, { "epoch": 0.015217213450053179, "grad_norm": 1.7281333208084106, "learning_rate": 5.072266157622035e-07, "loss": 0.0641, "step": 930 }, { "epoch": 0.01538083940112902, "grad_norm": 2.016092538833618, "learning_rate": 5.126806653940551e-07, "loss": 0.0639, "step": 940 }, { "epoch": 0.01554446535220486, "grad_norm": 1.8831826448440552, "learning_rate": 5.181347150259067e-07, "loss": 0.0686, "step": 950 }, { "epoch": 0.0157080913032807, "grad_norm": 1.9618314504623413, "learning_rate": 5.235887646577584e-07, "loss": 0.0683, "step": 960 }, { "epoch": 0.01587171725435654, "grad_norm": 2.9861929416656494, "learning_rate": 5.290428142896101e-07, "loss": 0.0488, "step": 970 }, { "epoch": 0.01603534320543238, "grad_norm": 1.9134918451309204, "learning_rate": 5.344968639214617e-07, "loss": 0.0535, "step": 980 }, { "epoch": 0.016198969156508224, "grad_norm": 1.8413556814193726, "learning_rate": 5.399509135533134e-07, "loss": 0.0769, "step": 990 }, { "epoch": 0.016362595107584064, "grad_norm": 1.905432939529419, "learning_rate": 5.45404963185165e-07, "loss": 0.048, "step": 1000 }, { "epoch": 0.016526221058659904, "grad_norm": 2.4195327758789062, "learning_rate": 5.508590128170167e-07, "loss": 0.0615, "step": 1010 }, { "epoch": 0.016689847009735744, "grad_norm": 1.89035964012146, "learning_rate": 5.563130624488684e-07, "loss": 0.0616, "step": 1020 }, { "epoch": 0.016853472960811584, "grad_norm": 2.0280380249023438, "learning_rate": 5.6176711208072e-07, "loss": 0.0625, "step": 1030 }, { "epoch": 0.017017098911887425, "grad_norm": 2.0723822116851807, "learning_rate": 5.672211617125716e-07, "loss": 0.0631, "step": 1040 }, { "epoch": 0.017180724862963265, "grad_norm": 2.0635242462158203, "learning_rate": 5.726752113444233e-07, "loss": 0.0686, "step": 1050 }, { "epoch": 0.017344350814039105, "grad_norm": 2.308194398880005, "learning_rate": 5.78129260976275e-07, "loss": 0.0552, "step": 1060 }, { "epoch": 0.01750797676511495, "grad_norm": 2.0093746185302734, "learning_rate": 5.835833106081266e-07, "loss": 0.0667, "step": 1070 }, { "epoch": 0.01767160271619079, "grad_norm": 2.412339210510254, "learning_rate": 5.890373602399782e-07, "loss": 0.0595, "step": 1080 }, { "epoch": 0.01783522866726663, "grad_norm": 2.2292864322662354, "learning_rate": 5.944914098718299e-07, "loss": 0.0558, "step": 1090 }, { "epoch": 0.01799885461834247, "grad_norm": 1.8276525735855103, "learning_rate": 5.999454595036815e-07, "loss": 0.0693, "step": 1100 }, { "epoch": 0.01816248056941831, "grad_norm": 2.1351993083953857, "learning_rate": 6.053995091355332e-07, "loss": 0.0501, "step": 1110 }, { "epoch": 0.01832610652049415, "grad_norm": 1.4929412603378296, "learning_rate": 6.108535587673849e-07, "loss": 0.047, "step": 1120 }, { "epoch": 0.01848973247156999, "grad_norm": 2.781839609146118, "learning_rate": 6.163076083992365e-07, "loss": 0.0629, "step": 1130 }, { "epoch": 0.018653358422645833, "grad_norm": 1.4084773063659668, "learning_rate": 6.217616580310881e-07, "loss": 0.0459, "step": 1140 }, { "epoch": 0.018816984373721674, "grad_norm": 2.2878854274749756, "learning_rate": 6.272157076629398e-07, "loss": 0.0582, "step": 1150 }, { "epoch": 0.018980610324797514, "grad_norm": 1.6368273496627808, "learning_rate": 6.326697572947914e-07, "loss": 0.0461, "step": 1160 }, { "epoch": 0.019144236275873354, "grad_norm": 1.8522087335586548, "learning_rate": 6.381238069266431e-07, "loss": 0.0426, "step": 1170 }, { "epoch": 0.019307862226949194, "grad_norm": 1.780583143234253, "learning_rate": 6.435778565584948e-07, "loss": 0.0485, "step": 1180 }, { "epoch": 0.019471488178025034, "grad_norm": 1.912996530532837, "learning_rate": 6.490319061903464e-07, "loss": 0.0395, "step": 1190 }, { "epoch": 0.019635114129100874, "grad_norm": 1.9334734678268433, "learning_rate": 6.544859558221981e-07, "loss": 0.0631, "step": 1200 }, { "epoch": 0.019798740080176715, "grad_norm": 1.8987680673599243, "learning_rate": 6.599400054540498e-07, "loss": 0.0527, "step": 1210 }, { "epoch": 0.01996236603125256, "grad_norm": 2.10577392578125, "learning_rate": 6.653940550859013e-07, "loss": 0.0457, "step": 1220 }, { "epoch": 0.0201259919823284, "grad_norm": 1.821528434753418, "learning_rate": 6.70848104717753e-07, "loss": 0.0524, "step": 1230 }, { "epoch": 0.02028961793340424, "grad_norm": 1.9784972667694092, "learning_rate": 6.763021543496047e-07, "loss": 0.0503, "step": 1240 }, { "epoch": 0.02045324388448008, "grad_norm": 1.4815479516983032, "learning_rate": 6.817562039814563e-07, "loss": 0.0535, "step": 1250 }, { "epoch": 0.02061686983555592, "grad_norm": 1.542924404144287, "learning_rate": 6.87210253613308e-07, "loss": 0.0532, "step": 1260 }, { "epoch": 0.02078049578663176, "grad_norm": 2.2649707794189453, "learning_rate": 6.926643032451597e-07, "loss": 0.0492, "step": 1270 }, { "epoch": 0.0209441217377076, "grad_norm": 1.3413527011871338, "learning_rate": 6.981183528770113e-07, "loss": 0.0468, "step": 1280 }, { "epoch": 0.02110774768878344, "grad_norm": 1.7332874536514282, "learning_rate": 7.03572402508863e-07, "loss": 0.0549, "step": 1290 }, { "epoch": 0.021271373639859283, "grad_norm": 2.0860042572021484, "learning_rate": 7.090264521407144e-07, "loss": 0.0499, "step": 1300 }, { "epoch": 0.021434999590935123, "grad_norm": 1.7805142402648926, "learning_rate": 7.144805017725661e-07, "loss": 0.0485, "step": 1310 }, { "epoch": 0.021598625542010964, "grad_norm": 2.4757297039031982, "learning_rate": 7.199345514044178e-07, "loss": 0.0543, "step": 1320 }, { "epoch": 0.021762251493086804, "grad_norm": 1.8308428525924683, "learning_rate": 7.253886010362694e-07, "loss": 0.0392, "step": 1330 }, { "epoch": 0.021925877444162644, "grad_norm": 1.7218717336654663, "learning_rate": 7.308426506681211e-07, "loss": 0.0411, "step": 1340 }, { "epoch": 0.022089503395238484, "grad_norm": 1.776108980178833, "learning_rate": 7.362967002999728e-07, "loss": 0.0522, "step": 1350 }, { "epoch": 0.022253129346314324, "grad_norm": 2.626009225845337, "learning_rate": 7.417507499318244e-07, "loss": 0.05, "step": 1360 }, { "epoch": 0.022416755297390165, "grad_norm": 2.433746337890625, "learning_rate": 7.472047995636761e-07, "loss": 0.0584, "step": 1370 }, { "epoch": 0.022580381248466008, "grad_norm": 2.144934892654419, "learning_rate": 7.526588491955277e-07, "loss": 0.0535, "step": 1380 }, { "epoch": 0.02274400719954185, "grad_norm": 1.6727941036224365, "learning_rate": 7.581128988273793e-07, "loss": 0.0455, "step": 1390 }, { "epoch": 0.02290763315061769, "grad_norm": 2.0695009231567383, "learning_rate": 7.63566948459231e-07, "loss": 0.0511, "step": 1400 }, { "epoch": 0.02307125910169353, "grad_norm": 1.9832948446273804, "learning_rate": 7.690209980910827e-07, "loss": 0.0535, "step": 1410 }, { "epoch": 0.02323488505276937, "grad_norm": 1.469416618347168, "learning_rate": 7.744750477229343e-07, "loss": 0.053, "step": 1420 }, { "epoch": 0.02339851100384521, "grad_norm": 1.8490004539489746, "learning_rate": 7.79929097354786e-07, "loss": 0.0485, "step": 1430 }, { "epoch": 0.02356213695492105, "grad_norm": 1.8629415035247803, "learning_rate": 7.853831469866377e-07, "loss": 0.0539, "step": 1440 }, { "epoch": 0.02372576290599689, "grad_norm": 2.072378158569336, "learning_rate": 7.908371966184892e-07, "loss": 0.0429, "step": 1450 }, { "epoch": 0.023889388857072733, "grad_norm": 1.342645287513733, "learning_rate": 7.962912462503409e-07, "loss": 0.0463, "step": 1460 }, { "epoch": 0.024053014808148573, "grad_norm": 1.9463469982147217, "learning_rate": 8.017452958821926e-07, "loss": 0.0368, "step": 1470 }, { "epoch": 0.024216640759224414, "grad_norm": 1.7826083898544312, "learning_rate": 8.071993455140442e-07, "loss": 0.0462, "step": 1480 }, { "epoch": 0.024380266710300254, "grad_norm": 1.5456538200378418, "learning_rate": 8.126533951458959e-07, "loss": 0.0388, "step": 1490 }, { "epoch": 0.024543892661376094, "grad_norm": 1.6366924047470093, "learning_rate": 8.181074447777475e-07, "loss": 0.0421, "step": 1500 }, { "epoch": 0.024707518612451934, "grad_norm": 2.5628910064697266, "learning_rate": 8.235614944095992e-07, "loss": 0.0434, "step": 1510 }, { "epoch": 0.024871144563527774, "grad_norm": 2.053734302520752, "learning_rate": 8.290155440414509e-07, "loss": 0.0476, "step": 1520 }, { "epoch": 0.025034770514603615, "grad_norm": 1.4935264587402344, "learning_rate": 8.344695936733024e-07, "loss": 0.0545, "step": 1530 }, { "epoch": 0.025198396465679458, "grad_norm": 1.3891876935958862, "learning_rate": 8.399236433051541e-07, "loss": 0.0539, "step": 1540 }, { "epoch": 0.0253620224167553, "grad_norm": 1.82400381565094, "learning_rate": 8.453776929370058e-07, "loss": 0.0452, "step": 1550 }, { "epoch": 0.02552564836783114, "grad_norm": 1.3610445261001587, "learning_rate": 8.508317425688574e-07, "loss": 0.0426, "step": 1560 }, { "epoch": 0.02568927431890698, "grad_norm": 1.5624148845672607, "learning_rate": 8.562857922007091e-07, "loss": 0.0408, "step": 1570 }, { "epoch": 0.02585290026998282, "grad_norm": 1.5950453281402588, "learning_rate": 8.617398418325608e-07, "loss": 0.0389, "step": 1580 }, { "epoch": 0.02601652622105866, "grad_norm": 1.7113897800445557, "learning_rate": 8.671938914644124e-07, "loss": 0.0466, "step": 1590 }, { "epoch": 0.0261801521721345, "grad_norm": 1.0999279022216797, "learning_rate": 8.726479410962641e-07, "loss": 0.045, "step": 1600 }, { "epoch": 0.02634377812321034, "grad_norm": 1.6611063480377197, "learning_rate": 8.781019907281158e-07, "loss": 0.0429, "step": 1610 }, { "epoch": 0.026507404074286183, "grad_norm": 1.202569842338562, "learning_rate": 8.835560403599673e-07, "loss": 0.0399, "step": 1620 }, { "epoch": 0.026671030025362023, "grad_norm": 1.280436396598816, "learning_rate": 8.89010089991819e-07, "loss": 0.0308, "step": 1630 }, { "epoch": 0.026834655976437864, "grad_norm": 1.6949858665466309, "learning_rate": 8.944641396236707e-07, "loss": 0.0402, "step": 1640 }, { "epoch": 0.026998281927513704, "grad_norm": 2.375936269760132, "learning_rate": 8.999181892555223e-07, "loss": 0.0474, "step": 1650 }, { "epoch": 0.027161907878589544, "grad_norm": 1.9269670248031616, "learning_rate": 9.05372238887374e-07, "loss": 0.0431, "step": 1660 }, { "epoch": 0.027325533829665384, "grad_norm": 1.5133509635925293, "learning_rate": 9.108262885192257e-07, "loss": 0.0392, "step": 1670 }, { "epoch": 0.027489159780741224, "grad_norm": 1.6996502876281738, "learning_rate": 9.162803381510773e-07, "loss": 0.0453, "step": 1680 }, { "epoch": 0.027652785731817068, "grad_norm": 1.4514844417572021, "learning_rate": 9.217343877829289e-07, "loss": 0.0418, "step": 1690 }, { "epoch": 0.027816411682892908, "grad_norm": 1.8763824701309204, "learning_rate": 9.271884374147805e-07, "loss": 0.0431, "step": 1700 }, { "epoch": 0.02798003763396875, "grad_norm": 1.4516658782958984, "learning_rate": 9.326424870466322e-07, "loss": 0.0424, "step": 1710 }, { "epoch": 0.02814366358504459, "grad_norm": 1.6959811449050903, "learning_rate": 9.380965366784839e-07, "loss": 0.05, "step": 1720 }, { "epoch": 0.02830728953612043, "grad_norm": 1.4374184608459473, "learning_rate": 9.435505863103355e-07, "loss": 0.0345, "step": 1730 }, { "epoch": 0.02847091548719627, "grad_norm": 1.1023731231689453, "learning_rate": 9.490046359421872e-07, "loss": 0.033, "step": 1740 }, { "epoch": 0.02863454143827211, "grad_norm": 1.9677906036376953, "learning_rate": 9.544586855740388e-07, "loss": 0.0373, "step": 1750 }, { "epoch": 0.02879816738934795, "grad_norm": 1.621667742729187, "learning_rate": 9.599127352058905e-07, "loss": 0.0407, "step": 1760 }, { "epoch": 0.028961793340423793, "grad_norm": 1.875444769859314, "learning_rate": 9.653667848377422e-07, "loss": 0.0507, "step": 1770 }, { "epoch": 0.029125419291499633, "grad_norm": 1.7537697553634644, "learning_rate": 9.708208344695939e-07, "loss": 0.0415, "step": 1780 }, { "epoch": 0.029289045242575473, "grad_norm": 2.206434965133667, "learning_rate": 9.762748841014454e-07, "loss": 0.0352, "step": 1790 }, { "epoch": 0.029452671193651313, "grad_norm": 1.4959696531295776, "learning_rate": 9.81728933733297e-07, "loss": 0.0398, "step": 1800 }, { "epoch": 0.029616297144727154, "grad_norm": 1.8054625988006592, "learning_rate": 9.871829833651488e-07, "loss": 0.0243, "step": 1810 }, { "epoch": 0.029779923095802994, "grad_norm": 1.892923355102539, "learning_rate": 9.926370329970002e-07, "loss": 0.0268, "step": 1820 }, { "epoch": 0.029943549046878834, "grad_norm": 1.5711841583251953, "learning_rate": 9.98091082628852e-07, "loss": 0.0472, "step": 1830 }, { "epoch": 0.030107174997954674, "grad_norm": 1.6437585353851318, "learning_rate": 1.0035451322607036e-06, "loss": 0.0335, "step": 1840 }, { "epoch": 0.030270800949030518, "grad_norm": 1.483176589012146, "learning_rate": 1.0089991818925553e-06, "loss": 0.0417, "step": 1850 }, { "epoch": 0.030434426900106358, "grad_norm": 1.8856254816055298, "learning_rate": 1.014453231524407e-06, "loss": 0.0391, "step": 1860 }, { "epoch": 0.030598052851182198, "grad_norm": 1.3388725519180298, "learning_rate": 1.0199072811562587e-06, "loss": 0.0483, "step": 1870 }, { "epoch": 0.03076167880225804, "grad_norm": 2.1064863204956055, "learning_rate": 1.0253613307881102e-06, "loss": 0.0434, "step": 1880 }, { "epoch": 0.03092530475333388, "grad_norm": 1.2316155433654785, "learning_rate": 1.030815380419962e-06, "loss": 0.0371, "step": 1890 }, { "epoch": 0.03108893070440972, "grad_norm": 1.4669277667999268, "learning_rate": 1.0362694300518134e-06, "loss": 0.0337, "step": 1900 }, { "epoch": 0.03125255665548556, "grad_norm": 1.5199886560440063, "learning_rate": 1.0417234796836651e-06, "loss": 0.0402, "step": 1910 }, { "epoch": 0.0314161826065614, "grad_norm": 1.8782730102539062, "learning_rate": 1.0471775293155168e-06, "loss": 0.0347, "step": 1920 }, { "epoch": 0.03157980855763724, "grad_norm": 1.899007797241211, "learning_rate": 1.0526315789473685e-06, "loss": 0.0346, "step": 1930 }, { "epoch": 0.03174343450871308, "grad_norm": 1.5304501056671143, "learning_rate": 1.0580856285792202e-06, "loss": 0.0347, "step": 1940 }, { "epoch": 0.03190706045978892, "grad_norm": 1.7002626657485962, "learning_rate": 1.063539678211072e-06, "loss": 0.0275, "step": 1950 }, { "epoch": 0.03207068641086476, "grad_norm": 1.3978679180145264, "learning_rate": 1.0689937278429234e-06, "loss": 0.0398, "step": 1960 }, { "epoch": 0.032234312361940604, "grad_norm": 1.7616475820541382, "learning_rate": 1.0744477774747751e-06, "loss": 0.0295, "step": 1970 }, { "epoch": 0.03239793831301645, "grad_norm": 1.1628918647766113, "learning_rate": 1.0799018271066268e-06, "loss": 0.0403, "step": 1980 }, { "epoch": 0.032561564264092284, "grad_norm": 1.6942625045776367, "learning_rate": 1.0853558767384783e-06, "loss": 0.0373, "step": 1990 }, { "epoch": 0.03272519021516813, "grad_norm": 1.9778265953063965, "learning_rate": 1.09080992637033e-06, "loss": 0.0366, "step": 2000 }, { "epoch": 0.032888816166243964, "grad_norm": 1.5971571207046509, "learning_rate": 1.0962639760021817e-06, "loss": 0.0407, "step": 2010 }, { "epoch": 0.03305244211731981, "grad_norm": 1.4534926414489746, "learning_rate": 1.1017180256340334e-06, "loss": 0.0404, "step": 2020 }, { "epoch": 0.033216068068395645, "grad_norm": 1.2644355297088623, "learning_rate": 1.1071720752658851e-06, "loss": 0.029, "step": 2030 }, { "epoch": 0.03337969401947149, "grad_norm": 1.8251043558120728, "learning_rate": 1.1126261248977368e-06, "loss": 0.0329, "step": 2040 }, { "epoch": 0.03354331997054733, "grad_norm": 1.685950756072998, "learning_rate": 1.1180801745295883e-06, "loss": 0.0399, "step": 2050 }, { "epoch": 0.03370694592162317, "grad_norm": 1.51814866065979, "learning_rate": 1.12353422416144e-06, "loss": 0.0299, "step": 2060 }, { "epoch": 0.03387057187269901, "grad_norm": 1.473401427268982, "learning_rate": 1.1289882737932917e-06, "loss": 0.0308, "step": 2070 }, { "epoch": 0.03403419782377485, "grad_norm": 1.423195242881775, "learning_rate": 1.1344423234251432e-06, "loss": 0.0305, "step": 2080 }, { "epoch": 0.03419782377485069, "grad_norm": 1.4249560832977295, "learning_rate": 1.139896373056995e-06, "loss": 0.0289, "step": 2090 }, { "epoch": 0.03436144972592653, "grad_norm": 1.7325375080108643, "learning_rate": 1.1453504226888466e-06, "loss": 0.0306, "step": 2100 }, { "epoch": 0.03452507567700237, "grad_norm": 1.6845966577529907, "learning_rate": 1.1508044723206983e-06, "loss": 0.052, "step": 2110 }, { "epoch": 0.03468870162807821, "grad_norm": 1.3788546323776245, "learning_rate": 1.15625852195255e-06, "loss": 0.0316, "step": 2120 }, { "epoch": 0.034852327579154053, "grad_norm": 1.4934451580047607, "learning_rate": 1.1617125715844015e-06, "loss": 0.0375, "step": 2130 }, { "epoch": 0.0350159535302299, "grad_norm": 1.194960594177246, "learning_rate": 1.1671666212162532e-06, "loss": 0.0256, "step": 2140 }, { "epoch": 0.035179579481305734, "grad_norm": 1.423689842224121, "learning_rate": 1.1726206708481049e-06, "loss": 0.0419, "step": 2150 }, { "epoch": 0.03534320543238158, "grad_norm": 0.9441061615943909, "learning_rate": 1.1780747204799564e-06, "loss": 0.0321, "step": 2160 }, { "epoch": 0.035506831383457414, "grad_norm": 1.4725145101547241, "learning_rate": 1.183528770111808e-06, "loss": 0.0309, "step": 2170 }, { "epoch": 0.03567045733453326, "grad_norm": 1.6861231327056885, "learning_rate": 1.1889828197436598e-06, "loss": 0.0348, "step": 2180 }, { "epoch": 0.035834083285609095, "grad_norm": 2.181194543838501, "learning_rate": 1.1944368693755113e-06, "loss": 0.0321, "step": 2190 }, { "epoch": 0.03599770923668494, "grad_norm": 2.2255334854125977, "learning_rate": 1.199890919007363e-06, "loss": 0.0312, "step": 2200 }, { "epoch": 0.03616133518776078, "grad_norm": 1.7597378492355347, "learning_rate": 1.2053449686392147e-06, "loss": 0.0369, "step": 2210 }, { "epoch": 0.03632496113883662, "grad_norm": 2.365129232406616, "learning_rate": 1.2107990182710664e-06, "loss": 0.0312, "step": 2220 }, { "epoch": 0.03648858708991246, "grad_norm": 1.986585021018982, "learning_rate": 1.216253067902918e-06, "loss": 0.0395, "step": 2230 }, { "epoch": 0.0366522130409883, "grad_norm": 1.7252691984176636, "learning_rate": 1.2217071175347698e-06, "loss": 0.0272, "step": 2240 }, { "epoch": 0.03681583899206414, "grad_norm": 1.6250042915344238, "learning_rate": 1.2271611671666213e-06, "loss": 0.0317, "step": 2250 }, { "epoch": 0.03697946494313998, "grad_norm": 1.8350541591644287, "learning_rate": 1.232615216798473e-06, "loss": 0.0283, "step": 2260 }, { "epoch": 0.03714309089421582, "grad_norm": 1.3847283124923706, "learning_rate": 1.2380692664303247e-06, "loss": 0.0288, "step": 2270 }, { "epoch": 0.03730671684529167, "grad_norm": 0.9745829105377197, "learning_rate": 1.2435233160621762e-06, "loss": 0.0284, "step": 2280 }, { "epoch": 0.0374703427963675, "grad_norm": 2.5010721683502197, "learning_rate": 1.2489773656940279e-06, "loss": 0.0276, "step": 2290 }, { "epoch": 0.03763396874744335, "grad_norm": 2.3063294887542725, "learning_rate": 1.2544314153258796e-06, "loss": 0.0329, "step": 2300 }, { "epoch": 0.037797594698519184, "grad_norm": 0.9539628624916077, "learning_rate": 1.2598854649577313e-06, "loss": 0.0316, "step": 2310 }, { "epoch": 0.03796122064959503, "grad_norm": 1.117640495300293, "learning_rate": 1.2653395145895827e-06, "loss": 0.0294, "step": 2320 }, { "epoch": 0.038124846600670864, "grad_norm": 1.8013001680374146, "learning_rate": 1.2707935642214344e-06, "loss": 0.0338, "step": 2330 }, { "epoch": 0.03828847255174671, "grad_norm": 1.5713682174682617, "learning_rate": 1.2762476138532861e-06, "loss": 0.03, "step": 2340 }, { "epoch": 0.038452098502822545, "grad_norm": 1.7282088994979858, "learning_rate": 1.2817016634851378e-06, "loss": 0.0311, "step": 2350 }, { "epoch": 0.03861572445389839, "grad_norm": 0.9448469877243042, "learning_rate": 1.2871557131169895e-06, "loss": 0.0281, "step": 2360 }, { "epoch": 0.03877935040497423, "grad_norm": 0.909325122833252, "learning_rate": 1.2926097627488413e-06, "loss": 0.0263, "step": 2370 }, { "epoch": 0.03894297635605007, "grad_norm": 1.1525700092315674, "learning_rate": 1.2980638123806927e-06, "loss": 0.0279, "step": 2380 }, { "epoch": 0.03910660230712591, "grad_norm": 1.975926160812378, "learning_rate": 1.3035178620125444e-06, "loss": 0.0257, "step": 2390 }, { "epoch": 0.03927022825820175, "grad_norm": 1.2981781959533691, "learning_rate": 1.3089719116443961e-06, "loss": 0.0249, "step": 2400 }, { "epoch": 0.03943385420927759, "grad_norm": 1.5993543863296509, "learning_rate": 1.3144259612762478e-06, "loss": 0.0248, "step": 2410 }, { "epoch": 0.03959748016035343, "grad_norm": 1.5007202625274658, "learning_rate": 1.3198800109080995e-06, "loss": 0.0269, "step": 2420 }, { "epoch": 0.03976110611142927, "grad_norm": 1.7699395418167114, "learning_rate": 1.3253340605399508e-06, "loss": 0.0301, "step": 2430 }, { "epoch": 0.03992473206250512, "grad_norm": 1.59510338306427, "learning_rate": 1.3307881101718025e-06, "loss": 0.0332, "step": 2440 }, { "epoch": 0.04008835801358095, "grad_norm": 1.471999168395996, "learning_rate": 1.3362421598036542e-06, "loss": 0.0361, "step": 2450 }, { "epoch": 0.0402519839646568, "grad_norm": 1.2164160013198853, "learning_rate": 1.341696209435506e-06, "loss": 0.032, "step": 2460 }, { "epoch": 0.040415609915732634, "grad_norm": 1.95022451877594, "learning_rate": 1.3471502590673576e-06, "loss": 0.0239, "step": 2470 }, { "epoch": 0.04057923586680848, "grad_norm": 1.029660940170288, "learning_rate": 1.3526043086992093e-06, "loss": 0.0245, "step": 2480 }, { "epoch": 0.040742861817884314, "grad_norm": 1.6159366369247437, "learning_rate": 1.3580583583310608e-06, "loss": 0.0324, "step": 2490 }, { "epoch": 0.04090648776896016, "grad_norm": 1.67780601978302, "learning_rate": 1.3635124079629125e-06, "loss": 0.0249, "step": 2500 }, { "epoch": 0.041070113720035994, "grad_norm": 0.9726197719573975, "learning_rate": 1.3689664575947642e-06, "loss": 0.0316, "step": 2510 }, { "epoch": 0.04123373967111184, "grad_norm": 1.2978094816207886, "learning_rate": 1.374420507226616e-06, "loss": 0.0266, "step": 2520 }, { "epoch": 0.04139736562218768, "grad_norm": 1.2126712799072266, "learning_rate": 1.3798745568584676e-06, "loss": 0.0277, "step": 2530 }, { "epoch": 0.04156099157326352, "grad_norm": 1.6619802713394165, "learning_rate": 1.3853286064903193e-06, "loss": 0.0293, "step": 2540 }, { "epoch": 0.04172461752433936, "grad_norm": 1.3217777013778687, "learning_rate": 1.3907826561221708e-06, "loss": 0.0281, "step": 2550 }, { "epoch": 0.0418882434754152, "grad_norm": 1.415819764137268, "learning_rate": 1.3962367057540225e-06, "loss": 0.0266, "step": 2560 }, { "epoch": 0.04205186942649104, "grad_norm": 1.6335690021514893, "learning_rate": 1.4016907553858742e-06, "loss": 0.0199, "step": 2570 }, { "epoch": 0.04221549537756688, "grad_norm": 1.4788322448730469, "learning_rate": 1.407144805017726e-06, "loss": 0.0174, "step": 2580 }, { "epoch": 0.04237912132864272, "grad_norm": 1.1941033601760864, "learning_rate": 1.4125988546495776e-06, "loss": 0.0286, "step": 2590 }, { "epoch": 0.04254274727971857, "grad_norm": 0.6530599594116211, "learning_rate": 1.4180529042814289e-06, "loss": 0.0206, "step": 2600 }, { "epoch": 0.0427063732307944, "grad_norm": 0.6440643668174744, "learning_rate": 1.4235069539132806e-06, "loss": 0.0249, "step": 2610 }, { "epoch": 0.04286999918187025, "grad_norm": 2.7297959327697754, "learning_rate": 1.4289610035451323e-06, "loss": 0.0205, "step": 2620 }, { "epoch": 0.043033625132946084, "grad_norm": 1.141425609588623, "learning_rate": 1.434415053176984e-06, "loss": 0.0246, "step": 2630 }, { "epoch": 0.04319725108402193, "grad_norm": 1.098841905593872, "learning_rate": 1.4398691028088357e-06, "loss": 0.0293, "step": 2640 }, { "epoch": 0.043360877035097764, "grad_norm": 1.3283329010009766, "learning_rate": 1.4453231524406874e-06, "loss": 0.0249, "step": 2650 }, { "epoch": 0.04352450298617361, "grad_norm": 1.0602494478225708, "learning_rate": 1.4507772020725389e-06, "loss": 0.0306, "step": 2660 }, { "epoch": 0.043688128937249444, "grad_norm": 1.495590329170227, "learning_rate": 1.4562312517043906e-06, "loss": 0.0217, "step": 2670 }, { "epoch": 0.04385175488832529, "grad_norm": 1.8322491645812988, "learning_rate": 1.4616853013362423e-06, "loss": 0.0221, "step": 2680 }, { "epoch": 0.04401538083940113, "grad_norm": 1.0096715688705444, "learning_rate": 1.467139350968094e-06, "loss": 0.0268, "step": 2690 }, { "epoch": 0.04417900679047697, "grad_norm": 1.028315544128418, "learning_rate": 1.4725934005999457e-06, "loss": 0.0266, "step": 2700 }, { "epoch": 0.04434263274155281, "grad_norm": 1.7434896230697632, "learning_rate": 1.4780474502317974e-06, "loss": 0.0268, "step": 2710 }, { "epoch": 0.04450625869262865, "grad_norm": 1.291272521018982, "learning_rate": 1.4835014998636489e-06, "loss": 0.0239, "step": 2720 }, { "epoch": 0.04466988464370449, "grad_norm": 1.2723766565322876, "learning_rate": 1.4889555494955006e-06, "loss": 0.0246, "step": 2730 }, { "epoch": 0.04483351059478033, "grad_norm": 1.0887057781219482, "learning_rate": 1.4944095991273523e-06, "loss": 0.0229, "step": 2740 }, { "epoch": 0.04499713654585617, "grad_norm": 1.857032060623169, "learning_rate": 1.4998636487592038e-06, "loss": 0.0301, "step": 2750 }, { "epoch": 0.045160762496932016, "grad_norm": 1.183394193649292, "learning_rate": 1.5053176983910555e-06, "loss": 0.0211, "step": 2760 }, { "epoch": 0.04532438844800785, "grad_norm": 1.164291262626648, "learning_rate": 1.5107717480229072e-06, "loss": 0.0236, "step": 2770 }, { "epoch": 0.0454880143990837, "grad_norm": 2.5396125316619873, "learning_rate": 1.5162257976547587e-06, "loss": 0.0354, "step": 2780 }, { "epoch": 0.045651640350159534, "grad_norm": 1.3356671333312988, "learning_rate": 1.5216798472866104e-06, "loss": 0.0248, "step": 2790 }, { "epoch": 0.04581526630123538, "grad_norm": 1.0268887281417847, "learning_rate": 1.527133896918462e-06, "loss": 0.0263, "step": 2800 }, { "epoch": 0.045978892252311214, "grad_norm": 0.8928141593933105, "learning_rate": 1.5325879465503138e-06, "loss": 0.0276, "step": 2810 }, { "epoch": 0.04614251820338706, "grad_norm": 1.9548661708831787, "learning_rate": 1.5380419961821655e-06, "loss": 0.02, "step": 2820 }, { "epoch": 0.0463061441544629, "grad_norm": 1.2414358854293823, "learning_rate": 1.543496045814017e-06, "loss": 0.0249, "step": 2830 }, { "epoch": 0.04646977010553874, "grad_norm": 1.6838544607162476, "learning_rate": 1.5489500954458687e-06, "loss": 0.0255, "step": 2840 }, { "epoch": 0.04663339605661458, "grad_norm": 1.1277804374694824, "learning_rate": 1.5544041450777204e-06, "loss": 0.028, "step": 2850 }, { "epoch": 0.04679702200769042, "grad_norm": 0.892866313457489, "learning_rate": 1.559858194709572e-06, "loss": 0.0247, "step": 2860 }, { "epoch": 0.04696064795876626, "grad_norm": 1.0380139350891113, "learning_rate": 1.5653122443414238e-06, "loss": 0.0271, "step": 2870 }, { "epoch": 0.0471242739098421, "grad_norm": 1.732856273651123, "learning_rate": 1.5707662939732755e-06, "loss": 0.0326, "step": 2880 }, { "epoch": 0.04728789986091794, "grad_norm": 1.3144307136535645, "learning_rate": 1.5762203436051267e-06, "loss": 0.0247, "step": 2890 }, { "epoch": 0.04745152581199378, "grad_norm": 0.7988929152488708, "learning_rate": 1.5816743932369784e-06, "loss": 0.0276, "step": 2900 }, { "epoch": 0.04761515176306962, "grad_norm": 1.2821091413497925, "learning_rate": 1.5871284428688301e-06, "loss": 0.0275, "step": 2910 }, { "epoch": 0.047778777714145466, "grad_norm": 1.8612990379333496, "learning_rate": 1.5925824925006818e-06, "loss": 0.0295, "step": 2920 }, { "epoch": 0.0479424036652213, "grad_norm": 1.0249279737472534, "learning_rate": 1.5980365421325335e-06, "loss": 0.0216, "step": 2930 }, { "epoch": 0.04810602961629715, "grad_norm": 1.6574149131774902, "learning_rate": 1.6034905917643852e-06, "loss": 0.0283, "step": 2940 }, { "epoch": 0.048269655567372984, "grad_norm": 0.7182159423828125, "learning_rate": 1.6089446413962367e-06, "loss": 0.0252, "step": 2950 }, { "epoch": 0.04843328151844883, "grad_norm": 1.0928566455841064, "learning_rate": 1.6143986910280884e-06, "loss": 0.023, "step": 2960 }, { "epoch": 0.048596907469524664, "grad_norm": 1.2761528491973877, "learning_rate": 1.6198527406599401e-06, "loss": 0.0226, "step": 2970 }, { "epoch": 0.04876053342060051, "grad_norm": 1.541603446006775, "learning_rate": 1.6253067902917918e-06, "loss": 0.0237, "step": 2980 }, { "epoch": 0.04892415937167635, "grad_norm": 1.1588711738586426, "learning_rate": 1.6307608399236435e-06, "loss": 0.0235, "step": 2990 }, { "epoch": 0.04908778532275219, "grad_norm": 1.2908154726028442, "learning_rate": 1.636214889555495e-06, "loss": 0.0221, "step": 3000 }, { "epoch": 0.04925141127382803, "grad_norm": 1.1763355731964111, "learning_rate": 1.6416689391873467e-06, "loss": 0.0259, "step": 3010 }, { "epoch": 0.04941503722490387, "grad_norm": 0.9689708352088928, "learning_rate": 1.6471229888191984e-06, "loss": 0.0227, "step": 3020 }, { "epoch": 0.04957866317597971, "grad_norm": 1.828675389289856, "learning_rate": 1.6525770384510501e-06, "loss": 0.0184, "step": 3030 }, { "epoch": 0.04974228912705555, "grad_norm": 1.0045007467269897, "learning_rate": 1.6580310880829018e-06, "loss": 0.0283, "step": 3040 }, { "epoch": 0.04990591507813139, "grad_norm": 1.3750196695327759, "learning_rate": 1.6634851377147535e-06, "loss": 0.0251, "step": 3050 }, { "epoch": 0.05006954102920723, "grad_norm": 1.3335890769958496, "learning_rate": 1.6689391873466048e-06, "loss": 0.0276, "step": 3060 }, { "epoch": 0.05023316698028307, "grad_norm": 1.7532286643981934, "learning_rate": 1.6743932369784565e-06, "loss": 0.0206, "step": 3070 }, { "epoch": 0.050396792931358916, "grad_norm": 1.1638535261154175, "learning_rate": 1.6798472866103082e-06, "loss": 0.0281, "step": 3080 }, { "epoch": 0.05056041888243475, "grad_norm": 1.4859319925308228, "learning_rate": 1.68530133624216e-06, "loss": 0.0245, "step": 3090 }, { "epoch": 0.0507240448335106, "grad_norm": 0.9853307008743286, "learning_rate": 1.6907553858740116e-06, "loss": 0.0219, "step": 3100 }, { "epoch": 0.05088767078458643, "grad_norm": 2.2381179332733154, "learning_rate": 1.6962094355058633e-06, "loss": 0.0213, "step": 3110 }, { "epoch": 0.05105129673566228, "grad_norm": 1.471472144126892, "learning_rate": 1.7016634851377148e-06, "loss": 0.0279, "step": 3120 }, { "epoch": 0.051214922686738114, "grad_norm": 0.9398375749588013, "learning_rate": 1.7071175347695665e-06, "loss": 0.0221, "step": 3130 }, { "epoch": 0.05137854863781396, "grad_norm": 1.147047758102417, "learning_rate": 1.7125715844014182e-06, "loss": 0.0202, "step": 3140 }, { "epoch": 0.0515421745888898, "grad_norm": 1.226751446723938, "learning_rate": 1.71802563403327e-06, "loss": 0.0224, "step": 3150 }, { "epoch": 0.05170580053996564, "grad_norm": 1.1850850582122803, "learning_rate": 1.7234796836651216e-06, "loss": 0.0243, "step": 3160 }, { "epoch": 0.05186942649104148, "grad_norm": 1.0775951147079468, "learning_rate": 1.7289337332969733e-06, "loss": 0.0188, "step": 3170 }, { "epoch": 0.05203305244211732, "grad_norm": 1.3040891885757446, "learning_rate": 1.7343877829288248e-06, "loss": 0.0269, "step": 3180 }, { "epoch": 0.05219667839319316, "grad_norm": 1.0553785562515259, "learning_rate": 1.7398418325606765e-06, "loss": 0.0243, "step": 3190 }, { "epoch": 0.052360304344269, "grad_norm": 1.4058150053024292, "learning_rate": 1.7452958821925282e-06, "loss": 0.0284, "step": 3200 }, { "epoch": 0.05252393029534484, "grad_norm": 1.0478510856628418, "learning_rate": 1.7507499318243799e-06, "loss": 0.0258, "step": 3210 }, { "epoch": 0.05268755624642068, "grad_norm": 1.5595393180847168, "learning_rate": 1.7562039814562316e-06, "loss": 0.0262, "step": 3220 }, { "epoch": 0.05285118219749652, "grad_norm": 1.3992499113082886, "learning_rate": 1.7616580310880829e-06, "loss": 0.0224, "step": 3230 }, { "epoch": 0.053014808148572366, "grad_norm": 1.3505454063415527, "learning_rate": 1.7671120807199346e-06, "loss": 0.0256, "step": 3240 }, { "epoch": 0.0531784340996482, "grad_norm": 1.5510951280593872, "learning_rate": 1.7725661303517863e-06, "loss": 0.0228, "step": 3250 }, { "epoch": 0.05334206005072405, "grad_norm": 1.2564942836761475, "learning_rate": 1.778020179983638e-06, "loss": 0.0243, "step": 3260 }, { "epoch": 0.05350568600179988, "grad_norm": 1.0546241998672485, "learning_rate": 1.7834742296154897e-06, "loss": 0.0234, "step": 3270 }, { "epoch": 0.05366931195287573, "grad_norm": 0.5394402146339417, "learning_rate": 1.7889282792473414e-06, "loss": 0.0229, "step": 3280 }, { "epoch": 0.053832937903951564, "grad_norm": 0.988962709903717, "learning_rate": 1.7943823288791929e-06, "loss": 0.0273, "step": 3290 }, { "epoch": 0.05399656385502741, "grad_norm": 0.838750422000885, "learning_rate": 1.7998363785110446e-06, "loss": 0.0241, "step": 3300 }, { "epoch": 0.05416018980610325, "grad_norm": 0.8716315031051636, "learning_rate": 1.8052904281428963e-06, "loss": 0.0189, "step": 3310 }, { "epoch": 0.05432381575717909, "grad_norm": 1.0897934436798096, "learning_rate": 1.810744477774748e-06, "loss": 0.0188, "step": 3320 }, { "epoch": 0.05448744170825493, "grad_norm": 0.9270868301391602, "learning_rate": 1.8161985274065997e-06, "loss": 0.0187, "step": 3330 }, { "epoch": 0.05465106765933077, "grad_norm": 0.9894445538520813, "learning_rate": 1.8216525770384514e-06, "loss": 0.0181, "step": 3340 }, { "epoch": 0.05481469361040661, "grad_norm": 0.8768455386161804, "learning_rate": 1.8271066266703029e-06, "loss": 0.021, "step": 3350 }, { "epoch": 0.05497831956148245, "grad_norm": 0.981567919254303, "learning_rate": 1.8325606763021546e-06, "loss": 0.0179, "step": 3360 }, { "epoch": 0.05514194551255829, "grad_norm": 0.9962620735168457, "learning_rate": 1.838014725934006e-06, "loss": 0.0201, "step": 3370 }, { "epoch": 0.055305571463634136, "grad_norm": 1.4500595331192017, "learning_rate": 1.8434687755658577e-06, "loss": 0.0326, "step": 3380 }, { "epoch": 0.05546919741470997, "grad_norm": 0.881920337677002, "learning_rate": 1.8489228251977094e-06, "loss": 0.0291, "step": 3390 }, { "epoch": 0.055632823365785816, "grad_norm": 0.9697723984718323, "learning_rate": 1.854376874829561e-06, "loss": 0.0172, "step": 3400 }, { "epoch": 0.05579644931686165, "grad_norm": 1.1145638227462769, "learning_rate": 1.8598309244614126e-06, "loss": 0.0247, "step": 3410 }, { "epoch": 0.0559600752679375, "grad_norm": 0.8215819001197815, "learning_rate": 1.8652849740932643e-06, "loss": 0.0241, "step": 3420 }, { "epoch": 0.05612370121901333, "grad_norm": 1.275476336479187, "learning_rate": 1.870739023725116e-06, "loss": 0.0194, "step": 3430 }, { "epoch": 0.05628732717008918, "grad_norm": 1.3093003034591675, "learning_rate": 1.8761930733569677e-06, "loss": 0.0227, "step": 3440 }, { "epoch": 0.056450953121165014, "grad_norm": 1.2893853187561035, "learning_rate": 1.8816471229888194e-06, "loss": 0.0252, "step": 3450 }, { "epoch": 0.05661457907224086, "grad_norm": 0.7612252235412598, "learning_rate": 1.887101172620671e-06, "loss": 0.0199, "step": 3460 }, { "epoch": 0.0567782050233167, "grad_norm": 0.9209619760513306, "learning_rate": 1.8925552222525226e-06, "loss": 0.0252, "step": 3470 }, { "epoch": 0.05694183097439254, "grad_norm": 1.7326873540878296, "learning_rate": 1.8980092718843743e-06, "loss": 0.0217, "step": 3480 }, { "epoch": 0.05710545692546838, "grad_norm": 0.8247413635253906, "learning_rate": 1.903463321516226e-06, "loss": 0.0204, "step": 3490 }, { "epoch": 0.05726908287654422, "grad_norm": 1.678443431854248, "learning_rate": 1.9089173711480775e-06, "loss": 0.0184, "step": 3500 }, { "epoch": 0.05743270882762006, "grad_norm": 1.1757315397262573, "learning_rate": 1.9143714207799292e-06, "loss": 0.0225, "step": 3510 }, { "epoch": 0.0575963347786959, "grad_norm": 0.8371372222900391, "learning_rate": 1.919825470411781e-06, "loss": 0.0223, "step": 3520 }, { "epoch": 0.05775996072977174, "grad_norm": 1.302064061164856, "learning_rate": 1.9252795200436326e-06, "loss": 0.0233, "step": 3530 }, { "epoch": 0.057923586680847586, "grad_norm": 0.9722303748130798, "learning_rate": 1.9307335696754843e-06, "loss": 0.0256, "step": 3540 }, { "epoch": 0.05808721263192342, "grad_norm": 0.8694469332695007, "learning_rate": 1.936187619307336e-06, "loss": 0.0235, "step": 3550 }, { "epoch": 0.058250838582999266, "grad_norm": 1.2569630146026611, "learning_rate": 1.9416416689391877e-06, "loss": 0.0193, "step": 3560 }, { "epoch": 0.0584144645340751, "grad_norm": 1.2085784673690796, "learning_rate": 1.947095718571039e-06, "loss": 0.0219, "step": 3570 }, { "epoch": 0.058578090485150947, "grad_norm": 1.3117491006851196, "learning_rate": 1.9525497682028907e-06, "loss": 0.0229, "step": 3580 }, { "epoch": 0.05874171643622678, "grad_norm": 1.3743319511413574, "learning_rate": 1.9580038178347424e-06, "loss": 0.0198, "step": 3590 }, { "epoch": 0.05890534238730263, "grad_norm": 1.4419586658477783, "learning_rate": 1.963457867466594e-06, "loss": 0.0227, "step": 3600 }, { "epoch": 0.059068968338378464, "grad_norm": 1.910510778427124, "learning_rate": 1.968911917098446e-06, "loss": 0.023, "step": 3610 }, { "epoch": 0.05923259428945431, "grad_norm": 0.9994873404502869, "learning_rate": 1.9743659667302975e-06, "loss": 0.0194, "step": 3620 }, { "epoch": 0.05939622024053015, "grad_norm": 1.761000633239746, "learning_rate": 1.9798200163621488e-06, "loss": 0.0262, "step": 3630 }, { "epoch": 0.05955984619160599, "grad_norm": 0.8472606539726257, "learning_rate": 1.9852740659940005e-06, "loss": 0.0231, "step": 3640 }, { "epoch": 0.05972347214268183, "grad_norm": 1.8871433734893799, "learning_rate": 1.990728115625852e-06, "loss": 0.0205, "step": 3650 }, { "epoch": 0.05988709809375767, "grad_norm": 1.7018417119979858, "learning_rate": 1.996182165257704e-06, "loss": 0.0262, "step": 3660 }, { "epoch": 0.06005072404483351, "grad_norm": 1.1097400188446045, "learning_rate": 2.0016362148895556e-06, "loss": 0.0232, "step": 3670 }, { "epoch": 0.06021434999590935, "grad_norm": 0.8129346370697021, "learning_rate": 2.0070902645214073e-06, "loss": 0.0188, "step": 3680 }, { "epoch": 0.06037797594698519, "grad_norm": 1.0512713193893433, "learning_rate": 2.012544314153259e-06, "loss": 0.0178, "step": 3690 }, { "epoch": 0.060541601898061036, "grad_norm": 0.9962353706359863, "learning_rate": 2.0179983637851107e-06, "loss": 0.0267, "step": 3700 }, { "epoch": 0.06070522784913687, "grad_norm": 1.0234490633010864, "learning_rate": 2.0234524134169624e-06, "loss": 0.0146, "step": 3710 }, { "epoch": 0.060868853800212716, "grad_norm": 0.7434457540512085, "learning_rate": 2.028906463048814e-06, "loss": 0.0251, "step": 3720 }, { "epoch": 0.06103247975128855, "grad_norm": 1.0221445560455322, "learning_rate": 2.034360512680666e-06, "loss": 0.0227, "step": 3730 }, { "epoch": 0.061196105702364396, "grad_norm": 1.1963093280792236, "learning_rate": 2.0398145623125175e-06, "loss": 0.0216, "step": 3740 }, { "epoch": 0.06135973165344023, "grad_norm": 1.6195560693740845, "learning_rate": 2.0452686119443688e-06, "loss": 0.0229, "step": 3750 }, { "epoch": 0.06152335760451608, "grad_norm": 0.9843617677688599, "learning_rate": 2.0507226615762205e-06, "loss": 0.0237, "step": 3760 }, { "epoch": 0.061686983555591914, "grad_norm": 1.0382856130599976, "learning_rate": 2.056176711208072e-06, "loss": 0.0232, "step": 3770 }, { "epoch": 0.06185060950666776, "grad_norm": 1.4391368627548218, "learning_rate": 2.061630760839924e-06, "loss": 0.0204, "step": 3780 }, { "epoch": 0.0620142354577436, "grad_norm": 0.9887288212776184, "learning_rate": 2.0670848104717756e-06, "loss": 0.0198, "step": 3790 }, { "epoch": 0.06217786140881944, "grad_norm": 1.3060282468795776, "learning_rate": 2.072538860103627e-06, "loss": 0.0221, "step": 3800 }, { "epoch": 0.06234148735989528, "grad_norm": 0.882520318031311, "learning_rate": 2.0779929097354786e-06, "loss": 0.0198, "step": 3810 }, { "epoch": 0.06250511331097112, "grad_norm": 1.0814740657806396, "learning_rate": 2.0834469593673303e-06, "loss": 0.0172, "step": 3820 }, { "epoch": 0.06266873926204695, "grad_norm": 1.0201282501220703, "learning_rate": 2.088901008999182e-06, "loss": 0.0303, "step": 3830 }, { "epoch": 0.0628323652131228, "grad_norm": 1.3715721368789673, "learning_rate": 2.0943550586310337e-06, "loss": 0.0214, "step": 3840 }, { "epoch": 0.06299599116419864, "grad_norm": 1.0639108419418335, "learning_rate": 2.0998091082628854e-06, "loss": 0.0174, "step": 3850 }, { "epoch": 0.06315961711527449, "grad_norm": 1.855281949043274, "learning_rate": 2.105263157894737e-06, "loss": 0.0277, "step": 3860 }, { "epoch": 0.06332324306635033, "grad_norm": 0.84293133020401, "learning_rate": 2.1107172075265888e-06, "loss": 0.0203, "step": 3870 }, { "epoch": 0.06348686901742616, "grad_norm": 1.3143112659454346, "learning_rate": 2.1161712571584405e-06, "loss": 0.0155, "step": 3880 }, { "epoch": 0.063650494968502, "grad_norm": 1.0552860498428345, "learning_rate": 2.121625306790292e-06, "loss": 0.0246, "step": 3890 }, { "epoch": 0.06381412091957785, "grad_norm": 0.819998025894165, "learning_rate": 2.127079356422144e-06, "loss": 0.0154, "step": 3900 }, { "epoch": 0.06397774687065369, "grad_norm": 0.787436842918396, "learning_rate": 2.1325334060539956e-06, "loss": 0.0182, "step": 3910 }, { "epoch": 0.06414137282172952, "grad_norm": 1.3263076543807983, "learning_rate": 2.137987455685847e-06, "loss": 0.0205, "step": 3920 }, { "epoch": 0.06430499877280536, "grad_norm": 1.0753942728042603, "learning_rate": 2.1434415053176985e-06, "loss": 0.0226, "step": 3930 }, { "epoch": 0.06446862472388121, "grad_norm": 1.1934694051742554, "learning_rate": 2.1488955549495502e-06, "loss": 0.0152, "step": 3940 }, { "epoch": 0.06463225067495705, "grad_norm": 0.8780665993690491, "learning_rate": 2.154349604581402e-06, "loss": 0.0182, "step": 3950 }, { "epoch": 0.0647958766260329, "grad_norm": 0.690985918045044, "learning_rate": 2.1598036542132536e-06, "loss": 0.0183, "step": 3960 }, { "epoch": 0.06495950257710872, "grad_norm": 1.1936982870101929, "learning_rate": 2.165257703845105e-06, "loss": 0.0208, "step": 3970 }, { "epoch": 0.06512312852818457, "grad_norm": 1.1310224533081055, "learning_rate": 2.1707117534769566e-06, "loss": 0.0223, "step": 3980 }, { "epoch": 0.06528675447926041, "grad_norm": 1.5926493406295776, "learning_rate": 2.1761658031088083e-06, "loss": 0.0218, "step": 3990 }, { "epoch": 0.06545038043033626, "grad_norm": 0.8800981044769287, "learning_rate": 2.18161985274066e-06, "loss": 0.0243, "step": 4000 }, { "epoch": 0.0656140063814121, "grad_norm": 1.2548387050628662, "learning_rate": 2.1870739023725117e-06, "loss": 0.0187, "step": 4010 }, { "epoch": 0.06577763233248793, "grad_norm": 1.114944338798523, "learning_rate": 2.1925279520043634e-06, "loss": 0.0186, "step": 4020 }, { "epoch": 0.06594125828356377, "grad_norm": 1.1938918828964233, "learning_rate": 2.197982001636215e-06, "loss": 0.0251, "step": 4030 }, { "epoch": 0.06610488423463962, "grad_norm": 1.7698301076889038, "learning_rate": 2.203436051268067e-06, "loss": 0.0169, "step": 4040 }, { "epoch": 0.06626851018571546, "grad_norm": 0.8285405039787292, "learning_rate": 2.2088901008999185e-06, "loss": 0.0234, "step": 4050 }, { "epoch": 0.06643213613679129, "grad_norm": 1.1454505920410156, "learning_rate": 2.2143441505317702e-06, "loss": 0.0176, "step": 4060 }, { "epoch": 0.06659576208786713, "grad_norm": 1.0285584926605225, "learning_rate": 2.219798200163622e-06, "loss": 0.0157, "step": 4070 }, { "epoch": 0.06675938803894298, "grad_norm": 0.9753805994987488, "learning_rate": 2.2252522497954736e-06, "loss": 0.0177, "step": 4080 }, { "epoch": 0.06692301399001882, "grad_norm": 1.5349905490875244, "learning_rate": 2.230706299427325e-06, "loss": 0.022, "step": 4090 }, { "epoch": 0.06708663994109466, "grad_norm": 0.9370464086532593, "learning_rate": 2.2361603490591766e-06, "loss": 0.0147, "step": 4100 }, { "epoch": 0.0672502658921705, "grad_norm": 1.2721103429794312, "learning_rate": 2.2416143986910283e-06, "loss": 0.02, "step": 4110 }, { "epoch": 0.06741389184324634, "grad_norm": 0.795366108417511, "learning_rate": 2.24706844832288e-06, "loss": 0.0256, "step": 4120 }, { "epoch": 0.06757751779432218, "grad_norm": 1.9020237922668457, "learning_rate": 2.2525224979547317e-06, "loss": 0.0192, "step": 4130 }, { "epoch": 0.06774114374539802, "grad_norm": 1.2364789247512817, "learning_rate": 2.2579765475865834e-06, "loss": 0.0175, "step": 4140 }, { "epoch": 0.06790476969647385, "grad_norm": 1.258829116821289, "learning_rate": 2.2634305972184347e-06, "loss": 0.02, "step": 4150 }, { "epoch": 0.0680683956475497, "grad_norm": 1.2653374671936035, "learning_rate": 2.2688846468502864e-06, "loss": 0.0183, "step": 4160 }, { "epoch": 0.06823202159862554, "grad_norm": 0.4251483976840973, "learning_rate": 2.274338696482138e-06, "loss": 0.0118, "step": 4170 }, { "epoch": 0.06839564754970139, "grad_norm": 0.4919903874397278, "learning_rate": 2.27979274611399e-06, "loss": 0.0213, "step": 4180 }, { "epoch": 0.06855927350077723, "grad_norm": 1.0949044227600098, "learning_rate": 2.2852467957458415e-06, "loss": 0.013, "step": 4190 }, { "epoch": 0.06872289945185306, "grad_norm": 1.0280132293701172, "learning_rate": 2.290700845377693e-06, "loss": 0.023, "step": 4200 }, { "epoch": 0.0688865254029289, "grad_norm": 0.8359214663505554, "learning_rate": 2.296154895009545e-06, "loss": 0.0198, "step": 4210 }, { "epoch": 0.06905015135400475, "grad_norm": 1.1138992309570312, "learning_rate": 2.3016089446413966e-06, "loss": 0.0163, "step": 4220 }, { "epoch": 0.06921377730508059, "grad_norm": 1.1474568843841553, "learning_rate": 2.3070629942732483e-06, "loss": 0.0156, "step": 4230 }, { "epoch": 0.06937740325615642, "grad_norm": 1.2642734050750732, "learning_rate": 2.3125170439051e-06, "loss": 0.0202, "step": 4240 }, { "epoch": 0.06954102920723226, "grad_norm": 0.8628132343292236, "learning_rate": 2.3179710935369517e-06, "loss": 0.0369, "step": 4250 }, { "epoch": 0.06970465515830811, "grad_norm": 0.5450739860534668, "learning_rate": 2.323425143168803e-06, "loss": 0.022, "step": 4260 }, { "epoch": 0.06986828110938395, "grad_norm": 0.7991898655891418, "learning_rate": 2.3288791928006547e-06, "loss": 0.019, "step": 4270 }, { "epoch": 0.0700319070604598, "grad_norm": 1.1290448904037476, "learning_rate": 2.3343332424325064e-06, "loss": 0.0165, "step": 4280 }, { "epoch": 0.07019553301153562, "grad_norm": 1.0869791507720947, "learning_rate": 2.339787292064358e-06, "loss": 0.0218, "step": 4290 }, { "epoch": 0.07035915896261147, "grad_norm": 0.9370965957641602, "learning_rate": 2.3452413416962098e-06, "loss": 0.0155, "step": 4300 }, { "epoch": 0.07052278491368731, "grad_norm": 0.9847501516342163, "learning_rate": 2.3506953913280615e-06, "loss": 0.0148, "step": 4310 }, { "epoch": 0.07068641086476316, "grad_norm": 1.3345344066619873, "learning_rate": 2.3561494409599128e-06, "loss": 0.0139, "step": 4320 }, { "epoch": 0.070850036815839, "grad_norm": 1.071442723274231, "learning_rate": 2.3616034905917645e-06, "loss": 0.0274, "step": 4330 }, { "epoch": 0.07101366276691483, "grad_norm": 0.9193683862686157, "learning_rate": 2.367057540223616e-06, "loss": 0.0211, "step": 4340 }, { "epoch": 0.07117728871799067, "grad_norm": 0.9321464896202087, "learning_rate": 2.372511589855468e-06, "loss": 0.0155, "step": 4350 }, { "epoch": 0.07134091466906652, "grad_norm": 1.0057884454727173, "learning_rate": 2.3779656394873196e-06, "loss": 0.0182, "step": 4360 }, { "epoch": 0.07150454062014236, "grad_norm": 1.0530040264129639, "learning_rate": 2.383419689119171e-06, "loss": 0.0252, "step": 4370 }, { "epoch": 0.07166816657121819, "grad_norm": 0.2803080081939697, "learning_rate": 2.3888737387510225e-06, "loss": 0.0255, "step": 4380 }, { "epoch": 0.07183179252229403, "grad_norm": 0.7525278329849243, "learning_rate": 2.3943277883828742e-06, "loss": 0.0197, "step": 4390 }, { "epoch": 0.07199541847336988, "grad_norm": 1.0929770469665527, "learning_rate": 2.399781838014726e-06, "loss": 0.0202, "step": 4400 }, { "epoch": 0.07215904442444572, "grad_norm": 1.0513255596160889, "learning_rate": 2.4052358876465776e-06, "loss": 0.0126, "step": 4410 }, { "epoch": 0.07232267037552156, "grad_norm": 0.7591415047645569, "learning_rate": 2.4106899372784293e-06, "loss": 0.0182, "step": 4420 }, { "epoch": 0.0724862963265974, "grad_norm": 1.4326552152633667, "learning_rate": 2.416143986910281e-06, "loss": 0.0212, "step": 4430 }, { "epoch": 0.07264992227767324, "grad_norm": 1.0587764978408813, "learning_rate": 2.4215980365421327e-06, "loss": 0.0187, "step": 4440 }, { "epoch": 0.07281354822874908, "grad_norm": 1.3421663045883179, "learning_rate": 2.4270520861739844e-06, "loss": 0.0242, "step": 4450 }, { "epoch": 0.07297717417982492, "grad_norm": 0.5612053275108337, "learning_rate": 2.432506135805836e-06, "loss": 0.017, "step": 4460 }, { "epoch": 0.07314080013090075, "grad_norm": 1.1192561388015747, "learning_rate": 2.437960185437688e-06, "loss": 0.0169, "step": 4470 }, { "epoch": 0.0733044260819766, "grad_norm": 0.8539515733718872, "learning_rate": 2.4434142350695395e-06, "loss": 0.016, "step": 4480 }, { "epoch": 0.07346805203305244, "grad_norm": 1.0323679447174072, "learning_rate": 2.448868284701391e-06, "loss": 0.0154, "step": 4490 }, { "epoch": 0.07363167798412829, "grad_norm": 1.013827919960022, "learning_rate": 2.4543223343332425e-06, "loss": 0.0177, "step": 4500 }, { "epoch": 0.07379530393520413, "grad_norm": 0.909745454788208, "learning_rate": 2.4597763839650942e-06, "loss": 0.0234, "step": 4510 }, { "epoch": 0.07395892988627996, "grad_norm": 0.6795579791069031, "learning_rate": 2.465230433596946e-06, "loss": 0.0214, "step": 4520 }, { "epoch": 0.0741225558373558, "grad_norm": 1.1066290140151978, "learning_rate": 2.4706844832287976e-06, "loss": 0.025, "step": 4530 }, { "epoch": 0.07428618178843165, "grad_norm": 0.8269834518432617, "learning_rate": 2.4761385328606493e-06, "loss": 0.0174, "step": 4540 }, { "epoch": 0.07444980773950749, "grad_norm": 0.977855920791626, "learning_rate": 2.4815925824925006e-06, "loss": 0.0175, "step": 4550 }, { "epoch": 0.07461343369058333, "grad_norm": 0.9118476510047913, "learning_rate": 2.4870466321243523e-06, "loss": 0.0206, "step": 4560 }, { "epoch": 0.07477705964165916, "grad_norm": 0.6684682965278625, "learning_rate": 2.492500681756204e-06, "loss": 0.0156, "step": 4570 }, { "epoch": 0.074940685592735, "grad_norm": 0.7412125468254089, "learning_rate": 2.4979547313880557e-06, "loss": 0.0149, "step": 4580 }, { "epoch": 0.07510431154381085, "grad_norm": 1.01152503490448, "learning_rate": 2.5034087810199074e-06, "loss": 0.0156, "step": 4590 }, { "epoch": 0.0752679374948867, "grad_norm": 1.2604584693908691, "learning_rate": 2.508862830651759e-06, "loss": 0.0171, "step": 4600 }, { "epoch": 0.07543156344596252, "grad_norm": 0.8666785955429077, "learning_rate": 2.514316880283611e-06, "loss": 0.0166, "step": 4610 }, { "epoch": 0.07559518939703837, "grad_norm": 1.079569697380066, "learning_rate": 2.5197709299154625e-06, "loss": 0.0166, "step": 4620 }, { "epoch": 0.07575881534811421, "grad_norm": 1.1537538766860962, "learning_rate": 2.525224979547314e-06, "loss": 0.0208, "step": 4630 }, { "epoch": 0.07592244129919005, "grad_norm": 1.1425799131393433, "learning_rate": 2.5306790291791655e-06, "loss": 0.0228, "step": 4640 }, { "epoch": 0.0760860672502659, "grad_norm": 1.1060936450958252, "learning_rate": 2.536133078811017e-06, "loss": 0.0206, "step": 4650 }, { "epoch": 0.07624969320134173, "grad_norm": 0.9448750615119934, "learning_rate": 2.541587128442869e-06, "loss": 0.0198, "step": 4660 }, { "epoch": 0.07641331915241757, "grad_norm": 0.7279178500175476, "learning_rate": 2.5470411780747206e-06, "loss": 0.0197, "step": 4670 }, { "epoch": 0.07657694510349342, "grad_norm": 0.9074788689613342, "learning_rate": 2.5524952277065723e-06, "loss": 0.0182, "step": 4680 }, { "epoch": 0.07674057105456926, "grad_norm": 1.2299587726593018, "learning_rate": 2.557949277338424e-06, "loss": 0.0202, "step": 4690 }, { "epoch": 0.07690419700564509, "grad_norm": 0.9038825035095215, "learning_rate": 2.5634033269702757e-06, "loss": 0.0189, "step": 4700 }, { "epoch": 0.07706782295672093, "grad_norm": 1.1351773738861084, "learning_rate": 2.5688573766021274e-06, "loss": 0.0186, "step": 4710 }, { "epoch": 0.07723144890779678, "grad_norm": 0.8688490390777588, "learning_rate": 2.574311426233979e-06, "loss": 0.0145, "step": 4720 }, { "epoch": 0.07739507485887262, "grad_norm": 0.9342639446258545, "learning_rate": 2.579765475865831e-06, "loss": 0.0179, "step": 4730 }, { "epoch": 0.07755870080994846, "grad_norm": 0.8001863956451416, "learning_rate": 2.5852195254976825e-06, "loss": 0.0196, "step": 4740 }, { "epoch": 0.0777223267610243, "grad_norm": 1.0900238752365112, "learning_rate": 2.5906735751295338e-06, "loss": 0.0259, "step": 4750 }, { "epoch": 0.07788595271210014, "grad_norm": 0.6990840435028076, "learning_rate": 2.5961276247613855e-06, "loss": 0.0122, "step": 4760 }, { "epoch": 0.07804957866317598, "grad_norm": 0.6042187809944153, "learning_rate": 2.601581674393237e-06, "loss": 0.016, "step": 4770 }, { "epoch": 0.07821320461425182, "grad_norm": 0.8377915620803833, "learning_rate": 2.607035724025089e-06, "loss": 0.0134, "step": 4780 }, { "epoch": 0.07837683056532765, "grad_norm": 1.0652810335159302, "learning_rate": 2.6124897736569406e-06, "loss": 0.0173, "step": 4790 }, { "epoch": 0.0785404565164035, "grad_norm": 1.0956909656524658, "learning_rate": 2.6179438232887923e-06, "loss": 0.017, "step": 4800 }, { "epoch": 0.07870408246747934, "grad_norm": 1.2536448240280151, "learning_rate": 2.623397872920644e-06, "loss": 0.0155, "step": 4810 }, { "epoch": 0.07886770841855519, "grad_norm": 0.9264737963676453, "learning_rate": 2.6288519225524957e-06, "loss": 0.0186, "step": 4820 }, { "epoch": 0.07903133436963103, "grad_norm": 1.7796748876571655, "learning_rate": 2.6343059721843474e-06, "loss": 0.0171, "step": 4830 }, { "epoch": 0.07919496032070686, "grad_norm": 1.0903502702713013, "learning_rate": 2.639760021816199e-06, "loss": 0.0169, "step": 4840 }, { "epoch": 0.0793585862717827, "grad_norm": 0.7941282987594604, "learning_rate": 2.6452140714480508e-06, "loss": 0.0157, "step": 4850 }, { "epoch": 0.07952221222285855, "grad_norm": 0.9998249411582947, "learning_rate": 2.6506681210799016e-06, "loss": 0.0199, "step": 4860 }, { "epoch": 0.07968583817393439, "grad_norm": 0.5043797492980957, "learning_rate": 2.6561221707117533e-06, "loss": 0.0158, "step": 4870 }, { "epoch": 0.07984946412501023, "grad_norm": 0.6337336897850037, "learning_rate": 2.661576220343605e-06, "loss": 0.015, "step": 4880 }, { "epoch": 0.08001309007608606, "grad_norm": 0.759972095489502, "learning_rate": 2.6670302699754567e-06, "loss": 0.0114, "step": 4890 }, { "epoch": 0.0801767160271619, "grad_norm": 0.8151446580886841, "learning_rate": 2.6724843196073084e-06, "loss": 0.0177, "step": 4900 }, { "epoch": 0.08034034197823775, "grad_norm": 0.7381401658058167, "learning_rate": 2.67793836923916e-06, "loss": 0.0143, "step": 4910 }, { "epoch": 0.0805039679293136, "grad_norm": 0.5805257558822632, "learning_rate": 2.683392418871012e-06, "loss": 0.0121, "step": 4920 }, { "epoch": 0.08066759388038942, "grad_norm": 1.1568772792816162, "learning_rate": 2.6888464685028635e-06, "loss": 0.0182, "step": 4930 }, { "epoch": 0.08083121983146527, "grad_norm": 1.5141230821609497, "learning_rate": 2.6943005181347152e-06, "loss": 0.0172, "step": 4940 }, { "epoch": 0.08099484578254111, "grad_norm": 0.7351788282394409, "learning_rate": 2.699754567766567e-06, "loss": 0.0152, "step": 4950 }, { "epoch": 0.08115847173361695, "grad_norm": 0.769402801990509, "learning_rate": 2.7052086173984187e-06, "loss": 0.0197, "step": 4960 }, { "epoch": 0.0813220976846928, "grad_norm": 0.3709261119365692, "learning_rate": 2.71066266703027e-06, "loss": 0.0155, "step": 4970 }, { "epoch": 0.08148572363576863, "grad_norm": 0.8174160718917847, "learning_rate": 2.7161167166621216e-06, "loss": 0.0159, "step": 4980 }, { "epoch": 0.08164934958684447, "grad_norm": 1.1775890588760376, "learning_rate": 2.7215707662939733e-06, "loss": 0.0161, "step": 4990 }, { "epoch": 0.08181297553792032, "grad_norm": 0.9500192999839783, "learning_rate": 2.727024815925825e-06, "loss": 0.0171, "step": 5000 }, { "epoch": 0.08197660148899616, "grad_norm": 0.9321677684783936, "learning_rate": 2.7324788655576767e-06, "loss": 0.0167, "step": 5010 }, { "epoch": 0.08214022744007199, "grad_norm": 1.0055530071258545, "learning_rate": 2.7379329151895284e-06, "loss": 0.0121, "step": 5020 }, { "epoch": 0.08230385339114783, "grad_norm": 1.2070386409759521, "learning_rate": 2.74338696482138e-06, "loss": 0.0165, "step": 5030 }, { "epoch": 0.08246747934222368, "grad_norm": 0.5914405584335327, "learning_rate": 2.748841014453232e-06, "loss": 0.018, "step": 5040 }, { "epoch": 0.08263110529329952, "grad_norm": 0.6340383291244507, "learning_rate": 2.7542950640850835e-06, "loss": 0.0189, "step": 5050 }, { "epoch": 0.08279473124437536, "grad_norm": 0.9640193581581116, "learning_rate": 2.7597491137169352e-06, "loss": 0.0157, "step": 5060 }, { "epoch": 0.0829583571954512, "grad_norm": 0.8477957248687744, "learning_rate": 2.765203163348787e-06, "loss": 0.0172, "step": 5070 }, { "epoch": 0.08312198314652704, "grad_norm": 1.026456594467163, "learning_rate": 2.7706572129806386e-06, "loss": 0.0146, "step": 5080 }, { "epoch": 0.08328560909760288, "grad_norm": 0.7711597681045532, "learning_rate": 2.77611126261249e-06, "loss": 0.0173, "step": 5090 }, { "epoch": 0.08344923504867872, "grad_norm": 0.9673550128936768, "learning_rate": 2.7815653122443416e-06, "loss": 0.0142, "step": 5100 }, { "epoch": 0.08361286099975457, "grad_norm": 1.2038995027542114, "learning_rate": 2.7870193618761933e-06, "loss": 0.0156, "step": 5110 }, { "epoch": 0.0837764869508304, "grad_norm": 0.878197193145752, "learning_rate": 2.792473411508045e-06, "loss": 0.022, "step": 5120 }, { "epoch": 0.08394011290190624, "grad_norm": 1.0659819841384888, "learning_rate": 2.7979274611398967e-06, "loss": 0.0136, "step": 5130 }, { "epoch": 0.08410373885298209, "grad_norm": 1.1639721393585205, "learning_rate": 2.8033815107717484e-06, "loss": 0.0187, "step": 5140 }, { "epoch": 0.08426736480405793, "grad_norm": 0.8449233174324036, "learning_rate": 2.8088355604036e-06, "loss": 0.0138, "step": 5150 }, { "epoch": 0.08443099075513376, "grad_norm": 0.8939120173454285, "learning_rate": 2.814289610035452e-06, "loss": 0.0213, "step": 5160 }, { "epoch": 0.0845946167062096, "grad_norm": 1.4400274753570557, "learning_rate": 2.8197436596673035e-06, "loss": 0.0216, "step": 5170 }, { "epoch": 0.08475824265728545, "grad_norm": 0.8084188103675842, "learning_rate": 2.8251977092991552e-06, "loss": 0.0167, "step": 5180 }, { "epoch": 0.08492186860836129, "grad_norm": 0.7128744721412659, "learning_rate": 2.830651758931007e-06, "loss": 0.014, "step": 5190 }, { "epoch": 0.08508549455943713, "grad_norm": 0.9861514568328857, "learning_rate": 2.8361058085628578e-06, "loss": 0.0223, "step": 5200 }, { "epoch": 0.08524912051051296, "grad_norm": 0.5894780158996582, "learning_rate": 2.8415598581947095e-06, "loss": 0.0178, "step": 5210 }, { "epoch": 0.0854127464615888, "grad_norm": 0.7652517557144165, "learning_rate": 2.847013907826561e-06, "loss": 0.0223, "step": 5220 }, { "epoch": 0.08557637241266465, "grad_norm": 0.7430261373519897, "learning_rate": 2.852467957458413e-06, "loss": 0.0153, "step": 5230 }, { "epoch": 0.0857399983637405, "grad_norm": 0.831869900226593, "learning_rate": 2.8579220070902646e-06, "loss": 0.0159, "step": 5240 }, { "epoch": 0.08590362431481632, "grad_norm": 0.7985550761222839, "learning_rate": 2.8633760567221163e-06, "loss": 0.019, "step": 5250 }, { "epoch": 0.08606725026589217, "grad_norm": 0.784176766872406, "learning_rate": 2.868830106353968e-06, "loss": 0.0163, "step": 5260 }, { "epoch": 0.08623087621696801, "grad_norm": 0.9921072125434875, "learning_rate": 2.8742841559858197e-06, "loss": 0.0109, "step": 5270 }, { "epoch": 0.08639450216804385, "grad_norm": 1.004798173904419, "learning_rate": 2.8797382056176714e-06, "loss": 0.0152, "step": 5280 }, { "epoch": 0.0865581281191197, "grad_norm": 1.0165233612060547, "learning_rate": 2.885192255249523e-06, "loss": 0.016, "step": 5290 }, { "epoch": 0.08672175407019553, "grad_norm": 0.8232343792915344, "learning_rate": 2.8906463048813748e-06, "loss": 0.0169, "step": 5300 }, { "epoch": 0.08688538002127137, "grad_norm": 0.7885742783546448, "learning_rate": 2.8961003545132265e-06, "loss": 0.0132, "step": 5310 }, { "epoch": 0.08704900597234722, "grad_norm": 0.602729856967926, "learning_rate": 2.9015544041450778e-06, "loss": 0.0174, "step": 5320 }, { "epoch": 0.08721263192342306, "grad_norm": 0.6252309679985046, "learning_rate": 2.9070084537769295e-06, "loss": 0.0187, "step": 5330 }, { "epoch": 0.08737625787449889, "grad_norm": 1.046807050704956, "learning_rate": 2.912462503408781e-06, "loss": 0.0161, "step": 5340 }, { "epoch": 0.08753988382557473, "grad_norm": 0.8579831719398499, "learning_rate": 2.917916553040633e-06, "loss": 0.0141, "step": 5350 }, { "epoch": 0.08770350977665058, "grad_norm": 0.7528521418571472, "learning_rate": 2.9233706026724846e-06, "loss": 0.0127, "step": 5360 }, { "epoch": 0.08786713572772642, "grad_norm": 0.6912550330162048, "learning_rate": 2.9288246523043363e-06, "loss": 0.0153, "step": 5370 }, { "epoch": 0.08803076167880226, "grad_norm": 0.9151894450187683, "learning_rate": 2.934278701936188e-06, "loss": 0.0218, "step": 5380 }, { "epoch": 0.0881943876298781, "grad_norm": 0.7298992872238159, "learning_rate": 2.9397327515680397e-06, "loss": 0.0164, "step": 5390 }, { "epoch": 0.08835801358095394, "grad_norm": 0.8278002738952637, "learning_rate": 2.9451868011998914e-06, "loss": 0.0154, "step": 5400 }, { "epoch": 0.08852163953202978, "grad_norm": 0.8794282078742981, "learning_rate": 2.950640850831743e-06, "loss": 0.0128, "step": 5410 }, { "epoch": 0.08868526548310562, "grad_norm": 0.856492280960083, "learning_rate": 2.9560949004635948e-06, "loss": 0.0109, "step": 5420 }, { "epoch": 0.08884889143418147, "grad_norm": 0.6660822629928589, "learning_rate": 2.961548950095446e-06, "loss": 0.0127, "step": 5430 }, { "epoch": 0.0890125173852573, "grad_norm": 0.8414649367332458, "learning_rate": 2.9670029997272978e-06, "loss": 0.0137, "step": 5440 }, { "epoch": 0.08917614333633314, "grad_norm": 0.4392111301422119, "learning_rate": 2.9724570493591495e-06, "loss": 0.0108, "step": 5450 }, { "epoch": 0.08933976928740898, "grad_norm": 1.1808362007141113, "learning_rate": 2.977911098991001e-06, "loss": 0.016, "step": 5460 }, { "epoch": 0.08950339523848483, "grad_norm": 0.6742119789123535, "learning_rate": 2.983365148622853e-06, "loss": 0.0121, "step": 5470 }, { "epoch": 0.08966702118956066, "grad_norm": 0.9016894102096558, "learning_rate": 2.9888191982547046e-06, "loss": 0.019, "step": 5480 }, { "epoch": 0.0898306471406365, "grad_norm": 0.802516520023346, "learning_rate": 2.9942732478865563e-06, "loss": 0.0134, "step": 5490 }, { "epoch": 0.08999427309171235, "grad_norm": 0.5612902641296387, "learning_rate": 2.9997272975184075e-06, "loss": 0.0134, "step": 5500 }, { "epoch": 0.09015789904278819, "grad_norm": 0.5004104375839233, "learning_rate": 3.0051813471502592e-06, "loss": 0.0145, "step": 5510 }, { "epoch": 0.09032152499386403, "grad_norm": 0.9202992916107178, "learning_rate": 3.010635396782111e-06, "loss": 0.0127, "step": 5520 }, { "epoch": 0.09048515094493986, "grad_norm": 0.6290443539619446, "learning_rate": 3.0160894464139626e-06, "loss": 0.0159, "step": 5530 }, { "epoch": 0.0906487768960157, "grad_norm": 0.672428548336029, "learning_rate": 3.0215434960458143e-06, "loss": 0.014, "step": 5540 }, { "epoch": 0.09081240284709155, "grad_norm": 0.7635077834129333, "learning_rate": 3.0269975456776656e-06, "loss": 0.0182, "step": 5550 }, { "epoch": 0.0909760287981674, "grad_norm": 0.7806611657142639, "learning_rate": 3.0324515953095173e-06, "loss": 0.0147, "step": 5560 }, { "epoch": 0.09113965474924322, "grad_norm": 0.6791344285011292, "learning_rate": 3.037905644941369e-06, "loss": 0.0179, "step": 5570 }, { "epoch": 0.09130328070031907, "grad_norm": 0.9148838520050049, "learning_rate": 3.0433596945732207e-06, "loss": 0.0154, "step": 5580 }, { "epoch": 0.09146690665139491, "grad_norm": 0.36368292570114136, "learning_rate": 3.0488137442050724e-06, "loss": 0.013, "step": 5590 }, { "epoch": 0.09163053260247075, "grad_norm": 0.7524011135101318, "learning_rate": 3.054267793836924e-06, "loss": 0.0156, "step": 5600 }, { "epoch": 0.0917941585535466, "grad_norm": 0.834479033946991, "learning_rate": 3.059721843468776e-06, "loss": 0.0119, "step": 5610 }, { "epoch": 0.09195778450462243, "grad_norm": 0.921504557132721, "learning_rate": 3.0651758931006275e-06, "loss": 0.0203, "step": 5620 }, { "epoch": 0.09212141045569827, "grad_norm": 1.429835319519043, "learning_rate": 3.0706299427324792e-06, "loss": 0.0169, "step": 5630 }, { "epoch": 0.09228503640677412, "grad_norm": 0.5858962535858154, "learning_rate": 3.076083992364331e-06, "loss": 0.0206, "step": 5640 }, { "epoch": 0.09244866235784996, "grad_norm": 0.8736156225204468, "learning_rate": 3.0815380419961826e-06, "loss": 0.0129, "step": 5650 }, { "epoch": 0.0926122883089258, "grad_norm": 0.7214558124542236, "learning_rate": 3.086992091628034e-06, "loss": 0.0122, "step": 5660 }, { "epoch": 0.09277591426000163, "grad_norm": 1.244158148765564, "learning_rate": 3.0924461412598856e-06, "loss": 0.0161, "step": 5670 }, { "epoch": 0.09293954021107748, "grad_norm": 0.7762056589126587, "learning_rate": 3.0979001908917373e-06, "loss": 0.0154, "step": 5680 }, { "epoch": 0.09310316616215332, "grad_norm": 0.8118436336517334, "learning_rate": 3.103354240523589e-06, "loss": 0.0134, "step": 5690 }, { "epoch": 0.09326679211322916, "grad_norm": 0.6386100053787231, "learning_rate": 3.1088082901554407e-06, "loss": 0.0154, "step": 5700 }, { "epoch": 0.093430418064305, "grad_norm": 0.6355448961257935, "learning_rate": 3.1142623397872924e-06, "loss": 0.0234, "step": 5710 }, { "epoch": 0.09359404401538084, "grad_norm": 0.3626098334789276, "learning_rate": 3.119716389419144e-06, "loss": 0.0129, "step": 5720 }, { "epoch": 0.09375766996645668, "grad_norm": 0.6233932375907898, "learning_rate": 3.125170439050996e-06, "loss": 0.0134, "step": 5730 }, { "epoch": 0.09392129591753252, "grad_norm": 1.3197227716445923, "learning_rate": 3.1306244886828475e-06, "loss": 0.0157, "step": 5740 }, { "epoch": 0.09408492186860837, "grad_norm": 0.7796733975410461, "learning_rate": 3.136078538314699e-06, "loss": 0.0141, "step": 5750 }, { "epoch": 0.0942485478196842, "grad_norm": 0.7965986132621765, "learning_rate": 3.141532587946551e-06, "loss": 0.0184, "step": 5760 }, { "epoch": 0.09441217377076004, "grad_norm": 0.8424684405326843, "learning_rate": 3.1469866375784018e-06, "loss": 0.0147, "step": 5770 }, { "epoch": 0.09457579972183588, "grad_norm": 0.8431611657142639, "learning_rate": 3.1524406872102535e-06, "loss": 0.0157, "step": 5780 }, { "epoch": 0.09473942567291173, "grad_norm": 0.6252585053443909, "learning_rate": 3.157894736842105e-06, "loss": 0.0146, "step": 5790 }, { "epoch": 0.09490305162398756, "grad_norm": 0.7879089117050171, "learning_rate": 3.163348786473957e-06, "loss": 0.0155, "step": 5800 }, { "epoch": 0.0950666775750634, "grad_norm": 0.9320424795150757, "learning_rate": 3.1688028361058086e-06, "loss": 0.0137, "step": 5810 }, { "epoch": 0.09523030352613925, "grad_norm": 0.7367252111434937, "learning_rate": 3.1742568857376603e-06, "loss": 0.0136, "step": 5820 }, { "epoch": 0.09539392947721509, "grad_norm": 0.7247695326805115, "learning_rate": 3.179710935369512e-06, "loss": 0.0188, "step": 5830 }, { "epoch": 0.09555755542829093, "grad_norm": 0.4898044168949127, "learning_rate": 3.1851649850013637e-06, "loss": 0.0126, "step": 5840 }, { "epoch": 0.09572118137936676, "grad_norm": 0.7382405400276184, "learning_rate": 3.1906190346332154e-06, "loss": 0.0154, "step": 5850 }, { "epoch": 0.0958848073304426, "grad_norm": 0.9127718806266785, "learning_rate": 3.196073084265067e-06, "loss": 0.0159, "step": 5860 }, { "epoch": 0.09604843328151845, "grad_norm": 0.5369975566864014, "learning_rate": 3.2015271338969188e-06, "loss": 0.0175, "step": 5870 }, { "epoch": 0.0962120592325943, "grad_norm": 0.7133941054344177, "learning_rate": 3.2069811835287705e-06, "loss": 0.0093, "step": 5880 }, { "epoch": 0.09637568518367012, "grad_norm": 1.0156655311584473, "learning_rate": 3.2124352331606218e-06, "loss": 0.0127, "step": 5890 }, { "epoch": 0.09653931113474597, "grad_norm": 1.0250358581542969, "learning_rate": 3.2178892827924735e-06, "loss": 0.0164, "step": 5900 }, { "epoch": 0.09670293708582181, "grad_norm": 0.5811936855316162, "learning_rate": 3.223343332424325e-06, "loss": 0.0137, "step": 5910 }, { "epoch": 0.09686656303689765, "grad_norm": 0.9672229290008545, "learning_rate": 3.228797382056177e-06, "loss": 0.0145, "step": 5920 }, { "epoch": 0.0970301889879735, "grad_norm": 0.7883620858192444, "learning_rate": 3.2342514316880286e-06, "loss": 0.0115, "step": 5930 }, { "epoch": 0.09719381493904933, "grad_norm": 0.5272173285484314, "learning_rate": 3.2397054813198803e-06, "loss": 0.013, "step": 5940 }, { "epoch": 0.09735744089012517, "grad_norm": 0.78303462266922, "learning_rate": 3.245159530951732e-06, "loss": 0.0176, "step": 5950 }, { "epoch": 0.09752106684120102, "grad_norm": 1.213394284248352, "learning_rate": 3.2506135805835837e-06, "loss": 0.0171, "step": 5960 }, { "epoch": 0.09768469279227686, "grad_norm": 1.1699204444885254, "learning_rate": 3.2560676302154354e-06, "loss": 0.0164, "step": 5970 }, { "epoch": 0.0978483187433527, "grad_norm": 0.8411902785301208, "learning_rate": 3.261521679847287e-06, "loss": 0.0137, "step": 5980 }, { "epoch": 0.09801194469442853, "grad_norm": 0.6784468293190002, "learning_rate": 3.2669757294791388e-06, "loss": 0.0113, "step": 5990 }, { "epoch": 0.09817557064550438, "grad_norm": 1.2000222206115723, "learning_rate": 3.27242977911099e-06, "loss": 0.0151, "step": 6000 }, { "epoch": 0.09833919659658022, "grad_norm": 0.4644208252429962, "learning_rate": 3.2778838287428417e-06, "loss": 0.0173, "step": 6010 }, { "epoch": 0.09850282254765606, "grad_norm": 0.6437108516693115, "learning_rate": 3.2833378783746934e-06, "loss": 0.0141, "step": 6020 }, { "epoch": 0.09866644849873189, "grad_norm": 0.42634034156799316, "learning_rate": 3.288791928006545e-06, "loss": 0.011, "step": 6030 }, { "epoch": 0.09883007444980774, "grad_norm": 0.706085205078125, "learning_rate": 3.294245977638397e-06, "loss": 0.0144, "step": 6040 }, { "epoch": 0.09899370040088358, "grad_norm": 0.6038877367973328, "learning_rate": 3.2997000272702485e-06, "loss": 0.0117, "step": 6050 }, { "epoch": 0.09915732635195942, "grad_norm": 0.20743539929389954, "learning_rate": 3.3051540769021002e-06, "loss": 0.0113, "step": 6060 }, { "epoch": 0.09932095230303527, "grad_norm": 0.8119137287139893, "learning_rate": 3.310608126533952e-06, "loss": 0.0121, "step": 6070 }, { "epoch": 0.0994845782541111, "grad_norm": 0.8317829370498657, "learning_rate": 3.3160621761658036e-06, "loss": 0.0102, "step": 6080 }, { "epoch": 0.09964820420518694, "grad_norm": 0.8082549571990967, "learning_rate": 3.3215162257976553e-06, "loss": 0.0169, "step": 6090 }, { "epoch": 0.09981183015626278, "grad_norm": 0.8954679369926453, "learning_rate": 3.326970275429507e-06, "loss": 0.0102, "step": 6100 }, { "epoch": 0.09997545610733863, "grad_norm": 0.580042839050293, "learning_rate": 3.3324243250613587e-06, "loss": 0.0126, "step": 6110 }, { "epoch": 0.10013908205841446, "grad_norm": 1.0125596523284912, "learning_rate": 3.3378783746932096e-06, "loss": 0.0082, "step": 6120 }, { "epoch": 0.1003027080094903, "grad_norm": 0.5418968200683594, "learning_rate": 3.3433324243250613e-06, "loss": 0.0165, "step": 6130 }, { "epoch": 0.10046633396056615, "grad_norm": 0.917887270450592, "learning_rate": 3.348786473956913e-06, "loss": 0.0131, "step": 6140 }, { "epoch": 0.10062995991164199, "grad_norm": 1.1862261295318604, "learning_rate": 3.3542405235887647e-06, "loss": 0.0158, "step": 6150 }, { "epoch": 0.10079358586271783, "grad_norm": 0.7814752459526062, "learning_rate": 3.3596945732206164e-06, "loss": 0.0156, "step": 6160 }, { "epoch": 0.10095721181379366, "grad_norm": 1.0431448221206665, "learning_rate": 3.365148622852468e-06, "loss": 0.016, "step": 6170 }, { "epoch": 0.1011208377648695, "grad_norm": 1.0791957378387451, "learning_rate": 3.37060267248432e-06, "loss": 0.0157, "step": 6180 }, { "epoch": 0.10128446371594535, "grad_norm": 0.9360935091972351, "learning_rate": 3.3760567221161715e-06, "loss": 0.0162, "step": 6190 }, { "epoch": 0.1014480896670212, "grad_norm": 1.493471622467041, "learning_rate": 3.381510771748023e-06, "loss": 0.0144, "step": 6200 }, { "epoch": 0.10161171561809704, "grad_norm": 0.6363555192947388, "learning_rate": 3.386964821379875e-06, "loss": 0.0105, "step": 6210 }, { "epoch": 0.10177534156917287, "grad_norm": 0.7255340218544006, "learning_rate": 3.3924188710117266e-06, "loss": 0.0152, "step": 6220 }, { "epoch": 0.10193896752024871, "grad_norm": 1.0318663120269775, "learning_rate": 3.397872920643578e-06, "loss": 0.0143, "step": 6230 }, { "epoch": 0.10210259347132455, "grad_norm": 0.6117664575576782, "learning_rate": 3.4033269702754296e-06, "loss": 0.013, "step": 6240 }, { "epoch": 0.1022662194224004, "grad_norm": 1.5089282989501953, "learning_rate": 3.4087810199072813e-06, "loss": 0.0116, "step": 6250 }, { "epoch": 0.10242984537347623, "grad_norm": 0.6036588549613953, "learning_rate": 3.414235069539133e-06, "loss": 0.0096, "step": 6260 }, { "epoch": 0.10259347132455207, "grad_norm": 0.561668872833252, "learning_rate": 3.4196891191709847e-06, "loss": 0.0129, "step": 6270 }, { "epoch": 0.10275709727562791, "grad_norm": 0.7205589413642883, "learning_rate": 3.4251431688028364e-06, "loss": 0.0136, "step": 6280 }, { "epoch": 0.10292072322670376, "grad_norm": 0.7881771922111511, "learning_rate": 3.430597218434688e-06, "loss": 0.014, "step": 6290 }, { "epoch": 0.1030843491777796, "grad_norm": 0.5417429208755493, "learning_rate": 3.43605126806654e-06, "loss": 0.0121, "step": 6300 }, { "epoch": 0.10324797512885543, "grad_norm": 0.7294362783432007, "learning_rate": 3.4415053176983915e-06, "loss": 0.0133, "step": 6310 }, { "epoch": 0.10341160107993128, "grad_norm": 0.9415963888168335, "learning_rate": 3.446959367330243e-06, "loss": 0.0205, "step": 6320 }, { "epoch": 0.10357522703100712, "grad_norm": 0.7842119336128235, "learning_rate": 3.452413416962095e-06, "loss": 0.0168, "step": 6330 }, { "epoch": 0.10373885298208296, "grad_norm": 0.6940158605575562, "learning_rate": 3.4578674665939466e-06, "loss": 0.0156, "step": 6340 }, { "epoch": 0.10390247893315879, "grad_norm": 0.6025082468986511, "learning_rate": 3.463321516225798e-06, "loss": 0.0121, "step": 6350 }, { "epoch": 0.10406610488423464, "grad_norm": 0.3900011479854584, "learning_rate": 3.4687755658576496e-06, "loss": 0.0159, "step": 6360 }, { "epoch": 0.10422973083531048, "grad_norm": 1.0236190557479858, "learning_rate": 3.4742296154895013e-06, "loss": 0.0126, "step": 6370 }, { "epoch": 0.10439335678638632, "grad_norm": 0.6410085558891296, "learning_rate": 3.479683665121353e-06, "loss": 0.0099, "step": 6380 }, { "epoch": 0.10455698273746217, "grad_norm": 0.6382144093513489, "learning_rate": 3.4851377147532047e-06, "loss": 0.014, "step": 6390 }, { "epoch": 0.104720608688538, "grad_norm": 1.0055736303329468, "learning_rate": 3.4905917643850564e-06, "loss": 0.0145, "step": 6400 }, { "epoch": 0.10488423463961384, "grad_norm": 1.1841650009155273, "learning_rate": 3.496045814016908e-06, "loss": 0.0153, "step": 6410 }, { "epoch": 0.10504786059068968, "grad_norm": 0.5654468536376953, "learning_rate": 3.5014998636487598e-06, "loss": 0.0132, "step": 6420 }, { "epoch": 0.10521148654176553, "grad_norm": 0.9150072336196899, "learning_rate": 3.5069539132806115e-06, "loss": 0.0149, "step": 6430 }, { "epoch": 0.10537511249284136, "grad_norm": 0.4784562289714813, "learning_rate": 3.512407962912463e-06, "loss": 0.0114, "step": 6440 }, { "epoch": 0.1055387384439172, "grad_norm": 0.6295190453529358, "learning_rate": 3.517862012544315e-06, "loss": 0.0112, "step": 6450 }, { "epoch": 0.10570236439499305, "grad_norm": 0.5648689866065979, "learning_rate": 3.5233160621761657e-06, "loss": 0.0156, "step": 6460 }, { "epoch": 0.10586599034606889, "grad_norm": 1.2486478090286255, "learning_rate": 3.5287701118080174e-06, "loss": 0.0151, "step": 6470 }, { "epoch": 0.10602961629714473, "grad_norm": 0.2829054594039917, "learning_rate": 3.534224161439869e-06, "loss": 0.014, "step": 6480 }, { "epoch": 0.10619324224822056, "grad_norm": 1.5381412506103516, "learning_rate": 3.539678211071721e-06, "loss": 0.0175, "step": 6490 }, { "epoch": 0.1063568681992964, "grad_norm": 0.5836203098297119, "learning_rate": 3.5451322607035725e-06, "loss": 0.0099, "step": 6500 }, { "epoch": 0.10652049415037225, "grad_norm": 0.5512748956680298, "learning_rate": 3.5505863103354242e-06, "loss": 0.0149, "step": 6510 }, { "epoch": 0.1066841201014481, "grad_norm": 0.8949612379074097, "learning_rate": 3.556040359967276e-06, "loss": 0.0142, "step": 6520 }, { "epoch": 0.10684774605252394, "grad_norm": 0.6022013425827026, "learning_rate": 3.5614944095991276e-06, "loss": 0.013, "step": 6530 }, { "epoch": 0.10701137200359977, "grad_norm": 0.757368266582489, "learning_rate": 3.5669484592309793e-06, "loss": 0.0156, "step": 6540 }, { "epoch": 0.10717499795467561, "grad_norm": 0.646453857421875, "learning_rate": 3.572402508862831e-06, "loss": 0.0095, "step": 6550 }, { "epoch": 0.10733862390575145, "grad_norm": 1.159908413887024, "learning_rate": 3.5778565584946827e-06, "loss": 0.0106, "step": 6560 }, { "epoch": 0.1075022498568273, "grad_norm": 0.9116016030311584, "learning_rate": 3.583310608126534e-06, "loss": 0.0156, "step": 6570 }, { "epoch": 0.10766587580790313, "grad_norm": 0.705751359462738, "learning_rate": 3.5887646577583857e-06, "loss": 0.0101, "step": 6580 }, { "epoch": 0.10782950175897897, "grad_norm": 0.6236238479614258, "learning_rate": 3.5942187073902374e-06, "loss": 0.0158, "step": 6590 }, { "epoch": 0.10799312771005481, "grad_norm": 0.8098599910736084, "learning_rate": 3.599672757022089e-06, "loss": 0.0135, "step": 6600 }, { "epoch": 0.10815675366113066, "grad_norm": 1.268117070198059, "learning_rate": 3.605126806653941e-06, "loss": 0.0235, "step": 6610 }, { "epoch": 0.1083203796122065, "grad_norm": 0.9548227190971375, "learning_rate": 3.6105808562857925e-06, "loss": 0.0128, "step": 6620 }, { "epoch": 0.10848400556328233, "grad_norm": 0.6587949991226196, "learning_rate": 3.6160349059176442e-06, "loss": 0.0091, "step": 6630 }, { "epoch": 0.10864763151435818, "grad_norm": 0.785673975944519, "learning_rate": 3.621488955549496e-06, "loss": 0.0179, "step": 6640 }, { "epoch": 0.10881125746543402, "grad_norm": 0.6535679697990417, "learning_rate": 3.6269430051813476e-06, "loss": 0.009, "step": 6650 }, { "epoch": 0.10897488341650986, "grad_norm": 0.5656495094299316, "learning_rate": 3.6323970548131993e-06, "loss": 0.0128, "step": 6660 }, { "epoch": 0.10913850936758569, "grad_norm": 0.5163063406944275, "learning_rate": 3.637851104445051e-06, "loss": 0.0134, "step": 6670 }, { "epoch": 0.10930213531866154, "grad_norm": 1.133750557899475, "learning_rate": 3.6433051540769027e-06, "loss": 0.013, "step": 6680 }, { "epoch": 0.10946576126973738, "grad_norm": 1.0261653661727905, "learning_rate": 3.648759203708754e-06, "loss": 0.0123, "step": 6690 }, { "epoch": 0.10962938722081322, "grad_norm": 0.4472079575061798, "learning_rate": 3.6542132533406057e-06, "loss": 0.0173, "step": 6700 }, { "epoch": 0.10979301317188907, "grad_norm": 0.643719494342804, "learning_rate": 3.6596673029724574e-06, "loss": 0.012, "step": 6710 }, { "epoch": 0.1099566391229649, "grad_norm": 1.0469650030136108, "learning_rate": 3.665121352604309e-06, "loss": 0.0211, "step": 6720 }, { "epoch": 0.11012026507404074, "grad_norm": 0.4100765287876129, "learning_rate": 3.670575402236161e-06, "loss": 0.011, "step": 6730 }, { "epoch": 0.11028389102511658, "grad_norm": 0.8248699307441711, "learning_rate": 3.676029451868012e-06, "loss": 0.0111, "step": 6740 }, { "epoch": 0.11044751697619243, "grad_norm": 0.5361427068710327, "learning_rate": 3.681483501499864e-06, "loss": 0.0101, "step": 6750 }, { "epoch": 0.11061114292726827, "grad_norm": 0.84926837682724, "learning_rate": 3.6869375511317155e-06, "loss": 0.0142, "step": 6760 }, { "epoch": 0.1107747688783441, "grad_norm": 0.618086040019989, "learning_rate": 3.692391600763567e-06, "loss": 0.011, "step": 6770 }, { "epoch": 0.11093839482941995, "grad_norm": 0.7282034158706665, "learning_rate": 3.697845650395419e-06, "loss": 0.0126, "step": 6780 }, { "epoch": 0.11110202078049579, "grad_norm": 0.5833308696746826, "learning_rate": 3.7032997000272706e-06, "loss": 0.0086, "step": 6790 }, { "epoch": 0.11126564673157163, "grad_norm": 0.5305668115615845, "learning_rate": 3.708753749659122e-06, "loss": 0.0094, "step": 6800 }, { "epoch": 0.11142927268264746, "grad_norm": 0.6998348832130432, "learning_rate": 3.7142077992909736e-06, "loss": 0.019, "step": 6810 }, { "epoch": 0.1115928986337233, "grad_norm": 0.7521585822105408, "learning_rate": 3.7196618489228253e-06, "loss": 0.0169, "step": 6820 }, { "epoch": 0.11175652458479915, "grad_norm": 1.0378811359405518, "learning_rate": 3.725115898554677e-06, "loss": 0.0155, "step": 6830 }, { "epoch": 0.111920150535875, "grad_norm": 0.5311976671218872, "learning_rate": 3.7305699481865287e-06, "loss": 0.0151, "step": 6840 }, { "epoch": 0.11208377648695084, "grad_norm": 0.7835608720779419, "learning_rate": 3.7360239978183804e-06, "loss": 0.0134, "step": 6850 }, { "epoch": 0.11224740243802667, "grad_norm": 0.7429501414299011, "learning_rate": 3.741478047450232e-06, "loss": 0.0155, "step": 6860 }, { "epoch": 0.11241102838910251, "grad_norm": 0.788200855255127, "learning_rate": 3.7469320970820838e-06, "loss": 0.0205, "step": 6870 }, { "epoch": 0.11257465434017835, "grad_norm": 1.2643417119979858, "learning_rate": 3.7523861467139355e-06, "loss": 0.016, "step": 6880 }, { "epoch": 0.1127382802912542, "grad_norm": 0.457520455121994, "learning_rate": 3.757840196345787e-06, "loss": 0.0071, "step": 6890 }, { "epoch": 0.11290190624233003, "grad_norm": 0.2600027620792389, "learning_rate": 3.763294245977639e-06, "loss": 0.0186, "step": 6900 }, { "epoch": 0.11306553219340587, "grad_norm": 0.6887505650520325, "learning_rate": 3.7687482956094906e-06, "loss": 0.0127, "step": 6910 }, { "epoch": 0.11322915814448171, "grad_norm": 0.4825083911418915, "learning_rate": 3.774202345241342e-06, "loss": 0.01, "step": 6920 }, { "epoch": 0.11339278409555756, "grad_norm": 0.723751962184906, "learning_rate": 3.7796563948731936e-06, "loss": 0.0128, "step": 6930 }, { "epoch": 0.1135564100466334, "grad_norm": 0.7400292158126831, "learning_rate": 3.7851104445050453e-06, "loss": 0.0122, "step": 6940 }, { "epoch": 0.11372003599770923, "grad_norm": 0.8067269921302795, "learning_rate": 3.790564494136897e-06, "loss": 0.0103, "step": 6950 }, { "epoch": 0.11388366194878508, "grad_norm": 0.7380013465881348, "learning_rate": 3.7960185437687487e-06, "loss": 0.0128, "step": 6960 }, { "epoch": 0.11404728789986092, "grad_norm": 0.9683087468147278, "learning_rate": 3.8014725934006004e-06, "loss": 0.0173, "step": 6970 }, { "epoch": 0.11421091385093676, "grad_norm": 0.5647590756416321, "learning_rate": 3.806926643032452e-06, "loss": 0.0153, "step": 6980 }, { "epoch": 0.11437453980201259, "grad_norm": 0.5487265586853027, "learning_rate": 3.8123806926643038e-06, "loss": 0.013, "step": 6990 }, { "epoch": 0.11453816575308844, "grad_norm": 0.7692117094993591, "learning_rate": 3.817834742296155e-06, "loss": 0.0113, "step": 7000 }, { "epoch": 0.11470179170416428, "grad_norm": 0.7856493592262268, "learning_rate": 3.823288791928007e-06, "loss": 0.0104, "step": 7010 }, { "epoch": 0.11486541765524012, "grad_norm": 1.0421538352966309, "learning_rate": 3.8287428415598584e-06, "loss": 0.0129, "step": 7020 }, { "epoch": 0.11502904360631597, "grad_norm": 0.684248685836792, "learning_rate": 3.83419689119171e-06, "loss": 0.0143, "step": 7030 }, { "epoch": 0.1151926695573918, "grad_norm": 0.9292379021644592, "learning_rate": 3.839650940823562e-06, "loss": 0.0174, "step": 7040 }, { "epoch": 0.11535629550846764, "grad_norm": 1.2012156248092651, "learning_rate": 3.8451049904554135e-06, "loss": 0.0114, "step": 7050 }, { "epoch": 0.11551992145954348, "grad_norm": 0.4060277044773102, "learning_rate": 3.850559040087265e-06, "loss": 0.0112, "step": 7060 }, { "epoch": 0.11568354741061933, "grad_norm": 0.5306548476219177, "learning_rate": 3.856013089719117e-06, "loss": 0.0151, "step": 7070 }, { "epoch": 0.11584717336169517, "grad_norm": 0.8730077743530273, "learning_rate": 3.861467139350969e-06, "loss": 0.017, "step": 7080 }, { "epoch": 0.116010799312771, "grad_norm": 0.2362775057554245, "learning_rate": 3.86692118898282e-06, "loss": 0.0142, "step": 7090 }, { "epoch": 0.11617442526384684, "grad_norm": 1.0708410739898682, "learning_rate": 3.872375238614672e-06, "loss": 0.0127, "step": 7100 }, { "epoch": 0.11633805121492269, "grad_norm": 0.8410795331001282, "learning_rate": 3.877829288246524e-06, "loss": 0.0138, "step": 7110 }, { "epoch": 0.11650167716599853, "grad_norm": 0.7324991822242737, "learning_rate": 3.8832833378783755e-06, "loss": 0.0136, "step": 7120 }, { "epoch": 0.11666530311707436, "grad_norm": 0.8725026249885559, "learning_rate": 3.888737387510227e-06, "loss": 0.014, "step": 7130 }, { "epoch": 0.1168289290681502, "grad_norm": 0.6370802521705627, "learning_rate": 3.894191437142078e-06, "loss": 0.0095, "step": 7140 }, { "epoch": 0.11699255501922605, "grad_norm": 0.6782422065734863, "learning_rate": 3.89964548677393e-06, "loss": 0.0156, "step": 7150 }, { "epoch": 0.11715618097030189, "grad_norm": 0.8580349683761597, "learning_rate": 3.905099536405781e-06, "loss": 0.015, "step": 7160 }, { "epoch": 0.11731980692137774, "grad_norm": 1.2276225090026855, "learning_rate": 3.910553586037633e-06, "loss": 0.0122, "step": 7170 }, { "epoch": 0.11748343287245357, "grad_norm": 0.5423526167869568, "learning_rate": 3.916007635669485e-06, "loss": 0.0139, "step": 7180 }, { "epoch": 0.11764705882352941, "grad_norm": 0.8889505863189697, "learning_rate": 3.9214616853013365e-06, "loss": 0.0144, "step": 7190 }, { "epoch": 0.11781068477460525, "grad_norm": 0.7310557961463928, "learning_rate": 3.926915734933188e-06, "loss": 0.0094, "step": 7200 }, { "epoch": 0.1179743107256811, "grad_norm": 0.5209395885467529, "learning_rate": 3.93236978456504e-06, "loss": 0.0128, "step": 7210 }, { "epoch": 0.11813793667675693, "grad_norm": 1.2866625785827637, "learning_rate": 3.937823834196892e-06, "loss": 0.0123, "step": 7220 }, { "epoch": 0.11830156262783277, "grad_norm": 1.1112022399902344, "learning_rate": 3.943277883828743e-06, "loss": 0.01, "step": 7230 }, { "epoch": 0.11846518857890861, "grad_norm": 0.7369223237037659, "learning_rate": 3.948731933460595e-06, "loss": 0.0154, "step": 7240 }, { "epoch": 0.11862881452998446, "grad_norm": 0.6381825804710388, "learning_rate": 3.954185983092447e-06, "loss": 0.0065, "step": 7250 }, { "epoch": 0.1187924404810603, "grad_norm": 0.2766132652759552, "learning_rate": 3.9596400327242976e-06, "loss": 0.0161, "step": 7260 }, { "epoch": 0.11895606643213613, "grad_norm": 0.6037293076515198, "learning_rate": 3.965094082356149e-06, "loss": 0.0105, "step": 7270 }, { "epoch": 0.11911969238321198, "grad_norm": 0.34659820795059204, "learning_rate": 3.970548131988001e-06, "loss": 0.0116, "step": 7280 }, { "epoch": 0.11928331833428782, "grad_norm": 0.9719557166099548, "learning_rate": 3.976002181619853e-06, "loss": 0.0129, "step": 7290 }, { "epoch": 0.11944694428536366, "grad_norm": 0.49065473675727844, "learning_rate": 3.981456231251704e-06, "loss": 0.011, "step": 7300 }, { "epoch": 0.11961057023643949, "grad_norm": 1.0903617143630981, "learning_rate": 3.986910280883556e-06, "loss": 0.0136, "step": 7310 }, { "epoch": 0.11977419618751534, "grad_norm": 0.6037145853042603, "learning_rate": 3.992364330515408e-06, "loss": 0.0124, "step": 7320 }, { "epoch": 0.11993782213859118, "grad_norm": 0.8191437721252441, "learning_rate": 3.9978183801472595e-06, "loss": 0.0117, "step": 7330 }, { "epoch": 0.12010144808966702, "grad_norm": 0.6067626476287842, "learning_rate": 4.003272429779111e-06, "loss": 0.0113, "step": 7340 }, { "epoch": 0.12026507404074287, "grad_norm": 0.831027626991272, "learning_rate": 4.008726479410963e-06, "loss": 0.0187, "step": 7350 }, { "epoch": 0.1204286999918187, "grad_norm": 0.6959092617034912, "learning_rate": 4.014180529042815e-06, "loss": 0.0127, "step": 7360 }, { "epoch": 0.12059232594289454, "grad_norm": 1.0949978828430176, "learning_rate": 4.019634578674666e-06, "loss": 0.0153, "step": 7370 }, { "epoch": 0.12075595189397038, "grad_norm": 0.704740047454834, "learning_rate": 4.025088628306518e-06, "loss": 0.0117, "step": 7380 }, { "epoch": 0.12091957784504623, "grad_norm": 0.9571103453636169, "learning_rate": 4.03054267793837e-06, "loss": 0.0202, "step": 7390 }, { "epoch": 0.12108320379612207, "grad_norm": 0.8239849209785461, "learning_rate": 4.035996727570221e-06, "loss": 0.0088, "step": 7400 }, { "epoch": 0.1212468297471979, "grad_norm": 0.625403106212616, "learning_rate": 4.041450777202073e-06, "loss": 0.0097, "step": 7410 }, { "epoch": 0.12141045569827374, "grad_norm": 0.4190848171710968, "learning_rate": 4.046904826833925e-06, "loss": 0.016, "step": 7420 }, { "epoch": 0.12157408164934959, "grad_norm": 0.7225220203399658, "learning_rate": 4.0523588764657765e-06, "loss": 0.0133, "step": 7430 }, { "epoch": 0.12173770760042543, "grad_norm": 0.2791774868965149, "learning_rate": 4.057812926097628e-06, "loss": 0.0098, "step": 7440 }, { "epoch": 0.12190133355150126, "grad_norm": 0.30875951051712036, "learning_rate": 4.06326697572948e-06, "loss": 0.0127, "step": 7450 }, { "epoch": 0.1220649595025771, "grad_norm": 0.7031594514846802, "learning_rate": 4.068721025361332e-06, "loss": 0.0146, "step": 7460 }, { "epoch": 0.12222858545365295, "grad_norm": 0.4741123914718628, "learning_rate": 4.074175074993183e-06, "loss": 0.0083, "step": 7470 }, { "epoch": 0.12239221140472879, "grad_norm": 0.5694860219955444, "learning_rate": 4.079629124625035e-06, "loss": 0.0085, "step": 7480 }, { "epoch": 0.12255583735580464, "grad_norm": 0.5967519283294678, "learning_rate": 4.085083174256886e-06, "loss": 0.0108, "step": 7490 }, { "epoch": 0.12271946330688047, "grad_norm": 0.7985425591468811, "learning_rate": 4.0905372238887375e-06, "loss": 0.0141, "step": 7500 }, { "epoch": 0.12288308925795631, "grad_norm": 0.4691658616065979, "learning_rate": 4.095991273520589e-06, "loss": 0.0189, "step": 7510 }, { "epoch": 0.12304671520903215, "grad_norm": 1.1882740259170532, "learning_rate": 4.101445323152441e-06, "loss": 0.0125, "step": 7520 }, { "epoch": 0.123210341160108, "grad_norm": 0.85139399766922, "learning_rate": 4.106899372784293e-06, "loss": 0.0096, "step": 7530 }, { "epoch": 0.12337396711118383, "grad_norm": 0.7582197189331055, "learning_rate": 4.112353422416144e-06, "loss": 0.014, "step": 7540 }, { "epoch": 0.12353759306225967, "grad_norm": 0.5662003755569458, "learning_rate": 4.117807472047996e-06, "loss": 0.0137, "step": 7550 }, { "epoch": 0.12370121901333551, "grad_norm": 0.40338239073753357, "learning_rate": 4.123261521679848e-06, "loss": 0.0111, "step": 7560 }, { "epoch": 0.12386484496441136, "grad_norm": 0.857072651386261, "learning_rate": 4.1287155713116995e-06, "loss": 0.0119, "step": 7570 }, { "epoch": 0.1240284709154872, "grad_norm": 0.4618501663208008, "learning_rate": 4.134169620943551e-06, "loss": 0.0105, "step": 7580 }, { "epoch": 0.12419209686656303, "grad_norm": 0.8550952672958374, "learning_rate": 4.139623670575403e-06, "loss": 0.0168, "step": 7590 }, { "epoch": 0.12435572281763888, "grad_norm": 0.7462050318717957, "learning_rate": 4.145077720207254e-06, "loss": 0.0125, "step": 7600 }, { "epoch": 0.12451934876871472, "grad_norm": 0.9552479982376099, "learning_rate": 4.150531769839105e-06, "loss": 0.0119, "step": 7610 }, { "epoch": 0.12468297471979056, "grad_norm": 0.744327187538147, "learning_rate": 4.155985819470957e-06, "loss": 0.0134, "step": 7620 }, { "epoch": 0.1248466006708664, "grad_norm": 0.8605981469154358, "learning_rate": 4.161439869102809e-06, "loss": 0.012, "step": 7630 }, { "epoch": 0.12501022662194225, "grad_norm": 0.968020498752594, "learning_rate": 4.1668939187346605e-06, "loss": 0.0099, "step": 7640 }, { "epoch": 0.12517385257301808, "grad_norm": 1.0020442008972168, "learning_rate": 4.172347968366512e-06, "loss": 0.0102, "step": 7650 }, { "epoch": 0.1253374785240939, "grad_norm": 0.9526031017303467, "learning_rate": 4.177802017998364e-06, "loss": 0.0114, "step": 7660 }, { "epoch": 0.12550110447516977, "grad_norm": 0.3526127338409424, "learning_rate": 4.183256067630216e-06, "loss": 0.0106, "step": 7670 }, { "epoch": 0.1256647304262456, "grad_norm": 1.0006099939346313, "learning_rate": 4.188710117262067e-06, "loss": 0.0151, "step": 7680 }, { "epoch": 0.12582835637732145, "grad_norm": 0.6407269835472107, "learning_rate": 4.194164166893919e-06, "loss": 0.0172, "step": 7690 }, { "epoch": 0.12599198232839728, "grad_norm": 0.9559716582298279, "learning_rate": 4.199618216525771e-06, "loss": 0.0117, "step": 7700 }, { "epoch": 0.1261556082794731, "grad_norm": 0.39195576310157776, "learning_rate": 4.205072266157622e-06, "loss": 0.0085, "step": 7710 }, { "epoch": 0.12631923423054897, "grad_norm": 0.4272684156894684, "learning_rate": 4.210526315789474e-06, "loss": 0.0104, "step": 7720 }, { "epoch": 0.1264828601816248, "grad_norm": 0.30527105927467346, "learning_rate": 4.215980365421326e-06, "loss": 0.0105, "step": 7730 }, { "epoch": 0.12664648613270066, "grad_norm": 0.7783389687538147, "learning_rate": 4.2214344150531775e-06, "loss": 0.013, "step": 7740 }, { "epoch": 0.1268101120837765, "grad_norm": 1.2681180238723755, "learning_rate": 4.226888464685029e-06, "loss": 0.0116, "step": 7750 }, { "epoch": 0.12697373803485232, "grad_norm": 0.7202063798904419, "learning_rate": 4.232342514316881e-06, "loss": 0.0147, "step": 7760 }, { "epoch": 0.12713736398592818, "grad_norm": 0.59439617395401, "learning_rate": 4.237796563948733e-06, "loss": 0.0155, "step": 7770 }, { "epoch": 0.127300989937004, "grad_norm": 0.8765865564346313, "learning_rate": 4.243250613580584e-06, "loss": 0.0101, "step": 7780 }, { "epoch": 0.12746461588807986, "grad_norm": 0.6685693264007568, "learning_rate": 4.248704663212436e-06, "loss": 0.0137, "step": 7790 }, { "epoch": 0.1276282418391557, "grad_norm": 8.568276405334473, "learning_rate": 4.254158712844288e-06, "loss": 0.0159, "step": 7800 }, { "epoch": 0.12779186779023152, "grad_norm": 0.7810708284378052, "learning_rate": 4.2596127624761394e-06, "loss": 0.0156, "step": 7810 }, { "epoch": 0.12795549374130738, "grad_norm": 0.5474937558174133, "learning_rate": 4.265066812107991e-06, "loss": 0.0068, "step": 7820 }, { "epoch": 0.1281191196923832, "grad_norm": 0.6665557026863098, "learning_rate": 4.270520861739842e-06, "loss": 0.012, "step": 7830 }, { "epoch": 0.12828274564345904, "grad_norm": 1.1190690994262695, "learning_rate": 4.275974911371694e-06, "loss": 0.0127, "step": 7840 }, { "epoch": 0.1284463715945349, "grad_norm": 0.34522131085395813, "learning_rate": 4.281428961003545e-06, "loss": 0.0075, "step": 7850 }, { "epoch": 0.12860999754561073, "grad_norm": 0.5141227841377258, "learning_rate": 4.286883010635397e-06, "loss": 0.0136, "step": 7860 }, { "epoch": 0.12877362349668658, "grad_norm": 0.9033347368240356, "learning_rate": 4.292337060267249e-06, "loss": 0.0099, "step": 7870 }, { "epoch": 0.12893724944776241, "grad_norm": 0.48658838868141174, "learning_rate": 4.2977911098991005e-06, "loss": 0.0129, "step": 7880 }, { "epoch": 0.12910087539883824, "grad_norm": 0.8204610347747803, "learning_rate": 4.303245159530952e-06, "loss": 0.012, "step": 7890 }, { "epoch": 0.1292645013499141, "grad_norm": 0.642123281955719, "learning_rate": 4.308699209162804e-06, "loss": 0.013, "step": 7900 }, { "epoch": 0.12942812730098993, "grad_norm": 0.7733058333396912, "learning_rate": 4.314153258794656e-06, "loss": 0.0152, "step": 7910 }, { "epoch": 0.1295917532520658, "grad_norm": 0.7520296573638916, "learning_rate": 4.319607308426507e-06, "loss": 0.0094, "step": 7920 }, { "epoch": 0.12975537920314162, "grad_norm": 0.7927498817443848, "learning_rate": 4.325061358058359e-06, "loss": 0.0111, "step": 7930 }, { "epoch": 0.12991900515421745, "grad_norm": 0.5819029211997986, "learning_rate": 4.33051540769021e-06, "loss": 0.0094, "step": 7940 }, { "epoch": 0.1300826311052933, "grad_norm": 0.527286946773529, "learning_rate": 4.3359694573220615e-06, "loss": 0.0131, "step": 7950 }, { "epoch": 0.13024625705636914, "grad_norm": 1.093508005142212, "learning_rate": 4.341423506953913e-06, "loss": 0.0104, "step": 7960 }, { "epoch": 0.130409883007445, "grad_norm": 0.536765456199646, "learning_rate": 4.346877556585765e-06, "loss": 0.0102, "step": 7970 }, { "epoch": 0.13057350895852082, "grad_norm": 0.7686751484870911, "learning_rate": 4.352331606217617e-06, "loss": 0.0107, "step": 7980 }, { "epoch": 0.13073713490959665, "grad_norm": 0.7606210112571716, "learning_rate": 4.357785655849468e-06, "loss": 0.013, "step": 7990 }, { "epoch": 0.1309007608606725, "grad_norm": 0.720030665397644, "learning_rate": 4.36323970548132e-06, "loss": 0.0147, "step": 8000 }, { "epoch": 0.13106438681174834, "grad_norm": 1.2556663751602173, "learning_rate": 4.368693755113172e-06, "loss": 0.0121, "step": 8010 }, { "epoch": 0.1312280127628242, "grad_norm": 0.6101266145706177, "learning_rate": 4.3741478047450235e-06, "loss": 0.0091, "step": 8020 }, { "epoch": 0.13139163871390003, "grad_norm": 1.2447086572647095, "learning_rate": 4.379601854376875e-06, "loss": 0.0112, "step": 8030 }, { "epoch": 0.13155526466497586, "grad_norm": 0.2675427198410034, "learning_rate": 4.385055904008727e-06, "loss": 0.0102, "step": 8040 }, { "epoch": 0.13171889061605171, "grad_norm": 0.6429067254066467, "learning_rate": 4.3905099536405786e-06, "loss": 0.0101, "step": 8050 }, { "epoch": 0.13188251656712754, "grad_norm": 0.5738160014152527, "learning_rate": 4.39596400327243e-06, "loss": 0.0075, "step": 8060 }, { "epoch": 0.13204614251820337, "grad_norm": 0.9842446446418762, "learning_rate": 4.401418052904282e-06, "loss": 0.0105, "step": 8070 }, { "epoch": 0.13220976846927923, "grad_norm": 0.5476564168930054, "learning_rate": 4.406872102536134e-06, "loss": 0.0112, "step": 8080 }, { "epoch": 0.13237339442035506, "grad_norm": 0.4678577482700348, "learning_rate": 4.412326152167985e-06, "loss": 0.0101, "step": 8090 }, { "epoch": 0.13253702037143092, "grad_norm": 0.7975099682807922, "learning_rate": 4.417780201799837e-06, "loss": 0.0095, "step": 8100 }, { "epoch": 0.13270064632250675, "grad_norm": 0.5060747861862183, "learning_rate": 4.423234251431689e-06, "loss": 0.0094, "step": 8110 }, { "epoch": 0.13286427227358258, "grad_norm": 0.8256040811538696, "learning_rate": 4.4286883010635405e-06, "loss": 0.0093, "step": 8120 }, { "epoch": 0.13302789822465844, "grad_norm": 0.6360666751861572, "learning_rate": 4.434142350695392e-06, "loss": 0.0126, "step": 8130 }, { "epoch": 0.13319152417573427, "grad_norm": 0.9268854260444641, "learning_rate": 4.439596400327244e-06, "loss": 0.009, "step": 8140 }, { "epoch": 0.13335515012681012, "grad_norm": 0.4691714346408844, "learning_rate": 4.4450504499590956e-06, "loss": 0.0115, "step": 8150 }, { "epoch": 0.13351877607788595, "grad_norm": 0.8491403460502625, "learning_rate": 4.450504499590947e-06, "loss": 0.0156, "step": 8160 }, { "epoch": 0.13368240202896178, "grad_norm": 0.5958046317100525, "learning_rate": 4.455958549222798e-06, "loss": 0.0096, "step": 8170 }, { "epoch": 0.13384602798003764, "grad_norm": 0.8047823905944824, "learning_rate": 4.46141259885465e-06, "loss": 0.013, "step": 8180 }, { "epoch": 0.13400965393111347, "grad_norm": 0.5287350416183472, "learning_rate": 4.4668666484865015e-06, "loss": 0.0094, "step": 8190 }, { "epoch": 0.13417327988218933, "grad_norm": 0.8187251687049866, "learning_rate": 4.472320698118353e-06, "loss": 0.0171, "step": 8200 }, { "epoch": 0.13433690583326516, "grad_norm": 0.8458852171897888, "learning_rate": 4.477774747750205e-06, "loss": 0.0119, "step": 8210 }, { "epoch": 0.134500531784341, "grad_norm": 0.6979679465293884, "learning_rate": 4.483228797382057e-06, "loss": 0.0121, "step": 8220 }, { "epoch": 0.13466415773541685, "grad_norm": 0.573388397693634, "learning_rate": 4.488682847013908e-06, "loss": 0.0115, "step": 8230 }, { "epoch": 0.13482778368649267, "grad_norm": 0.6398588418960571, "learning_rate": 4.49413689664576e-06, "loss": 0.0118, "step": 8240 }, { "epoch": 0.1349914096375685, "grad_norm": 0.4842328429222107, "learning_rate": 4.499590946277612e-06, "loss": 0.0092, "step": 8250 }, { "epoch": 0.13515503558864436, "grad_norm": 0.6921097636222839, "learning_rate": 4.5050449959094634e-06, "loss": 0.01, "step": 8260 }, { "epoch": 0.1353186615397202, "grad_norm": 0.7509792447090149, "learning_rate": 4.510499045541315e-06, "loss": 0.0095, "step": 8270 }, { "epoch": 0.13548228749079605, "grad_norm": 0.7071582674980164, "learning_rate": 4.515953095173167e-06, "loss": 0.0103, "step": 8280 }, { "epoch": 0.13564591344187188, "grad_norm": 0.987937331199646, "learning_rate": 4.521407144805018e-06, "loss": 0.0126, "step": 8290 }, { "epoch": 0.1358095393929477, "grad_norm": 1.0022671222686768, "learning_rate": 4.526861194436869e-06, "loss": 0.015, "step": 8300 }, { "epoch": 0.13597316534402357, "grad_norm": 0.4881025552749634, "learning_rate": 4.532315244068721e-06, "loss": 0.0153, "step": 8310 }, { "epoch": 0.1361367912950994, "grad_norm": 0.6487968564033508, "learning_rate": 4.537769293700573e-06, "loss": 0.0116, "step": 8320 }, { "epoch": 0.13630041724617525, "grad_norm": 0.5809783935546875, "learning_rate": 4.5432233433324245e-06, "loss": 0.0104, "step": 8330 }, { "epoch": 0.13646404319725108, "grad_norm": 0.45486441254615784, "learning_rate": 4.548677392964276e-06, "loss": 0.0118, "step": 8340 }, { "epoch": 0.1366276691483269, "grad_norm": 0.5191390514373779, "learning_rate": 4.554131442596128e-06, "loss": 0.0091, "step": 8350 }, { "epoch": 0.13679129509940277, "grad_norm": 0.649986982345581, "learning_rate": 4.55958549222798e-06, "loss": 0.0079, "step": 8360 }, { "epoch": 0.1369549210504786, "grad_norm": 0.7410485744476318, "learning_rate": 4.565039541859831e-06, "loss": 0.009, "step": 8370 }, { "epoch": 0.13711854700155446, "grad_norm": 0.961081326007843, "learning_rate": 4.570493591491683e-06, "loss": 0.0122, "step": 8380 }, { "epoch": 0.1372821729526303, "grad_norm": 0.668884813785553, "learning_rate": 4.575947641123535e-06, "loss": 0.0112, "step": 8390 }, { "epoch": 0.13744579890370612, "grad_norm": 0.5774788856506348, "learning_rate": 4.581401690755386e-06, "loss": 0.0119, "step": 8400 }, { "epoch": 0.13760942485478198, "grad_norm": 0.8657992482185364, "learning_rate": 4.586855740387238e-06, "loss": 0.0094, "step": 8410 }, { "epoch": 0.1377730508058578, "grad_norm": 0.6547164916992188, "learning_rate": 4.59230979001909e-06, "loss": 0.0103, "step": 8420 }, { "epoch": 0.13793667675693366, "grad_norm": 0.783018946647644, "learning_rate": 4.5977638396509415e-06, "loss": 0.0143, "step": 8430 }, { "epoch": 0.1381003027080095, "grad_norm": 0.7820207476615906, "learning_rate": 4.603217889282793e-06, "loss": 0.0114, "step": 8440 }, { "epoch": 0.13826392865908532, "grad_norm": 0.6100033521652222, "learning_rate": 4.608671938914645e-06, "loss": 0.011, "step": 8450 }, { "epoch": 0.13842755461016118, "grad_norm": 0.4886954128742218, "learning_rate": 4.614125988546497e-06, "loss": 0.0104, "step": 8460 }, { "epoch": 0.138591180561237, "grad_norm": 0.31829285621643066, "learning_rate": 4.619580038178348e-06, "loss": 0.0137, "step": 8470 }, { "epoch": 0.13875480651231284, "grad_norm": 0.8497973084449768, "learning_rate": 4.6250340878102e-06, "loss": 0.0107, "step": 8480 }, { "epoch": 0.1389184324633887, "grad_norm": 0.42186859250068665, "learning_rate": 4.630488137442052e-06, "loss": 0.0145, "step": 8490 }, { "epoch": 0.13908205841446453, "grad_norm": 0.341957688331604, "learning_rate": 4.635942187073903e-06, "loss": 0.0091, "step": 8500 }, { "epoch": 0.13924568436554038, "grad_norm": 0.8097950220108032, "learning_rate": 4.641396236705755e-06, "loss": 0.0114, "step": 8510 }, { "epoch": 0.13940931031661621, "grad_norm": 0.5531971454620361, "learning_rate": 4.646850286337606e-06, "loss": 0.0086, "step": 8520 }, { "epoch": 0.13957293626769204, "grad_norm": 0.42774277925491333, "learning_rate": 4.652304335969458e-06, "loss": 0.0135, "step": 8530 }, { "epoch": 0.1397365622187679, "grad_norm": 0.630917489528656, "learning_rate": 4.657758385601309e-06, "loss": 0.0105, "step": 8540 }, { "epoch": 0.13990018816984373, "grad_norm": 0.35983219742774963, "learning_rate": 4.663212435233161e-06, "loss": 0.0097, "step": 8550 }, { "epoch": 0.1400638141209196, "grad_norm": 0.909866452217102, "learning_rate": 4.668666484865013e-06, "loss": 0.0092, "step": 8560 }, { "epoch": 0.14022744007199542, "grad_norm": 0.5690034031867981, "learning_rate": 4.6741205344968645e-06, "loss": 0.0121, "step": 8570 }, { "epoch": 0.14039106602307125, "grad_norm": 0.9370225071907043, "learning_rate": 4.679574584128716e-06, "loss": 0.0124, "step": 8580 }, { "epoch": 0.1405546919741471, "grad_norm": 1.0086448192596436, "learning_rate": 4.685028633760568e-06, "loss": 0.014, "step": 8590 }, { "epoch": 0.14071831792522294, "grad_norm": 0.49329251050949097, "learning_rate": 4.6904826833924196e-06, "loss": 0.013, "step": 8600 }, { "epoch": 0.1408819438762988, "grad_norm": 0.5797995924949646, "learning_rate": 4.695936733024271e-06, "loss": 0.0088, "step": 8610 }, { "epoch": 0.14104556982737462, "grad_norm": 0.498033732175827, "learning_rate": 4.701390782656123e-06, "loss": 0.0098, "step": 8620 }, { "epoch": 0.14120919577845045, "grad_norm": 0.8719580769538879, "learning_rate": 4.706844832287974e-06, "loss": 0.0127, "step": 8630 }, { "epoch": 0.1413728217295263, "grad_norm": 0.6618049740791321, "learning_rate": 4.7122988819198255e-06, "loss": 0.015, "step": 8640 }, { "epoch": 0.14153644768060214, "grad_norm": 0.41451624035835266, "learning_rate": 4.717752931551677e-06, "loss": 0.012, "step": 8650 }, { "epoch": 0.141700073631678, "grad_norm": 0.479082852602005, "learning_rate": 4.723206981183529e-06, "loss": 0.0095, "step": 8660 }, { "epoch": 0.14186369958275383, "grad_norm": 0.6567007303237915, "learning_rate": 4.728661030815381e-06, "loss": 0.0146, "step": 8670 }, { "epoch": 0.14202732553382966, "grad_norm": 1.1912174224853516, "learning_rate": 4.734115080447232e-06, "loss": 0.0105, "step": 8680 }, { "epoch": 0.14219095148490551, "grad_norm": 0.5104548931121826, "learning_rate": 4.739569130079084e-06, "loss": 0.0093, "step": 8690 }, { "epoch": 0.14235457743598134, "grad_norm": 0.5026189088821411, "learning_rate": 4.745023179710936e-06, "loss": 0.0119, "step": 8700 }, { "epoch": 0.14251820338705717, "grad_norm": 0.8047844767570496, "learning_rate": 4.750477229342787e-06, "loss": 0.0112, "step": 8710 }, { "epoch": 0.14268182933813303, "grad_norm": 0.933950662612915, "learning_rate": 4.755931278974639e-06, "loss": 0.0142, "step": 8720 }, { "epoch": 0.14284545528920886, "grad_norm": 0.31917062401771545, "learning_rate": 4.761385328606491e-06, "loss": 0.0174, "step": 8730 }, { "epoch": 0.14300908124028472, "grad_norm": 1.0437222719192505, "learning_rate": 4.766839378238342e-06, "loss": 0.0111, "step": 8740 }, { "epoch": 0.14317270719136055, "grad_norm": 0.7204347252845764, "learning_rate": 4.772293427870193e-06, "loss": 0.0136, "step": 8750 }, { "epoch": 0.14333633314243638, "grad_norm": 0.5480690598487854, "learning_rate": 4.777747477502045e-06, "loss": 0.0108, "step": 8760 }, { "epoch": 0.14349995909351224, "grad_norm": 1.1413639783859253, "learning_rate": 4.783201527133897e-06, "loss": 0.0132, "step": 8770 }, { "epoch": 0.14366358504458807, "grad_norm": 0.5628373622894287, "learning_rate": 4.7886555767657485e-06, "loss": 0.0095, "step": 8780 }, { "epoch": 0.14382721099566392, "grad_norm": 0.8018162250518799, "learning_rate": 4.7941096263976e-06, "loss": 0.0095, "step": 8790 }, { "epoch": 0.14399083694673975, "grad_norm": 0.8129953145980835, "learning_rate": 4.799563676029452e-06, "loss": 0.0104, "step": 8800 }, { "epoch": 0.14415446289781558, "grad_norm": 0.572077751159668, "learning_rate": 4.805017725661304e-06, "loss": 0.0072, "step": 8810 }, { "epoch": 0.14431808884889144, "grad_norm": 0.47636815905570984, "learning_rate": 4.810471775293155e-06, "loss": 0.0102, "step": 8820 }, { "epoch": 0.14448171479996727, "grad_norm": 0.7629075646400452, "learning_rate": 4.815925824925007e-06, "loss": 0.0113, "step": 8830 }, { "epoch": 0.14464534075104313, "grad_norm": 0.18702660501003265, "learning_rate": 4.821379874556859e-06, "loss": 0.007, "step": 8840 }, { "epoch": 0.14480896670211896, "grad_norm": 0.4570651948451996, "learning_rate": 4.82683392418871e-06, "loss": 0.0113, "step": 8850 }, { "epoch": 0.1449725926531948, "grad_norm": 0.3744865655899048, "learning_rate": 4.832287973820562e-06, "loss": 0.0122, "step": 8860 }, { "epoch": 0.14513621860427064, "grad_norm": 0.49121788144111633, "learning_rate": 4.837742023452414e-06, "loss": 0.009, "step": 8870 }, { "epoch": 0.14529984455534647, "grad_norm": 0.41438502073287964, "learning_rate": 4.8431960730842655e-06, "loss": 0.0119, "step": 8880 }, { "epoch": 0.14546347050642233, "grad_norm": 0.4890996813774109, "learning_rate": 4.848650122716117e-06, "loss": 0.0101, "step": 8890 }, { "epoch": 0.14562709645749816, "grad_norm": 0.7709494233131409, "learning_rate": 4.854104172347969e-06, "loss": 0.0091, "step": 8900 }, { "epoch": 0.145790722408574, "grad_norm": 0.5240187048912048, "learning_rate": 4.859558221979821e-06, "loss": 0.0093, "step": 8910 }, { "epoch": 0.14595434835964985, "grad_norm": 0.8679760694503784, "learning_rate": 4.865012271611672e-06, "loss": 0.0117, "step": 8920 }, { "epoch": 0.14611797431072568, "grad_norm": 0.44508063793182373, "learning_rate": 4.870466321243524e-06, "loss": 0.0114, "step": 8930 }, { "epoch": 0.1462816002618015, "grad_norm": 0.6625714898109436, "learning_rate": 4.875920370875376e-06, "loss": 0.009, "step": 8940 }, { "epoch": 0.14644522621287737, "grad_norm": 0.6329509615898132, "learning_rate": 4.881374420507227e-06, "loss": 0.0093, "step": 8950 }, { "epoch": 0.1466088521639532, "grad_norm": 0.7100282907485962, "learning_rate": 4.886828470139079e-06, "loss": 0.008, "step": 8960 }, { "epoch": 0.14677247811502905, "grad_norm": 0.37761127948760986, "learning_rate": 4.89228251977093e-06, "loss": 0.0102, "step": 8970 }, { "epoch": 0.14693610406610488, "grad_norm": 0.8917548656463623, "learning_rate": 4.897736569402782e-06, "loss": 0.01, "step": 8980 }, { "epoch": 0.1470997300171807, "grad_norm": 0.7606556415557861, "learning_rate": 4.903190619034633e-06, "loss": 0.0094, "step": 8990 }, { "epoch": 0.14726335596825657, "grad_norm": 0.4368397891521454, "learning_rate": 4.908644668666485e-06, "loss": 0.0087, "step": 9000 }, { "epoch": 0.1474269819193324, "grad_norm": 0.624626100063324, "learning_rate": 4.914098718298337e-06, "loss": 0.012, "step": 9010 }, { "epoch": 0.14759060787040826, "grad_norm": 0.3818283975124359, "learning_rate": 4.9195527679301885e-06, "loss": 0.0101, "step": 9020 }, { "epoch": 0.1477542338214841, "grad_norm": 0.7135481834411621, "learning_rate": 4.92500681756204e-06, "loss": 0.0112, "step": 9030 }, { "epoch": 0.14791785977255992, "grad_norm": 0.8082696199417114, "learning_rate": 4.930460867193892e-06, "loss": 0.0108, "step": 9040 }, { "epoch": 0.14808148572363578, "grad_norm": 0.5904209017753601, "learning_rate": 4.9359149168257436e-06, "loss": 0.0105, "step": 9050 }, { "epoch": 0.1482451116747116, "grad_norm": 0.6689426302909851, "learning_rate": 4.941368966457595e-06, "loss": 0.0102, "step": 9060 }, { "epoch": 0.14840873762578746, "grad_norm": 0.6649727821350098, "learning_rate": 4.946823016089447e-06, "loss": 0.0116, "step": 9070 }, { "epoch": 0.1485723635768633, "grad_norm": 1.2194387912750244, "learning_rate": 4.952277065721299e-06, "loss": 0.012, "step": 9080 }, { "epoch": 0.14873598952793912, "grad_norm": 0.656234622001648, "learning_rate": 4.9577311153531495e-06, "loss": 0.0072, "step": 9090 }, { "epoch": 0.14889961547901498, "grad_norm": 0.7400660514831543, "learning_rate": 4.963185164985001e-06, "loss": 0.0095, "step": 9100 }, { "epoch": 0.1490632414300908, "grad_norm": 0.7004467844963074, "learning_rate": 4.968639214616853e-06, "loss": 0.0092, "step": 9110 }, { "epoch": 0.14922686738116667, "grad_norm": 0.40032580494880676, "learning_rate": 4.974093264248705e-06, "loss": 0.0149, "step": 9120 }, { "epoch": 0.1493904933322425, "grad_norm": 0.5339474678039551, "learning_rate": 4.979547313880556e-06, "loss": 0.0125, "step": 9130 }, { "epoch": 0.14955411928331833, "grad_norm": 0.43809598684310913, "learning_rate": 4.985001363512408e-06, "loss": 0.0089, "step": 9140 }, { "epoch": 0.14971774523439418, "grad_norm": 0.753343939781189, "learning_rate": 4.99045541314426e-06, "loss": 0.0098, "step": 9150 }, { "epoch": 0.14988137118547, "grad_norm": 0.6842641830444336, "learning_rate": 4.995909462776111e-06, "loss": 0.0141, "step": 9160 }, { "epoch": 0.15004499713654584, "grad_norm": 0.339585542678833, "learning_rate": 5.001363512407963e-06, "loss": 0.0094, "step": 9170 }, { "epoch": 0.1502086230876217, "grad_norm": 0.8428782820701599, "learning_rate": 5.006817562039815e-06, "loss": 0.0106, "step": 9180 }, { "epoch": 0.15037224903869753, "grad_norm": 0.9628096222877502, "learning_rate": 5.0122716116716665e-06, "loss": 0.0098, "step": 9190 }, { "epoch": 0.1505358749897734, "grad_norm": 0.866793692111969, "learning_rate": 5.017725661303518e-06, "loss": 0.0116, "step": 9200 }, { "epoch": 0.15069950094084922, "grad_norm": 0.8987458944320679, "learning_rate": 5.02317971093537e-06, "loss": 0.0096, "step": 9210 }, { "epoch": 0.15086312689192505, "grad_norm": 0.22411911189556122, "learning_rate": 5.028633760567222e-06, "loss": 0.0065, "step": 9220 }, { "epoch": 0.1510267528430009, "grad_norm": 1.012649655342102, "learning_rate": 5.034087810199073e-06, "loss": 0.0116, "step": 9230 }, { "epoch": 0.15119037879407674, "grad_norm": 0.48535045981407166, "learning_rate": 5.039541859830925e-06, "loss": 0.008, "step": 9240 }, { "epoch": 0.1513540047451526, "grad_norm": 0.35569506883621216, "learning_rate": 5.044995909462777e-06, "loss": 0.0077, "step": 9250 }, { "epoch": 0.15151763069622842, "grad_norm": 0.43013277649879456, "learning_rate": 5.050449959094628e-06, "loss": 0.0088, "step": 9260 }, { "epoch": 0.15168125664730425, "grad_norm": 0.4324105679988861, "learning_rate": 5.05590400872648e-06, "loss": 0.0083, "step": 9270 }, { "epoch": 0.1518448825983801, "grad_norm": 0.6981995105743408, "learning_rate": 5.061358058358331e-06, "loss": 0.0071, "step": 9280 }, { "epoch": 0.15200850854945594, "grad_norm": 0.7040393948554993, "learning_rate": 5.0668121079901835e-06, "loss": 0.0086, "step": 9290 }, { "epoch": 0.1521721345005318, "grad_norm": 0.6929276585578918, "learning_rate": 5.072266157622034e-06, "loss": 0.0085, "step": 9300 }, { "epoch": 0.15233576045160763, "grad_norm": 0.8001575469970703, "learning_rate": 5.077720207253887e-06, "loss": 0.0141, "step": 9310 }, { "epoch": 0.15249938640268346, "grad_norm": 0.6251505017280579, "learning_rate": 5.083174256885738e-06, "loss": 0.0114, "step": 9320 }, { "epoch": 0.15266301235375931, "grad_norm": 0.44184187054634094, "learning_rate": 5.08862830651759e-06, "loss": 0.0083, "step": 9330 }, { "epoch": 0.15282663830483514, "grad_norm": 0.7383847236633301, "learning_rate": 5.094082356149441e-06, "loss": 0.0128, "step": 9340 }, { "epoch": 0.15299026425591097, "grad_norm": 0.8107274770736694, "learning_rate": 5.099536405781294e-06, "loss": 0.0166, "step": 9350 }, { "epoch": 0.15315389020698683, "grad_norm": 0.6202419400215149, "learning_rate": 5.104990455413145e-06, "loss": 0.0109, "step": 9360 }, { "epoch": 0.15331751615806266, "grad_norm": 0.9023040533065796, "learning_rate": 5.1104445050449954e-06, "loss": 0.0121, "step": 9370 }, { "epoch": 0.15348114210913852, "grad_norm": 0.6068389415740967, "learning_rate": 5.115898554676848e-06, "loss": 0.0104, "step": 9380 }, { "epoch": 0.15364476806021435, "grad_norm": 0.8096945285797119, "learning_rate": 5.121352604308699e-06, "loss": 0.0083, "step": 9390 }, { "epoch": 0.15380839401129018, "grad_norm": 0.597368597984314, "learning_rate": 5.126806653940551e-06, "loss": 0.0112, "step": 9400 }, { "epoch": 0.15397201996236604, "grad_norm": 0.5406811237335205, "learning_rate": 5.132260703572402e-06, "loss": 0.011, "step": 9410 }, { "epoch": 0.15413564591344187, "grad_norm": 0.6746290326118469, "learning_rate": 5.137714753204255e-06, "loss": 0.0076, "step": 9420 }, { "epoch": 0.15429927186451772, "grad_norm": 0.9298248887062073, "learning_rate": 5.143168802836106e-06, "loss": 0.0108, "step": 9430 }, { "epoch": 0.15446289781559355, "grad_norm": 0.2614902853965759, "learning_rate": 5.148622852467958e-06, "loss": 0.0121, "step": 9440 }, { "epoch": 0.15462652376666938, "grad_norm": 0.8659275770187378, "learning_rate": 5.154076902099809e-06, "loss": 0.0131, "step": 9450 }, { "epoch": 0.15479014971774524, "grad_norm": 1.1671043634414673, "learning_rate": 5.159530951731662e-06, "loss": 0.0126, "step": 9460 }, { "epoch": 0.15495377566882107, "grad_norm": 0.8133409023284912, "learning_rate": 5.1649850013635125e-06, "loss": 0.0111, "step": 9470 }, { "epoch": 0.15511740161989693, "grad_norm": 0.8114234209060669, "learning_rate": 5.170439050995365e-06, "loss": 0.012, "step": 9480 }, { "epoch": 0.15528102757097276, "grad_norm": 0.47942283749580383, "learning_rate": 5.175893100627216e-06, "loss": 0.0121, "step": 9490 }, { "epoch": 0.1554446535220486, "grad_norm": 0.749294102191925, "learning_rate": 5.1813471502590676e-06, "loss": 0.0086, "step": 9500 }, { "epoch": 0.15560827947312444, "grad_norm": 0.621303141117096, "learning_rate": 5.186801199890919e-06, "loss": 0.0094, "step": 9510 }, { "epoch": 0.15577190542420027, "grad_norm": 0.380519837141037, "learning_rate": 5.192255249522771e-06, "loss": 0.0069, "step": 9520 }, { "epoch": 0.15593553137527613, "grad_norm": 0.37362000346183777, "learning_rate": 5.197709299154623e-06, "loss": 0.0143, "step": 9530 }, { "epoch": 0.15609915732635196, "grad_norm": 0.5878266096115112, "learning_rate": 5.203163348786474e-06, "loss": 0.0094, "step": 9540 }, { "epoch": 0.1562627832774278, "grad_norm": 0.45064565539360046, "learning_rate": 5.208617398418326e-06, "loss": 0.0097, "step": 9550 }, { "epoch": 0.15642640922850365, "grad_norm": 0.3453048765659332, "learning_rate": 5.214071448050178e-06, "loss": 0.0085, "step": 9560 }, { "epoch": 0.15659003517957948, "grad_norm": 0.3360031247138977, "learning_rate": 5.2195254976820295e-06, "loss": 0.0129, "step": 9570 }, { "epoch": 0.1567536611306553, "grad_norm": 0.3435528874397278, "learning_rate": 5.224979547313881e-06, "loss": 0.0094, "step": 9580 }, { "epoch": 0.15691728708173117, "grad_norm": 0.5522723197937012, "learning_rate": 5.230433596945733e-06, "loss": 0.0115, "step": 9590 }, { "epoch": 0.157080913032807, "grad_norm": 0.6665348410606384, "learning_rate": 5.2358876465775846e-06, "loss": 0.0128, "step": 9600 }, { "epoch": 0.15724453898388285, "grad_norm": 0.6444919109344482, "learning_rate": 5.241341696209435e-06, "loss": 0.009, "step": 9610 }, { "epoch": 0.15740816493495868, "grad_norm": 0.39760738611221313, "learning_rate": 5.246795745841288e-06, "loss": 0.0081, "step": 9620 }, { "epoch": 0.1575717908860345, "grad_norm": 0.27387863397598267, "learning_rate": 5.252249795473139e-06, "loss": 0.0065, "step": 9630 }, { "epoch": 0.15773541683711037, "grad_norm": 0.7389279007911682, "learning_rate": 5.257703845104991e-06, "loss": 0.0146, "step": 9640 }, { "epoch": 0.1578990427881862, "grad_norm": 0.6964746117591858, "learning_rate": 5.263157894736842e-06, "loss": 0.01, "step": 9650 }, { "epoch": 0.15806266873926206, "grad_norm": 0.6602382659912109, "learning_rate": 5.268611944368695e-06, "loss": 0.0097, "step": 9660 }, { "epoch": 0.1582262946903379, "grad_norm": 0.6786006689071655, "learning_rate": 5.274065994000546e-06, "loss": 0.0127, "step": 9670 }, { "epoch": 0.15838992064141372, "grad_norm": 0.5183022022247314, "learning_rate": 5.279520043632398e-06, "loss": 0.0112, "step": 9680 }, { "epoch": 0.15855354659248957, "grad_norm": 0.7277314066886902, "learning_rate": 5.284974093264249e-06, "loss": 0.0131, "step": 9690 }, { "epoch": 0.1587171725435654, "grad_norm": 0.6161181330680847, "learning_rate": 5.2904281428961016e-06, "loss": 0.0096, "step": 9700 }, { "epoch": 0.15888079849464126, "grad_norm": 0.6199920773506165, "learning_rate": 5.2958821925279524e-06, "loss": 0.0091, "step": 9710 }, { "epoch": 0.1590444244457171, "grad_norm": 0.43742048740386963, "learning_rate": 5.301336242159803e-06, "loss": 0.0077, "step": 9720 }, { "epoch": 0.15920805039679292, "grad_norm": 0.8142781853675842, "learning_rate": 5.306790291791656e-06, "loss": 0.0105, "step": 9730 }, { "epoch": 0.15937167634786878, "grad_norm": 0.626279354095459, "learning_rate": 5.312244341423507e-06, "loss": 0.0071, "step": 9740 }, { "epoch": 0.1595353022989446, "grad_norm": 0.7502890229225159, "learning_rate": 5.317698391055359e-06, "loss": 0.0095, "step": 9750 }, { "epoch": 0.15969892825002047, "grad_norm": 0.2243766188621521, "learning_rate": 5.32315244068721e-06, "loss": 0.0075, "step": 9760 }, { "epoch": 0.1598625542010963, "grad_norm": 0.15477269887924194, "learning_rate": 5.328606490319063e-06, "loss": 0.0097, "step": 9770 }, { "epoch": 0.16002618015217213, "grad_norm": 0.32237935066223145, "learning_rate": 5.3340605399509135e-06, "loss": 0.01, "step": 9780 }, { "epoch": 0.16018980610324798, "grad_norm": 0.8506962060928345, "learning_rate": 5.339514589582766e-06, "loss": 0.0109, "step": 9790 }, { "epoch": 0.1603534320543238, "grad_norm": 0.5185884237289429, "learning_rate": 5.344968639214617e-06, "loss": 0.0119, "step": 9800 }, { "epoch": 0.16051705800539964, "grad_norm": 0.529741108417511, "learning_rate": 5.3504226888464694e-06, "loss": 0.0117, "step": 9810 }, { "epoch": 0.1606806839564755, "grad_norm": 0.37562671303749084, "learning_rate": 5.35587673847832e-06, "loss": 0.009, "step": 9820 }, { "epoch": 0.16084430990755133, "grad_norm": 0.9291324019432068, "learning_rate": 5.361330788110172e-06, "loss": 0.0118, "step": 9830 }, { "epoch": 0.1610079358586272, "grad_norm": 0.5216712951660156, "learning_rate": 5.366784837742024e-06, "loss": 0.01, "step": 9840 }, { "epoch": 0.16117156180970302, "grad_norm": 0.486609548330307, "learning_rate": 5.372238887373875e-06, "loss": 0.0139, "step": 9850 }, { "epoch": 0.16133518776077885, "grad_norm": 0.4292732775211334, "learning_rate": 5.377692937005727e-06, "loss": 0.012, "step": 9860 }, { "epoch": 0.1614988137118547, "grad_norm": 0.8930361270904541, "learning_rate": 5.383146986637579e-06, "loss": 0.0123, "step": 9870 }, { "epoch": 0.16166243966293053, "grad_norm": 0.5616849064826965, "learning_rate": 5.3886010362694305e-06, "loss": 0.0103, "step": 9880 }, { "epoch": 0.1618260656140064, "grad_norm": 0.6051303148269653, "learning_rate": 5.394055085901282e-06, "loss": 0.0058, "step": 9890 }, { "epoch": 0.16198969156508222, "grad_norm": 0.7344470024108887, "learning_rate": 5.399509135533134e-06, "loss": 0.0121, "step": 9900 }, { "epoch": 0.16215331751615805, "grad_norm": 0.45683553814888, "learning_rate": 5.404963185164986e-06, "loss": 0.0111, "step": 9910 }, { "epoch": 0.1623169434672339, "grad_norm": 0.23688705265522003, "learning_rate": 5.410417234796837e-06, "loss": 0.0093, "step": 9920 }, { "epoch": 0.16248056941830974, "grad_norm": 0.5689866542816162, "learning_rate": 5.415871284428689e-06, "loss": 0.0101, "step": 9930 }, { "epoch": 0.1626441953693856, "grad_norm": 0.4035464823246002, "learning_rate": 5.42132533406054e-06, "loss": 0.0113, "step": 9940 }, { "epoch": 0.16280782132046143, "grad_norm": 0.646021842956543, "learning_rate": 5.426779383692392e-06, "loss": 0.0117, "step": 9950 }, { "epoch": 0.16297144727153726, "grad_norm": 0.1906503587961197, "learning_rate": 5.432233433324243e-06, "loss": 0.0057, "step": 9960 }, { "epoch": 0.16313507322261311, "grad_norm": 0.5345288515090942, "learning_rate": 5.437687482956096e-06, "loss": 0.0096, "step": 9970 }, { "epoch": 0.16329869917368894, "grad_norm": 0.5528846979141235, "learning_rate": 5.443141532587947e-06, "loss": 0.0085, "step": 9980 }, { "epoch": 0.1634623251247648, "grad_norm": 0.4603172242641449, "learning_rate": 5.448595582219799e-06, "loss": 0.0115, "step": 9990 }, { "epoch": 0.16362595107584063, "grad_norm": 0.5774797797203064, "learning_rate": 5.45404963185165e-06, "loss": 0.0081, "step": 10000 }, { "epoch": 0.16378957702691646, "grad_norm": 0.5093913078308105, "learning_rate": 5.459503681483503e-06, "loss": 0.0123, "step": 10010 }, { "epoch": 0.16395320297799232, "grad_norm": 0.32274413108825684, "learning_rate": 5.4649577311153535e-06, "loss": 0.0115, "step": 10020 }, { "epoch": 0.16411682892906815, "grad_norm": 0.6581262350082397, "learning_rate": 5.470411780747206e-06, "loss": 0.0125, "step": 10030 }, { "epoch": 0.16428045488014398, "grad_norm": 0.7366822957992554, "learning_rate": 5.475865830379057e-06, "loss": 0.0106, "step": 10040 }, { "epoch": 0.16444408083121984, "grad_norm": 0.5786474943161011, "learning_rate": 5.481319880010909e-06, "loss": 0.0091, "step": 10050 }, { "epoch": 0.16460770678229567, "grad_norm": 0.5384429097175598, "learning_rate": 5.48677392964276e-06, "loss": 0.0088, "step": 10060 }, { "epoch": 0.16477133273337152, "grad_norm": 0.8376790285110474, "learning_rate": 5.492227979274611e-06, "loss": 0.0113, "step": 10070 }, { "epoch": 0.16493495868444735, "grad_norm": 0.5172038078308105, "learning_rate": 5.497682028906464e-06, "loss": 0.0105, "step": 10080 }, { "epoch": 0.16509858463552318, "grad_norm": 0.5650915503501892, "learning_rate": 5.5031360785383145e-06, "loss": 0.0094, "step": 10090 }, { "epoch": 0.16526221058659904, "grad_norm": 0.7378827333450317, "learning_rate": 5.508590128170167e-06, "loss": 0.0109, "step": 10100 }, { "epoch": 0.16542583653767487, "grad_norm": 0.8093585968017578, "learning_rate": 5.514044177802018e-06, "loss": 0.0126, "step": 10110 }, { "epoch": 0.16558946248875073, "grad_norm": 0.5197385549545288, "learning_rate": 5.5194982274338705e-06, "loss": 0.0092, "step": 10120 }, { "epoch": 0.16575308843982656, "grad_norm": 0.632228672504425, "learning_rate": 5.524952277065721e-06, "loss": 0.011, "step": 10130 }, { "epoch": 0.1659167143909024, "grad_norm": 0.5733054876327515, "learning_rate": 5.530406326697574e-06, "loss": 0.0092, "step": 10140 }, { "epoch": 0.16608034034197824, "grad_norm": 0.5369895100593567, "learning_rate": 5.535860376329425e-06, "loss": 0.012, "step": 10150 }, { "epoch": 0.16624396629305407, "grad_norm": 0.6807331442832947, "learning_rate": 5.541314425961277e-06, "loss": 0.0105, "step": 10160 }, { "epoch": 0.16640759224412993, "grad_norm": 0.3529227375984192, "learning_rate": 5.546768475593128e-06, "loss": 0.0093, "step": 10170 }, { "epoch": 0.16657121819520576, "grad_norm": 0.6981804966926575, "learning_rate": 5.55222252522498e-06, "loss": 0.0135, "step": 10180 }, { "epoch": 0.1667348441462816, "grad_norm": 0.9030161499977112, "learning_rate": 5.5576765748568315e-06, "loss": 0.0063, "step": 10190 }, { "epoch": 0.16689847009735745, "grad_norm": 0.3628447651863098, "learning_rate": 5.563130624488683e-06, "loss": 0.0079, "step": 10200 }, { "epoch": 0.16706209604843328, "grad_norm": 0.8995673060417175, "learning_rate": 5.568584674120535e-06, "loss": 0.0096, "step": 10210 }, { "epoch": 0.16722572199950914, "grad_norm": 0.6258118748664856, "learning_rate": 5.574038723752387e-06, "loss": 0.0083, "step": 10220 }, { "epoch": 0.16738934795058497, "grad_norm": 0.5971655249595642, "learning_rate": 5.579492773384238e-06, "loss": 0.0095, "step": 10230 }, { "epoch": 0.1675529739016608, "grad_norm": 0.9196944832801819, "learning_rate": 5.58494682301609e-06, "loss": 0.0127, "step": 10240 }, { "epoch": 0.16771659985273665, "grad_norm": 0.30177071690559387, "learning_rate": 5.590400872647942e-06, "loss": 0.0091, "step": 10250 }, { "epoch": 0.16788022580381248, "grad_norm": 0.6043103933334351, "learning_rate": 5.5958549222797934e-06, "loss": 0.0099, "step": 10260 }, { "epoch": 0.1680438517548883, "grad_norm": 0.6831212639808655, "learning_rate": 5.601308971911645e-06, "loss": 0.0133, "step": 10270 }, { "epoch": 0.16820747770596417, "grad_norm": 0.5136745572090149, "learning_rate": 5.606763021543497e-06, "loss": 0.0124, "step": 10280 }, { "epoch": 0.16837110365704, "grad_norm": 0.4598332941532135, "learning_rate": 5.612217071175348e-06, "loss": 0.0097, "step": 10290 }, { "epoch": 0.16853472960811586, "grad_norm": 0.5498688817024231, "learning_rate": 5.6176711208072e-06, "loss": 0.0123, "step": 10300 }, { "epoch": 0.1686983555591917, "grad_norm": 0.48969921469688416, "learning_rate": 5.623125170439051e-06, "loss": 0.0102, "step": 10310 }, { "epoch": 0.16886198151026752, "grad_norm": 0.18896116316318512, "learning_rate": 5.628579220070904e-06, "loss": 0.0102, "step": 10320 }, { "epoch": 0.16902560746134337, "grad_norm": 0.3152559995651245, "learning_rate": 5.6340332697027545e-06, "loss": 0.0115, "step": 10330 }, { "epoch": 0.1691892334124192, "grad_norm": 0.7210392951965332, "learning_rate": 5.639487319334607e-06, "loss": 0.009, "step": 10340 }, { "epoch": 0.16935285936349506, "grad_norm": 0.9130593538284302, "learning_rate": 5.644941368966458e-06, "loss": 0.0126, "step": 10350 }, { "epoch": 0.1695164853145709, "grad_norm": 0.5895740985870361, "learning_rate": 5.6503954185983104e-06, "loss": 0.0104, "step": 10360 }, { "epoch": 0.16968011126564672, "grad_norm": 0.3843470513820648, "learning_rate": 5.655849468230161e-06, "loss": 0.0079, "step": 10370 }, { "epoch": 0.16984373721672258, "grad_norm": 0.2323817014694214, "learning_rate": 5.661303517862014e-06, "loss": 0.0088, "step": 10380 }, { "epoch": 0.1700073631677984, "grad_norm": 0.3774637281894684, "learning_rate": 5.666757567493865e-06, "loss": 0.016, "step": 10390 }, { "epoch": 0.17017098911887427, "grad_norm": 0.40302830934524536, "learning_rate": 5.6722116171257156e-06, "loss": 0.009, "step": 10400 }, { "epoch": 0.1703346150699501, "grad_norm": 0.36535724997520447, "learning_rate": 5.677665666757568e-06, "loss": 0.009, "step": 10410 }, { "epoch": 0.17049824102102593, "grad_norm": 0.3651065230369568, "learning_rate": 5.683119716389419e-06, "loss": 0.0076, "step": 10420 }, { "epoch": 0.17066186697210178, "grad_norm": 0.7712186574935913, "learning_rate": 5.6885737660212715e-06, "loss": 0.0113, "step": 10430 }, { "epoch": 0.1708254929231776, "grad_norm": 0.6908636093139648, "learning_rate": 5.694027815653122e-06, "loss": 0.007, "step": 10440 }, { "epoch": 0.17098911887425344, "grad_norm": 0.4428759515285492, "learning_rate": 5.699481865284975e-06, "loss": 0.008, "step": 10450 }, { "epoch": 0.1711527448253293, "grad_norm": 0.4941957890987396, "learning_rate": 5.704935914916826e-06, "loss": 0.011, "step": 10460 }, { "epoch": 0.17131637077640513, "grad_norm": 0.5548277497291565, "learning_rate": 5.710389964548678e-06, "loss": 0.0105, "step": 10470 }, { "epoch": 0.171479996727481, "grad_norm": 0.4565807282924652, "learning_rate": 5.715844014180529e-06, "loss": 0.0128, "step": 10480 }, { "epoch": 0.17164362267855682, "grad_norm": 0.5146138668060303, "learning_rate": 5.721298063812382e-06, "loss": 0.0148, "step": 10490 }, { "epoch": 0.17180724862963265, "grad_norm": 0.6249204874038696, "learning_rate": 5.7267521134442326e-06, "loss": 0.0073, "step": 10500 }, { "epoch": 0.1719708745807085, "grad_norm": 0.49384769797325134, "learning_rate": 5.732206163076084e-06, "loss": 0.0116, "step": 10510 }, { "epoch": 0.17213450053178433, "grad_norm": 1.056188702583313, "learning_rate": 5.737660212707936e-06, "loss": 0.0113, "step": 10520 }, { "epoch": 0.1722981264828602, "grad_norm": 0.5830293893814087, "learning_rate": 5.743114262339788e-06, "loss": 0.0095, "step": 10530 }, { "epoch": 0.17246175243393602, "grad_norm": 0.8748631477355957, "learning_rate": 5.748568311971639e-06, "loss": 0.0118, "step": 10540 }, { "epoch": 0.17262537838501185, "grad_norm": 0.6459181904792786, "learning_rate": 5.754022361603491e-06, "loss": 0.0104, "step": 10550 }, { "epoch": 0.1727890043360877, "grad_norm": 0.28606754541397095, "learning_rate": 5.759476411235343e-06, "loss": 0.0111, "step": 10560 }, { "epoch": 0.17295263028716354, "grad_norm": 0.1575181782245636, "learning_rate": 5.7649304608671945e-06, "loss": 0.0118, "step": 10570 }, { "epoch": 0.1731162562382394, "grad_norm": 0.42970117926597595, "learning_rate": 5.770384510499046e-06, "loss": 0.0094, "step": 10580 }, { "epoch": 0.17327988218931523, "grad_norm": 0.26632261276245117, "learning_rate": 5.775838560130898e-06, "loss": 0.0097, "step": 10590 }, { "epoch": 0.17344350814039106, "grad_norm": 0.6736506819725037, "learning_rate": 5.7812926097627496e-06, "loss": 0.0122, "step": 10600 }, { "epoch": 0.1736071340914669, "grad_norm": 0.5773181915283203, "learning_rate": 5.786746659394601e-06, "loss": 0.0059, "step": 10610 }, { "epoch": 0.17377076004254274, "grad_norm": 0.6499444246292114, "learning_rate": 5.792200709026453e-06, "loss": 0.0096, "step": 10620 }, { "epoch": 0.1739343859936186, "grad_norm": 0.7350925207138062, "learning_rate": 5.797654758658305e-06, "loss": 0.0073, "step": 10630 }, { "epoch": 0.17409801194469443, "grad_norm": 0.5747483372688293, "learning_rate": 5.8031088082901555e-06, "loss": 0.009, "step": 10640 }, { "epoch": 0.17426163789577026, "grad_norm": 0.6138623356819153, "learning_rate": 5.808562857922008e-06, "loss": 0.0073, "step": 10650 }, { "epoch": 0.17442526384684612, "grad_norm": 0.5799426436424255, "learning_rate": 5.814016907553859e-06, "loss": 0.0072, "step": 10660 }, { "epoch": 0.17458888979792195, "grad_norm": 0.5614217519760132, "learning_rate": 5.8194709571857115e-06, "loss": 0.0074, "step": 10670 }, { "epoch": 0.17475251574899778, "grad_norm": 0.5787138938903809, "learning_rate": 5.824925006817562e-06, "loss": 0.0138, "step": 10680 }, { "epoch": 0.17491614170007364, "grad_norm": 0.7112692594528198, "learning_rate": 5.830379056449415e-06, "loss": 0.007, "step": 10690 }, { "epoch": 0.17507976765114946, "grad_norm": 0.3200352191925049, "learning_rate": 5.835833106081266e-06, "loss": 0.0095, "step": 10700 }, { "epoch": 0.17524339360222532, "grad_norm": 0.5317075848579407, "learning_rate": 5.841287155713118e-06, "loss": 0.0084, "step": 10710 }, { "epoch": 0.17540701955330115, "grad_norm": 0.7819271087646484, "learning_rate": 5.846741205344969e-06, "loss": 0.0108, "step": 10720 }, { "epoch": 0.17557064550437698, "grad_norm": 0.36027494072914124, "learning_rate": 5.852195254976822e-06, "loss": 0.0091, "step": 10730 }, { "epoch": 0.17573427145545284, "grad_norm": 0.8856569528579712, "learning_rate": 5.8576493046086725e-06, "loss": 0.0087, "step": 10740 }, { "epoch": 0.17589789740652867, "grad_norm": 0.3918082118034363, "learning_rate": 5.863103354240523e-06, "loss": 0.0055, "step": 10750 }, { "epoch": 0.17606152335760453, "grad_norm": 0.6352339386940002, "learning_rate": 5.868557403872376e-06, "loss": 0.0108, "step": 10760 }, { "epoch": 0.17622514930868036, "grad_norm": 0.5247442722320557, "learning_rate": 5.874011453504227e-06, "loss": 0.009, "step": 10770 }, { "epoch": 0.1763887752597562, "grad_norm": 0.7295508980751038, "learning_rate": 5.879465503136079e-06, "loss": 0.0087, "step": 10780 }, { "epoch": 0.17655240121083204, "grad_norm": 0.5269325971603394, "learning_rate": 5.88491955276793e-06, "loss": 0.0069, "step": 10790 }, { "epoch": 0.17671602716190787, "grad_norm": 0.11139679700136185, "learning_rate": 5.890373602399783e-06, "loss": 0.0075, "step": 10800 }, { "epoch": 0.17687965311298373, "grad_norm": 0.4286610186100006, "learning_rate": 5.895827652031634e-06, "loss": 0.0076, "step": 10810 }, { "epoch": 0.17704327906405956, "grad_norm": 0.15217582881450653, "learning_rate": 5.901281701663486e-06, "loss": 0.0101, "step": 10820 }, { "epoch": 0.1772069050151354, "grad_norm": 0.13957741856575012, "learning_rate": 5.906735751295337e-06, "loss": 0.0072, "step": 10830 }, { "epoch": 0.17737053096621125, "grad_norm": 0.24470998346805573, "learning_rate": 5.9121898009271895e-06, "loss": 0.0058, "step": 10840 }, { "epoch": 0.17753415691728708, "grad_norm": 0.4903556704521179, "learning_rate": 5.91764385055904e-06, "loss": 0.0101, "step": 10850 }, { "epoch": 0.17769778286836294, "grad_norm": 0.5044123530387878, "learning_rate": 5.923097900190892e-06, "loss": 0.0077, "step": 10860 }, { "epoch": 0.17786140881943877, "grad_norm": 0.6955841779708862, "learning_rate": 5.928551949822744e-06, "loss": 0.0092, "step": 10870 }, { "epoch": 0.1780250347705146, "grad_norm": 0.6097860932350159, "learning_rate": 5.9340059994545955e-06, "loss": 0.0083, "step": 10880 }, { "epoch": 0.17818866072159045, "grad_norm": 0.6501186490058899, "learning_rate": 5.939460049086447e-06, "loss": 0.0073, "step": 10890 }, { "epoch": 0.17835228667266628, "grad_norm": 0.7196681499481201, "learning_rate": 5.944914098718299e-06, "loss": 0.0091, "step": 10900 }, { "epoch": 0.1785159126237421, "grad_norm": 0.8038938641548157, "learning_rate": 5.950368148350151e-06, "loss": 0.0089, "step": 10910 }, { "epoch": 0.17867953857481797, "grad_norm": 0.4989466369152069, "learning_rate": 5.955822197982002e-06, "loss": 0.0074, "step": 10920 }, { "epoch": 0.1788431645258938, "grad_norm": 0.4776281416416168, "learning_rate": 5.961276247613854e-06, "loss": 0.0103, "step": 10930 }, { "epoch": 0.17900679047696966, "grad_norm": 0.4999660849571228, "learning_rate": 5.966730297245706e-06, "loss": 0.0077, "step": 10940 }, { "epoch": 0.1791704164280455, "grad_norm": 0.6260465383529663, "learning_rate": 5.972184346877557e-06, "loss": 0.0111, "step": 10950 }, { "epoch": 0.17933404237912132, "grad_norm": 0.5149052143096924, "learning_rate": 5.977638396509409e-06, "loss": 0.0106, "step": 10960 }, { "epoch": 0.17949766833019717, "grad_norm": 0.648848831653595, "learning_rate": 5.98309244614126e-06, "loss": 0.0106, "step": 10970 }, { "epoch": 0.179661294281273, "grad_norm": 0.45320019125938416, "learning_rate": 5.9885464957731125e-06, "loss": 0.0078, "step": 10980 }, { "epoch": 0.17982492023234886, "grad_norm": 0.44847699999809265, "learning_rate": 5.994000545404963e-06, "loss": 0.0097, "step": 10990 }, { "epoch": 0.1799885461834247, "grad_norm": 0.5372036099433899, "learning_rate": 5.999454595036815e-06, "loss": 0.01, "step": 11000 }, { "epoch": 0.18015217213450052, "grad_norm": 0.516180694103241, "learning_rate": 6.004908644668667e-06, "loss": 0.0099, "step": 11010 }, { "epoch": 0.18031579808557638, "grad_norm": 0.30993184447288513, "learning_rate": 6.0103626943005185e-06, "loss": 0.0112, "step": 11020 }, { "epoch": 0.1804794240366522, "grad_norm": 0.33052492141723633, "learning_rate": 6.01581674393237e-06, "loss": 0.0115, "step": 11030 }, { "epoch": 0.18064304998772807, "grad_norm": 0.7691507935523987, "learning_rate": 6.021270793564222e-06, "loss": 0.0103, "step": 11040 }, { "epoch": 0.1808066759388039, "grad_norm": 0.48284903168678284, "learning_rate": 6.0267248431960736e-06, "loss": 0.0075, "step": 11050 }, { "epoch": 0.18097030188987973, "grad_norm": 0.6617900729179382, "learning_rate": 6.032178892827925e-06, "loss": 0.0116, "step": 11060 }, { "epoch": 0.18113392784095558, "grad_norm": 0.3081563115119934, "learning_rate": 6.037632942459777e-06, "loss": 0.0107, "step": 11070 }, { "epoch": 0.1812975537920314, "grad_norm": 0.4150715470314026, "learning_rate": 6.043086992091629e-06, "loss": 0.0132, "step": 11080 }, { "epoch": 0.18146117974310727, "grad_norm": 0.34169572591781616, "learning_rate": 6.04854104172348e-06, "loss": 0.0101, "step": 11090 }, { "epoch": 0.1816248056941831, "grad_norm": 0.34667065739631653, "learning_rate": 6.053995091355331e-06, "loss": 0.0056, "step": 11100 }, { "epoch": 0.18178843164525893, "grad_norm": 0.749790608882904, "learning_rate": 6.059449140987184e-06, "loss": 0.009, "step": 11110 }, { "epoch": 0.1819520575963348, "grad_norm": 0.8240759372711182, "learning_rate": 6.064903190619035e-06, "loss": 0.0083, "step": 11120 }, { "epoch": 0.18211568354741062, "grad_norm": 0.3187662363052368, "learning_rate": 6.070357240250887e-06, "loss": 0.0066, "step": 11130 }, { "epoch": 0.18227930949848645, "grad_norm": 1.5054137706756592, "learning_rate": 6.075811289882738e-06, "loss": 0.0093, "step": 11140 }, { "epoch": 0.1824429354495623, "grad_norm": 0.8990772366523743, "learning_rate": 6.081265339514591e-06, "loss": 0.0074, "step": 11150 }, { "epoch": 0.18260656140063813, "grad_norm": 0.4735490679740906, "learning_rate": 6.0867193891464414e-06, "loss": 0.0089, "step": 11160 }, { "epoch": 0.182770187351714, "grad_norm": 0.39772114157676697, "learning_rate": 6.092173438778294e-06, "loss": 0.0082, "step": 11170 }, { "epoch": 0.18293381330278982, "grad_norm": 0.540266215801239, "learning_rate": 6.097627488410145e-06, "loss": 0.0085, "step": 11180 }, { "epoch": 0.18309743925386565, "grad_norm": 0.2835488021373749, "learning_rate": 6.103081538041997e-06, "loss": 0.0117, "step": 11190 }, { "epoch": 0.1832610652049415, "grad_norm": 0.4021994471549988, "learning_rate": 6.108535587673848e-06, "loss": 0.0057, "step": 11200 }, { "epoch": 0.18342469115601734, "grad_norm": 0.6735053658485413, "learning_rate": 6.113989637305699e-06, "loss": 0.0099, "step": 11210 }, { "epoch": 0.1835883171070932, "grad_norm": 0.6619128584861755, "learning_rate": 6.119443686937552e-06, "loss": 0.009, "step": 11220 }, { "epoch": 0.18375194305816903, "grad_norm": 0.7361101508140564, "learning_rate": 6.1248977365694025e-06, "loss": 0.0075, "step": 11230 }, { "epoch": 0.18391556900924486, "grad_norm": 0.5200756192207336, "learning_rate": 6.130351786201255e-06, "loss": 0.0085, "step": 11240 }, { "epoch": 0.1840791949603207, "grad_norm": 0.47258487343788147, "learning_rate": 6.135805835833106e-06, "loss": 0.0085, "step": 11250 }, { "epoch": 0.18424282091139654, "grad_norm": 0.22633542120456696, "learning_rate": 6.1412598854649584e-06, "loss": 0.0087, "step": 11260 }, { "epoch": 0.1844064468624724, "grad_norm": 0.7213738560676575, "learning_rate": 6.146713935096809e-06, "loss": 0.0094, "step": 11270 }, { "epoch": 0.18457007281354823, "grad_norm": 0.5219589471817017, "learning_rate": 6.152167984728662e-06, "loss": 0.0079, "step": 11280 }, { "epoch": 0.18473369876462406, "grad_norm": 0.5819460153579712, "learning_rate": 6.157622034360513e-06, "loss": 0.0091, "step": 11290 }, { "epoch": 0.18489732471569992, "grad_norm": 0.3599722385406494, "learning_rate": 6.163076083992365e-06, "loss": 0.0087, "step": 11300 }, { "epoch": 0.18506095066677575, "grad_norm": 0.49820250272750854, "learning_rate": 6.168530133624216e-06, "loss": 0.0133, "step": 11310 }, { "epoch": 0.1852245766178516, "grad_norm": 0.5457605123519897, "learning_rate": 6.173984183256068e-06, "loss": 0.0065, "step": 11320 }, { "epoch": 0.18538820256892743, "grad_norm": 0.5832045078277588, "learning_rate": 6.1794382328879195e-06, "loss": 0.0092, "step": 11330 }, { "epoch": 0.18555182852000326, "grad_norm": 0.42447108030319214, "learning_rate": 6.184892282519771e-06, "loss": 0.0104, "step": 11340 }, { "epoch": 0.18571545447107912, "grad_norm": 0.5838319063186646, "learning_rate": 6.190346332151623e-06, "loss": 0.0069, "step": 11350 }, { "epoch": 0.18587908042215495, "grad_norm": 0.37487807869911194, "learning_rate": 6.195800381783475e-06, "loss": 0.0094, "step": 11360 }, { "epoch": 0.18604270637323078, "grad_norm": 0.45324909687042236, "learning_rate": 6.201254431415326e-06, "loss": 0.0108, "step": 11370 }, { "epoch": 0.18620633232430664, "grad_norm": 0.6902481317520142, "learning_rate": 6.206708481047178e-06, "loss": 0.0119, "step": 11380 }, { "epoch": 0.18636995827538247, "grad_norm": 0.4646401107311249, "learning_rate": 6.21216253067903e-06, "loss": 0.0129, "step": 11390 }, { "epoch": 0.18653358422645833, "grad_norm": 0.49047648906707764, "learning_rate": 6.217616580310881e-06, "loss": 0.01, "step": 11400 }, { "epoch": 0.18669721017753416, "grad_norm": 0.6002476811408997, "learning_rate": 6.223070629942733e-06, "loss": 0.009, "step": 11410 }, { "epoch": 0.18686083612861, "grad_norm": 1.2768020629882812, "learning_rate": 6.228524679574585e-06, "loss": 0.0109, "step": 11420 }, { "epoch": 0.18702446207968584, "grad_norm": 0.4653293490409851, "learning_rate": 6.233978729206436e-06, "loss": 0.0081, "step": 11430 }, { "epoch": 0.18718808803076167, "grad_norm": 0.37930235266685486, "learning_rate": 6.239432778838288e-06, "loss": 0.0087, "step": 11440 }, { "epoch": 0.18735171398183753, "grad_norm": 0.6293927431106567, "learning_rate": 6.244886828470139e-06, "loss": 0.012, "step": 11450 }, { "epoch": 0.18751533993291336, "grad_norm": 0.38692766427993774, "learning_rate": 6.250340878101992e-06, "loss": 0.0076, "step": 11460 }, { "epoch": 0.1876789658839892, "grad_norm": 0.28060996532440186, "learning_rate": 6.2557949277338425e-06, "loss": 0.0097, "step": 11470 }, { "epoch": 0.18784259183506505, "grad_norm": 0.27393385767936707, "learning_rate": 6.261248977365695e-06, "loss": 0.0092, "step": 11480 }, { "epoch": 0.18800621778614088, "grad_norm": 0.36885377764701843, "learning_rate": 6.266703026997546e-06, "loss": 0.0072, "step": 11490 }, { "epoch": 0.18816984373721674, "grad_norm": 0.48685377836227417, "learning_rate": 6.272157076629398e-06, "loss": 0.0075, "step": 11500 }, { "epoch": 0.18833346968829257, "grad_norm": 0.3773362636566162, "learning_rate": 6.277611126261249e-06, "loss": 0.0074, "step": 11510 }, { "epoch": 0.1884970956393684, "grad_norm": 0.6746549606323242, "learning_rate": 6.283065175893102e-06, "loss": 0.0128, "step": 11520 }, { "epoch": 0.18866072159044425, "grad_norm": 0.5694946050643921, "learning_rate": 6.288519225524953e-06, "loss": 0.0115, "step": 11530 }, { "epoch": 0.18882434754152008, "grad_norm": 0.36031264066696167, "learning_rate": 6.2939732751568035e-06, "loss": 0.0087, "step": 11540 }, { "epoch": 0.1889879734925959, "grad_norm": 0.6497841477394104, "learning_rate": 6.299427324788656e-06, "loss": 0.0104, "step": 11550 }, { "epoch": 0.18915159944367177, "grad_norm": 0.5403355956077576, "learning_rate": 6.304881374420507e-06, "loss": 0.0095, "step": 11560 }, { "epoch": 0.1893152253947476, "grad_norm": 0.7536537647247314, "learning_rate": 6.3103354240523595e-06, "loss": 0.0107, "step": 11570 }, { "epoch": 0.18947885134582346, "grad_norm": 0.34756720066070557, "learning_rate": 6.31578947368421e-06, "loss": 0.0101, "step": 11580 }, { "epoch": 0.1896424772968993, "grad_norm": 0.1733919233083725, "learning_rate": 6.321243523316063e-06, "loss": 0.0062, "step": 11590 }, { "epoch": 0.18980610324797512, "grad_norm": 0.303161084651947, "learning_rate": 6.326697572947914e-06, "loss": 0.0125, "step": 11600 }, { "epoch": 0.18996972919905097, "grad_norm": 0.3984090983867645, "learning_rate": 6.332151622579766e-06, "loss": 0.007, "step": 11610 }, { "epoch": 0.1901333551501268, "grad_norm": 0.2361973524093628, "learning_rate": 6.337605672211617e-06, "loss": 0.0105, "step": 11620 }, { "epoch": 0.19029698110120266, "grad_norm": 0.2629629373550415, "learning_rate": 6.34305972184347e-06, "loss": 0.007, "step": 11630 }, { "epoch": 0.1904606070522785, "grad_norm": 0.6978625655174255, "learning_rate": 6.3485137714753205e-06, "loss": 0.0097, "step": 11640 }, { "epoch": 0.19062423300335432, "grad_norm": 0.2649421989917755, "learning_rate": 6.353967821107173e-06, "loss": 0.0078, "step": 11650 }, { "epoch": 0.19078785895443018, "grad_norm": 0.33282360434532166, "learning_rate": 6.359421870739024e-06, "loss": 0.0073, "step": 11660 }, { "epoch": 0.190951484905506, "grad_norm": 0.5424314737319946, "learning_rate": 6.364875920370876e-06, "loss": 0.0081, "step": 11670 }, { "epoch": 0.19111511085658187, "grad_norm": 0.1660630702972412, "learning_rate": 6.370329970002727e-06, "loss": 0.0078, "step": 11680 }, { "epoch": 0.1912787368076577, "grad_norm": 0.6327295303344727, "learning_rate": 6.375784019634579e-06, "loss": 0.0109, "step": 11690 }, { "epoch": 0.19144236275873353, "grad_norm": 0.32394713163375854, "learning_rate": 6.381238069266431e-06, "loss": 0.0061, "step": 11700 }, { "epoch": 0.19160598870980938, "grad_norm": 0.5135193467140198, "learning_rate": 6.3866921188982824e-06, "loss": 0.01, "step": 11710 }, { "epoch": 0.1917696146608852, "grad_norm": 0.6632450819015503, "learning_rate": 6.392146168530134e-06, "loss": 0.0097, "step": 11720 }, { "epoch": 0.19193324061196107, "grad_norm": 0.6025798320770264, "learning_rate": 6.397600218161986e-06, "loss": 0.0077, "step": 11730 }, { "epoch": 0.1920968665630369, "grad_norm": 0.5043540000915527, "learning_rate": 6.4030542677938375e-06, "loss": 0.0056, "step": 11740 }, { "epoch": 0.19226049251411273, "grad_norm": 0.14362969994544983, "learning_rate": 6.408508317425689e-06, "loss": 0.0086, "step": 11750 }, { "epoch": 0.1924241184651886, "grad_norm": 0.22613579034805298, "learning_rate": 6.413962367057541e-06, "loss": 0.0058, "step": 11760 }, { "epoch": 0.19258774441626442, "grad_norm": 0.7183791399002075, "learning_rate": 6.419416416689393e-06, "loss": 0.0111, "step": 11770 }, { "epoch": 0.19275137036734025, "grad_norm": 0.6961106061935425, "learning_rate": 6.4248704663212435e-06, "loss": 0.0076, "step": 11780 }, { "epoch": 0.1929149963184161, "grad_norm": 0.328006386756897, "learning_rate": 6.430324515953096e-06, "loss": 0.0068, "step": 11790 }, { "epoch": 0.19307862226949193, "grad_norm": 0.6311428546905518, "learning_rate": 6.435778565584947e-06, "loss": 0.0083, "step": 11800 }, { "epoch": 0.1932422482205678, "grad_norm": 0.3945896029472351, "learning_rate": 6.4412326152167994e-06, "loss": 0.0066, "step": 11810 }, { "epoch": 0.19340587417164362, "grad_norm": 0.6109340190887451, "learning_rate": 6.44668666484865e-06, "loss": 0.0104, "step": 11820 }, { "epoch": 0.19356950012271945, "grad_norm": 0.5317077040672302, "learning_rate": 6.452140714480503e-06, "loss": 0.0082, "step": 11830 }, { "epoch": 0.1937331260737953, "grad_norm": 0.787543773651123, "learning_rate": 6.457594764112354e-06, "loss": 0.0066, "step": 11840 }, { "epoch": 0.19389675202487114, "grad_norm": 0.7824800610542297, "learning_rate": 6.463048813744206e-06, "loss": 0.0074, "step": 11850 }, { "epoch": 0.194060377975947, "grad_norm": 0.4073673486709595, "learning_rate": 6.468502863376057e-06, "loss": 0.0094, "step": 11860 }, { "epoch": 0.19422400392702283, "grad_norm": 0.5223649740219116, "learning_rate": 6.47395691300791e-06, "loss": 0.0116, "step": 11870 }, { "epoch": 0.19438762987809866, "grad_norm": 0.696689784526825, "learning_rate": 6.4794109626397605e-06, "loss": 0.0118, "step": 11880 }, { "epoch": 0.1945512558291745, "grad_norm": 0.7363672852516174, "learning_rate": 6.484865012271611e-06, "loss": 0.008, "step": 11890 }, { "epoch": 0.19471488178025034, "grad_norm": 0.7384046912193298, "learning_rate": 6.490319061903464e-06, "loss": 0.0117, "step": 11900 }, { "epoch": 0.1948785077313262, "grad_norm": 0.4886281192302704, "learning_rate": 6.495773111535315e-06, "loss": 0.007, "step": 11910 }, { "epoch": 0.19504213368240203, "grad_norm": 0.45536166429519653, "learning_rate": 6.501227161167167e-06, "loss": 0.0084, "step": 11920 }, { "epoch": 0.19520575963347786, "grad_norm": 0.1903892308473587, "learning_rate": 6.506681210799018e-06, "loss": 0.0065, "step": 11930 }, { "epoch": 0.19536938558455372, "grad_norm": 0.395683616399765, "learning_rate": 6.512135260430871e-06, "loss": 0.0074, "step": 11940 }, { "epoch": 0.19553301153562955, "grad_norm": 0.3251670300960541, "learning_rate": 6.5175893100627216e-06, "loss": 0.0102, "step": 11950 }, { "epoch": 0.1956966374867054, "grad_norm": 0.48893827199935913, "learning_rate": 6.523043359694574e-06, "loss": 0.0069, "step": 11960 }, { "epoch": 0.19586026343778123, "grad_norm": 0.5587809681892395, "learning_rate": 6.528497409326425e-06, "loss": 0.0081, "step": 11970 }, { "epoch": 0.19602388938885706, "grad_norm": 0.6132639050483704, "learning_rate": 6.5339514589582775e-06, "loss": 0.0088, "step": 11980 }, { "epoch": 0.19618751533993292, "grad_norm": 0.5497077107429504, "learning_rate": 6.539405508590128e-06, "loss": 0.0149, "step": 11990 }, { "epoch": 0.19635114129100875, "grad_norm": 0.42586514353752136, "learning_rate": 6.54485955822198e-06, "loss": 0.0087, "step": 12000 }, { "epoch": 0.19651476724208458, "grad_norm": 0.2691415548324585, "learning_rate": 6.550313607853832e-06, "loss": 0.0102, "step": 12010 }, { "epoch": 0.19667839319316044, "grad_norm": 0.37258654832839966, "learning_rate": 6.5557676574856835e-06, "loss": 0.0105, "step": 12020 }, { "epoch": 0.19684201914423627, "grad_norm": 0.43392255902290344, "learning_rate": 6.561221707117535e-06, "loss": 0.0095, "step": 12030 }, { "epoch": 0.19700564509531213, "grad_norm": 0.1617450714111328, "learning_rate": 6.566675756749387e-06, "loss": 0.0088, "step": 12040 }, { "epoch": 0.19716927104638796, "grad_norm": 0.48435476422309875, "learning_rate": 6.572129806381239e-06, "loss": 0.0101, "step": 12050 }, { "epoch": 0.19733289699746379, "grad_norm": 0.6208023428916931, "learning_rate": 6.57758385601309e-06, "loss": 0.0132, "step": 12060 }, { "epoch": 0.19749652294853964, "grad_norm": 0.4030143618583679, "learning_rate": 6.583037905644942e-06, "loss": 0.0091, "step": 12070 }, { "epoch": 0.19766014889961547, "grad_norm": 0.38547268509864807, "learning_rate": 6.588491955276794e-06, "loss": 0.0085, "step": 12080 }, { "epoch": 0.19782377485069133, "grad_norm": 0.6603232622146606, "learning_rate": 6.593946004908645e-06, "loss": 0.0092, "step": 12090 }, { "epoch": 0.19798740080176716, "grad_norm": 0.5414279103279114, "learning_rate": 6.599400054540497e-06, "loss": 0.0113, "step": 12100 }, { "epoch": 0.198151026752843, "grad_norm": 0.45296400785446167, "learning_rate": 6.604854104172348e-06, "loss": 0.0077, "step": 12110 }, { "epoch": 0.19831465270391885, "grad_norm": 0.6430184841156006, "learning_rate": 6.6103081538042005e-06, "loss": 0.0082, "step": 12120 }, { "epoch": 0.19847827865499468, "grad_norm": 0.1737295538187027, "learning_rate": 6.615762203436051e-06, "loss": 0.0059, "step": 12130 }, { "epoch": 0.19864190460607054, "grad_norm": 0.3895517587661743, "learning_rate": 6.621216253067904e-06, "loss": 0.0093, "step": 12140 }, { "epoch": 0.19880553055714636, "grad_norm": 1.083504319190979, "learning_rate": 6.626670302699755e-06, "loss": 0.0112, "step": 12150 }, { "epoch": 0.1989691565082222, "grad_norm": 0.7300825119018555, "learning_rate": 6.632124352331607e-06, "loss": 0.0073, "step": 12160 }, { "epoch": 0.19913278245929805, "grad_norm": 0.44121623039245605, "learning_rate": 6.637578401963458e-06, "loss": 0.0126, "step": 12170 }, { "epoch": 0.19929640841037388, "grad_norm": 0.421705961227417, "learning_rate": 6.643032451595311e-06, "loss": 0.0104, "step": 12180 }, { "epoch": 0.19946003436144974, "grad_norm": 0.33037325739860535, "learning_rate": 6.6484865012271615e-06, "loss": 0.0079, "step": 12190 }, { "epoch": 0.19962366031252557, "grad_norm": 1.2830380201339722, "learning_rate": 6.653940550859014e-06, "loss": 0.0089, "step": 12200 }, { "epoch": 0.1997872862636014, "grad_norm": 0.4163344204425812, "learning_rate": 6.659394600490865e-06, "loss": 0.0055, "step": 12210 }, { "epoch": 0.19995091221467726, "grad_norm": 0.9814919829368591, "learning_rate": 6.6648486501227175e-06, "loss": 0.0103, "step": 12220 }, { "epoch": 0.2001145381657531, "grad_norm": 0.6290691494941711, "learning_rate": 6.670302699754568e-06, "loss": 0.01, "step": 12230 }, { "epoch": 0.20027816411682892, "grad_norm": 0.5497564673423767, "learning_rate": 6.675756749386419e-06, "loss": 0.0101, "step": 12240 }, { "epoch": 0.20044179006790477, "grad_norm": 0.5460956692695618, "learning_rate": 6.681210799018272e-06, "loss": 0.0104, "step": 12250 }, { "epoch": 0.2006054160189806, "grad_norm": 0.45882344245910645, "learning_rate": 6.686664848650123e-06, "loss": 0.0066, "step": 12260 }, { "epoch": 0.20076904197005646, "grad_norm": 0.3451559245586395, "learning_rate": 6.692118898281975e-06, "loss": 0.0063, "step": 12270 }, { "epoch": 0.2009326679211323, "grad_norm": 0.6180064082145691, "learning_rate": 6.697572947913826e-06, "loss": 0.0084, "step": 12280 }, { "epoch": 0.20109629387220812, "grad_norm": 0.5369217991828918, "learning_rate": 6.7030269975456786e-06, "loss": 0.0116, "step": 12290 }, { "epoch": 0.20125991982328398, "grad_norm": 0.4721609354019165, "learning_rate": 6.708481047177529e-06, "loss": 0.0077, "step": 12300 }, { "epoch": 0.2014235457743598, "grad_norm": 0.5312516689300537, "learning_rate": 6.713935096809382e-06, "loss": 0.0097, "step": 12310 }, { "epoch": 0.20158717172543567, "grad_norm": 0.32041916251182556, "learning_rate": 6.719389146441233e-06, "loss": 0.008, "step": 12320 }, { "epoch": 0.2017507976765115, "grad_norm": 0.4275837242603302, "learning_rate": 6.724843196073085e-06, "loss": 0.0095, "step": 12330 }, { "epoch": 0.20191442362758732, "grad_norm": 0.5569990277290344, "learning_rate": 6.730297245704936e-06, "loss": 0.007, "step": 12340 }, { "epoch": 0.20207804957866318, "grad_norm": 0.37230172753334045, "learning_rate": 6.735751295336788e-06, "loss": 0.0093, "step": 12350 }, { "epoch": 0.202241675529739, "grad_norm": 0.4427005648612976, "learning_rate": 6.74120534496864e-06, "loss": 0.0076, "step": 12360 }, { "epoch": 0.20240530148081487, "grad_norm": 0.29859134554862976, "learning_rate": 6.746659394600491e-06, "loss": 0.0082, "step": 12370 }, { "epoch": 0.2025689274318907, "grad_norm": 0.33112832903862, "learning_rate": 6.752113444232343e-06, "loss": 0.0075, "step": 12380 }, { "epoch": 0.20273255338296653, "grad_norm": 0.13960298895835876, "learning_rate": 6.757567493864195e-06, "loss": 0.0155, "step": 12390 }, { "epoch": 0.2028961793340424, "grad_norm": 0.19399921596050262, "learning_rate": 6.763021543496046e-06, "loss": 0.0055, "step": 12400 }, { "epoch": 0.20305980528511822, "grad_norm": 0.6567419767379761, "learning_rate": 6.768475593127898e-06, "loss": 0.01, "step": 12410 }, { "epoch": 0.20322343123619407, "grad_norm": 1.0741888284683228, "learning_rate": 6.77392964275975e-06, "loss": 0.0084, "step": 12420 }, { "epoch": 0.2033870571872699, "grad_norm": 0.6558672189712524, "learning_rate": 6.7793836923916015e-06, "loss": 0.0084, "step": 12430 }, { "epoch": 0.20355068313834573, "grad_norm": 0.6887688040733337, "learning_rate": 6.784837742023453e-06, "loss": 0.0089, "step": 12440 }, { "epoch": 0.2037143090894216, "grad_norm": 0.5970726013183594, "learning_rate": 6.790291791655305e-06, "loss": 0.0084, "step": 12450 }, { "epoch": 0.20387793504049742, "grad_norm": 0.534174382686615, "learning_rate": 6.795745841287156e-06, "loss": 0.0099, "step": 12460 }, { "epoch": 0.20404156099157325, "grad_norm": 0.45492666959762573, "learning_rate": 6.801199890919008e-06, "loss": 0.0094, "step": 12470 }, { "epoch": 0.2042051869426491, "grad_norm": 0.6736888885498047, "learning_rate": 6.806653940550859e-06, "loss": 0.0083, "step": 12480 }, { "epoch": 0.20436881289372494, "grad_norm": 0.42749863862991333, "learning_rate": 6.812107990182712e-06, "loss": 0.0079, "step": 12490 }, { "epoch": 0.2045324388448008, "grad_norm": 0.21442770957946777, "learning_rate": 6.8175620398145626e-06, "loss": 0.0114, "step": 12500 }, { "epoch": 0.20469606479587663, "grad_norm": 0.36209985613822937, "learning_rate": 6.823016089446415e-06, "loss": 0.0097, "step": 12510 }, { "epoch": 0.20485969074695246, "grad_norm": 0.41109931468963623, "learning_rate": 6.828470139078266e-06, "loss": 0.0081, "step": 12520 }, { "epoch": 0.2050233166980283, "grad_norm": 0.30309098958969116, "learning_rate": 6.8339241887101185e-06, "loss": 0.0115, "step": 12530 }, { "epoch": 0.20518694264910414, "grad_norm": 0.34034353494644165, "learning_rate": 6.839378238341969e-06, "loss": 0.0083, "step": 12540 }, { "epoch": 0.20535056860018, "grad_norm": 0.3176431953907013, "learning_rate": 6.844832287973822e-06, "loss": 0.0058, "step": 12550 }, { "epoch": 0.20551419455125583, "grad_norm": 0.5689629912376404, "learning_rate": 6.850286337605673e-06, "loss": 0.0082, "step": 12560 }, { "epoch": 0.20567782050233166, "grad_norm": 0.702111005783081, "learning_rate": 6.855740387237524e-06, "loss": 0.0069, "step": 12570 }, { "epoch": 0.20584144645340752, "grad_norm": 0.5140200257301331, "learning_rate": 6.861194436869376e-06, "loss": 0.0113, "step": 12580 }, { "epoch": 0.20600507240448335, "grad_norm": 0.5493470430374146, "learning_rate": 6.866648486501227e-06, "loss": 0.0089, "step": 12590 }, { "epoch": 0.2061686983555592, "grad_norm": 0.6805787086486816, "learning_rate": 6.87210253613308e-06, "loss": 0.0091, "step": 12600 }, { "epoch": 0.20633232430663503, "grad_norm": 0.6136297583580017, "learning_rate": 6.8775565857649304e-06, "loss": 0.0078, "step": 12610 }, { "epoch": 0.20649595025771086, "grad_norm": 0.3267809748649597, "learning_rate": 6.883010635396783e-06, "loss": 0.008, "step": 12620 }, { "epoch": 0.20665957620878672, "grad_norm": 0.29001057147979736, "learning_rate": 6.888464685028634e-06, "loss": 0.0097, "step": 12630 }, { "epoch": 0.20682320215986255, "grad_norm": 0.689616322517395, "learning_rate": 6.893918734660486e-06, "loss": 0.0091, "step": 12640 }, { "epoch": 0.20698682811093838, "grad_norm": 0.37942513823509216, "learning_rate": 6.899372784292337e-06, "loss": 0.0115, "step": 12650 }, { "epoch": 0.20715045406201424, "grad_norm": 0.2932007908821106, "learning_rate": 6.90482683392419e-06, "loss": 0.0079, "step": 12660 }, { "epoch": 0.20731408001309007, "grad_norm": 0.36570748686790466, "learning_rate": 6.910280883556041e-06, "loss": 0.0105, "step": 12670 }, { "epoch": 0.20747770596416593, "grad_norm": 0.5460178852081299, "learning_rate": 6.915734933187893e-06, "loss": 0.0116, "step": 12680 }, { "epoch": 0.20764133191524176, "grad_norm": 0.2802366018295288, "learning_rate": 6.921188982819744e-06, "loss": 0.0066, "step": 12690 }, { "epoch": 0.20780495786631759, "grad_norm": 0.4743119478225708, "learning_rate": 6.926643032451596e-06, "loss": 0.0084, "step": 12700 }, { "epoch": 0.20796858381739344, "grad_norm": 0.5068359375, "learning_rate": 6.9320970820834474e-06, "loss": 0.0091, "step": 12710 }, { "epoch": 0.20813220976846927, "grad_norm": 0.21520860493183136, "learning_rate": 6.937551131715299e-06, "loss": 0.0056, "step": 12720 }, { "epoch": 0.20829583571954513, "grad_norm": 0.28961360454559326, "learning_rate": 6.943005181347151e-06, "loss": 0.0077, "step": 12730 }, { "epoch": 0.20845946167062096, "grad_norm": 0.5005819201469421, "learning_rate": 6.9484592309790025e-06, "loss": 0.0107, "step": 12740 }, { "epoch": 0.2086230876216968, "grad_norm": 0.31138646602630615, "learning_rate": 6.953913280610854e-06, "loss": 0.0086, "step": 12750 }, { "epoch": 0.20878671357277265, "grad_norm": 0.6689060926437378, "learning_rate": 6.959367330242706e-06, "loss": 0.0103, "step": 12760 }, { "epoch": 0.20895033952384848, "grad_norm": 0.5686567425727844, "learning_rate": 6.964821379874558e-06, "loss": 0.011, "step": 12770 }, { "epoch": 0.20911396547492433, "grad_norm": 0.18714195489883423, "learning_rate": 6.970275429506409e-06, "loss": 0.0064, "step": 12780 }, { "epoch": 0.20927759142600016, "grad_norm": 0.5293023586273193, "learning_rate": 6.975729479138261e-06, "loss": 0.011, "step": 12790 }, { "epoch": 0.209441217377076, "grad_norm": 0.29385289549827576, "learning_rate": 6.981183528770113e-06, "loss": 0.0114, "step": 12800 }, { "epoch": 0.20960484332815185, "grad_norm": 0.31326308846473694, "learning_rate": 6.986637578401964e-06, "loss": 0.0074, "step": 12810 }, { "epoch": 0.20976846927922768, "grad_norm": 0.7566695213317871, "learning_rate": 6.992091628033816e-06, "loss": 0.0079, "step": 12820 }, { "epoch": 0.20993209523030354, "grad_norm": 0.267514169216156, "learning_rate": 6.997545677665667e-06, "loss": 0.0082, "step": 12830 }, { "epoch": 0.21009572118137937, "grad_norm": 0.0504985935986042, "learning_rate": 7.0029997272975196e-06, "loss": 0.0091, "step": 12840 }, { "epoch": 0.2102593471324552, "grad_norm": 0.4737120270729065, "learning_rate": 7.00845377692937e-06, "loss": 0.0082, "step": 12850 }, { "epoch": 0.21042297308353106, "grad_norm": 0.23650555312633514, "learning_rate": 7.013907826561223e-06, "loss": 0.0076, "step": 12860 }, { "epoch": 0.21058659903460689, "grad_norm": 0.41000521183013916, "learning_rate": 7.019361876193074e-06, "loss": 0.0051, "step": 12870 }, { "epoch": 0.21075022498568272, "grad_norm": 0.18648408353328705, "learning_rate": 7.024815925824926e-06, "loss": 0.0059, "step": 12880 }, { "epoch": 0.21091385093675857, "grad_norm": 0.34689322113990784, "learning_rate": 7.030269975456777e-06, "loss": 0.0071, "step": 12890 }, { "epoch": 0.2110774768878344, "grad_norm": 0.7265233397483826, "learning_rate": 7.03572402508863e-06, "loss": 0.0092, "step": 12900 }, { "epoch": 0.21124110283891026, "grad_norm": 0.4350171685218811, "learning_rate": 7.041178074720481e-06, "loss": 0.0096, "step": 12910 }, { "epoch": 0.2114047287899861, "grad_norm": 0.5271282196044922, "learning_rate": 7.0466321243523315e-06, "loss": 0.0084, "step": 12920 }, { "epoch": 0.21156835474106192, "grad_norm": 0.6984720230102539, "learning_rate": 7.052086173984184e-06, "loss": 0.0063, "step": 12930 }, { "epoch": 0.21173198069213778, "grad_norm": 0.6105838418006897, "learning_rate": 7.057540223616035e-06, "loss": 0.0074, "step": 12940 }, { "epoch": 0.2118956066432136, "grad_norm": 0.3719397485256195, "learning_rate": 7.062994273247887e-06, "loss": 0.0065, "step": 12950 }, { "epoch": 0.21205923259428947, "grad_norm": 0.35808417201042175, "learning_rate": 7.068448322879738e-06, "loss": 0.0087, "step": 12960 }, { "epoch": 0.2122228585453653, "grad_norm": 0.4282531142234802, "learning_rate": 7.073902372511591e-06, "loss": 0.0077, "step": 12970 }, { "epoch": 0.21238648449644112, "grad_norm": 0.5499398708343506, "learning_rate": 7.079356422143442e-06, "loss": 0.0098, "step": 12980 }, { "epoch": 0.21255011044751698, "grad_norm": 0.5832967758178711, "learning_rate": 7.084810471775294e-06, "loss": 0.0113, "step": 12990 }, { "epoch": 0.2127137363985928, "grad_norm": 0.6456331014633179, "learning_rate": 7.090264521407145e-06, "loss": 0.0067, "step": 13000 }, { "epoch": 0.21287736234966867, "grad_norm": 0.42318278551101685, "learning_rate": 7.095718571038998e-06, "loss": 0.0045, "step": 13010 }, { "epoch": 0.2130409883007445, "grad_norm": 0.2748035788536072, "learning_rate": 7.1011726206708485e-06, "loss": 0.0059, "step": 13020 }, { "epoch": 0.21320461425182033, "grad_norm": 0.4589381814002991, "learning_rate": 7.1066266703027e-06, "loss": 0.0091, "step": 13030 }, { "epoch": 0.2133682402028962, "grad_norm": 0.5025801658630371, "learning_rate": 7.112080719934552e-06, "loss": 0.0093, "step": 13040 }, { "epoch": 0.21353186615397202, "grad_norm": 1.3389508724212646, "learning_rate": 7.117534769566404e-06, "loss": 0.009, "step": 13050 }, { "epoch": 0.21369549210504787, "grad_norm": 0.28795158863067627, "learning_rate": 7.122988819198255e-06, "loss": 0.0075, "step": 13060 }, { "epoch": 0.2138591180561237, "grad_norm": 0.628476619720459, "learning_rate": 7.128442868830107e-06, "loss": 0.0079, "step": 13070 }, { "epoch": 0.21402274400719953, "grad_norm": 1.038084626197815, "learning_rate": 7.133896918461959e-06, "loss": 0.0093, "step": 13080 }, { "epoch": 0.2141863699582754, "grad_norm": 0.44432532787323, "learning_rate": 7.13935096809381e-06, "loss": 0.0071, "step": 13090 }, { "epoch": 0.21434999590935122, "grad_norm": 0.37490004301071167, "learning_rate": 7.144805017725662e-06, "loss": 0.0101, "step": 13100 }, { "epoch": 0.21451362186042705, "grad_norm": 0.3405896723270416, "learning_rate": 7.150259067357514e-06, "loss": 0.0065, "step": 13110 }, { "epoch": 0.2146772478115029, "grad_norm": 0.3738662302494049, "learning_rate": 7.1557131169893655e-06, "loss": 0.0065, "step": 13120 }, { "epoch": 0.21484087376257874, "grad_norm": 0.5956369638442993, "learning_rate": 7.161167166621217e-06, "loss": 0.0088, "step": 13130 }, { "epoch": 0.2150044997136546, "grad_norm": 0.3642753064632416, "learning_rate": 7.166621216253068e-06, "loss": 0.0084, "step": 13140 }, { "epoch": 0.21516812566473043, "grad_norm": 0.3063509166240692, "learning_rate": 7.172075265884921e-06, "loss": 0.0093, "step": 13150 }, { "epoch": 0.21533175161580626, "grad_norm": 0.26861312985420227, "learning_rate": 7.1775293155167714e-06, "loss": 0.0088, "step": 13160 }, { "epoch": 0.2154953775668821, "grad_norm": 0.5493288636207581, "learning_rate": 7.182983365148624e-06, "loss": 0.009, "step": 13170 }, { "epoch": 0.21565900351795794, "grad_norm": 0.47742828726768494, "learning_rate": 7.188437414780475e-06, "loss": 0.0104, "step": 13180 }, { "epoch": 0.2158226294690338, "grad_norm": 0.36240002512931824, "learning_rate": 7.193891464412327e-06, "loss": 0.0112, "step": 13190 }, { "epoch": 0.21598625542010963, "grad_norm": 0.48867344856262207, "learning_rate": 7.199345514044178e-06, "loss": 0.0082, "step": 13200 }, { "epoch": 0.21614988137118546, "grad_norm": 0.41958922147750854, "learning_rate": 7.204799563676031e-06, "loss": 0.0087, "step": 13210 }, { "epoch": 0.21631350732226132, "grad_norm": 0.6330770254135132, "learning_rate": 7.210253613307882e-06, "loss": 0.0077, "step": 13220 }, { "epoch": 0.21647713327333715, "grad_norm": 0.342072993516922, "learning_rate": 7.215707662939734e-06, "loss": 0.0092, "step": 13230 }, { "epoch": 0.216640759224413, "grad_norm": 0.16671772301197052, "learning_rate": 7.221161712571585e-06, "loss": 0.0069, "step": 13240 }, { "epoch": 0.21680438517548883, "grad_norm": 0.6191067099571228, "learning_rate": 7.226615762203438e-06, "loss": 0.0091, "step": 13250 }, { "epoch": 0.21696801112656466, "grad_norm": 0.5939600467681885, "learning_rate": 7.2320698118352885e-06, "loss": 0.01, "step": 13260 }, { "epoch": 0.21713163707764052, "grad_norm": 0.3789665400981903, "learning_rate": 7.237523861467139e-06, "loss": 0.0077, "step": 13270 }, { "epoch": 0.21729526302871635, "grad_norm": 0.42645567655563354, "learning_rate": 7.242977911098992e-06, "loss": 0.007, "step": 13280 }, { "epoch": 0.2174588889797922, "grad_norm": 0.3949757516384125, "learning_rate": 7.248431960730843e-06, "loss": 0.0077, "step": 13290 }, { "epoch": 0.21762251493086804, "grad_norm": 0.311788409948349, "learning_rate": 7.253886010362695e-06, "loss": 0.0076, "step": 13300 }, { "epoch": 0.21778614088194387, "grad_norm": 0.7898632287979126, "learning_rate": 7.259340059994546e-06, "loss": 0.0088, "step": 13310 }, { "epoch": 0.21794976683301973, "grad_norm": 0.5830650329589844, "learning_rate": 7.264794109626399e-06, "loss": 0.0058, "step": 13320 }, { "epoch": 0.21811339278409556, "grad_norm": 0.5990749597549438, "learning_rate": 7.2702481592582495e-06, "loss": 0.0097, "step": 13330 }, { "epoch": 0.21827701873517139, "grad_norm": 0.6458852291107178, "learning_rate": 7.275702208890102e-06, "loss": 0.0073, "step": 13340 }, { "epoch": 0.21844064468624724, "grad_norm": 0.35592833161354065, "learning_rate": 7.281156258521953e-06, "loss": 0.0103, "step": 13350 }, { "epoch": 0.21860427063732307, "grad_norm": 0.5433804988861084, "learning_rate": 7.2866103081538055e-06, "loss": 0.0145, "step": 13360 }, { "epoch": 0.21876789658839893, "grad_norm": 0.6583028435707092, "learning_rate": 7.292064357785656e-06, "loss": 0.0107, "step": 13370 }, { "epoch": 0.21893152253947476, "grad_norm": 0.34199726581573486, "learning_rate": 7.297518407417508e-06, "loss": 0.0067, "step": 13380 }, { "epoch": 0.2190951484905506, "grad_norm": 0.611171543598175, "learning_rate": 7.30297245704936e-06, "loss": 0.0085, "step": 13390 }, { "epoch": 0.21925877444162645, "grad_norm": 0.3668595850467682, "learning_rate": 7.308426506681211e-06, "loss": 0.0089, "step": 13400 }, { "epoch": 0.21942240039270228, "grad_norm": 0.6533665657043457, "learning_rate": 7.313880556313063e-06, "loss": 0.0101, "step": 13410 }, { "epoch": 0.21958602634377813, "grad_norm": 0.24867713451385498, "learning_rate": 7.319334605944915e-06, "loss": 0.0051, "step": 13420 }, { "epoch": 0.21974965229485396, "grad_norm": 0.5480397939682007, "learning_rate": 7.3247886555767665e-06, "loss": 0.0134, "step": 13430 }, { "epoch": 0.2199132782459298, "grad_norm": 0.2016737312078476, "learning_rate": 7.330242705208618e-06, "loss": 0.0109, "step": 13440 }, { "epoch": 0.22007690419700565, "grad_norm": 0.28459542989730835, "learning_rate": 7.33569675484047e-06, "loss": 0.0079, "step": 13450 }, { "epoch": 0.22024053014808148, "grad_norm": 0.3175831735134125, "learning_rate": 7.341150804472322e-06, "loss": 0.0059, "step": 13460 }, { "epoch": 0.22040415609915734, "grad_norm": 0.5060707926750183, "learning_rate": 7.346604854104173e-06, "loss": 0.0086, "step": 13470 }, { "epoch": 0.22056778205023317, "grad_norm": 0.4972872734069824, "learning_rate": 7.352058903736024e-06, "loss": 0.0069, "step": 13480 }, { "epoch": 0.220731408001309, "grad_norm": 0.3736521005630493, "learning_rate": 7.357512953367876e-06, "loss": 0.0081, "step": 13490 }, { "epoch": 0.22089503395238486, "grad_norm": 0.3874336779117584, "learning_rate": 7.362967002999728e-06, "loss": 0.0107, "step": 13500 }, { "epoch": 0.22105865990346069, "grad_norm": 0.21691761910915375, "learning_rate": 7.368421052631579e-06, "loss": 0.0049, "step": 13510 }, { "epoch": 0.22122228585453654, "grad_norm": 0.42537644505500793, "learning_rate": 7.373875102263431e-06, "loss": 0.0093, "step": 13520 }, { "epoch": 0.22138591180561237, "grad_norm": 1.0397299528121948, "learning_rate": 7.379329151895283e-06, "loss": 0.0101, "step": 13530 }, { "epoch": 0.2215495377566882, "grad_norm": 0.55242919921875, "learning_rate": 7.384783201527134e-06, "loss": 0.0086, "step": 13540 }, { "epoch": 0.22171316370776406, "grad_norm": 0.4072169065475464, "learning_rate": 7.390237251158986e-06, "loss": 0.0055, "step": 13550 }, { "epoch": 0.2218767896588399, "grad_norm": 0.198331817984581, "learning_rate": 7.395691300790838e-06, "loss": 0.0076, "step": 13560 }, { "epoch": 0.22204041560991572, "grad_norm": 0.6559755802154541, "learning_rate": 7.4011453504226895e-06, "loss": 0.0123, "step": 13570 }, { "epoch": 0.22220404156099158, "grad_norm": 0.26538601517677307, "learning_rate": 7.406599400054541e-06, "loss": 0.0069, "step": 13580 }, { "epoch": 0.2223676675120674, "grad_norm": 0.5764954686164856, "learning_rate": 7.412053449686393e-06, "loss": 0.0071, "step": 13590 }, { "epoch": 0.22253129346314326, "grad_norm": 0.7161535024642944, "learning_rate": 7.417507499318244e-06, "loss": 0.0089, "step": 13600 }, { "epoch": 0.2226949194142191, "grad_norm": 0.49198058247566223, "learning_rate": 7.422961548950096e-06, "loss": 0.0089, "step": 13610 }, { "epoch": 0.22285854536529492, "grad_norm": 0.2610313594341278, "learning_rate": 7.428415598581947e-06, "loss": 0.0092, "step": 13620 }, { "epoch": 0.22302217131637078, "grad_norm": 0.35409772396087646, "learning_rate": 7.4338696482138e-06, "loss": 0.0081, "step": 13630 }, { "epoch": 0.2231857972674466, "grad_norm": 0.5974505543708801, "learning_rate": 7.4393236978456505e-06, "loss": 0.0067, "step": 13640 }, { "epoch": 0.22334942321852247, "grad_norm": 0.29375317692756653, "learning_rate": 7.444777747477503e-06, "loss": 0.005, "step": 13650 }, { "epoch": 0.2235130491695983, "grad_norm": 0.35343772172927856, "learning_rate": 7.450231797109354e-06, "loss": 0.0075, "step": 13660 }, { "epoch": 0.22367667512067413, "grad_norm": 0.4877508878707886, "learning_rate": 7.4556858467412065e-06, "loss": 0.007, "step": 13670 }, { "epoch": 0.22384030107175, "grad_norm": 0.38897445797920227, "learning_rate": 7.461139896373057e-06, "loss": 0.0088, "step": 13680 }, { "epoch": 0.22400392702282582, "grad_norm": 0.4827202260494232, "learning_rate": 7.46659394600491e-06, "loss": 0.0064, "step": 13690 }, { "epoch": 0.22416755297390167, "grad_norm": 0.4307715594768524, "learning_rate": 7.472047995636761e-06, "loss": 0.0086, "step": 13700 }, { "epoch": 0.2243311789249775, "grad_norm": 0.4649055302143097, "learning_rate": 7.477502045268612e-06, "loss": 0.0056, "step": 13710 }, { "epoch": 0.22449480487605333, "grad_norm": 0.565535306930542, "learning_rate": 7.482956094900464e-06, "loss": 0.0063, "step": 13720 }, { "epoch": 0.2246584308271292, "grad_norm": 0.3925972878932953, "learning_rate": 7.488410144532315e-06, "loss": 0.0097, "step": 13730 }, { "epoch": 0.22482205677820502, "grad_norm": 0.38579362630844116, "learning_rate": 7.4938641941641676e-06, "loss": 0.0086, "step": 13740 }, { "epoch": 0.22498568272928085, "grad_norm": 0.34197136759757996, "learning_rate": 7.499318243796018e-06, "loss": 0.007, "step": 13750 }, { "epoch": 0.2251493086803567, "grad_norm": 0.42359283566474915, "learning_rate": 7.504772293427871e-06, "loss": 0.0071, "step": 13760 }, { "epoch": 0.22531293463143254, "grad_norm": 0.5756303071975708, "learning_rate": 7.510226343059722e-06, "loss": 0.0147, "step": 13770 }, { "epoch": 0.2254765605825084, "grad_norm": 0.5594773888587952, "learning_rate": 7.515680392691574e-06, "loss": 0.0072, "step": 13780 }, { "epoch": 0.22564018653358422, "grad_norm": 0.5141112804412842, "learning_rate": 7.521134442323425e-06, "loss": 0.0093, "step": 13790 }, { "epoch": 0.22580381248466005, "grad_norm": 0.524696409702301, "learning_rate": 7.526588491955278e-06, "loss": 0.0083, "step": 13800 }, { "epoch": 0.2259674384357359, "grad_norm": 0.2597118020057678, "learning_rate": 7.532042541587129e-06, "loss": 0.0073, "step": 13810 }, { "epoch": 0.22613106438681174, "grad_norm": 0.43524059653282166, "learning_rate": 7.537496591218981e-06, "loss": 0.0092, "step": 13820 }, { "epoch": 0.2262946903378876, "grad_norm": 0.38005179166793823, "learning_rate": 7.542950640850832e-06, "loss": 0.0079, "step": 13830 }, { "epoch": 0.22645831628896343, "grad_norm": 0.33128640055656433, "learning_rate": 7.548404690482684e-06, "loss": 0.0106, "step": 13840 }, { "epoch": 0.22662194224003926, "grad_norm": 0.7541969418525696, "learning_rate": 7.553858740114535e-06, "loss": 0.0093, "step": 13850 }, { "epoch": 0.22678556819111512, "grad_norm": 0.5331867933273315, "learning_rate": 7.559312789746387e-06, "loss": 0.0077, "step": 13860 }, { "epoch": 0.22694919414219095, "grad_norm": 0.47539660334587097, "learning_rate": 7.564766839378239e-06, "loss": 0.0108, "step": 13870 }, { "epoch": 0.2271128200932668, "grad_norm": 0.40205758810043335, "learning_rate": 7.5702208890100905e-06, "loss": 0.0098, "step": 13880 }, { "epoch": 0.22727644604434263, "grad_norm": 0.25452202558517456, "learning_rate": 7.575674938641942e-06, "loss": 0.0083, "step": 13890 }, { "epoch": 0.22744007199541846, "grad_norm": 0.24174167215824127, "learning_rate": 7.581128988273794e-06, "loss": 0.0095, "step": 13900 }, { "epoch": 0.22760369794649432, "grad_norm": 0.35427331924438477, "learning_rate": 7.586583037905646e-06, "loss": 0.009, "step": 13910 }, { "epoch": 0.22776732389757015, "grad_norm": 0.3100494146347046, "learning_rate": 7.592037087537497e-06, "loss": 0.0079, "step": 13920 }, { "epoch": 0.227930949848646, "grad_norm": 0.3048965632915497, "learning_rate": 7.597491137169349e-06, "loss": 0.0096, "step": 13930 }, { "epoch": 0.22809457579972184, "grad_norm": 0.21039795875549316, "learning_rate": 7.602945186801201e-06, "loss": 0.0065, "step": 13940 }, { "epoch": 0.22825820175079767, "grad_norm": 0.355277955532074, "learning_rate": 7.608399236433052e-06, "loss": 0.0057, "step": 13950 }, { "epoch": 0.22842182770187353, "grad_norm": 0.47577816247940063, "learning_rate": 7.613853286064904e-06, "loss": 0.0068, "step": 13960 }, { "epoch": 0.22858545365294936, "grad_norm": 0.5222656726837158, "learning_rate": 7.619307335696755e-06, "loss": 0.0076, "step": 13970 }, { "epoch": 0.22874907960402519, "grad_norm": 0.8510044813156128, "learning_rate": 7.6247613853286075e-06, "loss": 0.0079, "step": 13980 }, { "epoch": 0.22891270555510104, "grad_norm": 0.3962283432483673, "learning_rate": 7.63021543496046e-06, "loss": 0.0096, "step": 13990 }, { "epoch": 0.22907633150617687, "grad_norm": 0.23457308113574982, "learning_rate": 7.63566948459231e-06, "loss": 0.0086, "step": 14000 }, { "epoch": 0.22923995745725273, "grad_norm": 0.7131142020225525, "learning_rate": 7.641123534224163e-06, "loss": 0.0075, "step": 14010 }, { "epoch": 0.22940358340832856, "grad_norm": 0.6043888330459595, "learning_rate": 7.646577583856013e-06, "loss": 0.0113, "step": 14020 }, { "epoch": 0.2295672093594044, "grad_norm": 0.40674737095832825, "learning_rate": 7.652031633487866e-06, "loss": 0.0045, "step": 14030 }, { "epoch": 0.22973083531048025, "grad_norm": 0.49257761240005493, "learning_rate": 7.657485683119717e-06, "loss": 0.009, "step": 14040 }, { "epoch": 0.22989446126155608, "grad_norm": 0.5192036628723145, "learning_rate": 7.66293973275157e-06, "loss": 0.0083, "step": 14050 }, { "epoch": 0.23005808721263193, "grad_norm": 0.5333401560783386, "learning_rate": 7.66839378238342e-06, "loss": 0.0081, "step": 14060 }, { "epoch": 0.23022171316370776, "grad_norm": 0.4272986650466919, "learning_rate": 7.673847832015273e-06, "loss": 0.007, "step": 14070 }, { "epoch": 0.2303853391147836, "grad_norm": 0.807560920715332, "learning_rate": 7.679301881647124e-06, "loss": 0.0139, "step": 14080 }, { "epoch": 0.23054896506585945, "grad_norm": 0.4795917570590973, "learning_rate": 7.684755931278976e-06, "loss": 0.0072, "step": 14090 }, { "epoch": 0.23071259101693528, "grad_norm": 0.6201799511909485, "learning_rate": 7.690209980910827e-06, "loss": 0.0078, "step": 14100 }, { "epoch": 0.23087621696801114, "grad_norm": 0.5711755156517029, "learning_rate": 7.69566403054268e-06, "loss": 0.0053, "step": 14110 }, { "epoch": 0.23103984291908697, "grad_norm": 0.50462406873703, "learning_rate": 7.70111808017453e-06, "loss": 0.0078, "step": 14120 }, { "epoch": 0.2312034688701628, "grad_norm": 0.6455571055412292, "learning_rate": 7.706572129806383e-06, "loss": 0.0101, "step": 14130 }, { "epoch": 0.23136709482123866, "grad_norm": 0.2799656093120575, "learning_rate": 7.712026179438234e-06, "loss": 0.0088, "step": 14140 }, { "epoch": 0.23153072077231449, "grad_norm": 0.13593356311321259, "learning_rate": 7.717480229070086e-06, "loss": 0.0103, "step": 14150 }, { "epoch": 0.23169434672339034, "grad_norm": 0.3700540065765381, "learning_rate": 7.722934278701937e-06, "loss": 0.0065, "step": 14160 }, { "epoch": 0.23185797267446617, "grad_norm": 0.3780127465724945, "learning_rate": 7.728388328333788e-06, "loss": 0.0071, "step": 14170 }, { "epoch": 0.232021598625542, "grad_norm": 0.779640257358551, "learning_rate": 7.73384237796564e-06, "loss": 0.0114, "step": 14180 }, { "epoch": 0.23218522457661786, "grad_norm": 0.793234646320343, "learning_rate": 7.739296427597492e-06, "loss": 0.0098, "step": 14190 }, { "epoch": 0.2323488505276937, "grad_norm": 0.49452006816864014, "learning_rate": 7.744750477229344e-06, "loss": 0.0088, "step": 14200 }, { "epoch": 0.23251247647876952, "grad_norm": 0.4760637581348419, "learning_rate": 7.750204526861195e-06, "loss": 0.0091, "step": 14210 }, { "epoch": 0.23267610242984538, "grad_norm": 0.5735825300216675, "learning_rate": 7.755658576493048e-06, "loss": 0.0085, "step": 14220 }, { "epoch": 0.2328397283809212, "grad_norm": 0.4971274733543396, "learning_rate": 7.761112626124898e-06, "loss": 0.0112, "step": 14230 }, { "epoch": 0.23300335433199706, "grad_norm": 0.7157065868377686, "learning_rate": 7.766566675756751e-06, "loss": 0.0076, "step": 14240 }, { "epoch": 0.2331669802830729, "grad_norm": 0.34946438670158386, "learning_rate": 7.772020725388602e-06, "loss": 0.0095, "step": 14250 }, { "epoch": 0.23333060623414872, "grad_norm": 0.6798130869865417, "learning_rate": 7.777474775020454e-06, "loss": 0.0071, "step": 14260 }, { "epoch": 0.23349423218522458, "grad_norm": 0.275322288274765, "learning_rate": 7.782928824652305e-06, "loss": 0.0096, "step": 14270 }, { "epoch": 0.2336578581363004, "grad_norm": 0.4094807207584381, "learning_rate": 7.788382874284156e-06, "loss": 0.0062, "step": 14280 }, { "epoch": 0.23382148408737627, "grad_norm": 0.33121684193611145, "learning_rate": 7.793836923916009e-06, "loss": 0.0065, "step": 14290 }, { "epoch": 0.2339851100384521, "grad_norm": 0.6018854975700378, "learning_rate": 7.79929097354786e-06, "loss": 0.0095, "step": 14300 }, { "epoch": 0.23414873598952793, "grad_norm": 0.51739901304245, "learning_rate": 7.804745023179712e-06, "loss": 0.0074, "step": 14310 }, { "epoch": 0.23431236194060379, "grad_norm": 0.8751764297485352, "learning_rate": 7.810199072811563e-06, "loss": 0.013, "step": 14320 }, { "epoch": 0.23447598789167962, "grad_norm": 0.36460477113723755, "learning_rate": 7.815653122443415e-06, "loss": 0.0085, "step": 14330 }, { "epoch": 0.23463961384275547, "grad_norm": 0.410590261220932, "learning_rate": 7.821107172075266e-06, "loss": 0.0077, "step": 14340 }, { "epoch": 0.2348032397938313, "grad_norm": 0.2005511224269867, "learning_rate": 7.826561221707119e-06, "loss": 0.0105, "step": 14350 }, { "epoch": 0.23496686574490713, "grad_norm": 0.486274391412735, "learning_rate": 7.83201527133897e-06, "loss": 0.0104, "step": 14360 }, { "epoch": 0.235130491695983, "grad_norm": 0.3867475986480713, "learning_rate": 7.837469320970822e-06, "loss": 0.0082, "step": 14370 }, { "epoch": 0.23529411764705882, "grad_norm": 0.38502442836761475, "learning_rate": 7.842923370602673e-06, "loss": 0.0099, "step": 14380 }, { "epoch": 0.23545774359813468, "grad_norm": 0.29765263199806213, "learning_rate": 7.848377420234526e-06, "loss": 0.0112, "step": 14390 }, { "epoch": 0.2356213695492105, "grad_norm": 0.5395899415016174, "learning_rate": 7.853831469866376e-06, "loss": 0.0108, "step": 14400 }, { "epoch": 0.23578499550028634, "grad_norm": 0.5860231518745422, "learning_rate": 7.859285519498227e-06, "loss": 0.0086, "step": 14410 }, { "epoch": 0.2359486214513622, "grad_norm": 0.38418200612068176, "learning_rate": 7.86473956913008e-06, "loss": 0.0062, "step": 14420 }, { "epoch": 0.23611224740243802, "grad_norm": 0.5440731048583984, "learning_rate": 7.87019361876193e-06, "loss": 0.0088, "step": 14430 }, { "epoch": 0.23627587335351385, "grad_norm": 0.43382129073143005, "learning_rate": 7.875647668393783e-06, "loss": 0.0082, "step": 14440 }, { "epoch": 0.2364394993045897, "grad_norm": 0.2749595046043396, "learning_rate": 7.881101718025634e-06, "loss": 0.0111, "step": 14450 }, { "epoch": 0.23660312525566554, "grad_norm": 0.1973612755537033, "learning_rate": 7.886555767657487e-06, "loss": 0.006, "step": 14460 }, { "epoch": 0.2367667512067414, "grad_norm": 0.913813054561615, "learning_rate": 7.892009817289337e-06, "loss": 0.0076, "step": 14470 }, { "epoch": 0.23693037715781723, "grad_norm": 0.4277299642562866, "learning_rate": 7.89746386692119e-06, "loss": 0.0061, "step": 14480 }, { "epoch": 0.23709400310889306, "grad_norm": 0.62294602394104, "learning_rate": 7.902917916553041e-06, "loss": 0.0062, "step": 14490 }, { "epoch": 0.23725762905996892, "grad_norm": 0.2680823504924774, "learning_rate": 7.908371966184893e-06, "loss": 0.0051, "step": 14500 }, { "epoch": 0.23742125501104475, "grad_norm": 0.11393973231315613, "learning_rate": 7.913826015816744e-06, "loss": 0.0068, "step": 14510 }, { "epoch": 0.2375848809621206, "grad_norm": 0.36629122495651245, "learning_rate": 7.919280065448595e-06, "loss": 0.0067, "step": 14520 }, { "epoch": 0.23774850691319643, "grad_norm": 0.4813237190246582, "learning_rate": 7.924734115080448e-06, "loss": 0.0106, "step": 14530 }, { "epoch": 0.23791213286427226, "grad_norm": 0.46408146619796753, "learning_rate": 7.930188164712299e-06, "loss": 0.0056, "step": 14540 }, { "epoch": 0.23807575881534812, "grad_norm": 0.615135133266449, "learning_rate": 7.935642214344151e-06, "loss": 0.0073, "step": 14550 }, { "epoch": 0.23823938476642395, "grad_norm": 0.5712845325469971, "learning_rate": 7.941096263976002e-06, "loss": 0.0061, "step": 14560 }, { "epoch": 0.2384030107174998, "grad_norm": 0.36727845668792725, "learning_rate": 7.946550313607854e-06, "loss": 0.0066, "step": 14570 }, { "epoch": 0.23856663666857564, "grad_norm": 0.2183261513710022, "learning_rate": 7.952004363239705e-06, "loss": 0.0072, "step": 14580 }, { "epoch": 0.23873026261965147, "grad_norm": 0.4431883990764618, "learning_rate": 7.957458412871558e-06, "loss": 0.0109, "step": 14590 }, { "epoch": 0.23889388857072733, "grad_norm": 0.1619562953710556, "learning_rate": 7.962912462503409e-06, "loss": 0.0051, "step": 14600 }, { "epoch": 0.23905751452180315, "grad_norm": 0.6055853962898254, "learning_rate": 7.968366512135261e-06, "loss": 0.0103, "step": 14610 }, { "epoch": 0.23922114047287898, "grad_norm": 0.6191697120666504, "learning_rate": 7.973820561767112e-06, "loss": 0.0072, "step": 14620 }, { "epoch": 0.23938476642395484, "grad_norm": 0.3025716245174408, "learning_rate": 7.979274611398965e-06, "loss": 0.0066, "step": 14630 }, { "epoch": 0.23954839237503067, "grad_norm": 0.5540299415588379, "learning_rate": 7.984728661030816e-06, "loss": 0.0076, "step": 14640 }, { "epoch": 0.23971201832610653, "grad_norm": 0.7998112440109253, "learning_rate": 7.990182710662668e-06, "loss": 0.0078, "step": 14650 }, { "epoch": 0.23987564427718236, "grad_norm": 0.31662535667419434, "learning_rate": 7.995636760294519e-06, "loss": 0.0105, "step": 14660 }, { "epoch": 0.2400392702282582, "grad_norm": 0.3014910817146301, "learning_rate": 8.001090809926372e-06, "loss": 0.0086, "step": 14670 }, { "epoch": 0.24020289617933405, "grad_norm": 0.4394930303096771, "learning_rate": 8.006544859558222e-06, "loss": 0.0061, "step": 14680 }, { "epoch": 0.24036652213040988, "grad_norm": 0.19743195176124573, "learning_rate": 8.011998909190075e-06, "loss": 0.0043, "step": 14690 }, { "epoch": 0.24053014808148573, "grad_norm": 0.09330590814352036, "learning_rate": 8.017452958821926e-06, "loss": 0.006, "step": 14700 }, { "epoch": 0.24069377403256156, "grad_norm": 0.7159872651100159, "learning_rate": 8.022907008453777e-06, "loss": 0.0095, "step": 14710 }, { "epoch": 0.2408573999836374, "grad_norm": 0.6109306812286377, "learning_rate": 8.02836105808563e-06, "loss": 0.0066, "step": 14720 }, { "epoch": 0.24102102593471325, "grad_norm": 0.1929505616426468, "learning_rate": 8.03381510771748e-06, "loss": 0.0078, "step": 14730 }, { "epoch": 0.24118465188578908, "grad_norm": 0.23274843394756317, "learning_rate": 8.039269157349333e-06, "loss": 0.0065, "step": 14740 }, { "epoch": 0.24134827783686494, "grad_norm": 0.5227785706520081, "learning_rate": 8.044723206981183e-06, "loss": 0.0085, "step": 14750 }, { "epoch": 0.24151190378794077, "grad_norm": 1.1571177244186401, "learning_rate": 8.050177256613036e-06, "loss": 0.0095, "step": 14760 }, { "epoch": 0.2416755297390166, "grad_norm": 0.34047138690948486, "learning_rate": 8.055631306244887e-06, "loss": 0.008, "step": 14770 }, { "epoch": 0.24183915569009246, "grad_norm": 0.25412407517433167, "learning_rate": 8.06108535587674e-06, "loss": 0.0061, "step": 14780 }, { "epoch": 0.24200278164116829, "grad_norm": 0.3473801612854004, "learning_rate": 8.06653940550859e-06, "loss": 0.0052, "step": 14790 }, { "epoch": 0.24216640759224414, "grad_norm": 0.22174876928329468, "learning_rate": 8.071993455140443e-06, "loss": 0.0097, "step": 14800 }, { "epoch": 0.24233003354331997, "grad_norm": 0.6153866052627563, "learning_rate": 8.077447504772294e-06, "loss": 0.0079, "step": 14810 }, { "epoch": 0.2424936594943958, "grad_norm": 0.481741726398468, "learning_rate": 8.082901554404146e-06, "loss": 0.0086, "step": 14820 }, { "epoch": 0.24265728544547166, "grad_norm": 0.42188334465026855, "learning_rate": 8.088355604035997e-06, "loss": 0.0113, "step": 14830 }, { "epoch": 0.2428209113965475, "grad_norm": 0.3054935932159424, "learning_rate": 8.09380965366785e-06, "loss": 0.0082, "step": 14840 }, { "epoch": 0.24298453734762332, "grad_norm": 0.23599357903003693, "learning_rate": 8.0992637032997e-06, "loss": 0.0078, "step": 14850 }, { "epoch": 0.24314816329869918, "grad_norm": 0.38109397888183594, "learning_rate": 8.104717752931553e-06, "loss": 0.0092, "step": 14860 }, { "epoch": 0.243311789249775, "grad_norm": 0.4762151837348938, "learning_rate": 8.110171802563404e-06, "loss": 0.006, "step": 14870 }, { "epoch": 0.24347541520085086, "grad_norm": 0.5742477178573608, "learning_rate": 8.115625852195256e-06, "loss": 0.007, "step": 14880 }, { "epoch": 0.2436390411519267, "grad_norm": 0.34737327694892883, "learning_rate": 8.121079901827107e-06, "loss": 0.0059, "step": 14890 }, { "epoch": 0.24380266710300252, "grad_norm": 0.27185627818107605, "learning_rate": 8.12653395145896e-06, "loss": 0.0051, "step": 14900 }, { "epoch": 0.24396629305407838, "grad_norm": 0.2783239781856537, "learning_rate": 8.13198800109081e-06, "loss": 0.0112, "step": 14910 }, { "epoch": 0.2441299190051542, "grad_norm": 0.2922504246234894, "learning_rate": 8.137442050722663e-06, "loss": 0.005, "step": 14920 }, { "epoch": 0.24429354495623007, "grad_norm": 0.386493980884552, "learning_rate": 8.142896100354514e-06, "loss": 0.0104, "step": 14930 }, { "epoch": 0.2444571709073059, "grad_norm": 0.16024218499660492, "learning_rate": 8.148350149986367e-06, "loss": 0.0054, "step": 14940 }, { "epoch": 0.24462079685838173, "grad_norm": 0.43841415643692017, "learning_rate": 8.153804199618217e-06, "loss": 0.0065, "step": 14950 }, { "epoch": 0.24478442280945759, "grad_norm": 0.3885124623775482, "learning_rate": 8.15925824925007e-06, "loss": 0.0073, "step": 14960 }, { "epoch": 0.24494804876053342, "grad_norm": 0.42438313364982605, "learning_rate": 8.16471229888192e-06, "loss": 0.0094, "step": 14970 }, { "epoch": 0.24511167471160927, "grad_norm": 0.5339972972869873, "learning_rate": 8.170166348513772e-06, "loss": 0.0104, "step": 14980 }, { "epoch": 0.2452753006626851, "grad_norm": 0.24884118139743805, "learning_rate": 8.175620398145624e-06, "loss": 0.0076, "step": 14990 }, { "epoch": 0.24543892661376093, "grad_norm": 0.4015233814716339, "learning_rate": 8.181074447777475e-06, "loss": 0.009, "step": 15000 }, { "epoch": 0.2456025525648368, "grad_norm": 0.4313192069530487, "learning_rate": 8.186528497409328e-06, "loss": 0.0055, "step": 15010 }, { "epoch": 0.24576617851591262, "grad_norm": 0.4145377576351166, "learning_rate": 8.191982547041178e-06, "loss": 0.0083, "step": 15020 }, { "epoch": 0.24592980446698848, "grad_norm": 0.3716921806335449, "learning_rate": 8.197436596673031e-06, "loss": 0.0084, "step": 15030 }, { "epoch": 0.2460934304180643, "grad_norm": 0.41910916566848755, "learning_rate": 8.202890646304882e-06, "loss": 0.0109, "step": 15040 }, { "epoch": 0.24625705636914014, "grad_norm": 0.24850162863731384, "learning_rate": 8.208344695936734e-06, "loss": 0.0096, "step": 15050 }, { "epoch": 0.246420682320216, "grad_norm": 0.4973500072956085, "learning_rate": 8.213798745568585e-06, "loss": 0.0076, "step": 15060 }, { "epoch": 0.24658430827129182, "grad_norm": 0.16750168800354004, "learning_rate": 8.219252795200438e-06, "loss": 0.0071, "step": 15070 }, { "epoch": 0.24674793422236765, "grad_norm": 0.514723002910614, "learning_rate": 8.224706844832289e-06, "loss": 0.0101, "step": 15080 }, { "epoch": 0.2469115601734435, "grad_norm": 0.16123224794864655, "learning_rate": 8.23016089446414e-06, "loss": 0.0057, "step": 15090 }, { "epoch": 0.24707518612451934, "grad_norm": 0.5143868327140808, "learning_rate": 8.235614944095992e-06, "loss": 0.0064, "step": 15100 }, { "epoch": 0.2472388120755952, "grad_norm": 0.3330678343772888, "learning_rate": 8.241068993727843e-06, "loss": 0.0097, "step": 15110 }, { "epoch": 0.24740243802667103, "grad_norm": 0.3562721610069275, "learning_rate": 8.246523043359696e-06, "loss": 0.0065, "step": 15120 }, { "epoch": 0.24756606397774686, "grad_norm": 0.360736221075058, "learning_rate": 8.251977092991546e-06, "loss": 0.0067, "step": 15130 }, { "epoch": 0.24772968992882272, "grad_norm": 0.304109126329422, "learning_rate": 8.257431142623399e-06, "loss": 0.0072, "step": 15140 }, { "epoch": 0.24789331587989855, "grad_norm": 0.09711117297410965, "learning_rate": 8.26288519225525e-06, "loss": 0.0056, "step": 15150 }, { "epoch": 0.2480569418309744, "grad_norm": 0.31720617413520813, "learning_rate": 8.268339241887102e-06, "loss": 0.0066, "step": 15160 }, { "epoch": 0.24822056778205023, "grad_norm": 0.3169492781162262, "learning_rate": 8.273793291518953e-06, "loss": 0.0065, "step": 15170 }, { "epoch": 0.24838419373312606, "grad_norm": 0.5174160003662109, "learning_rate": 8.279247341150806e-06, "loss": 0.0083, "step": 15180 }, { "epoch": 0.24854781968420192, "grad_norm": 0.603222131729126, "learning_rate": 8.284701390782657e-06, "loss": 0.0088, "step": 15190 }, { "epoch": 0.24871144563527775, "grad_norm": 0.48851442337036133, "learning_rate": 8.290155440414507e-06, "loss": 0.0071, "step": 15200 }, { "epoch": 0.2488750715863536, "grad_norm": 0.14064426720142365, "learning_rate": 8.29560949004636e-06, "loss": 0.0038, "step": 15210 }, { "epoch": 0.24903869753742944, "grad_norm": 0.37737977504730225, "learning_rate": 8.30106353967821e-06, "loss": 0.0072, "step": 15220 }, { "epoch": 0.24920232348850527, "grad_norm": 0.25355586409568787, "learning_rate": 8.306517589310063e-06, "loss": 0.0096, "step": 15230 }, { "epoch": 0.24936594943958112, "grad_norm": 0.47630444169044495, "learning_rate": 8.311971638941914e-06, "loss": 0.0076, "step": 15240 }, { "epoch": 0.24952957539065695, "grad_norm": 0.6583170890808105, "learning_rate": 8.317425688573767e-06, "loss": 0.0064, "step": 15250 }, { "epoch": 0.2496932013417328, "grad_norm": 0.6292118430137634, "learning_rate": 8.322879738205618e-06, "loss": 0.0088, "step": 15260 }, { "epoch": 0.24985682729280864, "grad_norm": 0.13277125358581543, "learning_rate": 8.32833378783747e-06, "loss": 0.0071, "step": 15270 }, { "epoch": 0.2500204532438845, "grad_norm": 0.4996962547302246, "learning_rate": 8.333787837469321e-06, "loss": 0.0078, "step": 15280 }, { "epoch": 0.25018407919496033, "grad_norm": 0.555380642414093, "learning_rate": 8.339241887101174e-06, "loss": 0.0067, "step": 15290 }, { "epoch": 0.25034770514603616, "grad_norm": 0.7256217002868652, "learning_rate": 8.344695936733024e-06, "loss": 0.0081, "step": 15300 }, { "epoch": 0.250511331097112, "grad_norm": 0.3761509358882904, "learning_rate": 8.350149986364875e-06, "loss": 0.0067, "step": 15310 }, { "epoch": 0.2506749570481878, "grad_norm": 0.36821871995925903, "learning_rate": 8.355604035996728e-06, "loss": 0.0075, "step": 15320 }, { "epoch": 0.2508385829992637, "grad_norm": 0.20835179090499878, "learning_rate": 8.361058085628579e-06, "loss": 0.0071, "step": 15330 }, { "epoch": 0.25100220895033953, "grad_norm": 0.4317692816257477, "learning_rate": 8.366512135260431e-06, "loss": 0.0069, "step": 15340 }, { "epoch": 0.25116583490141536, "grad_norm": 0.7908754944801331, "learning_rate": 8.371966184892282e-06, "loss": 0.0084, "step": 15350 }, { "epoch": 0.2513294608524912, "grad_norm": 0.5661348700523376, "learning_rate": 8.377420234524135e-06, "loss": 0.0113, "step": 15360 }, { "epoch": 0.251493086803567, "grad_norm": 0.5994781255722046, "learning_rate": 8.382874284155985e-06, "loss": 0.0066, "step": 15370 }, { "epoch": 0.2516567127546429, "grad_norm": 0.13631018996238708, "learning_rate": 8.388328333787838e-06, "loss": 0.0068, "step": 15380 }, { "epoch": 0.25182033870571874, "grad_norm": 0.6861046552658081, "learning_rate": 8.393782383419689e-06, "loss": 0.0082, "step": 15390 }, { "epoch": 0.25198396465679457, "grad_norm": 0.22847877442836761, "learning_rate": 8.399236433051541e-06, "loss": 0.0085, "step": 15400 }, { "epoch": 0.2521475906078704, "grad_norm": 0.36022627353668213, "learning_rate": 8.404690482683392e-06, "loss": 0.0064, "step": 15410 }, { "epoch": 0.2523112165589462, "grad_norm": 0.3559449315071106, "learning_rate": 8.410144532315245e-06, "loss": 0.009, "step": 15420 }, { "epoch": 0.2524748425100221, "grad_norm": 0.9223658442497253, "learning_rate": 8.415598581947096e-06, "loss": 0.0062, "step": 15430 }, { "epoch": 0.25263846846109794, "grad_norm": 0.4064810276031494, "learning_rate": 8.421052631578948e-06, "loss": 0.0083, "step": 15440 }, { "epoch": 0.2528020944121738, "grad_norm": 0.5033938884735107, "learning_rate": 8.426506681210799e-06, "loss": 0.0061, "step": 15450 }, { "epoch": 0.2529657203632496, "grad_norm": 0.1993841975927353, "learning_rate": 8.431960730842652e-06, "loss": 0.0112, "step": 15460 }, { "epoch": 0.25312934631432543, "grad_norm": 0.8136267066001892, "learning_rate": 8.437414780474502e-06, "loss": 0.0042, "step": 15470 }, { "epoch": 0.2532929722654013, "grad_norm": 0.45578423142433167, "learning_rate": 8.442868830106355e-06, "loss": 0.0094, "step": 15480 }, { "epoch": 0.25345659821647715, "grad_norm": 0.7431492805480957, "learning_rate": 8.448322879738206e-06, "loss": 0.0117, "step": 15490 }, { "epoch": 0.253620224167553, "grad_norm": 0.4185831844806671, "learning_rate": 8.453776929370058e-06, "loss": 0.0123, "step": 15500 }, { "epoch": 0.2537838501186288, "grad_norm": 0.10859210789203644, "learning_rate": 8.45923097900191e-06, "loss": 0.0085, "step": 15510 }, { "epoch": 0.25394747606970464, "grad_norm": 0.31318360567092896, "learning_rate": 8.464685028633762e-06, "loss": 0.0058, "step": 15520 }, { "epoch": 0.2541111020207805, "grad_norm": 0.4922339618206024, "learning_rate": 8.470139078265613e-06, "loss": 0.0064, "step": 15530 }, { "epoch": 0.25427472797185635, "grad_norm": 0.29603078961372375, "learning_rate": 8.475593127897465e-06, "loss": 0.006, "step": 15540 }, { "epoch": 0.2544383539229322, "grad_norm": 0.39668726921081543, "learning_rate": 8.481047177529316e-06, "loss": 0.0123, "step": 15550 }, { "epoch": 0.254601979874008, "grad_norm": 0.7039226293563843, "learning_rate": 8.486501227161169e-06, "loss": 0.008, "step": 15560 }, { "epoch": 0.25476560582508384, "grad_norm": 0.5413767695426941, "learning_rate": 8.49195527679302e-06, "loss": 0.005, "step": 15570 }, { "epoch": 0.2549292317761597, "grad_norm": 0.2888321578502655, "learning_rate": 8.497409326424872e-06, "loss": 0.0065, "step": 15580 }, { "epoch": 0.25509285772723556, "grad_norm": 1.0297186374664307, "learning_rate": 8.502863376056723e-06, "loss": 0.0082, "step": 15590 }, { "epoch": 0.2552564836783114, "grad_norm": 0.5334463119506836, "learning_rate": 8.508317425688575e-06, "loss": 0.007, "step": 15600 }, { "epoch": 0.2554201096293872, "grad_norm": 0.4290338158607483, "learning_rate": 8.513771475320426e-06, "loss": 0.0089, "step": 15610 }, { "epoch": 0.25558373558046305, "grad_norm": 0.5034772753715515, "learning_rate": 8.519225524952279e-06, "loss": 0.0075, "step": 15620 }, { "epoch": 0.2557473615315389, "grad_norm": 0.6412157416343689, "learning_rate": 8.52467957458413e-06, "loss": 0.006, "step": 15630 }, { "epoch": 0.25591098748261476, "grad_norm": 0.4565969705581665, "learning_rate": 8.530133624215982e-06, "loss": 0.0051, "step": 15640 }, { "epoch": 0.2560746134336906, "grad_norm": 0.6854017972946167, "learning_rate": 8.535587673847833e-06, "loss": 0.0087, "step": 15650 }, { "epoch": 0.2562382393847664, "grad_norm": 0.5298134088516235, "learning_rate": 8.541041723479684e-06, "loss": 0.0058, "step": 15660 }, { "epoch": 0.25640186533584225, "grad_norm": 0.277515709400177, "learning_rate": 8.546495773111537e-06, "loss": 0.0094, "step": 15670 }, { "epoch": 0.2565654912869181, "grad_norm": 0.08279644697904587, "learning_rate": 8.551949822743387e-06, "loss": 0.0061, "step": 15680 }, { "epoch": 0.25672911723799396, "grad_norm": 0.1460450440645218, "learning_rate": 8.55740387237524e-06, "loss": 0.0063, "step": 15690 }, { "epoch": 0.2568927431890698, "grad_norm": 0.36915767192840576, "learning_rate": 8.56285792200709e-06, "loss": 0.0066, "step": 15700 }, { "epoch": 0.2570563691401456, "grad_norm": 0.28821367025375366, "learning_rate": 8.568311971638943e-06, "loss": 0.0082, "step": 15710 }, { "epoch": 0.25721999509122145, "grad_norm": 0.47287726402282715, "learning_rate": 8.573766021270794e-06, "loss": 0.0085, "step": 15720 }, { "epoch": 0.2573836210422973, "grad_norm": 0.3398825228214264, "learning_rate": 8.579220070902647e-06, "loss": 0.0096, "step": 15730 }, { "epoch": 0.25754724699337317, "grad_norm": 0.3178982436656952, "learning_rate": 8.584674120534498e-06, "loss": 0.0134, "step": 15740 }, { "epoch": 0.257710872944449, "grad_norm": 0.5679348111152649, "learning_rate": 8.59012817016635e-06, "loss": 0.0089, "step": 15750 }, { "epoch": 0.25787449889552483, "grad_norm": 0.12127234041690826, "learning_rate": 8.595582219798201e-06, "loss": 0.0054, "step": 15760 }, { "epoch": 0.25803812484660066, "grad_norm": 0.5184342265129089, "learning_rate": 8.601036269430052e-06, "loss": 0.0088, "step": 15770 }, { "epoch": 0.2582017507976765, "grad_norm": 0.2941882908344269, "learning_rate": 8.606490319061904e-06, "loss": 0.007, "step": 15780 }, { "epoch": 0.2583653767487524, "grad_norm": 0.21098819375038147, "learning_rate": 8.611944368693755e-06, "loss": 0.0047, "step": 15790 }, { "epoch": 0.2585290026998282, "grad_norm": 0.5106900930404663, "learning_rate": 8.617398418325608e-06, "loss": 0.0065, "step": 15800 }, { "epoch": 0.25869262865090403, "grad_norm": 0.1790807992219925, "learning_rate": 8.622852467957459e-06, "loss": 0.006, "step": 15810 }, { "epoch": 0.25885625460197986, "grad_norm": 0.25317224860191345, "learning_rate": 8.628306517589311e-06, "loss": 0.0059, "step": 15820 }, { "epoch": 0.2590198805530557, "grad_norm": 0.37723788619041443, "learning_rate": 8.633760567221162e-06, "loss": 0.006, "step": 15830 }, { "epoch": 0.2591835065041316, "grad_norm": 0.4963933825492859, "learning_rate": 8.639214616853015e-06, "loss": 0.0062, "step": 15840 }, { "epoch": 0.2593471324552074, "grad_norm": 0.42260056734085083, "learning_rate": 8.644668666484865e-06, "loss": 0.0071, "step": 15850 }, { "epoch": 0.25951075840628324, "grad_norm": 0.24291133880615234, "learning_rate": 8.650122716116718e-06, "loss": 0.0062, "step": 15860 }, { "epoch": 0.25967438435735907, "grad_norm": 0.17453806102275848, "learning_rate": 8.655576765748569e-06, "loss": 0.0063, "step": 15870 }, { "epoch": 0.2598380103084349, "grad_norm": 0.30748218297958374, "learning_rate": 8.66103081538042e-06, "loss": 0.0075, "step": 15880 }, { "epoch": 0.2600016362595108, "grad_norm": 0.5070960521697998, "learning_rate": 8.666484865012272e-06, "loss": 0.0052, "step": 15890 }, { "epoch": 0.2601652622105866, "grad_norm": 0.4176754951477051, "learning_rate": 8.671938914644123e-06, "loss": 0.0052, "step": 15900 }, { "epoch": 0.26032888816166244, "grad_norm": 0.43345314264297485, "learning_rate": 8.677392964275976e-06, "loss": 0.0079, "step": 15910 }, { "epoch": 0.26049251411273827, "grad_norm": 0.40920785069465637, "learning_rate": 8.682847013907826e-06, "loss": 0.0117, "step": 15920 }, { "epoch": 0.2606561400638141, "grad_norm": 0.37591618299484253, "learning_rate": 8.688301063539679e-06, "loss": 0.0097, "step": 15930 }, { "epoch": 0.26081976601489, "grad_norm": 0.42752519249916077, "learning_rate": 8.69375511317153e-06, "loss": 0.0046, "step": 15940 }, { "epoch": 0.2609833919659658, "grad_norm": 0.2730713188648224, "learning_rate": 8.699209162803382e-06, "loss": 0.0052, "step": 15950 }, { "epoch": 0.26114701791704165, "grad_norm": 0.6756895780563354, "learning_rate": 8.704663212435233e-06, "loss": 0.0145, "step": 15960 }, { "epoch": 0.2613106438681175, "grad_norm": 0.3605157434940338, "learning_rate": 8.710117262067086e-06, "loss": 0.0091, "step": 15970 }, { "epoch": 0.2614742698191933, "grad_norm": 0.3654633164405823, "learning_rate": 8.715571311698937e-06, "loss": 0.0064, "step": 15980 }, { "epoch": 0.2616378957702692, "grad_norm": 0.9474996328353882, "learning_rate": 8.72102536133079e-06, "loss": 0.009, "step": 15990 }, { "epoch": 0.261801521721345, "grad_norm": 0.09037787467241287, "learning_rate": 8.72647941096264e-06, "loss": 0.0077, "step": 16000 }, { "epoch": 0.26196514767242085, "grad_norm": 0.43435654044151306, "learning_rate": 8.731933460594491e-06, "loss": 0.0081, "step": 16010 }, { "epoch": 0.2621287736234967, "grad_norm": 0.3199719488620758, "learning_rate": 8.737387510226343e-06, "loss": 0.0071, "step": 16020 }, { "epoch": 0.2622923995745725, "grad_norm": 0.25331613421440125, "learning_rate": 8.742841559858194e-06, "loss": 0.0092, "step": 16030 }, { "epoch": 0.2624560255256484, "grad_norm": 0.3290269076824188, "learning_rate": 8.748295609490047e-06, "loss": 0.007, "step": 16040 }, { "epoch": 0.2626196514767242, "grad_norm": 0.4656314253807068, "learning_rate": 8.753749659121898e-06, "loss": 0.0078, "step": 16050 }, { "epoch": 0.26278327742780005, "grad_norm": 0.4236758351325989, "learning_rate": 8.75920370875375e-06, "loss": 0.0072, "step": 16060 }, { "epoch": 0.2629469033788759, "grad_norm": 0.23814909160137177, "learning_rate": 8.764657758385601e-06, "loss": 0.0054, "step": 16070 }, { "epoch": 0.2631105293299517, "grad_norm": 0.4015258252620697, "learning_rate": 8.770111808017454e-06, "loss": 0.0052, "step": 16080 }, { "epoch": 0.26327415528102754, "grad_norm": 0.677941083908081, "learning_rate": 8.775565857649305e-06, "loss": 0.0078, "step": 16090 }, { "epoch": 0.26343778123210343, "grad_norm": 0.4373498260974884, "learning_rate": 8.781019907281157e-06, "loss": 0.0086, "step": 16100 }, { "epoch": 0.26360140718317926, "grad_norm": 0.41308775544166565, "learning_rate": 8.786473956913008e-06, "loss": 0.0082, "step": 16110 }, { "epoch": 0.2637650331342551, "grad_norm": 0.2800823450088501, "learning_rate": 8.79192800654486e-06, "loss": 0.0072, "step": 16120 }, { "epoch": 0.2639286590853309, "grad_norm": 0.3212105929851532, "learning_rate": 8.797382056176711e-06, "loss": 0.0074, "step": 16130 }, { "epoch": 0.26409228503640675, "grad_norm": 0.43777480721473694, "learning_rate": 8.802836105808564e-06, "loss": 0.0064, "step": 16140 }, { "epoch": 0.26425591098748263, "grad_norm": 0.10684596002101898, "learning_rate": 8.808290155440415e-06, "loss": 0.0078, "step": 16150 }, { "epoch": 0.26441953693855846, "grad_norm": 0.3250572681427002, "learning_rate": 8.813744205072267e-06, "loss": 0.0065, "step": 16160 }, { "epoch": 0.2645831628896343, "grad_norm": 0.2860829830169678, "learning_rate": 8.819198254704118e-06, "loss": 0.0053, "step": 16170 }, { "epoch": 0.2647467888407101, "grad_norm": 0.7128570675849915, "learning_rate": 8.82465230433597e-06, "loss": 0.0103, "step": 16180 }, { "epoch": 0.26491041479178595, "grad_norm": 0.2804383933544159, "learning_rate": 8.830106353967822e-06, "loss": 0.0146, "step": 16190 }, { "epoch": 0.26507404074286184, "grad_norm": 0.12914694845676422, "learning_rate": 8.835560403599674e-06, "loss": 0.0034, "step": 16200 }, { "epoch": 0.26523766669393767, "grad_norm": 0.36098477244377136, "learning_rate": 8.841014453231525e-06, "loss": 0.007, "step": 16210 }, { "epoch": 0.2654012926450135, "grad_norm": 0.6338185667991638, "learning_rate": 8.846468502863378e-06, "loss": 0.0063, "step": 16220 }, { "epoch": 0.2655649185960893, "grad_norm": 0.4452401399612427, "learning_rate": 8.851922552495228e-06, "loss": 0.0085, "step": 16230 }, { "epoch": 0.26572854454716516, "grad_norm": 0.6460739970207214, "learning_rate": 8.857376602127081e-06, "loss": 0.0069, "step": 16240 }, { "epoch": 0.26589217049824104, "grad_norm": 0.4192502498626709, "learning_rate": 8.862830651758932e-06, "loss": 0.0056, "step": 16250 }, { "epoch": 0.2660557964493169, "grad_norm": 0.3042267858982086, "learning_rate": 8.868284701390784e-06, "loss": 0.0047, "step": 16260 }, { "epoch": 0.2662194224003927, "grad_norm": 0.5498833060264587, "learning_rate": 8.873738751022635e-06, "loss": 0.0091, "step": 16270 }, { "epoch": 0.26638304835146853, "grad_norm": 0.3096252381801605, "learning_rate": 8.879192800654488e-06, "loss": 0.0055, "step": 16280 }, { "epoch": 0.26654667430254436, "grad_norm": 0.45774009823799133, "learning_rate": 8.884646850286339e-06, "loss": 0.0066, "step": 16290 }, { "epoch": 0.26671030025362025, "grad_norm": 0.15017341077327728, "learning_rate": 8.890100899918191e-06, "loss": 0.0066, "step": 16300 }, { "epoch": 0.2668739262046961, "grad_norm": 0.4404623508453369, "learning_rate": 8.895554949550042e-06, "loss": 0.0068, "step": 16310 }, { "epoch": 0.2670375521557719, "grad_norm": 0.3334588408470154, "learning_rate": 8.901008999181895e-06, "loss": 0.0083, "step": 16320 }, { "epoch": 0.26720117810684774, "grad_norm": 0.6172933578491211, "learning_rate": 8.906463048813745e-06, "loss": 0.0082, "step": 16330 }, { "epoch": 0.26736480405792357, "grad_norm": 0.22365346550941467, "learning_rate": 8.911917098445596e-06, "loss": 0.0093, "step": 16340 }, { "epoch": 0.26752843000899945, "grad_norm": 0.28134772181510925, "learning_rate": 8.917371148077449e-06, "loss": 0.0056, "step": 16350 }, { "epoch": 0.2676920559600753, "grad_norm": 0.3498561382293701, "learning_rate": 8.9228251977093e-06, "loss": 0.0083, "step": 16360 }, { "epoch": 0.2678556819111511, "grad_norm": 0.2254149168729782, "learning_rate": 8.928279247341152e-06, "loss": 0.0075, "step": 16370 }, { "epoch": 0.26801930786222694, "grad_norm": 0.27590957283973694, "learning_rate": 8.933733296973003e-06, "loss": 0.0084, "step": 16380 }, { "epoch": 0.26818293381330277, "grad_norm": 0.40174850821495056, "learning_rate": 8.939187346604856e-06, "loss": 0.0133, "step": 16390 }, { "epoch": 0.26834655976437866, "grad_norm": 0.2599905729293823, "learning_rate": 8.944641396236706e-06, "loss": 0.0071, "step": 16400 }, { "epoch": 0.2685101857154545, "grad_norm": 0.4147341251373291, "learning_rate": 8.950095445868559e-06, "loss": 0.0054, "step": 16410 }, { "epoch": 0.2686738116665303, "grad_norm": 0.4697679281234741, "learning_rate": 8.95554949550041e-06, "loss": 0.0106, "step": 16420 }, { "epoch": 0.26883743761760615, "grad_norm": 0.34753677248954773, "learning_rate": 8.961003545132262e-06, "loss": 0.0062, "step": 16430 }, { "epoch": 0.269001063568682, "grad_norm": 0.8293981552124023, "learning_rate": 8.966457594764113e-06, "loss": 0.0082, "step": 16440 }, { "epoch": 0.26916468951975786, "grad_norm": 0.45402100682258606, "learning_rate": 8.971911644395966e-06, "loss": 0.008, "step": 16450 }, { "epoch": 0.2693283154708337, "grad_norm": 0.6655663847923279, "learning_rate": 8.977365694027817e-06, "loss": 0.0079, "step": 16460 }, { "epoch": 0.2694919414219095, "grad_norm": 0.6291821002960205, "learning_rate": 8.982819743659667e-06, "loss": 0.0109, "step": 16470 }, { "epoch": 0.26965556737298535, "grad_norm": 0.541570782661438, "learning_rate": 8.98827379329152e-06, "loss": 0.0109, "step": 16480 }, { "epoch": 0.2698191933240612, "grad_norm": 0.32790982723236084, "learning_rate": 8.993727842923371e-06, "loss": 0.0071, "step": 16490 }, { "epoch": 0.269982819275137, "grad_norm": 0.2682746946811676, "learning_rate": 8.999181892555223e-06, "loss": 0.0072, "step": 16500 }, { "epoch": 0.2701464452262129, "grad_norm": 0.6405606865882874, "learning_rate": 9.004635942187074e-06, "loss": 0.0098, "step": 16510 }, { "epoch": 0.2703100711772887, "grad_norm": 0.4136946499347687, "learning_rate": 9.010089991818927e-06, "loss": 0.0088, "step": 16520 }, { "epoch": 0.27047369712836455, "grad_norm": 0.9547814726829529, "learning_rate": 9.015544041450778e-06, "loss": 0.0072, "step": 16530 }, { "epoch": 0.2706373230794404, "grad_norm": 0.4734065532684326, "learning_rate": 9.02099809108263e-06, "loss": 0.0056, "step": 16540 }, { "epoch": 0.2708009490305162, "grad_norm": 0.3656756579875946, "learning_rate": 9.026452140714481e-06, "loss": 0.0131, "step": 16550 }, { "epoch": 0.2709645749815921, "grad_norm": 0.41437825560569763, "learning_rate": 9.031906190346334e-06, "loss": 0.0092, "step": 16560 }, { "epoch": 0.27112820093266793, "grad_norm": 0.4583395719528198, "learning_rate": 9.037360239978185e-06, "loss": 0.0061, "step": 16570 }, { "epoch": 0.27129182688374376, "grad_norm": 0.4452890157699585, "learning_rate": 9.042814289610035e-06, "loss": 0.008, "step": 16580 }, { "epoch": 0.2714554528348196, "grad_norm": 0.2936899960041046, "learning_rate": 9.048268339241888e-06, "loss": 0.0061, "step": 16590 }, { "epoch": 0.2716190787858954, "grad_norm": 0.6065975427627563, "learning_rate": 9.053722388873739e-06, "loss": 0.0081, "step": 16600 }, { "epoch": 0.2717827047369713, "grad_norm": 0.43540599942207336, "learning_rate": 9.059176438505591e-06, "loss": 0.0091, "step": 16610 }, { "epoch": 0.27194633068804713, "grad_norm": 0.371324360370636, "learning_rate": 9.064630488137442e-06, "loss": 0.0055, "step": 16620 }, { "epoch": 0.27210995663912296, "grad_norm": 0.2378314733505249, "learning_rate": 9.070084537769295e-06, "loss": 0.0052, "step": 16630 }, { "epoch": 0.2722735825901988, "grad_norm": 0.2827133536338806, "learning_rate": 9.075538587401146e-06, "loss": 0.0069, "step": 16640 }, { "epoch": 0.2724372085412746, "grad_norm": 0.49517032504081726, "learning_rate": 9.080992637032998e-06, "loss": 0.0066, "step": 16650 }, { "epoch": 0.2726008344923505, "grad_norm": 0.43881359696388245, "learning_rate": 9.086446686664849e-06, "loss": 0.0079, "step": 16660 }, { "epoch": 0.27276446044342634, "grad_norm": 0.4152918756008148, "learning_rate": 9.091900736296702e-06, "loss": 0.0072, "step": 16670 }, { "epoch": 0.27292808639450217, "grad_norm": 0.3023996949195862, "learning_rate": 9.097354785928552e-06, "loss": 0.0045, "step": 16680 }, { "epoch": 0.273091712345578, "grad_norm": 0.7321740984916687, "learning_rate": 9.102808835560403e-06, "loss": 0.0082, "step": 16690 }, { "epoch": 0.2732553382966538, "grad_norm": 0.7708824872970581, "learning_rate": 9.108262885192256e-06, "loss": 0.0098, "step": 16700 }, { "epoch": 0.2734189642477297, "grad_norm": 0.2992950975894928, "learning_rate": 9.113716934824107e-06, "loss": 0.0073, "step": 16710 }, { "epoch": 0.27358259019880554, "grad_norm": 0.11642202734947205, "learning_rate": 9.11917098445596e-06, "loss": 0.0058, "step": 16720 }, { "epoch": 0.27374621614988137, "grad_norm": 0.34320154786109924, "learning_rate": 9.12462503408781e-06, "loss": 0.0109, "step": 16730 }, { "epoch": 0.2739098421009572, "grad_norm": 0.35033077001571655, "learning_rate": 9.130079083719663e-06, "loss": 0.0072, "step": 16740 }, { "epoch": 0.27407346805203303, "grad_norm": 0.48964419960975647, "learning_rate": 9.135533133351513e-06, "loss": 0.0052, "step": 16750 }, { "epoch": 0.2742370940031089, "grad_norm": 0.7566307783126831, "learning_rate": 9.140987182983366e-06, "loss": 0.0075, "step": 16760 }, { "epoch": 0.27440071995418475, "grad_norm": 0.33908167481422424, "learning_rate": 9.146441232615217e-06, "loss": 0.0079, "step": 16770 }, { "epoch": 0.2745643459052606, "grad_norm": 0.2446870505809784, "learning_rate": 9.15189528224707e-06, "loss": 0.0065, "step": 16780 }, { "epoch": 0.2747279718563364, "grad_norm": 0.4471055269241333, "learning_rate": 9.15734933187892e-06, "loss": 0.0076, "step": 16790 }, { "epoch": 0.27489159780741224, "grad_norm": 0.8024077415466309, "learning_rate": 9.162803381510773e-06, "loss": 0.006, "step": 16800 }, { "epoch": 0.2750552237584881, "grad_norm": 0.484811931848526, "learning_rate": 9.168257431142624e-06, "loss": 0.0044, "step": 16810 }, { "epoch": 0.27521884970956395, "grad_norm": 0.19515646994113922, "learning_rate": 9.173711480774476e-06, "loss": 0.0057, "step": 16820 }, { "epoch": 0.2753824756606398, "grad_norm": 0.46697765588760376, "learning_rate": 9.179165530406327e-06, "loss": 0.0065, "step": 16830 }, { "epoch": 0.2755461016117156, "grad_norm": 0.42592164874076843, "learning_rate": 9.18461958003818e-06, "loss": 0.0088, "step": 16840 }, { "epoch": 0.27570972756279144, "grad_norm": 0.48190489411354065, "learning_rate": 9.19007362967003e-06, "loss": 0.004, "step": 16850 }, { "epoch": 0.2758733535138673, "grad_norm": 0.3762807250022888, "learning_rate": 9.195527679301883e-06, "loss": 0.01, "step": 16860 }, { "epoch": 0.27603697946494316, "grad_norm": 0.17557047307491302, "learning_rate": 9.200981728933734e-06, "loss": 0.0069, "step": 16870 }, { "epoch": 0.276200605416019, "grad_norm": 0.7334563136100769, "learning_rate": 9.206435778565586e-06, "loss": 0.0082, "step": 16880 }, { "epoch": 0.2763642313670948, "grad_norm": 0.6913423538208008, "learning_rate": 9.211889828197437e-06, "loss": 0.0054, "step": 16890 }, { "epoch": 0.27652785731817064, "grad_norm": 0.8012818098068237, "learning_rate": 9.21734387782929e-06, "loss": 0.0113, "step": 16900 }, { "epoch": 0.27669148326924653, "grad_norm": 0.5596820116043091, "learning_rate": 9.22279792746114e-06, "loss": 0.0081, "step": 16910 }, { "epoch": 0.27685510922032236, "grad_norm": 0.4714412987232208, "learning_rate": 9.228251977092993e-06, "loss": 0.0083, "step": 16920 }, { "epoch": 0.2770187351713982, "grad_norm": 0.6238222718238831, "learning_rate": 9.233706026724844e-06, "loss": 0.0062, "step": 16930 }, { "epoch": 0.277182361122474, "grad_norm": 0.3426477611064911, "learning_rate": 9.239160076356697e-06, "loss": 0.008, "step": 16940 }, { "epoch": 0.27734598707354985, "grad_norm": 0.6691911220550537, "learning_rate": 9.244614125988547e-06, "loss": 0.0089, "step": 16950 }, { "epoch": 0.2775096130246257, "grad_norm": 0.40745797753334045, "learning_rate": 9.2500681756204e-06, "loss": 0.0089, "step": 16960 }, { "epoch": 0.27767323897570156, "grad_norm": 0.5076944231987, "learning_rate": 9.255522225252251e-06, "loss": 0.0082, "step": 16970 }, { "epoch": 0.2778368649267774, "grad_norm": 0.07181976735591888, "learning_rate": 9.260976274884103e-06, "loss": 0.0098, "step": 16980 }, { "epoch": 0.2780004908778532, "grad_norm": 0.36731722950935364, "learning_rate": 9.266430324515954e-06, "loss": 0.0068, "step": 16990 }, { "epoch": 0.27816411682892905, "grad_norm": 0.5639378428459167, "learning_rate": 9.271884374147807e-06, "loss": 0.0058, "step": 17000 }, { "epoch": 0.2783277427800049, "grad_norm": 0.3828292489051819, "learning_rate": 9.277338423779658e-06, "loss": 0.0097, "step": 17010 }, { "epoch": 0.27849136873108077, "grad_norm": 0.3754061162471771, "learning_rate": 9.28279247341151e-06, "loss": 0.0126, "step": 17020 }, { "epoch": 0.2786549946821566, "grad_norm": 0.4050922095775604, "learning_rate": 9.288246523043361e-06, "loss": 0.0042, "step": 17030 }, { "epoch": 0.27881862063323243, "grad_norm": 0.04091784730553627, "learning_rate": 9.293700572675212e-06, "loss": 0.0053, "step": 17040 }, { "epoch": 0.27898224658430826, "grad_norm": 0.7078526616096497, "learning_rate": 9.299154622307064e-06, "loss": 0.0061, "step": 17050 }, { "epoch": 0.2791458725353841, "grad_norm": 0.5788561105728149, "learning_rate": 9.304608671938915e-06, "loss": 0.0081, "step": 17060 }, { "epoch": 0.27930949848646, "grad_norm": 0.17846155166625977, "learning_rate": 9.310062721570768e-06, "loss": 0.0058, "step": 17070 }, { "epoch": 0.2794731244375358, "grad_norm": 0.28911489248275757, "learning_rate": 9.315516771202619e-06, "loss": 0.0082, "step": 17080 }, { "epoch": 0.27963675038861163, "grad_norm": 0.4038585126399994, "learning_rate": 9.320970820834471e-06, "loss": 0.0047, "step": 17090 }, { "epoch": 0.27980037633968746, "grad_norm": 0.3644547164440155, "learning_rate": 9.326424870466322e-06, "loss": 0.0094, "step": 17100 }, { "epoch": 0.2799640022907633, "grad_norm": 0.07796856015920639, "learning_rate": 9.331878920098175e-06, "loss": 0.0067, "step": 17110 }, { "epoch": 0.2801276282418392, "grad_norm": 0.2838761806488037, "learning_rate": 9.337332969730026e-06, "loss": 0.0103, "step": 17120 }, { "epoch": 0.280291254192915, "grad_norm": 0.33975324034690857, "learning_rate": 9.342787019361878e-06, "loss": 0.0059, "step": 17130 }, { "epoch": 0.28045488014399084, "grad_norm": 0.3931236267089844, "learning_rate": 9.348241068993729e-06, "loss": 0.0064, "step": 17140 }, { "epoch": 0.28061850609506667, "grad_norm": 0.5075228810310364, "learning_rate": 9.35369511862558e-06, "loss": 0.0118, "step": 17150 }, { "epoch": 0.2807821320461425, "grad_norm": 0.31072351336479187, "learning_rate": 9.359149168257432e-06, "loss": 0.0083, "step": 17160 }, { "epoch": 0.2809457579972184, "grad_norm": 0.33038461208343506, "learning_rate": 9.364603217889283e-06, "loss": 0.007, "step": 17170 }, { "epoch": 0.2811093839482942, "grad_norm": 0.32689762115478516, "learning_rate": 9.370057267521136e-06, "loss": 0.0074, "step": 17180 }, { "epoch": 0.28127300989937004, "grad_norm": 0.4697996973991394, "learning_rate": 9.375511317152987e-06, "loss": 0.0069, "step": 17190 }, { "epoch": 0.28143663585044587, "grad_norm": 0.573824942111969, "learning_rate": 9.380965366784839e-06, "loss": 0.0095, "step": 17200 }, { "epoch": 0.2816002618015217, "grad_norm": 0.5459651947021484, "learning_rate": 9.38641941641669e-06, "loss": 0.0079, "step": 17210 }, { "epoch": 0.2817638877525976, "grad_norm": 0.3976970613002777, "learning_rate": 9.391873466048543e-06, "loss": 0.0084, "step": 17220 }, { "epoch": 0.2819275137036734, "grad_norm": 1.0190106630325317, "learning_rate": 9.397327515680393e-06, "loss": 0.0074, "step": 17230 }, { "epoch": 0.28209113965474925, "grad_norm": 0.13550855219364166, "learning_rate": 9.402781565312246e-06, "loss": 0.0077, "step": 17240 }, { "epoch": 0.2822547656058251, "grad_norm": 0.4691948890686035, "learning_rate": 9.408235614944097e-06, "loss": 0.0067, "step": 17250 }, { "epoch": 0.2824183915569009, "grad_norm": 0.5311391949653625, "learning_rate": 9.413689664575948e-06, "loss": 0.0064, "step": 17260 }, { "epoch": 0.2825820175079768, "grad_norm": 0.23525945842266083, "learning_rate": 9.4191437142078e-06, "loss": 0.0076, "step": 17270 }, { "epoch": 0.2827456434590526, "grad_norm": 0.8739100694656372, "learning_rate": 9.424597763839651e-06, "loss": 0.007, "step": 17280 }, { "epoch": 0.28290926941012845, "grad_norm": 0.31303292512893677, "learning_rate": 9.430051813471504e-06, "loss": 0.0078, "step": 17290 }, { "epoch": 0.2830728953612043, "grad_norm": 0.22775372862815857, "learning_rate": 9.435505863103354e-06, "loss": 0.0059, "step": 17300 }, { "epoch": 0.2832365213122801, "grad_norm": 0.21936200559139252, "learning_rate": 9.440959912735207e-06, "loss": 0.0051, "step": 17310 }, { "epoch": 0.283400147263356, "grad_norm": 0.24235956370830536, "learning_rate": 9.446413962367058e-06, "loss": 0.0059, "step": 17320 }, { "epoch": 0.2835637732144318, "grad_norm": 0.5034245848655701, "learning_rate": 9.45186801199891e-06, "loss": 0.0065, "step": 17330 }, { "epoch": 0.28372739916550765, "grad_norm": 0.3012360632419586, "learning_rate": 9.457322061630761e-06, "loss": 0.0072, "step": 17340 }, { "epoch": 0.2838910251165835, "grad_norm": 0.256351113319397, "learning_rate": 9.462776111262614e-06, "loss": 0.0054, "step": 17350 }, { "epoch": 0.2840546510676593, "grad_norm": 0.5906171798706055, "learning_rate": 9.468230160894465e-06, "loss": 0.0068, "step": 17360 }, { "epoch": 0.28421827701873514, "grad_norm": 0.2543254792690277, "learning_rate": 9.473684210526315e-06, "loss": 0.0054, "step": 17370 }, { "epoch": 0.28438190296981103, "grad_norm": 0.4507140517234802, "learning_rate": 9.479138260158168e-06, "loss": 0.0053, "step": 17380 }, { "epoch": 0.28454552892088686, "grad_norm": 0.36774569749832153, "learning_rate": 9.484592309790019e-06, "loss": 0.0083, "step": 17390 }, { "epoch": 0.2847091548719627, "grad_norm": 0.42855939269065857, "learning_rate": 9.490046359421871e-06, "loss": 0.0073, "step": 17400 }, { "epoch": 0.2848727808230385, "grad_norm": 0.18056045472621918, "learning_rate": 9.495500409053722e-06, "loss": 0.0053, "step": 17410 }, { "epoch": 0.28503640677411435, "grad_norm": 0.45547768473625183, "learning_rate": 9.500954458685575e-06, "loss": 0.008, "step": 17420 }, { "epoch": 0.28520003272519023, "grad_norm": 0.40392816066741943, "learning_rate": 9.506408508317426e-06, "loss": 0.0074, "step": 17430 }, { "epoch": 0.28536365867626606, "grad_norm": 0.2421988844871521, "learning_rate": 9.511862557949278e-06, "loss": 0.0052, "step": 17440 }, { "epoch": 0.2855272846273419, "grad_norm": 0.36258062720298767, "learning_rate": 9.517316607581129e-06, "loss": 0.0081, "step": 17450 }, { "epoch": 0.2856909105784177, "grad_norm": 0.2071145623922348, "learning_rate": 9.522770657212982e-06, "loss": 0.0085, "step": 17460 }, { "epoch": 0.28585453652949355, "grad_norm": 0.7389194965362549, "learning_rate": 9.528224706844833e-06, "loss": 0.0075, "step": 17470 }, { "epoch": 0.28601816248056944, "grad_norm": 0.22596819698810577, "learning_rate": 9.533678756476683e-06, "loss": 0.0052, "step": 17480 }, { "epoch": 0.28618178843164527, "grad_norm": 0.6479752063751221, "learning_rate": 9.539132806108536e-06, "loss": 0.0065, "step": 17490 }, { "epoch": 0.2863454143827211, "grad_norm": 0.2373426854610443, "learning_rate": 9.544586855740387e-06, "loss": 0.0061, "step": 17500 }, { "epoch": 0.2865090403337969, "grad_norm": 0.22162528336048126, "learning_rate": 9.55004090537224e-06, "loss": 0.0036, "step": 17510 }, { "epoch": 0.28667266628487276, "grad_norm": 0.6735430955886841, "learning_rate": 9.55549495500409e-06, "loss": 0.0112, "step": 17520 }, { "epoch": 0.28683629223594864, "grad_norm": 0.2916731536388397, "learning_rate": 9.560949004635943e-06, "loss": 0.0059, "step": 17530 }, { "epoch": 0.28699991818702447, "grad_norm": 0.25756481289863586, "learning_rate": 9.566403054267794e-06, "loss": 0.0055, "step": 17540 }, { "epoch": 0.2871635441381003, "grad_norm": 0.25046148896217346, "learning_rate": 9.571857103899646e-06, "loss": 0.0066, "step": 17550 }, { "epoch": 0.28732717008917613, "grad_norm": 0.49978095293045044, "learning_rate": 9.577311153531497e-06, "loss": 0.0099, "step": 17560 }, { "epoch": 0.28749079604025196, "grad_norm": 0.33705317974090576, "learning_rate": 9.58276520316335e-06, "loss": 0.0072, "step": 17570 }, { "epoch": 0.28765442199132785, "grad_norm": 0.49843356013298035, "learning_rate": 9.5882192527952e-06, "loss": 0.0085, "step": 17580 }, { "epoch": 0.2878180479424037, "grad_norm": 0.3631350100040436, "learning_rate": 9.593673302427053e-06, "loss": 0.006, "step": 17590 }, { "epoch": 0.2879816738934795, "grad_norm": 0.7583593130111694, "learning_rate": 9.599127352058904e-06, "loss": 0.0118, "step": 17600 }, { "epoch": 0.28814529984455534, "grad_norm": 0.07032642513513565, "learning_rate": 9.604581401690756e-06, "loss": 0.0059, "step": 17610 }, { "epoch": 0.28830892579563117, "grad_norm": 0.2079721987247467, "learning_rate": 9.610035451322607e-06, "loss": 0.0052, "step": 17620 }, { "epoch": 0.28847255174670705, "grad_norm": 0.5555415749549866, "learning_rate": 9.61548950095446e-06, "loss": 0.0103, "step": 17630 }, { "epoch": 0.2886361776977829, "grad_norm": 0.40738505125045776, "learning_rate": 9.62094355058631e-06, "loss": 0.0066, "step": 17640 }, { "epoch": 0.2887998036488587, "grad_norm": 0.4552417993545532, "learning_rate": 9.626397600218163e-06, "loss": 0.0058, "step": 17650 }, { "epoch": 0.28896342959993454, "grad_norm": 0.20900996029376984, "learning_rate": 9.631851649850014e-06, "loss": 0.0059, "step": 17660 }, { "epoch": 0.28912705555101037, "grad_norm": 1.4825387001037598, "learning_rate": 9.637305699481867e-06, "loss": 0.0161, "step": 17670 }, { "epoch": 0.28929068150208626, "grad_norm": 0.37469592690467834, "learning_rate": 9.642759749113717e-06, "loss": 0.0069, "step": 17680 }, { "epoch": 0.2894543074531621, "grad_norm": 0.17680047452449799, "learning_rate": 9.64821379874557e-06, "loss": 0.0054, "step": 17690 }, { "epoch": 0.2896179334042379, "grad_norm": 0.27439039945602417, "learning_rate": 9.65366784837742e-06, "loss": 0.0051, "step": 17700 }, { "epoch": 0.28978155935531374, "grad_norm": 0.6349892020225525, "learning_rate": 9.659121898009273e-06, "loss": 0.0051, "step": 17710 }, { "epoch": 0.2899451853063896, "grad_norm": 0.6358305215835571, "learning_rate": 9.664575947641124e-06, "loss": 0.0064, "step": 17720 }, { "epoch": 0.29010881125746546, "grad_norm": 0.4132031798362732, "learning_rate": 9.670029997272977e-06, "loss": 0.0077, "step": 17730 }, { "epoch": 0.2902724372085413, "grad_norm": 0.3340214490890503, "learning_rate": 9.675484046904828e-06, "loss": 0.0076, "step": 17740 }, { "epoch": 0.2904360631596171, "grad_norm": 0.26891523599624634, "learning_rate": 9.68093809653668e-06, "loss": 0.008, "step": 17750 }, { "epoch": 0.29059968911069295, "grad_norm": 1.0061402320861816, "learning_rate": 9.686392146168531e-06, "loss": 0.0055, "step": 17760 }, { "epoch": 0.2907633150617688, "grad_norm": 0.1788272261619568, "learning_rate": 9.691846195800384e-06, "loss": 0.008, "step": 17770 }, { "epoch": 0.29092694101284466, "grad_norm": 0.3419720232486725, "learning_rate": 9.697300245432234e-06, "loss": 0.0069, "step": 17780 }, { "epoch": 0.2910905669639205, "grad_norm": 0.16235148906707764, "learning_rate": 9.702754295064087e-06, "loss": 0.0072, "step": 17790 }, { "epoch": 0.2912541929149963, "grad_norm": 0.3408327102661133, "learning_rate": 9.708208344695938e-06, "loss": 0.0049, "step": 17800 }, { "epoch": 0.29141781886607215, "grad_norm": 0.42627087235450745, "learning_rate": 9.71366239432779e-06, "loss": 0.0059, "step": 17810 }, { "epoch": 0.291581444817148, "grad_norm": 0.29981982707977295, "learning_rate": 9.719116443959641e-06, "loss": 0.0107, "step": 17820 }, { "epoch": 0.2917450707682238, "grad_norm": 0.18751360476016998, "learning_rate": 9.724570493591492e-06, "loss": 0.0049, "step": 17830 }, { "epoch": 0.2919086967192997, "grad_norm": 0.18362772464752197, "learning_rate": 9.730024543223345e-06, "loss": 0.0042, "step": 17840 }, { "epoch": 0.29207232267037553, "grad_norm": 0.3897661566734314, "learning_rate": 9.735478592855195e-06, "loss": 0.0059, "step": 17850 }, { "epoch": 0.29223594862145136, "grad_norm": 0.272909939289093, "learning_rate": 9.740932642487048e-06, "loss": 0.008, "step": 17860 }, { "epoch": 0.2923995745725272, "grad_norm": 0.330464631319046, "learning_rate": 9.746386692118899e-06, "loss": 0.0056, "step": 17870 }, { "epoch": 0.292563200523603, "grad_norm": 0.5986437201499939, "learning_rate": 9.751840741750751e-06, "loss": 0.0059, "step": 17880 }, { "epoch": 0.2927268264746789, "grad_norm": 0.1420435756444931, "learning_rate": 9.757294791382602e-06, "loss": 0.0067, "step": 17890 }, { "epoch": 0.29289045242575473, "grad_norm": 0.29462817311286926, "learning_rate": 9.762748841014455e-06, "loss": 0.0065, "step": 17900 }, { "epoch": 0.29305407837683056, "grad_norm": 0.6878054738044739, "learning_rate": 9.768202890646306e-06, "loss": 0.0094, "step": 17910 }, { "epoch": 0.2932177043279064, "grad_norm": 0.3721540570259094, "learning_rate": 9.773656940278158e-06, "loss": 0.0063, "step": 17920 }, { "epoch": 0.2933813302789822, "grad_norm": 0.39864540100097656, "learning_rate": 9.779110989910009e-06, "loss": 0.0056, "step": 17930 }, { "epoch": 0.2935449562300581, "grad_norm": 0.2694548964500427, "learning_rate": 9.78456503954186e-06, "loss": 0.0064, "step": 17940 }, { "epoch": 0.29370858218113394, "grad_norm": 0.2016744613647461, "learning_rate": 9.790019089173712e-06, "loss": 0.0065, "step": 17950 }, { "epoch": 0.29387220813220977, "grad_norm": 0.24006912112236023, "learning_rate": 9.795473138805563e-06, "loss": 0.0035, "step": 17960 }, { "epoch": 0.2940358340832856, "grad_norm": 0.037999190390110016, "learning_rate": 9.800927188437416e-06, "loss": 0.0096, "step": 17970 }, { "epoch": 0.2941994600343614, "grad_norm": 0.3086879551410675, "learning_rate": 9.806381238069267e-06, "loss": 0.0072, "step": 17980 }, { "epoch": 0.2943630859854373, "grad_norm": 0.3267678916454315, "learning_rate": 9.81183528770112e-06, "loss": 0.0087, "step": 17990 }, { "epoch": 0.29452671193651314, "grad_norm": 0.2561754882335663, "learning_rate": 9.81728933733297e-06, "loss": 0.0058, "step": 18000 }, { "epoch": 0.29469033788758897, "grad_norm": 0.13376009464263916, "learning_rate": 9.822743386964823e-06, "loss": 0.0048, "step": 18010 }, { "epoch": 0.2948539638386648, "grad_norm": 0.4218680262565613, "learning_rate": 9.828197436596674e-06, "loss": 0.007, "step": 18020 }, { "epoch": 0.29501758978974063, "grad_norm": 0.28427305817604065, "learning_rate": 9.833651486228526e-06, "loss": 0.0096, "step": 18030 }, { "epoch": 0.2951812157408165, "grad_norm": 0.4316966235637665, "learning_rate": 9.839105535860377e-06, "loss": 0.0054, "step": 18040 }, { "epoch": 0.29534484169189235, "grad_norm": 0.5705351233482361, "learning_rate": 9.844559585492228e-06, "loss": 0.0058, "step": 18050 }, { "epoch": 0.2955084676429682, "grad_norm": 0.4316052794456482, "learning_rate": 9.85001363512408e-06, "loss": 0.0088, "step": 18060 }, { "epoch": 0.295672093594044, "grad_norm": 0.06517240405082703, "learning_rate": 9.855467684755931e-06, "loss": 0.0124, "step": 18070 }, { "epoch": 0.29583571954511984, "grad_norm": 0.3734110891819, "learning_rate": 9.860921734387784e-06, "loss": 0.007, "step": 18080 }, { "epoch": 0.2959993454961957, "grad_norm": 0.4664648175239563, "learning_rate": 9.866375784019635e-06, "loss": 0.0082, "step": 18090 }, { "epoch": 0.29616297144727155, "grad_norm": 0.5983517169952393, "learning_rate": 9.871829833651487e-06, "loss": 0.0089, "step": 18100 }, { "epoch": 0.2963265973983474, "grad_norm": 0.2989373803138733, "learning_rate": 9.877283883283338e-06, "loss": 0.0079, "step": 18110 }, { "epoch": 0.2964902233494232, "grad_norm": 0.5892401337623596, "learning_rate": 9.88273793291519e-06, "loss": 0.0095, "step": 18120 }, { "epoch": 0.29665384930049904, "grad_norm": 0.25288066267967224, "learning_rate": 9.888191982547041e-06, "loss": 0.0064, "step": 18130 }, { "epoch": 0.2968174752515749, "grad_norm": 0.20859119296073914, "learning_rate": 9.893646032178894e-06, "loss": 0.0087, "step": 18140 }, { "epoch": 0.29698110120265075, "grad_norm": 0.4292795956134796, "learning_rate": 9.899100081810745e-06, "loss": 0.0093, "step": 18150 }, { "epoch": 0.2971447271537266, "grad_norm": 0.3262891173362732, "learning_rate": 9.904554131442597e-06, "loss": 0.0079, "step": 18160 }, { "epoch": 0.2973083531048024, "grad_norm": 0.35146385431289673, "learning_rate": 9.910008181074448e-06, "loss": 0.008, "step": 18170 }, { "epoch": 0.29747197905587824, "grad_norm": 0.29222363233566284, "learning_rate": 9.915462230706299e-06, "loss": 0.0083, "step": 18180 }, { "epoch": 0.29763560500695413, "grad_norm": 0.4378841519355774, "learning_rate": 9.920916280338152e-06, "loss": 0.0082, "step": 18190 }, { "epoch": 0.29779923095802996, "grad_norm": 0.3232148587703705, "learning_rate": 9.926370329970002e-06, "loss": 0.0069, "step": 18200 }, { "epoch": 0.2979628569091058, "grad_norm": 0.3009403645992279, "learning_rate": 9.931824379601855e-06, "loss": 0.0058, "step": 18210 }, { "epoch": 0.2981264828601816, "grad_norm": 0.5111549496650696, "learning_rate": 9.937278429233706e-06, "loss": 0.0051, "step": 18220 }, { "epoch": 0.29829010881125745, "grad_norm": 0.3713594973087311, "learning_rate": 9.942732478865558e-06, "loss": 0.0049, "step": 18230 }, { "epoch": 0.29845373476233333, "grad_norm": 0.5472033619880676, "learning_rate": 9.94818652849741e-06, "loss": 0.0066, "step": 18240 }, { "epoch": 0.29861736071340916, "grad_norm": 0.4773631691932678, "learning_rate": 9.953640578129262e-06, "loss": 0.0055, "step": 18250 }, { "epoch": 0.298780986664485, "grad_norm": 0.29816052317619324, "learning_rate": 9.959094627761113e-06, "loss": 0.0073, "step": 18260 }, { "epoch": 0.2989446126155608, "grad_norm": 0.4285408854484558, "learning_rate": 9.964548677392965e-06, "loss": 0.0041, "step": 18270 }, { "epoch": 0.29910823856663665, "grad_norm": 0.5150445699691772, "learning_rate": 9.970002727024816e-06, "loss": 0.0056, "step": 18280 }, { "epoch": 0.2992718645177125, "grad_norm": 0.5367032885551453, "learning_rate": 9.975456776656669e-06, "loss": 0.0072, "step": 18290 }, { "epoch": 0.29943549046878837, "grad_norm": 0.5799115896224976, "learning_rate": 9.98091082628852e-06, "loss": 0.0062, "step": 18300 }, { "epoch": 0.2995991164198642, "grad_norm": 0.22263823449611664, "learning_rate": 9.986364875920372e-06, "loss": 0.007, "step": 18310 }, { "epoch": 0.29976274237094, "grad_norm": 0.2263379991054535, "learning_rate": 9.991818925552223e-06, "loss": 0.009, "step": 18320 }, { "epoch": 0.29992636832201586, "grad_norm": 0.4348183870315552, "learning_rate": 9.997272975184075e-06, "loss": 0.0078, "step": 18330 }, { "epoch": 0.3000899942730917, "grad_norm": 0.2248072326183319, "learning_rate": 9.999999977345253e-06, "loss": 0.0081, "step": 18340 }, { "epoch": 0.30025362022416757, "grad_norm": 0.5009114742279053, "learning_rate": 9.999999796107271e-06, "loss": 0.0062, "step": 18350 }, { "epoch": 0.3004172461752434, "grad_norm": 0.21323329210281372, "learning_rate": 9.999999433631314e-06, "loss": 0.0079, "step": 18360 }, { "epoch": 0.30058087212631923, "grad_norm": 0.28179433941841125, "learning_rate": 9.999998889917394e-06, "loss": 0.0065, "step": 18370 }, { "epoch": 0.30074449807739506, "grad_norm": 0.32057973742485046, "learning_rate": 9.999998164965532e-06, "loss": 0.0074, "step": 18380 }, { "epoch": 0.3009081240284709, "grad_norm": 0.43141409754753113, "learning_rate": 9.999997258775753e-06, "loss": 0.0073, "step": 18390 }, { "epoch": 0.3010717499795468, "grad_norm": 0.6593682765960693, "learning_rate": 9.99999617134809e-06, "loss": 0.0046, "step": 18400 }, { "epoch": 0.3012353759306226, "grad_norm": 0.21974030137062073, "learning_rate": 9.999994902682584e-06, "loss": 0.0056, "step": 18410 }, { "epoch": 0.30139900188169844, "grad_norm": 0.6519566774368286, "learning_rate": 9.99999345277928e-06, "loss": 0.0062, "step": 18420 }, { "epoch": 0.30156262783277427, "grad_norm": 0.14988838136196136, "learning_rate": 9.999991821638232e-06, "loss": 0.0101, "step": 18430 }, { "epoch": 0.3017262537838501, "grad_norm": 0.4918505847454071, "learning_rate": 9.999990009259495e-06, "loss": 0.007, "step": 18440 }, { "epoch": 0.301889879734926, "grad_norm": 0.33565282821655273, "learning_rate": 9.999988015643138e-06, "loss": 0.0059, "step": 18450 }, { "epoch": 0.3020535056860018, "grad_norm": 0.3530016541481018, "learning_rate": 9.999985840789232e-06, "loss": 0.0076, "step": 18460 }, { "epoch": 0.30221713163707764, "grad_norm": 0.2299710512161255, "learning_rate": 9.99998348469786e-06, "loss": 0.0076, "step": 18470 }, { "epoch": 0.30238075758815347, "grad_norm": 0.3344787657260895, "learning_rate": 9.9999809473691e-06, "loss": 0.0049, "step": 18480 }, { "epoch": 0.3025443835392293, "grad_norm": 0.6200084686279297, "learning_rate": 9.99997822880305e-06, "loss": 0.0077, "step": 18490 }, { "epoch": 0.3027080094903052, "grad_norm": 0.36080074310302734, "learning_rate": 9.999975328999805e-06, "loss": 0.0102, "step": 18500 }, { "epoch": 0.302871635441381, "grad_norm": 0.296271413564682, "learning_rate": 9.99997224795947e-06, "loss": 0.0062, "step": 18510 }, { "epoch": 0.30303526139245685, "grad_norm": 0.562339186668396, "learning_rate": 9.999968985682161e-06, "loss": 0.0091, "step": 18520 }, { "epoch": 0.3031988873435327, "grad_norm": 0.2859978973865509, "learning_rate": 9.999965542167992e-06, "loss": 0.006, "step": 18530 }, { "epoch": 0.3033625132946085, "grad_norm": 0.5382653474807739, "learning_rate": 9.99996191741709e-06, "loss": 0.0071, "step": 18540 }, { "epoch": 0.3035261392456844, "grad_norm": 0.16001670062541962, "learning_rate": 9.999958111429584e-06, "loss": 0.008, "step": 18550 }, { "epoch": 0.3036897651967602, "grad_norm": 0.5494171977043152, "learning_rate": 9.999954124205615e-06, "loss": 0.0079, "step": 18560 }, { "epoch": 0.30385339114783605, "grad_norm": 0.5336729288101196, "learning_rate": 9.999949955745324e-06, "loss": 0.0103, "step": 18570 }, { "epoch": 0.3040170170989119, "grad_norm": 0.15034836530685425, "learning_rate": 9.999945606048867e-06, "loss": 0.0065, "step": 18580 }, { "epoch": 0.3041806430499877, "grad_norm": 0.2007811814546585, "learning_rate": 9.999941075116399e-06, "loss": 0.0049, "step": 18590 }, { "epoch": 0.3043442690010636, "grad_norm": 0.2226434350013733, "learning_rate": 9.999936362948082e-06, "loss": 0.0066, "step": 18600 }, { "epoch": 0.3045078949521394, "grad_norm": 0.5107800364494324, "learning_rate": 9.99993146954409e-06, "loss": 0.0056, "step": 18610 }, { "epoch": 0.30467152090321525, "grad_norm": 0.5736075043678284, "learning_rate": 9.999926394904597e-06, "loss": 0.008, "step": 18620 }, { "epoch": 0.3048351468542911, "grad_norm": 0.35707661509513855, "learning_rate": 9.999921139029792e-06, "loss": 0.0094, "step": 18630 }, { "epoch": 0.3049987728053669, "grad_norm": 0.17102129757404327, "learning_rate": 9.999915701919862e-06, "loss": 0.0091, "step": 18640 }, { "epoch": 0.3051623987564428, "grad_norm": 0.3269667327404022, "learning_rate": 9.999910083575004e-06, "loss": 0.0065, "step": 18650 }, { "epoch": 0.30532602470751863, "grad_norm": 0.07784273475408554, "learning_rate": 9.999904283995422e-06, "loss": 0.0081, "step": 18660 }, { "epoch": 0.30548965065859446, "grad_norm": 0.31072136759757996, "learning_rate": 9.999898303181329e-06, "loss": 0.0063, "step": 18670 }, { "epoch": 0.3056532766096703, "grad_norm": 0.4528925120830536, "learning_rate": 9.999892141132938e-06, "loss": 0.0059, "step": 18680 }, { "epoch": 0.3058169025607461, "grad_norm": 0.42534682154655457, "learning_rate": 9.999885797850475e-06, "loss": 0.0065, "step": 18690 }, { "epoch": 0.30598052851182195, "grad_norm": 0.15108926594257355, "learning_rate": 9.999879273334166e-06, "loss": 0.0095, "step": 18700 }, { "epoch": 0.30614415446289783, "grad_norm": 0.4689083695411682, "learning_rate": 9.999872567584253e-06, "loss": 0.0069, "step": 18710 }, { "epoch": 0.30630778041397366, "grad_norm": 0.3014495074748993, "learning_rate": 9.999865680600975e-06, "loss": 0.0065, "step": 18720 }, { "epoch": 0.3064714063650495, "grad_norm": 0.2090315818786621, "learning_rate": 9.999858612384583e-06, "loss": 0.0098, "step": 18730 }, { "epoch": 0.3066350323161253, "grad_norm": 0.20829305052757263, "learning_rate": 9.999851362935334e-06, "loss": 0.0067, "step": 18740 }, { "epoch": 0.30679865826720115, "grad_norm": 0.3801248371601105, "learning_rate": 9.999843932253487e-06, "loss": 0.0067, "step": 18750 }, { "epoch": 0.30696228421827704, "grad_norm": 0.07702095061540604, "learning_rate": 9.999836320339318e-06, "loss": 0.0066, "step": 18760 }, { "epoch": 0.30712591016935287, "grad_norm": 0.9566939473152161, "learning_rate": 9.999828527193097e-06, "loss": 0.0051, "step": 18770 }, { "epoch": 0.3072895361204287, "grad_norm": 0.2995423674583435, "learning_rate": 9.99982055281511e-06, "loss": 0.0066, "step": 18780 }, { "epoch": 0.3074531620715045, "grad_norm": 0.5662979483604431, "learning_rate": 9.999812397205643e-06, "loss": 0.0077, "step": 18790 }, { "epoch": 0.30761678802258036, "grad_norm": 0.3930233120918274, "learning_rate": 9.999804060364995e-06, "loss": 0.0046, "step": 18800 }, { "epoch": 0.30778041397365624, "grad_norm": 0.3896386921405792, "learning_rate": 9.999795542293466e-06, "loss": 0.0096, "step": 18810 }, { "epoch": 0.30794403992473207, "grad_norm": 0.3319282531738281, "learning_rate": 9.999786842991366e-06, "loss": 0.0051, "step": 18820 }, { "epoch": 0.3081076658758079, "grad_norm": 0.5183929204940796, "learning_rate": 9.999777962459007e-06, "loss": 0.0058, "step": 18830 }, { "epoch": 0.30827129182688373, "grad_norm": 0.262056827545166, "learning_rate": 9.999768900696716e-06, "loss": 0.0059, "step": 18840 }, { "epoch": 0.30843491777795956, "grad_norm": 0.43029534816741943, "learning_rate": 9.999759657704817e-06, "loss": 0.0071, "step": 18850 }, { "epoch": 0.30859854372903545, "grad_norm": 0.21654173731803894, "learning_rate": 9.99975023348365e-06, "loss": 0.0053, "step": 18860 }, { "epoch": 0.3087621696801113, "grad_norm": 0.3761322498321533, "learning_rate": 9.999740628033552e-06, "loss": 0.0083, "step": 18870 }, { "epoch": 0.3089257956311871, "grad_norm": 0.04160545766353607, "learning_rate": 9.999730841354871e-06, "loss": 0.0034, "step": 18880 }, { "epoch": 0.30908942158226294, "grad_norm": 0.36300981044769287, "learning_rate": 9.999720873447966e-06, "loss": 0.0097, "step": 18890 }, { "epoch": 0.30925304753333877, "grad_norm": 0.4820455312728882, "learning_rate": 9.999710724313195e-06, "loss": 0.0086, "step": 18900 }, { "epoch": 0.30941667348441465, "grad_norm": 0.3691643476486206, "learning_rate": 9.999700393950926e-06, "loss": 0.0073, "step": 18910 }, { "epoch": 0.3095802994354905, "grad_norm": 0.46267586946487427, "learning_rate": 9.999689882361536e-06, "loss": 0.005, "step": 18920 }, { "epoch": 0.3097439253865663, "grad_norm": 0.5345110297203064, "learning_rate": 9.999679189545402e-06, "loss": 0.0044, "step": 18930 }, { "epoch": 0.30990755133764214, "grad_norm": 0.34804192185401917, "learning_rate": 9.999668315502914e-06, "loss": 0.0067, "step": 18940 }, { "epoch": 0.31007117728871797, "grad_norm": 0.29948872327804565, "learning_rate": 9.999657260234469e-06, "loss": 0.0065, "step": 18950 }, { "epoch": 0.31023480323979385, "grad_norm": 0.5881202220916748, "learning_rate": 9.999646023740462e-06, "loss": 0.0086, "step": 18960 }, { "epoch": 0.3103984291908697, "grad_norm": 0.43824076652526855, "learning_rate": 9.999634606021304e-06, "loss": 0.006, "step": 18970 }, { "epoch": 0.3105620551419455, "grad_norm": 0.3577647805213928, "learning_rate": 9.999623007077407e-06, "loss": 0.0078, "step": 18980 }, { "epoch": 0.31072568109302134, "grad_norm": 0.2878649830818176, "learning_rate": 9.999611226909193e-06, "loss": 0.0078, "step": 18990 }, { "epoch": 0.3108893070440972, "grad_norm": 0.3834591209888458, "learning_rate": 9.999599265517087e-06, "loss": 0.0058, "step": 19000 }, { "epoch": 0.31105293299517306, "grad_norm": 0.17713794112205505, "learning_rate": 9.999587122901526e-06, "loss": 0.0068, "step": 19010 }, { "epoch": 0.3112165589462489, "grad_norm": 0.5166522264480591, "learning_rate": 9.999574799062946e-06, "loss": 0.0071, "step": 19020 }, { "epoch": 0.3113801848973247, "grad_norm": 0.5418213605880737, "learning_rate": 9.999562294001796e-06, "loss": 0.0099, "step": 19030 }, { "epoch": 0.31154381084840055, "grad_norm": 0.32879868149757385, "learning_rate": 9.99954960771853e-06, "loss": 0.0052, "step": 19040 }, { "epoch": 0.3117074367994764, "grad_norm": 0.2878336012363434, "learning_rate": 9.999536740213607e-06, "loss": 0.0063, "step": 19050 }, { "epoch": 0.31187106275055226, "grad_norm": 0.28318139910697937, "learning_rate": 9.999523691487492e-06, "loss": 0.0059, "step": 19060 }, { "epoch": 0.3120346887016281, "grad_norm": 0.3776904344558716, "learning_rate": 9.99951046154066e-06, "loss": 0.008, "step": 19070 }, { "epoch": 0.3121983146527039, "grad_norm": 0.3345799446105957, "learning_rate": 9.99949705037359e-06, "loss": 0.0101, "step": 19080 }, { "epoch": 0.31236194060377975, "grad_norm": 0.4610148072242737, "learning_rate": 9.999483457986767e-06, "loss": 0.006, "step": 19090 }, { "epoch": 0.3125255665548556, "grad_norm": 0.20774437487125397, "learning_rate": 9.999469684380685e-06, "loss": 0.0042, "step": 19100 }, { "epoch": 0.31268919250593147, "grad_norm": 0.5823769569396973, "learning_rate": 9.999455729555842e-06, "loss": 0.0065, "step": 19110 }, { "epoch": 0.3128528184570073, "grad_norm": 0.28292059898376465, "learning_rate": 9.999441593512747e-06, "loss": 0.0082, "step": 19120 }, { "epoch": 0.3130164444080831, "grad_norm": 0.7109828591346741, "learning_rate": 9.999427276251907e-06, "loss": 0.0061, "step": 19130 }, { "epoch": 0.31318007035915896, "grad_norm": 0.09907187521457672, "learning_rate": 9.999412777773845e-06, "loss": 0.0044, "step": 19140 }, { "epoch": 0.3133436963102348, "grad_norm": 0.28128212690353394, "learning_rate": 9.999398098079087e-06, "loss": 0.0066, "step": 19150 }, { "epoch": 0.3135073222613106, "grad_norm": 0.16690900921821594, "learning_rate": 9.999383237168163e-06, "loss": 0.0068, "step": 19160 }, { "epoch": 0.3136709482123865, "grad_norm": 0.44006332755088806, "learning_rate": 9.999368195041612e-06, "loss": 0.0077, "step": 19170 }, { "epoch": 0.31383457416346233, "grad_norm": 0.26346489787101746, "learning_rate": 9.99935297169998e-06, "loss": 0.0062, "step": 19180 }, { "epoch": 0.31399820011453816, "grad_norm": 0.22483132779598236, "learning_rate": 9.999337567143819e-06, "loss": 0.0071, "step": 19190 }, { "epoch": 0.314161826065614, "grad_norm": 0.3907037675380707, "learning_rate": 9.999321981373686e-06, "loss": 0.0079, "step": 19200 }, { "epoch": 0.3143254520166898, "grad_norm": 0.4094851315021515, "learning_rate": 9.999306214390147e-06, "loss": 0.006, "step": 19210 }, { "epoch": 0.3144890779677657, "grad_norm": 0.47151899337768555, "learning_rate": 9.999290266193773e-06, "loss": 0.0094, "step": 19220 }, { "epoch": 0.31465270391884154, "grad_norm": 0.33708736300468445, "learning_rate": 9.999274136785141e-06, "loss": 0.0065, "step": 19230 }, { "epoch": 0.31481632986991737, "grad_norm": 0.7017020583152771, "learning_rate": 9.999257826164839e-06, "loss": 0.0059, "step": 19240 }, { "epoch": 0.3149799558209932, "grad_norm": 0.4297424256801605, "learning_rate": 9.999241334333457e-06, "loss": 0.0071, "step": 19250 }, { "epoch": 0.315143581772069, "grad_norm": 0.26415255665779114, "learning_rate": 9.999224661291588e-06, "loss": 0.0075, "step": 19260 }, { "epoch": 0.3153072077231449, "grad_norm": 0.42910659313201904, "learning_rate": 9.999207807039843e-06, "loss": 0.0064, "step": 19270 }, { "epoch": 0.31547083367422074, "grad_norm": 0.17327217757701874, "learning_rate": 9.99919077157883e-06, "loss": 0.0054, "step": 19280 }, { "epoch": 0.31563445962529657, "grad_norm": 0.4476442039012909, "learning_rate": 9.999173554909167e-06, "loss": 0.0049, "step": 19290 }, { "epoch": 0.3157980855763724, "grad_norm": 0.36961737275123596, "learning_rate": 9.999156157031477e-06, "loss": 0.0075, "step": 19300 }, { "epoch": 0.31596171152744823, "grad_norm": 0.5618547797203064, "learning_rate": 9.999138577946393e-06, "loss": 0.0092, "step": 19310 }, { "epoch": 0.3161253374785241, "grad_norm": 0.3292243480682373, "learning_rate": 9.99912081765455e-06, "loss": 0.0051, "step": 19320 }, { "epoch": 0.31628896342959995, "grad_norm": 0.3421551287174225, "learning_rate": 9.999102876156592e-06, "loss": 0.0053, "step": 19330 }, { "epoch": 0.3164525893806758, "grad_norm": 0.36465516686439514, "learning_rate": 9.999084753453171e-06, "loss": 0.0082, "step": 19340 }, { "epoch": 0.3166162153317516, "grad_norm": 0.5549119710922241, "learning_rate": 9.999066449544941e-06, "loss": 0.0074, "step": 19350 }, { "epoch": 0.31677984128282743, "grad_norm": 0.33259063959121704, "learning_rate": 9.999047964432568e-06, "loss": 0.006, "step": 19360 }, { "epoch": 0.3169434672339033, "grad_norm": 0.31920158863067627, "learning_rate": 9.999029298116722e-06, "loss": 0.0075, "step": 19370 }, { "epoch": 0.31710709318497915, "grad_norm": 0.5421958565711975, "learning_rate": 9.999010450598076e-06, "loss": 0.0096, "step": 19380 }, { "epoch": 0.317270719136055, "grad_norm": 0.4390092194080353, "learning_rate": 9.99899142187732e-06, "loss": 0.0071, "step": 19390 }, { "epoch": 0.3174343450871308, "grad_norm": 0.7274793982505798, "learning_rate": 9.998972211955137e-06, "loss": 0.0063, "step": 19400 }, { "epoch": 0.31759797103820664, "grad_norm": 0.4780811369419098, "learning_rate": 9.998952820832227e-06, "loss": 0.0054, "step": 19410 }, { "epoch": 0.3177615969892825, "grad_norm": 0.5134124159812927, "learning_rate": 9.99893324850929e-06, "loss": 0.006, "step": 19420 }, { "epoch": 0.31792522294035835, "grad_norm": 0.25927048921585083, "learning_rate": 9.99891349498704e-06, "loss": 0.0101, "step": 19430 }, { "epoch": 0.3180888488914342, "grad_norm": 0.3968285918235779, "learning_rate": 9.998893560266188e-06, "loss": 0.0069, "step": 19440 }, { "epoch": 0.31825247484251, "grad_norm": 0.48260748386383057, "learning_rate": 9.998873444347461e-06, "loss": 0.0072, "step": 19450 }, { "epoch": 0.31841610079358584, "grad_norm": 0.2677678167819977, "learning_rate": 9.998853147231585e-06, "loss": 0.0067, "step": 19460 }, { "epoch": 0.31857972674466173, "grad_norm": 0.36443576216697693, "learning_rate": 9.998832668919297e-06, "loss": 0.0073, "step": 19470 }, { "epoch": 0.31874335269573756, "grad_norm": 0.3514650762081146, "learning_rate": 9.99881200941134e-06, "loss": 0.0045, "step": 19480 }, { "epoch": 0.3189069786468134, "grad_norm": 0.3199397623538971, "learning_rate": 9.998791168708462e-06, "loss": 0.0084, "step": 19490 }, { "epoch": 0.3190706045978892, "grad_norm": 0.3068281412124634, "learning_rate": 9.998770146811416e-06, "loss": 0.0075, "step": 19500 }, { "epoch": 0.31923423054896505, "grad_norm": 0.24740198254585266, "learning_rate": 9.998748943720968e-06, "loss": 0.0067, "step": 19510 }, { "epoch": 0.31939785650004093, "grad_norm": 0.5325202345848083, "learning_rate": 9.998727559437886e-06, "loss": 0.0051, "step": 19520 }, { "epoch": 0.31956148245111676, "grad_norm": 0.19455644488334656, "learning_rate": 9.998705993962943e-06, "loss": 0.0068, "step": 19530 }, { "epoch": 0.3197251084021926, "grad_norm": 0.5948394536972046, "learning_rate": 9.998684247296921e-06, "loss": 0.0063, "step": 19540 }, { "epoch": 0.3198887343532684, "grad_norm": 0.7155441045761108, "learning_rate": 9.998662319440611e-06, "loss": 0.0048, "step": 19550 }, { "epoch": 0.32005236030434425, "grad_norm": 0.6412357091903687, "learning_rate": 9.998640210394804e-06, "loss": 0.0044, "step": 19560 }, { "epoch": 0.3202159862554201, "grad_norm": 0.11753785610198975, "learning_rate": 9.998617920160304e-06, "loss": 0.0051, "step": 19570 }, { "epoch": 0.32037961220649597, "grad_norm": 0.3821144700050354, "learning_rate": 9.998595448737919e-06, "loss": 0.0089, "step": 19580 }, { "epoch": 0.3205432381575718, "grad_norm": 0.49600300192832947, "learning_rate": 9.998572796128462e-06, "loss": 0.0061, "step": 19590 }, { "epoch": 0.3207068641086476, "grad_norm": 0.24403300881385803, "learning_rate": 9.998549962332756e-06, "loss": 0.0072, "step": 19600 }, { "epoch": 0.32087049005972346, "grad_norm": 0.3227308392524719, "learning_rate": 9.998526947351625e-06, "loss": 0.0073, "step": 19610 }, { "epoch": 0.3210341160107993, "grad_norm": 0.2587415874004364, "learning_rate": 9.998503751185908e-06, "loss": 0.0073, "step": 19620 }, { "epoch": 0.32119774196187517, "grad_norm": 0.46121665835380554, "learning_rate": 9.998480373836444e-06, "loss": 0.0094, "step": 19630 }, { "epoch": 0.321361367912951, "grad_norm": 0.12452620267868042, "learning_rate": 9.998456815304078e-06, "loss": 0.0053, "step": 19640 }, { "epoch": 0.32152499386402683, "grad_norm": 0.6648384928703308, "learning_rate": 9.998433075589667e-06, "loss": 0.0083, "step": 19650 }, { "epoch": 0.32168861981510266, "grad_norm": 0.5547574758529663, "learning_rate": 9.998409154694071e-06, "loss": 0.0061, "step": 19660 }, { "epoch": 0.3218522457661785, "grad_norm": 0.28415676951408386, "learning_rate": 9.998385052618156e-06, "loss": 0.0067, "step": 19670 }, { "epoch": 0.3220158717172544, "grad_norm": 0.3386670649051666, "learning_rate": 9.998360769362795e-06, "loss": 0.0075, "step": 19680 }, { "epoch": 0.3221794976683302, "grad_norm": 0.7446482181549072, "learning_rate": 9.998336304928872e-06, "loss": 0.0079, "step": 19690 }, { "epoch": 0.32234312361940604, "grad_norm": 0.3214438855648041, "learning_rate": 9.998311659317269e-06, "loss": 0.0047, "step": 19700 }, { "epoch": 0.32250674957048187, "grad_norm": 0.2506428062915802, "learning_rate": 9.998286832528882e-06, "loss": 0.0038, "step": 19710 }, { "epoch": 0.3226703755215577, "grad_norm": 0.12644913792610168, "learning_rate": 9.99826182456461e-06, "loss": 0.0056, "step": 19720 }, { "epoch": 0.3228340014726336, "grad_norm": 0.27583956718444824, "learning_rate": 9.99823663542536e-06, "loss": 0.0042, "step": 19730 }, { "epoch": 0.3229976274237094, "grad_norm": 0.22835558652877808, "learning_rate": 9.998211265112046e-06, "loss": 0.0065, "step": 19740 }, { "epoch": 0.32316125337478524, "grad_norm": 0.2603740692138672, "learning_rate": 9.998185713625586e-06, "loss": 0.0085, "step": 19750 }, { "epoch": 0.32332487932586107, "grad_norm": 0.20266957581043243, "learning_rate": 9.998159980966906e-06, "loss": 0.0048, "step": 19760 }, { "epoch": 0.3234885052769369, "grad_norm": 0.35531818866729736, "learning_rate": 9.998134067136939e-06, "loss": 0.0063, "step": 19770 }, { "epoch": 0.3236521312280128, "grad_norm": 0.21686922013759613, "learning_rate": 9.998107972136626e-06, "loss": 0.0032, "step": 19780 }, { "epoch": 0.3238157571790886, "grad_norm": 0.5397780537605286, "learning_rate": 9.998081695966913e-06, "loss": 0.0062, "step": 19790 }, { "epoch": 0.32397938313016444, "grad_norm": 0.5007716417312622, "learning_rate": 9.998055238628748e-06, "loss": 0.0084, "step": 19800 }, { "epoch": 0.3241430090812403, "grad_norm": 0.40624916553497314, "learning_rate": 9.998028600123095e-06, "loss": 0.006, "step": 19810 }, { "epoch": 0.3243066350323161, "grad_norm": 0.17299938201904297, "learning_rate": 9.998001780450917e-06, "loss": 0.0058, "step": 19820 }, { "epoch": 0.324470260983392, "grad_norm": 0.2824023365974426, "learning_rate": 9.997974779613186e-06, "loss": 0.0062, "step": 19830 }, { "epoch": 0.3246338869344678, "grad_norm": 0.43262213468551636, "learning_rate": 9.997947597610884e-06, "loss": 0.0061, "step": 19840 }, { "epoch": 0.32479751288554365, "grad_norm": 0.25371748208999634, "learning_rate": 9.99792023444499e-06, "loss": 0.0054, "step": 19850 }, { "epoch": 0.3249611388366195, "grad_norm": 0.2944333255290985, "learning_rate": 9.997892690116503e-06, "loss": 0.0069, "step": 19860 }, { "epoch": 0.3251247647876953, "grad_norm": 0.4182451665401459, "learning_rate": 9.997864964626417e-06, "loss": 0.0067, "step": 19870 }, { "epoch": 0.3252883907387712, "grad_norm": 0.3526301681995392, "learning_rate": 9.997837057975738e-06, "loss": 0.009, "step": 19880 }, { "epoch": 0.325452016689847, "grad_norm": 0.29576632380485535, "learning_rate": 9.99780897016548e-06, "loss": 0.0069, "step": 19890 }, { "epoch": 0.32561564264092285, "grad_norm": 0.5171969532966614, "learning_rate": 9.997780701196655e-06, "loss": 0.0081, "step": 19900 }, { "epoch": 0.3257792685919987, "grad_norm": 0.7963478565216064, "learning_rate": 9.997752251070294e-06, "loss": 0.0087, "step": 19910 }, { "epoch": 0.3259428945430745, "grad_norm": 0.32467925548553467, "learning_rate": 9.997723619787425e-06, "loss": 0.0064, "step": 19920 }, { "epoch": 0.3261065204941504, "grad_norm": 0.5840205550193787, "learning_rate": 9.997694807349085e-06, "loss": 0.0064, "step": 19930 }, { "epoch": 0.32627014644522623, "grad_norm": 0.5618772506713867, "learning_rate": 9.997665813756323e-06, "loss": 0.0054, "step": 19940 }, { "epoch": 0.32643377239630206, "grad_norm": 0.28135111927986145, "learning_rate": 9.997636639010184e-06, "loss": 0.0079, "step": 19950 }, { "epoch": 0.3265973983473779, "grad_norm": 0.5040724873542786, "learning_rate": 9.99760728311173e-06, "loss": 0.0053, "step": 19960 }, { "epoch": 0.3267610242984537, "grad_norm": 0.36021214723587036, "learning_rate": 9.997577746062022e-06, "loss": 0.0077, "step": 19970 }, { "epoch": 0.3269246502495296, "grad_norm": 0.25864776968955994, "learning_rate": 9.997548027862132e-06, "loss": 0.0051, "step": 19980 }, { "epoch": 0.32708827620060543, "grad_norm": 0.38222116231918335, "learning_rate": 9.99751812851314e-06, "loss": 0.006, "step": 19990 }, { "epoch": 0.32725190215168126, "grad_norm": 0.4257170557975769, "learning_rate": 9.997488048016124e-06, "loss": 0.0099, "step": 20000 }, { "epoch": 0.3274155281027571, "grad_norm": 0.21867577731609344, "learning_rate": 9.997457786372179e-06, "loss": 0.0056, "step": 20010 }, { "epoch": 0.3275791540538329, "grad_norm": 0.4962421953678131, "learning_rate": 9.9974273435824e-06, "loss": 0.0066, "step": 20020 }, { "epoch": 0.32774278000490875, "grad_norm": 0.2248992621898651, "learning_rate": 9.997396719647889e-06, "loss": 0.0041, "step": 20030 }, { "epoch": 0.32790640595598464, "grad_norm": 0.33530429005622864, "learning_rate": 9.997365914569762e-06, "loss": 0.0076, "step": 20040 }, { "epoch": 0.32807003190706047, "grad_norm": 0.3895592987537384, "learning_rate": 9.997334928349128e-06, "loss": 0.0071, "step": 20050 }, { "epoch": 0.3282336578581363, "grad_norm": 0.5960682034492493, "learning_rate": 9.997303760987114e-06, "loss": 0.0094, "step": 20060 }, { "epoch": 0.3283972838092121, "grad_norm": 0.24708130955696106, "learning_rate": 9.99727241248485e-06, "loss": 0.0079, "step": 20070 }, { "epoch": 0.32856090976028796, "grad_norm": 0.21781188249588013, "learning_rate": 9.997240882843472e-06, "loss": 0.0036, "step": 20080 }, { "epoch": 0.32872453571136384, "grad_norm": 0.6580252647399902, "learning_rate": 9.997209172064123e-06, "loss": 0.0105, "step": 20090 }, { "epoch": 0.32888816166243967, "grad_norm": 0.22322967648506165, "learning_rate": 9.99717728014795e-06, "loss": 0.0059, "step": 20100 }, { "epoch": 0.3290517876135155, "grad_norm": 0.34823718667030334, "learning_rate": 9.997145207096113e-06, "loss": 0.0062, "step": 20110 }, { "epoch": 0.32921541356459133, "grad_norm": 0.4263283312320709, "learning_rate": 9.997112952909771e-06, "loss": 0.0089, "step": 20120 }, { "epoch": 0.32937903951566716, "grad_norm": 0.10307332128286362, "learning_rate": 9.997080517590096e-06, "loss": 0.0089, "step": 20130 }, { "epoch": 0.32954266546674305, "grad_norm": 0.5239238739013672, "learning_rate": 9.997047901138262e-06, "loss": 0.0079, "step": 20140 }, { "epoch": 0.3297062914178189, "grad_norm": 0.4170688986778259, "learning_rate": 9.997015103555452e-06, "loss": 0.0061, "step": 20150 }, { "epoch": 0.3298699173688947, "grad_norm": 0.13897143304347992, "learning_rate": 9.996982124842853e-06, "loss": 0.0083, "step": 20160 }, { "epoch": 0.33003354331997053, "grad_norm": 0.5043318867683411, "learning_rate": 9.996948965001662e-06, "loss": 0.0119, "step": 20170 }, { "epoch": 0.33019716927104636, "grad_norm": 0.4660276174545288, "learning_rate": 9.996915624033082e-06, "loss": 0.0054, "step": 20180 }, { "epoch": 0.33036079522212225, "grad_norm": 0.17434312403202057, "learning_rate": 9.996882101938321e-06, "loss": 0.0068, "step": 20190 }, { "epoch": 0.3305244211731981, "grad_norm": 0.5469229221343994, "learning_rate": 9.996848398718593e-06, "loss": 0.0064, "step": 20200 }, { "epoch": 0.3306880471242739, "grad_norm": 0.2864542007446289, "learning_rate": 9.99681451437512e-06, "loss": 0.0081, "step": 20210 }, { "epoch": 0.33085167307534974, "grad_norm": 0.4709395170211792, "learning_rate": 9.99678044890913e-06, "loss": 0.0094, "step": 20220 }, { "epoch": 0.33101529902642557, "grad_norm": 0.5131140351295471, "learning_rate": 9.996746202321857e-06, "loss": 0.0057, "step": 20230 }, { "epoch": 0.33117892497750145, "grad_norm": 0.3796265125274658, "learning_rate": 9.996711774614545e-06, "loss": 0.0037, "step": 20240 }, { "epoch": 0.3313425509285773, "grad_norm": 0.4387819766998291, "learning_rate": 9.996677165788442e-06, "loss": 0.0075, "step": 20250 }, { "epoch": 0.3315061768796531, "grad_norm": 0.16999395191669464, "learning_rate": 9.996642375844799e-06, "loss": 0.0061, "step": 20260 }, { "epoch": 0.33166980283072894, "grad_norm": 0.3768167495727539, "learning_rate": 9.996607404784878e-06, "loss": 0.0044, "step": 20270 }, { "epoch": 0.3318334287818048, "grad_norm": 0.2779279053211212, "learning_rate": 9.996572252609948e-06, "loss": 0.0062, "step": 20280 }, { "epoch": 0.33199705473288066, "grad_norm": 0.2242118865251541, "learning_rate": 9.996536919321285e-06, "loss": 0.0053, "step": 20290 }, { "epoch": 0.3321606806839565, "grad_norm": 0.17876176536083221, "learning_rate": 9.996501404920166e-06, "loss": 0.0091, "step": 20300 }, { "epoch": 0.3323243066350323, "grad_norm": 0.3561529219150543, "learning_rate": 9.99646570940788e-06, "loss": 0.0063, "step": 20310 }, { "epoch": 0.33248793258610815, "grad_norm": 0.47531288862228394, "learning_rate": 9.996429832785722e-06, "loss": 0.0042, "step": 20320 }, { "epoch": 0.332651558537184, "grad_norm": 0.602146565914154, "learning_rate": 9.996393775054988e-06, "loss": 0.0073, "step": 20330 }, { "epoch": 0.33281518448825986, "grad_norm": 0.31655630469322205, "learning_rate": 9.996357536216991e-06, "loss": 0.0076, "step": 20340 }, { "epoch": 0.3329788104393357, "grad_norm": 0.3042921721935272, "learning_rate": 9.99632111627304e-06, "loss": 0.0048, "step": 20350 }, { "epoch": 0.3331424363904115, "grad_norm": 0.14022202789783478, "learning_rate": 9.996284515224459e-06, "loss": 0.0061, "step": 20360 }, { "epoch": 0.33330606234148735, "grad_norm": 0.5969071388244629, "learning_rate": 9.99624773307257e-06, "loss": 0.0057, "step": 20370 }, { "epoch": 0.3334696882925632, "grad_norm": 0.9815368056297302, "learning_rate": 9.996210769818711e-06, "loss": 0.006, "step": 20380 }, { "epoch": 0.33363331424363907, "grad_norm": 0.11673838645219803, "learning_rate": 9.996173625464218e-06, "loss": 0.0066, "step": 20390 }, { "epoch": 0.3337969401947149, "grad_norm": 0.23781123757362366, "learning_rate": 9.996136300010441e-06, "loss": 0.0055, "step": 20400 }, { "epoch": 0.3339605661457907, "grad_norm": 0.20059452950954437, "learning_rate": 9.99609879345873e-06, "loss": 0.0058, "step": 20410 }, { "epoch": 0.33412419209686656, "grad_norm": 0.28475067019462585, "learning_rate": 9.996061105810445e-06, "loss": 0.0085, "step": 20420 }, { "epoch": 0.3342878180479424, "grad_norm": 0.27808523178100586, "learning_rate": 9.996023237066953e-06, "loss": 0.0081, "step": 20430 }, { "epoch": 0.33445144399901827, "grad_norm": 0.37142521142959595, "learning_rate": 9.995985187229626e-06, "loss": 0.006, "step": 20440 }, { "epoch": 0.3346150699500941, "grad_norm": 0.5273996591567993, "learning_rate": 9.995946956299845e-06, "loss": 0.0059, "step": 20450 }, { "epoch": 0.33477869590116993, "grad_norm": 0.022713884711265564, "learning_rate": 9.995908544278993e-06, "loss": 0.0045, "step": 20460 }, { "epoch": 0.33494232185224576, "grad_norm": 0.248569056391716, "learning_rate": 9.995869951168462e-06, "loss": 0.0047, "step": 20470 }, { "epoch": 0.3351059478033216, "grad_norm": 0.43785926699638367, "learning_rate": 9.995831176969655e-06, "loss": 0.0132, "step": 20480 }, { "epoch": 0.3352695737543974, "grad_norm": 0.6996095776557922, "learning_rate": 9.995792221683975e-06, "loss": 0.0059, "step": 20490 }, { "epoch": 0.3354331997054733, "grad_norm": 0.5188438892364502, "learning_rate": 9.995753085312833e-06, "loss": 0.0072, "step": 20500 }, { "epoch": 0.33559682565654914, "grad_norm": 0.3539886772632599, "learning_rate": 9.99571376785765e-06, "loss": 0.0067, "step": 20510 }, { "epoch": 0.33576045160762497, "grad_norm": 0.29389485716819763, "learning_rate": 9.995674269319849e-06, "loss": 0.0072, "step": 20520 }, { "epoch": 0.3359240775587008, "grad_norm": 0.24048228561878204, "learning_rate": 9.995634589700861e-06, "loss": 0.0101, "step": 20530 }, { "epoch": 0.3360877035097766, "grad_norm": 0.39865291118621826, "learning_rate": 9.995594729002126e-06, "loss": 0.0079, "step": 20540 }, { "epoch": 0.3362513294608525, "grad_norm": 0.44991105794906616, "learning_rate": 9.995554687225092e-06, "loss": 0.0064, "step": 20550 }, { "epoch": 0.33641495541192834, "grad_norm": 0.279826283454895, "learning_rate": 9.995514464371203e-06, "loss": 0.0053, "step": 20560 }, { "epoch": 0.33657858136300417, "grad_norm": 0.1260901540517807, "learning_rate": 9.995474060441923e-06, "loss": 0.0047, "step": 20570 }, { "epoch": 0.33674220731408, "grad_norm": 0.30685099959373474, "learning_rate": 9.995433475438715e-06, "loss": 0.0077, "step": 20580 }, { "epoch": 0.33690583326515583, "grad_norm": 0.41488710045814514, "learning_rate": 9.99539270936305e-06, "loss": 0.0062, "step": 20590 }, { "epoch": 0.3370694592162317, "grad_norm": 3.008312225341797, "learning_rate": 9.995351762216404e-06, "loss": 0.0066, "step": 20600 }, { "epoch": 0.33723308516730754, "grad_norm": 0.38788747787475586, "learning_rate": 9.995310634000264e-06, "loss": 0.0053, "step": 20610 }, { "epoch": 0.3373967111183834, "grad_norm": 0.19717708230018616, "learning_rate": 9.995269324716119e-06, "loss": 0.0048, "step": 20620 }, { "epoch": 0.3375603370694592, "grad_norm": 0.19570958614349365, "learning_rate": 9.995227834365465e-06, "loss": 0.0048, "step": 20630 }, { "epoch": 0.33772396302053503, "grad_norm": 0.23481355607509613, "learning_rate": 9.99518616294981e-06, "loss": 0.0054, "step": 20640 }, { "epoch": 0.3378875889716109, "grad_norm": 0.26487621665000916, "learning_rate": 9.995144310470661e-06, "loss": 0.0076, "step": 20650 }, { "epoch": 0.33805121492268675, "grad_norm": 0.2517988085746765, "learning_rate": 9.995102276929537e-06, "loss": 0.0042, "step": 20660 }, { "epoch": 0.3382148408737626, "grad_norm": 0.21403658390045166, "learning_rate": 9.995060062327961e-06, "loss": 0.0047, "step": 20670 }, { "epoch": 0.3383784668248384, "grad_norm": 0.28809064626693726, "learning_rate": 9.99501766666746e-06, "loss": 0.0081, "step": 20680 }, { "epoch": 0.33854209277591424, "grad_norm": 0.2755778729915619, "learning_rate": 9.99497508994958e-06, "loss": 0.0049, "step": 20690 }, { "epoch": 0.3387057187269901, "grad_norm": 0.34083205461502075, "learning_rate": 9.994932332175853e-06, "loss": 0.0076, "step": 20700 }, { "epoch": 0.33886934467806595, "grad_norm": 0.29603058099746704, "learning_rate": 9.994889393347835e-06, "loss": 0.0058, "step": 20710 }, { "epoch": 0.3390329706291418, "grad_norm": 0.0946914330124855, "learning_rate": 9.994846273467083e-06, "loss": 0.0047, "step": 20720 }, { "epoch": 0.3391965965802176, "grad_norm": 0.6166210770606995, "learning_rate": 9.994802972535158e-06, "loss": 0.008, "step": 20730 }, { "epoch": 0.33936022253129344, "grad_norm": 0.16732168197631836, "learning_rate": 9.994759490553626e-06, "loss": 0.0064, "step": 20740 }, { "epoch": 0.33952384848236933, "grad_norm": 0.3816123306751251, "learning_rate": 9.994715827524073e-06, "loss": 0.0051, "step": 20750 }, { "epoch": 0.33968747443344516, "grad_norm": 0.5703874826431274, "learning_rate": 9.994671983448073e-06, "loss": 0.0059, "step": 20760 }, { "epoch": 0.339851100384521, "grad_norm": 0.16485288739204407, "learning_rate": 9.994627958327216e-06, "loss": 0.005, "step": 20770 }, { "epoch": 0.3400147263355968, "grad_norm": 0.29808759689331055, "learning_rate": 9.994583752163103e-06, "loss": 0.0051, "step": 20780 }, { "epoch": 0.34017835228667265, "grad_norm": 0.2384519875049591, "learning_rate": 9.994539364957333e-06, "loss": 0.0053, "step": 20790 }, { "epoch": 0.34034197823774853, "grad_norm": 0.5800129771232605, "learning_rate": 9.994494796711512e-06, "loss": 0.0085, "step": 20800 }, { "epoch": 0.34050560418882436, "grad_norm": 0.1808997392654419, "learning_rate": 9.99445004742726e-06, "loss": 0.0044, "step": 20810 }, { "epoch": 0.3406692301399002, "grad_norm": 0.4030112326145172, "learning_rate": 9.9944051171062e-06, "loss": 0.0064, "step": 20820 }, { "epoch": 0.340832856090976, "grad_norm": 0.1567230373620987, "learning_rate": 9.994360005749955e-06, "loss": 0.0066, "step": 20830 }, { "epoch": 0.34099648204205185, "grad_norm": 0.2962917387485504, "learning_rate": 9.994314713360166e-06, "loss": 0.0063, "step": 20840 }, { "epoch": 0.34116010799312774, "grad_norm": 0.2686673700809479, "learning_rate": 9.99426923993847e-06, "loss": 0.0094, "step": 20850 }, { "epoch": 0.34132373394420357, "grad_norm": 0.2440519779920578, "learning_rate": 9.994223585486518e-06, "loss": 0.0084, "step": 20860 }, { "epoch": 0.3414873598952794, "grad_norm": 0.1828407347202301, "learning_rate": 9.994177750005966e-06, "loss": 0.0057, "step": 20870 }, { "epoch": 0.3416509858463552, "grad_norm": 0.21303662657737732, "learning_rate": 9.994131733498472e-06, "loss": 0.0041, "step": 20880 }, { "epoch": 0.34181461179743106, "grad_norm": 0.306277334690094, "learning_rate": 9.994085535965707e-06, "loss": 0.0063, "step": 20890 }, { "epoch": 0.3419782377485069, "grad_norm": 0.3458658754825592, "learning_rate": 9.994039157409343e-06, "loss": 0.013, "step": 20900 }, { "epoch": 0.34214186369958277, "grad_norm": 0.7181628346443176, "learning_rate": 9.993992597831064e-06, "loss": 0.009, "step": 20910 }, { "epoch": 0.3423054896506586, "grad_norm": 0.33908718824386597, "learning_rate": 9.993945857232555e-06, "loss": 0.0073, "step": 20920 }, { "epoch": 0.34246911560173443, "grad_norm": 0.23480235040187836, "learning_rate": 9.993898935615511e-06, "loss": 0.007, "step": 20930 }, { "epoch": 0.34263274155281026, "grad_norm": 0.3750567138195038, "learning_rate": 9.993851832981633e-06, "loss": 0.0061, "step": 20940 }, { "epoch": 0.3427963675038861, "grad_norm": 0.4308174252510071, "learning_rate": 9.993804549332628e-06, "loss": 0.0046, "step": 20950 }, { "epoch": 0.342959993454962, "grad_norm": 0.20649023354053497, "learning_rate": 9.993757084670213e-06, "loss": 0.0065, "step": 20960 }, { "epoch": 0.3431236194060378, "grad_norm": 0.5056235194206238, "learning_rate": 9.993709438996103e-06, "loss": 0.0121, "step": 20970 }, { "epoch": 0.34328724535711364, "grad_norm": 0.1302957534790039, "learning_rate": 9.993661612312029e-06, "loss": 0.0061, "step": 20980 }, { "epoch": 0.34345087130818946, "grad_norm": 0.4491141140460968, "learning_rate": 9.993613604619723e-06, "loss": 0.0055, "step": 20990 }, { "epoch": 0.3436144972592653, "grad_norm": 0.2363044023513794, "learning_rate": 9.993565415920927e-06, "loss": 0.004, "step": 21000 }, { "epoch": 0.3437781232103412, "grad_norm": 0.2518073320388794, "learning_rate": 9.993517046217385e-06, "loss": 0.0072, "step": 21010 }, { "epoch": 0.343941749161417, "grad_norm": 0.28679320216178894, "learning_rate": 9.993468495510853e-06, "loss": 0.0048, "step": 21020 }, { "epoch": 0.34410537511249284, "grad_norm": 0.43081966042518616, "learning_rate": 9.99341976380309e-06, "loss": 0.0049, "step": 21030 }, { "epoch": 0.34426900106356867, "grad_norm": 0.19454362988471985, "learning_rate": 9.993370851095859e-06, "loss": 0.0062, "step": 21040 }, { "epoch": 0.3444326270146445, "grad_norm": 0.31099480390548706, "learning_rate": 9.993321757390938e-06, "loss": 0.0048, "step": 21050 }, { "epoch": 0.3445962529657204, "grad_norm": 0.27064645290374756, "learning_rate": 9.993272482690104e-06, "loss": 0.0058, "step": 21060 }, { "epoch": 0.3447598789167962, "grad_norm": 0.41544073820114136, "learning_rate": 9.993223026995144e-06, "loss": 0.007, "step": 21070 }, { "epoch": 0.34492350486787204, "grad_norm": 0.2886449694633484, "learning_rate": 9.99317339030785e-06, "loss": 0.0042, "step": 21080 }, { "epoch": 0.3450871308189479, "grad_norm": 0.19265399873256683, "learning_rate": 9.993123572630021e-06, "loss": 0.0034, "step": 21090 }, { "epoch": 0.3452507567700237, "grad_norm": 0.1833256483078003, "learning_rate": 9.993073573963463e-06, "loss": 0.0082, "step": 21100 }, { "epoch": 0.3454143827210996, "grad_norm": 0.29618486762046814, "learning_rate": 9.993023394309988e-06, "loss": 0.0068, "step": 21110 }, { "epoch": 0.3455780086721754, "grad_norm": 0.4258694648742676, "learning_rate": 9.992973033671417e-06, "loss": 0.0069, "step": 21120 }, { "epoch": 0.34574163462325125, "grad_norm": 0.6530909538269043, "learning_rate": 9.992922492049573e-06, "loss": 0.0074, "step": 21130 }, { "epoch": 0.3459052605743271, "grad_norm": 0.5368636250495911, "learning_rate": 9.992871769446289e-06, "loss": 0.006, "step": 21140 }, { "epoch": 0.3460688865254029, "grad_norm": 0.30355000495910645, "learning_rate": 9.992820865863403e-06, "loss": 0.0061, "step": 21150 }, { "epoch": 0.3462325124764788, "grad_norm": 0.46395808458328247, "learning_rate": 9.99276978130276e-06, "loss": 0.005, "step": 21160 }, { "epoch": 0.3463961384275546, "grad_norm": 0.1760774552822113, "learning_rate": 9.992718515766213e-06, "loss": 0.0084, "step": 21170 }, { "epoch": 0.34655976437863045, "grad_norm": 0.3363729417324066, "learning_rate": 9.99266706925562e-06, "loss": 0.0094, "step": 21180 }, { "epoch": 0.3467233903297063, "grad_norm": 0.2082325965166092, "learning_rate": 9.992615441772845e-06, "loss": 0.0063, "step": 21190 }, { "epoch": 0.3468870162807821, "grad_norm": 0.3925575911998749, "learning_rate": 9.992563633319759e-06, "loss": 0.0047, "step": 21200 }, { "epoch": 0.347050642231858, "grad_norm": 0.4477984607219696, "learning_rate": 9.992511643898242e-06, "loss": 0.009, "step": 21210 }, { "epoch": 0.3472142681829338, "grad_norm": 0.23646071553230286, "learning_rate": 9.992459473510175e-06, "loss": 0.0055, "step": 21220 }, { "epoch": 0.34737789413400966, "grad_norm": 0.28470760583877563, "learning_rate": 9.992407122157452e-06, "loss": 0.0063, "step": 21230 }, { "epoch": 0.3475415200850855, "grad_norm": 0.717832088470459, "learning_rate": 9.99235458984197e-06, "loss": 0.0043, "step": 21240 }, { "epoch": 0.3477051460361613, "grad_norm": 0.2909640073776245, "learning_rate": 9.992301876565634e-06, "loss": 0.009, "step": 21250 }, { "epoch": 0.3478687719872372, "grad_norm": 0.531152606010437, "learning_rate": 9.992248982330352e-06, "loss": 0.0067, "step": 21260 }, { "epoch": 0.34803239793831303, "grad_norm": 0.4054419994354248, "learning_rate": 9.992195907138045e-06, "loss": 0.0059, "step": 21270 }, { "epoch": 0.34819602388938886, "grad_norm": 0.23385083675384521, "learning_rate": 9.992142650990633e-06, "loss": 0.0043, "step": 21280 }, { "epoch": 0.3483596498404647, "grad_norm": 0.4649423658847809, "learning_rate": 9.992089213890048e-06, "loss": 0.0044, "step": 21290 }, { "epoch": 0.3485232757915405, "grad_norm": 0.3326658308506012, "learning_rate": 9.992035595838228e-06, "loss": 0.0048, "step": 21300 }, { "epoch": 0.3486869017426164, "grad_norm": 0.19111086428165436, "learning_rate": 9.991981796837116e-06, "loss": 0.0068, "step": 21310 }, { "epoch": 0.34885052769369224, "grad_norm": 0.2604122459888458, "learning_rate": 9.99192781688866e-06, "loss": 0.0061, "step": 21320 }, { "epoch": 0.34901415364476807, "grad_norm": 0.06770627200603485, "learning_rate": 9.99187365599482e-06, "loss": 0.005, "step": 21330 }, { "epoch": 0.3491777795958439, "grad_norm": 0.396420955657959, "learning_rate": 9.991819314157558e-06, "loss": 0.009, "step": 21340 }, { "epoch": 0.3493414055469197, "grad_norm": 0.31943750381469727, "learning_rate": 9.991764791378842e-06, "loss": 0.0046, "step": 21350 }, { "epoch": 0.34950503149799556, "grad_norm": 0.11120828986167908, "learning_rate": 9.99171008766065e-06, "loss": 0.0074, "step": 21360 }, { "epoch": 0.34966865744907144, "grad_norm": 0.17532435059547424, "learning_rate": 9.991655203004966e-06, "loss": 0.0062, "step": 21370 }, { "epoch": 0.34983228340014727, "grad_norm": 0.14777524769306183, "learning_rate": 9.991600137413777e-06, "loss": 0.0051, "step": 21380 }, { "epoch": 0.3499959093512231, "grad_norm": 0.19834992289543152, "learning_rate": 9.991544890889079e-06, "loss": 0.0125, "step": 21390 }, { "epoch": 0.35015953530229893, "grad_norm": 0.25837865471839905, "learning_rate": 9.991489463432877e-06, "loss": 0.0057, "step": 21400 }, { "epoch": 0.35032316125337476, "grad_norm": 0.4817623198032379, "learning_rate": 9.991433855047177e-06, "loss": 0.0068, "step": 21410 }, { "epoch": 0.35048678720445064, "grad_norm": 0.3779519200325012, "learning_rate": 9.991378065733999e-06, "loss": 0.006, "step": 21420 }, { "epoch": 0.3506504131555265, "grad_norm": 0.20369569957256317, "learning_rate": 9.991322095495359e-06, "loss": 0.005, "step": 21430 }, { "epoch": 0.3508140391066023, "grad_norm": 0.20326143503189087, "learning_rate": 9.991265944333292e-06, "loss": 0.0034, "step": 21440 }, { "epoch": 0.35097766505767813, "grad_norm": 0.19558410346508026, "learning_rate": 9.99120961224983e-06, "loss": 0.0091, "step": 21450 }, { "epoch": 0.35114129100875396, "grad_norm": 0.4241901636123657, "learning_rate": 9.991153099247018e-06, "loss": 0.0092, "step": 21460 }, { "epoch": 0.35130491695982985, "grad_norm": 0.5111272931098938, "learning_rate": 9.9910964053269e-06, "loss": 0.0064, "step": 21470 }, { "epoch": 0.3514685429109057, "grad_norm": 0.446339875459671, "learning_rate": 9.99103953049153e-06, "loss": 0.0055, "step": 21480 }, { "epoch": 0.3516321688619815, "grad_norm": 0.2024042308330536, "learning_rate": 9.990982474742977e-06, "loss": 0.0081, "step": 21490 }, { "epoch": 0.35179579481305734, "grad_norm": 0.30922695994377136, "learning_rate": 9.990925238083304e-06, "loss": 0.0069, "step": 21500 }, { "epoch": 0.35195942076413317, "grad_norm": 0.420624315738678, "learning_rate": 9.990867820514584e-06, "loss": 0.0058, "step": 21510 }, { "epoch": 0.35212304671520905, "grad_norm": 0.307954341173172, "learning_rate": 9.990810222038904e-06, "loss": 0.0042, "step": 21520 }, { "epoch": 0.3522866726662849, "grad_norm": 0.3861580789089203, "learning_rate": 9.990752442658347e-06, "loss": 0.006, "step": 21530 }, { "epoch": 0.3524502986173607, "grad_norm": 0.11174075305461884, "learning_rate": 9.99069448237501e-06, "loss": 0.0062, "step": 21540 }, { "epoch": 0.35261392456843654, "grad_norm": 0.13128244876861572, "learning_rate": 9.99063634119099e-06, "loss": 0.0042, "step": 21550 }, { "epoch": 0.3527775505195124, "grad_norm": 0.2859407067298889, "learning_rate": 9.990578019108401e-06, "loss": 0.0053, "step": 21560 }, { "epoch": 0.35294117647058826, "grad_norm": 0.303383469581604, "learning_rate": 9.99051951612935e-06, "loss": 0.0053, "step": 21570 }, { "epoch": 0.3531048024216641, "grad_norm": 0.25243061780929565, "learning_rate": 9.990460832255964e-06, "loss": 0.0066, "step": 21580 }, { "epoch": 0.3532684283727399, "grad_norm": 0.44662171602249146, "learning_rate": 9.990401967490366e-06, "loss": 0.0053, "step": 21590 }, { "epoch": 0.35343205432381575, "grad_norm": 0.48392048478126526, "learning_rate": 9.990342921834691e-06, "loss": 0.0049, "step": 21600 }, { "epoch": 0.3535956802748916, "grad_norm": 0.21543946862220764, "learning_rate": 9.990283695291077e-06, "loss": 0.0071, "step": 21610 }, { "epoch": 0.35375930622596746, "grad_norm": 0.37745505571365356, "learning_rate": 9.990224287861676e-06, "loss": 0.0073, "step": 21620 }, { "epoch": 0.3539229321770433, "grad_norm": 0.23981940746307373, "learning_rate": 9.990164699548636e-06, "loss": 0.0066, "step": 21630 }, { "epoch": 0.3540865581281191, "grad_norm": 0.26420408487319946, "learning_rate": 9.990104930354118e-06, "loss": 0.0081, "step": 21640 }, { "epoch": 0.35425018407919495, "grad_norm": 0.33763372898101807, "learning_rate": 9.990044980280293e-06, "loss": 0.0052, "step": 21650 }, { "epoch": 0.3544138100302708, "grad_norm": 0.4525817930698395, "learning_rate": 9.989984849329329e-06, "loss": 0.0116, "step": 21660 }, { "epoch": 0.35457743598134667, "grad_norm": 0.2738310396671295, "learning_rate": 9.989924537503408e-06, "loss": 0.0082, "step": 21670 }, { "epoch": 0.3547410619324225, "grad_norm": 0.15360699594020844, "learning_rate": 9.989864044804715e-06, "loss": 0.0047, "step": 21680 }, { "epoch": 0.3549046878834983, "grad_norm": 0.31382203102111816, "learning_rate": 9.989803371235442e-06, "loss": 0.0116, "step": 21690 }, { "epoch": 0.35506831383457416, "grad_norm": 0.14262357354164124, "learning_rate": 9.989742516797791e-06, "loss": 0.0052, "step": 21700 }, { "epoch": 0.35523193978565, "grad_norm": 0.4709635376930237, "learning_rate": 9.989681481493966e-06, "loss": 0.0064, "step": 21710 }, { "epoch": 0.35539556573672587, "grad_norm": 0.29232409596443176, "learning_rate": 9.98962026532618e-06, "loss": 0.0058, "step": 21720 }, { "epoch": 0.3555591916878017, "grad_norm": 0.20618091523647308, "learning_rate": 9.98955886829665e-06, "loss": 0.0056, "step": 21730 }, { "epoch": 0.35572281763887753, "grad_norm": 0.2509346306324005, "learning_rate": 9.989497290407606e-06, "loss": 0.0063, "step": 21740 }, { "epoch": 0.35588644358995336, "grad_norm": 0.26989296078681946, "learning_rate": 9.989435531661275e-06, "loss": 0.0061, "step": 21750 }, { "epoch": 0.3560500695410292, "grad_norm": 0.3324601352214813, "learning_rate": 9.989373592059898e-06, "loss": 0.0053, "step": 21760 }, { "epoch": 0.356213695492105, "grad_norm": 0.335764616727829, "learning_rate": 9.98931147160572e-06, "loss": 0.0079, "step": 21770 }, { "epoch": 0.3563773214431809, "grad_norm": 0.5803649425506592, "learning_rate": 9.989249170300993e-06, "loss": 0.0082, "step": 21780 }, { "epoch": 0.35654094739425674, "grad_norm": 0.19377391040325165, "learning_rate": 9.989186688147977e-06, "loss": 0.0038, "step": 21790 }, { "epoch": 0.35670457334533257, "grad_norm": 0.5861106514930725, "learning_rate": 9.989124025148931e-06, "loss": 0.0101, "step": 21800 }, { "epoch": 0.3568681992964084, "grad_norm": 0.39570486545562744, "learning_rate": 9.989061181306132e-06, "loss": 0.005, "step": 21810 }, { "epoch": 0.3570318252474842, "grad_norm": 0.13977734744548798, "learning_rate": 9.988998156621857e-06, "loss": 0.0063, "step": 21820 }, { "epoch": 0.3571954511985601, "grad_norm": 0.2916661500930786, "learning_rate": 9.988934951098388e-06, "loss": 0.0057, "step": 21830 }, { "epoch": 0.35735907714963594, "grad_norm": 0.16173802316188812, "learning_rate": 9.988871564738017e-06, "loss": 0.0042, "step": 21840 }, { "epoch": 0.35752270310071177, "grad_norm": 0.21764524281024933, "learning_rate": 9.988807997543044e-06, "loss": 0.0065, "step": 21850 }, { "epoch": 0.3576863290517876, "grad_norm": 0.22217422723770142, "learning_rate": 9.988744249515769e-06, "loss": 0.0043, "step": 21860 }, { "epoch": 0.35784995500286343, "grad_norm": 0.2889345586299896, "learning_rate": 9.988680320658508e-06, "loss": 0.0057, "step": 21870 }, { "epoch": 0.3580135809539393, "grad_norm": 0.3199993073940277, "learning_rate": 9.988616210973574e-06, "loss": 0.0044, "step": 21880 }, { "epoch": 0.35817720690501514, "grad_norm": 0.41283565759658813, "learning_rate": 9.988551920463293e-06, "loss": 0.0061, "step": 21890 }, { "epoch": 0.358340832856091, "grad_norm": 0.6704068779945374, "learning_rate": 9.988487449129993e-06, "loss": 0.008, "step": 21900 }, { "epoch": 0.3585044588071668, "grad_norm": 0.3587813377380371, "learning_rate": 9.988422796976013e-06, "loss": 0.0071, "step": 21910 }, { "epoch": 0.35866808475824263, "grad_norm": 0.3461415767669678, "learning_rate": 9.988357964003696e-06, "loss": 0.0069, "step": 21920 }, { "epoch": 0.3588317107093185, "grad_norm": 0.28841766715049744, "learning_rate": 9.988292950215393e-06, "loss": 0.0078, "step": 21930 }, { "epoch": 0.35899533666039435, "grad_norm": 0.3538677990436554, "learning_rate": 9.988227755613458e-06, "loss": 0.0047, "step": 21940 }, { "epoch": 0.3591589626114702, "grad_norm": 0.3300904929637909, "learning_rate": 9.988162380200257e-06, "loss": 0.0074, "step": 21950 }, { "epoch": 0.359322588562546, "grad_norm": 0.06635846942663193, "learning_rate": 9.988096823978158e-06, "loss": 0.0039, "step": 21960 }, { "epoch": 0.35948621451362184, "grad_norm": 0.29696011543273926, "learning_rate": 9.988031086949536e-06, "loss": 0.0054, "step": 21970 }, { "epoch": 0.3596498404646977, "grad_norm": 0.15317226946353912, "learning_rate": 9.987965169116778e-06, "loss": 0.007, "step": 21980 }, { "epoch": 0.35981346641577355, "grad_norm": 0.2748468518257141, "learning_rate": 9.987899070482269e-06, "loss": 0.0056, "step": 21990 }, { "epoch": 0.3599770923668494, "grad_norm": 0.2048082798719406, "learning_rate": 9.987832791048408e-06, "loss": 0.0057, "step": 22000 }, { "epoch": 0.3601407183179252, "grad_norm": 0.1489567756652832, "learning_rate": 9.987766330817598e-06, "loss": 0.0041, "step": 22010 }, { "epoch": 0.36030434426900104, "grad_norm": 0.2870359420776367, "learning_rate": 9.987699689792244e-06, "loss": 0.006, "step": 22020 }, { "epoch": 0.3604679702200769, "grad_norm": 0.2506335973739624, "learning_rate": 9.987632867974764e-06, "loss": 0.0065, "step": 22030 }, { "epoch": 0.36063159617115276, "grad_norm": 0.19328466057777405, "learning_rate": 9.987565865367582e-06, "loss": 0.0075, "step": 22040 }, { "epoch": 0.3607952221222286, "grad_norm": 0.1783657819032669, "learning_rate": 9.987498681973121e-06, "loss": 0.004, "step": 22050 }, { "epoch": 0.3609588480733044, "grad_norm": 0.16120240092277527, "learning_rate": 9.987431317793823e-06, "loss": 0.0051, "step": 22060 }, { "epoch": 0.36112247402438025, "grad_norm": 0.4215061664581299, "learning_rate": 9.987363772832125e-06, "loss": 0.0081, "step": 22070 }, { "epoch": 0.36128609997545613, "grad_norm": 0.5700271725654602, "learning_rate": 9.987296047090477e-06, "loss": 0.0066, "step": 22080 }, { "epoch": 0.36144972592653196, "grad_norm": 0.21919545531272888, "learning_rate": 9.987228140571335e-06, "loss": 0.0053, "step": 22090 }, { "epoch": 0.3616133518776078, "grad_norm": 0.23127950727939606, "learning_rate": 9.987160053277158e-06, "loss": 0.0054, "step": 22100 }, { "epoch": 0.3617769778286836, "grad_norm": 0.20025378465652466, "learning_rate": 9.987091785210416e-06, "loss": 0.0057, "step": 22110 }, { "epoch": 0.36194060377975945, "grad_norm": 0.5790795087814331, "learning_rate": 9.987023336373584e-06, "loss": 0.0047, "step": 22120 }, { "epoch": 0.36210422973083534, "grad_norm": 0.22350969910621643, "learning_rate": 9.986954706769142e-06, "loss": 0.0073, "step": 22130 }, { "epoch": 0.36226785568191117, "grad_norm": 0.2985982298851013, "learning_rate": 9.986885896399577e-06, "loss": 0.0067, "step": 22140 }, { "epoch": 0.362431481632987, "grad_norm": 0.18830154836177826, "learning_rate": 9.986816905267385e-06, "loss": 0.0063, "step": 22150 }, { "epoch": 0.3625951075840628, "grad_norm": 0.07355014979839325, "learning_rate": 9.986747733375064e-06, "loss": 0.0069, "step": 22160 }, { "epoch": 0.36275873353513866, "grad_norm": 0.6007991433143616, "learning_rate": 9.986678380725123e-06, "loss": 0.0044, "step": 22170 }, { "epoch": 0.36292235948621454, "grad_norm": 0.20156943798065186, "learning_rate": 9.986608847320077e-06, "loss": 0.0073, "step": 22180 }, { "epoch": 0.36308598543729037, "grad_norm": 0.21956665813922882, "learning_rate": 9.986539133162444e-06, "loss": 0.0072, "step": 22190 }, { "epoch": 0.3632496113883662, "grad_norm": 0.3403819501399994, "learning_rate": 9.986469238254754e-06, "loss": 0.0062, "step": 22200 }, { "epoch": 0.36341323733944203, "grad_norm": 0.28590965270996094, "learning_rate": 9.986399162599538e-06, "loss": 0.0076, "step": 22210 }, { "epoch": 0.36357686329051786, "grad_norm": 0.311116099357605, "learning_rate": 9.986328906199337e-06, "loss": 0.0076, "step": 22220 }, { "epoch": 0.3637404892415937, "grad_norm": 0.3970174789428711, "learning_rate": 9.986258469056697e-06, "loss": 0.0088, "step": 22230 }, { "epoch": 0.3639041151926696, "grad_norm": 0.2976403534412384, "learning_rate": 9.986187851174171e-06, "loss": 0.0056, "step": 22240 }, { "epoch": 0.3640677411437454, "grad_norm": 0.23498351871967316, "learning_rate": 9.98611705255432e-06, "loss": 0.0073, "step": 22250 }, { "epoch": 0.36423136709482123, "grad_norm": 0.23971201479434967, "learning_rate": 9.986046073199711e-06, "loss": 0.006, "step": 22260 }, { "epoch": 0.36439499304589706, "grad_norm": 0.2019869089126587, "learning_rate": 9.985974913112914e-06, "loss": 0.0057, "step": 22270 }, { "epoch": 0.3645586189969729, "grad_norm": 0.30041128396987915, "learning_rate": 9.98590357229651e-06, "loss": 0.0053, "step": 22280 }, { "epoch": 0.3647222449480488, "grad_norm": 0.1984991729259491, "learning_rate": 9.985832050753085e-06, "loss": 0.0052, "step": 22290 }, { "epoch": 0.3648858708991246, "grad_norm": 0.467449814081192, "learning_rate": 9.985760348485232e-06, "loss": 0.01, "step": 22300 }, { "epoch": 0.36504949685020044, "grad_norm": 0.15146076679229736, "learning_rate": 9.985688465495549e-06, "loss": 0.0057, "step": 22310 }, { "epoch": 0.36521312280127627, "grad_norm": 0.1645674705505371, "learning_rate": 9.985616401786641e-06, "loss": 0.0056, "step": 22320 }, { "epoch": 0.3653767487523521, "grad_norm": 0.38499942421913147, "learning_rate": 9.985544157361122e-06, "loss": 0.0049, "step": 22330 }, { "epoch": 0.365540374703428, "grad_norm": 0.21839094161987305, "learning_rate": 9.985471732221609e-06, "loss": 0.0035, "step": 22340 }, { "epoch": 0.3657040006545038, "grad_norm": 0.023548418655991554, "learning_rate": 9.985399126370729e-06, "loss": 0.0083, "step": 22350 }, { "epoch": 0.36586762660557964, "grad_norm": 0.19656912982463837, "learning_rate": 9.985326339811113e-06, "loss": 0.0046, "step": 22360 }, { "epoch": 0.3660312525566555, "grad_norm": 0.4063679277896881, "learning_rate": 9.985253372545396e-06, "loss": 0.0063, "step": 22370 }, { "epoch": 0.3661948785077313, "grad_norm": 0.19815057516098022, "learning_rate": 9.985180224576228e-06, "loss": 0.0055, "step": 22380 }, { "epoch": 0.3663585044588072, "grad_norm": 0.4024091362953186, "learning_rate": 9.98510689590626e-06, "loss": 0.0056, "step": 22390 }, { "epoch": 0.366522130409883, "grad_norm": 0.12570717930793762, "learning_rate": 9.985033386538146e-06, "loss": 0.0037, "step": 22400 }, { "epoch": 0.36668575636095885, "grad_norm": 0.36101505160331726, "learning_rate": 9.984959696474555e-06, "loss": 0.0056, "step": 22410 }, { "epoch": 0.3668493823120347, "grad_norm": 0.08656579256057739, "learning_rate": 9.984885825718155e-06, "loss": 0.0053, "step": 22420 }, { "epoch": 0.3670130082631105, "grad_norm": 0.5284775495529175, "learning_rate": 9.984811774271624e-06, "loss": 0.0044, "step": 22430 }, { "epoch": 0.3671766342141864, "grad_norm": 0.09360700845718384, "learning_rate": 9.984737542137648e-06, "loss": 0.0066, "step": 22440 }, { "epoch": 0.3673402601652622, "grad_norm": 0.2975481152534485, "learning_rate": 9.984663129318917e-06, "loss": 0.0045, "step": 22450 }, { "epoch": 0.36750388611633805, "grad_norm": 0.3162071406841278, "learning_rate": 9.984588535818126e-06, "loss": 0.0063, "step": 22460 }, { "epoch": 0.3676675120674139, "grad_norm": 0.48850497603416443, "learning_rate": 9.984513761637982e-06, "loss": 0.0053, "step": 22470 }, { "epoch": 0.3678311380184897, "grad_norm": 0.25399038195610046, "learning_rate": 9.984438806781194e-06, "loss": 0.0078, "step": 22480 }, { "epoch": 0.3679947639695656, "grad_norm": 0.28231891989707947, "learning_rate": 9.98436367125048e-06, "loss": 0.0078, "step": 22490 }, { "epoch": 0.3681583899206414, "grad_norm": 0.21071882545948029, "learning_rate": 9.98428835504856e-06, "loss": 0.0041, "step": 22500 }, { "epoch": 0.36832201587171726, "grad_norm": 0.1944991797208786, "learning_rate": 9.984212858178169e-06, "loss": 0.009, "step": 22510 }, { "epoch": 0.3684856418227931, "grad_norm": 0.06897173821926117, "learning_rate": 9.984137180642039e-06, "loss": 0.0058, "step": 22520 }, { "epoch": 0.3686492677738689, "grad_norm": 0.17861177027225494, "learning_rate": 9.984061322442917e-06, "loss": 0.0046, "step": 22530 }, { "epoch": 0.3688128937249448, "grad_norm": 0.45208612084388733, "learning_rate": 9.983985283583551e-06, "loss": 0.0053, "step": 22540 }, { "epoch": 0.36897651967602063, "grad_norm": 0.21381516754627228, "learning_rate": 9.983909064066695e-06, "loss": 0.0067, "step": 22550 }, { "epoch": 0.36914014562709646, "grad_norm": 0.15785840153694153, "learning_rate": 9.983832663895115e-06, "loss": 0.0061, "step": 22560 }, { "epoch": 0.3693037715781723, "grad_norm": 0.13654188811779022, "learning_rate": 9.983756083071581e-06, "loss": 0.0057, "step": 22570 }, { "epoch": 0.3694673975292481, "grad_norm": 0.25045299530029297, "learning_rate": 9.983679321598865e-06, "loss": 0.0031, "step": 22580 }, { "epoch": 0.369631023480324, "grad_norm": 0.23947203159332275, "learning_rate": 9.983602379479752e-06, "loss": 0.0059, "step": 22590 }, { "epoch": 0.36979464943139984, "grad_norm": 0.36851584911346436, "learning_rate": 9.98352525671703e-06, "loss": 0.0065, "step": 22600 }, { "epoch": 0.36995827538247567, "grad_norm": 0.2738041877746582, "learning_rate": 9.983447953313497e-06, "loss": 0.0059, "step": 22610 }, { "epoch": 0.3701219013335515, "grad_norm": 0.39628249406814575, "learning_rate": 9.983370469271951e-06, "loss": 0.0069, "step": 22620 }, { "epoch": 0.3702855272846273, "grad_norm": 0.7322098612785339, "learning_rate": 9.983292804595205e-06, "loss": 0.0092, "step": 22630 }, { "epoch": 0.3704491532357032, "grad_norm": 0.2309613823890686, "learning_rate": 9.983214959286071e-06, "loss": 0.0056, "step": 22640 }, { "epoch": 0.37061277918677904, "grad_norm": 0.33896979689598083, "learning_rate": 9.983136933347371e-06, "loss": 0.0077, "step": 22650 }, { "epoch": 0.37077640513785487, "grad_norm": 0.38524144887924194, "learning_rate": 9.983058726781934e-06, "loss": 0.009, "step": 22660 }, { "epoch": 0.3709400310889307, "grad_norm": 0.39209043979644775, "learning_rate": 9.982980339592594e-06, "loss": 0.0109, "step": 22670 }, { "epoch": 0.37110365704000653, "grad_norm": 0.34785038232803345, "learning_rate": 9.982901771782195e-06, "loss": 0.0087, "step": 22680 }, { "epoch": 0.37126728299108236, "grad_norm": 0.19190365076065063, "learning_rate": 9.982823023353581e-06, "loss": 0.0056, "step": 22690 }, { "epoch": 0.37143090894215824, "grad_norm": 0.30775558948516846, "learning_rate": 9.982744094309608e-06, "loss": 0.0073, "step": 22700 }, { "epoch": 0.3715945348932341, "grad_norm": 0.2999928891658783, "learning_rate": 9.982664984653138e-06, "loss": 0.0045, "step": 22710 }, { "epoch": 0.3717581608443099, "grad_norm": 0.2771570682525635, "learning_rate": 9.982585694387036e-06, "loss": 0.0037, "step": 22720 }, { "epoch": 0.37192178679538573, "grad_norm": 0.18505041301250458, "learning_rate": 9.982506223514181e-06, "loss": 0.0053, "step": 22730 }, { "epoch": 0.37208541274646156, "grad_norm": 0.23630085587501526, "learning_rate": 9.982426572037449e-06, "loss": 0.0037, "step": 22740 }, { "epoch": 0.37224903869753745, "grad_norm": 0.25697746872901917, "learning_rate": 9.98234673995973e-06, "loss": 0.0055, "step": 22750 }, { "epoch": 0.3724126646486133, "grad_norm": 0.3935088515281677, "learning_rate": 9.982266727283915e-06, "loss": 0.0059, "step": 22760 }, { "epoch": 0.3725762905996891, "grad_norm": 0.2000584602355957, "learning_rate": 9.982186534012905e-06, "loss": 0.0037, "step": 22770 }, { "epoch": 0.37273991655076494, "grad_norm": 0.19533513486385345, "learning_rate": 9.982106160149609e-06, "loss": 0.0041, "step": 22780 }, { "epoch": 0.37290354250184077, "grad_norm": 0.21803849935531616, "learning_rate": 9.982025605696939e-06, "loss": 0.0059, "step": 22790 }, { "epoch": 0.37306716845291665, "grad_norm": 0.1725212186574936, "learning_rate": 9.981944870657813e-06, "loss": 0.0069, "step": 22800 }, { "epoch": 0.3732307944039925, "grad_norm": 0.23436790704727173, "learning_rate": 9.981863955035163e-06, "loss": 0.0056, "step": 22810 }, { "epoch": 0.3733944203550683, "grad_norm": 0.1467805802822113, "learning_rate": 9.981782858831914e-06, "loss": 0.0053, "step": 22820 }, { "epoch": 0.37355804630614414, "grad_norm": 0.2499193698167801, "learning_rate": 9.981701582051011e-06, "loss": 0.0051, "step": 22830 }, { "epoch": 0.37372167225722, "grad_norm": 0.265960156917572, "learning_rate": 9.9816201246954e-06, "loss": 0.0046, "step": 22840 }, { "epoch": 0.37388529820829586, "grad_norm": 0.3409392237663269, "learning_rate": 9.981538486768032e-06, "loss": 0.0052, "step": 22850 }, { "epoch": 0.3740489241593717, "grad_norm": 0.37110787630081177, "learning_rate": 9.981456668271867e-06, "loss": 0.0077, "step": 22860 }, { "epoch": 0.3742125501104475, "grad_norm": 0.2040826827287674, "learning_rate": 9.981374669209869e-06, "loss": 0.0057, "step": 22870 }, { "epoch": 0.37437617606152335, "grad_norm": 0.24345263838768005, "learning_rate": 9.981292489585013e-06, "loss": 0.0063, "step": 22880 }, { "epoch": 0.3745398020125992, "grad_norm": 0.34455573558807373, "learning_rate": 9.981210129400276e-06, "loss": 0.004, "step": 22890 }, { "epoch": 0.37470342796367506, "grad_norm": 0.1967410147190094, "learning_rate": 9.981127588658644e-06, "loss": 0.0047, "step": 22900 }, { "epoch": 0.3748670539147509, "grad_norm": 0.11044573038816452, "learning_rate": 9.981044867363107e-06, "loss": 0.0052, "step": 22910 }, { "epoch": 0.3750306798658267, "grad_norm": 0.146162748336792, "learning_rate": 9.98096196551667e-06, "loss": 0.0048, "step": 22920 }, { "epoch": 0.37519430581690255, "grad_norm": 0.05192472040653229, "learning_rate": 9.980878883122329e-06, "loss": 0.0036, "step": 22930 }, { "epoch": 0.3753579317679784, "grad_norm": 0.3095148205757141, "learning_rate": 9.9807956201831e-06, "loss": 0.006, "step": 22940 }, { "epoch": 0.37552155771905427, "grad_norm": 0.22653578221797943, "learning_rate": 9.980712176702002e-06, "loss": 0.0046, "step": 22950 }, { "epoch": 0.3756851836701301, "grad_norm": 0.3559248745441437, "learning_rate": 9.980628552682056e-06, "loss": 0.0043, "step": 22960 }, { "epoch": 0.3758488096212059, "grad_norm": 0.1362975537776947, "learning_rate": 9.9805447481263e-06, "loss": 0.0054, "step": 22970 }, { "epoch": 0.37601243557228176, "grad_norm": 0.2152615487575531, "learning_rate": 9.980460763037763e-06, "loss": 0.0036, "step": 22980 }, { "epoch": 0.3761760615233576, "grad_norm": 0.5250092148780823, "learning_rate": 9.980376597419496e-06, "loss": 0.0065, "step": 22990 }, { "epoch": 0.37633968747443347, "grad_norm": 0.24772511422634125, "learning_rate": 9.980292251274546e-06, "loss": 0.0071, "step": 23000 }, { "epoch": 0.3765033134255093, "grad_norm": 0.1705523431301117, "learning_rate": 9.980207724605972e-06, "loss": 0.0054, "step": 23010 }, { "epoch": 0.37666693937658513, "grad_norm": 0.07682956010103226, "learning_rate": 9.980123017416838e-06, "loss": 0.0054, "step": 23020 }, { "epoch": 0.37683056532766096, "grad_norm": 0.18210425972938538, "learning_rate": 9.980038129710214e-06, "loss": 0.0053, "step": 23030 }, { "epoch": 0.3769941912787368, "grad_norm": 0.1908276379108429, "learning_rate": 9.979953061489179e-06, "loss": 0.0076, "step": 23040 }, { "epoch": 0.3771578172298127, "grad_norm": 0.44596797227859497, "learning_rate": 9.979867812756811e-06, "loss": 0.0069, "step": 23050 }, { "epoch": 0.3773214431808885, "grad_norm": 0.29015466570854187, "learning_rate": 9.979782383516206e-06, "loss": 0.0058, "step": 23060 }, { "epoch": 0.37748506913196433, "grad_norm": 0.17226456105709076, "learning_rate": 9.979696773770458e-06, "loss": 0.0043, "step": 23070 }, { "epoch": 0.37764869508304016, "grad_norm": 0.06641542166471481, "learning_rate": 9.97961098352267e-06, "loss": 0.005, "step": 23080 }, { "epoch": 0.377812321034116, "grad_norm": 0.3365395963191986, "learning_rate": 9.97952501277595e-06, "loss": 0.0065, "step": 23090 }, { "epoch": 0.3779759469851918, "grad_norm": 0.799506425857544, "learning_rate": 9.97943886153342e-06, "loss": 0.0046, "step": 23100 }, { "epoch": 0.3781395729362677, "grad_norm": 0.18472743034362793, "learning_rate": 9.979352529798195e-06, "loss": 0.0045, "step": 23110 }, { "epoch": 0.37830319888734354, "grad_norm": 0.13883015513420105, "learning_rate": 9.979266017573411e-06, "loss": 0.0045, "step": 23120 }, { "epoch": 0.37846682483841937, "grad_norm": 0.3223952054977417, "learning_rate": 9.9791793248622e-06, "loss": 0.0072, "step": 23130 }, { "epoch": 0.3786304507894952, "grad_norm": 0.499763160943985, "learning_rate": 9.979092451667707e-06, "loss": 0.0054, "step": 23140 }, { "epoch": 0.37879407674057103, "grad_norm": 0.4987930655479431, "learning_rate": 9.979005397993078e-06, "loss": 0.0069, "step": 23150 }, { "epoch": 0.3789577026916469, "grad_norm": 0.1661977916955948, "learning_rate": 9.97891816384147e-06, "loss": 0.0052, "step": 23160 }, { "epoch": 0.37912132864272274, "grad_norm": 0.30234283208847046, "learning_rate": 9.978830749216046e-06, "loss": 0.0061, "step": 23170 }, { "epoch": 0.3792849545937986, "grad_norm": 0.4478006660938263, "learning_rate": 9.978743154119975e-06, "loss": 0.0052, "step": 23180 }, { "epoch": 0.3794485805448744, "grad_norm": 0.15942557156085968, "learning_rate": 9.978655378556429e-06, "loss": 0.0045, "step": 23190 }, { "epoch": 0.37961220649595023, "grad_norm": 0.37059086561203003, "learning_rate": 9.97856742252859e-06, "loss": 0.0047, "step": 23200 }, { "epoch": 0.3797758324470261, "grad_norm": 0.3322787284851074, "learning_rate": 9.97847928603965e-06, "loss": 0.0069, "step": 23210 }, { "epoch": 0.37993945839810195, "grad_norm": 0.4209263026714325, "learning_rate": 9.978390969092802e-06, "loss": 0.0065, "step": 23220 }, { "epoch": 0.3801030843491778, "grad_norm": 0.1979064792394638, "learning_rate": 9.978302471691245e-06, "loss": 0.0041, "step": 23230 }, { "epoch": 0.3802667103002536, "grad_norm": 0.20982396602630615, "learning_rate": 9.978213793838188e-06, "loss": 0.0053, "step": 23240 }, { "epoch": 0.38043033625132944, "grad_norm": 0.13042889535427094, "learning_rate": 9.978124935536848e-06, "loss": 0.0041, "step": 23250 }, { "epoch": 0.3805939622024053, "grad_norm": 0.20270025730133057, "learning_rate": 9.978035896790442e-06, "loss": 0.0034, "step": 23260 }, { "epoch": 0.38075758815348115, "grad_norm": 0.6419389247894287, "learning_rate": 9.977946677602198e-06, "loss": 0.0062, "step": 23270 }, { "epoch": 0.380921214104557, "grad_norm": 0.20245814323425293, "learning_rate": 9.977857277975353e-06, "loss": 0.0045, "step": 23280 }, { "epoch": 0.3810848400556328, "grad_norm": 0.3386939764022827, "learning_rate": 9.977767697913145e-06, "loss": 0.0067, "step": 23290 }, { "epoch": 0.38124846600670864, "grad_norm": 0.19438815116882324, "learning_rate": 9.977677937418821e-06, "loss": 0.0042, "step": 23300 }, { "epoch": 0.3814120919577845, "grad_norm": 0.3810281753540039, "learning_rate": 9.977587996495636e-06, "loss": 0.0068, "step": 23310 }, { "epoch": 0.38157571790886036, "grad_norm": 0.5735651254653931, "learning_rate": 9.97749787514685e-06, "loss": 0.0045, "step": 23320 }, { "epoch": 0.3817393438599362, "grad_norm": 0.16405542194843292, "learning_rate": 9.977407573375728e-06, "loss": 0.0049, "step": 23330 }, { "epoch": 0.381902969811012, "grad_norm": 0.1767434924840927, "learning_rate": 9.977317091185545e-06, "loss": 0.006, "step": 23340 }, { "epoch": 0.38206659576208785, "grad_norm": 0.19959715008735657, "learning_rate": 9.97722642857958e-06, "loss": 0.0052, "step": 23350 }, { "epoch": 0.38223022171316373, "grad_norm": 0.4158332347869873, "learning_rate": 9.97713558556112e-06, "loss": 0.0057, "step": 23360 }, { "epoch": 0.38239384766423956, "grad_norm": 0.1551690548658371, "learning_rate": 9.977044562133458e-06, "loss": 0.0044, "step": 23370 }, { "epoch": 0.3825574736153154, "grad_norm": 0.4347027838230133, "learning_rate": 9.97695335829989e-06, "loss": 0.0066, "step": 23380 }, { "epoch": 0.3827210995663912, "grad_norm": 0.33756881952285767, "learning_rate": 9.976861974063728e-06, "loss": 0.0052, "step": 23390 }, { "epoch": 0.38288472551746705, "grad_norm": 0.07451612502336502, "learning_rate": 9.976770409428278e-06, "loss": 0.0048, "step": 23400 }, { "epoch": 0.38304835146854294, "grad_norm": 0.5526024699211121, "learning_rate": 9.976678664396862e-06, "loss": 0.0052, "step": 23410 }, { "epoch": 0.38321197741961877, "grad_norm": 0.22901882231235504, "learning_rate": 9.976586738972806e-06, "loss": 0.0059, "step": 23420 }, { "epoch": 0.3833756033706946, "grad_norm": 0.39812806248664856, "learning_rate": 9.976494633159444e-06, "loss": 0.0084, "step": 23430 }, { "epoch": 0.3835392293217704, "grad_norm": 0.14845570921897888, "learning_rate": 9.97640234696011e-06, "loss": 0.0043, "step": 23440 }, { "epoch": 0.38370285527284625, "grad_norm": 0.15554490685462952, "learning_rate": 9.976309880378152e-06, "loss": 0.0077, "step": 23450 }, { "epoch": 0.38386648122392214, "grad_norm": 0.471924751996994, "learning_rate": 9.976217233416921e-06, "loss": 0.0048, "step": 23460 }, { "epoch": 0.38403010717499797, "grad_norm": 0.4548797309398651, "learning_rate": 9.976124406079775e-06, "loss": 0.0057, "step": 23470 }, { "epoch": 0.3841937331260738, "grad_norm": 0.4401860535144806, "learning_rate": 9.976031398370078e-06, "loss": 0.0066, "step": 23480 }, { "epoch": 0.38435735907714963, "grad_norm": 0.2766363024711609, "learning_rate": 9.975938210291206e-06, "loss": 0.0044, "step": 23490 }, { "epoch": 0.38452098502822546, "grad_norm": 0.366016685962677, "learning_rate": 9.97584484184653e-06, "loss": 0.0055, "step": 23500 }, { "epoch": 0.38468461097930134, "grad_norm": 0.4660361409187317, "learning_rate": 9.975751293039437e-06, "loss": 0.0045, "step": 23510 }, { "epoch": 0.3848482369303772, "grad_norm": 0.24821795523166656, "learning_rate": 9.975657563873322e-06, "loss": 0.0103, "step": 23520 }, { "epoch": 0.385011862881453, "grad_norm": 0.2739928066730499, "learning_rate": 9.975563654351577e-06, "loss": 0.0049, "step": 23530 }, { "epoch": 0.38517548883252883, "grad_norm": 0.04939445108175278, "learning_rate": 9.975469564477606e-06, "loss": 0.0038, "step": 23540 }, { "epoch": 0.38533911478360466, "grad_norm": 0.3742635250091553, "learning_rate": 9.975375294254824e-06, "loss": 0.0076, "step": 23550 }, { "epoch": 0.3855027407346805, "grad_norm": 0.25204190611839294, "learning_rate": 9.975280843686645e-06, "loss": 0.0044, "step": 23560 }, { "epoch": 0.3856663666857564, "grad_norm": 0.28611651062965393, "learning_rate": 9.975186212776493e-06, "loss": 0.0063, "step": 23570 }, { "epoch": 0.3858299926368322, "grad_norm": 0.2678607702255249, "learning_rate": 9.975091401527799e-06, "loss": 0.0048, "step": 23580 }, { "epoch": 0.38599361858790804, "grad_norm": 0.2967936396598816, "learning_rate": 9.974996409943998e-06, "loss": 0.0045, "step": 23590 }, { "epoch": 0.38615724453898387, "grad_norm": 0.41232213377952576, "learning_rate": 9.974901238028536e-06, "loss": 0.0087, "step": 23600 }, { "epoch": 0.3863208704900597, "grad_norm": 0.26939794421195984, "learning_rate": 9.974805885784859e-06, "loss": 0.006, "step": 23610 }, { "epoch": 0.3864844964411356, "grad_norm": 0.32232874631881714, "learning_rate": 9.974710353216427e-06, "loss": 0.0043, "step": 23620 }, { "epoch": 0.3866481223922114, "grad_norm": 0.22538581490516663, "learning_rate": 9.974614640326699e-06, "loss": 0.0039, "step": 23630 }, { "epoch": 0.38681174834328724, "grad_norm": 0.19105230271816254, "learning_rate": 9.974518747119148e-06, "loss": 0.0061, "step": 23640 }, { "epoch": 0.3869753742943631, "grad_norm": 0.16645918786525726, "learning_rate": 9.974422673597248e-06, "loss": 0.0046, "step": 23650 }, { "epoch": 0.3871390002454389, "grad_norm": 0.18031635880470276, "learning_rate": 9.974326419764482e-06, "loss": 0.0059, "step": 23660 }, { "epoch": 0.3873026261965148, "grad_norm": 0.26605668663978577, "learning_rate": 9.97422998562434e-06, "loss": 0.0056, "step": 23670 }, { "epoch": 0.3874662521475906, "grad_norm": 0.13481472432613373, "learning_rate": 9.974133371180314e-06, "loss": 0.008, "step": 23680 }, { "epoch": 0.38762987809866645, "grad_norm": 0.22045265138149261, "learning_rate": 9.974036576435909e-06, "loss": 0.006, "step": 23690 }, { "epoch": 0.3877935040497423, "grad_norm": 0.23106570541858673, "learning_rate": 9.973939601394634e-06, "loss": 0.0041, "step": 23700 }, { "epoch": 0.3879571300008181, "grad_norm": 0.09335854649543762, "learning_rate": 9.97384244606e-06, "loss": 0.0046, "step": 23710 }, { "epoch": 0.388120755951894, "grad_norm": 0.17957039177417755, "learning_rate": 9.973745110435536e-06, "loss": 0.0064, "step": 23720 }, { "epoch": 0.3882843819029698, "grad_norm": 0.1819697767496109, "learning_rate": 9.973647594524762e-06, "loss": 0.0051, "step": 23730 }, { "epoch": 0.38844800785404565, "grad_norm": 0.46709126234054565, "learning_rate": 9.973549898331219e-06, "loss": 0.0038, "step": 23740 }, { "epoch": 0.3886116338051215, "grad_norm": 0.2450115829706192, "learning_rate": 9.973452021858445e-06, "loss": 0.0047, "step": 23750 }, { "epoch": 0.3887752597561973, "grad_norm": 0.3151560127735138, "learning_rate": 9.973353965109987e-06, "loss": 0.0047, "step": 23760 }, { "epoch": 0.3889388857072732, "grad_norm": 0.26489320397377014, "learning_rate": 9.973255728089403e-06, "loss": 0.0075, "step": 23770 }, { "epoch": 0.389102511658349, "grad_norm": 0.4936962127685547, "learning_rate": 9.973157310800251e-06, "loss": 0.0054, "step": 23780 }, { "epoch": 0.38926613760942486, "grad_norm": 0.1364099383354187, "learning_rate": 9.9730587132461e-06, "loss": 0.0061, "step": 23790 }, { "epoch": 0.3894297635605007, "grad_norm": 0.43895190954208374, "learning_rate": 9.972959935430522e-06, "loss": 0.0076, "step": 23800 }, { "epoch": 0.3895933895115765, "grad_norm": 0.2118120640516281, "learning_rate": 9.972860977357099e-06, "loss": 0.0065, "step": 23810 }, { "epoch": 0.3897570154626524, "grad_norm": 0.28332260251045227, "learning_rate": 9.972761839029416e-06, "loss": 0.0094, "step": 23820 }, { "epoch": 0.38992064141372823, "grad_norm": 0.18957801163196564, "learning_rate": 9.972662520451071e-06, "loss": 0.0069, "step": 23830 }, { "epoch": 0.39008426736480406, "grad_norm": 0.10194365680217743, "learning_rate": 9.972563021625658e-06, "loss": 0.0042, "step": 23840 }, { "epoch": 0.3902478933158799, "grad_norm": 0.07737588882446289, "learning_rate": 9.972463342556789e-06, "loss": 0.0076, "step": 23850 }, { "epoch": 0.3904115192669557, "grad_norm": 0.07712378352880478, "learning_rate": 9.972363483248075e-06, "loss": 0.0055, "step": 23860 }, { "epoch": 0.3905751452180316, "grad_norm": 0.26023125648498535, "learning_rate": 9.972263443703134e-06, "loss": 0.0057, "step": 23870 }, { "epoch": 0.39073877116910744, "grad_norm": 0.6190078258514404, "learning_rate": 9.972163223925595e-06, "loss": 0.0078, "step": 23880 }, { "epoch": 0.39090239712018326, "grad_norm": 0.24326372146606445, "learning_rate": 9.972062823919088e-06, "loss": 0.0047, "step": 23890 }, { "epoch": 0.3910660230712591, "grad_norm": 0.29368191957473755, "learning_rate": 9.971962243687254e-06, "loss": 0.0052, "step": 23900 }, { "epoch": 0.3912296490223349, "grad_norm": 0.43202829360961914, "learning_rate": 9.971861483233738e-06, "loss": 0.0041, "step": 23910 }, { "epoch": 0.3913932749734108, "grad_norm": 0.415345698595047, "learning_rate": 9.971760542562195e-06, "loss": 0.0034, "step": 23920 }, { "epoch": 0.39155690092448664, "grad_norm": 0.40836766362190247, "learning_rate": 9.97165942167628e-06, "loss": 0.0032, "step": 23930 }, { "epoch": 0.39172052687556247, "grad_norm": 0.2623162567615509, "learning_rate": 9.971558120579661e-06, "loss": 0.0064, "step": 23940 }, { "epoch": 0.3918841528266383, "grad_norm": 0.18547464907169342, "learning_rate": 9.971456639276007e-06, "loss": 0.0069, "step": 23950 }, { "epoch": 0.39204777877771413, "grad_norm": 0.3880669176578522, "learning_rate": 9.971354977769e-06, "loss": 0.0045, "step": 23960 }, { "epoch": 0.39221140472878996, "grad_norm": 0.4460575580596924, "learning_rate": 9.971253136062323e-06, "loss": 0.0063, "step": 23970 }, { "epoch": 0.39237503067986584, "grad_norm": 0.12719711661338806, "learning_rate": 9.971151114159668e-06, "loss": 0.0044, "step": 23980 }, { "epoch": 0.3925386566309417, "grad_norm": 0.0935448482632637, "learning_rate": 9.971048912064733e-06, "loss": 0.0061, "step": 23990 }, { "epoch": 0.3927022825820175, "grad_norm": 0.3725396692752838, "learning_rate": 9.970946529781223e-06, "loss": 0.0055, "step": 24000 }, { "epoch": 0.39286590853309333, "grad_norm": 0.24368101358413696, "learning_rate": 9.970843967312848e-06, "loss": 0.0052, "step": 24010 }, { "epoch": 0.39302953448416916, "grad_norm": 0.21469877660274506, "learning_rate": 9.970741224663326e-06, "loss": 0.0045, "step": 24020 }, { "epoch": 0.39319316043524505, "grad_norm": 0.19806301593780518, "learning_rate": 9.970638301836382e-06, "loss": 0.006, "step": 24030 }, { "epoch": 0.3933567863863209, "grad_norm": 0.18765555322170258, "learning_rate": 9.970535198835748e-06, "loss": 0.0044, "step": 24040 }, { "epoch": 0.3935204123373967, "grad_norm": 0.29159626364707947, "learning_rate": 9.970431915665156e-06, "loss": 0.0058, "step": 24050 }, { "epoch": 0.39368403828847254, "grad_norm": 0.2907009422779083, "learning_rate": 9.970328452328354e-06, "loss": 0.0031, "step": 24060 }, { "epoch": 0.39384766423954837, "grad_norm": 0.38016605377197266, "learning_rate": 9.970224808829092e-06, "loss": 0.004, "step": 24070 }, { "epoch": 0.39401129019062425, "grad_norm": 0.2068358212709427, "learning_rate": 9.970120985171127e-06, "loss": 0.008, "step": 24080 }, { "epoch": 0.3941749161417001, "grad_norm": 0.09498483687639236, "learning_rate": 9.970016981358222e-06, "loss": 0.0063, "step": 24090 }, { "epoch": 0.3943385420927759, "grad_norm": 0.22719818353652954, "learning_rate": 9.969912797394144e-06, "loss": 0.0028, "step": 24100 }, { "epoch": 0.39450216804385174, "grad_norm": 0.8270195722579956, "learning_rate": 9.969808433282675e-06, "loss": 0.0053, "step": 24110 }, { "epoch": 0.39466579399492757, "grad_norm": 0.2571798861026764, "learning_rate": 9.969703889027593e-06, "loss": 0.0106, "step": 24120 }, { "epoch": 0.39482941994600346, "grad_norm": 0.36881402134895325, "learning_rate": 9.969599164632689e-06, "loss": 0.0044, "step": 24130 }, { "epoch": 0.3949930458970793, "grad_norm": 0.4917198717594147, "learning_rate": 9.969494260101762e-06, "loss": 0.0042, "step": 24140 }, { "epoch": 0.3951566718481551, "grad_norm": 0.4277592599391937, "learning_rate": 9.969389175438609e-06, "loss": 0.0054, "step": 24150 }, { "epoch": 0.39532029779923095, "grad_norm": 0.2754640281200409, "learning_rate": 9.969283910647042e-06, "loss": 0.0098, "step": 24160 }, { "epoch": 0.3954839237503068, "grad_norm": 0.31599128246307373, "learning_rate": 9.969178465730879e-06, "loss": 0.0046, "step": 24170 }, { "epoch": 0.39564754970138266, "grad_norm": 0.2879290282726288, "learning_rate": 9.969072840693938e-06, "loss": 0.0045, "step": 24180 }, { "epoch": 0.3958111756524585, "grad_norm": 0.09363383054733276, "learning_rate": 9.968967035540051e-06, "loss": 0.0044, "step": 24190 }, { "epoch": 0.3959748016035343, "grad_norm": 0.3829444646835327, "learning_rate": 9.968861050273049e-06, "loss": 0.0045, "step": 24200 }, { "epoch": 0.39613842755461015, "grad_norm": 0.2220722734928131, "learning_rate": 9.968754884896778e-06, "loss": 0.0063, "step": 24210 }, { "epoch": 0.396302053505686, "grad_norm": 0.3211401402950287, "learning_rate": 9.968648539415085e-06, "loss": 0.0061, "step": 24220 }, { "epoch": 0.39646567945676187, "grad_norm": 0.49462971091270447, "learning_rate": 9.968542013831824e-06, "loss": 0.0058, "step": 24230 }, { "epoch": 0.3966293054078377, "grad_norm": 0.1990286409854889, "learning_rate": 9.968435308150856e-06, "loss": 0.0055, "step": 24240 }, { "epoch": 0.3967929313589135, "grad_norm": 0.22975203394889832, "learning_rate": 9.96832842237605e-06, "loss": 0.0049, "step": 24250 }, { "epoch": 0.39695655730998936, "grad_norm": 0.3899569809436798, "learning_rate": 9.968221356511278e-06, "loss": 0.0072, "step": 24260 }, { "epoch": 0.3971201832610652, "grad_norm": 0.21084776520729065, "learning_rate": 9.968114110560424e-06, "loss": 0.0059, "step": 24270 }, { "epoch": 0.39728380921214107, "grad_norm": 0.42953652143478394, "learning_rate": 9.968006684527374e-06, "loss": 0.0042, "step": 24280 }, { "epoch": 0.3974474351632169, "grad_norm": 0.25477996468544006, "learning_rate": 9.967899078416022e-06, "loss": 0.0033, "step": 24290 }, { "epoch": 0.39761106111429273, "grad_norm": 0.36159929633140564, "learning_rate": 9.967791292230268e-06, "loss": 0.0052, "step": 24300 }, { "epoch": 0.39777468706536856, "grad_norm": 0.3326166570186615, "learning_rate": 9.96768332597402e-06, "loss": 0.0093, "step": 24310 }, { "epoch": 0.3979383130164444, "grad_norm": 0.5496561527252197, "learning_rate": 9.967575179651191e-06, "loss": 0.0054, "step": 24320 }, { "epoch": 0.3981019389675203, "grad_norm": 0.1711733490228653, "learning_rate": 9.967466853265701e-06, "loss": 0.0032, "step": 24330 }, { "epoch": 0.3982655649185961, "grad_norm": 0.25165197253227234, "learning_rate": 9.967358346821476e-06, "loss": 0.0056, "step": 24340 }, { "epoch": 0.39842919086967193, "grad_norm": 0.19688613712787628, "learning_rate": 9.96724966032245e-06, "loss": 0.0037, "step": 24350 }, { "epoch": 0.39859281682074776, "grad_norm": 0.25742360949516296, "learning_rate": 9.967140793772563e-06, "loss": 0.0061, "step": 24360 }, { "epoch": 0.3987564427718236, "grad_norm": 0.40701621770858765, "learning_rate": 9.96703174717576e-06, "loss": 0.0053, "step": 24370 }, { "epoch": 0.3989200687228995, "grad_norm": 0.3093240261077881, "learning_rate": 9.966922520535993e-06, "loss": 0.0049, "step": 24380 }, { "epoch": 0.3990836946739753, "grad_norm": 0.2780626714229584, "learning_rate": 9.966813113857223e-06, "loss": 0.0045, "step": 24390 }, { "epoch": 0.39924732062505114, "grad_norm": 0.3644535541534424, "learning_rate": 9.966703527143415e-06, "loss": 0.0045, "step": 24400 }, { "epoch": 0.39941094657612697, "grad_norm": 0.11512038111686707, "learning_rate": 9.966593760398542e-06, "loss": 0.0032, "step": 24410 }, { "epoch": 0.3995745725272028, "grad_norm": 0.19217462837696075, "learning_rate": 9.966483813626581e-06, "loss": 0.0057, "step": 24420 }, { "epoch": 0.39973819847827863, "grad_norm": 0.33124637603759766, "learning_rate": 9.966373686831518e-06, "loss": 0.0059, "step": 24430 }, { "epoch": 0.3999018244293545, "grad_norm": 0.19283248484134674, "learning_rate": 9.966263380017348e-06, "loss": 0.0028, "step": 24440 }, { "epoch": 0.40006545038043034, "grad_norm": 0.320963054895401, "learning_rate": 9.966152893188064e-06, "loss": 0.0058, "step": 24450 }, { "epoch": 0.4002290763315062, "grad_norm": 0.3013334572315216, "learning_rate": 9.966042226347674e-06, "loss": 0.0052, "step": 24460 }, { "epoch": 0.400392702282582, "grad_norm": 0.2365194410085678, "learning_rate": 9.965931379500188e-06, "loss": 0.0038, "step": 24470 }, { "epoch": 0.40055632823365783, "grad_norm": 0.1528308242559433, "learning_rate": 9.965820352649626e-06, "loss": 0.0048, "step": 24480 }, { "epoch": 0.4007199541847337, "grad_norm": 0.0778537392616272, "learning_rate": 9.965709145800011e-06, "loss": 0.0066, "step": 24490 }, { "epoch": 0.40088358013580955, "grad_norm": 0.26332536339759827, "learning_rate": 9.965597758955375e-06, "loss": 0.0044, "step": 24500 }, { "epoch": 0.4010472060868854, "grad_norm": 0.24666878581047058, "learning_rate": 9.965486192119754e-06, "loss": 0.0048, "step": 24510 }, { "epoch": 0.4012108320379612, "grad_norm": 0.3467451333999634, "learning_rate": 9.965374445297192e-06, "loss": 0.0046, "step": 24520 }, { "epoch": 0.40137445798903704, "grad_norm": 0.25522151589393616, "learning_rate": 9.965262518491742e-06, "loss": 0.0045, "step": 24530 }, { "epoch": 0.4015380839401129, "grad_norm": 0.4946388006210327, "learning_rate": 9.965150411707458e-06, "loss": 0.0092, "step": 24540 }, { "epoch": 0.40170170989118875, "grad_norm": 0.14260706305503845, "learning_rate": 9.965038124948406e-06, "loss": 0.0057, "step": 24550 }, { "epoch": 0.4018653358422646, "grad_norm": 0.5055813193321228, "learning_rate": 9.964925658218655e-06, "loss": 0.0062, "step": 24560 }, { "epoch": 0.4020289617933404, "grad_norm": 0.14866884052753448, "learning_rate": 9.964813011522281e-06, "loss": 0.0053, "step": 24570 }, { "epoch": 0.40219258774441624, "grad_norm": 0.313092976808548, "learning_rate": 9.964700184863368e-06, "loss": 0.0053, "step": 24580 }, { "epoch": 0.4023562136954921, "grad_norm": 0.40639904141426086, "learning_rate": 9.964587178246006e-06, "loss": 0.0047, "step": 24590 }, { "epoch": 0.40251983964656796, "grad_norm": 0.35632413625717163, "learning_rate": 9.964473991674291e-06, "loss": 0.0061, "step": 24600 }, { "epoch": 0.4026834655976438, "grad_norm": 0.12481741607189178, "learning_rate": 9.964360625152326e-06, "loss": 0.0048, "step": 24610 }, { "epoch": 0.4028470915487196, "grad_norm": 0.10687965154647827, "learning_rate": 9.96424707868422e-06, "loss": 0.0022, "step": 24620 }, { "epoch": 0.40301071749979545, "grad_norm": 0.19886836409568787, "learning_rate": 9.964133352274086e-06, "loss": 0.0041, "step": 24630 }, { "epoch": 0.40317434345087133, "grad_norm": 0.3724556267261505, "learning_rate": 9.964019445926052e-06, "loss": 0.0077, "step": 24640 }, { "epoch": 0.40333796940194716, "grad_norm": 0.1827770471572876, "learning_rate": 9.963905359644243e-06, "loss": 0.0054, "step": 24650 }, { "epoch": 0.403501595353023, "grad_norm": 0.6065165400505066, "learning_rate": 9.963791093432794e-06, "loss": 0.0051, "step": 24660 }, { "epoch": 0.4036652213040988, "grad_norm": 0.18687261641025543, "learning_rate": 9.96367664729585e-06, "loss": 0.007, "step": 24670 }, { "epoch": 0.40382884725517465, "grad_norm": 0.3484671413898468, "learning_rate": 9.963562021237555e-06, "loss": 0.0044, "step": 24680 }, { "epoch": 0.40399247320625054, "grad_norm": 0.32855385541915894, "learning_rate": 9.963447215262067e-06, "loss": 0.0062, "step": 24690 }, { "epoch": 0.40415609915732637, "grad_norm": 0.22319531440734863, "learning_rate": 9.963332229373548e-06, "loss": 0.0061, "step": 24700 }, { "epoch": 0.4043197251084022, "grad_norm": 0.43020373582839966, "learning_rate": 9.963217063576165e-06, "loss": 0.0049, "step": 24710 }, { "epoch": 0.404483351059478, "grad_norm": 0.2594371438026428, "learning_rate": 9.963101717874091e-06, "loss": 0.0048, "step": 24720 }, { "epoch": 0.40464697701055385, "grad_norm": 0.2644640803337097, "learning_rate": 9.962986192271508e-06, "loss": 0.0044, "step": 24730 }, { "epoch": 0.40481060296162974, "grad_norm": 0.17103584110736847, "learning_rate": 9.962870486772605e-06, "loss": 0.0061, "step": 24740 }, { "epoch": 0.40497422891270557, "grad_norm": 0.2089265137910843, "learning_rate": 9.962754601381574e-06, "loss": 0.0056, "step": 24750 }, { "epoch": 0.4051378548637814, "grad_norm": 0.34814906120300293, "learning_rate": 9.962638536102617e-06, "loss": 0.0057, "step": 24760 }, { "epoch": 0.40530148081485723, "grad_norm": 0.27355480194091797, "learning_rate": 9.96252229093994e-06, "loss": 0.004, "step": 24770 }, { "epoch": 0.40546510676593306, "grad_norm": 0.19638685882091522, "learning_rate": 9.962405865897757e-06, "loss": 0.0052, "step": 24780 }, { "epoch": 0.40562873271700894, "grad_norm": 0.36804428696632385, "learning_rate": 9.96228926098029e-06, "loss": 0.0063, "step": 24790 }, { "epoch": 0.4057923586680848, "grad_norm": 0.19375556707382202, "learning_rate": 9.962172476191761e-06, "loss": 0.0049, "step": 24800 }, { "epoch": 0.4059559846191606, "grad_norm": 0.33867430686950684, "learning_rate": 9.962055511536406e-06, "loss": 0.0035, "step": 24810 }, { "epoch": 0.40611961057023643, "grad_norm": 0.2779190242290497, "learning_rate": 9.961938367018467e-06, "loss": 0.0046, "step": 24820 }, { "epoch": 0.40628323652131226, "grad_norm": 0.4429919719696045, "learning_rate": 9.961821042642186e-06, "loss": 0.0073, "step": 24830 }, { "epoch": 0.40644686247238815, "grad_norm": 0.24504682421684265, "learning_rate": 9.961703538411817e-06, "loss": 0.0063, "step": 24840 }, { "epoch": 0.406610488423464, "grad_norm": 0.5340507626533508, "learning_rate": 9.961585854331622e-06, "loss": 0.004, "step": 24850 }, { "epoch": 0.4067741143745398, "grad_norm": 0.10961088538169861, "learning_rate": 9.961467990405865e-06, "loss": 0.0043, "step": 24860 }, { "epoch": 0.40693774032561564, "grad_norm": 0.18699420988559723, "learning_rate": 9.961349946638816e-06, "loss": 0.0054, "step": 24870 }, { "epoch": 0.40710136627669147, "grad_norm": 0.25895199179649353, "learning_rate": 9.961231723034756e-06, "loss": 0.0047, "step": 24880 }, { "epoch": 0.4072649922277673, "grad_norm": 0.19572466611862183, "learning_rate": 9.96111331959797e-06, "loss": 0.0068, "step": 24890 }, { "epoch": 0.4074286181788432, "grad_norm": 0.20218001306056976, "learning_rate": 9.960994736332751e-06, "loss": 0.0035, "step": 24900 }, { "epoch": 0.407592244129919, "grad_norm": 0.17006951570510864, "learning_rate": 9.960875973243396e-06, "loss": 0.0035, "step": 24910 }, { "epoch": 0.40775587008099484, "grad_norm": 0.20042966306209564, "learning_rate": 9.96075703033421e-06, "loss": 0.0036, "step": 24920 }, { "epoch": 0.40791949603207067, "grad_norm": 0.44041380286216736, "learning_rate": 9.960637907609505e-06, "loss": 0.0059, "step": 24930 }, { "epoch": 0.4080831219831465, "grad_norm": 0.5424758791923523, "learning_rate": 9.960518605073596e-06, "loss": 0.0053, "step": 24940 }, { "epoch": 0.4082467479342224, "grad_norm": 0.24550536274909973, "learning_rate": 9.960399122730813e-06, "loss": 0.0066, "step": 24950 }, { "epoch": 0.4084103738852982, "grad_norm": 0.32169726490974426, "learning_rate": 9.960279460585482e-06, "loss": 0.006, "step": 24960 }, { "epoch": 0.40857399983637405, "grad_norm": 0.12515616416931152, "learning_rate": 9.960159618641942e-06, "loss": 0.0074, "step": 24970 }, { "epoch": 0.4087376257874499, "grad_norm": 0.310051828622818, "learning_rate": 9.960039596904538e-06, "loss": 0.0068, "step": 24980 }, { "epoch": 0.4089012517385257, "grad_norm": 0.5180619359016418, "learning_rate": 9.95991939537762e-06, "loss": 0.0045, "step": 24990 }, { "epoch": 0.4090648776896016, "grad_norm": 0.19713109731674194, "learning_rate": 9.959799014065546e-06, "loss": 0.0042, "step": 25000 }, { "epoch": 0.4090648776896016, "eval_loss": 0.0036062640137970448, "eval_runtime": 3.0986, "eval_samples_per_second": 64.545, "eval_steps_per_second": 16.136, "step": 25000 }, { "epoch": 0.4092285036406774, "grad_norm": 0.29881104826927185, "learning_rate": 9.959678452972675e-06, "loss": 0.0071, "step": 25010 }, { "epoch": 0.40939212959175325, "grad_norm": 0.2008904218673706, "learning_rate": 9.959557712103383e-06, "loss": 0.0051, "step": 25020 }, { "epoch": 0.4095557555428291, "grad_norm": 0.3467547297477722, "learning_rate": 9.959436791462043e-06, "loss": 0.0054, "step": 25030 }, { "epoch": 0.4097193814939049, "grad_norm": 0.08272959291934967, "learning_rate": 9.959315691053039e-06, "loss": 0.0078, "step": 25040 }, { "epoch": 0.4098830074449808, "grad_norm": 0.3445785343647003, "learning_rate": 9.959194410880761e-06, "loss": 0.0058, "step": 25050 }, { "epoch": 0.4100466333960566, "grad_norm": 0.0949714183807373, "learning_rate": 9.959072950949603e-06, "loss": 0.0077, "step": 25060 }, { "epoch": 0.41021025934713246, "grad_norm": 0.012899404391646385, "learning_rate": 9.958951311263971e-06, "loss": 0.0035, "step": 25070 }, { "epoch": 0.4103738852982083, "grad_norm": 0.20581640303134918, "learning_rate": 9.958829491828273e-06, "loss": 0.0061, "step": 25080 }, { "epoch": 0.4105375112492841, "grad_norm": 0.19220706820487976, "learning_rate": 9.958707492646921e-06, "loss": 0.005, "step": 25090 }, { "epoch": 0.41070113720036, "grad_norm": 0.32810795307159424, "learning_rate": 9.958585313724345e-06, "loss": 0.0059, "step": 25100 }, { "epoch": 0.41086476315143583, "grad_norm": 0.16524268686771393, "learning_rate": 9.958462955064967e-06, "loss": 0.0052, "step": 25110 }, { "epoch": 0.41102838910251166, "grad_norm": 0.21436114609241486, "learning_rate": 9.958340416673222e-06, "loss": 0.0054, "step": 25120 }, { "epoch": 0.4111920150535875, "grad_norm": 0.29300281405448914, "learning_rate": 9.958217698553556e-06, "loss": 0.004, "step": 25130 }, { "epoch": 0.4113556410046633, "grad_norm": 0.1949840486049652, "learning_rate": 9.958094800710417e-06, "loss": 0.0082, "step": 25140 }, { "epoch": 0.4115192669557392, "grad_norm": 0.42509058117866516, "learning_rate": 9.957971723148254e-06, "loss": 0.0046, "step": 25150 }, { "epoch": 0.41168289290681503, "grad_norm": 0.35835668444633484, "learning_rate": 9.957848465871536e-06, "loss": 0.0057, "step": 25160 }, { "epoch": 0.41184651885789086, "grad_norm": 0.29913750290870667, "learning_rate": 9.957725028884724e-06, "loss": 0.008, "step": 25170 }, { "epoch": 0.4120101448089667, "grad_norm": 0.20176512002944946, "learning_rate": 9.957601412192297e-06, "loss": 0.0059, "step": 25180 }, { "epoch": 0.4121737707600425, "grad_norm": 0.15453588962554932, "learning_rate": 9.957477615798732e-06, "loss": 0.0043, "step": 25190 }, { "epoch": 0.4123373967111184, "grad_norm": 0.13700389862060547, "learning_rate": 9.957353639708522e-06, "loss": 0.008, "step": 25200 }, { "epoch": 0.41250102266219424, "grad_norm": 0.17809541523456573, "learning_rate": 9.957229483926155e-06, "loss": 0.0071, "step": 25210 }, { "epoch": 0.41266464861327007, "grad_norm": 0.41866710782051086, "learning_rate": 9.957105148456133e-06, "loss": 0.004, "step": 25220 }, { "epoch": 0.4128282745643459, "grad_norm": 0.6000007390975952, "learning_rate": 9.956980633302963e-06, "loss": 0.0075, "step": 25230 }, { "epoch": 0.41299190051542173, "grad_norm": 0.22050197422504425, "learning_rate": 9.956855938471162e-06, "loss": 0.0034, "step": 25240 }, { "epoch": 0.4131555264664976, "grad_norm": 0.14122271537780762, "learning_rate": 9.956731063965244e-06, "loss": 0.0052, "step": 25250 }, { "epoch": 0.41331915241757344, "grad_norm": 0.18973210453987122, "learning_rate": 9.95660600978974e-06, "loss": 0.0052, "step": 25260 }, { "epoch": 0.4134827783686493, "grad_norm": 0.34211334586143494, "learning_rate": 9.95648077594918e-06, "loss": 0.0038, "step": 25270 }, { "epoch": 0.4136464043197251, "grad_norm": 0.3392001986503601, "learning_rate": 9.956355362448104e-06, "loss": 0.0054, "step": 25280 }, { "epoch": 0.41381003027080093, "grad_norm": 0.9375589489936829, "learning_rate": 9.95622976929106e-06, "loss": 0.0052, "step": 25290 }, { "epoch": 0.41397365622187676, "grad_norm": 0.3106217384338379, "learning_rate": 9.956103996482596e-06, "loss": 0.0047, "step": 25300 }, { "epoch": 0.41413728217295265, "grad_norm": 0.22567102313041687, "learning_rate": 9.955978044027276e-06, "loss": 0.0051, "step": 25310 }, { "epoch": 0.4143009081240285, "grad_norm": 0.27923932671546936, "learning_rate": 9.955851911929662e-06, "loss": 0.0073, "step": 25320 }, { "epoch": 0.4144645340751043, "grad_norm": 0.18773263692855835, "learning_rate": 9.955725600194328e-06, "loss": 0.0075, "step": 25330 }, { "epoch": 0.41462816002618014, "grad_norm": 0.6497575640678406, "learning_rate": 9.955599108825853e-06, "loss": 0.007, "step": 25340 }, { "epoch": 0.41479178597725597, "grad_norm": 0.062236130237579346, "learning_rate": 9.95547243782882e-06, "loss": 0.0044, "step": 25350 }, { "epoch": 0.41495541192833185, "grad_norm": 0.12795664370059967, "learning_rate": 9.95534558720782e-06, "loss": 0.0052, "step": 25360 }, { "epoch": 0.4151190378794077, "grad_norm": 0.22792263329029083, "learning_rate": 9.955218556967453e-06, "loss": 0.0066, "step": 25370 }, { "epoch": 0.4152826638304835, "grad_norm": 0.2341441959142685, "learning_rate": 9.955091347112324e-06, "loss": 0.005, "step": 25380 }, { "epoch": 0.41544628978155934, "grad_norm": 0.302999883890152, "learning_rate": 9.954963957647042e-06, "loss": 0.0053, "step": 25390 }, { "epoch": 0.41560991573263517, "grad_norm": 0.09005288779735565, "learning_rate": 9.954836388576224e-06, "loss": 0.0046, "step": 25400 }, { "epoch": 0.41577354168371106, "grad_norm": 0.07903607189655304, "learning_rate": 9.954708639904498e-06, "loss": 0.006, "step": 25410 }, { "epoch": 0.4159371676347869, "grad_norm": 0.3181702494621277, "learning_rate": 9.95458071163649e-06, "loss": 0.0052, "step": 25420 }, { "epoch": 0.4161007935858627, "grad_norm": 0.3068331778049469, "learning_rate": 9.954452603776841e-06, "loss": 0.0045, "step": 25430 }, { "epoch": 0.41626441953693855, "grad_norm": 0.40216565132141113, "learning_rate": 9.954324316330193e-06, "loss": 0.0066, "step": 25440 }, { "epoch": 0.4164280454880144, "grad_norm": 0.34599897265434265, "learning_rate": 9.954195849301195e-06, "loss": 0.0068, "step": 25450 }, { "epoch": 0.41659167143909026, "grad_norm": 0.06795250624418259, "learning_rate": 9.954067202694505e-06, "loss": 0.006, "step": 25460 }, { "epoch": 0.4167552973901661, "grad_norm": 0.18456536531448364, "learning_rate": 9.953938376514785e-06, "loss": 0.0054, "step": 25470 }, { "epoch": 0.4169189233412419, "grad_norm": 0.4055711030960083, "learning_rate": 9.953809370766705e-06, "loss": 0.0037, "step": 25480 }, { "epoch": 0.41708254929231775, "grad_norm": 0.43466177582740784, "learning_rate": 9.953680185454943e-06, "loss": 0.006, "step": 25490 }, { "epoch": 0.4172461752433936, "grad_norm": 0.5423679947853088, "learning_rate": 9.95355082058418e-06, "loss": 0.0058, "step": 25500 }, { "epoch": 0.41740980119446947, "grad_norm": 0.16508300602436066, "learning_rate": 9.953421276159103e-06, "loss": 0.0048, "step": 25510 }, { "epoch": 0.4175734271455453, "grad_norm": 0.16442042589187622, "learning_rate": 9.953291552184413e-06, "loss": 0.0046, "step": 25520 }, { "epoch": 0.4177370530966211, "grad_norm": 0.5984448790550232, "learning_rate": 9.953161648664807e-06, "loss": 0.005, "step": 25530 }, { "epoch": 0.41790067904769695, "grad_norm": 0.799819827079773, "learning_rate": 9.953031565604996e-06, "loss": 0.0051, "step": 25540 }, { "epoch": 0.4180643049987728, "grad_norm": 0.16470642387866974, "learning_rate": 9.952901303009696e-06, "loss": 0.0053, "step": 25550 }, { "epoch": 0.41822793094984867, "grad_norm": 0.22018200159072876, "learning_rate": 9.952770860883627e-06, "loss": 0.0054, "step": 25560 }, { "epoch": 0.4183915569009245, "grad_norm": 0.4381274878978729, "learning_rate": 9.95264023923152e-06, "loss": 0.0047, "step": 25570 }, { "epoch": 0.41855518285200033, "grad_norm": 0.16278640925884247, "learning_rate": 9.952509438058104e-06, "loss": 0.0046, "step": 25580 }, { "epoch": 0.41871880880307616, "grad_norm": 0.05348186939954758, "learning_rate": 9.952378457368126e-06, "loss": 0.0059, "step": 25590 }, { "epoch": 0.418882434754152, "grad_norm": 0.3621346950531006, "learning_rate": 9.952247297166334e-06, "loss": 0.0101, "step": 25600 }, { "epoch": 0.4190460607052279, "grad_norm": 0.3846125900745392, "learning_rate": 9.952115957457475e-06, "loss": 0.0038, "step": 25610 }, { "epoch": 0.4192096866563037, "grad_norm": 0.34133437275886536, "learning_rate": 9.951984438246317e-06, "loss": 0.0076, "step": 25620 }, { "epoch": 0.41937331260737953, "grad_norm": 0.29336848855018616, "learning_rate": 9.951852739537626e-06, "loss": 0.0065, "step": 25630 }, { "epoch": 0.41953693855845536, "grad_norm": 0.11607229709625244, "learning_rate": 9.951720861336174e-06, "loss": 0.0054, "step": 25640 }, { "epoch": 0.4197005645095312, "grad_norm": 0.34873926639556885, "learning_rate": 9.951588803646741e-06, "loss": 0.0035, "step": 25650 }, { "epoch": 0.4198641904606071, "grad_norm": 0.3979949951171875, "learning_rate": 9.951456566474116e-06, "loss": 0.0047, "step": 25660 }, { "epoch": 0.4200278164116829, "grad_norm": 0.13472244143486023, "learning_rate": 9.95132414982309e-06, "loss": 0.0062, "step": 25670 }, { "epoch": 0.42019144236275874, "grad_norm": 0.18795715272426605, "learning_rate": 9.951191553698464e-06, "loss": 0.0042, "step": 25680 }, { "epoch": 0.42035506831383457, "grad_norm": 0.23879875242710114, "learning_rate": 9.951058778105045e-06, "loss": 0.0048, "step": 25690 }, { "epoch": 0.4205186942649104, "grad_norm": 0.13192777335643768, "learning_rate": 9.950925823047644e-06, "loss": 0.0034, "step": 25700 }, { "epoch": 0.4206823202159863, "grad_norm": 0.5185250639915466, "learning_rate": 9.95079268853108e-06, "loss": 0.0055, "step": 25710 }, { "epoch": 0.4208459461670621, "grad_norm": 0.15754516422748566, "learning_rate": 9.95065937456018e-06, "loss": 0.0037, "step": 25720 }, { "epoch": 0.42100957211813794, "grad_norm": 0.3642040193080902, "learning_rate": 9.950525881139778e-06, "loss": 0.0051, "step": 25730 }, { "epoch": 0.42117319806921377, "grad_norm": 0.48439934849739075, "learning_rate": 9.95039220827471e-06, "loss": 0.003, "step": 25740 }, { "epoch": 0.4213368240202896, "grad_norm": 0.20476143062114716, "learning_rate": 9.950258355969825e-06, "loss": 0.0044, "step": 25750 }, { "epoch": 0.42150044997136543, "grad_norm": 0.4189421832561493, "learning_rate": 9.950124324229969e-06, "loss": 0.0062, "step": 25760 }, { "epoch": 0.4216640759224413, "grad_norm": 0.5145852565765381, "learning_rate": 9.949990113060003e-06, "loss": 0.0049, "step": 25770 }, { "epoch": 0.42182770187351715, "grad_norm": 0.1941939741373062, "learning_rate": 9.949855722464797e-06, "loss": 0.0052, "step": 25780 }, { "epoch": 0.421991327824593, "grad_norm": 0.2683684527873993, "learning_rate": 9.949721152449213e-06, "loss": 0.0053, "step": 25790 }, { "epoch": 0.4221549537756688, "grad_norm": 0.17171452939510345, "learning_rate": 9.949586403018136e-06, "loss": 0.0062, "step": 25800 }, { "epoch": 0.42231857972674464, "grad_norm": 0.3140377402305603, "learning_rate": 9.949451474176446e-06, "loss": 0.0046, "step": 25810 }, { "epoch": 0.4224822056778205, "grad_norm": 0.0775977075099945, "learning_rate": 9.949316365929038e-06, "loss": 0.0036, "step": 25820 }, { "epoch": 0.42264583162889635, "grad_norm": 0.5512652397155762, "learning_rate": 9.949181078280806e-06, "loss": 0.0045, "step": 25830 }, { "epoch": 0.4228094575799722, "grad_norm": 0.1899128258228302, "learning_rate": 9.949045611236655e-06, "loss": 0.0053, "step": 25840 }, { "epoch": 0.422973083531048, "grad_norm": 0.1803659051656723, "learning_rate": 9.948909964801495e-06, "loss": 0.0068, "step": 25850 }, { "epoch": 0.42313670948212384, "grad_norm": 0.1032019630074501, "learning_rate": 9.948774138980244e-06, "loss": 0.0043, "step": 25860 }, { "epoch": 0.4233003354331997, "grad_norm": 0.48989441990852356, "learning_rate": 9.948638133777825e-06, "loss": 0.0057, "step": 25870 }, { "epoch": 0.42346396138427556, "grad_norm": 0.22366076707839966, "learning_rate": 9.948501949199165e-06, "loss": 0.0049, "step": 25880 }, { "epoch": 0.4236275873353514, "grad_norm": 0.10869742929935455, "learning_rate": 9.948365585249204e-06, "loss": 0.0047, "step": 25890 }, { "epoch": 0.4237912132864272, "grad_norm": 0.23598021268844604, "learning_rate": 9.948229041932884e-06, "loss": 0.005, "step": 25900 }, { "epoch": 0.42395483923750305, "grad_norm": 0.27074310183525085, "learning_rate": 9.948092319255155e-06, "loss": 0.0057, "step": 25910 }, { "epoch": 0.42411846518857893, "grad_norm": 0.3151009976863861, "learning_rate": 9.94795541722097e-06, "loss": 0.0052, "step": 25920 }, { "epoch": 0.42428209113965476, "grad_norm": 0.20643682777881622, "learning_rate": 9.947818335835293e-06, "loss": 0.0059, "step": 25930 }, { "epoch": 0.4244457170907306, "grad_norm": 0.11311911046504974, "learning_rate": 9.947681075103095e-06, "loss": 0.0063, "step": 25940 }, { "epoch": 0.4246093430418064, "grad_norm": 0.058958884328603745, "learning_rate": 9.947543635029347e-06, "loss": 0.0033, "step": 25950 }, { "epoch": 0.42477296899288225, "grad_norm": 0.20372574031352997, "learning_rate": 9.947406015619036e-06, "loss": 0.005, "step": 25960 }, { "epoch": 0.42493659494395813, "grad_norm": 0.1304374486207962, "learning_rate": 9.947268216877146e-06, "loss": 0.0042, "step": 25970 }, { "epoch": 0.42510022089503396, "grad_norm": 0.47513291239738464, "learning_rate": 9.947130238808673e-06, "loss": 0.0041, "step": 25980 }, { "epoch": 0.4252638468461098, "grad_norm": 0.1874646544456482, "learning_rate": 9.94699208141862e-06, "loss": 0.0032, "step": 25990 }, { "epoch": 0.4254274727971856, "grad_norm": 0.4611448347568512, "learning_rate": 9.946853744711995e-06, "loss": 0.0049, "step": 26000 }, { "epoch": 0.42559109874826145, "grad_norm": 0.20589487254619598, "learning_rate": 9.94671522869381e-06, "loss": 0.0074, "step": 26010 }, { "epoch": 0.42575472469933734, "grad_norm": 0.47777605056762695, "learning_rate": 9.946576533369086e-06, "loss": 0.0063, "step": 26020 }, { "epoch": 0.42591835065041317, "grad_norm": 0.3368110954761505, "learning_rate": 9.946437658742854e-06, "loss": 0.0052, "step": 26030 }, { "epoch": 0.426081976601489, "grad_norm": 0.14642922580242157, "learning_rate": 9.946298604820143e-06, "loss": 0.0037, "step": 26040 }, { "epoch": 0.42624560255256483, "grad_norm": 0.45016244053840637, "learning_rate": 9.946159371605996e-06, "loss": 0.006, "step": 26050 }, { "epoch": 0.42640922850364066, "grad_norm": 0.0836092010140419, "learning_rate": 9.946019959105462e-06, "loss": 0.0031, "step": 26060 }, { "epoch": 0.42657285445471654, "grad_norm": 0.1861150711774826, "learning_rate": 9.945880367323589e-06, "loss": 0.0039, "step": 26070 }, { "epoch": 0.4267364804057924, "grad_norm": 0.3985017240047455, "learning_rate": 9.945740596265441e-06, "loss": 0.0062, "step": 26080 }, { "epoch": 0.4269001063568682, "grad_norm": 0.15152108669281006, "learning_rate": 9.945600645936084e-06, "loss": 0.0058, "step": 26090 }, { "epoch": 0.42706373230794403, "grad_norm": 0.506436288356781, "learning_rate": 9.945460516340589e-06, "loss": 0.0051, "step": 26100 }, { "epoch": 0.42722735825901986, "grad_norm": 0.31075620651245117, "learning_rate": 9.945320207484035e-06, "loss": 0.0038, "step": 26110 }, { "epoch": 0.42739098421009575, "grad_norm": 0.2524474859237671, "learning_rate": 9.945179719371511e-06, "loss": 0.0068, "step": 26120 }, { "epoch": 0.4275546101611716, "grad_norm": 0.4262329041957855, "learning_rate": 9.945039052008108e-06, "loss": 0.0057, "step": 26130 }, { "epoch": 0.4277182361122474, "grad_norm": 0.27304673194885254, "learning_rate": 9.944898205398923e-06, "loss": 0.0057, "step": 26140 }, { "epoch": 0.42788186206332324, "grad_norm": 0.47253692150115967, "learning_rate": 9.944757179549064e-06, "loss": 0.0032, "step": 26150 }, { "epoch": 0.42804548801439907, "grad_norm": 0.176385760307312, "learning_rate": 9.94461597446364e-06, "loss": 0.0045, "step": 26160 }, { "epoch": 0.4282091139654749, "grad_norm": 0.24605652689933777, "learning_rate": 9.944474590147773e-06, "loss": 0.007, "step": 26170 }, { "epoch": 0.4283727399165508, "grad_norm": 0.22253073751926422, "learning_rate": 9.944333026606585e-06, "loss": 0.0047, "step": 26180 }, { "epoch": 0.4285363658676266, "grad_norm": 0.24287839233875275, "learning_rate": 9.944191283845209e-06, "loss": 0.005, "step": 26190 }, { "epoch": 0.42869999181870244, "grad_norm": 0.26345324516296387, "learning_rate": 9.944049361868781e-06, "loss": 0.0049, "step": 26200 }, { "epoch": 0.42886361776977827, "grad_norm": 0.475466251373291, "learning_rate": 9.943907260682445e-06, "loss": 0.0046, "step": 26210 }, { "epoch": 0.4290272437208541, "grad_norm": 0.15794482827186584, "learning_rate": 9.943764980291355e-06, "loss": 0.0056, "step": 26220 }, { "epoch": 0.42919086967193, "grad_norm": 0.14782395958900452, "learning_rate": 9.943622520700666e-06, "loss": 0.0042, "step": 26230 }, { "epoch": 0.4293544956230058, "grad_norm": 0.2919323146343231, "learning_rate": 9.943479881915543e-06, "loss": 0.0039, "step": 26240 }, { "epoch": 0.42951812157408165, "grad_norm": 0.5810136795043945, "learning_rate": 9.943337063941157e-06, "loss": 0.0057, "step": 26250 }, { "epoch": 0.4296817475251575, "grad_norm": 0.3564576804637909, "learning_rate": 9.94319406678268e-06, "loss": 0.0081, "step": 26260 }, { "epoch": 0.4298453734762333, "grad_norm": 0.41717562079429626, "learning_rate": 9.9430508904453e-06, "loss": 0.003, "step": 26270 }, { "epoch": 0.4300089994273092, "grad_norm": 0.12562857568264008, "learning_rate": 9.942907534934207e-06, "loss": 0.004, "step": 26280 }, { "epoch": 0.430172625378385, "grad_norm": 0.3914327323436737, "learning_rate": 9.942764000254595e-06, "loss": 0.0049, "step": 26290 }, { "epoch": 0.43033625132946085, "grad_norm": 0.13155989348888397, "learning_rate": 9.942620286411667e-06, "loss": 0.0047, "step": 26300 }, { "epoch": 0.4304998772805367, "grad_norm": 0.39990848302841187, "learning_rate": 9.942476393410632e-06, "loss": 0.0075, "step": 26310 }, { "epoch": 0.4306635032316125, "grad_norm": 0.09514040499925613, "learning_rate": 9.942332321256707e-06, "loss": 0.0052, "step": 26320 }, { "epoch": 0.4308271291826884, "grad_norm": 0.45336511731147766, "learning_rate": 9.942188069955116e-06, "loss": 0.0058, "step": 26330 }, { "epoch": 0.4309907551337642, "grad_norm": 0.35713547468185425, "learning_rate": 9.942043639511085e-06, "loss": 0.0078, "step": 26340 }, { "epoch": 0.43115438108484005, "grad_norm": 0.38602957129478455, "learning_rate": 9.94189902992985e-06, "loss": 0.0037, "step": 26350 }, { "epoch": 0.4313180070359159, "grad_norm": 0.26430776715278625, "learning_rate": 9.941754241216651e-06, "loss": 0.0056, "step": 26360 }, { "epoch": 0.4314816329869917, "grad_norm": 0.2346680462360382, "learning_rate": 9.941609273376739e-06, "loss": 0.0046, "step": 26370 }, { "epoch": 0.4316452589380676, "grad_norm": 0.2572612464427948, "learning_rate": 9.941464126415367e-06, "loss": 0.0054, "step": 26380 }, { "epoch": 0.43180888488914343, "grad_norm": 0.28751105070114136, "learning_rate": 9.941318800337798e-06, "loss": 0.0031, "step": 26390 }, { "epoch": 0.43197251084021926, "grad_norm": 0.36122652888298035, "learning_rate": 9.9411732951493e-06, "loss": 0.0055, "step": 26400 }, { "epoch": 0.4321361367912951, "grad_norm": 0.22892005741596222, "learning_rate": 9.941027610855143e-06, "loss": 0.0055, "step": 26410 }, { "epoch": 0.4322997627423709, "grad_norm": 0.23452228307724, "learning_rate": 9.94088174746061e-06, "loss": 0.0084, "step": 26420 }, { "epoch": 0.4324633886934468, "grad_norm": 0.051503460854291916, "learning_rate": 9.940735704970992e-06, "loss": 0.0142, "step": 26430 }, { "epoch": 0.43262701464452263, "grad_norm": 0.27087894082069397, "learning_rate": 9.940589483391578e-06, "loss": 0.0045, "step": 26440 }, { "epoch": 0.43279064059559846, "grad_norm": 0.34042420983314514, "learning_rate": 9.94044308272767e-06, "loss": 0.0044, "step": 26450 }, { "epoch": 0.4329542665466743, "grad_norm": 0.29429343342781067, "learning_rate": 9.940296502984574e-06, "loss": 0.0045, "step": 26460 }, { "epoch": 0.4331178924977501, "grad_norm": 0.5314293503761292, "learning_rate": 9.940149744167602e-06, "loss": 0.0043, "step": 26470 }, { "epoch": 0.433281518448826, "grad_norm": 0.057193126529455185, "learning_rate": 9.940002806282077e-06, "loss": 0.0037, "step": 26480 }, { "epoch": 0.43344514439990184, "grad_norm": 0.43834778666496277, "learning_rate": 9.939855689333322e-06, "loss": 0.0064, "step": 26490 }, { "epoch": 0.43360877035097767, "grad_norm": 1.0762265920639038, "learning_rate": 9.939708393326672e-06, "loss": 0.0117, "step": 26500 }, { "epoch": 0.4337723963020535, "grad_norm": 0.2556087076663971, "learning_rate": 9.939560918267465e-06, "loss": 0.0065, "step": 26510 }, { "epoch": 0.4339360222531293, "grad_norm": 0.2927626371383667, "learning_rate": 9.939413264161047e-06, "loss": 0.004, "step": 26520 }, { "epoch": 0.4340996482042052, "grad_norm": 0.13159334659576416, "learning_rate": 9.939265431012769e-06, "loss": 0.0054, "step": 26530 }, { "epoch": 0.43426327415528104, "grad_norm": 0.11064198613166809, "learning_rate": 9.93911741882799e-06, "loss": 0.0031, "step": 26540 }, { "epoch": 0.4344269001063569, "grad_norm": 0.21083663403987885, "learning_rate": 9.938969227612076e-06, "loss": 0.0026, "step": 26550 }, { "epoch": 0.4345905260574327, "grad_norm": 0.13292013108730316, "learning_rate": 9.938820857370397e-06, "loss": 0.0032, "step": 26560 }, { "epoch": 0.43475415200850853, "grad_norm": 0.4474940299987793, "learning_rate": 9.938672308108333e-06, "loss": 0.0055, "step": 26570 }, { "epoch": 0.4349177779595844, "grad_norm": 0.27184492349624634, "learning_rate": 9.938523579831267e-06, "loss": 0.0038, "step": 26580 }, { "epoch": 0.43508140391066025, "grad_norm": 0.2035464495420456, "learning_rate": 9.93837467254459e-06, "loss": 0.0076, "step": 26590 }, { "epoch": 0.4352450298617361, "grad_norm": 0.2861744165420532, "learning_rate": 9.938225586253703e-06, "loss": 0.0055, "step": 26600 }, { "epoch": 0.4354086558128119, "grad_norm": 0.13672231137752533, "learning_rate": 9.938076320964006e-06, "loss": 0.0042, "step": 26610 }, { "epoch": 0.43557228176388774, "grad_norm": 0.22904746234416962, "learning_rate": 9.93792687668091e-06, "loss": 0.0038, "step": 26620 }, { "epoch": 0.43573590771496357, "grad_norm": 0.48036491870880127, "learning_rate": 9.937777253409832e-06, "loss": 0.0123, "step": 26630 }, { "epoch": 0.43589953366603945, "grad_norm": 0.08625561743974686, "learning_rate": 9.937627451156197e-06, "loss": 0.0066, "step": 26640 }, { "epoch": 0.4360631596171153, "grad_norm": 0.38035905361175537, "learning_rate": 9.937477469925434e-06, "loss": 0.0055, "step": 26650 }, { "epoch": 0.4362267855681911, "grad_norm": 0.2845790982246399, "learning_rate": 9.937327309722979e-06, "loss": 0.0054, "step": 26660 }, { "epoch": 0.43639041151926694, "grad_norm": 0.4349279999732971, "learning_rate": 9.937176970554278e-06, "loss": 0.0072, "step": 26670 }, { "epoch": 0.43655403747034277, "grad_norm": 0.5188882350921631, "learning_rate": 9.937026452424776e-06, "loss": 0.0047, "step": 26680 }, { "epoch": 0.43671766342141866, "grad_norm": 0.16659517586231232, "learning_rate": 9.936875755339931e-06, "loss": 0.0081, "step": 26690 }, { "epoch": 0.4368812893724945, "grad_norm": 0.21452824771404266, "learning_rate": 9.936724879305206e-06, "loss": 0.0042, "step": 26700 }, { "epoch": 0.4370449153235703, "grad_norm": 0.1589195877313614, "learning_rate": 9.93657382432607e-06, "loss": 0.0067, "step": 26710 }, { "epoch": 0.43720854127464615, "grad_norm": 0.3009520471096039, "learning_rate": 9.936422590407994e-06, "loss": 0.0038, "step": 26720 }, { "epoch": 0.437372167225722, "grad_norm": 0.507652223110199, "learning_rate": 9.936271177556467e-06, "loss": 0.0073, "step": 26730 }, { "epoch": 0.43753579317679786, "grad_norm": 0.12943866848945618, "learning_rate": 9.936119585776973e-06, "loss": 0.0048, "step": 26740 }, { "epoch": 0.4376994191278737, "grad_norm": 0.3426955044269562, "learning_rate": 9.935967815075007e-06, "loss": 0.0065, "step": 26750 }, { "epoch": 0.4378630450789495, "grad_norm": 0.3047889471054077, "learning_rate": 9.935815865456073e-06, "loss": 0.0075, "step": 26760 }, { "epoch": 0.43802667103002535, "grad_norm": 0.36037224531173706, "learning_rate": 9.935663736925675e-06, "loss": 0.0041, "step": 26770 }, { "epoch": 0.4381902969811012, "grad_norm": 0.5686681866645813, "learning_rate": 9.935511429489328e-06, "loss": 0.0058, "step": 26780 }, { "epoch": 0.43835392293217706, "grad_norm": 0.5435603260993958, "learning_rate": 9.935358943152557e-06, "loss": 0.0059, "step": 26790 }, { "epoch": 0.4385175488832529, "grad_norm": 0.28427553176879883, "learning_rate": 9.935206277920884e-06, "loss": 0.0111, "step": 26800 }, { "epoch": 0.4386811748343287, "grad_norm": 0.17619702219963074, "learning_rate": 9.935053433799846e-06, "loss": 0.0034, "step": 26810 }, { "epoch": 0.43884480078540455, "grad_norm": 0.12450452148914337, "learning_rate": 9.934900410794981e-06, "loss": 0.0049, "step": 26820 }, { "epoch": 0.4390084267364804, "grad_norm": 0.2546733319759369, "learning_rate": 9.934747208911837e-06, "loss": 0.0054, "step": 26830 }, { "epoch": 0.43917205268755627, "grad_norm": 0.2232169359922409, "learning_rate": 9.934593828155966e-06, "loss": 0.0039, "step": 26840 }, { "epoch": 0.4393356786386321, "grad_norm": 0.33902662992477417, "learning_rate": 9.934440268532931e-06, "loss": 0.0071, "step": 26850 }, { "epoch": 0.43949930458970793, "grad_norm": 0.24697628617286682, "learning_rate": 9.934286530048294e-06, "loss": 0.0045, "step": 26860 }, { "epoch": 0.43966293054078376, "grad_norm": 0.31107616424560547, "learning_rate": 9.934132612707631e-06, "loss": 0.0044, "step": 26870 }, { "epoch": 0.4398265564918596, "grad_norm": 0.3301653265953064, "learning_rate": 9.93397851651652e-06, "loss": 0.0066, "step": 26880 }, { "epoch": 0.4399901824429355, "grad_norm": 0.2726227045059204, "learning_rate": 9.933824241480545e-06, "loss": 0.0041, "step": 26890 }, { "epoch": 0.4401538083940113, "grad_norm": 0.24102412164211273, "learning_rate": 9.9336697876053e-06, "loss": 0.0041, "step": 26900 }, { "epoch": 0.44031743434508713, "grad_norm": 0.18632589280605316, "learning_rate": 9.933515154896383e-06, "loss": 0.0042, "step": 26910 }, { "epoch": 0.44048106029616296, "grad_norm": 0.1324058473110199, "learning_rate": 9.933360343359399e-06, "loss": 0.0042, "step": 26920 }, { "epoch": 0.4406446862472388, "grad_norm": 0.32733872532844543, "learning_rate": 9.933205352999958e-06, "loss": 0.0078, "step": 26930 }, { "epoch": 0.4408083121983147, "grad_norm": 0.2764756381511688, "learning_rate": 9.933050183823681e-06, "loss": 0.0054, "step": 26940 }, { "epoch": 0.4409719381493905, "grad_norm": 0.1801196038722992, "learning_rate": 9.932894835836191e-06, "loss": 0.0047, "step": 26950 }, { "epoch": 0.44113556410046634, "grad_norm": 0.11624041944742203, "learning_rate": 9.93273930904312e-06, "loss": 0.0035, "step": 26960 }, { "epoch": 0.44129919005154217, "grad_norm": 0.48371636867523193, "learning_rate": 9.932583603450103e-06, "loss": 0.0077, "step": 26970 }, { "epoch": 0.441462816002618, "grad_norm": 0.4422214925289154, "learning_rate": 9.932427719062787e-06, "loss": 0.0048, "step": 26980 }, { "epoch": 0.4416264419536939, "grad_norm": 0.23202046751976013, "learning_rate": 9.93227165588682e-06, "loss": 0.0051, "step": 26990 }, { "epoch": 0.4417900679047697, "grad_norm": 0.4644702970981598, "learning_rate": 9.93211541392786e-06, "loss": 0.0042, "step": 27000 }, { "epoch": 0.44195369385584554, "grad_norm": 0.1372828185558319, "learning_rate": 9.93195899319157e-06, "loss": 0.004, "step": 27010 }, { "epoch": 0.44211731980692137, "grad_norm": 0.24187833070755005, "learning_rate": 9.93180239368362e-06, "loss": 0.0075, "step": 27020 }, { "epoch": 0.4422809457579972, "grad_norm": 0.48835405707359314, "learning_rate": 9.931645615409687e-06, "loss": 0.0047, "step": 27030 }, { "epoch": 0.4424445717090731, "grad_norm": 0.12844069302082062, "learning_rate": 9.931488658375454e-06, "loss": 0.0052, "step": 27040 }, { "epoch": 0.4426081976601489, "grad_norm": 0.13725093007087708, "learning_rate": 9.931331522586608e-06, "loss": 0.0044, "step": 27050 }, { "epoch": 0.44277182361122475, "grad_norm": 0.3767290413379669, "learning_rate": 9.931174208048849e-06, "loss": 0.0047, "step": 27060 }, { "epoch": 0.4429354495623006, "grad_norm": 0.14739324152469635, "learning_rate": 9.931016714767874e-06, "loss": 0.0041, "step": 27070 }, { "epoch": 0.4430990755133764, "grad_norm": 0.18387803435325623, "learning_rate": 9.930859042749397e-06, "loss": 0.0061, "step": 27080 }, { "epoch": 0.44326270146445224, "grad_norm": 0.1818620264530182, "learning_rate": 9.930701191999127e-06, "loss": 0.0042, "step": 27090 }, { "epoch": 0.4434263274155281, "grad_norm": 0.33416905999183655, "learning_rate": 9.930543162522794e-06, "loss": 0.0072, "step": 27100 }, { "epoch": 0.44358995336660395, "grad_norm": 0.18846382200717926, "learning_rate": 9.930384954326118e-06, "loss": 0.0039, "step": 27110 }, { "epoch": 0.4437535793176798, "grad_norm": 0.1889810413122177, "learning_rate": 9.930226567414838e-06, "loss": 0.0036, "step": 27120 }, { "epoch": 0.4439172052687556, "grad_norm": 0.1603645533323288, "learning_rate": 9.930068001794696e-06, "loss": 0.0049, "step": 27130 }, { "epoch": 0.44408083121983144, "grad_norm": 0.11223021149635315, "learning_rate": 9.929909257471437e-06, "loss": 0.0043, "step": 27140 }, { "epoch": 0.4442444571709073, "grad_norm": 0.08833596855401993, "learning_rate": 9.929750334450816e-06, "loss": 0.0042, "step": 27150 }, { "epoch": 0.44440808312198316, "grad_norm": 0.19301727414131165, "learning_rate": 9.929591232738595e-06, "loss": 0.0042, "step": 27160 }, { "epoch": 0.444571709073059, "grad_norm": 1.130907654762268, "learning_rate": 9.929431952340539e-06, "loss": 0.0048, "step": 27170 }, { "epoch": 0.4447353350241348, "grad_norm": 0.2923726737499237, "learning_rate": 9.92927249326242e-06, "loss": 0.0039, "step": 27180 }, { "epoch": 0.44489896097521064, "grad_norm": 0.19610510766506195, "learning_rate": 9.929112855510023e-06, "loss": 0.0051, "step": 27190 }, { "epoch": 0.44506258692628653, "grad_norm": 0.3318614363670349, "learning_rate": 9.928953039089131e-06, "loss": 0.0081, "step": 27200 }, { "epoch": 0.44522621287736236, "grad_norm": 0.3307742774486542, "learning_rate": 9.928793044005538e-06, "loss": 0.0053, "step": 27210 }, { "epoch": 0.4453898388284382, "grad_norm": 0.36365392804145813, "learning_rate": 9.928632870265044e-06, "loss": 0.0059, "step": 27220 }, { "epoch": 0.445553464779514, "grad_norm": 0.16435521841049194, "learning_rate": 9.928472517873454e-06, "loss": 0.0036, "step": 27230 }, { "epoch": 0.44571709073058985, "grad_norm": 0.2523607313632965, "learning_rate": 9.928311986836578e-06, "loss": 0.0036, "step": 27240 }, { "epoch": 0.44588071668166573, "grad_norm": 0.16454745829105377, "learning_rate": 9.92815127716024e-06, "loss": 0.0033, "step": 27250 }, { "epoch": 0.44604434263274156, "grad_norm": 0.677886426448822, "learning_rate": 9.927990388850261e-06, "loss": 0.0075, "step": 27260 }, { "epoch": 0.4462079685838174, "grad_norm": 0.16890017688274384, "learning_rate": 9.927829321912477e-06, "loss": 0.0057, "step": 27270 }, { "epoch": 0.4463715945348932, "grad_norm": 0.2555929124355316, "learning_rate": 9.927668076352721e-06, "loss": 0.0044, "step": 27280 }, { "epoch": 0.44653522048596905, "grad_norm": 0.20007185637950897, "learning_rate": 9.927506652176842e-06, "loss": 0.0051, "step": 27290 }, { "epoch": 0.44669884643704494, "grad_norm": 0.1796633005142212, "learning_rate": 9.927345049390688e-06, "loss": 0.0038, "step": 27300 }, { "epoch": 0.44686247238812077, "grad_norm": 0.3198784589767456, "learning_rate": 9.92718326800012e-06, "loss": 0.0039, "step": 27310 }, { "epoch": 0.4470260983391966, "grad_norm": 0.09753880649805069, "learning_rate": 9.927021308011003e-06, "loss": 0.0049, "step": 27320 }, { "epoch": 0.44718972429027243, "grad_norm": 0.24289223551750183, "learning_rate": 9.926859169429201e-06, "loss": 0.0032, "step": 27330 }, { "epoch": 0.44735335024134826, "grad_norm": 0.16761480271816254, "learning_rate": 9.926696852260598e-06, "loss": 0.0066, "step": 27340 }, { "epoch": 0.44751697619242414, "grad_norm": 0.2215719074010849, "learning_rate": 9.926534356511075e-06, "loss": 0.0047, "step": 27350 }, { "epoch": 0.4476806021435, "grad_norm": 0.3459492325782776, "learning_rate": 9.926371682186522e-06, "loss": 0.005, "step": 27360 }, { "epoch": 0.4478442280945758, "grad_norm": 0.14066335558891296, "learning_rate": 9.926208829292837e-06, "loss": 0.0048, "step": 27370 }, { "epoch": 0.44800785404565163, "grad_norm": 0.4356427490711212, "learning_rate": 9.92604579783592e-06, "loss": 0.0042, "step": 27380 }, { "epoch": 0.44817147999672746, "grad_norm": 0.18119563162326813, "learning_rate": 9.925882587821682e-06, "loss": 0.0054, "step": 27390 }, { "epoch": 0.44833510594780335, "grad_norm": 0.43294450640678406, "learning_rate": 9.92571919925604e-06, "loss": 0.0048, "step": 27400 }, { "epoch": 0.4484987318988792, "grad_norm": 0.20888350903987885, "learning_rate": 9.925555632144916e-06, "loss": 0.0038, "step": 27410 }, { "epoch": 0.448662357849955, "grad_norm": 0.23409788310527802, "learning_rate": 9.925391886494237e-06, "loss": 0.0047, "step": 27420 }, { "epoch": 0.44882598380103084, "grad_norm": 0.19678986072540283, "learning_rate": 9.925227962309942e-06, "loss": 0.0028, "step": 27430 }, { "epoch": 0.44898960975210667, "grad_norm": 0.23898477852344513, "learning_rate": 9.92506385959797e-06, "loss": 0.003, "step": 27440 }, { "epoch": 0.44915323570318255, "grad_norm": 0.2357511669397354, "learning_rate": 9.92489957836427e-06, "loss": 0.0062, "step": 27450 }, { "epoch": 0.4493168616542584, "grad_norm": 0.3089338541030884, "learning_rate": 9.924735118614797e-06, "loss": 0.0071, "step": 27460 }, { "epoch": 0.4494804876053342, "grad_norm": 0.19275996088981628, "learning_rate": 9.924570480355513e-06, "loss": 0.0065, "step": 27470 }, { "epoch": 0.44964411355641004, "grad_norm": 0.02500307373702526, "learning_rate": 9.924405663592384e-06, "loss": 0.0059, "step": 27480 }, { "epoch": 0.44980773950748587, "grad_norm": 0.6308009624481201, "learning_rate": 9.924240668331386e-06, "loss": 0.0038, "step": 27490 }, { "epoch": 0.4499713654585617, "grad_norm": 0.380270779132843, "learning_rate": 9.924075494578498e-06, "loss": 0.0039, "step": 27500 }, { "epoch": 0.4501349914096376, "grad_norm": 0.37860944867134094, "learning_rate": 9.923910142339708e-06, "loss": 0.0051, "step": 27510 }, { "epoch": 0.4502986173607134, "grad_norm": 0.2461448758840561, "learning_rate": 9.92374461162101e-06, "loss": 0.004, "step": 27520 }, { "epoch": 0.45046224331178925, "grad_norm": 0.35521289706230164, "learning_rate": 9.923578902428403e-06, "loss": 0.0058, "step": 27530 }, { "epoch": 0.4506258692628651, "grad_norm": 0.11499812453985214, "learning_rate": 9.923413014767897e-06, "loss": 0.0052, "step": 27540 }, { "epoch": 0.4507894952139409, "grad_norm": 0.30423709750175476, "learning_rate": 9.923246948645499e-06, "loss": 0.0037, "step": 27550 }, { "epoch": 0.4509531211650168, "grad_norm": 0.17959947884082794, "learning_rate": 9.923080704067233e-06, "loss": 0.0039, "step": 27560 }, { "epoch": 0.4511167471160926, "grad_norm": 0.25133025646209717, "learning_rate": 9.922914281039124e-06, "loss": 0.0034, "step": 27570 }, { "epoch": 0.45128037306716845, "grad_norm": 0.38148918747901917, "learning_rate": 9.922747679567206e-06, "loss": 0.0043, "step": 27580 }, { "epoch": 0.4514439990182443, "grad_norm": 0.6596552729606628, "learning_rate": 9.922580899657514e-06, "loss": 0.0116, "step": 27590 }, { "epoch": 0.4516076249693201, "grad_norm": 0.2443445473909378, "learning_rate": 9.922413941316095e-06, "loss": 0.0065, "step": 27600 }, { "epoch": 0.451771250920396, "grad_norm": 0.36659279465675354, "learning_rate": 9.922246804549002e-06, "loss": 0.005, "step": 27610 }, { "epoch": 0.4519348768714718, "grad_norm": 0.32651492953300476, "learning_rate": 9.922079489362294e-06, "loss": 0.0068, "step": 27620 }, { "epoch": 0.45209850282254765, "grad_norm": 0.2040097564458847, "learning_rate": 9.921911995762032e-06, "loss": 0.0046, "step": 27630 }, { "epoch": 0.4522621287736235, "grad_norm": 0.4699828624725342, "learning_rate": 9.921744323754292e-06, "loss": 0.0051, "step": 27640 }, { "epoch": 0.4524257547246993, "grad_norm": 0.2855335772037506, "learning_rate": 9.92157647334515e-06, "loss": 0.0048, "step": 27650 }, { "epoch": 0.4525893806757752, "grad_norm": 0.3780006468296051, "learning_rate": 9.921408444540687e-06, "loss": 0.0051, "step": 27660 }, { "epoch": 0.45275300662685103, "grad_norm": 0.30700623989105225, "learning_rate": 9.921240237346996e-06, "loss": 0.0047, "step": 27670 }, { "epoch": 0.45291663257792686, "grad_norm": 0.3167283833026886, "learning_rate": 9.921071851770177e-06, "loss": 0.0056, "step": 27680 }, { "epoch": 0.4530802585290027, "grad_norm": 0.26490625739097595, "learning_rate": 9.920903287816329e-06, "loss": 0.0042, "step": 27690 }, { "epoch": 0.4532438844800785, "grad_norm": 0.13398846983909607, "learning_rate": 9.920734545491564e-06, "loss": 0.0051, "step": 27700 }, { "epoch": 0.4534075104311544, "grad_norm": 0.23275910317897797, "learning_rate": 9.920565624801997e-06, "loss": 0.0038, "step": 27710 }, { "epoch": 0.45357113638223023, "grad_norm": 0.296029657125473, "learning_rate": 9.920396525753755e-06, "loss": 0.0034, "step": 27720 }, { "epoch": 0.45373476233330606, "grad_norm": 0.24191559851169586, "learning_rate": 9.920227248352963e-06, "loss": 0.0044, "step": 27730 }, { "epoch": 0.4538983882843819, "grad_norm": 0.24207191169261932, "learning_rate": 9.92005779260576e-06, "loss": 0.0056, "step": 27740 }, { "epoch": 0.4540620142354577, "grad_norm": 0.1859743744134903, "learning_rate": 9.919888158518286e-06, "loss": 0.0046, "step": 27750 }, { "epoch": 0.4542256401865336, "grad_norm": 0.4077349007129669, "learning_rate": 9.919718346096692e-06, "loss": 0.004, "step": 27760 }, { "epoch": 0.45438926613760944, "grad_norm": 0.39792948961257935, "learning_rate": 9.91954835534713e-06, "loss": 0.0059, "step": 27770 }, { "epoch": 0.45455289208868527, "grad_norm": 0.46911177039146423, "learning_rate": 9.919378186275767e-06, "loss": 0.0045, "step": 27780 }, { "epoch": 0.4547165180397611, "grad_norm": 0.14708006381988525, "learning_rate": 9.919207838888767e-06, "loss": 0.004, "step": 27790 }, { "epoch": 0.4548801439908369, "grad_norm": 0.5174422264099121, "learning_rate": 9.919037313192305e-06, "loss": 0.0037, "step": 27800 }, { "epoch": 0.4550437699419128, "grad_norm": 0.20993366837501526, "learning_rate": 9.918866609192563e-06, "loss": 0.0056, "step": 27810 }, { "epoch": 0.45520739589298864, "grad_norm": 0.22028259932994843, "learning_rate": 9.91869572689573e-06, "loss": 0.0055, "step": 27820 }, { "epoch": 0.45537102184406447, "grad_norm": 0.076836496591568, "learning_rate": 9.918524666307998e-06, "loss": 0.0033, "step": 27830 }, { "epoch": 0.4555346477951403, "grad_norm": 0.04713664948940277, "learning_rate": 9.918353427435568e-06, "loss": 0.0041, "step": 27840 }, { "epoch": 0.45569827374621613, "grad_norm": 0.2616214454174042, "learning_rate": 9.918182010284645e-06, "loss": 0.0062, "step": 27850 }, { "epoch": 0.455861899697292, "grad_norm": 0.13661831617355347, "learning_rate": 9.918010414861448e-06, "loss": 0.0056, "step": 27860 }, { "epoch": 0.45602552564836785, "grad_norm": 0.19420835375785828, "learning_rate": 9.917838641172193e-06, "loss": 0.0046, "step": 27870 }, { "epoch": 0.4561891515994437, "grad_norm": 0.17217794060707092, "learning_rate": 9.917666689223106e-06, "loss": 0.0051, "step": 27880 }, { "epoch": 0.4563527775505195, "grad_norm": 0.17375342547893524, "learning_rate": 9.91749455902042e-06, "loss": 0.0044, "step": 27890 }, { "epoch": 0.45651640350159534, "grad_norm": 0.23703983426094055, "learning_rate": 9.917322250570378e-06, "loss": 0.0035, "step": 27900 }, { "epoch": 0.4566800294526712, "grad_norm": 0.5515260696411133, "learning_rate": 9.91714976387922e-06, "loss": 0.0052, "step": 27910 }, { "epoch": 0.45684365540374705, "grad_norm": 0.06264668703079224, "learning_rate": 9.916977098953202e-06, "loss": 0.0063, "step": 27920 }, { "epoch": 0.4570072813548229, "grad_norm": 0.12721134722232819, "learning_rate": 9.916804255798584e-06, "loss": 0.0051, "step": 27930 }, { "epoch": 0.4571709073058987, "grad_norm": 0.27503839135169983, "learning_rate": 9.916631234421627e-06, "loss": 0.0076, "step": 27940 }, { "epoch": 0.45733453325697454, "grad_norm": 0.12606807053089142, "learning_rate": 9.916458034828605e-06, "loss": 0.0053, "step": 27950 }, { "epoch": 0.45749815920805037, "grad_norm": 0.28796064853668213, "learning_rate": 9.916284657025795e-06, "loss": 0.004, "step": 27960 }, { "epoch": 0.45766178515912626, "grad_norm": 0.19446000456809998, "learning_rate": 9.91611110101948e-06, "loss": 0.0037, "step": 27970 }, { "epoch": 0.4578254111102021, "grad_norm": 0.35852789878845215, "learning_rate": 9.915937366815958e-06, "loss": 0.0063, "step": 27980 }, { "epoch": 0.4579890370612779, "grad_norm": 0.2826062738895416, "learning_rate": 9.915763454421519e-06, "loss": 0.003, "step": 27990 }, { "epoch": 0.45815266301235374, "grad_norm": 0.2767725884914398, "learning_rate": 9.91558936384247e-06, "loss": 0.0036, "step": 28000 }, { "epoch": 0.4583162889634296, "grad_norm": 0.5194330215454102, "learning_rate": 9.91541509508512e-06, "loss": 0.0043, "step": 28010 }, { "epoch": 0.45847991491450546, "grad_norm": 0.16330242156982422, "learning_rate": 9.915240648155787e-06, "loss": 0.0073, "step": 28020 }, { "epoch": 0.4586435408655813, "grad_norm": 0.09129302203655243, "learning_rate": 9.915066023060794e-06, "loss": 0.0048, "step": 28030 }, { "epoch": 0.4588071668166571, "grad_norm": 0.31927719712257385, "learning_rate": 9.91489121980647e-06, "loss": 0.0058, "step": 28040 }, { "epoch": 0.45897079276773295, "grad_norm": 0.10923999547958374, "learning_rate": 9.914716238399153e-06, "loss": 0.0081, "step": 28050 }, { "epoch": 0.4591344187188088, "grad_norm": 0.1889079511165619, "learning_rate": 9.914541078845186e-06, "loss": 0.0039, "step": 28060 }, { "epoch": 0.45929804466988466, "grad_norm": 0.22986048460006714, "learning_rate": 9.914365741150915e-06, "loss": 0.0058, "step": 28070 }, { "epoch": 0.4594616706209605, "grad_norm": 0.10280491411685944, "learning_rate": 9.914190225322697e-06, "loss": 0.0038, "step": 28080 }, { "epoch": 0.4596252965720363, "grad_norm": 0.7463960647583008, "learning_rate": 9.914014531366894e-06, "loss": 0.0066, "step": 28090 }, { "epoch": 0.45978892252311215, "grad_norm": 0.04366424307227135, "learning_rate": 9.913838659289875e-06, "loss": 0.006, "step": 28100 }, { "epoch": 0.459952548474188, "grad_norm": 0.17364706099033356, "learning_rate": 9.913662609098015e-06, "loss": 0.0055, "step": 28110 }, { "epoch": 0.46011617442526387, "grad_norm": 0.40444594621658325, "learning_rate": 9.913486380797696e-06, "loss": 0.0052, "step": 28120 }, { "epoch": 0.4602798003763397, "grad_norm": 0.2098781317472458, "learning_rate": 9.913309974395305e-06, "loss": 0.0038, "step": 28130 }, { "epoch": 0.46044342632741553, "grad_norm": 0.10730168223381042, "learning_rate": 9.913133389897235e-06, "loss": 0.0043, "step": 28140 }, { "epoch": 0.46060705227849136, "grad_norm": 0.1922830194234848, "learning_rate": 9.912956627309888e-06, "loss": 0.004, "step": 28150 }, { "epoch": 0.4607706782295672, "grad_norm": 0.1791941225528717, "learning_rate": 9.912779686639671e-06, "loss": 0.0064, "step": 28160 }, { "epoch": 0.4609343041806431, "grad_norm": 0.2935792803764343, "learning_rate": 9.912602567893e-06, "loss": 0.0084, "step": 28170 }, { "epoch": 0.4610979301317189, "grad_norm": 0.28350767493247986, "learning_rate": 9.912425271076291e-06, "loss": 0.0053, "step": 28180 }, { "epoch": 0.46126155608279473, "grad_norm": 0.12251194566488266, "learning_rate": 9.912247796195975e-06, "loss": 0.0052, "step": 28190 }, { "epoch": 0.46142518203387056, "grad_norm": 0.21796534955501556, "learning_rate": 9.91207014325848e-06, "loss": 0.0041, "step": 28200 }, { "epoch": 0.4615888079849464, "grad_norm": 0.11030439287424088, "learning_rate": 9.911892312270248e-06, "loss": 0.0049, "step": 28210 }, { "epoch": 0.4617524339360223, "grad_norm": 0.3622218072414398, "learning_rate": 9.911714303237728e-06, "loss": 0.0033, "step": 28220 }, { "epoch": 0.4619160598870981, "grad_norm": 0.3663948178291321, "learning_rate": 9.911536116167367e-06, "loss": 0.0069, "step": 28230 }, { "epoch": 0.46207968583817394, "grad_norm": 0.16379572451114655, "learning_rate": 9.911357751065628e-06, "loss": 0.007, "step": 28240 }, { "epoch": 0.46224331178924977, "grad_norm": 0.23335902392864227, "learning_rate": 9.911179207938972e-06, "loss": 0.0039, "step": 28250 }, { "epoch": 0.4624069377403256, "grad_norm": 0.22836972773075104, "learning_rate": 9.911000486793876e-06, "loss": 0.0047, "step": 28260 }, { "epoch": 0.4625705636914015, "grad_norm": 1.1119749546051025, "learning_rate": 9.910821587636814e-06, "loss": 0.0039, "step": 28270 }, { "epoch": 0.4627341896424773, "grad_norm": 0.3211715519428253, "learning_rate": 9.910642510474273e-06, "loss": 0.0071, "step": 28280 }, { "epoch": 0.46289781559355314, "grad_norm": 0.2997148931026459, "learning_rate": 9.910463255312744e-06, "loss": 0.0039, "step": 28290 }, { "epoch": 0.46306144154462897, "grad_norm": 0.16480353474617004, "learning_rate": 9.910283822158724e-06, "loss": 0.0058, "step": 28300 }, { "epoch": 0.4632250674957048, "grad_norm": 0.5872746109962463, "learning_rate": 9.910104211018716e-06, "loss": 0.0096, "step": 28310 }, { "epoch": 0.4633886934467807, "grad_norm": 0.2717980146408081, "learning_rate": 9.909924421899232e-06, "loss": 0.0048, "step": 28320 }, { "epoch": 0.4635523193978565, "grad_norm": 0.3067272901535034, "learning_rate": 9.909744454806789e-06, "loss": 0.0057, "step": 28330 }, { "epoch": 0.46371594534893235, "grad_norm": 0.22667890787124634, "learning_rate": 9.909564309747908e-06, "loss": 0.0057, "step": 28340 }, { "epoch": 0.4638795713000082, "grad_norm": 0.23339539766311646, "learning_rate": 9.909383986729121e-06, "loss": 0.0036, "step": 28350 }, { "epoch": 0.464043197251084, "grad_norm": 0.28694164752960205, "learning_rate": 9.909203485756966e-06, "loss": 0.0055, "step": 28360 }, { "epoch": 0.46420682320215984, "grad_norm": 0.04660428687930107, "learning_rate": 9.909022806837983e-06, "loss": 0.0038, "step": 28370 }, { "epoch": 0.4643704491532357, "grad_norm": 0.046696942299604416, "learning_rate": 9.90884194997872e-06, "loss": 0.0051, "step": 28380 }, { "epoch": 0.46453407510431155, "grad_norm": 0.259512335062027, "learning_rate": 9.908660915185734e-06, "loss": 0.0036, "step": 28390 }, { "epoch": 0.4646977010553874, "grad_norm": 0.6218687295913696, "learning_rate": 9.90847970246559e-06, "loss": 0.0059, "step": 28400 }, { "epoch": 0.4648613270064632, "grad_norm": 0.3486070930957794, "learning_rate": 9.908298311824853e-06, "loss": 0.0065, "step": 28410 }, { "epoch": 0.46502495295753904, "grad_norm": 0.2569338381290436, "learning_rate": 9.908116743270098e-06, "loss": 0.0045, "step": 28420 }, { "epoch": 0.4651885789086149, "grad_norm": 0.25672534108161926, "learning_rate": 9.907934996807908e-06, "loss": 0.0035, "step": 28430 }, { "epoch": 0.46535220485969075, "grad_norm": 0.09729569405317307, "learning_rate": 9.907753072444871e-06, "loss": 0.0034, "step": 28440 }, { "epoch": 0.4655158308107666, "grad_norm": 0.17873574793338776, "learning_rate": 9.907570970187582e-06, "loss": 0.0044, "step": 28450 }, { "epoch": 0.4656794567618424, "grad_norm": 0.15932105481624603, "learning_rate": 9.907388690042636e-06, "loss": 0.0041, "step": 28460 }, { "epoch": 0.46584308271291824, "grad_norm": 0.0673116147518158, "learning_rate": 9.907206232016648e-06, "loss": 0.0064, "step": 28470 }, { "epoch": 0.46600670866399413, "grad_norm": 0.11695891618728638, "learning_rate": 9.907023596116228e-06, "loss": 0.0044, "step": 28480 }, { "epoch": 0.46617033461506996, "grad_norm": 0.14565777778625488, "learning_rate": 9.906840782347995e-06, "loss": 0.0048, "step": 28490 }, { "epoch": 0.4663339605661458, "grad_norm": 0.3451426923274994, "learning_rate": 9.906657790718577e-06, "loss": 0.0036, "step": 28500 }, { "epoch": 0.4664975865172216, "grad_norm": 0.2421916425228119, "learning_rate": 9.906474621234607e-06, "loss": 0.006, "step": 28510 }, { "epoch": 0.46666121246829745, "grad_norm": 0.27039963006973267, "learning_rate": 9.906291273902726e-06, "loss": 0.0053, "step": 28520 }, { "epoch": 0.46682483841937333, "grad_norm": 0.14194738864898682, "learning_rate": 9.906107748729577e-06, "loss": 0.0043, "step": 28530 }, { "epoch": 0.46698846437044916, "grad_norm": 0.15197667479515076, "learning_rate": 9.905924045721814e-06, "loss": 0.0037, "step": 28540 }, { "epoch": 0.467152090321525, "grad_norm": 0.45280808210372925, "learning_rate": 9.905740164886095e-06, "loss": 0.0046, "step": 28550 }, { "epoch": 0.4673157162726008, "grad_norm": 0.06453012675046921, "learning_rate": 9.905556106229085e-06, "loss": 0.0063, "step": 28560 }, { "epoch": 0.46747934222367665, "grad_norm": 0.1308635026216507, "learning_rate": 9.905371869757458e-06, "loss": 0.0067, "step": 28570 }, { "epoch": 0.46764296817475254, "grad_norm": 0.40309959650039673, "learning_rate": 9.90518745547789e-06, "loss": 0.0073, "step": 28580 }, { "epoch": 0.46780659412582837, "grad_norm": 0.3105323016643524, "learning_rate": 9.905002863397066e-06, "loss": 0.0037, "step": 28590 }, { "epoch": 0.4679702200769042, "grad_norm": 0.3754200339317322, "learning_rate": 9.904818093521678e-06, "loss": 0.0034, "step": 28600 }, { "epoch": 0.46813384602798, "grad_norm": 0.11845695227384567, "learning_rate": 9.90463314585842e-06, "loss": 0.0042, "step": 28610 }, { "epoch": 0.46829747197905586, "grad_norm": 0.05855477228760719, "learning_rate": 9.904448020414e-06, "loss": 0.0059, "step": 28620 }, { "epoch": 0.46846109793013174, "grad_norm": 0.09374213218688965, "learning_rate": 9.904262717195126e-06, "loss": 0.0036, "step": 28630 }, { "epoch": 0.46862472388120757, "grad_norm": 0.05167952552437782, "learning_rate": 9.904077236208516e-06, "loss": 0.009, "step": 28640 }, { "epoch": 0.4687883498322834, "grad_norm": 0.20160222053527832, "learning_rate": 9.903891577460894e-06, "loss": 0.0059, "step": 28650 }, { "epoch": 0.46895197578335923, "grad_norm": 0.3289794921875, "learning_rate": 9.903705740958986e-06, "loss": 0.0063, "step": 28660 }, { "epoch": 0.46911560173443506, "grad_norm": 0.06347501277923584, "learning_rate": 9.903519726709533e-06, "loss": 0.0063, "step": 28670 }, { "epoch": 0.46927922768551095, "grad_norm": 0.3217048943042755, "learning_rate": 9.903333534719275e-06, "loss": 0.0025, "step": 28680 }, { "epoch": 0.4694428536365868, "grad_norm": 0.23959380388259888, "learning_rate": 9.90314716499496e-06, "loss": 0.0042, "step": 28690 }, { "epoch": 0.4696064795876626, "grad_norm": 0.3354378938674927, "learning_rate": 9.902960617543345e-06, "loss": 0.0081, "step": 28700 }, { "epoch": 0.46977010553873844, "grad_norm": 0.1581249237060547, "learning_rate": 9.902773892371193e-06, "loss": 0.0057, "step": 28710 }, { "epoch": 0.46993373148981427, "grad_norm": 0.10706600546836853, "learning_rate": 9.902586989485269e-06, "loss": 0.0056, "step": 28720 }, { "epoch": 0.47009735744089015, "grad_norm": 0.12246666103601456, "learning_rate": 9.90239990889235e-06, "loss": 0.0048, "step": 28730 }, { "epoch": 0.470260983391966, "grad_norm": 0.13429774343967438, "learning_rate": 9.902212650599218e-06, "loss": 0.0025, "step": 28740 }, { "epoch": 0.4704246093430418, "grad_norm": 0.2532491087913513, "learning_rate": 9.902025214612658e-06, "loss": 0.0073, "step": 28750 }, { "epoch": 0.47058823529411764, "grad_norm": 0.14271461963653564, "learning_rate": 9.901837600939468e-06, "loss": 0.004, "step": 28760 }, { "epoch": 0.47075186124519347, "grad_norm": 0.11834526062011719, "learning_rate": 9.901649809586445e-06, "loss": 0.0033, "step": 28770 }, { "epoch": 0.47091548719626936, "grad_norm": 0.390371710062027, "learning_rate": 9.901461840560396e-06, "loss": 0.0046, "step": 28780 }, { "epoch": 0.4710791131473452, "grad_norm": 0.522347092628479, "learning_rate": 9.901273693868136e-06, "loss": 0.0058, "step": 28790 }, { "epoch": 0.471242739098421, "grad_norm": 0.3997916281223297, "learning_rate": 9.901085369516485e-06, "loss": 0.0067, "step": 28800 }, { "epoch": 0.47140636504949684, "grad_norm": 0.2708657681941986, "learning_rate": 9.90089686751227e-06, "loss": 0.0042, "step": 28810 }, { "epoch": 0.4715699910005727, "grad_norm": 0.6393135786056519, "learning_rate": 9.900708187862321e-06, "loss": 0.0062, "step": 28820 }, { "epoch": 0.4717336169516485, "grad_norm": 0.17552627623081207, "learning_rate": 9.90051933057348e-06, "loss": 0.0048, "step": 28830 }, { "epoch": 0.4718972429027244, "grad_norm": 0.12909357249736786, "learning_rate": 9.900330295652589e-06, "loss": 0.0079, "step": 28840 }, { "epoch": 0.4720608688538002, "grad_norm": 0.2059362530708313, "learning_rate": 9.900141083106505e-06, "loss": 0.0034, "step": 28850 }, { "epoch": 0.47222449480487605, "grad_norm": 0.43750259280204773, "learning_rate": 9.899951692942082e-06, "loss": 0.005, "step": 28860 }, { "epoch": 0.4723881207559519, "grad_norm": 0.17548465728759766, "learning_rate": 9.89976212516619e-06, "loss": 0.0042, "step": 28870 }, { "epoch": 0.4725517467070277, "grad_norm": 0.06503050774335861, "learning_rate": 9.899572379785693e-06, "loss": 0.0074, "step": 28880 }, { "epoch": 0.4727153726581036, "grad_norm": 0.1215314120054245, "learning_rate": 9.899382456807475e-06, "loss": 0.0046, "step": 28890 }, { "epoch": 0.4728789986091794, "grad_norm": 0.15483234822750092, "learning_rate": 9.899192356238418e-06, "loss": 0.0043, "step": 28900 }, { "epoch": 0.47304262456025525, "grad_norm": 0.6191183924674988, "learning_rate": 9.899002078085415e-06, "loss": 0.0038, "step": 28910 }, { "epoch": 0.4732062505113311, "grad_norm": 0.5984652638435364, "learning_rate": 9.898811622355358e-06, "loss": 0.009, "step": 28920 }, { "epoch": 0.4733698764624069, "grad_norm": 0.1429656744003296, "learning_rate": 9.898620989055156e-06, "loss": 0.0047, "step": 28930 }, { "epoch": 0.4735335024134828, "grad_norm": 0.17204104363918304, "learning_rate": 9.898430178191716e-06, "loss": 0.0072, "step": 28940 }, { "epoch": 0.47369712836455863, "grad_norm": 0.2196149230003357, "learning_rate": 9.898239189771955e-06, "loss": 0.0074, "step": 28950 }, { "epoch": 0.47386075431563446, "grad_norm": 0.1460697203874588, "learning_rate": 9.898048023802795e-06, "loss": 0.0057, "step": 28960 }, { "epoch": 0.4740243802667103, "grad_norm": 0.07840320467948914, "learning_rate": 9.897856680291168e-06, "loss": 0.0027, "step": 28970 }, { "epoch": 0.4741880062177861, "grad_norm": 0.39064085483551025, "learning_rate": 9.897665159244008e-06, "loss": 0.0041, "step": 28980 }, { "epoch": 0.474351632168862, "grad_norm": 0.3270370364189148, "learning_rate": 9.897473460668256e-06, "loss": 0.0065, "step": 28990 }, { "epoch": 0.47451525811993783, "grad_norm": 0.1632394641637802, "learning_rate": 9.897281584570861e-06, "loss": 0.0057, "step": 29000 }, { "epoch": 0.47467888407101366, "grad_norm": 0.2010754495859146, "learning_rate": 9.897089530958782e-06, "loss": 0.0045, "step": 29010 }, { "epoch": 0.4748425100220895, "grad_norm": 0.28294432163238525, "learning_rate": 9.896897299838974e-06, "loss": 0.004, "step": 29020 }, { "epoch": 0.4750061359731653, "grad_norm": 0.11046252399682999, "learning_rate": 9.89670489121841e-06, "loss": 0.0055, "step": 29030 }, { "epoch": 0.4751697619242412, "grad_norm": 0.29268643260002136, "learning_rate": 9.896512305104063e-06, "loss": 0.0039, "step": 29040 }, { "epoch": 0.47533338787531704, "grad_norm": 0.22420576214790344, "learning_rate": 9.896319541502912e-06, "loss": 0.0054, "step": 29050 }, { "epoch": 0.47549701382639287, "grad_norm": 0.35233908891677856, "learning_rate": 9.896126600421945e-06, "loss": 0.0058, "step": 29060 }, { "epoch": 0.4756606397774687, "grad_norm": 0.2954990565776825, "learning_rate": 9.895933481868158e-06, "loss": 0.0043, "step": 29070 }, { "epoch": 0.4758242657285445, "grad_norm": 0.09990601241588593, "learning_rate": 9.89574018584855e-06, "loss": 0.0035, "step": 29080 }, { "epoch": 0.4759878916796204, "grad_norm": 0.42652109265327454, "learning_rate": 9.895546712370126e-06, "loss": 0.0068, "step": 29090 }, { "epoch": 0.47615151763069624, "grad_norm": 0.18180683255195618, "learning_rate": 9.895353061439899e-06, "loss": 0.0033, "step": 29100 }, { "epoch": 0.47631514358177207, "grad_norm": 0.12242705374956131, "learning_rate": 9.89515923306489e-06, "loss": 0.0044, "step": 29110 }, { "epoch": 0.4764787695328479, "grad_norm": 0.16486947238445282, "learning_rate": 9.894965227252123e-06, "loss": 0.005, "step": 29120 }, { "epoch": 0.47664239548392373, "grad_norm": 0.1478731632232666, "learning_rate": 9.894771044008633e-06, "loss": 0.0049, "step": 29130 }, { "epoch": 0.4768060214349996, "grad_norm": 0.25755301117897034, "learning_rate": 9.894576683341455e-06, "loss": 0.0063, "step": 29140 }, { "epoch": 0.47696964738607545, "grad_norm": 0.2787677049636841, "learning_rate": 9.894382145257637e-06, "loss": 0.0047, "step": 29150 }, { "epoch": 0.4771332733371513, "grad_norm": 0.197701096534729, "learning_rate": 9.89418742976423e-06, "loss": 0.0038, "step": 29160 }, { "epoch": 0.4772968992882271, "grad_norm": 0.09459872543811798, "learning_rate": 9.893992536868291e-06, "loss": 0.0036, "step": 29170 }, { "epoch": 0.47746052523930294, "grad_norm": 0.1716059446334839, "learning_rate": 9.893797466576886e-06, "loss": 0.0055, "step": 29180 }, { "epoch": 0.4776241511903788, "grad_norm": 0.48710349202156067, "learning_rate": 9.893602218897083e-06, "loss": 0.0048, "step": 29190 }, { "epoch": 0.47778777714145465, "grad_norm": 0.18181033432483673, "learning_rate": 9.893406793835962e-06, "loss": 0.0083, "step": 29200 }, { "epoch": 0.4779514030925305, "grad_norm": 0.1216956079006195, "learning_rate": 9.893211191400605e-06, "loss": 0.0042, "step": 29210 }, { "epoch": 0.4781150290436063, "grad_norm": 0.24959330260753632, "learning_rate": 9.893015411598104e-06, "loss": 0.0047, "step": 29220 }, { "epoch": 0.47827865499468214, "grad_norm": 0.4371202886104584, "learning_rate": 9.892819454435555e-06, "loss": 0.0039, "step": 29230 }, { "epoch": 0.47844228094575797, "grad_norm": 0.2708517909049988, "learning_rate": 9.892623319920059e-06, "loss": 0.0055, "step": 29240 }, { "epoch": 0.47860590689683385, "grad_norm": 0.07128234207630157, "learning_rate": 9.892427008058727e-06, "loss": 0.004, "step": 29250 }, { "epoch": 0.4787695328479097, "grad_norm": 0.11701580137014389, "learning_rate": 9.892230518858674e-06, "loss": 0.0066, "step": 29260 }, { "epoch": 0.4789331587989855, "grad_norm": 0.1885947585105896, "learning_rate": 9.892033852327023e-06, "loss": 0.0047, "step": 29270 }, { "epoch": 0.47909678475006134, "grad_norm": 0.4105403423309326, "learning_rate": 9.891837008470902e-06, "loss": 0.0064, "step": 29280 }, { "epoch": 0.4792604107011372, "grad_norm": 0.1233682855963707, "learning_rate": 9.891639987297448e-06, "loss": 0.0034, "step": 29290 }, { "epoch": 0.47942403665221306, "grad_norm": 0.3517018258571625, "learning_rate": 9.891442788813801e-06, "loss": 0.0046, "step": 29300 }, { "epoch": 0.4795876626032889, "grad_norm": 0.2979389727115631, "learning_rate": 9.891245413027108e-06, "loss": 0.0033, "step": 29310 }, { "epoch": 0.4797512885543647, "grad_norm": 0.2267286777496338, "learning_rate": 9.891047859944527e-06, "loss": 0.0034, "step": 29320 }, { "epoch": 0.47991491450544055, "grad_norm": 0.1396339237689972, "learning_rate": 9.890850129573215e-06, "loss": 0.006, "step": 29330 }, { "epoch": 0.4800785404565164, "grad_norm": 0.26734721660614014, "learning_rate": 9.89065222192034e-06, "loss": 0.0044, "step": 29340 }, { "epoch": 0.48024216640759226, "grad_norm": 0.06409502774477005, "learning_rate": 9.890454136993077e-06, "loss": 0.0066, "step": 29350 }, { "epoch": 0.4804057923586681, "grad_norm": 0.03526555746793747, "learning_rate": 9.890255874798605e-06, "loss": 0.0045, "step": 29360 }, { "epoch": 0.4805694183097439, "grad_norm": 0.1905120462179184, "learning_rate": 9.89005743534411e-06, "loss": 0.0051, "step": 29370 }, { "epoch": 0.48073304426081975, "grad_norm": 0.18688473105430603, "learning_rate": 9.88985881863679e-06, "loss": 0.0045, "step": 29380 }, { "epoch": 0.4808966702118956, "grad_norm": 0.3594113290309906, "learning_rate": 9.889660024683836e-06, "loss": 0.0041, "step": 29390 }, { "epoch": 0.48106029616297147, "grad_norm": 0.250940203666687, "learning_rate": 9.889461053492459e-06, "loss": 0.004, "step": 29400 }, { "epoch": 0.4812239221140473, "grad_norm": 0.060092538595199585, "learning_rate": 9.889261905069873e-06, "loss": 0.0032, "step": 29410 }, { "epoch": 0.4813875480651231, "grad_norm": 0.3339327871799469, "learning_rate": 9.889062579423292e-06, "loss": 0.0049, "step": 29420 }, { "epoch": 0.48155117401619896, "grad_norm": 0.1326649785041809, "learning_rate": 9.888863076559942e-06, "loss": 0.0041, "step": 29430 }, { "epoch": 0.4817147999672748, "grad_norm": 0.14469024538993835, "learning_rate": 9.888663396487057e-06, "loss": 0.004, "step": 29440 }, { "epoch": 0.4818784259183507, "grad_norm": 0.17382074892520905, "learning_rate": 9.888463539211873e-06, "loss": 0.0032, "step": 29450 }, { "epoch": 0.4820420518694265, "grad_norm": 0.049370840191841125, "learning_rate": 9.888263504741635e-06, "loss": 0.0045, "step": 29460 }, { "epoch": 0.48220567782050233, "grad_norm": 0.17935901880264282, "learning_rate": 9.888063293083593e-06, "loss": 0.0058, "step": 29470 }, { "epoch": 0.48236930377157816, "grad_norm": 0.13717053830623627, "learning_rate": 9.887862904245005e-06, "loss": 0.0062, "step": 29480 }, { "epoch": 0.482532929722654, "grad_norm": 0.25411278009414673, "learning_rate": 9.887662338233135e-06, "loss": 0.003, "step": 29490 }, { "epoch": 0.4826965556737299, "grad_norm": 0.41999804973602295, "learning_rate": 9.887461595055251e-06, "loss": 0.0056, "step": 29500 }, { "epoch": 0.4828601816248057, "grad_norm": 0.32119449973106384, "learning_rate": 9.887260674718633e-06, "loss": 0.0063, "step": 29510 }, { "epoch": 0.48302380757588154, "grad_norm": 0.5428754687309265, "learning_rate": 9.88705957723056e-06, "loss": 0.0059, "step": 29520 }, { "epoch": 0.48318743352695737, "grad_norm": 0.11517683416604996, "learning_rate": 9.886858302598323e-06, "loss": 0.0043, "step": 29530 }, { "epoch": 0.4833510594780332, "grad_norm": 0.19034425914287567, "learning_rate": 9.88665685082922e-06, "loss": 0.0041, "step": 29540 }, { "epoch": 0.4835146854291091, "grad_norm": 0.17365509271621704, "learning_rate": 9.886455221930548e-06, "loss": 0.0035, "step": 29550 }, { "epoch": 0.4836783113801849, "grad_norm": 0.23726002871990204, "learning_rate": 9.886253415909622e-06, "loss": 0.0056, "step": 29560 }, { "epoch": 0.48384193733126074, "grad_norm": 0.1593717634677887, "learning_rate": 9.88605143277375e-06, "loss": 0.0047, "step": 29570 }, { "epoch": 0.48400556328233657, "grad_norm": 0.23736333847045898, "learning_rate": 9.885849272530257e-06, "loss": 0.0057, "step": 29580 }, { "epoch": 0.4841691892334124, "grad_norm": 0.19513937830924988, "learning_rate": 9.885646935186473e-06, "loss": 0.0052, "step": 29590 }, { "epoch": 0.4843328151844883, "grad_norm": 0.1746743768453598, "learning_rate": 9.88544442074973e-06, "loss": 0.008, "step": 29600 }, { "epoch": 0.4844964411355641, "grad_norm": 0.25018948316574097, "learning_rate": 9.885241729227366e-06, "loss": 0.0032, "step": 29610 }, { "epoch": 0.48466006708663995, "grad_norm": 0.036033306270837784, "learning_rate": 9.885038860626732e-06, "loss": 0.0038, "step": 29620 }, { "epoch": 0.4848236930377158, "grad_norm": 0.3072309195995331, "learning_rate": 9.88483581495518e-06, "loss": 0.0049, "step": 29630 }, { "epoch": 0.4849873189887916, "grad_norm": 0.09974529594182968, "learning_rate": 9.884632592220071e-06, "loss": 0.005, "step": 29640 }, { "epoch": 0.4851509449398675, "grad_norm": 0.25555264949798584, "learning_rate": 9.884429192428771e-06, "loss": 0.0039, "step": 29650 }, { "epoch": 0.4853145708909433, "grad_norm": 0.4265265166759491, "learning_rate": 9.884225615588651e-06, "loss": 0.0042, "step": 29660 }, { "epoch": 0.48547819684201915, "grad_norm": 0.2798185646533966, "learning_rate": 9.884021861707091e-06, "loss": 0.0046, "step": 29670 }, { "epoch": 0.485641822793095, "grad_norm": 0.2792820930480957, "learning_rate": 9.883817930791477e-06, "loss": 0.0048, "step": 29680 }, { "epoch": 0.4858054487441708, "grad_norm": 0.2896956205368042, "learning_rate": 9.883613822849201e-06, "loss": 0.0069, "step": 29690 }, { "epoch": 0.48596907469524664, "grad_norm": 0.1544959545135498, "learning_rate": 9.883409537887662e-06, "loss": 0.0036, "step": 29700 }, { "epoch": 0.4861327006463225, "grad_norm": 0.2987891137599945, "learning_rate": 9.883205075914263e-06, "loss": 0.0056, "step": 29710 }, { "epoch": 0.48629632659739835, "grad_norm": 0.16056248545646667, "learning_rate": 9.883000436936418e-06, "loss": 0.006, "step": 29720 }, { "epoch": 0.4864599525484742, "grad_norm": 0.24921059608459473, "learning_rate": 9.882795620961544e-06, "loss": 0.0044, "step": 29730 }, { "epoch": 0.48662357849955, "grad_norm": 0.3647303283214569, "learning_rate": 9.882590627997061e-06, "loss": 0.0058, "step": 29740 }, { "epoch": 0.48678720445062584, "grad_norm": 0.2851385176181793, "learning_rate": 9.882385458050406e-06, "loss": 0.0056, "step": 29750 }, { "epoch": 0.48695083040170173, "grad_norm": 0.15056554973125458, "learning_rate": 9.882180111129012e-06, "loss": 0.0038, "step": 29760 }, { "epoch": 0.48711445635277756, "grad_norm": 0.17657239735126495, "learning_rate": 9.881974587240321e-06, "loss": 0.0045, "step": 29770 }, { "epoch": 0.4872780823038534, "grad_norm": 0.3117468059062958, "learning_rate": 9.881768886391787e-06, "loss": 0.008, "step": 29780 }, { "epoch": 0.4874417082549292, "grad_norm": 0.6961111426353455, "learning_rate": 9.881563008590861e-06, "loss": 0.003, "step": 29790 }, { "epoch": 0.48760533420600505, "grad_norm": 0.23479531705379486, "learning_rate": 9.881356953845011e-06, "loss": 0.0079, "step": 29800 }, { "epoch": 0.48776896015708093, "grad_norm": 0.20981964468955994, "learning_rate": 9.881150722161703e-06, "loss": 0.0039, "step": 29810 }, { "epoch": 0.48793258610815676, "grad_norm": 0.04459293186664581, "learning_rate": 9.880944313548413e-06, "loss": 0.0033, "step": 29820 }, { "epoch": 0.4880962120592326, "grad_norm": 0.05605221912264824, "learning_rate": 9.880737728012622e-06, "loss": 0.0032, "step": 29830 }, { "epoch": 0.4882598380103084, "grad_norm": 0.24640503525733948, "learning_rate": 9.880530965561818e-06, "loss": 0.0058, "step": 29840 }, { "epoch": 0.48842346396138425, "grad_norm": 0.3623533248901367, "learning_rate": 9.880324026203498e-06, "loss": 0.0045, "step": 29850 }, { "epoch": 0.48858708991246014, "grad_norm": 0.2251889407634735, "learning_rate": 9.880116909945162e-06, "loss": 0.0051, "step": 29860 }, { "epoch": 0.48875071586353597, "grad_norm": 0.29930582642555237, "learning_rate": 9.879909616794316e-06, "loss": 0.0053, "step": 29870 }, { "epoch": 0.4889143418146118, "grad_norm": 0.14795860648155212, "learning_rate": 9.879702146758477e-06, "loss": 0.0054, "step": 29880 }, { "epoch": 0.4890779677656876, "grad_norm": 0.14577575027942657, "learning_rate": 9.87949449984516e-06, "loss": 0.0042, "step": 29890 }, { "epoch": 0.48924159371676346, "grad_norm": 0.2441176474094391, "learning_rate": 9.879286676061897e-06, "loss": 0.0032, "step": 29900 }, { "epoch": 0.48940521966783934, "grad_norm": 0.26571568846702576, "learning_rate": 9.879078675416218e-06, "loss": 0.0041, "step": 29910 }, { "epoch": 0.48956884561891517, "grad_norm": 0.33371320366859436, "learning_rate": 9.878870497915664e-06, "loss": 0.0039, "step": 29920 }, { "epoch": 0.489732471569991, "grad_norm": 0.47707465291023254, "learning_rate": 9.878662143567782e-06, "loss": 0.0034, "step": 29930 }, { "epoch": 0.48989609752106683, "grad_norm": 0.288194477558136, "learning_rate": 9.878453612380122e-06, "loss": 0.003, "step": 29940 }, { "epoch": 0.49005972347214266, "grad_norm": 0.15214216709136963, "learning_rate": 9.87824490436024e-06, "loss": 0.0043, "step": 29950 }, { "epoch": 0.49022334942321855, "grad_norm": 0.3886477053165436, "learning_rate": 9.878036019515708e-06, "loss": 0.0031, "step": 29960 }, { "epoch": 0.4903869753742944, "grad_norm": 0.12571264803409576, "learning_rate": 9.877826957854095e-06, "loss": 0.0045, "step": 29970 }, { "epoch": 0.4905506013253702, "grad_norm": 0.2895621955394745, "learning_rate": 9.877617719382978e-06, "loss": 0.0047, "step": 29980 }, { "epoch": 0.49071422727644604, "grad_norm": 0.17046798765659332, "learning_rate": 9.87740830410994e-06, "loss": 0.0053, "step": 29990 }, { "epoch": 0.49087785322752187, "grad_norm": 0.37388870120048523, "learning_rate": 9.877198712042576e-06, "loss": 0.0026, "step": 30000 }, { "epoch": 0.49104147917859775, "grad_norm": 0.40467822551727295, "learning_rate": 9.876988943188479e-06, "loss": 0.0035, "step": 30010 }, { "epoch": 0.4912051051296736, "grad_norm": 0.5484909415245056, "learning_rate": 9.876778997555255e-06, "loss": 0.0055, "step": 30020 }, { "epoch": 0.4913687310807494, "grad_norm": 0.12758401036262512, "learning_rate": 9.876568875150515e-06, "loss": 0.005, "step": 30030 }, { "epoch": 0.49153235703182524, "grad_norm": 0.21156053245067596, "learning_rate": 9.87635857598187e-06, "loss": 0.0052, "step": 30040 }, { "epoch": 0.49169598298290107, "grad_norm": 0.16070495545864105, "learning_rate": 9.87614810005695e-06, "loss": 0.003, "step": 30050 }, { "epoch": 0.49185960893397696, "grad_norm": 0.13787518441677094, "learning_rate": 9.87593744738338e-06, "loss": 0.0076, "step": 30060 }, { "epoch": 0.4920232348850528, "grad_norm": 0.2332271784543991, "learning_rate": 9.875726617968794e-06, "loss": 0.0048, "step": 30070 }, { "epoch": 0.4921868608361286, "grad_norm": 0.3964560329914093, "learning_rate": 9.875515611820839e-06, "loss": 0.0047, "step": 30080 }, { "epoch": 0.49235048678720444, "grad_norm": 0.15190444886684418, "learning_rate": 9.87530442894716e-06, "loss": 0.0046, "step": 30090 }, { "epoch": 0.4925141127382803, "grad_norm": 0.04631827399134636, "learning_rate": 9.875093069355414e-06, "loss": 0.0028, "step": 30100 }, { "epoch": 0.49267773868935616, "grad_norm": 0.23227931559085846, "learning_rate": 9.874881533053262e-06, "loss": 0.0051, "step": 30110 }, { "epoch": 0.492841364640432, "grad_norm": 0.20683607459068298, "learning_rate": 9.874669820048369e-06, "loss": 0.0041, "step": 30120 }, { "epoch": 0.4930049905915078, "grad_norm": 0.18111008405685425, "learning_rate": 9.874457930348411e-06, "loss": 0.0039, "step": 30130 }, { "epoch": 0.49316861654258365, "grad_norm": 0.07248686254024506, "learning_rate": 9.874245863961071e-06, "loss": 0.004, "step": 30140 }, { "epoch": 0.4933322424936595, "grad_norm": 0.39866340160369873, "learning_rate": 9.874033620894032e-06, "loss": 0.0059, "step": 30150 }, { "epoch": 0.4934958684447353, "grad_norm": 0.3728131949901581, "learning_rate": 9.873821201154988e-06, "loss": 0.0052, "step": 30160 }, { "epoch": 0.4936594943958112, "grad_norm": 0.22588664293289185, "learning_rate": 9.87360860475164e-06, "loss": 0.0042, "step": 30170 }, { "epoch": 0.493823120346887, "grad_norm": 0.2180001437664032, "learning_rate": 9.873395831691692e-06, "loss": 0.0046, "step": 30180 }, { "epoch": 0.49398674629796285, "grad_norm": 0.15236681699752808, "learning_rate": 9.87318288198286e-06, "loss": 0.0045, "step": 30190 }, { "epoch": 0.4941503722490387, "grad_norm": 0.11469181627035141, "learning_rate": 9.87296975563286e-06, "loss": 0.0047, "step": 30200 }, { "epoch": 0.4943139982001145, "grad_norm": 0.360554963350296, "learning_rate": 9.872756452649417e-06, "loss": 0.0039, "step": 30210 }, { "epoch": 0.4944776241511904, "grad_norm": 0.30228957533836365, "learning_rate": 9.872542973040266e-06, "loss": 0.0061, "step": 30220 }, { "epoch": 0.49464125010226623, "grad_norm": 0.16423900425434113, "learning_rate": 9.872329316813143e-06, "loss": 0.0049, "step": 30230 }, { "epoch": 0.49480487605334206, "grad_norm": 0.11911007761955261, "learning_rate": 9.87211548397579e-06, "loss": 0.0042, "step": 30240 }, { "epoch": 0.4949685020044179, "grad_norm": 0.20877450704574585, "learning_rate": 9.871901474535964e-06, "loss": 0.0038, "step": 30250 }, { "epoch": 0.4951321279554937, "grad_norm": 0.09507668018341064, "learning_rate": 9.871687288501418e-06, "loss": 0.0048, "step": 30260 }, { "epoch": 0.4952957539065696, "grad_norm": 0.1973990499973297, "learning_rate": 9.871472925879914e-06, "loss": 0.0039, "step": 30270 }, { "epoch": 0.49545937985764543, "grad_norm": 0.2648104131221771, "learning_rate": 9.871258386679228e-06, "loss": 0.0031, "step": 30280 }, { "epoch": 0.49562300580872126, "grad_norm": 0.18706491589546204, "learning_rate": 9.871043670907132e-06, "loss": 0.0042, "step": 30290 }, { "epoch": 0.4957866317597971, "grad_norm": 0.49145370721817017, "learning_rate": 9.870828778571408e-06, "loss": 0.0033, "step": 30300 }, { "epoch": 0.4959502577108729, "grad_norm": 0.25051698088645935, "learning_rate": 9.87061370967985e-06, "loss": 0.0046, "step": 30310 }, { "epoch": 0.4961138836619488, "grad_norm": 0.12890347838401794, "learning_rate": 9.870398464240251e-06, "loss": 0.0042, "step": 30320 }, { "epoch": 0.49627750961302464, "grad_norm": 0.2778070867061615, "learning_rate": 9.870183042260414e-06, "loss": 0.0037, "step": 30330 }, { "epoch": 0.49644113556410047, "grad_norm": 0.33920711278915405, "learning_rate": 9.869967443748145e-06, "loss": 0.0052, "step": 30340 }, { "epoch": 0.4966047615151763, "grad_norm": 0.23967944085597992, "learning_rate": 9.869751668711262e-06, "loss": 0.0055, "step": 30350 }, { "epoch": 0.4967683874662521, "grad_norm": 0.0423504002392292, "learning_rate": 9.869535717157585e-06, "loss": 0.005, "step": 30360 }, { "epoch": 0.496932013417328, "grad_norm": 0.1894690841436386, "learning_rate": 9.869319589094943e-06, "loss": 0.0034, "step": 30370 }, { "epoch": 0.49709563936840384, "grad_norm": 0.22895008325576782, "learning_rate": 9.869103284531167e-06, "loss": 0.0045, "step": 30380 }, { "epoch": 0.49725926531947967, "grad_norm": 0.3898073732852936, "learning_rate": 9.8688868034741e-06, "loss": 0.0076, "step": 30390 }, { "epoch": 0.4974228912705555, "grad_norm": 0.7379021644592285, "learning_rate": 9.868670145931589e-06, "loss": 0.0056, "step": 30400 }, { "epoch": 0.49758651722163133, "grad_norm": 0.20226426422595978, "learning_rate": 9.868453311911487e-06, "loss": 0.0059, "step": 30410 }, { "epoch": 0.4977501431727072, "grad_norm": 0.2828306257724762, "learning_rate": 9.868236301421653e-06, "loss": 0.007, "step": 30420 }, { "epoch": 0.49791376912378305, "grad_norm": 0.14391179382801056, "learning_rate": 9.868019114469953e-06, "loss": 0.0041, "step": 30430 }, { "epoch": 0.4980773950748589, "grad_norm": 0.318920373916626, "learning_rate": 9.867801751064262e-06, "loss": 0.0026, "step": 30440 }, { "epoch": 0.4982410210259347, "grad_norm": 0.1021595448255539, "learning_rate": 9.867584211212456e-06, "loss": 0.006, "step": 30450 }, { "epoch": 0.49840464697701053, "grad_norm": 0.2872607707977295, "learning_rate": 9.86736649492242e-06, "loss": 0.0043, "step": 30460 }, { "epoch": 0.4985682729280864, "grad_norm": 0.08095104992389679, "learning_rate": 9.867148602202049e-06, "loss": 0.0044, "step": 30470 }, { "epoch": 0.49873189887916225, "grad_norm": 0.15095247328281403, "learning_rate": 9.866930533059237e-06, "loss": 0.0044, "step": 30480 }, { "epoch": 0.4988955248302381, "grad_norm": 0.08777711540460587, "learning_rate": 9.866712287501891e-06, "loss": 0.003, "step": 30490 }, { "epoch": 0.4990591507813139, "grad_norm": 0.10970611125230789, "learning_rate": 9.866493865537922e-06, "loss": 0.0061, "step": 30500 }, { "epoch": 0.49922277673238974, "grad_norm": 0.18514953553676605, "learning_rate": 9.866275267175248e-06, "loss": 0.0054, "step": 30510 }, { "epoch": 0.4993864026834656, "grad_norm": 0.1681094616651535, "learning_rate": 9.866056492421789e-06, "loss": 0.0049, "step": 30520 }, { "epoch": 0.49955002863454145, "grad_norm": 0.32745957374572754, "learning_rate": 9.865837541285479e-06, "loss": 0.0032, "step": 30530 }, { "epoch": 0.4997136545856173, "grad_norm": 0.3193792700767517, "learning_rate": 9.865618413774251e-06, "loss": 0.0051, "step": 30540 }, { "epoch": 0.4998772805366931, "grad_norm": 0.2464493066072464, "learning_rate": 9.865399109896051e-06, "loss": 0.0054, "step": 30550 }, { "epoch": 0.500040906487769, "grad_norm": 0.2660923898220062, "learning_rate": 9.865179629658827e-06, "loss": 0.0045, "step": 30560 }, { "epoch": 0.5002045324388448, "grad_norm": 0.1223653182387352, "learning_rate": 9.864959973070534e-06, "loss": 0.0059, "step": 30570 }, { "epoch": 0.5003681583899207, "grad_norm": 0.3126308023929596, "learning_rate": 9.864740140139135e-06, "loss": 0.0044, "step": 30580 }, { "epoch": 0.5005317843409964, "grad_norm": 0.11172177642583847, "learning_rate": 9.864520130872599e-06, "loss": 0.0044, "step": 30590 }, { "epoch": 0.5006954102920723, "grad_norm": 0.17950640618801117, "learning_rate": 9.8642999452789e-06, "loss": 0.0036, "step": 30600 }, { "epoch": 0.5008590362431482, "grad_norm": 0.22110264003276825, "learning_rate": 9.864079583366016e-06, "loss": 0.0059, "step": 30610 }, { "epoch": 0.501022662194224, "grad_norm": 0.43560776114463806, "learning_rate": 9.86385904514194e-06, "loss": 0.0051, "step": 30620 }, { "epoch": 0.5011862881452999, "grad_norm": 0.43948209285736084, "learning_rate": 9.863638330614663e-06, "loss": 0.0041, "step": 30630 }, { "epoch": 0.5013499140963756, "grad_norm": 0.09683792293071747, "learning_rate": 9.863417439792187e-06, "loss": 0.0059, "step": 30640 }, { "epoch": 0.5015135400474515, "grad_norm": 0.0957476869225502, "learning_rate": 9.863196372682516e-06, "loss": 0.0049, "step": 30650 }, { "epoch": 0.5016771659985274, "grad_norm": 0.22459042072296143, "learning_rate": 9.862975129293666e-06, "loss": 0.0064, "step": 30660 }, { "epoch": 0.5018407919496032, "grad_norm": 0.049262214452028275, "learning_rate": 9.862753709633654e-06, "loss": 0.005, "step": 30670 }, { "epoch": 0.5020044179006791, "grad_norm": 0.13619364798069, "learning_rate": 9.862532113710509e-06, "loss": 0.0029, "step": 30680 }, { "epoch": 0.5021680438517548, "grad_norm": 0.15392081439495087, "learning_rate": 9.862310341532263e-06, "loss": 0.0043, "step": 30690 }, { "epoch": 0.5023316698028307, "grad_norm": 0.22392538189888, "learning_rate": 9.86208839310695e-06, "loss": 0.0039, "step": 30700 }, { "epoch": 0.5024952957539066, "grad_norm": 0.5785654783248901, "learning_rate": 9.861866268442618e-06, "loss": 0.0079, "step": 30710 }, { "epoch": 0.5026589217049824, "grad_norm": 0.04843432083725929, "learning_rate": 9.861643967547323e-06, "loss": 0.0059, "step": 30720 }, { "epoch": 0.5028225476560583, "grad_norm": 0.27586302161216736, "learning_rate": 9.861421490429116e-06, "loss": 0.0041, "step": 30730 }, { "epoch": 0.502986173607134, "grad_norm": 0.37647032737731934, "learning_rate": 9.861198837096064e-06, "loss": 0.0039, "step": 30740 }, { "epoch": 0.5031497995582099, "grad_norm": 0.17302639782428741, "learning_rate": 9.86097600755624e-06, "loss": 0.0046, "step": 30750 }, { "epoch": 0.5033134255092858, "grad_norm": 0.07171842455863953, "learning_rate": 9.860753001817716e-06, "loss": 0.0033, "step": 30760 }, { "epoch": 0.5034770514603616, "grad_norm": 0.24697132408618927, "learning_rate": 9.86052981988858e-06, "loss": 0.0049, "step": 30770 }, { "epoch": 0.5036406774114375, "grad_norm": 0.0713362991809845, "learning_rate": 9.86030646177692e-06, "loss": 0.0034, "step": 30780 }, { "epoch": 0.5038043033625133, "grad_norm": 0.12136947363615036, "learning_rate": 9.860082927490831e-06, "loss": 0.0044, "step": 30790 }, { "epoch": 0.5039679293135891, "grad_norm": 0.3462497591972351, "learning_rate": 9.859859217038418e-06, "loss": 0.0042, "step": 30800 }, { "epoch": 0.504131555264665, "grad_norm": 0.2680438458919525, "learning_rate": 9.859635330427789e-06, "loss": 0.0032, "step": 30810 }, { "epoch": 0.5042951812157408, "grad_norm": 0.22928646206855774, "learning_rate": 9.859411267667059e-06, "loss": 0.003, "step": 30820 }, { "epoch": 0.5044588071668167, "grad_norm": 0.10160088539123535, "learning_rate": 9.859187028764348e-06, "loss": 0.0051, "step": 30830 }, { "epoch": 0.5046224331178925, "grad_norm": 0.24085910618305206, "learning_rate": 9.858962613727787e-06, "loss": 0.004, "step": 30840 }, { "epoch": 0.5047860590689683, "grad_norm": 0.1074313223361969, "learning_rate": 9.85873802256551e-06, "loss": 0.0033, "step": 30850 }, { "epoch": 0.5049496850200442, "grad_norm": 0.10650540888309479, "learning_rate": 9.858513255285657e-06, "loss": 0.0054, "step": 30860 }, { "epoch": 0.50511331097112, "grad_norm": 0.41054514050483704, "learning_rate": 9.858288311896375e-06, "loss": 0.0051, "step": 30870 }, { "epoch": 0.5052769369221959, "grad_norm": 0.4507800340652466, "learning_rate": 9.85806319240582e-06, "loss": 0.0051, "step": 30880 }, { "epoch": 0.5054405628732717, "grad_norm": 0.20634064078330994, "learning_rate": 9.857837896822149e-06, "loss": 0.0037, "step": 30890 }, { "epoch": 0.5056041888243475, "grad_norm": 0.2859896719455719, "learning_rate": 9.857612425153529e-06, "loss": 0.0048, "step": 30900 }, { "epoch": 0.5057678147754234, "grad_norm": 0.23569360375404358, "learning_rate": 9.857386777408135e-06, "loss": 0.0041, "step": 30910 }, { "epoch": 0.5059314407264992, "grad_norm": 0.3860852122306824, "learning_rate": 9.857160953594144e-06, "loss": 0.0028, "step": 30920 }, { "epoch": 0.5060950666775751, "grad_norm": 0.20479625463485718, "learning_rate": 9.856934953719742e-06, "loss": 0.004, "step": 30930 }, { "epoch": 0.5062586926286509, "grad_norm": 0.11897959560155869, "learning_rate": 9.856708777793122e-06, "loss": 0.0026, "step": 30940 }, { "epoch": 0.5064223185797267, "grad_norm": 0.3292287588119507, "learning_rate": 9.856482425822482e-06, "loss": 0.0045, "step": 30950 }, { "epoch": 0.5065859445308026, "grad_norm": 0.3212360739707947, "learning_rate": 9.856255897816024e-06, "loss": 0.0036, "step": 30960 }, { "epoch": 0.5067495704818784, "grad_norm": 0.20862159132957458, "learning_rate": 9.856029193781964e-06, "loss": 0.0031, "step": 30970 }, { "epoch": 0.5069131964329543, "grad_norm": 0.2558629810810089, "learning_rate": 9.855802313728517e-06, "loss": 0.0044, "step": 30980 }, { "epoch": 0.5070768223840301, "grad_norm": 0.2657250165939331, "learning_rate": 9.855575257663905e-06, "loss": 0.0061, "step": 30990 }, { "epoch": 0.507240448335106, "grad_norm": 0.1743023544549942, "learning_rate": 9.855348025596362e-06, "loss": 0.0046, "step": 31000 }, { "epoch": 0.5074040742861818, "grad_norm": 0.25727203488349915, "learning_rate": 9.85512061753412e-06, "loss": 0.0032, "step": 31010 }, { "epoch": 0.5075677002372576, "grad_norm": 0.4770571291446686, "learning_rate": 9.854893033485427e-06, "loss": 0.0054, "step": 31020 }, { "epoch": 0.5077313261883335, "grad_norm": 0.3970147967338562, "learning_rate": 9.854665273458527e-06, "loss": 0.0046, "step": 31030 }, { "epoch": 0.5078949521394093, "grad_norm": 0.29395201802253723, "learning_rate": 9.854437337461682e-06, "loss": 0.0073, "step": 31040 }, { "epoch": 0.5080585780904852, "grad_norm": 0.33381879329681396, "learning_rate": 9.854209225503148e-06, "loss": 0.0035, "step": 31050 }, { "epoch": 0.508222204041561, "grad_norm": 0.3724513351917267, "learning_rate": 9.853980937591199e-06, "loss": 0.0063, "step": 31060 }, { "epoch": 0.5083858299926368, "grad_norm": 0.3101649880409241, "learning_rate": 9.853752473734107e-06, "loss": 0.0065, "step": 31070 }, { "epoch": 0.5085494559437127, "grad_norm": 0.1964121311903, "learning_rate": 9.853523833940152e-06, "loss": 0.0033, "step": 31080 }, { "epoch": 0.5087130818947885, "grad_norm": 0.1315566897392273, "learning_rate": 9.853295018217623e-06, "loss": 0.0045, "step": 31090 }, { "epoch": 0.5088767078458644, "grad_norm": 0.23415246605873108, "learning_rate": 9.853066026574816e-06, "loss": 0.0031, "step": 31100 }, { "epoch": 0.5090403337969402, "grad_norm": 0.3314189910888672, "learning_rate": 9.852836859020027e-06, "loss": 0.0058, "step": 31110 }, { "epoch": 0.509203959748016, "grad_norm": 0.051704198122024536, "learning_rate": 9.852607515561567e-06, "loss": 0.0069, "step": 31120 }, { "epoch": 0.5093675856990919, "grad_norm": 0.5094984173774719, "learning_rate": 9.852377996207748e-06, "loss": 0.0052, "step": 31130 }, { "epoch": 0.5095312116501677, "grad_norm": 0.19634106755256653, "learning_rate": 9.852148300966888e-06, "loss": 0.0051, "step": 31140 }, { "epoch": 0.5096948376012436, "grad_norm": 0.15406464040279388, "learning_rate": 9.851918429847315e-06, "loss": 0.0031, "step": 31150 }, { "epoch": 0.5098584635523195, "grad_norm": 0.504865288734436, "learning_rate": 9.851688382857359e-06, "loss": 0.004, "step": 31160 }, { "epoch": 0.5100220895033952, "grad_norm": 0.16970601677894592, "learning_rate": 9.851458160005361e-06, "loss": 0.0043, "step": 31170 }, { "epoch": 0.5101857154544711, "grad_norm": 0.26412877440452576, "learning_rate": 9.851227761299662e-06, "loss": 0.0028, "step": 31180 }, { "epoch": 0.5103493414055469, "grad_norm": 0.11685189604759216, "learning_rate": 9.85099718674862e-06, "loss": 0.0036, "step": 31190 }, { "epoch": 0.5105129673566228, "grad_norm": 0.34057921171188354, "learning_rate": 9.850766436360587e-06, "loss": 0.0055, "step": 31200 }, { "epoch": 0.5106765933076987, "grad_norm": 0.14303185045719147, "learning_rate": 9.85053551014393e-06, "loss": 0.0031, "step": 31210 }, { "epoch": 0.5108402192587744, "grad_norm": 0.10585922747850418, "learning_rate": 9.850304408107019e-06, "loss": 0.0036, "step": 31220 }, { "epoch": 0.5110038452098503, "grad_norm": 0.24011142551898956, "learning_rate": 9.85007313025823e-06, "loss": 0.0031, "step": 31230 }, { "epoch": 0.5111674711609261, "grad_norm": 0.047218699008226395, "learning_rate": 9.849841676605946e-06, "loss": 0.0048, "step": 31240 }, { "epoch": 0.511331097112002, "grad_norm": 0.1564851552248001, "learning_rate": 9.849610047158558e-06, "loss": 0.003, "step": 31250 }, { "epoch": 0.5114947230630777, "grad_norm": 0.804195761680603, "learning_rate": 9.849378241924464e-06, "loss": 0.0036, "step": 31260 }, { "epoch": 0.5116583490141536, "grad_norm": 0.21099935472011566, "learning_rate": 9.84914626091206e-06, "loss": 0.0045, "step": 31270 }, { "epoch": 0.5118219749652295, "grad_norm": 0.16904276609420776, "learning_rate": 9.848914104129761e-06, "loss": 0.0041, "step": 31280 }, { "epoch": 0.5119856009163053, "grad_norm": 0.10721002519130707, "learning_rate": 9.84868177158598e-06, "loss": 0.0035, "step": 31290 }, { "epoch": 0.5121492268673812, "grad_norm": 0.2088523656129837, "learning_rate": 9.848449263289137e-06, "loss": 0.0051, "step": 31300 }, { "epoch": 0.512312852818457, "grad_norm": 0.27241212129592896, "learning_rate": 9.84821657924766e-06, "loss": 0.0041, "step": 31310 }, { "epoch": 0.5124764787695328, "grad_norm": 0.25362634658813477, "learning_rate": 9.847983719469987e-06, "loss": 0.0049, "step": 31320 }, { "epoch": 0.5126401047206087, "grad_norm": 0.3286561369895935, "learning_rate": 9.847750683964555e-06, "loss": 0.0047, "step": 31330 }, { "epoch": 0.5128037306716845, "grad_norm": 0.19310718774795532, "learning_rate": 9.847517472739813e-06, "loss": 0.0041, "step": 31340 }, { "epoch": 0.5129673566227604, "grad_norm": 0.26821574568748474, "learning_rate": 9.847284085804212e-06, "loss": 0.0048, "step": 31350 }, { "epoch": 0.5131309825738362, "grad_norm": 0.3510286808013916, "learning_rate": 9.847050523166213e-06, "loss": 0.0071, "step": 31360 }, { "epoch": 0.513294608524912, "grad_norm": 0.4269711673259735, "learning_rate": 9.846816784834284e-06, "loss": 0.0067, "step": 31370 }, { "epoch": 0.5134582344759879, "grad_norm": 0.30935657024383545, "learning_rate": 9.846582870816893e-06, "loss": 0.0045, "step": 31380 }, { "epoch": 0.5136218604270637, "grad_norm": 0.2932969331741333, "learning_rate": 9.846348781122522e-06, "loss": 0.003, "step": 31390 }, { "epoch": 0.5137854863781396, "grad_norm": 0.445114403963089, "learning_rate": 9.846114515759656e-06, "loss": 0.0057, "step": 31400 }, { "epoch": 0.5139491123292154, "grad_norm": 0.1886824667453766, "learning_rate": 9.845880074736787e-06, "loss": 0.0031, "step": 31410 }, { "epoch": 0.5141127382802912, "grad_norm": 0.1140340119600296, "learning_rate": 9.84564545806241e-06, "loss": 0.0064, "step": 31420 }, { "epoch": 0.5142763642313671, "grad_norm": 0.08062157034873962, "learning_rate": 9.845410665745033e-06, "loss": 0.0049, "step": 31430 }, { "epoch": 0.5144399901824429, "grad_norm": 0.26562511920928955, "learning_rate": 9.845175697793164e-06, "loss": 0.0033, "step": 31440 }, { "epoch": 0.5146036161335188, "grad_norm": 0.17411820590496063, "learning_rate": 9.844940554215323e-06, "loss": 0.0053, "step": 31450 }, { "epoch": 0.5147672420845946, "grad_norm": 0.18165600299835205, "learning_rate": 9.84470523502003e-06, "loss": 0.0054, "step": 31460 }, { "epoch": 0.5149308680356705, "grad_norm": 0.2793065011501312, "learning_rate": 9.844469740215817e-06, "loss": 0.005, "step": 31470 }, { "epoch": 0.5150944939867463, "grad_norm": 0.2496550977230072, "learning_rate": 9.844234069811217e-06, "loss": 0.0065, "step": 31480 }, { "epoch": 0.5152581199378221, "grad_norm": 0.10200822353363037, "learning_rate": 9.843998223814777e-06, "loss": 0.0107, "step": 31490 }, { "epoch": 0.515421745888898, "grad_norm": 0.5874437093734741, "learning_rate": 9.843762202235042e-06, "loss": 0.007, "step": 31500 }, { "epoch": 0.5155853718399738, "grad_norm": 0.2364024817943573, "learning_rate": 9.84352600508057e-06, "loss": 0.0043, "step": 31510 }, { "epoch": 0.5157489977910497, "grad_norm": 0.3517710268497467, "learning_rate": 9.84328963235992e-06, "loss": 0.0035, "step": 31520 }, { "epoch": 0.5159126237421255, "grad_norm": 0.38161441683769226, "learning_rate": 9.843053084081664e-06, "loss": 0.0048, "step": 31530 }, { "epoch": 0.5160762496932013, "grad_norm": 0.2573119103908539, "learning_rate": 9.842816360254372e-06, "loss": 0.0033, "step": 31540 }, { "epoch": 0.5162398756442772, "grad_norm": 0.22384265065193176, "learning_rate": 9.842579460886626e-06, "loss": 0.0053, "step": 31550 }, { "epoch": 0.516403501595353, "grad_norm": 0.2548035681247711, "learning_rate": 9.842342385987014e-06, "loss": 0.0081, "step": 31560 }, { "epoch": 0.5165671275464289, "grad_norm": 0.2440217286348343, "learning_rate": 9.842105135564129e-06, "loss": 0.0035, "step": 31570 }, { "epoch": 0.5167307534975047, "grad_norm": 0.18748502433300018, "learning_rate": 9.84186770962657e-06, "loss": 0.0082, "step": 31580 }, { "epoch": 0.5168943794485805, "grad_norm": 0.5127915740013123, "learning_rate": 9.841630108182944e-06, "loss": 0.0066, "step": 31590 }, { "epoch": 0.5170580053996564, "grad_norm": 0.1906578242778778, "learning_rate": 9.841392331241863e-06, "loss": 0.0054, "step": 31600 }, { "epoch": 0.5172216313507322, "grad_norm": 0.2827862799167633, "learning_rate": 9.841154378811946e-06, "loss": 0.0059, "step": 31610 }, { "epoch": 0.5173852573018081, "grad_norm": 0.43559354543685913, "learning_rate": 9.840916250901818e-06, "loss": 0.005, "step": 31620 }, { "epoch": 0.517548883252884, "grad_norm": 0.10356776416301727, "learning_rate": 9.84067794752011e-06, "loss": 0.005, "step": 31630 }, { "epoch": 0.5177125092039597, "grad_norm": 0.19057238101959229, "learning_rate": 9.840439468675462e-06, "loss": 0.0042, "step": 31640 }, { "epoch": 0.5178761351550356, "grad_norm": 0.1440851241350174, "learning_rate": 9.840200814376516e-06, "loss": 0.004, "step": 31650 }, { "epoch": 0.5180397611061114, "grad_norm": 0.26910269260406494, "learning_rate": 9.839961984631926e-06, "loss": 0.0045, "step": 31660 }, { "epoch": 0.5182033870571873, "grad_norm": 0.16595585644245148, "learning_rate": 9.839722979450343e-06, "loss": 0.0041, "step": 31670 }, { "epoch": 0.5183670130082632, "grad_norm": 0.1385856419801712, "learning_rate": 9.839483798840437e-06, "loss": 0.0052, "step": 31680 }, { "epoch": 0.5185306389593389, "grad_norm": 0.30816200375556946, "learning_rate": 9.839244442810874e-06, "loss": 0.0038, "step": 31690 }, { "epoch": 0.5186942649104148, "grad_norm": 0.16760753095149994, "learning_rate": 9.839004911370329e-06, "loss": 0.0035, "step": 31700 }, { "epoch": 0.5188578908614906, "grad_norm": 0.06721753627061844, "learning_rate": 9.838765204527487e-06, "loss": 0.005, "step": 31710 }, { "epoch": 0.5190215168125665, "grad_norm": 0.15035152435302734, "learning_rate": 9.838525322291036e-06, "loss": 0.0056, "step": 31720 }, { "epoch": 0.5191851427636424, "grad_norm": 0.252145379781723, "learning_rate": 9.838285264669672e-06, "loss": 0.0031, "step": 31730 }, { "epoch": 0.5193487687147181, "grad_norm": 0.22252891957759857, "learning_rate": 9.838045031672096e-06, "loss": 0.0039, "step": 31740 }, { "epoch": 0.519512394665794, "grad_norm": 0.19901922345161438, "learning_rate": 9.837804623307014e-06, "loss": 0.0061, "step": 31750 }, { "epoch": 0.5196760206168698, "grad_norm": 0.20793382823467255, "learning_rate": 9.837564039583143e-06, "loss": 0.0041, "step": 31760 }, { "epoch": 0.5198396465679457, "grad_norm": 0.15770792961120605, "learning_rate": 9.837323280509202e-06, "loss": 0.0036, "step": 31770 }, { "epoch": 0.5200032725190216, "grad_norm": 0.2245488315820694, "learning_rate": 9.837082346093919e-06, "loss": 0.006, "step": 31780 }, { "epoch": 0.5201668984700973, "grad_norm": 0.4283466935157776, "learning_rate": 9.836841236346024e-06, "loss": 0.0079, "step": 31790 }, { "epoch": 0.5203305244211732, "grad_norm": 0.1798923760652542, "learning_rate": 9.83659995127426e-06, "loss": 0.0046, "step": 31800 }, { "epoch": 0.520494150372249, "grad_norm": 0.09698275476694107, "learning_rate": 9.836358490887375e-06, "loss": 0.0033, "step": 31810 }, { "epoch": 0.5206577763233249, "grad_norm": 0.12913936376571655, "learning_rate": 9.836116855194116e-06, "loss": 0.0057, "step": 31820 }, { "epoch": 0.5208214022744008, "grad_norm": 0.10202960669994354, "learning_rate": 9.835875044203245e-06, "loss": 0.0047, "step": 31830 }, { "epoch": 0.5209850282254765, "grad_norm": 0.2646859884262085, "learning_rate": 9.835633057923526e-06, "loss": 0.0045, "step": 31840 }, { "epoch": 0.5211486541765524, "grad_norm": 0.49617961049079895, "learning_rate": 9.835390896363731e-06, "loss": 0.0042, "step": 31850 }, { "epoch": 0.5213122801276282, "grad_norm": 0.15102189779281616, "learning_rate": 9.835148559532638e-06, "loss": 0.0034, "step": 31860 }, { "epoch": 0.5214759060787041, "grad_norm": 0.2633160352706909, "learning_rate": 9.83490604743903e-06, "loss": 0.0058, "step": 31870 }, { "epoch": 0.52163953202978, "grad_norm": 0.4304446876049042, "learning_rate": 9.834663360091698e-06, "loss": 0.0053, "step": 31880 }, { "epoch": 0.5218031579808557, "grad_norm": 0.21876242756843567, "learning_rate": 9.83442049749944e-06, "loss": 0.0044, "step": 31890 }, { "epoch": 0.5219667839319316, "grad_norm": 0.12237274646759033, "learning_rate": 9.834177459671059e-06, "loss": 0.0044, "step": 31900 }, { "epoch": 0.5221304098830074, "grad_norm": 0.28060805797576904, "learning_rate": 9.833934246615363e-06, "loss": 0.0045, "step": 31910 }, { "epoch": 0.5222940358340833, "grad_norm": 0.09831776469945908, "learning_rate": 9.833690858341167e-06, "loss": 0.0035, "step": 31920 }, { "epoch": 0.5224576617851592, "grad_norm": 0.17224234342575073, "learning_rate": 9.833447294857295e-06, "loss": 0.0038, "step": 31930 }, { "epoch": 0.522621287736235, "grad_norm": 0.22440104186534882, "learning_rate": 9.833203556172576e-06, "loss": 0.0035, "step": 31940 }, { "epoch": 0.5227849136873108, "grad_norm": 0.20979750156402588, "learning_rate": 9.832959642295845e-06, "loss": 0.0049, "step": 31950 }, { "epoch": 0.5229485396383866, "grad_norm": 0.5015364289283752, "learning_rate": 9.832715553235943e-06, "loss": 0.0045, "step": 31960 }, { "epoch": 0.5231121655894625, "grad_norm": 0.11745341122150421, "learning_rate": 9.832471289001718e-06, "loss": 0.0031, "step": 31970 }, { "epoch": 0.5232757915405384, "grad_norm": 0.34065333008766174, "learning_rate": 9.832226849602022e-06, "loss": 0.0041, "step": 31980 }, { "epoch": 0.5234394174916142, "grad_norm": 0.2888043522834778, "learning_rate": 9.831982235045716e-06, "loss": 0.0033, "step": 31990 }, { "epoch": 0.52360304344269, "grad_norm": 0.03164900839328766, "learning_rate": 9.831737445341667e-06, "loss": 0.0021, "step": 32000 }, { "epoch": 0.5237666693937658, "grad_norm": 0.17371325194835663, "learning_rate": 9.83149248049875e-06, "loss": 0.004, "step": 32010 }, { "epoch": 0.5239302953448417, "grad_norm": 0.13506458699703217, "learning_rate": 9.831247340525842e-06, "loss": 0.0038, "step": 32020 }, { "epoch": 0.5240939212959176, "grad_norm": 0.28841733932495117, "learning_rate": 9.83100202543183e-06, "loss": 0.0057, "step": 32030 }, { "epoch": 0.5242575472469934, "grad_norm": 0.5486514568328857, "learning_rate": 9.830756535225605e-06, "loss": 0.0056, "step": 32040 }, { "epoch": 0.5244211731980692, "grad_norm": 0.1623668670654297, "learning_rate": 9.830510869916068e-06, "loss": 0.0034, "step": 32050 }, { "epoch": 0.524584799149145, "grad_norm": 0.24796000123023987, "learning_rate": 9.83026502951212e-06, "loss": 0.0046, "step": 32060 }, { "epoch": 0.5247484251002209, "grad_norm": 0.247153639793396, "learning_rate": 9.830019014022674e-06, "loss": 0.0061, "step": 32070 }, { "epoch": 0.5249120510512968, "grad_norm": 0.28486204147338867, "learning_rate": 9.829772823456646e-06, "loss": 0.0032, "step": 32080 }, { "epoch": 0.5250756770023726, "grad_norm": 0.35897791385650635, "learning_rate": 9.829526457822964e-06, "loss": 0.0029, "step": 32090 }, { "epoch": 0.5252393029534485, "grad_norm": 0.26973316073417664, "learning_rate": 9.829279917130555e-06, "loss": 0.0045, "step": 32100 }, { "epoch": 0.5254029289045242, "grad_norm": 0.21221882104873657, "learning_rate": 9.829033201388353e-06, "loss": 0.0032, "step": 32110 }, { "epoch": 0.5255665548556001, "grad_norm": 0.16623906791210175, "learning_rate": 9.828786310605306e-06, "loss": 0.0037, "step": 32120 }, { "epoch": 0.5257301808066759, "grad_norm": 0.17663079500198364, "learning_rate": 9.828539244790361e-06, "loss": 0.0066, "step": 32130 }, { "epoch": 0.5258938067577518, "grad_norm": 0.31358128786087036, "learning_rate": 9.828292003952473e-06, "loss": 0.0042, "step": 32140 }, { "epoch": 0.5260574327088277, "grad_norm": 0.03548801690340042, "learning_rate": 9.828044588100605e-06, "loss": 0.0019, "step": 32150 }, { "epoch": 0.5262210586599034, "grad_norm": 0.3755888044834137, "learning_rate": 9.827796997243724e-06, "loss": 0.0041, "step": 32160 }, { "epoch": 0.5263846846109793, "grad_norm": 0.22414498031139374, "learning_rate": 9.827549231390803e-06, "loss": 0.0044, "step": 32170 }, { "epoch": 0.5265483105620551, "grad_norm": 0.16670654714107513, "learning_rate": 9.827301290550828e-06, "loss": 0.0065, "step": 32180 }, { "epoch": 0.526711936513131, "grad_norm": 0.30883532762527466, "learning_rate": 9.827053174732783e-06, "loss": 0.004, "step": 32190 }, { "epoch": 0.5268755624642069, "grad_norm": 0.34060797095298767, "learning_rate": 9.82680488394566e-06, "loss": 0.0057, "step": 32200 }, { "epoch": 0.5270391884152826, "grad_norm": 0.250896692276001, "learning_rate": 9.826556418198463e-06, "loss": 0.0035, "step": 32210 }, { "epoch": 0.5272028143663585, "grad_norm": 0.3988708555698395, "learning_rate": 9.826307777500196e-06, "loss": 0.0042, "step": 32220 }, { "epoch": 0.5273664403174343, "grad_norm": 0.2920372188091278, "learning_rate": 9.826058961859872e-06, "loss": 0.0047, "step": 32230 }, { "epoch": 0.5275300662685102, "grad_norm": 0.17561347782611847, "learning_rate": 9.825809971286509e-06, "loss": 0.0028, "step": 32240 }, { "epoch": 0.5276936922195861, "grad_norm": 0.19744329154491425, "learning_rate": 9.825560805789131e-06, "loss": 0.0032, "step": 32250 }, { "epoch": 0.5278573181706618, "grad_norm": 0.2579793930053711, "learning_rate": 9.825311465376775e-06, "loss": 0.0039, "step": 32260 }, { "epoch": 0.5280209441217377, "grad_norm": 0.3522445559501648, "learning_rate": 9.825061950058474e-06, "loss": 0.0056, "step": 32270 }, { "epoch": 0.5281845700728135, "grad_norm": 0.27567538619041443, "learning_rate": 9.824812259843275e-06, "loss": 0.0048, "step": 32280 }, { "epoch": 0.5283481960238894, "grad_norm": 0.3310495913028717, "learning_rate": 9.824562394740227e-06, "loss": 0.0044, "step": 32290 }, { "epoch": 0.5285118219749653, "grad_norm": 0.37350690364837646, "learning_rate": 9.824312354758389e-06, "loss": 0.005, "step": 32300 }, { "epoch": 0.528675447926041, "grad_norm": 0.2704145312309265, "learning_rate": 9.824062139906821e-06, "loss": 0.0058, "step": 32310 }, { "epoch": 0.5288390738771169, "grad_norm": 0.08880201727151871, "learning_rate": 9.823811750194597e-06, "loss": 0.0038, "step": 32320 }, { "epoch": 0.5290026998281927, "grad_norm": 0.18930236995220184, "learning_rate": 9.82356118563079e-06, "loss": 0.0053, "step": 32330 }, { "epoch": 0.5291663257792686, "grad_norm": 0.28846243023872375, "learning_rate": 9.82331044622448e-06, "loss": 0.0063, "step": 32340 }, { "epoch": 0.5293299517303445, "grad_norm": 0.4184209108352661, "learning_rate": 9.823059531984764e-06, "loss": 0.0051, "step": 32350 }, { "epoch": 0.5294935776814202, "grad_norm": 0.08133115619421005, "learning_rate": 9.822808442920728e-06, "loss": 0.0027, "step": 32360 }, { "epoch": 0.5296572036324961, "grad_norm": 0.28720521926879883, "learning_rate": 9.82255717904148e-06, "loss": 0.0043, "step": 32370 }, { "epoch": 0.5298208295835719, "grad_norm": 0.1819610595703125, "learning_rate": 9.822305740356123e-06, "loss": 0.0053, "step": 32380 }, { "epoch": 0.5299844555346478, "grad_norm": 0.15878736972808838, "learning_rate": 9.822054126873775e-06, "loss": 0.0049, "step": 32390 }, { "epoch": 0.5301480814857237, "grad_norm": 0.20396092534065247, "learning_rate": 9.821802338603552e-06, "loss": 0.0039, "step": 32400 }, { "epoch": 0.5303117074367995, "grad_norm": 0.30080902576446533, "learning_rate": 9.821550375554586e-06, "loss": 0.0033, "step": 32410 }, { "epoch": 0.5304753333878753, "grad_norm": 0.193630650639534, "learning_rate": 9.821298237736004e-06, "loss": 0.0054, "step": 32420 }, { "epoch": 0.5306389593389511, "grad_norm": 0.2316664755344391, "learning_rate": 9.82104592515695e-06, "loss": 0.0024, "step": 32430 }, { "epoch": 0.530802585290027, "grad_norm": 0.08750326931476593, "learning_rate": 9.820793437826568e-06, "loss": 0.0051, "step": 32440 }, { "epoch": 0.5309662112411029, "grad_norm": 0.3620743453502655, "learning_rate": 9.82054077575401e-06, "loss": 0.0054, "step": 32450 }, { "epoch": 0.5311298371921787, "grad_norm": 0.09510017186403275, "learning_rate": 9.820287938948437e-06, "loss": 0.0054, "step": 32460 }, { "epoch": 0.5312934631432545, "grad_norm": 0.3773811161518097, "learning_rate": 9.820034927419009e-06, "loss": 0.0054, "step": 32470 }, { "epoch": 0.5314570890943303, "grad_norm": 0.37230491638183594, "learning_rate": 9.819781741174901e-06, "loss": 0.0053, "step": 32480 }, { "epoch": 0.5316207150454062, "grad_norm": 0.11946693062782288, "learning_rate": 9.819528380225287e-06, "loss": 0.0034, "step": 32490 }, { "epoch": 0.5317843409964821, "grad_norm": 0.5339857339859009, "learning_rate": 9.819274844579354e-06, "loss": 0.0048, "step": 32500 }, { "epoch": 0.5319479669475579, "grad_norm": 0.1704990714788437, "learning_rate": 9.819021134246292e-06, "loss": 0.0036, "step": 32510 }, { "epoch": 0.5321115928986337, "grad_norm": 0.12953652441501617, "learning_rate": 9.818767249235296e-06, "loss": 0.0052, "step": 32520 }, { "epoch": 0.5322752188497095, "grad_norm": 0.3123488426208496, "learning_rate": 9.818513189555565e-06, "loss": 0.0054, "step": 32530 }, { "epoch": 0.5324388448007854, "grad_norm": 0.4499529004096985, "learning_rate": 9.818258955216316e-06, "loss": 0.0053, "step": 32540 }, { "epoch": 0.5326024707518613, "grad_norm": 0.05439787730574608, "learning_rate": 9.818004546226758e-06, "loss": 0.0044, "step": 32550 }, { "epoch": 0.5327660967029371, "grad_norm": 0.19502659142017365, "learning_rate": 9.817749962596115e-06, "loss": 0.0049, "step": 32560 }, { "epoch": 0.532929722654013, "grad_norm": 0.13590234518051147, "learning_rate": 9.817495204333615e-06, "loss": 0.0027, "step": 32570 }, { "epoch": 0.5330933486050887, "grad_norm": 0.3587286174297333, "learning_rate": 9.817240271448494e-06, "loss": 0.004, "step": 32580 }, { "epoch": 0.5332569745561646, "grad_norm": 0.1723286360502243, "learning_rate": 9.81698516394999e-06, "loss": 0.004, "step": 32590 }, { "epoch": 0.5334206005072405, "grad_norm": 0.16744588315486908, "learning_rate": 9.816729881847351e-06, "loss": 0.0029, "step": 32600 }, { "epoch": 0.5335842264583163, "grad_norm": 0.2356855720281601, "learning_rate": 9.816474425149831e-06, "loss": 0.0045, "step": 32610 }, { "epoch": 0.5337478524093922, "grad_norm": 0.1434105634689331, "learning_rate": 9.81621879386669e-06, "loss": 0.0043, "step": 32620 }, { "epoch": 0.5339114783604679, "grad_norm": 0.20921590924263, "learning_rate": 9.815962988007192e-06, "loss": 0.0039, "step": 32630 }, { "epoch": 0.5340751043115438, "grad_norm": 0.04490165039896965, "learning_rate": 9.81570700758061e-06, "loss": 0.003, "step": 32640 }, { "epoch": 0.5342387302626197, "grad_norm": 0.17965340614318848, "learning_rate": 9.815450852596225e-06, "loss": 0.0032, "step": 32650 }, { "epoch": 0.5344023562136955, "grad_norm": 0.1089031994342804, "learning_rate": 9.815194523063318e-06, "loss": 0.0026, "step": 32660 }, { "epoch": 0.5345659821647714, "grad_norm": 0.30800139904022217, "learning_rate": 9.814938018991184e-06, "loss": 0.0028, "step": 32670 }, { "epoch": 0.5347296081158471, "grad_norm": 0.5891119837760925, "learning_rate": 9.81468134038912e-06, "loss": 0.0035, "step": 32680 }, { "epoch": 0.534893234066923, "grad_norm": 0.235852912068367, "learning_rate": 9.814424487266427e-06, "loss": 0.0032, "step": 32690 }, { "epoch": 0.5350568600179989, "grad_norm": 0.4114832282066345, "learning_rate": 9.814167459632418e-06, "loss": 0.0034, "step": 32700 }, { "epoch": 0.5352204859690747, "grad_norm": 0.08472443372011185, "learning_rate": 9.81391025749641e-06, "loss": 0.0033, "step": 32710 }, { "epoch": 0.5353841119201506, "grad_norm": 0.1922909915447235, "learning_rate": 9.813652880867725e-06, "loss": 0.0037, "step": 32720 }, { "epoch": 0.5355477378712263, "grad_norm": 0.26681867241859436, "learning_rate": 9.813395329755693e-06, "loss": 0.0047, "step": 32730 }, { "epoch": 0.5357113638223022, "grad_norm": 0.3745154142379761, "learning_rate": 9.813137604169649e-06, "loss": 0.0033, "step": 32740 }, { "epoch": 0.5358749897733781, "grad_norm": 0.2619208097457886, "learning_rate": 9.812879704118934e-06, "loss": 0.004, "step": 32750 }, { "epoch": 0.5360386157244539, "grad_norm": 0.11802687495946884, "learning_rate": 9.812621629612896e-06, "loss": 0.0048, "step": 32760 }, { "epoch": 0.5362022416755298, "grad_norm": 0.21977204084396362, "learning_rate": 9.812363380660892e-06, "loss": 0.0043, "step": 32770 }, { "epoch": 0.5363658676266055, "grad_norm": 0.13824838399887085, "learning_rate": 9.812104957272281e-06, "loss": 0.0093, "step": 32780 }, { "epoch": 0.5365294935776814, "grad_norm": 0.0591072253882885, "learning_rate": 9.811846359456433e-06, "loss": 0.003, "step": 32790 }, { "epoch": 0.5366931195287573, "grad_norm": 0.10084434598684311, "learning_rate": 9.811587587222716e-06, "loss": 0.0032, "step": 32800 }, { "epoch": 0.5368567454798331, "grad_norm": 0.24370764195919037, "learning_rate": 9.811328640580517e-06, "loss": 0.007, "step": 32810 }, { "epoch": 0.537020371430909, "grad_norm": 0.3512839376926422, "learning_rate": 9.811069519539217e-06, "loss": 0.004, "step": 32820 }, { "epoch": 0.5371839973819847, "grad_norm": 0.3090587854385376, "learning_rate": 9.81081022410821e-06, "loss": 0.0033, "step": 32830 }, { "epoch": 0.5373476233330606, "grad_norm": 0.0773339569568634, "learning_rate": 9.810550754296894e-06, "loss": 0.0045, "step": 32840 }, { "epoch": 0.5375112492841365, "grad_norm": 0.15011484920978546, "learning_rate": 9.810291110114676e-06, "loss": 0.0025, "step": 32850 }, { "epoch": 0.5376748752352123, "grad_norm": 0.2257227897644043, "learning_rate": 9.810031291570967e-06, "loss": 0.0029, "step": 32860 }, { "epoch": 0.5378385011862882, "grad_norm": 0.5098716616630554, "learning_rate": 9.809771298675184e-06, "loss": 0.0048, "step": 32870 }, { "epoch": 0.538002127137364, "grad_norm": 0.20705170929431915, "learning_rate": 9.80951113143675e-06, "loss": 0.0046, "step": 32880 }, { "epoch": 0.5381657530884398, "grad_norm": 0.20500947535037994, "learning_rate": 9.809250789865097e-06, "loss": 0.0035, "step": 32890 }, { "epoch": 0.5383293790395157, "grad_norm": 0.11755549907684326, "learning_rate": 9.808990273969662e-06, "loss": 0.0065, "step": 32900 }, { "epoch": 0.5384930049905915, "grad_norm": 0.18920952081680298, "learning_rate": 9.80872958375989e-06, "loss": 0.0028, "step": 32910 }, { "epoch": 0.5386566309416674, "grad_norm": 0.3245505094528198, "learning_rate": 9.808468719245225e-06, "loss": 0.0039, "step": 32920 }, { "epoch": 0.5388202568927432, "grad_norm": 0.2592693269252777, "learning_rate": 9.808207680435127e-06, "loss": 0.0053, "step": 32930 }, { "epoch": 0.538983882843819, "grad_norm": 0.17564500868320465, "learning_rate": 9.807946467339059e-06, "loss": 0.0053, "step": 32940 }, { "epoch": 0.5391475087948949, "grad_norm": 0.23559381067752838, "learning_rate": 9.807685079966483e-06, "loss": 0.003, "step": 32950 }, { "epoch": 0.5393111347459707, "grad_norm": 0.0649779886007309, "learning_rate": 9.807423518326879e-06, "loss": 0.0028, "step": 32960 }, { "epoch": 0.5394747606970466, "grad_norm": 0.2339847832918167, "learning_rate": 9.807161782429729e-06, "loss": 0.0048, "step": 32970 }, { "epoch": 0.5396383866481224, "grad_norm": 0.3472442924976349, "learning_rate": 9.806899872284517e-06, "loss": 0.0036, "step": 32980 }, { "epoch": 0.5398020125991982, "grad_norm": 0.24282602965831757, "learning_rate": 9.806637787900735e-06, "loss": 0.0039, "step": 32990 }, { "epoch": 0.539965638550274, "grad_norm": 0.3652968406677246, "learning_rate": 9.806375529287888e-06, "loss": 0.0044, "step": 33000 }, { "epoch": 0.5401292645013499, "grad_norm": 0.6360982656478882, "learning_rate": 9.80611309645548e-06, "loss": 0.0062, "step": 33010 }, { "epoch": 0.5402928904524258, "grad_norm": 0.1364808827638626, "learning_rate": 9.805850489413025e-06, "loss": 0.0038, "step": 33020 }, { "epoch": 0.5404565164035016, "grad_norm": 0.12798751890659332, "learning_rate": 9.805587708170036e-06, "loss": 0.006, "step": 33030 }, { "epoch": 0.5406201423545774, "grad_norm": 0.22705502808094025, "learning_rate": 9.805324752736047e-06, "loss": 0.0034, "step": 33040 }, { "epoch": 0.5407837683056532, "grad_norm": 0.1974307894706726, "learning_rate": 9.805061623120583e-06, "loss": 0.0029, "step": 33050 }, { "epoch": 0.5409473942567291, "grad_norm": 0.21818414330482483, "learning_rate": 9.804798319333184e-06, "loss": 0.0051, "step": 33060 }, { "epoch": 0.541111020207805, "grad_norm": 0.16655340790748596, "learning_rate": 9.804534841383394e-06, "loss": 0.0032, "step": 33070 }, { "epoch": 0.5412746461588808, "grad_norm": 0.0988406091928482, "learning_rate": 9.804271189280763e-06, "loss": 0.0046, "step": 33080 }, { "epoch": 0.5414382721099567, "grad_norm": 0.41690224409103394, "learning_rate": 9.804007363034849e-06, "loss": 0.0035, "step": 33090 }, { "epoch": 0.5416018980610324, "grad_norm": 0.2624330520629883, "learning_rate": 9.803743362655213e-06, "loss": 0.0043, "step": 33100 }, { "epoch": 0.5417655240121083, "grad_norm": 0.24833914637565613, "learning_rate": 9.803479188151426e-06, "loss": 0.0044, "step": 33110 }, { "epoch": 0.5419291499631842, "grad_norm": 0.15569384396076202, "learning_rate": 9.803214839533065e-06, "loss": 0.0052, "step": 33120 }, { "epoch": 0.54209277591426, "grad_norm": 0.1732093244791031, "learning_rate": 9.802950316809707e-06, "loss": 0.0057, "step": 33130 }, { "epoch": 0.5422564018653359, "grad_norm": 0.16405533254146576, "learning_rate": 9.802685619990945e-06, "loss": 0.0044, "step": 33140 }, { "epoch": 0.5424200278164116, "grad_norm": 0.3588194251060486, "learning_rate": 9.802420749086373e-06, "loss": 0.0041, "step": 33150 }, { "epoch": 0.5425836537674875, "grad_norm": 0.2474457174539566, "learning_rate": 9.802155704105591e-06, "loss": 0.0062, "step": 33160 }, { "epoch": 0.5427472797185634, "grad_norm": 0.25152698159217834, "learning_rate": 9.801890485058206e-06, "loss": 0.0043, "step": 33170 }, { "epoch": 0.5429109056696392, "grad_norm": 0.1282421201467514, "learning_rate": 9.801625091953832e-06, "loss": 0.0034, "step": 33180 }, { "epoch": 0.5430745316207151, "grad_norm": 0.23702237010002136, "learning_rate": 9.80135952480209e-06, "loss": 0.0045, "step": 33190 }, { "epoch": 0.5432381575717908, "grad_norm": 0.2506360709667206, "learning_rate": 9.801093783612603e-06, "loss": 0.0038, "step": 33200 }, { "epoch": 0.5434017835228667, "grad_norm": 0.12637735903263092, "learning_rate": 9.800827868395006e-06, "loss": 0.0042, "step": 33210 }, { "epoch": 0.5435654094739426, "grad_norm": 0.2550637125968933, "learning_rate": 9.800561779158937e-06, "loss": 0.0049, "step": 33220 }, { "epoch": 0.5437290354250184, "grad_norm": 0.21127969026565552, "learning_rate": 9.800295515914044e-06, "loss": 0.0021, "step": 33230 }, { "epoch": 0.5438926613760943, "grad_norm": 0.27860361337661743, "learning_rate": 9.800029078669973e-06, "loss": 0.01, "step": 33240 }, { "epoch": 0.54405628732717, "grad_norm": 0.24480634927749634, "learning_rate": 9.799762467436384e-06, "loss": 0.0041, "step": 33250 }, { "epoch": 0.5442199132782459, "grad_norm": 0.13082528114318848, "learning_rate": 9.799495682222944e-06, "loss": 0.0047, "step": 33260 }, { "epoch": 0.5443835392293218, "grad_norm": 0.3724941611289978, "learning_rate": 9.79922872303932e-06, "loss": 0.0041, "step": 33270 }, { "epoch": 0.5445471651803976, "grad_norm": 0.37125328183174133, "learning_rate": 9.798961589895188e-06, "loss": 0.0032, "step": 33280 }, { "epoch": 0.5447107911314735, "grad_norm": 0.18387950956821442, "learning_rate": 9.798694282800234e-06, "loss": 0.0044, "step": 33290 }, { "epoch": 0.5448744170825492, "grad_norm": 0.47088825702667236, "learning_rate": 9.798426801764144e-06, "loss": 0.0041, "step": 33300 }, { "epoch": 0.5450380430336251, "grad_norm": 0.0341993011534214, "learning_rate": 9.798159146796617e-06, "loss": 0.0068, "step": 33310 }, { "epoch": 0.545201668984701, "grad_norm": 0.19534042477607727, "learning_rate": 9.797891317907352e-06, "loss": 0.0027, "step": 33320 }, { "epoch": 0.5453652949357768, "grad_norm": 0.3997628688812256, "learning_rate": 9.797623315106057e-06, "loss": 0.0038, "step": 33330 }, { "epoch": 0.5455289208868527, "grad_norm": 0.05273151770234108, "learning_rate": 9.79735513840245e-06, "loss": 0.0061, "step": 33340 }, { "epoch": 0.5456925468379284, "grad_norm": 0.3081619441509247, "learning_rate": 9.797086787806249e-06, "loss": 0.0032, "step": 33350 }, { "epoch": 0.5458561727890043, "grad_norm": 0.10779151320457458, "learning_rate": 9.79681826332718e-06, "loss": 0.0021, "step": 33360 }, { "epoch": 0.5460197987400802, "grad_norm": 0.22849293053150177, "learning_rate": 9.796549564974978e-06, "loss": 0.0042, "step": 33370 }, { "epoch": 0.546183424691156, "grad_norm": 0.0554744191467762, "learning_rate": 9.796280692759384e-06, "loss": 0.0042, "step": 33380 }, { "epoch": 0.5463470506422319, "grad_norm": 0.2788725197315216, "learning_rate": 9.79601164669014e-06, "loss": 0.0045, "step": 33390 }, { "epoch": 0.5465106765933077, "grad_norm": 0.1635172814130783, "learning_rate": 9.795742426777002e-06, "loss": 0.0048, "step": 33400 }, { "epoch": 0.5466743025443835, "grad_norm": 0.1375514566898346, "learning_rate": 9.795473033029727e-06, "loss": 0.0062, "step": 33410 }, { "epoch": 0.5468379284954594, "grad_norm": 0.10028968751430511, "learning_rate": 9.795203465458081e-06, "loss": 0.0036, "step": 33420 }, { "epoch": 0.5470015544465352, "grad_norm": 0.27354317903518677, "learning_rate": 9.794933724071834e-06, "loss": 0.0036, "step": 33430 }, { "epoch": 0.5471651803976111, "grad_norm": 0.32796114683151245, "learning_rate": 9.794663808880764e-06, "loss": 0.004, "step": 33440 }, { "epoch": 0.5473288063486869, "grad_norm": 0.4347842335700989, "learning_rate": 9.794393719894655e-06, "loss": 0.0035, "step": 33450 }, { "epoch": 0.5474924322997627, "grad_norm": 0.25115635991096497, "learning_rate": 9.794123457123296e-06, "loss": 0.0033, "step": 33460 }, { "epoch": 0.5476560582508386, "grad_norm": 0.23931430280208588, "learning_rate": 9.793853020576484e-06, "loss": 0.0024, "step": 33470 }, { "epoch": 0.5478196842019144, "grad_norm": 0.13950666785240173, "learning_rate": 9.793582410264023e-06, "loss": 0.0036, "step": 33480 }, { "epoch": 0.5479833101529903, "grad_norm": 0.3506321609020233, "learning_rate": 9.793311626195718e-06, "loss": 0.0042, "step": 33490 }, { "epoch": 0.5481469361040661, "grad_norm": 0.23200082778930664, "learning_rate": 9.793040668381388e-06, "loss": 0.0039, "step": 33500 }, { "epoch": 0.548310562055142, "grad_norm": 0.6095235347747803, "learning_rate": 9.792769536830855e-06, "loss": 0.0039, "step": 33510 }, { "epoch": 0.5484741880062178, "grad_norm": 0.32369744777679443, "learning_rate": 9.792498231553943e-06, "loss": 0.0032, "step": 33520 }, { "epoch": 0.5486378139572936, "grad_norm": 0.1714697927236557, "learning_rate": 9.792226752560492e-06, "loss": 0.0074, "step": 33530 }, { "epoch": 0.5488014399083695, "grad_norm": 0.15778622031211853, "learning_rate": 9.791955099860335e-06, "loss": 0.0027, "step": 33540 }, { "epoch": 0.5489650658594453, "grad_norm": 0.28746315836906433, "learning_rate": 9.791683273463327e-06, "loss": 0.0057, "step": 33550 }, { "epoch": 0.5491286918105212, "grad_norm": 0.3084624409675598, "learning_rate": 9.791411273379314e-06, "loss": 0.0034, "step": 33560 }, { "epoch": 0.549292317761597, "grad_norm": 0.3382299244403839, "learning_rate": 9.79113909961816e-06, "loss": 0.0039, "step": 33570 }, { "epoch": 0.5494559437126728, "grad_norm": 0.16015921533107758, "learning_rate": 9.790866752189725e-06, "loss": 0.0051, "step": 33580 }, { "epoch": 0.5496195696637487, "grad_norm": 0.2711232900619507, "learning_rate": 9.790594231103888e-06, "loss": 0.004, "step": 33590 }, { "epoch": 0.5497831956148245, "grad_norm": 0.5179532170295715, "learning_rate": 9.790321536370525e-06, "loss": 0.0039, "step": 33600 }, { "epoch": 0.5499468215659004, "grad_norm": 0.12061049789190292, "learning_rate": 9.790048667999517e-06, "loss": 0.0038, "step": 33610 }, { "epoch": 0.5501104475169762, "grad_norm": 0.3108615577220917, "learning_rate": 9.789775626000757e-06, "loss": 0.0029, "step": 33620 }, { "epoch": 0.550274073468052, "grad_norm": 0.26426491141319275, "learning_rate": 9.789502410384145e-06, "loss": 0.0056, "step": 33630 }, { "epoch": 0.5504376994191279, "grad_norm": 0.04900655150413513, "learning_rate": 9.789229021159581e-06, "loss": 0.0035, "step": 33640 }, { "epoch": 0.5506013253702037, "grad_norm": 0.16743774712085724, "learning_rate": 9.788955458336976e-06, "loss": 0.0048, "step": 33650 }, { "epoch": 0.5507649513212796, "grad_norm": 0.5871763229370117, "learning_rate": 9.788681721926243e-06, "loss": 0.0046, "step": 33660 }, { "epoch": 0.5509285772723554, "grad_norm": 0.3706881105899811, "learning_rate": 9.788407811937311e-06, "loss": 0.0032, "step": 33670 }, { "epoch": 0.5510922032234312, "grad_norm": 0.2674044668674469, "learning_rate": 9.7881337283801e-06, "loss": 0.0036, "step": 33680 }, { "epoch": 0.5512558291745071, "grad_norm": 0.19376537203788757, "learning_rate": 9.787859471264552e-06, "loss": 0.0035, "step": 33690 }, { "epoch": 0.5514194551255829, "grad_norm": 0.16932715475559235, "learning_rate": 9.787585040600605e-06, "loss": 0.0043, "step": 33700 }, { "epoch": 0.5515830810766588, "grad_norm": 0.7847943305969238, "learning_rate": 9.787310436398208e-06, "loss": 0.0036, "step": 33710 }, { "epoch": 0.5517467070277347, "grad_norm": 0.3932487368583679, "learning_rate": 9.787035658667313e-06, "loss": 0.0031, "step": 33720 }, { "epoch": 0.5519103329788104, "grad_norm": 0.14990553259849548, "learning_rate": 9.78676070741788e-06, "loss": 0.0035, "step": 33730 }, { "epoch": 0.5520739589298863, "grad_norm": 0.6247564554214478, "learning_rate": 9.786485582659876e-06, "loss": 0.0076, "step": 33740 }, { "epoch": 0.5522375848809621, "grad_norm": 0.09049864858388901, "learning_rate": 9.786210284403274e-06, "loss": 0.0032, "step": 33750 }, { "epoch": 0.552401210832038, "grad_norm": 0.07898964732885361, "learning_rate": 9.785934812658053e-06, "loss": 0.0033, "step": 33760 }, { "epoch": 0.5525648367831139, "grad_norm": 0.059457868337631226, "learning_rate": 9.785659167434197e-06, "loss": 0.0061, "step": 33770 }, { "epoch": 0.5527284627341896, "grad_norm": 0.12354923039674759, "learning_rate": 9.785383348741699e-06, "loss": 0.0024, "step": 33780 }, { "epoch": 0.5528920886852655, "grad_norm": 0.41873377561569214, "learning_rate": 9.785107356590555e-06, "loss": 0.0029, "step": 33790 }, { "epoch": 0.5530557146363413, "grad_norm": 0.33212020993232727, "learning_rate": 9.78483119099077e-06, "loss": 0.0053, "step": 33800 }, { "epoch": 0.5532193405874172, "grad_norm": 0.10025341063737869, "learning_rate": 9.784554851952354e-06, "loss": 0.0041, "step": 33810 }, { "epoch": 0.5533829665384931, "grad_norm": 0.36780327558517456, "learning_rate": 9.784278339485327e-06, "loss": 0.0052, "step": 33820 }, { "epoch": 0.5535465924895688, "grad_norm": 0.19688554108142853, "learning_rate": 9.784001653599707e-06, "loss": 0.0029, "step": 33830 }, { "epoch": 0.5537102184406447, "grad_norm": 0.23417767882347107, "learning_rate": 9.783724794305524e-06, "loss": 0.0022, "step": 33840 }, { "epoch": 0.5538738443917205, "grad_norm": 0.4738108217716217, "learning_rate": 9.783447761612816e-06, "loss": 0.0062, "step": 33850 }, { "epoch": 0.5540374703427964, "grad_norm": 0.07847896963357925, "learning_rate": 9.783170555531623e-06, "loss": 0.0033, "step": 33860 }, { "epoch": 0.5542010962938722, "grad_norm": 0.20875278115272522, "learning_rate": 9.782893176071993e-06, "loss": 0.0042, "step": 33870 }, { "epoch": 0.554364722244948, "grad_norm": 0.388008713722229, "learning_rate": 9.78261562324398e-06, "loss": 0.0058, "step": 33880 }, { "epoch": 0.5545283481960239, "grad_norm": 0.20827463269233704, "learning_rate": 9.782337897057648e-06, "loss": 0.0038, "step": 33890 }, { "epoch": 0.5546919741470997, "grad_norm": 0.22781474888324738, "learning_rate": 9.782059997523058e-06, "loss": 0.0032, "step": 33900 }, { "epoch": 0.5548556000981756, "grad_norm": 0.21710669994354248, "learning_rate": 9.78178192465029e-06, "loss": 0.0044, "step": 33910 }, { "epoch": 0.5550192260492514, "grad_norm": 0.23331332206726074, "learning_rate": 9.781503678449417e-06, "loss": 0.0037, "step": 33920 }, { "epoch": 0.5551828520003272, "grad_norm": 0.3796658217906952, "learning_rate": 9.781225258930529e-06, "loss": 0.0052, "step": 33930 }, { "epoch": 0.5553464779514031, "grad_norm": 0.2554663121700287, "learning_rate": 9.780946666103717e-06, "loss": 0.0042, "step": 33940 }, { "epoch": 0.5555101039024789, "grad_norm": 0.06200270354747772, "learning_rate": 9.780667899979077e-06, "loss": 0.0057, "step": 33950 }, { "epoch": 0.5556737298535548, "grad_norm": 0.37055638432502747, "learning_rate": 9.780388960566719e-06, "loss": 0.004, "step": 33960 }, { "epoch": 0.5558373558046306, "grad_norm": 0.10813041776418686, "learning_rate": 9.780109847876749e-06, "loss": 0.0049, "step": 33970 }, { "epoch": 0.5560009817557064, "grad_norm": 0.20948156714439392, "learning_rate": 9.779830561919285e-06, "loss": 0.003, "step": 33980 }, { "epoch": 0.5561646077067823, "grad_norm": 0.3225252032279968, "learning_rate": 9.77955110270445e-06, "loss": 0.0052, "step": 33990 }, { "epoch": 0.5563282336578581, "grad_norm": 0.13214503228664398, "learning_rate": 9.779271470242377e-06, "loss": 0.0046, "step": 34000 }, { "epoch": 0.556491859608934, "grad_norm": 0.23867765069007874, "learning_rate": 9.7789916645432e-06, "loss": 0.0039, "step": 34010 }, { "epoch": 0.5566554855600098, "grad_norm": 0.10316136479377747, "learning_rate": 9.77871168561706e-06, "loss": 0.0032, "step": 34020 }, { "epoch": 0.5568191115110857, "grad_norm": 0.07306062430143356, "learning_rate": 9.778431533474107e-06, "loss": 0.0044, "step": 34030 }, { "epoch": 0.5569827374621615, "grad_norm": 0.1291823536157608, "learning_rate": 9.778151208124496e-06, "loss": 0.0038, "step": 34040 }, { "epoch": 0.5571463634132373, "grad_norm": 0.19382648169994354, "learning_rate": 9.777870709578388e-06, "loss": 0.0043, "step": 34050 }, { "epoch": 0.5573099893643132, "grad_norm": 0.20677529275417328, "learning_rate": 9.777590037845949e-06, "loss": 0.003, "step": 34060 }, { "epoch": 0.557473615315389, "grad_norm": 0.2174794226884842, "learning_rate": 9.777309192937356e-06, "loss": 0.0037, "step": 34070 }, { "epoch": 0.5576372412664649, "grad_norm": 0.27535009384155273, "learning_rate": 9.777028174862784e-06, "loss": 0.005, "step": 34080 }, { "epoch": 0.5578008672175407, "grad_norm": 0.24558651447296143, "learning_rate": 9.776746983632424e-06, "loss": 0.0049, "step": 34090 }, { "epoch": 0.5579644931686165, "grad_norm": 0.17967216670513153, "learning_rate": 9.776465619256465e-06, "loss": 0.0046, "step": 34100 }, { "epoch": 0.5581281191196924, "grad_norm": 0.12029209733009338, "learning_rate": 9.776184081745108e-06, "loss": 0.0043, "step": 34110 }, { "epoch": 0.5582917450707682, "grad_norm": 0.0979442149400711, "learning_rate": 9.775902371108556e-06, "loss": 0.0051, "step": 34120 }, { "epoch": 0.5584553710218441, "grad_norm": 0.16064612567424774, "learning_rate": 9.775620487357023e-06, "loss": 0.0029, "step": 34130 }, { "epoch": 0.55861899697292, "grad_norm": 0.2996758222579956, "learning_rate": 9.775338430500724e-06, "loss": 0.0057, "step": 34140 }, { "epoch": 0.5587826229239957, "grad_norm": 0.05495762452483177, "learning_rate": 9.775056200549885e-06, "loss": 0.0029, "step": 34150 }, { "epoch": 0.5589462488750716, "grad_norm": 0.3233628571033478, "learning_rate": 9.774773797514737e-06, "loss": 0.0033, "step": 34160 }, { "epoch": 0.5591098748261474, "grad_norm": 0.08589668571949005, "learning_rate": 9.774491221405512e-06, "loss": 0.0027, "step": 34170 }, { "epoch": 0.5592735007772233, "grad_norm": 0.31055769324302673, "learning_rate": 9.774208472232456e-06, "loss": 0.0045, "step": 34180 }, { "epoch": 0.5594371267282992, "grad_norm": 0.042775653302669525, "learning_rate": 9.773925550005818e-06, "loss": 0.004, "step": 34190 }, { "epoch": 0.5596007526793749, "grad_norm": 0.07803049683570862, "learning_rate": 9.773642454735852e-06, "loss": 0.0047, "step": 34200 }, { "epoch": 0.5597643786304508, "grad_norm": 0.24542154371738434, "learning_rate": 9.773359186432822e-06, "loss": 0.0042, "step": 34210 }, { "epoch": 0.5599280045815266, "grad_norm": 0.07078225910663605, "learning_rate": 9.773075745106992e-06, "loss": 0.0031, "step": 34220 }, { "epoch": 0.5600916305326025, "grad_norm": 0.08734782785177231, "learning_rate": 9.772792130768638e-06, "loss": 0.0025, "step": 34230 }, { "epoch": 0.5602552564836784, "grad_norm": 0.19833236932754517, "learning_rate": 9.772508343428042e-06, "loss": 0.0041, "step": 34240 }, { "epoch": 0.5604188824347541, "grad_norm": 0.09172920882701874, "learning_rate": 9.772224383095487e-06, "loss": 0.0021, "step": 34250 }, { "epoch": 0.56058250838583, "grad_norm": 0.2891700565814972, "learning_rate": 9.77194024978127e-06, "loss": 0.0032, "step": 34260 }, { "epoch": 0.5607461343369058, "grad_norm": 0.10319384932518005, "learning_rate": 9.771655943495685e-06, "loss": 0.004, "step": 34270 }, { "epoch": 0.5609097602879817, "grad_norm": 0.34937310218811035, "learning_rate": 9.771371464249043e-06, "loss": 0.0042, "step": 34280 }, { "epoch": 0.5610733862390576, "grad_norm": 0.07920363545417786, "learning_rate": 9.771086812051652e-06, "loss": 0.0047, "step": 34290 }, { "epoch": 0.5612370121901333, "grad_norm": 0.39690378308296204, "learning_rate": 9.77080198691383e-06, "loss": 0.0049, "step": 34300 }, { "epoch": 0.5614006381412092, "grad_norm": 0.4145283102989197, "learning_rate": 9.770516988845906e-06, "loss": 0.0038, "step": 34310 }, { "epoch": 0.561564264092285, "grad_norm": 0.31935012340545654, "learning_rate": 9.770231817858205e-06, "loss": 0.0054, "step": 34320 }, { "epoch": 0.5617278900433609, "grad_norm": 0.15822888910770416, "learning_rate": 9.769946473961064e-06, "loss": 0.0042, "step": 34330 }, { "epoch": 0.5618915159944368, "grad_norm": 0.2548803389072418, "learning_rate": 9.769660957164828e-06, "loss": 0.0059, "step": 34340 }, { "epoch": 0.5620551419455125, "grad_norm": 0.25123193860054016, "learning_rate": 9.769375267479847e-06, "loss": 0.005, "step": 34350 }, { "epoch": 0.5622187678965884, "grad_norm": 0.2755795121192932, "learning_rate": 9.769089404916475e-06, "loss": 0.0041, "step": 34360 }, { "epoch": 0.5623823938476642, "grad_norm": 0.08042644709348679, "learning_rate": 9.768803369485074e-06, "loss": 0.0025, "step": 34370 }, { "epoch": 0.5625460197987401, "grad_norm": 0.4067413806915283, "learning_rate": 9.768517161196012e-06, "loss": 0.0057, "step": 34380 }, { "epoch": 0.562709645749816, "grad_norm": 0.32594916224479675, "learning_rate": 9.768230780059664e-06, "loss": 0.0073, "step": 34390 }, { "epoch": 0.5628732717008917, "grad_norm": 0.11061684042215347, "learning_rate": 9.767944226086413e-06, "loss": 0.0024, "step": 34400 }, { "epoch": 0.5630368976519676, "grad_norm": 0.26365336775779724, "learning_rate": 9.76765749928664e-06, "loss": 0.0024, "step": 34410 }, { "epoch": 0.5632005236030434, "grad_norm": 0.1506739854812622, "learning_rate": 9.767370599670743e-06, "loss": 0.0042, "step": 34420 }, { "epoch": 0.5633641495541193, "grad_norm": 0.19781681895256042, "learning_rate": 9.767083527249118e-06, "loss": 0.0035, "step": 34430 }, { "epoch": 0.5635277755051952, "grad_norm": 0.14687316119670868, "learning_rate": 9.766796282032173e-06, "loss": 0.0037, "step": 34440 }, { "epoch": 0.563691401456271, "grad_norm": 0.1054859310388565, "learning_rate": 9.76650886403032e-06, "loss": 0.0033, "step": 34450 }, { "epoch": 0.5638550274073468, "grad_norm": 0.206525519490242, "learning_rate": 9.766221273253978e-06, "loss": 0.0063, "step": 34460 }, { "epoch": 0.5640186533584226, "grad_norm": 0.16110996901988983, "learning_rate": 9.76593350971357e-06, "loss": 0.0038, "step": 34470 }, { "epoch": 0.5641822793094985, "grad_norm": 0.2761392891407013, "learning_rate": 9.765645573419525e-06, "loss": 0.004, "step": 34480 }, { "epoch": 0.5643459052605744, "grad_norm": 0.2078597992658615, "learning_rate": 9.765357464382282e-06, "loss": 0.0058, "step": 34490 }, { "epoch": 0.5645095312116502, "grad_norm": 0.32887399196624756, "learning_rate": 9.765069182612286e-06, "loss": 0.0049, "step": 34500 }, { "epoch": 0.564673157162726, "grad_norm": 0.2882753014564514, "learning_rate": 9.764780728119983e-06, "loss": 0.003, "step": 34510 }, { "epoch": 0.5648367831138018, "grad_norm": 0.17291636765003204, "learning_rate": 9.764492100915832e-06, "loss": 0.0041, "step": 34520 }, { "epoch": 0.5650004090648777, "grad_norm": 0.2206551879644394, "learning_rate": 9.76420330101029e-06, "loss": 0.0053, "step": 34530 }, { "epoch": 0.5651640350159536, "grad_norm": 0.07411439716815948, "learning_rate": 9.763914328413833e-06, "loss": 0.0032, "step": 34540 }, { "epoch": 0.5653276609670294, "grad_norm": 0.03896886855363846, "learning_rate": 9.76362518313693e-06, "loss": 0.0034, "step": 34550 }, { "epoch": 0.5654912869181052, "grad_norm": 0.17319238185882568, "learning_rate": 9.763335865190062e-06, "loss": 0.0045, "step": 34560 }, { "epoch": 0.565654912869181, "grad_norm": 0.4278779923915863, "learning_rate": 9.76304637458372e-06, "loss": 0.0052, "step": 34570 }, { "epoch": 0.5658185388202569, "grad_norm": 0.20016364753246307, "learning_rate": 9.762756711328391e-06, "loss": 0.0044, "step": 34580 }, { "epoch": 0.5659821647713328, "grad_norm": 0.20062294602394104, "learning_rate": 9.762466875434579e-06, "loss": 0.0033, "step": 34590 }, { "epoch": 0.5661457907224086, "grad_norm": 0.3652080297470093, "learning_rate": 9.76217686691279e-06, "loss": 0.0042, "step": 34600 }, { "epoch": 0.5663094166734844, "grad_norm": 0.22696973383426666, "learning_rate": 9.761886685773536e-06, "loss": 0.0034, "step": 34610 }, { "epoch": 0.5664730426245602, "grad_norm": 0.11855337023735046, "learning_rate": 9.761596332027334e-06, "loss": 0.0065, "step": 34620 }, { "epoch": 0.5666366685756361, "grad_norm": 0.09045524150133133, "learning_rate": 9.761305805684708e-06, "loss": 0.0018, "step": 34630 }, { "epoch": 0.566800294526712, "grad_norm": 0.31754690408706665, "learning_rate": 9.761015106756192e-06, "loss": 0.0053, "step": 34640 }, { "epoch": 0.5669639204777878, "grad_norm": 0.10781533271074295, "learning_rate": 9.76072423525232e-06, "loss": 0.003, "step": 34650 }, { "epoch": 0.5671275464288636, "grad_norm": 0.07485975325107574, "learning_rate": 9.760433191183636e-06, "loss": 0.0037, "step": 34660 }, { "epoch": 0.5672911723799394, "grad_norm": 0.36674466729164124, "learning_rate": 9.760141974560692e-06, "loss": 0.0055, "step": 34670 }, { "epoch": 0.5674547983310153, "grad_norm": 0.2253779172897339, "learning_rate": 9.75985058539404e-06, "loss": 0.0047, "step": 34680 }, { "epoch": 0.5676184242820912, "grad_norm": 0.21675962209701538, "learning_rate": 9.759559023694246e-06, "loss": 0.0039, "step": 34690 }, { "epoch": 0.567782050233167, "grad_norm": 0.23162716627120972, "learning_rate": 9.759267289471876e-06, "loss": 0.0032, "step": 34700 }, { "epoch": 0.5679456761842429, "grad_norm": 0.19693949818611145, "learning_rate": 9.758975382737505e-06, "loss": 0.0035, "step": 34710 }, { "epoch": 0.5681093021353186, "grad_norm": 0.3346758186817169, "learning_rate": 9.758683303501714e-06, "loss": 0.0032, "step": 34720 }, { "epoch": 0.5682729280863945, "grad_norm": 0.18781304359436035, "learning_rate": 9.758391051775092e-06, "loss": 0.0029, "step": 34730 }, { "epoch": 0.5684365540374703, "grad_norm": 0.08256262540817261, "learning_rate": 9.75809862756823e-06, "loss": 0.0045, "step": 34740 }, { "epoch": 0.5686001799885462, "grad_norm": 0.26031774282455444, "learning_rate": 9.757806030891728e-06, "loss": 0.0026, "step": 34750 }, { "epoch": 0.5687638059396221, "grad_norm": 0.18769051134586334, "learning_rate": 9.757513261756193e-06, "loss": 0.0037, "step": 34760 }, { "epoch": 0.5689274318906978, "grad_norm": 0.16542164981365204, "learning_rate": 9.757220320172237e-06, "loss": 0.0054, "step": 34770 }, { "epoch": 0.5690910578417737, "grad_norm": 0.2837505638599396, "learning_rate": 9.756927206150477e-06, "loss": 0.0061, "step": 34780 }, { "epoch": 0.5692546837928495, "grad_norm": 0.22357076406478882, "learning_rate": 9.75663391970154e-06, "loss": 0.0055, "step": 34790 }, { "epoch": 0.5694183097439254, "grad_norm": 0.2976042330265045, "learning_rate": 9.756340460836055e-06, "loss": 0.0052, "step": 34800 }, { "epoch": 0.5695819356950013, "grad_norm": 0.5176551342010498, "learning_rate": 9.75604682956466e-06, "loss": 0.0049, "step": 34810 }, { "epoch": 0.569745561646077, "grad_norm": 0.22788123786449432, "learning_rate": 9.755753025897999e-06, "loss": 0.0041, "step": 34820 }, { "epoch": 0.5699091875971529, "grad_norm": 0.3112598955631256, "learning_rate": 9.75545904984672e-06, "loss": 0.0047, "step": 34830 }, { "epoch": 0.5700728135482287, "grad_norm": 0.30748066306114197, "learning_rate": 9.75516490142148e-06, "loss": 0.0043, "step": 34840 }, { "epoch": 0.5702364394993046, "grad_norm": 0.27794697880744934, "learning_rate": 9.75487058063294e-06, "loss": 0.0026, "step": 34850 }, { "epoch": 0.5704000654503805, "grad_norm": 0.23995459079742432, "learning_rate": 9.754576087491772e-06, "loss": 0.0026, "step": 34860 }, { "epoch": 0.5705636914014562, "grad_norm": 0.37012141942977905, "learning_rate": 9.754281422008647e-06, "loss": 0.0069, "step": 34870 }, { "epoch": 0.5707273173525321, "grad_norm": 0.20414775609970093, "learning_rate": 9.753986584194246e-06, "loss": 0.0055, "step": 34880 }, { "epoch": 0.5708909433036079, "grad_norm": 0.17128324508666992, "learning_rate": 9.753691574059258e-06, "loss": 0.0041, "step": 34890 }, { "epoch": 0.5710545692546838, "grad_norm": 0.2541649639606476, "learning_rate": 9.753396391614377e-06, "loss": 0.0062, "step": 34900 }, { "epoch": 0.5712181952057597, "grad_norm": 0.04486648738384247, "learning_rate": 9.753101036870298e-06, "loss": 0.0046, "step": 34910 }, { "epoch": 0.5713818211568354, "grad_norm": 0.14260824024677277, "learning_rate": 9.752805509837733e-06, "loss": 0.0022, "step": 34920 }, { "epoch": 0.5715454471079113, "grad_norm": 0.2230953574180603, "learning_rate": 9.75250981052739e-06, "loss": 0.004, "step": 34930 }, { "epoch": 0.5717090730589871, "grad_norm": 0.49457302689552307, "learning_rate": 9.75221393894999e-06, "loss": 0.006, "step": 34940 }, { "epoch": 0.571872699010063, "grad_norm": 0.23775379359722137, "learning_rate": 9.751917895116255e-06, "loss": 0.0025, "step": 34950 }, { "epoch": 0.5720363249611389, "grad_norm": 0.14535000920295715, "learning_rate": 9.751621679036917e-06, "loss": 0.0041, "step": 34960 }, { "epoch": 0.5721999509122146, "grad_norm": 0.17630788683891296, "learning_rate": 9.751325290722715e-06, "loss": 0.0052, "step": 34970 }, { "epoch": 0.5723635768632905, "grad_norm": 0.1856747269630432, "learning_rate": 9.751028730184392e-06, "loss": 0.0035, "step": 34980 }, { "epoch": 0.5725272028143663, "grad_norm": 0.1989014744758606, "learning_rate": 9.750731997432691e-06, "loss": 0.0034, "step": 34990 }, { "epoch": 0.5726908287654422, "grad_norm": 0.269896924495697, "learning_rate": 9.75043509247838e-06, "loss": 0.0061, "step": 35000 }, { "epoch": 0.5728544547165181, "grad_norm": 0.09226461499929428, "learning_rate": 9.750138015332208e-06, "loss": 0.0036, "step": 35010 }, { "epoch": 0.5730180806675939, "grad_norm": 0.2072274088859558, "learning_rate": 9.749840766004953e-06, "loss": 0.0049, "step": 35020 }, { "epoch": 0.5731817066186697, "grad_norm": 0.013144253753125668, "learning_rate": 9.749543344507385e-06, "loss": 0.0029, "step": 35030 }, { "epoch": 0.5733453325697455, "grad_norm": 0.1586284339427948, "learning_rate": 9.749245750850287e-06, "loss": 0.0031, "step": 35040 }, { "epoch": 0.5735089585208214, "grad_norm": 0.11746500432491302, "learning_rate": 9.748947985044443e-06, "loss": 0.0037, "step": 35050 }, { "epoch": 0.5736725844718973, "grad_norm": 0.22508607804775238, "learning_rate": 9.748650047100651e-06, "loss": 0.0034, "step": 35060 }, { "epoch": 0.5738362104229731, "grad_norm": 0.2871067523956299, "learning_rate": 9.748351937029707e-06, "loss": 0.0052, "step": 35070 }, { "epoch": 0.5739998363740489, "grad_norm": 0.03430821746587753, "learning_rate": 9.748053654842417e-06, "loss": 0.003, "step": 35080 }, { "epoch": 0.5741634623251247, "grad_norm": 0.35699984431266785, "learning_rate": 9.747755200549594e-06, "loss": 0.0056, "step": 35090 }, { "epoch": 0.5743270882762006, "grad_norm": 0.18932944536209106, "learning_rate": 9.747456574162055e-06, "loss": 0.0048, "step": 35100 }, { "epoch": 0.5744907142272765, "grad_norm": 0.14558500051498413, "learning_rate": 9.747157775690628e-06, "loss": 0.004, "step": 35110 }, { "epoch": 0.5746543401783523, "grad_norm": 0.2817893326282501, "learning_rate": 9.74685880514614e-06, "loss": 0.004, "step": 35120 }, { "epoch": 0.5748179661294281, "grad_norm": 0.4181196987628937, "learning_rate": 9.746559662539427e-06, "loss": 0.0029, "step": 35130 }, { "epoch": 0.5749815920805039, "grad_norm": 0.5082183480262756, "learning_rate": 9.746260347881335e-06, "loss": 0.0035, "step": 35140 }, { "epoch": 0.5751452180315798, "grad_norm": 0.3064762055873871, "learning_rate": 9.745960861182716e-06, "loss": 0.0034, "step": 35150 }, { "epoch": 0.5753088439826557, "grad_norm": 0.514564037322998, "learning_rate": 9.745661202454419e-06, "loss": 0.0035, "step": 35160 }, { "epoch": 0.5754724699337315, "grad_norm": 0.5590274333953857, "learning_rate": 9.745361371707309e-06, "loss": 0.0041, "step": 35170 }, { "epoch": 0.5756360958848074, "grad_norm": 0.17944744229316711, "learning_rate": 9.745061368952257e-06, "loss": 0.002, "step": 35180 }, { "epoch": 0.5757997218358831, "grad_norm": 0.26431846618652344, "learning_rate": 9.744761194200133e-06, "loss": 0.007, "step": 35190 }, { "epoch": 0.575963347786959, "grad_norm": 0.22643274068832397, "learning_rate": 9.74446084746182e-06, "loss": 0.004, "step": 35200 }, { "epoch": 0.5761269737380349, "grad_norm": 0.431440144777298, "learning_rate": 9.744160328748204e-06, "loss": 0.0069, "step": 35210 }, { "epoch": 0.5762905996891107, "grad_norm": 0.16164946556091309, "learning_rate": 9.74385963807018e-06, "loss": 0.0039, "step": 35220 }, { "epoch": 0.5764542256401866, "grad_norm": 0.14265932142734528, "learning_rate": 9.743558775438644e-06, "loss": 0.0061, "step": 35230 }, { "epoch": 0.5766178515912623, "grad_norm": 0.17127510905265808, "learning_rate": 9.743257740864502e-06, "loss": 0.0039, "step": 35240 }, { "epoch": 0.5767814775423382, "grad_norm": 0.10949498414993286, "learning_rate": 9.742956534358669e-06, "loss": 0.0041, "step": 35250 }, { "epoch": 0.5769451034934141, "grad_norm": 0.3070734441280365, "learning_rate": 9.742655155932061e-06, "loss": 0.0027, "step": 35260 }, { "epoch": 0.5771087294444899, "grad_norm": 0.408394455909729, "learning_rate": 9.742353605595602e-06, "loss": 0.0028, "step": 35270 }, { "epoch": 0.5772723553955658, "grad_norm": 0.391413152217865, "learning_rate": 9.742051883360222e-06, "loss": 0.0031, "step": 35280 }, { "epoch": 0.5774359813466415, "grad_norm": 0.015228900127112865, "learning_rate": 9.74174998923686e-06, "loss": 0.0034, "step": 35290 }, { "epoch": 0.5775996072977174, "grad_norm": 0.36725103855133057, "learning_rate": 9.741447923236456e-06, "loss": 0.0023, "step": 35300 }, { "epoch": 0.5777632332487933, "grad_norm": 0.3825167417526245, "learning_rate": 9.741145685369962e-06, "loss": 0.004, "step": 35310 }, { "epoch": 0.5779268591998691, "grad_norm": 0.41490596532821655, "learning_rate": 9.74084327564833e-06, "loss": 0.0045, "step": 35320 }, { "epoch": 0.578090485150945, "grad_norm": 0.3690876066684723, "learning_rate": 9.740540694082524e-06, "loss": 0.0044, "step": 35330 }, { "epoch": 0.5782541111020207, "grad_norm": 0.19393190741539001, "learning_rate": 9.740237940683512e-06, "loss": 0.0053, "step": 35340 }, { "epoch": 0.5784177370530966, "grad_norm": 0.08227064460515976, "learning_rate": 9.739935015462267e-06, "loss": 0.0051, "step": 35350 }, { "epoch": 0.5785813630041725, "grad_norm": 0.33437106013298035, "learning_rate": 9.73963191842977e-06, "loss": 0.0036, "step": 35360 }, { "epoch": 0.5787449889552483, "grad_norm": 0.3459462821483612, "learning_rate": 9.73932864959701e-06, "loss": 0.005, "step": 35370 }, { "epoch": 0.5789086149063242, "grad_norm": 0.5705816149711609, "learning_rate": 9.739025208974974e-06, "loss": 0.0049, "step": 35380 }, { "epoch": 0.5790722408574, "grad_norm": 0.1507769674062729, "learning_rate": 9.738721596574666e-06, "loss": 0.0055, "step": 35390 }, { "epoch": 0.5792358668084758, "grad_norm": 0.33719950914382935, "learning_rate": 9.738417812407085e-06, "loss": 0.0065, "step": 35400 }, { "epoch": 0.5793994927595517, "grad_norm": 0.2390710860490799, "learning_rate": 9.738113856483251e-06, "loss": 0.0062, "step": 35410 }, { "epoch": 0.5795631187106275, "grad_norm": 0.027272971346974373, "learning_rate": 9.737809728814178e-06, "loss": 0.0042, "step": 35420 }, { "epoch": 0.5797267446617034, "grad_norm": 0.3524695038795471, "learning_rate": 9.737505429410888e-06, "loss": 0.0032, "step": 35430 }, { "epoch": 0.5798903706127791, "grad_norm": 0.2170034945011139, "learning_rate": 9.737200958284412e-06, "loss": 0.0048, "step": 35440 }, { "epoch": 0.580053996563855, "grad_norm": 0.38364577293395996, "learning_rate": 9.736896315445787e-06, "loss": 0.0043, "step": 35450 }, { "epoch": 0.5802176225149309, "grad_norm": 0.28469038009643555, "learning_rate": 9.736591500906055e-06, "loss": 0.0039, "step": 35460 }, { "epoch": 0.5803812484660067, "grad_norm": 0.09250683337450027, "learning_rate": 9.736286514676265e-06, "loss": 0.0042, "step": 35470 }, { "epoch": 0.5805448744170826, "grad_norm": 0.1815451979637146, "learning_rate": 9.735981356767475e-06, "loss": 0.0039, "step": 35480 }, { "epoch": 0.5807085003681584, "grad_norm": 0.28491681814193726, "learning_rate": 9.73567602719074e-06, "loss": 0.0039, "step": 35490 }, { "epoch": 0.5808721263192342, "grad_norm": 0.10905561596155167, "learning_rate": 9.735370525957133e-06, "loss": 0.0028, "step": 35500 }, { "epoch": 0.5810357522703101, "grad_norm": 0.06161411479115486, "learning_rate": 9.735064853077724e-06, "loss": 0.0033, "step": 35510 }, { "epoch": 0.5811993782213859, "grad_norm": 0.21105897426605225, "learning_rate": 9.734759008563596e-06, "loss": 0.0041, "step": 35520 }, { "epoch": 0.5813630041724618, "grad_norm": 0.058447372168302536, "learning_rate": 9.734452992425834e-06, "loss": 0.0042, "step": 35530 }, { "epoch": 0.5815266301235376, "grad_norm": 0.21096155047416687, "learning_rate": 9.734146804675528e-06, "loss": 0.0035, "step": 35540 }, { "epoch": 0.5816902560746134, "grad_norm": 0.09213443100452423, "learning_rate": 9.733840445323778e-06, "loss": 0.0037, "step": 35550 }, { "epoch": 0.5818538820256893, "grad_norm": 0.20286403596401215, "learning_rate": 9.733533914381692e-06, "loss": 0.0032, "step": 35560 }, { "epoch": 0.5820175079767651, "grad_norm": 0.0792188048362732, "learning_rate": 9.733227211860378e-06, "loss": 0.0044, "step": 35570 }, { "epoch": 0.582181133927841, "grad_norm": 0.09005182236433029, "learning_rate": 9.732920337770953e-06, "loss": 0.005, "step": 35580 }, { "epoch": 0.5823447598789168, "grad_norm": 0.18099474906921387, "learning_rate": 9.73261329212454e-06, "loss": 0.0037, "step": 35590 }, { "epoch": 0.5825083858299926, "grad_norm": 0.23028463125228882, "learning_rate": 9.732306074932271e-06, "loss": 0.0058, "step": 35600 }, { "epoch": 0.5826720117810685, "grad_norm": 0.1142086535692215, "learning_rate": 9.731998686205279e-06, "loss": 0.0042, "step": 35610 }, { "epoch": 0.5828356377321443, "grad_norm": 0.26584315299987793, "learning_rate": 9.73169112595471e-06, "loss": 0.0041, "step": 35620 }, { "epoch": 0.5829992636832202, "grad_norm": 0.19755353033542633, "learning_rate": 9.731383394191708e-06, "loss": 0.0062, "step": 35630 }, { "epoch": 0.583162889634296, "grad_norm": 0.49182742834091187, "learning_rate": 9.73107549092743e-06, "loss": 0.0057, "step": 35640 }, { "epoch": 0.5833265155853719, "grad_norm": 0.272129088640213, "learning_rate": 9.730767416173038e-06, "loss": 0.0051, "step": 35650 }, { "epoch": 0.5834901415364476, "grad_norm": 0.3390112519264221, "learning_rate": 9.730459169939697e-06, "loss": 0.0051, "step": 35660 }, { "epoch": 0.5836537674875235, "grad_norm": 0.048624761402606964, "learning_rate": 9.730150752238578e-06, "loss": 0.0032, "step": 35670 }, { "epoch": 0.5838173934385994, "grad_norm": 0.1803896576166153, "learning_rate": 9.729842163080866e-06, "loss": 0.003, "step": 35680 }, { "epoch": 0.5839810193896752, "grad_norm": 0.21148480474948883, "learning_rate": 9.72953340247774e-06, "loss": 0.004, "step": 35690 }, { "epoch": 0.5841446453407511, "grad_norm": 0.08211598545312881, "learning_rate": 9.729224470440397e-06, "loss": 0.0014, "step": 35700 }, { "epoch": 0.5843082712918268, "grad_norm": 0.24902889132499695, "learning_rate": 9.728915366980033e-06, "loss": 0.0048, "step": 35710 }, { "epoch": 0.5844718972429027, "grad_norm": 0.4118281304836273, "learning_rate": 9.728606092107854e-06, "loss": 0.0039, "step": 35720 }, { "epoch": 0.5846355231939786, "grad_norm": 0.1850242018699646, "learning_rate": 9.728296645835069e-06, "loss": 0.0036, "step": 35730 }, { "epoch": 0.5847991491450544, "grad_norm": 0.10127825289964676, "learning_rate": 9.727987028172892e-06, "loss": 0.0038, "step": 35740 }, { "epoch": 0.5849627750961303, "grad_norm": 0.24307718873023987, "learning_rate": 9.727677239132552e-06, "loss": 0.0039, "step": 35750 }, { "epoch": 0.585126401047206, "grad_norm": 0.32504841685295105, "learning_rate": 9.727367278725272e-06, "loss": 0.0091, "step": 35760 }, { "epoch": 0.5852900269982819, "grad_norm": 0.07922003418207169, "learning_rate": 9.727057146962291e-06, "loss": 0.0052, "step": 35770 }, { "epoch": 0.5854536529493578, "grad_norm": 0.39402493834495544, "learning_rate": 9.726746843854851e-06, "loss": 0.0035, "step": 35780 }, { "epoch": 0.5856172789004336, "grad_norm": 0.1367437243461609, "learning_rate": 9.726436369414196e-06, "loss": 0.0037, "step": 35790 }, { "epoch": 0.5857809048515095, "grad_norm": 0.15938787162303925, "learning_rate": 9.726125723651584e-06, "loss": 0.0046, "step": 35800 }, { "epoch": 0.5859445308025852, "grad_norm": 0.12762023508548737, "learning_rate": 9.725814906578272e-06, "loss": 0.0032, "step": 35810 }, { "epoch": 0.5861081567536611, "grad_norm": 0.2957753837108612, "learning_rate": 9.72550391820553e-06, "loss": 0.0041, "step": 35820 }, { "epoch": 0.586271782704737, "grad_norm": 0.34607669711112976, "learning_rate": 9.725192758544628e-06, "loss": 0.0026, "step": 35830 }, { "epoch": 0.5864354086558128, "grad_norm": 0.3427541255950928, "learning_rate": 9.724881427606842e-06, "loss": 0.0033, "step": 35840 }, { "epoch": 0.5865990346068887, "grad_norm": 0.1023557260632515, "learning_rate": 9.724569925403465e-06, "loss": 0.003, "step": 35850 }, { "epoch": 0.5867626605579644, "grad_norm": 0.1070786640048027, "learning_rate": 9.724258251945779e-06, "loss": 0.0067, "step": 35860 }, { "epoch": 0.5869262865090403, "grad_norm": 0.1273365169763565, "learning_rate": 9.723946407245089e-06, "loss": 0.0028, "step": 35870 }, { "epoch": 0.5870899124601162, "grad_norm": 0.18796522915363312, "learning_rate": 9.723634391312694e-06, "loss": 0.0034, "step": 35880 }, { "epoch": 0.587253538411192, "grad_norm": 0.1727188676595688, "learning_rate": 9.723322204159906e-06, "loss": 0.0049, "step": 35890 }, { "epoch": 0.5874171643622679, "grad_norm": 0.013268476352095604, "learning_rate": 9.723009845798039e-06, "loss": 0.0032, "step": 35900 }, { "epoch": 0.5875807903133436, "grad_norm": 0.15942905843257904, "learning_rate": 9.722697316238419e-06, "loss": 0.0027, "step": 35910 }, { "epoch": 0.5877444162644195, "grad_norm": 0.258378267288208, "learning_rate": 9.72238461549237e-06, "loss": 0.0043, "step": 35920 }, { "epoch": 0.5879080422154954, "grad_norm": 0.27693480253219604, "learning_rate": 9.722071743571229e-06, "loss": 0.0028, "step": 35930 }, { "epoch": 0.5880716681665712, "grad_norm": 0.3835853338241577, "learning_rate": 9.721758700486336e-06, "loss": 0.0043, "step": 35940 }, { "epoch": 0.5882352941176471, "grad_norm": 0.16655398905277252, "learning_rate": 9.72144548624904e-06, "loss": 0.0033, "step": 35950 }, { "epoch": 0.5883989200687229, "grad_norm": 0.15240994095802307, "learning_rate": 9.721132100870692e-06, "loss": 0.003, "step": 35960 }, { "epoch": 0.5885625460197987, "grad_norm": 0.08357996493577957, "learning_rate": 9.720818544362652e-06, "loss": 0.0037, "step": 35970 }, { "epoch": 0.5887261719708746, "grad_norm": 0.2652566730976105, "learning_rate": 9.720504816736285e-06, "loss": 0.0043, "step": 35980 }, { "epoch": 0.5888897979219504, "grad_norm": 0.1606847494840622, "learning_rate": 9.720190918002965e-06, "loss": 0.0046, "step": 35990 }, { "epoch": 0.5890534238730263, "grad_norm": 0.0774414911866188, "learning_rate": 9.71987684817407e-06, "loss": 0.0029, "step": 36000 }, { "epoch": 0.5892170498241021, "grad_norm": 0.31056180596351624, "learning_rate": 9.719562607260982e-06, "loss": 0.0043, "step": 36010 }, { "epoch": 0.5893806757751779, "grad_norm": 0.15677975118160248, "learning_rate": 9.719248195275092e-06, "loss": 0.0029, "step": 36020 }, { "epoch": 0.5895443017262538, "grad_norm": 0.23312224447727203, "learning_rate": 9.718933612227799e-06, "loss": 0.0049, "step": 36030 }, { "epoch": 0.5897079276773296, "grad_norm": 0.15039682388305664, "learning_rate": 9.718618858130502e-06, "loss": 0.0043, "step": 36040 }, { "epoch": 0.5898715536284055, "grad_norm": 0.2911875545978546, "learning_rate": 9.718303932994614e-06, "loss": 0.0042, "step": 36050 }, { "epoch": 0.5900351795794813, "grad_norm": 0.3128955364227295, "learning_rate": 9.717988836831548e-06, "loss": 0.0041, "step": 36060 }, { "epoch": 0.5901988055305571, "grad_norm": 0.211540088057518, "learning_rate": 9.717673569652726e-06, "loss": 0.0034, "step": 36070 }, { "epoch": 0.590362431481633, "grad_norm": 0.11134030669927597, "learning_rate": 9.717358131469576e-06, "loss": 0.0042, "step": 36080 }, { "epoch": 0.5905260574327088, "grad_norm": 0.21403639018535614, "learning_rate": 9.717042522293531e-06, "loss": 0.002, "step": 36090 }, { "epoch": 0.5906896833837847, "grad_norm": 0.6550421118736267, "learning_rate": 9.716726742136034e-06, "loss": 0.003, "step": 36100 }, { "epoch": 0.5908533093348605, "grad_norm": 0.2903192341327667, "learning_rate": 9.716410791008526e-06, "loss": 0.0043, "step": 36110 }, { "epoch": 0.5910169352859364, "grad_norm": 0.18604125082492828, "learning_rate": 9.716094668922464e-06, "loss": 0.0032, "step": 36120 }, { "epoch": 0.5911805612370122, "grad_norm": 0.08308400958776474, "learning_rate": 9.715778375889304e-06, "loss": 0.0027, "step": 36130 }, { "epoch": 0.591344187188088, "grad_norm": 0.26171454787254333, "learning_rate": 9.715461911920513e-06, "loss": 0.0049, "step": 36140 }, { "epoch": 0.5915078131391639, "grad_norm": 0.09076100587844849, "learning_rate": 9.71514527702756e-06, "loss": 0.0024, "step": 36150 }, { "epoch": 0.5916714390902397, "grad_norm": 0.14372113347053528, "learning_rate": 9.714828471221924e-06, "loss": 0.0039, "step": 36160 }, { "epoch": 0.5918350650413156, "grad_norm": 0.11785857379436493, "learning_rate": 9.714511494515089e-06, "loss": 0.004, "step": 36170 }, { "epoch": 0.5919986909923914, "grad_norm": 0.2088654637336731, "learning_rate": 9.714194346918542e-06, "loss": 0.0062, "step": 36180 }, { "epoch": 0.5921623169434672, "grad_norm": 0.2550960183143616, "learning_rate": 9.713877028443779e-06, "loss": 0.0043, "step": 36190 }, { "epoch": 0.5923259428945431, "grad_norm": 0.058798037469387054, "learning_rate": 9.713559539102304e-06, "loss": 0.003, "step": 36200 }, { "epoch": 0.5924895688456189, "grad_norm": 0.25456178188323975, "learning_rate": 9.713241878905626e-06, "loss": 0.0056, "step": 36210 }, { "epoch": 0.5926531947966948, "grad_norm": 0.29534563422203064, "learning_rate": 9.712924047865255e-06, "loss": 0.0052, "step": 36220 }, { "epoch": 0.5928168207477706, "grad_norm": 0.15518364310264587, "learning_rate": 9.712606045992717e-06, "loss": 0.0039, "step": 36230 }, { "epoch": 0.5929804466988464, "grad_norm": 0.08899492770433426, "learning_rate": 9.712287873299535e-06, "loss": 0.0066, "step": 36240 }, { "epoch": 0.5931440726499223, "grad_norm": 0.1559165120124817, "learning_rate": 9.711969529797245e-06, "loss": 0.0045, "step": 36250 }, { "epoch": 0.5933076986009981, "grad_norm": 0.22468434274196625, "learning_rate": 9.711651015497382e-06, "loss": 0.0032, "step": 36260 }, { "epoch": 0.593471324552074, "grad_norm": 0.15269573032855988, "learning_rate": 9.711332330411495e-06, "loss": 0.0046, "step": 36270 }, { "epoch": 0.5936349505031498, "grad_norm": 0.336885005235672, "learning_rate": 9.711013474551135e-06, "loss": 0.0031, "step": 36280 }, { "epoch": 0.5937985764542256, "grad_norm": 0.22085560858249664, "learning_rate": 9.710694447927858e-06, "loss": 0.0031, "step": 36290 }, { "epoch": 0.5939622024053015, "grad_norm": 0.16883236169815063, "learning_rate": 9.71037525055323e-06, "loss": 0.0035, "step": 36300 }, { "epoch": 0.5941258283563773, "grad_norm": 0.9199559092521667, "learning_rate": 9.71005588243882e-06, "loss": 0.006, "step": 36310 }, { "epoch": 0.5942894543074532, "grad_norm": 0.09754136204719543, "learning_rate": 9.709736343596206e-06, "loss": 0.0033, "step": 36320 }, { "epoch": 0.594453080258529, "grad_norm": 0.0830918550491333, "learning_rate": 9.709416634036966e-06, "loss": 0.0053, "step": 36330 }, { "epoch": 0.5946167062096048, "grad_norm": 0.428536593914032, "learning_rate": 9.709096753772694e-06, "loss": 0.0055, "step": 36340 }, { "epoch": 0.5947803321606807, "grad_norm": 0.12406786531209946, "learning_rate": 9.708776702814982e-06, "loss": 0.0032, "step": 36350 }, { "epoch": 0.5949439581117565, "grad_norm": 0.34767746925354004, "learning_rate": 9.708456481175433e-06, "loss": 0.0036, "step": 36360 }, { "epoch": 0.5951075840628324, "grad_norm": 0.3666210472583771, "learning_rate": 9.708136088865651e-06, "loss": 0.0048, "step": 36370 }, { "epoch": 0.5952712100139083, "grad_norm": 0.1579841524362564, "learning_rate": 9.707815525897253e-06, "loss": 0.0031, "step": 36380 }, { "epoch": 0.595434835964984, "grad_norm": 0.19056245684623718, "learning_rate": 9.707494792281856e-06, "loss": 0.0034, "step": 36390 }, { "epoch": 0.5955984619160599, "grad_norm": 0.5286058783531189, "learning_rate": 9.707173888031087e-06, "loss": 0.0054, "step": 36400 }, { "epoch": 0.5957620878671357, "grad_norm": 0.4452703297138214, "learning_rate": 9.70685281315658e-06, "loss": 0.0046, "step": 36410 }, { "epoch": 0.5959257138182116, "grad_norm": 0.2830319106578827, "learning_rate": 9.70653156766997e-06, "loss": 0.0042, "step": 36420 }, { "epoch": 0.5960893397692875, "grad_norm": 0.07279734313488007, "learning_rate": 9.706210151582901e-06, "loss": 0.0055, "step": 36430 }, { "epoch": 0.5962529657203632, "grad_norm": 0.5065034031867981, "learning_rate": 9.705888564907027e-06, "loss": 0.0065, "step": 36440 }, { "epoch": 0.5964165916714391, "grad_norm": 0.09429576992988586, "learning_rate": 9.705566807654e-06, "loss": 0.0052, "step": 36450 }, { "epoch": 0.5965802176225149, "grad_norm": 0.14021041989326477, "learning_rate": 9.70524487983549e-06, "loss": 0.0022, "step": 36460 }, { "epoch": 0.5967438435735908, "grad_norm": 0.13279235363006592, "learning_rate": 9.704922781463159e-06, "loss": 0.0023, "step": 36470 }, { "epoch": 0.5969074695246667, "grad_norm": 0.09888876229524612, "learning_rate": 9.704600512548685e-06, "loss": 0.0026, "step": 36480 }, { "epoch": 0.5970710954757424, "grad_norm": 0.21202786266803741, "learning_rate": 9.70427807310375e-06, "loss": 0.0035, "step": 36490 }, { "epoch": 0.5972347214268183, "grad_norm": 0.268649160861969, "learning_rate": 9.703955463140042e-06, "loss": 0.0025, "step": 36500 }, { "epoch": 0.5973983473778941, "grad_norm": 0.13659344613552094, "learning_rate": 9.703632682669255e-06, "loss": 0.0048, "step": 36510 }, { "epoch": 0.59756197332897, "grad_norm": 0.1162571832537651, "learning_rate": 9.703309731703088e-06, "loss": 0.0039, "step": 36520 }, { "epoch": 0.5977255992800458, "grad_norm": 0.322752982378006, "learning_rate": 9.702986610253244e-06, "loss": 0.0033, "step": 36530 }, { "epoch": 0.5978892252311216, "grad_norm": 0.06960514187812805, "learning_rate": 9.702663318331443e-06, "loss": 0.0047, "step": 36540 }, { "epoch": 0.5980528511821975, "grad_norm": 0.30746543407440186, "learning_rate": 9.702339855949396e-06, "loss": 0.0058, "step": 36550 }, { "epoch": 0.5982164771332733, "grad_norm": 0.09129039943218231, "learning_rate": 9.702016223118832e-06, "loss": 0.0043, "step": 36560 }, { "epoch": 0.5983801030843492, "grad_norm": 0.2228856384754181, "learning_rate": 9.701692419851483e-06, "loss": 0.0032, "step": 36570 }, { "epoch": 0.598543729035425, "grad_norm": 0.012627189978957176, "learning_rate": 9.701368446159081e-06, "loss": 0.0046, "step": 36580 }, { "epoch": 0.5987073549865009, "grad_norm": 0.12724575400352478, "learning_rate": 9.701044302053373e-06, "loss": 0.0025, "step": 36590 }, { "epoch": 0.5988709809375767, "grad_norm": 0.11833687126636505, "learning_rate": 9.700719987546108e-06, "loss": 0.0052, "step": 36600 }, { "epoch": 0.5990346068886525, "grad_norm": 0.1939002275466919, "learning_rate": 9.700395502649042e-06, "loss": 0.004, "step": 36610 }, { "epoch": 0.5991982328397284, "grad_norm": 0.09875940531492233, "learning_rate": 9.700070847373937e-06, "loss": 0.0035, "step": 36620 }, { "epoch": 0.5993618587908042, "grad_norm": 0.09660714119672775, "learning_rate": 9.699746021732559e-06, "loss": 0.0033, "step": 36630 }, { "epoch": 0.59952548474188, "grad_norm": 0.38685843348503113, "learning_rate": 9.69942102573668e-06, "loss": 0.0047, "step": 36640 }, { "epoch": 0.5996891106929559, "grad_norm": 0.28250786662101746, "learning_rate": 9.699095859398088e-06, "loss": 0.0027, "step": 36650 }, { "epoch": 0.5998527366440317, "grad_norm": 0.39558863639831543, "learning_rate": 9.698770522728564e-06, "loss": 0.0039, "step": 36660 }, { "epoch": 0.6000163625951076, "grad_norm": 0.07542917132377625, "learning_rate": 9.698445015739902e-06, "loss": 0.0049, "step": 36670 }, { "epoch": 0.6001799885461834, "grad_norm": 0.27011069655418396, "learning_rate": 9.6981193384439e-06, "loss": 0.0043, "step": 36680 }, { "epoch": 0.6003436144972593, "grad_norm": 0.2498656064271927, "learning_rate": 9.697793490852364e-06, "loss": 0.0036, "step": 36690 }, { "epoch": 0.6005072404483351, "grad_norm": 0.15047931671142578, "learning_rate": 9.697467472977104e-06, "loss": 0.0045, "step": 36700 }, { "epoch": 0.6006708663994109, "grad_norm": 0.15089407563209534, "learning_rate": 9.697141284829939e-06, "loss": 0.0049, "step": 36710 }, { "epoch": 0.6008344923504868, "grad_norm": 0.1703311949968338, "learning_rate": 9.69681492642269e-06, "loss": 0.0027, "step": 36720 }, { "epoch": 0.6009981183015626, "grad_norm": 0.04145166277885437, "learning_rate": 9.69648839776719e-06, "loss": 0.0059, "step": 36730 }, { "epoch": 0.6011617442526385, "grad_norm": 0.06942036747932434, "learning_rate": 9.696161698875274e-06, "loss": 0.0027, "step": 36740 }, { "epoch": 0.6013253702037143, "grad_norm": 0.1678304374217987, "learning_rate": 9.695834829758782e-06, "loss": 0.0043, "step": 36750 }, { "epoch": 0.6014889961547901, "grad_norm": 0.562107503414154, "learning_rate": 9.695507790429564e-06, "loss": 0.0048, "step": 36760 }, { "epoch": 0.601652622105866, "grad_norm": 0.28181058168411255, "learning_rate": 9.695180580899475e-06, "loss": 0.0031, "step": 36770 }, { "epoch": 0.6018162480569418, "grad_norm": 0.11822933703660965, "learning_rate": 9.694853201180372e-06, "loss": 0.005, "step": 36780 }, { "epoch": 0.6019798740080177, "grad_norm": 0.15481255948543549, "learning_rate": 9.694525651284126e-06, "loss": 0.0036, "step": 36790 }, { "epoch": 0.6021434999590936, "grad_norm": 0.10693569481372833, "learning_rate": 9.69419793122261e-06, "loss": 0.0029, "step": 36800 }, { "epoch": 0.6023071259101693, "grad_norm": 0.6296468377113342, "learning_rate": 9.693870041007698e-06, "loss": 0.0034, "step": 36810 }, { "epoch": 0.6024707518612452, "grad_norm": 0.10660327225923538, "learning_rate": 9.693541980651278e-06, "loss": 0.0035, "step": 36820 }, { "epoch": 0.602634377812321, "grad_norm": 0.44996020197868347, "learning_rate": 9.693213750165244e-06, "loss": 0.0042, "step": 36830 }, { "epoch": 0.6027980037633969, "grad_norm": 0.08215480297803879, "learning_rate": 9.69288534956149e-06, "loss": 0.0071, "step": 36840 }, { "epoch": 0.6029616297144728, "grad_norm": 0.3710929751396179, "learning_rate": 9.692556778851922e-06, "loss": 0.0023, "step": 36850 }, { "epoch": 0.6031252556655485, "grad_norm": 0.41604313254356384, "learning_rate": 9.692228038048449e-06, "loss": 0.0057, "step": 36860 }, { "epoch": 0.6032888816166244, "grad_norm": 0.05612803250551224, "learning_rate": 9.691899127162986e-06, "loss": 0.0049, "step": 36870 }, { "epoch": 0.6034525075677002, "grad_norm": 0.14243543148040771, "learning_rate": 9.691570046207458e-06, "loss": 0.0049, "step": 36880 }, { "epoch": 0.6036161335187761, "grad_norm": 0.1722288578748703, "learning_rate": 9.69124079519379e-06, "loss": 0.0043, "step": 36890 }, { "epoch": 0.603779759469852, "grad_norm": 0.2720753252506256, "learning_rate": 9.69091137413392e-06, "loss": 0.0053, "step": 36900 }, { "epoch": 0.6039433854209277, "grad_norm": 0.12692444026470184, "learning_rate": 9.690581783039786e-06, "loss": 0.0051, "step": 36910 }, { "epoch": 0.6041070113720036, "grad_norm": 0.22577054798603058, "learning_rate": 9.690252021923337e-06, "loss": 0.0039, "step": 36920 }, { "epoch": 0.6042706373230794, "grad_norm": 0.24379587173461914, "learning_rate": 9.689922090796525e-06, "loss": 0.0039, "step": 36930 }, { "epoch": 0.6044342632741553, "grad_norm": 0.19056163728237152, "learning_rate": 9.689591989671308e-06, "loss": 0.0047, "step": 36940 }, { "epoch": 0.6045978892252312, "grad_norm": 0.0722750797867775, "learning_rate": 9.689261718559652e-06, "loss": 0.0041, "step": 36950 }, { "epoch": 0.6047615151763069, "grad_norm": 0.15594319999217987, "learning_rate": 9.68893127747353e-06, "loss": 0.0027, "step": 36960 }, { "epoch": 0.6049251411273828, "grad_norm": 0.1811019480228424, "learning_rate": 9.688600666424917e-06, "loss": 0.0058, "step": 36970 }, { "epoch": 0.6050887670784586, "grad_norm": 0.12612028419971466, "learning_rate": 9.6882698854258e-06, "loss": 0.0054, "step": 36980 }, { "epoch": 0.6052523930295345, "grad_norm": 0.2595139741897583, "learning_rate": 9.687938934488169e-06, "loss": 0.0028, "step": 36990 }, { "epoch": 0.6054160189806104, "grad_norm": 0.19056661427021027, "learning_rate": 9.687607813624017e-06, "loss": 0.0035, "step": 37000 }, { "epoch": 0.6055796449316861, "grad_norm": 0.20385126769542694, "learning_rate": 9.687276522845348e-06, "loss": 0.0043, "step": 37010 }, { "epoch": 0.605743270882762, "grad_norm": 0.38853633403778076, "learning_rate": 9.686945062164173e-06, "loss": 0.0042, "step": 37020 }, { "epoch": 0.6059068968338378, "grad_norm": 0.1379052847623825, "learning_rate": 9.686613431592503e-06, "loss": 0.0034, "step": 37030 }, { "epoch": 0.6060705227849137, "grad_norm": 0.0941978469491005, "learning_rate": 9.686281631142357e-06, "loss": 0.003, "step": 37040 }, { "epoch": 0.6062341487359896, "grad_norm": 0.2398861199617386, "learning_rate": 9.685949660825768e-06, "loss": 0.0086, "step": 37050 }, { "epoch": 0.6063977746870653, "grad_norm": 0.40306493639945984, "learning_rate": 9.685617520654767e-06, "loss": 0.0062, "step": 37060 }, { "epoch": 0.6065614006381412, "grad_norm": 0.18662330508232117, "learning_rate": 9.685285210641391e-06, "loss": 0.0038, "step": 37070 }, { "epoch": 0.606725026589217, "grad_norm": 0.2957414388656616, "learning_rate": 9.684952730797687e-06, "loss": 0.0048, "step": 37080 }, { "epoch": 0.6068886525402929, "grad_norm": 0.06518682837486267, "learning_rate": 9.684620081135707e-06, "loss": 0.0023, "step": 37090 }, { "epoch": 0.6070522784913688, "grad_norm": 0.1331016719341278, "learning_rate": 9.684287261667506e-06, "loss": 0.0033, "step": 37100 }, { "epoch": 0.6072159044424446, "grad_norm": 0.1152452602982521, "learning_rate": 9.683954272405154e-06, "loss": 0.005, "step": 37110 }, { "epoch": 0.6073795303935204, "grad_norm": 0.2517909109592438, "learning_rate": 9.683621113360715e-06, "loss": 0.0035, "step": 37120 }, { "epoch": 0.6075431563445962, "grad_norm": 0.11674592643976212, "learning_rate": 9.683287784546267e-06, "loss": 0.0034, "step": 37130 }, { "epoch": 0.6077067822956721, "grad_norm": 0.3762049674987793, "learning_rate": 9.682954285973893e-06, "loss": 0.0027, "step": 37140 }, { "epoch": 0.607870408246748, "grad_norm": 0.1305396556854248, "learning_rate": 9.682620617655682e-06, "loss": 0.0057, "step": 37150 }, { "epoch": 0.6080340341978238, "grad_norm": 0.13146241009235382, "learning_rate": 9.68228677960373e-06, "loss": 0.0034, "step": 37160 }, { "epoch": 0.6081976601488996, "grad_norm": 0.12051857262849808, "learning_rate": 9.681952771830132e-06, "loss": 0.0029, "step": 37170 }, { "epoch": 0.6083612860999754, "grad_norm": 0.16091465950012207, "learning_rate": 9.681618594347e-06, "loss": 0.0063, "step": 37180 }, { "epoch": 0.6085249120510513, "grad_norm": 0.25111061334609985, "learning_rate": 9.681284247166446e-06, "loss": 0.0044, "step": 37190 }, { "epoch": 0.6086885380021272, "grad_norm": 0.3670584559440613, "learning_rate": 9.68094973030059e-06, "loss": 0.0032, "step": 37200 }, { "epoch": 0.608852163953203, "grad_norm": 0.04304076358675957, "learning_rate": 9.680615043761557e-06, "loss": 0.0044, "step": 37210 }, { "epoch": 0.6090157899042788, "grad_norm": 0.4578183889389038, "learning_rate": 9.680280187561477e-06, "loss": 0.0101, "step": 37220 }, { "epoch": 0.6091794158553546, "grad_norm": 0.32647621631622314, "learning_rate": 9.67994516171249e-06, "loss": 0.0033, "step": 37230 }, { "epoch": 0.6093430418064305, "grad_norm": 0.3882319927215576, "learning_rate": 9.679609966226737e-06, "loss": 0.0031, "step": 37240 }, { "epoch": 0.6095066677575064, "grad_norm": 0.18886899948120117, "learning_rate": 9.679274601116372e-06, "loss": 0.0033, "step": 37250 }, { "epoch": 0.6096702937085822, "grad_norm": 0.2093072384595871, "learning_rate": 9.678939066393547e-06, "loss": 0.0032, "step": 37260 }, { "epoch": 0.609833919659658, "grad_norm": 0.5415998101234436, "learning_rate": 9.67860336207043e-06, "loss": 0.0061, "step": 37270 }, { "epoch": 0.6099975456107338, "grad_norm": 0.07922516763210297, "learning_rate": 9.678267488159182e-06, "loss": 0.0024, "step": 37280 }, { "epoch": 0.6101611715618097, "grad_norm": 0.12227454036474228, "learning_rate": 9.677931444671983e-06, "loss": 0.0043, "step": 37290 }, { "epoch": 0.6103247975128856, "grad_norm": 0.08456715941429138, "learning_rate": 9.677595231621012e-06, "loss": 0.0029, "step": 37300 }, { "epoch": 0.6104884234639614, "grad_norm": 0.6788985729217529, "learning_rate": 9.677258849018457e-06, "loss": 0.0036, "step": 37310 }, { "epoch": 0.6106520494150373, "grad_norm": 0.17371058464050293, "learning_rate": 9.676922296876509e-06, "loss": 0.0039, "step": 37320 }, { "epoch": 0.610815675366113, "grad_norm": 0.2948424816131592, "learning_rate": 9.67658557520737e-06, "loss": 0.0027, "step": 37330 }, { "epoch": 0.6109793013171889, "grad_norm": 0.3183381259441376, "learning_rate": 9.676248684023242e-06, "loss": 0.0056, "step": 37340 }, { "epoch": 0.6111429272682648, "grad_norm": 0.08708910644054413, "learning_rate": 9.675911623336338e-06, "loss": 0.0039, "step": 37350 }, { "epoch": 0.6113065532193406, "grad_norm": 0.4103429317474365, "learning_rate": 9.675574393158877e-06, "loss": 0.0048, "step": 37360 }, { "epoch": 0.6114701791704165, "grad_norm": 0.06971242278814316, "learning_rate": 9.67523699350308e-06, "loss": 0.0035, "step": 37370 }, { "epoch": 0.6116338051214922, "grad_norm": 0.2830762267112732, "learning_rate": 9.67489942438118e-06, "loss": 0.0049, "step": 37380 }, { "epoch": 0.6117974310725681, "grad_norm": 0.43115437030792236, "learning_rate": 9.674561685805413e-06, "loss": 0.0032, "step": 37390 }, { "epoch": 0.6119610570236439, "grad_norm": 0.10677501559257507, "learning_rate": 9.674223777788018e-06, "loss": 0.0021, "step": 37400 }, { "epoch": 0.6121246829747198, "grad_norm": 0.3374081254005432, "learning_rate": 9.673885700341248e-06, "loss": 0.0036, "step": 37410 }, { "epoch": 0.6122883089257957, "grad_norm": 0.17241886258125305, "learning_rate": 9.673547453477352e-06, "loss": 0.003, "step": 37420 }, { "epoch": 0.6124519348768714, "grad_norm": 0.07272238284349442, "learning_rate": 9.673209037208596e-06, "loss": 0.0045, "step": 37430 }, { "epoch": 0.6126155608279473, "grad_norm": 0.303509920835495, "learning_rate": 9.67287045154724e-06, "loss": 0.0035, "step": 37440 }, { "epoch": 0.6127791867790231, "grad_norm": 0.1668083518743515, "learning_rate": 9.672531696505564e-06, "loss": 0.0024, "step": 37450 }, { "epoch": 0.612942812730099, "grad_norm": 0.23766106367111206, "learning_rate": 9.672192772095843e-06, "loss": 0.0056, "step": 37460 }, { "epoch": 0.6131064386811749, "grad_norm": 0.29345017671585083, "learning_rate": 9.671853678330365e-06, "loss": 0.0036, "step": 37470 }, { "epoch": 0.6132700646322506, "grad_norm": 0.16987884044647217, "learning_rate": 9.67151441522142e-06, "loss": 0.0029, "step": 37480 }, { "epoch": 0.6134336905833265, "grad_norm": 0.2603413462638855, "learning_rate": 9.671174982781302e-06, "loss": 0.0056, "step": 37490 }, { "epoch": 0.6135973165344023, "grad_norm": 0.13226963579654694, "learning_rate": 9.67083538102232e-06, "loss": 0.0049, "step": 37500 }, { "epoch": 0.6137609424854782, "grad_norm": 0.2015450894832611, "learning_rate": 9.67049560995678e-06, "loss": 0.0054, "step": 37510 }, { "epoch": 0.6139245684365541, "grad_norm": 0.36624816060066223, "learning_rate": 9.670155669597002e-06, "loss": 0.0064, "step": 37520 }, { "epoch": 0.6140881943876298, "grad_norm": 0.09674598276615143, "learning_rate": 9.669815559955302e-06, "loss": 0.0025, "step": 37530 }, { "epoch": 0.6142518203387057, "grad_norm": 0.029832294210791588, "learning_rate": 9.669475281044013e-06, "loss": 0.0062, "step": 37540 }, { "epoch": 0.6144154462897815, "grad_norm": 0.1951131522655487, "learning_rate": 9.669134832875469e-06, "loss": 0.0023, "step": 37550 }, { "epoch": 0.6145790722408574, "grad_norm": 0.17030267417430878, "learning_rate": 9.668794215462009e-06, "loss": 0.0025, "step": 37560 }, { "epoch": 0.6147426981919333, "grad_norm": 0.26466241478919983, "learning_rate": 9.668453428815978e-06, "loss": 0.0039, "step": 37570 }, { "epoch": 0.614906324143009, "grad_norm": 0.26683205366134644, "learning_rate": 9.668112472949732e-06, "loss": 0.0049, "step": 37580 }, { "epoch": 0.6150699500940849, "grad_norm": 0.28840070962905884, "learning_rate": 9.667771347875628e-06, "loss": 0.006, "step": 37590 }, { "epoch": 0.6152335760451607, "grad_norm": 0.09830860793590546, "learning_rate": 9.667430053606032e-06, "loss": 0.0092, "step": 37600 }, { "epoch": 0.6153972019962366, "grad_norm": 0.11398457735776901, "learning_rate": 9.667088590153314e-06, "loss": 0.0029, "step": 37610 }, { "epoch": 0.6155608279473125, "grad_norm": 0.21779796481132507, "learning_rate": 9.666746957529851e-06, "loss": 0.0042, "step": 37620 }, { "epoch": 0.6157244538983883, "grad_norm": 0.1915261447429657, "learning_rate": 9.666405155748029e-06, "loss": 0.0035, "step": 37630 }, { "epoch": 0.6158880798494641, "grad_norm": 0.2669447064399719, "learning_rate": 9.666063184820234e-06, "loss": 0.0046, "step": 37640 }, { "epoch": 0.6160517058005399, "grad_norm": 0.13493488729000092, "learning_rate": 9.665721044758863e-06, "loss": 0.0043, "step": 37650 }, { "epoch": 0.6162153317516158, "grad_norm": 0.3728507459163666, "learning_rate": 9.665378735576317e-06, "loss": 0.0044, "step": 37660 }, { "epoch": 0.6163789577026917, "grad_norm": 0.20458364486694336, "learning_rate": 9.665036257285006e-06, "loss": 0.0044, "step": 37670 }, { "epoch": 0.6165425836537675, "grad_norm": 0.3213384747505188, "learning_rate": 9.664693609897342e-06, "loss": 0.0047, "step": 37680 }, { "epoch": 0.6167062096048433, "grad_norm": 0.32243579626083374, "learning_rate": 9.664350793425748e-06, "loss": 0.0032, "step": 37690 }, { "epoch": 0.6168698355559191, "grad_norm": 0.14428888261318207, "learning_rate": 9.664007807882647e-06, "loss": 0.003, "step": 37700 }, { "epoch": 0.617033461506995, "grad_norm": 0.3491305112838745, "learning_rate": 9.663664653280473e-06, "loss": 0.0049, "step": 37710 }, { "epoch": 0.6171970874580709, "grad_norm": 0.08974195271730423, "learning_rate": 9.663321329631664e-06, "loss": 0.0041, "step": 37720 }, { "epoch": 0.6173607134091467, "grad_norm": 0.42107313871383667, "learning_rate": 9.662977836948663e-06, "loss": 0.0047, "step": 37730 }, { "epoch": 0.6175243393602226, "grad_norm": 0.07104238122701645, "learning_rate": 9.662634175243923e-06, "loss": 0.0056, "step": 37740 }, { "epoch": 0.6176879653112983, "grad_norm": 0.33224251866340637, "learning_rate": 9.662290344529902e-06, "loss": 0.0041, "step": 37750 }, { "epoch": 0.6178515912623742, "grad_norm": 0.446054607629776, "learning_rate": 9.661946344819062e-06, "loss": 0.0037, "step": 37760 }, { "epoch": 0.6180152172134501, "grad_norm": 0.4401337802410126, "learning_rate": 9.66160217612387e-06, "loss": 0.0058, "step": 37770 }, { "epoch": 0.6181788431645259, "grad_norm": 0.22429263591766357, "learning_rate": 9.661257838456804e-06, "loss": 0.0045, "step": 37780 }, { "epoch": 0.6183424691156018, "grad_norm": 0.14740681648254395, "learning_rate": 9.660913331830344e-06, "loss": 0.0027, "step": 37790 }, { "epoch": 0.6185060950666775, "grad_norm": 0.01172002125531435, "learning_rate": 9.66056865625698e-06, "loss": 0.0047, "step": 37800 }, { "epoch": 0.6186697210177534, "grad_norm": 0.13470607995986938, "learning_rate": 9.6602238117492e-06, "loss": 0.0036, "step": 37810 }, { "epoch": 0.6188333469688293, "grad_norm": 0.3673902451992035, "learning_rate": 9.659878798319508e-06, "loss": 0.006, "step": 37820 }, { "epoch": 0.6189969729199051, "grad_norm": 0.5027232766151428, "learning_rate": 9.659533615980411e-06, "loss": 0.0043, "step": 37830 }, { "epoch": 0.619160598870981, "grad_norm": 1.0106658935546875, "learning_rate": 9.65918826474442e-06, "loss": 0.0074, "step": 37840 }, { "epoch": 0.6193242248220567, "grad_norm": 0.2663329839706421, "learning_rate": 9.65884274462405e-06, "loss": 0.0057, "step": 37850 }, { "epoch": 0.6194878507731326, "grad_norm": 0.37738049030303955, "learning_rate": 9.658497055631827e-06, "loss": 0.0053, "step": 37860 }, { "epoch": 0.6196514767242085, "grad_norm": 0.1489264816045761, "learning_rate": 9.658151197780285e-06, "loss": 0.0041, "step": 37870 }, { "epoch": 0.6198151026752843, "grad_norm": 0.2652358114719391, "learning_rate": 9.657805171081958e-06, "loss": 0.0026, "step": 37880 }, { "epoch": 0.6199787286263602, "grad_norm": 0.3128514289855957, "learning_rate": 9.657458975549386e-06, "loss": 0.0056, "step": 37890 }, { "epoch": 0.6201423545774359, "grad_norm": 0.14601120352745056, "learning_rate": 9.657112611195121e-06, "loss": 0.0036, "step": 37900 }, { "epoch": 0.6203059805285118, "grad_norm": 0.5487610697746277, "learning_rate": 9.656766078031717e-06, "loss": 0.0047, "step": 37910 }, { "epoch": 0.6204696064795877, "grad_norm": 0.2473091185092926, "learning_rate": 9.656419376071736e-06, "loss": 0.0034, "step": 37920 }, { "epoch": 0.6206332324306635, "grad_norm": 0.298767626285553, "learning_rate": 9.656072505327741e-06, "loss": 0.0041, "step": 37930 }, { "epoch": 0.6207968583817394, "grad_norm": 0.23336118459701538, "learning_rate": 9.65572546581231e-06, "loss": 0.0076, "step": 37940 }, { "epoch": 0.6209604843328151, "grad_norm": 0.1006658673286438, "learning_rate": 9.65537825753802e-06, "loss": 0.0033, "step": 37950 }, { "epoch": 0.621124110283891, "grad_norm": 0.22614005208015442, "learning_rate": 9.65503088051746e-06, "loss": 0.0052, "step": 37960 }, { "epoch": 0.6212877362349669, "grad_norm": 0.2829948365688324, "learning_rate": 9.654683334763216e-06, "loss": 0.0031, "step": 37970 }, { "epoch": 0.6214513621860427, "grad_norm": 0.0900287851691246, "learning_rate": 9.654335620287886e-06, "loss": 0.0029, "step": 37980 }, { "epoch": 0.6216149881371186, "grad_norm": 0.27310997247695923, "learning_rate": 9.65398773710408e-06, "loss": 0.0034, "step": 37990 }, { "epoch": 0.6217786140881943, "grad_norm": 0.15457694232463837, "learning_rate": 9.653639685224403e-06, "loss": 0.0036, "step": 38000 }, { "epoch": 0.6219422400392702, "grad_norm": 0.15960688889026642, "learning_rate": 9.653291464661473e-06, "loss": 0.0036, "step": 38010 }, { "epoch": 0.6221058659903461, "grad_norm": 0.6965922713279724, "learning_rate": 9.65294307542791e-06, "loss": 0.0027, "step": 38020 }, { "epoch": 0.6222694919414219, "grad_norm": 0.14204402267932892, "learning_rate": 9.652594517536345e-06, "loss": 0.0036, "step": 38030 }, { "epoch": 0.6224331178924978, "grad_norm": 0.22957226634025574, "learning_rate": 9.652245790999409e-06, "loss": 0.0043, "step": 38040 }, { "epoch": 0.6225967438435736, "grad_norm": 0.13951456546783447, "learning_rate": 9.651896895829745e-06, "loss": 0.0044, "step": 38050 }, { "epoch": 0.6227603697946494, "grad_norm": 0.05449066311120987, "learning_rate": 9.65154783204e-06, "loss": 0.0025, "step": 38060 }, { "epoch": 0.6229239957457253, "grad_norm": 0.3685210049152374, "learning_rate": 9.651198599642826e-06, "loss": 0.004, "step": 38070 }, { "epoch": 0.6230876216968011, "grad_norm": 0.18782642483711243, "learning_rate": 9.65084919865088e-06, "loss": 0.0037, "step": 38080 }, { "epoch": 0.623251247647877, "grad_norm": 0.039590705186128616, "learning_rate": 9.650499629076829e-06, "loss": 0.0042, "step": 38090 }, { "epoch": 0.6234148735989528, "grad_norm": 0.1745484620332718, "learning_rate": 9.650149890933345e-06, "loss": 0.0033, "step": 38100 }, { "epoch": 0.6235784995500286, "grad_norm": 0.4898908734321594, "learning_rate": 9.649799984233102e-06, "loss": 0.0077, "step": 38110 }, { "epoch": 0.6237421255011045, "grad_norm": 0.0713873952627182, "learning_rate": 9.649449908988786e-06, "loss": 0.0021, "step": 38120 }, { "epoch": 0.6239057514521803, "grad_norm": 0.20937389135360718, "learning_rate": 9.649099665213086e-06, "loss": 0.0024, "step": 38130 }, { "epoch": 0.6240693774032562, "grad_norm": 0.0904269590973854, "learning_rate": 9.648749252918697e-06, "loss": 0.0047, "step": 38140 }, { "epoch": 0.624233003354332, "grad_norm": 0.15568599104881287, "learning_rate": 9.648398672118318e-06, "loss": 0.0058, "step": 38150 }, { "epoch": 0.6243966293054078, "grad_norm": 0.10997980833053589, "learning_rate": 9.648047922824661e-06, "loss": 0.0057, "step": 38160 }, { "epoch": 0.6245602552564837, "grad_norm": 0.11430040746927261, "learning_rate": 9.64769700505044e-06, "loss": 0.0037, "step": 38170 }, { "epoch": 0.6247238812075595, "grad_norm": 0.26384666562080383, "learning_rate": 9.64734591880837e-06, "loss": 0.004, "step": 38180 }, { "epoch": 0.6248875071586354, "grad_norm": 0.012330407276749611, "learning_rate": 9.646994664111183e-06, "loss": 0.0054, "step": 38190 }, { "epoch": 0.6250511331097112, "grad_norm": 0.1942172646522522, "learning_rate": 9.646643240971605e-06, "loss": 0.0026, "step": 38200 }, { "epoch": 0.625214759060787, "grad_norm": 0.18183833360671997, "learning_rate": 9.64629164940238e-06, "loss": 0.0031, "step": 38210 }, { "epoch": 0.6253783850118629, "grad_norm": 0.09610706567764282, "learning_rate": 9.645939889416249e-06, "loss": 0.0032, "step": 38220 }, { "epoch": 0.6255420109629387, "grad_norm": 0.22070765495300293, "learning_rate": 9.645587961025961e-06, "loss": 0.0029, "step": 38230 }, { "epoch": 0.6257056369140146, "grad_norm": 0.1928679198026657, "learning_rate": 9.645235864244278e-06, "loss": 0.0022, "step": 38240 }, { "epoch": 0.6258692628650904, "grad_norm": 0.31935417652130127, "learning_rate": 9.644883599083959e-06, "loss": 0.0037, "step": 38250 }, { "epoch": 0.6260328888161663, "grad_norm": 0.13179603219032288, "learning_rate": 9.64453116555777e-06, "loss": 0.0039, "step": 38260 }, { "epoch": 0.626196514767242, "grad_norm": 0.07257692515850067, "learning_rate": 9.644178563678493e-06, "loss": 0.0031, "step": 38270 }, { "epoch": 0.6263601407183179, "grad_norm": 0.5487810373306274, "learning_rate": 9.643825793458904e-06, "loss": 0.0049, "step": 38280 }, { "epoch": 0.6265237666693938, "grad_norm": 0.30215689539909363, "learning_rate": 9.64347285491179e-06, "loss": 0.0043, "step": 38290 }, { "epoch": 0.6266873926204696, "grad_norm": 0.11180410534143448, "learning_rate": 9.643119748049945e-06, "loss": 0.0036, "step": 38300 }, { "epoch": 0.6268510185715455, "grad_norm": 0.31685671210289, "learning_rate": 9.64276647288617e-06, "loss": 0.0031, "step": 38310 }, { "epoch": 0.6270146445226212, "grad_norm": 0.2429034411907196, "learning_rate": 9.642413029433268e-06, "loss": 0.0061, "step": 38320 }, { "epoch": 0.6271782704736971, "grad_norm": 0.2383284866809845, "learning_rate": 9.642059417704052e-06, "loss": 0.0034, "step": 38330 }, { "epoch": 0.627341896424773, "grad_norm": 0.16095753014087677, "learning_rate": 9.641705637711341e-06, "loss": 0.0033, "step": 38340 }, { "epoch": 0.6275055223758488, "grad_norm": 0.057584162801504135, "learning_rate": 9.641351689467954e-06, "loss": 0.0029, "step": 38350 }, { "epoch": 0.6276691483269247, "grad_norm": 0.10399903357028961, "learning_rate": 9.640997572986723e-06, "loss": 0.0056, "step": 38360 }, { "epoch": 0.6278327742780004, "grad_norm": 0.2439599633216858, "learning_rate": 9.640643288280487e-06, "loss": 0.0033, "step": 38370 }, { "epoch": 0.6279964002290763, "grad_norm": 0.2845558226108551, "learning_rate": 9.640288835362082e-06, "loss": 0.0033, "step": 38380 }, { "epoch": 0.6281600261801522, "grad_norm": 0.0931815654039383, "learning_rate": 9.639934214244362e-06, "loss": 0.0028, "step": 38390 }, { "epoch": 0.628323652131228, "grad_norm": 0.1392059177160263, "learning_rate": 9.639579424940178e-06, "loss": 0.0026, "step": 38400 }, { "epoch": 0.6284872780823039, "grad_norm": 0.11994877457618713, "learning_rate": 9.63922446746239e-06, "loss": 0.0046, "step": 38410 }, { "epoch": 0.6286509040333796, "grad_norm": 0.1402147114276886, "learning_rate": 9.638869341823865e-06, "loss": 0.006, "step": 38420 }, { "epoch": 0.6288145299844555, "grad_norm": 0.14142590761184692, "learning_rate": 9.638514048037476e-06, "loss": 0.0036, "step": 38430 }, { "epoch": 0.6289781559355314, "grad_norm": 0.03410758823156357, "learning_rate": 9.638158586116102e-06, "loss": 0.0035, "step": 38440 }, { "epoch": 0.6291417818866072, "grad_norm": 0.10523486137390137, "learning_rate": 9.637802956072627e-06, "loss": 0.0045, "step": 38450 }, { "epoch": 0.6293054078376831, "grad_norm": 0.08307523280382156, "learning_rate": 9.637447157919939e-06, "loss": 0.003, "step": 38460 }, { "epoch": 0.6294690337887588, "grad_norm": 0.1560235172510147, "learning_rate": 9.637091191670939e-06, "loss": 0.0043, "step": 38470 }, { "epoch": 0.6296326597398347, "grad_norm": 0.0628463625907898, "learning_rate": 9.63673505733853e-06, "loss": 0.0039, "step": 38480 }, { "epoch": 0.6297962856909106, "grad_norm": 0.3192249834537506, "learning_rate": 9.636378754935615e-06, "loss": 0.0032, "step": 38490 }, { "epoch": 0.6299599116419864, "grad_norm": 0.1372830867767334, "learning_rate": 9.636022284475117e-06, "loss": 0.0033, "step": 38500 }, { "epoch": 0.6301235375930623, "grad_norm": 0.07836729288101196, "learning_rate": 9.635665645969952e-06, "loss": 0.0028, "step": 38510 }, { "epoch": 0.630287163544138, "grad_norm": 0.10574813932180405, "learning_rate": 9.635308839433048e-06, "loss": 0.0046, "step": 38520 }, { "epoch": 0.6304507894952139, "grad_norm": 0.12654341757297516, "learning_rate": 9.63495186487734e-06, "loss": 0.0032, "step": 38530 }, { "epoch": 0.6306144154462898, "grad_norm": 0.28303995728492737, "learning_rate": 9.634594722315768e-06, "loss": 0.0047, "step": 38540 }, { "epoch": 0.6307780413973656, "grad_norm": 0.22968409955501556, "learning_rate": 9.634237411761275e-06, "loss": 0.0029, "step": 38550 }, { "epoch": 0.6309416673484415, "grad_norm": 0.0440322607755661, "learning_rate": 9.633879933226815e-06, "loss": 0.0049, "step": 38560 }, { "epoch": 0.6311052932995173, "grad_norm": 0.23966920375823975, "learning_rate": 9.633522286725344e-06, "loss": 0.0035, "step": 38570 }, { "epoch": 0.6312689192505931, "grad_norm": 0.3072430491447449, "learning_rate": 9.633164472269828e-06, "loss": 0.007, "step": 38580 }, { "epoch": 0.631432545201669, "grad_norm": 0.2619354724884033, "learning_rate": 9.632806489873231e-06, "loss": 0.0054, "step": 38590 }, { "epoch": 0.6315961711527448, "grad_norm": 0.226179838180542, "learning_rate": 9.632448339548536e-06, "loss": 0.0028, "step": 38600 }, { "epoch": 0.6317597971038207, "grad_norm": 0.10131746530532837, "learning_rate": 9.632090021308725e-06, "loss": 0.0028, "step": 38610 }, { "epoch": 0.6319234230548965, "grad_norm": 0.14819808304309845, "learning_rate": 9.631731535166781e-06, "loss": 0.0026, "step": 38620 }, { "epoch": 0.6320870490059723, "grad_norm": 0.185590460896492, "learning_rate": 9.6313728811357e-06, "loss": 0.0038, "step": 38630 }, { "epoch": 0.6322506749570482, "grad_norm": 0.255806028842926, "learning_rate": 9.631014059228486e-06, "loss": 0.0038, "step": 38640 }, { "epoch": 0.632414300908124, "grad_norm": 0.21789845824241638, "learning_rate": 9.630655069458141e-06, "loss": 0.005, "step": 38650 }, { "epoch": 0.6325779268591999, "grad_norm": 0.28489506244659424, "learning_rate": 9.63029591183768e-06, "loss": 0.0033, "step": 38660 }, { "epoch": 0.6327415528102757, "grad_norm": 0.19765153527259827, "learning_rate": 9.629936586380119e-06, "loss": 0.0029, "step": 38670 }, { "epoch": 0.6329051787613516, "grad_norm": 0.061400216072797775, "learning_rate": 9.629577093098486e-06, "loss": 0.0032, "step": 38680 }, { "epoch": 0.6330688047124274, "grad_norm": 0.23604781925678253, "learning_rate": 9.629217432005811e-06, "loss": 0.0037, "step": 38690 }, { "epoch": 0.6332324306635032, "grad_norm": 0.28271064162254333, "learning_rate": 9.628857603115129e-06, "loss": 0.0038, "step": 38700 }, { "epoch": 0.6333960566145791, "grad_norm": 0.2158617377281189, "learning_rate": 9.628497606439484e-06, "loss": 0.0036, "step": 38710 }, { "epoch": 0.6335596825656549, "grad_norm": 0.09813366085290909, "learning_rate": 9.628137441991926e-06, "loss": 0.0041, "step": 38720 }, { "epoch": 0.6337233085167308, "grad_norm": 0.05281127244234085, "learning_rate": 9.627777109785507e-06, "loss": 0.0027, "step": 38730 }, { "epoch": 0.6338869344678066, "grad_norm": 0.088776133954525, "learning_rate": 9.627416609833294e-06, "loss": 0.0035, "step": 38740 }, { "epoch": 0.6340505604188824, "grad_norm": 0.2632506787776947, "learning_rate": 9.627055942148347e-06, "loss": 0.0037, "step": 38750 }, { "epoch": 0.6342141863699583, "grad_norm": 0.07854590564966202, "learning_rate": 9.626695106743744e-06, "loss": 0.0033, "step": 38760 }, { "epoch": 0.6343778123210341, "grad_norm": 0.1894259750843048, "learning_rate": 9.626334103632564e-06, "loss": 0.0045, "step": 38770 }, { "epoch": 0.63454143827211, "grad_norm": 0.32894113659858704, "learning_rate": 9.625972932827892e-06, "loss": 0.0049, "step": 38780 }, { "epoch": 0.6347050642231858, "grad_norm": 0.08008615672588348, "learning_rate": 9.625611594342819e-06, "loss": 0.0024, "step": 38790 }, { "epoch": 0.6348686901742616, "grad_norm": 0.08497855812311172, "learning_rate": 9.625250088190444e-06, "loss": 0.005, "step": 38800 }, { "epoch": 0.6350323161253375, "grad_norm": 0.2697611153125763, "learning_rate": 9.624888414383868e-06, "loss": 0.0047, "step": 38810 }, { "epoch": 0.6351959420764133, "grad_norm": 0.15111055970191956, "learning_rate": 9.624526572936203e-06, "loss": 0.0046, "step": 38820 }, { "epoch": 0.6353595680274892, "grad_norm": 0.1395147293806076, "learning_rate": 9.624164563860565e-06, "loss": 0.0036, "step": 38830 }, { "epoch": 0.635523193978565, "grad_norm": 0.26285654306411743, "learning_rate": 9.623802387170076e-06, "loss": 0.0029, "step": 38840 }, { "epoch": 0.6356868199296408, "grad_norm": 0.08407123386859894, "learning_rate": 9.623440042877863e-06, "loss": 0.0047, "step": 38850 }, { "epoch": 0.6358504458807167, "grad_norm": 0.15698997676372528, "learning_rate": 9.62307753099706e-06, "loss": 0.0039, "step": 38860 }, { "epoch": 0.6360140718317925, "grad_norm": 0.1328202188014984, "learning_rate": 9.62271485154081e-06, "loss": 0.004, "step": 38870 }, { "epoch": 0.6361776977828684, "grad_norm": 0.14202706515789032, "learning_rate": 9.622352004522255e-06, "loss": 0.0031, "step": 38880 }, { "epoch": 0.6363413237339443, "grad_norm": 0.3476366400718689, "learning_rate": 9.621988989954549e-06, "loss": 0.0038, "step": 38890 }, { "epoch": 0.63650494968502, "grad_norm": 0.24466153979301453, "learning_rate": 9.621625807850851e-06, "loss": 0.0034, "step": 38900 }, { "epoch": 0.6366685756360959, "grad_norm": 0.10046984255313873, "learning_rate": 9.621262458224324e-06, "loss": 0.004, "step": 38910 }, { "epoch": 0.6368322015871717, "grad_norm": 0.3870885670185089, "learning_rate": 9.620898941088144e-06, "loss": 0.0037, "step": 38920 }, { "epoch": 0.6369958275382476, "grad_norm": 0.12600962817668915, "learning_rate": 9.62053525645548e-06, "loss": 0.0047, "step": 38930 }, { "epoch": 0.6371594534893235, "grad_norm": 0.2957196831703186, "learning_rate": 9.62017140433952e-06, "loss": 0.0047, "step": 38940 }, { "epoch": 0.6373230794403992, "grad_norm": 0.18455517292022705, "learning_rate": 9.619807384753449e-06, "loss": 0.0038, "step": 38950 }, { "epoch": 0.6374867053914751, "grad_norm": 0.12891778349876404, "learning_rate": 9.619443197710465e-06, "loss": 0.0044, "step": 38960 }, { "epoch": 0.6376503313425509, "grad_norm": 0.2780033051967621, "learning_rate": 9.619078843223766e-06, "loss": 0.0076, "step": 38970 }, { "epoch": 0.6378139572936268, "grad_norm": 0.18556208908557892, "learning_rate": 9.618714321306563e-06, "loss": 0.0038, "step": 38980 }, { "epoch": 0.6379775832447027, "grad_norm": 0.10493610054254532, "learning_rate": 9.618349631972064e-06, "loss": 0.0038, "step": 38990 }, { "epoch": 0.6381412091957784, "grad_norm": 0.244174987077713, "learning_rate": 9.617984775233493e-06, "loss": 0.0037, "step": 39000 }, { "epoch": 0.6383048351468543, "grad_norm": 0.11400426179170609, "learning_rate": 9.617619751104072e-06, "loss": 0.0066, "step": 39010 }, { "epoch": 0.6384684610979301, "grad_norm": 0.1196909248828888, "learning_rate": 9.617254559597034e-06, "loss": 0.0045, "step": 39020 }, { "epoch": 0.638632087049006, "grad_norm": 0.08639112114906311, "learning_rate": 9.616889200725615e-06, "loss": 0.0024, "step": 39030 }, { "epoch": 0.6387957130000819, "grad_norm": 0.2317192554473877, "learning_rate": 9.616523674503057e-06, "loss": 0.0055, "step": 39040 }, { "epoch": 0.6389593389511576, "grad_norm": 0.3311014175415039, "learning_rate": 9.616157980942616e-06, "loss": 0.0026, "step": 39050 }, { "epoch": 0.6391229649022335, "grad_norm": 0.25754231214523315, "learning_rate": 9.615792120057539e-06, "loss": 0.0026, "step": 39060 }, { "epoch": 0.6392865908533093, "grad_norm": 0.1464586704969406, "learning_rate": 9.615426091861091e-06, "loss": 0.0027, "step": 39070 }, { "epoch": 0.6394502168043852, "grad_norm": 0.04481315240263939, "learning_rate": 9.615059896366542e-06, "loss": 0.0028, "step": 39080 }, { "epoch": 0.6396138427554611, "grad_norm": 0.2647063136100769, "learning_rate": 9.614693533587162e-06, "loss": 0.0049, "step": 39090 }, { "epoch": 0.6397774687065368, "grad_norm": 0.21080859005451202, "learning_rate": 9.614327003536235e-06, "loss": 0.0049, "step": 39100 }, { "epoch": 0.6399410946576127, "grad_norm": 0.1746206134557724, "learning_rate": 9.613960306227043e-06, "loss": 0.0039, "step": 39110 }, { "epoch": 0.6401047206086885, "grad_norm": 0.12504762411117554, "learning_rate": 9.613593441672879e-06, "loss": 0.0025, "step": 39120 }, { "epoch": 0.6402683465597644, "grad_norm": 0.46945270895957947, "learning_rate": 9.613226409887042e-06, "loss": 0.0041, "step": 39130 }, { "epoch": 0.6404319725108402, "grad_norm": 0.08291307091712952, "learning_rate": 9.612859210882835e-06, "loss": 0.003, "step": 39140 }, { "epoch": 0.640595598461916, "grad_norm": 0.08960380405187607, "learning_rate": 9.612491844673568e-06, "loss": 0.003, "step": 39150 }, { "epoch": 0.6407592244129919, "grad_norm": 0.12569643557071686, "learning_rate": 9.612124311272558e-06, "loss": 0.0029, "step": 39160 }, { "epoch": 0.6409228503640677, "grad_norm": 0.14201229810714722, "learning_rate": 9.611756610693124e-06, "loss": 0.0042, "step": 39170 }, { "epoch": 0.6410864763151436, "grad_norm": 0.10764100402593613, "learning_rate": 9.6113887429486e-06, "loss": 0.0038, "step": 39180 }, { "epoch": 0.6412501022662194, "grad_norm": 0.42665010690689087, "learning_rate": 9.611020708052316e-06, "loss": 0.004, "step": 39190 }, { "epoch": 0.6414137282172953, "grad_norm": 0.23785610496997833, "learning_rate": 9.610652506017615e-06, "loss": 0.0035, "step": 39200 }, { "epoch": 0.6415773541683711, "grad_norm": 0.1503949612379074, "learning_rate": 9.61028413685784e-06, "loss": 0.0055, "step": 39210 }, { "epoch": 0.6417409801194469, "grad_norm": 0.12189337611198425, "learning_rate": 9.609915600586347e-06, "loss": 0.0063, "step": 39220 }, { "epoch": 0.6419046060705228, "grad_norm": 0.06078196316957474, "learning_rate": 9.609546897216493e-06, "loss": 0.0064, "step": 39230 }, { "epoch": 0.6420682320215986, "grad_norm": 0.1902364045381546, "learning_rate": 9.609178026761643e-06, "loss": 0.0062, "step": 39240 }, { "epoch": 0.6422318579726745, "grad_norm": 0.3740362823009491, "learning_rate": 9.608808989235165e-06, "loss": 0.0058, "step": 39250 }, { "epoch": 0.6423954839237503, "grad_norm": 0.1996176540851593, "learning_rate": 9.60843978465044e-06, "loss": 0.0045, "step": 39260 }, { "epoch": 0.6425591098748261, "grad_norm": 0.28526821732521057, "learning_rate": 9.60807041302085e-06, "loss": 0.0023, "step": 39270 }, { "epoch": 0.642722735825902, "grad_norm": 0.12540899217128754, "learning_rate": 9.607700874359781e-06, "loss": 0.0034, "step": 39280 }, { "epoch": 0.6428863617769778, "grad_norm": 0.14467529952526093, "learning_rate": 9.607331168680632e-06, "loss": 0.0033, "step": 39290 }, { "epoch": 0.6430499877280537, "grad_norm": 0.32027381658554077, "learning_rate": 9.606961295996798e-06, "loss": 0.0038, "step": 39300 }, { "epoch": 0.6432136136791295, "grad_norm": 0.04616748169064522, "learning_rate": 9.606591256321693e-06, "loss": 0.0028, "step": 39310 }, { "epoch": 0.6433772396302053, "grad_norm": 0.4567842483520508, "learning_rate": 9.606221049668725e-06, "loss": 0.0042, "step": 39320 }, { "epoch": 0.6435408655812812, "grad_norm": 0.23921778798103333, "learning_rate": 9.605850676051314e-06, "loss": 0.0057, "step": 39330 }, { "epoch": 0.643704491532357, "grad_norm": 0.1458958089351654, "learning_rate": 9.605480135482886e-06, "loss": 0.0034, "step": 39340 }, { "epoch": 0.6438681174834329, "grad_norm": 0.12824426591396332, "learning_rate": 9.605109427976873e-06, "loss": 0.0043, "step": 39350 }, { "epoch": 0.6440317434345088, "grad_norm": 0.2134348303079605, "learning_rate": 9.60473855354671e-06, "loss": 0.0021, "step": 39360 }, { "epoch": 0.6441953693855845, "grad_norm": 0.27929699420928955, "learning_rate": 9.604367512205843e-06, "loss": 0.0041, "step": 39370 }, { "epoch": 0.6443589953366604, "grad_norm": 0.04714587703347206, "learning_rate": 9.60399630396772e-06, "loss": 0.0038, "step": 39380 }, { "epoch": 0.6445226212877362, "grad_norm": 0.12128560990095139, "learning_rate": 9.603624928845796e-06, "loss": 0.0051, "step": 39390 }, { "epoch": 0.6446862472388121, "grad_norm": 0.14154492318630219, "learning_rate": 9.603253386853533e-06, "loss": 0.0022, "step": 39400 }, { "epoch": 0.644849873189888, "grad_norm": 0.34802916646003723, "learning_rate": 9.602881678004398e-06, "loss": 0.0057, "step": 39410 }, { "epoch": 0.6450134991409637, "grad_norm": 0.15548211336135864, "learning_rate": 9.602509802311865e-06, "loss": 0.0061, "step": 39420 }, { "epoch": 0.6451771250920396, "grad_norm": 0.11623035371303558, "learning_rate": 9.602137759789415e-06, "loss": 0.0031, "step": 39430 }, { "epoch": 0.6453407510431154, "grad_norm": 0.13361559808254242, "learning_rate": 9.60176555045053e-06, "loss": 0.0032, "step": 39440 }, { "epoch": 0.6455043769941913, "grad_norm": 0.36541488766670227, "learning_rate": 9.601393174308705e-06, "loss": 0.0031, "step": 39450 }, { "epoch": 0.6456680029452672, "grad_norm": 0.18346063792705536, "learning_rate": 9.601020631377435e-06, "loss": 0.005, "step": 39460 }, { "epoch": 0.6458316288963429, "grad_norm": 0.251495897769928, "learning_rate": 9.600647921670227e-06, "loss": 0.0071, "step": 39470 }, { "epoch": 0.6459952548474188, "grad_norm": 0.3548803925514221, "learning_rate": 9.600275045200589e-06, "loss": 0.0049, "step": 39480 }, { "epoch": 0.6461588807984946, "grad_norm": 0.09060274064540863, "learning_rate": 9.599902001982035e-06, "loss": 0.0038, "step": 39490 }, { "epoch": 0.6463225067495705, "grad_norm": 0.23502935469150543, "learning_rate": 9.599528792028091e-06, "loss": 0.0043, "step": 39500 }, { "epoch": 0.6464861327006464, "grad_norm": 0.18305960297584534, "learning_rate": 9.599155415352284e-06, "loss": 0.0088, "step": 39510 }, { "epoch": 0.6466497586517221, "grad_norm": 0.19083371758460999, "learning_rate": 9.598781871968145e-06, "loss": 0.0037, "step": 39520 }, { "epoch": 0.646813384602798, "grad_norm": 0.189573273062706, "learning_rate": 9.598408161889217e-06, "loss": 0.0045, "step": 39530 }, { "epoch": 0.6469770105538738, "grad_norm": 0.14892877638339996, "learning_rate": 9.598034285129045e-06, "loss": 0.0042, "step": 39540 }, { "epoch": 0.6471406365049497, "grad_norm": 0.32697221636772156, "learning_rate": 9.59766024170118e-06, "loss": 0.0058, "step": 39550 }, { "epoch": 0.6473042624560256, "grad_norm": 0.39728957414627075, "learning_rate": 9.597286031619185e-06, "loss": 0.0036, "step": 39560 }, { "epoch": 0.6474678884071013, "grad_norm": 0.12987461686134338, "learning_rate": 9.596911654896616e-06, "loss": 0.0053, "step": 39570 }, { "epoch": 0.6476315143581772, "grad_norm": 0.0808095708489418, "learning_rate": 9.596537111547051e-06, "loss": 0.0033, "step": 39580 }, { "epoch": 0.647795140309253, "grad_norm": 0.10307874530553818, "learning_rate": 9.596162401584062e-06, "loss": 0.0047, "step": 39590 }, { "epoch": 0.6479587662603289, "grad_norm": 0.11977291107177734, "learning_rate": 9.595787525021234e-06, "loss": 0.003, "step": 39600 }, { "epoch": 0.6481223922114048, "grad_norm": 0.18306361138820648, "learning_rate": 9.595412481872152e-06, "loss": 0.0036, "step": 39610 }, { "epoch": 0.6482860181624805, "grad_norm": 0.25254836678504944, "learning_rate": 9.595037272150414e-06, "loss": 0.0056, "step": 39620 }, { "epoch": 0.6484496441135564, "grad_norm": 0.1175762414932251, "learning_rate": 9.594661895869618e-06, "loss": 0.0041, "step": 39630 }, { "epoch": 0.6486132700646322, "grad_norm": 0.1297464370727539, "learning_rate": 9.594286353043369e-06, "loss": 0.0029, "step": 39640 }, { "epoch": 0.6487768960157081, "grad_norm": 0.1492602527141571, "learning_rate": 9.593910643685284e-06, "loss": 0.0031, "step": 39650 }, { "epoch": 0.648940521966784, "grad_norm": 0.146444633603096, "learning_rate": 9.59353476780898e-06, "loss": 0.0026, "step": 39660 }, { "epoch": 0.6491041479178598, "grad_norm": 0.15150833129882812, "learning_rate": 9.59315872542808e-06, "loss": 0.0038, "step": 39670 }, { "epoch": 0.6492677738689356, "grad_norm": 0.10827313363552094, "learning_rate": 9.592782516556216e-06, "loss": 0.0039, "step": 39680 }, { "epoch": 0.6494313998200114, "grad_norm": 0.17238198220729828, "learning_rate": 9.592406141207024e-06, "loss": 0.0026, "step": 39690 }, { "epoch": 0.6495950257710873, "grad_norm": 0.05940546840429306, "learning_rate": 9.592029599394147e-06, "loss": 0.0029, "step": 39700 }, { "epoch": 0.6497586517221632, "grad_norm": 0.12967193126678467, "learning_rate": 9.591652891131233e-06, "loss": 0.004, "step": 39710 }, { "epoch": 0.649922277673239, "grad_norm": 0.33647042512893677, "learning_rate": 9.591276016431938e-06, "loss": 0.0027, "step": 39720 }, { "epoch": 0.6500859036243148, "grad_norm": 0.09277136623859406, "learning_rate": 9.590898975309924e-06, "loss": 0.0029, "step": 39730 }, { "epoch": 0.6502495295753906, "grad_norm": 0.17488396167755127, "learning_rate": 9.590521767778853e-06, "loss": 0.0035, "step": 39740 }, { "epoch": 0.6504131555264665, "grad_norm": 0.050710029900074005, "learning_rate": 9.590144393852403e-06, "loss": 0.0066, "step": 39750 }, { "epoch": 0.6505767814775424, "grad_norm": 0.013859692960977554, "learning_rate": 9.589766853544248e-06, "loss": 0.0037, "step": 39760 }, { "epoch": 0.6507404074286182, "grad_norm": 0.43363457918167114, "learning_rate": 9.58938914686808e-06, "loss": 0.0035, "step": 39770 }, { "epoch": 0.650904033379694, "grad_norm": 0.1285753697156906, "learning_rate": 9.589011273837582e-06, "loss": 0.0035, "step": 39780 }, { "epoch": 0.6510676593307698, "grad_norm": 0.14213183522224426, "learning_rate": 9.588633234466459e-06, "loss": 0.0049, "step": 39790 }, { "epoch": 0.6512312852818457, "grad_norm": 0.13648316264152527, "learning_rate": 9.588255028768405e-06, "loss": 0.0048, "step": 39800 }, { "epoch": 0.6513949112329216, "grad_norm": 0.22802965342998505, "learning_rate": 9.587876656757136e-06, "loss": 0.0037, "step": 39810 }, { "epoch": 0.6515585371839974, "grad_norm": 0.06534487754106522, "learning_rate": 9.587498118446365e-06, "loss": 0.0057, "step": 39820 }, { "epoch": 0.6517221631350733, "grad_norm": 0.17357707023620605, "learning_rate": 9.587119413849812e-06, "loss": 0.005, "step": 39830 }, { "epoch": 0.651885789086149, "grad_norm": 0.15947392582893372, "learning_rate": 9.586740542981206e-06, "loss": 0.003, "step": 39840 }, { "epoch": 0.6520494150372249, "grad_norm": 0.1313660442829132, "learning_rate": 9.58636150585428e-06, "loss": 0.0034, "step": 39850 }, { "epoch": 0.6522130409883008, "grad_norm": 0.12863187491893768, "learning_rate": 9.585982302482772e-06, "loss": 0.0044, "step": 39860 }, { "epoch": 0.6523766669393766, "grad_norm": 0.2548704147338867, "learning_rate": 9.585602932880427e-06, "loss": 0.0073, "step": 39870 }, { "epoch": 0.6525402928904525, "grad_norm": 0.2391563504934311, "learning_rate": 9.585223397060996e-06, "loss": 0.0042, "step": 39880 }, { "epoch": 0.6527039188415282, "grad_norm": 0.41539669036865234, "learning_rate": 9.58484369503824e-06, "loss": 0.0049, "step": 39890 }, { "epoch": 0.6528675447926041, "grad_norm": 0.2751244604587555, "learning_rate": 9.584463826825916e-06, "loss": 0.0028, "step": 39900 }, { "epoch": 0.65303117074368, "grad_norm": 0.09683585911989212, "learning_rate": 9.584083792437798e-06, "loss": 0.006, "step": 39910 }, { "epoch": 0.6531947966947558, "grad_norm": 0.2703448235988617, "learning_rate": 9.583703591887661e-06, "loss": 0.0045, "step": 39920 }, { "epoch": 0.6533584226458317, "grad_norm": 0.08823627978563309, "learning_rate": 9.583323225189286e-06, "loss": 0.0027, "step": 39930 }, { "epoch": 0.6535220485969074, "grad_norm": 0.5508773922920227, "learning_rate": 9.582942692356458e-06, "loss": 0.0031, "step": 39940 }, { "epoch": 0.6536856745479833, "grad_norm": 0.08115766197443008, "learning_rate": 9.582561993402972e-06, "loss": 0.0047, "step": 39950 }, { "epoch": 0.6538493004990592, "grad_norm": 0.11804918199777603, "learning_rate": 9.58218112834263e-06, "loss": 0.005, "step": 39960 }, { "epoch": 0.654012926450135, "grad_norm": 0.2557043731212616, "learning_rate": 9.581800097189233e-06, "loss": 0.0039, "step": 39970 }, { "epoch": 0.6541765524012109, "grad_norm": 0.1431952267885208, "learning_rate": 9.581418899956594e-06, "loss": 0.0056, "step": 39980 }, { "epoch": 0.6543401783522866, "grad_norm": 0.5375110507011414, "learning_rate": 9.581037536658533e-06, "loss": 0.0045, "step": 39990 }, { "epoch": 0.6545038043033625, "grad_norm": 0.42734554409980774, "learning_rate": 9.58065600730887e-06, "loss": 0.0024, "step": 40000 }, { "epoch": 0.6546674302544384, "grad_norm": 0.19054429233074188, "learning_rate": 9.580274311921437e-06, "loss": 0.0034, "step": 40010 }, { "epoch": 0.6548310562055142, "grad_norm": 0.14221473038196564, "learning_rate": 9.57989245051007e-06, "loss": 0.006, "step": 40020 }, { "epoch": 0.6549946821565901, "grad_norm": 0.14326852560043335, "learning_rate": 9.579510423088608e-06, "loss": 0.0045, "step": 40030 }, { "epoch": 0.6551583081076658, "grad_norm": 0.2570078670978546, "learning_rate": 9.579128229670898e-06, "loss": 0.0032, "step": 40040 }, { "epoch": 0.6553219340587417, "grad_norm": 0.08198045194149017, "learning_rate": 9.578745870270796e-06, "loss": 0.0029, "step": 40050 }, { "epoch": 0.6554855600098175, "grad_norm": 0.3780317008495331, "learning_rate": 9.578363344902162e-06, "loss": 0.0038, "step": 40060 }, { "epoch": 0.6556491859608934, "grad_norm": 0.2337639480829239, "learning_rate": 9.57798065357886e-06, "loss": 0.0049, "step": 40070 }, { "epoch": 0.6558128119119693, "grad_norm": 0.07617715746164322, "learning_rate": 9.577597796314762e-06, "loss": 0.0032, "step": 40080 }, { "epoch": 0.655976437863045, "grad_norm": 0.09904934465885162, "learning_rate": 9.577214773123746e-06, "loss": 0.0024, "step": 40090 }, { "epoch": 0.6561400638141209, "grad_norm": 0.3449536859989166, "learning_rate": 9.576831584019697e-06, "loss": 0.0024, "step": 40100 }, { "epoch": 0.6563036897651967, "grad_norm": 0.34825918078422546, "learning_rate": 9.576448229016501e-06, "loss": 0.0029, "step": 40110 }, { "epoch": 0.6564673157162726, "grad_norm": 0.33838579058647156, "learning_rate": 9.576064708128056e-06, "loss": 0.0031, "step": 40120 }, { "epoch": 0.6566309416673485, "grad_norm": 0.07733172178268433, "learning_rate": 9.575681021368265e-06, "loss": 0.0058, "step": 40130 }, { "epoch": 0.6567945676184243, "grad_norm": 0.2668304443359375, "learning_rate": 9.575297168751035e-06, "loss": 0.0045, "step": 40140 }, { "epoch": 0.6569581935695001, "grad_norm": 0.5039167404174805, "learning_rate": 9.574913150290277e-06, "loss": 0.0094, "step": 40150 }, { "epoch": 0.6571218195205759, "grad_norm": 0.13782106339931488, "learning_rate": 9.574528965999913e-06, "loss": 0.0031, "step": 40160 }, { "epoch": 0.6572854454716518, "grad_norm": 0.2670063078403473, "learning_rate": 9.574144615893869e-06, "loss": 0.004, "step": 40170 }, { "epoch": 0.6574490714227277, "grad_norm": 0.08842339366674423, "learning_rate": 9.573760099986077e-06, "loss": 0.0041, "step": 40180 }, { "epoch": 0.6576126973738035, "grad_norm": 0.06852489709854126, "learning_rate": 9.573375418290474e-06, "loss": 0.0025, "step": 40190 }, { "epoch": 0.6577763233248793, "grad_norm": 0.11243218183517456, "learning_rate": 9.572990570821003e-06, "loss": 0.0057, "step": 40200 }, { "epoch": 0.6579399492759551, "grad_norm": 0.18241071701049805, "learning_rate": 9.572605557591614e-06, "loss": 0.004, "step": 40210 }, { "epoch": 0.658103575227031, "grad_norm": 0.18272794783115387, "learning_rate": 9.572220378616266e-06, "loss": 0.005, "step": 40220 }, { "epoch": 0.6582672011781069, "grad_norm": 0.13834288716316223, "learning_rate": 9.571835033908915e-06, "loss": 0.0028, "step": 40230 }, { "epoch": 0.6584308271291827, "grad_norm": 0.22063976526260376, "learning_rate": 9.571449523483536e-06, "loss": 0.0039, "step": 40240 }, { "epoch": 0.6585944530802585, "grad_norm": 0.0930066704750061, "learning_rate": 9.571063847354096e-06, "loss": 0.0041, "step": 40250 }, { "epoch": 0.6587580790313343, "grad_norm": 0.2133239358663559, "learning_rate": 9.570678005534579e-06, "loss": 0.0038, "step": 40260 }, { "epoch": 0.6589217049824102, "grad_norm": 0.042088355869054794, "learning_rate": 9.570291998038972e-06, "loss": 0.0021, "step": 40270 }, { "epoch": 0.6590853309334861, "grad_norm": 0.20709596574306488, "learning_rate": 9.56990582488126e-06, "loss": 0.0045, "step": 40280 }, { "epoch": 0.6592489568845619, "grad_norm": 0.2154361605644226, "learning_rate": 9.569519486075448e-06, "loss": 0.0053, "step": 40290 }, { "epoch": 0.6594125828356378, "grad_norm": 0.1612267941236496, "learning_rate": 9.569132981635536e-06, "loss": 0.0053, "step": 40300 }, { "epoch": 0.6595762087867135, "grad_norm": 0.25180354714393616, "learning_rate": 9.568746311575536e-06, "loss": 0.0033, "step": 40310 }, { "epoch": 0.6597398347377894, "grad_norm": 0.09668106585741043, "learning_rate": 9.568359475909464e-06, "loss": 0.0042, "step": 40320 }, { "epoch": 0.6599034606888653, "grad_norm": 0.09837563335895538, "learning_rate": 9.567972474651338e-06, "loss": 0.0062, "step": 40330 }, { "epoch": 0.6600670866399411, "grad_norm": 0.24022804200649261, "learning_rate": 9.56758530781519e-06, "loss": 0.0026, "step": 40340 }, { "epoch": 0.660230712591017, "grad_norm": 0.27872827649116516, "learning_rate": 9.567197975415052e-06, "loss": 0.004, "step": 40350 }, { "epoch": 0.6603943385420927, "grad_norm": 0.3612937927246094, "learning_rate": 9.566810477464965e-06, "loss": 0.0055, "step": 40360 }, { "epoch": 0.6605579644931686, "grad_norm": 0.21453024446964264, "learning_rate": 9.566422813978974e-06, "loss": 0.005, "step": 40370 }, { "epoch": 0.6607215904442445, "grad_norm": 0.03908712416887283, "learning_rate": 9.56603498497113e-06, "loss": 0.0035, "step": 40380 }, { "epoch": 0.6608852163953203, "grad_norm": 0.17099544405937195, "learning_rate": 9.565646990455494e-06, "loss": 0.0038, "step": 40390 }, { "epoch": 0.6610488423463962, "grad_norm": 0.0667608231306076, "learning_rate": 9.565258830446128e-06, "loss": 0.0029, "step": 40400 }, { "epoch": 0.6612124682974719, "grad_norm": 0.10545757412910461, "learning_rate": 9.5648705049571e-06, "loss": 0.0034, "step": 40410 }, { "epoch": 0.6613760942485478, "grad_norm": 0.1538141369819641, "learning_rate": 9.564482014002487e-06, "loss": 0.0033, "step": 40420 }, { "epoch": 0.6615397201996237, "grad_norm": 0.3816950023174286, "learning_rate": 9.564093357596373e-06, "loss": 0.0028, "step": 40430 }, { "epoch": 0.6617033461506995, "grad_norm": 0.2177911102771759, "learning_rate": 9.563704535752844e-06, "loss": 0.0041, "step": 40440 }, { "epoch": 0.6618669721017754, "grad_norm": 0.17686927318572998, "learning_rate": 9.563315548485993e-06, "loss": 0.0021, "step": 40450 }, { "epoch": 0.6620305980528511, "grad_norm": 0.37919172644615173, "learning_rate": 9.562926395809923e-06, "loss": 0.0038, "step": 40460 }, { "epoch": 0.662194224003927, "grad_norm": 0.41865721344947815, "learning_rate": 9.562537077738737e-06, "loss": 0.0032, "step": 40470 }, { "epoch": 0.6623578499550029, "grad_norm": 0.12198913097381592, "learning_rate": 9.562147594286549e-06, "loss": 0.0014, "step": 40480 }, { "epoch": 0.6625214759060787, "grad_norm": 0.08148010820150375, "learning_rate": 9.561757945467474e-06, "loss": 0.0026, "step": 40490 }, { "epoch": 0.6626851018571546, "grad_norm": 0.11657990515232086, "learning_rate": 9.561368131295637e-06, "loss": 0.0033, "step": 40500 }, { "epoch": 0.6628487278082303, "grad_norm": 0.3662223517894745, "learning_rate": 9.560978151785167e-06, "loss": 0.0061, "step": 40510 }, { "epoch": 0.6630123537593062, "grad_norm": 0.2581077218055725, "learning_rate": 9.560588006950204e-06, "loss": 0.0032, "step": 40520 }, { "epoch": 0.6631759797103821, "grad_norm": 0.30310264229774475, "learning_rate": 9.560197696804885e-06, "loss": 0.0043, "step": 40530 }, { "epoch": 0.6633396056614579, "grad_norm": 0.203278049826622, "learning_rate": 9.55980722136336e-06, "loss": 0.0027, "step": 40540 }, { "epoch": 0.6635032316125338, "grad_norm": 0.0290276687592268, "learning_rate": 9.559416580639783e-06, "loss": 0.0035, "step": 40550 }, { "epoch": 0.6636668575636095, "grad_norm": 0.2354142963886261, "learning_rate": 9.559025774648315e-06, "loss": 0.0042, "step": 40560 }, { "epoch": 0.6638304835146854, "grad_norm": 0.1471615433692932, "learning_rate": 9.558634803403117e-06, "loss": 0.0053, "step": 40570 }, { "epoch": 0.6639941094657613, "grad_norm": 0.21577584743499756, "learning_rate": 9.558243666918362e-06, "loss": 0.0042, "step": 40580 }, { "epoch": 0.6641577354168371, "grad_norm": 0.30954912304878235, "learning_rate": 9.557852365208234e-06, "loss": 0.0041, "step": 40590 }, { "epoch": 0.664321361367913, "grad_norm": 0.37253338098526, "learning_rate": 9.55746089828691e-06, "loss": 0.004, "step": 40600 }, { "epoch": 0.6644849873189888, "grad_norm": 0.17140115797519684, "learning_rate": 9.557069266168585e-06, "loss": 0.0045, "step": 40610 }, { "epoch": 0.6646486132700646, "grad_norm": 0.04907820373773575, "learning_rate": 9.556677468867448e-06, "loss": 0.0026, "step": 40620 }, { "epoch": 0.6648122392211405, "grad_norm": 0.02016725391149521, "learning_rate": 9.556285506397705e-06, "loss": 0.0041, "step": 40630 }, { "epoch": 0.6649758651722163, "grad_norm": 0.150553897023201, "learning_rate": 9.555893378773564e-06, "loss": 0.0031, "step": 40640 }, { "epoch": 0.6651394911232922, "grad_norm": 0.176218643784523, "learning_rate": 9.555501086009236e-06, "loss": 0.0047, "step": 40650 }, { "epoch": 0.665303117074368, "grad_norm": 0.1619766801595688, "learning_rate": 9.555108628118945e-06, "loss": 0.004, "step": 40660 }, { "epoch": 0.6654667430254438, "grad_norm": 0.3033839762210846, "learning_rate": 9.554716005116913e-06, "loss": 0.0035, "step": 40670 }, { "epoch": 0.6656303689765197, "grad_norm": 0.2725808620452881, "learning_rate": 9.554323217017373e-06, "loss": 0.005, "step": 40680 }, { "epoch": 0.6657939949275955, "grad_norm": 0.1550709456205368, "learning_rate": 9.553930263834563e-06, "loss": 0.0039, "step": 40690 }, { "epoch": 0.6659576208786714, "grad_norm": 0.36677852272987366, "learning_rate": 9.553537145582726e-06, "loss": 0.0031, "step": 40700 }, { "epoch": 0.6661212468297472, "grad_norm": 0.195937842130661, "learning_rate": 9.553143862276111e-06, "loss": 0.0053, "step": 40710 }, { "epoch": 0.666284872780823, "grad_norm": 0.22115139663219452, "learning_rate": 9.552750413928973e-06, "loss": 0.0043, "step": 40720 }, { "epoch": 0.6664484987318989, "grad_norm": 0.16133953630924225, "learning_rate": 9.552356800555575e-06, "loss": 0.0039, "step": 40730 }, { "epoch": 0.6666121246829747, "grad_norm": 0.1360553503036499, "learning_rate": 9.551963022170186e-06, "loss": 0.0038, "step": 40740 }, { "epoch": 0.6667757506340506, "grad_norm": 0.09650354832410812, "learning_rate": 9.551569078787077e-06, "loss": 0.0021, "step": 40750 }, { "epoch": 0.6669393765851264, "grad_norm": 0.08582047373056412, "learning_rate": 9.55117497042053e-06, "loss": 0.0026, "step": 40760 }, { "epoch": 0.6671030025362022, "grad_norm": 0.1923443078994751, "learning_rate": 9.550780697084826e-06, "loss": 0.0025, "step": 40770 }, { "epoch": 0.6672666284872781, "grad_norm": 0.311466246843338, "learning_rate": 9.55038625879426e-06, "loss": 0.0045, "step": 40780 }, { "epoch": 0.6674302544383539, "grad_norm": 0.1760646551847458, "learning_rate": 9.549991655563129e-06, "loss": 0.0026, "step": 40790 }, { "epoch": 0.6675938803894298, "grad_norm": 0.1887482851743698, "learning_rate": 9.549596887405736e-06, "loss": 0.0025, "step": 40800 }, { "epoch": 0.6677575063405056, "grad_norm": 0.2653856873512268, "learning_rate": 9.549201954336392e-06, "loss": 0.0041, "step": 40810 }, { "epoch": 0.6679211322915815, "grad_norm": 0.1052040383219719, "learning_rate": 9.548806856369407e-06, "loss": 0.0014, "step": 40820 }, { "epoch": 0.6680847582426573, "grad_norm": 0.07335983961820602, "learning_rate": 9.548411593519112e-06, "loss": 0.0025, "step": 40830 }, { "epoch": 0.6682483841937331, "grad_norm": 0.06771315634250641, "learning_rate": 9.548016165799824e-06, "loss": 0.0031, "step": 40840 }, { "epoch": 0.668412010144809, "grad_norm": 0.2092895209789276, "learning_rate": 9.547620573225882e-06, "loss": 0.0091, "step": 40850 }, { "epoch": 0.6685756360958848, "grad_norm": 0.14687207341194153, "learning_rate": 9.547224815811624e-06, "loss": 0.0051, "step": 40860 }, { "epoch": 0.6687392620469607, "grad_norm": 0.22625429928302765, "learning_rate": 9.546828893571395e-06, "loss": 0.0043, "step": 40870 }, { "epoch": 0.6689028879980365, "grad_norm": 0.10107409209012985, "learning_rate": 9.546432806519548e-06, "loss": 0.0035, "step": 40880 }, { "epoch": 0.6690665139491123, "grad_norm": 0.08284055441617966, "learning_rate": 9.546036554670438e-06, "loss": 0.0029, "step": 40890 }, { "epoch": 0.6692301399001882, "grad_norm": 0.033760856837034225, "learning_rate": 9.54564013803843e-06, "loss": 0.0036, "step": 40900 }, { "epoch": 0.669393765851264, "grad_norm": 0.08804108202457428, "learning_rate": 9.545243556637892e-06, "loss": 0.0031, "step": 40910 }, { "epoch": 0.6695573918023399, "grad_norm": 0.30136144161224365, "learning_rate": 9.544846810483197e-06, "loss": 0.003, "step": 40920 }, { "epoch": 0.6697210177534156, "grad_norm": 0.17357979714870453, "learning_rate": 9.54444989958873e-06, "loss": 0.0034, "step": 40930 }, { "epoch": 0.6698846437044915, "grad_norm": 0.11663365364074707, "learning_rate": 9.544052823968876e-06, "loss": 0.0063, "step": 40940 }, { "epoch": 0.6700482696555674, "grad_norm": 0.09595862776041031, "learning_rate": 9.54365558363803e-06, "loss": 0.0035, "step": 40950 }, { "epoch": 0.6702118956066432, "grad_norm": 0.0594622977077961, "learning_rate": 9.543258178610587e-06, "loss": 0.0089, "step": 40960 }, { "epoch": 0.6703755215577191, "grad_norm": 0.2683613896369934, "learning_rate": 9.542860608900958e-06, "loss": 0.0036, "step": 40970 }, { "epoch": 0.6705391475087948, "grad_norm": 0.15494564175605774, "learning_rate": 9.542462874523549e-06, "loss": 0.0022, "step": 40980 }, { "epoch": 0.6707027734598707, "grad_norm": 0.40871626138687134, "learning_rate": 9.542064975492776e-06, "loss": 0.0036, "step": 40990 }, { "epoch": 0.6708663994109466, "grad_norm": 0.13518080115318298, "learning_rate": 9.541666911823066e-06, "loss": 0.0038, "step": 41000 }, { "epoch": 0.6710300253620224, "grad_norm": 0.08975598216056824, "learning_rate": 9.541268683528846e-06, "loss": 0.0026, "step": 41010 }, { "epoch": 0.6711936513130983, "grad_norm": 0.19219611585140228, "learning_rate": 9.540870290624551e-06, "loss": 0.003, "step": 41020 }, { "epoch": 0.671357277264174, "grad_norm": 0.07628770172595978, "learning_rate": 9.54047173312462e-06, "loss": 0.003, "step": 41030 }, { "epoch": 0.6715209032152499, "grad_norm": 0.11276376247406006, "learning_rate": 9.540073011043504e-06, "loss": 0.0037, "step": 41040 }, { "epoch": 0.6716845291663258, "grad_norm": 0.2766047418117523, "learning_rate": 9.539674124395652e-06, "loss": 0.005, "step": 41050 }, { "epoch": 0.6718481551174016, "grad_norm": 0.040906086564064026, "learning_rate": 9.539275073195522e-06, "loss": 0.0022, "step": 41060 }, { "epoch": 0.6720117810684775, "grad_norm": 0.12412739545106888, "learning_rate": 9.538875857457583e-06, "loss": 0.0029, "step": 41070 }, { "epoch": 0.6721754070195533, "grad_norm": 0.10310149192810059, "learning_rate": 9.538476477196302e-06, "loss": 0.0037, "step": 41080 }, { "epoch": 0.6723390329706291, "grad_norm": 0.10124338418245316, "learning_rate": 9.538076932426157e-06, "loss": 0.0027, "step": 41090 }, { "epoch": 0.672502658921705, "grad_norm": 0.25430813431739807, "learning_rate": 9.53767722316163e-06, "loss": 0.0037, "step": 41100 }, { "epoch": 0.6726662848727808, "grad_norm": 0.15678854286670685, "learning_rate": 9.53727734941721e-06, "loss": 0.0043, "step": 41110 }, { "epoch": 0.6728299108238567, "grad_norm": 0.3007122576236725, "learning_rate": 9.536877311207391e-06, "loss": 0.0035, "step": 41120 }, { "epoch": 0.6729935367749325, "grad_norm": 0.2486082762479782, "learning_rate": 9.536477108546674e-06, "loss": 0.0037, "step": 41130 }, { "epoch": 0.6731571627260083, "grad_norm": 0.23905012011528015, "learning_rate": 9.536076741449563e-06, "loss": 0.0031, "step": 41140 }, { "epoch": 0.6733207886770842, "grad_norm": 0.08154893666505814, "learning_rate": 9.535676209930574e-06, "loss": 0.003, "step": 41150 }, { "epoch": 0.67348441462816, "grad_norm": 0.19384953379631042, "learning_rate": 9.535275514004222e-06, "loss": 0.0039, "step": 41160 }, { "epoch": 0.6736480405792359, "grad_norm": 0.16193881630897522, "learning_rate": 9.534874653685036e-06, "loss": 0.0033, "step": 41170 }, { "epoch": 0.6738116665303117, "grad_norm": 0.3026077449321747, "learning_rate": 9.534473628987542e-06, "loss": 0.0035, "step": 41180 }, { "epoch": 0.6739752924813875, "grad_norm": 0.12581059336662292, "learning_rate": 9.534072439926276e-06, "loss": 0.0042, "step": 41190 }, { "epoch": 0.6741389184324634, "grad_norm": 0.21835175156593323, "learning_rate": 9.533671086515781e-06, "loss": 0.0041, "step": 41200 }, { "epoch": 0.6743025443835392, "grad_norm": 0.0905032530426979, "learning_rate": 9.533269568770607e-06, "loss": 0.002, "step": 41210 }, { "epoch": 0.6744661703346151, "grad_norm": 0.3723595440387726, "learning_rate": 9.532867886705305e-06, "loss": 0.0044, "step": 41220 }, { "epoch": 0.6746297962856909, "grad_norm": 0.3325466513633728, "learning_rate": 9.532466040334438e-06, "loss": 0.0051, "step": 41230 }, { "epoch": 0.6747934222367667, "grad_norm": 0.2079477608203888, "learning_rate": 9.53206402967257e-06, "loss": 0.0044, "step": 41240 }, { "epoch": 0.6749570481878426, "grad_norm": 0.36569222807884216, "learning_rate": 9.531661854734275e-06, "loss": 0.0027, "step": 41250 }, { "epoch": 0.6751206741389184, "grad_norm": 0.42900118231773376, "learning_rate": 9.531259515534127e-06, "loss": 0.0081, "step": 41260 }, { "epoch": 0.6752843000899943, "grad_norm": 0.17705038189888, "learning_rate": 9.530857012086712e-06, "loss": 0.0048, "step": 41270 }, { "epoch": 0.6754479260410701, "grad_norm": 0.17323188483715057, "learning_rate": 9.53045434440662e-06, "loss": 0.0028, "step": 41280 }, { "epoch": 0.675611551992146, "grad_norm": 0.1745935082435608, "learning_rate": 9.530051512508448e-06, "loss": 0.0061, "step": 41290 }, { "epoch": 0.6757751779432218, "grad_norm": 0.21723127365112305, "learning_rate": 9.529648516406798e-06, "loss": 0.0041, "step": 41300 }, { "epoch": 0.6759388038942976, "grad_norm": 0.2178402692079544, "learning_rate": 9.529245356116273e-06, "loss": 0.0049, "step": 41310 }, { "epoch": 0.6761024298453735, "grad_norm": 0.205748051404953, "learning_rate": 9.528842031651491e-06, "loss": 0.0031, "step": 41320 }, { "epoch": 0.6762660557964493, "grad_norm": 0.04753841832280159, "learning_rate": 9.52843854302707e-06, "loss": 0.0042, "step": 41330 }, { "epoch": 0.6764296817475252, "grad_norm": 0.1326904147863388, "learning_rate": 9.528034890257635e-06, "loss": 0.0066, "step": 41340 }, { "epoch": 0.676593307698601, "grad_norm": 0.11043822765350342, "learning_rate": 9.527631073357818e-06, "loss": 0.0021, "step": 41350 }, { "epoch": 0.6767569336496768, "grad_norm": 0.4312784671783447, "learning_rate": 9.527227092342258e-06, "loss": 0.0027, "step": 41360 }, { "epoch": 0.6769205596007527, "grad_norm": 0.07799684256315231, "learning_rate": 9.526822947225597e-06, "loss": 0.0018, "step": 41370 }, { "epoch": 0.6770841855518285, "grad_norm": 0.39989206194877625, "learning_rate": 9.52641863802248e-06, "loss": 0.0043, "step": 41380 }, { "epoch": 0.6772478115029044, "grad_norm": 0.17345643043518066, "learning_rate": 9.52601416474757e-06, "loss": 0.0022, "step": 41390 }, { "epoch": 0.6774114374539802, "grad_norm": 0.040480829775333405, "learning_rate": 9.525609527415524e-06, "loss": 0.0029, "step": 41400 }, { "epoch": 0.677575063405056, "grad_norm": 0.2246304303407669, "learning_rate": 9.525204726041009e-06, "loss": 0.0058, "step": 41410 }, { "epoch": 0.6777386893561319, "grad_norm": 0.19318869709968567, "learning_rate": 9.524799760638698e-06, "loss": 0.004, "step": 41420 }, { "epoch": 0.6779023153072077, "grad_norm": 0.1640588790178299, "learning_rate": 9.524394631223272e-06, "loss": 0.0027, "step": 41430 }, { "epoch": 0.6780659412582836, "grad_norm": 0.15712791681289673, "learning_rate": 9.523989337809417e-06, "loss": 0.003, "step": 41440 }, { "epoch": 0.6782295672093595, "grad_norm": 0.2708931863307953, "learning_rate": 9.523583880411817e-06, "loss": 0.0022, "step": 41450 }, { "epoch": 0.6783931931604352, "grad_norm": 0.13915149867534637, "learning_rate": 9.523178259045176e-06, "loss": 0.0047, "step": 41460 }, { "epoch": 0.6785568191115111, "grad_norm": 0.13243943452835083, "learning_rate": 9.522772473724194e-06, "loss": 0.0039, "step": 41470 }, { "epoch": 0.6787204450625869, "grad_norm": 0.24703291058540344, "learning_rate": 9.52236652446358e-06, "loss": 0.0035, "step": 41480 }, { "epoch": 0.6788840710136628, "grad_norm": 0.1995680183172226, "learning_rate": 9.521960411278049e-06, "loss": 0.0041, "step": 41490 }, { "epoch": 0.6790476969647387, "grad_norm": 0.19879138469696045, "learning_rate": 9.52155413418232e-06, "loss": 0.006, "step": 41500 }, { "epoch": 0.6792113229158144, "grad_norm": 0.32783085107803345, "learning_rate": 9.521147693191121e-06, "loss": 0.0027, "step": 41510 }, { "epoch": 0.6793749488668903, "grad_norm": 0.16779537498950958, "learning_rate": 9.520741088319188e-06, "loss": 0.0024, "step": 41520 }, { "epoch": 0.6795385748179661, "grad_norm": 0.12358634173870087, "learning_rate": 9.520334319581252e-06, "loss": 0.0024, "step": 41530 }, { "epoch": 0.679702200769042, "grad_norm": 0.22999624907970428, "learning_rate": 9.519927386992063e-06, "loss": 0.0045, "step": 41540 }, { "epoch": 0.6798658267201179, "grad_norm": 0.11387041956186295, "learning_rate": 9.51952029056637e-06, "loss": 0.0034, "step": 41550 }, { "epoch": 0.6800294526711936, "grad_norm": 0.25392088294029236, "learning_rate": 9.519113030318927e-06, "loss": 0.0057, "step": 41560 }, { "epoch": 0.6801930786222695, "grad_norm": 0.4185047745704651, "learning_rate": 9.518705606264497e-06, "loss": 0.004, "step": 41570 }, { "epoch": 0.6803567045733453, "grad_norm": 0.16185013949871063, "learning_rate": 9.518298018417852e-06, "loss": 0.003, "step": 41580 }, { "epoch": 0.6805203305244212, "grad_norm": 0.18516352772712708, "learning_rate": 9.517890266793762e-06, "loss": 0.0035, "step": 41590 }, { "epoch": 0.6806839564754971, "grad_norm": 0.2935173213481903, "learning_rate": 9.51748235140701e-06, "loss": 0.0034, "step": 41600 }, { "epoch": 0.6808475824265728, "grad_norm": 0.09066560119390488, "learning_rate": 9.517074272272378e-06, "loss": 0.0041, "step": 41610 }, { "epoch": 0.6810112083776487, "grad_norm": 0.8889887928962708, "learning_rate": 9.516666029404661e-06, "loss": 0.0043, "step": 41620 }, { "epoch": 0.6811748343287245, "grad_norm": 0.3778586983680725, "learning_rate": 9.516257622818657e-06, "loss": 0.0034, "step": 41630 }, { "epoch": 0.6813384602798004, "grad_norm": 0.13633938133716583, "learning_rate": 9.515849052529167e-06, "loss": 0.0061, "step": 41640 }, { "epoch": 0.6815020862308763, "grad_norm": 0.1451253741979599, "learning_rate": 9.515440318551002e-06, "loss": 0.0036, "step": 41650 }, { "epoch": 0.681665712181952, "grad_norm": 0.06916622817516327, "learning_rate": 9.515031420898979e-06, "loss": 0.0037, "step": 41660 }, { "epoch": 0.6818293381330279, "grad_norm": 0.11693932861089706, "learning_rate": 9.51462235958792e-06, "loss": 0.0038, "step": 41670 }, { "epoch": 0.6819929640841037, "grad_norm": 0.1786661595106125, "learning_rate": 9.51421313463265e-06, "loss": 0.0041, "step": 41680 }, { "epoch": 0.6821565900351796, "grad_norm": 0.17368175089359283, "learning_rate": 9.513803746048002e-06, "loss": 0.0021, "step": 41690 }, { "epoch": 0.6823202159862555, "grad_norm": 0.18363507091999054, "learning_rate": 9.51339419384882e-06, "loss": 0.0023, "step": 41700 }, { "epoch": 0.6824838419373312, "grad_norm": 0.2775869369506836, "learning_rate": 9.512984478049942e-06, "loss": 0.0036, "step": 41710 }, { "epoch": 0.6826474678884071, "grad_norm": 0.3735685348510742, "learning_rate": 9.512574598666226e-06, "loss": 0.0023, "step": 41720 }, { "epoch": 0.6828110938394829, "grad_norm": 0.13980357348918915, "learning_rate": 9.512164555712526e-06, "loss": 0.0022, "step": 41730 }, { "epoch": 0.6829747197905588, "grad_norm": 0.19643956422805786, "learning_rate": 9.511754349203705e-06, "loss": 0.0029, "step": 41740 }, { "epoch": 0.6831383457416347, "grad_norm": 0.07771702855825424, "learning_rate": 9.511343979154635e-06, "loss": 0.0062, "step": 41750 }, { "epoch": 0.6833019716927105, "grad_norm": 0.05773426964879036, "learning_rate": 9.510933445580185e-06, "loss": 0.0021, "step": 41760 }, { "epoch": 0.6834655976437863, "grad_norm": 0.2177582085132599, "learning_rate": 9.510522748495243e-06, "loss": 0.0037, "step": 41770 }, { "epoch": 0.6836292235948621, "grad_norm": 0.19544053077697754, "learning_rate": 9.51011188791469e-06, "loss": 0.0033, "step": 41780 }, { "epoch": 0.683792849545938, "grad_norm": 0.11291540414094925, "learning_rate": 9.50970086385342e-06, "loss": 0.0076, "step": 41790 }, { "epoch": 0.6839564754970138, "grad_norm": 0.18975688517093658, "learning_rate": 9.509289676326334e-06, "loss": 0.0033, "step": 41800 }, { "epoch": 0.6841201014480897, "grad_norm": 0.22611401975154877, "learning_rate": 9.508878325348334e-06, "loss": 0.003, "step": 41810 }, { "epoch": 0.6842837273991655, "grad_norm": 0.3169178068637848, "learning_rate": 9.508466810934332e-06, "loss": 0.0052, "step": 41820 }, { "epoch": 0.6844473533502413, "grad_norm": 0.363726407289505, "learning_rate": 9.508055133099244e-06, "loss": 0.0045, "step": 41830 }, { "epoch": 0.6846109793013172, "grad_norm": 0.14350302517414093, "learning_rate": 9.507643291857992e-06, "loss": 0.0036, "step": 41840 }, { "epoch": 0.684774605252393, "grad_norm": 0.12181004881858826, "learning_rate": 9.507231287225504e-06, "loss": 0.0031, "step": 41850 }, { "epoch": 0.6849382312034689, "grad_norm": 0.04048116132616997, "learning_rate": 9.506819119216717e-06, "loss": 0.0029, "step": 41860 }, { "epoch": 0.6851018571545447, "grad_norm": 0.24783559143543243, "learning_rate": 9.506406787846567e-06, "loss": 0.0021, "step": 41870 }, { "epoch": 0.6852654831056205, "grad_norm": 0.07389844954013824, "learning_rate": 9.50599429313e-06, "loss": 0.0067, "step": 41880 }, { "epoch": 0.6854291090566964, "grad_norm": 0.3724473714828491, "learning_rate": 9.505581635081972e-06, "loss": 0.0034, "step": 41890 }, { "epoch": 0.6855927350077722, "grad_norm": 0.5259312391281128, "learning_rate": 9.50516881371744e-06, "loss": 0.0052, "step": 41900 }, { "epoch": 0.6857563609588481, "grad_norm": 0.19595074653625488, "learning_rate": 9.504755829051364e-06, "loss": 0.0034, "step": 41910 }, { "epoch": 0.685919986909924, "grad_norm": 0.2707808017730713, "learning_rate": 9.504342681098717e-06, "loss": 0.0037, "step": 41920 }, { "epoch": 0.6860836128609997, "grad_norm": 0.1302749514579773, "learning_rate": 9.503929369874473e-06, "loss": 0.0042, "step": 41930 }, { "epoch": 0.6862472388120756, "grad_norm": 0.16718685626983643, "learning_rate": 9.503515895393615e-06, "loss": 0.0044, "step": 41940 }, { "epoch": 0.6864108647631514, "grad_norm": 0.25853249430656433, "learning_rate": 9.503102257671128e-06, "loss": 0.0032, "step": 41950 }, { "epoch": 0.6865744907142273, "grad_norm": 0.13117872178554535, "learning_rate": 9.502688456722008e-06, "loss": 0.0049, "step": 41960 }, { "epoch": 0.6867381166653032, "grad_norm": 0.3355722725391388, "learning_rate": 9.502274492561256e-06, "loss": 0.0039, "step": 41970 }, { "epoch": 0.6869017426163789, "grad_norm": 0.11014098674058914, "learning_rate": 9.501860365203873e-06, "loss": 0.0035, "step": 41980 }, { "epoch": 0.6870653685674548, "grad_norm": 0.3031855523586273, "learning_rate": 9.501446074664871e-06, "loss": 0.0061, "step": 41990 }, { "epoch": 0.6872289945185306, "grad_norm": 0.11191651225090027, "learning_rate": 9.501031620959267e-06, "loss": 0.0039, "step": 42000 }, { "epoch": 0.6873926204696065, "grad_norm": 0.2681221663951874, "learning_rate": 9.500617004102086e-06, "loss": 0.0042, "step": 42010 }, { "epoch": 0.6875562464206824, "grad_norm": 0.11693637073040009, "learning_rate": 9.500202224108355e-06, "loss": 0.0025, "step": 42020 }, { "epoch": 0.6877198723717581, "grad_norm": 0.2281288355588913, "learning_rate": 9.499787280993109e-06, "loss": 0.0023, "step": 42030 }, { "epoch": 0.687883498322834, "grad_norm": 0.13584215939044952, "learning_rate": 9.499372174771389e-06, "loss": 0.0045, "step": 42040 }, { "epoch": 0.6880471242739098, "grad_norm": 0.3787764310836792, "learning_rate": 9.498956905458244e-06, "loss": 0.0041, "step": 42050 }, { "epoch": 0.6882107502249857, "grad_norm": 0.2026219516992569, "learning_rate": 9.49854147306872e-06, "loss": 0.0036, "step": 42060 }, { "epoch": 0.6883743761760616, "grad_norm": 0.18095897138118744, "learning_rate": 9.498125877617884e-06, "loss": 0.0042, "step": 42070 }, { "epoch": 0.6885380021271373, "grad_norm": 0.1485137939453125, "learning_rate": 9.497710119120793e-06, "loss": 0.0048, "step": 42080 }, { "epoch": 0.6887016280782132, "grad_norm": 0.32534754276275635, "learning_rate": 9.49729419759252e-06, "loss": 0.0055, "step": 42090 }, { "epoch": 0.688865254029289, "grad_norm": 0.0838620588183403, "learning_rate": 9.496878113048143e-06, "loss": 0.0018, "step": 42100 }, { "epoch": 0.6890288799803649, "grad_norm": 4.149435520172119, "learning_rate": 9.49646186550274e-06, "loss": 0.008, "step": 42110 }, { "epoch": 0.6891925059314408, "grad_norm": 0.23951877653598785, "learning_rate": 9.496045454971403e-06, "loss": 0.0032, "step": 42120 }, { "epoch": 0.6893561318825165, "grad_norm": 0.2085265964269638, "learning_rate": 9.495628881469225e-06, "loss": 0.0052, "step": 42130 }, { "epoch": 0.6895197578335924, "grad_norm": 0.30439940094947815, "learning_rate": 9.495212145011303e-06, "loss": 0.0052, "step": 42140 }, { "epoch": 0.6896833837846682, "grad_norm": 0.05532262846827507, "learning_rate": 9.494795245612747e-06, "loss": 0.0028, "step": 42150 }, { "epoch": 0.6898470097357441, "grad_norm": 0.20707818865776062, "learning_rate": 9.494378183288665e-06, "loss": 0.006, "step": 42160 }, { "epoch": 0.69001063568682, "grad_norm": 0.09967313706874847, "learning_rate": 9.493960958054175e-06, "loss": 0.0035, "step": 42170 }, { "epoch": 0.6901742616378957, "grad_norm": 0.20637306571006775, "learning_rate": 9.493543569924403e-06, "loss": 0.0021, "step": 42180 }, { "epoch": 0.6903378875889716, "grad_norm": 0.2893702983856201, "learning_rate": 9.493126018914476e-06, "loss": 0.0061, "step": 42190 }, { "epoch": 0.6905015135400474, "grad_norm": 0.2046521008014679, "learning_rate": 9.492708305039529e-06, "loss": 0.0041, "step": 42200 }, { "epoch": 0.6906651394911233, "grad_norm": 0.18706543743610382, "learning_rate": 9.492290428314703e-06, "loss": 0.0026, "step": 42210 }, { "epoch": 0.6908287654421992, "grad_norm": 0.31529700756073, "learning_rate": 9.491872388755148e-06, "loss": 0.0025, "step": 42220 }, { "epoch": 0.690992391393275, "grad_norm": 0.17167064547538757, "learning_rate": 9.491454186376015e-06, "loss": 0.0049, "step": 42230 }, { "epoch": 0.6911560173443508, "grad_norm": 0.25095802545547485, "learning_rate": 9.49103582119246e-06, "loss": 0.002, "step": 42240 }, { "epoch": 0.6913196432954266, "grad_norm": 0.11241482198238373, "learning_rate": 9.490617293219652e-06, "loss": 0.002, "step": 42250 }, { "epoch": 0.6914832692465025, "grad_norm": 0.36931461095809937, "learning_rate": 9.49019860247276e-06, "loss": 0.004, "step": 42260 }, { "epoch": 0.6916468951975784, "grad_norm": 0.06430893391370773, "learning_rate": 9.489779748966962e-06, "loss": 0.0058, "step": 42270 }, { "epoch": 0.6918105211486542, "grad_norm": 0.19548408687114716, "learning_rate": 9.489360732717438e-06, "loss": 0.0058, "step": 42280 }, { "epoch": 0.69197414709973, "grad_norm": 0.4175778329372406, "learning_rate": 9.488941553739377e-06, "loss": 0.0074, "step": 42290 }, { "epoch": 0.6921377730508058, "grad_norm": 0.1676895022392273, "learning_rate": 9.488522212047972e-06, "loss": 0.0047, "step": 42300 }, { "epoch": 0.6923013990018817, "grad_norm": 0.09545018523931503, "learning_rate": 9.488102707658428e-06, "loss": 0.0037, "step": 42310 }, { "epoch": 0.6924650249529576, "grad_norm": 0.15663392841815948, "learning_rate": 9.487683040585945e-06, "loss": 0.0036, "step": 42320 }, { "epoch": 0.6926286509040334, "grad_norm": 0.23286955058574677, "learning_rate": 9.487263210845739e-06, "loss": 0.0034, "step": 42330 }, { "epoch": 0.6927922768551092, "grad_norm": 0.13943170011043549, "learning_rate": 9.486843218453026e-06, "loss": 0.0033, "step": 42340 }, { "epoch": 0.692955902806185, "grad_norm": 0.15861579775810242, "learning_rate": 9.486423063423031e-06, "loss": 0.0031, "step": 42350 }, { "epoch": 0.6931195287572609, "grad_norm": 0.09583029896020889, "learning_rate": 9.48600274577098e-06, "loss": 0.0042, "step": 42360 }, { "epoch": 0.6932831547083368, "grad_norm": 0.11641132831573486, "learning_rate": 9.485582265512115e-06, "loss": 0.0039, "step": 42370 }, { "epoch": 0.6934467806594126, "grad_norm": 0.2458818256855011, "learning_rate": 9.485161622661672e-06, "loss": 0.0033, "step": 42380 }, { "epoch": 0.6936104066104885, "grad_norm": 0.07750624418258667, "learning_rate": 9.4847408172349e-06, "loss": 0.003, "step": 42390 }, { "epoch": 0.6937740325615642, "grad_norm": 0.22216324508190155, "learning_rate": 9.484319849247053e-06, "loss": 0.0043, "step": 42400 }, { "epoch": 0.6939376585126401, "grad_norm": 0.050752464681863785, "learning_rate": 9.483898718713389e-06, "loss": 0.0024, "step": 42410 }, { "epoch": 0.694101284463716, "grad_norm": 0.31082597374916077, "learning_rate": 9.48347742564917e-06, "loss": 0.0026, "step": 42420 }, { "epoch": 0.6942649104147918, "grad_norm": 0.192427858710289, "learning_rate": 9.483055970069674e-06, "loss": 0.0031, "step": 42430 }, { "epoch": 0.6944285363658677, "grad_norm": 0.25390905141830444, "learning_rate": 9.482634351990172e-06, "loss": 0.0025, "step": 42440 }, { "epoch": 0.6945921623169434, "grad_norm": 0.1646750420331955, "learning_rate": 9.482212571425951e-06, "loss": 0.0033, "step": 42450 }, { "epoch": 0.6947557882680193, "grad_norm": 0.07395486533641815, "learning_rate": 9.481790628392294e-06, "loss": 0.0026, "step": 42460 }, { "epoch": 0.6949194142190952, "grad_norm": 0.12713567912578583, "learning_rate": 9.481368522904499e-06, "loss": 0.0054, "step": 42470 }, { "epoch": 0.695083040170171, "grad_norm": 0.18395251035690308, "learning_rate": 9.480946254977866e-06, "loss": 0.0055, "step": 42480 }, { "epoch": 0.6952466661212469, "grad_norm": 0.0654275119304657, "learning_rate": 9.4805238246277e-06, "loss": 0.0052, "step": 42490 }, { "epoch": 0.6954102920723226, "grad_norm": 0.218764066696167, "learning_rate": 9.480101231869317e-06, "loss": 0.0023, "step": 42500 }, { "epoch": 0.6955739180233985, "grad_norm": 0.12461761385202408, "learning_rate": 9.47967847671803e-06, "loss": 0.0038, "step": 42510 }, { "epoch": 0.6957375439744744, "grad_norm": 0.31489256024360657, "learning_rate": 9.479255559189164e-06, "loss": 0.003, "step": 42520 }, { "epoch": 0.6959011699255502, "grad_norm": 0.3809584081172943, "learning_rate": 9.478832479298051e-06, "loss": 0.0022, "step": 42530 }, { "epoch": 0.6960647958766261, "grad_norm": 0.19557444751262665, "learning_rate": 9.478409237060024e-06, "loss": 0.0034, "step": 42540 }, { "epoch": 0.6962284218277018, "grad_norm": 0.285917729139328, "learning_rate": 9.477985832490427e-06, "loss": 0.0059, "step": 42550 }, { "epoch": 0.6963920477787777, "grad_norm": 0.10538316518068314, "learning_rate": 9.477562265604606e-06, "loss": 0.003, "step": 42560 }, { "epoch": 0.6965556737298536, "grad_norm": 0.08849726617336273, "learning_rate": 9.477138536417914e-06, "loss": 0.0033, "step": 42570 }, { "epoch": 0.6967192996809294, "grad_norm": 0.13097114861011505, "learning_rate": 9.476714644945711e-06, "loss": 0.0053, "step": 42580 }, { "epoch": 0.6968829256320053, "grad_norm": 0.17211544513702393, "learning_rate": 9.476290591203361e-06, "loss": 0.0035, "step": 42590 }, { "epoch": 0.697046551583081, "grad_norm": 0.3398285210132599, "learning_rate": 9.475866375206235e-06, "loss": 0.0022, "step": 42600 }, { "epoch": 0.6972101775341569, "grad_norm": 0.07803649455308914, "learning_rate": 9.475441996969712e-06, "loss": 0.0041, "step": 42610 }, { "epoch": 0.6973738034852328, "grad_norm": 0.12283594161272049, "learning_rate": 9.475017456509172e-06, "loss": 0.0022, "step": 42620 }, { "epoch": 0.6975374294363086, "grad_norm": 0.3924367427825928, "learning_rate": 9.474592753840004e-06, "loss": 0.0034, "step": 42630 }, { "epoch": 0.6977010553873845, "grad_norm": 0.09592388570308685, "learning_rate": 9.474167888977605e-06, "loss": 0.0027, "step": 42640 }, { "epoch": 0.6978646813384602, "grad_norm": 0.09245303273200989, "learning_rate": 9.473742861937372e-06, "loss": 0.0047, "step": 42650 }, { "epoch": 0.6980283072895361, "grad_norm": 0.3495856821537018, "learning_rate": 9.473317672734711e-06, "loss": 0.0055, "step": 42660 }, { "epoch": 0.6981919332406119, "grad_norm": 0.29541856050491333, "learning_rate": 9.472892321385039e-06, "loss": 0.0025, "step": 42670 }, { "epoch": 0.6983555591916878, "grad_norm": 0.18021422624588013, "learning_rate": 9.472466807903769e-06, "loss": 0.0029, "step": 42680 }, { "epoch": 0.6985191851427637, "grad_norm": 0.14795605838298798, "learning_rate": 9.472041132306327e-06, "loss": 0.0032, "step": 42690 }, { "epoch": 0.6986828110938395, "grad_norm": 0.26293906569480896, "learning_rate": 9.471615294608142e-06, "loss": 0.0024, "step": 42700 }, { "epoch": 0.6988464370449153, "grad_norm": 0.34060484170913696, "learning_rate": 9.47118929482465e-06, "loss": 0.0044, "step": 42710 }, { "epoch": 0.6990100629959911, "grad_norm": 0.05026577040553093, "learning_rate": 9.470763132971292e-06, "loss": 0.0022, "step": 42720 }, { "epoch": 0.699173688947067, "grad_norm": 0.21517863869667053, "learning_rate": 9.470336809063516e-06, "loss": 0.0038, "step": 42730 }, { "epoch": 0.6993373148981429, "grad_norm": 0.024979673326015472, "learning_rate": 9.469910323116774e-06, "loss": 0.0037, "step": 42740 }, { "epoch": 0.6995009408492187, "grad_norm": 0.24875278770923615, "learning_rate": 9.469483675146526e-06, "loss": 0.0027, "step": 42750 }, { "epoch": 0.6996645668002945, "grad_norm": 0.0886520966887474, "learning_rate": 9.469056865168238e-06, "loss": 0.0024, "step": 42760 }, { "epoch": 0.6998281927513703, "grad_norm": 0.2391182780265808, "learning_rate": 9.468629893197378e-06, "loss": 0.004, "step": 42770 }, { "epoch": 0.6999918187024462, "grad_norm": 0.2099774181842804, "learning_rate": 9.468202759249428e-06, "loss": 0.0034, "step": 42780 }, { "epoch": 0.7001554446535221, "grad_norm": 0.1299414485692978, "learning_rate": 9.467775463339862e-06, "loss": 0.0017, "step": 42790 }, { "epoch": 0.7003190706045979, "grad_norm": 0.1556621491909027, "learning_rate": 9.467348005484179e-06, "loss": 0.0034, "step": 42800 }, { "epoch": 0.7004826965556737, "grad_norm": 0.3767586648464203, "learning_rate": 9.466920385697862e-06, "loss": 0.0035, "step": 42810 }, { "epoch": 0.7006463225067495, "grad_norm": 0.11935310810804367, "learning_rate": 9.466492603996421e-06, "loss": 0.0037, "step": 42820 }, { "epoch": 0.7008099484578254, "grad_norm": 0.13375429809093475, "learning_rate": 9.466064660395357e-06, "loss": 0.0028, "step": 42830 }, { "epoch": 0.7009735744089013, "grad_norm": 0.12701818346977234, "learning_rate": 9.465636554910183e-06, "loss": 0.003, "step": 42840 }, { "epoch": 0.7011372003599771, "grad_norm": 0.08951638638973236, "learning_rate": 9.465208287556417e-06, "loss": 0.0037, "step": 42850 }, { "epoch": 0.701300826311053, "grad_norm": 0.3227202892303467, "learning_rate": 9.464779858349582e-06, "loss": 0.0037, "step": 42860 }, { "epoch": 0.7014644522621287, "grad_norm": 0.04804162681102753, "learning_rate": 9.464351267305207e-06, "loss": 0.004, "step": 42870 }, { "epoch": 0.7016280782132046, "grad_norm": 0.23942385613918304, "learning_rate": 9.46392251443883e-06, "loss": 0.0045, "step": 42880 }, { "epoch": 0.7017917041642805, "grad_norm": 0.22162482142448425, "learning_rate": 9.463493599765989e-06, "loss": 0.0031, "step": 42890 }, { "epoch": 0.7019553301153563, "grad_norm": 0.17343223094940186, "learning_rate": 9.463064523302234e-06, "loss": 0.0045, "step": 42900 }, { "epoch": 0.7021189560664322, "grad_norm": 0.12951546907424927, "learning_rate": 9.462635285063116e-06, "loss": 0.0026, "step": 42910 }, { "epoch": 0.7022825820175079, "grad_norm": 0.3482697904109955, "learning_rate": 9.462205885064196e-06, "loss": 0.0039, "step": 42920 }, { "epoch": 0.7024462079685838, "grad_norm": 0.1096210703253746, "learning_rate": 9.461776323321035e-06, "loss": 0.002, "step": 42930 }, { "epoch": 0.7026098339196597, "grad_norm": 0.34508779644966125, "learning_rate": 9.461346599849209e-06, "loss": 0.0046, "step": 42940 }, { "epoch": 0.7027734598707355, "grad_norm": 0.032945528626441956, "learning_rate": 9.460916714664288e-06, "loss": 0.0041, "step": 42950 }, { "epoch": 0.7029370858218114, "grad_norm": 0.15586833655834198, "learning_rate": 9.460486667781858e-06, "loss": 0.0034, "step": 42960 }, { "epoch": 0.7031007117728871, "grad_norm": 0.25630250573158264, "learning_rate": 9.460056459217509e-06, "loss": 0.0025, "step": 42970 }, { "epoch": 0.703264337723963, "grad_norm": 0.18831761181354523, "learning_rate": 9.459626088986831e-06, "loss": 0.0045, "step": 42980 }, { "epoch": 0.7034279636750389, "grad_norm": 0.5098690986633301, "learning_rate": 9.459195557105427e-06, "loss": 0.0062, "step": 42990 }, { "epoch": 0.7035915896261147, "grad_norm": 0.19844293594360352, "learning_rate": 9.4587648635889e-06, "loss": 0.0016, "step": 43000 }, { "epoch": 0.7037552155771906, "grad_norm": 0.19846397638320923, "learning_rate": 9.458334008452866e-06, "loss": 0.0042, "step": 43010 }, { "epoch": 0.7039188415282663, "grad_norm": 0.18688060343265533, "learning_rate": 9.457902991712936e-06, "loss": 0.0022, "step": 43020 }, { "epoch": 0.7040824674793422, "grad_norm": 0.14431153237819672, "learning_rate": 9.457471813384737e-06, "loss": 0.0023, "step": 43030 }, { "epoch": 0.7042460934304181, "grad_norm": 0.20458489656448364, "learning_rate": 9.4570404734839e-06, "loss": 0.0064, "step": 43040 }, { "epoch": 0.7044097193814939, "grad_norm": 0.205845445394516, "learning_rate": 9.456608972026057e-06, "loss": 0.0028, "step": 43050 }, { "epoch": 0.7045733453325698, "grad_norm": 0.0740434005856514, "learning_rate": 9.45617730902685e-06, "loss": 0.0063, "step": 43060 }, { "epoch": 0.7047369712836455, "grad_norm": 0.07615919411182404, "learning_rate": 9.455745484501927e-06, "loss": 0.0034, "step": 43070 }, { "epoch": 0.7049005972347214, "grad_norm": 0.16960659623146057, "learning_rate": 9.455313498466937e-06, "loss": 0.0029, "step": 43080 }, { "epoch": 0.7050642231857973, "grad_norm": 0.13934388756752014, "learning_rate": 9.454881350937544e-06, "loss": 0.0027, "step": 43090 }, { "epoch": 0.7052278491368731, "grad_norm": 0.12008245289325714, "learning_rate": 9.454449041929406e-06, "loss": 0.0033, "step": 43100 }, { "epoch": 0.705391475087949, "grad_norm": 0.1778629869222641, "learning_rate": 9.454016571458198e-06, "loss": 0.0037, "step": 43110 }, { "epoch": 0.7055551010390247, "grad_norm": 0.1880698800086975, "learning_rate": 9.453583939539593e-06, "loss": 0.0031, "step": 43120 }, { "epoch": 0.7057187269901006, "grad_norm": 0.026169197633862495, "learning_rate": 9.453151146189275e-06, "loss": 0.0036, "step": 43130 }, { "epoch": 0.7058823529411765, "grad_norm": 0.41859370470046997, "learning_rate": 9.45271819142293e-06, "loss": 0.002, "step": 43140 }, { "epoch": 0.7060459788922523, "grad_norm": 0.12803144752979279, "learning_rate": 9.452285075256251e-06, "loss": 0.0039, "step": 43150 }, { "epoch": 0.7062096048433282, "grad_norm": 0.22554750740528107, "learning_rate": 9.451851797704941e-06, "loss": 0.0032, "step": 43160 }, { "epoch": 0.706373230794404, "grad_norm": 0.20387428998947144, "learning_rate": 9.451418358784703e-06, "loss": 0.0035, "step": 43170 }, { "epoch": 0.7065368567454798, "grad_norm": 0.10441246628761292, "learning_rate": 9.450984758511248e-06, "loss": 0.0048, "step": 43180 }, { "epoch": 0.7067004826965557, "grad_norm": 0.06095161288976669, "learning_rate": 9.450550996900295e-06, "loss": 0.0036, "step": 43190 }, { "epoch": 0.7068641086476315, "grad_norm": 0.4035806357860565, "learning_rate": 9.450117073967561e-06, "loss": 0.0059, "step": 43200 }, { "epoch": 0.7070277345987074, "grad_norm": 0.6646978259086609, "learning_rate": 9.44968298972878e-06, "loss": 0.0056, "step": 43210 }, { "epoch": 0.7071913605497832, "grad_norm": 0.14821387827396393, "learning_rate": 9.449248744199687e-06, "loss": 0.0056, "step": 43220 }, { "epoch": 0.707354986500859, "grad_norm": 0.2699081301689148, "learning_rate": 9.44881433739602e-06, "loss": 0.009, "step": 43230 }, { "epoch": 0.7075186124519349, "grad_norm": 0.11489080637693405, "learning_rate": 9.448379769333524e-06, "loss": 0.0032, "step": 43240 }, { "epoch": 0.7076822384030107, "grad_norm": 0.24692316353321075, "learning_rate": 9.447945040027954e-06, "loss": 0.0027, "step": 43250 }, { "epoch": 0.7078458643540866, "grad_norm": 0.0392230786383152, "learning_rate": 9.447510149495065e-06, "loss": 0.0039, "step": 43260 }, { "epoch": 0.7080094903051624, "grad_norm": 0.09486676007509232, "learning_rate": 9.447075097750626e-06, "loss": 0.0032, "step": 43270 }, { "epoch": 0.7081731162562382, "grad_norm": 0.11020425707101822, "learning_rate": 9.4466398848104e-06, "loss": 0.0025, "step": 43280 }, { "epoch": 0.7083367422073141, "grad_norm": 0.10121571272611618, "learning_rate": 9.446204510690167e-06, "loss": 0.0027, "step": 43290 }, { "epoch": 0.7085003681583899, "grad_norm": 0.185047447681427, "learning_rate": 9.445768975405704e-06, "loss": 0.0028, "step": 43300 }, { "epoch": 0.7086639941094658, "grad_norm": 0.4259854555130005, "learning_rate": 9.445333278972804e-06, "loss": 0.0049, "step": 43310 }, { "epoch": 0.7088276200605416, "grad_norm": 0.34021973609924316, "learning_rate": 9.444897421407255e-06, "loss": 0.003, "step": 43320 }, { "epoch": 0.7089912460116174, "grad_norm": 0.2663522958755493, "learning_rate": 9.444461402724858e-06, "loss": 0.004, "step": 43330 }, { "epoch": 0.7091548719626933, "grad_norm": 0.25476956367492676, "learning_rate": 9.444025222941416e-06, "loss": 0.007, "step": 43340 }, { "epoch": 0.7093184979137691, "grad_norm": 0.34113025665283203, "learning_rate": 9.443588882072743e-06, "loss": 0.0073, "step": 43350 }, { "epoch": 0.709482123864845, "grad_norm": 0.0735788568854332, "learning_rate": 9.44315238013465e-06, "loss": 0.0028, "step": 43360 }, { "epoch": 0.7096457498159208, "grad_norm": 0.027225032448768616, "learning_rate": 9.442715717142963e-06, "loss": 0.0026, "step": 43370 }, { "epoch": 0.7098093757669967, "grad_norm": 0.1284790188074112, "learning_rate": 9.44227889311351e-06, "loss": 0.0031, "step": 43380 }, { "epoch": 0.7099730017180725, "grad_norm": 0.30750277638435364, "learning_rate": 9.441841908062124e-06, "loss": 0.0035, "step": 43390 }, { "epoch": 0.7101366276691483, "grad_norm": 0.4352801442146301, "learning_rate": 9.441404762004644e-06, "loss": 0.0056, "step": 43400 }, { "epoch": 0.7103002536202242, "grad_norm": 0.8874611258506775, "learning_rate": 9.440967454956915e-06, "loss": 0.0046, "step": 43410 }, { "epoch": 0.7104638795713, "grad_norm": 0.1412227600812912, "learning_rate": 9.44052998693479e-06, "loss": 0.0034, "step": 43420 }, { "epoch": 0.7106275055223759, "grad_norm": 0.19500382244586945, "learning_rate": 9.440092357954127e-06, "loss": 0.0051, "step": 43430 }, { "epoch": 0.7107911314734517, "grad_norm": 0.337568074464798, "learning_rate": 9.439654568030785e-06, "loss": 0.0022, "step": 43440 }, { "epoch": 0.7109547574245275, "grad_norm": 0.03067833185195923, "learning_rate": 9.439216617180638e-06, "loss": 0.0027, "step": 43450 }, { "epoch": 0.7111183833756034, "grad_norm": 0.27238795161247253, "learning_rate": 9.438778505419557e-06, "loss": 0.0031, "step": 43460 }, { "epoch": 0.7112820093266792, "grad_norm": 0.2768290936946869, "learning_rate": 9.438340232763422e-06, "loss": 0.0037, "step": 43470 }, { "epoch": 0.7114456352777551, "grad_norm": 0.29424840211868286, "learning_rate": 9.437901799228121e-06, "loss": 0.0064, "step": 43480 }, { "epoch": 0.711609261228831, "grad_norm": 0.10928195714950562, "learning_rate": 9.437463204829549e-06, "loss": 0.0038, "step": 43490 }, { "epoch": 0.7117728871799067, "grad_norm": 0.2692672312259674, "learning_rate": 9.437024449583598e-06, "loss": 0.0025, "step": 43500 }, { "epoch": 0.7119365131309826, "grad_norm": 0.29281020164489746, "learning_rate": 9.436585533506176e-06, "loss": 0.0028, "step": 43510 }, { "epoch": 0.7121001390820584, "grad_norm": 0.4347575902938843, "learning_rate": 9.436146456613192e-06, "loss": 0.005, "step": 43520 }, { "epoch": 0.7122637650331343, "grad_norm": 0.1944822520017624, "learning_rate": 9.43570721892056e-06, "loss": 0.0038, "step": 43530 }, { "epoch": 0.71242739098421, "grad_norm": 0.38200923800468445, "learning_rate": 9.435267820444203e-06, "loss": 0.0042, "step": 43540 }, { "epoch": 0.7125910169352859, "grad_norm": 0.30823472142219543, "learning_rate": 9.434828261200046e-06, "loss": 0.0029, "step": 43550 }, { "epoch": 0.7127546428863618, "grad_norm": 0.16902115941047668, "learning_rate": 9.434388541204025e-06, "loss": 0.0024, "step": 43560 }, { "epoch": 0.7129182688374376, "grad_norm": 0.07349956780672073, "learning_rate": 9.433948660472075e-06, "loss": 0.0035, "step": 43570 }, { "epoch": 0.7130818947885135, "grad_norm": 0.07912345975637436, "learning_rate": 9.433508619020144e-06, "loss": 0.0029, "step": 43580 }, { "epoch": 0.7132455207395892, "grad_norm": 0.19981731474399567, "learning_rate": 9.433068416864182e-06, "loss": 0.0041, "step": 43590 }, { "epoch": 0.7134091466906651, "grad_norm": 0.2753267288208008, "learning_rate": 9.432628054020143e-06, "loss": 0.0038, "step": 43600 }, { "epoch": 0.713572772641741, "grad_norm": 0.11021915078163147, "learning_rate": 9.432187530503993e-06, "loss": 0.0059, "step": 43610 }, { "epoch": 0.7137363985928168, "grad_norm": 0.22448433935642242, "learning_rate": 9.431746846331694e-06, "loss": 0.0038, "step": 43620 }, { "epoch": 0.7139000245438927, "grad_norm": 0.11207084357738495, "learning_rate": 9.431306001519227e-06, "loss": 0.0049, "step": 43630 }, { "epoch": 0.7140636504949684, "grad_norm": 0.034532126039266586, "learning_rate": 9.430864996082565e-06, "loss": 0.0039, "step": 43640 }, { "epoch": 0.7142272764460443, "grad_norm": 0.20960968732833862, "learning_rate": 9.430423830037699e-06, "loss": 0.0047, "step": 43650 }, { "epoch": 0.7143909023971202, "grad_norm": 0.47492852807044983, "learning_rate": 9.429982503400614e-06, "loss": 0.0033, "step": 43660 }, { "epoch": 0.714554528348196, "grad_norm": 0.31388694047927856, "learning_rate": 9.429541016187312e-06, "loss": 0.0029, "step": 43670 }, { "epoch": 0.7147181542992719, "grad_norm": 0.046706922352313995, "learning_rate": 9.429099368413794e-06, "loss": 0.0034, "step": 43680 }, { "epoch": 0.7148817802503477, "grad_norm": 0.10895107686519623, "learning_rate": 9.42865756009607e-06, "loss": 0.0028, "step": 43690 }, { "epoch": 0.7150454062014235, "grad_norm": 0.21794630587100983, "learning_rate": 9.428215591250151e-06, "loss": 0.0047, "step": 43700 }, { "epoch": 0.7152090321524994, "grad_norm": 0.13827158510684967, "learning_rate": 9.427773461892063e-06, "loss": 0.0024, "step": 43710 }, { "epoch": 0.7153726581035752, "grad_norm": 0.0689816102385521, "learning_rate": 9.427331172037826e-06, "loss": 0.0029, "step": 43720 }, { "epoch": 0.7155362840546511, "grad_norm": 0.20435881614685059, "learning_rate": 9.426888721703477e-06, "loss": 0.0034, "step": 43730 }, { "epoch": 0.7156999100057269, "grad_norm": 0.2274610549211502, "learning_rate": 9.42644611090505e-06, "loss": 0.0047, "step": 43740 }, { "epoch": 0.7158635359568027, "grad_norm": 0.3494691848754883, "learning_rate": 9.426003339658591e-06, "loss": 0.0027, "step": 43750 }, { "epoch": 0.7160271619078786, "grad_norm": 0.3882181942462921, "learning_rate": 9.42556040798015e-06, "loss": 0.0038, "step": 43760 }, { "epoch": 0.7161907878589544, "grad_norm": 0.1835632622241974, "learning_rate": 9.42511731588578e-06, "loss": 0.0019, "step": 43770 }, { "epoch": 0.7163544138100303, "grad_norm": 0.25872087478637695, "learning_rate": 9.42467406339154e-06, "loss": 0.0034, "step": 43780 }, { "epoch": 0.7165180397611061, "grad_norm": 0.14610989391803741, "learning_rate": 9.424230650513501e-06, "loss": 0.0023, "step": 43790 }, { "epoch": 0.716681665712182, "grad_norm": 0.18413010239601135, "learning_rate": 9.423787077267737e-06, "loss": 0.0037, "step": 43800 }, { "epoch": 0.7168452916632578, "grad_norm": 0.10271312296390533, "learning_rate": 9.423343343670321e-06, "loss": 0.0045, "step": 43810 }, { "epoch": 0.7170089176143336, "grad_norm": 0.06794589757919312, "learning_rate": 9.422899449737339e-06, "loss": 0.0031, "step": 43820 }, { "epoch": 0.7171725435654095, "grad_norm": 0.29964444041252136, "learning_rate": 9.422455395484886e-06, "loss": 0.0035, "step": 43830 }, { "epoch": 0.7173361695164853, "grad_norm": 0.4265291094779968, "learning_rate": 9.42201118092905e-06, "loss": 0.0052, "step": 43840 }, { "epoch": 0.7174997954675612, "grad_norm": 0.13908329606056213, "learning_rate": 9.42156680608594e-06, "loss": 0.003, "step": 43850 }, { "epoch": 0.717663421418637, "grad_norm": 0.30660727620124817, "learning_rate": 9.42112227097166e-06, "loss": 0.0027, "step": 43860 }, { "epoch": 0.7178270473697128, "grad_norm": 0.12608669698238373, "learning_rate": 9.42067757560232e-06, "loss": 0.0024, "step": 43870 }, { "epoch": 0.7179906733207887, "grad_norm": 0.2620046138763428, "learning_rate": 9.420232719994044e-06, "loss": 0.0036, "step": 43880 }, { "epoch": 0.7181542992718645, "grad_norm": 0.06332611292600632, "learning_rate": 9.419787704162957e-06, "loss": 0.0046, "step": 43890 }, { "epoch": 0.7183179252229404, "grad_norm": 0.08811411261558533, "learning_rate": 9.419342528125188e-06, "loss": 0.0039, "step": 43900 }, { "epoch": 0.7184815511740162, "grad_norm": 0.004064450040459633, "learning_rate": 9.418897191896873e-06, "loss": 0.0031, "step": 43910 }, { "epoch": 0.718645177125092, "grad_norm": 0.208746075630188, "learning_rate": 9.418451695494158e-06, "loss": 0.0026, "step": 43920 }, { "epoch": 0.7188088030761679, "grad_norm": 0.3180221915245056, "learning_rate": 9.418006038933187e-06, "loss": 0.0052, "step": 43930 }, { "epoch": 0.7189724290272437, "grad_norm": 0.08794719725847244, "learning_rate": 9.417560222230115e-06, "loss": 0.0021, "step": 43940 }, { "epoch": 0.7191360549783196, "grad_norm": 0.18991270661354065, "learning_rate": 9.417114245401103e-06, "loss": 0.0033, "step": 43950 }, { "epoch": 0.7192996809293954, "grad_norm": 0.05480005964636803, "learning_rate": 9.416668108462316e-06, "loss": 0.0026, "step": 43960 }, { "epoch": 0.7194633068804712, "grad_norm": 0.1448337286710739, "learning_rate": 9.416221811429924e-06, "loss": 0.0036, "step": 43970 }, { "epoch": 0.7196269328315471, "grad_norm": 0.16126029193401337, "learning_rate": 9.415775354320106e-06, "loss": 0.0041, "step": 43980 }, { "epoch": 0.7197905587826229, "grad_norm": 0.2131180763244629, "learning_rate": 9.415328737149045e-06, "loss": 0.002, "step": 43990 }, { "epoch": 0.7199541847336988, "grad_norm": 0.5852167010307312, "learning_rate": 9.414881959932929e-06, "loss": 0.0092, "step": 44000 }, { "epoch": 0.7201178106847747, "grad_norm": 0.20315052568912506, "learning_rate": 9.414435022687954e-06, "loss": 0.0027, "step": 44010 }, { "epoch": 0.7202814366358504, "grad_norm": 0.06888915598392487, "learning_rate": 9.413987925430317e-06, "loss": 0.003, "step": 44020 }, { "epoch": 0.7204450625869263, "grad_norm": 0.022122442722320557, "learning_rate": 9.413540668176226e-06, "loss": 0.0112, "step": 44030 }, { "epoch": 0.7206086885380021, "grad_norm": 0.1346496194601059, "learning_rate": 9.413093250941895e-06, "loss": 0.0037, "step": 44040 }, { "epoch": 0.720772314489078, "grad_norm": 0.06178409978747368, "learning_rate": 9.412645673743542e-06, "loss": 0.0042, "step": 44050 }, { "epoch": 0.7209359404401539, "grad_norm": 0.07769902050495148, "learning_rate": 9.412197936597386e-06, "loss": 0.0021, "step": 44060 }, { "epoch": 0.7210995663912296, "grad_norm": 0.1280083805322647, "learning_rate": 9.41175003951966e-06, "loss": 0.0041, "step": 44070 }, { "epoch": 0.7212631923423055, "grad_norm": 0.15115448832511902, "learning_rate": 9.411301982526598e-06, "loss": 0.0036, "step": 44080 }, { "epoch": 0.7214268182933813, "grad_norm": 0.15542688965797424, "learning_rate": 9.410853765634443e-06, "loss": 0.0025, "step": 44090 }, { "epoch": 0.7215904442444572, "grad_norm": 0.1315353810787201, "learning_rate": 9.410405388859439e-06, "loss": 0.0038, "step": 44100 }, { "epoch": 0.7217540701955331, "grad_norm": 0.11461956799030304, "learning_rate": 9.40995685221784e-06, "loss": 0.0036, "step": 44110 }, { "epoch": 0.7219176961466088, "grad_norm": 0.23914045095443726, "learning_rate": 9.409508155725903e-06, "loss": 0.0038, "step": 44120 }, { "epoch": 0.7220813220976847, "grad_norm": 0.0565275214612484, "learning_rate": 9.409059299399895e-06, "loss": 0.0029, "step": 44130 }, { "epoch": 0.7222449480487605, "grad_norm": 0.02471841126680374, "learning_rate": 9.408610283256084e-06, "loss": 0.0024, "step": 44140 }, { "epoch": 0.7224085739998364, "grad_norm": 0.09245089441537857, "learning_rate": 9.408161107310746e-06, "loss": 0.0027, "step": 44150 }, { "epoch": 0.7225721999509123, "grad_norm": 0.3753833472728729, "learning_rate": 9.407711771580162e-06, "loss": 0.0047, "step": 44160 }, { "epoch": 0.722735825901988, "grad_norm": 0.09006118029356003, "learning_rate": 9.40726227608062e-06, "loss": 0.0034, "step": 44170 }, { "epoch": 0.7228994518530639, "grad_norm": 0.07483652234077454, "learning_rate": 9.406812620828413e-06, "loss": 0.0024, "step": 44180 }, { "epoch": 0.7230630778041397, "grad_norm": 0.2620481252670288, "learning_rate": 9.40636280583984e-06, "loss": 0.0036, "step": 44190 }, { "epoch": 0.7232267037552156, "grad_norm": 0.39716148376464844, "learning_rate": 9.405912831131207e-06, "loss": 0.0028, "step": 44200 }, { "epoch": 0.7233903297062915, "grad_norm": 0.1393423080444336, "learning_rate": 9.405462696718824e-06, "loss": 0.0028, "step": 44210 }, { "epoch": 0.7235539556573672, "grad_norm": 0.17728587985038757, "learning_rate": 9.405012402619004e-06, "loss": 0.002, "step": 44220 }, { "epoch": 0.7237175816084431, "grad_norm": 0.13728955388069153, "learning_rate": 9.404561948848072e-06, "loss": 0.0037, "step": 44230 }, { "epoch": 0.7238812075595189, "grad_norm": 0.15387779474258423, "learning_rate": 9.404111335422356e-06, "loss": 0.0033, "step": 44240 }, { "epoch": 0.7240448335105948, "grad_norm": 0.14162440598011017, "learning_rate": 9.40366056235819e-06, "loss": 0.0039, "step": 44250 }, { "epoch": 0.7242084594616707, "grad_norm": 0.1799604594707489, "learning_rate": 9.40320962967191e-06, "loss": 0.0028, "step": 44260 }, { "epoch": 0.7243720854127464, "grad_norm": 0.09081440418958664, "learning_rate": 9.402758537379866e-06, "loss": 0.003, "step": 44270 }, { "epoch": 0.7245357113638223, "grad_norm": 0.15042786300182343, "learning_rate": 9.402307285498405e-06, "loss": 0.0035, "step": 44280 }, { "epoch": 0.7246993373148981, "grad_norm": 0.20422321557998657, "learning_rate": 9.401855874043887e-06, "loss": 0.0032, "step": 44290 }, { "epoch": 0.724862963265974, "grad_norm": 0.09770471602678299, "learning_rate": 9.401404303032673e-06, "loss": 0.0025, "step": 44300 }, { "epoch": 0.7250265892170499, "grad_norm": 0.18016590178012848, "learning_rate": 9.40095257248113e-06, "loss": 0.0028, "step": 44310 }, { "epoch": 0.7251902151681257, "grad_norm": 0.19632486999034882, "learning_rate": 9.400500682405635e-06, "loss": 0.003, "step": 44320 }, { "epoch": 0.7253538411192015, "grad_norm": 0.08654646575450897, "learning_rate": 9.400048632822566e-06, "loss": 0.0033, "step": 44330 }, { "epoch": 0.7255174670702773, "grad_norm": 0.10829994082450867, "learning_rate": 9.399596423748311e-06, "loss": 0.002, "step": 44340 }, { "epoch": 0.7256810930213532, "grad_norm": 0.117234006524086, "learning_rate": 9.399144055199258e-06, "loss": 0.0025, "step": 44350 }, { "epoch": 0.7258447189724291, "grad_norm": 0.1839020848274231, "learning_rate": 9.398691527191808e-06, "loss": 0.0023, "step": 44360 }, { "epoch": 0.7260083449235049, "grad_norm": 0.2735215425491333, "learning_rate": 9.39823883974236e-06, "loss": 0.0034, "step": 44370 }, { "epoch": 0.7261719708745807, "grad_norm": 0.045950066298246384, "learning_rate": 9.397785992867325e-06, "loss": 0.0021, "step": 44380 }, { "epoch": 0.7263355968256565, "grad_norm": 0.2915136516094208, "learning_rate": 9.39733298658312e-06, "loss": 0.0035, "step": 44390 }, { "epoch": 0.7264992227767324, "grad_norm": 0.2397589087486267, "learning_rate": 9.396879820906159e-06, "loss": 0.0025, "step": 44400 }, { "epoch": 0.7266628487278082, "grad_norm": 0.120446115732193, "learning_rate": 9.396426495852875e-06, "loss": 0.0053, "step": 44410 }, { "epoch": 0.7268264746788841, "grad_norm": 0.13101591169834137, "learning_rate": 9.395973011439696e-06, "loss": 0.0026, "step": 44420 }, { "epoch": 0.72699010062996, "grad_norm": 0.34294649958610535, "learning_rate": 9.39551936768306e-06, "loss": 0.0043, "step": 44430 }, { "epoch": 0.7271537265810357, "grad_norm": 0.24978701770305634, "learning_rate": 9.395065564599413e-06, "loss": 0.0035, "step": 44440 }, { "epoch": 0.7273173525321116, "grad_norm": 0.2867276668548584, "learning_rate": 9.394611602205202e-06, "loss": 0.0036, "step": 44450 }, { "epoch": 0.7274809784831874, "grad_norm": 0.26214414834976196, "learning_rate": 9.394157480516883e-06, "loss": 0.0029, "step": 44460 }, { "epoch": 0.7276446044342633, "grad_norm": 0.2770514488220215, "learning_rate": 9.393703199550916e-06, "loss": 0.0072, "step": 44470 }, { "epoch": 0.7278082303853391, "grad_norm": 0.17170031368732452, "learning_rate": 9.393248759323768e-06, "loss": 0.002, "step": 44480 }, { "epoch": 0.7279718563364149, "grad_norm": 0.18607540428638458, "learning_rate": 9.392794159851912e-06, "loss": 0.0024, "step": 44490 }, { "epoch": 0.7281354822874908, "grad_norm": 0.1586611568927765, "learning_rate": 9.392339401151824e-06, "loss": 0.0034, "step": 44500 }, { "epoch": 0.7282991082385666, "grad_norm": 0.1039513424038887, "learning_rate": 9.391884483239991e-06, "loss": 0.0041, "step": 44510 }, { "epoch": 0.7284627341896425, "grad_norm": 0.2333914041519165, "learning_rate": 9.3914294061329e-06, "loss": 0.0043, "step": 44520 }, { "epoch": 0.7286263601407184, "grad_norm": 0.07784456759691238, "learning_rate": 9.390974169847048e-06, "loss": 0.0022, "step": 44530 }, { "epoch": 0.7287899860917941, "grad_norm": 0.3544568717479706, "learning_rate": 9.390518774398935e-06, "loss": 0.0062, "step": 44540 }, { "epoch": 0.72895361204287, "grad_norm": 0.24859455227851868, "learning_rate": 9.39006321980507e-06, "loss": 0.0032, "step": 44550 }, { "epoch": 0.7291172379939458, "grad_norm": 0.14105965197086334, "learning_rate": 9.389607506081964e-06, "loss": 0.0037, "step": 44560 }, { "epoch": 0.7292808639450217, "grad_norm": 0.14568167924880981, "learning_rate": 9.389151633246137e-06, "loss": 0.0045, "step": 44570 }, { "epoch": 0.7294444898960976, "grad_norm": 0.11969415843486786, "learning_rate": 9.38869560131411e-06, "loss": 0.0037, "step": 44580 }, { "epoch": 0.7296081158471733, "grad_norm": 0.2141263782978058, "learning_rate": 9.388239410302418e-06, "loss": 0.0048, "step": 44590 }, { "epoch": 0.7297717417982492, "grad_norm": 0.2572079002857208, "learning_rate": 9.387783060227592e-06, "loss": 0.0038, "step": 44600 }, { "epoch": 0.729935367749325, "grad_norm": 0.2962052822113037, "learning_rate": 9.387326551106177e-06, "loss": 0.0037, "step": 44610 }, { "epoch": 0.7300989937004009, "grad_norm": 0.23166991770267487, "learning_rate": 9.386869882954718e-06, "loss": 0.0041, "step": 44620 }, { "epoch": 0.7302626196514768, "grad_norm": 0.035091642290353775, "learning_rate": 9.386413055789772e-06, "loss": 0.0024, "step": 44630 }, { "epoch": 0.7304262456025525, "grad_norm": 0.1557222157716751, "learning_rate": 9.385956069627893e-06, "loss": 0.0031, "step": 44640 }, { "epoch": 0.7305898715536284, "grad_norm": 0.18170341849327087, "learning_rate": 9.385498924485648e-06, "loss": 0.0055, "step": 44650 }, { "epoch": 0.7307534975047042, "grad_norm": 0.28071677684783936, "learning_rate": 9.385041620379607e-06, "loss": 0.0024, "step": 44660 }, { "epoch": 0.7309171234557801, "grad_norm": 0.4183295965194702, "learning_rate": 9.384584157326347e-06, "loss": 0.0035, "step": 44670 }, { "epoch": 0.731080749406856, "grad_norm": 0.152989000082016, "learning_rate": 9.384126535342451e-06, "loss": 0.0027, "step": 44680 }, { "epoch": 0.7312443753579317, "grad_norm": 0.32978740334510803, "learning_rate": 9.383668754444503e-06, "loss": 0.002, "step": 44690 }, { "epoch": 0.7314080013090076, "grad_norm": 0.2996117174625397, "learning_rate": 9.383210814649098e-06, "loss": 0.0049, "step": 44700 }, { "epoch": 0.7315716272600834, "grad_norm": 0.24867761135101318, "learning_rate": 9.382752715972837e-06, "loss": 0.0028, "step": 44710 }, { "epoch": 0.7317352532111593, "grad_norm": 0.051696084439754486, "learning_rate": 9.382294458432324e-06, "loss": 0.0026, "step": 44720 }, { "epoch": 0.7318988791622352, "grad_norm": 0.34324270486831665, "learning_rate": 9.381836042044168e-06, "loss": 0.0039, "step": 44730 }, { "epoch": 0.732062505113311, "grad_norm": 0.16919779777526855, "learning_rate": 9.381377466824989e-06, "loss": 0.0025, "step": 44740 }, { "epoch": 0.7322261310643868, "grad_norm": 0.09304846078157425, "learning_rate": 9.380918732791406e-06, "loss": 0.0046, "step": 44750 }, { "epoch": 0.7323897570154626, "grad_norm": 0.44175365567207336, "learning_rate": 9.380459839960048e-06, "loss": 0.0032, "step": 44760 }, { "epoch": 0.7325533829665385, "grad_norm": 0.25202420353889465, "learning_rate": 9.380000788347549e-06, "loss": 0.0048, "step": 44770 }, { "epoch": 0.7327170089176144, "grad_norm": 0.09570915997028351, "learning_rate": 9.379541577970547e-06, "loss": 0.003, "step": 44780 }, { "epoch": 0.7328806348686902, "grad_norm": 0.15347380936145782, "learning_rate": 9.37908220884569e-06, "loss": 0.0027, "step": 44790 }, { "epoch": 0.733044260819766, "grad_norm": 0.18991000950336456, "learning_rate": 9.378622680989631e-06, "loss": 0.0025, "step": 44800 }, { "epoch": 0.7332078867708418, "grad_norm": 0.12974664568901062, "learning_rate": 9.37816299441902e-06, "loss": 0.002, "step": 44810 }, { "epoch": 0.7333715127219177, "grad_norm": 0.2423994094133377, "learning_rate": 9.377703149150523e-06, "loss": 0.0019, "step": 44820 }, { "epoch": 0.7335351386729936, "grad_norm": 0.2589271664619446, "learning_rate": 9.37724314520081e-06, "loss": 0.0043, "step": 44830 }, { "epoch": 0.7336987646240694, "grad_norm": 0.319818377494812, "learning_rate": 9.376782982586553e-06, "loss": 0.0034, "step": 44840 }, { "epoch": 0.7338623905751452, "grad_norm": 0.17180155217647552, "learning_rate": 9.37632266132443e-06, "loss": 0.0039, "step": 44850 }, { "epoch": 0.734026016526221, "grad_norm": 0.1244574636220932, "learning_rate": 9.375862181431132e-06, "loss": 0.0028, "step": 44860 }, { "epoch": 0.7341896424772969, "grad_norm": 0.09944602847099304, "learning_rate": 9.375401542923346e-06, "loss": 0.003, "step": 44870 }, { "epoch": 0.7343532684283728, "grad_norm": 0.36826252937316895, "learning_rate": 9.374940745817769e-06, "loss": 0.0058, "step": 44880 }, { "epoch": 0.7345168943794486, "grad_norm": 0.07497650384902954, "learning_rate": 9.374479790131106e-06, "loss": 0.0045, "step": 44890 }, { "epoch": 0.7346805203305244, "grad_norm": 0.12750157713890076, "learning_rate": 9.374018675880063e-06, "loss": 0.0028, "step": 44900 }, { "epoch": 0.7348441462816002, "grad_norm": 0.17026767134666443, "learning_rate": 9.373557403081357e-06, "loss": 0.003, "step": 44910 }, { "epoch": 0.7350077722326761, "grad_norm": 0.26842716336250305, "learning_rate": 9.373095971751706e-06, "loss": 0.0044, "step": 44920 }, { "epoch": 0.735171398183752, "grad_norm": 0.08554580062627792, "learning_rate": 9.372634381907837e-06, "loss": 0.0047, "step": 44930 }, { "epoch": 0.7353350241348278, "grad_norm": 0.284843385219574, "learning_rate": 9.37217263356648e-06, "loss": 0.0031, "step": 44940 }, { "epoch": 0.7354986500859036, "grad_norm": 0.04252878949046135, "learning_rate": 9.371710726744375e-06, "loss": 0.0025, "step": 44950 }, { "epoch": 0.7356622760369794, "grad_norm": 0.06299583613872528, "learning_rate": 9.371248661458263e-06, "loss": 0.0028, "step": 44960 }, { "epoch": 0.7358259019880553, "grad_norm": 0.1219446137547493, "learning_rate": 9.370786437724892e-06, "loss": 0.0041, "step": 44970 }, { "epoch": 0.7359895279391312, "grad_norm": 0.19243265688419342, "learning_rate": 9.370324055561017e-06, "loss": 0.0037, "step": 44980 }, { "epoch": 0.736153153890207, "grad_norm": 0.3377157747745514, "learning_rate": 9.3698615149834e-06, "loss": 0.0021, "step": 44990 }, { "epoch": 0.7363167798412829, "grad_norm": 0.10912178456783295, "learning_rate": 9.369398816008805e-06, "loss": 0.0029, "step": 45000 }, { "epoch": 0.7364804057923586, "grad_norm": 0.06433393806219101, "learning_rate": 9.368935958654006e-06, "loss": 0.0055, "step": 45010 }, { "epoch": 0.7366440317434345, "grad_norm": 0.20445063710212708, "learning_rate": 9.368472942935778e-06, "loss": 0.0039, "step": 45020 }, { "epoch": 0.7368076576945104, "grad_norm": 0.14766888320446014, "learning_rate": 9.368009768870906e-06, "loss": 0.0036, "step": 45030 }, { "epoch": 0.7369712836455862, "grad_norm": 0.07786289602518082, "learning_rate": 9.367546436476176e-06, "loss": 0.003, "step": 45040 }, { "epoch": 0.7371349095966621, "grad_norm": 0.058195970952510834, "learning_rate": 9.367082945768386e-06, "loss": 0.0035, "step": 45050 }, { "epoch": 0.7372985355477378, "grad_norm": 0.16606906056404114, "learning_rate": 9.366619296764335e-06, "loss": 0.0027, "step": 45060 }, { "epoch": 0.7374621614988137, "grad_norm": 0.4443644881248474, "learning_rate": 9.36615548948083e-06, "loss": 0.0033, "step": 45070 }, { "epoch": 0.7376257874498896, "grad_norm": 0.1841696947813034, "learning_rate": 9.365691523934682e-06, "loss": 0.005, "step": 45080 }, { "epoch": 0.7377894134009654, "grad_norm": 0.24207483232021332, "learning_rate": 9.36522740014271e-06, "loss": 0.0044, "step": 45090 }, { "epoch": 0.7379530393520413, "grad_norm": 0.13808202743530273, "learning_rate": 9.364763118121734e-06, "loss": 0.0041, "step": 45100 }, { "epoch": 0.738116665303117, "grad_norm": 0.23255422711372375, "learning_rate": 9.364298677888586e-06, "loss": 0.0028, "step": 45110 }, { "epoch": 0.7382802912541929, "grad_norm": 0.1403762549161911, "learning_rate": 9.363834079460101e-06, "loss": 0.004, "step": 45120 }, { "epoch": 0.7384439172052688, "grad_norm": 0.38627293705940247, "learning_rate": 9.36336932285312e-06, "loss": 0.0049, "step": 45130 }, { "epoch": 0.7386075431563446, "grad_norm": 0.31459856033325195, "learning_rate": 9.362904408084486e-06, "loss": 0.0029, "step": 45140 }, { "epoch": 0.7387711691074205, "grad_norm": 0.21671229600906372, "learning_rate": 9.362439335171053e-06, "loss": 0.0024, "step": 45150 }, { "epoch": 0.7389347950584962, "grad_norm": 0.1458991914987564, "learning_rate": 9.36197410412968e-06, "loss": 0.0017, "step": 45160 }, { "epoch": 0.7390984210095721, "grad_norm": 0.11365485191345215, "learning_rate": 9.36150871497723e-06, "loss": 0.0035, "step": 45170 }, { "epoch": 0.739262046960648, "grad_norm": 0.30745479464530945, "learning_rate": 9.36104316773057e-06, "loss": 0.0037, "step": 45180 }, { "epoch": 0.7394256729117238, "grad_norm": 0.30352458357810974, "learning_rate": 9.360577462406579e-06, "loss": 0.0031, "step": 45190 }, { "epoch": 0.7395892988627997, "grad_norm": 0.11075519770383835, "learning_rate": 9.360111599022133e-06, "loss": 0.0043, "step": 45200 }, { "epoch": 0.7397529248138754, "grad_norm": 0.22227925062179565, "learning_rate": 9.359645577594122e-06, "loss": 0.0067, "step": 45210 }, { "epoch": 0.7399165507649513, "grad_norm": 0.4556516110897064, "learning_rate": 9.359179398139437e-06, "loss": 0.002, "step": 45220 }, { "epoch": 0.7400801767160272, "grad_norm": 0.10171130299568176, "learning_rate": 9.358713060674975e-06, "loss": 0.0029, "step": 45230 }, { "epoch": 0.740243802667103, "grad_norm": 0.1968950778245926, "learning_rate": 9.35824656521764e-06, "loss": 0.0032, "step": 45240 }, { "epoch": 0.7404074286181789, "grad_norm": 0.28342896699905396, "learning_rate": 9.357779911784343e-06, "loss": 0.0032, "step": 45250 }, { "epoch": 0.7405710545692546, "grad_norm": 0.15065467357635498, "learning_rate": 9.357313100391998e-06, "loss": 0.0031, "step": 45260 }, { "epoch": 0.7407346805203305, "grad_norm": 0.37738969922065735, "learning_rate": 9.356846131057526e-06, "loss": 0.0039, "step": 45270 }, { "epoch": 0.7408983064714064, "grad_norm": 0.06322255730628967, "learning_rate": 9.356379003797853e-06, "loss": 0.0018, "step": 45280 }, { "epoch": 0.7410619324224822, "grad_norm": 0.14703792333602905, "learning_rate": 9.35591171862991e-06, "loss": 0.0039, "step": 45290 }, { "epoch": 0.7412255583735581, "grad_norm": 0.08885933458805084, "learning_rate": 9.355444275570637e-06, "loss": 0.0021, "step": 45300 }, { "epoch": 0.7413891843246339, "grad_norm": 0.32444217801094055, "learning_rate": 9.354976674636977e-06, "loss": 0.0063, "step": 45310 }, { "epoch": 0.7415528102757097, "grad_norm": 0.35946983098983765, "learning_rate": 9.35450891584588e-06, "loss": 0.0047, "step": 45320 }, { "epoch": 0.7417164362267855, "grad_norm": 0.419741690158844, "learning_rate": 9.3540409992143e-06, "loss": 0.0053, "step": 45330 }, { "epoch": 0.7418800621778614, "grad_norm": 0.20957323908805847, "learning_rate": 9.3535729247592e-06, "loss": 0.003, "step": 45340 }, { "epoch": 0.7420436881289373, "grad_norm": 0.02961203269660473, "learning_rate": 9.353104692497544e-06, "loss": 0.0031, "step": 45350 }, { "epoch": 0.7422073140800131, "grad_norm": 0.03943047672510147, "learning_rate": 9.352636302446305e-06, "loss": 0.0027, "step": 45360 }, { "epoch": 0.7423709400310889, "grad_norm": 0.12195006757974625, "learning_rate": 9.35216775462246e-06, "loss": 0.0052, "step": 45370 }, { "epoch": 0.7425345659821647, "grad_norm": 0.24874858558177948, "learning_rate": 9.351699049042995e-06, "loss": 0.0036, "step": 45380 }, { "epoch": 0.7426981919332406, "grad_norm": 0.1881808340549469, "learning_rate": 9.3512301857249e-06, "loss": 0.0032, "step": 45390 }, { "epoch": 0.7428618178843165, "grad_norm": 0.360744446516037, "learning_rate": 9.350761164685167e-06, "loss": 0.0044, "step": 45400 }, { "epoch": 0.7430254438353923, "grad_norm": 0.1845385730266571, "learning_rate": 9.350291985940799e-06, "loss": 0.0019, "step": 45410 }, { "epoch": 0.7431890697864681, "grad_norm": 0.0915493592619896, "learning_rate": 9.349822649508801e-06, "loss": 0.0023, "step": 45420 }, { "epoch": 0.7433526957375439, "grad_norm": 0.42872482538223267, "learning_rate": 9.349353155406189e-06, "loss": 0.0049, "step": 45430 }, { "epoch": 0.7435163216886198, "grad_norm": 0.3561018705368042, "learning_rate": 9.348883503649976e-06, "loss": 0.0036, "step": 45440 }, { "epoch": 0.7436799476396957, "grad_norm": 0.0710846334695816, "learning_rate": 9.34841369425719e-06, "loss": 0.0017, "step": 45450 }, { "epoch": 0.7438435735907715, "grad_norm": 0.15055815875530243, "learning_rate": 9.347943727244858e-06, "loss": 0.0023, "step": 45460 }, { "epoch": 0.7440071995418474, "grad_norm": 0.27672040462493896, "learning_rate": 9.347473602630018e-06, "loss": 0.0016, "step": 45470 }, { "epoch": 0.7441708254929231, "grad_norm": 0.259103000164032, "learning_rate": 9.347003320429706e-06, "loss": 0.0035, "step": 45480 }, { "epoch": 0.744334451443999, "grad_norm": 0.2535003125667572, "learning_rate": 9.346532880660972e-06, "loss": 0.0034, "step": 45490 }, { "epoch": 0.7444980773950749, "grad_norm": 0.3494164049625397, "learning_rate": 9.346062283340869e-06, "loss": 0.0043, "step": 45500 }, { "epoch": 0.7446617033461507, "grad_norm": 0.03437132015824318, "learning_rate": 9.345591528486452e-06, "loss": 0.0025, "step": 45510 }, { "epoch": 0.7448253292972266, "grad_norm": 0.15139859914779663, "learning_rate": 9.345120616114787e-06, "loss": 0.0024, "step": 45520 }, { "epoch": 0.7449889552483023, "grad_norm": 0.19978711009025574, "learning_rate": 9.344649546242943e-06, "loss": 0.0021, "step": 45530 }, { "epoch": 0.7451525811993782, "grad_norm": 0.0410379134118557, "learning_rate": 9.344178318887995e-06, "loss": 0.0015, "step": 45540 }, { "epoch": 0.7453162071504541, "grad_norm": 0.051625676453113556, "learning_rate": 9.343706934067026e-06, "loss": 0.0028, "step": 45550 }, { "epoch": 0.7454798331015299, "grad_norm": 0.16650187969207764, "learning_rate": 9.343235391797117e-06, "loss": 0.0027, "step": 45560 }, { "epoch": 0.7456434590526058, "grad_norm": 0.08861218392848969, "learning_rate": 9.342763692095366e-06, "loss": 0.0043, "step": 45570 }, { "epoch": 0.7458070850036815, "grad_norm": 0.13995987176895142, "learning_rate": 9.342291834978868e-06, "loss": 0.0032, "step": 45580 }, { "epoch": 0.7459707109547574, "grad_norm": 0.01826346106827259, "learning_rate": 9.341819820464727e-06, "loss": 0.0056, "step": 45590 }, { "epoch": 0.7461343369058333, "grad_norm": 0.2519683241844177, "learning_rate": 9.341347648570053e-06, "loss": 0.002, "step": 45600 }, { "epoch": 0.7462979628569091, "grad_norm": 0.07952021807432175, "learning_rate": 9.340875319311962e-06, "loss": 0.0036, "step": 45610 }, { "epoch": 0.746461588807985, "grad_norm": 0.3329220414161682, "learning_rate": 9.340402832707572e-06, "loss": 0.0034, "step": 45620 }, { "epoch": 0.7466252147590607, "grad_norm": 0.25139468908309937, "learning_rate": 9.339930188774012e-06, "loss": 0.0025, "step": 45630 }, { "epoch": 0.7467888407101366, "grad_norm": 0.05135999247431755, "learning_rate": 9.339457387528412e-06, "loss": 0.0043, "step": 45640 }, { "epoch": 0.7469524666612125, "grad_norm": 0.4018520712852478, "learning_rate": 9.338984428987915e-06, "loss": 0.0051, "step": 45650 }, { "epoch": 0.7471160926122883, "grad_norm": 0.11271344125270844, "learning_rate": 9.338511313169657e-06, "loss": 0.0024, "step": 45660 }, { "epoch": 0.7472797185633642, "grad_norm": 0.2551169991493225, "learning_rate": 9.338038040090793e-06, "loss": 0.002, "step": 45670 }, { "epoch": 0.74744334451444, "grad_norm": 0.2805965542793274, "learning_rate": 9.337564609768477e-06, "loss": 0.003, "step": 45680 }, { "epoch": 0.7476069704655158, "grad_norm": 0.04204365983605385, "learning_rate": 9.337091022219866e-06, "loss": 0.0018, "step": 45690 }, { "epoch": 0.7477705964165917, "grad_norm": 0.09109525382518768, "learning_rate": 9.336617277462131e-06, "loss": 0.0017, "step": 45700 }, { "epoch": 0.7479342223676675, "grad_norm": 0.33344176411628723, "learning_rate": 9.336143375512445e-06, "loss": 0.0039, "step": 45710 }, { "epoch": 0.7480978483187434, "grad_norm": 0.14615987241268158, "learning_rate": 9.335669316387981e-06, "loss": 0.0036, "step": 45720 }, { "epoch": 0.7482614742698191, "grad_norm": 0.2519879639148712, "learning_rate": 9.335195100105924e-06, "loss": 0.0052, "step": 45730 }, { "epoch": 0.748425100220895, "grad_norm": 0.18035291135311127, "learning_rate": 9.334720726683466e-06, "loss": 0.0039, "step": 45740 }, { "epoch": 0.7485887261719709, "grad_norm": 0.3346319794654846, "learning_rate": 9.334246196137798e-06, "loss": 0.0044, "step": 45750 }, { "epoch": 0.7487523521230467, "grad_norm": 0.230926513671875, "learning_rate": 9.333771508486122e-06, "loss": 0.0029, "step": 45760 }, { "epoch": 0.7489159780741226, "grad_norm": 0.09823073446750641, "learning_rate": 9.333296663745648e-06, "loss": 0.0034, "step": 45770 }, { "epoch": 0.7490796040251984, "grad_norm": 0.11959690600633621, "learning_rate": 9.332821661933582e-06, "loss": 0.0023, "step": 45780 }, { "epoch": 0.7492432299762742, "grad_norm": 0.13075628876686096, "learning_rate": 9.332346503067146e-06, "loss": 0.0021, "step": 45790 }, { "epoch": 0.7494068559273501, "grad_norm": 0.12976419925689697, "learning_rate": 9.33187118716356e-06, "loss": 0.0043, "step": 45800 }, { "epoch": 0.7495704818784259, "grad_norm": 0.22187969088554382, "learning_rate": 9.331395714240056e-06, "loss": 0.0032, "step": 45810 }, { "epoch": 0.7497341078295018, "grad_norm": 0.12239314615726471, "learning_rate": 9.330920084313868e-06, "loss": 0.0016, "step": 45820 }, { "epoch": 0.7498977337805776, "grad_norm": 0.06627237051725388, "learning_rate": 9.330444297402234e-06, "loss": 0.0014, "step": 45830 }, { "epoch": 0.7500613597316534, "grad_norm": 0.15680819749832153, "learning_rate": 9.329968353522405e-06, "loss": 0.0042, "step": 45840 }, { "epoch": 0.7502249856827293, "grad_norm": 0.15960851311683655, "learning_rate": 9.329492252691628e-06, "loss": 0.0027, "step": 45850 }, { "epoch": 0.7503886116338051, "grad_norm": 0.1933201253414154, "learning_rate": 9.329015994927164e-06, "loss": 0.0024, "step": 45860 }, { "epoch": 0.750552237584881, "grad_norm": 0.12402519583702087, "learning_rate": 9.328539580246274e-06, "loss": 0.0033, "step": 45870 }, { "epoch": 0.7507158635359568, "grad_norm": 0.2637035846710205, "learning_rate": 9.328063008666226e-06, "loss": 0.005, "step": 45880 }, { "epoch": 0.7508794894870326, "grad_norm": 0.0336226224899292, "learning_rate": 9.327586280204298e-06, "loss": 0.0028, "step": 45890 }, { "epoch": 0.7510431154381085, "grad_norm": 0.18882043659687042, "learning_rate": 9.327109394877768e-06, "loss": 0.0041, "step": 45900 }, { "epoch": 0.7512067413891843, "grad_norm": 0.15450966358184814, "learning_rate": 9.326632352703922e-06, "loss": 0.0073, "step": 45910 }, { "epoch": 0.7513703673402602, "grad_norm": 0.5061607360839844, "learning_rate": 9.326155153700053e-06, "loss": 0.0077, "step": 45920 }, { "epoch": 0.751533993291336, "grad_norm": 0.22285473346710205, "learning_rate": 9.325677797883457e-06, "loss": 0.0034, "step": 45930 }, { "epoch": 0.7516976192424119, "grad_norm": 0.1413329839706421, "learning_rate": 9.325200285271437e-06, "loss": 0.0042, "step": 45940 }, { "epoch": 0.7518612451934877, "grad_norm": 0.2556937336921692, "learning_rate": 9.3247226158813e-06, "loss": 0.0031, "step": 45950 }, { "epoch": 0.7520248711445635, "grad_norm": 0.14354334771633148, "learning_rate": 9.324244789730365e-06, "loss": 0.0051, "step": 45960 }, { "epoch": 0.7521884970956394, "grad_norm": 0.1389549970626831, "learning_rate": 9.32376680683595e-06, "loss": 0.0026, "step": 45970 }, { "epoch": 0.7523521230467152, "grad_norm": 0.12958888709545135, "learning_rate": 9.323288667215377e-06, "loss": 0.0036, "step": 45980 }, { "epoch": 0.7525157489977911, "grad_norm": 0.13619212806224823, "learning_rate": 9.322810370885984e-06, "loss": 0.0023, "step": 45990 }, { "epoch": 0.7526793749488669, "grad_norm": 0.07780390977859497, "learning_rate": 9.322331917865104e-06, "loss": 0.0023, "step": 46000 }, { "epoch": 0.7528430008999427, "grad_norm": 0.2370491474866867, "learning_rate": 9.32185330817008e-06, "loss": 0.0032, "step": 46010 }, { "epoch": 0.7530066268510186, "grad_norm": 0.16289196908473969, "learning_rate": 9.32137454181826e-06, "loss": 0.0028, "step": 46020 }, { "epoch": 0.7531702528020944, "grad_norm": 0.18555554747581482, "learning_rate": 9.320895618827e-06, "loss": 0.0033, "step": 46030 }, { "epoch": 0.7533338787531703, "grad_norm": 0.04601273313164711, "learning_rate": 9.320416539213656e-06, "loss": 0.0024, "step": 46040 }, { "epoch": 0.7534975047042461, "grad_norm": 0.15345878899097443, "learning_rate": 9.3199373029956e-06, "loss": 0.0046, "step": 46050 }, { "epoch": 0.7536611306553219, "grad_norm": 0.15434256196022034, "learning_rate": 9.319457910190197e-06, "loss": 0.0041, "step": 46060 }, { "epoch": 0.7538247566063978, "grad_norm": 0.07933124154806137, "learning_rate": 9.318978360814827e-06, "loss": 0.0031, "step": 46070 }, { "epoch": 0.7539883825574736, "grad_norm": 0.21763266623020172, "learning_rate": 9.318498654886874e-06, "loss": 0.0035, "step": 46080 }, { "epoch": 0.7541520085085495, "grad_norm": 0.07607048004865646, "learning_rate": 9.318018792423722e-06, "loss": 0.0022, "step": 46090 }, { "epoch": 0.7543156344596254, "grad_norm": 0.06933566927909851, "learning_rate": 9.317538773442766e-06, "loss": 0.0031, "step": 46100 }, { "epoch": 0.7544792604107011, "grad_norm": 0.13332277536392212, "learning_rate": 9.317058597961408e-06, "loss": 0.0022, "step": 46110 }, { "epoch": 0.754642886361777, "grad_norm": 0.1752205640077591, "learning_rate": 9.316578265997051e-06, "loss": 0.0049, "step": 46120 }, { "epoch": 0.7548065123128528, "grad_norm": 0.16984455287456512, "learning_rate": 9.316097777567105e-06, "loss": 0.0031, "step": 46130 }, { "epoch": 0.7549701382639287, "grad_norm": 0.19780689477920532, "learning_rate": 9.315617132688991e-06, "loss": 0.0031, "step": 46140 }, { "epoch": 0.7551337642150046, "grad_norm": 0.07790403068065643, "learning_rate": 9.315136331380126e-06, "loss": 0.003, "step": 46150 }, { "epoch": 0.7552973901660803, "grad_norm": 0.25169387459754944, "learning_rate": 9.314655373657942e-06, "loss": 0.0024, "step": 46160 }, { "epoch": 0.7554610161171562, "grad_norm": 0.15293337404727936, "learning_rate": 9.31417425953987e-06, "loss": 0.0027, "step": 46170 }, { "epoch": 0.755624642068232, "grad_norm": 0.09488566219806671, "learning_rate": 9.313692989043349e-06, "loss": 0.0024, "step": 46180 }, { "epoch": 0.7557882680193079, "grad_norm": 0.2919982969760895, "learning_rate": 9.313211562185826e-06, "loss": 0.0017, "step": 46190 }, { "epoch": 0.7559518939703836, "grad_norm": 0.05947905406355858, "learning_rate": 9.312729978984749e-06, "loss": 0.0035, "step": 46200 }, { "epoch": 0.7561155199214595, "grad_norm": 0.14913895726203918, "learning_rate": 9.312248239457578e-06, "loss": 0.0024, "step": 46210 }, { "epoch": 0.7562791458725354, "grad_norm": 0.10114043951034546, "learning_rate": 9.31176634362177e-06, "loss": 0.0021, "step": 46220 }, { "epoch": 0.7564427718236112, "grad_norm": 0.09346242249011993, "learning_rate": 9.311284291494796e-06, "loss": 0.0019, "step": 46230 }, { "epoch": 0.7566063977746871, "grad_norm": 0.07998554408550262, "learning_rate": 9.310802083094128e-06, "loss": 0.0035, "step": 46240 }, { "epoch": 0.7567700237257629, "grad_norm": 0.2284916639328003, "learning_rate": 9.310319718437247e-06, "loss": 0.004, "step": 46250 }, { "epoch": 0.7569336496768387, "grad_norm": 0.17484083771705627, "learning_rate": 9.309837197541634e-06, "loss": 0.0036, "step": 46260 }, { "epoch": 0.7570972756279146, "grad_norm": 0.21179313957691193, "learning_rate": 9.30935452042478e-06, "loss": 0.0071, "step": 46270 }, { "epoch": 0.7572609015789904, "grad_norm": 0.10206786543130875, "learning_rate": 9.308871687104184e-06, "loss": 0.0029, "step": 46280 }, { "epoch": 0.7574245275300663, "grad_norm": 0.6523825526237488, "learning_rate": 9.308388697597346e-06, "loss": 0.0037, "step": 46290 }, { "epoch": 0.7575881534811421, "grad_norm": 0.08849554508924484, "learning_rate": 9.307905551921769e-06, "loss": 0.0036, "step": 46300 }, { "epoch": 0.7577517794322179, "grad_norm": 0.15587395429611206, "learning_rate": 9.307422250094974e-06, "loss": 0.0035, "step": 46310 }, { "epoch": 0.7579154053832938, "grad_norm": 0.19340258836746216, "learning_rate": 9.306938792134471e-06, "loss": 0.0059, "step": 46320 }, { "epoch": 0.7580790313343696, "grad_norm": 0.32111769914627075, "learning_rate": 9.306455178057791e-06, "loss": 0.0025, "step": 46330 }, { "epoch": 0.7582426572854455, "grad_norm": 0.25783252716064453, "learning_rate": 9.305971407882462e-06, "loss": 0.0035, "step": 46340 }, { "epoch": 0.7584062832365213, "grad_norm": 0.1390017569065094, "learning_rate": 9.305487481626017e-06, "loss": 0.0036, "step": 46350 }, { "epoch": 0.7585699091875971, "grad_norm": 0.27731814980506897, "learning_rate": 9.305003399305997e-06, "loss": 0.0032, "step": 46360 }, { "epoch": 0.758733535138673, "grad_norm": 0.0679384097456932, "learning_rate": 9.304519160939954e-06, "loss": 0.0024, "step": 46370 }, { "epoch": 0.7588971610897488, "grad_norm": 0.3053310513496399, "learning_rate": 9.304034766545433e-06, "loss": 0.0025, "step": 46380 }, { "epoch": 0.7590607870408247, "grad_norm": 0.28973039984703064, "learning_rate": 9.303550216140001e-06, "loss": 0.0021, "step": 46390 }, { "epoch": 0.7592244129919005, "grad_norm": 0.10076013952493668, "learning_rate": 9.303065509741214e-06, "loss": 0.0037, "step": 46400 }, { "epoch": 0.7593880389429764, "grad_norm": 0.13185669481754303, "learning_rate": 9.302580647366643e-06, "loss": 0.0042, "step": 46410 }, { "epoch": 0.7595516648940522, "grad_norm": 0.06140631064772606, "learning_rate": 9.302095629033866e-06, "loss": 0.0035, "step": 46420 }, { "epoch": 0.759715290845128, "grad_norm": 0.30516910552978516, "learning_rate": 9.301610454760463e-06, "loss": 0.0048, "step": 46430 }, { "epoch": 0.7598789167962039, "grad_norm": 0.11276406049728394, "learning_rate": 9.301125124564021e-06, "loss": 0.0028, "step": 46440 }, { "epoch": 0.7600425427472797, "grad_norm": 0.09542270749807358, "learning_rate": 9.300639638462128e-06, "loss": 0.0058, "step": 46450 }, { "epoch": 0.7602061686983556, "grad_norm": 0.08172499388456345, "learning_rate": 9.300153996472386e-06, "loss": 0.0033, "step": 46460 }, { "epoch": 0.7603697946494314, "grad_norm": 0.08125490695238113, "learning_rate": 9.299668198612396e-06, "loss": 0.0028, "step": 46470 }, { "epoch": 0.7605334206005072, "grad_norm": 0.4125836193561554, "learning_rate": 9.299182244899768e-06, "loss": 0.0052, "step": 46480 }, { "epoch": 0.7606970465515831, "grad_norm": 0.16170723736286163, "learning_rate": 9.298696135352116e-06, "loss": 0.0036, "step": 46490 }, { "epoch": 0.7608606725026589, "grad_norm": 0.1094004362821579, "learning_rate": 9.298209869987062e-06, "loss": 0.0026, "step": 46500 }, { "epoch": 0.7610242984537348, "grad_norm": 0.22159212827682495, "learning_rate": 9.297723448822229e-06, "loss": 0.0031, "step": 46510 }, { "epoch": 0.7611879244048106, "grad_norm": 0.12661929428577423, "learning_rate": 9.297236871875253e-06, "loss": 0.0026, "step": 46520 }, { "epoch": 0.7613515503558864, "grad_norm": 0.3631120026111603, "learning_rate": 9.296750139163766e-06, "loss": 0.0048, "step": 46530 }, { "epoch": 0.7615151763069623, "grad_norm": 0.13580480217933655, "learning_rate": 9.296263250705415e-06, "loss": 0.003, "step": 46540 }, { "epoch": 0.7616788022580381, "grad_norm": 0.09591087698936462, "learning_rate": 9.295776206517847e-06, "loss": 0.0021, "step": 46550 }, { "epoch": 0.761842428209114, "grad_norm": 0.14948637783527374, "learning_rate": 9.295289006618713e-06, "loss": 0.0031, "step": 46560 }, { "epoch": 0.7620060541601898, "grad_norm": 0.45437610149383545, "learning_rate": 9.294801651025678e-06, "loss": 0.0038, "step": 46570 }, { "epoch": 0.7621696801112656, "grad_norm": 0.07336080819368362, "learning_rate": 9.294314139756406e-06, "loss": 0.0024, "step": 46580 }, { "epoch": 0.7623333060623415, "grad_norm": 0.03860901668667793, "learning_rate": 9.293826472828567e-06, "loss": 0.004, "step": 46590 }, { "epoch": 0.7624969320134173, "grad_norm": 0.0506257563829422, "learning_rate": 9.293338650259838e-06, "loss": 0.0027, "step": 46600 }, { "epoch": 0.7626605579644932, "grad_norm": 0.3378849923610687, "learning_rate": 9.292850672067904e-06, "loss": 0.0064, "step": 46610 }, { "epoch": 0.762824183915569, "grad_norm": 0.1885622888803482, "learning_rate": 9.292362538270448e-06, "loss": 0.0054, "step": 46620 }, { "epoch": 0.7629878098666448, "grad_norm": 0.12624213099479675, "learning_rate": 9.291874248885167e-06, "loss": 0.0033, "step": 46630 }, { "epoch": 0.7631514358177207, "grad_norm": 0.2975305914878845, "learning_rate": 9.29138580392976e-06, "loss": 0.0035, "step": 46640 }, { "epoch": 0.7633150617687965, "grad_norm": 0.29391130805015564, "learning_rate": 9.290897203421931e-06, "loss": 0.0027, "step": 46650 }, { "epoch": 0.7634786877198724, "grad_norm": 0.04558788239955902, "learning_rate": 9.290408447379393e-06, "loss": 0.0038, "step": 46660 }, { "epoch": 0.7636423136709483, "grad_norm": 0.3681044280529022, "learning_rate": 9.28991953581986e-06, "loss": 0.0021, "step": 46670 }, { "epoch": 0.763805939622024, "grad_norm": 0.21886494755744934, "learning_rate": 9.289430468761054e-06, "loss": 0.0035, "step": 46680 }, { "epoch": 0.7639695655730999, "grad_norm": 0.1452564299106598, "learning_rate": 9.288941246220701e-06, "loss": 0.0025, "step": 46690 }, { "epoch": 0.7641331915241757, "grad_norm": 0.28494179248809814, "learning_rate": 9.288451868216538e-06, "loss": 0.0035, "step": 46700 }, { "epoch": 0.7642968174752516, "grad_norm": 0.22606857120990753, "learning_rate": 9.287962334766302e-06, "loss": 0.0034, "step": 46710 }, { "epoch": 0.7644604434263275, "grad_norm": 0.3624116778373718, "learning_rate": 9.287472645887737e-06, "loss": 0.0031, "step": 46720 }, { "epoch": 0.7646240693774032, "grad_norm": 0.13198938965797424, "learning_rate": 9.286982801598595e-06, "loss": 0.0024, "step": 46730 }, { "epoch": 0.7647876953284791, "grad_norm": 0.3184228837490082, "learning_rate": 9.286492801916626e-06, "loss": 0.0043, "step": 46740 }, { "epoch": 0.7649513212795549, "grad_norm": 0.06921160966157913, "learning_rate": 9.286002646859598e-06, "loss": 0.0031, "step": 46750 }, { "epoch": 0.7651149472306308, "grad_norm": 0.18555854260921478, "learning_rate": 9.285512336445275e-06, "loss": 0.0035, "step": 46760 }, { "epoch": 0.7652785731817067, "grad_norm": 0.1534012258052826, "learning_rate": 9.285021870691429e-06, "loss": 0.0038, "step": 46770 }, { "epoch": 0.7654421991327824, "grad_norm": 0.3063267469406128, "learning_rate": 9.284531249615838e-06, "loss": 0.0046, "step": 46780 }, { "epoch": 0.7656058250838583, "grad_norm": 0.6759603023529053, "learning_rate": 9.284040473236288e-06, "loss": 0.0048, "step": 46790 }, { "epoch": 0.7657694510349341, "grad_norm": 0.321036159992218, "learning_rate": 9.283549541570568e-06, "loss": 0.0028, "step": 46800 }, { "epoch": 0.76593307698601, "grad_norm": 0.20663334429264069, "learning_rate": 9.28305845463647e-06, "loss": 0.0025, "step": 46810 }, { "epoch": 0.7660967029370859, "grad_norm": 0.11464294046163559, "learning_rate": 9.2825672124518e-06, "loss": 0.0046, "step": 46820 }, { "epoch": 0.7662603288881616, "grad_norm": 0.18782205879688263, "learning_rate": 9.28207581503436e-06, "loss": 0.0031, "step": 46830 }, { "epoch": 0.7664239548392375, "grad_norm": 0.03613877296447754, "learning_rate": 9.281584262401963e-06, "loss": 0.0066, "step": 46840 }, { "epoch": 0.7665875807903133, "grad_norm": 0.09105803817510605, "learning_rate": 9.281092554572428e-06, "loss": 0.0022, "step": 46850 }, { "epoch": 0.7667512067413892, "grad_norm": 0.2554304003715515, "learning_rate": 9.280600691563577e-06, "loss": 0.0038, "step": 46860 }, { "epoch": 0.7669148326924651, "grad_norm": 0.2795160710811615, "learning_rate": 9.28010867339324e-06, "loss": 0.0037, "step": 46870 }, { "epoch": 0.7670784586435409, "grad_norm": 0.15072228014469147, "learning_rate": 9.279616500079249e-06, "loss": 0.0032, "step": 46880 }, { "epoch": 0.7672420845946167, "grad_norm": 0.30160611867904663, "learning_rate": 9.279124171639446e-06, "loss": 0.0032, "step": 46890 }, { "epoch": 0.7674057105456925, "grad_norm": 0.49359646439552307, "learning_rate": 9.278631688091675e-06, "loss": 0.0042, "step": 46900 }, { "epoch": 0.7675693364967684, "grad_norm": 0.06053291633725166, "learning_rate": 9.278139049453792e-06, "loss": 0.003, "step": 46910 }, { "epoch": 0.7677329624478443, "grad_norm": 0.17476892471313477, "learning_rate": 9.277646255743648e-06, "loss": 0.0031, "step": 46920 }, { "epoch": 0.76789658839892, "grad_norm": 0.1530722677707672, "learning_rate": 9.27715330697911e-06, "loss": 0.0036, "step": 46930 }, { "epoch": 0.7680602143499959, "grad_norm": 0.16645754873752594, "learning_rate": 9.276660203178043e-06, "loss": 0.0048, "step": 46940 }, { "epoch": 0.7682238403010717, "grad_norm": 0.12035121023654938, "learning_rate": 9.276166944358322e-06, "loss": 0.0043, "step": 46950 }, { "epoch": 0.7683874662521476, "grad_norm": 0.27420181035995483, "learning_rate": 9.275673530537827e-06, "loss": 0.0049, "step": 46960 }, { "epoch": 0.7685510922032235, "grad_norm": 0.22867822647094727, "learning_rate": 9.275179961734444e-06, "loss": 0.0024, "step": 46970 }, { "epoch": 0.7687147181542993, "grad_norm": 0.21577972173690796, "learning_rate": 9.274686237966063e-06, "loss": 0.0029, "step": 46980 }, { "epoch": 0.7688783441053751, "grad_norm": 0.1786114126443863, "learning_rate": 9.27419235925058e-06, "loss": 0.0024, "step": 46990 }, { "epoch": 0.7690419700564509, "grad_norm": 0.05456943437457085, "learning_rate": 9.273698325605894e-06, "loss": 0.0042, "step": 47000 }, { "epoch": 0.7692055960075268, "grad_norm": 0.46462979912757874, "learning_rate": 9.273204137049916e-06, "loss": 0.0026, "step": 47010 }, { "epoch": 0.7693692219586027, "grad_norm": 0.08982861787080765, "learning_rate": 9.27270979360056e-06, "loss": 0.0023, "step": 47020 }, { "epoch": 0.7695328479096785, "grad_norm": 0.17524635791778564, "learning_rate": 9.272215295275743e-06, "loss": 0.0043, "step": 47030 }, { "epoch": 0.7696964738607543, "grad_norm": 0.1675378978252411, "learning_rate": 9.27172064209339e-06, "loss": 0.0035, "step": 47040 }, { "epoch": 0.7698600998118301, "grad_norm": 0.11323246359825134, "learning_rate": 9.271225834071428e-06, "loss": 0.0071, "step": 47050 }, { "epoch": 0.770023725762906, "grad_norm": 0.09345797449350357, "learning_rate": 9.270730871227796e-06, "loss": 0.0022, "step": 47060 }, { "epoch": 0.7701873517139818, "grad_norm": 0.07348580658435822, "learning_rate": 9.270235753580436e-06, "loss": 0.0043, "step": 47070 }, { "epoch": 0.7703509776650577, "grad_norm": 0.1204700842499733, "learning_rate": 9.26974048114729e-06, "loss": 0.0023, "step": 47080 }, { "epoch": 0.7705146036161336, "grad_norm": 0.1438237875699997, "learning_rate": 9.269245053946318e-06, "loss": 0.0035, "step": 47090 }, { "epoch": 0.7706782295672093, "grad_norm": 0.1774197667837143, "learning_rate": 9.268749471995472e-06, "loss": 0.0038, "step": 47100 }, { "epoch": 0.7708418555182852, "grad_norm": 0.14052198827266693, "learning_rate": 9.268253735312716e-06, "loss": 0.003, "step": 47110 }, { "epoch": 0.771005481469361, "grad_norm": 0.09925854206085205, "learning_rate": 9.26775784391602e-06, "loss": 0.0028, "step": 47120 }, { "epoch": 0.7711691074204369, "grad_norm": 0.03176082670688629, "learning_rate": 9.267261797823362e-06, "loss": 0.0046, "step": 47130 }, { "epoch": 0.7713327333715128, "grad_norm": 0.2940159738063812, "learning_rate": 9.266765597052718e-06, "loss": 0.0033, "step": 47140 }, { "epoch": 0.7714963593225885, "grad_norm": 0.07327362149953842, "learning_rate": 9.266269241622077e-06, "loss": 0.0034, "step": 47150 }, { "epoch": 0.7716599852736644, "grad_norm": 0.1620359569787979, "learning_rate": 9.26577273154943e-06, "loss": 0.0025, "step": 47160 }, { "epoch": 0.7718236112247402, "grad_norm": 0.1572045236825943, "learning_rate": 9.265276066852774e-06, "loss": 0.0023, "step": 47170 }, { "epoch": 0.7719872371758161, "grad_norm": 1.5911974906921387, "learning_rate": 9.264779247550111e-06, "loss": 0.0027, "step": 47180 }, { "epoch": 0.772150863126892, "grad_norm": 0.22655841708183289, "learning_rate": 9.26428227365945e-06, "loss": 0.0041, "step": 47190 }, { "epoch": 0.7723144890779677, "grad_norm": 0.16720829904079437, "learning_rate": 9.263785145198806e-06, "loss": 0.0044, "step": 47200 }, { "epoch": 0.7724781150290436, "grad_norm": 0.15368755161762238, "learning_rate": 9.263287862186198e-06, "loss": 0.0022, "step": 47210 }, { "epoch": 0.7726417409801194, "grad_norm": 0.08605608344078064, "learning_rate": 9.262790424639651e-06, "loss": 0.0034, "step": 47220 }, { "epoch": 0.7728053669311953, "grad_norm": 0.25703421235084534, "learning_rate": 9.2622928325772e-06, "loss": 0.0039, "step": 47230 }, { "epoch": 0.7729689928822712, "grad_norm": 0.5112340450286865, "learning_rate": 9.261795086016873e-06, "loss": 0.003, "step": 47240 }, { "epoch": 0.7731326188333469, "grad_norm": 0.13069429993629456, "learning_rate": 9.261297184976721e-06, "loss": 0.0034, "step": 47250 }, { "epoch": 0.7732962447844228, "grad_norm": 0.25137898325920105, "learning_rate": 9.260799129474785e-06, "loss": 0.0019, "step": 47260 }, { "epoch": 0.7734598707354986, "grad_norm": 0.2937735617160797, "learning_rate": 9.260300919529123e-06, "loss": 0.0048, "step": 47270 }, { "epoch": 0.7736234966865745, "grad_norm": 0.19685785472393036, "learning_rate": 9.259802555157792e-06, "loss": 0.0029, "step": 47280 }, { "epoch": 0.7737871226376504, "grad_norm": 0.08265804499387741, "learning_rate": 9.259304036378856e-06, "loss": 0.0037, "step": 47290 }, { "epoch": 0.7739507485887261, "grad_norm": 0.018450217321515083, "learning_rate": 9.258805363210386e-06, "loss": 0.0033, "step": 47300 }, { "epoch": 0.774114374539802, "grad_norm": 0.051352452486753464, "learning_rate": 9.258306535670457e-06, "loss": 0.0019, "step": 47310 }, { "epoch": 0.7742780004908778, "grad_norm": 0.3335985243320465, "learning_rate": 9.25780755377715e-06, "loss": 0.0028, "step": 47320 }, { "epoch": 0.7744416264419537, "grad_norm": 0.1750146746635437, "learning_rate": 9.257308417548553e-06, "loss": 0.0031, "step": 47330 }, { "epoch": 0.7746052523930296, "grad_norm": 0.080111064016819, "learning_rate": 9.256809127002759e-06, "loss": 0.0032, "step": 47340 }, { "epoch": 0.7747688783441053, "grad_norm": 0.15874405205249786, "learning_rate": 9.256309682157863e-06, "loss": 0.0028, "step": 47350 }, { "epoch": 0.7749325042951812, "grad_norm": 0.3410603404045105, "learning_rate": 9.255810083031973e-06, "loss": 0.0026, "step": 47360 }, { "epoch": 0.775096130246257, "grad_norm": 0.07764293253421783, "learning_rate": 9.255310329643195e-06, "loss": 0.0029, "step": 47370 }, { "epoch": 0.7752597561973329, "grad_norm": 0.0763319730758667, "learning_rate": 9.254810422009645e-06, "loss": 0.0025, "step": 47380 }, { "epoch": 0.7754233821484088, "grad_norm": 0.0930410772562027, "learning_rate": 9.254310360149444e-06, "loss": 0.0022, "step": 47390 }, { "epoch": 0.7755870080994846, "grad_norm": 0.22531259059906006, "learning_rate": 9.253810144080716e-06, "loss": 0.0026, "step": 47400 }, { "epoch": 0.7757506340505604, "grad_norm": 0.1400601714849472, "learning_rate": 9.253309773821595e-06, "loss": 0.0043, "step": 47410 }, { "epoch": 0.7759142600016362, "grad_norm": 0.2750956118106842, "learning_rate": 9.252809249390218e-06, "loss": 0.0025, "step": 47420 }, { "epoch": 0.7760778859527121, "grad_norm": 0.1686827540397644, "learning_rate": 9.252308570804725e-06, "loss": 0.0028, "step": 47430 }, { "epoch": 0.776241511903788, "grad_norm": 0.4864672124385834, "learning_rate": 9.251807738083268e-06, "loss": 0.0048, "step": 47440 }, { "epoch": 0.7764051378548638, "grad_norm": 0.17237213253974915, "learning_rate": 9.251306751244e-06, "loss": 0.0028, "step": 47450 }, { "epoch": 0.7765687638059396, "grad_norm": 0.47553494572639465, "learning_rate": 9.250805610305079e-06, "loss": 0.0047, "step": 47460 }, { "epoch": 0.7767323897570154, "grad_norm": 0.19099704921245575, "learning_rate": 9.25030431528467e-06, "loss": 0.0042, "step": 47470 }, { "epoch": 0.7768960157080913, "grad_norm": 0.19183245301246643, "learning_rate": 9.249802866200948e-06, "loss": 0.0024, "step": 47480 }, { "epoch": 0.7770596416591672, "grad_norm": 0.15168438851833344, "learning_rate": 9.249301263072083e-06, "loss": 0.004, "step": 47490 }, { "epoch": 0.777223267610243, "grad_norm": 0.26051074266433716, "learning_rate": 9.248799505916261e-06, "loss": 0.0033, "step": 47500 }, { "epoch": 0.7773868935613188, "grad_norm": 0.202837735414505, "learning_rate": 9.24829759475167e-06, "loss": 0.0029, "step": 47510 }, { "epoch": 0.7775505195123946, "grad_norm": 0.17700165510177612, "learning_rate": 9.247795529596501e-06, "loss": 0.0026, "step": 47520 }, { "epoch": 0.7777141454634705, "grad_norm": 0.2127302885055542, "learning_rate": 9.247293310468954e-06, "loss": 0.0034, "step": 47530 }, { "epoch": 0.7778777714145464, "grad_norm": 0.13977067172527313, "learning_rate": 9.246790937387231e-06, "loss": 0.0057, "step": 47540 }, { "epoch": 0.7780413973656222, "grad_norm": 0.10118871182203293, "learning_rate": 9.246288410369545e-06, "loss": 0.003, "step": 47550 }, { "epoch": 0.778205023316698, "grad_norm": 0.17957332730293274, "learning_rate": 9.24578572943411e-06, "loss": 0.0033, "step": 47560 }, { "epoch": 0.7783686492677738, "grad_norm": 0.2622086703777313, "learning_rate": 9.245282894599145e-06, "loss": 0.0038, "step": 47570 }, { "epoch": 0.7785322752188497, "grad_norm": 0.10515914112329483, "learning_rate": 9.24477990588288e-06, "loss": 0.0031, "step": 47580 }, { "epoch": 0.7786959011699256, "grad_norm": 0.283098042011261, "learning_rate": 9.244276763303544e-06, "loss": 0.0042, "step": 47590 }, { "epoch": 0.7788595271210014, "grad_norm": 0.295404314994812, "learning_rate": 9.243773466879377e-06, "loss": 0.002, "step": 47600 }, { "epoch": 0.7790231530720773, "grad_norm": 0.08057957887649536, "learning_rate": 9.24327001662862e-06, "loss": 0.0024, "step": 47610 }, { "epoch": 0.779186779023153, "grad_norm": 0.2663296163082123, "learning_rate": 9.242766412569526e-06, "loss": 0.0038, "step": 47620 }, { "epoch": 0.7793504049742289, "grad_norm": 0.14266766607761383, "learning_rate": 9.242262654720345e-06, "loss": 0.0022, "step": 47630 }, { "epoch": 0.7795140309253048, "grad_norm": 0.22005395591259003, "learning_rate": 9.24175874309934e-06, "loss": 0.0034, "step": 47640 }, { "epoch": 0.7796776568763806, "grad_norm": 0.07073821872472763, "learning_rate": 9.241254677724773e-06, "loss": 0.0033, "step": 47650 }, { "epoch": 0.7798412828274565, "grad_norm": 0.4702405631542206, "learning_rate": 9.24075045861492e-06, "loss": 0.0029, "step": 47660 }, { "epoch": 0.7800049087785322, "grad_norm": 0.12413022667169571, "learning_rate": 9.240246085788055e-06, "loss": 0.0029, "step": 47670 }, { "epoch": 0.7801685347296081, "grad_norm": 0.11062261462211609, "learning_rate": 9.23974155926246e-06, "loss": 0.004, "step": 47680 }, { "epoch": 0.780332160680684, "grad_norm": 0.2187574952840805, "learning_rate": 9.239236879056423e-06, "loss": 0.0033, "step": 47690 }, { "epoch": 0.7804957866317598, "grad_norm": 0.1472589373588562, "learning_rate": 9.23873204518824e-06, "loss": 0.0028, "step": 47700 }, { "epoch": 0.7806594125828357, "grad_norm": 0.21112596988677979, "learning_rate": 9.238227057676204e-06, "loss": 0.0051, "step": 47710 }, { "epoch": 0.7808230385339114, "grad_norm": 0.09035231173038483, "learning_rate": 9.237721916538625e-06, "loss": 0.004, "step": 47720 }, { "epoch": 0.7809866644849873, "grad_norm": 0.2239183932542801, "learning_rate": 9.237216621793812e-06, "loss": 0.0043, "step": 47730 }, { "epoch": 0.7811502904360632, "grad_norm": 0.14135926961898804, "learning_rate": 9.236711173460082e-06, "loss": 0.0056, "step": 47740 }, { "epoch": 0.781313916387139, "grad_norm": 0.10764330625534058, "learning_rate": 9.236205571555752e-06, "loss": 0.0019, "step": 47750 }, { "epoch": 0.7814775423382149, "grad_norm": 0.14232297241687775, "learning_rate": 9.235699816099153e-06, "loss": 0.0045, "step": 47760 }, { "epoch": 0.7816411682892906, "grad_norm": 0.09331347048282623, "learning_rate": 9.235193907108617e-06, "loss": 0.0038, "step": 47770 }, { "epoch": 0.7818047942403665, "grad_norm": 0.28311392664909363, "learning_rate": 9.234687844602479e-06, "loss": 0.0066, "step": 47780 }, { "epoch": 0.7819684201914424, "grad_norm": 0.15356793999671936, "learning_rate": 9.234181628599083e-06, "loss": 0.0021, "step": 47790 }, { "epoch": 0.7821320461425182, "grad_norm": 0.2692772448062897, "learning_rate": 9.233675259116783e-06, "loss": 0.0056, "step": 47800 }, { "epoch": 0.7822956720935941, "grad_norm": 0.23091362416744232, "learning_rate": 9.233168736173928e-06, "loss": 0.0049, "step": 47810 }, { "epoch": 0.7824592980446698, "grad_norm": 0.06629378348588943, "learning_rate": 9.232662059788881e-06, "loss": 0.0043, "step": 47820 }, { "epoch": 0.7826229239957457, "grad_norm": 0.1617034375667572, "learning_rate": 9.232155229980007e-06, "loss": 0.0021, "step": 47830 }, { "epoch": 0.7827865499468216, "grad_norm": 0.19493144750595093, "learning_rate": 9.23164824676568e-06, "loss": 0.0034, "step": 47840 }, { "epoch": 0.7829501758978974, "grad_norm": 0.060315508395433426, "learning_rate": 9.231141110164273e-06, "loss": 0.0029, "step": 47850 }, { "epoch": 0.7831138018489733, "grad_norm": 0.05947552248835564, "learning_rate": 9.230633820194168e-06, "loss": 0.0026, "step": 47860 }, { "epoch": 0.783277427800049, "grad_norm": 0.19408294558525085, "learning_rate": 9.230126376873757e-06, "loss": 0.0039, "step": 47870 }, { "epoch": 0.7834410537511249, "grad_norm": 0.20794975757598877, "learning_rate": 9.229618780221433e-06, "loss": 0.0039, "step": 47880 }, { "epoch": 0.7836046797022008, "grad_norm": 0.23113468289375305, "learning_rate": 9.229111030255591e-06, "loss": 0.0016, "step": 47890 }, { "epoch": 0.7837683056532766, "grad_norm": 0.18818505108356476, "learning_rate": 9.22860312699464e-06, "loss": 0.0035, "step": 47900 }, { "epoch": 0.7839319316043525, "grad_norm": 0.0857291892170906, "learning_rate": 9.228095070456989e-06, "loss": 0.0037, "step": 47910 }, { "epoch": 0.7840955575554283, "grad_norm": 0.04636092111468315, "learning_rate": 9.227586860661053e-06, "loss": 0.0032, "step": 47920 }, { "epoch": 0.7842591835065041, "grad_norm": 0.08256290853023529, "learning_rate": 9.227078497625253e-06, "loss": 0.0034, "step": 47930 }, { "epoch": 0.7844228094575799, "grad_norm": 0.09330001473426819, "learning_rate": 9.226569981368018e-06, "loss": 0.0073, "step": 47940 }, { "epoch": 0.7845864354086558, "grad_norm": 0.21023191511631012, "learning_rate": 9.22606131190778e-06, "loss": 0.005, "step": 47950 }, { "epoch": 0.7847500613597317, "grad_norm": 0.042733803391456604, "learning_rate": 9.225552489262977e-06, "loss": 0.0031, "step": 47960 }, { "epoch": 0.7849136873108075, "grad_norm": 0.09576703608036041, "learning_rate": 9.225043513452052e-06, "loss": 0.0039, "step": 47970 }, { "epoch": 0.7850773132618833, "grad_norm": 0.5880565643310547, "learning_rate": 9.224534384493453e-06, "loss": 0.0033, "step": 47980 }, { "epoch": 0.7852409392129591, "grad_norm": 0.24097760021686554, "learning_rate": 9.224025102405636e-06, "loss": 0.0036, "step": 47990 }, { "epoch": 0.785404565164035, "grad_norm": 0.12412737309932709, "learning_rate": 9.223515667207061e-06, "loss": 0.0044, "step": 48000 }, { "epoch": 0.7855681911151109, "grad_norm": 0.15705639123916626, "learning_rate": 9.223006078916196e-06, "loss": 0.0034, "step": 48010 }, { "epoch": 0.7857318170661867, "grad_norm": 0.198961541056633, "learning_rate": 9.222496337551509e-06, "loss": 0.0031, "step": 48020 }, { "epoch": 0.7858954430172626, "grad_norm": 0.21408711373806, "learning_rate": 9.221986443131479e-06, "loss": 0.0035, "step": 48030 }, { "epoch": 0.7860590689683383, "grad_norm": 0.19704949855804443, "learning_rate": 9.221476395674586e-06, "loss": 0.0038, "step": 48040 }, { "epoch": 0.7862226949194142, "grad_norm": 0.10040422528982162, "learning_rate": 9.220966195199321e-06, "loss": 0.0021, "step": 48050 }, { "epoch": 0.7863863208704901, "grad_norm": 0.04798143729567528, "learning_rate": 9.220455841724176e-06, "loss": 0.0018, "step": 48060 }, { "epoch": 0.7865499468215659, "grad_norm": 0.18889322876930237, "learning_rate": 9.21994533526765e-06, "loss": 0.0035, "step": 48070 }, { "epoch": 0.7867135727726418, "grad_norm": 0.1173163652420044, "learning_rate": 9.21943467584825e-06, "loss": 0.0028, "step": 48080 }, { "epoch": 0.7868771987237175, "grad_norm": 0.14069336652755737, "learning_rate": 9.21892386348448e-06, "loss": 0.0021, "step": 48090 }, { "epoch": 0.7870408246747934, "grad_norm": 0.1269276887178421, "learning_rate": 9.218412898194865e-06, "loss": 0.0027, "step": 48100 }, { "epoch": 0.7872044506258693, "grad_norm": 0.24161434173583984, "learning_rate": 9.217901779997918e-06, "loss": 0.0022, "step": 48110 }, { "epoch": 0.7873680765769451, "grad_norm": 0.07993855327367783, "learning_rate": 9.217390508912169e-06, "loss": 0.0033, "step": 48120 }, { "epoch": 0.787531702528021, "grad_norm": 0.00963317696005106, "learning_rate": 9.216879084956152e-06, "loss": 0.0021, "step": 48130 }, { "epoch": 0.7876953284790967, "grad_norm": 0.09157359600067139, "learning_rate": 9.216367508148401e-06, "loss": 0.0022, "step": 48140 }, { "epoch": 0.7878589544301726, "grad_norm": 0.13302327692508698, "learning_rate": 9.215855778507463e-06, "loss": 0.0026, "step": 48150 }, { "epoch": 0.7880225803812485, "grad_norm": 0.10796001553535461, "learning_rate": 9.215343896051885e-06, "loss": 0.0028, "step": 48160 }, { "epoch": 0.7881862063323243, "grad_norm": 0.15687522292137146, "learning_rate": 9.214831860800221e-06, "loss": 0.003, "step": 48170 }, { "epoch": 0.7883498322834002, "grad_norm": 0.019233981147408485, "learning_rate": 9.214319672771036e-06, "loss": 0.0033, "step": 48180 }, { "epoch": 0.7885134582344759, "grad_norm": 0.09423023462295532, "learning_rate": 9.213807331982887e-06, "loss": 0.0026, "step": 48190 }, { "epoch": 0.7886770841855518, "grad_norm": 0.08601681888103485, "learning_rate": 9.213294838454352e-06, "loss": 0.0049, "step": 48200 }, { "epoch": 0.7888407101366277, "grad_norm": 0.0757230892777443, "learning_rate": 9.212782192204007e-06, "loss": 0.0032, "step": 48210 }, { "epoch": 0.7890043360877035, "grad_norm": 0.25804081559181213, "learning_rate": 9.21226939325043e-06, "loss": 0.0018, "step": 48220 }, { "epoch": 0.7891679620387794, "grad_norm": 0.06971985101699829, "learning_rate": 9.211756441612211e-06, "loss": 0.0033, "step": 48230 }, { "epoch": 0.7893315879898551, "grad_norm": 0.12275729328393936, "learning_rate": 9.211243337307947e-06, "loss": 0.0026, "step": 48240 }, { "epoch": 0.789495213940931, "grad_norm": 0.5567205548286438, "learning_rate": 9.210730080356232e-06, "loss": 0.0032, "step": 48250 }, { "epoch": 0.7896588398920069, "grad_norm": 0.048873912543058395, "learning_rate": 9.210216670775671e-06, "loss": 0.0041, "step": 48260 }, { "epoch": 0.7898224658430827, "grad_norm": 0.06869818270206451, "learning_rate": 9.209703108584876e-06, "loss": 0.0036, "step": 48270 }, { "epoch": 0.7899860917941586, "grad_norm": 0.09845008701086044, "learning_rate": 9.20918939380246e-06, "loss": 0.0033, "step": 48280 }, { "epoch": 0.7901497177452343, "grad_norm": 0.09102193266153336, "learning_rate": 9.208675526447044e-06, "loss": 0.0024, "step": 48290 }, { "epoch": 0.7903133436963102, "grad_norm": 0.29814067482948303, "learning_rate": 9.208161506537257e-06, "loss": 0.0048, "step": 48300 }, { "epoch": 0.7904769696473861, "grad_norm": 0.08440706878900528, "learning_rate": 9.207647334091729e-06, "loss": 0.0026, "step": 48310 }, { "epoch": 0.7906405955984619, "grad_norm": 0.27004656195640564, "learning_rate": 9.207133009129098e-06, "loss": 0.0041, "step": 48320 }, { "epoch": 0.7908042215495378, "grad_norm": 0.10873109847307205, "learning_rate": 9.206618531668007e-06, "loss": 0.0029, "step": 48330 }, { "epoch": 0.7909678475006136, "grad_norm": 0.168745756149292, "learning_rate": 9.206103901727104e-06, "loss": 0.0045, "step": 48340 }, { "epoch": 0.7911314734516894, "grad_norm": 0.1363922506570816, "learning_rate": 9.205589119325045e-06, "loss": 0.0043, "step": 48350 }, { "epoch": 0.7912950994027653, "grad_norm": 0.1527826189994812, "learning_rate": 9.205074184480488e-06, "loss": 0.0043, "step": 48360 }, { "epoch": 0.7914587253538411, "grad_norm": 0.11487593501806259, "learning_rate": 9.204559097212097e-06, "loss": 0.0047, "step": 48370 }, { "epoch": 0.791622351304917, "grad_norm": 0.2888651490211487, "learning_rate": 9.204043857538546e-06, "loss": 0.0047, "step": 48380 }, { "epoch": 0.7917859772559928, "grad_norm": 0.09659779816865921, "learning_rate": 9.203528465478509e-06, "loss": 0.0037, "step": 48390 }, { "epoch": 0.7919496032070686, "grad_norm": 0.07407717406749725, "learning_rate": 9.203012921050667e-06, "loss": 0.0023, "step": 48400 }, { "epoch": 0.7921132291581445, "grad_norm": 0.20125484466552734, "learning_rate": 9.202497224273708e-06, "loss": 0.0026, "step": 48410 }, { "epoch": 0.7922768551092203, "grad_norm": 0.12445491552352905, "learning_rate": 9.201981375166326e-06, "loss": 0.0024, "step": 48420 }, { "epoch": 0.7924404810602962, "grad_norm": 0.17671945691108704, "learning_rate": 9.201465373747219e-06, "loss": 0.0039, "step": 48430 }, { "epoch": 0.792604107011372, "grad_norm": 0.24571382999420166, "learning_rate": 9.20094922003509e-06, "loss": 0.0031, "step": 48440 }, { "epoch": 0.7927677329624478, "grad_norm": 0.11097700893878937, "learning_rate": 9.200432914048647e-06, "loss": 0.0019, "step": 48450 }, { "epoch": 0.7929313589135237, "grad_norm": 0.1446879804134369, "learning_rate": 9.199916455806606e-06, "loss": 0.0021, "step": 48460 }, { "epoch": 0.7930949848645995, "grad_norm": 0.12244662642478943, "learning_rate": 9.19939984532769e-06, "loss": 0.0034, "step": 48470 }, { "epoch": 0.7932586108156754, "grad_norm": 0.3136637806892395, "learning_rate": 9.198883082630621e-06, "loss": 0.0023, "step": 48480 }, { "epoch": 0.7934222367667512, "grad_norm": 0.060511503368616104, "learning_rate": 9.198366167734133e-06, "loss": 0.0039, "step": 48490 }, { "epoch": 0.793585862717827, "grad_norm": 0.2677363455295563, "learning_rate": 9.197849100656961e-06, "loss": 0.004, "step": 48500 }, { "epoch": 0.7937494886689029, "grad_norm": 0.17779919505119324, "learning_rate": 9.197331881417847e-06, "loss": 0.0037, "step": 48510 }, { "epoch": 0.7939131146199787, "grad_norm": 0.32646405696868896, "learning_rate": 9.196814510035542e-06, "loss": 0.0076, "step": 48520 }, { "epoch": 0.7940767405710546, "grad_norm": 0.18580962717533112, "learning_rate": 9.1962969865288e-06, "loss": 0.0019, "step": 48530 }, { "epoch": 0.7942403665221304, "grad_norm": 0.2901622951030731, "learning_rate": 9.195779310916374e-06, "loss": 0.0037, "step": 48540 }, { "epoch": 0.7944039924732063, "grad_norm": 0.313329815864563, "learning_rate": 9.195261483217034e-06, "loss": 0.0047, "step": 48550 }, { "epoch": 0.7945676184242821, "grad_norm": 0.1565229892730713, "learning_rate": 9.194743503449551e-06, "loss": 0.0045, "step": 48560 }, { "epoch": 0.7947312443753579, "grad_norm": 0.12905044853687286, "learning_rate": 9.194225371632693e-06, "loss": 0.004, "step": 48570 }, { "epoch": 0.7948948703264338, "grad_norm": 0.253761351108551, "learning_rate": 9.193707087785248e-06, "loss": 0.0033, "step": 48580 }, { "epoch": 0.7950584962775096, "grad_norm": 0.2196241021156311, "learning_rate": 9.193188651926e-06, "loss": 0.003, "step": 48590 }, { "epoch": 0.7952221222285855, "grad_norm": 0.21832162141799927, "learning_rate": 9.192670064073743e-06, "loss": 0.0026, "step": 48600 }, { "epoch": 0.7953857481796613, "grad_norm": 0.012147759087383747, "learning_rate": 9.19215132424727e-06, "loss": 0.0021, "step": 48610 }, { "epoch": 0.7955493741307371, "grad_norm": 0.08017176389694214, "learning_rate": 9.19163243246539e-06, "loss": 0.0029, "step": 48620 }, { "epoch": 0.795713000081813, "grad_norm": 0.3405068516731262, "learning_rate": 9.191113388746908e-06, "loss": 0.0031, "step": 48630 }, { "epoch": 0.7958766260328888, "grad_norm": 0.3401029407978058, "learning_rate": 9.190594193110639e-06, "loss": 0.0029, "step": 48640 }, { "epoch": 0.7960402519839647, "grad_norm": 0.21912595629692078, "learning_rate": 9.1900748455754e-06, "loss": 0.0035, "step": 48650 }, { "epoch": 0.7962038779350405, "grad_norm": 0.12028355896472931, "learning_rate": 9.18955534616002e-06, "loss": 0.0043, "step": 48660 }, { "epoch": 0.7963675038861163, "grad_norm": 0.1338496059179306, "learning_rate": 9.189035694883326e-06, "loss": 0.0032, "step": 48670 }, { "epoch": 0.7965311298371922, "grad_norm": 0.013301242142915726, "learning_rate": 9.188515891764159e-06, "loss": 0.0047, "step": 48680 }, { "epoch": 0.796694755788268, "grad_norm": 0.09481485188007355, "learning_rate": 9.187995936821357e-06, "loss": 0.0033, "step": 48690 }, { "epoch": 0.7968583817393439, "grad_norm": 0.4685578942298889, "learning_rate": 9.187475830073768e-06, "loss": 0.0068, "step": 48700 }, { "epoch": 0.7970220076904198, "grad_norm": 0.5887050628662109, "learning_rate": 9.186955571540244e-06, "loss": 0.0087, "step": 48710 }, { "epoch": 0.7971856336414955, "grad_norm": 0.17826080322265625, "learning_rate": 9.186435161239645e-06, "loss": 0.0038, "step": 48720 }, { "epoch": 0.7973492595925714, "grad_norm": 0.17676234245300293, "learning_rate": 9.18591459919083e-06, "loss": 0.0034, "step": 48730 }, { "epoch": 0.7975128855436472, "grad_norm": 0.14908593893051147, "learning_rate": 9.185393885412675e-06, "loss": 0.004, "step": 48740 }, { "epoch": 0.7976765114947231, "grad_norm": 0.16212302446365356, "learning_rate": 9.184873019924048e-06, "loss": 0.0033, "step": 48750 }, { "epoch": 0.797840137445799, "grad_norm": 0.19955624639987946, "learning_rate": 9.184352002743835e-06, "loss": 0.0022, "step": 48760 }, { "epoch": 0.7980037633968747, "grad_norm": 0.08536086231470108, "learning_rate": 9.183830833890916e-06, "loss": 0.0025, "step": 48770 }, { "epoch": 0.7981673893479506, "grad_norm": 0.16760310530662537, "learning_rate": 9.183309513384186e-06, "loss": 0.0046, "step": 48780 }, { "epoch": 0.7983310152990264, "grad_norm": 0.27276304364204407, "learning_rate": 9.18278804124254e-06, "loss": 0.0037, "step": 48790 }, { "epoch": 0.7984946412501023, "grad_norm": 0.07501021027565002, "learning_rate": 9.182266417484881e-06, "loss": 0.003, "step": 48800 }, { "epoch": 0.798658267201178, "grad_norm": 0.1924322098493576, "learning_rate": 9.181744642130117e-06, "loss": 0.0029, "step": 48810 }, { "epoch": 0.7988218931522539, "grad_norm": 0.1344781070947647, "learning_rate": 9.18122271519716e-06, "loss": 0.0018, "step": 48820 }, { "epoch": 0.7989855191033298, "grad_norm": 0.1316998302936554, "learning_rate": 9.180700636704927e-06, "loss": 0.006, "step": 48830 }, { "epoch": 0.7991491450544056, "grad_norm": 0.23098084330558777, "learning_rate": 9.180178406672346e-06, "loss": 0.0048, "step": 48840 }, { "epoch": 0.7993127710054815, "grad_norm": 0.22029943764209747, "learning_rate": 9.179656025118344e-06, "loss": 0.0025, "step": 48850 }, { "epoch": 0.7994763969565573, "grad_norm": 0.2143545001745224, "learning_rate": 9.179133492061856e-06, "loss": 0.0035, "step": 48860 }, { "epoch": 0.7996400229076331, "grad_norm": 0.131936177611351, "learning_rate": 9.178610807521824e-06, "loss": 0.003, "step": 48870 }, { "epoch": 0.799803648858709, "grad_norm": 0.10593626648187637, "learning_rate": 9.178087971517194e-06, "loss": 0.0035, "step": 48880 }, { "epoch": 0.7999672748097848, "grad_norm": 0.13795939087867737, "learning_rate": 9.177564984066915e-06, "loss": 0.0034, "step": 48890 }, { "epoch": 0.8001309007608607, "grad_norm": 0.08845298737287521, "learning_rate": 9.177041845189947e-06, "loss": 0.0037, "step": 48900 }, { "epoch": 0.8002945267119365, "grad_norm": 0.2725151479244232, "learning_rate": 9.17651855490525e-06, "loss": 0.0033, "step": 48910 }, { "epoch": 0.8004581526630123, "grad_norm": 0.07664602994918823, "learning_rate": 9.175995113231794e-06, "loss": 0.0039, "step": 48920 }, { "epoch": 0.8006217786140882, "grad_norm": 0.12458325922489166, "learning_rate": 9.175471520188552e-06, "loss": 0.0026, "step": 48930 }, { "epoch": 0.800785404565164, "grad_norm": 0.1353135108947754, "learning_rate": 9.174947775794504e-06, "loss": 0.0036, "step": 48940 }, { "epoch": 0.8009490305162399, "grad_norm": 0.06275133043527603, "learning_rate": 9.174423880068633e-06, "loss": 0.0017, "step": 48950 }, { "epoch": 0.8011126564673157, "grad_norm": 0.12637528777122498, "learning_rate": 9.173899833029928e-06, "loss": 0.0036, "step": 48960 }, { "epoch": 0.8012762824183915, "grad_norm": 0.34527072310447693, "learning_rate": 9.173375634697387e-06, "loss": 0.0052, "step": 48970 }, { "epoch": 0.8014399083694674, "grad_norm": 0.10525242239236832, "learning_rate": 9.17285128509001e-06, "loss": 0.0021, "step": 48980 }, { "epoch": 0.8016035343205432, "grad_norm": 0.031754203140735626, "learning_rate": 9.172326784226802e-06, "loss": 0.0025, "step": 48990 }, { "epoch": 0.8017671602716191, "grad_norm": 0.2534172534942627, "learning_rate": 9.171802132126777e-06, "loss": 0.0031, "step": 49000 }, { "epoch": 0.8019307862226949, "grad_norm": 0.03221935033798218, "learning_rate": 9.17127732880895e-06, "loss": 0.0048, "step": 49010 }, { "epoch": 0.8020944121737708, "grad_norm": 0.159492626786232, "learning_rate": 9.170752374292347e-06, "loss": 0.0031, "step": 49020 }, { "epoch": 0.8022580381248466, "grad_norm": 0.1019534170627594, "learning_rate": 9.170227268595992e-06, "loss": 0.0048, "step": 49030 }, { "epoch": 0.8024216640759224, "grad_norm": 0.5864030718803406, "learning_rate": 9.169702011738923e-06, "loss": 0.0044, "step": 49040 }, { "epoch": 0.8025852900269983, "grad_norm": 0.12144570797681808, "learning_rate": 9.169176603740178e-06, "loss": 0.0038, "step": 49050 }, { "epoch": 0.8027489159780741, "grad_norm": 0.14907336235046387, "learning_rate": 9.168651044618802e-06, "loss": 0.0035, "step": 49060 }, { "epoch": 0.80291254192915, "grad_norm": 0.24622511863708496, "learning_rate": 9.168125334393843e-06, "loss": 0.0027, "step": 49070 }, { "epoch": 0.8030761678802258, "grad_norm": 0.3472890853881836, "learning_rate": 9.167599473084358e-06, "loss": 0.0032, "step": 49080 }, { "epoch": 0.8032397938313016, "grad_norm": 0.49071112275123596, "learning_rate": 9.167073460709411e-06, "loss": 0.0023, "step": 49090 }, { "epoch": 0.8034034197823775, "grad_norm": 0.12722264230251312, "learning_rate": 9.166547297288064e-06, "loss": 0.004, "step": 49100 }, { "epoch": 0.8035670457334533, "grad_norm": 0.5407263040542603, "learning_rate": 9.166020982839394e-06, "loss": 0.0025, "step": 49110 }, { "epoch": 0.8037306716845292, "grad_norm": 0.11173861473798752, "learning_rate": 9.165494517382474e-06, "loss": 0.0028, "step": 49120 }, { "epoch": 0.803894297635605, "grad_norm": 0.14655622839927673, "learning_rate": 9.164967900936392e-06, "loss": 0.0049, "step": 49130 }, { "epoch": 0.8040579235866808, "grad_norm": 0.24831844866275787, "learning_rate": 9.16444113352023e-06, "loss": 0.0018, "step": 49140 }, { "epoch": 0.8042215495377567, "grad_norm": 0.03322440758347511, "learning_rate": 9.16391421515309e-06, "loss": 0.0036, "step": 49150 }, { "epoch": 0.8043851754888325, "grad_norm": 0.1715569645166397, "learning_rate": 9.163387145854063e-06, "loss": 0.0038, "step": 49160 }, { "epoch": 0.8045488014399084, "grad_norm": 0.1208120658993721, "learning_rate": 9.162859925642263e-06, "loss": 0.0062, "step": 49170 }, { "epoch": 0.8047124273909843, "grad_norm": 0.18374855816364288, "learning_rate": 9.162332554536793e-06, "loss": 0.0026, "step": 49180 }, { "epoch": 0.80487605334206, "grad_norm": 0.10204806178808212, "learning_rate": 9.161805032556771e-06, "loss": 0.0056, "step": 49190 }, { "epoch": 0.8050396792931359, "grad_norm": 0.5097343325614929, "learning_rate": 9.161277359721322e-06, "loss": 0.0029, "step": 49200 }, { "epoch": 0.8052033052442117, "grad_norm": 0.21722246706485748, "learning_rate": 9.160749536049567e-06, "loss": 0.0025, "step": 49210 }, { "epoch": 0.8053669311952876, "grad_norm": 0.20632918179035187, "learning_rate": 9.160221561560643e-06, "loss": 0.0033, "step": 49220 }, { "epoch": 0.8055305571463635, "grad_norm": 0.2717243731021881, "learning_rate": 9.159693436273687e-06, "loss": 0.0034, "step": 49230 }, { "epoch": 0.8056941830974392, "grad_norm": 0.19756168127059937, "learning_rate": 9.159165160207842e-06, "loss": 0.0032, "step": 49240 }, { "epoch": 0.8058578090485151, "grad_norm": 0.4554564952850342, "learning_rate": 9.158636733382253e-06, "loss": 0.0056, "step": 49250 }, { "epoch": 0.8060214349995909, "grad_norm": 0.05653040483593941, "learning_rate": 9.15810815581608e-06, "loss": 0.0037, "step": 49260 }, { "epoch": 0.8061850609506668, "grad_norm": 0.08855530619621277, "learning_rate": 9.15757942752848e-06, "loss": 0.0016, "step": 49270 }, { "epoch": 0.8063486869017427, "grad_norm": 0.04314913600683212, "learning_rate": 9.157050548538617e-06, "loss": 0.0022, "step": 49280 }, { "epoch": 0.8065123128528184, "grad_norm": 0.18650372326374054, "learning_rate": 9.156521518865665e-06, "loss": 0.0027, "step": 49290 }, { "epoch": 0.8066759388038943, "grad_norm": 0.17142866551876068, "learning_rate": 9.155992338528796e-06, "loss": 0.0033, "step": 49300 }, { "epoch": 0.8068395647549701, "grad_norm": 0.3487672805786133, "learning_rate": 9.155463007547193e-06, "loss": 0.0066, "step": 49310 }, { "epoch": 0.807003190706046, "grad_norm": 0.045737870037555695, "learning_rate": 9.154933525940045e-06, "loss": 0.0031, "step": 49320 }, { "epoch": 0.8071668166571219, "grad_norm": 0.3658466339111328, "learning_rate": 9.154403893726541e-06, "loss": 0.0021, "step": 49330 }, { "epoch": 0.8073304426081976, "grad_norm": 0.08573342114686966, "learning_rate": 9.153874110925882e-06, "loss": 0.0022, "step": 49340 }, { "epoch": 0.8074940685592735, "grad_norm": 0.1402796506881714, "learning_rate": 9.15334417755727e-06, "loss": 0.0029, "step": 49350 }, { "epoch": 0.8076576945103493, "grad_norm": 0.3999117612838745, "learning_rate": 9.152814093639914e-06, "loss": 0.0039, "step": 49360 }, { "epoch": 0.8078213204614252, "grad_norm": 0.09455102682113647, "learning_rate": 9.152283859193027e-06, "loss": 0.0022, "step": 49370 }, { "epoch": 0.8079849464125011, "grad_norm": 0.008731618523597717, "learning_rate": 9.151753474235832e-06, "loss": 0.0021, "step": 49380 }, { "epoch": 0.8081485723635768, "grad_norm": 0.1310076117515564, "learning_rate": 9.151222938787549e-06, "loss": 0.003, "step": 49390 }, { "epoch": 0.8083121983146527, "grad_norm": 0.062411386519670486, "learning_rate": 9.150692252867414e-06, "loss": 0.0034, "step": 49400 }, { "epoch": 0.8084758242657285, "grad_norm": 0.36796289682388306, "learning_rate": 9.150161416494661e-06, "loss": 0.004, "step": 49410 }, { "epoch": 0.8086394502168044, "grad_norm": 0.24233144521713257, "learning_rate": 9.14963042968853e-06, "loss": 0.0029, "step": 49420 }, { "epoch": 0.8088030761678803, "grad_norm": 0.15177123248577118, "learning_rate": 9.149099292468268e-06, "loss": 0.0039, "step": 49430 }, { "epoch": 0.808966702118956, "grad_norm": 0.08123491704463959, "learning_rate": 9.148568004853131e-06, "loss": 0.0016, "step": 49440 }, { "epoch": 0.8091303280700319, "grad_norm": 0.2115696370601654, "learning_rate": 9.148036566862375e-06, "loss": 0.0035, "step": 49450 }, { "epoch": 0.8092939540211077, "grad_norm": 0.25085505843162537, "learning_rate": 9.147504978515262e-06, "loss": 0.0031, "step": 49460 }, { "epoch": 0.8094575799721836, "grad_norm": 0.12272707372903824, "learning_rate": 9.14697323983106e-06, "loss": 0.0023, "step": 49470 }, { "epoch": 0.8096212059232595, "grad_norm": 0.12907728552818298, "learning_rate": 9.146441350829048e-06, "loss": 0.0024, "step": 49480 }, { "epoch": 0.8097848318743353, "grad_norm": 0.08441904187202454, "learning_rate": 9.145909311528502e-06, "loss": 0.0023, "step": 49490 }, { "epoch": 0.8099484578254111, "grad_norm": 0.10625497996807098, "learning_rate": 9.145377121948707e-06, "loss": 0.0045, "step": 49500 }, { "epoch": 0.8101120837764869, "grad_norm": 0.12744413316249847, "learning_rate": 9.144844782108954e-06, "loss": 0.0033, "step": 49510 }, { "epoch": 0.8102757097275628, "grad_norm": 0.1864137500524521, "learning_rate": 9.144312292028542e-06, "loss": 0.0026, "step": 49520 }, { "epoch": 0.8104393356786387, "grad_norm": 0.07206683605909348, "learning_rate": 9.143779651726768e-06, "loss": 0.0044, "step": 49530 }, { "epoch": 0.8106029616297145, "grad_norm": 0.2719031572341919, "learning_rate": 9.143246861222941e-06, "loss": 0.0049, "step": 49540 }, { "epoch": 0.8107665875807903, "grad_norm": 0.1794460564851761, "learning_rate": 9.142713920536375e-06, "loss": 0.003, "step": 49550 }, { "epoch": 0.8109302135318661, "grad_norm": 0.0942332074046135, "learning_rate": 9.142180829686384e-06, "loss": 0.0029, "step": 49560 }, { "epoch": 0.811093839482942, "grad_norm": 0.09986443817615509, "learning_rate": 9.141647588692295e-06, "loss": 0.0031, "step": 49570 }, { "epoch": 0.8112574654340179, "grad_norm": 0.20012661814689636, "learning_rate": 9.141114197573435e-06, "loss": 0.0029, "step": 49580 }, { "epoch": 0.8114210913850937, "grad_norm": 0.12359360605478287, "learning_rate": 9.140580656349138e-06, "loss": 0.0044, "step": 49590 }, { "epoch": 0.8115847173361695, "grad_norm": 0.059907495975494385, "learning_rate": 9.140046965038743e-06, "loss": 0.0018, "step": 49600 }, { "epoch": 0.8117483432872453, "grad_norm": 0.05025084689259529, "learning_rate": 9.139513123661596e-06, "loss": 0.0034, "step": 49610 }, { "epoch": 0.8119119692383212, "grad_norm": 0.0567028634250164, "learning_rate": 9.138979132237047e-06, "loss": 0.0042, "step": 49620 }, { "epoch": 0.8120755951893971, "grad_norm": 0.04546193405985832, "learning_rate": 9.138444990784455e-06, "loss": 0.0028, "step": 49630 }, { "epoch": 0.8122392211404729, "grad_norm": 0.05458003655076027, "learning_rate": 9.137910699323175e-06, "loss": 0.0041, "step": 49640 }, { "epoch": 0.8124028470915488, "grad_norm": 0.0964931845664978, "learning_rate": 9.137376257872578e-06, "loss": 0.0026, "step": 49650 }, { "epoch": 0.8125664730426245, "grad_norm": 0.058308687061071396, "learning_rate": 9.136841666452037e-06, "loss": 0.0041, "step": 49660 }, { "epoch": 0.8127300989937004, "grad_norm": 0.06652742624282837, "learning_rate": 9.136306925080929e-06, "loss": 0.0025, "step": 49670 }, { "epoch": 0.8128937249447763, "grad_norm": 0.1931263655424118, "learning_rate": 9.135772033778634e-06, "loss": 0.003, "step": 49680 }, { "epoch": 0.8130573508958521, "grad_norm": 0.18454749882221222, "learning_rate": 9.135236992564545e-06, "loss": 0.0017, "step": 49690 }, { "epoch": 0.813220976846928, "grad_norm": 0.16015540063381195, "learning_rate": 9.134701801458052e-06, "loss": 0.0027, "step": 49700 }, { "epoch": 0.8133846027980037, "grad_norm": 0.07232258468866348, "learning_rate": 9.134166460478555e-06, "loss": 0.0031, "step": 49710 }, { "epoch": 0.8135482287490796, "grad_norm": 0.1756167709827423, "learning_rate": 9.133630969645463e-06, "loss": 0.0028, "step": 49720 }, { "epoch": 0.8137118547001554, "grad_norm": 0.19829516112804413, "learning_rate": 9.133095328978181e-06, "loss": 0.0047, "step": 49730 }, { "epoch": 0.8138754806512313, "grad_norm": 0.1780901700258255, "learning_rate": 9.132559538496128e-06, "loss": 0.0029, "step": 49740 }, { "epoch": 0.8140391066023072, "grad_norm": 0.13632865250110626, "learning_rate": 9.132023598218724e-06, "loss": 0.0024, "step": 49750 }, { "epoch": 0.8142027325533829, "grad_norm": 0.2727030813694, "learning_rate": 9.131487508165397e-06, "loss": 0.0044, "step": 49760 }, { "epoch": 0.8143663585044588, "grad_norm": 0.14128953218460083, "learning_rate": 9.130951268355576e-06, "loss": 0.0017, "step": 49770 }, { "epoch": 0.8145299844555346, "grad_norm": 0.1939658522605896, "learning_rate": 9.1304148788087e-06, "loss": 0.0044, "step": 49780 }, { "epoch": 0.8146936104066105, "grad_norm": 0.09017079323530197, "learning_rate": 9.129878339544213e-06, "loss": 0.0043, "step": 49790 }, { "epoch": 0.8148572363576864, "grad_norm": 0.14885945618152618, "learning_rate": 9.12934165058156e-06, "loss": 0.0023, "step": 49800 }, { "epoch": 0.8150208623087621, "grad_norm": 0.008533250540494919, "learning_rate": 9.1288048119402e-06, "loss": 0.0026, "step": 49810 }, { "epoch": 0.815184488259838, "grad_norm": 0.12529003620147705, "learning_rate": 9.128267823639585e-06, "loss": 0.0048, "step": 49820 }, { "epoch": 0.8153481142109138, "grad_norm": 0.09427347779273987, "learning_rate": 9.127730685699186e-06, "loss": 0.0031, "step": 49830 }, { "epoch": 0.8155117401619897, "grad_norm": 0.06731635332107544, "learning_rate": 9.12719339813847e-06, "loss": 0.002, "step": 49840 }, { "epoch": 0.8156753661130656, "grad_norm": 0.12200576066970825, "learning_rate": 9.126655960976912e-06, "loss": 0.0029, "step": 49850 }, { "epoch": 0.8158389920641413, "grad_norm": 0.19231025874614716, "learning_rate": 9.126118374233994e-06, "loss": 0.0081, "step": 49860 }, { "epoch": 0.8160026180152172, "grad_norm": 0.17219844460487366, "learning_rate": 9.125580637929203e-06, "loss": 0.0028, "step": 49870 }, { "epoch": 0.816166243966293, "grad_norm": 0.10270903259515762, "learning_rate": 9.125042752082027e-06, "loss": 0.0037, "step": 49880 }, { "epoch": 0.8163298699173689, "grad_norm": 0.06245635077357292, "learning_rate": 9.124504716711968e-06, "loss": 0.0036, "step": 49890 }, { "epoch": 0.8164934958684448, "grad_norm": 0.3058038055896759, "learning_rate": 9.123966531838525e-06, "loss": 0.0043, "step": 49900 }, { "epoch": 0.8166571218195205, "grad_norm": 0.0785287469625473, "learning_rate": 9.123428197481205e-06, "loss": 0.0046, "step": 49910 }, { "epoch": 0.8168207477705964, "grad_norm": 0.0849079117178917, "learning_rate": 9.122889713659526e-06, "loss": 0.0032, "step": 49920 }, { "epoch": 0.8169843737216722, "grad_norm": 0.0672604963183403, "learning_rate": 9.122351080393003e-06, "loss": 0.0024, "step": 49930 }, { "epoch": 0.8171479996727481, "grad_norm": 0.2374507188796997, "learning_rate": 9.121812297701162e-06, "loss": 0.0023, "step": 49940 }, { "epoch": 0.817311625623824, "grad_norm": 0.06891575455665588, "learning_rate": 9.12127336560353e-06, "loss": 0.0039, "step": 49950 }, { "epoch": 0.8174752515748998, "grad_norm": 0.14454405009746552, "learning_rate": 9.120734284119645e-06, "loss": 0.003, "step": 49960 }, { "epoch": 0.8176388775259756, "grad_norm": 0.052260179072618484, "learning_rate": 9.120195053269046e-06, "loss": 0.0026, "step": 49970 }, { "epoch": 0.8178025034770514, "grad_norm": 0.17448776960372925, "learning_rate": 9.119655673071278e-06, "loss": 0.0024, "step": 49980 }, { "epoch": 0.8179661294281273, "grad_norm": 0.21683809161186218, "learning_rate": 9.119116143545894e-06, "loss": 0.0037, "step": 49990 }, { "epoch": 0.8181297553792032, "grad_norm": 0.25295209884643555, "learning_rate": 9.11857646471245e-06, "loss": 0.0026, "step": 50000 }, { "epoch": 0.8181297553792032, "eval_loss": 0.002267897129058838, "eval_runtime": 3.0955, "eval_samples_per_second": 64.609, "eval_steps_per_second": 16.152, "step": 50000 }, { "epoch": 0.818293381330279, "grad_norm": 0.1907290369272232, "learning_rate": 9.118036636590507e-06, "loss": 0.0039, "step": 50010 }, { "epoch": 0.8184570072813548, "grad_norm": 0.08578497916460037, "learning_rate": 9.117496659199633e-06, "loss": 0.0034, "step": 50020 }, { "epoch": 0.8186206332324306, "grad_norm": 0.4064314067363739, "learning_rate": 9.116956532559403e-06, "loss": 0.004, "step": 50030 }, { "epoch": 0.8187842591835065, "grad_norm": 0.17687946557998657, "learning_rate": 9.116416256689393e-06, "loss": 0.0039, "step": 50040 }, { "epoch": 0.8189478851345824, "grad_norm": 0.22434987127780914, "learning_rate": 9.115875831609187e-06, "loss": 0.0037, "step": 50050 }, { "epoch": 0.8191115110856582, "grad_norm": 0.029149552807211876, "learning_rate": 9.115335257338374e-06, "loss": 0.0034, "step": 50060 }, { "epoch": 0.819275137036734, "grad_norm": 0.09738941490650177, "learning_rate": 9.114794533896549e-06, "loss": 0.003, "step": 50070 }, { "epoch": 0.8194387629878098, "grad_norm": 0.08028104901313782, "learning_rate": 9.114253661303311e-06, "loss": 0.0027, "step": 50080 }, { "epoch": 0.8196023889388857, "grad_norm": 0.13614508509635925, "learning_rate": 9.113712639578268e-06, "loss": 0.0035, "step": 50090 }, { "epoch": 0.8197660148899616, "grad_norm": 0.3525117337703705, "learning_rate": 9.113171468741028e-06, "loss": 0.0042, "step": 50100 }, { "epoch": 0.8199296408410374, "grad_norm": 0.1918036788702011, "learning_rate": 9.112630148811207e-06, "loss": 0.0038, "step": 50110 }, { "epoch": 0.8200932667921133, "grad_norm": 0.13757005333900452, "learning_rate": 9.112088679808429e-06, "loss": 0.0022, "step": 50120 }, { "epoch": 0.820256892743189, "grad_norm": 0.21057748794555664, "learning_rate": 9.11154706175232e-06, "loss": 0.0046, "step": 50130 }, { "epoch": 0.8204205186942649, "grad_norm": 0.2428678423166275, "learning_rate": 9.11100529466251e-06, "loss": 0.0038, "step": 50140 }, { "epoch": 0.8205841446453408, "grad_norm": 0.09326040744781494, "learning_rate": 9.11046337855864e-06, "loss": 0.0029, "step": 50150 }, { "epoch": 0.8207477705964166, "grad_norm": 0.12856489419937134, "learning_rate": 9.109921313460351e-06, "loss": 0.0032, "step": 50160 }, { "epoch": 0.8209113965474925, "grad_norm": 0.06457704305648804, "learning_rate": 9.109379099387292e-06, "loss": 0.0027, "step": 50170 }, { "epoch": 0.8210750224985682, "grad_norm": 0.10474004596471786, "learning_rate": 9.10883673635912e-06, "loss": 0.0033, "step": 50180 }, { "epoch": 0.8212386484496441, "grad_norm": 0.20883670449256897, "learning_rate": 9.108294224395488e-06, "loss": 0.0025, "step": 50190 }, { "epoch": 0.82140227440072, "grad_norm": 0.1303151696920395, "learning_rate": 9.107751563516065e-06, "loss": 0.004, "step": 50200 }, { "epoch": 0.8215659003517958, "grad_norm": 0.19828058779239655, "learning_rate": 9.107208753740522e-06, "loss": 0.0029, "step": 50210 }, { "epoch": 0.8217295263028717, "grad_norm": 0.4396517276763916, "learning_rate": 9.106665795088533e-06, "loss": 0.0033, "step": 50220 }, { "epoch": 0.8218931522539474, "grad_norm": 0.1698131412267685, "learning_rate": 9.106122687579779e-06, "loss": 0.0027, "step": 50230 }, { "epoch": 0.8220567782050233, "grad_norm": 0.03647887706756592, "learning_rate": 9.105579431233946e-06, "loss": 0.0027, "step": 50240 }, { "epoch": 0.8222204041560992, "grad_norm": 0.2587306797504425, "learning_rate": 9.105036026070726e-06, "loss": 0.0035, "step": 50250 }, { "epoch": 0.822384030107175, "grad_norm": 0.1795492321252823, "learning_rate": 9.104492472109816e-06, "loss": 0.0024, "step": 50260 }, { "epoch": 0.8225476560582509, "grad_norm": 0.0518924742937088, "learning_rate": 9.103948769370918e-06, "loss": 0.0027, "step": 50270 }, { "epoch": 0.8227112820093266, "grad_norm": 0.11034813523292542, "learning_rate": 9.103404917873742e-06, "loss": 0.0025, "step": 50280 }, { "epoch": 0.8228749079604025, "grad_norm": 0.14446285367012024, "learning_rate": 9.102860917638e-06, "loss": 0.002, "step": 50290 }, { "epoch": 0.8230385339114784, "grad_norm": 0.1639178991317749, "learning_rate": 9.10231676868341e-06, "loss": 0.0038, "step": 50300 }, { "epoch": 0.8232021598625542, "grad_norm": 0.04208364337682724, "learning_rate": 9.101772471029697e-06, "loss": 0.0025, "step": 50310 }, { "epoch": 0.8233657858136301, "grad_norm": 0.10019554197788239, "learning_rate": 9.10122802469659e-06, "loss": 0.0038, "step": 50320 }, { "epoch": 0.8235294117647058, "grad_norm": 0.16462786495685577, "learning_rate": 9.100683429703827e-06, "loss": 0.0032, "step": 50330 }, { "epoch": 0.8236930377157817, "grad_norm": 0.25822529196739197, "learning_rate": 9.100138686071143e-06, "loss": 0.0023, "step": 50340 }, { "epoch": 0.8238566636668576, "grad_norm": 0.1642756164073944, "learning_rate": 9.099593793818285e-06, "loss": 0.0064, "step": 50350 }, { "epoch": 0.8240202896179334, "grad_norm": 0.19849711656570435, "learning_rate": 9.099048752965005e-06, "loss": 0.0026, "step": 50360 }, { "epoch": 0.8241839155690093, "grad_norm": 0.23884549736976624, "learning_rate": 9.098503563531063e-06, "loss": 0.0022, "step": 50370 }, { "epoch": 0.824347541520085, "grad_norm": 0.12391776591539383, "learning_rate": 9.097958225536214e-06, "loss": 0.0014, "step": 50380 }, { "epoch": 0.8245111674711609, "grad_norm": 0.09041792899370193, "learning_rate": 9.09741273900023e-06, "loss": 0.0023, "step": 50390 }, { "epoch": 0.8246747934222368, "grad_norm": 0.2077135592699051, "learning_rate": 9.096867103942881e-06, "loss": 0.0034, "step": 50400 }, { "epoch": 0.8248384193733126, "grad_norm": 0.3110194504261017, "learning_rate": 9.096321320383948e-06, "loss": 0.0036, "step": 50410 }, { "epoch": 0.8250020453243885, "grad_norm": 0.2612054646015167, "learning_rate": 9.09577538834321e-06, "loss": 0.0033, "step": 50420 }, { "epoch": 0.8251656712754643, "grad_norm": 0.11150077730417252, "learning_rate": 9.095229307840459e-06, "loss": 0.0029, "step": 50430 }, { "epoch": 0.8253292972265401, "grad_norm": 0.17644207179546356, "learning_rate": 9.09468307889549e-06, "loss": 0.0033, "step": 50440 }, { "epoch": 0.825492923177616, "grad_norm": 0.21658693253993988, "learning_rate": 9.094136701528098e-06, "loss": 0.002, "step": 50450 }, { "epoch": 0.8256565491286918, "grad_norm": 0.08151908218860626, "learning_rate": 9.093590175758091e-06, "loss": 0.002, "step": 50460 }, { "epoch": 0.8258201750797677, "grad_norm": 0.10395552963018417, "learning_rate": 9.09304350160528e-06, "loss": 0.0045, "step": 50470 }, { "epoch": 0.8259838010308435, "grad_norm": 0.22743777930736542, "learning_rate": 9.092496679089479e-06, "loss": 0.0033, "step": 50480 }, { "epoch": 0.8261474269819193, "grad_norm": 0.33236101269721985, "learning_rate": 9.09194970823051e-06, "loss": 0.0033, "step": 50490 }, { "epoch": 0.8263110529329952, "grad_norm": 0.11979358643293381, "learning_rate": 9.091402589048197e-06, "loss": 0.0044, "step": 50500 }, { "epoch": 0.826474678884071, "grad_norm": 0.4249751567840576, "learning_rate": 9.090855321562375e-06, "loss": 0.0043, "step": 50510 }, { "epoch": 0.8266383048351469, "grad_norm": 0.04183148592710495, "learning_rate": 9.090307905792879e-06, "loss": 0.0026, "step": 50520 }, { "epoch": 0.8268019307862227, "grad_norm": 0.14201410114765167, "learning_rate": 9.089760341759552e-06, "loss": 0.0028, "step": 50530 }, { "epoch": 0.8269655567372985, "grad_norm": 0.13985739648342133, "learning_rate": 9.089212629482244e-06, "loss": 0.0031, "step": 50540 }, { "epoch": 0.8271291826883744, "grad_norm": 0.054805707186460495, "learning_rate": 9.088664768980806e-06, "loss": 0.0028, "step": 50550 }, { "epoch": 0.8272928086394502, "grad_norm": 0.13985218107700348, "learning_rate": 9.088116760275095e-06, "loss": 0.0031, "step": 50560 }, { "epoch": 0.8274564345905261, "grad_norm": 0.1397520750761032, "learning_rate": 9.087568603384977e-06, "loss": 0.002, "step": 50570 }, { "epoch": 0.8276200605416019, "grad_norm": 0.10762417316436768, "learning_rate": 9.087020298330324e-06, "loss": 0.002, "step": 50580 }, { "epoch": 0.8277836864926778, "grad_norm": 0.10410875082015991, "learning_rate": 9.086471845131007e-06, "loss": 0.0026, "step": 50590 }, { "epoch": 0.8279473124437535, "grad_norm": 0.14096710085868835, "learning_rate": 9.085923243806906e-06, "loss": 0.002, "step": 50600 }, { "epoch": 0.8281109383948294, "grad_norm": 0.12788322567939758, "learning_rate": 9.085374494377908e-06, "loss": 0.0024, "step": 50610 }, { "epoch": 0.8282745643459053, "grad_norm": 0.36692094802856445, "learning_rate": 9.084825596863904e-06, "loss": 0.0037, "step": 50620 }, { "epoch": 0.8284381902969811, "grad_norm": 0.07190009951591492, "learning_rate": 9.08427655128479e-06, "loss": 0.0026, "step": 50630 }, { "epoch": 0.828601816248057, "grad_norm": 0.27206164598464966, "learning_rate": 9.083727357660466e-06, "loss": 0.0038, "step": 50640 }, { "epoch": 0.8287654421991327, "grad_norm": 0.11852709203958511, "learning_rate": 9.08317801601084e-06, "loss": 0.0032, "step": 50650 }, { "epoch": 0.8289290681502086, "grad_norm": 0.3692225515842438, "learning_rate": 9.082628526355828e-06, "loss": 0.0044, "step": 50660 }, { "epoch": 0.8290926941012845, "grad_norm": 0.0759807676076889, "learning_rate": 9.08207888871534e-06, "loss": 0.0028, "step": 50670 }, { "epoch": 0.8292563200523603, "grad_norm": 0.15307533740997314, "learning_rate": 9.081529103109306e-06, "loss": 0.0028, "step": 50680 }, { "epoch": 0.8294199460034362, "grad_norm": 0.3663370907306671, "learning_rate": 9.080979169557652e-06, "loss": 0.0023, "step": 50690 }, { "epoch": 0.8295835719545119, "grad_norm": 0.2570980489253998, "learning_rate": 9.08042908808031e-06, "loss": 0.0037, "step": 50700 }, { "epoch": 0.8297471979055878, "grad_norm": 0.06891855597496033, "learning_rate": 9.07987885869722e-06, "loss": 0.0028, "step": 50710 }, { "epoch": 0.8299108238566637, "grad_norm": 0.1795913577079773, "learning_rate": 9.07932848142833e-06, "loss": 0.004, "step": 50720 }, { "epoch": 0.8300744498077395, "grad_norm": 0.07551542669534683, "learning_rate": 9.078777956293584e-06, "loss": 0.0018, "step": 50730 }, { "epoch": 0.8302380757588154, "grad_norm": 0.16215987503528595, "learning_rate": 9.078227283312942e-06, "loss": 0.0027, "step": 50740 }, { "epoch": 0.8304017017098911, "grad_norm": 0.4029521644115448, "learning_rate": 9.077676462506361e-06, "loss": 0.0031, "step": 50750 }, { "epoch": 0.830565327660967, "grad_norm": 0.043793685734272, "learning_rate": 9.07712549389381e-06, "loss": 0.0033, "step": 50760 }, { "epoch": 0.8307289536120429, "grad_norm": 0.40310558676719666, "learning_rate": 9.07657437749526e-06, "loss": 0.0031, "step": 50770 }, { "epoch": 0.8308925795631187, "grad_norm": 0.18080948293209076, "learning_rate": 9.076023113330685e-06, "loss": 0.0024, "step": 50780 }, { "epoch": 0.8310562055141946, "grad_norm": 0.14649733901023865, "learning_rate": 9.07547170142007e-06, "loss": 0.0037, "step": 50790 }, { "epoch": 0.8312198314652703, "grad_norm": 0.41183507442474365, "learning_rate": 9.074920141783398e-06, "loss": 0.0048, "step": 50800 }, { "epoch": 0.8313834574163462, "grad_norm": 0.025577817112207413, "learning_rate": 9.074368434440668e-06, "loss": 0.0052, "step": 50810 }, { "epoch": 0.8315470833674221, "grad_norm": 0.30468156933784485, "learning_rate": 9.073816579411871e-06, "loss": 0.0043, "step": 50820 }, { "epoch": 0.8317107093184979, "grad_norm": 0.1438124030828476, "learning_rate": 9.073264576717019e-06, "loss": 0.0018, "step": 50830 }, { "epoch": 0.8318743352695738, "grad_norm": 0.18616560101509094, "learning_rate": 9.072712426376113e-06, "loss": 0.0044, "step": 50840 }, { "epoch": 0.8320379612206495, "grad_norm": 0.08166428655385971, "learning_rate": 9.072160128409168e-06, "loss": 0.0027, "step": 50850 }, { "epoch": 0.8322015871717254, "grad_norm": 0.22281168401241302, "learning_rate": 9.07160768283621e-06, "loss": 0.0026, "step": 50860 }, { "epoch": 0.8323652131228013, "grad_norm": 0.16007889807224274, "learning_rate": 9.071055089677256e-06, "loss": 0.0049, "step": 50870 }, { "epoch": 0.8325288390738771, "grad_norm": 0.14258666336536407, "learning_rate": 9.07050234895234e-06, "loss": 0.0031, "step": 50880 }, { "epoch": 0.832692465024953, "grad_norm": 0.23663482069969177, "learning_rate": 9.0699494606815e-06, "loss": 0.0036, "step": 50890 }, { "epoch": 0.8328560909760288, "grad_norm": 0.09731143712997437, "learning_rate": 9.069396424884771e-06, "loss": 0.0033, "step": 50900 }, { "epoch": 0.8330197169271046, "grad_norm": 0.29777204990386963, "learning_rate": 9.068843241582204e-06, "loss": 0.0047, "step": 50910 }, { "epoch": 0.8331833428781805, "grad_norm": 0.12265215069055557, "learning_rate": 9.068289910793846e-06, "loss": 0.0029, "step": 50920 }, { "epoch": 0.8333469688292563, "grad_norm": 0.0338740237057209, "learning_rate": 9.06773643253976e-06, "loss": 0.0033, "step": 50930 }, { "epoch": 0.8335105947803322, "grad_norm": 0.0460546649992466, "learning_rate": 9.067182806840003e-06, "loss": 0.0036, "step": 50940 }, { "epoch": 0.833674220731408, "grad_norm": 0.26102080941200256, "learning_rate": 9.066629033714644e-06, "loss": 0.0039, "step": 50950 }, { "epoch": 0.8338378466824838, "grad_norm": 0.17924408614635468, "learning_rate": 9.066075113183757e-06, "loss": 0.0034, "step": 50960 }, { "epoch": 0.8340014726335597, "grad_norm": 0.14767718315124512, "learning_rate": 9.065521045267418e-06, "loss": 0.0037, "step": 50970 }, { "epoch": 0.8341650985846355, "grad_norm": 0.20270393788814545, "learning_rate": 9.064966829985715e-06, "loss": 0.0021, "step": 50980 }, { "epoch": 0.8343287245357114, "grad_norm": 0.3154531717300415, "learning_rate": 9.064412467358733e-06, "loss": 0.0047, "step": 50990 }, { "epoch": 0.8344923504867872, "grad_norm": 0.20131541788578033, "learning_rate": 9.063857957406567e-06, "loss": 0.0032, "step": 51000 }, { "epoch": 0.834655976437863, "grad_norm": 0.1727403998374939, "learning_rate": 9.06330330014932e-06, "loss": 0.0037, "step": 51010 }, { "epoch": 0.8348196023889389, "grad_norm": 0.4080318212509155, "learning_rate": 9.062748495607092e-06, "loss": 0.0062, "step": 51020 }, { "epoch": 0.8349832283400147, "grad_norm": 0.14040258526802063, "learning_rate": 9.062193543799996e-06, "loss": 0.0041, "step": 51030 }, { "epoch": 0.8351468542910906, "grad_norm": 0.2805325984954834, "learning_rate": 9.061638444748148e-06, "loss": 0.0029, "step": 51040 }, { "epoch": 0.8353104802421664, "grad_norm": 0.02529214508831501, "learning_rate": 9.061083198471667e-06, "loss": 0.003, "step": 51050 }, { "epoch": 0.8354741061932422, "grad_norm": 0.14584942162036896, "learning_rate": 9.060527804990682e-06, "loss": 0.0026, "step": 51060 }, { "epoch": 0.8356377321443181, "grad_norm": 0.27215349674224854, "learning_rate": 9.059972264325321e-06, "loss": 0.0035, "step": 51070 }, { "epoch": 0.8358013580953939, "grad_norm": 0.0744057446718216, "learning_rate": 9.059416576495725e-06, "loss": 0.0031, "step": 51080 }, { "epoch": 0.8359649840464698, "grad_norm": 0.03862448409199715, "learning_rate": 9.058860741522035e-06, "loss": 0.003, "step": 51090 }, { "epoch": 0.8361286099975456, "grad_norm": 0.8369637131690979, "learning_rate": 9.058304759424396e-06, "loss": 0.0022, "step": 51100 }, { "epoch": 0.8362922359486215, "grad_norm": 0.2615121006965637, "learning_rate": 9.057748630222966e-06, "loss": 0.0046, "step": 51110 }, { "epoch": 0.8364558618996973, "grad_norm": 0.21378803253173828, "learning_rate": 9.057192353937901e-06, "loss": 0.0026, "step": 51120 }, { "epoch": 0.8366194878507731, "grad_norm": 0.17934903502464294, "learning_rate": 9.056635930589362e-06, "loss": 0.0036, "step": 51130 }, { "epoch": 0.836783113801849, "grad_norm": 0.22878490388393402, "learning_rate": 9.056079360197522e-06, "loss": 0.0038, "step": 51140 }, { "epoch": 0.8369467397529248, "grad_norm": 0.13659565150737762, "learning_rate": 9.055522642782553e-06, "loss": 0.0041, "step": 51150 }, { "epoch": 0.8371103657040007, "grad_norm": 0.0801631361246109, "learning_rate": 9.054965778364637e-06, "loss": 0.0016, "step": 51160 }, { "epoch": 0.8372739916550765, "grad_norm": 0.1493367850780487, "learning_rate": 9.054408766963956e-06, "loss": 0.0035, "step": 51170 }, { "epoch": 0.8374376176061523, "grad_norm": 0.10300180315971375, "learning_rate": 9.053851608600702e-06, "loss": 0.0045, "step": 51180 }, { "epoch": 0.8376012435572282, "grad_norm": 0.06744793802499771, "learning_rate": 9.053294303295069e-06, "loss": 0.0046, "step": 51190 }, { "epoch": 0.837764869508304, "grad_norm": 0.07411858439445496, "learning_rate": 9.052736851067261e-06, "loss": 0.0037, "step": 51200 }, { "epoch": 0.8379284954593799, "grad_norm": 0.20761512219905853, "learning_rate": 9.052179251937482e-06, "loss": 0.0042, "step": 51210 }, { "epoch": 0.8380921214104557, "grad_norm": 0.2238827794790268, "learning_rate": 9.051621505925945e-06, "loss": 0.0027, "step": 51220 }, { "epoch": 0.8382557473615315, "grad_norm": 0.03649295121431351, "learning_rate": 9.051063613052865e-06, "loss": 0.003, "step": 51230 }, { "epoch": 0.8384193733126074, "grad_norm": 0.20875859260559082, "learning_rate": 9.050505573338467e-06, "loss": 0.0046, "step": 51240 }, { "epoch": 0.8385829992636832, "grad_norm": 0.10120797902345657, "learning_rate": 9.049947386802975e-06, "loss": 0.0028, "step": 51250 }, { "epoch": 0.8387466252147591, "grad_norm": 0.22312410175800323, "learning_rate": 9.049389053466625e-06, "loss": 0.0033, "step": 51260 }, { "epoch": 0.838910251165835, "grad_norm": 0.05088645592331886, "learning_rate": 9.048830573349656e-06, "loss": 0.0042, "step": 51270 }, { "epoch": 0.8390738771169107, "grad_norm": 0.1280689388513565, "learning_rate": 9.048271946472306e-06, "loss": 0.0034, "step": 51280 }, { "epoch": 0.8392375030679866, "grad_norm": 0.09160040318965912, "learning_rate": 9.047713172854832e-06, "loss": 0.0034, "step": 51290 }, { "epoch": 0.8394011290190624, "grad_norm": 0.4399280846118927, "learning_rate": 9.047154252517482e-06, "loss": 0.003, "step": 51300 }, { "epoch": 0.8395647549701383, "grad_norm": 0.2895500957965851, "learning_rate": 9.046595185480517e-06, "loss": 0.0031, "step": 51310 }, { "epoch": 0.8397283809212142, "grad_norm": 0.13036033511161804, "learning_rate": 9.046035971764202e-06, "loss": 0.0031, "step": 51320 }, { "epoch": 0.8398920068722899, "grad_norm": 0.273827463388443, "learning_rate": 9.045476611388808e-06, "loss": 0.0023, "step": 51330 }, { "epoch": 0.8400556328233658, "grad_norm": 0.31248846650123596, "learning_rate": 9.04491710437461e-06, "loss": 0.0032, "step": 51340 }, { "epoch": 0.8402192587744416, "grad_norm": 0.13093781471252441, "learning_rate": 9.04435745074189e-06, "loss": 0.0052, "step": 51350 }, { "epoch": 0.8403828847255175, "grad_norm": 0.35147008299827576, "learning_rate": 9.04379765051093e-06, "loss": 0.0028, "step": 51360 }, { "epoch": 0.8405465106765934, "grad_norm": 0.14003416895866394, "learning_rate": 9.043237703702026e-06, "loss": 0.0023, "step": 51370 }, { "epoch": 0.8407101366276691, "grad_norm": 0.36045512557029724, "learning_rate": 9.042677610335473e-06, "loss": 0.0034, "step": 51380 }, { "epoch": 0.840873762578745, "grad_norm": 0.04314054176211357, "learning_rate": 9.042117370431573e-06, "loss": 0.0017, "step": 51390 }, { "epoch": 0.8410373885298208, "grad_norm": 0.03455325588583946, "learning_rate": 9.041556984010633e-06, "loss": 0.0028, "step": 51400 }, { "epoch": 0.8412010144808967, "grad_norm": 0.11042078584432602, "learning_rate": 9.040996451092966e-06, "loss": 0.0025, "step": 51410 }, { "epoch": 0.8413646404319726, "grad_norm": 0.08467378467321396, "learning_rate": 9.040435771698887e-06, "loss": 0.0034, "step": 51420 }, { "epoch": 0.8415282663830483, "grad_norm": 0.29322943091392517, "learning_rate": 9.039874945848726e-06, "loss": 0.0042, "step": 51430 }, { "epoch": 0.8416918923341242, "grad_norm": 0.1992769092321396, "learning_rate": 9.039313973562807e-06, "loss": 0.0027, "step": 51440 }, { "epoch": 0.8418555182852, "grad_norm": 0.09897596389055252, "learning_rate": 9.038752854861464e-06, "loss": 0.0025, "step": 51450 }, { "epoch": 0.8420191442362759, "grad_norm": 0.08596764504909515, "learning_rate": 9.038191589765038e-06, "loss": 0.0032, "step": 51460 }, { "epoch": 0.8421827701873517, "grad_norm": 0.1174035295844078, "learning_rate": 9.037630178293871e-06, "loss": 0.0041, "step": 51470 }, { "epoch": 0.8423463961384275, "grad_norm": 0.1402428299188614, "learning_rate": 9.037068620468315e-06, "loss": 0.0027, "step": 51480 }, { "epoch": 0.8425100220895034, "grad_norm": 0.18283773958683014, "learning_rate": 9.036506916308723e-06, "loss": 0.0023, "step": 51490 }, { "epoch": 0.8426736480405792, "grad_norm": 0.13473039865493774, "learning_rate": 9.035945065835459e-06, "loss": 0.0027, "step": 51500 }, { "epoch": 0.8428372739916551, "grad_norm": 0.047684382647275925, "learning_rate": 9.035383069068884e-06, "loss": 0.0037, "step": 51510 }, { "epoch": 0.8430008999427309, "grad_norm": 0.15443789958953857, "learning_rate": 9.034820926029374e-06, "loss": 0.0024, "step": 51520 }, { "epoch": 0.8431645258938067, "grad_norm": 0.23368026316165924, "learning_rate": 9.034258636737302e-06, "loss": 0.0019, "step": 51530 }, { "epoch": 0.8433281518448826, "grad_norm": 0.08092086762189865, "learning_rate": 9.033696201213051e-06, "loss": 0.0053, "step": 51540 }, { "epoch": 0.8434917777959584, "grad_norm": 0.10714762657880783, "learning_rate": 9.033133619477006e-06, "loss": 0.0023, "step": 51550 }, { "epoch": 0.8436554037470343, "grad_norm": 0.24213559925556183, "learning_rate": 9.032570891549562e-06, "loss": 0.0028, "step": 51560 }, { "epoch": 0.8438190296981101, "grad_norm": 0.17505262792110443, "learning_rate": 9.032008017451114e-06, "loss": 0.0029, "step": 51570 }, { "epoch": 0.843982655649186, "grad_norm": 0.07047256082296371, "learning_rate": 9.031444997202068e-06, "loss": 0.0023, "step": 51580 }, { "epoch": 0.8441462816002618, "grad_norm": 0.21796371042728424, "learning_rate": 9.030881830822829e-06, "loss": 0.0031, "step": 51590 }, { "epoch": 0.8443099075513376, "grad_norm": 0.02481258288025856, "learning_rate": 9.03031851833381e-06, "loss": 0.0046, "step": 51600 }, { "epoch": 0.8444735335024135, "grad_norm": 0.014050047844648361, "learning_rate": 9.029755059755433e-06, "loss": 0.0017, "step": 51610 }, { "epoch": 0.8446371594534893, "grad_norm": 0.24635645747184753, "learning_rate": 9.029191455108121e-06, "loss": 0.0047, "step": 51620 }, { "epoch": 0.8448007854045652, "grad_norm": 0.14891695976257324, "learning_rate": 9.0286277044123e-06, "loss": 0.0024, "step": 51630 }, { "epoch": 0.844964411355641, "grad_norm": 0.14402726292610168, "learning_rate": 9.02806380768841e-06, "loss": 0.0035, "step": 51640 }, { "epoch": 0.8451280373067168, "grad_norm": 0.0439751110970974, "learning_rate": 9.027499764956888e-06, "loss": 0.0048, "step": 51650 }, { "epoch": 0.8452916632577927, "grad_norm": 0.09451764822006226, "learning_rate": 9.026935576238178e-06, "loss": 0.0037, "step": 51660 }, { "epoch": 0.8454552892088685, "grad_norm": 0.3115558326244354, "learning_rate": 9.026371241552732e-06, "loss": 0.0068, "step": 51670 }, { "epoch": 0.8456189151599444, "grad_norm": 0.1732897311449051, "learning_rate": 9.025806760921007e-06, "loss": 0.0029, "step": 51680 }, { "epoch": 0.8457825411110202, "grad_norm": 0.16963207721710205, "learning_rate": 9.025242134363462e-06, "loss": 0.0042, "step": 51690 }, { "epoch": 0.845946167062096, "grad_norm": 0.11961447447538376, "learning_rate": 9.024677361900563e-06, "loss": 0.0031, "step": 51700 }, { "epoch": 0.8461097930131719, "grad_norm": 0.1747836470603943, "learning_rate": 9.024112443552784e-06, "loss": 0.0035, "step": 51710 }, { "epoch": 0.8462734189642477, "grad_norm": 0.20227637887001038, "learning_rate": 9.0235473793406e-06, "loss": 0.0033, "step": 51720 }, { "epoch": 0.8464370449153236, "grad_norm": 0.26923802495002747, "learning_rate": 9.022982169284494e-06, "loss": 0.0024, "step": 51730 }, { "epoch": 0.8466006708663995, "grad_norm": 0.2732408046722412, "learning_rate": 9.022416813404954e-06, "loss": 0.004, "step": 51740 }, { "epoch": 0.8467642968174752, "grad_norm": 0.056906960904598236, "learning_rate": 9.021851311722472e-06, "loss": 0.0024, "step": 51750 }, { "epoch": 0.8469279227685511, "grad_norm": 0.15122783184051514, "learning_rate": 9.021285664257544e-06, "loss": 0.0039, "step": 51760 }, { "epoch": 0.8470915487196269, "grad_norm": 0.2581248879432678, "learning_rate": 9.020719871030678e-06, "loss": 0.0029, "step": 51770 }, { "epoch": 0.8472551746707028, "grad_norm": 0.12173102796077728, "learning_rate": 9.020153932062379e-06, "loss": 0.0047, "step": 51780 }, { "epoch": 0.8474188006217787, "grad_norm": 0.10298284888267517, "learning_rate": 9.019587847373163e-06, "loss": 0.002, "step": 51790 }, { "epoch": 0.8475824265728544, "grad_norm": 0.4102534353733063, "learning_rate": 9.019021616983548e-06, "loss": 0.0044, "step": 51800 }, { "epoch": 0.8477460525239303, "grad_norm": 0.13126803934574127, "learning_rate": 9.01845524091406e-06, "loss": 0.0042, "step": 51810 }, { "epoch": 0.8479096784750061, "grad_norm": 0.1789359599351883, "learning_rate": 9.017888719185226e-06, "loss": 0.0044, "step": 51820 }, { "epoch": 0.848073304426082, "grad_norm": 0.16379286348819733, "learning_rate": 9.017322051817584e-06, "loss": 0.006, "step": 51830 }, { "epoch": 0.8482369303771579, "grad_norm": 0.13456517457962036, "learning_rate": 9.016755238831671e-06, "loss": 0.0024, "step": 51840 }, { "epoch": 0.8484005563282336, "grad_norm": 0.11003376543521881, "learning_rate": 9.016188280248034e-06, "loss": 0.0033, "step": 51850 }, { "epoch": 0.8485641822793095, "grad_norm": 0.10518805682659149, "learning_rate": 9.015621176087228e-06, "loss": 0.0032, "step": 51860 }, { "epoch": 0.8487278082303853, "grad_norm": 0.2536409795284271, "learning_rate": 9.015053926369803e-06, "loss": 0.003, "step": 51870 }, { "epoch": 0.8488914341814612, "grad_norm": 0.31367555260658264, "learning_rate": 9.014486531116324e-06, "loss": 0.003, "step": 51880 }, { "epoch": 0.8490550601325371, "grad_norm": 0.06684146076440811, "learning_rate": 9.013918990347357e-06, "loss": 0.0026, "step": 51890 }, { "epoch": 0.8492186860836128, "grad_norm": 0.19261233508586884, "learning_rate": 9.013351304083475e-06, "loss": 0.0035, "step": 51900 }, { "epoch": 0.8493823120346887, "grad_norm": 0.11500176787376404, "learning_rate": 9.012783472345253e-06, "loss": 0.0019, "step": 51910 }, { "epoch": 0.8495459379857645, "grad_norm": 0.26511287689208984, "learning_rate": 9.012215495153273e-06, "loss": 0.0047, "step": 51920 }, { "epoch": 0.8497095639368404, "grad_norm": 0.07897260785102844, "learning_rate": 9.011647372528125e-06, "loss": 0.0033, "step": 51930 }, { "epoch": 0.8498731898879163, "grad_norm": 0.041406337171792984, "learning_rate": 9.011079104490403e-06, "loss": 0.003, "step": 51940 }, { "epoch": 0.850036815838992, "grad_norm": 0.1999581903219223, "learning_rate": 9.010510691060704e-06, "loss": 0.0029, "step": 51950 }, { "epoch": 0.8502004417900679, "grad_norm": 0.274642676115036, "learning_rate": 9.009942132259629e-06, "loss": 0.0097, "step": 51960 }, { "epoch": 0.8503640677411437, "grad_norm": 0.13690555095672607, "learning_rate": 9.00937342810779e-06, "loss": 0.0027, "step": 51970 }, { "epoch": 0.8505276936922196, "grad_norm": 0.1821022778749466, "learning_rate": 9.008804578625803e-06, "loss": 0.0038, "step": 51980 }, { "epoch": 0.8506913196432955, "grad_norm": 0.214046448469162, "learning_rate": 9.008235583834283e-06, "loss": 0.0037, "step": 51990 }, { "epoch": 0.8508549455943712, "grad_norm": 0.1042679101228714, "learning_rate": 9.007666443753856e-06, "loss": 0.0025, "step": 52000 }, { "epoch": 0.8510185715454471, "grad_norm": 0.06679016351699829, "learning_rate": 9.007097158405153e-06, "loss": 0.0025, "step": 52010 }, { "epoch": 0.8511821974965229, "grad_norm": 0.10288181155920029, "learning_rate": 9.006527727808811e-06, "loss": 0.0047, "step": 52020 }, { "epoch": 0.8513458234475988, "grad_norm": 0.12589871883392334, "learning_rate": 9.005958151985466e-06, "loss": 0.0025, "step": 52030 }, { "epoch": 0.8515094493986747, "grad_norm": 0.13085179030895233, "learning_rate": 9.005388430955767e-06, "loss": 0.0034, "step": 52040 }, { "epoch": 0.8516730753497505, "grad_norm": 0.10764490067958832, "learning_rate": 9.004818564740362e-06, "loss": 0.0041, "step": 52050 }, { "epoch": 0.8518367013008263, "grad_norm": 0.10860119014978409, "learning_rate": 9.004248553359911e-06, "loss": 0.002, "step": 52060 }, { "epoch": 0.8520003272519021, "grad_norm": 0.1613011360168457, "learning_rate": 9.003678396835076e-06, "loss": 0.0023, "step": 52070 }, { "epoch": 0.852163953202978, "grad_norm": 0.04479849338531494, "learning_rate": 9.003108095186519e-06, "loss": 0.0018, "step": 52080 }, { "epoch": 0.8523275791540539, "grad_norm": 0.06335500627756119, "learning_rate": 9.002537648434914e-06, "loss": 0.0021, "step": 52090 }, { "epoch": 0.8524912051051297, "grad_norm": 0.3290322422981262, "learning_rate": 9.001967056600941e-06, "loss": 0.0026, "step": 52100 }, { "epoch": 0.8526548310562055, "grad_norm": 0.1582181304693222, "learning_rate": 9.001396319705282e-06, "loss": 0.0022, "step": 52110 }, { "epoch": 0.8528184570072813, "grad_norm": 0.03260332718491554, "learning_rate": 9.000825437768622e-06, "loss": 0.0029, "step": 52120 }, { "epoch": 0.8529820829583572, "grad_norm": 0.13020731508731842, "learning_rate": 9.000254410811654e-06, "loss": 0.0032, "step": 52130 }, { "epoch": 0.8531457089094331, "grad_norm": 0.02632703073322773, "learning_rate": 8.99968323885508e-06, "loss": 0.0024, "step": 52140 }, { "epoch": 0.8533093348605089, "grad_norm": 0.12739895284175873, "learning_rate": 8.999111921919601e-06, "loss": 0.0033, "step": 52150 }, { "epoch": 0.8534729608115847, "grad_norm": 0.09220941364765167, "learning_rate": 8.998540460025926e-06, "loss": 0.0026, "step": 52160 }, { "epoch": 0.8536365867626605, "grad_norm": 0.2832767963409424, "learning_rate": 8.99796885319477e-06, "loss": 0.0024, "step": 52170 }, { "epoch": 0.8538002127137364, "grad_norm": 0.1344723403453827, "learning_rate": 8.997397101446852e-06, "loss": 0.0021, "step": 52180 }, { "epoch": 0.8539638386648123, "grad_norm": 0.35045450925827026, "learning_rate": 8.996825204802895e-06, "loss": 0.0017, "step": 52190 }, { "epoch": 0.8541274646158881, "grad_norm": 0.08236874639987946, "learning_rate": 8.996253163283633e-06, "loss": 0.0044, "step": 52200 }, { "epoch": 0.854291090566964, "grad_norm": 0.3529832661151886, "learning_rate": 8.995680976909798e-06, "loss": 0.0028, "step": 52210 }, { "epoch": 0.8544547165180397, "grad_norm": 0.3102409541606903, "learning_rate": 8.995108645702128e-06, "loss": 0.0047, "step": 52220 }, { "epoch": 0.8546183424691156, "grad_norm": 0.11922548711299896, "learning_rate": 8.994536169681376e-06, "loss": 0.0042, "step": 52230 }, { "epoch": 0.8547819684201915, "grad_norm": 0.12232251465320587, "learning_rate": 8.993963548868286e-06, "loss": 0.0018, "step": 52240 }, { "epoch": 0.8549455943712673, "grad_norm": 0.17454329133033752, "learning_rate": 8.993390783283617e-06, "loss": 0.0048, "step": 52250 }, { "epoch": 0.8551092203223432, "grad_norm": 0.1342998445034027, "learning_rate": 8.99281787294813e-06, "loss": 0.0026, "step": 52260 }, { "epoch": 0.8552728462734189, "grad_norm": 0.15353047847747803, "learning_rate": 8.992244817882592e-06, "loss": 0.0026, "step": 52270 }, { "epoch": 0.8554364722244948, "grad_norm": 0.164621502161026, "learning_rate": 8.991671618107773e-06, "loss": 0.0023, "step": 52280 }, { "epoch": 0.8556000981755707, "grad_norm": 0.11834081262350082, "learning_rate": 8.991098273644453e-06, "loss": 0.0038, "step": 52290 }, { "epoch": 0.8557637241266465, "grad_norm": 0.04167987033724785, "learning_rate": 8.990524784513412e-06, "loss": 0.0048, "step": 52300 }, { "epoch": 0.8559273500777224, "grad_norm": 0.13660024106502533, "learning_rate": 8.989951150735438e-06, "loss": 0.002, "step": 52310 }, { "epoch": 0.8560909760287981, "grad_norm": 0.1240919679403305, "learning_rate": 8.989377372331327e-06, "loss": 0.0026, "step": 52320 }, { "epoch": 0.856254601979874, "grad_norm": 0.11201497912406921, "learning_rate": 8.988803449321872e-06, "loss": 0.0025, "step": 52330 }, { "epoch": 0.8564182279309498, "grad_norm": 0.29503849148750305, "learning_rate": 8.98822938172788e-06, "loss": 0.0031, "step": 52340 }, { "epoch": 0.8565818538820257, "grad_norm": 0.1383712738752365, "learning_rate": 8.987655169570157e-06, "loss": 0.0025, "step": 52350 }, { "epoch": 0.8567454798331016, "grad_norm": 0.15909098088741302, "learning_rate": 8.98708081286952e-06, "loss": 0.0023, "step": 52360 }, { "epoch": 0.8569091057841773, "grad_norm": 0.17658723890781403, "learning_rate": 8.986506311646785e-06, "loss": 0.0028, "step": 52370 }, { "epoch": 0.8570727317352532, "grad_norm": 0.07994277775287628, "learning_rate": 8.985931665922778e-06, "loss": 0.0042, "step": 52380 }, { "epoch": 0.857236357686329, "grad_norm": 0.07423728704452515, "learning_rate": 8.98535687571833e-06, "loss": 0.0026, "step": 52390 }, { "epoch": 0.8573999836374049, "grad_norm": 0.09365329146385193, "learning_rate": 8.98478194105427e-06, "loss": 0.0027, "step": 52400 }, { "epoch": 0.8575636095884808, "grad_norm": 0.14266206324100494, "learning_rate": 8.984206861951443e-06, "loss": 0.0028, "step": 52410 }, { "epoch": 0.8577272355395565, "grad_norm": 0.16483327746391296, "learning_rate": 8.983631638430694e-06, "loss": 0.0041, "step": 52420 }, { "epoch": 0.8578908614906324, "grad_norm": 0.1589885801076889, "learning_rate": 8.983056270512874e-06, "loss": 0.0036, "step": 52430 }, { "epoch": 0.8580544874417082, "grad_norm": 0.09253576397895813, "learning_rate": 8.982480758218835e-06, "loss": 0.0036, "step": 52440 }, { "epoch": 0.8582181133927841, "grad_norm": 0.17469380795955658, "learning_rate": 8.98190510156944e-06, "loss": 0.0029, "step": 52450 }, { "epoch": 0.85838173934386, "grad_norm": 0.08178523182868958, "learning_rate": 8.981329300585556e-06, "loss": 0.0026, "step": 52460 }, { "epoch": 0.8585453652949357, "grad_norm": 0.3584573566913605, "learning_rate": 8.980753355288055e-06, "loss": 0.0056, "step": 52470 }, { "epoch": 0.8587089912460116, "grad_norm": 0.08372239768505096, "learning_rate": 8.98017726569781e-06, "loss": 0.0026, "step": 52480 }, { "epoch": 0.8588726171970874, "grad_norm": 0.2778053879737854, "learning_rate": 8.979601031835705e-06, "loss": 0.0032, "step": 52490 }, { "epoch": 0.8590362431481633, "grad_norm": 0.30251872539520264, "learning_rate": 8.97902465372263e-06, "loss": 0.0033, "step": 52500 }, { "epoch": 0.8591998690992392, "grad_norm": 0.09378662705421448, "learning_rate": 8.978448131379474e-06, "loss": 0.0042, "step": 52510 }, { "epoch": 0.859363495050315, "grad_norm": 0.19447028636932373, "learning_rate": 8.977871464827133e-06, "loss": 0.0038, "step": 52520 }, { "epoch": 0.8595271210013908, "grad_norm": 0.26850658655166626, "learning_rate": 8.977294654086514e-06, "loss": 0.0048, "step": 52530 }, { "epoch": 0.8596907469524666, "grad_norm": 0.08822564780712128, "learning_rate": 8.976717699178521e-06, "loss": 0.0022, "step": 52540 }, { "epoch": 0.8598543729035425, "grad_norm": 0.21378576755523682, "learning_rate": 8.976140600124071e-06, "loss": 0.0037, "step": 52550 }, { "epoch": 0.8600179988546184, "grad_norm": 0.09782931208610535, "learning_rate": 8.97556335694408e-06, "loss": 0.0025, "step": 52560 }, { "epoch": 0.8601816248056942, "grad_norm": 0.13276933133602142, "learning_rate": 8.974985969659473e-06, "loss": 0.0035, "step": 52570 }, { "epoch": 0.86034525075677, "grad_norm": 0.32663071155548096, "learning_rate": 8.974408438291178e-06, "loss": 0.0037, "step": 52580 }, { "epoch": 0.8605088767078458, "grad_norm": 0.4171352982521057, "learning_rate": 8.973830762860128e-06, "loss": 0.0033, "step": 52590 }, { "epoch": 0.8606725026589217, "grad_norm": 0.1790311336517334, "learning_rate": 8.973252943387265e-06, "loss": 0.0023, "step": 52600 }, { "epoch": 0.8608361286099976, "grad_norm": 0.11530248075723648, "learning_rate": 8.972674979893531e-06, "loss": 0.0023, "step": 52610 }, { "epoch": 0.8609997545610734, "grad_norm": 0.13855765759944916, "learning_rate": 8.972096872399879e-06, "loss": 0.0081, "step": 52620 }, { "epoch": 0.8611633805121492, "grad_norm": 0.043049514293670654, "learning_rate": 8.971518620927263e-06, "loss": 0.0032, "step": 52630 }, { "epoch": 0.861327006463225, "grad_norm": 0.4812357425689697, "learning_rate": 8.970940225496639e-06, "loss": 0.0043, "step": 52640 }, { "epoch": 0.8614906324143009, "grad_norm": 0.08960550278425217, "learning_rate": 8.970361686128976e-06, "loss": 0.0048, "step": 52650 }, { "epoch": 0.8616542583653768, "grad_norm": 0.10986928641796112, "learning_rate": 8.969783002845247e-06, "loss": 0.0044, "step": 52660 }, { "epoch": 0.8618178843164526, "grad_norm": 0.17350338399410248, "learning_rate": 8.969204175666424e-06, "loss": 0.0032, "step": 52670 }, { "epoch": 0.8619815102675285, "grad_norm": 0.18268626928329468, "learning_rate": 8.968625204613488e-06, "loss": 0.0024, "step": 52680 }, { "epoch": 0.8621451362186042, "grad_norm": 0.06502486020326614, "learning_rate": 8.968046089707427e-06, "loss": 0.0027, "step": 52690 }, { "epoch": 0.8623087621696801, "grad_norm": 0.23045609891414642, "learning_rate": 8.967466830969234e-06, "loss": 0.0025, "step": 52700 }, { "epoch": 0.862472388120756, "grad_norm": 0.1758364588022232, "learning_rate": 8.966887428419903e-06, "loss": 0.0028, "step": 52710 }, { "epoch": 0.8626360140718318, "grad_norm": 0.2817644476890564, "learning_rate": 8.966307882080436e-06, "loss": 0.0032, "step": 52720 }, { "epoch": 0.8627996400229077, "grad_norm": 0.10124043375253677, "learning_rate": 8.965728191971843e-06, "loss": 0.003, "step": 52730 }, { "epoch": 0.8629632659739834, "grad_norm": 0.4323711097240448, "learning_rate": 8.965148358115131e-06, "loss": 0.0031, "step": 52740 }, { "epoch": 0.8631268919250593, "grad_norm": 0.3049001097679138, "learning_rate": 8.964568380531325e-06, "loss": 0.0036, "step": 52750 }, { "epoch": 0.8632905178761352, "grad_norm": 0.13822802901268005, "learning_rate": 8.963988259241443e-06, "loss": 0.0027, "step": 52760 }, { "epoch": 0.863454143827211, "grad_norm": 0.46392422914505005, "learning_rate": 8.963407994266512e-06, "loss": 0.0041, "step": 52770 }, { "epoch": 0.8636177697782869, "grad_norm": 0.23145531117916107, "learning_rate": 8.962827585627569e-06, "loss": 0.0024, "step": 52780 }, { "epoch": 0.8637813957293626, "grad_norm": 0.07914381474256516, "learning_rate": 8.962247033345648e-06, "loss": 0.0031, "step": 52790 }, { "epoch": 0.8639450216804385, "grad_norm": 0.08892399817705154, "learning_rate": 8.961666337441797e-06, "loss": 0.0024, "step": 52800 }, { "epoch": 0.8641086476315144, "grad_norm": 0.19455312192440033, "learning_rate": 8.961085497937061e-06, "loss": 0.0045, "step": 52810 }, { "epoch": 0.8642722735825902, "grad_norm": 0.174282506108284, "learning_rate": 8.960504514852498e-06, "loss": 0.0037, "step": 52820 }, { "epoch": 0.8644358995336661, "grad_norm": 0.06773357093334198, "learning_rate": 8.959923388209164e-06, "loss": 0.005, "step": 52830 }, { "epoch": 0.8645995254847418, "grad_norm": 0.11846756190061569, "learning_rate": 8.959342118028124e-06, "loss": 0.0024, "step": 52840 }, { "epoch": 0.8647631514358177, "grad_norm": 0.016769489273428917, "learning_rate": 8.95876070433045e-06, "loss": 0.0035, "step": 52850 }, { "epoch": 0.8649267773868936, "grad_norm": 0.08660092204809189, "learning_rate": 8.958179147137215e-06, "loss": 0.0024, "step": 52860 }, { "epoch": 0.8650904033379694, "grad_norm": 0.0785297304391861, "learning_rate": 8.957597446469498e-06, "loss": 0.0037, "step": 52870 }, { "epoch": 0.8652540292890453, "grad_norm": 0.10576910525560379, "learning_rate": 8.957015602348387e-06, "loss": 0.0034, "step": 52880 }, { "epoch": 0.865417655240121, "grad_norm": 0.11605571955442429, "learning_rate": 8.956433614794968e-06, "loss": 0.0018, "step": 52890 }, { "epoch": 0.8655812811911969, "grad_norm": 0.2694227397441864, "learning_rate": 8.955851483830342e-06, "loss": 0.003, "step": 52900 }, { "epoch": 0.8657449071422728, "grad_norm": 0.1706376075744629, "learning_rate": 8.955269209475607e-06, "loss": 0.0027, "step": 52910 }, { "epoch": 0.8659085330933486, "grad_norm": 0.47280558943748474, "learning_rate": 8.954686791751868e-06, "loss": 0.0028, "step": 52920 }, { "epoch": 0.8660721590444245, "grad_norm": 0.4548928439617157, "learning_rate": 8.954104230680238e-06, "loss": 0.0024, "step": 52930 }, { "epoch": 0.8662357849955002, "grad_norm": 0.06332504004240036, "learning_rate": 8.953521526281834e-06, "loss": 0.0017, "step": 52940 }, { "epoch": 0.8663994109465761, "grad_norm": 0.3046591579914093, "learning_rate": 8.952938678577777e-06, "loss": 0.0046, "step": 52950 }, { "epoch": 0.866563036897652, "grad_norm": 0.1686720997095108, "learning_rate": 8.952355687589194e-06, "loss": 0.0029, "step": 52960 }, { "epoch": 0.8667266628487278, "grad_norm": 0.2987702488899231, "learning_rate": 8.951772553337215e-06, "loss": 0.0029, "step": 52970 }, { "epoch": 0.8668902887998037, "grad_norm": 0.07991890609264374, "learning_rate": 8.95118927584298e-06, "loss": 0.0039, "step": 52980 }, { "epoch": 0.8670539147508795, "grad_norm": 0.11487843841314316, "learning_rate": 8.950605855127627e-06, "loss": 0.0035, "step": 52990 }, { "epoch": 0.8672175407019553, "grad_norm": 0.20543067157268524, "learning_rate": 8.95002229121231e-06, "loss": 0.0052, "step": 53000 }, { "epoch": 0.8673811666530312, "grad_norm": 0.17694096267223358, "learning_rate": 8.949438584118178e-06, "loss": 0.0037, "step": 53010 }, { "epoch": 0.867544792604107, "grad_norm": 0.08411154896020889, "learning_rate": 8.94885473386639e-06, "loss": 0.0034, "step": 53020 }, { "epoch": 0.8677084185551829, "grad_norm": 0.10121965408325195, "learning_rate": 8.948270740478107e-06, "loss": 0.0016, "step": 53030 }, { "epoch": 0.8678720445062587, "grad_norm": 0.2577684223651886, "learning_rate": 8.947686603974501e-06, "loss": 0.0036, "step": 53040 }, { "epoch": 0.8680356704573345, "grad_norm": 0.153594508767128, "learning_rate": 8.947102324376742e-06, "loss": 0.0037, "step": 53050 }, { "epoch": 0.8681992964084104, "grad_norm": 0.20292901992797852, "learning_rate": 8.946517901706012e-06, "loss": 0.0019, "step": 53060 }, { "epoch": 0.8683629223594862, "grad_norm": 0.15858130156993866, "learning_rate": 8.945933335983491e-06, "loss": 0.0021, "step": 53070 }, { "epoch": 0.8685265483105621, "grad_norm": 0.06600574404001236, "learning_rate": 8.945348627230373e-06, "loss": 0.003, "step": 53080 }, { "epoch": 0.8686901742616379, "grad_norm": 0.03988082706928253, "learning_rate": 8.944763775467848e-06, "loss": 0.0039, "step": 53090 }, { "epoch": 0.8688538002127137, "grad_norm": 0.29407060146331787, "learning_rate": 8.944178780717118e-06, "loss": 0.003, "step": 53100 }, { "epoch": 0.8690174261637896, "grad_norm": 0.1616322100162506, "learning_rate": 8.943593642999386e-06, "loss": 0.0035, "step": 53110 }, { "epoch": 0.8691810521148654, "grad_norm": 0.10926707834005356, "learning_rate": 8.943008362335864e-06, "loss": 0.0025, "step": 53120 }, { "epoch": 0.8693446780659413, "grad_norm": 0.1041572317481041, "learning_rate": 8.942422938747763e-06, "loss": 0.0027, "step": 53130 }, { "epoch": 0.8695083040170171, "grad_norm": 0.2496742159128189, "learning_rate": 8.941837372256307e-06, "loss": 0.0021, "step": 53140 }, { "epoch": 0.869671929968093, "grad_norm": 0.13319042325019836, "learning_rate": 8.941251662882722e-06, "loss": 0.0027, "step": 53150 }, { "epoch": 0.8698355559191688, "grad_norm": 0.11574677377939224, "learning_rate": 8.940665810648235e-06, "loss": 0.0059, "step": 53160 }, { "epoch": 0.8699991818702446, "grad_norm": 0.1485525369644165, "learning_rate": 8.940079815574083e-06, "loss": 0.0031, "step": 53170 }, { "epoch": 0.8701628078213205, "grad_norm": 0.25555598735809326, "learning_rate": 8.939493677681509e-06, "loss": 0.0022, "step": 53180 }, { "epoch": 0.8703264337723963, "grad_norm": 0.18181513249874115, "learning_rate": 8.938907396991756e-06, "loss": 0.0031, "step": 53190 }, { "epoch": 0.8704900597234722, "grad_norm": 0.22958390414714813, "learning_rate": 8.938320973526078e-06, "loss": 0.0037, "step": 53200 }, { "epoch": 0.8706536856745479, "grad_norm": 0.16609273850917816, "learning_rate": 8.937734407305729e-06, "loss": 0.0043, "step": 53210 }, { "epoch": 0.8708173116256238, "grad_norm": 0.15709634125232697, "learning_rate": 8.937147698351973e-06, "loss": 0.0018, "step": 53220 }, { "epoch": 0.8709809375766997, "grad_norm": 0.09992536157369614, "learning_rate": 8.936560846686076e-06, "loss": 0.0061, "step": 53230 }, { "epoch": 0.8711445635277755, "grad_norm": 0.2760920822620392, "learning_rate": 8.935973852329309e-06, "loss": 0.0038, "step": 53240 }, { "epoch": 0.8713081894788514, "grad_norm": 0.03906510770320892, "learning_rate": 8.93538671530295e-06, "loss": 0.0012, "step": 53250 }, { "epoch": 0.8714718154299271, "grad_norm": 0.17667658627033234, "learning_rate": 8.93479943562828e-06, "loss": 0.0026, "step": 53260 }, { "epoch": 0.871635441381003, "grad_norm": 0.08896145224571228, "learning_rate": 8.934212013326589e-06, "loss": 0.0021, "step": 53270 }, { "epoch": 0.8717990673320789, "grad_norm": 0.14169903099536896, "learning_rate": 8.933624448419165e-06, "loss": 0.0034, "step": 53280 }, { "epoch": 0.8719626932831547, "grad_norm": 0.2344195544719696, "learning_rate": 8.933036740927313e-06, "loss": 0.0034, "step": 53290 }, { "epoch": 0.8721263192342306, "grad_norm": 0.10642287135124207, "learning_rate": 8.932448890872329e-06, "loss": 0.003, "step": 53300 }, { "epoch": 0.8722899451853063, "grad_norm": 0.39549535512924194, "learning_rate": 8.931860898275526e-06, "loss": 0.0045, "step": 53310 }, { "epoch": 0.8724535711363822, "grad_norm": 0.3656351864337921, "learning_rate": 8.931272763158214e-06, "loss": 0.0035, "step": 53320 }, { "epoch": 0.8726171970874581, "grad_norm": 0.14767798781394958, "learning_rate": 8.930684485541713e-06, "loss": 0.0053, "step": 53330 }, { "epoch": 0.8727808230385339, "grad_norm": 0.24421629309654236, "learning_rate": 8.930096065447348e-06, "loss": 0.0033, "step": 53340 }, { "epoch": 0.8729444489896098, "grad_norm": 0.18194125592708588, "learning_rate": 8.929507502896446e-06, "loss": 0.0034, "step": 53350 }, { "epoch": 0.8731080749406855, "grad_norm": 0.1932399719953537, "learning_rate": 8.92891879791034e-06, "loss": 0.0019, "step": 53360 }, { "epoch": 0.8732717008917614, "grad_norm": 0.17778369784355164, "learning_rate": 8.928329950510371e-06, "loss": 0.0037, "step": 53370 }, { "epoch": 0.8734353268428373, "grad_norm": 0.2531684935092926, "learning_rate": 8.927740960717883e-06, "loss": 0.0034, "step": 53380 }, { "epoch": 0.8735989527939131, "grad_norm": 0.21164102852344513, "learning_rate": 8.927151828554226e-06, "loss": 0.0081, "step": 53390 }, { "epoch": 0.873762578744989, "grad_norm": 0.21142245829105377, "learning_rate": 8.926562554040754e-06, "loss": 0.0027, "step": 53400 }, { "epoch": 0.8739262046960647, "grad_norm": 0.24757084250450134, "learning_rate": 8.925973137198824e-06, "loss": 0.0027, "step": 53410 }, { "epoch": 0.8740898306471406, "grad_norm": 0.21466612815856934, "learning_rate": 8.925383578049806e-06, "loss": 0.0024, "step": 53420 }, { "epoch": 0.8742534565982165, "grad_norm": 0.25179845094680786, "learning_rate": 8.924793876615067e-06, "loss": 0.0023, "step": 53430 }, { "epoch": 0.8744170825492923, "grad_norm": 0.281654953956604, "learning_rate": 8.924204032915984e-06, "loss": 0.0043, "step": 53440 }, { "epoch": 0.8745807085003682, "grad_norm": 0.08162378519773483, "learning_rate": 8.923614046973935e-06, "loss": 0.002, "step": 53450 }, { "epoch": 0.874744334451444, "grad_norm": 0.38746851682662964, "learning_rate": 8.923023918810308e-06, "loss": 0.0025, "step": 53460 }, { "epoch": 0.8749079604025198, "grad_norm": 0.17148450016975403, "learning_rate": 8.922433648446491e-06, "loss": 0.0052, "step": 53470 }, { "epoch": 0.8750715863535957, "grad_norm": 0.25914186239242554, "learning_rate": 8.921843235903884e-06, "loss": 0.0043, "step": 53480 }, { "epoch": 0.8752352123046715, "grad_norm": 0.09895703196525574, "learning_rate": 8.921252681203884e-06, "loss": 0.0018, "step": 53490 }, { "epoch": 0.8753988382557474, "grad_norm": 0.1403970718383789, "learning_rate": 8.9206619843679e-06, "loss": 0.0032, "step": 53500 }, { "epoch": 0.8755624642068232, "grad_norm": 0.2040606439113617, "learning_rate": 8.92007114541734e-06, "loss": 0.0037, "step": 53510 }, { "epoch": 0.875726090157899, "grad_norm": 0.20477795600891113, "learning_rate": 8.919480164373623e-06, "loss": 0.0023, "step": 53520 }, { "epoch": 0.8758897161089749, "grad_norm": 0.47427594661712646, "learning_rate": 8.918889041258172e-06, "loss": 0.0027, "step": 53530 }, { "epoch": 0.8760533420600507, "grad_norm": 0.39713793992996216, "learning_rate": 8.918297776092412e-06, "loss": 0.0047, "step": 53540 }, { "epoch": 0.8762169680111266, "grad_norm": 0.38035979866981506, "learning_rate": 8.917706368897774e-06, "loss": 0.0026, "step": 53550 }, { "epoch": 0.8763805939622024, "grad_norm": 0.29770806431770325, "learning_rate": 8.917114819695695e-06, "loss": 0.0037, "step": 53560 }, { "epoch": 0.8765442199132782, "grad_norm": 0.08524775505065918, "learning_rate": 8.91652312850762e-06, "loss": 0.0016, "step": 53570 }, { "epoch": 0.8767078458643541, "grad_norm": 0.09087630361318588, "learning_rate": 8.915931295354995e-06, "loss": 0.002, "step": 53580 }, { "epoch": 0.8768714718154299, "grad_norm": 0.280171275138855, "learning_rate": 8.91533932025927e-06, "loss": 0.002, "step": 53590 }, { "epoch": 0.8770350977665058, "grad_norm": 0.2526150643825531, "learning_rate": 8.914747203241908e-06, "loss": 0.0031, "step": 53600 }, { "epoch": 0.8771987237175816, "grad_norm": 0.3521006107330322, "learning_rate": 8.914154944324367e-06, "loss": 0.0033, "step": 53610 }, { "epoch": 0.8773623496686574, "grad_norm": 0.16499029099941254, "learning_rate": 8.913562543528116e-06, "loss": 0.0023, "step": 53620 }, { "epoch": 0.8775259756197333, "grad_norm": 0.03129316493868828, "learning_rate": 8.91297000087463e-06, "loss": 0.0028, "step": 53630 }, { "epoch": 0.8776896015708091, "grad_norm": 0.23531906306743622, "learning_rate": 8.912377316385386e-06, "loss": 0.0024, "step": 53640 }, { "epoch": 0.877853227521885, "grad_norm": 0.24655872583389282, "learning_rate": 8.911784490081867e-06, "loss": 0.0021, "step": 53650 }, { "epoch": 0.8780168534729608, "grad_norm": 0.09599456191062927, "learning_rate": 8.911191521985562e-06, "loss": 0.0029, "step": 53660 }, { "epoch": 0.8781804794240367, "grad_norm": 0.3862043023109436, "learning_rate": 8.910598412117965e-06, "loss": 0.0045, "step": 53670 }, { "epoch": 0.8783441053751125, "grad_norm": 0.04599544405937195, "learning_rate": 8.910005160500575e-06, "loss": 0.0041, "step": 53680 }, { "epoch": 0.8785077313261883, "grad_norm": 0.2448827475309372, "learning_rate": 8.909411767154894e-06, "loss": 0.0039, "step": 53690 }, { "epoch": 0.8786713572772642, "grad_norm": 0.15530982613563538, "learning_rate": 8.908818232102433e-06, "loss": 0.0026, "step": 53700 }, { "epoch": 0.87883498322834, "grad_norm": 0.2259899526834488, "learning_rate": 8.908224555364707e-06, "loss": 0.0028, "step": 53710 }, { "epoch": 0.8789986091794159, "grad_norm": 0.20185403525829315, "learning_rate": 8.907630736963231e-06, "loss": 0.0018, "step": 53720 }, { "epoch": 0.8791622351304917, "grad_norm": 0.04519975930452347, "learning_rate": 8.907036776919536e-06, "loss": 0.0023, "step": 53730 }, { "epoch": 0.8793258610815675, "grad_norm": 0.061125755310058594, "learning_rate": 8.906442675255145e-06, "loss": 0.004, "step": 53740 }, { "epoch": 0.8794894870326434, "grad_norm": 0.3056689500808716, "learning_rate": 8.905848431991597e-06, "loss": 0.0028, "step": 53750 }, { "epoch": 0.8796531129837192, "grad_norm": 0.36371874809265137, "learning_rate": 8.905254047150431e-06, "loss": 0.0048, "step": 53760 }, { "epoch": 0.8798167389347951, "grad_norm": 0.03165926784276962, "learning_rate": 8.904659520753192e-06, "loss": 0.0032, "step": 53770 }, { "epoch": 0.879980364885871, "grad_norm": 0.057995691895484924, "learning_rate": 8.90406485282143e-06, "loss": 0.003, "step": 53780 }, { "epoch": 0.8801439908369467, "grad_norm": 0.22391577064990997, "learning_rate": 8.903470043376701e-06, "loss": 0.0043, "step": 53790 }, { "epoch": 0.8803076167880226, "grad_norm": 0.007737902924418449, "learning_rate": 8.902875092440564e-06, "loss": 0.0015, "step": 53800 }, { "epoch": 0.8804712427390984, "grad_norm": 0.09004518389701843, "learning_rate": 8.902280000034584e-06, "loss": 0.0031, "step": 53810 }, { "epoch": 0.8806348686901743, "grad_norm": 0.29592758417129517, "learning_rate": 8.901684766180335e-06, "loss": 0.0037, "step": 53820 }, { "epoch": 0.8807984946412502, "grad_norm": 0.04724936932325363, "learning_rate": 8.901089390899388e-06, "loss": 0.0027, "step": 53830 }, { "epoch": 0.8809621205923259, "grad_norm": 0.14172354340553284, "learning_rate": 8.90049387421333e-06, "loss": 0.0048, "step": 53840 }, { "epoch": 0.8811257465434018, "grad_norm": 0.10313286632299423, "learning_rate": 8.89989821614374e-06, "loss": 0.0014, "step": 53850 }, { "epoch": 0.8812893724944776, "grad_norm": 0.0624067522585392, "learning_rate": 8.899302416712217e-06, "loss": 0.0023, "step": 53860 }, { "epoch": 0.8814529984455535, "grad_norm": 0.07169128954410553, "learning_rate": 8.89870647594035e-06, "loss": 0.0023, "step": 53870 }, { "epoch": 0.8816166243966294, "grad_norm": 0.15577267110347748, "learning_rate": 8.898110393849744e-06, "loss": 0.0025, "step": 53880 }, { "epoch": 0.8817802503477051, "grad_norm": 0.19391897320747375, "learning_rate": 8.897514170462005e-06, "loss": 0.0031, "step": 53890 }, { "epoch": 0.881943876298781, "grad_norm": 0.12142909318208694, "learning_rate": 8.896917805798746e-06, "loss": 0.0029, "step": 53900 }, { "epoch": 0.8821075022498568, "grad_norm": 0.08712892234325409, "learning_rate": 8.896321299881582e-06, "loss": 0.0036, "step": 53910 }, { "epoch": 0.8822711282009327, "grad_norm": 0.03784019127488136, "learning_rate": 8.895724652732133e-06, "loss": 0.0018, "step": 53920 }, { "epoch": 0.8824347541520086, "grad_norm": 0.08243045210838318, "learning_rate": 8.895127864372033e-06, "loss": 0.0034, "step": 53930 }, { "epoch": 0.8825983801030843, "grad_norm": 0.18200337886810303, "learning_rate": 8.894530934822908e-06, "loss": 0.0043, "step": 53940 }, { "epoch": 0.8827620060541602, "grad_norm": 0.11088070273399353, "learning_rate": 8.893933864106394e-06, "loss": 0.0018, "step": 53950 }, { "epoch": 0.882925632005236, "grad_norm": 0.2738510072231293, "learning_rate": 8.89333665224414e-06, "loss": 0.0029, "step": 53960 }, { "epoch": 0.8830892579563119, "grad_norm": 0.11092813313007355, "learning_rate": 8.892739299257786e-06, "loss": 0.0028, "step": 53970 }, { "epoch": 0.8832528839073878, "grad_norm": 0.2351102977991104, "learning_rate": 8.892141805168992e-06, "loss": 0.0036, "step": 53980 }, { "epoch": 0.8834165098584635, "grad_norm": 0.1342463344335556, "learning_rate": 8.89154416999941e-06, "loss": 0.0027, "step": 53990 }, { "epoch": 0.8835801358095394, "grad_norm": 0.07385512441396713, "learning_rate": 8.890946393770707e-06, "loss": 0.0026, "step": 54000 }, { "epoch": 0.8837437617606152, "grad_norm": 0.1501883864402771, "learning_rate": 8.890348476504547e-06, "loss": 0.0032, "step": 54010 }, { "epoch": 0.8839073877116911, "grad_norm": 0.144461527466774, "learning_rate": 8.889750418222605e-06, "loss": 0.0021, "step": 54020 }, { "epoch": 0.884071013662767, "grad_norm": 0.2875502109527588, "learning_rate": 8.88915221894656e-06, "loss": 0.002, "step": 54030 }, { "epoch": 0.8842346396138427, "grad_norm": 0.037042923271656036, "learning_rate": 8.888553878698093e-06, "loss": 0.0046, "step": 54040 }, { "epoch": 0.8843982655649186, "grad_norm": 0.11383754760026932, "learning_rate": 8.887955397498895e-06, "loss": 0.0045, "step": 54050 }, { "epoch": 0.8845618915159944, "grad_norm": 0.4360297620296478, "learning_rate": 8.887356775370657e-06, "loss": 0.0026, "step": 54060 }, { "epoch": 0.8847255174670703, "grad_norm": 0.27691003680229187, "learning_rate": 8.88675801233508e-06, "loss": 0.0037, "step": 54070 }, { "epoch": 0.8848891434181462, "grad_norm": 0.2729971408843994, "learning_rate": 8.886159108413865e-06, "loss": 0.0026, "step": 54080 }, { "epoch": 0.885052769369222, "grad_norm": 0.19147582352161407, "learning_rate": 8.885560063628724e-06, "loss": 0.0025, "step": 54090 }, { "epoch": 0.8852163953202978, "grad_norm": 0.10575056076049805, "learning_rate": 8.88496087800137e-06, "loss": 0.0043, "step": 54100 }, { "epoch": 0.8853800212713736, "grad_norm": 0.17590594291687012, "learning_rate": 8.884361551553522e-06, "loss": 0.0018, "step": 54110 }, { "epoch": 0.8855436472224495, "grad_norm": 0.16460944712162018, "learning_rate": 8.883762084306901e-06, "loss": 0.003, "step": 54120 }, { "epoch": 0.8857072731735253, "grad_norm": 0.24677202105522156, "learning_rate": 8.88316247628324e-06, "loss": 0.0052, "step": 54130 }, { "epoch": 0.8858708991246012, "grad_norm": 0.16806961596012115, "learning_rate": 8.882562727504272e-06, "loss": 0.0019, "step": 54140 }, { "epoch": 0.886034525075677, "grad_norm": 0.3846401870250702, "learning_rate": 8.881962837991738e-06, "loss": 0.005, "step": 54150 }, { "epoch": 0.8861981510267528, "grad_norm": 0.21025578677654266, "learning_rate": 8.88136280776738e-06, "loss": 0.0023, "step": 54160 }, { "epoch": 0.8863617769778287, "grad_norm": 0.19421720504760742, "learning_rate": 8.880762636852949e-06, "loss": 0.0035, "step": 54170 }, { "epoch": 0.8865254029289045, "grad_norm": 0.07441296428442001, "learning_rate": 8.880162325270199e-06, "loss": 0.0028, "step": 54180 }, { "epoch": 0.8866890288799804, "grad_norm": 0.25098496675491333, "learning_rate": 8.879561873040891e-06, "loss": 0.0045, "step": 54190 }, { "epoch": 0.8868526548310562, "grad_norm": 0.20665836334228516, "learning_rate": 8.878961280186789e-06, "loss": 0.004, "step": 54200 }, { "epoch": 0.887016280782132, "grad_norm": 0.053514499217271805, "learning_rate": 8.878360546729664e-06, "loss": 0.004, "step": 54210 }, { "epoch": 0.8871799067332079, "grad_norm": 0.07332173734903336, "learning_rate": 8.877759672691291e-06, "loss": 0.0031, "step": 54220 }, { "epoch": 0.8873435326842837, "grad_norm": 0.23525884747505188, "learning_rate": 8.877158658093448e-06, "loss": 0.0026, "step": 54230 }, { "epoch": 0.8875071586353596, "grad_norm": 0.3173944652080536, "learning_rate": 8.876557502957923e-06, "loss": 0.0032, "step": 54240 }, { "epoch": 0.8876707845864354, "grad_norm": 0.03907003626227379, "learning_rate": 8.875956207306507e-06, "loss": 0.0025, "step": 54250 }, { "epoch": 0.8878344105375112, "grad_norm": 0.21646857261657715, "learning_rate": 8.875354771160991e-06, "loss": 0.0042, "step": 54260 }, { "epoch": 0.8879980364885871, "grad_norm": 0.040403611958026886, "learning_rate": 8.874753194543181e-06, "loss": 0.0036, "step": 54270 }, { "epoch": 0.8881616624396629, "grad_norm": 0.24802431464195251, "learning_rate": 8.874151477474881e-06, "loss": 0.0029, "step": 54280 }, { "epoch": 0.8883252883907388, "grad_norm": 0.18411269783973694, "learning_rate": 8.8735496199779e-06, "loss": 0.004, "step": 54290 }, { "epoch": 0.8884889143418147, "grad_norm": 0.41890931129455566, "learning_rate": 8.872947622074056e-06, "loss": 0.0029, "step": 54300 }, { "epoch": 0.8886525402928904, "grad_norm": 0.161081463098526, "learning_rate": 8.872345483785168e-06, "loss": 0.0019, "step": 54310 }, { "epoch": 0.8888161662439663, "grad_norm": 0.37078016996383667, "learning_rate": 8.871743205133063e-06, "loss": 0.0048, "step": 54320 }, { "epoch": 0.8889797921950421, "grad_norm": 0.28318163752555847, "learning_rate": 8.871140786139574e-06, "loss": 0.0028, "step": 54330 }, { "epoch": 0.889143418146118, "grad_norm": 0.056515686213970184, "learning_rate": 8.870538226826534e-06, "loss": 0.0022, "step": 54340 }, { "epoch": 0.8893070440971939, "grad_norm": 0.18007394671440125, "learning_rate": 8.869935527215787e-06, "loss": 0.0031, "step": 54350 }, { "epoch": 0.8894706700482696, "grad_norm": 0.08672183007001877, "learning_rate": 8.869332687329177e-06, "loss": 0.0034, "step": 54360 }, { "epoch": 0.8896342959993455, "grad_norm": 0.2902868092060089, "learning_rate": 8.868729707188558e-06, "loss": 0.0038, "step": 54370 }, { "epoch": 0.8897979219504213, "grad_norm": 0.03184123337268829, "learning_rate": 8.868126586815785e-06, "loss": 0.0031, "step": 54380 }, { "epoch": 0.8899615479014972, "grad_norm": 0.42973706126213074, "learning_rate": 8.86752332623272e-06, "loss": 0.0058, "step": 54390 }, { "epoch": 0.8901251738525731, "grad_norm": 0.14185434579849243, "learning_rate": 8.866919925461229e-06, "loss": 0.0022, "step": 54400 }, { "epoch": 0.8902887998036488, "grad_norm": 0.2531667649745941, "learning_rate": 8.866316384523185e-06, "loss": 0.0027, "step": 54410 }, { "epoch": 0.8904524257547247, "grad_norm": 0.16212552785873413, "learning_rate": 8.865712703440464e-06, "loss": 0.0043, "step": 54420 }, { "epoch": 0.8906160517058005, "grad_norm": 0.1044308990240097, "learning_rate": 8.86510888223495e-06, "loss": 0.0027, "step": 54430 }, { "epoch": 0.8907796776568764, "grad_norm": 0.05360344052314758, "learning_rate": 8.864504920928527e-06, "loss": 0.0041, "step": 54440 }, { "epoch": 0.8909433036079523, "grad_norm": 0.05559851601719856, "learning_rate": 8.86390081954309e-06, "loss": 0.0015, "step": 54450 }, { "epoch": 0.891106929559028, "grad_norm": 0.8955914974212646, "learning_rate": 8.863296578100535e-06, "loss": 0.0043, "step": 54460 }, { "epoch": 0.8912705555101039, "grad_norm": 0.017737558111548424, "learning_rate": 8.862692196622761e-06, "loss": 0.0028, "step": 54470 }, { "epoch": 0.8914341814611797, "grad_norm": 0.41233164072036743, "learning_rate": 8.862087675131684e-06, "loss": 0.0066, "step": 54480 }, { "epoch": 0.8915978074122556, "grad_norm": 0.06687049567699432, "learning_rate": 8.861483013649207e-06, "loss": 0.0037, "step": 54490 }, { "epoch": 0.8917614333633315, "grad_norm": 0.19879239797592163, "learning_rate": 8.860878212197252e-06, "loss": 0.0055, "step": 54500 }, { "epoch": 0.8919250593144072, "grad_norm": 0.27851152420043945, "learning_rate": 8.860273270797743e-06, "loss": 0.0031, "step": 54510 }, { "epoch": 0.8920886852654831, "grad_norm": 0.2351270616054535, "learning_rate": 8.859668189472605e-06, "loss": 0.0026, "step": 54520 }, { "epoch": 0.8922523112165589, "grad_norm": 0.14612500369548798, "learning_rate": 8.859062968243773e-06, "loss": 0.0024, "step": 54530 }, { "epoch": 0.8924159371676348, "grad_norm": 0.11215360462665558, "learning_rate": 8.85845760713318e-06, "loss": 0.0026, "step": 54540 }, { "epoch": 0.8925795631187107, "grad_norm": 0.35958877205848694, "learning_rate": 8.857852106162774e-06, "loss": 0.0049, "step": 54550 }, { "epoch": 0.8927431890697864, "grad_norm": 0.08596529811620712, "learning_rate": 8.857246465354502e-06, "loss": 0.0037, "step": 54560 }, { "epoch": 0.8929068150208623, "grad_norm": 0.14215393364429474, "learning_rate": 8.856640684730315e-06, "loss": 0.0035, "step": 54570 }, { "epoch": 0.8930704409719381, "grad_norm": 0.22472992539405823, "learning_rate": 8.856034764312173e-06, "loss": 0.0023, "step": 54580 }, { "epoch": 0.893234066923014, "grad_norm": 0.09880702942609787, "learning_rate": 8.85542870412204e-06, "loss": 0.0026, "step": 54590 }, { "epoch": 0.8933976928740899, "grad_norm": 0.13637830317020416, "learning_rate": 8.854822504181881e-06, "loss": 0.0028, "step": 54600 }, { "epoch": 0.8935613188251657, "grad_norm": 0.14674554765224457, "learning_rate": 8.854216164513671e-06, "loss": 0.0046, "step": 54610 }, { "epoch": 0.8937249447762415, "grad_norm": 0.05734406039118767, "learning_rate": 8.85360968513939e-06, "loss": 0.0034, "step": 54620 }, { "epoch": 0.8938885707273173, "grad_norm": 0.09534914791584015, "learning_rate": 8.85300306608102e-06, "loss": 0.0023, "step": 54630 }, { "epoch": 0.8940521966783932, "grad_norm": 0.02331485226750374, "learning_rate": 8.852396307360548e-06, "loss": 0.0024, "step": 54640 }, { "epoch": 0.8942158226294691, "grad_norm": 0.22868740558624268, "learning_rate": 8.851789408999968e-06, "loss": 0.0055, "step": 54650 }, { "epoch": 0.8943794485805449, "grad_norm": 0.1800401657819748, "learning_rate": 8.851182371021278e-06, "loss": 0.003, "step": 54660 }, { "epoch": 0.8945430745316207, "grad_norm": 1.7709912061691284, "learning_rate": 8.850575193446487e-06, "loss": 0.0047, "step": 54670 }, { "epoch": 0.8947067004826965, "grad_norm": 0.1639859825372696, "learning_rate": 8.849967876297598e-06, "loss": 0.0017, "step": 54680 }, { "epoch": 0.8948703264337724, "grad_norm": 0.06918571144342422, "learning_rate": 8.849360419596626e-06, "loss": 0.0022, "step": 54690 }, { "epoch": 0.8950339523848483, "grad_norm": 0.11224681884050369, "learning_rate": 8.848752823365593e-06, "loss": 0.0035, "step": 54700 }, { "epoch": 0.8951975783359241, "grad_norm": 0.10752326250076294, "learning_rate": 8.848145087626518e-06, "loss": 0.004, "step": 54710 }, { "epoch": 0.895361204287, "grad_norm": 0.29402297735214233, "learning_rate": 8.847537212401432e-06, "loss": 0.0034, "step": 54720 }, { "epoch": 0.8955248302380757, "grad_norm": 0.10052228718996048, "learning_rate": 8.846929197712372e-06, "loss": 0.0033, "step": 54730 }, { "epoch": 0.8956884561891516, "grad_norm": 0.08744067698717117, "learning_rate": 8.846321043581373e-06, "loss": 0.0024, "step": 54740 }, { "epoch": 0.8958520821402275, "grad_norm": 0.08275303244590759, "learning_rate": 8.84571275003048e-06, "loss": 0.0025, "step": 54750 }, { "epoch": 0.8960157080913033, "grad_norm": 0.585598349571228, "learning_rate": 8.845104317081743e-06, "loss": 0.0034, "step": 54760 }, { "epoch": 0.8961793340423791, "grad_norm": 0.13055624067783356, "learning_rate": 8.844495744757215e-06, "loss": 0.0026, "step": 54770 }, { "epoch": 0.8963429599934549, "grad_norm": 0.11452317982912064, "learning_rate": 8.843887033078959e-06, "loss": 0.0041, "step": 54780 }, { "epoch": 0.8965065859445308, "grad_norm": 0.1353876143693924, "learning_rate": 8.843278182069034e-06, "loss": 0.0028, "step": 54790 }, { "epoch": 0.8966702118956067, "grad_norm": 0.15332217514514923, "learning_rate": 8.842669191749514e-06, "loss": 0.0031, "step": 54800 }, { "epoch": 0.8968338378466825, "grad_norm": 0.09307501465082169, "learning_rate": 8.84206006214247e-06, "loss": 0.0029, "step": 54810 }, { "epoch": 0.8969974637977584, "grad_norm": 0.04865473136305809, "learning_rate": 8.841450793269984e-06, "loss": 0.0024, "step": 54820 }, { "epoch": 0.8971610897488341, "grad_norm": 0.027074819430708885, "learning_rate": 8.840841385154137e-06, "loss": 0.0023, "step": 54830 }, { "epoch": 0.89732471569991, "grad_norm": 0.16106519103050232, "learning_rate": 8.840231837817024e-06, "loss": 0.0026, "step": 54840 }, { "epoch": 0.8974883416509859, "grad_norm": 0.29977506399154663, "learning_rate": 8.839622151280736e-06, "loss": 0.0045, "step": 54850 }, { "epoch": 0.8976519676020617, "grad_norm": 0.15003938972949982, "learning_rate": 8.839012325567372e-06, "loss": 0.0053, "step": 54860 }, { "epoch": 0.8978155935531376, "grad_norm": 0.06466463953256607, "learning_rate": 8.838402360699039e-06, "loss": 0.0025, "step": 54870 }, { "epoch": 0.8979792195042133, "grad_norm": 0.15948380529880524, "learning_rate": 8.837792256697846e-06, "loss": 0.0044, "step": 54880 }, { "epoch": 0.8981428454552892, "grad_norm": 0.2415420114994049, "learning_rate": 8.837182013585907e-06, "loss": 0.0023, "step": 54890 }, { "epoch": 0.8983064714063651, "grad_norm": 0.19046470522880554, "learning_rate": 8.836571631385342e-06, "loss": 0.0021, "step": 54900 }, { "epoch": 0.8984700973574409, "grad_norm": 0.20351184904575348, "learning_rate": 8.835961110118276e-06, "loss": 0.0028, "step": 54910 }, { "epoch": 0.8986337233085168, "grad_norm": 0.14510388672351837, "learning_rate": 8.835350449806842e-06, "loss": 0.0025, "step": 54920 }, { "epoch": 0.8987973492595925, "grad_norm": 0.17106325924396515, "learning_rate": 8.834739650473169e-06, "loss": 0.0044, "step": 54930 }, { "epoch": 0.8989609752106684, "grad_norm": 0.27555814385414124, "learning_rate": 8.834128712139402e-06, "loss": 0.0031, "step": 54940 }, { "epoch": 0.8991246011617443, "grad_norm": 0.14989875257015228, "learning_rate": 8.833517634827683e-06, "loss": 0.0043, "step": 54950 }, { "epoch": 0.8992882271128201, "grad_norm": 0.22055450081825256, "learning_rate": 8.832906418560165e-06, "loss": 0.0016, "step": 54960 }, { "epoch": 0.899451853063896, "grad_norm": 0.41166171431541443, "learning_rate": 8.832295063359e-06, "loss": 0.0037, "step": 54970 }, { "epoch": 0.8996154790149717, "grad_norm": 0.19088685512542725, "learning_rate": 8.83168356924635e-06, "loss": 0.0027, "step": 54980 }, { "epoch": 0.8997791049660476, "grad_norm": 0.10413893312215805, "learning_rate": 8.83107193624438e-06, "loss": 0.0034, "step": 54990 }, { "epoch": 0.8999427309171234, "grad_norm": 0.20757974684238434, "learning_rate": 8.830460164375261e-06, "loss": 0.0028, "step": 55000 }, { "epoch": 0.9001063568681993, "grad_norm": 0.09585452079772949, "learning_rate": 8.829848253661166e-06, "loss": 0.0031, "step": 55010 }, { "epoch": 0.9002699828192752, "grad_norm": 0.07869952917098999, "learning_rate": 8.829236204124276e-06, "loss": 0.005, "step": 55020 }, { "epoch": 0.900433608770351, "grad_norm": 0.16095200181007385, "learning_rate": 8.82862401578678e-06, "loss": 0.0033, "step": 55030 }, { "epoch": 0.9005972347214268, "grad_norm": 0.17637254297733307, "learning_rate": 8.828011688670862e-06, "loss": 0.0026, "step": 55040 }, { "epoch": 0.9007608606725026, "grad_norm": 0.1566551923751831, "learning_rate": 8.827399222798722e-06, "loss": 0.003, "step": 55050 }, { "epoch": 0.9009244866235785, "grad_norm": 0.2678000330924988, "learning_rate": 8.82678661819256e-06, "loss": 0.0036, "step": 55060 }, { "epoch": 0.9010881125746544, "grad_norm": 0.12816756963729858, "learning_rate": 8.826173874874578e-06, "loss": 0.0027, "step": 55070 }, { "epoch": 0.9012517385257302, "grad_norm": 0.021010136231780052, "learning_rate": 8.82556099286699e-06, "loss": 0.0059, "step": 55080 }, { "epoch": 0.901415364476806, "grad_norm": 0.07008292526006699, "learning_rate": 8.824947972192012e-06, "loss": 0.0025, "step": 55090 }, { "epoch": 0.9015789904278818, "grad_norm": 0.2838267982006073, "learning_rate": 8.824334812871862e-06, "loss": 0.0033, "step": 55100 }, { "epoch": 0.9017426163789577, "grad_norm": 0.17677299678325653, "learning_rate": 8.823721514928766e-06, "loss": 0.0033, "step": 55110 }, { "epoch": 0.9019062423300336, "grad_norm": 0.11584359407424927, "learning_rate": 8.823108078384956e-06, "loss": 0.0034, "step": 55120 }, { "epoch": 0.9020698682811094, "grad_norm": 0.11494273692369461, "learning_rate": 8.822494503262668e-06, "loss": 0.0028, "step": 55130 }, { "epoch": 0.9022334942321852, "grad_norm": 0.12606169283390045, "learning_rate": 8.821880789584138e-06, "loss": 0.0033, "step": 55140 }, { "epoch": 0.902397120183261, "grad_norm": 0.124476857483387, "learning_rate": 8.821266937371618e-06, "loss": 0.0025, "step": 55150 }, { "epoch": 0.9025607461343369, "grad_norm": 0.06272819638252258, "learning_rate": 8.820652946647355e-06, "loss": 0.0031, "step": 55160 }, { "epoch": 0.9027243720854128, "grad_norm": 0.2096959799528122, "learning_rate": 8.820038817433605e-06, "loss": 0.0024, "step": 55170 }, { "epoch": 0.9028879980364886, "grad_norm": 0.09812069684267044, "learning_rate": 8.81942454975263e-06, "loss": 0.0031, "step": 55180 }, { "epoch": 0.9030516239875644, "grad_norm": 0.04358232766389847, "learning_rate": 8.818810143626693e-06, "loss": 0.0031, "step": 55190 }, { "epoch": 0.9032152499386402, "grad_norm": 0.08749408274888992, "learning_rate": 8.818195599078068e-06, "loss": 0.0029, "step": 55200 }, { "epoch": 0.9033788758897161, "grad_norm": 0.056532811373472214, "learning_rate": 8.817580916129028e-06, "loss": 0.0022, "step": 55210 }, { "epoch": 0.903542501840792, "grad_norm": 0.14252284169197083, "learning_rate": 8.816966094801856e-06, "loss": 0.0031, "step": 55220 }, { "epoch": 0.9037061277918678, "grad_norm": 0.417191743850708, "learning_rate": 8.816351135118836e-06, "loss": 0.0049, "step": 55230 }, { "epoch": 0.9038697537429436, "grad_norm": 0.11611729860305786, "learning_rate": 8.815736037102262e-06, "loss": 0.0023, "step": 55240 }, { "epoch": 0.9040333796940194, "grad_norm": 0.06029880419373512, "learning_rate": 8.815120800774426e-06, "loss": 0.0045, "step": 55250 }, { "epoch": 0.9041970056450953, "grad_norm": 0.11495212465524673, "learning_rate": 8.81450542615763e-06, "loss": 0.0035, "step": 55260 }, { "epoch": 0.9043606315961712, "grad_norm": 0.17255397140979767, "learning_rate": 8.81388991327418e-06, "loss": 0.0033, "step": 55270 }, { "epoch": 0.904524257547247, "grad_norm": 0.16232207417488098, "learning_rate": 8.81327426214639e-06, "loss": 0.0022, "step": 55280 }, { "epoch": 0.9046878834983229, "grad_norm": 0.10415556281805038, "learning_rate": 8.812658472796569e-06, "loss": 0.0021, "step": 55290 }, { "epoch": 0.9048515094493986, "grad_norm": 0.7202304601669312, "learning_rate": 8.812042545247044e-06, "loss": 0.002, "step": 55300 }, { "epoch": 0.9050151354004745, "grad_norm": 0.07775498926639557, "learning_rate": 8.811426479520138e-06, "loss": 0.0038, "step": 55310 }, { "epoch": 0.9051787613515504, "grad_norm": 0.009761802852153778, "learning_rate": 8.810810275638183e-06, "loss": 0.0041, "step": 55320 }, { "epoch": 0.9053423873026262, "grad_norm": 0.1412675529718399, "learning_rate": 8.810193933623514e-06, "loss": 0.0037, "step": 55330 }, { "epoch": 0.9055060132537021, "grad_norm": 0.17761661112308502, "learning_rate": 8.809577453498474e-06, "loss": 0.0033, "step": 55340 }, { "epoch": 0.9056696392047778, "grad_norm": 0.16839146614074707, "learning_rate": 8.808960835285405e-06, "loss": 0.0021, "step": 55350 }, { "epoch": 0.9058332651558537, "grad_norm": 0.02789127826690674, "learning_rate": 8.808344079006663e-06, "loss": 0.0017, "step": 55360 }, { "epoch": 0.9059968911069296, "grad_norm": 0.08321288973093033, "learning_rate": 8.8077271846846e-06, "loss": 0.0017, "step": 55370 }, { "epoch": 0.9061605170580054, "grad_norm": 0.1785695105791092, "learning_rate": 8.807110152341577e-06, "loss": 0.0042, "step": 55380 }, { "epoch": 0.9063241430090813, "grad_norm": 0.2165706306695938, "learning_rate": 8.806492981999964e-06, "loss": 0.0029, "step": 55390 }, { "epoch": 0.906487768960157, "grad_norm": 0.33078229427337646, "learning_rate": 8.805875673682127e-06, "loss": 0.0038, "step": 55400 }, { "epoch": 0.9066513949112329, "grad_norm": 0.052473850548267365, "learning_rate": 8.805258227410444e-06, "loss": 0.0023, "step": 55410 }, { "epoch": 0.9068150208623088, "grad_norm": 0.08056936413049698, "learning_rate": 8.804640643207296e-06, "loss": 0.0018, "step": 55420 }, { "epoch": 0.9069786468133846, "grad_norm": 0.055580999702215195, "learning_rate": 8.80402292109507e-06, "loss": 0.0057, "step": 55430 }, { "epoch": 0.9071422727644605, "grad_norm": 0.217872753739357, "learning_rate": 8.803405061096154e-06, "loss": 0.0039, "step": 55440 }, { "epoch": 0.9073058987155362, "grad_norm": 0.039096899330616, "learning_rate": 8.802787063232947e-06, "loss": 0.0031, "step": 55450 }, { "epoch": 0.9074695246666121, "grad_norm": 0.1173924058675766, "learning_rate": 8.80216892752785e-06, "loss": 0.0034, "step": 55460 }, { "epoch": 0.907633150617688, "grad_norm": 0.0818713903427124, "learning_rate": 8.801550654003263e-06, "loss": 0.0023, "step": 55470 }, { "epoch": 0.9077967765687638, "grad_norm": 0.13284772634506226, "learning_rate": 8.800932242681605e-06, "loss": 0.0027, "step": 55480 }, { "epoch": 0.9079604025198397, "grad_norm": 0.05722169205546379, "learning_rate": 8.80031369358529e-06, "loss": 0.0019, "step": 55490 }, { "epoch": 0.9081240284709154, "grad_norm": 0.07156434655189514, "learning_rate": 8.799695006736735e-06, "loss": 0.0033, "step": 55500 }, { "epoch": 0.9082876544219913, "grad_norm": 0.18691639602184296, "learning_rate": 8.799076182158368e-06, "loss": 0.0048, "step": 55510 }, { "epoch": 0.9084512803730672, "grad_norm": 0.10891542583703995, "learning_rate": 8.798457219872622e-06, "loss": 0.0021, "step": 55520 }, { "epoch": 0.908614906324143, "grad_norm": 0.4028400480747223, "learning_rate": 8.797838119901931e-06, "loss": 0.004, "step": 55530 }, { "epoch": 0.9087785322752189, "grad_norm": 0.38665276765823364, "learning_rate": 8.797218882268734e-06, "loss": 0.0046, "step": 55540 }, { "epoch": 0.9089421582262946, "grad_norm": 0.09540103375911713, "learning_rate": 8.796599506995481e-06, "loss": 0.0021, "step": 55550 }, { "epoch": 0.9091057841773705, "grad_norm": 0.04197270795702934, "learning_rate": 8.795979994104619e-06, "loss": 0.0035, "step": 55560 }, { "epoch": 0.9092694101284464, "grad_norm": 0.0794285386800766, "learning_rate": 8.795360343618608e-06, "loss": 0.002, "step": 55570 }, { "epoch": 0.9094330360795222, "grad_norm": 0.16556252539157867, "learning_rate": 8.794740555559905e-06, "loss": 0.0026, "step": 55580 }, { "epoch": 0.9095966620305981, "grad_norm": 0.05589031055569649, "learning_rate": 8.794120629950977e-06, "loss": 0.0023, "step": 55590 }, { "epoch": 0.9097602879816739, "grad_norm": 0.2827475070953369, "learning_rate": 8.793500566814295e-06, "loss": 0.0042, "step": 55600 }, { "epoch": 0.9099239139327497, "grad_norm": 0.12089551985263824, "learning_rate": 8.792880366172337e-06, "loss": 0.0029, "step": 55610 }, { "epoch": 0.9100875398838256, "grad_norm": 0.05288970470428467, "learning_rate": 8.79226002804758e-06, "loss": 0.0024, "step": 55620 }, { "epoch": 0.9102511658349014, "grad_norm": 0.014621092937886715, "learning_rate": 8.791639552462509e-06, "loss": 0.0027, "step": 55630 }, { "epoch": 0.9104147917859773, "grad_norm": 0.07463811337947845, "learning_rate": 8.791018939439622e-06, "loss": 0.002, "step": 55640 }, { "epoch": 0.9105784177370531, "grad_norm": 0.08513284474611282, "learning_rate": 8.790398189001407e-06, "loss": 0.0025, "step": 55650 }, { "epoch": 0.9107420436881289, "grad_norm": 0.044549815356731415, "learning_rate": 8.789777301170368e-06, "loss": 0.0032, "step": 55660 }, { "epoch": 0.9109056696392048, "grad_norm": 0.11088958382606506, "learning_rate": 8.789156275969011e-06, "loss": 0.0032, "step": 55670 }, { "epoch": 0.9110692955902806, "grad_norm": 0.12103458493947983, "learning_rate": 8.788535113419844e-06, "loss": 0.0029, "step": 55680 }, { "epoch": 0.9112329215413565, "grad_norm": 0.19530555605888367, "learning_rate": 8.787913813545388e-06, "loss": 0.0047, "step": 55690 }, { "epoch": 0.9113965474924323, "grad_norm": 0.07805991917848587, "learning_rate": 8.787292376368158e-06, "loss": 0.0014, "step": 55700 }, { "epoch": 0.9115601734435081, "grad_norm": 0.04136421158909798, "learning_rate": 8.786670801910682e-06, "loss": 0.0021, "step": 55710 }, { "epoch": 0.911723799394584, "grad_norm": 0.07676675170660019, "learning_rate": 8.786049090195491e-06, "loss": 0.0016, "step": 55720 }, { "epoch": 0.9118874253456598, "grad_norm": 0.11405820399522781, "learning_rate": 8.785427241245118e-06, "loss": 0.0021, "step": 55730 }, { "epoch": 0.9120510512967357, "grad_norm": 0.16153748333454132, "learning_rate": 8.78480525508211e-06, "loss": 0.0037, "step": 55740 }, { "epoch": 0.9122146772478115, "grad_norm": 0.07060796022415161, "learning_rate": 8.784183131729005e-06, "loss": 0.0021, "step": 55750 }, { "epoch": 0.9123783031988874, "grad_norm": 0.2343321293592453, "learning_rate": 8.783560871208356e-06, "loss": 0.0032, "step": 55760 }, { "epoch": 0.9125419291499632, "grad_norm": 0.030766038224101067, "learning_rate": 8.78293847354272e-06, "loss": 0.0018, "step": 55770 }, { "epoch": 0.912705555101039, "grad_norm": 0.14081358909606934, "learning_rate": 8.782315938754657e-06, "loss": 0.0026, "step": 55780 }, { "epoch": 0.9128691810521149, "grad_norm": 0.029263710603117943, "learning_rate": 8.78169326686673e-06, "loss": 0.0018, "step": 55790 }, { "epoch": 0.9130328070031907, "grad_norm": 0.13216765224933624, "learning_rate": 8.781070457901513e-06, "loss": 0.0031, "step": 55800 }, { "epoch": 0.9131964329542666, "grad_norm": 0.06182245910167694, "learning_rate": 8.780447511881578e-06, "loss": 0.0034, "step": 55810 }, { "epoch": 0.9133600589053424, "grad_norm": 0.051942311227321625, "learning_rate": 8.779824428829507e-06, "loss": 0.0038, "step": 55820 }, { "epoch": 0.9135236848564182, "grad_norm": 0.13555654883384705, "learning_rate": 8.779201208767885e-06, "loss": 0.0031, "step": 55830 }, { "epoch": 0.9136873108074941, "grad_norm": 0.18750157952308655, "learning_rate": 8.778577851719302e-06, "loss": 0.0028, "step": 55840 }, { "epoch": 0.9138509367585699, "grad_norm": 0.2095109522342682, "learning_rate": 8.777954357706356e-06, "loss": 0.0042, "step": 55850 }, { "epoch": 0.9140145627096458, "grad_norm": 0.18244940042495728, "learning_rate": 8.777330726751643e-06, "loss": 0.003, "step": 55860 }, { "epoch": 0.9141781886607215, "grad_norm": 0.32803255319595337, "learning_rate": 8.77670695887777e-06, "loss": 0.0028, "step": 55870 }, { "epoch": 0.9143418146117974, "grad_norm": 0.09819483757019043, "learning_rate": 8.776083054107346e-06, "loss": 0.003, "step": 55880 }, { "epoch": 0.9145054405628733, "grad_norm": 0.2654581665992737, "learning_rate": 8.775459012462986e-06, "loss": 0.0035, "step": 55890 }, { "epoch": 0.9146690665139491, "grad_norm": 0.08447451889514923, "learning_rate": 8.774834833967312e-06, "loss": 0.0023, "step": 55900 }, { "epoch": 0.914832692465025, "grad_norm": 0.16487756371498108, "learning_rate": 8.774210518642948e-06, "loss": 0.0037, "step": 55910 }, { "epoch": 0.9149963184161007, "grad_norm": 0.1917775273323059, "learning_rate": 8.773586066512524e-06, "loss": 0.0026, "step": 55920 }, { "epoch": 0.9151599443671766, "grad_norm": 0.1459125280380249, "learning_rate": 8.772961477598674e-06, "loss": 0.002, "step": 55930 }, { "epoch": 0.9153235703182525, "grad_norm": 0.44084975123405457, "learning_rate": 8.772336751924038e-06, "loss": 0.0019, "step": 55940 }, { "epoch": 0.9154871962693283, "grad_norm": 0.15338511765003204, "learning_rate": 8.771711889511263e-06, "loss": 0.0026, "step": 55950 }, { "epoch": 0.9156508222204042, "grad_norm": 0.20346993207931519, "learning_rate": 8.771086890382996e-06, "loss": 0.0046, "step": 55960 }, { "epoch": 0.91581444817148, "grad_norm": 0.3131324350833893, "learning_rate": 8.770461754561891e-06, "loss": 0.0038, "step": 55970 }, { "epoch": 0.9159780741225558, "grad_norm": 0.1511867791414261, "learning_rate": 8.769836482070613e-06, "loss": 0.005, "step": 55980 }, { "epoch": 0.9161417000736317, "grad_norm": 0.27550679445266724, "learning_rate": 8.769211072931823e-06, "loss": 0.0042, "step": 55990 }, { "epoch": 0.9163053260247075, "grad_norm": 0.08323934674263, "learning_rate": 8.768585527168187e-06, "loss": 0.0019, "step": 56000 }, { "epoch": 0.9164689519757834, "grad_norm": 0.3180956542491913, "learning_rate": 8.767959844802387e-06, "loss": 0.0043, "step": 56010 }, { "epoch": 0.9166325779268591, "grad_norm": 0.04906865581870079, "learning_rate": 8.767334025857097e-06, "loss": 0.0037, "step": 56020 }, { "epoch": 0.916796203877935, "grad_norm": 0.12980033457279205, "learning_rate": 8.766708070355003e-06, "loss": 0.002, "step": 56030 }, { "epoch": 0.9169598298290109, "grad_norm": 0.3076556324958801, "learning_rate": 8.766081978318794e-06, "loss": 0.0063, "step": 56040 }, { "epoch": 0.9171234557800867, "grad_norm": 0.21607230603694916, "learning_rate": 8.765455749771166e-06, "loss": 0.0033, "step": 56050 }, { "epoch": 0.9172870817311626, "grad_norm": 0.13356612622737885, "learning_rate": 8.764829384734817e-06, "loss": 0.0038, "step": 56060 }, { "epoch": 0.9174507076822384, "grad_norm": 0.08096982538700104, "learning_rate": 8.76420288323245e-06, "loss": 0.0032, "step": 56070 }, { "epoch": 0.9176143336333142, "grad_norm": 0.19389745593070984, "learning_rate": 8.763576245286777e-06, "loss": 0.0026, "step": 56080 }, { "epoch": 0.9177779595843901, "grad_norm": 0.08451075851917267, "learning_rate": 8.76294947092051e-06, "loss": 0.0031, "step": 56090 }, { "epoch": 0.9179415855354659, "grad_norm": 0.07520750164985657, "learning_rate": 8.762322560156369e-06, "loss": 0.0026, "step": 56100 }, { "epoch": 0.9181052114865418, "grad_norm": 0.16138632595539093, "learning_rate": 8.761695513017077e-06, "loss": 0.0046, "step": 56110 }, { "epoch": 0.9182688374376176, "grad_norm": 0.15444326400756836, "learning_rate": 8.761068329525363e-06, "loss": 0.0022, "step": 56120 }, { "epoch": 0.9184324633886934, "grad_norm": 0.06740682572126389, "learning_rate": 8.760441009703964e-06, "loss": 0.0041, "step": 56130 }, { "epoch": 0.9185960893397693, "grad_norm": 0.130819171667099, "learning_rate": 8.759813553575614e-06, "loss": 0.0025, "step": 56140 }, { "epoch": 0.9187597152908451, "grad_norm": 0.11753668636083603, "learning_rate": 8.75918596116306e-06, "loss": 0.0015, "step": 56150 }, { "epoch": 0.918923341241921, "grad_norm": 0.16721569001674652, "learning_rate": 8.758558232489051e-06, "loss": 0.0017, "step": 56160 }, { "epoch": 0.9190869671929968, "grad_norm": 0.1588633954524994, "learning_rate": 8.75793036757634e-06, "loss": 0.002, "step": 56170 }, { "epoch": 0.9192505931440726, "grad_norm": 0.1676766276359558, "learning_rate": 8.757302366447682e-06, "loss": 0.0037, "step": 56180 }, { "epoch": 0.9194142190951485, "grad_norm": 0.03938222676515579, "learning_rate": 8.756674229125845e-06, "loss": 0.0018, "step": 56190 }, { "epoch": 0.9195778450462243, "grad_norm": 0.069462351500988, "learning_rate": 8.756045955633598e-06, "loss": 0.0021, "step": 56200 }, { "epoch": 0.9197414709973002, "grad_norm": 0.0305546373128891, "learning_rate": 8.75541754599371e-06, "loss": 0.0036, "step": 56210 }, { "epoch": 0.919905096948376, "grad_norm": 0.07955552637577057, "learning_rate": 8.754789000228963e-06, "loss": 0.0039, "step": 56220 }, { "epoch": 0.9200687228994519, "grad_norm": 0.41352513432502747, "learning_rate": 8.75416031836214e-06, "loss": 0.003, "step": 56230 }, { "epoch": 0.9202323488505277, "grad_norm": 0.06355006992816925, "learning_rate": 8.753531500416027e-06, "loss": 0.0025, "step": 56240 }, { "epoch": 0.9203959748016035, "grad_norm": 0.15708458423614502, "learning_rate": 8.752902546413418e-06, "loss": 0.0022, "step": 56250 }, { "epoch": 0.9205596007526794, "grad_norm": 0.06660819053649902, "learning_rate": 8.752273456377114e-06, "loss": 0.0018, "step": 56260 }, { "epoch": 0.9207232267037552, "grad_norm": 0.35426750779151917, "learning_rate": 8.751644230329913e-06, "loss": 0.0035, "step": 56270 }, { "epoch": 0.9208868526548311, "grad_norm": 0.11654836684465408, "learning_rate": 8.751014868294628e-06, "loss": 0.0025, "step": 56280 }, { "epoch": 0.9210504786059069, "grad_norm": 0.0773138627409935, "learning_rate": 8.750385370294065e-06, "loss": 0.0033, "step": 56290 }, { "epoch": 0.9212141045569827, "grad_norm": 0.20983535051345825, "learning_rate": 8.74975573635105e-06, "loss": 0.0027, "step": 56300 }, { "epoch": 0.9213777305080586, "grad_norm": 0.13140127062797546, "learning_rate": 8.7491259664884e-06, "loss": 0.0022, "step": 56310 }, { "epoch": 0.9215413564591344, "grad_norm": 0.25688043236732483, "learning_rate": 8.748496060728945e-06, "loss": 0.0039, "step": 56320 }, { "epoch": 0.9217049824102103, "grad_norm": 0.03212318569421768, "learning_rate": 8.74786601909552e-06, "loss": 0.0016, "step": 56330 }, { "epoch": 0.9218686083612861, "grad_norm": 0.20308706164360046, "learning_rate": 8.747235841610956e-06, "loss": 0.003, "step": 56340 }, { "epoch": 0.9220322343123619, "grad_norm": 0.3436363637447357, "learning_rate": 8.7466055282981e-06, "loss": 0.0024, "step": 56350 }, { "epoch": 0.9221958602634378, "grad_norm": 0.4917292296886444, "learning_rate": 8.745975079179799e-06, "loss": 0.003, "step": 56360 }, { "epoch": 0.9223594862145136, "grad_norm": 0.07783607393503189, "learning_rate": 8.745344494278903e-06, "loss": 0.0041, "step": 56370 }, { "epoch": 0.9225231121655895, "grad_norm": 0.10799320042133331, "learning_rate": 8.744713773618272e-06, "loss": 0.0024, "step": 56380 }, { "epoch": 0.9226867381166654, "grad_norm": 0.2581583857536316, "learning_rate": 8.744082917220766e-06, "loss": 0.0032, "step": 56390 }, { "epoch": 0.9228503640677411, "grad_norm": 0.0991620346903801, "learning_rate": 8.743451925109254e-06, "loss": 0.0027, "step": 56400 }, { "epoch": 0.923013990018817, "grad_norm": 0.06590348482131958, "learning_rate": 8.742820797306606e-06, "loss": 0.0022, "step": 56410 }, { "epoch": 0.9231776159698928, "grad_norm": 0.1018282026052475, "learning_rate": 8.7421895338357e-06, "loss": 0.0021, "step": 56420 }, { "epoch": 0.9233412419209687, "grad_norm": 0.19533893465995789, "learning_rate": 8.741558134719418e-06, "loss": 0.0018, "step": 56430 }, { "epoch": 0.9235048678720446, "grad_norm": 0.2927216589450836, "learning_rate": 8.740926599980646e-06, "loss": 0.0036, "step": 56440 }, { "epoch": 0.9236684938231203, "grad_norm": 0.11482112854719162, "learning_rate": 8.740294929642275e-06, "loss": 0.002, "step": 56450 }, { "epoch": 0.9238321197741962, "grad_norm": 0.26916325092315674, "learning_rate": 8.739663123727203e-06, "loss": 0.0025, "step": 56460 }, { "epoch": 0.923995745725272, "grad_norm": 0.039856091141700745, "learning_rate": 8.739031182258331e-06, "loss": 0.0027, "step": 56470 }, { "epoch": 0.9241593716763479, "grad_norm": 0.07528948038816452, "learning_rate": 8.738399105258563e-06, "loss": 0.004, "step": 56480 }, { "epoch": 0.9243229976274238, "grad_norm": 0.13336074352264404, "learning_rate": 8.737766892750817e-06, "loss": 0.0039, "step": 56490 }, { "epoch": 0.9244866235784995, "grad_norm": 0.1255578100681305, "learning_rate": 8.737134544758001e-06, "loss": 0.0029, "step": 56500 }, { "epoch": 0.9246502495295754, "grad_norm": 0.011512682773172855, "learning_rate": 8.736502061303041e-06, "loss": 0.0023, "step": 56510 }, { "epoch": 0.9248138754806512, "grad_norm": 0.1771090030670166, "learning_rate": 8.735869442408862e-06, "loss": 0.0024, "step": 56520 }, { "epoch": 0.9249775014317271, "grad_norm": 0.2767779529094696, "learning_rate": 8.735236688098393e-06, "loss": 0.003, "step": 56530 }, { "epoch": 0.925141127382803, "grad_norm": 0.22273513674736023, "learning_rate": 8.734603798394571e-06, "loss": 0.0019, "step": 56540 }, { "epoch": 0.9253047533338787, "grad_norm": 0.028323331847786903, "learning_rate": 8.73397077332034e-06, "loss": 0.0032, "step": 56550 }, { "epoch": 0.9254683792849546, "grad_norm": 0.07312364876270294, "learning_rate": 8.733337612898642e-06, "loss": 0.0024, "step": 56560 }, { "epoch": 0.9256320052360304, "grad_norm": 0.3575909435749054, "learning_rate": 8.732704317152427e-06, "loss": 0.0031, "step": 56570 }, { "epoch": 0.9257956311871063, "grad_norm": 0.1104685440659523, "learning_rate": 8.732070886104653e-06, "loss": 0.0034, "step": 56580 }, { "epoch": 0.9259592571381822, "grad_norm": 0.14856906235218048, "learning_rate": 8.73143731977828e-06, "loss": 0.0028, "step": 56590 }, { "epoch": 0.9261228830892579, "grad_norm": 0.3008265793323517, "learning_rate": 8.73080361819627e-06, "loss": 0.0046, "step": 56600 }, { "epoch": 0.9262865090403338, "grad_norm": 0.1314973086118698, "learning_rate": 8.730169781381597e-06, "loss": 0.004, "step": 56610 }, { "epoch": 0.9264501349914096, "grad_norm": 0.10367554426193237, "learning_rate": 8.729535809357236e-06, "loss": 0.0031, "step": 56620 }, { "epoch": 0.9266137609424855, "grad_norm": 0.24642257392406464, "learning_rate": 8.728901702146164e-06, "loss": 0.0024, "step": 56630 }, { "epoch": 0.9267773868935614, "grad_norm": 0.022402064874768257, "learning_rate": 8.728267459771366e-06, "loss": 0.0042, "step": 56640 }, { "epoch": 0.9269410128446371, "grad_norm": 0.19319544732570648, "learning_rate": 8.727633082255837e-06, "loss": 0.0037, "step": 56650 }, { "epoch": 0.927104638795713, "grad_norm": 0.14075158536434174, "learning_rate": 8.726998569622565e-06, "loss": 0.0023, "step": 56660 }, { "epoch": 0.9272682647467888, "grad_norm": 0.15454579889774323, "learning_rate": 8.726363921894555e-06, "loss": 0.0023, "step": 56670 }, { "epoch": 0.9274318906978647, "grad_norm": 0.08716664463281631, "learning_rate": 8.725729139094805e-06, "loss": 0.0022, "step": 56680 }, { "epoch": 0.9275955166489406, "grad_norm": 0.026017392054200172, "learning_rate": 8.725094221246331e-06, "loss": 0.0016, "step": 56690 }, { "epoch": 0.9277591426000164, "grad_norm": 0.11958318203687668, "learning_rate": 8.724459168372144e-06, "loss": 0.0023, "step": 56700 }, { "epoch": 0.9279227685510922, "grad_norm": 0.14724621176719666, "learning_rate": 8.723823980495264e-06, "loss": 0.0027, "step": 56710 }, { "epoch": 0.928086394502168, "grad_norm": 0.23805959522724152, "learning_rate": 8.723188657638715e-06, "loss": 0.0043, "step": 56720 }, { "epoch": 0.9282500204532439, "grad_norm": 0.10530497133731842, "learning_rate": 8.722553199825525e-06, "loss": 0.0044, "step": 56730 }, { "epoch": 0.9284136464043197, "grad_norm": 0.36908823251724243, "learning_rate": 8.721917607078729e-06, "loss": 0.0028, "step": 56740 }, { "epoch": 0.9285772723553956, "grad_norm": 0.4372231364250183, "learning_rate": 8.721281879421363e-06, "loss": 0.0025, "step": 56750 }, { "epoch": 0.9287408983064714, "grad_norm": 0.07128158956766129, "learning_rate": 8.720646016876474e-06, "loss": 0.0034, "step": 56760 }, { "epoch": 0.9289045242575472, "grad_norm": 0.361255943775177, "learning_rate": 8.72001001946711e-06, "loss": 0.0051, "step": 56770 }, { "epoch": 0.9290681502086231, "grad_norm": 0.11073429137468338, "learning_rate": 8.719373887216322e-06, "loss": 0.0021, "step": 56780 }, { "epoch": 0.9292317761596989, "grad_norm": 0.049818575382232666, "learning_rate": 8.718737620147173e-06, "loss": 0.0039, "step": 56790 }, { "epoch": 0.9293954021107748, "grad_norm": 0.2616579234600067, "learning_rate": 8.71810121828272e-06, "loss": 0.003, "step": 56800 }, { "epoch": 0.9295590280618506, "grad_norm": 0.20634043216705322, "learning_rate": 8.717464681646035e-06, "loss": 0.0033, "step": 56810 }, { "epoch": 0.9297226540129264, "grad_norm": 0.14242428541183472, "learning_rate": 8.71682801026019e-06, "loss": 0.0025, "step": 56820 }, { "epoch": 0.9298862799640023, "grad_norm": 0.26235896348953247, "learning_rate": 8.716191204148262e-06, "loss": 0.0024, "step": 56830 }, { "epoch": 0.9300499059150781, "grad_norm": 0.19843776524066925, "learning_rate": 8.715554263333337e-06, "loss": 0.0029, "step": 56840 }, { "epoch": 0.930213531866154, "grad_norm": 0.26787033677101135, "learning_rate": 8.714917187838497e-06, "loss": 0.0022, "step": 56850 }, { "epoch": 0.9303771578172298, "grad_norm": 0.1914227306842804, "learning_rate": 8.714279977686839e-06, "loss": 0.0042, "step": 56860 }, { "epoch": 0.9305407837683056, "grad_norm": 0.09505842626094818, "learning_rate": 8.713642632901459e-06, "loss": 0.0017, "step": 56870 }, { "epoch": 0.9307044097193815, "grad_norm": 0.243962362408638, "learning_rate": 8.713005153505458e-06, "loss": 0.0021, "step": 56880 }, { "epoch": 0.9308680356704573, "grad_norm": 0.05524337291717529, "learning_rate": 8.712367539521945e-06, "loss": 0.0033, "step": 56890 }, { "epoch": 0.9310316616215332, "grad_norm": 0.15506170690059662, "learning_rate": 8.711729790974031e-06, "loss": 0.0012, "step": 56900 }, { "epoch": 0.931195287572609, "grad_norm": 0.15791694819927216, "learning_rate": 8.711091907884833e-06, "loss": 0.0037, "step": 56910 }, { "epoch": 0.9313589135236848, "grad_norm": 0.05830807983875275, "learning_rate": 8.710453890277473e-06, "loss": 0.0035, "step": 56920 }, { "epoch": 0.9315225394747607, "grad_norm": 0.22038206458091736, "learning_rate": 8.709815738175077e-06, "loss": 0.0027, "step": 56930 }, { "epoch": 0.9316861654258365, "grad_norm": 0.21177567541599274, "learning_rate": 8.709177451600776e-06, "loss": 0.0034, "step": 56940 }, { "epoch": 0.9318497913769124, "grad_norm": 0.07490844279527664, "learning_rate": 8.70853903057771e-06, "loss": 0.0026, "step": 56950 }, { "epoch": 0.9320134173279883, "grad_norm": 0.15769219398498535, "learning_rate": 8.707900475129014e-06, "loss": 0.0029, "step": 56960 }, { "epoch": 0.932177043279064, "grad_norm": 0.25551092624664307, "learning_rate": 8.707261785277839e-06, "loss": 0.0038, "step": 56970 }, { "epoch": 0.9323406692301399, "grad_norm": 0.06905394047498703, "learning_rate": 8.706622961047333e-06, "loss": 0.0017, "step": 56980 }, { "epoch": 0.9325042951812157, "grad_norm": 0.15709897875785828, "learning_rate": 8.705984002460655e-06, "loss": 0.0042, "step": 56990 }, { "epoch": 0.9326679211322916, "grad_norm": 0.08229276537895203, "learning_rate": 8.705344909540965e-06, "loss": 0.0026, "step": 57000 }, { "epoch": 0.9328315470833675, "grad_norm": 0.02851949632167816, "learning_rate": 8.704705682311424e-06, "loss": 0.0035, "step": 57010 }, { "epoch": 0.9329951730344432, "grad_norm": 0.059260979294776917, "learning_rate": 8.70406632079521e-06, "loss": 0.0027, "step": 57020 }, { "epoch": 0.9331587989855191, "grad_norm": 0.14293326437473297, "learning_rate": 8.703426825015493e-06, "loss": 0.0026, "step": 57030 }, { "epoch": 0.9333224249365949, "grad_norm": 0.09775244444608688, "learning_rate": 8.702787194995455e-06, "loss": 0.0022, "step": 57040 }, { "epoch": 0.9334860508876708, "grad_norm": 0.11891881376504898, "learning_rate": 8.70214743075828e-06, "loss": 0.0045, "step": 57050 }, { "epoch": 0.9336496768387467, "grad_norm": 0.21938017010688782, "learning_rate": 8.701507532327157e-06, "loss": 0.0044, "step": 57060 }, { "epoch": 0.9338133027898224, "grad_norm": 0.2713831067085266, "learning_rate": 8.700867499725283e-06, "loss": 0.003, "step": 57070 }, { "epoch": 0.9339769287408983, "grad_norm": 0.09283127635717392, "learning_rate": 8.700227332975859e-06, "loss": 0.0034, "step": 57080 }, { "epoch": 0.9341405546919741, "grad_norm": 0.01693669892847538, "learning_rate": 8.699587032102084e-06, "loss": 0.0036, "step": 57090 }, { "epoch": 0.93430418064305, "grad_norm": 0.3564091622829437, "learning_rate": 8.698946597127174e-06, "loss": 0.0034, "step": 57100 }, { "epoch": 0.9344678065941259, "grad_norm": 0.1506435126066208, "learning_rate": 8.698306028074339e-06, "loss": 0.0038, "step": 57110 }, { "epoch": 0.9346314325452016, "grad_norm": 0.24460448324680328, "learning_rate": 8.697665324966798e-06, "loss": 0.0042, "step": 57120 }, { "epoch": 0.9347950584962775, "grad_norm": 0.16294312477111816, "learning_rate": 8.697024487827776e-06, "loss": 0.0032, "step": 57130 }, { "epoch": 0.9349586844473533, "grad_norm": 0.05100912228226662, "learning_rate": 8.696383516680505e-06, "loss": 0.0022, "step": 57140 }, { "epoch": 0.9351223103984292, "grad_norm": 0.12327504903078079, "learning_rate": 8.695742411548212e-06, "loss": 0.0032, "step": 57150 }, { "epoch": 0.9352859363495051, "grad_norm": 0.31645238399505615, "learning_rate": 8.695101172454141e-06, "loss": 0.0039, "step": 57160 }, { "epoch": 0.9354495623005809, "grad_norm": 0.15630635619163513, "learning_rate": 8.694459799421533e-06, "loss": 0.0027, "step": 57170 }, { "epoch": 0.9356131882516567, "grad_norm": 0.10846993327140808, "learning_rate": 8.693818292473637e-06, "loss": 0.0024, "step": 57180 }, { "epoch": 0.9357768142027325, "grad_norm": 0.21625679731369019, "learning_rate": 8.693176651633704e-06, "loss": 0.002, "step": 57190 }, { "epoch": 0.9359404401538084, "grad_norm": 0.09098803251981735, "learning_rate": 8.692534876924996e-06, "loss": 0.0027, "step": 57200 }, { "epoch": 0.9361040661048843, "grad_norm": 0.09953711181879044, "learning_rate": 8.691892968370771e-06, "loss": 0.003, "step": 57210 }, { "epoch": 0.93626769205596, "grad_norm": 0.050231534987688065, "learning_rate": 8.691250925994301e-06, "loss": 0.0023, "step": 57220 }, { "epoch": 0.9364313180070359, "grad_norm": 0.21652771532535553, "learning_rate": 8.690608749818857e-06, "loss": 0.0022, "step": 57230 }, { "epoch": 0.9365949439581117, "grad_norm": 0.13481707870960236, "learning_rate": 8.689966439867715e-06, "loss": 0.0032, "step": 57240 }, { "epoch": 0.9367585699091876, "grad_norm": 0.10447494685649872, "learning_rate": 8.689323996164158e-06, "loss": 0.003, "step": 57250 }, { "epoch": 0.9369221958602635, "grad_norm": 0.32264572381973267, "learning_rate": 8.688681418731474e-06, "loss": 0.004, "step": 57260 }, { "epoch": 0.9370858218113393, "grad_norm": 0.3122585713863373, "learning_rate": 8.688038707592955e-06, "loss": 0.0041, "step": 57270 }, { "epoch": 0.9372494477624151, "grad_norm": 0.24094940721988678, "learning_rate": 8.687395862771894e-06, "loss": 0.003, "step": 57280 }, { "epoch": 0.9374130737134909, "grad_norm": 0.11071789264678955, "learning_rate": 8.686752884291599e-06, "loss": 0.0022, "step": 57290 }, { "epoch": 0.9375766996645668, "grad_norm": 0.4375678300857544, "learning_rate": 8.686109772175368e-06, "loss": 0.0026, "step": 57300 }, { "epoch": 0.9377403256156427, "grad_norm": 0.12545320391654968, "learning_rate": 8.68546652644652e-06, "loss": 0.0037, "step": 57310 }, { "epoch": 0.9379039515667185, "grad_norm": 0.26050540804862976, "learning_rate": 8.68482314712837e-06, "loss": 0.003, "step": 57320 }, { "epoch": 0.9380675775177943, "grad_norm": 0.1724195033311844, "learning_rate": 8.684179634244234e-06, "loss": 0.0018, "step": 57330 }, { "epoch": 0.9382312034688701, "grad_norm": 0.11418847739696503, "learning_rate": 8.683535987817441e-06, "loss": 0.0041, "step": 57340 }, { "epoch": 0.938394829419946, "grad_norm": 0.09722284227609634, "learning_rate": 8.682892207871322e-06, "loss": 0.0028, "step": 57350 }, { "epoch": 0.9385584553710219, "grad_norm": 0.03472256287932396, "learning_rate": 8.682248294429214e-06, "loss": 0.0026, "step": 57360 }, { "epoch": 0.9387220813220977, "grad_norm": 0.2636689245700836, "learning_rate": 8.681604247514455e-06, "loss": 0.0026, "step": 57370 }, { "epoch": 0.9388857072731736, "grad_norm": 0.22415636479854584, "learning_rate": 8.68096006715039e-06, "loss": 0.0037, "step": 57380 }, { "epoch": 0.9390493332242493, "grad_norm": 0.027264980599284172, "learning_rate": 8.680315753360368e-06, "loss": 0.0025, "step": 57390 }, { "epoch": 0.9392129591753252, "grad_norm": 0.08876129984855652, "learning_rate": 8.679671306167748e-06, "loss": 0.0023, "step": 57400 }, { "epoch": 0.9393765851264011, "grad_norm": 0.11450862884521484, "learning_rate": 8.679026725595886e-06, "loss": 0.0032, "step": 57410 }, { "epoch": 0.9395402110774769, "grad_norm": 0.3536887466907501, "learning_rate": 8.678382011668145e-06, "loss": 0.0027, "step": 57420 }, { "epoch": 0.9397038370285528, "grad_norm": 0.2696461081504822, "learning_rate": 8.6777371644079e-06, "loss": 0.003, "step": 57430 }, { "epoch": 0.9398674629796285, "grad_norm": 0.21755576133728027, "learning_rate": 8.67709218383852e-06, "loss": 0.0033, "step": 57440 }, { "epoch": 0.9400310889307044, "grad_norm": 0.16868865489959717, "learning_rate": 8.676447069983387e-06, "loss": 0.0038, "step": 57450 }, { "epoch": 0.9401947148817803, "grad_norm": 0.05901087820529938, "learning_rate": 8.675801822865883e-06, "loss": 0.0025, "step": 57460 }, { "epoch": 0.9403583408328561, "grad_norm": 0.2886706292629242, "learning_rate": 8.6751564425094e-06, "loss": 0.0061, "step": 57470 }, { "epoch": 0.940521966783932, "grad_norm": 0.6585190296173096, "learning_rate": 8.674510928937327e-06, "loss": 0.0045, "step": 57480 }, { "epoch": 0.9406855927350077, "grad_norm": 0.2559524178504944, "learning_rate": 8.673865282173064e-06, "loss": 0.002, "step": 57490 }, { "epoch": 0.9408492186860836, "grad_norm": 0.15461988747119904, "learning_rate": 8.673219502240014e-06, "loss": 0.0034, "step": 57500 }, { "epoch": 0.9410128446371595, "grad_norm": 0.14877702295780182, "learning_rate": 8.672573589161586e-06, "loss": 0.0029, "step": 57510 }, { "epoch": 0.9411764705882353, "grad_norm": 0.13619624078273773, "learning_rate": 8.671927542961194e-06, "loss": 0.0027, "step": 57520 }, { "epoch": 0.9413400965393112, "grad_norm": 0.1986972689628601, "learning_rate": 8.671281363662252e-06, "loss": 0.0017, "step": 57530 }, { "epoch": 0.9415037224903869, "grad_norm": 0.10938013345003128, "learning_rate": 8.670635051288182e-06, "loss": 0.0058, "step": 57540 }, { "epoch": 0.9416673484414628, "grad_norm": 0.08242452889680862, "learning_rate": 8.669988605862419e-06, "loss": 0.0031, "step": 57550 }, { "epoch": 0.9418309743925387, "grad_norm": 0.08062282204627991, "learning_rate": 8.669342027408385e-06, "loss": 0.003, "step": 57560 }, { "epoch": 0.9419946003436145, "grad_norm": 0.16516931354999542, "learning_rate": 8.668695315949524e-06, "loss": 0.0038, "step": 57570 }, { "epoch": 0.9421582262946904, "grad_norm": 0.19662010669708252, "learning_rate": 8.668048471509274e-06, "loss": 0.0036, "step": 57580 }, { "epoch": 0.9423218522457661, "grad_norm": 0.03410712257027626, "learning_rate": 8.667401494111083e-06, "loss": 0.0025, "step": 57590 }, { "epoch": 0.942485478196842, "grad_norm": 0.0899646133184433, "learning_rate": 8.666754383778403e-06, "loss": 0.0027, "step": 57600 }, { "epoch": 0.9426491041479178, "grad_norm": 0.13241948187351227, "learning_rate": 8.666107140534688e-06, "loss": 0.0012, "step": 57610 }, { "epoch": 0.9428127300989937, "grad_norm": 0.17516230046749115, "learning_rate": 8.665459764403402e-06, "loss": 0.0027, "step": 57620 }, { "epoch": 0.9429763560500696, "grad_norm": 0.16082867980003357, "learning_rate": 8.664812255408008e-06, "loss": 0.0035, "step": 57630 }, { "epoch": 0.9431399820011453, "grad_norm": 0.23999162018299103, "learning_rate": 8.664164613571979e-06, "loss": 0.0026, "step": 57640 }, { "epoch": 0.9433036079522212, "grad_norm": 0.2453894168138504, "learning_rate": 8.663516838918788e-06, "loss": 0.0027, "step": 57650 }, { "epoch": 0.943467233903297, "grad_norm": 0.24244779348373413, "learning_rate": 8.662868931471919e-06, "loss": 0.0032, "step": 57660 }, { "epoch": 0.9436308598543729, "grad_norm": 0.431749552488327, "learning_rate": 8.662220891254851e-06, "loss": 0.0024, "step": 57670 }, { "epoch": 0.9437944858054488, "grad_norm": 0.0839313417673111, "learning_rate": 8.66157271829108e-06, "loss": 0.0114, "step": 57680 }, { "epoch": 0.9439581117565246, "grad_norm": 0.12909993529319763, "learning_rate": 8.660924412604098e-06, "loss": 0.0027, "step": 57690 }, { "epoch": 0.9441217377076004, "grad_norm": 0.09456297755241394, "learning_rate": 8.660275974217406e-06, "loss": 0.0028, "step": 57700 }, { "epoch": 0.9442853636586762, "grad_norm": 0.4690358340740204, "learning_rate": 8.659627403154506e-06, "loss": 0.0035, "step": 57710 }, { "epoch": 0.9444489896097521, "grad_norm": 0.22912617027759552, "learning_rate": 8.658978699438909e-06, "loss": 0.005, "step": 57720 }, { "epoch": 0.944612615560828, "grad_norm": 0.1195877194404602, "learning_rate": 8.658329863094128e-06, "loss": 0.0023, "step": 57730 }, { "epoch": 0.9447762415119038, "grad_norm": 0.1556454449892044, "learning_rate": 8.65768089414368e-06, "loss": 0.0022, "step": 57740 }, { "epoch": 0.9449398674629796, "grad_norm": 0.29133787751197815, "learning_rate": 8.657031792611092e-06, "loss": 0.003, "step": 57750 }, { "epoch": 0.9451034934140554, "grad_norm": 0.3909377157688141, "learning_rate": 8.656382558519893e-06, "loss": 0.0046, "step": 57760 }, { "epoch": 0.9452671193651313, "grad_norm": 0.17427799105644226, "learning_rate": 8.655733191893611e-06, "loss": 0.0039, "step": 57770 }, { "epoch": 0.9454307453162072, "grad_norm": 0.22057141363620758, "learning_rate": 8.655083692755789e-06, "loss": 0.0027, "step": 57780 }, { "epoch": 0.945594371267283, "grad_norm": 0.07879922538995743, "learning_rate": 8.654434061129969e-06, "loss": 0.0022, "step": 57790 }, { "epoch": 0.9457579972183588, "grad_norm": 0.04771272838115692, "learning_rate": 8.653784297039694e-06, "loss": 0.0019, "step": 57800 }, { "epoch": 0.9459216231694346, "grad_norm": 0.3877989947795868, "learning_rate": 8.653134400508523e-06, "loss": 0.0044, "step": 57810 }, { "epoch": 0.9460852491205105, "grad_norm": 0.34246203303337097, "learning_rate": 8.652484371560008e-06, "loss": 0.002, "step": 57820 }, { "epoch": 0.9462488750715864, "grad_norm": 0.2034769356250763, "learning_rate": 8.651834210217713e-06, "loss": 0.003, "step": 57830 }, { "epoch": 0.9464125010226622, "grad_norm": 0.2086765170097351, "learning_rate": 8.651183916505205e-06, "loss": 0.0034, "step": 57840 }, { "epoch": 0.946576126973738, "grad_norm": 0.1092161312699318, "learning_rate": 8.650533490446057e-06, "loss": 0.0032, "step": 57850 }, { "epoch": 0.9467397529248138, "grad_norm": 0.07904884964227676, "learning_rate": 8.649882932063843e-06, "loss": 0.003, "step": 57860 }, { "epoch": 0.9469033788758897, "grad_norm": 0.21050243079662323, "learning_rate": 8.649232241382146e-06, "loss": 0.0035, "step": 57870 }, { "epoch": 0.9470670048269656, "grad_norm": 0.09052559733390808, "learning_rate": 8.648581418424549e-06, "loss": 0.0048, "step": 57880 }, { "epoch": 0.9472306307780414, "grad_norm": 0.09729443490505219, "learning_rate": 8.647930463214646e-06, "loss": 0.0028, "step": 57890 }, { "epoch": 0.9473942567291173, "grad_norm": 0.15841154754161835, "learning_rate": 8.64727937577603e-06, "loss": 0.0021, "step": 57900 }, { "epoch": 0.947557882680193, "grad_norm": 0.1975061297416687, "learning_rate": 8.646628156132304e-06, "loss": 0.0052, "step": 57910 }, { "epoch": 0.9477215086312689, "grad_norm": 0.07624470442533493, "learning_rate": 8.645976804307071e-06, "loss": 0.0032, "step": 57920 }, { "epoch": 0.9478851345823448, "grad_norm": 0.2026941180229187, "learning_rate": 8.645325320323942e-06, "loss": 0.0024, "step": 57930 }, { "epoch": 0.9480487605334206, "grad_norm": 0.032917656004428864, "learning_rate": 8.644673704206533e-06, "loss": 0.0016, "step": 57940 }, { "epoch": 0.9482123864844965, "grad_norm": 0.2713351547718048, "learning_rate": 8.644021955978458e-06, "loss": 0.003, "step": 57950 }, { "epoch": 0.9483760124355722, "grad_norm": 0.07845479249954224, "learning_rate": 8.643370075663348e-06, "loss": 0.0033, "step": 57960 }, { "epoch": 0.9485396383866481, "grad_norm": 0.06043951213359833, "learning_rate": 8.64271806328483e-06, "loss": 0.0018, "step": 57970 }, { "epoch": 0.948703264337724, "grad_norm": 0.1575866937637329, "learning_rate": 8.642065918866536e-06, "loss": 0.0047, "step": 57980 }, { "epoch": 0.9488668902887998, "grad_norm": 0.14943064749240875, "learning_rate": 8.641413642432106e-06, "loss": 0.0027, "step": 57990 }, { "epoch": 0.9490305162398757, "grad_norm": 0.1311347484588623, "learning_rate": 8.640761234005183e-06, "loss": 0.0023, "step": 58000 }, { "epoch": 0.9491941421909514, "grad_norm": 0.21922120451927185, "learning_rate": 8.640108693609416e-06, "loss": 0.0025, "step": 58010 }, { "epoch": 0.9493577681420273, "grad_norm": 0.2126881331205368, "learning_rate": 8.639456021268459e-06, "loss": 0.0036, "step": 58020 }, { "epoch": 0.9495213940931032, "grad_norm": 0.29935869574546814, "learning_rate": 8.638803217005966e-06, "loss": 0.002, "step": 58030 }, { "epoch": 0.949685020044179, "grad_norm": 0.2871516942977905, "learning_rate": 8.638150280845603e-06, "loss": 0.0028, "step": 58040 }, { "epoch": 0.9498486459952549, "grad_norm": 0.2636096179485321, "learning_rate": 8.637497212811036e-06, "loss": 0.0018, "step": 58050 }, { "epoch": 0.9500122719463306, "grad_norm": 0.053623076528310776, "learning_rate": 8.63684401292594e-06, "loss": 0.0016, "step": 58060 }, { "epoch": 0.9501758978974065, "grad_norm": 0.34329134225845337, "learning_rate": 8.636190681213988e-06, "loss": 0.0018, "step": 58070 }, { "epoch": 0.9503395238484824, "grad_norm": 0.051948998123407364, "learning_rate": 8.635537217698862e-06, "loss": 0.0038, "step": 58080 }, { "epoch": 0.9505031497995582, "grad_norm": 0.10153673589229584, "learning_rate": 8.634883622404251e-06, "loss": 0.0018, "step": 58090 }, { "epoch": 0.9506667757506341, "grad_norm": 0.21416954696178436, "learning_rate": 8.634229895353843e-06, "loss": 0.0053, "step": 58100 }, { "epoch": 0.9508304017017098, "grad_norm": 0.20511853694915771, "learning_rate": 8.633576036571338e-06, "loss": 0.0023, "step": 58110 }, { "epoch": 0.9509940276527857, "grad_norm": 0.26264485716819763, "learning_rate": 8.632922046080434e-06, "loss": 0.0027, "step": 58120 }, { "epoch": 0.9511576536038616, "grad_norm": 0.09430819749832153, "learning_rate": 8.632267923904836e-06, "loss": 0.0031, "step": 58130 }, { "epoch": 0.9513212795549374, "grad_norm": 0.048129092901945114, "learning_rate": 8.631613670068257e-06, "loss": 0.0035, "step": 58140 }, { "epoch": 0.9514849055060133, "grad_norm": 0.12212701886892319, "learning_rate": 8.63095928459441e-06, "loss": 0.003, "step": 58150 }, { "epoch": 0.951648531457089, "grad_norm": 0.053956203162670135, "learning_rate": 8.630304767507018e-06, "loss": 0.0019, "step": 58160 }, { "epoch": 0.9518121574081649, "grad_norm": 0.29734739661216736, "learning_rate": 8.629650118829803e-06, "loss": 0.0019, "step": 58170 }, { "epoch": 0.9519757833592408, "grad_norm": 0.16599032282829285, "learning_rate": 8.628995338586492e-06, "loss": 0.003, "step": 58180 }, { "epoch": 0.9521394093103166, "grad_norm": 0.25429263710975647, "learning_rate": 8.628340426800825e-06, "loss": 0.0026, "step": 58190 }, { "epoch": 0.9523030352613925, "grad_norm": 0.17607857286930084, "learning_rate": 8.627685383496536e-06, "loss": 0.0026, "step": 58200 }, { "epoch": 0.9524666612124683, "grad_norm": 0.17603527009487152, "learning_rate": 8.62703020869737e-06, "loss": 0.0025, "step": 58210 }, { "epoch": 0.9526302871635441, "grad_norm": 0.0441669337451458, "learning_rate": 8.626374902427079e-06, "loss": 0.0023, "step": 58220 }, { "epoch": 0.95279391311462, "grad_norm": 0.11860113590955734, "learning_rate": 8.625719464709414e-06, "loss": 0.0019, "step": 58230 }, { "epoch": 0.9529575390656958, "grad_norm": 0.40554141998291016, "learning_rate": 8.62506389556813e-06, "loss": 0.0025, "step": 58240 }, { "epoch": 0.9531211650167717, "grad_norm": 0.2965421676635742, "learning_rate": 8.624408195026993e-06, "loss": 0.0026, "step": 58250 }, { "epoch": 0.9532847909678475, "grad_norm": 0.17998652160167694, "learning_rate": 8.62375236310977e-06, "loss": 0.0028, "step": 58260 }, { "epoch": 0.9534484169189233, "grad_norm": 0.0767703726887703, "learning_rate": 8.623096399840234e-06, "loss": 0.0031, "step": 58270 }, { "epoch": 0.9536120428699992, "grad_norm": 0.19731241464614868, "learning_rate": 8.622440305242161e-06, "loss": 0.0022, "step": 58280 }, { "epoch": 0.953775668821075, "grad_norm": 0.2790059447288513, "learning_rate": 8.621784079339334e-06, "loss": 0.002, "step": 58290 }, { "epoch": 0.9539392947721509, "grad_norm": 0.1442946493625641, "learning_rate": 8.621127722155537e-06, "loss": 0.0032, "step": 58300 }, { "epoch": 0.9541029207232267, "grad_norm": 0.31856971979141235, "learning_rate": 8.620471233714565e-06, "loss": 0.0026, "step": 58310 }, { "epoch": 0.9542665466743026, "grad_norm": 0.1451014280319214, "learning_rate": 8.619814614040211e-06, "loss": 0.0026, "step": 58320 }, { "epoch": 0.9544301726253784, "grad_norm": 0.15975750982761383, "learning_rate": 8.619157863156278e-06, "loss": 0.0036, "step": 58330 }, { "epoch": 0.9545937985764542, "grad_norm": 0.1194847971200943, "learning_rate": 8.61850098108657e-06, "loss": 0.0035, "step": 58340 }, { "epoch": 0.9547574245275301, "grad_norm": 0.12278693169355392, "learning_rate": 8.6178439678549e-06, "loss": 0.0041, "step": 58350 }, { "epoch": 0.9549210504786059, "grad_norm": 0.24334728717803955, "learning_rate": 8.61718682348508e-06, "loss": 0.0041, "step": 58360 }, { "epoch": 0.9550846764296818, "grad_norm": 0.37377408146858215, "learning_rate": 8.616529548000934e-06, "loss": 0.0039, "step": 58370 }, { "epoch": 0.9552483023807576, "grad_norm": 0.10425055027008057, "learning_rate": 8.61587214142628e-06, "loss": 0.0045, "step": 58380 }, { "epoch": 0.9554119283318334, "grad_norm": 0.11645399779081345, "learning_rate": 8.615214603784955e-06, "loss": 0.0034, "step": 58390 }, { "epoch": 0.9555755542829093, "grad_norm": 0.09546682238578796, "learning_rate": 8.614556935100786e-06, "loss": 0.0024, "step": 58400 }, { "epoch": 0.9557391802339851, "grad_norm": 0.1266881823539734, "learning_rate": 8.613899135397618e-06, "loss": 0.0014, "step": 58410 }, { "epoch": 0.955902806185061, "grad_norm": 0.10030030459165573, "learning_rate": 8.613241204699291e-06, "loss": 0.0036, "step": 58420 }, { "epoch": 0.9560664321361368, "grad_norm": 0.07037805020809174, "learning_rate": 8.612583143029654e-06, "loss": 0.0033, "step": 58430 }, { "epoch": 0.9562300580872126, "grad_norm": 0.133728489279747, "learning_rate": 8.611924950412562e-06, "loss": 0.0018, "step": 58440 }, { "epoch": 0.9563936840382885, "grad_norm": 0.08356554806232452, "learning_rate": 8.611266626871871e-06, "loss": 0.003, "step": 58450 }, { "epoch": 0.9565573099893643, "grad_norm": 0.11183792352676392, "learning_rate": 8.610608172431444e-06, "loss": 0.0019, "step": 58460 }, { "epoch": 0.9567209359404402, "grad_norm": 0.10106891393661499, "learning_rate": 8.609949587115149e-06, "loss": 0.0032, "step": 58470 }, { "epoch": 0.9568845618915159, "grad_norm": 0.13128845393657684, "learning_rate": 8.609290870946857e-06, "loss": 0.0035, "step": 58480 }, { "epoch": 0.9570481878425918, "grad_norm": 0.10600393265485764, "learning_rate": 8.608632023950448e-06, "loss": 0.0023, "step": 58490 }, { "epoch": 0.9572118137936677, "grad_norm": 0.08835292607545853, "learning_rate": 8.6079730461498e-06, "loss": 0.0029, "step": 58500 }, { "epoch": 0.9573754397447435, "grad_norm": 0.059927403926849365, "learning_rate": 8.6073139375688e-06, "loss": 0.0027, "step": 58510 }, { "epoch": 0.9575390656958194, "grad_norm": 0.11428363621234894, "learning_rate": 8.60665469823134e-06, "loss": 0.0021, "step": 58520 }, { "epoch": 0.9577026916468951, "grad_norm": 0.13040103018283844, "learning_rate": 8.605995328161314e-06, "loss": 0.0027, "step": 58530 }, { "epoch": 0.957866317597971, "grad_norm": 0.27606552839279175, "learning_rate": 8.605335827382626e-06, "loss": 0.002, "step": 58540 }, { "epoch": 0.9580299435490469, "grad_norm": 0.03385399654507637, "learning_rate": 8.60467619591918e-06, "loss": 0.0033, "step": 58550 }, { "epoch": 0.9581935695001227, "grad_norm": 0.07648373395204544, "learning_rate": 8.604016433794884e-06, "loss": 0.002, "step": 58560 }, { "epoch": 0.9583571954511986, "grad_norm": 0.02985292486846447, "learning_rate": 8.603356541033655e-06, "loss": 0.003, "step": 58570 }, { "epoch": 0.9585208214022743, "grad_norm": 0.24194005131721497, "learning_rate": 8.602696517659412e-06, "loss": 0.0046, "step": 58580 }, { "epoch": 0.9586844473533502, "grad_norm": 0.07669809460639954, "learning_rate": 8.602036363696078e-06, "loss": 0.0017, "step": 58590 }, { "epoch": 0.9588480733044261, "grad_norm": 0.2314564287662506, "learning_rate": 8.601376079167585e-06, "loss": 0.0018, "step": 58600 }, { "epoch": 0.9590116992555019, "grad_norm": 0.08914574980735779, "learning_rate": 8.600715664097863e-06, "loss": 0.0027, "step": 58610 }, { "epoch": 0.9591753252065778, "grad_norm": 0.06118670478463173, "learning_rate": 8.600055118510852e-06, "loss": 0.0025, "step": 58620 }, { "epoch": 0.9593389511576536, "grad_norm": 0.18590016663074493, "learning_rate": 8.599394442430497e-06, "loss": 0.003, "step": 58630 }, { "epoch": 0.9595025771087294, "grad_norm": 0.22793278098106384, "learning_rate": 8.598733635880744e-06, "loss": 0.0022, "step": 58640 }, { "epoch": 0.9596662030598053, "grad_norm": 0.28833839297294617, "learning_rate": 8.598072698885546e-06, "loss": 0.0031, "step": 58650 }, { "epoch": 0.9598298290108811, "grad_norm": 0.3035374879837036, "learning_rate": 8.597411631468861e-06, "loss": 0.0044, "step": 58660 }, { "epoch": 0.959993454961957, "grad_norm": 0.0633229985833168, "learning_rate": 8.596750433654649e-06, "loss": 0.0029, "step": 58670 }, { "epoch": 0.9601570809130328, "grad_norm": 0.22874128818511963, "learning_rate": 8.596089105466878e-06, "loss": 0.0041, "step": 58680 }, { "epoch": 0.9603207068641086, "grad_norm": 0.09386830776929855, "learning_rate": 8.595427646929521e-06, "loss": 0.0027, "step": 58690 }, { "epoch": 0.9604843328151845, "grad_norm": 0.11341681331396103, "learning_rate": 8.594766058066555e-06, "loss": 0.003, "step": 58700 }, { "epoch": 0.9606479587662603, "grad_norm": 0.2628450393676758, "learning_rate": 8.594104338901957e-06, "loss": 0.0024, "step": 58710 }, { "epoch": 0.9608115847173362, "grad_norm": 0.0664951279759407, "learning_rate": 8.593442489459714e-06, "loss": 0.0034, "step": 58720 }, { "epoch": 0.960975210668412, "grad_norm": 0.17633040249347687, "learning_rate": 8.59278050976382e-06, "loss": 0.0025, "step": 58730 }, { "epoch": 0.9611388366194878, "grad_norm": 0.13413134217262268, "learning_rate": 8.592118399838265e-06, "loss": 0.003, "step": 58740 }, { "epoch": 0.9613024625705637, "grad_norm": 0.1787150651216507, "learning_rate": 8.591456159707053e-06, "loss": 0.0021, "step": 58750 }, { "epoch": 0.9614660885216395, "grad_norm": 0.12327458709478378, "learning_rate": 8.590793789394186e-06, "loss": 0.0039, "step": 58760 }, { "epoch": 0.9616297144727154, "grad_norm": 0.12161486595869064, "learning_rate": 8.590131288923676e-06, "loss": 0.0025, "step": 58770 }, { "epoch": 0.9617933404237912, "grad_norm": 0.1718994826078415, "learning_rate": 8.589468658319535e-06, "loss": 0.0027, "step": 58780 }, { "epoch": 0.961956966374867, "grad_norm": 0.08942586183547974, "learning_rate": 8.588805897605781e-06, "loss": 0.0027, "step": 58790 }, { "epoch": 0.9621205923259429, "grad_norm": 0.17843519151210785, "learning_rate": 8.58814300680644e-06, "loss": 0.0033, "step": 58800 }, { "epoch": 0.9622842182770187, "grad_norm": 0.06974881887435913, "learning_rate": 8.587479985945537e-06, "loss": 0.0022, "step": 58810 }, { "epoch": 0.9624478442280946, "grad_norm": 0.13175734877586365, "learning_rate": 8.586816835047108e-06, "loss": 0.0022, "step": 58820 }, { "epoch": 0.9626114701791704, "grad_norm": 0.39447152614593506, "learning_rate": 8.58615355413519e-06, "loss": 0.0033, "step": 58830 }, { "epoch": 0.9627750961302463, "grad_norm": 0.07321786880493164, "learning_rate": 8.585490143233824e-06, "loss": 0.0024, "step": 58840 }, { "epoch": 0.9629387220813221, "grad_norm": 0.15211625397205353, "learning_rate": 8.584826602367058e-06, "loss": 0.0049, "step": 58850 }, { "epoch": 0.9631023480323979, "grad_norm": 0.23689234256744385, "learning_rate": 8.584162931558944e-06, "loss": 0.0036, "step": 58860 }, { "epoch": 0.9632659739834738, "grad_norm": 0.10103043913841248, "learning_rate": 8.583499130833536e-06, "loss": 0.0025, "step": 58870 }, { "epoch": 0.9634295999345496, "grad_norm": 0.17015494406223297, "learning_rate": 8.5828352002149e-06, "loss": 0.0035, "step": 58880 }, { "epoch": 0.9635932258856255, "grad_norm": 0.06825874745845795, "learning_rate": 8.582171139727098e-06, "loss": 0.0032, "step": 58890 }, { "epoch": 0.9637568518367013, "grad_norm": 0.4128832519054413, "learning_rate": 8.581506949394201e-06, "loss": 0.0019, "step": 58900 }, { "epoch": 0.9639204777877771, "grad_norm": 0.22421450912952423, "learning_rate": 8.580842629240288e-06, "loss": 0.0024, "step": 58910 }, { "epoch": 0.964084103738853, "grad_norm": 0.13705191016197205, "learning_rate": 8.58017817928943e-06, "loss": 0.002, "step": 58920 }, { "epoch": 0.9642477296899288, "grad_norm": 0.08183940500020981, "learning_rate": 8.579513599565723e-06, "loss": 0.0046, "step": 58930 }, { "epoch": 0.9644113556410047, "grad_norm": 0.05310506373643875, "learning_rate": 8.578848890093249e-06, "loss": 0.003, "step": 58940 }, { "epoch": 0.9645749815920805, "grad_norm": 0.3109532594680786, "learning_rate": 8.578184050896106e-06, "loss": 0.002, "step": 58950 }, { "epoch": 0.9647386075431563, "grad_norm": 0.3090008795261383, "learning_rate": 8.577519081998389e-06, "loss": 0.0028, "step": 58960 }, { "epoch": 0.9649022334942322, "grad_norm": 0.376575231552124, "learning_rate": 8.576853983424206e-06, "loss": 0.0048, "step": 58970 }, { "epoch": 0.965065859445308, "grad_norm": 0.266393780708313, "learning_rate": 8.57618875519766e-06, "loss": 0.0026, "step": 58980 }, { "epoch": 0.9652294853963839, "grad_norm": 0.24883608520030975, "learning_rate": 8.575523397342868e-06, "loss": 0.0031, "step": 58990 }, { "epoch": 0.9653931113474598, "grad_norm": 0.12536068260669708, "learning_rate": 8.574857909883946e-06, "loss": 0.0026, "step": 59000 }, { "epoch": 0.9655567372985355, "grad_norm": 0.1851234883069992, "learning_rate": 8.574192292845019e-06, "loss": 0.0026, "step": 59010 }, { "epoch": 0.9657203632496114, "grad_norm": 0.22310858964920044, "learning_rate": 8.573526546250209e-06, "loss": 0.0028, "step": 59020 }, { "epoch": 0.9658839892006872, "grad_norm": 0.05807534232735634, "learning_rate": 8.572860670123651e-06, "loss": 0.0022, "step": 59030 }, { "epoch": 0.9660476151517631, "grad_norm": 0.06185042858123779, "learning_rate": 8.57219466448948e-06, "loss": 0.0049, "step": 59040 }, { "epoch": 0.966211241102839, "grad_norm": 0.5232572555541992, "learning_rate": 8.571528529371842e-06, "loss": 0.0035, "step": 59050 }, { "epoch": 0.9663748670539147, "grad_norm": 0.07686765491962433, "learning_rate": 8.570862264794875e-06, "loss": 0.0024, "step": 59060 }, { "epoch": 0.9665384930049906, "grad_norm": 0.08040457218885422, "learning_rate": 8.570195870782734e-06, "loss": 0.0028, "step": 59070 }, { "epoch": 0.9667021189560664, "grad_norm": 0.0629778727889061, "learning_rate": 8.569529347359574e-06, "loss": 0.0028, "step": 59080 }, { "epoch": 0.9668657449071423, "grad_norm": 0.07602206617593765, "learning_rate": 8.568862694549554e-06, "loss": 0.0025, "step": 59090 }, { "epoch": 0.9670293708582182, "grad_norm": 0.13948380947113037, "learning_rate": 8.568195912376838e-06, "loss": 0.0039, "step": 59100 }, { "epoch": 0.9671929968092939, "grad_norm": 0.27862828969955444, "learning_rate": 8.567529000865598e-06, "loss": 0.0014, "step": 59110 }, { "epoch": 0.9673566227603698, "grad_norm": 0.09404278546571732, "learning_rate": 8.566861960040005e-06, "loss": 0.0043, "step": 59120 }, { "epoch": 0.9675202487114456, "grad_norm": 0.10638560354709625, "learning_rate": 8.566194789924237e-06, "loss": 0.002, "step": 59130 }, { "epoch": 0.9676838746625215, "grad_norm": 0.1763361692428589, "learning_rate": 8.565527490542482e-06, "loss": 0.0027, "step": 59140 }, { "epoch": 0.9678475006135974, "grad_norm": 0.2545202672481537, "learning_rate": 8.564860061918924e-06, "loss": 0.0036, "step": 59150 }, { "epoch": 0.9680111265646731, "grad_norm": 0.12541721761226654, "learning_rate": 8.564192504077755e-06, "loss": 0.0023, "step": 59160 }, { "epoch": 0.968174752515749, "grad_norm": 0.21643178164958954, "learning_rate": 8.563524817043176e-06, "loss": 0.0021, "step": 59170 }, { "epoch": 0.9683383784668248, "grad_norm": 0.06688152253627777, "learning_rate": 8.562857000839386e-06, "loss": 0.0022, "step": 59180 }, { "epoch": 0.9685020044179007, "grad_norm": 0.08326565474271774, "learning_rate": 8.562189055490593e-06, "loss": 0.0024, "step": 59190 }, { "epoch": 0.9686656303689766, "grad_norm": 0.11828333884477615, "learning_rate": 8.561520981021009e-06, "loss": 0.0031, "step": 59200 }, { "epoch": 0.9688292563200523, "grad_norm": 0.10928668826818466, "learning_rate": 8.560852777454849e-06, "loss": 0.004, "step": 59210 }, { "epoch": 0.9689928822711282, "grad_norm": 0.21463769674301147, "learning_rate": 8.560184444816333e-06, "loss": 0.0039, "step": 59220 }, { "epoch": 0.969156508222204, "grad_norm": 0.28723961114883423, "learning_rate": 8.55951598312969e-06, "loss": 0.004, "step": 59230 }, { "epoch": 0.9693201341732799, "grad_norm": 0.205383762717247, "learning_rate": 8.558847392419145e-06, "loss": 0.0021, "step": 59240 }, { "epoch": 0.9694837601243558, "grad_norm": 0.06544926762580872, "learning_rate": 8.558178672708937e-06, "loss": 0.0026, "step": 59250 }, { "epoch": 0.9696473860754315, "grad_norm": 0.6217775940895081, "learning_rate": 8.557509824023303e-06, "loss": 0.0044, "step": 59260 }, { "epoch": 0.9698110120265074, "grad_norm": 0.2745841145515442, "learning_rate": 8.556840846386489e-06, "loss": 0.0022, "step": 59270 }, { "epoch": 0.9699746379775832, "grad_norm": 0.07420367002487183, "learning_rate": 8.556171739822742e-06, "loss": 0.0061, "step": 59280 }, { "epoch": 0.9701382639286591, "grad_norm": 0.22379092872142792, "learning_rate": 8.555502504356317e-06, "loss": 0.0025, "step": 59290 }, { "epoch": 0.970301889879735, "grad_norm": 0.21570985019207, "learning_rate": 8.554833140011473e-06, "loss": 0.0015, "step": 59300 }, { "epoch": 0.9704655158308108, "grad_norm": 0.15618379414081573, "learning_rate": 8.55416364681247e-06, "loss": 0.0032, "step": 59310 }, { "epoch": 0.9706291417818866, "grad_norm": 0.09433108568191528, "learning_rate": 8.553494024783578e-06, "loss": 0.0028, "step": 59320 }, { "epoch": 0.9707927677329624, "grad_norm": 0.17836423218250275, "learning_rate": 8.552824273949067e-06, "loss": 0.0053, "step": 59330 }, { "epoch": 0.9709563936840383, "grad_norm": 0.2055259644985199, "learning_rate": 8.552154394333216e-06, "loss": 0.0024, "step": 59340 }, { "epoch": 0.9711200196351142, "grad_norm": 0.18296679854393005, "learning_rate": 8.551484385960304e-06, "loss": 0.0036, "step": 59350 }, { "epoch": 0.97128364558619, "grad_norm": 0.486174613237381, "learning_rate": 8.55081424885462e-06, "loss": 0.0042, "step": 59360 }, { "epoch": 0.9714472715372658, "grad_norm": 0.053949691355228424, "learning_rate": 8.550143983040454e-06, "loss": 0.0016, "step": 59370 }, { "epoch": 0.9716108974883416, "grad_norm": 0.044979531317949295, "learning_rate": 8.549473588542101e-06, "loss": 0.0026, "step": 59380 }, { "epoch": 0.9717745234394175, "grad_norm": 0.2997584939002991, "learning_rate": 8.54880306538386e-06, "loss": 0.0026, "step": 59390 }, { "epoch": 0.9719381493904933, "grad_norm": 0.0504860021173954, "learning_rate": 8.548132413590038e-06, "loss": 0.0021, "step": 59400 }, { "epoch": 0.9721017753415692, "grad_norm": 0.27527013421058655, "learning_rate": 8.547461633184945e-06, "loss": 0.0043, "step": 59410 }, { "epoch": 0.972265401292645, "grad_norm": 0.1653563231229782, "learning_rate": 8.546790724192892e-06, "loss": 0.0034, "step": 59420 }, { "epoch": 0.9724290272437208, "grad_norm": 0.1331002116203308, "learning_rate": 8.546119686638202e-06, "loss": 0.0028, "step": 59430 }, { "epoch": 0.9725926531947967, "grad_norm": 0.13089890778064728, "learning_rate": 8.545448520545195e-06, "loss": 0.0022, "step": 59440 }, { "epoch": 0.9727562791458725, "grad_norm": 0.040818240493535995, "learning_rate": 8.544777225938199e-06, "loss": 0.0031, "step": 59450 }, { "epoch": 0.9729199050969484, "grad_norm": 0.03818698972463608, "learning_rate": 8.54410580284155e-06, "loss": 0.0021, "step": 59460 }, { "epoch": 0.9730835310480243, "grad_norm": 0.6097914576530457, "learning_rate": 8.543434251279583e-06, "loss": 0.0033, "step": 59470 }, { "epoch": 0.9732471569991, "grad_norm": 0.24187172949314117, "learning_rate": 8.542762571276641e-06, "loss": 0.0031, "step": 59480 }, { "epoch": 0.9734107829501759, "grad_norm": 0.24722379446029663, "learning_rate": 8.542090762857071e-06, "loss": 0.004, "step": 59490 }, { "epoch": 0.9735744089012517, "grad_norm": 0.185979425907135, "learning_rate": 8.541418826045223e-06, "loss": 0.0031, "step": 59500 }, { "epoch": 0.9737380348523276, "grad_norm": 0.09226076304912567, "learning_rate": 8.540746760865455e-06, "loss": 0.0035, "step": 59510 }, { "epoch": 0.9739016608034035, "grad_norm": 0.17793650925159454, "learning_rate": 8.540074567342126e-06, "loss": 0.0028, "step": 59520 }, { "epoch": 0.9740652867544792, "grad_norm": 0.12207861244678497, "learning_rate": 8.539402245499603e-06, "loss": 0.0031, "step": 59530 }, { "epoch": 0.9742289127055551, "grad_norm": 0.1498616486787796, "learning_rate": 8.538729795362255e-06, "loss": 0.0033, "step": 59540 }, { "epoch": 0.9743925386566309, "grad_norm": 0.12463486939668655, "learning_rate": 8.538057216954456e-06, "loss": 0.0036, "step": 59550 }, { "epoch": 0.9745561646077068, "grad_norm": 0.289331316947937, "learning_rate": 8.53738451030059e-06, "loss": 0.0031, "step": 59560 }, { "epoch": 0.9747197905587827, "grad_norm": 0.23395973443984985, "learning_rate": 8.536711675425034e-06, "loss": 0.004, "step": 59570 }, { "epoch": 0.9748834165098584, "grad_norm": 0.056549884378910065, "learning_rate": 8.536038712352181e-06, "loss": 0.0028, "step": 59580 }, { "epoch": 0.9750470424609343, "grad_norm": 0.17443841695785522, "learning_rate": 8.535365621106425e-06, "loss": 0.0024, "step": 59590 }, { "epoch": 0.9752106684120101, "grad_norm": 0.06441398710012436, "learning_rate": 8.53469240171216e-06, "loss": 0.0015, "step": 59600 }, { "epoch": 0.975374294363086, "grad_norm": 0.1061554029583931, "learning_rate": 8.53401905419379e-06, "loss": 0.003, "step": 59610 }, { "epoch": 0.9755379203141619, "grad_norm": 0.25653916597366333, "learning_rate": 8.533345578575724e-06, "loss": 0.0047, "step": 59620 }, { "epoch": 0.9757015462652376, "grad_norm": 0.37085962295532227, "learning_rate": 8.532671974882374e-06, "loss": 0.0033, "step": 59630 }, { "epoch": 0.9758651722163135, "grad_norm": 0.13060380518436432, "learning_rate": 8.531998243138155e-06, "loss": 0.0023, "step": 59640 }, { "epoch": 0.9760287981673893, "grad_norm": 0.11418633162975311, "learning_rate": 8.531324383367488e-06, "loss": 0.0028, "step": 59650 }, { "epoch": 0.9761924241184652, "grad_norm": 0.2217169851064682, "learning_rate": 8.530650395594801e-06, "loss": 0.003, "step": 59660 }, { "epoch": 0.9763560500695411, "grad_norm": 0.005907972808927298, "learning_rate": 8.529976279844521e-06, "loss": 0.002, "step": 59670 }, { "epoch": 0.9765196760206168, "grad_norm": 0.19005122780799866, "learning_rate": 8.529302036141087e-06, "loss": 0.0021, "step": 59680 }, { "epoch": 0.9766833019716927, "grad_norm": 0.17092862725257874, "learning_rate": 8.528627664508935e-06, "loss": 0.0056, "step": 59690 }, { "epoch": 0.9768469279227685, "grad_norm": 0.23192507028579712, "learning_rate": 8.527953164972511e-06, "loss": 0.0034, "step": 59700 }, { "epoch": 0.9770105538738444, "grad_norm": 0.03187968581914902, "learning_rate": 8.527278537556265e-06, "loss": 0.0026, "step": 59710 }, { "epoch": 0.9771741798249203, "grad_norm": 0.16410218179225922, "learning_rate": 8.52660378228465e-06, "loss": 0.0041, "step": 59720 }, { "epoch": 0.977337805775996, "grad_norm": 0.1285248100757599, "learning_rate": 8.52592889918212e-06, "loss": 0.0031, "step": 59730 }, { "epoch": 0.9775014317270719, "grad_norm": 0.22292162477970123, "learning_rate": 8.525253888273146e-06, "loss": 0.0022, "step": 59740 }, { "epoch": 0.9776650576781477, "grad_norm": 0.016681628301739693, "learning_rate": 8.524578749582189e-06, "loss": 0.0032, "step": 59750 }, { "epoch": 0.9778286836292236, "grad_norm": 0.35194265842437744, "learning_rate": 8.523903483133725e-06, "loss": 0.0029, "step": 59760 }, { "epoch": 0.9779923095802995, "grad_norm": 0.036682259291410446, "learning_rate": 8.523228088952229e-06, "loss": 0.0021, "step": 59770 }, { "epoch": 0.9781559355313753, "grad_norm": 0.25140687823295593, "learning_rate": 8.522552567062182e-06, "loss": 0.0032, "step": 59780 }, { "epoch": 0.9783195614824511, "grad_norm": 0.3055187165737152, "learning_rate": 8.52187691748807e-06, "loss": 0.0033, "step": 59790 }, { "epoch": 0.9784831874335269, "grad_norm": 0.2021966278553009, "learning_rate": 8.521201140254385e-06, "loss": 0.0018, "step": 59800 }, { "epoch": 0.9786468133846028, "grad_norm": 0.10952519625425339, "learning_rate": 8.520525235385622e-06, "loss": 0.0028, "step": 59810 }, { "epoch": 0.9788104393356787, "grad_norm": 0.050762154161930084, "learning_rate": 8.51984920290628e-06, "loss": 0.0026, "step": 59820 }, { "epoch": 0.9789740652867545, "grad_norm": 0.03261469304561615, "learning_rate": 8.519173042840866e-06, "loss": 0.0035, "step": 59830 }, { "epoch": 0.9791376912378303, "grad_norm": 0.09155498445034027, "learning_rate": 8.518496755213885e-06, "loss": 0.0038, "step": 59840 }, { "epoch": 0.9793013171889061, "grad_norm": 0.12844696640968323, "learning_rate": 8.517820340049853e-06, "loss": 0.0029, "step": 59850 }, { "epoch": 0.979464943139982, "grad_norm": 0.4480613172054291, "learning_rate": 8.51714379737329e-06, "loss": 0.003, "step": 59860 }, { "epoch": 0.9796285690910579, "grad_norm": 0.16266338527202606, "learning_rate": 8.516467127208718e-06, "loss": 0.0026, "step": 59870 }, { "epoch": 0.9797921950421337, "grad_norm": 0.06926774978637695, "learning_rate": 8.515790329580664e-06, "loss": 0.0023, "step": 59880 }, { "epoch": 0.9799558209932095, "grad_norm": 0.20574086904525757, "learning_rate": 8.51511340451366e-06, "loss": 0.0032, "step": 59890 }, { "epoch": 0.9801194469442853, "grad_norm": 0.13596323132514954, "learning_rate": 8.514436352032242e-06, "loss": 0.0026, "step": 59900 }, { "epoch": 0.9802830728953612, "grad_norm": 0.10957399755716324, "learning_rate": 8.513759172160955e-06, "loss": 0.0031, "step": 59910 }, { "epoch": 0.9804466988464371, "grad_norm": 0.22817716002464294, "learning_rate": 8.513081864924343e-06, "loss": 0.0027, "step": 59920 }, { "epoch": 0.9806103247975129, "grad_norm": 0.2074064165353775, "learning_rate": 8.512404430346955e-06, "loss": 0.0024, "step": 59930 }, { "epoch": 0.9807739507485888, "grad_norm": 0.007189332041889429, "learning_rate": 8.51172686845335e-06, "loss": 0.0029, "step": 59940 }, { "epoch": 0.9809375766996645, "grad_norm": 0.12334074825048447, "learning_rate": 8.511049179268085e-06, "loss": 0.0021, "step": 59950 }, { "epoch": 0.9811012026507404, "grad_norm": 0.05503307655453682, "learning_rate": 8.510371362815726e-06, "loss": 0.0029, "step": 59960 }, { "epoch": 0.9812648286018163, "grad_norm": 0.18143554031848907, "learning_rate": 8.509693419120844e-06, "loss": 0.0027, "step": 59970 }, { "epoch": 0.9814284545528921, "grad_norm": 0.07888347655534744, "learning_rate": 8.50901534820801e-06, "loss": 0.0022, "step": 59980 }, { "epoch": 0.981592080503968, "grad_norm": 0.3083827793598175, "learning_rate": 8.508337150101801e-06, "loss": 0.0047, "step": 59990 }, { "epoch": 0.9817557064550437, "grad_norm": 0.12481352686882019, "learning_rate": 8.507658824826805e-06, "loss": 0.0047, "step": 60000 }, { "epoch": 0.9819193324061196, "grad_norm": 0.4041292071342468, "learning_rate": 8.506980372407605e-06, "loss": 0.0041, "step": 60010 }, { "epoch": 0.9820829583571955, "grad_norm": 0.13590195775032043, "learning_rate": 8.506301792868798e-06, "loss": 0.003, "step": 60020 }, { "epoch": 0.9822465843082713, "grad_norm": 0.08283043652772903, "learning_rate": 8.505623086234976e-06, "loss": 0.0031, "step": 60030 }, { "epoch": 0.9824102102593472, "grad_norm": 0.08336950838565826, "learning_rate": 8.504944252530743e-06, "loss": 0.0034, "step": 60040 }, { "epoch": 0.9825738362104229, "grad_norm": 0.24332402646541595, "learning_rate": 8.504265291780706e-06, "loss": 0.0019, "step": 60050 }, { "epoch": 0.9827374621614988, "grad_norm": 0.10673798620700836, "learning_rate": 8.503586204009474e-06, "loss": 0.0035, "step": 60060 }, { "epoch": 0.9829010881125747, "grad_norm": 0.06880920380353928, "learning_rate": 8.502906989241662e-06, "loss": 0.0031, "step": 60070 }, { "epoch": 0.9830647140636505, "grad_norm": 0.07995596528053284, "learning_rate": 8.50222764750189e-06, "loss": 0.0016, "step": 60080 }, { "epoch": 0.9832283400147264, "grad_norm": 0.18849436938762665, "learning_rate": 8.501548178814785e-06, "loss": 0.0052, "step": 60090 }, { "epoch": 0.9833919659658021, "grad_norm": 0.1039353460073471, "learning_rate": 8.500868583204974e-06, "loss": 0.0029, "step": 60100 }, { "epoch": 0.983555591916878, "grad_norm": 0.673334538936615, "learning_rate": 8.500188860697089e-06, "loss": 0.0036, "step": 60110 }, { "epoch": 0.9837192178679539, "grad_norm": 0.1642826348543167, "learning_rate": 8.499509011315772e-06, "loss": 0.0038, "step": 60120 }, { "epoch": 0.9838828438190297, "grad_norm": 0.13992322981357574, "learning_rate": 8.498829035085663e-06, "loss": 0.0049, "step": 60130 }, { "epoch": 0.9840464697701056, "grad_norm": 0.18044134974479675, "learning_rate": 8.498148932031413e-06, "loss": 0.0026, "step": 60140 }, { "epoch": 0.9842100957211813, "grad_norm": 0.08346856385469437, "learning_rate": 8.497468702177669e-06, "loss": 0.0013, "step": 60150 }, { "epoch": 0.9843737216722572, "grad_norm": 0.29542285203933716, "learning_rate": 8.496788345549092e-06, "loss": 0.0044, "step": 60160 }, { "epoch": 0.9845373476233331, "grad_norm": 0.29454171657562256, "learning_rate": 8.496107862170341e-06, "loss": 0.0037, "step": 60170 }, { "epoch": 0.9847009735744089, "grad_norm": 0.026632007211446762, "learning_rate": 8.495427252066084e-06, "loss": 0.0019, "step": 60180 }, { "epoch": 0.9848645995254848, "grad_norm": 0.05433214455842972, "learning_rate": 8.49474651526099e-06, "loss": 0.0024, "step": 60190 }, { "epoch": 0.9850282254765605, "grad_norm": 0.027612656354904175, "learning_rate": 8.494065651779733e-06, "loss": 0.0019, "step": 60200 }, { "epoch": 0.9851918514276364, "grad_norm": 0.05650606006383896, "learning_rate": 8.493384661646995e-06, "loss": 0.0015, "step": 60210 }, { "epoch": 0.9853554773787123, "grad_norm": 0.26662370562553406, "learning_rate": 8.49270354488746e-06, "loss": 0.0017, "step": 60220 }, { "epoch": 0.9855191033297881, "grad_norm": 0.06338777393102646, "learning_rate": 8.492022301525812e-06, "loss": 0.0034, "step": 60230 }, { "epoch": 0.985682729280864, "grad_norm": 0.2385823279619217, "learning_rate": 8.491340931586753e-06, "loss": 0.0026, "step": 60240 }, { "epoch": 0.9858463552319398, "grad_norm": 0.36735841631889343, "learning_rate": 8.490659435094975e-06, "loss": 0.0024, "step": 60250 }, { "epoch": 0.9860099811830156, "grad_norm": 0.14980167150497437, "learning_rate": 8.489977812075181e-06, "loss": 0.0027, "step": 60260 }, { "epoch": 0.9861736071340914, "grad_norm": 0.20571166276931763, "learning_rate": 8.48929606255208e-06, "loss": 0.0022, "step": 60270 }, { "epoch": 0.9863372330851673, "grad_norm": 0.1867142617702484, "learning_rate": 8.488614186550382e-06, "loss": 0.0019, "step": 60280 }, { "epoch": 0.9865008590362432, "grad_norm": 0.15288801491260529, "learning_rate": 8.487932184094806e-06, "loss": 0.0044, "step": 60290 }, { "epoch": 0.986664484987319, "grad_norm": 0.13992652297019958, "learning_rate": 8.48725005521007e-06, "loss": 0.0032, "step": 60300 }, { "epoch": 0.9868281109383948, "grad_norm": 0.03404860198497772, "learning_rate": 8.486567799920901e-06, "loss": 0.0035, "step": 60310 }, { "epoch": 0.9869917368894706, "grad_norm": 0.031207703053951263, "learning_rate": 8.485885418252032e-06, "loss": 0.0043, "step": 60320 }, { "epoch": 0.9871553628405465, "grad_norm": 0.15165279805660248, "learning_rate": 8.48520291022819e-06, "loss": 0.0026, "step": 60330 }, { "epoch": 0.9873189887916224, "grad_norm": 0.2213481366634369, "learning_rate": 8.484520275874123e-06, "loss": 0.0024, "step": 60340 }, { "epoch": 0.9874826147426982, "grad_norm": 0.04767608642578125, "learning_rate": 8.483837515214568e-06, "loss": 0.003, "step": 60350 }, { "epoch": 0.987646240693774, "grad_norm": 0.26207292079925537, "learning_rate": 8.483154628274277e-06, "loss": 0.0059, "step": 60360 }, { "epoch": 0.9878098666448498, "grad_norm": 0.12950855493545532, "learning_rate": 8.482471615078002e-06, "loss": 0.0026, "step": 60370 }, { "epoch": 0.9879734925959257, "grad_norm": 0.1369142383337021, "learning_rate": 8.481788475650502e-06, "loss": 0.0031, "step": 60380 }, { "epoch": 0.9881371185470016, "grad_norm": 0.036217134445905685, "learning_rate": 8.481105210016538e-06, "loss": 0.002, "step": 60390 }, { "epoch": 0.9883007444980774, "grad_norm": 0.11564240604639053, "learning_rate": 8.480421818200874e-06, "loss": 0.002, "step": 60400 }, { "epoch": 0.9884643704491533, "grad_norm": 0.26017099618911743, "learning_rate": 8.479738300228287e-06, "loss": 0.0029, "step": 60410 }, { "epoch": 0.988627996400229, "grad_norm": 0.16226722300052643, "learning_rate": 8.47905465612355e-06, "loss": 0.0023, "step": 60420 }, { "epoch": 0.9887916223513049, "grad_norm": 0.25014492869377136, "learning_rate": 8.478370885911442e-06, "loss": 0.0023, "step": 60430 }, { "epoch": 0.9889552483023808, "grad_norm": 0.21477025747299194, "learning_rate": 8.477686989616751e-06, "loss": 0.0029, "step": 60440 }, { "epoch": 0.9891188742534566, "grad_norm": 0.1658000499010086, "learning_rate": 8.477002967264262e-06, "loss": 0.0024, "step": 60450 }, { "epoch": 0.9892825002045325, "grad_norm": 0.08058953285217285, "learning_rate": 8.476318818878775e-06, "loss": 0.0029, "step": 60460 }, { "epoch": 0.9894461261556082, "grad_norm": 0.34774577617645264, "learning_rate": 8.475634544485084e-06, "loss": 0.0024, "step": 60470 }, { "epoch": 0.9896097521066841, "grad_norm": 0.06716769188642502, "learning_rate": 8.474950144107998e-06, "loss": 0.0014, "step": 60480 }, { "epoch": 0.98977337805776, "grad_norm": 0.28167226910591125, "learning_rate": 8.474265617772317e-06, "loss": 0.003, "step": 60490 }, { "epoch": 0.9899370040088358, "grad_norm": 0.15282514691352844, "learning_rate": 8.47358096550286e-06, "loss": 0.0029, "step": 60500 }, { "epoch": 0.9901006299599117, "grad_norm": 0.09227076917886734, "learning_rate": 8.47289618732444e-06, "loss": 0.0023, "step": 60510 }, { "epoch": 0.9902642559109874, "grad_norm": 0.05908651277422905, "learning_rate": 8.47221128326188e-06, "loss": 0.0027, "step": 60520 }, { "epoch": 0.9904278818620633, "grad_norm": 0.02395007200539112, "learning_rate": 8.471526253340007e-06, "loss": 0.0028, "step": 60530 }, { "epoch": 0.9905915078131392, "grad_norm": 0.12977057695388794, "learning_rate": 8.470841097583651e-06, "loss": 0.0028, "step": 60540 }, { "epoch": 0.990755133764215, "grad_norm": 0.21846355497837067, "learning_rate": 8.470155816017646e-06, "loss": 0.0038, "step": 60550 }, { "epoch": 0.9909187597152909, "grad_norm": 0.05971357226371765, "learning_rate": 8.469470408666836e-06, "loss": 0.0024, "step": 60560 }, { "epoch": 0.9910823856663666, "grad_norm": 0.10828137397766113, "learning_rate": 8.46878487555606e-06, "loss": 0.0033, "step": 60570 }, { "epoch": 0.9912460116174425, "grad_norm": 0.0945863202214241, "learning_rate": 8.468099216710168e-06, "loss": 0.0025, "step": 60580 }, { "epoch": 0.9914096375685184, "grad_norm": 0.07669234275817871, "learning_rate": 8.467413432154018e-06, "loss": 0.0032, "step": 60590 }, { "epoch": 0.9915732635195942, "grad_norm": 0.2728372812271118, "learning_rate": 8.466727521912463e-06, "loss": 0.0039, "step": 60600 }, { "epoch": 0.9917368894706701, "grad_norm": 0.029599549248814583, "learning_rate": 8.466041486010365e-06, "loss": 0.0025, "step": 60610 }, { "epoch": 0.9919005154217458, "grad_norm": 0.08870405703783035, "learning_rate": 8.465355324472597e-06, "loss": 0.0027, "step": 60620 }, { "epoch": 0.9920641413728217, "grad_norm": 0.21733129024505615, "learning_rate": 8.464669037324026e-06, "loss": 0.0024, "step": 60630 }, { "epoch": 0.9922277673238976, "grad_norm": 0.09045561403036118, "learning_rate": 8.46398262458953e-06, "loss": 0.0028, "step": 60640 }, { "epoch": 0.9923913932749734, "grad_norm": 0.13382533192634583, "learning_rate": 8.46329608629399e-06, "loss": 0.0022, "step": 60650 }, { "epoch": 0.9925550192260493, "grad_norm": 0.18012291193008423, "learning_rate": 8.46260942246229e-06, "loss": 0.0042, "step": 60660 }, { "epoch": 0.992718645177125, "grad_norm": 0.23046180605888367, "learning_rate": 8.46192263311932e-06, "loss": 0.0024, "step": 60670 }, { "epoch": 0.9928822711282009, "grad_norm": 0.02615022286772728, "learning_rate": 8.461235718289976e-06, "loss": 0.0023, "step": 60680 }, { "epoch": 0.9930458970792768, "grad_norm": 0.110099658370018, "learning_rate": 8.460548677999153e-06, "loss": 0.0039, "step": 60690 }, { "epoch": 0.9932095230303526, "grad_norm": 0.18425911664962769, "learning_rate": 8.45986151227176e-06, "loss": 0.0026, "step": 60700 }, { "epoch": 0.9933731489814285, "grad_norm": 0.05597611516714096, "learning_rate": 8.459174221132704e-06, "loss": 0.0028, "step": 60710 }, { "epoch": 0.9935367749325043, "grad_norm": 0.10822423547506332, "learning_rate": 8.458486804606895e-06, "loss": 0.0025, "step": 60720 }, { "epoch": 0.9937004008835801, "grad_norm": 0.12967842817306519, "learning_rate": 8.457799262719254e-06, "loss": 0.0029, "step": 60730 }, { "epoch": 0.993864026834656, "grad_norm": 0.18580889701843262, "learning_rate": 8.457111595494697e-06, "loss": 0.0059, "step": 60740 }, { "epoch": 0.9940276527857318, "grad_norm": 0.08482278883457184, "learning_rate": 8.456423802958154e-06, "loss": 0.0029, "step": 60750 }, { "epoch": 0.9941912787368077, "grad_norm": 0.13644836843013763, "learning_rate": 8.455735885134558e-06, "loss": 0.0036, "step": 60760 }, { "epoch": 0.9943549046878835, "grad_norm": 0.06407059729099274, "learning_rate": 8.455047842048839e-06, "loss": 0.0031, "step": 60770 }, { "epoch": 0.9945185306389593, "grad_norm": 0.26606786251068115, "learning_rate": 8.45435967372594e-06, "loss": 0.0016, "step": 60780 }, { "epoch": 0.9946821565900352, "grad_norm": 0.19546149671077728, "learning_rate": 8.453671380190807e-06, "loss": 0.0042, "step": 60790 }, { "epoch": 0.994845782541111, "grad_norm": 0.19736286997795105, "learning_rate": 8.452982961468385e-06, "loss": 0.0037, "step": 60800 }, { "epoch": 0.9950094084921869, "grad_norm": 0.2795342206954956, "learning_rate": 8.45229441758363e-06, "loss": 0.0036, "step": 60810 }, { "epoch": 0.9951730344432627, "grad_norm": 0.18036170303821564, "learning_rate": 8.451605748561502e-06, "loss": 0.0014, "step": 60820 }, { "epoch": 0.9953366603943385, "grad_norm": 0.11996934562921524, "learning_rate": 8.45091695442696e-06, "loss": 0.0036, "step": 60830 }, { "epoch": 0.9955002863454144, "grad_norm": 0.08755335211753845, "learning_rate": 8.450228035204972e-06, "loss": 0.0024, "step": 60840 }, { "epoch": 0.9956639122964902, "grad_norm": 0.1050562858581543, "learning_rate": 8.44953899092051e-06, "loss": 0.0029, "step": 60850 }, { "epoch": 0.9958275382475661, "grad_norm": 0.3128628730773926, "learning_rate": 8.44884982159855e-06, "loss": 0.0053, "step": 60860 }, { "epoch": 0.9959911641986419, "grad_norm": 0.16530530154705048, "learning_rate": 8.448160527264075e-06, "loss": 0.0028, "step": 60870 }, { "epoch": 0.9961547901497178, "grad_norm": 0.203591451048851, "learning_rate": 8.447471107942066e-06, "loss": 0.0026, "step": 60880 }, { "epoch": 0.9963184161007936, "grad_norm": 0.11729732155799866, "learning_rate": 8.446781563657517e-06, "loss": 0.0014, "step": 60890 }, { "epoch": 0.9964820420518694, "grad_norm": 0.3393700420856476, "learning_rate": 8.44609189443542e-06, "loss": 0.0022, "step": 60900 }, { "epoch": 0.9966456680029453, "grad_norm": 0.05291201174259186, "learning_rate": 8.445402100300772e-06, "loss": 0.0033, "step": 60910 }, { "epoch": 0.9968092939540211, "grad_norm": 0.06167982146143913, "learning_rate": 8.444712181278583e-06, "loss": 0.0027, "step": 60920 }, { "epoch": 0.996972919905097, "grad_norm": 0.23836767673492432, "learning_rate": 8.444022137393854e-06, "loss": 0.0041, "step": 60930 }, { "epoch": 0.9971365458561728, "grad_norm": 0.21049663424491882, "learning_rate": 8.4433319686716e-06, "loss": 0.0035, "step": 60940 }, { "epoch": 0.9973001718072486, "grad_norm": 0.16214777529239655, "learning_rate": 8.442641675136838e-06, "loss": 0.0033, "step": 60950 }, { "epoch": 0.9974637977583245, "grad_norm": 0.08187725394964218, "learning_rate": 8.441951256814588e-06, "loss": 0.0041, "step": 60960 }, { "epoch": 0.9976274237094003, "grad_norm": 0.23726464807987213, "learning_rate": 8.441260713729879e-06, "loss": 0.0017, "step": 60970 }, { "epoch": 0.9977910496604762, "grad_norm": 0.029425758868455887, "learning_rate": 8.44057004590774e-06, "loss": 0.003, "step": 60980 }, { "epoch": 0.997954675611552, "grad_norm": 0.17834286391735077, "learning_rate": 8.439879253373207e-06, "loss": 0.0044, "step": 60990 }, { "epoch": 0.9981183015626278, "grad_norm": 0.08476901799440384, "learning_rate": 8.439188336151318e-06, "loss": 0.0016, "step": 61000 }, { "epoch": 0.9982819275137037, "grad_norm": 0.1598939448595047, "learning_rate": 8.438497294267117e-06, "loss": 0.0021, "step": 61010 }, { "epoch": 0.9984455534647795, "grad_norm": 0.1463746875524521, "learning_rate": 8.437806127745654e-06, "loss": 0.0025, "step": 61020 }, { "epoch": 0.9986091794158554, "grad_norm": 0.1795947551727295, "learning_rate": 8.437114836611981e-06, "loss": 0.002, "step": 61030 }, { "epoch": 0.9987728053669312, "grad_norm": 0.426880806684494, "learning_rate": 8.436423420891155e-06, "loss": 0.0027, "step": 61040 }, { "epoch": 0.998936431318007, "grad_norm": 0.2773195505142212, "learning_rate": 8.435731880608242e-06, "loss": 0.0018, "step": 61050 }, { "epoch": 0.9991000572690829, "grad_norm": 0.03795728459954262, "learning_rate": 8.435040215788303e-06, "loss": 0.0017, "step": 61060 }, { "epoch": 0.9992636832201587, "grad_norm": 0.3899232745170593, "learning_rate": 8.434348426456414e-06, "loss": 0.002, "step": 61070 }, { "epoch": 0.9994273091712346, "grad_norm": 0.04569382593035698, "learning_rate": 8.433656512637648e-06, "loss": 0.0029, "step": 61080 }, { "epoch": 0.9995909351223105, "grad_norm": 0.08163157105445862, "learning_rate": 8.432964474357085e-06, "loss": 0.0033, "step": 61090 }, { "epoch": 0.9997545610733862, "grad_norm": 0.14875875413417816, "learning_rate": 8.432272311639814e-06, "loss": 0.0016, "step": 61100 }, { "epoch": 0.9999181870244621, "grad_norm": 0.12114173918962479, "learning_rate": 8.431580024510917e-06, "loss": 0.0024, "step": 61110 }, { "epoch": 1.000081812975538, "grad_norm": 0.11625766009092331, "learning_rate": 8.430887612995495e-06, "loss": 0.0019, "step": 61120 }, { "epoch": 1.0002454389266138, "grad_norm": 0.17931455373764038, "learning_rate": 8.43019507711864e-06, "loss": 0.0029, "step": 61130 }, { "epoch": 1.0004090648776895, "grad_norm": 0.12281627207994461, "learning_rate": 8.42950241690546e-06, "loss": 0.002, "step": 61140 }, { "epoch": 1.0005726908287655, "grad_norm": 0.2563318908214569, "learning_rate": 8.428809632381059e-06, "loss": 0.0056, "step": 61150 }, { "epoch": 1.0007363167798413, "grad_norm": 0.3004699647426605, "learning_rate": 8.42811672357055e-06, "loss": 0.0014, "step": 61160 }, { "epoch": 1.000899942730917, "grad_norm": 0.1399531215429306, "learning_rate": 8.427423690499047e-06, "loss": 0.0026, "step": 61170 }, { "epoch": 1.0010635686819929, "grad_norm": 0.122503861784935, "learning_rate": 8.426730533191676e-06, "loss": 0.0015, "step": 61180 }, { "epoch": 1.0012271946330689, "grad_norm": 0.30409717559814453, "learning_rate": 8.426037251673558e-06, "loss": 0.0023, "step": 61190 }, { "epoch": 1.0013908205841446, "grad_norm": 0.07384887337684631, "learning_rate": 8.425343845969824e-06, "loss": 0.0034, "step": 61200 }, { "epoch": 1.0015544465352204, "grad_norm": 0.1534566730260849, "learning_rate": 8.424650316105607e-06, "loss": 0.0038, "step": 61210 }, { "epoch": 1.0017180724862964, "grad_norm": 0.1939021646976471, "learning_rate": 8.423956662106048e-06, "loss": 0.0025, "step": 61220 }, { "epoch": 1.0018816984373722, "grad_norm": 0.03150082379579544, "learning_rate": 8.42326288399629e-06, "loss": 0.0016, "step": 61230 }, { "epoch": 1.002045324388448, "grad_norm": 0.0625978410243988, "learning_rate": 8.422568981801478e-06, "loss": 0.0026, "step": 61240 }, { "epoch": 1.002208950339524, "grad_norm": 0.16247856616973877, "learning_rate": 8.421874955546769e-06, "loss": 0.0033, "step": 61250 }, { "epoch": 1.0023725762905997, "grad_norm": 0.0780845358967781, "learning_rate": 8.421180805257315e-06, "loss": 0.0015, "step": 61260 }, { "epoch": 1.0025362022416755, "grad_norm": 0.015654591843485832, "learning_rate": 8.420486530958278e-06, "loss": 0.0013, "step": 61270 }, { "epoch": 1.0026998281927513, "grad_norm": 0.18486538529396057, "learning_rate": 8.41979213267483e-06, "loss": 0.0029, "step": 61280 }, { "epoch": 1.0028634541438273, "grad_norm": 0.08054941892623901, "learning_rate": 8.419097610432133e-06, "loss": 0.0034, "step": 61290 }, { "epoch": 1.003027080094903, "grad_norm": 0.2680058777332306, "learning_rate": 8.418402964255366e-06, "loss": 0.004, "step": 61300 }, { "epoch": 1.0031907060459788, "grad_norm": 0.031576067209243774, "learning_rate": 8.417708194169707e-06, "loss": 0.0015, "step": 61310 }, { "epoch": 1.0033543319970548, "grad_norm": 0.0929732620716095, "learning_rate": 8.41701330020034e-06, "loss": 0.0023, "step": 61320 }, { "epoch": 1.0035179579481306, "grad_norm": 0.18532471358776093, "learning_rate": 8.416318282372455e-06, "loss": 0.0022, "step": 61330 }, { "epoch": 1.0036815838992064, "grad_norm": 0.2866421341896057, "learning_rate": 8.415623140711242e-06, "loss": 0.0031, "step": 61340 }, { "epoch": 1.0038452098502824, "grad_norm": 0.06344977021217346, "learning_rate": 8.4149278752419e-06, "loss": 0.0042, "step": 61350 }, { "epoch": 1.0040088358013581, "grad_norm": 0.3297552466392517, "learning_rate": 8.414232485989628e-06, "loss": 0.0026, "step": 61360 }, { "epoch": 1.004172461752434, "grad_norm": 0.0796428918838501, "learning_rate": 8.413536972979635e-06, "loss": 0.0023, "step": 61370 }, { "epoch": 1.0043360877035097, "grad_norm": 0.13391965627670288, "learning_rate": 8.412841336237134e-06, "loss": 0.0025, "step": 61380 }, { "epoch": 1.0044997136545857, "grad_norm": 0.15371493995189667, "learning_rate": 8.412145575787334e-06, "loss": 0.0058, "step": 61390 }, { "epoch": 1.0046633396056615, "grad_norm": 0.1355946958065033, "learning_rate": 8.411449691655457e-06, "loss": 0.0028, "step": 61400 }, { "epoch": 1.0048269655567372, "grad_norm": 0.16831043362617493, "learning_rate": 8.41075368386673e-06, "loss": 0.0023, "step": 61410 }, { "epoch": 1.0049905915078132, "grad_norm": 0.10733257979154587, "learning_rate": 8.41005755244638e-06, "loss": 0.0023, "step": 61420 }, { "epoch": 1.005154217458889, "grad_norm": 0.20408815145492554, "learning_rate": 8.409361297419639e-06, "loss": 0.003, "step": 61430 }, { "epoch": 1.0053178434099648, "grad_norm": 0.22214002907276154, "learning_rate": 8.408664918811744e-06, "loss": 0.0027, "step": 61440 }, { "epoch": 1.0054814693610408, "grad_norm": 0.07550564408302307, "learning_rate": 8.40796841664794e-06, "loss": 0.0041, "step": 61450 }, { "epoch": 1.0056450953121165, "grad_norm": 0.2276439368724823, "learning_rate": 8.407271790953471e-06, "loss": 0.0018, "step": 61460 }, { "epoch": 1.0058087212631923, "grad_norm": 0.12818531692028046, "learning_rate": 8.406575041753588e-06, "loss": 0.0015, "step": 61470 }, { "epoch": 1.005972347214268, "grad_norm": 0.30886825919151306, "learning_rate": 8.405878169073546e-06, "loss": 0.0021, "step": 61480 }, { "epoch": 1.006135973165344, "grad_norm": 0.21089695394039154, "learning_rate": 8.405181172938609e-06, "loss": 0.002, "step": 61490 }, { "epoch": 1.0062995991164199, "grad_norm": 0.4853508174419403, "learning_rate": 8.404484053374038e-06, "loss": 0.0043, "step": 61500 }, { "epoch": 1.0064632250674956, "grad_norm": 0.08074169605970383, "learning_rate": 8.4037868104051e-06, "loss": 0.0029, "step": 61510 }, { "epoch": 1.0066268510185716, "grad_norm": 0.21607299149036407, "learning_rate": 8.403089444057074e-06, "loss": 0.0023, "step": 61520 }, { "epoch": 1.0067904769696474, "grad_norm": 0.14379531145095825, "learning_rate": 8.402391954355235e-06, "loss": 0.0014, "step": 61530 }, { "epoch": 1.0069541029207232, "grad_norm": 0.12721051275730133, "learning_rate": 8.401694341324863e-06, "loss": 0.0025, "step": 61540 }, { "epoch": 1.007117728871799, "grad_norm": 0.069223552942276, "learning_rate": 8.400996604991247e-06, "loss": 0.0014, "step": 61550 }, { "epoch": 1.007281354822875, "grad_norm": 0.1583051234483719, "learning_rate": 8.400298745379679e-06, "loss": 0.0021, "step": 61560 }, { "epoch": 1.0074449807739507, "grad_norm": 0.07988758385181427, "learning_rate": 8.399600762515454e-06, "loss": 0.0036, "step": 61570 }, { "epoch": 1.0076086067250265, "grad_norm": 0.17742285132408142, "learning_rate": 8.398902656423872e-06, "loss": 0.0033, "step": 61580 }, { "epoch": 1.0077722326761025, "grad_norm": 0.08552506566047668, "learning_rate": 8.398204427130238e-06, "loss": 0.0034, "step": 61590 }, { "epoch": 1.0079358586271783, "grad_norm": 0.07694020122289658, "learning_rate": 8.397506074659861e-06, "loss": 0.0015, "step": 61600 }, { "epoch": 1.008099484578254, "grad_norm": 0.028427090495824814, "learning_rate": 8.396807599038053e-06, "loss": 0.0025, "step": 61610 }, { "epoch": 1.00826311052933, "grad_norm": 0.11284486204385757, "learning_rate": 8.396109000290133e-06, "loss": 0.0035, "step": 61620 }, { "epoch": 1.0084267364804058, "grad_norm": 0.052602652460336685, "learning_rate": 8.395410278441427e-06, "loss": 0.0016, "step": 61630 }, { "epoch": 1.0085903624314816, "grad_norm": 0.07618909329175949, "learning_rate": 8.394711433517257e-06, "loss": 0.0012, "step": 61640 }, { "epoch": 1.0087539883825574, "grad_norm": 0.10430283844470978, "learning_rate": 8.394012465542958e-06, "loss": 0.0043, "step": 61650 }, { "epoch": 1.0089176143336334, "grad_norm": 0.05425436794757843, "learning_rate": 8.393313374543863e-06, "loss": 0.0018, "step": 61660 }, { "epoch": 1.0090812402847091, "grad_norm": 0.05373930558562279, "learning_rate": 8.392614160545313e-06, "loss": 0.003, "step": 61670 }, { "epoch": 1.009244866235785, "grad_norm": 0.2939392924308777, "learning_rate": 8.391914823572655e-06, "loss": 0.0015, "step": 61680 }, { "epoch": 1.009408492186861, "grad_norm": 0.16351237893104553, "learning_rate": 8.391215363651237e-06, "loss": 0.002, "step": 61690 }, { "epoch": 1.0095721181379367, "grad_norm": 0.02641957253217697, "learning_rate": 8.390515780806412e-06, "loss": 0.0026, "step": 61700 }, { "epoch": 1.0097357440890125, "grad_norm": 0.12252381443977356, "learning_rate": 8.38981607506354e-06, "loss": 0.0025, "step": 61710 }, { "epoch": 1.0098993700400885, "grad_norm": 0.0973534882068634, "learning_rate": 8.389116246447981e-06, "loss": 0.0019, "step": 61720 }, { "epoch": 1.0100629959911642, "grad_norm": 0.21252557635307312, "learning_rate": 8.388416294985105e-06, "loss": 0.003, "step": 61730 }, { "epoch": 1.01022662194224, "grad_norm": 0.0555267371237278, "learning_rate": 8.387716220700282e-06, "loss": 0.001, "step": 61740 }, { "epoch": 1.0103902478933158, "grad_norm": 0.1361568421125412, "learning_rate": 8.387016023618889e-06, "loss": 0.0033, "step": 61750 }, { "epoch": 1.0105538738443918, "grad_norm": 0.32422080636024475, "learning_rate": 8.386315703766303e-06, "loss": 0.0016, "step": 61760 }, { "epoch": 1.0107174997954675, "grad_norm": 0.02642049267888069, "learning_rate": 8.385615261167912e-06, "loss": 0.0027, "step": 61770 }, { "epoch": 1.0108811257465433, "grad_norm": 0.08894288539886475, "learning_rate": 8.384914695849106e-06, "loss": 0.0016, "step": 61780 }, { "epoch": 1.0110447516976193, "grad_norm": 0.10418354719877243, "learning_rate": 8.384214007835278e-06, "loss": 0.0018, "step": 61790 }, { "epoch": 1.011208377648695, "grad_norm": 0.07337412983179092, "learning_rate": 8.383513197151825e-06, "loss": 0.0013, "step": 61800 }, { "epoch": 1.0113720035997709, "grad_norm": 0.06198417395353317, "learning_rate": 8.382812263824151e-06, "loss": 0.0023, "step": 61810 }, { "epoch": 1.0115356295508469, "grad_norm": 0.22408807277679443, "learning_rate": 8.382111207877663e-06, "loss": 0.001, "step": 61820 }, { "epoch": 1.0116992555019226, "grad_norm": 0.01795016974210739, "learning_rate": 8.381410029337774e-06, "loss": 0.0019, "step": 61830 }, { "epoch": 1.0118628814529984, "grad_norm": 0.11608357727527618, "learning_rate": 8.380708728229896e-06, "loss": 0.0015, "step": 61840 }, { "epoch": 1.0120265074040742, "grad_norm": 0.1601213961839676, "learning_rate": 8.380007304579451e-06, "loss": 0.0016, "step": 61850 }, { "epoch": 1.0121901333551502, "grad_norm": 0.29916322231292725, "learning_rate": 8.379305758411868e-06, "loss": 0.0025, "step": 61860 }, { "epoch": 1.012353759306226, "grad_norm": 0.18652862310409546, "learning_rate": 8.37860408975257e-06, "loss": 0.0024, "step": 61870 }, { "epoch": 1.0125173852573017, "grad_norm": 0.14501667022705078, "learning_rate": 8.377902298626997e-06, "loss": 0.0037, "step": 61880 }, { "epoch": 1.0126810112083777, "grad_norm": 0.04337180405855179, "learning_rate": 8.377200385060583e-06, "loss": 0.0021, "step": 61890 }, { "epoch": 1.0128446371594535, "grad_norm": 0.09240813553333282, "learning_rate": 8.376498349078772e-06, "loss": 0.0026, "step": 61900 }, { "epoch": 1.0130082631105293, "grad_norm": 0.0846519023180008, "learning_rate": 8.375796190707011e-06, "loss": 0.0024, "step": 61910 }, { "epoch": 1.0131718890616053, "grad_norm": 0.07384341955184937, "learning_rate": 8.375093909970751e-06, "loss": 0.0019, "step": 61920 }, { "epoch": 1.013335515012681, "grad_norm": 0.06110379099845886, "learning_rate": 8.37439150689545e-06, "loss": 0.0017, "step": 61930 }, { "epoch": 1.0134991409637568, "grad_norm": 0.2784845232963562, "learning_rate": 8.373688981506564e-06, "loss": 0.0038, "step": 61940 }, { "epoch": 1.0136627669148326, "grad_norm": 0.02236044779419899, "learning_rate": 8.372986333829565e-06, "loss": 0.0017, "step": 61950 }, { "epoch": 1.0138263928659086, "grad_norm": 0.09520578384399414, "learning_rate": 8.372283563889916e-06, "loss": 0.0026, "step": 61960 }, { "epoch": 1.0139900188169844, "grad_norm": 0.12372705340385437, "learning_rate": 8.371580671713092e-06, "loss": 0.0019, "step": 61970 }, { "epoch": 1.0141536447680601, "grad_norm": 0.17561574280261993, "learning_rate": 8.370877657324574e-06, "loss": 0.0017, "step": 61980 }, { "epoch": 1.0143172707191361, "grad_norm": 0.07626992464065552, "learning_rate": 8.37017452074984e-06, "loss": 0.0018, "step": 61990 }, { "epoch": 1.014480896670212, "grad_norm": 0.08037300407886505, "learning_rate": 8.369471262014383e-06, "loss": 0.0032, "step": 62000 }, { "epoch": 1.0146445226212877, "grad_norm": 0.19899636507034302, "learning_rate": 8.368767881143689e-06, "loss": 0.0012, "step": 62010 }, { "epoch": 1.0148081485723637, "grad_norm": 0.22281433641910553, "learning_rate": 8.368064378163255e-06, "loss": 0.0032, "step": 62020 }, { "epoch": 1.0149717745234395, "grad_norm": 0.16252335906028748, "learning_rate": 8.367360753098585e-06, "loss": 0.0027, "step": 62030 }, { "epoch": 1.0151354004745152, "grad_norm": 0.273134708404541, "learning_rate": 8.366657005975179e-06, "loss": 0.0025, "step": 62040 }, { "epoch": 1.015299026425591, "grad_norm": 0.09302778542041779, "learning_rate": 8.365953136818548e-06, "loss": 0.0021, "step": 62050 }, { "epoch": 1.015462652376667, "grad_norm": 0.1567562073469162, "learning_rate": 8.365249145654207e-06, "loss": 0.0025, "step": 62060 }, { "epoch": 1.0156262783277428, "grad_norm": 0.10178086906671524, "learning_rate": 8.364545032507672e-06, "loss": 0.0022, "step": 62070 }, { "epoch": 1.0157899042788185, "grad_norm": 0.16511395573616028, "learning_rate": 8.363840797404466e-06, "loss": 0.0021, "step": 62080 }, { "epoch": 1.0159535302298945, "grad_norm": 0.12475978583097458, "learning_rate": 8.363136440370115e-06, "loss": 0.0016, "step": 62090 }, { "epoch": 1.0161171561809703, "grad_norm": 0.2665903568267822, "learning_rate": 8.362431961430152e-06, "loss": 0.0024, "step": 62100 }, { "epoch": 1.016280782132046, "grad_norm": 0.13130570948123932, "learning_rate": 8.361727360610112e-06, "loss": 0.0032, "step": 62110 }, { "epoch": 1.016444408083122, "grad_norm": 0.04836617782711983, "learning_rate": 8.361022637935534e-06, "loss": 0.0029, "step": 62120 }, { "epoch": 1.0166080340341979, "grad_norm": 0.28147563338279724, "learning_rate": 8.360317793431964e-06, "loss": 0.0021, "step": 62130 }, { "epoch": 1.0167716599852736, "grad_norm": 0.05762926861643791, "learning_rate": 8.35961282712495e-06, "loss": 0.0028, "step": 62140 }, { "epoch": 1.0169352859363494, "grad_norm": 0.06845203787088394, "learning_rate": 8.358907739040046e-06, "loss": 0.003, "step": 62150 }, { "epoch": 1.0170989118874254, "grad_norm": 0.09512495249509811, "learning_rate": 8.358202529202809e-06, "loss": 0.0013, "step": 62160 }, { "epoch": 1.0172625378385012, "grad_norm": 0.19341616332530975, "learning_rate": 8.357497197638802e-06, "loss": 0.0024, "step": 62170 }, { "epoch": 1.017426163789577, "grad_norm": 0.31077906489372253, "learning_rate": 8.35679174437359e-06, "loss": 0.0017, "step": 62180 }, { "epoch": 1.017589789740653, "grad_norm": 0.2906704545021057, "learning_rate": 8.356086169432747e-06, "loss": 0.0024, "step": 62190 }, { "epoch": 1.0177534156917287, "grad_norm": 0.008857743814587593, "learning_rate": 8.355380472841845e-06, "loss": 0.0029, "step": 62200 }, { "epoch": 1.0179170416428045, "grad_norm": 0.00857104454189539, "learning_rate": 8.354674654626466e-06, "loss": 0.0044, "step": 62210 }, { "epoch": 1.0180806675938805, "grad_norm": 0.08437494188547134, "learning_rate": 8.353968714812193e-06, "loss": 0.0023, "step": 62220 }, { "epoch": 1.0182442935449563, "grad_norm": 0.23425538837909698, "learning_rate": 8.353262653424615e-06, "loss": 0.0031, "step": 62230 }, { "epoch": 1.018407919496032, "grad_norm": 0.06720089912414551, "learning_rate": 8.352556470489326e-06, "loss": 0.003, "step": 62240 }, { "epoch": 1.0185715454471078, "grad_norm": 0.256036639213562, "learning_rate": 8.351850166031922e-06, "loss": 0.002, "step": 62250 }, { "epoch": 1.0187351713981838, "grad_norm": 0.02549031563103199, "learning_rate": 8.351143740078005e-06, "loss": 0.0031, "step": 62260 }, { "epoch": 1.0188987973492596, "grad_norm": 1.3627597093582153, "learning_rate": 8.350437192653182e-06, "loss": 0.0026, "step": 62270 }, { "epoch": 1.0190624233003354, "grad_norm": 0.10078544169664383, "learning_rate": 8.349730523783065e-06, "loss": 0.0025, "step": 62280 }, { "epoch": 1.0192260492514114, "grad_norm": 0.2564089298248291, "learning_rate": 8.349023733493265e-06, "loss": 0.0032, "step": 62290 }, { "epoch": 1.0193896752024871, "grad_norm": 0.1104068011045456, "learning_rate": 8.348316821809405e-06, "loss": 0.0031, "step": 62300 }, { "epoch": 1.019553301153563, "grad_norm": 0.013330202549695969, "learning_rate": 8.347609788757107e-06, "loss": 0.0016, "step": 62310 }, { "epoch": 1.019716927104639, "grad_norm": 0.2409638613462448, "learning_rate": 8.346902634362002e-06, "loss": 0.0036, "step": 62320 }, { "epoch": 1.0198805530557147, "grad_norm": 0.17588280141353607, "learning_rate": 8.346195358649718e-06, "loss": 0.0031, "step": 62330 }, { "epoch": 1.0200441790067905, "grad_norm": 0.017134644091129303, "learning_rate": 8.345487961645896e-06, "loss": 0.0041, "step": 62340 }, { "epoch": 1.0202078049578662, "grad_norm": 0.224152073264122, "learning_rate": 8.344780443376176e-06, "loss": 0.0022, "step": 62350 }, { "epoch": 1.0203714309089422, "grad_norm": 0.11352168023586273, "learning_rate": 8.344072803866204e-06, "loss": 0.0015, "step": 62360 }, { "epoch": 1.020535056860018, "grad_norm": 0.07339297980070114, "learning_rate": 8.343365043141629e-06, "loss": 0.0026, "step": 62370 }, { "epoch": 1.0206986828110938, "grad_norm": 0.08674383163452148, "learning_rate": 8.342657161228108e-06, "loss": 0.0024, "step": 62380 }, { "epoch": 1.0208623087621698, "grad_norm": 0.2911551892757416, "learning_rate": 8.341949158151298e-06, "loss": 0.0033, "step": 62390 }, { "epoch": 1.0210259347132455, "grad_norm": 0.12789283692836761, "learning_rate": 8.341241033936862e-06, "loss": 0.0036, "step": 62400 }, { "epoch": 1.0211895606643213, "grad_norm": 0.06214087828993797, "learning_rate": 8.340532788610471e-06, "loss": 0.0022, "step": 62410 }, { "epoch": 1.0213531866153973, "grad_norm": 0.08382989466190338, "learning_rate": 8.339824422197796e-06, "loss": 0.0029, "step": 62420 }, { "epoch": 1.021516812566473, "grad_norm": 0.27731460332870483, "learning_rate": 8.33911593472451e-06, "loss": 0.0042, "step": 62430 }, { "epoch": 1.0216804385175489, "grad_norm": 0.11863048374652863, "learning_rate": 8.338407326216299e-06, "loss": 0.0025, "step": 62440 }, { "epoch": 1.0218440644686246, "grad_norm": 0.3180948793888092, "learning_rate": 8.337698596698844e-06, "loss": 0.004, "step": 62450 }, { "epoch": 1.0220076904197006, "grad_norm": 0.07720239460468292, "learning_rate": 8.336989746197839e-06, "loss": 0.0071, "step": 62460 }, { "epoch": 1.0221713163707764, "grad_norm": 0.147596076130867, "learning_rate": 8.336280774738974e-06, "loss": 0.0025, "step": 62470 }, { "epoch": 1.0223349423218522, "grad_norm": 0.44029510021209717, "learning_rate": 8.33557168234795e-06, "loss": 0.0027, "step": 62480 }, { "epoch": 1.0224985682729282, "grad_norm": 0.02467416413128376, "learning_rate": 8.334862469050471e-06, "loss": 0.0024, "step": 62490 }, { "epoch": 1.022662194224004, "grad_norm": 0.1042647659778595, "learning_rate": 8.334153134872242e-06, "loss": 0.0015, "step": 62500 }, { "epoch": 1.0228258201750797, "grad_norm": 0.15093326568603516, "learning_rate": 8.333443679838972e-06, "loss": 0.0022, "step": 62510 }, { "epoch": 1.0229894461261555, "grad_norm": 0.09556972980499268, "learning_rate": 8.332734103976385e-06, "loss": 0.0019, "step": 62520 }, { "epoch": 1.0231530720772315, "grad_norm": 0.48387524485588074, "learning_rate": 8.332024407310195e-06, "loss": 0.0018, "step": 62530 }, { "epoch": 1.0233166980283073, "grad_norm": 0.2792567014694214, "learning_rate": 8.331314589866127e-06, "loss": 0.0025, "step": 62540 }, { "epoch": 1.023480323979383, "grad_norm": 0.06885558366775513, "learning_rate": 8.330604651669913e-06, "loss": 0.0015, "step": 62550 }, { "epoch": 1.023643949930459, "grad_norm": 0.25647929310798645, "learning_rate": 8.329894592747285e-06, "loss": 0.0027, "step": 62560 }, { "epoch": 1.0238075758815348, "grad_norm": 0.31204888224601746, "learning_rate": 8.329184413123982e-06, "loss": 0.0022, "step": 62570 }, { "epoch": 1.0239712018326106, "grad_norm": 0.21856515109539032, "learning_rate": 8.328474112825745e-06, "loss": 0.0027, "step": 62580 }, { "epoch": 1.0241348277836866, "grad_norm": 0.0903751477599144, "learning_rate": 8.327763691878321e-06, "loss": 0.0015, "step": 62590 }, { "epoch": 1.0242984537347624, "grad_norm": 0.0380975604057312, "learning_rate": 8.327053150307462e-06, "loss": 0.0033, "step": 62600 }, { "epoch": 1.0244620796858381, "grad_norm": 0.15065494179725647, "learning_rate": 8.326342488138921e-06, "loss": 0.003, "step": 62610 }, { "epoch": 1.024625705636914, "grad_norm": 0.055887166410684586, "learning_rate": 8.325631705398461e-06, "loss": 0.0021, "step": 62620 }, { "epoch": 1.02478933158799, "grad_norm": 0.09244561940431595, "learning_rate": 8.324920802111844e-06, "loss": 0.0016, "step": 62630 }, { "epoch": 1.0249529575390657, "grad_norm": 0.06201239302754402, "learning_rate": 8.32420977830484e-06, "loss": 0.002, "step": 62640 }, { "epoch": 1.0251165834901415, "grad_norm": 0.13963104784488678, "learning_rate": 8.32349863400322e-06, "loss": 0.0021, "step": 62650 }, { "epoch": 1.0252802094412174, "grad_norm": 0.07053521275520325, "learning_rate": 8.322787369232763e-06, "loss": 0.0049, "step": 62660 }, { "epoch": 1.0254438353922932, "grad_norm": 0.03840421885251999, "learning_rate": 8.322075984019248e-06, "loss": 0.0029, "step": 62670 }, { "epoch": 1.025607461343369, "grad_norm": 0.09541652351617813, "learning_rate": 8.321364478388467e-06, "loss": 0.002, "step": 62680 }, { "epoch": 1.025771087294445, "grad_norm": 0.08917645364999771, "learning_rate": 8.320652852366202e-06, "loss": 0.0015, "step": 62690 }, { "epoch": 1.0259347132455208, "grad_norm": 0.11140826344490051, "learning_rate": 8.319941105978255e-06, "loss": 0.003, "step": 62700 }, { "epoch": 1.0260983391965965, "grad_norm": 0.18213094770908356, "learning_rate": 8.319229239250422e-06, "loss": 0.0024, "step": 62710 }, { "epoch": 1.0262619651476723, "grad_norm": 0.07034864276647568, "learning_rate": 8.318517252208505e-06, "loss": 0.0018, "step": 62720 }, { "epoch": 1.0264255910987483, "grad_norm": 0.05921407416462898, "learning_rate": 8.317805144878315e-06, "loss": 0.0025, "step": 62730 }, { "epoch": 1.026589217049824, "grad_norm": 0.1356448382139206, "learning_rate": 8.31709291728566e-06, "loss": 0.0022, "step": 62740 }, { "epoch": 1.0267528430008999, "grad_norm": 0.0674639642238617, "learning_rate": 8.316380569456363e-06, "loss": 0.0015, "step": 62750 }, { "epoch": 1.0269164689519759, "grad_norm": 0.23950588703155518, "learning_rate": 8.315668101416239e-06, "loss": 0.0026, "step": 62760 }, { "epoch": 1.0270800949030516, "grad_norm": 0.13180133700370789, "learning_rate": 8.314955513191114e-06, "loss": 0.0014, "step": 62770 }, { "epoch": 1.0272437208541274, "grad_norm": 0.00595868518576026, "learning_rate": 8.314242804806822e-06, "loss": 0.0045, "step": 62780 }, { "epoch": 1.0274073468052034, "grad_norm": 0.034348130226135254, "learning_rate": 8.313529976289193e-06, "loss": 0.0012, "step": 62790 }, { "epoch": 1.0275709727562792, "grad_norm": 0.060016658157110214, "learning_rate": 8.312817027664065e-06, "loss": 0.0023, "step": 62800 }, { "epoch": 1.027734598707355, "grad_norm": 0.03407631069421768, "learning_rate": 8.312103958957282e-06, "loss": 0.0039, "step": 62810 }, { "epoch": 1.0278982246584307, "grad_norm": 0.04801015183329582, "learning_rate": 8.311390770194691e-06, "loss": 0.0023, "step": 62820 }, { "epoch": 1.0280618506095067, "grad_norm": 0.544994592666626, "learning_rate": 8.310677461402143e-06, "loss": 0.0024, "step": 62830 }, { "epoch": 1.0282254765605825, "grad_norm": 0.03717778995633125, "learning_rate": 8.309964032605495e-06, "loss": 0.0026, "step": 62840 }, { "epoch": 1.0283891025116583, "grad_norm": 0.023024654015898705, "learning_rate": 8.309250483830607e-06, "loss": 0.0015, "step": 62850 }, { "epoch": 1.0285527284627343, "grad_norm": 0.12822191417217255, "learning_rate": 8.308536815103341e-06, "loss": 0.0037, "step": 62860 }, { "epoch": 1.02871635441381, "grad_norm": 0.2804183065891266, "learning_rate": 8.307823026449568e-06, "loss": 0.0028, "step": 62870 }, { "epoch": 1.0288799803648858, "grad_norm": 0.0640629380941391, "learning_rate": 8.30710911789516e-06, "loss": 0.0012, "step": 62880 }, { "epoch": 1.0290436063159618, "grad_norm": 0.06334434449672699, "learning_rate": 8.306395089465995e-06, "loss": 0.0024, "step": 62890 }, { "epoch": 1.0292072322670376, "grad_norm": 0.12159447371959686, "learning_rate": 8.305680941187955e-06, "loss": 0.0026, "step": 62900 }, { "epoch": 1.0293708582181134, "grad_norm": 0.17732298374176025, "learning_rate": 8.304966673086925e-06, "loss": 0.0025, "step": 62910 }, { "epoch": 1.0295344841691891, "grad_norm": 0.15575121343135834, "learning_rate": 8.304252285188797e-06, "loss": 0.0029, "step": 62920 }, { "epoch": 1.0296981101202651, "grad_norm": 0.12282532453536987, "learning_rate": 8.303537777519466e-06, "loss": 0.0027, "step": 62930 }, { "epoch": 1.029861736071341, "grad_norm": 0.1006195917725563, "learning_rate": 8.302823150104829e-06, "loss": 0.0021, "step": 62940 }, { "epoch": 1.0300253620224167, "grad_norm": 0.09268511086702347, "learning_rate": 8.302108402970792e-06, "loss": 0.002, "step": 62950 }, { "epoch": 1.0301889879734927, "grad_norm": 0.08966106921434402, "learning_rate": 8.30139353614326e-06, "loss": 0.002, "step": 62960 }, { "epoch": 1.0303526139245685, "grad_norm": 0.03228648006916046, "learning_rate": 8.30067854964815e-06, "loss": 0.0019, "step": 62970 }, { "epoch": 1.0305162398756442, "grad_norm": 0.011323303915560246, "learning_rate": 8.299963443511373e-06, "loss": 0.0016, "step": 62980 }, { "epoch": 1.0306798658267202, "grad_norm": 0.06857464462518692, "learning_rate": 8.299248217758853e-06, "loss": 0.0023, "step": 62990 }, { "epoch": 1.030843491777796, "grad_norm": 0.22043435275554657, "learning_rate": 8.298532872416515e-06, "loss": 0.0019, "step": 63000 }, { "epoch": 1.0310071177288718, "grad_norm": 0.10400668531656265, "learning_rate": 8.297817407510289e-06, "loss": 0.0015, "step": 63010 }, { "epoch": 1.0311707436799475, "grad_norm": 0.20460723340511322, "learning_rate": 8.297101823066106e-06, "loss": 0.0036, "step": 63020 }, { "epoch": 1.0313343696310235, "grad_norm": 0.12914440035820007, "learning_rate": 8.296386119109908e-06, "loss": 0.0017, "step": 63030 }, { "epoch": 1.0314979955820993, "grad_norm": 0.12779386341571808, "learning_rate": 8.295670295667635e-06, "loss": 0.0026, "step": 63040 }, { "epoch": 1.031661621533175, "grad_norm": 0.1792525351047516, "learning_rate": 8.294954352765235e-06, "loss": 0.0027, "step": 63050 }, { "epoch": 1.031825247484251, "grad_norm": 0.19937963783740997, "learning_rate": 8.29423829042866e-06, "loss": 0.0019, "step": 63060 }, { "epoch": 1.0319888734353269, "grad_norm": 0.03238700330257416, "learning_rate": 8.293522108683863e-06, "loss": 0.0018, "step": 63070 }, { "epoch": 1.0321524993864026, "grad_norm": 0.15098980069160461, "learning_rate": 8.292805807556806e-06, "loss": 0.0058, "step": 63080 }, { "epoch": 1.0323161253374786, "grad_norm": 0.06254418939352036, "learning_rate": 8.292089387073452e-06, "loss": 0.0025, "step": 63090 }, { "epoch": 1.0324797512885544, "grad_norm": 0.1866140365600586, "learning_rate": 8.29137284725977e-06, "loss": 0.0024, "step": 63100 }, { "epoch": 1.0326433772396302, "grad_norm": 0.11695242673158646, "learning_rate": 8.290656188141733e-06, "loss": 0.0021, "step": 63110 }, { "epoch": 1.032807003190706, "grad_norm": 0.16276447474956512, "learning_rate": 8.289939409745317e-06, "loss": 0.0019, "step": 63120 }, { "epoch": 1.032970629141782, "grad_norm": 0.1279701590538025, "learning_rate": 8.289222512096508e-06, "loss": 0.0022, "step": 63130 }, { "epoch": 1.0331342550928577, "grad_norm": 0.19693182408809662, "learning_rate": 8.288505495221286e-06, "loss": 0.0023, "step": 63140 }, { "epoch": 1.0332978810439335, "grad_norm": 0.13077808916568756, "learning_rate": 8.287788359145643e-06, "loss": 0.0024, "step": 63150 }, { "epoch": 1.0334615069950095, "grad_norm": 0.17228233814239502, "learning_rate": 8.287071103895575e-06, "loss": 0.0031, "step": 63160 }, { "epoch": 1.0336251329460853, "grad_norm": 0.03512703627347946, "learning_rate": 8.28635372949708e-06, "loss": 0.0015, "step": 63170 }, { "epoch": 1.033788758897161, "grad_norm": 0.23893366754055023, "learning_rate": 8.28563623597616e-06, "loss": 0.003, "step": 63180 }, { "epoch": 1.033952384848237, "grad_norm": 0.20334574580192566, "learning_rate": 8.284918623358824e-06, "loss": 0.002, "step": 63190 }, { "epoch": 1.0341160107993128, "grad_norm": 0.16438986361026764, "learning_rate": 8.284200891671083e-06, "loss": 0.002, "step": 63200 }, { "epoch": 1.0342796367503886, "grad_norm": 0.06547048687934875, "learning_rate": 8.283483040938954e-06, "loss": 0.0021, "step": 63210 }, { "epoch": 1.0344432627014644, "grad_norm": 0.19469082355499268, "learning_rate": 8.282765071188453e-06, "loss": 0.0019, "step": 63220 }, { "epoch": 1.0346068886525404, "grad_norm": 0.13101544976234436, "learning_rate": 8.282046982445611e-06, "loss": 0.0024, "step": 63230 }, { "epoch": 1.0347705146036161, "grad_norm": 0.11263741552829742, "learning_rate": 8.281328774736452e-06, "loss": 0.0021, "step": 63240 }, { "epoch": 1.034934140554692, "grad_norm": 0.13953259587287903, "learning_rate": 8.280610448087013e-06, "loss": 0.0022, "step": 63250 }, { "epoch": 1.035097766505768, "grad_norm": 0.14145800471305847, "learning_rate": 8.27989200252333e-06, "loss": 0.0024, "step": 63260 }, { "epoch": 1.0352613924568437, "grad_norm": 0.20280882716178894, "learning_rate": 8.279173438071444e-06, "loss": 0.0023, "step": 63270 }, { "epoch": 1.0354250184079195, "grad_norm": 0.27344730496406555, "learning_rate": 8.278454754757403e-06, "loss": 0.0028, "step": 63280 }, { "epoch": 1.0355886443589952, "grad_norm": 0.2773624062538147, "learning_rate": 8.277735952607257e-06, "loss": 0.0024, "step": 63290 }, { "epoch": 1.0357522703100712, "grad_norm": 0.07628427445888519, "learning_rate": 8.277017031647059e-06, "loss": 0.0018, "step": 63300 }, { "epoch": 1.035915896261147, "grad_norm": 0.13781200349330902, "learning_rate": 8.27629799190287e-06, "loss": 0.0013, "step": 63310 }, { "epoch": 1.0360795222122228, "grad_norm": 0.0606485940515995, "learning_rate": 8.275578833400754e-06, "loss": 0.0027, "step": 63320 }, { "epoch": 1.0362431481632988, "grad_norm": 0.18993821740150452, "learning_rate": 8.274859556166779e-06, "loss": 0.0025, "step": 63330 }, { "epoch": 1.0364067741143745, "grad_norm": 0.26492953300476074, "learning_rate": 8.274140160227016e-06, "loss": 0.0026, "step": 63340 }, { "epoch": 1.0365704000654503, "grad_norm": 0.1297852098941803, "learning_rate": 8.273420645607539e-06, "loss": 0.0025, "step": 63350 }, { "epoch": 1.0367340260165263, "grad_norm": 0.1854548156261444, "learning_rate": 8.272701012334432e-06, "loss": 0.0027, "step": 63360 }, { "epoch": 1.036897651967602, "grad_norm": 0.08247770369052887, "learning_rate": 8.271981260433779e-06, "loss": 0.0027, "step": 63370 }, { "epoch": 1.0370612779186779, "grad_norm": 0.02969399094581604, "learning_rate": 8.271261389931668e-06, "loss": 0.0011, "step": 63380 }, { "epoch": 1.0372249038697539, "grad_norm": 0.39014166593551636, "learning_rate": 8.270541400854197e-06, "loss": 0.0026, "step": 63390 }, { "epoch": 1.0373885298208296, "grad_norm": 0.1571214348077774, "learning_rate": 8.269821293227459e-06, "loss": 0.0029, "step": 63400 }, { "epoch": 1.0375521557719054, "grad_norm": 0.32384729385375977, "learning_rate": 8.269101067077558e-06, "loss": 0.0019, "step": 63410 }, { "epoch": 1.0377157817229812, "grad_norm": 0.0747719407081604, "learning_rate": 8.2683807224306e-06, "loss": 0.0027, "step": 63420 }, { "epoch": 1.0378794076740572, "grad_norm": 0.06614229828119278, "learning_rate": 8.267660259312697e-06, "loss": 0.0018, "step": 63430 }, { "epoch": 1.038043033625133, "grad_norm": 0.019833263009786606, "learning_rate": 8.266939677749964e-06, "loss": 0.0018, "step": 63440 }, { "epoch": 1.0382066595762087, "grad_norm": 0.15740786492824554, "learning_rate": 8.266218977768518e-06, "loss": 0.0025, "step": 63450 }, { "epoch": 1.0383702855272847, "grad_norm": 0.046031709760427475, "learning_rate": 8.265498159394486e-06, "loss": 0.0019, "step": 63460 }, { "epoch": 1.0385339114783605, "grad_norm": 0.20547205209732056, "learning_rate": 8.26477722265399e-06, "loss": 0.0063, "step": 63470 }, { "epoch": 1.0386975374294363, "grad_norm": 0.16334377229213715, "learning_rate": 8.26405616757317e-06, "loss": 0.0018, "step": 63480 }, { "epoch": 1.038861163380512, "grad_norm": 0.047372184693813324, "learning_rate": 8.263334994178158e-06, "loss": 0.0021, "step": 63490 }, { "epoch": 1.039024789331588, "grad_norm": 0.07817479223012924, "learning_rate": 8.262613702495096e-06, "loss": 0.0054, "step": 63500 }, { "epoch": 1.0391884152826638, "grad_norm": 0.024200119078159332, "learning_rate": 8.26189229255013e-06, "loss": 0.0033, "step": 63510 }, { "epoch": 1.0393520412337396, "grad_norm": 0.18235932290554047, "learning_rate": 8.261170764369405e-06, "loss": 0.0017, "step": 63520 }, { "epoch": 1.0395156671848156, "grad_norm": 0.40467819571495056, "learning_rate": 8.26044911797908e-06, "loss": 0.0033, "step": 63530 }, { "epoch": 1.0396792931358914, "grad_norm": 0.16961003839969635, "learning_rate": 8.25972735340531e-06, "loss": 0.0027, "step": 63540 }, { "epoch": 1.0398429190869671, "grad_norm": 0.2214461714029312, "learning_rate": 8.259005470674257e-06, "loss": 0.0019, "step": 63550 }, { "epoch": 1.0400065450380431, "grad_norm": 0.14852003753185272, "learning_rate": 8.258283469812091e-06, "loss": 0.0059, "step": 63560 }, { "epoch": 1.040170170989119, "grad_norm": 0.09031914174556732, "learning_rate": 8.257561350844977e-06, "loss": 0.001, "step": 63570 }, { "epoch": 1.0403337969401947, "grad_norm": 0.08425884693861008, "learning_rate": 8.256839113799097e-06, "loss": 0.0059, "step": 63580 }, { "epoch": 1.0404974228912705, "grad_norm": 0.15355518460273743, "learning_rate": 8.256116758700625e-06, "loss": 0.0026, "step": 63590 }, { "epoch": 1.0406610488423464, "grad_norm": 0.13370011746883392, "learning_rate": 8.255394285575746e-06, "loss": 0.0019, "step": 63600 }, { "epoch": 1.0408246747934222, "grad_norm": 0.07177018374204636, "learning_rate": 8.25467169445065e-06, "loss": 0.0016, "step": 63610 }, { "epoch": 1.040988300744498, "grad_norm": 0.2018873542547226, "learning_rate": 8.253948985351525e-06, "loss": 0.0017, "step": 63620 }, { "epoch": 1.041151926695574, "grad_norm": 0.1159633919596672, "learning_rate": 8.253226158304571e-06, "loss": 0.0024, "step": 63630 }, { "epoch": 1.0413155526466498, "grad_norm": 0.08404206484556198, "learning_rate": 8.252503213335989e-06, "loss": 0.0031, "step": 63640 }, { "epoch": 1.0414791785977255, "grad_norm": 0.030515529215335846, "learning_rate": 8.251780150471981e-06, "loss": 0.0032, "step": 63650 }, { "epoch": 1.0416428045488015, "grad_norm": 0.238947793841362, "learning_rate": 8.251056969738757e-06, "loss": 0.0017, "step": 63660 }, { "epoch": 1.0418064304998773, "grad_norm": 0.18806204199790955, "learning_rate": 8.250333671162534e-06, "loss": 0.0041, "step": 63670 }, { "epoch": 1.041970056450953, "grad_norm": 0.21754586696624756, "learning_rate": 8.249610254769527e-06, "loss": 0.0013, "step": 63680 }, { "epoch": 1.0421336824020289, "grad_norm": 0.09268508851528168, "learning_rate": 8.248886720585958e-06, "loss": 0.0037, "step": 63690 }, { "epoch": 1.0422973083531049, "grad_norm": 0.2234266847372055, "learning_rate": 8.248163068638055e-06, "loss": 0.0024, "step": 63700 }, { "epoch": 1.0424609343041806, "grad_norm": 0.19320163130760193, "learning_rate": 8.247439298952044e-06, "loss": 0.0035, "step": 63710 }, { "epoch": 1.0426245602552564, "grad_norm": 0.04447995871305466, "learning_rate": 8.246715411554167e-06, "loss": 0.0028, "step": 63720 }, { "epoch": 1.0427881862063324, "grad_norm": 0.02140369638800621, "learning_rate": 8.245991406470658e-06, "loss": 0.0021, "step": 63730 }, { "epoch": 1.0429518121574082, "grad_norm": 0.043271198868751526, "learning_rate": 8.245267283727762e-06, "loss": 0.003, "step": 63740 }, { "epoch": 1.043115438108484, "grad_norm": 0.08963941782712936, "learning_rate": 8.244543043351729e-06, "loss": 0.0027, "step": 63750 }, { "epoch": 1.04327906405956, "grad_norm": 0.0702454149723053, "learning_rate": 8.243818685368805e-06, "loss": 0.0026, "step": 63760 }, { "epoch": 1.0434426900106357, "grad_norm": 0.0729159265756607, "learning_rate": 8.243094209805254e-06, "loss": 0.0038, "step": 63770 }, { "epoch": 1.0436063159617115, "grad_norm": 0.0551057867705822, "learning_rate": 8.242369616687329e-06, "loss": 0.0032, "step": 63780 }, { "epoch": 1.0437699419127873, "grad_norm": 0.1939677894115448, "learning_rate": 8.241644906041299e-06, "loss": 0.0061, "step": 63790 }, { "epoch": 1.0439335678638633, "grad_norm": 0.05500328168272972, "learning_rate": 8.240920077893433e-06, "loss": 0.0016, "step": 63800 }, { "epoch": 1.044097193814939, "grad_norm": 0.294474333524704, "learning_rate": 8.240195132270003e-06, "loss": 0.0024, "step": 63810 }, { "epoch": 1.0442608197660148, "grad_norm": 0.07188540697097778, "learning_rate": 8.239470069197289e-06, "loss": 0.0014, "step": 63820 }, { "epoch": 1.0444244457170908, "grad_norm": 0.19571511447429657, "learning_rate": 8.23874488870157e-06, "loss": 0.003, "step": 63830 }, { "epoch": 1.0445880716681666, "grad_norm": 0.06561805307865143, "learning_rate": 8.238019590809131e-06, "loss": 0.0014, "step": 63840 }, { "epoch": 1.0447516976192424, "grad_norm": 0.05515031889081001, "learning_rate": 8.237294175546266e-06, "loss": 0.0021, "step": 63850 }, { "epoch": 1.0449153235703184, "grad_norm": 0.12624773383140564, "learning_rate": 8.236568642939267e-06, "loss": 0.0023, "step": 63860 }, { "epoch": 1.0450789495213941, "grad_norm": 0.13351145386695862, "learning_rate": 8.235842993014434e-06, "loss": 0.0028, "step": 63870 }, { "epoch": 1.04524257547247, "grad_norm": 0.06946561485528946, "learning_rate": 8.235117225798069e-06, "loss": 0.0034, "step": 63880 }, { "epoch": 1.0454062014235457, "grad_norm": 0.20091894268989563, "learning_rate": 8.23439134131648e-06, "loss": 0.0021, "step": 63890 }, { "epoch": 1.0455698273746217, "grad_norm": 0.08368875831365585, "learning_rate": 8.23366533959598e-06, "loss": 0.002, "step": 63900 }, { "epoch": 1.0457334533256974, "grad_norm": 0.2012326419353485, "learning_rate": 8.232939220662881e-06, "loss": 0.003, "step": 63910 }, { "epoch": 1.0458970792767732, "grad_norm": 0.03791200742125511, "learning_rate": 8.232212984543507e-06, "loss": 0.0018, "step": 63920 }, { "epoch": 1.0460607052278492, "grad_norm": 0.08582803606987, "learning_rate": 8.231486631264181e-06, "loss": 0.003, "step": 63930 }, { "epoch": 1.046224331178925, "grad_norm": 0.23705624043941498, "learning_rate": 8.23076016085123e-06, "loss": 0.0055, "step": 63940 }, { "epoch": 1.0463879571300008, "grad_norm": 0.10906633734703064, "learning_rate": 8.230033573330989e-06, "loss": 0.0027, "step": 63950 }, { "epoch": 1.0465515830810768, "grad_norm": 0.12108849734067917, "learning_rate": 8.229306868729793e-06, "loss": 0.0035, "step": 63960 }, { "epoch": 1.0467152090321525, "grad_norm": 0.06064796447753906, "learning_rate": 8.228580047073986e-06, "loss": 0.0021, "step": 63970 }, { "epoch": 1.0468788349832283, "grad_norm": 0.10400663316249847, "learning_rate": 8.227853108389911e-06, "loss": 0.0025, "step": 63980 }, { "epoch": 1.047042460934304, "grad_norm": 0.170183464884758, "learning_rate": 8.22712605270392e-06, "loss": 0.0031, "step": 63990 }, { "epoch": 1.04720608688538, "grad_norm": 0.12702810764312744, "learning_rate": 8.226398880042364e-06, "loss": 0.0028, "step": 64000 }, { "epoch": 1.0473697128364559, "grad_norm": 0.23696675896644592, "learning_rate": 8.225671590431604e-06, "loss": 0.0018, "step": 64010 }, { "epoch": 1.0475333387875316, "grad_norm": 0.054465774446725845, "learning_rate": 8.224944183898002e-06, "loss": 0.0032, "step": 64020 }, { "epoch": 1.0476969647386076, "grad_norm": 0.06987177580595016, "learning_rate": 8.224216660467924e-06, "loss": 0.0013, "step": 64030 }, { "epoch": 1.0478605906896834, "grad_norm": 0.12339454144239426, "learning_rate": 8.223489020167741e-06, "loss": 0.0018, "step": 64040 }, { "epoch": 1.0480242166407592, "grad_norm": 0.17792271077632904, "learning_rate": 8.222761263023829e-06, "loss": 0.0014, "step": 64050 }, { "epoch": 1.048187842591835, "grad_norm": 0.046114180237054825, "learning_rate": 8.222033389062565e-06, "loss": 0.0026, "step": 64060 }, { "epoch": 1.048351468542911, "grad_norm": 0.04991060122847557, "learning_rate": 8.221305398310336e-06, "loss": 0.0018, "step": 64070 }, { "epoch": 1.0485150944939867, "grad_norm": 0.22806178033351898, "learning_rate": 8.220577290793529e-06, "loss": 0.0027, "step": 64080 }, { "epoch": 1.0486787204450625, "grad_norm": 0.0364716611802578, "learning_rate": 8.219849066538535e-06, "loss": 0.0013, "step": 64090 }, { "epoch": 1.0488423463961385, "grad_norm": 0.1451956331729889, "learning_rate": 8.21912072557175e-06, "loss": 0.0041, "step": 64100 }, { "epoch": 1.0490059723472143, "grad_norm": 0.07945800572633743, "learning_rate": 8.218392267919578e-06, "loss": 0.0038, "step": 64110 }, { "epoch": 1.04916959829829, "grad_norm": 0.09665296226739883, "learning_rate": 8.217663693608419e-06, "loss": 0.0026, "step": 64120 }, { "epoch": 1.049333224249366, "grad_norm": 0.0728873461484909, "learning_rate": 8.216935002664688e-06, "loss": 0.0017, "step": 64130 }, { "epoch": 1.0494968502004418, "grad_norm": 0.15877701342105865, "learning_rate": 8.216206195114792e-06, "loss": 0.002, "step": 64140 }, { "epoch": 1.0496604761515176, "grad_norm": 0.10207909345626831, "learning_rate": 8.215477270985152e-06, "loss": 0.0023, "step": 64150 }, { "epoch": 1.0498241021025936, "grad_norm": 0.08115428686141968, "learning_rate": 8.21474823030219e-06, "loss": 0.0019, "step": 64160 }, { "epoch": 1.0499877280536694, "grad_norm": 0.19729632139205933, "learning_rate": 8.21401907309233e-06, "loss": 0.0029, "step": 64170 }, { "epoch": 1.0501513540047451, "grad_norm": 0.13712078332901, "learning_rate": 8.213289799382002e-06, "loss": 0.0031, "step": 64180 }, { "epoch": 1.050314979955821, "grad_norm": 0.1926317662000656, "learning_rate": 8.212560409197645e-06, "loss": 0.0022, "step": 64190 }, { "epoch": 1.050478605906897, "grad_norm": 0.20843687653541565, "learning_rate": 8.211830902565693e-06, "loss": 0.003, "step": 64200 }, { "epoch": 1.0506422318579727, "grad_norm": 0.23143619298934937, "learning_rate": 8.21110127951259e-06, "loss": 0.0023, "step": 64210 }, { "epoch": 1.0508058578090484, "grad_norm": 0.31759482622146606, "learning_rate": 8.210371540064783e-06, "loss": 0.0033, "step": 64220 }, { "epoch": 1.0509694837601244, "grad_norm": 0.053235169500112534, "learning_rate": 8.209641684248723e-06, "loss": 0.0014, "step": 64230 }, { "epoch": 1.0511331097112002, "grad_norm": 0.07178881764411926, "learning_rate": 8.208911712090868e-06, "loss": 0.0022, "step": 64240 }, { "epoch": 1.051296735662276, "grad_norm": 0.12044107913970947, "learning_rate": 8.208181623617675e-06, "loss": 0.0026, "step": 64250 }, { "epoch": 1.0514603616133518, "grad_norm": 0.1554144322872162, "learning_rate": 8.207451418855607e-06, "loss": 0.0027, "step": 64260 }, { "epoch": 1.0516239875644278, "grad_norm": 0.05538433417677879, "learning_rate": 8.206721097831136e-06, "loss": 0.0021, "step": 64270 }, { "epoch": 1.0517876135155035, "grad_norm": 0.10453752428293228, "learning_rate": 8.205990660570733e-06, "loss": 0.0018, "step": 64280 }, { "epoch": 1.0519512394665793, "grad_norm": 0.08280295878648758, "learning_rate": 8.205260107100872e-06, "loss": 0.0022, "step": 64290 }, { "epoch": 1.0521148654176553, "grad_norm": 0.08725742250680923, "learning_rate": 8.204529437448036e-06, "loss": 0.0026, "step": 64300 }, { "epoch": 1.052278491368731, "grad_norm": 0.0965590700507164, "learning_rate": 8.203798651638713e-06, "loss": 0.0026, "step": 64310 }, { "epoch": 1.0524421173198069, "grad_norm": 0.23796500265598297, "learning_rate": 8.203067749699384e-06, "loss": 0.0014, "step": 64320 }, { "epoch": 1.0526057432708829, "grad_norm": 0.020064018666744232, "learning_rate": 8.202336731656551e-06, "loss": 0.0029, "step": 64330 }, { "epoch": 1.0527693692219586, "grad_norm": 0.08776815235614777, "learning_rate": 8.201605597536707e-06, "loss": 0.0017, "step": 64340 }, { "epoch": 1.0529329951730344, "grad_norm": 0.12225066125392914, "learning_rate": 8.200874347366354e-06, "loss": 0.0023, "step": 64350 }, { "epoch": 1.0530966211241102, "grad_norm": 0.29338234663009644, "learning_rate": 8.200142981172001e-06, "loss": 0.002, "step": 64360 }, { "epoch": 1.0532602470751862, "grad_norm": 0.15350478887557983, "learning_rate": 8.199411498980155e-06, "loss": 0.0023, "step": 64370 }, { "epoch": 1.053423873026262, "grad_norm": 0.2017962485551834, "learning_rate": 8.198679900817333e-06, "loss": 0.0017, "step": 64380 }, { "epoch": 1.0535874989773377, "grad_norm": 0.011517062783241272, "learning_rate": 8.197948186710049e-06, "loss": 0.0018, "step": 64390 }, { "epoch": 1.0537511249284137, "grad_norm": 0.25941595435142517, "learning_rate": 8.197216356684833e-06, "loss": 0.002, "step": 64400 }, { "epoch": 1.0539147508794895, "grad_norm": 0.32127970457077026, "learning_rate": 8.196484410768206e-06, "loss": 0.003, "step": 64410 }, { "epoch": 1.0540783768305653, "grad_norm": 0.27373647689819336, "learning_rate": 8.195752348986703e-06, "loss": 0.0031, "step": 64420 }, { "epoch": 1.0542420027816413, "grad_norm": 0.1393657773733139, "learning_rate": 8.195020171366857e-06, "loss": 0.0024, "step": 64430 }, { "epoch": 1.054405628732717, "grad_norm": 0.18474701046943665, "learning_rate": 8.194287877935211e-06, "loss": 0.0017, "step": 64440 }, { "epoch": 1.0545692546837928, "grad_norm": 0.09622348099946976, "learning_rate": 8.193555468718306e-06, "loss": 0.0021, "step": 64450 }, { "epoch": 1.0547328806348686, "grad_norm": 0.14394673705101013, "learning_rate": 8.192822943742689e-06, "loss": 0.0021, "step": 64460 }, { "epoch": 1.0548965065859446, "grad_norm": 0.13619962334632874, "learning_rate": 8.192090303034916e-06, "loss": 0.002, "step": 64470 }, { "epoch": 1.0550601325370204, "grad_norm": 0.04721641167998314, "learning_rate": 8.19135754662154e-06, "loss": 0.0032, "step": 64480 }, { "epoch": 1.0552237584880961, "grad_norm": 0.1592308133840561, "learning_rate": 8.190624674529124e-06, "loss": 0.0015, "step": 64490 }, { "epoch": 1.0553873844391721, "grad_norm": 0.22303423285484314, "learning_rate": 8.189891686784232e-06, "loss": 0.0015, "step": 64500 }, { "epoch": 1.055551010390248, "grad_norm": 0.12353459745645523, "learning_rate": 8.189158583413432e-06, "loss": 0.0015, "step": 64510 }, { "epoch": 1.0557146363413237, "grad_norm": 0.1472569704055786, "learning_rate": 8.1884253644433e-06, "loss": 0.0037, "step": 64520 }, { "epoch": 1.0558782622923997, "grad_norm": 0.09601698815822601, "learning_rate": 8.18769202990041e-06, "loss": 0.0024, "step": 64530 }, { "epoch": 1.0560418882434754, "grad_norm": 0.08635096251964569, "learning_rate": 8.186958579811347e-06, "loss": 0.0022, "step": 64540 }, { "epoch": 1.0562055141945512, "grad_norm": 0.06166418641805649, "learning_rate": 8.186225014202694e-06, "loss": 0.0017, "step": 64550 }, { "epoch": 1.056369140145627, "grad_norm": 0.05661597102880478, "learning_rate": 8.185491333101043e-06, "loss": 0.0025, "step": 64560 }, { "epoch": 1.056532766096703, "grad_norm": 0.15421047806739807, "learning_rate": 8.184757536532985e-06, "loss": 0.0027, "step": 64570 }, { "epoch": 1.0566963920477788, "grad_norm": 0.20517005026340485, "learning_rate": 8.184023624525123e-06, "loss": 0.0026, "step": 64580 }, { "epoch": 1.0568600179988545, "grad_norm": 0.22030770778656006, "learning_rate": 8.183289597104058e-06, "loss": 0.0024, "step": 64590 }, { "epoch": 1.0570236439499305, "grad_norm": 0.05827048048377037, "learning_rate": 8.182555454296394e-06, "loss": 0.0014, "step": 64600 }, { "epoch": 1.0571872699010063, "grad_norm": 0.14805006980895996, "learning_rate": 8.181821196128745e-06, "loss": 0.0021, "step": 64610 }, { "epoch": 1.057350895852082, "grad_norm": 0.13481366634368896, "learning_rate": 8.181086822627724e-06, "loss": 0.0022, "step": 64620 }, { "epoch": 1.057514521803158, "grad_norm": 0.07633001357316971, "learning_rate": 8.180352333819953e-06, "loss": 0.0031, "step": 64630 }, { "epoch": 1.0576781477542339, "grad_norm": 0.19227145612239838, "learning_rate": 8.17961772973205e-06, "loss": 0.002, "step": 64640 }, { "epoch": 1.0578417737053096, "grad_norm": 0.038855452090501785, "learning_rate": 8.17888301039065e-06, "loss": 0.0021, "step": 64650 }, { "epoch": 1.0580053996563854, "grad_norm": 0.15322133898735046, "learning_rate": 8.17814817582238e-06, "loss": 0.0021, "step": 64660 }, { "epoch": 1.0581690256074614, "grad_norm": 0.1207018569111824, "learning_rate": 8.177413226053876e-06, "loss": 0.0015, "step": 64670 }, { "epoch": 1.0583326515585372, "grad_norm": 0.05391998961567879, "learning_rate": 8.17667816111178e-06, "loss": 0.0022, "step": 64680 }, { "epoch": 1.058496277509613, "grad_norm": 0.27931055426597595, "learning_rate": 8.175942981022737e-06, "loss": 0.0029, "step": 64690 }, { "epoch": 1.058659903460689, "grad_norm": 0.045851610600948334, "learning_rate": 8.175207685813392e-06, "loss": 0.0021, "step": 64700 }, { "epoch": 1.0588235294117647, "grad_norm": 0.07769758254289627, "learning_rate": 8.1744722755104e-06, "loss": 0.0018, "step": 64710 }, { "epoch": 1.0589871553628405, "grad_norm": 0.13252119719982147, "learning_rate": 8.17373675014042e-06, "loss": 0.0026, "step": 64720 }, { "epoch": 1.0591507813139165, "grad_norm": 0.3611391484737396, "learning_rate": 8.173001109730107e-06, "loss": 0.0018, "step": 64730 }, { "epoch": 1.0593144072649923, "grad_norm": 0.11499658226966858, "learning_rate": 8.172265354306132e-06, "loss": 0.0018, "step": 64740 }, { "epoch": 1.059478033216068, "grad_norm": 0.3381210267543793, "learning_rate": 8.171529483895161e-06, "loss": 0.0032, "step": 64750 }, { "epoch": 1.0596416591671438, "grad_norm": 0.0493021160364151, "learning_rate": 8.170793498523871e-06, "loss": 0.0025, "step": 64760 }, { "epoch": 1.0598052851182198, "grad_norm": 0.17737528681755066, "learning_rate": 8.170057398218936e-06, "loss": 0.0023, "step": 64770 }, { "epoch": 1.0599689110692956, "grad_norm": 0.01416482962667942, "learning_rate": 8.169321183007039e-06, "loss": 0.0027, "step": 64780 }, { "epoch": 1.0601325370203714, "grad_norm": 0.18234539031982422, "learning_rate": 8.168584852914869e-06, "loss": 0.0018, "step": 64790 }, { "epoch": 1.0602961629714474, "grad_norm": 0.049760852009058, "learning_rate": 8.16784840796911e-06, "loss": 0.0021, "step": 64800 }, { "epoch": 1.0604597889225231, "grad_norm": 0.11862653493881226, "learning_rate": 8.167111848196462e-06, "loss": 0.0037, "step": 64810 }, { "epoch": 1.060623414873599, "grad_norm": 0.021947648376226425, "learning_rate": 8.166375173623621e-06, "loss": 0.0042, "step": 64820 }, { "epoch": 1.060787040824675, "grad_norm": 0.20204272866249084, "learning_rate": 8.16563838427729e-06, "loss": 0.0023, "step": 64830 }, { "epoch": 1.0609506667757507, "grad_norm": 0.2470373958349228, "learning_rate": 8.164901480184177e-06, "loss": 0.0031, "step": 64840 }, { "epoch": 1.0611142927268264, "grad_norm": 0.10044754296541214, "learning_rate": 8.164164461370991e-06, "loss": 0.0033, "step": 64850 }, { "epoch": 1.0612779186779022, "grad_norm": 0.047854505479335785, "learning_rate": 8.163427327864449e-06, "loss": 0.0029, "step": 64860 }, { "epoch": 1.0614415446289782, "grad_norm": 0.1003831997513771, "learning_rate": 8.162690079691269e-06, "loss": 0.0019, "step": 64870 }, { "epoch": 1.061605170580054, "grad_norm": 0.11221947520971298, "learning_rate": 8.161952716878174e-06, "loss": 0.0017, "step": 64880 }, { "epoch": 1.0617687965311298, "grad_norm": 0.2193416953086853, "learning_rate": 8.161215239451893e-06, "loss": 0.0029, "step": 64890 }, { "epoch": 1.0619324224822058, "grad_norm": 0.016209201887249947, "learning_rate": 8.160477647439159e-06, "loss": 0.002, "step": 64900 }, { "epoch": 1.0620960484332815, "grad_norm": 0.2978039085865021, "learning_rate": 8.159739940866704e-06, "loss": 0.0022, "step": 64910 }, { "epoch": 1.0622596743843573, "grad_norm": 0.07247889041900635, "learning_rate": 8.15900211976127e-06, "loss": 0.0021, "step": 64920 }, { "epoch": 1.0624233003354333, "grad_norm": 0.15218892693519592, "learning_rate": 8.158264184149602e-06, "loss": 0.0019, "step": 64930 }, { "epoch": 1.062586926286509, "grad_norm": 0.13460765779018402, "learning_rate": 8.157526134058452e-06, "loss": 0.0018, "step": 64940 }, { "epoch": 1.0627505522375849, "grad_norm": 0.11842235177755356, "learning_rate": 8.156787969514564e-06, "loss": 0.0041, "step": 64950 }, { "epoch": 1.0629141781886606, "grad_norm": 0.1161569207906723, "learning_rate": 8.156049690544701e-06, "loss": 0.0028, "step": 64960 }, { "epoch": 1.0630778041397366, "grad_norm": 0.09794294834136963, "learning_rate": 8.15531129717562e-06, "loss": 0.0021, "step": 64970 }, { "epoch": 1.0632414300908124, "grad_norm": 0.06227337196469307, "learning_rate": 8.15457278943409e-06, "loss": 0.0025, "step": 64980 }, { "epoch": 1.0634050560418882, "grad_norm": 0.11521907150745392, "learning_rate": 8.15383416734688e-06, "loss": 0.0018, "step": 64990 }, { "epoch": 1.0635686819929642, "grad_norm": 0.15379422903060913, "learning_rate": 8.153095430940758e-06, "loss": 0.0025, "step": 65000 }, { "epoch": 1.06373230794404, "grad_norm": 0.14501148462295532, "learning_rate": 8.152356580242507e-06, "loss": 0.0032, "step": 65010 }, { "epoch": 1.0638959338951157, "grad_norm": 0.10077831894159317, "learning_rate": 8.151617615278907e-06, "loss": 0.003, "step": 65020 }, { "epoch": 1.0640595598461915, "grad_norm": 0.1052640825510025, "learning_rate": 8.15087853607674e-06, "loss": 0.0026, "step": 65030 }, { "epoch": 1.0642231857972675, "grad_norm": 0.05653892830014229, "learning_rate": 8.150139342662801e-06, "loss": 0.0038, "step": 65040 }, { "epoch": 1.0643868117483433, "grad_norm": 0.11214754730463028, "learning_rate": 8.149400035063883e-06, "loss": 0.0021, "step": 65050 }, { "epoch": 1.064550437699419, "grad_norm": 0.11909761279821396, "learning_rate": 8.148660613306783e-06, "loss": 0.0022, "step": 65060 }, { "epoch": 1.064714063650495, "grad_norm": 0.08854610472917557, "learning_rate": 8.147921077418301e-06, "loss": 0.002, "step": 65070 }, { "epoch": 1.0648776896015708, "grad_norm": 0.18064527213573456, "learning_rate": 8.147181427425248e-06, "loss": 0.0027, "step": 65080 }, { "epoch": 1.0650413155526466, "grad_norm": 0.029910102486610413, "learning_rate": 8.146441663354431e-06, "loss": 0.0015, "step": 65090 }, { "epoch": 1.0652049415037226, "grad_norm": 0.26644033193588257, "learning_rate": 8.145701785232666e-06, "loss": 0.0031, "step": 65100 }, { "epoch": 1.0653685674547984, "grad_norm": 0.3423719108104706, "learning_rate": 8.144961793086775e-06, "loss": 0.0016, "step": 65110 }, { "epoch": 1.0655321934058741, "grad_norm": 0.13085444271564484, "learning_rate": 8.144221686943574e-06, "loss": 0.0039, "step": 65120 }, { "epoch": 1.0656958193569501, "grad_norm": 0.11731607466936111, "learning_rate": 8.143481466829895e-06, "loss": 0.0016, "step": 65130 }, { "epoch": 1.065859445308026, "grad_norm": 0.11845797300338745, "learning_rate": 8.142741132772567e-06, "loss": 0.0025, "step": 65140 }, { "epoch": 1.0660230712591017, "grad_norm": 0.13626961410045624, "learning_rate": 8.142000684798427e-06, "loss": 0.0027, "step": 65150 }, { "epoch": 1.0661866972101774, "grad_norm": 0.08413999527692795, "learning_rate": 8.141260122934313e-06, "loss": 0.0019, "step": 65160 }, { "epoch": 1.0663503231612534, "grad_norm": 0.12726691365242004, "learning_rate": 8.140519447207072e-06, "loss": 0.0019, "step": 65170 }, { "epoch": 1.0665139491123292, "grad_norm": 0.1418721228837967, "learning_rate": 8.139778657643547e-06, "loss": 0.0036, "step": 65180 }, { "epoch": 1.066677575063405, "grad_norm": 0.16693459451198578, "learning_rate": 8.13903775427059e-06, "loss": 0.0028, "step": 65190 }, { "epoch": 1.066841201014481, "grad_norm": 0.18969066441059113, "learning_rate": 8.138296737115063e-06, "loss": 0.0031, "step": 65200 }, { "epoch": 1.0670048269655568, "grad_norm": 0.1474536806344986, "learning_rate": 8.13755560620382e-06, "loss": 0.0019, "step": 65210 }, { "epoch": 1.0671684529166325, "grad_norm": 0.01634121686220169, "learning_rate": 8.136814361563728e-06, "loss": 0.0027, "step": 65220 }, { "epoch": 1.0673320788677083, "grad_norm": 0.05115247890353203, "learning_rate": 8.136073003221654e-06, "loss": 0.0023, "step": 65230 }, { "epoch": 1.0674957048187843, "grad_norm": 0.4415786564350128, "learning_rate": 8.13533153120447e-06, "loss": 0.0018, "step": 65240 }, { "epoch": 1.06765933076986, "grad_norm": 0.06870345026254654, "learning_rate": 8.134589945539055e-06, "loss": 0.0037, "step": 65250 }, { "epoch": 1.0678229567209359, "grad_norm": 0.0712616816163063, "learning_rate": 8.133848246252288e-06, "loss": 0.0026, "step": 65260 }, { "epoch": 1.0679865826720119, "grad_norm": 0.11226426810026169, "learning_rate": 8.133106433371055e-06, "loss": 0.0024, "step": 65270 }, { "epoch": 1.0681502086230876, "grad_norm": 0.08850929141044617, "learning_rate": 8.132364506922241e-06, "loss": 0.0029, "step": 65280 }, { "epoch": 1.0683138345741634, "grad_norm": 0.226271390914917, "learning_rate": 8.131622466932744e-06, "loss": 0.0031, "step": 65290 }, { "epoch": 1.0684774605252394, "grad_norm": 0.10922946035861969, "learning_rate": 8.130880313429459e-06, "loss": 0.0033, "step": 65300 }, { "epoch": 1.0686410864763152, "grad_norm": 0.14066576957702637, "learning_rate": 8.130138046439287e-06, "loss": 0.0031, "step": 65310 }, { "epoch": 1.068804712427391, "grad_norm": 0.14621934294700623, "learning_rate": 8.129395665989135e-06, "loss": 0.0022, "step": 65320 }, { "epoch": 1.068968338378467, "grad_norm": 0.4749991297721863, "learning_rate": 8.128653172105911e-06, "loss": 0.0054, "step": 65330 }, { "epoch": 1.0691319643295427, "grad_norm": 0.08464115858078003, "learning_rate": 8.127910564816527e-06, "loss": 0.0037, "step": 65340 }, { "epoch": 1.0692955902806185, "grad_norm": 0.20063425600528717, "learning_rate": 8.127167844147908e-06, "loss": 0.0021, "step": 65350 }, { "epoch": 1.0694592162316943, "grad_norm": 0.059448108077049255, "learning_rate": 8.126425010126966e-06, "loss": 0.0021, "step": 65360 }, { "epoch": 1.0696228421827703, "grad_norm": 0.17158937454223633, "learning_rate": 8.125682062780634e-06, "loss": 0.0022, "step": 65370 }, { "epoch": 1.069786468133846, "grad_norm": 0.1435229331254959, "learning_rate": 8.124939002135837e-06, "loss": 0.0022, "step": 65380 }, { "epoch": 1.0699500940849218, "grad_norm": 0.29025959968566895, "learning_rate": 8.124195828219513e-06, "loss": 0.0027, "step": 65390 }, { "epoch": 1.0701137200359978, "grad_norm": 0.09354247897863388, "learning_rate": 8.1234525410586e-06, "loss": 0.0029, "step": 65400 }, { "epoch": 1.0702773459870736, "grad_norm": 0.27978625893592834, "learning_rate": 8.122709140680039e-06, "loss": 0.0029, "step": 65410 }, { "epoch": 1.0704409719381494, "grad_norm": 0.0676610991358757, "learning_rate": 8.121965627110775e-06, "loss": 0.0056, "step": 65420 }, { "epoch": 1.0706045978892251, "grad_norm": 0.23127678036689758, "learning_rate": 8.121222000377763e-06, "loss": 0.0033, "step": 65430 }, { "epoch": 1.0707682238403011, "grad_norm": 0.22223244607448578, "learning_rate": 8.120478260507954e-06, "loss": 0.003, "step": 65440 }, { "epoch": 1.070931849791377, "grad_norm": 0.1291331946849823, "learning_rate": 8.119734407528308e-06, "loss": 0.0021, "step": 65450 }, { "epoch": 1.0710954757424527, "grad_norm": 0.18976299464702606, "learning_rate": 8.118990441465789e-06, "loss": 0.0024, "step": 65460 }, { "epoch": 1.0712591016935287, "grad_norm": 0.09180238097906113, "learning_rate": 8.118246362347361e-06, "loss": 0.0043, "step": 65470 }, { "epoch": 1.0714227276446044, "grad_norm": 0.11240053176879883, "learning_rate": 8.117502170199997e-06, "loss": 0.002, "step": 65480 }, { "epoch": 1.0715863535956802, "grad_norm": 0.061869967728853226, "learning_rate": 8.116757865050674e-06, "loss": 0.0021, "step": 65490 }, { "epoch": 1.0717499795467562, "grad_norm": 0.19311775267124176, "learning_rate": 8.116013446926367e-06, "loss": 0.0027, "step": 65500 }, { "epoch": 1.071913605497832, "grad_norm": 0.06645571440458298, "learning_rate": 8.115268915854062e-06, "loss": 0.0025, "step": 65510 }, { "epoch": 1.0720772314489078, "grad_norm": 0.14953108131885529, "learning_rate": 8.114524271860746e-06, "loss": 0.0024, "step": 65520 }, { "epoch": 1.0722408573999835, "grad_norm": 0.06905048340559006, "learning_rate": 8.113779514973412e-06, "loss": 0.0016, "step": 65530 }, { "epoch": 1.0724044833510595, "grad_norm": 0.12818819284439087, "learning_rate": 8.113034645219053e-06, "loss": 0.0029, "step": 65540 }, { "epoch": 1.0725681093021353, "grad_norm": 0.5096604824066162, "learning_rate": 8.112289662624673e-06, "loss": 0.004, "step": 65550 }, { "epoch": 1.072731735253211, "grad_norm": 0.19868259131908417, "learning_rate": 8.11154456721727e-06, "loss": 0.0031, "step": 65560 }, { "epoch": 1.072895361204287, "grad_norm": 0.007214093115180731, "learning_rate": 8.110799359023855e-06, "loss": 0.0028, "step": 65570 }, { "epoch": 1.0730589871553629, "grad_norm": 0.05731329321861267, "learning_rate": 8.11005403807144e-06, "loss": 0.0015, "step": 65580 }, { "epoch": 1.0732226131064386, "grad_norm": 0.14137881994247437, "learning_rate": 8.109308604387043e-06, "loss": 0.0025, "step": 65590 }, { "epoch": 1.0733862390575146, "grad_norm": 0.29019469022750854, "learning_rate": 8.10856305799768e-06, "loss": 0.0017, "step": 65600 }, { "epoch": 1.0735498650085904, "grad_norm": 0.11052026599645615, "learning_rate": 8.107817398930377e-06, "loss": 0.0018, "step": 65610 }, { "epoch": 1.0737134909596662, "grad_norm": 0.24654839932918549, "learning_rate": 8.107071627212166e-06, "loss": 0.0036, "step": 65620 }, { "epoch": 1.073877116910742, "grad_norm": 0.08177804201841354, "learning_rate": 8.106325742870072e-06, "loss": 0.0035, "step": 65630 }, { "epoch": 1.074040742861818, "grad_norm": 0.1591581553220749, "learning_rate": 8.105579745931139e-06, "loss": 0.004, "step": 65640 }, { "epoch": 1.0742043688128937, "grad_norm": 0.17649294435977936, "learning_rate": 8.104833636422402e-06, "loss": 0.0028, "step": 65650 }, { "epoch": 1.0743679947639695, "grad_norm": 0.05311176925897598, "learning_rate": 8.104087414370908e-06, "loss": 0.0022, "step": 65660 }, { "epoch": 1.0745316207150455, "grad_norm": 0.22333522140979767, "learning_rate": 8.103341079803707e-06, "loss": 0.0028, "step": 65670 }, { "epoch": 1.0746952466661213, "grad_norm": 0.13097503781318665, "learning_rate": 8.10259463274785e-06, "loss": 0.0027, "step": 65680 }, { "epoch": 1.074858872617197, "grad_norm": 0.04455450549721718, "learning_rate": 8.101848073230393e-06, "loss": 0.0037, "step": 65690 }, { "epoch": 1.075022498568273, "grad_norm": 0.2615099251270294, "learning_rate": 8.101101401278402e-06, "loss": 0.0027, "step": 65700 }, { "epoch": 1.0751861245193488, "grad_norm": 0.15217532217502594, "learning_rate": 8.100354616918936e-06, "loss": 0.0016, "step": 65710 }, { "epoch": 1.0753497504704246, "grad_norm": 0.07289324700832367, "learning_rate": 8.099607720179067e-06, "loss": 0.002, "step": 65720 }, { "epoch": 1.0755133764215004, "grad_norm": 0.2076735496520996, "learning_rate": 8.098860711085867e-06, "loss": 0.0023, "step": 65730 }, { "epoch": 1.0756770023725764, "grad_norm": 0.08935001492500305, "learning_rate": 8.098113589666416e-06, "loss": 0.0018, "step": 65740 }, { "epoch": 1.0758406283236521, "grad_norm": 0.2966357469558716, "learning_rate": 8.097366355947793e-06, "loss": 0.0019, "step": 65750 }, { "epoch": 1.076004254274728, "grad_norm": 0.29122012853622437, "learning_rate": 8.096619009957082e-06, "loss": 0.0018, "step": 65760 }, { "epoch": 1.076167880225804, "grad_norm": 0.11934593319892883, "learning_rate": 8.095871551721377e-06, "loss": 0.0018, "step": 65770 }, { "epoch": 1.0763315061768797, "grad_norm": 0.4460866451263428, "learning_rate": 8.095123981267767e-06, "loss": 0.003, "step": 65780 }, { "epoch": 1.0764951321279554, "grad_norm": 0.1869867444038391, "learning_rate": 8.094376298623354e-06, "loss": 0.0024, "step": 65790 }, { "epoch": 1.0766587580790312, "grad_norm": 0.07907191663980484, "learning_rate": 8.093628503815236e-06, "loss": 0.0024, "step": 65800 }, { "epoch": 1.0768223840301072, "grad_norm": 0.08090656250715256, "learning_rate": 8.092880596870518e-06, "loss": 0.0017, "step": 65810 }, { "epoch": 1.076986009981183, "grad_norm": 0.9056613445281982, "learning_rate": 8.092132577816314e-06, "loss": 0.0044, "step": 65820 }, { "epoch": 1.0771496359322588, "grad_norm": 0.3343521058559418, "learning_rate": 8.091384446679736e-06, "loss": 0.0036, "step": 65830 }, { "epoch": 1.0773132618833348, "grad_norm": 0.02946481667459011, "learning_rate": 8.090636203487902e-06, "loss": 0.0022, "step": 65840 }, { "epoch": 1.0774768878344105, "grad_norm": 0.2516515254974365, "learning_rate": 8.089887848267933e-06, "loss": 0.0026, "step": 65850 }, { "epoch": 1.0776405137854863, "grad_norm": 0.007942191325128078, "learning_rate": 8.089139381046955e-06, "loss": 0.0012, "step": 65860 }, { "epoch": 1.0778041397365623, "grad_norm": 0.05471820384263992, "learning_rate": 8.088390801852101e-06, "loss": 0.0011, "step": 65870 }, { "epoch": 1.077967765687638, "grad_norm": 0.34158727526664734, "learning_rate": 8.087642110710501e-06, "loss": 0.0034, "step": 65880 }, { "epoch": 1.0781313916387139, "grad_norm": 0.14389953017234802, "learning_rate": 8.086893307649296e-06, "loss": 0.002, "step": 65890 }, { "epoch": 1.0782950175897899, "grad_norm": 0.1541469395160675, "learning_rate": 8.086144392695629e-06, "loss": 0.0034, "step": 65900 }, { "epoch": 1.0784586435408656, "grad_norm": 0.15493831038475037, "learning_rate": 8.085395365876644e-06, "loss": 0.0021, "step": 65910 }, { "epoch": 1.0786222694919414, "grad_norm": 0.09754495322704315, "learning_rate": 8.084646227219493e-06, "loss": 0.0029, "step": 65920 }, { "epoch": 1.0787858954430172, "grad_norm": 0.10205633193254471, "learning_rate": 8.083896976751332e-06, "loss": 0.0028, "step": 65930 }, { "epoch": 1.0789495213940932, "grad_norm": 0.12074751406908035, "learning_rate": 8.083147614499314e-06, "loss": 0.0036, "step": 65940 }, { "epoch": 1.079113147345169, "grad_norm": 0.07306011766195297, "learning_rate": 8.082398140490608e-06, "loss": 0.0025, "step": 65950 }, { "epoch": 1.0792767732962447, "grad_norm": 0.3032417297363281, "learning_rate": 8.081648554752374e-06, "loss": 0.005, "step": 65960 }, { "epoch": 1.0794403992473207, "grad_norm": 0.10294811427593231, "learning_rate": 8.080898857311789e-06, "loss": 0.0019, "step": 65970 }, { "epoch": 1.0796040251983965, "grad_norm": 0.0685034766793251, "learning_rate": 8.080149048196027e-06, "loss": 0.0014, "step": 65980 }, { "epoch": 1.0797676511494723, "grad_norm": 0.14835114777088165, "learning_rate": 8.079399127432263e-06, "loss": 0.0031, "step": 65990 }, { "epoch": 1.079931277100548, "grad_norm": 0.005194514058530331, "learning_rate": 8.078649095047681e-06, "loss": 0.0018, "step": 66000 }, { "epoch": 1.080094903051624, "grad_norm": 0.11142878979444504, "learning_rate": 8.07789895106947e-06, "loss": 0.0017, "step": 66010 }, { "epoch": 1.0802585290026998, "grad_norm": 0.18695776164531708, "learning_rate": 8.077148695524819e-06, "loss": 0.0029, "step": 66020 }, { "epoch": 1.0804221549537756, "grad_norm": 0.05895831808447838, "learning_rate": 8.076398328440922e-06, "loss": 0.0019, "step": 66030 }, { "epoch": 1.0805857809048516, "grad_norm": 0.2404588907957077, "learning_rate": 8.075647849844981e-06, "loss": 0.0025, "step": 66040 }, { "epoch": 1.0807494068559274, "grad_norm": 0.07785984873771667, "learning_rate": 8.074897259764198e-06, "loss": 0.002, "step": 66050 }, { "epoch": 1.0809130328070031, "grad_norm": 0.10359875857830048, "learning_rate": 8.07414655822578e-06, "loss": 0.0016, "step": 66060 }, { "epoch": 1.0810766587580791, "grad_norm": 0.04366911202669144, "learning_rate": 8.073395745256937e-06, "loss": 0.0024, "step": 66070 }, { "epoch": 1.081240284709155, "grad_norm": 0.12163852900266647, "learning_rate": 8.072644820884884e-06, "loss": 0.0022, "step": 66080 }, { "epoch": 1.0814039106602307, "grad_norm": 0.1838352084159851, "learning_rate": 8.071893785136842e-06, "loss": 0.0035, "step": 66090 }, { "epoch": 1.0815675366113067, "grad_norm": 0.088449627161026, "learning_rate": 8.071142638040035e-06, "loss": 0.0021, "step": 66100 }, { "epoch": 1.0817311625623824, "grad_norm": 0.045412205159664154, "learning_rate": 8.070391379621687e-06, "loss": 0.0021, "step": 66110 }, { "epoch": 1.0818947885134582, "grad_norm": 0.3884061276912689, "learning_rate": 8.06964000990903e-06, "loss": 0.0055, "step": 66120 }, { "epoch": 1.082058414464534, "grad_norm": 0.08041780441999435, "learning_rate": 8.068888528929303e-06, "loss": 0.0017, "step": 66130 }, { "epoch": 1.08222204041561, "grad_norm": 0.2716643214225769, "learning_rate": 8.06813693670974e-06, "loss": 0.0023, "step": 66140 }, { "epoch": 1.0823856663666858, "grad_norm": 0.17752836644649506, "learning_rate": 8.067385233277589e-06, "loss": 0.0028, "step": 66150 }, { "epoch": 1.0825492923177615, "grad_norm": 0.3403341472148895, "learning_rate": 8.066633418660095e-06, "loss": 0.0038, "step": 66160 }, { "epoch": 1.0827129182688375, "grad_norm": 0.06830795854330063, "learning_rate": 8.06588149288451e-06, "loss": 0.0016, "step": 66170 }, { "epoch": 1.0828765442199133, "grad_norm": 0.15338610112667084, "learning_rate": 8.065129455978087e-06, "loss": 0.0042, "step": 66180 }, { "epoch": 1.083040170170989, "grad_norm": 0.06503737717866898, "learning_rate": 8.064377307968092e-06, "loss": 0.0025, "step": 66190 }, { "epoch": 1.0832037961220649, "grad_norm": 0.3710920512676239, "learning_rate": 8.063625048881782e-06, "loss": 0.0023, "step": 66200 }, { "epoch": 1.0833674220731409, "grad_norm": 0.1289042979478836, "learning_rate": 8.062872678746429e-06, "loss": 0.0025, "step": 66210 }, { "epoch": 1.0835310480242166, "grad_norm": 0.06448682397603989, "learning_rate": 8.062120197589303e-06, "loss": 0.002, "step": 66220 }, { "epoch": 1.0836946739752924, "grad_norm": 0.3412668704986572, "learning_rate": 8.061367605437677e-06, "loss": 0.0024, "step": 66230 }, { "epoch": 1.0838582999263684, "grad_norm": 0.19381700456142426, "learning_rate": 8.060614902318835e-06, "loss": 0.0015, "step": 66240 }, { "epoch": 1.0840219258774442, "grad_norm": 0.0045209359377622604, "learning_rate": 8.05986208826006e-06, "loss": 0.0027, "step": 66250 }, { "epoch": 1.08418555182852, "grad_norm": 0.12231379002332687, "learning_rate": 8.059109163288637e-06, "loss": 0.0024, "step": 66260 }, { "epoch": 1.084349177779596, "grad_norm": 0.26199087500572205, "learning_rate": 8.058356127431858e-06, "loss": 0.0017, "step": 66270 }, { "epoch": 1.0845128037306717, "grad_norm": 0.07341767102479935, "learning_rate": 8.057602980717021e-06, "loss": 0.002, "step": 66280 }, { "epoch": 1.0846764296817475, "grad_norm": 0.01413731463253498, "learning_rate": 8.056849723171426e-06, "loss": 0.0029, "step": 66290 }, { "epoch": 1.0848400556328233, "grad_norm": 0.2918248772621155, "learning_rate": 8.056096354822374e-06, "loss": 0.0024, "step": 66300 }, { "epoch": 1.0850036815838993, "grad_norm": 0.10700853168964386, "learning_rate": 8.055342875697175e-06, "loss": 0.0022, "step": 66310 }, { "epoch": 1.085167307534975, "grad_norm": 0.5969021320343018, "learning_rate": 8.054589285823137e-06, "loss": 0.005, "step": 66320 }, { "epoch": 1.0853309334860508, "grad_norm": 0.1458989828824997, "learning_rate": 8.053835585227582e-06, "loss": 0.0019, "step": 66330 }, { "epoch": 1.0854945594371268, "grad_norm": 0.07172498106956482, "learning_rate": 8.053081773937828e-06, "loss": 0.0026, "step": 66340 }, { "epoch": 1.0856581853882026, "grad_norm": 0.14115053415298462, "learning_rate": 8.052327851981195e-06, "loss": 0.0022, "step": 66350 }, { "epoch": 1.0858218113392784, "grad_norm": 0.18594765663146973, "learning_rate": 8.051573819385015e-06, "loss": 0.002, "step": 66360 }, { "epoch": 1.0859854372903543, "grad_norm": 0.07159534841775894, "learning_rate": 8.050819676176616e-06, "loss": 0.0039, "step": 66370 }, { "epoch": 1.0861490632414301, "grad_norm": 0.3044295012950897, "learning_rate": 8.050065422383337e-06, "loss": 0.0013, "step": 66380 }, { "epoch": 1.086312689192506, "grad_norm": 0.12141037732362747, "learning_rate": 8.049311058032519e-06, "loss": 0.0024, "step": 66390 }, { "epoch": 1.0864763151435817, "grad_norm": 0.49803096055984497, "learning_rate": 8.048556583151504e-06, "loss": 0.0021, "step": 66400 }, { "epoch": 1.0866399410946577, "grad_norm": 0.1595144420862198, "learning_rate": 8.047801997767638e-06, "loss": 0.004, "step": 66410 }, { "epoch": 1.0868035670457334, "grad_norm": 0.11515387892723083, "learning_rate": 8.047047301908275e-06, "loss": 0.0037, "step": 66420 }, { "epoch": 1.0869671929968092, "grad_norm": 0.10269689559936523, "learning_rate": 8.04629249560077e-06, "loss": 0.0022, "step": 66430 }, { "epoch": 1.0871308189478852, "grad_norm": 0.17100584506988525, "learning_rate": 8.045537578872486e-06, "loss": 0.0024, "step": 66440 }, { "epoch": 1.087294444898961, "grad_norm": 0.2069419026374817, "learning_rate": 8.044782551750784e-06, "loss": 0.0023, "step": 66450 }, { "epoch": 1.0874580708500368, "grad_norm": 0.0044179982505738735, "learning_rate": 8.044027414263032e-06, "loss": 0.0034, "step": 66460 }, { "epoch": 1.0876216968011128, "grad_norm": 0.14417746663093567, "learning_rate": 8.043272166436602e-06, "loss": 0.003, "step": 66470 }, { "epoch": 1.0877853227521885, "grad_norm": 0.24433189630508423, "learning_rate": 8.042516808298871e-06, "loss": 0.0028, "step": 66480 }, { "epoch": 1.0879489487032643, "grad_norm": 0.20231904089450836, "learning_rate": 8.041761339877216e-06, "loss": 0.0026, "step": 66490 }, { "epoch": 1.08811257465434, "grad_norm": 0.23650319874286652, "learning_rate": 8.041005761199026e-06, "loss": 0.0024, "step": 66500 }, { "epoch": 1.088276200605416, "grad_norm": 0.06208028271794319, "learning_rate": 8.040250072291683e-06, "loss": 0.0028, "step": 66510 }, { "epoch": 1.0884398265564919, "grad_norm": 0.059307318180799484, "learning_rate": 8.039494273182584e-06, "loss": 0.0018, "step": 66520 }, { "epoch": 1.0886034525075676, "grad_norm": 0.060293685644865036, "learning_rate": 8.038738363899124e-06, "loss": 0.0021, "step": 66530 }, { "epoch": 1.0887670784586436, "grad_norm": 0.06537779420614243, "learning_rate": 8.037982344468698e-06, "loss": 0.0022, "step": 66540 }, { "epoch": 1.0889307044097194, "grad_norm": 0.157841756939888, "learning_rate": 8.037226214918716e-06, "loss": 0.0018, "step": 66550 }, { "epoch": 1.0890943303607952, "grad_norm": 0.2087858021259308, "learning_rate": 8.036469975276583e-06, "loss": 0.0023, "step": 66560 }, { "epoch": 1.089257956311871, "grad_norm": 0.04547343775629997, "learning_rate": 8.035713625569712e-06, "loss": 0.0025, "step": 66570 }, { "epoch": 1.089421582262947, "grad_norm": 0.19873729348182678, "learning_rate": 8.034957165825518e-06, "loss": 0.0026, "step": 66580 }, { "epoch": 1.0895852082140227, "grad_norm": 0.20767268538475037, "learning_rate": 8.034200596071422e-06, "loss": 0.0017, "step": 66590 }, { "epoch": 1.0897488341650985, "grad_norm": 0.2506837546825409, "learning_rate": 8.033443916334846e-06, "loss": 0.0015, "step": 66600 }, { "epoch": 1.0899124601161745, "grad_norm": 0.09592298418283463, "learning_rate": 8.032687126643218e-06, "loss": 0.0031, "step": 66610 }, { "epoch": 1.0900760860672503, "grad_norm": 0.2865099310874939, "learning_rate": 8.031930227023972e-06, "loss": 0.0029, "step": 66620 }, { "epoch": 1.090239712018326, "grad_norm": 0.044565241783857346, "learning_rate": 8.03117321750454e-06, "loss": 0.0027, "step": 66630 }, { "epoch": 1.090403337969402, "grad_norm": 0.266215443611145, "learning_rate": 8.030416098112366e-06, "loss": 0.0019, "step": 66640 }, { "epoch": 1.0905669639204778, "grad_norm": 0.04512857273221016, "learning_rate": 8.029658868874892e-06, "loss": 0.0012, "step": 66650 }, { "epoch": 1.0907305898715536, "grad_norm": 0.27870652079582214, "learning_rate": 8.028901529819564e-06, "loss": 0.0023, "step": 66660 }, { "epoch": 1.0908942158226296, "grad_norm": 0.13263162970542908, "learning_rate": 8.028144080973836e-06, "loss": 0.0025, "step": 66670 }, { "epoch": 1.0910578417737054, "grad_norm": 0.01751447282731533, "learning_rate": 8.027386522365163e-06, "loss": 0.0025, "step": 66680 }, { "epoch": 1.0912214677247811, "grad_norm": 0.048395249992609024, "learning_rate": 8.026628854021005e-06, "loss": 0.0014, "step": 66690 }, { "epoch": 1.091385093675857, "grad_norm": 0.0751519426703453, "learning_rate": 8.025871075968828e-06, "loss": 0.0028, "step": 66700 }, { "epoch": 1.091548719626933, "grad_norm": 0.10317564010620117, "learning_rate": 8.025113188236094e-06, "loss": 0.0018, "step": 66710 }, { "epoch": 1.0917123455780087, "grad_norm": 0.17076021432876587, "learning_rate": 8.024355190850276e-06, "loss": 0.0025, "step": 66720 }, { "epoch": 1.0918759715290844, "grad_norm": 0.04208666458725929, "learning_rate": 8.023597083838854e-06, "loss": 0.002, "step": 66730 }, { "epoch": 1.0920395974801604, "grad_norm": 0.3747827410697937, "learning_rate": 8.022838867229304e-06, "loss": 0.007, "step": 66740 }, { "epoch": 1.0922032234312362, "grad_norm": 0.028362950310111046, "learning_rate": 8.022080541049111e-06, "loss": 0.0022, "step": 66750 }, { "epoch": 1.092366849382312, "grad_norm": 0.20069214701652527, "learning_rate": 8.02132210532576e-06, "loss": 0.0028, "step": 66760 }, { "epoch": 1.0925304753333878, "grad_norm": 0.10993585735559464, "learning_rate": 8.020563560086745e-06, "loss": 0.0048, "step": 66770 }, { "epoch": 1.0926941012844638, "grad_norm": 0.23520207405090332, "learning_rate": 8.01980490535956e-06, "loss": 0.002, "step": 66780 }, { "epoch": 1.0928577272355395, "grad_norm": 0.05501289665699005, "learning_rate": 8.019046141171706e-06, "loss": 0.0043, "step": 66790 }, { "epoch": 1.0930213531866153, "grad_norm": 0.30154648423194885, "learning_rate": 8.018287267550686e-06, "loss": 0.0038, "step": 66800 }, { "epoch": 1.0931849791376913, "grad_norm": 0.12688271701335907, "learning_rate": 8.017528284524007e-06, "loss": 0.0018, "step": 66810 }, { "epoch": 1.093348605088767, "grad_norm": 0.28403204679489136, "learning_rate": 8.016769192119178e-06, "loss": 0.0016, "step": 66820 }, { "epoch": 1.0935122310398429, "grad_norm": 0.10046349465847015, "learning_rate": 8.016009990363718e-06, "loss": 0.0019, "step": 66830 }, { "epoch": 1.0936758569909188, "grad_norm": 0.060041528195142746, "learning_rate": 8.015250679285144e-06, "loss": 0.0026, "step": 66840 }, { "epoch": 1.0938394829419946, "grad_norm": 0.11048177629709244, "learning_rate": 8.014491258910982e-06, "loss": 0.0013, "step": 66850 }, { "epoch": 1.0940031088930704, "grad_norm": 0.2221585363149643, "learning_rate": 8.013731729268754e-06, "loss": 0.0023, "step": 66860 }, { "epoch": 1.0941667348441464, "grad_norm": 0.13272634148597717, "learning_rate": 8.012972090385996e-06, "loss": 0.0029, "step": 66870 }, { "epoch": 1.0943303607952222, "grad_norm": 0.12997838854789734, "learning_rate": 8.01221234229024e-06, "loss": 0.0037, "step": 66880 }, { "epoch": 1.094493986746298, "grad_norm": 0.2496015429496765, "learning_rate": 8.011452485009026e-06, "loss": 0.0027, "step": 66890 }, { "epoch": 1.0946576126973737, "grad_norm": 0.07141968607902527, "learning_rate": 8.010692518569899e-06, "loss": 0.0029, "step": 66900 }, { "epoch": 1.0948212386484497, "grad_norm": 0.2587110102176666, "learning_rate": 8.009932443000403e-06, "loss": 0.0023, "step": 66910 }, { "epoch": 1.0949848645995255, "grad_norm": 0.5316643118858337, "learning_rate": 8.009172258328091e-06, "loss": 0.002, "step": 66920 }, { "epoch": 1.0951484905506013, "grad_norm": 0.26069799065589905, "learning_rate": 8.008411964580517e-06, "loss": 0.0018, "step": 66930 }, { "epoch": 1.0953121165016773, "grad_norm": 0.19477787613868713, "learning_rate": 8.007651561785236e-06, "loss": 0.0024, "step": 66940 }, { "epoch": 1.095475742452753, "grad_norm": 0.018018892034888268, "learning_rate": 8.006891049969821e-06, "loss": 0.0014, "step": 66950 }, { "epoch": 1.0956393684038288, "grad_norm": 0.17283983528614044, "learning_rate": 8.006130429161828e-06, "loss": 0.0037, "step": 66960 }, { "epoch": 1.0958029943549046, "grad_norm": 0.06775011122226715, "learning_rate": 8.005369699388833e-06, "loss": 0.0028, "step": 66970 }, { "epoch": 1.0959666203059806, "grad_norm": 0.1613825261592865, "learning_rate": 8.004608860678409e-06, "loss": 0.0022, "step": 66980 }, { "epoch": 1.0961302462570564, "grad_norm": 0.1508966088294983, "learning_rate": 8.003847913058135e-06, "loss": 0.0026, "step": 66990 }, { "epoch": 1.0962938722081321, "grad_norm": 0.13129541277885437, "learning_rate": 8.003086856555596e-06, "loss": 0.0028, "step": 67000 }, { "epoch": 1.0964574981592081, "grad_norm": 0.22265157103538513, "learning_rate": 8.002325691198375e-06, "loss": 0.0031, "step": 67010 }, { "epoch": 1.096621124110284, "grad_norm": 0.1172194853425026, "learning_rate": 8.001564417014062e-06, "loss": 0.0026, "step": 67020 }, { "epoch": 1.0967847500613597, "grad_norm": 0.09978342056274414, "learning_rate": 8.000803034030256e-06, "loss": 0.0016, "step": 67030 }, { "epoch": 1.0969483760124357, "grad_norm": 0.15964211523532867, "learning_rate": 8.000041542274549e-06, "loss": 0.0028, "step": 67040 }, { "epoch": 1.0971120019635114, "grad_norm": 0.24837394058704376, "learning_rate": 7.999279941774549e-06, "loss": 0.0019, "step": 67050 }, { "epoch": 1.0972756279145872, "grad_norm": 0.08646401762962341, "learning_rate": 7.99851823255786e-06, "loss": 0.002, "step": 67060 }, { "epoch": 1.0974392538656632, "grad_norm": 0.06600673496723175, "learning_rate": 7.997756414652089e-06, "loss": 0.0018, "step": 67070 }, { "epoch": 1.097602879816739, "grad_norm": 0.05896184593439102, "learning_rate": 7.996994488084855e-06, "loss": 0.0044, "step": 67080 }, { "epoch": 1.0977665057678148, "grad_norm": 0.058279041200876236, "learning_rate": 7.996232452883772e-06, "loss": 0.003, "step": 67090 }, { "epoch": 1.0979301317188905, "grad_norm": 0.2605247497558594, "learning_rate": 7.995470309076466e-06, "loss": 0.0016, "step": 67100 }, { "epoch": 1.0980937576699665, "grad_norm": 0.18463708460330963, "learning_rate": 7.99470805669056e-06, "loss": 0.0027, "step": 67110 }, { "epoch": 1.0982573836210423, "grad_norm": 0.059139564633369446, "learning_rate": 7.993945695753684e-06, "loss": 0.0012, "step": 67120 }, { "epoch": 1.098421009572118, "grad_norm": 0.156093567609787, "learning_rate": 7.993183226293473e-06, "loss": 0.0023, "step": 67130 }, { "epoch": 1.098584635523194, "grad_norm": 0.16678935289382935, "learning_rate": 7.992420648337564e-06, "loss": 0.0023, "step": 67140 }, { "epoch": 1.0987482614742698, "grad_norm": 0.16258345544338226, "learning_rate": 7.991657961913599e-06, "loss": 0.0029, "step": 67150 }, { "epoch": 1.0989118874253456, "grad_norm": 0.14234188199043274, "learning_rate": 7.990895167049222e-06, "loss": 0.0013, "step": 67160 }, { "epoch": 1.0990755133764214, "grad_norm": 0.7577089667320251, "learning_rate": 7.990132263772083e-06, "loss": 0.0017, "step": 67170 }, { "epoch": 1.0992391393274974, "grad_norm": 0.20397929847240448, "learning_rate": 7.989369252109838e-06, "loss": 0.0021, "step": 67180 }, { "epoch": 1.0994027652785732, "grad_norm": 0.17214415967464447, "learning_rate": 7.988606132090141e-06, "loss": 0.0022, "step": 67190 }, { "epoch": 1.099566391229649, "grad_norm": 0.3056853413581848, "learning_rate": 7.987842903740654e-06, "loss": 0.0034, "step": 67200 }, { "epoch": 1.099730017180725, "grad_norm": 0.32553645968437195, "learning_rate": 7.987079567089045e-06, "loss": 0.002, "step": 67210 }, { "epoch": 1.0998936431318007, "grad_norm": 0.06345076858997345, "learning_rate": 7.98631612216298e-06, "loss": 0.0063, "step": 67220 }, { "epoch": 1.1000572690828765, "grad_norm": 0.10789083689451218, "learning_rate": 7.985552568990134e-06, "loss": 0.0029, "step": 67230 }, { "epoch": 1.1002208950339525, "grad_norm": 0.13412733376026154, "learning_rate": 7.98478890759818e-06, "loss": 0.0027, "step": 67240 }, { "epoch": 1.1003845209850283, "grad_norm": 0.07595600187778473, "learning_rate": 7.984025138014803e-06, "loss": 0.0031, "step": 67250 }, { "epoch": 1.100548146936104, "grad_norm": 0.10003720968961716, "learning_rate": 7.98326126026769e-06, "loss": 0.003, "step": 67260 }, { "epoch": 1.1007117728871798, "grad_norm": 0.13010506331920624, "learning_rate": 7.982497274384523e-06, "loss": 0.003, "step": 67270 }, { "epoch": 1.1008753988382558, "grad_norm": 0.10623190551996231, "learning_rate": 7.981733180392999e-06, "loss": 0.0029, "step": 67280 }, { "epoch": 1.1010390247893316, "grad_norm": 0.2330292910337448, "learning_rate": 7.980968978320812e-06, "loss": 0.0026, "step": 67290 }, { "epoch": 1.1012026507404074, "grad_norm": 0.18196021020412445, "learning_rate": 7.980204668195665e-06, "loss": 0.0026, "step": 67300 }, { "epoch": 1.1013662766914833, "grad_norm": 0.15365463495254517, "learning_rate": 7.979440250045262e-06, "loss": 0.0015, "step": 67310 }, { "epoch": 1.1015299026425591, "grad_norm": 0.11036017537117004, "learning_rate": 7.978675723897312e-06, "loss": 0.0016, "step": 67320 }, { "epoch": 1.101693528593635, "grad_norm": 0.15332701802253723, "learning_rate": 7.977911089779525e-06, "loss": 0.0021, "step": 67330 }, { "epoch": 1.101857154544711, "grad_norm": 0.3528757095336914, "learning_rate": 7.977146347719618e-06, "loss": 0.0039, "step": 67340 }, { "epoch": 1.1020207804957867, "grad_norm": 0.056583061814308167, "learning_rate": 7.976381497745312e-06, "loss": 0.0015, "step": 67350 }, { "epoch": 1.1021844064468624, "grad_norm": 0.1486535668373108, "learning_rate": 7.97561653988433e-06, "loss": 0.0018, "step": 67360 }, { "epoch": 1.1023480323979382, "grad_norm": 0.1272255927324295, "learning_rate": 7.974851474164399e-06, "loss": 0.0025, "step": 67370 }, { "epoch": 1.1025116583490142, "grad_norm": 0.1979738473892212, "learning_rate": 7.974086300613252e-06, "loss": 0.0016, "step": 67380 }, { "epoch": 1.10267528430009, "grad_norm": 0.013305054977536201, "learning_rate": 7.973321019258627e-06, "loss": 0.0022, "step": 67390 }, { "epoch": 1.1028389102511658, "grad_norm": 0.0996328741312027, "learning_rate": 7.972555630128258e-06, "loss": 0.0026, "step": 67400 }, { "epoch": 1.1030025362022418, "grad_norm": 0.11364498734474182, "learning_rate": 7.971790133249895e-06, "loss": 0.0023, "step": 67410 }, { "epoch": 1.1031661621533175, "grad_norm": 0.0554550401866436, "learning_rate": 7.971024528651278e-06, "loss": 0.0018, "step": 67420 }, { "epoch": 1.1033297881043933, "grad_norm": 0.21192406117916107, "learning_rate": 7.970258816360165e-06, "loss": 0.0021, "step": 67430 }, { "epoch": 1.1034934140554693, "grad_norm": 0.23106718063354492, "learning_rate": 7.96949299640431e-06, "loss": 0.0029, "step": 67440 }, { "epoch": 1.103657040006545, "grad_norm": 0.1673358529806137, "learning_rate": 7.96872706881147e-06, "loss": 0.0029, "step": 67450 }, { "epoch": 1.1038206659576209, "grad_norm": 0.21540683507919312, "learning_rate": 7.967961033609407e-06, "loss": 0.0029, "step": 67460 }, { "epoch": 1.1039842919086966, "grad_norm": 0.241883784532547, "learning_rate": 7.967194890825893e-06, "loss": 0.0023, "step": 67470 }, { "epoch": 1.1041479178597726, "grad_norm": 0.0331779383122921, "learning_rate": 7.966428640488695e-06, "loss": 0.0025, "step": 67480 }, { "epoch": 1.1043115438108484, "grad_norm": 0.047347042709589005, "learning_rate": 7.965662282625586e-06, "loss": 0.0016, "step": 67490 }, { "epoch": 1.1044751697619242, "grad_norm": 0.14074267446994781, "learning_rate": 7.964895817264349e-06, "loss": 0.0023, "step": 67500 }, { "epoch": 1.1046387957130002, "grad_norm": 0.3148994743824005, "learning_rate": 7.964129244432763e-06, "loss": 0.0029, "step": 67510 }, { "epoch": 1.104802421664076, "grad_norm": 0.23355428874492645, "learning_rate": 7.963362564158619e-06, "loss": 0.0022, "step": 67520 }, { "epoch": 1.1049660476151517, "grad_norm": 0.11430051177740097, "learning_rate": 7.9625957764697e-06, "loss": 0.0032, "step": 67530 }, { "epoch": 1.1051296735662275, "grad_norm": 0.22647179663181305, "learning_rate": 7.961828881393808e-06, "loss": 0.003, "step": 67540 }, { "epoch": 1.1052932995173035, "grad_norm": 0.20279039442539215, "learning_rate": 7.961061878958736e-06, "loss": 0.0041, "step": 67550 }, { "epoch": 1.1054569254683793, "grad_norm": 0.0628499686717987, "learning_rate": 7.960294769192289e-06, "loss": 0.0019, "step": 67560 }, { "epoch": 1.105620551419455, "grad_norm": 0.0232760701328516, "learning_rate": 7.95952755212227e-06, "loss": 0.0021, "step": 67570 }, { "epoch": 1.105784177370531, "grad_norm": 0.14633068442344666, "learning_rate": 7.958760227776492e-06, "loss": 0.0015, "step": 67580 }, { "epoch": 1.1059478033216068, "grad_norm": 0.15686815977096558, "learning_rate": 7.957992796182765e-06, "loss": 0.0018, "step": 67590 }, { "epoch": 1.1061114292726826, "grad_norm": 0.04825974628329277, "learning_rate": 7.957225257368909e-06, "loss": 0.0026, "step": 67600 }, { "epoch": 1.1062750552237586, "grad_norm": 0.013961097225546837, "learning_rate": 7.956457611362747e-06, "loss": 0.0013, "step": 67610 }, { "epoch": 1.1064386811748343, "grad_norm": 0.15498970448970795, "learning_rate": 7.955689858192099e-06, "loss": 0.0059, "step": 67620 }, { "epoch": 1.1066023071259101, "grad_norm": 0.009553973563015461, "learning_rate": 7.954921997884799e-06, "loss": 0.0015, "step": 67630 }, { "epoch": 1.1067659330769861, "grad_norm": 0.058502793312072754, "learning_rate": 7.954154030468678e-06, "loss": 0.0017, "step": 67640 }, { "epoch": 1.106929559028062, "grad_norm": 0.10463238507509232, "learning_rate": 7.953385955971574e-06, "loss": 0.0024, "step": 67650 }, { "epoch": 1.1070931849791377, "grad_norm": 0.21524441242218018, "learning_rate": 7.952617774421328e-06, "loss": 0.0026, "step": 67660 }, { "epoch": 1.1072568109302134, "grad_norm": 0.09608050435781479, "learning_rate": 7.951849485845784e-06, "loss": 0.0046, "step": 67670 }, { "epoch": 1.1074204368812894, "grad_norm": 0.1355513632297516, "learning_rate": 7.95108109027279e-06, "loss": 0.0019, "step": 67680 }, { "epoch": 1.1075840628323652, "grad_norm": 0.14819177985191345, "learning_rate": 7.950312587730199e-06, "loss": 0.0028, "step": 67690 }, { "epoch": 1.107747688783441, "grad_norm": 0.23417548835277557, "learning_rate": 7.949543978245867e-06, "loss": 0.002, "step": 67700 }, { "epoch": 1.107911314734517, "grad_norm": 0.14641791582107544, "learning_rate": 7.948775261847656e-06, "loss": 0.0027, "step": 67710 }, { "epoch": 1.1080749406855928, "grad_norm": 0.10514626652002335, "learning_rate": 7.94800643856343e-06, "loss": 0.0035, "step": 67720 }, { "epoch": 1.1082385666366685, "grad_norm": 0.004406503867357969, "learning_rate": 7.947237508421054e-06, "loss": 0.0029, "step": 67730 }, { "epoch": 1.1084021925877443, "grad_norm": 0.15475668013095856, "learning_rate": 7.946468471448403e-06, "loss": 0.0034, "step": 67740 }, { "epoch": 1.1085658185388203, "grad_norm": 0.10073676705360413, "learning_rate": 7.94569932767335e-06, "loss": 0.0027, "step": 67750 }, { "epoch": 1.108729444489896, "grad_norm": 0.07992584258317947, "learning_rate": 7.944930077123777e-06, "loss": 0.002, "step": 67760 }, { "epoch": 1.1088930704409719, "grad_norm": 0.23091736435890198, "learning_rate": 7.944160719827569e-06, "loss": 0.0033, "step": 67770 }, { "epoch": 1.1090566963920478, "grad_norm": 0.11401800066232681, "learning_rate": 7.943391255812608e-06, "loss": 0.0021, "step": 67780 }, { "epoch": 1.1092203223431236, "grad_norm": 0.11418555676937103, "learning_rate": 7.942621685106789e-06, "loss": 0.0038, "step": 67790 }, { "epoch": 1.1093839482941994, "grad_norm": 0.043558269739151, "learning_rate": 7.941852007738006e-06, "loss": 0.004, "step": 67800 }, { "epoch": 1.1095475742452754, "grad_norm": 0.0932808667421341, "learning_rate": 7.94108222373416e-06, "loss": 0.003, "step": 67810 }, { "epoch": 1.1097112001963512, "grad_norm": 0.21740348637104034, "learning_rate": 7.94031233312315e-06, "loss": 0.0039, "step": 67820 }, { "epoch": 1.109874826147427, "grad_norm": 0.19150827825069427, "learning_rate": 7.939542335932886e-06, "loss": 0.0028, "step": 67830 }, { "epoch": 1.110038452098503, "grad_norm": 0.29575255513191223, "learning_rate": 7.938772232191276e-06, "loss": 0.0025, "step": 67840 }, { "epoch": 1.1102020780495787, "grad_norm": 0.21679075062274933, "learning_rate": 7.938002021926237e-06, "loss": 0.003, "step": 67850 }, { "epoch": 1.1103657040006545, "grad_norm": 0.12247305363416672, "learning_rate": 7.937231705165687e-06, "loss": 0.0025, "step": 67860 }, { "epoch": 1.1105293299517303, "grad_norm": 0.2624988257884979, "learning_rate": 7.936461281937545e-06, "loss": 0.0027, "step": 67870 }, { "epoch": 1.1106929559028063, "grad_norm": 0.23529274761676788, "learning_rate": 7.93569075226974e-06, "loss": 0.0021, "step": 67880 }, { "epoch": 1.110856581853882, "grad_norm": 0.34660282731056213, "learning_rate": 7.934920116190202e-06, "loss": 0.0013, "step": 67890 }, { "epoch": 1.1110202078049578, "grad_norm": 0.01306147687137127, "learning_rate": 7.934149373726862e-06, "loss": 0.0024, "step": 67900 }, { "epoch": 1.1111838337560338, "grad_norm": 0.0663568302989006, "learning_rate": 7.93337852490766e-06, "loss": 0.0017, "step": 67910 }, { "epoch": 1.1113474597071096, "grad_norm": 0.06737388670444489, "learning_rate": 7.932607569760537e-06, "loss": 0.0016, "step": 67920 }, { "epoch": 1.1115110856581853, "grad_norm": 0.10520081967115402, "learning_rate": 7.931836508313438e-06, "loss": 0.0021, "step": 67930 }, { "epoch": 1.1116747116092611, "grad_norm": 0.1524018794298172, "learning_rate": 7.93106534059431e-06, "loss": 0.003, "step": 67940 }, { "epoch": 1.1118383375603371, "grad_norm": 0.15310916304588318, "learning_rate": 7.93029406663111e-06, "loss": 0.0011, "step": 67950 }, { "epoch": 1.112001963511413, "grad_norm": 0.08758015930652618, "learning_rate": 7.929522686451793e-06, "loss": 0.0026, "step": 67960 }, { "epoch": 1.1121655894624887, "grad_norm": 0.15671318769454956, "learning_rate": 7.928751200084319e-06, "loss": 0.0031, "step": 67970 }, { "epoch": 1.1123292154135647, "grad_norm": 0.16461004316806793, "learning_rate": 7.927979607556652e-06, "loss": 0.0038, "step": 67980 }, { "epoch": 1.1124928413646404, "grad_norm": 0.1444018930196762, "learning_rate": 7.927207908896763e-06, "loss": 0.0027, "step": 67990 }, { "epoch": 1.1126564673157162, "grad_norm": 0.07200793921947479, "learning_rate": 7.926436104132622e-06, "loss": 0.0023, "step": 68000 }, { "epoch": 1.1128200932667922, "grad_norm": 0.03750094026327133, "learning_rate": 7.925664193292206e-06, "loss": 0.003, "step": 68010 }, { "epoch": 1.112983719217868, "grad_norm": 0.05523361265659332, "learning_rate": 7.924892176403494e-06, "loss": 0.0037, "step": 68020 }, { "epoch": 1.1131473451689438, "grad_norm": 0.19418194890022278, "learning_rate": 7.924120053494471e-06, "loss": 0.0042, "step": 68030 }, { "epoch": 1.1133109711200195, "grad_norm": 0.06008940562605858, "learning_rate": 7.923347824593125e-06, "loss": 0.0032, "step": 68040 }, { "epoch": 1.1134745970710955, "grad_norm": 0.1261216104030609, "learning_rate": 7.922575489727444e-06, "loss": 0.0027, "step": 68050 }, { "epoch": 1.1136382230221713, "grad_norm": 0.10495466738939285, "learning_rate": 7.921803048925427e-06, "loss": 0.0022, "step": 68060 }, { "epoch": 1.113801848973247, "grad_norm": 0.050722554326057434, "learning_rate": 7.921030502215072e-06, "loss": 0.0025, "step": 68070 }, { "epoch": 1.113965474924323, "grad_norm": 0.14870263636112213, "learning_rate": 7.920257849624382e-06, "loss": 0.0021, "step": 68080 }, { "epoch": 1.1141291008753988, "grad_norm": 0.050933562219142914, "learning_rate": 7.919485091181362e-06, "loss": 0.0022, "step": 68090 }, { "epoch": 1.1142927268264746, "grad_norm": 0.15519891679286957, "learning_rate": 7.918712226914027e-06, "loss": 0.0019, "step": 68100 }, { "epoch": 1.1144563527775506, "grad_norm": 0.025211861357092857, "learning_rate": 7.917939256850387e-06, "loss": 0.0023, "step": 68110 }, { "epoch": 1.1146199787286264, "grad_norm": 0.10136483609676361, "learning_rate": 7.917166181018464e-06, "loss": 0.0024, "step": 68120 }, { "epoch": 1.1147836046797022, "grad_norm": 0.2363646924495697, "learning_rate": 7.916392999446274e-06, "loss": 0.0016, "step": 68130 }, { "epoch": 1.114947230630778, "grad_norm": 0.1900477111339569, "learning_rate": 7.91561971216185e-06, "loss": 0.0033, "step": 68140 }, { "epoch": 1.115110856581854, "grad_norm": 0.14923186600208282, "learning_rate": 7.91484631919322e-06, "loss": 0.0021, "step": 68150 }, { "epoch": 1.1152744825329297, "grad_norm": 0.13170966506004333, "learning_rate": 7.914072820568415e-06, "loss": 0.0015, "step": 68160 }, { "epoch": 1.1154381084840055, "grad_norm": 0.1346287876367569, "learning_rate": 7.913299216315474e-06, "loss": 0.0027, "step": 68170 }, { "epoch": 1.1156017344350815, "grad_norm": 0.10992290824651718, "learning_rate": 7.912525506462439e-06, "loss": 0.0018, "step": 68180 }, { "epoch": 1.1157653603861573, "grad_norm": 0.0763528048992157, "learning_rate": 7.911751691037353e-06, "loss": 0.0018, "step": 68190 }, { "epoch": 1.115928986337233, "grad_norm": 0.1582881212234497, "learning_rate": 7.910977770068267e-06, "loss": 0.0033, "step": 68200 }, { "epoch": 1.116092612288309, "grad_norm": 0.10619562119245529, "learning_rate": 7.910203743583234e-06, "loss": 0.0015, "step": 68210 }, { "epoch": 1.1162562382393848, "grad_norm": 0.11039046198129654, "learning_rate": 7.90942961161031e-06, "loss": 0.0019, "step": 68220 }, { "epoch": 1.1164198641904606, "grad_norm": 0.17310935258865356, "learning_rate": 7.908655374177555e-06, "loss": 0.0031, "step": 68230 }, { "epoch": 1.1165834901415364, "grad_norm": 0.1316337138414383, "learning_rate": 7.907881031313032e-06, "loss": 0.0018, "step": 68240 }, { "epoch": 1.1167471160926123, "grad_norm": 0.0757899135351181, "learning_rate": 7.90710658304481e-06, "loss": 0.0016, "step": 68250 }, { "epoch": 1.1169107420436881, "grad_norm": 0.07081680744886398, "learning_rate": 7.906332029400964e-06, "loss": 0.0024, "step": 68260 }, { "epoch": 1.117074367994764, "grad_norm": 0.3196355998516083, "learning_rate": 7.905557370409566e-06, "loss": 0.0041, "step": 68270 }, { "epoch": 1.11723799394584, "grad_norm": 0.051009662449359894, "learning_rate": 7.904782606098698e-06, "loss": 0.0018, "step": 68280 }, { "epoch": 1.1174016198969157, "grad_norm": 0.18096141517162323, "learning_rate": 7.90400773649644e-06, "loss": 0.0019, "step": 68290 }, { "epoch": 1.1175652458479914, "grad_norm": 0.1862032115459442, "learning_rate": 7.903232761630881e-06, "loss": 0.002, "step": 68300 }, { "epoch": 1.1177288717990674, "grad_norm": 0.17516174912452698, "learning_rate": 7.902457681530114e-06, "loss": 0.0012, "step": 68310 }, { "epoch": 1.1178924977501432, "grad_norm": 0.1434076577425003, "learning_rate": 7.901682496222231e-06, "loss": 0.0036, "step": 68320 }, { "epoch": 1.118056123701219, "grad_norm": 0.0856248065829277, "learning_rate": 7.900907205735331e-06, "loss": 0.0035, "step": 68330 }, { "epoch": 1.1182197496522948, "grad_norm": 0.10628880560398102, "learning_rate": 7.900131810097517e-06, "loss": 0.0024, "step": 68340 }, { "epoch": 1.1183833756033708, "grad_norm": 0.0780029296875, "learning_rate": 7.899356309336895e-06, "loss": 0.0021, "step": 68350 }, { "epoch": 1.1185470015544465, "grad_norm": 0.045987654477357864, "learning_rate": 7.898580703481577e-06, "loss": 0.0037, "step": 68360 }, { "epoch": 1.1187106275055223, "grad_norm": 0.02341105043888092, "learning_rate": 7.897804992559674e-06, "loss": 0.0023, "step": 68370 }, { "epoch": 1.1188742534565983, "grad_norm": 0.0918494388461113, "learning_rate": 7.897029176599304e-06, "loss": 0.0021, "step": 68380 }, { "epoch": 1.119037879407674, "grad_norm": 0.17768150568008423, "learning_rate": 7.89625325562859e-06, "loss": 0.0029, "step": 68390 }, { "epoch": 1.1192015053587498, "grad_norm": 0.0587909072637558, "learning_rate": 7.895477229675655e-06, "loss": 0.0024, "step": 68400 }, { "epoch": 1.1193651313098258, "grad_norm": 0.1557624340057373, "learning_rate": 7.894701098768631e-06, "loss": 0.0024, "step": 68410 }, { "epoch": 1.1195287572609016, "grad_norm": 0.04897768795490265, "learning_rate": 7.893924862935648e-06, "loss": 0.0028, "step": 68420 }, { "epoch": 1.1196923832119774, "grad_norm": 0.08152521401643753, "learning_rate": 7.893148522204847e-06, "loss": 0.0028, "step": 68430 }, { "epoch": 1.1198560091630532, "grad_norm": 0.13534341752529144, "learning_rate": 7.892372076604363e-06, "loss": 0.0021, "step": 68440 }, { "epoch": 1.1200196351141292, "grad_norm": 0.2423546314239502, "learning_rate": 7.891595526162344e-06, "loss": 0.0022, "step": 68450 }, { "epoch": 1.120183261065205, "grad_norm": 0.26700547337532043, "learning_rate": 7.890818870906937e-06, "loss": 0.0027, "step": 68460 }, { "epoch": 1.1203468870162807, "grad_norm": 0.06260558217763901, "learning_rate": 7.890042110866293e-06, "loss": 0.002, "step": 68470 }, { "epoch": 1.1205105129673567, "grad_norm": 0.02388470619916916, "learning_rate": 7.889265246068569e-06, "loss": 0.0024, "step": 68480 }, { "epoch": 1.1206741389184325, "grad_norm": 0.16415877640247345, "learning_rate": 7.888488276541923e-06, "loss": 0.0029, "step": 68490 }, { "epoch": 1.1208377648695083, "grad_norm": 0.10648050904273987, "learning_rate": 7.88771120231452e-06, "loss": 0.0019, "step": 68500 }, { "epoch": 1.121001390820584, "grad_norm": 0.11352626979351044, "learning_rate": 7.886934023414527e-06, "loss": 0.0017, "step": 68510 }, { "epoch": 1.12116501677166, "grad_norm": 0.22070206701755524, "learning_rate": 7.886156739870114e-06, "loss": 0.0018, "step": 68520 }, { "epoch": 1.1213286427227358, "grad_norm": 0.08376161754131317, "learning_rate": 7.885379351709455e-06, "loss": 0.0032, "step": 68530 }, { "epoch": 1.1214922686738116, "grad_norm": 0.14602339267730713, "learning_rate": 7.88460185896073e-06, "loss": 0.0024, "step": 68540 }, { "epoch": 1.1216558946248876, "grad_norm": 0.17993013560771942, "learning_rate": 7.88382426165212e-06, "loss": 0.0024, "step": 68550 }, { "epoch": 1.1218195205759633, "grad_norm": 0.1910533607006073, "learning_rate": 7.883046559811813e-06, "loss": 0.0032, "step": 68560 }, { "epoch": 1.1219831465270391, "grad_norm": 0.22462201118469238, "learning_rate": 7.882268753467997e-06, "loss": 0.0031, "step": 68570 }, { "epoch": 1.1221467724781151, "grad_norm": 0.1692439168691635, "learning_rate": 7.881490842648864e-06, "loss": 0.0025, "step": 68580 }, { "epoch": 1.122310398429191, "grad_norm": 0.09960126131772995, "learning_rate": 7.880712827382615e-06, "loss": 0.0039, "step": 68590 }, { "epoch": 1.1224740243802667, "grad_norm": 0.03694210201501846, "learning_rate": 7.879934707697449e-06, "loss": 0.0034, "step": 68600 }, { "epoch": 1.1226376503313427, "grad_norm": 0.3607424199581146, "learning_rate": 7.879156483621573e-06, "loss": 0.0036, "step": 68610 }, { "epoch": 1.1228012762824184, "grad_norm": 0.03478715941309929, "learning_rate": 7.878378155183192e-06, "loss": 0.0016, "step": 68620 }, { "epoch": 1.1229649022334942, "grad_norm": 0.03479054197669029, "learning_rate": 7.877599722410524e-06, "loss": 0.0018, "step": 68630 }, { "epoch": 1.12312852818457, "grad_norm": 0.1400851309299469, "learning_rate": 7.87682118533178e-06, "loss": 0.0026, "step": 68640 }, { "epoch": 1.123292154135646, "grad_norm": 0.14542587101459503, "learning_rate": 7.876042543975183e-06, "loss": 0.0032, "step": 68650 }, { "epoch": 1.1234557800867218, "grad_norm": 0.16380688548088074, "learning_rate": 7.875263798368957e-06, "loss": 0.0027, "step": 68660 }, { "epoch": 1.1236194060377975, "grad_norm": 0.03666701912879944, "learning_rate": 7.874484948541328e-06, "loss": 0.002, "step": 68670 }, { "epoch": 1.1237830319888735, "grad_norm": 0.13301661610603333, "learning_rate": 7.873705994520527e-06, "loss": 0.0011, "step": 68680 }, { "epoch": 1.1239466579399493, "grad_norm": 0.19392654299736023, "learning_rate": 7.872926936334792e-06, "loss": 0.0024, "step": 68690 }, { "epoch": 1.124110283891025, "grad_norm": 0.1347409337759018, "learning_rate": 7.87214777401236e-06, "loss": 0.0018, "step": 68700 }, { "epoch": 1.1242739098421008, "grad_norm": 0.07486072182655334, "learning_rate": 7.871368507581474e-06, "loss": 0.0013, "step": 68710 }, { "epoch": 1.1244375357931768, "grad_norm": 0.09588923305273056, "learning_rate": 7.870589137070382e-06, "loss": 0.0018, "step": 68720 }, { "epoch": 1.1246011617442526, "grad_norm": 0.05466753616929054, "learning_rate": 7.869809662507331e-06, "loss": 0.0022, "step": 68730 }, { "epoch": 1.1247647876953284, "grad_norm": 0.057994551956653595, "learning_rate": 7.869030083920578e-06, "loss": 0.0023, "step": 68740 }, { "epoch": 1.1249284136464044, "grad_norm": 0.0489218533039093, "learning_rate": 7.86825040133838e-06, "loss": 0.0024, "step": 68750 }, { "epoch": 1.1250920395974802, "grad_norm": 0.08175227791070938, "learning_rate": 7.867470614788998e-06, "loss": 0.0034, "step": 68760 }, { "epoch": 1.125255665548556, "grad_norm": 0.02568182907998562, "learning_rate": 7.8666907243007e-06, "loss": 0.0023, "step": 68770 }, { "epoch": 1.125419291499632, "grad_norm": 0.04513145238161087, "learning_rate": 7.865910729901751e-06, "loss": 0.0018, "step": 68780 }, { "epoch": 1.1255829174507077, "grad_norm": 0.13794945180416107, "learning_rate": 7.865130631620428e-06, "loss": 0.0022, "step": 68790 }, { "epoch": 1.1257465434017835, "grad_norm": 0.1073811948299408, "learning_rate": 7.864350429485004e-06, "loss": 0.0044, "step": 68800 }, { "epoch": 1.1259101693528595, "grad_norm": 0.009210161864757538, "learning_rate": 7.863570123523763e-06, "loss": 0.0023, "step": 68810 }, { "epoch": 1.1260737953039353, "grad_norm": 0.04893968626856804, "learning_rate": 7.862789713764987e-06, "loss": 0.002, "step": 68820 }, { "epoch": 1.126237421255011, "grad_norm": 0.06745315343141556, "learning_rate": 7.862009200236962e-06, "loss": 0.0025, "step": 68830 }, { "epoch": 1.1264010472060868, "grad_norm": 0.14464573562145233, "learning_rate": 7.861228582967985e-06, "loss": 0.0039, "step": 68840 }, { "epoch": 1.1265646731571628, "grad_norm": 0.035705361515283585, "learning_rate": 7.860447861986347e-06, "loss": 0.0018, "step": 68850 }, { "epoch": 1.1267282991082386, "grad_norm": 0.11243464797735214, "learning_rate": 7.859667037320351e-06, "loss": 0.0021, "step": 68860 }, { "epoch": 1.1268919250593143, "grad_norm": 0.11256809532642365, "learning_rate": 7.858886108998296e-06, "loss": 0.0021, "step": 68870 }, { "epoch": 1.1270555510103903, "grad_norm": 0.328066349029541, "learning_rate": 7.85810507704849e-06, "loss": 0.0027, "step": 68880 }, { "epoch": 1.1272191769614661, "grad_norm": 0.2756465971469879, "learning_rate": 7.857323941499247e-06, "loss": 0.0022, "step": 68890 }, { "epoch": 1.127382802912542, "grad_norm": 0.058144621551036835, "learning_rate": 7.856542702378875e-06, "loss": 0.0024, "step": 68900 }, { "epoch": 1.1275464288636177, "grad_norm": 0.0866839587688446, "learning_rate": 7.8557613597157e-06, "loss": 0.0012, "step": 68910 }, { "epoch": 1.1277100548146937, "grad_norm": 0.12420640885829926, "learning_rate": 7.854979913538035e-06, "loss": 0.0016, "step": 68920 }, { "epoch": 1.1278736807657694, "grad_norm": 0.017458591610193253, "learning_rate": 7.854198363874212e-06, "loss": 0.0025, "step": 68930 }, { "epoch": 1.1280373067168452, "grad_norm": 0.060509685426950455, "learning_rate": 7.853416710752556e-06, "loss": 0.0036, "step": 68940 }, { "epoch": 1.1282009326679212, "grad_norm": 0.08177191019058228, "learning_rate": 7.852634954201405e-06, "loss": 0.0019, "step": 68950 }, { "epoch": 1.128364558618997, "grad_norm": 0.23244555294513702, "learning_rate": 7.851853094249093e-06, "loss": 0.0033, "step": 68960 }, { "epoch": 1.1285281845700728, "grad_norm": 0.23035791516304016, "learning_rate": 7.851071130923959e-06, "loss": 0.0024, "step": 68970 }, { "epoch": 1.1286918105211488, "grad_norm": 0.4902045428752899, "learning_rate": 7.85028906425435e-06, "loss": 0.0034, "step": 68980 }, { "epoch": 1.1288554364722245, "grad_norm": 0.06800995022058487, "learning_rate": 7.849506894268613e-06, "loss": 0.0019, "step": 68990 }, { "epoch": 1.1290190624233003, "grad_norm": 0.14782725274562836, "learning_rate": 7.8487246209951e-06, "loss": 0.002, "step": 69000 }, { "epoch": 1.1291826883743763, "grad_norm": 0.16419872641563416, "learning_rate": 7.847942244462165e-06, "loss": 0.002, "step": 69010 }, { "epoch": 1.129346314325452, "grad_norm": 0.1362604796886444, "learning_rate": 7.84715976469817e-06, "loss": 0.0046, "step": 69020 }, { "epoch": 1.1295099402765278, "grad_norm": 0.3837820887565613, "learning_rate": 7.846377181731475e-06, "loss": 0.0038, "step": 69030 }, { "epoch": 1.1296735662276036, "grad_norm": 0.040347859263420105, "learning_rate": 7.845594495590448e-06, "loss": 0.0023, "step": 69040 }, { "epoch": 1.1298371921786796, "grad_norm": 0.12403661012649536, "learning_rate": 7.844811706303461e-06, "loss": 0.0022, "step": 69050 }, { "epoch": 1.1300008181297554, "grad_norm": 0.02095261961221695, "learning_rate": 7.844028813898887e-06, "loss": 0.0025, "step": 69060 }, { "epoch": 1.1301644440808312, "grad_norm": 0.0585404597222805, "learning_rate": 7.843245818405103e-06, "loss": 0.0016, "step": 69070 }, { "epoch": 1.130328070031907, "grad_norm": 0.10304427146911621, "learning_rate": 7.842462719850492e-06, "loss": 0.0022, "step": 69080 }, { "epoch": 1.130491695982983, "grad_norm": 0.04839986562728882, "learning_rate": 7.841679518263439e-06, "loss": 0.0027, "step": 69090 }, { "epoch": 1.1306553219340587, "grad_norm": 0.06605979055166245, "learning_rate": 7.840896213672334e-06, "loss": 0.0023, "step": 69100 }, { "epoch": 1.1308189478851345, "grad_norm": 0.1461275964975357, "learning_rate": 7.840112806105568e-06, "loss": 0.0021, "step": 69110 }, { "epoch": 1.1309825738362105, "grad_norm": 0.0795038491487503, "learning_rate": 7.83932929559154e-06, "loss": 0.0031, "step": 69120 }, { "epoch": 1.1311461997872863, "grad_norm": 0.14406965672969818, "learning_rate": 7.838545682158647e-06, "loss": 0.0023, "step": 69130 }, { "epoch": 1.131309825738362, "grad_norm": 0.008858182467520237, "learning_rate": 7.837761965835298e-06, "loss": 0.0021, "step": 69140 }, { "epoch": 1.131473451689438, "grad_norm": 0.13654375076293945, "learning_rate": 7.836978146649897e-06, "loss": 0.0031, "step": 69150 }, { "epoch": 1.1316370776405138, "grad_norm": 0.06601133197546005, "learning_rate": 7.836194224630857e-06, "loss": 0.0013, "step": 69160 }, { "epoch": 1.1318007035915896, "grad_norm": 0.509337306022644, "learning_rate": 7.835410199806592e-06, "loss": 0.0025, "step": 69170 }, { "epoch": 1.1319643295426656, "grad_norm": 0.18314862251281738, "learning_rate": 7.834626072205522e-06, "loss": 0.0026, "step": 69180 }, { "epoch": 1.1321279554937413, "grad_norm": 0.1151198074221611, "learning_rate": 7.83384184185607e-06, "loss": 0.0028, "step": 69190 }, { "epoch": 1.1322915814448171, "grad_norm": 0.057028695940971375, "learning_rate": 7.833057508786664e-06, "loss": 0.0013, "step": 69200 }, { "epoch": 1.132455207395893, "grad_norm": 0.13587886095046997, "learning_rate": 7.832273073025732e-06, "loss": 0.0036, "step": 69210 }, { "epoch": 1.132618833346969, "grad_norm": 0.1952548623085022, "learning_rate": 7.831488534601706e-06, "loss": 0.0021, "step": 69220 }, { "epoch": 1.1327824592980447, "grad_norm": 0.15132951736450195, "learning_rate": 7.830703893543026e-06, "loss": 0.002, "step": 69230 }, { "epoch": 1.1329460852491204, "grad_norm": 0.08181136846542358, "learning_rate": 7.829919149878134e-06, "loss": 0.0018, "step": 69240 }, { "epoch": 1.1331097112001964, "grad_norm": 0.17251145839691162, "learning_rate": 7.829134303635474e-06, "loss": 0.0024, "step": 69250 }, { "epoch": 1.1332733371512722, "grad_norm": 0.17258213460445404, "learning_rate": 7.828349354843498e-06, "loss": 0.0025, "step": 69260 }, { "epoch": 1.133436963102348, "grad_norm": 0.250765860080719, "learning_rate": 7.827564303530653e-06, "loss": 0.0039, "step": 69270 }, { "epoch": 1.1336005890534238, "grad_norm": 0.12064200639724731, "learning_rate": 7.826779149725398e-06, "loss": 0.0024, "step": 69280 }, { "epoch": 1.1337642150044998, "grad_norm": 0.16128379106521606, "learning_rate": 7.825993893456192e-06, "loss": 0.0014, "step": 69290 }, { "epoch": 1.1339278409555755, "grad_norm": 0.2249792516231537, "learning_rate": 7.825208534751501e-06, "loss": 0.0018, "step": 69300 }, { "epoch": 1.1340914669066513, "grad_norm": 0.13834290206432343, "learning_rate": 7.824423073639791e-06, "loss": 0.0018, "step": 69310 }, { "epoch": 1.1342550928577273, "grad_norm": 0.32640811800956726, "learning_rate": 7.823637510149533e-06, "loss": 0.0029, "step": 69320 }, { "epoch": 1.134418718808803, "grad_norm": 0.49388423562049866, "learning_rate": 7.822851844309201e-06, "loss": 0.0018, "step": 69330 }, { "epoch": 1.1345823447598788, "grad_norm": 0.02632729709148407, "learning_rate": 7.822066076147274e-06, "loss": 0.002, "step": 69340 }, { "epoch": 1.1347459707109548, "grad_norm": 0.059555403888225555, "learning_rate": 7.821280205692234e-06, "loss": 0.002, "step": 69350 }, { "epoch": 1.1349095966620306, "grad_norm": 0.3162457346916199, "learning_rate": 7.82049423297257e-06, "loss": 0.0024, "step": 69360 }, { "epoch": 1.1350732226131064, "grad_norm": 0.09541064500808716, "learning_rate": 7.819708158016765e-06, "loss": 0.0021, "step": 69370 }, { "epoch": 1.1352368485641824, "grad_norm": 0.15407849848270416, "learning_rate": 7.818921980853317e-06, "loss": 0.0026, "step": 69380 }, { "epoch": 1.1354004745152582, "grad_norm": 0.06315534561872482, "learning_rate": 7.818135701510725e-06, "loss": 0.0031, "step": 69390 }, { "epoch": 1.135564100466334, "grad_norm": 0.08403895795345306, "learning_rate": 7.817349320017485e-06, "loss": 0.0026, "step": 69400 }, { "epoch": 1.1357277264174097, "grad_norm": 0.06529895216226578, "learning_rate": 7.816562836402104e-06, "loss": 0.0029, "step": 69410 }, { "epoch": 1.1358913523684857, "grad_norm": 0.1551051288843155, "learning_rate": 7.81577625069309e-06, "loss": 0.0021, "step": 69420 }, { "epoch": 1.1360549783195615, "grad_norm": 0.482984334230423, "learning_rate": 7.814989562918954e-06, "loss": 0.0027, "step": 69430 }, { "epoch": 1.1362186042706373, "grad_norm": 0.18356969952583313, "learning_rate": 7.814202773108212e-06, "loss": 0.0018, "step": 69440 }, { "epoch": 1.1363822302217133, "grad_norm": 0.17813226580619812, "learning_rate": 7.813415881289384e-06, "loss": 0.0024, "step": 69450 }, { "epoch": 1.136545856172789, "grad_norm": 0.04675832763314247, "learning_rate": 7.81262888749099e-06, "loss": 0.0028, "step": 69460 }, { "epoch": 1.1367094821238648, "grad_norm": 0.16692353785037994, "learning_rate": 7.811841791741561e-06, "loss": 0.0024, "step": 69470 }, { "epoch": 1.1368731080749406, "grad_norm": 0.06796851754188538, "learning_rate": 7.811054594069625e-06, "loss": 0.0018, "step": 69480 }, { "epoch": 1.1370367340260166, "grad_norm": 0.16330274939537048, "learning_rate": 7.810267294503715e-06, "loss": 0.0032, "step": 69490 }, { "epoch": 1.1372003599770923, "grad_norm": 0.1672714352607727, "learning_rate": 7.809479893072369e-06, "loss": 0.0024, "step": 69500 }, { "epoch": 1.1373639859281681, "grad_norm": 0.02578585222363472, "learning_rate": 7.80869238980413e-06, "loss": 0.0021, "step": 69510 }, { "epoch": 1.1375276118792441, "grad_norm": 0.12554201483726501, "learning_rate": 7.807904784727544e-06, "loss": 0.0034, "step": 69520 }, { "epoch": 1.13769123783032, "grad_norm": 0.015130684711039066, "learning_rate": 7.807117077871157e-06, "loss": 0.0024, "step": 69530 }, { "epoch": 1.1378548637813957, "grad_norm": 0.06824557483196259, "learning_rate": 7.806329269263523e-06, "loss": 0.002, "step": 69540 }, { "epoch": 1.1380184897324717, "grad_norm": 0.038978103548288345, "learning_rate": 7.805541358933197e-06, "loss": 0.002, "step": 69550 }, { "epoch": 1.1381821156835474, "grad_norm": 0.04791449010372162, "learning_rate": 7.804753346908742e-06, "loss": 0.0012, "step": 69560 }, { "epoch": 1.1383457416346232, "grad_norm": 0.06030956283211708, "learning_rate": 7.803965233218717e-06, "loss": 0.002, "step": 69570 }, { "epoch": 1.1385093675856992, "grad_norm": 0.11117040365934372, "learning_rate": 7.803177017891693e-06, "loss": 0.002, "step": 69580 }, { "epoch": 1.138672993536775, "grad_norm": 0.13186952471733093, "learning_rate": 7.802388700956238e-06, "loss": 0.0025, "step": 69590 }, { "epoch": 1.1388366194878508, "grad_norm": 0.06334838271141052, "learning_rate": 7.801600282440929e-06, "loss": 0.0011, "step": 69600 }, { "epoch": 1.1390002454389265, "grad_norm": 0.013611716218292713, "learning_rate": 7.800811762374342e-06, "loss": 0.0016, "step": 69610 }, { "epoch": 1.1391638713900025, "grad_norm": 0.060349758714437485, "learning_rate": 7.800023140785061e-06, "loss": 0.0017, "step": 69620 }, { "epoch": 1.1393274973410783, "grad_norm": 0.11975428462028503, "learning_rate": 7.799234417701672e-06, "loss": 0.0023, "step": 69630 }, { "epoch": 1.139491123292154, "grad_norm": 0.10301206260919571, "learning_rate": 7.798445593152761e-06, "loss": 0.0024, "step": 69640 }, { "epoch": 1.13965474924323, "grad_norm": 0.07847728580236435, "learning_rate": 7.797656667166923e-06, "loss": 0.0036, "step": 69650 }, { "epoch": 1.1398183751943058, "grad_norm": 0.043589264154434204, "learning_rate": 7.796867639772755e-06, "loss": 0.0016, "step": 69660 }, { "epoch": 1.1399820011453816, "grad_norm": 0.23483453691005707, "learning_rate": 7.79607851099886e-06, "loss": 0.0029, "step": 69670 }, { "epoch": 1.1401456270964574, "grad_norm": 0.0905677080154419, "learning_rate": 7.795289280873836e-06, "loss": 0.0034, "step": 69680 }, { "epoch": 1.1403092530475334, "grad_norm": 0.1493551880121231, "learning_rate": 7.794499949426293e-06, "loss": 0.0036, "step": 69690 }, { "epoch": 1.1404728789986092, "grad_norm": 0.21221008896827698, "learning_rate": 7.793710516684847e-06, "loss": 0.0027, "step": 69700 }, { "epoch": 1.140636504949685, "grad_norm": 0.16252242028713226, "learning_rate": 7.792920982678107e-06, "loss": 0.0023, "step": 69710 }, { "epoch": 1.140800130900761, "grad_norm": 0.18081463873386383, "learning_rate": 7.792131347434695e-06, "loss": 0.0027, "step": 69720 }, { "epoch": 1.1409637568518367, "grad_norm": 0.1700911670923233, "learning_rate": 7.791341610983232e-06, "loss": 0.0018, "step": 69730 }, { "epoch": 1.1411273828029125, "grad_norm": 0.5025933980941772, "learning_rate": 7.790551773352344e-06, "loss": 0.0019, "step": 69740 }, { "epoch": 1.1412910087539885, "grad_norm": 0.09026019275188446, "learning_rate": 7.789761834570661e-06, "loss": 0.003, "step": 69750 }, { "epoch": 1.1414546347050643, "grad_norm": 0.31321465969085693, "learning_rate": 7.788971794666816e-06, "loss": 0.0024, "step": 69760 }, { "epoch": 1.14161826065614, "grad_norm": 0.475144624710083, "learning_rate": 7.788181653669448e-06, "loss": 0.0022, "step": 69770 }, { "epoch": 1.141781886607216, "grad_norm": 0.1627066433429718, "learning_rate": 7.787391411607195e-06, "loss": 0.0021, "step": 69780 }, { "epoch": 1.1419455125582918, "grad_norm": 0.10785926878452301, "learning_rate": 7.786601068508703e-06, "loss": 0.0025, "step": 69790 }, { "epoch": 1.1421091385093676, "grad_norm": 0.2096908539533615, "learning_rate": 7.78581062440262e-06, "loss": 0.0018, "step": 69800 }, { "epoch": 1.1422727644604433, "grad_norm": 0.1953137367963791, "learning_rate": 7.785020079317597e-06, "loss": 0.0028, "step": 69810 }, { "epoch": 1.1424363904115193, "grad_norm": 0.10830351710319519, "learning_rate": 7.784229433282291e-06, "loss": 0.0021, "step": 69820 }, { "epoch": 1.1426000163625951, "grad_norm": 0.3584763705730438, "learning_rate": 7.783438686325359e-06, "loss": 0.0024, "step": 69830 }, { "epoch": 1.142763642313671, "grad_norm": 0.18040603399276733, "learning_rate": 7.782647838475464e-06, "loss": 0.0019, "step": 69840 }, { "epoch": 1.1429272682647467, "grad_norm": 0.11206971853971481, "learning_rate": 7.781856889761274e-06, "loss": 0.0018, "step": 69850 }, { "epoch": 1.1430908942158227, "grad_norm": 0.21511170268058777, "learning_rate": 7.781065840211456e-06, "loss": 0.0019, "step": 69860 }, { "epoch": 1.1432545201668984, "grad_norm": 0.08971478790044785, "learning_rate": 7.780274689854687e-06, "loss": 0.0027, "step": 69870 }, { "epoch": 1.1434181461179742, "grad_norm": 0.11468818038702011, "learning_rate": 7.779483438719642e-06, "loss": 0.002, "step": 69880 }, { "epoch": 1.1435817720690502, "grad_norm": 0.09300553798675537, "learning_rate": 7.778692086835003e-06, "loss": 0.0013, "step": 69890 }, { "epoch": 1.143745398020126, "grad_norm": 0.005916991271078587, "learning_rate": 7.777900634229452e-06, "loss": 0.0023, "step": 69900 }, { "epoch": 1.1439090239712018, "grad_norm": 0.20844735205173492, "learning_rate": 7.777109080931682e-06, "loss": 0.0037, "step": 69910 }, { "epoch": 1.1440726499222778, "grad_norm": 0.17260238528251648, "learning_rate": 7.776317426970383e-06, "loss": 0.0023, "step": 69920 }, { "epoch": 1.1442362758733535, "grad_norm": 0.11354384571313858, "learning_rate": 7.775525672374247e-06, "loss": 0.0043, "step": 69930 }, { "epoch": 1.1443999018244293, "grad_norm": 0.04650479555130005, "learning_rate": 7.774733817171979e-06, "loss": 0.0013, "step": 69940 }, { "epoch": 1.1445635277755053, "grad_norm": 0.38329213857650757, "learning_rate": 7.773941861392277e-06, "loss": 0.0021, "step": 69950 }, { "epoch": 1.144727153726581, "grad_norm": 0.14534588158130646, "learning_rate": 7.773149805063851e-06, "loss": 0.0022, "step": 69960 }, { "epoch": 1.1448907796776568, "grad_norm": 0.22051934897899628, "learning_rate": 7.77235764821541e-06, "loss": 0.0035, "step": 69970 }, { "epoch": 1.1450544056287328, "grad_norm": 0.14047622680664062, "learning_rate": 7.771565390875665e-06, "loss": 0.002, "step": 69980 }, { "epoch": 1.1452180315798086, "grad_norm": 0.1916721612215042, "learning_rate": 7.77077303307334e-06, "loss": 0.0024, "step": 69990 }, { "epoch": 1.1453816575308844, "grad_norm": 0.08109626919031143, "learning_rate": 7.769980574837148e-06, "loss": 0.0013, "step": 70000 }, { "epoch": 1.1455452834819602, "grad_norm": 0.31952276825904846, "learning_rate": 7.76918801619582e-06, "loss": 0.0025, "step": 70010 }, { "epoch": 1.1457089094330362, "grad_norm": 0.06412523239850998, "learning_rate": 7.76839535717808e-06, "loss": 0.0016, "step": 70020 }, { "epoch": 1.145872535384112, "grad_norm": 0.05007540434598923, "learning_rate": 7.767602597812663e-06, "loss": 0.0019, "step": 70030 }, { "epoch": 1.1460361613351877, "grad_norm": 0.17421574890613556, "learning_rate": 7.766809738128303e-06, "loss": 0.0019, "step": 70040 }, { "epoch": 1.1461997872862635, "grad_norm": 0.012598136439919472, "learning_rate": 7.766016778153741e-06, "loss": 0.0017, "step": 70050 }, { "epoch": 1.1463634132373395, "grad_norm": 0.08941764384508133, "learning_rate": 7.765223717917719e-06, "loss": 0.0021, "step": 70060 }, { "epoch": 1.1465270391884153, "grad_norm": 0.1546737402677536, "learning_rate": 7.764430557448981e-06, "loss": 0.0026, "step": 70070 }, { "epoch": 1.146690665139491, "grad_norm": 0.2542679011821747, "learning_rate": 7.763637296776281e-06, "loss": 0.0045, "step": 70080 }, { "epoch": 1.146854291090567, "grad_norm": 0.03860015422105789, "learning_rate": 7.762843935928371e-06, "loss": 0.0031, "step": 70090 }, { "epoch": 1.1470179170416428, "grad_norm": 0.14806663990020752, "learning_rate": 7.762050474934009e-06, "loss": 0.0026, "step": 70100 }, { "epoch": 1.1471815429927186, "grad_norm": 0.613617479801178, "learning_rate": 7.761256913821955e-06, "loss": 0.0027, "step": 70110 }, { "epoch": 1.1473451689437946, "grad_norm": 0.11001269519329071, "learning_rate": 7.760463252620975e-06, "loss": 0.0029, "step": 70120 }, { "epoch": 1.1475087948948703, "grad_norm": 0.14136607944965363, "learning_rate": 7.759669491359837e-06, "loss": 0.0017, "step": 70130 }, { "epoch": 1.1476724208459461, "grad_norm": 0.2886188328266144, "learning_rate": 7.758875630067312e-06, "loss": 0.0027, "step": 70140 }, { "epoch": 1.1478360467970221, "grad_norm": 0.1937817633152008, "learning_rate": 7.758081668772176e-06, "loss": 0.0015, "step": 70150 }, { "epoch": 1.1479996727480979, "grad_norm": 0.116033636033535, "learning_rate": 7.75728760750321e-06, "loss": 0.0024, "step": 70160 }, { "epoch": 1.1481632986991737, "grad_norm": 0.14908021688461304, "learning_rate": 7.756493446289193e-06, "loss": 0.002, "step": 70170 }, { "epoch": 1.1483269246502494, "grad_norm": 0.13305851817131042, "learning_rate": 7.755699185158914e-06, "loss": 0.002, "step": 70180 }, { "epoch": 1.1484905506013254, "grad_norm": 0.2889268398284912, "learning_rate": 7.754904824141162e-06, "loss": 0.0026, "step": 70190 }, { "epoch": 1.1486541765524012, "grad_norm": 0.010579154826700687, "learning_rate": 7.754110363264735e-06, "loss": 0.0036, "step": 70200 }, { "epoch": 1.148817802503477, "grad_norm": 0.5627008676528931, "learning_rate": 7.753315802558424e-06, "loss": 0.0034, "step": 70210 }, { "epoch": 1.148981428454553, "grad_norm": 0.17042863368988037, "learning_rate": 7.752521142051033e-06, "loss": 0.0051, "step": 70220 }, { "epoch": 1.1491450544056288, "grad_norm": 0.04580502584576607, "learning_rate": 7.751726381771365e-06, "loss": 0.0028, "step": 70230 }, { "epoch": 1.1493086803567045, "grad_norm": 0.033817559480667114, "learning_rate": 7.75093152174823e-06, "loss": 0.0019, "step": 70240 }, { "epoch": 1.1494723063077803, "grad_norm": 0.19072742760181427, "learning_rate": 7.750136562010439e-06, "loss": 0.0028, "step": 70250 }, { "epoch": 1.1496359322588563, "grad_norm": 0.08436397463083267, "learning_rate": 7.749341502586807e-06, "loss": 0.0024, "step": 70260 }, { "epoch": 1.149799558209932, "grad_norm": 0.08733312040567398, "learning_rate": 7.748546343506156e-06, "loss": 0.0014, "step": 70270 }, { "epoch": 1.1499631841610078, "grad_norm": 0.23128721117973328, "learning_rate": 7.747751084797303e-06, "loss": 0.0021, "step": 70280 }, { "epoch": 1.1501268101120838, "grad_norm": 0.12886542081832886, "learning_rate": 7.746955726489079e-06, "loss": 0.0024, "step": 70290 }, { "epoch": 1.1502904360631596, "grad_norm": 0.08301883190870285, "learning_rate": 7.74616026861031e-06, "loss": 0.003, "step": 70300 }, { "epoch": 1.1504540620142354, "grad_norm": 0.07706377655267715, "learning_rate": 7.745364711189834e-06, "loss": 0.0019, "step": 70310 }, { "epoch": 1.1506176879653114, "grad_norm": 0.08896639198064804, "learning_rate": 7.744569054256483e-06, "loss": 0.002, "step": 70320 }, { "epoch": 1.1507813139163872, "grad_norm": 0.259365439414978, "learning_rate": 7.743773297839102e-06, "loss": 0.0038, "step": 70330 }, { "epoch": 1.150944939867463, "grad_norm": 0.050292663276195526, "learning_rate": 7.742977441966532e-06, "loss": 0.0027, "step": 70340 }, { "epoch": 1.151108565818539, "grad_norm": 0.09711932390928268, "learning_rate": 7.742181486667623e-06, "loss": 0.0033, "step": 70350 }, { "epoch": 1.1512721917696147, "grad_norm": 0.06031165271997452, "learning_rate": 7.741385431971227e-06, "loss": 0.0023, "step": 70360 }, { "epoch": 1.1514358177206905, "grad_norm": 0.21656079590320587, "learning_rate": 7.740589277906197e-06, "loss": 0.0021, "step": 70370 }, { "epoch": 1.1515994436717663, "grad_norm": 0.15291477739810944, "learning_rate": 7.739793024501393e-06, "loss": 0.0022, "step": 70380 }, { "epoch": 1.1517630696228423, "grad_norm": 0.12854357063770294, "learning_rate": 7.738996671785675e-06, "loss": 0.0014, "step": 70390 }, { "epoch": 1.151926695573918, "grad_norm": 0.03711121156811714, "learning_rate": 7.738200219787913e-06, "loss": 0.0031, "step": 70400 }, { "epoch": 1.1520903215249938, "grad_norm": 0.16242754459381104, "learning_rate": 7.73740366853697e-06, "loss": 0.0033, "step": 70410 }, { "epoch": 1.1522539474760698, "grad_norm": 0.12611828744411469, "learning_rate": 7.736607018061728e-06, "loss": 0.0016, "step": 70420 }, { "epoch": 1.1524175734271456, "grad_norm": 0.09919460117816925, "learning_rate": 7.735810268391058e-06, "loss": 0.0019, "step": 70430 }, { "epoch": 1.1525811993782213, "grad_norm": 0.03554411977529526, "learning_rate": 7.73501341955384e-06, "loss": 0.0022, "step": 70440 }, { "epoch": 1.1527448253292971, "grad_norm": 0.05773541331291199, "learning_rate": 7.734216471578958e-06, "loss": 0.0028, "step": 70450 }, { "epoch": 1.1529084512803731, "grad_norm": 0.1583910584449768, "learning_rate": 7.733419424495301e-06, "loss": 0.0023, "step": 70460 }, { "epoch": 1.1530720772314489, "grad_norm": 0.057962872087955475, "learning_rate": 7.73262227833176e-06, "loss": 0.0026, "step": 70470 }, { "epoch": 1.1532357031825247, "grad_norm": 0.030212948098778725, "learning_rate": 7.731825033117231e-06, "loss": 0.0016, "step": 70480 }, { "epoch": 1.1533993291336007, "grad_norm": 0.18173915147781372, "learning_rate": 7.731027688880608e-06, "loss": 0.0013, "step": 70490 }, { "epoch": 1.1535629550846764, "grad_norm": 0.039716802537441254, "learning_rate": 7.730230245650795e-06, "loss": 0.002, "step": 70500 }, { "epoch": 1.1537265810357522, "grad_norm": 0.03940742090344429, "learning_rate": 7.729432703456697e-06, "loss": 0.0023, "step": 70510 }, { "epoch": 1.1538902069868282, "grad_norm": 0.2081855982542038, "learning_rate": 7.728635062327225e-06, "loss": 0.0017, "step": 70520 }, { "epoch": 1.154053832937904, "grad_norm": 0.1273934543132782, "learning_rate": 7.72783732229129e-06, "loss": 0.0019, "step": 70530 }, { "epoch": 1.1542174588889798, "grad_norm": 0.13988907635211945, "learning_rate": 7.727039483377807e-06, "loss": 0.0024, "step": 70540 }, { "epoch": 1.1543810848400557, "grad_norm": 0.07988998293876648, "learning_rate": 7.726241545615698e-06, "loss": 0.002, "step": 70550 }, { "epoch": 1.1545447107911315, "grad_norm": 0.2179417610168457, "learning_rate": 7.725443509033885e-06, "loss": 0.0018, "step": 70560 }, { "epoch": 1.1547083367422073, "grad_norm": 0.12431062012910843, "learning_rate": 7.724645373661295e-06, "loss": 0.0017, "step": 70570 }, { "epoch": 1.154871962693283, "grad_norm": 0.035822976380586624, "learning_rate": 7.72384713952686e-06, "loss": 0.0024, "step": 70580 }, { "epoch": 1.155035588644359, "grad_norm": 0.04636373370885849, "learning_rate": 7.723048806659512e-06, "loss": 0.002, "step": 70590 }, { "epoch": 1.1551992145954348, "grad_norm": 0.1291734278202057, "learning_rate": 7.722250375088189e-06, "loss": 0.0028, "step": 70600 }, { "epoch": 1.1553628405465106, "grad_norm": 0.13300809264183044, "learning_rate": 7.721451844841834e-06, "loss": 0.0032, "step": 70610 }, { "epoch": 1.1555264664975866, "grad_norm": 0.33549442887306213, "learning_rate": 7.72065321594939e-06, "loss": 0.0031, "step": 70620 }, { "epoch": 1.1556900924486624, "grad_norm": 0.2980208992958069, "learning_rate": 7.719854488439804e-06, "loss": 0.0026, "step": 70630 }, { "epoch": 1.1558537183997382, "grad_norm": 0.07061202079057693, "learning_rate": 7.719055662342032e-06, "loss": 0.001, "step": 70640 }, { "epoch": 1.156017344350814, "grad_norm": 0.036851078271865845, "learning_rate": 7.718256737685026e-06, "loss": 0.0022, "step": 70650 }, { "epoch": 1.15618097030189, "grad_norm": 0.12601856887340546, "learning_rate": 7.717457714497745e-06, "loss": 0.0032, "step": 70660 }, { "epoch": 1.1563445962529657, "grad_norm": 0.13237760961055756, "learning_rate": 7.716658592809156e-06, "loss": 0.0014, "step": 70670 }, { "epoch": 1.1565082222040415, "grad_norm": 0.18847492337226868, "learning_rate": 7.715859372648223e-06, "loss": 0.0018, "step": 70680 }, { "epoch": 1.1566718481551175, "grad_norm": 0.0651775524020195, "learning_rate": 7.715060054043913e-06, "loss": 0.0025, "step": 70690 }, { "epoch": 1.1568354741061933, "grad_norm": 0.44074270129203796, "learning_rate": 7.714260637025201e-06, "loss": 0.0033, "step": 70700 }, { "epoch": 1.156999100057269, "grad_norm": 0.19722644984722137, "learning_rate": 7.713461121621065e-06, "loss": 0.0048, "step": 70710 }, { "epoch": 1.157162726008345, "grad_norm": 0.019424978643655777, "learning_rate": 7.712661507860486e-06, "loss": 0.0016, "step": 70720 }, { "epoch": 1.1573263519594208, "grad_norm": 0.33553346991539, "learning_rate": 7.711861795772446e-06, "loss": 0.0026, "step": 70730 }, { "epoch": 1.1574899779104966, "grad_norm": 0.027482450008392334, "learning_rate": 7.711061985385935e-06, "loss": 0.0035, "step": 70740 }, { "epoch": 1.1576536038615726, "grad_norm": 0.07315513491630554, "learning_rate": 7.71026207672994e-06, "loss": 0.0021, "step": 70750 }, { "epoch": 1.1578172298126483, "grad_norm": 0.11577493697404861, "learning_rate": 7.709462069833461e-06, "loss": 0.0048, "step": 70760 }, { "epoch": 1.1579808557637241, "grad_norm": 0.025808915495872498, "learning_rate": 7.708661964725495e-06, "loss": 0.0025, "step": 70770 }, { "epoch": 1.1581444817148, "grad_norm": 0.2006557583808899, "learning_rate": 7.707861761435041e-06, "loss": 0.0031, "step": 70780 }, { "epoch": 1.1583081076658759, "grad_norm": 0.17072750627994537, "learning_rate": 7.707061459991106e-06, "loss": 0.0038, "step": 70790 }, { "epoch": 1.1584717336169517, "grad_norm": 0.21064308285713196, "learning_rate": 7.7062610604227e-06, "loss": 0.0021, "step": 70800 }, { "epoch": 1.1586353595680274, "grad_norm": 0.23670190572738647, "learning_rate": 7.705460562758836e-06, "loss": 0.0026, "step": 70810 }, { "epoch": 1.1587989855191032, "grad_norm": 0.12288448214530945, "learning_rate": 7.704659967028527e-06, "loss": 0.0015, "step": 70820 }, { "epoch": 1.1589626114701792, "grad_norm": 0.07719285041093826, "learning_rate": 7.703859273260796e-06, "loss": 0.0014, "step": 70830 }, { "epoch": 1.159126237421255, "grad_norm": 0.14321456849575043, "learning_rate": 7.703058481484665e-06, "loss": 0.0015, "step": 70840 }, { "epoch": 1.1592898633723308, "grad_norm": 0.09547753632068634, "learning_rate": 7.702257591729161e-06, "loss": 0.0018, "step": 70850 }, { "epoch": 1.1594534893234067, "grad_norm": 0.14161425828933716, "learning_rate": 7.701456604023312e-06, "loss": 0.0014, "step": 70860 }, { "epoch": 1.1596171152744825, "grad_norm": 0.15306857228279114, "learning_rate": 7.700655518396157e-06, "loss": 0.0029, "step": 70870 }, { "epoch": 1.1597807412255583, "grad_norm": 0.08018869906663895, "learning_rate": 7.699854334876727e-06, "loss": 0.0024, "step": 70880 }, { "epoch": 1.1599443671766343, "grad_norm": 0.020031031221151352, "learning_rate": 7.699053053494068e-06, "loss": 0.0024, "step": 70890 }, { "epoch": 1.16010799312771, "grad_norm": 0.07199593633413315, "learning_rate": 7.698251674277221e-06, "loss": 0.0032, "step": 70900 }, { "epoch": 1.1602716190787858, "grad_norm": 0.04047798365354538, "learning_rate": 7.697450197255239e-06, "loss": 0.0013, "step": 70910 }, { "epoch": 1.1604352450298618, "grad_norm": 0.357229083776474, "learning_rate": 7.69664862245717e-06, "loss": 0.0021, "step": 70920 }, { "epoch": 1.1605988709809376, "grad_norm": 0.05563312768936157, "learning_rate": 7.695846949912067e-06, "loss": 0.0038, "step": 70930 }, { "epoch": 1.1607624969320134, "grad_norm": 0.12651728093624115, "learning_rate": 7.695045179648995e-06, "loss": 0.0034, "step": 70940 }, { "epoch": 1.1609261228830892, "grad_norm": 0.13400737941265106, "learning_rate": 7.694243311697009e-06, "loss": 0.0019, "step": 70950 }, { "epoch": 1.1610897488341652, "grad_norm": 0.057702917605638504, "learning_rate": 7.693441346085179e-06, "loss": 0.002, "step": 70960 }, { "epoch": 1.161253374785241, "grad_norm": 0.1612233966588974, "learning_rate": 7.692639282842575e-06, "loss": 0.0027, "step": 70970 }, { "epoch": 1.1614170007363167, "grad_norm": 0.1036149188876152, "learning_rate": 7.691837121998268e-06, "loss": 0.0022, "step": 70980 }, { "epoch": 1.1615806266873927, "grad_norm": 0.04147518053650856, "learning_rate": 7.691034863581336e-06, "loss": 0.0016, "step": 70990 }, { "epoch": 1.1617442526384685, "grad_norm": 0.18594759702682495, "learning_rate": 7.690232507620859e-06, "loss": 0.0016, "step": 71000 }, { "epoch": 1.1619078785895443, "grad_norm": 0.04953709617257118, "learning_rate": 7.689430054145917e-06, "loss": 0.0019, "step": 71010 }, { "epoch": 1.16207150454062, "grad_norm": 0.06181958690285683, "learning_rate": 7.6886275031856e-06, "loss": 0.0022, "step": 71020 }, { "epoch": 1.162235130491696, "grad_norm": 0.10076411068439484, "learning_rate": 7.687824854768998e-06, "loss": 0.002, "step": 71030 }, { "epoch": 1.1623987564427718, "grad_norm": 0.13595019280910492, "learning_rate": 7.687022108925204e-06, "loss": 0.0013, "step": 71040 }, { "epoch": 1.1625623823938476, "grad_norm": 0.1439180225133896, "learning_rate": 7.686219265683319e-06, "loss": 0.0018, "step": 71050 }, { "epoch": 1.1627260083449236, "grad_norm": 0.09472621232271194, "learning_rate": 7.685416325072439e-06, "loss": 0.0013, "step": 71060 }, { "epoch": 1.1628896342959993, "grad_norm": 0.2254975587129593, "learning_rate": 7.684613287121673e-06, "loss": 0.0024, "step": 71070 }, { "epoch": 1.1630532602470751, "grad_norm": 0.07248338311910629, "learning_rate": 7.683810151860129e-06, "loss": 0.0026, "step": 71080 }, { "epoch": 1.1632168861981511, "grad_norm": 0.06300617754459381, "learning_rate": 7.683006919316915e-06, "loss": 0.0024, "step": 71090 }, { "epoch": 1.1633805121492269, "grad_norm": 0.19227281212806702, "learning_rate": 7.682203589521149e-06, "loss": 0.0027, "step": 71100 }, { "epoch": 1.1635441381003027, "grad_norm": 0.08374682813882828, "learning_rate": 7.68140016250195e-06, "loss": 0.0028, "step": 71110 }, { "epoch": 1.1637077640513787, "grad_norm": 0.27543559670448303, "learning_rate": 7.68059663828844e-06, "loss": 0.004, "step": 71120 }, { "epoch": 1.1638713900024544, "grad_norm": 0.06532328575849533, "learning_rate": 7.679793016909745e-06, "loss": 0.0036, "step": 71130 }, { "epoch": 1.1640350159535302, "grad_norm": 0.15541760623455048, "learning_rate": 7.678989298394991e-06, "loss": 0.0018, "step": 71140 }, { "epoch": 1.164198641904606, "grad_norm": 0.08065922558307648, "learning_rate": 7.678185482773317e-06, "loss": 0.0021, "step": 71150 }, { "epoch": 1.164362267855682, "grad_norm": 0.11972736567258835, "learning_rate": 7.677381570073855e-06, "loss": 0.0037, "step": 71160 }, { "epoch": 1.1645258938067578, "grad_norm": 0.13993096351623535, "learning_rate": 7.676577560325745e-06, "loss": 0.0015, "step": 71170 }, { "epoch": 1.1646895197578335, "grad_norm": 0.19419540464878082, "learning_rate": 7.675773453558133e-06, "loss": 0.0017, "step": 71180 }, { "epoch": 1.1648531457089095, "grad_norm": 0.09884954988956451, "learning_rate": 7.674969249800162e-06, "loss": 0.0025, "step": 71190 }, { "epoch": 1.1650167716599853, "grad_norm": 0.11203473061323166, "learning_rate": 7.674164949080986e-06, "loss": 0.0031, "step": 71200 }, { "epoch": 1.165180397611061, "grad_norm": 0.1971210390329361, "learning_rate": 7.673360551429759e-06, "loss": 0.002, "step": 71210 }, { "epoch": 1.1653440235621368, "grad_norm": 0.050767164677381516, "learning_rate": 7.672556056875638e-06, "loss": 0.0015, "step": 71220 }, { "epoch": 1.1655076495132128, "grad_norm": 0.6226209402084351, "learning_rate": 7.67175146544778e-06, "loss": 0.0043, "step": 71230 }, { "epoch": 1.1656712754642886, "grad_norm": 0.058105431497097015, "learning_rate": 7.670946777175354e-06, "loss": 0.0015, "step": 71240 }, { "epoch": 1.1658349014153644, "grad_norm": 0.15004487335681915, "learning_rate": 7.670141992087528e-06, "loss": 0.0024, "step": 71250 }, { "epoch": 1.1659985273664404, "grad_norm": 0.0597807839512825, "learning_rate": 7.66933711021347e-06, "loss": 0.0023, "step": 71260 }, { "epoch": 1.1661621533175162, "grad_norm": 0.21553616225719452, "learning_rate": 7.66853213158236e-06, "loss": 0.0029, "step": 71270 }, { "epoch": 1.166325779268592, "grad_norm": 0.061164747923612595, "learning_rate": 7.667727056223374e-06, "loss": 0.001, "step": 71280 }, { "epoch": 1.166489405219668, "grad_norm": 0.08249186724424362, "learning_rate": 7.666921884165693e-06, "loss": 0.0016, "step": 71290 }, { "epoch": 1.1666530311707437, "grad_norm": 0.05865321308374405, "learning_rate": 7.666116615438501e-06, "loss": 0.0011, "step": 71300 }, { "epoch": 1.1668166571218195, "grad_norm": 0.19730877876281738, "learning_rate": 7.665311250070992e-06, "loss": 0.0024, "step": 71310 }, { "epoch": 1.1669802830728955, "grad_norm": 0.11595425754785538, "learning_rate": 7.664505788092356e-06, "loss": 0.0016, "step": 71320 }, { "epoch": 1.1671439090239712, "grad_norm": 0.12209577113389969, "learning_rate": 7.66370022953179e-06, "loss": 0.0026, "step": 71330 }, { "epoch": 1.167307534975047, "grad_norm": 0.2315395176410675, "learning_rate": 7.662894574418491e-06, "loss": 0.0019, "step": 71340 }, { "epoch": 1.1674711609261228, "grad_norm": 0.11272265017032623, "learning_rate": 7.662088822781663e-06, "loss": 0.0016, "step": 71350 }, { "epoch": 1.1676347868771988, "grad_norm": 0.3565988838672638, "learning_rate": 7.661282974650515e-06, "loss": 0.0037, "step": 71360 }, { "epoch": 1.1677984128282746, "grad_norm": 0.22530940175056458, "learning_rate": 7.660477030054256e-06, "loss": 0.0023, "step": 71370 }, { "epoch": 1.1679620387793503, "grad_norm": 0.13648036122322083, "learning_rate": 7.659670989022098e-06, "loss": 0.0021, "step": 71380 }, { "epoch": 1.1681256647304263, "grad_norm": 0.03399841487407684, "learning_rate": 7.65886485158326e-06, "loss": 0.001, "step": 71390 }, { "epoch": 1.1682892906815021, "grad_norm": 0.05450304225087166, "learning_rate": 7.658058617766958e-06, "loss": 0.0018, "step": 71400 }, { "epoch": 1.1684529166325779, "grad_norm": 0.16956022381782532, "learning_rate": 7.657252287602423e-06, "loss": 0.0023, "step": 71410 }, { "epoch": 1.1686165425836537, "grad_norm": 0.06380729377269745, "learning_rate": 7.656445861118878e-06, "loss": 0.0035, "step": 71420 }, { "epoch": 1.1687801685347297, "grad_norm": 0.07962498068809509, "learning_rate": 7.655639338345557e-06, "loss": 0.0017, "step": 71430 }, { "epoch": 1.1689437944858054, "grad_norm": 0.13164030015468597, "learning_rate": 7.654832719311691e-06, "loss": 0.0034, "step": 71440 }, { "epoch": 1.1691074204368812, "grad_norm": 0.05280362069606781, "learning_rate": 7.654026004046518e-06, "loss": 0.0013, "step": 71450 }, { "epoch": 1.1692710463879572, "grad_norm": 0.36253800988197327, "learning_rate": 7.653219192579283e-06, "loss": 0.0028, "step": 71460 }, { "epoch": 1.169434672339033, "grad_norm": 0.07338973134756088, "learning_rate": 7.65241228493923e-06, "loss": 0.0035, "step": 71470 }, { "epoch": 1.1695982982901088, "grad_norm": 0.0691860020160675, "learning_rate": 7.651605281155606e-06, "loss": 0.0014, "step": 71480 }, { "epoch": 1.1697619242411847, "grad_norm": 0.1511940062046051, "learning_rate": 7.650798181257662e-06, "loss": 0.0023, "step": 71490 }, { "epoch": 1.1699255501922605, "grad_norm": 0.39839473366737366, "learning_rate": 7.649990985274657e-06, "loss": 0.0022, "step": 71500 }, { "epoch": 1.1700891761433363, "grad_norm": 0.23905257880687714, "learning_rate": 7.649183693235847e-06, "loss": 0.0023, "step": 71510 }, { "epoch": 1.1702528020944123, "grad_norm": 0.23282641172409058, "learning_rate": 7.648376305170494e-06, "loss": 0.0058, "step": 71520 }, { "epoch": 1.170416428045488, "grad_norm": 0.24495255947113037, "learning_rate": 7.647568821107868e-06, "loss": 0.0022, "step": 71530 }, { "epoch": 1.1705800539965638, "grad_norm": 0.2635161280632019, "learning_rate": 7.646761241077233e-06, "loss": 0.0031, "step": 71540 }, { "epoch": 1.1707436799476396, "grad_norm": 0.10612886399030685, "learning_rate": 7.645953565107864e-06, "loss": 0.0021, "step": 71550 }, { "epoch": 1.1709073058987156, "grad_norm": 0.13540451228618622, "learning_rate": 7.645145793229039e-06, "loss": 0.0019, "step": 71560 }, { "epoch": 1.1710709318497914, "grad_norm": 0.07193749397993088, "learning_rate": 7.644337925470036e-06, "loss": 0.0024, "step": 71570 }, { "epoch": 1.1712345578008672, "grad_norm": 0.03849054127931595, "learning_rate": 7.643529961860139e-06, "loss": 0.0027, "step": 71580 }, { "epoch": 1.171398183751943, "grad_norm": 0.041433919221162796, "learning_rate": 7.642721902428634e-06, "loss": 0.0037, "step": 71590 }, { "epoch": 1.171561809703019, "grad_norm": 0.1281454712152481, "learning_rate": 7.641913747204811e-06, "loss": 0.0018, "step": 71600 }, { "epoch": 1.1717254356540947, "grad_norm": 0.08238737285137177, "learning_rate": 7.641105496217966e-06, "loss": 0.0028, "step": 71610 }, { "epoch": 1.1718890616051705, "grad_norm": 0.06016228348016739, "learning_rate": 7.640297149497396e-06, "loss": 0.0017, "step": 71620 }, { "epoch": 1.1720526875562465, "grad_norm": 0.5093915462493896, "learning_rate": 7.639488707072399e-06, "loss": 0.0026, "step": 71630 }, { "epoch": 1.1722163135073222, "grad_norm": 0.21576476097106934, "learning_rate": 7.638680168972277e-06, "loss": 0.0052, "step": 71640 }, { "epoch": 1.172379939458398, "grad_norm": 0.12510138750076294, "learning_rate": 7.637871535226344e-06, "loss": 0.0024, "step": 71650 }, { "epoch": 1.172543565409474, "grad_norm": 0.03411099314689636, "learning_rate": 7.637062805863906e-06, "loss": 0.0016, "step": 71660 }, { "epoch": 1.1727071913605498, "grad_norm": 0.25399377942085266, "learning_rate": 7.636253980914278e-06, "loss": 0.0026, "step": 71670 }, { "epoch": 1.1728708173116256, "grad_norm": 0.14685702323913574, "learning_rate": 7.635445060406783e-06, "loss": 0.0016, "step": 71680 }, { "epoch": 1.1730344432627016, "grad_norm": 0.05005675554275513, "learning_rate": 7.634636044370738e-06, "loss": 0.0017, "step": 71690 }, { "epoch": 1.1731980692137773, "grad_norm": 0.2060636579990387, "learning_rate": 7.633826932835466e-06, "loss": 0.0027, "step": 71700 }, { "epoch": 1.1733616951648531, "grad_norm": 0.22579315304756165, "learning_rate": 7.633017725830301e-06, "loss": 0.0032, "step": 71710 }, { "epoch": 1.173525321115929, "grad_norm": 0.014119814150035381, "learning_rate": 7.63220842338457e-06, "loss": 0.0025, "step": 71720 }, { "epoch": 1.1736889470670049, "grad_norm": 0.0524456650018692, "learning_rate": 7.63139902552761e-06, "loss": 0.0014, "step": 71730 }, { "epoch": 1.1738525730180807, "grad_norm": 0.1027493104338646, "learning_rate": 7.63058953228876e-06, "loss": 0.0027, "step": 71740 }, { "epoch": 1.1740161989691564, "grad_norm": 0.18032707273960114, "learning_rate": 7.629779943697362e-06, "loss": 0.0018, "step": 71750 }, { "epoch": 1.1741798249202324, "grad_norm": 0.07175762951374054, "learning_rate": 7.628970259782761e-06, "loss": 0.0028, "step": 71760 }, { "epoch": 1.1743434508713082, "grad_norm": 0.21787424385547638, "learning_rate": 7.628160480574307e-06, "loss": 0.0025, "step": 71770 }, { "epoch": 1.174507076822384, "grad_norm": 0.3073088526725769, "learning_rate": 7.627350606101353e-06, "loss": 0.0022, "step": 71780 }, { "epoch": 1.1746707027734598, "grad_norm": 0.06641089916229248, "learning_rate": 7.626540636393254e-06, "loss": 0.0025, "step": 71790 }, { "epoch": 1.1748343287245357, "grad_norm": 0.1391206681728363, "learning_rate": 7.625730571479369e-06, "loss": 0.003, "step": 71800 }, { "epoch": 1.1749979546756115, "grad_norm": 0.10353788733482361, "learning_rate": 7.624920411389061e-06, "loss": 0.0037, "step": 71810 }, { "epoch": 1.1751615806266873, "grad_norm": 0.07975750416517258, "learning_rate": 7.624110156151696e-06, "loss": 0.0024, "step": 71820 }, { "epoch": 1.1753252065777633, "grad_norm": 0.0043273488990962505, "learning_rate": 7.6232998057966476e-06, "loss": 0.0049, "step": 71830 }, { "epoch": 1.175488832528839, "grad_norm": 0.14998240768909454, "learning_rate": 7.6224893603532845e-06, "loss": 0.0026, "step": 71840 }, { "epoch": 1.1756524584799148, "grad_norm": 0.10849238187074661, "learning_rate": 7.621678819850986e-06, "loss": 0.0021, "step": 71850 }, { "epoch": 1.1758160844309908, "grad_norm": 0.05144664645195007, "learning_rate": 7.62086818431913e-06, "loss": 0.0022, "step": 71860 }, { "epoch": 1.1759797103820666, "grad_norm": 0.09146712720394135, "learning_rate": 7.620057453787103e-06, "loss": 0.0027, "step": 71870 }, { "epoch": 1.1761433363331424, "grad_norm": 0.16517168283462524, "learning_rate": 7.619246628284289e-06, "loss": 0.003, "step": 71880 }, { "epoch": 1.1763069622842184, "grad_norm": 0.2248610556125641, "learning_rate": 7.618435707840082e-06, "loss": 0.004, "step": 71890 }, { "epoch": 1.1764705882352942, "grad_norm": 0.0688471719622612, "learning_rate": 7.617624692483872e-06, "loss": 0.0037, "step": 71900 }, { "epoch": 1.17663421418637, "grad_norm": 0.0887303575873375, "learning_rate": 7.616813582245058e-06, "loss": 0.0023, "step": 71910 }, { "epoch": 1.1767978401374457, "grad_norm": 0.24531736969947815, "learning_rate": 7.616002377153042e-06, "loss": 0.0031, "step": 71920 }, { "epoch": 1.1769614660885217, "grad_norm": 0.026818662881851196, "learning_rate": 7.615191077237227e-06, "loss": 0.0022, "step": 71930 }, { "epoch": 1.1771250920395975, "grad_norm": 0.008812198415398598, "learning_rate": 7.61437968252702e-06, "loss": 0.003, "step": 71940 }, { "epoch": 1.1772887179906733, "grad_norm": 0.024305429309606552, "learning_rate": 7.613568193051834e-06, "loss": 0.0015, "step": 71950 }, { "epoch": 1.1774523439417492, "grad_norm": 0.08036768436431885, "learning_rate": 7.612756608841081e-06, "loss": 0.0022, "step": 71960 }, { "epoch": 1.177615969892825, "grad_norm": 0.1106012836098671, "learning_rate": 7.611944929924182e-06, "loss": 0.0028, "step": 71970 }, { "epoch": 1.1777795958439008, "grad_norm": 0.0743735283613205, "learning_rate": 7.611133156330558e-06, "loss": 0.0016, "step": 71980 }, { "epoch": 1.1779432217949766, "grad_norm": 0.07157034426927567, "learning_rate": 7.610321288089631e-06, "loss": 0.0019, "step": 71990 }, { "epoch": 1.1781068477460526, "grad_norm": 0.10042784363031387, "learning_rate": 7.609509325230831e-06, "loss": 0.0023, "step": 72000 }, { "epoch": 1.1782704736971283, "grad_norm": 0.12921467423439026, "learning_rate": 7.608697267783591e-06, "loss": 0.0029, "step": 72010 }, { "epoch": 1.1784340996482041, "grad_norm": 0.16908426582813263, "learning_rate": 7.607885115777342e-06, "loss": 0.0021, "step": 72020 }, { "epoch": 1.17859772559928, "grad_norm": 0.10650560259819031, "learning_rate": 7.60707286924153e-06, "loss": 0.0018, "step": 72030 }, { "epoch": 1.1787613515503559, "grad_norm": 0.1354352831840515, "learning_rate": 7.606260528205589e-06, "loss": 0.0017, "step": 72040 }, { "epoch": 1.1789249775014317, "grad_norm": 0.019955743104219437, "learning_rate": 7.6054480926989695e-06, "loss": 0.002, "step": 72050 }, { "epoch": 1.1790886034525077, "grad_norm": 0.10579939931631088, "learning_rate": 7.604635562751118e-06, "loss": 0.0028, "step": 72060 }, { "epoch": 1.1792522294035834, "grad_norm": 0.05810854583978653, "learning_rate": 7.6038229383914875e-06, "loss": 0.0024, "step": 72070 }, { "epoch": 1.1794158553546592, "grad_norm": 0.19434931874275208, "learning_rate": 7.6030102196495335e-06, "loss": 0.0053, "step": 72080 }, { "epoch": 1.1795794813057352, "grad_norm": 0.030727526172995567, "learning_rate": 7.602197406554714e-06, "loss": 0.0021, "step": 72090 }, { "epoch": 1.179743107256811, "grad_norm": 0.2397388070821762, "learning_rate": 7.601384499136495e-06, "loss": 0.0028, "step": 72100 }, { "epoch": 1.1799067332078867, "grad_norm": 0.05750928446650505, "learning_rate": 7.60057149742434e-06, "loss": 0.0019, "step": 72110 }, { "epoch": 1.1800703591589625, "grad_norm": 0.23812896013259888, "learning_rate": 7.599758401447717e-06, "loss": 0.0031, "step": 72120 }, { "epoch": 1.1802339851100385, "grad_norm": 0.1328461468219757, "learning_rate": 7.598945211236102e-06, "loss": 0.0021, "step": 72130 }, { "epoch": 1.1803976110611143, "grad_norm": 0.1501968502998352, "learning_rate": 7.598131926818968e-06, "loss": 0.0014, "step": 72140 }, { "epoch": 1.18056123701219, "grad_norm": 0.03078266978263855, "learning_rate": 7.597318548225796e-06, "loss": 0.0018, "step": 72150 }, { "epoch": 1.180724862963266, "grad_norm": 0.16636280715465546, "learning_rate": 7.596505075486069e-06, "loss": 0.0041, "step": 72160 }, { "epoch": 1.1808884889143418, "grad_norm": 0.09739551693201065, "learning_rate": 7.595691508629275e-06, "loss": 0.0018, "step": 72170 }, { "epoch": 1.1810521148654176, "grad_norm": 0.046920355409383774, "learning_rate": 7.594877847684903e-06, "loss": 0.002, "step": 72180 }, { "epoch": 1.1812157408164934, "grad_norm": 0.3047250509262085, "learning_rate": 7.594064092682443e-06, "loss": 0.002, "step": 72190 }, { "epoch": 1.1813793667675694, "grad_norm": 0.11077884584665298, "learning_rate": 7.593250243651395e-06, "loss": 0.0027, "step": 72200 }, { "epoch": 1.1815429927186452, "grad_norm": 0.05937687307596207, "learning_rate": 7.59243630062126e-06, "loss": 0.0022, "step": 72210 }, { "epoch": 1.181706618669721, "grad_norm": 0.04560388624668121, "learning_rate": 7.591622263621539e-06, "loss": 0.0021, "step": 72220 }, { "epoch": 1.181870244620797, "grad_norm": 0.047350239008665085, "learning_rate": 7.590808132681739e-06, "loss": 0.002, "step": 72230 }, { "epoch": 1.1820338705718727, "grad_norm": 0.07642187923192978, "learning_rate": 7.589993907831372e-06, "loss": 0.0026, "step": 72240 }, { "epoch": 1.1821974965229485, "grad_norm": 0.0686226636171341, "learning_rate": 7.5891795890999495e-06, "loss": 0.0015, "step": 72250 }, { "epoch": 1.1823611224740245, "grad_norm": 0.03936338424682617, "learning_rate": 7.588365176516992e-06, "loss": 0.002, "step": 72260 }, { "epoch": 1.1825247484251002, "grad_norm": 0.1994786560535431, "learning_rate": 7.587550670112016e-06, "loss": 0.0031, "step": 72270 }, { "epoch": 1.182688374376176, "grad_norm": 0.1494719237089157, "learning_rate": 7.586736069914549e-06, "loss": 0.0021, "step": 72280 }, { "epoch": 1.182852000327252, "grad_norm": 0.0997762605547905, "learning_rate": 7.585921375954115e-06, "loss": 0.0012, "step": 72290 }, { "epoch": 1.1830156262783278, "grad_norm": 0.1472926288843155, "learning_rate": 7.585106588260245e-06, "loss": 0.0021, "step": 72300 }, { "epoch": 1.1831792522294036, "grad_norm": 0.06689479947090149, "learning_rate": 7.584291706862476e-06, "loss": 0.0011, "step": 72310 }, { "epoch": 1.1833428781804793, "grad_norm": 0.06242343783378601, "learning_rate": 7.583476731790343e-06, "loss": 0.0021, "step": 72320 }, { "epoch": 1.1835065041315553, "grad_norm": 0.13247735798358917, "learning_rate": 7.582661663073389e-06, "loss": 0.0036, "step": 72330 }, { "epoch": 1.1836701300826311, "grad_norm": 0.1880936622619629, "learning_rate": 7.581846500741157e-06, "loss": 0.0018, "step": 72340 }, { "epoch": 1.1838337560337069, "grad_norm": 0.1525871455669403, "learning_rate": 7.581031244823193e-06, "loss": 0.0021, "step": 72350 }, { "epoch": 1.1839973819847829, "grad_norm": 0.08362074941396713, "learning_rate": 7.58021589534905e-06, "loss": 0.0016, "step": 72360 }, { "epoch": 1.1841610079358587, "grad_norm": 0.03664948791265488, "learning_rate": 7.579400452348285e-06, "loss": 0.0018, "step": 72370 }, { "epoch": 1.1843246338869344, "grad_norm": 0.08488146215677261, "learning_rate": 7.578584915850448e-06, "loss": 0.0013, "step": 72380 }, { "epoch": 1.1844882598380102, "grad_norm": 0.10582694411277771, "learning_rate": 7.57776928588511e-06, "loss": 0.002, "step": 72390 }, { "epoch": 1.1846518857890862, "grad_norm": 0.05558272823691368, "learning_rate": 7.576953562481828e-06, "loss": 0.002, "step": 72400 }, { "epoch": 1.184815511740162, "grad_norm": 0.13190887868404388, "learning_rate": 7.576137745670174e-06, "loss": 0.0026, "step": 72410 }, { "epoch": 1.1849791376912377, "grad_norm": 0.1701160967350006, "learning_rate": 7.575321835479719e-06, "loss": 0.0024, "step": 72420 }, { "epoch": 1.1851427636423137, "grad_norm": 0.12425373494625092, "learning_rate": 7.574505831940037e-06, "loss": 0.0011, "step": 72430 }, { "epoch": 1.1853063895933895, "grad_norm": 0.10949856787919998, "learning_rate": 7.573689735080705e-06, "loss": 0.0017, "step": 72440 }, { "epoch": 1.1854700155444653, "grad_norm": 0.08130192756652832, "learning_rate": 7.572873544931305e-06, "loss": 0.0035, "step": 72450 }, { "epoch": 1.1856336414955413, "grad_norm": 0.13593576848506927, "learning_rate": 7.572057261521424e-06, "loss": 0.0014, "step": 72460 }, { "epoch": 1.185797267446617, "grad_norm": 0.10031068325042725, "learning_rate": 7.571240884880649e-06, "loss": 0.0013, "step": 72470 }, { "epoch": 1.1859608933976928, "grad_norm": 0.16531318426132202, "learning_rate": 7.570424415038573e-06, "loss": 0.0029, "step": 72480 }, { "epoch": 1.1861245193487688, "grad_norm": 0.23652106523513794, "learning_rate": 7.56960785202479e-06, "loss": 0.0018, "step": 72490 }, { "epoch": 1.1862881452998446, "grad_norm": 0.09523310512304306, "learning_rate": 7.568791195868896e-06, "loss": 0.0015, "step": 72500 }, { "epoch": 1.1864517712509204, "grad_norm": 0.2035551369190216, "learning_rate": 7.567974446600496e-06, "loss": 0.0018, "step": 72510 }, { "epoch": 1.1866153972019962, "grad_norm": 0.05382750555872917, "learning_rate": 7.567157604249194e-06, "loss": 0.0014, "step": 72520 }, { "epoch": 1.1867790231530722, "grad_norm": 0.09363462030887604, "learning_rate": 7.566340668844598e-06, "loss": 0.0018, "step": 72530 }, { "epoch": 1.186942649104148, "grad_norm": 0.0806182399392128, "learning_rate": 7.5655236404163235e-06, "loss": 0.0017, "step": 72540 }, { "epoch": 1.1871062750552237, "grad_norm": 0.06348676234483719, "learning_rate": 7.56470651899398e-06, "loss": 0.0029, "step": 72550 }, { "epoch": 1.1872699010062995, "grad_norm": 0.08105562627315521, "learning_rate": 7.563889304607193e-06, "loss": 0.0022, "step": 72560 }, { "epoch": 1.1874335269573755, "grad_norm": 0.1868240237236023, "learning_rate": 7.5630719972855784e-06, "loss": 0.0017, "step": 72570 }, { "epoch": 1.1875971529084512, "grad_norm": 0.029343510046601295, "learning_rate": 7.562254597058764e-06, "loss": 0.0025, "step": 72580 }, { "epoch": 1.187760778859527, "grad_norm": 0.06690794974565506, "learning_rate": 7.561437103956379e-06, "loss": 0.0032, "step": 72590 }, { "epoch": 1.187924404810603, "grad_norm": 0.07641276717185974, "learning_rate": 7.560619518008054e-06, "loss": 0.0017, "step": 72600 }, { "epoch": 1.1880880307616788, "grad_norm": 0.24838106334209442, "learning_rate": 7.559801839243428e-06, "loss": 0.0031, "step": 72610 }, { "epoch": 1.1882516567127546, "grad_norm": 0.14931818842887878, "learning_rate": 7.558984067692137e-06, "loss": 0.0019, "step": 72620 }, { "epoch": 1.1884152826638306, "grad_norm": 0.013912595808506012, "learning_rate": 7.558166203383823e-06, "loss": 0.0014, "step": 72630 }, { "epoch": 1.1885789086149063, "grad_norm": 0.19385413825511932, "learning_rate": 7.557348246348133e-06, "loss": 0.0025, "step": 72640 }, { "epoch": 1.1887425345659821, "grad_norm": 0.17417462170124054, "learning_rate": 7.556530196614717e-06, "loss": 0.0028, "step": 72650 }, { "epoch": 1.188906160517058, "grad_norm": 0.008293598890304565, "learning_rate": 7.555712054213222e-06, "loss": 0.002, "step": 72660 }, { "epoch": 1.1890697864681339, "grad_norm": 0.1257864534854889, "learning_rate": 7.55489381917331e-06, "loss": 0.0018, "step": 72670 }, { "epoch": 1.1892334124192097, "grad_norm": 0.02947196178138256, "learning_rate": 7.55407549152464e-06, "loss": 0.0019, "step": 72680 }, { "epoch": 1.1893970383702854, "grad_norm": 0.12592129409313202, "learning_rate": 7.553257071296869e-06, "loss": 0.0021, "step": 72690 }, { "epoch": 1.1895606643213614, "grad_norm": 0.25786641240119934, "learning_rate": 7.552438558519666e-06, "loss": 0.0024, "step": 72700 }, { "epoch": 1.1897242902724372, "grad_norm": 0.07107283920049667, "learning_rate": 7.5516199532227e-06, "loss": 0.0025, "step": 72710 }, { "epoch": 1.189887916223513, "grad_norm": 0.12497085332870483, "learning_rate": 7.550801255435642e-06, "loss": 0.0033, "step": 72720 }, { "epoch": 1.190051542174589, "grad_norm": 0.22226111590862274, "learning_rate": 7.5499824651881726e-06, "loss": 0.0025, "step": 72730 }, { "epoch": 1.1902151681256647, "grad_norm": 0.1252511888742447, "learning_rate": 7.549163582509964e-06, "loss": 0.0016, "step": 72740 }, { "epoch": 1.1903787940767405, "grad_norm": 0.06868183612823486, "learning_rate": 7.548344607430704e-06, "loss": 0.0036, "step": 72750 }, { "epoch": 1.1905424200278163, "grad_norm": 0.024801086634397507, "learning_rate": 7.547525539980077e-06, "loss": 0.0015, "step": 72760 }, { "epoch": 1.1907060459788923, "grad_norm": 0.19810540974140167, "learning_rate": 7.546706380187772e-06, "loss": 0.0019, "step": 72770 }, { "epoch": 1.190869671929968, "grad_norm": 0.11733773350715637, "learning_rate": 7.545887128083482e-06, "loss": 0.0026, "step": 72780 }, { "epoch": 1.1910332978810438, "grad_norm": 0.25038450956344604, "learning_rate": 7.545067783696901e-06, "loss": 0.0014, "step": 72790 }, { "epoch": 1.1911969238321198, "grad_norm": 0.06976586580276489, "learning_rate": 7.54424834705773e-06, "loss": 0.0043, "step": 72800 }, { "epoch": 1.1913605497831956, "grad_norm": 0.26963064074516296, "learning_rate": 7.543428818195673e-06, "loss": 0.002, "step": 72810 }, { "epoch": 1.1915241757342714, "grad_norm": 0.07638239860534668, "learning_rate": 7.542609197140433e-06, "loss": 0.0016, "step": 72820 }, { "epoch": 1.1916878016853474, "grad_norm": 0.028073985129594803, "learning_rate": 7.541789483921721e-06, "loss": 0.0028, "step": 72830 }, { "epoch": 1.1918514276364232, "grad_norm": 0.11851628869771957, "learning_rate": 7.540969678569249e-06, "loss": 0.0018, "step": 72840 }, { "epoch": 1.192015053587499, "grad_norm": 0.09475129842758179, "learning_rate": 7.540149781112733e-06, "loss": 0.0018, "step": 72850 }, { "epoch": 1.192178679538575, "grad_norm": 0.12802912294864655, "learning_rate": 7.539329791581893e-06, "loss": 0.0018, "step": 72860 }, { "epoch": 1.1923423054896507, "grad_norm": 0.20128066837787628, "learning_rate": 7.53850971000645e-06, "loss": 0.0019, "step": 72870 }, { "epoch": 1.1925059314407265, "grad_norm": 0.1378030627965927, "learning_rate": 7.537689536416133e-06, "loss": 0.001, "step": 72880 }, { "epoch": 1.1926695573918022, "grad_norm": 0.21198512613773346, "learning_rate": 7.536869270840668e-06, "loss": 0.0026, "step": 72890 }, { "epoch": 1.1928331833428782, "grad_norm": 0.11297036707401276, "learning_rate": 7.536048913309791e-06, "loss": 0.0022, "step": 72900 }, { "epoch": 1.192996809293954, "grad_norm": 0.18209007382392883, "learning_rate": 7.535228463853234e-06, "loss": 0.0016, "step": 72910 }, { "epoch": 1.1931604352450298, "grad_norm": 0.09069020301103592, "learning_rate": 7.53440792250074e-06, "loss": 0.0014, "step": 72920 }, { "epoch": 1.1933240611961058, "grad_norm": 0.16172488033771515, "learning_rate": 7.5335872892820485e-06, "loss": 0.0026, "step": 72930 }, { "epoch": 1.1934876871471816, "grad_norm": 0.21352505683898926, "learning_rate": 7.532766564226908e-06, "loss": 0.002, "step": 72940 }, { "epoch": 1.1936513130982573, "grad_norm": 0.08566544950008392, "learning_rate": 7.531945747365066e-06, "loss": 0.0055, "step": 72950 }, { "epoch": 1.1938149390493331, "grad_norm": 0.11239166557788849, "learning_rate": 7.531124838726277e-06, "loss": 0.0016, "step": 72960 }, { "epoch": 1.193978565000409, "grad_norm": 0.04482845216989517, "learning_rate": 7.530303838340296e-06, "loss": 0.0014, "step": 72970 }, { "epoch": 1.1941421909514849, "grad_norm": 0.06593041867017746, "learning_rate": 7.5294827462368825e-06, "loss": 0.0018, "step": 72980 }, { "epoch": 1.1943058169025607, "grad_norm": 0.07472705096006393, "learning_rate": 7.528661562445798e-06, "loss": 0.0012, "step": 72990 }, { "epoch": 1.1944694428536367, "grad_norm": 0.21327978372573853, "learning_rate": 7.52784028699681e-06, "loss": 0.0027, "step": 73000 }, { "epoch": 1.1946330688047124, "grad_norm": 0.14941608905792236, "learning_rate": 7.527018919919686e-06, "loss": 0.0026, "step": 73010 }, { "epoch": 1.1947966947557882, "grad_norm": 0.24984896183013916, "learning_rate": 7.526197461244201e-06, "loss": 0.004, "step": 73020 }, { "epoch": 1.1949603207068642, "grad_norm": 0.2016327977180481, "learning_rate": 7.52537591100013e-06, "loss": 0.0018, "step": 73030 }, { "epoch": 1.19512394665794, "grad_norm": 0.04638541862368584, "learning_rate": 7.524554269217252e-06, "loss": 0.002, "step": 73040 }, { "epoch": 1.1952875726090157, "grad_norm": 0.09789019078016281, "learning_rate": 7.5237325359253475e-06, "loss": 0.0021, "step": 73050 }, { "epoch": 1.1954511985600917, "grad_norm": 0.14114952087402344, "learning_rate": 7.522910711154206e-06, "loss": 0.003, "step": 73060 }, { "epoch": 1.1956148245111675, "grad_norm": 0.26666074991226196, "learning_rate": 7.522088794933613e-06, "loss": 0.0017, "step": 73070 }, { "epoch": 1.1957784504622433, "grad_norm": 0.07841170579195023, "learning_rate": 7.521266787293364e-06, "loss": 0.0008, "step": 73080 }, { "epoch": 1.195942076413319, "grad_norm": 0.050705742090940475, "learning_rate": 7.520444688263253e-06, "loss": 0.0018, "step": 73090 }, { "epoch": 1.196105702364395, "grad_norm": 0.06191769242286682, "learning_rate": 7.519622497873081e-06, "loss": 0.0031, "step": 73100 }, { "epoch": 1.1962693283154708, "grad_norm": 0.03205306455492973, "learning_rate": 7.518800216152649e-06, "loss": 0.0017, "step": 73110 }, { "epoch": 1.1964329542665466, "grad_norm": 0.06465652585029602, "learning_rate": 7.517977843131762e-06, "loss": 0.0022, "step": 73120 }, { "epoch": 1.1965965802176226, "grad_norm": 0.08968547731637955, "learning_rate": 7.517155378840231e-06, "loss": 0.0034, "step": 73130 }, { "epoch": 1.1967602061686984, "grad_norm": 0.0811348631978035, "learning_rate": 7.516332823307867e-06, "loss": 0.0018, "step": 73140 }, { "epoch": 1.1969238321197742, "grad_norm": 0.0599542073905468, "learning_rate": 7.515510176564485e-06, "loss": 0.0021, "step": 73150 }, { "epoch": 1.19708745807085, "grad_norm": 0.16323351860046387, "learning_rate": 7.514687438639905e-06, "loss": 0.0025, "step": 73160 }, { "epoch": 1.197251084021926, "grad_norm": 0.18347062170505524, "learning_rate": 7.51386460956395e-06, "loss": 0.0024, "step": 73170 }, { "epoch": 1.1974147099730017, "grad_norm": 0.04154191538691521, "learning_rate": 7.513041689366446e-06, "loss": 0.0023, "step": 73180 }, { "epoch": 1.1975783359240775, "grad_norm": 0.26237234473228455, "learning_rate": 7.512218678077219e-06, "loss": 0.002, "step": 73190 }, { "epoch": 1.1977419618751535, "grad_norm": 0.24974653124809265, "learning_rate": 7.511395575726104e-06, "loss": 0.0036, "step": 73200 }, { "epoch": 1.1979055878262292, "grad_norm": 0.19067102670669556, "learning_rate": 7.5105723823429345e-06, "loss": 0.0027, "step": 73210 }, { "epoch": 1.198069213777305, "grad_norm": 0.13213880360126495, "learning_rate": 7.50974909795755e-06, "loss": 0.002, "step": 73220 }, { "epoch": 1.198232839728381, "grad_norm": 0.12766993045806885, "learning_rate": 7.5089257225997935e-06, "loss": 0.0019, "step": 73230 }, { "epoch": 1.1983964656794568, "grad_norm": 0.16532419621944427, "learning_rate": 7.50810225629951e-06, "loss": 0.0023, "step": 73240 }, { "epoch": 1.1985600916305326, "grad_norm": 0.13638997077941895, "learning_rate": 7.5072786990865465e-06, "loss": 0.0017, "step": 73250 }, { "epoch": 1.1987237175816086, "grad_norm": 0.26773399114608765, "learning_rate": 7.506455050990756e-06, "loss": 0.0018, "step": 73260 }, { "epoch": 1.1988873435326843, "grad_norm": 0.06717701256275177, "learning_rate": 7.505631312041994e-06, "loss": 0.0015, "step": 73270 }, { "epoch": 1.19905096948376, "grad_norm": 0.09393195807933807, "learning_rate": 7.5048074822701226e-06, "loss": 0.0014, "step": 73280 }, { "epoch": 1.1992145954348359, "grad_norm": 0.13148963451385498, "learning_rate": 7.503983561704995e-06, "loss": 0.0023, "step": 73290 }, { "epoch": 1.1993782213859119, "grad_norm": 0.13892988860607147, "learning_rate": 7.5031595503764846e-06, "loss": 0.0037, "step": 73300 }, { "epoch": 1.1995418473369877, "grad_norm": 0.15006650984287262, "learning_rate": 7.502335448314455e-06, "loss": 0.0021, "step": 73310 }, { "epoch": 1.1997054732880634, "grad_norm": 0.07400722801685333, "learning_rate": 7.501511255548782e-06, "loss": 0.0018, "step": 73320 }, { "epoch": 1.1998690992391392, "grad_norm": 0.1496189534664154, "learning_rate": 7.5006869721093365e-06, "loss": 0.003, "step": 73330 }, { "epoch": 1.2000327251902152, "grad_norm": 0.03369363397359848, "learning_rate": 7.499862598025999e-06, "loss": 0.0026, "step": 73340 }, { "epoch": 1.200196351141291, "grad_norm": 0.04446745291352272, "learning_rate": 7.499038133328649e-06, "loss": 0.0017, "step": 73350 }, { "epoch": 1.2003599770923667, "grad_norm": 0.13312967121601105, "learning_rate": 7.498213578047174e-06, "loss": 0.0028, "step": 73360 }, { "epoch": 1.2005236030434427, "grad_norm": 0.037850793451070786, "learning_rate": 7.4973889322114614e-06, "loss": 0.0015, "step": 73370 }, { "epoch": 1.2006872289945185, "grad_norm": 0.12466956675052643, "learning_rate": 7.496564195851401e-06, "loss": 0.0032, "step": 73380 }, { "epoch": 1.2008508549455943, "grad_norm": 0.16296936571598053, "learning_rate": 7.4957393689968914e-06, "loss": 0.0021, "step": 73390 }, { "epoch": 1.2010144808966703, "grad_norm": 0.056699302047491074, "learning_rate": 7.494914451677827e-06, "loss": 0.0022, "step": 73400 }, { "epoch": 1.201178106847746, "grad_norm": 0.058173660188913345, "learning_rate": 7.4940894439241094e-06, "loss": 0.0023, "step": 73410 }, { "epoch": 1.2013417327988218, "grad_norm": 0.18045341968536377, "learning_rate": 7.493264345765644e-06, "loss": 0.0073, "step": 73420 }, { "epoch": 1.2015053587498978, "grad_norm": 0.08167928457260132, "learning_rate": 7.492439157232339e-06, "loss": 0.0027, "step": 73430 }, { "epoch": 1.2016689847009736, "grad_norm": 0.1054738461971283, "learning_rate": 7.491613878354105e-06, "loss": 0.0018, "step": 73440 }, { "epoch": 1.2018326106520494, "grad_norm": 0.3511328101158142, "learning_rate": 7.490788509160856e-06, "loss": 0.0014, "step": 73450 }, { "epoch": 1.2019962366031254, "grad_norm": 0.05170730873942375, "learning_rate": 7.489963049682509e-06, "loss": 0.0011, "step": 73460 }, { "epoch": 1.2021598625542012, "grad_norm": 0.1402779370546341, "learning_rate": 7.4891374999489875e-06, "loss": 0.002, "step": 73470 }, { "epoch": 1.202323488505277, "grad_norm": 0.7360239028930664, "learning_rate": 7.4883118599902136e-06, "loss": 0.0029, "step": 73480 }, { "epoch": 1.2024871144563527, "grad_norm": 0.12634000182151794, "learning_rate": 7.487486129836115e-06, "loss": 0.0042, "step": 73490 }, { "epoch": 1.2026507404074287, "grad_norm": 0.2138158529996872, "learning_rate": 7.486660309516624e-06, "loss": 0.0025, "step": 73500 }, { "epoch": 1.2028143663585045, "grad_norm": 0.04578448086977005, "learning_rate": 7.485834399061671e-06, "loss": 0.0017, "step": 73510 }, { "epoch": 1.2029779923095802, "grad_norm": 0.07813996076583862, "learning_rate": 7.485008398501197e-06, "loss": 0.002, "step": 73520 }, { "epoch": 1.203141618260656, "grad_norm": 0.18239513039588928, "learning_rate": 7.48418230786514e-06, "loss": 0.0017, "step": 73530 }, { "epoch": 1.203305244211732, "grad_norm": 0.038591306656599045, "learning_rate": 7.483356127183448e-06, "loss": 0.0016, "step": 73540 }, { "epoch": 1.2034688701628078, "grad_norm": 0.10440230369567871, "learning_rate": 7.482529856486063e-06, "loss": 0.003, "step": 73550 }, { "epoch": 1.2036324961138836, "grad_norm": 0.0993897020816803, "learning_rate": 7.481703495802937e-06, "loss": 0.0015, "step": 73560 }, { "epoch": 1.2037961220649596, "grad_norm": 0.0966823622584343, "learning_rate": 7.480877045164023e-06, "loss": 0.0028, "step": 73570 }, { "epoch": 1.2039597480160353, "grad_norm": 0.026725709438323975, "learning_rate": 7.48005050459928e-06, "loss": 0.002, "step": 73580 }, { "epoch": 1.204123373967111, "grad_norm": 0.13750910758972168, "learning_rate": 7.4792238741386655e-06, "loss": 0.003, "step": 73590 }, { "epoch": 1.204286999918187, "grad_norm": 0.1044209897518158, "learning_rate": 7.478397153812146e-06, "loss": 0.002, "step": 73600 }, { "epoch": 1.2044506258692629, "grad_norm": 0.3290788531303406, "learning_rate": 7.477570343649686e-06, "loss": 0.0021, "step": 73610 }, { "epoch": 1.2046142518203387, "grad_norm": 0.0903046503663063, "learning_rate": 7.476743443681255e-06, "loss": 0.0024, "step": 73620 }, { "epoch": 1.2047778777714147, "grad_norm": 0.010570233687758446, "learning_rate": 7.475916453936827e-06, "loss": 0.0014, "step": 73630 }, { "epoch": 1.2049415037224904, "grad_norm": 0.00955671351402998, "learning_rate": 7.47508937444638e-06, "loss": 0.0027, "step": 73640 }, { "epoch": 1.2051051296735662, "grad_norm": 0.06452976167201996, "learning_rate": 7.47426220523989e-06, "loss": 0.0007, "step": 73650 }, { "epoch": 1.205268755624642, "grad_norm": 0.1388501673936844, "learning_rate": 7.473434946347341e-06, "loss": 0.0025, "step": 73660 }, { "epoch": 1.205432381575718, "grad_norm": 0.03420530632138252, "learning_rate": 7.472607597798721e-06, "loss": 0.0018, "step": 73670 }, { "epoch": 1.2055960075267937, "grad_norm": 0.13640691339969635, "learning_rate": 7.471780159624019e-06, "loss": 0.0026, "step": 73680 }, { "epoch": 1.2057596334778695, "grad_norm": 0.13650503754615784, "learning_rate": 7.470952631853228e-06, "loss": 0.0038, "step": 73690 }, { "epoch": 1.2059232594289455, "grad_norm": 0.042516257613897324, "learning_rate": 7.4701250145163404e-06, "loss": 0.0023, "step": 73700 }, { "epoch": 1.2060868853800213, "grad_norm": 0.01118846982717514, "learning_rate": 7.469297307643359e-06, "loss": 0.0019, "step": 73710 }, { "epoch": 1.206250511331097, "grad_norm": 0.2110971212387085, "learning_rate": 7.468469511264285e-06, "loss": 0.0032, "step": 73720 }, { "epoch": 1.2064141372821728, "grad_norm": 0.18284577131271362, "learning_rate": 7.467641625409122e-06, "loss": 0.0027, "step": 73730 }, { "epoch": 1.2065777632332488, "grad_norm": 0.04361890256404877, "learning_rate": 7.466813650107884e-06, "loss": 0.0017, "step": 73740 }, { "epoch": 1.2067413891843246, "grad_norm": 0.0691043883562088, "learning_rate": 7.465985585390579e-06, "loss": 0.0017, "step": 73750 }, { "epoch": 1.2069050151354004, "grad_norm": 0.15436670184135437, "learning_rate": 7.4651574312872225e-06, "loss": 0.0016, "step": 73760 }, { "epoch": 1.2070686410864764, "grad_norm": 0.08111707866191864, "learning_rate": 7.464329187827835e-06, "loss": 0.0022, "step": 73770 }, { "epoch": 1.2072322670375522, "grad_norm": 0.06875109672546387, "learning_rate": 7.4635008550424355e-06, "loss": 0.0012, "step": 73780 }, { "epoch": 1.207395892988628, "grad_norm": 0.18053790926933289, "learning_rate": 7.462672432961053e-06, "loss": 0.0024, "step": 73790 }, { "epoch": 1.207559518939704, "grad_norm": 0.028427856042981148, "learning_rate": 7.4618439216137125e-06, "loss": 0.0009, "step": 73800 }, { "epoch": 1.2077231448907797, "grad_norm": 0.04011492803692818, "learning_rate": 7.461015321030447e-06, "loss": 0.0016, "step": 73810 }, { "epoch": 1.2078867708418555, "grad_norm": 0.30109745264053345, "learning_rate": 7.4601866312412905e-06, "loss": 0.0026, "step": 73820 }, { "epoch": 1.2080503967929315, "grad_norm": 0.22535495460033417, "learning_rate": 7.459357852276284e-06, "loss": 0.0034, "step": 73830 }, { "epoch": 1.2082140227440072, "grad_norm": 0.1572130024433136, "learning_rate": 7.458528984165464e-06, "loss": 0.0032, "step": 73840 }, { "epoch": 1.208377648695083, "grad_norm": 0.15236243605613708, "learning_rate": 7.457700026938878e-06, "loss": 0.0022, "step": 73850 }, { "epoch": 1.2085412746461588, "grad_norm": 0.13043826818466187, "learning_rate": 7.456870980626573e-06, "loss": 0.0013, "step": 73860 }, { "epoch": 1.2087049005972348, "grad_norm": 0.09628401696681976, "learning_rate": 7.456041845258598e-06, "loss": 0.0013, "step": 73870 }, { "epoch": 1.2088685265483106, "grad_norm": 0.040267400443553925, "learning_rate": 7.45521262086501e-06, "loss": 0.0017, "step": 73880 }, { "epoch": 1.2090321524993863, "grad_norm": 0.008964836597442627, "learning_rate": 7.454383307475867e-06, "loss": 0.0029, "step": 73890 }, { "epoch": 1.2091957784504623, "grad_norm": 0.030985837802290916, "learning_rate": 7.4535539051212266e-06, "loss": 0.0013, "step": 73900 }, { "epoch": 1.209359404401538, "grad_norm": 0.13246433436870575, "learning_rate": 7.452724413831155e-06, "loss": 0.0038, "step": 73910 }, { "epoch": 1.2095230303526139, "grad_norm": 0.08268111944198608, "learning_rate": 7.451894833635718e-06, "loss": 0.0021, "step": 73920 }, { "epoch": 1.2096866563036897, "grad_norm": 0.08752504736185074, "learning_rate": 7.451065164564985e-06, "loss": 0.0025, "step": 73930 }, { "epoch": 1.2098502822547657, "grad_norm": 0.34026047587394714, "learning_rate": 7.450235406649033e-06, "loss": 0.0025, "step": 73940 }, { "epoch": 1.2100139082058414, "grad_norm": 0.10241920500993729, "learning_rate": 7.449405559917935e-06, "loss": 0.002, "step": 73950 }, { "epoch": 1.2101775341569172, "grad_norm": 0.1253937929868698, "learning_rate": 7.448575624401773e-06, "loss": 0.0022, "step": 73960 }, { "epoch": 1.2103411601079932, "grad_norm": 0.040853723883628845, "learning_rate": 7.4477456001306295e-06, "loss": 0.0017, "step": 73970 }, { "epoch": 1.210504786059069, "grad_norm": 0.07868856191635132, "learning_rate": 7.446915487134591e-06, "loss": 0.0038, "step": 73980 }, { "epoch": 1.2106684120101447, "grad_norm": 0.09584248811006546, "learning_rate": 7.446085285443748e-06, "loss": 0.0032, "step": 73990 }, { "epoch": 1.2108320379612207, "grad_norm": 0.048996828496456146, "learning_rate": 7.4452549950881905e-06, "loss": 0.0019, "step": 74000 }, { "epoch": 1.2109956639122965, "grad_norm": 0.1800202876329422, "learning_rate": 7.4444246160980176e-06, "loss": 0.0021, "step": 74010 }, { "epoch": 1.2111592898633723, "grad_norm": 0.09470319747924805, "learning_rate": 7.443594148503327e-06, "loss": 0.004, "step": 74020 }, { "epoch": 1.2113229158144483, "grad_norm": 0.23442859947681427, "learning_rate": 7.442763592334222e-06, "loss": 0.002, "step": 74030 }, { "epoch": 1.211486541765524, "grad_norm": 0.07005157321691513, "learning_rate": 7.441932947620808e-06, "loss": 0.0021, "step": 74040 }, { "epoch": 1.2116501677165998, "grad_norm": 0.09264736622571945, "learning_rate": 7.441102214393193e-06, "loss": 0.0021, "step": 74050 }, { "epoch": 1.2118137936676756, "grad_norm": 0.31985676288604736, "learning_rate": 7.440271392681491e-06, "loss": 0.0032, "step": 74060 }, { "epoch": 1.2119774196187516, "grad_norm": 0.18059299886226654, "learning_rate": 7.439440482515814e-06, "loss": 0.0017, "step": 74070 }, { "epoch": 1.2121410455698274, "grad_norm": 0.16675914824008942, "learning_rate": 7.438609483926282e-06, "loss": 0.0015, "step": 74080 }, { "epoch": 1.2123046715209032, "grad_norm": 0.24970807135105133, "learning_rate": 7.4377783969430206e-06, "loss": 0.0042, "step": 74090 }, { "epoch": 1.2124682974719792, "grad_norm": 0.11183233559131622, "learning_rate": 7.436947221596149e-06, "loss": 0.0025, "step": 74100 }, { "epoch": 1.212631923423055, "grad_norm": 0.09275684505701065, "learning_rate": 7.436115957915799e-06, "loss": 0.0019, "step": 74110 }, { "epoch": 1.2127955493741307, "grad_norm": 0.15110190212726593, "learning_rate": 7.435284605932099e-06, "loss": 0.0022, "step": 74120 }, { "epoch": 1.2129591753252065, "grad_norm": 0.028079111129045486, "learning_rate": 7.434453165675186e-06, "loss": 0.0023, "step": 74130 }, { "epoch": 1.2131228012762825, "grad_norm": 0.15616779029369354, "learning_rate": 7.433621637175197e-06, "loss": 0.0013, "step": 74140 }, { "epoch": 1.2132864272273582, "grad_norm": 0.1304924041032791, "learning_rate": 7.4327900204622725e-06, "loss": 0.0017, "step": 74150 }, { "epoch": 1.213450053178434, "grad_norm": 0.1529300957918167, "learning_rate": 7.4319583155665565e-06, "loss": 0.0024, "step": 74160 }, { "epoch": 1.21361367912951, "grad_norm": 0.12559092044830322, "learning_rate": 7.431126522518197e-06, "loss": 0.0044, "step": 74170 }, { "epoch": 1.2137773050805858, "grad_norm": 0.0652841329574585, "learning_rate": 7.430294641347344e-06, "loss": 0.0012, "step": 74180 }, { "epoch": 1.2139409310316616, "grad_norm": 0.0622672438621521, "learning_rate": 7.429462672084153e-06, "loss": 0.0016, "step": 74190 }, { "epoch": 1.2141045569827376, "grad_norm": 0.02094794623553753, "learning_rate": 7.428630614758776e-06, "loss": 0.0017, "step": 74200 }, { "epoch": 1.2142681829338133, "grad_norm": 0.18284811079502106, "learning_rate": 7.427798469401378e-06, "loss": 0.0022, "step": 74210 }, { "epoch": 1.214431808884889, "grad_norm": 0.1217878982424736, "learning_rate": 7.426966236042119e-06, "loss": 0.002, "step": 74220 }, { "epoch": 1.214595434835965, "grad_norm": 0.07922865450382233, "learning_rate": 7.426133914711169e-06, "loss": 0.0018, "step": 74230 }, { "epoch": 1.2147590607870409, "grad_norm": 0.29961922764778137, "learning_rate": 7.425301505438696e-06, "loss": 0.0048, "step": 74240 }, { "epoch": 1.2149226867381167, "grad_norm": 0.041613925248384476, "learning_rate": 7.4244690082548706e-06, "loss": 0.0041, "step": 74250 }, { "epoch": 1.2150863126891924, "grad_norm": 0.17574548721313477, "learning_rate": 7.423636423189873e-06, "loss": 0.0022, "step": 74260 }, { "epoch": 1.2152499386402684, "grad_norm": 0.0806005522608757, "learning_rate": 7.4228037502738785e-06, "loss": 0.0026, "step": 74270 }, { "epoch": 1.2154135645913442, "grad_norm": 0.09811805188655853, "learning_rate": 7.4219709895370705e-06, "loss": 0.0019, "step": 74280 }, { "epoch": 1.21557719054242, "grad_norm": 0.3578907549381256, "learning_rate": 7.421138141009636e-06, "loss": 0.0024, "step": 74290 }, { "epoch": 1.2157408164934957, "grad_norm": 0.1324716955423355, "learning_rate": 7.4203052047217624e-06, "loss": 0.0017, "step": 74300 }, { "epoch": 1.2159044424445717, "grad_norm": 0.1546030193567276, "learning_rate": 7.419472180703643e-06, "loss": 0.0023, "step": 74310 }, { "epoch": 1.2160680683956475, "grad_norm": 0.05751853808760643, "learning_rate": 7.4186390689854714e-06, "loss": 0.0024, "step": 74320 }, { "epoch": 1.2162316943467233, "grad_norm": 0.21121624112129211, "learning_rate": 7.4178058695974474e-06, "loss": 0.0048, "step": 74330 }, { "epoch": 1.2163953202977993, "grad_norm": 0.2034398317337036, "learning_rate": 7.416972582569772e-06, "loss": 0.0017, "step": 74340 }, { "epoch": 1.216558946248875, "grad_norm": 0.0504169799387455, "learning_rate": 7.416139207932649e-06, "loss": 0.0015, "step": 74350 }, { "epoch": 1.2167225721999508, "grad_norm": 0.17262530326843262, "learning_rate": 7.415305745716286e-06, "loss": 0.0017, "step": 74360 }, { "epoch": 1.2168861981510268, "grad_norm": 0.2116984874010086, "learning_rate": 7.414472195950894e-06, "loss": 0.0021, "step": 74370 }, { "epoch": 1.2170498241021026, "grad_norm": 0.022609880194067955, "learning_rate": 7.413638558666689e-06, "loss": 0.0018, "step": 74380 }, { "epoch": 1.2172134500531784, "grad_norm": 0.14700868725776672, "learning_rate": 7.41280483389389e-06, "loss": 0.0015, "step": 74390 }, { "epoch": 1.2173770760042544, "grad_norm": 0.09180580079555511, "learning_rate": 7.411971021662712e-06, "loss": 0.0016, "step": 74400 }, { "epoch": 1.2175407019553302, "grad_norm": 0.061221521347761154, "learning_rate": 7.411137122003381e-06, "loss": 0.0022, "step": 74410 }, { "epoch": 1.217704327906406, "grad_norm": 0.20302820205688477, "learning_rate": 7.410303134946125e-06, "loss": 0.0027, "step": 74420 }, { "epoch": 1.2178679538574817, "grad_norm": 0.08845239132642746, "learning_rate": 7.409469060521174e-06, "loss": 0.0019, "step": 74430 }, { "epoch": 1.2180315798085577, "grad_norm": 0.1340804249048233, "learning_rate": 7.40863489875876e-06, "loss": 0.0018, "step": 74440 }, { "epoch": 1.2181952057596335, "grad_norm": 0.2781793475151062, "learning_rate": 7.4078006496891195e-06, "loss": 0.0028, "step": 74450 }, { "epoch": 1.2183588317107092, "grad_norm": 0.0992671549320221, "learning_rate": 7.406966313342493e-06, "loss": 0.0018, "step": 74460 }, { "epoch": 1.2185224576617852, "grad_norm": 0.05967795103788376, "learning_rate": 7.406131889749122e-06, "loss": 0.0019, "step": 74470 }, { "epoch": 1.218686083612861, "grad_norm": 0.07916160672903061, "learning_rate": 7.405297378939253e-06, "loss": 0.002, "step": 74480 }, { "epoch": 1.2188497095639368, "grad_norm": 0.07955983281135559, "learning_rate": 7.404462780943138e-06, "loss": 0.0019, "step": 74490 }, { "epoch": 1.2190133355150126, "grad_norm": 0.08532337844371796, "learning_rate": 7.4036280957910226e-06, "loss": 0.0034, "step": 74500 }, { "epoch": 1.2191769614660886, "grad_norm": 0.08964477479457855, "learning_rate": 7.4027933235131666e-06, "loss": 0.0019, "step": 74510 }, { "epoch": 1.2193405874171643, "grad_norm": 0.09393186867237091, "learning_rate": 7.401958464139827e-06, "loss": 0.0014, "step": 74520 }, { "epoch": 1.21950421336824, "grad_norm": 0.04157751798629761, "learning_rate": 7.401123517701268e-06, "loss": 0.004, "step": 74530 }, { "epoch": 1.219667839319316, "grad_norm": 0.15870150923728943, "learning_rate": 7.400288484227751e-06, "loss": 0.0018, "step": 74540 }, { "epoch": 1.2198314652703919, "grad_norm": 0.1219710111618042, "learning_rate": 7.399453363749546e-06, "loss": 0.0019, "step": 74550 }, { "epoch": 1.2199950912214677, "grad_norm": 0.03308141976594925, "learning_rate": 7.398618156296923e-06, "loss": 0.0018, "step": 74560 }, { "epoch": 1.2201587171725436, "grad_norm": 0.22446565330028534, "learning_rate": 7.3977828619001564e-06, "loss": 0.0018, "step": 74570 }, { "epoch": 1.2203223431236194, "grad_norm": 0.14658498764038086, "learning_rate": 7.396947480589524e-06, "loss": 0.0028, "step": 74580 }, { "epoch": 1.2204859690746952, "grad_norm": 0.07871846854686737, "learning_rate": 7.396112012395308e-06, "loss": 0.0034, "step": 74590 }, { "epoch": 1.2206495950257712, "grad_norm": 0.22183069586753845, "learning_rate": 7.395276457347789e-06, "loss": 0.0024, "step": 74600 }, { "epoch": 1.220813220976847, "grad_norm": 0.12578792870044708, "learning_rate": 7.394440815477255e-06, "loss": 0.0021, "step": 74610 }, { "epoch": 1.2209768469279227, "grad_norm": 0.24926680326461792, "learning_rate": 7.393605086813998e-06, "loss": 0.0035, "step": 74620 }, { "epoch": 1.2211404728789985, "grad_norm": 0.07707841694355011, "learning_rate": 7.392769271388307e-06, "loss": 0.0027, "step": 74630 }, { "epoch": 1.2213040988300745, "grad_norm": 0.10797322541475296, "learning_rate": 7.3919333692304825e-06, "loss": 0.0019, "step": 74640 }, { "epoch": 1.2214677247811503, "grad_norm": 0.3985747694969177, "learning_rate": 7.391097380370822e-06, "loss": 0.0025, "step": 74650 }, { "epoch": 1.221631350732226, "grad_norm": 0.2966378927230835, "learning_rate": 7.390261304839628e-06, "loss": 0.0025, "step": 74660 }, { "epoch": 1.221794976683302, "grad_norm": 0.11711665242910385, "learning_rate": 7.389425142667207e-06, "loss": 0.0012, "step": 74670 }, { "epoch": 1.2219586026343778, "grad_norm": 0.25657913088798523, "learning_rate": 7.388588893883867e-06, "loss": 0.0038, "step": 74680 }, { "epoch": 1.2221222285854536, "grad_norm": 0.1111886128783226, "learning_rate": 7.3877525585199206e-06, "loss": 0.0024, "step": 74690 }, { "epoch": 1.2222858545365294, "grad_norm": 0.07965485751628876, "learning_rate": 7.386916136605683e-06, "loss": 0.0011, "step": 74700 }, { "epoch": 1.2224494804876054, "grad_norm": 0.12249523401260376, "learning_rate": 7.386079628171472e-06, "loss": 0.0037, "step": 74710 }, { "epoch": 1.2226131064386812, "grad_norm": 0.1074841096997261, "learning_rate": 7.385243033247609e-06, "loss": 0.0016, "step": 74720 }, { "epoch": 1.222776732389757, "grad_norm": 0.09446748346090317, "learning_rate": 7.384406351864418e-06, "loss": 0.0012, "step": 74730 }, { "epoch": 1.222940358340833, "grad_norm": 0.05598645284771919, "learning_rate": 7.38356958405223e-06, "loss": 0.0022, "step": 74740 }, { "epoch": 1.2231039842919087, "grad_norm": 0.22682826220989227, "learning_rate": 7.382732729841372e-06, "loss": 0.0022, "step": 74750 }, { "epoch": 1.2232676102429845, "grad_norm": 0.2344180941581726, "learning_rate": 7.381895789262179e-06, "loss": 0.0044, "step": 74760 }, { "epoch": 1.2234312361940605, "grad_norm": 0.21609066426753998, "learning_rate": 7.381058762344988e-06, "loss": 0.0032, "step": 74770 }, { "epoch": 1.2235948621451362, "grad_norm": 0.09093273431062698, "learning_rate": 7.380221649120141e-06, "loss": 0.0021, "step": 74780 }, { "epoch": 1.223758488096212, "grad_norm": 0.031923212110996246, "learning_rate": 7.379384449617979e-06, "loss": 0.0027, "step": 74790 }, { "epoch": 1.223922114047288, "grad_norm": 0.1188010424375534, "learning_rate": 7.37854716386885e-06, "loss": 0.0016, "step": 74800 }, { "epoch": 1.2240857399983638, "grad_norm": 0.14683733880519867, "learning_rate": 7.377709791903102e-06, "loss": 0.0014, "step": 74810 }, { "epoch": 1.2242493659494396, "grad_norm": 0.050854261964559555, "learning_rate": 7.376872333751089e-06, "loss": 0.0017, "step": 74820 }, { "epoch": 1.2244129919005153, "grad_norm": 0.08790922164916992, "learning_rate": 7.376034789443167e-06, "loss": 0.0017, "step": 74830 }, { "epoch": 1.2245766178515913, "grad_norm": 0.01875845342874527, "learning_rate": 7.375197159009695e-06, "loss": 0.001, "step": 74840 }, { "epoch": 1.224740243802667, "grad_norm": 0.24824963510036469, "learning_rate": 7.3743594424810335e-06, "loss": 0.0018, "step": 74850 }, { "epoch": 1.2249038697537429, "grad_norm": 0.13142342865467072, "learning_rate": 7.373521639887548e-06, "loss": 0.0016, "step": 74860 }, { "epoch": 1.2250674957048189, "grad_norm": 0.07221851497888565, "learning_rate": 7.3726837512596094e-06, "loss": 0.0021, "step": 74870 }, { "epoch": 1.2252311216558947, "grad_norm": 0.06531517207622528, "learning_rate": 7.3718457766275865e-06, "loss": 0.0027, "step": 74880 }, { "epoch": 1.2253947476069704, "grad_norm": 0.26921960711479187, "learning_rate": 7.371007716021857e-06, "loss": 0.0027, "step": 74890 }, { "epoch": 1.2255583735580462, "grad_norm": 0.11885719746351242, "learning_rate": 7.370169569472794e-06, "loss": 0.0016, "step": 74900 }, { "epoch": 1.2257219995091222, "grad_norm": 0.13144849240779877, "learning_rate": 7.3693313370107814e-06, "loss": 0.0023, "step": 74910 }, { "epoch": 1.225885625460198, "grad_norm": 0.0871674045920372, "learning_rate": 7.368493018666202e-06, "loss": 0.0023, "step": 74920 }, { "epoch": 1.2260492514112737, "grad_norm": 0.14775460958480835, "learning_rate": 7.367654614469444e-06, "loss": 0.002, "step": 74930 }, { "epoch": 1.2262128773623497, "grad_norm": 0.03863407298922539, "learning_rate": 7.366816124450895e-06, "loss": 0.002, "step": 74940 }, { "epoch": 1.2263765033134255, "grad_norm": 0.08186143636703491, "learning_rate": 7.365977548640952e-06, "loss": 0.0013, "step": 74950 }, { "epoch": 1.2265401292645013, "grad_norm": 0.1456160843372345, "learning_rate": 7.365138887070008e-06, "loss": 0.0025, "step": 74960 }, { "epoch": 1.2267037552155773, "grad_norm": 0.032390229403972626, "learning_rate": 7.364300139768464e-06, "loss": 0.0011, "step": 74970 }, { "epoch": 1.226867381166653, "grad_norm": 0.2192324697971344, "learning_rate": 7.363461306766722e-06, "loss": 0.0029, "step": 74980 }, { "epoch": 1.2270310071177288, "grad_norm": 0.1215648353099823, "learning_rate": 7.362622388095191e-06, "loss": 0.003, "step": 74990 }, { "epoch": 1.2271946330688048, "grad_norm": 0.11336185783147812, "learning_rate": 7.361783383784274e-06, "loss": 0.0025, "step": 75000 }, { "epoch": 1.2271946330688048, "eval_loss": 0.0017955370713025331, "eval_runtime": 3.0975, "eval_samples_per_second": 64.568, "eval_steps_per_second": 16.142, "step": 75000 }, { "epoch": 1.2273582590198806, "grad_norm": 0.15772020816802979, "learning_rate": 7.360944293864385e-06, "loss": 0.0016, "step": 75010 }, { "epoch": 1.2275218849709564, "grad_norm": 0.13088387250900269, "learning_rate": 7.360105118365941e-06, "loss": 0.0023, "step": 75020 }, { "epoch": 1.2276855109220322, "grad_norm": 0.045769598335027695, "learning_rate": 7.3592658573193595e-06, "loss": 0.0019, "step": 75030 }, { "epoch": 1.2278491368731081, "grad_norm": 0.16903211176395416, "learning_rate": 7.358426510755061e-06, "loss": 0.0035, "step": 75040 }, { "epoch": 1.228012762824184, "grad_norm": 0.03118967078626156, "learning_rate": 7.35758707870347e-06, "loss": 0.0019, "step": 75050 }, { "epoch": 1.2281763887752597, "grad_norm": 0.051891084760427475, "learning_rate": 7.356747561195013e-06, "loss": 0.0026, "step": 75060 }, { "epoch": 1.2283400147263357, "grad_norm": 0.059333398938179016, "learning_rate": 7.35590795826012e-06, "loss": 0.0019, "step": 75070 }, { "epoch": 1.2285036406774115, "grad_norm": 0.12364913523197174, "learning_rate": 7.3550682699292266e-06, "loss": 0.0024, "step": 75080 }, { "epoch": 1.2286672666284872, "grad_norm": 0.2238989621400833, "learning_rate": 7.35422849623277e-06, "loss": 0.0017, "step": 75090 }, { "epoch": 1.228830892579563, "grad_norm": 0.04785197973251343, "learning_rate": 7.353388637201186e-06, "loss": 0.0015, "step": 75100 }, { "epoch": 1.228994518530639, "grad_norm": 0.15885503590106964, "learning_rate": 7.352548692864923e-06, "loss": 0.0018, "step": 75110 }, { "epoch": 1.2291581444817148, "grad_norm": 0.05512414500117302, "learning_rate": 7.3517086632544225e-06, "loss": 0.0022, "step": 75120 }, { "epoch": 1.2293217704327906, "grad_norm": 0.08134091645479202, "learning_rate": 7.350868548400135e-06, "loss": 0.0031, "step": 75130 }, { "epoch": 1.2294853963838666, "grad_norm": 0.06621944159269333, "learning_rate": 7.350028348332513e-06, "loss": 0.0029, "step": 75140 }, { "epoch": 1.2296490223349423, "grad_norm": 0.07733746618032455, "learning_rate": 7.3491880630820126e-06, "loss": 0.0018, "step": 75150 }, { "epoch": 1.229812648286018, "grad_norm": 0.052341047674417496, "learning_rate": 7.348347692679089e-06, "loss": 0.0026, "step": 75160 }, { "epoch": 1.229976274237094, "grad_norm": 0.04901240020990372, "learning_rate": 7.347507237154208e-06, "loss": 0.0015, "step": 75170 }, { "epoch": 1.2301399001881699, "grad_norm": 0.19879671931266785, "learning_rate": 7.34666669653783e-06, "loss": 0.0018, "step": 75180 }, { "epoch": 1.2303035261392457, "grad_norm": 0.19959190487861633, "learning_rate": 7.345826070860425e-06, "loss": 0.0013, "step": 75190 }, { "epoch": 1.2304671520903216, "grad_norm": 0.2508465349674225, "learning_rate": 7.344985360152464e-06, "loss": 0.0023, "step": 75200 }, { "epoch": 1.2306307780413974, "grad_norm": 0.12279736250638962, "learning_rate": 7.344144564444418e-06, "loss": 0.0015, "step": 75210 }, { "epoch": 1.2307944039924732, "grad_norm": 0.04754587262868881, "learning_rate": 7.343303683766765e-06, "loss": 0.0025, "step": 75220 }, { "epoch": 1.230958029943549, "grad_norm": 0.0939144566655159, "learning_rate": 7.342462718149988e-06, "loss": 0.0023, "step": 75230 }, { "epoch": 1.231121655894625, "grad_norm": 0.07351992279291153, "learning_rate": 7.341621667624566e-06, "loss": 0.0013, "step": 75240 }, { "epoch": 1.2312852818457007, "grad_norm": 0.05443692207336426, "learning_rate": 7.340780532220986e-06, "loss": 0.0012, "step": 75250 }, { "epoch": 1.2314489077967765, "grad_norm": 0.25558826327323914, "learning_rate": 7.339939311969738e-06, "loss": 0.0033, "step": 75260 }, { "epoch": 1.2316125337478523, "grad_norm": 0.07000099122524261, "learning_rate": 7.339098006901314e-06, "loss": 0.0032, "step": 75270 }, { "epoch": 1.2317761596989283, "grad_norm": 0.11121514439582825, "learning_rate": 7.338256617046209e-06, "loss": 0.0021, "step": 75280 }, { "epoch": 1.231939785650004, "grad_norm": 0.18084879219532013, "learning_rate": 7.337415142434921e-06, "loss": 0.003, "step": 75290 }, { "epoch": 1.2321034116010798, "grad_norm": 0.06975261867046356, "learning_rate": 7.336573583097953e-06, "loss": 0.0017, "step": 75300 }, { "epoch": 1.2322670375521558, "grad_norm": 0.15908686816692352, "learning_rate": 7.3357319390658075e-06, "loss": 0.0013, "step": 75310 }, { "epoch": 1.2324306635032316, "grad_norm": 0.1834021806716919, "learning_rate": 7.334890210368994e-06, "loss": 0.0044, "step": 75320 }, { "epoch": 1.2325942894543074, "grad_norm": 0.1268003284931183, "learning_rate": 7.334048397038021e-06, "loss": 0.0023, "step": 75330 }, { "epoch": 1.2327579154053834, "grad_norm": 0.12502337992191315, "learning_rate": 7.333206499103404e-06, "loss": 0.0016, "step": 75340 }, { "epoch": 1.2329215413564591, "grad_norm": 0.36029180884361267, "learning_rate": 7.332364516595659e-06, "loss": 0.0021, "step": 75350 }, { "epoch": 1.233085167307535, "grad_norm": 0.04359174519777298, "learning_rate": 7.331522449545307e-06, "loss": 0.0013, "step": 75360 }, { "epoch": 1.233248793258611, "grad_norm": 0.1549016833305359, "learning_rate": 7.330680297982868e-06, "loss": 0.0019, "step": 75370 }, { "epoch": 1.2334124192096867, "grad_norm": 0.14654278755187988, "learning_rate": 7.3298380619388695e-06, "loss": 0.0016, "step": 75380 }, { "epoch": 1.2335760451607625, "grad_norm": 0.10543999820947647, "learning_rate": 7.328995741443843e-06, "loss": 0.0083, "step": 75390 }, { "epoch": 1.2337396711118382, "grad_norm": 0.1945066750049591, "learning_rate": 7.328153336528319e-06, "loss": 0.0018, "step": 75400 }, { "epoch": 1.2339032970629142, "grad_norm": 0.07839474827051163, "learning_rate": 7.327310847222831e-06, "loss": 0.0019, "step": 75410 }, { "epoch": 1.23406692301399, "grad_norm": 0.03426682949066162, "learning_rate": 7.3264682735579165e-06, "loss": 0.0014, "step": 75420 }, { "epoch": 1.2342305489650658, "grad_norm": 0.081778384745121, "learning_rate": 7.325625615564119e-06, "loss": 0.0014, "step": 75430 }, { "epoch": 1.2343941749161418, "grad_norm": 0.11724001169204712, "learning_rate": 7.324782873271984e-06, "loss": 0.0018, "step": 75440 }, { "epoch": 1.2345578008672176, "grad_norm": 0.04757022485136986, "learning_rate": 7.323940046712057e-06, "loss": 0.0014, "step": 75450 }, { "epoch": 1.2347214268182933, "grad_norm": 0.057816553860902786, "learning_rate": 7.323097135914889e-06, "loss": 0.0017, "step": 75460 }, { "epoch": 1.234885052769369, "grad_norm": 0.559280276298523, "learning_rate": 7.3222541409110325e-06, "loss": 0.0034, "step": 75470 }, { "epoch": 1.235048678720445, "grad_norm": 0.5130457878112793, "learning_rate": 7.3214110617310454e-06, "loss": 0.0026, "step": 75480 }, { "epoch": 1.2352123046715209, "grad_norm": 0.02552800253033638, "learning_rate": 7.320567898405487e-06, "loss": 0.0016, "step": 75490 }, { "epoch": 1.2353759306225967, "grad_norm": 0.04089820384979248, "learning_rate": 7.319724650964918e-06, "loss": 0.0025, "step": 75500 }, { "epoch": 1.2355395565736726, "grad_norm": 0.08133328706026077, "learning_rate": 7.318881319439907e-06, "loss": 0.003, "step": 75510 }, { "epoch": 1.2357031825247484, "grad_norm": 0.184864804148674, "learning_rate": 7.318037903861021e-06, "loss": 0.0032, "step": 75520 }, { "epoch": 1.2358668084758242, "grad_norm": 0.1661335974931717, "learning_rate": 7.317194404258832e-06, "loss": 0.002, "step": 75530 }, { "epoch": 1.2360304344269002, "grad_norm": 0.11695144325494766, "learning_rate": 7.316350820663915e-06, "loss": 0.0017, "step": 75540 }, { "epoch": 1.236194060377976, "grad_norm": 0.2725054621696472, "learning_rate": 7.31550715310685e-06, "loss": 0.0021, "step": 75550 }, { "epoch": 1.2363576863290517, "grad_norm": 0.22499892115592957, "learning_rate": 7.314663401618214e-06, "loss": 0.0015, "step": 75560 }, { "epoch": 1.2365213122801277, "grad_norm": 0.07645678520202637, "learning_rate": 7.313819566228592e-06, "loss": 0.0017, "step": 75570 }, { "epoch": 1.2366849382312035, "grad_norm": 0.11282423883676529, "learning_rate": 7.312975646968573e-06, "loss": 0.0026, "step": 75580 }, { "epoch": 1.2368485641822793, "grad_norm": 0.03659214451909065, "learning_rate": 7.312131643868746e-06, "loss": 0.0019, "step": 75590 }, { "epoch": 1.237012190133355, "grad_norm": 0.2823604345321655, "learning_rate": 7.311287556959705e-06, "loss": 0.0018, "step": 75600 }, { "epoch": 1.237175816084431, "grad_norm": 0.012864679098129272, "learning_rate": 7.310443386272043e-06, "loss": 0.0012, "step": 75610 }, { "epoch": 1.2373394420355068, "grad_norm": 0.16263914108276367, "learning_rate": 7.309599131836363e-06, "loss": 0.002, "step": 75620 }, { "epoch": 1.2375030679865826, "grad_norm": 0.18927639722824097, "learning_rate": 7.308754793683265e-06, "loss": 0.002, "step": 75630 }, { "epoch": 1.2376666939376586, "grad_norm": 0.07541917264461517, "learning_rate": 7.307910371843354e-06, "loss": 0.0017, "step": 75640 }, { "epoch": 1.2378303198887344, "grad_norm": 0.06526405364274979, "learning_rate": 7.307065866347238e-06, "loss": 0.002, "step": 75650 }, { "epoch": 1.2379939458398102, "grad_norm": 0.22764533758163452, "learning_rate": 7.3062212772255314e-06, "loss": 0.0041, "step": 75660 }, { "epoch": 1.238157571790886, "grad_norm": 0.06345629692077637, "learning_rate": 7.305376604508845e-06, "loss": 0.0015, "step": 75670 }, { "epoch": 1.238321197741962, "grad_norm": 0.05479049310088158, "learning_rate": 7.304531848227798e-06, "loss": 0.0017, "step": 75680 }, { "epoch": 1.2384848236930377, "grad_norm": 0.04212285950779915, "learning_rate": 7.303687008413011e-06, "loss": 0.0015, "step": 75690 }, { "epoch": 1.2386484496441135, "grad_norm": 0.21634085476398468, "learning_rate": 7.302842085095106e-06, "loss": 0.0029, "step": 75700 }, { "epoch": 1.2388120755951895, "grad_norm": 0.06691079586744308, "learning_rate": 7.30199707830471e-06, "loss": 0.002, "step": 75710 }, { "epoch": 1.2389757015462652, "grad_norm": 0.061119671911001205, "learning_rate": 7.301151988072452e-06, "loss": 0.0013, "step": 75720 }, { "epoch": 1.239139327497341, "grad_norm": 0.1389298290014267, "learning_rate": 7.300306814428966e-06, "loss": 0.0022, "step": 75730 }, { "epoch": 1.239302953448417, "grad_norm": 0.15854953229427338, "learning_rate": 7.299461557404886e-06, "loss": 0.0017, "step": 75740 }, { "epoch": 1.2394665793994928, "grad_norm": 0.02639460749924183, "learning_rate": 7.298616217030853e-06, "loss": 0.0019, "step": 75750 }, { "epoch": 1.2396302053505686, "grad_norm": 0.26457294821739197, "learning_rate": 7.2977707933375054e-06, "loss": 0.0025, "step": 75760 }, { "epoch": 1.2397938313016446, "grad_norm": 0.15434350073337555, "learning_rate": 7.296925286355489e-06, "loss": 0.0032, "step": 75770 }, { "epoch": 1.2399574572527203, "grad_norm": 0.1481945812702179, "learning_rate": 7.296079696115451e-06, "loss": 0.003, "step": 75780 }, { "epoch": 1.240121083203796, "grad_norm": 0.13540132343769073, "learning_rate": 7.295234022648042e-06, "loss": 0.0028, "step": 75790 }, { "epoch": 1.2402847091548719, "grad_norm": 0.1139894425868988, "learning_rate": 7.294388265983919e-06, "loss": 0.0016, "step": 75800 }, { "epoch": 1.2404483351059479, "grad_norm": 0.422225683927536, "learning_rate": 7.293542426153732e-06, "loss": 0.0024, "step": 75810 }, { "epoch": 1.2406119610570236, "grad_norm": 0.08003806322813034, "learning_rate": 7.2926965031881465e-06, "loss": 0.0015, "step": 75820 }, { "epoch": 1.2407755870080994, "grad_norm": 0.20687858760356903, "learning_rate": 7.291850497117824e-06, "loss": 0.0023, "step": 75830 }, { "epoch": 1.2409392129591754, "grad_norm": 0.08286328613758087, "learning_rate": 7.291004407973428e-06, "loss": 0.002, "step": 75840 }, { "epoch": 1.2411028389102512, "grad_norm": 0.0753655657172203, "learning_rate": 7.2901582357856285e-06, "loss": 0.0016, "step": 75850 }, { "epoch": 1.241266464861327, "grad_norm": 0.22551557421684265, "learning_rate": 7.289311980585098e-06, "loss": 0.0014, "step": 75860 }, { "epoch": 1.2414300908124027, "grad_norm": 0.08038531988859177, "learning_rate": 7.2884656424025095e-06, "loss": 0.0018, "step": 75870 }, { "epoch": 1.2415937167634787, "grad_norm": 0.16861200332641602, "learning_rate": 7.287619221268541e-06, "loss": 0.0055, "step": 75880 }, { "epoch": 1.2417573427145545, "grad_norm": 0.10394301265478134, "learning_rate": 7.286772717213874e-06, "loss": 0.003, "step": 75890 }, { "epoch": 1.2419209686656303, "grad_norm": 0.09122875332832336, "learning_rate": 7.285926130269195e-06, "loss": 0.0046, "step": 75900 }, { "epoch": 1.2420845946167063, "grad_norm": 0.2115568220615387, "learning_rate": 7.285079460465185e-06, "loss": 0.0023, "step": 75910 }, { "epoch": 1.242248220567782, "grad_norm": 0.1815410554409027, "learning_rate": 7.284232707832538e-06, "loss": 0.0027, "step": 75920 }, { "epoch": 1.2424118465188578, "grad_norm": 0.15625382959842682, "learning_rate": 7.283385872401944e-06, "loss": 0.0018, "step": 75930 }, { "epoch": 1.2425754724699338, "grad_norm": 0.058230139315128326, "learning_rate": 7.282538954204102e-06, "loss": 0.0018, "step": 75940 }, { "epoch": 1.2427390984210096, "grad_norm": 0.037460390478372574, "learning_rate": 7.281691953269707e-06, "loss": 0.0066, "step": 75950 }, { "epoch": 1.2429027243720854, "grad_norm": 0.020542221143841743, "learning_rate": 7.280844869629464e-06, "loss": 0.0015, "step": 75960 }, { "epoch": 1.2430663503231614, "grad_norm": 0.09572634845972061, "learning_rate": 7.279997703314076e-06, "loss": 0.0014, "step": 75970 }, { "epoch": 1.2432299762742371, "grad_norm": 0.030780604109168053, "learning_rate": 7.27915045435425e-06, "loss": 0.0026, "step": 75980 }, { "epoch": 1.243393602225313, "grad_norm": 0.36935174465179443, "learning_rate": 7.278303122780699e-06, "loss": 0.0027, "step": 75990 }, { "epoch": 1.2435572281763887, "grad_norm": 0.08313564956188202, "learning_rate": 7.277455708624135e-06, "loss": 0.0017, "step": 76000 }, { "epoch": 1.2437208541274647, "grad_norm": 0.03930523991584778, "learning_rate": 7.276608211915275e-06, "loss": 0.0016, "step": 76010 }, { "epoch": 1.2438844800785405, "grad_norm": 0.037010665982961655, "learning_rate": 7.27576063268484e-06, "loss": 0.002, "step": 76020 }, { "epoch": 1.2440481060296162, "grad_norm": 0.028221316635608673, "learning_rate": 7.27491297096355e-06, "loss": 0.0018, "step": 76030 }, { "epoch": 1.244211731980692, "grad_norm": 0.01731671579182148, "learning_rate": 7.274065226782134e-06, "loss": 0.0023, "step": 76040 }, { "epoch": 1.244375357931768, "grad_norm": 0.15924237668514252, "learning_rate": 7.27321740017132e-06, "loss": 0.0023, "step": 76050 }, { "epoch": 1.2445389838828438, "grad_norm": 0.06417131423950195, "learning_rate": 7.272369491161837e-06, "loss": 0.0017, "step": 76060 }, { "epoch": 1.2447026098339196, "grad_norm": 0.07908331602811813, "learning_rate": 7.271521499784421e-06, "loss": 0.002, "step": 76070 }, { "epoch": 1.2448662357849956, "grad_norm": 0.15662676095962524, "learning_rate": 7.270673426069811e-06, "loss": 0.0021, "step": 76080 }, { "epoch": 1.2450298617360713, "grad_norm": 0.026165012270212173, "learning_rate": 7.269825270048747e-06, "loss": 0.0021, "step": 76090 }, { "epoch": 1.245193487687147, "grad_norm": 0.19637195765972137, "learning_rate": 7.2689770317519726e-06, "loss": 0.002, "step": 76100 }, { "epoch": 1.245357113638223, "grad_norm": 0.1039731353521347, "learning_rate": 7.268128711210234e-06, "loss": 0.0025, "step": 76110 }, { "epoch": 1.2455207395892989, "grad_norm": 0.10456263273954391, "learning_rate": 7.26728030845428e-06, "loss": 0.0017, "step": 76120 }, { "epoch": 1.2456843655403746, "grad_norm": 0.17909160256385803, "learning_rate": 7.266431823514864e-06, "loss": 0.0013, "step": 76130 }, { "epoch": 1.2458479914914506, "grad_norm": 0.1260414570569992, "learning_rate": 7.265583256422742e-06, "loss": 0.0016, "step": 76140 }, { "epoch": 1.2460116174425264, "grad_norm": 0.12704215943813324, "learning_rate": 7.264734607208673e-06, "loss": 0.0015, "step": 76150 }, { "epoch": 1.2461752433936022, "grad_norm": 0.0274752639234066, "learning_rate": 7.263885875903417e-06, "loss": 0.0011, "step": 76160 }, { "epoch": 1.2463388693446782, "grad_norm": 0.21337048709392548, "learning_rate": 7.263037062537739e-06, "loss": 0.0023, "step": 76170 }, { "epoch": 1.246502495295754, "grad_norm": 0.11034653335809708, "learning_rate": 7.262188167142406e-06, "loss": 0.0025, "step": 76180 }, { "epoch": 1.2466661212468297, "grad_norm": 0.06820209324359894, "learning_rate": 7.26133918974819e-06, "loss": 0.0041, "step": 76190 }, { "epoch": 1.2468297471979055, "grad_norm": 0.1598266065120697, "learning_rate": 7.260490130385863e-06, "loss": 0.002, "step": 76200 }, { "epoch": 1.2469933731489815, "grad_norm": 0.11024352163076401, "learning_rate": 7.259640989086202e-06, "loss": 0.002, "step": 76210 }, { "epoch": 1.2471569991000573, "grad_norm": 0.12314579635858536, "learning_rate": 7.2587917658799854e-06, "loss": 0.0014, "step": 76220 }, { "epoch": 1.247320625051133, "grad_norm": 0.14845508337020874, "learning_rate": 7.257942460797996e-06, "loss": 0.0017, "step": 76230 }, { "epoch": 1.2474842510022088, "grad_norm": 0.11476452648639679, "learning_rate": 7.25709307387102e-06, "loss": 0.0014, "step": 76240 }, { "epoch": 1.2476478769532848, "grad_norm": 0.0909375324845314, "learning_rate": 7.256243605129846e-06, "loss": 0.0035, "step": 76250 }, { "epoch": 1.2478115029043606, "grad_norm": 0.1726021021604538, "learning_rate": 7.2553940546052625e-06, "loss": 0.0021, "step": 76260 }, { "epoch": 1.2479751288554364, "grad_norm": 0.012697853147983551, "learning_rate": 7.2545444223280645e-06, "loss": 0.0033, "step": 76270 }, { "epoch": 1.2481387548065124, "grad_norm": 0.2170550674200058, "learning_rate": 7.253694708329051e-06, "loss": 0.0017, "step": 76280 }, { "epoch": 1.2483023807575881, "grad_norm": 0.09155912697315216, "learning_rate": 7.252844912639019e-06, "loss": 0.0024, "step": 76290 }, { "epoch": 1.248466006708664, "grad_norm": 0.21381379663944244, "learning_rate": 7.251995035288777e-06, "loss": 0.0023, "step": 76300 }, { "epoch": 1.24862963265974, "grad_norm": 0.13857270777225494, "learning_rate": 7.251145076309125e-06, "loss": 0.0028, "step": 76310 }, { "epoch": 1.2487932586108157, "grad_norm": 0.12481576949357986, "learning_rate": 7.250295035730874e-06, "loss": 0.0036, "step": 76320 }, { "epoch": 1.2489568845618915, "grad_norm": 0.03987983986735344, "learning_rate": 7.249444913584839e-06, "loss": 0.0024, "step": 76330 }, { "epoch": 1.2491205105129675, "grad_norm": 0.10182177275419235, "learning_rate": 7.248594709901831e-06, "loss": 0.0017, "step": 76340 }, { "epoch": 1.2492841364640432, "grad_norm": 0.08626046031713486, "learning_rate": 7.247744424712668e-06, "loss": 0.002, "step": 76350 }, { "epoch": 1.249447762415119, "grad_norm": 0.05633806809782982, "learning_rate": 7.246894058048173e-06, "loss": 0.0017, "step": 76360 }, { "epoch": 1.2496113883661948, "grad_norm": 0.09064376354217529, "learning_rate": 7.246043609939169e-06, "loss": 0.0019, "step": 76370 }, { "epoch": 1.2497750143172708, "grad_norm": 0.19761058688163757, "learning_rate": 7.24519308041648e-06, "loss": 0.0042, "step": 76380 }, { "epoch": 1.2499386402683466, "grad_norm": 0.06989564746618271, "learning_rate": 7.244342469510941e-06, "loss": 0.0015, "step": 76390 }, { "epoch": 1.2501022662194223, "grad_norm": 0.2068031132221222, "learning_rate": 7.243491777253381e-06, "loss": 0.0017, "step": 76400 }, { "epoch": 1.250265892170498, "grad_norm": 0.21270398795604706, "learning_rate": 7.242641003674635e-06, "loss": 0.0017, "step": 76410 }, { "epoch": 1.250429518121574, "grad_norm": 0.24101294577121735, "learning_rate": 7.241790148805543e-06, "loss": 0.0016, "step": 76420 }, { "epoch": 1.2505931440726499, "grad_norm": 0.07772132754325867, "learning_rate": 7.240939212676946e-06, "loss": 0.0016, "step": 76430 }, { "epoch": 1.2507567700237257, "grad_norm": 0.09166130423545837, "learning_rate": 7.240088195319689e-06, "loss": 0.0031, "step": 76440 }, { "epoch": 1.2509203959748016, "grad_norm": 0.08283478766679764, "learning_rate": 7.23923709676462e-06, "loss": 0.0018, "step": 76450 }, { "epoch": 1.2510840219258774, "grad_norm": 0.09896039962768555, "learning_rate": 7.238385917042586e-06, "loss": 0.0011, "step": 76460 }, { "epoch": 1.2512476478769532, "grad_norm": 0.07560428977012634, "learning_rate": 7.237534656184443e-06, "loss": 0.0011, "step": 76470 }, { "epoch": 1.2514112738280292, "grad_norm": 0.1504267007112503, "learning_rate": 7.236683314221047e-06, "loss": 0.0018, "step": 76480 }, { "epoch": 1.251574899779105, "grad_norm": 0.09247831255197525, "learning_rate": 7.2358318911832556e-06, "loss": 0.002, "step": 76490 }, { "epoch": 1.2517385257301807, "grad_norm": 0.023884030058979988, "learning_rate": 7.234980387101933e-06, "loss": 0.0013, "step": 76500 }, { "epoch": 1.2519021516812567, "grad_norm": 0.009047040715813637, "learning_rate": 7.234128802007943e-06, "loss": 0.0009, "step": 76510 }, { "epoch": 1.2520657776323325, "grad_norm": 0.1714857965707779, "learning_rate": 7.233277135932152e-06, "loss": 0.0023, "step": 76520 }, { "epoch": 1.2522294035834083, "grad_norm": 0.20701821148395538, "learning_rate": 7.232425388905433e-06, "loss": 0.0019, "step": 76530 }, { "epoch": 1.2523930295344843, "grad_norm": 0.1728150099515915, "learning_rate": 7.23157356095866e-06, "loss": 0.0024, "step": 76540 }, { "epoch": 1.25255665548556, "grad_norm": 0.12822218239307404, "learning_rate": 7.230721652122708e-06, "loss": 0.0021, "step": 76550 }, { "epoch": 1.2527202814366358, "grad_norm": 0.09107663482427597, "learning_rate": 7.229869662428457e-06, "loss": 0.0021, "step": 76560 }, { "epoch": 1.2528839073877118, "grad_norm": 0.1213102787733078, "learning_rate": 7.229017591906792e-06, "loss": 0.002, "step": 76570 }, { "epoch": 1.2530475333387876, "grad_norm": 0.10051631927490234, "learning_rate": 7.228165440588594e-06, "loss": 0.0015, "step": 76580 }, { "epoch": 1.2532111592898634, "grad_norm": 0.21822141110897064, "learning_rate": 7.227313208504756e-06, "loss": 0.0018, "step": 76590 }, { "epoch": 1.2533747852409391, "grad_norm": 0.37389299273490906, "learning_rate": 7.226460895686168e-06, "loss": 0.0026, "step": 76600 }, { "epoch": 1.253538411192015, "grad_norm": 0.20237046480178833, "learning_rate": 7.225608502163723e-06, "loss": 0.0024, "step": 76610 }, { "epoch": 1.253702037143091, "grad_norm": 0.17468029260635376, "learning_rate": 7.2247560279683184e-06, "loss": 0.0029, "step": 76620 }, { "epoch": 1.2538656630941667, "grad_norm": 0.09932902455329895, "learning_rate": 7.2239034731308545e-06, "loss": 0.0022, "step": 76630 }, { "epoch": 1.2540292890452425, "grad_norm": 0.18450143933296204, "learning_rate": 7.223050837682235e-06, "loss": 0.0032, "step": 76640 }, { "epoch": 1.2541929149963185, "grad_norm": 0.1467326283454895, "learning_rate": 7.222198121653365e-06, "loss": 0.0028, "step": 76650 }, { "epoch": 1.2543565409473942, "grad_norm": 0.13686597347259521, "learning_rate": 7.221345325075155e-06, "loss": 0.0021, "step": 76660 }, { "epoch": 1.25452016689847, "grad_norm": 0.17153418064117432, "learning_rate": 7.220492447978516e-06, "loss": 0.003, "step": 76670 }, { "epoch": 1.254683792849546, "grad_norm": 0.23260289430618286, "learning_rate": 7.219639490394363e-06, "loss": 0.0035, "step": 76680 }, { "epoch": 1.2548474188006218, "grad_norm": 0.05313664302229881, "learning_rate": 7.218786452353612e-06, "loss": 0.0018, "step": 76690 }, { "epoch": 1.2550110447516976, "grad_norm": 0.17395347356796265, "learning_rate": 7.2179333338871845e-06, "loss": 0.0021, "step": 76700 }, { "epoch": 1.2551746707027736, "grad_norm": 0.15894368290901184, "learning_rate": 7.217080135026006e-06, "loss": 0.0017, "step": 76710 }, { "epoch": 1.2553382966538493, "grad_norm": 0.017329467460513115, "learning_rate": 7.216226855800999e-06, "loss": 0.003, "step": 76720 }, { "epoch": 1.255501922604925, "grad_norm": 0.1391407549381256, "learning_rate": 7.215373496243097e-06, "loss": 0.0024, "step": 76730 }, { "epoch": 1.255665548556001, "grad_norm": 0.190855011343956, "learning_rate": 7.21452005638323e-06, "loss": 0.0024, "step": 76740 }, { "epoch": 1.2558291745070769, "grad_norm": 0.09568745642900467, "learning_rate": 7.213666536252333e-06, "loss": 0.0033, "step": 76750 }, { "epoch": 1.2559928004581526, "grad_norm": 0.06416328251361847, "learning_rate": 7.212812935881345e-06, "loss": 0.0019, "step": 76760 }, { "epoch": 1.2561564264092284, "grad_norm": 0.09854545444250107, "learning_rate": 7.2119592553012064e-06, "loss": 0.004, "step": 76770 }, { "epoch": 1.2563200523603044, "grad_norm": 0.2991683781147003, "learning_rate": 7.2111054945428584e-06, "loss": 0.0017, "step": 76780 }, { "epoch": 1.2564836783113802, "grad_norm": 0.06790527701377869, "learning_rate": 7.210251653637253e-06, "loss": 0.0022, "step": 76790 }, { "epoch": 1.256647304262456, "grad_norm": 0.13237342238426208, "learning_rate": 7.209397732615338e-06, "loss": 0.0023, "step": 76800 }, { "epoch": 1.2568109302135317, "grad_norm": 0.050308067351579666, "learning_rate": 7.208543731508064e-06, "loss": 0.0019, "step": 76810 }, { "epoch": 1.2569745561646077, "grad_norm": 0.04378840699791908, "learning_rate": 7.207689650346388e-06, "loss": 0.0012, "step": 76820 }, { "epoch": 1.2571381821156835, "grad_norm": 0.07707502692937851, "learning_rate": 7.206835489161269e-06, "loss": 0.0015, "step": 76830 }, { "epoch": 1.2573018080667593, "grad_norm": 0.10681436210870743, "learning_rate": 7.2059812479836665e-06, "loss": 0.0026, "step": 76840 }, { "epoch": 1.2574654340178353, "grad_norm": 0.05072954297065735, "learning_rate": 7.2051269268445455e-06, "loss": 0.0026, "step": 76850 }, { "epoch": 1.257629059968911, "grad_norm": 0.04281345382332802, "learning_rate": 7.204272525774875e-06, "loss": 0.0047, "step": 76860 }, { "epoch": 1.2577926859199868, "grad_norm": 0.04866739735007286, "learning_rate": 7.203418044805621e-06, "loss": 0.0017, "step": 76870 }, { "epoch": 1.2579563118710628, "grad_norm": 0.11990425735712051, "learning_rate": 7.202563483967761e-06, "loss": 0.0024, "step": 76880 }, { "epoch": 1.2581199378221386, "grad_norm": 0.08831249922513962, "learning_rate": 7.201708843292266e-06, "loss": 0.0023, "step": 76890 }, { "epoch": 1.2582835637732144, "grad_norm": 0.11786916851997375, "learning_rate": 7.2008541228101194e-06, "loss": 0.0031, "step": 76900 }, { "epoch": 1.2584471897242904, "grad_norm": 0.2058493047952652, "learning_rate": 7.1999993225523e-06, "loss": 0.0018, "step": 76910 }, { "epoch": 1.2586108156753661, "grad_norm": 0.17707034945487976, "learning_rate": 7.19914444254979e-06, "loss": 0.0013, "step": 76920 }, { "epoch": 1.258774441626442, "grad_norm": 0.09533452242612839, "learning_rate": 7.198289482833583e-06, "loss": 0.0026, "step": 76930 }, { "epoch": 1.258938067577518, "grad_norm": 0.16579443216323853, "learning_rate": 7.197434443434663e-06, "loss": 0.0023, "step": 76940 }, { "epoch": 1.2591016935285937, "grad_norm": 0.20056231319904327, "learning_rate": 7.1965793243840284e-06, "loss": 0.0029, "step": 76950 }, { "epoch": 1.2592653194796695, "grad_norm": 0.040070660412311554, "learning_rate": 7.195724125712671e-06, "loss": 0.0014, "step": 76960 }, { "epoch": 1.2594289454307452, "grad_norm": 0.08155545592308044, "learning_rate": 7.194868847451593e-06, "loss": 0.0019, "step": 76970 }, { "epoch": 1.2595925713818212, "grad_norm": 0.3704371750354767, "learning_rate": 7.194013489631793e-06, "loss": 0.0017, "step": 76980 }, { "epoch": 1.259756197332897, "grad_norm": 0.0917673259973526, "learning_rate": 7.193158052284277e-06, "loss": 0.0039, "step": 76990 }, { "epoch": 1.2599198232839728, "grad_norm": 0.14519546926021576, "learning_rate": 7.192302535440052e-06, "loss": 0.0038, "step": 77000 }, { "epoch": 1.2600834492350486, "grad_norm": 0.29657483100891113, "learning_rate": 7.191446939130131e-06, "loss": 0.0014, "step": 77010 }, { "epoch": 1.2602470751861246, "grad_norm": 0.25027918815612793, "learning_rate": 7.190591263385525e-06, "loss": 0.0021, "step": 77020 }, { "epoch": 1.2604107011372003, "grad_norm": 0.1431395560503006, "learning_rate": 7.18973550823725e-06, "loss": 0.0026, "step": 77030 }, { "epoch": 1.260574327088276, "grad_norm": 0.1428649127483368, "learning_rate": 7.188879673716326e-06, "loss": 0.0023, "step": 77040 }, { "epoch": 1.260737953039352, "grad_norm": 0.07966932654380798, "learning_rate": 7.188023759853775e-06, "loss": 0.0028, "step": 77050 }, { "epoch": 1.2609015789904279, "grad_norm": 0.11174963414669037, "learning_rate": 7.18716776668062e-06, "loss": 0.0025, "step": 77060 }, { "epoch": 1.2610652049415036, "grad_norm": 0.18909013271331787, "learning_rate": 7.186311694227892e-06, "loss": 0.0021, "step": 77070 }, { "epoch": 1.2612288308925796, "grad_norm": 0.14445863664150238, "learning_rate": 7.185455542526618e-06, "loss": 0.002, "step": 77080 }, { "epoch": 1.2613924568436554, "grad_norm": 0.01052671018987894, "learning_rate": 7.184599311607835e-06, "loss": 0.0017, "step": 77090 }, { "epoch": 1.2615560827947312, "grad_norm": 0.034729667007923126, "learning_rate": 7.183743001502578e-06, "loss": 0.0015, "step": 77100 }, { "epoch": 1.2617197087458072, "grad_norm": 0.19659213721752167, "learning_rate": 7.182886612241884e-06, "loss": 0.0035, "step": 77110 }, { "epoch": 1.261883334696883, "grad_norm": 0.18396835029125214, "learning_rate": 7.182030143856797e-06, "loss": 0.0025, "step": 77120 }, { "epoch": 1.2620469606479587, "grad_norm": 0.10773087292909622, "learning_rate": 7.181173596378362e-06, "loss": 0.0028, "step": 77130 }, { "epoch": 1.2622105865990347, "grad_norm": 0.027716603130102158, "learning_rate": 7.1803169698376255e-06, "loss": 0.0025, "step": 77140 }, { "epoch": 1.2623742125501105, "grad_norm": 0.08923885226249695, "learning_rate": 7.179460264265641e-06, "loss": 0.0019, "step": 77150 }, { "epoch": 1.2625378385011863, "grad_norm": 0.2653767466545105, "learning_rate": 7.178603479693459e-06, "loss": 0.0026, "step": 77160 }, { "epoch": 1.262701464452262, "grad_norm": 0.04988548159599304, "learning_rate": 7.177746616152137e-06, "loss": 0.0013, "step": 77170 }, { "epoch": 1.2628650904033378, "grad_norm": 0.1218423992395401, "learning_rate": 7.176889673672733e-06, "loss": 0.0025, "step": 77180 }, { "epoch": 1.2630287163544138, "grad_norm": 0.20130939781665802, "learning_rate": 7.176032652286312e-06, "loss": 0.0015, "step": 77190 }, { "epoch": 1.2631923423054896, "grad_norm": 0.15818671882152557, "learning_rate": 7.175175552023938e-06, "loss": 0.0047, "step": 77200 }, { "epoch": 1.2633559682565654, "grad_norm": 0.22315305471420288, "learning_rate": 7.174318372916676e-06, "loss": 0.0016, "step": 77210 }, { "epoch": 1.2635195942076414, "grad_norm": 0.09150251746177673, "learning_rate": 7.1734611149956e-06, "loss": 0.0027, "step": 77220 }, { "epoch": 1.2636832201587171, "grad_norm": 0.15513551235198975, "learning_rate": 7.172603778291782e-06, "loss": 0.0038, "step": 77230 }, { "epoch": 1.263846846109793, "grad_norm": 0.1001538634300232, "learning_rate": 7.171746362836299e-06, "loss": 0.0023, "step": 77240 }, { "epoch": 1.264010472060869, "grad_norm": 0.13093596696853638, "learning_rate": 7.170888868660231e-06, "loss": 0.0016, "step": 77250 }, { "epoch": 1.2641740980119447, "grad_norm": 0.2395472675561905, "learning_rate": 7.170031295794658e-06, "loss": 0.0033, "step": 77260 }, { "epoch": 1.2643377239630205, "grad_norm": 0.16735276579856873, "learning_rate": 7.169173644270666e-06, "loss": 0.0027, "step": 77270 }, { "epoch": 1.2645013499140965, "grad_norm": 0.10939212143421173, "learning_rate": 7.168315914119342e-06, "loss": 0.0012, "step": 77280 }, { "epoch": 1.2646649758651722, "grad_norm": 0.12859097123146057, "learning_rate": 7.167458105371778e-06, "loss": 0.0012, "step": 77290 }, { "epoch": 1.264828601816248, "grad_norm": 0.26028716564178467, "learning_rate": 7.1666002180590686e-06, "loss": 0.0083, "step": 77300 }, { "epoch": 1.264992227767324, "grad_norm": 0.0801466554403305, "learning_rate": 7.165742252212307e-06, "loss": 0.0017, "step": 77310 }, { "epoch": 1.2651558537183998, "grad_norm": 0.013302541337907314, "learning_rate": 7.164884207862596e-06, "loss": 0.0017, "step": 77320 }, { "epoch": 1.2653194796694756, "grad_norm": 0.26006826758384705, "learning_rate": 7.164026085041034e-06, "loss": 0.0021, "step": 77330 }, { "epoch": 1.2654831056205516, "grad_norm": 0.12897484004497528, "learning_rate": 7.163167883778728e-06, "loss": 0.0015, "step": 77340 }, { "epoch": 1.2656467315716273, "grad_norm": 0.04795488342642784, "learning_rate": 7.162309604106784e-06, "loss": 0.0013, "step": 77350 }, { "epoch": 1.265810357522703, "grad_norm": 0.19672997295856476, "learning_rate": 7.1614512460563145e-06, "loss": 0.0022, "step": 77360 }, { "epoch": 1.2659739834737789, "grad_norm": 0.07301844656467438, "learning_rate": 7.160592809658433e-06, "loss": 0.0024, "step": 77370 }, { "epoch": 1.2661376094248546, "grad_norm": 0.03807608783245087, "learning_rate": 7.159734294944254e-06, "loss": 0.0043, "step": 77380 }, { "epoch": 1.2663012353759306, "grad_norm": 0.196418896317482, "learning_rate": 7.158875701944898e-06, "loss": 0.0026, "step": 77390 }, { "epoch": 1.2664648613270064, "grad_norm": 0.37065741419792175, "learning_rate": 7.158017030691487e-06, "loss": 0.0024, "step": 77400 }, { "epoch": 1.2666284872780822, "grad_norm": 0.25617995858192444, "learning_rate": 7.157158281215144e-06, "loss": 0.0026, "step": 77410 }, { "epoch": 1.2667921132291582, "grad_norm": 0.01631050743162632, "learning_rate": 7.1562994535469995e-06, "loss": 0.0008, "step": 77420 }, { "epoch": 1.266955739180234, "grad_norm": 0.10586243867874146, "learning_rate": 7.155440547718182e-06, "loss": 0.002, "step": 77430 }, { "epoch": 1.2671193651313097, "grad_norm": 0.13382920622825623, "learning_rate": 7.1545815637598235e-06, "loss": 0.0021, "step": 77440 }, { "epoch": 1.2672829910823857, "grad_norm": 0.12447115778923035, "learning_rate": 7.1537225017030644e-06, "loss": 0.002, "step": 77450 }, { "epoch": 1.2674466170334615, "grad_norm": 0.07894103229045868, "learning_rate": 7.15286336157904e-06, "loss": 0.0014, "step": 77460 }, { "epoch": 1.2676102429845373, "grad_norm": 0.14126263558864594, "learning_rate": 7.152004143418892e-06, "loss": 0.0015, "step": 77470 }, { "epoch": 1.2677738689356133, "grad_norm": 0.16852203011512756, "learning_rate": 7.151144847253767e-06, "loss": 0.0046, "step": 77480 }, { "epoch": 1.267937494886689, "grad_norm": 0.1813594102859497, "learning_rate": 7.1502854731148095e-06, "loss": 0.0023, "step": 77490 }, { "epoch": 1.2681011208377648, "grad_norm": 0.09075183421373367, "learning_rate": 7.149426021033173e-06, "loss": 0.0036, "step": 77500 }, { "epoch": 1.2682647467888408, "grad_norm": 0.14291784167289734, "learning_rate": 7.14856649104001e-06, "loss": 0.0024, "step": 77510 }, { "epoch": 1.2684283727399166, "grad_norm": 0.08343230932950974, "learning_rate": 7.147706883166476e-06, "loss": 0.001, "step": 77520 }, { "epoch": 1.2685919986909924, "grad_norm": 0.05725209042429924, "learning_rate": 7.146847197443728e-06, "loss": 0.0056, "step": 77530 }, { "epoch": 1.2687556246420684, "grad_norm": 0.05695614963769913, "learning_rate": 7.145987433902929e-06, "loss": 0.0021, "step": 77540 }, { "epoch": 1.2689192505931441, "grad_norm": 0.1129378080368042, "learning_rate": 7.1451275925752435e-06, "loss": 0.002, "step": 77550 }, { "epoch": 1.26908287654422, "grad_norm": 0.21865597367286682, "learning_rate": 7.1442676734918385e-06, "loss": 0.0035, "step": 77560 }, { "epoch": 1.2692465024952957, "grad_norm": 0.15413720905780792, "learning_rate": 7.143407676683885e-06, "loss": 0.0011, "step": 77570 }, { "epoch": 1.2694101284463715, "grad_norm": 0.04753812402486801, "learning_rate": 7.142547602182552e-06, "loss": 0.0015, "step": 77580 }, { "epoch": 1.2695737543974475, "grad_norm": 0.6218154430389404, "learning_rate": 7.141687450019021e-06, "loss": 0.0016, "step": 77590 }, { "epoch": 1.2697373803485232, "grad_norm": 0.07290442287921906, "learning_rate": 7.140827220224466e-06, "loss": 0.0025, "step": 77600 }, { "epoch": 1.269901006299599, "grad_norm": 0.07512924820184708, "learning_rate": 7.13996691283007e-06, "loss": 0.0012, "step": 77610 }, { "epoch": 1.270064632250675, "grad_norm": 0.11224841326475143, "learning_rate": 7.139106527867016e-06, "loss": 0.0009, "step": 77620 }, { "epoch": 1.2702282582017508, "grad_norm": 0.1324429214000702, "learning_rate": 7.138246065366491e-06, "loss": 0.0021, "step": 77630 }, { "epoch": 1.2703918841528266, "grad_norm": 0.07526404410600662, "learning_rate": 7.137385525359686e-06, "loss": 0.001, "step": 77640 }, { "epoch": 1.2705555101039026, "grad_norm": 0.24098370969295502, "learning_rate": 7.136524907877793e-06, "loss": 0.0016, "step": 77650 }, { "epoch": 1.2707191360549783, "grad_norm": 0.021831529214978218, "learning_rate": 7.135664212952007e-06, "loss": 0.0019, "step": 77660 }, { "epoch": 1.270882762006054, "grad_norm": 0.04822388291358948, "learning_rate": 7.134803440613527e-06, "loss": 0.0015, "step": 77670 }, { "epoch": 1.27104638795713, "grad_norm": 0.01008241344243288, "learning_rate": 7.133942590893552e-06, "loss": 0.0011, "step": 77680 }, { "epoch": 1.2712100139082059, "grad_norm": 0.1856769174337387, "learning_rate": 7.133081663823287e-06, "loss": 0.0021, "step": 77690 }, { "epoch": 1.2713736398592816, "grad_norm": 0.11940106004476547, "learning_rate": 7.132220659433938e-06, "loss": 0.0025, "step": 77700 }, { "epoch": 1.2715372658103576, "grad_norm": 0.157598614692688, "learning_rate": 7.131359577756716e-06, "loss": 0.001, "step": 77710 }, { "epoch": 1.2717008917614334, "grad_norm": 0.1254163533449173, "learning_rate": 7.130498418822831e-06, "loss": 0.0028, "step": 77720 }, { "epoch": 1.2718645177125092, "grad_norm": 0.1596977859735489, "learning_rate": 7.129637182663499e-06, "loss": 0.002, "step": 77730 }, { "epoch": 1.272028143663585, "grad_norm": 0.07138001918792725, "learning_rate": 7.1287758693099385e-06, "loss": 0.0016, "step": 77740 }, { "epoch": 1.272191769614661, "grad_norm": 0.17522409558296204, "learning_rate": 7.127914478793368e-06, "loss": 0.002, "step": 77750 }, { "epoch": 1.2723553955657367, "grad_norm": 0.16013853251934052, "learning_rate": 7.1270530111450136e-06, "loss": 0.0037, "step": 77760 }, { "epoch": 1.2725190215168125, "grad_norm": 0.07808253169059753, "learning_rate": 7.126191466396096e-06, "loss": 0.0011, "step": 77770 }, { "epoch": 1.2726826474678883, "grad_norm": 0.19619536399841309, "learning_rate": 7.125329844577851e-06, "loss": 0.0014, "step": 77780 }, { "epoch": 1.2728462734189643, "grad_norm": 0.32044342160224915, "learning_rate": 7.124468145721506e-06, "loss": 0.002, "step": 77790 }, { "epoch": 1.27300989937004, "grad_norm": 0.23559348285198212, "learning_rate": 7.123606369858298e-06, "loss": 0.0023, "step": 77800 }, { "epoch": 1.2731735253211158, "grad_norm": 0.06079702451825142, "learning_rate": 7.122744517019463e-06, "loss": 0.0028, "step": 77810 }, { "epoch": 1.2733371512721918, "grad_norm": 0.18395693600177765, "learning_rate": 7.121882587236241e-06, "loss": 0.0015, "step": 77820 }, { "epoch": 1.2735007772232676, "grad_norm": 0.032289862632751465, "learning_rate": 7.121020580539873e-06, "loss": 0.0014, "step": 77830 }, { "epoch": 1.2736644031743434, "grad_norm": 0.0709395557641983, "learning_rate": 7.120158496961609e-06, "loss": 0.0034, "step": 77840 }, { "epoch": 1.2738280291254194, "grad_norm": 0.11847332864999771, "learning_rate": 7.119296336532693e-06, "loss": 0.0015, "step": 77850 }, { "epoch": 1.2739916550764951, "grad_norm": 0.13741576671600342, "learning_rate": 7.118434099284382e-06, "loss": 0.0021, "step": 77860 }, { "epoch": 1.274155281027571, "grad_norm": 0.056646984070539474, "learning_rate": 7.117571785247923e-06, "loss": 0.0013, "step": 77870 }, { "epoch": 1.274318906978647, "grad_norm": 0.12713302671909332, "learning_rate": 7.116709394454577e-06, "loss": 0.0014, "step": 77880 }, { "epoch": 1.2744825329297227, "grad_norm": 0.15320396423339844, "learning_rate": 7.115846926935604e-06, "loss": 0.003, "step": 77890 }, { "epoch": 1.2746461588807985, "grad_norm": 0.24882537126541138, "learning_rate": 7.114984382722264e-06, "loss": 0.0028, "step": 77900 }, { "epoch": 1.2748097848318745, "grad_norm": 0.3457034230232239, "learning_rate": 7.114121761845823e-06, "loss": 0.0019, "step": 77910 }, { "epoch": 1.2749734107829502, "grad_norm": 0.009374169632792473, "learning_rate": 7.113259064337551e-06, "loss": 0.001, "step": 77920 }, { "epoch": 1.275137036734026, "grad_norm": 0.06365344673395157, "learning_rate": 7.112396290228716e-06, "loss": 0.0014, "step": 77930 }, { "epoch": 1.2753006626851018, "grad_norm": 0.11823870986700058, "learning_rate": 7.111533439550593e-06, "loss": 0.0019, "step": 77940 }, { "epoch": 1.2754642886361778, "grad_norm": 0.20753897726535797, "learning_rate": 7.110670512334456e-06, "loss": 0.0023, "step": 77950 }, { "epoch": 1.2756279145872536, "grad_norm": 0.15599587559700012, "learning_rate": 7.109807508611589e-06, "loss": 0.0022, "step": 77960 }, { "epoch": 1.2757915405383293, "grad_norm": 0.06066054850816727, "learning_rate": 7.1089444284132695e-06, "loss": 0.0024, "step": 77970 }, { "epoch": 1.275955166489405, "grad_norm": 0.11398950964212418, "learning_rate": 7.1080812717707815e-06, "loss": 0.0014, "step": 77980 }, { "epoch": 1.276118792440481, "grad_norm": 0.29165568947792053, "learning_rate": 7.107218038715414e-06, "loss": 0.0029, "step": 77990 }, { "epoch": 1.2762824183915569, "grad_norm": 0.051056746393442154, "learning_rate": 7.1063547292784585e-06, "loss": 0.0016, "step": 78000 }, { "epoch": 1.2764460443426326, "grad_norm": 0.04217154532670975, "learning_rate": 7.105491343491207e-06, "loss": 0.0014, "step": 78010 }, { "epoch": 1.2766096702937086, "grad_norm": 0.15325215458869934, "learning_rate": 7.104627881384953e-06, "loss": 0.005, "step": 78020 }, { "epoch": 1.2767732962447844, "grad_norm": 0.05750011280179024, "learning_rate": 7.103764342990997e-06, "loss": 0.0017, "step": 78030 }, { "epoch": 1.2769369221958602, "grad_norm": 0.08452239632606506, "learning_rate": 7.102900728340641e-06, "loss": 0.002, "step": 78040 }, { "epoch": 1.2771005481469362, "grad_norm": 0.07005751878023148, "learning_rate": 7.102037037465188e-06, "loss": 0.0019, "step": 78050 }, { "epoch": 1.277264174098012, "grad_norm": 0.15740787982940674, "learning_rate": 7.101173270395943e-06, "loss": 0.0016, "step": 78060 }, { "epoch": 1.2774278000490877, "grad_norm": 0.301065593957901, "learning_rate": 7.1003094271642185e-06, "loss": 0.0023, "step": 78070 }, { "epoch": 1.2775914260001637, "grad_norm": 0.1475955843925476, "learning_rate": 7.099445507801324e-06, "loss": 0.0016, "step": 78080 }, { "epoch": 1.2777550519512395, "grad_norm": 0.04055211693048477, "learning_rate": 7.098581512338576e-06, "loss": 0.0021, "step": 78090 }, { "epoch": 1.2779186779023153, "grad_norm": 0.08093906939029694, "learning_rate": 7.097717440807291e-06, "loss": 0.0022, "step": 78100 }, { "epoch": 1.2780823038533913, "grad_norm": 0.10586875677108765, "learning_rate": 7.0968532932387935e-06, "loss": 0.0029, "step": 78110 }, { "epoch": 1.278245929804467, "grad_norm": 0.06545928865671158, "learning_rate": 7.095989069664402e-06, "loss": 0.0037, "step": 78120 }, { "epoch": 1.2784095557555428, "grad_norm": 0.21746321022510529, "learning_rate": 7.095124770115442e-06, "loss": 0.0025, "step": 78130 }, { "epoch": 1.2785731817066186, "grad_norm": 0.04639434069395065, "learning_rate": 7.094260394623247e-06, "loss": 0.0018, "step": 78140 }, { "epoch": 1.2787368076576944, "grad_norm": 0.10664045810699463, "learning_rate": 7.093395943219146e-06, "loss": 0.0018, "step": 78150 }, { "epoch": 1.2789004336087704, "grad_norm": 0.05785437300801277, "learning_rate": 7.092531415934473e-06, "loss": 0.0021, "step": 78160 }, { "epoch": 1.2790640595598461, "grad_norm": 0.28598418831825256, "learning_rate": 7.091666812800567e-06, "loss": 0.0026, "step": 78170 }, { "epoch": 1.279227685510922, "grad_norm": 0.10265430808067322, "learning_rate": 7.090802133848764e-06, "loss": 0.0011, "step": 78180 }, { "epoch": 1.279391311461998, "grad_norm": 0.18367788195610046, "learning_rate": 7.08993737911041e-06, "loss": 0.0019, "step": 78190 }, { "epoch": 1.2795549374130737, "grad_norm": 0.09943254292011261, "learning_rate": 7.089072548616849e-06, "loss": 0.0012, "step": 78200 }, { "epoch": 1.2797185633641495, "grad_norm": 0.036855001002550125, "learning_rate": 7.088207642399429e-06, "loss": 0.0011, "step": 78210 }, { "epoch": 1.2798821893152255, "grad_norm": 0.07823864370584488, "learning_rate": 7.0873426604895e-06, "loss": 0.0013, "step": 78220 }, { "epoch": 1.2800458152663012, "grad_norm": 0.23223811388015747, "learning_rate": 7.086477602918418e-06, "loss": 0.0023, "step": 78230 }, { "epoch": 1.280209441217377, "grad_norm": 0.10750643163919449, "learning_rate": 7.085612469717536e-06, "loss": 0.0015, "step": 78240 }, { "epoch": 1.280373067168453, "grad_norm": 0.09745393693447113, "learning_rate": 7.084747260918214e-06, "loss": 0.0018, "step": 78250 }, { "epoch": 1.2805366931195288, "grad_norm": 0.0866905227303505, "learning_rate": 7.083881976551817e-06, "loss": 0.0009, "step": 78260 }, { "epoch": 1.2807003190706046, "grad_norm": 0.4757748246192932, "learning_rate": 7.083016616649705e-06, "loss": 0.0017, "step": 78270 }, { "epoch": 1.2808639450216805, "grad_norm": 0.26839548349380493, "learning_rate": 7.082151181243248e-06, "loss": 0.0024, "step": 78280 }, { "epoch": 1.2810275709727563, "grad_norm": 0.1678846776485443, "learning_rate": 7.0812856703638135e-06, "loss": 0.0032, "step": 78290 }, { "epoch": 1.281191196923832, "grad_norm": 0.08280716091394424, "learning_rate": 7.0804200840427765e-06, "loss": 0.0018, "step": 78300 }, { "epoch": 1.281354822874908, "grad_norm": 0.17985455691814423, "learning_rate": 7.079554422311513e-06, "loss": 0.008, "step": 78310 }, { "epoch": 1.2815184488259839, "grad_norm": 0.0738675519824028, "learning_rate": 7.078688685201398e-06, "loss": 0.0022, "step": 78320 }, { "epoch": 1.2816820747770596, "grad_norm": 0.5514221787452698, "learning_rate": 7.0778228727438146e-06, "loss": 0.0033, "step": 78330 }, { "epoch": 1.2818457007281354, "grad_norm": 0.07533999532461166, "learning_rate": 7.076956984970147e-06, "loss": 0.0025, "step": 78340 }, { "epoch": 1.2820093266792112, "grad_norm": 0.1644175499677658, "learning_rate": 7.076091021911777e-06, "loss": 0.0025, "step": 78350 }, { "epoch": 1.2821729526302872, "grad_norm": 0.09712538868188858, "learning_rate": 7.075224983600102e-06, "loss": 0.0017, "step": 78360 }, { "epoch": 1.282336578581363, "grad_norm": 0.04440287500619888, "learning_rate": 7.074358870066507e-06, "loss": 0.0029, "step": 78370 }, { "epoch": 1.2825002045324387, "grad_norm": 0.02812427282333374, "learning_rate": 7.073492681342388e-06, "loss": 0.0015, "step": 78380 }, { "epoch": 1.2826638304835147, "grad_norm": 0.050098370760679245, "learning_rate": 7.072626417459143e-06, "loss": 0.002, "step": 78390 }, { "epoch": 1.2828274564345905, "grad_norm": 0.0541042797267437, "learning_rate": 7.071760078448171e-06, "loss": 0.0013, "step": 78400 }, { "epoch": 1.2829910823856663, "grad_norm": 0.10522080212831497, "learning_rate": 7.070893664340877e-06, "loss": 0.0021, "step": 78410 }, { "epoch": 1.2831547083367423, "grad_norm": 0.04674161970615387, "learning_rate": 7.070027175168665e-06, "loss": 0.001, "step": 78420 }, { "epoch": 1.283318334287818, "grad_norm": 0.09381629526615143, "learning_rate": 7.069160610962942e-06, "loss": 0.0014, "step": 78430 }, { "epoch": 1.2834819602388938, "grad_norm": 0.01699410378932953, "learning_rate": 7.0682939717551205e-06, "loss": 0.0023, "step": 78440 }, { "epoch": 1.2836455861899698, "grad_norm": 0.195709228515625, "learning_rate": 7.067427257576613e-06, "loss": 0.0026, "step": 78450 }, { "epoch": 1.2838092121410456, "grad_norm": 0.1978006511926651, "learning_rate": 7.066560468458837e-06, "loss": 0.0024, "step": 78460 }, { "epoch": 1.2839728380921214, "grad_norm": 0.07010656595230103, "learning_rate": 7.065693604433212e-06, "loss": 0.0036, "step": 78470 }, { "epoch": 1.2841364640431974, "grad_norm": 0.10471019893884659, "learning_rate": 7.064826665531157e-06, "loss": 0.0025, "step": 78480 }, { "epoch": 1.2843000899942731, "grad_norm": 0.12300293892621994, "learning_rate": 7.063959651784097e-06, "loss": 0.002, "step": 78490 }, { "epoch": 1.284463715945349, "grad_norm": 0.16696114838123322, "learning_rate": 7.063092563223461e-06, "loss": 0.0021, "step": 78500 }, { "epoch": 1.2846273418964247, "grad_norm": 0.08266822248697281, "learning_rate": 7.06222539988068e-06, "loss": 0.0017, "step": 78510 }, { "epoch": 1.2847909678475007, "grad_norm": 0.05953631550073624, "learning_rate": 7.061358161787183e-06, "loss": 0.0033, "step": 78520 }, { "epoch": 1.2849545937985765, "grad_norm": 0.08135845512151718, "learning_rate": 7.0604908489744075e-06, "loss": 0.0013, "step": 78530 }, { "epoch": 1.2851182197496522, "grad_norm": 0.050916410982608795, "learning_rate": 7.05962346147379e-06, "loss": 0.0029, "step": 78540 }, { "epoch": 1.285281845700728, "grad_norm": 0.06050095707178116, "learning_rate": 7.058755999316773e-06, "loss": 0.0016, "step": 78550 }, { "epoch": 1.285445471651804, "grad_norm": 0.045107923448085785, "learning_rate": 7.0578884625347985e-06, "loss": 0.0016, "step": 78560 }, { "epoch": 1.2856090976028798, "grad_norm": 0.06090124696493149, "learning_rate": 7.057020851159315e-06, "loss": 0.0013, "step": 78570 }, { "epoch": 1.2857727235539556, "grad_norm": 0.15376600623130798, "learning_rate": 7.056153165221768e-06, "loss": 0.0016, "step": 78580 }, { "epoch": 1.2859363495050316, "grad_norm": 0.16558267176151276, "learning_rate": 7.055285404753611e-06, "loss": 0.0017, "step": 78590 }, { "epoch": 1.2860999754561073, "grad_norm": 0.16581164300441742, "learning_rate": 7.054417569786298e-06, "loss": 0.0022, "step": 78600 }, { "epoch": 1.286263601407183, "grad_norm": 0.12265679240226746, "learning_rate": 7.053549660351286e-06, "loss": 0.0021, "step": 78610 }, { "epoch": 1.286427227358259, "grad_norm": 0.26179322600364685, "learning_rate": 7.052681676480033e-06, "loss": 0.0024, "step": 78620 }, { "epoch": 1.2865908533093349, "grad_norm": 0.034944869577884674, "learning_rate": 7.0518136182040045e-06, "loss": 0.0022, "step": 78630 }, { "epoch": 1.2867544792604106, "grad_norm": 0.061872292309999466, "learning_rate": 7.050945485554662e-06, "loss": 0.0026, "step": 78640 }, { "epoch": 1.2869181052114866, "grad_norm": 0.04613882675766945, "learning_rate": 7.050077278563476e-06, "loss": 0.0016, "step": 78650 }, { "epoch": 1.2870817311625624, "grad_norm": 0.09044929593801498, "learning_rate": 7.049208997261916e-06, "loss": 0.002, "step": 78660 }, { "epoch": 1.2872453571136382, "grad_norm": 0.18111932277679443, "learning_rate": 7.048340641681455e-06, "loss": 0.0025, "step": 78670 }, { "epoch": 1.2874089830647142, "grad_norm": 0.054907627403736115, "learning_rate": 7.047472211853569e-06, "loss": 0.0036, "step": 78680 }, { "epoch": 1.28757260901579, "grad_norm": 0.14432856440544128, "learning_rate": 7.046603707809736e-06, "loss": 0.0029, "step": 78690 }, { "epoch": 1.2877362349668657, "grad_norm": 0.1697741448879242, "learning_rate": 7.045735129581436e-06, "loss": 0.0026, "step": 78700 }, { "epoch": 1.2878998609179415, "grad_norm": 0.12358502298593521, "learning_rate": 7.044866477200156e-06, "loss": 0.0031, "step": 78710 }, { "epoch": 1.2880634868690175, "grad_norm": 0.1889401525259018, "learning_rate": 7.043997750697381e-06, "loss": 0.004, "step": 78720 }, { "epoch": 1.2882271128200933, "grad_norm": 0.14887337386608124, "learning_rate": 7.0431289501046e-06, "loss": 0.0045, "step": 78730 }, { "epoch": 1.288390738771169, "grad_norm": 0.08990081399679184, "learning_rate": 7.0422600754533045e-06, "loss": 0.0022, "step": 78740 }, { "epoch": 1.2885543647222448, "grad_norm": 0.06419666111469269, "learning_rate": 7.0413911267749905e-06, "loss": 0.0018, "step": 78750 }, { "epoch": 1.2887179906733208, "grad_norm": 0.030298173427581787, "learning_rate": 7.040522104101153e-06, "loss": 0.0023, "step": 78760 }, { "epoch": 1.2888816166243966, "grad_norm": 0.0547197200357914, "learning_rate": 7.039653007463294e-06, "loss": 0.0019, "step": 78770 }, { "epoch": 1.2890452425754724, "grad_norm": 0.20454275608062744, "learning_rate": 7.038783836892917e-06, "loss": 0.002, "step": 78780 }, { "epoch": 1.2892088685265484, "grad_norm": 0.0036465420853346586, "learning_rate": 7.0379145924215255e-06, "loss": 0.0026, "step": 78790 }, { "epoch": 1.2893724944776241, "grad_norm": 0.18773530423641205, "learning_rate": 7.037045274080628e-06, "loss": 0.003, "step": 78800 }, { "epoch": 1.2895361204287, "grad_norm": 0.6376838684082031, "learning_rate": 7.036175881901735e-06, "loss": 0.0014, "step": 78810 }, { "epoch": 1.289699746379776, "grad_norm": 0.02673187293112278, "learning_rate": 7.0353064159163595e-06, "loss": 0.0026, "step": 78820 }, { "epoch": 1.2898633723308517, "grad_norm": 0.16669833660125732, "learning_rate": 7.034436876156019e-06, "loss": 0.0027, "step": 78830 }, { "epoch": 1.2900269982819275, "grad_norm": 0.13926498591899872, "learning_rate": 7.03356726265223e-06, "loss": 0.0052, "step": 78840 }, { "epoch": 1.2901906242330035, "grad_norm": 0.054456811398267746, "learning_rate": 7.0326975754365165e-06, "loss": 0.0017, "step": 78850 }, { "epoch": 1.2903542501840792, "grad_norm": 0.072306789457798, "learning_rate": 7.0318278145404016e-06, "loss": 0.002, "step": 78860 }, { "epoch": 1.290517876135155, "grad_norm": 0.3277745842933655, "learning_rate": 7.030957979995412e-06, "loss": 0.0023, "step": 78870 }, { "epoch": 1.290681502086231, "grad_norm": 0.11229134351015091, "learning_rate": 7.030088071833077e-06, "loss": 0.0032, "step": 78880 }, { "epoch": 1.2908451280373068, "grad_norm": 0.15741701424121857, "learning_rate": 7.0292180900849275e-06, "loss": 0.0024, "step": 78890 }, { "epoch": 1.2910087539883826, "grad_norm": 0.09956037998199463, "learning_rate": 7.0283480347825e-06, "loss": 0.0023, "step": 78900 }, { "epoch": 1.2911723799394583, "grad_norm": 0.044183846563100815, "learning_rate": 7.027477905957329e-06, "loss": 0.0017, "step": 78910 }, { "epoch": 1.2913360058905343, "grad_norm": 0.05782116577029228, "learning_rate": 7.0266077036409594e-06, "loss": 0.0018, "step": 78920 }, { "epoch": 1.29149963184161, "grad_norm": 0.2579874098300934, "learning_rate": 7.0257374278649314e-06, "loss": 0.0022, "step": 78930 }, { "epoch": 1.2916632577926859, "grad_norm": 0.05683138966560364, "learning_rate": 7.024867078660789e-06, "loss": 0.0026, "step": 78940 }, { "epoch": 1.2918268837437616, "grad_norm": 0.2643553912639618, "learning_rate": 7.023996656060082e-06, "loss": 0.0027, "step": 78950 }, { "epoch": 1.2919905096948376, "grad_norm": 0.07228142023086548, "learning_rate": 7.023126160094363e-06, "loss": 0.0017, "step": 78960 }, { "epoch": 1.2921541356459134, "grad_norm": 0.021867431700229645, "learning_rate": 7.022255590795181e-06, "loss": 0.0016, "step": 78970 }, { "epoch": 1.2923177615969892, "grad_norm": 0.3119870126247406, "learning_rate": 7.021384948194094e-06, "loss": 0.0032, "step": 78980 }, { "epoch": 1.2924813875480652, "grad_norm": 0.12576338648796082, "learning_rate": 7.02051423232266e-06, "loss": 0.0024, "step": 78990 }, { "epoch": 1.292645013499141, "grad_norm": 0.03334770351648331, "learning_rate": 7.0196434432124425e-06, "loss": 0.0015, "step": 79000 }, { "epoch": 1.2928086394502167, "grad_norm": 0.1753448247909546, "learning_rate": 7.018772580895006e-06, "loss": 0.002, "step": 79010 }, { "epoch": 1.2929722654012927, "grad_norm": 0.08526654541492462, "learning_rate": 7.017901645401915e-06, "loss": 0.0015, "step": 79020 }, { "epoch": 1.2931358913523685, "grad_norm": 0.26862430572509766, "learning_rate": 7.0170306367647366e-06, "loss": 0.003, "step": 79030 }, { "epoch": 1.2932995173034443, "grad_norm": 0.09505033493041992, "learning_rate": 7.016159555015047e-06, "loss": 0.0024, "step": 79040 }, { "epoch": 1.2934631432545203, "grad_norm": 0.013281266205012798, "learning_rate": 7.015288400184419e-06, "loss": 0.0024, "step": 79050 }, { "epoch": 1.293626769205596, "grad_norm": 0.10048270970582962, "learning_rate": 7.014417172304431e-06, "loss": 0.0018, "step": 79060 }, { "epoch": 1.2937903951566718, "grad_norm": 0.14620321989059448, "learning_rate": 7.0135458714066604e-06, "loss": 0.0024, "step": 79070 }, { "epoch": 1.2939540211077478, "grad_norm": 0.21103666722774506, "learning_rate": 7.012674497522692e-06, "loss": 0.0022, "step": 79080 }, { "epoch": 1.2941176470588236, "grad_norm": 0.06079899147152901, "learning_rate": 7.0118030506841114e-06, "loss": 0.0015, "step": 79090 }, { "epoch": 1.2942812730098994, "grad_norm": 0.12934254109859467, "learning_rate": 7.010931530922504e-06, "loss": 0.0021, "step": 79100 }, { "epoch": 1.2944448989609751, "grad_norm": 0.07510894536972046, "learning_rate": 7.010059938269463e-06, "loss": 0.0019, "step": 79110 }, { "epoch": 1.294608524912051, "grad_norm": 0.027073414996266365, "learning_rate": 7.0091882727565795e-06, "loss": 0.0019, "step": 79120 }, { "epoch": 1.294772150863127, "grad_norm": 0.034520912915468216, "learning_rate": 7.0083165344154505e-06, "loss": 0.002, "step": 79130 }, { "epoch": 1.2949357768142027, "grad_norm": 0.20583203434944153, "learning_rate": 7.007444723277673e-06, "loss": 0.003, "step": 79140 }, { "epoch": 1.2950994027652785, "grad_norm": 0.2449686974287033, "learning_rate": 7.00657283937485e-06, "loss": 0.0039, "step": 79150 }, { "epoch": 1.2952630287163545, "grad_norm": 0.13770616054534912, "learning_rate": 7.005700882738585e-06, "loss": 0.0023, "step": 79160 }, { "epoch": 1.2954266546674302, "grad_norm": 0.02674509398639202, "learning_rate": 7.004828853400483e-06, "loss": 0.0014, "step": 79170 }, { "epoch": 1.295590280618506, "grad_norm": 0.06522715091705322, "learning_rate": 7.003956751392154e-06, "loss": 0.0037, "step": 79180 }, { "epoch": 1.295753906569582, "grad_norm": 0.06211570277810097, "learning_rate": 7.003084576745207e-06, "loss": 0.0023, "step": 79190 }, { "epoch": 1.2959175325206578, "grad_norm": 0.08482442796230316, "learning_rate": 7.002212329491259e-06, "loss": 0.002, "step": 79200 }, { "epoch": 1.2960811584717336, "grad_norm": 0.10277386754751205, "learning_rate": 7.001340009661927e-06, "loss": 0.0028, "step": 79210 }, { "epoch": 1.2962447844228095, "grad_norm": 0.07776857167482376, "learning_rate": 7.0004676172888305e-06, "loss": 0.0012, "step": 79220 }, { "epoch": 1.2964084103738853, "grad_norm": 0.05071259289979935, "learning_rate": 6.999595152403591e-06, "loss": 0.0037, "step": 79230 }, { "epoch": 1.296572036324961, "grad_norm": 0.1432274878025055, "learning_rate": 6.998722615037832e-06, "loss": 0.0022, "step": 79240 }, { "epoch": 1.296735662276037, "grad_norm": 0.16380329430103302, "learning_rate": 6.997850005223182e-06, "loss": 0.0012, "step": 79250 }, { "epoch": 1.2968992882271129, "grad_norm": 0.26686760783195496, "learning_rate": 6.996977322991272e-06, "loss": 0.0043, "step": 79260 }, { "epoch": 1.2970629141781886, "grad_norm": 0.04556310921907425, "learning_rate": 6.9961045683737316e-06, "loss": 0.0013, "step": 79270 }, { "epoch": 1.2972265401292646, "grad_norm": 0.22555766999721527, "learning_rate": 6.995231741402199e-06, "loss": 0.0027, "step": 79280 }, { "epoch": 1.2973901660803404, "grad_norm": 0.3267669081687927, "learning_rate": 6.994358842108313e-06, "loss": 0.0013, "step": 79290 }, { "epoch": 1.2975537920314162, "grad_norm": 0.07013623416423798, "learning_rate": 6.993485870523711e-06, "loss": 0.0018, "step": 79300 }, { "epoch": 1.297717417982492, "grad_norm": 0.19409142434597015, "learning_rate": 6.992612826680037e-06, "loss": 0.0023, "step": 79310 }, { "epoch": 1.2978810439335677, "grad_norm": 0.032443542033433914, "learning_rate": 6.991739710608938e-06, "loss": 0.0027, "step": 79320 }, { "epoch": 1.2980446698846437, "grad_norm": 0.17591342329978943, "learning_rate": 6.990866522342061e-06, "loss": 0.002, "step": 79330 }, { "epoch": 1.2982082958357195, "grad_norm": 0.16668100655078888, "learning_rate": 6.989993261911057e-06, "loss": 0.0021, "step": 79340 }, { "epoch": 1.2983719217867953, "grad_norm": 0.0907452404499054, "learning_rate": 6.9891199293475794e-06, "loss": 0.0036, "step": 79350 }, { "epoch": 1.2985355477378713, "grad_norm": 0.02006194181740284, "learning_rate": 6.988246524683288e-06, "loss": 0.0017, "step": 79360 }, { "epoch": 1.298699173688947, "grad_norm": 0.15572962164878845, "learning_rate": 6.987373047949837e-06, "loss": 0.002, "step": 79370 }, { "epoch": 1.2988627996400228, "grad_norm": 0.26907023787498474, "learning_rate": 6.986499499178889e-06, "loss": 0.0029, "step": 79380 }, { "epoch": 1.2990264255910988, "grad_norm": 0.10807106643915176, "learning_rate": 6.985625878402109e-06, "loss": 0.0023, "step": 79390 }, { "epoch": 1.2991900515421746, "grad_norm": 0.05270524322986603, "learning_rate": 6.9847521856511635e-06, "loss": 0.0021, "step": 79400 }, { "epoch": 1.2993536774932504, "grad_norm": 0.3921222686767578, "learning_rate": 6.983878420957722e-06, "loss": 0.0036, "step": 79410 }, { "epoch": 1.2995173034443264, "grad_norm": 0.1369101107120514, "learning_rate": 6.9830045843534546e-06, "loss": 0.0027, "step": 79420 }, { "epoch": 1.2996809293954021, "grad_norm": 0.23730507493019104, "learning_rate": 6.982130675870037e-06, "loss": 0.0021, "step": 79430 }, { "epoch": 1.299844555346478, "grad_norm": 0.038298897445201874, "learning_rate": 6.981256695539147e-06, "loss": 0.0018, "step": 79440 }, { "epoch": 1.300008181297554, "grad_norm": 0.06326301395893097, "learning_rate": 6.980382643392463e-06, "loss": 0.0012, "step": 79450 }, { "epoch": 1.3001718072486297, "grad_norm": 0.039917342364788055, "learning_rate": 6.9795085194616685e-06, "loss": 0.0014, "step": 79460 }, { "epoch": 1.3003354331997055, "grad_norm": 0.25611624121665955, "learning_rate": 6.978634323778449e-06, "loss": 0.0022, "step": 79470 }, { "epoch": 1.3004990591507812, "grad_norm": 0.14208780229091644, "learning_rate": 6.977760056374489e-06, "loss": 0.0014, "step": 79480 }, { "epoch": 1.3006626851018572, "grad_norm": 0.1369677037000656, "learning_rate": 6.976885717281481e-06, "loss": 0.004, "step": 79490 }, { "epoch": 1.300826311052933, "grad_norm": 0.052475377917289734, "learning_rate": 6.976011306531117e-06, "loss": 0.0027, "step": 79500 }, { "epoch": 1.3009899370040088, "grad_norm": 0.11846672743558884, "learning_rate": 6.975136824155091e-06, "loss": 0.0016, "step": 79510 }, { "epoch": 1.3011535629550846, "grad_norm": 0.10087010264396667, "learning_rate": 6.974262270185105e-06, "loss": 0.002, "step": 79520 }, { "epoch": 1.3013171889061605, "grad_norm": 0.1653912514448166, "learning_rate": 6.973387644652856e-06, "loss": 0.0023, "step": 79530 }, { "epoch": 1.3014808148572363, "grad_norm": 0.08504143357276917, "learning_rate": 6.972512947590047e-06, "loss": 0.0023, "step": 79540 }, { "epoch": 1.301644440808312, "grad_norm": 0.08199617266654968, "learning_rate": 6.971638179028384e-06, "loss": 0.002, "step": 79550 }, { "epoch": 1.301808066759388, "grad_norm": 0.07083486765623093, "learning_rate": 6.970763338999575e-06, "loss": 0.0008, "step": 79560 }, { "epoch": 1.3019716927104639, "grad_norm": 0.09887305647134781, "learning_rate": 6.969888427535334e-06, "loss": 0.0081, "step": 79570 }, { "epoch": 1.3021353186615396, "grad_norm": 0.19988499581813812, "learning_rate": 6.969013444667371e-06, "loss": 0.0013, "step": 79580 }, { "epoch": 1.3022989446126156, "grad_norm": 0.025854721665382385, "learning_rate": 6.968138390427403e-06, "loss": 0.0022, "step": 79590 }, { "epoch": 1.3024625705636914, "grad_norm": 0.2710879147052765, "learning_rate": 6.967263264847148e-06, "loss": 0.0032, "step": 79600 }, { "epoch": 1.3026261965147672, "grad_norm": 0.06273536384105682, "learning_rate": 6.966388067958329e-06, "loss": 0.0019, "step": 79610 }, { "epoch": 1.3027898224658432, "grad_norm": 0.1396106630563736, "learning_rate": 6.965512799792666e-06, "loss": 0.0028, "step": 79620 }, { "epoch": 1.302953448416919, "grad_norm": 0.3284461498260498, "learning_rate": 6.964637460381891e-06, "loss": 0.002, "step": 79630 }, { "epoch": 1.3031170743679947, "grad_norm": 0.24664875864982605, "learning_rate": 6.963762049757729e-06, "loss": 0.0017, "step": 79640 }, { "epoch": 1.3032807003190707, "grad_norm": 0.03524404764175415, "learning_rate": 6.962886567951912e-06, "loss": 0.0014, "step": 79650 }, { "epoch": 1.3034443262701465, "grad_norm": 0.359945684671402, "learning_rate": 6.962011014996175e-06, "loss": 0.0032, "step": 79660 }, { "epoch": 1.3036079522212223, "grad_norm": 0.5091138482093811, "learning_rate": 6.961135390922255e-06, "loss": 0.0035, "step": 79670 }, { "epoch": 1.303771578172298, "grad_norm": 0.19365240633487701, "learning_rate": 6.960259695761889e-06, "loss": 0.0023, "step": 79680 }, { "epoch": 1.303935204123374, "grad_norm": 0.0849161446094513, "learning_rate": 6.959383929546823e-06, "loss": 0.0015, "step": 79690 }, { "epoch": 1.3040988300744498, "grad_norm": 0.13367627561092377, "learning_rate": 6.958508092308796e-06, "loss": 0.0043, "step": 79700 }, { "epoch": 1.3042624560255256, "grad_norm": 0.10489772260189056, "learning_rate": 6.957632184079558e-06, "loss": 0.0026, "step": 79710 }, { "epoch": 1.3044260819766014, "grad_norm": 0.08480428904294968, "learning_rate": 6.95675620489086e-06, "loss": 0.0013, "step": 79720 }, { "epoch": 1.3045897079276774, "grad_norm": 0.18104498088359833, "learning_rate": 6.955880154774451e-06, "loss": 0.0025, "step": 79730 }, { "epoch": 1.3047533338787531, "grad_norm": 0.07868177443742752, "learning_rate": 6.955004033762088e-06, "loss": 0.0016, "step": 79740 }, { "epoch": 1.304916959829829, "grad_norm": 0.09211232513189316, "learning_rate": 6.954127841885528e-06, "loss": 0.0018, "step": 79750 }, { "epoch": 1.305080585780905, "grad_norm": 0.12492793053388596, "learning_rate": 6.95325157917653e-06, "loss": 0.002, "step": 79760 }, { "epoch": 1.3052442117319807, "grad_norm": 0.1060870811343193, "learning_rate": 6.952375245666856e-06, "loss": 0.0023, "step": 79770 }, { "epoch": 1.3054078376830565, "grad_norm": 0.07007330656051636, "learning_rate": 6.951498841388272e-06, "loss": 0.0028, "step": 79780 }, { "epoch": 1.3055714636341325, "grad_norm": 0.2370176464319229, "learning_rate": 6.9506223663725454e-06, "loss": 0.0023, "step": 79790 }, { "epoch": 1.3057350895852082, "grad_norm": 0.04554685950279236, "learning_rate": 6.949745820651447e-06, "loss": 0.0025, "step": 79800 }, { "epoch": 1.305898715536284, "grad_norm": 0.06224415451288223, "learning_rate": 6.948869204256748e-06, "loss": 0.0019, "step": 79810 }, { "epoch": 1.30606234148736, "grad_norm": 0.16847485303878784, "learning_rate": 6.947992517220224e-06, "loss": 0.0023, "step": 79820 }, { "epoch": 1.3062259674384358, "grad_norm": 0.05676880106329918, "learning_rate": 6.947115759573653e-06, "loss": 0.0015, "step": 79830 }, { "epoch": 1.3063895933895115, "grad_norm": 0.11860613524913788, "learning_rate": 6.946238931348816e-06, "loss": 0.0027, "step": 79840 }, { "epoch": 1.3065532193405875, "grad_norm": 0.5313612222671509, "learning_rate": 6.945362032577496e-06, "loss": 0.0013, "step": 79850 }, { "epoch": 1.3067168452916633, "grad_norm": 0.22232674062252045, "learning_rate": 6.9444850632914775e-06, "loss": 0.0015, "step": 79860 }, { "epoch": 1.306880471242739, "grad_norm": 0.048142462968826294, "learning_rate": 6.943608023522549e-06, "loss": 0.0034, "step": 79870 }, { "epoch": 1.3070440971938149, "grad_norm": 0.24687138199806213, "learning_rate": 6.942730913302501e-06, "loss": 0.0026, "step": 79880 }, { "epoch": 1.3072077231448906, "grad_norm": 0.19159138202667236, "learning_rate": 6.941853732663127e-06, "loss": 0.0033, "step": 79890 }, { "epoch": 1.3073713490959666, "grad_norm": 0.08600754290819168, "learning_rate": 6.9409764816362215e-06, "loss": 0.0022, "step": 79900 }, { "epoch": 1.3075349750470424, "grad_norm": 0.1998615264892578, "learning_rate": 6.9400991602535815e-06, "loss": 0.0018, "step": 79910 }, { "epoch": 1.3076986009981182, "grad_norm": 0.10417447239160538, "learning_rate": 6.939221768547013e-06, "loss": 0.0022, "step": 79920 }, { "epoch": 1.3078622269491942, "grad_norm": 0.1524556428194046, "learning_rate": 6.9383443065483155e-06, "loss": 0.0015, "step": 79930 }, { "epoch": 1.30802585290027, "grad_norm": 0.04746182635426521, "learning_rate": 6.937466774289296e-06, "loss": 0.0017, "step": 79940 }, { "epoch": 1.3081894788513457, "grad_norm": 0.2966523766517639, "learning_rate": 6.936589171801761e-06, "loss": 0.0017, "step": 79950 }, { "epoch": 1.3083531048024217, "grad_norm": 0.1536683440208435, "learning_rate": 6.935711499117523e-06, "loss": 0.0017, "step": 79960 }, { "epoch": 1.3085167307534975, "grad_norm": 0.055951982736587524, "learning_rate": 6.934833756268398e-06, "loss": 0.0012, "step": 79970 }, { "epoch": 1.3086803567045733, "grad_norm": 0.16837568581104279, "learning_rate": 6.933955943286197e-06, "loss": 0.0028, "step": 79980 }, { "epoch": 1.3088439826556493, "grad_norm": 0.0725744217634201, "learning_rate": 6.933078060202742e-06, "loss": 0.0038, "step": 79990 }, { "epoch": 1.309007608606725, "grad_norm": 0.07453279197216034, "learning_rate": 6.932200107049854e-06, "loss": 0.0023, "step": 80000 }, { "epoch": 1.3091712345578008, "grad_norm": 0.2862430810928345, "learning_rate": 6.931322083859355e-06, "loss": 0.0057, "step": 80010 }, { "epoch": 1.3093348605088768, "grad_norm": 0.08960889279842377, "learning_rate": 6.930443990663074e-06, "loss": 0.0022, "step": 80020 }, { "epoch": 1.3094984864599526, "grad_norm": 0.24223047494888306, "learning_rate": 6.929565827492837e-06, "loss": 0.002, "step": 80030 }, { "epoch": 1.3096621124110284, "grad_norm": 0.20139755308628082, "learning_rate": 6.928687594380475e-06, "loss": 0.0029, "step": 80040 }, { "epoch": 1.3098257383621044, "grad_norm": 0.04352078586816788, "learning_rate": 6.927809291357823e-06, "loss": 0.0021, "step": 80050 }, { "epoch": 1.3099893643131801, "grad_norm": 0.08494716137647629, "learning_rate": 6.926930918456719e-06, "loss": 0.0043, "step": 80060 }, { "epoch": 1.310152990264256, "grad_norm": 0.25197190046310425, "learning_rate": 6.926052475709002e-06, "loss": 0.0021, "step": 80070 }, { "epoch": 1.3103166162153317, "grad_norm": 0.2362338751554489, "learning_rate": 6.9251739631465095e-06, "loss": 0.0031, "step": 80080 }, { "epoch": 1.3104802421664075, "grad_norm": 0.1120762825012207, "learning_rate": 6.9242953808010896e-06, "loss": 0.0018, "step": 80090 }, { "epoch": 1.3106438681174835, "grad_norm": 0.17322848737239838, "learning_rate": 6.923416728704585e-06, "loss": 0.0013, "step": 80100 }, { "epoch": 1.3108074940685592, "grad_norm": 0.05941097438335419, "learning_rate": 6.922538006888848e-06, "loss": 0.0015, "step": 80110 }, { "epoch": 1.310971120019635, "grad_norm": 0.14604979753494263, "learning_rate": 6.921659215385729e-06, "loss": 0.0019, "step": 80120 }, { "epoch": 1.311134745970711, "grad_norm": 0.08937562257051468, "learning_rate": 6.9207803542270815e-06, "loss": 0.0016, "step": 80130 }, { "epoch": 1.3112983719217868, "grad_norm": 0.1113855242729187, "learning_rate": 6.919901423444764e-06, "loss": 0.0024, "step": 80140 }, { "epoch": 1.3114619978728626, "grad_norm": 0.026293085888028145, "learning_rate": 6.919022423070632e-06, "loss": 0.0013, "step": 80150 }, { "epoch": 1.3116256238239385, "grad_norm": 0.16834822297096252, "learning_rate": 6.91814335313655e-06, "loss": 0.0021, "step": 80160 }, { "epoch": 1.3117892497750143, "grad_norm": 0.17871636152267456, "learning_rate": 6.917264213674384e-06, "loss": 0.0018, "step": 80170 }, { "epoch": 1.31195287572609, "grad_norm": 0.08431093394756317, "learning_rate": 6.916385004715997e-06, "loss": 0.0014, "step": 80180 }, { "epoch": 1.312116501677166, "grad_norm": 0.0246613547205925, "learning_rate": 6.915505726293258e-06, "loss": 0.002, "step": 80190 }, { "epoch": 1.3122801276282419, "grad_norm": 0.045339860022068024, "learning_rate": 6.914626378438041e-06, "loss": 0.0025, "step": 80200 }, { "epoch": 1.3124437535793176, "grad_norm": 0.049601420760154724, "learning_rate": 6.91374696118222e-06, "loss": 0.0019, "step": 80210 }, { "epoch": 1.3126073795303936, "grad_norm": 0.14835095405578613, "learning_rate": 6.912867474557672e-06, "loss": 0.0022, "step": 80220 }, { "epoch": 1.3127710054814694, "grad_norm": 0.029804671183228493, "learning_rate": 6.9119879185962736e-06, "loss": 0.0028, "step": 80230 }, { "epoch": 1.3129346314325452, "grad_norm": 0.12879392504692078, "learning_rate": 6.911108293329909e-06, "loss": 0.0016, "step": 80240 }, { "epoch": 1.313098257383621, "grad_norm": 0.06669005751609802, "learning_rate": 6.910228598790463e-06, "loss": 0.0026, "step": 80250 }, { "epoch": 1.313261883334697, "grad_norm": 0.12902788817882538, "learning_rate": 6.90934883500982e-06, "loss": 0.0018, "step": 80260 }, { "epoch": 1.3134255092857727, "grad_norm": 0.05246992036700249, "learning_rate": 6.908469002019872e-06, "loss": 0.0011, "step": 80270 }, { "epoch": 1.3135891352368485, "grad_norm": 0.12925085425376892, "learning_rate": 6.907589099852508e-06, "loss": 0.0019, "step": 80280 }, { "epoch": 1.3137527611879243, "grad_norm": 0.07201463729143143, "learning_rate": 6.906709128539626e-06, "loss": 0.0012, "step": 80290 }, { "epoch": 1.3139163871390003, "grad_norm": 0.13973310589790344, "learning_rate": 6.905829088113118e-06, "loss": 0.0018, "step": 80300 }, { "epoch": 1.314080013090076, "grad_norm": 0.10512196272611618, "learning_rate": 6.904948978604888e-06, "loss": 0.0018, "step": 80310 }, { "epoch": 1.3142436390411518, "grad_norm": 0.1256009340286255, "learning_rate": 6.904068800046837e-06, "loss": 0.0023, "step": 80320 }, { "epoch": 1.3144072649922278, "grad_norm": 0.13715624809265137, "learning_rate": 6.903188552470867e-06, "loss": 0.0019, "step": 80330 }, { "epoch": 1.3145708909433036, "grad_norm": 0.15711772441864014, "learning_rate": 6.902308235908885e-06, "loss": 0.0022, "step": 80340 }, { "epoch": 1.3147345168943794, "grad_norm": 0.09801596403121948, "learning_rate": 6.901427850392804e-06, "loss": 0.0026, "step": 80350 }, { "epoch": 1.3148981428454554, "grad_norm": 0.14305567741394043, "learning_rate": 6.900547395954531e-06, "loss": 0.0017, "step": 80360 }, { "epoch": 1.3150617687965311, "grad_norm": 0.18697737157344818, "learning_rate": 6.899666872625986e-06, "loss": 0.0026, "step": 80370 }, { "epoch": 1.315225394747607, "grad_norm": 0.06310805678367615, "learning_rate": 6.898786280439079e-06, "loss": 0.0026, "step": 80380 }, { "epoch": 1.315389020698683, "grad_norm": 0.1619984656572342, "learning_rate": 6.897905619425735e-06, "loss": 0.0023, "step": 80390 }, { "epoch": 1.3155526466497587, "grad_norm": 0.05076758936047554, "learning_rate": 6.897024889617873e-06, "loss": 0.0025, "step": 80400 }, { "epoch": 1.3157162726008345, "grad_norm": 0.17684154212474823, "learning_rate": 6.896144091047417e-06, "loss": 0.0037, "step": 80410 }, { "epoch": 1.3158798985519105, "grad_norm": 0.13992469012737274, "learning_rate": 6.895263223746297e-06, "loss": 0.0031, "step": 80420 }, { "epoch": 1.3160435245029862, "grad_norm": 0.0648011714220047, "learning_rate": 6.8943822877464395e-06, "loss": 0.0011, "step": 80430 }, { "epoch": 1.316207150454062, "grad_norm": 0.0791841596364975, "learning_rate": 6.893501283079778e-06, "loss": 0.0016, "step": 80440 }, { "epoch": 1.3163707764051378, "grad_norm": 0.17516595125198364, "learning_rate": 6.892620209778244e-06, "loss": 0.0021, "step": 80450 }, { "epoch": 1.3165344023562138, "grad_norm": 0.041413962841033936, "learning_rate": 6.891739067873777e-06, "loss": 0.0012, "step": 80460 }, { "epoch": 1.3166980283072895, "grad_norm": 0.02108311466872692, "learning_rate": 6.890857857398317e-06, "loss": 0.0019, "step": 80470 }, { "epoch": 1.3168616542583653, "grad_norm": 0.11779098957777023, "learning_rate": 6.889976578383801e-06, "loss": 0.0017, "step": 80480 }, { "epoch": 1.317025280209441, "grad_norm": 0.05293414369225502, "learning_rate": 6.889095230862179e-06, "loss": 0.0016, "step": 80490 }, { "epoch": 1.317188906160517, "grad_norm": 0.07809446007013321, "learning_rate": 6.888213814865394e-06, "loss": 0.0026, "step": 80500 }, { "epoch": 1.3173525321115929, "grad_norm": 0.22116778790950775, "learning_rate": 6.887332330425398e-06, "loss": 0.0026, "step": 80510 }, { "epoch": 1.3175161580626686, "grad_norm": 0.07223636656999588, "learning_rate": 6.886450777574141e-06, "loss": 0.0011, "step": 80520 }, { "epoch": 1.3176797840137446, "grad_norm": 0.4159006178379059, "learning_rate": 6.885569156343577e-06, "loss": 0.0019, "step": 80530 }, { "epoch": 1.3178434099648204, "grad_norm": 0.16960126161575317, "learning_rate": 6.8846874667656614e-06, "loss": 0.0015, "step": 80540 }, { "epoch": 1.3180070359158962, "grad_norm": 0.18952810764312744, "learning_rate": 6.883805708872355e-06, "loss": 0.0021, "step": 80550 }, { "epoch": 1.3181706618669722, "grad_norm": 0.10406055301427841, "learning_rate": 6.8829238826956205e-06, "loss": 0.0038, "step": 80560 }, { "epoch": 1.318334287818048, "grad_norm": 0.05601206421852112, "learning_rate": 6.882041988267421e-06, "loss": 0.0016, "step": 80570 }, { "epoch": 1.3184979137691237, "grad_norm": 0.040606867522001266, "learning_rate": 6.881160025619722e-06, "loss": 0.0022, "step": 80580 }, { "epoch": 1.3186615397201997, "grad_norm": 0.1328790932893753, "learning_rate": 6.880277994784492e-06, "loss": 0.0029, "step": 80590 }, { "epoch": 1.3188251656712755, "grad_norm": 0.061259590089321136, "learning_rate": 6.879395895793706e-06, "loss": 0.0016, "step": 80600 }, { "epoch": 1.3189887916223513, "grad_norm": 0.08032367378473282, "learning_rate": 6.878513728679335e-06, "loss": 0.001, "step": 80610 }, { "epoch": 1.3191524175734273, "grad_norm": 0.1469169408082962, "learning_rate": 6.877631493473356e-06, "loss": 0.002, "step": 80620 }, { "epoch": 1.319316043524503, "grad_norm": 0.033294882625341415, "learning_rate": 6.876749190207749e-06, "loss": 0.0014, "step": 80630 }, { "epoch": 1.3194796694755788, "grad_norm": 0.2902836799621582, "learning_rate": 6.875866818914494e-06, "loss": 0.0021, "step": 80640 }, { "epoch": 1.3196432954266546, "grad_norm": 0.08702889829874039, "learning_rate": 6.874984379625574e-06, "loss": 0.0018, "step": 80650 }, { "epoch": 1.3198069213777306, "grad_norm": 0.1536441296339035, "learning_rate": 6.874101872372978e-06, "loss": 0.0028, "step": 80660 }, { "epoch": 1.3199705473288064, "grad_norm": 0.0942351371049881, "learning_rate": 6.873219297188695e-06, "loss": 0.0024, "step": 80670 }, { "epoch": 1.3201341732798821, "grad_norm": 0.038800500333309174, "learning_rate": 6.872336654104712e-06, "loss": 0.0036, "step": 80680 }, { "epoch": 1.320297799230958, "grad_norm": 0.11567391455173492, "learning_rate": 6.8714539431530255e-06, "loss": 0.0021, "step": 80690 }, { "epoch": 1.320461425182034, "grad_norm": 0.1451907455921173, "learning_rate": 6.870571164365631e-06, "loss": 0.0026, "step": 80700 }, { "epoch": 1.3206250511331097, "grad_norm": 0.09195049852132797, "learning_rate": 6.86968831777453e-06, "loss": 0.0017, "step": 80710 }, { "epoch": 1.3207886770841855, "grad_norm": 0.048784442245960236, "learning_rate": 6.86880540341172e-06, "loss": 0.0018, "step": 80720 }, { "epoch": 1.3209523030352615, "grad_norm": 0.012231039814651012, "learning_rate": 6.867922421309207e-06, "loss": 0.0032, "step": 80730 }, { "epoch": 1.3211159289863372, "grad_norm": 0.15991760790348053, "learning_rate": 6.867039371498993e-06, "loss": 0.0016, "step": 80740 }, { "epoch": 1.321279554937413, "grad_norm": 0.2143750935792923, "learning_rate": 6.866156254013091e-06, "loss": 0.0036, "step": 80750 }, { "epoch": 1.321443180888489, "grad_norm": 0.18175247311592102, "learning_rate": 6.865273068883509e-06, "loss": 0.004, "step": 80760 }, { "epoch": 1.3216068068395648, "grad_norm": 0.1462283432483673, "learning_rate": 6.864389816142262e-06, "loss": 0.0015, "step": 80770 }, { "epoch": 1.3217704327906405, "grad_norm": 0.2172851413488388, "learning_rate": 6.863506495821365e-06, "loss": 0.0016, "step": 80780 }, { "epoch": 1.3219340587417165, "grad_norm": 0.3600696623325348, "learning_rate": 6.862623107952836e-06, "loss": 0.0019, "step": 80790 }, { "epoch": 1.3220976846927923, "grad_norm": 0.11137879639863968, "learning_rate": 6.8617396525686955e-06, "loss": 0.0017, "step": 80800 }, { "epoch": 1.322261310643868, "grad_norm": 0.08805038779973984, "learning_rate": 6.860856129700968e-06, "loss": 0.0022, "step": 80810 }, { "epoch": 1.322424936594944, "grad_norm": 0.08044935762882233, "learning_rate": 6.85997253938168e-06, "loss": 0.0023, "step": 80820 }, { "epoch": 1.3225885625460199, "grad_norm": 0.07507777959108353, "learning_rate": 6.859088881642855e-06, "loss": 0.0018, "step": 80830 }, { "epoch": 1.3227521884970956, "grad_norm": 0.27787670493125916, "learning_rate": 6.858205156516526e-06, "loss": 0.0032, "step": 80840 }, { "epoch": 1.3229158144481714, "grad_norm": 0.04928312078118324, "learning_rate": 6.857321364034727e-06, "loss": 0.0019, "step": 80850 }, { "epoch": 1.3230794403992472, "grad_norm": 0.23112504184246063, "learning_rate": 6.856437504229493e-06, "loss": 0.0028, "step": 80860 }, { "epoch": 1.3232430663503232, "grad_norm": 0.20398372411727905, "learning_rate": 6.855553577132862e-06, "loss": 0.0013, "step": 80870 }, { "epoch": 1.323406692301399, "grad_norm": 0.08332142978906631, "learning_rate": 6.854669582776873e-06, "loss": 0.0014, "step": 80880 }, { "epoch": 1.3235703182524747, "grad_norm": 0.07513080537319183, "learning_rate": 6.853785521193568e-06, "loss": 0.0026, "step": 80890 }, { "epoch": 1.3237339442035507, "grad_norm": 0.07339072972536087, "learning_rate": 6.8529013924149945e-06, "loss": 0.0015, "step": 80900 }, { "epoch": 1.3238975701546265, "grad_norm": 0.22547726333141327, "learning_rate": 6.852017196473197e-06, "loss": 0.0021, "step": 80910 }, { "epoch": 1.3240611961057023, "grad_norm": 0.1536349654197693, "learning_rate": 6.851132933400231e-06, "loss": 0.0014, "step": 80920 }, { "epoch": 1.3242248220567783, "grad_norm": 0.2098226547241211, "learning_rate": 6.850248603228143e-06, "loss": 0.0015, "step": 80930 }, { "epoch": 1.324388448007854, "grad_norm": 0.046367328613996506, "learning_rate": 6.8493642059889886e-06, "loss": 0.0023, "step": 80940 }, { "epoch": 1.3245520739589298, "grad_norm": 0.17950865626335144, "learning_rate": 6.848479741714829e-06, "loss": 0.0021, "step": 80950 }, { "epoch": 1.3247156999100058, "grad_norm": 0.19667834043502808, "learning_rate": 6.847595210437721e-06, "loss": 0.0019, "step": 80960 }, { "epoch": 1.3248793258610816, "grad_norm": 0.08681071549654007, "learning_rate": 6.846710612189726e-06, "loss": 0.0012, "step": 80970 }, { "epoch": 1.3250429518121574, "grad_norm": 0.14749613404273987, "learning_rate": 6.845825947002911e-06, "loss": 0.0023, "step": 80980 }, { "epoch": 1.3252065777632334, "grad_norm": 0.13464826345443726, "learning_rate": 6.844941214909341e-06, "loss": 0.0016, "step": 80990 }, { "epoch": 1.3253702037143091, "grad_norm": 0.05621561035513878, "learning_rate": 6.844056415941086e-06, "loss": 0.0015, "step": 81000 }, { "epoch": 1.325533829665385, "grad_norm": 0.040989041328430176, "learning_rate": 6.843171550130219e-06, "loss": 0.002, "step": 81010 }, { "epoch": 1.325697455616461, "grad_norm": 0.07023930549621582, "learning_rate": 6.842286617508813e-06, "loss": 0.0039, "step": 81020 }, { "epoch": 1.3258610815675367, "grad_norm": 0.09771239757537842, "learning_rate": 6.8414016181089445e-06, "loss": 0.0027, "step": 81030 }, { "epoch": 1.3260247075186125, "grad_norm": 0.34062427282333374, "learning_rate": 6.840516551962692e-06, "loss": 0.0046, "step": 81040 }, { "epoch": 1.3261883334696882, "grad_norm": 0.07065514475107193, "learning_rate": 6.83963141910214e-06, "loss": 0.0023, "step": 81050 }, { "epoch": 1.326351959420764, "grad_norm": 0.09008131921291351, "learning_rate": 6.838746219559369e-06, "loss": 0.0014, "step": 81060 }, { "epoch": 1.32651558537184, "grad_norm": 0.028675584122538567, "learning_rate": 6.837860953366468e-06, "loss": 0.0019, "step": 81070 }, { "epoch": 1.3266792113229158, "grad_norm": 0.27796679735183716, "learning_rate": 6.836975620555525e-06, "loss": 0.0017, "step": 81080 }, { "epoch": 1.3268428372739915, "grad_norm": 0.1774597316980362, "learning_rate": 6.83609022115863e-06, "loss": 0.0026, "step": 81090 }, { "epoch": 1.3270064632250675, "grad_norm": 0.15093399584293365, "learning_rate": 6.835204755207877e-06, "loss": 0.0034, "step": 81100 }, { "epoch": 1.3271700891761433, "grad_norm": 0.17880648374557495, "learning_rate": 6.834319222735363e-06, "loss": 0.0014, "step": 81110 }, { "epoch": 1.327333715127219, "grad_norm": 0.08786498755216599, "learning_rate": 6.833433623773185e-06, "loss": 0.0011, "step": 81120 }, { "epoch": 1.327497341078295, "grad_norm": 0.17943353950977325, "learning_rate": 6.832547958353446e-06, "loss": 0.0018, "step": 81130 }, { "epoch": 1.3276609670293709, "grad_norm": 0.08343411982059479, "learning_rate": 6.831662226508247e-06, "loss": 0.0024, "step": 81140 }, { "epoch": 1.3278245929804466, "grad_norm": 0.16493742167949677, "learning_rate": 6.830776428269696e-06, "loss": 0.0011, "step": 81150 }, { "epoch": 1.3279882189315226, "grad_norm": 0.11954963207244873, "learning_rate": 6.829890563669897e-06, "loss": 0.0015, "step": 81160 }, { "epoch": 1.3281518448825984, "grad_norm": 0.028271540999412537, "learning_rate": 6.829004632740967e-06, "loss": 0.0027, "step": 81170 }, { "epoch": 1.3283154708336742, "grad_norm": 0.14057444036006927, "learning_rate": 6.828118635515011e-06, "loss": 0.0019, "step": 81180 }, { "epoch": 1.3284790967847502, "grad_norm": 0.012790342792868614, "learning_rate": 6.827232572024149e-06, "loss": 0.0019, "step": 81190 }, { "epoch": 1.328642722735826, "grad_norm": 0.11902596056461334, "learning_rate": 6.826346442300499e-06, "loss": 0.0018, "step": 81200 }, { "epoch": 1.3288063486869017, "grad_norm": 0.07261660695075989, "learning_rate": 6.82546024637618e-06, "loss": 0.002, "step": 81210 }, { "epoch": 1.3289699746379775, "grad_norm": 0.024522479623556137, "learning_rate": 6.824573984283315e-06, "loss": 0.0017, "step": 81220 }, { "epoch": 1.3291336005890535, "grad_norm": 0.08588574826717377, "learning_rate": 6.823687656054027e-06, "loss": 0.0015, "step": 81230 }, { "epoch": 1.3292972265401293, "grad_norm": 0.2700470983982086, "learning_rate": 6.822801261720444e-06, "loss": 0.0031, "step": 81240 }, { "epoch": 1.329460852491205, "grad_norm": 0.2070290446281433, "learning_rate": 6.821914801314698e-06, "loss": 0.0022, "step": 81250 }, { "epoch": 1.3296244784422808, "grad_norm": 0.05021479353308678, "learning_rate": 6.821028274868917e-06, "loss": 0.0029, "step": 81260 }, { "epoch": 1.3297881043933568, "grad_norm": 0.0749955102801323, "learning_rate": 6.820141682415241e-06, "loss": 0.0017, "step": 81270 }, { "epoch": 1.3299517303444326, "grad_norm": 0.3230012357234955, "learning_rate": 6.8192550239858024e-06, "loss": 0.003, "step": 81280 }, { "epoch": 1.3301153562955084, "grad_norm": 0.185124933719635, "learning_rate": 6.818368299612741e-06, "loss": 0.0044, "step": 81290 }, { "epoch": 1.3302789822465844, "grad_norm": 0.07615358382463455, "learning_rate": 6.817481509328199e-06, "loss": 0.0015, "step": 81300 }, { "epoch": 1.3304426081976601, "grad_norm": 0.11890163272619247, "learning_rate": 6.81659465316432e-06, "loss": 0.0022, "step": 81310 }, { "epoch": 1.330606234148736, "grad_norm": 0.09652549028396606, "learning_rate": 6.815707731153252e-06, "loss": 0.0019, "step": 81320 }, { "epoch": 1.330769860099812, "grad_norm": 0.011425946839153767, "learning_rate": 6.814820743327142e-06, "loss": 0.0022, "step": 81330 }, { "epoch": 1.3309334860508877, "grad_norm": 0.16947855055332184, "learning_rate": 6.8139336897181414e-06, "loss": 0.0015, "step": 81340 }, { "epoch": 1.3310971120019635, "grad_norm": 0.21875901520252228, "learning_rate": 6.813046570358406e-06, "loss": 0.0019, "step": 81350 }, { "epoch": 1.3312607379530395, "grad_norm": 0.3321792781352997, "learning_rate": 6.8121593852800885e-06, "loss": 0.0021, "step": 81360 }, { "epoch": 1.3314243639041152, "grad_norm": 0.12055980414152145, "learning_rate": 6.8112721345153495e-06, "loss": 0.0024, "step": 81370 }, { "epoch": 1.331587989855191, "grad_norm": 0.08639483153820038, "learning_rate": 6.81038481809635e-06, "loss": 0.0017, "step": 81380 }, { "epoch": 1.331751615806267, "grad_norm": 0.18138200044631958, "learning_rate": 6.8094974360552505e-06, "loss": 0.0021, "step": 81390 }, { "epoch": 1.3319152417573428, "grad_norm": 0.09246304631233215, "learning_rate": 6.808609988424217e-06, "loss": 0.0015, "step": 81400 }, { "epoch": 1.3320788677084185, "grad_norm": 0.01652436889708042, "learning_rate": 6.807722475235419e-06, "loss": 0.0021, "step": 81410 }, { "epoch": 1.3322424936594943, "grad_norm": 0.10271818935871124, "learning_rate": 6.806834896521027e-06, "loss": 0.0011, "step": 81420 }, { "epoch": 1.3324061196105703, "grad_norm": 0.14635935425758362, "learning_rate": 6.805947252313212e-06, "loss": 0.0015, "step": 81430 }, { "epoch": 1.332569745561646, "grad_norm": 0.015574569813907146, "learning_rate": 6.805059542644149e-06, "loss": 0.001, "step": 81440 }, { "epoch": 1.3327333715127219, "grad_norm": 0.20389361679553986, "learning_rate": 6.8041717675460175e-06, "loss": 0.002, "step": 81450 }, { "epoch": 1.3328969974637976, "grad_norm": 0.03448345139622688, "learning_rate": 6.803283927050994e-06, "loss": 0.0015, "step": 81460 }, { "epoch": 1.3330606234148736, "grad_norm": 0.164043590426445, "learning_rate": 6.802396021191262e-06, "loss": 0.0021, "step": 81470 }, { "epoch": 1.3332242493659494, "grad_norm": 0.0645124539732933, "learning_rate": 6.8015080499990075e-06, "loss": 0.0019, "step": 81480 }, { "epoch": 1.3333878753170252, "grad_norm": 0.03765098378062248, "learning_rate": 6.800620013506416e-06, "loss": 0.0013, "step": 81490 }, { "epoch": 1.3335515012681012, "grad_norm": 0.044347334653139114, "learning_rate": 6.799731911745675e-06, "loss": 0.0025, "step": 81500 }, { "epoch": 1.333715127219177, "grad_norm": 0.25207430124282837, "learning_rate": 6.79884374474898e-06, "loss": 0.0035, "step": 81510 }, { "epoch": 1.3338787531702527, "grad_norm": 0.0751473531126976, "learning_rate": 6.797955512548521e-06, "loss": 0.003, "step": 81520 }, { "epoch": 1.3340423791213287, "grad_norm": 0.1638592928647995, "learning_rate": 6.797067215176497e-06, "loss": 0.0022, "step": 81530 }, { "epoch": 1.3342060050724045, "grad_norm": 0.0020773319993168116, "learning_rate": 6.796178852665104e-06, "loss": 0.0022, "step": 81540 }, { "epoch": 1.3343696310234803, "grad_norm": 0.16474537551403046, "learning_rate": 6.795290425046546e-06, "loss": 0.002, "step": 81550 }, { "epoch": 1.3345332569745563, "grad_norm": 0.20047356188297272, "learning_rate": 6.794401932353025e-06, "loss": 0.0018, "step": 81560 }, { "epoch": 1.334696882925632, "grad_norm": 0.10200228542089462, "learning_rate": 6.793513374616746e-06, "loss": 0.0028, "step": 81570 }, { "epoch": 1.3348605088767078, "grad_norm": 0.19652168452739716, "learning_rate": 6.792624751869919e-06, "loss": 0.0021, "step": 81580 }, { "epoch": 1.3350241348277838, "grad_norm": 0.15968818962574005, "learning_rate": 6.791736064144753e-06, "loss": 0.0015, "step": 81590 }, { "epoch": 1.3351877607788596, "grad_norm": 0.19664816558361053, "learning_rate": 6.79084731147346e-06, "loss": 0.0028, "step": 81600 }, { "epoch": 1.3353513867299354, "grad_norm": 0.5528716444969177, "learning_rate": 6.789958493888256e-06, "loss": 0.0027, "step": 81610 }, { "epoch": 1.3355150126810111, "grad_norm": 0.03361961990594864, "learning_rate": 6.789069611421358e-06, "loss": 0.0014, "step": 81620 }, { "epoch": 1.335678638632087, "grad_norm": 0.26299917697906494, "learning_rate": 6.788180664104989e-06, "loss": 0.003, "step": 81630 }, { "epoch": 1.335842264583163, "grad_norm": 0.07342177629470825, "learning_rate": 6.787291651971367e-06, "loss": 0.0016, "step": 81640 }, { "epoch": 1.3360058905342387, "grad_norm": 0.08538855612277985, "learning_rate": 6.786402575052718e-06, "loss": 0.0021, "step": 81650 }, { "epoch": 1.3361695164853145, "grad_norm": 0.09256561845541, "learning_rate": 6.78551343338127e-06, "loss": 0.004, "step": 81660 }, { "epoch": 1.3363331424363905, "grad_norm": 0.0368439182639122, "learning_rate": 6.784624226989251e-06, "loss": 0.0018, "step": 81670 }, { "epoch": 1.3364967683874662, "grad_norm": 0.0727786123752594, "learning_rate": 6.783734955908894e-06, "loss": 0.0011, "step": 81680 }, { "epoch": 1.336660394338542, "grad_norm": 0.04618093743920326, "learning_rate": 6.78284562017243e-06, "loss": 0.0029, "step": 81690 }, { "epoch": 1.336824020289618, "grad_norm": 0.05337835103273392, "learning_rate": 6.781956219812098e-06, "loss": 0.0011, "step": 81700 }, { "epoch": 1.3369876462406938, "grad_norm": 0.09037453681230545, "learning_rate": 6.781066754860137e-06, "loss": 0.0023, "step": 81710 }, { "epoch": 1.3371512721917695, "grad_norm": 0.29447680711746216, "learning_rate": 6.780177225348785e-06, "loss": 0.002, "step": 81720 }, { "epoch": 1.3373148981428455, "grad_norm": 0.39623990654945374, "learning_rate": 6.7792876313102895e-06, "loss": 0.0022, "step": 81730 }, { "epoch": 1.3374785240939213, "grad_norm": 0.2530408501625061, "learning_rate": 6.778397972776892e-06, "loss": 0.0041, "step": 81740 }, { "epoch": 1.337642150044997, "grad_norm": 0.29657208919525146, "learning_rate": 6.777508249780843e-06, "loss": 0.0024, "step": 81750 }, { "epoch": 1.337805775996073, "grad_norm": 0.28433340787887573, "learning_rate": 6.776618462354391e-06, "loss": 0.0019, "step": 81760 }, { "epoch": 1.3379694019471489, "grad_norm": 0.08459485322237015, "learning_rate": 6.775728610529791e-06, "loss": 0.0027, "step": 81770 }, { "epoch": 1.3381330278982246, "grad_norm": 0.012726878747344017, "learning_rate": 6.774838694339297e-06, "loss": 0.0019, "step": 81780 }, { "epoch": 1.3382966538493006, "grad_norm": 0.08615007251501083, "learning_rate": 6.773948713815164e-06, "loss": 0.0012, "step": 81790 }, { "epoch": 1.3384602798003764, "grad_norm": 0.030178209766745567, "learning_rate": 6.773058668989655e-06, "loss": 0.0016, "step": 81800 }, { "epoch": 1.3386239057514522, "grad_norm": 0.08457730710506439, "learning_rate": 6.77216855989503e-06, "loss": 0.0011, "step": 81810 }, { "epoch": 1.338787531702528, "grad_norm": 0.34581729769706726, "learning_rate": 6.771278386563556e-06, "loss": 0.0033, "step": 81820 }, { "epoch": 1.3389511576536037, "grad_norm": 0.06941339373588562, "learning_rate": 6.770388149027496e-06, "loss": 0.0022, "step": 81830 }, { "epoch": 1.3391147836046797, "grad_norm": 0.0813087597489357, "learning_rate": 6.76949784731912e-06, "loss": 0.002, "step": 81840 }, { "epoch": 1.3392784095557555, "grad_norm": 0.0886281281709671, "learning_rate": 6.7686074814707015e-06, "loss": 0.001, "step": 81850 }, { "epoch": 1.3394420355068313, "grad_norm": 0.12007439136505127, "learning_rate": 6.76771705151451e-06, "loss": 0.0018, "step": 81860 }, { "epoch": 1.3396056614579073, "grad_norm": 0.018145669251680374, "learning_rate": 6.766826557482826e-06, "loss": 0.0017, "step": 81870 }, { "epoch": 1.339769287408983, "grad_norm": 0.20330819487571716, "learning_rate": 6.765935999407927e-06, "loss": 0.0016, "step": 81880 }, { "epoch": 1.3399329133600588, "grad_norm": 0.018880534917116165, "learning_rate": 6.765045377322091e-06, "loss": 0.0025, "step": 81890 }, { "epoch": 1.3400965393111348, "grad_norm": 0.039413075894117355, "learning_rate": 6.764154691257601e-06, "loss": 0.0011, "step": 81900 }, { "epoch": 1.3402601652622106, "grad_norm": 0.08342961221933365, "learning_rate": 6.763263941246745e-06, "loss": 0.001, "step": 81910 }, { "epoch": 1.3404237912132864, "grad_norm": 0.17808179557323456, "learning_rate": 6.7623731273218084e-06, "loss": 0.0035, "step": 81920 }, { "epoch": 1.3405874171643624, "grad_norm": 0.355609267950058, "learning_rate": 6.761482249515082e-06, "loss": 0.0031, "step": 81930 }, { "epoch": 1.3407510431154381, "grad_norm": 0.1452360302209854, "learning_rate": 6.760591307858858e-06, "loss": 0.0039, "step": 81940 }, { "epoch": 1.340914669066514, "grad_norm": 0.12057671695947647, "learning_rate": 6.75970030238543e-06, "loss": 0.0021, "step": 81950 }, { "epoch": 1.34107829501759, "grad_norm": 0.08325853943824768, "learning_rate": 6.758809233127095e-06, "loss": 0.0033, "step": 81960 }, { "epoch": 1.3412419209686657, "grad_norm": 0.3102765381336212, "learning_rate": 6.757918100116153e-06, "loss": 0.0031, "step": 81970 }, { "epoch": 1.3414055469197415, "grad_norm": 0.09912633895874023, "learning_rate": 6.757026903384906e-06, "loss": 0.0015, "step": 81980 }, { "epoch": 1.3415691728708175, "grad_norm": 0.021710427477955818, "learning_rate": 6.756135642965655e-06, "loss": 0.0021, "step": 81990 }, { "epoch": 1.3417327988218932, "grad_norm": 0.08131515979766846, "learning_rate": 6.755244318890708e-06, "loss": 0.0031, "step": 82000 }, { "epoch": 1.341896424772969, "grad_norm": 0.45112860202789307, "learning_rate": 6.754352931192373e-06, "loss": 0.0019, "step": 82010 }, { "epoch": 1.3420600507240448, "grad_norm": 0.19981126487255096, "learning_rate": 6.75346147990296e-06, "loss": 0.0024, "step": 82020 }, { "epoch": 1.3422236766751205, "grad_norm": 0.09313564002513885, "learning_rate": 6.752569965054786e-06, "loss": 0.0011, "step": 82030 }, { "epoch": 1.3423873026261965, "grad_norm": 0.22114932537078857, "learning_rate": 6.751678386680159e-06, "loss": 0.0035, "step": 82040 }, { "epoch": 1.3425509285772723, "grad_norm": 0.1432260274887085, "learning_rate": 6.7507867448114025e-06, "loss": 0.003, "step": 82050 }, { "epoch": 1.342714554528348, "grad_norm": 0.04274013265967369, "learning_rate": 6.749895039480834e-06, "loss": 0.0019, "step": 82060 }, { "epoch": 1.342878180479424, "grad_norm": 0.14999033510684967, "learning_rate": 6.749003270720776e-06, "loss": 0.0022, "step": 82070 }, { "epoch": 1.3430418064304999, "grad_norm": 0.09435473382472992, "learning_rate": 6.748111438563554e-06, "loss": 0.0013, "step": 82080 }, { "epoch": 1.3432054323815756, "grad_norm": 0.1602046638727188, "learning_rate": 6.747219543041493e-06, "loss": 0.0056, "step": 82090 }, { "epoch": 1.3433690583326516, "grad_norm": 0.1247342899441719, "learning_rate": 6.7463275841869234e-06, "loss": 0.0019, "step": 82100 }, { "epoch": 1.3435326842837274, "grad_norm": 0.07622984796762466, "learning_rate": 6.745435562032175e-06, "loss": 0.002, "step": 82110 }, { "epoch": 1.3436963102348032, "grad_norm": 0.06252642720937729, "learning_rate": 6.744543476609582e-06, "loss": 0.0029, "step": 82120 }, { "epoch": 1.3438599361858792, "grad_norm": 0.08780164271593094, "learning_rate": 6.743651327951484e-06, "loss": 0.0029, "step": 82130 }, { "epoch": 1.344023562136955, "grad_norm": 0.10929341614246368, "learning_rate": 6.742759116090214e-06, "loss": 0.0014, "step": 82140 }, { "epoch": 1.3441871880880307, "grad_norm": 0.16372643411159515, "learning_rate": 6.741866841058114e-06, "loss": 0.0039, "step": 82150 }, { "epoch": 1.3443508140391067, "grad_norm": 0.2781262695789337, "learning_rate": 6.740974502887528e-06, "loss": 0.0016, "step": 82160 }, { "epoch": 1.3445144399901825, "grad_norm": 0.20046466588974, "learning_rate": 6.740082101610801e-06, "loss": 0.0027, "step": 82170 }, { "epoch": 1.3446780659412583, "grad_norm": 0.07419824600219727, "learning_rate": 6.739189637260279e-06, "loss": 0.0019, "step": 82180 }, { "epoch": 1.344841691892334, "grad_norm": 0.02324909158051014, "learning_rate": 6.7382971098683146e-06, "loss": 0.0028, "step": 82190 }, { "epoch": 1.34500531784341, "grad_norm": 0.003428959520533681, "learning_rate": 6.7374045194672565e-06, "loss": 0.0025, "step": 82200 }, { "epoch": 1.3451689437944858, "grad_norm": 0.04728710278868675, "learning_rate": 6.736511866089461e-06, "loss": 0.0016, "step": 82210 }, { "epoch": 1.3453325697455616, "grad_norm": 0.16150616109371185, "learning_rate": 6.7356191497672825e-06, "loss": 0.0026, "step": 82220 }, { "epoch": 1.3454961956966374, "grad_norm": 0.19331979751586914, "learning_rate": 6.734726370533084e-06, "loss": 0.007, "step": 82230 }, { "epoch": 1.3456598216477134, "grad_norm": 0.10543633252382278, "learning_rate": 6.733833528419222e-06, "loss": 0.0026, "step": 82240 }, { "epoch": 1.3458234475987891, "grad_norm": 0.04915414750576019, "learning_rate": 6.732940623458061e-06, "loss": 0.0024, "step": 82250 }, { "epoch": 1.345987073549865, "grad_norm": 0.12134712189435959, "learning_rate": 6.732047655681968e-06, "loss": 0.0015, "step": 82260 }, { "epoch": 1.346150699500941, "grad_norm": 0.03422749787569046, "learning_rate": 6.73115462512331e-06, "loss": 0.0019, "step": 82270 }, { "epoch": 1.3463143254520167, "grad_norm": 0.14769293367862701, "learning_rate": 6.73026153181446e-06, "loss": 0.0019, "step": 82280 }, { "epoch": 1.3464779514030925, "grad_norm": 0.08648399263620377, "learning_rate": 6.729368375787786e-06, "loss": 0.0014, "step": 82290 }, { "epoch": 1.3466415773541685, "grad_norm": 0.0378507636487484, "learning_rate": 6.728475157075666e-06, "loss": 0.003, "step": 82300 }, { "epoch": 1.3468052033052442, "grad_norm": 0.04808375984430313, "learning_rate": 6.727581875710474e-06, "loss": 0.0035, "step": 82310 }, { "epoch": 1.34696882925632, "grad_norm": 0.08152385801076889, "learning_rate": 6.726688531724592e-06, "loss": 0.0026, "step": 82320 }, { "epoch": 1.347132455207396, "grad_norm": 0.14797881245613098, "learning_rate": 6.725795125150401e-06, "loss": 0.0037, "step": 82330 }, { "epoch": 1.3472960811584718, "grad_norm": 0.11176542192697525, "learning_rate": 6.724901656020284e-06, "loss": 0.0023, "step": 82340 }, { "epoch": 1.3474597071095475, "grad_norm": 0.08064939081668854, "learning_rate": 6.724008124366628e-06, "loss": 0.0022, "step": 82350 }, { "epoch": 1.3476233330606235, "grad_norm": 0.11147861927747726, "learning_rate": 6.723114530221822e-06, "loss": 0.0016, "step": 82360 }, { "epoch": 1.3477869590116993, "grad_norm": 0.09922857582569122, "learning_rate": 6.722220873618254e-06, "loss": 0.0021, "step": 82370 }, { "epoch": 1.347950584962775, "grad_norm": 0.08916383236646652, "learning_rate": 6.72132715458832e-06, "loss": 0.0018, "step": 82380 }, { "epoch": 1.3481142109138509, "grad_norm": 0.17162224650382996, "learning_rate": 6.720433373164412e-06, "loss": 0.0035, "step": 82390 }, { "epoch": 1.3482778368649269, "grad_norm": 0.11682993918657303, "learning_rate": 6.71953952937893e-06, "loss": 0.0024, "step": 82400 }, { "epoch": 1.3484414628160026, "grad_norm": 0.05537468567490578, "learning_rate": 6.718645623264273e-06, "loss": 0.002, "step": 82410 }, { "epoch": 1.3486050887670784, "grad_norm": 0.09403576701879501, "learning_rate": 6.717751654852843e-06, "loss": 0.0016, "step": 82420 }, { "epoch": 1.3487687147181542, "grad_norm": 0.19460827112197876, "learning_rate": 6.716857624177045e-06, "loss": 0.0032, "step": 82430 }, { "epoch": 1.3489323406692302, "grad_norm": 0.26909196376800537, "learning_rate": 6.7159635312692825e-06, "loss": 0.0012, "step": 82440 }, { "epoch": 1.349095966620306, "grad_norm": 0.10852129012346268, "learning_rate": 6.715069376161968e-06, "loss": 0.002, "step": 82450 }, { "epoch": 1.3492595925713817, "grad_norm": 0.29933688044548035, "learning_rate": 6.71417515888751e-06, "loss": 0.0031, "step": 82460 }, { "epoch": 1.3494232185224577, "grad_norm": 0.16139064729213715, "learning_rate": 6.713280879478321e-06, "loss": 0.0021, "step": 82470 }, { "epoch": 1.3495868444735335, "grad_norm": 0.07797890901565552, "learning_rate": 6.7123865379668196e-06, "loss": 0.0015, "step": 82480 }, { "epoch": 1.3497504704246093, "grad_norm": 0.1679505854845047, "learning_rate": 6.711492134385421e-06, "loss": 0.0022, "step": 82490 }, { "epoch": 1.3499140963756853, "grad_norm": 0.03788674622774124, "learning_rate": 6.710597668766546e-06, "loss": 0.0025, "step": 82500 }, { "epoch": 1.350077722326761, "grad_norm": 0.2341817021369934, "learning_rate": 6.709703141142618e-06, "loss": 0.0016, "step": 82510 }, { "epoch": 1.3502413482778368, "grad_norm": 0.04366068169474602, "learning_rate": 6.708808551546059e-06, "loss": 0.0026, "step": 82520 }, { "epoch": 1.3504049742289128, "grad_norm": 0.08851949870586395, "learning_rate": 6.707913900009298e-06, "loss": 0.0028, "step": 82530 }, { "epoch": 1.3505686001799886, "grad_norm": 0.4423024356365204, "learning_rate": 6.707019186564761e-06, "loss": 0.0046, "step": 82540 }, { "epoch": 1.3507322261310644, "grad_norm": 0.05465300753712654, "learning_rate": 6.706124411244883e-06, "loss": 0.0021, "step": 82550 }, { "epoch": 1.3508958520821404, "grad_norm": 0.14524762332439423, "learning_rate": 6.705229574082095e-06, "loss": 0.0013, "step": 82560 }, { "epoch": 1.3510594780332161, "grad_norm": 0.0880657285451889, "learning_rate": 6.7043346751088344e-06, "loss": 0.0028, "step": 82570 }, { "epoch": 1.351223103984292, "grad_norm": 0.08138912171125412, "learning_rate": 6.7034397143575384e-06, "loss": 0.0028, "step": 82580 }, { "epoch": 1.3513867299353677, "grad_norm": 0.13508175313472748, "learning_rate": 6.702544691860645e-06, "loss": 0.002, "step": 82590 }, { "epoch": 1.3515503558864435, "grad_norm": 0.013995610177516937, "learning_rate": 6.7016496076506e-06, "loss": 0.0025, "step": 82600 }, { "epoch": 1.3517139818375195, "grad_norm": 0.4418068528175354, "learning_rate": 6.700754461759844e-06, "loss": 0.0022, "step": 82610 }, { "epoch": 1.3518776077885952, "grad_norm": 0.02073877491056919, "learning_rate": 6.699859254220829e-06, "loss": 0.0031, "step": 82620 }, { "epoch": 1.352041233739671, "grad_norm": 0.06183016300201416, "learning_rate": 6.698963985066001e-06, "loss": 0.0011, "step": 82630 }, { "epoch": 1.352204859690747, "grad_norm": 0.0949455201625824, "learning_rate": 6.698068654327811e-06, "loss": 0.003, "step": 82640 }, { "epoch": 1.3523684856418228, "grad_norm": 0.15206284821033478, "learning_rate": 6.697173262038714e-06, "loss": 0.0027, "step": 82650 }, { "epoch": 1.3525321115928985, "grad_norm": 0.1209939643740654, "learning_rate": 6.696277808231166e-06, "loss": 0.0021, "step": 82660 }, { "epoch": 1.3526957375439745, "grad_norm": 0.10925428569316864, "learning_rate": 6.695382292937623e-06, "loss": 0.0015, "step": 82670 }, { "epoch": 1.3528593634950503, "grad_norm": 0.20161554217338562, "learning_rate": 6.694486716190548e-06, "loss": 0.002, "step": 82680 }, { "epoch": 1.353022989446126, "grad_norm": 0.0996408611536026, "learning_rate": 6.693591078022401e-06, "loss": 0.0062, "step": 82690 }, { "epoch": 1.353186615397202, "grad_norm": 0.020687879994511604, "learning_rate": 6.692695378465648e-06, "loss": 0.0043, "step": 82700 }, { "epoch": 1.3533502413482779, "grad_norm": 0.48303523659706116, "learning_rate": 6.6917996175527566e-06, "loss": 0.002, "step": 82710 }, { "epoch": 1.3535138672993536, "grad_norm": 0.048049282282590866, "learning_rate": 6.690903795316195e-06, "loss": 0.002, "step": 82720 }, { "epoch": 1.3536774932504296, "grad_norm": 0.05348822847008705, "learning_rate": 6.690007911788435e-06, "loss": 0.0035, "step": 82730 }, { "epoch": 1.3538411192015054, "grad_norm": 0.1339612603187561, "learning_rate": 6.68911196700195e-06, "loss": 0.002, "step": 82740 }, { "epoch": 1.3540047451525812, "grad_norm": 0.10389325022697449, "learning_rate": 6.688215960989214e-06, "loss": 0.0025, "step": 82750 }, { "epoch": 1.3541683711036572, "grad_norm": 0.2246437817811966, "learning_rate": 6.687319893782709e-06, "loss": 0.0023, "step": 82760 }, { "epoch": 1.354331997054733, "grad_norm": 0.04397423937916756, "learning_rate": 6.686423765414912e-06, "loss": 0.0015, "step": 82770 }, { "epoch": 1.3544956230058087, "grad_norm": 0.2279076874256134, "learning_rate": 6.685527575918308e-06, "loss": 0.0019, "step": 82780 }, { "epoch": 1.3546592489568845, "grad_norm": 0.2660253643989563, "learning_rate": 6.6846313253253804e-06, "loss": 0.0018, "step": 82790 }, { "epoch": 1.3548228749079603, "grad_norm": 0.0699646845459938, "learning_rate": 6.6837350136686155e-06, "loss": 0.0012, "step": 82800 }, { "epoch": 1.3549865008590363, "grad_norm": 0.09754594415426254, "learning_rate": 6.682838640980503e-06, "loss": 0.0031, "step": 82810 }, { "epoch": 1.355150126810112, "grad_norm": 0.08128782361745834, "learning_rate": 6.681942207293535e-06, "loss": 0.0024, "step": 82820 }, { "epoch": 1.3553137527611878, "grad_norm": 0.011833651922643185, "learning_rate": 6.681045712640203e-06, "loss": 0.0035, "step": 82830 }, { "epoch": 1.3554773787122638, "grad_norm": 0.07097772508859634, "learning_rate": 6.680149157053005e-06, "loss": 0.0017, "step": 82840 }, { "epoch": 1.3556410046633396, "grad_norm": 0.08166410773992538, "learning_rate": 6.679252540564438e-06, "loss": 0.0018, "step": 82850 }, { "epoch": 1.3558046306144154, "grad_norm": 0.09992627054452896, "learning_rate": 6.678355863207003e-06, "loss": 0.0021, "step": 82860 }, { "epoch": 1.3559682565654914, "grad_norm": 0.0964866578578949, "learning_rate": 6.677459125013202e-06, "loss": 0.0014, "step": 82870 }, { "epoch": 1.3561318825165671, "grad_norm": 0.0698394924402237, "learning_rate": 6.6765623260155385e-06, "loss": 0.0017, "step": 82880 }, { "epoch": 1.356295508467643, "grad_norm": 0.14303836226463318, "learning_rate": 6.675665466246521e-06, "loss": 0.0019, "step": 82890 }, { "epoch": 1.356459134418719, "grad_norm": 0.13235551118850708, "learning_rate": 6.6747685457386565e-06, "loss": 0.0023, "step": 82900 }, { "epoch": 1.3566227603697947, "grad_norm": 0.2126377820968628, "learning_rate": 6.673871564524458e-06, "loss": 0.0036, "step": 82910 }, { "epoch": 1.3567863863208705, "grad_norm": 0.11085109412670135, "learning_rate": 6.6729745226364385e-06, "loss": 0.0013, "step": 82920 }, { "epoch": 1.3569500122719464, "grad_norm": 0.18328532576560974, "learning_rate": 6.672077420107115e-06, "loss": 0.0017, "step": 82930 }, { "epoch": 1.3571136382230222, "grad_norm": 0.06743686646223068, "learning_rate": 6.671180256969001e-06, "loss": 0.0019, "step": 82940 }, { "epoch": 1.357277264174098, "grad_norm": 0.09188753366470337, "learning_rate": 6.670283033254621e-06, "loss": 0.0018, "step": 82950 }, { "epoch": 1.3574408901251738, "grad_norm": 0.18873248994350433, "learning_rate": 6.669385748996495e-06, "loss": 0.0017, "step": 82960 }, { "epoch": 1.3576045160762498, "grad_norm": 0.15171048045158386, "learning_rate": 6.668488404227147e-06, "loss": 0.0016, "step": 82970 }, { "epoch": 1.3577681420273255, "grad_norm": 0.40069663524627686, "learning_rate": 6.667590998979106e-06, "loss": 0.0023, "step": 82980 }, { "epoch": 1.3579317679784013, "grad_norm": 0.09955428540706635, "learning_rate": 6.666693533284898e-06, "loss": 0.0029, "step": 82990 }, { "epoch": 1.358095393929477, "grad_norm": 0.3799716532230377, "learning_rate": 6.6657960071770555e-06, "loss": 0.0044, "step": 83000 }, { "epoch": 1.358259019880553, "grad_norm": 0.04664906486868858, "learning_rate": 6.664898420688112e-06, "loss": 0.0015, "step": 83010 }, { "epoch": 1.3584226458316289, "grad_norm": 0.011510207317769527, "learning_rate": 6.664000773850602e-06, "loss": 0.0023, "step": 83020 }, { "epoch": 1.3585862717827046, "grad_norm": 0.218876913189888, "learning_rate": 6.663103066697063e-06, "loss": 0.0011, "step": 83030 }, { "epoch": 1.3587498977337806, "grad_norm": 0.04186856746673584, "learning_rate": 6.662205299260034e-06, "loss": 0.0019, "step": 83040 }, { "epoch": 1.3589135236848564, "grad_norm": 0.12495332211256027, "learning_rate": 6.6613074715720595e-06, "loss": 0.0028, "step": 83050 }, { "epoch": 1.3590771496359322, "grad_norm": 0.08146946132183075, "learning_rate": 6.660409583665681e-06, "loss": 0.0021, "step": 83060 }, { "epoch": 1.3592407755870082, "grad_norm": 0.16242024302482605, "learning_rate": 6.659511635573447e-06, "loss": 0.0036, "step": 83070 }, { "epoch": 1.359404401538084, "grad_norm": 0.23032371699810028, "learning_rate": 6.658613627327905e-06, "loss": 0.0014, "step": 83080 }, { "epoch": 1.3595680274891597, "grad_norm": 0.20575572550296783, "learning_rate": 6.657715558961604e-06, "loss": 0.0018, "step": 83090 }, { "epoch": 1.3597316534402357, "grad_norm": 0.08109265565872192, "learning_rate": 6.656817430507099e-06, "loss": 0.0017, "step": 83100 }, { "epoch": 1.3598952793913115, "grad_norm": 0.21918833255767822, "learning_rate": 6.6559192419969424e-06, "loss": 0.0022, "step": 83110 }, { "epoch": 1.3600589053423873, "grad_norm": 0.05995751544833183, "learning_rate": 6.655020993463695e-06, "loss": 0.0028, "step": 83120 }, { "epoch": 1.3602225312934633, "grad_norm": 0.042985714972019196, "learning_rate": 6.654122684939914e-06, "loss": 0.0021, "step": 83130 }, { "epoch": 1.360386157244539, "grad_norm": 0.158111572265625, "learning_rate": 6.653224316458161e-06, "loss": 0.0027, "step": 83140 }, { "epoch": 1.3605497831956148, "grad_norm": 0.07086026668548584, "learning_rate": 6.652325888051e-06, "loss": 0.0016, "step": 83150 }, { "epoch": 1.3607134091466906, "grad_norm": 0.14250591397285461, "learning_rate": 6.651427399750997e-06, "loss": 0.0022, "step": 83160 }, { "epoch": 1.3608770350977666, "grad_norm": 0.07695437967777252, "learning_rate": 6.65052885159072e-06, "loss": 0.001, "step": 83170 }, { "epoch": 1.3610406610488424, "grad_norm": 0.06912094354629517, "learning_rate": 6.649630243602738e-06, "loss": 0.0019, "step": 83180 }, { "epoch": 1.3612042869999181, "grad_norm": 0.050239890813827515, "learning_rate": 6.648731575819626e-06, "loss": 0.0016, "step": 83190 }, { "epoch": 1.361367912950994, "grad_norm": 0.05261654034256935, "learning_rate": 6.6478328482739566e-06, "loss": 0.002, "step": 83200 }, { "epoch": 1.36153153890207, "grad_norm": 0.08113102614879608, "learning_rate": 6.6469340609983056e-06, "loss": 0.0033, "step": 83210 }, { "epoch": 1.3616951648531457, "grad_norm": 0.09291556477546692, "learning_rate": 6.646035214025255e-06, "loss": 0.0022, "step": 83220 }, { "epoch": 1.3618587908042215, "grad_norm": 0.026314903050661087, "learning_rate": 6.6451363073873835e-06, "loss": 0.0015, "step": 83230 }, { "epoch": 1.3620224167552974, "grad_norm": 0.29929569363594055, "learning_rate": 6.644237341117273e-06, "loss": 0.0019, "step": 83240 }, { "epoch": 1.3621860427063732, "grad_norm": 0.19190314412117004, "learning_rate": 6.643338315247513e-06, "loss": 0.0033, "step": 83250 }, { "epoch": 1.362349668657449, "grad_norm": 0.20633620023727417, "learning_rate": 6.642439229810688e-06, "loss": 0.0046, "step": 83260 }, { "epoch": 1.362513294608525, "grad_norm": 0.03409310057759285, "learning_rate": 6.6415400848393885e-06, "loss": 0.0022, "step": 83270 }, { "epoch": 1.3626769205596008, "grad_norm": 0.02324938215315342, "learning_rate": 6.640640880366207e-06, "loss": 0.0019, "step": 83280 }, { "epoch": 1.3628405465106765, "grad_norm": 0.024398524314165115, "learning_rate": 6.639741616423736e-06, "loss": 0.0019, "step": 83290 }, { "epoch": 1.3630041724617525, "grad_norm": 0.11157342046499252, "learning_rate": 6.638842293044572e-06, "loss": 0.0021, "step": 83300 }, { "epoch": 1.3631677984128283, "grad_norm": 0.07478858530521393, "learning_rate": 6.6379429102613135e-06, "loss": 0.0024, "step": 83310 }, { "epoch": 1.363331424363904, "grad_norm": 0.2838660180568695, "learning_rate": 6.637043468106562e-06, "loss": 0.0022, "step": 83320 }, { "epoch": 1.36349505031498, "grad_norm": 0.15137562155723572, "learning_rate": 6.636143966612919e-06, "loss": 0.0035, "step": 83330 }, { "epoch": 1.3636586762660559, "grad_norm": 0.15146780014038086, "learning_rate": 6.6352444058129895e-06, "loss": 0.0027, "step": 83340 }, { "epoch": 1.3638223022171316, "grad_norm": 0.2696821987628937, "learning_rate": 6.6343447857393804e-06, "loss": 0.0038, "step": 83350 }, { "epoch": 1.3639859281682074, "grad_norm": 0.02089490182697773, "learning_rate": 6.6334451064247004e-06, "loss": 0.0014, "step": 83360 }, { "epoch": 1.3641495541192832, "grad_norm": 0.2376302182674408, "learning_rate": 6.632545367901563e-06, "loss": 0.0017, "step": 83370 }, { "epoch": 1.3643131800703592, "grad_norm": 0.0406130887567997, "learning_rate": 6.631645570202578e-06, "loss": 0.0017, "step": 83380 }, { "epoch": 1.364476806021435, "grad_norm": 0.24158768355846405, "learning_rate": 6.630745713360363e-06, "loss": 0.0015, "step": 83390 }, { "epoch": 1.3646404319725107, "grad_norm": 0.04271834343671799, "learning_rate": 6.629845797407536e-06, "loss": 0.0017, "step": 83400 }, { "epoch": 1.3648040579235867, "grad_norm": 0.07265377789735794, "learning_rate": 6.628945822376716e-06, "loss": 0.0021, "step": 83410 }, { "epoch": 1.3649676838746625, "grad_norm": 0.02887498401105404, "learning_rate": 6.628045788300524e-06, "loss": 0.0014, "step": 83420 }, { "epoch": 1.3651313098257383, "grad_norm": 0.1427624374628067, "learning_rate": 6.627145695211586e-06, "loss": 0.0021, "step": 83430 }, { "epoch": 1.3652949357768143, "grad_norm": 0.10718031972646713, "learning_rate": 6.626245543142528e-06, "loss": 0.0015, "step": 83440 }, { "epoch": 1.36545856172789, "grad_norm": 0.008648457936942577, "learning_rate": 6.625345332125976e-06, "loss": 0.0016, "step": 83450 }, { "epoch": 1.3656221876789658, "grad_norm": 0.05365251004695892, "learning_rate": 6.6244450621945624e-06, "loss": 0.0024, "step": 83460 }, { "epoch": 1.3657858136300418, "grad_norm": 0.2586226165294647, "learning_rate": 6.62354473338092e-06, "loss": 0.0014, "step": 83470 }, { "epoch": 1.3659494395811176, "grad_norm": 0.06785815209150314, "learning_rate": 6.6226443457176836e-06, "loss": 0.0019, "step": 83480 }, { "epoch": 1.3661130655321934, "grad_norm": 0.13367609679698944, "learning_rate": 6.621743899237489e-06, "loss": 0.0016, "step": 83490 }, { "epoch": 1.3662766914832694, "grad_norm": 0.0460641123354435, "learning_rate": 6.620843393972975e-06, "loss": 0.0019, "step": 83500 }, { "epoch": 1.3664403174343451, "grad_norm": 0.05655849352478981, "learning_rate": 6.619942829956785e-06, "loss": 0.0019, "step": 83510 }, { "epoch": 1.366603943385421, "grad_norm": 0.045292120426893234, "learning_rate": 6.61904220722156e-06, "loss": 0.0023, "step": 83520 }, { "epoch": 1.366767569336497, "grad_norm": 0.3453798294067383, "learning_rate": 6.618141525799946e-06, "loss": 0.0023, "step": 83530 }, { "epoch": 1.3669311952875727, "grad_norm": 0.06676346063613892, "learning_rate": 6.617240785724593e-06, "loss": 0.0027, "step": 83540 }, { "epoch": 1.3670948212386484, "grad_norm": 0.13619641959667206, "learning_rate": 6.616339987028145e-06, "loss": 0.0023, "step": 83550 }, { "epoch": 1.3672584471897242, "grad_norm": 0.14426226913928986, "learning_rate": 6.61543912974326e-06, "loss": 0.0033, "step": 83560 }, { "epoch": 1.3674220731408, "grad_norm": 0.12246865779161453, "learning_rate": 6.614538213902587e-06, "loss": 0.002, "step": 83570 }, { "epoch": 1.367585699091876, "grad_norm": 0.19099079072475433, "learning_rate": 6.613637239538786e-06, "loss": 0.0023, "step": 83580 }, { "epoch": 1.3677493250429518, "grad_norm": 0.020889585837721825, "learning_rate": 6.612736206684513e-06, "loss": 0.001, "step": 83590 }, { "epoch": 1.3679129509940275, "grad_norm": 0.25056174397468567, "learning_rate": 6.611835115372427e-06, "loss": 0.0029, "step": 83600 }, { "epoch": 1.3680765769451035, "grad_norm": 0.09843003004789352, "learning_rate": 6.610933965635193e-06, "loss": 0.0016, "step": 83610 }, { "epoch": 1.3682402028961793, "grad_norm": 0.09536533057689667, "learning_rate": 6.610032757505476e-06, "loss": 0.0012, "step": 83620 }, { "epoch": 1.368403828847255, "grad_norm": 0.08397623896598816, "learning_rate": 6.6091314910159386e-06, "loss": 0.0018, "step": 83630 }, { "epoch": 1.368567454798331, "grad_norm": 0.15769927203655243, "learning_rate": 6.608230166199255e-06, "loss": 0.0033, "step": 83640 }, { "epoch": 1.3687310807494069, "grad_norm": 0.05273592099547386, "learning_rate": 6.607328783088092e-06, "loss": 0.0017, "step": 83650 }, { "epoch": 1.3688947067004826, "grad_norm": 0.06668082624673843, "learning_rate": 6.606427341715123e-06, "loss": 0.0013, "step": 83660 }, { "epoch": 1.3690583326515586, "grad_norm": 0.23718303442001343, "learning_rate": 6.605525842113024e-06, "loss": 0.0024, "step": 83670 }, { "epoch": 1.3692219586026344, "grad_norm": 0.04254599288105965, "learning_rate": 6.604624284314472e-06, "loss": 0.0024, "step": 83680 }, { "epoch": 1.3693855845537102, "grad_norm": 0.03069309890270233, "learning_rate": 6.603722668352146e-06, "loss": 0.0025, "step": 83690 }, { "epoch": 1.3695492105047862, "grad_norm": 0.11927279084920883, "learning_rate": 6.602820994258728e-06, "loss": 0.0019, "step": 83700 }, { "epoch": 1.369712836455862, "grad_norm": 0.1013031005859375, "learning_rate": 6.601919262066903e-06, "loss": 0.0013, "step": 83710 }, { "epoch": 1.3698764624069377, "grad_norm": 0.2055090218782425, "learning_rate": 6.601017471809352e-06, "loss": 0.002, "step": 83720 }, { "epoch": 1.3700400883580137, "grad_norm": 0.05567789822816849, "learning_rate": 6.600115623518767e-06, "loss": 0.0027, "step": 83730 }, { "epoch": 1.3702037143090895, "grad_norm": 0.06563231348991394, "learning_rate": 6.599213717227836e-06, "loss": 0.0017, "step": 83740 }, { "epoch": 1.3703673402601653, "grad_norm": 0.10842576622962952, "learning_rate": 6.598311752969252e-06, "loss": 0.0026, "step": 83750 }, { "epoch": 1.370530966211241, "grad_norm": 0.1533299833536148, "learning_rate": 6.597409730775708e-06, "loss": 0.0016, "step": 83760 }, { "epoch": 1.3706945921623168, "grad_norm": 0.16760112345218658, "learning_rate": 6.5965076506799e-06, "loss": 0.003, "step": 83770 }, { "epoch": 1.3708582181133928, "grad_norm": 0.12350402027368546, "learning_rate": 6.595605512714527e-06, "loss": 0.0032, "step": 83780 }, { "epoch": 1.3710218440644686, "grad_norm": 0.035299547016620636, "learning_rate": 6.594703316912291e-06, "loss": 0.0032, "step": 83790 }, { "epoch": 1.3711854700155444, "grad_norm": 0.1186697781085968, "learning_rate": 6.593801063305891e-06, "loss": 0.0023, "step": 83800 }, { "epoch": 1.3713490959666204, "grad_norm": 0.3366985619068146, "learning_rate": 6.592898751928033e-06, "loss": 0.0054, "step": 83810 }, { "epoch": 1.3715127219176961, "grad_norm": 0.24717161059379578, "learning_rate": 6.591996382811423e-06, "loss": 0.0019, "step": 83820 }, { "epoch": 1.371676347868772, "grad_norm": 0.055966999381780624, "learning_rate": 6.59109395598877e-06, "loss": 0.0016, "step": 83830 }, { "epoch": 1.371839973819848, "grad_norm": 0.08399087935686111, "learning_rate": 6.590191471492789e-06, "loss": 0.002, "step": 83840 }, { "epoch": 1.3720035997709237, "grad_norm": 0.11979738622903824, "learning_rate": 6.589288929356185e-06, "loss": 0.0013, "step": 83850 }, { "epoch": 1.3721672257219995, "grad_norm": 0.10720144957304001, "learning_rate": 6.588386329611678e-06, "loss": 0.002, "step": 83860 }, { "epoch": 1.3723308516730754, "grad_norm": 0.06949020177125931, "learning_rate": 6.587483672291986e-06, "loss": 0.0012, "step": 83870 }, { "epoch": 1.3724944776241512, "grad_norm": 0.19956162571907043, "learning_rate": 6.5865809574298235e-06, "loss": 0.0027, "step": 83880 }, { "epoch": 1.372658103575227, "grad_norm": 0.1763794720172882, "learning_rate": 6.585678185057915e-06, "loss": 0.0028, "step": 83890 }, { "epoch": 1.372821729526303, "grad_norm": 0.07271043956279755, "learning_rate": 6.584775355208986e-06, "loss": 0.0016, "step": 83900 }, { "epoch": 1.3729853554773788, "grad_norm": 0.0488058403134346, "learning_rate": 6.583872467915757e-06, "loss": 0.0016, "step": 83910 }, { "epoch": 1.3731489814284545, "grad_norm": 0.3219110667705536, "learning_rate": 6.582969523210958e-06, "loss": 0.0036, "step": 83920 }, { "epoch": 1.3733126073795303, "grad_norm": 0.09001462906599045, "learning_rate": 6.582066521127318e-06, "loss": 0.0029, "step": 83930 }, { "epoch": 1.3734762333306063, "grad_norm": 0.06409922242164612, "learning_rate": 6.581163461697571e-06, "loss": 0.0045, "step": 83940 }, { "epoch": 1.373639859281682, "grad_norm": 0.1760329157114029, "learning_rate": 6.580260344954447e-06, "loss": 0.0016, "step": 83950 }, { "epoch": 1.3738034852327579, "grad_norm": 0.07624134421348572, "learning_rate": 6.579357170930684e-06, "loss": 0.0029, "step": 83960 }, { "epoch": 1.3739671111838336, "grad_norm": 0.07200030237436295, "learning_rate": 6.578453939659018e-06, "loss": 0.0016, "step": 83970 }, { "epoch": 1.3741307371349096, "grad_norm": 0.0995061844587326, "learning_rate": 6.5775506511721935e-06, "loss": 0.0016, "step": 83980 }, { "epoch": 1.3742943630859854, "grad_norm": 0.33647817373275757, "learning_rate": 6.576647305502948e-06, "loss": 0.0025, "step": 83990 }, { "epoch": 1.3744579890370612, "grad_norm": 0.263393372297287, "learning_rate": 6.575743902684026e-06, "loss": 0.0027, "step": 84000 }, { "epoch": 1.3746216149881372, "grad_norm": 0.044238802045583725, "learning_rate": 6.574840442748176e-06, "loss": 0.0023, "step": 84010 }, { "epoch": 1.374785240939213, "grad_norm": 0.13966703414916992, "learning_rate": 6.573936925728144e-06, "loss": 0.0011, "step": 84020 }, { "epoch": 1.3749488668902887, "grad_norm": 0.045412782579660416, "learning_rate": 6.573033351656683e-06, "loss": 0.0015, "step": 84030 }, { "epoch": 1.3751124928413647, "grad_norm": 0.10076606273651123, "learning_rate": 6.572129720566541e-06, "loss": 0.005, "step": 84040 }, { "epoch": 1.3752761187924405, "grad_norm": 0.08290413022041321, "learning_rate": 6.571226032490477e-06, "loss": 0.0026, "step": 84050 }, { "epoch": 1.3754397447435163, "grad_norm": 0.2463889867067337, "learning_rate": 6.570322287461246e-06, "loss": 0.0023, "step": 84060 }, { "epoch": 1.3756033706945923, "grad_norm": 0.14944596588611603, "learning_rate": 6.569418485511605e-06, "loss": 0.0014, "step": 84070 }, { "epoch": 1.375766996645668, "grad_norm": 0.1984856128692627, "learning_rate": 6.568514626674316e-06, "loss": 0.0024, "step": 84080 }, { "epoch": 1.3759306225967438, "grad_norm": 0.2240912914276123, "learning_rate": 6.567610710982143e-06, "loss": 0.002, "step": 84090 }, { "epoch": 1.3760942485478198, "grad_norm": 0.3564155399799347, "learning_rate": 6.5667067384678475e-06, "loss": 0.0033, "step": 84100 }, { "epoch": 1.3762578744988956, "grad_norm": 0.12061890959739685, "learning_rate": 6.565802709164199e-06, "loss": 0.002, "step": 84110 }, { "epoch": 1.3764215004499714, "grad_norm": 0.3658267557621002, "learning_rate": 6.564898623103966e-06, "loss": 0.0015, "step": 84120 }, { "epoch": 1.3765851264010471, "grad_norm": 0.4962645173072815, "learning_rate": 6.563994480319919e-06, "loss": 0.0034, "step": 84130 }, { "epoch": 1.3767487523521231, "grad_norm": 0.24085886776447296, "learning_rate": 6.563090280844832e-06, "loss": 0.0023, "step": 84140 }, { "epoch": 1.376912378303199, "grad_norm": 0.10248955339193344, "learning_rate": 6.562186024711478e-06, "loss": 0.0027, "step": 84150 }, { "epoch": 1.3770760042542747, "grad_norm": 0.2887658476829529, "learning_rate": 6.561281711952634e-06, "loss": 0.0052, "step": 84160 }, { "epoch": 1.3772396302053505, "grad_norm": 0.16747045516967773, "learning_rate": 6.560377342601082e-06, "loss": 0.0023, "step": 84170 }, { "epoch": 1.3774032561564264, "grad_norm": 0.09652359038591385, "learning_rate": 6.5594729166896e-06, "loss": 0.0019, "step": 84180 }, { "epoch": 1.3775668821075022, "grad_norm": 0.07700984179973602, "learning_rate": 6.558568434250975e-06, "loss": 0.0011, "step": 84190 }, { "epoch": 1.377730508058578, "grad_norm": 0.05332626402378082, "learning_rate": 6.55766389531799e-06, "loss": 0.0022, "step": 84200 }, { "epoch": 1.377894134009654, "grad_norm": 0.021167131140828133, "learning_rate": 6.5567592999234306e-06, "loss": 0.0021, "step": 84210 }, { "epoch": 1.3780577599607298, "grad_norm": 0.1650628000497818, "learning_rate": 6.55585464810009e-06, "loss": 0.0035, "step": 84220 }, { "epoch": 1.3782213859118055, "grad_norm": 0.11689629405736923, "learning_rate": 6.554949939880757e-06, "loss": 0.0012, "step": 84230 }, { "epoch": 1.3783850118628815, "grad_norm": 0.10608311742544174, "learning_rate": 6.554045175298226e-06, "loss": 0.0014, "step": 84240 }, { "epoch": 1.3785486378139573, "grad_norm": 0.04725388064980507, "learning_rate": 6.553140354385292e-06, "loss": 0.0012, "step": 84250 }, { "epoch": 1.378712263765033, "grad_norm": 0.2436954379081726, "learning_rate": 6.5522354771747535e-06, "loss": 0.0023, "step": 84260 }, { "epoch": 1.378875889716109, "grad_norm": 0.2977786958217621, "learning_rate": 6.551330543699409e-06, "loss": 0.0014, "step": 84270 }, { "epoch": 1.3790395156671849, "grad_norm": 0.05094189569354057, "learning_rate": 6.550425553992062e-06, "loss": 0.002, "step": 84280 }, { "epoch": 1.3792031416182606, "grad_norm": 0.09922634065151215, "learning_rate": 6.549520508085515e-06, "loss": 0.0016, "step": 84290 }, { "epoch": 1.3793667675693366, "grad_norm": 0.04487872123718262, "learning_rate": 6.548615406012573e-06, "loss": 0.0019, "step": 84300 }, { "epoch": 1.3795303935204124, "grad_norm": 0.06067148968577385, "learning_rate": 6.547710247806045e-06, "loss": 0.0018, "step": 84310 }, { "epoch": 1.3796940194714882, "grad_norm": 0.12283045053482056, "learning_rate": 6.546805033498739e-06, "loss": 0.0016, "step": 84320 }, { "epoch": 1.379857645422564, "grad_norm": 0.09044544398784637, "learning_rate": 6.545899763123469e-06, "loss": 0.0018, "step": 84330 }, { "epoch": 1.3800212713736397, "grad_norm": 0.09511348605155945, "learning_rate": 6.54499443671305e-06, "loss": 0.0028, "step": 84340 }, { "epoch": 1.3801848973247157, "grad_norm": 0.23826463520526886, "learning_rate": 6.5440890543002936e-06, "loss": 0.0033, "step": 84350 }, { "epoch": 1.3803485232757915, "grad_norm": 0.008653919212520123, "learning_rate": 6.543183615918021e-06, "loss": 0.0015, "step": 84360 }, { "epoch": 1.3805121492268673, "grad_norm": 0.021614257246255875, "learning_rate": 6.54227812159905e-06, "loss": 0.0014, "step": 84370 }, { "epoch": 1.3806757751779433, "grad_norm": 0.16154079139232635, "learning_rate": 6.541372571376206e-06, "loss": 0.0028, "step": 84380 }, { "epoch": 1.380839401129019, "grad_norm": 0.13914792239665985, "learning_rate": 6.540466965282309e-06, "loss": 0.0017, "step": 84390 }, { "epoch": 1.3810030270800948, "grad_norm": 0.06671799719333649, "learning_rate": 6.539561303350188e-06, "loss": 0.0019, "step": 84400 }, { "epoch": 1.3811666530311708, "grad_norm": 0.08875478804111481, "learning_rate": 6.53865558561267e-06, "loss": 0.0019, "step": 84410 }, { "epoch": 1.3813302789822466, "grad_norm": 0.1481930911540985, "learning_rate": 6.537749812102585e-06, "loss": 0.0031, "step": 84420 }, { "epoch": 1.3814939049333224, "grad_norm": 0.1867065280675888, "learning_rate": 6.536843982852765e-06, "loss": 0.0031, "step": 84430 }, { "epoch": 1.3816575308843984, "grad_norm": 0.0756026953458786, "learning_rate": 6.535938097896045e-06, "loss": 0.0024, "step": 84440 }, { "epoch": 1.3818211568354741, "grad_norm": 0.06768887490034103, "learning_rate": 6.535032157265258e-06, "loss": 0.0023, "step": 84450 }, { "epoch": 1.38198478278655, "grad_norm": 0.16543477773666382, "learning_rate": 6.534126160993247e-06, "loss": 0.0023, "step": 84460 }, { "epoch": 1.382148408737626, "grad_norm": 0.12259591370820999, "learning_rate": 6.533220109112849e-06, "loss": 0.0031, "step": 84470 }, { "epoch": 1.3823120346887017, "grad_norm": 0.06564922630786896, "learning_rate": 6.5323140016569075e-06, "loss": 0.0013, "step": 84480 }, { "epoch": 1.3824756606397774, "grad_norm": 0.04190611466765404, "learning_rate": 6.531407838658267e-06, "loss": 0.0017, "step": 84490 }, { "epoch": 1.3826392865908534, "grad_norm": 0.13495048880577087, "learning_rate": 6.5305016201497715e-06, "loss": 0.0017, "step": 84500 }, { "epoch": 1.3828029125419292, "grad_norm": 0.1354617476463318, "learning_rate": 6.529595346164273e-06, "loss": 0.002, "step": 84510 }, { "epoch": 1.382966538493005, "grad_norm": 0.14890646934509277, "learning_rate": 6.528689016734617e-06, "loss": 0.002, "step": 84520 }, { "epoch": 1.3831301644440808, "grad_norm": 0.19110403954982758, "learning_rate": 6.527782631893658e-06, "loss": 0.002, "step": 84530 }, { "epoch": 1.3832937903951565, "grad_norm": 0.07025747001171112, "learning_rate": 6.526876191674252e-06, "loss": 0.0021, "step": 84540 }, { "epoch": 1.3834574163462325, "grad_norm": 0.07577703893184662, "learning_rate": 6.525969696109254e-06, "loss": 0.0027, "step": 84550 }, { "epoch": 1.3836210422973083, "grad_norm": 0.13240648806095123, "learning_rate": 6.52506314523152e-06, "loss": 0.002, "step": 84560 }, { "epoch": 1.383784668248384, "grad_norm": 0.2762661576271057, "learning_rate": 6.524156539073915e-06, "loss": 0.0021, "step": 84570 }, { "epoch": 1.38394829419946, "grad_norm": 0.04597948491573334, "learning_rate": 6.5232498776692975e-06, "loss": 0.0016, "step": 84580 }, { "epoch": 1.3841119201505359, "grad_norm": 0.02019342966377735, "learning_rate": 6.522343161050533e-06, "loss": 0.0013, "step": 84590 }, { "epoch": 1.3842755461016116, "grad_norm": 0.18653501570224762, "learning_rate": 6.521436389250488e-06, "loss": 0.0014, "step": 84600 }, { "epoch": 1.3844391720526876, "grad_norm": 0.09549687802791595, "learning_rate": 6.520529562302029e-06, "loss": 0.0017, "step": 84610 }, { "epoch": 1.3846027980037634, "grad_norm": 0.005977577064186335, "learning_rate": 6.519622680238029e-06, "loss": 0.002, "step": 84620 }, { "epoch": 1.3847664239548392, "grad_norm": 0.02042592316865921, "learning_rate": 6.518715743091359e-06, "loss": 0.002, "step": 84630 }, { "epoch": 1.3849300499059152, "grad_norm": 0.16510340571403503, "learning_rate": 6.5178087508948965e-06, "loss": 0.002, "step": 84640 }, { "epoch": 1.385093675856991, "grad_norm": 0.09785642474889755, "learning_rate": 6.516901703681511e-06, "loss": 0.003, "step": 84650 }, { "epoch": 1.3852573018080667, "grad_norm": 0.035534076392650604, "learning_rate": 6.5159946014840854e-06, "loss": 0.0018, "step": 84660 }, { "epoch": 1.3854209277591427, "grad_norm": 0.0675075426697731, "learning_rate": 6.515087444335498e-06, "loss": 0.0013, "step": 84670 }, { "epoch": 1.3855845537102185, "grad_norm": 0.22779537737369537, "learning_rate": 6.5141802322686345e-06, "loss": 0.0026, "step": 84680 }, { "epoch": 1.3857481796612943, "grad_norm": 0.04339180886745453, "learning_rate": 6.513272965316376e-06, "loss": 0.0013, "step": 84690 }, { "epoch": 1.38591180561237, "grad_norm": 0.055955853313207626, "learning_rate": 6.512365643511609e-06, "loss": 0.006, "step": 84700 }, { "epoch": 1.386075431563446, "grad_norm": 0.12068263441324234, "learning_rate": 6.511458266887223e-06, "loss": 0.0029, "step": 84710 }, { "epoch": 1.3862390575145218, "grad_norm": 0.1273995041847229, "learning_rate": 6.5105508354761075e-06, "loss": 0.0024, "step": 84720 }, { "epoch": 1.3864026834655976, "grad_norm": 0.3073548674583435, "learning_rate": 6.509643349311153e-06, "loss": 0.0019, "step": 84730 }, { "epoch": 1.3865663094166734, "grad_norm": 0.0018799295648932457, "learning_rate": 6.5087358084252575e-06, "loss": 0.0014, "step": 84740 }, { "epoch": 1.3867299353677494, "grad_norm": 0.5400633811950684, "learning_rate": 6.507828212851315e-06, "loss": 0.0031, "step": 84750 }, { "epoch": 1.3868935613188251, "grad_norm": 0.0834333673119545, "learning_rate": 6.506920562622222e-06, "loss": 0.0017, "step": 84760 }, { "epoch": 1.387057187269901, "grad_norm": 0.06015801057219505, "learning_rate": 6.506012857770882e-06, "loss": 0.0016, "step": 84770 }, { "epoch": 1.387220813220977, "grad_norm": 0.25534600019454956, "learning_rate": 6.505105098330194e-06, "loss": 0.0025, "step": 84780 }, { "epoch": 1.3873844391720527, "grad_norm": 0.10330186784267426, "learning_rate": 6.5041972843330654e-06, "loss": 0.0014, "step": 84790 }, { "epoch": 1.3875480651231284, "grad_norm": 0.37720584869384766, "learning_rate": 6.503289415812401e-06, "loss": 0.002, "step": 84800 }, { "epoch": 1.3877116910742044, "grad_norm": 0.16098958253860474, "learning_rate": 6.502381492801105e-06, "loss": 0.0013, "step": 84810 }, { "epoch": 1.3878753170252802, "grad_norm": 0.0783856213092804, "learning_rate": 6.501473515332093e-06, "loss": 0.0019, "step": 84820 }, { "epoch": 1.388038942976356, "grad_norm": 0.013071945868432522, "learning_rate": 6.500565483438275e-06, "loss": 0.0014, "step": 84830 }, { "epoch": 1.388202568927432, "grad_norm": 0.00263294018805027, "learning_rate": 6.499657397152566e-06, "loss": 0.0028, "step": 84840 }, { "epoch": 1.3883661948785078, "grad_norm": 0.04995952174067497, "learning_rate": 6.498749256507878e-06, "loss": 0.0012, "step": 84850 }, { "epoch": 1.3885298208295835, "grad_norm": 0.010463164187967777, "learning_rate": 6.497841061537132e-06, "loss": 0.0013, "step": 84860 }, { "epoch": 1.3886934467806595, "grad_norm": 0.19178052246570587, "learning_rate": 6.496932812273249e-06, "loss": 0.0041, "step": 84870 }, { "epoch": 1.3888570727317353, "grad_norm": 0.1179526299238205, "learning_rate": 6.496024508749148e-06, "loss": 0.0017, "step": 84880 }, { "epoch": 1.389020698682811, "grad_norm": 0.358823299407959, "learning_rate": 6.495116150997754e-06, "loss": 0.0032, "step": 84890 }, { "epoch": 1.3891843246338869, "grad_norm": 0.09603816270828247, "learning_rate": 6.494207739051994e-06, "loss": 0.0016, "step": 84900 }, { "epoch": 1.3893479505849629, "grad_norm": 0.10268957912921906, "learning_rate": 6.493299272944794e-06, "loss": 0.0018, "step": 84910 }, { "epoch": 1.3895115765360386, "grad_norm": 0.02950035035610199, "learning_rate": 6.4923907527090845e-06, "loss": 0.0009, "step": 84920 }, { "epoch": 1.3896752024871144, "grad_norm": 0.10302207618951797, "learning_rate": 6.491482178377797e-06, "loss": 0.0017, "step": 84930 }, { "epoch": 1.3898388284381902, "grad_norm": 0.040904585272073746, "learning_rate": 6.490573549983866e-06, "loss": 0.0012, "step": 84940 }, { "epoch": 1.3900024543892662, "grad_norm": 0.04564441367983818, "learning_rate": 6.4896648675602255e-06, "loss": 0.0021, "step": 84950 }, { "epoch": 1.390166080340342, "grad_norm": 0.06184501200914383, "learning_rate": 6.488756131139814e-06, "loss": 0.0015, "step": 84960 }, { "epoch": 1.3903297062914177, "grad_norm": 0.056682560592889786, "learning_rate": 6.4878473407555705e-06, "loss": 0.0016, "step": 84970 }, { "epoch": 1.3904933322424937, "grad_norm": 0.17566584050655365, "learning_rate": 6.486938496440438e-06, "loss": 0.0019, "step": 84980 }, { "epoch": 1.3906569581935695, "grad_norm": 0.09449051320552826, "learning_rate": 6.486029598227359e-06, "loss": 0.0025, "step": 84990 }, { "epoch": 1.3908205841446453, "grad_norm": 0.028499802574515343, "learning_rate": 6.4851206461492765e-06, "loss": 0.0018, "step": 85000 }, { "epoch": 1.3909842100957213, "grad_norm": 0.046537838876247406, "learning_rate": 6.484211640239142e-06, "loss": 0.0033, "step": 85010 }, { "epoch": 1.391147836046797, "grad_norm": 0.016465824097394943, "learning_rate": 6.483302580529902e-06, "loss": 0.0014, "step": 85020 }, { "epoch": 1.3913114619978728, "grad_norm": 0.09119275957345963, "learning_rate": 6.4823934670545075e-06, "loss": 0.0014, "step": 85030 }, { "epoch": 1.3914750879489488, "grad_norm": 0.17053423821926117, "learning_rate": 6.481484299845914e-06, "loss": 0.0027, "step": 85040 }, { "epoch": 1.3916387139000246, "grad_norm": 0.12290740013122559, "learning_rate": 6.480575078937075e-06, "loss": 0.0016, "step": 85050 }, { "epoch": 1.3918023398511004, "grad_norm": 0.03296781703829765, "learning_rate": 6.479665804360947e-06, "loss": 0.0018, "step": 85060 }, { "epoch": 1.3919659658021764, "grad_norm": 0.08438185602426529, "learning_rate": 6.478756476150492e-06, "loss": 0.0013, "step": 85070 }, { "epoch": 1.3921295917532521, "grad_norm": 0.1113714650273323, "learning_rate": 6.477847094338666e-06, "loss": 0.0014, "step": 85080 }, { "epoch": 1.392293217704328, "grad_norm": 0.12829799950122833, "learning_rate": 6.476937658958436e-06, "loss": 0.0017, "step": 85090 }, { "epoch": 1.3924568436554037, "grad_norm": 0.06273569166660309, "learning_rate": 6.4760281700427665e-06, "loss": 0.0018, "step": 85100 }, { "epoch": 1.3926204696064794, "grad_norm": 0.014807458966970444, "learning_rate": 6.475118627624622e-06, "loss": 0.0021, "step": 85110 }, { "epoch": 1.3927840955575554, "grad_norm": 0.1321907341480255, "learning_rate": 6.474209031736973e-06, "loss": 0.0012, "step": 85120 }, { "epoch": 1.3929477215086312, "grad_norm": 0.15450501441955566, "learning_rate": 6.473299382412791e-06, "loss": 0.0033, "step": 85130 }, { "epoch": 1.393111347459707, "grad_norm": 0.09167923778295517, "learning_rate": 6.472389679685047e-06, "loss": 0.0015, "step": 85140 }, { "epoch": 1.393274973410783, "grad_norm": 0.1072862520813942, "learning_rate": 6.471479923586715e-06, "loss": 0.0019, "step": 85150 }, { "epoch": 1.3934385993618588, "grad_norm": 0.172815203666687, "learning_rate": 6.470570114150773e-06, "loss": 0.0018, "step": 85160 }, { "epoch": 1.3936022253129345, "grad_norm": 0.003767084563151002, "learning_rate": 6.469660251410197e-06, "loss": 0.0008, "step": 85170 }, { "epoch": 1.3937658512640105, "grad_norm": 0.18949241936206818, "learning_rate": 6.4687503353979705e-06, "loss": 0.002, "step": 85180 }, { "epoch": 1.3939294772150863, "grad_norm": 0.018428917974233627, "learning_rate": 6.467840366147076e-06, "loss": 0.0024, "step": 85190 }, { "epoch": 1.394093103166162, "grad_norm": 0.054442957043647766, "learning_rate": 6.466930343690494e-06, "loss": 0.0019, "step": 85200 }, { "epoch": 1.394256729117238, "grad_norm": 0.00616827979683876, "learning_rate": 6.466020268061214e-06, "loss": 0.0017, "step": 85210 }, { "epoch": 1.3944203550683139, "grad_norm": 0.10218977928161621, "learning_rate": 6.465110139292221e-06, "loss": 0.0029, "step": 85220 }, { "epoch": 1.3945839810193896, "grad_norm": 0.08989010751247406, "learning_rate": 6.464199957416509e-06, "loss": 0.002, "step": 85230 }, { "epoch": 1.3947476069704656, "grad_norm": 0.08905472606420517, "learning_rate": 6.463289722467065e-06, "loss": 0.0011, "step": 85240 }, { "epoch": 1.3949112329215414, "grad_norm": 0.047487299889326096, "learning_rate": 6.462379434476888e-06, "loss": 0.0012, "step": 85250 }, { "epoch": 1.3950748588726172, "grad_norm": 0.1420343816280365, "learning_rate": 6.4614690934789695e-06, "loss": 0.0011, "step": 85260 }, { "epoch": 1.3952384848236932, "grad_norm": 0.04268716648221016, "learning_rate": 6.4605586995063095e-06, "loss": 0.0013, "step": 85270 }, { "epoch": 1.395402110774769, "grad_norm": 0.021871600300073624, "learning_rate": 6.459648252591907e-06, "loss": 0.002, "step": 85280 }, { "epoch": 1.3955657367258447, "grad_norm": 0.02697080932557583, "learning_rate": 6.458737752768765e-06, "loss": 0.0021, "step": 85290 }, { "epoch": 1.3957293626769205, "grad_norm": 0.004370892886072397, "learning_rate": 6.457827200069884e-06, "loss": 0.0021, "step": 85300 }, { "epoch": 1.3958929886279963, "grad_norm": 0.1204993799328804, "learning_rate": 6.45691659452827e-06, "loss": 0.0014, "step": 85310 }, { "epoch": 1.3960566145790723, "grad_norm": 0.03712393715977669, "learning_rate": 6.456005936176931e-06, "loss": 0.0017, "step": 85320 }, { "epoch": 1.396220240530148, "grad_norm": 0.1656939834356308, "learning_rate": 6.455095225048877e-06, "loss": 0.0017, "step": 85330 }, { "epoch": 1.3963838664812238, "grad_norm": 0.04335532709956169, "learning_rate": 6.45418446117712e-06, "loss": 0.0013, "step": 85340 }, { "epoch": 1.3965474924322998, "grad_norm": 0.021557774394750595, "learning_rate": 6.45327364459467e-06, "loss": 0.0019, "step": 85350 }, { "epoch": 1.3967111183833756, "grad_norm": 0.0697641521692276, "learning_rate": 6.452362775334542e-06, "loss": 0.002, "step": 85360 }, { "epoch": 1.3968747443344514, "grad_norm": 0.11676472425460815, "learning_rate": 6.4514518534297554e-06, "loss": 0.0046, "step": 85370 }, { "epoch": 1.3970383702855274, "grad_norm": 0.10342272371053696, "learning_rate": 6.450540878913327e-06, "loss": 0.0019, "step": 85380 }, { "epoch": 1.3972019962366031, "grad_norm": 0.07428403943777084, "learning_rate": 6.449629851818279e-06, "loss": 0.0012, "step": 85390 }, { "epoch": 1.397365622187679, "grad_norm": 0.04550778120756149, "learning_rate": 6.448718772177631e-06, "loss": 0.0016, "step": 85400 }, { "epoch": 1.397529248138755, "grad_norm": 0.11181758344173431, "learning_rate": 6.447807640024412e-06, "loss": 0.0013, "step": 85410 }, { "epoch": 1.3976928740898307, "grad_norm": 0.05142239108681679, "learning_rate": 6.446896455391644e-06, "loss": 0.0015, "step": 85420 }, { "epoch": 1.3978565000409064, "grad_norm": 0.0831603929400444, "learning_rate": 6.445985218312357e-06, "loss": 0.001, "step": 85430 }, { "epoch": 1.3980201259919824, "grad_norm": 0.03458372503519058, "learning_rate": 6.445073928819583e-06, "loss": 0.001, "step": 85440 }, { "epoch": 1.3981837519430582, "grad_norm": 0.07006905972957611, "learning_rate": 6.44416258694635e-06, "loss": 0.0026, "step": 85450 }, { "epoch": 1.398347377894134, "grad_norm": 0.08403483778238297, "learning_rate": 6.443251192725694e-06, "loss": 0.0023, "step": 85460 }, { "epoch": 1.39851100384521, "grad_norm": 0.049667708575725555, "learning_rate": 6.442339746190652e-06, "loss": 0.0009, "step": 85470 }, { "epoch": 1.3986746297962858, "grad_norm": 0.02640850841999054, "learning_rate": 6.441428247374261e-06, "loss": 0.0017, "step": 85480 }, { "epoch": 1.3988382557473615, "grad_norm": 0.19479133188724518, "learning_rate": 6.440516696309561e-06, "loss": 0.0023, "step": 85490 }, { "epoch": 1.3990018816984373, "grad_norm": 0.3405801057815552, "learning_rate": 6.439605093029593e-06, "loss": 0.002, "step": 85500 }, { "epoch": 1.399165507649513, "grad_norm": 0.09277898073196411, "learning_rate": 6.438693437567398e-06, "loss": 0.0016, "step": 85510 }, { "epoch": 1.399329133600589, "grad_norm": 0.17509441077709198, "learning_rate": 6.437781729956026e-06, "loss": 0.0021, "step": 85520 }, { "epoch": 1.3994927595516649, "grad_norm": 0.2545210123062134, "learning_rate": 6.43686997022852e-06, "loss": 0.0018, "step": 85530 }, { "epoch": 1.3996563855027406, "grad_norm": 0.2908412218093872, "learning_rate": 6.435958158417933e-06, "loss": 0.0024, "step": 85540 }, { "epoch": 1.3998200114538166, "grad_norm": 0.11098383367061615, "learning_rate": 6.435046294557314e-06, "loss": 0.0015, "step": 85550 }, { "epoch": 1.3999836374048924, "grad_norm": 0.04553581029176712, "learning_rate": 6.434134378679714e-06, "loss": 0.001, "step": 85560 }, { "epoch": 1.4001472633559682, "grad_norm": 0.05689772963523865, "learning_rate": 6.433222410818191e-06, "loss": 0.0013, "step": 85570 }, { "epoch": 1.4003108893070442, "grad_norm": 0.34929540753364563, "learning_rate": 6.432310391005799e-06, "loss": 0.0018, "step": 85580 }, { "epoch": 1.40047451525812, "grad_norm": 0.14437046647071838, "learning_rate": 6.431398319275599e-06, "loss": 0.0029, "step": 85590 }, { "epoch": 1.4006381412091957, "grad_norm": 0.08320649713277817, "learning_rate": 6.43048619566065e-06, "loss": 0.0035, "step": 85600 }, { "epoch": 1.4008017671602717, "grad_norm": 0.14735233783721924, "learning_rate": 6.429574020194014e-06, "loss": 0.0011, "step": 85610 }, { "epoch": 1.4009653931113475, "grad_norm": 0.13174661993980408, "learning_rate": 6.4286617929087565e-06, "loss": 0.0024, "step": 85620 }, { "epoch": 1.4011290190624233, "grad_norm": 0.09346847236156464, "learning_rate": 6.427749513837942e-06, "loss": 0.0019, "step": 85630 }, { "epoch": 1.4012926450134993, "grad_norm": 0.04263807833194733, "learning_rate": 6.426837183014639e-06, "loss": 0.0014, "step": 85640 }, { "epoch": 1.401456270964575, "grad_norm": 0.13263411819934845, "learning_rate": 6.425924800471918e-06, "loss": 0.0021, "step": 85650 }, { "epoch": 1.4016198969156508, "grad_norm": 0.05086040124297142, "learning_rate": 6.42501236624285e-06, "loss": 0.0012, "step": 85660 }, { "epoch": 1.4017835228667266, "grad_norm": 0.23759202659130096, "learning_rate": 6.424099880360508e-06, "loss": 0.0019, "step": 85670 }, { "epoch": 1.4019471488178026, "grad_norm": 0.08066391944885254, "learning_rate": 6.42318734285797e-06, "loss": 0.0018, "step": 85680 }, { "epoch": 1.4021107747688784, "grad_norm": 0.2468746453523636, "learning_rate": 6.422274753768309e-06, "loss": 0.005, "step": 85690 }, { "epoch": 1.4022744007199541, "grad_norm": 0.09473896771669388, "learning_rate": 6.421362113124609e-06, "loss": 0.0019, "step": 85700 }, { "epoch": 1.40243802667103, "grad_norm": 0.1353735625743866, "learning_rate": 6.420449420959946e-06, "loss": 0.0041, "step": 85710 }, { "epoch": 1.402601652622106, "grad_norm": 0.11348866671323776, "learning_rate": 6.419536677307408e-06, "loss": 0.0024, "step": 85720 }, { "epoch": 1.4027652785731817, "grad_norm": 0.02876884676516056, "learning_rate": 6.418623882200075e-06, "loss": 0.0022, "step": 85730 }, { "epoch": 1.4029289045242574, "grad_norm": 0.02298186533153057, "learning_rate": 6.417711035671035e-06, "loss": 0.0015, "step": 85740 }, { "epoch": 1.4030925304753334, "grad_norm": 0.07512074708938599, "learning_rate": 6.4167981377533795e-06, "loss": 0.0013, "step": 85750 }, { "epoch": 1.4032561564264092, "grad_norm": 0.12532132863998413, "learning_rate": 6.4158851884801964e-06, "loss": 0.0022, "step": 85760 }, { "epoch": 1.403419782377485, "grad_norm": 0.19724524021148682, "learning_rate": 6.414972187884578e-06, "loss": 0.0015, "step": 85770 }, { "epoch": 1.403583408328561, "grad_norm": 0.15418198704719543, "learning_rate": 6.414059135999618e-06, "loss": 0.0026, "step": 85780 }, { "epoch": 1.4037470342796368, "grad_norm": 0.1800295114517212, "learning_rate": 6.413146032858414e-06, "loss": 0.0022, "step": 85790 }, { "epoch": 1.4039106602307125, "grad_norm": 0.14737734198570251, "learning_rate": 6.412232878494063e-06, "loss": 0.0015, "step": 85800 }, { "epoch": 1.4040742861817885, "grad_norm": 0.02988053299486637, "learning_rate": 6.411319672939662e-06, "loss": 0.0037, "step": 85810 }, { "epoch": 1.4042379121328643, "grad_norm": 0.2550038993358612, "learning_rate": 6.410406416228316e-06, "loss": 0.0023, "step": 85820 }, { "epoch": 1.40440153808394, "grad_norm": 0.057182129472494125, "learning_rate": 6.409493108393128e-06, "loss": 0.0013, "step": 85830 }, { "epoch": 1.404565164035016, "grad_norm": 0.17738579213619232, "learning_rate": 6.408579749467202e-06, "loss": 0.0021, "step": 85840 }, { "epoch": 1.4047287899860919, "grad_norm": 0.23171603679656982, "learning_rate": 6.407666339483647e-06, "loss": 0.0026, "step": 85850 }, { "epoch": 1.4048924159371676, "grad_norm": 0.03303425386548042, "learning_rate": 6.406752878475568e-06, "loss": 0.0017, "step": 85860 }, { "epoch": 1.4050560418882434, "grad_norm": 0.2983608543872833, "learning_rate": 6.4058393664760795e-06, "loss": 0.0028, "step": 85870 }, { "epoch": 1.4052196678393194, "grad_norm": 0.2720623016357422, "learning_rate": 6.404925803518292e-06, "loss": 0.0021, "step": 85880 }, { "epoch": 1.4053832937903952, "grad_norm": 0.021536650136113167, "learning_rate": 6.404012189635321e-06, "loss": 0.0011, "step": 85890 }, { "epoch": 1.405546919741471, "grad_norm": 0.07657308131456375, "learning_rate": 6.403098524860284e-06, "loss": 0.0023, "step": 85900 }, { "epoch": 1.4057105456925467, "grad_norm": 0.21242614090442657, "learning_rate": 6.4021848092262964e-06, "loss": 0.0017, "step": 85910 }, { "epoch": 1.4058741716436227, "grad_norm": 0.07514762878417969, "learning_rate": 6.401271042766479e-06, "loss": 0.0017, "step": 85920 }, { "epoch": 1.4060377975946985, "grad_norm": 0.2019110769033432, "learning_rate": 6.400357225513956e-06, "loss": 0.0017, "step": 85930 }, { "epoch": 1.4062014235457743, "grad_norm": 0.017901379615068436, "learning_rate": 6.399443357501848e-06, "loss": 0.0017, "step": 85940 }, { "epoch": 1.4063650494968503, "grad_norm": 0.1320720762014389, "learning_rate": 6.398529438763281e-06, "loss": 0.0012, "step": 85950 }, { "epoch": 1.406528675447926, "grad_norm": 0.09165991842746735, "learning_rate": 6.397615469331384e-06, "loss": 0.003, "step": 85960 }, { "epoch": 1.4066923013990018, "grad_norm": 0.0652407631278038, "learning_rate": 6.396701449239286e-06, "loss": 0.0024, "step": 85970 }, { "epoch": 1.4068559273500778, "grad_norm": 0.10655125975608826, "learning_rate": 6.395787378520117e-06, "loss": 0.0041, "step": 85980 }, { "epoch": 1.4070195533011536, "grad_norm": 0.16529837250709534, "learning_rate": 6.394873257207009e-06, "loss": 0.0018, "step": 85990 }, { "epoch": 1.4071831792522294, "grad_norm": 0.12448734790086746, "learning_rate": 6.3939590853331e-06, "loss": 0.0028, "step": 86000 }, { "epoch": 1.4073468052033054, "grad_norm": 0.12860512733459473, "learning_rate": 6.3930448629315236e-06, "loss": 0.0023, "step": 86010 }, { "epoch": 1.4075104311543811, "grad_norm": 0.06594104319810867, "learning_rate": 6.3921305900354165e-06, "loss": 0.0016, "step": 86020 }, { "epoch": 1.407674057105457, "grad_norm": 0.13318806886672974, "learning_rate": 6.391216266677924e-06, "loss": 0.0024, "step": 86030 }, { "epoch": 1.407837683056533, "grad_norm": 0.10796231031417847, "learning_rate": 6.390301892892186e-06, "loss": 0.0023, "step": 86040 }, { "epoch": 1.4080013090076087, "grad_norm": 0.27822428941726685, "learning_rate": 6.389387468711346e-06, "loss": 0.0045, "step": 86050 }, { "epoch": 1.4081649349586844, "grad_norm": 0.16741658747196198, "learning_rate": 6.388472994168548e-06, "loss": 0.0015, "step": 86060 }, { "epoch": 1.4083285609097602, "grad_norm": 0.007848259061574936, "learning_rate": 6.3875584692969414e-06, "loss": 0.0016, "step": 86070 }, { "epoch": 1.408492186860836, "grad_norm": 0.03564431518316269, "learning_rate": 6.386643894129676e-06, "loss": 0.0015, "step": 86080 }, { "epoch": 1.408655812811912, "grad_norm": 0.03646448627114296, "learning_rate": 6.3857292686999e-06, "loss": 0.0014, "step": 86090 }, { "epoch": 1.4088194387629878, "grad_norm": 0.10099207609891891, "learning_rate": 6.384814593040771e-06, "loss": 0.0015, "step": 86100 }, { "epoch": 1.4089830647140635, "grad_norm": 0.10978300124406815, "learning_rate": 6.3838998671854405e-06, "loss": 0.0018, "step": 86110 }, { "epoch": 1.4091466906651395, "grad_norm": 0.055727116763591766, "learning_rate": 6.382985091167065e-06, "loss": 0.0016, "step": 86120 }, { "epoch": 1.4093103166162153, "grad_norm": 0.13373956084251404, "learning_rate": 6.382070265018805e-06, "loss": 0.0018, "step": 86130 }, { "epoch": 1.409473942567291, "grad_norm": 0.008454692550003529, "learning_rate": 6.38115538877382e-06, "loss": 0.0021, "step": 86140 }, { "epoch": 1.409637568518367, "grad_norm": 0.12276609987020493, "learning_rate": 6.380240462465272e-06, "loss": 0.0018, "step": 86150 }, { "epoch": 1.4098011944694429, "grad_norm": 0.3033892512321472, "learning_rate": 6.379325486126322e-06, "loss": 0.0024, "step": 86160 }, { "epoch": 1.4099648204205186, "grad_norm": 0.03300086036324501, "learning_rate": 6.37841045979014e-06, "loss": 0.0017, "step": 86170 }, { "epoch": 1.4101284463715946, "grad_norm": 0.05865491181612015, "learning_rate": 6.377495383489891e-06, "loss": 0.0019, "step": 86180 }, { "epoch": 1.4102920723226704, "grad_norm": 0.04635308310389519, "learning_rate": 6.376580257258746e-06, "loss": 0.0015, "step": 86190 }, { "epoch": 1.4104556982737462, "grad_norm": 0.07225222140550613, "learning_rate": 6.375665081129876e-06, "loss": 0.0014, "step": 86200 }, { "epoch": 1.4106193242248222, "grad_norm": 0.07321282476186752, "learning_rate": 6.374749855136451e-06, "loss": 0.0035, "step": 86210 }, { "epoch": 1.410782950175898, "grad_norm": 0.13845479488372803, "learning_rate": 6.37383457931165e-06, "loss": 0.0015, "step": 86220 }, { "epoch": 1.4109465761269737, "grad_norm": 0.24736085534095764, "learning_rate": 6.372919253688645e-06, "loss": 0.0025, "step": 86230 }, { "epoch": 1.4111102020780497, "grad_norm": 0.014483810402452946, "learning_rate": 6.372003878300618e-06, "loss": 0.0019, "step": 86240 }, { "epoch": 1.4112738280291255, "grad_norm": 0.07591627538204193, "learning_rate": 6.371088453180749e-06, "loss": 0.0014, "step": 86250 }, { "epoch": 1.4114374539802013, "grad_norm": 0.06420943140983582, "learning_rate": 6.370172978362217e-06, "loss": 0.0028, "step": 86260 }, { "epoch": 1.411601079931277, "grad_norm": 0.08503778278827667, "learning_rate": 6.369257453878208e-06, "loss": 0.0021, "step": 86270 }, { "epoch": 1.4117647058823528, "grad_norm": 0.021942317485809326, "learning_rate": 6.3683418797619075e-06, "loss": 0.0014, "step": 86280 }, { "epoch": 1.4119283318334288, "grad_norm": 0.06504148989915848, "learning_rate": 6.3674262560465036e-06, "loss": 0.0021, "step": 86290 }, { "epoch": 1.4120919577845046, "grad_norm": 0.06938780844211578, "learning_rate": 6.366510582765184e-06, "loss": 0.0018, "step": 86300 }, { "epoch": 1.4122555837355804, "grad_norm": 0.06081559509038925, "learning_rate": 6.36559485995114e-06, "loss": 0.0021, "step": 86310 }, { "epoch": 1.4124192096866564, "grad_norm": 0.16031400859355927, "learning_rate": 6.3646790876375645e-06, "loss": 0.0021, "step": 86320 }, { "epoch": 1.4125828356377321, "grad_norm": 0.1319561004638672, "learning_rate": 6.3637632658576505e-06, "loss": 0.0016, "step": 86330 }, { "epoch": 1.412746461588808, "grad_norm": 0.09470713138580322, "learning_rate": 6.362847394644598e-06, "loss": 0.0019, "step": 86340 }, { "epoch": 1.412910087539884, "grad_norm": 0.020638514310121536, "learning_rate": 6.361931474031602e-06, "loss": 0.0055, "step": 86350 }, { "epoch": 1.4130737134909597, "grad_norm": 0.08732564002275467, "learning_rate": 6.361015504051864e-06, "loss": 0.0035, "step": 86360 }, { "epoch": 1.4132373394420354, "grad_norm": 0.0567031092941761, "learning_rate": 6.3600994847385845e-06, "loss": 0.0018, "step": 86370 }, { "epoch": 1.4134009653931114, "grad_norm": 0.17103177309036255, "learning_rate": 6.359183416124965e-06, "loss": 0.0019, "step": 86380 }, { "epoch": 1.4135645913441872, "grad_norm": 0.06520421802997589, "learning_rate": 6.358267298244217e-06, "loss": 0.0026, "step": 86390 }, { "epoch": 1.413728217295263, "grad_norm": 0.09191549569368362, "learning_rate": 6.357351131129543e-06, "loss": 0.0013, "step": 86400 }, { "epoch": 1.413891843246339, "grad_norm": 0.10752929747104645, "learning_rate": 6.356434914814154e-06, "loss": 0.0014, "step": 86410 }, { "epoch": 1.4140554691974148, "grad_norm": 0.10741899907588959, "learning_rate": 6.355518649331258e-06, "loss": 0.0019, "step": 86420 }, { "epoch": 1.4142190951484905, "grad_norm": 0.15231560170650482, "learning_rate": 6.3546023347140686e-06, "loss": 0.002, "step": 86430 }, { "epoch": 1.4143827210995663, "grad_norm": 0.10231545567512512, "learning_rate": 6.353685970995801e-06, "loss": 0.0018, "step": 86440 }, { "epoch": 1.4145463470506423, "grad_norm": 0.08341783285140991, "learning_rate": 6.35276955820967e-06, "loss": 0.0037, "step": 86450 }, { "epoch": 1.414709973001718, "grad_norm": 0.03726617619395256, "learning_rate": 6.351853096388893e-06, "loss": 0.0013, "step": 86460 }, { "epoch": 1.4148735989527939, "grad_norm": 0.10797476023435593, "learning_rate": 6.350936585566692e-06, "loss": 0.0018, "step": 86470 }, { "epoch": 1.4150372249038696, "grad_norm": 0.06698980182409286, "learning_rate": 6.350020025776286e-06, "loss": 0.002, "step": 86480 }, { "epoch": 1.4152008508549456, "grad_norm": 0.026316193863749504, "learning_rate": 6.349103417050898e-06, "loss": 0.0022, "step": 86490 }, { "epoch": 1.4153644768060214, "grad_norm": 0.08767364919185638, "learning_rate": 6.348186759423756e-06, "loss": 0.0024, "step": 86500 }, { "epoch": 1.4155281027570972, "grad_norm": 0.28113704919815063, "learning_rate": 6.3472700529280825e-06, "loss": 0.0014, "step": 86510 }, { "epoch": 1.4156917287081732, "grad_norm": 0.07719475775957108, "learning_rate": 6.346353297597107e-06, "loss": 0.0015, "step": 86520 }, { "epoch": 1.415855354659249, "grad_norm": 0.0954866036772728, "learning_rate": 6.345436493464061e-06, "loss": 0.0023, "step": 86530 }, { "epoch": 1.4160189806103247, "grad_norm": 0.13685014843940735, "learning_rate": 6.344519640562176e-06, "loss": 0.0028, "step": 86540 }, { "epoch": 1.4161826065614007, "grad_norm": 0.06707563251256943, "learning_rate": 6.343602738924686e-06, "loss": 0.0019, "step": 86550 }, { "epoch": 1.4163462325124765, "grad_norm": 0.16344474256038666, "learning_rate": 6.3426857885848245e-06, "loss": 0.0035, "step": 86560 }, { "epoch": 1.4165098584635523, "grad_norm": 0.3190053105354309, "learning_rate": 6.341768789575832e-06, "loss": 0.0025, "step": 86570 }, { "epoch": 1.4166734844146283, "grad_norm": 0.08888473361730576, "learning_rate": 6.340851741930944e-06, "loss": 0.0035, "step": 86580 }, { "epoch": 1.416837110365704, "grad_norm": 0.164824977517128, "learning_rate": 6.339934645683403e-06, "loss": 0.0026, "step": 86590 }, { "epoch": 1.4170007363167798, "grad_norm": 0.06405527144670486, "learning_rate": 6.339017500866453e-06, "loss": 0.0035, "step": 86600 }, { "epoch": 1.4171643622678558, "grad_norm": 0.0634603276848793, "learning_rate": 6.3381003075133366e-06, "loss": 0.0015, "step": 86610 }, { "epoch": 1.4173279882189316, "grad_norm": 0.13260386884212494, "learning_rate": 6.3371830656573e-06, "loss": 0.0013, "step": 86620 }, { "epoch": 1.4174916141700074, "grad_norm": 0.025781145319342613, "learning_rate": 6.33626577533159e-06, "loss": 0.001, "step": 86630 }, { "epoch": 1.4176552401210831, "grad_norm": 0.04053422436118126, "learning_rate": 6.335348436569459e-06, "loss": 0.0021, "step": 86640 }, { "epoch": 1.4178188660721591, "grad_norm": 0.24975523352622986, "learning_rate": 6.334431049404156e-06, "loss": 0.0018, "step": 86650 }, { "epoch": 1.417982492023235, "grad_norm": 0.20761322975158691, "learning_rate": 6.3335136138689336e-06, "loss": 0.0019, "step": 86660 }, { "epoch": 1.4181461179743107, "grad_norm": 0.11526196449995041, "learning_rate": 6.3325961299970475e-06, "loss": 0.002, "step": 86670 }, { "epoch": 1.4183097439253864, "grad_norm": 0.06664963066577911, "learning_rate": 6.331678597821756e-06, "loss": 0.0017, "step": 86680 }, { "epoch": 1.4184733698764624, "grad_norm": 0.321092426776886, "learning_rate": 6.330761017376315e-06, "loss": 0.0039, "step": 86690 }, { "epoch": 1.4186369958275382, "grad_norm": 0.04938043653964996, "learning_rate": 6.329843388693987e-06, "loss": 0.0013, "step": 86700 }, { "epoch": 1.418800621778614, "grad_norm": 0.07609985768795013, "learning_rate": 6.328925711808032e-06, "loss": 0.0025, "step": 86710 }, { "epoch": 1.41896424772969, "grad_norm": 0.024673007428646088, "learning_rate": 6.328007986751712e-06, "loss": 0.0017, "step": 86720 }, { "epoch": 1.4191278736807658, "grad_norm": 0.20874156057834625, "learning_rate": 6.3270902135582954e-06, "loss": 0.0023, "step": 86730 }, { "epoch": 1.4192914996318415, "grad_norm": 0.025386065244674683, "learning_rate": 6.326172392261047e-06, "loss": 0.0016, "step": 86740 }, { "epoch": 1.4194551255829175, "grad_norm": 0.18945609033107758, "learning_rate": 6.32525452289324e-06, "loss": 0.0025, "step": 86750 }, { "epoch": 1.4196187515339933, "grad_norm": 0.0067061251029372215, "learning_rate": 6.3243366054881394e-06, "loss": 0.0022, "step": 86760 }, { "epoch": 1.419782377485069, "grad_norm": 0.1727306842803955, "learning_rate": 6.32341864007902e-06, "loss": 0.0016, "step": 86770 }, { "epoch": 1.419946003436145, "grad_norm": 0.09030259400606155, "learning_rate": 6.322500626699155e-06, "loss": 0.0016, "step": 86780 }, { "epoch": 1.4201096293872209, "grad_norm": 0.06210469827055931, "learning_rate": 6.321582565381822e-06, "loss": 0.0018, "step": 86790 }, { "epoch": 1.4202732553382966, "grad_norm": 0.08547190576791763, "learning_rate": 6.320664456160297e-06, "loss": 0.0023, "step": 86800 }, { "epoch": 1.4204368812893726, "grad_norm": 0.07036415487527847, "learning_rate": 6.319746299067859e-06, "loss": 0.0024, "step": 86810 }, { "epoch": 1.4206005072404484, "grad_norm": 0.1708243042230606, "learning_rate": 6.31882809413779e-06, "loss": 0.0028, "step": 86820 }, { "epoch": 1.4207641331915242, "grad_norm": 0.10920480638742447, "learning_rate": 6.317909841403372e-06, "loss": 0.0019, "step": 86830 }, { "epoch": 1.4209277591426, "grad_norm": 0.10143332928419113, "learning_rate": 6.31699154089789e-06, "loss": 0.0013, "step": 86840 }, { "epoch": 1.4210913850936757, "grad_norm": 0.048016954213380814, "learning_rate": 6.316073192654632e-06, "loss": 0.0031, "step": 86850 }, { "epoch": 1.4212550110447517, "grad_norm": 0.09750109910964966, "learning_rate": 6.315154796706881e-06, "loss": 0.0035, "step": 86860 }, { "epoch": 1.4214186369958275, "grad_norm": 0.01112271286547184, "learning_rate": 6.314236353087929e-06, "loss": 0.0019, "step": 86870 }, { "epoch": 1.4215822629469033, "grad_norm": 0.14084471762180328, "learning_rate": 6.313317861831069e-06, "loss": 0.0017, "step": 86880 }, { "epoch": 1.4217458888979793, "grad_norm": 0.1658242791891098, "learning_rate": 6.312399322969593e-06, "loss": 0.0022, "step": 86890 }, { "epoch": 1.421909514849055, "grad_norm": 0.05576367676258087, "learning_rate": 6.311480736536798e-06, "loss": 0.0017, "step": 86900 }, { "epoch": 1.4220731408001308, "grad_norm": 0.1433534026145935, "learning_rate": 6.3105621025659766e-06, "loss": 0.0019, "step": 86910 }, { "epoch": 1.4222367667512068, "grad_norm": 0.03527117148041725, "learning_rate": 6.309643421090429e-06, "loss": 0.0017, "step": 86920 }, { "epoch": 1.4224003927022826, "grad_norm": 0.2578357458114624, "learning_rate": 6.308724692143454e-06, "loss": 0.002, "step": 86930 }, { "epoch": 1.4225640186533584, "grad_norm": 0.009160785004496574, "learning_rate": 6.307805915758356e-06, "loss": 0.0028, "step": 86940 }, { "epoch": 1.4227276446044343, "grad_norm": 0.0328209213912487, "learning_rate": 6.306887091968435e-06, "loss": 0.0017, "step": 86950 }, { "epoch": 1.4228912705555101, "grad_norm": 0.029015813022851944, "learning_rate": 6.305968220806999e-06, "loss": 0.0017, "step": 86960 }, { "epoch": 1.423054896506586, "grad_norm": 0.08222070336341858, "learning_rate": 6.305049302307354e-06, "loss": 0.0013, "step": 86970 }, { "epoch": 1.423218522457662, "grad_norm": 0.09447557479143143, "learning_rate": 6.3041303365028075e-06, "loss": 0.0016, "step": 86980 }, { "epoch": 1.4233821484087377, "grad_norm": 0.18126505613327026, "learning_rate": 6.303211323426671e-06, "loss": 0.0016, "step": 86990 }, { "epoch": 1.4235457743598134, "grad_norm": 0.05435970053076744, "learning_rate": 6.3022922631122575e-06, "loss": 0.0017, "step": 87000 }, { "epoch": 1.4237094003108894, "grad_norm": 0.055926378816366196, "learning_rate": 6.301373155592878e-06, "loss": 0.0023, "step": 87010 }, { "epoch": 1.4238730262619652, "grad_norm": 0.1094655767083168, "learning_rate": 6.30045400090185e-06, "loss": 0.0025, "step": 87020 }, { "epoch": 1.424036652213041, "grad_norm": 0.08583476394414902, "learning_rate": 6.299534799072488e-06, "loss": 0.0024, "step": 87030 }, { "epoch": 1.4242002781641168, "grad_norm": 0.10022076219320297, "learning_rate": 6.298615550138116e-06, "loss": 0.0031, "step": 87040 }, { "epoch": 1.4243639041151925, "grad_norm": 0.08233474940061569, "learning_rate": 6.297696254132051e-06, "loss": 0.002, "step": 87050 }, { "epoch": 1.4245275300662685, "grad_norm": 0.050768155604600906, "learning_rate": 6.296776911087614e-06, "loss": 0.0017, "step": 87060 }, { "epoch": 1.4246911560173443, "grad_norm": 0.10270874202251434, "learning_rate": 6.295857521038132e-06, "loss": 0.0009, "step": 87070 }, { "epoch": 1.42485478196842, "grad_norm": 0.10369263589382172, "learning_rate": 6.294938084016928e-06, "loss": 0.0014, "step": 87080 }, { "epoch": 1.425018407919496, "grad_norm": 0.16410450637340546, "learning_rate": 6.294018600057331e-06, "loss": 0.0028, "step": 87090 }, { "epoch": 1.4251820338705719, "grad_norm": 0.024053268134593964, "learning_rate": 6.293099069192672e-06, "loss": 0.0014, "step": 87100 }, { "epoch": 1.4253456598216476, "grad_norm": 0.1633210927248001, "learning_rate": 6.292179491456279e-06, "loss": 0.0015, "step": 87110 }, { "epoch": 1.4255092857727236, "grad_norm": 0.15724772214889526, "learning_rate": 6.291259866881483e-06, "loss": 0.0027, "step": 87120 }, { "epoch": 1.4256729117237994, "grad_norm": 0.19977770745754242, "learning_rate": 6.290340195501622e-06, "loss": 0.002, "step": 87130 }, { "epoch": 1.4258365376748752, "grad_norm": 0.08609337359666824, "learning_rate": 6.28942047735003e-06, "loss": 0.0021, "step": 87140 }, { "epoch": 1.4260001636259512, "grad_norm": 0.17799293994903564, "learning_rate": 6.288500712460044e-06, "loss": 0.0021, "step": 87150 }, { "epoch": 1.426163789577027, "grad_norm": 0.017902495339512825, "learning_rate": 6.287580900865005e-06, "loss": 0.0008, "step": 87160 }, { "epoch": 1.4263274155281027, "grad_norm": 0.08376562595367432, "learning_rate": 6.286661042598251e-06, "loss": 0.0019, "step": 87170 }, { "epoch": 1.4264910414791787, "grad_norm": 0.016464361920952797, "learning_rate": 6.285741137693129e-06, "loss": 0.0007, "step": 87180 }, { "epoch": 1.4266546674302545, "grad_norm": 0.061246857047080994, "learning_rate": 6.2848211861829785e-06, "loss": 0.0013, "step": 87190 }, { "epoch": 1.4268182933813303, "grad_norm": 0.015438602305948734, "learning_rate": 6.2839011881011516e-06, "loss": 0.0025, "step": 87200 }, { "epoch": 1.4269819193324063, "grad_norm": 0.10207697749137878, "learning_rate": 6.28298114348099e-06, "loss": 0.0021, "step": 87210 }, { "epoch": 1.427145545283482, "grad_norm": 0.13249380886554718, "learning_rate": 6.2820610523558454e-06, "loss": 0.0016, "step": 87220 }, { "epoch": 1.4273091712345578, "grad_norm": 0.1997976154088974, "learning_rate": 6.2811409147590684e-06, "loss": 0.0023, "step": 87230 }, { "epoch": 1.4274727971856336, "grad_norm": 0.35590627789497375, "learning_rate": 6.280220730724013e-06, "loss": 0.0024, "step": 87240 }, { "epoch": 1.4276364231367094, "grad_norm": 0.10247053951025009, "learning_rate": 6.279300500284035e-06, "loss": 0.0016, "step": 87250 }, { "epoch": 1.4278000490877854, "grad_norm": 0.12032989412546158, "learning_rate": 6.278380223472487e-06, "loss": 0.0022, "step": 87260 }, { "epoch": 1.4279636750388611, "grad_norm": 0.04637526720762253, "learning_rate": 6.2774599003227274e-06, "loss": 0.002, "step": 87270 }, { "epoch": 1.428127300989937, "grad_norm": 0.06521841883659363, "learning_rate": 6.276539530868117e-06, "loss": 0.0026, "step": 87280 }, { "epoch": 1.428290926941013, "grad_norm": 0.09272162616252899, "learning_rate": 6.275619115142018e-06, "loss": 0.0013, "step": 87290 }, { "epoch": 1.4284545528920887, "grad_norm": 0.011339899152517319, "learning_rate": 6.274698653177792e-06, "loss": 0.0018, "step": 87300 }, { "epoch": 1.4286181788431644, "grad_norm": 0.03778223320841789, "learning_rate": 6.273778145008802e-06, "loss": 0.0018, "step": 87310 }, { "epoch": 1.4287818047942404, "grad_norm": 0.10297144949436188, "learning_rate": 6.272857590668418e-06, "loss": 0.0013, "step": 87320 }, { "epoch": 1.4289454307453162, "grad_norm": 0.14527471363544464, "learning_rate": 6.271936990190003e-06, "loss": 0.0015, "step": 87330 }, { "epoch": 1.429109056696392, "grad_norm": 0.0076146963983774185, "learning_rate": 6.271016343606931e-06, "loss": 0.001, "step": 87340 }, { "epoch": 1.429272682647468, "grad_norm": 0.052294228225946426, "learning_rate": 6.27009565095257e-06, "loss": 0.0017, "step": 87350 }, { "epoch": 1.4294363085985438, "grad_norm": 0.09691692143678665, "learning_rate": 6.2691749122602954e-06, "loss": 0.0031, "step": 87360 }, { "epoch": 1.4295999345496195, "grad_norm": 0.03427308797836304, "learning_rate": 6.268254127563479e-06, "loss": 0.0021, "step": 87370 }, { "epoch": 1.4297635605006955, "grad_norm": 0.10378725826740265, "learning_rate": 6.267333296895499e-06, "loss": 0.0021, "step": 87380 }, { "epoch": 1.4299271864517713, "grad_norm": 0.032112445682287216, "learning_rate": 6.266412420289734e-06, "loss": 0.0022, "step": 87390 }, { "epoch": 1.430090812402847, "grad_norm": 0.0702488124370575, "learning_rate": 6.265491497779564e-06, "loss": 0.0014, "step": 87400 }, { "epoch": 1.4302544383539229, "grad_norm": 0.04237963259220123, "learning_rate": 6.264570529398366e-06, "loss": 0.0013, "step": 87410 }, { "epoch": 1.4304180643049988, "grad_norm": 0.016835927963256836, "learning_rate": 6.263649515179526e-06, "loss": 0.0012, "step": 87420 }, { "epoch": 1.4305816902560746, "grad_norm": 0.136257141828537, "learning_rate": 6.262728455156428e-06, "loss": 0.0028, "step": 87430 }, { "epoch": 1.4307453162071504, "grad_norm": 0.06842660903930664, "learning_rate": 6.26180734936246e-06, "loss": 0.0013, "step": 87440 }, { "epoch": 1.4309089421582262, "grad_norm": 0.031677670776844025, "learning_rate": 6.2608861978310065e-06, "loss": 0.0029, "step": 87450 }, { "epoch": 1.4310725681093022, "grad_norm": 0.0027826251462101936, "learning_rate": 6.25996500059546e-06, "loss": 0.0017, "step": 87460 }, { "epoch": 1.431236194060378, "grad_norm": 0.252929151058197, "learning_rate": 6.2590437576892095e-06, "loss": 0.0019, "step": 87470 }, { "epoch": 1.4313998200114537, "grad_norm": 0.2734964191913605, "learning_rate": 6.25812246914565e-06, "loss": 0.002, "step": 87480 }, { "epoch": 1.4315634459625297, "grad_norm": 0.10104244202375412, "learning_rate": 6.257201134998174e-06, "loss": 0.0016, "step": 87490 }, { "epoch": 1.4317270719136055, "grad_norm": 0.11319293826818466, "learning_rate": 6.256279755280179e-06, "loss": 0.0016, "step": 87500 }, { "epoch": 1.4318906978646813, "grad_norm": 0.06297991424798965, "learning_rate": 6.255358330025061e-06, "loss": 0.0017, "step": 87510 }, { "epoch": 1.4320543238157573, "grad_norm": 0.07003606110811234, "learning_rate": 6.254436859266222e-06, "loss": 0.0011, "step": 87520 }, { "epoch": 1.432217949766833, "grad_norm": 0.047473471611738205, "learning_rate": 6.253515343037061e-06, "loss": 0.0014, "step": 87530 }, { "epoch": 1.4323815757179088, "grad_norm": 0.003380363341420889, "learning_rate": 6.252593781370981e-06, "loss": 0.0018, "step": 87540 }, { "epoch": 1.4325452016689848, "grad_norm": 0.09378622472286224, "learning_rate": 6.2516721743013894e-06, "loss": 0.0014, "step": 87550 }, { "epoch": 1.4327088276200606, "grad_norm": 0.12010186910629272, "learning_rate": 6.250750521861688e-06, "loss": 0.0014, "step": 87560 }, { "epoch": 1.4328724535711364, "grad_norm": 0.08450304716825485, "learning_rate": 6.249828824085288e-06, "loss": 0.0013, "step": 87570 }, { "epoch": 1.4330360795222123, "grad_norm": 0.08225636929273605, "learning_rate": 6.248907081005595e-06, "loss": 0.0027, "step": 87580 }, { "epoch": 1.4331997054732881, "grad_norm": 0.06786559522151947, "learning_rate": 6.247985292656022e-06, "loss": 0.0016, "step": 87590 }, { "epoch": 1.433363331424364, "grad_norm": 0.011361506767570972, "learning_rate": 6.247063459069982e-06, "loss": 0.0011, "step": 87600 }, { "epoch": 1.4335269573754397, "grad_norm": 0.16560013592243195, "learning_rate": 6.24614158028089e-06, "loss": 0.002, "step": 87610 }, { "epoch": 1.4336905833265157, "grad_norm": 0.16421037912368774, "learning_rate": 6.245219656322159e-06, "loss": 0.0018, "step": 87620 }, { "epoch": 1.4338542092775914, "grad_norm": 0.03722596913576126, "learning_rate": 6.24429768722721e-06, "loss": 0.0012, "step": 87630 }, { "epoch": 1.4340178352286672, "grad_norm": 0.2592485547065735, "learning_rate": 6.243375673029459e-06, "loss": 0.0036, "step": 87640 }, { "epoch": 1.434181461179743, "grad_norm": 0.08613810688257217, "learning_rate": 6.242453613762328e-06, "loss": 0.0008, "step": 87650 }, { "epoch": 1.434345087130819, "grad_norm": 0.21068191528320312, "learning_rate": 6.241531509459241e-06, "loss": 0.0021, "step": 87660 }, { "epoch": 1.4345087130818948, "grad_norm": 0.17119361460208893, "learning_rate": 6.24060936015362e-06, "loss": 0.0016, "step": 87670 }, { "epoch": 1.4346723390329705, "grad_norm": 0.07850255817174911, "learning_rate": 6.239687165878891e-06, "loss": 0.0011, "step": 87680 }, { "epoch": 1.4348359649840465, "grad_norm": 0.08151416480541229, "learning_rate": 6.238764926668482e-06, "loss": 0.0021, "step": 87690 }, { "epoch": 1.4349995909351223, "grad_norm": 0.17128393054008484, "learning_rate": 6.237842642555822e-06, "loss": 0.0022, "step": 87700 }, { "epoch": 1.435163216886198, "grad_norm": 0.16158713400363922, "learning_rate": 6.236920313574342e-06, "loss": 0.002, "step": 87710 }, { "epoch": 1.435326842837274, "grad_norm": 0.01578509248793125, "learning_rate": 6.235997939757473e-06, "loss": 0.0017, "step": 87720 }, { "epoch": 1.4354904687883498, "grad_norm": 0.1902141124010086, "learning_rate": 6.2350755211386486e-06, "loss": 0.0032, "step": 87730 }, { "epoch": 1.4356540947394256, "grad_norm": 0.1205715611577034, "learning_rate": 6.2341530577513045e-06, "loss": 0.0034, "step": 87740 }, { "epoch": 1.4358177206905016, "grad_norm": 0.054978013038635254, "learning_rate": 6.233230549628879e-06, "loss": 0.0015, "step": 87750 }, { "epoch": 1.4359813466415774, "grad_norm": 0.14329639077186584, "learning_rate": 6.232307996804812e-06, "loss": 0.0027, "step": 87760 }, { "epoch": 1.4361449725926532, "grad_norm": 0.005561621394008398, "learning_rate": 6.231385399312539e-06, "loss": 0.0027, "step": 87770 }, { "epoch": 1.4363085985437292, "grad_norm": 0.09101153165102005, "learning_rate": 6.230462757185507e-06, "loss": 0.002, "step": 87780 }, { "epoch": 1.436472224494805, "grad_norm": 0.17430588603019714, "learning_rate": 6.2295400704571565e-06, "loss": 0.0022, "step": 87790 }, { "epoch": 1.4366358504458807, "grad_norm": 0.13493849337100983, "learning_rate": 6.228617339160933e-06, "loss": 0.0018, "step": 87800 }, { "epoch": 1.4367994763969565, "grad_norm": 0.04612865298986435, "learning_rate": 6.227694563330284e-06, "loss": 0.0013, "step": 87810 }, { "epoch": 1.4369631023480323, "grad_norm": 0.0956103727221489, "learning_rate": 6.226771742998658e-06, "loss": 0.0017, "step": 87820 }, { "epoch": 1.4371267282991083, "grad_norm": 0.08610368520021439, "learning_rate": 6.225848878199505e-06, "loss": 0.0011, "step": 87830 }, { "epoch": 1.437290354250184, "grad_norm": 0.07028482109308243, "learning_rate": 6.224925968966277e-06, "loss": 0.0029, "step": 87840 }, { "epoch": 1.4374539802012598, "grad_norm": 0.10654260963201523, "learning_rate": 6.2240030153324245e-06, "loss": 0.0024, "step": 87850 }, { "epoch": 1.4376176061523358, "grad_norm": 0.07870994508266449, "learning_rate": 6.223080017331408e-06, "loss": 0.0018, "step": 87860 }, { "epoch": 1.4377812321034116, "grad_norm": 0.22111137211322784, "learning_rate": 6.222156974996677e-06, "loss": 0.0014, "step": 87870 }, { "epoch": 1.4379448580544874, "grad_norm": 0.026112055405974388, "learning_rate": 6.221233888361694e-06, "loss": 0.0014, "step": 87880 }, { "epoch": 1.4381084840055633, "grad_norm": 0.04195261374115944, "learning_rate": 6.220310757459919e-06, "loss": 0.001, "step": 87890 }, { "epoch": 1.4382721099566391, "grad_norm": 0.010488742962479591, "learning_rate": 6.219387582324811e-06, "loss": 0.0012, "step": 87900 }, { "epoch": 1.438435735907715, "grad_norm": 0.2689409852027893, "learning_rate": 6.218464362989834e-06, "loss": 0.0024, "step": 87910 }, { "epoch": 1.438599361858791, "grad_norm": 0.11243392527103424, "learning_rate": 6.217541099488454e-06, "loss": 0.0016, "step": 87920 }, { "epoch": 1.4387629878098667, "grad_norm": 0.05361480638384819, "learning_rate": 6.2166177918541325e-06, "loss": 0.0014, "step": 87930 }, { "epoch": 1.4389266137609424, "grad_norm": 0.18623264133930206, "learning_rate": 6.215694440120341e-06, "loss": 0.0022, "step": 87940 }, { "epoch": 1.4390902397120184, "grad_norm": 0.14766648411750793, "learning_rate": 6.2147710443205475e-06, "loss": 0.0027, "step": 87950 }, { "epoch": 1.4392538656630942, "grad_norm": 0.15501351654529572, "learning_rate": 6.213847604488225e-06, "loss": 0.0025, "step": 87960 }, { "epoch": 1.43941749161417, "grad_norm": 0.26560544967651367, "learning_rate": 6.212924120656843e-06, "loss": 0.0024, "step": 87970 }, { "epoch": 1.439581117565246, "grad_norm": 0.045336879789829254, "learning_rate": 6.212000592859877e-06, "loss": 0.0008, "step": 87980 }, { "epoch": 1.4397447435163218, "grad_norm": 0.28163769841194153, "learning_rate": 6.211077021130802e-06, "loss": 0.0018, "step": 87990 }, { "epoch": 1.4399083694673975, "grad_norm": 0.08188337087631226, "learning_rate": 6.210153405503097e-06, "loss": 0.0019, "step": 88000 }, { "epoch": 1.4400719954184733, "grad_norm": 0.265301376581192, "learning_rate": 6.209229746010239e-06, "loss": 0.0021, "step": 88010 }, { "epoch": 1.440235621369549, "grad_norm": 0.016715556383132935, "learning_rate": 6.208306042685709e-06, "loss": 0.002, "step": 88020 }, { "epoch": 1.440399247320625, "grad_norm": 0.23580516874790192, "learning_rate": 6.207382295562988e-06, "loss": 0.0019, "step": 88030 }, { "epoch": 1.4405628732717008, "grad_norm": 0.6500301957130432, "learning_rate": 6.206458504675562e-06, "loss": 0.0025, "step": 88040 }, { "epoch": 1.4407264992227766, "grad_norm": 0.16514134407043457, "learning_rate": 6.205534670056914e-06, "loss": 0.0015, "step": 88050 }, { "epoch": 1.4408901251738526, "grad_norm": 0.027558708563447, "learning_rate": 6.204610791740533e-06, "loss": 0.0021, "step": 88060 }, { "epoch": 1.4410537511249284, "grad_norm": 0.07553763687610626, "learning_rate": 6.203686869759905e-06, "loss": 0.0032, "step": 88070 }, { "epoch": 1.4412173770760042, "grad_norm": 0.015969766303896904, "learning_rate": 6.202762904148521e-06, "loss": 0.002, "step": 88080 }, { "epoch": 1.4413810030270802, "grad_norm": 0.16341017186641693, "learning_rate": 6.201838894939872e-06, "loss": 0.0018, "step": 88090 }, { "epoch": 1.441544628978156, "grad_norm": 0.05430678650736809, "learning_rate": 6.200914842167451e-06, "loss": 0.0013, "step": 88100 }, { "epoch": 1.4417082549292317, "grad_norm": 0.044062547385692596, "learning_rate": 6.199990745864757e-06, "loss": 0.0024, "step": 88110 }, { "epoch": 1.4418718808803077, "grad_norm": 0.20913057029247284, "learning_rate": 6.199066606065279e-06, "loss": 0.0023, "step": 88120 }, { "epoch": 1.4420355068313835, "grad_norm": 0.0816015675663948, "learning_rate": 6.19814242280252e-06, "loss": 0.0023, "step": 88130 }, { "epoch": 1.4421991327824593, "grad_norm": 0.10608971863985062, "learning_rate": 6.197218196109976e-06, "loss": 0.002, "step": 88140 }, { "epoch": 1.4423627587335353, "grad_norm": 0.05991344898939133, "learning_rate": 6.196293926021152e-06, "loss": 0.0025, "step": 88150 }, { "epoch": 1.442526384684611, "grad_norm": 0.1403913050889969, "learning_rate": 6.195369612569547e-06, "loss": 0.0017, "step": 88160 }, { "epoch": 1.4426900106356868, "grad_norm": 0.15912337601184845, "learning_rate": 6.194445255788667e-06, "loss": 0.0016, "step": 88170 }, { "epoch": 1.4428536365867626, "grad_norm": 0.1175880879163742, "learning_rate": 6.193520855712018e-06, "loss": 0.005, "step": 88180 }, { "epoch": 1.4430172625378386, "grad_norm": 0.2903042733669281, "learning_rate": 6.192596412373105e-06, "loss": 0.0033, "step": 88190 }, { "epoch": 1.4431808884889143, "grad_norm": 0.046630214899778366, "learning_rate": 6.1916719258054395e-06, "loss": 0.0014, "step": 88200 }, { "epoch": 1.4433445144399901, "grad_norm": 0.03290325403213501, "learning_rate": 6.1907473960425315e-06, "loss": 0.001, "step": 88210 }, { "epoch": 1.443508140391066, "grad_norm": 0.1491260826587677, "learning_rate": 6.18982282311789e-06, "loss": 0.0023, "step": 88220 }, { "epoch": 1.443671766342142, "grad_norm": 0.27651795744895935, "learning_rate": 6.188898207065032e-06, "loss": 0.0022, "step": 88230 }, { "epoch": 1.4438353922932177, "grad_norm": 0.1683707982301712, "learning_rate": 6.187973547917471e-06, "loss": 0.0037, "step": 88240 }, { "epoch": 1.4439990182442934, "grad_norm": 0.07415008544921875, "learning_rate": 6.1870488457087254e-06, "loss": 0.0026, "step": 88250 }, { "epoch": 1.4441626441953694, "grad_norm": 0.19869717955589294, "learning_rate": 6.186124100472311e-06, "loss": 0.0015, "step": 88260 }, { "epoch": 1.4443262701464452, "grad_norm": 0.1601708084344864, "learning_rate": 6.18519931224175e-06, "loss": 0.0022, "step": 88270 }, { "epoch": 1.444489896097521, "grad_norm": 0.22000622749328613, "learning_rate": 6.1842744810505606e-06, "loss": 0.0016, "step": 88280 }, { "epoch": 1.444653522048597, "grad_norm": 0.12108389288187027, "learning_rate": 6.183349606932269e-06, "loss": 0.003, "step": 88290 }, { "epoch": 1.4448171479996728, "grad_norm": 0.21934489905834198, "learning_rate": 6.1824246899203966e-06, "loss": 0.0036, "step": 88300 }, { "epoch": 1.4449807739507485, "grad_norm": 0.07911412417888641, "learning_rate": 6.181499730048474e-06, "loss": 0.0009, "step": 88310 }, { "epoch": 1.4451443999018245, "grad_norm": 0.13370271027088165, "learning_rate": 6.180574727350024e-06, "loss": 0.0026, "step": 88320 }, { "epoch": 1.4453080258529003, "grad_norm": 0.012059704400599003, "learning_rate": 6.179649681858577e-06, "loss": 0.0011, "step": 88330 }, { "epoch": 1.445471651803976, "grad_norm": 0.27589085698127747, "learning_rate": 6.178724593607664e-06, "loss": 0.0014, "step": 88340 }, { "epoch": 1.445635277755052, "grad_norm": 0.22645272314548492, "learning_rate": 6.177799462630819e-06, "loss": 0.0022, "step": 88350 }, { "epoch": 1.4457989037061278, "grad_norm": 0.010753162205219269, "learning_rate": 6.176874288961574e-06, "loss": 0.0019, "step": 88360 }, { "epoch": 1.4459625296572036, "grad_norm": 0.07627929002046585, "learning_rate": 6.1759490726334635e-06, "loss": 0.0019, "step": 88370 }, { "epoch": 1.4461261556082794, "grad_norm": 0.2940148413181305, "learning_rate": 6.175023813680027e-06, "loss": 0.0029, "step": 88380 }, { "epoch": 1.4462897815593554, "grad_norm": 0.10072994232177734, "learning_rate": 6.174098512134801e-06, "loss": 0.0023, "step": 88390 }, { "epoch": 1.4464534075104312, "grad_norm": 0.06951545923948288, "learning_rate": 6.173173168031325e-06, "loss": 0.0018, "step": 88400 }, { "epoch": 1.446617033461507, "grad_norm": 0.050156425684690475, "learning_rate": 6.172247781403142e-06, "loss": 0.0024, "step": 88410 }, { "epoch": 1.4467806594125827, "grad_norm": 0.057347770780324936, "learning_rate": 6.171322352283794e-06, "loss": 0.002, "step": 88420 }, { "epoch": 1.4469442853636587, "grad_norm": 0.052968766540288925, "learning_rate": 6.170396880706826e-06, "loss": 0.002, "step": 88430 }, { "epoch": 1.4471079113147345, "grad_norm": 0.19407209753990173, "learning_rate": 6.169471366705782e-06, "loss": 0.0031, "step": 88440 }, { "epoch": 1.4472715372658103, "grad_norm": 0.19769182801246643, "learning_rate": 6.168545810314214e-06, "loss": 0.0016, "step": 88450 }, { "epoch": 1.4474351632168863, "grad_norm": 0.07108977437019348, "learning_rate": 6.16762021156567e-06, "loss": 0.0016, "step": 88460 }, { "epoch": 1.447598789167962, "grad_norm": 0.10954627394676208, "learning_rate": 6.1666945704936976e-06, "loss": 0.0021, "step": 88470 }, { "epoch": 1.4477624151190378, "grad_norm": 0.02270127460360527, "learning_rate": 6.165768887131852e-06, "loss": 0.0027, "step": 88480 }, { "epoch": 1.4479260410701138, "grad_norm": 0.10544577240943909, "learning_rate": 6.1648431615136865e-06, "loss": 0.0022, "step": 88490 }, { "epoch": 1.4480896670211896, "grad_norm": 0.10242760926485062, "learning_rate": 6.1639173936727545e-06, "loss": 0.0019, "step": 88500 }, { "epoch": 1.4482532929722653, "grad_norm": 0.09470520913600922, "learning_rate": 6.162991583642615e-06, "loss": 0.002, "step": 88510 }, { "epoch": 1.4484169189233413, "grad_norm": 0.17790016531944275, "learning_rate": 6.162065731456825e-06, "loss": 0.0024, "step": 88520 }, { "epoch": 1.4485805448744171, "grad_norm": 0.07417348772287369, "learning_rate": 6.161139837148947e-06, "loss": 0.0031, "step": 88530 }, { "epoch": 1.448744170825493, "grad_norm": 0.03617400676012039, "learning_rate": 6.1602139007525396e-06, "loss": 0.0015, "step": 88540 }, { "epoch": 1.448907796776569, "grad_norm": 0.11755359917879105, "learning_rate": 6.159287922301167e-06, "loss": 0.0016, "step": 88550 }, { "epoch": 1.4490714227276447, "grad_norm": 0.09294844418764114, "learning_rate": 6.158361901828395e-06, "loss": 0.0014, "step": 88560 }, { "epoch": 1.4492350486787204, "grad_norm": 0.10152885317802429, "learning_rate": 6.157435839367786e-06, "loss": 0.0022, "step": 88570 }, { "epoch": 1.4493986746297962, "grad_norm": 0.14733688533306122, "learning_rate": 6.156509734952909e-06, "loss": 0.0019, "step": 88580 }, { "epoch": 1.4495623005808722, "grad_norm": 0.06413263082504272, "learning_rate": 6.1555835886173356e-06, "loss": 0.0014, "step": 88590 }, { "epoch": 1.449725926531948, "grad_norm": 0.004010655917227268, "learning_rate": 6.154657400394633e-06, "loss": 0.0017, "step": 88600 }, { "epoch": 1.4498895524830238, "grad_norm": 0.04507434740662575, "learning_rate": 6.153731170318376e-06, "loss": 0.0027, "step": 88610 }, { "epoch": 1.4500531784340995, "grad_norm": 0.06232910230755806, "learning_rate": 6.152804898422138e-06, "loss": 0.0011, "step": 88620 }, { "epoch": 1.4502168043851755, "grad_norm": 0.18974187970161438, "learning_rate": 6.151878584739492e-06, "loss": 0.0056, "step": 88630 }, { "epoch": 1.4503804303362513, "grad_norm": 0.32222968339920044, "learning_rate": 6.150952229304015e-06, "loss": 0.0024, "step": 88640 }, { "epoch": 1.450544056287327, "grad_norm": 0.20506054162979126, "learning_rate": 6.150025832149285e-06, "loss": 0.0022, "step": 88650 }, { "epoch": 1.450707682238403, "grad_norm": 0.1331210881471634, "learning_rate": 6.149099393308885e-06, "loss": 0.0019, "step": 88660 }, { "epoch": 1.4508713081894788, "grad_norm": 0.026927372440695763, "learning_rate": 6.148172912816392e-06, "loss": 0.0019, "step": 88670 }, { "epoch": 1.4510349341405546, "grad_norm": 0.04572971910238266, "learning_rate": 6.147246390705391e-06, "loss": 0.0021, "step": 88680 }, { "epoch": 1.4511985600916306, "grad_norm": 0.06819307804107666, "learning_rate": 6.1463198270094655e-06, "loss": 0.0015, "step": 88690 }, { "epoch": 1.4513621860427064, "grad_norm": 0.09001713246107101, "learning_rate": 6.145393221762203e-06, "loss": 0.0021, "step": 88700 }, { "epoch": 1.4515258119937822, "grad_norm": 0.05180295556783676, "learning_rate": 6.1444665749971885e-06, "loss": 0.0011, "step": 88710 }, { "epoch": 1.4516894379448582, "grad_norm": 0.14307278394699097, "learning_rate": 6.143539886748009e-06, "loss": 0.0016, "step": 88720 }, { "epoch": 1.451853063895934, "grad_norm": 0.17013002932071686, "learning_rate": 6.142613157048259e-06, "loss": 0.0027, "step": 88730 }, { "epoch": 1.4520166898470097, "grad_norm": 0.09118823707103729, "learning_rate": 6.141686385931528e-06, "loss": 0.0023, "step": 88740 }, { "epoch": 1.4521803157980857, "grad_norm": 0.18551497161388397, "learning_rate": 6.1407595734314095e-06, "loss": 0.0019, "step": 88750 }, { "epoch": 1.4523439417491615, "grad_norm": 0.018743224442005157, "learning_rate": 6.139832719581499e-06, "loss": 0.0025, "step": 88760 }, { "epoch": 1.4525075677002373, "grad_norm": 0.08347221463918686, "learning_rate": 6.138905824415391e-06, "loss": 0.0014, "step": 88770 }, { "epoch": 1.452671193651313, "grad_norm": 0.21199621260166168, "learning_rate": 6.1379788879666845e-06, "loss": 0.0014, "step": 88780 }, { "epoch": 1.4528348196023888, "grad_norm": 0.09008124470710754, "learning_rate": 6.137051910268978e-06, "loss": 0.0014, "step": 88790 }, { "epoch": 1.4529984455534648, "grad_norm": 0.16531740128993988, "learning_rate": 6.136124891355871e-06, "loss": 0.0023, "step": 88800 }, { "epoch": 1.4531620715045406, "grad_norm": 0.13263924419879913, "learning_rate": 6.1351978312609704e-06, "loss": 0.0009, "step": 88810 }, { "epoch": 1.4533256974556163, "grad_norm": 0.10232087224721909, "learning_rate": 6.134270730017876e-06, "loss": 0.0021, "step": 88820 }, { "epoch": 1.4534893234066923, "grad_norm": 0.12131325155496597, "learning_rate": 6.133343587660194e-06, "loss": 0.0021, "step": 88830 }, { "epoch": 1.4536529493577681, "grad_norm": 0.05863536521792412, "learning_rate": 6.1324164042215285e-06, "loss": 0.0028, "step": 88840 }, { "epoch": 1.453816575308844, "grad_norm": 0.059465281665325165, "learning_rate": 6.131489179735492e-06, "loss": 0.0028, "step": 88850 }, { "epoch": 1.45398020125992, "grad_norm": 0.08583907037973404, "learning_rate": 6.130561914235692e-06, "loss": 0.0023, "step": 88860 }, { "epoch": 1.4541438272109957, "grad_norm": 0.12189940363168716, "learning_rate": 6.129634607755739e-06, "loss": 0.0018, "step": 88870 }, { "epoch": 1.4543074531620714, "grad_norm": 0.1641402393579483, "learning_rate": 6.128707260329248e-06, "loss": 0.0021, "step": 88880 }, { "epoch": 1.4544710791131474, "grad_norm": 0.15608078241348267, "learning_rate": 6.1277798719898295e-06, "loss": 0.0026, "step": 88890 }, { "epoch": 1.4546347050642232, "grad_norm": 0.10500966757535934, "learning_rate": 6.126852442771103e-06, "loss": 0.002, "step": 88900 }, { "epoch": 1.454798331015299, "grad_norm": 0.09700223803520203, "learning_rate": 6.1259249727066834e-06, "loss": 0.0021, "step": 88910 }, { "epoch": 1.454961956966375, "grad_norm": 0.18499355018138885, "learning_rate": 6.12499746183019e-06, "loss": 0.0018, "step": 88920 }, { "epoch": 1.4551255829174508, "grad_norm": 0.17238733172416687, "learning_rate": 6.124069910175241e-06, "loss": 0.0022, "step": 88930 }, { "epoch": 1.4552892088685265, "grad_norm": 0.02622097171843052, "learning_rate": 6.12314231777546e-06, "loss": 0.0022, "step": 88940 }, { "epoch": 1.4554528348196025, "grad_norm": 0.1282316893339157, "learning_rate": 6.122214684664469e-06, "loss": 0.0023, "step": 88950 }, { "epoch": 1.4556164607706783, "grad_norm": 0.144830122590065, "learning_rate": 6.121287010875895e-06, "loss": 0.0018, "step": 88960 }, { "epoch": 1.455780086721754, "grad_norm": 0.0503481850028038, "learning_rate": 6.12035929644336e-06, "loss": 0.0016, "step": 88970 }, { "epoch": 1.4559437126728298, "grad_norm": 0.12261451780796051, "learning_rate": 6.1194315414004935e-06, "loss": 0.0025, "step": 88980 }, { "epoch": 1.4561073386239056, "grad_norm": 0.06970880180597305, "learning_rate": 6.118503745780924e-06, "loss": 0.0023, "step": 88990 }, { "epoch": 1.4562709645749816, "grad_norm": 0.140335813164711, "learning_rate": 6.117575909618282e-06, "loss": 0.0017, "step": 89000 }, { "epoch": 1.4564345905260574, "grad_norm": 0.09891358017921448, "learning_rate": 6.1166480329462e-06, "loss": 0.0011, "step": 89010 }, { "epoch": 1.4565982164771332, "grad_norm": 0.06568451970815659, "learning_rate": 6.115720115798309e-06, "loss": 0.0018, "step": 89020 }, { "epoch": 1.4567618424282092, "grad_norm": 0.03837354853749275, "learning_rate": 6.114792158208247e-06, "loss": 0.0009, "step": 89030 }, { "epoch": 1.456925468379285, "grad_norm": 0.06534527242183685, "learning_rate": 6.113864160209647e-06, "loss": 0.0029, "step": 89040 }, { "epoch": 1.4570890943303607, "grad_norm": 0.16001906991004944, "learning_rate": 6.112936121836149e-06, "loss": 0.0016, "step": 89050 }, { "epoch": 1.4572527202814367, "grad_norm": 0.1633518636226654, "learning_rate": 6.112008043121393e-06, "loss": 0.0071, "step": 89060 }, { "epoch": 1.4574163462325125, "grad_norm": 0.09686394035816193, "learning_rate": 6.111079924099017e-06, "loss": 0.0015, "step": 89070 }, { "epoch": 1.4575799721835883, "grad_norm": 0.08456028252840042, "learning_rate": 6.110151764802663e-06, "loss": 0.0023, "step": 89080 }, { "epoch": 1.4577435981346643, "grad_norm": 0.12298714369535446, "learning_rate": 6.109223565265976e-06, "loss": 0.0025, "step": 89090 }, { "epoch": 1.45790722408574, "grad_norm": 0.07047034054994583, "learning_rate": 6.108295325522602e-06, "loss": 0.0019, "step": 89100 }, { "epoch": 1.4580708500368158, "grad_norm": 0.34442999958992004, "learning_rate": 6.107367045606186e-06, "loss": 0.0024, "step": 89110 }, { "epoch": 1.4582344759878918, "grad_norm": 0.09606515616178513, "learning_rate": 6.106438725550375e-06, "loss": 0.002, "step": 89120 }, { "epoch": 1.4583981019389676, "grad_norm": 0.08594342321157455, "learning_rate": 6.105510365388821e-06, "loss": 0.0015, "step": 89130 }, { "epoch": 1.4585617278900433, "grad_norm": 0.11370536684989929, "learning_rate": 6.104581965155172e-06, "loss": 0.0012, "step": 89140 }, { "epoch": 1.4587253538411191, "grad_norm": 0.04058707505464554, "learning_rate": 6.103653524883082e-06, "loss": 0.0014, "step": 89150 }, { "epoch": 1.4588889797921951, "grad_norm": 0.29345694184303284, "learning_rate": 6.1027250446062035e-06, "loss": 0.0029, "step": 89160 }, { "epoch": 1.459052605743271, "grad_norm": 0.1731075793504715, "learning_rate": 6.1017965243581936e-06, "loss": 0.0036, "step": 89170 }, { "epoch": 1.4592162316943467, "grad_norm": 0.10479287058115005, "learning_rate": 6.100867964172706e-06, "loss": 0.0017, "step": 89180 }, { "epoch": 1.4593798576454224, "grad_norm": 0.1564171463251114, "learning_rate": 6.099939364083403e-06, "loss": 0.003, "step": 89190 }, { "epoch": 1.4595434835964984, "grad_norm": 0.13803674280643463, "learning_rate": 6.09901072412394e-06, "loss": 0.0019, "step": 89200 }, { "epoch": 1.4597071095475742, "grad_norm": 0.07581519335508347, "learning_rate": 6.098082044327979e-06, "loss": 0.0014, "step": 89210 }, { "epoch": 1.45987073549865, "grad_norm": 0.048461902886629105, "learning_rate": 6.097153324729186e-06, "loss": 0.0029, "step": 89220 }, { "epoch": 1.460034361449726, "grad_norm": 0.1506853997707367, "learning_rate": 6.096224565361219e-06, "loss": 0.0014, "step": 89230 }, { "epoch": 1.4601979874008018, "grad_norm": 0.2848055064678192, "learning_rate": 6.095295766257749e-06, "loss": 0.0011, "step": 89240 }, { "epoch": 1.4603616133518775, "grad_norm": 0.26289716362953186, "learning_rate": 6.094366927452437e-06, "loss": 0.0023, "step": 89250 }, { "epoch": 1.4605252393029535, "grad_norm": 0.08146172761917114, "learning_rate": 6.0934380489789566e-06, "loss": 0.0017, "step": 89260 }, { "epoch": 1.4606888652540293, "grad_norm": 0.03863754868507385, "learning_rate": 6.092509130870974e-06, "loss": 0.0028, "step": 89270 }, { "epoch": 1.460852491205105, "grad_norm": 0.1781115084886551, "learning_rate": 6.091580173162161e-06, "loss": 0.0016, "step": 89280 }, { "epoch": 1.461016117156181, "grad_norm": 0.3425150513648987, "learning_rate": 6.090651175886188e-06, "loss": 0.0028, "step": 89290 }, { "epoch": 1.4611797431072568, "grad_norm": 0.336593359708786, "learning_rate": 6.089722139076735e-06, "loss": 0.0014, "step": 89300 }, { "epoch": 1.4613433690583326, "grad_norm": 0.1394587755203247, "learning_rate": 6.088793062767473e-06, "loss": 0.0024, "step": 89310 }, { "epoch": 1.4615069950094086, "grad_norm": 0.0684904158115387, "learning_rate": 6.087863946992077e-06, "loss": 0.0018, "step": 89320 }, { "epoch": 1.4616706209604844, "grad_norm": 0.08376423269510269, "learning_rate": 6.0869347917842295e-06, "loss": 0.001, "step": 89330 }, { "epoch": 1.4618342469115602, "grad_norm": 0.10150188207626343, "learning_rate": 6.086005597177608e-06, "loss": 0.0013, "step": 89340 }, { "epoch": 1.461997872862636, "grad_norm": 0.1953052282333374, "learning_rate": 6.085076363205893e-06, "loss": 0.0024, "step": 89350 }, { "epoch": 1.462161498813712, "grad_norm": 0.07185788452625275, "learning_rate": 6.084147089902768e-06, "loss": 0.0029, "step": 89360 }, { "epoch": 1.4623251247647877, "grad_norm": 0.07225426286458969, "learning_rate": 6.0832177773019165e-06, "loss": 0.0007, "step": 89370 }, { "epoch": 1.4624887507158635, "grad_norm": 0.12425591051578522, "learning_rate": 6.082288425437024e-06, "loss": 0.0016, "step": 89380 }, { "epoch": 1.4626523766669393, "grad_norm": 0.05503705143928528, "learning_rate": 6.081359034341777e-06, "loss": 0.0036, "step": 89390 }, { "epoch": 1.4628160026180153, "grad_norm": 0.19168508052825928, "learning_rate": 6.0804296040498645e-06, "loss": 0.0017, "step": 89400 }, { "epoch": 1.462979628569091, "grad_norm": 0.5246029496192932, "learning_rate": 6.079500134594977e-06, "loss": 0.0024, "step": 89410 }, { "epoch": 1.4631432545201668, "grad_norm": 0.09161466360092163, "learning_rate": 6.078570626010803e-06, "loss": 0.0021, "step": 89420 }, { "epoch": 1.4633068804712428, "grad_norm": 0.10353600978851318, "learning_rate": 6.077641078331033e-06, "loss": 0.0024, "step": 89430 }, { "epoch": 1.4634705064223186, "grad_norm": 0.031692489981651306, "learning_rate": 6.076711491589367e-06, "loss": 0.0018, "step": 89440 }, { "epoch": 1.4636341323733943, "grad_norm": 0.004756220616400242, "learning_rate": 6.075781865819498e-06, "loss": 0.003, "step": 89450 }, { "epoch": 1.4637977583244703, "grad_norm": 0.20435446500778198, "learning_rate": 6.074852201055121e-06, "loss": 0.0021, "step": 89460 }, { "epoch": 1.4639613842755461, "grad_norm": 0.3000190556049347, "learning_rate": 6.073922497329936e-06, "loss": 0.003, "step": 89470 }, { "epoch": 1.464125010226622, "grad_norm": 0.011369393207132816, "learning_rate": 6.07299275467764e-06, "loss": 0.003, "step": 89480 }, { "epoch": 1.4642886361776979, "grad_norm": 0.12313947081565857, "learning_rate": 6.072062973131936e-06, "loss": 0.0027, "step": 89490 }, { "epoch": 1.4644522621287737, "grad_norm": 0.07110333442687988, "learning_rate": 6.071133152726526e-06, "loss": 0.0009, "step": 89500 }, { "epoch": 1.4646158880798494, "grad_norm": 0.13085950911045074, "learning_rate": 6.0702032934951135e-06, "loss": 0.0014, "step": 89510 }, { "epoch": 1.4647795140309254, "grad_norm": 0.04204509034752846, "learning_rate": 6.069273395471403e-06, "loss": 0.0011, "step": 89520 }, { "epoch": 1.4649431399820012, "grad_norm": 0.04049510136246681, "learning_rate": 6.0683434586891035e-06, "loss": 0.0012, "step": 89530 }, { "epoch": 1.465106765933077, "grad_norm": 0.23977002501487732, "learning_rate": 6.067413483181919e-06, "loss": 0.0016, "step": 89540 }, { "epoch": 1.4652703918841528, "grad_norm": 0.11275684833526611, "learning_rate": 6.066483468983564e-06, "loss": 0.0011, "step": 89550 }, { "epoch": 1.4654340178352285, "grad_norm": 0.1879366785287857, "learning_rate": 6.065553416127745e-06, "loss": 0.0028, "step": 89560 }, { "epoch": 1.4655976437863045, "grad_norm": 0.2041204571723938, "learning_rate": 6.064623324648176e-06, "loss": 0.0014, "step": 89570 }, { "epoch": 1.4657612697373803, "grad_norm": 0.2825053632259369, "learning_rate": 6.063693194578569e-06, "loss": 0.0013, "step": 89580 }, { "epoch": 1.465924895688456, "grad_norm": 0.06186201423406601, "learning_rate": 6.062763025952642e-06, "loss": 0.0015, "step": 89590 }, { "epoch": 1.466088521639532, "grad_norm": 0.025388730689883232, "learning_rate": 6.061832818804109e-06, "loss": 0.0012, "step": 89600 }, { "epoch": 1.4662521475906078, "grad_norm": 0.03444620221853256, "learning_rate": 6.060902573166688e-06, "loss": 0.0022, "step": 89610 }, { "epoch": 1.4664157735416836, "grad_norm": 0.14855274558067322, "learning_rate": 6.059972289074099e-06, "loss": 0.0018, "step": 89620 }, { "epoch": 1.4665793994927596, "grad_norm": 0.0637306272983551, "learning_rate": 6.059041966560062e-06, "loss": 0.0024, "step": 89630 }, { "epoch": 1.4667430254438354, "grad_norm": 0.0553729347884655, "learning_rate": 6.058111605658299e-06, "loss": 0.0011, "step": 89640 }, { "epoch": 1.4669066513949112, "grad_norm": 0.04310569167137146, "learning_rate": 6.057181206402531e-06, "loss": 0.0008, "step": 89650 }, { "epoch": 1.4670702773459872, "grad_norm": 0.15886901319026947, "learning_rate": 6.056250768826488e-06, "loss": 0.0017, "step": 89660 }, { "epoch": 1.467233903297063, "grad_norm": 0.035845134407281876, "learning_rate": 6.0553202929638945e-06, "loss": 0.0018, "step": 89670 }, { "epoch": 1.4673975292481387, "grad_norm": 0.1825220137834549, "learning_rate": 6.054389778848474e-06, "loss": 0.0034, "step": 89680 }, { "epoch": 1.4675611551992147, "grad_norm": 0.09203601628541946, "learning_rate": 6.053459226513959e-06, "loss": 0.0018, "step": 89690 }, { "epoch": 1.4677247811502905, "grad_norm": 0.04955407604575157, "learning_rate": 6.0525286359940794e-06, "loss": 0.0019, "step": 89700 }, { "epoch": 1.4678884071013663, "grad_norm": 0.08668463677167892, "learning_rate": 6.051598007322565e-06, "loss": 0.0026, "step": 89710 }, { "epoch": 1.4680520330524423, "grad_norm": 0.10305390506982803, "learning_rate": 6.050667340533153e-06, "loss": 0.0018, "step": 89720 }, { "epoch": 1.468215659003518, "grad_norm": 0.07880593836307526, "learning_rate": 6.049736635659573e-06, "loss": 0.0016, "step": 89730 }, { "epoch": 1.4683792849545938, "grad_norm": 0.2380165159702301, "learning_rate": 6.048805892735563e-06, "loss": 0.0019, "step": 89740 }, { "epoch": 1.4685429109056696, "grad_norm": 0.22985823452472687, "learning_rate": 6.047875111794861e-06, "loss": 0.0019, "step": 89750 }, { "epoch": 1.4687065368567453, "grad_norm": 0.08305205404758453, "learning_rate": 6.046944292871204e-06, "loss": 0.0016, "step": 89760 }, { "epoch": 1.4688701628078213, "grad_norm": 0.12747210264205933, "learning_rate": 6.046013435998335e-06, "loss": 0.0022, "step": 89770 }, { "epoch": 1.4690337887588971, "grad_norm": 0.2798585295677185, "learning_rate": 6.04508254120999e-06, "loss": 0.0029, "step": 89780 }, { "epoch": 1.469197414709973, "grad_norm": 0.11919490993022919, "learning_rate": 6.044151608539914e-06, "loss": 0.0025, "step": 89790 }, { "epoch": 1.469361040661049, "grad_norm": 0.1528950184583664, "learning_rate": 6.043220638021854e-06, "loss": 0.0071, "step": 89800 }, { "epoch": 1.4695246666121247, "grad_norm": 0.2320098876953125, "learning_rate": 6.042289629689552e-06, "loss": 0.0018, "step": 89810 }, { "epoch": 1.4696882925632004, "grad_norm": 0.133858785033226, "learning_rate": 6.041358583576757e-06, "loss": 0.002, "step": 89820 }, { "epoch": 1.4698519185142764, "grad_norm": 0.16765472292900085, "learning_rate": 6.040427499717216e-06, "loss": 0.001, "step": 89830 }, { "epoch": 1.4700155444653522, "grad_norm": 0.08123244345188141, "learning_rate": 6.039496378144678e-06, "loss": 0.0011, "step": 89840 }, { "epoch": 1.470179170416428, "grad_norm": 0.05696301907300949, "learning_rate": 6.0385652188928955e-06, "loss": 0.0013, "step": 89850 }, { "epoch": 1.470342796367504, "grad_norm": 0.09900623559951782, "learning_rate": 6.0376340219956195e-06, "loss": 0.0029, "step": 89860 }, { "epoch": 1.4705064223185798, "grad_norm": 0.08991098403930664, "learning_rate": 6.036702787486604e-06, "loss": 0.0017, "step": 89870 }, { "epoch": 1.4706700482696555, "grad_norm": 0.28116747736930847, "learning_rate": 6.035771515399605e-06, "loss": 0.0024, "step": 89880 }, { "epoch": 1.4708336742207315, "grad_norm": 0.07289552688598633, "learning_rate": 6.034840205768376e-06, "loss": 0.0018, "step": 89890 }, { "epoch": 1.4709973001718073, "grad_norm": 0.009397052228450775, "learning_rate": 6.033908858626678e-06, "loss": 0.0024, "step": 89900 }, { "epoch": 1.471160926122883, "grad_norm": 0.18024064600467682, "learning_rate": 6.0329774740082695e-06, "loss": 0.0018, "step": 89910 }, { "epoch": 1.4713245520739588, "grad_norm": 0.15219052135944366, "learning_rate": 6.03204605194691e-06, "loss": 0.0019, "step": 89920 }, { "epoch": 1.4714881780250348, "grad_norm": 0.04910378158092499, "learning_rate": 6.0311145924763605e-06, "loss": 0.0014, "step": 89930 }, { "epoch": 1.4716518039761106, "grad_norm": 0.08571364730596542, "learning_rate": 6.030183095630385e-06, "loss": 0.0033, "step": 89940 }, { "epoch": 1.4718154299271864, "grad_norm": 0.1076994240283966, "learning_rate": 6.029251561442749e-06, "loss": 0.0026, "step": 89950 }, { "epoch": 1.4719790558782622, "grad_norm": 0.16011343896389008, "learning_rate": 6.028319989947217e-06, "loss": 0.0012, "step": 89960 }, { "epoch": 1.4721426818293382, "grad_norm": 0.20377467572689056, "learning_rate": 6.0273883811775595e-06, "loss": 0.0025, "step": 89970 }, { "epoch": 1.472306307780414, "grad_norm": 0.14934121072292328, "learning_rate": 6.026456735167541e-06, "loss": 0.0016, "step": 89980 }, { "epoch": 1.4724699337314897, "grad_norm": 0.3843739628791809, "learning_rate": 6.025525051950931e-06, "loss": 0.0017, "step": 89990 }, { "epoch": 1.4726335596825657, "grad_norm": 0.10053126513957977, "learning_rate": 6.0245933315615044e-06, "loss": 0.0013, "step": 90000 }, { "epoch": 1.4727971856336415, "grad_norm": 0.06712966412305832, "learning_rate": 6.02366157403303e-06, "loss": 0.0029, "step": 90010 }, { "epoch": 1.4729608115847173, "grad_norm": 0.15196646749973297, "learning_rate": 6.022729779399288e-06, "loss": 0.0022, "step": 90020 }, { "epoch": 1.4731244375357933, "grad_norm": 0.14024144411087036, "learning_rate": 6.021797947694047e-06, "loss": 0.003, "step": 90030 }, { "epoch": 1.473288063486869, "grad_norm": 0.0713011622428894, "learning_rate": 6.020866078951085e-06, "loss": 0.0013, "step": 90040 }, { "epoch": 1.4734516894379448, "grad_norm": 0.013995494693517685, "learning_rate": 6.019934173204182e-06, "loss": 0.0013, "step": 90050 }, { "epoch": 1.4736153153890208, "grad_norm": 0.05694727227091789, "learning_rate": 6.019002230487119e-06, "loss": 0.0025, "step": 90060 }, { "epoch": 1.4737789413400966, "grad_norm": 0.10397981852293015, "learning_rate": 6.018070250833672e-06, "loss": 0.004, "step": 90070 }, { "epoch": 1.4739425672911723, "grad_norm": 0.06639304757118225, "learning_rate": 6.017138234277625e-06, "loss": 0.0016, "step": 90080 }, { "epoch": 1.4741061932422483, "grad_norm": 0.19975242018699646, "learning_rate": 6.016206180852762e-06, "loss": 0.003, "step": 90090 }, { "epoch": 1.4742698191933241, "grad_norm": 0.17541468143463135, "learning_rate": 6.0152740905928685e-06, "loss": 0.0028, "step": 90100 }, { "epoch": 1.4744334451444, "grad_norm": 0.02349376678466797, "learning_rate": 6.014341963531728e-06, "loss": 0.0009, "step": 90110 }, { "epoch": 1.4745970710954757, "grad_norm": 0.14489562809467316, "learning_rate": 6.01340979970313e-06, "loss": 0.0025, "step": 90120 }, { "epoch": 1.4747606970465517, "grad_norm": 0.11375927180051804, "learning_rate": 6.012477599140863e-06, "loss": 0.0017, "step": 90130 }, { "epoch": 1.4749243229976274, "grad_norm": 0.10609984397888184, "learning_rate": 6.011545361878716e-06, "loss": 0.0026, "step": 90140 }, { "epoch": 1.4750879489487032, "grad_norm": 0.1411619633436203, "learning_rate": 6.010613087950479e-06, "loss": 0.0025, "step": 90150 }, { "epoch": 1.475251574899779, "grad_norm": 0.2597443163394928, "learning_rate": 6.009680777389948e-06, "loss": 0.0018, "step": 90160 }, { "epoch": 1.475415200850855, "grad_norm": 0.0626143217086792, "learning_rate": 6.008748430230916e-06, "loss": 0.0014, "step": 90170 }, { "epoch": 1.4755788268019308, "grad_norm": 0.09211093187332153, "learning_rate": 6.007816046507177e-06, "loss": 0.0017, "step": 90180 }, { "epoch": 1.4757424527530065, "grad_norm": 0.2062859684228897, "learning_rate": 6.006883626252529e-06, "loss": 0.0014, "step": 90190 }, { "epoch": 1.4759060787040825, "grad_norm": 0.023430490866303444, "learning_rate": 6.00595116950077e-06, "loss": 0.0016, "step": 90200 }, { "epoch": 1.4760697046551583, "grad_norm": 0.02907814458012581, "learning_rate": 6.005018676285698e-06, "loss": 0.0019, "step": 90210 }, { "epoch": 1.476233330606234, "grad_norm": 0.07631395757198334, "learning_rate": 6.004086146641115e-06, "loss": 0.0018, "step": 90220 }, { "epoch": 1.47639695655731, "grad_norm": 0.13297241926193237, "learning_rate": 6.003153580600821e-06, "loss": 0.0023, "step": 90230 }, { "epoch": 1.4765605825083858, "grad_norm": 0.25503307580947876, "learning_rate": 6.002220978198623e-06, "loss": 0.0029, "step": 90240 }, { "epoch": 1.4767242084594616, "grad_norm": 0.12940753996372223, "learning_rate": 6.001288339468322e-06, "loss": 0.0024, "step": 90250 }, { "epoch": 1.4768878344105376, "grad_norm": 0.09497050940990448, "learning_rate": 6.0003556644437244e-06, "loss": 0.0023, "step": 90260 }, { "epoch": 1.4770514603616134, "grad_norm": 0.04867600277066231, "learning_rate": 5.999422953158641e-06, "loss": 0.0027, "step": 90270 }, { "epoch": 1.4772150863126892, "grad_norm": 0.06594760715961456, "learning_rate": 5.998490205646874e-06, "loss": 0.0029, "step": 90280 }, { "epoch": 1.4773787122637652, "grad_norm": 0.15169459581375122, "learning_rate": 5.997557421942239e-06, "loss": 0.0018, "step": 90290 }, { "epoch": 1.477542338214841, "grad_norm": 0.20777268707752228, "learning_rate": 5.996624602078545e-06, "loss": 0.0013, "step": 90300 }, { "epoch": 1.4777059641659167, "grad_norm": 0.09152727574110031, "learning_rate": 5.995691746089604e-06, "loss": 0.0016, "step": 90310 }, { "epoch": 1.4778695901169925, "grad_norm": 0.07250243425369263, "learning_rate": 5.994758854009232e-06, "loss": 0.0013, "step": 90320 }, { "epoch": 1.4780332160680685, "grad_norm": 0.32375043630599976, "learning_rate": 5.99382592587124e-06, "loss": 0.0017, "step": 90330 }, { "epoch": 1.4781968420191443, "grad_norm": 0.1271114945411682, "learning_rate": 5.992892961709448e-06, "loss": 0.0021, "step": 90340 }, { "epoch": 1.47836046797022, "grad_norm": 0.08686201274394989, "learning_rate": 5.9919599615576716e-06, "loss": 0.002, "step": 90350 }, { "epoch": 1.4785240939212958, "grad_norm": 0.13541172444820404, "learning_rate": 5.991026925449732e-06, "loss": 0.001, "step": 90360 }, { "epoch": 1.4786877198723718, "grad_norm": 0.05715705081820488, "learning_rate": 5.990093853419447e-06, "loss": 0.0043, "step": 90370 }, { "epoch": 1.4788513458234476, "grad_norm": 0.11732552200555801, "learning_rate": 5.989160745500641e-06, "loss": 0.0017, "step": 90380 }, { "epoch": 1.4790149717745233, "grad_norm": 0.039383791387081146, "learning_rate": 5.988227601727133e-06, "loss": 0.0012, "step": 90390 }, { "epoch": 1.4791785977255993, "grad_norm": 0.10291915386915207, "learning_rate": 5.987294422132751e-06, "loss": 0.0022, "step": 90400 }, { "epoch": 1.4793422236766751, "grad_norm": 0.07636168599128723, "learning_rate": 5.986361206751319e-06, "loss": 0.0015, "step": 90410 }, { "epoch": 1.479505849627751, "grad_norm": 0.233656108379364, "learning_rate": 5.985427955616664e-06, "loss": 0.0031, "step": 90420 }, { "epoch": 1.4796694755788269, "grad_norm": 0.09308003634214401, "learning_rate": 5.984494668762614e-06, "loss": 0.0025, "step": 90430 }, { "epoch": 1.4798331015299027, "grad_norm": 0.15329548716545105, "learning_rate": 5.983561346222998e-06, "loss": 0.0016, "step": 90440 }, { "epoch": 1.4799967274809784, "grad_norm": 0.06337698549032211, "learning_rate": 5.982627988031647e-06, "loss": 0.0029, "step": 90450 }, { "epoch": 1.4801603534320544, "grad_norm": 0.15393149852752686, "learning_rate": 5.981694594222395e-06, "loss": 0.002, "step": 90460 }, { "epoch": 1.4803239793831302, "grad_norm": 0.02823197841644287, "learning_rate": 5.980761164829072e-06, "loss": 0.0012, "step": 90470 }, { "epoch": 1.480487605334206, "grad_norm": 0.1599329710006714, "learning_rate": 5.979827699885514e-06, "loss": 0.0022, "step": 90480 }, { "epoch": 1.480651231285282, "grad_norm": 0.0665646642446518, "learning_rate": 5.978894199425556e-06, "loss": 0.0036, "step": 90490 }, { "epoch": 1.4808148572363578, "grad_norm": 0.1860765665769577, "learning_rate": 5.977960663483037e-06, "loss": 0.0025, "step": 90500 }, { "epoch": 1.4809784831874335, "grad_norm": 0.0776752158999443, "learning_rate": 5.977027092091794e-06, "loss": 0.0012, "step": 90510 }, { "epoch": 1.4811421091385093, "grad_norm": 0.012308900244534016, "learning_rate": 5.976093485285668e-06, "loss": 0.0012, "step": 90520 }, { "epoch": 1.481305735089585, "grad_norm": 0.030911168083548546, "learning_rate": 5.975159843098499e-06, "loss": 0.0018, "step": 90530 }, { "epoch": 1.481469361040661, "grad_norm": 0.05663762986660004, "learning_rate": 5.9742261655641284e-06, "loss": 0.0014, "step": 90540 }, { "epoch": 1.4816329869917368, "grad_norm": 0.22436107695102692, "learning_rate": 5.973292452716401e-06, "loss": 0.0027, "step": 90550 }, { "epoch": 1.4817966129428126, "grad_norm": 0.14220155775547028, "learning_rate": 5.972358704589162e-06, "loss": 0.0012, "step": 90560 }, { "epoch": 1.4819602388938886, "grad_norm": 0.21445974707603455, "learning_rate": 5.971424921216257e-06, "loss": 0.0022, "step": 90570 }, { "epoch": 1.4821238648449644, "grad_norm": 0.08079153299331665, "learning_rate": 5.9704911026315325e-06, "loss": 0.0031, "step": 90580 }, { "epoch": 1.4822874907960402, "grad_norm": 0.1684669852256775, "learning_rate": 5.969557248868839e-06, "loss": 0.0019, "step": 90590 }, { "epoch": 1.4824511167471162, "grad_norm": 0.18776816129684448, "learning_rate": 5.968623359962025e-06, "loss": 0.0018, "step": 90600 }, { "epoch": 1.482614742698192, "grad_norm": 0.09689784795045853, "learning_rate": 5.967689435944942e-06, "loss": 0.0011, "step": 90610 }, { "epoch": 1.4827783686492677, "grad_norm": 0.07097337394952774, "learning_rate": 5.966755476851444e-06, "loss": 0.0019, "step": 90620 }, { "epoch": 1.4829419946003437, "grad_norm": 0.055122386664152145, "learning_rate": 5.965821482715382e-06, "loss": 0.002, "step": 90630 }, { "epoch": 1.4831056205514195, "grad_norm": 0.045418158173561096, "learning_rate": 5.964887453570613e-06, "loss": 0.0027, "step": 90640 }, { "epoch": 1.4832692465024953, "grad_norm": 0.03818480297923088, "learning_rate": 5.963953389450993e-06, "loss": 0.0014, "step": 90650 }, { "epoch": 1.4834328724535712, "grad_norm": 0.3111845850944519, "learning_rate": 5.9630192903903795e-06, "loss": 0.0021, "step": 90660 }, { "epoch": 1.483596498404647, "grad_norm": 0.1479066014289856, "learning_rate": 5.962085156422631e-06, "loss": 0.001, "step": 90670 }, { "epoch": 1.4837601243557228, "grad_norm": 0.05225803703069687, "learning_rate": 5.961150987581608e-06, "loss": 0.0015, "step": 90680 }, { "epoch": 1.4839237503067988, "grad_norm": 0.06672590225934982, "learning_rate": 5.9602167839011725e-06, "loss": 0.0014, "step": 90690 }, { "epoch": 1.4840873762578746, "grad_norm": 0.06295999139547348, "learning_rate": 5.959282545415186e-06, "loss": 0.0011, "step": 90700 }, { "epoch": 1.4842510022089503, "grad_norm": 0.04728035628795624, "learning_rate": 5.9583482721575125e-06, "loss": 0.0034, "step": 90710 }, { "epoch": 1.4844146281600261, "grad_norm": 0.1677972823381424, "learning_rate": 5.957413964162016e-06, "loss": 0.0012, "step": 90720 }, { "epoch": 1.484578254111102, "grad_norm": 0.19957977533340454, "learning_rate": 5.9564796214625665e-06, "loss": 0.0018, "step": 90730 }, { "epoch": 1.4847418800621779, "grad_norm": 0.08272489905357361, "learning_rate": 5.95554524409303e-06, "loss": 0.0017, "step": 90740 }, { "epoch": 1.4849055060132537, "grad_norm": 0.03452751785516739, "learning_rate": 5.954610832087273e-06, "loss": 0.0011, "step": 90750 }, { "epoch": 1.4850691319643294, "grad_norm": 0.08366342633962631, "learning_rate": 5.953676385479168e-06, "loss": 0.0019, "step": 90760 }, { "epoch": 1.4852327579154054, "grad_norm": 0.037623144686222076, "learning_rate": 5.952741904302588e-06, "loss": 0.0022, "step": 90770 }, { "epoch": 1.4853963838664812, "grad_norm": 0.1549617499113083, "learning_rate": 5.951807388591402e-06, "loss": 0.0015, "step": 90780 }, { "epoch": 1.485560009817557, "grad_norm": 0.014540654607117176, "learning_rate": 5.950872838379486e-06, "loss": 0.0033, "step": 90790 }, { "epoch": 1.485723635768633, "grad_norm": 0.1081242635846138, "learning_rate": 5.949938253700716e-06, "loss": 0.0036, "step": 90800 }, { "epoch": 1.4858872617197088, "grad_norm": 0.11421678215265274, "learning_rate": 5.949003634588965e-06, "loss": 0.0028, "step": 90810 }, { "epoch": 1.4860508876707845, "grad_norm": 0.09907367825508118, "learning_rate": 5.948068981078117e-06, "loss": 0.0024, "step": 90820 }, { "epoch": 1.4862145136218605, "grad_norm": 0.06756455451250076, "learning_rate": 5.9471342932020444e-06, "loss": 0.0011, "step": 90830 }, { "epoch": 1.4863781395729363, "grad_norm": 0.12372926622629166, "learning_rate": 5.946199570994631e-06, "loss": 0.0026, "step": 90840 }, { "epoch": 1.486541765524012, "grad_norm": 0.05814385786652565, "learning_rate": 5.945264814489756e-06, "loss": 0.0012, "step": 90850 }, { "epoch": 1.486705391475088, "grad_norm": 0.10990256071090698, "learning_rate": 5.944330023721303e-06, "loss": 0.0017, "step": 90860 }, { "epoch": 1.4868690174261638, "grad_norm": 0.11669552326202393, "learning_rate": 5.943395198723159e-06, "loss": 0.0015, "step": 90870 }, { "epoch": 1.4870326433772396, "grad_norm": 0.027970630675554276, "learning_rate": 5.942460339529205e-06, "loss": 0.0013, "step": 90880 }, { "epoch": 1.4871962693283154, "grad_norm": 0.1698177307844162, "learning_rate": 5.941525446173328e-06, "loss": 0.0016, "step": 90890 }, { "epoch": 1.4873598952793914, "grad_norm": 0.10908200591802597, "learning_rate": 5.940590518689417e-06, "loss": 0.0011, "step": 90900 }, { "epoch": 1.4875235212304672, "grad_norm": 0.07497163116931915, "learning_rate": 5.939655557111361e-06, "loss": 0.0012, "step": 90910 }, { "epoch": 1.487687147181543, "grad_norm": 0.14659839868545532, "learning_rate": 5.938720561473049e-06, "loss": 0.0026, "step": 90920 }, { "epoch": 1.4878507731326187, "grad_norm": 0.1053742915391922, "learning_rate": 5.937785531808373e-06, "loss": 0.0023, "step": 90930 }, { "epoch": 1.4880143990836947, "grad_norm": 0.19887208938598633, "learning_rate": 5.936850468151225e-06, "loss": 0.0013, "step": 90940 }, { "epoch": 1.4881780250347705, "grad_norm": 0.23559197783470154, "learning_rate": 5.9359153705355e-06, "loss": 0.0015, "step": 90950 }, { "epoch": 1.4883416509858463, "grad_norm": 0.48884499073028564, "learning_rate": 5.934980238995091e-06, "loss": 0.0018, "step": 90960 }, { "epoch": 1.4885052769369223, "grad_norm": 0.02871726267039776, "learning_rate": 5.9340450735638976e-06, "loss": 0.002, "step": 90970 }, { "epoch": 1.488668902887998, "grad_norm": 0.2396923303604126, "learning_rate": 5.9331098742758134e-06, "loss": 0.0011, "step": 90980 }, { "epoch": 1.4888325288390738, "grad_norm": 0.06204919144511223, "learning_rate": 5.932174641164738e-06, "loss": 0.0015, "step": 90990 }, { "epoch": 1.4889961547901498, "grad_norm": 0.13123126327991486, "learning_rate": 5.931239374264573e-06, "loss": 0.001, "step": 91000 }, { "epoch": 1.4891597807412256, "grad_norm": 0.14632417261600494, "learning_rate": 5.93030407360922e-06, "loss": 0.0011, "step": 91010 }, { "epoch": 1.4893234066923013, "grad_norm": 0.07743164151906967, "learning_rate": 5.9293687392325806e-06, "loss": 0.0021, "step": 91020 }, { "epoch": 1.4894870326433773, "grad_norm": 0.05706700682640076, "learning_rate": 5.928433371168557e-06, "loss": 0.0016, "step": 91030 }, { "epoch": 1.4896506585944531, "grad_norm": 0.25472885370254517, "learning_rate": 5.9274979694510545e-06, "loss": 0.0025, "step": 91040 }, { "epoch": 1.4898142845455289, "grad_norm": 0.17433689534664154, "learning_rate": 5.926562534113981e-06, "loss": 0.0012, "step": 91050 }, { "epoch": 1.4899779104966049, "grad_norm": 0.059186335653066635, "learning_rate": 5.925627065191242e-06, "loss": 0.0016, "step": 91060 }, { "epoch": 1.4901415364476807, "grad_norm": 0.10192203521728516, "learning_rate": 5.9246915627167465e-06, "loss": 0.0019, "step": 91070 }, { "epoch": 1.4903051623987564, "grad_norm": 0.06516320258378983, "learning_rate": 5.923756026724406e-06, "loss": 0.0011, "step": 91080 }, { "epoch": 1.4904687883498322, "grad_norm": 0.19599901139736176, "learning_rate": 5.9228204572481276e-06, "loss": 0.0023, "step": 91090 }, { "epoch": 1.4906324143009082, "grad_norm": 0.1105550080537796, "learning_rate": 5.921884854321825e-06, "loss": 0.0008, "step": 91100 }, { "epoch": 1.490796040251984, "grad_norm": 0.07540274411439896, "learning_rate": 5.920949217979414e-06, "loss": 0.0014, "step": 91110 }, { "epoch": 1.4909596662030598, "grad_norm": 0.06494259834289551, "learning_rate": 5.9200135482548075e-06, "loss": 0.001, "step": 91120 }, { "epoch": 1.4911232921541355, "grad_norm": 0.2632906138896942, "learning_rate": 5.91907784518192e-06, "loss": 0.0023, "step": 91130 }, { "epoch": 1.4912869181052115, "grad_norm": 0.24756161868572235, "learning_rate": 5.9181421087946685e-06, "loss": 0.003, "step": 91140 }, { "epoch": 1.4914505440562873, "grad_norm": 0.012240666896104813, "learning_rate": 5.917206339126973e-06, "loss": 0.0038, "step": 91150 }, { "epoch": 1.491614170007363, "grad_norm": 0.22262974083423615, "learning_rate": 5.916270536212753e-06, "loss": 0.0027, "step": 91160 }, { "epoch": 1.491777795958439, "grad_norm": 0.49610939621925354, "learning_rate": 5.915334700085928e-06, "loss": 0.0039, "step": 91170 }, { "epoch": 1.4919414219095148, "grad_norm": 0.0776975154876709, "learning_rate": 5.914398830780421e-06, "loss": 0.0019, "step": 91180 }, { "epoch": 1.4921050478605906, "grad_norm": 0.07331117242574692, "learning_rate": 5.913462928330153e-06, "loss": 0.0017, "step": 91190 }, { "epoch": 1.4922686738116666, "grad_norm": 0.1427258998155594, "learning_rate": 5.9125269927690485e-06, "loss": 0.0016, "step": 91200 }, { "epoch": 1.4924322997627424, "grad_norm": 0.056492023169994354, "learning_rate": 5.911591024131035e-06, "loss": 0.0022, "step": 91210 }, { "epoch": 1.4925959257138182, "grad_norm": 0.053930215537548065, "learning_rate": 5.910655022450037e-06, "loss": 0.0013, "step": 91220 }, { "epoch": 1.4927595516648942, "grad_norm": 0.061126112937927246, "learning_rate": 5.909718987759983e-06, "loss": 0.0021, "step": 91230 }, { "epoch": 1.49292317761597, "grad_norm": 0.05530744045972824, "learning_rate": 5.908782920094803e-06, "loss": 0.0018, "step": 91240 }, { "epoch": 1.4930868035670457, "grad_norm": 0.08054271340370178, "learning_rate": 5.907846819488426e-06, "loss": 0.0013, "step": 91250 }, { "epoch": 1.4932504295181217, "grad_norm": 0.16302962601184845, "learning_rate": 5.906910685974784e-06, "loss": 0.0017, "step": 91260 }, { "epoch": 1.4934140554691975, "grad_norm": 0.344541072845459, "learning_rate": 5.905974519587811e-06, "loss": 0.0026, "step": 91270 }, { "epoch": 1.4935776814202733, "grad_norm": 0.027134334668517113, "learning_rate": 5.905038320361436e-06, "loss": 0.0021, "step": 91280 }, { "epoch": 1.493741307371349, "grad_norm": 0.04441569745540619, "learning_rate": 5.904102088329597e-06, "loss": 0.002, "step": 91290 }, { "epoch": 1.4939049333224248, "grad_norm": 0.21158558130264282, "learning_rate": 5.903165823526233e-06, "loss": 0.0017, "step": 91300 }, { "epoch": 1.4940685592735008, "grad_norm": 0.16600491106510162, "learning_rate": 5.902229525985276e-06, "loss": 0.0018, "step": 91310 }, { "epoch": 1.4942321852245766, "grad_norm": 0.09154591709375381, "learning_rate": 5.901293195740669e-06, "loss": 0.0006, "step": 91320 }, { "epoch": 1.4943958111756523, "grad_norm": 0.12867434322834015, "learning_rate": 5.90035683282635e-06, "loss": 0.0019, "step": 91330 }, { "epoch": 1.4945594371267283, "grad_norm": 0.1908167153596878, "learning_rate": 5.899420437276259e-06, "loss": 0.0033, "step": 91340 }, { "epoch": 1.4947230630778041, "grad_norm": 0.09817846119403839, "learning_rate": 5.898484009124338e-06, "loss": 0.0011, "step": 91350 }, { "epoch": 1.49488668902888, "grad_norm": 0.12647955119609833, "learning_rate": 5.897547548404531e-06, "loss": 0.0009, "step": 91360 }, { "epoch": 1.4950503149799559, "grad_norm": 0.46595045924186707, "learning_rate": 5.896611055150784e-06, "loss": 0.0016, "step": 91370 }, { "epoch": 1.4952139409310317, "grad_norm": 0.1935940831899643, "learning_rate": 5.895674529397041e-06, "loss": 0.0018, "step": 91380 }, { "epoch": 1.4953775668821074, "grad_norm": 0.0651322677731514, "learning_rate": 5.894737971177249e-06, "loss": 0.0016, "step": 91390 }, { "epoch": 1.4955411928331834, "grad_norm": 0.030681701377034187, "learning_rate": 5.893801380525355e-06, "loss": 0.0017, "step": 91400 }, { "epoch": 1.4957048187842592, "grad_norm": 0.07809016108512878, "learning_rate": 5.89286475747531e-06, "loss": 0.0019, "step": 91410 }, { "epoch": 1.495868444735335, "grad_norm": 0.10390011966228485, "learning_rate": 5.891928102061063e-06, "loss": 0.0013, "step": 91420 }, { "epoch": 1.496032070686411, "grad_norm": 0.08651623129844666, "learning_rate": 5.8909914143165654e-06, "loss": 0.0016, "step": 91430 }, { "epoch": 1.4961956966374867, "grad_norm": 0.3666991591453552, "learning_rate": 5.890054694275772e-06, "loss": 0.003, "step": 91440 }, { "epoch": 1.4963593225885625, "grad_norm": 0.11857790499925613, "learning_rate": 5.889117941972634e-06, "loss": 0.0013, "step": 91450 }, { "epoch": 1.4965229485396385, "grad_norm": 0.04910532012581825, "learning_rate": 5.8881811574411085e-06, "loss": 0.002, "step": 91460 }, { "epoch": 1.4966865744907143, "grad_norm": 0.14415289461612701, "learning_rate": 5.887244340715151e-06, "loss": 0.0016, "step": 91470 }, { "epoch": 1.49685020044179, "grad_norm": 0.04828951507806778, "learning_rate": 5.886307491828718e-06, "loss": 0.0011, "step": 91480 }, { "epoch": 1.4970138263928658, "grad_norm": 0.14078818261623383, "learning_rate": 5.885370610815767e-06, "loss": 0.0022, "step": 91490 }, { "epoch": 1.4971774523439416, "grad_norm": 0.2517523169517517, "learning_rate": 5.884433697710261e-06, "loss": 0.0032, "step": 91500 }, { "epoch": 1.4973410782950176, "grad_norm": 0.07893718779087067, "learning_rate": 5.883496752546158e-06, "loss": 0.0019, "step": 91510 }, { "epoch": 1.4975047042460934, "grad_norm": 0.13495363295078278, "learning_rate": 5.882559775357424e-06, "loss": 0.0028, "step": 91520 }, { "epoch": 1.4976683301971692, "grad_norm": 0.12683121860027313, "learning_rate": 5.881622766178017e-06, "loss": 0.002, "step": 91530 }, { "epoch": 1.4978319561482452, "grad_norm": 0.011476846411824226, "learning_rate": 5.880685725041904e-06, "loss": 0.002, "step": 91540 }, { "epoch": 1.497995582099321, "grad_norm": 0.11071216315031052, "learning_rate": 5.8797486519830505e-06, "loss": 0.0024, "step": 91550 }, { "epoch": 1.4981592080503967, "grad_norm": 0.12032093852758408, "learning_rate": 5.878811547035421e-06, "loss": 0.0016, "step": 91560 }, { "epoch": 1.4983228340014727, "grad_norm": 0.16129246354103088, "learning_rate": 5.877874410232987e-06, "loss": 0.002, "step": 91570 }, { "epoch": 1.4984864599525485, "grad_norm": 0.06416504085063934, "learning_rate": 5.876937241609715e-06, "loss": 0.0018, "step": 91580 }, { "epoch": 1.4986500859036243, "grad_norm": 0.30934450030326843, "learning_rate": 5.876000041199575e-06, "loss": 0.0028, "step": 91590 }, { "epoch": 1.4988137118547002, "grad_norm": 0.06825904548168182, "learning_rate": 5.87506280903654e-06, "loss": 0.0012, "step": 91600 }, { "epoch": 1.498977337805776, "grad_norm": 0.08883596956729889, "learning_rate": 5.874125545154581e-06, "loss": 0.0022, "step": 91610 }, { "epoch": 1.4991409637568518, "grad_norm": 0.11655041575431824, "learning_rate": 5.873188249587673e-06, "loss": 0.0014, "step": 91620 }, { "epoch": 1.4993045897079278, "grad_norm": 0.10432343184947968, "learning_rate": 5.872250922369788e-06, "loss": 0.0012, "step": 91630 }, { "epoch": 1.4994682156590036, "grad_norm": 0.12425997853279114, "learning_rate": 5.871313563534903e-06, "loss": 0.0021, "step": 91640 }, { "epoch": 1.4996318416100793, "grad_norm": 0.23453903198242188, "learning_rate": 5.870376173116996e-06, "loss": 0.0013, "step": 91650 }, { "epoch": 1.4997954675611553, "grad_norm": 0.08189664036035538, "learning_rate": 5.869438751150045e-06, "loss": 0.0018, "step": 91660 }, { "epoch": 1.4999590935122311, "grad_norm": 0.153335303068161, "learning_rate": 5.86850129766803e-06, "loss": 0.0016, "step": 91670 }, { "epoch": 1.5001227194633069, "grad_norm": 0.14467045664787292, "learning_rate": 5.86756381270493e-06, "loss": 0.0021, "step": 91680 }, { "epoch": 1.5002863454143829, "grad_norm": 0.11179284006357193, "learning_rate": 5.8666262962947255e-06, "loss": 0.002, "step": 91690 }, { "epoch": 1.5004499713654584, "grad_norm": 0.027374764904379845, "learning_rate": 5.865688748471401e-06, "loss": 0.0014, "step": 91700 }, { "epoch": 1.5006135973165344, "grad_norm": 0.02600018121302128, "learning_rate": 5.864751169268939e-06, "loss": 0.0018, "step": 91710 }, { "epoch": 1.5007772232676102, "grad_norm": 0.019345024600625038, "learning_rate": 5.863813558721327e-06, "loss": 0.0013, "step": 91720 }, { "epoch": 1.500940849218686, "grad_norm": 0.1246350035071373, "learning_rate": 5.86287591686255e-06, "loss": 0.0016, "step": 91730 }, { "epoch": 1.501104475169762, "grad_norm": 0.11646867543458939, "learning_rate": 5.861938243726595e-06, "loss": 0.0012, "step": 91740 }, { "epoch": 1.5012681011208378, "grad_norm": 0.09142351895570755, "learning_rate": 5.861000539347449e-06, "loss": 0.0023, "step": 91750 }, { "epoch": 1.5014317270719135, "grad_norm": 0.15168513357639313, "learning_rate": 5.860062803759103e-06, "loss": 0.0023, "step": 91760 }, { "epoch": 1.5015953530229895, "grad_norm": 0.04419459402561188, "learning_rate": 5.8591250369955484e-06, "loss": 0.0042, "step": 91770 }, { "epoch": 1.5017589789740653, "grad_norm": 0.11802008748054504, "learning_rate": 5.858187239090776e-06, "loss": 0.0015, "step": 91780 }, { "epoch": 1.501922604925141, "grad_norm": 0.13296104967594147, "learning_rate": 5.857249410078778e-06, "loss": 0.0029, "step": 91790 }, { "epoch": 1.502086230876217, "grad_norm": 0.07423113286495209, "learning_rate": 5.85631154999355e-06, "loss": 0.0009, "step": 91800 }, { "epoch": 1.5022498568272928, "grad_norm": 0.21428649127483368, "learning_rate": 5.8553736588690844e-06, "loss": 0.0013, "step": 91810 }, { "epoch": 1.5024134827783686, "grad_norm": 0.12769107520580292, "learning_rate": 5.854435736739381e-06, "loss": 0.0034, "step": 91820 }, { "epoch": 1.5025771087294446, "grad_norm": 0.08731834590435028, "learning_rate": 5.853497783638438e-06, "loss": 0.0017, "step": 91830 }, { "epoch": 1.5027407346805204, "grad_norm": 0.15267683565616608, "learning_rate": 5.852559799600248e-06, "loss": 0.0024, "step": 91840 }, { "epoch": 1.5029043606315962, "grad_norm": 0.06569907814264297, "learning_rate": 5.851621784658815e-06, "loss": 0.0019, "step": 91850 }, { "epoch": 1.5030679865826722, "grad_norm": 0.01709694229066372, "learning_rate": 5.85068373884814e-06, "loss": 0.0015, "step": 91860 }, { "epoch": 1.5032316125337477, "grad_norm": 0.048211030662059784, "learning_rate": 5.849745662202224e-06, "loss": 0.0014, "step": 91870 }, { "epoch": 1.5033952384848237, "grad_norm": 0.10423155128955841, "learning_rate": 5.848807554755071e-06, "loss": 0.0018, "step": 91880 }, { "epoch": 1.5035588644358997, "grad_norm": 0.09340240806341171, "learning_rate": 5.8478694165406835e-06, "loss": 0.0011, "step": 91890 }, { "epoch": 1.5037224903869753, "grad_norm": 0.08209457248449326, "learning_rate": 5.8469312475930665e-06, "loss": 0.0019, "step": 91900 }, { "epoch": 1.5038861163380512, "grad_norm": 0.07592350989580154, "learning_rate": 5.845993047946228e-06, "loss": 0.0013, "step": 91910 }, { "epoch": 1.504049742289127, "grad_norm": 0.14570800960063934, "learning_rate": 5.8450548176341745e-06, "loss": 0.0025, "step": 91920 }, { "epoch": 1.5042133682402028, "grad_norm": 0.05754445493221283, "learning_rate": 5.8441165566909155e-06, "loss": 0.0017, "step": 91930 }, { "epoch": 1.5043769941912788, "grad_norm": 0.006399228237569332, "learning_rate": 5.84317826515046e-06, "loss": 0.0017, "step": 91940 }, { "epoch": 1.5045406201423546, "grad_norm": 0.11717326194047928, "learning_rate": 5.842239943046818e-06, "loss": 0.0018, "step": 91950 }, { "epoch": 1.5047042460934303, "grad_norm": 0.066010482609272, "learning_rate": 5.841301590414004e-06, "loss": 0.0011, "step": 91960 }, { "epoch": 1.5048678720445063, "grad_norm": 0.04655023664236069, "learning_rate": 5.840363207286027e-06, "loss": 0.0013, "step": 91970 }, { "epoch": 1.5050314979955821, "grad_norm": 0.25716346502304077, "learning_rate": 5.8394247936969065e-06, "loss": 0.0015, "step": 91980 }, { "epoch": 1.5051951239466579, "grad_norm": 0.1548953652381897, "learning_rate": 5.838486349680652e-06, "loss": 0.0019, "step": 91990 }, { "epoch": 1.5053587498977339, "grad_norm": 0.11267152428627014, "learning_rate": 5.837547875271283e-06, "loss": 0.0028, "step": 92000 }, { "epoch": 1.5055223758488097, "grad_norm": 0.13756878674030304, "learning_rate": 5.8366093705028174e-06, "loss": 0.0016, "step": 92010 }, { "epoch": 1.5056860017998854, "grad_norm": 0.025240028277039528, "learning_rate": 5.835670835409273e-06, "loss": 0.0012, "step": 92020 }, { "epoch": 1.5058496277509614, "grad_norm": 0.15717455744743347, "learning_rate": 5.8347322700246704e-06, "loss": 0.0016, "step": 92030 }, { "epoch": 1.506013253702037, "grad_norm": 0.04991165176033974, "learning_rate": 5.833793674383027e-06, "loss": 0.0033, "step": 92040 }, { "epoch": 1.506176879653113, "grad_norm": 0.02723216451704502, "learning_rate": 5.832855048518367e-06, "loss": 0.0011, "step": 92050 }, { "epoch": 1.506340505604189, "grad_norm": 0.12921293079853058, "learning_rate": 5.831916392464714e-06, "loss": 0.0017, "step": 92060 }, { "epoch": 1.5065041315552645, "grad_norm": 0.01611558347940445, "learning_rate": 5.83097770625609e-06, "loss": 0.0012, "step": 92070 }, { "epoch": 1.5066677575063405, "grad_norm": 0.04227316752076149, "learning_rate": 5.830038989926523e-06, "loss": 0.0017, "step": 92080 }, { "epoch": 1.5068313834574163, "grad_norm": 0.35677310824394226, "learning_rate": 5.8291002435100374e-06, "loss": 0.0035, "step": 92090 }, { "epoch": 1.506995009408492, "grad_norm": 0.05789892002940178, "learning_rate": 5.828161467040661e-06, "loss": 0.0028, "step": 92100 }, { "epoch": 1.507158635359568, "grad_norm": 0.0786251351237297, "learning_rate": 5.827222660552421e-06, "loss": 0.0017, "step": 92110 }, { "epoch": 1.5073222613106438, "grad_norm": 0.06178510561585426, "learning_rate": 5.826283824079348e-06, "loss": 0.0012, "step": 92120 }, { "epoch": 1.5074858872617196, "grad_norm": 0.11979813128709793, "learning_rate": 5.825344957655471e-06, "loss": 0.0019, "step": 92130 }, { "epoch": 1.5076495132127956, "grad_norm": 0.046463653445243835, "learning_rate": 5.824406061314825e-06, "loss": 0.0016, "step": 92140 }, { "epoch": 1.5078131391638714, "grad_norm": 0.132389634847641, "learning_rate": 5.823467135091438e-06, "loss": 0.0021, "step": 92150 }, { "epoch": 1.5079767651149472, "grad_norm": 0.08071785420179367, "learning_rate": 5.822528179019349e-06, "loss": 0.0014, "step": 92160 }, { "epoch": 1.5081403910660232, "grad_norm": 0.014393768273293972, "learning_rate": 5.82158919313259e-06, "loss": 0.0015, "step": 92170 }, { "epoch": 1.508304017017099, "grad_norm": 0.06573475897312164, "learning_rate": 5.820650177465198e-06, "loss": 0.002, "step": 92180 }, { "epoch": 1.5084676429681747, "grad_norm": 0.27615317702293396, "learning_rate": 5.819711132051208e-06, "loss": 0.0013, "step": 92190 }, { "epoch": 1.5086312689192507, "grad_norm": 0.012685788795351982, "learning_rate": 5.81877205692466e-06, "loss": 0.0015, "step": 92200 }, { "epoch": 1.5087948948703265, "grad_norm": 0.08624280989170074, "learning_rate": 5.817832952119592e-06, "loss": 0.0017, "step": 92210 }, { "epoch": 1.5089585208214022, "grad_norm": 0.2544707953929901, "learning_rate": 5.816893817670046e-06, "loss": 0.0022, "step": 92220 }, { "epoch": 1.5091221467724782, "grad_norm": 0.15449848771095276, "learning_rate": 5.815954653610063e-06, "loss": 0.002, "step": 92230 }, { "epoch": 1.5092857727235538, "grad_norm": 0.07188665121793747, "learning_rate": 5.815015459973685e-06, "loss": 0.0011, "step": 92240 }, { "epoch": 1.5094493986746298, "grad_norm": 0.08724378794431686, "learning_rate": 5.8140762367949545e-06, "loss": 0.0037, "step": 92250 }, { "epoch": 1.5096130246257058, "grad_norm": 0.1033218577504158, "learning_rate": 5.813136984107917e-06, "loss": 0.0022, "step": 92260 }, { "epoch": 1.5097766505767813, "grad_norm": 0.20820222795009613, "learning_rate": 5.812197701946618e-06, "loss": 0.0039, "step": 92270 }, { "epoch": 1.5099402765278573, "grad_norm": 0.12309335917234421, "learning_rate": 5.811258390345105e-06, "loss": 0.0023, "step": 92280 }, { "epoch": 1.5101039024789331, "grad_norm": 0.11384641379117966, "learning_rate": 5.810319049337425e-06, "loss": 0.0017, "step": 92290 }, { "epoch": 1.5102675284300089, "grad_norm": 0.0959995836019516, "learning_rate": 5.8093796789576275e-06, "loss": 0.0011, "step": 92300 }, { "epoch": 1.5104311543810849, "grad_norm": 0.2018422782421112, "learning_rate": 5.8084402792397606e-06, "loss": 0.0026, "step": 92310 }, { "epoch": 1.5105947803321607, "grad_norm": 0.05143508315086365, "learning_rate": 5.807500850217877e-06, "loss": 0.003, "step": 92320 }, { "epoch": 1.5107584062832364, "grad_norm": 0.02562854066491127, "learning_rate": 5.80656139192603e-06, "loss": 0.0012, "step": 92330 }, { "epoch": 1.5109220322343124, "grad_norm": 0.3012652099132538, "learning_rate": 5.80562190439827e-06, "loss": 0.0019, "step": 92340 }, { "epoch": 1.5110856581853882, "grad_norm": 0.1167297214269638, "learning_rate": 5.804682387668651e-06, "loss": 0.0022, "step": 92350 }, { "epoch": 1.511249284136464, "grad_norm": 0.07637261599302292, "learning_rate": 5.80374284177123e-06, "loss": 0.0013, "step": 92360 }, { "epoch": 1.51141291008754, "grad_norm": 0.06420022994279861, "learning_rate": 5.802803266740063e-06, "loss": 0.0014, "step": 92370 }, { "epoch": 1.5115765360386157, "grad_norm": 0.10261160135269165, "learning_rate": 5.801863662609208e-06, "loss": 0.0018, "step": 92380 }, { "epoch": 1.5117401619896915, "grad_norm": 0.08156854659318924, "learning_rate": 5.800924029412722e-06, "loss": 0.0014, "step": 92390 }, { "epoch": 1.5119037879407675, "grad_norm": 0.11222469806671143, "learning_rate": 5.799984367184666e-06, "loss": 0.0017, "step": 92400 }, { "epoch": 1.5120674138918433, "grad_norm": 0.11377032846212387, "learning_rate": 5.7990446759590965e-06, "loss": 0.0025, "step": 92410 }, { "epoch": 1.512231039842919, "grad_norm": 0.03881581500172615, "learning_rate": 5.79810495577008e-06, "loss": 0.001, "step": 92420 }, { "epoch": 1.512394665793995, "grad_norm": 0.0993439257144928, "learning_rate": 5.797165206651677e-06, "loss": 0.0014, "step": 92430 }, { "epoch": 1.5125582917450706, "grad_norm": 0.07430081814527512, "learning_rate": 5.7962254286379505e-06, "loss": 0.0034, "step": 92440 }, { "epoch": 1.5127219176961466, "grad_norm": 0.15329165756702423, "learning_rate": 5.795285621762966e-06, "loss": 0.0015, "step": 92450 }, { "epoch": 1.5128855436472226, "grad_norm": 0.10792247205972672, "learning_rate": 5.794345786060789e-06, "loss": 0.0012, "step": 92460 }, { "epoch": 1.5130491695982982, "grad_norm": 0.11069736629724503, "learning_rate": 5.793405921565487e-06, "loss": 0.002, "step": 92470 }, { "epoch": 1.5132127955493742, "grad_norm": 0.1862097531557083, "learning_rate": 5.792466028311128e-06, "loss": 0.0017, "step": 92480 }, { "epoch": 1.51337642150045, "grad_norm": 0.04604573920369148, "learning_rate": 5.791526106331778e-06, "loss": 0.0019, "step": 92490 }, { "epoch": 1.5135400474515257, "grad_norm": 0.07079262286424637, "learning_rate": 5.79058615566151e-06, "loss": 0.0032, "step": 92500 }, { "epoch": 1.5137036734026017, "grad_norm": 0.0942351296544075, "learning_rate": 5.789646176334394e-06, "loss": 0.0017, "step": 92510 }, { "epoch": 1.5138672993536775, "grad_norm": 0.10990624874830246, "learning_rate": 5.788706168384502e-06, "loss": 0.0025, "step": 92520 }, { "epoch": 1.5140309253047533, "grad_norm": 0.09396950900554657, "learning_rate": 5.787766131845908e-06, "loss": 0.0009, "step": 92530 }, { "epoch": 1.5141945512558292, "grad_norm": 0.07850679755210876, "learning_rate": 5.786826066752683e-06, "loss": 0.0077, "step": 92540 }, { "epoch": 1.514358177206905, "grad_norm": 0.13065828382968903, "learning_rate": 5.785885973138906e-06, "loss": 0.0013, "step": 92550 }, { "epoch": 1.5145218031579808, "grad_norm": 0.05356348678469658, "learning_rate": 5.78494585103865e-06, "loss": 0.0134, "step": 92560 }, { "epoch": 1.5146854291090568, "grad_norm": 0.11250830441713333, "learning_rate": 5.784005700485993e-06, "loss": 0.0022, "step": 92570 }, { "epoch": 1.5148490550601326, "grad_norm": 0.11656971275806427, "learning_rate": 5.783065521515016e-06, "loss": 0.0035, "step": 92580 }, { "epoch": 1.5150126810112083, "grad_norm": 0.09869369119405746, "learning_rate": 5.782125314159795e-06, "loss": 0.0016, "step": 92590 }, { "epoch": 1.5151763069622843, "grad_norm": 0.053420618176460266, "learning_rate": 5.7811850784544096e-06, "loss": 0.0011, "step": 92600 }, { "epoch": 1.51533993291336, "grad_norm": 0.06559205055236816, "learning_rate": 5.780244814432945e-06, "loss": 0.0023, "step": 92610 }, { "epoch": 1.5155035588644359, "grad_norm": 0.09503054618835449, "learning_rate": 5.779304522129478e-06, "loss": 0.002, "step": 92620 }, { "epoch": 1.5156671848155119, "grad_norm": 0.0468287393450737, "learning_rate": 5.778364201578098e-06, "loss": 0.0011, "step": 92630 }, { "epoch": 1.5158308107665874, "grad_norm": 0.240324467420578, "learning_rate": 5.777423852812885e-06, "loss": 0.002, "step": 92640 }, { "epoch": 1.5159944367176634, "grad_norm": 0.0628838762640953, "learning_rate": 5.776483475867927e-06, "loss": 0.0018, "step": 92650 }, { "epoch": 1.5161580626687394, "grad_norm": 0.11935097724199295, "learning_rate": 5.775543070777308e-06, "loss": 0.0022, "step": 92660 }, { "epoch": 1.516321688619815, "grad_norm": 0.2892530560493469, "learning_rate": 5.7746026375751175e-06, "loss": 0.0029, "step": 92670 }, { "epoch": 1.516485314570891, "grad_norm": 0.07304031401872635, "learning_rate": 5.773662176295444e-06, "loss": 0.0011, "step": 92680 }, { "epoch": 1.5166489405219667, "grad_norm": 0.12155020236968994, "learning_rate": 5.772721686972374e-06, "loss": 0.0017, "step": 92690 }, { "epoch": 1.5168125664730425, "grad_norm": 0.06272289156913757, "learning_rate": 5.7717811696400004e-06, "loss": 0.0012, "step": 92700 }, { "epoch": 1.5169761924241185, "grad_norm": 0.2657904624938965, "learning_rate": 5.7708406243324145e-06, "loss": 0.0029, "step": 92710 }, { "epoch": 1.5171398183751943, "grad_norm": 0.08259091526269913, "learning_rate": 5.769900051083709e-06, "loss": 0.0014, "step": 92720 }, { "epoch": 1.51730344432627, "grad_norm": 0.3218785226345062, "learning_rate": 5.768959449927978e-06, "loss": 0.0037, "step": 92730 }, { "epoch": 1.517467070277346, "grad_norm": 0.17818595468997955, "learning_rate": 5.768018820899314e-06, "loss": 0.0017, "step": 92740 }, { "epoch": 1.5176306962284218, "grad_norm": 0.07660868018865585, "learning_rate": 5.767078164031813e-06, "loss": 0.0013, "step": 92750 }, { "epoch": 1.5177943221794976, "grad_norm": 0.023576997220516205, "learning_rate": 5.766137479359573e-06, "loss": 0.0027, "step": 92760 }, { "epoch": 1.5179579481305736, "grad_norm": 0.0293793436139822, "learning_rate": 5.765196766916691e-06, "loss": 0.0013, "step": 92770 }, { "epoch": 1.5181215740816494, "grad_norm": 0.08452704548835754, "learning_rate": 5.764256026737265e-06, "loss": 0.0014, "step": 92780 }, { "epoch": 1.5182852000327252, "grad_norm": 0.10538189113140106, "learning_rate": 5.7633152588553945e-06, "loss": 0.0012, "step": 92790 }, { "epoch": 1.5184488259838012, "grad_norm": 0.04788101464509964, "learning_rate": 5.76237446330518e-06, "loss": 0.0019, "step": 92800 }, { "epoch": 1.5186124519348767, "grad_norm": 0.3791896104812622, "learning_rate": 5.761433640120724e-06, "loss": 0.0022, "step": 92810 }, { "epoch": 1.5187760778859527, "grad_norm": 0.07501193881034851, "learning_rate": 5.76049278933613e-06, "loss": 0.0013, "step": 92820 }, { "epoch": 1.5189397038370287, "grad_norm": 0.12513427436351776, "learning_rate": 5.759551910985499e-06, "loss": 0.0017, "step": 92830 }, { "epoch": 1.5191033297881043, "grad_norm": 0.3205004930496216, "learning_rate": 5.758611005102935e-06, "loss": 0.0021, "step": 92840 }, { "epoch": 1.5192669557391802, "grad_norm": 0.1463504284620285, "learning_rate": 5.757670071722547e-06, "loss": 0.0019, "step": 92850 }, { "epoch": 1.519430581690256, "grad_norm": 0.040811534970998764, "learning_rate": 5.75672911087844e-06, "loss": 0.0031, "step": 92860 }, { "epoch": 1.5195942076413318, "grad_norm": 0.10794855654239655, "learning_rate": 5.755788122604722e-06, "loss": 0.0009, "step": 92870 }, { "epoch": 1.5197578335924078, "grad_norm": 0.13340447843074799, "learning_rate": 5.754847106935501e-06, "loss": 0.0012, "step": 92880 }, { "epoch": 1.5199214595434836, "grad_norm": 0.22035622596740723, "learning_rate": 5.753906063904887e-06, "loss": 0.002, "step": 92890 }, { "epoch": 1.5200850854945593, "grad_norm": 0.09209461510181427, "learning_rate": 5.752964993546989e-06, "loss": 0.0017, "step": 92900 }, { "epoch": 1.5202487114456353, "grad_norm": 0.017345735803246498, "learning_rate": 5.7520238958959196e-06, "loss": 0.0012, "step": 92910 }, { "epoch": 1.5204123373967111, "grad_norm": 0.040596071630716324, "learning_rate": 5.751082770985791e-06, "loss": 0.0019, "step": 92920 }, { "epoch": 1.5205759633477869, "grad_norm": 0.074709951877594, "learning_rate": 5.7501416188507194e-06, "loss": 0.0016, "step": 92930 }, { "epoch": 1.5207395892988629, "grad_norm": 0.02531144767999649, "learning_rate": 5.749200439524816e-06, "loss": 0.0025, "step": 92940 }, { "epoch": 1.5209032152499387, "grad_norm": 0.0254683755338192, "learning_rate": 5.748259233042196e-06, "loss": 0.0026, "step": 92950 }, { "epoch": 1.5210668412010144, "grad_norm": 0.12783615291118622, "learning_rate": 5.747317999436979e-06, "loss": 0.0032, "step": 92960 }, { "epoch": 1.5212304671520904, "grad_norm": 0.06539308279752731, "learning_rate": 5.746376738743279e-06, "loss": 0.0013, "step": 92970 }, { "epoch": 1.5213940931031662, "grad_norm": 0.2535451054573059, "learning_rate": 5.745435450995217e-06, "loss": 0.0017, "step": 92980 }, { "epoch": 1.521557719054242, "grad_norm": 0.27654707431793213, "learning_rate": 5.744494136226911e-06, "loss": 0.0026, "step": 92990 }, { "epoch": 1.521721345005318, "grad_norm": 0.008708052337169647, "learning_rate": 5.743552794472483e-06, "loss": 0.0016, "step": 93000 }, { "epoch": 1.5218849709563935, "grad_norm": 0.15754805505275726, "learning_rate": 5.742611425766052e-06, "loss": 0.0023, "step": 93010 }, { "epoch": 1.5220485969074695, "grad_norm": 0.08248157799243927, "learning_rate": 5.741670030141742e-06, "loss": 0.0017, "step": 93020 }, { "epoch": 1.5222122228585455, "grad_norm": 0.11970769613981247, "learning_rate": 5.740728607633676e-06, "loss": 0.0025, "step": 93030 }, { "epoch": 1.522375848809621, "grad_norm": 0.06609880924224854, "learning_rate": 5.739787158275979e-06, "loss": 0.0019, "step": 93040 }, { "epoch": 1.522539474760697, "grad_norm": 0.28861188888549805, "learning_rate": 5.738845682102775e-06, "loss": 0.0035, "step": 93050 }, { "epoch": 1.5227031007117728, "grad_norm": 0.07973862439393997, "learning_rate": 5.737904179148189e-06, "loss": 0.002, "step": 93060 }, { "epoch": 1.5228667266628486, "grad_norm": 0.1383950263261795, "learning_rate": 5.736962649446352e-06, "loss": 0.0014, "step": 93070 }, { "epoch": 1.5230303526139246, "grad_norm": 0.13354241847991943, "learning_rate": 5.736021093031391e-06, "loss": 0.0027, "step": 93080 }, { "epoch": 1.5231939785650004, "grad_norm": 0.1700313240289688, "learning_rate": 5.735079509937434e-06, "loss": 0.0021, "step": 93090 }, { "epoch": 1.5233576045160762, "grad_norm": 0.13881738483905792, "learning_rate": 5.734137900198611e-06, "loss": 0.0028, "step": 93100 }, { "epoch": 1.5235212304671522, "grad_norm": 0.05037563666701317, "learning_rate": 5.733196263849052e-06, "loss": 0.0021, "step": 93110 }, { "epoch": 1.523684856418228, "grad_norm": 0.1296752691268921, "learning_rate": 5.732254600922893e-06, "loss": 0.0017, "step": 93120 }, { "epoch": 1.5238484823693037, "grad_norm": 0.18967126309871674, "learning_rate": 5.731312911454263e-06, "loss": 0.0017, "step": 93130 }, { "epoch": 1.5240121083203797, "grad_norm": 0.0447850376367569, "learning_rate": 5.730371195477298e-06, "loss": 0.0007, "step": 93140 }, { "epoch": 1.5241757342714555, "grad_norm": 0.09801087528467178, "learning_rate": 5.729429453026133e-06, "loss": 0.0037, "step": 93150 }, { "epoch": 1.5243393602225312, "grad_norm": 0.008364412933588028, "learning_rate": 5.728487684134904e-06, "loss": 0.0015, "step": 93160 }, { "epoch": 1.5245029861736072, "grad_norm": 0.09155111759901047, "learning_rate": 5.7275458888377455e-06, "loss": 0.0019, "step": 93170 }, { "epoch": 1.524666612124683, "grad_norm": 0.12245266884565353, "learning_rate": 5.726604067168799e-06, "loss": 0.0018, "step": 93180 }, { "epoch": 1.5248302380757588, "grad_norm": 0.033675309270620346, "learning_rate": 5.725662219162201e-06, "loss": 0.0021, "step": 93190 }, { "epoch": 1.5249938640268348, "grad_norm": 0.05388784036040306, "learning_rate": 5.72472034485209e-06, "loss": 0.0022, "step": 93200 }, { "epoch": 1.5251574899779103, "grad_norm": 0.150056853890419, "learning_rate": 5.723778444272609e-06, "loss": 0.0024, "step": 93210 }, { "epoch": 1.5253211159289863, "grad_norm": 0.11732979863882065, "learning_rate": 5.7228365174579e-06, "loss": 0.0018, "step": 93220 }, { "epoch": 1.5254847418800623, "grad_norm": 0.09717657417058945, "learning_rate": 5.721894564442105e-06, "loss": 0.0015, "step": 93230 }, { "epoch": 1.5256483678311379, "grad_norm": 0.0715896487236023, "learning_rate": 5.720952585259368e-06, "loss": 0.0025, "step": 93240 }, { "epoch": 1.5258119937822139, "grad_norm": 0.07088734209537506, "learning_rate": 5.720010579943829e-06, "loss": 0.001, "step": 93250 }, { "epoch": 1.5259756197332897, "grad_norm": 0.08547870814800262, "learning_rate": 5.719068548529639e-06, "loss": 0.0016, "step": 93260 }, { "epoch": 1.5261392456843654, "grad_norm": 0.02438625507056713, "learning_rate": 5.718126491050941e-06, "loss": 0.0011, "step": 93270 }, { "epoch": 1.5263028716354414, "grad_norm": 0.11504413932561874, "learning_rate": 5.717184407541886e-06, "loss": 0.0013, "step": 93280 }, { "epoch": 1.5264664975865172, "grad_norm": 0.061093397438526154, "learning_rate": 5.716242298036618e-06, "loss": 0.0012, "step": 93290 }, { "epoch": 1.526630123537593, "grad_norm": 0.13431446254253387, "learning_rate": 5.715300162569288e-06, "loss": 0.0023, "step": 93300 }, { "epoch": 1.526793749488669, "grad_norm": 0.27459803223609924, "learning_rate": 5.714358001174048e-06, "loss": 0.0034, "step": 93310 }, { "epoch": 1.5269573754397447, "grad_norm": 0.04602712392807007, "learning_rate": 5.713415813885046e-06, "loss": 0.0032, "step": 93320 }, { "epoch": 1.5271210013908205, "grad_norm": 0.16966798901557922, "learning_rate": 5.712473600736437e-06, "loss": 0.0078, "step": 93330 }, { "epoch": 1.5272846273418965, "grad_norm": 0.0796501561999321, "learning_rate": 5.71153136176237e-06, "loss": 0.0021, "step": 93340 }, { "epoch": 1.5274482532929723, "grad_norm": 0.21493178606033325, "learning_rate": 5.710589096997002e-06, "loss": 0.0021, "step": 93350 }, { "epoch": 1.527611879244048, "grad_norm": 0.03788410872220993, "learning_rate": 5.7096468064744885e-06, "loss": 0.0014, "step": 93360 }, { "epoch": 1.527775505195124, "grad_norm": 0.04838072508573532, "learning_rate": 5.708704490228983e-06, "loss": 0.0019, "step": 93370 }, { "epoch": 1.5279391311461998, "grad_norm": 0.06535517424345016, "learning_rate": 5.707762148294643e-06, "loss": 0.0019, "step": 93380 }, { "epoch": 1.5281027570972756, "grad_norm": 0.03749818727374077, "learning_rate": 5.706819780705627e-06, "loss": 0.0024, "step": 93390 }, { "epoch": 1.5282663830483516, "grad_norm": 0.22494404017925262, "learning_rate": 5.705877387496091e-06, "loss": 0.0015, "step": 93400 }, { "epoch": 1.5284300089994272, "grad_norm": 0.07843749970197678, "learning_rate": 5.704934968700197e-06, "loss": 0.0024, "step": 93410 }, { "epoch": 1.5285936349505032, "grad_norm": 0.07453209906816483, "learning_rate": 5.703992524352104e-06, "loss": 0.0022, "step": 93420 }, { "epoch": 1.5287572609015792, "grad_norm": 0.04251955822110176, "learning_rate": 5.703050054485976e-06, "loss": 0.0009, "step": 93430 }, { "epoch": 1.5289208868526547, "grad_norm": 0.06735698878765106, "learning_rate": 5.702107559135972e-06, "loss": 0.0016, "step": 93440 }, { "epoch": 1.5290845128037307, "grad_norm": 0.11309357732534409, "learning_rate": 5.701165038336256e-06, "loss": 0.0018, "step": 93450 }, { "epoch": 1.5292481387548065, "grad_norm": 0.16005238890647888, "learning_rate": 5.7002224921209935e-06, "loss": 0.003, "step": 93460 }, { "epoch": 1.5294117647058822, "grad_norm": 0.06462351977825165, "learning_rate": 5.699279920524348e-06, "loss": 0.0013, "step": 93470 }, { "epoch": 1.5295753906569582, "grad_norm": 0.012961739674210548, "learning_rate": 5.698337323580486e-06, "loss": 0.0015, "step": 93480 }, { "epoch": 1.529739016608034, "grad_norm": 0.025400031358003616, "learning_rate": 5.697394701323574e-06, "loss": 0.0026, "step": 93490 }, { "epoch": 1.5299026425591098, "grad_norm": 0.1740194410085678, "learning_rate": 5.696452053787781e-06, "loss": 0.0023, "step": 93500 }, { "epoch": 1.5300662685101858, "grad_norm": 0.12747587263584137, "learning_rate": 5.695509381007275e-06, "loss": 0.0012, "step": 93510 }, { "epoch": 1.5302298944612616, "grad_norm": 0.09946295619010925, "learning_rate": 5.694566683016225e-06, "loss": 0.0017, "step": 93520 }, { "epoch": 1.5303935204123373, "grad_norm": 0.17157401144504547, "learning_rate": 5.693623959848804e-06, "loss": 0.0022, "step": 93530 }, { "epoch": 1.5305571463634133, "grad_norm": 0.32592588663101196, "learning_rate": 5.692681211539179e-06, "loss": 0.0011, "step": 93540 }, { "epoch": 1.530720772314489, "grad_norm": 0.5229214429855347, "learning_rate": 5.691738438121527e-06, "loss": 0.0031, "step": 93550 }, { "epoch": 1.5308843982655649, "grad_norm": 0.09958718717098236, "learning_rate": 5.690795639630017e-06, "loss": 0.0032, "step": 93560 }, { "epoch": 1.5310480242166409, "grad_norm": 0.22845932841300964, "learning_rate": 5.689852816098825e-06, "loss": 0.0016, "step": 93570 }, { "epoch": 1.5312116501677167, "grad_norm": 0.12248405069112778, "learning_rate": 5.688909967562129e-06, "loss": 0.0013, "step": 93580 }, { "epoch": 1.5313752761187924, "grad_norm": 0.07610813528299332, "learning_rate": 5.687967094054102e-06, "loss": 0.0013, "step": 93590 }, { "epoch": 1.5315389020698684, "grad_norm": 0.20003165304660797, "learning_rate": 5.68702419560892e-06, "loss": 0.0025, "step": 93600 }, { "epoch": 1.531702528020944, "grad_norm": 0.05017508938908577, "learning_rate": 5.686081272260763e-06, "loss": 0.0029, "step": 93610 }, { "epoch": 1.53186615397202, "grad_norm": 0.14710979163646698, "learning_rate": 5.685138324043808e-06, "loss": 0.0013, "step": 93620 }, { "epoch": 1.532029779923096, "grad_norm": 0.6115508675575256, "learning_rate": 5.684195350992236e-06, "loss": 0.0025, "step": 93630 }, { "epoch": 1.5321934058741715, "grad_norm": 0.049535464495420456, "learning_rate": 5.683252353140227e-06, "loss": 0.0018, "step": 93640 }, { "epoch": 1.5323570318252475, "grad_norm": 0.1612945944070816, "learning_rate": 5.682309330521962e-06, "loss": 0.0016, "step": 93650 }, { "epoch": 1.5325206577763233, "grad_norm": 0.10504493117332458, "learning_rate": 5.681366283171624e-06, "loss": 0.0017, "step": 93660 }, { "epoch": 1.532684283727399, "grad_norm": 0.05319498851895332, "learning_rate": 5.680423211123396e-06, "loss": 0.0025, "step": 93670 }, { "epoch": 1.532847909678475, "grad_norm": 0.05620495229959488, "learning_rate": 5.679480114411463e-06, "loss": 0.0009, "step": 93680 }, { "epoch": 1.5330115356295508, "grad_norm": 0.05144511163234711, "learning_rate": 5.678536993070007e-06, "loss": 0.0016, "step": 93690 }, { "epoch": 1.5331751615806266, "grad_norm": 0.02680237591266632, "learning_rate": 5.677593847133216e-06, "loss": 0.0019, "step": 93700 }, { "epoch": 1.5333387875317026, "grad_norm": 0.07639974355697632, "learning_rate": 5.676650676635277e-06, "loss": 0.0014, "step": 93710 }, { "epoch": 1.5335024134827784, "grad_norm": 0.10937551409006119, "learning_rate": 5.675707481610377e-06, "loss": 0.0018, "step": 93720 }, { "epoch": 1.5336660394338542, "grad_norm": 0.050971146672964096, "learning_rate": 5.674764262092705e-06, "loss": 0.0027, "step": 93730 }, { "epoch": 1.5338296653849302, "grad_norm": 0.028386056423187256, "learning_rate": 5.673821018116452e-06, "loss": 0.0015, "step": 93740 }, { "epoch": 1.533993291336006, "grad_norm": 0.1227363646030426, "learning_rate": 5.672877749715804e-06, "loss": 0.0017, "step": 93750 }, { "epoch": 1.5341569172870817, "grad_norm": 0.1913052499294281, "learning_rate": 5.671934456924956e-06, "loss": 0.003, "step": 93760 }, { "epoch": 1.5343205432381577, "grad_norm": 0.11627969890832901, "learning_rate": 5.6709911397780994e-06, "loss": 0.0014, "step": 93770 }, { "epoch": 1.5344841691892332, "grad_norm": 0.19431929290294647, "learning_rate": 5.6700477983094236e-06, "loss": 0.0017, "step": 93780 }, { "epoch": 1.5346477951403092, "grad_norm": 0.15582622587680817, "learning_rate": 5.66910443255313e-06, "loss": 0.0019, "step": 93790 }, { "epoch": 1.5348114210913852, "grad_norm": 0.06606801599264145, "learning_rate": 5.6681610425434065e-06, "loss": 0.0015, "step": 93800 }, { "epoch": 1.5349750470424608, "grad_norm": 0.06744344532489777, "learning_rate": 5.6672176283144505e-06, "loss": 0.0029, "step": 93810 }, { "epoch": 1.5351386729935368, "grad_norm": 0.09633451700210571, "learning_rate": 5.66627418990046e-06, "loss": 0.0022, "step": 93820 }, { "epoch": 1.5353022989446126, "grad_norm": 0.09357360750436783, "learning_rate": 5.665330727335632e-06, "loss": 0.0023, "step": 93830 }, { "epoch": 1.5354659248956883, "grad_norm": 0.09555008262395859, "learning_rate": 5.664387240654163e-06, "loss": 0.0013, "step": 93840 }, { "epoch": 1.5356295508467643, "grad_norm": 0.3494846224784851, "learning_rate": 5.663443729890255e-06, "loss": 0.0017, "step": 93850 }, { "epoch": 1.53579317679784, "grad_norm": 0.10591018944978714, "learning_rate": 5.6625001950781044e-06, "loss": 0.0036, "step": 93860 }, { "epoch": 1.5359568027489159, "grad_norm": 0.017858639359474182, "learning_rate": 5.661556636251915e-06, "loss": 0.0013, "step": 93870 }, { "epoch": 1.5361204286999919, "grad_norm": 0.055446069687604904, "learning_rate": 5.660613053445887e-06, "loss": 0.001, "step": 93880 }, { "epoch": 1.5362840546510677, "grad_norm": 0.04519607126712799, "learning_rate": 5.6596694466942245e-06, "loss": 0.0033, "step": 93890 }, { "epoch": 1.5364476806021434, "grad_norm": 0.03166491538286209, "learning_rate": 5.6587258160311295e-06, "loss": 0.0014, "step": 93900 }, { "epoch": 1.5366113065532194, "grad_norm": 0.09406087547540665, "learning_rate": 5.657782161490806e-06, "loss": 0.0017, "step": 93910 }, { "epoch": 1.5367749325042952, "grad_norm": 0.07638242840766907, "learning_rate": 5.656838483107461e-06, "loss": 0.0015, "step": 93920 }, { "epoch": 1.536938558455371, "grad_norm": 0.13121093809604645, "learning_rate": 5.6558947809153e-06, "loss": 0.0117, "step": 93930 }, { "epoch": 1.537102184406447, "grad_norm": 0.1247217059135437, "learning_rate": 5.65495105494853e-06, "loss": 0.0009, "step": 93940 }, { "epoch": 1.5372658103575227, "grad_norm": 0.009813793003559113, "learning_rate": 5.654007305241358e-06, "loss": 0.0015, "step": 93950 }, { "epoch": 1.5374294363085985, "grad_norm": 0.11754433065652847, "learning_rate": 5.653063531827993e-06, "loss": 0.0012, "step": 93960 }, { "epoch": 1.5375930622596745, "grad_norm": 0.055549997836351395, "learning_rate": 5.652119734742642e-06, "loss": 0.001, "step": 93970 }, { "epoch": 1.53775668821075, "grad_norm": 0.12981769442558289, "learning_rate": 5.6511759140195214e-06, "loss": 0.002, "step": 93980 }, { "epoch": 1.537920314161826, "grad_norm": 0.08106281608343124, "learning_rate": 5.650232069692838e-06, "loss": 0.0023, "step": 93990 }, { "epoch": 1.538083940112902, "grad_norm": 0.33574244379997253, "learning_rate": 5.649288201796804e-06, "loss": 0.0026, "step": 94000 }, { "epoch": 1.5382475660639776, "grad_norm": 0.1332085132598877, "learning_rate": 5.648344310365634e-06, "loss": 0.0026, "step": 94010 }, { "epoch": 1.5384111920150536, "grad_norm": 0.16106055676937103, "learning_rate": 5.647400395433541e-06, "loss": 0.0033, "step": 94020 }, { "epoch": 1.5385748179661294, "grad_norm": 0.13626396656036377, "learning_rate": 5.646456457034739e-06, "loss": 0.003, "step": 94030 }, { "epoch": 1.5387384439172052, "grad_norm": 0.10682516545057297, "learning_rate": 5.645512495203446e-06, "loss": 0.002, "step": 94040 }, { "epoch": 1.5389020698682812, "grad_norm": 0.06553791463375092, "learning_rate": 5.644568509973874e-06, "loss": 0.0014, "step": 94050 }, { "epoch": 1.539065695819357, "grad_norm": 0.16887657344341278, "learning_rate": 5.6436245013802436e-06, "loss": 0.0038, "step": 94060 }, { "epoch": 1.5392293217704327, "grad_norm": 0.10290270298719406, "learning_rate": 5.642680469456771e-06, "loss": 0.002, "step": 94070 }, { "epoch": 1.5393929477215087, "grad_norm": 0.09996610134840012, "learning_rate": 5.641736414237678e-06, "loss": 0.002, "step": 94080 }, { "epoch": 1.5395565736725845, "grad_norm": 0.04237337037920952, "learning_rate": 5.640792335757182e-06, "loss": 0.0028, "step": 94090 }, { "epoch": 1.5397201996236602, "grad_norm": 0.18140679597854614, "learning_rate": 5.639848234049504e-06, "loss": 0.0021, "step": 94100 }, { "epoch": 1.5398838255747362, "grad_norm": 0.24077308177947998, "learning_rate": 5.638904109148865e-06, "loss": 0.002, "step": 94110 }, { "epoch": 1.540047451525812, "grad_norm": 0.07921037077903748, "learning_rate": 5.637959961089487e-06, "loss": 0.0017, "step": 94120 }, { "epoch": 1.5402110774768878, "grad_norm": 0.07384908944368362, "learning_rate": 5.637015789905593e-06, "loss": 0.0017, "step": 94130 }, { "epoch": 1.5403747034279638, "grad_norm": 0.2995862364768982, "learning_rate": 5.636071595631409e-06, "loss": 0.0024, "step": 94140 }, { "epoch": 1.5405383293790396, "grad_norm": 0.06803367286920547, "learning_rate": 5.63512737830116e-06, "loss": 0.001, "step": 94150 }, { "epoch": 1.5407019553301153, "grad_norm": 0.08918260037899017, "learning_rate": 5.634183137949068e-06, "loss": 0.003, "step": 94160 }, { "epoch": 1.5408655812811913, "grad_norm": 0.10834448784589767, "learning_rate": 5.633238874609361e-06, "loss": 0.0018, "step": 94170 }, { "epoch": 1.5410292072322669, "grad_norm": 0.057486191391944885, "learning_rate": 5.632294588316267e-06, "loss": 0.0025, "step": 94180 }, { "epoch": 1.5411928331833429, "grad_norm": 0.10096415132284164, "learning_rate": 5.6313502791040145e-06, "loss": 0.0028, "step": 94190 }, { "epoch": 1.5413564591344189, "grad_norm": 0.11893796920776367, "learning_rate": 5.630405947006832e-06, "loss": 0.0015, "step": 94200 }, { "epoch": 1.5415200850854944, "grad_norm": 0.06593409180641174, "learning_rate": 5.629461592058948e-06, "loss": 0.0014, "step": 94210 }, { "epoch": 1.5416837110365704, "grad_norm": 0.021921513602137566, "learning_rate": 5.628517214294594e-06, "loss": 0.0016, "step": 94220 }, { "epoch": 1.5418473369876462, "grad_norm": 0.05870698392391205, "learning_rate": 5.6275728137480034e-06, "loss": 0.001, "step": 94230 }, { "epoch": 1.542010962938722, "grad_norm": 0.13184301555156708, "learning_rate": 5.626628390453406e-06, "loss": 0.0015, "step": 94240 }, { "epoch": 1.542174588889798, "grad_norm": 0.06875904649496078, "learning_rate": 5.625683944445035e-06, "loss": 0.0012, "step": 94250 }, { "epoch": 1.5423382148408737, "grad_norm": 0.06648250669240952, "learning_rate": 5.624739475757125e-06, "loss": 0.0021, "step": 94260 }, { "epoch": 1.5425018407919495, "grad_norm": 0.06631302088499069, "learning_rate": 5.623794984423909e-06, "loss": 0.0017, "step": 94270 }, { "epoch": 1.5426654667430255, "grad_norm": 0.06341740489006042, "learning_rate": 5.622850470479626e-06, "loss": 0.002, "step": 94280 }, { "epoch": 1.5428290926941013, "grad_norm": 0.06753361225128174, "learning_rate": 5.6219059339585115e-06, "loss": 0.002, "step": 94290 }, { "epoch": 1.542992718645177, "grad_norm": 0.06312259286642075, "learning_rate": 5.6209613748948e-06, "loss": 0.0023, "step": 94300 }, { "epoch": 1.543156344596253, "grad_norm": 0.08323318511247635, "learning_rate": 5.62001679332273e-06, "loss": 0.0016, "step": 94310 }, { "epoch": 1.5433199705473288, "grad_norm": 0.24576202034950256, "learning_rate": 5.6190721892765435e-06, "loss": 0.0015, "step": 94320 }, { "epoch": 1.5434835964984046, "grad_norm": 0.14893104135990143, "learning_rate": 5.618127562790476e-06, "loss": 0.0017, "step": 94330 }, { "epoch": 1.5436472224494806, "grad_norm": 0.06032591685652733, "learning_rate": 5.617182913898772e-06, "loss": 0.0016, "step": 94340 }, { "epoch": 1.5438108484005564, "grad_norm": 0.1430543214082718, "learning_rate": 5.6162382426356696e-06, "loss": 0.0016, "step": 94350 }, { "epoch": 1.5439744743516322, "grad_norm": 0.01067027822136879, "learning_rate": 5.615293549035412e-06, "loss": 0.0013, "step": 94360 }, { "epoch": 1.5441381003027081, "grad_norm": 0.07077572494745255, "learning_rate": 5.614348833132242e-06, "loss": 0.002, "step": 94370 }, { "epoch": 1.5443017262537837, "grad_norm": 0.11045695841312408, "learning_rate": 5.613404094960404e-06, "loss": 0.0023, "step": 94380 }, { "epoch": 1.5444653522048597, "grad_norm": 0.02371237799525261, "learning_rate": 5.612459334554143e-06, "loss": 0.0009, "step": 94390 }, { "epoch": 1.5446289781559357, "grad_norm": 0.1202223002910614, "learning_rate": 5.611514551947701e-06, "loss": 0.0023, "step": 94400 }, { "epoch": 1.5447926041070112, "grad_norm": 0.08319107443094254, "learning_rate": 5.610569747175325e-06, "loss": 0.0032, "step": 94410 }, { "epoch": 1.5449562300580872, "grad_norm": 0.06446091085672379, "learning_rate": 5.609624920271265e-06, "loss": 0.0013, "step": 94420 }, { "epoch": 1.545119856009163, "grad_norm": 0.10032033920288086, "learning_rate": 5.608680071269768e-06, "loss": 0.0026, "step": 94430 }, { "epoch": 1.5452834819602388, "grad_norm": 0.19656500220298767, "learning_rate": 5.60773520020508e-06, "loss": 0.0024, "step": 94440 }, { "epoch": 1.5454471079113148, "grad_norm": 0.04258721321821213, "learning_rate": 5.606790307111452e-06, "loss": 0.0023, "step": 94450 }, { "epoch": 1.5456107338623906, "grad_norm": 0.4815067946910858, "learning_rate": 5.605845392023133e-06, "loss": 0.0021, "step": 94460 }, { "epoch": 1.5457743598134663, "grad_norm": 0.10441657155752182, "learning_rate": 5.6049004549743755e-06, "loss": 0.0022, "step": 94470 }, { "epoch": 1.5459379857645423, "grad_norm": 0.2149830162525177, "learning_rate": 5.60395549599943e-06, "loss": 0.0025, "step": 94480 }, { "epoch": 1.546101611715618, "grad_norm": 0.11567191779613495, "learning_rate": 5.6030105151325485e-06, "loss": 0.0022, "step": 94490 }, { "epoch": 1.5462652376666939, "grad_norm": 0.25759270787239075, "learning_rate": 5.602065512407987e-06, "loss": 0.0027, "step": 94500 }, { "epoch": 1.5464288636177699, "grad_norm": 0.13471858203411102, "learning_rate": 5.601120487859997e-06, "loss": 0.0027, "step": 94510 }, { "epoch": 1.5465924895688457, "grad_norm": 0.06561803817749023, "learning_rate": 5.600175441522832e-06, "loss": 0.0016, "step": 94520 }, { "epoch": 1.5467561155199214, "grad_norm": 0.0779205858707428, "learning_rate": 5.5992303734307515e-06, "loss": 0.0008, "step": 94530 }, { "epoch": 1.5469197414709974, "grad_norm": 0.1218620166182518, "learning_rate": 5.5982852836180125e-06, "loss": 0.001, "step": 94540 }, { "epoch": 1.547083367422073, "grad_norm": 0.13607439398765564, "learning_rate": 5.597340172118866e-06, "loss": 0.0017, "step": 94550 }, { "epoch": 1.547246993373149, "grad_norm": 0.10613955557346344, "learning_rate": 5.596395038967576e-06, "loss": 0.0021, "step": 94560 }, { "epoch": 1.547410619324225, "grad_norm": 0.028159035369753838, "learning_rate": 5.595449884198399e-06, "loss": 0.0023, "step": 94570 }, { "epoch": 1.5475742452753005, "grad_norm": 0.15041640400886536, "learning_rate": 5.594504707845594e-06, "loss": 0.0015, "step": 94580 }, { "epoch": 1.5477378712263765, "grad_norm": 0.12473514676094055, "learning_rate": 5.593559509943423e-06, "loss": 0.0016, "step": 94590 }, { "epoch": 1.5479014971774523, "grad_norm": 0.08697626739740372, "learning_rate": 5.592614290526146e-06, "loss": 0.0015, "step": 94600 }, { "epoch": 1.548065123128528, "grad_norm": 0.3288215398788452, "learning_rate": 5.591669049628026e-06, "loss": 0.0025, "step": 94610 }, { "epoch": 1.548228749079604, "grad_norm": 0.18107466399669647, "learning_rate": 5.5907237872833246e-06, "loss": 0.0016, "step": 94620 }, { "epoch": 1.5483923750306798, "grad_norm": 0.3711811900138855, "learning_rate": 5.589778503526304e-06, "loss": 0.0044, "step": 94630 }, { "epoch": 1.5485560009817556, "grad_norm": 0.061258625239133835, "learning_rate": 5.588833198391233e-06, "loss": 0.0013, "step": 94640 }, { "epoch": 1.5487196269328316, "grad_norm": 0.06625762581825256, "learning_rate": 5.587887871912372e-06, "loss": 0.0014, "step": 94650 }, { "epoch": 1.5488832528839074, "grad_norm": 0.14573733508586884, "learning_rate": 5.586942524123988e-06, "loss": 0.0021, "step": 94660 }, { "epoch": 1.5490468788349832, "grad_norm": 0.057698752731084824, "learning_rate": 5.58599715506035e-06, "loss": 0.0007, "step": 94670 }, { "epoch": 1.5492105047860592, "grad_norm": 0.018713563680648804, "learning_rate": 5.585051764755722e-06, "loss": 0.0014, "step": 94680 }, { "epoch": 1.549374130737135, "grad_norm": 0.04081779718399048, "learning_rate": 5.584106353244374e-06, "loss": 0.0018, "step": 94690 }, { "epoch": 1.5495377566882107, "grad_norm": 0.09372851252555847, "learning_rate": 5.583160920560576e-06, "loss": 0.0018, "step": 94700 }, { "epoch": 1.5497013826392867, "grad_norm": 0.08448544144630432, "learning_rate": 5.582215466738594e-06, "loss": 0.0008, "step": 94710 }, { "epoch": 1.5498650085903625, "grad_norm": 0.06745804101228714, "learning_rate": 5.581269991812702e-06, "loss": 0.0011, "step": 94720 }, { "epoch": 1.5500286345414382, "grad_norm": 0.01718783751130104, "learning_rate": 5.58032449581717e-06, "loss": 0.0015, "step": 94730 }, { "epoch": 1.5501922604925142, "grad_norm": 0.10950352251529694, "learning_rate": 5.5793789787862694e-06, "loss": 0.001, "step": 94740 }, { "epoch": 1.5503558864435898, "grad_norm": 0.08597629517316818, "learning_rate": 5.578433440754274e-06, "loss": 0.0018, "step": 94750 }, { "epoch": 1.5505195123946658, "grad_norm": 0.042926348745822906, "learning_rate": 5.577487881755456e-06, "loss": 0.0011, "step": 94760 }, { "epoch": 1.5506831383457418, "grad_norm": 0.11701756715774536, "learning_rate": 5.5765423018240884e-06, "loss": 0.0012, "step": 94770 }, { "epoch": 1.5508467642968173, "grad_norm": 0.19784662127494812, "learning_rate": 5.57559670099445e-06, "loss": 0.0026, "step": 94780 }, { "epoch": 1.5510103902478933, "grad_norm": 0.22977997362613678, "learning_rate": 5.5746510793008156e-06, "loss": 0.0015, "step": 94790 }, { "epoch": 1.551174016198969, "grad_norm": 0.09358534216880798, "learning_rate": 5.573705436777459e-06, "loss": 0.0028, "step": 94800 }, { "epoch": 1.5513376421500449, "grad_norm": 0.17664788663387299, "learning_rate": 5.572759773458661e-06, "loss": 0.0016, "step": 94810 }, { "epoch": 1.5515012681011209, "grad_norm": 0.06913845986127853, "learning_rate": 5.571814089378695e-06, "loss": 0.0011, "step": 94820 }, { "epoch": 1.5516648940521967, "grad_norm": 0.07980062067508698, "learning_rate": 5.570868384571845e-06, "loss": 0.002, "step": 94830 }, { "epoch": 1.5518285200032724, "grad_norm": 0.011418584734201431, "learning_rate": 5.5699226590723865e-06, "loss": 0.0017, "step": 94840 }, { "epoch": 1.5519921459543484, "grad_norm": 0.10197842866182327, "learning_rate": 5.568976912914602e-06, "loss": 0.002, "step": 94850 }, { "epoch": 1.5521557719054242, "grad_norm": 0.048731040209531784, "learning_rate": 5.5680311461327716e-06, "loss": 0.0018, "step": 94860 }, { "epoch": 1.5523193978565, "grad_norm": 0.10829249769449234, "learning_rate": 5.567085358761179e-06, "loss": 0.0025, "step": 94870 }, { "epoch": 1.552483023807576, "grad_norm": 0.05648889020085335, "learning_rate": 5.5661395508341035e-06, "loss": 0.002, "step": 94880 }, { "epoch": 1.5526466497586517, "grad_norm": 0.07775375247001648, "learning_rate": 5.565193722385831e-06, "loss": 0.0011, "step": 94890 }, { "epoch": 1.5528102757097275, "grad_norm": 0.2735661268234253, "learning_rate": 5.564247873450643e-06, "loss": 0.0034, "step": 94900 }, { "epoch": 1.5529739016608035, "grad_norm": 0.11515632271766663, "learning_rate": 5.563302004062826e-06, "loss": 0.0023, "step": 94910 }, { "epoch": 1.5531375276118793, "grad_norm": 0.2695034146308899, "learning_rate": 5.562356114256666e-06, "loss": 0.0023, "step": 94920 }, { "epoch": 1.553301153562955, "grad_norm": 0.13368797302246094, "learning_rate": 5.561410204066447e-06, "loss": 0.0012, "step": 94930 }, { "epoch": 1.553464779514031, "grad_norm": 0.07765199989080429, "learning_rate": 5.5604642735264605e-06, "loss": 0.001, "step": 94940 }, { "epoch": 1.5536284054651066, "grad_norm": 0.040681757032871246, "learning_rate": 5.559518322670989e-06, "loss": 0.0017, "step": 94950 }, { "epoch": 1.5537920314161826, "grad_norm": 0.09414543956518173, "learning_rate": 5.558572351534323e-06, "loss": 0.0013, "step": 94960 }, { "epoch": 1.5539556573672586, "grad_norm": 0.13122491538524628, "learning_rate": 5.557626360150752e-06, "loss": 0.0013, "step": 94970 }, { "epoch": 1.5541192833183342, "grad_norm": 0.030454393476247787, "learning_rate": 5.556680348554566e-06, "loss": 0.0017, "step": 94980 }, { "epoch": 1.5542829092694102, "grad_norm": 0.06736773252487183, "learning_rate": 5.555734316780055e-06, "loss": 0.0027, "step": 94990 }, { "epoch": 1.554446535220486, "grad_norm": 0.05552549287676811, "learning_rate": 5.55478826486151e-06, "loss": 0.0014, "step": 95000 }, { "epoch": 1.5546101611715617, "grad_norm": 0.05441640689969063, "learning_rate": 5.553842192833225e-06, "loss": 0.0021, "step": 95010 }, { "epoch": 1.5547737871226377, "grad_norm": 0.13764795660972595, "learning_rate": 5.5528961007294915e-06, "loss": 0.0014, "step": 95020 }, { "epoch": 1.5549374130737135, "grad_norm": 0.1312810778617859, "learning_rate": 5.5519499885846025e-06, "loss": 0.0019, "step": 95030 }, { "epoch": 1.5551010390247892, "grad_norm": 0.12424931675195694, "learning_rate": 5.551003856432853e-06, "loss": 0.0011, "step": 95040 }, { "epoch": 1.5552646649758652, "grad_norm": 0.3694095015525818, "learning_rate": 5.550057704308538e-06, "loss": 0.0021, "step": 95050 }, { "epoch": 1.555428290926941, "grad_norm": 0.758162260055542, "learning_rate": 5.5491115322459545e-06, "loss": 0.0015, "step": 95060 }, { "epoch": 1.5555919168780168, "grad_norm": 0.07636237889528275, "learning_rate": 5.548165340279397e-06, "loss": 0.0014, "step": 95070 }, { "epoch": 1.5557555428290928, "grad_norm": 0.41262513399124146, "learning_rate": 5.547219128443162e-06, "loss": 0.0037, "step": 95080 }, { "epoch": 1.5559191687801686, "grad_norm": 0.2304043471813202, "learning_rate": 5.546272896771552e-06, "loss": 0.0016, "step": 95090 }, { "epoch": 1.5560827947312443, "grad_norm": 0.05563835799694061, "learning_rate": 5.5453266452988596e-06, "loss": 0.0019, "step": 95100 }, { "epoch": 1.5562464206823203, "grad_norm": 0.07931830734014511, "learning_rate": 5.544380374059388e-06, "loss": 0.0017, "step": 95110 }, { "epoch": 1.556410046633396, "grad_norm": 0.09851199388504028, "learning_rate": 5.543434083087434e-06, "loss": 0.0018, "step": 95120 }, { "epoch": 1.5565736725844719, "grad_norm": 0.2715357840061188, "learning_rate": 5.5424877724173035e-06, "loss": 0.0018, "step": 95130 }, { "epoch": 1.5567372985355479, "grad_norm": 0.18828217685222626, "learning_rate": 5.541541442083295e-06, "loss": 0.0006, "step": 95140 }, { "epoch": 1.5569009244866234, "grad_norm": 0.06957677006721497, "learning_rate": 5.540595092119709e-06, "loss": 0.0011, "step": 95150 }, { "epoch": 1.5570645504376994, "grad_norm": 0.400738000869751, "learning_rate": 5.539648722560852e-06, "loss": 0.0015, "step": 95160 }, { "epoch": 1.5572281763887754, "grad_norm": 0.2115340381860733, "learning_rate": 5.538702333441025e-06, "loss": 0.0026, "step": 95170 }, { "epoch": 1.557391802339851, "grad_norm": 0.020447999238967896, "learning_rate": 5.537755924794533e-06, "loss": 0.0017, "step": 95180 }, { "epoch": 1.557555428290927, "grad_norm": 0.06256935000419617, "learning_rate": 5.53680949665568e-06, "loss": 0.0014, "step": 95190 }, { "epoch": 1.5577190542420027, "grad_norm": 0.14920572936534882, "learning_rate": 5.535863049058775e-06, "loss": 0.0013, "step": 95200 }, { "epoch": 1.5578826801930785, "grad_norm": 0.060673076659440994, "learning_rate": 5.534916582038122e-06, "loss": 0.0017, "step": 95210 }, { "epoch": 1.5580463061441545, "grad_norm": 0.08463137596845627, "learning_rate": 5.533970095628029e-06, "loss": 0.002, "step": 95220 }, { "epoch": 1.5582099320952303, "grad_norm": 0.0693800151348114, "learning_rate": 5.5330235898628014e-06, "loss": 0.0017, "step": 95230 }, { "epoch": 1.558373558046306, "grad_norm": 0.1274455040693283, "learning_rate": 5.532077064776752e-06, "loss": 0.0018, "step": 95240 }, { "epoch": 1.558537183997382, "grad_norm": 0.05534921959042549, "learning_rate": 5.531130520404186e-06, "loss": 0.001, "step": 95250 }, { "epoch": 1.5587008099484578, "grad_norm": 0.13551075756549835, "learning_rate": 5.530183956779416e-06, "loss": 0.0015, "step": 95260 }, { "epoch": 1.5588644358995336, "grad_norm": 0.07281338423490524, "learning_rate": 5.529237373936751e-06, "loss": 0.0019, "step": 95270 }, { "epoch": 1.5590280618506096, "grad_norm": 0.15223409235477448, "learning_rate": 5.528290771910504e-06, "loss": 0.002, "step": 95280 }, { "epoch": 1.5591916878016854, "grad_norm": 0.14520815014839172, "learning_rate": 5.527344150734988e-06, "loss": 0.0011, "step": 95290 }, { "epoch": 1.5593553137527612, "grad_norm": 0.06700566411018372, "learning_rate": 5.526397510444511e-06, "loss": 0.0021, "step": 95300 }, { "epoch": 1.5595189397038371, "grad_norm": 0.07925879210233688, "learning_rate": 5.525450851073391e-06, "loss": 0.0018, "step": 95310 }, { "epoch": 1.559682565654913, "grad_norm": 0.10962279140949249, "learning_rate": 5.52450417265594e-06, "loss": 0.0012, "step": 95320 }, { "epoch": 1.5598461916059887, "grad_norm": 0.19689267873764038, "learning_rate": 5.523557475226473e-06, "loss": 0.0022, "step": 95330 }, { "epoch": 1.5600098175570647, "grad_norm": 0.09281846135854721, "learning_rate": 5.522610758819307e-06, "loss": 0.0019, "step": 95340 }, { "epoch": 1.5601734435081402, "grad_norm": 0.09342874586582184, "learning_rate": 5.521664023468756e-06, "loss": 0.0034, "step": 95350 }, { "epoch": 1.5603370694592162, "grad_norm": 0.279384970664978, "learning_rate": 5.5207172692091386e-06, "loss": 0.0019, "step": 95360 }, { "epoch": 1.5605006954102922, "grad_norm": 0.01354216132313013, "learning_rate": 5.519770496074771e-06, "loss": 0.0019, "step": 95370 }, { "epoch": 1.5606643213613678, "grad_norm": 0.08631009608507156, "learning_rate": 5.518823704099973e-06, "loss": 0.0021, "step": 95380 }, { "epoch": 1.5608279473124438, "grad_norm": 0.04090086370706558, "learning_rate": 5.517876893319063e-06, "loss": 0.0008, "step": 95390 }, { "epoch": 1.5609915732635196, "grad_norm": 0.0927191749215126, "learning_rate": 5.516930063766358e-06, "loss": 0.0011, "step": 95400 }, { "epoch": 1.5611551992145953, "grad_norm": 0.027318453416228294, "learning_rate": 5.515983215476182e-06, "loss": 0.0024, "step": 95410 }, { "epoch": 1.5613188251656713, "grad_norm": 0.15403422713279724, "learning_rate": 5.515036348482855e-06, "loss": 0.002, "step": 95420 }, { "epoch": 1.561482451116747, "grad_norm": 0.17846344411373138, "learning_rate": 5.514089462820697e-06, "loss": 0.0018, "step": 95430 }, { "epoch": 1.5616460770678229, "grad_norm": 0.21921469271183014, "learning_rate": 5.513142558524034e-06, "loss": 0.0018, "step": 95440 }, { "epoch": 1.5618097030188989, "grad_norm": 0.016745688393712044, "learning_rate": 5.512195635627185e-06, "loss": 0.0015, "step": 95450 }, { "epoch": 1.5619733289699747, "grad_norm": 0.017428111284971237, "learning_rate": 5.511248694164475e-06, "loss": 0.0012, "step": 95460 }, { "epoch": 1.5621369549210504, "grad_norm": 0.0534135177731514, "learning_rate": 5.510301734170229e-06, "loss": 0.0012, "step": 95470 }, { "epoch": 1.5623005808721264, "grad_norm": 0.10955236107110977, "learning_rate": 5.509354755678771e-06, "loss": 0.0015, "step": 95480 }, { "epoch": 1.5624642068232022, "grad_norm": 0.13564962148666382, "learning_rate": 5.5084077587244275e-06, "loss": 0.002, "step": 95490 }, { "epoch": 1.562627832774278, "grad_norm": 0.031119294464588165, "learning_rate": 5.507460743341526e-06, "loss": 0.0021, "step": 95500 }, { "epoch": 1.562791458725354, "grad_norm": 0.09961502254009247, "learning_rate": 5.506513709564391e-06, "loss": 0.0028, "step": 95510 }, { "epoch": 1.5629550846764295, "grad_norm": 0.009616827592253685, "learning_rate": 5.505566657427352e-06, "loss": 0.0013, "step": 95520 }, { "epoch": 1.5631187106275055, "grad_norm": 0.015020889230072498, "learning_rate": 5.504619586964735e-06, "loss": 0.0046, "step": 95530 }, { "epoch": 1.5632823365785815, "grad_norm": 0.09533273428678513, "learning_rate": 5.5036724982108726e-06, "loss": 0.0018, "step": 95540 }, { "epoch": 1.563445962529657, "grad_norm": 0.0701201781630516, "learning_rate": 5.502725391200092e-06, "loss": 0.0015, "step": 95550 }, { "epoch": 1.563609588480733, "grad_norm": 0.1311163306236267, "learning_rate": 5.5017782659667244e-06, "loss": 0.0019, "step": 95560 }, { "epoch": 1.5637732144318088, "grad_norm": 0.13136912882328033, "learning_rate": 5.5008311225451e-06, "loss": 0.0011, "step": 95570 }, { "epoch": 1.5639368403828846, "grad_norm": 0.08102806657552719, "learning_rate": 5.499883960969552e-06, "loss": 0.0009, "step": 95580 }, { "epoch": 1.5641004663339606, "grad_norm": 0.04429985582828522, "learning_rate": 5.498936781274414e-06, "loss": 0.0015, "step": 95590 }, { "epoch": 1.5642640922850364, "grad_norm": 0.062323760241270065, "learning_rate": 5.497989583494013e-06, "loss": 0.0026, "step": 95600 }, { "epoch": 1.5644277182361122, "grad_norm": 0.12206123024225235, "learning_rate": 5.497042367662688e-06, "loss": 0.0009, "step": 95610 }, { "epoch": 1.5645913441871881, "grad_norm": 0.11871977895498276, "learning_rate": 5.496095133814772e-06, "loss": 0.0014, "step": 95620 }, { "epoch": 1.564754970138264, "grad_norm": 0.04354385659098625, "learning_rate": 5.495147881984599e-06, "loss": 0.0017, "step": 95630 }, { "epoch": 1.5649185960893397, "grad_norm": 0.1686330884695053, "learning_rate": 5.4942006122065074e-06, "loss": 0.0017, "step": 95640 }, { "epoch": 1.5650822220404157, "grad_norm": 0.09138898551464081, "learning_rate": 5.493253324514829e-06, "loss": 0.0021, "step": 95650 }, { "epoch": 1.5652458479914915, "grad_norm": 0.02709484100341797, "learning_rate": 5.492306018943903e-06, "loss": 0.0026, "step": 95660 }, { "epoch": 1.5654094739425672, "grad_norm": 0.10914696753025055, "learning_rate": 5.4913586955280685e-06, "loss": 0.0014, "step": 95670 }, { "epoch": 1.5655730998936432, "grad_norm": 0.019028667360544205, "learning_rate": 5.490411354301663e-06, "loss": 0.0017, "step": 95680 }, { "epoch": 1.565736725844719, "grad_norm": 0.28978875279426575, "learning_rate": 5.489463995299023e-06, "loss": 0.0028, "step": 95690 }, { "epoch": 1.5659003517957948, "grad_norm": 0.061479244381189346, "learning_rate": 5.488516618554489e-06, "loss": 0.0012, "step": 95700 }, { "epoch": 1.5660639777468708, "grad_norm": 0.14110243320465088, "learning_rate": 5.487569224102403e-06, "loss": 0.0017, "step": 95710 }, { "epoch": 1.5662276036979463, "grad_norm": 0.03730341047048569, "learning_rate": 5.486621811977103e-06, "loss": 0.0022, "step": 95720 }, { "epoch": 1.5663912296490223, "grad_norm": 0.15115883946418762, "learning_rate": 5.485674382212933e-06, "loss": 0.0025, "step": 95730 }, { "epoch": 1.5665548556000983, "grad_norm": 0.03132474049925804, "learning_rate": 5.484726934844236e-06, "loss": 0.001, "step": 95740 }, { "epoch": 1.5667184815511739, "grad_norm": 0.0572274886071682, "learning_rate": 5.483779469905349e-06, "loss": 0.0014, "step": 95750 }, { "epoch": 1.5668821075022499, "grad_norm": 0.07731965184211731, "learning_rate": 5.48283198743062e-06, "loss": 0.0013, "step": 95760 }, { "epoch": 1.5670457334533257, "grad_norm": 0.026288717985153198, "learning_rate": 5.481884487454392e-06, "loss": 0.0015, "step": 95770 }, { "epoch": 1.5672093594044014, "grad_norm": 0.16183966398239136, "learning_rate": 5.48093697001101e-06, "loss": 0.0025, "step": 95780 }, { "epoch": 1.5673729853554774, "grad_norm": 0.068525031208992, "learning_rate": 5.479989435134818e-06, "loss": 0.0022, "step": 95790 }, { "epoch": 1.5675366113065532, "grad_norm": 0.10348375886678696, "learning_rate": 5.479041882860164e-06, "loss": 0.0013, "step": 95800 }, { "epoch": 1.567700237257629, "grad_norm": 0.05617411434650421, "learning_rate": 5.4780943132213925e-06, "loss": 0.0017, "step": 95810 }, { "epoch": 1.567863863208705, "grad_norm": 0.3189500570297241, "learning_rate": 5.477146726252851e-06, "loss": 0.0019, "step": 95820 }, { "epoch": 1.5680274891597807, "grad_norm": 0.011146112345159054, "learning_rate": 5.476199121988888e-06, "loss": 0.0021, "step": 95830 }, { "epoch": 1.5681911151108565, "grad_norm": 0.06527738273143768, "learning_rate": 5.475251500463849e-06, "loss": 0.0014, "step": 95840 }, { "epoch": 1.5683547410619325, "grad_norm": 0.12300140410661697, "learning_rate": 5.474303861712088e-06, "loss": 0.0031, "step": 95850 }, { "epoch": 1.5685183670130083, "grad_norm": 0.08339247107505798, "learning_rate": 5.473356205767952e-06, "loss": 0.0025, "step": 95860 }, { "epoch": 1.568681992964084, "grad_norm": 0.05605313554406166, "learning_rate": 5.4724085326657894e-06, "loss": 0.0012, "step": 95870 }, { "epoch": 1.56884561891516, "grad_norm": 0.053953222930431366, "learning_rate": 5.471460842439954e-06, "loss": 0.0024, "step": 95880 }, { "epoch": 1.5690092448662358, "grad_norm": 0.07839611172676086, "learning_rate": 5.470513135124796e-06, "loss": 0.0012, "step": 95890 }, { "epoch": 1.5691728708173116, "grad_norm": 0.09375961869955063, "learning_rate": 5.469565410754668e-06, "loss": 0.0018, "step": 95900 }, { "epoch": 1.5693364967683876, "grad_norm": 0.10395301133394241, "learning_rate": 5.468617669363922e-06, "loss": 0.002, "step": 95910 }, { "epoch": 1.5695001227194632, "grad_norm": 0.18192291259765625, "learning_rate": 5.467669910986912e-06, "loss": 0.0016, "step": 95920 }, { "epoch": 1.5696637486705391, "grad_norm": 0.13954170048236847, "learning_rate": 5.466722135657991e-06, "loss": 0.002, "step": 95930 }, { "epoch": 1.5698273746216151, "grad_norm": 0.0689585879445076, "learning_rate": 5.465774343411515e-06, "loss": 0.002, "step": 95940 }, { "epoch": 1.5699910005726907, "grad_norm": 0.054865192621946335, "learning_rate": 5.46482653428184e-06, "loss": 0.0013, "step": 95950 }, { "epoch": 1.5701546265237667, "grad_norm": 0.22509394586086273, "learning_rate": 5.463878708303319e-06, "loss": 0.0021, "step": 95960 }, { "epoch": 1.5703182524748425, "grad_norm": 0.005309760104864836, "learning_rate": 5.462930865510309e-06, "loss": 0.0028, "step": 95970 }, { "epoch": 1.5704818784259182, "grad_norm": 0.11050279438495636, "learning_rate": 5.461983005937167e-06, "loss": 0.0021, "step": 95980 }, { "epoch": 1.5706455043769942, "grad_norm": 0.14140237867832184, "learning_rate": 5.4610351296182525e-06, "loss": 0.0008, "step": 95990 }, { "epoch": 1.57080913032807, "grad_norm": 0.1602981686592102, "learning_rate": 5.460087236587924e-06, "loss": 0.002, "step": 96000 }, { "epoch": 1.5709727562791458, "grad_norm": 0.8511959314346313, "learning_rate": 5.459139326880538e-06, "loss": 0.0032, "step": 96010 }, { "epoch": 1.5711363822302218, "grad_norm": 0.31806570291519165, "learning_rate": 5.458191400530453e-06, "loss": 0.0027, "step": 96020 }, { "epoch": 1.5713000081812976, "grad_norm": 0.05419665202498436, "learning_rate": 5.457243457572033e-06, "loss": 0.004, "step": 96030 }, { "epoch": 1.5714636341323733, "grad_norm": 0.01707945205271244, "learning_rate": 5.456295498039636e-06, "loss": 0.0022, "step": 96040 }, { "epoch": 1.5716272600834493, "grad_norm": 0.04216013103723526, "learning_rate": 5.455347521967624e-06, "loss": 0.001, "step": 96050 }, { "epoch": 1.571790886034525, "grad_norm": 0.09771130979061127, "learning_rate": 5.4543995293903595e-06, "loss": 0.001, "step": 96060 }, { "epoch": 1.5719545119856009, "grad_norm": 0.03456299379467964, "learning_rate": 5.453451520342204e-06, "loss": 0.0014, "step": 96070 }, { "epoch": 1.5721181379366769, "grad_norm": 0.22416889667510986, "learning_rate": 5.452503494857519e-06, "loss": 0.0033, "step": 96080 }, { "epoch": 1.5722817638877526, "grad_norm": 0.05665203183889389, "learning_rate": 5.451555452970671e-06, "loss": 0.0012, "step": 96090 }, { "epoch": 1.5724453898388284, "grad_norm": 0.018297085538506508, "learning_rate": 5.450607394716025e-06, "loss": 0.0009, "step": 96100 }, { "epoch": 1.5726090157899044, "grad_norm": 0.07933646440505981, "learning_rate": 5.449659320127942e-06, "loss": 0.0016, "step": 96110 }, { "epoch": 1.57277264174098, "grad_norm": 0.03373287245631218, "learning_rate": 5.448711229240789e-06, "loss": 0.0011, "step": 96120 }, { "epoch": 1.572936267692056, "grad_norm": 0.13468582928180695, "learning_rate": 5.447763122088933e-06, "loss": 0.003, "step": 96130 }, { "epoch": 1.573099893643132, "grad_norm": 0.09387189894914627, "learning_rate": 5.446814998706739e-06, "loss": 0.0016, "step": 96140 }, { "epoch": 1.5732635195942075, "grad_norm": 0.05871766805648804, "learning_rate": 5.445866859128578e-06, "loss": 0.0009, "step": 96150 }, { "epoch": 1.5734271455452835, "grad_norm": 0.035517919808626175, "learning_rate": 5.444918703388813e-06, "loss": 0.0025, "step": 96160 }, { "epoch": 1.5735907714963593, "grad_norm": 0.23661324381828308, "learning_rate": 5.443970531521814e-06, "loss": 0.0019, "step": 96170 }, { "epoch": 1.573754397447435, "grad_norm": 0.1736181229352951, "learning_rate": 5.443022343561951e-06, "loss": 0.0021, "step": 96180 }, { "epoch": 1.573918023398511, "grad_norm": 0.02907741256058216, "learning_rate": 5.442074139543593e-06, "loss": 0.0021, "step": 96190 }, { "epoch": 1.5740816493495868, "grad_norm": 0.1024564579129219, "learning_rate": 5.441125919501109e-06, "loss": 0.0025, "step": 96200 }, { "epoch": 1.5742452753006626, "grad_norm": 0.13733956217765808, "learning_rate": 5.440177683468871e-06, "loss": 0.0018, "step": 96210 }, { "epoch": 1.5744089012517386, "grad_norm": 0.0663224309682846, "learning_rate": 5.43922943148125e-06, "loss": 0.002, "step": 96220 }, { "epoch": 1.5745725272028144, "grad_norm": 0.07897910475730896, "learning_rate": 5.438281163572618e-06, "loss": 0.0015, "step": 96230 }, { "epoch": 1.5747361531538902, "grad_norm": 0.08347785472869873, "learning_rate": 5.437332879777346e-06, "loss": 0.0017, "step": 96240 }, { "epoch": 1.5748997791049661, "grad_norm": 0.2307664155960083, "learning_rate": 5.43638458012981e-06, "loss": 0.0018, "step": 96250 }, { "epoch": 1.575063405056042, "grad_norm": 0.009943345561623573, "learning_rate": 5.435436264664381e-06, "loss": 0.0016, "step": 96260 }, { "epoch": 1.5752270310071177, "grad_norm": 0.09883014112710953, "learning_rate": 5.434487933415434e-06, "loss": 0.0014, "step": 96270 }, { "epoch": 1.5753906569581937, "grad_norm": 0.25964921712875366, "learning_rate": 5.433539586417343e-06, "loss": 0.0017, "step": 96280 }, { "epoch": 1.5755542829092692, "grad_norm": 0.14060312509536743, "learning_rate": 5.432591223704484e-06, "loss": 0.0024, "step": 96290 }, { "epoch": 1.5757179088603452, "grad_norm": 0.13465207815170288, "learning_rate": 5.431642845311235e-06, "loss": 0.0024, "step": 96300 }, { "epoch": 1.5758815348114212, "grad_norm": 0.08285928517580032, "learning_rate": 5.4306944512719686e-06, "loss": 0.0014, "step": 96310 }, { "epoch": 1.5760451607624968, "grad_norm": 0.05067675560712814, "learning_rate": 5.4297460416210635e-06, "loss": 0.0045, "step": 96320 }, { "epoch": 1.5762087867135728, "grad_norm": 0.09514006227254868, "learning_rate": 5.4287976163928965e-06, "loss": 0.0008, "step": 96330 }, { "epoch": 1.5763724126646486, "grad_norm": 0.04533170163631439, "learning_rate": 5.427849175621848e-06, "loss": 0.0016, "step": 96340 }, { "epoch": 1.5765360386157243, "grad_norm": 0.03759074956178665, "learning_rate": 5.426900719342295e-06, "loss": 0.0016, "step": 96350 }, { "epoch": 1.5766996645668003, "grad_norm": 0.07338988035917282, "learning_rate": 5.425952247588616e-06, "loss": 0.0012, "step": 96360 }, { "epoch": 1.576863290517876, "grad_norm": 0.08183891326189041, "learning_rate": 5.425003760395192e-06, "loss": 0.0027, "step": 96370 }, { "epoch": 1.5770269164689519, "grad_norm": 0.1686449944972992, "learning_rate": 5.424055257796404e-06, "loss": 0.0035, "step": 96380 }, { "epoch": 1.5771905424200279, "grad_norm": 0.14980323612689972, "learning_rate": 5.423106739826631e-06, "loss": 0.0012, "step": 96390 }, { "epoch": 1.5773541683711036, "grad_norm": 0.1996951550245285, "learning_rate": 5.4221582065202575e-06, "loss": 0.0021, "step": 96400 }, { "epoch": 1.5775177943221794, "grad_norm": 0.29163432121276855, "learning_rate": 5.421209657911661e-06, "loss": 0.0024, "step": 96410 }, { "epoch": 1.5776814202732554, "grad_norm": 0.12521252036094666, "learning_rate": 5.420261094035228e-06, "loss": 0.0026, "step": 96420 }, { "epoch": 1.5778450462243312, "grad_norm": 0.013463077135384083, "learning_rate": 5.419312514925341e-06, "loss": 0.0046, "step": 96430 }, { "epoch": 1.578008672175407, "grad_norm": 0.048513565212488174, "learning_rate": 5.418363920616382e-06, "loss": 0.0021, "step": 96440 }, { "epoch": 1.578172298126483, "grad_norm": 0.00983465276658535, "learning_rate": 5.417415311142739e-06, "loss": 0.0016, "step": 96450 }, { "epoch": 1.5783359240775587, "grad_norm": 0.16053526103496552, "learning_rate": 5.416466686538792e-06, "loss": 0.0018, "step": 96460 }, { "epoch": 1.5784995500286345, "grad_norm": 0.03312075138092041, "learning_rate": 5.4155180468389265e-06, "loss": 0.0026, "step": 96470 }, { "epoch": 1.5786631759797105, "grad_norm": 0.10898065567016602, "learning_rate": 5.414569392077533e-06, "loss": 0.0016, "step": 96480 }, { "epoch": 1.578826801930786, "grad_norm": 0.1669439673423767, "learning_rate": 5.413620722288995e-06, "loss": 0.0026, "step": 96490 }, { "epoch": 1.578990427881862, "grad_norm": 0.07796142995357513, "learning_rate": 5.412672037507701e-06, "loss": 0.0011, "step": 96500 }, { "epoch": 1.579154053832938, "grad_norm": 0.031167810782790184, "learning_rate": 5.411723337768037e-06, "loss": 0.0014, "step": 96510 }, { "epoch": 1.5793176797840136, "grad_norm": 0.05776277929544449, "learning_rate": 5.410774623104391e-06, "loss": 0.0017, "step": 96520 }, { "epoch": 1.5794813057350896, "grad_norm": 0.0943605899810791, "learning_rate": 5.409825893551152e-06, "loss": 0.0016, "step": 96530 }, { "epoch": 1.5796449316861654, "grad_norm": 0.05084320157766342, "learning_rate": 5.40887714914271e-06, "loss": 0.0022, "step": 96540 }, { "epoch": 1.5798085576372412, "grad_norm": 0.13700053095817566, "learning_rate": 5.407928389913453e-06, "loss": 0.0018, "step": 96550 }, { "epoch": 1.5799721835883171, "grad_norm": 0.18050524592399597, "learning_rate": 5.406979615897773e-06, "loss": 0.0011, "step": 96560 }, { "epoch": 1.580135809539393, "grad_norm": 0.2934496998786926, "learning_rate": 5.40603082713006e-06, "loss": 0.0018, "step": 96570 }, { "epoch": 1.5802994354904687, "grad_norm": 0.056892964988946915, "learning_rate": 5.405082023644705e-06, "loss": 0.0012, "step": 96580 }, { "epoch": 1.5804630614415447, "grad_norm": 0.14180442690849304, "learning_rate": 5.404133205476101e-06, "loss": 0.0018, "step": 96590 }, { "epoch": 1.5806266873926205, "grad_norm": 0.16149818897247314, "learning_rate": 5.403184372658641e-06, "loss": 0.0008, "step": 96600 }, { "epoch": 1.5807903133436962, "grad_norm": 0.24570034444332123, "learning_rate": 5.402235525226713e-06, "loss": 0.0019, "step": 96610 }, { "epoch": 1.5809539392947722, "grad_norm": 0.07833485305309296, "learning_rate": 5.401286663214715e-06, "loss": 0.0017, "step": 96620 }, { "epoch": 1.581117565245848, "grad_norm": 0.08162520080804825, "learning_rate": 5.400337786657042e-06, "loss": 0.0011, "step": 96630 }, { "epoch": 1.5812811911969238, "grad_norm": 0.10021688044071198, "learning_rate": 5.399388895588084e-06, "loss": 0.0012, "step": 96640 }, { "epoch": 1.5814448171479998, "grad_norm": 0.0414818711578846, "learning_rate": 5.398439990042241e-06, "loss": 0.0014, "step": 96650 }, { "epoch": 1.5816084430990756, "grad_norm": 0.06889066100120544, "learning_rate": 5.397491070053904e-06, "loss": 0.0013, "step": 96660 }, { "epoch": 1.5817720690501513, "grad_norm": 0.21186897158622742, "learning_rate": 5.396542135657472e-06, "loss": 0.0016, "step": 96670 }, { "epoch": 1.5819356950012273, "grad_norm": 0.04048130661249161, "learning_rate": 5.39559318688734e-06, "loss": 0.0034, "step": 96680 }, { "epoch": 1.5820993209523029, "grad_norm": 0.17197616398334503, "learning_rate": 5.394644223777905e-06, "loss": 0.0033, "step": 96690 }, { "epoch": 1.5822629469033789, "grad_norm": 0.07203453034162521, "learning_rate": 5.393695246363567e-06, "loss": 0.0019, "step": 96700 }, { "epoch": 1.5824265728544549, "grad_norm": 0.05323415249586105, "learning_rate": 5.392746254678723e-06, "loss": 0.0015, "step": 96710 }, { "epoch": 1.5825901988055304, "grad_norm": 0.1038953885436058, "learning_rate": 5.39179724875777e-06, "loss": 0.0041, "step": 96720 }, { "epoch": 1.5827538247566064, "grad_norm": 0.04694046080112457, "learning_rate": 5.390848228635108e-06, "loss": 0.0014, "step": 96730 }, { "epoch": 1.5829174507076822, "grad_norm": 0.39353007078170776, "learning_rate": 5.389899194345137e-06, "loss": 0.0019, "step": 96740 }, { "epoch": 1.583081076658758, "grad_norm": 0.1378042846918106, "learning_rate": 5.388950145922258e-06, "loss": 0.0011, "step": 96750 }, { "epoch": 1.583244702609834, "grad_norm": 0.06501578539609909, "learning_rate": 5.388001083400871e-06, "loss": 0.0021, "step": 96760 }, { "epoch": 1.5834083285609097, "grad_norm": 0.11308283358812332, "learning_rate": 5.387052006815378e-06, "loss": 0.0013, "step": 96770 }, { "epoch": 1.5835719545119855, "grad_norm": 0.3216404318809509, "learning_rate": 5.386102916200178e-06, "loss": 0.001, "step": 96780 }, { "epoch": 1.5837355804630615, "grad_norm": 0.0115175386890769, "learning_rate": 5.385153811589677e-06, "loss": 0.0011, "step": 96790 }, { "epoch": 1.5838992064141373, "grad_norm": 0.12202366441488266, "learning_rate": 5.3842046930182765e-06, "loss": 0.0014, "step": 96800 }, { "epoch": 1.584062832365213, "grad_norm": 0.04255205765366554, "learning_rate": 5.3832555605203784e-06, "loss": 0.0014, "step": 96810 }, { "epoch": 1.584226458316289, "grad_norm": 0.44954219460487366, "learning_rate": 5.382306414130387e-06, "loss": 0.002, "step": 96820 }, { "epoch": 1.5843900842673648, "grad_norm": 0.09452875703573227, "learning_rate": 5.381357253882706e-06, "loss": 0.0008, "step": 96830 }, { "epoch": 1.5845537102184406, "grad_norm": 0.08879400044679642, "learning_rate": 5.380408079811742e-06, "loss": 0.0015, "step": 96840 }, { "epoch": 1.5847173361695166, "grad_norm": 0.02210010215640068, "learning_rate": 5.379458891951901e-06, "loss": 0.0016, "step": 96850 }, { "epoch": 1.5848809621205924, "grad_norm": 0.06366530805826187, "learning_rate": 5.3785096903375854e-06, "loss": 0.0007, "step": 96860 }, { "epoch": 1.5850445880716681, "grad_norm": 0.11315908282995224, "learning_rate": 5.377560475003204e-06, "loss": 0.0019, "step": 96870 }, { "epoch": 1.5852082140227441, "grad_norm": 0.13750480115413666, "learning_rate": 5.3766112459831625e-06, "loss": 0.001, "step": 96880 }, { "epoch": 1.5853718399738197, "grad_norm": 0.04556933417916298, "learning_rate": 5.375662003311869e-06, "loss": 0.0016, "step": 96890 }, { "epoch": 1.5855354659248957, "grad_norm": 0.0854511708021164, "learning_rate": 5.374712747023729e-06, "loss": 0.001, "step": 96900 }, { "epoch": 1.5856990918759717, "grad_norm": 0.09421778470277786, "learning_rate": 5.373763477153154e-06, "loss": 0.0019, "step": 96910 }, { "epoch": 1.5858627178270472, "grad_norm": 0.04897712916135788, "learning_rate": 5.372814193734551e-06, "loss": 0.001, "step": 96920 }, { "epoch": 1.5860263437781232, "grad_norm": 0.11712972074747086, "learning_rate": 5.37186489680233e-06, "loss": 0.0027, "step": 96930 }, { "epoch": 1.586189969729199, "grad_norm": 0.08070344477891922, "learning_rate": 5.3709155863909e-06, "loss": 0.0015, "step": 96940 }, { "epoch": 1.5863535956802748, "grad_norm": 0.13983389735221863, "learning_rate": 5.369966262534672e-06, "loss": 0.002, "step": 96950 }, { "epoch": 1.5865172216313508, "grad_norm": 0.10265609622001648, "learning_rate": 5.3690169252680555e-06, "loss": 0.001, "step": 96960 }, { "epoch": 1.5866808475824266, "grad_norm": 0.03303215280175209, "learning_rate": 5.368067574625461e-06, "loss": 0.0018, "step": 96970 }, { "epoch": 1.5868444735335023, "grad_norm": 0.05763198807835579, "learning_rate": 5.3671182106413035e-06, "loss": 0.0019, "step": 96980 }, { "epoch": 1.5870080994845783, "grad_norm": 0.3887484669685364, "learning_rate": 5.366168833349992e-06, "loss": 0.0052, "step": 96990 }, { "epoch": 1.587171725435654, "grad_norm": 0.10816752165555954, "learning_rate": 5.3652194427859414e-06, "loss": 0.0018, "step": 97000 }, { "epoch": 1.5873353513867299, "grad_norm": 0.12773962318897247, "learning_rate": 5.364270038983564e-06, "loss": 0.0014, "step": 97010 }, { "epoch": 1.5874989773378059, "grad_norm": 0.11422409862279892, "learning_rate": 5.363320621977273e-06, "loss": 0.0021, "step": 97020 }, { "epoch": 1.5876626032888816, "grad_norm": 0.17639519274234772, "learning_rate": 5.362371191801482e-06, "loss": 0.0018, "step": 97030 }, { "epoch": 1.5878262292399574, "grad_norm": 0.10923583805561066, "learning_rate": 5.361421748490606e-06, "loss": 0.0015, "step": 97040 }, { "epoch": 1.5879898551910334, "grad_norm": 0.032507553696632385, "learning_rate": 5.360472292079061e-06, "loss": 0.0013, "step": 97050 }, { "epoch": 1.5881534811421092, "grad_norm": 0.04594506695866585, "learning_rate": 5.3595228226012605e-06, "loss": 0.0019, "step": 97060 }, { "epoch": 1.588317107093185, "grad_norm": 0.10563760250806808, "learning_rate": 5.358573340091623e-06, "loss": 0.0017, "step": 97070 }, { "epoch": 1.588480733044261, "grad_norm": 0.026333678513765335, "learning_rate": 5.357623844584564e-06, "loss": 0.0018, "step": 97080 }, { "epoch": 1.5886443589953365, "grad_norm": 0.060836926102638245, "learning_rate": 5.356674336114499e-06, "loss": 0.0023, "step": 97090 }, { "epoch": 1.5888079849464125, "grad_norm": 0.05666762590408325, "learning_rate": 5.355724814715848e-06, "loss": 0.0015, "step": 97100 }, { "epoch": 1.5889716108974885, "grad_norm": 0.3019115924835205, "learning_rate": 5.354775280423026e-06, "loss": 0.0034, "step": 97110 }, { "epoch": 1.589135236848564, "grad_norm": 0.10878598690032959, "learning_rate": 5.353825733270454e-06, "loss": 0.0014, "step": 97120 }, { "epoch": 1.58929886279964, "grad_norm": 0.08829003572463989, "learning_rate": 5.352876173292548e-06, "loss": 0.0019, "step": 97130 }, { "epoch": 1.5894624887507158, "grad_norm": 0.19793982803821564, "learning_rate": 5.35192660052373e-06, "loss": 0.0022, "step": 97140 }, { "epoch": 1.5896261147017916, "grad_norm": 0.10565929859876633, "learning_rate": 5.35097701499842e-06, "loss": 0.0011, "step": 97150 }, { "epoch": 1.5897897406528676, "grad_norm": 0.10133195668458939, "learning_rate": 5.350027416751034e-06, "loss": 0.0017, "step": 97160 }, { "epoch": 1.5899533666039434, "grad_norm": 0.05131659284234047, "learning_rate": 5.349077805815997e-06, "loss": 0.0023, "step": 97170 }, { "epoch": 1.5901169925550191, "grad_norm": 0.04218674078583717, "learning_rate": 5.348128182227728e-06, "loss": 0.0019, "step": 97180 }, { "epoch": 1.5902806185060951, "grad_norm": 0.08554338663816452, "learning_rate": 5.347178546020648e-06, "loss": 0.0017, "step": 97190 }, { "epoch": 1.590444244457171, "grad_norm": 0.06378992646932602, "learning_rate": 5.346228897229182e-06, "loss": 0.0017, "step": 97200 }, { "epoch": 1.5906078704082467, "grad_norm": 0.10118988901376724, "learning_rate": 5.34527923588775e-06, "loss": 0.001, "step": 97210 }, { "epoch": 1.5907714963593227, "grad_norm": 0.060810595750808716, "learning_rate": 5.344329562030775e-06, "loss": 0.0011, "step": 97220 }, { "epoch": 1.5909351223103985, "grad_norm": 0.07332179695367813, "learning_rate": 5.343379875692681e-06, "loss": 0.0064, "step": 97230 }, { "epoch": 1.5910987482614742, "grad_norm": 0.08635635673999786, "learning_rate": 5.342430176907893e-06, "loss": 0.0017, "step": 97240 }, { "epoch": 1.5912623742125502, "grad_norm": 0.1599757820367813, "learning_rate": 5.341480465710834e-06, "loss": 0.0014, "step": 97250 }, { "epoch": 1.5914260001636258, "grad_norm": 0.12012854218482971, "learning_rate": 5.3405307421359275e-06, "loss": 0.0013, "step": 97260 }, { "epoch": 1.5915896261147018, "grad_norm": 0.08608757704496384, "learning_rate": 5.3395810062176e-06, "loss": 0.0031, "step": 97270 }, { "epoch": 1.5917532520657778, "grad_norm": 0.03763792663812637, "learning_rate": 5.338631257990279e-06, "loss": 0.0009, "step": 97280 }, { "epoch": 1.5919168780168533, "grad_norm": 0.07741168886423111, "learning_rate": 5.337681497488387e-06, "loss": 0.001, "step": 97290 }, { "epoch": 1.5920805039679293, "grad_norm": 0.09594119340181351, "learning_rate": 5.336731724746353e-06, "loss": 0.0014, "step": 97300 }, { "epoch": 1.592244129919005, "grad_norm": 0.09797827899456024, "learning_rate": 5.335781939798604e-06, "loss": 0.0016, "step": 97310 }, { "epoch": 1.5924077558700809, "grad_norm": 0.015874236822128296, "learning_rate": 5.3348321426795644e-06, "loss": 0.0024, "step": 97320 }, { "epoch": 1.5925713818211569, "grad_norm": 0.09323128312826157, "learning_rate": 5.3338823334236665e-06, "loss": 0.0019, "step": 97330 }, { "epoch": 1.5927350077722326, "grad_norm": 0.15608489513397217, "learning_rate": 5.3329325120653355e-06, "loss": 0.0025, "step": 97340 }, { "epoch": 1.5928986337233084, "grad_norm": 0.036289140582084656, "learning_rate": 5.3319826786390025e-06, "loss": 0.0014, "step": 97350 }, { "epoch": 1.5930622596743844, "grad_norm": 0.0709943175315857, "learning_rate": 5.331032833179095e-06, "loss": 0.0014, "step": 97360 }, { "epoch": 1.5932258856254602, "grad_norm": 0.06235069781541824, "learning_rate": 5.330082975720041e-06, "loss": 0.0009, "step": 97370 }, { "epoch": 1.593389511576536, "grad_norm": 0.07837652415037155, "learning_rate": 5.329133106296275e-06, "loss": 0.0014, "step": 97380 }, { "epoch": 1.593553137527612, "grad_norm": 0.05847574770450592, "learning_rate": 5.328183224942223e-06, "loss": 0.0011, "step": 97390 }, { "epoch": 1.5937167634786877, "grad_norm": 0.04407477378845215, "learning_rate": 5.3272333316923185e-06, "loss": 0.0019, "step": 97400 }, { "epoch": 1.5938803894297635, "grad_norm": 0.08566240221261978, "learning_rate": 5.3262834265809926e-06, "loss": 0.0019, "step": 97410 }, { "epoch": 1.5940440153808395, "grad_norm": 0.143513485789299, "learning_rate": 5.325333509642676e-06, "loss": 0.0008, "step": 97420 }, { "epoch": 1.5942076413319153, "grad_norm": 0.1667342185974121, "learning_rate": 5.324383580911802e-06, "loss": 0.0017, "step": 97430 }, { "epoch": 1.594371267282991, "grad_norm": 0.0306320209056139, "learning_rate": 5.323433640422803e-06, "loss": 0.0041, "step": 97440 }, { "epoch": 1.594534893234067, "grad_norm": 0.05147324874997139, "learning_rate": 5.322483688210112e-06, "loss": 0.0011, "step": 97450 }, { "epoch": 1.5946985191851426, "grad_norm": 0.020326945930719376, "learning_rate": 5.321533724308161e-06, "loss": 0.0015, "step": 97460 }, { "epoch": 1.5948621451362186, "grad_norm": 0.2365170121192932, "learning_rate": 5.320583748751387e-06, "loss": 0.0025, "step": 97470 }, { "epoch": 1.5950257710872946, "grad_norm": 0.05175967887043953, "learning_rate": 5.319633761574221e-06, "loss": 0.0022, "step": 97480 }, { "epoch": 1.5951893970383701, "grad_norm": 0.03263901546597481, "learning_rate": 5.3186837628111e-06, "loss": 0.0011, "step": 97490 }, { "epoch": 1.5953530229894461, "grad_norm": 0.053251780569553375, "learning_rate": 5.31773375249646e-06, "loss": 0.0014, "step": 97500 }, { "epoch": 1.595516648940522, "grad_norm": 0.11376066505908966, "learning_rate": 5.316783730664734e-06, "loss": 0.0016, "step": 97510 }, { "epoch": 1.5956802748915977, "grad_norm": 0.10079194605350494, "learning_rate": 5.315833697350359e-06, "loss": 0.0016, "step": 97520 }, { "epoch": 1.5958439008426737, "grad_norm": 0.07104119658470154, "learning_rate": 5.3148836525877715e-06, "loss": 0.0034, "step": 97530 }, { "epoch": 1.5960075267937495, "grad_norm": 0.06889428198337555, "learning_rate": 5.313933596411407e-06, "loss": 0.0013, "step": 97540 }, { "epoch": 1.5961711527448252, "grad_norm": 0.1272384375333786, "learning_rate": 5.312983528855708e-06, "loss": 0.0033, "step": 97550 }, { "epoch": 1.5963347786959012, "grad_norm": 0.1345529407262802, "learning_rate": 5.312033449955105e-06, "loss": 0.0018, "step": 97560 }, { "epoch": 1.596498404646977, "grad_norm": 0.06928007304668427, "learning_rate": 5.311083359744041e-06, "loss": 0.0013, "step": 97570 }, { "epoch": 1.5966620305980528, "grad_norm": 0.15723025798797607, "learning_rate": 5.310133258256951e-06, "loss": 0.0017, "step": 97580 }, { "epoch": 1.5968256565491288, "grad_norm": 0.06681761890649796, "learning_rate": 5.309183145528278e-06, "loss": 0.0023, "step": 97590 }, { "epoch": 1.5969892825002046, "grad_norm": 0.07914423197507858, "learning_rate": 5.308233021592457e-06, "loss": 0.0015, "step": 97600 }, { "epoch": 1.5971529084512803, "grad_norm": 0.20144113898277283, "learning_rate": 5.307282886483931e-06, "loss": 0.0031, "step": 97610 }, { "epoch": 1.5973165344023563, "grad_norm": 0.09355407953262329, "learning_rate": 5.306332740237139e-06, "loss": 0.0035, "step": 97620 }, { "epoch": 1.597480160353432, "grad_norm": 0.20603951811790466, "learning_rate": 5.305382582886521e-06, "loss": 0.0016, "step": 97630 }, { "epoch": 1.5976437863045079, "grad_norm": 0.14873671531677246, "learning_rate": 5.304432414466518e-06, "loss": 0.0019, "step": 97640 }, { "epoch": 1.5978074122555839, "grad_norm": 0.051463715732097626, "learning_rate": 5.303482235011573e-06, "loss": 0.002, "step": 97650 }, { "epoch": 1.5979710382066594, "grad_norm": 0.13742105662822723, "learning_rate": 5.3025320445561265e-06, "loss": 0.0025, "step": 97660 }, { "epoch": 1.5981346641577354, "grad_norm": 0.09530379623174667, "learning_rate": 5.301581843134619e-06, "loss": 0.002, "step": 97670 }, { "epoch": 1.5982982901088114, "grad_norm": 0.0777374729514122, "learning_rate": 5.300631630781494e-06, "loss": 0.0019, "step": 97680 }, { "epoch": 1.598461916059887, "grad_norm": 0.031285736709833145, "learning_rate": 5.299681407531197e-06, "loss": 0.0014, "step": 97690 }, { "epoch": 1.598625542010963, "grad_norm": 0.0172182135283947, "learning_rate": 5.298731173418169e-06, "loss": 0.0013, "step": 97700 }, { "epoch": 1.5987891679620387, "grad_norm": 0.11044405400753021, "learning_rate": 5.297780928476852e-06, "loss": 0.0026, "step": 97710 }, { "epoch": 1.5989527939131145, "grad_norm": 0.06067486107349396, "learning_rate": 5.296830672741694e-06, "loss": 0.0007, "step": 97720 }, { "epoch": 1.5991164198641905, "grad_norm": 0.1673189103603363, "learning_rate": 5.295880406247137e-06, "loss": 0.0016, "step": 97730 }, { "epoch": 1.5992800458152663, "grad_norm": 0.04932381212711334, "learning_rate": 5.294930129027625e-06, "loss": 0.0018, "step": 97740 }, { "epoch": 1.599443671766342, "grad_norm": 0.07004158943891525, "learning_rate": 5.293979841117607e-06, "loss": 0.0022, "step": 97750 }, { "epoch": 1.599607297717418, "grad_norm": 0.08543721586465836, "learning_rate": 5.293029542551524e-06, "loss": 0.002, "step": 97760 }, { "epoch": 1.5997709236684938, "grad_norm": 0.3352496325969696, "learning_rate": 5.292079233363826e-06, "loss": 0.0025, "step": 97770 }, { "epoch": 1.5999345496195696, "grad_norm": 0.0744246244430542, "learning_rate": 5.2911289135889575e-06, "loss": 0.0028, "step": 97780 }, { "epoch": 1.6000981755706456, "grad_norm": 0.11093030869960785, "learning_rate": 5.290178583261365e-06, "loss": 0.0018, "step": 97790 }, { "epoch": 1.6002618015217214, "grad_norm": 0.052570831030607224, "learning_rate": 5.2892282424154975e-06, "loss": 0.0026, "step": 97800 }, { "epoch": 1.6004254274727971, "grad_norm": 0.24708853662014008, "learning_rate": 5.288277891085801e-06, "loss": 0.0019, "step": 97810 }, { "epoch": 1.6005890534238731, "grad_norm": 0.08212660253047943, "learning_rate": 5.287327529306722e-06, "loss": 0.0014, "step": 97820 }, { "epoch": 1.600752679374949, "grad_norm": 0.06644292920827866, "learning_rate": 5.286377157112712e-06, "loss": 0.0007, "step": 97830 }, { "epoch": 1.6009163053260247, "grad_norm": 0.04395301640033722, "learning_rate": 5.2854267745382185e-06, "loss": 0.0019, "step": 97840 }, { "epoch": 1.6010799312771007, "grad_norm": 0.04277435317635536, "learning_rate": 5.28447638161769e-06, "loss": 0.0018, "step": 97850 }, { "epoch": 1.6012435572281762, "grad_norm": 0.1711159348487854, "learning_rate": 5.283525978385577e-06, "loss": 0.0016, "step": 97860 }, { "epoch": 1.6014071831792522, "grad_norm": 0.1063678115606308, "learning_rate": 5.282575564876329e-06, "loss": 0.0013, "step": 97870 }, { "epoch": 1.6015708091303282, "grad_norm": 0.035489022731781006, "learning_rate": 5.281625141124396e-06, "loss": 0.0018, "step": 97880 }, { "epoch": 1.6017344350814038, "grad_norm": 0.15812666714191437, "learning_rate": 5.280674707164227e-06, "loss": 0.0024, "step": 97890 }, { "epoch": 1.6018980610324798, "grad_norm": 0.028400007635354996, "learning_rate": 5.279724263030274e-06, "loss": 0.0029, "step": 97900 }, { "epoch": 1.6020616869835556, "grad_norm": 0.08711693435907364, "learning_rate": 5.278773808756992e-06, "loss": 0.0012, "step": 97910 }, { "epoch": 1.6022253129346313, "grad_norm": 0.016361359506845474, "learning_rate": 5.277823344378827e-06, "loss": 0.002, "step": 97920 }, { "epoch": 1.6023889388857073, "grad_norm": 0.14891168475151062, "learning_rate": 5.2768728699302344e-06, "loss": 0.0029, "step": 97930 }, { "epoch": 1.602552564836783, "grad_norm": 0.2883024513721466, "learning_rate": 5.275922385445665e-06, "loss": 0.002, "step": 97940 }, { "epoch": 1.6027161907878589, "grad_norm": 0.06108860298991203, "learning_rate": 5.274971890959571e-06, "loss": 0.0011, "step": 97950 }, { "epoch": 1.6028798167389349, "grad_norm": 0.0763351172208786, "learning_rate": 5.27402138650641e-06, "loss": 0.0018, "step": 97960 }, { "epoch": 1.6030434426900106, "grad_norm": 0.14666365087032318, "learning_rate": 5.27307087212063e-06, "loss": 0.0015, "step": 97970 }, { "epoch": 1.6032070686410864, "grad_norm": 0.11977433413267136, "learning_rate": 5.272120347836687e-06, "loss": 0.0019, "step": 97980 }, { "epoch": 1.6033706945921624, "grad_norm": 0.1182369738817215, "learning_rate": 5.271169813689037e-06, "loss": 0.0044, "step": 97990 }, { "epoch": 1.6035343205432382, "grad_norm": 0.18548201024532318, "learning_rate": 5.270219269712132e-06, "loss": 0.0021, "step": 98000 }, { "epoch": 1.603697946494314, "grad_norm": 0.03501434251666069, "learning_rate": 5.26926871594043e-06, "loss": 0.001, "step": 98010 }, { "epoch": 1.60386157244539, "grad_norm": 0.1672307252883911, "learning_rate": 5.2683181524083815e-06, "loss": 0.0032, "step": 98020 }, { "epoch": 1.6040251983964657, "grad_norm": 0.09246539324522018, "learning_rate": 5.267367579150447e-06, "loss": 0.0012, "step": 98030 }, { "epoch": 1.6041888243475415, "grad_norm": 0.2178044617176056, "learning_rate": 5.266416996201077e-06, "loss": 0.0019, "step": 98040 }, { "epoch": 1.6043524502986175, "grad_norm": 0.038778070360422134, "learning_rate": 5.265466403594733e-06, "loss": 0.0011, "step": 98050 }, { "epoch": 1.604516076249693, "grad_norm": 0.17926302552223206, "learning_rate": 5.264515801365872e-06, "loss": 0.0012, "step": 98060 }, { "epoch": 1.604679702200769, "grad_norm": 0.17083346843719482, "learning_rate": 5.263565189548947e-06, "loss": 0.0022, "step": 98070 }, { "epoch": 1.6048433281518448, "grad_norm": 0.03759386017918587, "learning_rate": 5.262614568178418e-06, "loss": 0.0009, "step": 98080 }, { "epoch": 1.6050069541029206, "grad_norm": 0.11836827546358109, "learning_rate": 5.2616639372887416e-06, "loss": 0.0025, "step": 98090 }, { "epoch": 1.6051705800539966, "grad_norm": 0.013447455130517483, "learning_rate": 5.260713296914376e-06, "loss": 0.0016, "step": 98100 }, { "epoch": 1.6053342060050724, "grad_norm": 0.11086362600326538, "learning_rate": 5.25976264708978e-06, "loss": 0.0019, "step": 98110 }, { "epoch": 1.6054978319561481, "grad_norm": 0.0646359845995903, "learning_rate": 5.258811987849413e-06, "loss": 0.0017, "step": 98120 }, { "epoch": 1.6056614579072241, "grad_norm": 0.24520373344421387, "learning_rate": 5.2578613192277325e-06, "loss": 0.0021, "step": 98130 }, { "epoch": 1.6058250838583, "grad_norm": 0.09542601555585861, "learning_rate": 5.256910641259201e-06, "loss": 0.0016, "step": 98140 }, { "epoch": 1.6059887098093757, "grad_norm": 0.472331166267395, "learning_rate": 5.255959953978275e-06, "loss": 0.0022, "step": 98150 }, { "epoch": 1.6061523357604517, "grad_norm": 0.06958793103694916, "learning_rate": 5.2550092574194165e-06, "loss": 0.0018, "step": 98160 }, { "epoch": 1.6063159617115275, "grad_norm": 0.09929090738296509, "learning_rate": 5.254058551617084e-06, "loss": 0.0016, "step": 98170 }, { "epoch": 1.6064795876626032, "grad_norm": 0.07176434248685837, "learning_rate": 5.25310783660574e-06, "loss": 0.0014, "step": 98180 }, { "epoch": 1.6066432136136792, "grad_norm": 0.0863189771771431, "learning_rate": 5.252157112419844e-06, "loss": 0.0014, "step": 98190 }, { "epoch": 1.606806839564755, "grad_norm": 0.05574188381433487, "learning_rate": 5.251206379093861e-06, "loss": 0.0018, "step": 98200 }, { "epoch": 1.6069704655158308, "grad_norm": 0.3285781443119049, "learning_rate": 5.25025563666225e-06, "loss": 0.0033, "step": 98210 }, { "epoch": 1.6071340914669068, "grad_norm": 0.1631193906068802, "learning_rate": 5.249304885159474e-06, "loss": 0.0006, "step": 98220 }, { "epoch": 1.6072977174179823, "grad_norm": 0.2060345858335495, "learning_rate": 5.248354124619994e-06, "loss": 0.0016, "step": 98230 }, { "epoch": 1.6074613433690583, "grad_norm": 0.09031964093446732, "learning_rate": 5.247403355078275e-06, "loss": 0.0012, "step": 98240 }, { "epoch": 1.6076249693201343, "grad_norm": 0.13665316998958588, "learning_rate": 5.246452576568778e-06, "loss": 0.0019, "step": 98250 }, { "epoch": 1.6077885952712099, "grad_norm": 0.08412686735391617, "learning_rate": 5.245501789125968e-06, "loss": 0.0022, "step": 98260 }, { "epoch": 1.6079522212222859, "grad_norm": 0.03635793179273605, "learning_rate": 5.2445509927843074e-06, "loss": 0.0013, "step": 98270 }, { "epoch": 1.6081158471733616, "grad_norm": 0.04021189734339714, "learning_rate": 5.243600187578262e-06, "loss": 0.003, "step": 98280 }, { "epoch": 1.6082794731244374, "grad_norm": 0.04579615592956543, "learning_rate": 5.242649373542295e-06, "loss": 0.001, "step": 98290 }, { "epoch": 1.6084430990755134, "grad_norm": 0.03251798078417778, "learning_rate": 5.241698550710871e-06, "loss": 0.002, "step": 98300 }, { "epoch": 1.6086067250265892, "grad_norm": 0.08444109559059143, "learning_rate": 5.240747719118456e-06, "loss": 0.0019, "step": 98310 }, { "epoch": 1.608770350977665, "grad_norm": 0.08320547640323639, "learning_rate": 5.239796878799514e-06, "loss": 0.0016, "step": 98320 }, { "epoch": 1.608933976928741, "grad_norm": 0.0728529691696167, "learning_rate": 5.238846029788511e-06, "loss": 0.001, "step": 98330 }, { "epoch": 1.6090976028798167, "grad_norm": 0.0988699272274971, "learning_rate": 5.2378951721199154e-06, "loss": 0.0026, "step": 98340 }, { "epoch": 1.6092612288308925, "grad_norm": 0.1827646940946579, "learning_rate": 5.23694430582819e-06, "loss": 0.0015, "step": 98350 }, { "epoch": 1.6094248547819685, "grad_norm": 0.12196511030197144, "learning_rate": 5.235993430947806e-06, "loss": 0.0026, "step": 98360 }, { "epoch": 1.6095884807330443, "grad_norm": 0.07533494383096695, "learning_rate": 5.235042547513224e-06, "loss": 0.0013, "step": 98370 }, { "epoch": 1.60975210668412, "grad_norm": 0.14361101388931274, "learning_rate": 5.234091655558915e-06, "loss": 0.0013, "step": 98380 }, { "epoch": 1.609915732635196, "grad_norm": 0.0888654962182045, "learning_rate": 5.233140755119347e-06, "loss": 0.0012, "step": 98390 }, { "epoch": 1.6100793585862718, "grad_norm": 0.13821378350257874, "learning_rate": 5.2321898462289855e-06, "loss": 0.0024, "step": 98400 }, { "epoch": 1.6102429845373476, "grad_norm": 0.17549480497837067, "learning_rate": 5.231238928922302e-06, "loss": 0.0026, "step": 98410 }, { "epoch": 1.6104066104884236, "grad_norm": 0.34210479259490967, "learning_rate": 5.230288003233764e-06, "loss": 0.0011, "step": 98420 }, { "epoch": 1.6105702364394991, "grad_norm": 0.12255807220935822, "learning_rate": 5.229337069197836e-06, "loss": 0.0015, "step": 98430 }, { "epoch": 1.6107338623905751, "grad_norm": 0.1388000100851059, "learning_rate": 5.228386126848992e-06, "loss": 0.0025, "step": 98440 }, { "epoch": 1.6108974883416511, "grad_norm": 0.17837822437286377, "learning_rate": 5.2274351762217005e-06, "loss": 0.0014, "step": 98450 }, { "epoch": 1.6110611142927267, "grad_norm": 0.057697638869285583, "learning_rate": 5.226484217350429e-06, "loss": 0.0014, "step": 98460 }, { "epoch": 1.6112247402438027, "grad_norm": 0.03137467801570892, "learning_rate": 5.225533250269651e-06, "loss": 0.0012, "step": 98470 }, { "epoch": 1.6113883661948785, "grad_norm": 0.07953894883394241, "learning_rate": 5.224582275013833e-06, "loss": 0.0021, "step": 98480 }, { "epoch": 1.6115519921459542, "grad_norm": 0.17380009591579437, "learning_rate": 5.2236312916174484e-06, "loss": 0.0016, "step": 98490 }, { "epoch": 1.6117156180970302, "grad_norm": 0.24816903471946716, "learning_rate": 5.222680300114966e-06, "loss": 0.0017, "step": 98500 }, { "epoch": 1.611879244048106, "grad_norm": 0.08738347887992859, "learning_rate": 5.22172930054086e-06, "loss": 0.0032, "step": 98510 }, { "epoch": 1.6120428699991818, "grad_norm": 0.24474535882472992, "learning_rate": 5.220778292929598e-06, "loss": 0.0016, "step": 98520 }, { "epoch": 1.6122064959502578, "grad_norm": 0.06241137534379959, "learning_rate": 5.219827277315652e-06, "loss": 0.0014, "step": 98530 }, { "epoch": 1.6123701219013336, "grad_norm": 0.0731671005487442, "learning_rate": 5.218876253733498e-06, "loss": 0.0015, "step": 98540 }, { "epoch": 1.6125337478524093, "grad_norm": 0.036975424736738205, "learning_rate": 5.217925222217604e-06, "loss": 0.001, "step": 98550 }, { "epoch": 1.6126973738034853, "grad_norm": 0.09705603867769241, "learning_rate": 5.216974182802448e-06, "loss": 0.0017, "step": 98560 }, { "epoch": 1.612860999754561, "grad_norm": 0.4200478792190552, "learning_rate": 5.216023135522497e-06, "loss": 0.0016, "step": 98570 }, { "epoch": 1.6130246257056369, "grad_norm": 0.07903515547513962, "learning_rate": 5.215072080412225e-06, "loss": 0.0012, "step": 98580 }, { "epoch": 1.6131882516567129, "grad_norm": 0.17032834887504578, "learning_rate": 5.214121017506108e-06, "loss": 0.0012, "step": 98590 }, { "epoch": 1.6133518776077886, "grad_norm": 0.11135027557611465, "learning_rate": 5.21316994683862e-06, "loss": 0.0018, "step": 98600 }, { "epoch": 1.6135155035588644, "grad_norm": 0.03420327976346016, "learning_rate": 5.212218868444233e-06, "loss": 0.0018, "step": 98610 }, { "epoch": 1.6136791295099404, "grad_norm": 0.08032601326704025, "learning_rate": 5.211267782357422e-06, "loss": 0.0019, "step": 98620 }, { "epoch": 1.613842755461016, "grad_norm": 0.056985605508089066, "learning_rate": 5.210316688612662e-06, "loss": 0.0014, "step": 98630 }, { "epoch": 1.614006381412092, "grad_norm": 0.1564008891582489, "learning_rate": 5.209365587244427e-06, "loss": 0.0014, "step": 98640 }, { "epoch": 1.614170007363168, "grad_norm": 0.021098554134368896, "learning_rate": 5.208414478287193e-06, "loss": 0.0021, "step": 98650 }, { "epoch": 1.6143336333142435, "grad_norm": 0.057873841375112534, "learning_rate": 5.207463361775436e-06, "loss": 0.002, "step": 98660 }, { "epoch": 1.6144972592653195, "grad_norm": 0.17769621312618256, "learning_rate": 5.206512237743628e-06, "loss": 0.0017, "step": 98670 }, { "epoch": 1.6146608852163953, "grad_norm": 0.1033492311835289, "learning_rate": 5.20556110622625e-06, "loss": 0.0018, "step": 98680 }, { "epoch": 1.614824511167471, "grad_norm": 0.014122343622148037, "learning_rate": 5.204609967257775e-06, "loss": 0.0015, "step": 98690 }, { "epoch": 1.614988137118547, "grad_norm": 0.008405575528740883, "learning_rate": 5.203658820872681e-06, "loss": 0.0031, "step": 98700 }, { "epoch": 1.6151517630696228, "grad_norm": 0.05221950635313988, "learning_rate": 5.202707667105445e-06, "loss": 0.0015, "step": 98710 }, { "epoch": 1.6153153890206986, "grad_norm": 0.06928742676973343, "learning_rate": 5.2017565059905415e-06, "loss": 0.0026, "step": 98720 }, { "epoch": 1.6154790149717746, "grad_norm": 0.056128133088350296, "learning_rate": 5.2008053375624515e-06, "loss": 0.0013, "step": 98730 }, { "epoch": 1.6156426409228504, "grad_norm": 0.5218814015388489, "learning_rate": 5.199854161855649e-06, "loss": 0.0026, "step": 98740 }, { "epoch": 1.6158062668739261, "grad_norm": 0.08757639676332474, "learning_rate": 5.198902978904613e-06, "loss": 0.0018, "step": 98750 }, { "epoch": 1.6159698928250021, "grad_norm": 0.23714497685432434, "learning_rate": 5.197951788743824e-06, "loss": 0.002, "step": 98760 }, { "epoch": 1.616133518776078, "grad_norm": 0.0703224167227745, "learning_rate": 5.197000591407757e-06, "loss": 0.0016, "step": 98770 }, { "epoch": 1.6162971447271537, "grad_norm": 0.05482254922389984, "learning_rate": 5.196049386930893e-06, "loss": 0.0009, "step": 98780 }, { "epoch": 1.6164607706782297, "grad_norm": 0.05517350509762764, "learning_rate": 5.195098175347709e-06, "loss": 0.0013, "step": 98790 }, { "epoch": 1.6166243966293055, "grad_norm": 0.35550621151924133, "learning_rate": 5.194146956692686e-06, "loss": 0.0019, "step": 98800 }, { "epoch": 1.6167880225803812, "grad_norm": 0.01884710229933262, "learning_rate": 5.193195731000301e-06, "loss": 0.0009, "step": 98810 }, { "epoch": 1.6169516485314572, "grad_norm": 0.20061872899532318, "learning_rate": 5.192244498305037e-06, "loss": 0.0018, "step": 98820 }, { "epoch": 1.6171152744825328, "grad_norm": 0.2248377650976181, "learning_rate": 5.19129325864137e-06, "loss": 0.0014, "step": 98830 }, { "epoch": 1.6172789004336088, "grad_norm": 0.19466260075569153, "learning_rate": 5.190342012043783e-06, "loss": 0.0013, "step": 98840 }, { "epoch": 1.6174425263846848, "grad_norm": 0.32903891801834106, "learning_rate": 5.189390758546756e-06, "loss": 0.002, "step": 98850 }, { "epoch": 1.6176061523357603, "grad_norm": 0.08313871920108795, "learning_rate": 5.188439498184769e-06, "loss": 0.0024, "step": 98860 }, { "epoch": 1.6177697782868363, "grad_norm": 0.06497074663639069, "learning_rate": 5.187488230992304e-06, "loss": 0.0037, "step": 98870 }, { "epoch": 1.617933404237912, "grad_norm": 0.02705974504351616, "learning_rate": 5.186536957003841e-06, "loss": 0.0011, "step": 98880 }, { "epoch": 1.6180970301889879, "grad_norm": 0.13264022767543793, "learning_rate": 5.185585676253859e-06, "loss": 0.007, "step": 98890 }, { "epoch": 1.6182606561400639, "grad_norm": 0.12600697576999664, "learning_rate": 5.184634388776844e-06, "loss": 0.0021, "step": 98900 }, { "epoch": 1.6184242820911396, "grad_norm": 0.08227627724409103, "learning_rate": 5.183683094607277e-06, "loss": 0.0015, "step": 98910 }, { "epoch": 1.6185879080422154, "grad_norm": 0.1436682790517807, "learning_rate": 5.182731793779639e-06, "loss": 0.0021, "step": 98920 }, { "epoch": 1.6187515339932914, "grad_norm": 0.09100278466939926, "learning_rate": 5.1817804863284114e-06, "loss": 0.0016, "step": 98930 }, { "epoch": 1.6189151599443672, "grad_norm": 0.07239263504743576, "learning_rate": 5.18082917228808e-06, "loss": 0.001, "step": 98940 }, { "epoch": 1.619078785895443, "grad_norm": 0.001040601171553135, "learning_rate": 5.179877851693123e-06, "loss": 0.0013, "step": 98950 }, { "epoch": 1.619242411846519, "grad_norm": 0.11623147130012512, "learning_rate": 5.1789265245780286e-06, "loss": 0.0018, "step": 98960 }, { "epoch": 1.6194060377975947, "grad_norm": 0.1853215992450714, "learning_rate": 5.177975190977277e-06, "loss": 0.0016, "step": 98970 }, { "epoch": 1.6195696637486705, "grad_norm": 0.042178887873888016, "learning_rate": 5.177023850925353e-06, "loss": 0.0016, "step": 98980 }, { "epoch": 1.6197332896997465, "grad_norm": 0.09309615939855576, "learning_rate": 5.1760725044567385e-06, "loss": 0.0012, "step": 98990 }, { "epoch": 1.619896915650822, "grad_norm": 0.08934682607650757, "learning_rate": 5.17512115160592e-06, "loss": 0.0028, "step": 99000 }, { "epoch": 1.620060541601898, "grad_norm": 0.17292897403240204, "learning_rate": 5.1741697924073806e-06, "loss": 0.0013, "step": 99010 }, { "epoch": 1.620224167552974, "grad_norm": 0.22815579175949097, "learning_rate": 5.173218426895606e-06, "loss": 0.0039, "step": 99020 }, { "epoch": 1.6203877935040496, "grad_norm": 0.2680216431617737, "learning_rate": 5.172267055105076e-06, "loss": 0.0024, "step": 99030 }, { "epoch": 1.6205514194551256, "grad_norm": 0.02104036509990692, "learning_rate": 5.1713156770702825e-06, "loss": 0.0012, "step": 99040 }, { "epoch": 1.6207150454062014, "grad_norm": 0.08003778010606766, "learning_rate": 5.170364292825706e-06, "loss": 0.0024, "step": 99050 }, { "epoch": 1.6208786713572771, "grad_norm": 0.15551789104938507, "learning_rate": 5.169412902405835e-06, "loss": 0.0027, "step": 99060 }, { "epoch": 1.6210422973083531, "grad_norm": 0.04570987820625305, "learning_rate": 5.168461505845153e-06, "loss": 0.0009, "step": 99070 }, { "epoch": 1.621205923259429, "grad_norm": 0.08118777722120285, "learning_rate": 5.1675101031781446e-06, "loss": 0.0017, "step": 99080 }, { "epoch": 1.6213695492105047, "grad_norm": 0.09732763469219208, "learning_rate": 5.1665586944392985e-06, "loss": 0.0017, "step": 99090 }, { "epoch": 1.6215331751615807, "grad_norm": 0.06535319983959198, "learning_rate": 5.1656072796631006e-06, "loss": 0.0017, "step": 99100 }, { "epoch": 1.6216968011126565, "grad_norm": 0.06291642785072327, "learning_rate": 5.164655858884036e-06, "loss": 0.0021, "step": 99110 }, { "epoch": 1.6218604270637322, "grad_norm": 0.24212847650051117, "learning_rate": 5.163704432136593e-06, "loss": 0.0017, "step": 99120 }, { "epoch": 1.6220240530148082, "grad_norm": 0.0826900377869606, "learning_rate": 5.1627529994552576e-06, "loss": 0.0015, "step": 99130 }, { "epoch": 1.622187678965884, "grad_norm": 0.08897511661052704, "learning_rate": 5.161801560874517e-06, "loss": 0.0014, "step": 99140 }, { "epoch": 1.6223513049169598, "grad_norm": 0.005243068560957909, "learning_rate": 5.160850116428859e-06, "loss": 0.0013, "step": 99150 }, { "epoch": 1.6225149308680358, "grad_norm": 0.03812631964683533, "learning_rate": 5.15989866615277e-06, "loss": 0.0015, "step": 99160 }, { "epoch": 1.6226785568191116, "grad_norm": 0.13536326587200165, "learning_rate": 5.158947210080738e-06, "loss": 0.0013, "step": 99170 }, { "epoch": 1.6228421827701873, "grad_norm": 0.12537388503551483, "learning_rate": 5.157995748247253e-06, "loss": 0.002, "step": 99180 }, { "epoch": 1.6230058087212633, "grad_norm": 0.3827470541000366, "learning_rate": 5.157044280686802e-06, "loss": 0.0019, "step": 99190 }, { "epoch": 1.6231694346723389, "grad_norm": 0.014011899009346962, "learning_rate": 5.1560928074338735e-06, "loss": 0.0013, "step": 99200 }, { "epoch": 1.6233330606234149, "grad_norm": 0.0309713464230299, "learning_rate": 5.155141328522956e-06, "loss": 0.0015, "step": 99210 }, { "epoch": 1.6234966865744909, "grad_norm": 0.07933453470468521, "learning_rate": 5.154189843988538e-06, "loss": 0.0015, "step": 99220 }, { "epoch": 1.6236603125255664, "grad_norm": 0.06462419778108597, "learning_rate": 5.1532383538651075e-06, "loss": 0.0018, "step": 99230 }, { "epoch": 1.6238239384766424, "grad_norm": 0.03575167804956436, "learning_rate": 5.1522868581871555e-06, "loss": 0.0008, "step": 99240 }, { "epoch": 1.6239875644277182, "grad_norm": 0.05680159106850624, "learning_rate": 5.15133535698917e-06, "loss": 0.001, "step": 99250 }, { "epoch": 1.624151190378794, "grad_norm": 0.059106118977069855, "learning_rate": 5.1503838503056446e-06, "loss": 0.003, "step": 99260 }, { "epoch": 1.62431481632987, "grad_norm": 0.08148624747991562, "learning_rate": 5.149432338171065e-06, "loss": 0.0011, "step": 99270 }, { "epoch": 1.6244784422809457, "grad_norm": 0.07005994766950607, "learning_rate": 5.14848082061992e-06, "loss": 0.0027, "step": 99280 }, { "epoch": 1.6246420682320215, "grad_norm": 0.04762598127126694, "learning_rate": 5.147529297686704e-06, "loss": 0.0012, "step": 99290 }, { "epoch": 1.6248056941830975, "grad_norm": 0.2658834457397461, "learning_rate": 5.146577769405905e-06, "loss": 0.0011, "step": 99300 }, { "epoch": 1.6249693201341733, "grad_norm": 0.11333608627319336, "learning_rate": 5.145626235812015e-06, "loss": 0.0021, "step": 99310 }, { "epoch": 1.625132946085249, "grad_norm": 0.12355748564004898, "learning_rate": 5.144674696939522e-06, "loss": 0.0015, "step": 99320 }, { "epoch": 1.625296572036325, "grad_norm": 0.20626477897167206, "learning_rate": 5.14372315282292e-06, "loss": 0.0019, "step": 99330 }, { "epoch": 1.6254601979874008, "grad_norm": 0.04642678424715996, "learning_rate": 5.1427716034967e-06, "loss": 0.0009, "step": 99340 }, { "epoch": 1.6256238239384766, "grad_norm": 0.042989879846572876, "learning_rate": 5.141820048995352e-06, "loss": 0.0026, "step": 99350 }, { "epoch": 1.6257874498895526, "grad_norm": 0.2868697941303253, "learning_rate": 5.140868489353369e-06, "loss": 0.0017, "step": 99360 }, { "epoch": 1.6259510758406284, "grad_norm": 0.12098990380764008, "learning_rate": 5.13991692460524e-06, "loss": 0.0018, "step": 99370 }, { "epoch": 1.6261147017917041, "grad_norm": 0.01503510121256113, "learning_rate": 5.13896535478546e-06, "loss": 0.0009, "step": 99380 }, { "epoch": 1.6262783277427801, "grad_norm": 0.08358880877494812, "learning_rate": 5.138013779928518e-06, "loss": 0.0012, "step": 99390 }, { "epoch": 1.6264419536938557, "grad_norm": 0.028315600007772446, "learning_rate": 5.13706220006891e-06, "loss": 0.0021, "step": 99400 }, { "epoch": 1.6266055796449317, "grad_norm": 0.04941651225090027, "learning_rate": 5.1361106152411265e-06, "loss": 0.0018, "step": 99410 }, { "epoch": 1.6267692055960077, "grad_norm": 0.07402452081441879, "learning_rate": 5.13515902547966e-06, "loss": 0.0028, "step": 99420 }, { "epoch": 1.6269328315470832, "grad_norm": 0.3740224540233612, "learning_rate": 5.134207430819004e-06, "loss": 0.0017, "step": 99430 }, { "epoch": 1.6270964574981592, "grad_norm": 0.14909420907497406, "learning_rate": 5.133255831293651e-06, "loss": 0.0017, "step": 99440 }, { "epoch": 1.627260083449235, "grad_norm": 0.12497895210981369, "learning_rate": 5.132304226938094e-06, "loss": 0.0032, "step": 99450 }, { "epoch": 1.6274237094003108, "grad_norm": 0.12497039139270782, "learning_rate": 5.131352617786828e-06, "loss": 0.0022, "step": 99460 }, { "epoch": 1.6275873353513868, "grad_norm": 0.055445168167352676, "learning_rate": 5.130401003874343e-06, "loss": 0.0016, "step": 99470 }, { "epoch": 1.6277509613024626, "grad_norm": 0.09017663449048996, "learning_rate": 5.129449385235137e-06, "loss": 0.002, "step": 99480 }, { "epoch": 1.6279145872535383, "grad_norm": 0.05772421509027481, "learning_rate": 5.128497761903701e-06, "loss": 0.0017, "step": 99490 }, { "epoch": 1.6280782132046143, "grad_norm": 0.18963901698589325, "learning_rate": 5.1275461339145304e-06, "loss": 0.0019, "step": 99500 }, { "epoch": 1.62824183915569, "grad_norm": 0.0779237151145935, "learning_rate": 5.12659450130212e-06, "loss": 0.0019, "step": 99510 }, { "epoch": 1.6284054651067659, "grad_norm": 0.12682569026947021, "learning_rate": 5.125642864100962e-06, "loss": 0.0019, "step": 99520 }, { "epoch": 1.6285690910578419, "grad_norm": 0.06850654631853104, "learning_rate": 5.124691222345551e-06, "loss": 0.0011, "step": 99530 }, { "epoch": 1.6287327170089176, "grad_norm": 0.13229204714298248, "learning_rate": 5.123739576070384e-06, "loss": 0.0042, "step": 99540 }, { "epoch": 1.6288963429599934, "grad_norm": 0.10641125589609146, "learning_rate": 5.122787925309955e-06, "loss": 0.0017, "step": 99550 }, { "epoch": 1.6290599689110694, "grad_norm": 0.16319550573825836, "learning_rate": 5.12183627009876e-06, "loss": 0.0018, "step": 99560 }, { "epoch": 1.6292235948621452, "grad_norm": 0.004445069935172796, "learning_rate": 5.120884610471292e-06, "loss": 0.0009, "step": 99570 }, { "epoch": 1.629387220813221, "grad_norm": 0.016375571489334106, "learning_rate": 5.119932946462047e-06, "loss": 0.0009, "step": 99580 }, { "epoch": 1.629550846764297, "grad_norm": 0.1307780146598816, "learning_rate": 5.118981278105521e-06, "loss": 0.0016, "step": 99590 }, { "epoch": 1.6297144727153725, "grad_norm": 0.05227362737059593, "learning_rate": 5.11802960543621e-06, "loss": 0.0019, "step": 99600 }, { "epoch": 1.6298780986664485, "grad_norm": 0.11242035031318665, "learning_rate": 5.1170779284886085e-06, "loss": 0.0021, "step": 99610 }, { "epoch": 1.6300417246175245, "grad_norm": 0.08303198963403702, "learning_rate": 5.116126247297214e-06, "loss": 0.0017, "step": 99620 }, { "epoch": 1.6302053505686, "grad_norm": 0.008203918114304543, "learning_rate": 5.1151745618965235e-06, "loss": 0.0016, "step": 99630 }, { "epoch": 1.630368976519676, "grad_norm": 0.060079704970121384, "learning_rate": 5.11422287232103e-06, "loss": 0.0021, "step": 99640 }, { "epoch": 1.6305326024707518, "grad_norm": 0.19092924892902374, "learning_rate": 5.113271178605232e-06, "loss": 0.0014, "step": 99650 }, { "epoch": 1.6306962284218276, "grad_norm": 0.31950604915618896, "learning_rate": 5.112319480783627e-06, "loss": 0.0058, "step": 99660 }, { "epoch": 1.6308598543729036, "grad_norm": 0.07352284342050552, "learning_rate": 5.11136777889071e-06, "loss": 0.0012, "step": 99670 }, { "epoch": 1.6310234803239794, "grad_norm": 0.2863670885562897, "learning_rate": 5.11041607296098e-06, "loss": 0.0043, "step": 99680 }, { "epoch": 1.6311871062750551, "grad_norm": 0.02335083857178688, "learning_rate": 5.109464363028932e-06, "loss": 0.0013, "step": 99690 }, { "epoch": 1.6313507322261311, "grad_norm": 0.07093873620033264, "learning_rate": 5.108512649129064e-06, "loss": 0.0011, "step": 99700 }, { "epoch": 1.631514358177207, "grad_norm": 0.0666184350848198, "learning_rate": 5.107560931295873e-06, "loss": 0.0018, "step": 99710 }, { "epoch": 1.6316779841282827, "grad_norm": 0.07024268805980682, "learning_rate": 5.106609209563857e-06, "loss": 0.0015, "step": 99720 }, { "epoch": 1.6318416100793587, "grad_norm": 0.10046453773975372, "learning_rate": 5.105657483967513e-06, "loss": 0.0016, "step": 99730 }, { "epoch": 1.6320052360304345, "grad_norm": 0.1354244500398636, "learning_rate": 5.104705754541338e-06, "loss": 0.0019, "step": 99740 }, { "epoch": 1.6321688619815102, "grad_norm": 0.00312987738288939, "learning_rate": 5.103754021319833e-06, "loss": 0.0019, "step": 99750 }, { "epoch": 1.6323324879325862, "grad_norm": 0.1013323962688446, "learning_rate": 5.102802284337495e-06, "loss": 0.0027, "step": 99760 }, { "epoch": 1.632496113883662, "grad_norm": 0.08389976620674133, "learning_rate": 5.101850543628819e-06, "loss": 0.0023, "step": 99770 }, { "epoch": 1.6326597398347378, "grad_norm": 0.07518161833286285, "learning_rate": 5.100898799228307e-06, "loss": 0.0011, "step": 99780 }, { "epoch": 1.6328233657858138, "grad_norm": 0.16343435645103455, "learning_rate": 5.099947051170455e-06, "loss": 0.0011, "step": 99790 }, { "epoch": 1.6329869917368893, "grad_norm": 0.04742646962404251, "learning_rate": 5.098995299489763e-06, "loss": 0.0018, "step": 99800 }, { "epoch": 1.6331506176879653, "grad_norm": 0.02837369218468666, "learning_rate": 5.098043544220729e-06, "loss": 0.0022, "step": 99810 }, { "epoch": 1.633314243639041, "grad_norm": 0.014262927696108818, "learning_rate": 5.097091785397853e-06, "loss": 0.001, "step": 99820 }, { "epoch": 1.6334778695901169, "grad_norm": 0.2345682978630066, "learning_rate": 5.0961400230556325e-06, "loss": 0.0023, "step": 99830 }, { "epoch": 1.6336414955411929, "grad_norm": 0.10950388014316559, "learning_rate": 5.095188257228567e-06, "loss": 0.0022, "step": 99840 }, { "epoch": 1.6338051214922686, "grad_norm": 0.06905578076839447, "learning_rate": 5.094236487951156e-06, "loss": 0.0012, "step": 99850 }, { "epoch": 1.6339687474433444, "grad_norm": 0.16081662476062775, "learning_rate": 5.093284715257899e-06, "loss": 0.0022, "step": 99860 }, { "epoch": 1.6341323733944204, "grad_norm": 0.019619664177298546, "learning_rate": 5.092332939183297e-06, "loss": 0.0017, "step": 99870 }, { "epoch": 1.6342959993454962, "grad_norm": 0.07543018460273743, "learning_rate": 5.091381159761844e-06, "loss": 0.0016, "step": 99880 }, { "epoch": 1.634459625296572, "grad_norm": 0.034987207502126694, "learning_rate": 5.090429377028047e-06, "loss": 0.001, "step": 99890 }, { "epoch": 1.634623251247648, "grad_norm": 0.09814410656690598, "learning_rate": 5.089477591016401e-06, "loss": 0.0017, "step": 99900 }, { "epoch": 1.6347868771987237, "grad_norm": 0.17710573971271515, "learning_rate": 5.0885258017614085e-06, "loss": 0.0022, "step": 99910 }, { "epoch": 1.6349505031497995, "grad_norm": 0.1105768233537674, "learning_rate": 5.087574009297568e-06, "loss": 0.0017, "step": 99920 }, { "epoch": 1.6351141291008755, "grad_norm": 0.13785147666931152, "learning_rate": 5.086622213659379e-06, "loss": 0.0016, "step": 99930 }, { "epoch": 1.6352777550519513, "grad_norm": 0.05918983742594719, "learning_rate": 5.0856704148813445e-06, "loss": 0.0013, "step": 99940 }, { "epoch": 1.635441381003027, "grad_norm": 0.45161759853363037, "learning_rate": 5.084718612997962e-06, "loss": 0.0029, "step": 99950 }, { "epoch": 1.635605006954103, "grad_norm": 0.021274369210004807, "learning_rate": 5.083766808043733e-06, "loss": 0.001, "step": 99960 }, { "epoch": 1.6357686329051786, "grad_norm": 0.11046069115400314, "learning_rate": 5.082815000053161e-06, "loss": 0.0042, "step": 99970 }, { "epoch": 1.6359322588562546, "grad_norm": 0.19795791804790497, "learning_rate": 5.081863189060741e-06, "loss": 0.0021, "step": 99980 }, { "epoch": 1.6360958848073306, "grad_norm": 0.16111892461776733, "learning_rate": 5.08091137510098e-06, "loss": 0.002, "step": 99990 }, { "epoch": 1.6362595107584061, "grad_norm": 0.5397040247917175, "learning_rate": 5.079959558208375e-06, "loss": 0.0009, "step": 100000 }, { "epoch": 1.6362595107584061, "eval_loss": 0.0012626892421394587, "eval_runtime": 3.0965, "eval_samples_per_second": 64.59, "eval_steps_per_second": 16.147, "step": 100000 }, { "epoch": 1.6364231367094821, "grad_norm": 0.05758707970380783, "learning_rate": 5.079007738417429e-06, "loss": 0.0018, "step": 100010 }, { "epoch": 1.636586762660558, "grad_norm": 0.2286084145307541, "learning_rate": 5.0780559157626405e-06, "loss": 0.0014, "step": 100020 }, { "epoch": 1.6367503886116337, "grad_norm": 0.04949363321065903, "learning_rate": 5.077104090278514e-06, "loss": 0.0015, "step": 100030 }, { "epoch": 1.6369140145627097, "grad_norm": 0.05404341220855713, "learning_rate": 5.07615226199955e-06, "loss": 0.0015, "step": 100040 }, { "epoch": 1.6370776405137855, "grad_norm": 0.1697544902563095, "learning_rate": 5.075200430960248e-06, "loss": 0.0018, "step": 100050 }, { "epoch": 1.6372412664648612, "grad_norm": 0.1308528035879135, "learning_rate": 5.074248597195112e-06, "loss": 0.002, "step": 100060 }, { "epoch": 1.6374048924159372, "grad_norm": 0.19643068313598633, "learning_rate": 5.073296760738644e-06, "loss": 0.0024, "step": 100070 }, { "epoch": 1.637568518367013, "grad_norm": 0.0887424498796463, "learning_rate": 5.072344921625343e-06, "loss": 0.0015, "step": 100080 }, { "epoch": 1.6377321443180888, "grad_norm": 0.16684751212596893, "learning_rate": 5.071393079889713e-06, "loss": 0.0011, "step": 100090 }, { "epoch": 1.6378957702691648, "grad_norm": 0.07709401845932007, "learning_rate": 5.070441235566254e-06, "loss": 0.0019, "step": 100100 }, { "epoch": 1.6380593962202405, "grad_norm": 0.06361225992441177, "learning_rate": 5.06948938868947e-06, "loss": 0.0029, "step": 100110 }, { "epoch": 1.6382230221713163, "grad_norm": 0.1230153813958168, "learning_rate": 5.068537539293865e-06, "loss": 0.0034, "step": 100120 }, { "epoch": 1.6383866481223923, "grad_norm": 0.12074223905801773, "learning_rate": 5.067585687413937e-06, "loss": 0.0022, "step": 100130 }, { "epoch": 1.638550274073468, "grad_norm": 0.03241406008601189, "learning_rate": 5.066633833084191e-06, "loss": 0.0011, "step": 100140 }, { "epoch": 1.6387139000245439, "grad_norm": 0.594880998134613, "learning_rate": 5.065681976339129e-06, "loss": 0.0039, "step": 100150 }, { "epoch": 1.6388775259756199, "grad_norm": 0.057453226298093796, "learning_rate": 5.064730117213252e-06, "loss": 0.0014, "step": 100160 }, { "epoch": 1.6390411519266954, "grad_norm": 0.11164303123950958, "learning_rate": 5.063778255741064e-06, "loss": 0.0015, "step": 100170 }, { "epoch": 1.6392047778777714, "grad_norm": 0.09474091231822968, "learning_rate": 5.062826391957068e-06, "loss": 0.0011, "step": 100180 }, { "epoch": 1.6393684038288474, "grad_norm": 0.005235459189862013, "learning_rate": 5.0618745258957666e-06, "loss": 0.0024, "step": 100190 }, { "epoch": 1.639532029779923, "grad_norm": 0.039148926734924316, "learning_rate": 5.0609226575916614e-06, "loss": 0.0023, "step": 100200 }, { "epoch": 1.639695655730999, "grad_norm": 0.08107282221317291, "learning_rate": 5.059970787079257e-06, "loss": 0.0012, "step": 100210 }, { "epoch": 1.6398592816820747, "grad_norm": 0.13695508241653442, "learning_rate": 5.059018914393057e-06, "loss": 0.0017, "step": 100220 }, { "epoch": 1.6400229076331505, "grad_norm": 0.14939555525779724, "learning_rate": 5.0580670395675615e-06, "loss": 0.0016, "step": 100230 }, { "epoch": 1.6401865335842265, "grad_norm": 0.030955970287322998, "learning_rate": 5.057115162637274e-06, "loss": 0.0014, "step": 100240 }, { "epoch": 1.6403501595353023, "grad_norm": 0.08128859102725983, "learning_rate": 5.056163283636701e-06, "loss": 0.0013, "step": 100250 }, { "epoch": 1.640513785486378, "grad_norm": 0.035780806094408035, "learning_rate": 5.055211402600344e-06, "loss": 0.0021, "step": 100260 }, { "epoch": 1.640677411437454, "grad_norm": 0.13452747464179993, "learning_rate": 5.054259519562706e-06, "loss": 0.0015, "step": 100270 }, { "epoch": 1.6408410373885298, "grad_norm": 0.08999369293451309, "learning_rate": 5.053307634558291e-06, "loss": 0.0019, "step": 100280 }, { "epoch": 1.6410046633396056, "grad_norm": 0.04026127606630325, "learning_rate": 5.0523557476216025e-06, "loss": 0.0011, "step": 100290 }, { "epoch": 1.6411682892906816, "grad_norm": 0.07283289730548859, "learning_rate": 5.051403858787144e-06, "loss": 0.0024, "step": 100300 }, { "epoch": 1.6413319152417574, "grad_norm": 0.018817242234945297, "learning_rate": 5.050451968089418e-06, "loss": 0.0022, "step": 100310 }, { "epoch": 1.6414955411928331, "grad_norm": 0.021105105057358742, "learning_rate": 5.0495000755629305e-06, "loss": 0.0015, "step": 100320 }, { "epoch": 1.6416591671439091, "grad_norm": 0.06151590123772621, "learning_rate": 5.048548181242185e-06, "loss": 0.0013, "step": 100330 }, { "epoch": 1.641822793094985, "grad_norm": 0.09511099010705948, "learning_rate": 5.047596285161683e-06, "loss": 0.0018, "step": 100340 }, { "epoch": 1.6419864190460607, "grad_norm": 0.1867058277130127, "learning_rate": 5.046644387355931e-06, "loss": 0.0024, "step": 100350 }, { "epoch": 1.6421500449971367, "grad_norm": 0.12276323139667511, "learning_rate": 5.045692487859431e-06, "loss": 0.0021, "step": 100360 }, { "epoch": 1.6423136709482122, "grad_norm": 0.09928416460752487, "learning_rate": 5.04474058670669e-06, "loss": 0.0015, "step": 100370 }, { "epoch": 1.6424772968992882, "grad_norm": 0.1165539100766182, "learning_rate": 5.0437886839322085e-06, "loss": 0.0017, "step": 100380 }, { "epoch": 1.6426409228503642, "grad_norm": 0.049185093492269516, "learning_rate": 5.042836779570493e-06, "loss": 0.0011, "step": 100390 }, { "epoch": 1.6428045488014398, "grad_norm": 0.06552588194608688, "learning_rate": 5.041884873656048e-06, "loss": 0.0018, "step": 100400 }, { "epoch": 1.6429681747525158, "grad_norm": 0.05554191768169403, "learning_rate": 5.040932966223375e-06, "loss": 0.0021, "step": 100410 }, { "epoch": 1.6431318007035915, "grad_norm": 0.19864696264266968, "learning_rate": 5.039981057306983e-06, "loss": 0.0028, "step": 100420 }, { "epoch": 1.6432954266546673, "grad_norm": 0.2563404440879822, "learning_rate": 5.039029146941372e-06, "loss": 0.0019, "step": 100430 }, { "epoch": 1.6434590526057433, "grad_norm": 0.04238563030958176, "learning_rate": 5.0380772351610495e-06, "loss": 0.0022, "step": 100440 }, { "epoch": 1.643622678556819, "grad_norm": 0.19535382091999054, "learning_rate": 5.037125322000518e-06, "loss": 0.003, "step": 100450 }, { "epoch": 1.6437863045078949, "grad_norm": 0.06673549115657806, "learning_rate": 5.036173407494282e-06, "loss": 0.0011, "step": 100460 }, { "epoch": 1.6439499304589709, "grad_norm": 0.08663325756788254, "learning_rate": 5.035221491676849e-06, "loss": 0.0018, "step": 100470 }, { "epoch": 1.6441135564100466, "grad_norm": 0.08847000449895859, "learning_rate": 5.034269574582718e-06, "loss": 0.002, "step": 100480 }, { "epoch": 1.6442771823611224, "grad_norm": 0.017689118161797523, "learning_rate": 5.0333176562464004e-06, "loss": 0.0012, "step": 100490 }, { "epoch": 1.6444408083121984, "grad_norm": 0.10258142650127411, "learning_rate": 5.032365736702397e-06, "loss": 0.0018, "step": 100500 }, { "epoch": 1.6446044342632742, "grad_norm": 0.1761052906513214, "learning_rate": 5.031413815985213e-06, "loss": 0.0018, "step": 100510 }, { "epoch": 1.64476806021435, "grad_norm": 0.02352570742368698, "learning_rate": 5.030461894129353e-06, "loss": 0.0008, "step": 100520 }, { "epoch": 1.644931686165426, "grad_norm": 0.32751473784446716, "learning_rate": 5.029509971169325e-06, "loss": 0.0021, "step": 100530 }, { "epoch": 1.6450953121165017, "grad_norm": 0.17443785071372986, "learning_rate": 5.02855804713963e-06, "loss": 0.002, "step": 100540 }, { "epoch": 1.6452589380675775, "grad_norm": 0.11152517795562744, "learning_rate": 5.027606122074774e-06, "loss": 0.003, "step": 100550 }, { "epoch": 1.6454225640186535, "grad_norm": 0.04345415160059929, "learning_rate": 5.026654196009263e-06, "loss": 0.0014, "step": 100560 }, { "epoch": 1.645586189969729, "grad_norm": 0.32868242263793945, "learning_rate": 5.025702268977602e-06, "loss": 0.0014, "step": 100570 }, { "epoch": 1.645749815920805, "grad_norm": 0.03208612650632858, "learning_rate": 5.024750341014295e-06, "loss": 0.0006, "step": 100580 }, { "epoch": 1.645913441871881, "grad_norm": 0.07968027144670486, "learning_rate": 5.023798412153847e-06, "loss": 0.0027, "step": 100590 }, { "epoch": 1.6460770678229566, "grad_norm": 0.11156504601240158, "learning_rate": 5.022846482430763e-06, "loss": 0.0014, "step": 100600 }, { "epoch": 1.6462406937740326, "grad_norm": 0.03609035536646843, "learning_rate": 5.021894551879552e-06, "loss": 0.0018, "step": 100610 }, { "epoch": 1.6464043197251084, "grad_norm": 0.07954364269971848, "learning_rate": 5.020942620534715e-06, "loss": 0.0015, "step": 100620 }, { "epoch": 1.6465679456761841, "grad_norm": 0.03949743136763573, "learning_rate": 5.019990688430757e-06, "loss": 0.0013, "step": 100630 }, { "epoch": 1.6467315716272601, "grad_norm": 0.32489776611328125, "learning_rate": 5.019038755602184e-06, "loss": 0.0021, "step": 100640 }, { "epoch": 1.646895197578336, "grad_norm": 0.0025795656256377697, "learning_rate": 5.018086822083504e-06, "loss": 0.0016, "step": 100650 }, { "epoch": 1.6470588235294117, "grad_norm": 0.13780875504016876, "learning_rate": 5.017134887909219e-06, "loss": 0.0013, "step": 100660 }, { "epoch": 1.6472224494804877, "grad_norm": 0.017130527645349503, "learning_rate": 5.016182953113836e-06, "loss": 0.002, "step": 100670 }, { "epoch": 1.6473860754315635, "grad_norm": 0.14699749648571014, "learning_rate": 5.015231017731859e-06, "loss": 0.0031, "step": 100680 }, { "epoch": 1.6475497013826392, "grad_norm": 0.16536717116832733, "learning_rate": 5.014279081797795e-06, "loss": 0.0011, "step": 100690 }, { "epoch": 1.6477133273337152, "grad_norm": 0.0576251819729805, "learning_rate": 5.013327145346148e-06, "loss": 0.0025, "step": 100700 }, { "epoch": 1.647876953284791, "grad_norm": 0.05248529464006424, "learning_rate": 5.0123752084114254e-06, "loss": 0.0019, "step": 100710 }, { "epoch": 1.6480405792358668, "grad_norm": 0.25557661056518555, "learning_rate": 5.011423271028131e-06, "loss": 0.0024, "step": 100720 }, { "epoch": 1.6482042051869428, "grad_norm": 0.24020500481128693, "learning_rate": 5.01047133323077e-06, "loss": 0.0025, "step": 100730 }, { "epoch": 1.6483678311380183, "grad_norm": 0.06769675016403198, "learning_rate": 5.009519395053847e-06, "loss": 0.0029, "step": 100740 }, { "epoch": 1.6485314570890943, "grad_norm": 0.09917227178812027, "learning_rate": 5.00856745653187e-06, "loss": 0.0007, "step": 100750 }, { "epoch": 1.6486950830401703, "grad_norm": 0.051329903304576874, "learning_rate": 5.007615517699342e-06, "loss": 0.0024, "step": 100760 }, { "epoch": 1.6488587089912459, "grad_norm": 0.12798112630844116, "learning_rate": 5.006663578590772e-06, "loss": 0.0014, "step": 100770 }, { "epoch": 1.6490223349423219, "grad_norm": 0.14152054488658905, "learning_rate": 5.005711639240664e-06, "loss": 0.0017, "step": 100780 }, { "epoch": 1.6491859608933976, "grad_norm": 0.05302917957305908, "learning_rate": 5.00475969968352e-06, "loss": 0.001, "step": 100790 }, { "epoch": 1.6493495868444734, "grad_norm": 0.13204002380371094, "learning_rate": 5.003807759953849e-06, "loss": 0.0011, "step": 100800 }, { "epoch": 1.6495132127955494, "grad_norm": 0.0041569992899894714, "learning_rate": 5.002855820086156e-06, "loss": 0.0021, "step": 100810 }, { "epoch": 1.6496768387466252, "grad_norm": 0.15092462301254272, "learning_rate": 5.0019038801149475e-06, "loss": 0.0018, "step": 100820 }, { "epoch": 1.649840464697701, "grad_norm": 0.08255352824926376, "learning_rate": 5.000951940074727e-06, "loss": 0.0017, "step": 100830 }, { "epoch": 1.650004090648777, "grad_norm": 0.05293919891119003, "learning_rate": 5e-06, "loss": 0.0014, "step": 100840 }, { "epoch": 1.6501677165998527, "grad_norm": 0.03797246143221855, "learning_rate": 4.999048059925274e-06, "loss": 0.0015, "step": 100850 }, { "epoch": 1.6503313425509285, "grad_norm": 0.04087596386671066, "learning_rate": 4.998096119885054e-06, "loss": 0.0011, "step": 100860 }, { "epoch": 1.6504949685020045, "grad_norm": 0.07037179172039032, "learning_rate": 4.997144179913845e-06, "loss": 0.0017, "step": 100870 }, { "epoch": 1.6506585944530803, "grad_norm": 0.13170303404331207, "learning_rate": 4.996192240046152e-06, "loss": 0.002, "step": 100880 }, { "epoch": 1.650822220404156, "grad_norm": 0.06456456333398819, "learning_rate": 4.995240300316481e-06, "loss": 0.0015, "step": 100890 }, { "epoch": 1.650985846355232, "grad_norm": 0.16258928179740906, "learning_rate": 4.9942883607593395e-06, "loss": 0.003, "step": 100900 }, { "epoch": 1.6511494723063078, "grad_norm": 0.05388384312391281, "learning_rate": 4.99333642140923e-06, "loss": 0.0026, "step": 100910 }, { "epoch": 1.6513130982573836, "grad_norm": 0.3144051432609558, "learning_rate": 4.992384482300659e-06, "loss": 0.0026, "step": 100920 }, { "epoch": 1.6514767242084596, "grad_norm": 0.10867591202259064, "learning_rate": 4.991432543468132e-06, "loss": 0.0011, "step": 100930 }, { "epoch": 1.6516403501595351, "grad_norm": 0.20788510143756866, "learning_rate": 4.990480604946155e-06, "loss": 0.0033, "step": 100940 }, { "epoch": 1.6518039761106111, "grad_norm": 0.07863085716962814, "learning_rate": 4.989528666769232e-06, "loss": 0.0015, "step": 100950 }, { "epoch": 1.6519676020616871, "grad_norm": 0.07487991452217102, "learning_rate": 4.988576728971871e-06, "loss": 0.0016, "step": 100960 }, { "epoch": 1.6521312280127627, "grad_norm": 0.11730311810970306, "learning_rate": 4.987624791588575e-06, "loss": 0.0016, "step": 100970 }, { "epoch": 1.6522948539638387, "grad_norm": 0.07989366352558136, "learning_rate": 4.986672854653852e-06, "loss": 0.0019, "step": 100980 }, { "epoch": 1.6524584799149145, "grad_norm": 0.03052920289337635, "learning_rate": 4.985720918202205e-06, "loss": 0.0018, "step": 100990 }, { "epoch": 1.6526221058659902, "grad_norm": 0.060879986733198166, "learning_rate": 4.984768982268143e-06, "loss": 0.0011, "step": 101000 }, { "epoch": 1.6527857318170662, "grad_norm": 0.22433774173259735, "learning_rate": 4.983817046886165e-06, "loss": 0.0015, "step": 101010 }, { "epoch": 1.652949357768142, "grad_norm": 0.08787201344966888, "learning_rate": 4.982865112090782e-06, "loss": 0.0018, "step": 101020 }, { "epoch": 1.6531129837192178, "grad_norm": 0.051112495362758636, "learning_rate": 4.981913177916498e-06, "loss": 0.0015, "step": 101030 }, { "epoch": 1.6532766096702938, "grad_norm": 0.24164657294750214, "learning_rate": 4.9809612443978165e-06, "loss": 0.0021, "step": 101040 }, { "epoch": 1.6534402356213695, "grad_norm": 0.010544318705797195, "learning_rate": 4.9800093115692444e-06, "loss": 0.0016, "step": 101050 }, { "epoch": 1.6536038615724453, "grad_norm": 0.12615899741649628, "learning_rate": 4.979057379465287e-06, "loss": 0.0013, "step": 101060 }, { "epoch": 1.6537674875235213, "grad_norm": 0.02685977704823017, "learning_rate": 4.978105448120449e-06, "loss": 0.0014, "step": 101070 }, { "epoch": 1.653931113474597, "grad_norm": 0.022763237357139587, "learning_rate": 4.977153517569237e-06, "loss": 0.0005, "step": 101080 }, { "epoch": 1.6540947394256729, "grad_norm": 0.4035637080669403, "learning_rate": 4.9762015878461536e-06, "loss": 0.0013, "step": 101090 }, { "epoch": 1.6542583653767489, "grad_norm": 0.020210569724440575, "learning_rate": 4.975249658985707e-06, "loss": 0.0024, "step": 101100 }, { "epoch": 1.6544219913278246, "grad_norm": 0.006763089913874865, "learning_rate": 4.9742977310224e-06, "loss": 0.0015, "step": 101110 }, { "epoch": 1.6545856172789004, "grad_norm": 0.23854990303516388, "learning_rate": 4.9733458039907386e-06, "loss": 0.0026, "step": 101120 }, { "epoch": 1.6547492432299764, "grad_norm": 0.12342479079961777, "learning_rate": 4.972393877925227e-06, "loss": 0.0015, "step": 101130 }, { "epoch": 1.654912869181052, "grad_norm": 0.05465370789170265, "learning_rate": 4.971441952860372e-06, "loss": 0.0023, "step": 101140 }, { "epoch": 1.655076495132128, "grad_norm": 0.10789911448955536, "learning_rate": 4.970490028830677e-06, "loss": 0.0014, "step": 101150 }, { "epoch": 1.655240121083204, "grad_norm": 0.02612781710922718, "learning_rate": 4.9695381058706475e-06, "loss": 0.0012, "step": 101160 }, { "epoch": 1.6554037470342795, "grad_norm": 0.1010490208864212, "learning_rate": 4.968586184014787e-06, "loss": 0.0027, "step": 101170 }, { "epoch": 1.6555673729853555, "grad_norm": 0.018832700327038765, "learning_rate": 4.967634263297603e-06, "loss": 0.0012, "step": 101180 }, { "epoch": 1.6557309989364313, "grad_norm": 0.13586290180683136, "learning_rate": 4.9666823437535995e-06, "loss": 0.0019, "step": 101190 }, { "epoch": 1.655894624887507, "grad_norm": 0.10224128514528275, "learning_rate": 4.965730425417284e-06, "loss": 0.0012, "step": 101200 }, { "epoch": 1.656058250838583, "grad_norm": 0.02834460139274597, "learning_rate": 4.964778508323155e-06, "loss": 0.0009, "step": 101210 }, { "epoch": 1.6562218767896588, "grad_norm": 0.1569192260503769, "learning_rate": 4.963826592505721e-06, "loss": 0.0011, "step": 101220 }, { "epoch": 1.6563855027407346, "grad_norm": 0.1372176855802536, "learning_rate": 4.962874677999484e-06, "loss": 0.0016, "step": 101230 }, { "epoch": 1.6565491286918106, "grad_norm": 0.09356169402599335, "learning_rate": 4.961922764838952e-06, "loss": 0.0017, "step": 101240 }, { "epoch": 1.6567127546428864, "grad_norm": 0.12333190441131592, "learning_rate": 4.960970853058629e-06, "loss": 0.0018, "step": 101250 }, { "epoch": 1.6568763805939621, "grad_norm": 0.046497467905282974, "learning_rate": 4.960018942693018e-06, "loss": 0.0015, "step": 101260 }, { "epoch": 1.6570400065450381, "grad_norm": 0.3702888488769531, "learning_rate": 4.959067033776625e-06, "loss": 0.0021, "step": 101270 }, { "epoch": 1.657203632496114, "grad_norm": 0.10438147932291031, "learning_rate": 4.958115126343953e-06, "loss": 0.002, "step": 101280 }, { "epoch": 1.6573672584471897, "grad_norm": 0.11183849722146988, "learning_rate": 4.957163220429507e-06, "loss": 0.0023, "step": 101290 }, { "epoch": 1.6575308843982657, "grad_norm": 0.08231933414936066, "learning_rate": 4.956211316067793e-06, "loss": 0.0011, "step": 101300 }, { "epoch": 1.6576945103493415, "grad_norm": 0.03424077108502388, "learning_rate": 4.955259413293312e-06, "loss": 0.0013, "step": 101310 }, { "epoch": 1.6578581363004172, "grad_norm": 0.14053568243980408, "learning_rate": 4.95430751214057e-06, "loss": 0.0007, "step": 101320 }, { "epoch": 1.6580217622514932, "grad_norm": 0.09763520210981369, "learning_rate": 4.9533556126440705e-06, "loss": 0.004, "step": 101330 }, { "epoch": 1.6581853882025688, "grad_norm": 0.1472080796957016, "learning_rate": 4.952403714838319e-06, "loss": 0.0013, "step": 101340 }, { "epoch": 1.6583490141536448, "grad_norm": 0.19369348883628845, "learning_rate": 4.951451818757817e-06, "loss": 0.0019, "step": 101350 }, { "epoch": 1.6585126401047208, "grad_norm": 0.1374535858631134, "learning_rate": 4.95049992443707e-06, "loss": 0.0016, "step": 101360 }, { "epoch": 1.6586762660557963, "grad_norm": 0.03457542136311531, "learning_rate": 4.9495480319105825e-06, "loss": 0.0017, "step": 101370 }, { "epoch": 1.6588398920068723, "grad_norm": 0.11844110488891602, "learning_rate": 4.9485961412128575e-06, "loss": 0.0018, "step": 101380 }, { "epoch": 1.659003517957948, "grad_norm": 0.06468743085861206, "learning_rate": 4.9476442523783974e-06, "loss": 0.0014, "step": 101390 }, { "epoch": 1.6591671439090239, "grad_norm": 0.15154115855693817, "learning_rate": 4.946692365441711e-06, "loss": 0.0017, "step": 101400 }, { "epoch": 1.6593307698600999, "grad_norm": 0.08270050585269928, "learning_rate": 4.945740480437296e-06, "loss": 0.0015, "step": 101410 }, { "epoch": 1.6594943958111756, "grad_norm": 0.07217680662870407, "learning_rate": 4.944788597399658e-06, "loss": 0.0017, "step": 101420 }, { "epoch": 1.6596580217622514, "grad_norm": 0.13497240841388702, "learning_rate": 4.943836716363301e-06, "loss": 0.0027, "step": 101430 }, { "epoch": 1.6598216477133274, "grad_norm": 0.014526392333209515, "learning_rate": 4.942884837362727e-06, "loss": 0.0025, "step": 101440 }, { "epoch": 1.6599852736644032, "grad_norm": 0.1114325225353241, "learning_rate": 4.94193296043244e-06, "loss": 0.0084, "step": 101450 }, { "epoch": 1.660148899615479, "grad_norm": 0.28763630986213684, "learning_rate": 4.9409810856069445e-06, "loss": 0.0015, "step": 101460 }, { "epoch": 1.660312525566555, "grad_norm": 0.1795928180217743, "learning_rate": 4.940029212920743e-06, "loss": 0.0009, "step": 101470 }, { "epoch": 1.6604761515176307, "grad_norm": 0.03637954220175743, "learning_rate": 4.9390773424083385e-06, "loss": 0.0011, "step": 101480 }, { "epoch": 1.6606397774687065, "grad_norm": 0.06823773682117462, "learning_rate": 4.938125474104234e-06, "loss": 0.0018, "step": 101490 }, { "epoch": 1.6608034034197825, "grad_norm": 0.1163569837808609, "learning_rate": 4.937173608042934e-06, "loss": 0.0023, "step": 101500 }, { "epoch": 1.6609670293708583, "grad_norm": 0.027382541447877884, "learning_rate": 4.9362217442589375e-06, "loss": 0.0009, "step": 101510 }, { "epoch": 1.661130655321934, "grad_norm": 0.1631830930709839, "learning_rate": 4.935269882786749e-06, "loss": 0.0013, "step": 101520 }, { "epoch": 1.66129428127301, "grad_norm": 0.14096766710281372, "learning_rate": 4.934318023660873e-06, "loss": 0.0018, "step": 101530 }, { "epoch": 1.6614579072240856, "grad_norm": 0.12342914193868637, "learning_rate": 4.93336616691581e-06, "loss": 0.002, "step": 101540 }, { "epoch": 1.6616215331751616, "grad_norm": 0.06792972981929779, "learning_rate": 4.9324143125860645e-06, "loss": 0.0019, "step": 101550 }, { "epoch": 1.6617851591262376, "grad_norm": 0.17113278806209564, "learning_rate": 4.9314624607061365e-06, "loss": 0.0024, "step": 101560 }, { "epoch": 1.6619487850773131, "grad_norm": 0.18928660452365875, "learning_rate": 4.930510611310529e-06, "loss": 0.0018, "step": 101570 }, { "epoch": 1.6621124110283891, "grad_norm": 0.021074140444397926, "learning_rate": 4.929558764433746e-06, "loss": 0.0012, "step": 101580 }, { "epoch": 1.662276036979465, "grad_norm": 0.0746341347694397, "learning_rate": 4.928606920110288e-06, "loss": 0.0009, "step": 101590 }, { "epoch": 1.6624396629305407, "grad_norm": 0.24103547632694244, "learning_rate": 4.927655078374659e-06, "loss": 0.0016, "step": 101600 }, { "epoch": 1.6626032888816167, "grad_norm": 0.08127669245004654, "learning_rate": 4.926703239261358e-06, "loss": 0.0014, "step": 101610 }, { "epoch": 1.6627669148326925, "grad_norm": 0.10390322655439377, "learning_rate": 4.925751402804889e-06, "loss": 0.0012, "step": 101620 }, { "epoch": 1.6629305407837682, "grad_norm": 0.06363886594772339, "learning_rate": 4.924799569039753e-06, "loss": 0.0012, "step": 101630 }, { "epoch": 1.6630941667348442, "grad_norm": 0.06226643547415733, "learning_rate": 4.923847738000452e-06, "loss": 0.0013, "step": 101640 }, { "epoch": 1.66325779268592, "grad_norm": 0.2445535510778427, "learning_rate": 4.922895909721488e-06, "loss": 0.0016, "step": 101650 }, { "epoch": 1.6634214186369958, "grad_norm": 0.2508658170700073, "learning_rate": 4.92194408423736e-06, "loss": 0.0018, "step": 101660 }, { "epoch": 1.6635850445880718, "grad_norm": 0.0561600998044014, "learning_rate": 4.920992261582572e-06, "loss": 0.0015, "step": 101670 }, { "epoch": 1.6637486705391475, "grad_norm": 0.04056508466601372, "learning_rate": 4.920040441791625e-06, "loss": 0.0015, "step": 101680 }, { "epoch": 1.6639122964902233, "grad_norm": 0.14251777529716492, "learning_rate": 4.91908862489902e-06, "loss": 0.0026, "step": 101690 }, { "epoch": 1.6640759224412993, "grad_norm": 0.07927603274583817, "learning_rate": 4.91813681093926e-06, "loss": 0.001, "step": 101700 }, { "epoch": 1.6642395483923749, "grad_norm": 0.16103298962116241, "learning_rate": 4.917184999946842e-06, "loss": 0.0017, "step": 101710 }, { "epoch": 1.6644031743434509, "grad_norm": 0.06844595819711685, "learning_rate": 4.916233191956268e-06, "loss": 0.0028, "step": 101720 }, { "epoch": 1.6645668002945269, "grad_norm": 0.05104893445968628, "learning_rate": 4.915281387002039e-06, "loss": 0.0025, "step": 101730 }, { "epoch": 1.6647304262456024, "grad_norm": 0.09660768508911133, "learning_rate": 4.914329585118657e-06, "loss": 0.0014, "step": 101740 }, { "epoch": 1.6648940521966784, "grad_norm": 0.10084972530603409, "learning_rate": 4.9133777863406215e-06, "loss": 0.0021, "step": 101750 }, { "epoch": 1.6650576781477542, "grad_norm": 0.017485352233052254, "learning_rate": 4.912425990702434e-06, "loss": 0.0012, "step": 101760 }, { "epoch": 1.66522130409883, "grad_norm": 0.08894899487495422, "learning_rate": 4.911474198238592e-06, "loss": 0.0017, "step": 101770 }, { "epoch": 1.665384930049906, "grad_norm": 0.007512619718909264, "learning_rate": 4.9105224089836e-06, "loss": 0.0015, "step": 101780 }, { "epoch": 1.6655485560009817, "grad_norm": 0.024128632619976997, "learning_rate": 4.909570622971953e-06, "loss": 0.0026, "step": 101790 }, { "epoch": 1.6657121819520575, "grad_norm": 0.07698648422956467, "learning_rate": 4.9086188402381565e-06, "loss": 0.0011, "step": 101800 }, { "epoch": 1.6658758079031335, "grad_norm": 0.07614482939243317, "learning_rate": 4.907667060816706e-06, "loss": 0.0018, "step": 101810 }, { "epoch": 1.6660394338542093, "grad_norm": 0.05847081169486046, "learning_rate": 4.906715284742102e-06, "loss": 0.0013, "step": 101820 }, { "epoch": 1.666203059805285, "grad_norm": 0.12756794691085815, "learning_rate": 4.905763512048845e-06, "loss": 0.0012, "step": 101830 }, { "epoch": 1.666366685756361, "grad_norm": 0.18289168179035187, "learning_rate": 4.904811742771434e-06, "loss": 0.0015, "step": 101840 }, { "epoch": 1.6665303117074368, "grad_norm": 0.2453628033399582, "learning_rate": 4.903859976944369e-06, "loss": 0.0013, "step": 101850 }, { "epoch": 1.6666939376585126, "grad_norm": 0.1711772233247757, "learning_rate": 4.9029082146021486e-06, "loss": 0.0011, "step": 101860 }, { "epoch": 1.6668575636095886, "grad_norm": 0.047105997800827026, "learning_rate": 4.901956455779272e-06, "loss": 0.0008, "step": 101870 }, { "epoch": 1.6670211895606644, "grad_norm": 0.12165622413158417, "learning_rate": 4.901004700510238e-06, "loss": 0.0019, "step": 101880 }, { "epoch": 1.6671848155117401, "grad_norm": 0.06262020766735077, "learning_rate": 4.900052948829546e-06, "loss": 0.0011, "step": 101890 }, { "epoch": 1.6673484414628161, "grad_norm": 0.08097384870052338, "learning_rate": 4.899101200771696e-06, "loss": 0.0018, "step": 101900 }, { "epoch": 1.6675120674138917, "grad_norm": 0.35216423869132996, "learning_rate": 4.898149456371184e-06, "loss": 0.0017, "step": 101910 }, { "epoch": 1.6676756933649677, "grad_norm": 0.15078285336494446, "learning_rate": 4.897197715662507e-06, "loss": 0.001, "step": 101920 }, { "epoch": 1.6678393193160437, "grad_norm": 0.3330957293510437, "learning_rate": 4.896245978680169e-06, "loss": 0.001, "step": 101930 }, { "epoch": 1.6680029452671192, "grad_norm": 0.10701004415750504, "learning_rate": 4.895294245458663e-06, "loss": 0.0015, "step": 101940 }, { "epoch": 1.6681665712181952, "grad_norm": 0.13928097486495972, "learning_rate": 4.894342516032488e-06, "loss": 0.0034, "step": 101950 }, { "epoch": 1.668330197169271, "grad_norm": 0.041342947632074356, "learning_rate": 4.893390790436144e-06, "loss": 0.0012, "step": 101960 }, { "epoch": 1.6684938231203468, "grad_norm": 0.04353650286793709, "learning_rate": 4.892439068704128e-06, "loss": 0.0011, "step": 101970 }, { "epoch": 1.6686574490714228, "grad_norm": 0.37439417839050293, "learning_rate": 4.891487350870937e-06, "loss": 0.002, "step": 101980 }, { "epoch": 1.6688210750224985, "grad_norm": 0.13891646265983582, "learning_rate": 4.890535636971069e-06, "loss": 0.0023, "step": 101990 }, { "epoch": 1.6689847009735743, "grad_norm": 0.06375404447317123, "learning_rate": 4.8895839270390215e-06, "loss": 0.001, "step": 102000 }, { "epoch": 1.6691483269246503, "grad_norm": 0.054322827607393265, "learning_rate": 4.888632221109291e-06, "loss": 0.0015, "step": 102010 }, { "epoch": 1.669311952875726, "grad_norm": 0.15197350084781647, "learning_rate": 4.887680519216374e-06, "loss": 0.0031, "step": 102020 }, { "epoch": 1.6694755788268019, "grad_norm": 0.0697273313999176, "learning_rate": 4.886728821394769e-06, "loss": 0.0021, "step": 102030 }, { "epoch": 1.6696392047778779, "grad_norm": 0.13675615191459656, "learning_rate": 4.885777127678972e-06, "loss": 0.0007, "step": 102040 }, { "epoch": 1.6698028307289536, "grad_norm": 0.34040510654449463, "learning_rate": 4.884825438103479e-06, "loss": 0.0028, "step": 102050 }, { "epoch": 1.6699664566800294, "grad_norm": 0.23215000331401825, "learning_rate": 4.883873752702787e-06, "loss": 0.0016, "step": 102060 }, { "epoch": 1.6701300826311054, "grad_norm": 0.1076582595705986, "learning_rate": 4.882922071511393e-06, "loss": 0.0012, "step": 102070 }, { "epoch": 1.6702937085821812, "grad_norm": 0.1291218400001526, "learning_rate": 4.881970394563792e-06, "loss": 0.0016, "step": 102080 }, { "epoch": 1.670457334533257, "grad_norm": 0.19146296381950378, "learning_rate": 4.88101872189448e-06, "loss": 0.0023, "step": 102090 }, { "epoch": 1.670620960484333, "grad_norm": 0.054804664105176926, "learning_rate": 4.880067053537956e-06, "loss": 0.0014, "step": 102100 }, { "epoch": 1.6707845864354085, "grad_norm": 0.04826841130852699, "learning_rate": 4.879115389528711e-06, "loss": 0.0016, "step": 102110 }, { "epoch": 1.6709482123864845, "grad_norm": 0.04021270573139191, "learning_rate": 4.878163729901242e-06, "loss": 0.0015, "step": 102120 }, { "epoch": 1.6711118383375605, "grad_norm": 0.14630895853042603, "learning_rate": 4.877212074690046e-06, "loss": 0.0021, "step": 102130 }, { "epoch": 1.671275464288636, "grad_norm": 0.042796749621629715, "learning_rate": 4.8762604239296165e-06, "loss": 0.0016, "step": 102140 }, { "epoch": 1.671439090239712, "grad_norm": 0.03952167555689812, "learning_rate": 4.87530877765445e-06, "loss": 0.0013, "step": 102150 }, { "epoch": 1.6716027161907878, "grad_norm": 0.42436733841896057, "learning_rate": 4.8743571358990386e-06, "loss": 0.0023, "step": 102160 }, { "epoch": 1.6717663421418636, "grad_norm": 0.06887306272983551, "learning_rate": 4.8734054986978815e-06, "loss": 0.0014, "step": 102170 }, { "epoch": 1.6719299680929396, "grad_norm": 0.019923284649848938, "learning_rate": 4.8724538660854695e-06, "loss": 0.0014, "step": 102180 }, { "epoch": 1.6720935940440154, "grad_norm": 0.04699859023094177, "learning_rate": 4.871502238096299e-06, "loss": 0.0012, "step": 102190 }, { "epoch": 1.6722572199950911, "grad_norm": 0.04753861203789711, "learning_rate": 4.870550614764865e-06, "loss": 0.001, "step": 102200 }, { "epoch": 1.6724208459461671, "grad_norm": 0.06307676434516907, "learning_rate": 4.869598996125658e-06, "loss": 0.002, "step": 102210 }, { "epoch": 1.672584471897243, "grad_norm": 0.08829619735479355, "learning_rate": 4.868647382213174e-06, "loss": 0.0027, "step": 102220 }, { "epoch": 1.6727480978483187, "grad_norm": 0.05037305876612663, "learning_rate": 4.867695773061907e-06, "loss": 0.0011, "step": 102230 }, { "epoch": 1.6729117237993947, "grad_norm": 0.22658604383468628, "learning_rate": 4.8667441687063506e-06, "loss": 0.0018, "step": 102240 }, { "epoch": 1.6730753497504705, "grad_norm": 0.19167353212833405, "learning_rate": 4.865792569180997e-06, "loss": 0.0011, "step": 102250 }, { "epoch": 1.6732389757015462, "grad_norm": 0.011489463970065117, "learning_rate": 4.864840974520341e-06, "loss": 0.0008, "step": 102260 }, { "epoch": 1.6734026016526222, "grad_norm": 0.1869586706161499, "learning_rate": 4.863889384758874e-06, "loss": 0.0008, "step": 102270 }, { "epoch": 1.673566227603698, "grad_norm": 0.07528888434171677, "learning_rate": 4.862937799931091e-06, "loss": 0.0018, "step": 102280 }, { "epoch": 1.6737298535547738, "grad_norm": 0.09467701613903046, "learning_rate": 4.8619862200714826e-06, "loss": 0.0012, "step": 102290 }, { "epoch": 1.6738934795058498, "grad_norm": 0.1533868908882141, "learning_rate": 4.861034645214543e-06, "loss": 0.0014, "step": 102300 }, { "epoch": 1.6740571054569253, "grad_norm": 0.08616022765636444, "learning_rate": 4.860083075394762e-06, "loss": 0.0013, "step": 102310 }, { "epoch": 1.6742207314080013, "grad_norm": 0.08038025349378586, "learning_rate": 4.859131510646634e-06, "loss": 0.0019, "step": 102320 }, { "epoch": 1.6743843573590773, "grad_norm": 0.19674372673034668, "learning_rate": 4.8581799510046495e-06, "loss": 0.0013, "step": 102330 }, { "epoch": 1.6745479833101529, "grad_norm": 0.07996311783790588, "learning_rate": 4.857228396503302e-06, "loss": 0.0011, "step": 102340 }, { "epoch": 1.6747116092612289, "grad_norm": 0.09866899251937866, "learning_rate": 4.856276847177081e-06, "loss": 0.0013, "step": 102350 }, { "epoch": 1.6748752352123046, "grad_norm": 0.08916238695383072, "learning_rate": 4.855325303060479e-06, "loss": 0.0014, "step": 102360 }, { "epoch": 1.6750388611633804, "grad_norm": 0.01661216840147972, "learning_rate": 4.854373764187988e-06, "loss": 0.003, "step": 102370 }, { "epoch": 1.6752024871144564, "grad_norm": 0.029480399563908577, "learning_rate": 4.853422230594096e-06, "loss": 0.0013, "step": 102380 }, { "epoch": 1.6753661130655322, "grad_norm": 0.07779080420732498, "learning_rate": 4.852470702313297e-06, "loss": 0.0023, "step": 102390 }, { "epoch": 1.675529739016608, "grad_norm": 0.12956365942955017, "learning_rate": 4.851519179380082e-06, "loss": 0.0017, "step": 102400 }, { "epoch": 1.675693364967684, "grad_norm": 0.010274041444063187, "learning_rate": 4.850567661828939e-06, "loss": 0.0011, "step": 102410 }, { "epoch": 1.6758569909187597, "grad_norm": 0.09707976132631302, "learning_rate": 4.849616149694358e-06, "loss": 0.0019, "step": 102420 }, { "epoch": 1.6760206168698355, "grad_norm": 0.11230769008398056, "learning_rate": 4.848664643010831e-06, "loss": 0.0015, "step": 102430 }, { "epoch": 1.6761842428209115, "grad_norm": 0.007492445409297943, "learning_rate": 4.847713141812845e-06, "loss": 0.0009, "step": 102440 }, { "epoch": 1.6763478687719873, "grad_norm": 0.08946806192398071, "learning_rate": 4.846761646134894e-06, "loss": 0.0013, "step": 102450 }, { "epoch": 1.676511494723063, "grad_norm": 0.08170616626739502, "learning_rate": 4.845810156011464e-06, "loss": 0.0019, "step": 102460 }, { "epoch": 1.676675120674139, "grad_norm": 0.11584620177745819, "learning_rate": 4.844858671477046e-06, "loss": 0.0016, "step": 102470 }, { "epoch": 1.6768387466252146, "grad_norm": 0.29062244296073914, "learning_rate": 4.843907192566127e-06, "loss": 0.0016, "step": 102480 }, { "epoch": 1.6770023725762906, "grad_norm": 0.058468639850616455, "learning_rate": 4.842955719313198e-06, "loss": 0.0028, "step": 102490 }, { "epoch": 1.6771659985273666, "grad_norm": 0.19412654638290405, "learning_rate": 4.842004251752748e-06, "loss": 0.0021, "step": 102500 }, { "epoch": 1.6773296244784421, "grad_norm": 0.017403732985258102, "learning_rate": 4.841052789919263e-06, "loss": 0.0006, "step": 102510 }, { "epoch": 1.6774932504295181, "grad_norm": 0.06674040853977203, "learning_rate": 4.840101333847232e-06, "loss": 0.0012, "step": 102520 }, { "epoch": 1.677656876380594, "grad_norm": 0.07726369798183441, "learning_rate": 4.839149883571144e-06, "loss": 0.0017, "step": 102530 }, { "epoch": 1.6778205023316697, "grad_norm": 0.02296977862715721, "learning_rate": 4.838198439125485e-06, "loss": 0.0009, "step": 102540 }, { "epoch": 1.6779841282827457, "grad_norm": 0.1686161309480667, "learning_rate": 4.837247000544745e-06, "loss": 0.0012, "step": 102550 }, { "epoch": 1.6781477542338215, "grad_norm": 0.050670076161623, "learning_rate": 4.836295567863409e-06, "loss": 0.001, "step": 102560 }, { "epoch": 1.6783113801848972, "grad_norm": 0.006474161520600319, "learning_rate": 4.835344141115965e-06, "loss": 0.0022, "step": 102570 }, { "epoch": 1.6784750061359732, "grad_norm": 0.050367601215839386, "learning_rate": 4.834392720336901e-06, "loss": 0.0017, "step": 102580 }, { "epoch": 1.678638632087049, "grad_norm": 0.11805913597345352, "learning_rate": 4.833441305560702e-06, "loss": 0.0023, "step": 102590 }, { "epoch": 1.6788022580381248, "grad_norm": 0.015265713445842266, "learning_rate": 4.832489896821858e-06, "loss": 0.0013, "step": 102600 }, { "epoch": 1.6789658839892008, "grad_norm": 0.13472580909729004, "learning_rate": 4.831538494154851e-06, "loss": 0.0078, "step": 102610 }, { "epoch": 1.6791295099402765, "grad_norm": 0.18250545859336853, "learning_rate": 4.830587097594167e-06, "loss": 0.0015, "step": 102620 }, { "epoch": 1.6792931358913523, "grad_norm": 0.010597911663353443, "learning_rate": 4.829635707174295e-06, "loss": 0.0015, "step": 102630 }, { "epoch": 1.6794567618424283, "grad_norm": 0.3909345269203186, "learning_rate": 4.82868432292972e-06, "loss": 0.0015, "step": 102640 }, { "epoch": 1.679620387793504, "grad_norm": 0.05626668408513069, "learning_rate": 4.827732944894925e-06, "loss": 0.0019, "step": 102650 }, { "epoch": 1.6797840137445799, "grad_norm": 0.10206018388271332, "learning_rate": 4.826781573104397e-06, "loss": 0.0014, "step": 102660 }, { "epoch": 1.6799476396956559, "grad_norm": 0.18538853526115417, "learning_rate": 4.82583020759262e-06, "loss": 0.0009, "step": 102670 }, { "epoch": 1.6801112656467314, "grad_norm": 0.07171491533517838, "learning_rate": 4.824878848394081e-06, "loss": 0.0022, "step": 102680 }, { "epoch": 1.6802748915978074, "grad_norm": 0.07217099517583847, "learning_rate": 4.8239274955432615e-06, "loss": 0.0012, "step": 102690 }, { "epoch": 1.6804385175488834, "grad_norm": 0.013641337864100933, "learning_rate": 4.82297614907465e-06, "loss": 0.0011, "step": 102700 }, { "epoch": 1.680602143499959, "grad_norm": 0.10386582463979721, "learning_rate": 4.822024809022725e-06, "loss": 0.0015, "step": 102710 }, { "epoch": 1.680765769451035, "grad_norm": 0.012440638616681099, "learning_rate": 4.821073475421972e-06, "loss": 0.0019, "step": 102720 }, { "epoch": 1.6809293954021107, "grad_norm": 0.10142288357019424, "learning_rate": 4.820122148306878e-06, "loss": 0.0014, "step": 102730 }, { "epoch": 1.6810930213531865, "grad_norm": 0.06657785922288895, "learning_rate": 4.819170827711922e-06, "loss": 0.0017, "step": 102740 }, { "epoch": 1.6812566473042625, "grad_norm": 0.1728191375732422, "learning_rate": 4.818219513671589e-06, "loss": 0.0025, "step": 102750 }, { "epoch": 1.6814202732553383, "grad_norm": 0.34341153502464294, "learning_rate": 4.817268206220362e-06, "loss": 0.0013, "step": 102760 }, { "epoch": 1.681583899206414, "grad_norm": 0.07850497215986252, "learning_rate": 4.816316905392724e-06, "loss": 0.0013, "step": 102770 }, { "epoch": 1.68174752515749, "grad_norm": 0.1214834600687027, "learning_rate": 4.815365611223156e-06, "loss": 0.002, "step": 102780 }, { "epoch": 1.6819111511085658, "grad_norm": 0.09038961678743362, "learning_rate": 4.8144143237461416e-06, "loss": 0.0019, "step": 102790 }, { "epoch": 1.6820747770596416, "grad_norm": 0.04943402484059334, "learning_rate": 4.813463042996162e-06, "loss": 0.0022, "step": 102800 }, { "epoch": 1.6822384030107176, "grad_norm": 0.40753304958343506, "learning_rate": 4.812511769007699e-06, "loss": 0.0013, "step": 102810 }, { "epoch": 1.6824020289617934, "grad_norm": 0.023161349818110466, "learning_rate": 4.811560501815232e-06, "loss": 0.0016, "step": 102820 }, { "epoch": 1.6825656549128691, "grad_norm": 0.14704051613807678, "learning_rate": 4.810609241453245e-06, "loss": 0.0013, "step": 102830 }, { "epoch": 1.6827292808639451, "grad_norm": 0.14873899519443512, "learning_rate": 4.809657987956218e-06, "loss": 0.0011, "step": 102840 }, { "epoch": 1.682892906815021, "grad_norm": 0.15760017931461334, "learning_rate": 4.808706741358631e-06, "loss": 0.0012, "step": 102850 }, { "epoch": 1.6830565327660967, "grad_norm": 0.10897698253393173, "learning_rate": 4.807755501694965e-06, "loss": 0.0014, "step": 102860 }, { "epoch": 1.6832201587171727, "grad_norm": 0.014204490929841995, "learning_rate": 4.8068042689997e-06, "loss": 0.001, "step": 102870 }, { "epoch": 1.6833837846682482, "grad_norm": 0.20954525470733643, "learning_rate": 4.805853043307315e-06, "loss": 0.0015, "step": 102880 }, { "epoch": 1.6835474106193242, "grad_norm": 0.02612285688519478, "learning_rate": 4.804901824652291e-06, "loss": 0.0017, "step": 102890 }, { "epoch": 1.6837110365704002, "grad_norm": 0.05482814833521843, "learning_rate": 4.803950613069107e-06, "loss": 0.001, "step": 102900 }, { "epoch": 1.6838746625214758, "grad_norm": 0.10835659503936768, "learning_rate": 4.8029994085922446e-06, "loss": 0.0013, "step": 102910 }, { "epoch": 1.6840382884725518, "grad_norm": 0.03488203510642052, "learning_rate": 4.802048211256179e-06, "loss": 0.0009, "step": 102920 }, { "epoch": 1.6842019144236275, "grad_norm": 0.09130261838436127, "learning_rate": 4.801097021095388e-06, "loss": 0.0016, "step": 102930 }, { "epoch": 1.6843655403747033, "grad_norm": 0.06012992560863495, "learning_rate": 4.800145838144352e-06, "loss": 0.007, "step": 102940 }, { "epoch": 1.6845291663257793, "grad_norm": 0.006967310328036547, "learning_rate": 4.79919466243755e-06, "loss": 0.002, "step": 102950 }, { "epoch": 1.684692792276855, "grad_norm": 0.16076301038265228, "learning_rate": 4.798243494009459e-06, "loss": 0.0019, "step": 102960 }, { "epoch": 1.6848564182279309, "grad_norm": 0.03123948909342289, "learning_rate": 4.797292332894556e-06, "loss": 0.0012, "step": 102970 }, { "epoch": 1.6850200441790069, "grad_norm": 0.4742262065410614, "learning_rate": 4.79634117912732e-06, "loss": 0.0028, "step": 102980 }, { "epoch": 1.6851836701300826, "grad_norm": 0.06803572922945023, "learning_rate": 4.7953900327422254e-06, "loss": 0.0027, "step": 102990 }, { "epoch": 1.6853472960811584, "grad_norm": 0.10734101384878159, "learning_rate": 4.794438893773751e-06, "loss": 0.0021, "step": 103000 }, { "epoch": 1.6855109220322344, "grad_norm": 0.08367973566055298, "learning_rate": 4.793487762256373e-06, "loss": 0.0013, "step": 103010 }, { "epoch": 1.6856745479833102, "grad_norm": 0.13024799525737762, "learning_rate": 4.792536638224567e-06, "loss": 0.002, "step": 103020 }, { "epoch": 1.685838173934386, "grad_norm": 0.05504586920142174, "learning_rate": 4.791585521712809e-06, "loss": 0.0017, "step": 103030 }, { "epoch": 1.686001799885462, "grad_norm": 0.13645358383655548, "learning_rate": 4.790634412755575e-06, "loss": 0.002, "step": 103040 }, { "epoch": 1.6861654258365377, "grad_norm": 0.09810586273670197, "learning_rate": 4.78968331138734e-06, "loss": 0.0016, "step": 103050 }, { "epoch": 1.6863290517876135, "grad_norm": 0.19095927476882935, "learning_rate": 4.788732217642579e-06, "loss": 0.0019, "step": 103060 }, { "epoch": 1.6864926777386895, "grad_norm": 0.13595440983772278, "learning_rate": 4.787781131555768e-06, "loss": 0.0015, "step": 103070 }, { "epoch": 1.686656303689765, "grad_norm": 0.004306282382458448, "learning_rate": 4.786830053161381e-06, "loss": 0.0027, "step": 103080 }, { "epoch": 1.686819929640841, "grad_norm": 0.09551531076431274, "learning_rate": 4.785878982493893e-06, "loss": 0.0028, "step": 103090 }, { "epoch": 1.686983555591917, "grad_norm": 0.015349028632044792, "learning_rate": 4.784927919587775e-06, "loss": 0.0008, "step": 103100 }, { "epoch": 1.6871471815429926, "grad_norm": 0.16596737504005432, "learning_rate": 4.7839768644775066e-06, "loss": 0.0022, "step": 103110 }, { "epoch": 1.6873108074940686, "grad_norm": 0.058846212923526764, "learning_rate": 4.783025817197555e-06, "loss": 0.0023, "step": 103120 }, { "epoch": 1.6874744334451444, "grad_norm": 0.02900262549519539, "learning_rate": 4.7820747777823965e-06, "loss": 0.0018, "step": 103130 }, { "epoch": 1.6876380593962201, "grad_norm": 0.056173913180828094, "learning_rate": 4.781123746266504e-06, "loss": 0.0019, "step": 103140 }, { "epoch": 1.6878016853472961, "grad_norm": 0.05425018072128296, "learning_rate": 4.780172722684349e-06, "loss": 0.0014, "step": 103150 }, { "epoch": 1.687965311298372, "grad_norm": 0.08125453442335129, "learning_rate": 4.779221707070404e-06, "loss": 0.0017, "step": 103160 }, { "epoch": 1.6881289372494477, "grad_norm": 0.16048917174339294, "learning_rate": 4.778270699459141e-06, "loss": 0.0023, "step": 103170 }, { "epoch": 1.6882925632005237, "grad_norm": 0.11048179119825363, "learning_rate": 4.777319699885034e-06, "loss": 0.0016, "step": 103180 }, { "epoch": 1.6884561891515995, "grad_norm": 0.034016139805316925, "learning_rate": 4.776368708382552e-06, "loss": 0.0023, "step": 103190 }, { "epoch": 1.6886198151026752, "grad_norm": 0.020225824788212776, "learning_rate": 4.775417724986167e-06, "loss": 0.0011, "step": 103200 }, { "epoch": 1.6887834410537512, "grad_norm": 0.13484328985214233, "learning_rate": 4.774466749730351e-06, "loss": 0.0017, "step": 103210 }, { "epoch": 1.688947067004827, "grad_norm": 0.03486631065607071, "learning_rate": 4.773515782649572e-06, "loss": 0.0025, "step": 103220 }, { "epoch": 1.6891106929559028, "grad_norm": 0.0689317137002945, "learning_rate": 4.772564823778301e-06, "loss": 0.0016, "step": 103230 }, { "epoch": 1.6892743189069788, "grad_norm": 0.08258197456598282, "learning_rate": 4.771613873151009e-06, "loss": 0.002, "step": 103240 }, { "epoch": 1.6894379448580545, "grad_norm": 0.0489678680896759, "learning_rate": 4.7706629308021655e-06, "loss": 0.0018, "step": 103250 }, { "epoch": 1.6896015708091303, "grad_norm": 0.06331710517406464, "learning_rate": 4.769711996766239e-06, "loss": 0.001, "step": 103260 }, { "epoch": 1.6897651967602063, "grad_norm": 0.10209188610315323, "learning_rate": 4.768761071077699e-06, "loss": 0.001, "step": 103270 }, { "epoch": 1.6899288227112819, "grad_norm": 0.048321615904569626, "learning_rate": 4.7678101537710145e-06, "loss": 0.001, "step": 103280 }, { "epoch": 1.6900924486623579, "grad_norm": 0.12960122525691986, "learning_rate": 4.766859244880654e-06, "loss": 0.003, "step": 103290 }, { "epoch": 1.6902560746134339, "grad_norm": 0.05311160162091255, "learning_rate": 4.765908344441086e-06, "loss": 0.0015, "step": 103300 }, { "epoch": 1.6904197005645094, "grad_norm": 0.07875974476337433, "learning_rate": 4.764957452486778e-06, "loss": 0.0011, "step": 103310 }, { "epoch": 1.6905833265155854, "grad_norm": 0.06443706899881363, "learning_rate": 4.764006569052198e-06, "loss": 0.0012, "step": 103320 }, { "epoch": 1.6907469524666612, "grad_norm": 0.04119015485048294, "learning_rate": 4.763055694171811e-06, "loss": 0.0009, "step": 103330 }, { "epoch": 1.690910578417737, "grad_norm": 0.39334192872047424, "learning_rate": 4.762104827880086e-06, "loss": 0.0029, "step": 103340 }, { "epoch": 1.691074204368813, "grad_norm": 0.10048554092645645, "learning_rate": 4.76115397021149e-06, "loss": 0.0012, "step": 103350 }, { "epoch": 1.6912378303198887, "grad_norm": 0.06044529378414154, "learning_rate": 4.760203121200488e-06, "loss": 0.0009, "step": 103360 }, { "epoch": 1.6914014562709645, "grad_norm": 0.07305005192756653, "learning_rate": 4.759252280881545e-06, "loss": 0.0022, "step": 103370 }, { "epoch": 1.6915650822220405, "grad_norm": 0.28686127066612244, "learning_rate": 4.75830144928913e-06, "loss": 0.0021, "step": 103380 }, { "epoch": 1.6917287081731163, "grad_norm": 0.10054086893796921, "learning_rate": 4.757350626457706e-06, "loss": 0.0022, "step": 103390 }, { "epoch": 1.691892334124192, "grad_norm": 0.1459931582212448, "learning_rate": 4.756399812421739e-06, "loss": 0.0017, "step": 103400 }, { "epoch": 1.692055960075268, "grad_norm": 0.06654481589794159, "learning_rate": 4.755449007215695e-06, "loss": 0.0008, "step": 103410 }, { "epoch": 1.6922195860263438, "grad_norm": 0.10007412731647491, "learning_rate": 4.754498210874035e-06, "loss": 0.0022, "step": 103420 }, { "epoch": 1.6923832119774196, "grad_norm": 0.04054652899503708, "learning_rate": 4.753547423431224e-06, "loss": 0.003, "step": 103430 }, { "epoch": 1.6925468379284956, "grad_norm": 0.15338733792304993, "learning_rate": 4.752596644921727e-06, "loss": 0.0015, "step": 103440 }, { "epoch": 1.6927104638795711, "grad_norm": 0.008783353492617607, "learning_rate": 4.751645875380007e-06, "loss": 0.0012, "step": 103450 }, { "epoch": 1.6928740898306471, "grad_norm": 0.04697680473327637, "learning_rate": 4.750695114840528e-06, "loss": 0.0012, "step": 103460 }, { "epoch": 1.6930377157817231, "grad_norm": 0.03068576194345951, "learning_rate": 4.749744363337751e-06, "loss": 0.0012, "step": 103470 }, { "epoch": 1.6932013417327987, "grad_norm": 0.11015503108501434, "learning_rate": 4.74879362090614e-06, "loss": 0.002, "step": 103480 }, { "epoch": 1.6933649676838747, "grad_norm": 0.1084451824426651, "learning_rate": 4.747842887580156e-06, "loss": 0.0019, "step": 103490 }, { "epoch": 1.6935285936349505, "grad_norm": 0.052836913615465164, "learning_rate": 4.746892163394261e-06, "loss": 0.001, "step": 103500 }, { "epoch": 1.6936922195860262, "grad_norm": 0.1374897062778473, "learning_rate": 4.745941448382918e-06, "loss": 0.0016, "step": 103510 }, { "epoch": 1.6938558455371022, "grad_norm": 0.19534315168857574, "learning_rate": 4.744990742580586e-06, "loss": 0.0013, "step": 103520 }, { "epoch": 1.694019471488178, "grad_norm": 0.007172324229031801, "learning_rate": 4.744040046021728e-06, "loss": 0.0008, "step": 103530 }, { "epoch": 1.6941830974392538, "grad_norm": 0.04822874441742897, "learning_rate": 4.743089358740801e-06, "loss": 0.0014, "step": 103540 }, { "epoch": 1.6943467233903298, "grad_norm": 0.0776359960436821, "learning_rate": 4.742138680772268e-06, "loss": 0.0009, "step": 103550 }, { "epoch": 1.6945103493414055, "grad_norm": 0.021961791440844536, "learning_rate": 4.741188012150588e-06, "loss": 0.0011, "step": 103560 }, { "epoch": 1.6946739752924813, "grad_norm": 0.08490357547998428, "learning_rate": 4.740237352910221e-06, "loss": 0.0022, "step": 103570 }, { "epoch": 1.6948376012435573, "grad_norm": 0.08773563802242279, "learning_rate": 4.739286703085625e-06, "loss": 0.0021, "step": 103580 }, { "epoch": 1.695001227194633, "grad_norm": 0.11180084943771362, "learning_rate": 4.738336062711259e-06, "loss": 0.0029, "step": 103590 }, { "epoch": 1.6951648531457089, "grad_norm": 0.22186006605625153, "learning_rate": 4.737385431821583e-06, "loss": 0.0021, "step": 103600 }, { "epoch": 1.6953284790967849, "grad_norm": 0.07085279375314713, "learning_rate": 4.736434810451055e-06, "loss": 0.0025, "step": 103610 }, { "epoch": 1.6954921050478606, "grad_norm": 0.04977083578705788, "learning_rate": 4.735484198634131e-06, "loss": 0.0012, "step": 103620 }, { "epoch": 1.6956557309989364, "grad_norm": 0.08676992356777191, "learning_rate": 4.734533596405268e-06, "loss": 0.0017, "step": 103630 }, { "epoch": 1.6958193569500124, "grad_norm": 0.16149508953094482, "learning_rate": 4.733583003798924e-06, "loss": 0.0013, "step": 103640 }, { "epoch": 1.695982982901088, "grad_norm": 0.022713854908943176, "learning_rate": 4.732632420849555e-06, "loss": 0.0015, "step": 103650 }, { "epoch": 1.696146608852164, "grad_norm": 0.1551360934972763, "learning_rate": 4.731681847591619e-06, "loss": 0.0024, "step": 103660 }, { "epoch": 1.69631023480324, "grad_norm": 0.0020840680226683617, "learning_rate": 4.730731284059572e-06, "loss": 0.0011, "step": 103670 }, { "epoch": 1.6964738607543155, "grad_norm": 0.22082167863845825, "learning_rate": 4.729780730287868e-06, "loss": 0.002, "step": 103680 }, { "epoch": 1.6966374867053915, "grad_norm": 0.05507946014404297, "learning_rate": 4.728830186310963e-06, "loss": 0.0014, "step": 103690 }, { "epoch": 1.6968011126564673, "grad_norm": 0.048816781491041183, "learning_rate": 4.7278796521633126e-06, "loss": 0.0021, "step": 103700 }, { "epoch": 1.696964738607543, "grad_norm": 0.08371330052614212, "learning_rate": 4.726929127879372e-06, "loss": 0.0019, "step": 103710 }, { "epoch": 1.697128364558619, "grad_norm": 0.029956074431538582, "learning_rate": 4.725978613493593e-06, "loss": 0.0021, "step": 103720 }, { "epoch": 1.6972919905096948, "grad_norm": 0.14128124713897705, "learning_rate": 4.72502810904043e-06, "loss": 0.0019, "step": 103730 }, { "epoch": 1.6974556164607706, "grad_norm": 0.15012232959270477, "learning_rate": 4.724077614554337e-06, "loss": 0.0032, "step": 103740 }, { "epoch": 1.6976192424118466, "grad_norm": 0.05193810537457466, "learning_rate": 4.723127130069768e-06, "loss": 0.0019, "step": 103750 }, { "epoch": 1.6977828683629224, "grad_norm": 0.11085796356201172, "learning_rate": 4.722176655621174e-06, "loss": 0.0016, "step": 103760 }, { "epoch": 1.6979464943139981, "grad_norm": 0.2096051573753357, "learning_rate": 4.72122619124301e-06, "loss": 0.0015, "step": 103770 }, { "epoch": 1.6981101202650741, "grad_norm": 0.03283541277050972, "learning_rate": 4.720275736969726e-06, "loss": 0.0015, "step": 103780 }, { "epoch": 1.69827374621615, "grad_norm": 0.01805386133491993, "learning_rate": 4.719325292835774e-06, "loss": 0.0009, "step": 103790 }, { "epoch": 1.6984373721672257, "grad_norm": 0.1322762668132782, "learning_rate": 4.718374858875606e-06, "loss": 0.0022, "step": 103800 }, { "epoch": 1.6986009981183017, "grad_norm": 0.4298863410949707, "learning_rate": 4.717424435123673e-06, "loss": 0.0026, "step": 103810 }, { "epoch": 1.6987646240693774, "grad_norm": 0.008327971212565899, "learning_rate": 4.716474021614425e-06, "loss": 0.0017, "step": 103820 }, { "epoch": 1.6989282500204532, "grad_norm": 0.031137490645051003, "learning_rate": 4.715523618382312e-06, "loss": 0.0008, "step": 103830 }, { "epoch": 1.6990918759715292, "grad_norm": 0.1273033618927002, "learning_rate": 4.714573225461783e-06, "loss": 0.0015, "step": 103840 }, { "epoch": 1.6992555019226048, "grad_norm": 0.10375382751226425, "learning_rate": 4.71362284288729e-06, "loss": 0.002, "step": 103850 }, { "epoch": 1.6994191278736808, "grad_norm": 0.016301585361361504, "learning_rate": 4.712672470693279e-06, "loss": 0.0009, "step": 103860 }, { "epoch": 1.6995827538247568, "grad_norm": 0.23067854344844818, "learning_rate": 4.7117221089142e-06, "loss": 0.0023, "step": 103870 }, { "epoch": 1.6997463797758323, "grad_norm": 0.3417958915233612, "learning_rate": 4.710771757584503e-06, "loss": 0.0016, "step": 103880 }, { "epoch": 1.6999100057269083, "grad_norm": 0.1388198435306549, "learning_rate": 4.7098214167386356e-06, "loss": 0.0018, "step": 103890 }, { "epoch": 1.700073631677984, "grad_norm": 0.11060667037963867, "learning_rate": 4.708871086411043e-06, "loss": 0.0013, "step": 103900 }, { "epoch": 1.7002372576290599, "grad_norm": 0.04264014959335327, "learning_rate": 4.707920766636176e-06, "loss": 0.0025, "step": 103910 }, { "epoch": 1.7004008835801359, "grad_norm": 0.06201804056763649, "learning_rate": 4.706970457448478e-06, "loss": 0.0006, "step": 103920 }, { "epoch": 1.7005645095312116, "grad_norm": 0.09567633271217346, "learning_rate": 4.706020158882395e-06, "loss": 0.0017, "step": 103930 }, { "epoch": 1.7007281354822874, "grad_norm": 0.02220143750309944, "learning_rate": 4.7050698709723754e-06, "loss": 0.0015, "step": 103940 }, { "epoch": 1.7008917614333634, "grad_norm": 0.1430577039718628, "learning_rate": 4.704119593752865e-06, "loss": 0.0043, "step": 103950 }, { "epoch": 1.7010553873844392, "grad_norm": 0.23501190543174744, "learning_rate": 4.703169327258307e-06, "loss": 0.002, "step": 103960 }, { "epoch": 1.701219013335515, "grad_norm": 0.0639282688498497, "learning_rate": 4.702219071523149e-06, "loss": 0.0022, "step": 103970 }, { "epoch": 1.701382639286591, "grad_norm": 0.10013594478368759, "learning_rate": 4.701268826581833e-06, "loss": 0.0014, "step": 103980 }, { "epoch": 1.7015462652376667, "grad_norm": 0.14735466241836548, "learning_rate": 4.700318592468804e-06, "loss": 0.0018, "step": 103990 }, { "epoch": 1.7017098911887425, "grad_norm": 0.04981229081749916, "learning_rate": 4.699368369218506e-06, "loss": 0.0008, "step": 104000 }, { "epoch": 1.7018735171398185, "grad_norm": 0.03957488387823105, "learning_rate": 4.698418156865383e-06, "loss": 0.0036, "step": 104010 }, { "epoch": 1.7020371430908943, "grad_norm": 0.017070377245545387, "learning_rate": 4.697467955443876e-06, "loss": 0.0017, "step": 104020 }, { "epoch": 1.70220076904197, "grad_norm": 0.005074461922049522, "learning_rate": 4.6965177649884285e-06, "loss": 0.0009, "step": 104030 }, { "epoch": 1.702364394993046, "grad_norm": 0.034012116491794586, "learning_rate": 4.695567585533483e-06, "loss": 0.0013, "step": 104040 }, { "epoch": 1.7025280209441216, "grad_norm": 0.055761151015758514, "learning_rate": 4.69461741711348e-06, "loss": 0.0016, "step": 104050 }, { "epoch": 1.7026916468951976, "grad_norm": 0.06119237095117569, "learning_rate": 4.693667259762863e-06, "loss": 0.0015, "step": 104060 }, { "epoch": 1.7028552728462736, "grad_norm": 0.18447674810886383, "learning_rate": 4.692717113516071e-06, "loss": 0.0006, "step": 104070 }, { "epoch": 1.7030188987973491, "grad_norm": 0.1918679177761078, "learning_rate": 4.691766978407544e-06, "loss": 0.0027, "step": 104080 }, { "epoch": 1.7031825247484251, "grad_norm": 0.12115088105201721, "learning_rate": 4.690816854471723e-06, "loss": 0.0017, "step": 104090 }, { "epoch": 1.703346150699501, "grad_norm": 0.010279589332640171, "learning_rate": 4.689866741743049e-06, "loss": 0.001, "step": 104100 }, { "epoch": 1.7035097766505767, "grad_norm": 0.031147800385951996, "learning_rate": 4.688916640255962e-06, "loss": 0.0013, "step": 104110 }, { "epoch": 1.7036734026016527, "grad_norm": 0.1230681762099266, "learning_rate": 4.687966550044898e-06, "loss": 0.0019, "step": 104120 }, { "epoch": 1.7038370285527284, "grad_norm": 0.17274823784828186, "learning_rate": 4.687016471144296e-06, "loss": 0.001, "step": 104130 }, { "epoch": 1.7040006545038042, "grad_norm": 0.04568912833929062, "learning_rate": 4.686066403588594e-06, "loss": 0.0015, "step": 104140 }, { "epoch": 1.7041642804548802, "grad_norm": 0.008773451671004295, "learning_rate": 4.68511634741223e-06, "loss": 0.001, "step": 104150 }, { "epoch": 1.704327906405956, "grad_norm": 0.11072470247745514, "learning_rate": 4.684166302649643e-06, "loss": 0.0026, "step": 104160 }, { "epoch": 1.7044915323570318, "grad_norm": 0.049063943326473236, "learning_rate": 4.6832162693352674e-06, "loss": 0.0018, "step": 104170 }, { "epoch": 1.7046551583081078, "grad_norm": 0.018669793382287025, "learning_rate": 4.682266247503542e-06, "loss": 0.0011, "step": 104180 }, { "epoch": 1.7048187842591835, "grad_norm": 0.09930496662855148, "learning_rate": 4.6813162371889e-06, "loss": 0.001, "step": 104190 }, { "epoch": 1.7049824102102593, "grad_norm": 0.044762734323740005, "learning_rate": 4.6803662384257796e-06, "loss": 0.0015, "step": 104200 }, { "epoch": 1.7051460361613353, "grad_norm": 0.017309872433543205, "learning_rate": 4.679416251248615e-06, "loss": 0.0019, "step": 104210 }, { "epoch": 1.7053096621124109, "grad_norm": 0.04318152368068695, "learning_rate": 4.67846627569184e-06, "loss": 0.0022, "step": 104220 }, { "epoch": 1.7054732880634869, "grad_norm": 0.05442947894334793, "learning_rate": 4.6775163117898905e-06, "loss": 0.0005, "step": 104230 }, { "epoch": 1.7056369140145629, "grad_norm": 0.04039434716105461, "learning_rate": 4.6765663595771995e-06, "loss": 0.0011, "step": 104240 }, { "epoch": 1.7058005399656384, "grad_norm": 0.051713377237319946, "learning_rate": 4.6756164190882e-06, "loss": 0.0026, "step": 104250 }, { "epoch": 1.7059641659167144, "grad_norm": 0.06726450473070145, "learning_rate": 4.674666490357326e-06, "loss": 0.0019, "step": 104260 }, { "epoch": 1.7061277918677902, "grad_norm": 0.01772114261984825, "learning_rate": 4.673716573419009e-06, "loss": 0.0016, "step": 104270 }, { "epoch": 1.706291417818866, "grad_norm": 0.07602676749229431, "learning_rate": 4.672766668307682e-06, "loss": 0.0016, "step": 104280 }, { "epoch": 1.706455043769942, "grad_norm": 0.0698457732796669, "learning_rate": 4.671816775057779e-06, "loss": 0.0019, "step": 104290 }, { "epoch": 1.7066186697210177, "grad_norm": 0.03086455538868904, "learning_rate": 4.670866893703727e-06, "loss": 0.0017, "step": 104300 }, { "epoch": 1.7067822956720935, "grad_norm": 0.09401411563158035, "learning_rate": 4.66991702427996e-06, "loss": 0.0025, "step": 104310 }, { "epoch": 1.7069459216231695, "grad_norm": 0.023286331444978714, "learning_rate": 4.668967166820908e-06, "loss": 0.0023, "step": 104320 }, { "epoch": 1.7071095475742453, "grad_norm": 0.08056782186031342, "learning_rate": 4.668017321361e-06, "loss": 0.0011, "step": 104330 }, { "epoch": 1.707273173525321, "grad_norm": 0.03559701517224312, "learning_rate": 4.667067487934665e-06, "loss": 0.0014, "step": 104340 }, { "epoch": 1.707436799476397, "grad_norm": 0.0799616128206253, "learning_rate": 4.666117666576335e-06, "loss": 0.0014, "step": 104350 }, { "epoch": 1.7076004254274728, "grad_norm": 0.08515532314777374, "learning_rate": 4.665167857320436e-06, "loss": 0.0013, "step": 104360 }, { "epoch": 1.7077640513785486, "grad_norm": 0.05431477725505829, "learning_rate": 4.664218060201397e-06, "loss": 0.0011, "step": 104370 }, { "epoch": 1.7079276773296246, "grad_norm": 0.10838386416435242, "learning_rate": 4.6632682752536475e-06, "loss": 0.0022, "step": 104380 }, { "epoch": 1.7080913032807004, "grad_norm": 0.26230278611183167, "learning_rate": 4.662318502511614e-06, "loss": 0.0014, "step": 104390 }, { "epoch": 1.7082549292317761, "grad_norm": 0.19241011142730713, "learning_rate": 4.661368742009722e-06, "loss": 0.002, "step": 104400 }, { "epoch": 1.7084185551828521, "grad_norm": 0.06803303211927414, "learning_rate": 4.660418993782402e-06, "loss": 0.0013, "step": 104410 }, { "epoch": 1.7085821811339277, "grad_norm": 0.04411003366112709, "learning_rate": 4.659469257864074e-06, "loss": 0.0018, "step": 104420 }, { "epoch": 1.7087458070850037, "grad_norm": 0.034814633429050446, "learning_rate": 4.658519534289168e-06, "loss": 0.0014, "step": 104430 }, { "epoch": 1.7089094330360797, "grad_norm": 0.06919162720441818, "learning_rate": 4.6575698230921085e-06, "loss": 0.0013, "step": 104440 }, { "epoch": 1.7090730589871552, "grad_norm": 0.11741115152835846, "learning_rate": 4.65662012430732e-06, "loss": 0.0018, "step": 104450 }, { "epoch": 1.7092366849382312, "grad_norm": 0.05263363942503929, "learning_rate": 4.6556704379692255e-06, "loss": 0.0008, "step": 104460 }, { "epoch": 1.709400310889307, "grad_norm": 0.051880110055208206, "learning_rate": 4.654720764112251e-06, "loss": 0.0014, "step": 104470 }, { "epoch": 1.7095639368403828, "grad_norm": 0.07953453063964844, "learning_rate": 4.653771102770819e-06, "loss": 0.0021, "step": 104480 }, { "epoch": 1.7097275627914588, "grad_norm": 0.19696493446826935, "learning_rate": 4.6528214539793526e-06, "loss": 0.0022, "step": 104490 }, { "epoch": 1.7098911887425345, "grad_norm": 0.028127476572990417, "learning_rate": 4.651871817772273e-06, "loss": 0.0011, "step": 104500 }, { "epoch": 1.7100548146936103, "grad_norm": 0.07425235956907272, "learning_rate": 4.650922194184006e-06, "loss": 0.0011, "step": 104510 }, { "epoch": 1.7102184406446863, "grad_norm": 0.11308459937572479, "learning_rate": 4.649972583248968e-06, "loss": 0.0016, "step": 104520 }, { "epoch": 1.710382066595762, "grad_norm": 0.12562865018844604, "learning_rate": 4.649022985001584e-06, "loss": 0.0018, "step": 104530 }, { "epoch": 1.7105456925468379, "grad_norm": 0.068203866481781, "learning_rate": 4.6480733994762716e-06, "loss": 0.001, "step": 104540 }, { "epoch": 1.7107093184979139, "grad_norm": 0.18430009484291077, "learning_rate": 4.647123826707453e-06, "loss": 0.0012, "step": 104550 }, { "epoch": 1.7108729444489896, "grad_norm": 0.05046533793210983, "learning_rate": 4.646174266729549e-06, "loss": 0.0014, "step": 104560 }, { "epoch": 1.7110365704000654, "grad_norm": 0.19980613887310028, "learning_rate": 4.6452247195769755e-06, "loss": 0.0018, "step": 104570 }, { "epoch": 1.7112001963511414, "grad_norm": 0.10487794131040573, "learning_rate": 4.644275185284153e-06, "loss": 0.0013, "step": 104580 }, { "epoch": 1.7113638223022172, "grad_norm": 0.06061362847685814, "learning_rate": 4.6433256638855015e-06, "loss": 0.0012, "step": 104590 }, { "epoch": 1.711527448253293, "grad_norm": 0.18957191705703735, "learning_rate": 4.6423761554154364e-06, "loss": 0.0023, "step": 104600 }, { "epoch": 1.711691074204369, "grad_norm": 0.09567522257566452, "learning_rate": 4.641426659908379e-06, "loss": 0.0013, "step": 104610 }, { "epoch": 1.7118547001554445, "grad_norm": 0.07723551988601685, "learning_rate": 4.640477177398742e-06, "loss": 0.0014, "step": 104620 }, { "epoch": 1.7120183261065205, "grad_norm": 0.09386233985424042, "learning_rate": 4.6395277079209425e-06, "loss": 0.0022, "step": 104630 }, { "epoch": 1.7121819520575965, "grad_norm": 0.06522838026285172, "learning_rate": 4.638578251509395e-06, "loss": 0.0011, "step": 104640 }, { "epoch": 1.712345578008672, "grad_norm": 0.028610533103346825, "learning_rate": 4.63762880819852e-06, "loss": 0.0027, "step": 104650 }, { "epoch": 1.712509203959748, "grad_norm": 0.17405836284160614, "learning_rate": 4.63667937802273e-06, "loss": 0.0017, "step": 104660 }, { "epoch": 1.7126728299108238, "grad_norm": 0.06394306570291519, "learning_rate": 4.635729961016438e-06, "loss": 0.0019, "step": 104670 }, { "epoch": 1.7128364558618996, "grad_norm": 0.043279122561216354, "learning_rate": 4.634780557214059e-06, "loss": 0.0017, "step": 104680 }, { "epoch": 1.7130000818129756, "grad_norm": 0.15762358903884888, "learning_rate": 4.633831166650008e-06, "loss": 0.0019, "step": 104690 }, { "epoch": 1.7131637077640514, "grad_norm": 0.09532841295003891, "learning_rate": 4.632881789358697e-06, "loss": 0.0013, "step": 104700 }, { "epoch": 1.7133273337151271, "grad_norm": 0.24289748072624207, "learning_rate": 4.63193242537454e-06, "loss": 0.0017, "step": 104710 }, { "epoch": 1.7134909596662031, "grad_norm": 0.020615093410015106, "learning_rate": 4.630983074731947e-06, "loss": 0.002, "step": 104720 }, { "epoch": 1.713654585617279, "grad_norm": 0.035493504256010056, "learning_rate": 4.630033737465331e-06, "loss": 0.0008, "step": 104730 }, { "epoch": 1.7138182115683547, "grad_norm": 0.15942108631134033, "learning_rate": 4.629084413609102e-06, "loss": 0.0014, "step": 104740 }, { "epoch": 1.7139818375194307, "grad_norm": 0.031997308135032654, "learning_rate": 4.628135103197671e-06, "loss": 0.0017, "step": 104750 }, { "epoch": 1.7141454634705064, "grad_norm": 0.11948850750923157, "learning_rate": 4.62718580626545e-06, "loss": 0.0012, "step": 104760 }, { "epoch": 1.7143090894215822, "grad_norm": 0.2451469451189041, "learning_rate": 4.626236522846847e-06, "loss": 0.0019, "step": 104770 }, { "epoch": 1.7144727153726582, "grad_norm": 0.04973438009619713, "learning_rate": 4.625287252976272e-06, "loss": 0.0022, "step": 104780 }, { "epoch": 1.714636341323734, "grad_norm": 0.1627313792705536, "learning_rate": 4.624337996688133e-06, "loss": 0.0012, "step": 104790 }, { "epoch": 1.7147999672748098, "grad_norm": 0.06593550741672516, "learning_rate": 4.6233887540168375e-06, "loss": 0.0011, "step": 104800 }, { "epoch": 1.7149635932258858, "grad_norm": 0.09836692363023758, "learning_rate": 4.622439524996798e-06, "loss": 0.002, "step": 104810 }, { "epoch": 1.7151272191769613, "grad_norm": 0.11044531315565109, "learning_rate": 4.621490309662417e-06, "loss": 0.0016, "step": 104820 }, { "epoch": 1.7152908451280373, "grad_norm": 0.11051111668348312, "learning_rate": 4.620541108048102e-06, "loss": 0.001, "step": 104830 }, { "epoch": 1.7154544710791133, "grad_norm": 0.013667666353285313, "learning_rate": 4.619591920188259e-06, "loss": 0.0034, "step": 104840 }, { "epoch": 1.7156180970301889, "grad_norm": 0.13319715857505798, "learning_rate": 4.618642746117295e-06, "loss": 0.0017, "step": 104850 }, { "epoch": 1.7157817229812649, "grad_norm": 0.06539462506771088, "learning_rate": 4.617693585869615e-06, "loss": 0.0014, "step": 104860 }, { "epoch": 1.7159453489323406, "grad_norm": 0.08072780072689056, "learning_rate": 4.616744439479623e-06, "loss": 0.0015, "step": 104870 }, { "epoch": 1.7161089748834164, "grad_norm": 0.13563817739486694, "learning_rate": 4.615795306981725e-06, "loss": 0.0013, "step": 104880 }, { "epoch": 1.7162726008344924, "grad_norm": 0.01548470463603735, "learning_rate": 4.614846188410324e-06, "loss": 0.0018, "step": 104890 }, { "epoch": 1.7164362267855682, "grad_norm": 0.08023739606142044, "learning_rate": 4.613897083799822e-06, "loss": 0.0022, "step": 104900 }, { "epoch": 1.716599852736644, "grad_norm": 0.07746725529432297, "learning_rate": 4.6129479931846255e-06, "loss": 0.0014, "step": 104910 }, { "epoch": 1.71676347868772, "grad_norm": 0.008192102424800396, "learning_rate": 4.61199891659913e-06, "loss": 0.0015, "step": 104920 }, { "epoch": 1.7169271046387957, "grad_norm": 0.03445400297641754, "learning_rate": 4.611049854077743e-06, "loss": 0.0009, "step": 104930 }, { "epoch": 1.7170907305898715, "grad_norm": 0.2858472764492035, "learning_rate": 4.6101008056548644e-06, "loss": 0.0021, "step": 104940 }, { "epoch": 1.7172543565409475, "grad_norm": 0.045578714460134506, "learning_rate": 4.609151771364894e-06, "loss": 0.0004, "step": 104950 }, { "epoch": 1.7174179824920233, "grad_norm": 0.2471114993095398, "learning_rate": 4.6082027512422325e-06, "loss": 0.0018, "step": 104960 }, { "epoch": 1.717581608443099, "grad_norm": 0.054288070648908615, "learning_rate": 4.607253745321279e-06, "loss": 0.0012, "step": 104970 }, { "epoch": 1.717745234394175, "grad_norm": 0.10607331991195679, "learning_rate": 4.606304753636434e-06, "loss": 0.0011, "step": 104980 }, { "epoch": 1.7179088603452508, "grad_norm": 0.04377147927880287, "learning_rate": 4.605355776222095e-06, "loss": 0.001, "step": 104990 }, { "epoch": 1.7180724862963266, "grad_norm": 0.07112236320972443, "learning_rate": 4.604406813112661e-06, "loss": 0.0024, "step": 105000 }, { "epoch": 1.7182361122474026, "grad_norm": 0.05970732867717743, "learning_rate": 4.60345786434253e-06, "loss": 0.0019, "step": 105010 }, { "epoch": 1.7183997381984781, "grad_norm": 0.17530439794063568, "learning_rate": 4.602508929946098e-06, "loss": 0.0031, "step": 105020 }, { "epoch": 1.7185633641495541, "grad_norm": 0.11928589642047882, "learning_rate": 4.6015600099577615e-06, "loss": 0.0012, "step": 105030 }, { "epoch": 1.7187269901006301, "grad_norm": 0.06392933428287506, "learning_rate": 4.600611104411917e-06, "loss": 0.0011, "step": 105040 }, { "epoch": 1.7188906160517057, "grad_norm": 0.2600875794887543, "learning_rate": 4.5996622133429605e-06, "loss": 0.0021, "step": 105050 }, { "epoch": 1.7190542420027817, "grad_norm": 0.0062109315767884254, "learning_rate": 4.598713336785286e-06, "loss": 0.0019, "step": 105060 }, { "epoch": 1.7192178679538574, "grad_norm": 0.10749581456184387, "learning_rate": 4.5977644747732885e-06, "loss": 0.0022, "step": 105070 }, { "epoch": 1.7193814939049332, "grad_norm": 0.09237188845872879, "learning_rate": 4.596815627341361e-06, "loss": 0.0024, "step": 105080 }, { "epoch": 1.7195451198560092, "grad_norm": 0.1016964465379715, "learning_rate": 4.595866794523899e-06, "loss": 0.0016, "step": 105090 }, { "epoch": 1.719708745807085, "grad_norm": 0.016306094825267792, "learning_rate": 4.5949179763552946e-06, "loss": 0.0037, "step": 105100 }, { "epoch": 1.7198723717581608, "grad_norm": 0.08354363590478897, "learning_rate": 4.59396917286994e-06, "loss": 0.0025, "step": 105110 }, { "epoch": 1.7200359977092368, "grad_norm": 0.04717186838388443, "learning_rate": 4.593020384102229e-06, "loss": 0.0012, "step": 105120 }, { "epoch": 1.7201996236603125, "grad_norm": 0.004931902978569269, "learning_rate": 4.59207161008655e-06, "loss": 0.0019, "step": 105130 }, { "epoch": 1.7203632496113883, "grad_norm": 0.43936723470687866, "learning_rate": 4.591122850857292e-06, "loss": 0.0025, "step": 105140 }, { "epoch": 1.7205268755624643, "grad_norm": 0.42821720242500305, "learning_rate": 4.590174106448849e-06, "loss": 0.0034, "step": 105150 }, { "epoch": 1.72069050151354, "grad_norm": 0.1212688684463501, "learning_rate": 4.589225376895611e-06, "loss": 0.0015, "step": 105160 }, { "epoch": 1.7208541274646159, "grad_norm": 0.00839544553309679, "learning_rate": 4.588276662231965e-06, "loss": 0.0015, "step": 105170 }, { "epoch": 1.7210177534156919, "grad_norm": 0.06713610887527466, "learning_rate": 4.5873279624923e-06, "loss": 0.0016, "step": 105180 }, { "epoch": 1.7211813793667674, "grad_norm": 0.1132173091173172, "learning_rate": 4.586379277711005e-06, "loss": 0.0009, "step": 105190 }, { "epoch": 1.7213450053178434, "grad_norm": 0.08310399204492569, "learning_rate": 4.585430607922467e-06, "loss": 0.0019, "step": 105200 }, { "epoch": 1.7215086312689194, "grad_norm": 0.04769390448927879, "learning_rate": 4.5844819531610735e-06, "loss": 0.0021, "step": 105210 }, { "epoch": 1.721672257219995, "grad_norm": 0.029055854305624962, "learning_rate": 4.5835333134612106e-06, "loss": 0.0036, "step": 105220 }, { "epoch": 1.721835883171071, "grad_norm": 0.20655041933059692, "learning_rate": 4.582584688857265e-06, "loss": 0.0018, "step": 105230 }, { "epoch": 1.7219995091221467, "grad_norm": 0.12657411396503448, "learning_rate": 4.581636079383619e-06, "loss": 0.002, "step": 105240 }, { "epoch": 1.7221631350732225, "grad_norm": 0.06903214752674103, "learning_rate": 4.580687485074661e-06, "loss": 0.0011, "step": 105250 }, { "epoch": 1.7223267610242985, "grad_norm": 0.0782797634601593, "learning_rate": 4.579738905964773e-06, "loss": 0.0016, "step": 105260 }, { "epoch": 1.7224903869753743, "grad_norm": 0.07491712272167206, "learning_rate": 4.5787903420883405e-06, "loss": 0.0019, "step": 105270 }, { "epoch": 1.72265401292645, "grad_norm": 0.08151070773601532, "learning_rate": 4.577841793479745e-06, "loss": 0.0009, "step": 105280 }, { "epoch": 1.722817638877526, "grad_norm": 0.031249873340129852, "learning_rate": 4.57689326017337e-06, "loss": 0.0012, "step": 105290 }, { "epoch": 1.7229812648286018, "grad_norm": 0.12277843058109283, "learning_rate": 4.575944742203597e-06, "loss": 0.0011, "step": 105300 }, { "epoch": 1.7231448907796776, "grad_norm": 0.06664171814918518, "learning_rate": 4.574996239604808e-06, "loss": 0.0014, "step": 105310 }, { "epoch": 1.7233085167307536, "grad_norm": 0.12797382473945618, "learning_rate": 4.574047752411386e-06, "loss": 0.0021, "step": 105320 }, { "epoch": 1.7234721426818294, "grad_norm": 0.008203708566725254, "learning_rate": 4.573099280657708e-06, "loss": 0.0021, "step": 105330 }, { "epoch": 1.7236357686329051, "grad_norm": 0.047096915543079376, "learning_rate": 4.572150824378154e-06, "loss": 0.0034, "step": 105340 }, { "epoch": 1.7237993945839811, "grad_norm": 0.04287709295749664, "learning_rate": 4.571202383607105e-06, "loss": 0.0025, "step": 105350 }, { "epoch": 1.723963020535057, "grad_norm": 0.1158096045255661, "learning_rate": 4.570253958378937e-06, "loss": 0.0013, "step": 105360 }, { "epoch": 1.7241266464861327, "grad_norm": 0.08027780055999756, "learning_rate": 4.569305548728032e-06, "loss": 0.0011, "step": 105370 }, { "epoch": 1.7242902724372087, "grad_norm": 0.15397228300571442, "learning_rate": 4.568357154688766e-06, "loss": 0.0024, "step": 105380 }, { "epoch": 1.7244538983882842, "grad_norm": 0.04861374944448471, "learning_rate": 4.5674087762955156e-06, "loss": 0.0013, "step": 105390 }, { "epoch": 1.7246175243393602, "grad_norm": 0.03549257293343544, "learning_rate": 4.5664604135826576e-06, "loss": 0.0015, "step": 105400 }, { "epoch": 1.7247811502904362, "grad_norm": 0.0403401181101799, "learning_rate": 4.565512066584567e-06, "loss": 0.0019, "step": 105410 }, { "epoch": 1.7249447762415118, "grad_norm": 0.11930004507303238, "learning_rate": 4.564563735335621e-06, "loss": 0.0014, "step": 105420 }, { "epoch": 1.7251084021925878, "grad_norm": 0.037674449384212494, "learning_rate": 4.5636154198701925e-06, "loss": 0.0007, "step": 105430 }, { "epoch": 1.7252720281436635, "grad_norm": 0.04420173168182373, "learning_rate": 4.562667120222655e-06, "loss": 0.0016, "step": 105440 }, { "epoch": 1.7254356540947393, "grad_norm": 0.1562848538160324, "learning_rate": 4.561718836427384e-06, "loss": 0.0015, "step": 105450 }, { "epoch": 1.7255992800458153, "grad_norm": 0.10649880021810532, "learning_rate": 4.560770568518751e-06, "loss": 0.0008, "step": 105460 }, { "epoch": 1.725762905996891, "grad_norm": 0.043602969497442245, "learning_rate": 4.55982231653113e-06, "loss": 0.0014, "step": 105470 }, { "epoch": 1.7259265319479669, "grad_norm": 0.11116377264261246, "learning_rate": 4.558874080498893e-06, "loss": 0.0023, "step": 105480 }, { "epoch": 1.7260901578990429, "grad_norm": 0.2922625243663788, "learning_rate": 4.5579258604564095e-06, "loss": 0.0026, "step": 105490 }, { "epoch": 1.7262537838501186, "grad_norm": 0.07799088209867477, "learning_rate": 4.556977656438051e-06, "loss": 0.001, "step": 105500 }, { "epoch": 1.7264174098011944, "grad_norm": 0.0895681381225586, "learning_rate": 4.556029468478187e-06, "loss": 0.0023, "step": 105510 }, { "epoch": 1.7265810357522704, "grad_norm": 0.14441503584384918, "learning_rate": 4.55508129661119e-06, "loss": 0.0012, "step": 105520 }, { "epoch": 1.7267446617033462, "grad_norm": 0.06283687800168991, "learning_rate": 4.5541331408714255e-06, "loss": 0.0019, "step": 105530 }, { "epoch": 1.726908287654422, "grad_norm": 0.06532728672027588, "learning_rate": 4.553185001293262e-06, "loss": 0.0011, "step": 105540 }, { "epoch": 1.727071913605498, "grad_norm": 0.1469145119190216, "learning_rate": 4.552236877911069e-06, "loss": 0.0024, "step": 105550 }, { "epoch": 1.7272355395565737, "grad_norm": 0.08351492136716843, "learning_rate": 4.551288770759212e-06, "loss": 0.0008, "step": 105560 }, { "epoch": 1.7273991655076495, "grad_norm": 0.18137399852275848, "learning_rate": 4.550340679872059e-06, "loss": 0.0019, "step": 105570 }, { "epoch": 1.7275627914587255, "grad_norm": 0.22894568741321564, "learning_rate": 4.549392605283976e-06, "loss": 0.0015, "step": 105580 }, { "epoch": 1.727726417409801, "grad_norm": 0.05140344426035881, "learning_rate": 4.548444547029328e-06, "loss": 0.0012, "step": 105590 }, { "epoch": 1.727890043360877, "grad_norm": 0.3428930640220642, "learning_rate": 4.54749650514248e-06, "loss": 0.0018, "step": 105600 }, { "epoch": 1.728053669311953, "grad_norm": 0.12339276075363159, "learning_rate": 4.546548479657796e-06, "loss": 0.0021, "step": 105610 }, { "epoch": 1.7282172952630286, "grad_norm": 0.2664509117603302, "learning_rate": 4.545600470609643e-06, "loss": 0.0019, "step": 105620 }, { "epoch": 1.7283809212141046, "grad_norm": 0.0845964327454567, "learning_rate": 4.544652478032377e-06, "loss": 0.0015, "step": 105630 }, { "epoch": 1.7285445471651804, "grad_norm": 0.10369815677404404, "learning_rate": 4.543704501960365e-06, "loss": 0.0016, "step": 105640 }, { "epoch": 1.7287081731162561, "grad_norm": 0.07420734316110611, "learning_rate": 4.542756542427968e-06, "loss": 0.0011, "step": 105650 }, { "epoch": 1.7288717990673321, "grad_norm": 0.2443924844264984, "learning_rate": 4.5418085994695475e-06, "loss": 0.0013, "step": 105660 }, { "epoch": 1.729035425018408, "grad_norm": 0.0880240648984909, "learning_rate": 4.540860673119464e-06, "loss": 0.0015, "step": 105670 }, { "epoch": 1.7291990509694837, "grad_norm": 0.4116462767124176, "learning_rate": 4.5399127634120775e-06, "loss": 0.0022, "step": 105680 }, { "epoch": 1.7293626769205597, "grad_norm": 0.19862475991249084, "learning_rate": 4.538964870381748e-06, "loss": 0.0022, "step": 105690 }, { "epoch": 1.7295263028716354, "grad_norm": 0.1152542307972908, "learning_rate": 4.538016994062833e-06, "loss": 0.0012, "step": 105700 }, { "epoch": 1.7296899288227112, "grad_norm": 0.1057204082608223, "learning_rate": 4.537069134489693e-06, "loss": 0.0019, "step": 105710 }, { "epoch": 1.7298535547737872, "grad_norm": 0.09458913654088974, "learning_rate": 4.536121291696684e-06, "loss": 0.0017, "step": 105720 }, { "epoch": 1.730017180724863, "grad_norm": 0.07179639488458633, "learning_rate": 4.535173465718163e-06, "loss": 0.0017, "step": 105730 }, { "epoch": 1.7301808066759388, "grad_norm": 0.31437626481056213, "learning_rate": 4.534225656588486e-06, "loss": 0.0009, "step": 105740 }, { "epoch": 1.7303444326270148, "grad_norm": 0.0046170419082045555, "learning_rate": 4.53327786434201e-06, "loss": 0.0011, "step": 105750 }, { "epoch": 1.7305080585780905, "grad_norm": 0.019300058484077454, "learning_rate": 4.53233008901309e-06, "loss": 0.0019, "step": 105760 }, { "epoch": 1.7306716845291663, "grad_norm": 0.020240964367985725, "learning_rate": 4.53138233063608e-06, "loss": 0.0011, "step": 105770 }, { "epoch": 1.7308353104802423, "grad_norm": 0.07901404052972794, "learning_rate": 4.530434589245334e-06, "loss": 0.0015, "step": 105780 }, { "epoch": 1.7309989364313179, "grad_norm": 0.026835916563868523, "learning_rate": 4.529486864875205e-06, "loss": 0.0009, "step": 105790 }, { "epoch": 1.7311625623823939, "grad_norm": 0.031669583171606064, "learning_rate": 4.528539157560046e-06, "loss": 0.0009, "step": 105800 }, { "epoch": 1.7313261883334699, "grad_norm": 0.038425758481025696, "learning_rate": 4.5275914673342105e-06, "loss": 0.0013, "step": 105810 }, { "epoch": 1.7314898142845454, "grad_norm": 0.07629675418138504, "learning_rate": 4.526643794232051e-06, "loss": 0.0012, "step": 105820 }, { "epoch": 1.7316534402356214, "grad_norm": 0.07075434923171997, "learning_rate": 4.525696138287914e-06, "loss": 0.0021, "step": 105830 }, { "epoch": 1.7318170661866972, "grad_norm": 0.11859335005283356, "learning_rate": 4.524748499536152e-06, "loss": 0.0013, "step": 105840 }, { "epoch": 1.731980692137773, "grad_norm": 0.12713493406772614, "learning_rate": 4.523800878011114e-06, "loss": 0.0018, "step": 105850 }, { "epoch": 1.732144318088849, "grad_norm": 0.1701129525899887, "learning_rate": 4.5228532737471505e-06, "loss": 0.0014, "step": 105860 }, { "epoch": 1.7323079440399247, "grad_norm": 0.17722174525260925, "learning_rate": 4.521905686778609e-06, "loss": 0.0014, "step": 105870 }, { "epoch": 1.7324715699910005, "grad_norm": 0.0571046806871891, "learning_rate": 4.520958117139837e-06, "loss": 0.0015, "step": 105880 }, { "epoch": 1.7326351959420765, "grad_norm": 0.07202465087175369, "learning_rate": 4.520010564865182e-06, "loss": 0.0015, "step": 105890 }, { "epoch": 1.7327988218931523, "grad_norm": 0.03762871026992798, "learning_rate": 4.519063029988991e-06, "loss": 0.0018, "step": 105900 }, { "epoch": 1.732962447844228, "grad_norm": 0.0380859375, "learning_rate": 4.518115512545608e-06, "loss": 0.0014, "step": 105910 }, { "epoch": 1.733126073795304, "grad_norm": 0.270017147064209, "learning_rate": 4.517168012569382e-06, "loss": 0.0011, "step": 105920 }, { "epoch": 1.7332896997463798, "grad_norm": 0.08873353898525238, "learning_rate": 4.516220530094652e-06, "loss": 0.0013, "step": 105930 }, { "epoch": 1.7334533256974556, "grad_norm": 0.13098040223121643, "learning_rate": 4.5152730651557675e-06, "loss": 0.0016, "step": 105940 }, { "epoch": 1.7336169516485316, "grad_norm": 0.25841355323791504, "learning_rate": 4.514325617787068e-06, "loss": 0.0011, "step": 105950 }, { "epoch": 1.7337805775996074, "grad_norm": 0.06951875239610672, "learning_rate": 4.513378188022898e-06, "loss": 0.0018, "step": 105960 }, { "epoch": 1.7339442035506831, "grad_norm": 0.0480615571141243, "learning_rate": 4.512430775897599e-06, "loss": 0.0017, "step": 105970 }, { "epoch": 1.7341078295017591, "grad_norm": 0.08204066753387451, "learning_rate": 4.511483381445512e-06, "loss": 0.001, "step": 105980 }, { "epoch": 1.7342714554528347, "grad_norm": 0.06344208121299744, "learning_rate": 4.510536004700979e-06, "loss": 0.001, "step": 105990 }, { "epoch": 1.7344350814039107, "grad_norm": 0.033863600343465805, "learning_rate": 4.50958864569834e-06, "loss": 0.001, "step": 106000 }, { "epoch": 1.7345987073549864, "grad_norm": 0.0188735518604517, "learning_rate": 4.5086413044719314e-06, "loss": 0.0014, "step": 106010 }, { "epoch": 1.7347623333060622, "grad_norm": 0.0985097885131836, "learning_rate": 4.507693981056098e-06, "loss": 0.002, "step": 106020 }, { "epoch": 1.7349259592571382, "grad_norm": 0.005792176350951195, "learning_rate": 4.506746675485173e-06, "loss": 0.0016, "step": 106030 }, { "epoch": 1.735089585208214, "grad_norm": 0.2639869749546051, "learning_rate": 4.505799387793496e-06, "loss": 0.0011, "step": 106040 }, { "epoch": 1.7352532111592898, "grad_norm": 0.09465857595205307, "learning_rate": 4.5048521180154025e-06, "loss": 0.0022, "step": 106050 }, { "epoch": 1.7354168371103658, "grad_norm": 0.027954036369919777, "learning_rate": 4.50390486618523e-06, "loss": 0.0011, "step": 106060 }, { "epoch": 1.7355804630614415, "grad_norm": 0.20050562918186188, "learning_rate": 4.502957632337313e-06, "loss": 0.0015, "step": 106070 }, { "epoch": 1.7357440890125173, "grad_norm": 0.2531886398792267, "learning_rate": 4.502010416505988e-06, "loss": 0.0013, "step": 106080 }, { "epoch": 1.7359077149635933, "grad_norm": 0.0523017980158329, "learning_rate": 4.501063218725588e-06, "loss": 0.0013, "step": 106090 }, { "epoch": 1.736071340914669, "grad_norm": 0.02355448715388775, "learning_rate": 4.500116039030448e-06, "loss": 0.0008, "step": 106100 }, { "epoch": 1.7362349668657449, "grad_norm": 0.03599315136671066, "learning_rate": 4.4991688774549e-06, "loss": 0.0009, "step": 106110 }, { "epoch": 1.7363985928168209, "grad_norm": 0.056937552988529205, "learning_rate": 4.498221734033278e-06, "loss": 0.0011, "step": 106120 }, { "epoch": 1.7365622187678966, "grad_norm": 0.20131470263004303, "learning_rate": 4.4972746087999095e-06, "loss": 0.003, "step": 106130 }, { "epoch": 1.7367258447189724, "grad_norm": 0.1308128982782364, "learning_rate": 4.496327501789129e-06, "loss": 0.0011, "step": 106140 }, { "epoch": 1.7368894706700484, "grad_norm": 0.11488918960094452, "learning_rate": 4.4953804130352656e-06, "loss": 0.0012, "step": 106150 }, { "epoch": 1.737053096621124, "grad_norm": 0.06370162218809128, "learning_rate": 4.49443334257265e-06, "loss": 0.002, "step": 106160 }, { "epoch": 1.7372167225722, "grad_norm": 0.010542163625359535, "learning_rate": 4.493486290435611e-06, "loss": 0.0016, "step": 106170 }, { "epoch": 1.737380348523276, "grad_norm": 0.1069006696343422, "learning_rate": 4.492539256658476e-06, "loss": 0.001, "step": 106180 }, { "epoch": 1.7375439744743515, "grad_norm": 0.3381604850292206, "learning_rate": 4.491592241275573e-06, "loss": 0.0018, "step": 106190 }, { "epoch": 1.7377076004254275, "grad_norm": 0.036023881286382675, "learning_rate": 4.49064524432123e-06, "loss": 0.0014, "step": 106200 }, { "epoch": 1.7378712263765033, "grad_norm": 0.06754820048809052, "learning_rate": 4.4896982658297725e-06, "loss": 0.0008, "step": 106210 }, { "epoch": 1.738034852327579, "grad_norm": 0.20016489923000336, "learning_rate": 4.488751305835527e-06, "loss": 0.0027, "step": 106220 }, { "epoch": 1.738198478278655, "grad_norm": 0.05603393167257309, "learning_rate": 4.487804364372818e-06, "loss": 0.0009, "step": 106230 }, { "epoch": 1.7383621042297308, "grad_norm": 0.2260708063840866, "learning_rate": 4.4868574414759684e-06, "loss": 0.0036, "step": 106240 }, { "epoch": 1.7385257301808066, "grad_norm": 0.050879478454589844, "learning_rate": 4.4859105371793034e-06, "loss": 0.0012, "step": 106250 }, { "epoch": 1.7386893561318826, "grad_norm": 0.14517945051193237, "learning_rate": 4.4849636515171464e-06, "loss": 0.0018, "step": 106260 }, { "epoch": 1.7388529820829584, "grad_norm": 0.14041414856910706, "learning_rate": 4.484016784523819e-06, "loss": 0.0014, "step": 106270 }, { "epoch": 1.7390166080340341, "grad_norm": 0.1947072595357895, "learning_rate": 4.4830699362336436e-06, "loss": 0.0013, "step": 106280 }, { "epoch": 1.7391802339851101, "grad_norm": 0.25436705350875854, "learning_rate": 4.482123106680939e-06, "loss": 0.0014, "step": 106290 }, { "epoch": 1.739343859936186, "grad_norm": 0.231724813580513, "learning_rate": 4.481176295900028e-06, "loss": 0.0025, "step": 106300 }, { "epoch": 1.7395074858872617, "grad_norm": 0.053413812071084976, "learning_rate": 4.48022950392523e-06, "loss": 0.0009, "step": 106310 }, { "epoch": 1.7396711118383377, "grad_norm": 0.04543972760438919, "learning_rate": 4.479282730790864e-06, "loss": 0.0011, "step": 106320 }, { "epoch": 1.7398347377894134, "grad_norm": 0.05133870616555214, "learning_rate": 4.478335976531246e-06, "loss": 0.0012, "step": 106330 }, { "epoch": 1.7399983637404892, "grad_norm": 0.03417579457163811, "learning_rate": 4.477389241180696e-06, "loss": 0.0014, "step": 106340 }, { "epoch": 1.7401619896915652, "grad_norm": 0.09423999488353729, "learning_rate": 4.476442524773528e-06, "loss": 0.0017, "step": 106350 }, { "epoch": 1.7403256156426408, "grad_norm": 0.06281811743974686, "learning_rate": 4.475495827344061e-06, "loss": 0.0008, "step": 106360 }, { "epoch": 1.7404892415937168, "grad_norm": 0.07891196012496948, "learning_rate": 4.47454914892661e-06, "loss": 0.0006, "step": 106370 }, { "epoch": 1.7406528675447928, "grad_norm": 0.042788852006196976, "learning_rate": 4.47360248955549e-06, "loss": 0.0015, "step": 106380 }, { "epoch": 1.7408164934958683, "grad_norm": 0.14284440875053406, "learning_rate": 4.472655849265014e-06, "loss": 0.0019, "step": 106390 }, { "epoch": 1.7409801194469443, "grad_norm": 0.12304006516933441, "learning_rate": 4.4717092280894966e-06, "loss": 0.0013, "step": 106400 }, { "epoch": 1.74114374539802, "grad_norm": 0.14403140544891357, "learning_rate": 4.470762626063249e-06, "loss": 0.0018, "step": 106410 }, { "epoch": 1.7413073713490959, "grad_norm": 0.05266826972365379, "learning_rate": 4.469816043220586e-06, "loss": 0.0022, "step": 106420 }, { "epoch": 1.7414709973001719, "grad_norm": 0.12585389614105225, "learning_rate": 4.468869479595815e-06, "loss": 0.0022, "step": 106430 }, { "epoch": 1.7416346232512476, "grad_norm": 0.13879172503948212, "learning_rate": 4.467922935223251e-06, "loss": 0.0014, "step": 106440 }, { "epoch": 1.7417982492023234, "grad_norm": 0.043695759028196335, "learning_rate": 4.4669764101372e-06, "loss": 0.0017, "step": 106450 }, { "epoch": 1.7419618751533994, "grad_norm": 0.09431126713752747, "learning_rate": 4.466029904371974e-06, "loss": 0.002, "step": 106460 }, { "epoch": 1.7421255011044752, "grad_norm": 0.1239037811756134, "learning_rate": 4.46508341796188e-06, "loss": 0.002, "step": 106470 }, { "epoch": 1.742289127055551, "grad_norm": 0.13240762054920197, "learning_rate": 4.464136950941226e-06, "loss": 0.0016, "step": 106480 }, { "epoch": 1.742452753006627, "grad_norm": 0.03952857851982117, "learning_rate": 4.4631905033443204e-06, "loss": 0.0018, "step": 106490 }, { "epoch": 1.7426163789577027, "grad_norm": 0.2009357362985611, "learning_rate": 4.462244075205468e-06, "loss": 0.0021, "step": 106500 }, { "epoch": 1.7427800049087785, "grad_norm": 0.07433266937732697, "learning_rate": 4.4612976665589754e-06, "loss": 0.0008, "step": 106510 }, { "epoch": 1.7429436308598545, "grad_norm": 0.02268165908753872, "learning_rate": 4.460351277439151e-06, "loss": 0.0008, "step": 106520 }, { "epoch": 1.7431072568109303, "grad_norm": 0.017054162919521332, "learning_rate": 4.459404907880293e-06, "loss": 0.0016, "step": 106530 }, { "epoch": 1.743270882762006, "grad_norm": 0.10768765956163406, "learning_rate": 4.458458557916707e-06, "loss": 0.0018, "step": 106540 }, { "epoch": 1.743434508713082, "grad_norm": 0.10166916996240616, "learning_rate": 4.457512227582698e-06, "loss": 0.0005, "step": 106550 }, { "epoch": 1.7435981346641576, "grad_norm": 0.3123462200164795, "learning_rate": 4.456565916912567e-06, "loss": 0.0013, "step": 106560 }, { "epoch": 1.7437617606152336, "grad_norm": 0.03125937283039093, "learning_rate": 4.455619625940614e-06, "loss": 0.001, "step": 106570 }, { "epoch": 1.7439253865663096, "grad_norm": 0.2657507658004761, "learning_rate": 4.454673354701142e-06, "loss": 0.0026, "step": 106580 }, { "epoch": 1.7440890125173851, "grad_norm": 0.09327944368124008, "learning_rate": 4.45372710322845e-06, "loss": 0.0058, "step": 106590 }, { "epoch": 1.7442526384684611, "grad_norm": 0.09993879497051239, "learning_rate": 4.452780871556838e-06, "loss": 0.0015, "step": 106600 }, { "epoch": 1.744416264419537, "grad_norm": 0.09217282384634018, "learning_rate": 4.451834659720604e-06, "loss": 0.0016, "step": 106610 }, { "epoch": 1.7445798903706127, "grad_norm": 0.030844299122691154, "learning_rate": 4.450888467754048e-06, "loss": 0.0013, "step": 106620 }, { "epoch": 1.7447435163216887, "grad_norm": 0.023357996717095375, "learning_rate": 4.449942295691463e-06, "loss": 0.0011, "step": 106630 }, { "epoch": 1.7449071422727644, "grad_norm": 0.10645625740289688, "learning_rate": 4.448996143567148e-06, "loss": 0.0014, "step": 106640 }, { "epoch": 1.7450707682238402, "grad_norm": 0.15672515332698822, "learning_rate": 4.448050011415399e-06, "loss": 0.0025, "step": 106650 }, { "epoch": 1.7452343941749162, "grad_norm": 0.04402748495340347, "learning_rate": 4.44710389927051e-06, "loss": 0.0014, "step": 106660 }, { "epoch": 1.745398020125992, "grad_norm": 0.06813716143369675, "learning_rate": 4.446157807166776e-06, "loss": 0.0019, "step": 106670 }, { "epoch": 1.7455616460770678, "grad_norm": 0.08055794984102249, "learning_rate": 4.445211735138491e-06, "loss": 0.002, "step": 106680 }, { "epoch": 1.7457252720281438, "grad_norm": 0.14186498522758484, "learning_rate": 4.444265683219947e-06, "loss": 0.0018, "step": 106690 }, { "epoch": 1.7458888979792195, "grad_norm": 0.05113886669278145, "learning_rate": 4.443319651445436e-06, "loss": 0.0008, "step": 106700 }, { "epoch": 1.7460525239302953, "grad_norm": 0.12206219136714935, "learning_rate": 4.442373639849249e-06, "loss": 0.0016, "step": 106710 }, { "epoch": 1.7462161498813713, "grad_norm": 0.1278335154056549, "learning_rate": 4.441427648465679e-06, "loss": 0.0017, "step": 106720 }, { "epoch": 1.746379775832447, "grad_norm": 0.08471362292766571, "learning_rate": 4.4404816773290134e-06, "loss": 0.0017, "step": 106730 }, { "epoch": 1.7465434017835229, "grad_norm": 0.051166124641895294, "learning_rate": 4.439535726473542e-06, "loss": 0.0028, "step": 106740 }, { "epoch": 1.7467070277345988, "grad_norm": 0.41442620754241943, "learning_rate": 4.438589795933554e-06, "loss": 0.0018, "step": 106750 }, { "epoch": 1.7468706536856744, "grad_norm": 0.1367095708847046, "learning_rate": 4.437643885743335e-06, "loss": 0.0013, "step": 106760 }, { "epoch": 1.7470342796367504, "grad_norm": 0.0769708976149559, "learning_rate": 4.436697995937176e-06, "loss": 0.0015, "step": 106770 }, { "epoch": 1.7471979055878264, "grad_norm": 0.07013755291700363, "learning_rate": 4.435752126549358e-06, "loss": 0.0015, "step": 106780 }, { "epoch": 1.747361531538902, "grad_norm": 0.06018676608800888, "learning_rate": 4.43480627761417e-06, "loss": 0.002, "step": 106790 }, { "epoch": 1.747525157489978, "grad_norm": 0.16071408987045288, "learning_rate": 4.433860449165897e-06, "loss": 0.0015, "step": 106800 }, { "epoch": 1.7476887834410537, "grad_norm": 0.03976573795080185, "learning_rate": 4.432914641238822e-06, "loss": 0.0011, "step": 106810 }, { "epoch": 1.7478524093921295, "grad_norm": 0.04503847658634186, "learning_rate": 4.43196885386723e-06, "loss": 0.001, "step": 106820 }, { "epoch": 1.7480160353432055, "grad_norm": 0.06381500512361526, "learning_rate": 4.4310230870854e-06, "loss": 0.0015, "step": 106830 }, { "epoch": 1.7481796612942813, "grad_norm": 0.0932886004447937, "learning_rate": 4.430077340927615e-06, "loss": 0.0024, "step": 106840 }, { "epoch": 1.748343287245357, "grad_norm": 0.06423021852970123, "learning_rate": 4.429131615428156e-06, "loss": 0.0015, "step": 106850 }, { "epoch": 1.748506913196433, "grad_norm": 0.14009442925453186, "learning_rate": 4.428185910621306e-06, "loss": 0.0027, "step": 106860 }, { "epoch": 1.7486705391475088, "grad_norm": 0.28321385383605957, "learning_rate": 4.427240226541341e-06, "loss": 0.0013, "step": 106870 }, { "epoch": 1.7488341650985846, "grad_norm": 0.06612227112054825, "learning_rate": 4.426294563222542e-06, "loss": 0.0018, "step": 106880 }, { "epoch": 1.7489977910496606, "grad_norm": 0.0508941188454628, "learning_rate": 4.425348920699185e-06, "loss": 0.0008, "step": 106890 }, { "epoch": 1.7491614170007364, "grad_norm": 0.12185681611299515, "learning_rate": 4.42440329900555e-06, "loss": 0.0005, "step": 106900 }, { "epoch": 1.7493250429518121, "grad_norm": 0.16195951402187347, "learning_rate": 4.4234576981759115e-06, "loss": 0.0023, "step": 106910 }, { "epoch": 1.7494886689028881, "grad_norm": 0.059162188321352005, "learning_rate": 4.422512118244547e-06, "loss": 0.001, "step": 106920 }, { "epoch": 1.7496522948539637, "grad_norm": 0.25423464179039, "learning_rate": 4.421566559245728e-06, "loss": 0.0018, "step": 106930 }, { "epoch": 1.7498159208050397, "grad_norm": 0.2411469668149948, "learning_rate": 4.420621021213732e-06, "loss": 0.0018, "step": 106940 }, { "epoch": 1.7499795467561157, "grad_norm": 0.02898610197007656, "learning_rate": 4.419675504182832e-06, "loss": 0.0016, "step": 106950 }, { "epoch": 1.7501431727071912, "grad_norm": 0.07759073376655579, "learning_rate": 4.4187300081872995e-06, "loss": 0.0015, "step": 106960 }, { "epoch": 1.7503067986582672, "grad_norm": 0.1778598576784134, "learning_rate": 4.417784533261407e-06, "loss": 0.0017, "step": 106970 }, { "epoch": 1.750470424609343, "grad_norm": 0.07045693695545197, "learning_rate": 4.416839079439426e-06, "loss": 0.0017, "step": 106980 }, { "epoch": 1.7506340505604188, "grad_norm": 0.0670531764626503, "learning_rate": 4.415893646755627e-06, "loss": 0.0008, "step": 106990 }, { "epoch": 1.7507976765114948, "grad_norm": 0.0903734341263771, "learning_rate": 4.414948235244278e-06, "loss": 0.002, "step": 107000 }, { "epoch": 1.7509613024625705, "grad_norm": 0.05755678936839104, "learning_rate": 4.414002844939651e-06, "loss": 0.0015, "step": 107010 }, { "epoch": 1.7511249284136463, "grad_norm": 0.004980756435543299, "learning_rate": 4.413057475876012e-06, "loss": 0.0016, "step": 107020 }, { "epoch": 1.7512885543647223, "grad_norm": 0.09295650571584702, "learning_rate": 4.412112128087631e-06, "loss": 0.0014, "step": 107030 }, { "epoch": 1.751452180315798, "grad_norm": 0.025870684534311295, "learning_rate": 4.411166801608771e-06, "loss": 0.0016, "step": 107040 }, { "epoch": 1.7516158062668739, "grad_norm": 0.01784415915608406, "learning_rate": 4.410221496473698e-06, "loss": 0.0008, "step": 107050 }, { "epoch": 1.7517794322179499, "grad_norm": 0.0039959256537258625, "learning_rate": 4.409276212716678e-06, "loss": 0.0027, "step": 107060 }, { "epoch": 1.7519430581690256, "grad_norm": 0.125201016664505, "learning_rate": 4.408330950371976e-06, "loss": 0.0018, "step": 107070 }, { "epoch": 1.7521066841201014, "grad_norm": 0.09336341917514801, "learning_rate": 4.407385709473855e-06, "loss": 0.0021, "step": 107080 }, { "epoch": 1.7522703100711774, "grad_norm": 0.22945904731750488, "learning_rate": 4.406440490056578e-06, "loss": 0.0017, "step": 107090 }, { "epoch": 1.7524339360222532, "grad_norm": 0.023744212463498116, "learning_rate": 4.405495292154407e-06, "loss": 0.0012, "step": 107100 }, { "epoch": 1.752597561973329, "grad_norm": 0.04172080010175705, "learning_rate": 4.404550115801602e-06, "loss": 0.0013, "step": 107110 }, { "epoch": 1.752761187924405, "grad_norm": 0.09865215420722961, "learning_rate": 4.403604961032425e-06, "loss": 0.002, "step": 107120 }, { "epoch": 1.7529248138754805, "grad_norm": 0.0782988891005516, "learning_rate": 4.402659827881136e-06, "loss": 0.0014, "step": 107130 }, { "epoch": 1.7530884398265565, "grad_norm": 0.09502945840358734, "learning_rate": 4.401714716381991e-06, "loss": 0.0044, "step": 107140 }, { "epoch": 1.7532520657776325, "grad_norm": 0.05701727047562599, "learning_rate": 4.400769626569249e-06, "loss": 0.0017, "step": 107150 }, { "epoch": 1.753415691728708, "grad_norm": 0.1252390444278717, "learning_rate": 4.3998245584771685e-06, "loss": 0.0015, "step": 107160 }, { "epoch": 1.753579317679784, "grad_norm": 0.2256326526403427, "learning_rate": 4.398879512140005e-06, "loss": 0.0014, "step": 107170 }, { "epoch": 1.7537429436308598, "grad_norm": 0.29919615387916565, "learning_rate": 4.397934487592015e-06, "loss": 0.0014, "step": 107180 }, { "epoch": 1.7539065695819356, "grad_norm": 0.02892584167420864, "learning_rate": 4.396989484867452e-06, "loss": 0.0024, "step": 107190 }, { "epoch": 1.7540701955330116, "grad_norm": 0.013569826260209084, "learning_rate": 4.396044504000571e-06, "loss": 0.0008, "step": 107200 }, { "epoch": 1.7542338214840874, "grad_norm": 0.038387250155210495, "learning_rate": 4.395099545025626e-06, "loss": 0.0026, "step": 107210 }, { "epoch": 1.7543974474351631, "grad_norm": 0.3043919503688812, "learning_rate": 4.394154607976867e-06, "loss": 0.0024, "step": 107220 }, { "epoch": 1.7545610733862391, "grad_norm": 0.1005256325006485, "learning_rate": 4.39320969288855e-06, "loss": 0.0023, "step": 107230 }, { "epoch": 1.754724699337315, "grad_norm": 0.0739796981215477, "learning_rate": 4.392264799794923e-06, "loss": 0.002, "step": 107240 }, { "epoch": 1.7548883252883907, "grad_norm": 0.0268404521048069, "learning_rate": 4.391319928730235e-06, "loss": 0.0009, "step": 107250 }, { "epoch": 1.7550519512394667, "grad_norm": 0.05511759594082832, "learning_rate": 4.390375079728736e-06, "loss": 0.0023, "step": 107260 }, { "epoch": 1.7552155771905424, "grad_norm": 0.030853334814310074, "learning_rate": 4.389430252824675e-06, "loss": 0.0012, "step": 107270 }, { "epoch": 1.7553792031416182, "grad_norm": 0.3306729793548584, "learning_rate": 4.388485448052301e-06, "loss": 0.0022, "step": 107280 }, { "epoch": 1.7555428290926942, "grad_norm": 0.06889146566390991, "learning_rate": 4.387540665445859e-06, "loss": 0.002, "step": 107290 }, { "epoch": 1.75570645504377, "grad_norm": 0.0657404437661171, "learning_rate": 4.386595905039597e-06, "loss": 0.001, "step": 107300 }, { "epoch": 1.7558700809948458, "grad_norm": 0.10056816041469574, "learning_rate": 4.3856511668677585e-06, "loss": 0.0015, "step": 107310 }, { "epoch": 1.7560337069459218, "grad_norm": 0.16094477474689484, "learning_rate": 4.3847064509645884e-06, "loss": 0.0017, "step": 107320 }, { "epoch": 1.7561973328969973, "grad_norm": 0.06102989614009857, "learning_rate": 4.383761757364333e-06, "loss": 0.0013, "step": 107330 }, { "epoch": 1.7563609588480733, "grad_norm": 0.2393198162317276, "learning_rate": 4.38281708610123e-06, "loss": 0.0016, "step": 107340 }, { "epoch": 1.7565245847991493, "grad_norm": 0.03686806187033653, "learning_rate": 4.381872437209525e-06, "loss": 0.0007, "step": 107350 }, { "epoch": 1.7566882107502249, "grad_norm": 0.05576544627547264, "learning_rate": 4.380927810723458e-06, "loss": 0.0012, "step": 107360 }, { "epoch": 1.7568518367013009, "grad_norm": 0.10331454873085022, "learning_rate": 4.3799832066772705e-06, "loss": 0.0013, "step": 107370 }, { "epoch": 1.7570154626523766, "grad_norm": 0.10350741446018219, "learning_rate": 4.379038625105202e-06, "loss": 0.0022, "step": 107380 }, { "epoch": 1.7571790886034524, "grad_norm": 0.07576325535774231, "learning_rate": 4.378094066041491e-06, "loss": 0.0016, "step": 107390 }, { "epoch": 1.7573427145545284, "grad_norm": 0.08821357786655426, "learning_rate": 4.377149529520374e-06, "loss": 0.0011, "step": 107400 }, { "epoch": 1.7575063405056042, "grad_norm": 0.12737809121608734, "learning_rate": 4.376205015576091e-06, "loss": 0.0015, "step": 107410 }, { "epoch": 1.75766996645668, "grad_norm": 0.06821626424789429, "learning_rate": 4.375260524242876e-06, "loss": 0.0014, "step": 107420 }, { "epoch": 1.757833592407756, "grad_norm": 0.042156461626291275, "learning_rate": 4.374316055554967e-06, "loss": 0.0008, "step": 107430 }, { "epoch": 1.7579972183588317, "grad_norm": 0.26228514313697815, "learning_rate": 4.3733716095465965e-06, "loss": 0.0023, "step": 107440 }, { "epoch": 1.7581608443099075, "grad_norm": 0.03440668806433678, "learning_rate": 4.372427186251998e-06, "loss": 0.0014, "step": 107450 }, { "epoch": 1.7583244702609835, "grad_norm": 0.10256185382604599, "learning_rate": 4.371482785705407e-06, "loss": 0.002, "step": 107460 }, { "epoch": 1.7584880962120593, "grad_norm": 0.06891493499279022, "learning_rate": 4.370538407941054e-06, "loss": 0.0008, "step": 107470 }, { "epoch": 1.758651722163135, "grad_norm": 0.17362768948078156, "learning_rate": 4.36959405299317e-06, "loss": 0.0038, "step": 107480 }, { "epoch": 1.758815348114211, "grad_norm": 0.004728842992335558, "learning_rate": 4.368649720895987e-06, "loss": 0.0013, "step": 107490 }, { "epoch": 1.7589789740652868, "grad_norm": 0.0966043695807457, "learning_rate": 4.367705411683733e-06, "loss": 0.0023, "step": 107500 }, { "epoch": 1.7591426000163626, "grad_norm": 0.1799917370080948, "learning_rate": 4.3667611253906395e-06, "loss": 0.0009, "step": 107510 }, { "epoch": 1.7593062259674386, "grad_norm": 0.06199290230870247, "learning_rate": 4.365816862050933e-06, "loss": 0.0016, "step": 107520 }, { "epoch": 1.7594698519185141, "grad_norm": 0.005445132963359356, "learning_rate": 4.364872621698844e-06, "loss": 0.0014, "step": 107530 }, { "epoch": 1.7596334778695901, "grad_norm": 0.11272435635328293, "learning_rate": 4.363928404368592e-06, "loss": 0.0015, "step": 107540 }, { "epoch": 1.7597971038206661, "grad_norm": 0.09983003884553909, "learning_rate": 4.362984210094408e-06, "loss": 0.002, "step": 107550 }, { "epoch": 1.7599607297717417, "grad_norm": 0.09683094173669815, "learning_rate": 4.362040038910514e-06, "loss": 0.0018, "step": 107560 }, { "epoch": 1.7601243557228177, "grad_norm": 0.023452797904610634, "learning_rate": 4.361095890851137e-06, "loss": 0.0013, "step": 107570 }, { "epoch": 1.7602879816738934, "grad_norm": 0.012471520341932774, "learning_rate": 4.360151765950498e-06, "loss": 0.0016, "step": 107580 }, { "epoch": 1.7604516076249692, "grad_norm": 0.06669203191995621, "learning_rate": 4.3592076642428185e-06, "loss": 0.0012, "step": 107590 }, { "epoch": 1.7606152335760452, "grad_norm": 0.2633495032787323, "learning_rate": 4.358263585762323e-06, "loss": 0.0019, "step": 107600 }, { "epoch": 1.760778859527121, "grad_norm": 0.08729929476976395, "learning_rate": 4.357319530543228e-06, "loss": 0.0012, "step": 107610 }, { "epoch": 1.7609424854781968, "grad_norm": 0.20309489965438843, "learning_rate": 4.356375498619757e-06, "loss": 0.0014, "step": 107620 }, { "epoch": 1.7611061114292728, "grad_norm": 0.029592743143439293, "learning_rate": 4.355431490026127e-06, "loss": 0.0011, "step": 107630 }, { "epoch": 1.7612697373803485, "grad_norm": 0.16835619509220123, "learning_rate": 4.3544875047965565e-06, "loss": 0.0018, "step": 107640 }, { "epoch": 1.7614333633314243, "grad_norm": 0.20504416525363922, "learning_rate": 4.353543542965262e-06, "loss": 0.0028, "step": 107650 }, { "epoch": 1.7615969892825003, "grad_norm": 0.08531437814235687, "learning_rate": 4.352599604566461e-06, "loss": 0.0016, "step": 107660 }, { "epoch": 1.761760615233576, "grad_norm": 0.12506519258022308, "learning_rate": 4.351655689634367e-06, "loss": 0.0017, "step": 107670 }, { "epoch": 1.7619242411846519, "grad_norm": 0.06132843717932701, "learning_rate": 4.350711798203197e-06, "loss": 0.002, "step": 107680 }, { "epoch": 1.7620878671357278, "grad_norm": 0.19101710617542267, "learning_rate": 4.3497679303071636e-06, "loss": 0.001, "step": 107690 }, { "epoch": 1.7622514930868036, "grad_norm": 0.0844285637140274, "learning_rate": 4.34882408598048e-06, "loss": 0.0015, "step": 107700 }, { "epoch": 1.7624151190378794, "grad_norm": 0.12264546006917953, "learning_rate": 4.3478802652573585e-06, "loss": 0.0021, "step": 107710 }, { "epoch": 1.7625787449889554, "grad_norm": 0.1508146971464157, "learning_rate": 4.346936468172008e-06, "loss": 0.0016, "step": 107720 }, { "epoch": 1.762742370940031, "grad_norm": 0.04992277920246124, "learning_rate": 4.3459926947586454e-06, "loss": 0.0023, "step": 107730 }, { "epoch": 1.762905996891107, "grad_norm": 0.05946950986981392, "learning_rate": 4.345048945051472e-06, "loss": 0.0027, "step": 107740 }, { "epoch": 1.7630696228421827, "grad_norm": 0.02963419444859028, "learning_rate": 4.344105219084702e-06, "loss": 0.0013, "step": 107750 }, { "epoch": 1.7632332487932585, "grad_norm": 0.13194815814495087, "learning_rate": 4.34316151689254e-06, "loss": 0.001, "step": 107760 }, { "epoch": 1.7633968747443345, "grad_norm": 0.04725028946995735, "learning_rate": 4.342217838509195e-06, "loss": 0.0015, "step": 107770 }, { "epoch": 1.7635605006954103, "grad_norm": 0.09281488507986069, "learning_rate": 4.341274183968871e-06, "loss": 0.0011, "step": 107780 }, { "epoch": 1.763724126646486, "grad_norm": 0.07120165973901749, "learning_rate": 4.340330553305776e-06, "loss": 0.0016, "step": 107790 }, { "epoch": 1.763887752597562, "grad_norm": 0.07696498930454254, "learning_rate": 4.339386946554113e-06, "loss": 0.001, "step": 107800 }, { "epoch": 1.7640513785486378, "grad_norm": 0.07577105611562729, "learning_rate": 4.338443363748086e-06, "loss": 0.0012, "step": 107810 }, { "epoch": 1.7642150044997136, "grad_norm": 0.09016141295433044, "learning_rate": 4.337499804921896e-06, "loss": 0.0019, "step": 107820 }, { "epoch": 1.7643786304507896, "grad_norm": 0.04631270840764046, "learning_rate": 4.336556270109748e-06, "loss": 0.0011, "step": 107830 }, { "epoch": 1.7645422564018653, "grad_norm": 0.002991677960380912, "learning_rate": 4.335612759345838e-06, "loss": 0.0036, "step": 107840 }, { "epoch": 1.7647058823529411, "grad_norm": 0.286753386259079, "learning_rate": 4.33466927266437e-06, "loss": 0.0033, "step": 107850 }, { "epoch": 1.7648695083040171, "grad_norm": 0.1092371866106987, "learning_rate": 4.333725810099541e-06, "loss": 0.0034, "step": 107860 }, { "epoch": 1.765033134255093, "grad_norm": 0.022794928401708603, "learning_rate": 4.33278237168555e-06, "loss": 0.0007, "step": 107870 }, { "epoch": 1.7651967602061687, "grad_norm": 0.07644696533679962, "learning_rate": 4.331838957456595e-06, "loss": 0.0014, "step": 107880 }, { "epoch": 1.7653603861572447, "grad_norm": 0.08925812691450119, "learning_rate": 4.330895567446873e-06, "loss": 0.0011, "step": 107890 }, { "epoch": 1.7655240121083202, "grad_norm": 0.17215490341186523, "learning_rate": 4.3299522016905756e-06, "loss": 0.0016, "step": 107900 }, { "epoch": 1.7656876380593962, "grad_norm": 0.1430642157793045, "learning_rate": 4.329008860221903e-06, "loss": 0.0021, "step": 107910 }, { "epoch": 1.7658512640104722, "grad_norm": 0.09571950882673264, "learning_rate": 4.328065543075045e-06, "loss": 0.003, "step": 107920 }, { "epoch": 1.7660148899615478, "grad_norm": 0.07642622292041779, "learning_rate": 4.327122250284198e-06, "loss": 0.0018, "step": 107930 }, { "epoch": 1.7661785159126238, "grad_norm": 0.07825654000043869, "learning_rate": 4.326178981883551e-06, "loss": 0.0019, "step": 107940 }, { "epoch": 1.7663421418636995, "grad_norm": 0.008431176654994488, "learning_rate": 4.325235737907296e-06, "loss": 0.0017, "step": 107950 }, { "epoch": 1.7665057678147753, "grad_norm": 0.0462493970990181, "learning_rate": 4.3242925183896246e-06, "loss": 0.0017, "step": 107960 }, { "epoch": 1.7666693937658513, "grad_norm": 0.016918199136853218, "learning_rate": 4.323349323364725e-06, "loss": 0.0048, "step": 107970 }, { "epoch": 1.766833019716927, "grad_norm": 0.2721311151981354, "learning_rate": 4.322406152866786e-06, "loss": 0.0018, "step": 107980 }, { "epoch": 1.7669966456680029, "grad_norm": 0.07073357701301575, "learning_rate": 4.321463006929994e-06, "loss": 0.0014, "step": 107990 }, { "epoch": 1.7671602716190788, "grad_norm": 0.05635339021682739, "learning_rate": 4.320519885588539e-06, "loss": 0.0009, "step": 108000 }, { "epoch": 1.7673238975701546, "grad_norm": 0.1365121603012085, "learning_rate": 4.319576788876605e-06, "loss": 0.0029, "step": 108010 }, { "epoch": 1.7674875235212304, "grad_norm": 0.09400023519992828, "learning_rate": 4.318633716828376e-06, "loss": 0.0014, "step": 108020 }, { "epoch": 1.7676511494723064, "grad_norm": 0.09861313551664352, "learning_rate": 4.31769066947804e-06, "loss": 0.0018, "step": 108030 }, { "epoch": 1.7678147754233822, "grad_norm": 0.0662696436047554, "learning_rate": 4.316747646859775e-06, "loss": 0.0019, "step": 108040 }, { "epoch": 1.767978401374458, "grad_norm": 0.1007886677980423, "learning_rate": 4.315804649007765e-06, "loss": 0.0013, "step": 108050 }, { "epoch": 1.768142027325534, "grad_norm": 0.09016755223274231, "learning_rate": 4.314861675956193e-06, "loss": 0.001, "step": 108060 }, { "epoch": 1.7683056532766097, "grad_norm": 0.13750973343849182, "learning_rate": 4.313918727739239e-06, "loss": 0.0015, "step": 108070 }, { "epoch": 1.7684692792276855, "grad_norm": 0.06927937269210815, "learning_rate": 4.312975804391081e-06, "loss": 0.0018, "step": 108080 }, { "epoch": 1.7686329051787615, "grad_norm": 0.05693129077553749, "learning_rate": 4.3120329059459e-06, "loss": 0.0006, "step": 108090 }, { "epoch": 1.768796531129837, "grad_norm": 0.25312113761901855, "learning_rate": 4.3110900324378715e-06, "loss": 0.0014, "step": 108100 }, { "epoch": 1.768960157080913, "grad_norm": 0.04375087842345238, "learning_rate": 4.310147183901175e-06, "loss": 0.0017, "step": 108110 }, { "epoch": 1.769123783031989, "grad_norm": 0.07191846519708633, "learning_rate": 4.309204360369984e-06, "loss": 0.0016, "step": 108120 }, { "epoch": 1.7692874089830646, "grad_norm": 0.15051211416721344, "learning_rate": 4.308261561878476e-06, "loss": 0.0016, "step": 108130 }, { "epoch": 1.7694510349341406, "grad_norm": 0.06570404022932053, "learning_rate": 4.3073187884608226e-06, "loss": 0.0008, "step": 108140 }, { "epoch": 1.7696146608852164, "grad_norm": 0.18022041022777557, "learning_rate": 4.306376040151199e-06, "loss": 0.002, "step": 108150 }, { "epoch": 1.7697782868362921, "grad_norm": 0.10511868447065353, "learning_rate": 4.305433316983776e-06, "loss": 0.0016, "step": 108160 }, { "epoch": 1.7699419127873681, "grad_norm": 0.11730948835611343, "learning_rate": 4.304490618992726e-06, "loss": 0.0016, "step": 108170 }, { "epoch": 1.770105538738444, "grad_norm": 0.007314768619835377, "learning_rate": 4.3035479462122195e-06, "loss": 0.0011, "step": 108180 }, { "epoch": 1.7702691646895197, "grad_norm": 0.4316589832305908, "learning_rate": 4.302605298676427e-06, "loss": 0.0014, "step": 108190 }, { "epoch": 1.7704327906405957, "grad_norm": 0.03252164646983147, "learning_rate": 4.3016626764195155e-06, "loss": 0.001, "step": 108200 }, { "epoch": 1.7705964165916714, "grad_norm": 0.07453364133834839, "learning_rate": 4.300720079475653e-06, "loss": 0.001, "step": 108210 }, { "epoch": 1.7707600425427472, "grad_norm": 0.007671711500734091, "learning_rate": 4.299777507879007e-06, "loss": 0.0012, "step": 108220 }, { "epoch": 1.7709236684938232, "grad_norm": 0.1688695251941681, "learning_rate": 4.2988349616637455e-06, "loss": 0.002, "step": 108230 }, { "epoch": 1.771087294444899, "grad_norm": 0.02427785098552704, "learning_rate": 4.297892440864031e-06, "loss": 0.0018, "step": 108240 }, { "epoch": 1.7712509203959748, "grad_norm": 0.15369784832000732, "learning_rate": 4.296949945514027e-06, "loss": 0.0024, "step": 108250 }, { "epoch": 1.7714145463470508, "grad_norm": 0.06314118951559067, "learning_rate": 4.296007475647897e-06, "loss": 0.0012, "step": 108260 }, { "epoch": 1.7715781722981265, "grad_norm": 0.16762550175189972, "learning_rate": 4.2950650312998036e-06, "loss": 0.0017, "step": 108270 }, { "epoch": 1.7717417982492023, "grad_norm": 0.03174692392349243, "learning_rate": 4.2941226125039105e-06, "loss": 0.0011, "step": 108280 }, { "epoch": 1.7719054242002783, "grad_norm": 0.376834899187088, "learning_rate": 4.293180219294375e-06, "loss": 0.0023, "step": 108290 }, { "epoch": 1.7720690501513539, "grad_norm": 0.09652141481637955, "learning_rate": 4.292237851705358e-06, "loss": 0.0019, "step": 108300 }, { "epoch": 1.7722326761024298, "grad_norm": 0.15823204815387726, "learning_rate": 4.291295509771018e-06, "loss": 0.0019, "step": 108310 }, { "epoch": 1.7723963020535058, "grad_norm": 0.15063823759555817, "learning_rate": 4.290353193525512e-06, "loss": 0.0018, "step": 108320 }, { "epoch": 1.7725599280045814, "grad_norm": 0.08814850449562073, "learning_rate": 4.289410903002999e-06, "loss": 0.0013, "step": 108330 }, { "epoch": 1.7727235539556574, "grad_norm": 0.08576839417219162, "learning_rate": 4.288468638237631e-06, "loss": 0.0034, "step": 108340 }, { "epoch": 1.7728871799067332, "grad_norm": 0.08166787773370743, "learning_rate": 4.287526399263565e-06, "loss": 0.0013, "step": 108350 }, { "epoch": 1.773050805857809, "grad_norm": 0.11101599782705307, "learning_rate": 4.286584186114955e-06, "loss": 0.0014, "step": 108360 }, { "epoch": 1.773214431808885, "grad_norm": 0.039324697107076645, "learning_rate": 4.285641998825953e-06, "loss": 0.0009, "step": 108370 }, { "epoch": 1.7733780577599607, "grad_norm": 0.07791756838560104, "learning_rate": 4.284699837430713e-06, "loss": 0.0022, "step": 108380 }, { "epoch": 1.7735416837110365, "grad_norm": 0.16867142915725708, "learning_rate": 4.283757701963383e-06, "loss": 0.0013, "step": 108390 }, { "epoch": 1.7737053096621125, "grad_norm": 0.04311840608716011, "learning_rate": 4.282815592458116e-06, "loss": 0.0015, "step": 108400 }, { "epoch": 1.7738689356131883, "grad_norm": 0.008796016685664654, "learning_rate": 4.281873508949059e-06, "loss": 0.0033, "step": 108410 }, { "epoch": 1.774032561564264, "grad_norm": 0.034651562571525574, "learning_rate": 4.280931451470362e-06, "loss": 0.0016, "step": 108420 }, { "epoch": 1.77419618751534, "grad_norm": 0.16715854406356812, "learning_rate": 4.279989420056173e-06, "loss": 0.0014, "step": 108430 }, { "epoch": 1.7743598134664158, "grad_norm": 0.19988031685352325, "learning_rate": 4.279047414740636e-06, "loss": 0.0019, "step": 108440 }, { "epoch": 1.7745234394174916, "grad_norm": 0.11294061690568924, "learning_rate": 4.2781054355578975e-06, "loss": 0.0011, "step": 108450 }, { "epoch": 1.7746870653685676, "grad_norm": 0.10826808214187622, "learning_rate": 4.277163482542101e-06, "loss": 0.0017, "step": 108460 }, { "epoch": 1.7748506913196433, "grad_norm": 0.053027503192424774, "learning_rate": 4.2762215557273914e-06, "loss": 0.0015, "step": 108470 }, { "epoch": 1.7750143172707191, "grad_norm": 0.009415642358362675, "learning_rate": 4.275279655147911e-06, "loss": 0.0014, "step": 108480 }, { "epoch": 1.7751779432217951, "grad_norm": 0.13150624930858612, "learning_rate": 4.2743377808378004e-06, "loss": 0.0013, "step": 108490 }, { "epoch": 1.7753415691728707, "grad_norm": 0.08152973651885986, "learning_rate": 4.2733959328312016e-06, "loss": 0.0007, "step": 108500 }, { "epoch": 1.7755051951239467, "grad_norm": 0.05660593509674072, "learning_rate": 4.2724541111622545e-06, "loss": 0.0037, "step": 108510 }, { "epoch": 1.7756688210750227, "grad_norm": 0.052778106182813644, "learning_rate": 4.271512315865097e-06, "loss": 0.0024, "step": 108520 }, { "epoch": 1.7758324470260982, "grad_norm": 0.0690077692270279, "learning_rate": 4.270570546973869e-06, "loss": 0.0024, "step": 108530 }, { "epoch": 1.7759960729771742, "grad_norm": 0.25202247500419617, "learning_rate": 4.2696288045227045e-06, "loss": 0.0017, "step": 108540 }, { "epoch": 1.77615969892825, "grad_norm": 0.00635350588709116, "learning_rate": 4.268687088545738e-06, "loss": 0.0007, "step": 108550 }, { "epoch": 1.7763233248793258, "grad_norm": 0.13210521638393402, "learning_rate": 4.267745399077109e-06, "loss": 0.0015, "step": 108560 }, { "epoch": 1.7764869508304018, "grad_norm": 0.019530724734067917, "learning_rate": 4.266803736150949e-06, "loss": 0.0012, "step": 108570 }, { "epoch": 1.7766505767814775, "grad_norm": 0.1282965987920761, "learning_rate": 4.265862099801391e-06, "loss": 0.0015, "step": 108580 }, { "epoch": 1.7768142027325533, "grad_norm": 0.14573904871940613, "learning_rate": 4.264920490062568e-06, "loss": 0.0019, "step": 108590 }, { "epoch": 1.7769778286836293, "grad_norm": 0.07039479911327362, "learning_rate": 4.26397890696861e-06, "loss": 0.0011, "step": 108600 }, { "epoch": 1.777141454634705, "grad_norm": 0.08231132477521896, "learning_rate": 4.2630373505536485e-06, "loss": 0.001, "step": 108610 }, { "epoch": 1.7773050805857808, "grad_norm": 0.23946666717529297, "learning_rate": 4.26209582085181e-06, "loss": 0.002, "step": 108620 }, { "epoch": 1.7774687065368568, "grad_norm": 0.07521241903305054, "learning_rate": 4.2611543178972274e-06, "loss": 0.0011, "step": 108630 }, { "epoch": 1.7776323324879326, "grad_norm": 0.06746865063905716, "learning_rate": 4.260212841724023e-06, "loss": 0.001, "step": 108640 }, { "epoch": 1.7777959584390084, "grad_norm": 0.05463298410177231, "learning_rate": 4.259271392366325e-06, "loss": 0.0014, "step": 108650 }, { "epoch": 1.7779595843900844, "grad_norm": 0.08373922854661942, "learning_rate": 4.25832996985826e-06, "loss": 0.0011, "step": 108660 }, { "epoch": 1.77812321034116, "grad_norm": 0.06922430545091629, "learning_rate": 4.25738857423395e-06, "loss": 0.0009, "step": 108670 }, { "epoch": 1.778286836292236, "grad_norm": 0.09196797758340836, "learning_rate": 4.25644720552752e-06, "loss": 0.0017, "step": 108680 }, { "epoch": 1.778450462243312, "grad_norm": 0.2511034607887268, "learning_rate": 4.25550586377309e-06, "loss": 0.0014, "step": 108690 }, { "epoch": 1.7786140881943875, "grad_norm": 0.08643194288015366, "learning_rate": 4.254564549004785e-06, "loss": 0.0012, "step": 108700 }, { "epoch": 1.7787777141454635, "grad_norm": 0.0703665018081665, "learning_rate": 4.253623261256721e-06, "loss": 0.0022, "step": 108710 }, { "epoch": 1.7789413400965393, "grad_norm": 0.13588392734527588, "learning_rate": 4.252682000563022e-06, "loss": 0.0019, "step": 108720 }, { "epoch": 1.779104966047615, "grad_norm": 0.0583539679646492, "learning_rate": 4.251740766957806e-06, "loss": 0.0017, "step": 108730 }, { "epoch": 1.779268591998691, "grad_norm": 0.05728354677557945, "learning_rate": 4.250799560475187e-06, "loss": 0.0014, "step": 108740 }, { "epoch": 1.7794322179497668, "grad_norm": 0.14650148153305054, "learning_rate": 4.249858381149283e-06, "loss": 0.0012, "step": 108750 }, { "epoch": 1.7795958439008426, "grad_norm": 0.012672608718276024, "learning_rate": 4.24891722901421e-06, "loss": 0.0038, "step": 108760 }, { "epoch": 1.7797594698519186, "grad_norm": 0.07040093839168549, "learning_rate": 4.247976104104081e-06, "loss": 0.0009, "step": 108770 }, { "epoch": 1.7799230958029943, "grad_norm": 0.03387663885951042, "learning_rate": 4.247035006453012e-06, "loss": 0.0016, "step": 108780 }, { "epoch": 1.7800867217540701, "grad_norm": 0.16323858499526978, "learning_rate": 4.246093936095115e-06, "loss": 0.0015, "step": 108790 }, { "epoch": 1.7802503477051461, "grad_norm": 0.05390274524688721, "learning_rate": 4.2451528930645e-06, "loss": 0.0014, "step": 108800 }, { "epoch": 1.780413973656222, "grad_norm": 0.070576012134552, "learning_rate": 4.244211877395278e-06, "loss": 0.0019, "step": 108810 }, { "epoch": 1.7805775996072977, "grad_norm": 0.01758529618382454, "learning_rate": 4.24327088912156e-06, "loss": 0.0008, "step": 108820 }, { "epoch": 1.7807412255583737, "grad_norm": 0.03800707682967186, "learning_rate": 4.2423299282774534e-06, "loss": 0.0013, "step": 108830 }, { "epoch": 1.7809048515094494, "grad_norm": 0.09394492954015732, "learning_rate": 4.241388994897066e-06, "loss": 0.001, "step": 108840 }, { "epoch": 1.7810684774605252, "grad_norm": 0.09371686726808548, "learning_rate": 4.240448089014503e-06, "loss": 0.0024, "step": 108850 }, { "epoch": 1.7812321034116012, "grad_norm": 0.1738329529762268, "learning_rate": 4.239507210663873e-06, "loss": 0.0008, "step": 108860 }, { "epoch": 1.7813957293626768, "grad_norm": 0.027741970494389534, "learning_rate": 4.238566359879277e-06, "loss": 0.0015, "step": 108870 }, { "epoch": 1.7815593553137528, "grad_norm": 0.21437999606132507, "learning_rate": 4.237625536694821e-06, "loss": 0.0018, "step": 108880 }, { "epoch": 1.7817229812648288, "grad_norm": 0.07579521089792252, "learning_rate": 4.236684741144608e-06, "loss": 0.0017, "step": 108890 }, { "epoch": 1.7818866072159043, "grad_norm": 0.0707777738571167, "learning_rate": 4.235743973262737e-06, "loss": 0.0053, "step": 108900 }, { "epoch": 1.7820502331669803, "grad_norm": 0.06048990413546562, "learning_rate": 4.2348032330833115e-06, "loss": 0.0016, "step": 108910 }, { "epoch": 1.782213859118056, "grad_norm": 0.3154433071613312, "learning_rate": 4.233862520640428e-06, "loss": 0.0018, "step": 108920 }, { "epoch": 1.7823774850691319, "grad_norm": 0.07315990328788757, "learning_rate": 4.232921835968189e-06, "loss": 0.0014, "step": 108930 }, { "epoch": 1.7825411110202078, "grad_norm": 0.1767452359199524, "learning_rate": 4.231981179100688e-06, "loss": 0.0018, "step": 108940 }, { "epoch": 1.7827047369712836, "grad_norm": 0.3045351207256317, "learning_rate": 4.231040550072025e-06, "loss": 0.0024, "step": 108950 }, { "epoch": 1.7828683629223594, "grad_norm": 0.06885327398777008, "learning_rate": 4.230099948916293e-06, "loss": 0.0016, "step": 108960 }, { "epoch": 1.7830319888734354, "grad_norm": 0.09926348179578781, "learning_rate": 4.229159375667587e-06, "loss": 0.0013, "step": 108970 }, { "epoch": 1.7831956148245112, "grad_norm": 0.05821095407009125, "learning_rate": 4.228218830360001e-06, "loss": 0.001, "step": 108980 }, { "epoch": 1.783359240775587, "grad_norm": 0.07888317853212357, "learning_rate": 4.227278313027627e-06, "loss": 0.0023, "step": 108990 }, { "epoch": 1.783522866726663, "grad_norm": 0.07661629468202591, "learning_rate": 4.226337823704558e-06, "loss": 0.002, "step": 109000 }, { "epoch": 1.7836864926777387, "grad_norm": 0.04742414504289627, "learning_rate": 4.225397362424883e-06, "loss": 0.0008, "step": 109010 }, { "epoch": 1.7838501186288145, "grad_norm": 0.15801192820072174, "learning_rate": 4.2244569292226925e-06, "loss": 0.0053, "step": 109020 }, { "epoch": 1.7840137445798905, "grad_norm": 0.03451507166028023, "learning_rate": 4.223516524132073e-06, "loss": 0.0011, "step": 109030 }, { "epoch": 1.7841773705309663, "grad_norm": 0.20218102633953094, "learning_rate": 4.2225761471871155e-06, "loss": 0.0022, "step": 109040 }, { "epoch": 1.784340996482042, "grad_norm": 0.29750487208366394, "learning_rate": 4.221635798421904e-06, "loss": 0.0019, "step": 109050 }, { "epoch": 1.784504622433118, "grad_norm": 0.21108809113502502, "learning_rate": 4.2206954778705225e-06, "loss": 0.0013, "step": 109060 }, { "epoch": 1.7846682483841936, "grad_norm": 0.04725620523095131, "learning_rate": 4.219755185567057e-06, "loss": 0.0007, "step": 109070 }, { "epoch": 1.7848318743352696, "grad_norm": 0.10169366747140884, "learning_rate": 4.218814921545591e-06, "loss": 0.0019, "step": 109080 }, { "epoch": 1.7849955002863456, "grad_norm": 0.13469062745571136, "learning_rate": 4.217874685840207e-06, "loss": 0.0015, "step": 109090 }, { "epoch": 1.7851591262374211, "grad_norm": 0.10235961526632309, "learning_rate": 4.216934478484985e-06, "loss": 0.0012, "step": 109100 }, { "epoch": 1.7853227521884971, "grad_norm": 0.30775031447410583, "learning_rate": 4.215994299514007e-06, "loss": 0.0025, "step": 109110 }, { "epoch": 1.785486378139573, "grad_norm": 0.13852088153362274, "learning_rate": 4.215054148961351e-06, "loss": 0.0023, "step": 109120 }, { "epoch": 1.7856500040906487, "grad_norm": 0.11138801276683807, "learning_rate": 4.214114026861096e-06, "loss": 0.0011, "step": 109130 }, { "epoch": 1.7858136300417247, "grad_norm": 0.23532314598560333, "learning_rate": 4.213173933247318e-06, "loss": 0.0019, "step": 109140 }, { "epoch": 1.7859772559928004, "grad_norm": 0.03958771750330925, "learning_rate": 4.2122338681540945e-06, "loss": 0.0013, "step": 109150 }, { "epoch": 1.7861408819438762, "grad_norm": 0.06906077265739441, "learning_rate": 4.2112938316155e-06, "loss": 0.0016, "step": 109160 }, { "epoch": 1.7863045078949522, "grad_norm": 0.0847359374165535, "learning_rate": 4.210353823665608e-06, "loss": 0.0016, "step": 109170 }, { "epoch": 1.786468133846028, "grad_norm": 0.04323013499379158, "learning_rate": 4.209413844338492e-06, "loss": 0.0014, "step": 109180 }, { "epoch": 1.7866317597971038, "grad_norm": 0.2102014422416687, "learning_rate": 4.208473893668224e-06, "loss": 0.0018, "step": 109190 }, { "epoch": 1.7867953857481798, "grad_norm": 0.09043881297111511, "learning_rate": 4.207533971688874e-06, "loss": 0.0015, "step": 109200 }, { "epoch": 1.7869590116992555, "grad_norm": 0.04318220913410187, "learning_rate": 4.206594078434514e-06, "loss": 0.0007, "step": 109210 }, { "epoch": 1.7871226376503313, "grad_norm": 0.024060461670160294, "learning_rate": 4.2056542139392105e-06, "loss": 0.0031, "step": 109220 }, { "epoch": 1.7872862636014073, "grad_norm": 0.03796510398387909, "learning_rate": 4.2047143782370346e-06, "loss": 0.0016, "step": 109230 }, { "epoch": 1.787449889552483, "grad_norm": 0.03802335262298584, "learning_rate": 4.203774571362052e-06, "loss": 0.0023, "step": 109240 }, { "epoch": 1.7876135155035588, "grad_norm": 0.026080958545207977, "learning_rate": 4.202834793348326e-06, "loss": 0.0012, "step": 109250 }, { "epoch": 1.7877771414546348, "grad_norm": 0.2294594943523407, "learning_rate": 4.201895044229921e-06, "loss": 0.0007, "step": 109260 }, { "epoch": 1.7879407674057104, "grad_norm": 0.17704853415489197, "learning_rate": 4.200955324040904e-06, "loss": 0.003, "step": 109270 }, { "epoch": 1.7881043933567864, "grad_norm": 0.2035708725452423, "learning_rate": 4.2000156328153365e-06, "loss": 0.001, "step": 109280 }, { "epoch": 1.7882680193078624, "grad_norm": 0.1668217033147812, "learning_rate": 4.1990759705872785e-06, "loss": 0.0032, "step": 109290 }, { "epoch": 1.788431645258938, "grad_norm": 0.1366310715675354, "learning_rate": 4.198136337390793e-06, "loss": 0.0013, "step": 109300 }, { "epoch": 1.788595271210014, "grad_norm": 0.010214103385806084, "learning_rate": 4.197196733259937e-06, "loss": 0.0021, "step": 109310 }, { "epoch": 1.7887588971610897, "grad_norm": 0.02637605555355549, "learning_rate": 4.19625715822877e-06, "loss": 0.0017, "step": 109320 }, { "epoch": 1.7889225231121655, "grad_norm": 0.05430176481604576, "learning_rate": 4.195317612331349e-06, "loss": 0.001, "step": 109330 }, { "epoch": 1.7890861490632415, "grad_norm": 0.17617493867874146, "learning_rate": 4.194378095601732e-06, "loss": 0.0021, "step": 109340 }, { "epoch": 1.7892497750143173, "grad_norm": 0.0738600417971611, "learning_rate": 4.193438608073972e-06, "loss": 0.0013, "step": 109350 }, { "epoch": 1.789413400965393, "grad_norm": 0.054634593427181244, "learning_rate": 4.192499149782123e-06, "loss": 0.0014, "step": 109360 }, { "epoch": 1.789577026916469, "grad_norm": 0.16539928317070007, "learning_rate": 4.191559720760241e-06, "loss": 0.0013, "step": 109370 }, { "epoch": 1.7897406528675448, "grad_norm": 0.1007479578256607, "learning_rate": 4.190620321042374e-06, "loss": 0.0014, "step": 109380 }, { "epoch": 1.7899042788186206, "grad_norm": 0.07088002562522888, "learning_rate": 4.189680950662576e-06, "loss": 0.0014, "step": 109390 }, { "epoch": 1.7900679047696966, "grad_norm": 0.13595464825630188, "learning_rate": 4.188741609654896e-06, "loss": 0.0021, "step": 109400 }, { "epoch": 1.7902315307207723, "grad_norm": 0.3133603632450104, "learning_rate": 4.187802298053383e-06, "loss": 0.0022, "step": 109410 }, { "epoch": 1.7903951566718481, "grad_norm": 0.012337069027125835, "learning_rate": 4.186863015892083e-06, "loss": 0.0017, "step": 109420 }, { "epoch": 1.7905587826229241, "grad_norm": 0.1178058385848999, "learning_rate": 4.185923763205046e-06, "loss": 0.0024, "step": 109430 }, { "epoch": 1.790722408574, "grad_norm": 0.07045537233352661, "learning_rate": 4.184984540026318e-06, "loss": 0.0015, "step": 109440 }, { "epoch": 1.7908860345250757, "grad_norm": 0.11354552209377289, "learning_rate": 4.184045346389939e-06, "loss": 0.0014, "step": 109450 }, { "epoch": 1.7910496604761517, "grad_norm": 0.008082418702542782, "learning_rate": 4.183106182329955e-06, "loss": 0.0014, "step": 109460 }, { "epoch": 1.7912132864272272, "grad_norm": 0.07397264987230301, "learning_rate": 4.182167047880409e-06, "loss": 0.0011, "step": 109470 }, { "epoch": 1.7913769123783032, "grad_norm": 0.17240633070468903, "learning_rate": 4.181227943075341e-06, "loss": 0.0031, "step": 109480 }, { "epoch": 1.791540538329379, "grad_norm": 0.042109161615371704, "learning_rate": 4.180288867948793e-06, "loss": 0.0009, "step": 109490 }, { "epoch": 1.7917041642804548, "grad_norm": 0.2711697518825531, "learning_rate": 4.1793498225348035e-06, "loss": 0.0025, "step": 109500 }, { "epoch": 1.7918677902315308, "grad_norm": 0.10704971104860306, "learning_rate": 4.178410806867411e-06, "loss": 0.0016, "step": 109510 }, { "epoch": 1.7920314161826065, "grad_norm": 0.0459461510181427, "learning_rate": 4.177471820980652e-06, "loss": 0.0019, "step": 109520 }, { "epoch": 1.7921950421336823, "grad_norm": 0.22187310457229614, "learning_rate": 4.176532864908561e-06, "loss": 0.0017, "step": 109530 }, { "epoch": 1.7923586680847583, "grad_norm": 0.03752073645591736, "learning_rate": 4.175593938685178e-06, "loss": 0.0026, "step": 109540 }, { "epoch": 1.792522294035834, "grad_norm": 0.025677448138594627, "learning_rate": 4.17465504234453e-06, "loss": 0.0022, "step": 109550 }, { "epoch": 1.7926859199869098, "grad_norm": 0.06776012480258942, "learning_rate": 4.173716175920655e-06, "loss": 0.0047, "step": 109560 }, { "epoch": 1.7928495459379858, "grad_norm": 0.04825940728187561, "learning_rate": 4.172777339447581e-06, "loss": 0.0009, "step": 109570 }, { "epoch": 1.7930131718890616, "grad_norm": 0.09853023290634155, "learning_rate": 4.171838532959341e-06, "loss": 0.0017, "step": 109580 }, { "epoch": 1.7931767978401374, "grad_norm": 0.028595373034477234, "learning_rate": 4.170899756489964e-06, "loss": 0.001, "step": 109590 }, { "epoch": 1.7933404237912134, "grad_norm": 0.34742894768714905, "learning_rate": 4.169961010073478e-06, "loss": 0.0034, "step": 109600 }, { "epoch": 1.7935040497422892, "grad_norm": 0.045857951045036316, "learning_rate": 4.169022293743911e-06, "loss": 0.0022, "step": 109610 }, { "epoch": 1.793667675693365, "grad_norm": 0.14168353378772736, "learning_rate": 4.168083607535287e-06, "loss": 0.0014, "step": 109620 }, { "epoch": 1.793831301644441, "grad_norm": 0.11010447144508362, "learning_rate": 4.167144951481634e-06, "loss": 0.0014, "step": 109630 }, { "epoch": 1.7939949275955165, "grad_norm": 0.3340127468109131, "learning_rate": 4.166206325616976e-06, "loss": 0.0014, "step": 109640 }, { "epoch": 1.7941585535465925, "grad_norm": 0.01691707782447338, "learning_rate": 4.165267729975333e-06, "loss": 0.0012, "step": 109650 }, { "epoch": 1.7943221794976685, "grad_norm": 0.015904491767287254, "learning_rate": 4.164329164590729e-06, "loss": 0.0045, "step": 109660 }, { "epoch": 1.794485805448744, "grad_norm": 0.04675578698515892, "learning_rate": 4.163390629497184e-06, "loss": 0.002, "step": 109670 }, { "epoch": 1.79464943139982, "grad_norm": 0.04986276105046272, "learning_rate": 4.1624521247287185e-06, "loss": 0.0014, "step": 109680 }, { "epoch": 1.7948130573508958, "grad_norm": 0.045936889946460724, "learning_rate": 4.16151365031935e-06, "loss": 0.0013, "step": 109690 }, { "epoch": 1.7949766833019716, "grad_norm": 0.1072971522808075, "learning_rate": 4.160575206303095e-06, "loss": 0.0016, "step": 109700 }, { "epoch": 1.7951403092530476, "grad_norm": 0.23212066292762756, "learning_rate": 4.159636792713973e-06, "loss": 0.0017, "step": 109710 }, { "epoch": 1.7953039352041233, "grad_norm": 0.03734473139047623, "learning_rate": 4.158698409585997e-06, "loss": 0.0011, "step": 109720 }, { "epoch": 1.7954675611551991, "grad_norm": 0.032329753041267395, "learning_rate": 4.1577600569531825e-06, "loss": 0.0021, "step": 109730 }, { "epoch": 1.7956311871062751, "grad_norm": 0.07856018096208572, "learning_rate": 4.1568217348495436e-06, "loss": 0.0012, "step": 109740 }, { "epoch": 1.795794813057351, "grad_norm": 0.06815605610609055, "learning_rate": 4.155883443309087e-06, "loss": 0.0012, "step": 109750 }, { "epoch": 1.7959584390084267, "grad_norm": 0.4105449616909027, "learning_rate": 4.154945182365826e-06, "loss": 0.0008, "step": 109760 }, { "epoch": 1.7961220649595027, "grad_norm": 0.04402492195367813, "learning_rate": 4.154006952053773e-06, "loss": 0.0011, "step": 109770 }, { "epoch": 1.7962856909105784, "grad_norm": 0.0037064552307128906, "learning_rate": 4.153068752406934e-06, "loss": 0.0014, "step": 109780 }, { "epoch": 1.7964493168616542, "grad_norm": 0.08393971621990204, "learning_rate": 4.152130583459318e-06, "loss": 0.0013, "step": 109790 }, { "epoch": 1.7966129428127302, "grad_norm": 0.032570336014032364, "learning_rate": 4.15119244524493e-06, "loss": 0.0013, "step": 109800 }, { "epoch": 1.796776568763806, "grad_norm": 0.06312573701143265, "learning_rate": 4.150254337797777e-06, "loss": 0.0015, "step": 109810 }, { "epoch": 1.7969401947148818, "grad_norm": 0.08007284253835678, "learning_rate": 4.14931626115186e-06, "loss": 0.0031, "step": 109820 }, { "epoch": 1.7971038206659578, "grad_norm": 0.255429744720459, "learning_rate": 4.148378215341185e-06, "loss": 0.0042, "step": 109830 }, { "epoch": 1.7972674466170333, "grad_norm": 0.07173515111207962, "learning_rate": 4.147440200399753e-06, "loss": 0.0025, "step": 109840 }, { "epoch": 1.7974310725681093, "grad_norm": 0.05971740931272507, "learning_rate": 4.146502216361566e-06, "loss": 0.0018, "step": 109850 }, { "epoch": 1.7975946985191853, "grad_norm": 0.07941849529743195, "learning_rate": 4.1455642632606196e-06, "loss": 0.0013, "step": 109860 }, { "epoch": 1.7977583244702608, "grad_norm": 0.03401295840740204, "learning_rate": 4.144626341130916e-06, "loss": 0.0006, "step": 109870 }, { "epoch": 1.7979219504213368, "grad_norm": 0.07793786376714706, "learning_rate": 4.143688450006452e-06, "loss": 0.0012, "step": 109880 }, { "epoch": 1.7980855763724126, "grad_norm": 0.04069236293435097, "learning_rate": 4.142750589921224e-06, "loss": 0.0011, "step": 109890 }, { "epoch": 1.7982492023234884, "grad_norm": 0.13962702453136444, "learning_rate": 4.141812760909226e-06, "loss": 0.0026, "step": 109900 }, { "epoch": 1.7984128282745644, "grad_norm": 0.13767190277576447, "learning_rate": 4.140874963004453e-06, "loss": 0.0012, "step": 109910 }, { "epoch": 1.7985764542256402, "grad_norm": 0.038861233741045, "learning_rate": 4.139937196240897e-06, "loss": 0.0015, "step": 109920 }, { "epoch": 1.798740080176716, "grad_norm": 0.04253111407160759, "learning_rate": 4.138999460652551e-06, "loss": 0.0013, "step": 109930 }, { "epoch": 1.798903706127792, "grad_norm": 0.12363880127668381, "learning_rate": 4.138061756273408e-06, "loss": 0.0018, "step": 109940 }, { "epoch": 1.7990673320788677, "grad_norm": 0.13247255980968475, "learning_rate": 4.137124083137452e-06, "loss": 0.0017, "step": 109950 }, { "epoch": 1.7992309580299435, "grad_norm": 0.12151532620191574, "learning_rate": 4.136186441278674e-06, "loss": 0.0017, "step": 109960 }, { "epoch": 1.7993945839810195, "grad_norm": 0.07101260870695114, "learning_rate": 4.135248830731062e-06, "loss": 0.0015, "step": 109970 }, { "epoch": 1.7995582099320953, "grad_norm": 0.0801393985748291, "learning_rate": 4.1343112515286e-06, "loss": 0.0015, "step": 109980 }, { "epoch": 1.799721835883171, "grad_norm": 0.09624263644218445, "learning_rate": 4.133373703705275e-06, "loss": 0.0019, "step": 109990 }, { "epoch": 1.799885461834247, "grad_norm": 0.174933522939682, "learning_rate": 4.132436187295072e-06, "loss": 0.0007, "step": 110000 }, { "epoch": 1.8000490877853228, "grad_norm": 0.13256537914276123, "learning_rate": 4.131498702331972e-06, "loss": 0.0011, "step": 110010 }, { "epoch": 1.8002127137363986, "grad_norm": 0.24644337594509125, "learning_rate": 4.130561248849954e-06, "loss": 0.0022, "step": 110020 }, { "epoch": 1.8003763396874746, "grad_norm": 0.0620858408510685, "learning_rate": 4.129623826883004e-06, "loss": 0.001, "step": 110030 }, { "epoch": 1.8005399656385501, "grad_norm": 0.04734351485967636, "learning_rate": 4.128686436465098e-06, "loss": 0.0014, "step": 110040 }, { "epoch": 1.8007035915896261, "grad_norm": 0.09699749946594238, "learning_rate": 4.127749077630214e-06, "loss": 0.003, "step": 110050 }, { "epoch": 1.8008672175407021, "grad_norm": 0.16596907377243042, "learning_rate": 4.126811750412329e-06, "loss": 0.0037, "step": 110060 }, { "epoch": 1.8010308434917777, "grad_norm": 0.03903839737176895, "learning_rate": 4.125874454845421e-06, "loss": 0.002, "step": 110070 }, { "epoch": 1.8011944694428537, "grad_norm": 0.06339141726493835, "learning_rate": 4.124937190963461e-06, "loss": 0.0009, "step": 110080 }, { "epoch": 1.8013580953939294, "grad_norm": 0.12396344542503357, "learning_rate": 4.123999958800426e-06, "loss": 0.0014, "step": 110090 }, { "epoch": 1.8015217213450052, "grad_norm": 0.0903758704662323, "learning_rate": 4.123062758390286e-06, "loss": 0.001, "step": 110100 }, { "epoch": 1.8016853472960812, "grad_norm": 0.1341772973537445, "learning_rate": 4.122125589767014e-06, "loss": 0.0017, "step": 110110 }, { "epoch": 1.801848973247157, "grad_norm": 0.005815631244331598, "learning_rate": 4.12118845296458e-06, "loss": 0.0021, "step": 110120 }, { "epoch": 1.8020125991982328, "grad_norm": 0.36865493655204773, "learning_rate": 4.120251348016951e-06, "loss": 0.0016, "step": 110130 }, { "epoch": 1.8021762251493088, "grad_norm": 0.021256666630506516, "learning_rate": 4.119314274958099e-06, "loss": 0.0015, "step": 110140 }, { "epoch": 1.8023398511003845, "grad_norm": 0.03181765228509903, "learning_rate": 4.1183772338219855e-06, "loss": 0.001, "step": 110150 }, { "epoch": 1.8025034770514603, "grad_norm": 0.11860894411802292, "learning_rate": 4.117440224642579e-06, "loss": 0.0071, "step": 110160 }, { "epoch": 1.8026671030025363, "grad_norm": 0.08703216910362244, "learning_rate": 4.116503247453843e-06, "loss": 0.0011, "step": 110170 }, { "epoch": 1.802830728953612, "grad_norm": 0.03114217333495617, "learning_rate": 4.115566302289741e-06, "loss": 0.001, "step": 110180 }, { "epoch": 1.8029943549046878, "grad_norm": 0.06639087945222855, "learning_rate": 4.114629389184234e-06, "loss": 0.0012, "step": 110190 }, { "epoch": 1.8031579808557638, "grad_norm": 0.03908311948180199, "learning_rate": 4.113692508171283e-06, "loss": 0.0019, "step": 110200 }, { "epoch": 1.8033216068068396, "grad_norm": 0.053245507180690765, "learning_rate": 4.11275565928485e-06, "loss": 0.0012, "step": 110210 }, { "epoch": 1.8034852327579154, "grad_norm": 0.254879891872406, "learning_rate": 4.1118188425588915e-06, "loss": 0.0012, "step": 110220 }, { "epoch": 1.8036488587089914, "grad_norm": 0.09775219857692719, "learning_rate": 4.110882058027366e-06, "loss": 0.0021, "step": 110230 }, { "epoch": 1.803812484660067, "grad_norm": 0.014985921792685986, "learning_rate": 4.109945305724231e-06, "loss": 0.002, "step": 110240 }, { "epoch": 1.803976110611143, "grad_norm": 0.02410202845931053, "learning_rate": 4.109008585683435e-06, "loss": 0.0021, "step": 110250 }, { "epoch": 1.804139736562219, "grad_norm": 0.04714832827448845, "learning_rate": 4.108071897938938e-06, "loss": 0.0011, "step": 110260 }, { "epoch": 1.8043033625132945, "grad_norm": 0.03473523259162903, "learning_rate": 4.107135242524692e-06, "loss": 0.0011, "step": 110270 }, { "epoch": 1.8044669884643705, "grad_norm": 0.057406578212976456, "learning_rate": 4.106198619474646e-06, "loss": 0.0016, "step": 110280 }, { "epoch": 1.8046306144154463, "grad_norm": 0.12736418843269348, "learning_rate": 4.105262028822753e-06, "loss": 0.0026, "step": 110290 }, { "epoch": 1.804794240366522, "grad_norm": 0.04663657024502754, "learning_rate": 4.1043254706029605e-06, "loss": 0.0009, "step": 110300 }, { "epoch": 1.804957866317598, "grad_norm": 0.020401667803525925, "learning_rate": 4.103388944849217e-06, "loss": 0.0022, "step": 110310 }, { "epoch": 1.8051214922686738, "grad_norm": 0.02002941444516182, "learning_rate": 4.10245245159547e-06, "loss": 0.003, "step": 110320 }, { "epoch": 1.8052851182197496, "grad_norm": 0.08543750643730164, "learning_rate": 4.101515990875663e-06, "loss": 0.0012, "step": 110330 }, { "epoch": 1.8054487441708256, "grad_norm": 0.05389794334769249, "learning_rate": 4.100579562723744e-06, "loss": 0.001, "step": 110340 }, { "epoch": 1.8056123701219013, "grad_norm": 0.07623659819364548, "learning_rate": 4.099643167173653e-06, "loss": 0.0008, "step": 110350 }, { "epoch": 1.8057759960729771, "grad_norm": 0.11723193526268005, "learning_rate": 4.0987068042593335e-06, "loss": 0.001, "step": 110360 }, { "epoch": 1.8059396220240531, "grad_norm": 0.14318618178367615, "learning_rate": 4.097770474014725e-06, "loss": 0.0013, "step": 110370 }, { "epoch": 1.806103247975129, "grad_norm": 0.02823631465435028, "learning_rate": 4.09683417647377e-06, "loss": 0.0012, "step": 110380 }, { "epoch": 1.8062668739262047, "grad_norm": 0.13594180345535278, "learning_rate": 4.095897911670404e-06, "loss": 0.0025, "step": 110390 }, { "epoch": 1.8064304998772807, "grad_norm": 0.08361697196960449, "learning_rate": 4.0949616796385665e-06, "loss": 0.0021, "step": 110400 }, { "epoch": 1.8065941258283562, "grad_norm": 0.03334399312734604, "learning_rate": 4.094025480412191e-06, "loss": 0.0014, "step": 110410 }, { "epoch": 1.8067577517794322, "grad_norm": 0.047940444201231, "learning_rate": 4.093089314025216e-06, "loss": 0.0007, "step": 110420 }, { "epoch": 1.8069213777305082, "grad_norm": 0.10455339401960373, "learning_rate": 4.0921531805115736e-06, "loss": 0.0018, "step": 110430 }, { "epoch": 1.8070850036815838, "grad_norm": 0.03962060064077377, "learning_rate": 4.091217079905198e-06, "loss": 0.0021, "step": 110440 }, { "epoch": 1.8072486296326598, "grad_norm": 0.013049505650997162, "learning_rate": 4.090281012240018e-06, "loss": 0.0018, "step": 110450 }, { "epoch": 1.8074122555837355, "grad_norm": 0.29218167066574097, "learning_rate": 4.089344977549965e-06, "loss": 0.0019, "step": 110460 }, { "epoch": 1.8075758815348113, "grad_norm": 0.046039290726184845, "learning_rate": 4.088408975868966e-06, "loss": 0.0009, "step": 110470 }, { "epoch": 1.8077395074858873, "grad_norm": 0.029266629368066788, "learning_rate": 4.087473007230952e-06, "loss": 0.001, "step": 110480 }, { "epoch": 1.807903133436963, "grad_norm": 0.06409554928541183, "learning_rate": 4.086537071669848e-06, "loss": 0.0017, "step": 110490 }, { "epoch": 1.8080667593880388, "grad_norm": 0.0726979449391365, "learning_rate": 4.08560116921958e-06, "loss": 0.0015, "step": 110500 }, { "epoch": 1.8082303853391148, "grad_norm": 0.048297226428985596, "learning_rate": 4.084665299914073e-06, "loss": 0.0016, "step": 110510 }, { "epoch": 1.8083940112901906, "grad_norm": 0.04781966283917427, "learning_rate": 4.083729463787247e-06, "loss": 0.0007, "step": 110520 }, { "epoch": 1.8085576372412664, "grad_norm": 0.0702255517244339, "learning_rate": 4.082793660873027e-06, "loss": 0.0011, "step": 110530 }, { "epoch": 1.8087212631923424, "grad_norm": 0.04958431050181389, "learning_rate": 4.081857891205333e-06, "loss": 0.0017, "step": 110540 }, { "epoch": 1.8088848891434182, "grad_norm": 0.04033922776579857, "learning_rate": 4.080922154818082e-06, "loss": 0.0009, "step": 110550 }, { "epoch": 1.809048515094494, "grad_norm": 0.15074045956134796, "learning_rate": 4.079986451745195e-06, "loss": 0.0015, "step": 110560 }, { "epoch": 1.80921214104557, "grad_norm": 0.13608914613723755, "learning_rate": 4.079050782020588e-06, "loss": 0.0022, "step": 110570 }, { "epoch": 1.8093757669966457, "grad_norm": 0.08144427835941315, "learning_rate": 4.078115145678176e-06, "loss": 0.0014, "step": 110580 }, { "epoch": 1.8095393929477215, "grad_norm": 0.10574265569448471, "learning_rate": 4.077179542751875e-06, "loss": 0.0026, "step": 110590 }, { "epoch": 1.8097030188987975, "grad_norm": 0.09092884510755539, "learning_rate": 4.076243973275597e-06, "loss": 0.0011, "step": 110600 }, { "epoch": 1.809866644849873, "grad_norm": 0.013703079894185066, "learning_rate": 4.075308437283254e-06, "loss": 0.0021, "step": 110610 }, { "epoch": 1.810030270800949, "grad_norm": 0.09637783467769623, "learning_rate": 4.074372934808759e-06, "loss": 0.0015, "step": 110620 }, { "epoch": 1.810193896752025, "grad_norm": 0.1178259551525116, "learning_rate": 4.073437465886019e-06, "loss": 0.0015, "step": 110630 }, { "epoch": 1.8103575227031006, "grad_norm": 0.05465967208147049, "learning_rate": 4.072502030548947e-06, "loss": 0.0012, "step": 110640 }, { "epoch": 1.8105211486541766, "grad_norm": 0.05660577118396759, "learning_rate": 4.071566628831446e-06, "loss": 0.0012, "step": 110650 }, { "epoch": 1.8106847746052523, "grad_norm": 0.005766091402620077, "learning_rate": 4.070631260767422e-06, "loss": 0.0015, "step": 110660 }, { "epoch": 1.8108484005563281, "grad_norm": 0.06887154281139374, "learning_rate": 4.069695926390781e-06, "loss": 0.0025, "step": 110670 }, { "epoch": 1.8110120265074041, "grad_norm": 0.07911459356546402, "learning_rate": 4.068760625735428e-06, "loss": 0.0019, "step": 110680 }, { "epoch": 1.81117565245848, "grad_norm": 0.0156877338886261, "learning_rate": 4.067825358835263e-06, "loss": 0.0016, "step": 110690 }, { "epoch": 1.8113392784095557, "grad_norm": 0.06569913774728775, "learning_rate": 4.066890125724188e-06, "loss": 0.0017, "step": 110700 }, { "epoch": 1.8115029043606317, "grad_norm": 0.05675230920314789, "learning_rate": 4.065954926436104e-06, "loss": 0.0024, "step": 110710 }, { "epoch": 1.8116665303117074, "grad_norm": 0.131346195936203, "learning_rate": 4.0650197610049094e-06, "loss": 0.0014, "step": 110720 }, { "epoch": 1.8118301562627832, "grad_norm": 0.06230497732758522, "learning_rate": 4.064084629464501e-06, "loss": 0.0013, "step": 110730 }, { "epoch": 1.8119937822138592, "grad_norm": 0.07833479344844818, "learning_rate": 4.063149531848777e-06, "loss": 0.0026, "step": 110740 }, { "epoch": 1.812157408164935, "grad_norm": 0.08271912485361099, "learning_rate": 4.062214468191629e-06, "loss": 0.0028, "step": 110750 }, { "epoch": 1.8123210341160108, "grad_norm": 0.0654798299074173, "learning_rate": 4.0612794385269525e-06, "loss": 0.003, "step": 110760 }, { "epoch": 1.8124846600670868, "grad_norm": 0.1459072083234787, "learning_rate": 4.06034444288864e-06, "loss": 0.001, "step": 110770 }, { "epoch": 1.8126482860181625, "grad_norm": 0.07602879405021667, "learning_rate": 4.059409481310584e-06, "loss": 0.0017, "step": 110780 }, { "epoch": 1.8128119119692383, "grad_norm": 0.05197446420788765, "learning_rate": 4.0584745538266725e-06, "loss": 0.0007, "step": 110790 }, { "epoch": 1.8129755379203143, "grad_norm": 0.02462603896856308, "learning_rate": 4.057539660470796e-06, "loss": 0.0013, "step": 110800 }, { "epoch": 1.8131391638713898, "grad_norm": 0.0999525636434555, "learning_rate": 4.056604801276842e-06, "loss": 0.0019, "step": 110810 }, { "epoch": 1.8133027898224658, "grad_norm": 0.05077390372753143, "learning_rate": 4.055669976278697e-06, "loss": 0.0017, "step": 110820 }, { "epoch": 1.8134664157735418, "grad_norm": 0.024046001955866814, "learning_rate": 4.0547351855102446e-06, "loss": 0.0011, "step": 110830 }, { "epoch": 1.8136300417246174, "grad_norm": 0.015562029555439949, "learning_rate": 4.053800429005372e-06, "loss": 0.0006, "step": 110840 }, { "epoch": 1.8137936676756934, "grad_norm": 0.05708250403404236, "learning_rate": 4.052865706797958e-06, "loss": 0.0009, "step": 110850 }, { "epoch": 1.8139572936267692, "grad_norm": 0.13831275701522827, "learning_rate": 4.051931018921886e-06, "loss": 0.0014, "step": 110860 }, { "epoch": 1.814120919577845, "grad_norm": 0.06047903373837471, "learning_rate": 4.0509963654110355e-06, "loss": 0.0028, "step": 110870 }, { "epoch": 1.814284545528921, "grad_norm": 0.1734655648469925, "learning_rate": 4.050061746299287e-06, "loss": 0.0016, "step": 110880 }, { "epoch": 1.8144481714799967, "grad_norm": 0.04613037407398224, "learning_rate": 4.049127161620515e-06, "loss": 0.0014, "step": 110890 }, { "epoch": 1.8146117974310725, "grad_norm": 0.018489491194486618, "learning_rate": 4.048192611408599e-06, "loss": 0.0007, "step": 110900 }, { "epoch": 1.8147754233821485, "grad_norm": 0.011923164129257202, "learning_rate": 4.0472580956974125e-06, "loss": 0.001, "step": 110910 }, { "epoch": 1.8149390493332243, "grad_norm": 0.11047337204217911, "learning_rate": 4.046323614520832e-06, "loss": 0.0021, "step": 110920 }, { "epoch": 1.8151026752843, "grad_norm": 0.12533338367938995, "learning_rate": 4.045389167912728e-06, "loss": 0.002, "step": 110930 }, { "epoch": 1.815266301235376, "grad_norm": 0.02617049589753151, "learning_rate": 4.0444547559069735e-06, "loss": 0.0009, "step": 110940 }, { "epoch": 1.8154299271864518, "grad_norm": 0.07797876745462418, "learning_rate": 4.043520378537435e-06, "loss": 0.0016, "step": 110950 }, { "epoch": 1.8155935531375276, "grad_norm": 0.06467451900243759, "learning_rate": 4.042586035837985e-06, "loss": 0.0009, "step": 110960 }, { "epoch": 1.8157571790886036, "grad_norm": 0.07913364470005035, "learning_rate": 4.041651727842489e-06, "loss": 0.002, "step": 110970 }, { "epoch": 1.8159208050396793, "grad_norm": 0.037827394902706146, "learning_rate": 4.040717454584816e-06, "loss": 0.0011, "step": 110980 }, { "epoch": 1.8160844309907551, "grad_norm": 0.06379867345094681, "learning_rate": 4.039783216098829e-06, "loss": 0.002, "step": 110990 }, { "epoch": 1.8162480569418311, "grad_norm": 0.061186887323856354, "learning_rate": 4.0388490124183925e-06, "loss": 0.0011, "step": 111000 }, { "epoch": 1.8164116828929067, "grad_norm": 0.11079651117324829, "learning_rate": 4.0379148435773695e-06, "loss": 0.0015, "step": 111010 }, { "epoch": 1.8165753088439827, "grad_norm": 0.013405365869402885, "learning_rate": 4.036980709609621e-06, "loss": 0.0012, "step": 111020 }, { "epoch": 1.8167389347950587, "grad_norm": 0.18405316770076752, "learning_rate": 4.036046610549007e-06, "loss": 0.0016, "step": 111030 }, { "epoch": 1.8169025607461342, "grad_norm": 0.09061921387910843, "learning_rate": 4.035112546429387e-06, "loss": 0.0019, "step": 111040 }, { "epoch": 1.8170661866972102, "grad_norm": 0.05544005334377289, "learning_rate": 4.03417851728462e-06, "loss": 0.0015, "step": 111050 }, { "epoch": 1.817229812648286, "grad_norm": 0.11394292116165161, "learning_rate": 4.033244523148558e-06, "loss": 0.0011, "step": 111060 }, { "epoch": 1.8173934385993618, "grad_norm": 0.27714091539382935, "learning_rate": 4.032310564055059e-06, "loss": 0.0017, "step": 111070 }, { "epoch": 1.8175570645504378, "grad_norm": 0.08347877860069275, "learning_rate": 4.031376640037977e-06, "loss": 0.0011, "step": 111080 }, { "epoch": 1.8177206905015135, "grad_norm": 0.009594296105206013, "learning_rate": 4.030442751131162e-06, "loss": 0.0015, "step": 111090 }, { "epoch": 1.8178843164525893, "grad_norm": 0.0414123460650444, "learning_rate": 4.029508897368468e-06, "loss": 0.0008, "step": 111100 }, { "epoch": 1.8180479424036653, "grad_norm": 0.21520625054836273, "learning_rate": 4.028575078783745e-06, "loss": 0.002, "step": 111110 }, { "epoch": 1.818211568354741, "grad_norm": 0.06374149024486542, "learning_rate": 4.027641295410839e-06, "loss": 0.0013, "step": 111120 }, { "epoch": 1.8183751943058168, "grad_norm": 0.042976684868335724, "learning_rate": 4.0267075472836e-06, "loss": 0.0008, "step": 111130 }, { "epoch": 1.8185388202568928, "grad_norm": 0.15336090326309204, "learning_rate": 4.0257738344358715e-06, "loss": 0.0015, "step": 111140 }, { "epoch": 1.8187024462079686, "grad_norm": 0.19176100194454193, "learning_rate": 4.0248401569015035e-06, "loss": 0.0009, "step": 111150 }, { "epoch": 1.8188660721590444, "grad_norm": 0.1569773256778717, "learning_rate": 4.0239065147143345e-06, "loss": 0.0009, "step": 111160 }, { "epoch": 1.8190296981101204, "grad_norm": 0.3488730490207672, "learning_rate": 4.022972907908208e-06, "loss": 0.001, "step": 111170 }, { "epoch": 1.8191933240611962, "grad_norm": 0.055071260780096054, "learning_rate": 4.022039336516965e-06, "loss": 0.003, "step": 111180 }, { "epoch": 1.819356950012272, "grad_norm": 0.07290855050086975, "learning_rate": 4.021105800574444e-06, "loss": 0.0022, "step": 111190 }, { "epoch": 1.819520575963348, "grad_norm": 0.03136163204908371, "learning_rate": 4.0201723001144875e-06, "loss": 0.0024, "step": 111200 }, { "epoch": 1.8196842019144235, "grad_norm": 0.10398129373788834, "learning_rate": 4.019238835170929e-06, "loss": 0.002, "step": 111210 }, { "epoch": 1.8198478278654995, "grad_norm": 0.19065433740615845, "learning_rate": 4.018305405777606e-06, "loss": 0.002, "step": 111220 }, { "epoch": 1.8200114538165755, "grad_norm": 0.01684756949543953, "learning_rate": 4.017372011968353e-06, "loss": 0.0014, "step": 111230 }, { "epoch": 1.820175079767651, "grad_norm": 0.09546512365341187, "learning_rate": 4.016438653777002e-06, "loss": 0.0012, "step": 111240 }, { "epoch": 1.820338705718727, "grad_norm": 0.08042707294225693, "learning_rate": 4.015505331237387e-06, "loss": 0.0011, "step": 111250 }, { "epoch": 1.8205023316698028, "grad_norm": 0.15508301556110382, "learning_rate": 4.014572044383337e-06, "loss": 0.0021, "step": 111260 }, { "epoch": 1.8206659576208786, "grad_norm": 0.1055852472782135, "learning_rate": 4.013638793248682e-06, "loss": 0.0019, "step": 111270 }, { "epoch": 1.8208295835719546, "grad_norm": 0.03518375754356384, "learning_rate": 4.01270557786725e-06, "loss": 0.0011, "step": 111280 }, { "epoch": 1.8209932095230303, "grad_norm": 0.025266781449317932, "learning_rate": 4.011772398272868e-06, "loss": 0.0011, "step": 111290 }, { "epoch": 1.8211568354741061, "grad_norm": 0.16470734775066376, "learning_rate": 4.010839254499362e-06, "loss": 0.0018, "step": 111300 }, { "epoch": 1.8213204614251821, "grad_norm": 0.08801986277103424, "learning_rate": 4.009906146580554e-06, "loss": 0.0019, "step": 111310 }, { "epoch": 1.8214840873762579, "grad_norm": 0.06811599433422089, "learning_rate": 4.0089730745502695e-06, "loss": 0.0021, "step": 111320 }, { "epoch": 1.8216477133273337, "grad_norm": 0.1471439003944397, "learning_rate": 4.008040038442329e-06, "loss": 0.0018, "step": 111330 }, { "epoch": 1.8218113392784097, "grad_norm": 0.05991533026099205, "learning_rate": 4.007107038290553e-06, "loss": 0.0019, "step": 111340 }, { "epoch": 1.8219749652294854, "grad_norm": 0.04979366809129715, "learning_rate": 4.006174074128762e-06, "loss": 0.0021, "step": 111350 }, { "epoch": 1.8221385911805612, "grad_norm": 0.3377133905887604, "learning_rate": 4.005241145990771e-06, "loss": 0.0015, "step": 111360 }, { "epoch": 1.8223022171316372, "grad_norm": 0.08466890454292297, "learning_rate": 4.004308253910397e-06, "loss": 0.0007, "step": 111370 }, { "epoch": 1.8224658430827128, "grad_norm": 0.028911596164107323, "learning_rate": 4.003375397921457e-06, "loss": 0.0025, "step": 111380 }, { "epoch": 1.8226294690337888, "grad_norm": 0.12721887230873108, "learning_rate": 4.002442578057763e-06, "loss": 0.002, "step": 111390 }, { "epoch": 1.8227930949848647, "grad_norm": 0.0052746557630598545, "learning_rate": 4.001509794353127e-06, "loss": 0.0027, "step": 111400 }, { "epoch": 1.8229567209359403, "grad_norm": 0.10063627362251282, "learning_rate": 4.000577046841361e-06, "loss": 0.0006, "step": 111410 }, { "epoch": 1.8231203468870163, "grad_norm": 0.21845905482769012, "learning_rate": 3.9996443355562755e-06, "loss": 0.0015, "step": 111420 }, { "epoch": 1.823283972838092, "grad_norm": 0.16975466907024384, "learning_rate": 3.998711660531679e-06, "loss": 0.0019, "step": 111430 }, { "epoch": 1.8234475987891678, "grad_norm": 0.03261583298444748, "learning_rate": 3.997779021801378e-06, "loss": 0.0018, "step": 111440 }, { "epoch": 1.8236112247402438, "grad_norm": 0.049272313714027405, "learning_rate": 3.996846419399181e-06, "loss": 0.0015, "step": 111450 }, { "epoch": 1.8237748506913196, "grad_norm": 0.02194603905081749, "learning_rate": 3.995913853358887e-06, "loss": 0.0021, "step": 111460 }, { "epoch": 1.8239384766423954, "grad_norm": 0.09400305896997452, "learning_rate": 3.994981323714304e-06, "loss": 0.0008, "step": 111470 }, { "epoch": 1.8241021025934714, "grad_norm": 0.1379026621580124, "learning_rate": 3.994048830499232e-06, "loss": 0.0022, "step": 111480 }, { "epoch": 1.8242657285445472, "grad_norm": 0.08610210567712784, "learning_rate": 3.993116373747472e-06, "loss": 0.0014, "step": 111490 }, { "epoch": 1.824429354495623, "grad_norm": 0.05595541000366211, "learning_rate": 3.992183953492824e-06, "loss": 0.0008, "step": 111500 }, { "epoch": 1.824592980446699, "grad_norm": 0.06122478470206261, "learning_rate": 3.991251569769085e-06, "loss": 0.0018, "step": 111510 }, { "epoch": 1.8247566063977747, "grad_norm": 0.08093955367803574, "learning_rate": 3.990319222610053e-06, "loss": 0.0013, "step": 111520 }, { "epoch": 1.8249202323488505, "grad_norm": 0.11188928782939911, "learning_rate": 3.989386912049521e-06, "loss": 0.0012, "step": 111530 }, { "epoch": 1.8250838582999265, "grad_norm": 0.09362109750509262, "learning_rate": 3.988454638121286e-06, "loss": 0.0015, "step": 111540 }, { "epoch": 1.8252474842510023, "grad_norm": 0.047657836228609085, "learning_rate": 3.98752240085914e-06, "loss": 0.0016, "step": 111550 }, { "epoch": 1.825411110202078, "grad_norm": 0.06392499059438705, "learning_rate": 3.986590200296872e-06, "loss": 0.0013, "step": 111560 }, { "epoch": 1.825574736153154, "grad_norm": 0.021842509508132935, "learning_rate": 3.985658036468273e-06, "loss": 0.0009, "step": 111570 }, { "epoch": 1.8257383621042296, "grad_norm": 0.06912890821695328, "learning_rate": 3.984725909407133e-06, "loss": 0.0014, "step": 111580 }, { "epoch": 1.8259019880553056, "grad_norm": 0.062236666679382324, "learning_rate": 3.983793819147239e-06, "loss": 0.0014, "step": 111590 }, { "epoch": 1.8260656140063816, "grad_norm": 0.051780059933662415, "learning_rate": 3.9828617657223765e-06, "loss": 0.0018, "step": 111600 }, { "epoch": 1.8262292399574571, "grad_norm": 0.37786462903022766, "learning_rate": 3.98192974916633e-06, "loss": 0.0019, "step": 111610 }, { "epoch": 1.8263928659085331, "grad_norm": 0.07932049036026001, "learning_rate": 3.980997769512882e-06, "loss": 0.0009, "step": 111620 }, { "epoch": 1.8265564918596089, "grad_norm": 0.08780483901500702, "learning_rate": 3.980065826795817e-06, "loss": 0.0024, "step": 111630 }, { "epoch": 1.8267201178106847, "grad_norm": 0.14719057083129883, "learning_rate": 3.979133921048915e-06, "loss": 0.0014, "step": 111640 }, { "epoch": 1.8268837437617607, "grad_norm": 0.024466460570693016, "learning_rate": 3.9782020523059565e-06, "loss": 0.0017, "step": 111650 }, { "epoch": 1.8270473697128364, "grad_norm": 0.0958031713962555, "learning_rate": 3.977270220600715e-06, "loss": 0.0024, "step": 111660 }, { "epoch": 1.8272109956639122, "grad_norm": 0.14239314198493958, "learning_rate": 3.976338425966971e-06, "loss": 0.0017, "step": 111670 }, { "epoch": 1.8273746216149882, "grad_norm": 0.13566423952579498, "learning_rate": 3.975406668438496e-06, "loss": 0.0032, "step": 111680 }, { "epoch": 1.827538247566064, "grad_norm": 0.07592587172985077, "learning_rate": 3.9744749480490695e-06, "loss": 0.0011, "step": 111690 }, { "epoch": 1.8277018735171398, "grad_norm": 0.11134257912635803, "learning_rate": 3.973543264832461e-06, "loss": 0.0009, "step": 111700 }, { "epoch": 1.8278654994682157, "grad_norm": 0.12962402403354645, "learning_rate": 3.972611618822441e-06, "loss": 0.0013, "step": 111710 }, { "epoch": 1.8280291254192915, "grad_norm": 0.025920966640114784, "learning_rate": 3.9716800100527826e-06, "loss": 0.0012, "step": 111720 }, { "epoch": 1.8281927513703673, "grad_norm": 0.04941943287849426, "learning_rate": 3.970748438557252e-06, "loss": 0.0015, "step": 111730 }, { "epoch": 1.8283563773214433, "grad_norm": 0.0675387755036354, "learning_rate": 3.9698169043696155e-06, "loss": 0.0016, "step": 111740 }, { "epoch": 1.828520003272519, "grad_norm": 0.17346258461475372, "learning_rate": 3.968885407523641e-06, "loss": 0.0017, "step": 111750 }, { "epoch": 1.8286836292235948, "grad_norm": 0.3269699811935425, "learning_rate": 3.967953948053093e-06, "loss": 0.0027, "step": 111760 }, { "epoch": 1.8288472551746708, "grad_norm": 0.21526245772838593, "learning_rate": 3.967022525991733e-06, "loss": 0.002, "step": 111770 }, { "epoch": 1.8290108811257464, "grad_norm": 0.12804865837097168, "learning_rate": 3.966091141373323e-06, "loss": 0.0019, "step": 111780 }, { "epoch": 1.8291745070768224, "grad_norm": 0.06192389875650406, "learning_rate": 3.965159794231625e-06, "loss": 0.0017, "step": 111790 }, { "epoch": 1.8293381330278984, "grad_norm": 0.06430929154157639, "learning_rate": 3.964228484600397e-06, "loss": 0.0014, "step": 111800 }, { "epoch": 1.829501758978974, "grad_norm": 0.03203820809721947, "learning_rate": 3.963297212513397e-06, "loss": 0.0011, "step": 111810 }, { "epoch": 1.82966538493005, "grad_norm": 0.19966986775398254, "learning_rate": 3.962365978004381e-06, "loss": 0.0016, "step": 111820 }, { "epoch": 1.8298290108811257, "grad_norm": 0.05639839172363281, "learning_rate": 3.961434781107105e-06, "loss": 0.0013, "step": 111830 }, { "epoch": 1.8299926368322015, "grad_norm": 0.04205803573131561, "learning_rate": 3.960503621855322e-06, "loss": 0.0023, "step": 111840 }, { "epoch": 1.8301562627832775, "grad_norm": 0.1549540013074875, "learning_rate": 3.959572500282786e-06, "loss": 0.0013, "step": 111850 }, { "epoch": 1.8303198887343533, "grad_norm": 0.022245367988944054, "learning_rate": 3.958641416423245e-06, "loss": 0.0018, "step": 111860 }, { "epoch": 1.830483514685429, "grad_norm": 0.10699019581079483, "learning_rate": 3.957710370310449e-06, "loss": 0.0013, "step": 111870 }, { "epoch": 1.830647140636505, "grad_norm": 0.05376095697283745, "learning_rate": 3.956779361978148e-06, "loss": 0.0013, "step": 111880 }, { "epoch": 1.8308107665875808, "grad_norm": 0.09463055431842804, "learning_rate": 3.955848391460087e-06, "loss": 0.0014, "step": 111890 }, { "epoch": 1.8309743925386566, "grad_norm": 0.023398658260703087, "learning_rate": 3.954917458790011e-06, "loss": 0.0021, "step": 111900 }, { "epoch": 1.8311380184897326, "grad_norm": 0.16249677538871765, "learning_rate": 3.9539865640016675e-06, "loss": 0.0012, "step": 111910 }, { "epoch": 1.8313016444408083, "grad_norm": 0.08558373898267746, "learning_rate": 3.953055707128796e-06, "loss": 0.0014, "step": 111920 }, { "epoch": 1.8314652703918841, "grad_norm": 0.04150815308094025, "learning_rate": 3.952124888205139e-06, "loss": 0.001, "step": 111930 }, { "epoch": 1.8316288963429601, "grad_norm": 0.039937574416399, "learning_rate": 3.951194107264436e-06, "loss": 0.0016, "step": 111940 }, { "epoch": 1.8317925222940359, "grad_norm": 0.18946243822574615, "learning_rate": 3.95026336434043e-06, "loss": 0.0025, "step": 111950 }, { "epoch": 1.8319561482451117, "grad_norm": 0.023021534085273743, "learning_rate": 3.9493326594668494e-06, "loss": 0.0018, "step": 111960 }, { "epoch": 1.8321197741961877, "grad_norm": 0.22383436560630798, "learning_rate": 3.9484019926774356e-06, "loss": 0.0016, "step": 111970 }, { "epoch": 1.8322834001472632, "grad_norm": 0.07325699180364609, "learning_rate": 3.947471364005922e-06, "loss": 0.0018, "step": 111980 }, { "epoch": 1.8324470260983392, "grad_norm": 0.09470678120851517, "learning_rate": 3.946540773486042e-06, "loss": 0.0017, "step": 111990 }, { "epoch": 1.8326106520494152, "grad_norm": 0.07208456844091415, "learning_rate": 3.945610221151527e-06, "loss": 0.0011, "step": 112000 }, { "epoch": 1.8327742780004908, "grad_norm": 0.22049634158611298, "learning_rate": 3.944679707036107e-06, "loss": 0.0031, "step": 112010 }, { "epoch": 1.8329379039515667, "grad_norm": 0.04909619316458702, "learning_rate": 3.943749231173512e-06, "loss": 0.0012, "step": 112020 }, { "epoch": 1.8331015299026425, "grad_norm": 0.07673922926187515, "learning_rate": 3.942818793597468e-06, "loss": 0.0019, "step": 112030 }, { "epoch": 1.8332651558537183, "grad_norm": 0.14284048974514008, "learning_rate": 3.941888394341703e-06, "loss": 0.0018, "step": 112040 }, { "epoch": 1.8334287818047943, "grad_norm": 0.046910837292671204, "learning_rate": 3.94095803343994e-06, "loss": 0.0013, "step": 112050 }, { "epoch": 1.83359240775587, "grad_norm": 0.08056025952100754, "learning_rate": 3.940027710925903e-06, "loss": 0.0013, "step": 112060 }, { "epoch": 1.8337560337069458, "grad_norm": 0.06476296484470367, "learning_rate": 3.9390974268333134e-06, "loss": 0.0021, "step": 112070 }, { "epoch": 1.8339196596580218, "grad_norm": 0.14994403719902039, "learning_rate": 3.938167181195893e-06, "loss": 0.0014, "step": 112080 }, { "epoch": 1.8340832856090976, "grad_norm": 0.09715993702411652, "learning_rate": 3.93723697404736e-06, "loss": 0.0017, "step": 112090 }, { "epoch": 1.8342469115601734, "grad_norm": 0.12101032584905624, "learning_rate": 3.936306805421432e-06, "loss": 0.0009, "step": 112100 }, { "epoch": 1.8344105375112494, "grad_norm": 0.08441198617219925, "learning_rate": 3.935376675351826e-06, "loss": 0.0013, "step": 112110 }, { "epoch": 1.8345741634623252, "grad_norm": 0.13522681593894958, "learning_rate": 3.934446583872256e-06, "loss": 0.0029, "step": 112120 }, { "epoch": 1.834737789413401, "grad_norm": 0.34163525700569153, "learning_rate": 3.933516531016437e-06, "loss": 0.002, "step": 112130 }, { "epoch": 1.834901415364477, "grad_norm": 0.09370183944702148, "learning_rate": 3.9325865168180806e-06, "loss": 0.0011, "step": 112140 }, { "epoch": 1.8350650413155525, "grad_norm": 0.05973367393016815, "learning_rate": 3.9316565413109e-06, "loss": 0.0016, "step": 112150 }, { "epoch": 1.8352286672666285, "grad_norm": 0.047129448503255844, "learning_rate": 3.930726604528599e-06, "loss": 0.0009, "step": 112160 }, { "epoch": 1.8353922932177045, "grad_norm": 0.06735831499099731, "learning_rate": 3.929796706504889e-06, "loss": 0.0019, "step": 112170 }, { "epoch": 1.83555591916878, "grad_norm": 0.03659762814640999, "learning_rate": 3.928866847273476e-06, "loss": 0.0015, "step": 112180 }, { "epoch": 1.835719545119856, "grad_norm": 0.1317969709634781, "learning_rate": 3.927937026868065e-06, "loss": 0.0016, "step": 112190 }, { "epoch": 1.8358831710709318, "grad_norm": 0.08199972659349442, "learning_rate": 3.927007245322362e-06, "loss": 0.0008, "step": 112200 }, { "epoch": 1.8360467970220076, "grad_norm": 0.24865856766700745, "learning_rate": 3.926077502670066e-06, "loss": 0.0019, "step": 112210 }, { "epoch": 1.8362104229730836, "grad_norm": 0.01620643213391304, "learning_rate": 3.92514779894488e-06, "loss": 0.001, "step": 112220 }, { "epoch": 1.8363740489241593, "grad_norm": 0.06472311168909073, "learning_rate": 3.924218134180503e-06, "loss": 0.0014, "step": 112230 }, { "epoch": 1.8365376748752351, "grad_norm": 0.04937063530087471, "learning_rate": 3.923288508410633e-06, "loss": 0.0019, "step": 112240 }, { "epoch": 1.8367013008263111, "grad_norm": 0.06868140399456024, "learning_rate": 3.9223589216689674e-06, "loss": 0.0009, "step": 112250 }, { "epoch": 1.8368649267773869, "grad_norm": 0.04069497808814049, "learning_rate": 3.9214293739892e-06, "loss": 0.0014, "step": 112260 }, { "epoch": 1.8370285527284627, "grad_norm": 0.11491206288337708, "learning_rate": 3.9204998654050256e-06, "loss": 0.0009, "step": 112270 }, { "epoch": 1.8371921786795387, "grad_norm": 0.06248701736330986, "learning_rate": 3.919570395950137e-06, "loss": 0.0029, "step": 112280 }, { "epoch": 1.8373558046306144, "grad_norm": 0.07207425683736801, "learning_rate": 3.9186409656582245e-06, "loss": 0.0015, "step": 112290 }, { "epoch": 1.8375194305816902, "grad_norm": 0.06867527216672897, "learning_rate": 3.9177115745629775e-06, "loss": 0.0012, "step": 112300 }, { "epoch": 1.8376830565327662, "grad_norm": 0.38233765959739685, "learning_rate": 3.916782222698085e-06, "loss": 0.0018, "step": 112310 }, { "epoch": 1.837846682483842, "grad_norm": 0.07279100269079208, "learning_rate": 3.915852910097234e-06, "loss": 0.0015, "step": 112320 }, { "epoch": 1.8380103084349178, "grad_norm": 0.1465032696723938, "learning_rate": 3.914923636794109e-06, "loss": 0.0012, "step": 112330 }, { "epoch": 1.8381739343859937, "grad_norm": 0.08444641530513763, "learning_rate": 3.913994402822393e-06, "loss": 0.0012, "step": 112340 }, { "epoch": 1.8383375603370693, "grad_norm": 0.11615827679634094, "learning_rate": 3.913065208215773e-06, "loss": 0.001, "step": 112350 }, { "epoch": 1.8385011862881453, "grad_norm": 0.1292015016078949, "learning_rate": 3.912136053007925e-06, "loss": 0.0015, "step": 112360 }, { "epoch": 1.8386648122392213, "grad_norm": 0.05482890456914902, "learning_rate": 3.91120693723253e-06, "loss": 0.0011, "step": 112370 }, { "epoch": 1.8388284381902968, "grad_norm": 0.05896975100040436, "learning_rate": 3.910277860923267e-06, "loss": 0.0014, "step": 112380 }, { "epoch": 1.8389920641413728, "grad_norm": 0.0504162423312664, "learning_rate": 3.9093488241138125e-06, "loss": 0.0008, "step": 112390 }, { "epoch": 1.8391556900924486, "grad_norm": 0.19282062351703644, "learning_rate": 3.90841982683784e-06, "loss": 0.0018, "step": 112400 }, { "epoch": 1.8393193160435244, "grad_norm": 0.030025051906704903, "learning_rate": 3.907490869129028e-06, "loss": 0.0011, "step": 112410 }, { "epoch": 1.8394829419946004, "grad_norm": 0.051199305802583694, "learning_rate": 3.906561951021044e-06, "loss": 0.0014, "step": 112420 }, { "epoch": 1.8396465679456762, "grad_norm": 0.00431101955473423, "learning_rate": 3.905633072547564e-06, "loss": 0.0021, "step": 112430 }, { "epoch": 1.839810193896752, "grad_norm": 0.07762044668197632, "learning_rate": 3.904704233742252e-06, "loss": 0.0019, "step": 112440 }, { "epoch": 1.839973819847828, "grad_norm": 0.07841220498085022, "learning_rate": 3.903775434638782e-06, "loss": 0.001, "step": 112450 }, { "epoch": 1.8401374457989037, "grad_norm": 0.025246400386095047, "learning_rate": 3.902846675270816e-06, "loss": 0.0012, "step": 112460 }, { "epoch": 1.8403010717499795, "grad_norm": 0.03706952929496765, "learning_rate": 3.901917955672021e-06, "loss": 0.0006, "step": 112470 }, { "epoch": 1.8404646977010555, "grad_norm": 0.18933212757110596, "learning_rate": 3.900989275876061e-06, "loss": 0.0018, "step": 112480 }, { "epoch": 1.8406283236521312, "grad_norm": 0.02917795069515705, "learning_rate": 3.900060635916599e-06, "loss": 0.0014, "step": 112490 }, { "epoch": 1.840791949603207, "grad_norm": 0.1295352280139923, "learning_rate": 3.8991320358272945e-06, "loss": 0.0013, "step": 112500 }, { "epoch": 1.840955575554283, "grad_norm": 0.05583084374666214, "learning_rate": 3.898203475641808e-06, "loss": 0.0009, "step": 112510 }, { "epoch": 1.8411192015053588, "grad_norm": 0.09758180379867554, "learning_rate": 3.897274955393798e-06, "loss": 0.0023, "step": 112520 }, { "epoch": 1.8412828274564346, "grad_norm": 0.07284069806337357, "learning_rate": 3.89634647511692e-06, "loss": 0.0012, "step": 112530 }, { "epoch": 1.8414464534075106, "grad_norm": 0.012880626134574413, "learning_rate": 3.89541803484483e-06, "loss": 0.0029, "step": 112540 }, { "epoch": 1.8416100793585861, "grad_norm": 0.010092221200466156, "learning_rate": 3.894489634611182e-06, "loss": 0.0014, "step": 112550 }, { "epoch": 1.8417737053096621, "grad_norm": 0.0824425220489502, "learning_rate": 3.893561274449627e-06, "loss": 0.0012, "step": 112560 }, { "epoch": 1.841937331260738, "grad_norm": 0.14740385115146637, "learning_rate": 3.892632954393817e-06, "loss": 0.0012, "step": 112570 }, { "epoch": 1.8421009572118137, "grad_norm": 0.11079197376966476, "learning_rate": 3.8917046744774e-06, "loss": 0.0006, "step": 112580 }, { "epoch": 1.8422645831628897, "grad_norm": 0.05790552496910095, "learning_rate": 3.890776434734025e-06, "loss": 0.0026, "step": 112590 }, { "epoch": 1.8424282091139654, "grad_norm": 0.03640960156917572, "learning_rate": 3.8898482351973385e-06, "loss": 0.0016, "step": 112600 }, { "epoch": 1.8425918350650412, "grad_norm": 0.15015466511249542, "learning_rate": 3.888920075900985e-06, "loss": 0.0021, "step": 112610 }, { "epoch": 1.8427554610161172, "grad_norm": 0.16366706788539886, "learning_rate": 3.8879919568786086e-06, "loss": 0.0015, "step": 112620 }, { "epoch": 1.842919086967193, "grad_norm": 0.07634207606315613, "learning_rate": 3.887063878163851e-06, "loss": 0.0012, "step": 112630 }, { "epoch": 1.8430827129182688, "grad_norm": 0.1957336962223053, "learning_rate": 3.886135839790353e-06, "loss": 0.0009, "step": 112640 }, { "epoch": 1.8432463388693447, "grad_norm": 0.04755368083715439, "learning_rate": 3.885207841791756e-06, "loss": 0.0016, "step": 112650 }, { "epoch": 1.8434099648204205, "grad_norm": 0.02451455406844616, "learning_rate": 3.8842798842016935e-06, "loss": 0.0017, "step": 112660 }, { "epoch": 1.8435735907714963, "grad_norm": 0.10452144593000412, "learning_rate": 3.8833519670538026e-06, "loss": 0.0014, "step": 112670 }, { "epoch": 1.8437372167225723, "grad_norm": 0.1028767079114914, "learning_rate": 3.88242409038172e-06, "loss": 0.0025, "step": 112680 }, { "epoch": 1.843900842673648, "grad_norm": 0.01705436035990715, "learning_rate": 3.881496254219078e-06, "loss": 0.0017, "step": 112690 }, { "epoch": 1.8440644686247238, "grad_norm": 0.1025237888097763, "learning_rate": 3.880568458599508e-06, "loss": 0.0023, "step": 112700 }, { "epoch": 1.8442280945757998, "grad_norm": 0.044633958488702774, "learning_rate": 3.879640703556641e-06, "loss": 0.0015, "step": 112710 }, { "epoch": 1.8443917205268756, "grad_norm": 0.08035072684288025, "learning_rate": 3.878712989124107e-06, "loss": 0.0005, "step": 112720 }, { "epoch": 1.8445553464779514, "grad_norm": 0.013839014805853367, "learning_rate": 3.877785315335531e-06, "loss": 0.0013, "step": 112730 }, { "epoch": 1.8447189724290274, "grad_norm": 0.043414004147052765, "learning_rate": 3.8768576822245405e-06, "loss": 0.0018, "step": 112740 }, { "epoch": 1.844882598380103, "grad_norm": 0.08317239582538605, "learning_rate": 3.875930089824761e-06, "loss": 0.0013, "step": 112750 }, { "epoch": 1.845046224331179, "grad_norm": 0.16005107760429382, "learning_rate": 3.875002538169812e-06, "loss": 0.0013, "step": 112760 }, { "epoch": 1.845209850282255, "grad_norm": 0.06475251913070679, "learning_rate": 3.874075027293318e-06, "loss": 0.0015, "step": 112770 }, { "epoch": 1.8453734762333305, "grad_norm": 0.13961884379386902, "learning_rate": 3.873147557228899e-06, "loss": 0.0008, "step": 112780 }, { "epoch": 1.8455371021844065, "grad_norm": 0.14066645503044128, "learning_rate": 3.872220128010171e-06, "loss": 0.0014, "step": 112790 }, { "epoch": 1.8457007281354822, "grad_norm": 0.0874023362994194, "learning_rate": 3.871292739670754e-06, "loss": 0.0022, "step": 112800 }, { "epoch": 1.845864354086558, "grad_norm": 0.14040616154670715, "learning_rate": 3.8703653922442615e-06, "loss": 0.0009, "step": 112810 }, { "epoch": 1.846027980037634, "grad_norm": 0.38856273889541626, "learning_rate": 3.86943808576431e-06, "loss": 0.0014, "step": 112820 }, { "epoch": 1.8461916059887098, "grad_norm": 0.01496563758701086, "learning_rate": 3.868510820264508e-06, "loss": 0.0011, "step": 112830 }, { "epoch": 1.8463552319397856, "grad_norm": 0.068240687251091, "learning_rate": 3.8675835957784715e-06, "loss": 0.0021, "step": 112840 }, { "epoch": 1.8465188578908616, "grad_norm": 0.07026727497577667, "learning_rate": 3.86665641233981e-06, "loss": 0.0014, "step": 112850 }, { "epoch": 1.8466824838419373, "grad_norm": 0.14649271965026855, "learning_rate": 3.8657292699821265e-06, "loss": 0.0009, "step": 112860 }, { "epoch": 1.8468461097930131, "grad_norm": 0.2210388481616974, "learning_rate": 3.864802168739032e-06, "loss": 0.0021, "step": 112870 }, { "epoch": 1.847009735744089, "grad_norm": 0.21292605996131897, "learning_rate": 3.86387510864413e-06, "loss": 0.0012, "step": 112880 }, { "epoch": 1.8471733616951649, "grad_norm": 0.06951511651277542, "learning_rate": 3.8629480897310235e-06, "loss": 0.0012, "step": 112890 }, { "epoch": 1.8473369876462407, "grad_norm": 0.06008486449718475, "learning_rate": 3.862021112033317e-06, "loss": 0.0017, "step": 112900 }, { "epoch": 1.8475006135973167, "grad_norm": 0.08109413832426071, "learning_rate": 3.86109417558461e-06, "loss": 0.0022, "step": 112910 }, { "epoch": 1.8476642395483924, "grad_norm": 0.04310496151447296, "learning_rate": 3.860167280418502e-06, "loss": 0.0015, "step": 112920 }, { "epoch": 1.8478278654994682, "grad_norm": 0.048203244805336, "learning_rate": 3.8592404265685905e-06, "loss": 0.0017, "step": 112930 }, { "epoch": 1.8479914914505442, "grad_norm": 0.10649536550045013, "learning_rate": 3.858313614068472e-06, "loss": 0.0013, "step": 112940 }, { "epoch": 1.8481551174016198, "grad_norm": 0.005922969430685043, "learning_rate": 3.857386842951742e-06, "loss": 0.0006, "step": 112950 }, { "epoch": 1.8483187433526957, "grad_norm": 0.15154141187667847, "learning_rate": 3.856460113251992e-06, "loss": 0.0017, "step": 112960 }, { "epoch": 1.8484823693037717, "grad_norm": 0.15933385491371155, "learning_rate": 3.855533425002814e-06, "loss": 0.001, "step": 112970 }, { "epoch": 1.8486459952548473, "grad_norm": 0.23300373554229736, "learning_rate": 3.854606778237799e-06, "loss": 0.0023, "step": 112980 }, { "epoch": 1.8488096212059233, "grad_norm": 0.2681264877319336, "learning_rate": 3.853680172990535e-06, "loss": 0.0011, "step": 112990 }, { "epoch": 1.848973247156999, "grad_norm": 0.06967673450708389, "learning_rate": 3.85275360929461e-06, "loss": 0.0011, "step": 113000 }, { "epoch": 1.8491368731080748, "grad_norm": 0.12202316522598267, "learning_rate": 3.8518270871836094e-06, "loss": 0.0025, "step": 113010 }, { "epoch": 1.8493004990591508, "grad_norm": 0.06977274268865585, "learning_rate": 3.850900606691117e-06, "loss": 0.0016, "step": 113020 }, { "epoch": 1.8494641250102266, "grad_norm": 0.13219143450260162, "learning_rate": 3.849974167850715e-06, "loss": 0.0014, "step": 113030 }, { "epoch": 1.8496277509613024, "grad_norm": 0.005337636452168226, "learning_rate": 3.849047770695987e-06, "loss": 0.0008, "step": 113040 }, { "epoch": 1.8497913769123784, "grad_norm": 0.05751505866646767, "learning_rate": 3.848121415260512e-06, "loss": 0.001, "step": 113050 }, { "epoch": 1.8499550028634542, "grad_norm": 0.04354952648282051, "learning_rate": 3.8471951015778655e-06, "loss": 0.0019, "step": 113060 }, { "epoch": 1.85011862881453, "grad_norm": 0.05468391999602318, "learning_rate": 3.8462688296816254e-06, "loss": 0.0012, "step": 113070 }, { "epoch": 1.850282254765606, "grad_norm": 0.08674248307943344, "learning_rate": 3.845342599605368e-06, "loss": 0.0014, "step": 113080 }, { "epoch": 1.8504458807166817, "grad_norm": 0.12483728677034378, "learning_rate": 3.844416411382667e-06, "loss": 0.0021, "step": 113090 }, { "epoch": 1.8506095066677575, "grad_norm": 0.013608505949378014, "learning_rate": 3.843490265047092e-06, "loss": 0.0021, "step": 113100 }, { "epoch": 1.8507731326188335, "grad_norm": 0.11857075989246368, "learning_rate": 3.842564160632216e-06, "loss": 0.0019, "step": 113110 }, { "epoch": 1.850936758569909, "grad_norm": 0.18585699796676636, "learning_rate": 3.841638098171607e-06, "loss": 0.0019, "step": 113120 }, { "epoch": 1.851100384520985, "grad_norm": 0.15069429576396942, "learning_rate": 3.8407120776988335e-06, "loss": 0.0016, "step": 113130 }, { "epoch": 1.851264010472061, "grad_norm": 0.015933746472001076, "learning_rate": 3.83978609924746e-06, "loss": 0.0014, "step": 113140 }, { "epoch": 1.8514276364231366, "grad_norm": 0.03865732252597809, "learning_rate": 3.838860162851054e-06, "loss": 0.001, "step": 113150 }, { "epoch": 1.8515912623742126, "grad_norm": 0.05710527300834656, "learning_rate": 3.837934268543176e-06, "loss": 0.0006, "step": 113160 }, { "epoch": 1.8517548883252883, "grad_norm": 0.09011007845401764, "learning_rate": 3.837008416357386e-06, "loss": 0.0015, "step": 113170 }, { "epoch": 1.8519185142763641, "grad_norm": 0.4016435444355011, "learning_rate": 3.836082606327247e-06, "loss": 0.0024, "step": 113180 }, { "epoch": 1.85208214022744, "grad_norm": 0.10283300280570984, "learning_rate": 3.835156838486316e-06, "loss": 0.001, "step": 113190 }, { "epoch": 1.8522457661785159, "grad_norm": 0.15302906930446625, "learning_rate": 3.83423111286815e-06, "loss": 0.001, "step": 113200 }, { "epoch": 1.8524093921295917, "grad_norm": 0.045707181096076965, "learning_rate": 3.833305429506303e-06, "loss": 0.0013, "step": 113210 }, { "epoch": 1.8525730180806677, "grad_norm": 0.06647147238254547, "learning_rate": 3.832379788434331e-06, "loss": 0.0011, "step": 113220 }, { "epoch": 1.8527366440317434, "grad_norm": 0.2728255093097687, "learning_rate": 3.831454189685786e-06, "loss": 0.0026, "step": 113230 }, { "epoch": 1.8529002699828192, "grad_norm": 0.10586028546094894, "learning_rate": 3.830528633294218e-06, "loss": 0.0019, "step": 113240 }, { "epoch": 1.8530638959338952, "grad_norm": 0.12289302051067352, "learning_rate": 3.829603119293176e-06, "loss": 0.002, "step": 113250 }, { "epoch": 1.853227521884971, "grad_norm": 0.025495300069451332, "learning_rate": 3.828677647716209e-06, "loss": 0.0015, "step": 113260 }, { "epoch": 1.8533911478360467, "grad_norm": 0.083602674305439, "learning_rate": 3.827752218596861e-06, "loss": 0.0014, "step": 113270 }, { "epoch": 1.8535547737871227, "grad_norm": 0.14927873015403748, "learning_rate": 3.826826831968677e-06, "loss": 0.002, "step": 113280 }, { "epoch": 1.8537183997381985, "grad_norm": 0.10858125239610672, "learning_rate": 3.825901487865201e-06, "loss": 0.0013, "step": 113290 }, { "epoch": 1.8538820256892743, "grad_norm": 0.24482282996177673, "learning_rate": 3.824976186319975e-06, "loss": 0.0013, "step": 113300 }, { "epoch": 1.8540456516403503, "grad_norm": 0.10449431091547012, "learning_rate": 3.824050927366537e-06, "loss": 0.0017, "step": 113310 }, { "epoch": 1.8542092775914258, "grad_norm": 0.053672026842832565, "learning_rate": 3.823125711038427e-06, "loss": 0.0011, "step": 113320 }, { "epoch": 1.8543729035425018, "grad_norm": 0.05485406890511513, "learning_rate": 3.822200537369181e-06, "loss": 0.0019, "step": 113330 }, { "epoch": 1.8545365294935778, "grad_norm": 0.1184929832816124, "learning_rate": 3.821275406392336e-06, "loss": 0.0013, "step": 113340 }, { "epoch": 1.8547001554446534, "grad_norm": 0.08440351486206055, "learning_rate": 3.8203503181414234e-06, "loss": 0.0015, "step": 113350 }, { "epoch": 1.8548637813957294, "grad_norm": 0.3196423649787903, "learning_rate": 3.819425272649979e-06, "loss": 0.0013, "step": 113360 }, { "epoch": 1.8550274073468052, "grad_norm": 0.1023973599076271, "learning_rate": 3.81850026995153e-06, "loss": 0.0012, "step": 113370 }, { "epoch": 1.855191033297881, "grad_norm": 0.019896909594535828, "learning_rate": 3.817575310079605e-06, "loss": 0.0007, "step": 113380 }, { "epoch": 1.855354659248957, "grad_norm": 0.021599577739834785, "learning_rate": 3.816650393067732e-06, "loss": 0.0011, "step": 113390 }, { "epoch": 1.8555182852000327, "grad_norm": 0.10887371748685837, "learning_rate": 3.81572551894944e-06, "loss": 0.0009, "step": 113400 }, { "epoch": 1.8556819111511085, "grad_norm": 0.028830768540501595, "learning_rate": 3.814800687758252e-06, "loss": 0.0006, "step": 113410 }, { "epoch": 1.8558455371021845, "grad_norm": 0.03024921752512455, "learning_rate": 3.8138758995276896e-06, "loss": 0.0024, "step": 113420 }, { "epoch": 1.8560091630532602, "grad_norm": 0.024848848581314087, "learning_rate": 3.812951154291276e-06, "loss": 0.0012, "step": 113430 }, { "epoch": 1.856172789004336, "grad_norm": 0.06721098721027374, "learning_rate": 3.812026452082529e-06, "loss": 0.0019, "step": 113440 }, { "epoch": 1.856336414955412, "grad_norm": 0.09745202958583832, "learning_rate": 3.8111017929349685e-06, "loss": 0.0016, "step": 113450 }, { "epoch": 1.8565000409064878, "grad_norm": 0.027345167472958565, "learning_rate": 3.8101771768821117e-06, "loss": 0.0008, "step": 113460 }, { "epoch": 1.8566636668575636, "grad_norm": 0.12136715650558472, "learning_rate": 3.809252603957472e-06, "loss": 0.0015, "step": 113470 }, { "epoch": 1.8568272928086396, "grad_norm": 0.03409392014145851, "learning_rate": 3.808328074194562e-06, "loss": 0.0018, "step": 113480 }, { "epoch": 1.8569909187597153, "grad_norm": 0.04928814619779587, "learning_rate": 3.807403587626897e-06, "loss": 0.0018, "step": 113490 }, { "epoch": 1.857154544710791, "grad_norm": 0.0720706507563591, "learning_rate": 3.806479144287984e-06, "loss": 0.0015, "step": 113500 }, { "epoch": 1.857318170661867, "grad_norm": 0.048940613865852356, "learning_rate": 3.8055547442113345e-06, "loss": 0.001, "step": 113510 }, { "epoch": 1.8574817966129427, "grad_norm": 0.019793879240751266, "learning_rate": 3.8046303874304543e-06, "loss": 0.0015, "step": 113520 }, { "epoch": 1.8576454225640187, "grad_norm": 0.10107264667749405, "learning_rate": 3.8037060739788495e-06, "loss": 0.001, "step": 113530 }, { "epoch": 1.8578090485150947, "grad_norm": 0.06543976068496704, "learning_rate": 3.8027818038900245e-06, "loss": 0.0008, "step": 113540 }, { "epoch": 1.8579726744661702, "grad_norm": 0.04555689916014671, "learning_rate": 3.8018575771974808e-06, "loss": 0.0013, "step": 113550 }, { "epoch": 1.8581363004172462, "grad_norm": 0.031223345547914505, "learning_rate": 3.800933393934723e-06, "loss": 0.0016, "step": 113560 }, { "epoch": 1.858299926368322, "grad_norm": 0.07470271736383438, "learning_rate": 3.800009254135246e-06, "loss": 0.001, "step": 113570 }, { "epoch": 1.8584635523193977, "grad_norm": 0.0019687185995280743, "learning_rate": 3.7990851578325496e-06, "loss": 0.0017, "step": 113580 }, { "epoch": 1.8586271782704737, "grad_norm": 0.1276399940252304, "learning_rate": 3.7981611050601298e-06, "loss": 0.0008, "step": 113590 }, { "epoch": 1.8587908042215495, "grad_norm": 0.18228983879089355, "learning_rate": 3.797237095851481e-06, "loss": 0.0017, "step": 113600 }, { "epoch": 1.8589544301726253, "grad_norm": 0.08215165138244629, "learning_rate": 3.7963131302400957e-06, "loss": 0.0018, "step": 113610 }, { "epoch": 1.8591180561237013, "grad_norm": 0.04695281758904457, "learning_rate": 3.7953892082594677e-06, "loss": 0.0016, "step": 113620 }, { "epoch": 1.859281682074777, "grad_norm": 0.19269028306007385, "learning_rate": 3.7944653299430854e-06, "loss": 0.0012, "step": 113630 }, { "epoch": 1.8594453080258528, "grad_norm": 0.03540990501642227, "learning_rate": 3.7935414953244382e-06, "loss": 0.0014, "step": 113640 }, { "epoch": 1.8596089339769288, "grad_norm": 0.0897429808974266, "learning_rate": 3.7926177044370116e-06, "loss": 0.0008, "step": 113650 }, { "epoch": 1.8597725599280046, "grad_norm": 0.17248032987117767, "learning_rate": 3.791693957314293e-06, "loss": 0.002, "step": 113660 }, { "epoch": 1.8599361858790804, "grad_norm": 0.09790355712175369, "learning_rate": 3.790770253989763e-06, "loss": 0.0009, "step": 113670 }, { "epoch": 1.8600998118301564, "grad_norm": 0.08103250712156296, "learning_rate": 3.789846594496904e-06, "loss": 0.0013, "step": 113680 }, { "epoch": 1.8602634377812322, "grad_norm": 0.08564095944166183, "learning_rate": 3.7889229788691985e-06, "loss": 0.0015, "step": 113690 }, { "epoch": 1.860427063732308, "grad_norm": 0.07448884099721909, "learning_rate": 3.787999407140124e-06, "loss": 0.0012, "step": 113700 }, { "epoch": 1.860590689683384, "grad_norm": 0.026719890534877777, "learning_rate": 3.7870758793431584e-06, "loss": 0.0014, "step": 113710 }, { "epoch": 1.8607543156344595, "grad_norm": 0.04840150102972984, "learning_rate": 3.786152395511776e-06, "loss": 0.0017, "step": 113720 }, { "epoch": 1.8609179415855355, "grad_norm": 0.05359083414077759, "learning_rate": 3.785228955679453e-06, "loss": 0.0011, "step": 113730 }, { "epoch": 1.8610815675366115, "grad_norm": 0.30011770129203796, "learning_rate": 3.78430555987966e-06, "loss": 0.0016, "step": 113740 }, { "epoch": 1.861245193487687, "grad_norm": 0.08568993210792542, "learning_rate": 3.7833822081458687e-06, "loss": 0.0017, "step": 113750 }, { "epoch": 1.861408819438763, "grad_norm": 0.06905089318752289, "learning_rate": 3.7824589005115497e-06, "loss": 0.0007, "step": 113760 }, { "epoch": 1.8615724453898388, "grad_norm": 0.10211402177810669, "learning_rate": 3.781535637010168e-06, "loss": 0.0016, "step": 113770 }, { "epoch": 1.8617360713409146, "grad_norm": 0.1789311319589615, "learning_rate": 3.7806124176751906e-06, "loss": 0.0013, "step": 113780 }, { "epoch": 1.8618996972919906, "grad_norm": 0.04372971132397652, "learning_rate": 3.779689242540083e-06, "loss": 0.0018, "step": 113790 }, { "epoch": 1.8620633232430663, "grad_norm": 0.10091143846511841, "learning_rate": 3.778766111638307e-06, "loss": 0.0034, "step": 113800 }, { "epoch": 1.8622269491941421, "grad_norm": 0.008842662908136845, "learning_rate": 3.7778430250033244e-06, "loss": 0.001, "step": 113810 }, { "epoch": 1.862390575145218, "grad_norm": 0.1413860321044922, "learning_rate": 3.7769199826685937e-06, "loss": 0.0013, "step": 113820 }, { "epoch": 1.8625542010962939, "grad_norm": 0.10416331887245178, "learning_rate": 3.7759969846675746e-06, "loss": 0.001, "step": 113830 }, { "epoch": 1.8627178270473697, "grad_norm": 0.1822335422039032, "learning_rate": 3.775074031033724e-06, "loss": 0.0018, "step": 113840 }, { "epoch": 1.8628814529984457, "grad_norm": 0.051955800503492355, "learning_rate": 3.774151121800495e-06, "loss": 0.0013, "step": 113850 }, { "epoch": 1.8630450789495214, "grad_norm": 0.07419634610414505, "learning_rate": 3.773228257001344e-06, "loss": 0.0009, "step": 113860 }, { "epoch": 1.8632087049005972, "grad_norm": 0.08996762335300446, "learning_rate": 3.772305436669718e-06, "loss": 0.0011, "step": 113870 }, { "epoch": 1.8633723308516732, "grad_norm": 0.008551595732569695, "learning_rate": 3.771382660839068e-06, "loss": 0.001, "step": 113880 }, { "epoch": 1.8635359568027487, "grad_norm": 0.15793809294700623, "learning_rate": 3.770459929542845e-06, "loss": 0.0011, "step": 113890 }, { "epoch": 1.8636995827538247, "grad_norm": 0.07449346780776978, "learning_rate": 3.7695372428144945e-06, "loss": 0.0023, "step": 113900 }, { "epoch": 1.8638632087049007, "grad_norm": 0.05696192756295204, "learning_rate": 3.7686146006874615e-06, "loss": 0.0011, "step": 113910 }, { "epoch": 1.8640268346559763, "grad_norm": 0.04130572825670242, "learning_rate": 3.7676920031951897e-06, "loss": 0.0026, "step": 113920 }, { "epoch": 1.8641904606070523, "grad_norm": 0.16821983456611633, "learning_rate": 3.7667694503711207e-06, "loss": 0.0024, "step": 113930 }, { "epoch": 1.864354086558128, "grad_norm": 0.028291620314121246, "learning_rate": 3.7658469422486955e-06, "loss": 0.0011, "step": 113940 }, { "epoch": 1.8645177125092038, "grad_norm": 0.023886337876319885, "learning_rate": 3.764924478861352e-06, "loss": 0.0019, "step": 113950 }, { "epoch": 1.8646813384602798, "grad_norm": 0.12927983701229095, "learning_rate": 3.7640020602425294e-06, "loss": 0.0009, "step": 113960 }, { "epoch": 1.8648449644113556, "grad_norm": 0.23467424511909485, "learning_rate": 3.76307968642566e-06, "loss": 0.0012, "step": 113970 }, { "epoch": 1.8650085903624314, "grad_norm": 0.062484048306941986, "learning_rate": 3.7621573574441794e-06, "loss": 0.0013, "step": 113980 }, { "epoch": 1.8651722163135074, "grad_norm": 0.0071717011742293835, "learning_rate": 3.761235073331519e-06, "loss": 0.0014, "step": 113990 }, { "epoch": 1.8653358422645832, "grad_norm": 0.10824783891439438, "learning_rate": 3.7603128341211105e-06, "loss": 0.0013, "step": 114000 }, { "epoch": 1.865499468215659, "grad_norm": 0.10666628926992416, "learning_rate": 3.759390639846382e-06, "loss": 0.0018, "step": 114010 }, { "epoch": 1.865663094166735, "grad_norm": 0.10495135933160782, "learning_rate": 3.758468490540761e-06, "loss": 0.0012, "step": 114020 }, { "epoch": 1.8658267201178107, "grad_norm": 0.0713539868593216, "learning_rate": 3.757546386237673e-06, "loss": 0.0025, "step": 114030 }, { "epoch": 1.8659903460688865, "grad_norm": 0.05254041403532028, "learning_rate": 3.7566243269705415e-06, "loss": 0.0009, "step": 114040 }, { "epoch": 1.8661539720199625, "grad_norm": 0.1140858605504036, "learning_rate": 3.755702312772791e-06, "loss": 0.0013, "step": 114050 }, { "epoch": 1.8663175979710382, "grad_norm": 0.00451473006978631, "learning_rate": 3.7547803436778428e-06, "loss": 0.002, "step": 114060 }, { "epoch": 1.866481223922114, "grad_norm": 0.062445018440485, "learning_rate": 3.753858419719113e-06, "loss": 0.0016, "step": 114070 }, { "epoch": 1.86664484987319, "grad_norm": 0.20512571930885315, "learning_rate": 3.75293654093002e-06, "loss": 0.0012, "step": 114080 }, { "epoch": 1.8668084758242656, "grad_norm": 0.05938255414366722, "learning_rate": 3.75201470734398e-06, "loss": 0.0019, "step": 114090 }, { "epoch": 1.8669721017753416, "grad_norm": 0.027770772576332092, "learning_rate": 3.751092918994407e-06, "loss": 0.0022, "step": 114100 }, { "epoch": 1.8671357277264176, "grad_norm": 0.1386348456144333, "learning_rate": 3.750171175914714e-06, "loss": 0.0016, "step": 114110 }, { "epoch": 1.8672993536774931, "grad_norm": 0.1404191255569458, "learning_rate": 3.7492494781383126e-06, "loss": 0.0013, "step": 114120 }, { "epoch": 1.867462979628569, "grad_norm": 0.15915922820568085, "learning_rate": 3.748327825698611e-06, "loss": 0.0017, "step": 114130 }, { "epoch": 1.8676266055796449, "grad_norm": 0.049886420369148254, "learning_rate": 3.7474062186290183e-06, "loss": 0.0021, "step": 114140 }, { "epoch": 1.8677902315307207, "grad_norm": 0.08974398672580719, "learning_rate": 3.746484656962939e-06, "loss": 0.0013, "step": 114150 }, { "epoch": 1.8679538574817967, "grad_norm": 0.09072622656822205, "learning_rate": 3.7455631407337794e-06, "loss": 0.0012, "step": 114160 }, { "epoch": 1.8681174834328724, "grad_norm": 0.06740576028823853, "learning_rate": 3.74464166997494e-06, "loss": 0.0026, "step": 114170 }, { "epoch": 1.8682811093839482, "grad_norm": 0.045059073716402054, "learning_rate": 3.743720244719823e-06, "loss": 0.0009, "step": 114180 }, { "epoch": 1.8684447353350242, "grad_norm": 0.10823206603527069, "learning_rate": 3.7427988650018275e-06, "loss": 0.0012, "step": 114190 }, { "epoch": 1.8686083612861, "grad_norm": 0.03656313195824623, "learning_rate": 3.741877530854352e-06, "loss": 0.0018, "step": 114200 }, { "epoch": 1.8687719872371757, "grad_norm": 0.06545853614807129, "learning_rate": 3.7409562423107914e-06, "loss": 0.0017, "step": 114210 }, { "epoch": 1.8689356131882517, "grad_norm": 0.07835962623357773, "learning_rate": 3.7400349994045414e-06, "loss": 0.0021, "step": 114220 }, { "epoch": 1.8690992391393275, "grad_norm": 0.410089910030365, "learning_rate": 3.7391138021689943e-06, "loss": 0.0014, "step": 114230 }, { "epoch": 1.8692628650904033, "grad_norm": 0.1445467323064804, "learning_rate": 3.7381926506375417e-06, "loss": 0.0017, "step": 114240 }, { "epoch": 1.8694264910414793, "grad_norm": 0.08186060935258865, "learning_rate": 3.7372715448435727e-06, "loss": 0.001, "step": 114250 }, { "epoch": 1.869590116992555, "grad_norm": 0.08139314502477646, "learning_rate": 3.7363504848204762e-06, "loss": 0.001, "step": 114260 }, { "epoch": 1.8697537429436308, "grad_norm": 0.2628469467163086, "learning_rate": 3.735429470601637e-06, "loss": 0.0018, "step": 114270 }, { "epoch": 1.8699173688947068, "grad_norm": 0.004719309974461794, "learning_rate": 3.7345085022204396e-06, "loss": 0.0006, "step": 114280 }, { "epoch": 1.8700809948457824, "grad_norm": 0.1432575285434723, "learning_rate": 3.7335875797102673e-06, "loss": 0.0032, "step": 114290 }, { "epoch": 1.8702446207968584, "grad_norm": 0.004074563272297382, "learning_rate": 3.7326667031045017e-06, "loss": 0.0014, "step": 114300 }, { "epoch": 1.8704082467479344, "grad_norm": 0.005161906126886606, "learning_rate": 3.731745872436523e-06, "loss": 0.001, "step": 114310 }, { "epoch": 1.87057187269901, "grad_norm": 0.10010579973459244, "learning_rate": 3.730825087739706e-06, "loss": 0.0037, "step": 114320 }, { "epoch": 1.870735498650086, "grad_norm": 0.0828857570886612, "learning_rate": 3.7299043490474305e-06, "loss": 0.0015, "step": 114330 }, { "epoch": 1.8708991246011617, "grad_norm": 0.09110900014638901, "learning_rate": 3.7289836563930705e-06, "loss": 0.0018, "step": 114340 }, { "epoch": 1.8710627505522375, "grad_norm": 0.20596274733543396, "learning_rate": 3.728063009809997e-06, "loss": 0.0012, "step": 114350 }, { "epoch": 1.8712263765033135, "grad_norm": 0.05929534137248993, "learning_rate": 3.7271424093315857e-06, "loss": 0.0009, "step": 114360 }, { "epoch": 1.8713900024543892, "grad_norm": 0.04741894453763962, "learning_rate": 3.7262218549912e-06, "loss": 0.0019, "step": 114370 }, { "epoch": 1.871553628405465, "grad_norm": 0.10395877063274384, "learning_rate": 3.72530134682221e-06, "loss": 0.0016, "step": 114380 }, { "epoch": 1.871717254356541, "grad_norm": 0.1305025815963745, "learning_rate": 3.7243808848579834e-06, "loss": 0.0017, "step": 114390 }, { "epoch": 1.8718808803076168, "grad_norm": 0.0972873792052269, "learning_rate": 3.723460469131883e-06, "loss": 0.0012, "step": 114400 }, { "epoch": 1.8720445062586926, "grad_norm": 0.29801979660987854, "learning_rate": 3.722540099677273e-06, "loss": 0.002, "step": 114410 }, { "epoch": 1.8722081322097686, "grad_norm": 0.122939832508564, "learning_rate": 3.7216197765275146e-06, "loss": 0.0019, "step": 114420 }, { "epoch": 1.8723717581608443, "grad_norm": 0.1650342494249344, "learning_rate": 3.7206994997159667e-06, "loss": 0.0017, "step": 114430 }, { "epoch": 1.87253538411192, "grad_norm": 0.01189844124019146, "learning_rate": 3.719779269275987e-06, "loss": 0.0007, "step": 114440 }, { "epoch": 1.872699010062996, "grad_norm": 0.049906808882951736, "learning_rate": 3.7188590852409324e-06, "loss": 0.0052, "step": 114450 }, { "epoch": 1.8728626360140719, "grad_norm": 0.191539004445076, "learning_rate": 3.717938947644157e-06, "loss": 0.0015, "step": 114460 }, { "epoch": 1.8730262619651477, "grad_norm": 0.08642210066318512, "learning_rate": 3.717018856519012e-06, "loss": 0.001, "step": 114470 }, { "epoch": 1.8731898879162237, "grad_norm": 0.07195541262626648, "learning_rate": 3.716098811898851e-06, "loss": 0.0029, "step": 114480 }, { "epoch": 1.8733535138672992, "grad_norm": 0.1362007111310959, "learning_rate": 3.715178813817022e-06, "loss": 0.0015, "step": 114490 }, { "epoch": 1.8735171398183752, "grad_norm": 0.09035487473011017, "learning_rate": 3.714258862306873e-06, "loss": 0.0062, "step": 114500 }, { "epoch": 1.8736807657694512, "grad_norm": 0.07487788051366806, "learning_rate": 3.7133389574017497e-06, "loss": 0.0009, "step": 114510 }, { "epoch": 1.8738443917205267, "grad_norm": 0.046876538544893265, "learning_rate": 3.7124190991349973e-06, "loss": 0.0011, "step": 114520 }, { "epoch": 1.8740080176716027, "grad_norm": 0.008197380229830742, "learning_rate": 3.711499287539958e-06, "loss": 0.0011, "step": 114530 }, { "epoch": 1.8741716436226785, "grad_norm": 0.15889716148376465, "learning_rate": 3.710579522649971e-06, "loss": 0.0034, "step": 114540 }, { "epoch": 1.8743352695737543, "grad_norm": 0.016329629346728325, "learning_rate": 3.709659804498379e-06, "loss": 0.0029, "step": 114550 }, { "epoch": 1.8744988955248303, "grad_norm": 0.11683212220668793, "learning_rate": 3.708740133118519e-06, "loss": 0.0014, "step": 114560 }, { "epoch": 1.874662521475906, "grad_norm": 0.02318652905523777, "learning_rate": 3.7078205085437246e-06, "loss": 0.0014, "step": 114570 }, { "epoch": 1.8748261474269818, "grad_norm": 0.01382275577634573, "learning_rate": 3.7069009308073306e-06, "loss": 0.0014, "step": 114580 }, { "epoch": 1.8749897733780578, "grad_norm": 0.05621064826846123, "learning_rate": 3.70598139994267e-06, "loss": 0.0015, "step": 114590 }, { "epoch": 1.8751533993291336, "grad_norm": 0.0998888686299324, "learning_rate": 3.7050619159830727e-06, "loss": 0.0014, "step": 114600 }, { "epoch": 1.8753170252802094, "grad_norm": 0.10172632336616516, "learning_rate": 3.70414247896187e-06, "loss": 0.0015, "step": 114610 }, { "epoch": 1.8754806512312854, "grad_norm": 0.08705252408981323, "learning_rate": 3.7032230889123865e-06, "loss": 0.0013, "step": 114620 }, { "epoch": 1.8756442771823612, "grad_norm": 0.03748011961579323, "learning_rate": 3.7023037458679505e-06, "loss": 0.0017, "step": 114630 }, { "epoch": 1.875807903133437, "grad_norm": 0.1621546745300293, "learning_rate": 3.7013844498618845e-06, "loss": 0.0012, "step": 114640 }, { "epoch": 1.875971529084513, "grad_norm": 0.07121383398771286, "learning_rate": 3.700465200927511e-06, "loss": 0.0011, "step": 114650 }, { "epoch": 1.8761351550355887, "grad_norm": 0.058320626616477966, "learning_rate": 3.699545999098152e-06, "loss": 0.0008, "step": 114660 }, { "epoch": 1.8762987809866645, "grad_norm": 0.09125392138957977, "learning_rate": 3.698626844407124e-06, "loss": 0.0008, "step": 114670 }, { "epoch": 1.8764624069377405, "grad_norm": 0.1257014125585556, "learning_rate": 3.6977077368877446e-06, "loss": 0.0014, "step": 114680 }, { "epoch": 1.876626032888816, "grad_norm": 0.1659439206123352, "learning_rate": 3.69678867657333e-06, "loss": 0.0015, "step": 114690 }, { "epoch": 1.876789658839892, "grad_norm": 0.03649391233921051, "learning_rate": 3.6958696634971937e-06, "loss": 0.0017, "step": 114700 }, { "epoch": 1.876953284790968, "grad_norm": 0.15149487555027008, "learning_rate": 3.6949506976926476e-06, "loss": 0.0015, "step": 114710 }, { "epoch": 1.8771169107420436, "grad_norm": 0.04018774628639221, "learning_rate": 3.6940317791930027e-06, "loss": 0.0011, "step": 114720 }, { "epoch": 1.8772805366931196, "grad_norm": 0.08447539806365967, "learning_rate": 3.6931129080315665e-06, "loss": 0.001, "step": 114730 }, { "epoch": 1.8774441626441953, "grad_norm": 0.06348925828933716, "learning_rate": 3.692194084241646e-06, "loss": 0.0013, "step": 114740 }, { "epoch": 1.877607788595271, "grad_norm": 0.045159969478845596, "learning_rate": 3.691275307856547e-06, "loss": 0.0012, "step": 114750 }, { "epoch": 1.877771414546347, "grad_norm": 0.02692187763750553, "learning_rate": 3.690356578909574e-06, "loss": 0.0006, "step": 114760 }, { "epoch": 1.8779350404974229, "grad_norm": 0.15412628650665283, "learning_rate": 3.689437897434026e-06, "loss": 0.0026, "step": 114770 }, { "epoch": 1.8780986664484987, "grad_norm": 0.5776950716972351, "learning_rate": 3.688519263463204e-06, "loss": 0.0013, "step": 114780 }, { "epoch": 1.8782622923995747, "grad_norm": 0.07777441293001175, "learning_rate": 3.6876006770304075e-06, "loss": 0.0013, "step": 114790 }, { "epoch": 1.8784259183506504, "grad_norm": 0.13046696782112122, "learning_rate": 3.6866821381689314e-06, "loss": 0.0016, "step": 114800 }, { "epoch": 1.8785895443017262, "grad_norm": 0.2156224399805069, "learning_rate": 3.6857636469120715e-06, "loss": 0.0019, "step": 114810 }, { "epoch": 1.8787531702528022, "grad_norm": 0.07816867530345917, "learning_rate": 3.6848452032931204e-06, "loss": 0.0013, "step": 114820 }, { "epoch": 1.878916796203878, "grad_norm": 0.027057237923145294, "learning_rate": 3.6839268073453704e-06, "loss": 0.0012, "step": 114830 }, { "epoch": 1.8790804221549537, "grad_norm": 0.016057878732681274, "learning_rate": 3.68300845910211e-06, "loss": 0.0017, "step": 114840 }, { "epoch": 1.8792440481060297, "grad_norm": 0.07097262144088745, "learning_rate": 3.6820901585966284e-06, "loss": 0.002, "step": 114850 }, { "epoch": 1.8794076740571053, "grad_norm": 0.13552403450012207, "learning_rate": 3.681171905862213e-06, "loss": 0.0012, "step": 114860 }, { "epoch": 1.8795713000081813, "grad_norm": 0.45568346977233887, "learning_rate": 3.6802537009321427e-06, "loss": 0.003, "step": 114870 }, { "epoch": 1.8797349259592573, "grad_norm": 0.08118440955877304, "learning_rate": 3.6793355438397048e-06, "loss": 0.0016, "step": 114880 }, { "epoch": 1.8798985519103328, "grad_norm": 0.08286190032958984, "learning_rate": 3.67841743461818e-06, "loss": 0.0012, "step": 114890 }, { "epoch": 1.8800621778614088, "grad_norm": 0.005519147962331772, "learning_rate": 3.6774993733008463e-06, "loss": 0.001, "step": 114900 }, { "epoch": 1.8802258038124846, "grad_norm": 0.005147336516529322, "learning_rate": 3.6765813599209822e-06, "loss": 0.001, "step": 114910 }, { "epoch": 1.8803894297635604, "grad_norm": 0.03634466975927353, "learning_rate": 3.6756633945118626e-06, "loss": 0.0007, "step": 114920 }, { "epoch": 1.8805530557146364, "grad_norm": 0.14535662531852722, "learning_rate": 3.6747454771067624e-06, "loss": 0.0016, "step": 114930 }, { "epoch": 1.8807166816657122, "grad_norm": 0.11837170273065567, "learning_rate": 3.6738276077389534e-06, "loss": 0.0025, "step": 114940 }, { "epoch": 1.880880307616788, "grad_norm": 0.04300757870078087, "learning_rate": 3.672909786441706e-06, "loss": 0.0012, "step": 114950 }, { "epoch": 1.881043933567864, "grad_norm": 0.052255939692258835, "learning_rate": 3.6719920132482902e-06, "loss": 0.0007, "step": 114960 }, { "epoch": 1.8812075595189397, "grad_norm": 0.13350211083889008, "learning_rate": 3.6710742881919713e-06, "loss": 0.001, "step": 114970 }, { "epoch": 1.8813711854700155, "grad_norm": 0.04611939191818237, "learning_rate": 3.6701566113060155e-06, "loss": 0.0013, "step": 114980 }, { "epoch": 1.8815348114210915, "grad_norm": 0.017334578558802605, "learning_rate": 3.669238982623686e-06, "loss": 0.0011, "step": 114990 }, { "epoch": 1.8816984373721672, "grad_norm": 0.17697244882583618, "learning_rate": 3.6683214021782454e-06, "loss": 0.0011, "step": 115000 }, { "epoch": 1.881862063323243, "grad_norm": 0.07738719880580902, "learning_rate": 3.6674038700029537e-06, "loss": 0.0009, "step": 115010 }, { "epoch": 1.882025689274319, "grad_norm": 0.10444359481334686, "learning_rate": 3.6664863861310685e-06, "loss": 0.0027, "step": 115020 }, { "epoch": 1.8821893152253948, "grad_norm": 0.08220594376325607, "learning_rate": 3.6655689505958457e-06, "loss": 0.0012, "step": 115030 }, { "epoch": 1.8823529411764706, "grad_norm": 0.08716414123773575, "learning_rate": 3.6646515634305426e-06, "loss": 0.0019, "step": 115040 }, { "epoch": 1.8825165671275466, "grad_norm": 0.03188503533601761, "learning_rate": 3.6637342246684104e-06, "loss": 0.0008, "step": 115050 }, { "epoch": 1.882680193078622, "grad_norm": 0.0638166293501854, "learning_rate": 3.6628169343427033e-06, "loss": 0.002, "step": 115060 }, { "epoch": 1.882843819029698, "grad_norm": 0.13622552156448364, "learning_rate": 3.6618996924866664e-06, "loss": 0.003, "step": 115070 }, { "epoch": 1.883007444980774, "grad_norm": 0.03605085238814354, "learning_rate": 3.6609824991335495e-06, "loss": 0.0006, "step": 115080 }, { "epoch": 1.8831710709318497, "grad_norm": 0.06659548729658127, "learning_rate": 3.6600653543165974e-06, "loss": 0.0008, "step": 115090 }, { "epoch": 1.8833346968829257, "grad_norm": 0.02931560017168522, "learning_rate": 3.6591482580690575e-06, "loss": 0.0016, "step": 115100 }, { "epoch": 1.8834983228340014, "grad_norm": 0.1813018023967743, "learning_rate": 3.6582312104241703e-06, "loss": 0.0023, "step": 115110 }, { "epoch": 1.8836619487850772, "grad_norm": 0.021614031866192818, "learning_rate": 3.6573142114151767e-06, "loss": 0.0015, "step": 115120 }, { "epoch": 1.8838255747361532, "grad_norm": 0.011086588725447655, "learning_rate": 3.6563972610753156e-06, "loss": 0.0014, "step": 115130 }, { "epoch": 1.883989200687229, "grad_norm": 0.11071640253067017, "learning_rate": 3.6554803594378254e-06, "loss": 0.0021, "step": 115140 }, { "epoch": 1.8841528266383047, "grad_norm": 0.10829813778400421, "learning_rate": 3.65456350653594e-06, "loss": 0.0012, "step": 115150 }, { "epoch": 1.8843164525893807, "grad_norm": 0.04370303824543953, "learning_rate": 3.6536467024028945e-06, "loss": 0.0015, "step": 115160 }, { "epoch": 1.8844800785404565, "grad_norm": 0.101749949157238, "learning_rate": 3.6527299470719204e-06, "loss": 0.0015, "step": 115170 }, { "epoch": 1.8846437044915323, "grad_norm": 0.04757365584373474, "learning_rate": 3.6518132405762463e-06, "loss": 0.0005, "step": 115180 }, { "epoch": 1.8848073304426083, "grad_norm": 0.10030307620763779, "learning_rate": 3.6508965829491027e-06, "loss": 0.0008, "step": 115190 }, { "epoch": 1.884970956393684, "grad_norm": 0.16621261835098267, "learning_rate": 3.6499799742237156e-06, "loss": 0.0012, "step": 115200 }, { "epoch": 1.8851345823447598, "grad_norm": 0.08705494552850723, "learning_rate": 3.6490634144333094e-06, "loss": 0.0022, "step": 115210 }, { "epoch": 1.8852982082958358, "grad_norm": 0.08569933474063873, "learning_rate": 3.648146903611108e-06, "loss": 0.003, "step": 115220 }, { "epoch": 1.8854618342469116, "grad_norm": 0.22078154981136322, "learning_rate": 3.6472304417903315e-06, "loss": 0.0019, "step": 115230 }, { "epoch": 1.8856254601979874, "grad_norm": 0.06548316031694412, "learning_rate": 3.646314029004201e-06, "loss": 0.0017, "step": 115240 }, { "epoch": 1.8857890861490634, "grad_norm": 0.1948816180229187, "learning_rate": 3.6453976652859314e-06, "loss": 0.0023, "step": 115250 }, { "epoch": 1.885952712100139, "grad_norm": 0.029058460146188736, "learning_rate": 3.6444813506687426e-06, "loss": 0.0017, "step": 115260 }, { "epoch": 1.886116338051215, "grad_norm": 0.1261594444513321, "learning_rate": 3.643565085185849e-06, "loss": 0.0018, "step": 115270 }, { "epoch": 1.886279964002291, "grad_norm": 0.0823182463645935, "learning_rate": 3.642648868870459e-06, "loss": 0.0013, "step": 115280 }, { "epoch": 1.8864435899533665, "grad_norm": 0.04459149390459061, "learning_rate": 3.6417327017557848e-06, "loss": 0.0027, "step": 115290 }, { "epoch": 1.8866072159044425, "grad_norm": 0.008105945773422718, "learning_rate": 3.6408165838750355e-06, "loss": 0.0011, "step": 115300 }, { "epoch": 1.8867708418555182, "grad_norm": 0.05859200283885002, "learning_rate": 3.6399005152614176e-06, "loss": 0.0009, "step": 115310 }, { "epoch": 1.886934467806594, "grad_norm": 0.06281672418117523, "learning_rate": 3.6389844959481378e-06, "loss": 0.0014, "step": 115320 }, { "epoch": 1.88709809375767, "grad_norm": 0.028158392757177353, "learning_rate": 3.638068525968399e-06, "loss": 0.0012, "step": 115330 }, { "epoch": 1.8872617197087458, "grad_norm": 0.004508281126618385, "learning_rate": 3.6371526053554028e-06, "loss": 0.0011, "step": 115340 }, { "epoch": 1.8874253456598216, "grad_norm": 0.030568337067961693, "learning_rate": 3.6362367341423495e-06, "loss": 0.0011, "step": 115350 }, { "epoch": 1.8875889716108976, "grad_norm": 0.029163209721446037, "learning_rate": 3.6353209123624367e-06, "loss": 0.0008, "step": 115360 }, { "epoch": 1.8877525975619733, "grad_norm": 0.07090315222740173, "learning_rate": 3.6344051400488623e-06, "loss": 0.0012, "step": 115370 }, { "epoch": 1.887916223513049, "grad_norm": 0.1078358069062233, "learning_rate": 3.633489417234818e-06, "loss": 0.0008, "step": 115380 }, { "epoch": 1.888079849464125, "grad_norm": 0.09491295367479324, "learning_rate": 3.6325737439534977e-06, "loss": 0.0013, "step": 115390 }, { "epoch": 1.8882434754152009, "grad_norm": 0.2326422929763794, "learning_rate": 3.631658120238093e-06, "loss": 0.0023, "step": 115400 }, { "epoch": 1.8884071013662767, "grad_norm": 0.09236391633749008, "learning_rate": 3.6307425461217925e-06, "loss": 0.0017, "step": 115410 }, { "epoch": 1.8885707273173526, "grad_norm": 0.12886789441108704, "learning_rate": 3.629827021637784e-06, "loss": 0.001, "step": 115420 }, { "epoch": 1.8887343532684284, "grad_norm": 0.21762512624263763, "learning_rate": 3.6289115468192527e-06, "loss": 0.0016, "step": 115430 }, { "epoch": 1.8888979792195042, "grad_norm": 0.23086415231227875, "learning_rate": 3.6279961216993833e-06, "loss": 0.0021, "step": 115440 }, { "epoch": 1.8890616051705802, "grad_norm": 0.008762970566749573, "learning_rate": 3.6270807463113556e-06, "loss": 0.0006, "step": 115450 }, { "epoch": 1.8892252311216557, "grad_norm": 0.22121384739875793, "learning_rate": 3.6261654206883524e-06, "loss": 0.0014, "step": 115460 }, { "epoch": 1.8893888570727317, "grad_norm": 0.08388222754001617, "learning_rate": 3.6252501448635508e-06, "loss": 0.001, "step": 115470 }, { "epoch": 1.8895524830238077, "grad_norm": 0.12055619806051254, "learning_rate": 3.6243349188701273e-06, "loss": 0.0022, "step": 115480 }, { "epoch": 1.8897161089748833, "grad_norm": 0.07749462127685547, "learning_rate": 3.623419742741255e-06, "loss": 0.003, "step": 115490 }, { "epoch": 1.8898797349259593, "grad_norm": 0.057838600128889084, "learning_rate": 3.62250461651011e-06, "loss": 0.0013, "step": 115500 }, { "epoch": 1.890043360877035, "grad_norm": 0.004064687993377447, "learning_rate": 3.6215895402098617e-06, "loss": 0.0013, "step": 115510 }, { "epoch": 1.8902069868281108, "grad_norm": 0.06768014281988144, "learning_rate": 3.6206745138736794e-06, "loss": 0.0011, "step": 115520 }, { "epoch": 1.8903706127791868, "grad_norm": 0.08197381347417831, "learning_rate": 3.61975953753473e-06, "loss": 0.0011, "step": 115530 }, { "epoch": 1.8905342387302626, "grad_norm": 0.07014795392751694, "learning_rate": 3.6188446112261812e-06, "loss": 0.0014, "step": 115540 }, { "epoch": 1.8906978646813384, "grad_norm": 0.11544433981180191, "learning_rate": 3.6179297349811947e-06, "loss": 0.0014, "step": 115550 }, { "epoch": 1.8908614906324144, "grad_norm": 0.060647331178188324, "learning_rate": 3.6170149088329346e-06, "loss": 0.0025, "step": 115560 }, { "epoch": 1.8910251165834902, "grad_norm": 0.03720762953162193, "learning_rate": 3.6161001328145624e-06, "loss": 0.0011, "step": 115570 }, { "epoch": 1.891188742534566, "grad_norm": 0.07355286180973053, "learning_rate": 3.6151854069592317e-06, "loss": 0.0008, "step": 115580 }, { "epoch": 1.891352368485642, "grad_norm": 0.11242623627185822, "learning_rate": 3.6142707313001e-06, "loss": 0.0028, "step": 115590 }, { "epoch": 1.8915159944367177, "grad_norm": 0.12988241016864777, "learning_rate": 3.613356105870326e-06, "loss": 0.0017, "step": 115600 }, { "epoch": 1.8916796203877935, "grad_norm": 0.07360272854566574, "learning_rate": 3.6124415307030602e-06, "loss": 0.0012, "step": 115610 }, { "epoch": 1.8918432463388695, "grad_norm": 0.12990425527095795, "learning_rate": 3.611527005831453e-06, "loss": 0.0018, "step": 115620 }, { "epoch": 1.8920068722899452, "grad_norm": 0.11665112525224686, "learning_rate": 3.6106125312886566e-06, "loss": 0.0015, "step": 115630 }, { "epoch": 1.892170498241021, "grad_norm": 0.1733156144618988, "learning_rate": 3.6096981071078154e-06, "loss": 0.0023, "step": 115640 }, { "epoch": 1.892334124192097, "grad_norm": 0.13508737087249756, "learning_rate": 3.6087837333220765e-06, "loss": 0.0015, "step": 115650 }, { "epoch": 1.8924977501431726, "grad_norm": 0.15719018876552582, "learning_rate": 3.6078694099645834e-06, "loss": 0.002, "step": 115660 }, { "epoch": 1.8926613760942486, "grad_norm": 0.06254814565181732, "learning_rate": 3.60695513706848e-06, "loss": 0.0007, "step": 115670 }, { "epoch": 1.8928250020453243, "grad_norm": 0.22196121513843536, "learning_rate": 3.6060409146669028e-06, "loss": 0.0018, "step": 115680 }, { "epoch": 1.8929886279964, "grad_norm": 0.12035829573869705, "learning_rate": 3.6051267427929925e-06, "loss": 0.0012, "step": 115690 }, { "epoch": 1.893152253947476, "grad_norm": 0.05168607085943222, "learning_rate": 3.604212621479885e-06, "loss": 0.0008, "step": 115700 }, { "epoch": 1.8933158798985519, "grad_norm": 0.002085619606077671, "learning_rate": 3.6032985507607155e-06, "loss": 0.0013, "step": 115710 }, { "epoch": 1.8934795058496277, "grad_norm": 0.029281770810484886, "learning_rate": 3.602384530668617e-06, "loss": 0.0022, "step": 115720 }, { "epoch": 1.8936431318007036, "grad_norm": 0.44876983761787415, "learning_rate": 3.6014705612367203e-06, "loss": 0.0023, "step": 115730 }, { "epoch": 1.8938067577517794, "grad_norm": 0.024062059819698334, "learning_rate": 3.600556642498154e-06, "loss": 0.0014, "step": 115740 }, { "epoch": 1.8939703837028552, "grad_norm": 0.06086837127804756, "learning_rate": 3.599642774486045e-06, "loss": 0.001, "step": 115750 }, { "epoch": 1.8941340096539312, "grad_norm": 0.058834198862314224, "learning_rate": 3.5987289572335205e-06, "loss": 0.0013, "step": 115760 }, { "epoch": 1.894297635605007, "grad_norm": 0.07535523921251297, "learning_rate": 3.5978151907737057e-06, "loss": 0.0015, "step": 115770 }, { "epoch": 1.8944612615560827, "grad_norm": 0.09088030457496643, "learning_rate": 3.5969014751397187e-06, "loss": 0.0008, "step": 115780 }, { "epoch": 1.8946248875071587, "grad_norm": 0.09978930652141571, "learning_rate": 3.5959878103646807e-06, "loss": 0.0017, "step": 115790 }, { "epoch": 1.8947885134582345, "grad_norm": 0.06307248771190643, "learning_rate": 3.5950741964817094e-06, "loss": 0.0011, "step": 115800 }, { "epoch": 1.8949521394093103, "grad_norm": 0.05008325353264809, "learning_rate": 3.5941606335239217e-06, "loss": 0.0014, "step": 115810 }, { "epoch": 1.8951157653603863, "grad_norm": 0.04925980791449547, "learning_rate": 3.5932471215244323e-06, "loss": 0.0011, "step": 115820 }, { "epoch": 1.8952793913114618, "grad_norm": 0.0952456146478653, "learning_rate": 3.592333660516355e-06, "loss": 0.0009, "step": 115830 }, { "epoch": 1.8954430172625378, "grad_norm": 0.052398569881916046, "learning_rate": 3.5914202505327977e-06, "loss": 0.0012, "step": 115840 }, { "epoch": 1.8956066432136138, "grad_norm": 0.07709585875272751, "learning_rate": 3.590506891606872e-06, "loss": 0.001, "step": 115850 }, { "epoch": 1.8957702691646894, "grad_norm": 0.19080610573291779, "learning_rate": 3.5895935837716835e-06, "loss": 0.0022, "step": 115860 }, { "epoch": 1.8959338951157654, "grad_norm": 0.10824272036552429, "learning_rate": 3.5886803270603388e-06, "loss": 0.0016, "step": 115870 }, { "epoch": 1.8960975210668412, "grad_norm": 0.1646525114774704, "learning_rate": 3.5877671215059394e-06, "loss": 0.0011, "step": 115880 }, { "epoch": 1.896261147017917, "grad_norm": 0.07289395481348038, "learning_rate": 3.5868539671415874e-06, "loss": 0.0012, "step": 115890 }, { "epoch": 1.896424772968993, "grad_norm": 0.08073840290307999, "learning_rate": 3.5859408640003827e-06, "loss": 0.0019, "step": 115900 }, { "epoch": 1.8965883989200687, "grad_norm": 0.018072491511702538, "learning_rate": 3.5850278121154236e-06, "loss": 0.0016, "step": 115910 }, { "epoch": 1.8967520248711445, "grad_norm": 0.2929452955722809, "learning_rate": 3.584114811519805e-06, "loss": 0.0038, "step": 115920 }, { "epoch": 1.8969156508222205, "grad_norm": 0.03789547085762024, "learning_rate": 3.5832018622466213e-06, "loss": 0.0016, "step": 115930 }, { "epoch": 1.8970792767732962, "grad_norm": 0.04028784856200218, "learning_rate": 3.582288964328965e-06, "loss": 0.0011, "step": 115940 }, { "epoch": 1.897242902724372, "grad_norm": 0.18984396755695343, "learning_rate": 3.5813761177999267e-06, "loss": 0.0014, "step": 115950 }, { "epoch": 1.897406528675448, "grad_norm": 0.04057304561138153, "learning_rate": 3.5804633226925943e-06, "loss": 0.0012, "step": 115960 }, { "epoch": 1.8975701546265238, "grad_norm": 0.0909348577260971, "learning_rate": 3.5795505790400557e-06, "loss": 0.0008, "step": 115970 }, { "epoch": 1.8977337805775996, "grad_norm": 0.020928556099534035, "learning_rate": 3.578637886875394e-06, "loss": 0.0014, "step": 115980 }, { "epoch": 1.8978974065286756, "grad_norm": 0.048062387853860855, "learning_rate": 3.5777252462316927e-06, "loss": 0.001, "step": 115990 }, { "epoch": 1.8980610324797513, "grad_norm": 0.10723450779914856, "learning_rate": 3.5768126571420325e-06, "loss": 0.0017, "step": 116000 }, { "epoch": 1.898224658430827, "grad_norm": 0.019371047616004944, "learning_rate": 3.5759001196394926e-06, "loss": 0.0039, "step": 116010 }, { "epoch": 1.898388284381903, "grad_norm": 0.07669398933649063, "learning_rate": 3.5749876337571513e-06, "loss": 0.002, "step": 116020 }, { "epoch": 1.8985519103329787, "grad_norm": 0.04043760895729065, "learning_rate": 3.574075199528082e-06, "loss": 0.0014, "step": 116030 }, { "epoch": 1.8987155362840547, "grad_norm": 0.09259256720542908, "learning_rate": 3.573162816985361e-06, "loss": 0.0016, "step": 116040 }, { "epoch": 1.8988791622351306, "grad_norm": 0.04458944872021675, "learning_rate": 3.5722504861620584e-06, "loss": 0.0034, "step": 116050 }, { "epoch": 1.8990427881862062, "grad_norm": 0.07175411283969879, "learning_rate": 3.571338207091244e-06, "loss": 0.0036, "step": 116060 }, { "epoch": 1.8992064141372822, "grad_norm": 0.16684776544570923, "learning_rate": 3.5704259798059883e-06, "loss": 0.0023, "step": 116070 }, { "epoch": 1.899370040088358, "grad_norm": 0.06403648853302002, "learning_rate": 3.5695138043393514e-06, "loss": 0.0031, "step": 116080 }, { "epoch": 1.8995336660394337, "grad_norm": 0.29816925525665283, "learning_rate": 3.5686016807244022e-06, "loss": 0.0021, "step": 116090 }, { "epoch": 1.8996972919905097, "grad_norm": 0.09877566993236542, "learning_rate": 3.5676896089942013e-06, "loss": 0.0023, "step": 116100 }, { "epoch": 1.8998609179415855, "grad_norm": 0.07043410837650299, "learning_rate": 3.5667775891818108e-06, "loss": 0.0014, "step": 116110 }, { "epoch": 1.9000245438926613, "grad_norm": 0.10018762946128845, "learning_rate": 3.565865621320287e-06, "loss": 0.0016, "step": 116120 }, { "epoch": 1.9001881698437373, "grad_norm": 0.11627231538295746, "learning_rate": 3.5649537054426885e-06, "loss": 0.0017, "step": 116130 }, { "epoch": 1.900351795794813, "grad_norm": 0.04538247734308243, "learning_rate": 3.564041841582068e-06, "loss": 0.0009, "step": 116140 }, { "epoch": 1.9005154217458888, "grad_norm": 0.0025490394327789545, "learning_rate": 3.5631300297714804e-06, "loss": 0.0018, "step": 116150 }, { "epoch": 1.9006790476969648, "grad_norm": 0.14060045778751373, "learning_rate": 3.562218270043976e-06, "loss": 0.0015, "step": 116160 }, { "epoch": 1.9008426736480406, "grad_norm": 0.02201317623257637, "learning_rate": 3.561306562432604e-06, "loss": 0.0015, "step": 116170 }, { "epoch": 1.9010062995991164, "grad_norm": 0.11906857043504715, "learning_rate": 3.5603949069704103e-06, "loss": 0.0014, "step": 116180 }, { "epoch": 1.9011699255501924, "grad_norm": 0.1848725825548172, "learning_rate": 3.559483303690441e-06, "loss": 0.0012, "step": 116190 }, { "epoch": 1.9013335515012681, "grad_norm": 0.09553230553865433, "learning_rate": 3.5585717526257402e-06, "loss": 0.0009, "step": 116200 }, { "epoch": 1.901497177452344, "grad_norm": 0.14421942830085754, "learning_rate": 3.5576602538093493e-06, "loss": 0.0029, "step": 116210 }, { "epoch": 1.90166080340342, "grad_norm": 0.009725402109324932, "learning_rate": 3.5567488072743073e-06, "loss": 0.0007, "step": 116220 }, { "epoch": 1.9018244293544955, "grad_norm": 0.20262233912944794, "learning_rate": 3.5558374130536523e-06, "loss": 0.0015, "step": 116230 }, { "epoch": 1.9019880553055715, "grad_norm": 0.1150551363825798, "learning_rate": 3.554926071180419e-06, "loss": 0.0067, "step": 116240 }, { "epoch": 1.9021516812566475, "grad_norm": 0.03977881371974945, "learning_rate": 3.5540147816876437e-06, "loss": 0.0011, "step": 116250 }, { "epoch": 1.902315307207723, "grad_norm": 0.06885017454624176, "learning_rate": 3.553103544608357e-06, "loss": 0.0015, "step": 116260 }, { "epoch": 1.902478933158799, "grad_norm": 0.033356595784425735, "learning_rate": 3.5521923599755913e-06, "loss": 0.0008, "step": 116270 }, { "epoch": 1.9026425591098748, "grad_norm": 0.01597157120704651, "learning_rate": 3.5512812278223707e-06, "loss": 0.0008, "step": 116280 }, { "epoch": 1.9028061850609506, "grad_norm": 0.12661388516426086, "learning_rate": 3.550370148181724e-06, "loss": 0.0019, "step": 116290 }, { "epoch": 1.9029698110120266, "grad_norm": 0.11293073743581772, "learning_rate": 3.5494591210866742e-06, "loss": 0.001, "step": 116300 }, { "epoch": 1.9031334369631023, "grad_norm": 0.11791744083166122, "learning_rate": 3.548548146570246e-06, "loss": 0.002, "step": 116310 }, { "epoch": 1.903297062914178, "grad_norm": 0.01586984470486641, "learning_rate": 3.547637224665459e-06, "loss": 0.0013, "step": 116320 }, { "epoch": 1.903460688865254, "grad_norm": 0.16123881936073303, "learning_rate": 3.5467263554053322e-06, "loss": 0.0028, "step": 116330 }, { "epoch": 1.9036243148163299, "grad_norm": 0.011089896783232689, "learning_rate": 3.5458155388228817e-06, "loss": 0.0011, "step": 116340 }, { "epoch": 1.9037879407674057, "grad_norm": 0.07788226753473282, "learning_rate": 3.5449047749511234e-06, "loss": 0.001, "step": 116350 }, { "epoch": 1.9039515667184816, "grad_norm": 0.0635981559753418, "learning_rate": 3.543994063823069e-06, "loss": 0.0009, "step": 116360 }, { "epoch": 1.9041151926695574, "grad_norm": 0.03099675104022026, "learning_rate": 3.5430834054717324e-06, "loss": 0.0008, "step": 116370 }, { "epoch": 1.9042788186206332, "grad_norm": 0.08618183434009552, "learning_rate": 3.5421727999301187e-06, "loss": 0.0007, "step": 116380 }, { "epoch": 1.9044424445717092, "grad_norm": 0.00502800801768899, "learning_rate": 3.5412622472312373e-06, "loss": 0.001, "step": 116390 }, { "epoch": 1.904606070522785, "grad_norm": 0.03535205498337746, "learning_rate": 3.540351747408094e-06, "loss": 0.0023, "step": 116400 }, { "epoch": 1.9047696964738607, "grad_norm": 0.0633096769452095, "learning_rate": 3.5394413004936917e-06, "loss": 0.0011, "step": 116410 }, { "epoch": 1.9049333224249367, "grad_norm": 0.05483037233352661, "learning_rate": 3.5385309065210317e-06, "loss": 0.0011, "step": 116420 }, { "epoch": 1.9050969483760123, "grad_norm": 0.1297806203365326, "learning_rate": 3.5376205655231138e-06, "loss": 0.0018, "step": 116430 }, { "epoch": 1.9052605743270883, "grad_norm": 0.03207574039697647, "learning_rate": 3.536710277532936e-06, "loss": 0.0013, "step": 116440 }, { "epoch": 1.9054242002781643, "grad_norm": 0.052945345640182495, "learning_rate": 3.535800042583493e-06, "loss": 0.0025, "step": 116450 }, { "epoch": 1.9055878262292398, "grad_norm": 0.09225403517484665, "learning_rate": 3.5348898607077785e-06, "loss": 0.0019, "step": 116460 }, { "epoch": 1.9057514521803158, "grad_norm": 0.14315487444400787, "learning_rate": 3.5339797319387893e-06, "loss": 0.0018, "step": 116470 }, { "epoch": 1.9059150781313916, "grad_norm": 0.16204451024532318, "learning_rate": 3.533069656309508e-06, "loss": 0.0033, "step": 116480 }, { "epoch": 1.9060787040824674, "grad_norm": 0.06964494287967682, "learning_rate": 3.532159633852927e-06, "loss": 0.0015, "step": 116490 }, { "epoch": 1.9062423300335434, "grad_norm": 0.12034778296947479, "learning_rate": 3.5312496646020307e-06, "loss": 0.0017, "step": 116500 }, { "epoch": 1.9064059559846191, "grad_norm": 0.11584047973155975, "learning_rate": 3.530339748589804e-06, "loss": 0.0013, "step": 116510 }, { "epoch": 1.906569581935695, "grad_norm": 0.045869264751672745, "learning_rate": 3.5294298858492284e-06, "loss": 0.0006, "step": 116520 }, { "epoch": 1.906733207886771, "grad_norm": 0.15335826575756073, "learning_rate": 3.5285200764132866e-06, "loss": 0.0014, "step": 116530 }, { "epoch": 1.9068968338378467, "grad_norm": 0.14240601658821106, "learning_rate": 3.527610320314955e-06, "loss": 0.0009, "step": 116540 }, { "epoch": 1.9070604597889225, "grad_norm": 0.07842226326465607, "learning_rate": 3.52670061758721e-06, "loss": 0.0024, "step": 116550 }, { "epoch": 1.9072240857399985, "grad_norm": 0.03699515014886856, "learning_rate": 3.5257909682630266e-06, "loss": 0.0014, "step": 116560 }, { "epoch": 1.9073877116910742, "grad_norm": 0.06172734871506691, "learning_rate": 3.52488137237538e-06, "loss": 0.0005, "step": 116570 }, { "epoch": 1.90755133764215, "grad_norm": 0.004461097065359354, "learning_rate": 3.523971829957235e-06, "loss": 0.0012, "step": 116580 }, { "epoch": 1.907714963593226, "grad_norm": 0.05300891399383545, "learning_rate": 3.523062341041565e-06, "loss": 0.0015, "step": 116590 }, { "epoch": 1.9078785895443016, "grad_norm": 0.05722389370203018, "learning_rate": 3.5221529056613346e-06, "loss": 0.0016, "step": 116600 }, { "epoch": 1.9080422154953776, "grad_norm": 0.16748911142349243, "learning_rate": 3.5212435238495104e-06, "loss": 0.0016, "step": 116610 }, { "epoch": 1.9082058414464536, "grad_norm": 0.3486364185810089, "learning_rate": 3.5203341956390534e-06, "loss": 0.0012, "step": 116620 }, { "epoch": 1.908369467397529, "grad_norm": 0.2486005425453186, "learning_rate": 3.5194249210629263e-06, "loss": 0.0023, "step": 116630 }, { "epoch": 1.908533093348605, "grad_norm": 0.06751266121864319, "learning_rate": 3.518515700154087e-06, "loss": 0.0016, "step": 116640 }, { "epoch": 1.9086967192996809, "grad_norm": 0.20044063031673431, "learning_rate": 3.5176065329454933e-06, "loss": 0.0011, "step": 116650 }, { "epoch": 1.9088603452507567, "grad_norm": 0.0877494141459465, "learning_rate": 3.5166974194700997e-06, "loss": 0.0013, "step": 116660 }, { "epoch": 1.9090239712018326, "grad_norm": 0.08999335020780563, "learning_rate": 3.5157883597608606e-06, "loss": 0.0017, "step": 116670 }, { "epoch": 1.9091875971529084, "grad_norm": 0.14329926669597626, "learning_rate": 3.514879353850725e-06, "loss": 0.0013, "step": 116680 }, { "epoch": 1.9093512231039842, "grad_norm": 0.2274893969297409, "learning_rate": 3.513970401772644e-06, "loss": 0.002, "step": 116690 }, { "epoch": 1.9095148490550602, "grad_norm": 0.024614742025732994, "learning_rate": 3.513061503559564e-06, "loss": 0.0009, "step": 116700 }, { "epoch": 1.909678475006136, "grad_norm": 0.0431903600692749, "learning_rate": 3.512152659244431e-06, "loss": 0.0011, "step": 116710 }, { "epoch": 1.9098421009572117, "grad_norm": 0.1141614317893982, "learning_rate": 3.511243868860188e-06, "loss": 0.0017, "step": 116720 }, { "epoch": 1.9100057269082877, "grad_norm": 0.051052387803792953, "learning_rate": 3.510335132439776e-06, "loss": 0.0013, "step": 116730 }, { "epoch": 1.9101693528593635, "grad_norm": 0.09419436007738113, "learning_rate": 3.5094264500161345e-06, "loss": 0.0029, "step": 116740 }, { "epoch": 1.9103329788104393, "grad_norm": 0.06832196563482285, "learning_rate": 3.5085178216222034e-06, "loss": 0.0008, "step": 116750 }, { "epoch": 1.9104966047615153, "grad_norm": 0.12940138578414917, "learning_rate": 3.5076092472909163e-06, "loss": 0.001, "step": 116760 }, { "epoch": 1.910660230712591, "grad_norm": 0.11513227969408035, "learning_rate": 3.5067007270552084e-06, "loss": 0.0011, "step": 116770 }, { "epoch": 1.9108238566636668, "grad_norm": 0.048090241849422455, "learning_rate": 3.505792260948009e-06, "loss": 0.0013, "step": 116780 }, { "epoch": 1.9109874826147428, "grad_norm": 0.03949848562479019, "learning_rate": 3.5048838490022487e-06, "loss": 0.0019, "step": 116790 }, { "epoch": 1.9111511085658184, "grad_norm": 0.050484053790569305, "learning_rate": 3.503975491250854e-06, "loss": 0.0018, "step": 116800 }, { "epoch": 1.9113147345168944, "grad_norm": 0.03547117859125137, "learning_rate": 3.503067187726753e-06, "loss": 0.0018, "step": 116810 }, { "epoch": 1.9114783604679704, "grad_norm": 0.06423182040452957, "learning_rate": 3.5021589384628692e-06, "loss": 0.0009, "step": 116820 }, { "epoch": 1.911641986419046, "grad_norm": 0.032825957983732224, "learning_rate": 3.501250743492124e-06, "loss": 0.0017, "step": 116830 }, { "epoch": 1.911805612370122, "grad_norm": 0.1677391529083252, "learning_rate": 3.5003426028474365e-06, "loss": 0.0016, "step": 116840 }, { "epoch": 1.9119692383211977, "grad_norm": 0.20630770921707153, "learning_rate": 3.499434516561726e-06, "loss": 0.001, "step": 116850 }, { "epoch": 1.9121328642722735, "grad_norm": 0.04731212183833122, "learning_rate": 3.4985264846679075e-06, "loss": 0.0021, "step": 116860 }, { "epoch": 1.9122964902233495, "grad_norm": 0.13591893017292023, "learning_rate": 3.497618507198896e-06, "loss": 0.0023, "step": 116870 }, { "epoch": 1.9124601161744252, "grad_norm": 0.028104446828365326, "learning_rate": 3.496710584187602e-06, "loss": 0.0011, "step": 116880 }, { "epoch": 1.912623742125501, "grad_norm": 0.039137497544288635, "learning_rate": 3.4958027156669362e-06, "loss": 0.0016, "step": 116890 }, { "epoch": 1.912787368076577, "grad_norm": 0.03131349757313728, "learning_rate": 3.4948949016698068e-06, "loss": 0.0013, "step": 116900 }, { "epoch": 1.9129509940276528, "grad_norm": 0.062079157680273056, "learning_rate": 3.4939871422291195e-06, "loss": 0.0013, "step": 116910 }, { "epoch": 1.9131146199787286, "grad_norm": 0.08098719269037247, "learning_rate": 3.493079437377779e-06, "loss": 0.0011, "step": 116920 }, { "epoch": 1.9132782459298046, "grad_norm": 0.032798703759908676, "learning_rate": 3.492171787148687e-06, "loss": 0.0016, "step": 116930 }, { "epoch": 1.9134418718808803, "grad_norm": 0.00832432322204113, "learning_rate": 3.491264191574744e-06, "loss": 0.0008, "step": 116940 }, { "epoch": 1.913605497831956, "grad_norm": 0.07371308654546738, "learning_rate": 3.4903566506888476e-06, "loss": 0.0009, "step": 116950 }, { "epoch": 1.913769123783032, "grad_norm": 0.06439722329378128, "learning_rate": 3.4894491645238937e-06, "loss": 0.0009, "step": 116960 }, { "epoch": 1.9139327497341079, "grad_norm": 0.08005999773740768, "learning_rate": 3.4885417331127797e-06, "loss": 0.0009, "step": 116970 }, { "epoch": 1.9140963756851836, "grad_norm": 0.09819091856479645, "learning_rate": 3.487634356488393e-06, "loss": 0.0013, "step": 116980 }, { "epoch": 1.9142600016362596, "grad_norm": 0.06988803297281265, "learning_rate": 3.4867270346836263e-06, "loss": 0.0013, "step": 116990 }, { "epoch": 1.9144236275873352, "grad_norm": 0.022273756563663483, "learning_rate": 3.4858197677313676e-06, "loss": 0.001, "step": 117000 }, { "epoch": 1.9145872535384112, "grad_norm": 0.07267189770936966, "learning_rate": 3.4849125556645026e-06, "loss": 0.0016, "step": 117010 }, { "epoch": 1.9147508794894872, "grad_norm": 0.07633133977651596, "learning_rate": 3.4840053985159154e-06, "loss": 0.0012, "step": 117020 }, { "epoch": 1.9149145054405627, "grad_norm": 0.0614902563393116, "learning_rate": 3.4830982963184902e-06, "loss": 0.0011, "step": 117030 }, { "epoch": 1.9150781313916387, "grad_norm": 0.046877384185791016, "learning_rate": 3.4821912491051056e-06, "loss": 0.0013, "step": 117040 }, { "epoch": 1.9152417573427145, "grad_norm": 0.03472324460744858, "learning_rate": 3.4812842569086403e-06, "loss": 0.0024, "step": 117050 }, { "epoch": 1.9154053832937903, "grad_norm": 0.12010905891656876, "learning_rate": 3.4803773197619707e-06, "loss": 0.0018, "step": 117060 }, { "epoch": 1.9155690092448663, "grad_norm": 0.18073269724845886, "learning_rate": 3.4794704376979714e-06, "loss": 0.0035, "step": 117070 }, { "epoch": 1.915732635195942, "grad_norm": 0.12548808753490448, "learning_rate": 3.478563610749514e-06, "loss": 0.0026, "step": 117080 }, { "epoch": 1.9158962611470178, "grad_norm": 0.05737784877419472, "learning_rate": 3.477656838949469e-06, "loss": 0.0019, "step": 117090 }, { "epoch": 1.9160598870980938, "grad_norm": 0.04905271530151367, "learning_rate": 3.476750122330704e-06, "loss": 0.0012, "step": 117100 }, { "epoch": 1.9162235130491696, "grad_norm": 0.10038364678621292, "learning_rate": 3.4758434609260865e-06, "loss": 0.0015, "step": 117110 }, { "epoch": 1.9163871390002454, "grad_norm": 0.03029683418571949, "learning_rate": 3.4749368547684804e-06, "loss": 0.0013, "step": 117120 }, { "epoch": 1.9165507649513214, "grad_norm": 0.16878052055835724, "learning_rate": 3.474030303890748e-06, "loss": 0.0009, "step": 117130 }, { "epoch": 1.9167143909023971, "grad_norm": 0.0847473070025444, "learning_rate": 3.473123808325749e-06, "loss": 0.0013, "step": 117140 }, { "epoch": 1.916878016853473, "grad_norm": 0.0683387890458107, "learning_rate": 3.472217368106343e-06, "loss": 0.0009, "step": 117150 }, { "epoch": 1.917041642804549, "grad_norm": 0.0396602600812912, "learning_rate": 3.4713109832653846e-06, "loss": 0.001, "step": 117160 }, { "epoch": 1.9172052687556247, "grad_norm": 0.18097415566444397, "learning_rate": 3.4704046538357294e-06, "loss": 0.0016, "step": 117170 }, { "epoch": 1.9173688947067005, "grad_norm": 0.13552826642990112, "learning_rate": 3.4694983798502302e-06, "loss": 0.0009, "step": 117180 }, { "epoch": 1.9175325206577765, "grad_norm": 0.013414356857538223, "learning_rate": 3.468592161341735e-06, "loss": 0.0016, "step": 117190 }, { "epoch": 1.917696146608852, "grad_norm": 0.051815301179885864, "learning_rate": 3.4676859983430938e-06, "loss": 0.0012, "step": 117200 }, { "epoch": 1.917859772559928, "grad_norm": 0.02268894761800766, "learning_rate": 3.466779890887152e-06, "loss": 0.0015, "step": 117210 }, { "epoch": 1.918023398511004, "grad_norm": 0.029339473694562912, "learning_rate": 3.465873839006755e-06, "loss": 0.0009, "step": 117220 }, { "epoch": 1.9181870244620796, "grad_norm": 0.34612491726875305, "learning_rate": 3.464967842734743e-06, "loss": 0.002, "step": 117230 }, { "epoch": 1.9183506504131556, "grad_norm": 0.14712712168693542, "learning_rate": 3.464061902103957e-06, "loss": 0.0015, "step": 117240 }, { "epoch": 1.9185142763642313, "grad_norm": 0.1953924298286438, "learning_rate": 3.4631560171472357e-06, "loss": 0.0026, "step": 117250 }, { "epoch": 1.918677902315307, "grad_norm": 0.02603316865861416, "learning_rate": 3.462250187897416e-06, "loss": 0.0012, "step": 117260 }, { "epoch": 1.918841528266383, "grad_norm": 0.0692998468875885, "learning_rate": 3.4613444143873306e-06, "loss": 0.0012, "step": 117270 }, { "epoch": 1.9190051542174589, "grad_norm": 0.056674063205718994, "learning_rate": 3.460438696649814e-06, "loss": 0.0019, "step": 117280 }, { "epoch": 1.9191687801685346, "grad_norm": 0.05686596408486366, "learning_rate": 3.459533034717692e-06, "loss": 0.0015, "step": 117290 }, { "epoch": 1.9193324061196106, "grad_norm": 0.030991658568382263, "learning_rate": 3.4586274286237955e-06, "loss": 0.0007, "step": 117300 }, { "epoch": 1.9194960320706864, "grad_norm": 0.030475223436951637, "learning_rate": 3.45772187840095e-06, "loss": 0.001, "step": 117310 }, { "epoch": 1.9196596580217622, "grad_norm": 0.08460620045661926, "learning_rate": 3.4568163840819803e-06, "loss": 0.0014, "step": 117320 }, { "epoch": 1.9198232839728382, "grad_norm": 0.00464920699596405, "learning_rate": 3.4559109456997077e-06, "loss": 0.0012, "step": 117330 }, { "epoch": 1.919986909923914, "grad_norm": 0.061519816517829895, "learning_rate": 3.455005563286952e-06, "loss": 0.0019, "step": 117340 }, { "epoch": 1.9201505358749897, "grad_norm": 0.04516362026333809, "learning_rate": 3.454100236876531e-06, "loss": 0.0011, "step": 117350 }, { "epoch": 1.9203141618260657, "grad_norm": 0.018166685476899147, "learning_rate": 3.453194966501261e-06, "loss": 0.0008, "step": 117360 }, { "epoch": 1.9204777877771415, "grad_norm": 0.014799079857766628, "learning_rate": 3.452289752193956e-06, "loss": 0.0012, "step": 117370 }, { "epoch": 1.9206414137282173, "grad_norm": 0.5212557315826416, "learning_rate": 3.451384593987429e-06, "loss": 0.0077, "step": 117380 }, { "epoch": 1.9208050396792933, "grad_norm": 0.053820863366127014, "learning_rate": 3.450479491914487e-06, "loss": 0.0005, "step": 117390 }, { "epoch": 1.9209686656303688, "grad_norm": 0.05057581514120102, "learning_rate": 3.4495744460079395e-06, "loss": 0.0012, "step": 117400 }, { "epoch": 1.9211322915814448, "grad_norm": 0.17194992303848267, "learning_rate": 3.448669456300592e-06, "loss": 0.0015, "step": 117410 }, { "epoch": 1.9212959175325206, "grad_norm": 0.0869787335395813, "learning_rate": 3.447764522825248e-06, "loss": 0.0019, "step": 117420 }, { "epoch": 1.9214595434835964, "grad_norm": 0.07992877811193466, "learning_rate": 3.4468596456147096e-06, "loss": 0.001, "step": 117430 }, { "epoch": 1.9216231694346724, "grad_norm": 0.10534562915563583, "learning_rate": 3.445954824701776e-06, "loss": 0.0011, "step": 117440 }, { "epoch": 1.9217867953857481, "grad_norm": 0.2773364782333374, "learning_rate": 3.445050060119244e-06, "loss": 0.0023, "step": 117450 }, { "epoch": 1.921950421336824, "grad_norm": 0.1982908993959427, "learning_rate": 3.4441453518999107e-06, "loss": 0.0016, "step": 117460 }, { "epoch": 1.9221140472879, "grad_norm": 0.027084222063422203, "learning_rate": 3.4432407000765694e-06, "loss": 0.0011, "step": 117470 }, { "epoch": 1.9222776732389757, "grad_norm": 0.13308580219745636, "learning_rate": 3.4423361046820137e-06, "loss": 0.0014, "step": 117480 }, { "epoch": 1.9224412991900515, "grad_norm": 0.09366597980260849, "learning_rate": 3.4414315657490273e-06, "loss": 0.0012, "step": 117490 }, { "epoch": 1.9226049251411275, "grad_norm": 0.013624492101371288, "learning_rate": 3.440527083310401e-06, "loss": 0.0013, "step": 117500 }, { "epoch": 1.9227685510922032, "grad_norm": 0.3883621394634247, "learning_rate": 3.4396226573989195e-06, "loss": 0.0015, "step": 117510 }, { "epoch": 1.922932177043279, "grad_norm": 0.005440632347017527, "learning_rate": 3.438718288047367e-06, "loss": 0.001, "step": 117520 }, { "epoch": 1.923095802994355, "grad_norm": 0.2455538958311081, "learning_rate": 3.4378139752885243e-06, "loss": 0.0015, "step": 117530 }, { "epoch": 1.9232594289454308, "grad_norm": 0.0019826283678412437, "learning_rate": 3.43690971915517e-06, "loss": 0.0008, "step": 117540 }, { "epoch": 1.9234230548965066, "grad_norm": 0.11283384263515472, "learning_rate": 3.436005519680082e-06, "loss": 0.0019, "step": 117550 }, { "epoch": 1.9235866808475826, "grad_norm": 0.08573871105909348, "learning_rate": 3.4351013768960345e-06, "loss": 0.0018, "step": 117560 }, { "epoch": 1.923750306798658, "grad_norm": 0.07458851486444473, "learning_rate": 3.434197290835801e-06, "loss": 0.0012, "step": 117570 }, { "epoch": 1.923913932749734, "grad_norm": 0.03993265703320503, "learning_rate": 3.4332932615321534e-06, "loss": 0.0018, "step": 117580 }, { "epoch": 1.92407755870081, "grad_norm": 0.006638629361987114, "learning_rate": 3.432389289017859e-06, "loss": 0.0011, "step": 117590 }, { "epoch": 1.9242411846518856, "grad_norm": 0.1080513596534729, "learning_rate": 3.431485373325685e-06, "loss": 0.0012, "step": 117600 }, { "epoch": 1.9244048106029616, "grad_norm": 0.04473069682717323, "learning_rate": 3.4305815144883965e-06, "loss": 0.0011, "step": 117610 }, { "epoch": 1.9245684365540374, "grad_norm": 0.013832427561283112, "learning_rate": 3.4296777125387557e-06, "loss": 0.0024, "step": 117620 }, { "epoch": 1.9247320625051132, "grad_norm": 0.05827764421701431, "learning_rate": 3.4287739675095244e-06, "loss": 0.0029, "step": 117630 }, { "epoch": 1.9248956884561892, "grad_norm": 0.26668936014175415, "learning_rate": 3.4278702794334595e-06, "loss": 0.0014, "step": 117640 }, { "epoch": 1.925059314407265, "grad_norm": 0.029696675017476082, "learning_rate": 3.4269666483433195e-06, "loss": 0.0013, "step": 117650 }, { "epoch": 1.9252229403583407, "grad_norm": 0.1839888095855713, "learning_rate": 3.426063074271857e-06, "loss": 0.001, "step": 117660 }, { "epoch": 1.9253865663094167, "grad_norm": 0.025584794580936432, "learning_rate": 3.425159557251824e-06, "loss": 0.0016, "step": 117670 }, { "epoch": 1.9255501922604925, "grad_norm": 0.20758064091205597, "learning_rate": 3.424256097315976e-06, "loss": 0.0022, "step": 117680 }, { "epoch": 1.9257138182115683, "grad_norm": 0.022229362279176712, "learning_rate": 3.423352694497055e-06, "loss": 0.001, "step": 117690 }, { "epoch": 1.9258774441626443, "grad_norm": 0.05995320901274681, "learning_rate": 3.4224493488278086e-06, "loss": 0.0011, "step": 117700 }, { "epoch": 1.92604107011372, "grad_norm": 0.006995433010160923, "learning_rate": 3.4215460603409824e-06, "loss": 0.0016, "step": 117710 }, { "epoch": 1.9262046960647958, "grad_norm": 0.047567665576934814, "learning_rate": 3.4206428290693184e-06, "loss": 0.004, "step": 117720 }, { "epoch": 1.9263683220158718, "grad_norm": 0.024939458817243576, "learning_rate": 3.4197396550455543e-06, "loss": 0.0015, "step": 117730 }, { "epoch": 1.9265319479669476, "grad_norm": 0.047782547771930695, "learning_rate": 3.41883653830243e-06, "loss": 0.0011, "step": 117740 }, { "epoch": 1.9266955739180234, "grad_norm": 0.13636085391044617, "learning_rate": 3.4179334788726816e-06, "loss": 0.0029, "step": 117750 }, { "epoch": 1.9268591998690994, "grad_norm": 0.10191311687231064, "learning_rate": 3.4170304767890426e-06, "loss": 0.0015, "step": 117760 }, { "epoch": 1.927022825820175, "grad_norm": 0.1681540161371231, "learning_rate": 3.4161275320842435e-06, "loss": 0.0013, "step": 117770 }, { "epoch": 1.927186451771251, "grad_norm": 0.07280343770980835, "learning_rate": 3.4152246447910174e-06, "loss": 0.0016, "step": 117780 }, { "epoch": 1.927350077722327, "grad_norm": 0.05919857323169708, "learning_rate": 3.4143218149420855e-06, "loss": 0.0021, "step": 117790 }, { "epoch": 1.9275137036734025, "grad_norm": 0.06087833642959595, "learning_rate": 3.4134190425701773e-06, "loss": 0.0017, "step": 117800 }, { "epoch": 1.9276773296244785, "grad_norm": 0.09941631555557251, "learning_rate": 3.4125163277080166e-06, "loss": 0.0015, "step": 117810 }, { "epoch": 1.9278409555755542, "grad_norm": 0.19936014711856842, "learning_rate": 3.411613670388323e-06, "loss": 0.0012, "step": 117820 }, { "epoch": 1.92800458152663, "grad_norm": 0.04386596381664276, "learning_rate": 3.410711070643816e-06, "loss": 0.0008, "step": 117830 }, { "epoch": 1.928168207477706, "grad_norm": 0.052457183599472046, "learning_rate": 3.4098085285072135e-06, "loss": 0.0017, "step": 117840 }, { "epoch": 1.9283318334287818, "grad_norm": 0.15446393191814423, "learning_rate": 3.40890604401123e-06, "loss": 0.0016, "step": 117850 }, { "epoch": 1.9284954593798576, "grad_norm": 0.05146511644124985, "learning_rate": 3.4080036171885776e-06, "loss": 0.0016, "step": 117860 }, { "epoch": 1.9286590853309336, "grad_norm": 0.03194218873977661, "learning_rate": 3.407101248071969e-06, "loss": 0.0006, "step": 117870 }, { "epoch": 1.9288227112820093, "grad_norm": 0.0396483913064003, "learning_rate": 3.406198936694112e-06, "loss": 0.0015, "step": 117880 }, { "epoch": 1.928986337233085, "grad_norm": 0.08825863897800446, "learning_rate": 3.4052966830877117e-06, "loss": 0.0016, "step": 117890 }, { "epoch": 1.929149963184161, "grad_norm": 0.2015662044286728, "learning_rate": 3.404394487285474e-06, "loss": 0.0022, "step": 117900 }, { "epoch": 1.9293135891352369, "grad_norm": 0.005432970356196165, "learning_rate": 3.403492349320101e-06, "loss": 0.0004, "step": 117910 }, { "epoch": 1.9294772150863126, "grad_norm": 0.05115284398198128, "learning_rate": 3.402590269224294e-06, "loss": 0.0012, "step": 117920 }, { "epoch": 1.9296408410373886, "grad_norm": 0.09749193489551544, "learning_rate": 3.40168824703075e-06, "loss": 0.001, "step": 117930 }, { "epoch": 1.9298044669884644, "grad_norm": 0.01844898983836174, "learning_rate": 3.400786282772165e-06, "loss": 0.001, "step": 117940 }, { "epoch": 1.9299680929395402, "grad_norm": 0.1313873678445816, "learning_rate": 3.3998843764812332e-06, "loss": 0.0021, "step": 117950 }, { "epoch": 1.9301317188906162, "grad_norm": 0.11527419090270996, "learning_rate": 3.3989825281906486e-06, "loss": 0.0021, "step": 117960 }, { "epoch": 1.9302953448416917, "grad_norm": 0.17308522760868073, "learning_rate": 3.3980807379330983e-06, "loss": 0.0014, "step": 117970 }, { "epoch": 1.9304589707927677, "grad_norm": 0.35439544916152954, "learning_rate": 3.3971790057412735e-06, "loss": 0.0015, "step": 117980 }, { "epoch": 1.9306225967438437, "grad_norm": 0.11949679255485535, "learning_rate": 3.3962773316478557e-06, "loss": 0.0011, "step": 117990 }, { "epoch": 1.9307862226949193, "grad_norm": 0.03896412253379822, "learning_rate": 3.3953757156855307e-06, "loss": 0.0012, "step": 118000 }, { "epoch": 1.9309498486459953, "grad_norm": 0.08541835099458694, "learning_rate": 3.3944741578869778e-06, "loss": 0.0016, "step": 118010 }, { "epoch": 1.931113474597071, "grad_norm": 0.14984484016895294, "learning_rate": 3.393572658284878e-06, "loss": 0.0014, "step": 118020 }, { "epoch": 1.9312771005481468, "grad_norm": 0.11960729211568832, "learning_rate": 3.3926712169119097e-06, "loss": 0.0016, "step": 118030 }, { "epoch": 1.9314407264992228, "grad_norm": 0.035255882889032364, "learning_rate": 3.3917698338007464e-06, "loss": 0.0013, "step": 118040 }, { "epoch": 1.9316043524502986, "grad_norm": 0.01587798073887825, "learning_rate": 3.390868508984061e-06, "loss": 0.001, "step": 118050 }, { "epoch": 1.9317679784013744, "grad_norm": 0.11635394394397736, "learning_rate": 3.389967242494525e-06, "loss": 0.0012, "step": 118060 }, { "epoch": 1.9319316043524504, "grad_norm": 0.13837963342666626, "learning_rate": 3.389066034364806e-06, "loss": 0.001, "step": 118070 }, { "epoch": 1.9320952303035261, "grad_norm": 0.09984228760004044, "learning_rate": 3.388164884627574e-06, "loss": 0.0016, "step": 118080 }, { "epoch": 1.932258856254602, "grad_norm": 0.05568477511405945, "learning_rate": 3.3872637933154893e-06, "loss": 0.0021, "step": 118090 }, { "epoch": 1.932422482205678, "grad_norm": 0.052926015108823776, "learning_rate": 3.386362760461216e-06, "loss": 0.0038, "step": 118100 }, { "epoch": 1.9325861081567537, "grad_norm": 0.065251424908638, "learning_rate": 3.3854617860974136e-06, "loss": 0.0027, "step": 118110 }, { "epoch": 1.9327497341078295, "grad_norm": 0.019251657649874687, "learning_rate": 3.3845608702567424e-06, "loss": 0.0008, "step": 118120 }, { "epoch": 1.9329133600589055, "grad_norm": 0.03404831886291504, "learning_rate": 3.383660012971856e-06, "loss": 0.0009, "step": 118130 }, { "epoch": 1.9330769860099812, "grad_norm": 0.08136383444070816, "learning_rate": 3.3827592142754095e-06, "loss": 0.0015, "step": 118140 }, { "epoch": 1.933240611961057, "grad_norm": 0.15428341925144196, "learning_rate": 3.381858474200055e-06, "loss": 0.0021, "step": 118150 }, { "epoch": 1.933404237912133, "grad_norm": 0.0905461311340332, "learning_rate": 3.3809577927784407e-06, "loss": 0.0014, "step": 118160 }, { "epoch": 1.9335678638632086, "grad_norm": 0.1536383479833603, "learning_rate": 3.380057170043215e-06, "loss": 0.0018, "step": 118170 }, { "epoch": 1.9337314898142846, "grad_norm": 0.06010723114013672, "learning_rate": 3.3791566060270264e-06, "loss": 0.0016, "step": 118180 }, { "epoch": 1.9338951157653606, "grad_norm": 0.05428522452712059, "learning_rate": 3.3782561007625138e-06, "loss": 0.0014, "step": 118190 }, { "epoch": 1.934058741716436, "grad_norm": 0.013938963413238525, "learning_rate": 3.377355654282319e-06, "loss": 0.0007, "step": 118200 }, { "epoch": 1.934222367667512, "grad_norm": 0.07637760043144226, "learning_rate": 3.376455266619082e-06, "loss": 0.0011, "step": 118210 }, { "epoch": 1.9343859936185879, "grad_norm": 0.09367643296718597, "learning_rate": 3.3755549378054396e-06, "loss": 0.0014, "step": 118220 }, { "epoch": 1.9345496195696636, "grad_norm": 0.08403017371892929, "learning_rate": 3.374654667874025e-06, "loss": 0.0009, "step": 118230 }, { "epoch": 1.9347132455207396, "grad_norm": 0.06915469467639923, "learning_rate": 3.3737544568574736e-06, "loss": 0.0005, "step": 118240 }, { "epoch": 1.9348768714718154, "grad_norm": 0.2075105905532837, "learning_rate": 3.3728543047884142e-06, "loss": 0.0014, "step": 118250 }, { "epoch": 1.9350404974228912, "grad_norm": 0.05022398754954338, "learning_rate": 3.3719542116994765e-06, "loss": 0.0018, "step": 118260 }, { "epoch": 1.9352041233739672, "grad_norm": 0.04598352313041687, "learning_rate": 3.371054177623285e-06, "loss": 0.0018, "step": 118270 }, { "epoch": 1.935367749325043, "grad_norm": 0.05838308855891228, "learning_rate": 3.3701542025924657e-06, "loss": 0.0011, "step": 118280 }, { "epoch": 1.9355313752761187, "grad_norm": 0.07422434538602829, "learning_rate": 3.3692542866396383e-06, "loss": 0.0032, "step": 118290 }, { "epoch": 1.9356950012271947, "grad_norm": 0.10354265570640564, "learning_rate": 3.368354429797423e-06, "loss": 0.002, "step": 118300 }, { "epoch": 1.9358586271782705, "grad_norm": 0.006993905175477266, "learning_rate": 3.3674546320984385e-06, "loss": 0.0012, "step": 118310 }, { "epoch": 1.9360222531293463, "grad_norm": 0.048503126949071884, "learning_rate": 3.3665548935753e-06, "loss": 0.0018, "step": 118320 }, { "epoch": 1.9361858790804223, "grad_norm": 0.05226905271410942, "learning_rate": 3.365655214260621e-06, "loss": 0.0019, "step": 118330 }, { "epoch": 1.9363495050314978, "grad_norm": 0.023771366104483604, "learning_rate": 3.3647555941870118e-06, "loss": 0.0031, "step": 118340 }, { "epoch": 1.9365131309825738, "grad_norm": 0.05646058917045593, "learning_rate": 3.3638560333870824e-06, "loss": 0.0015, "step": 118350 }, { "epoch": 1.9366767569336498, "grad_norm": 0.020113365724682808, "learning_rate": 3.36295653189344e-06, "loss": 0.0017, "step": 118360 }, { "epoch": 1.9368403828847254, "grad_norm": 0.018712610006332397, "learning_rate": 3.3620570897386877e-06, "loss": 0.0016, "step": 118370 }, { "epoch": 1.9370040088358014, "grad_norm": 0.041349492967128754, "learning_rate": 3.361157706955431e-06, "loss": 0.0011, "step": 118380 }, { "epoch": 1.9371676347868771, "grad_norm": 0.03684849664568901, "learning_rate": 3.360258383576267e-06, "loss": 0.0012, "step": 118390 }, { "epoch": 1.937331260737953, "grad_norm": 0.019560372456908226, "learning_rate": 3.3593591196337962e-06, "loss": 0.0008, "step": 118400 }, { "epoch": 1.937494886689029, "grad_norm": 0.13348115980625153, "learning_rate": 3.3584599151606136e-06, "loss": 0.0015, "step": 118410 }, { "epoch": 1.9376585126401047, "grad_norm": 0.07879369705915451, "learning_rate": 3.3575607701893144e-06, "loss": 0.0012, "step": 118420 }, { "epoch": 1.9378221385911805, "grad_norm": 0.11280323565006256, "learning_rate": 3.356661684752489e-06, "loss": 0.0023, "step": 118430 }, { "epoch": 1.9379857645422565, "grad_norm": 0.04920584708452225, "learning_rate": 3.355762658882728e-06, "loss": 0.0021, "step": 118440 }, { "epoch": 1.9381493904933322, "grad_norm": 0.08949918299913406, "learning_rate": 3.3548636926126177e-06, "loss": 0.0027, "step": 118450 }, { "epoch": 1.938313016444408, "grad_norm": 0.10668892413377762, "learning_rate": 3.353964785974746e-06, "loss": 0.0015, "step": 118460 }, { "epoch": 1.938476642395484, "grad_norm": 0.05627458170056343, "learning_rate": 3.3530659390016944e-06, "loss": 0.0016, "step": 118470 }, { "epoch": 1.9386402683465598, "grad_norm": 0.26361411809921265, "learning_rate": 3.352167151726047e-06, "loss": 0.0016, "step": 118480 }, { "epoch": 1.9388038942976356, "grad_norm": 0.1131122037768364, "learning_rate": 3.351268424180377e-06, "loss": 0.0012, "step": 118490 }, { "epoch": 1.9389675202487116, "grad_norm": 0.02040417492389679, "learning_rate": 3.3503697563972632e-06, "loss": 0.0015, "step": 118500 }, { "epoch": 1.9391311461997873, "grad_norm": 0.28824949264526367, "learning_rate": 3.349471148409282e-06, "loss": 0.0016, "step": 118510 }, { "epoch": 1.939294772150863, "grad_norm": 0.01501093152910471, "learning_rate": 3.348572600249005e-06, "loss": 0.0014, "step": 118520 }, { "epoch": 1.939458398101939, "grad_norm": 0.01099783182144165, "learning_rate": 3.3476741119490013e-06, "loss": 0.0011, "step": 118530 }, { "epoch": 1.9396220240530146, "grad_norm": 0.07206486165523529, "learning_rate": 3.34677568354184e-06, "loss": 0.0009, "step": 118540 }, { "epoch": 1.9397856500040906, "grad_norm": 0.09977972507476807, "learning_rate": 3.3458773150600877e-06, "loss": 0.0013, "step": 118550 }, { "epoch": 1.9399492759551666, "grad_norm": 0.2926282584667206, "learning_rate": 3.3449790065363066e-06, "loss": 0.0022, "step": 118560 }, { "epoch": 1.9401129019062422, "grad_norm": 0.08862805366516113, "learning_rate": 3.3440807580030584e-06, "loss": 0.0014, "step": 118570 }, { "epoch": 1.9402765278573182, "grad_norm": 0.18451426923274994, "learning_rate": 3.3431825694929044e-06, "loss": 0.0012, "step": 118580 }, { "epoch": 1.940440153808394, "grad_norm": 0.07498830556869507, "learning_rate": 3.3422844410383985e-06, "loss": 0.001, "step": 118590 }, { "epoch": 1.9406037797594697, "grad_norm": 0.061219144612550735, "learning_rate": 3.3413863726720978e-06, "loss": 0.0012, "step": 118600 }, { "epoch": 1.9407674057105457, "grad_norm": 0.04234624654054642, "learning_rate": 3.340488364426555e-06, "loss": 0.0029, "step": 118610 }, { "epoch": 1.9409310316616215, "grad_norm": 0.06528057157993317, "learning_rate": 3.3395904163343202e-06, "loss": 0.0011, "step": 118620 }, { "epoch": 1.9410946576126973, "grad_norm": 0.13006280362606049, "learning_rate": 3.338692528427942e-06, "loss": 0.0021, "step": 118630 }, { "epoch": 1.9412582835637733, "grad_norm": 0.005967382341623306, "learning_rate": 3.337794700739967e-06, "loss": 0.0021, "step": 118640 }, { "epoch": 1.941421909514849, "grad_norm": 0.02839636616408825, "learning_rate": 3.3368969333029388e-06, "loss": 0.0013, "step": 118650 }, { "epoch": 1.9415855354659248, "grad_norm": 0.09886398166418076, "learning_rate": 3.335999226149399e-06, "loss": 0.0018, "step": 118660 }, { "epoch": 1.9417491614170008, "grad_norm": 0.06083366647362709, "learning_rate": 3.3351015793118885e-06, "loss": 0.0021, "step": 118670 }, { "epoch": 1.9419127873680766, "grad_norm": 0.07727126777172089, "learning_rate": 3.3342039928229466e-06, "loss": 0.0015, "step": 118680 }, { "epoch": 1.9420764133191524, "grad_norm": 0.09327799081802368, "learning_rate": 3.3333064667151045e-06, "loss": 0.0009, "step": 118690 }, { "epoch": 1.9422400392702284, "grad_norm": 0.06905252486467361, "learning_rate": 3.3324090010208963e-06, "loss": 0.0022, "step": 118700 }, { "epoch": 1.9424036652213041, "grad_norm": 0.04499583691358566, "learning_rate": 3.331511595772855e-06, "loss": 0.0019, "step": 118710 }, { "epoch": 1.94256729117238, "grad_norm": 0.07709557563066483, "learning_rate": 3.3306142510035068e-06, "loss": 0.0012, "step": 118720 }, { "epoch": 1.942730917123456, "grad_norm": 0.032073136419057846, "learning_rate": 3.3297169667453805e-06, "loss": 0.0006, "step": 118730 }, { "epoch": 1.9428945430745315, "grad_norm": 0.05796315521001816, "learning_rate": 3.3288197430309997e-06, "loss": 0.0009, "step": 118740 }, { "epoch": 1.9430581690256075, "grad_norm": 0.0746682807803154, "learning_rate": 3.3279225798928873e-06, "loss": 0.001, "step": 118750 }, { "epoch": 1.9432217949766835, "grad_norm": 0.20048604905605316, "learning_rate": 3.327025477363562e-06, "loss": 0.0009, "step": 118760 }, { "epoch": 1.943385420927759, "grad_norm": 0.040062688291072845, "learning_rate": 3.3261284354755422e-06, "loss": 0.0013, "step": 118770 }, { "epoch": 1.943549046878835, "grad_norm": 0.0030569257214665413, "learning_rate": 3.325231454261345e-06, "loss": 0.0011, "step": 118780 }, { "epoch": 1.9437126728299108, "grad_norm": 0.020612314343452454, "learning_rate": 3.3243345337534814e-06, "loss": 0.0012, "step": 118790 }, { "epoch": 1.9438762987809866, "grad_norm": 0.09768067300319672, "learning_rate": 3.323437673984463e-06, "loss": 0.0011, "step": 118800 }, { "epoch": 1.9440399247320626, "grad_norm": 0.04377545416355133, "learning_rate": 3.3225408749868e-06, "loss": 0.0013, "step": 118810 }, { "epoch": 1.9442035506831383, "grad_norm": 0.07871692627668381, "learning_rate": 3.3216441367929987e-06, "loss": 0.0013, "step": 118820 }, { "epoch": 1.944367176634214, "grad_norm": 0.04945913329720497, "learning_rate": 3.3207474594355627e-06, "loss": 0.0013, "step": 118830 }, { "epoch": 1.94453080258529, "grad_norm": 0.3979766368865967, "learning_rate": 3.3198508429469968e-06, "loss": 0.0017, "step": 118840 }, { "epoch": 1.9446944285363659, "grad_norm": 0.10912535339593887, "learning_rate": 3.3189542873597978e-06, "loss": 0.0009, "step": 118850 }, { "epoch": 1.9448580544874416, "grad_norm": 0.029575081542134285, "learning_rate": 3.3180577927064672e-06, "loss": 0.0015, "step": 118860 }, { "epoch": 1.9450216804385176, "grad_norm": 0.0829453319311142, "learning_rate": 3.3171613590194986e-06, "loss": 0.0023, "step": 118870 }, { "epoch": 1.9451853063895934, "grad_norm": 0.21266023814678192, "learning_rate": 3.3162649863313874e-06, "loss": 0.0028, "step": 118880 }, { "epoch": 1.9453489323406692, "grad_norm": 0.17821168899536133, "learning_rate": 3.315368674674622e-06, "loss": 0.0013, "step": 118890 }, { "epoch": 1.9455125582917452, "grad_norm": 0.11173222213983536, "learning_rate": 3.314472424081694e-06, "loss": 0.0013, "step": 118900 }, { "epoch": 1.945676184242821, "grad_norm": 0.05157876014709473, "learning_rate": 3.3135762345850896e-06, "loss": 0.0012, "step": 118910 }, { "epoch": 1.9458398101938967, "grad_norm": 0.1706482619047165, "learning_rate": 3.3126801062172933e-06, "loss": 0.0018, "step": 118920 }, { "epoch": 1.9460034361449727, "grad_norm": 0.05622512474656105, "learning_rate": 3.311784039010787e-06, "loss": 0.0013, "step": 118930 }, { "epoch": 1.9461670620960483, "grad_norm": 0.3344621956348419, "learning_rate": 3.310888032998052e-06, "loss": 0.0017, "step": 118940 }, { "epoch": 1.9463306880471243, "grad_norm": 0.04745124280452728, "learning_rate": 3.309992088211566e-06, "loss": 0.0011, "step": 118950 }, { "epoch": 1.9464943139982003, "grad_norm": 0.07338631898164749, "learning_rate": 3.309096204683806e-06, "loss": 0.0011, "step": 118960 }, { "epoch": 1.9466579399492758, "grad_norm": 0.170085608959198, "learning_rate": 3.308200382447244e-06, "loss": 0.0015, "step": 118970 }, { "epoch": 1.9468215659003518, "grad_norm": 0.27082160115242004, "learning_rate": 3.307304621534354e-06, "loss": 0.0012, "step": 118980 }, { "epoch": 1.9469851918514276, "grad_norm": 0.08283862471580505, "learning_rate": 3.3064089219776013e-06, "loss": 0.0013, "step": 118990 }, { "epoch": 1.9471488178025034, "grad_norm": 0.06743039190769196, "learning_rate": 3.305513283809454e-06, "loss": 0.0018, "step": 119000 }, { "epoch": 1.9473124437535794, "grad_norm": 0.20719702541828156, "learning_rate": 3.304617707062378e-06, "loss": 0.0042, "step": 119010 }, { "epoch": 1.9474760697046551, "grad_norm": 0.07183507829904556, "learning_rate": 3.3037221917688357e-06, "loss": 0.0009, "step": 119020 }, { "epoch": 1.947639695655731, "grad_norm": 0.013016562908887863, "learning_rate": 3.3028267379612865e-06, "loss": 0.0006, "step": 119030 }, { "epoch": 1.947803321606807, "grad_norm": 0.058981407433748245, "learning_rate": 3.3019313456721903e-06, "loss": 0.0022, "step": 119040 }, { "epoch": 1.9479669475578827, "grad_norm": 0.03922748193144798, "learning_rate": 3.3010360149340006e-06, "loss": 0.0016, "step": 119050 }, { "epoch": 1.9481305735089585, "grad_norm": 0.03358004614710808, "learning_rate": 3.3001407457791723e-06, "loss": 0.0014, "step": 119060 }, { "epoch": 1.9482941994600345, "grad_norm": 0.09242702275514603, "learning_rate": 3.2992455382401566e-06, "loss": 0.0013, "step": 119070 }, { "epoch": 1.9484578254111102, "grad_norm": 0.049873560667037964, "learning_rate": 3.2983503923494033e-06, "loss": 0.0016, "step": 119080 }, { "epoch": 1.948621451362186, "grad_norm": 0.00814768485724926, "learning_rate": 3.2974553081393575e-06, "loss": 0.0011, "step": 119090 }, { "epoch": 1.948785077313262, "grad_norm": 0.12519040703773499, "learning_rate": 3.2965602856424645e-06, "loss": 0.0023, "step": 119100 }, { "epoch": 1.9489487032643378, "grad_norm": 0.016813622787594795, "learning_rate": 3.2956653248911672e-06, "loss": 0.0024, "step": 119110 }, { "epoch": 1.9491123292154136, "grad_norm": 0.058385998010635376, "learning_rate": 3.294770425917906e-06, "loss": 0.0013, "step": 119120 }, { "epoch": 1.9492759551664895, "grad_norm": 0.04116176441311836, "learning_rate": 3.293875588755118e-06, "loss": 0.0014, "step": 119130 }, { "epoch": 1.949439581117565, "grad_norm": 0.12463755160570145, "learning_rate": 3.2929808134352392e-06, "loss": 0.0019, "step": 119140 }, { "epoch": 1.949603207068641, "grad_norm": 0.04978432506322861, "learning_rate": 3.292086099990704e-06, "loss": 0.0007, "step": 119150 }, { "epoch": 1.9497668330197169, "grad_norm": 0.047517552971839905, "learning_rate": 3.291191448453941e-06, "loss": 0.0022, "step": 119160 }, { "epoch": 1.9499304589707926, "grad_norm": 0.1562863290309906, "learning_rate": 3.2902968588573823e-06, "loss": 0.0013, "step": 119170 }, { "epoch": 1.9500940849218686, "grad_norm": 0.037684086710214615, "learning_rate": 3.289402331233453e-06, "loss": 0.0012, "step": 119180 }, { "epoch": 1.9502577108729444, "grad_norm": 0.028352729976177216, "learning_rate": 3.288507865614581e-06, "loss": 0.0019, "step": 119190 }, { "epoch": 1.9504213368240202, "grad_norm": 0.024281544610857964, "learning_rate": 3.2876134620331825e-06, "loss": 0.0027, "step": 119200 }, { "epoch": 1.9505849627750962, "grad_norm": 0.08222485333681107, "learning_rate": 3.286719120521681e-06, "loss": 0.0012, "step": 119210 }, { "epoch": 1.950748588726172, "grad_norm": 0.09528180956840515, "learning_rate": 3.2858248411124916e-06, "loss": 0.0014, "step": 119220 }, { "epoch": 1.9509122146772477, "grad_norm": 0.09575015306472778, "learning_rate": 3.2849306238380335e-06, "loss": 0.003, "step": 119230 }, { "epoch": 1.9510758406283237, "grad_norm": 0.05369120091199875, "learning_rate": 3.284036468730718e-06, "loss": 0.0025, "step": 119240 }, { "epoch": 1.9512394665793995, "grad_norm": 0.05860856920480728, "learning_rate": 3.2831423758229565e-06, "loss": 0.0015, "step": 119250 }, { "epoch": 1.9514030925304753, "grad_norm": 0.06878620386123657, "learning_rate": 3.282248345147157e-06, "loss": 0.0017, "step": 119260 }, { "epoch": 1.9515667184815513, "grad_norm": 0.020936544984579086, "learning_rate": 3.281354376735727e-06, "loss": 0.0016, "step": 119270 }, { "epoch": 1.951730344432627, "grad_norm": 0.16774356365203857, "learning_rate": 3.2804604706210697e-06, "loss": 0.0013, "step": 119280 }, { "epoch": 1.9518939703837028, "grad_norm": 0.004553603008389473, "learning_rate": 3.2795666268355893e-06, "loss": 0.0008, "step": 119290 }, { "epoch": 1.9520575963347788, "grad_norm": 0.10118407011032104, "learning_rate": 3.2786728454116823e-06, "loss": 0.0015, "step": 119300 }, { "epoch": 1.9522212222858544, "grad_norm": 0.08749271929264069, "learning_rate": 3.2777791263817472e-06, "loss": 0.001, "step": 119310 }, { "epoch": 1.9523848482369304, "grad_norm": 0.01847228966653347, "learning_rate": 3.2768854697781806e-06, "loss": 0.0005, "step": 119320 }, { "epoch": 1.9525484741880064, "grad_norm": 0.0919080600142479, "learning_rate": 3.275991875633373e-06, "loss": 0.0008, "step": 119330 }, { "epoch": 1.952712100139082, "grad_norm": 0.07413464039564133, "learning_rate": 3.2750983439797167e-06, "loss": 0.0039, "step": 119340 }, { "epoch": 1.952875726090158, "grad_norm": 0.11682228744029999, "learning_rate": 3.2742048748496003e-06, "loss": 0.002, "step": 119350 }, { "epoch": 1.9530393520412337, "grad_norm": 0.07065902650356293, "learning_rate": 3.273311468275409e-06, "loss": 0.0009, "step": 119360 }, { "epoch": 1.9532029779923095, "grad_norm": 0.1162300705909729, "learning_rate": 3.272418124289527e-06, "loss": 0.0011, "step": 119370 }, { "epoch": 1.9533666039433855, "grad_norm": 0.19947503507137299, "learning_rate": 3.2715248429243353e-06, "loss": 0.0086, "step": 119380 }, { "epoch": 1.9535302298944612, "grad_norm": 0.18376384675502777, "learning_rate": 3.270631624212216e-06, "loss": 0.0024, "step": 119390 }, { "epoch": 1.953693855845537, "grad_norm": 0.03526970371603966, "learning_rate": 3.2697384681855426e-06, "loss": 0.0011, "step": 119400 }, { "epoch": 1.953857481796613, "grad_norm": 0.032023217529058456, "learning_rate": 3.2688453748766905e-06, "loss": 0.0014, "step": 119410 }, { "epoch": 1.9540211077476888, "grad_norm": 0.01755298301577568, "learning_rate": 3.267952344318033e-06, "loss": 0.001, "step": 119420 }, { "epoch": 1.9541847336987646, "grad_norm": 0.07165593653917313, "learning_rate": 3.2670593765419406e-06, "loss": 0.0018, "step": 119430 }, { "epoch": 1.9543483596498405, "grad_norm": 0.17985360324382782, "learning_rate": 3.2661664715807794e-06, "loss": 0.0012, "step": 119440 }, { "epoch": 1.9545119856009163, "grad_norm": 0.12182191014289856, "learning_rate": 3.2652736294669174e-06, "loss": 0.0011, "step": 119450 }, { "epoch": 1.954675611551992, "grad_norm": 0.04350307211279869, "learning_rate": 3.2643808502327175e-06, "loss": 0.0011, "step": 119460 }, { "epoch": 1.954839237503068, "grad_norm": 0.060890890657901764, "learning_rate": 3.2634881339105395e-06, "loss": 0.0025, "step": 119470 }, { "epoch": 1.9550028634541439, "grad_norm": 0.035087425261735916, "learning_rate": 3.2625954805327435e-06, "loss": 0.0011, "step": 119480 }, { "epoch": 1.9551664894052196, "grad_norm": 0.07437703758478165, "learning_rate": 3.261702890131687e-06, "loss": 0.0008, "step": 119490 }, { "epoch": 1.9553301153562956, "grad_norm": 0.04648261144757271, "learning_rate": 3.2608103627397214e-06, "loss": 0.002, "step": 119500 }, { "epoch": 1.9554937413073712, "grad_norm": 0.10275488346815109, "learning_rate": 3.2599178983892e-06, "loss": 0.0015, "step": 119510 }, { "epoch": 1.9556573672584472, "grad_norm": 0.0663665384054184, "learning_rate": 3.259025497112473e-06, "loss": 0.0027, "step": 119520 }, { "epoch": 1.9558209932095232, "grad_norm": 0.050408367067575455, "learning_rate": 3.2581331589418875e-06, "loss": 0.0017, "step": 119530 }, { "epoch": 1.9559846191605987, "grad_norm": 0.08994525671005249, "learning_rate": 3.257240883909788e-06, "loss": 0.0014, "step": 119540 }, { "epoch": 1.9561482451116747, "grad_norm": 0.07794841378927231, "learning_rate": 3.256348672048518e-06, "loss": 0.0016, "step": 119550 }, { "epoch": 1.9563118710627505, "grad_norm": 0.22953827679157257, "learning_rate": 3.2554565233904178e-06, "loss": 0.0017, "step": 119560 }, { "epoch": 1.9564754970138263, "grad_norm": 0.04843238741159439, "learning_rate": 3.254564437967826e-06, "loss": 0.0018, "step": 119570 }, { "epoch": 1.9566391229649023, "grad_norm": 0.09702663123607635, "learning_rate": 3.2536724158130782e-06, "loss": 0.0008, "step": 119580 }, { "epoch": 1.956802748915978, "grad_norm": 0.04243415221571922, "learning_rate": 3.252780456958509e-06, "loss": 0.0009, "step": 119590 }, { "epoch": 1.9569663748670538, "grad_norm": 0.3596123456954956, "learning_rate": 3.251888561436448e-06, "loss": 0.0015, "step": 119600 }, { "epoch": 1.9571300008181298, "grad_norm": 0.12265444546937943, "learning_rate": 3.2509967292792254e-06, "loss": 0.0029, "step": 119610 }, { "epoch": 1.9572936267692056, "grad_norm": 0.08800535649061203, "learning_rate": 3.250104960519167e-06, "loss": 0.0015, "step": 119620 }, { "epoch": 1.9574572527202814, "grad_norm": 0.027957957237958908, "learning_rate": 3.249213255188599e-06, "loss": 0.0009, "step": 119630 }, { "epoch": 1.9576208786713574, "grad_norm": 0.12052654474973679, "learning_rate": 3.248321613319842e-06, "loss": 0.0008, "step": 119640 }, { "epoch": 1.9577845046224331, "grad_norm": 0.06927067786455154, "learning_rate": 3.247430034945216e-06, "loss": 0.0022, "step": 119650 }, { "epoch": 1.957948130573509, "grad_norm": 0.01786019094288349, "learning_rate": 3.2465385200970388e-06, "loss": 0.0014, "step": 119660 }, { "epoch": 1.958111756524585, "grad_norm": 0.018616029992699623, "learning_rate": 3.245647068807627e-06, "loss": 0.0036, "step": 119670 }, { "epoch": 1.9582753824756607, "grad_norm": 0.061355095356702805, "learning_rate": 3.244755681109293e-06, "loss": 0.0015, "step": 119680 }, { "epoch": 1.9584390084267365, "grad_norm": 0.07513242959976196, "learning_rate": 3.2438643570343474e-06, "loss": 0.0004, "step": 119690 }, { "epoch": 1.9586026343778125, "grad_norm": 0.07908494770526886, "learning_rate": 3.2429730966150975e-06, "loss": 0.0014, "step": 119700 }, { "epoch": 1.958766260328888, "grad_norm": 0.08945582807064056, "learning_rate": 3.242081899883848e-06, "loss": 0.0015, "step": 119710 }, { "epoch": 1.958929886279964, "grad_norm": 0.12176748365163803, "learning_rate": 3.2411907668729057e-06, "loss": 0.001, "step": 119720 }, { "epoch": 1.95909351223104, "grad_norm": 0.005745396483689547, "learning_rate": 3.2402996976145713e-06, "loss": 0.0012, "step": 119730 }, { "epoch": 1.9592571381821156, "grad_norm": 0.07633748650550842, "learning_rate": 3.2394086921411433e-06, "loss": 0.001, "step": 119740 }, { "epoch": 1.9594207641331916, "grad_norm": 0.0820692852139473, "learning_rate": 3.2385177504849185e-06, "loss": 0.0012, "step": 119750 }, { "epoch": 1.9595843900842673, "grad_norm": 0.20230506360530853, "learning_rate": 3.2376268726781924e-06, "loss": 0.0012, "step": 119760 }, { "epoch": 1.959748016035343, "grad_norm": 0.07433783262968063, "learning_rate": 3.236736058753256e-06, "loss": 0.0009, "step": 119770 }, { "epoch": 1.959911641986419, "grad_norm": 0.044084712862968445, "learning_rate": 3.235845308742399e-06, "loss": 0.0014, "step": 119780 }, { "epoch": 1.9600752679374949, "grad_norm": 0.04565664008259773, "learning_rate": 3.2349546226779117e-06, "loss": 0.0012, "step": 119790 }, { "epoch": 1.9602388938885706, "grad_norm": 0.03038705885410309, "learning_rate": 3.2340640005920755e-06, "loss": 0.0015, "step": 119800 }, { "epoch": 1.9604025198396466, "grad_norm": 0.05366574227809906, "learning_rate": 3.2331734425171744e-06, "loss": 0.002, "step": 119810 }, { "epoch": 1.9605661457907224, "grad_norm": 0.0558791346848011, "learning_rate": 3.23228294848549e-06, "loss": 0.0008, "step": 119820 }, { "epoch": 1.9607297717417982, "grad_norm": 0.03427383676171303, "learning_rate": 3.2313925185293006e-06, "loss": 0.0012, "step": 119830 }, { "epoch": 1.9608933976928742, "grad_norm": 0.2161935418844223, "learning_rate": 3.230502152680881e-06, "loss": 0.0019, "step": 119840 }, { "epoch": 1.96105702364395, "grad_norm": 0.1068362295627594, "learning_rate": 3.229611850972506e-06, "loss": 0.0008, "step": 119850 }, { "epoch": 1.9612206495950257, "grad_norm": 0.09048112481832504, "learning_rate": 3.2287216134364465e-06, "loss": 0.0012, "step": 119860 }, { "epoch": 1.9613842755461017, "grad_norm": 0.006110770627856255, "learning_rate": 3.2278314401049694e-06, "loss": 0.0011, "step": 119870 }, { "epoch": 1.9615479014971775, "grad_norm": 0.01368734147399664, "learning_rate": 3.226941331010345e-06, "loss": 0.0011, "step": 119880 }, { "epoch": 1.9617115274482533, "grad_norm": 0.12932072579860687, "learning_rate": 3.2260512861848383e-06, "loss": 0.001, "step": 119890 }, { "epoch": 1.9618751533993293, "grad_norm": 0.024174556136131287, "learning_rate": 3.2251613056607062e-06, "loss": 0.0016, "step": 119900 }, { "epoch": 1.9620387793504048, "grad_norm": 0.0598149299621582, "learning_rate": 3.2242713894702118e-06, "loss": 0.0014, "step": 119910 }, { "epoch": 1.9622024053014808, "grad_norm": 0.025891564786434174, "learning_rate": 3.223381537645611e-06, "loss": 0.0006, "step": 119920 }, { "epoch": 1.9623660312525568, "grad_norm": 0.09268096834421158, "learning_rate": 3.2224917502191583e-06, "loss": 0.0007, "step": 119930 }, { "epoch": 1.9625296572036324, "grad_norm": 0.037256430834531784, "learning_rate": 3.2216020272231085e-06, "loss": 0.0014, "step": 119940 }, { "epoch": 1.9626932831547084, "grad_norm": 0.09555631130933762, "learning_rate": 3.2207123686897117e-06, "loss": 0.0019, "step": 119950 }, { "epoch": 1.9628569091057841, "grad_norm": 0.06838426738977432, "learning_rate": 3.2198227746512145e-06, "loss": 0.0016, "step": 119960 }, { "epoch": 1.96302053505686, "grad_norm": 0.05205368250608444, "learning_rate": 3.2189332451398638e-06, "loss": 0.0012, "step": 119970 }, { "epoch": 1.963184161007936, "grad_norm": 0.04782318323850632, "learning_rate": 3.2180437801879015e-06, "loss": 0.0011, "step": 119980 }, { "epoch": 1.9633477869590117, "grad_norm": 0.07910872250795364, "learning_rate": 3.217154379827571e-06, "loss": 0.0016, "step": 119990 }, { "epoch": 1.9635114129100875, "grad_norm": 0.22480346262454987, "learning_rate": 3.216265044091108e-06, "loss": 0.0014, "step": 120000 }, { "epoch": 1.9636750388611635, "grad_norm": 0.005954029969871044, "learning_rate": 3.2153757730107504e-06, "loss": 0.0008, "step": 120010 }, { "epoch": 1.9638386648122392, "grad_norm": 0.11898721754550934, "learning_rate": 3.2144865666187314e-06, "loss": 0.002, "step": 120020 }, { "epoch": 1.964002290763315, "grad_norm": 0.0021562569309026003, "learning_rate": 3.2135974249472827e-06, "loss": 0.0006, "step": 120030 }, { "epoch": 1.964165916714391, "grad_norm": 0.034869857132434845, "learning_rate": 3.2127083480286348e-06, "loss": 0.0005, "step": 120040 }, { "epoch": 1.9643295426654668, "grad_norm": 0.18674185872077942, "learning_rate": 3.211819335895013e-06, "loss": 0.0014, "step": 120050 }, { "epoch": 1.9644931686165426, "grad_norm": 0.09326320886611938, "learning_rate": 3.2109303885786427e-06, "loss": 0.0009, "step": 120060 }, { "epoch": 1.9646567945676185, "grad_norm": 0.03501841053366661, "learning_rate": 3.210041506111745e-06, "loss": 0.0009, "step": 120070 }, { "epoch": 1.964820420518694, "grad_norm": 0.07621312141418457, "learning_rate": 3.2091526885265415e-06, "loss": 0.0014, "step": 120080 }, { "epoch": 1.96498404646977, "grad_norm": 0.07037162780761719, "learning_rate": 3.2082639358552504e-06, "loss": 0.0026, "step": 120090 }, { "epoch": 1.965147672420846, "grad_norm": 0.11594855785369873, "learning_rate": 3.2073752481300835e-06, "loss": 0.001, "step": 120100 }, { "epoch": 1.9653112983719216, "grad_norm": 0.19071663916110992, "learning_rate": 3.2064866253832555e-06, "loss": 0.0013, "step": 120110 }, { "epoch": 1.9654749243229976, "grad_norm": 0.22957459092140198, "learning_rate": 3.2055980676469766e-06, "loss": 0.0019, "step": 120120 }, { "epoch": 1.9656385502740734, "grad_norm": 0.06698928773403168, "learning_rate": 3.2047095749534552e-06, "loss": 0.0022, "step": 120130 }, { "epoch": 1.9658021762251492, "grad_norm": 0.23116269707679749, "learning_rate": 3.203821147334897e-06, "loss": 0.0018, "step": 120140 }, { "epoch": 1.9659658021762252, "grad_norm": 0.060072775930166245, "learning_rate": 3.202932784823504e-06, "loss": 0.0021, "step": 120150 }, { "epoch": 1.966129428127301, "grad_norm": 0.004396048840135336, "learning_rate": 3.2020444874514793e-06, "loss": 0.0015, "step": 120160 }, { "epoch": 1.9662930540783767, "grad_norm": 0.10115991532802582, "learning_rate": 3.201156255251021e-06, "loss": 0.0014, "step": 120170 }, { "epoch": 1.9664566800294527, "grad_norm": 0.021288610994815826, "learning_rate": 3.2002680882543246e-06, "loss": 0.0008, "step": 120180 }, { "epoch": 1.9666203059805285, "grad_norm": 0.04749828577041626, "learning_rate": 3.199379986493587e-06, "loss": 0.001, "step": 120190 }, { "epoch": 1.9667839319316043, "grad_norm": 0.11034960299730301, "learning_rate": 3.1984919500009946e-06, "loss": 0.0019, "step": 120200 }, { "epoch": 1.9669475578826803, "grad_norm": 0.17236697673797607, "learning_rate": 3.197603978808739e-06, "loss": 0.0018, "step": 120210 }, { "epoch": 1.967111183833756, "grad_norm": 0.3318023383617401, "learning_rate": 3.1967160729490078e-06, "loss": 0.0014, "step": 120220 }, { "epoch": 1.9672748097848318, "grad_norm": 0.005125746130943298, "learning_rate": 3.1958282324539846e-06, "loss": 0.0011, "step": 120230 }, { "epoch": 1.9674384357359078, "grad_norm": 0.04570695012807846, "learning_rate": 3.194940457355852e-06, "loss": 0.0013, "step": 120240 }, { "epoch": 1.9676020616869836, "grad_norm": 0.13440853357315063, "learning_rate": 3.19405274768679e-06, "loss": 0.0011, "step": 120250 }, { "epoch": 1.9677656876380594, "grad_norm": 0.3376067876815796, "learning_rate": 3.1931651034789746e-06, "loss": 0.0025, "step": 120260 }, { "epoch": 1.9679293135891354, "grad_norm": 0.14046721160411835, "learning_rate": 3.192277524764582e-06, "loss": 0.0012, "step": 120270 }, { "epoch": 1.968092939540211, "grad_norm": 0.02051129750907421, "learning_rate": 3.191390011575784e-06, "loss": 0.0006, "step": 120280 }, { "epoch": 1.968256565491287, "grad_norm": 0.0698816180229187, "learning_rate": 3.1905025639447525e-06, "loss": 0.0012, "step": 120290 }, { "epoch": 1.968420191442363, "grad_norm": 0.09853628277778625, "learning_rate": 3.189615181903653e-06, "loss": 0.0007, "step": 120300 }, { "epoch": 1.9685838173934385, "grad_norm": 0.04063701629638672, "learning_rate": 3.188727865484652e-06, "loss": 0.0011, "step": 120310 }, { "epoch": 1.9687474433445145, "grad_norm": 0.04318047687411308, "learning_rate": 3.1878406147199127e-06, "loss": 0.001, "step": 120320 }, { "epoch": 1.9689110692955902, "grad_norm": 0.12426009774208069, "learning_rate": 3.1869534296415954e-06, "loss": 0.0012, "step": 120330 }, { "epoch": 1.969074695246666, "grad_norm": 0.08373285084962845, "learning_rate": 3.186066310281859e-06, "loss": 0.0006, "step": 120340 }, { "epoch": 1.969238321197742, "grad_norm": 0.0494953952729702, "learning_rate": 3.185179256672859e-06, "loss": 0.0021, "step": 120350 }, { "epoch": 1.9694019471488178, "grad_norm": 0.2756549119949341, "learning_rate": 3.1842922688467493e-06, "loss": 0.0022, "step": 120360 }, { "epoch": 1.9695655730998936, "grad_norm": 0.1246170699596405, "learning_rate": 3.1834053468356797e-06, "loss": 0.0017, "step": 120370 }, { "epoch": 1.9697291990509695, "grad_norm": 0.026286819949746132, "learning_rate": 3.182518490671802e-06, "loss": 0.0023, "step": 120380 }, { "epoch": 1.9698928250020453, "grad_norm": 0.17359715700149536, "learning_rate": 3.181631700387262e-06, "loss": 0.0015, "step": 120390 }, { "epoch": 1.970056450953121, "grad_norm": 0.06552699953317642, "learning_rate": 3.1807449760142005e-06, "loss": 0.002, "step": 120400 }, { "epoch": 1.970220076904197, "grad_norm": 0.0627862960100174, "learning_rate": 3.179858317584762e-06, "loss": 0.0015, "step": 120410 }, { "epoch": 1.9703837028552729, "grad_norm": 0.00994772370904684, "learning_rate": 3.1789717251310836e-06, "loss": 0.0014, "step": 120420 }, { "epoch": 1.9705473288063486, "grad_norm": 0.07194051146507263, "learning_rate": 3.1780851986853034e-06, "loss": 0.0011, "step": 120430 }, { "epoch": 1.9707109547574246, "grad_norm": 0.08550158143043518, "learning_rate": 3.1771987382795566e-06, "loss": 0.0015, "step": 120440 }, { "epoch": 1.9708745807085004, "grad_norm": 0.044907134026288986, "learning_rate": 3.1763123439459744e-06, "loss": 0.0023, "step": 120450 }, { "epoch": 1.9710382066595762, "grad_norm": 0.21220263838768005, "learning_rate": 3.175426015716687e-06, "loss": 0.0014, "step": 120460 }, { "epoch": 1.9712018326106522, "grad_norm": 0.11774542927742004, "learning_rate": 3.174539753623821e-06, "loss": 0.0018, "step": 120470 }, { "epoch": 1.9713654585617277, "grad_norm": 0.07862376421689987, "learning_rate": 3.1736535576995016e-06, "loss": 0.0009, "step": 120480 }, { "epoch": 1.9715290845128037, "grad_norm": 0.03766781836748123, "learning_rate": 3.1727674279758523e-06, "loss": 0.0009, "step": 120490 }, { "epoch": 1.9716927104638797, "grad_norm": 0.05893450602889061, "learning_rate": 3.171881364484991e-06, "loss": 0.0016, "step": 120500 }, { "epoch": 1.9718563364149553, "grad_norm": 0.03269325569272041, "learning_rate": 3.1709953672590367e-06, "loss": 0.0015, "step": 120510 }, { "epoch": 1.9720199623660313, "grad_norm": 0.03670454025268555, "learning_rate": 3.170109436330104e-06, "loss": 0.0014, "step": 120520 }, { "epoch": 1.972183588317107, "grad_norm": 0.04544687643647194, "learning_rate": 3.1692235717303064e-06, "loss": 0.0018, "step": 120530 }, { "epoch": 1.9723472142681828, "grad_norm": 0.11783545464277267, "learning_rate": 3.1683377734917542e-06, "loss": 0.001, "step": 120540 }, { "epoch": 1.9725108402192588, "grad_norm": 0.04126141965389252, "learning_rate": 3.1674520416465552e-06, "loss": 0.0009, "step": 120550 }, { "epoch": 1.9726744661703346, "grad_norm": 0.010780500248074532, "learning_rate": 3.166566376226816e-06, "loss": 0.0014, "step": 120560 }, { "epoch": 1.9728380921214104, "grad_norm": 0.2720125615596771, "learning_rate": 3.165680777264638e-06, "loss": 0.0021, "step": 120570 }, { "epoch": 1.9730017180724864, "grad_norm": 0.07383672893047333, "learning_rate": 3.164795244792124e-06, "loss": 0.0011, "step": 120580 }, { "epoch": 1.9731653440235621, "grad_norm": 0.1570451408624649, "learning_rate": 3.1639097788413727e-06, "loss": 0.0018, "step": 120590 }, { "epoch": 1.973328969974638, "grad_norm": 0.059065233916044235, "learning_rate": 3.1630243794444772e-06, "loss": 0.0026, "step": 120600 }, { "epoch": 1.973492595925714, "grad_norm": 0.06240195780992508, "learning_rate": 3.1621390466335334e-06, "loss": 0.0016, "step": 120610 }, { "epoch": 1.9736562218767897, "grad_norm": 0.016657231375575066, "learning_rate": 3.161253780440632e-06, "loss": 0.0005, "step": 120620 }, { "epoch": 1.9738198478278655, "grad_norm": 0.1470792591571808, "learning_rate": 3.160368580897862e-06, "loss": 0.0021, "step": 120630 }, { "epoch": 1.9739834737789415, "grad_norm": 0.14704667031764984, "learning_rate": 3.1594834480373093e-06, "loss": 0.0012, "step": 120640 }, { "epoch": 1.9741470997300172, "grad_norm": 0.19293953478336334, "learning_rate": 3.1585983818910563e-06, "loss": 0.0016, "step": 120650 }, { "epoch": 1.974310725681093, "grad_norm": 0.07996378093957901, "learning_rate": 3.1577133824911884e-06, "loss": 0.0014, "step": 120660 }, { "epoch": 1.974474351632169, "grad_norm": 0.056830473244190216, "learning_rate": 3.156828449869782e-06, "loss": 0.0011, "step": 120670 }, { "epoch": 1.9746379775832446, "grad_norm": 0.051241327077150345, "learning_rate": 3.1559435840589144e-06, "loss": 0.0024, "step": 120680 }, { "epoch": 1.9748016035343205, "grad_norm": 0.04359995946288109, "learning_rate": 3.1550587850906617e-06, "loss": 0.0022, "step": 120690 }, { "epoch": 1.9749652294853965, "grad_norm": 0.17184430360794067, "learning_rate": 3.154174052997091e-06, "loss": 0.0014, "step": 120700 }, { "epoch": 1.975128855436472, "grad_norm": 0.18006198108196259, "learning_rate": 3.1532893878102756e-06, "loss": 0.0017, "step": 120710 }, { "epoch": 1.975292481387548, "grad_norm": 0.03541097790002823, "learning_rate": 3.1524047895622812e-06, "loss": 0.003, "step": 120720 }, { "epoch": 1.9754561073386239, "grad_norm": 0.06656794250011444, "learning_rate": 3.151520258285172e-06, "loss": 0.0006, "step": 120730 }, { "epoch": 1.9756197332896996, "grad_norm": 0.03277677670121193, "learning_rate": 3.1506357940110123e-06, "loss": 0.0015, "step": 120740 }, { "epoch": 1.9757833592407756, "grad_norm": 0.04108794033527374, "learning_rate": 3.1497513967718594e-06, "loss": 0.0018, "step": 120750 }, { "epoch": 1.9759469851918514, "grad_norm": 0.06622787564992905, "learning_rate": 3.148867066599771e-06, "loss": 0.0009, "step": 120760 }, { "epoch": 1.9761106111429272, "grad_norm": 0.08014035224914551, "learning_rate": 3.147982803526803e-06, "loss": 0.0006, "step": 120770 }, { "epoch": 1.9762742370940032, "grad_norm": 0.12275131791830063, "learning_rate": 3.1470986075850067e-06, "loss": 0.0011, "step": 120780 }, { "epoch": 1.976437863045079, "grad_norm": 0.13255465030670166, "learning_rate": 3.146214478806434e-06, "loss": 0.0009, "step": 120790 }, { "epoch": 1.9766014889961547, "grad_norm": 0.047800809144973755, "learning_rate": 3.1453304172231295e-06, "loss": 0.0012, "step": 120800 }, { "epoch": 1.9767651149472307, "grad_norm": 0.06365638226270676, "learning_rate": 3.14444642286714e-06, "loss": 0.0021, "step": 120810 }, { "epoch": 1.9769287408983065, "grad_norm": 0.14298172295093536, "learning_rate": 3.143562495770508e-06, "loss": 0.0012, "step": 120820 }, { "epoch": 1.9770923668493823, "grad_norm": 0.10809532552957535, "learning_rate": 3.1426786359652738e-06, "loss": 0.0021, "step": 120830 }, { "epoch": 1.9772559928004583, "grad_norm": 0.0827135443687439, "learning_rate": 3.1417948434834756e-06, "loss": 0.0018, "step": 120840 }, { "epoch": 1.977419618751534, "grad_norm": 0.06294337660074234, "learning_rate": 3.1409111183571474e-06, "loss": 0.0014, "step": 120850 }, { "epoch": 1.9775832447026098, "grad_norm": 0.11825191229581833, "learning_rate": 3.1400274606183223e-06, "loss": 0.0029, "step": 120860 }, { "epoch": 1.9777468706536858, "grad_norm": 0.07278622686862946, "learning_rate": 3.1391438702990325e-06, "loss": 0.0019, "step": 120870 }, { "epoch": 1.9779104966047614, "grad_norm": 0.03915053978562355, "learning_rate": 3.1382603474313044e-06, "loss": 0.0011, "step": 120880 }, { "epoch": 1.9780741225558374, "grad_norm": 0.13825000822544098, "learning_rate": 3.137376892047167e-06, "loss": 0.0017, "step": 120890 }, { "epoch": 1.9782377485069134, "grad_norm": 0.15757323801517487, "learning_rate": 3.136493504178638e-06, "loss": 0.0009, "step": 120900 }, { "epoch": 1.978401374457989, "grad_norm": 0.1990974396467209, "learning_rate": 3.13561018385774e-06, "loss": 0.0015, "step": 120910 }, { "epoch": 1.978565000409065, "grad_norm": 0.04972890019416809, "learning_rate": 3.134726931116492e-06, "loss": 0.001, "step": 120920 }, { "epoch": 1.9787286263601407, "grad_norm": 0.053142745047807693, "learning_rate": 3.133843745986911e-06, "loss": 0.0018, "step": 120930 }, { "epoch": 1.9788922523112165, "grad_norm": 0.01802622526884079, "learning_rate": 3.132960628501008e-06, "loss": 0.0013, "step": 120940 }, { "epoch": 1.9790558782622925, "grad_norm": 0.07915715128183365, "learning_rate": 3.1320775786907954e-06, "loss": 0.0008, "step": 120950 }, { "epoch": 1.9792195042133682, "grad_norm": 0.06082886829972267, "learning_rate": 3.1311945965882805e-06, "loss": 0.0017, "step": 120960 }, { "epoch": 1.979383130164444, "grad_norm": 0.12227126955986023, "learning_rate": 3.130311682225471e-06, "loss": 0.0023, "step": 120970 }, { "epoch": 1.97954675611552, "grad_norm": 0.15932504832744598, "learning_rate": 3.129428835634368e-06, "loss": 0.0019, "step": 120980 }, { "epoch": 1.9797103820665958, "grad_norm": 0.049096036702394485, "learning_rate": 3.128546056846976e-06, "loss": 0.0009, "step": 120990 }, { "epoch": 1.9798740080176715, "grad_norm": 0.05467770993709564, "learning_rate": 3.1276633458952897e-06, "loss": 0.0023, "step": 121000 }, { "epoch": 1.9800376339687475, "grad_norm": 0.16764862835407257, "learning_rate": 3.1267807028113084e-06, "loss": 0.0018, "step": 121010 }, { "epoch": 1.9802012599198233, "grad_norm": 0.2800367772579193, "learning_rate": 3.125898127627023e-06, "loss": 0.0023, "step": 121020 }, { "epoch": 1.980364885870899, "grad_norm": 0.03497200459241867, "learning_rate": 3.125015620374427e-06, "loss": 0.0007, "step": 121030 }, { "epoch": 1.980528511821975, "grad_norm": 0.03715066611766815, "learning_rate": 3.124133181085508e-06, "loss": 0.0008, "step": 121040 }, { "epoch": 1.9806921377730506, "grad_norm": 0.06147247552871704, "learning_rate": 3.123250809792253e-06, "loss": 0.0008, "step": 121050 }, { "epoch": 1.9808557637241266, "grad_norm": 0.08657212555408478, "learning_rate": 3.122368506526645e-06, "loss": 0.002, "step": 121060 }, { "epoch": 1.9810193896752026, "grad_norm": 0.07349998503923416, "learning_rate": 3.1214862713206663e-06, "loss": 0.0014, "step": 121070 }, { "epoch": 1.9811830156262782, "grad_norm": 0.08907697349786758, "learning_rate": 3.120604104206294e-06, "loss": 0.0008, "step": 121080 }, { "epoch": 1.9813466415773542, "grad_norm": 0.0578862726688385, "learning_rate": 3.11972200521551e-06, "loss": 0.0018, "step": 121090 }, { "epoch": 1.98151026752843, "grad_norm": 0.12811973690986633, "learning_rate": 3.118839974380281e-06, "loss": 0.0011, "step": 121100 }, { "epoch": 1.9816738934795057, "grad_norm": 0.0376611165702343, "learning_rate": 3.1179580117325817e-06, "loss": 0.0007, "step": 121110 }, { "epoch": 1.9818375194305817, "grad_norm": 0.03496914356946945, "learning_rate": 3.1170761173043816e-06, "loss": 0.001, "step": 121120 }, { "epoch": 1.9820011453816575, "grad_norm": 0.1055348739027977, "learning_rate": 3.1161942911276467e-06, "loss": 0.0011, "step": 121130 }, { "epoch": 1.9821647713327333, "grad_norm": 0.040344804525375366, "learning_rate": 3.11531253323434e-06, "loss": 0.0011, "step": 121140 }, { "epoch": 1.9823283972838093, "grad_norm": 0.02520809881389141, "learning_rate": 3.114430843656425e-06, "loss": 0.001, "step": 121150 }, { "epoch": 1.982492023234885, "grad_norm": 0.11748852580785751, "learning_rate": 3.11354922242586e-06, "loss": 0.0012, "step": 121160 }, { "epoch": 1.9826556491859608, "grad_norm": 0.23024903237819672, "learning_rate": 3.112667669574603e-06, "loss": 0.0016, "step": 121170 }, { "epoch": 1.9828192751370368, "grad_norm": 0.006447488442063332, "learning_rate": 3.1117861851346056e-06, "loss": 0.0007, "step": 121180 }, { "epoch": 1.9829829010881126, "grad_norm": 0.07881323248147964, "learning_rate": 3.110904769137823e-06, "loss": 0.0021, "step": 121190 }, { "epoch": 1.9831465270391884, "grad_norm": 0.12419156730175018, "learning_rate": 3.1100234216161995e-06, "loss": 0.0025, "step": 121200 }, { "epoch": 1.9833101529902644, "grad_norm": 0.015714025124907494, "learning_rate": 3.1091421426016854e-06, "loss": 0.0015, "step": 121210 }, { "epoch": 1.9834737789413401, "grad_norm": 0.06415874511003494, "learning_rate": 3.1082609321262237e-06, "loss": 0.0015, "step": 121220 }, { "epoch": 1.983637404892416, "grad_norm": 0.00596196623519063, "learning_rate": 3.1073797902217573e-06, "loss": 0.0021, "step": 121230 }, { "epoch": 1.983801030843492, "grad_norm": 0.08138204365968704, "learning_rate": 3.1064987169202245e-06, "loss": 0.0013, "step": 121240 }, { "epoch": 1.9839646567945675, "grad_norm": 0.10327907651662827, "learning_rate": 3.1056177122535613e-06, "loss": 0.0011, "step": 121250 }, { "epoch": 1.9841282827456435, "grad_norm": 0.09449491649866104, "learning_rate": 3.104736776253704e-06, "loss": 0.0016, "step": 121260 }, { "epoch": 1.9842919086967195, "grad_norm": 0.4243868887424469, "learning_rate": 3.103855908952583e-06, "loss": 0.003, "step": 121270 }, { "epoch": 1.984455534647795, "grad_norm": 0.10233750939369202, "learning_rate": 3.1029751103821286e-06, "loss": 0.0031, "step": 121280 }, { "epoch": 1.984619160598871, "grad_norm": 0.01858137920498848, "learning_rate": 3.1020943805742666e-06, "loss": 0.0011, "step": 121290 }, { "epoch": 1.9847827865499468, "grad_norm": 0.15394708514213562, "learning_rate": 3.1012137195609234e-06, "loss": 0.0027, "step": 121300 }, { "epoch": 1.9849464125010226, "grad_norm": 0.01425251830369234, "learning_rate": 3.1003331273740178e-06, "loss": 0.0007, "step": 121310 }, { "epoch": 1.9851100384520985, "grad_norm": 0.37392958998680115, "learning_rate": 3.09945260404547e-06, "loss": 0.0014, "step": 121320 }, { "epoch": 1.9852736644031743, "grad_norm": 0.1045062318444252, "learning_rate": 3.0985721496071985e-06, "loss": 0.0025, "step": 121330 }, { "epoch": 1.98543729035425, "grad_norm": 0.07641514390707016, "learning_rate": 3.097691764091116e-06, "loss": 0.001, "step": 121340 }, { "epoch": 1.985600916305326, "grad_norm": 0.08478570729494095, "learning_rate": 3.096811447529135e-06, "loss": 0.0009, "step": 121350 }, { "epoch": 1.9857645422564019, "grad_norm": 0.39634406566619873, "learning_rate": 3.0959311999531637e-06, "loss": 0.0022, "step": 121360 }, { "epoch": 1.9859281682074776, "grad_norm": 0.06185883283615112, "learning_rate": 3.095051021395111e-06, "loss": 0.001, "step": 121370 }, { "epoch": 1.9860917941585536, "grad_norm": 0.08347039669752121, "learning_rate": 3.094170911886881e-06, "loss": 0.0012, "step": 121380 }, { "epoch": 1.9862554201096294, "grad_norm": 0.031031904742121696, "learning_rate": 3.093290871460375e-06, "loss": 0.0009, "step": 121390 }, { "epoch": 1.9864190460607052, "grad_norm": 0.08313092589378357, "learning_rate": 3.092410900147494e-06, "loss": 0.0014, "step": 121400 }, { "epoch": 1.9865826720117812, "grad_norm": 0.0425654873251915, "learning_rate": 3.091530997980131e-06, "loss": 0.001, "step": 121410 }, { "epoch": 1.986746297962857, "grad_norm": 0.11481112986803055, "learning_rate": 3.0906511649901817e-06, "loss": 0.0017, "step": 121420 }, { "epoch": 1.9869099239139327, "grad_norm": 0.14661121368408203, "learning_rate": 3.0897714012095394e-06, "loss": 0.0021, "step": 121430 }, { "epoch": 1.9870735498650087, "grad_norm": 0.01511545293033123, "learning_rate": 3.088891706670092e-06, "loss": 0.0012, "step": 121440 }, { "epoch": 1.9872371758160843, "grad_norm": 0.007139799650758505, "learning_rate": 3.0880120814037277e-06, "loss": 0.0015, "step": 121450 }, { "epoch": 1.9874008017671603, "grad_norm": 0.10353057086467743, "learning_rate": 3.08713252544233e-06, "loss": 0.001, "step": 121460 }, { "epoch": 1.9875644277182363, "grad_norm": 0.04498282074928284, "learning_rate": 3.086253038817781e-06, "loss": 0.0006, "step": 121470 }, { "epoch": 1.9877280536693118, "grad_norm": 0.11789406836032867, "learning_rate": 3.0853736215619594e-06, "loss": 0.0016, "step": 121480 }, { "epoch": 1.9878916796203878, "grad_norm": 0.03144877776503563, "learning_rate": 3.084494273706743e-06, "loss": 0.0012, "step": 121490 }, { "epoch": 1.9880553055714636, "grad_norm": 0.055184394121170044, "learning_rate": 3.0836149952840054e-06, "loss": 0.0011, "step": 121500 }, { "epoch": 1.9882189315225394, "grad_norm": 0.10970151424407959, "learning_rate": 3.082735786325618e-06, "loss": 0.0013, "step": 121510 }, { "epoch": 1.9883825574736154, "grad_norm": 0.11161845177412033, "learning_rate": 3.0818566468634503e-06, "loss": 0.0013, "step": 121520 }, { "epoch": 1.9885461834246911, "grad_norm": 0.15732745826244354, "learning_rate": 3.0809775769293694e-06, "loss": 0.0007, "step": 121530 }, { "epoch": 1.988709809375767, "grad_norm": 0.07671280950307846, "learning_rate": 3.0800985765552384e-06, "loss": 0.0011, "step": 121540 }, { "epoch": 1.988873435326843, "grad_norm": 0.16346527636051178, "learning_rate": 3.0792196457729194e-06, "loss": 0.0007, "step": 121550 }, { "epoch": 1.9890370612779187, "grad_norm": 0.051224809139966965, "learning_rate": 3.078340784614272e-06, "loss": 0.0014, "step": 121560 }, { "epoch": 1.9892006872289945, "grad_norm": 0.03920578584074974, "learning_rate": 3.0774619931111534e-06, "loss": 0.0024, "step": 121570 }, { "epoch": 1.9893643131800705, "grad_norm": 0.14436499774456024, "learning_rate": 3.0765832712954145e-06, "loss": 0.0012, "step": 121580 }, { "epoch": 1.9895279391311462, "grad_norm": 0.06951668113470078, "learning_rate": 3.0757046191989117e-06, "loss": 0.0009, "step": 121590 }, { "epoch": 1.989691565082222, "grad_norm": 0.08532749861478806, "learning_rate": 3.0748260368534926e-06, "loss": 0.0019, "step": 121600 }, { "epoch": 1.989855191033298, "grad_norm": 0.04310256987810135, "learning_rate": 3.073947524291001e-06, "loss": 0.0014, "step": 121610 }, { "epoch": 1.9900188169843738, "grad_norm": 0.11547352373600006, "learning_rate": 3.0730690815432817e-06, "loss": 0.0013, "step": 121620 }, { "epoch": 1.9901824429354495, "grad_norm": 0.035640716552734375, "learning_rate": 3.0721907086421778e-06, "loss": 0.0018, "step": 121630 }, { "epoch": 1.9903460688865255, "grad_norm": 0.02316875010728836, "learning_rate": 3.0713124056195264e-06, "loss": 0.0022, "step": 121640 }, { "epoch": 1.990509694837601, "grad_norm": 0.025657478719949722, "learning_rate": 3.070434172507165e-06, "loss": 0.001, "step": 121650 }, { "epoch": 1.990673320788677, "grad_norm": 0.13552971184253693, "learning_rate": 3.069556009336928e-06, "loss": 0.0016, "step": 121660 }, { "epoch": 1.990836946739753, "grad_norm": 0.08910717815160751, "learning_rate": 3.0686779161406455e-06, "loss": 0.0008, "step": 121670 }, { "epoch": 1.9910005726908286, "grad_norm": 0.1187802329659462, "learning_rate": 3.0677998929501475e-06, "loss": 0.0015, "step": 121680 }, { "epoch": 1.9911641986419046, "grad_norm": 0.11421894282102585, "learning_rate": 3.066921939797258e-06, "loss": 0.0018, "step": 121690 }, { "epoch": 1.9913278245929804, "grad_norm": 0.0626273825764656, "learning_rate": 3.066044056713805e-06, "loss": 0.0009, "step": 121700 }, { "epoch": 1.9914914505440562, "grad_norm": 0.03288870304822922, "learning_rate": 3.0651662437316043e-06, "loss": 0.0013, "step": 121710 }, { "epoch": 1.9916550764951322, "grad_norm": 0.019862784072756767, "learning_rate": 3.0642885008824773e-06, "loss": 0.0013, "step": 121720 }, { "epoch": 1.991818702446208, "grad_norm": 0.21904274821281433, "learning_rate": 3.0634108281982405e-06, "loss": 0.0014, "step": 121730 }, { "epoch": 1.9919823283972837, "grad_norm": 0.07169432193040848, "learning_rate": 3.0625332257107064e-06, "loss": 0.0009, "step": 121740 }, { "epoch": 1.9921459543483597, "grad_norm": 0.09033267199993134, "learning_rate": 3.061655693451686e-06, "loss": 0.0024, "step": 121750 }, { "epoch": 1.9923095802994355, "grad_norm": 0.12846073508262634, "learning_rate": 3.060778231452988e-06, "loss": 0.0016, "step": 121760 }, { "epoch": 1.9924732062505113, "grad_norm": 0.05491194128990173, "learning_rate": 3.0599008397464185e-06, "loss": 0.0018, "step": 121770 }, { "epoch": 1.9926368322015873, "grad_norm": 0.11106289178133011, "learning_rate": 3.0590235183637806e-06, "loss": 0.0018, "step": 121780 }, { "epoch": 1.992800458152663, "grad_norm": 0.023505032062530518, "learning_rate": 3.0581462673368754e-06, "loss": 0.0014, "step": 121790 }, { "epoch": 1.9929640841037388, "grad_norm": 0.011300702579319477, "learning_rate": 3.057269086697502e-06, "loss": 0.001, "step": 121800 }, { "epoch": 1.9931277100548148, "grad_norm": 0.11059733480215073, "learning_rate": 3.056391976477453e-06, "loss": 0.0018, "step": 121810 }, { "epoch": 1.9932913360058904, "grad_norm": 0.11768092960119247, "learning_rate": 3.0555149367085246e-06, "loss": 0.0017, "step": 121820 }, { "epoch": 1.9934549619569664, "grad_norm": 0.1224488690495491, "learning_rate": 3.0546379674225057e-06, "loss": 0.0011, "step": 121830 }, { "epoch": 1.9936185879080424, "grad_norm": 0.051735639572143555, "learning_rate": 3.0537610686511855e-06, "loss": 0.0005, "step": 121840 }, { "epoch": 1.993782213859118, "grad_norm": 0.04027004912495613, "learning_rate": 3.052884240426348e-06, "loss": 0.0024, "step": 121850 }, { "epoch": 1.993945839810194, "grad_norm": 0.03983096405863762, "learning_rate": 3.0520074827797764e-06, "loss": 0.0011, "step": 121860 }, { "epoch": 1.9941094657612697, "grad_norm": 0.08986416459083557, "learning_rate": 3.0511307957432527e-06, "loss": 0.0011, "step": 121870 }, { "epoch": 1.9942730917123455, "grad_norm": 0.05776599422097206, "learning_rate": 3.0502541793485542e-06, "loss": 0.0013, "step": 121880 }, { "epoch": 1.9944367176634215, "grad_norm": 0.09268534928560257, "learning_rate": 3.049377633627455e-06, "loss": 0.0016, "step": 121890 }, { "epoch": 1.9946003436144972, "grad_norm": 0.13744103908538818, "learning_rate": 3.048501158611731e-06, "loss": 0.0019, "step": 121900 }, { "epoch": 1.994763969565573, "grad_norm": 0.11338288336992264, "learning_rate": 3.0476247543331453e-06, "loss": 0.0017, "step": 121910 }, { "epoch": 1.994927595516649, "grad_norm": 0.054671287536621094, "learning_rate": 3.046748420823472e-06, "loss": 0.0011, "step": 121920 }, { "epoch": 1.9950912214677248, "grad_norm": 0.17806117236614227, "learning_rate": 3.0458721581144735e-06, "loss": 0.0023, "step": 121930 }, { "epoch": 1.9952548474188005, "grad_norm": 0.15047022700309753, "learning_rate": 3.044995966237913e-06, "loss": 0.001, "step": 121940 }, { "epoch": 1.9954184733698765, "grad_norm": 0.13910718262195587, "learning_rate": 3.0441198452255497e-06, "loss": 0.001, "step": 121950 }, { "epoch": 1.9955820993209523, "grad_norm": 0.23290413618087769, "learning_rate": 3.043243795109142e-06, "loss": 0.002, "step": 121960 }, { "epoch": 1.995745725272028, "grad_norm": 0.01668221317231655, "learning_rate": 3.042367815920443e-06, "loss": 0.0004, "step": 121970 }, { "epoch": 1.995909351223104, "grad_norm": 0.0994178056716919, "learning_rate": 3.041491907691205e-06, "loss": 0.0011, "step": 121980 }, { "epoch": 1.9960729771741799, "grad_norm": 0.07157222926616669, "learning_rate": 3.0406160704531794e-06, "loss": 0.0006, "step": 121990 }, { "epoch": 1.9962366031252556, "grad_norm": 0.31447213888168335, "learning_rate": 3.0397403042381124e-06, "loss": 0.0012, "step": 122000 }, { "epoch": 1.9964002290763316, "grad_norm": 0.12848053872585297, "learning_rate": 3.0388646090777475e-06, "loss": 0.0015, "step": 122010 }, { "epoch": 1.9965638550274072, "grad_norm": 0.09544669836759567, "learning_rate": 3.037988985003826e-06, "loss": 0.0013, "step": 122020 }, { "epoch": 1.9967274809784832, "grad_norm": 0.09430322051048279, "learning_rate": 3.0371134320480896e-06, "loss": 0.0016, "step": 122030 }, { "epoch": 1.9968911069295592, "grad_norm": 0.13307379186153412, "learning_rate": 3.0362379502422722e-06, "loss": 0.0015, "step": 122040 }, { "epoch": 1.9970547328806347, "grad_norm": 0.14317603409290314, "learning_rate": 3.035362539618111e-06, "loss": 0.0019, "step": 122050 }, { "epoch": 1.9972183588317107, "grad_norm": 0.006431240122765303, "learning_rate": 3.0344872002073343e-06, "loss": 0.0008, "step": 122060 }, { "epoch": 1.9973819847827865, "grad_norm": 0.1079738661646843, "learning_rate": 3.0336119320416725e-06, "loss": 0.0028, "step": 122070 }, { "epoch": 1.9975456107338623, "grad_norm": 0.059948522597551346, "learning_rate": 3.0327367351528523e-06, "loss": 0.0015, "step": 122080 }, { "epoch": 1.9977092366849383, "grad_norm": 0.09824782609939575, "learning_rate": 3.031861609572598e-06, "loss": 0.0014, "step": 122090 }, { "epoch": 1.997872862636014, "grad_norm": 0.050781819969415665, "learning_rate": 3.0309865553326323e-06, "loss": 0.0011, "step": 122100 }, { "epoch": 1.9980364885870898, "grad_norm": 0.1278054267168045, "learning_rate": 3.030111572464669e-06, "loss": 0.0008, "step": 122110 }, { "epoch": 1.9982001145381658, "grad_norm": 0.08743195235729218, "learning_rate": 3.0292366610004265e-06, "loss": 0.0012, "step": 122120 }, { "epoch": 1.9983637404892416, "grad_norm": 0.03167599439620972, "learning_rate": 3.0283618209716176e-06, "loss": 0.0013, "step": 122130 }, { "epoch": 1.9985273664403174, "grad_norm": 0.1520116925239563, "learning_rate": 3.0274870524099553e-06, "loss": 0.0014, "step": 122140 }, { "epoch": 1.9986909923913934, "grad_norm": 0.044764839112758636, "learning_rate": 3.026612355347146e-06, "loss": 0.0012, "step": 122150 }, { "epoch": 1.9988546183424691, "grad_norm": 0.10647802799940109, "learning_rate": 3.025737729814896e-06, "loss": 0.0017, "step": 122160 }, { "epoch": 1.999018244293545, "grad_norm": 0.022934338077902794, "learning_rate": 3.0248631758449086e-06, "loss": 0.0014, "step": 122170 }, { "epoch": 1.999181870244621, "grad_norm": 0.05098085105419159, "learning_rate": 3.0239886934688845e-06, "loss": 0.0026, "step": 122180 }, { "epoch": 1.9993454961956967, "grad_norm": 0.031086700037121773, "learning_rate": 3.02311428271852e-06, "loss": 0.0011, "step": 122190 }, { "epoch": 1.9995091221467725, "grad_norm": 0.1163347065448761, "learning_rate": 3.022239943625513e-06, "loss": 0.0011, "step": 122200 }, { "epoch": 1.9996727480978485, "grad_norm": 0.07484007626771927, "learning_rate": 3.0213656762215537e-06, "loss": 0.0013, "step": 122210 }, { "epoch": 1.999836374048924, "grad_norm": 0.05343592166900635, "learning_rate": 3.0204914805383324e-06, "loss": 0.0007, "step": 122220 }, { "epoch": 2.0, "grad_norm": 0.007802479900419712, "learning_rate": 3.0196173566075383e-06, "loss": 0.0007, "step": 122230 }, { "epoch": 2.000163625951076, "grad_norm": 0.039337046444416046, "learning_rate": 3.0187433044608545e-06, "loss": 0.0008, "step": 122240 }, { "epoch": 2.0003272519021515, "grad_norm": 0.07899314165115356, "learning_rate": 3.017869324129964e-06, "loss": 0.0011, "step": 122250 }, { "epoch": 2.0004908778532275, "grad_norm": 0.12674546241760254, "learning_rate": 3.0169954156465475e-06, "loss": 0.0009, "step": 122260 }, { "epoch": 2.0006545038043035, "grad_norm": 0.032715678215026855, "learning_rate": 3.0161215790422803e-06, "loss": 0.0012, "step": 122270 }, { "epoch": 2.000818129755379, "grad_norm": 0.10841579735279083, "learning_rate": 3.015247814348838e-06, "loss": 0.0016, "step": 122280 }, { "epoch": 2.000981755706455, "grad_norm": 0.07675003260374069, "learning_rate": 3.0143741215978917e-06, "loss": 0.001, "step": 122290 }, { "epoch": 2.001145381657531, "grad_norm": 0.004280860535800457, "learning_rate": 3.013500500821114e-06, "loss": 0.0014, "step": 122300 }, { "epoch": 2.0013090076086066, "grad_norm": 0.06504587084054947, "learning_rate": 3.012626952050166e-06, "loss": 0.0011, "step": 122310 }, { "epoch": 2.0014726335596826, "grad_norm": 0.033733267337083817, "learning_rate": 3.0117534753167145e-06, "loss": 0.0009, "step": 122320 }, { "epoch": 2.001636259510758, "grad_norm": 0.03392750024795532, "learning_rate": 3.010880070652422e-06, "loss": 0.0031, "step": 122330 }, { "epoch": 2.001799885461834, "grad_norm": 0.06573561578989029, "learning_rate": 3.0100067380889453e-06, "loss": 0.0009, "step": 122340 }, { "epoch": 2.00196351141291, "grad_norm": 0.10168496519327164, "learning_rate": 3.009133477657941e-06, "loss": 0.0011, "step": 122350 }, { "epoch": 2.0021271373639857, "grad_norm": 0.0023911609314382076, "learning_rate": 3.0082602893910636e-06, "loss": 0.0008, "step": 122360 }, { "epoch": 2.0022907633150617, "grad_norm": 0.12138720601797104, "learning_rate": 3.007387173319964e-06, "loss": 0.0013, "step": 122370 }, { "epoch": 2.0024543892661377, "grad_norm": 0.09041742235422134, "learning_rate": 3.0065141294762903e-06, "loss": 0.0006, "step": 122380 }, { "epoch": 2.0026180152172133, "grad_norm": 0.12582673132419586, "learning_rate": 3.005641157891688e-06, "loss": 0.001, "step": 122390 }, { "epoch": 2.0027816411682893, "grad_norm": 0.03776866942644119, "learning_rate": 3.0047682585978023e-06, "loss": 0.0013, "step": 122400 }, { "epoch": 2.0029452671193653, "grad_norm": 0.1255517154932022, "learning_rate": 3.0038954316262693e-06, "loss": 0.0008, "step": 122410 }, { "epoch": 2.003108893070441, "grad_norm": 0.0556458905339241, "learning_rate": 3.0030226770087302e-06, "loss": 0.0005, "step": 122420 }, { "epoch": 2.003272519021517, "grad_norm": 0.045946069061756134, "learning_rate": 3.0021499947768197e-06, "loss": 0.0008, "step": 122430 }, { "epoch": 2.003436144972593, "grad_norm": 0.11650247871875763, "learning_rate": 3.0012773849621694e-06, "loss": 0.0007, "step": 122440 }, { "epoch": 2.0035997709236684, "grad_norm": 0.029856454581022263, "learning_rate": 3.0004048475964107e-06, "loss": 0.0011, "step": 122450 }, { "epoch": 2.0037633968747444, "grad_norm": 0.11715023964643478, "learning_rate": 2.99953238271117e-06, "loss": 0.0018, "step": 122460 }, { "epoch": 2.0039270228258204, "grad_norm": 0.0032879426144063473, "learning_rate": 2.998659990338073e-06, "loss": 0.001, "step": 122470 }, { "epoch": 2.004090648776896, "grad_norm": 0.026977650821208954, "learning_rate": 2.997787670508741e-06, "loss": 0.0016, "step": 122480 }, { "epoch": 2.004254274727972, "grad_norm": 0.029142964631319046, "learning_rate": 2.9969154232547936e-06, "loss": 0.0006, "step": 122490 }, { "epoch": 2.004417900679048, "grad_norm": 0.04164513945579529, "learning_rate": 2.9960432486078494e-06, "loss": 0.0008, "step": 122500 }, { "epoch": 2.0045815266301235, "grad_norm": 0.08730745315551758, "learning_rate": 2.995171146599519e-06, "loss": 0.002, "step": 122510 }, { "epoch": 2.0047451525811995, "grad_norm": 0.01673181727528572, "learning_rate": 2.9942991172614168e-06, "loss": 0.0006, "step": 122520 }, { "epoch": 2.004908778532275, "grad_norm": 0.016155635938048363, "learning_rate": 2.993427160625151e-06, "loss": 0.0016, "step": 122530 }, { "epoch": 2.005072404483351, "grad_norm": 0.011071273125708103, "learning_rate": 2.9925552767223276e-06, "loss": 0.0008, "step": 122540 }, { "epoch": 2.005236030434427, "grad_norm": 0.023952504619956017, "learning_rate": 2.991683465584551e-06, "loss": 0.0012, "step": 122550 }, { "epoch": 2.0053996563855025, "grad_norm": 0.10510613769292831, "learning_rate": 2.990811727243422e-06, "loss": 0.0006, "step": 122560 }, { "epoch": 2.0055632823365785, "grad_norm": 0.16675055027008057, "learning_rate": 2.9899400617305374e-06, "loss": 0.0007, "step": 122570 }, { "epoch": 2.0057269082876545, "grad_norm": 0.05483875051140785, "learning_rate": 2.989068469077496e-06, "loss": 0.001, "step": 122580 }, { "epoch": 2.00589053423873, "grad_norm": 0.05115516111254692, "learning_rate": 2.988196949315889e-06, "loss": 0.0024, "step": 122590 }, { "epoch": 2.006054160189806, "grad_norm": 0.0855933129787445, "learning_rate": 2.9873255024773096e-06, "loss": 0.0012, "step": 122600 }, { "epoch": 2.006217786140882, "grad_norm": 0.10746615380048752, "learning_rate": 2.9864541285933417e-06, "loss": 0.0013, "step": 122610 }, { "epoch": 2.0063814120919576, "grad_norm": 0.0017163899028673768, "learning_rate": 2.9855828276955724e-06, "loss": 0.0006, "step": 122620 }, { "epoch": 2.0065450380430336, "grad_norm": 0.10305550694465637, "learning_rate": 2.9847115998155824e-06, "loss": 0.0006, "step": 122630 }, { "epoch": 2.0067086639941096, "grad_norm": 0.04796452447772026, "learning_rate": 2.9838404449849544e-06, "loss": 0.0006, "step": 122640 }, { "epoch": 2.006872289945185, "grad_norm": 0.120184026658535, "learning_rate": 2.9829693632352647e-06, "loss": 0.0008, "step": 122650 }, { "epoch": 2.007035915896261, "grad_norm": 0.00197650701738894, "learning_rate": 2.9820983545980874e-06, "loss": 0.0007, "step": 122660 }, { "epoch": 2.007199541847337, "grad_norm": 0.18119893968105316, "learning_rate": 2.9812274191049952e-06, "loss": 0.0007, "step": 122670 }, { "epoch": 2.0073631677984127, "grad_norm": 0.3269008696079254, "learning_rate": 2.980356556787557e-06, "loss": 0.0017, "step": 122680 }, { "epoch": 2.0075267937494887, "grad_norm": 0.10589903593063354, "learning_rate": 2.97948576767734e-06, "loss": 0.0007, "step": 122690 }, { "epoch": 2.0076904197005647, "grad_norm": 0.0075447214767336845, "learning_rate": 2.978615051805908e-06, "loss": 0.0008, "step": 122700 }, { "epoch": 2.0078540456516403, "grad_norm": 0.16812700033187866, "learning_rate": 2.9777444092048214e-06, "loss": 0.0009, "step": 122710 }, { "epoch": 2.0080176716027163, "grad_norm": 0.4088684320449829, "learning_rate": 2.9768738399056395e-06, "loss": 0.0011, "step": 122720 }, { "epoch": 2.008181297553792, "grad_norm": 0.044109005481004715, "learning_rate": 2.976003343939919e-06, "loss": 0.0006, "step": 122730 }, { "epoch": 2.008344923504868, "grad_norm": 0.07573593407869339, "learning_rate": 2.975132921339212e-06, "loss": 0.0008, "step": 122740 }, { "epoch": 2.008508549455944, "grad_norm": 0.10288336127996445, "learning_rate": 2.9742625721350702e-06, "loss": 0.001, "step": 122750 }, { "epoch": 2.0086721754070194, "grad_norm": 0.05118601769208908, "learning_rate": 2.973392296359042e-06, "loss": 0.0013, "step": 122760 }, { "epoch": 2.0088358013580954, "grad_norm": 0.010381937958300114, "learning_rate": 2.972522094042671e-06, "loss": 0.0005, "step": 122770 }, { "epoch": 2.0089994273091714, "grad_norm": 0.05149473249912262, "learning_rate": 2.9716519652175024e-06, "loss": 0.0007, "step": 122780 }, { "epoch": 2.009163053260247, "grad_norm": 0.1264180690050125, "learning_rate": 2.9707819099150737e-06, "loss": 0.0024, "step": 122790 }, { "epoch": 2.009326679211323, "grad_norm": 0.03264115750789642, "learning_rate": 2.9699119281669263e-06, "loss": 0.0007, "step": 122800 }, { "epoch": 2.009490305162399, "grad_norm": 0.03596828505396843, "learning_rate": 2.9690420200045903e-06, "loss": 0.0018, "step": 122810 }, { "epoch": 2.0096539311134745, "grad_norm": 0.030557796359062195, "learning_rate": 2.9681721854596e-06, "loss": 0.0006, "step": 122820 }, { "epoch": 2.0098175570645505, "grad_norm": 0.028343254700303078, "learning_rate": 2.967302424563485e-06, "loss": 0.0007, "step": 122830 }, { "epoch": 2.0099811830156264, "grad_norm": 0.004581385292112827, "learning_rate": 2.9664327373477707e-06, "loss": 0.0008, "step": 122840 }, { "epoch": 2.010144808966702, "grad_norm": 0.049201253801584244, "learning_rate": 2.965563123843982e-06, "loss": 0.0005, "step": 122850 }, { "epoch": 2.010308434917778, "grad_norm": 0.11184683442115784, "learning_rate": 2.964693584083641e-06, "loss": 0.0006, "step": 122860 }, { "epoch": 2.010472060868854, "grad_norm": 0.11916273087263107, "learning_rate": 2.9638241180982663e-06, "loss": 0.0009, "step": 122870 }, { "epoch": 2.0106356868199295, "grad_norm": 0.045228488743305206, "learning_rate": 2.9629547259193736e-06, "loss": 0.0006, "step": 122880 }, { "epoch": 2.0107993127710055, "grad_norm": 0.11494050174951553, "learning_rate": 2.9620854075784753e-06, "loss": 0.0015, "step": 122890 }, { "epoch": 2.0109629387220815, "grad_norm": 0.01775909960269928, "learning_rate": 2.9612161631070857e-06, "loss": 0.0009, "step": 122900 }, { "epoch": 2.011126564673157, "grad_norm": 0.06603578478097916, "learning_rate": 2.960346992536707e-06, "loss": 0.001, "step": 122910 }, { "epoch": 2.011290190624233, "grad_norm": 0.1089678555727005, "learning_rate": 2.959477895898848e-06, "loss": 0.0016, "step": 122920 }, { "epoch": 2.0114538165753086, "grad_norm": 0.6310131549835205, "learning_rate": 2.9586088732250116e-06, "loss": 0.0175, "step": 122930 }, { "epoch": 2.0116174425263846, "grad_norm": 0.0440681166946888, "learning_rate": 2.957739924546697e-06, "loss": 0.0011, "step": 122940 }, { "epoch": 2.0117810684774606, "grad_norm": 0.017576299607753754, "learning_rate": 2.956871049895402e-06, "loss": 0.0031, "step": 122950 }, { "epoch": 2.011944694428536, "grad_norm": 0.0920783281326294, "learning_rate": 2.956002249302621e-06, "loss": 0.0005, "step": 122960 }, { "epoch": 2.012108320379612, "grad_norm": 0.2482539713382721, "learning_rate": 2.9551335227998453e-06, "loss": 0.0008, "step": 122970 }, { "epoch": 2.012271946330688, "grad_norm": 0.11471476405858994, "learning_rate": 2.9542648704185652e-06, "loss": 0.0009, "step": 122980 }, { "epoch": 2.0124355722817637, "grad_norm": 0.1749299168586731, "learning_rate": 2.953396292190266e-06, "loss": 0.0017, "step": 122990 }, { "epoch": 2.0125991982328397, "grad_norm": 0.06950230151414871, "learning_rate": 2.952527788146434e-06, "loss": 0.0006, "step": 123000 }, { "epoch": 2.0127628241839157, "grad_norm": 0.014681133441627026, "learning_rate": 2.9516593583185473e-06, "loss": 0.0013, "step": 123010 }, { "epoch": 2.0129264501349913, "grad_norm": 0.04077136144042015, "learning_rate": 2.950791002738086e-06, "loss": 0.0006, "step": 123020 }, { "epoch": 2.0130900760860673, "grad_norm": 0.04269719496369362, "learning_rate": 2.9499227214365257e-06, "loss": 0.0006, "step": 123030 }, { "epoch": 2.0132537020371433, "grad_norm": 0.08492803573608398, "learning_rate": 2.9490545144453397e-06, "loss": 0.0006, "step": 123040 }, { "epoch": 2.013417327988219, "grad_norm": 0.05451168119907379, "learning_rate": 2.9481863817959976e-06, "loss": 0.0011, "step": 123050 }, { "epoch": 2.013580953939295, "grad_norm": 0.039799947291612625, "learning_rate": 2.9473183235199688e-06, "loss": 0.0013, "step": 123060 }, { "epoch": 2.013744579890371, "grad_norm": 0.08183470368385315, "learning_rate": 2.9464503396487155e-06, "loss": 0.0007, "step": 123070 }, { "epoch": 2.0139082058414464, "grad_norm": 0.10906807333230972, "learning_rate": 2.9455824302137024e-06, "loss": 0.0008, "step": 123080 }, { "epoch": 2.0140718317925224, "grad_norm": 0.0852169394493103, "learning_rate": 2.9447145952463897e-06, "loss": 0.0014, "step": 123090 }, { "epoch": 2.014235457743598, "grad_norm": 0.07407552003860474, "learning_rate": 2.9438468347782343e-06, "loss": 0.0008, "step": 123100 }, { "epoch": 2.014399083694674, "grad_norm": 0.0034440841991454363, "learning_rate": 2.9429791488406876e-06, "loss": 0.0012, "step": 123110 }, { "epoch": 2.01456270964575, "grad_norm": 0.02655049040913582, "learning_rate": 2.942111537465202e-06, "loss": 0.0008, "step": 123120 }, { "epoch": 2.0147263355968255, "grad_norm": 0.0492183193564415, "learning_rate": 2.9412440006832276e-06, "loss": 0.0006, "step": 123130 }, { "epoch": 2.0148899615479015, "grad_norm": 0.1544003188610077, "learning_rate": 2.940376538526211e-06, "loss": 0.0003, "step": 123140 }, { "epoch": 2.0150535874989774, "grad_norm": 0.04998074844479561, "learning_rate": 2.939509151025594e-06, "loss": 0.0006, "step": 123150 }, { "epoch": 2.015217213450053, "grad_norm": 0.0605974905192852, "learning_rate": 2.9386418382128184e-06, "loss": 0.0007, "step": 123160 }, { "epoch": 2.015380839401129, "grad_norm": 0.05646251514554024, "learning_rate": 2.9377746001193218e-06, "loss": 0.001, "step": 123170 }, { "epoch": 2.015544465352205, "grad_norm": 0.1739867627620697, "learning_rate": 2.936907436776539e-06, "loss": 0.0016, "step": 123180 }, { "epoch": 2.0157080913032805, "grad_norm": 0.17260171473026276, "learning_rate": 2.936040348215904e-06, "loss": 0.0023, "step": 123190 }, { "epoch": 2.0158717172543565, "grad_norm": 0.022806884720921516, "learning_rate": 2.935173334468846e-06, "loss": 0.0006, "step": 123200 }, { "epoch": 2.0160353432054325, "grad_norm": 0.0752556324005127, "learning_rate": 2.9343063955667913e-06, "loss": 0.0013, "step": 123210 }, { "epoch": 2.016198969156508, "grad_norm": 0.2098541110754013, "learning_rate": 2.9334395315411644e-06, "loss": 0.0008, "step": 123220 }, { "epoch": 2.016362595107584, "grad_norm": 0.05458023026585579, "learning_rate": 2.9325727424233887e-06, "loss": 0.0011, "step": 123230 }, { "epoch": 2.01652622105866, "grad_norm": 0.02899891696870327, "learning_rate": 2.9317060282448815e-06, "loss": 0.0006, "step": 123240 }, { "epoch": 2.0166898470097356, "grad_norm": 0.044367577880620956, "learning_rate": 2.9308393890370602e-06, "loss": 0.0009, "step": 123250 }, { "epoch": 2.0168534729608116, "grad_norm": 0.05011260136961937, "learning_rate": 2.9299728248313374e-06, "loss": 0.0005, "step": 123260 }, { "epoch": 2.0170170989118876, "grad_norm": 0.0861293151974678, "learning_rate": 2.9291063356591245e-06, "loss": 0.0009, "step": 123270 }, { "epoch": 2.017180724862963, "grad_norm": 0.07861240208148956, "learning_rate": 2.928239921551829e-06, "loss": 0.0013, "step": 123280 }, { "epoch": 2.017344350814039, "grad_norm": 0.12659820914268494, "learning_rate": 2.927373582540858e-06, "loss": 0.0013, "step": 123290 }, { "epoch": 2.0175079767651147, "grad_norm": 0.11719324439764023, "learning_rate": 2.926507318657613e-06, "loss": 0.0024, "step": 123300 }, { "epoch": 2.0176716027161907, "grad_norm": 0.013598169200122356, "learning_rate": 2.9256411299334964e-06, "loss": 0.0006, "step": 123310 }, { "epoch": 2.0178352286672667, "grad_norm": 0.00677691213786602, "learning_rate": 2.924775016399901e-06, "loss": 0.0009, "step": 123320 }, { "epoch": 2.0179988546183423, "grad_norm": 0.07832401990890503, "learning_rate": 2.9239089780882234e-06, "loss": 0.0008, "step": 123330 }, { "epoch": 2.0181624805694183, "grad_norm": 0.11921148002147675, "learning_rate": 2.923043015029855e-06, "loss": 0.0009, "step": 123340 }, { "epoch": 2.0183261065204943, "grad_norm": 0.10726704448461533, "learning_rate": 2.9221771272561863e-06, "loss": 0.0013, "step": 123350 }, { "epoch": 2.01848973247157, "grad_norm": 0.32264629006385803, "learning_rate": 2.9213113147986026e-06, "loss": 0.0012, "step": 123360 }, { "epoch": 2.018653358422646, "grad_norm": 0.3090819716453552, "learning_rate": 2.9204455776884888e-06, "loss": 0.0007, "step": 123370 }, { "epoch": 2.018816984373722, "grad_norm": 0.010182442143559456, "learning_rate": 2.9195799159572235e-06, "loss": 0.0007, "step": 123380 }, { "epoch": 2.0189806103247974, "grad_norm": 0.09297315031290054, "learning_rate": 2.9187143296361864e-06, "loss": 0.0009, "step": 123390 }, { "epoch": 2.0191442362758734, "grad_norm": 0.12436174601316452, "learning_rate": 2.917848818756752e-06, "loss": 0.0016, "step": 123400 }, { "epoch": 2.0193078622269494, "grad_norm": 0.04419776052236557, "learning_rate": 2.9169833833502974e-06, "loss": 0.001, "step": 123410 }, { "epoch": 2.019471488178025, "grad_norm": 0.11890144646167755, "learning_rate": 2.9161180234481844e-06, "loss": 0.0018, "step": 123420 }, { "epoch": 2.019635114129101, "grad_norm": 0.19173476099967957, "learning_rate": 2.9152527390817874e-06, "loss": 0.0011, "step": 123430 }, { "epoch": 2.019798740080177, "grad_norm": 0.03202402964234352, "learning_rate": 2.9143875302824655e-06, "loss": 0.0008, "step": 123440 }, { "epoch": 2.0199623660312525, "grad_norm": 0.45920664072036743, "learning_rate": 2.913522397081583e-06, "loss": 0.001, "step": 123450 }, { "epoch": 2.0201259919823285, "grad_norm": 0.09136476367712021, "learning_rate": 2.912657339510501e-06, "loss": 0.0009, "step": 123460 }, { "epoch": 2.0202896179334044, "grad_norm": 0.2168736308813095, "learning_rate": 2.9117923576005713e-06, "loss": 0.0017, "step": 123470 }, { "epoch": 2.02045324388448, "grad_norm": 0.023497063666582108, "learning_rate": 2.910927451383152e-06, "loss": 0.0011, "step": 123480 }, { "epoch": 2.020616869835556, "grad_norm": 0.05596434324979782, "learning_rate": 2.91006262088959e-06, "loss": 0.0012, "step": 123490 }, { "epoch": 2.0207804957866315, "grad_norm": 0.10114511847496033, "learning_rate": 2.9091978661512366e-06, "loss": 0.0015, "step": 123500 }, { "epoch": 2.0209441217377075, "grad_norm": 0.03261394053697586, "learning_rate": 2.908333187199436e-06, "loss": 0.0011, "step": 123510 }, { "epoch": 2.0211077476887835, "grad_norm": 0.0462544783949852, "learning_rate": 2.907468584065527e-06, "loss": 0.0014, "step": 123520 }, { "epoch": 2.021271373639859, "grad_norm": 0.5521852970123291, "learning_rate": 2.9066040567808553e-06, "loss": 0.0005, "step": 123530 }, { "epoch": 2.021434999590935, "grad_norm": 0.06335219740867615, "learning_rate": 2.905739605376753e-06, "loss": 0.0013, "step": 123540 }, { "epoch": 2.021598625542011, "grad_norm": 0.043145306408405304, "learning_rate": 2.904875229884559e-06, "loss": 0.0006, "step": 123550 }, { "epoch": 2.0217622514930866, "grad_norm": 0.07603517919778824, "learning_rate": 2.9040109303356e-06, "loss": 0.0008, "step": 123560 }, { "epoch": 2.0219258774441626, "grad_norm": 0.06110682338476181, "learning_rate": 2.9031467067612095e-06, "loss": 0.0007, "step": 123570 }, { "epoch": 2.0220895033952386, "grad_norm": 0.04667156934738159, "learning_rate": 2.9022825591927086e-06, "loss": 0.0008, "step": 123580 }, { "epoch": 2.022253129346314, "grad_norm": 0.06395115703344345, "learning_rate": 2.901418487661426e-06, "loss": 0.0009, "step": 123590 }, { "epoch": 2.02241675529739, "grad_norm": 0.050273597240448, "learning_rate": 2.9005544921986774e-06, "loss": 0.0009, "step": 123600 }, { "epoch": 2.022580381248466, "grad_norm": 0.06157859414815903, "learning_rate": 2.899690572835784e-06, "loss": 0.0004, "step": 123610 }, { "epoch": 2.0227440071995417, "grad_norm": 0.11917011439800262, "learning_rate": 2.8988267296040594e-06, "loss": 0.0013, "step": 123620 }, { "epoch": 2.0229076331506177, "grad_norm": 0.011408847756683826, "learning_rate": 2.8979629625348138e-06, "loss": 0.0005, "step": 123630 }, { "epoch": 2.0230712591016937, "grad_norm": 0.22224397957324982, "learning_rate": 2.897099271659361e-06, "loss": 0.0012, "step": 123640 }, { "epoch": 2.0232348850527693, "grad_norm": 0.0042869048193097115, "learning_rate": 2.8962356570090035e-06, "loss": 0.0011, "step": 123650 }, { "epoch": 2.0233985110038453, "grad_norm": 0.059125710278749466, "learning_rate": 2.8953721186150485e-06, "loss": 0.0011, "step": 123660 }, { "epoch": 2.0235621369549213, "grad_norm": 0.01862114667892456, "learning_rate": 2.894508656508794e-06, "loss": 0.0008, "step": 123670 }, { "epoch": 2.023725762905997, "grad_norm": 0.08622175455093384, "learning_rate": 2.893645270721541e-06, "loss": 0.0018, "step": 123680 }, { "epoch": 2.023889388857073, "grad_norm": 0.01131894439458847, "learning_rate": 2.8927819612845863e-06, "loss": 0.001, "step": 123690 }, { "epoch": 2.0240530148081484, "grad_norm": 0.10598060488700867, "learning_rate": 2.891918728229219e-06, "loss": 0.0007, "step": 123700 }, { "epoch": 2.0242166407592244, "grad_norm": 0.08070161193609238, "learning_rate": 2.891055571586735e-06, "loss": 0.0006, "step": 123710 }, { "epoch": 2.0243802667103004, "grad_norm": 0.05542897433042526, "learning_rate": 2.890192491388413e-06, "loss": 0.0012, "step": 123720 }, { "epoch": 2.024543892661376, "grad_norm": 0.0634867325425148, "learning_rate": 2.889329487665543e-06, "loss": 0.0007, "step": 123730 }, { "epoch": 2.024707518612452, "grad_norm": 0.02125907689332962, "learning_rate": 2.8884665604494088e-06, "loss": 0.0012, "step": 123740 }, { "epoch": 2.024871144563528, "grad_norm": 0.07873150706291199, "learning_rate": 2.887603709771284e-06, "loss": 0.0007, "step": 123750 }, { "epoch": 2.0250347705146035, "grad_norm": 0.133035808801651, "learning_rate": 2.8867409356624503e-06, "loss": 0.0004, "step": 123760 }, { "epoch": 2.0251983964656795, "grad_norm": 0.05324627831578255, "learning_rate": 2.8858782381541763e-06, "loss": 0.0006, "step": 123770 }, { "epoch": 2.0253620224167554, "grad_norm": 0.10550583153963089, "learning_rate": 2.885015617277737e-06, "loss": 0.0009, "step": 123780 }, { "epoch": 2.025525648367831, "grad_norm": 0.10863450914621353, "learning_rate": 2.8841530730643963e-06, "loss": 0.0008, "step": 123790 }, { "epoch": 2.025689274318907, "grad_norm": 0.10028284043073654, "learning_rate": 2.8832906055454237e-06, "loss": 0.0012, "step": 123800 }, { "epoch": 2.025852900269983, "grad_norm": 0.17284740507602692, "learning_rate": 2.8824282147520786e-06, "loss": 0.0007, "step": 123810 }, { "epoch": 2.0260165262210585, "grad_norm": 0.06500721722841263, "learning_rate": 2.8815659007156205e-06, "loss": 0.001, "step": 123820 }, { "epoch": 2.0261801521721345, "grad_norm": 0.05808574706315994, "learning_rate": 2.880703663467308e-06, "loss": 0.0004, "step": 123830 }, { "epoch": 2.0263437781232105, "grad_norm": 0.09927265346050262, "learning_rate": 2.879841503038392e-06, "loss": 0.0006, "step": 123840 }, { "epoch": 2.026507404074286, "grad_norm": 0.024597322568297386, "learning_rate": 2.8789794194601285e-06, "loss": 0.0019, "step": 123850 }, { "epoch": 2.026671030025362, "grad_norm": 0.058953169733285904, "learning_rate": 2.8781174127637612e-06, "loss": 0.0008, "step": 123860 }, { "epoch": 2.026834655976438, "grad_norm": 0.09125494956970215, "learning_rate": 2.8772554829805394e-06, "loss": 0.0011, "step": 123870 }, { "epoch": 2.0269982819275136, "grad_norm": 0.06706090271472931, "learning_rate": 2.8763936301417028e-06, "loss": 0.0009, "step": 123880 }, { "epoch": 2.0271619078785896, "grad_norm": 0.11271265149116516, "learning_rate": 2.8755318542784925e-06, "loss": 0.001, "step": 123890 }, { "epoch": 2.027325533829665, "grad_norm": 0.05396962910890579, "learning_rate": 2.8746701554221494e-06, "loss": 0.0007, "step": 123900 }, { "epoch": 2.027489159780741, "grad_norm": 0.07635393738746643, "learning_rate": 2.8738085336039047e-06, "loss": 0.0012, "step": 123910 }, { "epoch": 2.027652785731817, "grad_norm": 0.1010383889079094, "learning_rate": 2.8729469888549906e-06, "loss": 0.0006, "step": 123920 }, { "epoch": 2.0278164116828927, "grad_norm": 0.04615742713212967, "learning_rate": 2.8720855212066336e-06, "loss": 0.0009, "step": 123930 }, { "epoch": 2.0279800376339687, "grad_norm": 0.07685250788927078, "learning_rate": 2.8712241306900644e-06, "loss": 0.001, "step": 123940 }, { "epoch": 2.0281436635850447, "grad_norm": 0.056674107909202576, "learning_rate": 2.8703628173365022e-06, "loss": 0.0015, "step": 123950 }, { "epoch": 2.0283072895361203, "grad_norm": 0.043701887130737305, "learning_rate": 2.869501581177169e-06, "loss": 0.0008, "step": 123960 }, { "epoch": 2.0284709154871963, "grad_norm": 0.06434541195631027, "learning_rate": 2.868640422243285e-06, "loss": 0.0017, "step": 123970 }, { "epoch": 2.0286345414382723, "grad_norm": 0.15282042324543, "learning_rate": 2.8677793405660614e-06, "loss": 0.0006, "step": 123980 }, { "epoch": 2.028798167389348, "grad_norm": 0.08339986950159073, "learning_rate": 2.866918336176714e-06, "loss": 0.0016, "step": 123990 }, { "epoch": 2.028961793340424, "grad_norm": 0.009265325032174587, "learning_rate": 2.8660574091064487e-06, "loss": 0.0007, "step": 124000 }, { "epoch": 2.0291254192915, "grad_norm": 0.006840995047241449, "learning_rate": 2.865196559386475e-06, "loss": 0.0009, "step": 124010 }, { "epoch": 2.0292890452425754, "grad_norm": 0.00866103358566761, "learning_rate": 2.8643357870479947e-06, "loss": 0.0004, "step": 124020 }, { "epoch": 2.0294526711936514, "grad_norm": 0.08059342205524445, "learning_rate": 2.863475092122208e-06, "loss": 0.0014, "step": 124030 }, { "epoch": 2.0296162971447274, "grad_norm": 0.05488971620798111, "learning_rate": 2.8626144746403155e-06, "loss": 0.0019, "step": 124040 }, { "epoch": 2.029779923095803, "grad_norm": 0.13141994178295135, "learning_rate": 2.8617539346335097e-06, "loss": 0.0011, "step": 124050 }, { "epoch": 2.029943549046879, "grad_norm": 0.09598705172538757, "learning_rate": 2.8608934721329863e-06, "loss": 0.0006, "step": 124060 }, { "epoch": 2.0301071749979545, "grad_norm": 0.0666499063372612, "learning_rate": 2.8600330871699317e-06, "loss": 0.0008, "step": 124070 }, { "epoch": 2.0302708009490305, "grad_norm": 0.12923429906368256, "learning_rate": 2.8591727797755363e-06, "loss": 0.001, "step": 124080 }, { "epoch": 2.0304344269001064, "grad_norm": 0.08453816920518875, "learning_rate": 2.8583125499809805e-06, "loss": 0.0008, "step": 124090 }, { "epoch": 2.030598052851182, "grad_norm": 0.051464490592479706, "learning_rate": 2.8574523978174484e-06, "loss": 0.0007, "step": 124100 }, { "epoch": 2.030761678802258, "grad_norm": 0.13468940556049347, "learning_rate": 2.8565923233161187e-06, "loss": 0.0006, "step": 124110 }, { "epoch": 2.030925304753334, "grad_norm": 0.060485709458589554, "learning_rate": 2.855732326508163e-06, "loss": 0.0009, "step": 124120 }, { "epoch": 2.0310889307044095, "grad_norm": 0.0032297370489686728, "learning_rate": 2.854872407424759e-06, "loss": 0.0011, "step": 124130 }, { "epoch": 2.0312525566554855, "grad_norm": 0.06342637538909912, "learning_rate": 2.854012566097072e-06, "loss": 0.0008, "step": 124140 }, { "epoch": 2.0314161826065615, "grad_norm": 0.06412553042173386, "learning_rate": 2.8531528025562742e-06, "loss": 0.0011, "step": 124150 }, { "epoch": 2.031579808557637, "grad_norm": 0.12065336853265762, "learning_rate": 2.8522931168335255e-06, "loss": 0.0008, "step": 124160 }, { "epoch": 2.031743434508713, "grad_norm": 0.09229922294616699, "learning_rate": 2.85143350895999e-06, "loss": 0.0006, "step": 124170 }, { "epoch": 2.031907060459789, "grad_norm": 0.5208779573440552, "learning_rate": 2.8505739789668272e-06, "loss": 0.0011, "step": 124180 }, { "epoch": 2.0320706864108646, "grad_norm": 0.02819540910422802, "learning_rate": 2.84971452688519e-06, "loss": 0.0011, "step": 124190 }, { "epoch": 2.0322343123619406, "grad_norm": 0.056050196290016174, "learning_rate": 2.848855152746235e-06, "loss": 0.0007, "step": 124200 }, { "epoch": 2.0323979383130166, "grad_norm": 0.050121258944272995, "learning_rate": 2.8479958565811106e-06, "loss": 0.0008, "step": 124210 }, { "epoch": 2.032561564264092, "grad_norm": 0.04265778511762619, "learning_rate": 2.847136638420962e-06, "loss": 0.0008, "step": 124220 }, { "epoch": 2.032725190215168, "grad_norm": 0.011792679317295551, "learning_rate": 2.8462774982969376e-06, "loss": 0.0008, "step": 124230 }, { "epoch": 2.032888816166244, "grad_norm": 0.4496041238307953, "learning_rate": 2.845418436240176e-06, "loss": 0.0011, "step": 124240 }, { "epoch": 2.0330524421173197, "grad_norm": 0.2413029968738556, "learning_rate": 2.84455945228182e-06, "loss": 0.001, "step": 124250 }, { "epoch": 2.0332160680683957, "grad_norm": 0.09965922683477402, "learning_rate": 2.843700546453001e-06, "loss": 0.0007, "step": 124260 }, { "epoch": 2.0333796940194713, "grad_norm": 0.04634123295545578, "learning_rate": 2.8428417187848565e-06, "loss": 0.0007, "step": 124270 }, { "epoch": 2.0335433199705473, "grad_norm": 0.061617203056812286, "learning_rate": 2.841982969308513e-06, "loss": 0.0013, "step": 124280 }, { "epoch": 2.0337069459216233, "grad_norm": 0.038158830255270004, "learning_rate": 2.8411242980551034e-06, "loss": 0.0019, "step": 124290 }, { "epoch": 2.033870571872699, "grad_norm": 0.038674451410770416, "learning_rate": 2.840265705055746e-06, "loss": 0.0011, "step": 124300 }, { "epoch": 2.034034197823775, "grad_norm": 0.00625960249453783, "learning_rate": 2.839407190341569e-06, "loss": 0.001, "step": 124310 }, { "epoch": 2.034197823774851, "grad_norm": 0.04486432299017906, "learning_rate": 2.838548753943687e-06, "loss": 0.001, "step": 124320 }, { "epoch": 2.0343614497259264, "grad_norm": 0.02593972347676754, "learning_rate": 2.8376903958932174e-06, "loss": 0.0009, "step": 124330 }, { "epoch": 2.0345250756770024, "grad_norm": 0.019660795107483864, "learning_rate": 2.836832116221275e-06, "loss": 0.0008, "step": 124340 }, { "epoch": 2.0346887016280784, "grad_norm": 0.29560962319374084, "learning_rate": 2.8359739149589676e-06, "loss": 0.0012, "step": 124350 }, { "epoch": 2.034852327579154, "grad_norm": 0.07079388946294785, "learning_rate": 2.835115792137407e-06, "loss": 0.0011, "step": 124360 }, { "epoch": 2.03501595353023, "grad_norm": 0.039573509246110916, "learning_rate": 2.834257747787693e-06, "loss": 0.0009, "step": 124370 }, { "epoch": 2.035179579481306, "grad_norm": 0.022018538787961006, "learning_rate": 2.833399781940931e-06, "loss": 0.0007, "step": 124380 }, { "epoch": 2.0353432054323815, "grad_norm": 0.03453710675239563, "learning_rate": 2.832541894628222e-06, "loss": 0.001, "step": 124390 }, { "epoch": 2.0355068313834574, "grad_norm": 0.032292742282152176, "learning_rate": 2.8316840858806573e-06, "loss": 0.0008, "step": 124400 }, { "epoch": 2.0356704573345334, "grad_norm": 0.07762788981199265, "learning_rate": 2.830826355729337e-06, "loss": 0.0004, "step": 124410 }, { "epoch": 2.035834083285609, "grad_norm": 0.0215966384857893, "learning_rate": 2.829968704205344e-06, "loss": 0.002, "step": 124420 }, { "epoch": 2.035997709236685, "grad_norm": 0.06726696342229843, "learning_rate": 2.8291111313397724e-06, "loss": 0.001, "step": 124430 }, { "epoch": 2.036161335187761, "grad_norm": 0.02724245935678482, "learning_rate": 2.8282536371637026e-06, "loss": 0.0012, "step": 124440 }, { "epoch": 2.0363249611388365, "grad_norm": 0.1215810626745224, "learning_rate": 2.8273962217082186e-06, "loss": 0.001, "step": 124450 }, { "epoch": 2.0364885870899125, "grad_norm": 0.00971477571874857, "learning_rate": 2.826538885004401e-06, "loss": 0.0004, "step": 124460 }, { "epoch": 2.036652213040988, "grad_norm": 0.09301253408193588, "learning_rate": 2.825681627083324e-06, "loss": 0.0013, "step": 124470 }, { "epoch": 2.036815838992064, "grad_norm": 0.03361900895833969, "learning_rate": 2.8248244479760643e-06, "loss": 0.001, "step": 124480 }, { "epoch": 2.03697946494314, "grad_norm": 0.024573149159550667, "learning_rate": 2.823967347713688e-06, "loss": 0.0012, "step": 124490 }, { "epoch": 2.0371430908942156, "grad_norm": 0.005056385882198811, "learning_rate": 2.8231103263272674e-06, "loss": 0.001, "step": 124500 }, { "epoch": 2.0373067168452916, "grad_norm": 0.08200830221176147, "learning_rate": 2.822253383847866e-06, "loss": 0.0005, "step": 124510 }, { "epoch": 2.0374703427963676, "grad_norm": 0.0070111858658492565, "learning_rate": 2.8213965203065428e-06, "loss": 0.001, "step": 124520 }, { "epoch": 2.037633968747443, "grad_norm": 0.06848406046628952, "learning_rate": 2.8205397357343617e-06, "loss": 0.0011, "step": 124530 }, { "epoch": 2.037797594698519, "grad_norm": 0.08236326277256012, "learning_rate": 2.8196830301623753e-06, "loss": 0.0008, "step": 124540 }, { "epoch": 2.037961220649595, "grad_norm": 0.06222716346383095, "learning_rate": 2.8188264036216406e-06, "loss": 0.0008, "step": 124550 }, { "epoch": 2.0381248466006707, "grad_norm": 0.032447900623083115, "learning_rate": 2.8179698561432043e-06, "loss": 0.0009, "step": 124560 }, { "epoch": 2.0382884725517467, "grad_norm": 0.08052060008049011, "learning_rate": 2.817113387758118e-06, "loss": 0.0006, "step": 124570 }, { "epoch": 2.0384520985028227, "grad_norm": 0.030338529497385025, "learning_rate": 2.8162569984974236e-06, "loss": 0.0008, "step": 124580 }, { "epoch": 2.0386157244538983, "grad_norm": 0.03316605091094971, "learning_rate": 2.8154006883921662e-06, "loss": 0.0007, "step": 124590 }, { "epoch": 2.0387793504049743, "grad_norm": 0.18145349621772766, "learning_rate": 2.8145444574733814e-06, "loss": 0.0024, "step": 124600 }, { "epoch": 2.0389429763560503, "grad_norm": 0.053525932133197784, "learning_rate": 2.81368830577211e-06, "loss": 0.0008, "step": 124610 }, { "epoch": 2.039106602307126, "grad_norm": 0.005097508430480957, "learning_rate": 2.8128322333193817e-06, "loss": 0.0006, "step": 124620 }, { "epoch": 2.039270228258202, "grad_norm": 0.038973547518253326, "learning_rate": 2.811976240146227e-06, "loss": 0.0006, "step": 124630 }, { "epoch": 2.039433854209278, "grad_norm": 0.015489310026168823, "learning_rate": 2.8111203262836767e-06, "loss": 0.0006, "step": 124640 }, { "epoch": 2.0395974801603534, "grad_norm": 0.09316594898700714, "learning_rate": 2.810264491762752e-06, "loss": 0.0018, "step": 124650 }, { "epoch": 2.0397611061114294, "grad_norm": 0.08134543150663376, "learning_rate": 2.8094087366144763e-06, "loss": 0.0008, "step": 124660 }, { "epoch": 2.039924732062505, "grad_norm": 0.06554632633924484, "learning_rate": 2.8085530608698707e-06, "loss": 0.0008, "step": 124670 }, { "epoch": 2.040088358013581, "grad_norm": 0.21216492354869843, "learning_rate": 2.8076974645599476e-06, "loss": 0.0011, "step": 124680 }, { "epoch": 2.040251983964657, "grad_norm": 0.1355913281440735, "learning_rate": 2.806841947715725e-06, "loss": 0.0005, "step": 124690 }, { "epoch": 2.0404156099157325, "grad_norm": 0.23601964116096497, "learning_rate": 2.805986510368208e-06, "loss": 0.0011, "step": 124700 }, { "epoch": 2.0405792358668084, "grad_norm": 0.005074649583548307, "learning_rate": 2.8051311525484115e-06, "loss": 0.0004, "step": 124710 }, { "epoch": 2.0407428618178844, "grad_norm": 0.21899673342704773, "learning_rate": 2.804275874287331e-06, "loss": 0.0005, "step": 124720 }, { "epoch": 2.04090648776896, "grad_norm": 0.012752222828567028, "learning_rate": 2.803420675615973e-06, "loss": 0.0008, "step": 124730 }, { "epoch": 2.041070113720036, "grad_norm": 0.24455346167087555, "learning_rate": 2.8025655565653374e-06, "loss": 0.0006, "step": 124740 }, { "epoch": 2.041233739671112, "grad_norm": 0.048273444175720215, "learning_rate": 2.8017105171664184e-06, "loss": 0.0004, "step": 124750 }, { "epoch": 2.0413973656221875, "grad_norm": 0.09782805293798447, "learning_rate": 2.8008555574502105e-06, "loss": 0.0017, "step": 124760 }, { "epoch": 2.0415609915732635, "grad_norm": 0.0613371916115284, "learning_rate": 2.800000677447702e-06, "loss": 0.001, "step": 124770 }, { "epoch": 2.0417246175243395, "grad_norm": 0.1340452879667282, "learning_rate": 2.7991458771898827e-06, "loss": 0.0013, "step": 124780 }, { "epoch": 2.041888243475415, "grad_norm": 0.1598801463842392, "learning_rate": 2.798291156707734e-06, "loss": 0.0014, "step": 124790 }, { "epoch": 2.042051869426491, "grad_norm": 0.27418652176856995, "learning_rate": 2.797436516032241e-06, "loss": 0.001, "step": 124800 }, { "epoch": 2.042215495377567, "grad_norm": 0.07217760384082794, "learning_rate": 2.796581955194381e-06, "loss": 0.002, "step": 124810 }, { "epoch": 2.0423791213286426, "grad_norm": 0.005071448162198067, "learning_rate": 2.795727474225127e-06, "loss": 0.0004, "step": 124820 }, { "epoch": 2.0425427472797186, "grad_norm": 0.22226819396018982, "learning_rate": 2.794873073155456e-06, "loss": 0.0008, "step": 124830 }, { "epoch": 2.0427063732307946, "grad_norm": 0.05380718410015106, "learning_rate": 2.7940187520163347e-06, "loss": 0.001, "step": 124840 }, { "epoch": 2.04286999918187, "grad_norm": 0.10900881886482239, "learning_rate": 2.793164510838734e-06, "loss": 0.0011, "step": 124850 }, { "epoch": 2.043033625132946, "grad_norm": 0.04940652847290039, "learning_rate": 2.792310349653613e-06, "loss": 0.0009, "step": 124860 }, { "epoch": 2.0431972510840217, "grad_norm": 0.08405929058790207, "learning_rate": 2.7914562684919377e-06, "loss": 0.0005, "step": 124870 }, { "epoch": 2.0433608770350977, "grad_norm": 0.274469792842865, "learning_rate": 2.7906022673846634e-06, "loss": 0.0011, "step": 124880 }, { "epoch": 2.0435245029861737, "grad_norm": 0.07355121523141861, "learning_rate": 2.789748346362746e-06, "loss": 0.0009, "step": 124890 }, { "epoch": 2.0436881289372493, "grad_norm": 0.07836034148931503, "learning_rate": 2.7888945054571415e-06, "loss": 0.0013, "step": 124900 }, { "epoch": 2.0438517548883253, "grad_norm": 0.05816254764795303, "learning_rate": 2.788040744698797e-06, "loss": 0.0006, "step": 124910 }, { "epoch": 2.0440153808394013, "grad_norm": 0.037988368421792984, "learning_rate": 2.787187064118658e-06, "loss": 0.001, "step": 124920 }, { "epoch": 2.044179006790477, "grad_norm": 0.0506337471306324, "learning_rate": 2.786333463747669e-06, "loss": 0.0007, "step": 124930 }, { "epoch": 2.044342632741553, "grad_norm": 0.13877160847187042, "learning_rate": 2.7854799436167713e-06, "loss": 0.0004, "step": 124940 }, { "epoch": 2.044506258692629, "grad_norm": 0.11030317842960358, "learning_rate": 2.784626503756904e-06, "loss": 0.0004, "step": 124950 }, { "epoch": 2.0446698846437044, "grad_norm": 0.1030447781085968, "learning_rate": 2.7837731441990006e-06, "loss": 0.0007, "step": 124960 }, { "epoch": 2.0448335105947804, "grad_norm": 0.04685341566801071, "learning_rate": 2.7829198649739957e-06, "loss": 0.0007, "step": 124970 }, { "epoch": 2.0449971365458564, "grad_norm": 0.02271329239010811, "learning_rate": 2.7820666661128155e-06, "loss": 0.0006, "step": 124980 }, { "epoch": 2.045160762496932, "grad_norm": 0.07728131860494614, "learning_rate": 2.7812135476463898e-06, "loss": 0.0008, "step": 124990 }, { "epoch": 2.045324388448008, "grad_norm": 0.0070898751728236675, "learning_rate": 2.7803605096056378e-06, "loss": 0.0013, "step": 125000 }, { "epoch": 2.045324388448008, "eval_loss": 0.0010078081395477057, "eval_runtime": 3.0959, "eval_samples_per_second": 64.601, "eval_steps_per_second": 16.15, "step": 125000 }, { "epoch": 2.045488014399084, "grad_norm": 0.1685505360364914, "learning_rate": 2.7795075520214855e-06, "loss": 0.0013, "step": 125010 }, { "epoch": 2.0456516403501595, "grad_norm": 0.17462106049060822, "learning_rate": 2.7786546749248468e-06, "loss": 0.0015, "step": 125020 }, { "epoch": 2.0458152663012354, "grad_norm": 0.0633067786693573, "learning_rate": 2.7778018783466353e-06, "loss": 0.0012, "step": 125030 }, { "epoch": 2.045978892252311, "grad_norm": 0.013548222370445728, "learning_rate": 2.776949162317767e-06, "loss": 0.0009, "step": 125040 }, { "epoch": 2.046142518203387, "grad_norm": 0.051946550607681274, "learning_rate": 2.7760965268691463e-06, "loss": 0.0012, "step": 125050 }, { "epoch": 2.046306144154463, "grad_norm": 0.0045087323524057865, "learning_rate": 2.775243972031684e-06, "loss": 0.0014, "step": 125060 }, { "epoch": 2.0464697701055385, "grad_norm": 0.12369585037231445, "learning_rate": 2.7743914978362786e-06, "loss": 0.0008, "step": 125070 }, { "epoch": 2.0466333960566145, "grad_norm": 0.028464335948228836, "learning_rate": 2.773539104313834e-06, "loss": 0.0006, "step": 125080 }, { "epoch": 2.0467970220076905, "grad_norm": 0.10350577533245087, "learning_rate": 2.772686791495244e-06, "loss": 0.0006, "step": 125090 }, { "epoch": 2.046960647958766, "grad_norm": 0.019794771447777748, "learning_rate": 2.7718345594114047e-06, "loss": 0.0009, "step": 125100 }, { "epoch": 2.047124273909842, "grad_norm": 0.0605911947786808, "learning_rate": 2.7709824080932113e-06, "loss": 0.001, "step": 125110 }, { "epoch": 2.047287899860918, "grad_norm": 0.15976080298423767, "learning_rate": 2.7701303375715437e-06, "loss": 0.0011, "step": 125120 }, { "epoch": 2.0474515258119936, "grad_norm": 0.09896986931562424, "learning_rate": 2.7692783478772943e-06, "loss": 0.0009, "step": 125130 }, { "epoch": 2.0476151517630696, "grad_norm": 0.16207146644592285, "learning_rate": 2.768426439041342e-06, "loss": 0.0013, "step": 125140 }, { "epoch": 2.0477787777141456, "grad_norm": 0.03074829652905464, "learning_rate": 2.767574611094569e-06, "loss": 0.0009, "step": 125150 }, { "epoch": 2.047942403665221, "grad_norm": 0.05201895907521248, "learning_rate": 2.7667228640678495e-06, "loss": 0.0008, "step": 125160 }, { "epoch": 2.048106029616297, "grad_norm": 0.009670531377196312, "learning_rate": 2.765871197992058e-06, "loss": 0.0024, "step": 125170 }, { "epoch": 2.048269655567373, "grad_norm": 0.07450035959482193, "learning_rate": 2.765019612898069e-06, "loss": 0.0011, "step": 125180 }, { "epoch": 2.0484332815184487, "grad_norm": 0.0816681906580925, "learning_rate": 2.764168108816745e-06, "loss": 0.0006, "step": 125190 }, { "epoch": 2.0485969074695247, "grad_norm": 0.06683312356472015, "learning_rate": 2.7633166857789545e-06, "loss": 0.0004, "step": 125200 }, { "epoch": 2.0487605334206007, "grad_norm": 0.030104627832770348, "learning_rate": 2.762465343815559e-06, "loss": 0.0013, "step": 125210 }, { "epoch": 2.0489241593716763, "grad_norm": 0.010404893197119236, "learning_rate": 2.7616140829574157e-06, "loss": 0.0011, "step": 125220 }, { "epoch": 2.0490877853227523, "grad_norm": 0.003294219495728612, "learning_rate": 2.7607629032353834e-06, "loss": 0.0005, "step": 125230 }, { "epoch": 2.049251411273828, "grad_norm": 0.08630453050136566, "learning_rate": 2.759911804680312e-06, "loss": 0.0009, "step": 125240 }, { "epoch": 2.049415037224904, "grad_norm": 0.1154833510518074, "learning_rate": 2.7590607873230555e-06, "loss": 0.001, "step": 125250 }, { "epoch": 2.04957866317598, "grad_norm": 0.09941628575325012, "learning_rate": 2.7582098511944576e-06, "loss": 0.0008, "step": 125260 }, { "epoch": 2.0497422891270554, "grad_norm": 0.03716711699962616, "learning_rate": 2.7573589963253665e-06, "loss": 0.0006, "step": 125270 }, { "epoch": 2.0499059150781314, "grad_norm": 0.0713198184967041, "learning_rate": 2.7565082227466208e-06, "loss": 0.0008, "step": 125280 }, { "epoch": 2.0500695410292074, "grad_norm": 0.13239486515522003, "learning_rate": 2.7556575304890608e-06, "loss": 0.0009, "step": 125290 }, { "epoch": 2.050233166980283, "grad_norm": 0.039633095264434814, "learning_rate": 2.75480691958352e-06, "loss": 0.0005, "step": 125300 }, { "epoch": 2.050396792931359, "grad_norm": 0.01314575970172882, "learning_rate": 2.7539563900608333e-06, "loss": 0.0007, "step": 125310 }, { "epoch": 2.050560418882435, "grad_norm": 0.0521257258951664, "learning_rate": 2.7531059419518298e-06, "loss": 0.0012, "step": 125320 }, { "epoch": 2.0507240448335105, "grad_norm": 0.03675038740038872, "learning_rate": 2.7522555752873337e-06, "loss": 0.0007, "step": 125330 }, { "epoch": 2.0508876707845864, "grad_norm": 0.026663633063435555, "learning_rate": 2.7514052900981723e-06, "loss": 0.001, "step": 125340 }, { "epoch": 2.0510512967356624, "grad_norm": 0.06920617818832397, "learning_rate": 2.750555086415163e-06, "loss": 0.001, "step": 125350 }, { "epoch": 2.051214922686738, "grad_norm": 0.09731853753328323, "learning_rate": 2.7497049642691277e-06, "loss": 0.0017, "step": 125360 }, { "epoch": 2.051378548637814, "grad_norm": 0.2518601417541504, "learning_rate": 2.748854923690876e-06, "loss": 0.0023, "step": 125370 }, { "epoch": 2.05154217458889, "grad_norm": 0.07137306779623032, "learning_rate": 2.748004964711224e-06, "loss": 0.0009, "step": 125380 }, { "epoch": 2.0517058005399655, "grad_norm": 0.11210113763809204, "learning_rate": 2.747155087360981e-06, "loss": 0.0009, "step": 125390 }, { "epoch": 2.0518694264910415, "grad_norm": 0.024057498201727867, "learning_rate": 2.7463052916709497e-06, "loss": 0.0007, "step": 125400 }, { "epoch": 2.0520330524421175, "grad_norm": 0.15057770907878876, "learning_rate": 2.7454555776719367e-06, "loss": 0.0015, "step": 125410 }, { "epoch": 2.052196678393193, "grad_norm": 0.08948255330324173, "learning_rate": 2.7446059453947405e-06, "loss": 0.0005, "step": 125420 }, { "epoch": 2.052360304344269, "grad_norm": 0.027236690744757652, "learning_rate": 2.7437563948701564e-06, "loss": 0.0006, "step": 125430 }, { "epoch": 2.0525239302953446, "grad_norm": 0.10346370190382004, "learning_rate": 2.7429069261289815e-06, "loss": 0.0008, "step": 125440 }, { "epoch": 2.0526875562464206, "grad_norm": 0.10105740278959274, "learning_rate": 2.7420575392020044e-06, "loss": 0.0006, "step": 125450 }, { "epoch": 2.0528511821974966, "grad_norm": 0.05941835790872574, "learning_rate": 2.7412082341200162e-06, "loss": 0.0038, "step": 125460 }, { "epoch": 2.053014808148572, "grad_norm": 0.12703943252563477, "learning_rate": 2.740359010913799e-06, "loss": 0.001, "step": 125470 }, { "epoch": 2.053178434099648, "grad_norm": 0.06517039239406586, "learning_rate": 2.7395098696141386e-06, "loss": 0.0013, "step": 125480 }, { "epoch": 2.053342060050724, "grad_norm": 0.08573034405708313, "learning_rate": 2.7386608102518103e-06, "loss": 0.001, "step": 125490 }, { "epoch": 2.0535056860017997, "grad_norm": 0.01614839769899845, "learning_rate": 2.7378118328575946e-06, "loss": 0.0007, "step": 125500 }, { "epoch": 2.0536693119528757, "grad_norm": 0.02678837440907955, "learning_rate": 2.7369629374622614e-06, "loss": 0.0009, "step": 125510 }, { "epoch": 2.0538329379039517, "grad_norm": 0.08163011819124222, "learning_rate": 2.7361141240965846e-06, "loss": 0.0008, "step": 125520 }, { "epoch": 2.0539965638550273, "grad_norm": 0.061010152101516724, "learning_rate": 2.7352653927913296e-06, "loss": 0.0012, "step": 125530 }, { "epoch": 2.0541601898061033, "grad_norm": 0.11241861432790756, "learning_rate": 2.7344167435772583e-06, "loss": 0.0008, "step": 125540 }, { "epoch": 2.0543238157571793, "grad_norm": 0.11194446682929993, "learning_rate": 2.733568176485138e-06, "loss": 0.0017, "step": 125550 }, { "epoch": 2.054487441708255, "grad_norm": 0.042952071875333786, "learning_rate": 2.732719691545721e-06, "loss": 0.0007, "step": 125560 }, { "epoch": 2.054651067659331, "grad_norm": 0.16491369903087616, "learning_rate": 2.731871288789769e-06, "loss": 0.0008, "step": 125570 }, { "epoch": 2.054814693610407, "grad_norm": 0.05632215738296509, "learning_rate": 2.7310229682480287e-06, "loss": 0.0017, "step": 125580 }, { "epoch": 2.0549783195614824, "grad_norm": 0.06487197428941727, "learning_rate": 2.7301747299512526e-06, "loss": 0.0005, "step": 125590 }, { "epoch": 2.0551419455125584, "grad_norm": 0.0015484405448660254, "learning_rate": 2.7293265739301896e-06, "loss": 0.0011, "step": 125600 }, { "epoch": 2.0553055714636344, "grad_norm": 0.046187739819288254, "learning_rate": 2.7284785002155785e-06, "loss": 0.001, "step": 125610 }, { "epoch": 2.05546919741471, "grad_norm": 0.101535864174366, "learning_rate": 2.727630508838166e-06, "loss": 0.0009, "step": 125620 }, { "epoch": 2.055632823365786, "grad_norm": 0.02309855818748474, "learning_rate": 2.7267825998286824e-06, "loss": 0.0009, "step": 125630 }, { "epoch": 2.0557964493168615, "grad_norm": 0.004552806727588177, "learning_rate": 2.725934773217868e-06, "loss": 0.0007, "step": 125640 }, { "epoch": 2.0559600752679374, "grad_norm": 0.05617595463991165, "learning_rate": 2.7250870290364506e-06, "loss": 0.0006, "step": 125650 }, { "epoch": 2.0561237012190134, "grad_norm": 0.1635713279247284, "learning_rate": 2.7242393673151605e-06, "loss": 0.001, "step": 125660 }, { "epoch": 2.056287327170089, "grad_norm": 0.05797175318002701, "learning_rate": 2.723391788084726e-06, "loss": 0.0009, "step": 125670 }, { "epoch": 2.056450953121165, "grad_norm": 0.12851861119270325, "learning_rate": 2.7225442913758654e-06, "loss": 0.0013, "step": 125680 }, { "epoch": 2.056614579072241, "grad_norm": 0.13630680739879608, "learning_rate": 2.7216968772193025e-06, "loss": 0.0015, "step": 125690 }, { "epoch": 2.0567782050233165, "grad_norm": 0.03139360621571541, "learning_rate": 2.7208495456457503e-06, "loss": 0.0008, "step": 125700 }, { "epoch": 2.0569418309743925, "grad_norm": 0.040974587202072144, "learning_rate": 2.7200022966859257e-06, "loss": 0.0012, "step": 125710 }, { "epoch": 2.0571054569254685, "grad_norm": 0.05311542749404907, "learning_rate": 2.7191551303705386e-06, "loss": 0.0012, "step": 125720 }, { "epoch": 2.057269082876544, "grad_norm": 0.08876026421785355, "learning_rate": 2.7183080467302936e-06, "loss": 0.0009, "step": 125730 }, { "epoch": 2.05743270882762, "grad_norm": 0.09702279418706894, "learning_rate": 2.7174610457959004e-06, "loss": 0.0007, "step": 125740 }, { "epoch": 2.057596334778696, "grad_norm": 0.02597414143383503, "learning_rate": 2.716614127598056e-06, "loss": 0.0006, "step": 125750 }, { "epoch": 2.0577599607297716, "grad_norm": 0.0771472305059433, "learning_rate": 2.7157672921674638e-06, "loss": 0.0007, "step": 125760 }, { "epoch": 2.0579235866808476, "grad_norm": 0.006495010107755661, "learning_rate": 2.7149205395348154e-06, "loss": 0.0008, "step": 125770 }, { "epoch": 2.0580872126319236, "grad_norm": 0.05171903595328331, "learning_rate": 2.7140738697308073e-06, "loss": 0.0008, "step": 125780 }, { "epoch": 2.058250838582999, "grad_norm": 0.15237563848495483, "learning_rate": 2.713227282786125e-06, "loss": 0.0005, "step": 125790 }, { "epoch": 2.058414464534075, "grad_norm": 0.06719356030225754, "learning_rate": 2.71238077873146e-06, "loss": 0.0018, "step": 125800 }, { "epoch": 2.0585780904851507, "grad_norm": 0.14036859571933746, "learning_rate": 2.7115343575974918e-06, "loss": 0.0011, "step": 125810 }, { "epoch": 2.0587417164362267, "grad_norm": 0.008981617167592049, "learning_rate": 2.7106880194149043e-06, "loss": 0.0016, "step": 125820 }, { "epoch": 2.0589053423873027, "grad_norm": 0.04011741653084755, "learning_rate": 2.7098417642143736e-06, "loss": 0.0008, "step": 125830 }, { "epoch": 2.0590689683383783, "grad_norm": 0.02799740433692932, "learning_rate": 2.708995592026574e-06, "loss": 0.0007, "step": 125840 }, { "epoch": 2.0592325942894543, "grad_norm": 0.004476170055568218, "learning_rate": 2.708149502882179e-06, "loss": 0.0015, "step": 125850 }, { "epoch": 2.0593962202405303, "grad_norm": 0.07621454447507858, "learning_rate": 2.707303496811854e-06, "loss": 0.0011, "step": 125860 }, { "epoch": 2.059559846191606, "grad_norm": 0.08320765197277069, "learning_rate": 2.7064575738462672e-06, "loss": 0.0009, "step": 125870 }, { "epoch": 2.059723472142682, "grad_norm": 0.04357193410396576, "learning_rate": 2.705611734016084e-06, "loss": 0.0006, "step": 125880 }, { "epoch": 2.059887098093758, "grad_norm": 0.10944069176912308, "learning_rate": 2.7047659773519575e-06, "loss": 0.0011, "step": 125890 }, { "epoch": 2.0600507240448334, "grad_norm": 0.07267940789461136, "learning_rate": 2.7039203038845507e-06, "loss": 0.0012, "step": 125900 }, { "epoch": 2.0602143499959094, "grad_norm": 0.04635965824127197, "learning_rate": 2.7030747136445117e-06, "loss": 0.0009, "step": 125910 }, { "epoch": 2.0603779759469854, "grad_norm": 0.0933053120970726, "learning_rate": 2.7022292066624984e-06, "loss": 0.0007, "step": 125920 }, { "epoch": 2.060541601898061, "grad_norm": 0.01606355980038643, "learning_rate": 2.7013837829691496e-06, "loss": 0.0006, "step": 125930 }, { "epoch": 2.060705227849137, "grad_norm": 0.0085771344602108, "learning_rate": 2.700538442595114e-06, "loss": 0.0008, "step": 125940 }, { "epoch": 2.060868853800213, "grad_norm": 0.06753838807344437, "learning_rate": 2.6996931855710356e-06, "loss": 0.0006, "step": 125950 }, { "epoch": 2.0610324797512884, "grad_norm": 0.13619694113731384, "learning_rate": 2.6988480119275486e-06, "loss": 0.0015, "step": 125960 }, { "epoch": 2.0611961057023644, "grad_norm": 0.10229191184043884, "learning_rate": 2.698002921695292e-06, "loss": 0.0016, "step": 125970 }, { "epoch": 2.0613597316534404, "grad_norm": 0.09704141318798065, "learning_rate": 2.6971579149048952e-06, "loss": 0.0008, "step": 125980 }, { "epoch": 2.061523357604516, "grad_norm": 0.10450167208909988, "learning_rate": 2.6963129915869914e-06, "loss": 0.0007, "step": 125990 }, { "epoch": 2.061686983555592, "grad_norm": 0.07419240474700928, "learning_rate": 2.695468151772202e-06, "loss": 0.0013, "step": 126000 }, { "epoch": 2.0618506095066675, "grad_norm": 0.0417998768389225, "learning_rate": 2.694623395491156e-06, "loss": 0.0014, "step": 126010 }, { "epoch": 2.0620142354577435, "grad_norm": 0.07510271668434143, "learning_rate": 2.693778722774471e-06, "loss": 0.001, "step": 126020 }, { "epoch": 2.0621778614088195, "grad_norm": 0.09251377731561661, "learning_rate": 2.6929341336527627e-06, "loss": 0.001, "step": 126030 }, { "epoch": 2.062341487359895, "grad_norm": 0.12444275617599487, "learning_rate": 2.6920896281566487e-06, "loss": 0.0007, "step": 126040 }, { "epoch": 2.062505113310971, "grad_norm": 0.1669503003358841, "learning_rate": 2.691245206316737e-06, "loss": 0.0011, "step": 126050 }, { "epoch": 2.062668739262047, "grad_norm": 0.07519892603158951, "learning_rate": 2.6904008681636394e-06, "loss": 0.0016, "step": 126060 }, { "epoch": 2.0628323652131226, "grad_norm": 0.17180950939655304, "learning_rate": 2.689556613727957e-06, "loss": 0.0007, "step": 126070 }, { "epoch": 2.0629959911641986, "grad_norm": 0.05516451224684715, "learning_rate": 2.6887124430402977e-06, "loss": 0.0006, "step": 126080 }, { "epoch": 2.0631596171152746, "grad_norm": 0.1077236458659172, "learning_rate": 2.6878683561312546e-06, "loss": 0.001, "step": 126090 }, { "epoch": 2.06332324306635, "grad_norm": 0.1515728086233139, "learning_rate": 2.687024353031426e-06, "loss": 0.0015, "step": 126100 }, { "epoch": 2.063486869017426, "grad_norm": 0.05036976560950279, "learning_rate": 2.686180433771408e-06, "loss": 0.0014, "step": 126110 }, { "epoch": 2.063650494968502, "grad_norm": 0.055411651730537415, "learning_rate": 2.685336598381788e-06, "loss": 0.0011, "step": 126120 }, { "epoch": 2.0638141209195777, "grad_norm": 0.15393434464931488, "learning_rate": 2.684492846893153e-06, "loss": 0.0015, "step": 126130 }, { "epoch": 2.0639777468706537, "grad_norm": 0.04202623292803764, "learning_rate": 2.683649179336085e-06, "loss": 0.0015, "step": 126140 }, { "epoch": 2.0641413728217297, "grad_norm": 0.0740785077214241, "learning_rate": 2.6828055957411683e-06, "loss": 0.001, "step": 126150 }, { "epoch": 2.0643049987728053, "grad_norm": 0.08951706439256668, "learning_rate": 2.6819620961389804e-06, "loss": 0.0006, "step": 126160 }, { "epoch": 2.0644686247238813, "grad_norm": 0.11555913090705872, "learning_rate": 2.681118680560094e-06, "loss": 0.0004, "step": 126170 }, { "epoch": 2.0646322506749573, "grad_norm": 0.033050332218408585, "learning_rate": 2.680275349035083e-06, "loss": 0.0007, "step": 126180 }, { "epoch": 2.064795876626033, "grad_norm": 0.056626204401254654, "learning_rate": 2.6794321015945142e-06, "loss": 0.0011, "step": 126190 }, { "epoch": 2.064959502577109, "grad_norm": 0.12083358317613602, "learning_rate": 2.678588938268956e-06, "loss": 0.0002, "step": 126200 }, { "epoch": 2.0651231285281844, "grad_norm": 0.0798579603433609, "learning_rate": 2.6777458590889675e-06, "loss": 0.0015, "step": 126210 }, { "epoch": 2.0652867544792604, "grad_norm": 0.005720012821257114, "learning_rate": 2.6769028640851124e-06, "loss": 0.0015, "step": 126220 }, { "epoch": 2.0654503804303364, "grad_norm": 0.006193923763930798, "learning_rate": 2.676059953287945e-06, "loss": 0.0005, "step": 126230 }, { "epoch": 2.065614006381412, "grad_norm": 0.004612925928086042, "learning_rate": 2.675217126728017e-06, "loss": 0.0009, "step": 126240 }, { "epoch": 2.065777632332488, "grad_norm": 0.077668696641922, "learning_rate": 2.6743743844358815e-06, "loss": 0.0015, "step": 126250 }, { "epoch": 2.065941258283564, "grad_norm": 0.0745009183883667, "learning_rate": 2.6735317264420847e-06, "loss": 0.0009, "step": 126260 }, { "epoch": 2.0661048842346394, "grad_norm": 0.03949791565537453, "learning_rate": 2.6726891527771725e-06, "loss": 0.0005, "step": 126270 }, { "epoch": 2.0662685101857154, "grad_norm": 0.020940886810421944, "learning_rate": 2.6718466634716834e-06, "loss": 0.0044, "step": 126280 }, { "epoch": 2.0664321361367914, "grad_norm": 0.005787865724414587, "learning_rate": 2.6710042585561584e-06, "loss": 0.0012, "step": 126290 }, { "epoch": 2.066595762087867, "grad_norm": 0.009020130150020123, "learning_rate": 2.67016193806113e-06, "loss": 0.0009, "step": 126300 }, { "epoch": 2.066759388038943, "grad_norm": 0.03346995636820793, "learning_rate": 2.6693197020171314e-06, "loss": 0.0012, "step": 126310 }, { "epoch": 2.066923013990019, "grad_norm": 0.08424467593431473, "learning_rate": 2.6684775504546966e-06, "loss": 0.0016, "step": 126320 }, { "epoch": 2.0670866399410945, "grad_norm": 0.05418844893574715, "learning_rate": 2.6676354834043424e-06, "loss": 0.001, "step": 126330 }, { "epoch": 2.0672502658921705, "grad_norm": 0.001397250802256167, "learning_rate": 2.6667935008965985e-06, "loss": 0.0012, "step": 126340 }, { "epoch": 2.0674138918432465, "grad_norm": 0.03630855306982994, "learning_rate": 2.6659516029619804e-06, "loss": 0.0008, "step": 126350 }, { "epoch": 2.067577517794322, "grad_norm": 0.09149971604347229, "learning_rate": 2.6651097896310086e-06, "loss": 0.0014, "step": 126360 }, { "epoch": 2.067741143745398, "grad_norm": 0.06094851717352867, "learning_rate": 2.6642680609341933e-06, "loss": 0.0028, "step": 126370 }, { "epoch": 2.067904769696474, "grad_norm": 0.06838110089302063, "learning_rate": 2.663426416902047e-06, "loss": 0.0007, "step": 126380 }, { "epoch": 2.0680683956475496, "grad_norm": 0.06788390129804611, "learning_rate": 2.66258485756508e-06, "loss": 0.0005, "step": 126390 }, { "epoch": 2.0682320215986256, "grad_norm": 0.01420030277222395, "learning_rate": 2.6617433829537908e-06, "loss": 0.0007, "step": 126400 }, { "epoch": 2.068395647549701, "grad_norm": 0.21180304884910583, "learning_rate": 2.6609019930986867e-06, "loss": 0.0009, "step": 126410 }, { "epoch": 2.068559273500777, "grad_norm": 0.0568615160882473, "learning_rate": 2.660060688030264e-06, "loss": 0.0007, "step": 126420 }, { "epoch": 2.068722899451853, "grad_norm": 0.007821504957973957, "learning_rate": 2.6592194677790152e-06, "loss": 0.0012, "step": 126430 }, { "epoch": 2.0688865254029287, "grad_norm": 0.002589412033557892, "learning_rate": 2.6583783323754363e-06, "loss": 0.0004, "step": 126440 }, { "epoch": 2.0690501513540047, "grad_norm": 0.28683483600616455, "learning_rate": 2.6575372818500132e-06, "loss": 0.0018, "step": 126450 }, { "epoch": 2.0692137773050807, "grad_norm": 0.03341438248753548, "learning_rate": 2.656696316233236e-06, "loss": 0.0011, "step": 126460 }, { "epoch": 2.0693774032561563, "grad_norm": 0.12024161964654922, "learning_rate": 2.6558554355555834e-06, "loss": 0.0007, "step": 126470 }, { "epoch": 2.0695410292072323, "grad_norm": 0.029168833047151566, "learning_rate": 2.6550146398475385e-06, "loss": 0.001, "step": 126480 }, { "epoch": 2.0697046551583083, "grad_norm": 0.02467380091547966, "learning_rate": 2.654173929139575e-06, "loss": 0.0013, "step": 126490 }, { "epoch": 2.069868281109384, "grad_norm": 0.1980421543121338, "learning_rate": 2.653333303462171e-06, "loss": 0.001, "step": 126500 }, { "epoch": 2.07003190706046, "grad_norm": 0.058259110897779465, "learning_rate": 2.652492762845793e-06, "loss": 0.0011, "step": 126510 }, { "epoch": 2.070195533011536, "grad_norm": 0.11114665120840073, "learning_rate": 2.6516523073209114e-06, "loss": 0.0008, "step": 126520 }, { "epoch": 2.0703591589626114, "grad_norm": 0.07873139530420303, "learning_rate": 2.6508119369179904e-06, "loss": 0.0017, "step": 126530 }, { "epoch": 2.0705227849136874, "grad_norm": 0.06262929737567902, "learning_rate": 2.649971651667488e-06, "loss": 0.0008, "step": 126540 }, { "epoch": 2.0706864108647633, "grad_norm": 0.08900629729032516, "learning_rate": 2.6491314515998666e-06, "loss": 0.0008, "step": 126550 }, { "epoch": 2.070850036815839, "grad_norm": 0.07310552895069122, "learning_rate": 2.6482913367455788e-06, "loss": 0.001, "step": 126560 }, { "epoch": 2.071013662766915, "grad_norm": 0.10595718026161194, "learning_rate": 2.64745130713508e-06, "loss": 0.0006, "step": 126570 }, { "epoch": 2.0711772887179905, "grad_norm": 0.06159178540110588, "learning_rate": 2.6466113627988145e-06, "loss": 0.0009, "step": 126580 }, { "epoch": 2.0713409146690664, "grad_norm": 0.032799605280160904, "learning_rate": 2.6457715037672312e-06, "loss": 0.0006, "step": 126590 }, { "epoch": 2.0715045406201424, "grad_norm": 0.08961030095815659, "learning_rate": 2.6449317300707742e-06, "loss": 0.001, "step": 126600 }, { "epoch": 2.071668166571218, "grad_norm": 0.007548108231276274, "learning_rate": 2.6440920417398804e-06, "loss": 0.0011, "step": 126610 }, { "epoch": 2.071831792522294, "grad_norm": 0.022765422239899635, "learning_rate": 2.643252438804991e-06, "loss": 0.0005, "step": 126620 }, { "epoch": 2.07199541847337, "grad_norm": 0.030695730820298195, "learning_rate": 2.642412921296533e-06, "loss": 0.0004, "step": 126630 }, { "epoch": 2.0721590444244455, "grad_norm": 0.006426703650504351, "learning_rate": 2.64157348924494e-06, "loss": 0.0009, "step": 126640 }, { "epoch": 2.0723226703755215, "grad_norm": 0.06154328212141991, "learning_rate": 2.640734142680642e-06, "loss": 0.001, "step": 126650 }, { "epoch": 2.0724862963265975, "grad_norm": 0.0014200330479070544, "learning_rate": 2.6398948816340587e-06, "loss": 0.0004, "step": 126660 }, { "epoch": 2.072649922277673, "grad_norm": 0.008701635524630547, "learning_rate": 2.639055706135616e-06, "loss": 0.0008, "step": 126670 }, { "epoch": 2.072813548228749, "grad_norm": 0.05183007940649986, "learning_rate": 2.6382166162157276e-06, "loss": 0.0005, "step": 126680 }, { "epoch": 2.072977174179825, "grad_norm": 0.24049779772758484, "learning_rate": 2.6373776119048118e-06, "loss": 0.002, "step": 126690 }, { "epoch": 2.0731408001309006, "grad_norm": 0.025092674419283867, "learning_rate": 2.636538693233277e-06, "loss": 0.001, "step": 126700 }, { "epoch": 2.0733044260819766, "grad_norm": 0.0905616506934166, "learning_rate": 2.635699860231537e-06, "loss": 0.001, "step": 126710 }, { "epoch": 2.0734680520330526, "grad_norm": 0.03912216052412987, "learning_rate": 2.634861112929994e-06, "loss": 0.001, "step": 126720 }, { "epoch": 2.073631677984128, "grad_norm": 0.13487021625041962, "learning_rate": 2.6340224513590496e-06, "loss": 0.0012, "step": 126730 }, { "epoch": 2.073795303935204, "grad_norm": 0.053416233509778976, "learning_rate": 2.633183875549107e-06, "loss": 0.0014, "step": 126740 }, { "epoch": 2.07395892988628, "grad_norm": 0.004410147201269865, "learning_rate": 2.632345385530558e-06, "loss": 0.0005, "step": 126750 }, { "epoch": 2.0741225558373557, "grad_norm": 0.007632214110344648, "learning_rate": 2.6315069813338e-06, "loss": 0.0035, "step": 126760 }, { "epoch": 2.0742861817884317, "grad_norm": 0.007893110625445843, "learning_rate": 2.63066866298922e-06, "loss": 0.0006, "step": 126770 }, { "epoch": 2.0744498077395077, "grad_norm": 0.0865241214632988, "learning_rate": 2.6298304305272084e-06, "loss": 0.0009, "step": 126780 }, { "epoch": 2.0746134336905833, "grad_norm": 0.05783380940556526, "learning_rate": 2.6289922839781445e-06, "loss": 0.0012, "step": 126790 }, { "epoch": 2.0747770596416593, "grad_norm": 0.08939733356237411, "learning_rate": 2.628154223372412e-06, "loss": 0.0008, "step": 126800 }, { "epoch": 2.074940685592735, "grad_norm": 0.048028670251369476, "learning_rate": 2.627316248740391e-06, "loss": 0.0005, "step": 126810 }, { "epoch": 2.075104311543811, "grad_norm": 0.14464986324310303, "learning_rate": 2.6264783601124525e-06, "loss": 0.0015, "step": 126820 }, { "epoch": 2.075267937494887, "grad_norm": 0.15395614504814148, "learning_rate": 2.625640557518969e-06, "loss": 0.001, "step": 126830 }, { "epoch": 2.0754315634459624, "grad_norm": 0.20120304822921753, "learning_rate": 2.624802840990307e-06, "loss": 0.0009, "step": 126840 }, { "epoch": 2.0755951893970384, "grad_norm": 0.03794405236840248, "learning_rate": 2.6239652105568356e-06, "loss": 0.0009, "step": 126850 }, { "epoch": 2.0757588153481144, "grad_norm": 0.09057488292455673, "learning_rate": 2.6231276662489126e-06, "loss": 0.0014, "step": 126860 }, { "epoch": 2.07592244129919, "grad_norm": 0.007605287246406078, "learning_rate": 2.622290208096899e-06, "loss": 0.0031, "step": 126870 }, { "epoch": 2.076086067250266, "grad_norm": 0.013623950071632862, "learning_rate": 2.621452836131152e-06, "loss": 0.0006, "step": 126880 }, { "epoch": 2.076249693201342, "grad_norm": 0.007958965376019478, "learning_rate": 2.620615550382022e-06, "loss": 0.0008, "step": 126890 }, { "epoch": 2.0764133191524174, "grad_norm": 0.099567711353302, "learning_rate": 2.61977835087986e-06, "loss": 0.0008, "step": 126900 }, { "epoch": 2.0765769451034934, "grad_norm": 0.08914471417665482, "learning_rate": 2.6189412376550114e-06, "loss": 0.0016, "step": 126910 }, { "epoch": 2.0767405710545694, "grad_norm": 0.14258307218551636, "learning_rate": 2.6181042107378222e-06, "loss": 0.0012, "step": 126920 }, { "epoch": 2.076904197005645, "grad_norm": 0.09684636443853378, "learning_rate": 2.6172672701586306e-06, "loss": 0.0006, "step": 126930 }, { "epoch": 2.077067822956721, "grad_norm": 0.30909478664398193, "learning_rate": 2.6164304159477714e-06, "loss": 0.0008, "step": 126940 }, { "epoch": 2.077231448907797, "grad_norm": 0.1499127447605133, "learning_rate": 2.6155936481355827e-06, "loss": 0.0007, "step": 126950 }, { "epoch": 2.0773950748588725, "grad_norm": 0.11543216556310654, "learning_rate": 2.614756966752392e-06, "loss": 0.0006, "step": 126960 }, { "epoch": 2.0775587008099485, "grad_norm": 0.07330917567014694, "learning_rate": 2.61392037182853e-06, "loss": 0.0009, "step": 126970 }, { "epoch": 2.077722326761024, "grad_norm": 0.026456883177161217, "learning_rate": 2.613083863394318e-06, "loss": 0.0008, "step": 126980 }, { "epoch": 2.0778859527121, "grad_norm": 0.17785537242889404, "learning_rate": 2.6122474414800815e-06, "loss": 0.001, "step": 126990 }, { "epoch": 2.078049578663176, "grad_norm": 0.02467922866344452, "learning_rate": 2.611411106116134e-06, "loss": 0.0006, "step": 127000 }, { "epoch": 2.0782132046142516, "grad_norm": 0.033409010618925095, "learning_rate": 2.610574857332795e-06, "loss": 0.0007, "step": 127010 }, { "epoch": 2.0783768305653276, "grad_norm": 0.015222851186990738, "learning_rate": 2.6097386951603743e-06, "loss": 0.0009, "step": 127020 }, { "epoch": 2.0785404565164036, "grad_norm": 0.10475429892539978, "learning_rate": 2.6089026196291795e-06, "loss": 0.0007, "step": 127030 }, { "epoch": 2.078704082467479, "grad_norm": 0.09920498728752136, "learning_rate": 2.60806663076952e-06, "loss": 0.001, "step": 127040 }, { "epoch": 2.078867708418555, "grad_norm": 0.1791248768568039, "learning_rate": 2.6072307286116936e-06, "loss": 0.0009, "step": 127050 }, { "epoch": 2.079031334369631, "grad_norm": 0.012286944314837456, "learning_rate": 2.6063949131860057e-06, "loss": 0.0007, "step": 127060 }, { "epoch": 2.0791949603207067, "grad_norm": 0.03031480312347412, "learning_rate": 2.6055591845227462e-06, "loss": 0.0006, "step": 127070 }, { "epoch": 2.0793585862717827, "grad_norm": 0.27137839794158936, "learning_rate": 2.6047235426522114e-06, "loss": 0.001, "step": 127080 }, { "epoch": 2.0795222122228587, "grad_norm": 0.01968301273882389, "learning_rate": 2.603887987604694e-06, "loss": 0.0004, "step": 127090 }, { "epoch": 2.0796858381739343, "grad_norm": 0.09470977634191513, "learning_rate": 2.6030525194104755e-06, "loss": 0.0009, "step": 127100 }, { "epoch": 2.0798494641250103, "grad_norm": 0.11560133099555969, "learning_rate": 2.6022171380998444e-06, "loss": 0.0006, "step": 127110 }, { "epoch": 2.0800130900760863, "grad_norm": 0.11293934285640717, "learning_rate": 2.601381843703079e-06, "loss": 0.0015, "step": 127120 }, { "epoch": 2.080176716027162, "grad_norm": 0.07455451786518097, "learning_rate": 2.600546636250457e-06, "loss": 0.0013, "step": 127130 }, { "epoch": 2.080340341978238, "grad_norm": 0.02396007999777794, "learning_rate": 2.5997115157722508e-06, "loss": 0.0011, "step": 127140 }, { "epoch": 2.080503967929314, "grad_norm": 0.10201536864042282, "learning_rate": 2.5988764822987335e-06, "loss": 0.0004, "step": 127150 }, { "epoch": 2.0806675938803894, "grad_norm": 0.09642160683870316, "learning_rate": 2.598041535860174e-06, "loss": 0.0007, "step": 127160 }, { "epoch": 2.0808312198314654, "grad_norm": 0.15044547617435455, "learning_rate": 2.5972066764868343e-06, "loss": 0.0007, "step": 127170 }, { "epoch": 2.080994845782541, "grad_norm": 0.17772674560546875, "learning_rate": 2.5963719042089795e-06, "loss": 0.0011, "step": 127180 }, { "epoch": 2.081158471733617, "grad_norm": 0.06801004707813263, "learning_rate": 2.595537219056864e-06, "loss": 0.0007, "step": 127190 }, { "epoch": 2.081322097684693, "grad_norm": 0.052550725638866425, "learning_rate": 2.5947026210607475e-06, "loss": 0.0009, "step": 127200 }, { "epoch": 2.0814857236357684, "grad_norm": 0.0493013821542263, "learning_rate": 2.5938681102508776e-06, "loss": 0.0024, "step": 127210 }, { "epoch": 2.0816493495868444, "grad_norm": 0.0904151052236557, "learning_rate": 2.5930336866575085e-06, "loss": 0.0009, "step": 127220 }, { "epoch": 2.0818129755379204, "grad_norm": 0.2083015739917755, "learning_rate": 2.5921993503108826e-06, "loss": 0.0012, "step": 127230 }, { "epoch": 2.081976601488996, "grad_norm": 0.002322424203157425, "learning_rate": 2.5913651012412416e-06, "loss": 0.0013, "step": 127240 }, { "epoch": 2.082140227440072, "grad_norm": 0.040781036019325256, "learning_rate": 2.5905309394788287e-06, "loss": 0.0005, "step": 127250 }, { "epoch": 2.082303853391148, "grad_norm": 0.20012660324573517, "learning_rate": 2.5896968650538763e-06, "loss": 0.0013, "step": 127260 }, { "epoch": 2.0824674793422235, "grad_norm": 0.08569946885108948, "learning_rate": 2.588862877996621e-06, "loss": 0.0016, "step": 127270 }, { "epoch": 2.0826311052932995, "grad_norm": 0.06452517211437225, "learning_rate": 2.58802897833729e-06, "loss": 0.001, "step": 127280 }, { "epoch": 2.0827947312443755, "grad_norm": 0.03474804759025574, "learning_rate": 2.5871951661061133e-06, "loss": 0.0005, "step": 127290 }, { "epoch": 2.082958357195451, "grad_norm": 0.013522247783839703, "learning_rate": 2.5863614413333106e-06, "loss": 0.0012, "step": 127300 }, { "epoch": 2.083121983146527, "grad_norm": 0.22929659485816956, "learning_rate": 2.585527804049105e-06, "loss": 0.0009, "step": 127310 }, { "epoch": 2.083285609097603, "grad_norm": 0.05873755365610123, "learning_rate": 2.584694254283717e-06, "loss": 0.0011, "step": 127320 }, { "epoch": 2.0834492350486786, "grad_norm": 0.05695571377873421, "learning_rate": 2.583860792067353e-06, "loss": 0.0009, "step": 127330 }, { "epoch": 2.0836128609997546, "grad_norm": 0.04229049012064934, "learning_rate": 2.5830274174302312e-06, "loss": 0.0011, "step": 127340 }, { "epoch": 2.08377648695083, "grad_norm": 0.03599122539162636, "learning_rate": 2.5821941304025542e-06, "loss": 0.0007, "step": 127350 }, { "epoch": 2.083940112901906, "grad_norm": 0.08846523612737656, "learning_rate": 2.581360931014528e-06, "loss": 0.001, "step": 127360 }, { "epoch": 2.084103738852982, "grad_norm": 0.0830601379275322, "learning_rate": 2.5805278192963577e-06, "loss": 0.0011, "step": 127370 }, { "epoch": 2.0842673648040577, "grad_norm": 0.010371796786785126, "learning_rate": 2.5796947952782375e-06, "loss": 0.001, "step": 127380 }, { "epoch": 2.0844309907551337, "grad_norm": 0.16246408224105835, "learning_rate": 2.5788618589903657e-06, "loss": 0.0012, "step": 127390 }, { "epoch": 2.0845946167062097, "grad_norm": 0.006537714973092079, "learning_rate": 2.5780290104629303e-06, "loss": 0.0007, "step": 127400 }, { "epoch": 2.0847582426572853, "grad_norm": 0.020766111090779305, "learning_rate": 2.577196249726124e-06, "loss": 0.0026, "step": 127410 }, { "epoch": 2.0849218686083613, "grad_norm": 0.06766985356807709, "learning_rate": 2.576363576810128e-06, "loss": 0.0008, "step": 127420 }, { "epoch": 2.0850854945594373, "grad_norm": 0.19978763163089752, "learning_rate": 2.57553099174513e-06, "loss": 0.0011, "step": 127430 }, { "epoch": 2.085249120510513, "grad_norm": 0.03641851618885994, "learning_rate": 2.5746984945613062e-06, "loss": 0.0004, "step": 127440 }, { "epoch": 2.085412746461589, "grad_norm": 0.06487105041742325, "learning_rate": 2.5738660852888316e-06, "loss": 0.001, "step": 127450 }, { "epoch": 2.085576372412665, "grad_norm": 0.09939587861299515, "learning_rate": 2.5730337639578816e-06, "loss": 0.0008, "step": 127460 }, { "epoch": 2.0857399983637404, "grad_norm": 0.12017474323511124, "learning_rate": 2.572201530598623e-06, "loss": 0.0005, "step": 127470 }, { "epoch": 2.0859036243148164, "grad_norm": 0.08546924591064453, "learning_rate": 2.571369385241226e-06, "loss": 0.0013, "step": 127480 }, { "epoch": 2.0860672502658923, "grad_norm": 0.019498176872730255, "learning_rate": 2.5705373279158495e-06, "loss": 0.0011, "step": 127490 }, { "epoch": 2.086230876216968, "grad_norm": 0.008147344924509525, "learning_rate": 2.569705358652658e-06, "loss": 0.0005, "step": 127500 }, { "epoch": 2.086394502168044, "grad_norm": 0.0732748731970787, "learning_rate": 2.5688734774818037e-06, "loss": 0.002, "step": 127510 }, { "epoch": 2.08655812811912, "grad_norm": 0.17362816631793976, "learning_rate": 2.5680416844334427e-06, "loss": 0.0009, "step": 127520 }, { "epoch": 2.0867217540701954, "grad_norm": 0.09583607316017151, "learning_rate": 2.5672099795377304e-06, "loss": 0.0007, "step": 127530 }, { "epoch": 2.0868853800212714, "grad_norm": 0.04557041451334953, "learning_rate": 2.566378362824804e-06, "loss": 0.001, "step": 127540 }, { "epoch": 2.0870490059723474, "grad_norm": 0.07319483160972595, "learning_rate": 2.5655468343248156e-06, "loss": 0.0005, "step": 127550 }, { "epoch": 2.087212631923423, "grad_norm": 0.05781925842165947, "learning_rate": 2.564715394067902e-06, "loss": 0.0005, "step": 127560 }, { "epoch": 2.087376257874499, "grad_norm": 0.0935058519244194, "learning_rate": 2.5638840420842036e-06, "loss": 0.0008, "step": 127570 }, { "epoch": 2.0875398838255745, "grad_norm": 0.00953678973019123, "learning_rate": 2.563052778403852e-06, "loss": 0.0011, "step": 127580 }, { "epoch": 2.0877035097766505, "grad_norm": 0.03904193639755249, "learning_rate": 2.5622216030569802e-06, "loss": 0.0012, "step": 127590 }, { "epoch": 2.0878671357277265, "grad_norm": 0.09811195731163025, "learning_rate": 2.5613905160737173e-06, "loss": 0.0008, "step": 127600 }, { "epoch": 2.088030761678802, "grad_norm": 0.05469155311584473, "learning_rate": 2.560559517484186e-06, "loss": 0.0011, "step": 127610 }, { "epoch": 2.088194387629878, "grad_norm": 0.0048719062469899654, "learning_rate": 2.5597286073185114e-06, "loss": 0.0006, "step": 127620 }, { "epoch": 2.088358013580954, "grad_norm": 0.003756213467568159, "learning_rate": 2.5588977856068094e-06, "loss": 0.0006, "step": 127630 }, { "epoch": 2.0885216395320296, "grad_norm": 0.04063684120774269, "learning_rate": 2.5580670523791934e-06, "loss": 0.0005, "step": 127640 }, { "epoch": 2.0886852654831056, "grad_norm": 0.07121266424655914, "learning_rate": 2.5572364076657796e-06, "loss": 0.0016, "step": 127650 }, { "epoch": 2.0888488914341816, "grad_norm": 0.03202417120337486, "learning_rate": 2.556405851496674e-06, "loss": 0.0011, "step": 127660 }, { "epoch": 2.089012517385257, "grad_norm": 0.0037659406661987305, "learning_rate": 2.555575383901984e-06, "loss": 0.0004, "step": 127670 }, { "epoch": 2.089176143336333, "grad_norm": 0.0045356908813118935, "learning_rate": 2.55474500491181e-06, "loss": 0.0012, "step": 127680 }, { "epoch": 2.089339769287409, "grad_norm": 0.05127200856804848, "learning_rate": 2.553914714556255e-06, "loss": 0.0008, "step": 127690 }, { "epoch": 2.0895033952384847, "grad_norm": 0.10988567769527435, "learning_rate": 2.5530845128654092e-06, "loss": 0.0015, "step": 127700 }, { "epoch": 2.0896670211895607, "grad_norm": 0.06253285706043243, "learning_rate": 2.5522543998693717e-06, "loss": 0.0007, "step": 127710 }, { "epoch": 2.0898306471406367, "grad_norm": 0.03700726851820946, "learning_rate": 2.551424375598227e-06, "loss": 0.001, "step": 127720 }, { "epoch": 2.0899942730917123, "grad_norm": 0.08548401296138763, "learning_rate": 2.550594440082066e-06, "loss": 0.0008, "step": 127730 }, { "epoch": 2.0901578990427883, "grad_norm": 0.0011281209299340844, "learning_rate": 2.5497645933509697e-06, "loss": 0.0009, "step": 127740 }, { "epoch": 2.090321524993864, "grad_norm": 0.1124374121427536, "learning_rate": 2.548934835435015e-06, "loss": 0.0008, "step": 127750 }, { "epoch": 2.09048515094494, "grad_norm": 0.047349508851766586, "learning_rate": 2.548105166364284e-06, "loss": 0.0016, "step": 127760 }, { "epoch": 2.090648776896016, "grad_norm": 0.1386995166540146, "learning_rate": 2.5472755861688466e-06, "loss": 0.0012, "step": 127770 }, { "epoch": 2.0908124028470914, "grad_norm": 0.33263498544692993, "learning_rate": 2.546446094878775e-06, "loss": 0.0016, "step": 127780 }, { "epoch": 2.0909760287981674, "grad_norm": 0.05350246652960777, "learning_rate": 2.5456166925241345e-06, "loss": 0.0009, "step": 127790 }, { "epoch": 2.0911396547492433, "grad_norm": 0.01669466681778431, "learning_rate": 2.5447873791349893e-06, "loss": 0.0009, "step": 127800 }, { "epoch": 2.091303280700319, "grad_norm": 0.048967618495225906, "learning_rate": 2.5439581547414026e-06, "loss": 0.0007, "step": 127810 }, { "epoch": 2.091466906651395, "grad_norm": 0.5545778274536133, "learning_rate": 2.543129019373428e-06, "loss": 0.0011, "step": 127820 }, { "epoch": 2.091630532602471, "grad_norm": 0.05185588076710701, "learning_rate": 2.5422999730611253e-06, "loss": 0.0006, "step": 127830 }, { "epoch": 2.0917941585535464, "grad_norm": 0.0487663559615612, "learning_rate": 2.541471015834538e-06, "loss": 0.0008, "step": 127840 }, { "epoch": 2.0919577845046224, "grad_norm": 0.12212276458740234, "learning_rate": 2.5406421477237176e-06, "loss": 0.0016, "step": 127850 }, { "epoch": 2.0921214104556984, "grad_norm": 0.06837128102779388, "learning_rate": 2.5398133687587103e-06, "loss": 0.0013, "step": 127860 }, { "epoch": 2.092285036406774, "grad_norm": 0.04178814962506294, "learning_rate": 2.538984678969553e-06, "loss": 0.0011, "step": 127870 }, { "epoch": 2.09244866235785, "grad_norm": 0.20282943546772003, "learning_rate": 2.5381560783862883e-06, "loss": 0.0013, "step": 127880 }, { "epoch": 2.092612288308926, "grad_norm": 0.28980764746665955, "learning_rate": 2.5373275670389474e-06, "loss": 0.001, "step": 127890 }, { "epoch": 2.0927759142600015, "grad_norm": 0.029055584222078323, "learning_rate": 2.536499144957565e-06, "loss": 0.0011, "step": 127900 }, { "epoch": 2.0929395402110775, "grad_norm": 0.0028315105009824038, "learning_rate": 2.5356708121721664e-06, "loss": 0.0007, "step": 127910 }, { "epoch": 2.0931031661621535, "grad_norm": 0.009406630881130695, "learning_rate": 2.5348425687127788e-06, "loss": 0.0004, "step": 127920 }, { "epoch": 2.093266792113229, "grad_norm": 0.01671595871448517, "learning_rate": 2.5340144146094235e-06, "loss": 0.0005, "step": 127930 }, { "epoch": 2.093430418064305, "grad_norm": 0.0425165519118309, "learning_rate": 2.533186349892117e-06, "loss": 0.001, "step": 127940 }, { "epoch": 2.0935940440153806, "grad_norm": 0.1454932689666748, "learning_rate": 2.5323583745908793e-06, "loss": 0.0009, "step": 127950 }, { "epoch": 2.0937576699664566, "grad_norm": 0.05067780241370201, "learning_rate": 2.5315304887357174e-06, "loss": 0.0007, "step": 127960 }, { "epoch": 2.0939212959175326, "grad_norm": 0.09026180952787399, "learning_rate": 2.5307026923566434e-06, "loss": 0.0034, "step": 127970 }, { "epoch": 2.094084921868608, "grad_norm": 0.05677011236548424, "learning_rate": 2.529874985483661e-06, "loss": 0.001, "step": 127980 }, { "epoch": 2.094248547819684, "grad_norm": 0.06247275695204735, "learning_rate": 2.5290473681467753e-06, "loss": 0.0005, "step": 127990 }, { "epoch": 2.09441217377076, "grad_norm": 0.08413197100162506, "learning_rate": 2.5282198403759816e-06, "loss": 0.0004, "step": 128000 }, { "epoch": 2.0945757997218357, "grad_norm": 0.013439448550343513, "learning_rate": 2.5273924022012782e-06, "loss": 0.0008, "step": 128010 }, { "epoch": 2.0947394256729117, "grad_norm": 0.008650384843349457, "learning_rate": 2.5265650536526586e-06, "loss": 0.001, "step": 128020 }, { "epoch": 2.0949030516239877, "grad_norm": 0.0435977466404438, "learning_rate": 2.5257377947601124e-06, "loss": 0.0004, "step": 128030 }, { "epoch": 2.0950666775750633, "grad_norm": 0.06966936588287354, "learning_rate": 2.5249106255536238e-06, "loss": 0.0006, "step": 128040 }, { "epoch": 2.0952303035261393, "grad_norm": 0.020104793831706047, "learning_rate": 2.524083546063174e-06, "loss": 0.001, "step": 128050 }, { "epoch": 2.0953939294772153, "grad_norm": 0.053379885852336884, "learning_rate": 2.5232565563187474e-06, "loss": 0.0014, "step": 128060 }, { "epoch": 2.095557555428291, "grad_norm": 0.08038292080163956, "learning_rate": 2.522429656350316e-06, "loss": 0.0012, "step": 128070 }, { "epoch": 2.095721181379367, "grad_norm": 0.11014243215322495, "learning_rate": 2.5216028461878538e-06, "loss": 0.0018, "step": 128080 }, { "epoch": 2.095884807330443, "grad_norm": 0.05124003440141678, "learning_rate": 2.520776125861335e-06, "loss": 0.0012, "step": 128090 }, { "epoch": 2.0960484332815184, "grad_norm": 0.09936625510454178, "learning_rate": 2.519949495400721e-06, "loss": 0.0009, "step": 128100 }, { "epoch": 2.0962120592325943, "grad_norm": 0.027762778103351593, "learning_rate": 2.5191229548359785e-06, "loss": 0.0005, "step": 128110 }, { "epoch": 2.09637568518367, "grad_norm": 0.08110368996858597, "learning_rate": 2.518296504197064e-06, "loss": 0.0006, "step": 128120 }, { "epoch": 2.096539311134746, "grad_norm": 0.006522516254335642, "learning_rate": 2.517470143513939e-06, "loss": 0.0015, "step": 128130 }, { "epoch": 2.096702937085822, "grad_norm": 0.07181324064731598, "learning_rate": 2.5166438728165555e-06, "loss": 0.0009, "step": 128140 }, { "epoch": 2.0968665630368974, "grad_norm": 0.07044582813978195, "learning_rate": 2.51581769213486e-06, "loss": 0.0006, "step": 128150 }, { "epoch": 2.0970301889879734, "grad_norm": 0.13840921223163605, "learning_rate": 2.5149916014988046e-06, "loss": 0.0011, "step": 128160 }, { "epoch": 2.0971938149390494, "grad_norm": 0.13683614134788513, "learning_rate": 2.5141656009383297e-06, "loss": 0.0013, "step": 128170 }, { "epoch": 2.097357440890125, "grad_norm": 0.1456993669271469, "learning_rate": 2.5133396904833783e-06, "loss": 0.0008, "step": 128180 }, { "epoch": 2.097521066841201, "grad_norm": 0.05824455991387367, "learning_rate": 2.512513870163885e-06, "loss": 0.0006, "step": 128190 }, { "epoch": 2.097684692792277, "grad_norm": 0.05801787227392197, "learning_rate": 2.511688140009788e-06, "loss": 0.001, "step": 128200 }, { "epoch": 2.0978483187433525, "grad_norm": 0.04467809945344925, "learning_rate": 2.5108625000510133e-06, "loss": 0.0008, "step": 128210 }, { "epoch": 2.0980119446944285, "grad_norm": 0.061349913477897644, "learning_rate": 2.5100369503174913e-06, "loss": 0.0012, "step": 128220 }, { "epoch": 2.0981755706455045, "grad_norm": 0.23603478074073792, "learning_rate": 2.5092114908391467e-06, "loss": 0.0012, "step": 128230 }, { "epoch": 2.09833919659658, "grad_norm": 0.09558813273906708, "learning_rate": 2.5083861216458973e-06, "loss": 0.0009, "step": 128240 }, { "epoch": 2.098502822547656, "grad_norm": 0.039386648684740067, "learning_rate": 2.5075608427676634e-06, "loss": 0.0011, "step": 128250 }, { "epoch": 2.098666448498732, "grad_norm": 0.00835203193128109, "learning_rate": 2.5067356542343567e-06, "loss": 0.0007, "step": 128260 }, { "epoch": 2.0988300744498076, "grad_norm": 0.04446261748671532, "learning_rate": 2.5059105560758926e-06, "loss": 0.0009, "step": 128270 }, { "epoch": 2.0989937004008836, "grad_norm": 0.05806954577565193, "learning_rate": 2.505085548322175e-06, "loss": 0.0008, "step": 128280 }, { "epoch": 2.0991573263519596, "grad_norm": 0.2028346210718155, "learning_rate": 2.5042606310031085e-06, "loss": 0.0018, "step": 128290 }, { "epoch": 2.099320952303035, "grad_norm": 0.0670069083571434, "learning_rate": 2.5034358041485983e-06, "loss": 0.0011, "step": 128300 }, { "epoch": 2.099484578254111, "grad_norm": 0.006675695534795523, "learning_rate": 2.5026110677885385e-06, "loss": 0.0004, "step": 128310 }, { "epoch": 2.099648204205187, "grad_norm": 0.07480159401893616, "learning_rate": 2.5017864219528266e-06, "loss": 0.0011, "step": 128320 }, { "epoch": 2.0998118301562627, "grad_norm": 0.18533605337142944, "learning_rate": 2.500961866671353e-06, "loss": 0.0006, "step": 128330 }, { "epoch": 2.0999754561073387, "grad_norm": 0.13750103116035461, "learning_rate": 2.500137401974005e-06, "loss": 0.0009, "step": 128340 }, { "epoch": 2.1001390820584143, "grad_norm": 0.1057664230465889, "learning_rate": 2.4993130278906664e-06, "loss": 0.0009, "step": 128350 }, { "epoch": 2.1003027080094903, "grad_norm": 0.09257295727729797, "learning_rate": 2.4984887444512197e-06, "loss": 0.0009, "step": 128360 }, { "epoch": 2.1004663339605663, "grad_norm": 0.04528724029660225, "learning_rate": 2.497664551685546e-06, "loss": 0.0006, "step": 128370 }, { "epoch": 2.100629959911642, "grad_norm": 0.08403719216585159, "learning_rate": 2.4968404496235167e-06, "loss": 0.001, "step": 128380 }, { "epoch": 2.100793585862718, "grad_norm": 0.008571864105761051, "learning_rate": 2.496016438295006e-06, "loss": 0.0006, "step": 128390 }, { "epoch": 2.100957211813794, "grad_norm": 0.013630916364490986, "learning_rate": 2.4951925177298795e-06, "loss": 0.0012, "step": 128400 }, { "epoch": 2.1011208377648694, "grad_norm": 0.05910114943981171, "learning_rate": 2.4943686879580063e-06, "loss": 0.0007, "step": 128410 }, { "epoch": 2.1012844637159453, "grad_norm": 0.05891280248761177, "learning_rate": 2.4935449490092443e-06, "loss": 0.0019, "step": 128420 }, { "epoch": 2.1014480896670213, "grad_norm": 0.13174556195735931, "learning_rate": 2.4927213009134556e-06, "loss": 0.0007, "step": 128430 }, { "epoch": 2.101611715618097, "grad_norm": 0.054453738033771515, "learning_rate": 2.491897743700493e-06, "loss": 0.0007, "step": 128440 }, { "epoch": 2.101775341569173, "grad_norm": 0.0880228653550148, "learning_rate": 2.4910742774002077e-06, "loss": 0.0007, "step": 128450 }, { "epoch": 2.101938967520249, "grad_norm": 0.05741582810878754, "learning_rate": 2.490250902042452e-06, "loss": 0.001, "step": 128460 }, { "epoch": 2.1021025934713244, "grad_norm": 0.06323856860399246, "learning_rate": 2.489427617657067e-06, "loss": 0.0004, "step": 128470 }, { "epoch": 2.1022662194224004, "grad_norm": 0.0194765105843544, "learning_rate": 2.4886044242738984e-06, "loss": 0.001, "step": 128480 }, { "epoch": 2.1024298453734764, "grad_norm": 0.14218570291996002, "learning_rate": 2.487781321922782e-06, "loss": 0.0015, "step": 128490 }, { "epoch": 2.102593471324552, "grad_norm": 0.06330674886703491, "learning_rate": 2.4869583106335563e-06, "loss": 0.001, "step": 128500 }, { "epoch": 2.102757097275628, "grad_norm": 0.12341947853565216, "learning_rate": 2.48613539043605e-06, "loss": 0.0006, "step": 128510 }, { "epoch": 2.1029207232267035, "grad_norm": 0.05440869927406311, "learning_rate": 2.4853125613600943e-06, "loss": 0.0008, "step": 128520 }, { "epoch": 2.1030843491777795, "grad_norm": 0.02906016632914543, "learning_rate": 2.4844898234355174e-06, "loss": 0.0011, "step": 128530 }, { "epoch": 2.1032479751288555, "grad_norm": 0.037199489772319794, "learning_rate": 2.4836671766921356e-06, "loss": 0.0022, "step": 128540 }, { "epoch": 2.103411601079931, "grad_norm": 0.10032356530427933, "learning_rate": 2.482844621159772e-06, "loss": 0.0015, "step": 128550 }, { "epoch": 2.103575227031007, "grad_norm": 0.015970641747117043, "learning_rate": 2.4820221568682397e-06, "loss": 0.0009, "step": 128560 }, { "epoch": 2.103738852982083, "grad_norm": 0.07271319627761841, "learning_rate": 2.4811997838473517e-06, "loss": 0.0016, "step": 128570 }, { "epoch": 2.1039024789331586, "grad_norm": 0.09008684009313583, "learning_rate": 2.4803775021269206e-06, "loss": 0.0013, "step": 128580 }, { "epoch": 2.1040661048842346, "grad_norm": 0.04694205895066261, "learning_rate": 2.4795553117367464e-06, "loss": 0.0007, "step": 128590 }, { "epoch": 2.1042297308353106, "grad_norm": 0.14489251375198364, "learning_rate": 2.478733212706637e-06, "loss": 0.0013, "step": 128600 }, { "epoch": 2.104393356786386, "grad_norm": 0.002187573816627264, "learning_rate": 2.4779112050663865e-06, "loss": 0.0009, "step": 128610 }, { "epoch": 2.104556982737462, "grad_norm": 0.20876021683216095, "learning_rate": 2.477089288845796e-06, "loss": 0.0008, "step": 128620 }, { "epoch": 2.104720608688538, "grad_norm": 0.11453013122081757, "learning_rate": 2.476267464074654e-06, "loss": 0.0009, "step": 128630 }, { "epoch": 2.1048842346396137, "grad_norm": 0.04934345930814743, "learning_rate": 2.4754457307827496e-06, "loss": 0.0007, "step": 128640 }, { "epoch": 2.1050478605906897, "grad_norm": 0.12709276378154755, "learning_rate": 2.4746240889998717e-06, "loss": 0.0009, "step": 128650 }, { "epoch": 2.1052114865417657, "grad_norm": 0.04703143611550331, "learning_rate": 2.473802538755799e-06, "loss": 0.0014, "step": 128660 }, { "epoch": 2.1053751124928413, "grad_norm": 0.048432085663080215, "learning_rate": 2.4729810800803147e-06, "loss": 0.0015, "step": 128670 }, { "epoch": 2.1055387384439173, "grad_norm": 0.15259037911891937, "learning_rate": 2.4721597130031904e-06, "loss": 0.001, "step": 128680 }, { "epoch": 2.1057023643949933, "grad_norm": 0.16169042885303497, "learning_rate": 2.471338437554203e-06, "loss": 0.0013, "step": 128690 }, { "epoch": 2.105865990346069, "grad_norm": 0.02376764826476574, "learning_rate": 2.4705172537631184e-06, "loss": 0.0005, "step": 128700 }, { "epoch": 2.106029616297145, "grad_norm": 0.016686461865901947, "learning_rate": 2.4696961616597055e-06, "loss": 0.0008, "step": 128710 }, { "epoch": 2.1061932422482204, "grad_norm": 0.11294475197792053, "learning_rate": 2.468875161273723e-06, "loss": 0.0012, "step": 128720 }, { "epoch": 2.1063568681992964, "grad_norm": 0.26991215348243713, "learning_rate": 2.468054252634935e-06, "loss": 0.0014, "step": 128730 }, { "epoch": 2.1065204941503723, "grad_norm": 0.03612404689192772, "learning_rate": 2.4672334357730947e-06, "loss": 0.0005, "step": 128740 }, { "epoch": 2.106684120101448, "grad_norm": 0.02044064551591873, "learning_rate": 2.466412710717953e-06, "loss": 0.0009, "step": 128750 }, { "epoch": 2.106847746052524, "grad_norm": 0.2583995461463928, "learning_rate": 2.4655920774992638e-06, "loss": 0.0012, "step": 128760 }, { "epoch": 2.1070113720036, "grad_norm": 0.18512903153896332, "learning_rate": 2.4647715361467677e-06, "loss": 0.0012, "step": 128770 }, { "epoch": 2.1071749979546754, "grad_norm": 0.14752094447612762, "learning_rate": 2.4639510866902123e-06, "loss": 0.0011, "step": 128780 }, { "epoch": 2.1073386239057514, "grad_norm": 0.10565897822380066, "learning_rate": 2.463130729159333e-06, "loss": 0.0015, "step": 128790 }, { "epoch": 2.1075022498568274, "grad_norm": 0.013931378722190857, "learning_rate": 2.462310463583868e-06, "loss": 0.0007, "step": 128800 }, { "epoch": 2.107665875807903, "grad_norm": 0.009323307313024998, "learning_rate": 2.4614902899935505e-06, "loss": 0.0019, "step": 128810 }, { "epoch": 2.107829501758979, "grad_norm": 0.05106600001454353, "learning_rate": 2.4606702084181077e-06, "loss": 0.0008, "step": 128820 }, { "epoch": 2.107993127710055, "grad_norm": 0.012281034141778946, "learning_rate": 2.4598502188872704e-06, "loss": 0.0008, "step": 128830 }, { "epoch": 2.1081567536611305, "grad_norm": 0.08368691802024841, "learning_rate": 2.459030321430753e-06, "loss": 0.0008, "step": 128840 }, { "epoch": 2.1083203796122065, "grad_norm": 0.08300020545721054, "learning_rate": 2.4582105160782803e-06, "loss": 0.0007, "step": 128850 }, { "epoch": 2.1084840055632825, "grad_norm": 0.013460628688335419, "learning_rate": 2.457390802859569e-06, "loss": 0.0012, "step": 128860 }, { "epoch": 2.108647631514358, "grad_norm": 0.13130810856819153, "learning_rate": 2.456571181804328e-06, "loss": 0.0011, "step": 128870 }, { "epoch": 2.108811257465434, "grad_norm": 0.092060387134552, "learning_rate": 2.455751652942271e-06, "loss": 0.0009, "step": 128880 }, { "epoch": 2.10897488341651, "grad_norm": 0.24746465682983398, "learning_rate": 2.4549322163030997e-06, "loss": 0.0009, "step": 128890 }, { "epoch": 2.1091385093675856, "grad_norm": 0.10120374709367752, "learning_rate": 2.4541128719165203e-06, "loss": 0.0005, "step": 128900 }, { "epoch": 2.1093021353186616, "grad_norm": 0.09808824956417084, "learning_rate": 2.453293619812228e-06, "loss": 0.0008, "step": 128910 }, { "epoch": 2.109465761269737, "grad_norm": 0.08791407197713852, "learning_rate": 2.4524744600199242e-06, "loss": 0.0014, "step": 128920 }, { "epoch": 2.109629387220813, "grad_norm": 0.10329283028841019, "learning_rate": 2.4516553925692978e-06, "loss": 0.002, "step": 128930 }, { "epoch": 2.109793013171889, "grad_norm": 0.012688706628978252, "learning_rate": 2.4508364174900367e-06, "loss": 0.0006, "step": 128940 }, { "epoch": 2.1099566391229647, "grad_norm": 0.15681971609592438, "learning_rate": 2.4500175348118304e-06, "loss": 0.0014, "step": 128950 }, { "epoch": 2.1101202650740407, "grad_norm": 0.09324906021356583, "learning_rate": 2.4491987445643584e-06, "loss": 0.0006, "step": 128960 }, { "epoch": 2.1102838910251167, "grad_norm": 0.047888562083244324, "learning_rate": 2.4483800467773026e-06, "loss": 0.0006, "step": 128970 }, { "epoch": 2.1104475169761923, "grad_norm": 0.05987181514501572, "learning_rate": 2.4475614414803357e-06, "loss": 0.0008, "step": 128980 }, { "epoch": 2.1106111429272683, "grad_norm": 0.16377393901348114, "learning_rate": 2.446742928703134e-06, "loss": 0.0011, "step": 128990 }, { "epoch": 2.1107747688783443, "grad_norm": 0.08190133422613144, "learning_rate": 2.4459245084753626e-06, "loss": 0.0008, "step": 129000 }, { "epoch": 2.11093839482942, "grad_norm": 0.18243969976902008, "learning_rate": 2.445106180826689e-06, "loss": 0.0011, "step": 129010 }, { "epoch": 2.111102020780496, "grad_norm": 0.06374434381723404, "learning_rate": 2.444287945786778e-06, "loss": 0.0017, "step": 129020 }, { "epoch": 2.111265646731572, "grad_norm": 0.12796828150749207, "learning_rate": 2.443469803385286e-06, "loss": 0.0012, "step": 129030 }, { "epoch": 2.1114292726826474, "grad_norm": 0.009844370186328888, "learning_rate": 2.4426517536518695e-06, "loss": 0.0004, "step": 129040 }, { "epoch": 2.1115928986337233, "grad_norm": 0.02215762436389923, "learning_rate": 2.4418337966161786e-06, "loss": 0.0007, "step": 129050 }, { "epoch": 2.1117565245847993, "grad_norm": 0.03647473827004433, "learning_rate": 2.441015932307864e-06, "loss": 0.0008, "step": 129060 }, { "epoch": 2.111920150535875, "grad_norm": 0.1903500258922577, "learning_rate": 2.4401981607565737e-06, "loss": 0.0008, "step": 129070 }, { "epoch": 2.112083776486951, "grad_norm": 0.1311829835176468, "learning_rate": 2.439380481991946e-06, "loss": 0.0004, "step": 129080 }, { "epoch": 2.112247402438027, "grad_norm": 0.15377609431743622, "learning_rate": 2.438562896043623e-06, "loss": 0.0014, "step": 129090 }, { "epoch": 2.1124110283891024, "grad_norm": 0.08897221088409424, "learning_rate": 2.437745402941237e-06, "loss": 0.0008, "step": 129100 }, { "epoch": 2.1125746543401784, "grad_norm": 0.05890337750315666, "learning_rate": 2.4369280027144236e-06, "loss": 0.0011, "step": 129110 }, { "epoch": 2.112738280291254, "grad_norm": 0.07072455435991287, "learning_rate": 2.4361106953928088e-06, "loss": 0.0012, "step": 129120 }, { "epoch": 2.11290190624233, "grad_norm": 0.04468684270977974, "learning_rate": 2.4352934810060205e-06, "loss": 0.0008, "step": 129130 }, { "epoch": 2.113065532193406, "grad_norm": 0.05255168676376343, "learning_rate": 2.434476359583679e-06, "loss": 0.002, "step": 129140 }, { "epoch": 2.1132291581444815, "grad_norm": 0.05946776643395424, "learning_rate": 2.4336593311554022e-06, "loss": 0.0007, "step": 129150 }, { "epoch": 2.1133927840955575, "grad_norm": 0.13737957179546356, "learning_rate": 2.432842395750808e-06, "loss": 0.001, "step": 129160 }, { "epoch": 2.1135564100466335, "grad_norm": 0.091910719871521, "learning_rate": 2.4320255533995056e-06, "loss": 0.0009, "step": 129170 }, { "epoch": 2.113720035997709, "grad_norm": 0.014578170143067837, "learning_rate": 2.4312088041311066e-06, "loss": 0.0008, "step": 129180 }, { "epoch": 2.113883661948785, "grad_norm": 0.06120060756802559, "learning_rate": 2.4303921479752124e-06, "loss": 0.0008, "step": 129190 }, { "epoch": 2.114047287899861, "grad_norm": 0.1434168666601181, "learning_rate": 2.429575584961429e-06, "loss": 0.001, "step": 129200 }, { "epoch": 2.1142109138509366, "grad_norm": 0.013018888421356678, "learning_rate": 2.4287591151193507e-06, "loss": 0.0006, "step": 129210 }, { "epoch": 2.1143745398020126, "grad_norm": 0.0070012821815907955, "learning_rate": 2.4279427384785748e-06, "loss": 0.001, "step": 129220 }, { "epoch": 2.1145381657530886, "grad_norm": 0.10873915255069733, "learning_rate": 2.427126455068697e-06, "loss": 0.0008, "step": 129230 }, { "epoch": 2.114701791704164, "grad_norm": 0.0031114406883716583, "learning_rate": 2.426310264919297e-06, "loss": 0.0008, "step": 129240 }, { "epoch": 2.11486541765524, "grad_norm": 0.05338197946548462, "learning_rate": 2.4254941680599665e-06, "loss": 0.0004, "step": 129250 }, { "epoch": 2.115029043606316, "grad_norm": 0.0195078756660223, "learning_rate": 2.4246781645202826e-06, "loss": 0.0008, "step": 129260 }, { "epoch": 2.1151926695573917, "grad_norm": 0.08901358395814896, "learning_rate": 2.4238622543298277e-06, "loss": 0.0006, "step": 129270 }, { "epoch": 2.1153562955084677, "grad_norm": 0.05972006171941757, "learning_rate": 2.423046437518173e-06, "loss": 0.0011, "step": 129280 }, { "epoch": 2.1155199214595433, "grad_norm": 0.05445694923400879, "learning_rate": 2.422230714114891e-06, "loss": 0.001, "step": 129290 }, { "epoch": 2.1156835474106193, "grad_norm": 0.018343938514590263, "learning_rate": 2.421415084149552e-06, "loss": 0.0007, "step": 129300 }, { "epoch": 2.1158471733616953, "grad_norm": 0.020434966310858727, "learning_rate": 2.420599547651717e-06, "loss": 0.0009, "step": 129310 }, { "epoch": 2.116010799312771, "grad_norm": 0.014766097068786621, "learning_rate": 2.41978410465095e-06, "loss": 0.0008, "step": 129320 }, { "epoch": 2.116174425263847, "grad_norm": 0.031918562948703766, "learning_rate": 2.4189687551768087e-06, "loss": 0.0013, "step": 129330 }, { "epoch": 2.116338051214923, "grad_norm": 0.01247891690582037, "learning_rate": 2.418153499258845e-06, "loss": 0.0007, "step": 129340 }, { "epoch": 2.1165016771659984, "grad_norm": 0.0784144252538681, "learning_rate": 2.417338336926613e-06, "loss": 0.0008, "step": 129350 }, { "epoch": 2.1166653031170743, "grad_norm": 0.007132540922611952, "learning_rate": 2.4165232682096574e-06, "loss": 0.0012, "step": 129360 }, { "epoch": 2.1168289290681503, "grad_norm": 0.13466258347034454, "learning_rate": 2.415708293137526e-06, "loss": 0.0009, "step": 129370 }, { "epoch": 2.116992555019226, "grad_norm": 0.13869674503803253, "learning_rate": 2.4148934117397553e-06, "loss": 0.0008, "step": 129380 }, { "epoch": 2.117156180970302, "grad_norm": 0.0726885050535202, "learning_rate": 2.414078624045888e-06, "loss": 0.0009, "step": 129390 }, { "epoch": 2.117319806921378, "grad_norm": 0.13166485726833344, "learning_rate": 2.4132639300854534e-06, "loss": 0.0007, "step": 129400 }, { "epoch": 2.1174834328724534, "grad_norm": 0.10797033458948135, "learning_rate": 2.412449329887986e-06, "loss": 0.001, "step": 129410 }, { "epoch": 2.1176470588235294, "grad_norm": 0.058146655559539795, "learning_rate": 2.4116348234830097e-06, "loss": 0.0014, "step": 129420 }, { "epoch": 2.1178106847746054, "grad_norm": 0.16427108645439148, "learning_rate": 2.4108204109000517e-06, "loss": 0.0011, "step": 129430 }, { "epoch": 2.117974310725681, "grad_norm": 0.15037886798381805, "learning_rate": 2.410006092168631e-06, "loss": 0.0011, "step": 129440 }, { "epoch": 2.118137936676757, "grad_norm": 0.02971005067229271, "learning_rate": 2.4091918673182625e-06, "loss": 0.0005, "step": 129450 }, { "epoch": 2.118301562627833, "grad_norm": 0.03783925622701645, "learning_rate": 2.408377736378464e-06, "loss": 0.0004, "step": 129460 }, { "epoch": 2.1184651885789085, "grad_norm": 0.14881537854671478, "learning_rate": 2.4075636993787416e-06, "loss": 0.0014, "step": 129470 }, { "epoch": 2.1186288145299845, "grad_norm": 0.14918789267539978, "learning_rate": 2.4067497563486064e-06, "loss": 0.0009, "step": 129480 }, { "epoch": 2.1187924404810605, "grad_norm": 0.007797111291438341, "learning_rate": 2.405935907317558e-06, "loss": 0.0006, "step": 129490 }, { "epoch": 2.118956066432136, "grad_norm": 0.10536335408687592, "learning_rate": 2.405122152315098e-06, "loss": 0.0009, "step": 129500 }, { "epoch": 2.119119692383212, "grad_norm": 0.0683324858546257, "learning_rate": 2.404308491370726e-06, "loss": 0.0005, "step": 129510 }, { "epoch": 2.1192833183342876, "grad_norm": 0.05309787392616272, "learning_rate": 2.4034949245139298e-06, "loss": 0.001, "step": 129520 }, { "epoch": 2.1194469442853636, "grad_norm": 0.07505122572183609, "learning_rate": 2.4026814517742047e-06, "loss": 0.0007, "step": 129530 }, { "epoch": 2.1196105702364396, "grad_norm": 0.08722759038209915, "learning_rate": 2.401868073181034e-06, "loss": 0.0007, "step": 129540 }, { "epoch": 2.119774196187515, "grad_norm": 0.002473121974617243, "learning_rate": 2.4010547887639014e-06, "loss": 0.0013, "step": 129550 }, { "epoch": 2.119937822138591, "grad_norm": 0.4195724129676819, "learning_rate": 2.4002415985522848e-06, "loss": 0.0014, "step": 129560 }, { "epoch": 2.120101448089667, "grad_norm": 0.04630810767412186, "learning_rate": 2.3994285025756615e-06, "loss": 0.0004, "step": 129570 }, { "epoch": 2.1202650740407427, "grad_norm": 0.13098259270191193, "learning_rate": 2.3986155008635066e-06, "loss": 0.001, "step": 129580 }, { "epoch": 2.1204286999918187, "grad_norm": 0.05063588544726372, "learning_rate": 2.3978025934452854e-06, "loss": 0.0005, "step": 129590 }, { "epoch": 2.1205923259428947, "grad_norm": 0.055887069553136826, "learning_rate": 2.396989780350468e-06, "loss": 0.0011, "step": 129600 }, { "epoch": 2.1207559518939703, "grad_norm": 0.22654558718204498, "learning_rate": 2.396177061608513e-06, "loss": 0.0005, "step": 129610 }, { "epoch": 2.1209195778450463, "grad_norm": 0.009620541706681252, "learning_rate": 2.3953644372488833e-06, "loss": 0.0017, "step": 129620 }, { "epoch": 2.1210832037961223, "grad_norm": 0.04028141126036644, "learning_rate": 2.394551907301031e-06, "loss": 0.0011, "step": 129630 }, { "epoch": 2.121246829747198, "grad_norm": 0.012738639488816261, "learning_rate": 2.3937394717944116e-06, "loss": 0.0006, "step": 129640 }, { "epoch": 2.121410455698274, "grad_norm": 0.05119181051850319, "learning_rate": 2.3929271307584726e-06, "loss": 0.0012, "step": 129650 }, { "epoch": 2.12157408164935, "grad_norm": 0.08494886010885239, "learning_rate": 2.392114884222657e-06, "loss": 0.0006, "step": 129660 }, { "epoch": 2.1217377076004253, "grad_norm": 0.033152852207422256, "learning_rate": 2.391302732216411e-06, "loss": 0.0009, "step": 129670 }, { "epoch": 2.1219013335515013, "grad_norm": 0.06638175994157791, "learning_rate": 2.39049067476917e-06, "loss": 0.0008, "step": 129680 }, { "epoch": 2.122064959502577, "grad_norm": 0.024622250348329544, "learning_rate": 2.389678711910371e-06, "loss": 0.0012, "step": 129690 }, { "epoch": 2.122228585453653, "grad_norm": 0.08107002824544907, "learning_rate": 2.3888668436694434e-06, "loss": 0.001, "step": 129700 }, { "epoch": 2.122392211404729, "grad_norm": 0.06753237545490265, "learning_rate": 2.3880550700758187e-06, "loss": 0.0006, "step": 129710 }, { "epoch": 2.1225558373558044, "grad_norm": 0.08426906168460846, "learning_rate": 2.3872433911589188e-06, "loss": 0.0011, "step": 129720 }, { "epoch": 2.1227194633068804, "grad_norm": 0.07835567742586136, "learning_rate": 2.3864318069481662e-06, "loss": 0.0008, "step": 129730 }, { "epoch": 2.1228830892579564, "grad_norm": 0.0586898997426033, "learning_rate": 2.3856203174729823e-06, "loss": 0.0008, "step": 129740 }, { "epoch": 2.123046715209032, "grad_norm": 0.0021694970782846212, "learning_rate": 2.384808922762775e-06, "loss": 0.0003, "step": 129750 }, { "epoch": 2.123210341160108, "grad_norm": 0.10634828358888626, "learning_rate": 2.383997622846961e-06, "loss": 0.0014, "step": 129760 }, { "epoch": 2.123373967111184, "grad_norm": 0.06046171858906746, "learning_rate": 2.3831864177549436e-06, "loss": 0.0005, "step": 129770 }, { "epoch": 2.1235375930622595, "grad_norm": 0.021502366289496422, "learning_rate": 2.3823753075161287e-06, "loss": 0.0012, "step": 129780 }, { "epoch": 2.1237012190133355, "grad_norm": 0.035366907715797424, "learning_rate": 2.38156429215992e-06, "loss": 0.001, "step": 129790 }, { "epoch": 2.1238648449644115, "grad_norm": 0.10414451360702515, "learning_rate": 2.380753371715711e-06, "loss": 0.0005, "step": 129800 }, { "epoch": 2.124028470915487, "grad_norm": 0.059287555515766144, "learning_rate": 2.379942546212898e-06, "loss": 0.0008, "step": 129810 }, { "epoch": 2.124192096866563, "grad_norm": 0.06971880048513412, "learning_rate": 2.3791318156808698e-06, "loss": 0.0011, "step": 129820 }, { "epoch": 2.124355722817639, "grad_norm": 0.04357230290770531, "learning_rate": 2.3783211801490155e-06, "loss": 0.0008, "step": 129830 }, { "epoch": 2.1245193487687146, "grad_norm": 0.002090972615405917, "learning_rate": 2.3775106396467167e-06, "loss": 0.0008, "step": 129840 }, { "epoch": 2.1246829747197906, "grad_norm": 0.020743712782859802, "learning_rate": 2.3767001942033537e-06, "loss": 0.0004, "step": 129850 }, { "epoch": 2.1248466006708666, "grad_norm": 0.056417014449834824, "learning_rate": 2.375889843848305e-06, "loss": 0.0006, "step": 129860 }, { "epoch": 2.125010226621942, "grad_norm": 0.04254251345992088, "learning_rate": 2.3750795886109403e-06, "loss": 0.0007, "step": 129870 }, { "epoch": 2.125173852573018, "grad_norm": 0.05097575485706329, "learning_rate": 2.3742694285206334e-06, "loss": 0.0012, "step": 129880 }, { "epoch": 2.1253374785240937, "grad_norm": 0.06107296049594879, "learning_rate": 2.373459363606748e-06, "loss": 0.0011, "step": 129890 }, { "epoch": 2.1255011044751697, "grad_norm": 0.06233549490571022, "learning_rate": 2.372649393898649e-06, "loss": 0.001, "step": 129900 }, { "epoch": 2.1256647304262457, "grad_norm": 0.07916392385959625, "learning_rate": 2.371839519425693e-06, "loss": 0.0011, "step": 129910 }, { "epoch": 2.1258283563773213, "grad_norm": 0.0730438157916069, "learning_rate": 2.3710297402172404e-06, "loss": 0.0007, "step": 129920 }, { "epoch": 2.1259919823283973, "grad_norm": 0.11279507726430893, "learning_rate": 2.370220056302639e-06, "loss": 0.0008, "step": 129930 }, { "epoch": 2.1261556082794733, "grad_norm": 0.04357591271400452, "learning_rate": 2.369410467711242e-06, "loss": 0.0007, "step": 129940 }, { "epoch": 2.126319234230549, "grad_norm": 0.09587182849645615, "learning_rate": 2.3686009744723924e-06, "loss": 0.0013, "step": 129950 }, { "epoch": 2.126482860181625, "grad_norm": 0.005223568994551897, "learning_rate": 2.3677915766154312e-06, "loss": 0.0009, "step": 129960 }, { "epoch": 2.126646486132701, "grad_norm": 0.07404561340808868, "learning_rate": 2.366982274169702e-06, "loss": 0.0011, "step": 129970 }, { "epoch": 2.1268101120837763, "grad_norm": 0.12952719628810883, "learning_rate": 2.3661730671645346e-06, "loss": 0.0012, "step": 129980 }, { "epoch": 2.1269737380348523, "grad_norm": 0.07653428614139557, "learning_rate": 2.3653639556292646e-06, "loss": 0.0004, "step": 129990 }, { "epoch": 2.1271373639859283, "grad_norm": 0.010169204324483871, "learning_rate": 2.3645549395932177e-06, "loss": 0.001, "step": 130000 }, { "epoch": 2.127300989937004, "grad_norm": 0.04137658700346947, "learning_rate": 2.36374601908572e-06, "loss": 0.0005, "step": 130010 }, { "epoch": 2.12746461588808, "grad_norm": 0.09358082711696625, "learning_rate": 2.3629371941360952e-06, "loss": 0.0004, "step": 130020 }, { "epoch": 2.127628241839156, "grad_norm": 0.07212058454751968, "learning_rate": 2.3621284647736563e-06, "loss": 0.0009, "step": 130030 }, { "epoch": 2.1277918677902314, "grad_norm": 0.07064887136220932, "learning_rate": 2.3613198310277253e-06, "loss": 0.001, "step": 130040 }, { "epoch": 2.1279554937413074, "grad_norm": 0.08241944015026093, "learning_rate": 2.360511292927605e-06, "loss": 0.0009, "step": 130050 }, { "epoch": 2.128119119692383, "grad_norm": 0.13032805919647217, "learning_rate": 2.3597028505026052e-06, "loss": 0.001, "step": 130060 }, { "epoch": 2.128282745643459, "grad_norm": 0.038160644471645355, "learning_rate": 2.3588945037820342e-06, "loss": 0.0009, "step": 130070 }, { "epoch": 2.128446371594535, "grad_norm": 0.016643980517983437, "learning_rate": 2.3580862527951874e-06, "loss": 0.0008, "step": 130080 }, { "epoch": 2.1286099975456105, "grad_norm": 0.0335405059158802, "learning_rate": 2.3572780975713664e-06, "loss": 0.0014, "step": 130090 }, { "epoch": 2.1287736234966865, "grad_norm": 0.15138103067874908, "learning_rate": 2.356470038139861e-06, "loss": 0.0011, "step": 130100 }, { "epoch": 2.1289372494477625, "grad_norm": 0.04020567983388901, "learning_rate": 2.355662074529965e-06, "loss": 0.0009, "step": 130110 }, { "epoch": 2.129100875398838, "grad_norm": 0.04866993427276611, "learning_rate": 2.3548542067709607e-06, "loss": 0.0008, "step": 130120 }, { "epoch": 2.129264501349914, "grad_norm": 0.044186897575855255, "learning_rate": 2.3540464348921365e-06, "loss": 0.0005, "step": 130130 }, { "epoch": 2.12942812730099, "grad_norm": 0.0282368715852499, "learning_rate": 2.3532387589227695e-06, "loss": 0.0006, "step": 130140 }, { "epoch": 2.1295917532520656, "grad_norm": 0.07377664744853973, "learning_rate": 2.3524311788921344e-06, "loss": 0.0011, "step": 130150 }, { "epoch": 2.1297553792031416, "grad_norm": 0.014379864558577538, "learning_rate": 2.3516236948295072e-06, "loss": 0.0011, "step": 130160 }, { "epoch": 2.1299190051542176, "grad_norm": 0.03418883681297302, "learning_rate": 2.3508163067641547e-06, "loss": 0.0004, "step": 130170 }, { "epoch": 2.130082631105293, "grad_norm": 0.06331291794776917, "learning_rate": 2.350009014725346e-06, "loss": 0.0007, "step": 130180 }, { "epoch": 2.130246257056369, "grad_norm": 0.19612273573875427, "learning_rate": 2.3492018187423387e-06, "loss": 0.0007, "step": 130190 }, { "epoch": 2.130409883007445, "grad_norm": 0.012460467405617237, "learning_rate": 2.348394718844397e-06, "loss": 0.0014, "step": 130200 }, { "epoch": 2.1305735089585207, "grad_norm": 0.06946510076522827, "learning_rate": 2.347587715060771e-06, "loss": 0.0006, "step": 130210 }, { "epoch": 2.1307371349095967, "grad_norm": 0.07056104391813278, "learning_rate": 2.346780807420716e-06, "loss": 0.0011, "step": 130220 }, { "epoch": 2.1309007608606727, "grad_norm": 0.1000114232301712, "learning_rate": 2.3459739959534816e-06, "loss": 0.0013, "step": 130230 }, { "epoch": 2.1310643868117483, "grad_norm": 0.06651310622692108, "learning_rate": 2.345167280688311e-06, "loss": 0.0009, "step": 130240 }, { "epoch": 2.1312280127628243, "grad_norm": 0.08983628451824188, "learning_rate": 2.3443606616544463e-06, "loss": 0.0006, "step": 130250 }, { "epoch": 2.1313916387139002, "grad_norm": 0.03396475315093994, "learning_rate": 2.343554138881123e-06, "loss": 0.0005, "step": 130260 }, { "epoch": 2.131555264664976, "grad_norm": 0.051050715148448944, "learning_rate": 2.3427477123975773e-06, "loss": 0.0007, "step": 130270 }, { "epoch": 2.131718890616052, "grad_norm": 0.045728445053100586, "learning_rate": 2.3419413822330424e-06, "loss": 0.0005, "step": 130280 }, { "epoch": 2.1318825165671274, "grad_norm": 0.07560687512159348, "learning_rate": 2.3411351484167414e-06, "loss": 0.0016, "step": 130290 }, { "epoch": 2.1320461425182033, "grad_norm": 0.025737281888723373, "learning_rate": 2.3403290109779036e-06, "loss": 0.0009, "step": 130300 }, { "epoch": 2.1322097684692793, "grad_norm": 0.09985017776489258, "learning_rate": 2.339522969945745e-06, "loss": 0.0006, "step": 130310 }, { "epoch": 2.132373394420355, "grad_norm": 0.039045870304107666, "learning_rate": 2.3387170253494858e-06, "loss": 0.0007, "step": 130320 }, { "epoch": 2.132537020371431, "grad_norm": 0.010081917978823185, "learning_rate": 2.3379111772183366e-06, "loss": 0.0008, "step": 130330 }, { "epoch": 2.132700646322507, "grad_norm": 0.29969823360443115, "learning_rate": 2.3371054255815107e-06, "loss": 0.0016, "step": 130340 }, { "epoch": 2.1328642722735824, "grad_norm": 0.023552823811769485, "learning_rate": 2.3362997704682133e-06, "loss": 0.0012, "step": 130350 }, { "epoch": 2.1330278982246584, "grad_norm": 0.1577552706003189, "learning_rate": 2.335494211907645e-06, "loss": 0.0011, "step": 130360 }, { "epoch": 2.1331915241757344, "grad_norm": 0.1033872663974762, "learning_rate": 2.33468874992901e-06, "loss": 0.0007, "step": 130370 }, { "epoch": 2.13335515012681, "grad_norm": 0.06565041840076447, "learning_rate": 2.3338833845614995e-06, "loss": 0.0007, "step": 130380 }, { "epoch": 2.133518776077886, "grad_norm": 0.11987122148275375, "learning_rate": 2.33307811583431e-06, "loss": 0.0015, "step": 130390 }, { "epoch": 2.133682402028962, "grad_norm": 0.06925756484270096, "learning_rate": 2.332272943776628e-06, "loss": 0.0012, "step": 130400 }, { "epoch": 2.1338460279800375, "grad_norm": 0.0964903011918068, "learning_rate": 2.331467868417642e-06, "loss": 0.0009, "step": 130410 }, { "epoch": 2.1340096539311135, "grad_norm": 0.08841458708047867, "learning_rate": 2.3306628897865297e-06, "loss": 0.0011, "step": 130420 }, { "epoch": 2.1341732798821895, "grad_norm": 0.03820227086544037, "learning_rate": 2.3298580079124723e-06, "loss": 0.0006, "step": 130430 }, { "epoch": 2.134336905833265, "grad_norm": 0.006761993281543255, "learning_rate": 2.329053222824648e-06, "loss": 0.0019, "step": 130440 }, { "epoch": 2.134500531784341, "grad_norm": 0.11034632474184036, "learning_rate": 2.3282485345522215e-06, "loss": 0.0012, "step": 130450 }, { "epoch": 2.1346641577354166, "grad_norm": 0.08911069482564926, "learning_rate": 2.327443943124366e-06, "loss": 0.0013, "step": 130460 }, { "epoch": 2.1348277836864926, "grad_norm": 0.04564070701599121, "learning_rate": 2.326639448570242e-06, "loss": 0.0005, "step": 130470 }, { "epoch": 2.1349914096375686, "grad_norm": 0.023495858535170555, "learning_rate": 2.325835050919015e-06, "loss": 0.0004, "step": 130480 }, { "epoch": 2.135155035588644, "grad_norm": 0.05203722044825554, "learning_rate": 2.3250307501998385e-06, "loss": 0.0005, "step": 130490 }, { "epoch": 2.13531866153972, "grad_norm": 0.11222314834594727, "learning_rate": 2.3242265464418684e-06, "loss": 0.0007, "step": 130500 }, { "epoch": 2.135482287490796, "grad_norm": 0.08663816750049591, "learning_rate": 2.3234224396742563e-06, "loss": 0.0007, "step": 130510 }, { "epoch": 2.1356459134418717, "grad_norm": 0.1006455048918724, "learning_rate": 2.322618429926146e-06, "loss": 0.0011, "step": 130520 }, { "epoch": 2.1358095393929477, "grad_norm": 0.23484046757221222, "learning_rate": 2.3218145172266846e-06, "loss": 0.0006, "step": 130530 }, { "epoch": 2.1359731653440237, "grad_norm": 0.08672276884317398, "learning_rate": 2.32101070160501e-06, "loss": 0.0014, "step": 130540 }, { "epoch": 2.1361367912950993, "grad_norm": 0.052922364324331284, "learning_rate": 2.3202069830902567e-06, "loss": 0.0009, "step": 130550 }, { "epoch": 2.1363004172461753, "grad_norm": 0.06208193674683571, "learning_rate": 2.319403361711562e-06, "loss": 0.001, "step": 130560 }, { "epoch": 2.1364640431972513, "grad_norm": 0.13968831300735474, "learning_rate": 2.3185998374980506e-06, "loss": 0.001, "step": 130570 }, { "epoch": 2.136627669148327, "grad_norm": 0.051334232091903687, "learning_rate": 2.3177964104788527e-06, "loss": 0.001, "step": 130580 }, { "epoch": 2.136791295099403, "grad_norm": 0.008811775594949722, "learning_rate": 2.316993080683086e-06, "loss": 0.0017, "step": 130590 }, { "epoch": 2.136954921050479, "grad_norm": 0.07008931040763855, "learning_rate": 2.3161898481398735e-06, "loss": 0.0009, "step": 130600 }, { "epoch": 2.1371185470015543, "grad_norm": 0.12066005170345306, "learning_rate": 2.315386712878327e-06, "loss": 0.0009, "step": 130610 }, { "epoch": 2.1372821729526303, "grad_norm": 0.15701177716255188, "learning_rate": 2.3145836749275617e-06, "loss": 0.0012, "step": 130620 }, { "epoch": 2.1374457989037063, "grad_norm": 0.1830628365278244, "learning_rate": 2.3137807343166823e-06, "loss": 0.0011, "step": 130630 }, { "epoch": 2.137609424854782, "grad_norm": 0.09408551454544067, "learning_rate": 2.312977891074797e-06, "loss": 0.0006, "step": 130640 }, { "epoch": 2.137773050805858, "grad_norm": 0.16284410655498505, "learning_rate": 2.312175145231005e-06, "loss": 0.0013, "step": 130650 }, { "epoch": 2.137936676756934, "grad_norm": 0.06853299587965012, "learning_rate": 2.311372496814402e-06, "loss": 0.0008, "step": 130660 }, { "epoch": 2.1381003027080094, "grad_norm": 0.03175092115998268, "learning_rate": 2.310569945854086e-06, "loss": 0.0006, "step": 130670 }, { "epoch": 2.1382639286590854, "grad_norm": 0.060112301260232925, "learning_rate": 2.3097674923791435e-06, "loss": 0.0008, "step": 130680 }, { "epoch": 2.138427554610161, "grad_norm": 0.08591391891241074, "learning_rate": 2.3089651364186655e-06, "loss": 0.0009, "step": 130690 }, { "epoch": 2.138591180561237, "grad_norm": 0.1547025591135025, "learning_rate": 2.3081628780017318e-06, "loss": 0.0007, "step": 130700 }, { "epoch": 2.138754806512313, "grad_norm": 0.034328050911426544, "learning_rate": 2.3073607171574243e-06, "loss": 0.0013, "step": 130710 }, { "epoch": 2.1389184324633885, "grad_norm": 0.2480616718530655, "learning_rate": 2.3065586539148205e-06, "loss": 0.0012, "step": 130720 }, { "epoch": 2.1390820584144645, "grad_norm": 0.1626928150653839, "learning_rate": 2.305756688302991e-06, "loss": 0.0008, "step": 130730 }, { "epoch": 2.1392456843655405, "grad_norm": 0.06996849179267883, "learning_rate": 2.304954820351009e-06, "loss": 0.0017, "step": 130740 }, { "epoch": 2.139409310316616, "grad_norm": 0.05595912039279938, "learning_rate": 2.304153050087934e-06, "loss": 0.0009, "step": 130750 }, { "epoch": 2.139572936267692, "grad_norm": 0.08856242895126343, "learning_rate": 2.3033513775428333e-06, "loss": 0.0006, "step": 130760 }, { "epoch": 2.139736562218768, "grad_norm": 0.04912002757191658, "learning_rate": 2.302549802744762e-06, "loss": 0.0006, "step": 130770 }, { "epoch": 2.1399001881698436, "grad_norm": 0.046053074300289154, "learning_rate": 2.301748325722778e-06, "loss": 0.0005, "step": 130780 }, { "epoch": 2.1400638141209196, "grad_norm": 0.025236330926418304, "learning_rate": 2.3009469465059337e-06, "loss": 0.0007, "step": 130790 }, { "epoch": 2.1402274400719956, "grad_norm": 0.05236167460680008, "learning_rate": 2.3001456651232735e-06, "loss": 0.001, "step": 130800 }, { "epoch": 2.140391066023071, "grad_norm": 0.08195017278194427, "learning_rate": 2.2993444816038456e-06, "loss": 0.0005, "step": 130810 }, { "epoch": 2.140554691974147, "grad_norm": 0.06903974711894989, "learning_rate": 2.298543395976688e-06, "loss": 0.0007, "step": 130820 }, { "epoch": 2.1407183179252227, "grad_norm": 0.03243299946188927, "learning_rate": 2.297742408270841e-06, "loss": 0.0006, "step": 130830 }, { "epoch": 2.1408819438762987, "grad_norm": 0.008946661837399006, "learning_rate": 2.296941518515337e-06, "loss": 0.0004, "step": 130840 }, { "epoch": 2.1410455698273747, "grad_norm": 0.05911422148346901, "learning_rate": 2.296140726739205e-06, "loss": 0.0017, "step": 130850 }, { "epoch": 2.1412091957784503, "grad_norm": 0.030707096680998802, "learning_rate": 2.295340032971474e-06, "loss": 0.0008, "step": 130860 }, { "epoch": 2.1413728217295263, "grad_norm": 0.038526423275470734, "learning_rate": 2.2945394372411656e-06, "loss": 0.0008, "step": 130870 }, { "epoch": 2.1415364476806023, "grad_norm": 0.03181355819106102, "learning_rate": 2.2937389395773013e-06, "loss": 0.0005, "step": 130880 }, { "epoch": 2.141700073631678, "grad_norm": 0.11275369673967361, "learning_rate": 2.2929385400088944e-06, "loss": 0.0004, "step": 130890 }, { "epoch": 2.141863699582754, "grad_norm": 0.014903356321156025, "learning_rate": 2.292138238564961e-06, "loss": 0.0015, "step": 130900 }, { "epoch": 2.14202732553383, "grad_norm": 0.0010434207506477833, "learning_rate": 2.291338035274507e-06, "loss": 0.0003, "step": 130910 }, { "epoch": 2.1421909514849053, "grad_norm": 0.1800602823495865, "learning_rate": 2.2905379301665403e-06, "loss": 0.0008, "step": 130920 }, { "epoch": 2.1423545774359813, "grad_norm": 0.054574571549892426, "learning_rate": 2.2897379232700603e-06, "loss": 0.0013, "step": 130930 }, { "epoch": 2.1425182033870573, "grad_norm": 0.045944079756736755, "learning_rate": 2.288938014614068e-06, "loss": 0.0005, "step": 130940 }, { "epoch": 2.142681829338133, "grad_norm": 0.035358306020498276, "learning_rate": 2.288138204227557e-06, "loss": 0.0008, "step": 130950 }, { "epoch": 2.142845455289209, "grad_norm": 0.097017303109169, "learning_rate": 2.287338492139516e-06, "loss": 0.001, "step": 130960 }, { "epoch": 2.143009081240285, "grad_norm": 0.06005150079727173, "learning_rate": 2.2865388783789374e-06, "loss": 0.0006, "step": 130970 }, { "epoch": 2.1431727071913604, "grad_norm": 0.22261619567871094, "learning_rate": 2.2857393629748005e-06, "loss": 0.001, "step": 130980 }, { "epoch": 2.1433363331424364, "grad_norm": 0.1900072991847992, "learning_rate": 2.2849399459560883e-06, "loss": 0.0012, "step": 130990 }, { "epoch": 2.1434999590935124, "grad_norm": 0.04064079001545906, "learning_rate": 2.2841406273517795e-06, "loss": 0.0012, "step": 131000 }, { "epoch": 2.143663585044588, "grad_norm": 0.22620823979377747, "learning_rate": 2.283341407190844e-06, "loss": 0.0014, "step": 131010 }, { "epoch": 2.143827210995664, "grad_norm": 0.020436285063624382, "learning_rate": 2.282542285502255e-06, "loss": 0.0017, "step": 131020 }, { "epoch": 2.14399083694674, "grad_norm": 0.15270817279815674, "learning_rate": 2.281743262314975e-06, "loss": 0.0008, "step": 131030 }, { "epoch": 2.1441544628978155, "grad_norm": 0.24394603073596954, "learning_rate": 2.280944337657972e-06, "loss": 0.001, "step": 131040 }, { "epoch": 2.1443180888488915, "grad_norm": 0.03933184593915939, "learning_rate": 2.280145511560198e-06, "loss": 0.0006, "step": 131050 }, { "epoch": 2.144481714799967, "grad_norm": 0.02958499826490879, "learning_rate": 2.279346784050612e-06, "loss": 0.0006, "step": 131060 }, { "epoch": 2.144645340751043, "grad_norm": 0.05615805834531784, "learning_rate": 2.2785481551581683e-06, "loss": 0.0007, "step": 131070 }, { "epoch": 2.144808966702119, "grad_norm": 0.25226667523384094, "learning_rate": 2.2777496249118113e-06, "loss": 0.0007, "step": 131080 }, { "epoch": 2.1449725926531946, "grad_norm": 0.06918755918741226, "learning_rate": 2.2769511933404897e-06, "loss": 0.0008, "step": 131090 }, { "epoch": 2.1451362186042706, "grad_norm": 0.09297676384449005, "learning_rate": 2.2761528604731408e-06, "loss": 0.0015, "step": 131100 }, { "epoch": 2.1452998445553466, "grad_norm": 0.0052992356941103935, "learning_rate": 2.2753546263387054e-06, "loss": 0.0012, "step": 131110 }, { "epoch": 2.145463470506422, "grad_norm": 0.14361236989498138, "learning_rate": 2.274556490966115e-06, "loss": 0.0008, "step": 131120 }, { "epoch": 2.145627096457498, "grad_norm": 0.03398771584033966, "learning_rate": 2.2737584543843035e-06, "loss": 0.0007, "step": 131130 }, { "epoch": 2.145790722408574, "grad_norm": 0.02347593382000923, "learning_rate": 2.272960516622195e-06, "loss": 0.0013, "step": 131140 }, { "epoch": 2.1459543483596497, "grad_norm": 0.005705694667994976, "learning_rate": 2.272162677708712e-06, "loss": 0.0011, "step": 131150 }, { "epoch": 2.1461179743107257, "grad_norm": 0.07995748519897461, "learning_rate": 2.271364937672777e-06, "loss": 0.0007, "step": 131160 }, { "epoch": 2.1462816002618017, "grad_norm": 0.030589068308472633, "learning_rate": 2.270567296543304e-06, "loss": 0.0007, "step": 131170 }, { "epoch": 2.1464452262128773, "grad_norm": 0.06749261170625687, "learning_rate": 2.2697697543492076e-06, "loss": 0.0015, "step": 131180 }, { "epoch": 2.1466088521639533, "grad_norm": 0.0033417295198887587, "learning_rate": 2.2689723111193938e-06, "loss": 0.0009, "step": 131190 }, { "epoch": 2.1467724781150292, "grad_norm": 0.07669030874967575, "learning_rate": 2.2681749668827724e-06, "loss": 0.0012, "step": 131200 }, { "epoch": 2.146936104066105, "grad_norm": 0.0583362951874733, "learning_rate": 2.26737772166824e-06, "loss": 0.0007, "step": 131210 }, { "epoch": 2.147099730017181, "grad_norm": 0.06939921528100967, "learning_rate": 2.266580575504698e-06, "loss": 0.0006, "step": 131220 }, { "epoch": 2.1472633559682563, "grad_norm": 0.006901615299284458, "learning_rate": 2.2657835284210427e-06, "loss": 0.0005, "step": 131230 }, { "epoch": 2.1474269819193323, "grad_norm": 0.06196759268641472, "learning_rate": 2.2649865804461623e-06, "loss": 0.0005, "step": 131240 }, { "epoch": 2.1475906078704083, "grad_norm": 0.028714854270219803, "learning_rate": 2.2641897316089455e-06, "loss": 0.001, "step": 131250 }, { "epoch": 2.147754233821484, "grad_norm": 0.07110073417425156, "learning_rate": 2.2633929819382733e-06, "loss": 0.0007, "step": 131260 }, { "epoch": 2.14791785977256, "grad_norm": 0.05138629302382469, "learning_rate": 2.2625963314630285e-06, "loss": 0.0006, "step": 131270 }, { "epoch": 2.148081485723636, "grad_norm": 0.1584242582321167, "learning_rate": 2.2617997802120896e-06, "loss": 0.0013, "step": 131280 }, { "epoch": 2.1482451116747114, "grad_norm": 0.13005654513835907, "learning_rate": 2.261003328214325e-06, "loss": 0.0016, "step": 131290 }, { "epoch": 2.1484087376257874, "grad_norm": 0.05873752385377884, "learning_rate": 2.260206975498609e-06, "loss": 0.0007, "step": 131300 }, { "epoch": 2.1485723635768634, "grad_norm": 0.34052443504333496, "learning_rate": 2.2594107220938035e-06, "loss": 0.001, "step": 131310 }, { "epoch": 2.148735989527939, "grad_norm": 0.07647483795881271, "learning_rate": 2.258614568028774e-06, "loss": 0.0013, "step": 131320 }, { "epoch": 2.148899615479015, "grad_norm": 0.0016928694676607847, "learning_rate": 2.257818513332376e-06, "loss": 0.0006, "step": 131330 }, { "epoch": 2.149063241430091, "grad_norm": 0.15298034250736237, "learning_rate": 2.257022558033468e-06, "loss": 0.0006, "step": 131340 }, { "epoch": 2.1492268673811665, "grad_norm": 0.013507881201803684, "learning_rate": 2.2562267021609e-06, "loss": 0.0006, "step": 131350 }, { "epoch": 2.1493904933322425, "grad_norm": 0.06394945085048676, "learning_rate": 2.2554309457435174e-06, "loss": 0.0007, "step": 131360 }, { "epoch": 2.1495541192833185, "grad_norm": 0.005180387292057276, "learning_rate": 2.2546352888101685e-06, "loss": 0.0009, "step": 131370 }, { "epoch": 2.149717745234394, "grad_norm": 0.04113948345184326, "learning_rate": 2.2538397313896904e-06, "loss": 0.0008, "step": 131380 }, { "epoch": 2.14988137118547, "grad_norm": 0.013941937126219273, "learning_rate": 2.2530442735109238e-06, "loss": 0.0006, "step": 131390 }, { "epoch": 2.150044997136546, "grad_norm": 0.06706994026899338, "learning_rate": 2.252248915202698e-06, "loss": 0.0006, "step": 131400 }, { "epoch": 2.1502086230876216, "grad_norm": 0.02786465361714363, "learning_rate": 2.2514536564938464e-06, "loss": 0.001, "step": 131410 }, { "epoch": 2.1503722490386976, "grad_norm": 0.060484081506729126, "learning_rate": 2.250658497413193e-06, "loss": 0.0006, "step": 131420 }, { "epoch": 2.1505358749897736, "grad_norm": 0.08929912000894547, "learning_rate": 2.2498634379895607e-06, "loss": 0.0008, "step": 131430 }, { "epoch": 2.150699500940849, "grad_norm": 0.004630949813872576, "learning_rate": 2.2490684782517704e-06, "loss": 0.0009, "step": 131440 }, { "epoch": 2.150863126891925, "grad_norm": 0.07582361251115799, "learning_rate": 2.248273618228636e-06, "loss": 0.0014, "step": 131450 }, { "epoch": 2.1510267528430007, "grad_norm": 0.15050379931926727, "learning_rate": 2.2474788579489703e-06, "loss": 0.001, "step": 131460 }, { "epoch": 2.1511903787940767, "grad_norm": 0.1495954990386963, "learning_rate": 2.2466841974415784e-06, "loss": 0.0008, "step": 131470 }, { "epoch": 2.1513540047451527, "grad_norm": 0.0027888508047908545, "learning_rate": 2.245889636735266e-06, "loss": 0.0006, "step": 131480 }, { "epoch": 2.1515176306962283, "grad_norm": 0.09791406989097595, "learning_rate": 2.245095175858838e-06, "loss": 0.0013, "step": 131490 }, { "epoch": 2.1516812566473043, "grad_norm": 0.019554410129785538, "learning_rate": 2.244300814841086e-06, "loss": 0.0011, "step": 131500 }, { "epoch": 2.1518448825983802, "grad_norm": 0.06986770033836365, "learning_rate": 2.2435065537108086e-06, "loss": 0.0009, "step": 131510 }, { "epoch": 2.152008508549456, "grad_norm": 0.004764092620462179, "learning_rate": 2.2427123924967914e-06, "loss": 0.0012, "step": 131520 }, { "epoch": 2.152172134500532, "grad_norm": 0.00810950342565775, "learning_rate": 2.241918331227825e-06, "loss": 0.0009, "step": 131530 }, { "epoch": 2.152335760451608, "grad_norm": 0.03504367172718048, "learning_rate": 2.2411243699326877e-06, "loss": 0.0009, "step": 131540 }, { "epoch": 2.1524993864026833, "grad_norm": 0.029177607968449593, "learning_rate": 2.240330508640164e-06, "loss": 0.0007, "step": 131550 }, { "epoch": 2.1526630123537593, "grad_norm": 0.1350104957818985, "learning_rate": 2.2395367473790264e-06, "loss": 0.0007, "step": 131560 }, { "epoch": 2.1528266383048353, "grad_norm": 0.061625488102436066, "learning_rate": 2.238743086178045e-06, "loss": 0.0013, "step": 131570 }, { "epoch": 2.152990264255911, "grad_norm": 0.07101751118898392, "learning_rate": 2.2379495250659925e-06, "loss": 0.0014, "step": 131580 }, { "epoch": 2.153153890206987, "grad_norm": 0.2722167670726776, "learning_rate": 2.2371560640716296e-06, "loss": 0.0017, "step": 131590 }, { "epoch": 2.1533175161580624, "grad_norm": 0.04820204898715019, "learning_rate": 2.2363627032237203e-06, "loss": 0.0008, "step": 131600 }, { "epoch": 2.1534811421091384, "grad_norm": 0.0014235300477594137, "learning_rate": 2.2355694425510192e-06, "loss": 0.0011, "step": 131610 }, { "epoch": 2.1536447680602144, "grad_norm": 0.20849184691905975, "learning_rate": 2.2347762820822832e-06, "loss": 0.0009, "step": 131620 }, { "epoch": 2.15380839401129, "grad_norm": 0.06146630272269249, "learning_rate": 2.2339832218462597e-06, "loss": 0.0008, "step": 131630 }, { "epoch": 2.153972019962366, "grad_norm": 0.227002814412117, "learning_rate": 2.2331902618716962e-06, "loss": 0.0006, "step": 131640 }, { "epoch": 2.154135645913442, "grad_norm": 0.004443961661309004, "learning_rate": 2.2323974021873397e-06, "loss": 0.0014, "step": 131650 }, { "epoch": 2.1542992718645175, "grad_norm": 0.06790367513895035, "learning_rate": 2.2316046428219214e-06, "loss": 0.001, "step": 131660 }, { "epoch": 2.1544628978155935, "grad_norm": 0.12898868322372437, "learning_rate": 2.230811983804183e-06, "loss": 0.0006, "step": 131670 }, { "epoch": 2.1546265237666695, "grad_norm": 0.024522315710783005, "learning_rate": 2.2300194251628534e-06, "loss": 0.0004, "step": 131680 }, { "epoch": 2.154790149717745, "grad_norm": 0.01787228137254715, "learning_rate": 2.2292269669266637e-06, "loss": 0.001, "step": 131690 }, { "epoch": 2.154953775668821, "grad_norm": 0.005904348101466894, "learning_rate": 2.2284346091243353e-06, "loss": 0.0004, "step": 131700 }, { "epoch": 2.155117401619897, "grad_norm": 0.058560553938150406, "learning_rate": 2.2276423517845906e-06, "loss": 0.0007, "step": 131710 }, { "epoch": 2.1552810275709726, "grad_norm": 0.03541845828294754, "learning_rate": 2.22685019493615e-06, "loss": 0.0007, "step": 131720 }, { "epoch": 2.1554446535220486, "grad_norm": 0.35124671459198, "learning_rate": 2.226058138607722e-06, "loss": 0.0011, "step": 131730 }, { "epoch": 2.1556082794731246, "grad_norm": 0.05504801869392395, "learning_rate": 2.225266182828022e-06, "loss": 0.0009, "step": 131740 }, { "epoch": 2.1557719054242, "grad_norm": 0.05280764773488045, "learning_rate": 2.2244743276257543e-06, "loss": 0.0012, "step": 131750 }, { "epoch": 2.155935531375276, "grad_norm": 0.22598738968372345, "learning_rate": 2.2236825730296192e-06, "loss": 0.001, "step": 131760 }, { "epoch": 2.156099157326352, "grad_norm": 0.06541605293750763, "learning_rate": 2.2228909190683197e-06, "loss": 0.0006, "step": 131770 }, { "epoch": 2.1562627832774277, "grad_norm": 0.0897442102432251, "learning_rate": 2.222099365770548e-06, "loss": 0.0006, "step": 131780 }, { "epoch": 2.1564264092285037, "grad_norm": 0.08767735958099365, "learning_rate": 2.2213079131649995e-06, "loss": 0.001, "step": 131790 }, { "epoch": 2.1565900351795797, "grad_norm": 0.06079059839248657, "learning_rate": 2.220516561280359e-06, "loss": 0.0006, "step": 131800 }, { "epoch": 2.1567536611306553, "grad_norm": 0.03120972216129303, "learning_rate": 2.2197253101453155e-06, "loss": 0.0006, "step": 131810 }, { "epoch": 2.1569172870817312, "grad_norm": 0.09705932438373566, "learning_rate": 2.2189341597885445e-06, "loss": 0.0008, "step": 131820 }, { "epoch": 2.157080913032807, "grad_norm": 0.09551506489515305, "learning_rate": 2.218143110238728e-06, "loss": 0.0009, "step": 131830 }, { "epoch": 2.157244538983883, "grad_norm": 0.053732823580503464, "learning_rate": 2.2173521615245363e-06, "loss": 0.0008, "step": 131840 }, { "epoch": 2.157408164934959, "grad_norm": 0.13685588538646698, "learning_rate": 2.216561313674642e-06, "loss": 0.0006, "step": 131850 }, { "epoch": 2.1575717908860343, "grad_norm": 0.19548673927783966, "learning_rate": 2.215770566717711e-06, "loss": 0.0011, "step": 131860 }, { "epoch": 2.1577354168371103, "grad_norm": 0.014646853320300579, "learning_rate": 2.2149799206824036e-06, "loss": 0.0003, "step": 131870 }, { "epoch": 2.1578990427881863, "grad_norm": 0.1305849254131317, "learning_rate": 2.2141893755973814e-06, "loss": 0.0007, "step": 131880 }, { "epoch": 2.158062668739262, "grad_norm": 0.007354858331382275, "learning_rate": 2.213398931491297e-06, "loss": 0.0008, "step": 131890 }, { "epoch": 2.158226294690338, "grad_norm": 0.046017296612262726, "learning_rate": 2.2126085883928065e-06, "loss": 0.001, "step": 131900 }, { "epoch": 2.158389920641414, "grad_norm": 0.0065455688163638115, "learning_rate": 2.2118183463305533e-06, "loss": 0.0004, "step": 131910 }, { "epoch": 2.1585535465924894, "grad_norm": 0.19899258017539978, "learning_rate": 2.211028205333184e-06, "loss": 0.0008, "step": 131920 }, { "epoch": 2.1587171725435654, "grad_norm": 0.08297527581453323, "learning_rate": 2.21023816542934e-06, "loss": 0.0005, "step": 131930 }, { "epoch": 2.1588807984946414, "grad_norm": 0.290002703666687, "learning_rate": 2.209448226647657e-06, "loss": 0.0012, "step": 131940 }, { "epoch": 2.159044424445717, "grad_norm": 0.008366294205188751, "learning_rate": 2.208658389016772e-06, "loss": 0.0011, "step": 131950 }, { "epoch": 2.159208050396793, "grad_norm": 0.13878047466278076, "learning_rate": 2.2078686525653075e-06, "loss": 0.0012, "step": 131960 }, { "epoch": 2.159371676347869, "grad_norm": 0.48104971647262573, "learning_rate": 2.207079017321896e-06, "loss": 0.0017, "step": 131970 }, { "epoch": 2.1595353022989445, "grad_norm": 0.045549146831035614, "learning_rate": 2.2062894833151547e-06, "loss": 0.0023, "step": 131980 }, { "epoch": 2.1596989282500205, "grad_norm": 0.31518876552581787, "learning_rate": 2.2055000505737063e-06, "loss": 0.0008, "step": 131990 }, { "epoch": 2.159862554201096, "grad_norm": 0.08737169206142426, "learning_rate": 2.204710719126166e-06, "loss": 0.0007, "step": 132000 }, { "epoch": 2.160026180152172, "grad_norm": 0.05547555536031723, "learning_rate": 2.2039214890011423e-06, "loss": 0.0018, "step": 132010 }, { "epoch": 2.160189806103248, "grad_norm": 0.07971277832984924, "learning_rate": 2.203132360227246e-06, "loss": 0.0006, "step": 132020 }, { "epoch": 2.1603534320543236, "grad_norm": 0.06319615244865417, "learning_rate": 2.2023433328330774e-06, "loss": 0.001, "step": 132030 }, { "epoch": 2.1605170580053996, "grad_norm": 0.09186331927776337, "learning_rate": 2.201554406847241e-06, "loss": 0.0011, "step": 132040 }, { "epoch": 2.1606806839564756, "grad_norm": 0.15043455362319946, "learning_rate": 2.200765582298332e-06, "loss": 0.0008, "step": 132050 }, { "epoch": 2.160844309907551, "grad_norm": 0.002870648168027401, "learning_rate": 2.1999768592149407e-06, "loss": 0.0008, "step": 132060 }, { "epoch": 2.161007935858627, "grad_norm": 0.13395456969738007, "learning_rate": 2.19918823762566e-06, "loss": 0.0007, "step": 132070 }, { "epoch": 2.161171561809703, "grad_norm": 0.07483165711164474, "learning_rate": 2.198399717559073e-06, "loss": 0.0013, "step": 132080 }, { "epoch": 2.1613351877607787, "grad_norm": 0.08251338452100754, "learning_rate": 2.1976112990437644e-06, "loss": 0.0008, "step": 132090 }, { "epoch": 2.1614988137118547, "grad_norm": 0.14885979890823364, "learning_rate": 2.196822982108308e-06, "loss": 0.0012, "step": 132100 }, { "epoch": 2.1616624396629307, "grad_norm": 0.008364741690456867, "learning_rate": 2.196034766781284e-06, "loss": 0.0007, "step": 132110 }, { "epoch": 2.1618260656140063, "grad_norm": 0.03293644264340401, "learning_rate": 2.1952466530912592e-06, "loss": 0.0008, "step": 132120 }, { "epoch": 2.1619896915650822, "grad_norm": 0.04341413080692291, "learning_rate": 2.1944586410668035e-06, "loss": 0.0006, "step": 132130 }, { "epoch": 2.1621533175161582, "grad_norm": 0.16618528962135315, "learning_rate": 2.1936707307364767e-06, "loss": 0.0012, "step": 132140 }, { "epoch": 2.162316943467234, "grad_norm": 0.11989282071590424, "learning_rate": 2.1928829221288438e-06, "loss": 0.001, "step": 132150 }, { "epoch": 2.16248056941831, "grad_norm": 0.04387267306447029, "learning_rate": 2.1920952152724577e-06, "loss": 0.001, "step": 132160 }, { "epoch": 2.162644195369386, "grad_norm": 0.04715392366051674, "learning_rate": 2.1913076101958696e-06, "loss": 0.0005, "step": 132170 }, { "epoch": 2.1628078213204613, "grad_norm": 0.10019509494304657, "learning_rate": 2.1905201069276323e-06, "loss": 0.0012, "step": 132180 }, { "epoch": 2.1629714472715373, "grad_norm": 0.010975530371069908, "learning_rate": 2.189732705496287e-06, "loss": 0.0007, "step": 132190 }, { "epoch": 2.1631350732226133, "grad_norm": 0.060079388320446014, "learning_rate": 2.188945405930376e-06, "loss": 0.0015, "step": 132200 }, { "epoch": 2.163298699173689, "grad_norm": 0.070175401866436, "learning_rate": 2.188158208258441e-06, "loss": 0.0007, "step": 132210 }, { "epoch": 2.163462325124765, "grad_norm": 0.0512833297252655, "learning_rate": 2.18737111250901e-06, "loss": 0.0007, "step": 132220 }, { "epoch": 2.1636259510758404, "grad_norm": 0.04308553412556648, "learning_rate": 2.186584118710618e-06, "loss": 0.0008, "step": 132230 }, { "epoch": 2.1637895770269164, "grad_norm": 0.07017209380865097, "learning_rate": 2.1857972268917883e-06, "loss": 0.0015, "step": 132240 }, { "epoch": 2.1639532029779924, "grad_norm": 0.22650322318077087, "learning_rate": 2.1850104370810487e-06, "loss": 0.0014, "step": 132250 }, { "epoch": 2.164116828929068, "grad_norm": 0.07379472255706787, "learning_rate": 2.1842237493069116e-06, "loss": 0.0008, "step": 132260 }, { "epoch": 2.164280454880144, "grad_norm": 0.12927652895450592, "learning_rate": 2.183437163597897e-06, "loss": 0.001, "step": 132270 }, { "epoch": 2.16444408083122, "grad_norm": 0.11865617334842682, "learning_rate": 2.1826506799825165e-06, "loss": 0.0008, "step": 132280 }, { "epoch": 2.1646077067822955, "grad_norm": 0.0906696766614914, "learning_rate": 2.1818642984892756e-06, "loss": 0.0007, "step": 132290 }, { "epoch": 2.1647713327333715, "grad_norm": 0.007964309304952621, "learning_rate": 2.181078019146683e-06, "loss": 0.0005, "step": 132300 }, { "epoch": 2.1649349586844475, "grad_norm": 0.08176179975271225, "learning_rate": 2.1802918419832355e-06, "loss": 0.001, "step": 132310 }, { "epoch": 2.165098584635523, "grad_norm": 0.1684895157814026, "learning_rate": 2.179505767027433e-06, "loss": 0.0006, "step": 132320 }, { "epoch": 2.165262210586599, "grad_norm": 0.050997935235500336, "learning_rate": 2.178719794307766e-06, "loss": 0.001, "step": 132330 }, { "epoch": 2.165425836537675, "grad_norm": 0.005915137007832527, "learning_rate": 2.1779339238527274e-06, "loss": 0.0007, "step": 132340 }, { "epoch": 2.1655894624887506, "grad_norm": 0.04680750519037247, "learning_rate": 2.177148155690802e-06, "loss": 0.0009, "step": 132350 }, { "epoch": 2.1657530884398266, "grad_norm": 0.08636245131492615, "learning_rate": 2.1763624898504687e-06, "loss": 0.0011, "step": 132360 }, { "epoch": 2.165916714390902, "grad_norm": 0.03715136647224426, "learning_rate": 2.1755769263602113e-06, "loss": 0.0007, "step": 132370 }, { "epoch": 2.166080340341978, "grad_norm": 0.021826880052685738, "learning_rate": 2.1747914652484997e-06, "loss": 0.0006, "step": 132380 }, { "epoch": 2.166243966293054, "grad_norm": 0.030047036707401276, "learning_rate": 2.174006106543809e-06, "loss": 0.0016, "step": 132390 }, { "epoch": 2.1664075922441297, "grad_norm": 0.19203877449035645, "learning_rate": 2.1732208502746033e-06, "loss": 0.0007, "step": 132400 }, { "epoch": 2.1665712181952057, "grad_norm": 0.08410325646400452, "learning_rate": 2.1724356964693495e-06, "loss": 0.0008, "step": 132410 }, { "epoch": 2.1667348441462817, "grad_norm": 0.04899807274341583, "learning_rate": 2.171650645156504e-06, "loss": 0.001, "step": 132420 }, { "epoch": 2.1668984700973573, "grad_norm": 0.026054365560412407, "learning_rate": 2.1708656963645248e-06, "loss": 0.0008, "step": 132430 }, { "epoch": 2.1670620960484333, "grad_norm": 0.03904905542731285, "learning_rate": 2.170080850121866e-06, "loss": 0.0005, "step": 132440 }, { "epoch": 2.1672257219995092, "grad_norm": 0.05899961292743683, "learning_rate": 2.1692961064569757e-06, "loss": 0.0004, "step": 132450 }, { "epoch": 2.167389347950585, "grad_norm": 0.13511371612548828, "learning_rate": 2.1685114653982974e-06, "loss": 0.001, "step": 132460 }, { "epoch": 2.167552973901661, "grad_norm": 0.034165531396865845, "learning_rate": 2.1677269269742718e-06, "loss": 0.0011, "step": 132470 }, { "epoch": 2.167716599852737, "grad_norm": 0.2054072618484497, "learning_rate": 2.1669424912133376e-06, "loss": 0.0008, "step": 132480 }, { "epoch": 2.1678802258038123, "grad_norm": 0.002577691338956356, "learning_rate": 2.166158158143931e-06, "loss": 0.0006, "step": 132490 }, { "epoch": 2.1680438517548883, "grad_norm": 0.09754899144172668, "learning_rate": 2.1653739277944784e-06, "loss": 0.0006, "step": 132500 }, { "epoch": 2.1682074777059643, "grad_norm": 0.038507331162691116, "learning_rate": 2.1645898001934096e-06, "loss": 0.0007, "step": 132510 }, { "epoch": 2.16837110365704, "grad_norm": 0.08859949558973312, "learning_rate": 2.1638057753691444e-06, "loss": 0.001, "step": 132520 }, { "epoch": 2.168534729608116, "grad_norm": 0.12233471870422363, "learning_rate": 2.1630218533501052e-06, "loss": 0.0005, "step": 132530 }, { "epoch": 2.168698355559192, "grad_norm": 0.1635713428258896, "learning_rate": 2.162238034164703e-06, "loss": 0.0012, "step": 132540 }, { "epoch": 2.1688619815102674, "grad_norm": 0.0937272235751152, "learning_rate": 2.1614543178413533e-06, "loss": 0.0011, "step": 132550 }, { "epoch": 2.1690256074613434, "grad_norm": 0.1224890798330307, "learning_rate": 2.160670704408463e-06, "loss": 0.0006, "step": 132560 }, { "epoch": 2.1691892334124194, "grad_norm": 0.023271478712558746, "learning_rate": 2.159887193894433e-06, "loss": 0.0007, "step": 132570 }, { "epoch": 2.169352859363495, "grad_norm": 0.07572520524263382, "learning_rate": 2.1591037863276685e-06, "loss": 0.0006, "step": 132580 }, { "epoch": 2.169516485314571, "grad_norm": 0.13142816722393036, "learning_rate": 2.1583204817365623e-06, "loss": 0.0005, "step": 132590 }, { "epoch": 2.1696801112656465, "grad_norm": 0.09191661328077316, "learning_rate": 2.15753728014951e-06, "loss": 0.0008, "step": 132600 }, { "epoch": 2.1698437372167225, "grad_norm": 0.0891927033662796, "learning_rate": 2.1567541815948983e-06, "loss": 0.0008, "step": 132610 }, { "epoch": 2.1700073631677985, "grad_norm": 0.03349055349826813, "learning_rate": 2.155971186101115e-06, "loss": 0.0004, "step": 132620 }, { "epoch": 2.170170989118874, "grad_norm": 0.07418906688690186, "learning_rate": 2.1551882936965397e-06, "loss": 0.0017, "step": 132630 }, { "epoch": 2.17033461506995, "grad_norm": 0.2738913297653198, "learning_rate": 2.154405504409551e-06, "loss": 0.0006, "step": 132640 }, { "epoch": 2.170498241021026, "grad_norm": 0.11192761361598969, "learning_rate": 2.1536228182685276e-06, "loss": 0.0014, "step": 132650 }, { "epoch": 2.1706618669721016, "grad_norm": 0.04489920288324356, "learning_rate": 2.1528402353018325e-06, "loss": 0.0008, "step": 132660 }, { "epoch": 2.1708254929231776, "grad_norm": 0.096799835562706, "learning_rate": 2.152057755537837e-06, "loss": 0.0005, "step": 132670 }, { "epoch": 2.1709891188742536, "grad_norm": 0.060996923595666885, "learning_rate": 2.151275379004902e-06, "loss": 0.0011, "step": 132680 }, { "epoch": 2.171152744825329, "grad_norm": 0.05241464450955391, "learning_rate": 2.150493105731387e-06, "loss": 0.0004, "step": 132690 }, { "epoch": 2.171316370776405, "grad_norm": 0.0061174314469099045, "learning_rate": 2.14971093574565e-06, "loss": 0.0009, "step": 132700 }, { "epoch": 2.171479996727481, "grad_norm": 0.03174373134970665, "learning_rate": 2.14892886907604e-06, "loss": 0.0006, "step": 132710 }, { "epoch": 2.1716436226785567, "grad_norm": 0.06321625411510468, "learning_rate": 2.1481469057509074e-06, "loss": 0.0008, "step": 132720 }, { "epoch": 2.1718072486296327, "grad_norm": 0.0803573802113533, "learning_rate": 2.147365045798594e-06, "loss": 0.0007, "step": 132730 }, { "epoch": 2.1719708745807087, "grad_norm": 0.12570713460445404, "learning_rate": 2.146583289247443e-06, "loss": 0.001, "step": 132740 }, { "epoch": 2.1721345005317843, "grad_norm": 0.03365262970328331, "learning_rate": 2.1458016361257898e-06, "loss": 0.0011, "step": 132750 }, { "epoch": 2.1722981264828602, "grad_norm": 0.16729222238063812, "learning_rate": 2.145020086461966e-06, "loss": 0.0009, "step": 132760 }, { "epoch": 2.172461752433936, "grad_norm": 0.10984192788600922, "learning_rate": 2.1442386402843034e-06, "loss": 0.0006, "step": 132770 }, { "epoch": 2.172625378385012, "grad_norm": 0.012926003895699978, "learning_rate": 2.143457297621125e-06, "loss": 0.0008, "step": 132780 }, { "epoch": 2.172789004336088, "grad_norm": 0.042420223355293274, "learning_rate": 2.1426760585007557e-06, "loss": 0.0008, "step": 132790 }, { "epoch": 2.1729526302871633, "grad_norm": 0.20108425617218018, "learning_rate": 2.14189492295151e-06, "loss": 0.0011, "step": 132800 }, { "epoch": 2.1731162562382393, "grad_norm": 0.12637944519519806, "learning_rate": 2.141113891001706e-06, "loss": 0.0009, "step": 132810 }, { "epoch": 2.1732798821893153, "grad_norm": 0.010441829450428486, "learning_rate": 2.1403329626796503e-06, "loss": 0.0004, "step": 132820 }, { "epoch": 2.173443508140391, "grad_norm": 0.06600575149059296, "learning_rate": 2.1395521380136542e-06, "loss": 0.0013, "step": 132830 }, { "epoch": 2.173607134091467, "grad_norm": 0.13038602471351624, "learning_rate": 2.1387714170320154e-06, "loss": 0.0011, "step": 132840 }, { "epoch": 2.173770760042543, "grad_norm": 0.053065553307533264, "learning_rate": 2.1379907997630385e-06, "loss": 0.0007, "step": 132850 }, { "epoch": 2.1739343859936184, "grad_norm": 0.03505024313926697, "learning_rate": 2.1372102862350164e-06, "loss": 0.0005, "step": 132860 }, { "epoch": 2.1740980119446944, "grad_norm": 0.030295664444565773, "learning_rate": 2.1364298764762387e-06, "loss": 0.0003, "step": 132870 }, { "epoch": 2.1742616378957704, "grad_norm": 0.026615168899297714, "learning_rate": 2.1356495705149977e-06, "loss": 0.0004, "step": 132880 }, { "epoch": 2.174425263846846, "grad_norm": 0.08507993817329407, "learning_rate": 2.1348693683795736e-06, "loss": 0.0008, "step": 132890 }, { "epoch": 2.174588889797922, "grad_norm": 0.04860775172710419, "learning_rate": 2.13408927009825e-06, "loss": 0.0005, "step": 132900 }, { "epoch": 2.174752515748998, "grad_norm": 0.02069712057709694, "learning_rate": 2.1333092756993013e-06, "loss": 0.0007, "step": 132910 }, { "epoch": 2.1749161417000735, "grad_norm": 0.040829841047525406, "learning_rate": 2.132529385211001e-06, "loss": 0.0007, "step": 132920 }, { "epoch": 2.1750797676511495, "grad_norm": 0.0293737780302763, "learning_rate": 2.1317495986616204e-06, "loss": 0.0009, "step": 132930 }, { "epoch": 2.1752433936022255, "grad_norm": 0.03710537403821945, "learning_rate": 2.1309699160794218e-06, "loss": 0.0012, "step": 132940 }, { "epoch": 2.175407019553301, "grad_norm": 0.05738138034939766, "learning_rate": 2.130190337492672e-06, "loss": 0.0013, "step": 132950 }, { "epoch": 2.175570645504377, "grad_norm": 0.14531312882900238, "learning_rate": 2.1294108629296205e-06, "loss": 0.001, "step": 132960 }, { "epoch": 2.175734271455453, "grad_norm": 0.054931506514549255, "learning_rate": 2.128631492418527e-06, "loss": 0.0016, "step": 132970 }, { "epoch": 2.1758978974065286, "grad_norm": 0.11332526057958603, "learning_rate": 2.1278522259876423e-06, "loss": 0.0007, "step": 132980 }, { "epoch": 2.1760615233576046, "grad_norm": 0.04289693385362625, "learning_rate": 2.1270730636652094e-06, "loss": 0.0006, "step": 132990 }, { "epoch": 2.17622514930868, "grad_norm": 0.06438548117876053, "learning_rate": 2.1262940054794745e-06, "loss": 0.0006, "step": 133000 }, { "epoch": 2.176388775259756, "grad_norm": 0.005645431112498045, "learning_rate": 2.1255150514586737e-06, "loss": 0.0011, "step": 133010 }, { "epoch": 2.176552401210832, "grad_norm": 0.04957915470004082, "learning_rate": 2.124736201631045e-06, "loss": 0.0005, "step": 133020 }, { "epoch": 2.1767160271619077, "grad_norm": 0.08441191911697388, "learning_rate": 2.1239574560248174e-06, "loss": 0.0006, "step": 133030 }, { "epoch": 2.1768796531129837, "grad_norm": 0.018327437341213226, "learning_rate": 2.1231788146682208e-06, "loss": 0.0011, "step": 133040 }, { "epoch": 2.1770432790640597, "grad_norm": 0.006724250037223101, "learning_rate": 2.1224002775894784e-06, "loss": 0.0006, "step": 133050 }, { "epoch": 2.1772069050151353, "grad_norm": 0.028384385630488396, "learning_rate": 2.121621844816808e-06, "loss": 0.0009, "step": 133060 }, { "epoch": 2.1773705309662112, "grad_norm": 0.17846648395061493, "learning_rate": 2.1208435163784293e-06, "loss": 0.0007, "step": 133070 }, { "epoch": 2.1775341569172872, "grad_norm": 0.004224427510052919, "learning_rate": 2.120065292302552e-06, "loss": 0.0009, "step": 133080 }, { "epoch": 2.177697782868363, "grad_norm": 0.03458670154213905, "learning_rate": 2.119287172617387e-06, "loss": 0.0011, "step": 133090 }, { "epoch": 2.177861408819439, "grad_norm": 0.1660071760416031, "learning_rate": 2.118509157351137e-06, "loss": 0.0005, "step": 133100 }, { "epoch": 2.178025034770515, "grad_norm": 0.11826536059379578, "learning_rate": 2.117731246532006e-06, "loss": 0.0005, "step": 133110 }, { "epoch": 2.1781886607215903, "grad_norm": 0.05635539069771767, "learning_rate": 2.1169534401881886e-06, "loss": 0.0013, "step": 133120 }, { "epoch": 2.1783522866726663, "grad_norm": 0.1616489589214325, "learning_rate": 2.11617573834788e-06, "loss": 0.001, "step": 133130 }, { "epoch": 2.178515912623742, "grad_norm": 0.07777858525514603, "learning_rate": 2.115398141039271e-06, "loss": 0.0007, "step": 133140 }, { "epoch": 2.178679538574818, "grad_norm": 0.055220961570739746, "learning_rate": 2.1146206482905467e-06, "loss": 0.0011, "step": 133150 }, { "epoch": 2.178843164525894, "grad_norm": 0.10929786413908005, "learning_rate": 2.1138432601298893e-06, "loss": 0.0013, "step": 133160 }, { "epoch": 2.1790067904769694, "grad_norm": 0.051200803369283676, "learning_rate": 2.113065976585475e-06, "loss": 0.0009, "step": 133170 }, { "epoch": 2.1791704164280454, "grad_norm": 0.14246031641960144, "learning_rate": 2.112288797685482e-06, "loss": 0.0013, "step": 133180 }, { "epoch": 2.1793340423791214, "grad_norm": 0.49097469449043274, "learning_rate": 2.1115117234580783e-06, "loss": 0.001, "step": 133190 }, { "epoch": 2.179497668330197, "grad_norm": 0.0888969674706459, "learning_rate": 2.1107347539314323e-06, "loss": 0.0009, "step": 133200 }, { "epoch": 2.179661294281273, "grad_norm": 0.017145998775959015, "learning_rate": 2.109957889133709e-06, "loss": 0.001, "step": 133210 }, { "epoch": 2.179824920232349, "grad_norm": 0.12172749638557434, "learning_rate": 2.109181129093064e-06, "loss": 0.0045, "step": 133220 }, { "epoch": 2.1799885461834245, "grad_norm": 0.03835159167647362, "learning_rate": 2.1084044738376576e-06, "loss": 0.0007, "step": 133230 }, { "epoch": 2.1801521721345005, "grad_norm": 0.15464426577091217, "learning_rate": 2.107627923395637e-06, "loss": 0.0006, "step": 133240 }, { "epoch": 2.1803157980855765, "grad_norm": 0.00905674323439598, "learning_rate": 2.106851477795155e-06, "loss": 0.0012, "step": 133250 }, { "epoch": 2.180479424036652, "grad_norm": 0.1142902597784996, "learning_rate": 2.106075137064353e-06, "loss": 0.0008, "step": 133260 }, { "epoch": 2.180643049987728, "grad_norm": 0.04101067781448364, "learning_rate": 2.10529890123137e-06, "loss": 0.0014, "step": 133270 }, { "epoch": 2.180806675938804, "grad_norm": 0.002335965633392334, "learning_rate": 2.1045227703243464e-06, "loss": 0.0012, "step": 133280 }, { "epoch": 2.1809703018898796, "grad_norm": 0.11783561110496521, "learning_rate": 2.1037467443714117e-06, "loss": 0.0012, "step": 133290 }, { "epoch": 2.1811339278409556, "grad_norm": 0.1293431669473648, "learning_rate": 2.102970823400698e-06, "loss": 0.0006, "step": 133300 }, { "epoch": 2.1812975537920316, "grad_norm": 0.15829302370548248, "learning_rate": 2.1021950074403273e-06, "loss": 0.0008, "step": 133310 }, { "epoch": 2.181461179743107, "grad_norm": 0.0488809309899807, "learning_rate": 2.1014192965184253e-06, "loss": 0.0007, "step": 133320 }, { "epoch": 2.181624805694183, "grad_norm": 0.013442493975162506, "learning_rate": 2.100643690663105e-06, "loss": 0.0026, "step": 133330 }, { "epoch": 2.181788431645259, "grad_norm": 0.03188556432723999, "learning_rate": 2.099868189902484e-06, "loss": 0.001, "step": 133340 }, { "epoch": 2.1819520575963347, "grad_norm": 0.05730525776743889, "learning_rate": 2.099092794264672e-06, "loss": 0.0008, "step": 133350 }, { "epoch": 2.1821156835474107, "grad_norm": 0.17060551047325134, "learning_rate": 2.098317503777771e-06, "loss": 0.0016, "step": 133360 }, { "epoch": 2.1822793094984863, "grad_norm": 0.14788545668125153, "learning_rate": 2.097542318469889e-06, "loss": 0.0012, "step": 133370 }, { "epoch": 2.1824429354495622, "grad_norm": 0.03542299196124077, "learning_rate": 2.09676723836912e-06, "loss": 0.0006, "step": 133380 }, { "epoch": 2.1826065614006382, "grad_norm": 0.19183960556983948, "learning_rate": 2.0959922635035623e-06, "loss": 0.0013, "step": 133390 }, { "epoch": 2.182770187351714, "grad_norm": 0.03962555155158043, "learning_rate": 2.095217393901304e-06, "loss": 0.0023, "step": 133400 }, { "epoch": 2.18293381330279, "grad_norm": 0.0568387545645237, "learning_rate": 2.0944426295904342e-06, "loss": 0.0009, "step": 133410 }, { "epoch": 2.183097439253866, "grad_norm": 0.005415895022451878, "learning_rate": 2.093667970599037e-06, "loss": 0.0005, "step": 133420 }, { "epoch": 2.1832610652049413, "grad_norm": 0.07867998629808426, "learning_rate": 2.092893416955189e-06, "loss": 0.001, "step": 133430 }, { "epoch": 2.1834246911560173, "grad_norm": 0.06450201570987701, "learning_rate": 2.092118968686969e-06, "loss": 0.0007, "step": 133440 }, { "epoch": 2.1835883171070933, "grad_norm": 0.1559048295021057, "learning_rate": 2.0913446258224463e-06, "loss": 0.0011, "step": 133450 }, { "epoch": 2.183751943058169, "grad_norm": 0.12856632471084595, "learning_rate": 2.0905703883896937e-06, "loss": 0.0011, "step": 133460 }, { "epoch": 2.183915569009245, "grad_norm": 0.07965055853128433, "learning_rate": 2.089796256416768e-06, "loss": 0.0008, "step": 133470 }, { "epoch": 2.184079194960321, "grad_norm": 0.08476488292217255, "learning_rate": 2.0890222299317336e-06, "loss": 0.0009, "step": 133480 }, { "epoch": 2.1842428209113964, "grad_norm": 0.014255587011575699, "learning_rate": 2.088248308962649e-06, "loss": 0.0005, "step": 133490 }, { "epoch": 2.1844064468624724, "grad_norm": 0.14668671786785126, "learning_rate": 2.0874744935375627e-06, "loss": 0.0009, "step": 133500 }, { "epoch": 2.1845700728135484, "grad_norm": 0.15126439929008484, "learning_rate": 2.0867007836845282e-06, "loss": 0.001, "step": 133510 }, { "epoch": 2.184733698764624, "grad_norm": 0.06401389092206955, "learning_rate": 2.085927179431587e-06, "loss": 0.0008, "step": 133520 }, { "epoch": 2.1848973247157, "grad_norm": 0.06215459480881691, "learning_rate": 2.0851536808067824e-06, "loss": 0.0004, "step": 133530 }, { "epoch": 2.1850609506667755, "grad_norm": 0.0100501524284482, "learning_rate": 2.08438028783815e-06, "loss": 0.0008, "step": 133540 }, { "epoch": 2.1852245766178515, "grad_norm": 0.015297452919185162, "learning_rate": 2.0836070005537264e-06, "loss": 0.0008, "step": 133550 }, { "epoch": 2.1853882025689275, "grad_norm": 0.10489283502101898, "learning_rate": 2.08283381898154e-06, "loss": 0.0014, "step": 133560 }, { "epoch": 2.185551828520003, "grad_norm": 0.12617242336273193, "learning_rate": 2.0820607431496143e-06, "loss": 0.0009, "step": 133570 }, { "epoch": 2.185715454471079, "grad_norm": 0.061616938561201096, "learning_rate": 2.0812877730859754e-06, "loss": 0.0007, "step": 133580 }, { "epoch": 2.185879080422155, "grad_norm": 0.05792645737528801, "learning_rate": 2.0805149088186375e-06, "loss": 0.0006, "step": 133590 }, { "epoch": 2.1860427063732306, "grad_norm": 0.37530988454818726, "learning_rate": 2.0797421503756197e-06, "loss": 0.0011, "step": 133600 }, { "epoch": 2.1862063323243066, "grad_norm": 0.0029510220047086477, "learning_rate": 2.078969497784929e-06, "loss": 0.0012, "step": 133610 }, { "epoch": 2.1863699582753826, "grad_norm": 0.09248299896717072, "learning_rate": 2.078196951074575e-06, "loss": 0.0009, "step": 133620 }, { "epoch": 2.186533584226458, "grad_norm": 0.05774078145623207, "learning_rate": 2.0774245102725565e-06, "loss": 0.0003, "step": 133630 }, { "epoch": 2.186697210177534, "grad_norm": 0.0465075820684433, "learning_rate": 2.076652175406876e-06, "loss": 0.001, "step": 133640 }, { "epoch": 2.18686083612861, "grad_norm": 0.06741433590650558, "learning_rate": 2.07587994650553e-06, "loss": 0.0009, "step": 133650 }, { "epoch": 2.1870244620796857, "grad_norm": 0.004792368970811367, "learning_rate": 2.0751078235965076e-06, "loss": 0.0006, "step": 133660 }, { "epoch": 2.1871880880307617, "grad_norm": 0.1143391877412796, "learning_rate": 2.0743358067077967e-06, "loss": 0.001, "step": 133670 }, { "epoch": 2.1873517139818377, "grad_norm": 0.02038666047155857, "learning_rate": 2.07356389586738e-06, "loss": 0.0006, "step": 133680 }, { "epoch": 2.1875153399329132, "grad_norm": 0.29164034128189087, "learning_rate": 2.0727920911032377e-06, "loss": 0.0008, "step": 133690 }, { "epoch": 2.1876789658839892, "grad_norm": 0.016913043335080147, "learning_rate": 2.072020392443349e-06, "loss": 0.0008, "step": 133700 }, { "epoch": 2.1878425918350652, "grad_norm": 0.05663401260972023, "learning_rate": 2.0712487999156823e-06, "loss": 0.0008, "step": 133710 }, { "epoch": 2.188006217786141, "grad_norm": 0.13354496657848358, "learning_rate": 2.070477313548209e-06, "loss": 0.0005, "step": 133720 }, { "epoch": 2.188169843737217, "grad_norm": 0.09139169752597809, "learning_rate": 2.0697059333688906e-06, "loss": 0.0006, "step": 133730 }, { "epoch": 2.188333469688293, "grad_norm": 0.18714956939220428, "learning_rate": 2.0689346594056914e-06, "loss": 0.0017, "step": 133740 }, { "epoch": 2.1884970956393683, "grad_norm": 0.14113494753837585, "learning_rate": 2.068163491686564e-06, "loss": 0.0011, "step": 133750 }, { "epoch": 2.1886607215904443, "grad_norm": 0.017769034951925278, "learning_rate": 2.067392430239465e-06, "loss": 0.0007, "step": 133760 }, { "epoch": 2.18882434754152, "grad_norm": 0.08506893366575241, "learning_rate": 2.0666214750923426e-06, "loss": 0.0009, "step": 133770 }, { "epoch": 2.188987973492596, "grad_norm": 0.10716760903596878, "learning_rate": 2.065850626273139e-06, "loss": 0.0008, "step": 133780 }, { "epoch": 2.189151599443672, "grad_norm": 0.005107826087623835, "learning_rate": 2.0650798838098007e-06, "loss": 0.0004, "step": 133790 }, { "epoch": 2.1893152253947474, "grad_norm": 0.09522493928670883, "learning_rate": 2.0643092477302606e-06, "loss": 0.0005, "step": 133800 }, { "epoch": 2.1894788513458234, "grad_norm": 0.09006105363368988, "learning_rate": 2.0635387180624567e-06, "loss": 0.002, "step": 133810 }, { "epoch": 2.1896424772968994, "grad_norm": 0.04400699958205223, "learning_rate": 2.0627682948343147e-06, "loss": 0.0004, "step": 133820 }, { "epoch": 2.189806103247975, "grad_norm": 0.11281992495059967, "learning_rate": 2.0619979780737643e-06, "loss": 0.0008, "step": 133830 }, { "epoch": 2.189969729199051, "grad_norm": 0.04164297506213188, "learning_rate": 2.061227767808724e-06, "loss": 0.0006, "step": 133840 }, { "epoch": 2.190133355150127, "grad_norm": 0.05682015046477318, "learning_rate": 2.0604576640671136e-06, "loss": 0.0013, "step": 133850 }, { "epoch": 2.1902969811012025, "grad_norm": 0.03694860637187958, "learning_rate": 2.059687666876852e-06, "loss": 0.0007, "step": 133860 }, { "epoch": 2.1904606070522785, "grad_norm": 0.08465483039617538, "learning_rate": 2.0589177762658417e-06, "loss": 0.0008, "step": 133870 }, { "epoch": 2.1906242330033545, "grad_norm": 0.05325772613286972, "learning_rate": 2.0581479922619956e-06, "loss": 0.0014, "step": 133880 }, { "epoch": 2.19078785895443, "grad_norm": 0.04683837294578552, "learning_rate": 2.0573783148932126e-06, "loss": 0.0012, "step": 133890 }, { "epoch": 2.190951484905506, "grad_norm": 0.05582217872142792, "learning_rate": 2.0566087441873924e-06, "loss": 0.0006, "step": 133900 }, { "epoch": 2.191115110856582, "grad_norm": 0.02832895517349243, "learning_rate": 2.055839280172433e-06, "loss": 0.0006, "step": 133910 }, { "epoch": 2.1912787368076576, "grad_norm": 0.050031501799821854, "learning_rate": 2.055069922876222e-06, "loss": 0.0006, "step": 133920 }, { "epoch": 2.1914423627587336, "grad_norm": 0.1772473156452179, "learning_rate": 2.05430067232665e-06, "loss": 0.0012, "step": 133930 }, { "epoch": 2.191605988709809, "grad_norm": 0.009561151266098022, "learning_rate": 2.053531528551598e-06, "loss": 0.0005, "step": 133940 }, { "epoch": 2.191769614660885, "grad_norm": 0.060883939266204834, "learning_rate": 2.0527624915789467e-06, "loss": 0.0018, "step": 133950 }, { "epoch": 2.191933240611961, "grad_norm": 0.04925726354122162, "learning_rate": 2.051993561436573e-06, "loss": 0.0008, "step": 133960 }, { "epoch": 2.1920968665630367, "grad_norm": 0.07513914257287979, "learning_rate": 2.051224738152344e-06, "loss": 0.0006, "step": 133970 }, { "epoch": 2.1922604925141127, "grad_norm": 0.14928783476352692, "learning_rate": 2.0504560217541343e-06, "loss": 0.0008, "step": 133980 }, { "epoch": 2.1924241184651887, "grad_norm": 0.028079692274332047, "learning_rate": 2.0496874122698017e-06, "loss": 0.0007, "step": 133990 }, { "epoch": 2.1925877444162643, "grad_norm": 0.0803908258676529, "learning_rate": 2.0489189097272124e-06, "loss": 0.0008, "step": 134000 }, { "epoch": 2.1927513703673402, "grad_norm": 0.04568302631378174, "learning_rate": 2.0481505141542176e-06, "loss": 0.0004, "step": 134010 }, { "epoch": 2.1929149963184162, "grad_norm": 0.057581063359975815, "learning_rate": 2.0473822255786733e-06, "loss": 0.0009, "step": 134020 }, { "epoch": 2.193078622269492, "grad_norm": 0.035008154809474945, "learning_rate": 2.046614044028426e-06, "loss": 0.0005, "step": 134030 }, { "epoch": 2.193242248220568, "grad_norm": 0.04905608296394348, "learning_rate": 2.045845969531323e-06, "loss": 0.0011, "step": 134040 }, { "epoch": 2.193405874171644, "grad_norm": 0.0941646546125412, "learning_rate": 2.0450780021152017e-06, "loss": 0.0014, "step": 134050 }, { "epoch": 2.1935695001227193, "grad_norm": 0.07459456473588943, "learning_rate": 2.0443101418079025e-06, "loss": 0.0005, "step": 134060 }, { "epoch": 2.1937331260737953, "grad_norm": 0.00786400306969881, "learning_rate": 2.043542388637257e-06, "loss": 0.0009, "step": 134070 }, { "epoch": 2.1938967520248713, "grad_norm": 0.34217965602874756, "learning_rate": 2.042774742631092e-06, "loss": 0.0017, "step": 134080 }, { "epoch": 2.194060377975947, "grad_norm": 0.12149641662836075, "learning_rate": 2.0420072038172374e-06, "loss": 0.0016, "step": 134090 }, { "epoch": 2.194224003927023, "grad_norm": 0.04716979339718819, "learning_rate": 2.04123977222351e-06, "loss": 0.0005, "step": 134100 }, { "epoch": 2.194387629878099, "grad_norm": 0.02315519005060196, "learning_rate": 2.0404724478777314e-06, "loss": 0.0007, "step": 134110 }, { "epoch": 2.1945512558291744, "grad_norm": 0.05076749250292778, "learning_rate": 2.039705230807712e-06, "loss": 0.0007, "step": 134120 }, { "epoch": 2.1947148817802504, "grad_norm": 0.05808860436081886, "learning_rate": 2.0389381210412634e-06, "loss": 0.0007, "step": 134130 }, { "epoch": 2.1948785077313264, "grad_norm": 0.03759097680449486, "learning_rate": 2.0381711186061925e-06, "loss": 0.0009, "step": 134140 }, { "epoch": 2.195042133682402, "grad_norm": 0.0022665902506560087, "learning_rate": 2.037404223530299e-06, "loss": 0.0018, "step": 134150 }, { "epoch": 2.195205759633478, "grad_norm": 0.06863267719745636, "learning_rate": 2.036637435841385e-06, "loss": 0.0013, "step": 134160 }, { "epoch": 2.1953693855845535, "grad_norm": 0.16734172403812408, "learning_rate": 2.035870755567238e-06, "loss": 0.0007, "step": 134170 }, { "epoch": 2.1955330115356295, "grad_norm": 0.06679178029298782, "learning_rate": 2.0351041827356523e-06, "loss": 0.0008, "step": 134180 }, { "epoch": 2.1956966374867055, "grad_norm": 0.10849496722221375, "learning_rate": 2.034337717374415e-06, "loss": 0.0006, "step": 134190 }, { "epoch": 2.195860263437781, "grad_norm": 0.05884971469640732, "learning_rate": 2.0335713595113066e-06, "loss": 0.0006, "step": 134200 }, { "epoch": 2.196023889388857, "grad_norm": 0.1716867834329605, "learning_rate": 2.0328051091741085e-06, "loss": 0.0008, "step": 134210 }, { "epoch": 2.196187515339933, "grad_norm": 0.0182280782610178, "learning_rate": 2.032038966390592e-06, "loss": 0.0004, "step": 134220 }, { "epoch": 2.1963511412910086, "grad_norm": 0.07228348404169083, "learning_rate": 2.0312729311885314e-06, "loss": 0.0018, "step": 134230 }, { "epoch": 2.1965147672420846, "grad_norm": 0.02363823726773262, "learning_rate": 2.0305070035956903e-06, "loss": 0.0002, "step": 134240 }, { "epoch": 2.1966783931931606, "grad_norm": 0.10083484649658203, "learning_rate": 2.029741183639835e-06, "loss": 0.001, "step": 134250 }, { "epoch": 2.196842019144236, "grad_norm": 0.07068591564893723, "learning_rate": 2.0289754713487227e-06, "loss": 0.002, "step": 134260 }, { "epoch": 2.197005645095312, "grad_norm": 0.026403365656733513, "learning_rate": 2.0282098667501073e-06, "loss": 0.0007, "step": 134270 }, { "epoch": 2.197169271046388, "grad_norm": 0.10789033025503159, "learning_rate": 2.027444369871743e-06, "loss": 0.0007, "step": 134280 }, { "epoch": 2.1973328969974637, "grad_norm": 0.06284467875957489, "learning_rate": 2.026678980741375e-06, "loss": 0.0006, "step": 134290 }, { "epoch": 2.1974965229485397, "grad_norm": 0.032083287835121155, "learning_rate": 2.0259136993867495e-06, "loss": 0.0006, "step": 134300 }, { "epoch": 2.1976601488996153, "grad_norm": 0.08283887803554535, "learning_rate": 2.0251485258356023e-06, "loss": 0.0008, "step": 134310 }, { "epoch": 2.1978237748506912, "grad_norm": 0.005516393110156059, "learning_rate": 2.0243834601156727e-06, "loss": 0.0003, "step": 134320 }, { "epoch": 2.1979874008017672, "grad_norm": 0.07858219742774963, "learning_rate": 2.0236185022546897e-06, "loss": 0.0008, "step": 134330 }, { "epoch": 2.198151026752843, "grad_norm": 0.09823130816221237, "learning_rate": 2.022853652280382e-06, "loss": 0.001, "step": 134340 }, { "epoch": 2.198314652703919, "grad_norm": 0.036159295588731766, "learning_rate": 2.022088910220476e-06, "loss": 0.0009, "step": 134350 }, { "epoch": 2.198478278654995, "grad_norm": 0.06728903949260712, "learning_rate": 2.0213242761026896e-06, "loss": 0.0011, "step": 134360 }, { "epoch": 2.1986419046060703, "grad_norm": 0.12728987634181976, "learning_rate": 2.0205597499547393e-06, "loss": 0.0008, "step": 134370 }, { "epoch": 2.1988055305571463, "grad_norm": 0.028947558254003525, "learning_rate": 2.0197953318043355e-06, "loss": 0.0007, "step": 134380 }, { "epoch": 2.1989691565082223, "grad_norm": 0.04294012114405632, "learning_rate": 2.01903102167919e-06, "loss": 0.001, "step": 134390 }, { "epoch": 2.199132782459298, "grad_norm": 0.28890275955200195, "learning_rate": 2.018266819607003e-06, "loss": 0.0011, "step": 134400 }, { "epoch": 2.199296408410374, "grad_norm": 0.02271469682455063, "learning_rate": 2.0175027256154785e-06, "loss": 0.0004, "step": 134410 }, { "epoch": 2.19946003436145, "grad_norm": 0.10078608244657516, "learning_rate": 2.016738739732313e-06, "loss": 0.001, "step": 134420 }, { "epoch": 2.1996236603125254, "grad_norm": 0.04654580354690552, "learning_rate": 2.015974861985196e-06, "loss": 0.001, "step": 134430 }, { "epoch": 2.1997872862636014, "grad_norm": 0.0663909912109375, "learning_rate": 2.0152110924018204e-06, "loss": 0.0005, "step": 134440 }, { "epoch": 2.1999509122146774, "grad_norm": 0.07165167480707169, "learning_rate": 2.014447431009868e-06, "loss": 0.0006, "step": 134450 }, { "epoch": 2.200114538165753, "grad_norm": 0.08370284736156464, "learning_rate": 2.013683877837021e-06, "loss": 0.0009, "step": 134460 }, { "epoch": 2.200278164116829, "grad_norm": 0.1733408421278, "learning_rate": 2.012920432910957e-06, "loss": 0.0008, "step": 134470 }, { "epoch": 2.200441790067905, "grad_norm": 0.07748666405677795, "learning_rate": 2.0121570962593456e-06, "loss": 0.0004, "step": 134480 }, { "epoch": 2.2006054160189805, "grad_norm": 0.08153659850358963, "learning_rate": 2.011393867909861e-06, "loss": 0.001, "step": 134490 }, { "epoch": 2.2007690419700565, "grad_norm": 0.0690116211771965, "learning_rate": 2.010630747890163e-06, "loss": 0.0013, "step": 134500 }, { "epoch": 2.2009326679211325, "grad_norm": 0.07460810989141464, "learning_rate": 2.0098677362279182e-06, "loss": 0.0007, "step": 134510 }, { "epoch": 2.201096293872208, "grad_norm": 0.043203577399253845, "learning_rate": 2.0091048329507794e-06, "loss": 0.0008, "step": 134520 }, { "epoch": 2.201259919823284, "grad_norm": 0.0757097378373146, "learning_rate": 2.008342038086404e-06, "loss": 0.0005, "step": 134530 }, { "epoch": 2.2014235457743596, "grad_norm": 0.01177325751632452, "learning_rate": 2.007579351662437e-06, "loss": 0.001, "step": 134540 }, { "epoch": 2.2015871717254356, "grad_norm": 0.012817207723855972, "learning_rate": 2.0068167737065286e-06, "loss": 0.0006, "step": 134550 }, { "epoch": 2.2017507976765116, "grad_norm": 0.15084563195705414, "learning_rate": 2.006054304246318e-06, "loss": 0.0008, "step": 134560 }, { "epoch": 2.201914423627587, "grad_norm": 0.09428867697715759, "learning_rate": 2.0052919433094412e-06, "loss": 0.0005, "step": 134570 }, { "epoch": 2.202078049578663, "grad_norm": 0.02012704871594906, "learning_rate": 2.004529690923536e-06, "loss": 0.0005, "step": 134580 }, { "epoch": 2.202241675529739, "grad_norm": 0.10638763755559921, "learning_rate": 2.003767547116229e-06, "loss": 0.0009, "step": 134590 }, { "epoch": 2.2024053014808147, "grad_norm": 0.013622605241835117, "learning_rate": 2.0030055119151474e-06, "loss": 0.0015, "step": 134600 }, { "epoch": 2.2025689274318907, "grad_norm": 0.08915100991725922, "learning_rate": 2.002243585347912e-06, "loss": 0.0007, "step": 134610 }, { "epoch": 2.2027325533829667, "grad_norm": 0.08205676823854446, "learning_rate": 2.0014817674421417e-06, "loss": 0.0009, "step": 134620 }, { "epoch": 2.2028961793340422, "grad_norm": 0.08206752687692642, "learning_rate": 2.000720058225452e-06, "loss": 0.001, "step": 134630 }, { "epoch": 2.2030598052851182, "grad_norm": 0.0041562942788004875, "learning_rate": 1.9999584577254506e-06, "loss": 0.0009, "step": 134640 }, { "epoch": 2.2032234312361942, "grad_norm": 0.020313045009970665, "learning_rate": 1.9991969659697457e-06, "loss": 0.0014, "step": 134650 }, { "epoch": 2.20338705718727, "grad_norm": 0.04158085957169533, "learning_rate": 1.998435582985939e-06, "loss": 0.0006, "step": 134660 }, { "epoch": 2.203550683138346, "grad_norm": 0.13851560652256012, "learning_rate": 1.9976743088016264e-06, "loss": 0.0021, "step": 134670 }, { "epoch": 2.203714309089422, "grad_norm": 0.03624722734093666, "learning_rate": 1.9969131434444062e-06, "loss": 0.0006, "step": 134680 }, { "epoch": 2.2038779350404973, "grad_norm": 0.09091798961162567, "learning_rate": 1.9961520869418643e-06, "loss": 0.0008, "step": 134690 }, { "epoch": 2.2040415609915733, "grad_norm": 0.014389803633093834, "learning_rate": 1.9953911393215923e-06, "loss": 0.0017, "step": 134700 }, { "epoch": 2.204205186942649, "grad_norm": 0.031772494316101074, "learning_rate": 1.994630300611168e-06, "loss": 0.001, "step": 134710 }, { "epoch": 2.204368812893725, "grad_norm": 0.11197585612535477, "learning_rate": 1.993869570838174e-06, "loss": 0.001, "step": 134720 }, { "epoch": 2.204532438844801, "grad_norm": 0.05739670991897583, "learning_rate": 1.9931089500301807e-06, "loss": 0.0006, "step": 134730 }, { "epoch": 2.2046960647958764, "grad_norm": 0.03058839589357376, "learning_rate": 1.9923484382147632e-06, "loss": 0.0007, "step": 134740 }, { "epoch": 2.2048596907469524, "grad_norm": 0.03984062373638153, "learning_rate": 1.991588035419485e-06, "loss": 0.0009, "step": 134750 }, { "epoch": 2.2050233166980284, "grad_norm": 0.1270948052406311, "learning_rate": 1.9908277416719112e-06, "loss": 0.0009, "step": 134760 }, { "epoch": 2.205186942649104, "grad_norm": 0.04314705356955528, "learning_rate": 1.990067556999599e-06, "loss": 0.0003, "step": 134770 }, { "epoch": 2.20535056860018, "grad_norm": 0.03575983643531799, "learning_rate": 1.9893074814301027e-06, "loss": 0.0005, "step": 134780 }, { "epoch": 2.205514194551256, "grad_norm": 0.05156363919377327, "learning_rate": 1.9885475149909755e-06, "loss": 0.0009, "step": 134790 }, { "epoch": 2.2056778205023315, "grad_norm": 0.009673473425209522, "learning_rate": 1.9877876577097614e-06, "loss": 0.0011, "step": 134800 }, { "epoch": 2.2058414464534075, "grad_norm": 0.05272473767399788, "learning_rate": 1.9870279096140067e-06, "loss": 0.0007, "step": 134810 }, { "epoch": 2.2060050724044835, "grad_norm": 0.05631604418158531, "learning_rate": 1.9862682707312468e-06, "loss": 0.0008, "step": 134820 }, { "epoch": 2.206168698355559, "grad_norm": 0.09839834272861481, "learning_rate": 1.985508741089021e-06, "loss": 0.0008, "step": 134830 }, { "epoch": 2.206332324306635, "grad_norm": 0.06707464158535004, "learning_rate": 1.984749320714856e-06, "loss": 0.0006, "step": 134840 }, { "epoch": 2.206495950257711, "grad_norm": 0.02126784808933735, "learning_rate": 1.983990009636282e-06, "loss": 0.0011, "step": 134850 }, { "epoch": 2.2066595762087866, "grad_norm": 0.034013696014881134, "learning_rate": 1.9832308078808236e-06, "loss": 0.0007, "step": 134860 }, { "epoch": 2.2068232021598626, "grad_norm": 0.005292203743010759, "learning_rate": 1.9824717154759953e-06, "loss": 0.0009, "step": 134870 }, { "epoch": 2.2069868281109386, "grad_norm": 0.14281445741653442, "learning_rate": 1.9817127324493163e-06, "loss": 0.0018, "step": 134880 }, { "epoch": 2.207150454062014, "grad_norm": 0.0043332139030098915, "learning_rate": 1.980953858828295e-06, "loss": 0.0003, "step": 134890 }, { "epoch": 2.20731408001309, "grad_norm": 0.005699648056179285, "learning_rate": 1.98019509464044e-06, "loss": 0.0008, "step": 134900 }, { "epoch": 2.207477705964166, "grad_norm": 0.05777039751410484, "learning_rate": 1.9794364399132563e-06, "loss": 0.0007, "step": 134910 }, { "epoch": 2.2076413319152417, "grad_norm": 0.047504715621471405, "learning_rate": 1.9786778946742406e-06, "loss": 0.0008, "step": 134920 }, { "epoch": 2.2078049578663177, "grad_norm": 0.32164159417152405, "learning_rate": 1.9779194589508917e-06, "loss": 0.0008, "step": 134930 }, { "epoch": 2.2079685838173932, "grad_norm": 0.15752612054347992, "learning_rate": 1.977161132770696e-06, "loss": 0.0007, "step": 134940 }, { "epoch": 2.2081322097684692, "grad_norm": 0.026226697489619255, "learning_rate": 1.976402916161147e-06, "loss": 0.0009, "step": 134950 }, { "epoch": 2.2082958357195452, "grad_norm": 0.05336420238018036, "learning_rate": 1.975644809149725e-06, "loss": 0.0011, "step": 134960 }, { "epoch": 2.208459461670621, "grad_norm": 0.0060340347699820995, "learning_rate": 1.974886811763908e-06, "loss": 0.0039, "step": 134970 }, { "epoch": 2.208623087621697, "grad_norm": 0.03583291172981262, "learning_rate": 1.9741289240311757e-06, "loss": 0.0005, "step": 134980 }, { "epoch": 2.208786713572773, "grad_norm": 0.0591684952378273, "learning_rate": 1.973371145978995e-06, "loss": 0.0008, "step": 134990 }, { "epoch": 2.2089503395238483, "grad_norm": 0.02983226254582405, "learning_rate": 1.9726134776348377e-06, "loss": 0.0003, "step": 135000 }, { "epoch": 2.2091139654749243, "grad_norm": 0.08417952805757523, "learning_rate": 1.971855919026164e-06, "loss": 0.0008, "step": 135010 }, { "epoch": 2.2092775914260003, "grad_norm": 0.06742607057094574, "learning_rate": 1.971098470180437e-06, "loss": 0.0004, "step": 135020 }, { "epoch": 2.209441217377076, "grad_norm": 0.01580294594168663, "learning_rate": 1.970341131125109e-06, "loss": 0.001, "step": 135030 }, { "epoch": 2.209604843328152, "grad_norm": 0.01390895526856184, "learning_rate": 1.9695839018876356e-06, "loss": 0.0005, "step": 135040 }, { "epoch": 2.209768469279228, "grad_norm": 0.22131311893463135, "learning_rate": 1.9688267824954597e-06, "loss": 0.0004, "step": 135050 }, { "epoch": 2.2099320952303034, "grad_norm": 0.07881221920251846, "learning_rate": 1.96806977297603e-06, "loss": 0.0008, "step": 135060 }, { "epoch": 2.2100957211813794, "grad_norm": 0.010887800715863705, "learning_rate": 1.967312873356784e-06, "loss": 0.0003, "step": 135070 }, { "epoch": 2.210259347132455, "grad_norm": 0.0820031687617302, "learning_rate": 1.966556083665156e-06, "loss": 0.0006, "step": 135080 }, { "epoch": 2.210422973083531, "grad_norm": 0.21759140491485596, "learning_rate": 1.9657994039285806e-06, "loss": 0.0008, "step": 135090 }, { "epoch": 2.210586599034607, "grad_norm": 0.12937645614147186, "learning_rate": 1.9650428341744824e-06, "loss": 0.0016, "step": 135100 }, { "epoch": 2.2107502249856825, "grad_norm": 0.038025375455617905, "learning_rate": 1.9642863744302875e-06, "loss": 0.0008, "step": 135110 }, { "epoch": 2.2109138509367585, "grad_norm": 0.000715558766387403, "learning_rate": 1.963530024723417e-06, "loss": 0.001, "step": 135120 }, { "epoch": 2.2110774768878345, "grad_norm": 0.003580195363610983, "learning_rate": 1.962773785081283e-06, "loss": 0.0008, "step": 135130 }, { "epoch": 2.21124110283891, "grad_norm": 0.11361295729875565, "learning_rate": 1.962017655531302e-06, "loss": 0.0004, "step": 135140 }, { "epoch": 2.211404728789986, "grad_norm": 0.10403428971767426, "learning_rate": 1.9612616361008773e-06, "loss": 0.001, "step": 135150 }, { "epoch": 2.211568354741062, "grad_norm": 0.11772803217172623, "learning_rate": 1.960505726817418e-06, "loss": 0.0007, "step": 135160 }, { "epoch": 2.2117319806921376, "grad_norm": 0.02060648612678051, "learning_rate": 1.959749927708318e-06, "loss": 0.0005, "step": 135170 }, { "epoch": 2.2118956066432136, "grad_norm": 0.037243980914354324, "learning_rate": 1.958994238800976e-06, "loss": 0.0015, "step": 135180 }, { "epoch": 2.2120592325942896, "grad_norm": 0.058939263224601746, "learning_rate": 1.9582386601227853e-06, "loss": 0.0009, "step": 135190 }, { "epoch": 2.212222858545365, "grad_norm": 0.0410596989095211, "learning_rate": 1.957483191701131e-06, "loss": 0.0006, "step": 135200 }, { "epoch": 2.212386484496441, "grad_norm": 0.08171757310628891, "learning_rate": 1.9567278335634e-06, "loss": 0.0005, "step": 135210 }, { "epoch": 2.212550110447517, "grad_norm": 0.1396220326423645, "learning_rate": 1.9559725857369695e-06, "loss": 0.0012, "step": 135220 }, { "epoch": 2.2127137363985927, "grad_norm": 0.10604510456323624, "learning_rate": 1.955217448249218e-06, "loss": 0.0006, "step": 135230 }, { "epoch": 2.2128773623496687, "grad_norm": 0.0802365094423294, "learning_rate": 1.954462421127514e-06, "loss": 0.0011, "step": 135240 }, { "epoch": 2.2130409883007447, "grad_norm": 0.0205355454236269, "learning_rate": 1.9537075043992296e-06, "loss": 0.0007, "step": 135250 }, { "epoch": 2.2132046142518202, "grad_norm": 0.0017985167214646935, "learning_rate": 1.952952698091727e-06, "loss": 0.0008, "step": 135260 }, { "epoch": 2.2133682402028962, "grad_norm": 0.07845429331064224, "learning_rate": 1.9521980022323635e-06, "loss": 0.0006, "step": 135270 }, { "epoch": 2.2135318661539722, "grad_norm": 0.04080011323094368, "learning_rate": 1.951443416848499e-06, "loss": 0.0008, "step": 135280 }, { "epoch": 2.213695492105048, "grad_norm": 0.007538751699030399, "learning_rate": 1.9506889419674817e-06, "loss": 0.0008, "step": 135290 }, { "epoch": 2.213859118056124, "grad_norm": 0.06548520177602768, "learning_rate": 1.9499345776166634e-06, "loss": 0.0007, "step": 135300 }, { "epoch": 2.2140227440071993, "grad_norm": 0.177206352353096, "learning_rate": 1.949180323823385e-06, "loss": 0.001, "step": 135310 }, { "epoch": 2.2141863699582753, "grad_norm": 0.004906008020043373, "learning_rate": 1.948426180614988e-06, "loss": 0.0014, "step": 135320 }, { "epoch": 2.2143499959093513, "grad_norm": 0.5798944234848022, "learning_rate": 1.947672148018806e-06, "loss": 0.0015, "step": 135330 }, { "epoch": 2.214513621860427, "grad_norm": 0.031253550201654434, "learning_rate": 1.9469182260621733e-06, "loss": 0.0025, "step": 135340 }, { "epoch": 2.214677247811503, "grad_norm": 0.14731977880001068, "learning_rate": 1.9461644147724186e-06, "loss": 0.0023, "step": 135350 }, { "epoch": 2.214840873762579, "grad_norm": 0.05601713806390762, "learning_rate": 1.9454107141768636e-06, "loss": 0.001, "step": 135360 }, { "epoch": 2.2150044997136544, "grad_norm": 0.04318203032016754, "learning_rate": 1.9446571243028288e-06, "loss": 0.0006, "step": 135370 }, { "epoch": 2.2151681256647304, "grad_norm": 0.05038510262966156, "learning_rate": 1.943903645177629e-06, "loss": 0.0008, "step": 135380 }, { "epoch": 2.2153317516158064, "grad_norm": 0.23002773523330688, "learning_rate": 1.943150276828576e-06, "loss": 0.0011, "step": 135390 }, { "epoch": 2.215495377566882, "grad_norm": 0.13838393986225128, "learning_rate": 1.9423970192829807e-06, "loss": 0.0007, "step": 135400 }, { "epoch": 2.215659003517958, "grad_norm": 0.03591505065560341, "learning_rate": 1.941643872568143e-06, "loss": 0.0004, "step": 135410 }, { "epoch": 2.215822629469034, "grad_norm": 0.02029172144830227, "learning_rate": 1.9408908367113655e-06, "loss": 0.001, "step": 135420 }, { "epoch": 2.2159862554201095, "grad_norm": 0.08574935793876648, "learning_rate": 1.9401379117399416e-06, "loss": 0.0004, "step": 135430 }, { "epoch": 2.2161498813711855, "grad_norm": 0.03263247385621071, "learning_rate": 1.939385097681166e-06, "loss": 0.0006, "step": 135440 }, { "epoch": 2.2163135073222615, "grad_norm": 0.2616935968399048, "learning_rate": 1.9386323945623227e-06, "loss": 0.0014, "step": 135450 }, { "epoch": 2.216477133273337, "grad_norm": 0.010783706791698933, "learning_rate": 1.937879802410699e-06, "loss": 0.0025, "step": 135460 }, { "epoch": 2.216640759224413, "grad_norm": 0.15725213289260864, "learning_rate": 1.937127321253573e-06, "loss": 0.001, "step": 135470 }, { "epoch": 2.2168043851754886, "grad_norm": 0.31683480739593506, "learning_rate": 1.9363749511182184e-06, "loss": 0.0006, "step": 135480 }, { "epoch": 2.2169680111265646, "grad_norm": 0.053549326956272125, "learning_rate": 1.9356226920319106e-06, "loss": 0.0011, "step": 135490 }, { "epoch": 2.2171316370776406, "grad_norm": 0.10388994216918945, "learning_rate": 1.934870544021913e-06, "loss": 0.0011, "step": 135500 }, { "epoch": 2.217295263028716, "grad_norm": 0.08462473750114441, "learning_rate": 1.9341185071154935e-06, "loss": 0.0013, "step": 135510 }, { "epoch": 2.217458888979792, "grad_norm": 0.004310683347284794, "learning_rate": 1.9333665813399073e-06, "loss": 0.0007, "step": 135520 }, { "epoch": 2.217622514930868, "grad_norm": 0.03336222469806671, "learning_rate": 1.9326147667224136e-06, "loss": 0.001, "step": 135530 }, { "epoch": 2.2177861408819437, "grad_norm": 0.0442003533244133, "learning_rate": 1.931863063290261e-06, "loss": 0.0004, "step": 135540 }, { "epoch": 2.2179497668330197, "grad_norm": 0.14304393529891968, "learning_rate": 1.931111471070698e-06, "loss": 0.0013, "step": 135550 }, { "epoch": 2.2181133927840957, "grad_norm": 0.12799952924251556, "learning_rate": 1.93035999009097e-06, "loss": 0.0007, "step": 135560 }, { "epoch": 2.2182770187351712, "grad_norm": 0.06458614766597748, "learning_rate": 1.9296086203783155e-06, "loss": 0.0003, "step": 135570 }, { "epoch": 2.2184406446862472, "grad_norm": 0.03923512250185013, "learning_rate": 1.9288573619599684e-06, "loss": 0.001, "step": 135580 }, { "epoch": 2.2186042706373232, "grad_norm": 0.012834126129746437, "learning_rate": 1.928106214863159e-06, "loss": 0.001, "step": 135590 }, { "epoch": 2.218767896588399, "grad_norm": 0.07307049632072449, "learning_rate": 1.927355179115118e-06, "loss": 0.0016, "step": 135600 }, { "epoch": 2.218931522539475, "grad_norm": 0.06783021241426468, "learning_rate": 1.9266042547430646e-06, "loss": 0.0009, "step": 135610 }, { "epoch": 2.219095148490551, "grad_norm": 0.04411931708455086, "learning_rate": 1.9258534417742208e-06, "loss": 0.0008, "step": 135620 }, { "epoch": 2.2192587744416263, "grad_norm": 0.12660111486911774, "learning_rate": 1.925102740235803e-06, "loss": 0.0009, "step": 135630 }, { "epoch": 2.2194224003927023, "grad_norm": 0.03758559376001358, "learning_rate": 1.9243521501550185e-06, "loss": 0.0008, "step": 135640 }, { "epoch": 2.2195860263437783, "grad_norm": 0.048410918563604355, "learning_rate": 1.923601671559078e-06, "loss": 0.0006, "step": 135650 }, { "epoch": 2.219749652294854, "grad_norm": 0.11889099329710007, "learning_rate": 1.922851304475182e-06, "loss": 0.001, "step": 135660 }, { "epoch": 2.21991327824593, "grad_norm": 0.17633835971355438, "learning_rate": 1.9221010489305315e-06, "loss": 0.0008, "step": 135670 }, { "epoch": 2.220076904197006, "grad_norm": 0.030358219519257545, "learning_rate": 1.921350904952321e-06, "loss": 0.0007, "step": 135680 }, { "epoch": 2.2202405301480814, "grad_norm": 0.04289516061544418, "learning_rate": 1.9206008725677383e-06, "loss": 0.0007, "step": 135690 }, { "epoch": 2.2204041560991574, "grad_norm": 0.08639387041330338, "learning_rate": 1.9198509518039754e-06, "loss": 0.0005, "step": 135700 }, { "epoch": 2.220567782050233, "grad_norm": 0.005752048455178738, "learning_rate": 1.9191011426882105e-06, "loss": 0.0006, "step": 135710 }, { "epoch": 2.220731408001309, "grad_norm": 0.04405929520726204, "learning_rate": 1.918351445247626e-06, "loss": 0.0052, "step": 135720 }, { "epoch": 2.220895033952385, "grad_norm": 0.1026475802063942, "learning_rate": 1.917601859509394e-06, "loss": 0.0008, "step": 135730 }, { "epoch": 2.2210586599034605, "grad_norm": 0.01991293951869011, "learning_rate": 1.916852385500687e-06, "loss": 0.0026, "step": 135740 }, { "epoch": 2.2212222858545365, "grad_norm": 0.04852480813860893, "learning_rate": 1.916103023248669e-06, "loss": 0.002, "step": 135750 }, { "epoch": 2.2213859118056125, "grad_norm": 0.03531540557742119, "learning_rate": 1.9153537727805073e-06, "loss": 0.0012, "step": 135760 }, { "epoch": 2.221549537756688, "grad_norm": 0.12029346823692322, "learning_rate": 1.914604634123357e-06, "loss": 0.0015, "step": 135770 }, { "epoch": 2.221713163707764, "grad_norm": 0.05186917632818222, "learning_rate": 1.913855607304372e-06, "loss": 0.0013, "step": 135780 }, { "epoch": 2.22187678965884, "grad_norm": 0.14210397005081177, "learning_rate": 1.9131066923507048e-06, "loss": 0.001, "step": 135790 }, { "epoch": 2.2220404156099156, "grad_norm": 0.0436009056866169, "learning_rate": 1.9123578892894995e-06, "loss": 0.0005, "step": 135800 }, { "epoch": 2.2222040415609916, "grad_norm": 0.006005428731441498, "learning_rate": 1.9116091981479014e-06, "loss": 0.0007, "step": 135810 }, { "epoch": 2.2223676675120676, "grad_norm": 0.04222996160387993, "learning_rate": 1.9108606189530453e-06, "loss": 0.0005, "step": 135820 }, { "epoch": 2.222531293463143, "grad_norm": 0.02853001467883587, "learning_rate": 1.9101121517320676e-06, "loss": 0.001, "step": 135830 }, { "epoch": 2.222694919414219, "grad_norm": 0.027031652629375458, "learning_rate": 1.9093637965120994e-06, "loss": 0.0006, "step": 135840 }, { "epoch": 2.2228585453652947, "grad_norm": 0.04568779841065407, "learning_rate": 1.908615553320264e-06, "loss": 0.0007, "step": 135850 }, { "epoch": 2.2230221713163707, "grad_norm": 0.02065465785562992, "learning_rate": 1.9078674221836865e-06, "loss": 0.001, "step": 135860 }, { "epoch": 2.2231857972674467, "grad_norm": 0.13224564492702484, "learning_rate": 1.9071194031294826e-06, "loss": 0.001, "step": 135870 }, { "epoch": 2.2233494232185222, "grad_norm": 0.12640483677387238, "learning_rate": 1.9063714961847656e-06, "loss": 0.0011, "step": 135880 }, { "epoch": 2.2235130491695982, "grad_norm": 0.060399483889341354, "learning_rate": 1.9056237013766481e-06, "loss": 0.0006, "step": 135890 }, { "epoch": 2.2236766751206742, "grad_norm": 0.005786554887890816, "learning_rate": 1.9048760187322329e-06, "loss": 0.001, "step": 135900 }, { "epoch": 2.22384030107175, "grad_norm": 0.05425728112459183, "learning_rate": 1.9041284482786243e-06, "loss": 0.001, "step": 135910 }, { "epoch": 2.224003927022826, "grad_norm": 0.06975378096103668, "learning_rate": 1.9033809900429174e-06, "loss": 0.002, "step": 135920 }, { "epoch": 2.224167552973902, "grad_norm": 0.1109786257147789, "learning_rate": 1.9026336440522086e-06, "loss": 0.0009, "step": 135930 }, { "epoch": 2.2243311789249773, "grad_norm": 0.3379153609275818, "learning_rate": 1.9018864103335844e-06, "loss": 0.0015, "step": 135940 }, { "epoch": 2.2244948048760533, "grad_norm": 0.01581559143960476, "learning_rate": 1.9011392889141333e-06, "loss": 0.0003, "step": 135950 }, { "epoch": 2.2246584308271293, "grad_norm": 0.17589594423770905, "learning_rate": 1.9003922798209334e-06, "loss": 0.0019, "step": 135960 }, { "epoch": 2.224822056778205, "grad_norm": 0.10207583755254745, "learning_rate": 1.8996453830810653e-06, "loss": 0.0007, "step": 135970 }, { "epoch": 2.224985682729281, "grad_norm": 0.09917905181646347, "learning_rate": 1.8988985987216002e-06, "loss": 0.0007, "step": 135980 }, { "epoch": 2.225149308680357, "grad_norm": 0.31906333565711975, "learning_rate": 1.8981519267696063e-06, "loss": 0.0011, "step": 135990 }, { "epoch": 2.2253129346314324, "grad_norm": 0.06105310842394829, "learning_rate": 1.8974053672521515e-06, "loss": 0.0015, "step": 136000 }, { "epoch": 2.2254765605825084, "grad_norm": 0.10157790780067444, "learning_rate": 1.8966589201962938e-06, "loss": 0.0011, "step": 136010 }, { "epoch": 2.2256401865335844, "grad_norm": 0.05836651846766472, "learning_rate": 1.8959125856290928e-06, "loss": 0.001, "step": 136020 }, { "epoch": 2.22580381248466, "grad_norm": 0.06173566356301308, "learning_rate": 1.8951663635775985e-06, "loss": 0.0005, "step": 136030 }, { "epoch": 2.225967438435736, "grad_norm": 0.040976330637931824, "learning_rate": 1.8944202540688634e-06, "loss": 0.0006, "step": 136040 }, { "epoch": 2.226131064386812, "grad_norm": 0.09951294958591461, "learning_rate": 1.8936742571299282e-06, "loss": 0.001, "step": 136050 }, { "epoch": 2.2262946903378875, "grad_norm": 0.04871763661503792, "learning_rate": 1.892928372787835e-06, "loss": 0.0009, "step": 136060 }, { "epoch": 2.2264583162889635, "grad_norm": 0.19348010420799255, "learning_rate": 1.8921826010696242e-06, "loss": 0.001, "step": 136070 }, { "epoch": 2.226621942240039, "grad_norm": 0.09974869340658188, "learning_rate": 1.8914369420023215e-06, "loss": 0.0011, "step": 136080 }, { "epoch": 2.226785568191115, "grad_norm": 0.21305035054683685, "learning_rate": 1.8906913956129596e-06, "loss": 0.0011, "step": 136090 }, { "epoch": 2.226949194142191, "grad_norm": 0.0825585201382637, "learning_rate": 1.8899459619285604e-06, "loss": 0.001, "step": 136100 }, { "epoch": 2.2271128200932666, "grad_norm": 0.02103789523243904, "learning_rate": 1.889200640976145e-06, "loss": 0.0007, "step": 136110 }, { "epoch": 2.2272764460443426, "grad_norm": 0.09959770739078522, "learning_rate": 1.8884554327827314e-06, "loss": 0.0009, "step": 136120 }, { "epoch": 2.2274400719954186, "grad_norm": 0.07241784036159515, "learning_rate": 1.887710337375328e-06, "loss": 0.001, "step": 136130 }, { "epoch": 2.227603697946494, "grad_norm": 0.06889087706804276, "learning_rate": 1.8869653547809468e-06, "loss": 0.0016, "step": 136140 }, { "epoch": 2.22776732389757, "grad_norm": 0.03271825984120369, "learning_rate": 1.886220485026588e-06, "loss": 0.001, "step": 136150 }, { "epoch": 2.227930949848646, "grad_norm": 0.18051211535930634, "learning_rate": 1.8854757281392544e-06, "loss": 0.0013, "step": 136160 }, { "epoch": 2.2280945757997217, "grad_norm": 0.032112330198287964, "learning_rate": 1.8847310841459398e-06, "loss": 0.0007, "step": 136170 }, { "epoch": 2.2282582017507977, "grad_norm": 0.04270172864198685, "learning_rate": 1.8839865530736346e-06, "loss": 0.001, "step": 136180 }, { "epoch": 2.2284218277018737, "grad_norm": 0.11945780366659164, "learning_rate": 1.883242134949329e-06, "loss": 0.0009, "step": 136190 }, { "epoch": 2.2285854536529492, "grad_norm": 0.0691685602068901, "learning_rate": 1.882497829800004e-06, "loss": 0.0003, "step": 136200 }, { "epoch": 2.2287490796040252, "grad_norm": 0.003541781799867749, "learning_rate": 1.8817536376526412e-06, "loss": 0.0007, "step": 136210 }, { "epoch": 2.2289127055551012, "grad_norm": 0.04141651839017868, "learning_rate": 1.8810095585342131e-06, "loss": 0.0034, "step": 136220 }, { "epoch": 2.229076331506177, "grad_norm": 0.09079122543334961, "learning_rate": 1.880265592471694e-06, "loss": 0.0018, "step": 136230 }, { "epoch": 2.229239957457253, "grad_norm": 0.05387713387608528, "learning_rate": 1.8795217394920467e-06, "loss": 0.0006, "step": 136240 }, { "epoch": 2.2294035834083283, "grad_norm": 0.09211508929729462, "learning_rate": 1.878777999622239e-06, "loss": 0.001, "step": 136250 }, { "epoch": 2.2295672093594043, "grad_norm": 0.07431850582361221, "learning_rate": 1.8780343728892247e-06, "loss": 0.0007, "step": 136260 }, { "epoch": 2.2297308353104803, "grad_norm": 0.033514849841594696, "learning_rate": 1.8772908593199635e-06, "loss": 0.001, "step": 136270 }, { "epoch": 2.229894461261556, "grad_norm": 0.13017304241657257, "learning_rate": 1.876547458941403e-06, "loss": 0.0006, "step": 136280 }, { "epoch": 2.230058087212632, "grad_norm": 0.07834970951080322, "learning_rate": 1.8758041717804882e-06, "loss": 0.001, "step": 136290 }, { "epoch": 2.230221713163708, "grad_norm": 0.007600969169288874, "learning_rate": 1.8750609978641647e-06, "loss": 0.0005, "step": 136300 }, { "epoch": 2.2303853391147834, "grad_norm": 0.13221855461597443, "learning_rate": 1.8743179372193688e-06, "loss": 0.0008, "step": 136310 }, { "epoch": 2.2305489650658594, "grad_norm": 0.21034346520900726, "learning_rate": 1.8735749898730343e-06, "loss": 0.0006, "step": 136320 }, { "epoch": 2.2307125910169354, "grad_norm": 0.012454049661755562, "learning_rate": 1.8728321558520946e-06, "loss": 0.0009, "step": 136330 }, { "epoch": 2.230876216968011, "grad_norm": 0.10688856989145279, "learning_rate": 1.8720894351834713e-06, "loss": 0.0007, "step": 136340 }, { "epoch": 2.231039842919087, "grad_norm": 0.22139160335063934, "learning_rate": 1.87134682789409e-06, "loss": 0.0015, "step": 136350 }, { "epoch": 2.231203468870163, "grad_norm": 0.04291486367583275, "learning_rate": 1.8706043340108649e-06, "loss": 0.0013, "step": 136360 }, { "epoch": 2.2313670948212385, "grad_norm": 0.2205728441476822, "learning_rate": 1.8698619535607149e-06, "loss": 0.0008, "step": 136370 }, { "epoch": 2.2315307207723145, "grad_norm": 0.027403313666582108, "learning_rate": 1.869119686570543e-06, "loss": 0.0008, "step": 136380 }, { "epoch": 2.2316943467233905, "grad_norm": 0.15601393580436707, "learning_rate": 1.868377533067257e-06, "loss": 0.0006, "step": 136390 }, { "epoch": 2.231857972674466, "grad_norm": 0.04582241177558899, "learning_rate": 1.8676354930777606e-06, "loss": 0.0005, "step": 136400 }, { "epoch": 2.232021598625542, "grad_norm": 0.044447582215070724, "learning_rate": 1.8668935666289472e-06, "loss": 0.0009, "step": 136410 }, { "epoch": 2.232185224576618, "grad_norm": 0.07942578941583633, "learning_rate": 1.8661517537477141e-06, "loss": 0.0011, "step": 136420 }, { "epoch": 2.2323488505276936, "grad_norm": 0.0710284560918808, "learning_rate": 1.865410054460946e-06, "loss": 0.0005, "step": 136430 }, { "epoch": 2.2325124764787696, "grad_norm": 0.09684465825557709, "learning_rate": 1.8646684687955313e-06, "loss": 0.0005, "step": 136440 }, { "epoch": 2.2326761024298456, "grad_norm": 0.024918200448155403, "learning_rate": 1.8639269967783468e-06, "loss": 0.0012, "step": 136450 }, { "epoch": 2.232839728380921, "grad_norm": 0.2589452564716339, "learning_rate": 1.8631856384362734e-06, "loss": 0.0012, "step": 136460 }, { "epoch": 2.233003354331997, "grad_norm": 0.05960176885128021, "learning_rate": 1.8624443937961817e-06, "loss": 0.0007, "step": 136470 }, { "epoch": 2.2331669802830727, "grad_norm": 0.03820612281560898, "learning_rate": 1.861703262884938e-06, "loss": 0.0011, "step": 136480 }, { "epoch": 2.2333306062341487, "grad_norm": 0.048957861959934235, "learning_rate": 1.8609622457294101e-06, "loss": 0.0007, "step": 136490 }, { "epoch": 2.2334942321852247, "grad_norm": 0.1657371073961258, "learning_rate": 1.8602213423564547e-06, "loss": 0.001, "step": 136500 }, { "epoch": 2.2336578581363002, "grad_norm": 0.030870649963617325, "learning_rate": 1.8594805527929305e-06, "loss": 0.001, "step": 136510 }, { "epoch": 2.2338214840873762, "grad_norm": 0.10805943608283997, "learning_rate": 1.8587398770656868e-06, "loss": 0.0006, "step": 136520 }, { "epoch": 2.2339851100384522, "grad_norm": 0.0062743667513132095, "learning_rate": 1.8579993152015746e-06, "loss": 0.0004, "step": 136530 }, { "epoch": 2.234148735989528, "grad_norm": 0.0220036618411541, "learning_rate": 1.8572588672274338e-06, "loss": 0.0003, "step": 136540 }, { "epoch": 2.234312361940604, "grad_norm": 0.19887731969356537, "learning_rate": 1.8565185331701057e-06, "loss": 0.0006, "step": 136550 }, { "epoch": 2.23447598789168, "grad_norm": 0.039455074816942215, "learning_rate": 1.8557783130564277e-06, "loss": 0.0012, "step": 136560 }, { "epoch": 2.2346396138427553, "grad_norm": 0.015283849090337753, "learning_rate": 1.8550382069132283e-06, "loss": 0.0005, "step": 136570 }, { "epoch": 2.2348032397938313, "grad_norm": 0.004910776391625404, "learning_rate": 1.8542982147673355e-06, "loss": 0.0004, "step": 136580 }, { "epoch": 2.2349668657449073, "grad_norm": 0.007749320473521948, "learning_rate": 1.85355833664557e-06, "loss": 0.0014, "step": 136590 }, { "epoch": 2.235130491695983, "grad_norm": 0.00988097582012415, "learning_rate": 1.8528185725747528e-06, "loss": 0.0013, "step": 136600 }, { "epoch": 2.235294117647059, "grad_norm": 0.11493630707263947, "learning_rate": 1.8520789225816999e-06, "loss": 0.0006, "step": 136610 }, { "epoch": 2.235457743598135, "grad_norm": 0.04362785443663597, "learning_rate": 1.8513393866932184e-06, "loss": 0.0007, "step": 136620 }, { "epoch": 2.2356213695492104, "grad_norm": 0.05324506387114525, "learning_rate": 1.8505999649361184e-06, "loss": 0.0012, "step": 136630 }, { "epoch": 2.2357849955002864, "grad_norm": 0.011651734821498394, "learning_rate": 1.8498606573371985e-06, "loss": 0.0007, "step": 136640 }, { "epoch": 2.235948621451362, "grad_norm": 0.047895416617393494, "learning_rate": 1.8491214639232602e-06, "loss": 0.001, "step": 136650 }, { "epoch": 2.236112247402438, "grad_norm": 0.018472813069820404, "learning_rate": 1.8483823847210946e-06, "loss": 0.0007, "step": 136660 }, { "epoch": 2.236275873353514, "grad_norm": 0.040261320769786835, "learning_rate": 1.8476434197574943e-06, "loss": 0.0005, "step": 136670 }, { "epoch": 2.2364394993045895, "grad_norm": 0.006628134753555059, "learning_rate": 1.8469045690592436e-06, "loss": 0.0005, "step": 136680 }, { "epoch": 2.2366031252556655, "grad_norm": 0.03279632329940796, "learning_rate": 1.8461658326531218e-06, "loss": 0.001, "step": 136690 }, { "epoch": 2.2367667512067415, "grad_norm": 0.06808466464281082, "learning_rate": 1.845427210565911e-06, "loss": 0.0005, "step": 136700 }, { "epoch": 2.236930377157817, "grad_norm": 0.13237743079662323, "learning_rate": 1.8446887028243799e-06, "loss": 0.0011, "step": 136710 }, { "epoch": 2.237094003108893, "grad_norm": 0.04189366474747658, "learning_rate": 1.8439503094553013e-06, "loss": 0.0006, "step": 136720 }, { "epoch": 2.237257629059969, "grad_norm": 0.15231986343860626, "learning_rate": 1.8432120304854373e-06, "loss": 0.0011, "step": 136730 }, { "epoch": 2.2374212550110446, "grad_norm": 0.05270028114318848, "learning_rate": 1.8424738659415513e-06, "loss": 0.0006, "step": 136740 }, { "epoch": 2.2375848809621206, "grad_norm": 0.22483132779598236, "learning_rate": 1.8417358158503972e-06, "loss": 0.0011, "step": 136750 }, { "epoch": 2.2377485069131966, "grad_norm": 0.2633575201034546, "learning_rate": 1.8409978802387296e-06, "loss": 0.0017, "step": 136760 }, { "epoch": 2.237912132864272, "grad_norm": 0.15843403339385986, "learning_rate": 1.840260059133299e-06, "loss": 0.001, "step": 136770 }, { "epoch": 2.238075758815348, "grad_norm": 0.11026959121227264, "learning_rate": 1.8395223525608441e-06, "loss": 0.0016, "step": 136780 }, { "epoch": 2.238239384766424, "grad_norm": 0.14783233404159546, "learning_rate": 1.8387847605481096e-06, "loss": 0.0013, "step": 136790 }, { "epoch": 2.2384030107174997, "grad_norm": 0.08157098293304443, "learning_rate": 1.8380472831218276e-06, "loss": 0.0011, "step": 136800 }, { "epoch": 2.2385666366685757, "grad_norm": 0.05505836382508278, "learning_rate": 1.8373099203087341e-06, "loss": 0.0009, "step": 136810 }, { "epoch": 2.2387302626196517, "grad_norm": 0.03696466609835625, "learning_rate": 1.8365726721355531e-06, "loss": 0.0023, "step": 136820 }, { "epoch": 2.2388938885707272, "grad_norm": 0.02781374379992485, "learning_rate": 1.8358355386290094e-06, "loss": 0.0009, "step": 136830 }, { "epoch": 2.2390575145218032, "grad_norm": 0.12648823857307434, "learning_rate": 1.8350985198158245e-06, "loss": 0.0009, "step": 136840 }, { "epoch": 2.239221140472879, "grad_norm": 0.036604978144168854, "learning_rate": 1.8343616157227096e-06, "loss": 0.0005, "step": 136850 }, { "epoch": 2.239384766423955, "grad_norm": 0.1295877993106842, "learning_rate": 1.8336248263763795e-06, "loss": 0.0008, "step": 136860 }, { "epoch": 2.239548392375031, "grad_norm": 0.10521502047777176, "learning_rate": 1.8328881518035397e-06, "loss": 0.0007, "step": 136870 }, { "epoch": 2.2397120183261063, "grad_norm": 0.058176614344120026, "learning_rate": 1.8321515920308902e-06, "loss": 0.0006, "step": 136880 }, { "epoch": 2.2398756442771823, "grad_norm": 0.06203823164105415, "learning_rate": 1.8314151470851337e-06, "loss": 0.0008, "step": 136890 }, { "epoch": 2.2400392702282583, "grad_norm": 0.159650519490242, "learning_rate": 1.8306788169929608e-06, "loss": 0.0007, "step": 136900 }, { "epoch": 2.240202896179334, "grad_norm": 0.10077209025621414, "learning_rate": 1.829942601781065e-06, "loss": 0.0007, "step": 136910 }, { "epoch": 2.24036652213041, "grad_norm": 0.03476560860872269, "learning_rate": 1.8292065014761295e-06, "loss": 0.0004, "step": 136920 }, { "epoch": 2.240530148081486, "grad_norm": 0.047771159559488297, "learning_rate": 1.8284705161048393e-06, "loss": 0.0009, "step": 136930 }, { "epoch": 2.2406937740325614, "grad_norm": 0.18116824328899384, "learning_rate": 1.8277346456938683e-06, "loss": 0.0007, "step": 136940 }, { "epoch": 2.2408573999836374, "grad_norm": 0.04550543054938316, "learning_rate": 1.8269988902698944e-06, "loss": 0.0009, "step": 136950 }, { "epoch": 2.2410210259347134, "grad_norm": 0.03038889914751053, "learning_rate": 1.8262632498595824e-06, "loss": 0.0006, "step": 136960 }, { "epoch": 2.241184651885789, "grad_norm": 0.17002473771572113, "learning_rate": 1.8255277244896013e-06, "loss": 0.0008, "step": 136970 }, { "epoch": 2.241348277836865, "grad_norm": 0.06227805092930794, "learning_rate": 1.8247923141866108e-06, "loss": 0.0047, "step": 136980 }, { "epoch": 2.241511903787941, "grad_norm": 0.12031940370798111, "learning_rate": 1.8240570189772655e-06, "loss": 0.0012, "step": 136990 }, { "epoch": 2.2416755297390165, "grad_norm": 0.12806041538715363, "learning_rate": 1.8233218388882218e-06, "loss": 0.0005, "step": 137000 }, { "epoch": 2.2418391556900925, "grad_norm": 0.028910886496305466, "learning_rate": 1.8225867739461251e-06, "loss": 0.0007, "step": 137010 }, { "epoch": 2.242002781641168, "grad_norm": 0.10452350974082947, "learning_rate": 1.8218518241776228e-06, "loss": 0.0005, "step": 137020 }, { "epoch": 2.242166407592244, "grad_norm": 0.12235300242900848, "learning_rate": 1.8211169896093516e-06, "loss": 0.0004, "step": 137030 }, { "epoch": 2.24233003354332, "grad_norm": 0.11636658757925034, "learning_rate": 1.8203822702679492e-06, "loss": 0.0016, "step": 137040 }, { "epoch": 2.2424936594943956, "grad_norm": 0.04234936460852623, "learning_rate": 1.8196476661800494e-06, "loss": 0.0005, "step": 137050 }, { "epoch": 2.2426572854454716, "grad_norm": 0.09420938044786453, "learning_rate": 1.8189131773722762e-06, "loss": 0.0006, "step": 137060 }, { "epoch": 2.2428209113965476, "grad_norm": 0.01809314824640751, "learning_rate": 1.8181788038712578e-06, "loss": 0.0006, "step": 137070 }, { "epoch": 2.242984537347623, "grad_norm": 0.009729955345392227, "learning_rate": 1.8174445457036077e-06, "loss": 0.0007, "step": 137080 }, { "epoch": 2.243148163298699, "grad_norm": 0.09742939472198486, "learning_rate": 1.8167104028959431e-06, "loss": 0.0007, "step": 137090 }, { "epoch": 2.243311789249775, "grad_norm": 0.06619960069656372, "learning_rate": 1.8159763754748776e-06, "loss": 0.0007, "step": 137100 }, { "epoch": 2.2434754152008507, "grad_norm": 0.052535660564899445, "learning_rate": 1.8152424634670141e-06, "loss": 0.0009, "step": 137110 }, { "epoch": 2.2436390411519267, "grad_norm": 0.08759599179029465, "learning_rate": 1.8145086668989587e-06, "loss": 0.0006, "step": 137120 }, { "epoch": 2.2438026671030027, "grad_norm": 0.1907338798046112, "learning_rate": 1.8137749857973063e-06, "loss": 0.0006, "step": 137130 }, { "epoch": 2.2439662930540782, "grad_norm": 0.05970700457692146, "learning_rate": 1.8130414201886543e-06, "loss": 0.0008, "step": 137140 }, { "epoch": 2.2441299190051542, "grad_norm": 0.08070128411054611, "learning_rate": 1.81230797009959e-06, "loss": 0.0011, "step": 137150 }, { "epoch": 2.2442935449562302, "grad_norm": 0.05773957073688507, "learning_rate": 1.8115746355567015e-06, "loss": 0.0009, "step": 137160 }, { "epoch": 2.244457170907306, "grad_norm": 0.10264132171869278, "learning_rate": 1.8108414165865695e-06, "loss": 0.0008, "step": 137170 }, { "epoch": 2.244620796858382, "grad_norm": 0.06791161000728607, "learning_rate": 1.8101083132157694e-06, "loss": 0.0004, "step": 137180 }, { "epoch": 2.244784422809458, "grad_norm": 0.14877314865589142, "learning_rate": 1.8093753254708778e-06, "loss": 0.0023, "step": 137190 }, { "epoch": 2.2449480487605333, "grad_norm": 0.03966068476438522, "learning_rate": 1.8086424533784608e-06, "loss": 0.0007, "step": 137200 }, { "epoch": 2.2451116747116093, "grad_norm": 0.14917223155498505, "learning_rate": 1.8079096969650866e-06, "loss": 0.0009, "step": 137210 }, { "epoch": 2.2452753006626853, "grad_norm": 0.07329724729061127, "learning_rate": 1.8071770562573115e-06, "loss": 0.0005, "step": 137220 }, { "epoch": 2.245438926613761, "grad_norm": 0.004673945251852274, "learning_rate": 1.8064445312816963e-06, "loss": 0.0011, "step": 137230 }, { "epoch": 2.245602552564837, "grad_norm": 0.0570252500474453, "learning_rate": 1.80571212206479e-06, "loss": 0.0006, "step": 137240 }, { "epoch": 2.2457661785159124, "grad_norm": 0.17214785516262054, "learning_rate": 1.8049798286331416e-06, "loss": 0.0008, "step": 137250 }, { "epoch": 2.2459298044669884, "grad_norm": 0.12331068515777588, "learning_rate": 1.8042476510132973e-06, "loss": 0.0014, "step": 137260 }, { "epoch": 2.2460934304180644, "grad_norm": 0.060974083840847015, "learning_rate": 1.8035155892317947e-06, "loss": 0.0015, "step": 137270 }, { "epoch": 2.24625705636914, "grad_norm": 0.16413341462612152, "learning_rate": 1.8027836433151697e-06, "loss": 0.0011, "step": 137280 }, { "epoch": 2.246420682320216, "grad_norm": 0.03981379047036171, "learning_rate": 1.8020518132899517e-06, "loss": 0.0012, "step": 137290 }, { "epoch": 2.246584308271292, "grad_norm": 0.08881141990423203, "learning_rate": 1.8013200991826707e-06, "loss": 0.001, "step": 137300 }, { "epoch": 2.2467479342223675, "grad_norm": 0.1925244927406311, "learning_rate": 1.8005885010198465e-06, "loss": 0.0016, "step": 137310 }, { "epoch": 2.2469115601734435, "grad_norm": 0.05137971043586731, "learning_rate": 1.7998570188280002e-06, "loss": 0.0008, "step": 137320 }, { "epoch": 2.2470751861245195, "grad_norm": 0.02664925530552864, "learning_rate": 1.7991256526336464e-06, "loss": 0.0006, "step": 137330 }, { "epoch": 2.247238812075595, "grad_norm": 0.10559311509132385, "learning_rate": 1.7983944024632938e-06, "loss": 0.0011, "step": 137340 }, { "epoch": 2.247402438026671, "grad_norm": 0.09940566122531891, "learning_rate": 1.7976632683434504e-06, "loss": 0.0007, "step": 137350 }, { "epoch": 2.247566063977747, "grad_norm": 0.058757755905389786, "learning_rate": 1.7969322503006154e-06, "loss": 0.0007, "step": 137360 }, { "epoch": 2.2477296899288226, "grad_norm": 0.04622402414679527, "learning_rate": 1.7962013483612895e-06, "loss": 0.0004, "step": 137370 }, { "epoch": 2.2478933158798986, "grad_norm": 0.1606961339712143, "learning_rate": 1.7954705625519647e-06, "loss": 0.0006, "step": 137380 }, { "epoch": 2.2480569418309746, "grad_norm": 0.10025844722986221, "learning_rate": 1.7947398928991282e-06, "loss": 0.0012, "step": 137390 }, { "epoch": 2.24822056778205, "grad_norm": 0.03751726821064949, "learning_rate": 1.794009339429269e-06, "loss": 0.0007, "step": 137400 }, { "epoch": 2.248384193733126, "grad_norm": 0.02809935249388218, "learning_rate": 1.7932789021688647e-06, "loss": 0.0007, "step": 137410 }, { "epoch": 2.2485478196842017, "grad_norm": 0.13995084166526794, "learning_rate": 1.7925485811443938e-06, "loss": 0.0011, "step": 137420 }, { "epoch": 2.2487114456352777, "grad_norm": 0.1931743174791336, "learning_rate": 1.7918183763823266e-06, "loss": 0.0008, "step": 137430 }, { "epoch": 2.2488750715863537, "grad_norm": 0.11885688453912735, "learning_rate": 1.791088287909134e-06, "loss": 0.0006, "step": 137440 }, { "epoch": 2.2490386975374292, "grad_norm": 0.028767570853233337, "learning_rate": 1.790358315751277e-06, "loss": 0.0008, "step": 137450 }, { "epoch": 2.2492023234885052, "grad_norm": 0.13071975111961365, "learning_rate": 1.7896284599352188e-06, "loss": 0.0007, "step": 137460 }, { "epoch": 2.2493659494395812, "grad_norm": 0.03357565030455589, "learning_rate": 1.7888987204874126e-06, "loss": 0.0006, "step": 137470 }, { "epoch": 2.249529575390657, "grad_norm": 0.06460310518741608, "learning_rate": 1.7881690974343086e-06, "loss": 0.0007, "step": 137480 }, { "epoch": 2.249693201341733, "grad_norm": 0.002815136918798089, "learning_rate": 1.7874395908023573e-06, "loss": 0.0015, "step": 137490 }, { "epoch": 2.249856827292809, "grad_norm": 0.09775934368371964, "learning_rate": 1.7867102006179977e-06, "loss": 0.0008, "step": 137500 }, { "epoch": 2.2500204532438843, "grad_norm": 0.1800021231174469, "learning_rate": 1.7859809269076722e-06, "loss": 0.0023, "step": 137510 }, { "epoch": 2.2501840791949603, "grad_norm": 0.04702913388609886, "learning_rate": 1.7852517696978117e-06, "loss": 0.0016, "step": 137520 }, { "epoch": 2.2503477051460363, "grad_norm": 0.31794503331184387, "learning_rate": 1.784522729014848e-06, "loss": 0.0007, "step": 137530 }, { "epoch": 2.250511331097112, "grad_norm": 0.005553154274821281, "learning_rate": 1.7837938048852089e-06, "loss": 0.0003, "step": 137540 }, { "epoch": 2.250674957048188, "grad_norm": 0.033034540712833405, "learning_rate": 1.7830649973353131e-06, "loss": 0.0013, "step": 137550 }, { "epoch": 2.250838582999264, "grad_norm": 0.2753106951713562, "learning_rate": 1.7823363063915804e-06, "loss": 0.0017, "step": 137560 }, { "epoch": 2.2510022089503394, "grad_norm": 0.05271976441144943, "learning_rate": 1.781607732080422e-06, "loss": 0.0006, "step": 137570 }, { "epoch": 2.2511658349014154, "grad_norm": 0.041430339217185974, "learning_rate": 1.7808792744282516e-06, "loss": 0.0009, "step": 137580 }, { "epoch": 2.2513294608524914, "grad_norm": 0.09326998889446259, "learning_rate": 1.7801509334614668e-06, "loss": 0.0018, "step": 137590 }, { "epoch": 2.251493086803567, "grad_norm": 0.6315110325813293, "learning_rate": 1.779422709206472e-06, "loss": 0.0009, "step": 137600 }, { "epoch": 2.251656712754643, "grad_norm": 0.03473074361681938, "learning_rate": 1.7786946016896655e-06, "loss": 0.0007, "step": 137610 }, { "epoch": 2.251820338705719, "grad_norm": 0.12610793113708496, "learning_rate": 1.7779666109374355e-06, "loss": 0.0009, "step": 137620 }, { "epoch": 2.2519839646567945, "grad_norm": 0.11989783495664597, "learning_rate": 1.7772387369761735e-06, "loss": 0.0007, "step": 137630 }, { "epoch": 2.2521475906078705, "grad_norm": 0.09480896592140198, "learning_rate": 1.7765109798322604e-06, "loss": 0.0008, "step": 137640 }, { "epoch": 2.252311216558946, "grad_norm": 0.023414844647049904, "learning_rate": 1.7757833395320784e-06, "loss": 0.0011, "step": 137650 }, { "epoch": 2.252474842510022, "grad_norm": 0.08589056134223938, "learning_rate": 1.7750558161019986e-06, "loss": 0.0007, "step": 137660 }, { "epoch": 2.252638468461098, "grad_norm": 0.14193852245807648, "learning_rate": 1.7743284095683972e-06, "loss": 0.0011, "step": 137670 }, { "epoch": 2.2528020944121736, "grad_norm": 0.0688340961933136, "learning_rate": 1.773601119957638e-06, "loss": 0.0009, "step": 137680 }, { "epoch": 2.2529657203632496, "grad_norm": 0.10896433144807816, "learning_rate": 1.772873947296082e-06, "loss": 0.0012, "step": 137690 }, { "epoch": 2.2531293463143256, "grad_norm": 0.04505003243684769, "learning_rate": 1.7721468916100908e-06, "loss": 0.0007, "step": 137700 }, { "epoch": 2.253292972265401, "grad_norm": 0.17665976285934448, "learning_rate": 1.771419952926015e-06, "loss": 0.0016, "step": 137710 }, { "epoch": 2.253456598216477, "grad_norm": 0.14921338856220245, "learning_rate": 1.7706931312702086e-06, "loss": 0.0009, "step": 137720 }, { "epoch": 2.253620224167553, "grad_norm": 0.16425618529319763, "learning_rate": 1.7699664266690124e-06, "loss": 0.001, "step": 137730 }, { "epoch": 2.2537838501186287, "grad_norm": 0.06294117867946625, "learning_rate": 1.769239839148772e-06, "loss": 0.0008, "step": 137740 }, { "epoch": 2.2539474760697047, "grad_norm": 0.03120293840765953, "learning_rate": 1.7685133687358208e-06, "loss": 0.0008, "step": 137750 }, { "epoch": 2.2541111020207807, "grad_norm": 0.07448040693998337, "learning_rate": 1.7677870154564936e-06, "loss": 0.0007, "step": 137760 }, { "epoch": 2.2542747279718562, "grad_norm": 0.12275935709476471, "learning_rate": 1.7670607793371197e-06, "loss": 0.0008, "step": 137770 }, { "epoch": 2.2544383539229322, "grad_norm": 0.04093034192919731, "learning_rate": 1.7663346604040227e-06, "loss": 0.0009, "step": 137780 }, { "epoch": 2.254601979874008, "grad_norm": 0.13303150236606598, "learning_rate": 1.7656086586835225e-06, "loss": 0.0008, "step": 137790 }, { "epoch": 2.254765605825084, "grad_norm": 0.0024219504557549953, "learning_rate": 1.764882774201933e-06, "loss": 0.0004, "step": 137800 }, { "epoch": 2.25492923177616, "grad_norm": 0.23065805435180664, "learning_rate": 1.7641570069855674e-06, "loss": 0.0013, "step": 137810 }, { "epoch": 2.2550928577272353, "grad_norm": 0.057689767330884933, "learning_rate": 1.7634313570607349e-06, "loss": 0.0008, "step": 137820 }, { "epoch": 2.2552564836783113, "grad_norm": 0.1388271450996399, "learning_rate": 1.7627058244537355e-06, "loss": 0.001, "step": 137830 }, { "epoch": 2.2554201096293873, "grad_norm": 0.05789140984416008, "learning_rate": 1.7619804091908704e-06, "loss": 0.0008, "step": 137840 }, { "epoch": 2.255583735580463, "grad_norm": 0.10220491141080856, "learning_rate": 1.7612551112984316e-06, "loss": 0.0005, "step": 137850 }, { "epoch": 2.255747361531539, "grad_norm": 0.35700955986976624, "learning_rate": 1.7605299308027129e-06, "loss": 0.0015, "step": 137860 }, { "epoch": 2.255910987482615, "grad_norm": 0.13218556344509125, "learning_rate": 1.7598048677299961e-06, "loss": 0.0043, "step": 137870 }, { "epoch": 2.2560746134336904, "grad_norm": 0.1223706528544426, "learning_rate": 1.7590799221065674e-06, "loss": 0.0015, "step": 137880 }, { "epoch": 2.2562382393847664, "grad_norm": 0.04382982477545738, "learning_rate": 1.7583550939587018e-06, "loss": 0.0007, "step": 137890 }, { "epoch": 2.2564018653358424, "grad_norm": 0.3298861086368561, "learning_rate": 1.7576303833126717e-06, "loss": 0.0014, "step": 137900 }, { "epoch": 2.256565491286918, "grad_norm": 0.029039116576313972, "learning_rate": 1.7569057901947489e-06, "loss": 0.0014, "step": 137910 }, { "epoch": 2.256729117237994, "grad_norm": 0.021003151312470436, "learning_rate": 1.7561813146311952e-06, "loss": 0.001, "step": 137920 }, { "epoch": 2.25689274318907, "grad_norm": 0.04633378982543945, "learning_rate": 1.7554569566482738e-06, "loss": 0.0005, "step": 137930 }, { "epoch": 2.2570563691401455, "grad_norm": 0.07034675031900406, "learning_rate": 1.7547327162722383e-06, "loss": 0.001, "step": 137940 }, { "epoch": 2.2572199950912215, "grad_norm": 0.04566957429051399, "learning_rate": 1.7540085935293433e-06, "loss": 0.001, "step": 137950 }, { "epoch": 2.2573836210422975, "grad_norm": 0.03136362135410309, "learning_rate": 1.7532845884458339e-06, "loss": 0.0013, "step": 137960 }, { "epoch": 2.257547246993373, "grad_norm": 0.09383810311555862, "learning_rate": 1.7525607010479546e-06, "loss": 0.0014, "step": 137970 }, { "epoch": 2.257710872944449, "grad_norm": 0.11463681608438492, "learning_rate": 1.7518369313619488e-06, "loss": 0.0008, "step": 137980 }, { "epoch": 2.257874498895525, "grad_norm": 0.08502574265003204, "learning_rate": 1.7511132794140433e-06, "loss": 0.0008, "step": 137990 }, { "epoch": 2.2580381248466006, "grad_norm": 0.09654149413108826, "learning_rate": 1.7503897452304753e-06, "loss": 0.0008, "step": 138000 }, { "epoch": 2.2582017507976766, "grad_norm": 0.08157924562692642, "learning_rate": 1.7496663288374665e-06, "loss": 0.001, "step": 138010 }, { "epoch": 2.2583653767487526, "grad_norm": 0.056903015822172165, "learning_rate": 1.7489430302612437e-06, "loss": 0.001, "step": 138020 }, { "epoch": 2.258529002699828, "grad_norm": 0.05095800384879112, "learning_rate": 1.7482198495280206e-06, "loss": 0.0005, "step": 138030 }, { "epoch": 2.258692628650904, "grad_norm": 0.04462926462292671, "learning_rate": 1.747496786664012e-06, "loss": 0.0009, "step": 138040 }, { "epoch": 2.2588562546019797, "grad_norm": 0.17156077921390533, "learning_rate": 1.7467738416954293e-06, "loss": 0.0008, "step": 138050 }, { "epoch": 2.2590198805530557, "grad_norm": 0.11664771288633347, "learning_rate": 1.746051014648475e-06, "loss": 0.0012, "step": 138060 }, { "epoch": 2.2591835065041317, "grad_norm": 0.11774548143148422, "learning_rate": 1.745328305549352e-06, "loss": 0.0005, "step": 138070 }, { "epoch": 2.2593471324552072, "grad_norm": 0.042643509805202484, "learning_rate": 1.7446057144242557e-06, "loss": 0.001, "step": 138080 }, { "epoch": 2.2595107584062832, "grad_norm": 0.07703953981399536, "learning_rate": 1.7438832412993762e-06, "loss": 0.0009, "step": 138090 }, { "epoch": 2.2596743843573592, "grad_norm": 0.09358423203229904, "learning_rate": 1.7431608862009047e-06, "loss": 0.0006, "step": 138100 }, { "epoch": 2.259838010308435, "grad_norm": 0.1952233910560608, "learning_rate": 1.7424386491550222e-06, "loss": 0.0011, "step": 138110 }, { "epoch": 2.260001636259511, "grad_norm": 0.21610145270824432, "learning_rate": 1.7417165301879107e-06, "loss": 0.0009, "step": 138120 }, { "epoch": 2.260165262210587, "grad_norm": 0.035237573087215424, "learning_rate": 1.740994529325743e-06, "loss": 0.0007, "step": 138130 }, { "epoch": 2.2603288881616623, "grad_norm": 0.01159909088164568, "learning_rate": 1.7402726465946918e-06, "loss": 0.001, "step": 138140 }, { "epoch": 2.2604925141127383, "grad_norm": 0.008366485126316547, "learning_rate": 1.7395508820209212e-06, "loss": 0.0008, "step": 138150 }, { "epoch": 2.260656140063814, "grad_norm": 0.0494733490049839, "learning_rate": 1.7388292356305964e-06, "loss": 0.0008, "step": 138160 }, { "epoch": 2.26081976601489, "grad_norm": 0.03611666336655617, "learning_rate": 1.738107707449872e-06, "loss": 0.0007, "step": 138170 }, { "epoch": 2.260983391965966, "grad_norm": 0.06630255281925201, "learning_rate": 1.7373862975049056e-06, "loss": 0.001, "step": 138180 }, { "epoch": 2.2611470179170414, "grad_norm": 0.101160429418087, "learning_rate": 1.7366650058218437e-06, "loss": 0.001, "step": 138190 }, { "epoch": 2.2613106438681174, "grad_norm": 0.21658700704574585, "learning_rate": 1.7359438324268306e-06, "loss": 0.0007, "step": 138200 }, { "epoch": 2.2614742698191934, "grad_norm": 0.008043624460697174, "learning_rate": 1.7352227773460107e-06, "loss": 0.0006, "step": 138210 }, { "epoch": 2.261637895770269, "grad_norm": 0.002044062362983823, "learning_rate": 1.734501840605517e-06, "loss": 0.0006, "step": 138220 }, { "epoch": 2.261801521721345, "grad_norm": 0.005898504052311182, "learning_rate": 1.7337810222314844e-06, "loss": 0.0005, "step": 138230 }, { "epoch": 2.261965147672421, "grad_norm": 0.0536799281835556, "learning_rate": 1.7330603222500376e-06, "loss": 0.0008, "step": 138240 }, { "epoch": 2.2621287736234965, "grad_norm": 0.08331379294395447, "learning_rate": 1.7323397406873027e-06, "loss": 0.0007, "step": 138250 }, { "epoch": 2.2622923995745725, "grad_norm": 0.04971930757164955, "learning_rate": 1.7316192775694001e-06, "loss": 0.0006, "step": 138260 }, { "epoch": 2.2624560255256485, "grad_norm": 0.15873540937900543, "learning_rate": 1.730898932922442e-06, "loss": 0.0013, "step": 138270 }, { "epoch": 2.262619651476724, "grad_norm": 0.09229025989770889, "learning_rate": 1.7301787067725434e-06, "loss": 0.0006, "step": 138280 }, { "epoch": 2.2627832774278, "grad_norm": 0.05827879533171654, "learning_rate": 1.7294585991458046e-06, "loss": 0.0009, "step": 138290 }, { "epoch": 2.262946903378876, "grad_norm": 0.10686063021421432, "learning_rate": 1.7287386100683311e-06, "loss": 0.0009, "step": 138300 }, { "epoch": 2.2631105293299516, "grad_norm": 0.05689430609345436, "learning_rate": 1.7280187395662224e-06, "loss": 0.0004, "step": 138310 }, { "epoch": 2.2632741552810276, "grad_norm": 0.17898309230804443, "learning_rate": 1.7272989876655688e-06, "loss": 0.0011, "step": 138320 }, { "epoch": 2.2634377812321036, "grad_norm": 0.18826553225517273, "learning_rate": 1.726579354392463e-06, "loss": 0.0012, "step": 138330 }, { "epoch": 2.263601407183179, "grad_norm": 0.05082348734140396, "learning_rate": 1.725859839772986e-06, "loss": 0.0007, "step": 138340 }, { "epoch": 2.263765033134255, "grad_norm": 0.09817861020565033, "learning_rate": 1.7251404438332226e-06, "loss": 0.0011, "step": 138350 }, { "epoch": 2.263928659085331, "grad_norm": 0.004967184737324715, "learning_rate": 1.7244211665992456e-06, "loss": 0.0007, "step": 138360 }, { "epoch": 2.2640922850364067, "grad_norm": 0.15865665674209595, "learning_rate": 1.72370200809713e-06, "loss": 0.0012, "step": 138370 }, { "epoch": 2.2642559109874827, "grad_norm": 0.014201820828020573, "learning_rate": 1.7229829683529424e-06, "loss": 0.001, "step": 138380 }, { "epoch": 2.2644195369385587, "grad_norm": 0.1520019769668579, "learning_rate": 1.7222640473927443e-06, "loss": 0.0009, "step": 138390 }, { "epoch": 2.2645831628896342, "grad_norm": 0.06845227628946304, "learning_rate": 1.7215452452425986e-06, "loss": 0.0013, "step": 138400 }, { "epoch": 2.2647467888407102, "grad_norm": 0.05643242970108986, "learning_rate": 1.7208265619285563e-06, "loss": 0.001, "step": 138410 }, { "epoch": 2.264910414791786, "grad_norm": 0.019576242193579674, "learning_rate": 1.7201079974766716e-06, "loss": 0.0009, "step": 138420 }, { "epoch": 2.265074040742862, "grad_norm": 0.1266564279794693, "learning_rate": 1.7193895519129877e-06, "loss": 0.0011, "step": 138430 }, { "epoch": 2.265237666693938, "grad_norm": 0.04488179087638855, "learning_rate": 1.718671225263549e-06, "loss": 0.0009, "step": 138440 }, { "epoch": 2.2654012926450133, "grad_norm": 0.06462817639112473, "learning_rate": 1.71795301755439e-06, "loss": 0.0007, "step": 138450 }, { "epoch": 2.2655649185960893, "grad_norm": 0.04263461008667946, "learning_rate": 1.7172349288115464e-06, "loss": 0.0009, "step": 138460 }, { "epoch": 2.2657285445471653, "grad_norm": 0.023121269419789314, "learning_rate": 1.7165169590610486e-06, "loss": 0.0003, "step": 138470 }, { "epoch": 2.265892170498241, "grad_norm": 0.0011576018296182156, "learning_rate": 1.7157991083289189e-06, "loss": 0.0011, "step": 138480 }, { "epoch": 2.266055796449317, "grad_norm": 0.022658947855234146, "learning_rate": 1.7150813766411784e-06, "loss": 0.001, "step": 138490 }, { "epoch": 2.266219422400393, "grad_norm": 0.022222619503736496, "learning_rate": 1.714363764023841e-06, "loss": 0.001, "step": 138500 }, { "epoch": 2.2663830483514684, "grad_norm": 0.01836101897060871, "learning_rate": 1.7136462705029221e-06, "loss": 0.001, "step": 138510 }, { "epoch": 2.2665466743025444, "grad_norm": 0.30837783217430115, "learning_rate": 1.7129288961044255e-06, "loss": 0.0014, "step": 138520 }, { "epoch": 2.2667103002536204, "grad_norm": 0.04001494124531746, "learning_rate": 1.7122116408543566e-06, "loss": 0.0011, "step": 138530 }, { "epoch": 2.266873926204696, "grad_norm": 0.15171189606189728, "learning_rate": 1.711494504778715e-06, "loss": 0.0013, "step": 138540 }, { "epoch": 2.267037552155772, "grad_norm": 0.11166616529226303, "learning_rate": 1.7107774879034927e-06, "loss": 0.0012, "step": 138550 }, { "epoch": 2.2672011781068475, "grad_norm": 0.04553128033876419, "learning_rate": 1.7100605902546824e-06, "loss": 0.0006, "step": 138560 }, { "epoch": 2.2673648040579235, "grad_norm": 0.05317135155200958, "learning_rate": 1.7093438118582666e-06, "loss": 0.0007, "step": 138570 }, { "epoch": 2.2675284300089995, "grad_norm": 0.21715816855430603, "learning_rate": 1.7086271527402308e-06, "loss": 0.0016, "step": 138580 }, { "epoch": 2.267692055960075, "grad_norm": 0.07029976695775986, "learning_rate": 1.70791061292655e-06, "loss": 0.0008, "step": 138590 }, { "epoch": 2.267855681911151, "grad_norm": 0.084548220038414, "learning_rate": 1.707194192443195e-06, "loss": 0.0006, "step": 138600 }, { "epoch": 2.268019307862227, "grad_norm": 0.002569173462688923, "learning_rate": 1.706477891316139e-06, "loss": 0.0006, "step": 138610 }, { "epoch": 2.2681829338133026, "grad_norm": 0.04806216433644295, "learning_rate": 1.7057617095713412e-06, "loss": 0.0008, "step": 138620 }, { "epoch": 2.2683465597643786, "grad_norm": 0.16387756168842316, "learning_rate": 1.705045647234766e-06, "loss": 0.0007, "step": 138630 }, { "epoch": 2.2685101857154546, "grad_norm": 0.023508461192250252, "learning_rate": 1.7043297043323652e-06, "loss": 0.0011, "step": 138640 }, { "epoch": 2.26867381166653, "grad_norm": 0.1013295128941536, "learning_rate": 1.7036138808900938e-06, "loss": 0.0008, "step": 138650 }, { "epoch": 2.268837437617606, "grad_norm": 0.059112463146448135, "learning_rate": 1.7028981769338943e-06, "loss": 0.0013, "step": 138660 }, { "epoch": 2.269001063568682, "grad_norm": 0.11990657448768616, "learning_rate": 1.7021825924897133e-06, "loss": 0.0013, "step": 138670 }, { "epoch": 2.2691646895197577, "grad_norm": 0.03965405747294426, "learning_rate": 1.7014671275834871e-06, "loss": 0.0006, "step": 138680 }, { "epoch": 2.2693283154708337, "grad_norm": 0.09039021283388138, "learning_rate": 1.7007517822411485e-06, "loss": 0.0012, "step": 138690 }, { "epoch": 2.2694919414219097, "grad_norm": 0.05818749591708183, "learning_rate": 1.7000365564886295e-06, "loss": 0.0014, "step": 138700 }, { "epoch": 2.2696555673729852, "grad_norm": 0.09912757575511932, "learning_rate": 1.6993214503518523e-06, "loss": 0.0005, "step": 138710 }, { "epoch": 2.2698191933240612, "grad_norm": 0.06012880802154541, "learning_rate": 1.6986064638567412e-06, "loss": 0.0005, "step": 138720 }, { "epoch": 2.2699828192751372, "grad_norm": 0.07397420704364777, "learning_rate": 1.6978915970292097e-06, "loss": 0.001, "step": 138730 }, { "epoch": 2.270146445226213, "grad_norm": 0.0503038614988327, "learning_rate": 1.6971768498951717e-06, "loss": 0.0008, "step": 138740 }, { "epoch": 2.270310071177289, "grad_norm": 0.02471921779215336, "learning_rate": 1.6964622224805361e-06, "loss": 0.0005, "step": 138750 }, { "epoch": 2.2704736971283648, "grad_norm": 0.03774268925189972, "learning_rate": 1.6957477148112034e-06, "loss": 0.0004, "step": 138760 }, { "epoch": 2.2706373230794403, "grad_norm": 0.3109720051288605, "learning_rate": 1.695033326913076e-06, "loss": 0.0006, "step": 138770 }, { "epoch": 2.2708009490305163, "grad_norm": 0.0879649743437767, "learning_rate": 1.6943190588120472e-06, "loss": 0.0005, "step": 138780 }, { "epoch": 2.2709645749815923, "grad_norm": 0.11692360043525696, "learning_rate": 1.6936049105340079e-06, "loss": 0.0013, "step": 138790 }, { "epoch": 2.271128200932668, "grad_norm": 0.03319636359810829, "learning_rate": 1.6928908821048422e-06, "loss": 0.0009, "step": 138800 }, { "epoch": 2.271291826883744, "grad_norm": 0.03742299973964691, "learning_rate": 1.6921769735504334e-06, "loss": 0.0008, "step": 138810 }, { "epoch": 2.2714554528348194, "grad_norm": 0.04606965184211731, "learning_rate": 1.6914631848966607e-06, "loss": 0.0006, "step": 138820 }, { "epoch": 2.2716190787858954, "grad_norm": 0.05139930173754692, "learning_rate": 1.6907495161693943e-06, "loss": 0.0005, "step": 138830 }, { "epoch": 2.2717827047369714, "grad_norm": 0.007837047800421715, "learning_rate": 1.690035967394506e-06, "loss": 0.0011, "step": 138840 }, { "epoch": 2.271946330688047, "grad_norm": 0.07360134273767471, "learning_rate": 1.6893225385978567e-06, "loss": 0.0005, "step": 138850 }, { "epoch": 2.272109956639123, "grad_norm": 0.023278450593352318, "learning_rate": 1.6886092298053104e-06, "loss": 0.0012, "step": 138860 }, { "epoch": 2.272273582590199, "grad_norm": 0.03744323551654816, "learning_rate": 1.6878960410427187e-06, "loss": 0.0012, "step": 138870 }, { "epoch": 2.2724372085412745, "grad_norm": 0.028468498960137367, "learning_rate": 1.6871829723359368e-06, "loss": 0.0009, "step": 138880 }, { "epoch": 2.2726008344923505, "grad_norm": 0.13218821585178375, "learning_rate": 1.68647002371081e-06, "loss": 0.0012, "step": 138890 }, { "epoch": 2.2727644604434265, "grad_norm": 0.00504994997754693, "learning_rate": 1.6857571951931795e-06, "loss": 0.0012, "step": 138900 }, { "epoch": 2.272928086394502, "grad_norm": 0.16253262758255005, "learning_rate": 1.6850444868088866e-06, "loss": 0.001, "step": 138910 }, { "epoch": 2.273091712345578, "grad_norm": 0.05473746359348297, "learning_rate": 1.6843318985837626e-06, "loss": 0.0005, "step": 138920 }, { "epoch": 2.2732553382966536, "grad_norm": 0.20815257728099823, "learning_rate": 1.6836194305436393e-06, "loss": 0.0015, "step": 138930 }, { "epoch": 2.2734189642477296, "grad_norm": 0.0493653267621994, "learning_rate": 1.6829070827143396e-06, "loss": 0.001, "step": 138940 }, { "epoch": 2.2735825901988056, "grad_norm": 0.03902864083647728, "learning_rate": 1.6821948551216878e-06, "loss": 0.001, "step": 138950 }, { "epoch": 2.273746216149881, "grad_norm": 0.08249147981405258, "learning_rate": 1.6814827477914959e-06, "loss": 0.0009, "step": 138960 }, { "epoch": 2.273909842100957, "grad_norm": 0.01999511383473873, "learning_rate": 1.6807707607495787e-06, "loss": 0.0007, "step": 138970 }, { "epoch": 2.274073468052033, "grad_norm": 0.05212975665926933, "learning_rate": 1.6800588940217478e-06, "loss": 0.0005, "step": 138980 }, { "epoch": 2.2742370940031087, "grad_norm": 0.14709383249282837, "learning_rate": 1.6793471476337985e-06, "loss": 0.0012, "step": 138990 }, { "epoch": 2.2744007199541847, "grad_norm": 0.09429161995649338, "learning_rate": 1.6786355216115363e-06, "loss": 0.0008, "step": 139000 }, { "epoch": 2.2745643459052607, "grad_norm": 0.22234313189983368, "learning_rate": 1.6779240159807526e-06, "loss": 0.0014, "step": 139010 }, { "epoch": 2.2747279718563362, "grad_norm": 0.05838272348046303, "learning_rate": 1.677212630767238e-06, "loss": 0.0009, "step": 139020 }, { "epoch": 2.2748915978074122, "grad_norm": 0.07106290012598038, "learning_rate": 1.6765013659967815e-06, "loss": 0.0007, "step": 139030 }, { "epoch": 2.2750552237584882, "grad_norm": 0.04249867796897888, "learning_rate": 1.6757902216951611e-06, "loss": 0.0014, "step": 139040 }, { "epoch": 2.275218849709564, "grad_norm": 0.028932945802807808, "learning_rate": 1.675079197888157e-06, "loss": 0.0022, "step": 139050 }, { "epoch": 2.27538247566064, "grad_norm": 0.15324606001377106, "learning_rate": 1.6743682946015393e-06, "loss": 0.0006, "step": 139060 }, { "epoch": 2.2755461016117158, "grad_norm": 0.15140491724014282, "learning_rate": 1.6736575118610792e-06, "loss": 0.0011, "step": 139070 }, { "epoch": 2.2757097275627913, "grad_norm": 0.08114051073789597, "learning_rate": 1.6729468496925406e-06, "loss": 0.0005, "step": 139080 }, { "epoch": 2.2758733535138673, "grad_norm": 0.018584074452519417, "learning_rate": 1.6722363081216797e-06, "loss": 0.0008, "step": 139090 }, { "epoch": 2.2760369794649433, "grad_norm": 0.057265494018793106, "learning_rate": 1.6715258871742567e-06, "loss": 0.0011, "step": 139100 }, { "epoch": 2.276200605416019, "grad_norm": 0.004021097905933857, "learning_rate": 1.670815586876019e-06, "loss": 0.0011, "step": 139110 }, { "epoch": 2.276364231367095, "grad_norm": 0.04205913841724396, "learning_rate": 1.6701054072527162e-06, "loss": 0.0011, "step": 139120 }, { "epoch": 2.276527857318171, "grad_norm": 0.03988330066204071, "learning_rate": 1.6693953483300874e-06, "loss": 0.0013, "step": 139130 }, { "epoch": 2.2766914832692464, "grad_norm": 0.011853029951453209, "learning_rate": 1.668685410133874e-06, "loss": 0.0004, "step": 139140 }, { "epoch": 2.2768551092203224, "grad_norm": 0.12218745797872543, "learning_rate": 1.6679755926898066e-06, "loss": 0.001, "step": 139150 }, { "epoch": 2.2770187351713984, "grad_norm": 0.06630395352840424, "learning_rate": 1.667265896023617e-06, "loss": 0.0011, "step": 139160 }, { "epoch": 2.277182361122474, "grad_norm": 0.02177361026406288, "learning_rate": 1.666556320161027e-06, "loss": 0.0007, "step": 139170 }, { "epoch": 2.27734598707355, "grad_norm": 0.0012743271654471755, "learning_rate": 1.6658468651277604e-06, "loss": 0.0009, "step": 139180 }, { "epoch": 2.2775096130246255, "grad_norm": 0.058784399181604385, "learning_rate": 1.6651375309495315e-06, "loss": 0.001, "step": 139190 }, { "epoch": 2.2776732389757015, "grad_norm": 0.11113272607326508, "learning_rate": 1.6644283176520504e-06, "loss": 0.0012, "step": 139200 }, { "epoch": 2.2778368649267775, "grad_norm": 0.10286493599414825, "learning_rate": 1.6637192252610274e-06, "loss": 0.0008, "step": 139210 }, { "epoch": 2.278000490877853, "grad_norm": 0.09020040929317474, "learning_rate": 1.6630102538021625e-06, "loss": 0.001, "step": 139220 }, { "epoch": 2.278164116828929, "grad_norm": 0.11142237484455109, "learning_rate": 1.662301403301157e-06, "loss": 0.0012, "step": 139230 }, { "epoch": 2.278327742780005, "grad_norm": 0.08888515084981918, "learning_rate": 1.6615926737837023e-06, "loss": 0.0005, "step": 139240 }, { "epoch": 2.2784913687310806, "grad_norm": 0.01639641635119915, "learning_rate": 1.6608840652754898e-06, "loss": 0.0004, "step": 139250 }, { "epoch": 2.2786549946821566, "grad_norm": 0.07680242508649826, "learning_rate": 1.6601755778022055e-06, "loss": 0.0013, "step": 139260 }, { "epoch": 2.2788186206332326, "grad_norm": 0.08850005269050598, "learning_rate": 1.6594672113895283e-06, "loss": 0.0006, "step": 139270 }, { "epoch": 2.278982246584308, "grad_norm": 0.082381471991539, "learning_rate": 1.6587589660631392e-06, "loss": 0.0008, "step": 139280 }, { "epoch": 2.279145872535384, "grad_norm": 0.08754458278417587, "learning_rate": 1.658050841848704e-06, "loss": 0.0007, "step": 139290 }, { "epoch": 2.27930949848646, "grad_norm": 0.06146247312426567, "learning_rate": 1.6573428387718933e-06, "loss": 0.0019, "step": 139300 }, { "epoch": 2.2794731244375357, "grad_norm": 0.0855596661567688, "learning_rate": 1.6566349568583723e-06, "loss": 0.0006, "step": 139310 }, { "epoch": 2.2796367503886117, "grad_norm": 0.01028352975845337, "learning_rate": 1.6559271961337974e-06, "loss": 0.0004, "step": 139320 }, { "epoch": 2.2798003763396872, "grad_norm": 0.007872547022998333, "learning_rate": 1.6552195566238254e-06, "loss": 0.0006, "step": 139330 }, { "epoch": 2.2799640022907632, "grad_norm": 0.044140662997961044, "learning_rate": 1.6545120383541047e-06, "loss": 0.0005, "step": 139340 }, { "epoch": 2.2801276282418392, "grad_norm": 0.013871453702449799, "learning_rate": 1.6538046413502828e-06, "loss": 0.0004, "step": 139350 }, { "epoch": 2.280291254192915, "grad_norm": 0.08327380567789078, "learning_rate": 1.653097365637999e-06, "loss": 0.0012, "step": 139360 }, { "epoch": 2.280454880143991, "grad_norm": 0.12830397486686707, "learning_rate": 1.6523902112428935e-06, "loss": 0.001, "step": 139370 }, { "epoch": 2.2806185060950668, "grad_norm": 0.03175719827413559, "learning_rate": 1.6516831781905968e-06, "loss": 0.0011, "step": 139380 }, { "epoch": 2.2807821320461423, "grad_norm": 0.015279852785170078, "learning_rate": 1.6509762665067358e-06, "loss": 0.0004, "step": 139390 }, { "epoch": 2.2809457579972183, "grad_norm": 0.011449161916971207, "learning_rate": 1.6502694762169376e-06, "loss": 0.0007, "step": 139400 }, { "epoch": 2.2811093839482943, "grad_norm": 0.05544215068221092, "learning_rate": 1.6495628073468184e-06, "loss": 0.0018, "step": 139410 }, { "epoch": 2.28127300989937, "grad_norm": 0.007027834188193083, "learning_rate": 1.6488562599219965e-06, "loss": 0.0007, "step": 139420 }, { "epoch": 2.281436635850446, "grad_norm": 0.035232458263635635, "learning_rate": 1.648149833968079e-06, "loss": 0.0016, "step": 139430 }, { "epoch": 2.281600261801522, "grad_norm": 0.15208080410957336, "learning_rate": 1.647443529510676e-06, "loss": 0.0018, "step": 139440 }, { "epoch": 2.2817638877525974, "grad_norm": 0.035637788474559784, "learning_rate": 1.6467373465753855e-06, "loss": 0.0007, "step": 139450 }, { "epoch": 2.2819275137036734, "grad_norm": 0.01628064177930355, "learning_rate": 1.6460312851878074e-06, "loss": 0.0004, "step": 139460 }, { "epoch": 2.2820911396547494, "grad_norm": 0.05448537692427635, "learning_rate": 1.6453253453735353e-06, "loss": 0.0012, "step": 139470 }, { "epoch": 2.282254765605825, "grad_norm": 0.13321532309055328, "learning_rate": 1.6446195271581566e-06, "loss": 0.0008, "step": 139480 }, { "epoch": 2.282418391556901, "grad_norm": 0.05506069213151932, "learning_rate": 1.6439138305672559e-06, "loss": 0.0006, "step": 139490 }, { "epoch": 2.282582017507977, "grad_norm": 0.19812196493148804, "learning_rate": 1.6432082556264111e-06, "loss": 0.0005, "step": 139500 }, { "epoch": 2.2827456434590525, "grad_norm": 0.03946609050035477, "learning_rate": 1.6425028023611994e-06, "loss": 0.0007, "step": 139510 }, { "epoch": 2.2829092694101285, "grad_norm": 0.30005836486816406, "learning_rate": 1.641797470797193e-06, "loss": 0.0013, "step": 139520 }, { "epoch": 2.2830728953612045, "grad_norm": 0.3092999756336212, "learning_rate": 1.641092260959955e-06, "loss": 0.0018, "step": 139530 }, { "epoch": 2.28323652131228, "grad_norm": 0.10823874175548553, "learning_rate": 1.6403871728750514e-06, "loss": 0.0011, "step": 139540 }, { "epoch": 2.283400147263356, "grad_norm": 0.02327662892639637, "learning_rate": 1.6396822065680368e-06, "loss": 0.0009, "step": 139550 }, { "epoch": 2.283563773214432, "grad_norm": 0.06493327766656876, "learning_rate": 1.6389773620644678e-06, "loss": 0.0003, "step": 139560 }, { "epoch": 2.2837273991655076, "grad_norm": 0.06941473484039307, "learning_rate": 1.638272639389889e-06, "loss": 0.0003, "step": 139570 }, { "epoch": 2.2838910251165836, "grad_norm": 0.02352878823876381, "learning_rate": 1.6375680385698496e-06, "loss": 0.0006, "step": 139580 }, { "epoch": 2.284054651067659, "grad_norm": 0.03207981958985329, "learning_rate": 1.6368635596298872e-06, "loss": 0.0007, "step": 139590 }, { "epoch": 2.284218277018735, "grad_norm": 0.049908850342035294, "learning_rate": 1.6361592025955359e-06, "loss": 0.0007, "step": 139600 }, { "epoch": 2.284381902969811, "grad_norm": 0.019871102645993233, "learning_rate": 1.6354549674923303e-06, "loss": 0.001, "step": 139610 }, { "epoch": 2.2845455289208867, "grad_norm": 0.007271668408066034, "learning_rate": 1.6347508543457947e-06, "loss": 0.0019, "step": 139620 }, { "epoch": 2.2847091548719627, "grad_norm": 0.031424205750226974, "learning_rate": 1.6340468631814533e-06, "loss": 0.0004, "step": 139630 }, { "epoch": 2.2848727808230387, "grad_norm": 0.046694621443748474, "learning_rate": 1.6333429940248219e-06, "loss": 0.0009, "step": 139640 }, { "epoch": 2.2850364067741142, "grad_norm": 0.04434395954012871, "learning_rate": 1.6326392469014174e-06, "loss": 0.0006, "step": 139650 }, { "epoch": 2.2852000327251902, "grad_norm": 0.05857578665018082, "learning_rate": 1.6319356218367448e-06, "loss": 0.0013, "step": 139660 }, { "epoch": 2.2853636586762662, "grad_norm": 0.0665457472205162, "learning_rate": 1.6312321188563113e-06, "loss": 0.001, "step": 139670 }, { "epoch": 2.285527284627342, "grad_norm": 0.06540035456418991, "learning_rate": 1.6305287379856182e-06, "loss": 0.0005, "step": 139680 }, { "epoch": 2.285690910578418, "grad_norm": 0.06826451420783997, "learning_rate": 1.6298254792501606e-06, "loss": 0.0007, "step": 139690 }, { "epoch": 2.2858545365294933, "grad_norm": 0.05183140188455582, "learning_rate": 1.6291223426754289e-06, "loss": 0.0008, "step": 139700 }, { "epoch": 2.2860181624805693, "grad_norm": 0.04997354373335838, "learning_rate": 1.6284193282869093e-06, "loss": 0.0014, "step": 139710 }, { "epoch": 2.2861817884316453, "grad_norm": 0.030869508162140846, "learning_rate": 1.6277164361100868e-06, "loss": 0.001, "step": 139720 }, { "epoch": 2.286345414382721, "grad_norm": 0.11118374764919281, "learning_rate": 1.6270136661704373e-06, "loss": 0.001, "step": 139730 }, { "epoch": 2.286509040333797, "grad_norm": 0.22236043214797974, "learning_rate": 1.6263110184934356e-06, "loss": 0.0009, "step": 139740 }, { "epoch": 2.286672666284873, "grad_norm": 0.030773678794503212, "learning_rate": 1.6256084931045524e-06, "loss": 0.0008, "step": 139750 }, { "epoch": 2.2868362922359484, "grad_norm": 0.0017438497161492705, "learning_rate": 1.6249060900292495e-06, "loss": 0.0012, "step": 139760 }, { "epoch": 2.2869999181870244, "grad_norm": 0.12379049509763718, "learning_rate": 1.6242038092929908e-06, "loss": 0.0008, "step": 139770 }, { "epoch": 2.2871635441381004, "grad_norm": 0.09350680559873581, "learning_rate": 1.6235016509212286e-06, "loss": 0.0006, "step": 139780 }, { "epoch": 2.287327170089176, "grad_norm": 0.11177900433540344, "learning_rate": 1.6227996149394182e-06, "loss": 0.0008, "step": 139790 }, { "epoch": 2.287490796040252, "grad_norm": 0.04311496764421463, "learning_rate": 1.6220977013730049e-06, "loss": 0.0008, "step": 139800 }, { "epoch": 2.287654421991328, "grad_norm": 0.09959407895803452, "learning_rate": 1.6213959102474298e-06, "loss": 0.0006, "step": 139810 }, { "epoch": 2.2878180479424035, "grad_norm": 0.03599425405263901, "learning_rate": 1.6206942415881343e-06, "loss": 0.0002, "step": 139820 }, { "epoch": 2.2879816738934795, "grad_norm": 0.1324683129787445, "learning_rate": 1.6199926954205491e-06, "loss": 0.0011, "step": 139830 }, { "epoch": 2.2881452998445555, "grad_norm": 0.05062973499298096, "learning_rate": 1.6192912717701066e-06, "loss": 0.0003, "step": 139840 }, { "epoch": 2.288308925795631, "grad_norm": 0.25937387347221375, "learning_rate": 1.6185899706622282e-06, "loss": 0.0029, "step": 139850 }, { "epoch": 2.288472551746707, "grad_norm": 0.0058353738859295845, "learning_rate": 1.617888792122338e-06, "loss": 0.0007, "step": 139860 }, { "epoch": 2.288636177697783, "grad_norm": 0.050435684621334076, "learning_rate": 1.6171877361758493e-06, "loss": 0.0004, "step": 139870 }, { "epoch": 2.2887998036488586, "grad_norm": 0.09273172914981842, "learning_rate": 1.6164868028481761e-06, "loss": 0.0017, "step": 139880 }, { "epoch": 2.2889634295999346, "grad_norm": 0.09553926438093185, "learning_rate": 1.615785992164724e-06, "loss": 0.0005, "step": 139890 }, { "epoch": 2.2891270555510106, "grad_norm": 0.038811590522527695, "learning_rate": 1.6150853041508945e-06, "loss": 0.0003, "step": 139900 }, { "epoch": 2.289290681502086, "grad_norm": 0.0106173912063241, "learning_rate": 1.6143847388320894e-06, "loss": 0.0013, "step": 139910 }, { "epoch": 2.289454307453162, "grad_norm": 0.07198160886764526, "learning_rate": 1.613684296233698e-06, "loss": 0.0005, "step": 139920 }, { "epoch": 2.289617933404238, "grad_norm": 0.05207657814025879, "learning_rate": 1.612983976381114e-06, "loss": 0.0006, "step": 139930 }, { "epoch": 2.2897815593553137, "grad_norm": 0.017597423866391182, "learning_rate": 1.6122837792997191e-06, "loss": 0.0008, "step": 139940 }, { "epoch": 2.2899451853063897, "grad_norm": 0.0697166845202446, "learning_rate": 1.6115837050148947e-06, "loss": 0.0011, "step": 139950 }, { "epoch": 2.2901088112574657, "grad_norm": 0.024002980440855026, "learning_rate": 1.610883753552019e-06, "loss": 0.0009, "step": 139960 }, { "epoch": 2.2902724372085412, "grad_norm": 0.06490520387887955, "learning_rate": 1.61018392493646e-06, "loss": 0.0007, "step": 139970 }, { "epoch": 2.2904360631596172, "grad_norm": 0.14310207962989807, "learning_rate": 1.6094842191935877e-06, "loss": 0.001, "step": 139980 }, { "epoch": 2.290599689110693, "grad_norm": 0.1240270584821701, "learning_rate": 1.608784636348764e-06, "loss": 0.001, "step": 139990 }, { "epoch": 2.290763315061769, "grad_norm": 0.1480078399181366, "learning_rate": 1.6080851764273465e-06, "loss": 0.0007, "step": 140000 }, { "epoch": 2.2909269410128448, "grad_norm": 0.19509311020374298, "learning_rate": 1.6073858394546875e-06, "loss": 0.0011, "step": 140010 }, { "epoch": 2.2910905669639203, "grad_norm": 0.0041755251586437225, "learning_rate": 1.6066866254561381e-06, "loss": 0.0012, "step": 140020 }, { "epoch": 2.2912541929149963, "grad_norm": 0.13380256295204163, "learning_rate": 1.6059875344570442e-06, "loss": 0.0007, "step": 140030 }, { "epoch": 2.2914178188660723, "grad_norm": 0.14776718616485596, "learning_rate": 1.6052885664827433e-06, "loss": 0.001, "step": 140040 }, { "epoch": 2.291581444817148, "grad_norm": 0.307634562253952, "learning_rate": 1.6045897215585748e-06, "loss": 0.001, "step": 140050 }, { "epoch": 2.291745070768224, "grad_norm": 0.12230852246284485, "learning_rate": 1.6038909997098662e-06, "loss": 0.0009, "step": 140060 }, { "epoch": 2.2919086967193, "grad_norm": 0.005413628648966551, "learning_rate": 1.6031924009619488e-06, "loss": 0.001, "step": 140070 }, { "epoch": 2.2920723226703754, "grad_norm": 0.026682093739509583, "learning_rate": 1.6024939253401407e-06, "loss": 0.0005, "step": 140080 }, { "epoch": 2.2922359486214514, "grad_norm": 0.009006984531879425, "learning_rate": 1.6017955728697638e-06, "loss": 0.0007, "step": 140090 }, { "epoch": 2.292399574572527, "grad_norm": 0.1327577829360962, "learning_rate": 1.6010973435761302e-06, "loss": 0.0009, "step": 140100 }, { "epoch": 2.292563200523603, "grad_norm": 0.04897262528538704, "learning_rate": 1.6003992374845467e-06, "loss": 0.0006, "step": 140110 }, { "epoch": 2.292726826474679, "grad_norm": 0.08505528420209885, "learning_rate": 1.5997012546203223e-06, "loss": 0.0006, "step": 140120 }, { "epoch": 2.2928904524257545, "grad_norm": 0.023017607629299164, "learning_rate": 1.5990033950087537e-06, "loss": 0.0009, "step": 140130 }, { "epoch": 2.2930540783768305, "grad_norm": 0.1936735212802887, "learning_rate": 1.5983056586751388e-06, "loss": 0.0008, "step": 140140 }, { "epoch": 2.2932177043279065, "grad_norm": 0.011501717381179333, "learning_rate": 1.5976080456447668e-06, "loss": 0.0008, "step": 140150 }, { "epoch": 2.293381330278982, "grad_norm": 0.04580685496330261, "learning_rate": 1.596910555942927e-06, "loss": 0.0006, "step": 140160 }, { "epoch": 2.293544956230058, "grad_norm": 0.09653496742248535, "learning_rate": 1.596213189594899e-06, "loss": 0.0007, "step": 140170 }, { "epoch": 2.293708582181134, "grad_norm": 0.043103598058223724, "learning_rate": 1.5955159466259623e-06, "loss": 0.001, "step": 140180 }, { "epoch": 2.2938722081322096, "grad_norm": 0.021309738978743553, "learning_rate": 1.5948188270613935e-06, "loss": 0.0007, "step": 140190 }, { "epoch": 2.2940358340832856, "grad_norm": 0.19352178275585175, "learning_rate": 1.5941218309264545e-06, "loss": 0.0008, "step": 140200 }, { "epoch": 2.2941994600343616, "grad_norm": 0.03913641721010208, "learning_rate": 1.5934249582464146e-06, "loss": 0.001, "step": 140210 }, { "epoch": 2.294363085985437, "grad_norm": 0.06472831219434738, "learning_rate": 1.5927282090465313e-06, "loss": 0.0011, "step": 140220 }, { "epoch": 2.294526711936513, "grad_norm": 0.12262832373380661, "learning_rate": 1.592031583352061e-06, "loss": 0.0007, "step": 140230 }, { "epoch": 2.294690337887589, "grad_norm": 0.08961786329746246, "learning_rate": 1.5913350811882567e-06, "loss": 0.0006, "step": 140240 }, { "epoch": 2.2948539638386647, "grad_norm": 0.013164161704480648, "learning_rate": 1.5906387025803616e-06, "loss": 0.0007, "step": 140250 }, { "epoch": 2.2950175897897407, "grad_norm": 0.0371859073638916, "learning_rate": 1.5899424475536213e-06, "loss": 0.0008, "step": 140260 }, { "epoch": 2.2951812157408167, "grad_norm": 0.10145356506109238, "learning_rate": 1.5892463161332695e-06, "loss": 0.0008, "step": 140270 }, { "epoch": 2.2953448416918922, "grad_norm": 0.026181021705269814, "learning_rate": 1.5885503083445426e-06, "loss": 0.0008, "step": 140280 }, { "epoch": 2.2955084676429682, "grad_norm": 0.08042646944522858, "learning_rate": 1.587854424212668e-06, "loss": 0.001, "step": 140290 }, { "epoch": 2.2956720935940442, "grad_norm": 0.19835972785949707, "learning_rate": 1.5871586637628683e-06, "loss": 0.0005, "step": 140300 }, { "epoch": 2.29583571954512, "grad_norm": 0.0745401605963707, "learning_rate": 1.5864630270203656e-06, "loss": 0.001, "step": 140310 }, { "epoch": 2.2959993454961958, "grad_norm": 0.014865288510918617, "learning_rate": 1.5857675140103728e-06, "loss": 0.0005, "step": 140320 }, { "epoch": 2.2961629714472718, "grad_norm": 0.10254581272602081, "learning_rate": 1.585072124758103e-06, "loss": 0.0011, "step": 140330 }, { "epoch": 2.2963265973983473, "grad_norm": 0.007817289792001247, "learning_rate": 1.58437685928876e-06, "loss": 0.0007, "step": 140340 }, { "epoch": 2.2964902233494233, "grad_norm": 0.12360544502735138, "learning_rate": 1.5836817176275477e-06, "loss": 0.0011, "step": 140350 }, { "epoch": 2.296653849300499, "grad_norm": 0.08883540332317352, "learning_rate": 1.5829866997996608e-06, "loss": 0.0009, "step": 140360 }, { "epoch": 2.296817475251575, "grad_norm": 0.01839541085064411, "learning_rate": 1.582291805830295e-06, "loss": 0.001, "step": 140370 }, { "epoch": 2.296981101202651, "grad_norm": 0.12035586684942245, "learning_rate": 1.5815970357446353e-06, "loss": 0.0013, "step": 140380 }, { "epoch": 2.2971447271537264, "grad_norm": 0.14982575178146362, "learning_rate": 1.5809023895678687e-06, "loss": 0.001, "step": 140390 }, { "epoch": 2.2973083531048024, "grad_norm": 0.05199159309267998, "learning_rate": 1.5802078673251736e-06, "loss": 0.0007, "step": 140400 }, { "epoch": 2.2974719790558784, "grad_norm": 0.089327372610569, "learning_rate": 1.5795134690417218e-06, "loss": 0.0009, "step": 140410 }, { "epoch": 2.297635605006954, "grad_norm": 0.08943206816911697, "learning_rate": 1.5788191947426874e-06, "loss": 0.001, "step": 140420 }, { "epoch": 2.29779923095803, "grad_norm": 0.15100742876529694, "learning_rate": 1.5781250444532331e-06, "loss": 0.0008, "step": 140430 }, { "epoch": 2.297962856909106, "grad_norm": 0.3235109746456146, "learning_rate": 1.5774310181985236e-06, "loss": 0.0014, "step": 140440 }, { "epoch": 2.2981264828601815, "grad_norm": 0.08636219054460526, "learning_rate": 1.576737116003712e-06, "loss": 0.0006, "step": 140450 }, { "epoch": 2.2982901088112575, "grad_norm": 0.1043621376156807, "learning_rate": 1.5760433378939522e-06, "loss": 0.001, "step": 140460 }, { "epoch": 2.2984537347623335, "grad_norm": 0.060790009796619415, "learning_rate": 1.5753496838943938e-06, "loss": 0.0006, "step": 140470 }, { "epoch": 2.298617360713409, "grad_norm": 0.03214503824710846, "learning_rate": 1.5746561540301763e-06, "loss": 0.0006, "step": 140480 }, { "epoch": 2.298780986664485, "grad_norm": 0.05269099026918411, "learning_rate": 1.5739627483264447e-06, "loss": 0.0007, "step": 140490 }, { "epoch": 2.2989446126155606, "grad_norm": 0.06363150477409363, "learning_rate": 1.5732694668083255e-06, "loss": 0.0006, "step": 140500 }, { "epoch": 2.2991082385666366, "grad_norm": 0.07094758003950119, "learning_rate": 1.5725763095009521e-06, "loss": 0.0007, "step": 140510 }, { "epoch": 2.2992718645177126, "grad_norm": 0.055907078087329865, "learning_rate": 1.5718832764294523e-06, "loss": 0.0006, "step": 140520 }, { "epoch": 2.299435490468788, "grad_norm": 0.003993616439402103, "learning_rate": 1.5711903676189422e-06, "loss": 0.0007, "step": 140530 }, { "epoch": 2.299599116419864, "grad_norm": 0.007719043642282486, "learning_rate": 1.5704975830945419e-06, "loss": 0.0015, "step": 140540 }, { "epoch": 2.29976274237094, "grad_norm": 0.1341666728258133, "learning_rate": 1.56980492288136e-06, "loss": 0.0011, "step": 140550 }, { "epoch": 2.2999263683220157, "grad_norm": 0.06888158619403839, "learning_rate": 1.5691123870045071e-06, "loss": 0.0006, "step": 140560 }, { "epoch": 2.3000899942730917, "grad_norm": 0.09794234484434128, "learning_rate": 1.5684199754890828e-06, "loss": 0.0008, "step": 140570 }, { "epoch": 2.3002536202241677, "grad_norm": 0.03282199427485466, "learning_rate": 1.5677276883601884e-06, "loss": 0.0014, "step": 140580 }, { "epoch": 2.3004172461752432, "grad_norm": 0.11884036660194397, "learning_rate": 1.567035525642916e-06, "loss": 0.0009, "step": 140590 }, { "epoch": 2.3005808721263192, "grad_norm": 0.09278003871440887, "learning_rate": 1.5663434873623535e-06, "loss": 0.001, "step": 140600 }, { "epoch": 2.3007444980773952, "grad_norm": 0.17077024281024933, "learning_rate": 1.565651573543588e-06, "loss": 0.0006, "step": 140610 }, { "epoch": 2.300908124028471, "grad_norm": 0.07796622812747955, "learning_rate": 1.5649597842116975e-06, "loss": 0.0011, "step": 140620 }, { "epoch": 2.3010717499795468, "grad_norm": 0.004719328600913286, "learning_rate": 1.5642681193917602e-06, "loss": 0.001, "step": 140630 }, { "epoch": 2.3012353759306228, "grad_norm": 0.002486445941030979, "learning_rate": 1.5635765791088448e-06, "loss": 0.0005, "step": 140640 }, { "epoch": 2.3013990018816983, "grad_norm": 0.023250866681337357, "learning_rate": 1.5628851633880209e-06, "loss": 0.0006, "step": 140650 }, { "epoch": 2.3015626278327743, "grad_norm": 0.007309789769351482, "learning_rate": 1.5621938722543473e-06, "loss": 0.0008, "step": 140660 }, { "epoch": 2.3017262537838503, "grad_norm": 0.04784340783953667, "learning_rate": 1.561502705732883e-06, "loss": 0.0014, "step": 140670 }, { "epoch": 2.301889879734926, "grad_norm": 0.025625161826610565, "learning_rate": 1.5608116638486832e-06, "loss": 0.0018, "step": 140680 }, { "epoch": 2.302053505686002, "grad_norm": 0.12948989868164062, "learning_rate": 1.5601207466267942e-06, "loss": 0.0012, "step": 140690 }, { "epoch": 2.302217131637078, "grad_norm": 0.03865404427051544, "learning_rate": 1.5594299540922614e-06, "loss": 0.0011, "step": 140700 }, { "epoch": 2.3023807575881534, "grad_norm": 0.023492984473705292, "learning_rate": 1.5587392862701211e-06, "loss": 0.0014, "step": 140710 }, { "epoch": 2.3025443835392294, "grad_norm": 0.08059046417474747, "learning_rate": 1.5580487431854113e-06, "loss": 0.0006, "step": 140720 }, { "epoch": 2.3027080094903054, "grad_norm": 0.038865942507982254, "learning_rate": 1.557358324863164e-06, "loss": 0.0014, "step": 140730 }, { "epoch": 2.302871635441381, "grad_norm": 0.0996570736169815, "learning_rate": 1.556668031328401e-06, "loss": 0.0008, "step": 140740 }, { "epoch": 2.303035261392457, "grad_norm": 0.08180870115756989, "learning_rate": 1.5559778626061484e-06, "loss": 0.0008, "step": 140750 }, { "epoch": 2.3031988873435325, "grad_norm": 0.013720731250941753, "learning_rate": 1.5552878187214182e-06, "loss": 0.0005, "step": 140760 }, { "epoch": 2.3033625132946085, "grad_norm": 0.007137408945709467, "learning_rate": 1.5545978996992278e-06, "loss": 0.0004, "step": 140770 }, { "epoch": 2.3035261392456845, "grad_norm": 0.08784053474664688, "learning_rate": 1.553908105564581e-06, "loss": 0.0014, "step": 140780 }, { "epoch": 2.30368976519676, "grad_norm": 0.03646000102162361, "learning_rate": 1.553218436342484e-06, "loss": 0.0007, "step": 140790 }, { "epoch": 2.303853391147836, "grad_norm": 0.09041323512792587, "learning_rate": 1.552528892057935e-06, "loss": 0.001, "step": 140800 }, { "epoch": 2.304017017098912, "grad_norm": 0.020745286718010902, "learning_rate": 1.5518394727359265e-06, "loss": 0.0017, "step": 140810 }, { "epoch": 2.3041806430499876, "grad_norm": 0.0675344169139862, "learning_rate": 1.551150178401451e-06, "loss": 0.0012, "step": 140820 }, { "epoch": 2.3043442690010636, "grad_norm": 0.07974598556756973, "learning_rate": 1.5504610090794908e-06, "loss": 0.0013, "step": 140830 }, { "epoch": 2.3045078949521396, "grad_norm": 0.0454944409430027, "learning_rate": 1.54977196479503e-06, "loss": 0.0007, "step": 140840 }, { "epoch": 2.304671520903215, "grad_norm": 0.03727050870656967, "learning_rate": 1.5490830455730416e-06, "loss": 0.0008, "step": 140850 }, { "epoch": 2.304835146854291, "grad_norm": 0.1320389360189438, "learning_rate": 1.5483942514384997e-06, "loss": 0.0006, "step": 140860 }, { "epoch": 2.3049987728053667, "grad_norm": 0.2437596619129181, "learning_rate": 1.5477055824163696e-06, "loss": 0.0007, "step": 140870 }, { "epoch": 2.3051623987564427, "grad_norm": 0.01806657575070858, "learning_rate": 1.5470170385316141e-06, "loss": 0.0003, "step": 140880 }, { "epoch": 2.3053260247075187, "grad_norm": 0.3036212921142578, "learning_rate": 1.5463286198091958e-06, "loss": 0.001, "step": 140890 }, { "epoch": 2.3054896506585942, "grad_norm": 0.10354020446538925, "learning_rate": 1.545640326274061e-06, "loss": 0.0008, "step": 140900 }, { "epoch": 2.3056532766096702, "grad_norm": 0.05205728858709335, "learning_rate": 1.5449521579511634e-06, "loss": 0.0007, "step": 140910 }, { "epoch": 2.3058169025607462, "grad_norm": 0.10246003419160843, "learning_rate": 1.5442641148654447e-06, "loss": 0.0008, "step": 140920 }, { "epoch": 2.305980528511822, "grad_norm": 0.01056346669793129, "learning_rate": 1.5435761970418473e-06, "loss": 0.0013, "step": 140930 }, { "epoch": 2.3061441544628978, "grad_norm": 0.04447948932647705, "learning_rate": 1.5428884045053044e-06, "loss": 0.0005, "step": 140940 }, { "epoch": 2.3063077804139738, "grad_norm": 0.19907037913799286, "learning_rate": 1.5422007372807479e-06, "loss": 0.0019, "step": 140950 }, { "epoch": 2.3064714063650493, "grad_norm": 0.17131435871124268, "learning_rate": 1.5415131953931051e-06, "loss": 0.0009, "step": 140960 }, { "epoch": 2.3066350323161253, "grad_norm": 0.04842172563076019, "learning_rate": 1.540825778867296e-06, "loss": 0.0012, "step": 140970 }, { "epoch": 2.3067986582672013, "grad_norm": 0.03325434774160385, "learning_rate": 1.5401384877282394e-06, "loss": 0.0002, "step": 140980 }, { "epoch": 2.306962284218277, "grad_norm": 0.014830000698566437, "learning_rate": 1.5394513220008473e-06, "loss": 0.0005, "step": 140990 }, { "epoch": 2.307125910169353, "grad_norm": 0.11610081791877747, "learning_rate": 1.5387642817100257e-06, "loss": 0.001, "step": 141000 }, { "epoch": 2.307289536120429, "grad_norm": 0.04810063913464546, "learning_rate": 1.538077366880682e-06, "loss": 0.0005, "step": 141010 }, { "epoch": 2.3074531620715044, "grad_norm": 0.08294409513473511, "learning_rate": 1.5373905775377112e-06, "loss": 0.0012, "step": 141020 }, { "epoch": 2.3076167880225804, "grad_norm": 0.06494452059268951, "learning_rate": 1.5367039137060119e-06, "loss": 0.0007, "step": 141030 }, { "epoch": 2.3077804139736564, "grad_norm": 0.02179872803390026, "learning_rate": 1.53601737541047e-06, "loss": 0.0005, "step": 141040 }, { "epoch": 2.307944039924732, "grad_norm": 0.06045236811041832, "learning_rate": 1.5353309626759745e-06, "loss": 0.0012, "step": 141050 }, { "epoch": 2.308107665875808, "grad_norm": 0.05607156828045845, "learning_rate": 1.534644675527403e-06, "loss": 0.0005, "step": 141060 }, { "epoch": 2.308271291826884, "grad_norm": 0.1847607046365738, "learning_rate": 1.5339585139896346e-06, "loss": 0.0012, "step": 141070 }, { "epoch": 2.3084349177779595, "grad_norm": 0.059711333364248276, "learning_rate": 1.5332724780875386e-06, "loss": 0.0005, "step": 141080 }, { "epoch": 2.3085985437290355, "grad_norm": 0.028678933158516884, "learning_rate": 1.532586567845984e-06, "loss": 0.0005, "step": 141090 }, { "epoch": 2.3087621696801115, "grad_norm": 0.3262660801410675, "learning_rate": 1.531900783289833e-06, "loss": 0.0006, "step": 141100 }, { "epoch": 2.308925795631187, "grad_norm": 0.025376588106155396, "learning_rate": 1.5312151244439417e-06, "loss": 0.0008, "step": 141110 }, { "epoch": 2.309089421582263, "grad_norm": 0.0805472657084465, "learning_rate": 1.530529591333167e-06, "loss": 0.0008, "step": 141120 }, { "epoch": 2.3092530475333386, "grad_norm": 0.08390944451093674, "learning_rate": 1.5298441839823541e-06, "loss": 0.0007, "step": 141130 }, { "epoch": 2.3094166734844146, "grad_norm": 0.03424423560500145, "learning_rate": 1.5291589024163505e-06, "loss": 0.0004, "step": 141140 }, { "epoch": 2.3095802994354906, "grad_norm": 0.08039483428001404, "learning_rate": 1.5284737466599936e-06, "loss": 0.0011, "step": 141150 }, { "epoch": 2.309743925386566, "grad_norm": 0.09530168771743774, "learning_rate": 1.5277887167381195e-06, "loss": 0.0011, "step": 141160 }, { "epoch": 2.309907551337642, "grad_norm": 0.13639923930168152, "learning_rate": 1.527103812675561e-06, "loss": 0.0009, "step": 141170 }, { "epoch": 2.310071177288718, "grad_norm": 0.011712112464010715, "learning_rate": 1.5264190344971407e-06, "loss": 0.0006, "step": 141180 }, { "epoch": 2.3102348032397937, "grad_norm": 0.05489076301455498, "learning_rate": 1.5257343822276848e-06, "loss": 0.0005, "step": 141190 }, { "epoch": 2.3103984291908697, "grad_norm": 0.06974972784519196, "learning_rate": 1.5250498558920046e-06, "loss": 0.0006, "step": 141200 }, { "epoch": 2.3105620551419457, "grad_norm": 0.03023819997906685, "learning_rate": 1.5243654555149167e-06, "loss": 0.0007, "step": 141210 }, { "epoch": 2.3107256810930212, "grad_norm": 0.0018115267157554626, "learning_rate": 1.523681181121226e-06, "loss": 0.0005, "step": 141220 }, { "epoch": 2.3108893070440972, "grad_norm": 0.010107466019690037, "learning_rate": 1.5229970327357374e-06, "loss": 0.0006, "step": 141230 }, { "epoch": 2.3110529329951732, "grad_norm": 0.1055271327495575, "learning_rate": 1.5223130103832513e-06, "loss": 0.0005, "step": 141240 }, { "epoch": 2.311216558946249, "grad_norm": 0.19064393639564514, "learning_rate": 1.521629114088558e-06, "loss": 0.0007, "step": 141250 }, { "epoch": 2.3113801848973248, "grad_norm": 0.02189820259809494, "learning_rate": 1.5209453438764516e-06, "loss": 0.0006, "step": 141260 }, { "epoch": 2.3115438108484003, "grad_norm": 0.056310828775167465, "learning_rate": 1.5202616997717128e-06, "loss": 0.0013, "step": 141270 }, { "epoch": 2.3117074367994763, "grad_norm": 0.02012699283659458, "learning_rate": 1.5195781817991256e-06, "loss": 0.0007, "step": 141280 }, { "epoch": 2.3118710627505523, "grad_norm": 0.015752756968140602, "learning_rate": 1.518894789983465e-06, "loss": 0.0012, "step": 141290 }, { "epoch": 2.312034688701628, "grad_norm": 0.0858929455280304, "learning_rate": 1.5182115243494994e-06, "loss": 0.0013, "step": 141300 }, { "epoch": 2.312198314652704, "grad_norm": 0.046227049082517624, "learning_rate": 1.5175283849219995e-06, "loss": 0.0007, "step": 141310 }, { "epoch": 2.31236194060378, "grad_norm": 0.12042857706546783, "learning_rate": 1.5168453717257242e-06, "loss": 0.0005, "step": 141320 }, { "epoch": 2.3125255665548554, "grad_norm": 0.005126805044710636, "learning_rate": 1.516162484785434e-06, "loss": 0.0017, "step": 141330 }, { "epoch": 2.3126891925059314, "grad_norm": 0.005924693308770657, "learning_rate": 1.5154797241258795e-06, "loss": 0.0019, "step": 141340 }, { "epoch": 2.3128528184570074, "grad_norm": 0.014651631005108356, "learning_rate": 1.5147970897718111e-06, "loss": 0.0011, "step": 141350 }, { "epoch": 2.313016444408083, "grad_norm": 0.012786723673343658, "learning_rate": 1.5141145817479708e-06, "loss": 0.0006, "step": 141360 }, { "epoch": 2.313180070359159, "grad_norm": 0.12540513277053833, "learning_rate": 1.5134322000790996e-06, "loss": 0.0009, "step": 141370 }, { "epoch": 2.313343696310235, "grad_norm": 0.22066181898117065, "learning_rate": 1.5127499447899307e-06, "loss": 0.0021, "step": 141380 }, { "epoch": 2.3135073222613105, "grad_norm": 0.22398459911346436, "learning_rate": 1.512067815905196e-06, "loss": 0.0009, "step": 141390 }, { "epoch": 2.3136709482123865, "grad_norm": 0.1565244346857071, "learning_rate": 1.51138581344962e-06, "loss": 0.0008, "step": 141400 }, { "epoch": 2.3138345741634625, "grad_norm": 0.36510229110717773, "learning_rate": 1.510703937447922e-06, "loss": 0.0012, "step": 141410 }, { "epoch": 2.313998200114538, "grad_norm": 0.006878407672047615, "learning_rate": 1.5100221879248218e-06, "loss": 0.0014, "step": 141420 }, { "epoch": 2.314161826065614, "grad_norm": 0.029306985437870026, "learning_rate": 1.5093405649050275e-06, "loss": 0.0006, "step": 141430 }, { "epoch": 2.31432545201669, "grad_norm": 0.04922221228480339, "learning_rate": 1.508659068413248e-06, "loss": 0.0008, "step": 141440 }, { "epoch": 2.3144890779677656, "grad_norm": 0.12085670977830887, "learning_rate": 1.5079776984741879e-06, "loss": 0.0009, "step": 141450 }, { "epoch": 2.3146527039188416, "grad_norm": 0.12174364179372787, "learning_rate": 1.5072964551125418e-06, "loss": 0.0006, "step": 141460 }, { "epoch": 2.3148163298699176, "grad_norm": 0.05523371696472168, "learning_rate": 1.5066153383530063e-06, "loss": 0.0006, "step": 141470 }, { "epoch": 2.314979955820993, "grad_norm": 0.07125862687826157, "learning_rate": 1.505934348220267e-06, "loss": 0.0011, "step": 141480 }, { "epoch": 2.315143581772069, "grad_norm": 0.14378949999809265, "learning_rate": 1.5052534847390126e-06, "loss": 0.0011, "step": 141490 }, { "epoch": 2.315307207723145, "grad_norm": 0.055636826902627945, "learning_rate": 1.5045727479339172e-06, "loss": 0.0006, "step": 141500 }, { "epoch": 2.3154708336742207, "grad_norm": 0.024866728112101555, "learning_rate": 1.503892137829659e-06, "loss": 0.0006, "step": 141510 }, { "epoch": 2.3156344596252967, "grad_norm": 0.058483924716711044, "learning_rate": 1.503211654450909e-06, "loss": 0.0012, "step": 141520 }, { "epoch": 2.3157980855763722, "grad_norm": 0.06118519976735115, "learning_rate": 1.502531297822331e-06, "loss": 0.0012, "step": 141530 }, { "epoch": 2.3159617115274482, "grad_norm": 0.10276416689157486, "learning_rate": 1.5018510679685894e-06, "loss": 0.0005, "step": 141540 }, { "epoch": 2.3161253374785242, "grad_norm": 0.009653984569013119, "learning_rate": 1.501170964914337e-06, "loss": 0.0009, "step": 141550 }, { "epoch": 2.3162889634296, "grad_norm": 0.008758802898228168, "learning_rate": 1.5004909886842294e-06, "loss": 0.0005, "step": 141560 }, { "epoch": 2.3164525893806758, "grad_norm": 0.04171372577548027, "learning_rate": 1.4998111393029114e-06, "loss": 0.0006, "step": 141570 }, { "epoch": 2.3166162153317518, "grad_norm": 0.051459841430187225, "learning_rate": 1.4991314167950283e-06, "loss": 0.0007, "step": 141580 }, { "epoch": 2.3167798412828273, "grad_norm": 0.0077985436655581, "learning_rate": 1.4984518211852162e-06, "loss": 0.0007, "step": 141590 }, { "epoch": 2.3169434672339033, "grad_norm": 0.0700778141617775, "learning_rate": 1.4977723524981108e-06, "loss": 0.0007, "step": 141600 }, { "epoch": 2.3171070931849793, "grad_norm": 0.08564010262489319, "learning_rate": 1.4970930107583408e-06, "loss": 0.0032, "step": 141610 }, { "epoch": 2.317270719136055, "grad_norm": 0.027825219556689262, "learning_rate": 1.4964137959905283e-06, "loss": 0.0009, "step": 141620 }, { "epoch": 2.317434345087131, "grad_norm": 0.01738942787051201, "learning_rate": 1.4957347082192962e-06, "loss": 0.0006, "step": 141630 }, { "epoch": 2.3175979710382064, "grad_norm": 0.05528319254517555, "learning_rate": 1.495055747469258e-06, "loss": 0.0006, "step": 141640 }, { "epoch": 2.3177615969892824, "grad_norm": 0.06310944259166718, "learning_rate": 1.494376913765026e-06, "loss": 0.0006, "step": 141650 }, { "epoch": 2.3179252229403584, "grad_norm": 0.08646228909492493, "learning_rate": 1.4936982071312034e-06, "loss": 0.0005, "step": 141660 }, { "epoch": 2.318088848891434, "grad_norm": 0.11556138843297958, "learning_rate": 1.4930196275923942e-06, "loss": 0.0014, "step": 141670 }, { "epoch": 2.31825247484251, "grad_norm": 0.04201691970229149, "learning_rate": 1.4923411751731965e-06, "loss": 0.0011, "step": 141680 }, { "epoch": 2.318416100793586, "grad_norm": 0.017121024429798126, "learning_rate": 1.4916628498981983e-06, "loss": 0.0007, "step": 141690 }, { "epoch": 2.3185797267446615, "grad_norm": 0.017644086852669716, "learning_rate": 1.4909846517919935e-06, "loss": 0.0007, "step": 141700 }, { "epoch": 2.3187433526957375, "grad_norm": 0.15564754605293274, "learning_rate": 1.4903065808791585e-06, "loss": 0.0011, "step": 141710 }, { "epoch": 2.3189069786468135, "grad_norm": 0.014784197323024273, "learning_rate": 1.4896286371842738e-06, "loss": 0.0007, "step": 141720 }, { "epoch": 2.319070604597889, "grad_norm": 0.08888337016105652, "learning_rate": 1.4889508207319165e-06, "loss": 0.0006, "step": 141730 }, { "epoch": 2.319234230548965, "grad_norm": 0.024108178913593292, "learning_rate": 1.488273131546651e-06, "loss": 0.0009, "step": 141740 }, { "epoch": 2.319397856500041, "grad_norm": 0.041571635752916336, "learning_rate": 1.4875955696530464e-06, "loss": 0.0006, "step": 141750 }, { "epoch": 2.3195614824511166, "grad_norm": 0.11115611344575882, "learning_rate": 1.4869181350756584e-06, "loss": 0.001, "step": 141760 }, { "epoch": 2.3197251084021926, "grad_norm": 0.06924387067556381, "learning_rate": 1.4862408278390467e-06, "loss": 0.0006, "step": 141770 }, { "epoch": 2.3198887343532686, "grad_norm": 0.06451443582773209, "learning_rate": 1.4855636479677582e-06, "loss": 0.0013, "step": 141780 }, { "epoch": 2.320052360304344, "grad_norm": 0.07121411710977554, "learning_rate": 1.4848865954863423e-06, "loss": 0.0008, "step": 141790 }, { "epoch": 2.32021598625542, "grad_norm": 0.049608126282691956, "learning_rate": 1.4842096704193387e-06, "loss": 0.0009, "step": 141800 }, { "epoch": 2.320379612206496, "grad_norm": 0.03642949089407921, "learning_rate": 1.4835328727912834e-06, "loss": 0.0006, "step": 141810 }, { "epoch": 2.3205432381575717, "grad_norm": 0.006874953396618366, "learning_rate": 1.4828562026267112e-06, "loss": 0.0006, "step": 141820 }, { "epoch": 2.3207068641086477, "grad_norm": 0.11241057515144348, "learning_rate": 1.4821796599501465e-06, "loss": 0.0008, "step": 141830 }, { "epoch": 2.3208704900597237, "grad_norm": 0.04891185462474823, "learning_rate": 1.481503244786116e-06, "loss": 0.0009, "step": 141840 }, { "epoch": 2.3210341160107992, "grad_norm": 0.04376582056283951, "learning_rate": 1.4808269571591354e-06, "loss": 0.0012, "step": 141850 }, { "epoch": 2.3211977419618752, "grad_norm": 0.006816682871431112, "learning_rate": 1.4801507970937208e-06, "loss": 0.0007, "step": 141860 }, { "epoch": 2.3213613679129512, "grad_norm": 0.029662000015378, "learning_rate": 1.4794747646143781e-06, "loss": 0.0007, "step": 141870 }, { "epoch": 2.3215249938640268, "grad_norm": 0.03641365095973015, "learning_rate": 1.4787988597456142e-06, "loss": 0.0011, "step": 141880 }, { "epoch": 2.3216886198151028, "grad_norm": 0.026273412629961967, "learning_rate": 1.4781230825119304e-06, "loss": 0.0009, "step": 141890 }, { "epoch": 2.3218522457661783, "grad_norm": 0.017448076978325844, "learning_rate": 1.4774474329378196e-06, "loss": 0.0006, "step": 141900 }, { "epoch": 2.3220158717172543, "grad_norm": 0.024807600304484367, "learning_rate": 1.4767719110477736e-06, "loss": 0.0011, "step": 141910 }, { "epoch": 2.3221794976683303, "grad_norm": 0.11860799789428711, "learning_rate": 1.476096516866276e-06, "loss": 0.0015, "step": 141920 }, { "epoch": 2.322343123619406, "grad_norm": 0.2275388538837433, "learning_rate": 1.4754212504178105e-06, "loss": 0.0008, "step": 141930 }, { "epoch": 2.322506749570482, "grad_norm": 0.022442590445280075, "learning_rate": 1.4747461117268552e-06, "loss": 0.0006, "step": 141940 }, { "epoch": 2.322670375521558, "grad_norm": 0.15710876882076263, "learning_rate": 1.4740711008178793e-06, "loss": 0.0008, "step": 141950 }, { "epoch": 2.3228340014726334, "grad_norm": 0.098350889980793, "learning_rate": 1.4733962177153527e-06, "loss": 0.0009, "step": 141960 }, { "epoch": 2.3229976274237094, "grad_norm": 0.007286694832146168, "learning_rate": 1.4727214624437358e-06, "loss": 0.0003, "step": 141970 }, { "epoch": 2.3231612533747854, "grad_norm": 0.2774623930454254, "learning_rate": 1.4720468350274896e-06, "loss": 0.0008, "step": 141980 }, { "epoch": 2.323324879325861, "grad_norm": 0.04916456714272499, "learning_rate": 1.4713723354910654e-06, "loss": 0.0005, "step": 141990 }, { "epoch": 2.323488505276937, "grad_norm": 0.10238541662693024, "learning_rate": 1.4706979638589148e-06, "loss": 0.0008, "step": 142000 }, { "epoch": 2.323652131228013, "grad_norm": 0.012358625419437885, "learning_rate": 1.47002372015548e-06, "loss": 0.0005, "step": 142010 }, { "epoch": 2.3238157571790885, "grad_norm": 0.11864018440246582, "learning_rate": 1.4693496044052002e-06, "loss": 0.0009, "step": 142020 }, { "epoch": 2.3239793831301645, "grad_norm": 0.3684505224227905, "learning_rate": 1.4686756166325128e-06, "loss": 0.0011, "step": 142030 }, { "epoch": 2.32414300908124, "grad_norm": 0.032025329768657684, "learning_rate": 1.4680017568618454e-06, "loss": 0.0009, "step": 142040 }, { "epoch": 2.324306635032316, "grad_norm": 0.025022050365805626, "learning_rate": 1.4673280251176269e-06, "loss": 0.0006, "step": 142050 }, { "epoch": 2.324470260983392, "grad_norm": 0.04085569456219673, "learning_rate": 1.4666544214242756e-06, "loss": 0.0005, "step": 142060 }, { "epoch": 2.3246338869344676, "grad_norm": 0.04850098118185997, "learning_rate": 1.4659809458062113e-06, "loss": 0.0011, "step": 142070 }, { "epoch": 2.3247975128855436, "grad_norm": 0.1168837621808052, "learning_rate": 1.4653075982878417e-06, "loss": 0.0009, "step": 142080 }, { "epoch": 2.3249611388366196, "grad_norm": 0.04493911191821098, "learning_rate": 1.4646343788935762e-06, "loss": 0.0011, "step": 142090 }, { "epoch": 2.325124764787695, "grad_norm": 0.06869116425514221, "learning_rate": 1.463961287647821e-06, "loss": 0.0006, "step": 142100 }, { "epoch": 2.325288390738771, "grad_norm": 0.06428693234920502, "learning_rate": 1.4632883245749675e-06, "loss": 0.0013, "step": 142110 }, { "epoch": 2.325452016689847, "grad_norm": 0.08442873507738113, "learning_rate": 1.4626154896994133e-06, "loss": 0.0007, "step": 142120 }, { "epoch": 2.3256156426409227, "grad_norm": 0.002236641477793455, "learning_rate": 1.4619427830455441e-06, "loss": 0.0003, "step": 142130 }, { "epoch": 2.3257792685919987, "grad_norm": 0.0899309292435646, "learning_rate": 1.461270204637747e-06, "loss": 0.0009, "step": 142140 }, { "epoch": 2.3259428945430747, "grad_norm": 0.12744580209255219, "learning_rate": 1.460597754500398e-06, "loss": 0.0005, "step": 142150 }, { "epoch": 2.3261065204941502, "grad_norm": 0.0532243438065052, "learning_rate": 1.4599254326578743e-06, "loss": 0.0008, "step": 142160 }, { "epoch": 2.3262701464452262, "grad_norm": 0.06452793627977371, "learning_rate": 1.4592532391345466e-06, "loss": 0.0006, "step": 142170 }, { "epoch": 2.3264337723963022, "grad_norm": 0.05454513058066368, "learning_rate": 1.4585811739547773e-06, "loss": 0.0008, "step": 142180 }, { "epoch": 2.3265973983473778, "grad_norm": 0.03910773620009422, "learning_rate": 1.4579092371429305e-06, "loss": 0.0004, "step": 142190 }, { "epoch": 2.3267610242984538, "grad_norm": 0.04444894567131996, "learning_rate": 1.4572374287233609e-06, "loss": 0.0006, "step": 142200 }, { "epoch": 2.3269246502495298, "grad_norm": 0.03417925536632538, "learning_rate": 1.4565657487204183e-06, "loss": 0.0006, "step": 142210 }, { "epoch": 2.3270882762006053, "grad_norm": 0.24629062414169312, "learning_rate": 1.4558941971584516e-06, "loss": 0.0008, "step": 142220 }, { "epoch": 2.3272519021516813, "grad_norm": 0.11486601084470749, "learning_rate": 1.4552227740618013e-06, "loss": 0.0008, "step": 142230 }, { "epoch": 2.3274155281027573, "grad_norm": 0.12311146408319473, "learning_rate": 1.4545514794548076e-06, "loss": 0.0007, "step": 142240 }, { "epoch": 2.327579154053833, "grad_norm": 0.06289301067590714, "learning_rate": 1.4538803133617996e-06, "loss": 0.0007, "step": 142250 }, { "epoch": 2.327742780004909, "grad_norm": 0.1327168047428131, "learning_rate": 1.4532092758071087e-06, "loss": 0.001, "step": 142260 }, { "epoch": 2.327906405955985, "grad_norm": 0.11817660927772522, "learning_rate": 1.452538366815056e-06, "loss": 0.0011, "step": 142270 }, { "epoch": 2.3280700319070604, "grad_norm": 0.08783981949090958, "learning_rate": 1.4518675864099624e-06, "loss": 0.0004, "step": 142280 }, { "epoch": 2.3282336578581364, "grad_norm": 0.07068328559398651, "learning_rate": 1.4511969346161397e-06, "loss": 0.0006, "step": 142290 }, { "epoch": 2.328397283809212, "grad_norm": 0.03534681722521782, "learning_rate": 1.450526411457901e-06, "loss": 0.0008, "step": 142300 }, { "epoch": 2.328560909760288, "grad_norm": 0.19301646947860718, "learning_rate": 1.4498560169595483e-06, "loss": 0.0008, "step": 142310 }, { "epoch": 2.328724535711364, "grad_norm": 0.11231374740600586, "learning_rate": 1.449185751145381e-06, "loss": 0.0008, "step": 142320 }, { "epoch": 2.3288881616624395, "grad_norm": 0.002907052868977189, "learning_rate": 1.4485156140396978e-06, "loss": 0.0005, "step": 142330 }, { "epoch": 2.3290517876135155, "grad_norm": 0.037952326238155365, "learning_rate": 1.4478456056667861e-06, "loss": 0.0011, "step": 142340 }, { "epoch": 2.3292154135645915, "grad_norm": 0.07551920413970947, "learning_rate": 1.447175726050935e-06, "loss": 0.0007, "step": 142350 }, { "epoch": 2.329379039515667, "grad_norm": 0.032292623072862625, "learning_rate": 1.446505975216424e-06, "loss": 0.0009, "step": 142360 }, { "epoch": 2.329542665466743, "grad_norm": 0.05330345779657364, "learning_rate": 1.4458363531875302e-06, "loss": 0.0009, "step": 142370 }, { "epoch": 2.329706291417819, "grad_norm": 0.06061384454369545, "learning_rate": 1.4451668599885282e-06, "loss": 0.0005, "step": 142380 }, { "epoch": 2.3298699173688946, "grad_norm": 0.011113053187727928, "learning_rate": 1.4444974956436824e-06, "loss": 0.0007, "step": 142390 }, { "epoch": 2.3300335433199706, "grad_norm": 0.18374551832675934, "learning_rate": 1.4438282601772597e-06, "loss": 0.0006, "step": 142400 }, { "epoch": 2.330197169271046, "grad_norm": 0.07437953352928162, "learning_rate": 1.4431591536135125e-06, "loss": 0.0012, "step": 142410 }, { "epoch": 2.330360795222122, "grad_norm": 0.09924048185348511, "learning_rate": 1.442490175976699e-06, "loss": 0.0012, "step": 142420 }, { "epoch": 2.330524421173198, "grad_norm": 0.12255416065454483, "learning_rate": 1.4418213272910647e-06, "loss": 0.0013, "step": 142430 }, { "epoch": 2.3306880471242737, "grad_norm": 0.021334750577807426, "learning_rate": 1.4411526075808558e-06, "loss": 0.0004, "step": 142440 }, { "epoch": 2.3308516730753497, "grad_norm": 0.06889131665229797, "learning_rate": 1.4404840168703122e-06, "loss": 0.0004, "step": 142450 }, { "epoch": 2.3310152990264257, "grad_norm": 0.05617127567529678, "learning_rate": 1.4398155551836668e-06, "loss": 0.0009, "step": 142460 }, { "epoch": 2.3311789249775012, "grad_norm": 0.0668850690126419, "learning_rate": 1.4391472225451524e-06, "loss": 0.0011, "step": 142470 }, { "epoch": 2.3313425509285772, "grad_norm": 0.24449512362480164, "learning_rate": 1.4384790189789916e-06, "loss": 0.0007, "step": 142480 }, { "epoch": 2.3315061768796532, "grad_norm": 0.0331275649368763, "learning_rate": 1.4378109445094079e-06, "loss": 0.0005, "step": 142490 }, { "epoch": 2.3316698028307288, "grad_norm": 0.14289069175720215, "learning_rate": 1.4371429991606156e-06, "loss": 0.0013, "step": 142500 }, { "epoch": 2.3318334287818048, "grad_norm": 0.046746205538511276, "learning_rate": 1.4364751829568253e-06, "loss": 0.0008, "step": 142510 }, { "epoch": 2.3319970547328808, "grad_norm": 0.009582074359059334, "learning_rate": 1.4358074959222461e-06, "loss": 0.0007, "step": 142520 }, { "epoch": 2.3321606806839563, "grad_norm": 0.10607443749904633, "learning_rate": 1.4351399380810776e-06, "loss": 0.0006, "step": 142530 }, { "epoch": 2.3323243066350323, "grad_norm": 0.051514916121959686, "learning_rate": 1.43447250945752e-06, "loss": 0.0007, "step": 142540 }, { "epoch": 2.3324879325861083, "grad_norm": 0.3152959644794464, "learning_rate": 1.4338052100757626e-06, "loss": 0.0007, "step": 142550 }, { "epoch": 2.332651558537184, "grad_norm": 0.16308702528476715, "learning_rate": 1.433138039959997e-06, "loss": 0.0013, "step": 142560 }, { "epoch": 2.33281518448826, "grad_norm": 0.0715632513165474, "learning_rate": 1.4324709991344032e-06, "loss": 0.001, "step": 142570 }, { "epoch": 2.332978810439336, "grad_norm": 0.038515858352184296, "learning_rate": 1.431804087623163e-06, "loss": 0.0008, "step": 142580 }, { "epoch": 2.3331424363904114, "grad_norm": 0.07718722522258759, "learning_rate": 1.4311373054504469e-06, "loss": 0.0015, "step": 142590 }, { "epoch": 2.3333060623414874, "grad_norm": 0.05153379589319229, "learning_rate": 1.4304706526404278e-06, "loss": 0.0005, "step": 142600 }, { "epoch": 2.3334696882925634, "grad_norm": 0.018288012593984604, "learning_rate": 1.4298041292172683e-06, "loss": 0.0005, "step": 142610 }, { "epoch": 2.333633314243639, "grad_norm": 0.03371068462729454, "learning_rate": 1.4291377352051266e-06, "loss": 0.0007, "step": 142620 }, { "epoch": 2.333796940194715, "grad_norm": 0.05810781940817833, "learning_rate": 1.4284714706281611e-06, "loss": 0.0005, "step": 142630 }, { "epoch": 2.333960566145791, "grad_norm": 0.018913494423031807, "learning_rate": 1.4278053355105198e-06, "loss": 0.0011, "step": 142640 }, { "epoch": 2.3341241920968665, "grad_norm": 0.01816447637975216, "learning_rate": 1.427139329876349e-06, "loss": 0.0009, "step": 142650 }, { "epoch": 2.3342878180479425, "grad_norm": 0.08949961513280869, "learning_rate": 1.426473453749792e-06, "loss": 0.0005, "step": 142660 }, { "epoch": 2.3344514439990185, "grad_norm": 0.058733511716127396, "learning_rate": 1.4258077071549825e-06, "loss": 0.0014, "step": 142670 }, { "epoch": 2.334615069950094, "grad_norm": 0.02654833346605301, "learning_rate": 1.425142090116054e-06, "loss": 0.0006, "step": 142680 }, { "epoch": 2.33477869590117, "grad_norm": 0.18389055132865906, "learning_rate": 1.424476602657132e-06, "loss": 0.001, "step": 142690 }, { "epoch": 2.3349423218522456, "grad_norm": 0.02986842952668667, "learning_rate": 1.4238112448023422e-06, "loss": 0.0009, "step": 142700 }, { "epoch": 2.3351059478033216, "grad_norm": 0.09791618585586548, "learning_rate": 1.4231460165757972e-06, "loss": 0.0008, "step": 142710 }, { "epoch": 2.3352695737543976, "grad_norm": 0.012414880096912384, "learning_rate": 1.4224809180016114e-06, "loss": 0.0006, "step": 142720 }, { "epoch": 2.335433199705473, "grad_norm": 0.007660441100597382, "learning_rate": 1.4218159491038962e-06, "loss": 0.0011, "step": 142730 }, { "epoch": 2.335596825656549, "grad_norm": 0.024664273485541344, "learning_rate": 1.421151109906751e-06, "loss": 0.0003, "step": 142740 }, { "epoch": 2.335760451607625, "grad_norm": 0.1829763650894165, "learning_rate": 1.4204864004342782e-06, "loss": 0.0005, "step": 142750 }, { "epoch": 2.3359240775587007, "grad_norm": 0.03625491261482239, "learning_rate": 1.419821820710569e-06, "loss": 0.0012, "step": 142760 }, { "epoch": 2.3360877035097767, "grad_norm": 0.13838981091976166, "learning_rate": 1.4191573707597155e-06, "loss": 0.0008, "step": 142770 }, { "epoch": 2.3362513294608527, "grad_norm": 0.04847600683569908, "learning_rate": 1.4184930506057993e-06, "loss": 0.001, "step": 142780 }, { "epoch": 2.3364149554119282, "grad_norm": 0.14510351419448853, "learning_rate": 1.4178288602729035e-06, "loss": 0.0028, "step": 142790 }, { "epoch": 2.3365785813630042, "grad_norm": 0.04510261490941048, "learning_rate": 1.4171647997851024e-06, "loss": 0.0006, "step": 142800 }, { "epoch": 2.3367422073140798, "grad_norm": 0.04008515551686287, "learning_rate": 1.4165008691664644e-06, "loss": 0.0003, "step": 142810 }, { "epoch": 2.3369058332651558, "grad_norm": 0.06763363629579544, "learning_rate": 1.4158370684410588e-06, "loss": 0.0008, "step": 142820 }, { "epoch": 2.3370694592162318, "grad_norm": 0.09663360565900803, "learning_rate": 1.4151733976329434e-06, "loss": 0.0003, "step": 142830 }, { "epoch": 2.3372330851673073, "grad_norm": 0.06752414256334305, "learning_rate": 1.414509856766178e-06, "loss": 0.0004, "step": 142840 }, { "epoch": 2.3373967111183833, "grad_norm": 0.08504626154899597, "learning_rate": 1.4138464458648115e-06, "loss": 0.0017, "step": 142850 }, { "epoch": 2.3375603370694593, "grad_norm": 0.03862268850207329, "learning_rate": 1.4131831649528938e-06, "loss": 0.0006, "step": 142860 }, { "epoch": 2.337723963020535, "grad_norm": 0.11114795506000519, "learning_rate": 1.4125200140544638e-06, "loss": 0.0025, "step": 142870 }, { "epoch": 2.337887588971611, "grad_norm": 0.021131668239831924, "learning_rate": 1.4118569931935612e-06, "loss": 0.0012, "step": 142880 }, { "epoch": 2.338051214922687, "grad_norm": 0.14534032344818115, "learning_rate": 1.4111941023942199e-06, "loss": 0.0012, "step": 142890 }, { "epoch": 2.3382148408737624, "grad_norm": 0.02748599648475647, "learning_rate": 1.4105313416804677e-06, "loss": 0.0005, "step": 142900 }, { "epoch": 2.3383784668248384, "grad_norm": 0.16241908073425293, "learning_rate": 1.4098687110763265e-06, "loss": 0.0006, "step": 142910 }, { "epoch": 2.3385420927759144, "grad_norm": 0.05146666616201401, "learning_rate": 1.4092062106058152e-06, "loss": 0.0008, "step": 142920 }, { "epoch": 2.33870571872699, "grad_norm": 0.06983890384435654, "learning_rate": 1.408543840292948e-06, "loss": 0.0009, "step": 142930 }, { "epoch": 2.338869344678066, "grad_norm": 0.05474630743265152, "learning_rate": 1.4078816001617363e-06, "loss": 0.0007, "step": 142940 }, { "epoch": 2.339032970629142, "grad_norm": 0.03730502352118492, "learning_rate": 1.4072194902361814e-06, "loss": 0.0025, "step": 142950 }, { "epoch": 2.3391965965802175, "grad_norm": 0.033826302736997604, "learning_rate": 1.4065575105402868e-06, "loss": 0.0008, "step": 142960 }, { "epoch": 2.3393602225312935, "grad_norm": 0.1366487741470337, "learning_rate": 1.405895661098044e-06, "loss": 0.002, "step": 142970 }, { "epoch": 2.3395238484823695, "grad_norm": 0.050285596400499344, "learning_rate": 1.4052339419334471e-06, "loss": 0.0005, "step": 142980 }, { "epoch": 2.339687474433445, "grad_norm": 0.02242635004222393, "learning_rate": 1.4045723530704785e-06, "loss": 0.0006, "step": 142990 }, { "epoch": 2.339851100384521, "grad_norm": 0.04862105846405029, "learning_rate": 1.4039108945331225e-06, "loss": 0.0008, "step": 143000 }, { "epoch": 2.340014726335597, "grad_norm": 0.006551207974553108, "learning_rate": 1.4032495663453533e-06, "loss": 0.0009, "step": 143010 }, { "epoch": 2.3401783522866726, "grad_norm": 0.10910077393054962, "learning_rate": 1.402588368531141e-06, "loss": 0.0009, "step": 143020 }, { "epoch": 2.3403419782377486, "grad_norm": 0.002840406494215131, "learning_rate": 1.4019273011144557e-06, "loss": 0.0007, "step": 143030 }, { "epoch": 2.3405056041888246, "grad_norm": 0.11268679052591324, "learning_rate": 1.401266364119257e-06, "loss": 0.0007, "step": 143040 }, { "epoch": 2.3406692301399, "grad_norm": 0.12169431149959564, "learning_rate": 1.4006055575695044e-06, "loss": 0.0009, "step": 143050 }, { "epoch": 2.340832856090976, "grad_norm": 0.028649700805544853, "learning_rate": 1.3999448814891482e-06, "loss": 0.0007, "step": 143060 }, { "epoch": 2.3409964820420517, "grad_norm": 0.08962417393922806, "learning_rate": 1.3992843359021385e-06, "loss": 0.0009, "step": 143070 }, { "epoch": 2.3411601079931277, "grad_norm": 0.03773658722639084, "learning_rate": 1.3986239208324165e-06, "loss": 0.0013, "step": 143080 }, { "epoch": 2.3413237339442037, "grad_norm": 0.02422274462878704, "learning_rate": 1.3979636363039218e-06, "loss": 0.0007, "step": 143090 }, { "epoch": 2.3414873598952792, "grad_norm": 0.05366696044802666, "learning_rate": 1.3973034823405908e-06, "loss": 0.0007, "step": 143100 }, { "epoch": 2.3416509858463552, "grad_norm": 0.08595477044582367, "learning_rate": 1.3966434589663464e-06, "loss": 0.0008, "step": 143110 }, { "epoch": 2.3418146117974312, "grad_norm": 0.06815607100725174, "learning_rate": 1.395983566205118e-06, "loss": 0.0011, "step": 143120 }, { "epoch": 2.3419782377485068, "grad_norm": 0.03770340606570244, "learning_rate": 1.395323804080822e-06, "loss": 0.0007, "step": 143130 }, { "epoch": 2.3421418636995828, "grad_norm": 0.20763979852199554, "learning_rate": 1.3946641726173744e-06, "loss": 0.0014, "step": 143140 }, { "epoch": 2.3423054896506588, "grad_norm": 0.061255186796188354, "learning_rate": 1.3940046718386863e-06, "loss": 0.0005, "step": 143150 }, { "epoch": 2.3424691156017343, "grad_norm": 0.0543268658220768, "learning_rate": 1.393345301768661e-06, "loss": 0.0007, "step": 143160 }, { "epoch": 2.3426327415528103, "grad_norm": 0.08834155648946762, "learning_rate": 1.3926860624312017e-06, "loss": 0.0005, "step": 143170 }, { "epoch": 2.342796367503886, "grad_norm": 0.06812989711761475, "learning_rate": 1.3920269538502013e-06, "loss": 0.001, "step": 143180 }, { "epoch": 2.342959993454962, "grad_norm": 0.10159070044755936, "learning_rate": 1.3913679760495536e-06, "loss": 0.0008, "step": 143190 }, { "epoch": 2.343123619406038, "grad_norm": 0.17776423692703247, "learning_rate": 1.3907091290531433e-06, "loss": 0.0007, "step": 143200 }, { "epoch": 2.3432872453571134, "grad_norm": 0.052238188683986664, "learning_rate": 1.3900504128848512e-06, "loss": 0.0007, "step": 143210 }, { "epoch": 2.3434508713081894, "grad_norm": 0.007336069364100695, "learning_rate": 1.389391827568557e-06, "loss": 0.0004, "step": 143220 }, { "epoch": 2.3436144972592654, "grad_norm": 0.08986391127109528, "learning_rate": 1.3887333731281294e-06, "loss": 0.001, "step": 143230 }, { "epoch": 2.343778123210341, "grad_norm": 0.06561437249183655, "learning_rate": 1.3880750495874395e-06, "loss": 0.0006, "step": 143240 }, { "epoch": 2.343941749161417, "grad_norm": 0.017843183130025864, "learning_rate": 1.3874168569703461e-06, "loss": 0.0008, "step": 143250 }, { "epoch": 2.344105375112493, "grad_norm": 0.24887534976005554, "learning_rate": 1.3867587953007106e-06, "loss": 0.0006, "step": 143260 }, { "epoch": 2.3442690010635685, "grad_norm": 0.0790000706911087, "learning_rate": 1.3861008646023833e-06, "loss": 0.0014, "step": 143270 }, { "epoch": 2.3444326270146445, "grad_norm": 0.07997293025255203, "learning_rate": 1.385443064899215e-06, "loss": 0.0007, "step": 143280 }, { "epoch": 2.3445962529657205, "grad_norm": 0.10728421062231064, "learning_rate": 1.384785396215047e-06, "loss": 0.001, "step": 143290 }, { "epoch": 2.344759878916796, "grad_norm": 0.02984154038131237, "learning_rate": 1.384127858573721e-06, "loss": 0.001, "step": 143300 }, { "epoch": 2.344923504867872, "grad_norm": 0.13059760630130768, "learning_rate": 1.3834704519990695e-06, "loss": 0.0006, "step": 143310 }, { "epoch": 2.345087130818948, "grad_norm": 0.07913115620613098, "learning_rate": 1.3828131765149205e-06, "loss": 0.0007, "step": 143320 }, { "epoch": 2.3452507567700236, "grad_norm": 0.20202594995498657, "learning_rate": 1.3821560321451017e-06, "loss": 0.0008, "step": 143330 }, { "epoch": 2.3454143827210996, "grad_norm": 0.003404019633308053, "learning_rate": 1.3814990189134298e-06, "loss": 0.0008, "step": 143340 }, { "epoch": 2.3455780086721756, "grad_norm": 0.1013917624950409, "learning_rate": 1.3808421368437235e-06, "loss": 0.0009, "step": 143350 }, { "epoch": 2.345741634623251, "grad_norm": 0.01398392952978611, "learning_rate": 1.3801853859597897e-06, "loss": 0.0008, "step": 143360 }, { "epoch": 2.345905260574327, "grad_norm": 0.11806602030992508, "learning_rate": 1.3795287662854351e-06, "loss": 0.0004, "step": 143370 }, { "epoch": 2.346068886525403, "grad_norm": 0.0037658826913684607, "learning_rate": 1.3788722778444634e-06, "loss": 0.0008, "step": 143380 }, { "epoch": 2.3462325124764787, "grad_norm": 0.03889146447181702, "learning_rate": 1.3782159206606666e-06, "loss": 0.0017, "step": 143390 }, { "epoch": 2.3463961384275547, "grad_norm": 0.07284333556890488, "learning_rate": 1.3775596947578413e-06, "loss": 0.0007, "step": 143400 }, { "epoch": 2.3465597643786307, "grad_norm": 0.03644486516714096, "learning_rate": 1.3769036001597674e-06, "loss": 0.0005, "step": 143410 }, { "epoch": 2.3467233903297062, "grad_norm": 0.07116071879863739, "learning_rate": 1.37624763689023e-06, "loss": 0.0008, "step": 143420 }, { "epoch": 2.3468870162807822, "grad_norm": 0.03153816983103752, "learning_rate": 1.3755918049730083e-06, "loss": 0.0006, "step": 143430 }, { "epoch": 2.347050642231858, "grad_norm": 0.08310026675462723, "learning_rate": 1.3749361044318704e-06, "loss": 0.0007, "step": 143440 }, { "epoch": 2.3472142681829338, "grad_norm": 0.09923568367958069, "learning_rate": 1.3742805352905886e-06, "loss": 0.0008, "step": 143450 }, { "epoch": 2.3473778941340098, "grad_norm": 0.21217654645442963, "learning_rate": 1.373625097572921e-06, "loss": 0.0009, "step": 143460 }, { "epoch": 2.3475415200850853, "grad_norm": 0.022235210984945297, "learning_rate": 1.3729697913026297e-06, "loss": 0.0006, "step": 143470 }, { "epoch": 2.3477051460361613, "grad_norm": 0.10887635499238968, "learning_rate": 1.372314616503464e-06, "loss": 0.001, "step": 143480 }, { "epoch": 2.3478687719872373, "grad_norm": 0.008988043293356895, "learning_rate": 1.3716595731991765e-06, "loss": 0.0009, "step": 143490 }, { "epoch": 2.348032397938313, "grad_norm": 0.11141078174114227, "learning_rate": 1.3710046614135092e-06, "loss": 0.0006, "step": 143500 }, { "epoch": 2.348196023889389, "grad_norm": 0.030218739062547684, "learning_rate": 1.3703498811701992e-06, "loss": 0.0009, "step": 143510 }, { "epoch": 2.348359649840465, "grad_norm": 0.10009713470935822, "learning_rate": 1.3696952324929836e-06, "loss": 0.0005, "step": 143520 }, { "epoch": 2.3485232757915404, "grad_norm": 0.15290938317775726, "learning_rate": 1.3690407154055896e-06, "loss": 0.0007, "step": 143530 }, { "epoch": 2.3486869017426164, "grad_norm": 0.12174730002880096, "learning_rate": 1.368386329931744e-06, "loss": 0.0007, "step": 143540 }, { "epoch": 2.3488505276936924, "grad_norm": 0.13056187331676483, "learning_rate": 1.367732076095164e-06, "loss": 0.0012, "step": 143550 }, { "epoch": 2.349014153644768, "grad_norm": 0.09802248328924179, "learning_rate": 1.3670779539195677e-06, "loss": 0.001, "step": 143560 }, { "epoch": 2.349177779595844, "grad_norm": 0.12196998298168182, "learning_rate": 1.366423963428663e-06, "loss": 0.0011, "step": 143570 }, { "epoch": 2.3493414055469195, "grad_norm": 0.011269372887909412, "learning_rate": 1.3657701046461564e-06, "loss": 0.0007, "step": 143580 }, { "epoch": 2.3495050314979955, "grad_norm": 0.12164096534252167, "learning_rate": 1.3651163775957505e-06, "loss": 0.0009, "step": 143590 }, { "epoch": 2.3496686574490715, "grad_norm": 0.09820759296417236, "learning_rate": 1.3644627823011398e-06, "loss": 0.0013, "step": 143600 }, { "epoch": 2.349832283400147, "grad_norm": 0.08753691613674164, "learning_rate": 1.3638093187860151e-06, "loss": 0.0019, "step": 143610 }, { "epoch": 2.349995909351223, "grad_norm": 0.05304019898176193, "learning_rate": 1.3631559870740618e-06, "loss": 0.0015, "step": 143620 }, { "epoch": 2.350159535302299, "grad_norm": 0.10311562567949295, "learning_rate": 1.3625027871889645e-06, "loss": 0.0007, "step": 143630 }, { "epoch": 2.3503231612533746, "grad_norm": 0.010541926138103008, "learning_rate": 1.3618497191543979e-06, "loss": 0.0012, "step": 143640 }, { "epoch": 2.3504867872044506, "grad_norm": 0.09467317909002304, "learning_rate": 1.3611967829940342e-06, "loss": 0.0007, "step": 143650 }, { "epoch": 2.3506504131555266, "grad_norm": 0.0015669726999476552, "learning_rate": 1.3605439787315428e-06, "loss": 0.0011, "step": 143660 }, { "epoch": 2.350814039106602, "grad_norm": 0.1442929357290268, "learning_rate": 1.359891306390584e-06, "loss": 0.0012, "step": 143670 }, { "epoch": 2.350977665057678, "grad_norm": 0.043991636484861374, "learning_rate": 1.3592387659948175e-06, "loss": 0.0005, "step": 143680 }, { "epoch": 2.351141291008754, "grad_norm": 0.011225903406739235, "learning_rate": 1.3585863575678943e-06, "loss": 0.0009, "step": 143690 }, { "epoch": 2.3513049169598297, "grad_norm": 0.20907096564769745, "learning_rate": 1.357934081133465e-06, "loss": 0.0008, "step": 143700 }, { "epoch": 2.3514685429109057, "grad_norm": 0.049303993582725525, "learning_rate": 1.3572819367151724e-06, "loss": 0.001, "step": 143710 }, { "epoch": 2.3516321688619817, "grad_norm": 0.05887862294912338, "learning_rate": 1.3566299243366526e-06, "loss": 0.0004, "step": 143720 }, { "epoch": 2.3517957948130572, "grad_norm": 0.21594934165477753, "learning_rate": 1.355978044021543e-06, "loss": 0.0009, "step": 143730 }, { "epoch": 2.3519594207641332, "grad_norm": 0.058085665106773376, "learning_rate": 1.3553262957934693e-06, "loss": 0.0003, "step": 143740 }, { "epoch": 2.352123046715209, "grad_norm": 0.07392590492963791, "learning_rate": 1.3546746796760596e-06, "loss": 0.0008, "step": 143750 }, { "epoch": 2.3522866726662848, "grad_norm": 0.02047325111925602, "learning_rate": 1.3540231956929295e-06, "loss": 0.0009, "step": 143760 }, { "epoch": 2.3524502986173608, "grad_norm": 0.05070842429995537, "learning_rate": 1.3533718438676974e-06, "loss": 0.0006, "step": 143770 }, { "epoch": 2.3526139245684368, "grad_norm": 0.019670866429805756, "learning_rate": 1.3527206242239704e-06, "loss": 0.0005, "step": 143780 }, { "epoch": 2.3527775505195123, "grad_norm": 0.1090909093618393, "learning_rate": 1.352069536785356e-06, "loss": 0.0013, "step": 143790 }, { "epoch": 2.3529411764705883, "grad_norm": 0.08506675064563751, "learning_rate": 1.3514185815754516e-06, "loss": 0.0006, "step": 143800 }, { "epoch": 2.3531048024216643, "grad_norm": 0.060148466378450394, "learning_rate": 1.3507677586178559e-06, "loss": 0.0009, "step": 143810 }, { "epoch": 2.35326842837274, "grad_norm": 0.00532922288402915, "learning_rate": 1.3501170679361586e-06, "loss": 0.0008, "step": 143820 }, { "epoch": 2.353432054323816, "grad_norm": 0.45534756779670715, "learning_rate": 1.3494665095539438e-06, "loss": 0.001, "step": 143830 }, { "epoch": 2.3535956802748914, "grad_norm": 0.1013123095035553, "learning_rate": 1.3488160834947955e-06, "loss": 0.0008, "step": 143840 }, { "epoch": 2.3537593062259674, "grad_norm": 0.060462888330221176, "learning_rate": 1.3481657897822876e-06, "loss": 0.0009, "step": 143850 }, { "epoch": 2.3539229321770434, "grad_norm": 0.11193085461854935, "learning_rate": 1.3475156284399927e-06, "loss": 0.0008, "step": 143860 }, { "epoch": 2.354086558128119, "grad_norm": 0.007070471066981554, "learning_rate": 1.3468655994914792e-06, "loss": 0.0021, "step": 143870 }, { "epoch": 2.354250184079195, "grad_norm": 0.0891304761171341, "learning_rate": 1.3462157029603057e-06, "loss": 0.0011, "step": 143880 }, { "epoch": 2.354413810030271, "grad_norm": 0.325884073972702, "learning_rate": 1.3455659388700337e-06, "loss": 0.0017, "step": 143890 }, { "epoch": 2.3545774359813465, "grad_norm": 0.06715056300163269, "learning_rate": 1.3449163072442112e-06, "loss": 0.0004, "step": 143900 }, { "epoch": 2.3547410619324225, "grad_norm": 0.039136070758104324, "learning_rate": 1.3442668081063908e-06, "loss": 0.001, "step": 143910 }, { "epoch": 2.3549046878834985, "grad_norm": 0.007674661464989185, "learning_rate": 1.3436174414801095e-06, "loss": 0.0004, "step": 143920 }, { "epoch": 2.355068313834574, "grad_norm": 0.03370317444205284, "learning_rate": 1.3429682073889078e-06, "loss": 0.0005, "step": 143930 }, { "epoch": 2.35523193978565, "grad_norm": 0.021954510360956192, "learning_rate": 1.3423191058563207e-06, "loss": 0.0011, "step": 143940 }, { "epoch": 2.355395565736726, "grad_norm": 0.01823018118739128, "learning_rate": 1.3416701369058739e-06, "loss": 0.0013, "step": 143950 }, { "epoch": 2.3555591916878016, "grad_norm": 0.04143868014216423, "learning_rate": 1.3410213005610928e-06, "loss": 0.0005, "step": 143960 }, { "epoch": 2.3557228176388776, "grad_norm": 0.07386468350887299, "learning_rate": 1.3403725968454945e-06, "loss": 0.0008, "step": 143970 }, { "epoch": 2.355886443589953, "grad_norm": 0.03366195783019066, "learning_rate": 1.3397240257825956e-06, "loss": 0.0014, "step": 143980 }, { "epoch": 2.356050069541029, "grad_norm": 0.007476123981177807, "learning_rate": 1.3390755873959016e-06, "loss": 0.0008, "step": 143990 }, { "epoch": 2.356213695492105, "grad_norm": 0.03846132382750511, "learning_rate": 1.33842728170892e-06, "loss": 0.0007, "step": 144000 }, { "epoch": 2.3563773214431807, "grad_norm": 0.038211554288864136, "learning_rate": 1.3377791087451497e-06, "loss": 0.0009, "step": 144010 }, { "epoch": 2.3565409473942567, "grad_norm": 0.06402503699064255, "learning_rate": 1.337131068528083e-06, "loss": 0.0012, "step": 144020 }, { "epoch": 2.3567045733453327, "grad_norm": 0.009781122207641602, "learning_rate": 1.336483161081213e-06, "loss": 0.0002, "step": 144030 }, { "epoch": 2.3568681992964082, "grad_norm": 0.03583794832229614, "learning_rate": 1.335835386428022e-06, "loss": 0.0007, "step": 144040 }, { "epoch": 2.3570318252474842, "grad_norm": 0.3033040463924408, "learning_rate": 1.335187744591993e-06, "loss": 0.0007, "step": 144050 }, { "epoch": 2.35719545119856, "grad_norm": 0.13907165825366974, "learning_rate": 1.3345402355965992e-06, "loss": 0.0009, "step": 144060 }, { "epoch": 2.3573590771496358, "grad_norm": 0.16955947875976562, "learning_rate": 1.3338928594653128e-06, "loss": 0.0009, "step": 144070 }, { "epoch": 2.3575227031007118, "grad_norm": 0.08944796025753021, "learning_rate": 1.3332456162215979e-06, "loss": 0.001, "step": 144080 }, { "epoch": 2.3576863290517878, "grad_norm": 0.11608299612998962, "learning_rate": 1.3325985058889168e-06, "loss": 0.0006, "step": 144090 }, { "epoch": 2.3578499550028633, "grad_norm": 0.0029833803419023752, "learning_rate": 1.3319515284907269e-06, "loss": 0.0004, "step": 144100 }, { "epoch": 2.3580135809539393, "grad_norm": 0.04761983081698418, "learning_rate": 1.3313046840504778e-06, "loss": 0.0005, "step": 144110 }, { "epoch": 2.3581772069050153, "grad_norm": 0.023272233083844185, "learning_rate": 1.330657972591617e-06, "loss": 0.0007, "step": 144120 }, { "epoch": 2.358340832856091, "grad_norm": 0.17743664979934692, "learning_rate": 1.3300113941375836e-06, "loss": 0.0007, "step": 144130 }, { "epoch": 2.358504458807167, "grad_norm": 0.06324001401662827, "learning_rate": 1.329364948711817e-06, "loss": 0.0011, "step": 144140 }, { "epoch": 2.358668084758243, "grad_norm": 0.11405488103628159, "learning_rate": 1.3287186363377503e-06, "loss": 0.0006, "step": 144150 }, { "epoch": 2.3588317107093184, "grad_norm": 0.11704226583242416, "learning_rate": 1.3280724570388076e-06, "loss": 0.0008, "step": 144160 }, { "epoch": 2.3589953366603944, "grad_norm": 0.15636445581912994, "learning_rate": 1.3274264108384149e-06, "loss": 0.0007, "step": 144170 }, { "epoch": 2.3591589626114704, "grad_norm": 0.10471849143505096, "learning_rate": 1.3267804977599864e-06, "loss": 0.001, "step": 144180 }, { "epoch": 2.359322588562546, "grad_norm": 0.011177187785506248, "learning_rate": 1.3261347178269379e-06, "loss": 0.0007, "step": 144190 }, { "epoch": 2.359486214513622, "grad_norm": 0.08316013962030411, "learning_rate": 1.3254890710626744e-06, "loss": 0.0009, "step": 144200 }, { "epoch": 2.359649840464698, "grad_norm": 0.17139707505702972, "learning_rate": 1.324843557490602e-06, "loss": 0.0007, "step": 144210 }, { "epoch": 2.3598134664157735, "grad_norm": 0.07555080950260162, "learning_rate": 1.3241981771341177e-06, "loss": 0.001, "step": 144220 }, { "epoch": 2.3599770923668495, "grad_norm": 0.09622034430503845, "learning_rate": 1.323552930016614e-06, "loss": 0.0005, "step": 144230 }, { "epoch": 2.360140718317925, "grad_norm": 0.0039297244511544704, "learning_rate": 1.3229078161614816e-06, "loss": 0.0007, "step": 144240 }, { "epoch": 2.360304344269001, "grad_norm": 0.05190461501479149, "learning_rate": 1.3222628355921014e-06, "loss": 0.0022, "step": 144250 }, { "epoch": 2.360467970220077, "grad_norm": 0.11169778555631638, "learning_rate": 1.3216179883318563e-06, "loss": 0.0012, "step": 144260 }, { "epoch": 2.3606315961711526, "grad_norm": 0.06501086056232452, "learning_rate": 1.3209732744041164e-06, "loss": 0.0008, "step": 144270 }, { "epoch": 2.3607952221222286, "grad_norm": 0.030380522832274437, "learning_rate": 1.3203286938322546e-06, "loss": 0.0005, "step": 144280 }, { "epoch": 2.3609588480733046, "grad_norm": 0.19924689829349518, "learning_rate": 1.3196842466396326e-06, "loss": 0.0011, "step": 144290 }, { "epoch": 2.36112247402438, "grad_norm": 0.003139902837574482, "learning_rate": 1.3190399328496107e-06, "loss": 0.0005, "step": 144300 }, { "epoch": 2.361286099975456, "grad_norm": 0.11221468448638916, "learning_rate": 1.3183957524855479e-06, "loss": 0.0014, "step": 144310 }, { "epoch": 2.361449725926532, "grad_norm": 0.05913063511252403, "learning_rate": 1.3177517055707866e-06, "loss": 0.0005, "step": 144320 }, { "epoch": 2.3616133518776077, "grad_norm": 0.10795065760612488, "learning_rate": 1.3171077921286784e-06, "loss": 0.0006, "step": 144330 }, { "epoch": 2.3617769778286837, "grad_norm": 0.07169833034276962, "learning_rate": 1.3164640121825594e-06, "loss": 0.0004, "step": 144340 }, { "epoch": 2.3619406037797592, "grad_norm": 0.0510670505464077, "learning_rate": 1.3158203657557667e-06, "loss": 0.0006, "step": 144350 }, { "epoch": 2.3621042297308352, "grad_norm": 0.07229570299386978, "learning_rate": 1.3151768528716325e-06, "loss": 0.0009, "step": 144360 }, { "epoch": 2.362267855681911, "grad_norm": 0.19994962215423584, "learning_rate": 1.3145334735534798e-06, "loss": 0.0011, "step": 144370 }, { "epoch": 2.3624314816329868, "grad_norm": 0.31083160638809204, "learning_rate": 1.3138902278246324e-06, "loss": 0.0013, "step": 144380 }, { "epoch": 2.3625951075840628, "grad_norm": 0.18726523220539093, "learning_rate": 1.3132471157084032e-06, "loss": 0.0007, "step": 144390 }, { "epoch": 2.3627587335351388, "grad_norm": 0.06729884445667267, "learning_rate": 1.3126041372281068e-06, "loss": 0.0009, "step": 144400 }, { "epoch": 2.3629223594862143, "grad_norm": 0.004335499834269285, "learning_rate": 1.311961292407048e-06, "loss": 0.0009, "step": 144410 }, { "epoch": 2.3630859854372903, "grad_norm": 0.214744433760643, "learning_rate": 1.311318581268527e-06, "loss": 0.0007, "step": 144420 }, { "epoch": 2.3632496113883663, "grad_norm": 0.05723525956273079, "learning_rate": 1.3106760038358429e-06, "loss": 0.0008, "step": 144430 }, { "epoch": 2.363413237339442, "grad_norm": 0.06948518753051758, "learning_rate": 1.3100335601322855e-06, "loss": 0.0005, "step": 144440 }, { "epoch": 2.363576863290518, "grad_norm": 0.005343471188098192, "learning_rate": 1.3093912501811445e-06, "loss": 0.0006, "step": 144450 }, { "epoch": 2.363740489241594, "grad_norm": 0.18003512918949127, "learning_rate": 1.308749074005699e-06, "loss": 0.0005, "step": 144460 }, { "epoch": 2.3639041151926694, "grad_norm": 0.10720117390155792, "learning_rate": 1.3081070316292294e-06, "loss": 0.0007, "step": 144470 }, { "epoch": 2.3640677411437454, "grad_norm": 0.018040727823972702, "learning_rate": 1.3074651230750052e-06, "loss": 0.0003, "step": 144480 }, { "epoch": 2.3642313670948214, "grad_norm": 0.009498990140855312, "learning_rate": 1.3068233483662968e-06, "loss": 0.0002, "step": 144490 }, { "epoch": 2.364394993045897, "grad_norm": 0.0802672803401947, "learning_rate": 1.3061817075263644e-06, "loss": 0.0011, "step": 144500 }, { "epoch": 2.364558618996973, "grad_norm": 0.010741434060037136, "learning_rate": 1.3055402005784684e-06, "loss": 0.0006, "step": 144510 }, { "epoch": 2.364722244948049, "grad_norm": 0.07852223515510559, "learning_rate": 1.3048988275458614e-06, "loss": 0.0015, "step": 144520 }, { "epoch": 2.3648858708991245, "grad_norm": 0.03402560576796532, "learning_rate": 1.3042575884517888e-06, "loss": 0.0007, "step": 144530 }, { "epoch": 2.3650494968502005, "grad_norm": 0.07025851309299469, "learning_rate": 1.3036164833194982e-06, "loss": 0.0011, "step": 144540 }, { "epoch": 2.3652131228012765, "grad_norm": 0.11702341586351395, "learning_rate": 1.3029755121722242e-06, "loss": 0.0008, "step": 144550 }, { "epoch": 2.365376748752352, "grad_norm": 0.03939018025994301, "learning_rate": 1.3023346750332038e-06, "loss": 0.0005, "step": 144560 }, { "epoch": 2.365540374703428, "grad_norm": 0.07736250758171082, "learning_rate": 1.3016939719256628e-06, "loss": 0.0003, "step": 144570 }, { "epoch": 2.365704000654504, "grad_norm": 0.09059028327465057, "learning_rate": 1.3010534028728267e-06, "loss": 0.0014, "step": 144580 }, { "epoch": 2.3658676266055796, "grad_norm": 0.10941189527511597, "learning_rate": 1.3004129678979161e-06, "loss": 0.0008, "step": 144590 }, { "epoch": 2.3660312525566556, "grad_norm": 0.04468689113855362, "learning_rate": 1.2997726670241422e-06, "loss": 0.0009, "step": 144600 }, { "epoch": 2.366194878507731, "grad_norm": 0.2643345892429352, "learning_rate": 1.299132500274719e-06, "loss": 0.0012, "step": 144610 }, { "epoch": 2.366358504458807, "grad_norm": 0.1810191571712494, "learning_rate": 1.2984924676728449e-06, "loss": 0.0008, "step": 144620 }, { "epoch": 2.366522130409883, "grad_norm": 0.06795274466276169, "learning_rate": 1.2978525692417221e-06, "loss": 0.0005, "step": 144630 }, { "epoch": 2.3666857563609587, "grad_norm": 0.09930314123630524, "learning_rate": 1.297212805004548e-06, "loss": 0.0006, "step": 144640 }, { "epoch": 2.3668493823120347, "grad_norm": 0.0886782556772232, "learning_rate": 1.2965731749845078e-06, "loss": 0.0009, "step": 144650 }, { "epoch": 2.3670130082631107, "grad_norm": 0.053361061960458755, "learning_rate": 1.2959336792047916e-06, "loss": 0.0004, "step": 144660 }, { "epoch": 2.3671766342141862, "grad_norm": 0.013816573657095432, "learning_rate": 1.2952943176885746e-06, "loss": 0.0004, "step": 144670 }, { "epoch": 2.3673402601652622, "grad_norm": 0.06315802037715912, "learning_rate": 1.2946550904590366e-06, "loss": 0.001, "step": 144680 }, { "epoch": 2.367503886116338, "grad_norm": 0.023802505806088448, "learning_rate": 1.2940159975393446e-06, "loss": 0.0003, "step": 144690 }, { "epoch": 2.3676675120674138, "grad_norm": 0.07392996549606323, "learning_rate": 1.2933770389526668e-06, "loss": 0.0008, "step": 144700 }, { "epoch": 2.3678311380184898, "grad_norm": 0.0754457488656044, "learning_rate": 1.292738214722163e-06, "loss": 0.001, "step": 144710 }, { "epoch": 2.3679947639695658, "grad_norm": 0.033821530640125275, "learning_rate": 1.2920995248709873e-06, "loss": 0.0009, "step": 144720 }, { "epoch": 2.3681583899206413, "grad_norm": 0.10110820829868317, "learning_rate": 1.291460969422293e-06, "loss": 0.0007, "step": 144730 }, { "epoch": 2.3683220158717173, "grad_norm": 0.042355962097644806, "learning_rate": 1.2908225483992242e-06, "loss": 0.0007, "step": 144740 }, { "epoch": 2.368485641822793, "grad_norm": 0.03476399928331375, "learning_rate": 1.2901842618249249e-06, "loss": 0.0017, "step": 144750 }, { "epoch": 2.368649267773869, "grad_norm": 0.1271200031042099, "learning_rate": 1.2895461097225282e-06, "loss": 0.0004, "step": 144760 }, { "epoch": 2.368812893724945, "grad_norm": 0.057854343205690384, "learning_rate": 1.2889080921151687e-06, "loss": 0.0008, "step": 144770 }, { "epoch": 2.3689765196760204, "grad_norm": 0.031231729313731194, "learning_rate": 1.2882702090259697e-06, "loss": 0.0007, "step": 144780 }, { "epoch": 2.3691401456270964, "grad_norm": 0.09763874858617783, "learning_rate": 1.287632460478055e-06, "loss": 0.0007, "step": 144790 }, { "epoch": 2.3693037715781724, "grad_norm": 0.0897286906838417, "learning_rate": 1.2869948464945426e-06, "loss": 0.0008, "step": 144800 }, { "epoch": 2.369467397529248, "grad_norm": 0.09197789430618286, "learning_rate": 1.2863573670985429e-06, "loss": 0.0006, "step": 144810 }, { "epoch": 2.369631023480324, "grad_norm": 0.02861623279750347, "learning_rate": 1.2857200223131634e-06, "loss": 0.0009, "step": 144820 }, { "epoch": 2.3697946494314, "grad_norm": 0.055444080382585526, "learning_rate": 1.2850828121615045e-06, "loss": 0.0006, "step": 144830 }, { "epoch": 2.3699582753824755, "grad_norm": 0.0253133587539196, "learning_rate": 1.2844457366666662e-06, "loss": 0.0003, "step": 144840 }, { "epoch": 2.3701219013335515, "grad_norm": 0.0453915111720562, "learning_rate": 1.2838087958517388e-06, "loss": 0.0009, "step": 144850 }, { "epoch": 2.3702855272846275, "grad_norm": 0.18144530057907104, "learning_rate": 1.2831719897398108e-06, "loss": 0.0006, "step": 144860 }, { "epoch": 2.370449153235703, "grad_norm": 0.06157657131552696, "learning_rate": 1.2825353183539662e-06, "loss": 0.0012, "step": 144870 }, { "epoch": 2.370612779186779, "grad_norm": 0.10179923474788666, "learning_rate": 1.2818987817172807e-06, "loss": 0.0009, "step": 144880 }, { "epoch": 2.370776405137855, "grad_norm": 0.12163715809583664, "learning_rate": 1.2812623798528295e-06, "loss": 0.0004, "step": 144890 }, { "epoch": 2.3709400310889306, "grad_norm": 0.06507319211959839, "learning_rate": 1.2806261127836778e-06, "loss": 0.0006, "step": 144900 }, { "epoch": 2.3711036570400066, "grad_norm": 0.10177026689052582, "learning_rate": 1.2799899805328914e-06, "loss": 0.0008, "step": 144910 }, { "epoch": 2.3712672829910826, "grad_norm": 0.09454219788312912, "learning_rate": 1.2793539831235275e-06, "loss": 0.0006, "step": 144920 }, { "epoch": 2.371430908942158, "grad_norm": 0.08566269278526306, "learning_rate": 1.278718120578638e-06, "loss": 0.0009, "step": 144930 }, { "epoch": 2.371594534893234, "grad_norm": 0.04897194355726242, "learning_rate": 1.2780823929212743e-06, "loss": 0.001, "step": 144940 }, { "epoch": 2.37175816084431, "grad_norm": 0.10678929835557938, "learning_rate": 1.2774468001744767e-06, "loss": 0.0007, "step": 144950 }, { "epoch": 2.3719217867953857, "grad_norm": 0.11534004658460617, "learning_rate": 1.2768113423612871e-06, "loss": 0.0007, "step": 144960 }, { "epoch": 2.3720854127464617, "grad_norm": 0.026602929458022118, "learning_rate": 1.2761760195047369e-06, "loss": 0.0011, "step": 144970 }, { "epoch": 2.3722490386975377, "grad_norm": 0.049671001732349396, "learning_rate": 1.2755408316278573e-06, "loss": 0.001, "step": 144980 }, { "epoch": 2.3724126646486132, "grad_norm": 0.05801291763782501, "learning_rate": 1.2749057787536694e-06, "loss": 0.0007, "step": 144990 }, { "epoch": 2.372576290599689, "grad_norm": 0.12459327280521393, "learning_rate": 1.2742708609051957e-06, "loss": 0.0007, "step": 145000 }, { "epoch": 2.3727399165507648, "grad_norm": 0.11902379989624023, "learning_rate": 1.2736360781054486e-06, "loss": 0.0007, "step": 145010 }, { "epoch": 2.3729035425018408, "grad_norm": 0.10373744368553162, "learning_rate": 1.2730014303774358e-06, "loss": 0.001, "step": 145020 }, { "epoch": 2.3730671684529168, "grad_norm": 0.0296646561473608, "learning_rate": 1.2723669177441655e-06, "loss": 0.0009, "step": 145030 }, { "epoch": 2.3732307944039923, "grad_norm": 0.010697804391384125, "learning_rate": 1.2717325402286334e-06, "loss": 0.0007, "step": 145040 }, { "epoch": 2.3733944203550683, "grad_norm": 0.007616598159074783, "learning_rate": 1.2710982978538383e-06, "loss": 0.0007, "step": 145050 }, { "epoch": 2.3735580463061443, "grad_norm": 0.037158578634262085, "learning_rate": 1.2704641906427655e-06, "loss": 0.0014, "step": 145060 }, { "epoch": 2.37372167225722, "grad_norm": 0.019848110154271126, "learning_rate": 1.2698302186184025e-06, "loss": 0.0006, "step": 145070 }, { "epoch": 2.373885298208296, "grad_norm": 0.04519448056817055, "learning_rate": 1.26919638180373e-06, "loss": 0.001, "step": 145080 }, { "epoch": 2.374048924159372, "grad_norm": 0.05504337698221207, "learning_rate": 1.2685626802217206e-06, "loss": 0.0008, "step": 145090 }, { "epoch": 2.3742125501104474, "grad_norm": 0.10191631317138672, "learning_rate": 1.2679291138953475e-06, "loss": 0.0008, "step": 145100 }, { "epoch": 2.3743761760615234, "grad_norm": 0.11442823708057404, "learning_rate": 1.2672956828475735e-06, "loss": 0.0016, "step": 145110 }, { "epoch": 2.374539802012599, "grad_norm": 0.03787439689040184, "learning_rate": 1.2666623871013605e-06, "loss": 0.0006, "step": 145120 }, { "epoch": 2.374703427963675, "grad_norm": 0.015325605869293213, "learning_rate": 1.2660292266796615e-06, "loss": 0.0006, "step": 145130 }, { "epoch": 2.374867053914751, "grad_norm": 0.054970066994428635, "learning_rate": 1.2653962016054283e-06, "loss": 0.001, "step": 145140 }, { "epoch": 2.3750306798658265, "grad_norm": 0.1488233059644699, "learning_rate": 1.2647633119016085e-06, "loss": 0.0007, "step": 145150 }, { "epoch": 2.3751943058169025, "grad_norm": 0.08479508012533188, "learning_rate": 1.26413055759114e-06, "loss": 0.0008, "step": 145160 }, { "epoch": 2.3753579317679785, "grad_norm": 0.009266995824873447, "learning_rate": 1.263497938696961e-06, "loss": 0.0004, "step": 145170 }, { "epoch": 2.375521557719054, "grad_norm": 0.061846502125263214, "learning_rate": 1.262865455242e-06, "loss": 0.0006, "step": 145180 }, { "epoch": 2.37568518367013, "grad_norm": 0.026510871946811676, "learning_rate": 1.2622331072491855e-06, "loss": 0.0006, "step": 145190 }, { "epoch": 2.375848809621206, "grad_norm": 0.06797093152999878, "learning_rate": 1.261600894741436e-06, "loss": 0.0005, "step": 145200 }, { "epoch": 2.3760124355722816, "grad_norm": 0.06298942863941193, "learning_rate": 1.2609688177416707e-06, "loss": 0.0005, "step": 145210 }, { "epoch": 2.3761760615233576, "grad_norm": 0.1795349419116974, "learning_rate": 1.2603368762727992e-06, "loss": 0.0008, "step": 145220 }, { "epoch": 2.3763396874744336, "grad_norm": 0.030167920514941216, "learning_rate": 1.2597050703577263e-06, "loss": 0.0008, "step": 145230 }, { "epoch": 2.376503313425509, "grad_norm": 0.026989977806806564, "learning_rate": 1.2590734000193566e-06, "loss": 0.0005, "step": 145240 }, { "epoch": 2.376666939376585, "grad_norm": 0.11359591782093048, "learning_rate": 1.2584418652805835e-06, "loss": 0.0016, "step": 145250 }, { "epoch": 2.376830565327661, "grad_norm": 0.07843035459518433, "learning_rate": 1.257810466164302e-06, "loss": 0.0007, "step": 145260 }, { "epoch": 2.3769941912787367, "grad_norm": 0.051659245043992996, "learning_rate": 1.2571792026933955e-06, "loss": 0.0004, "step": 145270 }, { "epoch": 2.3771578172298127, "grad_norm": 0.016711194068193436, "learning_rate": 1.2565480748907482e-06, "loss": 0.0012, "step": 145280 }, { "epoch": 2.3773214431808887, "grad_norm": 0.01970168948173523, "learning_rate": 1.255917082779235e-06, "loss": 0.0006, "step": 145290 }, { "epoch": 2.3774850691319642, "grad_norm": 0.005773472134023905, "learning_rate": 1.2552862263817288e-06, "loss": 0.0006, "step": 145300 }, { "epoch": 2.37764869508304, "grad_norm": 0.14136260747909546, "learning_rate": 1.2546555057210996e-06, "loss": 0.0009, "step": 145310 }, { "epoch": 2.377812321034116, "grad_norm": 0.06797131896018982, "learning_rate": 1.2540249208202038e-06, "loss": 0.0011, "step": 145320 }, { "epoch": 2.3779759469851918, "grad_norm": 0.13964861631393433, "learning_rate": 1.2533944717019026e-06, "loss": 0.001, "step": 145330 }, { "epoch": 2.3781395729362678, "grad_norm": 0.15940672159194946, "learning_rate": 1.252764158389046e-06, "loss": 0.0013, "step": 145340 }, { "epoch": 2.3783031988873438, "grad_norm": 0.005445521324872971, "learning_rate": 1.252133980904482e-06, "loss": 0.0011, "step": 145350 }, { "epoch": 2.3784668248384193, "grad_norm": 0.0383513979613781, "learning_rate": 1.2515039392710548e-06, "loss": 0.0014, "step": 145360 }, { "epoch": 2.3786304507894953, "grad_norm": 0.06214066594839096, "learning_rate": 1.2508740335116e-06, "loss": 0.0006, "step": 145370 }, { "epoch": 2.378794076740571, "grad_norm": 0.0362640880048275, "learning_rate": 1.250244263648951e-06, "loss": 0.0004, "step": 145380 }, { "epoch": 2.378957702691647, "grad_norm": 0.08192306011915207, "learning_rate": 1.249614629705934e-06, "loss": 0.0004, "step": 145390 }, { "epoch": 2.379121328642723, "grad_norm": 0.0460490882396698, "learning_rate": 1.2489851317053747e-06, "loss": 0.0005, "step": 145400 }, { "epoch": 2.3792849545937984, "grad_norm": 0.006301357876509428, "learning_rate": 1.2483557696700888e-06, "loss": 0.0006, "step": 145410 }, { "epoch": 2.3794485805448744, "grad_norm": 0.1867118626832962, "learning_rate": 1.247726543622888e-06, "loss": 0.0012, "step": 145420 }, { "epoch": 2.3796122064959504, "grad_norm": 0.04853400960564613, "learning_rate": 1.247097453586583e-06, "loss": 0.0006, "step": 145430 }, { "epoch": 2.379775832447026, "grad_norm": 0.08408501744270325, "learning_rate": 1.2464684995839744e-06, "loss": 0.0004, "step": 145440 }, { "epoch": 2.379939458398102, "grad_norm": 0.032083846628665924, "learning_rate": 1.2458396816378627e-06, "loss": 0.0007, "step": 145450 }, { "epoch": 2.380103084349178, "grad_norm": 0.07761266082525253, "learning_rate": 1.2452109997710377e-06, "loss": 0.0012, "step": 145460 }, { "epoch": 2.3802667103002535, "grad_norm": 0.09874489903450012, "learning_rate": 1.2445824540062911e-06, "loss": 0.001, "step": 145470 }, { "epoch": 2.3804303362513295, "grad_norm": 0.026025863364338875, "learning_rate": 1.2439540443664034e-06, "loss": 0.0006, "step": 145480 }, { "epoch": 2.3805939622024055, "grad_norm": 0.05554347485303879, "learning_rate": 1.243325770874156e-06, "loss": 0.001, "step": 145490 }, { "epoch": 2.380757588153481, "grad_norm": 0.0900362953543663, "learning_rate": 1.2426976335523184e-06, "loss": 0.001, "step": 145500 }, { "epoch": 2.380921214104557, "grad_norm": 0.06566480547189713, "learning_rate": 1.2420696324236625e-06, "loss": 0.0011, "step": 145510 }, { "epoch": 2.3810848400556326, "grad_norm": 0.07317785918712616, "learning_rate": 1.241441767510951e-06, "loss": 0.001, "step": 145520 }, { "epoch": 2.3812484660067086, "grad_norm": 0.08004292845726013, "learning_rate": 1.2408140388369405e-06, "loss": 0.0027, "step": 145530 }, { "epoch": 2.3814120919577846, "grad_norm": 0.04221239686012268, "learning_rate": 1.2401864464243874e-06, "loss": 0.0007, "step": 145540 }, { "epoch": 2.38157571790886, "grad_norm": 0.14445243775844574, "learning_rate": 1.2395589902960377e-06, "loss": 0.0012, "step": 145550 }, { "epoch": 2.381739343859936, "grad_norm": 0.040830545127391815, "learning_rate": 1.2389316704746362e-06, "loss": 0.0006, "step": 145560 }, { "epoch": 2.381902969811012, "grad_norm": 0.24122272431850433, "learning_rate": 1.2383044869829242e-06, "loss": 0.0006, "step": 145570 }, { "epoch": 2.3820665957620877, "grad_norm": 0.10114267468452454, "learning_rate": 1.237677439843632e-06, "loss": 0.0007, "step": 145580 }, { "epoch": 2.3822302217131637, "grad_norm": 0.02469678781926632, "learning_rate": 1.237050529079491e-06, "loss": 0.0008, "step": 145590 }, { "epoch": 2.3823938476642397, "grad_norm": 0.070464126765728, "learning_rate": 1.2364237547132235e-06, "loss": 0.0006, "step": 145600 }, { "epoch": 2.3825574736153152, "grad_norm": 0.06091037020087242, "learning_rate": 1.235797116767552e-06, "loss": 0.0015, "step": 145610 }, { "epoch": 2.382721099566391, "grad_norm": 0.004935974720865488, "learning_rate": 1.2351706152651856e-06, "loss": 0.0005, "step": 145620 }, { "epoch": 2.382884725517467, "grad_norm": 0.033449091017246246, "learning_rate": 1.2345442502288353e-06, "loss": 0.0022, "step": 145630 }, { "epoch": 2.3830483514685428, "grad_norm": 0.003820921527221799, "learning_rate": 1.2339180216812074e-06, "loss": 0.0004, "step": 145640 }, { "epoch": 2.3832119774196188, "grad_norm": 0.027925308793783188, "learning_rate": 1.233291929644998e-06, "loss": 0.0011, "step": 145650 }, { "epoch": 2.3833756033706948, "grad_norm": 0.13035884499549866, "learning_rate": 1.2326659741429054e-06, "loss": 0.0009, "step": 145660 }, { "epoch": 2.3835392293217703, "grad_norm": 0.007219295017421246, "learning_rate": 1.2320401551976146e-06, "loss": 0.0008, "step": 145670 }, { "epoch": 2.3837028552728463, "grad_norm": 0.07360933721065521, "learning_rate": 1.2314144728318134e-06, "loss": 0.0005, "step": 145680 }, { "epoch": 2.3838664812239223, "grad_norm": 0.007033075205981731, "learning_rate": 1.2307889270681788e-06, "loss": 0.0013, "step": 145690 }, { "epoch": 2.384030107174998, "grad_norm": 0.07547487318515778, "learning_rate": 1.230163517929388e-06, "loss": 0.0005, "step": 145700 }, { "epoch": 2.384193733126074, "grad_norm": 0.036217715591192245, "learning_rate": 1.2295382454381076e-06, "loss": 0.0008, "step": 145710 }, { "epoch": 2.38435735907715, "grad_norm": 0.02261698804795742, "learning_rate": 1.2289131096170053e-06, "loss": 0.0005, "step": 145720 }, { "epoch": 2.3845209850282254, "grad_norm": 0.08924450725317001, "learning_rate": 1.2282881104887395e-06, "loss": 0.0009, "step": 145730 }, { "epoch": 2.3846846109793014, "grad_norm": 0.01222652941942215, "learning_rate": 1.2276632480759625e-06, "loss": 0.0015, "step": 145740 }, { "epoch": 2.3848482369303774, "grad_norm": 0.018236972391605377, "learning_rate": 1.227038522401328e-06, "loss": 0.0012, "step": 145750 }, { "epoch": 2.385011862881453, "grad_norm": 0.017342614009976387, "learning_rate": 1.2264139334874775e-06, "loss": 0.0017, "step": 145760 }, { "epoch": 2.385175488832529, "grad_norm": 0.07415437698364258, "learning_rate": 1.2257894813570537e-06, "loss": 0.0008, "step": 145770 }, { "epoch": 2.3853391147836045, "grad_norm": 0.06055690720677376, "learning_rate": 1.2251651660326891e-06, "loss": 0.0007, "step": 145780 }, { "epoch": 2.3855027407346805, "grad_norm": 0.002141202799975872, "learning_rate": 1.224540987537014e-06, "loss": 0.0005, "step": 145790 }, { "epoch": 2.3856663666857565, "grad_norm": 0.15238164365291595, "learning_rate": 1.2239169458926558e-06, "loss": 0.0009, "step": 145800 }, { "epoch": 2.385829992636832, "grad_norm": 0.07647334784269333, "learning_rate": 1.2232930411222316e-06, "loss": 0.0007, "step": 145810 }, { "epoch": 2.385993618587908, "grad_norm": 0.04357259348034859, "learning_rate": 1.2226692732483602e-06, "loss": 0.0005, "step": 145820 }, { "epoch": 2.386157244538984, "grad_norm": 0.04395972564816475, "learning_rate": 1.222045642293646e-06, "loss": 0.0007, "step": 145830 }, { "epoch": 2.3863208704900596, "grad_norm": 0.001686961273662746, "learning_rate": 1.2214221482806976e-06, "loss": 0.0009, "step": 145840 }, { "epoch": 2.3864844964411356, "grad_norm": 0.01953599415719509, "learning_rate": 1.2207987912321156e-06, "loss": 0.0006, "step": 145850 }, { "epoch": 2.3866481223922116, "grad_norm": 0.02389693073928356, "learning_rate": 1.2201755711704933e-06, "loss": 0.0016, "step": 145860 }, { "epoch": 2.386811748343287, "grad_norm": 0.04619196057319641, "learning_rate": 1.2195524881184234e-06, "loss": 0.0009, "step": 145870 }, { "epoch": 2.386975374294363, "grad_norm": 0.11164496093988419, "learning_rate": 1.2189295420984882e-06, "loss": 0.0006, "step": 145880 }, { "epoch": 2.3871390002454387, "grad_norm": 0.17305664718151093, "learning_rate": 1.218306733133271e-06, "loss": 0.0012, "step": 145890 }, { "epoch": 2.3873026261965147, "grad_norm": 0.08470435440540314, "learning_rate": 1.2176840612453445e-06, "loss": 0.0013, "step": 145900 }, { "epoch": 2.3874662521475907, "grad_norm": 0.07801004499197006, "learning_rate": 1.217061526457281e-06, "loss": 0.0006, "step": 145910 }, { "epoch": 2.3876298780986662, "grad_norm": 0.16480372846126556, "learning_rate": 1.2164391287916454e-06, "loss": 0.0009, "step": 145920 }, { "epoch": 2.387793504049742, "grad_norm": 0.08581792563199997, "learning_rate": 1.215816868270997e-06, "loss": 0.0006, "step": 145930 }, { "epoch": 2.387957130000818, "grad_norm": 0.05874474346637726, "learning_rate": 1.215194744917893e-06, "loss": 0.0008, "step": 145940 }, { "epoch": 2.3881207559518938, "grad_norm": 0.0018676667241379619, "learning_rate": 1.2145727587548817e-06, "loss": 0.0008, "step": 145950 }, { "epoch": 2.3882843819029698, "grad_norm": 0.13485835492610931, "learning_rate": 1.213950909804511e-06, "loss": 0.0013, "step": 145960 }, { "epoch": 2.3884480078540458, "grad_norm": 0.05608774721622467, "learning_rate": 1.2133291980893191e-06, "loss": 0.0007, "step": 145970 }, { "epoch": 2.3886116338051213, "grad_norm": 0.11544715613126755, "learning_rate": 1.212707623631844e-06, "loss": 0.001, "step": 145980 }, { "epoch": 2.3887752597561973, "grad_norm": 0.0441889762878418, "learning_rate": 1.2120861864546135e-06, "loss": 0.0007, "step": 145990 }, { "epoch": 2.3889388857072733, "grad_norm": 0.045418061316013336, "learning_rate": 1.2114648865801547e-06, "loss": 0.0006, "step": 146000 }, { "epoch": 2.389102511658349, "grad_norm": 0.04495017230510712, "learning_rate": 1.21084372403099e-06, "loss": 0.0008, "step": 146010 }, { "epoch": 2.389266137609425, "grad_norm": 0.00138551602140069, "learning_rate": 1.2102226988296334e-06, "loss": 0.0009, "step": 146020 }, { "epoch": 2.389429763560501, "grad_norm": 0.07855869829654694, "learning_rate": 1.2096018109985951e-06, "loss": 0.0006, "step": 146030 }, { "epoch": 2.3895933895115764, "grad_norm": 0.08308794349431992, "learning_rate": 1.2089810605603802e-06, "loss": 0.0004, "step": 146040 }, { "epoch": 2.3897570154626524, "grad_norm": 0.09332466125488281, "learning_rate": 1.2083604475374917e-06, "loss": 0.0005, "step": 146050 }, { "epoch": 2.3899206414137284, "grad_norm": 0.12089268118143082, "learning_rate": 1.2077399719524218e-06, "loss": 0.001, "step": 146060 }, { "epoch": 2.390084267364804, "grad_norm": 0.13675138354301453, "learning_rate": 1.207119633827664e-06, "loss": 0.0005, "step": 146070 }, { "epoch": 2.39024789331588, "grad_norm": 0.10944545269012451, "learning_rate": 1.2064994331857056e-06, "loss": 0.0006, "step": 146080 }, { "epoch": 2.390411519266956, "grad_norm": 0.012564612552523613, "learning_rate": 1.205879370049023e-06, "loss": 0.0009, "step": 146090 }, { "epoch": 2.3905751452180315, "grad_norm": 0.03477565944194794, "learning_rate": 1.205259444440096e-06, "loss": 0.0006, "step": 146100 }, { "epoch": 2.3907387711691075, "grad_norm": 0.11118346452713013, "learning_rate": 1.2046396563813922e-06, "loss": 0.0007, "step": 146110 }, { "epoch": 2.3909023971201835, "grad_norm": 0.01566425710916519, "learning_rate": 1.204020005895381e-06, "loss": 0.0004, "step": 146120 }, { "epoch": 2.391066023071259, "grad_norm": 0.06493952125310898, "learning_rate": 1.2034004930045206e-06, "loss": 0.0009, "step": 146130 }, { "epoch": 2.391229649022335, "grad_norm": 0.12279953807592392, "learning_rate": 1.202781117731266e-06, "loss": 0.0024, "step": 146140 }, { "epoch": 2.391393274973411, "grad_norm": 0.010475469753146172, "learning_rate": 1.202161880098071e-06, "loss": 0.0009, "step": 146150 }, { "epoch": 2.3915569009244866, "grad_norm": 0.02797282487154007, "learning_rate": 1.2015427801273787e-06, "loss": 0.0007, "step": 146160 }, { "epoch": 2.3917205268755626, "grad_norm": 0.09259378910064697, "learning_rate": 1.2009238178416326e-06, "loss": 0.0009, "step": 146170 }, { "epoch": 2.391884152826638, "grad_norm": 0.04144535958766937, "learning_rate": 1.200304993263266e-06, "loss": 0.0006, "step": 146180 }, { "epoch": 2.392047778777714, "grad_norm": 0.028234334662556648, "learning_rate": 1.1996863064147118e-06, "loss": 0.0008, "step": 146190 }, { "epoch": 2.39221140472879, "grad_norm": 0.16661839187145233, "learning_rate": 1.1990677573183945e-06, "loss": 0.0021, "step": 146200 }, { "epoch": 2.3923750306798657, "grad_norm": 0.06529664993286133, "learning_rate": 1.1984493459967367e-06, "loss": 0.0011, "step": 146210 }, { "epoch": 2.3925386566309417, "grad_norm": 0.026793505996465683, "learning_rate": 1.1978310724721538e-06, "loss": 0.001, "step": 146220 }, { "epoch": 2.3927022825820177, "grad_norm": 0.042733270674943924, "learning_rate": 1.1972129367670543e-06, "loss": 0.0006, "step": 146230 }, { "epoch": 2.3928659085330932, "grad_norm": 0.02896728552877903, "learning_rate": 1.1965949389038479e-06, "loss": 0.0008, "step": 146240 }, { "epoch": 2.393029534484169, "grad_norm": 0.047306910157203674, "learning_rate": 1.1959770789049324e-06, "loss": 0.0008, "step": 146250 }, { "epoch": 2.393193160435245, "grad_norm": 0.030137227848172188, "learning_rate": 1.1953593567927063e-06, "loss": 0.0005, "step": 146260 }, { "epoch": 2.3933567863863208, "grad_norm": 0.12347330152988434, "learning_rate": 1.1947417725895577e-06, "loss": 0.0007, "step": 146270 }, { "epoch": 2.3935204123373968, "grad_norm": 0.020726656541228294, "learning_rate": 1.1941243263178742e-06, "loss": 0.001, "step": 146280 }, { "epoch": 2.3936840382884723, "grad_norm": 0.007426445838063955, "learning_rate": 1.193507018000038e-06, "loss": 0.0005, "step": 146290 }, { "epoch": 2.3938476642395483, "grad_norm": 0.021827425807714462, "learning_rate": 1.1928898476584228e-06, "loss": 0.0007, "step": 146300 }, { "epoch": 2.3940112901906243, "grad_norm": 0.05956770479679108, "learning_rate": 1.1922728153154012e-06, "loss": 0.0014, "step": 146310 }, { "epoch": 2.3941749161417, "grad_norm": 0.08083844929933548, "learning_rate": 1.1916559209933392e-06, "loss": 0.0011, "step": 146320 }, { "epoch": 2.394338542092776, "grad_norm": 0.04298021271824837, "learning_rate": 1.1910391647145963e-06, "loss": 0.0007, "step": 146330 }, { "epoch": 2.394502168043852, "grad_norm": 0.0014561654534190893, "learning_rate": 1.190422546501528e-06, "loss": 0.0006, "step": 146340 }, { "epoch": 2.3946657939949274, "grad_norm": 0.06279488652944565, "learning_rate": 1.189806066376486e-06, "loss": 0.0004, "step": 146350 }, { "epoch": 2.3948294199460034, "grad_norm": 0.04115691035985947, "learning_rate": 1.1891897243618184e-06, "loss": 0.0007, "step": 146360 }, { "epoch": 2.3949930458970794, "grad_norm": 0.12842579185962677, "learning_rate": 1.1885735204798626e-06, "loss": 0.0009, "step": 146370 }, { "epoch": 2.395156671848155, "grad_norm": 0.008940541185438633, "learning_rate": 1.1879574547529576e-06, "loss": 0.001, "step": 146380 }, { "epoch": 2.395320297799231, "grad_norm": 0.4329315721988678, "learning_rate": 1.187341527203431e-06, "loss": 0.0006, "step": 146390 }, { "epoch": 2.395483923750307, "grad_norm": 0.11849091202020645, "learning_rate": 1.1867257378536128e-06, "loss": 0.0007, "step": 146400 }, { "epoch": 2.3956475497013825, "grad_norm": 0.10733088850975037, "learning_rate": 1.1861100867258197e-06, "loss": 0.0011, "step": 146410 }, { "epoch": 2.3958111756524585, "grad_norm": 0.12135057896375656, "learning_rate": 1.1854945738423711e-06, "loss": 0.0009, "step": 146420 }, { "epoch": 2.3959748016035345, "grad_norm": 0.06494513899087906, "learning_rate": 1.1848791992255764e-06, "loss": 0.001, "step": 146430 }, { "epoch": 2.39613842755461, "grad_norm": 0.044759757816791534, "learning_rate": 1.18426396289774e-06, "loss": 0.0009, "step": 146440 }, { "epoch": 2.396302053505686, "grad_norm": 0.08611297607421875, "learning_rate": 1.183648864881165e-06, "loss": 0.0007, "step": 146450 }, { "epoch": 2.396465679456762, "grad_norm": 0.16296802461147308, "learning_rate": 1.183033905198145e-06, "loss": 0.0008, "step": 146460 }, { "epoch": 2.3966293054078376, "grad_norm": 0.03838413208723068, "learning_rate": 1.1824190838709737e-06, "loss": 0.0008, "step": 146470 }, { "epoch": 2.3967929313589136, "grad_norm": 0.03129525110125542, "learning_rate": 1.1818044009219337e-06, "loss": 0.0009, "step": 146480 }, { "epoch": 2.3969565573099896, "grad_norm": 0.03448084369301796, "learning_rate": 1.1811898563733087e-06, "loss": 0.0003, "step": 146490 }, { "epoch": 2.397120183261065, "grad_norm": 0.06899776309728622, "learning_rate": 1.180575450247372e-06, "loss": 0.0007, "step": 146500 }, { "epoch": 2.397283809212141, "grad_norm": 0.02704421617090702, "learning_rate": 1.1799611825663954e-06, "loss": 0.0004, "step": 146510 }, { "epoch": 2.397447435163217, "grad_norm": 0.06607653200626373, "learning_rate": 1.1793470533526473e-06, "loss": 0.0011, "step": 146520 }, { "epoch": 2.3976110611142927, "grad_norm": 0.028309181332588196, "learning_rate": 1.1787330626283834e-06, "loss": 0.001, "step": 146530 }, { "epoch": 2.3977746870653687, "grad_norm": 0.27555280923843384, "learning_rate": 1.178119210415863e-06, "loss": 0.0006, "step": 146540 }, { "epoch": 2.3979383130164442, "grad_norm": 0.20501168072223663, "learning_rate": 1.177505496737334e-06, "loss": 0.0014, "step": 146550 }, { "epoch": 2.39810193896752, "grad_norm": 0.06874565780162811, "learning_rate": 1.1768919216150437e-06, "loss": 0.0009, "step": 146560 }, { "epoch": 2.398265564918596, "grad_norm": 0.042318861931562424, "learning_rate": 1.1762784850712345e-06, "loss": 0.0006, "step": 146570 }, { "epoch": 2.3984291908696718, "grad_norm": 0.14016929268836975, "learning_rate": 1.1756651871281384e-06, "loss": 0.0014, "step": 146580 }, { "epoch": 2.3985928168207478, "grad_norm": 0.08457785099744797, "learning_rate": 1.175052027807989e-06, "loss": 0.0003, "step": 146590 }, { "epoch": 2.3987564427718238, "grad_norm": 0.16232720017433167, "learning_rate": 1.1744390071330092e-06, "loss": 0.0015, "step": 146600 }, { "epoch": 2.3989200687228993, "grad_norm": 0.026947375386953354, "learning_rate": 1.1738261251254224e-06, "loss": 0.0016, "step": 146610 }, { "epoch": 2.3990836946739753, "grad_norm": 0.15962563455104828, "learning_rate": 1.1732133818074426e-06, "loss": 0.0015, "step": 146620 }, { "epoch": 2.3992473206250513, "grad_norm": 0.02198173850774765, "learning_rate": 1.1726007772012793e-06, "loss": 0.0009, "step": 146630 }, { "epoch": 2.399410946576127, "grad_norm": 0.04337415099143982, "learning_rate": 1.1719883113291397e-06, "loss": 0.0004, "step": 146640 }, { "epoch": 2.399574572527203, "grad_norm": 0.03358631581068039, "learning_rate": 1.1713759842132222e-06, "loss": 0.0006, "step": 146650 }, { "epoch": 2.3997381984782784, "grad_norm": 0.017577163875102997, "learning_rate": 1.1707637958757244e-06, "loss": 0.0007, "step": 146660 }, { "epoch": 2.3999018244293544, "grad_norm": 0.007853773422539234, "learning_rate": 1.1701517463388351e-06, "loss": 0.0003, "step": 146670 }, { "epoch": 2.4000654503804304, "grad_norm": 0.10636381059885025, "learning_rate": 1.1695398356247412e-06, "loss": 0.0008, "step": 146680 }, { "epoch": 2.400229076331506, "grad_norm": 0.0726991817355156, "learning_rate": 1.1689280637556205e-06, "loss": 0.001, "step": 146690 }, { "epoch": 2.400392702282582, "grad_norm": 0.21138660609722137, "learning_rate": 1.168316430753651e-06, "loss": 0.0007, "step": 146700 }, { "epoch": 2.400556328233658, "grad_norm": 0.18228918313980103, "learning_rate": 1.1677049366410008e-06, "loss": 0.0009, "step": 146710 }, { "epoch": 2.4007199541847335, "grad_norm": 0.0521659292280674, "learning_rate": 1.167093581439837e-06, "loss": 0.0007, "step": 146720 }, { "epoch": 2.4008835801358095, "grad_norm": 0.000778741086833179, "learning_rate": 1.166482365172319e-06, "loss": 0.0012, "step": 146730 }, { "epoch": 2.4010472060868855, "grad_norm": 0.11970508843660355, "learning_rate": 1.1658712878606e-06, "loss": 0.0006, "step": 146740 }, { "epoch": 2.401210832037961, "grad_norm": 0.006852451246231794, "learning_rate": 1.1652603495268333e-06, "loss": 0.0009, "step": 146750 }, { "epoch": 2.401374457989037, "grad_norm": 0.0973595604300499, "learning_rate": 1.1646495501931604e-06, "loss": 0.0006, "step": 146760 }, { "epoch": 2.401538083940113, "grad_norm": 0.0892665907740593, "learning_rate": 1.164038889881724e-06, "loss": 0.0012, "step": 146770 }, { "epoch": 2.4017017098911886, "grad_norm": 0.09378153085708618, "learning_rate": 1.1634283686146591e-06, "loss": 0.0007, "step": 146780 }, { "epoch": 2.4018653358422646, "grad_norm": 0.048388123512268066, "learning_rate": 1.1628179864140942e-06, "loss": 0.0006, "step": 146790 }, { "epoch": 2.4020289617933406, "grad_norm": 0.07055846601724625, "learning_rate": 1.1622077433021562e-06, "loss": 0.0006, "step": 146800 }, { "epoch": 2.402192587744416, "grad_norm": 0.07691455632448196, "learning_rate": 1.1615976393009614e-06, "loss": 0.0013, "step": 146810 }, { "epoch": 2.402356213695492, "grad_norm": 0.06037082150578499, "learning_rate": 1.1609876744326303e-06, "loss": 0.0006, "step": 146820 }, { "epoch": 2.402519839646568, "grad_norm": 0.0030878460966050625, "learning_rate": 1.1603778487192663e-06, "loss": 0.0008, "step": 146830 }, { "epoch": 2.4026834655976437, "grad_norm": 0.0932287722826004, "learning_rate": 1.1597681621829771e-06, "loss": 0.0009, "step": 146840 }, { "epoch": 2.4028470915487197, "grad_norm": 0.14290370047092438, "learning_rate": 1.1591586148458633e-06, "loss": 0.0006, "step": 146850 }, { "epoch": 2.4030107174997957, "grad_norm": 0.04808291792869568, "learning_rate": 1.1585492067300174e-06, "loss": 0.0006, "step": 146860 }, { "epoch": 2.403174343450871, "grad_norm": 0.07518628984689713, "learning_rate": 1.1579399378575312e-06, "loss": 0.0004, "step": 146870 }, { "epoch": 2.403337969401947, "grad_norm": 0.07845789194107056, "learning_rate": 1.157330808250487e-06, "loss": 0.0009, "step": 146880 }, { "epoch": 2.403501595353023, "grad_norm": 0.07071030884981155, "learning_rate": 1.1567218179309668e-06, "loss": 0.0009, "step": 146890 }, { "epoch": 2.4036652213040988, "grad_norm": 0.026469646021723747, "learning_rate": 1.1561129669210419e-06, "loss": 0.0009, "step": 146900 }, { "epoch": 2.4038288472551748, "grad_norm": 0.12547431886196136, "learning_rate": 1.1555042552427853e-06, "loss": 0.0011, "step": 146910 }, { "epoch": 2.4039924732062508, "grad_norm": 0.010703513398766518, "learning_rate": 1.1548956829182588e-06, "loss": 0.0007, "step": 146920 }, { "epoch": 2.4041560991573263, "grad_norm": 0.13315097987651825, "learning_rate": 1.1542872499695212e-06, "loss": 0.001, "step": 146930 }, { "epoch": 2.4043197251084023, "grad_norm": 0.04090576246380806, "learning_rate": 1.1536789564186297e-06, "loss": 0.001, "step": 146940 }, { "epoch": 2.404483351059478, "grad_norm": 0.10873720049858093, "learning_rate": 1.1530708022876297e-06, "loss": 0.0008, "step": 146950 }, { "epoch": 2.404646977010554, "grad_norm": 0.038747694343328476, "learning_rate": 1.152462787598569e-06, "loss": 0.0005, "step": 146960 }, { "epoch": 2.40481060296163, "grad_norm": 0.056009504944086075, "learning_rate": 1.1518549123734834e-06, "loss": 0.0026, "step": 146970 }, { "epoch": 2.4049742289127054, "grad_norm": 0.0809401348233223, "learning_rate": 1.1512471766344096e-06, "loss": 0.001, "step": 146980 }, { "epoch": 2.4051378548637814, "grad_norm": 0.05070461705327034, "learning_rate": 1.1506395804033743e-06, "loss": 0.0003, "step": 146990 }, { "epoch": 2.4053014808148574, "grad_norm": 0.016141260042786598, "learning_rate": 1.1500321237024027e-06, "loss": 0.0006, "step": 147000 }, { "epoch": 2.405465106765933, "grad_norm": 0.06742151826620102, "learning_rate": 1.1494248065535146e-06, "loss": 0.0009, "step": 147010 }, { "epoch": 2.405628732717009, "grad_norm": 0.05144858360290527, "learning_rate": 1.1488176289787223e-06, "loss": 0.0008, "step": 147020 }, { "epoch": 2.405792358668085, "grad_norm": 0.27428361773490906, "learning_rate": 1.1482105910000352e-06, "loss": 0.001, "step": 147030 }, { "epoch": 2.4059559846191605, "grad_norm": 0.0735877975821495, "learning_rate": 1.1476036926394557e-06, "loss": 0.0009, "step": 147040 }, { "epoch": 2.4061196105702365, "grad_norm": 0.027983354404568672, "learning_rate": 1.1469969339189829e-06, "loss": 0.0011, "step": 147050 }, { "epoch": 2.406283236521312, "grad_norm": 0.08877575397491455, "learning_rate": 1.1463903148606121e-06, "loss": 0.001, "step": 147060 }, { "epoch": 2.406446862472388, "grad_norm": 0.07310646772384644, "learning_rate": 1.1457838354863293e-06, "loss": 0.0013, "step": 147070 }, { "epoch": 2.406610488423464, "grad_norm": 0.042939409613609314, "learning_rate": 1.1451774958181206e-06, "loss": 0.0012, "step": 147080 }, { "epoch": 2.4067741143745396, "grad_norm": 0.06409641355276108, "learning_rate": 1.1445712958779608e-06, "loss": 0.0012, "step": 147090 }, { "epoch": 2.4069377403256156, "grad_norm": 0.07361728698015213, "learning_rate": 1.1439652356878271e-06, "loss": 0.0008, "step": 147100 }, { "epoch": 2.4071013662766916, "grad_norm": 0.14623193442821503, "learning_rate": 1.1433593152696847e-06, "loss": 0.0006, "step": 147110 }, { "epoch": 2.407264992227767, "grad_norm": 0.15490563213825226, "learning_rate": 1.1427535346454993e-06, "loss": 0.0006, "step": 147120 }, { "epoch": 2.407428618178843, "grad_norm": 0.012476619333028793, "learning_rate": 1.142147893837227e-06, "loss": 0.0006, "step": 147130 }, { "epoch": 2.407592244129919, "grad_norm": 0.03630430996417999, "learning_rate": 1.1415423928668207e-06, "loss": 0.0004, "step": 147140 }, { "epoch": 2.4077558700809947, "grad_norm": 0.0012697308557108045, "learning_rate": 1.14093703175623e-06, "loss": 0.0009, "step": 147150 }, { "epoch": 2.4079194960320707, "grad_norm": 0.013975553214550018, "learning_rate": 1.1403318105273958e-06, "loss": 0.0008, "step": 147160 }, { "epoch": 2.4080831219831467, "grad_norm": 0.08526013791561127, "learning_rate": 1.1397267292022585e-06, "loss": 0.0007, "step": 147170 }, { "epoch": 2.408246747934222, "grad_norm": 0.008147203363478184, "learning_rate": 1.1391217878027477e-06, "loss": 0.0009, "step": 147180 }, { "epoch": 2.408410373885298, "grad_norm": 0.024288080632686615, "learning_rate": 1.1385169863507944e-06, "loss": 0.0019, "step": 147190 }, { "epoch": 2.408573999836374, "grad_norm": 0.14177322387695312, "learning_rate": 1.1379123248683178e-06, "loss": 0.0009, "step": 147200 }, { "epoch": 2.4087376257874498, "grad_norm": 0.06706675887107849, "learning_rate": 1.1373078033772373e-06, "loss": 0.0008, "step": 147210 }, { "epoch": 2.4089012517385258, "grad_norm": 0.029751280322670937, "learning_rate": 1.1367034218994682e-06, "loss": 0.0007, "step": 147220 }, { "epoch": 2.4090648776896018, "grad_norm": 0.06997836381196976, "learning_rate": 1.1360991804569116e-06, "loss": 0.0003, "step": 147230 }, { "epoch": 2.4092285036406773, "grad_norm": 0.02214796468615532, "learning_rate": 1.1354950790714748e-06, "loss": 0.0013, "step": 147240 }, { "epoch": 2.4093921295917533, "grad_norm": 0.047099754214286804, "learning_rate": 1.1348911177650518e-06, "loss": 0.0006, "step": 147250 }, { "epoch": 2.4095557555428293, "grad_norm": 0.04467353597283363, "learning_rate": 1.1342872965595374e-06, "loss": 0.0004, "step": 147260 }, { "epoch": 2.409719381493905, "grad_norm": 0.1443975865840912, "learning_rate": 1.1336836154768167e-06, "loss": 0.0017, "step": 147270 }, { "epoch": 2.409883007444981, "grad_norm": 0.013578618876636028, "learning_rate": 1.1330800745387722e-06, "loss": 0.0006, "step": 147280 }, { "epoch": 2.410046633396057, "grad_norm": 0.025736160576343536, "learning_rate": 1.1324766737672822e-06, "loss": 0.0008, "step": 147290 }, { "epoch": 2.4102102593471324, "grad_norm": 0.009556686505675316, "learning_rate": 1.1318734131842162e-06, "loss": 0.0008, "step": 147300 }, { "epoch": 2.4103738852982084, "grad_norm": 0.0320097953081131, "learning_rate": 1.131270292811444e-06, "loss": 0.0014, "step": 147310 }, { "epoch": 2.410537511249284, "grad_norm": 0.11312876641750336, "learning_rate": 1.1306673126708245e-06, "loss": 0.0008, "step": 147320 }, { "epoch": 2.41070113720036, "grad_norm": 0.0021174512803554535, "learning_rate": 1.1300644727842147e-06, "loss": 0.0008, "step": 147330 }, { "epoch": 2.410864763151436, "grad_norm": 0.05402659252285957, "learning_rate": 1.1294617731734676e-06, "loss": 0.0008, "step": 147340 }, { "epoch": 2.4110283891025115, "grad_norm": 0.0137928556650877, "learning_rate": 1.1288592138604276e-06, "loss": 0.0009, "step": 147350 }, { "epoch": 2.4111920150535875, "grad_norm": 0.06480717658996582, "learning_rate": 1.1282567948669383e-06, "loss": 0.0005, "step": 147360 }, { "epoch": 2.4113556410046635, "grad_norm": 0.006785198114812374, "learning_rate": 1.1276545162148327e-06, "loss": 0.0009, "step": 147370 }, { "epoch": 2.411519266955739, "grad_norm": 0.08239125460386276, "learning_rate": 1.1270523779259463e-06, "loss": 0.0008, "step": 147380 }, { "epoch": 2.411682892906815, "grad_norm": 0.06658472865819931, "learning_rate": 1.1264503800221005e-06, "loss": 0.0005, "step": 147390 }, { "epoch": 2.411846518857891, "grad_norm": 0.062044478952884674, "learning_rate": 1.1258485225251203e-06, "loss": 0.0006, "step": 147400 }, { "epoch": 2.4120101448089666, "grad_norm": 0.11041797697544098, "learning_rate": 1.125246805456819e-06, "loss": 0.0008, "step": 147410 }, { "epoch": 2.4121737707600426, "grad_norm": 0.06895242631435394, "learning_rate": 1.1246452288390092e-06, "loss": 0.0017, "step": 147420 }, { "epoch": 2.4123373967111186, "grad_norm": 0.0976947769522667, "learning_rate": 1.1240437926934961e-06, "loss": 0.0008, "step": 147430 }, { "epoch": 2.412501022662194, "grad_norm": 0.011171468533575535, "learning_rate": 1.1234424970420776e-06, "loss": 0.0006, "step": 147440 }, { "epoch": 2.41266464861327, "grad_norm": 0.01117353979498148, "learning_rate": 1.1228413419065537e-06, "loss": 0.0005, "step": 147450 }, { "epoch": 2.4128282745643457, "grad_norm": 0.1373976618051529, "learning_rate": 1.1222403273087112e-06, "loss": 0.0009, "step": 147460 }, { "epoch": 2.4129919005154217, "grad_norm": 0.11558207124471664, "learning_rate": 1.1216394532703378e-06, "loss": 0.0004, "step": 147470 }, { "epoch": 2.4131555264664977, "grad_norm": 0.045516468584537506, "learning_rate": 1.1210387198132123e-06, "loss": 0.0009, "step": 147480 }, { "epoch": 2.413319152417573, "grad_norm": 0.07894159108400345, "learning_rate": 1.1204381269591097e-06, "loss": 0.001, "step": 147490 }, { "epoch": 2.413482778368649, "grad_norm": 0.10689076781272888, "learning_rate": 1.1198376747298022e-06, "loss": 0.0007, "step": 147500 }, { "epoch": 2.413646404319725, "grad_norm": 0.03708473592996597, "learning_rate": 1.1192373631470516e-06, "loss": 0.0005, "step": 147510 }, { "epoch": 2.4138100302708008, "grad_norm": 0.01977674663066864, "learning_rate": 1.1186371922326228e-06, "loss": 0.0008, "step": 147520 }, { "epoch": 2.4139736562218768, "grad_norm": 0.005746379029005766, "learning_rate": 1.1180371620082641e-06, "loss": 0.0017, "step": 147530 }, { "epoch": 2.4141372821729528, "grad_norm": 0.08772705495357513, "learning_rate": 1.1174372724957294e-06, "loss": 0.0011, "step": 147540 }, { "epoch": 2.4143009081240283, "grad_norm": 0.1947985142469406, "learning_rate": 1.116837523716761e-06, "loss": 0.0012, "step": 147550 }, { "epoch": 2.4144645340751043, "grad_norm": 0.07773470133543015, "learning_rate": 1.1162379156930997e-06, "loss": 0.0005, "step": 147560 }, { "epoch": 2.4146281600261803, "grad_norm": 0.017453191801905632, "learning_rate": 1.1156384484464805e-06, "loss": 0.0005, "step": 147570 }, { "epoch": 2.414791785977256, "grad_norm": 0.096875861287117, "learning_rate": 1.11503912199863e-06, "loss": 0.0004, "step": 147580 }, { "epoch": 2.414955411928332, "grad_norm": 0.04409999027848244, "learning_rate": 1.114439936371276e-06, "loss": 0.0006, "step": 147590 }, { "epoch": 2.415119037879408, "grad_norm": 0.008241796866059303, "learning_rate": 1.1138408915861343e-06, "loss": 0.0009, "step": 147600 }, { "epoch": 2.4152826638304834, "grad_norm": 0.013936730101704597, "learning_rate": 1.1132419876649214e-06, "loss": 0.0006, "step": 147610 }, { "epoch": 2.4154462897815594, "grad_norm": 0.05870835483074188, "learning_rate": 1.112643224629345e-06, "loss": 0.0007, "step": 147620 }, { "epoch": 2.4156099157326354, "grad_norm": 0.20783457159996033, "learning_rate": 1.1120446025011071e-06, "loss": 0.0016, "step": 147630 }, { "epoch": 2.415773541683711, "grad_norm": 0.14202994108200073, "learning_rate": 1.111446121301909e-06, "loss": 0.0008, "step": 147640 }, { "epoch": 2.415937167634787, "grad_norm": 0.027852363884449005, "learning_rate": 1.1108477810534423e-06, "loss": 0.0006, "step": 147650 }, { "epoch": 2.416100793585863, "grad_norm": 0.02043231576681137, "learning_rate": 1.1102495817773973e-06, "loss": 0.0004, "step": 147660 }, { "epoch": 2.4162644195369385, "grad_norm": 0.12623021006584167, "learning_rate": 1.1096515234954547e-06, "loss": 0.0008, "step": 147670 }, { "epoch": 2.4164280454880145, "grad_norm": 0.12809127569198608, "learning_rate": 1.1090536062292956e-06, "loss": 0.0008, "step": 147680 }, { "epoch": 2.4165916714390905, "grad_norm": 0.013466783799231052, "learning_rate": 1.1084558300005905e-06, "loss": 0.0013, "step": 147690 }, { "epoch": 2.416755297390166, "grad_norm": 0.04727175831794739, "learning_rate": 1.1078581948310096e-06, "loss": 0.0006, "step": 147700 }, { "epoch": 2.416918923341242, "grad_norm": 0.12849493324756622, "learning_rate": 1.1072607007422136e-06, "loss": 0.0013, "step": 147710 }, { "epoch": 2.4170825492923176, "grad_norm": 0.12960892915725708, "learning_rate": 1.106663347755861e-06, "loss": 0.0007, "step": 147720 }, { "epoch": 2.4172461752433936, "grad_norm": 0.06974505633115768, "learning_rate": 1.1060661358936076e-06, "loss": 0.0009, "step": 147730 }, { "epoch": 2.4174098011944696, "grad_norm": 0.040147941559553146, "learning_rate": 1.105469065177095e-06, "loss": 0.0008, "step": 147740 }, { "epoch": 2.417573427145545, "grad_norm": 0.08561286330223083, "learning_rate": 1.1048721356279695e-06, "loss": 0.0009, "step": 147750 }, { "epoch": 2.417737053096621, "grad_norm": 0.05234234780073166, "learning_rate": 1.1042753472678663e-06, "loss": 0.0008, "step": 147760 }, { "epoch": 2.417900679047697, "grad_norm": 0.11896946281194687, "learning_rate": 1.103678700118419e-06, "loss": 0.0008, "step": 147770 }, { "epoch": 2.4180643049987727, "grad_norm": 0.06305025517940521, "learning_rate": 1.1030821942012553e-06, "loss": 0.0008, "step": 147780 }, { "epoch": 2.4182279309498487, "grad_norm": 0.0925489142537117, "learning_rate": 1.1024858295379949e-06, "loss": 0.0008, "step": 147790 }, { "epoch": 2.4183915569009247, "grad_norm": 0.009487279690802097, "learning_rate": 1.101889606150257e-06, "loss": 0.0008, "step": 147800 }, { "epoch": 2.418555182852, "grad_norm": 0.31293049454689026, "learning_rate": 1.1012935240596506e-06, "loss": 0.0011, "step": 147810 }, { "epoch": 2.418718808803076, "grad_norm": 0.06797828525304794, "learning_rate": 1.1006975832877853e-06, "loss": 0.0008, "step": 147820 }, { "epoch": 2.4188824347541518, "grad_norm": 0.073615662753582, "learning_rate": 1.1001017838562606e-06, "loss": 0.0007, "step": 147830 }, { "epoch": 2.4190460607052278, "grad_norm": 0.09402614086866379, "learning_rate": 1.0995061257866718e-06, "loss": 0.0012, "step": 147840 }, { "epoch": 2.4192096866563038, "grad_norm": 0.09767060726881027, "learning_rate": 1.0989106091006125e-06, "loss": 0.0008, "step": 147850 }, { "epoch": 2.4193733126073793, "grad_norm": 0.02674877643585205, "learning_rate": 1.0983152338196662e-06, "loss": 0.001, "step": 147860 }, { "epoch": 2.4195369385584553, "grad_norm": 0.022399865090847015, "learning_rate": 1.097719999965417e-06, "loss": 0.001, "step": 147870 }, { "epoch": 2.4197005645095313, "grad_norm": 0.1608082801103592, "learning_rate": 1.0971249075594376e-06, "loss": 0.001, "step": 147880 }, { "epoch": 2.419864190460607, "grad_norm": 0.0173958633095026, "learning_rate": 1.0965299566233007e-06, "loss": 0.0007, "step": 147890 }, { "epoch": 2.420027816411683, "grad_norm": 0.05245708301663399, "learning_rate": 1.09593514717857e-06, "loss": 0.0007, "step": 147900 }, { "epoch": 2.420191442362759, "grad_norm": 0.07192543148994446, "learning_rate": 1.0953404792468086e-06, "loss": 0.0004, "step": 147910 }, { "epoch": 2.4203550683138344, "grad_norm": 0.07834618538618088, "learning_rate": 1.094745952849569e-06, "loss": 0.0004, "step": 147920 }, { "epoch": 2.4205186942649104, "grad_norm": 0.08509614318609238, "learning_rate": 1.0941515680084035e-06, "loss": 0.0007, "step": 147930 }, { "epoch": 2.4206823202159864, "grad_norm": 0.06275741755962372, "learning_rate": 1.0935573247448566e-06, "loss": 0.0012, "step": 147940 }, { "epoch": 2.420845946167062, "grad_norm": 0.13832004368305206, "learning_rate": 1.0929632230804665e-06, "loss": 0.0011, "step": 147950 }, { "epoch": 2.421009572118138, "grad_norm": 0.050170887261629105, "learning_rate": 1.0923692630367704e-06, "loss": 0.0005, "step": 147960 }, { "epoch": 2.421173198069214, "grad_norm": 0.05667343735694885, "learning_rate": 1.0917754446352958e-06, "loss": 0.0005, "step": 147970 }, { "epoch": 2.4213368240202895, "grad_norm": 0.07946646958589554, "learning_rate": 1.0911817678975672e-06, "loss": 0.0008, "step": 147980 }, { "epoch": 2.4215004499713655, "grad_norm": 0.021370291709899902, "learning_rate": 1.0905882328451073e-06, "loss": 0.001, "step": 147990 }, { "epoch": 2.4216640759224415, "grad_norm": 0.033564623445272446, "learning_rate": 1.0899948394994258e-06, "loss": 0.0012, "step": 148000 }, { "epoch": 2.421827701873517, "grad_norm": 0.07143114507198334, "learning_rate": 1.0894015878820358e-06, "loss": 0.0019, "step": 148010 }, { "epoch": 2.421991327824593, "grad_norm": 0.16682365536689758, "learning_rate": 1.0888084780144376e-06, "loss": 0.0009, "step": 148020 }, { "epoch": 2.422154953775669, "grad_norm": 0.06324465572834015, "learning_rate": 1.0882155099181353e-06, "loss": 0.0007, "step": 148030 }, { "epoch": 2.4223185797267446, "grad_norm": 0.05568674951791763, "learning_rate": 1.0876226836146154e-06, "loss": 0.0006, "step": 148040 }, { "epoch": 2.4224822056778206, "grad_norm": 0.02703133225440979, "learning_rate": 1.0870299991253702e-06, "loss": 0.0017, "step": 148050 }, { "epoch": 2.4226458316288966, "grad_norm": 0.07188435643911362, "learning_rate": 1.0864374564718848e-06, "loss": 0.0011, "step": 148060 }, { "epoch": 2.422809457579972, "grad_norm": 0.00939931906759739, "learning_rate": 1.0858450556756338e-06, "loss": 0.0006, "step": 148070 }, { "epoch": 2.422973083531048, "grad_norm": 0.005172068253159523, "learning_rate": 1.0852527967580933e-06, "loss": 0.0004, "step": 148080 }, { "epoch": 2.4231367094821237, "grad_norm": 0.11073308438062668, "learning_rate": 1.0846606797407294e-06, "loss": 0.0013, "step": 148090 }, { "epoch": 2.4233003354331997, "grad_norm": 0.07497929036617279, "learning_rate": 1.0840687046450066e-06, "loss": 0.0008, "step": 148100 }, { "epoch": 2.4234639613842757, "grad_norm": 0.07553837448358536, "learning_rate": 1.08347687149238e-06, "loss": 0.0005, "step": 148110 }, { "epoch": 2.423627587335351, "grad_norm": 0.11249102652072906, "learning_rate": 1.0828851803043055e-06, "loss": 0.0006, "step": 148120 }, { "epoch": 2.423791213286427, "grad_norm": 0.009983002208173275, "learning_rate": 1.082293631102228e-06, "loss": 0.0003, "step": 148130 }, { "epoch": 2.423954839237503, "grad_norm": 0.10860998928546906, "learning_rate": 1.0817022239075897e-06, "loss": 0.0007, "step": 148140 }, { "epoch": 2.4241184651885788, "grad_norm": 0.07956936210393906, "learning_rate": 1.0811109587418294e-06, "loss": 0.0035, "step": 148150 }, { "epoch": 2.4242820911396548, "grad_norm": 0.055588968098163605, "learning_rate": 1.0805198356263768e-06, "loss": 0.0011, "step": 148160 }, { "epoch": 2.4244457170907308, "grad_norm": 0.03204049542546272, "learning_rate": 1.0799288545826613e-06, "loss": 0.001, "step": 148170 }, { "epoch": 2.4246093430418063, "grad_norm": 0.039452750235795975, "learning_rate": 1.079338015632102e-06, "loss": 0.0005, "step": 148180 }, { "epoch": 2.4247729689928823, "grad_norm": 0.026575636118650436, "learning_rate": 1.0787473187961179e-06, "loss": 0.001, "step": 148190 }, { "epoch": 2.4249365949439583, "grad_norm": 0.02265419065952301, "learning_rate": 1.078156764096117e-06, "loss": 0.0005, "step": 148200 }, { "epoch": 2.425100220895034, "grad_norm": 0.3368068039417267, "learning_rate": 1.0775663515535084e-06, "loss": 0.0011, "step": 148210 }, { "epoch": 2.42526384684611, "grad_norm": 0.1913364976644516, "learning_rate": 1.0769760811896934e-06, "loss": 0.001, "step": 148220 }, { "epoch": 2.4254274727971854, "grad_norm": 0.07805593311786652, "learning_rate": 1.0763859530260661e-06, "loss": 0.0026, "step": 148230 }, { "epoch": 2.4255910987482614, "grad_norm": 0.09813379496335983, "learning_rate": 1.0757959670840185e-06, "loss": 0.0014, "step": 148240 }, { "epoch": 2.4257547246993374, "grad_norm": 0.08269993960857391, "learning_rate": 1.0752061233849337e-06, "loss": 0.0007, "step": 148250 }, { "epoch": 2.425918350650413, "grad_norm": 0.03976655378937721, "learning_rate": 1.0746164219501942e-06, "loss": 0.0006, "step": 148260 }, { "epoch": 2.426081976601489, "grad_norm": 0.005758312996476889, "learning_rate": 1.0740268628011763e-06, "loss": 0.0009, "step": 148270 }, { "epoch": 2.426245602552565, "grad_norm": 0.061509136110544205, "learning_rate": 1.0734374459592478e-06, "loss": 0.0013, "step": 148280 }, { "epoch": 2.4264092285036405, "grad_norm": 0.01987549662590027, "learning_rate": 1.0728481714457755e-06, "loss": 0.0006, "step": 148290 }, { "epoch": 2.4265728544547165, "grad_norm": 0.07388292998075485, "learning_rate": 1.0722590392821169e-06, "loss": 0.0013, "step": 148300 }, { "epoch": 2.4267364804057925, "grad_norm": 0.0431431420147419, "learning_rate": 1.0716700494896299e-06, "loss": 0.0003, "step": 148310 }, { "epoch": 2.426900106356868, "grad_norm": 0.27885037660598755, "learning_rate": 1.0710812020896606e-06, "loss": 0.0013, "step": 148320 }, { "epoch": 2.427063732307944, "grad_norm": 0.058474019169807434, "learning_rate": 1.0704924971035557e-06, "loss": 0.0009, "step": 148330 }, { "epoch": 2.42722735825902, "grad_norm": 0.30061018466949463, "learning_rate": 1.069903934552654e-06, "loss": 0.0008, "step": 148340 }, { "epoch": 2.4273909842100956, "grad_norm": 0.050697799772024155, "learning_rate": 1.0693155144582872e-06, "loss": 0.0008, "step": 148350 }, { "epoch": 2.4275546101611716, "grad_norm": 0.13990169763565063, "learning_rate": 1.0687272368417872e-06, "loss": 0.0009, "step": 148360 }, { "epoch": 2.4277182361122476, "grad_norm": 0.021898178383708, "learning_rate": 1.0681391017244752e-06, "loss": 0.0008, "step": 148370 }, { "epoch": 2.427881862063323, "grad_norm": 0.017092812806367874, "learning_rate": 1.0675511091276724e-06, "loss": 0.0006, "step": 148380 }, { "epoch": 2.428045488014399, "grad_norm": 0.10358000546693802, "learning_rate": 1.0669632590726887e-06, "loss": 0.0006, "step": 148390 }, { "epoch": 2.428209113965475, "grad_norm": 0.026845429092645645, "learning_rate": 1.0663755515808355e-06, "loss": 0.0015, "step": 148400 }, { "epoch": 2.4283727399165507, "grad_norm": 0.028029434382915497, "learning_rate": 1.0657879866734129e-06, "loss": 0.0005, "step": 148410 }, { "epoch": 2.4285363658676267, "grad_norm": 0.06742192804813385, "learning_rate": 1.0652005643717201e-06, "loss": 0.0011, "step": 148420 }, { "epoch": 2.4286999918187027, "grad_norm": 0.08109088987112045, "learning_rate": 1.0646132846970531e-06, "loss": 0.0012, "step": 148430 }, { "epoch": 2.428863617769778, "grad_norm": 0.01835411973297596, "learning_rate": 1.0640261476706925e-06, "loss": 0.0006, "step": 148440 }, { "epoch": 2.429027243720854, "grad_norm": 0.09666718542575836, "learning_rate": 1.063439153313926e-06, "loss": 0.001, "step": 148450 }, { "epoch": 2.42919086967193, "grad_norm": 0.026973022148013115, "learning_rate": 1.0628523016480275e-06, "loss": 0.0007, "step": 148460 }, { "epoch": 2.4293544956230058, "grad_norm": 0.0076769450679421425, "learning_rate": 1.0622655926942721e-06, "loss": 0.0013, "step": 148470 }, { "epoch": 2.4295181215740818, "grad_norm": 0.10681433975696564, "learning_rate": 1.061679026473923e-06, "loss": 0.0005, "step": 148480 }, { "epoch": 2.4296817475251573, "grad_norm": 0.15214432775974274, "learning_rate": 1.0610926030082442e-06, "loss": 0.0007, "step": 148490 }, { "epoch": 2.4298453734762333, "grad_norm": 0.08576693385839462, "learning_rate": 1.0605063223184926e-06, "loss": 0.0005, "step": 148500 }, { "epoch": 2.4300089994273093, "grad_norm": 0.0789518728852272, "learning_rate": 1.0599201844259171e-06, "loss": 0.0005, "step": 148510 }, { "epoch": 2.430172625378385, "grad_norm": 0.13777755200862885, "learning_rate": 1.0593341893517666e-06, "loss": 0.001, "step": 148520 }, { "epoch": 2.430336251329461, "grad_norm": 0.11252978444099426, "learning_rate": 1.0587483371172802e-06, "loss": 0.0015, "step": 148530 }, { "epoch": 2.430499877280537, "grad_norm": 0.12324199080467224, "learning_rate": 1.0581626277436936e-06, "loss": 0.0004, "step": 148540 }, { "epoch": 2.4306635032316124, "grad_norm": 0.07565046846866608, "learning_rate": 1.0575770612522385e-06, "loss": 0.0006, "step": 148550 }, { "epoch": 2.4308271291826884, "grad_norm": 0.01156596653163433, "learning_rate": 1.056991637664138e-06, "loss": 0.0008, "step": 148560 }, { "epoch": 2.4309907551337644, "grad_norm": 0.00789470225572586, "learning_rate": 1.0564063570006156e-06, "loss": 0.002, "step": 148570 }, { "epoch": 2.43115438108484, "grad_norm": 0.002704763552173972, "learning_rate": 1.055821219282883e-06, "loss": 0.0013, "step": 148580 }, { "epoch": 2.431318007035916, "grad_norm": 0.0011895995121449232, "learning_rate": 1.0552362245321535e-06, "loss": 0.0012, "step": 148590 }, { "epoch": 2.4314816329869915, "grad_norm": 0.06007545441389084, "learning_rate": 1.0546513727696284e-06, "loss": 0.0004, "step": 148600 }, { "epoch": 2.4316452589380675, "grad_norm": 0.02888408862054348, "learning_rate": 1.0540666640165097e-06, "loss": 0.0005, "step": 148610 }, { "epoch": 2.4318088848891435, "grad_norm": 0.08068275451660156, "learning_rate": 1.0534820982939893e-06, "loss": 0.0013, "step": 148620 }, { "epoch": 2.431972510840219, "grad_norm": 0.05243679881095886, "learning_rate": 1.052897675623259e-06, "loss": 0.0009, "step": 148630 }, { "epoch": 2.432136136791295, "grad_norm": 0.013122976757586002, "learning_rate": 1.0523133960255016e-06, "loss": 0.0008, "step": 148640 }, { "epoch": 2.432299762742371, "grad_norm": 0.037479501217603683, "learning_rate": 1.0517292595218936e-06, "loss": 0.0009, "step": 148650 }, { "epoch": 2.4324633886934466, "grad_norm": 0.05510633438825607, "learning_rate": 1.0511452661336125e-06, "loss": 0.0007, "step": 148660 }, { "epoch": 2.4326270146445226, "grad_norm": 0.09597143530845642, "learning_rate": 1.0505614158818228e-06, "loss": 0.0009, "step": 148670 }, { "epoch": 2.4327906405955986, "grad_norm": 0.07137784361839294, "learning_rate": 1.0499777087876917e-06, "loss": 0.0009, "step": 148680 }, { "epoch": 2.432954266546674, "grad_norm": 0.048212554305791855, "learning_rate": 1.0493941448723726e-06, "loss": 0.001, "step": 148690 }, { "epoch": 2.43311789249775, "grad_norm": 0.044151097536087036, "learning_rate": 1.0488107241570217e-06, "loss": 0.0011, "step": 148700 }, { "epoch": 2.433281518448826, "grad_norm": 0.025755858048796654, "learning_rate": 1.0482274466627868e-06, "loss": 0.0009, "step": 148710 }, { "epoch": 2.4334451443999017, "grad_norm": 0.0961119681596756, "learning_rate": 1.0476443124108071e-06, "loss": 0.0004, "step": 148720 }, { "epoch": 2.4336087703509777, "grad_norm": 0.02293434739112854, "learning_rate": 1.0470613214222253e-06, "loss": 0.0006, "step": 148730 }, { "epoch": 2.4337723963020537, "grad_norm": 0.03176530823111534, "learning_rate": 1.0464784737181666e-06, "loss": 0.0007, "step": 148740 }, { "epoch": 2.433936022253129, "grad_norm": 0.07863471657037735, "learning_rate": 1.045895769319762e-06, "loss": 0.0005, "step": 148750 }, { "epoch": 2.434099648204205, "grad_norm": 0.1033850759267807, "learning_rate": 1.0453132082481326e-06, "loss": 0.0012, "step": 148760 }, { "epoch": 2.434263274155281, "grad_norm": 0.1333463042974472, "learning_rate": 1.044730790524394e-06, "loss": 0.0007, "step": 148770 }, { "epoch": 2.4344269001063568, "grad_norm": 0.04183622822165489, "learning_rate": 1.0441485161696591e-06, "loss": 0.0007, "step": 148780 }, { "epoch": 2.4345905260574328, "grad_norm": 0.15752290189266205, "learning_rate": 1.0435663852050315e-06, "loss": 0.0008, "step": 148790 }, { "epoch": 2.4347541520085088, "grad_norm": 0.023099087178707123, "learning_rate": 1.042984397651615e-06, "loss": 0.0012, "step": 148800 }, { "epoch": 2.4349177779595843, "grad_norm": 0.08653181046247482, "learning_rate": 1.042402553530502e-06, "loss": 0.0011, "step": 148810 }, { "epoch": 2.4350814039106603, "grad_norm": 0.057959381490945816, "learning_rate": 1.041820852862786e-06, "loss": 0.0005, "step": 148820 }, { "epoch": 2.4352450298617363, "grad_norm": 0.06256592273712158, "learning_rate": 1.041239295669551e-06, "loss": 0.0005, "step": 148830 }, { "epoch": 2.435408655812812, "grad_norm": 0.022995127364993095, "learning_rate": 1.0406578819718755e-06, "loss": 0.0006, "step": 148840 }, { "epoch": 2.435572281763888, "grad_norm": 0.06696300953626633, "learning_rate": 1.0400766117908374e-06, "loss": 0.0013, "step": 148850 }, { "epoch": 2.4357359077149634, "grad_norm": 0.0834878534078598, "learning_rate": 1.0394954851475025e-06, "loss": 0.0005, "step": 148860 }, { "epoch": 2.4358995336660394, "grad_norm": 0.032019905745983124, "learning_rate": 1.0389145020629398e-06, "loss": 0.0007, "step": 148870 }, { "epoch": 2.4360631596171154, "grad_norm": 0.1997094303369522, "learning_rate": 1.038333662558204e-06, "loss": 0.0013, "step": 148880 }, { "epoch": 2.436226785568191, "grad_norm": 0.03288447856903076, "learning_rate": 1.037752966654353e-06, "loss": 0.0004, "step": 148890 }, { "epoch": 2.436390411519267, "grad_norm": 0.03944979980587959, "learning_rate": 1.0371724143724326e-06, "loss": 0.0007, "step": 148900 }, { "epoch": 2.436554037470343, "grad_norm": 0.028130657970905304, "learning_rate": 1.0365920057334893e-06, "loss": 0.0007, "step": 148910 }, { "epoch": 2.4367176634214185, "grad_norm": 0.07924254238605499, "learning_rate": 1.0360117407585584e-06, "loss": 0.0012, "step": 148920 }, { "epoch": 2.4368812893724945, "grad_norm": 0.002409644890576601, "learning_rate": 1.0354316194686764e-06, "loss": 0.0006, "step": 148930 }, { "epoch": 2.4370449153235705, "grad_norm": 0.008279784582555294, "learning_rate": 1.0348516418848691e-06, "loss": 0.0003, "step": 148940 }, { "epoch": 2.437208541274646, "grad_norm": 0.051741521805524826, "learning_rate": 1.0342718080281588e-06, "loss": 0.0007, "step": 148950 }, { "epoch": 2.437372167225722, "grad_norm": 0.2284378558397293, "learning_rate": 1.0336921179195653e-06, "loss": 0.0007, "step": 148960 }, { "epoch": 2.437535793176798, "grad_norm": 0.038089849054813385, "learning_rate": 1.0331125715800982e-06, "loss": 0.0005, "step": 148970 }, { "epoch": 2.4376994191278736, "grad_norm": 0.09312382340431213, "learning_rate": 1.0325331690307666e-06, "loss": 0.0009, "step": 148980 }, { "epoch": 2.4378630450789496, "grad_norm": 0.14112703502178192, "learning_rate": 1.031953910292573e-06, "loss": 0.0004, "step": 148990 }, { "epoch": 2.438026671030025, "grad_norm": 0.03761272504925728, "learning_rate": 1.0313747953865123e-06, "loss": 0.0006, "step": 149000 }, { "epoch": 2.438190296981101, "grad_norm": 0.03520971164107323, "learning_rate": 1.0307958243335781e-06, "loss": 0.0008, "step": 149010 }, { "epoch": 2.438353922932177, "grad_norm": 0.016446659341454506, "learning_rate": 1.0302169971547542e-06, "loss": 0.0007, "step": 149020 }, { "epoch": 2.4385175488832527, "grad_norm": 0.04374958574771881, "learning_rate": 1.0296383138710241e-06, "loss": 0.0011, "step": 149030 }, { "epoch": 2.4386811748343287, "grad_norm": 0.07221683114767075, "learning_rate": 1.029059774503363e-06, "loss": 0.0005, "step": 149040 }, { "epoch": 2.4388448007854047, "grad_norm": 0.11075487732887268, "learning_rate": 1.0284813790727395e-06, "loss": 0.0008, "step": 149050 }, { "epoch": 2.43900842673648, "grad_norm": 0.11402343213558197, "learning_rate": 1.0279031276001222e-06, "loss": 0.0008, "step": 149060 }, { "epoch": 2.439172052687556, "grad_norm": 0.1286894679069519, "learning_rate": 1.0273250201064689e-06, "loss": 0.0009, "step": 149070 }, { "epoch": 2.439335678638632, "grad_norm": 0.015726864337921143, "learning_rate": 1.0267470566127363e-06, "loss": 0.0006, "step": 149080 }, { "epoch": 2.4394993045897078, "grad_norm": 0.0982898697257042, "learning_rate": 1.0261692371398724e-06, "loss": 0.0006, "step": 149090 }, { "epoch": 2.4396629305407838, "grad_norm": 0.08235268294811249, "learning_rate": 1.025591561708824e-06, "loss": 0.0005, "step": 149100 }, { "epoch": 2.4398265564918598, "grad_norm": 0.00945932324975729, "learning_rate": 1.0250140303405282e-06, "loss": 0.0005, "step": 149110 }, { "epoch": 2.4399901824429353, "grad_norm": 0.40362250804901123, "learning_rate": 1.024436643055921e-06, "loss": 0.0016, "step": 149120 }, { "epoch": 2.4401538083940113, "grad_norm": 0.07312536239624023, "learning_rate": 1.023859399875931e-06, "loss": 0.0006, "step": 149130 }, { "epoch": 2.4403174343450873, "grad_norm": 0.06973206996917725, "learning_rate": 1.0232823008214798e-06, "loss": 0.0003, "step": 149140 }, { "epoch": 2.440481060296163, "grad_norm": 0.06767765432596207, "learning_rate": 1.0227053459134884e-06, "loss": 0.0006, "step": 149150 }, { "epoch": 2.440644686247239, "grad_norm": 0.10995814204216003, "learning_rate": 1.0221285351728682e-06, "loss": 0.0009, "step": 149160 }, { "epoch": 2.440808312198315, "grad_norm": 0.022389668971300125, "learning_rate": 1.021551868620529e-06, "loss": 0.0011, "step": 149170 }, { "epoch": 2.4409719381493904, "grad_norm": 0.009879834949970245, "learning_rate": 1.0209753462773713e-06, "loss": 0.0006, "step": 149180 }, { "epoch": 2.4411355641004664, "grad_norm": 0.039834361523389816, "learning_rate": 1.0203989681642946e-06, "loss": 0.001, "step": 149190 }, { "epoch": 2.4412991900515424, "grad_norm": 0.061837732791900635, "learning_rate": 1.0198227343021911e-06, "loss": 0.0008, "step": 149200 }, { "epoch": 2.441462816002618, "grad_norm": 0.13374507427215576, "learning_rate": 1.0192466447119464e-06, "loss": 0.0007, "step": 149210 }, { "epoch": 2.441626441953694, "grad_norm": 0.05544538050889969, "learning_rate": 1.0186706994144445e-06, "loss": 0.0008, "step": 149220 }, { "epoch": 2.44179006790477, "grad_norm": 0.0643562600016594, "learning_rate": 1.018094898430561e-06, "loss": 0.0009, "step": 149230 }, { "epoch": 2.4419536938558455, "grad_norm": 0.12697699666023254, "learning_rate": 1.0175192417811675e-06, "loss": 0.0013, "step": 149240 }, { "epoch": 2.4421173198069215, "grad_norm": 0.020932389423251152, "learning_rate": 1.016943729487128e-06, "loss": 0.0009, "step": 149250 }, { "epoch": 2.442280945757997, "grad_norm": 0.025386499240994453, "learning_rate": 1.016368361569306e-06, "loss": 0.0008, "step": 149260 }, { "epoch": 2.442444571709073, "grad_norm": 0.0721283107995987, "learning_rate": 1.015793138048557e-06, "loss": 0.0006, "step": 149270 }, { "epoch": 2.442608197660149, "grad_norm": 0.0813249722123146, "learning_rate": 1.01521805894573e-06, "loss": 0.0006, "step": 149280 }, { "epoch": 2.4427718236112246, "grad_norm": 0.03963208571076393, "learning_rate": 1.0146431242816723e-06, "loss": 0.0006, "step": 149290 }, { "epoch": 2.4429354495623006, "grad_norm": 0.045182742178440094, "learning_rate": 1.0140683340772217e-06, "loss": 0.0006, "step": 149300 }, { "epoch": 2.4430990755133766, "grad_norm": 0.0777864158153534, "learning_rate": 1.0134936883532154e-06, "loss": 0.0006, "step": 149310 }, { "epoch": 2.443262701464452, "grad_norm": 0.07508112490177155, "learning_rate": 1.01291918713048e-06, "loss": 0.001, "step": 149320 }, { "epoch": 2.443426327415528, "grad_norm": 0.041357167065143585, "learning_rate": 1.0123448304298428e-06, "loss": 0.0007, "step": 149330 }, { "epoch": 2.443589953366604, "grad_norm": 0.03501289337873459, "learning_rate": 1.0117706182721215e-06, "loss": 0.0006, "step": 149340 }, { "epoch": 2.4437535793176797, "grad_norm": 0.042439885437488556, "learning_rate": 1.0111965506781286e-06, "loss": 0.0008, "step": 149350 }, { "epoch": 2.4439172052687557, "grad_norm": 0.042123135179281235, "learning_rate": 1.0106226276686753e-06, "loss": 0.0003, "step": 149360 }, { "epoch": 2.444080831219831, "grad_norm": 0.030775081366300583, "learning_rate": 1.0100488492645622e-06, "loss": 0.0009, "step": 149370 }, { "epoch": 2.444244457170907, "grad_norm": 0.04664922133088112, "learning_rate": 1.0094752154865895e-06, "loss": 0.0004, "step": 149380 }, { "epoch": 2.444408083121983, "grad_norm": 0.06221944838762283, "learning_rate": 1.0089017263555483e-06, "loss": 0.0004, "step": 149390 }, { "epoch": 2.4445717090730588, "grad_norm": 0.07560790330171585, "learning_rate": 1.0083283818922284e-06, "loss": 0.0007, "step": 149400 }, { "epoch": 2.4447353350241348, "grad_norm": 0.04249478131532669, "learning_rate": 1.0077551821174097e-06, "loss": 0.0016, "step": 149410 }, { "epoch": 2.4448989609752108, "grad_norm": 0.09271867573261261, "learning_rate": 1.0071821270518705e-06, "loss": 0.0008, "step": 149420 }, { "epoch": 2.4450625869262863, "grad_norm": 0.028555242344737053, "learning_rate": 1.0066092167163855e-06, "loss": 0.0008, "step": 149430 }, { "epoch": 2.4452262128773623, "grad_norm": 0.14769424498081207, "learning_rate": 1.0060364511317161e-06, "loss": 0.0007, "step": 149440 }, { "epoch": 2.4453898388284383, "grad_norm": 0.056885331869125366, "learning_rate": 1.0054638303186271e-06, "loss": 0.0005, "step": 149450 }, { "epoch": 2.445553464779514, "grad_norm": 0.0065961177460849285, "learning_rate": 1.0048913542978721e-06, "loss": 0.0003, "step": 149460 }, { "epoch": 2.44571709073059, "grad_norm": 0.3480302095413208, "learning_rate": 1.0043190230902039e-06, "loss": 0.0016, "step": 149470 }, { "epoch": 2.445880716681666, "grad_norm": 0.004454368259757757, "learning_rate": 1.0037468367163689e-06, "loss": 0.0007, "step": 149480 }, { "epoch": 2.4460443426327414, "grad_norm": 0.11310214549303055, "learning_rate": 1.003174795197105e-06, "loss": 0.0005, "step": 149490 }, { "epoch": 2.4462079685838174, "grad_norm": 0.049962688237428665, "learning_rate": 1.0026028985531499e-06, "loss": 0.0013, "step": 149500 }, { "epoch": 2.4463715945348934, "grad_norm": 0.07255863398313522, "learning_rate": 1.0020311468052312e-06, "loss": 0.0021, "step": 149510 }, { "epoch": 2.446535220485969, "grad_norm": 0.07148192077875137, "learning_rate": 1.0014595399740755e-06, "loss": 0.0008, "step": 149520 }, { "epoch": 2.446698846437045, "grad_norm": 0.10815980285406113, "learning_rate": 1.0008880780804014e-06, "loss": 0.0011, "step": 149530 }, { "epoch": 2.446862472388121, "grad_norm": 0.17929501831531525, "learning_rate": 1.0003167611449215e-06, "loss": 0.0005, "step": 149540 }, { "epoch": 2.4470260983391965, "grad_norm": 0.006514658220112324, "learning_rate": 9.997455891883473e-07, "loss": 0.0008, "step": 149550 }, { "epoch": 2.4471897242902725, "grad_norm": 0.08479084074497223, "learning_rate": 9.991745622313803e-07, "loss": 0.0006, "step": 149560 }, { "epoch": 2.4473533502413485, "grad_norm": 0.18950589001178741, "learning_rate": 9.986036802947207e-07, "loss": 0.0009, "step": 149570 }, { "epoch": 2.447516976192424, "grad_norm": 0.07851356267929077, "learning_rate": 9.980329433990588e-07, "loss": 0.0007, "step": 149580 }, { "epoch": 2.4476806021435, "grad_norm": 0.06302106380462646, "learning_rate": 9.97462351565086e-07, "loss": 0.0003, "step": 149590 }, { "epoch": 2.447844228094576, "grad_norm": 0.1629902571439743, "learning_rate": 9.968919048134818e-07, "loss": 0.0013, "step": 149600 }, { "epoch": 2.4480078540456516, "grad_norm": 0.03731869161128998, "learning_rate": 9.963216031649264e-07, "loss": 0.001, "step": 149610 }, { "epoch": 2.4481714799967276, "grad_norm": 0.06414326280355453, "learning_rate": 9.957514466400885e-07, "loss": 0.0007, "step": 149620 }, { "epoch": 2.4483351059478036, "grad_norm": 0.01729876548051834, "learning_rate": 9.951814352596384e-07, "loss": 0.0006, "step": 149630 }, { "epoch": 2.448498731898879, "grad_norm": 0.20005588233470917, "learning_rate": 9.946115690442354e-07, "loss": 0.0013, "step": 149640 }, { "epoch": 2.448662357849955, "grad_norm": 0.02489263191819191, "learning_rate": 9.940418480145352e-07, "loss": 0.0006, "step": 149650 }, { "epoch": 2.4488259838010307, "grad_norm": 0.017807386815547943, "learning_rate": 9.934722721911915e-07, "loss": 0.0008, "step": 149660 }, { "epoch": 2.4489896097521067, "grad_norm": 0.07528525590896606, "learning_rate": 9.929028415948472e-07, "loss": 0.0007, "step": 149670 }, { "epoch": 2.4491532357031827, "grad_norm": 0.0757269486784935, "learning_rate": 9.92333556246145e-07, "loss": 0.0009, "step": 149680 }, { "epoch": 2.449316861654258, "grad_norm": 0.05382451415061951, "learning_rate": 9.917644161657187e-07, "loss": 0.0006, "step": 149690 }, { "epoch": 2.449480487605334, "grad_norm": 0.04170555621385574, "learning_rate": 9.911954213741981e-07, "loss": 0.0007, "step": 149700 }, { "epoch": 2.44964411355641, "grad_norm": 0.004949782509356737, "learning_rate": 9.906265718922097e-07, "loss": 0.0018, "step": 149710 }, { "epoch": 2.4498077395074858, "grad_norm": 0.14138126373291016, "learning_rate": 9.90057867740371e-07, "loss": 0.0013, "step": 149720 }, { "epoch": 2.4499713654585618, "grad_norm": 0.0234735868871212, "learning_rate": 9.894893089392994e-07, "loss": 0.0014, "step": 149730 }, { "epoch": 2.4501349914096378, "grad_norm": 0.1300615519285202, "learning_rate": 9.889208955095986e-07, "loss": 0.0007, "step": 149740 }, { "epoch": 2.4502986173607133, "grad_norm": 0.045777950435876846, "learning_rate": 9.883526274718752e-07, "loss": 0.0008, "step": 149750 }, { "epoch": 2.4504622433117893, "grad_norm": 0.046468086540699005, "learning_rate": 9.877845048467282e-07, "loss": 0.0003, "step": 149760 }, { "epoch": 2.450625869262865, "grad_norm": 0.050322871655225754, "learning_rate": 9.87216527654749e-07, "loss": 0.0007, "step": 149770 }, { "epoch": 2.450789495213941, "grad_norm": 0.03755843639373779, "learning_rate": 9.866486959165273e-07, "loss": 0.0007, "step": 149780 }, { "epoch": 2.450953121165017, "grad_norm": 0.08474308252334595, "learning_rate": 9.86081009652643e-07, "loss": 0.0005, "step": 149790 }, { "epoch": 2.4511167471160924, "grad_norm": 0.07001464068889618, "learning_rate": 9.855134688836765e-07, "loss": 0.0005, "step": 149800 }, { "epoch": 2.4512803730671684, "grad_norm": 0.06313111633062363, "learning_rate": 9.84946073630197e-07, "loss": 0.0004, "step": 149810 }, { "epoch": 2.4514439990182444, "grad_norm": 0.05203631520271301, "learning_rate": 9.843788239127733e-07, "loss": 0.0006, "step": 149820 }, { "epoch": 2.45160762496932, "grad_norm": 0.021992292255163193, "learning_rate": 9.838117197519648e-07, "loss": 0.0011, "step": 149830 }, { "epoch": 2.451771250920396, "grad_norm": 0.005559555254876614, "learning_rate": 9.832447611683305e-07, "loss": 0.0003, "step": 149840 }, { "epoch": 2.451934876871472, "grad_norm": 0.04270913824439049, "learning_rate": 9.82677948182419e-07, "loss": 0.0008, "step": 149850 }, { "epoch": 2.4520985028225475, "grad_norm": 0.05839170888066292, "learning_rate": 9.821112808147753e-07, "loss": 0.0024, "step": 149860 }, { "epoch": 2.4522621287736235, "grad_norm": 0.06364672631025314, "learning_rate": 9.815447590859428e-07, "loss": 0.0007, "step": 149870 }, { "epoch": 2.4524257547246995, "grad_norm": 0.07619593292474747, "learning_rate": 9.809783830164526e-07, "loss": 0.0011, "step": 149880 }, { "epoch": 2.452589380675775, "grad_norm": 0.009764869697391987, "learning_rate": 9.804121526268385e-07, "loss": 0.0012, "step": 149890 }, { "epoch": 2.452753006626851, "grad_norm": 0.004904359579086304, "learning_rate": 9.798460679376215e-07, "loss": 0.0012, "step": 149900 }, { "epoch": 2.452916632577927, "grad_norm": 0.07469763606786728, "learning_rate": 9.79280128969322e-07, "loss": 0.001, "step": 149910 }, { "epoch": 2.4530802585290026, "grad_norm": 0.007057026494294405, "learning_rate": 9.787143357424562e-07, "loss": 0.0007, "step": 149920 }, { "epoch": 2.4532438844800786, "grad_norm": 0.123782679438591, "learning_rate": 9.781486882775293e-07, "loss": 0.0012, "step": 149930 }, { "epoch": 2.4534075104311546, "grad_norm": 0.004463258199393749, "learning_rate": 9.775831865950487e-07, "loss": 0.0006, "step": 149940 }, { "epoch": 2.45357113638223, "grad_norm": 0.022445421665906906, "learning_rate": 9.77017830715507e-07, "loss": 0.0007, "step": 149950 }, { "epoch": 2.453734762333306, "grad_norm": 0.11141089349985123, "learning_rate": 9.764526206594005e-07, "loss": 0.001, "step": 149960 }, { "epoch": 2.453898388284382, "grad_norm": 0.07449063658714294, "learning_rate": 9.75887556447217e-07, "loss": 0.0009, "step": 149970 }, { "epoch": 2.4540620142354577, "grad_norm": 0.03904491290450096, "learning_rate": 9.753226380994368e-07, "loss": 0.0005, "step": 149980 }, { "epoch": 2.4542256401865337, "grad_norm": 0.020189976319670677, "learning_rate": 9.747578656365397e-07, "loss": 0.0008, "step": 149990 }, { "epoch": 2.4543892661376097, "grad_norm": 0.0536421574652195, "learning_rate": 9.741932390789937e-07, "loss": 0.0012, "step": 150000 }, { "epoch": 2.4543892661376097, "eval_loss": 0.0009711539605632424, "eval_runtime": 3.0967, "eval_samples_per_second": 64.584, "eval_steps_per_second": 16.146, "step": 150000 }, { "epoch": 2.454552892088685, "grad_norm": 0.16856323182582855, "learning_rate": 9.736287584472681e-07, "loss": 0.0017, "step": 150010 }, { "epoch": 2.454716518039761, "grad_norm": 0.038104940205812454, "learning_rate": 9.73064423761822e-07, "loss": 0.0007, "step": 150020 }, { "epoch": 2.4548801439908368, "grad_norm": 0.06241224706172943, "learning_rate": 9.725002350431135e-07, "loss": 0.0008, "step": 150030 }, { "epoch": 2.4550437699419128, "grad_norm": 0.057114940136671066, "learning_rate": 9.719361923115912e-07, "loss": 0.0007, "step": 150040 }, { "epoch": 2.4552073958929888, "grad_norm": 0.09591123461723328, "learning_rate": 9.713722955876997e-07, "loss": 0.0006, "step": 150050 }, { "epoch": 2.4553710218440643, "grad_norm": 0.02816822938621044, "learning_rate": 9.708085448918814e-07, "loss": 0.001, "step": 150060 }, { "epoch": 2.4555346477951403, "grad_norm": 0.11524377763271332, "learning_rate": 9.702449402445674e-07, "loss": 0.0008, "step": 150070 }, { "epoch": 2.4556982737462163, "grad_norm": 0.08896174281835556, "learning_rate": 9.696814816661908e-07, "loss": 0.0019, "step": 150080 }, { "epoch": 2.455861899697292, "grad_norm": 0.006541398353874683, "learning_rate": 9.691181691771727e-07, "loss": 0.0009, "step": 150090 }, { "epoch": 2.456025525648368, "grad_norm": 0.067821204662323, "learning_rate": 9.685550027979346e-07, "loss": 0.0014, "step": 150100 }, { "epoch": 2.456189151599444, "grad_norm": 0.08969834446907043, "learning_rate": 9.679919825488865e-07, "loss": 0.0008, "step": 150110 }, { "epoch": 2.4563527775505194, "grad_norm": 0.08168666809797287, "learning_rate": 9.6742910845044e-07, "loss": 0.0011, "step": 150120 }, { "epoch": 2.4565164035015954, "grad_norm": 0.0837826058268547, "learning_rate": 9.668663805229944e-07, "loss": 0.0009, "step": 150130 }, { "epoch": 2.4566800294526714, "grad_norm": 0.04135086387395859, "learning_rate": 9.663037987869512e-07, "loss": 0.0009, "step": 150140 }, { "epoch": 2.456843655403747, "grad_norm": 0.0828266441822052, "learning_rate": 9.657413632627e-07, "loss": 0.0005, "step": 150150 }, { "epoch": 2.457007281354823, "grad_norm": 0.14552417397499084, "learning_rate": 9.651790739706274e-07, "loss": 0.0011, "step": 150160 }, { "epoch": 2.4571709073058985, "grad_norm": 0.021406356245279312, "learning_rate": 9.646169309311165e-07, "loss": 0.0009, "step": 150170 }, { "epoch": 2.4573345332569745, "grad_norm": 0.08791796863079071, "learning_rate": 9.640549341645423e-07, "loss": 0.0014, "step": 150180 }, { "epoch": 2.4574981592080505, "grad_norm": 0.04786737635731697, "learning_rate": 9.634930836912765e-07, "loss": 0.0008, "step": 150190 }, { "epoch": 2.457661785159126, "grad_norm": 0.0012093438999727368, "learning_rate": 9.629313795316863e-07, "loss": 0.0006, "step": 150200 }, { "epoch": 2.457825411110202, "grad_norm": 0.09320314973592758, "learning_rate": 9.623698217061294e-07, "loss": 0.0007, "step": 150210 }, { "epoch": 2.457989037061278, "grad_norm": 0.0720137283205986, "learning_rate": 9.61808410234964e-07, "loss": 0.0006, "step": 150220 }, { "epoch": 2.4581526630123536, "grad_norm": 0.006113241892307997, "learning_rate": 9.612471451385363e-07, "loss": 0.0009, "step": 150230 }, { "epoch": 2.4583162889634296, "grad_norm": 0.04886533319950104, "learning_rate": 9.606860264371942e-07, "loss": 0.0006, "step": 150240 }, { "epoch": 2.4584799149145056, "grad_norm": 0.059853337705135345, "learning_rate": 9.601250541512758e-07, "loss": 0.0005, "step": 150250 }, { "epoch": 2.458643540865581, "grad_norm": 0.053843386471271515, "learning_rate": 9.595642283011125e-07, "loss": 0.0006, "step": 150260 }, { "epoch": 2.458807166816657, "grad_norm": 0.1918380856513977, "learning_rate": 9.59003548907037e-07, "loss": 0.0018, "step": 150270 }, { "epoch": 2.458970792767733, "grad_norm": 0.11621823906898499, "learning_rate": 9.584430159893687e-07, "loss": 0.0007, "step": 150280 }, { "epoch": 2.4591344187188087, "grad_norm": 0.06775251030921936, "learning_rate": 9.57882629568429e-07, "loss": 0.0009, "step": 150290 }, { "epoch": 2.4592980446698847, "grad_norm": 0.06714984774589539, "learning_rate": 9.573223896645278e-07, "loss": 0.0008, "step": 150300 }, { "epoch": 2.4594616706209607, "grad_norm": 0.04675167426466942, "learning_rate": 9.56762296297975e-07, "loss": 0.0004, "step": 150310 }, { "epoch": 2.459625296572036, "grad_norm": 0.07848186790943146, "learning_rate": 9.562023494890698e-07, "loss": 0.0012, "step": 150320 }, { "epoch": 2.459788922523112, "grad_norm": 0.3382268249988556, "learning_rate": 9.556425492581118e-07, "loss": 0.0023, "step": 150330 }, { "epoch": 2.459952548474188, "grad_norm": 0.002287573181092739, "learning_rate": 9.55082895625391e-07, "loss": 0.0006, "step": 150340 }, { "epoch": 2.4601161744252638, "grad_norm": 0.007926524616777897, "learning_rate": 9.545233886111926e-07, "loss": 0.0007, "step": 150350 }, { "epoch": 2.4602798003763398, "grad_norm": 0.06791617721319199, "learning_rate": 9.539640282357992e-07, "loss": 0.0007, "step": 150360 }, { "epoch": 2.4604434263274158, "grad_norm": 0.047069791704416275, "learning_rate": 9.534048145194841e-07, "loss": 0.001, "step": 150370 }, { "epoch": 2.4606070522784913, "grad_norm": 0.007581259123980999, "learning_rate": 9.528457474825203e-07, "loss": 0.0005, "step": 150380 }, { "epoch": 2.4607706782295673, "grad_norm": 0.13201960921287537, "learning_rate": 9.522868271451696e-07, "loss": 0.0011, "step": 150390 }, { "epoch": 2.4609343041806433, "grad_norm": 0.09165289998054504, "learning_rate": 9.517280535276929e-07, "loss": 0.0013, "step": 150400 }, { "epoch": 2.461097930131719, "grad_norm": 0.02847292274236679, "learning_rate": 9.511694266503457e-07, "loss": 0.0006, "step": 150410 }, { "epoch": 2.461261556082795, "grad_norm": 0.08580125868320465, "learning_rate": 9.506109465333746e-07, "loss": 0.0008, "step": 150420 }, { "epoch": 2.4614251820338704, "grad_norm": 0.0063904342241585255, "learning_rate": 9.500526131970255e-07, "loss": 0.0009, "step": 150430 }, { "epoch": 2.4615888079849464, "grad_norm": 0.15242663025856018, "learning_rate": 9.494944266615352e-07, "loss": 0.0005, "step": 150440 }, { "epoch": 2.4617524339360224, "grad_norm": 0.0859651044011116, "learning_rate": 9.489363869471369e-07, "loss": 0.0009, "step": 150450 }, { "epoch": 2.461916059887098, "grad_norm": 0.31555071473121643, "learning_rate": 9.483784940740571e-07, "loss": 0.0007, "step": 150460 }, { "epoch": 2.462079685838174, "grad_norm": 0.06957252323627472, "learning_rate": 9.47820748062519e-07, "loss": 0.0011, "step": 150470 }, { "epoch": 2.46224331178925, "grad_norm": 0.0669672042131424, "learning_rate": 9.472631489327406e-07, "loss": 0.0007, "step": 150480 }, { "epoch": 2.4624069377403255, "grad_norm": 0.04413294047117233, "learning_rate": 9.467056967049315e-07, "loss": 0.0005, "step": 150490 }, { "epoch": 2.4625705636914015, "grad_norm": 0.058431029319763184, "learning_rate": 9.461483913993003e-07, "loss": 0.0006, "step": 150500 }, { "epoch": 2.4627341896424775, "grad_norm": 0.07261022180318832, "learning_rate": 9.45591233036045e-07, "loss": 0.0014, "step": 150510 }, { "epoch": 2.462897815593553, "grad_norm": 0.02955297753214836, "learning_rate": 9.450342216353647e-07, "loss": 0.0005, "step": 150520 }, { "epoch": 2.463061441544629, "grad_norm": 0.05156468600034714, "learning_rate": 9.444773572174471e-07, "loss": 0.0005, "step": 150530 }, { "epoch": 2.4632250674957046, "grad_norm": 0.027956686913967133, "learning_rate": 9.439206398024786e-07, "loss": 0.0017, "step": 150540 }, { "epoch": 2.4633886934467806, "grad_norm": 0.03266816586256027, "learning_rate": 9.433640694106389e-07, "loss": 0.0011, "step": 150550 }, { "epoch": 2.4635523193978566, "grad_norm": 0.06012602522969246, "learning_rate": 9.428076460621005e-07, "loss": 0.0007, "step": 150560 }, { "epoch": 2.463715945348932, "grad_norm": 0.052492085844278336, "learning_rate": 9.422513697770353e-07, "loss": 0.0005, "step": 150570 }, { "epoch": 2.463879571300008, "grad_norm": 0.07562370598316193, "learning_rate": 9.416952405756036e-07, "loss": 0.0011, "step": 150580 }, { "epoch": 2.464043197251084, "grad_norm": 0.1032552719116211, "learning_rate": 9.411392584779666e-07, "loss": 0.0006, "step": 150590 }, { "epoch": 2.4642068232021597, "grad_norm": 0.0015117744915187359, "learning_rate": 9.405834235042754e-07, "loss": 0.0004, "step": 150600 }, { "epoch": 2.4643704491532357, "grad_norm": 0.06402118504047394, "learning_rate": 9.400277356746801e-07, "loss": 0.0004, "step": 150610 }, { "epoch": 2.4645340751043117, "grad_norm": 0.012864670716226101, "learning_rate": 9.3947219500932e-07, "loss": 0.0005, "step": 150620 }, { "epoch": 2.464697701055387, "grad_norm": 0.07661350816488266, "learning_rate": 9.389168015283334e-07, "loss": 0.0007, "step": 150630 }, { "epoch": 2.464861327006463, "grad_norm": 0.09371276944875717, "learning_rate": 9.383615552518549e-07, "loss": 0.0005, "step": 150640 }, { "epoch": 2.465024952957539, "grad_norm": 0.02587028406560421, "learning_rate": 9.378064562000055e-07, "loss": 0.0009, "step": 150650 }, { "epoch": 2.4651885789086148, "grad_norm": 0.04956110194325447, "learning_rate": 9.372515043929098e-07, "loss": 0.001, "step": 150660 }, { "epoch": 2.4653522048596908, "grad_norm": 0.12506696581840515, "learning_rate": 9.366966998506816e-07, "loss": 0.001, "step": 150670 }, { "epoch": 2.4655158308107668, "grad_norm": 0.11542143672704697, "learning_rate": 9.361420425934325e-07, "loss": 0.0007, "step": 150680 }, { "epoch": 2.4656794567618423, "grad_norm": 0.04239243268966675, "learning_rate": 9.355875326412678e-07, "loss": 0.002, "step": 150690 }, { "epoch": 2.4658430827129183, "grad_norm": 0.1125103160738945, "learning_rate": 9.350331700142856e-07, "loss": 0.0005, "step": 150700 }, { "epoch": 2.4660067086639943, "grad_norm": 0.011683939956128597, "learning_rate": 9.344789547325822e-07, "loss": 0.0009, "step": 150710 }, { "epoch": 2.46617033461507, "grad_norm": 0.10891806334257126, "learning_rate": 9.339248868162443e-07, "loss": 0.0008, "step": 150720 }, { "epoch": 2.466333960566146, "grad_norm": 0.04900682345032692, "learning_rate": 9.333709662853579e-07, "loss": 0.0004, "step": 150730 }, { "epoch": 2.466497586517222, "grad_norm": 0.020068299025297165, "learning_rate": 9.3281719316e-07, "loss": 0.0009, "step": 150740 }, { "epoch": 2.4666612124682974, "grad_norm": 0.00489389430731535, "learning_rate": 9.322635674602421e-07, "loss": 0.0004, "step": 150750 }, { "epoch": 2.4668248384193734, "grad_norm": 0.04533260688185692, "learning_rate": 9.317100892061548e-07, "loss": 0.0012, "step": 150760 }, { "epoch": 2.4669884643704494, "grad_norm": 0.0490998700261116, "learning_rate": 9.311567584177978e-07, "loss": 0.0006, "step": 150770 }, { "epoch": 2.467152090321525, "grad_norm": 0.08508940041065216, "learning_rate": 9.306035751152303e-07, "loss": 0.0005, "step": 150780 }, { "epoch": 2.467315716272601, "grad_norm": 0.1055036187171936, "learning_rate": 9.300505393185017e-07, "loss": 0.0009, "step": 150790 }, { "epoch": 2.4674793422236765, "grad_norm": 0.009024235419929028, "learning_rate": 9.2949765104766e-07, "loss": 0.0002, "step": 150800 }, { "epoch": 2.4676429681747525, "grad_norm": 0.029518017545342445, "learning_rate": 9.289449103227444e-07, "loss": 0.0005, "step": 150810 }, { "epoch": 2.4678065941258285, "grad_norm": 0.00842087808996439, "learning_rate": 9.283923171637921e-07, "loss": 0.0004, "step": 150820 }, { "epoch": 2.467970220076904, "grad_norm": 0.1646094024181366, "learning_rate": 9.278398715908316e-07, "loss": 0.0009, "step": 150830 }, { "epoch": 2.46813384602798, "grad_norm": 0.061742912977933884, "learning_rate": 9.272875736238896e-07, "loss": 0.0007, "step": 150840 }, { "epoch": 2.468297471979056, "grad_norm": 0.07695599645376205, "learning_rate": 9.267354232829845e-07, "loss": 0.0009, "step": 150850 }, { "epoch": 2.4684610979301316, "grad_norm": 0.032638199627399445, "learning_rate": 9.261834205881293e-07, "loss": 0.0007, "step": 150860 }, { "epoch": 2.4686247238812076, "grad_norm": 0.056871455162763596, "learning_rate": 9.256315655593351e-07, "loss": 0.0011, "step": 150870 }, { "epoch": 2.4687883498322836, "grad_norm": 0.002389301545917988, "learning_rate": 9.250798582166027e-07, "loss": 0.0005, "step": 150880 }, { "epoch": 2.468951975783359, "grad_norm": 0.059549376368522644, "learning_rate": 9.245282985799331e-07, "loss": 0.0008, "step": 150890 }, { "epoch": 2.469115601734435, "grad_norm": 0.11891381442546844, "learning_rate": 9.239768866693166e-07, "loss": 0.0004, "step": 150900 }, { "epoch": 2.469279227685511, "grad_norm": 0.05941632017493248, "learning_rate": 9.234256225047411e-07, "loss": 0.0006, "step": 150910 }, { "epoch": 2.4694428536365867, "grad_norm": 0.027064897119998932, "learning_rate": 9.228745061061906e-07, "loss": 0.0003, "step": 150920 }, { "epoch": 2.4696064795876627, "grad_norm": 0.13816119730472565, "learning_rate": 9.223235374936385e-07, "loss": 0.0008, "step": 150930 }, { "epoch": 2.469770105538738, "grad_norm": 0.2471037358045578, "learning_rate": 9.217727166870604e-07, "loss": 0.0011, "step": 150940 }, { "epoch": 2.469933731489814, "grad_norm": 0.04939854517579079, "learning_rate": 9.212220437064173e-07, "loss": 0.0006, "step": 150950 }, { "epoch": 2.47009735744089, "grad_norm": 0.0747624933719635, "learning_rate": 9.206715185716714e-07, "loss": 0.0005, "step": 150960 }, { "epoch": 2.4702609833919658, "grad_norm": 0.43480560183525085, "learning_rate": 9.201211413027805e-07, "loss": 0.0019, "step": 150970 }, { "epoch": 2.4704246093430418, "grad_norm": 0.051545508205890656, "learning_rate": 9.195709119196911e-07, "loss": 0.0006, "step": 150980 }, { "epoch": 2.4705882352941178, "grad_norm": 0.07411473989486694, "learning_rate": 9.190208304423498e-07, "loss": 0.0005, "step": 150990 }, { "epoch": 2.4707518612451933, "grad_norm": 0.19455954432487488, "learning_rate": 9.184708968906942e-07, "loss": 0.0012, "step": 151000 }, { "epoch": 2.4709154871962693, "grad_norm": 0.0461120642721653, "learning_rate": 9.179211112846603e-07, "loss": 0.0006, "step": 151010 }, { "epoch": 2.4710791131473453, "grad_norm": 0.07214047759771347, "learning_rate": 9.173714736441736e-07, "loss": 0.0004, "step": 151020 }, { "epoch": 2.471242739098421, "grad_norm": 0.03791683539748192, "learning_rate": 9.168219839891595e-07, "loss": 0.0006, "step": 151030 }, { "epoch": 2.471406365049497, "grad_norm": 0.03463110700249672, "learning_rate": 9.162726423395357e-07, "loss": 0.0006, "step": 151040 }, { "epoch": 2.471569991000573, "grad_norm": 0.0014401538064703345, "learning_rate": 9.157234487152117e-07, "loss": 0.0006, "step": 151050 }, { "epoch": 2.4717336169516484, "grad_norm": 0.027242766693234444, "learning_rate": 9.151744031360982e-07, "loss": 0.0009, "step": 151060 }, { "epoch": 2.4718972429027244, "grad_norm": 0.03567284718155861, "learning_rate": 9.146255056220932e-07, "loss": 0.0005, "step": 151070 }, { "epoch": 2.4720608688538004, "grad_norm": 0.06301916390657425, "learning_rate": 9.140767561930958e-07, "loss": 0.001, "step": 151080 }, { "epoch": 2.472224494804876, "grad_norm": 0.028930868953466415, "learning_rate": 9.13528154868995e-07, "loss": 0.0006, "step": 151090 }, { "epoch": 2.472388120755952, "grad_norm": 0.05450453981757164, "learning_rate": 9.129797016696778e-07, "loss": 0.0005, "step": 151100 }, { "epoch": 2.472551746707028, "grad_norm": 0.1395966112613678, "learning_rate": 9.124313966150228e-07, "loss": 0.0013, "step": 151110 }, { "epoch": 2.4727153726581035, "grad_norm": 0.04936983436346054, "learning_rate": 9.11883239724905e-07, "loss": 0.0008, "step": 151120 }, { "epoch": 2.4728789986091795, "grad_norm": 0.026668457314372063, "learning_rate": 9.113352310191958e-07, "loss": 0.0018, "step": 151130 }, { "epoch": 2.4730426245602555, "grad_norm": 0.03131609410047531, "learning_rate": 9.107873705177572e-07, "loss": 0.0006, "step": 151140 }, { "epoch": 2.473206250511331, "grad_norm": 0.04698535427451134, "learning_rate": 9.102396582404488e-07, "loss": 0.0008, "step": 151150 }, { "epoch": 2.473369876462407, "grad_norm": 0.07373373955488205, "learning_rate": 9.096920942071219e-07, "loss": 0.0006, "step": 151160 }, { "epoch": 2.473533502413483, "grad_norm": 0.035534121096134186, "learning_rate": 9.091446784376257e-07, "loss": 0.0005, "step": 151170 }, { "epoch": 2.4736971283645586, "grad_norm": 0.12654584646224976, "learning_rate": 9.085974109518043e-07, "loss": 0.0006, "step": 151180 }, { "epoch": 2.4738607543156346, "grad_norm": 0.11844980716705322, "learning_rate": 9.080502917694917e-07, "loss": 0.0011, "step": 151190 }, { "epoch": 2.47402438026671, "grad_norm": 0.03693728148937225, "learning_rate": 9.075033209105222e-07, "loss": 0.0004, "step": 151200 }, { "epoch": 2.474188006217786, "grad_norm": 0.058446429669857025, "learning_rate": 9.069564983947204e-07, "loss": 0.0006, "step": 151210 }, { "epoch": 2.474351632168862, "grad_norm": 0.09844362735748291, "learning_rate": 9.064098242419095e-07, "loss": 0.0005, "step": 151220 }, { "epoch": 2.4745152581199377, "grad_norm": 0.06673082709312439, "learning_rate": 9.05863298471903e-07, "loss": 0.0008, "step": 151230 }, { "epoch": 2.4746788840710137, "grad_norm": 0.06816031038761139, "learning_rate": 9.053169211045127e-07, "loss": 0.0005, "step": 151240 }, { "epoch": 2.4748425100220897, "grad_norm": 0.004089050460606813, "learning_rate": 9.047706921595422e-07, "loss": 0.0004, "step": 151250 }, { "epoch": 2.475006135973165, "grad_norm": 0.13913549482822418, "learning_rate": 9.04224611656791e-07, "loss": 0.0008, "step": 151260 }, { "epoch": 2.475169761924241, "grad_norm": 0.15670907497406006, "learning_rate": 9.036786796160546e-07, "loss": 0.0004, "step": 151270 }, { "epoch": 2.475333387875317, "grad_norm": 0.00462714210152626, "learning_rate": 9.031328960571195e-07, "loss": 0.0007, "step": 151280 }, { "epoch": 2.4754970138263928, "grad_norm": 0.11180935800075531, "learning_rate": 9.025872609997715e-07, "loss": 0.0011, "step": 151290 }, { "epoch": 2.4756606397774688, "grad_norm": 0.13851602375507355, "learning_rate": 9.020417744637866e-07, "loss": 0.0006, "step": 151300 }, { "epoch": 2.4758242657285443, "grad_norm": 0.24512170255184174, "learning_rate": 9.014964364689393e-07, "loss": 0.0013, "step": 151310 }, { "epoch": 2.4759878916796203, "grad_norm": 0.009779210202395916, "learning_rate": 9.009512470349946e-07, "loss": 0.0008, "step": 151320 }, { "epoch": 2.4761515176306963, "grad_norm": 0.007476090453565121, "learning_rate": 9.004062061817148e-07, "loss": 0.0008, "step": 151330 }, { "epoch": 2.476315143581772, "grad_norm": 0.011915738694369793, "learning_rate": 8.998613139288597e-07, "loss": 0.001, "step": 151340 }, { "epoch": 2.476478769532848, "grad_norm": 0.13148239254951477, "learning_rate": 8.993165702961753e-07, "loss": 0.0011, "step": 151350 }, { "epoch": 2.476642395483924, "grad_norm": 0.006391483824700117, "learning_rate": 8.987719753034108e-07, "loss": 0.0007, "step": 151360 }, { "epoch": 2.4768060214349994, "grad_norm": 0.14566324651241302, "learning_rate": 8.982275289703035e-07, "loss": 0.0007, "step": 151370 }, { "epoch": 2.4769696473860754, "grad_norm": 0.039603304117918015, "learning_rate": 8.976832313165912e-07, "loss": 0.0005, "step": 151380 }, { "epoch": 2.4771332733371514, "grad_norm": 0.008537969551980495, "learning_rate": 8.971390823620013e-07, "loss": 0.0008, "step": 151390 }, { "epoch": 2.477296899288227, "grad_norm": 0.1054685190320015, "learning_rate": 8.965950821262582e-07, "loss": 0.0011, "step": 151400 }, { "epoch": 2.477460525239303, "grad_norm": 0.023423688486218452, "learning_rate": 8.960512306290825e-07, "loss": 0.0003, "step": 151410 }, { "epoch": 2.477624151190379, "grad_norm": 0.10136052966117859, "learning_rate": 8.955075278901848e-07, "loss": 0.001, "step": 151420 }, { "epoch": 2.4777877771414545, "grad_norm": 0.14078889787197113, "learning_rate": 8.949639739292759e-07, "loss": 0.0011, "step": 151430 }, { "epoch": 2.4779514030925305, "grad_norm": 0.03205703943967819, "learning_rate": 8.944205687660562e-07, "loss": 0.0006, "step": 151440 }, { "epoch": 2.4781150290436065, "grad_norm": 0.20053310692310333, "learning_rate": 8.938773124202227e-07, "loss": 0.0007, "step": 151450 }, { "epoch": 2.478278654994682, "grad_norm": 0.05311646685004234, "learning_rate": 8.93334204911469e-07, "loss": 0.0006, "step": 151460 }, { "epoch": 2.478442280945758, "grad_norm": 0.025086546316742897, "learning_rate": 8.927912462594784e-07, "loss": 0.0007, "step": 151470 }, { "epoch": 2.478605906896834, "grad_norm": 0.1363171488046646, "learning_rate": 8.922484364839357e-07, "loss": 0.0007, "step": 151480 }, { "epoch": 2.4787695328479096, "grad_norm": 0.1924663633108139, "learning_rate": 8.917057756045128e-07, "loss": 0.0009, "step": 151490 }, { "epoch": 2.4789331587989856, "grad_norm": 0.06424161046743393, "learning_rate": 8.911632636408829e-07, "loss": 0.0016, "step": 151500 }, { "epoch": 2.4790967847500616, "grad_norm": 0.04472389444708824, "learning_rate": 8.906209006127082e-07, "loss": 0.001, "step": 151510 }, { "epoch": 2.479260410701137, "grad_norm": 0.032719388604164124, "learning_rate": 8.900786865396504e-07, "loss": 0.0002, "step": 151520 }, { "epoch": 2.479424036652213, "grad_norm": 0.04274709150195122, "learning_rate": 8.895366214413609e-07, "loss": 0.0016, "step": 151530 }, { "epoch": 2.479587662603289, "grad_norm": 0.11197847872972488, "learning_rate": 8.889947053374909e-07, "loss": 0.0013, "step": 151540 }, { "epoch": 2.4797512885543647, "grad_norm": 0.013463164679706097, "learning_rate": 8.884529382476825e-07, "loss": 0.0007, "step": 151550 }, { "epoch": 2.4799149145054407, "grad_norm": 0.14517927169799805, "learning_rate": 8.879113201915717e-07, "loss": 0.0013, "step": 151560 }, { "epoch": 2.480078540456516, "grad_norm": 0.10182119160890579, "learning_rate": 8.873698511887935e-07, "loss": 0.0011, "step": 151570 }, { "epoch": 2.480242166407592, "grad_norm": 0.21422433853149414, "learning_rate": 8.868285312589731e-07, "loss": 0.0004, "step": 151580 }, { "epoch": 2.480405792358668, "grad_norm": 0.04126915708184242, "learning_rate": 8.862873604217337e-07, "loss": 0.0007, "step": 151590 }, { "epoch": 2.4805694183097438, "grad_norm": 0.0857510194182396, "learning_rate": 8.857463386966892e-07, "loss": 0.0007, "step": 151600 }, { "epoch": 2.4807330442608198, "grad_norm": 0.013697189278900623, "learning_rate": 8.852054661034509e-07, "loss": 0.0009, "step": 151610 }, { "epoch": 2.4808966702118957, "grad_norm": 0.07937291264533997, "learning_rate": 8.846647426616267e-07, "loss": 0.001, "step": 151620 }, { "epoch": 2.4810602961629713, "grad_norm": 0.08973510563373566, "learning_rate": 8.841241683908136e-07, "loss": 0.0009, "step": 151630 }, { "epoch": 2.4812239221140473, "grad_norm": 0.1182357668876648, "learning_rate": 8.835837433106092e-07, "loss": 0.0009, "step": 151640 }, { "epoch": 2.4813875480651233, "grad_norm": 0.08104902505874634, "learning_rate": 8.830434674405986e-07, "loss": 0.001, "step": 151650 }, { "epoch": 2.481551174016199, "grad_norm": 0.04527677595615387, "learning_rate": 8.825033408003681e-07, "loss": 0.0007, "step": 151660 }, { "epoch": 2.481714799967275, "grad_norm": 0.12170953303575516, "learning_rate": 8.819633634094943e-07, "loss": 0.0012, "step": 151670 }, { "epoch": 2.481878425918351, "grad_norm": 0.06582707166671753, "learning_rate": 8.814235352875511e-07, "loss": 0.0011, "step": 151680 }, { "epoch": 2.4820420518694264, "grad_norm": 0.038412194699048996, "learning_rate": 8.808838564541073e-07, "loss": 0.0009, "step": 151690 }, { "epoch": 2.4822056778205024, "grad_norm": 0.48581215739250183, "learning_rate": 8.803443269287226e-07, "loss": 0.0008, "step": 151700 }, { "epoch": 2.482369303771578, "grad_norm": 0.07216094434261322, "learning_rate": 8.798049467309561e-07, "loss": 0.0011, "step": 151710 }, { "epoch": 2.482532929722654, "grad_norm": 0.12745238840579987, "learning_rate": 8.792657158803558e-07, "loss": 0.0008, "step": 151720 }, { "epoch": 2.48269655567373, "grad_norm": 0.031186701729893684, "learning_rate": 8.787266343964712e-07, "loss": 0.0008, "step": 151730 }, { "epoch": 2.4828601816248055, "grad_norm": 0.1181822195649147, "learning_rate": 8.781877022988405e-07, "loss": 0.0006, "step": 151740 }, { "epoch": 2.4830238075758815, "grad_norm": 0.3319505453109741, "learning_rate": 8.776489196069982e-07, "loss": 0.0008, "step": 151750 }, { "epoch": 2.4831874335269575, "grad_norm": 0.39207926392555237, "learning_rate": 8.771102863404757e-07, "loss": 0.001, "step": 151760 }, { "epoch": 2.483351059478033, "grad_norm": 0.04138851538300514, "learning_rate": 8.765718025187946e-07, "loss": 0.0015, "step": 151770 }, { "epoch": 2.483514685429109, "grad_norm": 0.04688561335206032, "learning_rate": 8.760334681614774e-07, "loss": 0.001, "step": 151780 }, { "epoch": 2.483678311380185, "grad_norm": 0.048823513090610504, "learning_rate": 8.754952832880331e-07, "loss": 0.0009, "step": 151790 }, { "epoch": 2.4838419373312606, "grad_norm": 0.034230902791023254, "learning_rate": 8.749572479179736e-07, "loss": 0.0003, "step": 151800 }, { "epoch": 2.4840055632823366, "grad_norm": 0.0801633670926094, "learning_rate": 8.744193620707986e-07, "loss": 0.0017, "step": 151810 }, { "epoch": 2.4841691892334126, "grad_norm": 0.09538417309522629, "learning_rate": 8.738816257660071e-07, "loss": 0.001, "step": 151820 }, { "epoch": 2.484332815184488, "grad_norm": 0.1383579671382904, "learning_rate": 8.733440390230885e-07, "loss": 0.0004, "step": 151830 }, { "epoch": 2.484496441135564, "grad_norm": 0.06353263556957245, "learning_rate": 8.7280660186153e-07, "loss": 0.0006, "step": 151840 }, { "epoch": 2.48466006708664, "grad_norm": 0.007243297062814236, "learning_rate": 8.72269314300816e-07, "loss": 0.0004, "step": 151850 }, { "epoch": 2.4848236930377157, "grad_norm": 0.0869915783405304, "learning_rate": 8.717321763604153e-07, "loss": 0.0007, "step": 151860 }, { "epoch": 2.4849873189887917, "grad_norm": 0.016138210892677307, "learning_rate": 8.71195188059803e-07, "loss": 0.0006, "step": 151870 }, { "epoch": 2.4851509449398677, "grad_norm": 0.03762826323509216, "learning_rate": 8.706583494184401e-07, "loss": 0.0012, "step": 151880 }, { "epoch": 2.485314570890943, "grad_norm": 0.0773472711443901, "learning_rate": 8.701216604557877e-07, "loss": 0.0006, "step": 151890 }, { "epoch": 2.485478196842019, "grad_norm": 0.03130606561899185, "learning_rate": 8.695851211913009e-07, "loss": 0.0007, "step": 151900 }, { "epoch": 2.485641822793095, "grad_norm": 0.05759549140930176, "learning_rate": 8.690487316444241e-07, "loss": 0.0008, "step": 151910 }, { "epoch": 2.4858054487441708, "grad_norm": 0.05057528242468834, "learning_rate": 8.685124918346043e-07, "loss": 0.001, "step": 151920 }, { "epoch": 2.4859690746952467, "grad_norm": 0.03764611855149269, "learning_rate": 8.679764017812753e-07, "loss": 0.0016, "step": 151930 }, { "epoch": 2.4861327006463227, "grad_norm": 0.5055283308029175, "learning_rate": 8.674404615038717e-07, "loss": 0.0008, "step": 151940 }, { "epoch": 2.4862963265973983, "grad_norm": 0.19533708691596985, "learning_rate": 8.669046710218199e-07, "loss": 0.0007, "step": 151950 }, { "epoch": 2.4864599525484743, "grad_norm": 0.07553951442241669, "learning_rate": 8.66369030354538e-07, "loss": 0.0006, "step": 151960 }, { "epoch": 2.48662357849955, "grad_norm": 0.11274247616529465, "learning_rate": 8.658335395214457e-07, "loss": 0.0006, "step": 151970 }, { "epoch": 2.486787204450626, "grad_norm": 0.0959426611661911, "learning_rate": 8.652981985419495e-07, "loss": 0.001, "step": 151980 }, { "epoch": 2.486950830401702, "grad_norm": 0.07797201722860336, "learning_rate": 8.64763007435458e-07, "loss": 0.0006, "step": 151990 }, { "epoch": 2.4871144563527774, "grad_norm": 0.01973983645439148, "learning_rate": 8.64227966221367e-07, "loss": 0.0006, "step": 152000 }, { "epoch": 2.4872780823038534, "grad_norm": 0.13674390316009521, "learning_rate": 8.636930749190736e-07, "loss": 0.0011, "step": 152010 }, { "epoch": 2.4874417082549294, "grad_norm": 0.05802439898252487, "learning_rate": 8.631583335479632e-07, "loss": 0.0005, "step": 152020 }, { "epoch": 2.487605334206005, "grad_norm": 0.125584214925766, "learning_rate": 8.626237421274225e-07, "loss": 0.001, "step": 152030 }, { "epoch": 2.487768960157081, "grad_norm": 0.18783192336559296, "learning_rate": 8.620893006768255e-07, "loss": 0.0008, "step": 152040 }, { "epoch": 2.487932586108157, "grad_norm": 0.050240159034729004, "learning_rate": 8.615550092155478e-07, "loss": 0.0007, "step": 152050 }, { "epoch": 2.4880962120592325, "grad_norm": 0.023553142324090004, "learning_rate": 8.610208677629539e-07, "loss": 0.0003, "step": 152060 }, { "epoch": 2.4882598380103085, "grad_norm": 0.08275075256824493, "learning_rate": 8.60486876338405e-07, "loss": 0.0011, "step": 152070 }, { "epoch": 2.488423463961384, "grad_norm": 0.09971792250871658, "learning_rate": 8.59953034961259e-07, "loss": 0.0006, "step": 152080 }, { "epoch": 2.48858708991246, "grad_norm": 0.08894790709018707, "learning_rate": 8.594193436508641e-07, "loss": 0.001, "step": 152090 }, { "epoch": 2.488750715863536, "grad_norm": 0.32146868109703064, "learning_rate": 8.588858024265673e-07, "loss": 0.0008, "step": 152100 }, { "epoch": 2.4889143418146116, "grad_norm": 0.07286940515041351, "learning_rate": 8.583524113077058e-07, "loss": 0.0014, "step": 152110 }, { "epoch": 2.4890779677656876, "grad_norm": 0.0459439717233181, "learning_rate": 8.578191703136157e-07, "loss": 0.0008, "step": 152120 }, { "epoch": 2.4892415937167636, "grad_norm": 0.048590224236249924, "learning_rate": 8.572860794636262e-07, "loss": 0.0013, "step": 152130 }, { "epoch": 2.489405219667839, "grad_norm": 0.05707553029060364, "learning_rate": 8.567531387770583e-07, "loss": 0.0009, "step": 152140 }, { "epoch": 2.489568845618915, "grad_norm": 0.08448565006256104, "learning_rate": 8.562203482732334e-07, "loss": 0.0007, "step": 152150 }, { "epoch": 2.489732471569991, "grad_norm": 0.06321943551301956, "learning_rate": 8.556877079714592e-07, "loss": 0.0007, "step": 152160 }, { "epoch": 2.4898960975210667, "grad_norm": 0.0648920014500618, "learning_rate": 8.551552178910455e-07, "loss": 0.0007, "step": 152170 }, { "epoch": 2.4900597234721427, "grad_norm": 0.05136335268616676, "learning_rate": 8.546228780512938e-07, "loss": 0.0009, "step": 152180 }, { "epoch": 2.4902233494232187, "grad_norm": 0.06561985611915588, "learning_rate": 8.540906884714989e-07, "loss": 0.0008, "step": 152190 }, { "epoch": 2.490386975374294, "grad_norm": 0.044398918747901917, "learning_rate": 8.53558649170953e-07, "loss": 0.0007, "step": 152200 }, { "epoch": 2.49055060132537, "grad_norm": 0.07872509956359863, "learning_rate": 8.53026760168939e-07, "loss": 0.0007, "step": 152210 }, { "epoch": 2.490714227276446, "grad_norm": 0.03529883921146393, "learning_rate": 8.524950214847399e-07, "loss": 0.0008, "step": 152220 }, { "epoch": 2.4908778532275218, "grad_norm": 0.27992355823516846, "learning_rate": 8.519634331376264e-07, "loss": 0.0012, "step": 152230 }, { "epoch": 2.4910414791785978, "grad_norm": 0.008263720199465752, "learning_rate": 8.514319951468697e-07, "loss": 0.0003, "step": 152240 }, { "epoch": 2.4912051051296737, "grad_norm": 0.1407913714647293, "learning_rate": 8.509007075317327e-07, "loss": 0.0004, "step": 152250 }, { "epoch": 2.4913687310807493, "grad_norm": 0.044623058289289474, "learning_rate": 8.503695703114717e-07, "loss": 0.0009, "step": 152260 }, { "epoch": 2.4915323570318253, "grad_norm": 0.04765020310878754, "learning_rate": 8.498385835053419e-07, "loss": 0.0009, "step": 152270 }, { "epoch": 2.4916959829829013, "grad_norm": 0.15740187466144562, "learning_rate": 8.493077471325872e-07, "loss": 0.001, "step": 152280 }, { "epoch": 2.491859608933977, "grad_norm": 0.08389198035001755, "learning_rate": 8.487770612124524e-07, "loss": 0.0005, "step": 152290 }, { "epoch": 2.492023234885053, "grad_norm": 0.06761686503887177, "learning_rate": 8.482465257641703e-07, "loss": 0.0009, "step": 152300 }, { "epoch": 2.492186860836129, "grad_norm": 0.03844596445560455, "learning_rate": 8.477161408069745e-07, "loss": 0.0006, "step": 152310 }, { "epoch": 2.4923504867872044, "grad_norm": 0.04087105765938759, "learning_rate": 8.471859063600874e-07, "loss": 0.0008, "step": 152320 }, { "epoch": 2.4925141127382804, "grad_norm": 0.11108768731355667, "learning_rate": 8.466558224427302e-07, "loss": 0.0006, "step": 152330 }, { "epoch": 2.4926777386893564, "grad_norm": 0.1178639605641365, "learning_rate": 8.461258890741186e-07, "loss": 0.0006, "step": 152340 }, { "epoch": 2.492841364640432, "grad_norm": 0.12647679448127747, "learning_rate": 8.455961062734602e-07, "loss": 0.0012, "step": 152350 }, { "epoch": 2.493004990591508, "grad_norm": 0.1367761641740799, "learning_rate": 8.450664740599579e-07, "loss": 0.0009, "step": 152360 }, { "epoch": 2.4931686165425835, "grad_norm": 0.03589765727519989, "learning_rate": 8.44536992452808e-07, "loss": 0.0004, "step": 152370 }, { "epoch": 2.4933322424936595, "grad_norm": 0.017452800646424294, "learning_rate": 8.440076614712056e-07, "loss": 0.0005, "step": 152380 }, { "epoch": 2.4934958684447355, "grad_norm": 0.09697717428207397, "learning_rate": 8.434784811343371e-07, "loss": 0.0007, "step": 152390 }, { "epoch": 2.493659494395811, "grad_norm": 0.04965473338961601, "learning_rate": 8.429494514613834e-07, "loss": 0.0005, "step": 152400 }, { "epoch": 2.493823120346887, "grad_norm": 0.056667428463697433, "learning_rate": 8.424205724715218e-07, "loss": 0.0007, "step": 152410 }, { "epoch": 2.493986746297963, "grad_norm": 0.006122634280472994, "learning_rate": 8.418918441839202e-07, "loss": 0.0008, "step": 152420 }, { "epoch": 2.4941503722490386, "grad_norm": 0.11279958486557007, "learning_rate": 8.413632666177473e-07, "loss": 0.0006, "step": 152430 }, { "epoch": 2.4943139982001146, "grad_norm": 0.03761591762304306, "learning_rate": 8.408348397921601e-07, "loss": 0.001, "step": 152440 }, { "epoch": 2.4944776241511906, "grad_norm": 0.05883323401212692, "learning_rate": 8.403065637263141e-07, "loss": 0.001, "step": 152450 }, { "epoch": 2.494641250102266, "grad_norm": 0.03394457697868347, "learning_rate": 8.397784384393581e-07, "loss": 0.0006, "step": 152460 }, { "epoch": 2.494804876053342, "grad_norm": 0.08883547782897949, "learning_rate": 8.392504639504334e-07, "loss": 0.0016, "step": 152470 }, { "epoch": 2.4949685020044177, "grad_norm": 0.003349594073370099, "learning_rate": 8.387226402786808e-07, "loss": 0.0008, "step": 152480 }, { "epoch": 2.4951321279554937, "grad_norm": 0.06985002756118774, "learning_rate": 8.381949674432294e-07, "loss": 0.0009, "step": 152490 }, { "epoch": 2.4952957539065697, "grad_norm": 0.019473988562822342, "learning_rate": 8.376674454632094e-07, "loss": 0.0004, "step": 152500 }, { "epoch": 2.495459379857645, "grad_norm": 0.0562312975525856, "learning_rate": 8.371400743577391e-07, "loss": 0.0008, "step": 152510 }, { "epoch": 2.495623005808721, "grad_norm": 0.015812261030077934, "learning_rate": 8.366128541459373e-07, "loss": 0.0005, "step": 152520 }, { "epoch": 2.495786631759797, "grad_norm": 0.1904226541519165, "learning_rate": 8.360857848469123e-07, "loss": 0.0009, "step": 152530 }, { "epoch": 2.4959502577108728, "grad_norm": 0.020058510825037956, "learning_rate": 8.355588664797693e-07, "loss": 0.0004, "step": 152540 }, { "epoch": 2.4961138836619488, "grad_norm": 0.14645639061927795, "learning_rate": 8.350320990636112e-07, "loss": 0.0008, "step": 152550 }, { "epoch": 2.4962775096130247, "grad_norm": 0.2425813525915146, "learning_rate": 8.345054826175264e-07, "loss": 0.0011, "step": 152560 }, { "epoch": 2.4964411355641003, "grad_norm": 0.060374826192855835, "learning_rate": 8.339790171606082e-07, "loss": 0.0006, "step": 152570 }, { "epoch": 2.4966047615151763, "grad_norm": 0.05970359221100807, "learning_rate": 8.334527027119361e-07, "loss": 0.0006, "step": 152580 }, { "epoch": 2.4967683874662523, "grad_norm": 0.03606754541397095, "learning_rate": 8.329265392905911e-07, "loss": 0.0008, "step": 152590 }, { "epoch": 2.496932013417328, "grad_norm": 0.016530266031622887, "learning_rate": 8.32400526915642e-07, "loss": 0.0005, "step": 152600 }, { "epoch": 2.497095639368404, "grad_norm": 0.153111532330513, "learning_rate": 8.318746656061577e-07, "loss": 0.0009, "step": 152610 }, { "epoch": 2.49725926531948, "grad_norm": 0.13251042366027832, "learning_rate": 8.313489553811999e-07, "loss": 0.0015, "step": 152620 }, { "epoch": 2.4974228912705554, "grad_norm": 0.06636282056570053, "learning_rate": 8.308233962598228e-07, "loss": 0.0011, "step": 152630 }, { "epoch": 2.4975865172216314, "grad_norm": 0.0069928402081131935, "learning_rate": 8.302979882610779e-07, "loss": 0.0007, "step": 152640 }, { "epoch": 2.4977501431727074, "grad_norm": 0.0771067813038826, "learning_rate": 8.297727314040094e-07, "loss": 0.0006, "step": 152650 }, { "epoch": 2.497913769123783, "grad_norm": 0.1012401208281517, "learning_rate": 8.292476257076548e-07, "loss": 0.0007, "step": 152660 }, { "epoch": 2.498077395074859, "grad_norm": 0.09480570256710052, "learning_rate": 8.287226711910517e-07, "loss": 0.0007, "step": 152670 }, { "epoch": 2.498241021025935, "grad_norm": 0.04493458941578865, "learning_rate": 8.281978678732244e-07, "loss": 0.0007, "step": 152680 }, { "epoch": 2.4984046469770105, "grad_norm": 0.00846731849014759, "learning_rate": 8.276732157731998e-07, "loss": 0.0006, "step": 152690 }, { "epoch": 2.4985682729280865, "grad_norm": 0.009564354084432125, "learning_rate": 8.271487149099911e-07, "loss": 0.0004, "step": 152700 }, { "epoch": 2.4987318988791625, "grad_norm": 0.07969698309898376, "learning_rate": 8.26624365302614e-07, "loss": 0.001, "step": 152710 }, { "epoch": 2.498895524830238, "grad_norm": 0.008087425492703915, "learning_rate": 8.261001669700719e-07, "loss": 0.0004, "step": 152720 }, { "epoch": 2.499059150781314, "grad_norm": 0.15744855999946594, "learning_rate": 8.255761199313683e-07, "loss": 0.0023, "step": 152730 }, { "epoch": 2.4992227767323896, "grad_norm": 0.30521252751350403, "learning_rate": 8.250522242054965e-07, "loss": 0.0018, "step": 152740 }, { "epoch": 2.4993864026834656, "grad_norm": 0.005865911953151226, "learning_rate": 8.245284798114478e-07, "loss": 0.0005, "step": 152750 }, { "epoch": 2.4995500286345416, "grad_norm": 0.0023570482153445482, "learning_rate": 8.240048867682066e-07, "loss": 0.0011, "step": 152760 }, { "epoch": 2.499713654585617, "grad_norm": 0.0015459408750757575, "learning_rate": 8.234814450947503e-07, "loss": 0.0006, "step": 152770 }, { "epoch": 2.499877280536693, "grad_norm": 0.017839189618825912, "learning_rate": 8.229581548100551e-07, "loss": 0.0006, "step": 152780 }, { "epoch": 2.500040906487769, "grad_norm": 0.21259450912475586, "learning_rate": 8.224350159330863e-07, "loss": 0.0016, "step": 152790 }, { "epoch": 2.5002045324388447, "grad_norm": 0.03174861893057823, "learning_rate": 8.219120284828086e-07, "loss": 0.0014, "step": 152800 }, { "epoch": 2.5003681583899207, "grad_norm": 0.29108726978302, "learning_rate": 8.213891924781769e-07, "loss": 0.0013, "step": 152810 }, { "epoch": 2.500531784340996, "grad_norm": 0.15005546808242798, "learning_rate": 8.208665079381439e-07, "loss": 0.0006, "step": 152820 }, { "epoch": 2.500695410292072, "grad_norm": 0.07798822969198227, "learning_rate": 8.203439748816572e-07, "loss": 0.0007, "step": 152830 }, { "epoch": 2.500859036243148, "grad_norm": 0.08350639790296555, "learning_rate": 8.198215933276543e-07, "loss": 0.0007, "step": 152840 }, { "epoch": 2.5010226621942238, "grad_norm": 0.20817913115024567, "learning_rate": 8.192993632950747e-07, "loss": 0.003, "step": 152850 }, { "epoch": 2.5011862881452998, "grad_norm": 0.04662749543786049, "learning_rate": 8.187772848028419e-07, "loss": 0.0019, "step": 152860 }, { "epoch": 2.5013499140963757, "grad_norm": 0.14220450818538666, "learning_rate": 8.182553578698854e-07, "loss": 0.0009, "step": 152870 }, { "epoch": 2.5015135400474513, "grad_norm": 0.08429339528083801, "learning_rate": 8.1773358251512e-07, "loss": 0.0011, "step": 152880 }, { "epoch": 2.5016771659985273, "grad_norm": 0.11262070387601852, "learning_rate": 8.172119587574601e-07, "loss": 0.0006, "step": 152890 }, { "epoch": 2.5018407919496033, "grad_norm": 0.07514145225286484, "learning_rate": 8.166904866158154e-07, "loss": 0.0007, "step": 152900 }, { "epoch": 2.502004417900679, "grad_norm": 0.0766606330871582, "learning_rate": 8.161691661090848e-07, "loss": 0.0011, "step": 152910 }, { "epoch": 2.502168043851755, "grad_norm": 0.05160791799426079, "learning_rate": 8.156479972561677e-07, "loss": 0.0009, "step": 152920 }, { "epoch": 2.502331669802831, "grad_norm": 0.10994260758161545, "learning_rate": 8.15126980075952e-07, "loss": 0.0005, "step": 152930 }, { "epoch": 2.5024952957539064, "grad_norm": 0.0421556755900383, "learning_rate": 8.146061145873269e-07, "loss": 0.0008, "step": 152940 }, { "epoch": 2.5026589217049824, "grad_norm": 0.05681430548429489, "learning_rate": 8.140854008091708e-07, "loss": 0.0008, "step": 152950 }, { "epoch": 2.5028225476560584, "grad_norm": 0.004639185965061188, "learning_rate": 8.135648387603573e-07, "loss": 0.0005, "step": 152960 }, { "epoch": 2.502986173607134, "grad_norm": 0.06267756968736649, "learning_rate": 8.130444284597577e-07, "loss": 0.0008, "step": 152970 }, { "epoch": 2.50314979955821, "grad_norm": 0.051748666912317276, "learning_rate": 8.125241699262331e-07, "loss": 0.0005, "step": 152980 }, { "epoch": 2.503313425509286, "grad_norm": 0.2513732612133026, "learning_rate": 8.120040631786447e-07, "loss": 0.0014, "step": 152990 }, { "epoch": 2.5034770514603615, "grad_norm": 0.007329247891902924, "learning_rate": 8.114841082358415e-07, "loss": 0.0006, "step": 153000 }, { "epoch": 2.5036406774114375, "grad_norm": 0.050909966230392456, "learning_rate": 8.109643051166743e-07, "loss": 0.0013, "step": 153010 }, { "epoch": 2.5038043033625135, "grad_norm": 0.060371242463588715, "learning_rate": 8.104446538399813e-07, "loss": 0.0006, "step": 153020 }, { "epoch": 2.503967929313589, "grad_norm": 0.16189496219158173, "learning_rate": 8.099251544246018e-07, "loss": 0.0012, "step": 153030 }, { "epoch": 2.504131555264665, "grad_norm": 0.027333498001098633, "learning_rate": 8.094058068893634e-07, "loss": 0.0005, "step": 153040 }, { "epoch": 2.504295181215741, "grad_norm": 0.01574457250535488, "learning_rate": 8.088866112530941e-07, "loss": 0.0005, "step": 153050 }, { "epoch": 2.5044588071668166, "grad_norm": 0.08019930124282837, "learning_rate": 8.08367567534612e-07, "loss": 0.0009, "step": 153060 }, { "epoch": 2.5046224331178926, "grad_norm": 0.10814481228590012, "learning_rate": 8.078486757527304e-07, "loss": 0.0006, "step": 153070 }, { "epoch": 2.5047860590689686, "grad_norm": 0.006461639888584614, "learning_rate": 8.073299359262593e-07, "loss": 0.0008, "step": 153080 }, { "epoch": 2.504949685020044, "grad_norm": 0.013817775063216686, "learning_rate": 8.068113480740003e-07, "loss": 0.0005, "step": 153090 }, { "epoch": 2.50511331097112, "grad_norm": 0.06163875758647919, "learning_rate": 8.062929122147522e-07, "loss": 0.0007, "step": 153100 }, { "epoch": 2.505276936922196, "grad_norm": 0.13409602642059326, "learning_rate": 8.057746283673073e-07, "loss": 0.0009, "step": 153110 }, { "epoch": 2.5054405628732717, "grad_norm": 0.03343839943408966, "learning_rate": 8.052564965504506e-07, "loss": 0.0005, "step": 153120 }, { "epoch": 2.5056041888243477, "grad_norm": 0.020452139899134636, "learning_rate": 8.047385167829658e-07, "loss": 0.0009, "step": 153130 }, { "epoch": 2.5057678147754237, "grad_norm": 0.06642342358827591, "learning_rate": 8.042206890836252e-07, "loss": 0.0007, "step": 153140 }, { "epoch": 2.505931440726499, "grad_norm": 0.11334613710641861, "learning_rate": 8.037030134712026e-07, "loss": 0.0011, "step": 153150 }, { "epoch": 2.506095066677575, "grad_norm": 0.08409946411848068, "learning_rate": 8.031854899644575e-07, "loss": 0.0005, "step": 153160 }, { "epoch": 2.5062586926286508, "grad_norm": 0.07164382189512253, "learning_rate": 8.026681185821522e-07, "loss": 0.0007, "step": 153170 }, { "epoch": 2.5064223185797267, "grad_norm": 0.028023742139339447, "learning_rate": 8.021508993430405e-07, "loss": 0.0012, "step": 153180 }, { "epoch": 2.5065859445308027, "grad_norm": 0.06099424883723259, "learning_rate": 8.016338322658679e-07, "loss": 0.0005, "step": 153190 }, { "epoch": 2.5067495704818783, "grad_norm": 0.10231123864650726, "learning_rate": 8.011169173693801e-07, "loss": 0.0005, "step": 153200 }, { "epoch": 2.5069131964329543, "grad_norm": 0.12664766609668732, "learning_rate": 8.006001546723107e-07, "loss": 0.001, "step": 153210 }, { "epoch": 2.50707682238403, "grad_norm": 0.16597691178321838, "learning_rate": 8.000835441933947e-07, "loss": 0.0007, "step": 153220 }, { "epoch": 2.507240448335106, "grad_norm": 0.12338769435882568, "learning_rate": 7.995670859513538e-07, "loss": 0.0009, "step": 153230 }, { "epoch": 2.507404074286182, "grad_norm": 0.03035760670900345, "learning_rate": 7.990507799649122e-07, "loss": 0.0006, "step": 153240 }, { "epoch": 2.5075677002372574, "grad_norm": 0.06313779205083847, "learning_rate": 7.985346262527832e-07, "loss": 0.0004, "step": 153250 }, { "epoch": 2.5077313261883334, "grad_norm": 0.10467544943094254, "learning_rate": 7.980186248336746e-07, "loss": 0.0011, "step": 153260 }, { "epoch": 2.5078949521394094, "grad_norm": 0.01759118027985096, "learning_rate": 7.975027757262932e-07, "loss": 0.0005, "step": 153270 }, { "epoch": 2.508058578090485, "grad_norm": 0.0060456921346485615, "learning_rate": 7.969870789493345e-07, "loss": 0.0005, "step": 153280 }, { "epoch": 2.508222204041561, "grad_norm": 0.014970232732594013, "learning_rate": 7.964715345214935e-07, "loss": 0.0006, "step": 153290 }, { "epoch": 2.508385829992637, "grad_norm": 0.07907494902610779, "learning_rate": 7.959561424614553e-07, "loss": 0.0008, "step": 153300 }, { "epoch": 2.5085494559437125, "grad_norm": 0.13713517785072327, "learning_rate": 7.95440902787904e-07, "loss": 0.0006, "step": 153310 }, { "epoch": 2.5087130818947885, "grad_norm": 0.05684197321534157, "learning_rate": 7.949258155195133e-07, "loss": 0.0004, "step": 153320 }, { "epoch": 2.5088767078458645, "grad_norm": 0.05368838831782341, "learning_rate": 7.944108806749551e-07, "loss": 0.0007, "step": 153330 }, { "epoch": 2.50904033379694, "grad_norm": 0.04662998020648956, "learning_rate": 7.938960982728961e-07, "loss": 0.0006, "step": 153340 }, { "epoch": 2.509203959748016, "grad_norm": 0.04324856027960777, "learning_rate": 7.93381468331994e-07, "loss": 0.0005, "step": 153350 }, { "epoch": 2.509367585699092, "grad_norm": 0.002995358081534505, "learning_rate": 7.928669908709041e-07, "loss": 0.0008, "step": 153360 }, { "epoch": 2.5095312116501676, "grad_norm": 0.0844714492559433, "learning_rate": 7.923526659082725e-07, "loss": 0.0008, "step": 153370 }, { "epoch": 2.5096948376012436, "grad_norm": 0.06279635429382324, "learning_rate": 7.918384934627438e-07, "loss": 0.0006, "step": 153380 }, { "epoch": 2.5098584635523196, "grad_norm": 0.1071927472949028, "learning_rate": 7.913244735529569e-07, "loss": 0.0008, "step": 153390 }, { "epoch": 2.510022089503395, "grad_norm": 0.22083964943885803, "learning_rate": 7.908106061975418e-07, "loss": 0.0007, "step": 153400 }, { "epoch": 2.510185715454471, "grad_norm": 0.11697695404291153, "learning_rate": 7.902968914151265e-07, "loss": 0.001, "step": 153410 }, { "epoch": 2.510349341405547, "grad_norm": 0.0012254059547558427, "learning_rate": 7.897833292243295e-07, "loss": 0.0005, "step": 153420 }, { "epoch": 2.5105129673566227, "grad_norm": 0.0795721560716629, "learning_rate": 7.892699196437698e-07, "loss": 0.0042, "step": 153430 }, { "epoch": 2.5106765933076987, "grad_norm": 0.10146130621433258, "learning_rate": 7.88756662692054e-07, "loss": 0.0006, "step": 153440 }, { "epoch": 2.5108402192587747, "grad_norm": 0.0592157244682312, "learning_rate": 7.882435583877885e-07, "loss": 0.0007, "step": 153450 }, { "epoch": 2.51100384520985, "grad_norm": 0.012290414422750473, "learning_rate": 7.877306067495716e-07, "loss": 0.001, "step": 153460 }, { "epoch": 2.511167471160926, "grad_norm": 0.03896334022283554, "learning_rate": 7.872178077959953e-07, "loss": 0.0009, "step": 153470 }, { "epoch": 2.511331097112002, "grad_norm": 0.006612281780689955, "learning_rate": 7.867051615456489e-07, "loss": 0.0009, "step": 153480 }, { "epoch": 2.5114947230630777, "grad_norm": 0.07035757601261139, "learning_rate": 7.861926680171133e-07, "loss": 0.0011, "step": 153490 }, { "epoch": 2.5116583490141537, "grad_norm": 0.023695295676589012, "learning_rate": 7.85680327228967e-07, "loss": 0.0005, "step": 153500 }, { "epoch": 2.5118219749652297, "grad_norm": 0.10984907299280167, "learning_rate": 7.851681391997784e-07, "loss": 0.001, "step": 153510 }, { "epoch": 2.5119856009163053, "grad_norm": 0.08776549249887466, "learning_rate": 7.846561039481166e-07, "loss": 0.0009, "step": 153520 }, { "epoch": 2.5121492268673813, "grad_norm": 0.08424146473407745, "learning_rate": 7.841442214925376e-07, "loss": 0.0004, "step": 153530 }, { "epoch": 2.512312852818457, "grad_norm": 0.07765801250934601, "learning_rate": 7.836324918515986e-07, "loss": 0.0012, "step": 153540 }, { "epoch": 2.512476478769533, "grad_norm": 0.08758638799190521, "learning_rate": 7.831209150438507e-07, "loss": 0.0014, "step": 153550 }, { "epoch": 2.512640104720609, "grad_norm": 0.1425127536058426, "learning_rate": 7.826094910878318e-07, "loss": 0.0004, "step": 153560 }, { "epoch": 2.5128037306716844, "grad_norm": 0.13368819653987885, "learning_rate": 7.820982200020838e-07, "loss": 0.0008, "step": 153570 }, { "epoch": 2.5129673566227604, "grad_norm": 0.15390488505363464, "learning_rate": 7.815871018051369e-07, "loss": 0.0009, "step": 153580 }, { "epoch": 2.513130982573836, "grad_norm": 0.07005128264427185, "learning_rate": 7.810761365155184e-07, "loss": 0.0008, "step": 153590 }, { "epoch": 2.513294608524912, "grad_norm": 0.01793326810002327, "learning_rate": 7.805653241517519e-07, "loss": 0.0006, "step": 153600 }, { "epoch": 2.513458234475988, "grad_norm": 0.01182679831981659, "learning_rate": 7.800546647323493e-07, "loss": 0.0002, "step": 153610 }, { "epoch": 2.5136218604270635, "grad_norm": 0.07243483513593674, "learning_rate": 7.795441582758245e-07, "loss": 0.0007, "step": 153620 }, { "epoch": 2.5137854863781395, "grad_norm": 0.11097566783428192, "learning_rate": 7.790338048006791e-07, "loss": 0.0006, "step": 153630 }, { "epoch": 2.5139491123292155, "grad_norm": 0.16654329001903534, "learning_rate": 7.785236043254146e-07, "loss": 0.0008, "step": 153640 }, { "epoch": 2.514112738280291, "grad_norm": 0.10942531377077103, "learning_rate": 7.780135568685232e-07, "loss": 0.0008, "step": 153650 }, { "epoch": 2.514276364231367, "grad_norm": 0.009948287159204483, "learning_rate": 7.775036624484922e-07, "loss": 0.0004, "step": 153660 }, { "epoch": 2.514439990182443, "grad_norm": 0.13962295651435852, "learning_rate": 7.769939210838057e-07, "loss": 0.0013, "step": 153670 }, { "epoch": 2.5146036161335186, "grad_norm": 0.05111374706029892, "learning_rate": 7.764843327929389e-07, "loss": 0.0004, "step": 153680 }, { "epoch": 2.5147672420845946, "grad_norm": 0.0080978162586689, "learning_rate": 7.759748975943654e-07, "loss": 0.0008, "step": 153690 }, { "epoch": 2.5149308680356706, "grad_norm": 0.0877828449010849, "learning_rate": 7.754656155065482e-07, "loss": 0.0006, "step": 153700 }, { "epoch": 2.515094493986746, "grad_norm": 0.08099866658449173, "learning_rate": 7.749564865479503e-07, "loss": 0.0009, "step": 153710 }, { "epoch": 2.515258119937822, "grad_norm": 0.06333150714635849, "learning_rate": 7.744475107370241e-07, "loss": 0.0007, "step": 153720 }, { "epoch": 2.515421745888898, "grad_norm": 0.2652546167373657, "learning_rate": 7.739386880922212e-07, "loss": 0.0014, "step": 153730 }, { "epoch": 2.5155853718399737, "grad_norm": 0.00912480615079403, "learning_rate": 7.734300186319821e-07, "loss": 0.0006, "step": 153740 }, { "epoch": 2.5157489977910497, "grad_norm": 0.010948944836854935, "learning_rate": 7.729215023747477e-07, "loss": 0.0008, "step": 153750 }, { "epoch": 2.5159126237421257, "grad_norm": 0.013310256414115429, "learning_rate": 7.7241313933895e-07, "loss": 0.0006, "step": 153760 }, { "epoch": 2.516076249693201, "grad_norm": 0.00677884416654706, "learning_rate": 7.719049295430131e-07, "loss": 0.0008, "step": 153770 }, { "epoch": 2.516239875644277, "grad_norm": 0.1461547166109085, "learning_rate": 7.713968730053623e-07, "loss": 0.0013, "step": 153780 }, { "epoch": 2.516403501595353, "grad_norm": 0.2367231249809265, "learning_rate": 7.708889697444106e-07, "loss": 0.0008, "step": 153790 }, { "epoch": 2.5165671275464288, "grad_norm": 0.14580337703227997, "learning_rate": 7.703812197785704e-07, "loss": 0.0009, "step": 153800 }, { "epoch": 2.5167307534975047, "grad_norm": 0.08432180434465408, "learning_rate": 7.698736231262444e-07, "loss": 0.0024, "step": 153810 }, { "epoch": 2.5168943794485807, "grad_norm": 0.06188319995999336, "learning_rate": 7.693661798058321e-07, "loss": 0.0008, "step": 153820 }, { "epoch": 2.5170580053996563, "grad_norm": 0.06611020863056183, "learning_rate": 7.688588898357296e-07, "loss": 0.0007, "step": 153830 }, { "epoch": 2.5172216313507323, "grad_norm": 0.04763653501868248, "learning_rate": 7.683517532343215e-07, "loss": 0.0005, "step": 153840 }, { "epoch": 2.5173852573018083, "grad_norm": 0.002139064949005842, "learning_rate": 7.678447700199931e-07, "loss": 0.0003, "step": 153850 }, { "epoch": 2.517548883252884, "grad_norm": 0.0751110166311264, "learning_rate": 7.673379402111197e-07, "loss": 0.0005, "step": 153860 }, { "epoch": 2.51771250920396, "grad_norm": 0.042201071977615356, "learning_rate": 7.668312638260722e-07, "loss": 0.0004, "step": 153870 }, { "epoch": 2.517876135155036, "grad_norm": 0.007482283748686314, "learning_rate": 7.663247408832187e-07, "loss": 0.0008, "step": 153880 }, { "epoch": 2.5180397611061114, "grad_norm": 0.09299102425575256, "learning_rate": 7.658183714009166e-07, "loss": 0.0008, "step": 153890 }, { "epoch": 2.5182033870571874, "grad_norm": 0.051814738661050797, "learning_rate": 7.653121553975229e-07, "loss": 0.0009, "step": 153900 }, { "epoch": 2.5183670130082634, "grad_norm": 0.09576883167028427, "learning_rate": 7.648060928913847e-07, "loss": 0.0009, "step": 153910 }, { "epoch": 2.518530638959339, "grad_norm": 0.017623549327254295, "learning_rate": 7.643001839008479e-07, "loss": 0.0004, "step": 153920 }, { "epoch": 2.518694264910415, "grad_norm": 0.14092279970645905, "learning_rate": 7.637944284442478e-07, "loss": 0.0012, "step": 153930 }, { "epoch": 2.5188578908614905, "grad_norm": 0.13379254937171936, "learning_rate": 7.632888265399196e-07, "loss": 0.0006, "step": 153940 }, { "epoch": 2.5190215168125665, "grad_norm": 0.12886199355125427, "learning_rate": 7.627833782061877e-07, "loss": 0.0017, "step": 153950 }, { "epoch": 2.5191851427636425, "grad_norm": 0.05948249250650406, "learning_rate": 7.622780834613752e-07, "loss": 0.0007, "step": 153960 }, { "epoch": 2.519348768714718, "grad_norm": 0.12851360440254211, "learning_rate": 7.617729423237974e-07, "loss": 0.0008, "step": 153970 }, { "epoch": 2.519512394665794, "grad_norm": 0.09617947787046432, "learning_rate": 7.61267954811763e-07, "loss": 0.0005, "step": 153980 }, { "epoch": 2.5196760206168696, "grad_norm": 0.019781513139605522, "learning_rate": 7.60763120943579e-07, "loss": 0.0006, "step": 153990 }, { "epoch": 2.5198396465679456, "grad_norm": 0.1499721109867096, "learning_rate": 7.602584407375413e-07, "loss": 0.0008, "step": 154000 }, { "epoch": 2.5200032725190216, "grad_norm": 0.07409385591745377, "learning_rate": 7.59753914211947e-07, "loss": 0.0005, "step": 154010 }, { "epoch": 2.520166898470097, "grad_norm": 0.019088326022028923, "learning_rate": 7.592495413850809e-07, "loss": 0.0004, "step": 154020 }, { "epoch": 2.520330524421173, "grad_norm": 0.051016125828027725, "learning_rate": 7.587453222752261e-07, "loss": 0.0005, "step": 154030 }, { "epoch": 2.520494150372249, "grad_norm": 0.08586464077234268, "learning_rate": 7.582412569006614e-07, "loss": 0.0005, "step": 154040 }, { "epoch": 2.5206577763233247, "grad_norm": 0.050012800842523575, "learning_rate": 7.577373452796549e-07, "loss": 0.0007, "step": 154050 }, { "epoch": 2.5208214022744007, "grad_norm": 0.4315398335456848, "learning_rate": 7.572335874304759e-07, "loss": 0.0006, "step": 154060 }, { "epoch": 2.5209850282254767, "grad_norm": 0.17931661009788513, "learning_rate": 7.567299833713803e-07, "loss": 0.0013, "step": 154070 }, { "epoch": 2.521148654176552, "grad_norm": 0.09433405101299286, "learning_rate": 7.562265331206253e-07, "loss": 0.0012, "step": 154080 }, { "epoch": 2.521312280127628, "grad_norm": 0.010839276947081089, "learning_rate": 7.557232366964573e-07, "loss": 0.0006, "step": 154090 }, { "epoch": 2.521475906078704, "grad_norm": 0.06988219916820526, "learning_rate": 7.552200941171217e-07, "loss": 0.0014, "step": 154100 }, { "epoch": 2.5216395320297798, "grad_norm": 0.11719708144664764, "learning_rate": 7.547171054008562e-07, "loss": 0.0017, "step": 154110 }, { "epoch": 2.5218031579808557, "grad_norm": 0.08835003525018692, "learning_rate": 7.542142705658917e-07, "loss": 0.0007, "step": 154120 }, { "epoch": 2.5219667839319317, "grad_norm": 0.08042863011360168, "learning_rate": 7.537115896304565e-07, "loss": 0.0007, "step": 154130 }, { "epoch": 2.5221304098830073, "grad_norm": 0.07623101025819778, "learning_rate": 7.532090626127691e-07, "loss": 0.0012, "step": 154140 }, { "epoch": 2.5222940358340833, "grad_norm": 0.07541704922914505, "learning_rate": 7.527066895310475e-07, "loss": 0.001, "step": 154150 }, { "epoch": 2.5224576617851593, "grad_norm": 0.00587494857609272, "learning_rate": 7.522044704035003e-07, "loss": 0.0005, "step": 154160 }, { "epoch": 2.522621287736235, "grad_norm": 0.2748318016529083, "learning_rate": 7.517024052483307e-07, "loss": 0.001, "step": 154170 }, { "epoch": 2.522784913687311, "grad_norm": 0.1162383109331131, "learning_rate": 7.512004940837392e-07, "loss": 0.0005, "step": 154180 }, { "epoch": 2.522948539638387, "grad_norm": 0.08337512612342834, "learning_rate": 7.506987369279173e-07, "loss": 0.0005, "step": 154190 }, { "epoch": 2.5231121655894624, "grad_norm": 0.0536191388964653, "learning_rate": 7.501971337990544e-07, "loss": 0.0006, "step": 154200 }, { "epoch": 2.5232757915405384, "grad_norm": 0.03441673144698143, "learning_rate": 7.496956847153297e-07, "loss": 0.0007, "step": 154210 }, { "epoch": 2.5234394174916144, "grad_norm": 0.020692232996225357, "learning_rate": 7.491943896949227e-07, "loss": 0.0006, "step": 154220 }, { "epoch": 2.52360304344269, "grad_norm": 0.12605896592140198, "learning_rate": 7.486932487560011e-07, "loss": 0.0007, "step": 154230 }, { "epoch": 2.523766669393766, "grad_norm": 0.06521426141262054, "learning_rate": 7.481922619167331e-07, "loss": 0.0008, "step": 154240 }, { "epoch": 2.523930295344842, "grad_norm": 0.08876814693212509, "learning_rate": 7.476914291952753e-07, "loss": 0.0007, "step": 154250 }, { "epoch": 2.5240939212959175, "grad_norm": 0.034678563475608826, "learning_rate": 7.471907506097842e-07, "loss": 0.0011, "step": 154260 }, { "epoch": 2.5242575472469935, "grad_norm": 0.0986066609621048, "learning_rate": 7.466902261784065e-07, "loss": 0.0007, "step": 154270 }, { "epoch": 2.5244211731980695, "grad_norm": 0.03450404107570648, "learning_rate": 7.461898559192848e-07, "loss": 0.0006, "step": 154280 }, { "epoch": 2.524584799149145, "grad_norm": 0.048597030341625214, "learning_rate": 7.456896398505581e-07, "loss": 0.0004, "step": 154290 }, { "epoch": 2.524748425100221, "grad_norm": 0.04881170764565468, "learning_rate": 7.451895779903562e-07, "loss": 0.0012, "step": 154300 }, { "epoch": 2.524912051051297, "grad_norm": 0.19125165045261383, "learning_rate": 7.446896703568057e-07, "loss": 0.0005, "step": 154310 }, { "epoch": 2.5250756770023726, "grad_norm": 0.014020061120390892, "learning_rate": 7.441899169680284e-07, "loss": 0.0009, "step": 154320 }, { "epoch": 2.5252393029534486, "grad_norm": 0.027234815061092377, "learning_rate": 7.436903178421368e-07, "loss": 0.0005, "step": 154330 }, { "epoch": 2.525402928904524, "grad_norm": 0.019534893333911896, "learning_rate": 7.431908729972426e-07, "loss": 0.0008, "step": 154340 }, { "epoch": 2.5255665548556, "grad_norm": 0.030101966112852097, "learning_rate": 7.426915824514474e-07, "loss": 0.0004, "step": 154350 }, { "epoch": 2.5257301808066757, "grad_norm": 0.1376025378704071, "learning_rate": 7.421924462228519e-07, "loss": 0.001, "step": 154360 }, { "epoch": 2.5258938067577517, "grad_norm": 0.13926704227924347, "learning_rate": 7.416934643295454e-07, "loss": 0.0013, "step": 154370 }, { "epoch": 2.5260574327088277, "grad_norm": 0.0640011802315712, "learning_rate": 7.411946367896156e-07, "loss": 0.0006, "step": 154380 }, { "epoch": 2.526221058659903, "grad_norm": 0.04125287011265755, "learning_rate": 7.406959636211458e-07, "loss": 0.0008, "step": 154390 }, { "epoch": 2.526384684610979, "grad_norm": 0.021210316568613052, "learning_rate": 7.40197444842209e-07, "loss": 0.0012, "step": 154400 }, { "epoch": 2.526548310562055, "grad_norm": 0.0033365320414304733, "learning_rate": 7.396990804708781e-07, "loss": 0.0005, "step": 154410 }, { "epoch": 2.5267119365131308, "grad_norm": 0.22003218531608582, "learning_rate": 7.392008705252146e-07, "loss": 0.0013, "step": 154420 }, { "epoch": 2.5268755624642067, "grad_norm": 0.05437715724110603, "learning_rate": 7.387028150232806e-07, "loss": 0.0012, "step": 154430 }, { "epoch": 2.5270391884152827, "grad_norm": 0.07756680995225906, "learning_rate": 7.382049139831266e-07, "loss": 0.001, "step": 154440 }, { "epoch": 2.5272028143663583, "grad_norm": 0.11223863810300827, "learning_rate": 7.377071674228025e-07, "loss": 0.0008, "step": 154450 }, { "epoch": 2.5273664403174343, "grad_norm": 0.046417899429798126, "learning_rate": 7.372095753603492e-07, "loss": 0.001, "step": 154460 }, { "epoch": 2.5275300662685103, "grad_norm": 0.0523376427590847, "learning_rate": 7.367121378138025e-07, "loss": 0.0007, "step": 154470 }, { "epoch": 2.527693692219586, "grad_norm": 0.012180913239717484, "learning_rate": 7.362148548011955e-07, "loss": 0.0015, "step": 154480 }, { "epoch": 2.527857318170662, "grad_norm": 0.11514504253864288, "learning_rate": 7.357177263405513e-07, "loss": 0.0006, "step": 154490 }, { "epoch": 2.528020944121738, "grad_norm": 0.03242136165499687, "learning_rate": 7.352207524498917e-07, "loss": 0.0004, "step": 154500 }, { "epoch": 2.5281845700728134, "grad_norm": 0.02298622950911522, "learning_rate": 7.347239331472278e-07, "loss": 0.0007, "step": 154510 }, { "epoch": 2.5283481960238894, "grad_norm": 0.016909729689359665, "learning_rate": 7.342272684505719e-07, "loss": 0.0009, "step": 154520 }, { "epoch": 2.5285118219749654, "grad_norm": 0.08366072922945023, "learning_rate": 7.337307583779235e-07, "loss": 0.0009, "step": 154530 }, { "epoch": 2.528675447926041, "grad_norm": 0.07496605068445206, "learning_rate": 7.332344029472815e-07, "loss": 0.0004, "step": 154540 }, { "epoch": 2.528839073877117, "grad_norm": 0.006733448710292578, "learning_rate": 7.327382021766388e-07, "loss": 0.0005, "step": 154550 }, { "epoch": 2.529002699828193, "grad_norm": 0.04287673532962799, "learning_rate": 7.322421560839804e-07, "loss": 0.0006, "step": 154560 }, { "epoch": 2.5291663257792685, "grad_norm": 0.07711534202098846, "learning_rate": 7.317462646872864e-07, "loss": 0.0007, "step": 154570 }, { "epoch": 2.5293299517303445, "grad_norm": 0.28493911027908325, "learning_rate": 7.312505280045301e-07, "loss": 0.0006, "step": 154580 }, { "epoch": 2.5294935776814205, "grad_norm": 0.050935205072164536, "learning_rate": 7.307549460536834e-07, "loss": 0.0004, "step": 154590 }, { "epoch": 2.529657203632496, "grad_norm": 0.026742778718471527, "learning_rate": 7.302595188527095e-07, "loss": 0.0006, "step": 154600 }, { "epoch": 2.529820829583572, "grad_norm": 0.0814829170703888, "learning_rate": 7.297642464195647e-07, "loss": 0.0004, "step": 154610 }, { "epoch": 2.529984455534648, "grad_norm": 0.1479707509279251, "learning_rate": 7.292691287722042e-07, "loss": 0.0007, "step": 154620 }, { "epoch": 2.5301480814857236, "grad_norm": 0.11793030798435211, "learning_rate": 7.287741659285725e-07, "loss": 0.0022, "step": 154630 }, { "epoch": 2.5303117074367996, "grad_norm": 0.013708998449146748, "learning_rate": 7.282793579066128e-07, "loss": 0.0009, "step": 154640 }, { "epoch": 2.5304753333878756, "grad_norm": 0.05208819732069969, "learning_rate": 7.277847047242581e-07, "loss": 0.0018, "step": 154650 }, { "epoch": 2.530638959338951, "grad_norm": 0.040433481335639954, "learning_rate": 7.272902063994408e-07, "loss": 0.001, "step": 154660 }, { "epoch": 2.530802585290027, "grad_norm": 0.07328460365533829, "learning_rate": 7.267958629500849e-07, "loss": 0.0008, "step": 154670 }, { "epoch": 2.530966211241103, "grad_norm": 0.14707572758197784, "learning_rate": 7.26301674394107e-07, "loss": 0.002, "step": 154680 }, { "epoch": 2.5311298371921787, "grad_norm": 0.05553380027413368, "learning_rate": 7.25807640749423e-07, "loss": 0.0011, "step": 154690 }, { "epoch": 2.5312934631432547, "grad_norm": 0.07082612812519073, "learning_rate": 7.253137620339384e-07, "loss": 0.0005, "step": 154700 }, { "epoch": 2.53145708909433, "grad_norm": 0.013697193004190922, "learning_rate": 7.248200382655568e-07, "loss": 0.0004, "step": 154710 }, { "epoch": 2.531620715045406, "grad_norm": 0.07054273784160614, "learning_rate": 7.243264694621727e-07, "loss": 0.0007, "step": 154720 }, { "epoch": 2.531784340996482, "grad_norm": 0.057278189808130264, "learning_rate": 7.238330556416789e-07, "loss": 0.0005, "step": 154730 }, { "epoch": 2.5319479669475577, "grad_norm": 0.06561379134654999, "learning_rate": 7.233397968219585e-07, "loss": 0.0017, "step": 154740 }, { "epoch": 2.5321115928986337, "grad_norm": 0.1253882348537445, "learning_rate": 7.228466930208916e-07, "loss": 0.0005, "step": 154750 }, { "epoch": 2.5322752188497093, "grad_norm": 0.03089512698352337, "learning_rate": 7.223537442563544e-07, "loss": 0.0005, "step": 154760 }, { "epoch": 2.5324388448007853, "grad_norm": 0.01702384650707245, "learning_rate": 7.218609505462104e-07, "loss": 0.0007, "step": 154770 }, { "epoch": 2.5326024707518613, "grad_norm": 0.18375924229621887, "learning_rate": 7.213683119083259e-07, "loss": 0.0039, "step": 154780 }, { "epoch": 2.532766096702937, "grad_norm": 0.3570653200149536, "learning_rate": 7.20875828360556e-07, "loss": 0.0009, "step": 154790 }, { "epoch": 2.532929722654013, "grad_norm": 0.09150499105453491, "learning_rate": 7.203834999207521e-07, "loss": 0.0004, "step": 154800 }, { "epoch": 2.533093348605089, "grad_norm": 0.03373308852314949, "learning_rate": 7.198913266067625e-07, "loss": 0.0003, "step": 154810 }, { "epoch": 2.5332569745561644, "grad_norm": 0.015074292197823524, "learning_rate": 7.193993084364236e-07, "loss": 0.0006, "step": 154820 }, { "epoch": 2.5334206005072404, "grad_norm": 0.0695846900343895, "learning_rate": 7.189074454275729e-07, "loss": 0.0008, "step": 154830 }, { "epoch": 2.5335842264583164, "grad_norm": 0.09850388020277023, "learning_rate": 7.184157375980366e-07, "loss": 0.0006, "step": 154840 }, { "epoch": 2.533747852409392, "grad_norm": 0.14277240633964539, "learning_rate": 7.17924184965641e-07, "loss": 0.0005, "step": 154850 }, { "epoch": 2.533911478360468, "grad_norm": 0.05869103968143463, "learning_rate": 7.174327875482012e-07, "loss": 0.0009, "step": 154860 }, { "epoch": 2.534075104311544, "grad_norm": 0.0034374427050352097, "learning_rate": 7.169415453635292e-07, "loss": 0.0004, "step": 154870 }, { "epoch": 2.5342387302626195, "grad_norm": 0.10387668013572693, "learning_rate": 7.164504584294335e-07, "loss": 0.0009, "step": 154880 }, { "epoch": 2.5344023562136955, "grad_norm": 0.04663649946451187, "learning_rate": 7.15959526763712e-07, "loss": 0.0003, "step": 154890 }, { "epoch": 2.5345659821647715, "grad_norm": 0.13063158094882965, "learning_rate": 7.154687503841629e-07, "loss": 0.0012, "step": 154900 }, { "epoch": 2.534729608115847, "grad_norm": 0.022338690236210823, "learning_rate": 7.149781293085722e-07, "loss": 0.0013, "step": 154910 }, { "epoch": 2.534893234066923, "grad_norm": 0.040566470474004745, "learning_rate": 7.144876635547271e-07, "loss": 0.0008, "step": 154920 }, { "epoch": 2.535056860017999, "grad_norm": 0.05255088955163956, "learning_rate": 7.139973531404026e-07, "loss": 0.0007, "step": 154930 }, { "epoch": 2.5352204859690746, "grad_norm": 0.37699568271636963, "learning_rate": 7.135071980833746e-07, "loss": 0.001, "step": 154940 }, { "epoch": 2.5353841119201506, "grad_norm": 0.09932459145784378, "learning_rate": 7.130171984014072e-07, "loss": 0.0005, "step": 154950 }, { "epoch": 2.5355477378712266, "grad_norm": 0.03639541193842888, "learning_rate": 7.125273541122635e-07, "loss": 0.0009, "step": 154960 }, { "epoch": 2.535711363822302, "grad_norm": 0.0035783532075583935, "learning_rate": 7.120376652336991e-07, "loss": 0.0013, "step": 154970 }, { "epoch": 2.535874989773378, "grad_norm": 0.12814374268054962, "learning_rate": 7.115481317834622e-07, "loss": 0.001, "step": 154980 }, { "epoch": 2.536038615724454, "grad_norm": 0.0740990862250328, "learning_rate": 7.110587537792996e-07, "loss": 0.0008, "step": 154990 }, { "epoch": 2.5362022416755297, "grad_norm": 0.07794848829507828, "learning_rate": 7.105695312389477e-07, "loss": 0.0012, "step": 155000 }, { "epoch": 2.5363658676266057, "grad_norm": 0.02584184892475605, "learning_rate": 7.100804641801417e-07, "loss": 0.0002, "step": 155010 }, { "epoch": 2.5365294935776816, "grad_norm": 0.037902913987636566, "learning_rate": 7.095915526206076e-07, "loss": 0.0007, "step": 155020 }, { "epoch": 2.536693119528757, "grad_norm": 0.2638149559497833, "learning_rate": 7.091027965780683e-07, "loss": 0.0008, "step": 155030 }, { "epoch": 2.536856745479833, "grad_norm": 0.058093030005693436, "learning_rate": 7.086141960702403e-07, "loss": 0.0006, "step": 155040 }, { "epoch": 2.537020371430909, "grad_norm": 0.09543897211551666, "learning_rate": 7.081257511148332e-07, "loss": 0.0006, "step": 155050 }, { "epoch": 2.5371839973819847, "grad_norm": 0.12779735028743744, "learning_rate": 7.07637461729554e-07, "loss": 0.0013, "step": 155060 }, { "epoch": 2.5373476233330607, "grad_norm": 0.054012127220630646, "learning_rate": 7.071493279320984e-07, "loss": 0.0007, "step": 155070 }, { "epoch": 2.5375112492841367, "grad_norm": 0.07275006920099258, "learning_rate": 7.06661349740162e-07, "loss": 0.0007, "step": 155080 }, { "epoch": 2.5376748752352123, "grad_norm": 0.04402687028050423, "learning_rate": 7.061735271714338e-07, "loss": 0.0007, "step": 155090 }, { "epoch": 2.5378385011862883, "grad_norm": 0.22682365775108337, "learning_rate": 7.056858602435945e-07, "loss": 0.0011, "step": 155100 }, { "epoch": 2.538002127137364, "grad_norm": 0.1548054814338684, "learning_rate": 7.051983489743225e-07, "loss": 0.0007, "step": 155110 }, { "epoch": 2.53816575308844, "grad_norm": 0.11934594810009003, "learning_rate": 7.047109933812873e-07, "loss": 0.0004, "step": 155120 }, { "epoch": 2.538329379039516, "grad_norm": 0.022091029211878777, "learning_rate": 7.042237934821561e-07, "loss": 0.0004, "step": 155130 }, { "epoch": 2.5384930049905914, "grad_norm": 0.01121938694268465, "learning_rate": 7.037367492945868e-07, "loss": 0.0004, "step": 155140 }, { "epoch": 2.5386566309416674, "grad_norm": 0.19864557683467865, "learning_rate": 7.032498608362354e-07, "loss": 0.0007, "step": 155150 }, { "epoch": 2.538820256892743, "grad_norm": 0.008831099607050419, "learning_rate": 7.027631281247494e-07, "loss": 0.0004, "step": 155160 }, { "epoch": 2.538983882843819, "grad_norm": 0.08325351774692535, "learning_rate": 7.022765511777712e-07, "loss": 0.0016, "step": 155170 }, { "epoch": 2.539147508794895, "grad_norm": 0.03260786086320877, "learning_rate": 7.017901300129398e-07, "loss": 0.0005, "step": 155180 }, { "epoch": 2.5393111347459705, "grad_norm": 0.0021589382085949183, "learning_rate": 7.013038646478843e-07, "loss": 0.0004, "step": 155190 }, { "epoch": 2.5394747606970465, "grad_norm": 0.02630332112312317, "learning_rate": 7.008177551002332e-07, "loss": 0.0008, "step": 155200 }, { "epoch": 2.5396383866481225, "grad_norm": 0.010716327466070652, "learning_rate": 7.003318013876049e-07, "loss": 0.0007, "step": 155210 }, { "epoch": 2.539802012599198, "grad_norm": 0.1302867829799652, "learning_rate": 6.998460035276155e-07, "loss": 0.0014, "step": 155220 }, { "epoch": 2.539965638550274, "grad_norm": 0.08095908910036087, "learning_rate": 6.993603615378725e-07, "loss": 0.0006, "step": 155230 }, { "epoch": 2.54012926450135, "grad_norm": 0.023711491376161575, "learning_rate": 6.988748754359797e-07, "loss": 0.0005, "step": 155240 }, { "epoch": 2.5402928904524256, "grad_norm": 0.004862272180616856, "learning_rate": 6.983895452395367e-07, "loss": 0.0007, "step": 155250 }, { "epoch": 2.5404565164035016, "grad_norm": 0.019349688664078712, "learning_rate": 6.979043709661337e-07, "loss": 0.0007, "step": 155260 }, { "epoch": 2.5406201423545776, "grad_norm": 0.010107055306434631, "learning_rate": 6.974193526333578e-07, "loss": 0.0018, "step": 155270 }, { "epoch": 2.540783768305653, "grad_norm": 0.011530616320669651, "learning_rate": 6.969344902587883e-07, "loss": 0.001, "step": 155280 }, { "epoch": 2.540947394256729, "grad_norm": 0.131178081035614, "learning_rate": 6.964497838600021e-07, "loss": 0.0014, "step": 155290 }, { "epoch": 2.541111020207805, "grad_norm": 0.034970127046108246, "learning_rate": 6.959652334545669e-07, "loss": 0.0003, "step": 155300 }, { "epoch": 2.5412746461588807, "grad_norm": 0.019622420892119408, "learning_rate": 6.954808390600475e-07, "loss": 0.0004, "step": 155310 }, { "epoch": 2.5414382721099567, "grad_norm": 0.05171030014753342, "learning_rate": 6.949966006940034e-07, "loss": 0.0007, "step": 155320 }, { "epoch": 2.5416018980610326, "grad_norm": 0.007063876371830702, "learning_rate": 6.945125183739848e-07, "loss": 0.0007, "step": 155330 }, { "epoch": 2.541765524012108, "grad_norm": 0.15509851276874542, "learning_rate": 6.940285921175404e-07, "loss": 0.0008, "step": 155340 }, { "epoch": 2.541929149963184, "grad_norm": 0.05866532027721405, "learning_rate": 6.935448219422091e-07, "loss": 0.0006, "step": 155350 }, { "epoch": 2.54209277591426, "grad_norm": 0.1022576168179512, "learning_rate": 6.930612078655285e-07, "loss": 0.0007, "step": 155360 }, { "epoch": 2.5422564018653357, "grad_norm": 0.12192277610301971, "learning_rate": 6.925777499050285e-07, "loss": 0.0009, "step": 155370 }, { "epoch": 2.5424200278164117, "grad_norm": 0.020920803770422935, "learning_rate": 6.920944480782305e-07, "loss": 0.0005, "step": 155380 }, { "epoch": 2.5425836537674877, "grad_norm": 0.158793643116951, "learning_rate": 6.916113024026566e-07, "loss": 0.0007, "step": 155390 }, { "epoch": 2.5427472797185633, "grad_norm": 0.051074981689453125, "learning_rate": 6.911283128958168e-07, "loss": 0.0006, "step": 155400 }, { "epoch": 2.5429109056696393, "grad_norm": 0.03006868250668049, "learning_rate": 6.906454795752204e-07, "loss": 0.0011, "step": 155410 }, { "epoch": 2.5430745316207153, "grad_norm": 0.0280043575912714, "learning_rate": 6.901628024583674e-07, "loss": 0.0011, "step": 155420 }, { "epoch": 2.543238157571791, "grad_norm": 0.0024522743187844753, "learning_rate": 6.896802815627556e-07, "loss": 0.0014, "step": 155430 }, { "epoch": 2.543401783522867, "grad_norm": 0.11928123980760574, "learning_rate": 6.891979169058727e-07, "loss": 0.0009, "step": 155440 }, { "epoch": 2.543565409473943, "grad_norm": 0.0108345290645957, "learning_rate": 6.887157085052054e-07, "loss": 0.0015, "step": 155450 }, { "epoch": 2.5437290354250184, "grad_norm": 0.015453699044883251, "learning_rate": 6.882336563782321e-07, "loss": 0.0005, "step": 155460 }, { "epoch": 2.5438926613760944, "grad_norm": 0.07692184299230576, "learning_rate": 6.877517605424245e-07, "loss": 0.0005, "step": 155470 }, { "epoch": 2.54405628732717, "grad_norm": 0.01467913668602705, "learning_rate": 6.872700210152528e-07, "loss": 0.0006, "step": 155480 }, { "epoch": 2.544219913278246, "grad_norm": 0.22888760268688202, "learning_rate": 6.867884378141759e-07, "loss": 0.001, "step": 155490 }, { "epoch": 2.544383539229322, "grad_norm": 0.12037533521652222, "learning_rate": 6.863070109566528e-07, "loss": 0.0007, "step": 155500 }, { "epoch": 2.5445471651803975, "grad_norm": 0.0068886131048202515, "learning_rate": 6.858257404601315e-07, "loss": 0.0008, "step": 155510 }, { "epoch": 2.5447107911314735, "grad_norm": 0.027898428961634636, "learning_rate": 6.853446263420588e-07, "loss": 0.0006, "step": 155520 }, { "epoch": 2.544874417082549, "grad_norm": 0.0355159267783165, "learning_rate": 6.84863668619874e-07, "loss": 0.0009, "step": 155530 }, { "epoch": 2.545038043033625, "grad_norm": 0.010494722984731197, "learning_rate": 6.843828673110092e-07, "loss": 0.0004, "step": 155540 }, { "epoch": 2.545201668984701, "grad_norm": 0.04081439971923828, "learning_rate": 6.839022224328945e-07, "loss": 0.0007, "step": 155550 }, { "epoch": 2.5453652949357766, "grad_norm": 0.029591375961899757, "learning_rate": 6.834217340029503e-07, "loss": 0.0005, "step": 155560 }, { "epoch": 2.5455289208868526, "grad_norm": 0.08713199198246002, "learning_rate": 6.829414020385938e-07, "loss": 0.0008, "step": 155570 }, { "epoch": 2.5456925468379286, "grad_norm": 0.01554613746702671, "learning_rate": 6.82461226557235e-07, "loss": 0.0003, "step": 155580 }, { "epoch": 2.545856172789004, "grad_norm": 0.169443279504776, "learning_rate": 6.819812075762794e-07, "loss": 0.0006, "step": 155590 }, { "epoch": 2.54601979874008, "grad_norm": 0.0005545279709622264, "learning_rate": 6.815013451131281e-07, "loss": 0.0006, "step": 155600 }, { "epoch": 2.546183424691156, "grad_norm": 0.019296841695904732, "learning_rate": 6.810216391851726e-07, "loss": 0.0006, "step": 155610 }, { "epoch": 2.5463470506422317, "grad_norm": 0.00811464712023735, "learning_rate": 6.805420898098036e-07, "loss": 0.0006, "step": 155620 }, { "epoch": 2.5465106765933077, "grad_norm": 0.044037241488695145, "learning_rate": 6.800626970044011e-07, "loss": 0.0011, "step": 155630 }, { "epoch": 2.5466743025443837, "grad_norm": 0.2193140834569931, "learning_rate": 6.795834607863444e-07, "loss": 0.0011, "step": 155640 }, { "epoch": 2.546837928495459, "grad_norm": 0.04491673782467842, "learning_rate": 6.791043811730019e-07, "loss": 0.0008, "step": 155650 }, { "epoch": 2.547001554446535, "grad_norm": 0.09423117339611053, "learning_rate": 6.78625458181742e-07, "loss": 0.0007, "step": 155660 }, { "epoch": 2.547165180397611, "grad_norm": 0.07031507045030594, "learning_rate": 6.781466918299229e-07, "loss": 0.0004, "step": 155670 }, { "epoch": 2.5473288063486867, "grad_norm": 0.09583548456430435, "learning_rate": 6.77668082134898e-07, "loss": 0.0007, "step": 155680 }, { "epoch": 2.5474924322997627, "grad_norm": 0.12722645699977875, "learning_rate": 6.771896291140173e-07, "loss": 0.0005, "step": 155690 }, { "epoch": 2.5476560582508387, "grad_norm": 0.037257205694913864, "learning_rate": 6.767113327846225e-07, "loss": 0.0009, "step": 155700 }, { "epoch": 2.5478196842019143, "grad_norm": 0.1378713995218277, "learning_rate": 6.76233193164052e-07, "loss": 0.0007, "step": 155710 }, { "epoch": 2.5479833101529903, "grad_norm": 0.002491287887096405, "learning_rate": 6.757552102696352e-07, "loss": 0.0004, "step": 155720 }, { "epoch": 2.5481469361040663, "grad_norm": 0.07962657511234283, "learning_rate": 6.752773841187e-07, "loss": 0.0009, "step": 155730 }, { "epoch": 2.548310562055142, "grad_norm": 0.22309452295303345, "learning_rate": 6.747997147285645e-07, "loss": 0.0008, "step": 155740 }, { "epoch": 2.548474188006218, "grad_norm": 0.08748647570610046, "learning_rate": 6.743222021165441e-07, "loss": 0.0013, "step": 155750 }, { "epoch": 2.548637813957294, "grad_norm": 0.038402657955884933, "learning_rate": 6.738448462999491e-07, "loss": 0.0005, "step": 155760 }, { "epoch": 2.5488014399083694, "grad_norm": 0.08433476835489273, "learning_rate": 6.733676472960787e-07, "loss": 0.0007, "step": 155770 }, { "epoch": 2.5489650658594454, "grad_norm": 0.10695762932300568, "learning_rate": 6.728906051222339e-07, "loss": 0.0008, "step": 155780 }, { "epoch": 2.5491286918105214, "grad_norm": 0.11408143490552902, "learning_rate": 6.724137197957031e-07, "loss": 0.0009, "step": 155790 }, { "epoch": 2.549292317761597, "grad_norm": 0.007140147965401411, "learning_rate": 6.719369913337742e-07, "loss": 0.0005, "step": 155800 }, { "epoch": 2.549455943712673, "grad_norm": 0.0691949725151062, "learning_rate": 6.714604197537283e-07, "loss": 0.0007, "step": 155810 }, { "epoch": 2.549619569663749, "grad_norm": 0.10916735231876373, "learning_rate": 6.709840050728372e-07, "loss": 0.0004, "step": 155820 }, { "epoch": 2.5497831956148245, "grad_norm": 0.06625304371118546, "learning_rate": 6.705077473083732e-07, "loss": 0.0005, "step": 155830 }, { "epoch": 2.5499468215659005, "grad_norm": 0.026830971240997314, "learning_rate": 6.700316464775958e-07, "loss": 0.0012, "step": 155840 }, { "epoch": 2.5501104475169765, "grad_norm": 0.10253141820430756, "learning_rate": 6.695557025977661e-07, "loss": 0.0006, "step": 155850 }, { "epoch": 2.550274073468052, "grad_norm": 0.028377173468470573, "learning_rate": 6.690799156861333e-07, "loss": 0.0009, "step": 155860 }, { "epoch": 2.550437699419128, "grad_norm": 0.08253173530101776, "learning_rate": 6.686042857599451e-07, "loss": 0.0009, "step": 155870 }, { "epoch": 2.5506013253702036, "grad_norm": 0.11206453293561935, "learning_rate": 6.681288128364416e-07, "loss": 0.0006, "step": 155880 }, { "epoch": 2.5507649513212796, "grad_norm": 0.004035826772451401, "learning_rate": 6.676534969328558e-07, "loss": 0.0003, "step": 155890 }, { "epoch": 2.5509285772723556, "grad_norm": 0.013340151868760586, "learning_rate": 6.671783380664193e-07, "loss": 0.0006, "step": 155900 }, { "epoch": 2.551092203223431, "grad_norm": 0.11304190754890442, "learning_rate": 6.667033362543534e-07, "loss": 0.0007, "step": 155910 }, { "epoch": 2.551255829174507, "grad_norm": 0.02534390054643154, "learning_rate": 6.662284915138784e-07, "loss": 0.0008, "step": 155920 }, { "epoch": 2.5514194551255827, "grad_norm": 0.0638873279094696, "learning_rate": 6.65753803862203e-07, "loss": 0.0003, "step": 155930 }, { "epoch": 2.5515830810766587, "grad_norm": 0.05399458110332489, "learning_rate": 6.652792733165364e-07, "loss": 0.0008, "step": 155940 }, { "epoch": 2.5517467070277347, "grad_norm": 0.05092838034033775, "learning_rate": 6.648048998940765e-07, "loss": 0.0006, "step": 155950 }, { "epoch": 2.55191033297881, "grad_norm": 0.299388587474823, "learning_rate": 6.6433068361202e-07, "loss": 0.002, "step": 155960 }, { "epoch": 2.552073958929886, "grad_norm": 0.01573743298649788, "learning_rate": 6.638566244875577e-07, "loss": 0.0005, "step": 155970 }, { "epoch": 2.552237584880962, "grad_norm": 0.0637495219707489, "learning_rate": 6.633827225378692e-07, "loss": 0.0007, "step": 155980 }, { "epoch": 2.5524012108320377, "grad_norm": 0.020792296156287193, "learning_rate": 6.629089777801351e-07, "loss": 0.0009, "step": 155990 }, { "epoch": 2.5525648367831137, "grad_norm": 0.07451620697975159, "learning_rate": 6.624353902315251e-07, "loss": 0.001, "step": 156000 }, { "epoch": 2.5527284627341897, "grad_norm": 0.21134567260742188, "learning_rate": 6.619619599092075e-07, "loss": 0.0015, "step": 156010 }, { "epoch": 2.5528920886852653, "grad_norm": 0.05754583701491356, "learning_rate": 6.614886868303438e-07, "loss": 0.0005, "step": 156020 }, { "epoch": 2.5530557146363413, "grad_norm": 0.07743552327156067, "learning_rate": 6.610155710120869e-07, "loss": 0.0005, "step": 156030 }, { "epoch": 2.5532193405874173, "grad_norm": 0.007690403610467911, "learning_rate": 6.60542612471588e-07, "loss": 0.0005, "step": 156040 }, { "epoch": 2.553382966538493, "grad_norm": 0.13462114334106445, "learning_rate": 6.60069811225989e-07, "loss": 0.0016, "step": 156050 }, { "epoch": 2.553546592489569, "grad_norm": 0.08158423006534576, "learning_rate": 6.595971672924295e-07, "loss": 0.0008, "step": 156060 }, { "epoch": 2.553710218440645, "grad_norm": 0.10743445158004761, "learning_rate": 6.591246806880408e-07, "loss": 0.0012, "step": 156070 }, { "epoch": 2.5538738443917204, "grad_norm": 0.12791578471660614, "learning_rate": 6.586523514299481e-07, "loss": 0.0017, "step": 156080 }, { "epoch": 2.5540374703427964, "grad_norm": 0.04703845456242561, "learning_rate": 6.58180179535275e-07, "loss": 0.0008, "step": 156090 }, { "epoch": 2.5542010962938724, "grad_norm": 0.10353540629148483, "learning_rate": 6.577081650211336e-07, "loss": 0.001, "step": 156100 }, { "epoch": 2.554364722244948, "grad_norm": 0.009049639105796814, "learning_rate": 6.572363079046357e-07, "loss": 0.0006, "step": 156110 }, { "epoch": 2.554528348196024, "grad_norm": 0.023534933105111122, "learning_rate": 6.567646082028834e-07, "loss": 0.0007, "step": 156120 }, { "epoch": 2.5546919741471, "grad_norm": 0.06436805427074432, "learning_rate": 6.562930659329764e-07, "loss": 0.0008, "step": 156130 }, { "epoch": 2.5548556000981755, "grad_norm": 0.005399988032877445, "learning_rate": 6.55821681112005e-07, "loss": 0.0003, "step": 156140 }, { "epoch": 2.5550192260492515, "grad_norm": 0.07851773500442505, "learning_rate": 6.553504537570577e-07, "loss": 0.0009, "step": 156150 }, { "epoch": 2.5551828520003275, "grad_norm": 0.08111679553985596, "learning_rate": 6.54879383885213e-07, "loss": 0.001, "step": 156160 }, { "epoch": 2.555346477951403, "grad_norm": 0.08843546360731125, "learning_rate": 6.54408471513549e-07, "loss": 0.0024, "step": 156170 }, { "epoch": 2.555510103902479, "grad_norm": 0.006365268956869841, "learning_rate": 6.539377166591332e-07, "loss": 0.0011, "step": 156180 }, { "epoch": 2.555673729853555, "grad_norm": 0.10550446808338165, "learning_rate": 6.534671193390286e-07, "loss": 0.0006, "step": 156190 }, { "epoch": 2.5558373558046306, "grad_norm": 0.03861622512340546, "learning_rate": 6.529966795702953e-07, "loss": 0.0011, "step": 156200 }, { "epoch": 2.5560009817557066, "grad_norm": 0.12708547711372375, "learning_rate": 6.525263973699836e-07, "loss": 0.0007, "step": 156210 }, { "epoch": 2.5561646077067826, "grad_norm": 0.015784651041030884, "learning_rate": 6.520562727551422e-07, "loss": 0.0005, "step": 156220 }, { "epoch": 2.556328233657858, "grad_norm": 0.005091626662760973, "learning_rate": 6.515863057428101e-07, "loss": 0.0009, "step": 156230 }, { "epoch": 2.556491859608934, "grad_norm": 0.044475264847278595, "learning_rate": 6.511164963500233e-07, "loss": 0.0008, "step": 156240 }, { "epoch": 2.5566554855600097, "grad_norm": 0.12598271667957306, "learning_rate": 6.506468445938119e-07, "loss": 0.0019, "step": 156250 }, { "epoch": 2.5568191115110857, "grad_norm": 0.06293056905269623, "learning_rate": 6.501773504911979e-07, "loss": 0.0006, "step": 156260 }, { "epoch": 2.5569827374621616, "grad_norm": 0.139814555644989, "learning_rate": 6.497080140592027e-07, "loss": 0.0007, "step": 156270 }, { "epoch": 2.557146363413237, "grad_norm": 0.04048897698521614, "learning_rate": 6.492388353148343e-07, "loss": 0.0005, "step": 156280 }, { "epoch": 2.557309989364313, "grad_norm": 0.23403649032115936, "learning_rate": 6.487698142751014e-07, "loss": 0.0009, "step": 156290 }, { "epoch": 2.5574736153153887, "grad_norm": 0.043612074106931686, "learning_rate": 6.483009509570059e-07, "loss": 0.001, "step": 156300 }, { "epoch": 2.5576372412664647, "grad_norm": 0.0636618435382843, "learning_rate": 6.478322453775409e-07, "loss": 0.0004, "step": 156310 }, { "epoch": 2.5578008672175407, "grad_norm": 0.05871919170022011, "learning_rate": 6.473636975536979e-07, "loss": 0.0009, "step": 156320 }, { "epoch": 2.5579644931686163, "grad_norm": 0.023224275559186935, "learning_rate": 6.468953075024581e-07, "loss": 0.0005, "step": 156330 }, { "epoch": 2.5581281191196923, "grad_norm": 0.1109846830368042, "learning_rate": 6.464270752408025e-07, "loss": 0.0009, "step": 156340 }, { "epoch": 2.5582917450707683, "grad_norm": 0.04998786747455597, "learning_rate": 6.459590007857003e-07, "loss": 0.0004, "step": 156350 }, { "epoch": 2.558455371021844, "grad_norm": 0.004235696978867054, "learning_rate": 6.454910841541212e-07, "loss": 0.0006, "step": 156360 }, { "epoch": 2.55861899697292, "grad_norm": 0.07944649457931519, "learning_rate": 6.450233253630244e-07, "loss": 0.0004, "step": 156370 }, { "epoch": 2.558782622923996, "grad_norm": 0.06845125555992126, "learning_rate": 6.445557244293638e-07, "loss": 0.0003, "step": 156380 }, { "epoch": 2.5589462488750714, "grad_norm": 0.007240402512252331, "learning_rate": 6.440882813700916e-07, "loss": 0.0006, "step": 156390 }, { "epoch": 2.5591098748261474, "grad_norm": 0.06601081788539886, "learning_rate": 6.436209962021489e-07, "loss": 0.0005, "step": 156400 }, { "epoch": 2.5592735007772234, "grad_norm": 0.08220779150724411, "learning_rate": 6.43153868942476e-07, "loss": 0.0005, "step": 156410 }, { "epoch": 2.559437126728299, "grad_norm": 0.015207826159894466, "learning_rate": 6.426868996080027e-07, "loss": 0.0009, "step": 156420 }, { "epoch": 2.559600752679375, "grad_norm": 0.16617946326732635, "learning_rate": 6.422200882156577e-07, "loss": 0.0013, "step": 156430 }, { "epoch": 2.559764378630451, "grad_norm": 0.0746876522898674, "learning_rate": 6.417534347823601e-07, "loss": 0.0005, "step": 156440 }, { "epoch": 2.5599280045815265, "grad_norm": 0.24322889745235443, "learning_rate": 6.412869393250253e-07, "loss": 0.0011, "step": 156450 }, { "epoch": 2.5600916305326025, "grad_norm": 0.07840018719434738, "learning_rate": 6.408206018605645e-07, "loss": 0.0011, "step": 156460 }, { "epoch": 2.5602552564836785, "grad_norm": 0.17892731726169586, "learning_rate": 6.403544224058794e-07, "loss": 0.0008, "step": 156470 }, { "epoch": 2.560418882434754, "grad_norm": 0.14070363342761993, "learning_rate": 6.398884009778689e-07, "loss": 0.0004, "step": 156480 }, { "epoch": 2.56058250838583, "grad_norm": 0.07920346409082413, "learning_rate": 6.394225375934232e-07, "loss": 0.0004, "step": 156490 }, { "epoch": 2.560746134336906, "grad_norm": 0.11343622952699661, "learning_rate": 6.389568322694317e-07, "loss": 0.0008, "step": 156500 }, { "epoch": 2.5609097602879816, "grad_norm": 0.0613960400223732, "learning_rate": 6.38491285022772e-07, "loss": 0.0005, "step": 156510 }, { "epoch": 2.5610733862390576, "grad_norm": 0.04808366298675537, "learning_rate": 6.380258958703206e-07, "loss": 0.0008, "step": 156520 }, { "epoch": 2.5612370121901336, "grad_norm": 0.0035386946983635426, "learning_rate": 6.375606648289478e-07, "loss": 0.0008, "step": 156530 }, { "epoch": 2.561400638141209, "grad_norm": 0.09408704936504364, "learning_rate": 6.370955919155152e-07, "loss": 0.0011, "step": 156540 }, { "epoch": 2.561564264092285, "grad_norm": 0.0018898221896961331, "learning_rate": 6.366306771468822e-07, "loss": 0.0014, "step": 156550 }, { "epoch": 2.561727890043361, "grad_norm": 0.14769554138183594, "learning_rate": 6.361659205398995e-07, "loss": 0.0004, "step": 156560 }, { "epoch": 2.5618915159944367, "grad_norm": 0.0028517222963273525, "learning_rate": 6.35701322111415e-07, "loss": 0.0008, "step": 156570 }, { "epoch": 2.5620551419455126, "grad_norm": 0.04328923672437668, "learning_rate": 6.352368818782678e-07, "loss": 0.0008, "step": 156580 }, { "epoch": 2.5622187678965886, "grad_norm": 0.056838445365428925, "learning_rate": 6.347725998572924e-07, "loss": 0.0007, "step": 156590 }, { "epoch": 2.562382393847664, "grad_norm": 0.017132185399532318, "learning_rate": 6.343084760653195e-07, "loss": 0.0004, "step": 156600 }, { "epoch": 2.56254601979874, "grad_norm": 0.07038655877113342, "learning_rate": 6.338445105191709e-07, "loss": 0.0013, "step": 156610 }, { "epoch": 2.562709645749816, "grad_norm": 0.11558183282613754, "learning_rate": 6.333807032356659e-07, "loss": 0.0003, "step": 156620 }, { "epoch": 2.5628732717008917, "grad_norm": 0.14009438455104828, "learning_rate": 6.329170542316148e-07, "loss": 0.0008, "step": 156630 }, { "epoch": 2.5630368976519677, "grad_norm": 0.1374894678592682, "learning_rate": 6.324535635238255e-07, "loss": 0.0009, "step": 156640 }, { "epoch": 2.5632005236030433, "grad_norm": 0.04323675483465195, "learning_rate": 6.31990231129096e-07, "loss": 0.0006, "step": 156650 }, { "epoch": 2.5633641495541193, "grad_norm": 0.004057818092405796, "learning_rate": 6.315270570642234e-07, "loss": 0.0006, "step": 156660 }, { "epoch": 2.5635277755051953, "grad_norm": 0.022212103009223938, "learning_rate": 6.310640413459962e-07, "loss": 0.0006, "step": 156670 }, { "epoch": 2.563691401456271, "grad_norm": 0.0089713204652071, "learning_rate": 6.306011839911952e-07, "loss": 0.0008, "step": 156680 }, { "epoch": 2.563855027407347, "grad_norm": 0.08577723056077957, "learning_rate": 6.301384850166015e-07, "loss": 0.0005, "step": 156690 }, { "epoch": 2.5640186533584224, "grad_norm": 0.09871995449066162, "learning_rate": 6.296759444389833e-07, "loss": 0.0008, "step": 156700 }, { "epoch": 2.5641822793094984, "grad_norm": 0.046393319964408875, "learning_rate": 6.2921356227511e-07, "loss": 0.0009, "step": 156710 }, { "epoch": 2.5643459052605744, "grad_norm": 0.019942471757531166, "learning_rate": 6.287513385417388e-07, "loss": 0.0006, "step": 156720 }, { "epoch": 2.56450953121165, "grad_norm": 0.02362806536257267, "learning_rate": 6.282892732556256e-07, "loss": 0.0011, "step": 156730 }, { "epoch": 2.564673157162726, "grad_norm": 0.10480473190546036, "learning_rate": 6.278273664335199e-07, "loss": 0.001, "step": 156740 }, { "epoch": 2.564836783113802, "grad_norm": 0.02225787192583084, "learning_rate": 6.273656180921634e-07, "loss": 0.0011, "step": 156750 }, { "epoch": 2.5650004090648775, "grad_norm": 0.07879892736673355, "learning_rate": 6.269040282482947e-07, "loss": 0.0007, "step": 156760 }, { "epoch": 2.5651640350159535, "grad_norm": 0.13206292688846588, "learning_rate": 6.264425969186444e-07, "loss": 0.0009, "step": 156770 }, { "epoch": 2.5653276609670295, "grad_norm": 0.028706125915050507, "learning_rate": 6.259813241199386e-07, "loss": 0.0006, "step": 156780 }, { "epoch": 2.565491286918105, "grad_norm": 0.02318262681365013, "learning_rate": 6.255202098688956e-07, "loss": 0.0006, "step": 156790 }, { "epoch": 2.565654912869181, "grad_norm": 0.04380005970597267, "learning_rate": 6.25059254182232e-07, "loss": 0.0005, "step": 156800 }, { "epoch": 2.565818538820257, "grad_norm": 0.04346119612455368, "learning_rate": 6.245984570766561e-07, "loss": 0.0008, "step": 156810 }, { "epoch": 2.5659821647713326, "grad_norm": 0.05704323202371597, "learning_rate": 6.24137818568869e-07, "loss": 0.0011, "step": 156820 }, { "epoch": 2.5661457907224086, "grad_norm": 0.0700591504573822, "learning_rate": 6.236773386755701e-07, "loss": 0.0005, "step": 156830 }, { "epoch": 2.5663094166734846, "grad_norm": 0.09425564855337143, "learning_rate": 6.232170174134483e-07, "loss": 0.0009, "step": 156840 }, { "epoch": 2.56647304262456, "grad_norm": 0.10713460296392441, "learning_rate": 6.227568547991913e-07, "loss": 0.0006, "step": 156850 }, { "epoch": 2.566636668575636, "grad_norm": 0.022373080253601074, "learning_rate": 6.22296850849477e-07, "loss": 0.0002, "step": 156860 }, { "epoch": 2.566800294526712, "grad_norm": 0.047354452311992645, "learning_rate": 6.218370055809814e-07, "loss": 0.0004, "step": 156870 }, { "epoch": 2.5669639204777877, "grad_norm": 0.1322072148323059, "learning_rate": 6.213773190103717e-07, "loss": 0.0015, "step": 156880 }, { "epoch": 2.5671275464288636, "grad_norm": 0.052421461790800095, "learning_rate": 6.209177911543091e-07, "loss": 0.0006, "step": 156890 }, { "epoch": 2.5672911723799396, "grad_norm": 0.09074535965919495, "learning_rate": 6.204584220294529e-07, "loss": 0.0007, "step": 156900 }, { "epoch": 2.567454798331015, "grad_norm": 0.005451531615108252, "learning_rate": 6.199992116524522e-07, "loss": 0.0005, "step": 156910 }, { "epoch": 2.567618424282091, "grad_norm": 0.1349174529314041, "learning_rate": 6.195401600399536e-07, "loss": 0.001, "step": 156920 }, { "epoch": 2.567782050233167, "grad_norm": 0.06312589347362518, "learning_rate": 6.190812672085955e-07, "loss": 0.0008, "step": 156930 }, { "epoch": 2.5679456761842427, "grad_norm": 0.04968241974711418, "learning_rate": 6.186225331750129e-07, "loss": 0.0008, "step": 156940 }, { "epoch": 2.5681093021353187, "grad_norm": 0.07449182122945786, "learning_rate": 6.181639579558324e-07, "loss": 0.0009, "step": 156950 }, { "epoch": 2.5682729280863947, "grad_norm": 0.041133783757686615, "learning_rate": 6.177055415676769e-07, "loss": 0.0009, "step": 156960 }, { "epoch": 2.5684365540374703, "grad_norm": 0.028303591534495354, "learning_rate": 6.172472840271649e-07, "loss": 0.0004, "step": 156970 }, { "epoch": 2.5686001799885463, "grad_norm": 0.0668925791978836, "learning_rate": 6.167891853509028e-07, "loss": 0.0006, "step": 156980 }, { "epoch": 2.5687638059396223, "grad_norm": 0.06688759475946426, "learning_rate": 6.163312455554993e-07, "loss": 0.0009, "step": 156990 }, { "epoch": 2.568927431890698, "grad_norm": 0.1382204294204712, "learning_rate": 6.158734646575515e-07, "loss": 0.0005, "step": 157000 }, { "epoch": 2.569091057841774, "grad_norm": 0.014015398919582367, "learning_rate": 6.154158426736529e-07, "loss": 0.0004, "step": 157010 }, { "epoch": 2.5692546837928494, "grad_norm": 0.013954179361462593, "learning_rate": 6.149583796203934e-07, "loss": 0.0007, "step": 157020 }, { "epoch": 2.5694183097439254, "grad_norm": 0.11087533831596375, "learning_rate": 6.145010755143527e-07, "loss": 0.0007, "step": 157030 }, { "epoch": 2.5695819356950014, "grad_norm": 0.044837746769189835, "learning_rate": 6.140439303721085e-07, "loss": 0.0005, "step": 157040 }, { "epoch": 2.569745561646077, "grad_norm": 0.057757701724767685, "learning_rate": 6.135869442102294e-07, "loss": 0.0007, "step": 157050 }, { "epoch": 2.569909187597153, "grad_norm": 0.05135273188352585, "learning_rate": 6.131301170452825e-07, "loss": 0.0008, "step": 157060 }, { "epoch": 2.5700728135482285, "grad_norm": 0.13656525313854218, "learning_rate": 6.126734488938246e-07, "loss": 0.0004, "step": 157070 }, { "epoch": 2.5702364394993045, "grad_norm": 0.01967809908092022, "learning_rate": 6.122169397724087e-07, "loss": 0.0003, "step": 157080 }, { "epoch": 2.5704000654503805, "grad_norm": 0.027018357068300247, "learning_rate": 6.117605896975842e-07, "loss": 0.001, "step": 157090 }, { "epoch": 2.570563691401456, "grad_norm": 0.12258586287498474, "learning_rate": 6.113043986858908e-07, "loss": 0.0007, "step": 157100 }, { "epoch": 2.570727317352532, "grad_norm": 0.03481500595808029, "learning_rate": 6.108483667538656e-07, "loss": 0.0005, "step": 157110 }, { "epoch": 2.570890943303608, "grad_norm": 0.1512630730867386, "learning_rate": 6.103924939180366e-07, "loss": 0.0015, "step": 157120 }, { "epoch": 2.5710545692546836, "grad_norm": 0.03388206288218498, "learning_rate": 6.099367801949313e-07, "loss": 0.0004, "step": 157130 }, { "epoch": 2.5712181952057596, "grad_norm": 0.0012828354956582189, "learning_rate": 6.094812256010652e-07, "loss": 0.0007, "step": 157140 }, { "epoch": 2.5713818211568356, "grad_norm": 0.1583210676908493, "learning_rate": 6.090258301529534e-07, "loss": 0.0009, "step": 157150 }, { "epoch": 2.571545447107911, "grad_norm": 0.06827402859926224, "learning_rate": 6.085705938671005e-07, "loss": 0.0006, "step": 157160 }, { "epoch": 2.571709073058987, "grad_norm": 0.035827767103910446, "learning_rate": 6.081155167600106e-07, "loss": 0.0004, "step": 157170 }, { "epoch": 2.571872699010063, "grad_norm": 0.04035130888223648, "learning_rate": 6.076605988481771e-07, "loss": 0.0007, "step": 157180 }, { "epoch": 2.5720363249611387, "grad_norm": 0.10407208651304245, "learning_rate": 6.072058401480896e-07, "loss": 0.0006, "step": 157190 }, { "epoch": 2.5721999509122146, "grad_norm": 0.051525264978408813, "learning_rate": 6.067512406762333e-07, "loss": 0.0011, "step": 157200 }, { "epoch": 2.5723635768632906, "grad_norm": 0.15741367638111115, "learning_rate": 6.062968004490849e-07, "loss": 0.0005, "step": 157210 }, { "epoch": 2.572527202814366, "grad_norm": 0.012779264710843563, "learning_rate": 6.058425194831175e-07, "loss": 0.0004, "step": 157220 }, { "epoch": 2.572690828765442, "grad_norm": 0.016114961355924606, "learning_rate": 6.053883977947983e-07, "loss": 0.0011, "step": 157230 }, { "epoch": 2.572854454716518, "grad_norm": 0.13488760590553284, "learning_rate": 6.049344354005864e-07, "loss": 0.0007, "step": 157240 }, { "epoch": 2.5730180806675937, "grad_norm": 0.31687450408935547, "learning_rate": 6.044806323169394e-07, "loss": 0.0026, "step": 157250 }, { "epoch": 2.5731817066186697, "grad_norm": 0.03509585186839104, "learning_rate": 6.04026988560304e-07, "loss": 0.0012, "step": 157260 }, { "epoch": 2.5733453325697457, "grad_norm": 0.036281418055295944, "learning_rate": 6.035735041471269e-07, "loss": 0.0007, "step": 157270 }, { "epoch": 2.5735089585208213, "grad_norm": 0.03477535396814346, "learning_rate": 6.031201790938418e-07, "loss": 0.0005, "step": 157280 }, { "epoch": 2.5736725844718973, "grad_norm": 0.13433320820331573, "learning_rate": 6.026670134168821e-07, "loss": 0.0013, "step": 157290 }, { "epoch": 2.5738362104229733, "grad_norm": 0.02297845110297203, "learning_rate": 6.02214007132676e-07, "loss": 0.0006, "step": 157300 }, { "epoch": 2.573999836374049, "grad_norm": 0.15281973779201508, "learning_rate": 6.017611602576412e-07, "loss": 0.0008, "step": 157310 }, { "epoch": 2.574163462325125, "grad_norm": 0.06785403937101364, "learning_rate": 6.013084728081941e-07, "loss": 0.0006, "step": 157320 }, { "epoch": 2.574327088276201, "grad_norm": 0.10274849086999893, "learning_rate": 6.008559448007423e-07, "loss": 0.0009, "step": 157330 }, { "epoch": 2.5744907142272764, "grad_norm": 0.055550601333379745, "learning_rate": 6.004035762516908e-07, "loss": 0.0008, "step": 157340 }, { "epoch": 2.5746543401783524, "grad_norm": 0.04932958632707596, "learning_rate": 5.999513671774338e-07, "loss": 0.0008, "step": 157350 }, { "epoch": 2.5748179661294284, "grad_norm": 0.1966402232646942, "learning_rate": 5.994993175943659e-07, "loss": 0.001, "step": 157360 }, { "epoch": 2.574981592080504, "grad_norm": 0.03227045387029648, "learning_rate": 5.990474275188712e-07, "loss": 0.0004, "step": 157370 }, { "epoch": 2.57514521803158, "grad_norm": 0.02620607055723667, "learning_rate": 5.985956969673285e-07, "loss": 0.0013, "step": 157380 }, { "epoch": 2.575308843982656, "grad_norm": 0.0728297084569931, "learning_rate": 5.981441259561149e-07, "loss": 0.0005, "step": 157390 }, { "epoch": 2.5754724699337315, "grad_norm": 0.04984407126903534, "learning_rate": 5.976927145015959e-07, "loss": 0.0008, "step": 157400 }, { "epoch": 2.5756360958848075, "grad_norm": 0.04106207564473152, "learning_rate": 5.972414626201361e-07, "loss": 0.0004, "step": 157410 }, { "epoch": 2.575799721835883, "grad_norm": 0.06779506057500839, "learning_rate": 5.967903703280909e-07, "loss": 0.0006, "step": 157420 }, { "epoch": 2.575963347786959, "grad_norm": 0.015237879939377308, "learning_rate": 5.96339437641813e-07, "loss": 0.0005, "step": 157430 }, { "epoch": 2.576126973738035, "grad_norm": 0.18523386120796204, "learning_rate": 5.958886645776452e-07, "loss": 0.0007, "step": 157440 }, { "epoch": 2.5762905996891106, "grad_norm": 0.07157806307077408, "learning_rate": 5.954380511519286e-07, "loss": 0.0003, "step": 157450 }, { "epoch": 2.5764542256401866, "grad_norm": 0.25537362694740295, "learning_rate": 5.949875973809977e-07, "loss": 0.0007, "step": 157460 }, { "epoch": 2.576617851591262, "grad_norm": 0.028988340869545937, "learning_rate": 5.945373032811785e-07, "loss": 0.0003, "step": 157470 }, { "epoch": 2.576781477542338, "grad_norm": 0.05501402169466019, "learning_rate": 5.940871688687944e-07, "loss": 0.001, "step": 157480 }, { "epoch": 2.576945103493414, "grad_norm": 0.005681911017745733, "learning_rate": 5.936371941601599e-07, "loss": 0.0008, "step": 157490 }, { "epoch": 2.5771087294444897, "grad_norm": 0.06160204857587814, "learning_rate": 5.93187379171587e-07, "loss": 0.0008, "step": 157500 }, { "epoch": 2.5772723553955657, "grad_norm": 0.051627062261104584, "learning_rate": 5.92737723919381e-07, "loss": 0.0004, "step": 157510 }, { "epoch": 2.5774359813466416, "grad_norm": 0.09027938544750214, "learning_rate": 5.922882284198383e-07, "loss": 0.0014, "step": 157520 }, { "epoch": 2.577599607297717, "grad_norm": 0.03917877748608589, "learning_rate": 5.918388926892554e-07, "loss": 0.0005, "step": 157530 }, { "epoch": 2.577763233248793, "grad_norm": 0.0567542165517807, "learning_rate": 5.91389716743917e-07, "loss": 0.0005, "step": 157540 }, { "epoch": 2.577926859199869, "grad_norm": 0.056425791233778, "learning_rate": 5.909407006001061e-07, "loss": 0.0006, "step": 157550 }, { "epoch": 2.5780904851509447, "grad_norm": 0.03405746817588806, "learning_rate": 5.90491844274097e-07, "loss": 0.0007, "step": 157560 }, { "epoch": 2.5782541111020207, "grad_norm": 0.038308098912239075, "learning_rate": 5.900431477821616e-07, "loss": 0.0004, "step": 157570 }, { "epoch": 2.5784177370530967, "grad_norm": 0.08552277088165283, "learning_rate": 5.895946111405632e-07, "loss": 0.0011, "step": 157580 }, { "epoch": 2.5785813630041723, "grad_norm": 0.04410054534673691, "learning_rate": 5.891462343655586e-07, "loss": 0.0005, "step": 157590 }, { "epoch": 2.5787449889552483, "grad_norm": 0.040066108107566833, "learning_rate": 5.886980174734031e-07, "loss": 0.0007, "step": 157600 }, { "epoch": 2.5789086149063243, "grad_norm": 0.05465305596590042, "learning_rate": 5.88249960480341e-07, "loss": 0.0011, "step": 157610 }, { "epoch": 2.5790722408574, "grad_norm": 0.23041120171546936, "learning_rate": 5.87802063402616e-07, "loss": 0.0015, "step": 157620 }, { "epoch": 2.579235866808476, "grad_norm": 0.14901487529277802, "learning_rate": 5.873543262564602e-07, "loss": 0.0011, "step": 157630 }, { "epoch": 2.579399492759552, "grad_norm": 0.015259046107530594, "learning_rate": 5.86906749058106e-07, "loss": 0.0002, "step": 157640 }, { "epoch": 2.5795631187106274, "grad_norm": 0.20521098375320435, "learning_rate": 5.864593318237738e-07, "loss": 0.0004, "step": 157650 }, { "epoch": 2.5797267446617034, "grad_norm": 0.02627727948129177, "learning_rate": 5.86012074569684e-07, "loss": 0.0004, "step": 157660 }, { "epoch": 2.5798903706127794, "grad_norm": 0.054305899888277054, "learning_rate": 5.855649773120492e-07, "loss": 0.0009, "step": 157670 }, { "epoch": 2.580053996563855, "grad_norm": 0.06675571948289871, "learning_rate": 5.851180400670725e-07, "loss": 0.0007, "step": 157680 }, { "epoch": 2.580217622514931, "grad_norm": 0.01838107220828533, "learning_rate": 5.846712628509565e-07, "loss": 0.0009, "step": 157690 }, { "epoch": 2.580381248466007, "grad_norm": 0.05560867488384247, "learning_rate": 5.84224645679895e-07, "loss": 0.0008, "step": 157700 }, { "epoch": 2.5805448744170825, "grad_norm": 0.0031373733654618263, "learning_rate": 5.837781885700777e-07, "loss": 0.0004, "step": 157710 }, { "epoch": 2.5807085003681585, "grad_norm": 0.0669962614774704, "learning_rate": 5.833318915376857e-07, "loss": 0.0006, "step": 157720 }, { "epoch": 2.5808721263192345, "grad_norm": 0.023115383461117744, "learning_rate": 5.828857545988976e-07, "loss": 0.0006, "step": 157730 }, { "epoch": 2.58103575227031, "grad_norm": 0.04747607931494713, "learning_rate": 5.824397777698859e-07, "loss": 0.0005, "step": 157740 }, { "epoch": 2.581199378221386, "grad_norm": 0.0429086871445179, "learning_rate": 5.819939610668135e-07, "loss": 0.0007, "step": 157750 }, { "epoch": 2.581363004172462, "grad_norm": 0.13357284665107727, "learning_rate": 5.815483045058428e-07, "loss": 0.0014, "step": 157760 }, { "epoch": 2.5815266301235376, "grad_norm": 0.09011368453502655, "learning_rate": 5.811028081031267e-07, "loss": 0.0004, "step": 157770 }, { "epoch": 2.5816902560746136, "grad_norm": 0.09679222106933594, "learning_rate": 5.80657471874812e-07, "loss": 0.0005, "step": 157780 }, { "epoch": 2.5818538820256896, "grad_norm": 0.08673311024904251, "learning_rate": 5.802122958370438e-07, "loss": 0.0007, "step": 157790 }, { "epoch": 2.582017507976765, "grad_norm": 0.09451400488615036, "learning_rate": 5.797672800059556e-07, "loss": 0.0007, "step": 157800 }, { "epoch": 2.582181133927841, "grad_norm": 0.11670426279306412, "learning_rate": 5.793224243976814e-07, "loss": 0.0007, "step": 157810 }, { "epoch": 2.5823447598789167, "grad_norm": 0.031171889975667, "learning_rate": 5.788777290283432e-07, "loss": 0.0006, "step": 157820 }, { "epoch": 2.5825083858299926, "grad_norm": 0.09375748783349991, "learning_rate": 5.784331939140619e-07, "loss": 0.001, "step": 157830 }, { "epoch": 2.5826720117810686, "grad_norm": 0.037660080939531326, "learning_rate": 5.779888190709498e-07, "loss": 0.0018, "step": 157840 }, { "epoch": 2.582835637732144, "grad_norm": 0.07823069393634796, "learning_rate": 5.775446045151162e-07, "loss": 0.0014, "step": 157850 }, { "epoch": 2.58299926368322, "grad_norm": 0.0292635727673769, "learning_rate": 5.771005502626603e-07, "loss": 0.0006, "step": 157860 }, { "epoch": 2.5831628896342957, "grad_norm": 0.04550391063094139, "learning_rate": 5.766566563296805e-07, "loss": 0.0007, "step": 157870 }, { "epoch": 2.5833265155853717, "grad_norm": 0.08595076948404312, "learning_rate": 5.762129227322661e-07, "loss": 0.0008, "step": 157880 }, { "epoch": 2.5834901415364477, "grad_norm": 0.05614157021045685, "learning_rate": 5.757693494864991e-07, "loss": 0.0012, "step": 157890 }, { "epoch": 2.5836537674875233, "grad_norm": 0.04381489008665085, "learning_rate": 5.753259366084612e-07, "loss": 0.0004, "step": 157900 }, { "epoch": 2.5838173934385993, "grad_norm": 0.05486591160297394, "learning_rate": 5.74882684114223e-07, "loss": 0.0005, "step": 157910 }, { "epoch": 2.5839810193896753, "grad_norm": 0.049609098583459854, "learning_rate": 5.744395920198525e-07, "loss": 0.0005, "step": 157920 }, { "epoch": 2.584144645340751, "grad_norm": 0.18027809262275696, "learning_rate": 5.739966603414093e-07, "loss": 0.0008, "step": 157930 }, { "epoch": 2.584308271291827, "grad_norm": 0.13170255720615387, "learning_rate": 5.735538890949499e-07, "loss": 0.0004, "step": 157940 }, { "epoch": 2.584471897242903, "grad_norm": 0.039940591901540756, "learning_rate": 5.731112782965237e-07, "loss": 0.0005, "step": 157950 }, { "epoch": 2.5846355231939784, "grad_norm": 0.31801334023475647, "learning_rate": 5.726688279621734e-07, "loss": 0.0013, "step": 157960 }, { "epoch": 2.5847991491450544, "grad_norm": 0.05857295170426369, "learning_rate": 5.722265381079384e-07, "loss": 0.0005, "step": 157970 }, { "epoch": 2.5849627750961304, "grad_norm": 0.01864403858780861, "learning_rate": 5.717844087498492e-07, "loss": 0.0006, "step": 157980 }, { "epoch": 2.585126401047206, "grad_norm": 0.12821301817893982, "learning_rate": 5.713424399039324e-07, "loss": 0.0005, "step": 157990 }, { "epoch": 2.585290026998282, "grad_norm": 0.046080347150564194, "learning_rate": 5.70900631586207e-07, "loss": 0.0005, "step": 158000 }, { "epoch": 2.585453652949358, "grad_norm": 0.0629124641418457, "learning_rate": 5.704589838126889e-07, "loss": 0.0008, "step": 158010 }, { "epoch": 2.5856172789004335, "grad_norm": 0.0994546040892601, "learning_rate": 5.700174965993871e-07, "loss": 0.0007, "step": 158020 }, { "epoch": 2.5857809048515095, "grad_norm": 0.21401220560073853, "learning_rate": 5.695761699623031e-07, "loss": 0.0014, "step": 158030 }, { "epoch": 2.5859445308025855, "grad_norm": 0.13591761887073517, "learning_rate": 5.691350039174359e-07, "loss": 0.001, "step": 158040 }, { "epoch": 2.586108156753661, "grad_norm": 0.07070714980363846, "learning_rate": 5.686939984807743e-07, "loss": 0.0005, "step": 158050 }, { "epoch": 2.586271782704737, "grad_norm": 0.015184435062110424, "learning_rate": 5.682531536683062e-07, "loss": 0.0014, "step": 158060 }, { "epoch": 2.586435408655813, "grad_norm": 0.10118429362773895, "learning_rate": 5.678124694960085e-07, "loss": 0.0012, "step": 158070 }, { "epoch": 2.5865990346068886, "grad_norm": 0.04829082265496254, "learning_rate": 5.673719459798571e-07, "loss": 0.0003, "step": 158080 }, { "epoch": 2.5867626605579646, "grad_norm": 0.03125864639878273, "learning_rate": 5.669315831358196e-07, "loss": 0.0007, "step": 158090 }, { "epoch": 2.5869262865090406, "grad_norm": 0.10676240921020508, "learning_rate": 5.664913809798561e-07, "loss": 0.0007, "step": 158100 }, { "epoch": 2.587089912460116, "grad_norm": 0.1163443773984909, "learning_rate": 5.66051339527926e-07, "loss": 0.0004, "step": 158110 }, { "epoch": 2.587253538411192, "grad_norm": 0.06596343219280243, "learning_rate": 5.656114587959765e-07, "loss": 0.0007, "step": 158120 }, { "epoch": 2.587417164362268, "grad_norm": 0.05356109142303467, "learning_rate": 5.651717387999556e-07, "loss": 0.0003, "step": 158130 }, { "epoch": 2.5875807903133436, "grad_norm": 0.00801369734108448, "learning_rate": 5.647321795557986e-07, "loss": 0.0007, "step": 158140 }, { "epoch": 2.5877444162644196, "grad_norm": 0.02087208814918995, "learning_rate": 5.642927810794413e-07, "loss": 0.0006, "step": 158150 }, { "epoch": 2.5879080422154956, "grad_norm": 0.07800925523042679, "learning_rate": 5.638535433868092e-07, "loss": 0.0007, "step": 158160 }, { "epoch": 2.588071668166571, "grad_norm": 0.0074714599177241325, "learning_rate": 5.634144664938241e-07, "loss": 0.0005, "step": 158170 }, { "epoch": 2.588235294117647, "grad_norm": 0.07059358060359955, "learning_rate": 5.629755504164031e-07, "loss": 0.0006, "step": 158180 }, { "epoch": 2.5883989200687227, "grad_norm": 0.06601470708847046, "learning_rate": 5.625367951704524e-07, "loss": 0.0007, "step": 158190 }, { "epoch": 2.5885625460197987, "grad_norm": 0.017410963773727417, "learning_rate": 5.620982007718789e-07, "loss": 0.0005, "step": 158200 }, { "epoch": 2.5887261719708747, "grad_norm": 0.022253625094890594, "learning_rate": 5.616597672365786e-07, "loss": 0.0007, "step": 158210 }, { "epoch": 2.5888897979219503, "grad_norm": 0.1345239281654358, "learning_rate": 5.612214945804439e-07, "loss": 0.0004, "step": 158220 }, { "epoch": 2.5890534238730263, "grad_norm": 0.005066594574600458, "learning_rate": 5.607833828193632e-07, "loss": 0.0005, "step": 158230 }, { "epoch": 2.589217049824102, "grad_norm": 0.04432334378361702, "learning_rate": 5.603454319692142e-07, "loss": 0.0006, "step": 158240 }, { "epoch": 2.589380675775178, "grad_norm": 0.047843486070632935, "learning_rate": 5.599076420458744e-07, "loss": 0.0006, "step": 158250 }, { "epoch": 2.589544301726254, "grad_norm": 0.06755217909812927, "learning_rate": 5.594700130652092e-07, "loss": 0.0003, "step": 158260 }, { "epoch": 2.5897079276773294, "grad_norm": 0.09173483401536942, "learning_rate": 5.59032545043085e-07, "loss": 0.0009, "step": 158270 }, { "epoch": 2.5898715536284054, "grad_norm": 0.03438813239336014, "learning_rate": 5.585952379953574e-07, "loss": 0.0006, "step": 158280 }, { "epoch": 2.5900351795794814, "grad_norm": 0.04588679224252701, "learning_rate": 5.58158091937877e-07, "loss": 0.0006, "step": 158290 }, { "epoch": 2.590198805530557, "grad_norm": 0.03274879232048988, "learning_rate": 5.577211068864907e-07, "loss": 0.0004, "step": 158300 }, { "epoch": 2.590362431481633, "grad_norm": 0.011022274382412434, "learning_rate": 5.572842828570368e-07, "loss": 0.0006, "step": 158310 }, { "epoch": 2.590526057432709, "grad_norm": 0.07231555134057999, "learning_rate": 5.56847619865351e-07, "loss": 0.0006, "step": 158320 }, { "epoch": 2.5906896833837845, "grad_norm": 0.041771817952394485, "learning_rate": 5.564111179272586e-07, "loss": 0.0005, "step": 158330 }, { "epoch": 2.5908533093348605, "grad_norm": 0.06861957907676697, "learning_rate": 5.55974777058585e-07, "loss": 0.0004, "step": 158340 }, { "epoch": 2.5910169352859365, "grad_norm": 0.11759281903505325, "learning_rate": 5.555385972751431e-07, "loss": 0.001, "step": 158350 }, { "epoch": 2.591180561237012, "grad_norm": 0.134982630610466, "learning_rate": 5.551025785927466e-07, "loss": 0.0005, "step": 158360 }, { "epoch": 2.591344187188088, "grad_norm": 0.05459032207727432, "learning_rate": 5.546667210271972e-07, "loss": 0.0007, "step": 158370 }, { "epoch": 2.591507813139164, "grad_norm": 0.09381499886512756, "learning_rate": 5.542310245942961e-07, "loss": 0.0005, "step": 158380 }, { "epoch": 2.5916714390902396, "grad_norm": 0.03078806772828102, "learning_rate": 5.537954893098357e-07, "loss": 0.0007, "step": 158390 }, { "epoch": 2.5918350650413156, "grad_norm": 0.0036064907908439636, "learning_rate": 5.533601151896013e-07, "loss": 0.0005, "step": 158400 }, { "epoch": 2.5919986909923916, "grad_norm": 0.1519811898469925, "learning_rate": 5.529249022493766e-07, "loss": 0.0013, "step": 158410 }, { "epoch": 2.592162316943467, "grad_norm": 0.039427027106285095, "learning_rate": 5.524898505049348e-07, "loss": 0.0003, "step": 158420 }, { "epoch": 2.592325942894543, "grad_norm": 0.1695336550474167, "learning_rate": 5.520549599720465e-07, "loss": 0.0005, "step": 158430 }, { "epoch": 2.592489568845619, "grad_norm": 0.058742981404066086, "learning_rate": 5.516202306664769e-07, "loss": 0.0004, "step": 158440 }, { "epoch": 2.5926531947966946, "grad_norm": 0.02838270366191864, "learning_rate": 5.511856626039813e-07, "loss": 0.0018, "step": 158450 }, { "epoch": 2.5928168207477706, "grad_norm": 0.043018046766519547, "learning_rate": 5.507512558003142e-07, "loss": 0.001, "step": 158460 }, { "epoch": 2.5929804466988466, "grad_norm": 0.02285918965935707, "learning_rate": 5.503170102712197e-07, "loss": 0.0007, "step": 158470 }, { "epoch": 2.593144072649922, "grad_norm": 0.1558447778224945, "learning_rate": 5.498829260324406e-07, "loss": 0.0012, "step": 158480 }, { "epoch": 2.593307698600998, "grad_norm": 0.03746763989329338, "learning_rate": 5.494490030997079e-07, "loss": 0.0004, "step": 158490 }, { "epoch": 2.593471324552074, "grad_norm": 0.0841994658112526, "learning_rate": 5.490152414887523e-07, "loss": 0.0011, "step": 158500 }, { "epoch": 2.5936349505031497, "grad_norm": 0.11033559590578079, "learning_rate": 5.485816412152978e-07, "loss": 0.0004, "step": 158510 }, { "epoch": 2.5937985764542257, "grad_norm": 0.005358075723052025, "learning_rate": 5.481482022950591e-07, "loss": 0.0003, "step": 158520 }, { "epoch": 2.5939622024053017, "grad_norm": 0.0492313951253891, "learning_rate": 5.477149247437491e-07, "loss": 0.0003, "step": 158530 }, { "epoch": 2.5941258283563773, "grad_norm": 0.10793673992156982, "learning_rate": 5.47281808577071e-07, "loss": 0.0012, "step": 158540 }, { "epoch": 2.5942894543074533, "grad_norm": 0.02911180444061756, "learning_rate": 5.46848853810727e-07, "loss": 0.0005, "step": 158550 }, { "epoch": 2.5944530802585293, "grad_norm": 0.15029145777225494, "learning_rate": 5.464160604604079e-07, "loss": 0.0008, "step": 158560 }, { "epoch": 2.594616706209605, "grad_norm": 0.14947408437728882, "learning_rate": 5.459834285418036e-07, "loss": 0.0008, "step": 158570 }, { "epoch": 2.594780332160681, "grad_norm": 0.07210278511047363, "learning_rate": 5.455509580705953e-07, "loss": 0.0005, "step": 158580 }, { "epoch": 2.5949439581117564, "grad_norm": 0.12886103987693787, "learning_rate": 5.451186490624583e-07, "loss": 0.0006, "step": 158590 }, { "epoch": 2.5951075840628324, "grad_norm": 0.22724021971225739, "learning_rate": 5.446865015330632e-07, "loss": 0.0008, "step": 158600 }, { "epoch": 2.5952712100139084, "grad_norm": 0.0335865318775177, "learning_rate": 5.442545154980744e-07, "loss": 0.0008, "step": 158610 }, { "epoch": 2.595434835964984, "grad_norm": 0.11760455369949341, "learning_rate": 5.438226909731504e-07, "loss": 0.0008, "step": 158620 }, { "epoch": 2.59559846191606, "grad_norm": 0.26422008872032166, "learning_rate": 5.433910279739435e-07, "loss": 0.0012, "step": 158630 }, { "epoch": 2.5957620878671355, "grad_norm": 0.06604434549808502, "learning_rate": 5.429595265161008e-07, "loss": 0.0004, "step": 158640 }, { "epoch": 2.5959257138182115, "grad_norm": 0.05335859954357147, "learning_rate": 5.425281866152627e-07, "loss": 0.0004, "step": 158650 }, { "epoch": 2.5960893397692875, "grad_norm": 0.011076969094574451, "learning_rate": 5.420970082870641e-07, "loss": 0.0013, "step": 158660 }, { "epoch": 2.596252965720363, "grad_norm": 0.02541961707174778, "learning_rate": 5.416659915471362e-07, "loss": 0.0008, "step": 158670 }, { "epoch": 2.596416591671439, "grad_norm": 0.3922705054283142, "learning_rate": 5.412351364111007e-07, "loss": 0.0018, "step": 158680 }, { "epoch": 2.596580217622515, "grad_norm": 0.22217248380184174, "learning_rate": 5.408044428945747e-07, "loss": 0.001, "step": 158690 }, { "epoch": 2.5967438435735906, "grad_norm": 0.0994834452867508, "learning_rate": 5.403739110131695e-07, "loss": 0.0008, "step": 158700 }, { "epoch": 2.5969074695246666, "grad_norm": 0.051326099783182144, "learning_rate": 5.399435407824916e-07, "loss": 0.0007, "step": 158710 }, { "epoch": 2.5970710954757426, "grad_norm": 0.07883021235466003, "learning_rate": 5.395133322181423e-07, "loss": 0.0011, "step": 158720 }, { "epoch": 2.597234721426818, "grad_norm": 0.0007013934082351625, "learning_rate": 5.390832853357125e-07, "loss": 0.0016, "step": 158730 }, { "epoch": 2.597398347377894, "grad_norm": 0.0654192566871643, "learning_rate": 5.386534001507937e-07, "loss": 0.0006, "step": 158740 }, { "epoch": 2.59756197332897, "grad_norm": 0.08958490192890167, "learning_rate": 5.38223676678965e-07, "loss": 0.0011, "step": 158750 }, { "epoch": 2.5977255992800456, "grad_norm": 0.011869450099766254, "learning_rate": 5.377941149358057e-07, "loss": 0.0006, "step": 158760 }, { "epoch": 2.5978892252311216, "grad_norm": 0.025177253410220146, "learning_rate": 5.373647149368843e-07, "loss": 0.0004, "step": 158770 }, { "epoch": 2.5980528511821976, "grad_norm": 0.027847208082675934, "learning_rate": 5.369354766977669e-07, "loss": 0.0005, "step": 158780 }, { "epoch": 2.598216477133273, "grad_norm": 0.08823072910308838, "learning_rate": 5.36506400234012e-07, "loss": 0.0013, "step": 158790 }, { "epoch": 2.598380103084349, "grad_norm": 0.007788459304720163, "learning_rate": 5.360774855611717e-07, "loss": 0.0008, "step": 158800 }, { "epoch": 2.598543729035425, "grad_norm": 0.024371150881052017, "learning_rate": 5.35648732694794e-07, "loss": 0.0005, "step": 158810 }, { "epoch": 2.5987073549865007, "grad_norm": 0.03180588781833649, "learning_rate": 5.352201416504193e-07, "loss": 0.0008, "step": 158820 }, { "epoch": 2.5988709809375767, "grad_norm": 0.048278626054525375, "learning_rate": 5.347917124435853e-07, "loss": 0.0006, "step": 158830 }, { "epoch": 2.5990346068886527, "grad_norm": 0.03488343581557274, "learning_rate": 5.343634450898183e-07, "loss": 0.0004, "step": 158840 }, { "epoch": 2.5991982328397283, "grad_norm": 0.36903172731399536, "learning_rate": 5.339353396046443e-07, "loss": 0.0012, "step": 158850 }, { "epoch": 2.5993618587908043, "grad_norm": 0.025290217250585556, "learning_rate": 5.335073960035797e-07, "loss": 0.0008, "step": 158860 }, { "epoch": 2.5995254847418803, "grad_norm": 0.13256970047950745, "learning_rate": 5.330796143021372e-07, "loss": 0.0014, "step": 158870 }, { "epoch": 2.599689110692956, "grad_norm": 0.06488094478845596, "learning_rate": 5.326519945158242e-07, "loss": 0.0005, "step": 158880 }, { "epoch": 2.599852736644032, "grad_norm": 0.3628421425819397, "learning_rate": 5.322245366601381e-07, "loss": 0.0015, "step": 158890 }, { "epoch": 2.600016362595108, "grad_norm": 0.10871259868144989, "learning_rate": 5.31797240750575e-07, "loss": 0.0012, "step": 158900 }, { "epoch": 2.6001799885461834, "grad_norm": 0.08409851789474487, "learning_rate": 5.313701068026217e-07, "loss": 0.0006, "step": 158910 }, { "epoch": 2.6003436144972594, "grad_norm": 0.10313976556062698, "learning_rate": 5.309431348317634e-07, "loss": 0.0007, "step": 158920 }, { "epoch": 2.6005072404483354, "grad_norm": 0.04597606509923935, "learning_rate": 5.305163248534745e-07, "loss": 0.0006, "step": 158930 }, { "epoch": 2.600670866399411, "grad_norm": 0.041362982243299484, "learning_rate": 5.300896768832264e-07, "loss": 0.0006, "step": 158940 }, { "epoch": 2.600834492350487, "grad_norm": 0.051144372671842575, "learning_rate": 5.296631909364852e-07, "loss": 0.001, "step": 158950 }, { "epoch": 2.6009981183015625, "grad_norm": 0.07852887362241745, "learning_rate": 5.292368670287084e-07, "loss": 0.0004, "step": 158960 }, { "epoch": 2.6011617442526385, "grad_norm": 0.0826437845826149, "learning_rate": 5.288107051753511e-07, "loss": 0.0011, "step": 158970 }, { "epoch": 2.6013253702037145, "grad_norm": 0.034881506115198135, "learning_rate": 5.283847053918595e-07, "loss": 0.0008, "step": 158980 }, { "epoch": 2.60148899615479, "grad_norm": 0.04568963870406151, "learning_rate": 5.279588676936737e-07, "loss": 0.0005, "step": 158990 }, { "epoch": 2.601652622105866, "grad_norm": 0.15884359180927277, "learning_rate": 5.275331920962318e-07, "loss": 0.0009, "step": 159000 }, { "epoch": 2.6018162480569416, "grad_norm": 0.10657093673944473, "learning_rate": 5.271076786149615e-07, "loss": 0.0003, "step": 159010 }, { "epoch": 2.6019798740080176, "grad_norm": 0.061083387583494186, "learning_rate": 5.266823272652887e-07, "loss": 0.001, "step": 159020 }, { "epoch": 2.6021434999590936, "grad_norm": 0.014530250802636147, "learning_rate": 5.262571380626291e-07, "loss": 0.0006, "step": 159030 }, { "epoch": 2.602307125910169, "grad_norm": 0.028397101908922195, "learning_rate": 5.258321110223968e-07, "loss": 0.0006, "step": 159040 }, { "epoch": 2.602470751861245, "grad_norm": 0.17988136410713196, "learning_rate": 5.254072461599963e-07, "loss": 0.0007, "step": 159050 }, { "epoch": 2.602634377812321, "grad_norm": 0.09252893179655075, "learning_rate": 5.249825434908296e-07, "loss": 0.0006, "step": 159060 }, { "epoch": 2.6027980037633967, "grad_norm": 0.02128220535814762, "learning_rate": 5.245580030302888e-07, "loss": 0.0015, "step": 159070 }, { "epoch": 2.6029616297144726, "grad_norm": 0.0709434300661087, "learning_rate": 5.241336247937656e-07, "loss": 0.0005, "step": 159080 }, { "epoch": 2.6031252556655486, "grad_norm": 0.020560117438435555, "learning_rate": 5.23709408796641e-07, "loss": 0.0008, "step": 159090 }, { "epoch": 2.603288881616624, "grad_norm": 0.17943745851516724, "learning_rate": 5.232853550542904e-07, "loss": 0.0014, "step": 159100 }, { "epoch": 2.6034525075677, "grad_norm": 0.053974755108356476, "learning_rate": 5.228614635820878e-07, "loss": 0.0009, "step": 159110 }, { "epoch": 2.603616133518776, "grad_norm": 0.10413036495447159, "learning_rate": 5.224377343953951e-07, "loss": 0.0012, "step": 159120 }, { "epoch": 2.6037797594698517, "grad_norm": 0.12252624332904816, "learning_rate": 5.220141675095741e-07, "loss": 0.0009, "step": 159130 }, { "epoch": 2.6039433854209277, "grad_norm": 0.0013508291449397802, "learning_rate": 5.215907629399763e-07, "loss": 0.0006, "step": 159140 }, { "epoch": 2.6041070113720037, "grad_norm": 0.05760389566421509, "learning_rate": 5.211675207019495e-07, "loss": 0.001, "step": 159150 }, { "epoch": 2.6042706373230793, "grad_norm": 0.0725826546549797, "learning_rate": 5.207444408108364e-07, "loss": 0.001, "step": 159160 }, { "epoch": 2.6044342632741553, "grad_norm": 0.07261897623538971, "learning_rate": 5.203215232819709e-07, "loss": 0.0006, "step": 159170 }, { "epoch": 2.6045978892252313, "grad_norm": 0.11361802369356155, "learning_rate": 5.198987681306855e-07, "loss": 0.0008, "step": 159180 }, { "epoch": 2.604761515176307, "grad_norm": 0.19185525178909302, "learning_rate": 5.194761753722999e-07, "loss": 0.0008, "step": 159190 }, { "epoch": 2.604925141127383, "grad_norm": 0.058302342891693115, "learning_rate": 5.190537450221356e-07, "loss": 0.0005, "step": 159200 }, { "epoch": 2.605088767078459, "grad_norm": 0.018518047407269478, "learning_rate": 5.18631477095502e-07, "loss": 0.0006, "step": 159210 }, { "epoch": 2.6052523930295344, "grad_norm": 0.11293692141771317, "learning_rate": 5.182093716077069e-07, "loss": 0.001, "step": 159220 }, { "epoch": 2.6054160189806104, "grad_norm": 0.07691781967878342, "learning_rate": 5.177874285740513e-07, "loss": 0.0008, "step": 159230 }, { "epoch": 2.6055796449316864, "grad_norm": 0.07937619835138321, "learning_rate": 5.173656480098283e-07, "loss": 0.0004, "step": 159240 }, { "epoch": 2.605743270882762, "grad_norm": 0.182184636592865, "learning_rate": 5.169440299303269e-07, "loss": 0.001, "step": 159250 }, { "epoch": 2.605906896833838, "grad_norm": 0.013033591210842133, "learning_rate": 5.16522574350829e-07, "loss": 0.0006, "step": 159260 }, { "epoch": 2.606070522784914, "grad_norm": 0.11774258315563202, "learning_rate": 5.161012812866134e-07, "loss": 0.0007, "step": 159270 }, { "epoch": 2.6062341487359895, "grad_norm": 0.08002663403749466, "learning_rate": 5.156801507529491e-07, "loss": 0.0011, "step": 159280 }, { "epoch": 2.6063977746870655, "grad_norm": 0.2496061474084854, "learning_rate": 5.152591827651011e-07, "loss": 0.001, "step": 159290 }, { "epoch": 2.6065614006381415, "grad_norm": 0.11888951063156128, "learning_rate": 5.148383773383292e-07, "loss": 0.0009, "step": 159300 }, { "epoch": 2.606725026589217, "grad_norm": 0.08274941891431808, "learning_rate": 5.144177344878859e-07, "loss": 0.0003, "step": 159310 }, { "epoch": 2.606888652540293, "grad_norm": 0.18723848462104797, "learning_rate": 5.1399725422902e-07, "loss": 0.0011, "step": 159320 }, { "epoch": 2.607052278491369, "grad_norm": 0.08062460273504257, "learning_rate": 5.135769365769705e-07, "loss": 0.0007, "step": 159330 }, { "epoch": 2.6072159044424446, "grad_norm": 0.1098063737154007, "learning_rate": 5.131567815469752e-07, "loss": 0.0008, "step": 159340 }, { "epoch": 2.6073795303935206, "grad_norm": 0.057517293840646744, "learning_rate": 5.127367891542618e-07, "loss": 0.0007, "step": 159350 }, { "epoch": 2.607543156344596, "grad_norm": 0.0426187664270401, "learning_rate": 5.123169594140559e-07, "loss": 0.0004, "step": 159360 }, { "epoch": 2.607706782295672, "grad_norm": 0.0022553694434463978, "learning_rate": 5.118972923415733e-07, "loss": 0.0006, "step": 159370 }, { "epoch": 2.607870408246748, "grad_norm": 0.010330184362828732, "learning_rate": 5.114777879520278e-07, "loss": 0.0005, "step": 159380 }, { "epoch": 2.6080340341978236, "grad_norm": 0.03572759032249451, "learning_rate": 5.110584462606249e-07, "loss": 0.0007, "step": 159390 }, { "epoch": 2.6081976601488996, "grad_norm": 0.08679718524217606, "learning_rate": 5.106392672825639e-07, "loss": 0.0009, "step": 159400 }, { "epoch": 2.608361286099975, "grad_norm": 0.09565508365631104, "learning_rate": 5.102202510330396e-07, "loss": 0.0015, "step": 159410 }, { "epoch": 2.608524912051051, "grad_norm": 0.12138701975345612, "learning_rate": 5.098013975272397e-07, "loss": 0.0007, "step": 159420 }, { "epoch": 2.608688538002127, "grad_norm": 0.0751548707485199, "learning_rate": 5.093827067803475e-07, "loss": 0.0006, "step": 159430 }, { "epoch": 2.6088521639532027, "grad_norm": 0.037758518010377884, "learning_rate": 5.0896417880754e-07, "loss": 0.0012, "step": 159440 }, { "epoch": 2.6090157899042787, "grad_norm": 0.0408528707921505, "learning_rate": 5.085458136239862e-07, "loss": 0.0003, "step": 159450 }, { "epoch": 2.6091794158553547, "grad_norm": 0.07202189415693283, "learning_rate": 5.08127611244853e-07, "loss": 0.0007, "step": 159460 }, { "epoch": 2.6093430418064303, "grad_norm": 0.03407467156648636, "learning_rate": 5.077095716852965e-07, "loss": 0.0005, "step": 159470 }, { "epoch": 2.6095066677575063, "grad_norm": 0.13512128591537476, "learning_rate": 5.072916949604723e-07, "loss": 0.0006, "step": 159480 }, { "epoch": 2.6096702937085823, "grad_norm": 0.13897289335727692, "learning_rate": 5.068739810855261e-07, "loss": 0.0009, "step": 159490 }, { "epoch": 2.609833919659658, "grad_norm": 0.23511986434459686, "learning_rate": 5.064564300755981e-07, "loss": 0.001, "step": 159500 }, { "epoch": 2.609997545610734, "grad_norm": 0.03486602380871773, "learning_rate": 5.060390419458255e-07, "loss": 0.0011, "step": 159510 }, { "epoch": 2.61016117156181, "grad_norm": 0.04421933740377426, "learning_rate": 5.056218167113359e-07, "loss": 0.0007, "step": 159520 }, { "epoch": 2.6103247975128854, "grad_norm": 0.03591364622116089, "learning_rate": 5.052047543872546e-07, "loss": 0.0011, "step": 159530 }, { "epoch": 2.6104884234639614, "grad_norm": 0.1253529191017151, "learning_rate": 5.04787854988697e-07, "loss": 0.0011, "step": 159540 }, { "epoch": 2.6106520494150374, "grad_norm": 0.0727643072605133, "learning_rate": 5.043711185307765e-07, "loss": 0.0006, "step": 159550 }, { "epoch": 2.610815675366113, "grad_norm": 0.020934706553816795, "learning_rate": 5.039545450285971e-07, "loss": 0.0006, "step": 159560 }, { "epoch": 2.610979301317189, "grad_norm": 0.02424517087638378, "learning_rate": 5.035381344972601e-07, "loss": 0.0005, "step": 159570 }, { "epoch": 2.611142927268265, "grad_norm": 0.029768170788884163, "learning_rate": 5.03121886951859e-07, "loss": 0.0003, "step": 159580 }, { "epoch": 2.6113065532193405, "grad_norm": 0.045855093747377396, "learning_rate": 5.027058024074805e-07, "loss": 0.0006, "step": 159590 }, { "epoch": 2.6114701791704165, "grad_norm": 0.13867069780826569, "learning_rate": 5.022898808792093e-07, "loss": 0.0005, "step": 159600 }, { "epoch": 2.6116338051214925, "grad_norm": 0.038604188710451126, "learning_rate": 5.018741223821183e-07, "loss": 0.0011, "step": 159610 }, { "epoch": 2.611797431072568, "grad_norm": 0.03313662111759186, "learning_rate": 5.014585269312805e-07, "loss": 0.0009, "step": 159620 }, { "epoch": 2.611961057023644, "grad_norm": 0.06124007701873779, "learning_rate": 5.010430945417582e-07, "loss": 0.0008, "step": 159630 }, { "epoch": 2.61212468297472, "grad_norm": 0.009094329550862312, "learning_rate": 5.006278252286106e-07, "loss": 0.0005, "step": 159640 }, { "epoch": 2.6122883089257956, "grad_norm": 0.033455125987529755, "learning_rate": 5.002127190068917e-07, "loss": 0.0002, "step": 159650 }, { "epoch": 2.6124519348768716, "grad_norm": 0.02971513755619526, "learning_rate": 4.997977758916456e-07, "loss": 0.0009, "step": 159660 }, { "epoch": 2.6126155608279475, "grad_norm": 0.15464822947978973, "learning_rate": 4.993829958979151e-07, "loss": 0.0007, "step": 159670 }, { "epoch": 2.612779186779023, "grad_norm": 0.0738198310136795, "learning_rate": 4.98968379040734e-07, "loss": 0.0008, "step": 159680 }, { "epoch": 2.612942812730099, "grad_norm": 0.1407453864812851, "learning_rate": 4.985539253351318e-07, "loss": 0.0011, "step": 159690 }, { "epoch": 2.613106438681175, "grad_norm": 0.026515763252973557, "learning_rate": 4.981396347961293e-07, "loss": 0.0007, "step": 159700 }, { "epoch": 2.6132700646322506, "grad_norm": 0.0684126541018486, "learning_rate": 4.97725507438745e-07, "loss": 0.0003, "step": 159710 }, { "epoch": 2.6134336905833266, "grad_norm": 0.021793697029352188, "learning_rate": 4.973115432779918e-07, "loss": 0.0013, "step": 159720 }, { "epoch": 2.613597316534402, "grad_norm": 0.013136429712176323, "learning_rate": 4.968977423288718e-07, "loss": 0.0009, "step": 159730 }, { "epoch": 2.613760942485478, "grad_norm": 0.04625855013728142, "learning_rate": 4.964841046063862e-07, "loss": 0.0004, "step": 159740 }, { "epoch": 2.613924568436554, "grad_norm": 0.0081694470718503, "learning_rate": 4.960706301255275e-07, "loss": 0.0005, "step": 159750 }, { "epoch": 2.6140881943876297, "grad_norm": 0.05213101953268051, "learning_rate": 4.956573189012848e-07, "loss": 0.0006, "step": 159760 }, { "epoch": 2.6142518203387057, "grad_norm": 0.06317899376153946, "learning_rate": 4.952441709486366e-07, "loss": 0.0008, "step": 159770 }, { "epoch": 2.6144154462897813, "grad_norm": 0.13428868353366852, "learning_rate": 4.948311862825622e-07, "loss": 0.0006, "step": 159780 }, { "epoch": 2.6145790722408573, "grad_norm": 0.04951729625463486, "learning_rate": 4.944183649180284e-07, "loss": 0.0005, "step": 159790 }, { "epoch": 2.6147426981919333, "grad_norm": 0.06658707559108734, "learning_rate": 4.940057068699999e-07, "loss": 0.0005, "step": 159800 }, { "epoch": 2.614906324143009, "grad_norm": 0.05197855830192566, "learning_rate": 4.935932121534353e-07, "loss": 0.0017, "step": 159810 }, { "epoch": 2.615069950094085, "grad_norm": 0.20846471190452576, "learning_rate": 4.931808807832849e-07, "loss": 0.0007, "step": 159820 }, { "epoch": 2.615233576045161, "grad_norm": 0.2562103271484375, "learning_rate": 4.927687127744968e-07, "loss": 0.0016, "step": 159830 }, { "epoch": 2.6153972019962364, "grad_norm": 0.05031110346317291, "learning_rate": 4.923567081420089e-07, "loss": 0.0007, "step": 159840 }, { "epoch": 2.6155608279473124, "grad_norm": 0.11691921204328537, "learning_rate": 4.919448669007576e-07, "loss": 0.0004, "step": 159850 }, { "epoch": 2.6157244538983884, "grad_norm": 0.20276382565498352, "learning_rate": 4.915331890656689e-07, "loss": 0.0009, "step": 159860 }, { "epoch": 2.615888079849464, "grad_norm": 0.09058766067028046, "learning_rate": 4.911216746516662e-07, "loss": 0.0013, "step": 159870 }, { "epoch": 2.61605170580054, "grad_norm": 0.1431046426296234, "learning_rate": 4.907103236736683e-07, "loss": 0.0006, "step": 159880 }, { "epoch": 2.616215331751616, "grad_norm": 0.16631601750850677, "learning_rate": 4.902991361465809e-07, "loss": 0.0006, "step": 159890 }, { "epoch": 2.6163789577026915, "grad_norm": 0.04936761036515236, "learning_rate": 4.898881120853121e-07, "loss": 0.0004, "step": 159900 }, { "epoch": 2.6165425836537675, "grad_norm": 0.04305139183998108, "learning_rate": 4.894772515047591e-07, "loss": 0.0006, "step": 159910 }, { "epoch": 2.6167062096048435, "grad_norm": 0.004726100247353315, "learning_rate": 4.890665544198147e-07, "loss": 0.0005, "step": 159920 }, { "epoch": 2.616869835555919, "grad_norm": 0.034349896013736725, "learning_rate": 4.886560208453667e-07, "loss": 0.0008, "step": 159930 }, { "epoch": 2.617033461506995, "grad_norm": 0.028120718896389008, "learning_rate": 4.882456507962946e-07, "loss": 0.0009, "step": 159940 }, { "epoch": 2.617197087458071, "grad_norm": 0.16208238899707794, "learning_rate": 4.878354442874745e-07, "loss": 0.0008, "step": 159950 }, { "epoch": 2.6173607134091466, "grad_norm": 0.058615487068891525, "learning_rate": 4.874254013337743e-07, "loss": 0.0006, "step": 159960 }, { "epoch": 2.6175243393602226, "grad_norm": 0.03521759435534477, "learning_rate": 4.870155219500583e-07, "loss": 0.0003, "step": 159970 }, { "epoch": 2.6176879653112985, "grad_norm": 0.05476340651512146, "learning_rate": 4.86605806151182e-07, "loss": 0.0007, "step": 159980 }, { "epoch": 2.617851591262374, "grad_norm": 0.0009394604712724686, "learning_rate": 4.861962539519982e-07, "loss": 0.0008, "step": 159990 }, { "epoch": 2.61801521721345, "grad_norm": 0.13531015813350677, "learning_rate": 4.85786865367352e-07, "loss": 0.0006, "step": 160000 }, { "epoch": 2.618178843164526, "grad_norm": 0.03410707414150238, "learning_rate": 4.853776404120814e-07, "loss": 0.0009, "step": 160010 }, { "epoch": 2.6183424691156016, "grad_norm": 0.0123378811404109, "learning_rate": 4.849685791010217e-07, "loss": 0.0008, "step": 160020 }, { "epoch": 2.6185060950666776, "grad_norm": 0.03474989905953407, "learning_rate": 4.845596814489983e-07, "loss": 0.0006, "step": 160030 }, { "epoch": 2.6186697210177536, "grad_norm": 0.1863306313753128, "learning_rate": 4.841509474708345e-07, "loss": 0.0005, "step": 160040 }, { "epoch": 2.618833346968829, "grad_norm": 0.015147080644965172, "learning_rate": 4.837423771813449e-07, "loss": 0.0008, "step": 160050 }, { "epoch": 2.618996972919905, "grad_norm": 0.061684221029281616, "learning_rate": 4.833339705953405e-07, "loss": 0.0005, "step": 160060 }, { "epoch": 2.619160598870981, "grad_norm": 0.02952273003757, "learning_rate": 4.829257277276228e-07, "loss": 0.0006, "step": 160070 }, { "epoch": 2.6193242248220567, "grad_norm": 0.1860480159521103, "learning_rate": 4.825176485929911e-07, "loss": 0.0011, "step": 160080 }, { "epoch": 2.6194878507731327, "grad_norm": 0.08580615371465683, "learning_rate": 4.821097332062391e-07, "loss": 0.0011, "step": 160090 }, { "epoch": 2.6196514767242087, "grad_norm": 0.12832696735858917, "learning_rate": 4.817019815821489e-07, "loss": 0.0005, "step": 160100 }, { "epoch": 2.6198151026752843, "grad_norm": 0.00863903108984232, "learning_rate": 4.812943937355035e-07, "loss": 0.0011, "step": 160110 }, { "epoch": 2.6199787286263603, "grad_norm": 0.11040623486042023, "learning_rate": 4.808869696810747e-07, "loss": 0.0027, "step": 160120 }, { "epoch": 2.620142354577436, "grad_norm": 0.05967620015144348, "learning_rate": 4.804797094336333e-07, "loss": 0.0004, "step": 160130 }, { "epoch": 2.620305980528512, "grad_norm": 0.12195799499750137, "learning_rate": 4.800726130079386e-07, "loss": 0.0011, "step": 160140 }, { "epoch": 2.620469606479588, "grad_norm": 0.07639157027006149, "learning_rate": 4.796656804187488e-07, "loss": 0.0007, "step": 160150 }, { "epoch": 2.6206332324306634, "grad_norm": 0.03131287544965744, "learning_rate": 4.792589116808144e-07, "loss": 0.0008, "step": 160160 }, { "epoch": 2.6207968583817394, "grad_norm": 0.12282074242830276, "learning_rate": 4.788523068088779e-07, "loss": 0.001, "step": 160170 }, { "epoch": 2.620960484332815, "grad_norm": 0.04232249781489372, "learning_rate": 4.784458658176805e-07, "loss": 0.0004, "step": 160180 }, { "epoch": 2.621124110283891, "grad_norm": 0.007921084761619568, "learning_rate": 4.78039588721953e-07, "loss": 0.0007, "step": 160190 }, { "epoch": 2.621287736234967, "grad_norm": 0.01960846409201622, "learning_rate": 4.77633475536421e-07, "loss": 0.0007, "step": 160200 }, { "epoch": 2.6214513621860425, "grad_norm": 0.07180566340684891, "learning_rate": 4.772275262758069e-07, "loss": 0.0008, "step": 160210 }, { "epoch": 2.6216149881371185, "grad_norm": 0.06493759900331497, "learning_rate": 4.768217409548248e-07, "loss": 0.0005, "step": 160220 }, { "epoch": 2.6217786140881945, "grad_norm": 0.08901230245828629, "learning_rate": 4.764161195881839e-07, "loss": 0.0005, "step": 160230 }, { "epoch": 2.62194224003927, "grad_norm": 0.07273291051387787, "learning_rate": 4.760106621905852e-07, "loss": 0.0005, "step": 160240 }, { "epoch": 2.622105865990346, "grad_norm": 0.030528923496603966, "learning_rate": 4.7560536877672814e-07, "loss": 0.0008, "step": 160250 }, { "epoch": 2.622269491941422, "grad_norm": 0.057882267981767654, "learning_rate": 4.7520023936130147e-07, "loss": 0.0004, "step": 160260 }, { "epoch": 2.6224331178924976, "grad_norm": 0.04587721824645996, "learning_rate": 4.7479527395899175e-07, "loss": 0.0005, "step": 160270 }, { "epoch": 2.6225967438435736, "grad_norm": 0.02117150090634823, "learning_rate": 4.743904725844767e-07, "loss": 0.0006, "step": 160280 }, { "epoch": 2.6227603697946495, "grad_norm": 0.05879410356283188, "learning_rate": 4.739858352524307e-07, "loss": 0.0003, "step": 160290 }, { "epoch": 2.622923995745725, "grad_norm": 0.05598529428243637, "learning_rate": 4.735813619775204e-07, "loss": 0.0008, "step": 160300 }, { "epoch": 2.623087621696801, "grad_norm": 0.0191376693546772, "learning_rate": 4.731770527744056e-07, "loss": 0.0011, "step": 160310 }, { "epoch": 2.623251247647877, "grad_norm": 0.02689223363995552, "learning_rate": 4.727729076577442e-07, "loss": 0.001, "step": 160320 }, { "epoch": 2.6234148735989526, "grad_norm": 0.1103043407201767, "learning_rate": 4.7236892664218217e-07, "loss": 0.0005, "step": 160330 }, { "epoch": 2.6235784995500286, "grad_norm": 0.01456078328192234, "learning_rate": 4.719651097423661e-07, "loss": 0.0009, "step": 160340 }, { "epoch": 2.6237421255011046, "grad_norm": 0.05939066782593727, "learning_rate": 4.71561456972931e-07, "loss": 0.0008, "step": 160350 }, { "epoch": 2.62390575145218, "grad_norm": 0.22322286665439606, "learning_rate": 4.7115796834850957e-07, "loss": 0.0008, "step": 160360 }, { "epoch": 2.624069377403256, "grad_norm": 0.02628941461443901, "learning_rate": 4.707546438837274e-07, "loss": 0.0003, "step": 160370 }, { "epoch": 2.624233003354332, "grad_norm": 0.07713694125413895, "learning_rate": 4.703514835932027e-07, "loss": 0.0006, "step": 160380 }, { "epoch": 2.6243966293054077, "grad_norm": 0.1457454115152359, "learning_rate": 4.699484874915522e-07, "loss": 0.0008, "step": 160390 }, { "epoch": 2.6245602552564837, "grad_norm": 0.04235680028796196, "learning_rate": 4.6954565559337917e-07, "loss": 0.0007, "step": 160400 }, { "epoch": 2.6247238812075597, "grad_norm": 0.0984048843383789, "learning_rate": 4.691429879132886e-07, "loss": 0.0021, "step": 160410 }, { "epoch": 2.6248875071586353, "grad_norm": 0.10287055373191833, "learning_rate": 4.687404844658744e-07, "loss": 0.0007, "step": 160420 }, { "epoch": 2.6250511331097113, "grad_norm": 0.02886793576180935, "learning_rate": 4.6833814526572654e-07, "loss": 0.0008, "step": 160430 }, { "epoch": 2.6252147590607873, "grad_norm": 0.019250407814979553, "learning_rate": 4.679359703274305e-07, "loss": 0.0004, "step": 160440 }, { "epoch": 2.625378385011863, "grad_norm": 0.059348076581954956, "learning_rate": 4.6753395966556203e-07, "loss": 0.0008, "step": 160450 }, { "epoch": 2.625542010962939, "grad_norm": 0.17093642055988312, "learning_rate": 4.6713211329469543e-07, "loss": 0.0007, "step": 160460 }, { "epoch": 2.625705636914015, "grad_norm": 0.11317221075296402, "learning_rate": 4.6673043122939354e-07, "loss": 0.0007, "step": 160470 }, { "epoch": 2.6258692628650904, "grad_norm": 0.021574676036834717, "learning_rate": 4.663289134842197e-07, "loss": 0.0005, "step": 160480 }, { "epoch": 2.6260328888161664, "grad_norm": 0.011897759512066841, "learning_rate": 4.659275600737262e-07, "loss": 0.0005, "step": 160490 }, { "epoch": 2.626196514767242, "grad_norm": 0.014802451245486736, "learning_rate": 4.6552637101246026e-07, "loss": 0.0011, "step": 160500 }, { "epoch": 2.626360140718318, "grad_norm": 0.0313987135887146, "learning_rate": 4.6512534631496575e-07, "loss": 0.0008, "step": 160510 }, { "epoch": 2.626523766669394, "grad_norm": 0.054005250334739685, "learning_rate": 4.6472448599577724e-07, "loss": 0.0004, "step": 160520 }, { "epoch": 2.6266873926204695, "grad_norm": 0.09609302878379822, "learning_rate": 4.6432379006942695e-07, "loss": 0.0024, "step": 160530 }, { "epoch": 2.6268510185715455, "grad_norm": 0.17505386471748352, "learning_rate": 4.6392325855043716e-07, "loss": 0.0005, "step": 160540 }, { "epoch": 2.627014644522621, "grad_norm": 0.050218816846609116, "learning_rate": 4.6352289145332797e-07, "loss": 0.0007, "step": 160550 }, { "epoch": 2.627178270473697, "grad_norm": 0.10288070887327194, "learning_rate": 4.631226887926099e-07, "loss": 0.0009, "step": 160560 }, { "epoch": 2.627341896424773, "grad_norm": 0.01783854514360428, "learning_rate": 4.627226505827914e-07, "loss": 0.0006, "step": 160570 }, { "epoch": 2.6275055223758486, "grad_norm": 0.019573703408241272, "learning_rate": 4.623227768383703e-07, "loss": 0.0003, "step": 160580 }, { "epoch": 2.6276691483269246, "grad_norm": 0.09200419485569, "learning_rate": 4.6192306757384397e-07, "loss": 0.0006, "step": 160590 }, { "epoch": 2.6278327742780005, "grad_norm": 0.25513818860054016, "learning_rate": 4.6152352280369904e-07, "loss": 0.0008, "step": 160600 }, { "epoch": 2.627996400229076, "grad_norm": 0.08952823281288147, "learning_rate": 4.611241425424179e-07, "loss": 0.0006, "step": 160610 }, { "epoch": 2.628160026180152, "grad_norm": 0.02707403525710106, "learning_rate": 4.607249268044789e-07, "loss": 0.0008, "step": 160620 }, { "epoch": 2.628323652131228, "grad_norm": 0.06539997458457947, "learning_rate": 4.603258756043499e-07, "loss": 0.0011, "step": 160630 }, { "epoch": 2.6284872780823036, "grad_norm": 0.24186575412750244, "learning_rate": 4.5992698895649723e-07, "loss": 0.0017, "step": 160640 }, { "epoch": 2.6286509040333796, "grad_norm": 0.08850037306547165, "learning_rate": 4.5952826687538033e-07, "loss": 0.001, "step": 160650 }, { "epoch": 2.6288145299844556, "grad_norm": 0.14397820830345154, "learning_rate": 4.5912970937545044e-07, "loss": 0.0008, "step": 160660 }, { "epoch": 2.628978155935531, "grad_norm": 0.10474473237991333, "learning_rate": 4.5873131647115545e-07, "loss": 0.0006, "step": 160670 }, { "epoch": 2.629141781886607, "grad_norm": 0.07800954580307007, "learning_rate": 4.583330881769349e-07, "loss": 0.0008, "step": 160680 }, { "epoch": 2.629305407837683, "grad_norm": 0.14406746625900269, "learning_rate": 4.57935024507225e-07, "loss": 0.0012, "step": 160690 }, { "epoch": 2.6294690337887587, "grad_norm": 0.267629474401474, "learning_rate": 4.575371254764538e-07, "loss": 0.0008, "step": 160700 }, { "epoch": 2.6296326597398347, "grad_norm": 0.06929265707731247, "learning_rate": 4.571393910990435e-07, "loss": 0.0008, "step": 160710 }, { "epoch": 2.6297962856909107, "grad_norm": 0.28277674317359924, "learning_rate": 4.5674182138941317e-07, "loss": 0.0008, "step": 160720 }, { "epoch": 2.6299599116419863, "grad_norm": 0.11098241060972214, "learning_rate": 4.563444163619707e-07, "loss": 0.0009, "step": 160730 }, { "epoch": 2.6301235375930623, "grad_norm": 0.0017275033751502633, "learning_rate": 4.5594717603112404e-07, "loss": 0.0005, "step": 160740 }, { "epoch": 2.6302871635441383, "grad_norm": 0.11607204377651215, "learning_rate": 4.5555010041127004e-07, "loss": 0.001, "step": 160750 }, { "epoch": 2.630450789495214, "grad_norm": 0.034611113369464874, "learning_rate": 4.551531895168032e-07, "loss": 0.0008, "step": 160760 }, { "epoch": 2.63061441544629, "grad_norm": 0.009676181711256504, "learning_rate": 4.5475644336210933e-07, "loss": 0.0004, "step": 160770 }, { "epoch": 2.630778041397366, "grad_norm": 0.0855923593044281, "learning_rate": 4.5435986196157136e-07, "loss": 0.0009, "step": 160780 }, { "epoch": 2.6309416673484414, "grad_norm": 0.04754136502742767, "learning_rate": 4.539634453295627e-07, "loss": 0.001, "step": 160790 }, { "epoch": 2.6311052932995174, "grad_norm": 0.15470431745052338, "learning_rate": 4.5356719348045253e-07, "loss": 0.001, "step": 160800 }, { "epoch": 2.6312689192505934, "grad_norm": 0.09109357744455338, "learning_rate": 4.531711064286054e-07, "loss": 0.0008, "step": 160810 }, { "epoch": 2.631432545201669, "grad_norm": 0.07098764181137085, "learning_rate": 4.5277518418837654e-07, "loss": 0.0021, "step": 160820 }, { "epoch": 2.631596171152745, "grad_norm": 0.001638766610994935, "learning_rate": 4.523794267741194e-07, "loss": 0.0008, "step": 160830 }, { "epoch": 2.631759797103821, "grad_norm": 0.0016671274788677692, "learning_rate": 4.51983834200177e-07, "loss": 0.0009, "step": 160840 }, { "epoch": 2.6319234230548965, "grad_norm": 0.05554373562335968, "learning_rate": 4.5158840648089006e-07, "loss": 0.0011, "step": 160850 }, { "epoch": 2.6320870490059725, "grad_norm": 0.06322480738162994, "learning_rate": 4.511931436305922e-07, "loss": 0.0006, "step": 160860 }, { "epoch": 2.6322506749570485, "grad_norm": 0.1607675850391388, "learning_rate": 4.5079804566360906e-07, "loss": 0.001, "step": 160870 }, { "epoch": 2.632414300908124, "grad_norm": 0.15252161026000977, "learning_rate": 4.5040311259426426e-07, "loss": 0.0009, "step": 160880 }, { "epoch": 2.6325779268592, "grad_norm": 0.0909084603190422, "learning_rate": 4.500083444368719e-07, "loss": 0.0005, "step": 160890 }, { "epoch": 2.6327415528102756, "grad_norm": 0.19390174746513367, "learning_rate": 4.496137412057411e-07, "loss": 0.0012, "step": 160900 }, { "epoch": 2.6329051787613516, "grad_norm": 0.010709865018725395, "learning_rate": 4.492193029151753e-07, "loss": 0.0007, "step": 160910 }, { "epoch": 2.6330688047124275, "grad_norm": 0.0480482317507267, "learning_rate": 4.488250295794716e-07, "loss": 0.0008, "step": 160920 }, { "epoch": 2.633232430663503, "grad_norm": 0.014920058660209179, "learning_rate": 4.4843092121292344e-07, "loss": 0.0007, "step": 160930 }, { "epoch": 2.633396056614579, "grad_norm": 0.014779975637793541, "learning_rate": 4.480369778298144e-07, "loss": 0.0004, "step": 160940 }, { "epoch": 2.6335596825656546, "grad_norm": 0.03539162874221802, "learning_rate": 4.476431994444247e-07, "loss": 0.0007, "step": 160950 }, { "epoch": 2.6337233085167306, "grad_norm": 0.014194296672940254, "learning_rate": 4.4724958607102744e-07, "loss": 0.0007, "step": 160960 }, { "epoch": 2.6338869344678066, "grad_norm": 0.02027301676571369, "learning_rate": 4.468561377238911e-07, "loss": 0.0005, "step": 160970 }, { "epoch": 2.634050560418882, "grad_norm": 0.09363052248954773, "learning_rate": 4.4646285441727555e-07, "loss": 0.0008, "step": 160980 }, { "epoch": 2.634214186369958, "grad_norm": 0.003301275661215186, "learning_rate": 4.460697361654387e-07, "loss": 0.0009, "step": 160990 }, { "epoch": 2.634377812321034, "grad_norm": 0.12710069119930267, "learning_rate": 4.456767829826281e-07, "loss": 0.0005, "step": 161000 }, { "epoch": 2.6345414382721097, "grad_norm": 0.03920842334628105, "learning_rate": 4.452839948830878e-07, "loss": 0.0014, "step": 161010 }, { "epoch": 2.6347050642231857, "grad_norm": 0.10991855710744858, "learning_rate": 4.4489137188105657e-07, "loss": 0.0008, "step": 161020 }, { "epoch": 2.6348686901742617, "grad_norm": 0.126530721783638, "learning_rate": 4.444989139907641e-07, "loss": 0.001, "step": 161030 }, { "epoch": 2.6350323161253373, "grad_norm": 0.0752364844083786, "learning_rate": 4.441066212264378e-07, "loss": 0.0016, "step": 161040 }, { "epoch": 2.6351959420764133, "grad_norm": 0.09029126167297363, "learning_rate": 4.4371449360229637e-07, "loss": 0.0005, "step": 161050 }, { "epoch": 2.6353595680274893, "grad_norm": 0.13464708626270294, "learning_rate": 4.433225311325545e-07, "loss": 0.0006, "step": 161060 }, { "epoch": 2.635523193978565, "grad_norm": 0.10298698395490646, "learning_rate": 4.429307338314182e-07, "loss": 0.001, "step": 161070 }, { "epoch": 2.635686819929641, "grad_norm": 0.03460034355521202, "learning_rate": 4.425391017130898e-07, "loss": 0.0011, "step": 161080 }, { "epoch": 2.635850445880717, "grad_norm": 0.016474898904561996, "learning_rate": 4.4214763479176804e-07, "loss": 0.0002, "step": 161090 }, { "epoch": 2.6360140718317924, "grad_norm": 0.011465839110314846, "learning_rate": 4.4175633308163766e-07, "loss": 0.0008, "step": 161100 }, { "epoch": 2.6361776977828684, "grad_norm": 0.2227979302406311, "learning_rate": 4.413651965968857e-07, "loss": 0.0012, "step": 161110 }, { "epoch": 2.6363413237339444, "grad_norm": 0.051704708486795425, "learning_rate": 4.40974225351688e-07, "loss": 0.0004, "step": 161120 }, { "epoch": 2.63650494968502, "grad_norm": 0.04620182514190674, "learning_rate": 4.4058341936021707e-07, "loss": 0.0007, "step": 161130 }, { "epoch": 2.636668575636096, "grad_norm": 0.036082345992326736, "learning_rate": 4.4019277863664054e-07, "loss": 0.0004, "step": 161140 }, { "epoch": 2.636832201587172, "grad_norm": 0.009952438995242119, "learning_rate": 4.3980230319511483e-07, "loss": 0.0005, "step": 161150 }, { "epoch": 2.6369958275382475, "grad_norm": 0.026040753349661827, "learning_rate": 4.39411993049797e-07, "loss": 0.0008, "step": 161160 }, { "epoch": 2.6371594534893235, "grad_norm": 0.07812991738319397, "learning_rate": 4.390218482148323e-07, "loss": 0.0006, "step": 161170 }, { "epoch": 2.6373230794403995, "grad_norm": 0.07248272001743317, "learning_rate": 4.386318687043645e-07, "loss": 0.0007, "step": 161180 }, { "epoch": 2.637486705391475, "grad_norm": 0.02122293971478939, "learning_rate": 4.382420545325283e-07, "loss": 0.0004, "step": 161190 }, { "epoch": 2.637650331342551, "grad_norm": 0.045053280889987946, "learning_rate": 4.378524057134531e-07, "loss": 0.0004, "step": 161200 }, { "epoch": 2.637813957293627, "grad_norm": 0.07399696856737137, "learning_rate": 4.374629222612642e-07, "loss": 0.0008, "step": 161210 }, { "epoch": 2.6379775832447026, "grad_norm": 0.04394274204969406, "learning_rate": 4.370736041900775e-07, "loss": 0.0004, "step": 161220 }, { "epoch": 2.6381412091957785, "grad_norm": 0.07947829365730286, "learning_rate": 4.366844515140073e-07, "loss": 0.0006, "step": 161230 }, { "epoch": 2.6383048351468545, "grad_norm": 0.002457377966493368, "learning_rate": 4.362954642471567e-07, "loss": 0.0004, "step": 161240 }, { "epoch": 2.63846846109793, "grad_norm": 0.1295071840286255, "learning_rate": 4.359066424036279e-07, "loss": 0.0005, "step": 161250 }, { "epoch": 2.638632087049006, "grad_norm": 0.08755620568990707, "learning_rate": 4.355179859975134e-07, "loss": 0.0006, "step": 161260 }, { "epoch": 2.638795713000082, "grad_norm": 0.057457584887742996, "learning_rate": 4.3512949504290193e-07, "loss": 0.0004, "step": 161270 }, { "epoch": 2.6389593389511576, "grad_norm": 0.03893290087580681, "learning_rate": 4.347411695538739e-07, "loss": 0.0008, "step": 161280 }, { "epoch": 2.6391229649022336, "grad_norm": 0.2828652262687683, "learning_rate": 4.34353009544507e-07, "loss": 0.0014, "step": 161290 }, { "epoch": 2.639286590853309, "grad_norm": 0.24717114865779877, "learning_rate": 4.3396501502887047e-07, "loss": 0.0013, "step": 161300 }, { "epoch": 2.639450216804385, "grad_norm": 0.0561048686504364, "learning_rate": 4.335771860210275e-07, "loss": 0.0003, "step": 161310 }, { "epoch": 2.639613842755461, "grad_norm": 0.11476080119609833, "learning_rate": 4.331895225350369e-07, "loss": 0.0007, "step": 161320 }, { "epoch": 2.6397774687065367, "grad_norm": 0.13832363486289978, "learning_rate": 4.3280202458494904e-07, "loss": 0.0025, "step": 161330 }, { "epoch": 2.6399410946576127, "grad_norm": 0.0995819941163063, "learning_rate": 4.324146921848116e-07, "loss": 0.0013, "step": 161340 }, { "epoch": 2.6401047206086883, "grad_norm": 0.05939680337905884, "learning_rate": 4.3202752534866333e-07, "loss": 0.001, "step": 161350 }, { "epoch": 2.6402683465597643, "grad_norm": 0.019526952877640724, "learning_rate": 4.3164052409053804e-07, "loss": 0.001, "step": 161360 }, { "epoch": 2.6404319725108403, "grad_norm": 0.008612528443336487, "learning_rate": 4.3125368842446503e-07, "loss": 0.0009, "step": 161370 }, { "epoch": 2.640595598461916, "grad_norm": 0.08374768495559692, "learning_rate": 4.308670183644642e-07, "loss": 0.0009, "step": 161380 }, { "epoch": 2.640759224412992, "grad_norm": 0.11215036362409592, "learning_rate": 4.304805139245538e-07, "loss": 0.0008, "step": 161390 }, { "epoch": 2.640922850364068, "grad_norm": 0.23724402487277985, "learning_rate": 4.3009417511874096e-07, "loss": 0.0008, "step": 161400 }, { "epoch": 2.6410864763151434, "grad_norm": 0.1612662822008133, "learning_rate": 4.2970800196103056e-07, "loss": 0.0008, "step": 161410 }, { "epoch": 2.6412501022662194, "grad_norm": 0.19655001163482666, "learning_rate": 4.2932199446542143e-07, "loss": 0.0014, "step": 161420 }, { "epoch": 2.6414137282172954, "grad_norm": 0.008041562512516975, "learning_rate": 4.289361526459035e-07, "loss": 0.0005, "step": 161430 }, { "epoch": 2.641577354168371, "grad_norm": 0.029825009405612946, "learning_rate": 4.285504765164655e-07, "loss": 0.0004, "step": 161440 }, { "epoch": 2.641740980119447, "grad_norm": 0.13402365148067474, "learning_rate": 4.2816496609108416e-07, "loss": 0.0011, "step": 161450 }, { "epoch": 2.641904606070523, "grad_norm": 0.004117707721889019, "learning_rate": 4.277796213837354e-07, "loss": 0.0004, "step": 161460 }, { "epoch": 2.6420682320215985, "grad_norm": 0.14433643221855164, "learning_rate": 4.273944424083859e-07, "loss": 0.0009, "step": 161470 }, { "epoch": 2.6422318579726745, "grad_norm": 0.062206171452999115, "learning_rate": 4.2700942917899834e-07, "loss": 0.0004, "step": 161480 }, { "epoch": 2.6423954839237505, "grad_norm": 0.054306913167238235, "learning_rate": 4.2662458170952825e-07, "loss": 0.0006, "step": 161490 }, { "epoch": 2.642559109874826, "grad_norm": 0.009300809353590012, "learning_rate": 4.2623990001392447e-07, "loss": 0.0003, "step": 161500 }, { "epoch": 2.642722735825902, "grad_norm": 0.007989195175468922, "learning_rate": 4.2585538410613245e-07, "loss": 0.0007, "step": 161510 }, { "epoch": 2.642886361776978, "grad_norm": 0.05148007348179817, "learning_rate": 4.2547103400008783e-07, "loss": 0.0008, "step": 161520 }, { "epoch": 2.6430499877280536, "grad_norm": 0.018591446802020073, "learning_rate": 4.250868497097249e-07, "loss": 0.0004, "step": 161530 }, { "epoch": 2.6432136136791295, "grad_norm": 0.018516890704631805, "learning_rate": 4.2470283124896705e-07, "loss": 0.0004, "step": 161540 }, { "epoch": 2.6433772396302055, "grad_norm": 0.1172805055975914, "learning_rate": 4.243189786317364e-07, "loss": 0.001, "step": 161550 }, { "epoch": 2.643540865581281, "grad_norm": 0.05946112796664238, "learning_rate": 4.239352918719442e-07, "loss": 0.0004, "step": 161560 }, { "epoch": 2.643704491532357, "grad_norm": 0.0028226361609995365, "learning_rate": 4.2355177098349966e-07, "loss": 0.0004, "step": 161570 }, { "epoch": 2.643868117483433, "grad_norm": 0.06161634251475334, "learning_rate": 4.2316841598030455e-07, "loss": 0.0004, "step": 161580 }, { "epoch": 2.6440317434345086, "grad_norm": 0.005187698174268007, "learning_rate": 4.22785226876255e-07, "loss": 0.0004, "step": 161590 }, { "epoch": 2.6441953693855846, "grad_norm": 0.02173246443271637, "learning_rate": 4.224022036852393e-07, "loss": 0.0003, "step": 161600 }, { "epoch": 2.6443589953366606, "grad_norm": 0.006877606734633446, "learning_rate": 4.220193464211414e-07, "loss": 0.0008, "step": 161610 }, { "epoch": 2.644522621287736, "grad_norm": 0.047592807561159134, "learning_rate": 4.2163665509783956e-07, "loss": 0.0006, "step": 161620 }, { "epoch": 2.644686247238812, "grad_norm": 0.11689148098230362, "learning_rate": 4.21254129729205e-07, "loss": 0.0006, "step": 161630 }, { "epoch": 2.644849873189888, "grad_norm": 0.07109644263982773, "learning_rate": 4.2087177032910263e-07, "loss": 0.0005, "step": 161640 }, { "epoch": 2.6450134991409637, "grad_norm": 0.039288464933633804, "learning_rate": 4.204895769113948e-07, "loss": 0.0008, "step": 161650 }, { "epoch": 2.6451771250920397, "grad_norm": 0.059210970997810364, "learning_rate": 4.201075494899315e-07, "loss": 0.0008, "step": 161660 }, { "epoch": 2.6453407510431153, "grad_norm": 0.11788593232631683, "learning_rate": 4.197256880785633e-07, "loss": 0.0011, "step": 161670 }, { "epoch": 2.6455043769941913, "grad_norm": 0.17615070939064026, "learning_rate": 4.193439926911297e-07, "loss": 0.0017, "step": 161680 }, { "epoch": 2.6456680029452673, "grad_norm": 0.053512394428253174, "learning_rate": 4.1896246334146797e-07, "loss": 0.0014, "step": 161690 }, { "epoch": 2.645831628896343, "grad_norm": 0.02111648954451084, "learning_rate": 4.1858110004340647e-07, "loss": 0.0003, "step": 161700 }, { "epoch": 2.645995254847419, "grad_norm": 0.026846205815672874, "learning_rate": 4.1819990281076807e-07, "loss": 0.0008, "step": 161710 }, { "epoch": 2.6461588807984944, "grad_norm": 0.0030224171932786703, "learning_rate": 4.178188716573722e-07, "loss": 0.0003, "step": 161720 }, { "epoch": 2.6463225067495704, "grad_norm": 0.12042593955993652, "learning_rate": 4.174380065970285e-07, "loss": 0.0005, "step": 161730 }, { "epoch": 2.6464861327006464, "grad_norm": 0.05651644617319107, "learning_rate": 4.170573076435436e-07, "loss": 0.0014, "step": 161740 }, { "epoch": 2.646649758651722, "grad_norm": 0.07848593592643738, "learning_rate": 4.166767748107159e-07, "loss": 0.0011, "step": 161750 }, { "epoch": 2.646813384602798, "grad_norm": 0.028205307200551033, "learning_rate": 4.1629640811234053e-07, "loss": 0.0004, "step": 161760 }, { "epoch": 2.646977010553874, "grad_norm": 0.03122643381357193, "learning_rate": 4.1591620756220254e-07, "loss": 0.0007, "step": 161770 }, { "epoch": 2.6471406365049495, "grad_norm": 0.09759751707315445, "learning_rate": 4.1553617317408535e-07, "loss": 0.0009, "step": 161780 }, { "epoch": 2.6473042624560255, "grad_norm": 0.08184006810188293, "learning_rate": 4.1515630496176293e-07, "loss": 0.0012, "step": 161790 }, { "epoch": 2.6474678884071015, "grad_norm": 0.08442173153162003, "learning_rate": 4.147766029390049e-07, "loss": 0.0006, "step": 161800 }, { "epoch": 2.647631514358177, "grad_norm": 0.02862313576042652, "learning_rate": 4.1439706711957517e-07, "loss": 0.001, "step": 161810 }, { "epoch": 2.647795140309253, "grad_norm": 0.006327215116471052, "learning_rate": 4.1401769751723e-07, "loss": 0.0005, "step": 161820 }, { "epoch": 2.647958766260329, "grad_norm": 0.07080142199993134, "learning_rate": 4.136384941457222e-07, "loss": 0.0008, "step": 161830 }, { "epoch": 2.6481223922114046, "grad_norm": 0.044419772922992706, "learning_rate": 4.132594570187948e-07, "loss": 0.001, "step": 161840 }, { "epoch": 2.6482860181624805, "grad_norm": 0.015733499079942703, "learning_rate": 4.1288058615018777e-07, "loss": 0.0012, "step": 161850 }, { "epoch": 2.6484496441135565, "grad_norm": 0.03354400023818016, "learning_rate": 4.125018815536358e-07, "loss": 0.0007, "step": 161860 }, { "epoch": 2.648613270064632, "grad_norm": 0.16005954146385193, "learning_rate": 4.1212334324286453e-07, "loss": 0.0009, "step": 161870 }, { "epoch": 2.648776896015708, "grad_norm": 0.0617651529610157, "learning_rate": 4.1174497123159573e-07, "loss": 0.0004, "step": 161880 }, { "epoch": 2.648940521966784, "grad_norm": 0.14231514930725098, "learning_rate": 4.1136676553354407e-07, "loss": 0.0004, "step": 161890 }, { "epoch": 2.6491041479178596, "grad_norm": 0.03389750048518181, "learning_rate": 4.109887261624179e-07, "loss": 0.0011, "step": 161900 }, { "epoch": 2.6492677738689356, "grad_norm": 0.08259738236665726, "learning_rate": 4.1061085313192196e-07, "loss": 0.0008, "step": 161910 }, { "epoch": 2.6494313998200116, "grad_norm": 0.09009433537721634, "learning_rate": 4.102331464557513e-07, "loss": 0.0009, "step": 161920 }, { "epoch": 2.649595025771087, "grad_norm": 0.1344250589609146, "learning_rate": 4.098556061475989e-07, "loss": 0.0008, "step": 161930 }, { "epoch": 2.649758651722163, "grad_norm": 0.024069903418421745, "learning_rate": 4.094782322211477e-07, "loss": 0.0008, "step": 161940 }, { "epoch": 2.649922277673239, "grad_norm": 0.07617423683404922, "learning_rate": 4.091010246900784e-07, "loss": 0.0005, "step": 161950 }, { "epoch": 2.6500859036243147, "grad_norm": 0.11341037601232529, "learning_rate": 4.087239835680623e-07, "loss": 0.0007, "step": 161960 }, { "epoch": 2.6502495295753907, "grad_norm": 0.06256689876317978, "learning_rate": 4.0834710886876794e-07, "loss": 0.0005, "step": 161970 }, { "epoch": 2.6504131555264667, "grad_norm": 0.06798188388347626, "learning_rate": 4.079704006058538e-07, "loss": 0.0004, "step": 161980 }, { "epoch": 2.6505767814775423, "grad_norm": 0.009177806787192822, "learning_rate": 4.075938587929773e-07, "loss": 0.0006, "step": 161990 }, { "epoch": 2.6507404074286183, "grad_norm": 0.14366771280765533, "learning_rate": 4.0721748344378533e-07, "loss": 0.0012, "step": 162000 }, { "epoch": 2.6509040333796943, "grad_norm": 0.008799301460385323, "learning_rate": 4.068412745719208e-07, "loss": 0.0006, "step": 162010 }, { "epoch": 2.65106765933077, "grad_norm": 0.005362886004149914, "learning_rate": 4.064652321910212e-07, "loss": 0.0007, "step": 162020 }, { "epoch": 2.651231285281846, "grad_norm": 0.025467636063694954, "learning_rate": 4.0608935631471615e-07, "loss": 0.001, "step": 162030 }, { "epoch": 2.651394911232922, "grad_norm": 0.05472426488995552, "learning_rate": 4.057136469566314e-07, "loss": 0.0011, "step": 162040 }, { "epoch": 2.6515585371839974, "grad_norm": 0.14054687321186066, "learning_rate": 4.053381041303839e-07, "loss": 0.0007, "step": 162050 }, { "epoch": 2.6517221631350734, "grad_norm": 0.06658267229795456, "learning_rate": 4.049627278495871e-07, "loss": 0.0006, "step": 162060 }, { "epoch": 2.651885789086149, "grad_norm": 0.0328206904232502, "learning_rate": 4.0458751812784855e-07, "loss": 0.0009, "step": 162070 }, { "epoch": 2.652049415037225, "grad_norm": 0.0794779509305954, "learning_rate": 4.042124749787668e-07, "loss": 0.0018, "step": 162080 }, { "epoch": 2.652213040988301, "grad_norm": 0.028756428509950638, "learning_rate": 4.038375984159382e-07, "loss": 0.0007, "step": 162090 }, { "epoch": 2.6523766669393765, "grad_norm": 0.027016280218958855, "learning_rate": 4.034628884529501e-07, "loss": 0.001, "step": 162100 }, { "epoch": 2.6525402928904525, "grad_norm": 0.05401135981082916, "learning_rate": 4.030883451033846e-07, "loss": 0.0008, "step": 162110 }, { "epoch": 2.652703918841528, "grad_norm": 0.2249971628189087, "learning_rate": 4.027139683808173e-07, "loss": 0.001, "step": 162120 }, { "epoch": 2.652867544792604, "grad_norm": 0.10034111887216568, "learning_rate": 4.023397582988192e-07, "loss": 0.0008, "step": 162130 }, { "epoch": 2.65303117074368, "grad_norm": 0.02204626426100731, "learning_rate": 4.01965714870956e-07, "loss": 0.0009, "step": 162140 }, { "epoch": 2.6531947966947556, "grad_norm": 0.04960593208670616, "learning_rate": 4.0159183811078305e-07, "loss": 0.0008, "step": 162150 }, { "epoch": 2.6533584226458315, "grad_norm": 0.05953105911612511, "learning_rate": 4.0121812803185556e-07, "loss": 0.0007, "step": 162160 }, { "epoch": 2.6535220485969075, "grad_norm": 0.024794332683086395, "learning_rate": 4.008445846477166e-07, "loss": 0.0006, "step": 162170 }, { "epoch": 2.653685674547983, "grad_norm": 0.09195355325937271, "learning_rate": 4.004712079719092e-07, "loss": 0.0009, "step": 162180 }, { "epoch": 2.653849300499059, "grad_norm": 0.04879765585064888, "learning_rate": 4.000979980179642e-07, "loss": 0.0006, "step": 162190 }, { "epoch": 2.654012926450135, "grad_norm": 0.10736338794231415, "learning_rate": 3.9972495479941255e-07, "loss": 0.0003, "step": 162200 }, { "epoch": 2.6541765524012106, "grad_norm": 0.10588818043470383, "learning_rate": 3.9935207832977497e-07, "loss": 0.0007, "step": 162210 }, { "epoch": 2.6543401783522866, "grad_norm": 0.07515053451061249, "learning_rate": 3.989793686225657e-07, "loss": 0.0012, "step": 162220 }, { "epoch": 2.6545038043033626, "grad_norm": 0.04193764925003052, "learning_rate": 3.986068256912973e-07, "loss": 0.0005, "step": 162230 }, { "epoch": 2.654667430254438, "grad_norm": 0.028369365260004997, "learning_rate": 3.982344495494717e-07, "loss": 0.0006, "step": 162240 }, { "epoch": 2.654831056205514, "grad_norm": 0.05866362899541855, "learning_rate": 3.978622402105875e-07, "loss": 0.0006, "step": 162250 }, { "epoch": 2.65499468215659, "grad_norm": 0.03806088864803314, "learning_rate": 3.974901976881357e-07, "loss": 0.0007, "step": 162260 }, { "epoch": 2.6551583081076657, "grad_norm": 0.02487987093627453, "learning_rate": 3.971183219956032e-07, "loss": 0.0003, "step": 162270 }, { "epoch": 2.6553219340587417, "grad_norm": 0.08773603290319443, "learning_rate": 3.9674661314646814e-07, "loss": 0.0011, "step": 162280 }, { "epoch": 2.6554855600098177, "grad_norm": 0.20498532056808472, "learning_rate": 3.9637507115420417e-07, "loss": 0.0006, "step": 162290 }, { "epoch": 2.6556491859608933, "grad_norm": 0.11393287032842636, "learning_rate": 3.960036960322816e-07, "loss": 0.0011, "step": 162300 }, { "epoch": 2.6558128119119693, "grad_norm": 0.005545942112803459, "learning_rate": 3.956324877941575e-07, "loss": 0.0006, "step": 162310 }, { "epoch": 2.6559764378630453, "grad_norm": 0.05825549364089966, "learning_rate": 3.9526144645329054e-07, "loss": 0.0008, "step": 162320 }, { "epoch": 2.656140063814121, "grad_norm": 0.08447104692459106, "learning_rate": 3.948905720231283e-07, "loss": 0.001, "step": 162330 }, { "epoch": 2.656303689765197, "grad_norm": 0.003699325956404209, "learning_rate": 3.9451986451711446e-07, "loss": 0.0009, "step": 162340 }, { "epoch": 2.656467315716273, "grad_norm": 0.008734593167901039, "learning_rate": 3.941493239486871e-07, "loss": 0.0009, "step": 162350 }, { "epoch": 2.6566309416673484, "grad_norm": 0.056675247848033905, "learning_rate": 3.937789503312767e-07, "loss": 0.0004, "step": 162360 }, { "epoch": 2.6567945676184244, "grad_norm": 0.013299272395670414, "learning_rate": 3.9340874367830903e-07, "loss": 0.0003, "step": 162370 }, { "epoch": 2.6569581935695004, "grad_norm": 0.077885702252388, "learning_rate": 3.930387040032019e-07, "loss": 0.0005, "step": 162380 }, { "epoch": 2.657121819520576, "grad_norm": 0.10574847459793091, "learning_rate": 3.9266883131936994e-07, "loss": 0.0011, "step": 162390 }, { "epoch": 2.657285445471652, "grad_norm": 0.08272770792245865, "learning_rate": 3.922991256402198e-07, "loss": 0.0009, "step": 162400 }, { "epoch": 2.657449071422728, "grad_norm": 0.05639660730957985, "learning_rate": 3.9192958697915063e-07, "loss": 0.0007, "step": 162410 }, { "epoch": 2.6576126973738035, "grad_norm": 0.007139271590858698, "learning_rate": 3.915602153495601e-07, "loss": 0.0014, "step": 162420 }, { "epoch": 2.6577763233248795, "grad_norm": 0.1234506294131279, "learning_rate": 3.911910107648348e-07, "loss": 0.0007, "step": 162430 }, { "epoch": 2.657939949275955, "grad_norm": 0.09703536331653595, "learning_rate": 3.908219732383589e-07, "loss": 0.0007, "step": 162440 }, { "epoch": 2.658103575227031, "grad_norm": 0.08111412823200226, "learning_rate": 3.904531027835079e-07, "loss": 0.0006, "step": 162450 }, { "epoch": 2.658267201178107, "grad_norm": 0.0030443049035966396, "learning_rate": 3.900843994136538e-07, "loss": 0.0007, "step": 162460 }, { "epoch": 2.6584308271291825, "grad_norm": 0.14771559834480286, "learning_rate": 3.897158631421605e-07, "loss": 0.0011, "step": 162470 }, { "epoch": 2.6585944530802585, "grad_norm": 0.1352393627166748, "learning_rate": 3.893474939823866e-07, "loss": 0.0007, "step": 162480 }, { "epoch": 2.658758079031334, "grad_norm": 0.015554625540971756, "learning_rate": 3.8897929194768423e-07, "loss": 0.001, "step": 162490 }, { "epoch": 2.65892170498241, "grad_norm": 0.07386418431997299, "learning_rate": 3.886112570514011e-07, "loss": 0.0007, "step": 162500 }, { "epoch": 2.659085330933486, "grad_norm": 0.1800573319196701, "learning_rate": 3.8824338930687654e-07, "loss": 0.0006, "step": 162510 }, { "epoch": 2.6592489568845616, "grad_norm": 0.006648830138146877, "learning_rate": 3.878756887274443e-07, "loss": 0.0004, "step": 162520 }, { "epoch": 2.6594125828356376, "grad_norm": 0.03640538454055786, "learning_rate": 3.8750815532643425e-07, "loss": 0.0003, "step": 162530 }, { "epoch": 2.6595762087867136, "grad_norm": 0.19106590747833252, "learning_rate": 3.871407891171669e-07, "loss": 0.0008, "step": 162540 }, { "epoch": 2.659739834737789, "grad_norm": 0.016939766705036163, "learning_rate": 3.867735901129599e-07, "loss": 0.0003, "step": 162550 }, { "epoch": 2.659903460688865, "grad_norm": 0.004801356699317694, "learning_rate": 3.8640655832712204e-07, "loss": 0.0006, "step": 162560 }, { "epoch": 2.660067086639941, "grad_norm": 0.05608263984322548, "learning_rate": 3.8603969377295767e-07, "loss": 0.0007, "step": 162570 }, { "epoch": 2.6602307125910167, "grad_norm": 0.06587318331003189, "learning_rate": 3.856729964637662e-07, "loss": 0.0007, "step": 162580 }, { "epoch": 2.6603943385420927, "grad_norm": 0.04565051943063736, "learning_rate": 3.8530646641283755e-07, "loss": 0.0006, "step": 162590 }, { "epoch": 2.6605579644931687, "grad_norm": 0.11791886389255524, "learning_rate": 3.849401036334599e-07, "loss": 0.001, "step": 162600 }, { "epoch": 2.6607215904442443, "grad_norm": 0.5547646880149841, "learning_rate": 3.845739081389094e-07, "loss": 0.0004, "step": 162610 }, { "epoch": 2.6608852163953203, "grad_norm": 0.07615247368812561, "learning_rate": 3.8420787994246254e-07, "loss": 0.0006, "step": 162620 }, { "epoch": 2.6610488423463963, "grad_norm": 0.05546354502439499, "learning_rate": 3.838420190573866e-07, "loss": 0.0008, "step": 162630 }, { "epoch": 2.661212468297472, "grad_norm": 0.009736438281834126, "learning_rate": 3.83476325496942e-07, "loss": 0.0009, "step": 162640 }, { "epoch": 2.661376094248548, "grad_norm": 0.1731843650341034, "learning_rate": 3.8311079927438656e-07, "loss": 0.0009, "step": 162650 }, { "epoch": 2.661539720199624, "grad_norm": 0.1371946781873703, "learning_rate": 3.827454404029668e-07, "loss": 0.001, "step": 162660 }, { "epoch": 2.6617033461506994, "grad_norm": 0.0035603567957878113, "learning_rate": 3.8238024889592884e-07, "loss": 0.0005, "step": 162670 }, { "epoch": 2.6618669721017754, "grad_norm": 0.05154554545879364, "learning_rate": 3.820152247665071e-07, "loss": 0.0014, "step": 162680 }, { "epoch": 2.6620305980528514, "grad_norm": 0.004191881977021694, "learning_rate": 3.81650368027936e-07, "loss": 0.0008, "step": 162690 }, { "epoch": 2.662194224003927, "grad_norm": 0.022267114371061325, "learning_rate": 3.812856786934388e-07, "loss": 0.0007, "step": 162700 }, { "epoch": 2.662357849955003, "grad_norm": 0.07482533901929855, "learning_rate": 3.8092115677623445e-07, "loss": 0.0006, "step": 162710 }, { "epoch": 2.662521475906079, "grad_norm": 0.08196624368429184, "learning_rate": 3.8055680228953673e-07, "loss": 0.0011, "step": 162720 }, { "epoch": 2.6626851018571545, "grad_norm": 0.12013554573059082, "learning_rate": 3.8019261524655184e-07, "loss": 0.0005, "step": 162730 }, { "epoch": 2.6628487278082305, "grad_norm": 0.005785492714494467, "learning_rate": 3.798285956604825e-07, "loss": 0.001, "step": 162740 }, { "epoch": 2.6630123537593064, "grad_norm": 0.34245383739471436, "learning_rate": 3.794647435445209e-07, "loss": 0.0006, "step": 162750 }, { "epoch": 2.663175979710382, "grad_norm": 0.027923041954636574, "learning_rate": 3.791010589118582e-07, "loss": 0.001, "step": 162760 }, { "epoch": 2.663339605661458, "grad_norm": 0.08423155546188354, "learning_rate": 3.7873754177567555e-07, "loss": 0.0007, "step": 162770 }, { "epoch": 2.663503231612534, "grad_norm": 0.027789434418082237, "learning_rate": 3.783741921491496e-07, "loss": 0.0007, "step": 162780 }, { "epoch": 2.6636668575636095, "grad_norm": 0.003839026903733611, "learning_rate": 3.780110100454526e-07, "loss": 0.001, "step": 162790 }, { "epoch": 2.6638304835146855, "grad_norm": 0.06512545794248581, "learning_rate": 3.776479954777473e-07, "loss": 0.0007, "step": 162800 }, { "epoch": 2.6639941094657615, "grad_norm": 0.09297475963830948, "learning_rate": 3.772851484591933e-07, "loss": 0.0005, "step": 162810 }, { "epoch": 2.664157735416837, "grad_norm": 0.026656849309802055, "learning_rate": 3.7692246900294103e-07, "loss": 0.0006, "step": 162820 }, { "epoch": 2.664321361367913, "grad_norm": 0.11279799789190292, "learning_rate": 3.765599571221379e-07, "loss": 0.0009, "step": 162830 }, { "epoch": 2.6644849873189886, "grad_norm": 0.013835698366165161, "learning_rate": 3.761976128299255e-07, "loss": 0.0005, "step": 162840 }, { "epoch": 2.6646486132700646, "grad_norm": 0.16747301816940308, "learning_rate": 3.758354361394351e-07, "loss": 0.0013, "step": 162850 }, { "epoch": 2.6648122392211406, "grad_norm": 0.1294667273759842, "learning_rate": 3.7547342706379773e-07, "loss": 0.0005, "step": 162860 }, { "epoch": 2.664975865172216, "grad_norm": 0.021930944174528122, "learning_rate": 3.7511158561613246e-07, "loss": 0.0011, "step": 162870 }, { "epoch": 2.665139491123292, "grad_norm": 0.033442918211221695, "learning_rate": 3.7474991180955824e-07, "loss": 0.0006, "step": 162880 }, { "epoch": 2.6653031170743677, "grad_norm": 0.15374310314655304, "learning_rate": 3.743884056571817e-07, "loss": 0.0009, "step": 162890 }, { "epoch": 2.6654667430254437, "grad_norm": 0.1310236155986786, "learning_rate": 3.740270671721091e-07, "loss": 0.001, "step": 162900 }, { "epoch": 2.6656303689765197, "grad_norm": 0.0040460145100951195, "learning_rate": 3.736658963674372e-07, "loss": 0.0005, "step": 162910 }, { "epoch": 2.6657939949275953, "grad_norm": 0.017761170864105225, "learning_rate": 3.7330489325625606e-07, "loss": 0.0006, "step": 162920 }, { "epoch": 2.6659576208786713, "grad_norm": 0.12663203477859497, "learning_rate": 3.729440578516541e-07, "loss": 0.0022, "step": 162930 }, { "epoch": 2.6661212468297473, "grad_norm": 0.08255751430988312, "learning_rate": 3.725833901667081e-07, "loss": 0.0005, "step": 162940 }, { "epoch": 2.666284872780823, "grad_norm": 0.023657580837607384, "learning_rate": 3.722228902144931e-07, "loss": 0.0005, "step": 162950 }, { "epoch": 2.666448498731899, "grad_norm": 0.07764837145805359, "learning_rate": 3.7186255800807536e-07, "loss": 0.0007, "step": 162960 }, { "epoch": 2.666612124682975, "grad_norm": 0.07613088935613632, "learning_rate": 3.715023935605172e-07, "loss": 0.0009, "step": 162970 }, { "epoch": 2.6667757506340504, "grad_norm": 0.02817651443183422, "learning_rate": 3.7114239688487206e-07, "loss": 0.0005, "step": 162980 }, { "epoch": 2.6669393765851264, "grad_norm": 0.05277569219470024, "learning_rate": 3.7078256799419066e-07, "loss": 0.0006, "step": 162990 }, { "epoch": 2.6671030025362024, "grad_norm": 0.07029453665018082, "learning_rate": 3.704229069015153e-07, "loss": 0.0004, "step": 163000 }, { "epoch": 2.667266628487278, "grad_norm": 0.030672775581479073, "learning_rate": 3.7006341361988165e-07, "loss": 0.0002, "step": 163010 }, { "epoch": 2.667430254438354, "grad_norm": 0.00485110841691494, "learning_rate": 3.6970408816232205e-07, "loss": 0.0008, "step": 163020 }, { "epoch": 2.66759388038943, "grad_norm": 0.008853784762322903, "learning_rate": 3.693449305418606e-07, "loss": 0.0003, "step": 163030 }, { "epoch": 2.6677575063405055, "grad_norm": 0.0329727828502655, "learning_rate": 3.6898594077151627e-07, "loss": 0.0009, "step": 163040 }, { "epoch": 2.6679211322915815, "grad_norm": 0.027727501466870308, "learning_rate": 3.6862711886430036e-07, "loss": 0.0004, "step": 163050 }, { "epoch": 2.6680847582426575, "grad_norm": 0.05496814101934433, "learning_rate": 3.682684648332202e-07, "loss": 0.0007, "step": 163060 }, { "epoch": 2.668248384193733, "grad_norm": 0.012437095865607262, "learning_rate": 3.679099786912771e-07, "loss": 0.0004, "step": 163070 }, { "epoch": 2.668412010144809, "grad_norm": 0.028985943645238876, "learning_rate": 3.675516604514634e-07, "loss": 0.0008, "step": 163080 }, { "epoch": 2.668575636095885, "grad_norm": 0.03409262374043465, "learning_rate": 3.671935101267687e-07, "loss": 0.0006, "step": 163090 }, { "epoch": 2.6687392620469605, "grad_norm": 0.025195077061653137, "learning_rate": 3.6683552773017495e-07, "loss": 0.0003, "step": 163100 }, { "epoch": 2.6689028879980365, "grad_norm": 0.05438625067472458, "learning_rate": 3.664777132746572e-07, "loss": 0.0008, "step": 163110 }, { "epoch": 2.6690665139491125, "grad_norm": 0.044088296592235565, "learning_rate": 3.6612006677318626e-07, "loss": 0.0006, "step": 163120 }, { "epoch": 2.669230139900188, "grad_norm": 0.07317398488521576, "learning_rate": 3.657625882387256e-07, "loss": 0.0009, "step": 163130 }, { "epoch": 2.669393765851264, "grad_norm": 0.05124928057193756, "learning_rate": 3.6540527768423317e-07, "loss": 0.0004, "step": 163140 }, { "epoch": 2.66955739180234, "grad_norm": 0.11896055191755295, "learning_rate": 3.650481351226598e-07, "loss": 0.0008, "step": 163150 }, { "epoch": 2.6697210177534156, "grad_norm": 0.06174270436167717, "learning_rate": 3.6469116056695287e-07, "loss": 0.0005, "step": 163160 }, { "epoch": 2.6698846437044916, "grad_norm": 0.02606838010251522, "learning_rate": 3.6433435403004923e-07, "loss": 0.001, "step": 163170 }, { "epoch": 2.6700482696555676, "grad_norm": 0.020135827362537384, "learning_rate": 3.639777155248853e-07, "loss": 0.0005, "step": 163180 }, { "epoch": 2.670211895606643, "grad_norm": 0.03413727879524231, "learning_rate": 3.6362124506438555e-07, "loss": 0.0011, "step": 163190 }, { "epoch": 2.670375521557719, "grad_norm": 0.155034601688385, "learning_rate": 3.6326494266147314e-07, "loss": 0.0008, "step": 163200 }, { "epoch": 2.6705391475087947, "grad_norm": 0.0019010301912203431, "learning_rate": 3.6290880832906275e-07, "loss": 0.0006, "step": 163210 }, { "epoch": 2.6707027734598707, "grad_norm": 0.1596842110157013, "learning_rate": 3.6255284208006226e-07, "loss": 0.0007, "step": 163220 }, { "epoch": 2.6708663994109467, "grad_norm": 0.05480702966451645, "learning_rate": 3.6219704392737584e-07, "loss": 0.0008, "step": 163230 }, { "epoch": 2.6710300253620223, "grad_norm": 0.08250179886817932, "learning_rate": 3.618414138838994e-07, "loss": 0.0004, "step": 163240 }, { "epoch": 2.6711936513130983, "grad_norm": 0.07675474882125854, "learning_rate": 3.614859519625252e-07, "loss": 0.0008, "step": 163250 }, { "epoch": 2.671357277264174, "grad_norm": 0.01846032775938511, "learning_rate": 3.611306581761359e-07, "loss": 0.0004, "step": 163260 }, { "epoch": 2.67152090321525, "grad_norm": 0.10128302127122879, "learning_rate": 3.6077553253761056e-07, "loss": 0.0017, "step": 163270 }, { "epoch": 2.671684529166326, "grad_norm": 0.051420170813798904, "learning_rate": 3.604205750598233e-07, "loss": 0.0008, "step": 163280 }, { "epoch": 2.6718481551174014, "grad_norm": 0.023811087012290955, "learning_rate": 3.6006578575563833e-07, "loss": 0.0004, "step": 163290 }, { "epoch": 2.6720117810684774, "grad_norm": 0.10541754961013794, "learning_rate": 3.5971116463791864e-07, "loss": 0.0005, "step": 163300 }, { "epoch": 2.6721754070195534, "grad_norm": 0.7095283269882202, "learning_rate": 3.5935671171951514e-07, "loss": 0.0006, "step": 163310 }, { "epoch": 2.672339032970629, "grad_norm": 0.1180049404501915, "learning_rate": 3.59002427013278e-07, "loss": 0.0005, "step": 163320 }, { "epoch": 2.672502658921705, "grad_norm": 0.0037099390756338835, "learning_rate": 3.586483105320476e-07, "loss": 0.0009, "step": 163330 }, { "epoch": 2.672666284872781, "grad_norm": 0.05947168916463852, "learning_rate": 3.582943622886609e-07, "loss": 0.0007, "step": 163340 }, { "epoch": 2.6728299108238565, "grad_norm": 0.008914670906960964, "learning_rate": 3.579405822959481e-07, "loss": 0.0005, "step": 163350 }, { "epoch": 2.6729935367749325, "grad_norm": 0.03897206112742424, "learning_rate": 3.5758697056673173e-07, "loss": 0.0005, "step": 163360 }, { "epoch": 2.6731571627260085, "grad_norm": 0.16469772160053253, "learning_rate": 3.5723352711383055e-07, "loss": 0.0007, "step": 163370 }, { "epoch": 2.673320788677084, "grad_norm": 0.021919772028923035, "learning_rate": 3.568802519500547e-07, "loss": 0.0007, "step": 163380 }, { "epoch": 2.67348441462816, "grad_norm": 0.11993957310914993, "learning_rate": 3.5652714508821075e-07, "loss": 0.0008, "step": 163390 }, { "epoch": 2.673648040579236, "grad_norm": 0.02776331827044487, "learning_rate": 3.561742065410978e-07, "loss": 0.0003, "step": 163400 }, { "epoch": 2.6738116665303115, "grad_norm": 0.09428710490465164, "learning_rate": 3.558214363215079e-07, "loss": 0.0013, "step": 163410 }, { "epoch": 2.6739752924813875, "grad_norm": 0.023654617369174957, "learning_rate": 3.554688344422297e-07, "loss": 0.0008, "step": 163420 }, { "epoch": 2.6741389184324635, "grad_norm": 0.0558219775557518, "learning_rate": 3.5511640091604293e-07, "loss": 0.002, "step": 163430 }, { "epoch": 2.674302544383539, "grad_norm": 0.15546190738677979, "learning_rate": 3.5476413575572354e-07, "loss": 0.0005, "step": 163440 }, { "epoch": 2.674466170334615, "grad_norm": 0.148259237408638, "learning_rate": 3.5441203897403854e-07, "loss": 0.001, "step": 163450 }, { "epoch": 2.674629796285691, "grad_norm": 0.052620917558670044, "learning_rate": 3.540601105837532e-07, "loss": 0.0005, "step": 163460 }, { "epoch": 2.6747934222367666, "grad_norm": 0.056272465735673904, "learning_rate": 3.537083505976213e-07, "loss": 0.0005, "step": 163470 }, { "epoch": 2.6749570481878426, "grad_norm": 0.04841306805610657, "learning_rate": 3.5335675902839596e-07, "loss": 0.0008, "step": 163480 }, { "epoch": 2.6751206741389186, "grad_norm": 0.037078436464071274, "learning_rate": 3.530053358888186e-07, "loss": 0.0006, "step": 163490 }, { "epoch": 2.675284300089994, "grad_norm": 0.08532621711492538, "learning_rate": 3.526540811916307e-07, "loss": 0.0004, "step": 163500 }, { "epoch": 2.67544792604107, "grad_norm": 0.00392522756010294, "learning_rate": 3.523029949495621e-07, "loss": 0.0004, "step": 163510 }, { "epoch": 2.675611551992146, "grad_norm": 0.2651732861995697, "learning_rate": 3.519520771753387e-07, "loss": 0.0018, "step": 163520 }, { "epoch": 2.6757751779432217, "grad_norm": 0.041324179619550705, "learning_rate": 3.516013278816827e-07, "loss": 0.0004, "step": 163530 }, { "epoch": 2.6759388038942977, "grad_norm": 0.06897354871034622, "learning_rate": 3.512507470813048e-07, "loss": 0.0003, "step": 163540 }, { "epoch": 2.6761024298453737, "grad_norm": 0.03266638517379761, "learning_rate": 3.50900334786915e-07, "loss": 0.0014, "step": 163550 }, { "epoch": 2.6762660557964493, "grad_norm": 0.04949835315346718, "learning_rate": 3.5055009101121475e-07, "loss": 0.0006, "step": 163560 }, { "epoch": 2.6764296817475253, "grad_norm": 0.0038154867943376303, "learning_rate": 3.5020001576689834e-07, "loss": 0.0008, "step": 163570 }, { "epoch": 2.6765933076986013, "grad_norm": 0.3559225797653198, "learning_rate": 3.498501090666567e-07, "loss": 0.001, "step": 163580 }, { "epoch": 2.676756933649677, "grad_norm": 0.0827450081706047, "learning_rate": 3.495003709231709e-07, "loss": 0.0007, "step": 163590 }, { "epoch": 2.676920559600753, "grad_norm": 0.005671032704412937, "learning_rate": 3.491508013491218e-07, "loss": 0.0012, "step": 163600 }, { "epoch": 2.6770841855518284, "grad_norm": 0.030812224373221397, "learning_rate": 3.48801400357176e-07, "loss": 0.001, "step": 163610 }, { "epoch": 2.6772478115029044, "grad_norm": 0.10496310889720917, "learning_rate": 3.4845216796000115e-07, "loss": 0.0008, "step": 163620 }, { "epoch": 2.6774114374539804, "grad_norm": 0.06562676280736923, "learning_rate": 3.4810310417025594e-07, "loss": 0.0007, "step": 163630 }, { "epoch": 2.677575063405056, "grad_norm": 0.028749961405992508, "learning_rate": 3.4775420900059197e-07, "loss": 0.0009, "step": 163640 }, { "epoch": 2.677738689356132, "grad_norm": 0.060133472084999084, "learning_rate": 3.4740548246365745e-07, "loss": 0.0011, "step": 163650 }, { "epoch": 2.6779023153072075, "grad_norm": 0.07799925655126572, "learning_rate": 3.470569245720906e-07, "loss": 0.0004, "step": 163660 }, { "epoch": 2.6780659412582835, "grad_norm": 0.0006128168315626681, "learning_rate": 3.4670853533852857e-07, "loss": 0.0007, "step": 163670 }, { "epoch": 2.6782295672093595, "grad_norm": 0.07360197603702545, "learning_rate": 3.4636031477559727e-07, "loss": 0.0005, "step": 163680 }, { "epoch": 2.678393193160435, "grad_norm": 0.05974465608596802, "learning_rate": 3.460122628959206e-07, "loss": 0.0005, "step": 163690 }, { "epoch": 2.678556819111511, "grad_norm": 0.04297952726483345, "learning_rate": 3.45664379712114e-07, "loss": 0.0006, "step": 163700 }, { "epoch": 2.678720445062587, "grad_norm": 0.05019129812717438, "learning_rate": 3.453166652367862e-07, "loss": 0.0008, "step": 163710 }, { "epoch": 2.6788840710136625, "grad_norm": 0.09492860734462738, "learning_rate": 3.4496911948254274e-07, "loss": 0.0014, "step": 163720 }, { "epoch": 2.6790476969647385, "grad_norm": 0.05131504684686661, "learning_rate": 3.4462174246198023e-07, "loss": 0.0007, "step": 163730 }, { "epoch": 2.6792113229158145, "grad_norm": 0.1537952721118927, "learning_rate": 3.442745341876913e-07, "loss": 0.0005, "step": 163740 }, { "epoch": 2.67937494886689, "grad_norm": 0.09487973153591156, "learning_rate": 3.439274946722598e-07, "loss": 0.0006, "step": 163750 }, { "epoch": 2.679538574817966, "grad_norm": 0.032783087342977524, "learning_rate": 3.4358062392826684e-07, "loss": 0.0006, "step": 163760 }, { "epoch": 2.679702200769042, "grad_norm": 0.09356853365898132, "learning_rate": 3.432339219682845e-07, "loss": 0.0009, "step": 163770 }, { "epoch": 2.6798658267201176, "grad_norm": 0.07034482806921005, "learning_rate": 3.428873888048795e-07, "loss": 0.0005, "step": 163780 }, { "epoch": 2.6800294526711936, "grad_norm": 0.0939268171787262, "learning_rate": 3.4254102445061497e-07, "loss": 0.0007, "step": 163790 }, { "epoch": 2.6801930786222696, "grad_norm": 0.09289959073066711, "learning_rate": 3.4219482891804434e-07, "loss": 0.0004, "step": 163800 }, { "epoch": 2.680356704573345, "grad_norm": 0.06619736552238464, "learning_rate": 3.418488022197164e-07, "loss": 0.0014, "step": 163810 }, { "epoch": 2.680520330524421, "grad_norm": 0.014307755045592785, "learning_rate": 3.415029443681728e-07, "loss": 0.0003, "step": 163820 }, { "epoch": 2.680683956475497, "grad_norm": 0.04305620864033699, "learning_rate": 3.4115725537595125e-07, "loss": 0.0008, "step": 163830 }, { "epoch": 2.6808475824265727, "grad_norm": 0.07943262904882431, "learning_rate": 3.4081173525558233e-07, "loss": 0.0007, "step": 163840 }, { "epoch": 2.6810112083776487, "grad_norm": 0.04751698672771454, "learning_rate": 3.4046638401958985e-07, "loss": 0.0003, "step": 163850 }, { "epoch": 2.6811748343287247, "grad_norm": 0.04202309623360634, "learning_rate": 3.401212016804922e-07, "loss": 0.0006, "step": 163860 }, { "epoch": 2.6813384602798003, "grad_norm": 0.1558055579662323, "learning_rate": 3.3977618825080103e-07, "loss": 0.0008, "step": 163870 }, { "epoch": 2.6815020862308763, "grad_norm": 0.045074786990880966, "learning_rate": 3.39431343743023e-07, "loss": 0.0007, "step": 163880 }, { "epoch": 2.6816657121819523, "grad_norm": 0.09941434115171432, "learning_rate": 3.390866681696564e-07, "loss": 0.0006, "step": 163890 }, { "epoch": 2.681829338133028, "grad_norm": 0.03489271551370621, "learning_rate": 3.3874216154319686e-07, "loss": 0.0007, "step": 163900 }, { "epoch": 2.681992964084104, "grad_norm": 0.0354074165225029, "learning_rate": 3.3839782387613097e-07, "loss": 0.001, "step": 163910 }, { "epoch": 2.68215659003518, "grad_norm": 0.035373836755752563, "learning_rate": 3.380536551809394e-07, "loss": 0.0007, "step": 163920 }, { "epoch": 2.6823202159862554, "grad_norm": 0.056779470294713974, "learning_rate": 3.377096554700987e-07, "loss": 0.0008, "step": 163930 }, { "epoch": 2.6824838419373314, "grad_norm": 0.06307129561901093, "learning_rate": 3.373658247560768e-07, "loss": 0.0006, "step": 163940 }, { "epoch": 2.6826474678884074, "grad_norm": 0.053183939307928085, "learning_rate": 3.370221630513382e-07, "loss": 0.0004, "step": 163950 }, { "epoch": 2.682811093839483, "grad_norm": 0.031169995665550232, "learning_rate": 3.366786703683383e-07, "loss": 0.0005, "step": 163960 }, { "epoch": 2.682974719790559, "grad_norm": 0.21075117588043213, "learning_rate": 3.3633534671952904e-07, "loss": 0.001, "step": 163970 }, { "epoch": 2.683138345741635, "grad_norm": 0.0974084883928299, "learning_rate": 3.3599219211735423e-07, "loss": 0.0006, "step": 163980 }, { "epoch": 2.6833019716927105, "grad_norm": 0.05762771889567375, "learning_rate": 3.356492065742528e-07, "loss": 0.0006, "step": 163990 }, { "epoch": 2.6834655976437864, "grad_norm": 0.04072760418057442, "learning_rate": 3.353063901026576e-07, "loss": 0.0007, "step": 164000 }, { "epoch": 2.683629223594862, "grad_norm": 0.0807693675160408, "learning_rate": 3.3496374271499486e-07, "loss": 0.0004, "step": 164010 }, { "epoch": 2.683792849545938, "grad_norm": 0.009045381098985672, "learning_rate": 3.34621264423684e-07, "loss": 0.0003, "step": 164020 }, { "epoch": 2.6839564754970135, "grad_norm": 0.14643540978431702, "learning_rate": 3.342789552411385e-07, "loss": 0.0012, "step": 164030 }, { "epoch": 2.6841201014480895, "grad_norm": 0.005377517081797123, "learning_rate": 3.3393681517976784e-07, "loss": 0.0008, "step": 164040 }, { "epoch": 2.6842837273991655, "grad_norm": 0.08317583054304123, "learning_rate": 3.335948442519732e-07, "loss": 0.0005, "step": 164050 }, { "epoch": 2.684447353350241, "grad_norm": 0.11639754474163055, "learning_rate": 3.332530424701491e-07, "loss": 0.0015, "step": 164060 }, { "epoch": 2.684610979301317, "grad_norm": 0.07140519469976425, "learning_rate": 3.3291140984668737e-07, "loss": 0.0007, "step": 164070 }, { "epoch": 2.684774605252393, "grad_norm": 0.021656684577465057, "learning_rate": 3.3256994639396856e-07, "loss": 0.0006, "step": 164080 }, { "epoch": 2.6849382312034686, "grad_norm": 0.04261285439133644, "learning_rate": 3.3222865212437227e-07, "loss": 0.0006, "step": 164090 }, { "epoch": 2.6851018571545446, "grad_norm": 0.15076954662799835, "learning_rate": 3.3188752705026804e-07, "loss": 0.0004, "step": 164100 }, { "epoch": 2.6852654831056206, "grad_norm": 0.012875348329544067, "learning_rate": 3.3154657118402265e-07, "loss": 0.0009, "step": 164110 }, { "epoch": 2.685429109056696, "grad_norm": 0.07233261317014694, "learning_rate": 3.312057845379929e-07, "loss": 0.0006, "step": 164120 }, { "epoch": 2.685592735007772, "grad_norm": 0.001244951388798654, "learning_rate": 3.308651671245322e-07, "loss": 0.0013, "step": 164130 }, { "epoch": 2.685756360958848, "grad_norm": 0.03029131516814232, "learning_rate": 3.3052471895598736e-07, "loss": 0.001, "step": 164140 }, { "epoch": 2.6859199869099237, "grad_norm": 0.062144286930561066, "learning_rate": 3.3018444004469853e-07, "loss": 0.0007, "step": 164150 }, { "epoch": 2.6860836128609997, "grad_norm": 0.036022696644067764, "learning_rate": 3.2984433040300026e-07, "loss": 0.0008, "step": 164160 }, { "epoch": 2.6862472388120757, "grad_norm": 0.15065248310565948, "learning_rate": 3.2950439004322046e-07, "loss": 0.0004, "step": 164170 }, { "epoch": 2.6864108647631513, "grad_norm": 0.16965873539447784, "learning_rate": 3.2916461897768147e-07, "loss": 0.0009, "step": 164180 }, { "epoch": 2.6865744907142273, "grad_norm": 0.08845871686935425, "learning_rate": 3.2882501721869855e-07, "loss": 0.0011, "step": 164190 }, { "epoch": 2.6867381166653033, "grad_norm": 0.03509719297289848, "learning_rate": 3.2848558477858286e-07, "loss": 0.0017, "step": 164200 }, { "epoch": 2.686901742616379, "grad_norm": 0.09602032601833344, "learning_rate": 3.281463216696368e-07, "loss": 0.0009, "step": 164210 }, { "epoch": 2.687065368567455, "grad_norm": 0.06660565733909607, "learning_rate": 3.278072279041572e-07, "loss": 0.0004, "step": 164220 }, { "epoch": 2.687228994518531, "grad_norm": 0.016373775899410248, "learning_rate": 3.2746830349443705e-07, "loss": 0.0006, "step": 164230 }, { "epoch": 2.6873926204696064, "grad_norm": 0.0728883147239685, "learning_rate": 3.2712954845276033e-07, "loss": 0.0005, "step": 164240 }, { "epoch": 2.6875562464206824, "grad_norm": 0.01966550201177597, "learning_rate": 3.267909627914068e-07, "loss": 0.0008, "step": 164250 }, { "epoch": 2.6877198723717584, "grad_norm": 0.03364823758602142, "learning_rate": 3.264525465226487e-07, "loss": 0.0004, "step": 164260 }, { "epoch": 2.687883498322834, "grad_norm": 0.0681309923529625, "learning_rate": 3.2611429965875296e-07, "loss": 0.0007, "step": 164270 }, { "epoch": 2.68804712427391, "grad_norm": 0.12694673240184784, "learning_rate": 3.257762222119815e-07, "loss": 0.0011, "step": 164280 }, { "epoch": 2.688210750224986, "grad_norm": 0.02490386739373207, "learning_rate": 3.254383141945866e-07, "loss": 0.0004, "step": 164290 }, { "epoch": 2.6883743761760615, "grad_norm": 0.020304597914218903, "learning_rate": 3.251005756188191e-07, "loss": 0.0011, "step": 164300 }, { "epoch": 2.6885380021271374, "grad_norm": 0.012983457185328007, "learning_rate": 3.247630064969193e-07, "loss": 0.0005, "step": 164310 }, { "epoch": 2.6887016280782134, "grad_norm": 0.0048445831052958965, "learning_rate": 3.244256068411239e-07, "loss": 0.0004, "step": 164320 }, { "epoch": 2.688865254029289, "grad_norm": 0.011834460310637951, "learning_rate": 3.2408837666366267e-07, "loss": 0.0013, "step": 164330 }, { "epoch": 2.689028879980365, "grad_norm": 0.026330221444368362, "learning_rate": 3.2375131597675967e-07, "loss": 0.0005, "step": 164340 }, { "epoch": 2.689192505931441, "grad_norm": 0.09093364328145981, "learning_rate": 3.2341442479263243e-07, "loss": 0.0006, "step": 164350 }, { "epoch": 2.6893561318825165, "grad_norm": 0.05825169384479523, "learning_rate": 3.230777031234922e-07, "loss": 0.0004, "step": 164360 }, { "epoch": 2.6895197578335925, "grad_norm": 0.035214781761169434, "learning_rate": 3.227411509815448e-07, "loss": 0.0005, "step": 164370 }, { "epoch": 2.689683383784668, "grad_norm": 0.04588169604539871, "learning_rate": 3.224047683789888e-07, "loss": 0.0003, "step": 164380 }, { "epoch": 2.689847009735744, "grad_norm": 0.04934491962194443, "learning_rate": 3.220685553280184e-07, "loss": 0.0004, "step": 164390 }, { "epoch": 2.69001063568682, "grad_norm": 0.10355619341135025, "learning_rate": 3.2173251184081876e-07, "loss": 0.0004, "step": 164400 }, { "epoch": 2.6901742616378956, "grad_norm": 0.05426101014018059, "learning_rate": 3.213966379295724e-07, "loss": 0.0008, "step": 164410 }, { "epoch": 2.6903378875889716, "grad_norm": 0.0355713777244091, "learning_rate": 3.210609336064535e-07, "loss": 0.0007, "step": 164420 }, { "epoch": 2.690501513540047, "grad_norm": 0.0870102271437645, "learning_rate": 3.20725398883629e-07, "loss": 0.0004, "step": 164430 }, { "epoch": 2.690665139491123, "grad_norm": 0.1024090051651001, "learning_rate": 3.203900337732635e-07, "loss": 0.0007, "step": 164440 }, { "epoch": 2.690828765442199, "grad_norm": 0.020521871745586395, "learning_rate": 3.200548382875113e-07, "loss": 0.0006, "step": 164450 }, { "epoch": 2.6909923913932747, "grad_norm": 0.14649441838264465, "learning_rate": 3.197198124385242e-07, "loss": 0.0007, "step": 164460 }, { "epoch": 2.6911560173443507, "grad_norm": 0.059924088418483734, "learning_rate": 3.1938495623844435e-07, "loss": 0.0004, "step": 164470 }, { "epoch": 2.6913196432954267, "grad_norm": 0.026155566796660423, "learning_rate": 3.190502696994102e-07, "loss": 0.0005, "step": 164480 }, { "epoch": 2.6914832692465023, "grad_norm": 0.12354004383087158, "learning_rate": 3.1871575283355383e-07, "loss": 0.0015, "step": 164490 }, { "epoch": 2.6916468951975783, "grad_norm": 0.11201626062393188, "learning_rate": 3.183814056529999e-07, "loss": 0.0011, "step": 164500 }, { "epoch": 2.6918105211486543, "grad_norm": 0.06511817127466202, "learning_rate": 3.180472281698693e-07, "loss": 0.0005, "step": 164510 }, { "epoch": 2.69197414709973, "grad_norm": 0.06703072041273117, "learning_rate": 3.177132203962724e-07, "loss": 0.0003, "step": 164520 }, { "epoch": 2.692137773050806, "grad_norm": 0.06826429814100266, "learning_rate": 3.173793823443189e-07, "loss": 0.0006, "step": 164530 }, { "epoch": 2.692301399001882, "grad_norm": 0.09261713176965714, "learning_rate": 3.170457140261074e-07, "loss": 0.0004, "step": 164540 }, { "epoch": 2.6924650249529574, "grad_norm": 0.10847348719835281, "learning_rate": 3.1671221545373334e-07, "loss": 0.0003, "step": 164550 }, { "epoch": 2.6926286509040334, "grad_norm": 0.3239886462688446, "learning_rate": 3.163788866392864e-07, "loss": 0.0008, "step": 164560 }, { "epoch": 2.6927922768551094, "grad_norm": 0.025863589718937874, "learning_rate": 3.160457275948475e-07, "loss": 0.0005, "step": 164570 }, { "epoch": 2.692955902806185, "grad_norm": 0.02969971112906933, "learning_rate": 3.157127383324937e-07, "loss": 0.0005, "step": 164580 }, { "epoch": 2.693119528757261, "grad_norm": 0.11970417946577072, "learning_rate": 3.153799188642942e-07, "loss": 0.0007, "step": 164590 }, { "epoch": 2.693283154708337, "grad_norm": 0.09122994542121887, "learning_rate": 3.1504726920231435e-07, "loss": 0.0011, "step": 164600 }, { "epoch": 2.6934467806594125, "grad_norm": 0.0669901967048645, "learning_rate": 3.1471478935861056e-07, "loss": 0.0006, "step": 164610 }, { "epoch": 2.6936104066104885, "grad_norm": 0.0223726574331522, "learning_rate": 3.1438247934523494e-07, "loss": 0.0009, "step": 164620 }, { "epoch": 2.6937740325615644, "grad_norm": 0.0420125387609005, "learning_rate": 3.1405033917423277e-07, "loss": 0.0005, "step": 164630 }, { "epoch": 2.69393765851264, "grad_norm": 0.10832918435335159, "learning_rate": 3.137183688576428e-07, "loss": 0.0005, "step": 164640 }, { "epoch": 2.694101284463716, "grad_norm": 0.08849113434553146, "learning_rate": 3.1338656840749995e-07, "loss": 0.001, "step": 164650 }, { "epoch": 2.694264910414792, "grad_norm": 0.14301711320877075, "learning_rate": 3.1305493783582885e-07, "loss": 0.0007, "step": 164660 }, { "epoch": 2.6944285363658675, "grad_norm": 0.008534056134521961, "learning_rate": 3.127234771546528e-07, "loss": 0.0004, "step": 164670 }, { "epoch": 2.6945921623169435, "grad_norm": 0.027357056736946106, "learning_rate": 3.123921863759838e-07, "loss": 0.0006, "step": 164680 }, { "epoch": 2.6947557882680195, "grad_norm": 0.04958884045481682, "learning_rate": 3.120610655118328e-07, "loss": 0.0004, "step": 164690 }, { "epoch": 2.694919414219095, "grad_norm": 0.1362619400024414, "learning_rate": 3.1173011457420024e-07, "loss": 0.0009, "step": 164700 }, { "epoch": 2.695083040170171, "grad_norm": 0.003076396184042096, "learning_rate": 3.1139933357508313e-07, "loss": 0.0005, "step": 164710 }, { "epoch": 2.695246666121247, "grad_norm": 0.012932802550494671, "learning_rate": 3.1106872252647244e-07, "loss": 0.0003, "step": 164720 }, { "epoch": 2.6954102920723226, "grad_norm": 0.03946397826075554, "learning_rate": 3.1073828144034977e-07, "loss": 0.0005, "step": 164730 }, { "epoch": 2.6955739180233986, "grad_norm": 0.11020883917808533, "learning_rate": 3.104080103286944e-07, "loss": 0.0006, "step": 164740 }, { "epoch": 2.6957375439744746, "grad_norm": 0.11737523972988129, "learning_rate": 3.1007790920347724e-07, "loss": 0.0006, "step": 164750 }, { "epoch": 2.69590116992555, "grad_norm": 0.008979442529380322, "learning_rate": 3.097479780766638e-07, "loss": 0.0008, "step": 164760 }, { "epoch": 2.696064795876626, "grad_norm": 0.1808817833662033, "learning_rate": 3.0941821696021446e-07, "loss": 0.0011, "step": 164770 }, { "epoch": 2.6962284218277017, "grad_norm": 0.1960621476173401, "learning_rate": 3.0908862586608024e-07, "loss": 0.0007, "step": 164780 }, { "epoch": 2.6963920477787777, "grad_norm": 0.0022592798341065645, "learning_rate": 3.087592048062099e-07, "loss": 0.0007, "step": 164790 }, { "epoch": 2.6965556737298537, "grad_norm": 0.03683222457766533, "learning_rate": 3.0842995379254226e-07, "loss": 0.0005, "step": 164800 }, { "epoch": 2.6967192996809293, "grad_norm": 0.015587952919304371, "learning_rate": 3.0810087283701493e-07, "loss": 0.0012, "step": 164810 }, { "epoch": 2.6968829256320053, "grad_norm": 0.015529097989201546, "learning_rate": 3.0777196195155234e-07, "loss": 0.0005, "step": 164820 }, { "epoch": 2.697046551583081, "grad_norm": 0.06242026016116142, "learning_rate": 3.0744322114807877e-07, "loss": 0.0011, "step": 164830 }, { "epoch": 2.697210177534157, "grad_norm": 0.20920215547084808, "learning_rate": 3.0711465043851084e-07, "loss": 0.0008, "step": 164840 }, { "epoch": 2.697373803485233, "grad_norm": 0.0022285813465714455, "learning_rate": 3.0678624983475733e-07, "loss": 0.0003, "step": 164850 }, { "epoch": 2.6975374294363084, "grad_norm": 0.1550416499376297, "learning_rate": 3.0645801934872264e-07, "loss": 0.0009, "step": 164860 }, { "epoch": 2.6977010553873844, "grad_norm": 0.039119429886341095, "learning_rate": 3.061299589923039e-07, "loss": 0.0006, "step": 164870 }, { "epoch": 2.6978646813384604, "grad_norm": 0.038074083626270294, "learning_rate": 3.0580206877739327e-07, "loss": 0.0004, "step": 164880 }, { "epoch": 2.698028307289536, "grad_norm": 0.18133632838726044, "learning_rate": 3.0547434871587455e-07, "loss": 0.0013, "step": 164890 }, { "epoch": 2.698191933240612, "grad_norm": 0.08124921470880508, "learning_rate": 3.051467988196283e-07, "loss": 0.0006, "step": 164900 }, { "epoch": 2.698355559191688, "grad_norm": 0.04892897978425026, "learning_rate": 3.0481941910052715e-07, "loss": 0.0005, "step": 164910 }, { "epoch": 2.6985191851427635, "grad_norm": 0.009849685244262218, "learning_rate": 3.044922095704367e-07, "loss": 0.0005, "step": 164920 }, { "epoch": 2.6986828110938395, "grad_norm": 0.15147916972637177, "learning_rate": 3.0416517024121905e-07, "loss": 0.0015, "step": 164930 }, { "epoch": 2.6988464370449154, "grad_norm": 0.025554845109581947, "learning_rate": 3.03838301124727e-07, "loss": 0.0008, "step": 164940 }, { "epoch": 2.699010062995991, "grad_norm": 0.10244035720825195, "learning_rate": 3.035116022328105e-07, "loss": 0.0008, "step": 164950 }, { "epoch": 2.699173688947067, "grad_norm": 0.08899954706430435, "learning_rate": 3.031850735773101e-07, "loss": 0.0006, "step": 164960 }, { "epoch": 2.699337314898143, "grad_norm": 0.08565444499254227, "learning_rate": 3.028587151700624e-07, "loss": 0.0009, "step": 164970 }, { "epoch": 2.6995009408492185, "grad_norm": 0.06779224425554276, "learning_rate": 3.0253252702289684e-07, "loss": 0.0003, "step": 164980 }, { "epoch": 2.6996645668002945, "grad_norm": 0.13577570021152496, "learning_rate": 3.022065091476367e-07, "loss": 0.0007, "step": 164990 }, { "epoch": 2.6998281927513705, "grad_norm": 0.05621223524212837, "learning_rate": 3.0188066155610097e-07, "loss": 0.0006, "step": 165000 }, { "epoch": 2.699991818702446, "grad_norm": 0.19461823999881744, "learning_rate": 3.01554984260099e-07, "loss": 0.0006, "step": 165010 }, { "epoch": 2.700155444653522, "grad_norm": 0.02049155905842781, "learning_rate": 3.012294772714369e-07, "loss": 0.0006, "step": 165020 }, { "epoch": 2.700319070604598, "grad_norm": 0.11019431054592133, "learning_rate": 3.0090414060191255e-07, "loss": 0.0009, "step": 165030 }, { "epoch": 2.7004826965556736, "grad_norm": 0.14438244700431824, "learning_rate": 3.005789742633186e-07, "loss": 0.0006, "step": 165040 }, { "epoch": 2.7006463225067496, "grad_norm": 0.01577475666999817, "learning_rate": 3.0025397826744296e-07, "loss": 0.0005, "step": 165050 }, { "epoch": 2.7008099484578256, "grad_norm": 0.057898905128240585, "learning_rate": 2.999291526260645e-07, "loss": 0.0011, "step": 165060 }, { "epoch": 2.700973574408901, "grad_norm": 0.050362300127744675, "learning_rate": 2.996044973509582e-07, "loss": 0.0009, "step": 165070 }, { "epoch": 2.701137200359977, "grad_norm": 0.08215784281492233, "learning_rate": 2.992800124538914e-07, "loss": 0.0011, "step": 165080 }, { "epoch": 2.701300826311053, "grad_norm": 0.06541654467582703, "learning_rate": 2.989556979466268e-07, "loss": 0.0005, "step": 165090 }, { "epoch": 2.7014644522621287, "grad_norm": 0.13151216506958008, "learning_rate": 2.98631553840919e-07, "loss": 0.0005, "step": 165100 }, { "epoch": 2.7016280782132047, "grad_norm": 0.03314230218529701, "learning_rate": 2.983075801485186e-07, "loss": 0.0006, "step": 165110 }, { "epoch": 2.7017917041642807, "grad_norm": 0.18590034544467926, "learning_rate": 2.9798377688116833e-07, "loss": 0.0014, "step": 165120 }, { "epoch": 2.7019553301153563, "grad_norm": 0.05290968343615532, "learning_rate": 2.9766014405060437e-07, "loss": 0.0008, "step": 165130 }, { "epoch": 2.7021189560664323, "grad_norm": 0.11153509467840195, "learning_rate": 2.973366816685591e-07, "loss": 0.0014, "step": 165140 }, { "epoch": 2.702282582017508, "grad_norm": 0.05995293706655502, "learning_rate": 2.970133897467559e-07, "loss": 0.0003, "step": 165150 }, { "epoch": 2.702446207968584, "grad_norm": 0.11263551563024521, "learning_rate": 2.966902682969142e-07, "loss": 0.0007, "step": 165160 }, { "epoch": 2.70260983391966, "grad_norm": 0.02554144337773323, "learning_rate": 2.9636731733074573e-07, "loss": 0.0005, "step": 165170 }, { "epoch": 2.7027734598707354, "grad_norm": 0.443741112947464, "learning_rate": 2.960445368599579e-07, "loss": 0.0012, "step": 165180 }, { "epoch": 2.7029370858218114, "grad_norm": 0.026707736775279045, "learning_rate": 2.957219268962497e-07, "loss": 0.0005, "step": 165190 }, { "epoch": 2.703100711772887, "grad_norm": 0.10003632307052612, "learning_rate": 2.953994874513144e-07, "loss": 0.0005, "step": 165200 }, { "epoch": 2.703264337723963, "grad_norm": 0.035505056381225586, "learning_rate": 2.950772185368428e-07, "loss": 0.0014, "step": 165210 }, { "epoch": 2.703427963675039, "grad_norm": 0.012073550373315811, "learning_rate": 2.947551201645116e-07, "loss": 0.0009, "step": 165220 }, { "epoch": 2.7035915896261145, "grad_norm": 0.06280562281608582, "learning_rate": 2.944331923459998e-07, "loss": 0.0009, "step": 165230 }, { "epoch": 2.7037552155771905, "grad_norm": 0.015164192765951157, "learning_rate": 2.9411143509297467e-07, "loss": 0.0007, "step": 165240 }, { "epoch": 2.7039188415282664, "grad_norm": 0.03793412446975708, "learning_rate": 2.937898484170998e-07, "loss": 0.0009, "step": 165250 }, { "epoch": 2.704082467479342, "grad_norm": 0.06300512701272964, "learning_rate": 2.9346843233003185e-07, "loss": 0.0006, "step": 165260 }, { "epoch": 2.704246093430418, "grad_norm": 0.09818796068429947, "learning_rate": 2.9314718684342105e-07, "loss": 0.0007, "step": 165270 }, { "epoch": 2.704409719381494, "grad_norm": 0.037996191531419754, "learning_rate": 2.92826111968913e-07, "loss": 0.0006, "step": 165280 }, { "epoch": 2.7045733453325695, "grad_norm": 0.1250123679637909, "learning_rate": 2.925052077181445e-07, "loss": 0.0007, "step": 165290 }, { "epoch": 2.7047369712836455, "grad_norm": 0.018413212150335312, "learning_rate": 2.92184474102748e-07, "loss": 0.0011, "step": 165300 }, { "epoch": 2.7049005972347215, "grad_norm": 0.03687401860952377, "learning_rate": 2.918639111343502e-07, "loss": 0.0004, "step": 165310 }, { "epoch": 2.705064223185797, "grad_norm": 0.0521615706384182, "learning_rate": 2.9154351882456855e-07, "loss": 0.0005, "step": 165320 }, { "epoch": 2.705227849136873, "grad_norm": 0.042304541915655136, "learning_rate": 2.9122329718501876e-07, "loss": 0.0006, "step": 165330 }, { "epoch": 2.705391475087949, "grad_norm": 0.1874370276927948, "learning_rate": 2.909032462273065e-07, "loss": 0.0005, "step": 165340 }, { "epoch": 2.7055551010390246, "grad_norm": 0.10817678272724152, "learning_rate": 2.905833659630347e-07, "loss": 0.0017, "step": 165350 }, { "epoch": 2.7057187269901006, "grad_norm": 0.015270596370100975, "learning_rate": 2.9026365640379583e-07, "loss": 0.0009, "step": 165360 }, { "epoch": 2.7058823529411766, "grad_norm": 0.016826342791318893, "learning_rate": 2.899441175611806e-07, "loss": 0.0004, "step": 165370 }, { "epoch": 2.706045978892252, "grad_norm": 0.01082519069314003, "learning_rate": 2.896247494467702e-07, "loss": 0.0011, "step": 165380 }, { "epoch": 2.706209604843328, "grad_norm": 0.03310023620724678, "learning_rate": 2.8930555207214217e-07, "loss": 0.0011, "step": 165390 }, { "epoch": 2.706373230794404, "grad_norm": 0.21069121360778809, "learning_rate": 2.889865254488655e-07, "loss": 0.0005, "step": 165400 }, { "epoch": 2.7065368567454797, "grad_norm": 0.06574288010597229, "learning_rate": 2.8866766958850543e-07, "loss": 0.0004, "step": 165410 }, { "epoch": 2.7067004826965557, "grad_norm": 0.07977749407291412, "learning_rate": 2.8834898450261885e-07, "loss": 0.0009, "step": 165420 }, { "epoch": 2.7068641086476317, "grad_norm": 0.11978202313184738, "learning_rate": 2.880304702027564e-07, "loss": 0.0005, "step": 165430 }, { "epoch": 2.7070277345987073, "grad_norm": 0.02209595777094364, "learning_rate": 2.8771212670046566e-07, "loss": 0.0008, "step": 165440 }, { "epoch": 2.7071913605497833, "grad_norm": 0.11386047303676605, "learning_rate": 2.87393954007284e-07, "loss": 0.0018, "step": 165450 }, { "epoch": 2.7073549865008593, "grad_norm": 0.05324776843190193, "learning_rate": 2.870759521347455e-07, "loss": 0.0015, "step": 165460 }, { "epoch": 2.707518612451935, "grad_norm": 0.0474587082862854, "learning_rate": 2.8675812109437537e-07, "loss": 0.0007, "step": 165470 }, { "epoch": 2.707682238403011, "grad_norm": 0.13699106872081757, "learning_rate": 2.8644046089769615e-07, "loss": 0.001, "step": 165480 }, { "epoch": 2.707845864354087, "grad_norm": 0.07346556335687637, "learning_rate": 2.861229715562219e-07, "loss": 0.0007, "step": 165490 }, { "epoch": 2.7080094903051624, "grad_norm": 0.07224927842617035, "learning_rate": 2.858056530814596e-07, "loss": 0.0007, "step": 165500 }, { "epoch": 2.7081731162562384, "grad_norm": 0.09595628827810287, "learning_rate": 2.854885054849138e-07, "loss": 0.0008, "step": 165510 }, { "epoch": 2.7083367422073144, "grad_norm": 0.038459885865449905, "learning_rate": 2.851715287780765e-07, "loss": 0.0007, "step": 165520 }, { "epoch": 2.70850036815839, "grad_norm": 0.01125528197735548, "learning_rate": 2.848547229724402e-07, "loss": 0.0008, "step": 165530 }, { "epoch": 2.708663994109466, "grad_norm": 0.008230326697230339, "learning_rate": 2.8453808807948846e-07, "loss": 0.0017, "step": 165540 }, { "epoch": 2.7088276200605415, "grad_norm": 0.0632973462343216, "learning_rate": 2.842216241106965e-07, "loss": 0.0004, "step": 165550 }, { "epoch": 2.7089912460116174, "grad_norm": 0.023056013509631157, "learning_rate": 2.8390533107753746e-07, "loss": 0.0005, "step": 165560 }, { "epoch": 2.7091548719626934, "grad_norm": 0.008053119294345379, "learning_rate": 2.835892089914749e-07, "loss": 0.0003, "step": 165570 }, { "epoch": 2.709318497913769, "grad_norm": 0.007786785252392292, "learning_rate": 2.8327325786396797e-07, "loss": 0.0007, "step": 165580 }, { "epoch": 2.709482123864845, "grad_norm": 0.18835268914699554, "learning_rate": 2.829574777064692e-07, "loss": 0.0008, "step": 165590 }, { "epoch": 2.7096457498159205, "grad_norm": 0.04131743311882019, "learning_rate": 2.8264186853042494e-07, "loss": 0.0003, "step": 165600 }, { "epoch": 2.7098093757669965, "grad_norm": 0.09654130786657333, "learning_rate": 2.82326430347275e-07, "loss": 0.001, "step": 165610 }, { "epoch": 2.7099730017180725, "grad_norm": 0.08916892856359482, "learning_rate": 2.82011163168453e-07, "loss": 0.0008, "step": 165620 }, { "epoch": 2.710136627669148, "grad_norm": 0.04151221364736557, "learning_rate": 2.816960670053875e-07, "loss": 0.0004, "step": 165630 }, { "epoch": 2.710300253620224, "grad_norm": 0.07366225868463516, "learning_rate": 2.813811418694984e-07, "loss": 0.0013, "step": 165640 }, { "epoch": 2.7104638795713, "grad_norm": 0.15956096351146698, "learning_rate": 2.810663877722031e-07, "loss": 0.0006, "step": 165650 }, { "epoch": 2.7106275055223756, "grad_norm": 0.015970144420862198, "learning_rate": 2.8075180472490917e-07, "loss": 0.0008, "step": 165660 }, { "epoch": 2.7107911314734516, "grad_norm": 0.06413602083921432, "learning_rate": 2.8043739273902026e-07, "loss": 0.0005, "step": 165670 }, { "epoch": 2.7109547574245276, "grad_norm": 0.06689391285181046, "learning_rate": 2.8012315182593176e-07, "loss": 0.0013, "step": 165680 }, { "epoch": 2.711118383375603, "grad_norm": 0.08121802657842636, "learning_rate": 2.7980908199703503e-07, "loss": 0.0006, "step": 165690 }, { "epoch": 2.711282009326679, "grad_norm": 0.03946439549326897, "learning_rate": 2.794951832637155e-07, "loss": 0.0008, "step": 165700 }, { "epoch": 2.711445635277755, "grad_norm": 0.03253056854009628, "learning_rate": 2.791814556373501e-07, "loss": 0.0006, "step": 165710 }, { "epoch": 2.7116092612288307, "grad_norm": 0.03238653764128685, "learning_rate": 2.7886789912931034e-07, "loss": 0.0007, "step": 165720 }, { "epoch": 2.7117728871799067, "grad_norm": 0.0024456228129565716, "learning_rate": 2.785545137509621e-07, "loss": 0.0005, "step": 165730 }, { "epoch": 2.7119365131309827, "grad_norm": 0.11946140229701996, "learning_rate": 2.7824129951366575e-07, "loss": 0.0007, "step": 165740 }, { "epoch": 2.7121001390820583, "grad_norm": 0.0031162812374532223, "learning_rate": 2.7792825642877276e-07, "loss": 0.0007, "step": 165750 }, { "epoch": 2.7122637650331343, "grad_norm": 0.0009196795872412622, "learning_rate": 2.7761538450763124e-07, "loss": 0.0016, "step": 165760 }, { "epoch": 2.7124273909842103, "grad_norm": 0.042442865669727325, "learning_rate": 2.7730268376158333e-07, "loss": 0.0008, "step": 165770 }, { "epoch": 2.712591016935286, "grad_norm": 0.09980204701423645, "learning_rate": 2.7699015420196095e-07, "loss": 0.0004, "step": 165780 }, { "epoch": 2.712754642886362, "grad_norm": 0.20933552086353302, "learning_rate": 2.766777958400951e-07, "loss": 0.0012, "step": 165790 }, { "epoch": 2.712918268837438, "grad_norm": 0.044055577367544174, "learning_rate": 2.7636560868730676e-07, "loss": 0.0011, "step": 165800 }, { "epoch": 2.7130818947885134, "grad_norm": 0.062321409583091736, "learning_rate": 2.760535927549124e-07, "loss": 0.001, "step": 165810 }, { "epoch": 2.7132455207395894, "grad_norm": 0.0921667143702507, "learning_rate": 2.7574174805422127e-07, "loss": 0.0003, "step": 165820 }, { "epoch": 2.7134091466906654, "grad_norm": 0.06988582015037537, "learning_rate": 2.7543007459653717e-07, "loss": 0.001, "step": 165830 }, { "epoch": 2.713572772641741, "grad_norm": 0.018518909811973572, "learning_rate": 2.751185723931582e-07, "loss": 0.0006, "step": 165840 }, { "epoch": 2.713736398592817, "grad_norm": 0.0658596009016037, "learning_rate": 2.7480724145537376e-07, "loss": 0.0004, "step": 165850 }, { "epoch": 2.713900024543893, "grad_norm": 0.020756209269165993, "learning_rate": 2.744960817944714e-07, "loss": 0.0025, "step": 165860 }, { "epoch": 2.7140636504949684, "grad_norm": 0.003743681823834777, "learning_rate": 2.7418509342172773e-07, "loss": 0.0007, "step": 165870 }, { "epoch": 2.7142272764460444, "grad_norm": 0.13827617466449738, "learning_rate": 2.73874276348417e-07, "loss": 0.0016, "step": 165880 }, { "epoch": 2.7143909023971204, "grad_norm": 0.0017665864434093237, "learning_rate": 2.7356363058580413e-07, "loss": 0.0005, "step": 165890 }, { "epoch": 2.714554528348196, "grad_norm": 0.04928507283329964, "learning_rate": 2.732531561451507e-07, "loss": 0.0008, "step": 165900 }, { "epoch": 2.714718154299272, "grad_norm": 0.007752994075417519, "learning_rate": 2.7294285303770983e-07, "loss": 0.0007, "step": 165910 }, { "epoch": 2.7148817802503475, "grad_norm": 0.06837140768766403, "learning_rate": 2.7263272127472873e-07, "loss": 0.0008, "step": 165920 }, { "epoch": 2.7150454062014235, "grad_norm": 0.007732836063951254, "learning_rate": 2.7232276086745057e-07, "loss": 0.0008, "step": 165930 }, { "epoch": 2.7152090321524995, "grad_norm": 0.06131186708807945, "learning_rate": 2.720129718271086e-07, "loss": 0.0005, "step": 165940 }, { "epoch": 2.715372658103575, "grad_norm": 0.024128209799528122, "learning_rate": 2.717033541649333e-07, "loss": 0.0004, "step": 165950 }, { "epoch": 2.715536284054651, "grad_norm": 0.059776730835437775, "learning_rate": 2.713939078921474e-07, "loss": 0.0006, "step": 165960 }, { "epoch": 2.7156999100057266, "grad_norm": 0.14890922605991364, "learning_rate": 2.710846330199668e-07, "loss": 0.0006, "step": 165970 }, { "epoch": 2.7158635359568026, "grad_norm": 0.2550153434276581, "learning_rate": 2.707755295596037e-07, "loss": 0.0007, "step": 165980 }, { "epoch": 2.7160271619078786, "grad_norm": 0.15260539948940277, "learning_rate": 2.7046659752226035e-07, "loss": 0.0006, "step": 165990 }, { "epoch": 2.716190787858954, "grad_norm": 0.04986754432320595, "learning_rate": 2.70157836919136e-07, "loss": 0.0006, "step": 166000 }, { "epoch": 2.71635441381003, "grad_norm": 0.060021527111530304, "learning_rate": 2.6984924776142287e-07, "loss": 0.0011, "step": 166010 }, { "epoch": 2.716518039761106, "grad_norm": 0.1671852469444275, "learning_rate": 2.695408300603053e-07, "loss": 0.0004, "step": 166020 }, { "epoch": 2.7166816657121817, "grad_norm": 0.024218380451202393, "learning_rate": 2.692325838269633e-07, "loss": 0.0003, "step": 166030 }, { "epoch": 2.7168452916632577, "grad_norm": 0.04165758565068245, "learning_rate": 2.689245090725695e-07, "loss": 0.0005, "step": 166040 }, { "epoch": 2.7170089176143337, "grad_norm": 0.021144501864910126, "learning_rate": 2.686166058082923e-07, "loss": 0.0008, "step": 166050 }, { "epoch": 2.7171725435654093, "grad_norm": 0.06020462140440941, "learning_rate": 2.6830887404529047e-07, "loss": 0.0007, "step": 166060 }, { "epoch": 2.7173361695164853, "grad_norm": 0.13731226325035095, "learning_rate": 2.6800131379472074e-07, "loss": 0.0008, "step": 166070 }, { "epoch": 2.7174997954675613, "grad_norm": 0.12679561972618103, "learning_rate": 2.6769392506772964e-07, "loss": 0.0004, "step": 166080 }, { "epoch": 2.717663421418637, "grad_norm": 0.18874065577983856, "learning_rate": 2.673867078754605e-07, "loss": 0.0011, "step": 166090 }, { "epoch": 2.717827047369713, "grad_norm": 0.03484984487295151, "learning_rate": 2.670796622290478e-07, "loss": 0.0004, "step": 166100 }, { "epoch": 2.717990673320789, "grad_norm": 0.025047173723578453, "learning_rate": 2.6677278813962317e-07, "loss": 0.0007, "step": 166110 }, { "epoch": 2.7181542992718644, "grad_norm": 0.02176237292587757, "learning_rate": 2.6646608561830876e-07, "loss": 0.0006, "step": 166120 }, { "epoch": 2.7183179252229404, "grad_norm": 0.06795595586299896, "learning_rate": 2.6615955467622136e-07, "loss": 0.0008, "step": 166130 }, { "epoch": 2.7184815511740164, "grad_norm": 0.04296782240271568, "learning_rate": 2.658531953244731e-07, "loss": 0.0009, "step": 166140 }, { "epoch": 2.718645177125092, "grad_norm": 0.021120456978678703, "learning_rate": 2.6554700757416784e-07, "loss": 0.0006, "step": 166150 }, { "epoch": 2.718808803076168, "grad_norm": 0.172512024641037, "learning_rate": 2.6524099143640515e-07, "loss": 0.0011, "step": 166160 }, { "epoch": 2.718972429027244, "grad_norm": 0.11277034133672714, "learning_rate": 2.6493514692227664e-07, "loss": 0.0006, "step": 166170 }, { "epoch": 2.7191360549783195, "grad_norm": 0.14692123234272003, "learning_rate": 2.6462947404286845e-07, "loss": 0.001, "step": 166180 }, { "epoch": 2.7192996809293954, "grad_norm": 0.057102132588624954, "learning_rate": 2.643239728092606e-07, "loss": 0.0004, "step": 166190 }, { "epoch": 2.7194633068804714, "grad_norm": 0.05140982195734978, "learning_rate": 2.640186432325265e-07, "loss": 0.0006, "step": 166200 }, { "epoch": 2.719626932831547, "grad_norm": 0.038949571549892426, "learning_rate": 2.63713485323735e-07, "loss": 0.0005, "step": 166210 }, { "epoch": 2.719790558782623, "grad_norm": 0.032647065818309784, "learning_rate": 2.634084990939462e-07, "loss": 0.0004, "step": 166220 }, { "epoch": 2.719954184733699, "grad_norm": 0.15789948403835297, "learning_rate": 2.631036845542151e-07, "loss": 0.0014, "step": 166230 }, { "epoch": 2.7201178106847745, "grad_norm": 0.062313031405210495, "learning_rate": 2.6279904171558954e-07, "loss": 0.0008, "step": 166240 }, { "epoch": 2.7202814366358505, "grad_norm": 0.15636670589447021, "learning_rate": 2.6249457058911356e-07, "loss": 0.001, "step": 166250 }, { "epoch": 2.7204450625869265, "grad_norm": 0.0162058062851429, "learning_rate": 2.6219027118582375e-07, "loss": 0.0005, "step": 166260 }, { "epoch": 2.720608688538002, "grad_norm": 0.005950163584202528, "learning_rate": 2.6188614351674913e-07, "loss": 0.0009, "step": 166270 }, { "epoch": 2.720772314489078, "grad_norm": 0.05566615238785744, "learning_rate": 2.615821875929142e-07, "loss": 0.0006, "step": 166280 }, { "epoch": 2.720935940440154, "grad_norm": 0.0025238730013370514, "learning_rate": 2.6127840342533627e-07, "loss": 0.0003, "step": 166290 }, { "epoch": 2.7210995663912296, "grad_norm": 0.03258737549185753, "learning_rate": 2.6097479102502707e-07, "loss": 0.0024, "step": 166300 }, { "epoch": 2.7212631923423056, "grad_norm": 0.0484694167971611, "learning_rate": 2.6067135040299174e-07, "loss": 0.0005, "step": 166310 }, { "epoch": 2.721426818293381, "grad_norm": 0.03294214978814125, "learning_rate": 2.6036808157022975e-07, "loss": 0.0011, "step": 166320 }, { "epoch": 2.721590444244457, "grad_norm": 0.04648750275373459, "learning_rate": 2.6006498453773345e-07, "loss": 0.0009, "step": 166330 }, { "epoch": 2.721754070195533, "grad_norm": 0.005018678493797779, "learning_rate": 2.5976205931648846e-07, "loss": 0.0007, "step": 166340 }, { "epoch": 2.7219176961466087, "grad_norm": 0.5051056742668152, "learning_rate": 2.594593059174766e-07, "loss": 0.0013, "step": 166350 }, { "epoch": 2.7220813220976847, "grad_norm": 0.08327613770961761, "learning_rate": 2.5915672435167074e-07, "loss": 0.0006, "step": 166360 }, { "epoch": 2.7222449480487603, "grad_norm": 0.00232758023776114, "learning_rate": 2.588543146300404e-07, "loss": 0.0005, "step": 166370 }, { "epoch": 2.7224085739998363, "grad_norm": 0.06786621361970901, "learning_rate": 2.585520767635447e-07, "loss": 0.0009, "step": 166380 }, { "epoch": 2.7225721999509123, "grad_norm": 0.0009498855215497315, "learning_rate": 2.582500107631414e-07, "loss": 0.001, "step": 166390 }, { "epoch": 2.722735825901988, "grad_norm": 0.07119929790496826, "learning_rate": 2.579481166397785e-07, "loss": 0.0009, "step": 166400 }, { "epoch": 2.722899451853064, "grad_norm": 0.028171630576252937, "learning_rate": 2.5764639440439833e-07, "loss": 0.0012, "step": 166410 }, { "epoch": 2.72306307780414, "grad_norm": 0.15306329727172852, "learning_rate": 2.5734484406794046e-07, "loss": 0.0009, "step": 166420 }, { "epoch": 2.7232267037552154, "grad_norm": 0.09083644300699234, "learning_rate": 2.5704346564133174e-07, "loss": 0.0008, "step": 166430 }, { "epoch": 2.7233903297062914, "grad_norm": 0.01764063350856304, "learning_rate": 2.5674225913549843e-07, "loss": 0.0004, "step": 166440 }, { "epoch": 2.7235539556573674, "grad_norm": 0.006110843271017075, "learning_rate": 2.564412245613579e-07, "loss": 0.0013, "step": 166450 }, { "epoch": 2.723717581608443, "grad_norm": 0.003444591537117958, "learning_rate": 2.5614036192982195e-07, "loss": 0.0005, "step": 166460 }, { "epoch": 2.723881207559519, "grad_norm": 0.05287844315171242, "learning_rate": 2.5583967125179687e-07, "loss": 0.001, "step": 166470 }, { "epoch": 2.724044833510595, "grad_norm": 0.019583825021982193, "learning_rate": 2.555391525381806e-07, "loss": 0.0005, "step": 166480 }, { "epoch": 2.7242084594616705, "grad_norm": 0.005608867853879929, "learning_rate": 2.5523880579986783e-07, "loss": 0.0009, "step": 166490 }, { "epoch": 2.7243720854127464, "grad_norm": 0.02322128787636757, "learning_rate": 2.5493863104774417e-07, "loss": 0.0006, "step": 166500 }, { "epoch": 2.7245357113638224, "grad_norm": 0.04924976825714111, "learning_rate": 2.54638628292691e-07, "loss": 0.0005, "step": 166510 }, { "epoch": 2.724699337314898, "grad_norm": 0.0036141378805041313, "learning_rate": 2.5433879754558246e-07, "loss": 0.0011, "step": 166520 }, { "epoch": 2.724862963265974, "grad_norm": 0.004840285051614046, "learning_rate": 2.540391388172864e-07, "loss": 0.0011, "step": 166530 }, { "epoch": 2.72502658921705, "grad_norm": 0.1532498002052307, "learning_rate": 2.5373965211866524e-07, "loss": 0.0011, "step": 166540 }, { "epoch": 2.7251902151681255, "grad_norm": 0.06085013225674629, "learning_rate": 2.534403374605737e-07, "loss": 0.0015, "step": 166550 }, { "epoch": 2.7253538411192015, "grad_norm": 0.0757409930229187, "learning_rate": 2.5314119485386257e-07, "loss": 0.001, "step": 166560 }, { "epoch": 2.7255174670702775, "grad_norm": 0.0154356574639678, "learning_rate": 2.5284222430937365e-07, "loss": 0.0003, "step": 166570 }, { "epoch": 2.725681093021353, "grad_norm": 0.004803037736564875, "learning_rate": 2.52543425837945e-07, "loss": 0.0005, "step": 166580 }, { "epoch": 2.725844718972429, "grad_norm": 0.13299861550331116, "learning_rate": 2.522447994504068e-07, "loss": 0.001, "step": 166590 }, { "epoch": 2.726008344923505, "grad_norm": 0.10318078845739365, "learning_rate": 2.5194634515758376e-07, "loss": 0.0006, "step": 166600 }, { "epoch": 2.7261719708745806, "grad_norm": 0.055688053369522095, "learning_rate": 2.516480629702939e-07, "loss": 0.0013, "step": 166610 }, { "epoch": 2.7263355968256566, "grad_norm": 0.02145429700613022, "learning_rate": 2.5134995289935017e-07, "loss": 0.0003, "step": 166620 }, { "epoch": 2.7264992227767326, "grad_norm": 0.007415490690618753, "learning_rate": 2.510520149555573e-07, "loss": 0.0006, "step": 166630 }, { "epoch": 2.726662848727808, "grad_norm": 0.0563824288547039, "learning_rate": 2.5075424914971445e-07, "loss": 0.0004, "step": 166640 }, { "epoch": 2.726826474678884, "grad_norm": 0.11017031967639923, "learning_rate": 2.5045665549261633e-07, "loss": 0.0005, "step": 166650 }, { "epoch": 2.72699010062996, "grad_norm": 0.09914813190698624, "learning_rate": 2.501592339950482e-07, "loss": 0.0004, "step": 166660 }, { "epoch": 2.7271537265810357, "grad_norm": 0.04692218452692032, "learning_rate": 2.49861984667793e-07, "loss": 0.001, "step": 166670 }, { "epoch": 2.7273173525321117, "grad_norm": 0.028819413855671883, "learning_rate": 2.495649075216233e-07, "loss": 0.0006, "step": 166680 }, { "epoch": 2.7274809784831873, "grad_norm": 0.212903693318367, "learning_rate": 2.492680025673083e-07, "loss": 0.0018, "step": 166690 }, { "epoch": 2.7276446044342633, "grad_norm": 0.06485790014266968, "learning_rate": 2.489712698156105e-07, "loss": 0.0009, "step": 166700 }, { "epoch": 2.7278082303853393, "grad_norm": 0.09056265652179718, "learning_rate": 2.486747092772851e-07, "loss": 0.0011, "step": 166710 }, { "epoch": 2.727971856336415, "grad_norm": 0.01741917058825493, "learning_rate": 2.4837832096308355e-07, "loss": 0.0003, "step": 166720 }, { "epoch": 2.728135482287491, "grad_norm": 0.03494131192564964, "learning_rate": 2.4808210488374615e-07, "loss": 0.0008, "step": 166730 }, { "epoch": 2.7282991082385664, "grad_norm": 0.11381885409355164, "learning_rate": 2.47786061050011e-07, "loss": 0.0007, "step": 166740 }, { "epoch": 2.7284627341896424, "grad_norm": 0.09058448672294617, "learning_rate": 2.474901894726106e-07, "loss": 0.0006, "step": 166750 }, { "epoch": 2.7286263601407184, "grad_norm": 0.17616605758666992, "learning_rate": 2.47194490162268e-07, "loss": 0.0027, "step": 166760 }, { "epoch": 2.728789986091794, "grad_norm": 0.09523951262235641, "learning_rate": 2.468989631297025e-07, "loss": 0.001, "step": 166770 }, { "epoch": 2.72895361204287, "grad_norm": 0.05731816962361336, "learning_rate": 2.4660360838562494e-07, "loss": 0.0003, "step": 166780 }, { "epoch": 2.729117237993946, "grad_norm": 0.09204509109258652, "learning_rate": 2.463084259407428e-07, "loss": 0.0006, "step": 166790 }, { "epoch": 2.7292808639450215, "grad_norm": 0.006113148294389248, "learning_rate": 2.4601341580575477e-07, "loss": 0.0008, "step": 166800 }, { "epoch": 2.7294444898960974, "grad_norm": 0.2661568522453308, "learning_rate": 2.457185779913551e-07, "loss": 0.0008, "step": 166810 }, { "epoch": 2.7296081158471734, "grad_norm": 0.026791594922542572, "learning_rate": 2.454239125082297e-07, "loss": 0.0006, "step": 166820 }, { "epoch": 2.729771741798249, "grad_norm": 0.06673616915941238, "learning_rate": 2.4512941936706e-07, "loss": 0.0004, "step": 166830 }, { "epoch": 2.729935367749325, "grad_norm": 0.03314259648323059, "learning_rate": 2.4483509857852193e-07, "loss": 0.0008, "step": 166840 }, { "epoch": 2.730098993700401, "grad_norm": 0.004142835736274719, "learning_rate": 2.445409501532814e-07, "loss": 0.0005, "step": 166850 }, { "epoch": 2.7302626196514765, "grad_norm": 0.06205581873655319, "learning_rate": 2.4424697410200316e-07, "loss": 0.0008, "step": 166860 }, { "epoch": 2.7304262456025525, "grad_norm": 0.1340777575969696, "learning_rate": 2.4395317043534097e-07, "loss": 0.0009, "step": 166870 }, { "epoch": 2.7305898715536285, "grad_norm": 0.02982744574546814, "learning_rate": 2.4365953916394627e-07, "loss": 0.0004, "step": 166880 }, { "epoch": 2.730753497504704, "grad_norm": 0.10288508236408234, "learning_rate": 2.4336608029846054e-07, "loss": 0.0013, "step": 166890 }, { "epoch": 2.73091712345578, "grad_norm": 0.10716257244348526, "learning_rate": 2.430727938495225e-07, "loss": 0.0007, "step": 166900 }, { "epoch": 2.731080749406856, "grad_norm": 0.08042197674512863, "learning_rate": 2.4277967982776375e-07, "loss": 0.0012, "step": 166910 }, { "epoch": 2.7312443753579316, "grad_norm": 0.0348656103014946, "learning_rate": 2.4248673824380787e-07, "loss": 0.0006, "step": 166920 }, { "epoch": 2.7314080013090076, "grad_norm": 0.06871537119150162, "learning_rate": 2.4219396910827307e-07, "loss": 0.0007, "step": 166930 }, { "epoch": 2.7315716272600836, "grad_norm": 0.05744808539748192, "learning_rate": 2.4190137243177093e-07, "loss": 0.0013, "step": 166940 }, { "epoch": 2.731735253211159, "grad_norm": 0.08951016515493393, "learning_rate": 2.41608948224909e-07, "loss": 0.0008, "step": 166950 }, { "epoch": 2.731898879162235, "grad_norm": 0.1780056655406952, "learning_rate": 2.4131669649828606e-07, "loss": 0.0006, "step": 166960 }, { "epoch": 2.732062505113311, "grad_norm": 0.05167065188288689, "learning_rate": 2.410246172624953e-07, "loss": 0.0006, "step": 166970 }, { "epoch": 2.7322261310643867, "grad_norm": 0.0031203618273139, "learning_rate": 2.407327105281249e-07, "loss": 0.001, "step": 166980 }, { "epoch": 2.7323897570154627, "grad_norm": 0.009707254357635975, "learning_rate": 2.404409763057541e-07, "loss": 0.0005, "step": 166990 }, { "epoch": 2.7325533829665387, "grad_norm": 0.03431081771850586, "learning_rate": 2.401494146059602e-07, "loss": 0.0007, "step": 167000 }, { "epoch": 2.7327170089176143, "grad_norm": 0.04879627749323845, "learning_rate": 2.3985802543930846e-07, "loss": 0.0005, "step": 167010 }, { "epoch": 2.7328806348686903, "grad_norm": 0.04363630712032318, "learning_rate": 2.395668088163638e-07, "loss": 0.0008, "step": 167020 }, { "epoch": 2.7330442608197663, "grad_norm": 0.028646238148212433, "learning_rate": 2.3927576474768064e-07, "loss": 0.0006, "step": 167030 }, { "epoch": 2.733207886770842, "grad_norm": 0.028668992221355438, "learning_rate": 2.389848932438088e-07, "loss": 0.0006, "step": 167040 }, { "epoch": 2.733371512721918, "grad_norm": 0.07995645701885223, "learning_rate": 2.386941943152921e-07, "loss": 0.0005, "step": 167050 }, { "epoch": 2.733535138672994, "grad_norm": 0.037354037165641785, "learning_rate": 2.3840366797266644e-07, "loss": 0.0004, "step": 167060 }, { "epoch": 2.7336987646240694, "grad_norm": 0.05522950366139412, "learning_rate": 2.3811331422646467e-07, "loss": 0.0009, "step": 167070 }, { "epoch": 2.7338623905751454, "grad_norm": 0.005512866657227278, "learning_rate": 2.3782313308720938e-07, "loss": 0.0009, "step": 167080 }, { "epoch": 2.734026016526221, "grad_norm": 0.028287706896662712, "learning_rate": 2.3753312456542055e-07, "loss": 0.0009, "step": 167090 }, { "epoch": 2.734189642477297, "grad_norm": 0.034400373697280884, "learning_rate": 2.3724328867160918e-07, "loss": 0.0015, "step": 167100 }, { "epoch": 2.734353268428373, "grad_norm": 0.007700387388467789, "learning_rate": 2.3695362541628242e-07, "loss": 0.0005, "step": 167110 }, { "epoch": 2.7345168943794484, "grad_norm": 0.05857130140066147, "learning_rate": 2.3666413480993856e-07, "loss": 0.0003, "step": 167120 }, { "epoch": 2.7346805203305244, "grad_norm": 0.029179135337471962, "learning_rate": 2.363748168630714e-07, "loss": 0.0033, "step": 167130 }, { "epoch": 2.7348441462816, "grad_norm": 0.10953580588102341, "learning_rate": 2.3608567158616812e-07, "loss": 0.0007, "step": 167140 }, { "epoch": 2.735007772232676, "grad_norm": 0.0232537928968668, "learning_rate": 2.3579669898970926e-07, "loss": 0.0004, "step": 167150 }, { "epoch": 2.735171398183752, "grad_norm": 0.02705479972064495, "learning_rate": 2.3550789908417027e-07, "loss": 0.0008, "step": 167160 }, { "epoch": 2.7353350241348275, "grad_norm": 0.047809701412916183, "learning_rate": 2.352192718800178e-07, "loss": 0.0007, "step": 167170 }, { "epoch": 2.7354986500859035, "grad_norm": 0.06296701729297638, "learning_rate": 2.3493081738771517e-07, "loss": 0.0006, "step": 167180 }, { "epoch": 2.7356622760369795, "grad_norm": 0.013241423293948174, "learning_rate": 2.3464253561771844e-07, "loss": 0.0004, "step": 167190 }, { "epoch": 2.735825901988055, "grad_norm": 0.025256283581256866, "learning_rate": 2.343544265804759e-07, "loss": 0.0008, "step": 167200 }, { "epoch": 2.735989527939131, "grad_norm": 0.006607535295188427, "learning_rate": 2.3406649028643203e-07, "loss": 0.0004, "step": 167210 }, { "epoch": 2.736153153890207, "grad_norm": 0.0622825063765049, "learning_rate": 2.3377872674602343e-07, "loss": 0.0007, "step": 167220 }, { "epoch": 2.7363167798412826, "grad_norm": 0.05698620155453682, "learning_rate": 2.3349113596968064e-07, "loss": 0.0003, "step": 167230 }, { "epoch": 2.7364804057923586, "grad_norm": 0.0025128894485533237, "learning_rate": 2.3320371796782758e-07, "loss": 0.001, "step": 167240 }, { "epoch": 2.7366440317434346, "grad_norm": 0.1656695008277893, "learning_rate": 2.329164727508826e-07, "loss": 0.0008, "step": 167250 }, { "epoch": 2.73680765769451, "grad_norm": 0.08089619874954224, "learning_rate": 2.326294003292595e-07, "loss": 0.001, "step": 167260 }, { "epoch": 2.736971283645586, "grad_norm": 0.06426557153463364, "learning_rate": 2.3234250071336119e-07, "loss": 0.0012, "step": 167270 }, { "epoch": 2.737134909596662, "grad_norm": 0.06766103953123093, "learning_rate": 2.3205577391358924e-07, "loss": 0.0008, "step": 167280 }, { "epoch": 2.7372985355477377, "grad_norm": 0.10296272486448288, "learning_rate": 2.3176921994033597e-07, "loss": 0.0009, "step": 167290 }, { "epoch": 2.7374621614988137, "grad_norm": 0.04676128551363945, "learning_rate": 2.314828388039886e-07, "loss": 0.0012, "step": 167300 }, { "epoch": 2.7376257874498897, "grad_norm": 0.008685381151735783, "learning_rate": 2.3119663051492659e-07, "loss": 0.0005, "step": 167310 }, { "epoch": 2.7377894134009653, "grad_norm": 0.13609428703784943, "learning_rate": 2.3091059508352609e-07, "loss": 0.0009, "step": 167320 }, { "epoch": 2.7379530393520413, "grad_norm": 0.04204880818724632, "learning_rate": 2.3062473252015438e-07, "loss": 0.0005, "step": 167330 }, { "epoch": 2.7381166653031173, "grad_norm": 0.31170615553855896, "learning_rate": 2.303390428351726e-07, "loss": 0.0012, "step": 167340 }, { "epoch": 2.738280291254193, "grad_norm": 0.06603459268808365, "learning_rate": 2.300535260389375e-07, "loss": 0.0003, "step": 167350 }, { "epoch": 2.738443917205269, "grad_norm": 0.11828278750181198, "learning_rate": 2.2976818214179686e-07, "loss": 0.0006, "step": 167360 }, { "epoch": 2.738607543156345, "grad_norm": 0.21026110649108887, "learning_rate": 2.2948301115409576e-07, "loss": 0.0007, "step": 167370 }, { "epoch": 2.7387711691074204, "grad_norm": 0.23438793420791626, "learning_rate": 2.291980130861693e-07, "loss": 0.0011, "step": 167380 }, { "epoch": 2.7389347950584964, "grad_norm": 0.044866740703582764, "learning_rate": 2.289131879483486e-07, "loss": 0.0006, "step": 167390 }, { "epoch": 2.7390984210095723, "grad_norm": 0.08250736445188522, "learning_rate": 2.2862853575095766e-07, "loss": 0.0003, "step": 167400 }, { "epoch": 2.739262046960648, "grad_norm": 0.05013498291373253, "learning_rate": 2.2834405650431435e-07, "loss": 0.0007, "step": 167410 }, { "epoch": 2.739425672911724, "grad_norm": 0.030337626114487648, "learning_rate": 2.2805975021873206e-07, "loss": 0.0005, "step": 167420 }, { "epoch": 2.7395892988628, "grad_norm": 0.03338876739144325, "learning_rate": 2.2777561690451366e-07, "loss": 0.0009, "step": 167430 }, { "epoch": 2.7397529248138754, "grad_norm": 0.008874011225998402, "learning_rate": 2.2749165657195982e-07, "loss": 0.0007, "step": 167440 }, { "epoch": 2.7399165507649514, "grad_norm": 0.039966460317373276, "learning_rate": 2.272078692313623e-07, "loss": 0.0005, "step": 167450 }, { "epoch": 2.7400801767160274, "grad_norm": 0.10308028012514114, "learning_rate": 2.2692425489300897e-07, "loss": 0.0008, "step": 167460 }, { "epoch": 2.740243802667103, "grad_norm": 0.09634079784154892, "learning_rate": 2.2664081356717938e-07, "loss": 0.0003, "step": 167470 }, { "epoch": 2.740407428618179, "grad_norm": 0.04774674400687218, "learning_rate": 2.263575452641481e-07, "loss": 0.0015, "step": 167480 }, { "epoch": 2.7405710545692545, "grad_norm": 0.07143358141183853, "learning_rate": 2.2607444999418248e-07, "loss": 0.0011, "step": 167490 }, { "epoch": 2.7407346805203305, "grad_norm": 0.013922899961471558, "learning_rate": 2.2579152776754377e-07, "loss": 0.0007, "step": 167500 }, { "epoch": 2.7408983064714065, "grad_norm": 0.07386036217212677, "learning_rate": 2.2550877859448872e-07, "loss": 0.0005, "step": 167510 }, { "epoch": 2.741061932422482, "grad_norm": 0.05879054218530655, "learning_rate": 2.2522620248526528e-07, "loss": 0.0006, "step": 167520 }, { "epoch": 2.741225558373558, "grad_norm": 0.0702357366681099, "learning_rate": 2.2494379945011523e-07, "loss": 0.0008, "step": 167530 }, { "epoch": 2.7413891843246336, "grad_norm": 0.03725006431341171, "learning_rate": 2.2466156949927653e-07, "loss": 0.0007, "step": 167540 }, { "epoch": 2.7415528102757096, "grad_norm": 0.0011913494672626257, "learning_rate": 2.2437951264297763e-07, "loss": 0.0008, "step": 167550 }, { "epoch": 2.7417164362267856, "grad_norm": 0.06734905391931534, "learning_rate": 2.2409762889144482e-07, "loss": 0.0006, "step": 167560 }, { "epoch": 2.741880062177861, "grad_norm": 0.10126426815986633, "learning_rate": 2.2381591825489325e-07, "loss": 0.0007, "step": 167570 }, { "epoch": 2.742043688128937, "grad_norm": 0.11646626889705658, "learning_rate": 2.235343807435364e-07, "loss": 0.0009, "step": 167580 }, { "epoch": 2.742207314080013, "grad_norm": 0.030360354110598564, "learning_rate": 2.2325301636757778e-07, "loss": 0.0009, "step": 167590 }, { "epoch": 2.7423709400310887, "grad_norm": 0.02005661092698574, "learning_rate": 2.2297182513721705e-07, "loss": 0.0008, "step": 167600 }, { "epoch": 2.7425345659821647, "grad_norm": 0.05383145436644554, "learning_rate": 2.22690807062646e-07, "loss": 0.0007, "step": 167610 }, { "epoch": 2.7426981919332407, "grad_norm": 0.14692895114421844, "learning_rate": 2.2240996215405154e-07, "loss": 0.0009, "step": 167620 }, { "epoch": 2.7428618178843163, "grad_norm": 0.05291958898305893, "learning_rate": 2.2212929042161325e-07, "loss": 0.0009, "step": 167630 }, { "epoch": 2.7430254438353923, "grad_norm": 0.07483596354722977, "learning_rate": 2.218487918755047e-07, "loss": 0.0006, "step": 167640 }, { "epoch": 2.7431890697864683, "grad_norm": 0.10124974697828293, "learning_rate": 2.215684665258938e-07, "loss": 0.0011, "step": 167650 }, { "epoch": 2.743352695737544, "grad_norm": 0.06880058348178864, "learning_rate": 2.2128831438294086e-07, "loss": 0.0008, "step": 167660 }, { "epoch": 2.74351632168862, "grad_norm": 0.0684800073504448, "learning_rate": 2.21008335456801e-07, "loss": 0.0004, "step": 167670 }, { "epoch": 2.743679947639696, "grad_norm": 0.01002908032387495, "learning_rate": 2.2072852975762338e-07, "loss": 0.0005, "step": 167680 }, { "epoch": 2.7438435735907714, "grad_norm": 0.112838514149189, "learning_rate": 2.2044889729554985e-07, "loss": 0.0006, "step": 167690 }, { "epoch": 2.7440071995418474, "grad_norm": 0.0022344677709043026, "learning_rate": 2.2016943808071678e-07, "loss": 0.0005, "step": 167700 }, { "epoch": 2.7441708254929233, "grad_norm": 0.018456408753991127, "learning_rate": 2.198901521232527e-07, "loss": 0.0008, "step": 167710 }, { "epoch": 2.744334451443999, "grad_norm": 0.08982288837432861, "learning_rate": 2.1961103943328343e-07, "loss": 0.0008, "step": 167720 }, { "epoch": 2.744498077395075, "grad_norm": 0.10520012676715851, "learning_rate": 2.1933210002092308e-07, "loss": 0.0009, "step": 167730 }, { "epoch": 2.744661703346151, "grad_norm": 0.12554024159908295, "learning_rate": 2.190533338962847e-07, "loss": 0.0007, "step": 167740 }, { "epoch": 2.7448253292972264, "grad_norm": 0.08705730736255646, "learning_rate": 2.1877474106947238e-07, "loss": 0.0004, "step": 167750 }, { "epoch": 2.7449889552483024, "grad_norm": 0.05151456966996193, "learning_rate": 2.1849632155058364e-07, "loss": 0.0008, "step": 167760 }, { "epoch": 2.7451525811993784, "grad_norm": 0.16612239181995392, "learning_rate": 2.1821807534971207e-07, "loss": 0.0008, "step": 167770 }, { "epoch": 2.745316207150454, "grad_norm": 0.08895605802536011, "learning_rate": 2.179400024769418e-07, "loss": 0.0008, "step": 167780 }, { "epoch": 2.74547983310153, "grad_norm": 0.05931415408849716, "learning_rate": 2.1766210294235367e-07, "loss": 0.0007, "step": 167790 }, { "epoch": 2.745643459052606, "grad_norm": 0.06572817265987396, "learning_rate": 2.1738437675601965e-07, "loss": 0.0005, "step": 167800 }, { "epoch": 2.7458070850036815, "grad_norm": 0.12826888263225555, "learning_rate": 2.171068239280083e-07, "loss": 0.0008, "step": 167810 }, { "epoch": 2.7459707109547575, "grad_norm": 0.09133157879114151, "learning_rate": 2.1682944446837884e-07, "loss": 0.0008, "step": 167820 }, { "epoch": 2.7461343369058335, "grad_norm": 0.03975919261574745, "learning_rate": 2.165522383871854e-07, "loss": 0.0006, "step": 167830 }, { "epoch": 2.746297962856909, "grad_norm": 0.2132592797279358, "learning_rate": 2.1627520569447723e-07, "loss": 0.0006, "step": 167840 }, { "epoch": 2.746461588807985, "grad_norm": 0.060637082904577255, "learning_rate": 2.159983464002946e-07, "loss": 0.0004, "step": 167850 }, { "epoch": 2.7466252147590606, "grad_norm": 0.037224747240543365, "learning_rate": 2.1572166051467502e-07, "loss": 0.0005, "step": 167860 }, { "epoch": 2.7467888407101366, "grad_norm": 0.05405132472515106, "learning_rate": 2.1544514804764549e-07, "loss": 0.0013, "step": 167870 }, { "epoch": 2.7469524666612126, "grad_norm": 0.12484581768512726, "learning_rate": 2.1516880900923076e-07, "loss": 0.0006, "step": 167880 }, { "epoch": 2.747116092612288, "grad_norm": 0.17253288626670837, "learning_rate": 2.148926434094456e-07, "loss": 0.0008, "step": 167890 }, { "epoch": 2.747279718563364, "grad_norm": 0.03848373889923096, "learning_rate": 2.14616651258302e-07, "loss": 0.0009, "step": 167900 }, { "epoch": 2.7474433445144397, "grad_norm": 0.10382080078125, "learning_rate": 2.1434083256580364e-07, "loss": 0.0008, "step": 167910 }, { "epoch": 2.7476069704655157, "grad_norm": 0.02144470252096653, "learning_rate": 2.140651873419486e-07, "loss": 0.0003, "step": 167920 }, { "epoch": 2.7477705964165917, "grad_norm": 0.037637434899806976, "learning_rate": 2.1378971559672723e-07, "loss": 0.0005, "step": 167930 }, { "epoch": 2.7479342223676673, "grad_norm": 0.05315548554062843, "learning_rate": 2.135144173401249e-07, "loss": 0.0013, "step": 167940 }, { "epoch": 2.7480978483187433, "grad_norm": 0.0408809520304203, "learning_rate": 2.132392925821214e-07, "loss": 0.0007, "step": 167950 }, { "epoch": 2.7482614742698193, "grad_norm": 0.0859406515955925, "learning_rate": 2.1296434133268873e-07, "loss": 0.0008, "step": 167960 }, { "epoch": 2.748425100220895, "grad_norm": 0.0014430563896894455, "learning_rate": 2.126895636017934e-07, "loss": 0.0005, "step": 167970 }, { "epoch": 2.748588726171971, "grad_norm": 0.16755545139312744, "learning_rate": 2.1241495939939572e-07, "loss": 0.0011, "step": 167980 }, { "epoch": 2.748752352123047, "grad_norm": 0.021886218339204788, "learning_rate": 2.1214052873544832e-07, "loss": 0.0005, "step": 167990 }, { "epoch": 2.7489159780741224, "grad_norm": 0.020729944109916687, "learning_rate": 2.1186627161989993e-07, "loss": 0.0008, "step": 168000 }, { "epoch": 2.7490796040251984, "grad_norm": 0.0311489999294281, "learning_rate": 2.115921880626909e-07, "loss": 0.0009, "step": 168010 }, { "epoch": 2.7492432299762743, "grad_norm": 0.15849840641021729, "learning_rate": 2.1131827807375716e-07, "loss": 0.0009, "step": 168020 }, { "epoch": 2.74940685592735, "grad_norm": 0.10417167842388153, "learning_rate": 2.1104454166302634e-07, "loss": 0.001, "step": 168030 }, { "epoch": 2.749570481878426, "grad_norm": 0.043993063271045685, "learning_rate": 2.1077097884042053e-07, "loss": 0.001, "step": 168040 }, { "epoch": 2.749734107829502, "grad_norm": 0.005415800493210554, "learning_rate": 2.104975896158562e-07, "loss": 0.0005, "step": 168050 }, { "epoch": 2.7498977337805774, "grad_norm": 0.052111536264419556, "learning_rate": 2.1022437399924267e-07, "loss": 0.001, "step": 168060 }, { "epoch": 2.7500613597316534, "grad_norm": 0.07010488957166672, "learning_rate": 2.0995133200048478e-07, "loss": 0.0006, "step": 168070 }, { "epoch": 2.7502249856827294, "grad_norm": 0.033969417214393616, "learning_rate": 2.0967846362947742e-07, "loss": 0.0007, "step": 168080 }, { "epoch": 2.750388611633805, "grad_norm": 0.1209041103720665, "learning_rate": 2.0940576889611318e-07, "loss": 0.0005, "step": 168090 }, { "epoch": 2.750552237584881, "grad_norm": 0.0452299602329731, "learning_rate": 2.0913324781027534e-07, "loss": 0.0011, "step": 168100 }, { "epoch": 2.750715863535957, "grad_norm": 0.001307224971242249, "learning_rate": 2.088609003818426e-07, "loss": 0.0006, "step": 168110 }, { "epoch": 2.7508794894870325, "grad_norm": 0.09175059199333191, "learning_rate": 2.0858872662068764e-07, "loss": 0.0006, "step": 168120 }, { "epoch": 2.7510431154381085, "grad_norm": 0.033883705735206604, "learning_rate": 2.0831672653667535e-07, "loss": 0.0007, "step": 168130 }, { "epoch": 2.7512067413891845, "grad_norm": 0.08595077693462372, "learning_rate": 2.080449001396656e-07, "loss": 0.0005, "step": 168140 }, { "epoch": 2.75137036734026, "grad_norm": 0.06489706039428711, "learning_rate": 2.0777324743951056e-07, "loss": 0.0007, "step": 168150 }, { "epoch": 2.751533993291336, "grad_norm": 0.050071485340595245, "learning_rate": 2.075017684460573e-07, "loss": 0.0015, "step": 168160 }, { "epoch": 2.751697619242412, "grad_norm": 0.05935819447040558, "learning_rate": 2.0723046316914686e-07, "loss": 0.0006, "step": 168170 }, { "epoch": 2.7518612451934876, "grad_norm": 0.04035269469022751, "learning_rate": 2.069593316186125e-07, "loss": 0.0005, "step": 168180 }, { "epoch": 2.7520248711445636, "grad_norm": 0.04774356260895729, "learning_rate": 2.0668837380428296e-07, "loss": 0.0005, "step": 168190 }, { "epoch": 2.7521884970956396, "grad_norm": 0.06154807284474373, "learning_rate": 2.0641758973597936e-07, "loss": 0.0004, "step": 168200 }, { "epoch": 2.752352123046715, "grad_norm": 0.06879732757806778, "learning_rate": 2.0614697942351714e-07, "loss": 0.0005, "step": 168210 }, { "epoch": 2.752515748997791, "grad_norm": 0.051487430930137634, "learning_rate": 2.058765428767051e-07, "loss": 0.0007, "step": 168220 }, { "epoch": 2.752679374948867, "grad_norm": 0.09697844088077545, "learning_rate": 2.0560628010534655e-07, "loss": 0.0003, "step": 168230 }, { "epoch": 2.7528430008999427, "grad_norm": 0.13867394626140594, "learning_rate": 2.0533619111923697e-07, "loss": 0.0007, "step": 168240 }, { "epoch": 2.7530066268510187, "grad_norm": 0.04300633445382118, "learning_rate": 2.050662759281663e-07, "loss": 0.0004, "step": 168250 }, { "epoch": 2.7531702528020943, "grad_norm": 0.1252574324607849, "learning_rate": 2.047965345419195e-07, "loss": 0.0007, "step": 168260 }, { "epoch": 2.7533338787531703, "grad_norm": 0.003122778609395027, "learning_rate": 2.0452696697027264e-07, "loss": 0.0006, "step": 168270 }, { "epoch": 2.7534975047042463, "grad_norm": 0.04841751605272293, "learning_rate": 2.0425757322299843e-07, "loss": 0.0004, "step": 168280 }, { "epoch": 2.753661130655322, "grad_norm": 0.03142056614160538, "learning_rate": 2.039883533098602e-07, "loss": 0.0007, "step": 168290 }, { "epoch": 2.753824756606398, "grad_norm": 0.13306310772895813, "learning_rate": 2.0371930724061795e-07, "loss": 0.0009, "step": 168300 }, { "epoch": 2.7539883825574734, "grad_norm": 0.11865317821502686, "learning_rate": 2.034504350250227e-07, "loss": 0.0008, "step": 168310 }, { "epoch": 2.7541520085085494, "grad_norm": 0.01555688213557005, "learning_rate": 2.0318173667282116e-07, "loss": 0.0007, "step": 168320 }, { "epoch": 2.7543156344596254, "grad_norm": 0.06026741862297058, "learning_rate": 2.0291321219375327e-07, "loss": 0.0005, "step": 168330 }, { "epoch": 2.754479260410701, "grad_norm": 0.11783447861671448, "learning_rate": 2.0264486159755125e-07, "loss": 0.0005, "step": 168340 }, { "epoch": 2.754642886361777, "grad_norm": 0.039642784744501114, "learning_rate": 2.0237668489394346e-07, "loss": 0.0004, "step": 168350 }, { "epoch": 2.754806512312853, "grad_norm": 0.07264643162488937, "learning_rate": 2.0210868209264932e-07, "loss": 0.0004, "step": 168360 }, { "epoch": 2.7549701382639284, "grad_norm": 0.10343144834041595, "learning_rate": 2.0184085320338443e-07, "loss": 0.001, "step": 168370 }, { "epoch": 2.7551337642150044, "grad_norm": 0.08168608695268631, "learning_rate": 2.0157319823585651e-07, "loss": 0.0003, "step": 168380 }, { "epoch": 2.7552973901660804, "grad_norm": 0.16937950253486633, "learning_rate": 2.0130571719976676e-07, "loss": 0.0009, "step": 168390 }, { "epoch": 2.755461016117156, "grad_norm": 0.09547882527112961, "learning_rate": 2.010384101048124e-07, "loss": 0.0006, "step": 168400 }, { "epoch": 2.755624642068232, "grad_norm": 0.006628206465393305, "learning_rate": 2.0077127696068122e-07, "loss": 0.0005, "step": 168410 }, { "epoch": 2.755788268019308, "grad_norm": 0.1632036715745926, "learning_rate": 2.0050431777705714e-07, "loss": 0.0008, "step": 168420 }, { "epoch": 2.7559518939703835, "grad_norm": 0.08565066754817963, "learning_rate": 2.0023753256361577e-07, "loss": 0.0008, "step": 168430 }, { "epoch": 2.7561155199214595, "grad_norm": 0.26326531171798706, "learning_rate": 1.9997092133002827e-07, "loss": 0.0013, "step": 168440 }, { "epoch": 2.7562791458725355, "grad_norm": 0.08266288042068481, "learning_rate": 1.99704484085958e-07, "loss": 0.0005, "step": 168450 }, { "epoch": 2.756442771823611, "grad_norm": 0.023020917549729347, "learning_rate": 1.994382208410628e-07, "loss": 0.0003, "step": 168460 }, { "epoch": 2.756606397774687, "grad_norm": 0.03823775053024292, "learning_rate": 1.9917213160499437e-07, "loss": 0.0006, "step": 168470 }, { "epoch": 2.756770023725763, "grad_norm": 0.16163672506809235, "learning_rate": 1.9890621638739783e-07, "loss": 0.0014, "step": 168480 }, { "epoch": 2.7569336496768386, "grad_norm": 0.002065907930955291, "learning_rate": 1.9864047519791207e-07, "loss": 0.0004, "step": 168490 }, { "epoch": 2.7570972756279146, "grad_norm": 0.06376825273036957, "learning_rate": 1.9837490804616834e-07, "loss": 0.0008, "step": 168500 }, { "epoch": 2.7572609015789906, "grad_norm": 0.002479027258232236, "learning_rate": 1.9810951494179497e-07, "loss": 0.0006, "step": 168510 }, { "epoch": 2.757424527530066, "grad_norm": 0.01967574656009674, "learning_rate": 1.978442958944099e-07, "loss": 0.0005, "step": 168520 }, { "epoch": 2.757588153481142, "grad_norm": 0.0020061624236404896, "learning_rate": 1.975792509136276e-07, "loss": 0.0008, "step": 168530 }, { "epoch": 2.757751779432218, "grad_norm": 0.14201155304908752, "learning_rate": 1.9731438000905602e-07, "loss": 0.0004, "step": 168540 }, { "epoch": 2.7579154053832937, "grad_norm": 0.09914567321538925, "learning_rate": 1.9704968319029405e-07, "loss": 0.0005, "step": 168550 }, { "epoch": 2.7580790313343697, "grad_norm": 0.15743231773376465, "learning_rate": 1.9678516046693797e-07, "loss": 0.001, "step": 168560 }, { "epoch": 2.7582426572854457, "grad_norm": 0.012206529267132282, "learning_rate": 1.9652081184857507e-07, "loss": 0.0003, "step": 168570 }, { "epoch": 2.7584062832365213, "grad_norm": 0.21962665021419525, "learning_rate": 1.9625663734478883e-07, "loss": 0.0008, "step": 168580 }, { "epoch": 2.7585699091875973, "grad_norm": 0.017736637964844704, "learning_rate": 1.959926369651527e-07, "loss": 0.0003, "step": 168590 }, { "epoch": 2.7587335351386733, "grad_norm": 0.10496079176664352, "learning_rate": 1.9572881071923843e-07, "loss": 0.0008, "step": 168600 }, { "epoch": 2.758897161089749, "grad_norm": 0.03617104887962341, "learning_rate": 1.954651586166073e-07, "loss": 0.0005, "step": 168610 }, { "epoch": 2.759060787040825, "grad_norm": 0.01400375459343195, "learning_rate": 1.9520168066681722e-07, "loss": 0.0005, "step": 168620 }, { "epoch": 2.7592244129919004, "grad_norm": 0.07204016298055649, "learning_rate": 1.9493837687941886e-07, "loss": 0.0005, "step": 168630 }, { "epoch": 2.7593880389429764, "grad_norm": 0.11820922791957855, "learning_rate": 1.946752472639546e-07, "loss": 0.0013, "step": 168640 }, { "epoch": 2.7595516648940523, "grad_norm": 0.031934287399053574, "learning_rate": 1.9441229182996402e-07, "loss": 0.001, "step": 168650 }, { "epoch": 2.759715290845128, "grad_norm": 0.05650338530540466, "learning_rate": 1.9414951058697729e-07, "loss": 0.0011, "step": 168660 }, { "epoch": 2.759878916796204, "grad_norm": 0.007053412031382322, "learning_rate": 1.9388690354452012e-07, "loss": 0.0005, "step": 168670 }, { "epoch": 2.7600425427472794, "grad_norm": 0.08474508672952652, "learning_rate": 1.936244707121121e-07, "loss": 0.0007, "step": 168680 }, { "epoch": 2.7602061686983554, "grad_norm": 0.023448863998055458, "learning_rate": 1.9336221209926454e-07, "loss": 0.001, "step": 168690 }, { "epoch": 2.7603697946494314, "grad_norm": 0.00744744623079896, "learning_rate": 1.9310012771548536e-07, "loss": 0.0002, "step": 168700 }, { "epoch": 2.760533420600507, "grad_norm": 0.054840464144945145, "learning_rate": 1.9283821757027254e-07, "loss": 0.0005, "step": 168710 }, { "epoch": 2.760697046551583, "grad_norm": 0.10844000428915024, "learning_rate": 1.9257648167312127e-07, "loss": 0.0008, "step": 168720 }, { "epoch": 2.760860672502659, "grad_norm": 0.03459271416068077, "learning_rate": 1.9231492003351838e-07, "loss": 0.0008, "step": 168730 }, { "epoch": 2.7610242984537345, "grad_norm": 0.039451319724321365, "learning_rate": 1.9205353266094352e-07, "loss": 0.0009, "step": 168740 }, { "epoch": 2.7611879244048105, "grad_norm": 0.11334728449583054, "learning_rate": 1.9179231956487353e-07, "loss": 0.0004, "step": 168750 }, { "epoch": 2.7613515503558865, "grad_norm": 0.06936286389827728, "learning_rate": 1.9153128075477534e-07, "loss": 0.0006, "step": 168760 }, { "epoch": 2.761515176306962, "grad_norm": 0.09018911421298981, "learning_rate": 1.912704162401119e-07, "loss": 0.0008, "step": 168770 }, { "epoch": 2.761678802258038, "grad_norm": 0.0995088741183281, "learning_rate": 1.910097260303373e-07, "loss": 0.0002, "step": 168780 }, { "epoch": 2.761842428209114, "grad_norm": 0.12075003981590271, "learning_rate": 1.907492101349029e-07, "loss": 0.0006, "step": 168790 }, { "epoch": 2.7620060541601896, "grad_norm": 0.002675691619515419, "learning_rate": 1.9048886856325055e-07, "loss": 0.0005, "step": 168800 }, { "epoch": 2.7621696801112656, "grad_norm": 0.08235309273004532, "learning_rate": 1.9022870132481774e-07, "loss": 0.0006, "step": 168810 }, { "epoch": 2.7623333060623416, "grad_norm": 0.002063245279714465, "learning_rate": 1.8996870842903415e-07, "loss": 0.0013, "step": 168820 }, { "epoch": 2.762496932013417, "grad_norm": 0.003093044040724635, "learning_rate": 1.8970888988532444e-07, "loss": 0.0005, "step": 168830 }, { "epoch": 2.762660557964493, "grad_norm": 0.05535441264510155, "learning_rate": 1.8944924570310662e-07, "loss": 0.0004, "step": 168840 }, { "epoch": 2.762824183915569, "grad_norm": 0.06835031509399414, "learning_rate": 1.8918977589179098e-07, "loss": 0.0017, "step": 168850 }, { "epoch": 2.7629878098666447, "grad_norm": 0.05116264522075653, "learning_rate": 1.889304804607839e-07, "loss": 0.0004, "step": 168860 }, { "epoch": 2.7631514358177207, "grad_norm": 0.08292914927005768, "learning_rate": 1.886713594194839e-07, "loss": 0.0007, "step": 168870 }, { "epoch": 2.7633150617687967, "grad_norm": 0.03854930028319359, "learning_rate": 1.8841241277728295e-07, "loss": 0.0006, "step": 168880 }, { "epoch": 2.7634786877198723, "grad_norm": 0.15408547222614288, "learning_rate": 1.8815364054356798e-07, "loss": 0.0015, "step": 168890 }, { "epoch": 2.7636423136709483, "grad_norm": 0.03105132281780243, "learning_rate": 1.878950427277182e-07, "loss": 0.0004, "step": 168900 }, { "epoch": 2.7638059396220243, "grad_norm": 0.07394374907016754, "learning_rate": 1.8763661933910826e-07, "loss": 0.001, "step": 168910 }, { "epoch": 2.7639695655731, "grad_norm": 0.08863889425992966, "learning_rate": 1.8737837038710405e-07, "loss": 0.001, "step": 168920 }, { "epoch": 2.764133191524176, "grad_norm": 0.10919832438230515, "learning_rate": 1.8712029588106862e-07, "loss": 0.0008, "step": 168930 }, { "epoch": 2.764296817475252, "grad_norm": 0.11833513528108597, "learning_rate": 1.8686239583035336e-07, "loss": 0.0007, "step": 168940 }, { "epoch": 2.7644604434263274, "grad_norm": 0.02775750495493412, "learning_rate": 1.8660467024430806e-07, "loss": 0.0005, "step": 168950 }, { "epoch": 2.7646240693774033, "grad_norm": 0.06226540356874466, "learning_rate": 1.8634711913227576e-07, "loss": 0.0007, "step": 168960 }, { "epoch": 2.7647876953284793, "grad_norm": 0.01621055230498314, "learning_rate": 1.8608974250359014e-07, "loss": 0.0007, "step": 168970 }, { "epoch": 2.764951321279555, "grad_norm": 0.051388658583164215, "learning_rate": 1.8583254036758204e-07, "loss": 0.0004, "step": 168980 }, { "epoch": 2.765114947230631, "grad_norm": 0.1612025797367096, "learning_rate": 1.8557551273357344e-07, "loss": 0.0013, "step": 168990 }, { "epoch": 2.765278573181707, "grad_norm": 0.10645221173763275, "learning_rate": 1.853186596108819e-07, "loss": 0.0009, "step": 169000 }, { "epoch": 2.7654421991327824, "grad_norm": 0.0673551931977272, "learning_rate": 1.8506198100881666e-07, "loss": 0.0017, "step": 169010 }, { "epoch": 2.7656058250838584, "grad_norm": 0.09177505224943161, "learning_rate": 1.848054769366825e-07, "loss": 0.0005, "step": 169020 }, { "epoch": 2.765769451034934, "grad_norm": 0.04661207273602486, "learning_rate": 1.8454914740377695e-07, "loss": 0.0005, "step": 169030 }, { "epoch": 2.76593307698601, "grad_norm": 0.07712335139513016, "learning_rate": 1.8429299241939036e-07, "loss": 0.0012, "step": 169040 }, { "epoch": 2.766096702937086, "grad_norm": 0.06612737476825714, "learning_rate": 1.8403701199280976e-07, "loss": 0.0008, "step": 169050 }, { "epoch": 2.7662603288881615, "grad_norm": 0.06156130135059357, "learning_rate": 1.8378120613331108e-07, "loss": 0.0006, "step": 169060 }, { "epoch": 2.7664239548392375, "grad_norm": 0.05858384072780609, "learning_rate": 1.8352557485016963e-07, "loss": 0.0003, "step": 169070 }, { "epoch": 2.766587580790313, "grad_norm": 0.09774133563041687, "learning_rate": 1.8327011815264916e-07, "loss": 0.0007, "step": 169080 }, { "epoch": 2.766751206741389, "grad_norm": 0.12427332997322083, "learning_rate": 1.830148360500106e-07, "loss": 0.0011, "step": 169090 }, { "epoch": 2.766914832692465, "grad_norm": 0.13921351730823517, "learning_rate": 1.8275972855150648e-07, "loss": 0.0007, "step": 169100 }, { "epoch": 2.7670784586435406, "grad_norm": 0.029401782900094986, "learning_rate": 1.8250479566638446e-07, "loss": 0.001, "step": 169110 }, { "epoch": 2.7672420845946166, "grad_norm": 0.004431414883583784, "learning_rate": 1.8225003740388546e-07, "loss": 0.001, "step": 169120 }, { "epoch": 2.7674057105456926, "grad_norm": 0.0358114130795002, "learning_rate": 1.8199545377324323e-07, "loss": 0.0004, "step": 169130 }, { "epoch": 2.767569336496768, "grad_norm": 0.07633160054683685, "learning_rate": 1.8174104478368593e-07, "loss": 0.0005, "step": 169140 }, { "epoch": 2.767732962447844, "grad_norm": 0.06541363149881363, "learning_rate": 1.8148681044443505e-07, "loss": 0.0048, "step": 169150 }, { "epoch": 2.76789658839892, "grad_norm": 0.13870662450790405, "learning_rate": 1.812327507647066e-07, "loss": 0.0009, "step": 169160 }, { "epoch": 2.7680602143499957, "grad_norm": 0.16139960289001465, "learning_rate": 1.8097886575370983e-07, "loss": 0.0014, "step": 169170 }, { "epoch": 2.7682238403010717, "grad_norm": 0.18176931142807007, "learning_rate": 1.8072515542064573e-07, "loss": 0.001, "step": 169180 }, { "epoch": 2.7683874662521477, "grad_norm": 0.07228479534387589, "learning_rate": 1.8047161977471306e-07, "loss": 0.0003, "step": 169190 }, { "epoch": 2.7685510922032233, "grad_norm": 0.01882397197186947, "learning_rate": 1.8021825882510002e-07, "loss": 0.0006, "step": 169200 }, { "epoch": 2.7687147181542993, "grad_norm": 0.05472211912274361, "learning_rate": 1.7996507258099206e-07, "loss": 0.0009, "step": 169210 }, { "epoch": 2.7688783441053753, "grad_norm": 0.06442705541849136, "learning_rate": 1.7971206105156458e-07, "loss": 0.0005, "step": 169220 }, { "epoch": 2.769041970056451, "grad_norm": 0.12161727994680405, "learning_rate": 1.7945922424599026e-07, "loss": 0.0009, "step": 169230 }, { "epoch": 2.769205596007527, "grad_norm": 0.10433565080165863, "learning_rate": 1.792065621734329e-07, "loss": 0.0006, "step": 169240 }, { "epoch": 2.769369221958603, "grad_norm": 0.02848692052066326, "learning_rate": 1.7895407484305126e-07, "loss": 0.0008, "step": 169250 }, { "epoch": 2.7695328479096784, "grad_norm": 0.06888028979301453, "learning_rate": 1.787017622639975e-07, "loss": 0.0012, "step": 169260 }, { "epoch": 2.7696964738607543, "grad_norm": 0.06015644967556, "learning_rate": 1.7844962444541646e-07, "loss": 0.0005, "step": 169270 }, { "epoch": 2.7698600998118303, "grad_norm": 0.04998675361275673, "learning_rate": 1.7819766139644922e-07, "loss": 0.0013, "step": 169280 }, { "epoch": 2.770023725762906, "grad_norm": 0.032340746372938156, "learning_rate": 1.779458731262268e-07, "loss": 0.0007, "step": 169290 }, { "epoch": 2.770187351713982, "grad_norm": 0.1105969026684761, "learning_rate": 1.77694259643878e-07, "loss": 0.0004, "step": 169300 }, { "epoch": 2.770350977665058, "grad_norm": 0.004225532524287701, "learning_rate": 1.7744282095852106e-07, "loss": 0.0006, "step": 169310 }, { "epoch": 2.7705146036161334, "grad_norm": 0.04925353825092316, "learning_rate": 1.7719155707927148e-07, "loss": 0.0004, "step": 169320 }, { "epoch": 2.7706782295672094, "grad_norm": 0.04826878756284714, "learning_rate": 1.7694046801523812e-07, "loss": 0.0006, "step": 169330 }, { "epoch": 2.7708418555182854, "grad_norm": 0.029839392751455307, "learning_rate": 1.7668955377551922e-07, "loss": 0.0004, "step": 169340 }, { "epoch": 2.771005481469361, "grad_norm": 0.01830497942864895, "learning_rate": 1.764388143692125e-07, "loss": 0.0008, "step": 169350 }, { "epoch": 2.771169107420437, "grad_norm": 0.0425875186920166, "learning_rate": 1.761882498054046e-07, "loss": 0.0005, "step": 169360 }, { "epoch": 2.771332733371513, "grad_norm": 0.005086121615022421, "learning_rate": 1.7593786009317992e-07, "loss": 0.0014, "step": 169370 }, { "epoch": 2.7714963593225885, "grad_norm": 0.01563134975731373, "learning_rate": 1.756876452416123e-07, "loss": 0.0004, "step": 169380 }, { "epoch": 2.7716599852736645, "grad_norm": 0.003388570621609688, "learning_rate": 1.754376052597734e-07, "loss": 0.0009, "step": 169390 }, { "epoch": 2.77182361122474, "grad_norm": 0.03311295062303543, "learning_rate": 1.751877401567259e-07, "loss": 0.0009, "step": 169400 }, { "epoch": 2.771987237175816, "grad_norm": 0.02959679253399372, "learning_rate": 1.7493804994152598e-07, "loss": 0.0005, "step": 169410 }, { "epoch": 2.772150863126892, "grad_norm": 0.563259482383728, "learning_rate": 1.7468853462322576e-07, "loss": 0.001, "step": 169420 }, { "epoch": 2.7723144890779676, "grad_norm": 0.02752726711332798, "learning_rate": 1.7443919421086865e-07, "loss": 0.0008, "step": 169430 }, { "epoch": 2.7724781150290436, "grad_norm": 0.004192272666841745, "learning_rate": 1.7419002871349288e-07, "loss": 0.0002, "step": 169440 }, { "epoch": 2.772641740980119, "grad_norm": 0.10158471018075943, "learning_rate": 1.7394103814013018e-07, "loss": 0.0007, "step": 169450 }, { "epoch": 2.772805366931195, "grad_norm": 0.043052881956100464, "learning_rate": 1.73692222499805e-07, "loss": 0.0012, "step": 169460 }, { "epoch": 2.772968992882271, "grad_norm": 0.02363894321024418, "learning_rate": 1.7344358180153787e-07, "loss": 0.0005, "step": 169470 }, { "epoch": 2.7731326188333467, "grad_norm": 0.062009889632463455, "learning_rate": 1.7319511605433992e-07, "loss": 0.0005, "step": 169480 }, { "epoch": 2.7732962447844227, "grad_norm": 0.0013674815418198705, "learning_rate": 1.729468252672184e-07, "loss": 0.0006, "step": 169490 }, { "epoch": 2.7734598707354987, "grad_norm": 0.0437176488339901, "learning_rate": 1.7269870944917276e-07, "loss": 0.0009, "step": 169500 }, { "epoch": 2.7736234966865743, "grad_norm": 0.02126944065093994, "learning_rate": 1.7245076860919697e-07, "loss": 0.0005, "step": 169510 }, { "epoch": 2.7737871226376503, "grad_norm": 0.0443657822906971, "learning_rate": 1.7220300275627766e-07, "loss": 0.0004, "step": 169520 }, { "epoch": 2.7739507485887263, "grad_norm": 0.04853919520974159, "learning_rate": 1.7195541189939712e-07, "loss": 0.0011, "step": 169530 }, { "epoch": 2.774114374539802, "grad_norm": 0.1551184505224228, "learning_rate": 1.7170799604752874e-07, "loss": 0.0007, "step": 169540 }, { "epoch": 2.774278000490878, "grad_norm": 0.07881522923707962, "learning_rate": 1.7146075520964035e-07, "loss": 0.0004, "step": 169550 }, { "epoch": 2.774441626441954, "grad_norm": 0.08043744415044785, "learning_rate": 1.712136893946953e-07, "loss": 0.0012, "step": 169560 }, { "epoch": 2.7746052523930294, "grad_norm": 0.08677130937576294, "learning_rate": 1.7096679861164755e-07, "loss": 0.0006, "step": 169570 }, { "epoch": 2.7747688783441053, "grad_norm": 0.1499408483505249, "learning_rate": 1.7072008286944776e-07, "loss": 0.0004, "step": 169580 }, { "epoch": 2.7749325042951813, "grad_norm": 0.05759814754128456, "learning_rate": 1.704735421770376e-07, "loss": 0.0022, "step": 169590 }, { "epoch": 2.775096130246257, "grad_norm": 0.029853580519557, "learning_rate": 1.7022717654335386e-07, "loss": 0.0007, "step": 169600 }, { "epoch": 2.775259756197333, "grad_norm": 0.014423384331166744, "learning_rate": 1.699809859773277e-07, "loss": 0.0013, "step": 169610 }, { "epoch": 2.775423382148409, "grad_norm": 0.14842648804187775, "learning_rate": 1.6973497048788145e-07, "loss": 0.0002, "step": 169620 }, { "epoch": 2.7755870080994844, "grad_norm": 0.008933762088418007, "learning_rate": 1.694891300839341e-07, "loss": 0.0005, "step": 169630 }, { "epoch": 2.7757506340505604, "grad_norm": 0.16842113435268402, "learning_rate": 1.6924346477439513e-07, "loss": 0.0021, "step": 169640 }, { "epoch": 2.7759142600016364, "grad_norm": 0.05917133018374443, "learning_rate": 1.689979745681708e-07, "loss": 0.0004, "step": 169650 }, { "epoch": 2.776077885952712, "grad_norm": 0.05606184899806976, "learning_rate": 1.687526594741584e-07, "loss": 0.0007, "step": 169660 }, { "epoch": 2.776241511903788, "grad_norm": 0.09376686066389084, "learning_rate": 1.6850751950125032e-07, "loss": 0.0005, "step": 169670 }, { "epoch": 2.776405137854864, "grad_norm": 0.04740943759679794, "learning_rate": 1.682625546583333e-07, "loss": 0.0006, "step": 169680 }, { "epoch": 2.7765687638059395, "grad_norm": 0.09572571516036987, "learning_rate": 1.680177649542847e-07, "loss": 0.0002, "step": 169690 }, { "epoch": 2.7767323897570155, "grad_norm": 0.060865387320518494, "learning_rate": 1.6777315039798014e-07, "loss": 0.0005, "step": 169700 }, { "epoch": 2.7768960157080915, "grad_norm": 0.012650974094867706, "learning_rate": 1.6752871099828373e-07, "loss": 0.0005, "step": 169710 }, { "epoch": 2.777059641659167, "grad_norm": 0.12507717311382294, "learning_rate": 1.6728444676405774e-07, "loss": 0.0006, "step": 169720 }, { "epoch": 2.777223267610243, "grad_norm": 0.10915827751159668, "learning_rate": 1.670403577041557e-07, "loss": 0.0015, "step": 169730 }, { "epoch": 2.777386893561319, "grad_norm": 0.16892381012439728, "learning_rate": 1.6679644382742387e-07, "loss": 0.0008, "step": 169740 }, { "epoch": 2.7775505195123946, "grad_norm": 0.03212336078286171, "learning_rate": 1.6655270514270571e-07, "loss": 0.0004, "step": 169750 }, { "epoch": 2.7777141454634706, "grad_norm": 0.0148609085008502, "learning_rate": 1.6630914165883416e-07, "loss": 0.0006, "step": 169760 }, { "epoch": 2.7778777714145466, "grad_norm": 0.04622737318277359, "learning_rate": 1.6606575338463991e-07, "loss": 0.0008, "step": 169770 }, { "epoch": 2.778041397365622, "grad_norm": 0.017943989485502243, "learning_rate": 1.6582254032894264e-07, "loss": 0.0016, "step": 169780 }, { "epoch": 2.778205023316698, "grad_norm": 0.008822825737297535, "learning_rate": 1.6557950250056076e-07, "loss": 0.0012, "step": 169790 }, { "epoch": 2.7783686492677737, "grad_norm": 0.10040675848722458, "learning_rate": 1.653366399083023e-07, "loss": 0.0005, "step": 169800 }, { "epoch": 2.7785322752188497, "grad_norm": 0.0025606669951230288, "learning_rate": 1.6509395256097128e-07, "loss": 0.0007, "step": 169810 }, { "epoch": 2.7786959011699257, "grad_norm": 0.015258150175213814, "learning_rate": 1.648514404673629e-07, "loss": 0.0006, "step": 169820 }, { "epoch": 2.7788595271210013, "grad_norm": 0.13851045072078705, "learning_rate": 1.6460910363627015e-07, "loss": 0.0007, "step": 169830 }, { "epoch": 2.7790231530720773, "grad_norm": 0.04888838529586792, "learning_rate": 1.643669420764754e-07, "loss": 0.0005, "step": 169840 }, { "epoch": 2.779186779023153, "grad_norm": 0.07551861554384232, "learning_rate": 1.641249557967567e-07, "loss": 0.0006, "step": 169850 }, { "epoch": 2.779350404974229, "grad_norm": 0.06484108418226242, "learning_rate": 1.6388314480588584e-07, "loss": 0.0012, "step": 169860 }, { "epoch": 2.779514030925305, "grad_norm": 0.016625039279460907, "learning_rate": 1.63641509112627e-07, "loss": 0.0008, "step": 169870 }, { "epoch": 2.7796776568763804, "grad_norm": 0.05826619267463684, "learning_rate": 1.6340004872573977e-07, "loss": 0.0008, "step": 169880 }, { "epoch": 2.7798412828274564, "grad_norm": 0.021110009402036667, "learning_rate": 1.631587636539772e-07, "loss": 0.0005, "step": 169890 }, { "epoch": 2.7800049087785323, "grad_norm": 0.02184729464352131, "learning_rate": 1.629176539060834e-07, "loss": 0.0006, "step": 169900 }, { "epoch": 2.780168534729608, "grad_norm": 0.04591450095176697, "learning_rate": 1.6267671949079967e-07, "loss": 0.0003, "step": 169910 }, { "epoch": 2.780332160680684, "grad_norm": 0.06512957066297531, "learning_rate": 1.624359604168585e-07, "loss": 0.0005, "step": 169920 }, { "epoch": 2.78049578663176, "grad_norm": 0.021881897002458572, "learning_rate": 1.621953766929868e-07, "loss": 0.0009, "step": 169930 }, { "epoch": 2.7806594125828354, "grad_norm": 0.024408766999840736, "learning_rate": 1.6195496832790592e-07, "loss": 0.0004, "step": 169940 }, { "epoch": 2.7808230385339114, "grad_norm": 0.040458228439092636, "learning_rate": 1.6171473533032888e-07, "loss": 0.0003, "step": 169950 }, { "epoch": 2.7809866644849874, "grad_norm": 0.12420674413442612, "learning_rate": 1.614746777089643e-07, "loss": 0.0003, "step": 169960 }, { "epoch": 2.781150290436063, "grad_norm": 0.06038803979754448, "learning_rate": 1.6123479547251352e-07, "loss": 0.0005, "step": 169970 }, { "epoch": 2.781313916387139, "grad_norm": 0.0764453336596489, "learning_rate": 1.609950886296724e-07, "loss": 0.0006, "step": 169980 }, { "epoch": 2.781477542338215, "grad_norm": 0.0035014047753065825, "learning_rate": 1.6075555718912784e-07, "loss": 0.0006, "step": 169990 }, { "epoch": 2.7816411682892905, "grad_norm": 0.03599059581756592, "learning_rate": 1.6051620115956456e-07, "loss": 0.0005, "step": 170000 }, { "epoch": 2.7818047942403665, "grad_norm": 0.06846936792135239, "learning_rate": 1.6027702054965677e-07, "loss": 0.0024, "step": 170010 }, { "epoch": 2.7819684201914425, "grad_norm": 0.04152410849928856, "learning_rate": 1.6003801536807583e-07, "loss": 0.0007, "step": 170020 }, { "epoch": 2.782132046142518, "grad_norm": 0.04233233258128166, "learning_rate": 1.597991856234843e-07, "loss": 0.0013, "step": 170030 }, { "epoch": 2.782295672093594, "grad_norm": 0.05166567116975784, "learning_rate": 1.5956053132453853e-07, "loss": 0.0004, "step": 170040 }, { "epoch": 2.78245929804467, "grad_norm": 0.23059377074241638, "learning_rate": 1.5932205247989052e-07, "loss": 0.0004, "step": 170050 }, { "epoch": 2.7826229239957456, "grad_norm": 0.0020703545305877924, "learning_rate": 1.590837490981828e-07, "loss": 0.0004, "step": 170060 }, { "epoch": 2.7827865499468216, "grad_norm": 0.08792093396186829, "learning_rate": 1.5884562118805512e-07, "loss": 0.0002, "step": 170070 }, { "epoch": 2.7829501758978976, "grad_norm": 0.12561355531215668, "learning_rate": 1.5860766875813782e-07, "loss": 0.0013, "step": 170080 }, { "epoch": 2.783113801848973, "grad_norm": 0.05214882269501686, "learning_rate": 1.5836989181705675e-07, "loss": 0.0006, "step": 170090 }, { "epoch": 2.783277427800049, "grad_norm": 0.047570452094078064, "learning_rate": 1.581322903734306e-07, "loss": 0.0014, "step": 170100 }, { "epoch": 2.783441053751125, "grad_norm": 0.06508895754814148, "learning_rate": 1.578948644358719e-07, "loss": 0.0005, "step": 170110 }, { "epoch": 2.7836046797022007, "grad_norm": 0.09429115802049637, "learning_rate": 1.5765761401298652e-07, "loss": 0.0012, "step": 170120 }, { "epoch": 2.7837683056532767, "grad_norm": 0.08601327240467072, "learning_rate": 1.574205391133743e-07, "loss": 0.0014, "step": 170130 }, { "epoch": 2.7839319316043527, "grad_norm": 0.12766635417938232, "learning_rate": 1.5718363974562945e-07, "loss": 0.0005, "step": 170140 }, { "epoch": 2.7840955575554283, "grad_norm": 0.1010679379105568, "learning_rate": 1.5694691591833734e-07, "loss": 0.0003, "step": 170150 }, { "epoch": 2.7842591835065043, "grad_norm": 0.018694084137678146, "learning_rate": 1.5671036764007997e-07, "loss": 0.001, "step": 170160 }, { "epoch": 2.78442280945758, "grad_norm": 0.014573877677321434, "learning_rate": 1.5647399491943106e-07, "loss": 0.0004, "step": 170170 }, { "epoch": 2.784586435408656, "grad_norm": 0.00832452904433012, "learning_rate": 1.5623779776495874e-07, "loss": 0.0004, "step": 170180 }, { "epoch": 2.784750061359732, "grad_norm": 0.06192980334162712, "learning_rate": 1.5600177618522504e-07, "loss": 0.0011, "step": 170190 }, { "epoch": 2.7849136873108074, "grad_norm": 0.034070562571287155, "learning_rate": 1.5576593018878372e-07, "loss": 0.0008, "step": 170200 }, { "epoch": 2.7850773132618833, "grad_norm": 0.030460629612207413, "learning_rate": 1.5553025978418567e-07, "loss": 0.0006, "step": 170210 }, { "epoch": 2.785240939212959, "grad_norm": 0.01990927755832672, "learning_rate": 1.552947649799713e-07, "loss": 0.0008, "step": 170220 }, { "epoch": 2.785404565164035, "grad_norm": 0.014857985079288483, "learning_rate": 1.550594457846788e-07, "loss": 0.0005, "step": 170230 }, { "epoch": 2.785568191115111, "grad_norm": 0.013553816825151443, "learning_rate": 1.548243022068363e-07, "loss": 0.0009, "step": 170240 }, { "epoch": 2.7857318170661864, "grad_norm": 0.10838443785905838, "learning_rate": 1.545893342549676e-07, "loss": 0.0007, "step": 170250 }, { "epoch": 2.7858954430172624, "grad_norm": 0.01778852567076683, "learning_rate": 1.543545419375908e-07, "loss": 0.0009, "step": 170260 }, { "epoch": 2.7860590689683384, "grad_norm": 0.10566195100545883, "learning_rate": 1.5411992526321472e-07, "loss": 0.0007, "step": 170270 }, { "epoch": 2.786222694919414, "grad_norm": 0.034300994127988815, "learning_rate": 1.5388548424034476e-07, "loss": 0.0008, "step": 170280 }, { "epoch": 2.78638632087049, "grad_norm": 0.004707236774265766, "learning_rate": 1.5365121887747857e-07, "loss": 0.0007, "step": 170290 }, { "epoch": 2.786549946821566, "grad_norm": 0.012610087171196938, "learning_rate": 1.5341712918310824e-07, "loss": 0.0004, "step": 170300 }, { "epoch": 2.7867135727726415, "grad_norm": 0.07268116623163223, "learning_rate": 1.531832151657181e-07, "loss": 0.0016, "step": 170310 }, { "epoch": 2.7868771987237175, "grad_norm": 0.013467668555676937, "learning_rate": 1.5294947683378748e-07, "loss": 0.0009, "step": 170320 }, { "epoch": 2.7870408246747935, "grad_norm": 0.09897198528051376, "learning_rate": 1.5271591419578903e-07, "loss": 0.0006, "step": 170330 }, { "epoch": 2.787204450625869, "grad_norm": 0.16411009430885315, "learning_rate": 1.5248252726018876e-07, "loss": 0.0008, "step": 170340 }, { "epoch": 2.787368076576945, "grad_norm": 0.026736533269286156, "learning_rate": 1.52249316035446e-07, "loss": 0.001, "step": 170350 }, { "epoch": 2.787531702528021, "grad_norm": 0.042290136218070984, "learning_rate": 1.520162805300135e-07, "loss": 0.0006, "step": 170360 }, { "epoch": 2.7876953284790966, "grad_norm": 0.02710956707596779, "learning_rate": 1.5178342075233942e-07, "loss": 0.0015, "step": 170370 }, { "epoch": 2.7878589544301726, "grad_norm": 0.36796876788139343, "learning_rate": 1.5155073671086427e-07, "loss": 0.0013, "step": 170380 }, { "epoch": 2.7880225803812486, "grad_norm": 0.17700661718845367, "learning_rate": 1.5131822841402133e-07, "loss": 0.0008, "step": 170390 }, { "epoch": 2.788186206332324, "grad_norm": 0.17054308950901031, "learning_rate": 1.5108589587023993e-07, "loss": 0.0009, "step": 170400 }, { "epoch": 2.7883498322834, "grad_norm": 0.03665393590927124, "learning_rate": 1.5085373908793998e-07, "loss": 0.0004, "step": 170410 }, { "epoch": 2.788513458234476, "grad_norm": 0.06940967589616776, "learning_rate": 1.5062175807553814e-07, "loss": 0.0004, "step": 170420 }, { "epoch": 2.7886770841855517, "grad_norm": 0.02967650257050991, "learning_rate": 1.5038995284144208e-07, "loss": 0.0005, "step": 170430 }, { "epoch": 2.7888407101366277, "grad_norm": 0.12153536826372147, "learning_rate": 1.5015832339405457e-07, "loss": 0.0008, "step": 170440 }, { "epoch": 2.7890043360877037, "grad_norm": 0.03611701726913452, "learning_rate": 1.4992686974177162e-07, "loss": 0.0004, "step": 170450 }, { "epoch": 2.7891679620387793, "grad_norm": 0.07035545259714127, "learning_rate": 1.4969559189298265e-07, "loss": 0.0005, "step": 170460 }, { "epoch": 2.7893315879898553, "grad_norm": 0.1179262101650238, "learning_rate": 1.49464489856071e-07, "loss": 0.0009, "step": 170470 }, { "epoch": 2.7894952139409313, "grad_norm": 0.07383953034877777, "learning_rate": 1.4923356363941376e-07, "loss": 0.0004, "step": 170480 }, { "epoch": 2.789658839892007, "grad_norm": 0.03154398128390312, "learning_rate": 1.49002813251381e-07, "loss": 0.0006, "step": 170490 }, { "epoch": 2.789822465843083, "grad_norm": 0.07447236031293869, "learning_rate": 1.4877223870033763e-07, "loss": 0.0006, "step": 170500 }, { "epoch": 2.789986091794159, "grad_norm": 0.00967530906200409, "learning_rate": 1.4854183999464088e-07, "loss": 0.0006, "step": 170510 }, { "epoch": 2.7901497177452343, "grad_norm": 0.02058189921081066, "learning_rate": 1.4831161714264186e-07, "loss": 0.0009, "step": 170520 }, { "epoch": 2.7903133436963103, "grad_norm": 0.006499041803181171, "learning_rate": 1.480815701526861e-07, "loss": 0.0011, "step": 170530 }, { "epoch": 2.7904769696473863, "grad_norm": 0.03343840315937996, "learning_rate": 1.4785169903311302e-07, "loss": 0.0014, "step": 170540 }, { "epoch": 2.790640595598462, "grad_norm": 0.05925329402089119, "learning_rate": 1.4762200379225322e-07, "loss": 0.0005, "step": 170550 }, { "epoch": 2.790804221549538, "grad_norm": 0.09993096441030502, "learning_rate": 1.473924844384339e-07, "loss": 0.0014, "step": 170560 }, { "epoch": 2.7909678475006134, "grad_norm": 0.039635904133319855, "learning_rate": 1.4716314097997343e-07, "loss": 0.0017, "step": 170570 }, { "epoch": 2.7911314734516894, "grad_norm": 0.15017932653427124, "learning_rate": 1.4693397342518623e-07, "loss": 0.0008, "step": 170580 }, { "epoch": 2.7912950994027654, "grad_norm": 0.16104936599731445, "learning_rate": 1.4670498178237736e-07, "loss": 0.0006, "step": 170590 }, { "epoch": 2.791458725353841, "grad_norm": 0.0601748451590538, "learning_rate": 1.4647616605984903e-07, "loss": 0.0004, "step": 170600 }, { "epoch": 2.791622351304917, "grad_norm": 0.0385313406586647, "learning_rate": 1.4624752626589466e-07, "loss": 0.0005, "step": 170610 }, { "epoch": 2.7917859772559925, "grad_norm": 0.00401941267773509, "learning_rate": 1.460190624088015e-07, "loss": 0.0003, "step": 170620 }, { "epoch": 2.7919496032070685, "grad_norm": 0.019108308479189873, "learning_rate": 1.4579077449685175e-07, "loss": 0.0006, "step": 170630 }, { "epoch": 2.7921132291581445, "grad_norm": 0.014254197478294373, "learning_rate": 1.455626625383194e-07, "loss": 0.0014, "step": 170640 }, { "epoch": 2.79227685510922, "grad_norm": 0.06113189831376076, "learning_rate": 1.4533472654147284e-07, "loss": 0.0007, "step": 170650 }, { "epoch": 2.792440481060296, "grad_norm": 0.10673297196626663, "learning_rate": 1.451069665145749e-07, "loss": 0.0012, "step": 170660 }, { "epoch": 2.792604107011372, "grad_norm": 0.10668063908815384, "learning_rate": 1.4487938246588063e-07, "loss": 0.0004, "step": 170670 }, { "epoch": 2.7927677329624476, "grad_norm": 0.15307560563087463, "learning_rate": 1.446519744036401e-07, "loss": 0.0008, "step": 170680 }, { "epoch": 2.7929313589135236, "grad_norm": 0.06924367696046829, "learning_rate": 1.444247423360956e-07, "loss": 0.0008, "step": 170690 }, { "epoch": 2.7930949848645996, "grad_norm": 0.033333614468574524, "learning_rate": 1.4419768627148444e-07, "loss": 0.0007, "step": 170700 }, { "epoch": 2.793258610815675, "grad_norm": 0.07976774126291275, "learning_rate": 1.4397080621803615e-07, "loss": 0.0006, "step": 170710 }, { "epoch": 2.793422236766751, "grad_norm": 0.025536082684993744, "learning_rate": 1.4374410218397528e-07, "loss": 0.0009, "step": 170720 }, { "epoch": 2.793585862717827, "grad_norm": 0.07201147824525833, "learning_rate": 1.435175741775191e-07, "loss": 0.0003, "step": 170730 }, { "epoch": 2.7937494886689027, "grad_norm": 0.045723602175712585, "learning_rate": 1.4329122220687884e-07, "loss": 0.0008, "step": 170740 }, { "epoch": 2.7939131146199787, "grad_norm": 0.01806153543293476, "learning_rate": 1.4306504628025853e-07, "loss": 0.0006, "step": 170750 }, { "epoch": 2.7940767405710547, "grad_norm": 0.09223387390375137, "learning_rate": 1.4283904640585711e-07, "loss": 0.0013, "step": 170760 }, { "epoch": 2.7942403665221303, "grad_norm": 0.018166862428188324, "learning_rate": 1.4261322259186638e-07, "loss": 0.0006, "step": 170770 }, { "epoch": 2.7944039924732063, "grad_norm": 0.13295575976371765, "learning_rate": 1.4238757484647148e-07, "loss": 0.0007, "step": 170780 }, { "epoch": 2.7945676184242823, "grad_norm": 0.022350525483489037, "learning_rate": 1.4216210317785307e-07, "loss": 0.0007, "step": 170790 }, { "epoch": 2.794731244375358, "grad_norm": 0.022321805357933044, "learning_rate": 1.4193680759418183e-07, "loss": 0.0006, "step": 170800 }, { "epoch": 2.794894870326434, "grad_norm": 0.02440972998738289, "learning_rate": 1.417116881036251e-07, "loss": 0.0007, "step": 170810 }, { "epoch": 2.79505849627751, "grad_norm": 0.02165321446955204, "learning_rate": 1.414867447143442e-07, "loss": 0.0004, "step": 170820 }, { "epoch": 2.7952221222285853, "grad_norm": 0.0063326614908874035, "learning_rate": 1.4126197743449088e-07, "loss": 0.0008, "step": 170830 }, { "epoch": 2.7953857481796613, "grad_norm": 0.07477544993162155, "learning_rate": 1.410373862722142e-07, "loss": 0.0006, "step": 170840 }, { "epoch": 2.7955493741307373, "grad_norm": 0.09436183422803879, "learning_rate": 1.4081297123565317e-07, "loss": 0.0009, "step": 170850 }, { "epoch": 2.795713000081813, "grad_norm": 0.02822510525584221, "learning_rate": 1.4058873233294355e-07, "loss": 0.0011, "step": 170860 }, { "epoch": 2.795876626032889, "grad_norm": 0.04970188066363335, "learning_rate": 1.4036466957221273e-07, "loss": 0.0007, "step": 170870 }, { "epoch": 2.796040251983965, "grad_norm": 0.06209738925099373, "learning_rate": 1.4014078296158252e-07, "loss": 0.0007, "step": 170880 }, { "epoch": 2.7962038779350404, "grad_norm": 0.14559872448444366, "learning_rate": 1.399170725091692e-07, "loss": 0.0007, "step": 170890 }, { "epoch": 2.7963675038861164, "grad_norm": 0.07818220555782318, "learning_rate": 1.3969353822308074e-07, "loss": 0.0009, "step": 170900 }, { "epoch": 2.7965311298371924, "grad_norm": 0.07494895905256271, "learning_rate": 1.3947018011142065e-07, "loss": 0.0009, "step": 170910 }, { "epoch": 2.796694755788268, "grad_norm": 0.02316666580736637, "learning_rate": 1.392469981822836e-07, "loss": 0.0007, "step": 170920 }, { "epoch": 2.796858381739344, "grad_norm": 0.002424146980047226, "learning_rate": 1.3902399244376142e-07, "loss": 0.0004, "step": 170930 }, { "epoch": 2.79702200769042, "grad_norm": 0.022954905405640602, "learning_rate": 1.3880116290393596e-07, "loss": 0.0003, "step": 170940 }, { "epoch": 2.7971856336414955, "grad_norm": 0.17274229228496552, "learning_rate": 1.385785095708847e-07, "loss": 0.0007, "step": 170950 }, { "epoch": 2.7973492595925715, "grad_norm": 0.021951640024781227, "learning_rate": 1.3835603245267893e-07, "loss": 0.0007, "step": 170960 }, { "epoch": 2.797512885543647, "grad_norm": 0.011379717849195004, "learning_rate": 1.3813373155738107e-07, "loss": 0.0013, "step": 170970 }, { "epoch": 2.797676511494723, "grad_norm": 0.09212465584278107, "learning_rate": 1.379116068930514e-07, "loss": 0.0007, "step": 170980 }, { "epoch": 2.797840137445799, "grad_norm": 0.07407135516405106, "learning_rate": 1.3768965846773952e-07, "loss": 0.0011, "step": 170990 }, { "epoch": 2.7980037633968746, "grad_norm": 0.007346815429627895, "learning_rate": 1.374678862894918e-07, "loss": 0.0004, "step": 171000 }, { "epoch": 2.7981673893479506, "grad_norm": 0.02472073957324028, "learning_rate": 1.3724629036634628e-07, "loss": 0.001, "step": 171010 }, { "epoch": 2.798331015299026, "grad_norm": 0.07992814481258392, "learning_rate": 1.370248707063354e-07, "loss": 0.001, "step": 171020 }, { "epoch": 2.798494641250102, "grad_norm": 0.0026793377473950386, "learning_rate": 1.3680362731748498e-07, "loss": 0.0005, "step": 171030 }, { "epoch": 2.798658267201178, "grad_norm": 0.008786062709987164, "learning_rate": 1.3658256020781468e-07, "loss": 0.0007, "step": 171040 }, { "epoch": 2.7988218931522537, "grad_norm": 0.03457338735461235, "learning_rate": 1.3636166938533814e-07, "loss": 0.0006, "step": 171050 }, { "epoch": 2.7989855191033297, "grad_norm": 0.02264559082686901, "learning_rate": 1.3614095485806112e-07, "loss": 0.0005, "step": 171060 }, { "epoch": 2.7991491450544057, "grad_norm": 0.09252757579088211, "learning_rate": 1.3592041663398504e-07, "loss": 0.0006, "step": 171070 }, { "epoch": 2.7993127710054813, "grad_norm": 0.05027490481734276, "learning_rate": 1.3570005472110238e-07, "loss": 0.0004, "step": 171080 }, { "epoch": 2.7994763969565573, "grad_norm": 0.10141106694936752, "learning_rate": 1.354798691274023e-07, "loss": 0.0007, "step": 171090 }, { "epoch": 2.7996400229076333, "grad_norm": 0.014897673390805721, "learning_rate": 1.352598598608651e-07, "loss": 0.0019, "step": 171100 }, { "epoch": 2.799803648858709, "grad_norm": 0.09295172244310379, "learning_rate": 1.3504002692946605e-07, "loss": 0.0011, "step": 171110 }, { "epoch": 2.799967274809785, "grad_norm": 0.22135546803474426, "learning_rate": 1.3482037034117378e-07, "loss": 0.0014, "step": 171120 }, { "epoch": 2.800130900760861, "grad_norm": 0.12304525077342987, "learning_rate": 1.3460089010394915e-07, "loss": 0.0007, "step": 171130 }, { "epoch": 2.8002945267119363, "grad_norm": 0.10921672731637955, "learning_rate": 1.343815862257497e-07, "loss": 0.001, "step": 171140 }, { "epoch": 2.8004581526630123, "grad_norm": 0.014127643778920174, "learning_rate": 1.3416245871452293e-07, "loss": 0.0005, "step": 171150 }, { "epoch": 2.8006217786140883, "grad_norm": 0.05219822749495506, "learning_rate": 1.3394350757821196e-07, "loss": 0.0006, "step": 171160 }, { "epoch": 2.800785404565164, "grad_norm": 0.12080032378435135, "learning_rate": 1.3372473282475428e-07, "loss": 0.0008, "step": 171170 }, { "epoch": 2.80094903051624, "grad_norm": 0.07122016698122025, "learning_rate": 1.3350613446207862e-07, "loss": 0.0005, "step": 171180 }, { "epoch": 2.801112656467316, "grad_norm": 0.08172375708818436, "learning_rate": 1.3328771249810967e-07, "loss": 0.0004, "step": 171190 }, { "epoch": 2.8012762824183914, "grad_norm": 0.04016002267599106, "learning_rate": 1.3306946694076395e-07, "loss": 0.0004, "step": 171200 }, { "epoch": 2.8014399083694674, "grad_norm": 0.12552621960639954, "learning_rate": 1.3285139779795285e-07, "loss": 0.0005, "step": 171210 }, { "epoch": 2.8016035343205434, "grad_norm": 0.054361145943403244, "learning_rate": 1.3263350507758065e-07, "loss": 0.0007, "step": 171220 }, { "epoch": 2.801767160271619, "grad_norm": 0.18638886511325836, "learning_rate": 1.3241578878754602e-07, "loss": 0.0009, "step": 171230 }, { "epoch": 2.801930786222695, "grad_norm": 0.06557103991508484, "learning_rate": 1.3219824893573985e-07, "loss": 0.0002, "step": 171240 }, { "epoch": 2.802094412173771, "grad_norm": 0.04394310712814331, "learning_rate": 1.3198088553004752e-07, "loss": 0.0007, "step": 171250 }, { "epoch": 2.8022580381248465, "grad_norm": 0.03255559504032135, "learning_rate": 1.3176369857834825e-07, "loss": 0.0004, "step": 171260 }, { "epoch": 2.8024216640759225, "grad_norm": 0.10194858908653259, "learning_rate": 1.315466880885141e-07, "loss": 0.0007, "step": 171270 }, { "epoch": 2.8025852900269985, "grad_norm": 0.015548547729849815, "learning_rate": 1.313298540684116e-07, "loss": 0.0007, "step": 171280 }, { "epoch": 2.802748915978074, "grad_norm": 0.2654331624507904, "learning_rate": 1.3111319652590048e-07, "loss": 0.0007, "step": 171290 }, { "epoch": 2.80291254192915, "grad_norm": 0.007964244112372398, "learning_rate": 1.3089671546883343e-07, "loss": 0.0007, "step": 171300 }, { "epoch": 2.803076167880226, "grad_norm": 0.037779804319143295, "learning_rate": 1.3068041090505858e-07, "loss": 0.0006, "step": 171310 }, { "epoch": 2.8032397938313016, "grad_norm": 0.05188390240073204, "learning_rate": 1.304642828424152e-07, "loss": 0.0005, "step": 171320 }, { "epoch": 2.8034034197823776, "grad_norm": 0.037394631654024124, "learning_rate": 1.3024833128873815e-07, "loss": 0.001, "step": 171330 }, { "epoch": 2.803567045733453, "grad_norm": 0.09160242974758148, "learning_rate": 1.3003255625185563e-07, "loss": 0.0007, "step": 171340 }, { "epoch": 2.803730671684529, "grad_norm": 0.08693581819534302, "learning_rate": 1.2981695773958746e-07, "loss": 0.0005, "step": 171350 }, { "epoch": 2.803894297635605, "grad_norm": 0.20058026909828186, "learning_rate": 1.2960153575974965e-07, "loss": 0.0006, "step": 171360 }, { "epoch": 2.8040579235866807, "grad_norm": 0.07765739411115646, "learning_rate": 1.2938629032014982e-07, "loss": 0.0006, "step": 171370 }, { "epoch": 2.8042215495377567, "grad_norm": 0.1717786341905594, "learning_rate": 1.2917122142859173e-07, "loss": 0.0006, "step": 171380 }, { "epoch": 2.8043851754888323, "grad_norm": 0.003905646735802293, "learning_rate": 1.289563290928697e-07, "loss": 0.0015, "step": 171390 }, { "epoch": 2.8045488014399083, "grad_norm": 0.027350837364792824, "learning_rate": 1.2874161332077363e-07, "loss": 0.0004, "step": 171400 }, { "epoch": 2.8047124273909843, "grad_norm": 0.07118779420852661, "learning_rate": 1.2852707412008614e-07, "loss": 0.0009, "step": 171410 }, { "epoch": 2.80487605334206, "grad_norm": 0.07049226760864258, "learning_rate": 1.2831271149858382e-07, "loss": 0.0006, "step": 171420 }, { "epoch": 2.805039679293136, "grad_norm": 0.0034819552674889565, "learning_rate": 1.280985254640371e-07, "loss": 0.0005, "step": 171430 }, { "epoch": 2.805203305244212, "grad_norm": 0.1797843724489212, "learning_rate": 1.2788451602420982e-07, "loss": 0.0013, "step": 171440 }, { "epoch": 2.8053669311952873, "grad_norm": 0.1690874993801117, "learning_rate": 1.2767068318685904e-07, "loss": 0.0008, "step": 171450 }, { "epoch": 2.8055305571463633, "grad_norm": 0.008725961670279503, "learning_rate": 1.2745702695973528e-07, "loss": 0.0003, "step": 171460 }, { "epoch": 2.8056941830974393, "grad_norm": 0.027935713529586792, "learning_rate": 1.272435473505834e-07, "loss": 0.0007, "step": 171470 }, { "epoch": 2.805857809048515, "grad_norm": 0.13406161963939667, "learning_rate": 1.2703024436714172e-07, "loss": 0.0006, "step": 171480 }, { "epoch": 2.806021434999591, "grad_norm": 0.11690158396959305, "learning_rate": 1.268171180171418e-07, "loss": 0.0004, "step": 171490 }, { "epoch": 2.806185060950667, "grad_norm": 0.0568169429898262, "learning_rate": 1.2660416830830857e-07, "loss": 0.0006, "step": 171500 }, { "epoch": 2.8063486869017424, "grad_norm": 0.025959106162190437, "learning_rate": 1.2639139524836196e-07, "loss": 0.0004, "step": 171510 }, { "epoch": 2.8065123128528184, "grad_norm": 0.14221347868442535, "learning_rate": 1.261787988450136e-07, "loss": 0.0008, "step": 171520 }, { "epoch": 2.8066759388038944, "grad_norm": 0.07157721370458603, "learning_rate": 1.2596637910596954e-07, "loss": 0.0004, "step": 171530 }, { "epoch": 2.80683956475497, "grad_norm": 0.09572681784629822, "learning_rate": 1.2575413603893082e-07, "loss": 0.0003, "step": 171540 }, { "epoch": 2.807003190706046, "grad_norm": 0.0512634739279747, "learning_rate": 1.2554206965158855e-07, "loss": 0.0006, "step": 171550 }, { "epoch": 2.807166816657122, "grad_norm": 0.15759795904159546, "learning_rate": 1.2533017995163155e-07, "loss": 0.0006, "step": 171560 }, { "epoch": 2.8073304426081975, "grad_norm": 0.013139360584318638, "learning_rate": 1.2511846694673923e-07, "loss": 0.0007, "step": 171570 }, { "epoch": 2.8074940685592735, "grad_norm": 0.04685650393366814, "learning_rate": 1.2490693064458604e-07, "loss": 0.0008, "step": 171580 }, { "epoch": 2.8076576945103495, "grad_norm": 0.31251591444015503, "learning_rate": 1.246955710528397e-07, "loss": 0.0009, "step": 171590 }, { "epoch": 2.807821320461425, "grad_norm": 0.07122431695461273, "learning_rate": 1.244843881791613e-07, "loss": 0.0003, "step": 171600 }, { "epoch": 2.807984946412501, "grad_norm": 0.03874616324901581, "learning_rate": 1.2427338203120641e-07, "loss": 0.0009, "step": 171610 }, { "epoch": 2.808148572363577, "grad_norm": 0.09619405120611191, "learning_rate": 1.240625526166217e-07, "loss": 0.0006, "step": 171620 }, { "epoch": 2.8083121983146526, "grad_norm": 0.03502504900097847, "learning_rate": 1.238518999430516e-07, "loss": 0.0005, "step": 171630 }, { "epoch": 2.8084758242657286, "grad_norm": 0.015436643734574318, "learning_rate": 1.2364142401813052e-07, "loss": 0.001, "step": 171640 }, { "epoch": 2.8086394502168046, "grad_norm": 0.015409058891236782, "learning_rate": 1.2343112484948737e-07, "loss": 0.0008, "step": 171650 }, { "epoch": 2.80880307616788, "grad_norm": 0.37831759452819824, "learning_rate": 1.2322100244474555e-07, "loss": 0.0012, "step": 171660 }, { "epoch": 2.808966702118956, "grad_norm": 0.1155666708946228, "learning_rate": 1.2301105681152115e-07, "loss": 0.0007, "step": 171670 }, { "epoch": 2.809130328070032, "grad_norm": 0.0317828431725502, "learning_rate": 1.228012879574253e-07, "loss": 0.0011, "step": 171680 }, { "epoch": 2.8092939540211077, "grad_norm": 0.04493634030222893, "learning_rate": 1.225916958900597e-07, "loss": 0.0004, "step": 171690 }, { "epoch": 2.8094575799721837, "grad_norm": 0.03831320255994797, "learning_rate": 1.2238228061702274e-07, "loss": 0.0007, "step": 171700 }, { "epoch": 2.8096212059232597, "grad_norm": 0.02901962399482727, "learning_rate": 1.2217304214590498e-07, "loss": 0.0005, "step": 171710 }, { "epoch": 2.8097848318743353, "grad_norm": 0.052062779664993286, "learning_rate": 1.2196398048429148e-07, "loss": 0.0009, "step": 171720 }, { "epoch": 2.8099484578254112, "grad_norm": 0.07571718841791153, "learning_rate": 1.217550956397595e-07, "loss": 0.0008, "step": 171730 }, { "epoch": 2.810112083776487, "grad_norm": 0.04107063263654709, "learning_rate": 1.215463876198808e-07, "loss": 0.0004, "step": 171740 }, { "epoch": 2.810275709727563, "grad_norm": 0.1320413500070572, "learning_rate": 1.213378564322204e-07, "loss": 0.0008, "step": 171750 }, { "epoch": 2.810439335678639, "grad_norm": 0.0882088765501976, "learning_rate": 1.211295020843367e-07, "loss": 0.0006, "step": 171760 }, { "epoch": 2.8106029616297143, "grad_norm": 0.06519163399934769, "learning_rate": 1.2092132458378314e-07, "loss": 0.0005, "step": 171770 }, { "epoch": 2.8107665875807903, "grad_norm": 0.13670633733272552, "learning_rate": 1.207133239381042e-07, "loss": 0.0008, "step": 171780 }, { "epoch": 2.810930213531866, "grad_norm": 0.12694181501865387, "learning_rate": 1.2050550015484108e-07, "loss": 0.0011, "step": 171790 }, { "epoch": 2.811093839482942, "grad_norm": 0.08233991265296936, "learning_rate": 1.20297853241525e-07, "loss": 0.0007, "step": 171800 }, { "epoch": 2.811257465434018, "grad_norm": 0.06695646047592163, "learning_rate": 1.2009038320568435e-07, "loss": 0.0004, "step": 171810 }, { "epoch": 2.8114210913850934, "grad_norm": 0.07969870418310165, "learning_rate": 1.1988309005483866e-07, "loss": 0.0008, "step": 171820 }, { "epoch": 2.8115847173361694, "grad_norm": 0.15012790262699127, "learning_rate": 1.1967597379650198e-07, "loss": 0.0009, "step": 171830 }, { "epoch": 2.8117483432872454, "grad_norm": 0.0005089350161142647, "learning_rate": 1.1946903443818215e-07, "loss": 0.0006, "step": 171840 }, { "epoch": 2.811911969238321, "grad_norm": 0.05697668343782425, "learning_rate": 1.1926227198737927e-07, "loss": 0.0008, "step": 171850 }, { "epoch": 2.812075595189397, "grad_norm": 0.1401207596063614, "learning_rate": 1.190556864515885e-07, "loss": 0.0004, "step": 171860 }, { "epoch": 2.812239221140473, "grad_norm": 0.05562201514840126, "learning_rate": 1.1884927783829824e-07, "loss": 0.0009, "step": 171870 }, { "epoch": 2.8124028470915485, "grad_norm": 0.11965223401784897, "learning_rate": 1.1864304615498978e-07, "loss": 0.0008, "step": 171880 }, { "epoch": 2.8125664730426245, "grad_norm": 0.05904148146510124, "learning_rate": 1.1843699140913878e-07, "loss": 0.0004, "step": 171890 }, { "epoch": 2.8127300989937005, "grad_norm": 0.019149212166666985, "learning_rate": 1.1823111360821482e-07, "loss": 0.0013, "step": 171900 }, { "epoch": 2.812893724944776, "grad_norm": 0.0012291534803807735, "learning_rate": 1.1802541275967971e-07, "loss": 0.0004, "step": 171910 }, { "epoch": 2.813057350895852, "grad_norm": 0.002934809308499098, "learning_rate": 1.1781988887098972e-07, "loss": 0.0008, "step": 171920 }, { "epoch": 2.813220976846928, "grad_norm": 0.11563082784414291, "learning_rate": 1.1761454194959498e-07, "loss": 0.0006, "step": 171930 }, { "epoch": 2.8133846027980036, "grad_norm": 0.10422677546739578, "learning_rate": 1.1740937200293901e-07, "loss": 0.0008, "step": 171940 }, { "epoch": 2.8135482287490796, "grad_norm": 0.06244797632098198, "learning_rate": 1.172043790384575e-07, "loss": 0.0006, "step": 171950 }, { "epoch": 2.8137118547001556, "grad_norm": 0.15237131714820862, "learning_rate": 1.169995630635823e-07, "loss": 0.0007, "step": 171960 }, { "epoch": 2.813875480651231, "grad_norm": 0.06148587167263031, "learning_rate": 1.1679492408573634e-07, "loss": 0.0007, "step": 171970 }, { "epoch": 2.814039106602307, "grad_norm": 0.003219089936465025, "learning_rate": 1.165904621123387e-07, "loss": 0.0007, "step": 171980 }, { "epoch": 2.814202732553383, "grad_norm": 0.10074108839035034, "learning_rate": 1.1638617715079902e-07, "loss": 0.0005, "step": 171990 }, { "epoch": 2.8143663585044587, "grad_norm": 0.08588927984237671, "learning_rate": 1.1618206920852359e-07, "loss": 0.0008, "step": 172000 }, { "epoch": 2.8145299844555347, "grad_norm": 0.05853542312979698, "learning_rate": 1.1597813829290982e-07, "loss": 0.0015, "step": 172010 }, { "epoch": 2.8146936104066107, "grad_norm": 0.02257656306028366, "learning_rate": 1.1577438441135013e-07, "loss": 0.0003, "step": 172020 }, { "epoch": 2.8148572363576863, "grad_norm": 0.10484442859888077, "learning_rate": 1.155708075712303e-07, "loss": 0.0006, "step": 172030 }, { "epoch": 2.8150208623087623, "grad_norm": 0.05636831372976303, "learning_rate": 1.1536740777992938e-07, "loss": 0.0004, "step": 172040 }, { "epoch": 2.8151844882598382, "grad_norm": 0.2545546591281891, "learning_rate": 1.1516418504482041e-07, "loss": 0.001, "step": 172050 }, { "epoch": 2.815348114210914, "grad_norm": 0.08680564910173416, "learning_rate": 1.1496113937326858e-07, "loss": 0.0007, "step": 172060 }, { "epoch": 2.81551174016199, "grad_norm": 0.0426112599670887, "learning_rate": 1.1475827077263468e-07, "loss": 0.0024, "step": 172070 }, { "epoch": 2.815675366113066, "grad_norm": 0.00658803666010499, "learning_rate": 1.1455557925027172e-07, "loss": 0.0004, "step": 172080 }, { "epoch": 2.8158389920641413, "grad_norm": 0.07226915657520294, "learning_rate": 1.143530648135277e-07, "loss": 0.0007, "step": 172090 }, { "epoch": 2.8160026180152173, "grad_norm": 0.026515459641814232, "learning_rate": 1.1415072746974288e-07, "loss": 0.0004, "step": 172100 }, { "epoch": 2.816166243966293, "grad_norm": 0.11451422423124313, "learning_rate": 1.139485672262508e-07, "loss": 0.0007, "step": 172110 }, { "epoch": 2.816329869917369, "grad_norm": 0.06423421949148178, "learning_rate": 1.1374658409038064e-07, "loss": 0.0011, "step": 172120 }, { "epoch": 2.816493495868445, "grad_norm": 0.03168177977204323, "learning_rate": 1.1354477806945207e-07, "loss": 0.0007, "step": 172130 }, { "epoch": 2.8166571218195204, "grad_norm": 0.065270334482193, "learning_rate": 1.1334314917078149e-07, "loss": 0.0016, "step": 172140 }, { "epoch": 2.8168207477705964, "grad_norm": 0.061595842242240906, "learning_rate": 1.1314169740167747e-07, "loss": 0.0009, "step": 172150 }, { "epoch": 2.816984373721672, "grad_norm": 0.04262515902519226, "learning_rate": 1.1294042276944084e-07, "loss": 0.0007, "step": 172160 }, { "epoch": 2.817147999672748, "grad_norm": 0.08238794654607773, "learning_rate": 1.1273932528136854e-07, "loss": 0.0005, "step": 172170 }, { "epoch": 2.817311625623824, "grad_norm": 0.0027544735930860043, "learning_rate": 1.1253840494474922e-07, "loss": 0.0003, "step": 172180 }, { "epoch": 2.8174752515748995, "grad_norm": 0.2192983329296112, "learning_rate": 1.1233766176686645e-07, "loss": 0.0007, "step": 172190 }, { "epoch": 2.8176388775259755, "grad_norm": 0.0644710510969162, "learning_rate": 1.1213709575499555e-07, "loss": 0.0005, "step": 172200 }, { "epoch": 2.8178025034770515, "grad_norm": 0.020667342469096184, "learning_rate": 1.119367069164079e-07, "loss": 0.0005, "step": 172210 }, { "epoch": 2.817966129428127, "grad_norm": 0.4020586609840393, "learning_rate": 1.1173649525836605e-07, "loss": 0.001, "step": 172220 }, { "epoch": 2.818129755379203, "grad_norm": 0.05030730366706848, "learning_rate": 1.1153646078812808e-07, "loss": 0.0009, "step": 172230 }, { "epoch": 2.818293381330279, "grad_norm": 0.022579913958907127, "learning_rate": 1.1133660351294428e-07, "loss": 0.0005, "step": 172240 }, { "epoch": 2.8184570072813546, "grad_norm": 0.028644638136029243, "learning_rate": 1.1113692344005889e-07, "loss": 0.0006, "step": 172250 }, { "epoch": 2.8186206332324306, "grad_norm": 0.0095040462911129, "learning_rate": 1.1093742057670998e-07, "loss": 0.0006, "step": 172260 }, { "epoch": 2.8187842591835066, "grad_norm": 0.10112432390451431, "learning_rate": 1.1073809493012843e-07, "loss": 0.0007, "step": 172270 }, { "epoch": 2.818947885134582, "grad_norm": 0.0946236327290535, "learning_rate": 1.1053894650754071e-07, "loss": 0.0005, "step": 172280 }, { "epoch": 2.819111511085658, "grad_norm": 0.02809935249388218, "learning_rate": 1.1033997531616436e-07, "loss": 0.0008, "step": 172290 }, { "epoch": 2.819275137036734, "grad_norm": 0.04630189388990402, "learning_rate": 1.101411813632114e-07, "loss": 0.0006, "step": 172300 }, { "epoch": 2.8194387629878097, "grad_norm": 0.0056246803142130375, "learning_rate": 1.0994256465588882e-07, "loss": 0.0004, "step": 172310 }, { "epoch": 2.8196023889388857, "grad_norm": 0.09475484490394592, "learning_rate": 1.0974412520139533e-07, "loss": 0.0006, "step": 172320 }, { "epoch": 2.8197660148899617, "grad_norm": 0.039400387555360794, "learning_rate": 1.0954586300692349e-07, "loss": 0.0004, "step": 172330 }, { "epoch": 2.8199296408410373, "grad_norm": 0.11696458607912064, "learning_rate": 1.0934777807966035e-07, "loss": 0.0005, "step": 172340 }, { "epoch": 2.8200932667921133, "grad_norm": 0.3796849548816681, "learning_rate": 1.0914987042678626e-07, "loss": 0.0016, "step": 172350 }, { "epoch": 2.8202568927431892, "grad_norm": 0.08543279021978378, "learning_rate": 1.0895214005547438e-07, "loss": 0.001, "step": 172360 }, { "epoch": 2.820420518694265, "grad_norm": 0.2826046645641327, "learning_rate": 1.0875458697289176e-07, "loss": 0.001, "step": 172370 }, { "epoch": 2.820584144645341, "grad_norm": 0.07091300189495087, "learning_rate": 1.0855721118619989e-07, "loss": 0.0009, "step": 172380 }, { "epoch": 2.820747770596417, "grad_norm": 0.026521019637584686, "learning_rate": 1.0836001270255248e-07, "loss": 0.0008, "step": 172390 }, { "epoch": 2.8209113965474923, "grad_norm": 0.11292688548564911, "learning_rate": 1.0816299152909826e-07, "loss": 0.0008, "step": 172400 }, { "epoch": 2.8210750224985683, "grad_norm": 0.010016994550824165, "learning_rate": 1.0796614767297764e-07, "loss": 0.0007, "step": 172410 }, { "epoch": 2.8212386484496443, "grad_norm": 0.034603919833898544, "learning_rate": 1.0776948114132769e-07, "loss": 0.0032, "step": 172420 }, { "epoch": 2.82140227440072, "grad_norm": 0.010988828726112843, "learning_rate": 1.075729919412749e-07, "loss": 0.0008, "step": 172430 }, { "epoch": 2.821565900351796, "grad_norm": 0.006998078897595406, "learning_rate": 1.0737668007994306e-07, "loss": 0.0008, "step": 172440 }, { "epoch": 2.821729526302872, "grad_norm": 0.03496389091014862, "learning_rate": 1.0718054556444756e-07, "loss": 0.0011, "step": 172450 }, { "epoch": 2.8218931522539474, "grad_norm": 0.053340282291173935, "learning_rate": 1.0698458840189662e-07, "loss": 0.0017, "step": 172460 }, { "epoch": 2.8220567782050234, "grad_norm": 0.0879424586892128, "learning_rate": 1.0678880859939566e-07, "loss": 0.0005, "step": 172470 }, { "epoch": 2.8222204041560994, "grad_norm": 0.09867893159389496, "learning_rate": 1.0659320616403901e-07, "loss": 0.0007, "step": 172480 }, { "epoch": 2.822384030107175, "grad_norm": 0.027121204882860184, "learning_rate": 1.0639778110291765e-07, "loss": 0.0006, "step": 172490 }, { "epoch": 2.822547656058251, "grad_norm": 0.01210506446659565, "learning_rate": 1.0620253342311538e-07, "loss": 0.0011, "step": 172500 }, { "epoch": 2.8227112820093265, "grad_norm": 0.10004500299692154, "learning_rate": 1.0600746313170929e-07, "loss": 0.0009, "step": 172510 }, { "epoch": 2.8228749079604025, "grad_norm": 0.10850732773542404, "learning_rate": 1.0581257023577096e-07, "loss": 0.0006, "step": 172520 }, { "epoch": 2.8230385339114785, "grad_norm": 0.030207976698875427, "learning_rate": 1.0561785474236307e-07, "loss": 0.0007, "step": 172530 }, { "epoch": 2.823202159862554, "grad_norm": 0.027631904929876328, "learning_rate": 1.0542331665854555e-07, "loss": 0.0005, "step": 172540 }, { "epoch": 2.82336578581363, "grad_norm": 0.07763148844242096, "learning_rate": 1.0522895599136829e-07, "loss": 0.0003, "step": 172550 }, { "epoch": 2.8235294117647056, "grad_norm": 0.04144243896007538, "learning_rate": 1.0503477274787787e-07, "loss": 0.0004, "step": 172560 }, { "epoch": 2.8236930377157816, "grad_norm": 0.18461155891418457, "learning_rate": 1.0484076693511147e-07, "loss": 0.0007, "step": 172570 }, { "epoch": 2.8238566636668576, "grad_norm": 0.07364876568317413, "learning_rate": 1.0464693856010178e-07, "loss": 0.0012, "step": 172580 }, { "epoch": 2.824020289617933, "grad_norm": 0.03745228424668312, "learning_rate": 1.0445328762987595e-07, "loss": 0.0006, "step": 172590 }, { "epoch": 2.824183915569009, "grad_norm": 0.06604311615228653, "learning_rate": 1.0425981415145114e-07, "loss": 0.0006, "step": 172600 }, { "epoch": 2.824347541520085, "grad_norm": 0.014754494652152061, "learning_rate": 1.0406651813184232e-07, "loss": 0.0004, "step": 172610 }, { "epoch": 2.8245111674711607, "grad_norm": 0.1038784608244896, "learning_rate": 1.0387339957805497e-07, "loss": 0.0008, "step": 172620 }, { "epoch": 2.8246747934222367, "grad_norm": 0.02769569493830204, "learning_rate": 1.0368045849708907e-07, "loss": 0.0009, "step": 172630 }, { "epoch": 2.8248384193733127, "grad_norm": 0.0018812255002558231, "learning_rate": 1.0348769489593847e-07, "loss": 0.0005, "step": 172640 }, { "epoch": 2.8250020453243883, "grad_norm": 0.02080894634127617, "learning_rate": 1.0329510878159144e-07, "loss": 0.0008, "step": 172650 }, { "epoch": 2.8251656712754643, "grad_norm": 0.13385115563869476, "learning_rate": 1.0310270016102686e-07, "loss": 0.0007, "step": 172660 }, { "epoch": 2.8253292972265402, "grad_norm": 0.0864618569612503, "learning_rate": 1.0291046904122026e-07, "loss": 0.0021, "step": 172670 }, { "epoch": 2.825492923177616, "grad_norm": 0.03795851767063141, "learning_rate": 1.0271841542913941e-07, "loss": 0.0007, "step": 172680 }, { "epoch": 2.825656549128692, "grad_norm": 0.03524649888277054, "learning_rate": 1.0252653933174539e-07, "loss": 0.0005, "step": 172690 }, { "epoch": 2.825820175079768, "grad_norm": 0.025355640798807144, "learning_rate": 1.0233484075599431e-07, "loss": 0.0005, "step": 172700 }, { "epoch": 2.8259838010308433, "grad_norm": 0.021268676966428757, "learning_rate": 1.0214331970883285e-07, "loss": 0.0003, "step": 172710 }, { "epoch": 2.8261474269819193, "grad_norm": 0.29022401571273804, "learning_rate": 1.0195197619720542e-07, "loss": 0.0009, "step": 172720 }, { "epoch": 2.8263110529329953, "grad_norm": 0.03801962360739708, "learning_rate": 1.0176081022804596e-07, "loss": 0.0005, "step": 172730 }, { "epoch": 2.826474678884071, "grad_norm": 0.08470553159713745, "learning_rate": 1.01569821808285e-07, "loss": 0.0007, "step": 172740 }, { "epoch": 2.826638304835147, "grad_norm": 0.10154219716787338, "learning_rate": 1.0137901094484537e-07, "loss": 0.0006, "step": 172750 }, { "epoch": 2.826801930786223, "grad_norm": 0.09925787895917892, "learning_rate": 1.0118837764464207e-07, "loss": 0.0008, "step": 172760 }, { "epoch": 2.8269655567372984, "grad_norm": 0.2239476591348648, "learning_rate": 1.009979219145868e-07, "loss": 0.0012, "step": 172770 }, { "epoch": 2.8271291826883744, "grad_norm": 0.42145049571990967, "learning_rate": 1.008076437615818e-07, "loss": 0.0016, "step": 172780 }, { "epoch": 2.8272928086394504, "grad_norm": 0.020358722656965256, "learning_rate": 1.0061754319252493e-07, "loss": 0.0008, "step": 172790 }, { "epoch": 2.827456434590526, "grad_norm": 0.012594140134751797, "learning_rate": 1.0042762021430729e-07, "loss": 0.0014, "step": 172800 }, { "epoch": 2.827620060541602, "grad_norm": 0.05987085402011871, "learning_rate": 1.0023787483381175e-07, "loss": 0.0007, "step": 172810 }, { "epoch": 2.827783686492678, "grad_norm": 0.10527680069208145, "learning_rate": 1.000483070579178e-07, "loss": 0.0007, "step": 172820 }, { "epoch": 2.8279473124437535, "grad_norm": 0.08468912541866302, "learning_rate": 9.985891689349548e-08, "loss": 0.0008, "step": 172830 }, { "epoch": 2.8281109383948295, "grad_norm": 0.07809275388717651, "learning_rate": 9.966970434741096e-08, "loss": 0.0008, "step": 172840 }, { "epoch": 2.8282745643459055, "grad_norm": 0.11739545315504074, "learning_rate": 9.948066942652157e-08, "loss": 0.0017, "step": 172850 }, { "epoch": 2.828438190296981, "grad_norm": 0.03874126449227333, "learning_rate": 9.929181213767957e-08, "loss": 0.0005, "step": 172860 }, { "epoch": 2.828601816248057, "grad_norm": 0.03518207371234894, "learning_rate": 9.910313248773118e-08, "loss": 0.0003, "step": 172870 }, { "epoch": 2.8287654421991326, "grad_norm": 0.06403477489948273, "learning_rate": 9.89146304835148e-08, "loss": 0.0003, "step": 172880 }, { "epoch": 2.8289290681502086, "grad_norm": 0.025648249313235283, "learning_rate": 9.87263061318644e-08, "loss": 0.0004, "step": 172890 }, { "epoch": 2.8290926941012846, "grad_norm": 0.07102353125810623, "learning_rate": 9.853815943960454e-08, "loss": 0.0005, "step": 172900 }, { "epoch": 2.82925632005236, "grad_norm": 0.007533930242061615, "learning_rate": 9.835019041355697e-08, "loss": 0.0011, "step": 172910 }, { "epoch": 2.829419946003436, "grad_norm": 0.19549287855625153, "learning_rate": 9.816239906053348e-08, "loss": 0.0004, "step": 172920 }, { "epoch": 2.8295835719545117, "grad_norm": 0.02645001746714115, "learning_rate": 9.79747853873425e-08, "loss": 0.0005, "step": 172930 }, { "epoch": 2.8297471979055877, "grad_norm": 0.06650792062282562, "learning_rate": 9.778734940078305e-08, "loss": 0.0012, "step": 172940 }, { "epoch": 2.8299108238566637, "grad_norm": 0.006504692602902651, "learning_rate": 9.760009110765022e-08, "loss": 0.0005, "step": 172950 }, { "epoch": 2.8300744498077393, "grad_norm": 0.14010770618915558, "learning_rate": 9.741301051473195e-08, "loss": 0.001, "step": 172960 }, { "epoch": 2.8302380757588153, "grad_norm": 0.009898989461362362, "learning_rate": 9.722610762880836e-08, "loss": 0.0006, "step": 172970 }, { "epoch": 2.8304017017098912, "grad_norm": 0.06638094782829285, "learning_rate": 9.703938245665567e-08, "loss": 0.0008, "step": 172980 }, { "epoch": 2.830565327660967, "grad_norm": 0.13438630104064941, "learning_rate": 9.685283500504073e-08, "loss": 0.0008, "step": 172990 }, { "epoch": 2.830728953612043, "grad_norm": 0.006189462263137102, "learning_rate": 9.666646528072643e-08, "loss": 0.0009, "step": 173000 }, { "epoch": 2.830892579563119, "grad_norm": 0.12859684228897095, "learning_rate": 9.648027329046738e-08, "loss": 0.0005, "step": 173010 }, { "epoch": 2.8310562055141943, "grad_norm": 0.2224467396736145, "learning_rate": 9.629425904101319e-08, "loss": 0.0005, "step": 173020 }, { "epoch": 2.8312198314652703, "grad_norm": 0.018172206357121468, "learning_rate": 9.610842253910735e-08, "loss": 0.001, "step": 173030 }, { "epoch": 2.8313834574163463, "grad_norm": 0.03721041977405548, "learning_rate": 9.592276379148391e-08, "loss": 0.0006, "step": 173040 }, { "epoch": 2.831547083367422, "grad_norm": 0.08655502647161484, "learning_rate": 9.573728280487471e-08, "loss": 0.0011, "step": 173050 }, { "epoch": 2.831710709318498, "grad_norm": 0.0030999192968010902, "learning_rate": 9.555197958600104e-08, "loss": 0.0007, "step": 173060 }, { "epoch": 2.831874335269574, "grad_norm": 0.08408474177122116, "learning_rate": 9.536685414158086e-08, "loss": 0.0005, "step": 173070 }, { "epoch": 2.8320379612206494, "grad_norm": 0.09025789797306061, "learning_rate": 9.518190647832438e-08, "loss": 0.0005, "step": 173080 }, { "epoch": 2.8322015871717254, "grad_norm": 0.012574669905006886, "learning_rate": 9.499713660293508e-08, "loss": 0.0007, "step": 173090 }, { "epoch": 2.8323652131228014, "grad_norm": 0.008571373298764229, "learning_rate": 9.481254452211097e-08, "loss": 0.0003, "step": 173100 }, { "epoch": 2.832528839073877, "grad_norm": 0.0567251555621624, "learning_rate": 9.462813024254225e-08, "loss": 0.0005, "step": 173110 }, { "epoch": 2.832692465024953, "grad_norm": 0.029907390475273132, "learning_rate": 9.444389377091468e-08, "loss": 0.0009, "step": 173120 }, { "epoch": 2.832856090976029, "grad_norm": 0.0502646267414093, "learning_rate": 9.425983511390569e-08, "loss": 0.0009, "step": 173130 }, { "epoch": 2.8330197169271045, "grad_norm": 0.035456735640764236, "learning_rate": 9.40759542781866e-08, "loss": 0.0005, "step": 173140 }, { "epoch": 2.8331833428781805, "grad_norm": 0.0010037024039775133, "learning_rate": 9.389225127042378e-08, "loss": 0.0003, "step": 173150 }, { "epoch": 2.8333469688292565, "grad_norm": 0.03924174606800079, "learning_rate": 9.370872609727466e-08, "loss": 0.0003, "step": 173160 }, { "epoch": 2.833510594780332, "grad_norm": 0.02810162864625454, "learning_rate": 9.352537876539281e-08, "loss": 0.0007, "step": 173170 }, { "epoch": 2.833674220731408, "grad_norm": 0.021958060562610626, "learning_rate": 9.334220928142346e-08, "loss": 0.0005, "step": 173180 }, { "epoch": 2.833837846682484, "grad_norm": 0.016990140080451965, "learning_rate": 9.315921765200631e-08, "loss": 0.0005, "step": 173190 }, { "epoch": 2.8340014726335596, "grad_norm": 0.018822545185685158, "learning_rate": 9.297640388377383e-08, "loss": 0.0004, "step": 173200 }, { "epoch": 2.8341650985846356, "grad_norm": 0.24558059871196747, "learning_rate": 9.279376798335294e-08, "loss": 0.0008, "step": 173210 }, { "epoch": 2.8343287245357116, "grad_norm": 0.10267245024442673, "learning_rate": 9.261130995736389e-08, "loss": 0.0006, "step": 173220 }, { "epoch": 2.834492350486787, "grad_norm": 0.15842097997665405, "learning_rate": 9.24290298124203e-08, "loss": 0.0005, "step": 173230 }, { "epoch": 2.834655976437863, "grad_norm": 0.16673073172569275, "learning_rate": 9.224692755512965e-08, "loss": 0.0009, "step": 173240 }, { "epoch": 2.834819602388939, "grad_norm": 0.040787916630506516, "learning_rate": 9.206500319209222e-08, "loss": 0.0006, "step": 173250 }, { "epoch": 2.8349832283400147, "grad_norm": 0.08588576316833496, "learning_rate": 9.188325672990272e-08, "loss": 0.0005, "step": 173260 }, { "epoch": 2.8351468542910907, "grad_norm": 0.18132197856903076, "learning_rate": 9.170168817514813e-08, "loss": 0.0009, "step": 173270 }, { "epoch": 2.8353104802421663, "grad_norm": 0.02750786580145359, "learning_rate": 9.152029753441149e-08, "loss": 0.0006, "step": 173280 }, { "epoch": 2.8354741061932422, "grad_norm": 0.004450025036931038, "learning_rate": 9.13390848142659e-08, "loss": 0.0006, "step": 173290 }, { "epoch": 2.8356377321443182, "grad_norm": 0.008194906637072563, "learning_rate": 9.115805002128109e-08, "loss": 0.0005, "step": 173300 }, { "epoch": 2.835801358095394, "grad_norm": 0.048089105635881424, "learning_rate": 9.097719316201903e-08, "loss": 0.0003, "step": 173310 }, { "epoch": 2.83596498404647, "grad_norm": 0.028552373871207237, "learning_rate": 9.079651424303503e-08, "loss": 0.0006, "step": 173320 }, { "epoch": 2.8361286099975453, "grad_norm": 0.0013884111540392041, "learning_rate": 9.061601327087888e-08, "loss": 0.0006, "step": 173330 }, { "epoch": 2.8362922359486213, "grad_norm": 0.056187860667705536, "learning_rate": 9.043569025209253e-08, "loss": 0.0007, "step": 173340 }, { "epoch": 2.8364558618996973, "grad_norm": 0.011024200357496738, "learning_rate": 9.0255545193213e-08, "loss": 0.0006, "step": 173350 }, { "epoch": 2.836619487850773, "grad_norm": 0.18066450953483582, "learning_rate": 9.00755781007695e-08, "loss": 0.0005, "step": 173360 }, { "epoch": 2.836783113801849, "grad_norm": 0.06157389283180237, "learning_rate": 8.989578898128515e-08, "loss": 0.0005, "step": 173370 }, { "epoch": 2.836946739752925, "grad_norm": 0.07567913830280304, "learning_rate": 8.97161778412775e-08, "loss": 0.0004, "step": 173380 }, { "epoch": 2.8371103657040004, "grad_norm": 0.012874470092356205, "learning_rate": 8.953674468725692e-08, "loss": 0.0005, "step": 173390 }, { "epoch": 2.8372739916550764, "grad_norm": 0.09604622423648834, "learning_rate": 8.935748952572765e-08, "loss": 0.0003, "step": 173400 }, { "epoch": 2.8374376176061524, "grad_norm": 0.06352297961711884, "learning_rate": 8.917841236318669e-08, "loss": 0.0008, "step": 173410 }, { "epoch": 2.837601243557228, "grad_norm": 0.06465206295251846, "learning_rate": 8.899951320612554e-08, "loss": 0.0008, "step": 173420 }, { "epoch": 2.837764869508304, "grad_norm": 0.032496921718120575, "learning_rate": 8.882079206102844e-08, "loss": 0.0008, "step": 173430 }, { "epoch": 2.83792849545938, "grad_norm": 0.060703475028276443, "learning_rate": 8.864224893437412e-08, "loss": 0.0014, "step": 173440 }, { "epoch": 2.8380921214104555, "grad_norm": 0.05830821767449379, "learning_rate": 8.84638838326346e-08, "loss": 0.0005, "step": 173450 }, { "epoch": 2.8382557473615315, "grad_norm": 0.05228746309876442, "learning_rate": 8.828569676227417e-08, "loss": 0.001, "step": 173460 }, { "epoch": 2.8384193733126075, "grad_norm": 0.08254444599151611, "learning_rate": 8.810768772975265e-08, "loss": 0.0004, "step": 173470 }, { "epoch": 2.838582999263683, "grad_norm": 0.03338150680065155, "learning_rate": 8.792985674152099e-08, "loss": 0.0004, "step": 173480 }, { "epoch": 2.838746625214759, "grad_norm": 0.03274044767022133, "learning_rate": 8.775220380402738e-08, "loss": 0.0008, "step": 173490 }, { "epoch": 2.838910251165835, "grad_norm": 0.041836876422166824, "learning_rate": 8.757472892370943e-08, "loss": 0.0006, "step": 173500 }, { "epoch": 2.8390738771169106, "grad_norm": 0.02346048690378666, "learning_rate": 8.739743210700091e-08, "loss": 0.0003, "step": 173510 }, { "epoch": 2.8392375030679866, "grad_norm": 0.07030380517244339, "learning_rate": 8.722031336032888e-08, "loss": 0.0005, "step": 173520 }, { "epoch": 2.8394011290190626, "grad_norm": 0.056974031031131744, "learning_rate": 8.704337269011209e-08, "loss": 0.0015, "step": 173530 }, { "epoch": 2.839564754970138, "grad_norm": 0.02623937651515007, "learning_rate": 8.686661010276598e-08, "loss": 0.0007, "step": 173540 }, { "epoch": 2.839728380921214, "grad_norm": 0.04807814583182335, "learning_rate": 8.669002560469653e-08, "loss": 0.0005, "step": 173550 }, { "epoch": 2.83989200687229, "grad_norm": 0.037363871932029724, "learning_rate": 8.651361920230417e-08, "loss": 0.0005, "step": 173560 }, { "epoch": 2.8400556328233657, "grad_norm": 0.05650179833173752, "learning_rate": 8.63373909019849e-08, "loss": 0.0008, "step": 173570 }, { "epoch": 2.8402192587744417, "grad_norm": 0.01332570519298315, "learning_rate": 8.616134071012472e-08, "loss": 0.0005, "step": 173580 }, { "epoch": 2.8403828847255177, "grad_norm": 0.0773705393075943, "learning_rate": 8.598546863310631e-08, "loss": 0.0004, "step": 173590 }, { "epoch": 2.8405465106765933, "grad_norm": 0.010461600497364998, "learning_rate": 8.580977467730345e-08, "loss": 0.0006, "step": 173600 }, { "epoch": 2.8407101366276692, "grad_norm": 0.045217934995889664, "learning_rate": 8.563425884908604e-08, "loss": 0.0005, "step": 173610 }, { "epoch": 2.8408737625787452, "grad_norm": 0.060924991965293884, "learning_rate": 8.545892115481514e-08, "loss": 0.001, "step": 173620 }, { "epoch": 2.841037388529821, "grad_norm": 0.023583533242344856, "learning_rate": 8.528376160084673e-08, "loss": 0.0007, "step": 173630 }, { "epoch": 2.841201014480897, "grad_norm": 0.10948897898197174, "learning_rate": 8.510878019352965e-08, "loss": 0.0016, "step": 173640 }, { "epoch": 2.841364640431973, "grad_norm": 0.2389497011899948, "learning_rate": 8.493397693920657e-08, "loss": 0.0013, "step": 173650 }, { "epoch": 2.8415282663830483, "grad_norm": 0.027986939996480942, "learning_rate": 8.475935184421413e-08, "loss": 0.0004, "step": 173660 }, { "epoch": 2.8416918923341243, "grad_norm": 0.11178158968687057, "learning_rate": 8.458490491488114e-08, "loss": 0.0007, "step": 173670 }, { "epoch": 2.8418555182852, "grad_norm": 0.200485959649086, "learning_rate": 8.441063615753197e-08, "loss": 0.001, "step": 173680 }, { "epoch": 2.842019144236276, "grad_norm": 0.03016781434416771, "learning_rate": 8.423654557848215e-08, "loss": 0.0011, "step": 173690 }, { "epoch": 2.8421827701873514, "grad_norm": 0.05747764930129051, "learning_rate": 8.406263318404384e-08, "loss": 0.0005, "step": 173700 }, { "epoch": 2.8423463961384274, "grad_norm": 0.08647391200065613, "learning_rate": 8.388889898051922e-08, "loss": 0.0005, "step": 173710 }, { "epoch": 2.8425100220895034, "grad_norm": 0.04320474714040756, "learning_rate": 8.371534297420658e-08, "loss": 0.0005, "step": 173720 }, { "epoch": 2.842673648040579, "grad_norm": 0.11320873349905014, "learning_rate": 8.354196517139701e-08, "loss": 0.0006, "step": 173730 }, { "epoch": 2.842837273991655, "grad_norm": 0.07384967803955078, "learning_rate": 8.336876557837436e-08, "loss": 0.0007, "step": 173740 }, { "epoch": 2.843000899942731, "grad_norm": 0.21078170835971832, "learning_rate": 8.319574420141807e-08, "loss": 0.0012, "step": 173750 }, { "epoch": 2.8431645258938065, "grad_norm": 0.04817560687661171, "learning_rate": 8.302290104679811e-08, "loss": 0.0006, "step": 173760 }, { "epoch": 2.8433281518448825, "grad_norm": 0.058492567390203476, "learning_rate": 8.285023612078059e-08, "loss": 0.0011, "step": 173770 }, { "epoch": 2.8434917777959585, "grad_norm": 0.022522741928696632, "learning_rate": 8.267774942962382e-08, "loss": 0.0004, "step": 173780 }, { "epoch": 2.843655403747034, "grad_norm": 0.21324092149734497, "learning_rate": 8.250544097957946e-08, "loss": 0.0011, "step": 173790 }, { "epoch": 2.84381902969811, "grad_norm": 0.08894387632608414, "learning_rate": 8.233331077689532e-08, "loss": 0.0006, "step": 173800 }, { "epoch": 2.843982655649186, "grad_norm": 0.0018750797025859356, "learning_rate": 8.216135882780807e-08, "loss": 0.0006, "step": 173810 }, { "epoch": 2.8441462816002616, "grad_norm": 0.058654479682445526, "learning_rate": 8.198958513855271e-08, "loss": 0.0003, "step": 173820 }, { "epoch": 2.8443099075513376, "grad_norm": 0.07440018653869629, "learning_rate": 8.181798971535482e-08, "loss": 0.0007, "step": 173830 }, { "epoch": 2.8444735335024136, "grad_norm": 0.029554273933172226, "learning_rate": 8.164657256443386e-08, "loss": 0.0013, "step": 173840 }, { "epoch": 2.844637159453489, "grad_norm": 0.13609381020069122, "learning_rate": 8.147533369200433e-08, "loss": 0.0006, "step": 173850 }, { "epoch": 2.844800785404565, "grad_norm": 0.026124535128474236, "learning_rate": 8.13042731042718e-08, "loss": 0.0002, "step": 173860 }, { "epoch": 2.844964411355641, "grad_norm": 0.07678091526031494, "learning_rate": 8.113339080743798e-08, "loss": 0.0004, "step": 173870 }, { "epoch": 2.8451280373067167, "grad_norm": 0.1092134565114975, "learning_rate": 8.096268680769626e-08, "loss": 0.0006, "step": 173880 }, { "epoch": 2.8452916632577927, "grad_norm": 0.06291822344064713, "learning_rate": 8.0792161111235e-08, "loss": 0.0015, "step": 173890 }, { "epoch": 2.8454552892088687, "grad_norm": 0.15437263250350952, "learning_rate": 8.062181372423428e-08, "loss": 0.0006, "step": 173900 }, { "epoch": 2.8456189151599443, "grad_norm": 0.0075996145606040955, "learning_rate": 8.045164465286969e-08, "loss": 0.0005, "step": 173910 }, { "epoch": 2.8457825411110202, "grad_norm": 0.33961060643196106, "learning_rate": 8.02816539033091e-08, "loss": 0.0008, "step": 173920 }, { "epoch": 2.8459461670620962, "grad_norm": 0.5353030562400818, "learning_rate": 8.01118414817148e-08, "loss": 0.0009, "step": 173930 }, { "epoch": 2.846109793013172, "grad_norm": 0.06713154166936874, "learning_rate": 7.994220739424075e-08, "loss": 0.0005, "step": 173940 }, { "epoch": 2.846273418964248, "grad_norm": 0.02468140609562397, "learning_rate": 7.977275164703757e-08, "loss": 0.0005, "step": 173950 }, { "epoch": 2.846437044915324, "grad_norm": 0.34537604451179504, "learning_rate": 7.960347424624648e-08, "loss": 0.0011, "step": 173960 }, { "epoch": 2.8466006708663993, "grad_norm": 0.06103261560201645, "learning_rate": 7.943437519800312e-08, "loss": 0.0004, "step": 173970 }, { "epoch": 2.8467642968174753, "grad_norm": 0.05063708499073982, "learning_rate": 7.926545450843758e-08, "loss": 0.0004, "step": 173980 }, { "epoch": 2.8469279227685513, "grad_norm": 0.10059316456317902, "learning_rate": 7.909671218367277e-08, "loss": 0.0016, "step": 173990 }, { "epoch": 2.847091548719627, "grad_norm": 0.10250764340162277, "learning_rate": 7.892814822982487e-08, "loss": 0.0005, "step": 174000 }, { "epoch": 2.847255174670703, "grad_norm": 0.16875368356704712, "learning_rate": 7.875976265300456e-08, "loss": 0.0009, "step": 174010 }, { "epoch": 2.847418800621779, "grad_norm": 0.131410151720047, "learning_rate": 7.859155545931418e-08, "loss": 0.0009, "step": 174020 }, { "epoch": 2.8475824265728544, "grad_norm": 0.16458827257156372, "learning_rate": 7.84235266548522e-08, "loss": 0.0014, "step": 174030 }, { "epoch": 2.8477460525239304, "grad_norm": 0.08364089578390121, "learning_rate": 7.825567624570818e-08, "loss": 0.0005, "step": 174040 }, { "epoch": 2.847909678475006, "grad_norm": 0.016370099037885666, "learning_rate": 7.808800423796781e-08, "loss": 0.0004, "step": 174050 }, { "epoch": 2.848073304426082, "grad_norm": 0.009846849367022514, "learning_rate": 7.792051063770733e-08, "loss": 0.0008, "step": 174060 }, { "epoch": 2.848236930377158, "grad_norm": 0.037272486835718155, "learning_rate": 7.775319545099802e-08, "loss": 0.0016, "step": 174070 }, { "epoch": 2.8484005563282335, "grad_norm": 0.12668658792972565, "learning_rate": 7.7586058683905e-08, "loss": 0.0009, "step": 174080 }, { "epoch": 2.8485641822793095, "grad_norm": 0.019610220566391945, "learning_rate": 7.741910034248678e-08, "loss": 0.0008, "step": 174090 }, { "epoch": 2.848727808230385, "grad_norm": 0.0810527428984642, "learning_rate": 7.725232043279574e-08, "loss": 0.0006, "step": 174100 }, { "epoch": 2.848891434181461, "grad_norm": 0.0777733325958252, "learning_rate": 7.708571896087591e-08, "loss": 0.0005, "step": 174110 }, { "epoch": 2.849055060132537, "grad_norm": 0.04647945985198021, "learning_rate": 7.691929593276692e-08, "loss": 0.0006, "step": 174120 }, { "epoch": 2.8492186860836126, "grad_norm": 0.052896466106176376, "learning_rate": 7.675305135450117e-08, "loss": 0.0013, "step": 174130 }, { "epoch": 2.8493823120346886, "grad_norm": 0.09321468323469162, "learning_rate": 7.658698523210496e-08, "loss": 0.0003, "step": 174140 }, { "epoch": 2.8495459379857646, "grad_norm": 0.031998150050640106, "learning_rate": 7.642109757159733e-08, "loss": 0.0004, "step": 174150 }, { "epoch": 2.84970956393684, "grad_norm": 0.14246892929077148, "learning_rate": 7.625538837899071e-08, "loss": 0.0011, "step": 174160 }, { "epoch": 2.849873189887916, "grad_norm": 0.05914086848497391, "learning_rate": 7.608985766029309e-08, "loss": 0.0008, "step": 174170 }, { "epoch": 2.850036815838992, "grad_norm": 0.08203393965959549, "learning_rate": 7.592450542150297e-08, "loss": 0.001, "step": 174180 }, { "epoch": 2.8502004417900677, "grad_norm": 0.07545647025108337, "learning_rate": 7.57593316686156e-08, "loss": 0.0005, "step": 174190 }, { "epoch": 2.8503640677411437, "grad_norm": 0.011526244692504406, "learning_rate": 7.559433640761726e-08, "loss": 0.0004, "step": 174200 }, { "epoch": 2.8505276936922197, "grad_norm": 0.13257025182247162, "learning_rate": 7.542951964448874e-08, "loss": 0.0006, "step": 174210 }, { "epoch": 2.8506913196432953, "grad_norm": 0.045285262167453766, "learning_rate": 7.526488138520415e-08, "loss": 0.0004, "step": 174220 }, { "epoch": 2.8508549455943712, "grad_norm": 0.009759592823684216, "learning_rate": 7.510042163573095e-08, "loss": 0.0003, "step": 174230 }, { "epoch": 2.8510185715454472, "grad_norm": 0.2923644781112671, "learning_rate": 7.493614040203101e-08, "loss": 0.0016, "step": 174240 }, { "epoch": 2.851182197496523, "grad_norm": 0.1614042967557907, "learning_rate": 7.477203769005903e-08, "loss": 0.0008, "step": 174250 }, { "epoch": 2.851345823447599, "grad_norm": 0.0468735545873642, "learning_rate": 7.460811350576359e-08, "loss": 0.0003, "step": 174260 }, { "epoch": 2.851509449398675, "grad_norm": 0.32878729701042175, "learning_rate": 7.444436785508602e-08, "loss": 0.0012, "step": 174270 }, { "epoch": 2.8516730753497503, "grad_norm": 0.08398101478815079, "learning_rate": 7.428080074396105e-08, "loss": 0.0006, "step": 174280 }, { "epoch": 2.8518367013008263, "grad_norm": 0.051134463399648666, "learning_rate": 7.411741217831947e-08, "loss": 0.0009, "step": 174290 }, { "epoch": 2.8520003272519023, "grad_norm": 0.07836651057004929, "learning_rate": 7.395420216408155e-08, "loss": 0.0007, "step": 174300 }, { "epoch": 2.852163953202978, "grad_norm": 0.05573960021138191, "learning_rate": 7.379117070716534e-08, "loss": 0.0007, "step": 174310 }, { "epoch": 2.852327579154054, "grad_norm": 0.05728914588689804, "learning_rate": 7.36283178134789e-08, "loss": 0.0003, "step": 174320 }, { "epoch": 2.85249120510513, "grad_norm": 0.04238661751151085, "learning_rate": 7.346564348892582e-08, "loss": 0.0008, "step": 174330 }, { "epoch": 2.8526548310562054, "grad_norm": 0.028013156726956367, "learning_rate": 7.33031477394025e-08, "loss": 0.0005, "step": 174340 }, { "epoch": 2.8528184570072814, "grad_norm": 0.15349432826042175, "learning_rate": 7.314083057079924e-08, "loss": 0.0011, "step": 174350 }, { "epoch": 2.8529820829583574, "grad_norm": 0.001606814912520349, "learning_rate": 7.29786919889991e-08, "loss": 0.0005, "step": 174360 }, { "epoch": 2.853145708909433, "grad_norm": 0.01952027715742588, "learning_rate": 7.281673199987959e-08, "loss": 0.0005, "step": 174370 }, { "epoch": 2.853309334860509, "grad_norm": 0.007583265658468008, "learning_rate": 7.265495060931216e-08, "loss": 0.0011, "step": 174380 }, { "epoch": 2.853472960811585, "grad_norm": 0.12129753082990646, "learning_rate": 7.249334782315931e-08, "loss": 0.0006, "step": 174390 }, { "epoch": 2.8536365867626605, "grad_norm": 0.21874582767486572, "learning_rate": 7.233192364728025e-08, "loss": 0.001, "step": 174400 }, { "epoch": 2.8538002127137365, "grad_norm": 0.06910939514636993, "learning_rate": 7.217067808752532e-08, "loss": 0.0005, "step": 174410 }, { "epoch": 2.8539638386648125, "grad_norm": 0.08435803651809692, "learning_rate": 7.200961114973925e-08, "loss": 0.0008, "step": 174420 }, { "epoch": 2.854127464615888, "grad_norm": 0.03137621656060219, "learning_rate": 7.18487228397613e-08, "loss": 0.0004, "step": 174430 }, { "epoch": 2.854291090566964, "grad_norm": 0.011698718182742596, "learning_rate": 7.168801316342178e-08, "loss": 0.0007, "step": 174440 }, { "epoch": 2.8544547165180396, "grad_norm": 0.05233995243906975, "learning_rate": 7.152748212654769e-08, "loss": 0.0005, "step": 174450 }, { "epoch": 2.8546183424691156, "grad_norm": 0.01946445368230343, "learning_rate": 7.136712973495719e-08, "loss": 0.0005, "step": 174460 }, { "epoch": 2.8547819684201916, "grad_norm": 0.13896925747394562, "learning_rate": 7.120695599446281e-08, "loss": 0.0008, "step": 174470 }, { "epoch": 2.854945594371267, "grad_norm": 0.008294851519167423, "learning_rate": 7.104696091086937e-08, "loss": 0.0006, "step": 174480 }, { "epoch": 2.855109220322343, "grad_norm": 0.031204285100102425, "learning_rate": 7.08871444899778e-08, "loss": 0.0007, "step": 174490 }, { "epoch": 2.8552728462734187, "grad_norm": 0.014123676344752312, "learning_rate": 7.072750673757955e-08, "loss": 0.0016, "step": 174500 }, { "epoch": 2.8554364722244947, "grad_norm": 0.1242511197924614, "learning_rate": 7.05680476594628e-08, "loss": 0.0009, "step": 174510 }, { "epoch": 2.8556000981755707, "grad_norm": 0.034279245883226395, "learning_rate": 7.040876726140678e-08, "loss": 0.0004, "step": 174520 }, { "epoch": 2.8557637241266463, "grad_norm": 0.08257488906383514, "learning_rate": 7.024966554918411e-08, "loss": 0.0007, "step": 174530 }, { "epoch": 2.8559273500777222, "grad_norm": 0.06265582889318466, "learning_rate": 7.009074252856352e-08, "loss": 0.0004, "step": 174540 }, { "epoch": 2.8560909760287982, "grad_norm": 0.011068779975175858, "learning_rate": 6.993199820530482e-08, "loss": 0.0008, "step": 174550 }, { "epoch": 2.856254601979874, "grad_norm": 0.005089957267045975, "learning_rate": 6.977343258516178e-08, "loss": 0.0008, "step": 174560 }, { "epoch": 2.85641822793095, "grad_norm": 0.0039634425193071365, "learning_rate": 6.961504567388255e-08, "loss": 0.0011, "step": 174570 }, { "epoch": 2.856581853882026, "grad_norm": 0.0073012858629226685, "learning_rate": 6.945683747720755e-08, "loss": 0.0004, "step": 174580 }, { "epoch": 2.8567454798331013, "grad_norm": 0.02657819725573063, "learning_rate": 6.929880800087274e-08, "loss": 0.001, "step": 174590 }, { "epoch": 2.8569091057841773, "grad_norm": 0.020564179867506027, "learning_rate": 6.914095725060466e-08, "loss": 0.0008, "step": 174600 }, { "epoch": 2.8570727317352533, "grad_norm": 0.06299027800559998, "learning_rate": 6.89832852321265e-08, "loss": 0.0009, "step": 174610 }, { "epoch": 2.857236357686329, "grad_norm": 0.02254130132496357, "learning_rate": 6.882579195115257e-08, "loss": 0.0004, "step": 174620 }, { "epoch": 2.857399983637405, "grad_norm": 0.004705629777163267, "learning_rate": 6.866847741339222e-08, "loss": 0.0004, "step": 174630 }, { "epoch": 2.857563609588481, "grad_norm": 0.049303796142339706, "learning_rate": 6.851134162454698e-08, "loss": 0.0005, "step": 174640 }, { "epoch": 2.8577272355395564, "grad_norm": 0.0052415053360164165, "learning_rate": 6.83543845903134e-08, "loss": 0.0009, "step": 174650 }, { "epoch": 2.8578908614906324, "grad_norm": 0.04257628694176674, "learning_rate": 6.819760631638083e-08, "loss": 0.0008, "step": 174660 }, { "epoch": 2.8580544874417084, "grad_norm": 0.15036487579345703, "learning_rate": 6.804100680843084e-08, "loss": 0.001, "step": 174670 }, { "epoch": 2.858218113392784, "grad_norm": 0.03437655046582222, "learning_rate": 6.788458607214166e-08, "loss": 0.0006, "step": 174680 }, { "epoch": 2.85838173934386, "grad_norm": 0.06352607905864716, "learning_rate": 6.772834411318152e-08, "loss": 0.0008, "step": 174690 }, { "epoch": 2.858545365294936, "grad_norm": 0.06638634949922562, "learning_rate": 6.75722809372148e-08, "loss": 0.0005, "step": 174700 }, { "epoch": 2.8587089912460115, "grad_norm": 0.0277895238250494, "learning_rate": 6.741639654989752e-08, "loss": 0.0006, "step": 174710 }, { "epoch": 2.8588726171970875, "grad_norm": 0.06302885711193085, "learning_rate": 6.726069095688126e-08, "loss": 0.0012, "step": 174720 }, { "epoch": 2.8590362431481635, "grad_norm": 0.005250515416264534, "learning_rate": 6.71051641638093e-08, "loss": 0.0007, "step": 174730 }, { "epoch": 2.859199869099239, "grad_norm": 0.077210932970047, "learning_rate": 6.694981617631935e-08, "loss": 0.0003, "step": 174740 }, { "epoch": 2.859363495050315, "grad_norm": 0.0019042406929656863, "learning_rate": 6.679464700004246e-08, "loss": 0.0005, "step": 174750 }, { "epoch": 2.859527121001391, "grad_norm": 0.06501390039920807, "learning_rate": 6.663965664060301e-08, "loss": 0.0007, "step": 174760 }, { "epoch": 2.8596907469524666, "grad_norm": 0.03229280188679695, "learning_rate": 6.648484510361819e-08, "loss": 0.0011, "step": 174770 }, { "epoch": 2.8598543729035426, "grad_norm": 0.01788502186536789, "learning_rate": 6.633021239470127e-08, "loss": 0.0004, "step": 174780 }, { "epoch": 2.8600179988546186, "grad_norm": 0.01595594547688961, "learning_rate": 6.617575851945557e-08, "loss": 0.0005, "step": 174790 }, { "epoch": 2.860181624805694, "grad_norm": 0.06669995188713074, "learning_rate": 6.602148348348159e-08, "loss": 0.0004, "step": 174800 }, { "epoch": 2.86034525075677, "grad_norm": 0.14184415340423584, "learning_rate": 6.58673872923693e-08, "loss": 0.0007, "step": 174810 }, { "epoch": 2.8605088767078457, "grad_norm": 0.4916815757751465, "learning_rate": 6.571346995170591e-08, "loss": 0.0044, "step": 174820 }, { "epoch": 2.8606725026589217, "grad_norm": 0.03106622025370598, "learning_rate": 6.555973146706973e-08, "loss": 0.0004, "step": 174830 }, { "epoch": 2.8608361286099977, "grad_norm": 0.0013145022094249725, "learning_rate": 6.540617184403353e-08, "loss": 0.0002, "step": 174840 }, { "epoch": 2.8609997545610732, "grad_norm": 0.13275207579135895, "learning_rate": 6.525279108816396e-08, "loss": 0.0006, "step": 174850 }, { "epoch": 2.8611633805121492, "grad_norm": 0.01137633714824915, "learning_rate": 6.509958920502046e-08, "loss": 0.0004, "step": 174860 }, { "epoch": 2.861327006463225, "grad_norm": 0.04059248045086861, "learning_rate": 6.494656620015582e-08, "loss": 0.0005, "step": 174870 }, { "epoch": 2.861490632414301, "grad_norm": 0.0016524163074791431, "learning_rate": 6.479372207911672e-08, "loss": 0.0004, "step": 174880 }, { "epoch": 2.861654258365377, "grad_norm": 0.0027647637762129307, "learning_rate": 6.464105684744426e-08, "loss": 0.0007, "step": 174890 }, { "epoch": 2.8618178843164523, "grad_norm": 0.05818874016404152, "learning_rate": 6.448857051067181e-08, "loss": 0.0015, "step": 174900 }, { "epoch": 2.8619815102675283, "grad_norm": 0.012284564785659313, "learning_rate": 6.433626307432606e-08, "loss": 0.0004, "step": 174910 }, { "epoch": 2.8621451362186043, "grad_norm": 0.021488336846232414, "learning_rate": 6.418413454392813e-08, "loss": 0.0007, "step": 174920 }, { "epoch": 2.86230876216968, "grad_norm": 0.09011509269475937, "learning_rate": 6.403218492499253e-08, "loss": 0.0006, "step": 174930 }, { "epoch": 2.862472388120756, "grad_norm": 0.005234993528574705, "learning_rate": 6.388041422302759e-08, "loss": 0.0009, "step": 174940 }, { "epoch": 2.862636014071832, "grad_norm": 0.07900192588567734, "learning_rate": 6.37288224435334e-08, "loss": 0.0008, "step": 174950 }, { "epoch": 2.8627996400229074, "grad_norm": 0.02275443635880947, "learning_rate": 6.357740959200553e-08, "loss": 0.0005, "step": 174960 }, { "epoch": 2.8629632659739834, "grad_norm": 0.05092756822705269, "learning_rate": 6.342617567393184e-08, "loss": 0.0005, "step": 174970 }, { "epoch": 2.8631268919250594, "grad_norm": 0.036253590136766434, "learning_rate": 6.327512069479513e-08, "loss": 0.0008, "step": 174980 }, { "epoch": 2.863290517876135, "grad_norm": 0.017339251935482025, "learning_rate": 6.312424466006995e-08, "loss": 0.0003, "step": 174990 }, { "epoch": 2.863454143827211, "grad_norm": 0.11925626546144485, "learning_rate": 6.297354757522523e-08, "loss": 0.0003, "step": 175000 }, { "epoch": 2.863454143827211, "eval_loss": 0.0009074246045202017, "eval_runtime": 3.0981, "eval_samples_per_second": 64.556, "eval_steps_per_second": 16.139, "step": 175000 }, { "epoch": 2.863617769778287, "grad_norm": 0.05676291137933731, "learning_rate": 6.282302944572383e-08, "loss": 0.0007, "step": 175010 }, { "epoch": 2.8637813957293625, "grad_norm": 0.02615579403936863, "learning_rate": 6.267269027702084e-08, "loss": 0.0007, "step": 175020 }, { "epoch": 2.8639450216804385, "grad_norm": 0.10852567851543427, "learning_rate": 6.252253007456688e-08, "loss": 0.0005, "step": 175030 }, { "epoch": 2.8641086476315145, "grad_norm": 0.04380883648991585, "learning_rate": 6.237254884380373e-08, "loss": 0.0007, "step": 175040 }, { "epoch": 2.86427227358259, "grad_norm": 0.2027987241744995, "learning_rate": 6.222274659016925e-08, "loss": 0.0007, "step": 175050 }, { "epoch": 2.864435899533666, "grad_norm": 0.061378706246614456, "learning_rate": 6.207312331909244e-08, "loss": 0.0013, "step": 175060 }, { "epoch": 2.864599525484742, "grad_norm": 0.12418750673532486, "learning_rate": 6.192367903599617e-08, "loss": 0.001, "step": 175070 }, { "epoch": 2.8647631514358176, "grad_norm": 0.005105985328555107, "learning_rate": 6.177441374629889e-08, "loss": 0.0006, "step": 175080 }, { "epoch": 2.8649267773868936, "grad_norm": 0.007131421472877264, "learning_rate": 6.162532745541017e-08, "loss": 0.0007, "step": 175090 }, { "epoch": 2.8650904033379696, "grad_norm": 0.04102492332458496, "learning_rate": 6.147642016873401e-08, "loss": 0.0011, "step": 175100 }, { "epoch": 2.865254029289045, "grad_norm": 1.4314508438110352, "learning_rate": 6.132769189166832e-08, "loss": 0.0012, "step": 175110 }, { "epoch": 2.865417655240121, "grad_norm": 0.014503179118037224, "learning_rate": 6.117914262960433e-08, "loss": 0.0009, "step": 175120 }, { "epoch": 2.865581281191197, "grad_norm": 0.09569036960601807, "learning_rate": 6.103077238792554e-08, "loss": 0.0008, "step": 175130 }, { "epoch": 2.8657449071422727, "grad_norm": 0.03811396285891533, "learning_rate": 6.088258117201151e-08, "loss": 0.0008, "step": 175140 }, { "epoch": 2.8659085330933487, "grad_norm": 0.09336081147193909, "learning_rate": 6.073456898723296e-08, "loss": 0.0008, "step": 175150 }, { "epoch": 2.8660721590444247, "grad_norm": 0.06616807729005814, "learning_rate": 6.058673583895502e-08, "loss": 0.0014, "step": 175160 }, { "epoch": 2.8662357849955002, "grad_norm": 0.06602644920349121, "learning_rate": 6.043908173253621e-08, "loss": 0.001, "step": 175170 }, { "epoch": 2.8663994109465762, "grad_norm": 0.0740407407283783, "learning_rate": 6.029160667332834e-08, "loss": 0.0005, "step": 175180 }, { "epoch": 2.8665630368976522, "grad_norm": 0.08181922137737274, "learning_rate": 6.014431066667826e-08, "loss": 0.001, "step": 175190 }, { "epoch": 2.866726662848728, "grad_norm": 0.020462464541196823, "learning_rate": 5.99971937179239e-08, "loss": 0.0005, "step": 175200 }, { "epoch": 2.866890288799804, "grad_norm": 0.08246901631355286, "learning_rate": 5.985025583239767e-08, "loss": 0.002, "step": 175210 }, { "epoch": 2.8670539147508793, "grad_norm": 0.12530817091464996, "learning_rate": 5.970349701542755e-08, "loss": 0.0009, "step": 175220 }, { "epoch": 2.8672175407019553, "grad_norm": 0.018198858946561813, "learning_rate": 5.9556917272330925e-08, "loss": 0.0005, "step": 175230 }, { "epoch": 2.8673811666530313, "grad_norm": 0.006078992038965225, "learning_rate": 5.9410516608422455e-08, "loss": 0.0013, "step": 175240 }, { "epoch": 2.867544792604107, "grad_norm": 0.06168961152434349, "learning_rate": 5.9264295029008435e-08, "loss": 0.0008, "step": 175250 }, { "epoch": 2.867708418555183, "grad_norm": 0.028056951239705086, "learning_rate": 5.911825253938963e-08, "loss": 0.0005, "step": 175260 }, { "epoch": 2.8678720445062584, "grad_norm": 0.07744128257036209, "learning_rate": 5.897238914485792e-08, "loss": 0.0005, "step": 175270 }, { "epoch": 2.8680356704573344, "grad_norm": 0.10157930105924606, "learning_rate": 5.882670485070186e-08, "loss": 0.0007, "step": 175280 }, { "epoch": 2.8681992964084104, "grad_norm": 0.7099071741104126, "learning_rate": 5.868119966220221e-08, "loss": 0.0015, "step": 175290 }, { "epoch": 2.868362922359486, "grad_norm": 0.13857851922512054, "learning_rate": 5.8535873584633084e-08, "loss": 0.0009, "step": 175300 }, { "epoch": 2.868526548310562, "grad_norm": 0.20338502526283264, "learning_rate": 5.839072662326195e-08, "loss": 0.0009, "step": 175310 }, { "epoch": 2.868690174261638, "grad_norm": 0.10245632380247116, "learning_rate": 5.8245758783349596e-08, "loss": 0.0011, "step": 175320 }, { "epoch": 2.8688538002127135, "grad_norm": 0.010015835054218769, "learning_rate": 5.810097007015181e-08, "loss": 0.0006, "step": 175330 }, { "epoch": 2.8690174261637895, "grad_norm": 0.10068521648645401, "learning_rate": 5.795636048891606e-08, "loss": 0.0012, "step": 175340 }, { "epoch": 2.8691810521148655, "grad_norm": 0.11369694024324417, "learning_rate": 5.781193004488483e-08, "loss": 0.0014, "step": 175350 }, { "epoch": 2.869344678065941, "grad_norm": 0.08066345006227493, "learning_rate": 5.766767874329282e-08, "loss": 0.0006, "step": 175360 }, { "epoch": 2.869508304017017, "grad_norm": 0.07779251039028168, "learning_rate": 5.752360658936862e-08, "loss": 0.001, "step": 175370 }, { "epoch": 2.869671929968093, "grad_norm": 0.01802058145403862, "learning_rate": 5.737971358833472e-08, "loss": 0.0004, "step": 175380 }, { "epoch": 2.8698355559191686, "grad_norm": 0.05836491659283638, "learning_rate": 5.723599974540695e-08, "loss": 0.001, "step": 175390 }, { "epoch": 2.8699991818702446, "grad_norm": 0.016642700880765915, "learning_rate": 5.709246506579447e-08, "loss": 0.0004, "step": 175400 }, { "epoch": 2.8701628078213206, "grad_norm": 0.03695647791028023, "learning_rate": 5.6949109554700345e-08, "loss": 0.0008, "step": 175410 }, { "epoch": 2.870326433772396, "grad_norm": 0.03990011289715767, "learning_rate": 5.6805933217320975e-08, "loss": 0.0007, "step": 175420 }, { "epoch": 2.870490059723472, "grad_norm": 0.058529507368803024, "learning_rate": 5.666293605884499e-08, "loss": 0.0005, "step": 175430 }, { "epoch": 2.870653685674548, "grad_norm": 0.15740081667900085, "learning_rate": 5.652011808445712e-08, "loss": 0.0009, "step": 175440 }, { "epoch": 2.8708173116256237, "grad_norm": 0.11523908376693726, "learning_rate": 5.6377479299333793e-08, "loss": 0.0007, "step": 175450 }, { "epoch": 2.8709809375766997, "grad_norm": 0.05271280184388161, "learning_rate": 5.623501970864531e-08, "loss": 0.0007, "step": 175460 }, { "epoch": 2.8711445635277757, "grad_norm": 0.05872455984354019, "learning_rate": 5.609273931755532e-08, "loss": 0.0006, "step": 175470 }, { "epoch": 2.8713081894788512, "grad_norm": 0.13023623824119568, "learning_rate": 5.595063813122026e-08, "loss": 0.001, "step": 175480 }, { "epoch": 2.8714718154299272, "grad_norm": 0.06581814587116241, "learning_rate": 5.5808716154792666e-08, "loss": 0.0007, "step": 175490 }, { "epoch": 2.8716354413810032, "grad_norm": 0.5319797992706299, "learning_rate": 5.566697339341565e-08, "loss": 0.0007, "step": 175500 }, { "epoch": 2.871799067332079, "grad_norm": 0.047922633588314056, "learning_rate": 5.552540985222788e-08, "loss": 0.0005, "step": 175510 }, { "epoch": 2.871962693283155, "grad_norm": 0.06529483944177628, "learning_rate": 5.538402553636024e-08, "loss": 0.0007, "step": 175520 }, { "epoch": 2.872126319234231, "grad_norm": 0.1498195230960846, "learning_rate": 5.5242820450936984e-08, "loss": 0.002, "step": 175530 }, { "epoch": 2.8722899451853063, "grad_norm": 0.005223225802183151, "learning_rate": 5.51017946010779e-08, "loss": 0.0004, "step": 175540 }, { "epoch": 2.8724535711363823, "grad_norm": 0.020728344097733498, "learning_rate": 5.4960947991893334e-08, "loss": 0.0002, "step": 175550 }, { "epoch": 2.8726171970874583, "grad_norm": 0.013324741274118423, "learning_rate": 5.4820280628489765e-08, "loss": 0.0002, "step": 175560 }, { "epoch": 2.872780823038534, "grad_norm": 0.02622302435338497, "learning_rate": 5.467979251596533e-08, "loss": 0.0005, "step": 175570 }, { "epoch": 2.87294444898961, "grad_norm": 0.07516628503799438, "learning_rate": 5.453948365941264e-08, "loss": 0.0011, "step": 175580 }, { "epoch": 2.8731080749406854, "grad_norm": 0.05655301362276077, "learning_rate": 5.439935406391816e-08, "loss": 0.0004, "step": 175590 }, { "epoch": 2.8732717008917614, "grad_norm": 0.10728980600833893, "learning_rate": 5.425940373456007e-08, "loss": 0.0006, "step": 175600 }, { "epoch": 2.8734353268428374, "grad_norm": 0.1915968358516693, "learning_rate": 5.411963267641207e-08, "loss": 0.0008, "step": 175610 }, { "epoch": 2.873598952793913, "grad_norm": 0.14025180041790009, "learning_rate": 5.398004089453957e-08, "loss": 0.0006, "step": 175620 }, { "epoch": 2.873762578744989, "grad_norm": 0.002755478722974658, "learning_rate": 5.384062839400406e-08, "loss": 0.0008, "step": 175630 }, { "epoch": 2.8739262046960645, "grad_norm": 0.0776972845196724, "learning_rate": 5.3701395179857615e-08, "loss": 0.001, "step": 175640 }, { "epoch": 2.8740898306471405, "grad_norm": 0.05088486149907112, "learning_rate": 5.3562341257147324e-08, "loss": 0.0008, "step": 175650 }, { "epoch": 2.8742534565982165, "grad_norm": 0.2233416885137558, "learning_rate": 5.3423466630914136e-08, "loss": 0.0005, "step": 175660 }, { "epoch": 2.874417082549292, "grad_norm": 0.038037072867155075, "learning_rate": 5.3284771306191254e-08, "loss": 0.0009, "step": 175670 }, { "epoch": 2.874580708500368, "grad_norm": 0.02905154787003994, "learning_rate": 5.314625528800632e-08, "loss": 0.0004, "step": 175680 }, { "epoch": 2.874744334451444, "grad_norm": 0.027582917362451553, "learning_rate": 5.300791858137977e-08, "loss": 0.0011, "step": 175690 }, { "epoch": 2.8749079604025196, "grad_norm": 0.0049156672321259975, "learning_rate": 5.286976119132703e-08, "loss": 0.0005, "step": 175700 }, { "epoch": 2.8750715863535956, "grad_norm": 0.06905953586101532, "learning_rate": 5.2731783122855206e-08, "loss": 0.0006, "step": 175710 }, { "epoch": 2.8752352123046716, "grad_norm": 0.032916389405727386, "learning_rate": 5.25939843809653e-08, "loss": 0.0003, "step": 175720 }, { "epoch": 2.875398838255747, "grad_norm": 0.06201181933283806, "learning_rate": 5.2456364970653317e-08, "loss": 0.0006, "step": 175730 }, { "epoch": 2.875562464206823, "grad_norm": 0.21089965105056763, "learning_rate": 5.2318924896906375e-08, "loss": 0.0012, "step": 175740 }, { "epoch": 2.875726090157899, "grad_norm": 0.03820250183343887, "learning_rate": 5.218166416470771e-08, "loss": 0.0008, "step": 175750 }, { "epoch": 2.8758897161089747, "grad_norm": 0.028971681371331215, "learning_rate": 5.204458277903168e-08, "loss": 0.0005, "step": 175760 }, { "epoch": 2.8760533420600507, "grad_norm": 0.07568809390068054, "learning_rate": 5.1907680744847086e-08, "loss": 0.0007, "step": 175770 }, { "epoch": 2.8762169680111267, "grad_norm": 0.28838589787483215, "learning_rate": 5.1770958067117184e-08, "loss": 0.0011, "step": 175780 }, { "epoch": 2.8763805939622022, "grad_norm": 0.07241639494895935, "learning_rate": 5.163441475079634e-08, "loss": 0.0005, "step": 175790 }, { "epoch": 2.8765442199132782, "grad_norm": 0.06779029220342636, "learning_rate": 5.1498050800835583e-08, "loss": 0.0009, "step": 175800 }, { "epoch": 2.8767078458643542, "grad_norm": 0.08997996151447296, "learning_rate": 5.136186622217709e-08, "loss": 0.0004, "step": 175810 }, { "epoch": 2.87687147181543, "grad_norm": 0.07818464189767838, "learning_rate": 5.1225861019757464e-08, "loss": 0.0007, "step": 175820 }, { "epoch": 2.877035097766506, "grad_norm": 0.08132357150316238, "learning_rate": 5.109003519850553e-08, "loss": 0.001, "step": 175830 }, { "epoch": 2.877198723717582, "grad_norm": 0.2596634328365326, "learning_rate": 5.095438876334624e-08, "loss": 0.0009, "step": 175840 }, { "epoch": 2.8773623496686573, "grad_norm": 0.07025530189275742, "learning_rate": 5.081892171919511e-08, "loss": 0.0006, "step": 175850 }, { "epoch": 2.8775259756197333, "grad_norm": 0.01663915440440178, "learning_rate": 5.068363407096322e-08, "loss": 0.0005, "step": 175860 }, { "epoch": 2.8776896015708093, "grad_norm": 0.04181954264640808, "learning_rate": 5.054852582355441e-08, "loss": 0.0011, "step": 175870 }, { "epoch": 2.877853227521885, "grad_norm": 0.0700729638338089, "learning_rate": 5.041359698186532e-08, "loss": 0.0005, "step": 175880 }, { "epoch": 2.878016853472961, "grad_norm": 0.11738396435976028, "learning_rate": 5.0278847550787605e-08, "loss": 0.0003, "step": 175890 }, { "epoch": 2.878180479424037, "grad_norm": 0.14627595245838165, "learning_rate": 5.014427753520512e-08, "loss": 0.0013, "step": 175900 }, { "epoch": 2.8783441053751124, "grad_norm": 0.05835794284939766, "learning_rate": 5.00098869399962e-08, "loss": 0.0005, "step": 175910 }, { "epoch": 2.8785077313261884, "grad_norm": 0.03929300233721733, "learning_rate": 4.9875675770031937e-08, "loss": 0.0008, "step": 175920 }, { "epoch": 2.8786713572772644, "grad_norm": 0.13615332543849945, "learning_rate": 4.974164403017678e-08, "loss": 0.0007, "step": 175930 }, { "epoch": 2.87883498322834, "grad_norm": 0.05815568193793297, "learning_rate": 4.960779172528962e-08, "loss": 0.0006, "step": 175940 }, { "epoch": 2.878998609179416, "grad_norm": 0.08804910629987717, "learning_rate": 4.947411886022213e-08, "loss": 0.0005, "step": 175950 }, { "epoch": 2.879162235130492, "grad_norm": 0.005042679142206907, "learning_rate": 4.934062543981988e-08, "loss": 0.0009, "step": 175960 }, { "epoch": 2.8793258610815675, "grad_norm": 0.06677313894033432, "learning_rate": 4.920731146892066e-08, "loss": 0.0005, "step": 175970 }, { "epoch": 2.8794894870326435, "grad_norm": 0.16244550049304962, "learning_rate": 4.907417695235783e-08, "loss": 0.001, "step": 175980 }, { "epoch": 2.879653112983719, "grad_norm": 0.11529611051082611, "learning_rate": 4.894122189495698e-08, "loss": 0.0011, "step": 175990 }, { "epoch": 2.879816738934795, "grad_norm": 0.027447983622550964, "learning_rate": 4.880844630153647e-08, "loss": 0.0007, "step": 176000 }, { "epoch": 2.879980364885871, "grad_norm": 0.07845830172300339, "learning_rate": 4.867585017691079e-08, "loss": 0.0007, "step": 176010 }, { "epoch": 2.8801439908369466, "grad_norm": 0.03748183697462082, "learning_rate": 4.8543433525884974e-08, "loss": 0.0004, "step": 176020 }, { "epoch": 2.8803076167880226, "grad_norm": 0.26329025626182556, "learning_rate": 4.8411196353259636e-08, "loss": 0.0012, "step": 176030 }, { "epoch": 2.880471242739098, "grad_norm": 0.07574775069952011, "learning_rate": 4.827913866382705e-08, "loss": 0.0015, "step": 176040 }, { "epoch": 2.880634868690174, "grad_norm": 0.04075975716114044, "learning_rate": 4.8147260462375054e-08, "loss": 0.0004, "step": 176050 }, { "epoch": 2.88079849464125, "grad_norm": 0.00475145922973752, "learning_rate": 4.801556175368316e-08, "loss": 0.0007, "step": 176060 }, { "epoch": 2.8809621205923257, "grad_norm": 0.04693582281470299, "learning_rate": 4.788404254252477e-08, "loss": 0.0005, "step": 176070 }, { "epoch": 2.8811257465434017, "grad_norm": 0.11720842868089676, "learning_rate": 4.775270283366884e-08, "loss": 0.001, "step": 176080 }, { "epoch": 2.8812893724944777, "grad_norm": 0.1877339780330658, "learning_rate": 4.762154263187435e-08, "loss": 0.0006, "step": 176090 }, { "epoch": 2.8814529984455532, "grad_norm": 0.059261370450258255, "learning_rate": 4.749056194189638e-08, "loss": 0.0006, "step": 176100 }, { "epoch": 2.8816166243966292, "grad_norm": 0.05121181905269623, "learning_rate": 4.735976076848225e-08, "loss": 0.0016, "step": 176110 }, { "epoch": 2.8817802503477052, "grad_norm": 0.09191296249628067, "learning_rate": 4.7229139116373725e-08, "loss": 0.0004, "step": 176120 }, { "epoch": 2.881943876298781, "grad_norm": 0.15236833691596985, "learning_rate": 4.7098696990304785e-08, "loss": 0.0004, "step": 176130 }, { "epoch": 2.882107502249857, "grad_norm": 0.046635061502456665, "learning_rate": 4.6968434395004425e-08, "loss": 0.0009, "step": 176140 }, { "epoch": 2.882271128200933, "grad_norm": 0.08995907008647919, "learning_rate": 4.6838351335193875e-08, "loss": 0.0008, "step": 176150 }, { "epoch": 2.8824347541520083, "grad_norm": 0.22900702059268951, "learning_rate": 4.670844781558881e-08, "loss": 0.0008, "step": 176160 }, { "epoch": 2.8825983801030843, "grad_norm": 0.07879273593425751, "learning_rate": 4.657872384089768e-08, "loss": 0.0008, "step": 176170 }, { "epoch": 2.8827620060541603, "grad_norm": 0.06937658786773682, "learning_rate": 4.644917941582227e-08, "loss": 0.0007, "step": 176180 }, { "epoch": 2.882925632005236, "grad_norm": 0.05095773935317993, "learning_rate": 4.6319814545058847e-08, "loss": 0.0004, "step": 176190 }, { "epoch": 2.883089257956312, "grad_norm": 0.009493554942309856, "learning_rate": 4.6190629233295867e-08, "loss": 0.0013, "step": 176200 }, { "epoch": 2.883252883907388, "grad_norm": 0.06941714882850647, "learning_rate": 4.6061623485216254e-08, "loss": 0.0003, "step": 176210 }, { "epoch": 2.8834165098584634, "grad_norm": 0.008341421373188496, "learning_rate": 4.593279730549682e-08, "loss": 0.0006, "step": 176220 }, { "epoch": 2.8835801358095394, "grad_norm": 0.08923845738172531, "learning_rate": 4.580415069880606e-08, "loss": 0.001, "step": 176230 }, { "epoch": 2.8837437617606154, "grad_norm": 0.03379319980740547, "learning_rate": 4.567568366980857e-08, "loss": 0.0005, "step": 176240 }, { "epoch": 2.883907387711691, "grad_norm": 0.06666112691164017, "learning_rate": 4.554739622315951e-08, "loss": 0.0009, "step": 176250 }, { "epoch": 2.884071013662767, "grad_norm": 0.32875609397888184, "learning_rate": 4.5419288363510175e-08, "loss": 0.0007, "step": 176260 }, { "epoch": 2.884234639613843, "grad_norm": 0.07230079919099808, "learning_rate": 4.52913600955035e-08, "loss": 0.0008, "step": 176270 }, { "epoch": 2.8843982655649185, "grad_norm": 0.14306780695915222, "learning_rate": 4.5163611423776346e-08, "loss": 0.0013, "step": 176280 }, { "epoch": 2.8845618915159945, "grad_norm": 0.09511516243219376, "learning_rate": 4.503604235296e-08, "loss": 0.0006, "step": 176290 }, { "epoch": 2.8847255174670705, "grad_norm": 0.04371592029929161, "learning_rate": 4.490865288767743e-08, "loss": 0.0008, "step": 176300 }, { "epoch": 2.884889143418146, "grad_norm": 0.2356683611869812, "learning_rate": 4.4781443032547725e-08, "loss": 0.0007, "step": 176310 }, { "epoch": 2.885052769369222, "grad_norm": 0.2509373724460602, "learning_rate": 4.465441279218108e-08, "loss": 0.0007, "step": 176320 }, { "epoch": 2.885216395320298, "grad_norm": 0.02567668817937374, "learning_rate": 4.452756217118215e-08, "loss": 0.0003, "step": 176330 }, { "epoch": 2.8853800212713736, "grad_norm": 0.0780859887599945, "learning_rate": 4.440089117414836e-08, "loss": 0.0005, "step": 176340 }, { "epoch": 2.8855436472224496, "grad_norm": 0.008311559446156025, "learning_rate": 4.4274399805672164e-08, "loss": 0.0005, "step": 176350 }, { "epoch": 2.885707273173525, "grad_norm": 0.09923581779003143, "learning_rate": 4.414808807033821e-08, "loss": 0.0013, "step": 176360 }, { "epoch": 2.885870899124601, "grad_norm": 0.06938090175390244, "learning_rate": 4.402195597272507e-08, "loss": 0.0004, "step": 176370 }, { "epoch": 2.886034525075677, "grad_norm": 0.06059519946575165, "learning_rate": 4.389600351740464e-08, "loss": 0.0004, "step": 176380 }, { "epoch": 2.8861981510267527, "grad_norm": 0.05628759786486626, "learning_rate": 4.377023070894215e-08, "loss": 0.0005, "step": 176390 }, { "epoch": 2.8863617769778287, "grad_norm": 0.043270789086818695, "learning_rate": 4.36446375518973e-08, "loss": 0.0008, "step": 176400 }, { "epoch": 2.8865254029289042, "grad_norm": 0.05593279376626015, "learning_rate": 4.351922405082143e-08, "loss": 0.0004, "step": 176410 }, { "epoch": 2.8866890288799802, "grad_norm": 0.015211150981485844, "learning_rate": 4.339399021026147e-08, "loss": 0.0011, "step": 176420 }, { "epoch": 2.8868526548310562, "grad_norm": 0.12670786678791046, "learning_rate": 4.326893603475657e-08, "loss": 0.0009, "step": 176430 }, { "epoch": 2.887016280782132, "grad_norm": 0.07527585327625275, "learning_rate": 4.314406152883921e-08, "loss": 0.0005, "step": 176440 }, { "epoch": 2.887179906733208, "grad_norm": 0.013026727363467216, "learning_rate": 4.3019366697036323e-08, "loss": 0.0008, "step": 176450 }, { "epoch": 2.887343532684284, "grad_norm": 0.17186667025089264, "learning_rate": 4.289485154386763e-08, "loss": 0.0007, "step": 176460 }, { "epoch": 2.8875071586353593, "grad_norm": 0.11464191228151321, "learning_rate": 4.2770516073846745e-08, "loss": 0.0009, "step": 176470 }, { "epoch": 2.8876707845864353, "grad_norm": 0.1785428375005722, "learning_rate": 4.264636029148006e-08, "loss": 0.001, "step": 176480 }, { "epoch": 2.8878344105375113, "grad_norm": 0.020543817430734634, "learning_rate": 4.2522384201267865e-08, "loss": 0.0007, "step": 176490 }, { "epoch": 2.887998036488587, "grad_norm": 0.10109040886163712, "learning_rate": 4.239858780770434e-08, "loss": 0.0005, "step": 176500 }, { "epoch": 2.888161662439663, "grad_norm": 0.1169227659702301, "learning_rate": 4.227497111527645e-08, "loss": 0.0007, "step": 176510 }, { "epoch": 2.888325288390739, "grad_norm": 0.1300945281982422, "learning_rate": 4.2151534128466175e-08, "loss": 0.001, "step": 176520 }, { "epoch": 2.8884889143418144, "grad_norm": 0.09155488014221191, "learning_rate": 4.202827685174604e-08, "loss": 0.0007, "step": 176530 }, { "epoch": 2.8886525402928904, "grad_norm": 0.13801400363445282, "learning_rate": 4.190519928958525e-08, "loss": 0.0005, "step": 176540 }, { "epoch": 2.8888161662439664, "grad_norm": 0.053257450461387634, "learning_rate": 4.178230144644413e-08, "loss": 0.0006, "step": 176550 }, { "epoch": 2.888979792195042, "grad_norm": 0.018202928826212883, "learning_rate": 4.1659583326778e-08, "loss": 0.0006, "step": 176560 }, { "epoch": 2.889143418146118, "grad_norm": 0.026498859748244286, "learning_rate": 4.153704493503496e-08, "loss": 0.0014, "step": 176570 }, { "epoch": 2.889307044097194, "grad_norm": 0.17384694516658783, "learning_rate": 4.141468627565648e-08, "loss": 0.0007, "step": 176580 }, { "epoch": 2.8894706700482695, "grad_norm": 0.019504738971590996, "learning_rate": 4.1292507353078434e-08, "loss": 0.0011, "step": 176590 }, { "epoch": 2.8896342959993455, "grad_norm": 0.0477384589612484, "learning_rate": 4.1170508171728407e-08, "loss": 0.0005, "step": 176600 }, { "epoch": 2.8897979219504215, "grad_norm": 0.13389673829078674, "learning_rate": 4.104868873603007e-08, "loss": 0.0008, "step": 176610 }, { "epoch": 2.889961547901497, "grad_norm": 0.10623396933078766, "learning_rate": 4.0927049050397685e-08, "loss": 0.0005, "step": 176620 }, { "epoch": 2.890125173852573, "grad_norm": 0.06848946958780289, "learning_rate": 4.080558911924104e-08, "loss": 0.0005, "step": 176630 }, { "epoch": 2.890288799803649, "grad_norm": 0.03842702507972717, "learning_rate": 4.068430894696218e-08, "loss": 0.0006, "step": 176640 }, { "epoch": 2.8904524257547246, "grad_norm": 0.024883877485990524, "learning_rate": 4.0563208537958146e-08, "loss": 0.0006, "step": 176650 }, { "epoch": 2.8906160517058006, "grad_norm": 0.18561606109142303, "learning_rate": 4.044228789661819e-08, "loss": 0.0008, "step": 176660 }, { "epoch": 2.8907796776568766, "grad_norm": 0.09274275600910187, "learning_rate": 4.0321547027325495e-08, "loss": 0.0005, "step": 176670 }, { "epoch": 2.890943303607952, "grad_norm": 0.11740662902593613, "learning_rate": 4.020098593445654e-08, "loss": 0.0007, "step": 176680 }, { "epoch": 2.891106929559028, "grad_norm": 0.013774445280432701, "learning_rate": 4.008060462238061e-08, "loss": 0.0009, "step": 176690 }, { "epoch": 2.891270555510104, "grad_norm": 0.03898048773407936, "learning_rate": 3.9960403095462567e-08, "loss": 0.0005, "step": 176700 }, { "epoch": 2.8914341814611797, "grad_norm": 0.0460006482899189, "learning_rate": 3.984038135805835e-08, "loss": 0.0006, "step": 176710 }, { "epoch": 2.8915978074122557, "grad_norm": 0.12517037987709045, "learning_rate": 3.972053941451892e-08, "loss": 0.0003, "step": 176720 }, { "epoch": 2.8917614333633317, "grad_norm": 0.0437329038977623, "learning_rate": 3.9600877269188595e-08, "loss": 0.0005, "step": 176730 }, { "epoch": 2.8919250593144072, "grad_norm": 0.144351065158844, "learning_rate": 3.9481394926403906e-08, "loss": 0.0008, "step": 176740 }, { "epoch": 2.8920886852654832, "grad_norm": 0.14014112949371338, "learning_rate": 3.9362092390496934e-08, "loss": 0.0013, "step": 176750 }, { "epoch": 2.892252311216559, "grad_norm": 0.025197744369506836, "learning_rate": 3.924296966579089e-08, "loss": 0.001, "step": 176760 }, { "epoch": 2.892415937167635, "grad_norm": 0.03783867135643959, "learning_rate": 3.912402675660454e-08, "loss": 0.0011, "step": 176770 }, { "epoch": 2.892579563118711, "grad_norm": 0.10282964259386063, "learning_rate": 3.9005263667249435e-08, "loss": 0.0006, "step": 176780 }, { "epoch": 2.8927431890697863, "grad_norm": 0.023211972787976265, "learning_rate": 3.888668040202992e-08, "loss": 0.0005, "step": 176790 }, { "epoch": 2.8929068150208623, "grad_norm": 0.057641055434942245, "learning_rate": 3.876827696524421e-08, "loss": 0.0004, "step": 176800 }, { "epoch": 2.893070440971938, "grad_norm": 0.08486399054527283, "learning_rate": 3.865005336118499e-08, "loss": 0.0008, "step": 176810 }, { "epoch": 2.893234066923014, "grad_norm": 0.031157856807112694, "learning_rate": 3.853200959413661e-08, "loss": 0.0005, "step": 176820 }, { "epoch": 2.89339769287409, "grad_norm": 0.005268704146146774, "learning_rate": 3.8414145668378425e-08, "loss": 0.0004, "step": 176830 }, { "epoch": 2.8935613188251654, "grad_norm": 0.16689620912075043, "learning_rate": 3.829646158818256e-08, "loss": 0.0008, "step": 176840 }, { "epoch": 2.8937249447762414, "grad_norm": 0.13241204619407654, "learning_rate": 3.817895735781507e-08, "loss": 0.0005, "step": 176850 }, { "epoch": 2.8938885707273174, "grad_norm": 0.15282315015792847, "learning_rate": 3.8061632981534204e-08, "loss": 0.0008, "step": 176860 }, { "epoch": 2.894052196678393, "grad_norm": 0.2884654402732849, "learning_rate": 3.794448846359433e-08, "loss": 0.0017, "step": 176870 }, { "epoch": 2.894215822629469, "grad_norm": 0.03419988229870796, "learning_rate": 3.7827523808240396e-08, "loss": 0.0006, "step": 176880 }, { "epoch": 2.894379448580545, "grad_norm": 0.05453098192811012, "learning_rate": 3.77107390197129e-08, "loss": 0.0022, "step": 176890 }, { "epoch": 2.8945430745316205, "grad_norm": 0.002680734498426318, "learning_rate": 3.759413410224399e-08, "loss": 0.0004, "step": 176900 }, { "epoch": 2.8947067004826965, "grad_norm": 0.048047974705696106, "learning_rate": 3.747770906006087e-08, "loss": 0.0004, "step": 176910 }, { "epoch": 2.8948703264337725, "grad_norm": 0.005502645391970873, "learning_rate": 3.7361463897384045e-08, "loss": 0.0005, "step": 176920 }, { "epoch": 2.895033952384848, "grad_norm": 0.006893428508192301, "learning_rate": 3.724539861842624e-08, "loss": 0.0006, "step": 176930 }, { "epoch": 2.895197578335924, "grad_norm": 0.01869441196322441, "learning_rate": 3.712951322739578e-08, "loss": 0.0012, "step": 176940 }, { "epoch": 2.895361204287, "grad_norm": 0.04699801653623581, "learning_rate": 3.7013807728492054e-08, "loss": 0.0007, "step": 176950 }, { "epoch": 2.8955248302380756, "grad_norm": 0.04201870411634445, "learning_rate": 3.689828212590951e-08, "loss": 0.0005, "step": 176960 }, { "epoch": 2.8956884561891516, "grad_norm": 0.047326136380434036, "learning_rate": 3.67829364238359e-08, "loss": 0.0006, "step": 176970 }, { "epoch": 2.8958520821402276, "grad_norm": 0.02309175580739975, "learning_rate": 3.666777062645233e-08, "loss": 0.0008, "step": 176980 }, { "epoch": 2.896015708091303, "grad_norm": 0.06949126720428467, "learning_rate": 3.655278473793267e-08, "loss": 0.0007, "step": 176990 }, { "epoch": 2.896179334042379, "grad_norm": 0.1710657924413681, "learning_rate": 3.643797876244526e-08, "loss": 0.0009, "step": 177000 }, { "epoch": 2.896342959993455, "grad_norm": 0.1285581886768341, "learning_rate": 3.632335270415177e-08, "loss": 0.0006, "step": 177010 }, { "epoch": 2.8965065859445307, "grad_norm": 0.0462639145553112, "learning_rate": 3.620890656720666e-08, "loss": 0.0007, "step": 177020 }, { "epoch": 2.8966702118956067, "grad_norm": 0.06266423314809799, "learning_rate": 3.609464035575827e-08, "loss": 0.0006, "step": 177030 }, { "epoch": 2.8968338378466827, "grad_norm": 0.2848026752471924, "learning_rate": 3.598055407394885e-08, "loss": 0.0007, "step": 177040 }, { "epoch": 2.8969974637977582, "grad_norm": 0.12045657634735107, "learning_rate": 3.5866647725913975e-08, "loss": 0.0005, "step": 177050 }, { "epoch": 2.8971610897488342, "grad_norm": 0.06083511933684349, "learning_rate": 3.575292131578201e-08, "loss": 0.0008, "step": 177060 }, { "epoch": 2.8973247156999102, "grad_norm": 0.14872334897518158, "learning_rate": 3.563937484767521e-08, "loss": 0.0012, "step": 177070 }, { "epoch": 2.897488341650986, "grad_norm": 0.05737492814660072, "learning_rate": 3.552600832570974e-08, "loss": 0.0006, "step": 177080 }, { "epoch": 2.897651967602062, "grad_norm": 0.12255825847387314, "learning_rate": 3.541282175399452e-08, "loss": 0.0012, "step": 177090 }, { "epoch": 2.897815593553138, "grad_norm": 0.0018714523175731301, "learning_rate": 3.529981513663294e-08, "loss": 0.0005, "step": 177100 }, { "epoch": 2.8979792195042133, "grad_norm": 0.0896361842751503, "learning_rate": 3.518698847772006e-08, "loss": 0.0006, "step": 177110 }, { "epoch": 2.8981428454552893, "grad_norm": 0.017417022958397865, "learning_rate": 3.50743417813465e-08, "loss": 0.0007, "step": 177120 }, { "epoch": 2.8983064714063653, "grad_norm": 0.05408822372555733, "learning_rate": 3.4961875051595095e-08, "loss": 0.001, "step": 177130 }, { "epoch": 2.898470097357441, "grad_norm": 0.11233150213956833, "learning_rate": 3.4849588292542593e-08, "loss": 0.001, "step": 177140 }, { "epoch": 2.898633723308517, "grad_norm": 0.0716085135936737, "learning_rate": 3.473748150825906e-08, "loss": 0.0007, "step": 177150 }, { "epoch": 2.8987973492595924, "grad_norm": 0.0025160168297588825, "learning_rate": 3.462555470280793e-08, "loss": 0.0005, "step": 177160 }, { "epoch": 2.8989609752106684, "grad_norm": 0.010875099338591099, "learning_rate": 3.451380788024705e-08, "loss": 0.0005, "step": 177170 }, { "epoch": 2.8991246011617444, "grad_norm": 0.09221598505973816, "learning_rate": 3.440224104462597e-08, "loss": 0.0003, "step": 177180 }, { "epoch": 2.89928822711282, "grad_norm": 0.1053747832775116, "learning_rate": 3.429085419998923e-08, "loss": 0.0007, "step": 177190 }, { "epoch": 2.899451853063896, "grad_norm": 0.06489173322916031, "learning_rate": 3.417964735037471e-08, "loss": 0.001, "step": 177200 }, { "epoch": 2.8996154790149715, "grad_norm": 0.07743917405605316, "learning_rate": 3.4068620499811964e-08, "loss": 0.001, "step": 177210 }, { "epoch": 2.8997791049660475, "grad_norm": 0.0979137271642685, "learning_rate": 3.395777365232722e-08, "loss": 0.0006, "step": 177220 }, { "epoch": 2.8999427309171235, "grad_norm": 0.11220269650220871, "learning_rate": 3.384710681193726e-08, "loss": 0.0008, "step": 177230 }, { "epoch": 2.900106356868199, "grad_norm": 0.06027612462639809, "learning_rate": 3.3736619982653875e-08, "loss": 0.0005, "step": 177240 }, { "epoch": 2.900269982819275, "grad_norm": 0.06976976245641708, "learning_rate": 3.3626313168482195e-08, "loss": 0.0005, "step": 177250 }, { "epoch": 2.900433608770351, "grad_norm": 0.1064118966460228, "learning_rate": 3.351618637342013e-08, "loss": 0.0006, "step": 177260 }, { "epoch": 2.9005972347214266, "grad_norm": 0.05113185942173004, "learning_rate": 3.34062396014595e-08, "loss": 0.0008, "step": 177270 }, { "epoch": 2.9007608606725026, "grad_norm": 0.04026055335998535, "learning_rate": 3.329647285658599e-08, "loss": 0.0003, "step": 177280 }, { "epoch": 2.9009244866235786, "grad_norm": 0.17570102214813232, "learning_rate": 3.31868861427781e-08, "loss": 0.0006, "step": 177290 }, { "epoch": 2.901088112574654, "grad_norm": 0.07997167855501175, "learning_rate": 3.307747946400819e-08, "loss": 0.0011, "step": 177300 }, { "epoch": 2.90125173852573, "grad_norm": 0.031603582203388214, "learning_rate": 3.2968252824241986e-08, "loss": 0.0006, "step": 177310 }, { "epoch": 2.901415364476806, "grad_norm": 0.018800415098667145, "learning_rate": 3.2859206227438547e-08, "loss": 0.0004, "step": 177320 }, { "epoch": 2.9015789904278817, "grad_norm": 0.06749694049358368, "learning_rate": 3.275033967755081e-08, "loss": 0.0006, "step": 177330 }, { "epoch": 2.9017426163789577, "grad_norm": 0.15386687219142914, "learning_rate": 3.264165317852508e-08, "loss": 0.0037, "step": 177340 }, { "epoch": 2.9019062423300337, "grad_norm": 0.05814754217863083, "learning_rate": 3.253314673429986e-08, "loss": 0.0008, "step": 177350 }, { "epoch": 2.9020698682811092, "grad_norm": 0.030864736065268517, "learning_rate": 3.2424820348809785e-08, "loss": 0.0007, "step": 177360 }, { "epoch": 2.9022334942321852, "grad_norm": 0.10367145389318466, "learning_rate": 3.23166740259806e-08, "loss": 0.001, "step": 177370 }, { "epoch": 2.9023971201832612, "grad_norm": 0.07243552803993225, "learning_rate": 3.2208707769732505e-08, "loss": 0.0007, "step": 177380 }, { "epoch": 2.902560746134337, "grad_norm": 0.012534837238490582, "learning_rate": 3.210092158397904e-08, "loss": 0.0008, "step": 177390 }, { "epoch": 2.902724372085413, "grad_norm": 0.06368628889322281, "learning_rate": 3.199331547262707e-08, "loss": 0.0008, "step": 177400 }, { "epoch": 2.902887998036489, "grad_norm": 0.010164530947804451, "learning_rate": 3.188588943957682e-08, "loss": 0.001, "step": 177410 }, { "epoch": 2.9030516239875643, "grad_norm": 0.035111453384160995, "learning_rate": 3.177864348872239e-08, "loss": 0.0006, "step": 177420 }, { "epoch": 2.9032152499386403, "grad_norm": 0.03125438094139099, "learning_rate": 3.167157762395179e-08, "loss": 0.0007, "step": 177430 }, { "epoch": 2.9033788758897163, "grad_norm": 0.04680990055203438, "learning_rate": 3.156469184914523e-08, "loss": 0.0005, "step": 177440 }, { "epoch": 2.903542501840792, "grad_norm": 0.06495669484138489, "learning_rate": 3.145798616817741e-08, "loss": 0.0008, "step": 177450 }, { "epoch": 2.903706127791868, "grad_norm": 0.003417240222916007, "learning_rate": 3.1351460584915784e-08, "loss": 0.0003, "step": 177460 }, { "epoch": 2.903869753742944, "grad_norm": 0.03974118083715439, "learning_rate": 3.1245115103222254e-08, "loss": 0.0008, "step": 177470 }, { "epoch": 2.9040333796940194, "grad_norm": 0.0995679646730423, "learning_rate": 3.113894972695097e-08, "loss": 0.0008, "step": 177480 }, { "epoch": 2.9041970056450954, "grad_norm": 0.2517743706703186, "learning_rate": 3.103296445995052e-08, "loss": 0.0008, "step": 177490 }, { "epoch": 2.9043606315961714, "grad_norm": 0.01019982434809208, "learning_rate": 3.092715930606227e-08, "loss": 0.0003, "step": 177500 }, { "epoch": 2.904524257547247, "grad_norm": 0.020068984478712082, "learning_rate": 3.082153426912204e-08, "loss": 0.0007, "step": 177510 }, { "epoch": 2.904687883498323, "grad_norm": 0.08207608014345169, "learning_rate": 3.071608935295789e-08, "loss": 0.0004, "step": 177520 }, { "epoch": 2.9048515094493985, "grad_norm": 0.08420080691576004, "learning_rate": 3.061082456139175e-08, "loss": 0.001, "step": 177530 }, { "epoch": 2.9050151354004745, "grad_norm": 0.07505779713392258, "learning_rate": 3.050573989824057e-08, "loss": 0.0004, "step": 177540 }, { "epoch": 2.9051787613515505, "grad_norm": 0.09097757190465927, "learning_rate": 3.040083536731131e-08, "loss": 0.0005, "step": 177550 }, { "epoch": 2.905342387302626, "grad_norm": 0.08892517536878586, "learning_rate": 3.02961109724087e-08, "loss": 0.0007, "step": 177560 }, { "epoch": 2.905506013253702, "grad_norm": 0.041635241359472275, "learning_rate": 3.019156671732693e-08, "loss": 0.0004, "step": 177570 }, { "epoch": 2.9056696392047776, "grad_norm": 0.012195191346108913, "learning_rate": 3.008720260585629e-08, "loss": 0.0006, "step": 177580 }, { "epoch": 2.9058332651558536, "grad_norm": 0.026278575882315636, "learning_rate": 2.9983018641780437e-08, "loss": 0.0004, "step": 177590 }, { "epoch": 2.9059968911069296, "grad_norm": 0.13457244634628296, "learning_rate": 2.987901482887412e-08, "loss": 0.0033, "step": 177600 }, { "epoch": 2.906160517058005, "grad_norm": 0.04808173328638077, "learning_rate": 2.9775191170908768e-08, "loss": 0.0005, "step": 177610 }, { "epoch": 2.906324143009081, "grad_norm": 0.08267733454704285, "learning_rate": 2.9671547671646928e-08, "loss": 0.0007, "step": 177620 }, { "epoch": 2.906487768960157, "grad_norm": 0.01851843111217022, "learning_rate": 2.956808433484504e-08, "loss": 0.0004, "step": 177630 }, { "epoch": 2.9066513949112327, "grad_norm": 0.06739016622304916, "learning_rate": 2.946480116425454e-08, "loss": 0.0011, "step": 177640 }, { "epoch": 2.9068150208623087, "grad_norm": 0.023466920480132103, "learning_rate": 2.9361698163618557e-08, "loss": 0.0006, "step": 177650 }, { "epoch": 2.9069786468133847, "grad_norm": 0.15963415801525116, "learning_rate": 2.925877533667465e-08, "loss": 0.0012, "step": 177660 }, { "epoch": 2.9071422727644602, "grad_norm": 0.09810908883810043, "learning_rate": 2.915603268715317e-08, "loss": 0.0008, "step": 177670 }, { "epoch": 2.9073058987155362, "grad_norm": 0.11275790631771088, "learning_rate": 2.9053470218778356e-08, "loss": 0.0008, "step": 177680 }, { "epoch": 2.9074695246666122, "grad_norm": 0.0028494063299149275, "learning_rate": 2.8951087935267797e-08, "loss": 0.0006, "step": 177690 }, { "epoch": 2.907633150617688, "grad_norm": 0.0122525654733181, "learning_rate": 2.884888584033241e-08, "loss": 0.0006, "step": 177700 }, { "epoch": 2.907796776568764, "grad_norm": 0.020344428718090057, "learning_rate": 2.874686393767756e-08, "loss": 0.0008, "step": 177710 }, { "epoch": 2.90796040251984, "grad_norm": 0.007619696203619242, "learning_rate": 2.8645022231000853e-08, "loss": 0.0007, "step": 177720 }, { "epoch": 2.9081240284709153, "grad_norm": 0.015370062552392483, "learning_rate": 2.8543360723993218e-08, "loss": 0.0016, "step": 177730 }, { "epoch": 2.9082876544219913, "grad_norm": 0.031640708446502686, "learning_rate": 2.8441879420340602e-08, "loss": 0.0006, "step": 177740 }, { "epoch": 2.9084512803730673, "grad_norm": 0.08246639370918274, "learning_rate": 2.834057832372117e-08, "loss": 0.0012, "step": 177750 }, { "epoch": 2.908614906324143, "grad_norm": 0.06570059806108475, "learning_rate": 2.8239457437805872e-08, "loss": 0.0005, "step": 177760 }, { "epoch": 2.908778532275219, "grad_norm": 0.034638144075870514, "learning_rate": 2.813851676626178e-08, "loss": 0.0004, "step": 177770 }, { "epoch": 2.908942158226295, "grad_norm": 0.11941083520650864, "learning_rate": 2.8037756312746522e-08, "loss": 0.001, "step": 177780 }, { "epoch": 2.9091057841773704, "grad_norm": 0.11816394329071045, "learning_rate": 2.793717608091273e-08, "loss": 0.0006, "step": 177790 }, { "epoch": 2.9092694101284464, "grad_norm": 0.003514394862577319, "learning_rate": 2.7836776074406935e-08, "loss": 0.0006, "step": 177800 }, { "epoch": 2.9094330360795224, "grad_norm": 0.005191583652049303, "learning_rate": 2.773655629686678e-08, "loss": 0.0005, "step": 177810 }, { "epoch": 2.909596662030598, "grad_norm": 0.12702132761478424, "learning_rate": 2.7636516751926578e-08, "loss": 0.0008, "step": 177820 }, { "epoch": 2.909760287981674, "grad_norm": 0.040010128170251846, "learning_rate": 2.7536657443211767e-08, "loss": 0.0026, "step": 177830 }, { "epoch": 2.90992391393275, "grad_norm": 0.07131540030241013, "learning_rate": 2.7436978374342226e-08, "loss": 0.001, "step": 177840 }, { "epoch": 2.9100875398838255, "grad_norm": 0.024162158370018005, "learning_rate": 2.7337479548930623e-08, "loss": 0.0014, "step": 177850 }, { "epoch": 2.9102511658349015, "grad_norm": 0.0013242284767329693, "learning_rate": 2.723816097058407e-08, "loss": 0.0007, "step": 177860 }, { "epoch": 2.9104147917859775, "grad_norm": 0.0698835477232933, "learning_rate": 2.713902264290247e-08, "loss": 0.0011, "step": 177870 }, { "epoch": 2.910578417737053, "grad_norm": 0.0022747977636754513, "learning_rate": 2.7040064569479606e-08, "loss": 0.0005, "step": 177880 }, { "epoch": 2.910742043688129, "grad_norm": 0.13248001039028168, "learning_rate": 2.6941286753901507e-08, "loss": 0.0004, "step": 177890 }, { "epoch": 2.910905669639205, "grad_norm": 0.016011444851756096, "learning_rate": 2.684268919974975e-08, "loss": 0.0009, "step": 177900 }, { "epoch": 2.9110692955902806, "grad_norm": 0.189195916056633, "learning_rate": 2.674427191059814e-08, "loss": 0.0004, "step": 177910 }, { "epoch": 2.9112329215413566, "grad_norm": 0.12591543793678284, "learning_rate": 2.6646034890013272e-08, "loss": 0.0013, "step": 177920 }, { "epoch": 2.911396547492432, "grad_norm": 0.007671535015106201, "learning_rate": 2.6547978141556742e-08, "loss": 0.0006, "step": 177930 }, { "epoch": 2.911560173443508, "grad_norm": 0.0503133125603199, "learning_rate": 2.645010166878237e-08, "loss": 0.0004, "step": 177940 }, { "epoch": 2.911723799394584, "grad_norm": 0.04913368821144104, "learning_rate": 2.635240547523843e-08, "loss": 0.0012, "step": 177950 }, { "epoch": 2.9118874253456597, "grad_norm": 0.0596780888736248, "learning_rate": 2.625488956446598e-08, "loss": 0.0004, "step": 177960 }, { "epoch": 2.9120510512967357, "grad_norm": 0.007770646829158068, "learning_rate": 2.615755393999997e-08, "loss": 0.0005, "step": 177970 }, { "epoch": 2.9122146772478112, "grad_norm": 0.07564467191696167, "learning_rate": 2.6060398605368132e-08, "loss": 0.0023, "step": 177980 }, { "epoch": 2.9123783031988872, "grad_norm": 0.0484570637345314, "learning_rate": 2.5963423564092095e-08, "loss": 0.0013, "step": 177990 }, { "epoch": 2.9125419291499632, "grad_norm": 0.06047716736793518, "learning_rate": 2.5866628819686823e-08, "loss": 0.0011, "step": 178000 }, { "epoch": 2.912705555101039, "grad_norm": 0.15049371123313904, "learning_rate": 2.5770014375662288e-08, "loss": 0.0012, "step": 178010 }, { "epoch": 2.912869181052115, "grad_norm": 0.0027358862571418285, "learning_rate": 2.5673580235518468e-08, "loss": 0.0008, "step": 178020 }, { "epoch": 2.913032807003191, "grad_norm": 0.0036435327492654324, "learning_rate": 2.5577326402752565e-08, "loss": 0.0005, "step": 178030 }, { "epoch": 2.9131964329542663, "grad_norm": 0.09912025183439255, "learning_rate": 2.5481252880852348e-08, "loss": 0.0004, "step": 178040 }, { "epoch": 2.9133600589053423, "grad_norm": 0.09612133353948593, "learning_rate": 2.5385359673301137e-08, "loss": 0.0008, "step": 178050 }, { "epoch": 2.9135236848564183, "grad_norm": 0.06519845128059387, "learning_rate": 2.528964678357393e-08, "loss": 0.0009, "step": 178060 }, { "epoch": 2.913687310807494, "grad_norm": 0.039203792810440063, "learning_rate": 2.5194114215141287e-08, "loss": 0.0004, "step": 178070 }, { "epoch": 2.91385093675857, "grad_norm": 0.08433068543672562, "learning_rate": 2.5098761971465434e-08, "loss": 0.0005, "step": 178080 }, { "epoch": 2.914014562709646, "grad_norm": 0.048395153135061264, "learning_rate": 2.5003590056001946e-08, "loss": 0.0004, "step": 178090 }, { "epoch": 2.9141781886607214, "grad_norm": 0.05308118090033531, "learning_rate": 2.490859847220195e-08, "loss": 0.0004, "step": 178100 }, { "epoch": 2.9143418146117974, "grad_norm": 0.09533210098743439, "learning_rate": 2.4813787223507134e-08, "loss": 0.0002, "step": 178110 }, { "epoch": 2.9145054405628734, "grad_norm": 0.007121017202734947, "learning_rate": 2.4719156313355307e-08, "loss": 0.0006, "step": 178120 }, { "epoch": 2.914669066513949, "grad_norm": 0.10885131359100342, "learning_rate": 2.4624705745176503e-08, "loss": 0.0007, "step": 178130 }, { "epoch": 2.914832692465025, "grad_norm": 0.07306266576051712, "learning_rate": 2.4530435522394092e-08, "loss": 0.0005, "step": 178140 }, { "epoch": 2.914996318416101, "grad_norm": 0.025600915774703026, "learning_rate": 2.4436345648424786e-08, "loss": 0.0009, "step": 178150 }, { "epoch": 2.9151599443671765, "grad_norm": 0.12582968175411224, "learning_rate": 2.4342436126679747e-08, "loss": 0.0014, "step": 178160 }, { "epoch": 2.9153235703182525, "grad_norm": 0.008355293422937393, "learning_rate": 2.4248706960563473e-08, "loss": 0.0004, "step": 178170 }, { "epoch": 2.9154871962693285, "grad_norm": 0.05410925671458244, "learning_rate": 2.415515815347158e-08, "loss": 0.0008, "step": 178180 }, { "epoch": 2.915650822220404, "grad_norm": 0.06869533658027649, "learning_rate": 2.4061789708796356e-08, "loss": 0.0006, "step": 178190 }, { "epoch": 2.91581444817148, "grad_norm": 0.09714231640100479, "learning_rate": 2.3968601629922315e-08, "loss": 0.0006, "step": 178200 }, { "epoch": 2.915978074122556, "grad_norm": 0.09477607905864716, "learning_rate": 2.3875593920226203e-08, "loss": 0.0004, "step": 178210 }, { "epoch": 2.9161417000736316, "grad_norm": 0.0013071083230897784, "learning_rate": 2.378276658308032e-08, "loss": 0.0005, "step": 178220 }, { "epoch": 2.9163053260247076, "grad_norm": 0.141518235206604, "learning_rate": 2.3690119621849196e-08, "loss": 0.0015, "step": 178230 }, { "epoch": 2.9164689519757836, "grad_norm": 0.0389409214258194, "learning_rate": 2.3597653039891254e-08, "loss": 0.0009, "step": 178240 }, { "epoch": 2.916632577926859, "grad_norm": 0.07559758424758911, "learning_rate": 2.350536684055771e-08, "loss": 0.0007, "step": 178250 }, { "epoch": 2.916796203877935, "grad_norm": 0.07326913625001907, "learning_rate": 2.3413261027193655e-08, "loss": 0.0008, "step": 178260 }, { "epoch": 2.916959829829011, "grad_norm": 0.08001751452684402, "learning_rate": 2.3321335603138097e-08, "loss": 0.0005, "step": 178270 }, { "epoch": 2.9171234557800867, "grad_norm": 0.0018589055398479104, "learning_rate": 2.322959057172336e-08, "loss": 0.0005, "step": 178280 }, { "epoch": 2.9172870817311627, "grad_norm": 0.09099394828081131, "learning_rate": 2.3138025936274565e-08, "loss": 0.0007, "step": 178290 }, { "epoch": 2.9174507076822382, "grad_norm": 0.030106818303465843, "learning_rate": 2.304664170011017e-08, "loss": 0.0005, "step": 178300 }, { "epoch": 2.9176143336333142, "grad_norm": 0.15790459513664246, "learning_rate": 2.295543786654364e-08, "loss": 0.005, "step": 178310 }, { "epoch": 2.9177779595843902, "grad_norm": 0.049048371613025665, "learning_rate": 2.28644144388801e-08, "loss": 0.0006, "step": 178320 }, { "epoch": 2.917941585535466, "grad_norm": 0.041185732930898666, "learning_rate": 2.277357142042025e-08, "loss": 0.0011, "step": 178330 }, { "epoch": 2.918105211486542, "grad_norm": 0.08303149044513702, "learning_rate": 2.2682908814454785e-08, "loss": 0.0009, "step": 178340 }, { "epoch": 2.9182688374376173, "grad_norm": 0.04346705600619316, "learning_rate": 2.2592426624271635e-08, "loss": 0.0003, "step": 178350 }, { "epoch": 2.9184324633886933, "grad_norm": 0.016193971037864685, "learning_rate": 2.2502124853150396e-08, "loss": 0.0006, "step": 178360 }, { "epoch": 2.9185960893397693, "grad_norm": 0.09852645546197891, "learning_rate": 2.241200350436401e-08, "loss": 0.0005, "step": 178370 }, { "epoch": 2.918759715290845, "grad_norm": 0.054551128298044205, "learning_rate": 2.2322062581179305e-08, "loss": 0.0004, "step": 178380 }, { "epoch": 2.918923341241921, "grad_norm": 0.1080770269036293, "learning_rate": 2.2232302086855894e-08, "loss": 0.0008, "step": 178390 }, { "epoch": 2.919086967192997, "grad_norm": 0.07357295602560043, "learning_rate": 2.2142722024647844e-08, "loss": 0.0008, "step": 178400 }, { "epoch": 2.9192505931440724, "grad_norm": 0.1444116085767746, "learning_rate": 2.2053322397802558e-08, "loss": 0.0014, "step": 178410 }, { "epoch": 2.9194142190951484, "grad_norm": 0.03502218797802925, "learning_rate": 2.196410320955966e-08, "loss": 0.0004, "step": 178420 }, { "epoch": 2.9195778450462244, "grad_norm": 0.09382104128599167, "learning_rate": 2.1875064463153794e-08, "loss": 0.0005, "step": 178430 }, { "epoch": 2.9197414709973, "grad_norm": 0.022338813170790672, "learning_rate": 2.1786206161812373e-08, "loss": 0.0006, "step": 178440 }, { "epoch": 2.919905096948376, "grad_norm": 0.1384303718805313, "learning_rate": 2.1697528308756154e-08, "loss": 0.0011, "step": 178450 }, { "epoch": 2.920068722899452, "grad_norm": 0.02841377817094326, "learning_rate": 2.1609030907199234e-08, "loss": 0.0004, "step": 178460 }, { "epoch": 2.9202323488505275, "grad_norm": 0.023619532585144043, "learning_rate": 2.152071396035016e-08, "loss": 0.0003, "step": 178470 }, { "epoch": 2.9203959748016035, "grad_norm": 0.038409534841775894, "learning_rate": 2.14325774714097e-08, "loss": 0.0005, "step": 178480 }, { "epoch": 2.9205596007526795, "grad_norm": 0.011579222045838833, "learning_rate": 2.1344621443571966e-08, "loss": 0.0003, "step": 178490 }, { "epoch": 2.920723226703755, "grad_norm": 0.012659910134971142, "learning_rate": 2.1256845880026634e-08, "loss": 0.0007, "step": 178500 }, { "epoch": 2.920886852654831, "grad_norm": 0.056106604635715485, "learning_rate": 2.1169250783953933e-08, "loss": 0.0008, "step": 178510 }, { "epoch": 2.921050478605907, "grad_norm": 0.024914825335144997, "learning_rate": 2.1081836158529655e-08, "loss": 0.0008, "step": 178520 }, { "epoch": 2.9212141045569826, "grad_norm": 0.08566151559352875, "learning_rate": 2.0994602006922383e-08, "loss": 0.0006, "step": 178530 }, { "epoch": 2.9213777305080586, "grad_norm": 0.0793798640370369, "learning_rate": 2.0907548332294025e-08, "loss": 0.0006, "step": 178540 }, { "epoch": 2.9215413564591346, "grad_norm": 0.06904827803373337, "learning_rate": 2.0820675137800396e-08, "loss": 0.0005, "step": 178550 }, { "epoch": 2.92170498241021, "grad_norm": 0.02474076673388481, "learning_rate": 2.073398242659008e-08, "loss": 0.0014, "step": 178560 }, { "epoch": 2.921868608361286, "grad_norm": 0.21148435771465302, "learning_rate": 2.064747020180502e-08, "loss": 0.0011, "step": 178570 }, { "epoch": 2.922032234312362, "grad_norm": 0.018140675500035286, "learning_rate": 2.0561138466582142e-08, "loss": 0.0006, "step": 178580 }, { "epoch": 2.9221958602634377, "grad_norm": 0.0843304768204689, "learning_rate": 2.0474987224050613e-08, "loss": 0.001, "step": 178590 }, { "epoch": 2.9223594862145137, "grad_norm": 0.06330302357673645, "learning_rate": 2.0389016477331825e-08, "loss": 0.0008, "step": 178600 }, { "epoch": 2.9225231121655897, "grad_norm": 0.09981589764356613, "learning_rate": 2.0303226229543837e-08, "loss": 0.0006, "step": 178610 }, { "epoch": 2.9226867381166652, "grad_norm": 0.1229078397154808, "learning_rate": 2.0217616483795278e-08, "loss": 0.0005, "step": 178620 }, { "epoch": 2.9228503640677412, "grad_norm": 0.21442505717277527, "learning_rate": 2.0132187243189215e-08, "loss": 0.0007, "step": 178630 }, { "epoch": 2.9230139900188172, "grad_norm": 0.06810841709375381, "learning_rate": 2.0046938510822622e-08, "loss": 0.001, "step": 178640 }, { "epoch": 2.923177615969893, "grad_norm": 0.11991714686155319, "learning_rate": 1.9961870289785802e-08, "loss": 0.0006, "step": 178650 }, { "epoch": 2.923341241920969, "grad_norm": 0.05042572319507599, "learning_rate": 1.987698258316184e-08, "loss": 0.0007, "step": 178660 }, { "epoch": 2.9235048678720448, "grad_norm": 0.008573350496590137, "learning_rate": 1.9792275394027727e-08, "loss": 0.0009, "step": 178670 }, { "epoch": 2.9236684938231203, "grad_norm": 0.1576259881258011, "learning_rate": 1.9707748725454334e-08, "loss": 0.0011, "step": 178680 }, { "epoch": 2.9238321197741963, "grad_norm": 0.018002888187766075, "learning_rate": 1.9623402580505324e-08, "loss": 0.0005, "step": 178690 }, { "epoch": 2.923995745725272, "grad_norm": 0.09167953580617905, "learning_rate": 1.953923696223714e-08, "loss": 0.0006, "step": 178700 }, { "epoch": 2.924159371676348, "grad_norm": 0.011544295586645603, "learning_rate": 1.945525187370234e-08, "loss": 0.001, "step": 178710 }, { "epoch": 2.924322997627424, "grad_norm": 0.06612348556518555, "learning_rate": 1.9371447317943494e-08, "loss": 0.0006, "step": 178720 }, { "epoch": 2.9244866235784994, "grad_norm": 0.03486032038927078, "learning_rate": 1.9287823297999274e-08, "loss": 0.0004, "step": 178730 }, { "epoch": 2.9246502495295754, "grad_norm": 0.03081473894417286, "learning_rate": 1.9204379816901153e-08, "loss": 0.0005, "step": 178740 }, { "epoch": 2.924813875480651, "grad_norm": 0.06880506873130798, "learning_rate": 1.912111687767282e-08, "loss": 0.0005, "step": 178750 }, { "epoch": 2.924977501431727, "grad_norm": 0.20954085886478424, "learning_rate": 1.9038034483332414e-08, "loss": 0.0006, "step": 178760 }, { "epoch": 2.925141127382803, "grad_norm": 0.015317626297473907, "learning_rate": 1.8955132636892526e-08, "loss": 0.0004, "step": 178770 }, { "epoch": 2.9253047533338785, "grad_norm": 0.09438851475715637, "learning_rate": 1.8872411341356866e-08, "loss": 0.0004, "step": 178780 }, { "epoch": 2.9254683792849545, "grad_norm": 0.03295215591788292, "learning_rate": 1.87898705997247e-08, "loss": 0.0009, "step": 178790 }, { "epoch": 2.9256320052360305, "grad_norm": 0.06873004883527756, "learning_rate": 1.8707510414988083e-08, "loss": 0.0013, "step": 178800 }, { "epoch": 2.925795631187106, "grad_norm": 0.1080973744392395, "learning_rate": 1.862533079013129e-08, "loss": 0.0011, "step": 178810 }, { "epoch": 2.925959257138182, "grad_norm": 0.023874951526522636, "learning_rate": 1.854333172813416e-08, "loss": 0.0005, "step": 178820 }, { "epoch": 2.926122883089258, "grad_norm": 0.06175655126571655, "learning_rate": 1.8461513231968764e-08, "loss": 0.0007, "step": 178830 }, { "epoch": 2.9262865090403336, "grad_norm": 0.05289186164736748, "learning_rate": 1.8379875304600502e-08, "loss": 0.0002, "step": 178840 }, { "epoch": 2.9264501349914096, "grad_norm": 0.11557146161794662, "learning_rate": 1.829841794898868e-08, "loss": 0.0009, "step": 178850 }, { "epoch": 2.9266137609424856, "grad_norm": 0.07466714084148407, "learning_rate": 1.8217141168086484e-08, "loss": 0.0004, "step": 178860 }, { "epoch": 2.926777386893561, "grad_norm": 0.06818711757659912, "learning_rate": 1.813604496483934e-08, "loss": 0.0006, "step": 178870 }, { "epoch": 2.926941012844637, "grad_norm": 0.106024831533432, "learning_rate": 1.805512934218656e-08, "loss": 0.0014, "step": 178880 }, { "epoch": 2.927104638795713, "grad_norm": 0.13435648381710052, "learning_rate": 1.7974394303061916e-08, "loss": 0.0007, "step": 178890 }, { "epoch": 2.9272682647467887, "grad_norm": 0.06102341040968895, "learning_rate": 1.7893839850391393e-08, "loss": 0.0007, "step": 178900 }, { "epoch": 2.9274318906978647, "grad_norm": 0.005345731507986784, "learning_rate": 1.781346598709488e-08, "loss": 0.0006, "step": 178910 }, { "epoch": 2.9275955166489407, "grad_norm": 0.07079720497131348, "learning_rate": 1.7733272716086158e-08, "loss": 0.0011, "step": 178920 }, { "epoch": 2.9277591426000162, "grad_norm": 0.0029305056668817997, "learning_rate": 1.7653260040271235e-08, "loss": 0.0039, "step": 178930 }, { "epoch": 2.9279227685510922, "grad_norm": 0.05053407698869705, "learning_rate": 1.7573427962551683e-08, "loss": 0.0015, "step": 178940 }, { "epoch": 2.9280863945021682, "grad_norm": 0.024717232212424278, "learning_rate": 1.7493776485819624e-08, "loss": 0.0008, "step": 178950 }, { "epoch": 2.928250020453244, "grad_norm": 0.1315728724002838, "learning_rate": 1.741430561296331e-08, "loss": 0.0008, "step": 178960 }, { "epoch": 2.92841364640432, "grad_norm": 0.058604784309864044, "learning_rate": 1.7335015346863215e-08, "loss": 0.0005, "step": 178970 }, { "epoch": 2.9285772723553958, "grad_norm": 0.054452478885650635, "learning_rate": 1.7255905690393148e-08, "loss": 0.0005, "step": 178980 }, { "epoch": 2.9287408983064713, "grad_norm": 0.01984330266714096, "learning_rate": 1.7176976646420818e-08, "loss": 0.0003, "step": 178990 }, { "epoch": 2.9289045242575473, "grad_norm": 0.03282059729099274, "learning_rate": 1.7098228217807268e-08, "loss": 0.0004, "step": 179000 }, { "epoch": 2.9290681502086233, "grad_norm": 0.08029294013977051, "learning_rate": 1.701966040740688e-08, "loss": 0.0006, "step": 179010 }, { "epoch": 2.929231776159699, "grad_norm": 0.034790992736816406, "learning_rate": 1.694127321806738e-08, "loss": 0.0012, "step": 179020 }, { "epoch": 2.929395402110775, "grad_norm": 0.009209046140313148, "learning_rate": 1.6863066652630377e-08, "loss": 0.0012, "step": 179030 }, { "epoch": 2.929559028061851, "grad_norm": 0.14171026647090912, "learning_rate": 1.678504071393028e-08, "loss": 0.0007, "step": 179040 }, { "epoch": 2.9297226540129264, "grad_norm": 0.03202229365706444, "learning_rate": 1.670719540479593e-08, "loss": 0.0004, "step": 179050 }, { "epoch": 2.9298862799640024, "grad_norm": 0.09599992632865906, "learning_rate": 1.662953072804896e-08, "loss": 0.0006, "step": 179060 }, { "epoch": 2.930049905915078, "grad_norm": 0.041298095136880875, "learning_rate": 1.6552046686503786e-08, "loss": 0.0005, "step": 179070 }, { "epoch": 2.930213531866154, "grad_norm": 0.09955354779958725, "learning_rate": 1.647474328296983e-08, "loss": 0.0006, "step": 179080 }, { "epoch": 2.93037715781723, "grad_norm": 0.0810522511601448, "learning_rate": 1.6397620520248735e-08, "loss": 0.0011, "step": 179090 }, { "epoch": 2.9305407837683055, "grad_norm": 0.10365840047597885, "learning_rate": 1.6320678401136048e-08, "loss": 0.0015, "step": 179100 }, { "epoch": 2.9307044097193815, "grad_norm": 0.09869789332151413, "learning_rate": 1.6243916928420643e-08, "loss": 0.0005, "step": 179110 }, { "epoch": 2.930868035670457, "grad_norm": 0.056728944182395935, "learning_rate": 1.61673361048853e-08, "loss": 0.0005, "step": 179120 }, { "epoch": 2.931031661621533, "grad_norm": 0.07496386021375656, "learning_rate": 1.6090935933305573e-08, "loss": 0.001, "step": 179130 }, { "epoch": 2.931195287572609, "grad_norm": 0.16725483536720276, "learning_rate": 1.6014716416450915e-08, "loss": 0.0007, "step": 179140 }, { "epoch": 2.9313589135236846, "grad_norm": 0.07106459140777588, "learning_rate": 1.593867755708467e-08, "loss": 0.0017, "step": 179150 }, { "epoch": 2.9315225394747606, "grad_norm": 0.09330414235591888, "learning_rate": 1.5862819357961857e-08, "loss": 0.0006, "step": 179160 }, { "epoch": 2.9316861654258366, "grad_norm": 0.054158855229616165, "learning_rate": 1.578714182183305e-08, "loss": 0.0007, "step": 179170 }, { "epoch": 2.931849791376912, "grad_norm": 0.03244738653302193, "learning_rate": 1.571164495144106e-08, "loss": 0.0019, "step": 179180 }, { "epoch": 2.932013417327988, "grad_norm": 0.07225721329450607, "learning_rate": 1.5636328749522588e-08, "loss": 0.0011, "step": 179190 }, { "epoch": 2.932177043279064, "grad_norm": 0.12369494885206223, "learning_rate": 1.5561193218807114e-08, "loss": 0.0006, "step": 179200 }, { "epoch": 2.9323406692301397, "grad_norm": 0.032483603805303574, "learning_rate": 1.5486238362019122e-08, "loss": 0.0005, "step": 179210 }, { "epoch": 2.9325042951812157, "grad_norm": 0.1872226595878601, "learning_rate": 1.541146418187478e-08, "loss": 0.0009, "step": 179220 }, { "epoch": 2.9326679211322917, "grad_norm": 0.043272245675325394, "learning_rate": 1.5336870681084693e-08, "loss": 0.0006, "step": 179230 }, { "epoch": 2.9328315470833672, "grad_norm": 0.06240785866975784, "learning_rate": 1.5262457862353363e-08, "loss": 0.0007, "step": 179240 }, { "epoch": 2.9329951730344432, "grad_norm": 0.08079081773757935, "learning_rate": 1.5188225728376415e-08, "loss": 0.001, "step": 179250 }, { "epoch": 2.9331587989855192, "grad_norm": 0.012990414164960384, "learning_rate": 1.5114174281846138e-08, "loss": 0.0004, "step": 179260 }, { "epoch": 2.933322424936595, "grad_norm": 0.05128638818860054, "learning_rate": 1.5040303525446498e-08, "loss": 0.0003, "step": 179270 }, { "epoch": 2.933486050887671, "grad_norm": 0.055838439613580704, "learning_rate": 1.496661346185424e-08, "loss": 0.0005, "step": 179280 }, { "epoch": 2.933649676838747, "grad_norm": 0.15984193980693817, "learning_rate": 1.4893104093741672e-08, "loss": 0.0008, "step": 179290 }, { "epoch": 2.9338133027898223, "grad_norm": 0.010083814151585102, "learning_rate": 1.481977542377222e-08, "loss": 0.0003, "step": 179300 }, { "epoch": 2.9339769287408983, "grad_norm": 0.04718276485800743, "learning_rate": 1.4746627454604311e-08, "loss": 0.0007, "step": 179310 }, { "epoch": 2.9341405546919743, "grad_norm": 0.030455535277724266, "learning_rate": 1.467366018888916e-08, "loss": 0.0008, "step": 179320 }, { "epoch": 2.93430418064305, "grad_norm": 0.03171857073903084, "learning_rate": 1.4600873629272427e-08, "loss": 0.0008, "step": 179330 }, { "epoch": 2.934467806594126, "grad_norm": 0.05610997974872589, "learning_rate": 1.4528267778392003e-08, "loss": 0.0009, "step": 179340 }, { "epoch": 2.934631432545202, "grad_norm": 0.0778215080499649, "learning_rate": 1.4455842638879113e-08, "loss": 0.001, "step": 179350 }, { "epoch": 2.9347950584962774, "grad_norm": 0.07766593992710114, "learning_rate": 1.4383598213359995e-08, "loss": 0.0007, "step": 179360 }, { "epoch": 2.9349586844473534, "grad_norm": 0.09563033282756805, "learning_rate": 1.4311534504452551e-08, "loss": 0.0007, "step": 179370 }, { "epoch": 2.9351223103984294, "grad_norm": 0.006421692669391632, "learning_rate": 1.4239651514769138e-08, "loss": 0.0007, "step": 179380 }, { "epoch": 2.935285936349505, "grad_norm": 0.22701066732406616, "learning_rate": 1.4167949246915446e-08, "loss": 0.0011, "step": 179390 }, { "epoch": 2.935449562300581, "grad_norm": 0.09389105439186096, "learning_rate": 1.4096427703490512e-08, "loss": 0.0006, "step": 179400 }, { "epoch": 2.935613188251657, "grad_norm": 0.04223419353365898, "learning_rate": 1.4025086887086703e-08, "loss": 0.0007, "step": 179410 }, { "epoch": 2.9357768142027325, "grad_norm": 0.23792944848537445, "learning_rate": 1.3953926800290284e-08, "loss": 0.0009, "step": 179420 }, { "epoch": 2.9359404401538085, "grad_norm": 0.043693047016859055, "learning_rate": 1.3882947445680306e-08, "loss": 0.0005, "step": 179430 }, { "epoch": 2.9361040661048845, "grad_norm": 0.010036669671535492, "learning_rate": 1.3812148825829152e-08, "loss": 0.0003, "step": 179440 }, { "epoch": 2.93626769205596, "grad_norm": 0.14526362717151642, "learning_rate": 1.3741530943304216e-08, "loss": 0.0009, "step": 179450 }, { "epoch": 2.936431318007036, "grad_norm": 0.05847657844424248, "learning_rate": 1.3671093800664003e-08, "loss": 0.0003, "step": 179460 }, { "epoch": 2.9365949439581116, "grad_norm": 0.06222475692629814, "learning_rate": 1.360083740046314e-08, "loss": 0.0011, "step": 179470 }, { "epoch": 2.9367585699091876, "grad_norm": 0.003975050989538431, "learning_rate": 1.3530761745246812e-08, "loss": 0.0005, "step": 179480 }, { "epoch": 2.9369221958602636, "grad_norm": 0.03169276565313339, "learning_rate": 1.3460866837555764e-08, "loss": 0.0004, "step": 179490 }, { "epoch": 2.937085821811339, "grad_norm": 0.37221333384513855, "learning_rate": 1.339115267992408e-08, "loss": 0.0018, "step": 179500 }, { "epoch": 2.937249447762415, "grad_norm": 0.07852032035589218, "learning_rate": 1.3321619274877517e-08, "loss": 0.0011, "step": 179510 }, { "epoch": 2.9374130737134907, "grad_norm": 0.23444631695747375, "learning_rate": 1.3252266624936838e-08, "loss": 0.0008, "step": 179520 }, { "epoch": 2.9375766996645667, "grad_norm": 0.055987413972616196, "learning_rate": 1.3183094732616696e-08, "loss": 0.0004, "step": 179530 }, { "epoch": 2.9377403256156427, "grad_norm": 0.08950019627809525, "learning_rate": 1.3114103600423977e-08, "loss": 0.0005, "step": 179540 }, { "epoch": 2.9379039515667182, "grad_norm": 0.0325082391500473, "learning_rate": 1.3045293230858902e-08, "loss": 0.0021, "step": 179550 }, { "epoch": 2.9380675775177942, "grad_norm": 0.034914035350084305, "learning_rate": 1.2976663626416142e-08, "loss": 0.001, "step": 179560 }, { "epoch": 2.9382312034688702, "grad_norm": 0.001426022034138441, "learning_rate": 1.2908214789583706e-08, "loss": 0.0013, "step": 179570 }, { "epoch": 2.938394829419946, "grad_norm": 0.07312200963497162, "learning_rate": 1.2839946722841834e-08, "loss": 0.0004, "step": 179580 }, { "epoch": 2.938558455371022, "grad_norm": 0.01221251580864191, "learning_rate": 1.2771859428665767e-08, "loss": 0.0007, "step": 179590 }, { "epoch": 2.938722081322098, "grad_norm": 0.06478913128376007, "learning_rate": 1.2703952909523531e-08, "loss": 0.0005, "step": 179600 }, { "epoch": 2.9388857072731733, "grad_norm": 0.15559005737304688, "learning_rate": 1.2636227167875936e-08, "loss": 0.0005, "step": 179610 }, { "epoch": 2.9390493332242493, "grad_norm": 0.046208590269088745, "learning_rate": 1.2568682206178796e-08, "loss": 0.0003, "step": 179620 }, { "epoch": 2.9392129591753253, "grad_norm": 0.016566820442676544, "learning_rate": 1.2501318026879594e-08, "loss": 0.0012, "step": 179630 }, { "epoch": 2.939376585126401, "grad_norm": 0.03489551693201065, "learning_rate": 1.243413463242027e-08, "loss": 0.0009, "step": 179640 }, { "epoch": 2.939540211077477, "grad_norm": 0.04836392030119896, "learning_rate": 1.236713202523665e-08, "loss": 0.0011, "step": 179650 }, { "epoch": 2.939703837028553, "grad_norm": 0.044863633811473846, "learning_rate": 1.2300310207757348e-08, "loss": 0.0007, "step": 179660 }, { "epoch": 2.9398674629796284, "grad_norm": 0.08851360529661179, "learning_rate": 1.2233669182403207e-08, "loss": 0.0008, "step": 179670 }, { "epoch": 2.9400310889307044, "grad_norm": 0.01972980983555317, "learning_rate": 1.2167208951591736e-08, "loss": 0.0004, "step": 179680 }, { "epoch": 2.9401947148817804, "grad_norm": 0.012178403325378895, "learning_rate": 1.2100929517730454e-08, "loss": 0.0008, "step": 179690 }, { "epoch": 2.940358340832856, "grad_norm": 0.09655093401670456, "learning_rate": 1.2034830883222437e-08, "loss": 0.0004, "step": 179700 }, { "epoch": 2.940521966783932, "grad_norm": 0.002753537381067872, "learning_rate": 1.1968913050464104e-08, "loss": 0.0004, "step": 179710 }, { "epoch": 2.940685592735008, "grad_norm": 0.00786948949098587, "learning_rate": 1.1903176021843543e-08, "loss": 0.0004, "step": 179720 }, { "epoch": 2.9408492186860835, "grad_norm": 0.04288225248456001, "learning_rate": 1.1837619799744404e-08, "loss": 0.0004, "step": 179730 }, { "epoch": 2.9410128446371595, "grad_norm": 0.06836715340614319, "learning_rate": 1.177224438654312e-08, "loss": 0.0006, "step": 179740 }, { "epoch": 2.9411764705882355, "grad_norm": 0.039772454649209976, "learning_rate": 1.1707049784608903e-08, "loss": 0.0005, "step": 179750 }, { "epoch": 2.941340096539311, "grad_norm": 0.003927233163267374, "learning_rate": 1.1642035996304868e-08, "loss": 0.0005, "step": 179760 }, { "epoch": 2.941503722490387, "grad_norm": 0.07734093070030212, "learning_rate": 1.1577203023988015e-08, "loss": 0.0006, "step": 179770 }, { "epoch": 2.941667348441463, "grad_norm": 0.018544917926192284, "learning_rate": 1.1512550870008687e-08, "loss": 0.0008, "step": 179780 }, { "epoch": 2.9418309743925386, "grad_norm": 0.07599605619907379, "learning_rate": 1.14480795367089e-08, "loss": 0.0005, "step": 179790 }, { "epoch": 2.9419946003436146, "grad_norm": 0.051710207015275955, "learning_rate": 1.1383789026427339e-08, "loss": 0.0007, "step": 179800 }, { "epoch": 2.9421582262946906, "grad_norm": 0.001478518359363079, "learning_rate": 1.131967934149325e-08, "loss": 0.0007, "step": 179810 }, { "epoch": 2.942321852245766, "grad_norm": 0.04656429588794708, "learning_rate": 1.1255750484230887e-08, "loss": 0.001, "step": 179820 }, { "epoch": 2.942485478196842, "grad_norm": 0.06156375631690025, "learning_rate": 1.1192002456957286e-08, "loss": 0.001, "step": 179830 }, { "epoch": 2.9426491041479177, "grad_norm": 0.02736838534474373, "learning_rate": 1.1128435261983373e-08, "loss": 0.0007, "step": 179840 }, { "epoch": 2.9428127300989937, "grad_norm": 0.04875340312719345, "learning_rate": 1.1065048901613418e-08, "loss": 0.0007, "step": 179850 }, { "epoch": 2.9429763560500697, "grad_norm": 0.029956664890050888, "learning_rate": 1.1001843378145028e-08, "loss": 0.0022, "step": 179860 }, { "epoch": 2.9431399820011452, "grad_norm": 0.03844644874334335, "learning_rate": 1.0938818693868591e-08, "loss": 0.0005, "step": 179870 }, { "epoch": 2.9433036079522212, "grad_norm": 0.0018806824227795005, "learning_rate": 1.0875974851069504e-08, "loss": 0.0003, "step": 179880 }, { "epoch": 2.943467233903297, "grad_norm": 0.040779080241918564, "learning_rate": 1.0813311852024832e-08, "loss": 0.0004, "step": 179890 }, { "epoch": 2.943630859854373, "grad_norm": 0.09237122535705566, "learning_rate": 1.0750829699006649e-08, "loss": 0.0013, "step": 179900 }, { "epoch": 2.943794485805449, "grad_norm": 0.0704556405544281, "learning_rate": 1.0688528394279807e-08, "loss": 0.0011, "step": 179910 }, { "epoch": 2.9439581117565243, "grad_norm": 0.04864276573061943, "learning_rate": 1.0626407940101946e-08, "loss": 0.0004, "step": 179920 }, { "epoch": 2.9441217377076003, "grad_norm": 0.5201018452644348, "learning_rate": 1.0564468338725153e-08, "loss": 0.0009, "step": 179930 }, { "epoch": 2.9442853636586763, "grad_norm": 0.1330273449420929, "learning_rate": 1.0502709592394855e-08, "loss": 0.0007, "step": 179940 }, { "epoch": 2.944448989609752, "grad_norm": 0.18576321005821228, "learning_rate": 1.044113170334926e-08, "loss": 0.0007, "step": 179950 }, { "epoch": 2.944612615560828, "grad_norm": 0.026141202077269554, "learning_rate": 1.0379734673820474e-08, "loss": 0.0011, "step": 179960 }, { "epoch": 2.944776241511904, "grad_norm": 0.06412602961063385, "learning_rate": 1.0318518506034491e-08, "loss": 0.0007, "step": 179970 }, { "epoch": 2.9449398674629794, "grad_norm": 0.006818361114710569, "learning_rate": 1.0257483202209539e-08, "loss": 0.0009, "step": 179980 }, { "epoch": 2.9451034934140554, "grad_norm": 0.0350944884121418, "learning_rate": 1.0196628764558291e-08, "loss": 0.0012, "step": 179990 }, { "epoch": 2.9452671193651314, "grad_norm": 0.004240986425429583, "learning_rate": 1.0135955195286207e-08, "loss": 0.0013, "step": 180000 }, { "epoch": 2.945430745316207, "grad_norm": 0.01066744513809681, "learning_rate": 1.0075462496593192e-08, "loss": 0.0003, "step": 180010 }, { "epoch": 2.945594371267283, "grad_norm": 0.05837064981460571, "learning_rate": 1.0015150670671937e-08, "loss": 0.0004, "step": 180020 }, { "epoch": 2.945757997218359, "grad_norm": 0.032356880605220795, "learning_rate": 9.955019719707914e-09, "loss": 0.0007, "step": 180030 }, { "epoch": 2.9459216231694345, "grad_norm": 0.004852637182921171, "learning_rate": 9.895069645881605e-09, "loss": 0.0005, "step": 180040 }, { "epoch": 2.9460852491205105, "grad_norm": 0.15991511940956116, "learning_rate": 9.83530045136516e-09, "loss": 0.0005, "step": 180050 }, { "epoch": 2.9462488750715865, "grad_norm": 0.026175465434789658, "learning_rate": 9.775712138325733e-09, "loss": 0.0005, "step": 180060 }, { "epoch": 2.946412501022662, "grad_norm": 0.0007901117787696421, "learning_rate": 9.716304708923262e-09, "loss": 0.0008, "step": 180070 }, { "epoch": 2.946576126973738, "grad_norm": 0.0336458720266819, "learning_rate": 9.65707816531103e-09, "loss": 0.0012, "step": 180080 }, { "epoch": 2.946739752924814, "grad_norm": 0.024761490523815155, "learning_rate": 9.59803250963509e-09, "loss": 0.0008, "step": 180090 }, { "epoch": 2.9469033788758896, "grad_norm": 0.0074148597195744514, "learning_rate": 9.53916774403707e-09, "loss": 0.0008, "step": 180100 }, { "epoch": 2.9470670048269656, "grad_norm": 0.0588478222489357, "learning_rate": 9.480483870649704e-09, "loss": 0.001, "step": 180110 }, { "epoch": 2.9472306307780416, "grad_norm": 0.05119823291897774, "learning_rate": 9.42198089160018e-09, "loss": 0.0008, "step": 180120 }, { "epoch": 2.947394256729117, "grad_norm": 0.10345159471035004, "learning_rate": 9.36365880900958e-09, "loss": 0.0007, "step": 180130 }, { "epoch": 2.947557882680193, "grad_norm": 0.22478891909122467, "learning_rate": 9.305517624991212e-09, "loss": 0.0008, "step": 180140 }, { "epoch": 2.947721508631269, "grad_norm": 0.0670064240694046, "learning_rate": 9.24755734165339e-09, "loss": 0.0016, "step": 180150 }, { "epoch": 2.9478851345823447, "grad_norm": 0.08296709507703781, "learning_rate": 9.189777961096657e-09, "loss": 0.0006, "step": 180160 }, { "epoch": 2.9480487605334207, "grad_norm": 0.034969232976436615, "learning_rate": 9.132179485415449e-09, "loss": 0.0009, "step": 180170 }, { "epoch": 2.9482123864844967, "grad_norm": 0.00866776704788208, "learning_rate": 9.07476191669754e-09, "loss": 0.0011, "step": 180180 }, { "epoch": 2.9483760124355722, "grad_norm": 0.2486938238143921, "learning_rate": 9.017525257023485e-09, "loss": 0.001, "step": 180190 }, { "epoch": 2.9485396383866482, "grad_norm": 0.09883379191160202, "learning_rate": 8.960469508469404e-09, "loss": 0.0003, "step": 180200 }, { "epoch": 2.9487032643377242, "grad_norm": 0.030171388760209084, "learning_rate": 8.903594673102533e-09, "loss": 0.0009, "step": 180210 }, { "epoch": 2.9488668902888, "grad_norm": 0.07790575176477432, "learning_rate": 8.846900752984e-09, "loss": 0.0003, "step": 180220 }, { "epoch": 2.9490305162398758, "grad_norm": 0.07743711769580841, "learning_rate": 8.790387750169938e-09, "loss": 0.0008, "step": 180230 }, { "epoch": 2.9491941421909513, "grad_norm": 0.0037441018503159285, "learning_rate": 8.734055666708153e-09, "loss": 0.0005, "step": 180240 }, { "epoch": 2.9493577681420273, "grad_norm": 0.02908901497721672, "learning_rate": 8.6779045046409e-09, "loss": 0.0007, "step": 180250 }, { "epoch": 2.9495213940931033, "grad_norm": 0.018190663307905197, "learning_rate": 8.621934266002663e-09, "loss": 0.0007, "step": 180260 }, { "epoch": 2.949685020044179, "grad_norm": 0.1992783546447754, "learning_rate": 8.566144952823485e-09, "loss": 0.0008, "step": 180270 }, { "epoch": 2.949848645995255, "grad_norm": 0.028692010790109634, "learning_rate": 8.510536567124528e-09, "loss": 0.0004, "step": 180280 }, { "epoch": 2.9500122719463304, "grad_norm": 0.07820817828178406, "learning_rate": 8.455109110921956e-09, "loss": 0.0007, "step": 180290 }, { "epoch": 2.9501758978974064, "grad_norm": 0.05783466249704361, "learning_rate": 8.399862586224716e-09, "loss": 0.0004, "step": 180300 }, { "epoch": 2.9503395238484824, "grad_norm": 0.012966896407306194, "learning_rate": 8.344796995036208e-09, "loss": 0.0009, "step": 180310 }, { "epoch": 2.950503149799558, "grad_norm": 0.02000482752919197, "learning_rate": 8.289912339350947e-09, "loss": 0.0011, "step": 180320 }, { "epoch": 2.950666775750634, "grad_norm": 0.08531814813613892, "learning_rate": 8.235208621159008e-09, "loss": 0.0008, "step": 180330 }, { "epoch": 2.95083040170171, "grad_norm": 0.008870822377502918, "learning_rate": 8.180685842443803e-09, "loss": 0.0032, "step": 180340 }, { "epoch": 2.9509940276527855, "grad_norm": 0.03043711557984352, "learning_rate": 8.126344005180975e-09, "loss": 0.0006, "step": 180350 }, { "epoch": 2.9511576536038615, "grad_norm": 0.04350176081061363, "learning_rate": 8.072183111340615e-09, "loss": 0.0006, "step": 180360 }, { "epoch": 2.9513212795549375, "grad_norm": 0.07302550971508026, "learning_rate": 8.018203162885597e-09, "loss": 0.0012, "step": 180370 }, { "epoch": 2.951484905506013, "grad_norm": 0.10953707247972488, "learning_rate": 7.964404161773242e-09, "loss": 0.0004, "step": 180380 }, { "epoch": 2.951648531457089, "grad_norm": 0.11266885697841644, "learning_rate": 7.910786109953105e-09, "loss": 0.0008, "step": 180390 }, { "epoch": 2.951812157408165, "grad_norm": 0.20524853467941284, "learning_rate": 7.85734900936863e-09, "loss": 0.0008, "step": 180400 }, { "epoch": 2.9519757833592406, "grad_norm": 0.018620336428284645, "learning_rate": 7.804092861957158e-09, "loss": 0.0005, "step": 180410 }, { "epoch": 2.9521394093103166, "grad_norm": 0.09038258343935013, "learning_rate": 7.751017669648254e-09, "loss": 0.0007, "step": 180420 }, { "epoch": 2.9523030352613926, "grad_norm": 0.10112320631742477, "learning_rate": 7.698123434367043e-09, "loss": 0.0005, "step": 180430 }, { "epoch": 2.952466661212468, "grad_norm": 0.006774208042770624, "learning_rate": 7.645410158029776e-09, "loss": 0.0007, "step": 180440 }, { "epoch": 2.952630287163544, "grad_norm": 0.09673327952623367, "learning_rate": 7.592877842548251e-09, "loss": 0.0007, "step": 180450 }, { "epoch": 2.95279391311462, "grad_norm": 0.3230985999107361, "learning_rate": 7.540526489825395e-09, "loss": 0.0004, "step": 180460 }, { "epoch": 2.9529575390656957, "grad_norm": 0.020928246900439262, "learning_rate": 7.488356101759686e-09, "loss": 0.0005, "step": 180470 }, { "epoch": 2.9531211650167717, "grad_norm": 0.10235882550477982, "learning_rate": 7.4363666802412804e-09, "loss": 0.0011, "step": 180480 }, { "epoch": 2.9532847909678477, "grad_norm": 0.03449095040559769, "learning_rate": 7.3845582271558914e-09, "loss": 0.0007, "step": 180490 }, { "epoch": 2.9534484169189232, "grad_norm": 0.08394557237625122, "learning_rate": 7.332930744380906e-09, "loss": 0.0005, "step": 180500 }, { "epoch": 2.9536120428699992, "grad_norm": 0.11212869733572006, "learning_rate": 7.28148423378705e-09, "loss": 0.0008, "step": 180510 }, { "epoch": 2.9537756688210752, "grad_norm": 0.10503393411636353, "learning_rate": 7.230218697240054e-09, "loss": 0.0006, "step": 180520 }, { "epoch": 2.953939294772151, "grad_norm": 0.102867491543293, "learning_rate": 7.1791341365978765e-09, "loss": 0.0004, "step": 180530 }, { "epoch": 2.9541029207232268, "grad_norm": 0.1311519891023636, "learning_rate": 7.128230553711813e-09, "loss": 0.0007, "step": 180540 }, { "epoch": 2.9542665466743028, "grad_norm": 0.0568668358027935, "learning_rate": 7.0775079504281645e-09, "loss": 0.0008, "step": 180550 }, { "epoch": 2.9544301726253783, "grad_norm": 0.06415213644504547, "learning_rate": 7.026966328583795e-09, "loss": 0.0011, "step": 180560 }, { "epoch": 2.9545937985764543, "grad_norm": 0.03379752114415169, "learning_rate": 6.976605690012239e-09, "loss": 0.0006, "step": 180570 }, { "epoch": 2.9547574245275303, "grad_norm": 0.04067510366439819, "learning_rate": 6.926426036537592e-09, "loss": 0.0004, "step": 180580 }, { "epoch": 2.954921050478606, "grad_norm": 0.06238815188407898, "learning_rate": 6.876427369980066e-09, "loss": 0.0009, "step": 180590 }, { "epoch": 2.955084676429682, "grad_norm": 0.04814431443810463, "learning_rate": 6.8266096921509876e-09, "loss": 0.0007, "step": 180600 }, { "epoch": 2.955248302380758, "grad_norm": 0.003641321789473295, "learning_rate": 6.776973004856691e-09, "loss": 0.0015, "step": 180610 }, { "epoch": 2.9554119283318334, "grad_norm": 0.07586206495761871, "learning_rate": 6.727517309896292e-09, "loss": 0.0004, "step": 180620 }, { "epoch": 2.9555755542829094, "grad_norm": 0.03522724658250809, "learning_rate": 6.678242609062802e-09, "loss": 0.0006, "step": 180630 }, { "epoch": 2.955739180233985, "grad_norm": 0.04871169105172157, "learning_rate": 6.629148904141458e-09, "loss": 0.0009, "step": 180640 }, { "epoch": 2.955902806185061, "grad_norm": 0.04193451255559921, "learning_rate": 6.580236196911949e-09, "loss": 0.0005, "step": 180650 }, { "epoch": 2.956066432136137, "grad_norm": 0.036512892693281174, "learning_rate": 6.5315044891478555e-09, "loss": 0.0005, "step": 180660 }, { "epoch": 2.9562300580872125, "grad_norm": 0.04258807748556137, "learning_rate": 6.4829537826149865e-09, "loss": 0.0007, "step": 180670 }, { "epoch": 2.9563936840382885, "grad_norm": 0.13228893280029297, "learning_rate": 6.434584079073603e-09, "loss": 0.0008, "step": 180680 }, { "epoch": 2.956557309989364, "grad_norm": 0.011979234404861927, "learning_rate": 6.386395380276744e-09, "loss": 0.0006, "step": 180690 }, { "epoch": 2.95672093594044, "grad_norm": 0.003308415412902832, "learning_rate": 6.338387687971348e-09, "loss": 0.0014, "step": 180700 }, { "epoch": 2.956884561891516, "grad_norm": 0.1035497784614563, "learning_rate": 6.290561003897688e-09, "loss": 0.0006, "step": 180710 }, { "epoch": 2.9570481878425916, "grad_norm": 0.177953839302063, "learning_rate": 6.242915329788823e-09, "loss": 0.0007, "step": 180720 }, { "epoch": 2.9572118137936676, "grad_norm": 0.004573258105665445, "learning_rate": 6.19545066737226e-09, "loss": 0.0004, "step": 180730 }, { "epoch": 2.9573754397447436, "grad_norm": 0.05622275173664093, "learning_rate": 6.148167018367734e-09, "loss": 0.0009, "step": 180740 }, { "epoch": 2.957539065695819, "grad_norm": 0.04541414603590965, "learning_rate": 6.101064384490541e-09, "loss": 0.0011, "step": 180750 }, { "epoch": 2.957702691646895, "grad_norm": 0.04543953016400337, "learning_rate": 6.054142767446536e-09, "loss": 0.0009, "step": 180760 }, { "epoch": 2.957866317597971, "grad_norm": 0.07907293736934662, "learning_rate": 6.0074021689376926e-09, "loss": 0.0008, "step": 180770 }, { "epoch": 2.9580299435490467, "grad_norm": 0.00527379522100091, "learning_rate": 5.960842590657656e-09, "loss": 0.0006, "step": 180780 }, { "epoch": 2.9581935695001227, "grad_norm": 0.09399823844432831, "learning_rate": 5.914464034293965e-09, "loss": 0.0009, "step": 180790 }, { "epoch": 2.9583571954511987, "grad_norm": 0.009637588635087013, "learning_rate": 5.868266501528608e-09, "loss": 0.0005, "step": 180800 }, { "epoch": 2.9585208214022742, "grad_norm": 0.09165926277637482, "learning_rate": 5.822249994035245e-09, "loss": 0.0007, "step": 180810 }, { "epoch": 2.9586844473533502, "grad_norm": 0.05676725506782532, "learning_rate": 5.776414513481987e-09, "loss": 0.0003, "step": 180820 }, { "epoch": 2.9588480733044262, "grad_norm": 0.047069769352674484, "learning_rate": 5.730760061530283e-09, "loss": 0.0004, "step": 180830 }, { "epoch": 2.959011699255502, "grad_norm": 0.035100530833005905, "learning_rate": 5.685286639834919e-09, "loss": 0.0011, "step": 180840 }, { "epoch": 2.9591753252065778, "grad_norm": 0.08382165431976318, "learning_rate": 5.639994250045133e-09, "loss": 0.0006, "step": 180850 }, { "epoch": 2.9593389511576538, "grad_norm": 0.02935672551393509, "learning_rate": 5.594882893801279e-09, "loss": 0.0008, "step": 180860 }, { "epoch": 2.9595025771087293, "grad_norm": 0.07367635518312454, "learning_rate": 5.549952572739825e-09, "loss": 0.0011, "step": 180870 }, { "epoch": 2.9596662030598053, "grad_norm": 0.018671972677111626, "learning_rate": 5.505203288488359e-09, "loss": 0.0007, "step": 180880 }, { "epoch": 2.9598298290108813, "grad_norm": 0.058731503784656525, "learning_rate": 5.460635042668916e-09, "loss": 0.0007, "step": 180890 }, { "epoch": 2.959993454961957, "grad_norm": 0.09905584901571274, "learning_rate": 5.416247836897981e-09, "loss": 0.0006, "step": 180900 }, { "epoch": 2.960157080913033, "grad_norm": 0.08817479014396667, "learning_rate": 5.372041672783712e-09, "loss": 0.0005, "step": 180910 }, { "epoch": 2.960320706864109, "grad_norm": 0.03982220217585564, "learning_rate": 5.3280165519287165e-09, "loss": 0.001, "step": 180920 }, { "epoch": 2.9604843328151844, "grad_norm": 0.08821292966604233, "learning_rate": 5.284172475928384e-09, "loss": 0.001, "step": 180930 }, { "epoch": 2.9606479587662604, "grad_norm": 0.0493648424744606, "learning_rate": 5.2405094463731095e-09, "loss": 0.0005, "step": 180940 }, { "epoch": 2.9608115847173364, "grad_norm": 0.10805744677782059, "learning_rate": 5.197027464843851e-09, "loss": 0.0007, "step": 180950 }, { "epoch": 2.960975210668412, "grad_norm": 0.003662973642349243, "learning_rate": 5.1537265329182355e-09, "loss": 0.0004, "step": 180960 }, { "epoch": 2.961138836619488, "grad_norm": 0.23632536828517914, "learning_rate": 5.110606652165007e-09, "loss": 0.0013, "step": 180970 }, { "epoch": 2.961302462570564, "grad_norm": 0.009528382681310177, "learning_rate": 5.0676678241473595e-09, "loss": 0.0007, "step": 180980 }, { "epoch": 2.9614660885216395, "grad_norm": 0.0876237154006958, "learning_rate": 5.024910050422382e-09, "loss": 0.0006, "step": 180990 }, { "epoch": 2.9616297144727155, "grad_norm": 0.03398312255740166, "learning_rate": 4.982333332538836e-09, "loss": 0.0008, "step": 181000 }, { "epoch": 2.961793340423791, "grad_norm": 0.05097678676247597, "learning_rate": 4.9399376720404845e-09, "loss": 0.0011, "step": 181010 }, { "epoch": 2.961956966374867, "grad_norm": 0.03587082773447037, "learning_rate": 4.897723070463878e-09, "loss": 0.0005, "step": 181020 }, { "epoch": 2.962120592325943, "grad_norm": 0.08462606370449066, "learning_rate": 4.855689529339458e-09, "loss": 0.0009, "step": 181030 }, { "epoch": 2.9622842182770186, "grad_norm": 0.002155303256586194, "learning_rate": 4.813837050191006e-09, "loss": 0.0009, "step": 181040 }, { "epoch": 2.9624478442280946, "grad_norm": 0.13236530125141144, "learning_rate": 4.772165634535086e-09, "loss": 0.0004, "step": 181050 }, { "epoch": 2.96261147017917, "grad_norm": 0.09248964488506317, "learning_rate": 4.730675283882713e-09, "loss": 0.0005, "step": 181060 }, { "epoch": 2.962775096130246, "grad_norm": 0.07607980817556381, "learning_rate": 4.689365999737128e-09, "loss": 0.0009, "step": 181070 }, { "epoch": 2.962938722081322, "grad_norm": 0.013493630103766918, "learning_rate": 4.648237783596576e-09, "loss": 0.0013, "step": 181080 }, { "epoch": 2.9631023480323977, "grad_norm": 0.00715222954750061, "learning_rate": 4.607290636951534e-09, "loss": 0.0003, "step": 181090 }, { "epoch": 2.9632659739834737, "grad_norm": 0.15975996851921082, "learning_rate": 4.566524561285812e-09, "loss": 0.0012, "step": 181100 }, { "epoch": 2.9634295999345497, "grad_norm": 0.11691099405288696, "learning_rate": 4.525939558077119e-09, "loss": 0.0005, "step": 181110 }, { "epoch": 2.9635932258856252, "grad_norm": 0.032300256192684174, "learning_rate": 4.485535628797055e-09, "loss": 0.0002, "step": 181120 }, { "epoch": 2.9637568518367012, "grad_norm": 0.03603142499923706, "learning_rate": 4.445312774910005e-09, "loss": 0.0009, "step": 181130 }, { "epoch": 2.9639204777877772, "grad_norm": 0.06174484267830849, "learning_rate": 4.40527099787369e-09, "loss": 0.0008, "step": 181140 }, { "epoch": 2.964084103738853, "grad_norm": 0.034598637372255325, "learning_rate": 4.365410299139727e-09, "loss": 0.0007, "step": 181150 }, { "epoch": 2.964247729689929, "grad_norm": 0.008270945399999619, "learning_rate": 4.325730680153073e-09, "loss": 0.0006, "step": 181160 }, { "epoch": 2.9644113556410048, "grad_norm": 0.11949373036623001, "learning_rate": 4.28623214235202e-09, "loss": 0.0005, "step": 181170 }, { "epoch": 2.9645749815920803, "grad_norm": 0.09367172420024872, "learning_rate": 4.246914687167647e-09, "loss": 0.0008, "step": 181180 }, { "epoch": 2.9647386075431563, "grad_norm": 0.1346801370382309, "learning_rate": 4.2077783160260346e-09, "loss": 0.0004, "step": 181190 }, { "epoch": 2.9649022334942323, "grad_norm": 0.07340075820684433, "learning_rate": 4.168823030345493e-09, "loss": 0.0007, "step": 181200 }, { "epoch": 2.965065859445308, "grad_norm": 0.007032521069049835, "learning_rate": 4.13004883153767e-09, "loss": 0.0015, "step": 181210 }, { "epoch": 2.965229485396384, "grad_norm": 0.047295257449150085, "learning_rate": 4.0914557210086634e-09, "loss": 0.0014, "step": 181220 }, { "epoch": 2.96539311134746, "grad_norm": 0.20061364769935608, "learning_rate": 4.053043700156245e-09, "loss": 0.0012, "step": 181230 }, { "epoch": 2.9655567372985354, "grad_norm": 0.037586621940135956, "learning_rate": 4.014812770374299e-09, "loss": 0.0006, "step": 181240 }, { "epoch": 2.9657203632496114, "grad_norm": 0.0829203873872757, "learning_rate": 3.976762933047829e-09, "loss": 0.001, "step": 181250 }, { "epoch": 2.9658839892006874, "grad_norm": 0.023354124277830124, "learning_rate": 3.9388941895557305e-09, "loss": 0.0013, "step": 181260 }, { "epoch": 2.966047615151763, "grad_norm": 0.05273908004164696, "learning_rate": 3.901206541271352e-09, "loss": 0.0012, "step": 181270 }, { "epoch": 2.966211241102839, "grad_norm": 0.06305355578660965, "learning_rate": 3.863699989560265e-09, "loss": 0.0004, "step": 181280 }, { "epoch": 2.966374867053915, "grad_norm": 0.05551360920071602, "learning_rate": 3.826374535781941e-09, "loss": 0.0007, "step": 181290 }, { "epoch": 2.9665384930049905, "grad_norm": 0.054925598204135895, "learning_rate": 3.789230181289738e-09, "loss": 0.0009, "step": 181300 }, { "epoch": 2.9667021189560665, "grad_norm": 0.08407167345285416, "learning_rate": 3.752266927430359e-09, "loss": 0.0008, "step": 181310 }, { "epoch": 2.9668657449071425, "grad_norm": 0.0009622799698263407, "learning_rate": 3.7154847755421775e-09, "loss": 0.0003, "step": 181320 }, { "epoch": 2.967029370858218, "grad_norm": 0.03719404712319374, "learning_rate": 3.678883726960236e-09, "loss": 0.0011, "step": 181330 }, { "epoch": 2.967192996809294, "grad_norm": 0.031055787578225136, "learning_rate": 3.642463783010142e-09, "loss": 0.0009, "step": 181340 }, { "epoch": 2.96735662276037, "grad_norm": 0.10827036947011948, "learning_rate": 3.606224945011949e-09, "loss": 0.0008, "step": 181350 }, { "epoch": 2.9675202487114456, "grad_norm": 0.03840309754014015, "learning_rate": 3.5701672142801626e-09, "loss": 0.0004, "step": 181360 }, { "epoch": 2.9676838746625216, "grad_norm": 0.12112395465373993, "learning_rate": 3.534290592120959e-09, "loss": 0.0006, "step": 181370 }, { "epoch": 2.9678475006135976, "grad_norm": 0.17998169362545013, "learning_rate": 3.4985950798349654e-09, "loss": 0.0011, "step": 181380 }, { "epoch": 2.968011126564673, "grad_norm": 0.0540277473628521, "learning_rate": 3.4630806787161463e-09, "loss": 0.001, "step": 181390 }, { "epoch": 2.968174752515749, "grad_norm": 0.05453220009803772, "learning_rate": 3.4277473900518055e-09, "loss": 0.0005, "step": 181400 }, { "epoch": 2.9683383784668247, "grad_norm": 0.11262725293636322, "learning_rate": 3.3925952151225848e-09, "loss": 0.0004, "step": 181410 }, { "epoch": 2.9685020044179007, "grad_norm": 0.07697691768407822, "learning_rate": 3.357624155203021e-09, "loss": 0.0006, "step": 181420 }, { "epoch": 2.9686656303689767, "grad_norm": 0.03737746551632881, "learning_rate": 3.322834211560433e-09, "loss": 0.0009, "step": 181430 }, { "epoch": 2.9688292563200522, "grad_norm": 0.07274530082941055, "learning_rate": 3.288225385455479e-09, "loss": 0.0009, "step": 181440 }, { "epoch": 2.9689928822711282, "grad_norm": 0.16672612726688385, "learning_rate": 3.2537976781432666e-09, "loss": 0.0008, "step": 181450 }, { "epoch": 2.969156508222204, "grad_norm": 0.06296700239181519, "learning_rate": 3.2195510908716865e-09, "loss": 0.0007, "step": 181460 }, { "epoch": 2.96932013417328, "grad_norm": 0.02773393504321575, "learning_rate": 3.1854856248819677e-09, "loss": 0.0007, "step": 181470 }, { "epoch": 2.9694837601243558, "grad_norm": 0.07432061433792114, "learning_rate": 3.1516012814086783e-09, "loss": 0.0006, "step": 181480 }, { "epoch": 2.9696473860754313, "grad_norm": 0.040386345237493515, "learning_rate": 3.11789806168028e-09, "loss": 0.0002, "step": 181490 }, { "epoch": 2.9698110120265073, "grad_norm": 0.04588755592703819, "learning_rate": 3.0843759669185736e-09, "loss": 0.0005, "step": 181500 }, { "epoch": 2.9699746379775833, "grad_norm": 0.005032803863286972, "learning_rate": 3.0510349983381428e-09, "loss": 0.0004, "step": 181510 }, { "epoch": 2.970138263928659, "grad_norm": 0.07821472734212875, "learning_rate": 3.0178751571485755e-09, "loss": 0.0008, "step": 181520 }, { "epoch": 2.970301889879735, "grad_norm": 0.27122142910957336, "learning_rate": 2.984896444550578e-09, "loss": 0.0008, "step": 181530 }, { "epoch": 2.970465515830811, "grad_norm": 0.08411692827939987, "learning_rate": 2.9520988617398603e-09, "loss": 0.0005, "step": 181540 }, { "epoch": 2.9706291417818864, "grad_norm": 0.056563347578048706, "learning_rate": 2.919482409905472e-09, "loss": 0.0022, "step": 181550 }, { "epoch": 2.9707927677329624, "grad_norm": 0.012228360399603844, "learning_rate": 2.8870470902298e-09, "loss": 0.0005, "step": 181560 }, { "epoch": 2.9709563936840384, "grad_norm": 0.08094001561403275, "learning_rate": 2.85479290388857e-09, "loss": 0.0011, "step": 181570 }, { "epoch": 2.971120019635114, "grad_norm": 0.15044502913951874, "learning_rate": 2.8227198520508482e-09, "loss": 0.0007, "step": 181580 }, { "epoch": 2.97128364558619, "grad_norm": 0.0014699194580316544, "learning_rate": 2.790827935879037e-09, "loss": 0.0004, "step": 181590 }, { "epoch": 2.971447271537266, "grad_norm": 0.24049873650074005, "learning_rate": 2.759117156529434e-09, "loss": 0.0008, "step": 181600 }, { "epoch": 2.9716108974883415, "grad_norm": 0.08181938529014587, "learning_rate": 2.727587515150565e-09, "loss": 0.0009, "step": 181610 }, { "epoch": 2.9717745234394175, "grad_norm": 0.05511162057518959, "learning_rate": 2.6962390128865147e-09, "loss": 0.0017, "step": 181620 }, { "epoch": 2.9719381493904935, "grad_norm": 0.027295531705021858, "learning_rate": 2.665071650873041e-09, "loss": 0.001, "step": 181630 }, { "epoch": 2.972101775341569, "grad_norm": 0.07224902510643005, "learning_rate": 2.634085430240352e-09, "loss": 0.0008, "step": 181640 }, { "epoch": 2.972265401292645, "grad_norm": 0.09082729369401932, "learning_rate": 2.6032803521108816e-09, "loss": 0.0009, "step": 181650 }, { "epoch": 2.972429027243721, "grad_norm": 0.0989830419421196, "learning_rate": 2.5726564176015156e-09, "loss": 0.0015, "step": 181660 }, { "epoch": 2.9725926531947966, "grad_norm": 0.005387819372117519, "learning_rate": 2.5422136278219213e-09, "loss": 0.0007, "step": 181670 }, { "epoch": 2.9727562791458726, "grad_norm": 0.07024528086185455, "learning_rate": 2.5119519838767705e-09, "loss": 0.0008, "step": 181680 }, { "epoch": 2.9729199050969486, "grad_norm": 0.058167263865470886, "learning_rate": 2.481871486861853e-09, "loss": 0.0005, "step": 181690 }, { "epoch": 2.973083531048024, "grad_norm": 0.0681462436914444, "learning_rate": 2.451972137867964e-09, "loss": 0.0006, "step": 181700 }, { "epoch": 2.9732471569991, "grad_norm": 0.010845375247299671, "learning_rate": 2.42225393797868e-09, "loss": 0.001, "step": 181710 }, { "epoch": 2.973410782950176, "grad_norm": 0.03632626309990883, "learning_rate": 2.3927168882714733e-09, "loss": 0.0003, "step": 181720 }, { "epoch": 2.9735744089012517, "grad_norm": 0.03113074228167534, "learning_rate": 2.363360989816599e-09, "loss": 0.0003, "step": 181730 }, { "epoch": 2.9737380348523277, "grad_norm": 0.009494964964687824, "learning_rate": 2.33418624367876e-09, "loss": 0.0007, "step": 181740 }, { "epoch": 2.9739016608034037, "grad_norm": 0.02749892883002758, "learning_rate": 2.305192650914889e-09, "loss": 0.0015, "step": 181750 }, { "epoch": 2.9740652867544792, "grad_norm": 0.03593337908387184, "learning_rate": 2.276380212576368e-09, "loss": 0.0004, "step": 181760 }, { "epoch": 2.9742289127055552, "grad_norm": 0.11148542910814285, "learning_rate": 2.2477489297068056e-09, "loss": 0.0008, "step": 181770 }, { "epoch": 2.974392538656631, "grad_norm": 0.051847826689481735, "learning_rate": 2.219298803344816e-09, "loss": 0.0006, "step": 181780 }, { "epoch": 2.9745561646077068, "grad_norm": 0.05268994718790054, "learning_rate": 2.1910298345217964e-09, "loss": 0.0006, "step": 181790 }, { "epoch": 2.9747197905587828, "grad_norm": 0.13268661499023438, "learning_rate": 2.1629420242613718e-09, "loss": 0.0006, "step": 181800 }, { "epoch": 2.9748834165098583, "grad_norm": 0.08490833640098572, "learning_rate": 2.1350353735827276e-09, "loss": 0.0015, "step": 181810 }, { "epoch": 2.9750470424609343, "grad_norm": 0.0894186943769455, "learning_rate": 2.107309883496722e-09, "loss": 0.0012, "step": 181820 }, { "epoch": 2.97521066841201, "grad_norm": 0.013229744508862495, "learning_rate": 2.0797655550092168e-09, "loss": 0.0012, "step": 181830 }, { "epoch": 2.975374294363086, "grad_norm": 0.07367993146181107, "learning_rate": 2.052402389117747e-09, "loss": 0.0006, "step": 181840 }, { "epoch": 2.975537920314162, "grad_norm": 0.02634665183722973, "learning_rate": 2.025220386814297e-09, "loss": 0.0011, "step": 181850 }, { "epoch": 2.9757015462652374, "grad_norm": 0.08619394898414612, "learning_rate": 1.9982195490841903e-09, "loss": 0.0006, "step": 181860 }, { "epoch": 2.9758651722163134, "grad_norm": 0.005855135153979063, "learning_rate": 1.9713998769060884e-09, "loss": 0.0005, "step": 181870 }, { "epoch": 2.9760287981673894, "grad_norm": 0.054620079696178436, "learning_rate": 1.9447613712525457e-09, "loss": 0.0009, "step": 181880 }, { "epoch": 2.976192424118465, "grad_norm": 0.045846857130527496, "learning_rate": 1.9183040330889024e-09, "loss": 0.0011, "step": 181890 }, { "epoch": 2.976356050069541, "grad_norm": 0.08647055178880692, "learning_rate": 1.8920278633743904e-09, "loss": 0.0006, "step": 181900 }, { "epoch": 2.976519676020617, "grad_norm": 0.0648687556385994, "learning_rate": 1.8659328630610264e-09, "loss": 0.0005, "step": 181910 }, { "epoch": 2.9766833019716925, "grad_norm": 0.09652049094438553, "learning_rate": 1.84001903309472e-09, "loss": 0.001, "step": 181920 }, { "epoch": 2.9768469279227685, "grad_norm": 0.14204098284244537, "learning_rate": 1.814286374415275e-09, "loss": 0.0006, "step": 181930 }, { "epoch": 2.9770105538738445, "grad_norm": 0.04247429966926575, "learning_rate": 1.788734887954724e-09, "loss": 0.001, "step": 181940 }, { "epoch": 2.97717417982492, "grad_norm": 0.058686546981334686, "learning_rate": 1.7633645746401028e-09, "loss": 0.001, "step": 181950 }, { "epoch": 2.977337805775996, "grad_norm": 0.05955192446708679, "learning_rate": 1.7381754353906765e-09, "loss": 0.002, "step": 181960 }, { "epoch": 2.977501431727072, "grad_norm": 0.026648730039596558, "learning_rate": 1.7131674711190483e-09, "loss": 0.0005, "step": 181970 }, { "epoch": 2.9776650576781476, "grad_norm": 0.3306833505630493, "learning_rate": 1.688340682731715e-09, "loss": 0.0008, "step": 181980 }, { "epoch": 2.9778286836292236, "grad_norm": 0.11923018097877502, "learning_rate": 1.663695071129623e-09, "loss": 0.0008, "step": 181990 }, { "epoch": 2.9779923095802996, "grad_norm": 0.003203323343768716, "learning_rate": 1.639230637204836e-09, "loss": 0.0004, "step": 182000 }, { "epoch": 2.978155935531375, "grad_norm": 0.06617830693721771, "learning_rate": 1.614947381844978e-09, "loss": 0.0007, "step": 182010 }, { "epoch": 2.978319561482451, "grad_norm": 0.01506265252828598, "learning_rate": 1.5908453059299001e-09, "loss": 0.0009, "step": 182020 }, { "epoch": 2.978483187433527, "grad_norm": 0.03575095906853676, "learning_rate": 1.566924410333348e-09, "loss": 0.0014, "step": 182030 }, { "epoch": 2.9786468133846027, "grad_norm": 0.18627874553203583, "learning_rate": 1.543184695921851e-09, "loss": 0.0009, "step": 182040 }, { "epoch": 2.9788104393356787, "grad_norm": 0.03441288694739342, "learning_rate": 1.519626163556942e-09, "loss": 0.0005, "step": 182050 }, { "epoch": 2.9789740652867547, "grad_norm": 0.23827986419200897, "learning_rate": 1.4962488140923826e-09, "loss": 0.0005, "step": 182060 }, { "epoch": 2.9791376912378302, "grad_norm": 0.021551622077822685, "learning_rate": 1.4730526483747177e-09, "loss": 0.0006, "step": 182070 }, { "epoch": 2.9793013171889062, "grad_norm": 0.09389097988605499, "learning_rate": 1.4500376672454963e-09, "loss": 0.0006, "step": 182080 }, { "epoch": 2.9794649431399822, "grad_norm": 0.02499030902981758, "learning_rate": 1.4272038715384962e-09, "loss": 0.0003, "step": 182090 }, { "epoch": 2.9796285690910578, "grad_norm": 0.01591278612613678, "learning_rate": 1.4045512620819434e-09, "loss": 0.0005, "step": 182100 }, { "epoch": 2.9797921950421338, "grad_norm": 0.08829034864902496, "learning_rate": 1.3820798396962932e-09, "loss": 0.0007, "step": 182110 }, { "epoch": 2.9799558209932098, "grad_norm": 0.029104532673954964, "learning_rate": 1.3597896051964487e-09, "loss": 0.0005, "step": 182120 }, { "epoch": 2.9801194469442853, "grad_norm": 0.05610715597867966, "learning_rate": 1.3376805593900977e-09, "loss": 0.0006, "step": 182130 }, { "epoch": 2.9802830728953613, "grad_norm": 0.04652120918035507, "learning_rate": 1.315752703079376e-09, "loss": 0.0004, "step": 182140 }, { "epoch": 2.9804466988464373, "grad_norm": 0.1480550915002823, "learning_rate": 1.2940060370580932e-09, "loss": 0.0013, "step": 182150 }, { "epoch": 2.980610324797513, "grad_norm": 0.03793784976005554, "learning_rate": 1.2724405621150626e-09, "loss": 0.0008, "step": 182160 }, { "epoch": 2.980773950748589, "grad_norm": 0.03641463816165924, "learning_rate": 1.2510562790324365e-09, "loss": 0.001, "step": 182170 }, { "epoch": 2.9809375766996644, "grad_norm": 0.044259123504161835, "learning_rate": 1.2298531885840403e-09, "loss": 0.0004, "step": 182180 }, { "epoch": 2.9811012026507404, "grad_norm": 0.021113863214850426, "learning_rate": 1.2088312915398137e-09, "loss": 0.0006, "step": 182190 }, { "epoch": 2.9812648286018164, "grad_norm": 0.06147037446498871, "learning_rate": 1.1879905886613696e-09, "loss": 0.0007, "step": 182200 }, { "epoch": 2.981428454552892, "grad_norm": 0.046186938881874084, "learning_rate": 1.1673310807036598e-09, "loss": 0.0004, "step": 182210 }, { "epoch": 2.981592080503968, "grad_norm": 0.038473181426525116, "learning_rate": 1.1468527684155295e-09, "loss": 0.0003, "step": 182220 }, { "epoch": 2.9817557064550435, "grad_norm": 0.05932663753628731, "learning_rate": 1.126555652539718e-09, "loss": 0.0004, "step": 182230 }, { "epoch": 2.9819193324061195, "grad_norm": 0.06103590503334999, "learning_rate": 1.1064397338123034e-09, "loss": 0.0007, "step": 182240 }, { "epoch": 2.9820829583571955, "grad_norm": 0.09790539741516113, "learning_rate": 1.0865050129610366e-09, "loss": 0.0006, "step": 182250 }, { "epoch": 2.982246584308271, "grad_norm": 0.14284656941890717, "learning_rate": 1.0667514907103383e-09, "loss": 0.0011, "step": 182260 }, { "epoch": 2.982410210259347, "grad_norm": 0.08526279777288437, "learning_rate": 1.0471791677746367e-09, "loss": 0.0007, "step": 182270 }, { "epoch": 2.982573836210423, "grad_norm": 0.026263464242219925, "learning_rate": 1.02778804486392e-09, "loss": 0.0006, "step": 182280 }, { "epoch": 2.9827374621614986, "grad_norm": 0.04552841931581497, "learning_rate": 1.008578122681514e-09, "loss": 0.0025, "step": 182290 }, { "epoch": 2.9829010881125746, "grad_norm": 0.047282617539167404, "learning_rate": 9.895494019235285e-10, "loss": 0.0009, "step": 182300 }, { "epoch": 2.9830647140636506, "grad_norm": 0.08310357481241226, "learning_rate": 9.707018832794124e-10, "loss": 0.0005, "step": 182310 }, { "epoch": 2.983228340014726, "grad_norm": 0.08298409730195999, "learning_rate": 9.520355674325078e-10, "loss": 0.0003, "step": 182320 }, { "epoch": 2.983391965965802, "grad_norm": 0.031720537692308426, "learning_rate": 9.335504550594954e-10, "loss": 0.0005, "step": 182330 }, { "epoch": 2.983555591916878, "grad_norm": 0.1097896620631218, "learning_rate": 9.1524654682984e-10, "loss": 0.0008, "step": 182340 }, { "epoch": 2.9837192178679537, "grad_norm": 0.11241824179887772, "learning_rate": 8.971238434080098e-10, "loss": 0.0006, "step": 182350 }, { "epoch": 2.9838828438190297, "grad_norm": 0.030461788177490234, "learning_rate": 8.791823454501469e-10, "loss": 0.0002, "step": 182360 }, { "epoch": 2.9840464697701057, "grad_norm": 0.09207808971405029, "learning_rate": 8.614220536073969e-10, "loss": 0.0006, "step": 182370 }, { "epoch": 2.9842100957211812, "grad_norm": 0.05260929837822914, "learning_rate": 8.438429685225791e-10, "loss": 0.0008, "step": 182380 }, { "epoch": 2.9843737216722572, "grad_norm": 0.12333384156227112, "learning_rate": 8.264450908329613e-10, "loss": 0.0008, "step": 182390 }, { "epoch": 2.9845373476233332, "grad_norm": 0.04292740672826767, "learning_rate": 8.092284211697055e-10, "loss": 0.0004, "step": 182400 }, { "epoch": 2.9847009735744088, "grad_norm": 0.08580801635980606, "learning_rate": 7.92192960157312e-10, "loss": 0.0006, "step": 182410 }, { "epoch": 2.9848645995254848, "grad_norm": 0.12693290412425995, "learning_rate": 7.753387084119545e-10, "loss": 0.0008, "step": 182420 }, { "epoch": 2.9850282254765608, "grad_norm": 0.07805383205413818, "learning_rate": 7.586656665453662e-10, "loss": 0.0006, "step": 182430 }, { "epoch": 2.9851918514276363, "grad_norm": 0.021498166024684906, "learning_rate": 7.421738351620634e-10, "loss": 0.0007, "step": 182440 }, { "epoch": 2.9853554773787123, "grad_norm": 0.009048529900610447, "learning_rate": 7.258632148593459e-10, "loss": 0.0009, "step": 182450 }, { "epoch": 2.9855191033297883, "grad_norm": 0.06429654359817505, "learning_rate": 7.097338062284075e-10, "loss": 0.0042, "step": 182460 }, { "epoch": 2.985682729280864, "grad_norm": 0.1947738230228424, "learning_rate": 6.937856098543361e-10, "loss": 0.0012, "step": 182470 }, { "epoch": 2.98584635523194, "grad_norm": 0.01577065698802471, "learning_rate": 6.780186263155574e-10, "loss": 0.0003, "step": 182480 }, { "epoch": 2.986009981183016, "grad_norm": 0.07304892688989639, "learning_rate": 6.624328561821713e-10, "loss": 0.0009, "step": 182490 }, { "epoch": 2.9861736071340914, "grad_norm": 0.04181336984038353, "learning_rate": 6.470283000203914e-10, "loss": 0.0004, "step": 182500 }, { "epoch": 2.9863372330851674, "grad_norm": 0.017054041847586632, "learning_rate": 6.318049583886598e-10, "loss": 0.0003, "step": 182510 }, { "epoch": 2.9865008590362434, "grad_norm": 0.016465414315462112, "learning_rate": 6.167628318376473e-10, "loss": 0.0006, "step": 182520 }, { "epoch": 2.986664484987319, "grad_norm": 0.01617741771042347, "learning_rate": 6.019019209135835e-10, "loss": 0.0005, "step": 182530 }, { "epoch": 2.986828110938395, "grad_norm": 0.048710986971855164, "learning_rate": 5.872222261549265e-10, "loss": 0.0004, "step": 182540 }, { "epoch": 2.9869917368894705, "grad_norm": 0.16679561138153076, "learning_rate": 5.727237480934733e-10, "loss": 0.0008, "step": 182550 }, { "epoch": 2.9871553628405465, "grad_norm": 0.14510859549045563, "learning_rate": 5.584064872549144e-10, "loss": 0.0015, "step": 182560 }, { "epoch": 2.9873189887916225, "grad_norm": 0.11583229899406433, "learning_rate": 5.442704441582791e-10, "loss": 0.0013, "step": 182570 }, { "epoch": 2.987482614742698, "grad_norm": 0.029368869960308075, "learning_rate": 5.303156193164904e-10, "loss": 0.0006, "step": 182580 }, { "epoch": 2.987646240693774, "grad_norm": 0.0253090038895607, "learning_rate": 5.165420132341448e-10, "loss": 0.0008, "step": 182590 }, { "epoch": 2.9878098666448496, "grad_norm": 0.0328531488776207, "learning_rate": 5.029496264119526e-10, "loss": 0.0005, "step": 182600 }, { "epoch": 2.9879734925959256, "grad_norm": 0.09279884397983551, "learning_rate": 4.895384593411878e-10, "loss": 0.0007, "step": 182610 }, { "epoch": 2.9881371185470016, "grad_norm": 0.05588304623961449, "learning_rate": 4.76308512508683e-10, "loss": 0.0008, "step": 182620 }, { "epoch": 2.988300744498077, "grad_norm": 0.08876816183328629, "learning_rate": 4.632597863946098e-10, "loss": 0.0007, "step": 182630 }, { "epoch": 2.988464370449153, "grad_norm": 0.08923585712909698, "learning_rate": 4.503922814708128e-10, "loss": 0.001, "step": 182640 }, { "epoch": 2.988627996400229, "grad_norm": 0.15535946190357208, "learning_rate": 4.3770599820414095e-10, "loss": 0.0006, "step": 182650 }, { "epoch": 2.9887916223513047, "grad_norm": 0.08236636966466904, "learning_rate": 4.252009370547816e-10, "loss": 0.0005, "step": 182660 }, { "epoch": 2.9889552483023807, "grad_norm": 0.034059662371873856, "learning_rate": 4.128770984751507e-10, "loss": 0.0004, "step": 182670 }, { "epoch": 2.9891188742534567, "grad_norm": 0.1988143026828766, "learning_rate": 4.007344829132231e-10, "loss": 0.0006, "step": 182680 }, { "epoch": 2.9892825002045322, "grad_norm": 0.29730433225631714, "learning_rate": 3.8877309080809224e-10, "loss": 0.0009, "step": 182690 }, { "epoch": 2.9894461261556082, "grad_norm": 0.011211794801056385, "learning_rate": 3.769929225938551e-10, "loss": 0.0005, "step": 182700 }, { "epoch": 2.9896097521066842, "grad_norm": 0.07937619835138321, "learning_rate": 3.6539397869683745e-10, "loss": 0.0008, "step": 182710 }, { "epoch": 2.98977337805776, "grad_norm": 0.06813499331474304, "learning_rate": 3.539762595383689e-10, "loss": 0.0013, "step": 182720 }, { "epoch": 2.9899370040088358, "grad_norm": 0.02483954094350338, "learning_rate": 3.427397655320075e-10, "loss": 0.0004, "step": 182730 }, { "epoch": 2.9901006299599118, "grad_norm": 0.05213353410363197, "learning_rate": 3.316844970852051e-10, "loss": 0.0006, "step": 182740 }, { "epoch": 2.9902642559109873, "grad_norm": 0.1355007290840149, "learning_rate": 3.2081045459819713e-10, "loss": 0.0002, "step": 182750 }, { "epoch": 2.9904278818620633, "grad_norm": 0.04843417927622795, "learning_rate": 3.101176384651128e-10, "loss": 0.0007, "step": 182760 }, { "epoch": 2.9905915078131393, "grad_norm": 0.030777323991060257, "learning_rate": 2.9960604907397497e-10, "loss": 0.0003, "step": 182770 }, { "epoch": 2.990755133764215, "grad_norm": 0.20406000316143036, "learning_rate": 2.892756868055901e-10, "loss": 0.0011, "step": 182780 }, { "epoch": 2.990918759715291, "grad_norm": 0.07328087091445923, "learning_rate": 2.791265520346587e-10, "loss": 0.0013, "step": 182790 }, { "epoch": 2.991082385666367, "grad_norm": 0.05528191849589348, "learning_rate": 2.691586451292194e-10, "loss": 0.0004, "step": 182800 }, { "epoch": 2.9912460116174424, "grad_norm": 0.06187997758388519, "learning_rate": 2.5937196644953975e-10, "loss": 0.0005, "step": 182810 }, { "epoch": 2.9914096375685184, "grad_norm": 0.06434860825538635, "learning_rate": 2.4976651635144623e-10, "loss": 0.001, "step": 182820 }, { "epoch": 2.9915732635195944, "grad_norm": 0.10044960677623749, "learning_rate": 2.4034229518299366e-10, "loss": 0.001, "step": 182830 }, { "epoch": 2.99173688947067, "grad_norm": 0.15940852463245392, "learning_rate": 2.3109930328502062e-10, "loss": 0.0006, "step": 182840 }, { "epoch": 2.991900515421746, "grad_norm": 0.011512136086821556, "learning_rate": 2.2203754099336949e-10, "loss": 0.0007, "step": 182850 }, { "epoch": 2.992064141372822, "grad_norm": 0.07312177866697311, "learning_rate": 2.1315700863611122e-10, "loss": 0.0004, "step": 182860 }, { "epoch": 2.9922277673238975, "grad_norm": 0.09036201983690262, "learning_rate": 2.0445770653521046e-10, "loss": 0.0005, "step": 182870 }, { "epoch": 2.9923913932749735, "grad_norm": 0.011012058705091476, "learning_rate": 1.9593963500597056e-10, "loss": 0.0005, "step": 182880 }, { "epoch": 2.9925550192260495, "grad_norm": 0.04315563663840294, "learning_rate": 1.8760279435703354e-10, "loss": 0.001, "step": 182890 }, { "epoch": 2.992718645177125, "grad_norm": 0.011310326866805553, "learning_rate": 1.7944718489149027e-10, "loss": 0.0002, "step": 182900 }, { "epoch": 2.992882271128201, "grad_norm": 0.04459337517619133, "learning_rate": 1.7147280690354984e-10, "loss": 0.0003, "step": 182910 }, { "epoch": 2.993045897079277, "grad_norm": 0.055467452853918076, "learning_rate": 1.6367966068298047e-10, "loss": 0.0004, "step": 182920 }, { "epoch": 2.9932095230303526, "grad_norm": 0.13603198528289795, "learning_rate": 1.5606774651233392e-10, "loss": 0.0005, "step": 182930 }, { "epoch": 2.9933731489814286, "grad_norm": 0.015258897095918655, "learning_rate": 1.486370646675006e-10, "loss": 0.0002, "step": 182940 }, { "epoch": 2.993536774932504, "grad_norm": 0.04858879745006561, "learning_rate": 1.4138761541770963e-10, "loss": 0.0004, "step": 182950 }, { "epoch": 2.99370040088358, "grad_norm": 0.0620223730802536, "learning_rate": 1.3431939902608383e-10, "loss": 0.0007, "step": 182960 }, { "epoch": 2.993864026834656, "grad_norm": 0.052294228225946426, "learning_rate": 1.274324157479745e-10, "loss": 0.001, "step": 182970 }, { "epoch": 2.9940276527857317, "grad_norm": 0.04354868084192276, "learning_rate": 1.2072666583429205e-10, "loss": 0.0006, "step": 182980 }, { "epoch": 2.9941912787368077, "grad_norm": 0.2174597680568695, "learning_rate": 1.1420214952650998e-10, "loss": 0.0006, "step": 182990 }, { "epoch": 2.9943549046878832, "grad_norm": 0.0406862236559391, "learning_rate": 1.0785886706277116e-10, "loss": 0.002, "step": 183000 }, { "epoch": 2.9945185306389592, "grad_norm": 0.09220794588327408, "learning_rate": 1.0169681867178149e-10, "loss": 0.0005, "step": 183010 }, { "epoch": 2.9946821565900352, "grad_norm": 0.20663510262966156, "learning_rate": 9.571600457725094e-11, "loss": 0.0014, "step": 183020 }, { "epoch": 2.994845782541111, "grad_norm": 0.08783263713121414, "learning_rate": 8.99164249962281e-11, "loss": 0.0005, "step": 183030 }, { "epoch": 2.9950094084921868, "grad_norm": 0.0773741602897644, "learning_rate": 8.429808013854513e-11, "loss": 0.0003, "step": 183040 }, { "epoch": 2.9951730344432628, "grad_norm": 0.03865004703402519, "learning_rate": 7.886097020848304e-11, "loss": 0.0006, "step": 183050 }, { "epoch": 2.9953366603943383, "grad_norm": 0.04568766802549362, "learning_rate": 7.360509540255135e-11, "loss": 0.0007, "step": 183060 }, { "epoch": 2.9955002863454143, "grad_norm": 0.0493677519261837, "learning_rate": 6.853045591115325e-11, "loss": 0.0007, "step": 183070 }, { "epoch": 2.9956639122964903, "grad_norm": 0.11075688898563385, "learning_rate": 6.363705191914094e-11, "loss": 0.0006, "step": 183080 }, { "epoch": 2.995827538247566, "grad_norm": 0.044846996665000916, "learning_rate": 5.892488360248471e-11, "loss": 0.0011, "step": 183090 }, { "epoch": 2.995991164198642, "grad_norm": 0.12038176506757736, "learning_rate": 5.4393951133269174e-11, "loss": 0.0006, "step": 183100 }, { "epoch": 2.996154790149718, "grad_norm": 0.007127447985112667, "learning_rate": 5.0044254675252203e-11, "loss": 0.0004, "step": 183110 }, { "epoch": 2.9963184161007934, "grad_norm": 0.12169210612773895, "learning_rate": 4.587579438553036e-11, "loss": 0.0008, "step": 183120 }, { "epoch": 2.9964820420518694, "grad_norm": 0.20229792594909668, "learning_rate": 4.18885704162042e-11, "loss": 0.0006, "step": 183130 }, { "epoch": 2.9966456680029454, "grad_norm": 0.09927067160606384, "learning_rate": 3.8082582911602715e-11, "loss": 0.0007, "step": 183140 }, { "epoch": 2.996809293954021, "grad_norm": 0.08279476314783096, "learning_rate": 3.4457832008838456e-11, "loss": 0.0005, "step": 183150 }, { "epoch": 2.996972919905097, "grad_norm": 0.11548232287168503, "learning_rate": 3.101431784002795e-11, "loss": 0.0008, "step": 183160 }, { "epoch": 2.997136545856173, "grad_norm": 0.01916341483592987, "learning_rate": 2.77520405300713e-11, "loss": 0.0006, "step": 183170 }, { "epoch": 2.9973001718072485, "grad_norm": 0.09040320664644241, "learning_rate": 2.467100019665214e-11, "loss": 0.0002, "step": 183180 }, { "epoch": 2.9974637977583245, "grad_norm": 0.013019285164773464, "learning_rate": 2.1771196951902994e-11, "loss": 0.0005, "step": 183190 }, { "epoch": 2.9976274237094005, "grad_norm": 0.037323277443647385, "learning_rate": 1.9052630900739944e-11, "loss": 0.0008, "step": 183200 }, { "epoch": 2.997791049660476, "grad_norm": 0.16036146879196167, "learning_rate": 1.6515302141972835e-11, "loss": 0.001, "step": 183210 }, { "epoch": 2.997954675611552, "grad_norm": 0.06244174391031265, "learning_rate": 1.4159210767750176e-11, "loss": 0.0012, "step": 183220 }, { "epoch": 2.998118301562628, "grad_norm": 0.10027268528938293, "learning_rate": 1.1984356862448921e-11, "loss": 0.0005, "step": 183230 }, { "epoch": 2.9982819275137036, "grad_norm": 0.03646072745323181, "learning_rate": 9.990740506005126e-12, "loss": 0.0007, "step": 183240 }, { "epoch": 2.9984455534647796, "grad_norm": 0.035138629376888275, "learning_rate": 8.178361770028176e-12, "loss": 0.0006, "step": 183250 }, { "epoch": 2.9986091794158556, "grad_norm": 0.03447127342224121, "learning_rate": 6.54722072002123e-12, "loss": 0.0007, "step": 183260 }, { "epoch": 2.998772805366931, "grad_norm": 0.08323480188846588, "learning_rate": 5.097317415936331e-12, "loss": 0.0009, "step": 183270 }, { "epoch": 2.998936431318007, "grad_norm": 0.21281728148460388, "learning_rate": 3.828651909953962e-12, "loss": 0.0005, "step": 183280 }, { "epoch": 2.999100057269083, "grad_norm": 0.03264172002673149, "learning_rate": 2.7412242481483775e-12, "loss": 0.0009, "step": 183290 }, { "epoch": 2.9992636832201587, "grad_norm": 0.014256869442760944, "learning_rate": 1.8350344693773834e-12, "loss": 0.0007, "step": 183300 }, { "epoch": 2.9994273091712347, "grad_norm": 0.006932792719453573, "learning_rate": 1.110082606947671e-12, "loss": 0.0006, "step": 183310 }, { "epoch": 2.9995909351223107, "grad_norm": 0.10003849864006042, "learning_rate": 5.663686875045926e-13, "loss": 0.0008, "step": 183320 }, { "epoch": 2.9997545610733862, "grad_norm": 0.04948035627603531, "learning_rate": 2.0389272992193953e-13, "loss": 0.0005, "step": 183330 }, { "epoch": 2.9999181870244622, "grad_norm": 0.09036850929260254, "learning_rate": 2.2654748077499677e-14, "loss": 0.0006, "step": 183340 }, { "epoch": 3.0, "step": 183345, "total_flos": 5.837921389771751e+18, "train_loss": 0.004886563129512852, "train_runtime": 102730.9024, "train_samples_per_second": 14.278, "train_steps_per_second": 1.785 } ], "logging_steps": 10, "max_steps": 183345, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.837921389771751e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }