{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 49641, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006043391551338611, "grad_norm": 298.2872354263952, "learning_rate": 4.0273862263391065e-08, "loss": 6.1467, "step": 10 }, { "epoch": 0.0012086783102677222, "grad_norm": 272.5003620952357, "learning_rate": 8.054772452678213e-08, "loss": 6.1276, "step": 20 }, { "epoch": 0.0018130174654015835, "grad_norm": 288.81048256119277, "learning_rate": 1.208215867901732e-07, "loss": 6.0628, "step": 30 }, { "epoch": 0.0024173566205354445, "grad_norm": 269.56952062216857, "learning_rate": 1.6109544905356426e-07, "loss": 5.7986, "step": 40 }, { "epoch": 0.0030216957756693057, "grad_norm": 206.36532951229918, "learning_rate": 2.013693113169553e-07, "loss": 5.3366, "step": 50 }, { "epoch": 0.003626034930803167, "grad_norm": 203.00258326291555, "learning_rate": 2.416431735803464e-07, "loss": 4.8985, "step": 60 }, { "epoch": 0.004230374085937028, "grad_norm": 155.13411973161922, "learning_rate": 2.819170358437374e-07, "loss": 4.4859, "step": 70 }, { "epoch": 0.004834713241070889, "grad_norm": 155.137249664987, "learning_rate": 3.221908981071285e-07, "loss": 3.847, "step": 80 }, { "epoch": 0.00543905239620475, "grad_norm": 68.62487791511931, "learning_rate": 3.6246476037051957e-07, "loss": 2.8263, "step": 90 }, { "epoch": 0.0060433915513386114, "grad_norm": 29.616720475402726, "learning_rate": 4.027386226339106e-07, "loss": 2.5251, "step": 100 }, { "epoch": 0.006647730706472473, "grad_norm": 25.716610662045905, "learning_rate": 4.430124848973017e-07, "loss": 2.3305, "step": 110 }, { "epoch": 0.007252069861606334, "grad_norm": 25.925794056215235, "learning_rate": 4.832863471606928e-07, "loss": 2.0343, "step": 120 }, { "epoch": 0.007856409016740194, "grad_norm": 16.736804626915543, "learning_rate": 5.235602094240838e-07, "loss": 1.7285, "step": 130 }, { "epoch": 0.008460748171874055, "grad_norm": 14.505748439621561, "learning_rate": 5.638340716874748e-07, "loss": 1.4641, "step": 140 }, { "epoch": 0.009065087327007917, "grad_norm": 4.088781078521003, "learning_rate": 6.041079339508659e-07, "loss": 1.2911, "step": 150 }, { "epoch": 0.009669426482141778, "grad_norm": 3.5454597385386095, "learning_rate": 6.44381796214257e-07, "loss": 1.273, "step": 160 }, { "epoch": 0.01027376563727564, "grad_norm": 3.145326385935495, "learning_rate": 6.84655658477648e-07, "loss": 1.2294, "step": 170 }, { "epoch": 0.0108781047924095, "grad_norm": 2.9493249288358885, "learning_rate": 7.249295207410391e-07, "loss": 1.2643, "step": 180 }, { "epoch": 0.011482443947543362, "grad_norm": 3.445852230847896, "learning_rate": 7.652033830044302e-07, "loss": 1.1955, "step": 190 }, { "epoch": 0.012086783102677223, "grad_norm": 2.442717320016556, "learning_rate": 8.054772452678212e-07, "loss": 1.2114, "step": 200 }, { "epoch": 0.012691122257811084, "grad_norm": 2.575139498019344, "learning_rate": 8.457511075312124e-07, "loss": 1.165, "step": 210 }, { "epoch": 0.013295461412944945, "grad_norm": 2.269127757263327, "learning_rate": 8.860249697946034e-07, "loss": 1.1835, "step": 220 }, { "epoch": 0.013899800568078807, "grad_norm": 2.7913296947023287, "learning_rate": 9.262988320579944e-07, "loss": 1.1748, "step": 230 }, { "epoch": 0.014504139723212668, "grad_norm": 2.506161432538682, "learning_rate": 9.665726943213855e-07, "loss": 1.1829, "step": 240 }, { "epoch": 0.015108478878346527, "grad_norm": 2.265386795022933, "learning_rate": 1.0068465565847765e-06, "loss": 1.1918, "step": 250 }, { "epoch": 0.01571281803348039, "grad_norm": 2.3797096473176076, "learning_rate": 1.0471204188481676e-06, "loss": 1.1839, "step": 260 }, { "epoch": 0.01631715718861425, "grad_norm": 2.2402768142876694, "learning_rate": 1.0873942811115588e-06, "loss": 1.1855, "step": 270 }, { "epoch": 0.01692149634374811, "grad_norm": 2.2249442023552852, "learning_rate": 1.1276681433749497e-06, "loss": 1.1649, "step": 280 }, { "epoch": 0.017525835498881974, "grad_norm": 2.085305674911988, "learning_rate": 1.1679420056383408e-06, "loss": 1.1806, "step": 290 }, { "epoch": 0.018130174654015833, "grad_norm": 2.0828553521779183, "learning_rate": 1.2082158679017318e-06, "loss": 1.1447, "step": 300 }, { "epoch": 0.018734513809149696, "grad_norm": 1.9032512166927018, "learning_rate": 1.248489730165123e-06, "loss": 1.1507, "step": 310 }, { "epoch": 0.019338852964283556, "grad_norm": 1.91018837067208, "learning_rate": 1.288763592428514e-06, "loss": 1.1516, "step": 320 }, { "epoch": 0.019943192119417415, "grad_norm": 1.903531796901431, "learning_rate": 1.329037454691905e-06, "loss": 1.1604, "step": 330 }, { "epoch": 0.02054753127455128, "grad_norm": 2.136992877764608, "learning_rate": 1.369311316955296e-06, "loss": 1.1458, "step": 340 }, { "epoch": 0.021151870429685138, "grad_norm": 1.74601727863523, "learning_rate": 1.4095851792186871e-06, "loss": 1.1676, "step": 350 }, { "epoch": 0.021756209584819, "grad_norm": 1.6117672740528162, "learning_rate": 1.4498590414820783e-06, "loss": 1.145, "step": 360 }, { "epoch": 0.02236054873995286, "grad_norm": 1.8149579016476314, "learning_rate": 1.4901329037454692e-06, "loss": 1.1583, "step": 370 }, { "epoch": 0.022964887895086723, "grad_norm": 1.5885652581406067, "learning_rate": 1.5304067660088604e-06, "loss": 1.1487, "step": 380 }, { "epoch": 0.023569227050220583, "grad_norm": 1.7684496469863542, "learning_rate": 1.5706806282722515e-06, "loss": 1.1128, "step": 390 }, { "epoch": 0.024173566205354446, "grad_norm": 1.7621133706989613, "learning_rate": 1.6109544905356425e-06, "loss": 1.1468, "step": 400 }, { "epoch": 0.024777905360488305, "grad_norm": 2.038485171893254, "learning_rate": 1.6512283527990336e-06, "loss": 1.1751, "step": 410 }, { "epoch": 0.025382244515622168, "grad_norm": 1.91524830783998, "learning_rate": 1.6915022150624248e-06, "loss": 1.1654, "step": 420 }, { "epoch": 0.025986583670756028, "grad_norm": 1.8236734227335434, "learning_rate": 1.7317760773258157e-06, "loss": 1.1648, "step": 430 }, { "epoch": 0.02659092282588989, "grad_norm": 2.0232671392154917, "learning_rate": 1.7720499395892068e-06, "loss": 1.1485, "step": 440 }, { "epoch": 0.02719526198102375, "grad_norm": 1.6150477738657656, "learning_rate": 1.812323801852598e-06, "loss": 1.1089, "step": 450 }, { "epoch": 0.027799601136157613, "grad_norm": 1.5572962388413956, "learning_rate": 1.8525976641159887e-06, "loss": 1.1211, "step": 460 }, { "epoch": 0.028403940291291473, "grad_norm": 1.8244931625619223, "learning_rate": 1.8928715263793799e-06, "loss": 1.1244, "step": 470 }, { "epoch": 0.029008279446425336, "grad_norm": 1.6531825283199104, "learning_rate": 1.933145388642771e-06, "loss": 1.1539, "step": 480 }, { "epoch": 0.029612618601559195, "grad_norm": 1.451041168050482, "learning_rate": 1.973419250906162e-06, "loss": 1.1212, "step": 490 }, { "epoch": 0.030216957756693055, "grad_norm": 1.957377854658712, "learning_rate": 2.013693113169553e-06, "loss": 1.144, "step": 500 }, { "epoch": 0.030821296911826918, "grad_norm": 1.886661763056281, "learning_rate": 2.0539669754329443e-06, "loss": 1.1387, "step": 510 }, { "epoch": 0.03142563606696078, "grad_norm": 1.7418883718172464, "learning_rate": 2.094240837696335e-06, "loss": 1.1398, "step": 520 }, { "epoch": 0.03202997522209464, "grad_norm": 1.7816317246389095, "learning_rate": 2.134514699959726e-06, "loss": 1.1407, "step": 530 }, { "epoch": 0.0326343143772285, "grad_norm": 1.7678275511310892, "learning_rate": 2.1747885622231175e-06, "loss": 1.1148, "step": 540 }, { "epoch": 0.03323865353236236, "grad_norm": 1.6510201333960512, "learning_rate": 2.2150624244865084e-06, "loss": 1.0989, "step": 550 }, { "epoch": 0.03384299268749622, "grad_norm": 1.528929289785965, "learning_rate": 2.2553362867498994e-06, "loss": 1.1237, "step": 560 }, { "epoch": 0.03444733184263008, "grad_norm": 1.5209366865980554, "learning_rate": 2.2956101490132908e-06, "loss": 1.1303, "step": 570 }, { "epoch": 0.03505167099776395, "grad_norm": 1.4974206580420846, "learning_rate": 2.3358840112766817e-06, "loss": 1.1397, "step": 580 }, { "epoch": 0.03565601015289781, "grad_norm": 1.690007751571185, "learning_rate": 2.3761578735400726e-06, "loss": 1.1302, "step": 590 }, { "epoch": 0.03626034930803167, "grad_norm": 1.6781638968679657, "learning_rate": 2.4164317358034636e-06, "loss": 1.1237, "step": 600 }, { "epoch": 0.036864688463165526, "grad_norm": 1.7462569548834377, "learning_rate": 2.4567055980668545e-06, "loss": 1.1316, "step": 610 }, { "epoch": 0.03746902761829939, "grad_norm": 1.755005907449779, "learning_rate": 2.496979460330246e-06, "loss": 1.1348, "step": 620 }, { "epoch": 0.03807336677343325, "grad_norm": 1.7005845203245822, "learning_rate": 2.537253322593637e-06, "loss": 1.1146, "step": 630 }, { "epoch": 0.03867770592856711, "grad_norm": 1.9761625298171615, "learning_rate": 2.577527184857028e-06, "loss": 1.112, "step": 640 }, { "epoch": 0.03928204508370097, "grad_norm": 1.8575660787738717, "learning_rate": 2.617801047120419e-06, "loss": 1.1192, "step": 650 }, { "epoch": 0.03988638423883483, "grad_norm": 1.7192840768266824, "learning_rate": 2.65807490938381e-06, "loss": 1.1197, "step": 660 }, { "epoch": 0.0404907233939687, "grad_norm": 1.764024302272521, "learning_rate": 2.6983487716472014e-06, "loss": 1.108, "step": 670 }, { "epoch": 0.04109506254910256, "grad_norm": 1.7154086975607765, "learning_rate": 2.738622633910592e-06, "loss": 1.0843, "step": 680 }, { "epoch": 0.041699401704236416, "grad_norm": 1.8445623759164267, "learning_rate": 2.7788964961739833e-06, "loss": 1.1314, "step": 690 }, { "epoch": 0.042303740859370276, "grad_norm": 1.1164464947539972, "learning_rate": 2.8191703584373742e-06, "loss": 1.1154, "step": 700 }, { "epoch": 0.04290808001450414, "grad_norm": 1.0771280141246888, "learning_rate": 2.8594442207007656e-06, "loss": 1.0927, "step": 710 }, { "epoch": 0.043512419169638, "grad_norm": 1.0823024723600558, "learning_rate": 2.8997180829641565e-06, "loss": 1.1202, "step": 720 }, { "epoch": 0.04411675832477186, "grad_norm": 1.059423786201847, "learning_rate": 2.939991945227548e-06, "loss": 1.1058, "step": 730 }, { "epoch": 0.04472109747990572, "grad_norm": 1.107829209476366, "learning_rate": 2.9802658074909384e-06, "loss": 1.131, "step": 740 }, { "epoch": 0.04532543663503959, "grad_norm": 0.9784201230186164, "learning_rate": 3.0205396697543298e-06, "loss": 1.1314, "step": 750 }, { "epoch": 0.04592977579017345, "grad_norm": 0.9044129481577555, "learning_rate": 3.0608135320177207e-06, "loss": 1.0946, "step": 760 }, { "epoch": 0.046534114945307306, "grad_norm": 0.9427655630220715, "learning_rate": 3.101087394281112e-06, "loss": 1.0971, "step": 770 }, { "epoch": 0.047138454100441166, "grad_norm": 0.9430509363329759, "learning_rate": 3.141361256544503e-06, "loss": 1.1275, "step": 780 }, { "epoch": 0.04774279325557503, "grad_norm": 0.8704066019825859, "learning_rate": 3.1816351188078935e-06, "loss": 1.1066, "step": 790 }, { "epoch": 0.04834713241070889, "grad_norm": 1.085672823483262, "learning_rate": 3.221908981071285e-06, "loss": 1.1035, "step": 800 }, { "epoch": 0.04895147156584275, "grad_norm": 1.0001828747437085, "learning_rate": 3.262182843334676e-06, "loss": 1.112, "step": 810 }, { "epoch": 0.04955581072097661, "grad_norm": 1.0092347772885002, "learning_rate": 3.302456705598067e-06, "loss": 1.1104, "step": 820 }, { "epoch": 0.05016014987611047, "grad_norm": 1.004402462110965, "learning_rate": 3.342730567861458e-06, "loss": 1.1317, "step": 830 }, { "epoch": 0.050764489031244336, "grad_norm": 0.9845202468833524, "learning_rate": 3.3830044301248495e-06, "loss": 1.1244, "step": 840 }, { "epoch": 0.051368828186378196, "grad_norm": 0.714487787028079, "learning_rate": 3.42327829238824e-06, "loss": 1.1242, "step": 850 }, { "epoch": 0.051973167341512055, "grad_norm": 0.6900310140587528, "learning_rate": 3.4635521546516314e-06, "loss": 1.0817, "step": 860 }, { "epoch": 0.052577506496645915, "grad_norm": 0.625412751358801, "learning_rate": 3.5038260169150223e-06, "loss": 1.1088, "step": 870 }, { "epoch": 0.05318184565177978, "grad_norm": 0.6783066840510502, "learning_rate": 3.5440998791784137e-06, "loss": 1.1139, "step": 880 }, { "epoch": 0.05378618480691364, "grad_norm": 0.692374586650823, "learning_rate": 3.5843737414418046e-06, "loss": 1.1108, "step": 890 }, { "epoch": 0.0543905239620475, "grad_norm": 0.678311995376911, "learning_rate": 3.624647603705196e-06, "loss": 1.0974, "step": 900 }, { "epoch": 0.05499486311718136, "grad_norm": 0.7425145958964064, "learning_rate": 3.6649214659685865e-06, "loss": 1.0763, "step": 910 }, { "epoch": 0.055599202272315226, "grad_norm": 0.6856922654960516, "learning_rate": 3.7051953282319774e-06, "loss": 1.1136, "step": 920 }, { "epoch": 0.056203541427449086, "grad_norm": 0.7064784604124972, "learning_rate": 3.745469190495369e-06, "loss": 1.1098, "step": 930 }, { "epoch": 0.056807880582582945, "grad_norm": 0.6703709229378032, "learning_rate": 3.7857430527587597e-06, "loss": 1.1197, "step": 940 }, { "epoch": 0.057412219737716805, "grad_norm": 1.2282719953549017, "learning_rate": 3.826016915022151e-06, "loss": 1.0899, "step": 950 }, { "epoch": 0.05801655889285067, "grad_norm": 1.1929340082708477, "learning_rate": 3.866290777285542e-06, "loss": 1.1288, "step": 960 }, { "epoch": 0.05862089804798453, "grad_norm": 1.2746392955027297, "learning_rate": 3.906564639548933e-06, "loss": 1.0899, "step": 970 }, { "epoch": 0.05922523720311839, "grad_norm": 1.2099210453245777, "learning_rate": 3.946838501812324e-06, "loss": 1.1243, "step": 980 }, { "epoch": 0.05982957635825225, "grad_norm": 1.2112775722352984, "learning_rate": 3.987112364075715e-06, "loss": 1.1175, "step": 990 }, { "epoch": 0.06043391551338611, "grad_norm": 0.6053511060988584, "learning_rate": 4.027386226339106e-06, "loss": 1.1148, "step": 1000 }, { "epoch": 0.061038254668519976, "grad_norm": 0.6446163720622058, "learning_rate": 4.067660088602497e-06, "loss": 1.1122, "step": 1010 }, { "epoch": 0.061642593823653835, "grad_norm": 0.5865290255556028, "learning_rate": 4.1079339508658885e-06, "loss": 1.0932, "step": 1020 }, { "epoch": 0.062246932978787695, "grad_norm": 0.5740116165642117, "learning_rate": 4.14820781312928e-06, "loss": 1.103, "step": 1030 }, { "epoch": 0.06285127213392155, "grad_norm": 0.5698314395636827, "learning_rate": 4.18848167539267e-06, "loss": 1.1156, "step": 1040 }, { "epoch": 0.06345561128905541, "grad_norm": 0.5673180485264345, "learning_rate": 4.228755537656062e-06, "loss": 1.1459, "step": 1050 }, { "epoch": 0.06405995044418927, "grad_norm": 0.6085986093961342, "learning_rate": 4.269029399919452e-06, "loss": 1.0977, "step": 1060 }, { "epoch": 0.06466428959932315, "grad_norm": 0.584955573082463, "learning_rate": 4.309303262182844e-06, "loss": 1.0929, "step": 1070 }, { "epoch": 0.065268628754457, "grad_norm": 0.6106749898890043, "learning_rate": 4.349577124446235e-06, "loss": 1.0647, "step": 1080 }, { "epoch": 0.06587296790959087, "grad_norm": 0.6164260748087341, "learning_rate": 4.3898509867096255e-06, "loss": 1.1274, "step": 1090 }, { "epoch": 0.06647730706472472, "grad_norm": 0.557833684560504, "learning_rate": 4.430124848973017e-06, "loss": 1.1089, "step": 1100 }, { "epoch": 0.06708164621985858, "grad_norm": 0.5829249216386346, "learning_rate": 4.470398711236407e-06, "loss": 1.1347, "step": 1110 }, { "epoch": 0.06768598537499244, "grad_norm": 0.6263876637695363, "learning_rate": 4.510672573499799e-06, "loss": 1.0979, "step": 1120 }, { "epoch": 0.0682903245301263, "grad_norm": 0.6196422245080135, "learning_rate": 4.55094643576319e-06, "loss": 1.1036, "step": 1130 }, { "epoch": 0.06889466368526016, "grad_norm": 0.6158195649523993, "learning_rate": 4.5912202980265815e-06, "loss": 1.1014, "step": 1140 }, { "epoch": 0.06949900284039402, "grad_norm": 0.5800059892091631, "learning_rate": 4.631494160289972e-06, "loss": 1.1179, "step": 1150 }, { "epoch": 0.0701033419955279, "grad_norm": 0.5967167581820909, "learning_rate": 4.671768022553363e-06, "loss": 1.1036, "step": 1160 }, { "epoch": 0.07070768115066176, "grad_norm": 0.5849312766034016, "learning_rate": 4.712041884816754e-06, "loss": 1.0933, "step": 1170 }, { "epoch": 0.07131202030579561, "grad_norm": 0.6111626030450139, "learning_rate": 4.752315747080145e-06, "loss": 1.088, "step": 1180 }, { "epoch": 0.07191635946092947, "grad_norm": 0.5676535264441273, "learning_rate": 4.792589609343537e-06, "loss": 1.1039, "step": 1190 }, { "epoch": 0.07252069861606333, "grad_norm": 0.6056548822582074, "learning_rate": 4.832863471606927e-06, "loss": 1.1005, "step": 1200 }, { "epoch": 0.0731250377711972, "grad_norm": 0.6232710264069062, "learning_rate": 4.8731373338703185e-06, "loss": 1.0864, "step": 1210 }, { "epoch": 0.07372937692633105, "grad_norm": 0.619023692523766, "learning_rate": 4.913411196133709e-06, "loss": 1.1474, "step": 1220 }, { "epoch": 0.07433371608146491, "grad_norm": 0.5897355405120764, "learning_rate": 4.9536850583971e-06, "loss": 1.1148, "step": 1230 }, { "epoch": 0.07493805523659879, "grad_norm": 0.6255020630314708, "learning_rate": 4.993958920660492e-06, "loss": 1.1084, "step": 1240 }, { "epoch": 0.07554239439173265, "grad_norm": 0.572696440955054, "learning_rate": 5.034232782923882e-06, "loss": 1.1045, "step": 1250 }, { "epoch": 0.0761467335468665, "grad_norm": 0.6016228313532224, "learning_rate": 5.074506645187274e-06, "loss": 1.0806, "step": 1260 }, { "epoch": 0.07675107270200036, "grad_norm": 0.6148565648976038, "learning_rate": 5.114780507450665e-06, "loss": 1.0906, "step": 1270 }, { "epoch": 0.07735541185713422, "grad_norm": 0.6250503294264665, "learning_rate": 5.155054369714056e-06, "loss": 1.1088, "step": 1280 }, { "epoch": 0.07795975101226808, "grad_norm": 0.6259358544472459, "learning_rate": 5.195328231977447e-06, "loss": 1.1065, "step": 1290 }, { "epoch": 0.07856409016740194, "grad_norm": 0.6928975213289578, "learning_rate": 5.235602094240838e-06, "loss": 1.0965, "step": 1300 }, { "epoch": 0.0791684293225358, "grad_norm": 0.7480488578586902, "learning_rate": 5.27587595650423e-06, "loss": 1.0945, "step": 1310 }, { "epoch": 0.07977276847766966, "grad_norm": 0.6970973531334974, "learning_rate": 5.31614981876762e-06, "loss": 1.0981, "step": 1320 }, { "epoch": 0.08037710763280353, "grad_norm": 0.6818063136002229, "learning_rate": 5.3564236810310115e-06, "loss": 1.0761, "step": 1330 }, { "epoch": 0.0809814467879374, "grad_norm": 0.6959641966583481, "learning_rate": 5.396697543294403e-06, "loss": 1.0968, "step": 1340 }, { "epoch": 0.08158578594307125, "grad_norm": 0.7593382806459565, "learning_rate": 5.436971405557794e-06, "loss": 1.0682, "step": 1350 }, { "epoch": 0.08219012509820511, "grad_norm": 0.846649633309298, "learning_rate": 5.477245267821184e-06, "loss": 1.1069, "step": 1360 }, { "epoch": 0.08279446425333897, "grad_norm": 0.7368100714028909, "learning_rate": 5.517519130084575e-06, "loss": 1.0768, "step": 1370 }, { "epoch": 0.08339880340847283, "grad_norm": 0.8000337335475295, "learning_rate": 5.557792992347967e-06, "loss": 1.0999, "step": 1380 }, { "epoch": 0.08400314256360669, "grad_norm": 0.8639730071659587, "learning_rate": 5.598066854611358e-06, "loss": 1.0997, "step": 1390 }, { "epoch": 0.08460748171874055, "grad_norm": 0.6671862958037743, "learning_rate": 5.6383407168747485e-06, "loss": 1.0829, "step": 1400 }, { "epoch": 0.08521182087387442, "grad_norm": 0.7100917285829133, "learning_rate": 5.67861457913814e-06, "loss": 1.0887, "step": 1410 }, { "epoch": 0.08581616002900828, "grad_norm": 0.7197083853004631, "learning_rate": 5.718888441401531e-06, "loss": 1.0744, "step": 1420 }, { "epoch": 0.08642049918414214, "grad_norm": 0.6681752999196956, "learning_rate": 5.7591623036649226e-06, "loss": 1.0782, "step": 1430 }, { "epoch": 0.087024838339276, "grad_norm": 0.6961203501826451, "learning_rate": 5.799436165928313e-06, "loss": 1.1028, "step": 1440 }, { "epoch": 0.08762917749440986, "grad_norm": 0.8093873565812489, "learning_rate": 5.8397100281917044e-06, "loss": 1.0667, "step": 1450 }, { "epoch": 0.08823351664954372, "grad_norm": 0.8993433219450102, "learning_rate": 5.879983890455096e-06, "loss": 1.1464, "step": 1460 }, { "epoch": 0.08883785580467758, "grad_norm": 0.8838967444455852, "learning_rate": 5.9202577527184855e-06, "loss": 1.0939, "step": 1470 }, { "epoch": 0.08944219495981144, "grad_norm": 0.8507589619048073, "learning_rate": 5.960531614981877e-06, "loss": 1.1251, "step": 1480 }, { "epoch": 0.0900465341149453, "grad_norm": 0.8594816568663549, "learning_rate": 6.000805477245268e-06, "loss": 1.1003, "step": 1490 }, { "epoch": 0.09065087327007917, "grad_norm": 0.8168422366319431, "learning_rate": 6.0410793395086596e-06, "loss": 1.0726, "step": 1500 }, { "epoch": 0.09125521242521303, "grad_norm": 0.8172683114812207, "learning_rate": 6.08135320177205e-06, "loss": 1.093, "step": 1510 }, { "epoch": 0.0918595515803469, "grad_norm": 0.8166330217530501, "learning_rate": 6.1216270640354414e-06, "loss": 1.0838, "step": 1520 }, { "epoch": 0.09246389073548075, "grad_norm": 0.811202383541008, "learning_rate": 6.161900926298833e-06, "loss": 1.0976, "step": 1530 }, { "epoch": 0.09306822989061461, "grad_norm": 0.8500784323743232, "learning_rate": 6.202174788562224e-06, "loss": 1.0897, "step": 1540 }, { "epoch": 0.09367256904574847, "grad_norm": 1.2505471471205927, "learning_rate": 6.242448650825615e-06, "loss": 1.1265, "step": 1550 }, { "epoch": 0.09427690820088233, "grad_norm": 1.335408088114772, "learning_rate": 6.282722513089006e-06, "loss": 1.0834, "step": 1560 }, { "epoch": 0.09488124735601619, "grad_norm": 1.3279511615791078, "learning_rate": 6.322996375352397e-06, "loss": 1.083, "step": 1570 }, { "epoch": 0.09548558651115006, "grad_norm": 1.3945538602920304, "learning_rate": 6.363270237615787e-06, "loss": 1.1037, "step": 1580 }, { "epoch": 0.09608992566628392, "grad_norm": 1.3895453761538699, "learning_rate": 6.4035440998791784e-06, "loss": 1.0854, "step": 1590 }, { "epoch": 0.09669426482141778, "grad_norm": 7.755727454888002, "learning_rate": 6.44381796214257e-06, "loss": 1.0939, "step": 1600 }, { "epoch": 0.09729860397655164, "grad_norm": 2.19733019790502, "learning_rate": 6.484091824405961e-06, "loss": 1.0602, "step": 1610 }, { "epoch": 0.0979029431316855, "grad_norm": 1.7323413551287472, "learning_rate": 6.524365686669352e-06, "loss": 1.0635, "step": 1620 }, { "epoch": 0.09850728228681936, "grad_norm": 2.5187432382415196, "learning_rate": 6.564639548932743e-06, "loss": 1.0595, "step": 1630 }, { "epoch": 0.09911162144195322, "grad_norm": 1.4119806902781733, "learning_rate": 6.604913411196134e-06, "loss": 1.0269, "step": 1640 }, { "epoch": 0.09971596059708708, "grad_norm": 2.8766116959670938, "learning_rate": 6.645187273459526e-06, "loss": 1.0537, "step": 1650 }, { "epoch": 0.10032029975222094, "grad_norm": 3.268139899080405, "learning_rate": 6.685461135722916e-06, "loss": 1.0603, "step": 1660 }, { "epoch": 0.10092463890735481, "grad_norm": 3.0981208616068776, "learning_rate": 6.725734997986308e-06, "loss": 1.0422, "step": 1670 }, { "epoch": 0.10152897806248867, "grad_norm": 2.9239848803181863, "learning_rate": 6.766008860249699e-06, "loss": 1.0532, "step": 1680 }, { "epoch": 0.10213331721762253, "grad_norm": 3.273189811884654, "learning_rate": 6.80628272251309e-06, "loss": 1.0889, "step": 1690 }, { "epoch": 0.10273765637275639, "grad_norm": 1.5966209709842396, "learning_rate": 6.84655658477648e-06, "loss": 1.0489, "step": 1700 }, { "epoch": 0.10334199552789025, "grad_norm": 1.491149466056692, "learning_rate": 6.886830447039871e-06, "loss": 1.0559, "step": 1710 }, { "epoch": 0.10394633468302411, "grad_norm": 1.320760002171467, "learning_rate": 6.927104309303263e-06, "loss": 1.0572, "step": 1720 }, { "epoch": 0.10455067383815797, "grad_norm": 1.5609458132757414, "learning_rate": 6.967378171566653e-06, "loss": 1.0589, "step": 1730 }, { "epoch": 0.10515501299329183, "grad_norm": 1.7152369715801432, "learning_rate": 7.007652033830045e-06, "loss": 1.0538, "step": 1740 }, { "epoch": 0.1057593521484257, "grad_norm": 1.3133638590103185, "learning_rate": 7.047925896093436e-06, "loss": 1.0822, "step": 1750 }, { "epoch": 0.10636369130355956, "grad_norm": 1.3315601174512322, "learning_rate": 7.088199758356827e-06, "loss": 1.0527, "step": 1760 }, { "epoch": 0.10696803045869342, "grad_norm": 1.2444197931736023, "learning_rate": 7.128473620620218e-06, "loss": 1.0621, "step": 1770 }, { "epoch": 0.10757236961382728, "grad_norm": 1.1643028023866646, "learning_rate": 7.168747482883609e-06, "loss": 1.0513, "step": 1780 }, { "epoch": 0.10817670876896114, "grad_norm": 1.0771408875481137, "learning_rate": 7.209021345147001e-06, "loss": 1.0464, "step": 1790 }, { "epoch": 0.108781047924095, "grad_norm": 1.277208641518362, "learning_rate": 7.249295207410392e-06, "loss": 1.0386, "step": 1800 }, { "epoch": 0.10938538707922886, "grad_norm": 1.118373567361937, "learning_rate": 7.289569069673782e-06, "loss": 1.0497, "step": 1810 }, { "epoch": 0.10998972623436272, "grad_norm": 1.191798426442932, "learning_rate": 7.329842931937173e-06, "loss": 1.0199, "step": 1820 }, { "epoch": 0.11059406538949658, "grad_norm": 1.2835934347441686, "learning_rate": 7.370116794200564e-06, "loss": 1.0529, "step": 1830 }, { "epoch": 0.11119840454463045, "grad_norm": 1.1854467640487083, "learning_rate": 7.410390656463955e-06, "loss": 1.0362, "step": 1840 }, { "epoch": 0.11180274369976431, "grad_norm": 1.3958257827504483, "learning_rate": 7.450664518727346e-06, "loss": 1.0269, "step": 1850 }, { "epoch": 0.11240708285489817, "grad_norm": 1.5318312249471024, "learning_rate": 7.490938380990738e-06, "loss": 1.0718, "step": 1860 }, { "epoch": 0.11301142201003203, "grad_norm": 1.2156396102319647, "learning_rate": 7.531212243254129e-06, "loss": 1.0506, "step": 1870 }, { "epoch": 0.11361576116516589, "grad_norm": 1.2622492188279455, "learning_rate": 7.5714861055175195e-06, "loss": 1.0137, "step": 1880 }, { "epoch": 0.11422010032029975, "grad_norm": 1.1776041082656958, "learning_rate": 7.611759967780911e-06, "loss": 1.0454, "step": 1890 }, { "epoch": 0.11482443947543361, "grad_norm": 1.3457164150405259, "learning_rate": 7.652033830044301e-06, "loss": 1.0562, "step": 1900 }, { "epoch": 0.11542877863056747, "grad_norm": 1.3780287006033587, "learning_rate": 7.692307692307694e-06, "loss": 1.0169, "step": 1910 }, { "epoch": 0.11603311778570134, "grad_norm": 1.3119894231887488, "learning_rate": 7.732581554571084e-06, "loss": 1.061, "step": 1920 }, { "epoch": 0.1166374569408352, "grad_norm": 1.175282317328737, "learning_rate": 7.772855416834475e-06, "loss": 1.0187, "step": 1930 }, { "epoch": 0.11724179609596906, "grad_norm": 1.2808534491075105, "learning_rate": 7.813129279097867e-06, "loss": 1.0327, "step": 1940 }, { "epoch": 0.11784613525110292, "grad_norm": 1.3145724104066527, "learning_rate": 7.853403141361257e-06, "loss": 1.0451, "step": 1950 }, { "epoch": 0.11845047440623678, "grad_norm": 1.149847233731238, "learning_rate": 7.893677003624648e-06, "loss": 1.0422, "step": 1960 }, { "epoch": 0.11905481356137064, "grad_norm": 1.1732062795371199, "learning_rate": 7.93395086588804e-06, "loss": 1.0477, "step": 1970 }, { "epoch": 0.1196591527165045, "grad_norm": 1.4198026696053145, "learning_rate": 7.97422472815143e-06, "loss": 1.0331, "step": 1980 }, { "epoch": 0.12026349187163836, "grad_norm": 1.242108882945878, "learning_rate": 8.014498590414821e-06, "loss": 1.0446, "step": 1990 }, { "epoch": 0.12086783102677222, "grad_norm": 1.3628691710522989, "learning_rate": 8.054772452678212e-06, "loss": 1.0357, "step": 2000 }, { "epoch": 0.12147217018190609, "grad_norm": 1.2834103405995956, "learning_rate": 8.095046314941604e-06, "loss": 1.0575, "step": 2010 }, { "epoch": 0.12207650933703995, "grad_norm": 1.311977552770393, "learning_rate": 8.135320177204994e-06, "loss": 1.0735, "step": 2020 }, { "epoch": 0.12268084849217381, "grad_norm": 1.2260537207160858, "learning_rate": 8.175594039468385e-06, "loss": 1.0375, "step": 2030 }, { "epoch": 0.12328518764730767, "grad_norm": 1.2377992625425653, "learning_rate": 8.215867901731777e-06, "loss": 1.0362, "step": 2040 }, { "epoch": 0.12388952680244153, "grad_norm": 1.2255633080893185, "learning_rate": 8.256141763995168e-06, "loss": 1.0185, "step": 2050 }, { "epoch": 0.12449386595757539, "grad_norm": 1.2455856325728145, "learning_rate": 8.29641562625856e-06, "loss": 1.0438, "step": 2060 }, { "epoch": 0.12509820511270925, "grad_norm": 1.184511244962389, "learning_rate": 8.33668948852195e-06, "loss": 1.0244, "step": 2070 }, { "epoch": 0.1257025442678431, "grad_norm": 1.3061707549777584, "learning_rate": 8.37696335078534e-06, "loss": 1.053, "step": 2080 }, { "epoch": 0.12630688342297697, "grad_norm": 1.1123587859483497, "learning_rate": 8.417237213048733e-06, "loss": 1.0139, "step": 2090 }, { "epoch": 0.12691122257811083, "grad_norm": 1.4726481438754204, "learning_rate": 8.457511075312124e-06, "loss": 1.0373, "step": 2100 }, { "epoch": 0.1275155617332447, "grad_norm": 1.5540649554847172, "learning_rate": 8.497784937575514e-06, "loss": 1.034, "step": 2110 }, { "epoch": 0.12811990088837855, "grad_norm": 1.3649224248440421, "learning_rate": 8.538058799838905e-06, "loss": 1.0515, "step": 2120 }, { "epoch": 0.1287242400435124, "grad_norm": 1.4468487497471012, "learning_rate": 8.578332662102297e-06, "loss": 1.0284, "step": 2130 }, { "epoch": 0.1293285791986463, "grad_norm": 1.4178686802895129, "learning_rate": 8.618606524365687e-06, "loss": 1.0316, "step": 2140 }, { "epoch": 0.12993291835378015, "grad_norm": 1.309626194559123, "learning_rate": 8.658880386629078e-06, "loss": 1.0393, "step": 2150 }, { "epoch": 0.130537257508914, "grad_norm": 1.2593406051274751, "learning_rate": 8.69915424889247e-06, "loss": 1.0316, "step": 2160 }, { "epoch": 0.13114159666404787, "grad_norm": 1.2749406125528224, "learning_rate": 8.73942811115586e-06, "loss": 1.0715, "step": 2170 }, { "epoch": 0.13174593581918173, "grad_norm": 1.3292499035112644, "learning_rate": 8.779701973419251e-06, "loss": 1.0227, "step": 2180 }, { "epoch": 0.1323502749743156, "grad_norm": 1.2617489702802869, "learning_rate": 8.819975835682643e-06, "loss": 1.0398, "step": 2190 }, { "epoch": 0.13295461412944945, "grad_norm": 1.515868586228637, "learning_rate": 8.860249697946034e-06, "loss": 1.0317, "step": 2200 }, { "epoch": 0.1335589532845833, "grad_norm": 1.5160539289633963, "learning_rate": 8.900523560209426e-06, "loss": 1.0477, "step": 2210 }, { "epoch": 0.13416329243971717, "grad_norm": 1.4076986320932554, "learning_rate": 8.940797422472815e-06, "loss": 1.0418, "step": 2220 }, { "epoch": 0.13476763159485103, "grad_norm": 1.3689604228642756, "learning_rate": 8.981071284736207e-06, "loss": 1.0393, "step": 2230 }, { "epoch": 0.1353719707499849, "grad_norm": 1.3925084507461263, "learning_rate": 9.021345146999598e-06, "loss": 1.0398, "step": 2240 }, { "epoch": 0.13597630990511875, "grad_norm": 1.6226182151963606, "learning_rate": 9.061619009262988e-06, "loss": 1.0466, "step": 2250 }, { "epoch": 0.1365806490602526, "grad_norm": 1.5785671925779194, "learning_rate": 9.10189287152638e-06, "loss": 1.0375, "step": 2260 }, { "epoch": 0.13718498821538647, "grad_norm": 1.5336500255536512, "learning_rate": 9.14216673378977e-06, "loss": 1.0175, "step": 2270 }, { "epoch": 0.13778932737052033, "grad_norm": 1.5285248771423983, "learning_rate": 9.182440596053163e-06, "loss": 1.0381, "step": 2280 }, { "epoch": 0.13839366652565419, "grad_norm": 1.5128126964514035, "learning_rate": 9.222714458316554e-06, "loss": 1.019, "step": 2290 }, { "epoch": 0.13899800568078804, "grad_norm": 1.0193583899693481, "learning_rate": 9.262988320579944e-06, "loss": 1.037, "step": 2300 }, { "epoch": 0.13960234483592193, "grad_norm": 0.8968552638464535, "learning_rate": 9.303262182843336e-06, "loss": 1.0174, "step": 2310 }, { "epoch": 0.1402066839910558, "grad_norm": 0.9955662282804074, "learning_rate": 9.343536045106727e-06, "loss": 1.0504, "step": 2320 }, { "epoch": 0.14081102314618965, "grad_norm": 0.935242322809624, "learning_rate": 9.383809907370117e-06, "loss": 1.053, "step": 2330 }, { "epoch": 0.1414153623013235, "grad_norm": 0.9136266237484135, "learning_rate": 9.424083769633508e-06, "loss": 1.0206, "step": 2340 }, { "epoch": 0.14201970145645737, "grad_norm": 0.7990622585749204, "learning_rate": 9.4643576318969e-06, "loss": 1.0361, "step": 2350 }, { "epoch": 0.14262404061159123, "grad_norm": 0.818868112843548, "learning_rate": 9.50463149416029e-06, "loss": 1.0539, "step": 2360 }, { "epoch": 0.1432283797667251, "grad_norm": 0.8291376249740307, "learning_rate": 9.544905356423681e-06, "loss": 1.0226, "step": 2370 }, { "epoch": 0.14383271892185895, "grad_norm": 0.7710790067718756, "learning_rate": 9.585179218687073e-06, "loss": 1.0188, "step": 2380 }, { "epoch": 0.1444370580769928, "grad_norm": 0.7811007599244469, "learning_rate": 9.625453080950464e-06, "loss": 1.0157, "step": 2390 }, { "epoch": 0.14504139723212667, "grad_norm": 0.8911361442899138, "learning_rate": 9.665726943213854e-06, "loss": 1.0348, "step": 2400 }, { "epoch": 0.14564573638726053, "grad_norm": 0.9358415535339627, "learning_rate": 9.706000805477246e-06, "loss": 1.0594, "step": 2410 }, { "epoch": 0.1462500755423944, "grad_norm": 0.8755710396086944, "learning_rate": 9.746274667740637e-06, "loss": 1.0426, "step": 2420 }, { "epoch": 0.14685441469752825, "grad_norm": 0.8256900857975942, "learning_rate": 9.78654853000403e-06, "loss": 1.0327, "step": 2430 }, { "epoch": 0.1474587538526621, "grad_norm": 0.8268143487314841, "learning_rate": 9.826822392267418e-06, "loss": 1.0348, "step": 2440 }, { "epoch": 0.14806309300779597, "grad_norm": 0.6426856777426775, "learning_rate": 9.86709625453081e-06, "loss": 1.0486, "step": 2450 }, { "epoch": 0.14866743216292982, "grad_norm": 0.6186159057742388, "learning_rate": 9.9073701167942e-06, "loss": 1.0212, "step": 2460 }, { "epoch": 0.14927177131806368, "grad_norm": 0.5811580952257833, "learning_rate": 9.947643979057593e-06, "loss": 1.0096, "step": 2470 }, { "epoch": 0.14987611047319757, "grad_norm": 0.5708474838279459, "learning_rate": 9.987917841320983e-06, "loss": 1.0021, "step": 2480 }, { "epoch": 0.15048044962833143, "grad_norm": 0.5654727062145292, "learning_rate": 9.999999456342893e-06, "loss": 1.0364, "step": 2490 }, { "epoch": 0.1510847887834653, "grad_norm": 0.5826878155876029, "learning_rate": 9.999996793532857e-06, "loss": 1.034, "step": 2500 }, { "epoch": 0.15168912793859915, "grad_norm": 0.613278712920909, "learning_rate": 9.999991911715683e-06, "loss": 1.0449, "step": 2510 }, { "epoch": 0.152293467093733, "grad_norm": 0.6115079312969802, "learning_rate": 9.99998481089354e-06, "loss": 1.0383, "step": 2520 }, { "epoch": 0.15289780624886687, "grad_norm": 0.6226956803391436, "learning_rate": 9.99997549106958e-06, "loss": 1.0341, "step": 2530 }, { "epoch": 0.15350214540400073, "grad_norm": 0.6528092973933503, "learning_rate": 9.999963952247936e-06, "loss": 1.05, "step": 2540 }, { "epoch": 0.1541064845591346, "grad_norm": 1.1185585768666104, "learning_rate": 9.999950194433732e-06, "loss": 1.0414, "step": 2550 }, { "epoch": 0.15471082371426845, "grad_norm": 1.0223501722353807, "learning_rate": 9.999934217633071e-06, "loss": 1.0418, "step": 2560 }, { "epoch": 0.1553151628694023, "grad_norm": 1.0729184525617614, "learning_rate": 9.999916021853046e-06, "loss": 1.0401, "step": 2570 }, { "epoch": 0.15591950202453617, "grad_norm": 0.9819539057691418, "learning_rate": 9.99989560710173e-06, "loss": 1.0469, "step": 2580 }, { "epoch": 0.15652384117967003, "grad_norm": 1.0257784624253015, "learning_rate": 9.999872973388185e-06, "loss": 1.0339, "step": 2590 }, { "epoch": 0.15712818033480389, "grad_norm": 0.5320735974056858, "learning_rate": 9.999848120722456e-06, "loss": 1.0172, "step": 2600 }, { "epoch": 0.15773251948993774, "grad_norm": 0.5348157261770901, "learning_rate": 9.999821049115573e-06, "loss": 1.0427, "step": 2610 }, { "epoch": 0.1583368586450716, "grad_norm": 0.5782711821319967, "learning_rate": 9.999791758579547e-06, "loss": 1.0294, "step": 2620 }, { "epoch": 0.15894119780020546, "grad_norm": 0.5279519393941836, "learning_rate": 9.99976024912738e-06, "loss": 1.0266, "step": 2630 }, { "epoch": 0.15954553695533932, "grad_norm": 0.5313993134461803, "learning_rate": 9.999726520773057e-06, "loss": 1.0533, "step": 2640 }, { "epoch": 0.1601498761104732, "grad_norm": 0.4983785625267852, "learning_rate": 9.999690573531544e-06, "loss": 1.0012, "step": 2650 }, { "epoch": 0.16075421526560707, "grad_norm": 0.5487759545240649, "learning_rate": 9.999652407418796e-06, "loss": 1.0237, "step": 2660 }, { "epoch": 0.16135855442074093, "grad_norm": 0.49628698674465005, "learning_rate": 9.99961202245175e-06, "loss": 1.0526, "step": 2670 }, { "epoch": 0.1619628935758748, "grad_norm": 0.5927136450856338, "learning_rate": 9.99956941864833e-06, "loss": 1.0252, "step": 2680 }, { "epoch": 0.16256723273100865, "grad_norm": 0.5363941477651917, "learning_rate": 9.999524596027444e-06, "loss": 1.0062, "step": 2690 }, { "epoch": 0.1631715718861425, "grad_norm": 0.5615419870779127, "learning_rate": 9.999477554608985e-06, "loss": 1.0504, "step": 2700 }, { "epoch": 0.16377591104127637, "grad_norm": 0.5577826362507532, "learning_rate": 9.999428294413827e-06, "loss": 1.0271, "step": 2710 }, { "epoch": 0.16438025019641023, "grad_norm": 0.5715709287082611, "learning_rate": 9.999376815463833e-06, "loss": 1.0379, "step": 2720 }, { "epoch": 0.1649845893515441, "grad_norm": 0.5213940033970969, "learning_rate": 9.999323117781851e-06, "loss": 1.0103, "step": 2730 }, { "epoch": 0.16558892850667795, "grad_norm": 0.5315095683793392, "learning_rate": 9.999267201391711e-06, "loss": 1.0162, "step": 2740 }, { "epoch": 0.1661932676618118, "grad_norm": 0.6022435742068142, "learning_rate": 9.99920906631823e-06, "loss": 1.0169, "step": 2750 }, { "epoch": 0.16679760681694566, "grad_norm": 0.5210443489858492, "learning_rate": 9.999148712587207e-06, "loss": 1.0199, "step": 2760 }, { "epoch": 0.16740194597207952, "grad_norm": 0.5729742309285049, "learning_rate": 9.999086140225427e-06, "loss": 1.0172, "step": 2770 }, { "epoch": 0.16800628512721338, "grad_norm": 0.5076629992942371, "learning_rate": 9.99902134926066e-06, "loss": 1.0148, "step": 2780 }, { "epoch": 0.16861062428234724, "grad_norm": 0.5351987826516066, "learning_rate": 9.99895433972166e-06, "loss": 1.0086, "step": 2790 }, { "epoch": 0.1692149634374811, "grad_norm": 0.5547553571639994, "learning_rate": 9.998885111638167e-06, "loss": 1.0579, "step": 2800 }, { "epoch": 0.16981930259261496, "grad_norm": 0.5692818290723988, "learning_rate": 9.998813665040906e-06, "loss": 1.0353, "step": 2810 }, { "epoch": 0.17042364174774885, "grad_norm": 0.5548982707366165, "learning_rate": 9.99873999996158e-06, "loss": 1.0596, "step": 2820 }, { "epoch": 0.1710279809028827, "grad_norm": 0.5233844420318373, "learning_rate": 9.998664116432886e-06, "loss": 0.9952, "step": 2830 }, { "epoch": 0.17163232005801657, "grad_norm": 0.5082168638296541, "learning_rate": 9.9985860144885e-06, "loss": 1.0238, "step": 2840 }, { "epoch": 0.17223665921315043, "grad_norm": 0.5578950922481182, "learning_rate": 9.998505694163085e-06, "loss": 1.0326, "step": 2850 }, { "epoch": 0.1728409983682843, "grad_norm": 0.5657283177136884, "learning_rate": 9.998423155492285e-06, "loss": 1.0091, "step": 2860 }, { "epoch": 0.17344533752341815, "grad_norm": 0.5596485344170619, "learning_rate": 9.998338398512733e-06, "loss": 1.0367, "step": 2870 }, { "epoch": 0.174049676678552, "grad_norm": 0.51696431960722, "learning_rate": 9.998251423262042e-06, "loss": 0.997, "step": 2880 }, { "epoch": 0.17465401583368587, "grad_norm": 0.5435579243065897, "learning_rate": 9.998162229778813e-06, "loss": 1.0637, "step": 2890 }, { "epoch": 0.17525835498881973, "grad_norm": 0.6325614121742273, "learning_rate": 9.99807081810263e-06, "loss": 1.0168, "step": 2900 }, { "epoch": 0.17586269414395359, "grad_norm": 0.605587728278636, "learning_rate": 9.997977188274064e-06, "loss": 1.0797, "step": 2910 }, { "epoch": 0.17646703329908744, "grad_norm": 0.6757822217558517, "learning_rate": 9.997881340334663e-06, "loss": 1.0213, "step": 2920 }, { "epoch": 0.1770713724542213, "grad_norm": 0.6176956206212408, "learning_rate": 9.99778327432697e-06, "loss": 1.0246, "step": 2930 }, { "epoch": 0.17767571160935516, "grad_norm": 0.6548094212081415, "learning_rate": 9.997682990294503e-06, "loss": 1.0146, "step": 2940 }, { "epoch": 0.17828005076448902, "grad_norm": 0.6951349743054358, "learning_rate": 9.997580488281769e-06, "loss": 1.0141, "step": 2950 }, { "epoch": 0.17888438991962288, "grad_norm": 0.772414307533063, "learning_rate": 9.997475768334261e-06, "loss": 1.05, "step": 2960 }, { "epoch": 0.17948872907475674, "grad_norm": 0.6643376685897671, "learning_rate": 9.99736883049845e-06, "loss": 1.0208, "step": 2970 }, { "epoch": 0.1800930682298906, "grad_norm": 0.765221094147346, "learning_rate": 9.997259674821799e-06, "loss": 1.0196, "step": 2980 }, { "epoch": 0.1806974073850245, "grad_norm": 0.7531802530766694, "learning_rate": 9.997148301352748e-06, "loss": 1.0172, "step": 2990 }, { "epoch": 0.18130174654015835, "grad_norm": 0.6371992091469362, "learning_rate": 9.997034710140728e-06, "loss": 1.0202, "step": 3000 }, { "epoch": 0.1819060856952922, "grad_norm": 0.5948971392823025, "learning_rate": 9.996918901236148e-06, "loss": 1.0086, "step": 3010 }, { "epoch": 0.18251042485042607, "grad_norm": 0.6334731002766227, "learning_rate": 9.996800874690407e-06, "loss": 1.0226, "step": 3020 }, { "epoch": 0.18311476400555993, "grad_norm": 0.6550854027278942, "learning_rate": 9.996680630555882e-06, "loss": 1.0084, "step": 3030 }, { "epoch": 0.1837191031606938, "grad_norm": 0.6284135437513536, "learning_rate": 9.996558168885942e-06, "loss": 1.0125, "step": 3040 }, { "epoch": 0.18432344231582765, "grad_norm": 0.7855845439023009, "learning_rate": 9.996433489734931e-06, "loss": 1.052, "step": 3050 }, { "epoch": 0.1849277814709615, "grad_norm": 0.8434267981407233, "learning_rate": 9.996306593158185e-06, "loss": 1.0281, "step": 3060 }, { "epoch": 0.18553212062609536, "grad_norm": 0.8261274066025182, "learning_rate": 9.996177479212019e-06, "loss": 1.0349, "step": 3070 }, { "epoch": 0.18613645978122922, "grad_norm": 0.7670039231998137, "learning_rate": 9.996046147953735e-06, "loss": 1.0178, "step": 3080 }, { "epoch": 0.18674079893636308, "grad_norm": 0.8340417835072003, "learning_rate": 9.99591259944162e-06, "loss": 1.0132, "step": 3090 }, { "epoch": 0.18734513809149694, "grad_norm": 0.8074205706627108, "learning_rate": 9.995776833734938e-06, "loss": 1.0344, "step": 3100 }, { "epoch": 0.1879494772466308, "grad_norm": 0.7965585543505449, "learning_rate": 9.995638850893947e-06, "loss": 1.0396, "step": 3110 }, { "epoch": 0.18855381640176466, "grad_norm": 0.7987860336651947, "learning_rate": 9.995498650979883e-06, "loss": 1.0457, "step": 3120 }, { "epoch": 0.18915815555689852, "grad_norm": 0.8341307090500774, "learning_rate": 9.995356234054965e-06, "loss": 1.0452, "step": 3130 }, { "epoch": 0.18976249471203238, "grad_norm": 0.8043702412662023, "learning_rate": 9.995211600182397e-06, "loss": 1.0462, "step": 3140 }, { "epoch": 0.19036683386716624, "grad_norm": 1.287384332451675, "learning_rate": 9.995064749426373e-06, "loss": 1.0222, "step": 3150 }, { "epoch": 0.19097117302230013, "grad_norm": 1.329146487219306, "learning_rate": 9.99491568185206e-06, "loss": 1.0179, "step": 3160 }, { "epoch": 0.191575512177434, "grad_norm": 1.3550835898336, "learning_rate": 9.994764397525618e-06, "loss": 1.0194, "step": 3170 }, { "epoch": 0.19217985133256785, "grad_norm": 1.264847416855836, "learning_rate": 9.994610896514186e-06, "loss": 1.0217, "step": 3180 }, { "epoch": 0.1927841904877017, "grad_norm": 1.2899878303672319, "learning_rate": 9.994455178885887e-06, "loss": 1.0285, "step": 3190 }, { "epoch": 0.19338852964283557, "grad_norm": 1.336794198498458, "learning_rate": 9.99429724470983e-06, "loss": 1.0146, "step": 3200 }, { "epoch": 0.19399286879796943, "grad_norm": 1.1547500078832722, "learning_rate": 9.994137094056106e-06, "loss": 1.0192, "step": 3210 }, { "epoch": 0.19459720795310329, "grad_norm": 1.348857656618809, "learning_rate": 9.993974726995792e-06, "loss": 1.001, "step": 3220 }, { "epoch": 0.19520154710823714, "grad_norm": 1.2036929661433324, "learning_rate": 9.993810143600944e-06, "loss": 1.0154, "step": 3230 }, { "epoch": 0.195805886263371, "grad_norm": 2.037536987927584, "learning_rate": 9.993643343944606e-06, "loss": 1.0206, "step": 3240 }, { "epoch": 0.19641022541850486, "grad_norm": 2.5297444195829186, "learning_rate": 9.993474328100803e-06, "loss": 1.0485, "step": 3250 }, { "epoch": 0.19701456457363872, "grad_norm": 3.589903453677699, "learning_rate": 9.993303096144544e-06, "loss": 1.014, "step": 3260 }, { "epoch": 0.19761890372877258, "grad_norm": 2.516134148310326, "learning_rate": 9.993129648151824e-06, "loss": 1.0095, "step": 3270 }, { "epoch": 0.19822324288390644, "grad_norm": 2.7676889250136862, "learning_rate": 9.99295398419962e-06, "loss": 1.0119, "step": 3280 }, { "epoch": 0.1988275820390403, "grad_norm": 2.7667612656362923, "learning_rate": 9.992776104365889e-06, "loss": 1.0407, "step": 3290 }, { "epoch": 0.19943192119417416, "grad_norm": 1.146737343870628, "learning_rate": 9.992596008729576e-06, "loss": 1.0367, "step": 3300 }, { "epoch": 0.20003626034930802, "grad_norm": 1.3733146878560363, "learning_rate": 9.992413697370606e-06, "loss": 1.055, "step": 3310 }, { "epoch": 0.20064059950444188, "grad_norm": 1.1277796533841509, "learning_rate": 9.992229170369893e-06, "loss": 1.0271, "step": 3320 }, { "epoch": 0.20124493865957577, "grad_norm": 1.1534780442251698, "learning_rate": 9.992042427809328e-06, "loss": 1.0345, "step": 3330 }, { "epoch": 0.20184927781470963, "grad_norm": 1.1475591893423531, "learning_rate": 9.991853469771789e-06, "loss": 1.032, "step": 3340 }, { "epoch": 0.2024536169698435, "grad_norm": 1.1501540349651993, "learning_rate": 9.991662296341132e-06, "loss": 1.0389, "step": 3350 }, { "epoch": 0.20305795612497735, "grad_norm": 1.323771729498539, "learning_rate": 9.991468907602205e-06, "loss": 1.0451, "step": 3360 }, { "epoch": 0.2036622952801112, "grad_norm": 1.0373182991384071, "learning_rate": 9.99127330364083e-06, "loss": 0.9728, "step": 3370 }, { "epoch": 0.20426663443524506, "grad_norm": 1.0488182248336289, "learning_rate": 9.99107548454382e-06, "loss": 1.0142, "step": 3380 }, { "epoch": 0.20487097359037892, "grad_norm": 1.0604249314096938, "learning_rate": 9.990875450398964e-06, "loss": 1.0225, "step": 3390 }, { "epoch": 0.20547531274551278, "grad_norm": 1.2090228060976917, "learning_rate": 9.990673201295042e-06, "loss": 1.0389, "step": 3400 }, { "epoch": 0.20607965190064664, "grad_norm": 1.192454200611463, "learning_rate": 9.990468737321809e-06, "loss": 1.0291, "step": 3410 }, { "epoch": 0.2066839910557805, "grad_norm": 1.0957087501776068, "learning_rate": 9.990262058570007e-06, "loss": 1.0122, "step": 3420 }, { "epoch": 0.20728833021091436, "grad_norm": 1.0568327775353616, "learning_rate": 9.990053165131361e-06, "loss": 1.0539, "step": 3430 }, { "epoch": 0.20789266936604822, "grad_norm": 1.143890601431876, "learning_rate": 9.989842057098578e-06, "loss": 1.0208, "step": 3440 }, { "epoch": 0.20849700852118208, "grad_norm": 1.2152187663565115, "learning_rate": 9.989628734565347e-06, "loss": 1.0253, "step": 3450 }, { "epoch": 0.20910134767631594, "grad_norm": 1.151770858930067, "learning_rate": 9.989413197626343e-06, "loss": 1.0048, "step": 3460 }, { "epoch": 0.2097056868314498, "grad_norm": 1.191047547930404, "learning_rate": 9.98919544637722e-06, "loss": 1.0279, "step": 3470 }, { "epoch": 0.21031002598658366, "grad_norm": 1.118603549050447, "learning_rate": 9.988975480914618e-06, "loss": 1.0249, "step": 3480 }, { "epoch": 0.21091436514171752, "grad_norm": 1.2045164357250302, "learning_rate": 9.988753301336158e-06, "loss": 1.011, "step": 3490 }, { "epoch": 0.2115187042968514, "grad_norm": 1.2727860426424333, "learning_rate": 9.98852890774044e-06, "loss": 1.0109, "step": 3500 }, { "epoch": 0.21212304345198527, "grad_norm": 1.2232131903821466, "learning_rate": 9.988302300227055e-06, "loss": 1.0293, "step": 3510 }, { "epoch": 0.21272738260711913, "grad_norm": 1.1581226645248222, "learning_rate": 9.98807347889657e-06, "loss": 1.0228, "step": 3520 }, { "epoch": 0.21333172176225298, "grad_norm": 1.2449768024609256, "learning_rate": 9.987842443850536e-06, "loss": 1.0234, "step": 3530 }, { "epoch": 0.21393606091738684, "grad_norm": 1.1975927583401016, "learning_rate": 9.987609195191486e-06, "loss": 1.0134, "step": 3540 }, { "epoch": 0.2145404000725207, "grad_norm": 1.1875663787990882, "learning_rate": 9.987373733022937e-06, "loss": 1.0101, "step": 3550 }, { "epoch": 0.21514473922765456, "grad_norm": 1.1566686006538525, "learning_rate": 9.98713605744939e-06, "loss": 1.0206, "step": 3560 }, { "epoch": 0.21574907838278842, "grad_norm": 1.13108038867575, "learning_rate": 9.98689616857632e-06, "loss": 1.0198, "step": 3570 }, { "epoch": 0.21635341753792228, "grad_norm": 1.0467233101117432, "learning_rate": 9.986654066510196e-06, "loss": 1.0267, "step": 3580 }, { "epoch": 0.21695775669305614, "grad_norm": 1.0691146534191223, "learning_rate": 9.98640975135846e-06, "loss": 1.0397, "step": 3590 }, { "epoch": 0.21756209584819, "grad_norm": 1.271232751720568, "learning_rate": 9.98616322322954e-06, "loss": 1.0059, "step": 3600 }, { "epoch": 0.21816643500332386, "grad_norm": 1.0941437783791206, "learning_rate": 9.985914482232844e-06, "loss": 1.0254, "step": 3610 }, { "epoch": 0.21877077415845772, "grad_norm": 1.1539838269433296, "learning_rate": 9.985663528478769e-06, "loss": 1.0156, "step": 3620 }, { "epoch": 0.21937511331359158, "grad_norm": 1.1295717857607617, "learning_rate": 9.985410362078682e-06, "loss": 1.0279, "step": 3630 }, { "epoch": 0.21997945246872544, "grad_norm": 1.1362703389129263, "learning_rate": 9.985154983144945e-06, "loss": 1.0326, "step": 3640 }, { "epoch": 0.2205837916238593, "grad_norm": 1.2233926396929016, "learning_rate": 9.98489739179089e-06, "loss": 1.0251, "step": 3650 }, { "epoch": 0.22118813077899316, "grad_norm": 1.1422444246951031, "learning_rate": 9.98463758813084e-06, "loss": 1.0403, "step": 3660 }, { "epoch": 0.22179246993412705, "grad_norm": 1.180709702656107, "learning_rate": 9.984375572280096e-06, "loss": 1.024, "step": 3670 }, { "epoch": 0.2223968090892609, "grad_norm": 1.0663308403722467, "learning_rate": 9.98411134435494e-06, "loss": 1.02, "step": 3680 }, { "epoch": 0.22300114824439476, "grad_norm": 1.0303893693811321, "learning_rate": 9.983844904472638e-06, "loss": 1.0201, "step": 3690 }, { "epoch": 0.22360548739952862, "grad_norm": 1.4702708759476768, "learning_rate": 9.983576252751435e-06, "loss": 1.0119, "step": 3700 }, { "epoch": 0.22420982655466248, "grad_norm": 1.2812013733174148, "learning_rate": 9.983305389310558e-06, "loss": 1.0253, "step": 3710 }, { "epoch": 0.22481416570979634, "grad_norm": 1.2443679370614207, "learning_rate": 9.98303231427022e-06, "loss": 1.0095, "step": 3720 }, { "epoch": 0.2254185048649302, "grad_norm": 1.2591025755974963, "learning_rate": 9.982757027751611e-06, "loss": 1.0251, "step": 3730 }, { "epoch": 0.22602284402006406, "grad_norm": 1.2774968652029854, "learning_rate": 9.982479529876904e-06, "loss": 1.0263, "step": 3740 }, { "epoch": 0.22662718317519792, "grad_norm": 1.172663835151976, "learning_rate": 9.982199820769252e-06, "loss": 1.0097, "step": 3750 }, { "epoch": 0.22723152233033178, "grad_norm": 1.2073777258143432, "learning_rate": 9.981917900552788e-06, "loss": 1.0081, "step": 3760 }, { "epoch": 0.22783586148546564, "grad_norm": 1.1984285625601867, "learning_rate": 9.981633769352634e-06, "loss": 1.0356, "step": 3770 }, { "epoch": 0.2284402006405995, "grad_norm": 1.1323228359391508, "learning_rate": 9.981347427294885e-06, "loss": 1.0246, "step": 3780 }, { "epoch": 0.22904453979573336, "grad_norm": 1.2275135705760998, "learning_rate": 9.98105887450662e-06, "loss": 1.0136, "step": 3790 }, { "epoch": 0.22964887895086722, "grad_norm": 1.3589673185061513, "learning_rate": 9.9807681111159e-06, "loss": 1.0179, "step": 3800 }, { "epoch": 0.23025321810600108, "grad_norm": 1.3504035926727547, "learning_rate": 9.980475137251766e-06, "loss": 1.0197, "step": 3810 }, { "epoch": 0.23085755726113494, "grad_norm": 1.2839034309726234, "learning_rate": 9.980179953044239e-06, "loss": 1.0381, "step": 3820 }, { "epoch": 0.2314618964162688, "grad_norm": 1.285197256321224, "learning_rate": 9.979882558624324e-06, "loss": 1.013, "step": 3830 }, { "epoch": 0.23206623557140268, "grad_norm": 1.3469674248431074, "learning_rate": 9.979582954124005e-06, "loss": 1.0412, "step": 3840 }, { "epoch": 0.23267057472653654, "grad_norm": 1.5282597752166729, "learning_rate": 9.979281139676245e-06, "loss": 1.0061, "step": 3850 }, { "epoch": 0.2332749138816704, "grad_norm": 1.5147290838846537, "learning_rate": 9.978977115414993e-06, "loss": 1.0016, "step": 3860 }, { "epoch": 0.23387925303680426, "grad_norm": 1.5772951028976443, "learning_rate": 9.978670881475173e-06, "loss": 1.0347, "step": 3870 }, { "epoch": 0.23448359219193812, "grad_norm": 1.5110522107773365, "learning_rate": 9.978362437992694e-06, "loss": 1.0028, "step": 3880 }, { "epoch": 0.23508793134707198, "grad_norm": 1.4899685269225527, "learning_rate": 9.978051785104442e-06, "loss": 1.0226, "step": 3890 }, { "epoch": 0.23569227050220584, "grad_norm": 0.9005713878545523, "learning_rate": 9.977738922948285e-06, "loss": 1.0231, "step": 3900 }, { "epoch": 0.2362966096573397, "grad_norm": 0.9030844095327106, "learning_rate": 9.977423851663075e-06, "loss": 1.0343, "step": 3910 }, { "epoch": 0.23690094881247356, "grad_norm": 0.9815562803619603, "learning_rate": 9.977106571388637e-06, "loss": 1.0279, "step": 3920 }, { "epoch": 0.23750528796760742, "grad_norm": 0.9302840544241502, "learning_rate": 9.976787082265785e-06, "loss": 1.0502, "step": 3930 }, { "epoch": 0.23810962712274128, "grad_norm": 0.9790158151791362, "learning_rate": 9.976465384436302e-06, "loss": 1.0381, "step": 3940 }, { "epoch": 0.23871396627787514, "grad_norm": 0.8196333591376035, "learning_rate": 9.976141478042967e-06, "loss": 1.0325, "step": 3950 }, { "epoch": 0.239318305433009, "grad_norm": 0.8263757916793677, "learning_rate": 9.975815363229522e-06, "loss": 1.0205, "step": 3960 }, { "epoch": 0.23992264458814286, "grad_norm": 0.8824787795572878, "learning_rate": 9.975487040140702e-06, "loss": 1.0135, "step": 3970 }, { "epoch": 0.24052698374327672, "grad_norm": 0.793756744958486, "learning_rate": 9.975156508922216e-06, "loss": 1.0198, "step": 3980 }, { "epoch": 0.24113132289841058, "grad_norm": 0.8072068220285223, "learning_rate": 9.974823769720756e-06, "loss": 0.9994, "step": 3990 }, { "epoch": 0.24173566205354444, "grad_norm": 0.857445588862923, "learning_rate": 9.97448882268399e-06, "loss": 1.0383, "step": 4000 }, { "epoch": 0.24234000120867832, "grad_norm": 0.9131111310812462, "learning_rate": 9.974151667960568e-06, "loss": 1.0185, "step": 4010 }, { "epoch": 0.24294434036381218, "grad_norm": 0.8862050022736238, "learning_rate": 9.973812305700122e-06, "loss": 1.0319, "step": 4020 }, { "epoch": 0.24354867951894604, "grad_norm": 0.8330811647742326, "learning_rate": 9.973470736053259e-06, "loss": 1.0207, "step": 4030 }, { "epoch": 0.2441530186740799, "grad_norm": 0.8629570022678702, "learning_rate": 9.97312695917157e-06, "loss": 1.0304, "step": 4040 }, { "epoch": 0.24475735782921376, "grad_norm": 0.6508968864252784, "learning_rate": 9.972780975207624e-06, "loss": 1.0428, "step": 4050 }, { "epoch": 0.24536169698434762, "grad_norm": 0.5707817451030397, "learning_rate": 9.972432784314966e-06, "loss": 1.0409, "step": 4060 }, { "epoch": 0.24596603613948148, "grad_norm": 0.572935983022132, "learning_rate": 9.972082386648128e-06, "loss": 0.9872, "step": 4070 }, { "epoch": 0.24657037529461534, "grad_norm": 0.5663971776134257, "learning_rate": 9.971729782362614e-06, "loss": 0.9942, "step": 4080 }, { "epoch": 0.2471747144497492, "grad_norm": 0.5651070696370811, "learning_rate": 9.971374971614914e-06, "loss": 1.0043, "step": 4090 }, { "epoch": 0.24777905360488306, "grad_norm": 0.636604234486725, "learning_rate": 9.97101795456249e-06, "loss": 1.0289, "step": 4100 }, { "epoch": 0.24838339276001692, "grad_norm": 0.5719926954401137, "learning_rate": 9.970658731363788e-06, "loss": 1.0167, "step": 4110 }, { "epoch": 0.24898773191515078, "grad_norm": 0.6195712000662134, "learning_rate": 9.970297302178232e-06, "loss": 0.9972, "step": 4120 }, { "epoch": 0.24959207107028464, "grad_norm": 0.6320467725781482, "learning_rate": 9.969933667166222e-06, "loss": 0.9918, "step": 4130 }, { "epoch": 0.2501964102254185, "grad_norm": 0.6169397715301146, "learning_rate": 9.969567826489146e-06, "loss": 0.9968, "step": 4140 }, { "epoch": 0.25080074938055236, "grad_norm": 0.884429425115163, "learning_rate": 9.969199780309362e-06, "loss": 0.9993, "step": 4150 }, { "epoch": 0.2514050885356862, "grad_norm": 0.8758021286667406, "learning_rate": 9.968829528790207e-06, "loss": 1.0023, "step": 4160 }, { "epoch": 0.2520094276908201, "grad_norm": 0.9614454188025886, "learning_rate": 9.968457072096e-06, "loss": 1.02, "step": 4170 }, { "epoch": 0.25261376684595394, "grad_norm": 0.8516183537717978, "learning_rate": 9.968082410392041e-06, "loss": 0.9993, "step": 4180 }, { "epoch": 0.2532181060010878, "grad_norm": 0.9340873207567952, "learning_rate": 9.967705543844603e-06, "loss": 1.0084, "step": 4190 }, { "epoch": 0.25382244515622165, "grad_norm": 0.5969120148329723, "learning_rate": 9.967326472620942e-06, "loss": 1.0358, "step": 4200 }, { "epoch": 0.2544267843113555, "grad_norm": 0.5532832780491006, "learning_rate": 9.966945196889287e-06, "loss": 1.0309, "step": 4210 }, { "epoch": 0.2550311234664894, "grad_norm": 0.5279430922926298, "learning_rate": 9.966561716818852e-06, "loss": 1.0077, "step": 4220 }, { "epoch": 0.25563546262162323, "grad_norm": 0.5284510467388175, "learning_rate": 9.966176032579824e-06, "loss": 1.0122, "step": 4230 }, { "epoch": 0.2562398017767571, "grad_norm": 0.5256113886656796, "learning_rate": 9.965788144343372e-06, "loss": 1.0103, "step": 4240 }, { "epoch": 0.25684414093189095, "grad_norm": 0.4921708474117513, "learning_rate": 9.96539805228164e-06, "loss": 1.0124, "step": 4250 }, { "epoch": 0.2574484800870248, "grad_norm": 0.5330450591954504, "learning_rate": 9.965005756567754e-06, "loss": 1.0309, "step": 4260 }, { "epoch": 0.2580528192421587, "grad_norm": 0.5434351708906063, "learning_rate": 9.964611257375814e-06, "loss": 0.991, "step": 4270 }, { "epoch": 0.2586571583972926, "grad_norm": 0.5063139026797504, "learning_rate": 9.964214554880898e-06, "loss": 0.9929, "step": 4280 }, { "epoch": 0.25926149755242645, "grad_norm": 0.5173441985223839, "learning_rate": 9.963815649259064e-06, "loss": 1.0108, "step": 4290 }, { "epoch": 0.2598658367075603, "grad_norm": 0.5302461932782329, "learning_rate": 9.963414540687348e-06, "loss": 0.9879, "step": 4300 }, { "epoch": 0.26047017586269416, "grad_norm": 0.6343881137108661, "learning_rate": 9.963011229343762e-06, "loss": 1.0073, "step": 4310 }, { "epoch": 0.261074515017828, "grad_norm": 0.5665421291453269, "learning_rate": 9.962605715407296e-06, "loss": 0.9907, "step": 4320 }, { "epoch": 0.2616788541729619, "grad_norm": 0.526193747043052, "learning_rate": 9.962197999057919e-06, "loss": 1.0163, "step": 4330 }, { "epoch": 0.26228319332809574, "grad_norm": 0.5122318015872429, "learning_rate": 9.961788080476575e-06, "loss": 0.9979, "step": 4340 }, { "epoch": 0.2628875324832296, "grad_norm": 0.5175499896023672, "learning_rate": 9.961375959845187e-06, "loss": 1.0058, "step": 4350 }, { "epoch": 0.26349187163836346, "grad_norm": 0.5270647330903278, "learning_rate": 9.960961637346655e-06, "loss": 0.9961, "step": 4360 }, { "epoch": 0.2640962107934973, "grad_norm": 0.5003990334724352, "learning_rate": 9.960545113164854e-06, "loss": 1.0146, "step": 4370 }, { "epoch": 0.2647005499486312, "grad_norm": 0.51902917883346, "learning_rate": 9.960126387484643e-06, "loss": 1.0379, "step": 4380 }, { "epoch": 0.26530488910376504, "grad_norm": 0.5345674078416652, "learning_rate": 9.959705460491848e-06, "loss": 0.9949, "step": 4390 }, { "epoch": 0.2659092282588989, "grad_norm": 0.5310656521158249, "learning_rate": 9.959282332373281e-06, "loss": 0.9944, "step": 4400 }, { "epoch": 0.26651356741403276, "grad_norm": 0.513252930661846, "learning_rate": 9.958857003316725e-06, "loss": 1.0199, "step": 4410 }, { "epoch": 0.2671179065691666, "grad_norm": 0.5496207671544353, "learning_rate": 9.95842947351094e-06, "loss": 0.998, "step": 4420 }, { "epoch": 0.2677222457243005, "grad_norm": 0.5423231200415501, "learning_rate": 9.957999743145668e-06, "loss": 1.0302, "step": 4430 }, { "epoch": 0.26832658487943434, "grad_norm": 0.5350558941911503, "learning_rate": 9.957567812411621e-06, "loss": 1.034, "step": 4440 }, { "epoch": 0.2689309240345682, "grad_norm": 0.5517118363987205, "learning_rate": 9.957133681500495e-06, "loss": 0.9973, "step": 4450 }, { "epoch": 0.26953526318970206, "grad_norm": 0.5540472383363395, "learning_rate": 9.956697350604954e-06, "loss": 0.9975, "step": 4460 }, { "epoch": 0.2701396023448359, "grad_norm": 0.5297168446048597, "learning_rate": 9.956258819918643e-06, "loss": 0.9899, "step": 4470 }, { "epoch": 0.2707439414999698, "grad_norm": 0.5390981994209962, "learning_rate": 9.955818089636184e-06, "loss": 1.0248, "step": 4480 }, { "epoch": 0.27134828065510364, "grad_norm": 0.5247451214636177, "learning_rate": 9.955375159953171e-06, "loss": 1.0333, "step": 4490 }, { "epoch": 0.2719526198102375, "grad_norm": 0.5966037843707463, "learning_rate": 9.95493003106618e-06, "loss": 1.004, "step": 4500 }, { "epoch": 0.27255695896537135, "grad_norm": 0.6209620801829305, "learning_rate": 9.954482703172761e-06, "loss": 1.0113, "step": 4510 }, { "epoch": 0.2731612981205052, "grad_norm": 0.6082941355882645, "learning_rate": 9.954033176471436e-06, "loss": 0.9989, "step": 4520 }, { "epoch": 0.2737656372756391, "grad_norm": 0.6614217834772699, "learning_rate": 9.953581451161707e-06, "loss": 0.9866, "step": 4530 }, { "epoch": 0.27436997643077293, "grad_norm": 0.6350953149853057, "learning_rate": 9.953127527444049e-06, "loss": 1.0094, "step": 4540 }, { "epoch": 0.2749743155859068, "grad_norm": 0.7132217019139976, "learning_rate": 9.952671405519916e-06, "loss": 1.0053, "step": 4550 }, { "epoch": 0.27557865474104065, "grad_norm": 0.757638856045083, "learning_rate": 9.952213085591734e-06, "loss": 1.0183, "step": 4560 }, { "epoch": 0.2761829938961745, "grad_norm": 0.7079795324613742, "learning_rate": 9.951752567862906e-06, "loss": 1.0161, "step": 4570 }, { "epoch": 0.27678733305130837, "grad_norm": 0.6734659548315652, "learning_rate": 9.951289852537815e-06, "loss": 1.0004, "step": 4580 }, { "epoch": 0.27739167220644223, "grad_norm": 0.6941761188175806, "learning_rate": 9.950824939821809e-06, "loss": 1.0075, "step": 4590 }, { "epoch": 0.2779960113615761, "grad_norm": 0.6171635347372765, "learning_rate": 9.95035782992122e-06, "loss": 1.0191, "step": 4600 }, { "epoch": 0.27860035051671, "grad_norm": 0.6581497918130428, "learning_rate": 9.949888523043351e-06, "loss": 1.0392, "step": 4610 }, { "epoch": 0.27920468967184386, "grad_norm": 0.6181277058300175, "learning_rate": 9.949417019396483e-06, "loss": 1.0019, "step": 4620 }, { "epoch": 0.2798090288269777, "grad_norm": 0.6137306419982177, "learning_rate": 9.94894331918987e-06, "loss": 1.009, "step": 4630 }, { "epoch": 0.2804133679821116, "grad_norm": 0.6076993344746096, "learning_rate": 9.94846742263374e-06, "loss": 1.0072, "step": 4640 }, { "epoch": 0.28101770713724544, "grad_norm": 0.7929459926101928, "learning_rate": 9.947989329939294e-06, "loss": 1.0214, "step": 4650 }, { "epoch": 0.2816220462923793, "grad_norm": 0.7684915999055225, "learning_rate": 9.947509041318715e-06, "loss": 1.0013, "step": 4660 }, { "epoch": 0.28222638544751316, "grad_norm": 0.8202686356804075, "learning_rate": 9.947026556985154e-06, "loss": 1.0062, "step": 4670 }, { "epoch": 0.282830724602647, "grad_norm": 0.8226214402110111, "learning_rate": 9.946541877152738e-06, "loss": 1.0075, "step": 4680 }, { "epoch": 0.2834350637577809, "grad_norm": 0.7856983742487061, "learning_rate": 9.946055002036569e-06, "loss": 1.0028, "step": 4690 }, { "epoch": 0.28403940291291474, "grad_norm": 0.8505254408387751, "learning_rate": 9.945565931852724e-06, "loss": 1.0311, "step": 4700 }, { "epoch": 0.2846437420680486, "grad_norm": 0.7675633909806334, "learning_rate": 9.945074666818251e-06, "loss": 0.9977, "step": 4710 }, { "epoch": 0.28524808122318246, "grad_norm": 0.7997794300046326, "learning_rate": 9.944581207151176e-06, "loss": 0.9962, "step": 4720 }, { "epoch": 0.2858524203783163, "grad_norm": 0.7720538634296082, "learning_rate": 9.944085553070498e-06, "loss": 1.0163, "step": 4730 }, { "epoch": 0.2864567595334502, "grad_norm": 0.8225570149364019, "learning_rate": 9.943587704796185e-06, "loss": 1.0324, "step": 4740 }, { "epoch": 0.28706109868858404, "grad_norm": 1.1807604509175111, "learning_rate": 9.943087662549187e-06, "loss": 1.0037, "step": 4750 }, { "epoch": 0.2876654378437179, "grad_norm": 1.3134790369548701, "learning_rate": 9.942585426551422e-06, "loss": 1.0346, "step": 4760 }, { "epoch": 0.28826977699885176, "grad_norm": 1.2530231629475332, "learning_rate": 9.942080997025787e-06, "loss": 0.9849, "step": 4770 }, { "epoch": 0.2888741161539856, "grad_norm": 1.3582193716833795, "learning_rate": 9.941574374196143e-06, "loss": 1.0231, "step": 4780 }, { "epoch": 0.2894784553091195, "grad_norm": 1.3835079302012114, "learning_rate": 9.941065558287332e-06, "loss": 1.016, "step": 4790 }, { "epoch": 0.29008279446425334, "grad_norm": 0.967025199831158, "learning_rate": 9.940554549525168e-06, "loss": 1.032, "step": 4800 }, { "epoch": 0.2906871336193872, "grad_norm": 0.9112362947015128, "learning_rate": 9.940041348136439e-06, "loss": 1.0212, "step": 4810 }, { "epoch": 0.29129147277452105, "grad_norm": 1.1436856824772292, "learning_rate": 9.939525954348902e-06, "loss": 1.0162, "step": 4820 }, { "epoch": 0.2918958119296549, "grad_norm": 1.0067462433369134, "learning_rate": 9.93900836839129e-06, "loss": 1.0107, "step": 4830 }, { "epoch": 0.2925001510847888, "grad_norm": 1.1024454895275722, "learning_rate": 9.938488590493307e-06, "loss": 1.0226, "step": 4840 }, { "epoch": 0.29310449023992263, "grad_norm": 2.656804042917146, "learning_rate": 9.937966620885637e-06, "loss": 1.0367, "step": 4850 }, { "epoch": 0.2937088293950565, "grad_norm": 2.370309401156759, "learning_rate": 9.937442459799927e-06, "loss": 1.0053, "step": 4860 }, { "epoch": 0.29431316855019035, "grad_norm": 2.6888181390566563, "learning_rate": 9.9369161074688e-06, "loss": 1.0289, "step": 4870 }, { "epoch": 0.2949175077053242, "grad_norm": 2.5265543603310747, "learning_rate": 9.936387564125853e-06, "loss": 0.9984, "step": 4880 }, { "epoch": 0.29552184686045807, "grad_norm": 2.6560780585820334, "learning_rate": 9.935856830005654e-06, "loss": 1.0069, "step": 4890 }, { "epoch": 0.29612618601559193, "grad_norm": 1.0950460917732527, "learning_rate": 9.935323905343746e-06, "loss": 0.9931, "step": 4900 }, { "epoch": 0.2967305251707258, "grad_norm": 1.3820175656653786, "learning_rate": 9.93478879037664e-06, "loss": 1.0001, "step": 4910 }, { "epoch": 0.29733486432585965, "grad_norm": 1.207982565293866, "learning_rate": 9.934251485341822e-06, "loss": 1.0235, "step": 4920 }, { "epoch": 0.2979392034809935, "grad_norm": 1.0379262494990424, "learning_rate": 9.933711990477746e-06, "loss": 0.9995, "step": 4930 }, { "epoch": 0.29854354263612737, "grad_norm": 1.0061289961358295, "learning_rate": 9.933170306023844e-06, "loss": 0.9947, "step": 4940 }, { "epoch": 0.2991478817912613, "grad_norm": 1.1629824628189593, "learning_rate": 9.932626432220515e-06, "loss": 1.0218, "step": 4950 }, { "epoch": 0.29975222094639514, "grad_norm": 1.0745583116485489, "learning_rate": 9.932080369309131e-06, "loss": 1.0466, "step": 4960 }, { "epoch": 0.300356560101529, "grad_norm": 0.9771012827705086, "learning_rate": 9.931532117532036e-06, "loss": 0.9814, "step": 4970 }, { "epoch": 0.30096089925666286, "grad_norm": 1.049345463556189, "learning_rate": 9.930981677132544e-06, "loss": 1.004, "step": 4980 }, { "epoch": 0.3015652384117967, "grad_norm": 1.0279316367099933, "learning_rate": 9.930429048354944e-06, "loss": 0.9926, "step": 4990 }, { "epoch": 0.3021695775669306, "grad_norm": 1.0842718547937733, "learning_rate": 9.929874231444492e-06, "loss": 0.9865, "step": 5000 }, { "epoch": 0.30277391672206444, "grad_norm": 1.128936697247095, "learning_rate": 9.929317226647417e-06, "loss": 1.0075, "step": 5010 }, { "epoch": 0.3033782558771983, "grad_norm": 1.0606267020366618, "learning_rate": 9.92875803421092e-06, "loss": 0.9727, "step": 5020 }, { "epoch": 0.30398259503233216, "grad_norm": 1.1121105791161146, "learning_rate": 9.928196654383167e-06, "loss": 1.0158, "step": 5030 }, { "epoch": 0.304586934187466, "grad_norm": 1.209476126183107, "learning_rate": 9.927633087413304e-06, "loss": 1.0174, "step": 5040 }, { "epoch": 0.3051912733425999, "grad_norm": 1.1204506000640373, "learning_rate": 9.92706733355144e-06, "loss": 0.992, "step": 5050 }, { "epoch": 0.30579561249773374, "grad_norm": 1.1103095771812197, "learning_rate": 9.92649939304866e-06, "loss": 0.9754, "step": 5060 }, { "epoch": 0.3063999516528676, "grad_norm": 1.0298429237223206, "learning_rate": 9.925929266157016e-06, "loss": 1.0075, "step": 5070 }, { "epoch": 0.30700429080800146, "grad_norm": 1.1301911418300359, "learning_rate": 9.92535695312953e-06, "loss": 1.0109, "step": 5080 }, { "epoch": 0.3076086299631353, "grad_norm": 1.12017833927496, "learning_rate": 9.9247824542202e-06, "loss": 0.9939, "step": 5090 }, { "epoch": 0.3082129691182692, "grad_norm": 1.2825944867534116, "learning_rate": 9.924205769683981e-06, "loss": 1.0008, "step": 5100 }, { "epoch": 0.30881730827340304, "grad_norm": 1.1819104499761663, "learning_rate": 9.923626899776815e-06, "loss": 1.0218, "step": 5110 }, { "epoch": 0.3094216474285369, "grad_norm": 1.2165172294213984, "learning_rate": 9.923045844755602e-06, "loss": 0.9936, "step": 5120 }, { "epoch": 0.31002598658367075, "grad_norm": 1.214728876533914, "learning_rate": 9.922462604878216e-06, "loss": 1.0375, "step": 5130 }, { "epoch": 0.3106303257388046, "grad_norm": 1.1463835104012132, "learning_rate": 9.921877180403499e-06, "loss": 0.9946, "step": 5140 }, { "epoch": 0.3112346648939385, "grad_norm": 0.9960379761045224, "learning_rate": 9.921289571591263e-06, "loss": 0.9939, "step": 5150 }, { "epoch": 0.31183900404907233, "grad_norm": 1.0260036330591795, "learning_rate": 9.92069977870229e-06, "loss": 0.9972, "step": 5160 }, { "epoch": 0.3124433432042062, "grad_norm": 1.1191230761992401, "learning_rate": 9.920107801998332e-06, "loss": 1.0171, "step": 5170 }, { "epoch": 0.31304768235934005, "grad_norm": 0.9602369585823124, "learning_rate": 9.91951364174211e-06, "loss": 1.0161, "step": 5180 }, { "epoch": 0.3136520215144739, "grad_norm": 1.0766618253928928, "learning_rate": 9.918917298197313e-06, "loss": 0.997, "step": 5190 }, { "epoch": 0.31425636066960777, "grad_norm": 1.1470098083236124, "learning_rate": 9.918318771628596e-06, "loss": 1.0065, "step": 5200 }, { "epoch": 0.31486069982474163, "grad_norm": 1.0899646982853766, "learning_rate": 9.91771806230159e-06, "loss": 1.0135, "step": 5210 }, { "epoch": 0.3154650389798755, "grad_norm": 1.1483469336894205, "learning_rate": 9.91711517048289e-06, "loss": 0.9953, "step": 5220 }, { "epoch": 0.31606937813500935, "grad_norm": 1.151446109822897, "learning_rate": 9.916510096440059e-06, "loss": 1.0166, "step": 5230 }, { "epoch": 0.3166737172901432, "grad_norm": 1.170085922207658, "learning_rate": 9.91590284044163e-06, "loss": 1.0046, "step": 5240 }, { "epoch": 0.31727805644527707, "grad_norm": 1.108059940177387, "learning_rate": 9.915293402757106e-06, "loss": 0.9921, "step": 5250 }, { "epoch": 0.3178823956004109, "grad_norm": 1.050844949783572, "learning_rate": 9.914681783656956e-06, "loss": 0.9938, "step": 5260 }, { "epoch": 0.3184867347555448, "grad_norm": 1.0946051348771029, "learning_rate": 9.914067983412616e-06, "loss": 1.022, "step": 5270 }, { "epoch": 0.31909107391067865, "grad_norm": 1.1085941043245624, "learning_rate": 9.913452002296492e-06, "loss": 1.0047, "step": 5280 }, { "epoch": 0.31969541306581256, "grad_norm": 1.0868859335173506, "learning_rate": 9.912833840581961e-06, "loss": 1.001, "step": 5290 }, { "epoch": 0.3202997522209464, "grad_norm": 1.2713784186567478, "learning_rate": 9.912213498543359e-06, "loss": 0.9886, "step": 5300 }, { "epoch": 0.3209040913760803, "grad_norm": 1.3650321918354733, "learning_rate": 9.911590976456e-06, "loss": 1.0351, "step": 5310 }, { "epoch": 0.32150843053121414, "grad_norm": 1.254607633304583, "learning_rate": 9.910966274596154e-06, "loss": 0.9978, "step": 5320 }, { "epoch": 0.322112769686348, "grad_norm": 1.211135276855096, "learning_rate": 9.91033939324107e-06, "loss": 1.0121, "step": 5330 }, { "epoch": 0.32271710884148186, "grad_norm": 1.1815895855599206, "learning_rate": 9.90971033266896e-06, "loss": 1.0034, "step": 5340 }, { "epoch": 0.3233214479966157, "grad_norm": 1.2369326168307309, "learning_rate": 9.909079093158997e-06, "loss": 1.041, "step": 5350 }, { "epoch": 0.3239257871517496, "grad_norm": 1.1079950575888624, "learning_rate": 9.908445674991328e-06, "loss": 1.0083, "step": 5360 }, { "epoch": 0.32453012630688344, "grad_norm": 1.1972584951271126, "learning_rate": 9.907810078447066e-06, "loss": 0.9926, "step": 5370 }, { "epoch": 0.3251344654620173, "grad_norm": 1.1002416731465097, "learning_rate": 9.907172303808293e-06, "loss": 1.0124, "step": 5380 }, { "epoch": 0.32573880461715116, "grad_norm": 1.08929349089889, "learning_rate": 9.906532351358047e-06, "loss": 1.0079, "step": 5390 }, { "epoch": 0.326343143772285, "grad_norm": 1.2489750798564807, "learning_rate": 9.905890221380347e-06, "loss": 1.0056, "step": 5400 }, { "epoch": 0.3269474829274189, "grad_norm": 1.232673278335761, "learning_rate": 9.905245914160167e-06, "loss": 1.0167, "step": 5410 }, { "epoch": 0.32755182208255273, "grad_norm": 1.3690124382242743, "learning_rate": 9.904599429983452e-06, "loss": 1.037, "step": 5420 }, { "epoch": 0.3281561612376866, "grad_norm": 1.2611167113637718, "learning_rate": 9.903950769137116e-06, "loss": 1.0327, "step": 5430 }, { "epoch": 0.32876050039282045, "grad_norm": 1.2642450239980394, "learning_rate": 9.903299931909034e-06, "loss": 1.0029, "step": 5440 }, { "epoch": 0.3293648395479543, "grad_norm": 1.4616179750325198, "learning_rate": 9.902646918588048e-06, "loss": 0.9931, "step": 5450 }, { "epoch": 0.3299691787030882, "grad_norm": 1.4897412660016676, "learning_rate": 9.901991729463966e-06, "loss": 1.0161, "step": 5460 }, { "epoch": 0.33057351785822203, "grad_norm": 1.414515926955237, "learning_rate": 9.901334364827564e-06, "loss": 0.9849, "step": 5470 }, { "epoch": 0.3311778570133559, "grad_norm": 1.3243483131524123, "learning_rate": 9.900674824970579e-06, "loss": 1.0187, "step": 5480 }, { "epoch": 0.33178219616848975, "grad_norm": 1.4291180712523233, "learning_rate": 9.900013110185717e-06, "loss": 1.0251, "step": 5490 }, { "epoch": 0.3323865353236236, "grad_norm": 0.8870838433599929, "learning_rate": 9.89934922076665e-06, "loss": 0.9663, "step": 5500 }, { "epoch": 0.33299087447875747, "grad_norm": 0.8599778729162925, "learning_rate": 9.898683157008012e-06, "loss": 1.0182, "step": 5510 }, { "epoch": 0.33359521363389133, "grad_norm": 0.8685420749064647, "learning_rate": 9.898014919205403e-06, "loss": 1.0133, "step": 5520 }, { "epoch": 0.3341995527890252, "grad_norm": 0.9190668518359403, "learning_rate": 9.897344507655387e-06, "loss": 1.0025, "step": 5530 }, { "epoch": 0.33480389194415905, "grad_norm": 0.8452117875235303, "learning_rate": 9.896671922655497e-06, "loss": 1.0346, "step": 5540 }, { "epoch": 0.3354082310992929, "grad_norm": 0.7608069571147485, "learning_rate": 9.895997164504224e-06, "loss": 1.0058, "step": 5550 }, { "epoch": 0.33601257025442677, "grad_norm": 0.7685634905463093, "learning_rate": 9.895320233501027e-06, "loss": 0.9829, "step": 5560 }, { "epoch": 0.3366169094095606, "grad_norm": 0.7932078532361053, "learning_rate": 9.89464112994633e-06, "loss": 1.0134, "step": 5570 }, { "epoch": 0.3372212485646945, "grad_norm": 0.7729376796762994, "learning_rate": 9.893959854141522e-06, "loss": 0.9869, "step": 5580 }, { "epoch": 0.33782558771982835, "grad_norm": 0.8219890383262062, "learning_rate": 9.893276406388952e-06, "loss": 1.0172, "step": 5590 }, { "epoch": 0.3384299268749622, "grad_norm": 0.8616634454094614, "learning_rate": 9.892590786991938e-06, "loss": 1.041, "step": 5600 }, { "epoch": 0.33903426603009607, "grad_norm": 0.8034396383510313, "learning_rate": 9.891902996254755e-06, "loss": 0.9573, "step": 5610 }, { "epoch": 0.3396386051852299, "grad_norm": 0.939431525813389, "learning_rate": 9.89121303448265e-06, "loss": 0.9926, "step": 5620 }, { "epoch": 0.34024294434036384, "grad_norm": 0.8071969049179112, "learning_rate": 9.890520901981825e-06, "loss": 1.014, "step": 5630 }, { "epoch": 0.3408472834954977, "grad_norm": 0.754025179432674, "learning_rate": 9.889826599059453e-06, "loss": 1.0067, "step": 5640 }, { "epoch": 0.34145162265063156, "grad_norm": 0.5665333715179984, "learning_rate": 9.889130126023665e-06, "loss": 1.0163, "step": 5650 }, { "epoch": 0.3420559618057654, "grad_norm": 0.5668675755503378, "learning_rate": 9.888431483183558e-06, "loss": 1.0242, "step": 5660 }, { "epoch": 0.3426603009608993, "grad_norm": 0.5811077849837687, "learning_rate": 9.887730670849192e-06, "loss": 0.9957, "step": 5670 }, { "epoch": 0.34326464011603314, "grad_norm": 0.54016526581125, "learning_rate": 9.887027689331585e-06, "loss": 0.9898, "step": 5680 }, { "epoch": 0.343868979271167, "grad_norm": 0.5703311662481036, "learning_rate": 9.886322538942723e-06, "loss": 1.019, "step": 5690 }, { "epoch": 0.34447331842630086, "grad_norm": 0.598548325840395, "learning_rate": 9.885615219995554e-06, "loss": 1.0091, "step": 5700 }, { "epoch": 0.3450776575814347, "grad_norm": 0.5577122384284335, "learning_rate": 9.88490573280399e-06, "loss": 0.9998, "step": 5710 }, { "epoch": 0.3456819967365686, "grad_norm": 0.6177372564163169, "learning_rate": 9.884194077682895e-06, "loss": 1.0114, "step": 5720 }, { "epoch": 0.34628633589170243, "grad_norm": 0.5973741439521935, "learning_rate": 9.88348025494811e-06, "loss": 1.0064, "step": 5730 }, { "epoch": 0.3468906750468363, "grad_norm": 0.5968116464639346, "learning_rate": 9.882764264916427e-06, "loss": 1.0043, "step": 5740 }, { "epoch": 0.34749501420197015, "grad_norm": 0.9528692893877637, "learning_rate": 9.882046107905604e-06, "loss": 1.0059, "step": 5750 }, { "epoch": 0.348099353357104, "grad_norm": 0.8641691818073385, "learning_rate": 9.881325784234361e-06, "loss": 1.0101, "step": 5760 }, { "epoch": 0.3487036925122379, "grad_norm": 0.852262168058538, "learning_rate": 9.88060329422238e-06, "loss": 0.9808, "step": 5770 }, { "epoch": 0.34930803166737173, "grad_norm": 0.8819272312678405, "learning_rate": 9.879878638190301e-06, "loss": 0.9889, "step": 5780 }, { "epoch": 0.3499123708225056, "grad_norm": 0.8146998671695789, "learning_rate": 9.87915181645973e-06, "loss": 1.0029, "step": 5790 }, { "epoch": 0.35051670997763945, "grad_norm": 0.5157107777855294, "learning_rate": 9.878422829353229e-06, "loss": 0.9918, "step": 5800 }, { "epoch": 0.3511210491327733, "grad_norm": 0.5327743713540637, "learning_rate": 9.877691677194326e-06, "loss": 0.978, "step": 5810 }, { "epoch": 0.35172538828790717, "grad_norm": 0.5483135621755293, "learning_rate": 9.876958360307506e-06, "loss": 0.967, "step": 5820 }, { "epoch": 0.35232972744304103, "grad_norm": 0.5200417729753583, "learning_rate": 9.876222879018218e-06, "loss": 0.9996, "step": 5830 }, { "epoch": 0.3529340665981749, "grad_norm": 0.5710913007032501, "learning_rate": 9.875485233652867e-06, "loss": 0.9979, "step": 5840 }, { "epoch": 0.35353840575330875, "grad_norm": 0.5065416779787284, "learning_rate": 9.874745424538826e-06, "loss": 0.9951, "step": 5850 }, { "epoch": 0.3541427449084426, "grad_norm": 0.5519496302011243, "learning_rate": 9.874003452004416e-06, "loss": 1.0332, "step": 5860 }, { "epoch": 0.35474708406357647, "grad_norm": 0.5065670748673957, "learning_rate": 9.873259316378934e-06, "loss": 1.0262, "step": 5870 }, { "epoch": 0.3553514232187103, "grad_norm": 0.5191163057311311, "learning_rate": 9.872513017992624e-06, "loss": 1.0082, "step": 5880 }, { "epoch": 0.3559557623738442, "grad_norm": 0.49451164618513554, "learning_rate": 9.871764557176698e-06, "loss": 0.9948, "step": 5890 }, { "epoch": 0.35656010152897805, "grad_norm": 0.49279905161619625, "learning_rate": 9.871013934263319e-06, "loss": 1.0038, "step": 5900 }, { "epoch": 0.3571644406841119, "grad_norm": 0.5371868002364787, "learning_rate": 9.870261149585616e-06, "loss": 1.0145, "step": 5910 }, { "epoch": 0.35776877983924577, "grad_norm": 0.5282710851908254, "learning_rate": 9.86950620347768e-06, "loss": 1.0055, "step": 5920 }, { "epoch": 0.3583731189943796, "grad_norm": 0.5379944232999399, "learning_rate": 9.868749096274554e-06, "loss": 0.9982, "step": 5930 }, { "epoch": 0.3589774581495135, "grad_norm": 0.49212860953679277, "learning_rate": 9.867989828312246e-06, "loss": 0.9876, "step": 5940 }, { "epoch": 0.35958179730464734, "grad_norm": 0.5082399618458712, "learning_rate": 9.867228399927717e-06, "loss": 1.009, "step": 5950 }, { "epoch": 0.3601861364597812, "grad_norm": 0.5328510728537502, "learning_rate": 9.866464811458893e-06, "loss": 0.9996, "step": 5960 }, { "epoch": 0.36079047561491506, "grad_norm": 0.5256073837698226, "learning_rate": 9.865699063244655e-06, "loss": 1.0132, "step": 5970 }, { "epoch": 0.361394814770049, "grad_norm": 0.5242644353528377, "learning_rate": 9.864931155624842e-06, "loss": 0.9831, "step": 5980 }, { "epoch": 0.36199915392518284, "grad_norm": 0.516099706093187, "learning_rate": 9.864161088940256e-06, "loss": 1.0086, "step": 5990 }, { "epoch": 0.3626034930803167, "grad_norm": 0.5776429540423382, "learning_rate": 9.863388863532651e-06, "loss": 1.0152, "step": 6000 }, { "epoch": 0.36320783223545056, "grad_norm": 0.5574160162130866, "learning_rate": 9.862614479744743e-06, "loss": 1.033, "step": 6010 }, { "epoch": 0.3638121713905844, "grad_norm": 0.5841822336746999, "learning_rate": 9.861837937920203e-06, "loss": 1.0281, "step": 6020 }, { "epoch": 0.3644165105457183, "grad_norm": 0.5448786121532411, "learning_rate": 9.861059238403665e-06, "loss": 1.0115, "step": 6030 }, { "epoch": 0.36502084970085213, "grad_norm": 0.5415840371777243, "learning_rate": 9.860278381540715e-06, "loss": 1.0039, "step": 6040 }, { "epoch": 0.365625188855986, "grad_norm": 0.563140715788272, "learning_rate": 9.8594953676779e-06, "loss": 0.9923, "step": 6050 }, { "epoch": 0.36622952801111985, "grad_norm": 0.5569321653314635, "learning_rate": 9.858710197162722e-06, "loss": 0.9894, "step": 6060 }, { "epoch": 0.3668338671662537, "grad_norm": 0.5277713151700876, "learning_rate": 9.85792287034364e-06, "loss": 1.0008, "step": 6070 }, { "epoch": 0.3674382063213876, "grad_norm": 0.48969583395453264, "learning_rate": 9.857133387570072e-06, "loss": 0.9929, "step": 6080 }, { "epoch": 0.36804254547652143, "grad_norm": 0.5288958874123402, "learning_rate": 9.856341749192393e-06, "loss": 1.0121, "step": 6090 }, { "epoch": 0.3686468846316553, "grad_norm": 0.5885370907535019, "learning_rate": 9.855547955561931e-06, "loss": 1.0031, "step": 6100 }, { "epoch": 0.36925122378678915, "grad_norm": 0.6130785511916662, "learning_rate": 9.854752007030976e-06, "loss": 1.0067, "step": 6110 }, { "epoch": 0.369855562941923, "grad_norm": 0.6239670734268047, "learning_rate": 9.853953903952767e-06, "loss": 1.0097, "step": 6120 }, { "epoch": 0.37045990209705687, "grad_norm": 0.6446600588460474, "learning_rate": 9.853153646681509e-06, "loss": 1.0049, "step": 6130 }, { "epoch": 0.37106424125219073, "grad_norm": 0.6324095438719612, "learning_rate": 9.852351235572353e-06, "loss": 0.9902, "step": 6140 }, { "epoch": 0.3716685804073246, "grad_norm": 0.786949200074966, "learning_rate": 9.851546670981411e-06, "loss": 0.9938, "step": 6150 }, { "epoch": 0.37227291956245845, "grad_norm": 0.7316542189995123, "learning_rate": 9.850739953265753e-06, "loss": 0.9957, "step": 6160 }, { "epoch": 0.3728772587175923, "grad_norm": 0.6602545798559167, "learning_rate": 9.8499310827834e-06, "loss": 0.9915, "step": 6170 }, { "epoch": 0.37348159787272617, "grad_norm": 0.6531405610720743, "learning_rate": 9.849120059893329e-06, "loss": 1.0072, "step": 6180 }, { "epoch": 0.37408593702786, "grad_norm": 0.6947368116689114, "learning_rate": 9.848306884955475e-06, "loss": 1.0362, "step": 6190 }, { "epoch": 0.3746902761829939, "grad_norm": 0.6383771212914652, "learning_rate": 9.847491558330726e-06, "loss": 0.9873, "step": 6200 }, { "epoch": 0.37529461533812775, "grad_norm": 0.6405579644703344, "learning_rate": 9.846674080380925e-06, "loss": 1.0091, "step": 6210 }, { "epoch": 0.3758989544932616, "grad_norm": 0.6318280139648661, "learning_rate": 9.845854451468868e-06, "loss": 0.985, "step": 6220 }, { "epoch": 0.37650329364839547, "grad_norm": 0.6339638225372017, "learning_rate": 9.845032671958312e-06, "loss": 1.0154, "step": 6230 }, { "epoch": 0.3771076328035293, "grad_norm": 0.6619136842919839, "learning_rate": 9.844208742213963e-06, "loss": 1.0029, "step": 6240 }, { "epoch": 0.3777119719586632, "grad_norm": 0.7928113525284128, "learning_rate": 9.843382662601481e-06, "loss": 1.011, "step": 6250 }, { "epoch": 0.37831631111379704, "grad_norm": 0.834095380925119, "learning_rate": 9.842554433487482e-06, "loss": 1.0143, "step": 6260 }, { "epoch": 0.3789206502689309, "grad_norm": 0.757658715574136, "learning_rate": 9.841724055239535e-06, "loss": 1.0268, "step": 6270 }, { "epoch": 0.37952498942406476, "grad_norm": 0.7718469183204548, "learning_rate": 9.840891528226164e-06, "loss": 1.0201, "step": 6280 }, { "epoch": 0.3801293285791986, "grad_norm": 0.7627786860683481, "learning_rate": 9.840056852816846e-06, "loss": 1.034, "step": 6290 }, { "epoch": 0.3807336677343325, "grad_norm": 0.7943004246989257, "learning_rate": 9.839220029382011e-06, "loss": 0.988, "step": 6300 }, { "epoch": 0.38133800688946634, "grad_norm": 0.7752178630352604, "learning_rate": 9.838381058293043e-06, "loss": 0.9986, "step": 6310 }, { "epoch": 0.38194234604460026, "grad_norm": 0.7853777183156614, "learning_rate": 9.837539939922279e-06, "loss": 1.0195, "step": 6320 }, { "epoch": 0.3825466851997341, "grad_norm": 0.7621120294237848, "learning_rate": 9.836696674643007e-06, "loss": 1.006, "step": 6330 }, { "epoch": 0.383151024354868, "grad_norm": 0.7774745458608108, "learning_rate": 9.835851262829473e-06, "loss": 1.0281, "step": 6340 }, { "epoch": 0.38375536351000183, "grad_norm": 1.215581564301588, "learning_rate": 9.83500370485687e-06, "loss": 1.0152, "step": 6350 }, { "epoch": 0.3843597026651357, "grad_norm": 1.1912153483343944, "learning_rate": 9.834154001101343e-06, "loss": 1.0194, "step": 6360 }, { "epoch": 0.38496404182026955, "grad_norm": 1.1849026775130922, "learning_rate": 9.83330215194e-06, "loss": 0.9818, "step": 6370 }, { "epoch": 0.3855683809754034, "grad_norm": 1.1765707938172545, "learning_rate": 9.832448157750883e-06, "loss": 1.0108, "step": 6380 }, { "epoch": 0.3861727201305373, "grad_norm": 1.206340887436501, "learning_rate": 9.831592018913003e-06, "loss": 0.9717, "step": 6390 }, { "epoch": 0.38677705928567113, "grad_norm": 0.8389696997546268, "learning_rate": 9.830733735806314e-06, "loss": 1.0048, "step": 6400 }, { "epoch": 0.387381398440805, "grad_norm": 1.081752292506, "learning_rate": 9.829873308811724e-06, "loss": 1.0191, "step": 6410 }, { "epoch": 0.38798573759593885, "grad_norm": 1.2848922573959891, "learning_rate": 9.82901073831109e-06, "loss": 1.0089, "step": 6420 }, { "epoch": 0.3885900767510727, "grad_norm": 1.0491364864125905, "learning_rate": 9.828146024687227e-06, "loss": 1.0092, "step": 6430 }, { "epoch": 0.38919441590620657, "grad_norm": 1.1203027685668425, "learning_rate": 9.82727916832389e-06, "loss": 1.0081, "step": 6440 }, { "epoch": 0.38979875506134043, "grad_norm": 2.949380975637301, "learning_rate": 9.826410169605795e-06, "loss": 0.982, "step": 6450 }, { "epoch": 0.3904030942164743, "grad_norm": 2.5655070580122725, "learning_rate": 9.825539028918605e-06, "loss": 0.9933, "step": 6460 }, { "epoch": 0.39100743337160815, "grad_norm": 2.58481575091607, "learning_rate": 9.824665746648933e-06, "loss": 1.0009, "step": 6470 }, { "epoch": 0.391611772526742, "grad_norm": 2.6736885574250215, "learning_rate": 9.823790323184344e-06, "loss": 1.0031, "step": 6480 }, { "epoch": 0.39221611168187587, "grad_norm": 2.5851065212745823, "learning_rate": 9.822912758913352e-06, "loss": 1.005, "step": 6490 }, { "epoch": 0.3928204508370097, "grad_norm": 1.0358027271238648, "learning_rate": 9.82203305422542e-06, "loss": 1.0108, "step": 6500 }, { "epoch": 0.3934247899921436, "grad_norm": 1.0415077326769058, "learning_rate": 9.821151209510965e-06, "loss": 1.0075, "step": 6510 }, { "epoch": 0.39402912914727745, "grad_norm": 0.8984057919416041, "learning_rate": 9.82026722516135e-06, "loss": 1.0109, "step": 6520 }, { "epoch": 0.3946334683024113, "grad_norm": 1.014800362641625, "learning_rate": 9.819381101568887e-06, "loss": 0.9858, "step": 6530 }, { "epoch": 0.39523780745754516, "grad_norm": 0.9751634770274491, "learning_rate": 9.818492839126844e-06, "loss": 1.0172, "step": 6540 }, { "epoch": 0.395842146612679, "grad_norm": 1.0515289372041692, "learning_rate": 9.817602438229429e-06, "loss": 1.0039, "step": 6550 }, { "epoch": 0.3964464857678129, "grad_norm": 1.167983434940786, "learning_rate": 9.816709899271805e-06, "loss": 1.0189, "step": 6560 }, { "epoch": 0.39705082492294674, "grad_norm": 1.1893825943310294, "learning_rate": 9.815815222650082e-06, "loss": 1.014, "step": 6570 }, { "epoch": 0.3976551640780806, "grad_norm": 1.0242832548952334, "learning_rate": 9.814918408761318e-06, "loss": 0.9807, "step": 6580 }, { "epoch": 0.39825950323321446, "grad_norm": 1.0479578604354343, "learning_rate": 9.814019458003523e-06, "loss": 0.9694, "step": 6590 }, { "epoch": 0.3988638423883483, "grad_norm": 1.2749111022574557, "learning_rate": 9.813118370775652e-06, "loss": 1.0019, "step": 6600 }, { "epoch": 0.3994681815434822, "grad_norm": 1.073361321222981, "learning_rate": 9.812215147477608e-06, "loss": 0.9982, "step": 6610 }, { "epoch": 0.40007252069861604, "grad_norm": 1.047818464589798, "learning_rate": 9.811309788510243e-06, "loss": 0.9972, "step": 6620 }, { "epoch": 0.4006768598537499, "grad_norm": 0.9747534725894509, "learning_rate": 9.810402294275359e-06, "loss": 1.0049, "step": 6630 }, { "epoch": 0.40128119900888376, "grad_norm": 1.050499526975608, "learning_rate": 9.8094926651757e-06, "loss": 0.9962, "step": 6640 }, { "epoch": 0.4018855381640176, "grad_norm": 1.321414260491159, "learning_rate": 9.808580901614964e-06, "loss": 1.0375, "step": 6650 }, { "epoch": 0.40248987731915153, "grad_norm": 1.2177986013454383, "learning_rate": 9.807667003997791e-06, "loss": 0.9848, "step": 6660 }, { "epoch": 0.4030942164742854, "grad_norm": 1.035795192043516, "learning_rate": 9.806750972729774e-06, "loss": 1.0064, "step": 6670 }, { "epoch": 0.40369855562941925, "grad_norm": 1.2967787417060446, "learning_rate": 9.805832808217447e-06, "loss": 1.0159, "step": 6680 }, { "epoch": 0.4043028947845531, "grad_norm": 1.0389867219124678, "learning_rate": 9.804912510868289e-06, "loss": 0.9841, "step": 6690 }, { "epoch": 0.404907233939687, "grad_norm": 1.3409165580157771, "learning_rate": 9.803990081090737e-06, "loss": 1.0186, "step": 6700 }, { "epoch": 0.40551157309482083, "grad_norm": 1.3105392118251176, "learning_rate": 9.803065519294162e-06, "loss": 1.0251, "step": 6710 }, { "epoch": 0.4061159122499547, "grad_norm": 1.3179278079501042, "learning_rate": 9.802138825888887e-06, "loss": 1.0091, "step": 6720 }, { "epoch": 0.40672025140508855, "grad_norm": 1.1782595293863323, "learning_rate": 9.801210001286182e-06, "loss": 0.9931, "step": 6730 }, { "epoch": 0.4073245905602224, "grad_norm": 1.155908582055039, "learning_rate": 9.800279045898255e-06, "loss": 1.0306, "step": 6740 }, { "epoch": 0.40792892971535627, "grad_norm": 1.0862095414943798, "learning_rate": 9.799345960138274e-06, "loss": 0.9881, "step": 6750 }, { "epoch": 0.40853326887049013, "grad_norm": 1.040057157128269, "learning_rate": 9.798410744420338e-06, "loss": 1.014, "step": 6760 }, { "epoch": 0.409137608025624, "grad_norm": 1.0373449963795083, "learning_rate": 9.797473399159501e-06, "loss": 0.9889, "step": 6770 }, { "epoch": 0.40974194718075785, "grad_norm": 0.9810127621452572, "learning_rate": 9.796533924771758e-06, "loss": 1.0151, "step": 6780 }, { "epoch": 0.4103462863358917, "grad_norm": 0.9999966957880234, "learning_rate": 9.795592321674046e-06, "loss": 0.9847, "step": 6790 }, { "epoch": 0.41095062549102557, "grad_norm": 1.21250793554175, "learning_rate": 9.794648590284253e-06, "loss": 1.0017, "step": 6800 }, { "epoch": 0.4115549646461594, "grad_norm": 1.2003842035689833, "learning_rate": 9.793702731021207e-06, "loss": 1.0143, "step": 6810 }, { "epoch": 0.4121593038012933, "grad_norm": 1.018284273328576, "learning_rate": 9.792754744304683e-06, "loss": 1.004, "step": 6820 }, { "epoch": 0.41276364295642715, "grad_norm": 1.0073312646222339, "learning_rate": 9.7918046305554e-06, "loss": 1.0106, "step": 6830 }, { "epoch": 0.413367982111561, "grad_norm": 1.0923757142684147, "learning_rate": 9.790852390195017e-06, "loss": 1.022, "step": 6840 }, { "epoch": 0.41397232126669486, "grad_norm": 1.0304250789790763, "learning_rate": 9.789898023646143e-06, "loss": 0.9912, "step": 6850 }, { "epoch": 0.4145766604218287, "grad_norm": 1.0985006592797248, "learning_rate": 9.788941531332327e-06, "loss": 0.9899, "step": 6860 }, { "epoch": 0.4151809995769626, "grad_norm": 1.0629701582316857, "learning_rate": 9.78798291367806e-06, "loss": 0.9907, "step": 6870 }, { "epoch": 0.41578533873209644, "grad_norm": 1.013124840100367, "learning_rate": 9.787022171108782e-06, "loss": 1.0111, "step": 6880 }, { "epoch": 0.4163896778872303, "grad_norm": 1.1715755837533566, "learning_rate": 9.786059304050866e-06, "loss": 0.9857, "step": 6890 }, { "epoch": 0.41699401704236416, "grad_norm": 1.2565863109710398, "learning_rate": 9.78509431293164e-06, "loss": 0.9911, "step": 6900 }, { "epoch": 0.417598356197498, "grad_norm": 1.3113844912921317, "learning_rate": 9.784127198179368e-06, "loss": 0.99, "step": 6910 }, { "epoch": 0.4182026953526319, "grad_norm": 1.1705994053090263, "learning_rate": 9.783157960223253e-06, "loss": 1.0015, "step": 6920 }, { "epoch": 0.41880703450776574, "grad_norm": 1.165752825855671, "learning_rate": 9.782186599493448e-06, "loss": 0.9877, "step": 6930 }, { "epoch": 0.4194113736628996, "grad_norm": 1.2102328573192687, "learning_rate": 9.781213116421044e-06, "loss": 0.9932, "step": 6940 }, { "epoch": 0.42001571281803346, "grad_norm": 1.080627758934474, "learning_rate": 9.780237511438073e-06, "loss": 0.9994, "step": 6950 }, { "epoch": 0.4206200519731673, "grad_norm": 1.1176859646274222, "learning_rate": 9.779259784977514e-06, "loss": 1.0068, "step": 6960 }, { "epoch": 0.4212243911283012, "grad_norm": 1.1908060720706377, "learning_rate": 9.77827993747328e-06, "loss": 1.0054, "step": 6970 }, { "epoch": 0.42182873028343504, "grad_norm": 1.1166902998641413, "learning_rate": 9.777297969360226e-06, "loss": 0.9921, "step": 6980 }, { "epoch": 0.4224330694385689, "grad_norm": 1.1042008072219143, "learning_rate": 9.77631388107416e-06, "loss": 1.0087, "step": 6990 }, { "epoch": 0.4230374085937028, "grad_norm": 1.1703704357999294, "learning_rate": 9.775327673051814e-06, "loss": 0.9934, "step": 7000 }, { "epoch": 0.4236417477488367, "grad_norm": 1.3204898338176354, "learning_rate": 9.774339345730874e-06, "loss": 1.0288, "step": 7010 }, { "epoch": 0.42424608690397053, "grad_norm": 1.2478257239089718, "learning_rate": 9.773348899549959e-06, "loss": 1.0127, "step": 7020 }, { "epoch": 0.4248504260591044, "grad_norm": 1.1995988774845179, "learning_rate": 9.77235633494863e-06, "loss": 1.0161, "step": 7030 }, { "epoch": 0.42545476521423825, "grad_norm": 1.190152001486567, "learning_rate": 9.77136165236739e-06, "loss": 0.9929, "step": 7040 }, { "epoch": 0.4260591043693721, "grad_norm": 1.468029887672351, "learning_rate": 9.770364852247682e-06, "loss": 0.9863, "step": 7050 }, { "epoch": 0.42666344352450597, "grad_norm": 1.348062017936987, "learning_rate": 9.769365935031885e-06, "loss": 1.0051, "step": 7060 }, { "epoch": 0.42726778267963983, "grad_norm": 1.2737446120875788, "learning_rate": 9.768364901163322e-06, "loss": 1.0074, "step": 7070 }, { "epoch": 0.4278721218347737, "grad_norm": 1.396269211915545, "learning_rate": 9.767361751086255e-06, "loss": 1.0179, "step": 7080 }, { "epoch": 0.42847646098990755, "grad_norm": 1.4199247246451963, "learning_rate": 9.76635648524588e-06, "loss": 1.0162, "step": 7090 }, { "epoch": 0.4290808001450414, "grad_norm": 0.8975674762644424, "learning_rate": 9.765349104088337e-06, "loss": 1.0163, "step": 7100 }, { "epoch": 0.42968513930017527, "grad_norm": 0.821557882217157, "learning_rate": 9.764339608060705e-06, "loss": 1.012, "step": 7110 }, { "epoch": 0.4302894784553091, "grad_norm": 0.8999199060882129, "learning_rate": 9.763327997610998e-06, "loss": 0.9897, "step": 7120 }, { "epoch": 0.430893817610443, "grad_norm": 0.8938660044016248, "learning_rate": 9.762314273188171e-06, "loss": 1.0005, "step": 7130 }, { "epoch": 0.43149815676557685, "grad_norm": 0.8682660718178058, "learning_rate": 9.761298435242119e-06, "loss": 1.0099, "step": 7140 }, { "epoch": 0.4321024959207107, "grad_norm": 0.7362474025654122, "learning_rate": 9.76028048422367e-06, "loss": 0.9971, "step": 7150 }, { "epoch": 0.43270683507584456, "grad_norm": 0.7691450101351018, "learning_rate": 9.759260420584592e-06, "loss": 1.0095, "step": 7160 }, { "epoch": 0.4333111742309784, "grad_norm": 0.73552539237862, "learning_rate": 9.758238244777592e-06, "loss": 1.0283, "step": 7170 }, { "epoch": 0.4339155133861123, "grad_norm": 0.7946866692838345, "learning_rate": 9.757213957256316e-06, "loss": 1.0123, "step": 7180 }, { "epoch": 0.43451985254124614, "grad_norm": 0.8231548083372715, "learning_rate": 9.75618755847534e-06, "loss": 0.9983, "step": 7190 }, { "epoch": 0.43512419169638, "grad_norm": 0.818935982427908, "learning_rate": 9.755159048890183e-06, "loss": 1.0097, "step": 7200 }, { "epoch": 0.43572853085151386, "grad_norm": 0.8766921812361363, "learning_rate": 9.754128428957302e-06, "loss": 0.9929, "step": 7210 }, { "epoch": 0.4363328700066477, "grad_norm": 0.8276226362836122, "learning_rate": 9.753095699134084e-06, "loss": 0.9913, "step": 7220 }, { "epoch": 0.4369372091617816, "grad_norm": 0.840050349505506, "learning_rate": 9.752060859878859e-06, "loss": 0.9804, "step": 7230 }, { "epoch": 0.43754154831691544, "grad_norm": 0.8361762627878246, "learning_rate": 9.75102391165089e-06, "loss": 0.9882, "step": 7240 }, { "epoch": 0.4381458874720493, "grad_norm": 0.5391825704457952, "learning_rate": 9.749984854910375e-06, "loss": 0.988, "step": 7250 }, { "epoch": 0.43875022662718316, "grad_norm": 0.5663380880574846, "learning_rate": 9.74894369011845e-06, "loss": 1.0155, "step": 7260 }, { "epoch": 0.439354565782317, "grad_norm": 0.5136098987146434, "learning_rate": 9.747900417737187e-06, "loss": 0.9892, "step": 7270 }, { "epoch": 0.4399589049374509, "grad_norm": 0.5745091597084321, "learning_rate": 9.746855038229589e-06, "loss": 1.0059, "step": 7280 }, { "epoch": 0.44056324409258474, "grad_norm": 0.543223375127291, "learning_rate": 9.7458075520596e-06, "loss": 0.9785, "step": 7290 }, { "epoch": 0.4411675832477186, "grad_norm": 0.6081263345427493, "learning_rate": 9.744757959692094e-06, "loss": 1.0006, "step": 7300 }, { "epoch": 0.44177192240285246, "grad_norm": 0.6475485365675823, "learning_rate": 9.743706261592884e-06, "loss": 1.0014, "step": 7310 }, { "epoch": 0.4423762615579863, "grad_norm": 0.574667862452884, "learning_rate": 9.742652458228715e-06, "loss": 1.0145, "step": 7320 }, { "epoch": 0.4429806007131202, "grad_norm": 0.5817633067184044, "learning_rate": 9.741596550067268e-06, "loss": 1.0019, "step": 7330 }, { "epoch": 0.4435849398682541, "grad_norm": 0.5737610251519444, "learning_rate": 9.740538537577151e-06, "loss": 1.017, "step": 7340 }, { "epoch": 0.44418927902338795, "grad_norm": 0.8379013520479386, "learning_rate": 9.739478421227918e-06, "loss": 1.0089, "step": 7350 }, { "epoch": 0.4447936181785218, "grad_norm": 0.8653984258209246, "learning_rate": 9.738416201490048e-06, "loss": 1.0115, "step": 7360 }, { "epoch": 0.44539795733365567, "grad_norm": 0.8585524627727098, "learning_rate": 9.737351878834957e-06, "loss": 1.0033, "step": 7370 }, { "epoch": 0.44600229648878953, "grad_norm": 0.8635926564121462, "learning_rate": 9.736285453734992e-06, "loss": 1.0095, "step": 7380 }, { "epoch": 0.4466066356439234, "grad_norm": 0.9364062955477616, "learning_rate": 9.735216926663433e-06, "loss": 1.0285, "step": 7390 }, { "epoch": 0.44721097479905725, "grad_norm": 0.5132235945423931, "learning_rate": 9.734146298094497e-06, "loss": 0.9964, "step": 7400 }, { "epoch": 0.4478153139541911, "grad_norm": 0.5411800684996538, "learning_rate": 9.733073568503331e-06, "loss": 1.0235, "step": 7410 }, { "epoch": 0.44841965310932497, "grad_norm": 0.49242600048418367, "learning_rate": 9.731998738366014e-06, "loss": 1.0147, "step": 7420 }, { "epoch": 0.4490239922644588, "grad_norm": 0.5368617448294334, "learning_rate": 9.730921808159552e-06, "loss": 0.9992, "step": 7430 }, { "epoch": 0.4496283314195927, "grad_norm": 0.540233755894723, "learning_rate": 9.729842778361895e-06, "loss": 1.0016, "step": 7440 }, { "epoch": 0.45023267057472655, "grad_norm": 0.4973184814181504, "learning_rate": 9.728761649451917e-06, "loss": 0.9999, "step": 7450 }, { "epoch": 0.4508370097298604, "grad_norm": 0.5040057736208563, "learning_rate": 9.727678421909423e-06, "loss": 1.004, "step": 7460 }, { "epoch": 0.45144134888499426, "grad_norm": 0.5010346109754319, "learning_rate": 9.726593096215154e-06, "loss": 1.017, "step": 7470 }, { "epoch": 0.4520456880401281, "grad_norm": 0.5159877010609631, "learning_rate": 9.725505672850775e-06, "loss": 0.9955, "step": 7480 }, { "epoch": 0.452650027195262, "grad_norm": 0.5359361237044953, "learning_rate": 9.72441615229889e-06, "loss": 0.9808, "step": 7490 }, { "epoch": 0.45325436635039584, "grad_norm": 0.5420479823701472, "learning_rate": 9.723324535043028e-06, "loss": 0.9925, "step": 7500 }, { "epoch": 0.4538587055055297, "grad_norm": 0.5264705408238455, "learning_rate": 9.722230821567652e-06, "loss": 0.9956, "step": 7510 }, { "epoch": 0.45446304466066356, "grad_norm": 0.5461301540893525, "learning_rate": 9.721135012358156e-06, "loss": 1.0064, "step": 7520 }, { "epoch": 0.4550673838157974, "grad_norm": 0.5015682263575848, "learning_rate": 9.720037107900857e-06, "loss": 1.0002, "step": 7530 }, { "epoch": 0.4556717229709313, "grad_norm": 0.5463703322516429, "learning_rate": 9.718937108683012e-06, "loss": 1.0059, "step": 7540 }, { "epoch": 0.45627606212606514, "grad_norm": 0.5346647708927578, "learning_rate": 9.717835015192797e-06, "loss": 1.0136, "step": 7550 }, { "epoch": 0.456880401281199, "grad_norm": 0.5244071387215217, "learning_rate": 9.716730827919327e-06, "loss": 0.9906, "step": 7560 }, { "epoch": 0.45748474043633286, "grad_norm": 0.5090978820687052, "learning_rate": 9.715624547352641e-06, "loss": 1.0018, "step": 7570 }, { "epoch": 0.4580890795914667, "grad_norm": 0.5368095176163875, "learning_rate": 9.714516173983709e-06, "loss": 0.9922, "step": 7580 }, { "epoch": 0.4586934187466006, "grad_norm": 0.4860640203798647, "learning_rate": 9.713405708304427e-06, "loss": 0.9966, "step": 7590 }, { "epoch": 0.45929775790173444, "grad_norm": 0.45062056663754396, "learning_rate": 9.712293150807624e-06, "loss": 1.0077, "step": 7600 }, { "epoch": 0.4599020970568683, "grad_norm": 0.5264066780993698, "learning_rate": 9.711178501987054e-06, "loss": 0.9944, "step": 7610 }, { "epoch": 0.46050643621200216, "grad_norm": 0.49210314433869495, "learning_rate": 9.7100617623374e-06, "loss": 1.0085, "step": 7620 }, { "epoch": 0.461110775367136, "grad_norm": 0.5360137625880347, "learning_rate": 9.708942932354272e-06, "loss": 0.9829, "step": 7630 }, { "epoch": 0.4617151145222699, "grad_norm": 0.5252314800384593, "learning_rate": 9.70782201253421e-06, "loss": 0.9812, "step": 7640 }, { "epoch": 0.46231945367740374, "grad_norm": 0.5540169774022148, "learning_rate": 9.70669900337468e-06, "loss": 0.9827, "step": 7650 }, { "epoch": 0.4629237928325376, "grad_norm": 0.5071988278808582, "learning_rate": 9.705573905374075e-06, "loss": 0.9996, "step": 7660 }, { "epoch": 0.46352813198767145, "grad_norm": 0.5451423592038422, "learning_rate": 9.704446719031714e-06, "loss": 1.0267, "step": 7670 }, { "epoch": 0.46413247114280537, "grad_norm": 0.5267472331193773, "learning_rate": 9.703317444847849e-06, "loss": 0.9933, "step": 7680 }, { "epoch": 0.46473681029793923, "grad_norm": 0.5561525850421628, "learning_rate": 9.702186083323648e-06, "loss": 0.9939, "step": 7690 }, { "epoch": 0.4653411494530731, "grad_norm": 0.6529114062972717, "learning_rate": 9.701052634961213e-06, "loss": 0.9883, "step": 7700 }, { "epoch": 0.46594548860820695, "grad_norm": 0.5941168324045979, "learning_rate": 9.699917100263571e-06, "loss": 1.0023, "step": 7710 }, { "epoch": 0.4665498277633408, "grad_norm": 0.6355639771440227, "learning_rate": 9.698779479734677e-06, "loss": 1.0085, "step": 7720 }, { "epoch": 0.46715416691847467, "grad_norm": 0.6322387058727607, "learning_rate": 9.697639773879404e-06, "loss": 0.9798, "step": 7730 }, { "epoch": 0.4677585060736085, "grad_norm": 0.6335348266753075, "learning_rate": 9.69649798320356e-06, "loss": 0.998, "step": 7740 }, { "epoch": 0.4683628452287424, "grad_norm": 0.690941642264451, "learning_rate": 9.695354108213868e-06, "loss": 0.9968, "step": 7750 }, { "epoch": 0.46896718438387625, "grad_norm": 0.7029981868793153, "learning_rate": 9.694208149417985e-06, "loss": 0.9806, "step": 7760 }, { "epoch": 0.4695715235390101, "grad_norm": 0.6981601027311941, "learning_rate": 9.693060107324493e-06, "loss": 0.9959, "step": 7770 }, { "epoch": 0.47017586269414396, "grad_norm": 0.7274625431921845, "learning_rate": 9.69190998244289e-06, "loss": 1.0054, "step": 7780 }, { "epoch": 0.4707802018492778, "grad_norm": 0.6867733702462608, "learning_rate": 9.690757775283603e-06, "loss": 1.0368, "step": 7790 }, { "epoch": 0.4713845410044117, "grad_norm": 0.6043675336725368, "learning_rate": 9.689603486357986e-06, "loss": 1.0021, "step": 7800 }, { "epoch": 0.47198888015954554, "grad_norm": 0.6157464021134955, "learning_rate": 9.688447116178315e-06, "loss": 1.0249, "step": 7810 }, { "epoch": 0.4725932193146794, "grad_norm": 0.6427327251905133, "learning_rate": 9.687288665257786e-06, "loss": 0.9808, "step": 7820 }, { "epoch": 0.47319755846981326, "grad_norm": 0.6155938108333662, "learning_rate": 9.686128134110525e-06, "loss": 1.0035, "step": 7830 }, { "epoch": 0.4738018976249471, "grad_norm": 0.6131794954190143, "learning_rate": 9.684965523251575e-06, "loss": 1.0213, "step": 7840 }, { "epoch": 0.474406236780081, "grad_norm": 0.7747597542732607, "learning_rate": 9.683800833196907e-06, "loss": 0.9688, "step": 7850 }, { "epoch": 0.47501057593521484, "grad_norm": 0.7555906781074263, "learning_rate": 9.68263406446341e-06, "loss": 0.9866, "step": 7860 }, { "epoch": 0.4756149150903487, "grad_norm": 0.7527735572152627, "learning_rate": 9.6814652175689e-06, "loss": 0.9971, "step": 7870 }, { "epoch": 0.47621925424548256, "grad_norm": 0.7743973471003973, "learning_rate": 9.680294293032112e-06, "loss": 1.0072, "step": 7880 }, { "epoch": 0.4768235934006164, "grad_norm": 0.8108529448189443, "learning_rate": 9.679121291372704e-06, "loss": 0.9885, "step": 7890 }, { "epoch": 0.4774279325557503, "grad_norm": 0.7460995116908645, "learning_rate": 9.677946213111259e-06, "loss": 0.9697, "step": 7900 }, { "epoch": 0.47803227171088414, "grad_norm": 0.80551425380806, "learning_rate": 9.676769058769274e-06, "loss": 0.9863, "step": 7910 }, { "epoch": 0.478636610866018, "grad_norm": 0.7782410932889495, "learning_rate": 9.675589828869177e-06, "loss": 0.9947, "step": 7920 }, { "epoch": 0.47924095002115186, "grad_norm": 0.8106885579056082, "learning_rate": 9.674408523934308e-06, "loss": 1.014, "step": 7930 }, { "epoch": 0.4798452891762857, "grad_norm": 0.7429403181875742, "learning_rate": 9.673225144488934e-06, "loss": 0.9875, "step": 7940 }, { "epoch": 0.4804496283314196, "grad_norm": 1.2268192425228464, "learning_rate": 9.672039691058242e-06, "loss": 0.9937, "step": 7950 }, { "epoch": 0.48105396748655344, "grad_norm": 1.2759892859249429, "learning_rate": 9.670852164168339e-06, "loss": 1.0152, "step": 7960 }, { "epoch": 0.4816583066416873, "grad_norm": 1.1939470704091346, "learning_rate": 9.669662564346246e-06, "loss": 0.9663, "step": 7970 }, { "epoch": 0.48226264579682115, "grad_norm": 1.1525754224949845, "learning_rate": 9.668470892119915e-06, "loss": 1.0216, "step": 7980 }, { "epoch": 0.482866984951955, "grad_norm": 1.1764028603949268, "learning_rate": 9.66727714801821e-06, "loss": 1.0079, "step": 7990 }, { "epoch": 0.4834713241070889, "grad_norm": 1.1125911325303737, "learning_rate": 9.666081332570917e-06, "loss": 1.0183, "step": 8000 }, { "epoch": 0.48407566326222273, "grad_norm": 0.9207023652756564, "learning_rate": 9.66488344630874e-06, "loss": 1.0017, "step": 8010 }, { "epoch": 0.48468000241735665, "grad_norm": 0.9846740362494372, "learning_rate": 9.663683489763305e-06, "loss": 1.0082, "step": 8020 }, { "epoch": 0.4852843415724905, "grad_norm": 1.2877008149909468, "learning_rate": 9.662481463467154e-06, "loss": 0.9866, "step": 8030 }, { "epoch": 0.48588868072762437, "grad_norm": 0.958446530431782, "learning_rate": 9.661277367953747e-06, "loss": 0.9959, "step": 8040 }, { "epoch": 0.4864930198827582, "grad_norm": 2.4676621726281054, "learning_rate": 9.660071203757465e-06, "loss": 0.991, "step": 8050 }, { "epoch": 0.4870973590378921, "grad_norm": 2.7028175885296237, "learning_rate": 9.658862971413606e-06, "loss": 0.9903, "step": 8060 }, { "epoch": 0.48770169819302595, "grad_norm": 2.836053625288518, "learning_rate": 9.657652671458384e-06, "loss": 0.9829, "step": 8070 }, { "epoch": 0.4883060373481598, "grad_norm": 2.5175583173096814, "learning_rate": 9.656440304428934e-06, "loss": 0.9914, "step": 8080 }, { "epoch": 0.48891037650329366, "grad_norm": 2.4148200191591402, "learning_rate": 9.655225870863305e-06, "loss": 0.9925, "step": 8090 }, { "epoch": 0.4895147156584275, "grad_norm": 0.9786589854518857, "learning_rate": 9.654009371300464e-06, "loss": 1.0089, "step": 8100 }, { "epoch": 0.4901190548135614, "grad_norm": 0.9582223184546904, "learning_rate": 9.6527908062803e-06, "loss": 0.9749, "step": 8110 }, { "epoch": 0.49072339396869524, "grad_norm": 0.9828598576137438, "learning_rate": 9.651570176343607e-06, "loss": 0.989, "step": 8120 }, { "epoch": 0.4913277331238291, "grad_norm": 1.0086996495693867, "learning_rate": 9.65034748203211e-06, "loss": 1.0111, "step": 8130 }, { "epoch": 0.49193207227896296, "grad_norm": 0.9089158258226949, "learning_rate": 9.64912272388844e-06, "loss": 0.9944, "step": 8140 }, { "epoch": 0.4925364114340968, "grad_norm": 0.9903678427113559, "learning_rate": 9.647895902456145e-06, "loss": 0.9949, "step": 8150 }, { "epoch": 0.4931407505892307, "grad_norm": 0.9630889224980455, "learning_rate": 9.646667018279693e-06, "loss": 0.9862, "step": 8160 }, { "epoch": 0.49374508974436454, "grad_norm": 0.9336717507085102, "learning_rate": 9.645436071904464e-06, "loss": 1.0281, "step": 8170 }, { "epoch": 0.4943494288994984, "grad_norm": 1.0469956723088596, "learning_rate": 9.644203063876753e-06, "loss": 0.9809, "step": 8180 }, { "epoch": 0.49495376805463226, "grad_norm": 0.9570505244158888, "learning_rate": 9.64296799474377e-06, "loss": 1.0014, "step": 8190 }, { "epoch": 0.4955581072097661, "grad_norm": 1.0258599596488671, "learning_rate": 9.641730865053646e-06, "loss": 1.0164, "step": 8200 }, { "epoch": 0.4961624463649, "grad_norm": 1.035835625069818, "learning_rate": 9.640491675355417e-06, "loss": 1.0077, "step": 8210 }, { "epoch": 0.49676678552003384, "grad_norm": 1.0276053532929534, "learning_rate": 9.63925042619904e-06, "loss": 1.0317, "step": 8220 }, { "epoch": 0.4973711246751677, "grad_norm": 1.1452176363827613, "learning_rate": 9.63800711813538e-06, "loss": 0.9864, "step": 8230 }, { "epoch": 0.49797546383030156, "grad_norm": 1.0470289586674222, "learning_rate": 9.636761751716222e-06, "loss": 0.9873, "step": 8240 }, { "epoch": 0.4985798029854354, "grad_norm": 1.0735003796994644, "learning_rate": 9.63551432749426e-06, "loss": 0.9952, "step": 8250 }, { "epoch": 0.4991841421405693, "grad_norm": 1.1427966104197986, "learning_rate": 9.634264846023106e-06, "loss": 1.003, "step": 8260 }, { "epoch": 0.49978848129570314, "grad_norm": 1.0781624221613808, "learning_rate": 9.63301330785728e-06, "loss": 1.0147, "step": 8270 }, { "epoch": 0.500392820450837, "grad_norm": 1.148437759316674, "learning_rate": 9.631759713552216e-06, "loss": 0.9879, "step": 8280 }, { "epoch": 0.5009971596059709, "grad_norm": 1.1120292408713583, "learning_rate": 9.630504063664261e-06, "loss": 0.9853, "step": 8290 }, { "epoch": 0.5016014987611047, "grad_norm": 1.1402986570324811, "learning_rate": 9.629246358750676e-06, "loss": 0.9987, "step": 8300 }, { "epoch": 0.5022058379162386, "grad_norm": 1.069535183110544, "learning_rate": 9.627986599369634e-06, "loss": 0.982, "step": 8310 }, { "epoch": 0.5028101770713724, "grad_norm": 1.1654086241178052, "learning_rate": 9.626724786080217e-06, "loss": 0.9809, "step": 8320 }, { "epoch": 0.5034145162265063, "grad_norm": 1.176509443510629, "learning_rate": 9.625460919442418e-06, "loss": 0.9989, "step": 8330 }, { "epoch": 0.5040188553816402, "grad_norm": 1.1800644742301534, "learning_rate": 9.624195000017145e-06, "loss": 0.9934, "step": 8340 }, { "epoch": 0.5046231945367741, "grad_norm": 0.9953993274160409, "learning_rate": 9.622927028366216e-06, "loss": 0.9909, "step": 8350 }, { "epoch": 0.5052275336919079, "grad_norm": 1.0370138623117278, "learning_rate": 9.621657005052355e-06, "loss": 1.0092, "step": 8360 }, { "epoch": 0.5058318728470418, "grad_norm": 0.9922144147056596, "learning_rate": 9.620384930639209e-06, "loss": 1.002, "step": 8370 }, { "epoch": 0.5064362120021756, "grad_norm": 0.9612586208716696, "learning_rate": 9.619110805691317e-06, "loss": 0.9982, "step": 8380 }, { "epoch": 0.5070405511573095, "grad_norm": 1.0259940559255982, "learning_rate": 9.617834630774142e-06, "loss": 1.0022, "step": 8390 }, { "epoch": 0.5076448903124433, "grad_norm": 1.0920260843015757, "learning_rate": 9.616556406454054e-06, "loss": 0.9877, "step": 8400 }, { "epoch": 0.5082492294675772, "grad_norm": 1.1198165725467457, "learning_rate": 9.61527613329833e-06, "loss": 0.9941, "step": 8410 }, { "epoch": 0.508853568622711, "grad_norm": 1.025248313794968, "learning_rate": 9.613993811875158e-06, "loss": 0.9794, "step": 8420 }, { "epoch": 0.5094579077778449, "grad_norm": 1.0482919155273442, "learning_rate": 9.612709442753632e-06, "loss": 0.9843, "step": 8430 }, { "epoch": 0.5100622469329787, "grad_norm": 1.1347725822118602, "learning_rate": 9.61142302650376e-06, "loss": 1.0234, "step": 8440 }, { "epoch": 0.5106665860881127, "grad_norm": 1.0473865614970992, "learning_rate": 9.610134563696455e-06, "loss": 0.9939, "step": 8450 }, { "epoch": 0.5112709252432465, "grad_norm": 0.9657584550175458, "learning_rate": 9.608844054903538e-06, "loss": 1.0148, "step": 8460 }, { "epoch": 0.5118752643983804, "grad_norm": 0.9738617538281833, "learning_rate": 9.60755150069774e-06, "loss": 0.9968, "step": 8470 }, { "epoch": 0.5124796035535142, "grad_norm": 1.0308464966985456, "learning_rate": 9.606256901652697e-06, "loss": 1.0207, "step": 8480 }, { "epoch": 0.5130839427086481, "grad_norm": 1.0433317731026512, "learning_rate": 9.604960258342958e-06, "loss": 1.0295, "step": 8490 }, { "epoch": 0.5136882818637819, "grad_norm": 1.1801333006107229, "learning_rate": 9.603661571343971e-06, "loss": 0.9851, "step": 8500 }, { "epoch": 0.5142926210189158, "grad_norm": 1.2498868118643573, "learning_rate": 9.602360841232102e-06, "loss": 1.0056, "step": 8510 }, { "epoch": 0.5148969601740496, "grad_norm": 1.2145390084331917, "learning_rate": 9.601058068584609e-06, "loss": 0.9993, "step": 8520 }, { "epoch": 0.5155012993291835, "grad_norm": 1.2001517566812112, "learning_rate": 9.599753253979669e-06, "loss": 1.0024, "step": 8530 }, { "epoch": 0.5161056384843175, "grad_norm": 1.2086016201074825, "learning_rate": 9.598446397996362e-06, "loss": 0.9751, "step": 8540 }, { "epoch": 0.5167099776394513, "grad_norm": 1.1466449966340053, "learning_rate": 9.597137501214672e-06, "loss": 1.0079, "step": 8550 }, { "epoch": 0.5173143167945852, "grad_norm": 1.2406782631151254, "learning_rate": 9.595826564215488e-06, "loss": 0.9984, "step": 8560 }, { "epoch": 0.517918655949719, "grad_norm": 1.091992293326711, "learning_rate": 9.594513587580608e-06, "loss": 0.9997, "step": 8570 }, { "epoch": 0.5185229951048529, "grad_norm": 1.0945234033362277, "learning_rate": 9.593198571892732e-06, "loss": 0.9817, "step": 8580 }, { "epoch": 0.5191273342599867, "grad_norm": 1.1029070917753434, "learning_rate": 9.591881517735467e-06, "loss": 0.9758, "step": 8590 }, { "epoch": 0.5197316734151206, "grad_norm": 1.2907186213259112, "learning_rate": 9.590562425693325e-06, "loss": 0.9859, "step": 8600 }, { "epoch": 0.5203360125702544, "grad_norm": 1.2193408801578518, "learning_rate": 9.589241296351719e-06, "loss": 1.0061, "step": 8610 }, { "epoch": 0.5209403517253883, "grad_norm": 1.2730445282162828, "learning_rate": 9.587918130296969e-06, "loss": 1.0179, "step": 8620 }, { "epoch": 0.5215446908805221, "grad_norm": 1.2036104458854633, "learning_rate": 9.586592928116301e-06, "loss": 1.0322, "step": 8630 }, { "epoch": 0.522149030035656, "grad_norm": 1.1623555826965213, "learning_rate": 9.585265690397836e-06, "loss": 0.9926, "step": 8640 }, { "epoch": 0.5227533691907899, "grad_norm": 1.55081919981763, "learning_rate": 9.583936417730612e-06, "loss": 1.028, "step": 8650 }, { "epoch": 0.5233577083459238, "grad_norm": 1.4564423182409123, "learning_rate": 9.582605110704557e-06, "loss": 0.9915, "step": 8660 }, { "epoch": 0.5239620475010576, "grad_norm": 1.3833231922488896, "learning_rate": 9.58127176991051e-06, "loss": 0.9999, "step": 8670 }, { "epoch": 0.5245663866561915, "grad_norm": 1.3857524203326659, "learning_rate": 9.579936395940208e-06, "loss": 1.0047, "step": 8680 }, { "epoch": 0.5251707258113253, "grad_norm": 1.3576348780566196, "learning_rate": 9.578598989386293e-06, "loss": 1.008, "step": 8690 }, { "epoch": 0.5257750649664592, "grad_norm": 0.9212364600268664, "learning_rate": 9.577259550842309e-06, "loss": 1.0002, "step": 8700 }, { "epoch": 0.526379404121593, "grad_norm": 0.9009360363816509, "learning_rate": 9.5759180809027e-06, "loss": 1.003, "step": 8710 }, { "epoch": 0.5269837432767269, "grad_norm": 0.8544273543085027, "learning_rate": 9.574574580162815e-06, "loss": 0.9805, "step": 8720 }, { "epoch": 0.5275880824318607, "grad_norm": 0.8639960734005143, "learning_rate": 9.5732290492189e-06, "loss": 0.9892, "step": 8730 }, { "epoch": 0.5281924215869946, "grad_norm": 0.8431306173063707, "learning_rate": 9.571881488668102e-06, "loss": 0.977, "step": 8740 }, { "epoch": 0.5287967607421284, "grad_norm": 0.7528023093340848, "learning_rate": 9.570531899108474e-06, "loss": 0.9754, "step": 8750 }, { "epoch": 0.5294010998972624, "grad_norm": 0.7696537145424289, "learning_rate": 9.569180281138966e-06, "loss": 1.0071, "step": 8760 }, { "epoch": 0.5300054390523962, "grad_norm": 0.7786988594872661, "learning_rate": 9.567826635359427e-06, "loss": 1.0069, "step": 8770 }, { "epoch": 0.5306097782075301, "grad_norm": 0.753188512965207, "learning_rate": 9.566470962370608e-06, "loss": 1.0119, "step": 8780 }, { "epoch": 0.5312141173626639, "grad_norm": 0.7639611278654884, "learning_rate": 9.565113262774159e-06, "loss": 1.0059, "step": 8790 }, { "epoch": 0.5318184565177978, "grad_norm": 0.8278814556047578, "learning_rate": 9.563753537172628e-06, "loss": 0.9716, "step": 8800 }, { "epoch": 0.5324227956729316, "grad_norm": 0.8358293744224784, "learning_rate": 9.562391786169465e-06, "loss": 0.9886, "step": 8810 }, { "epoch": 0.5330271348280655, "grad_norm": 0.9560295768806172, "learning_rate": 9.561028010369017e-06, "loss": 0.9874, "step": 8820 }, { "epoch": 0.5336314739831993, "grad_norm": 0.8336233591725293, "learning_rate": 9.55966221037653e-06, "loss": 0.9948, "step": 8830 }, { "epoch": 0.5342358131383332, "grad_norm": 0.8508461702610761, "learning_rate": 9.558294386798147e-06, "loss": 0.9879, "step": 8840 }, { "epoch": 0.534840152293467, "grad_norm": 0.49984266226145047, "learning_rate": 9.556924540240914e-06, "loss": 0.9858, "step": 8850 }, { "epoch": 0.535444491448601, "grad_norm": 0.5838740900986712, "learning_rate": 9.555552671312772e-06, "loss": 1.004, "step": 8860 }, { "epoch": 0.5360488306037348, "grad_norm": 0.5152019090813889, "learning_rate": 9.554178780622551e-06, "loss": 0.9748, "step": 8870 }, { "epoch": 0.5366531697588687, "grad_norm": 0.5613467896896484, "learning_rate": 9.552802868779993e-06, "loss": 0.9837, "step": 8880 }, { "epoch": 0.5372575089140026, "grad_norm": 0.5311502872971174, "learning_rate": 9.551424936395728e-06, "loss": 0.9977, "step": 8890 }, { "epoch": 0.5378618480691364, "grad_norm": 0.5815211429254281, "learning_rate": 9.550044984081284e-06, "loss": 0.9845, "step": 8900 }, { "epoch": 0.5384661872242703, "grad_norm": 0.6404477430619733, "learning_rate": 9.548663012449088e-06, "loss": 0.9824, "step": 8910 }, { "epoch": 0.5390705263794041, "grad_norm": 0.6049707245085783, "learning_rate": 9.54727902211246e-06, "loss": 0.9771, "step": 8920 }, { "epoch": 0.539674865534538, "grad_norm": 0.5791098194179001, "learning_rate": 9.545893013685618e-06, "loss": 0.9835, "step": 8930 }, { "epoch": 0.5402792046896718, "grad_norm": 0.6043644346467444, "learning_rate": 9.544504987783674e-06, "loss": 0.9808, "step": 8940 }, { "epoch": 0.5408835438448057, "grad_norm": 0.8286167478370752, "learning_rate": 9.543114945022635e-06, "loss": 0.9771, "step": 8950 }, { "epoch": 0.5414878829999396, "grad_norm": 0.8136288452613204, "learning_rate": 9.54172288601941e-06, "loss": 1.0124, "step": 8960 }, { "epoch": 0.5420922221550735, "grad_norm": 0.7478057049812394, "learning_rate": 9.540328811391792e-06, "loss": 0.9943, "step": 8970 }, { "epoch": 0.5426965613102073, "grad_norm": 0.9397982937807772, "learning_rate": 9.538932721758474e-06, "loss": 0.9726, "step": 8980 }, { "epoch": 0.5433009004653412, "grad_norm": 0.821835902086452, "learning_rate": 9.537534617739044e-06, "loss": 0.9777, "step": 8990 }, { "epoch": 0.543905239620475, "grad_norm": 0.5292199167638089, "learning_rate": 9.536134499953984e-06, "loss": 1.0081, "step": 9000 }, { "epoch": 0.5445095787756089, "grad_norm": 0.5368485696924421, "learning_rate": 9.534732369024667e-06, "loss": 0.9989, "step": 9010 }, { "epoch": 0.5451139179307427, "grad_norm": 0.49688490628385246, "learning_rate": 9.533328225573362e-06, "loss": 0.9971, "step": 9020 }, { "epoch": 0.5457182570858766, "grad_norm": 0.49182087177664563, "learning_rate": 9.53192207022323e-06, "loss": 0.9835, "step": 9030 }, { "epoch": 0.5463225962410104, "grad_norm": 0.5086812831154581, "learning_rate": 9.530513903598326e-06, "loss": 0.9941, "step": 9040 }, { "epoch": 0.5469269353961443, "grad_norm": 0.5462714933115483, "learning_rate": 9.529103726323595e-06, "loss": 0.9876, "step": 9050 }, { "epoch": 0.5475312745512781, "grad_norm": 0.5002948002719604, "learning_rate": 9.527691539024877e-06, "loss": 0.9989, "step": 9060 }, { "epoch": 0.5481356137064121, "grad_norm": 0.4942418792498279, "learning_rate": 9.526277342328904e-06, "loss": 0.988, "step": 9070 }, { "epoch": 0.5487399528615459, "grad_norm": 0.5016699025806897, "learning_rate": 9.524861136863297e-06, "loss": 0.9862, "step": 9080 }, { "epoch": 0.5493442920166798, "grad_norm": 0.5215413382791309, "learning_rate": 9.523442923256572e-06, "loss": 0.9786, "step": 9090 }, { "epoch": 0.5499486311718136, "grad_norm": 0.545135195619704, "learning_rate": 9.522022702138134e-06, "loss": 0.9931, "step": 9100 }, { "epoch": 0.5505529703269475, "grad_norm": 0.519092415968085, "learning_rate": 9.52060047413828e-06, "loss": 0.9827, "step": 9110 }, { "epoch": 0.5511573094820813, "grad_norm": 0.53439021590655, "learning_rate": 9.519176239888196e-06, "loss": 0.9972, "step": 9120 }, { "epoch": 0.5517616486372152, "grad_norm": 0.5422170163674099, "learning_rate": 9.51775000001996e-06, "loss": 0.98, "step": 9130 }, { "epoch": 0.552365987792349, "grad_norm": 0.476965125946667, "learning_rate": 9.516321755166542e-06, "loss": 0.9971, "step": 9140 }, { "epoch": 0.5529703269474829, "grad_norm": 0.5081693326491318, "learning_rate": 9.514891505961798e-06, "loss": 0.9891, "step": 9150 }, { "epoch": 0.5535746661026167, "grad_norm": 0.5383871326489696, "learning_rate": 9.513459253040474e-06, "loss": 0.9966, "step": 9160 }, { "epoch": 0.5541790052577507, "grad_norm": 0.5241087983700397, "learning_rate": 9.512024997038207e-06, "loss": 1.0265, "step": 9170 }, { "epoch": 0.5547833444128845, "grad_norm": 0.5360607049669087, "learning_rate": 9.510588738591523e-06, "loss": 0.9853, "step": 9180 }, { "epoch": 0.5553876835680184, "grad_norm": 0.4941857075784585, "learning_rate": 9.509150478337834e-06, "loss": 0.9896, "step": 9190 }, { "epoch": 0.5559920227231522, "grad_norm": 0.5183668577725419, "learning_rate": 9.507710216915444e-06, "loss": 0.9833, "step": 9200 }, { "epoch": 0.5565963618782861, "grad_norm": 0.5465537464318688, "learning_rate": 9.506267954963543e-06, "loss": 0.9846, "step": 9210 }, { "epoch": 0.55720070103342, "grad_norm": 0.5055592133325096, "learning_rate": 9.50482369312221e-06, "loss": 0.9812, "step": 9220 }, { "epoch": 0.5578050401885538, "grad_norm": 0.5109013938650148, "learning_rate": 9.50337743203241e-06, "loss": 1.0031, "step": 9230 }, { "epoch": 0.5584093793436877, "grad_norm": 0.4979015076590743, "learning_rate": 9.501929172335996e-06, "loss": 0.9884, "step": 9240 }, { "epoch": 0.5590137184988215, "grad_norm": 0.5262878650739787, "learning_rate": 9.500478914675709e-06, "loss": 1.0129, "step": 9250 }, { "epoch": 0.5596180576539554, "grad_norm": 0.5202377604385394, "learning_rate": 9.499026659695176e-06, "loss": 0.971, "step": 9260 }, { "epoch": 0.5602223968090893, "grad_norm": 0.5268814147981004, "learning_rate": 9.497572408038909e-06, "loss": 0.9883, "step": 9270 }, { "epoch": 0.5608267359642232, "grad_norm": 0.5213577286801888, "learning_rate": 9.496116160352308e-06, "loss": 0.9834, "step": 9280 }, { "epoch": 0.561431075119357, "grad_norm": 0.5342746155854908, "learning_rate": 9.494657917281658e-06, "loss": 0.9699, "step": 9290 }, { "epoch": 0.5620354142744909, "grad_norm": 0.6176039596169591, "learning_rate": 9.49319767947413e-06, "loss": 0.9913, "step": 9300 }, { "epoch": 0.5626397534296247, "grad_norm": 0.6478503366597994, "learning_rate": 9.491735447577781e-06, "loss": 0.9913, "step": 9310 }, { "epoch": 0.5632440925847586, "grad_norm": 0.5966945330201456, "learning_rate": 9.490271222241552e-06, "loss": 0.9715, "step": 9320 }, { "epoch": 0.5638484317398924, "grad_norm": 0.5978389545925465, "learning_rate": 9.488805004115267e-06, "loss": 1.0289, "step": 9330 }, { "epoch": 0.5644527708950263, "grad_norm": 0.6286153369876969, "learning_rate": 9.487336793849636e-06, "loss": 0.9878, "step": 9340 }, { "epoch": 0.5650571100501601, "grad_norm": 0.6447773201389241, "learning_rate": 9.485866592096254e-06, "loss": 0.9879, "step": 9350 }, { "epoch": 0.565661449205294, "grad_norm": 0.6832036844551603, "learning_rate": 9.4843943995076e-06, "loss": 0.9977, "step": 9360 }, { "epoch": 0.5662657883604278, "grad_norm": 0.6478441787196813, "learning_rate": 9.482920216737035e-06, "loss": 0.984, "step": 9370 }, { "epoch": 0.5668701275155618, "grad_norm": 0.661678029346523, "learning_rate": 9.481444044438803e-06, "loss": 1.0017, "step": 9380 }, { "epoch": 0.5674744666706956, "grad_norm": 0.6314072896741001, "learning_rate": 9.479965883268034e-06, "loss": 0.9679, "step": 9390 }, { "epoch": 0.5680788058258295, "grad_norm": 0.625023868061403, "learning_rate": 9.478485733880736e-06, "loss": 0.9934, "step": 9400 }, { "epoch": 0.5686831449809633, "grad_norm": 0.6783188143267269, "learning_rate": 9.477003596933802e-06, "loss": 0.9854, "step": 9410 }, { "epoch": 0.5692874841360972, "grad_norm": 0.6059790817111019, "learning_rate": 9.475519473085007e-06, "loss": 1.0014, "step": 9420 }, { "epoch": 0.569891823291231, "grad_norm": 0.6150126760865275, "learning_rate": 9.47403336299301e-06, "loss": 0.9844, "step": 9430 }, { "epoch": 0.5704961624463649, "grad_norm": 0.5849261192824202, "learning_rate": 9.472545267317348e-06, "loss": 0.9821, "step": 9440 }, { "epoch": 0.5711005016014987, "grad_norm": 0.7942566989378923, "learning_rate": 9.471055186718439e-06, "loss": 0.9714, "step": 9450 }, { "epoch": 0.5717048407566326, "grad_norm": 0.7354328013281911, "learning_rate": 9.469563121857584e-06, "loss": 1.0034, "step": 9460 }, { "epoch": 0.5723091799117664, "grad_norm": 0.8356869141980198, "learning_rate": 9.468069073396964e-06, "loss": 0.9974, "step": 9470 }, { "epoch": 0.5729135190669004, "grad_norm": 0.7779732505425232, "learning_rate": 9.46657304199964e-06, "loss": 0.9797, "step": 9480 }, { "epoch": 0.5735178582220342, "grad_norm": 0.7618109936784662, "learning_rate": 9.465075028329556e-06, "loss": 0.9986, "step": 9490 }, { "epoch": 0.5741221973771681, "grad_norm": 0.8120984017758941, "learning_rate": 9.463575033051529e-06, "loss": 0.9714, "step": 9500 }, { "epoch": 0.5747265365323019, "grad_norm": 0.747960001563099, "learning_rate": 9.462073056831262e-06, "loss": 0.9705, "step": 9510 }, { "epoch": 0.5753308756874358, "grad_norm": 0.7586706353112267, "learning_rate": 9.460569100335333e-06, "loss": 1.005, "step": 9520 }, { "epoch": 0.5759352148425696, "grad_norm": 0.7893540322654525, "learning_rate": 9.4590631642312e-06, "loss": 0.9882, "step": 9530 }, { "epoch": 0.5765395539977035, "grad_norm": 0.7753500223338584, "learning_rate": 9.457555249187204e-06, "loss": 0.9868, "step": 9540 }, { "epoch": 0.5771438931528373, "grad_norm": 1.3579732998460725, "learning_rate": 9.456045355872558e-06, "loss": 1.0081, "step": 9550 }, { "epoch": 0.5777482323079712, "grad_norm": 1.3028543465646156, "learning_rate": 9.454533484957355e-06, "loss": 0.986, "step": 9560 }, { "epoch": 0.5783525714631051, "grad_norm": 1.2204451936756202, "learning_rate": 9.453019637112563e-06, "loss": 0.9885, "step": 9570 }, { "epoch": 0.578956910618239, "grad_norm": 1.2688451520885813, "learning_rate": 9.451503813010037e-06, "loss": 0.9742, "step": 9580 }, { "epoch": 0.5795612497733729, "grad_norm": 1.2332251712601991, "learning_rate": 9.449986013322494e-06, "loss": 0.9972, "step": 9590 }, { "epoch": 0.5801655889285067, "grad_norm": 0.9185071923501728, "learning_rate": 9.448466238723544e-06, "loss": 0.9997, "step": 9600 }, { "epoch": 0.5807699280836406, "grad_norm": 1.603028861506127, "learning_rate": 9.446944489887661e-06, "loss": 0.9892, "step": 9610 }, { "epoch": 0.5813742672387744, "grad_norm": 1.0395166278031611, "learning_rate": 9.445420767490202e-06, "loss": 0.9728, "step": 9620 }, { "epoch": 0.5819786063939083, "grad_norm": 0.9347629638545152, "learning_rate": 9.443895072207395e-06, "loss": 0.9502, "step": 9630 }, { "epoch": 0.5825829455490421, "grad_norm": 0.9582723129093502, "learning_rate": 9.442367404716346e-06, "loss": 1.0183, "step": 9640 }, { "epoch": 0.583187284704176, "grad_norm": 2.713079880615707, "learning_rate": 9.440837765695039e-06, "loss": 0.9794, "step": 9650 }, { "epoch": 0.5837916238593098, "grad_norm": 2.4885669667910792, "learning_rate": 9.43930615582233e-06, "loss": 0.9903, "step": 9660 }, { "epoch": 0.5843959630144437, "grad_norm": 2.551078314702416, "learning_rate": 9.43777257577795e-06, "loss": 1.0104, "step": 9670 }, { "epoch": 0.5850003021695775, "grad_norm": 2.4697224232030295, "learning_rate": 9.436237026242504e-06, "loss": 0.9931, "step": 9680 }, { "epoch": 0.5856046413247115, "grad_norm": 3.0647047995108356, "learning_rate": 9.434699507897471e-06, "loss": 1.0124, "step": 9690 }, { "epoch": 0.5862089804798453, "grad_norm": 1.0043624899734567, "learning_rate": 9.433160021425206e-06, "loss": 0.9837, "step": 9700 }, { "epoch": 0.5868133196349792, "grad_norm": 1.0118191531128673, "learning_rate": 9.431618567508933e-06, "loss": 0.9691, "step": 9710 }, { "epoch": 0.587417658790113, "grad_norm": 0.9728501550311862, "learning_rate": 9.430075146832753e-06, "loss": 0.9932, "step": 9720 }, { "epoch": 0.5880219979452469, "grad_norm": 0.9908305095858703, "learning_rate": 9.42852976008164e-06, "loss": 0.9774, "step": 9730 }, { "epoch": 0.5886263371003807, "grad_norm": 0.9506254841230665, "learning_rate": 9.426982407941439e-06, "loss": 0.9982, "step": 9740 }, { "epoch": 0.5892306762555146, "grad_norm": 1.0754394055990935, "learning_rate": 9.425433091098866e-06, "loss": 0.963, "step": 9750 }, { "epoch": 0.5898350154106484, "grad_norm": 1.0204283704917645, "learning_rate": 9.423881810241512e-06, "loss": 1.0157, "step": 9760 }, { "epoch": 0.5904393545657823, "grad_norm": 0.9552442835757272, "learning_rate": 9.422328566057837e-06, "loss": 1.0116, "step": 9770 }, { "epoch": 0.5910436937209161, "grad_norm": 1.0085077374117348, "learning_rate": 9.420773359237174e-06, "loss": 1.0117, "step": 9780 }, { "epoch": 0.5916480328760501, "grad_norm": 1.0391860248819054, "learning_rate": 9.419216190469727e-06, "loss": 0.9777, "step": 9790 }, { "epoch": 0.5922523720311839, "grad_norm": 1.0109958355782462, "learning_rate": 9.417657060446569e-06, "loss": 0.9784, "step": 9800 }, { "epoch": 0.5928567111863178, "grad_norm": 1.1342523088500804, "learning_rate": 9.416095969859644e-06, "loss": 1.0002, "step": 9810 }, { "epoch": 0.5934610503414516, "grad_norm": 1.2380678049192986, "learning_rate": 9.41453291940177e-06, "loss": 0.9912, "step": 9820 }, { "epoch": 0.5940653894965855, "grad_norm": 1.0149853034020109, "learning_rate": 9.412967909766629e-06, "loss": 0.9745, "step": 9830 }, { "epoch": 0.5946697286517193, "grad_norm": 1.049401113645641, "learning_rate": 9.411400941648773e-06, "loss": 1.0254, "step": 9840 }, { "epoch": 0.5952740678068532, "grad_norm": 1.1294305783283562, "learning_rate": 9.40983201574363e-06, "loss": 0.9863, "step": 9850 }, { "epoch": 0.595878406961987, "grad_norm": 1.1648909831717336, "learning_rate": 9.408261132747487e-06, "loss": 0.9868, "step": 9860 }, { "epoch": 0.5964827461171209, "grad_norm": 1.0159094764294985, "learning_rate": 9.40668829335751e-06, "loss": 0.9692, "step": 9870 }, { "epoch": 0.5970870852722547, "grad_norm": 1.066718729195064, "learning_rate": 9.405113498271722e-06, "loss": 1.006, "step": 9880 }, { "epoch": 0.5976914244273887, "grad_norm": 1.2297810928232251, "learning_rate": 9.403536748189024e-06, "loss": 1.0368, "step": 9890 }, { "epoch": 0.5982957635825226, "grad_norm": 1.0228162504015372, "learning_rate": 9.401958043809177e-06, "loss": 0.9941, "step": 9900 }, { "epoch": 0.5989001027376564, "grad_norm": 1.098669404992271, "learning_rate": 9.400377385832816e-06, "loss": 0.9963, "step": 9910 }, { "epoch": 0.5995044418927903, "grad_norm": 1.1280125593355481, "learning_rate": 9.398794774961439e-06, "loss": 1.0084, "step": 9920 }, { "epoch": 0.6001087810479241, "grad_norm": 1.1399829912066743, "learning_rate": 9.397210211897409e-06, "loss": 0.9923, "step": 9930 }, { "epoch": 0.600713120203058, "grad_norm": 1.1029918629585482, "learning_rate": 9.39562369734396e-06, "loss": 1.0308, "step": 9940 }, { "epoch": 0.6013174593581918, "grad_norm": 1.0633303796745397, "learning_rate": 9.39403523200519e-06, "loss": 0.9832, "step": 9950 }, { "epoch": 0.6019217985133257, "grad_norm": 1.0852106183377646, "learning_rate": 9.392444816586062e-06, "loss": 0.9924, "step": 9960 }, { "epoch": 0.6025261376684595, "grad_norm": 1.0236436268379463, "learning_rate": 9.390852451792404e-06, "loss": 1.0122, "step": 9970 }, { "epoch": 0.6031304768235934, "grad_norm": 0.9879030204382597, "learning_rate": 9.389258138330914e-06, "loss": 1.0021, "step": 9980 }, { "epoch": 0.6037348159787272, "grad_norm": 1.0131420022520858, "learning_rate": 9.387661876909146e-06, "loss": 0.9809, "step": 9990 }, { "epoch": 0.6043391551338612, "grad_norm": 1.101723066530562, "learning_rate": 9.386063668235527e-06, "loss": 0.9892, "step": 10000 }, { "epoch": 0.604943494288995, "grad_norm": 1.0696984489293015, "learning_rate": 9.384463513019343e-06, "loss": 0.9983, "step": 10010 }, { "epoch": 0.6055478334441289, "grad_norm": 1.1130438157603528, "learning_rate": 9.382861411970747e-06, "loss": 0.9795, "step": 10020 }, { "epoch": 0.6061521725992627, "grad_norm": 1.2325790126889524, "learning_rate": 9.381257365800752e-06, "loss": 1.0223, "step": 10030 }, { "epoch": 0.6067565117543966, "grad_norm": 1.0193174695653036, "learning_rate": 9.379651375221239e-06, "loss": 0.9832, "step": 10040 }, { "epoch": 0.6073608509095304, "grad_norm": 1.015611141303665, "learning_rate": 9.378043440944949e-06, "loss": 1.0039, "step": 10050 }, { "epoch": 0.6079651900646643, "grad_norm": 1.0920268320292426, "learning_rate": 9.376433563685484e-06, "loss": 0.9977, "step": 10060 }, { "epoch": 0.6085695292197981, "grad_norm": 1.0467741656123792, "learning_rate": 9.374821744157312e-06, "loss": 0.99, "step": 10070 }, { "epoch": 0.609173868374932, "grad_norm": 0.9334623498717842, "learning_rate": 9.37320798307576e-06, "loss": 1.0022, "step": 10080 }, { "epoch": 0.6097782075300658, "grad_norm": 0.9579103125574115, "learning_rate": 9.37159228115702e-06, "loss": 0.9957, "step": 10090 }, { "epoch": 0.6103825466851998, "grad_norm": 1.145755009703452, "learning_rate": 9.369974639118142e-06, "loss": 1.0031, "step": 10100 }, { "epoch": 0.6109868858403336, "grad_norm": 1.2177834882240255, "learning_rate": 9.368355057677038e-06, "loss": 0.9924, "step": 10110 }, { "epoch": 0.6115912249954675, "grad_norm": 1.1834276275165139, "learning_rate": 9.366733537552482e-06, "loss": 0.9833, "step": 10120 }, { "epoch": 0.6121955641506013, "grad_norm": 1.160520941982619, "learning_rate": 9.365110079464106e-06, "loss": 0.9738, "step": 10130 }, { "epoch": 0.6127999033057352, "grad_norm": 1.1972571768669398, "learning_rate": 9.363484684132405e-06, "loss": 1.0049, "step": 10140 }, { "epoch": 0.613404242460869, "grad_norm": 1.1528255829411618, "learning_rate": 9.36185735227873e-06, "loss": 0.982, "step": 10150 }, { "epoch": 0.6140085816160029, "grad_norm": 1.148665291820779, "learning_rate": 9.360228084625295e-06, "loss": 0.9841, "step": 10160 }, { "epoch": 0.6146129207711367, "grad_norm": 1.0587130470191386, "learning_rate": 9.358596881895175e-06, "loss": 0.9881, "step": 10170 }, { "epoch": 0.6152172599262706, "grad_norm": 1.0792518174482242, "learning_rate": 9.356963744812294e-06, "loss": 0.9851, "step": 10180 }, { "epoch": 0.6158215990814044, "grad_norm": 1.1221028189216802, "learning_rate": 9.355328674101445e-06, "loss": 1.0114, "step": 10190 }, { "epoch": 0.6164259382365384, "grad_norm": 1.2120226652942028, "learning_rate": 9.353691670488276e-06, "loss": 0.998, "step": 10200 }, { "epoch": 0.6170302773916722, "grad_norm": 1.209938198372769, "learning_rate": 9.35205273469929e-06, "loss": 1.0204, "step": 10210 }, { "epoch": 0.6176346165468061, "grad_norm": 1.2520259190155245, "learning_rate": 9.35041186746185e-06, "loss": 0.9981, "step": 10220 }, { "epoch": 0.6182389557019399, "grad_norm": 1.2066662035104347, "learning_rate": 9.348769069504177e-06, "loss": 0.9867, "step": 10230 }, { "epoch": 0.6188432948570738, "grad_norm": 1.2675288487407668, "learning_rate": 9.347124341555346e-06, "loss": 1.0054, "step": 10240 }, { "epoch": 0.6194476340122077, "grad_norm": 1.474204975125771, "learning_rate": 9.34547768434529e-06, "loss": 0.9995, "step": 10250 }, { "epoch": 0.6200519731673415, "grad_norm": 1.4503827511507366, "learning_rate": 9.343829098604799e-06, "loss": 1.01, "step": 10260 }, { "epoch": 0.6206563123224754, "grad_norm": 1.4858425013892371, "learning_rate": 9.342178585065518e-06, "loss": 0.9885, "step": 10270 }, { "epoch": 0.6212606514776092, "grad_norm": 1.3819249753680594, "learning_rate": 9.340526144459949e-06, "loss": 0.9985, "step": 10280 }, { "epoch": 0.6218649906327431, "grad_norm": 1.368574954106076, "learning_rate": 9.338871777521445e-06, "loss": 0.9801, "step": 10290 }, { "epoch": 0.622469329787877, "grad_norm": 0.8926510256484639, "learning_rate": 9.337215484984218e-06, "loss": 0.9876, "step": 10300 }, { "epoch": 0.6230736689430109, "grad_norm": 0.8474308596635595, "learning_rate": 9.335557267583336e-06, "loss": 1.0041, "step": 10310 }, { "epoch": 0.6236780080981447, "grad_norm": 0.8574273052321105, "learning_rate": 9.333897126054715e-06, "loss": 0.9984, "step": 10320 }, { "epoch": 0.6242823472532786, "grad_norm": 0.8053143689453196, "learning_rate": 9.33223506113513e-06, "loss": 0.9763, "step": 10330 }, { "epoch": 0.6248866864084124, "grad_norm": 0.8382296713322909, "learning_rate": 9.330571073562212e-06, "loss": 0.9894, "step": 10340 }, { "epoch": 0.6254910255635463, "grad_norm": 0.754298344905235, "learning_rate": 9.328905164074436e-06, "loss": 0.9663, "step": 10350 }, { "epoch": 0.6260953647186801, "grad_norm": 0.7560931894107045, "learning_rate": 9.327237333411137e-06, "loss": 0.9704, "step": 10360 }, { "epoch": 0.626699703873814, "grad_norm": 0.7163717942948837, "learning_rate": 9.325567582312502e-06, "loss": 0.9649, "step": 10370 }, { "epoch": 0.6273040430289478, "grad_norm": 0.7397543008565332, "learning_rate": 9.32389591151957e-06, "loss": 1.0193, "step": 10380 }, { "epoch": 0.6279083821840817, "grad_norm": 0.6857218389504587, "learning_rate": 9.32222232177423e-06, "loss": 0.9848, "step": 10390 }, { "epoch": 0.6285127213392155, "grad_norm": 0.8279028689989585, "learning_rate": 9.320546813819224e-06, "loss": 0.9858, "step": 10400 }, { "epoch": 0.6291170604943495, "grad_norm": 0.811835169761878, "learning_rate": 9.318869388398145e-06, "loss": 0.9726, "step": 10410 }, { "epoch": 0.6297213996494833, "grad_norm": 0.7476098064839439, "learning_rate": 9.317190046255441e-06, "loss": 0.9839, "step": 10420 }, { "epoch": 0.6303257388046172, "grad_norm": 0.8211948668569802, "learning_rate": 9.315508788136403e-06, "loss": 1.0099, "step": 10430 }, { "epoch": 0.630930077959751, "grad_norm": 0.8450834200931795, "learning_rate": 9.313825614787178e-06, "loss": 1.0062, "step": 10440 }, { "epoch": 0.6315344171148849, "grad_norm": 0.5310246813456097, "learning_rate": 9.312140526954758e-06, "loss": 0.9987, "step": 10450 }, { "epoch": 0.6321387562700187, "grad_norm": 0.5425339212418681, "learning_rate": 9.310453525386991e-06, "loss": 1.0092, "step": 10460 }, { "epoch": 0.6327430954251526, "grad_norm": 0.5747138692839948, "learning_rate": 9.308764610832573e-06, "loss": 1.0095, "step": 10470 }, { "epoch": 0.6333474345802864, "grad_norm": 0.5901590026224683, "learning_rate": 9.307073784041042e-06, "loss": 0.9802, "step": 10480 }, { "epoch": 0.6339517737354203, "grad_norm": 0.5281694225065243, "learning_rate": 9.305381045762793e-06, "loss": 0.9591, "step": 10490 }, { "epoch": 0.6345561128905541, "grad_norm": 0.5989764056690364, "learning_rate": 9.303686396749066e-06, "loss": 0.9845, "step": 10500 }, { "epoch": 0.635160452045688, "grad_norm": 0.6003949813112556, "learning_rate": 9.301989837751948e-06, "loss": 1.0148, "step": 10510 }, { "epoch": 0.6357647912008219, "grad_norm": 0.5855380672852845, "learning_rate": 9.300291369524376e-06, "loss": 0.9782, "step": 10520 }, { "epoch": 0.6363691303559558, "grad_norm": 0.573675503149501, "learning_rate": 9.298590992820133e-06, "loss": 0.9998, "step": 10530 }, { "epoch": 0.6369734695110896, "grad_norm": 0.5563569082183251, "learning_rate": 9.29688870839385e-06, "loss": 0.9856, "step": 10540 }, { "epoch": 0.6375778086662235, "grad_norm": 0.8318240048317055, "learning_rate": 9.295184517001001e-06, "loss": 0.9911, "step": 10550 }, { "epoch": 0.6381821478213573, "grad_norm": 0.8333033166195671, "learning_rate": 9.29347841939791e-06, "loss": 0.9498, "step": 10560 }, { "epoch": 0.6387864869764912, "grad_norm": 0.8459279462427219, "learning_rate": 9.291770416341748e-06, "loss": 0.9955, "step": 10570 }, { "epoch": 0.6393908261316251, "grad_norm": 0.924215356492686, "learning_rate": 9.290060508590526e-06, "loss": 1.0037, "step": 10580 }, { "epoch": 0.6399951652867589, "grad_norm": 0.748350916299008, "learning_rate": 9.288348696903108e-06, "loss": 0.9825, "step": 10590 }, { "epoch": 0.6405995044418928, "grad_norm": 0.4819836884833534, "learning_rate": 9.286634982039198e-06, "loss": 0.9847, "step": 10600 }, { "epoch": 0.6412038435970266, "grad_norm": 0.5208292991015511, "learning_rate": 9.284919364759343e-06, "loss": 0.9964, "step": 10610 }, { "epoch": 0.6418081827521606, "grad_norm": 0.532887856663687, "learning_rate": 9.28320184582494e-06, "loss": 0.9974, "step": 10620 }, { "epoch": 0.6424125219072944, "grad_norm": 0.5140749365844139, "learning_rate": 9.281482425998228e-06, "loss": 0.9812, "step": 10630 }, { "epoch": 0.6430168610624283, "grad_norm": 0.49709262973391977, "learning_rate": 9.279761106042283e-06, "loss": 0.9825, "step": 10640 }, { "epoch": 0.6436212002175621, "grad_norm": 0.5071574991468321, "learning_rate": 9.278037886721031e-06, "loss": 0.9666, "step": 10650 }, { "epoch": 0.644225539372696, "grad_norm": 0.4620004023568384, "learning_rate": 9.276312768799244e-06, "loss": 1.004, "step": 10660 }, { "epoch": 0.6448298785278298, "grad_norm": 0.5331264112902342, "learning_rate": 9.274585753042529e-06, "loss": 0.9988, "step": 10670 }, { "epoch": 0.6454342176829637, "grad_norm": 0.5074318556524561, "learning_rate": 9.27285684021734e-06, "loss": 0.9958, "step": 10680 }, { "epoch": 0.6460385568380975, "grad_norm": 0.479338544974117, "learning_rate": 9.271126031090969e-06, "loss": 0.9722, "step": 10690 }, { "epoch": 0.6466428959932314, "grad_norm": 0.4770875357859588, "learning_rate": 9.269393326431555e-06, "loss": 0.976, "step": 10700 }, { "epoch": 0.6472472351483652, "grad_norm": 0.5542886093780625, "learning_rate": 9.267658727008075e-06, "loss": 0.99, "step": 10710 }, { "epoch": 0.6478515743034992, "grad_norm": 0.47188132928993437, "learning_rate": 9.265922233590346e-06, "loss": 0.9772, "step": 10720 }, { "epoch": 0.648455913458633, "grad_norm": 0.505004625991562, "learning_rate": 9.264183846949025e-06, "loss": 1.0104, "step": 10730 }, { "epoch": 0.6490602526137669, "grad_norm": 0.5144095318288245, "learning_rate": 9.262443567855615e-06, "loss": 0.9922, "step": 10740 }, { "epoch": 0.6496645917689007, "grad_norm": 0.5693522566561156, "learning_rate": 9.260701397082453e-06, "loss": 1.0062, "step": 10750 }, { "epoch": 0.6502689309240346, "grad_norm": 0.4663028022486189, "learning_rate": 9.258957335402716e-06, "loss": 0.9705, "step": 10760 }, { "epoch": 0.6508732700791684, "grad_norm": 0.475090514579552, "learning_rate": 9.257211383590422e-06, "loss": 0.9809, "step": 10770 }, { "epoch": 0.6514776092343023, "grad_norm": 0.5183302331900691, "learning_rate": 9.255463542420428e-06, "loss": 0.9714, "step": 10780 }, { "epoch": 0.6520819483894361, "grad_norm": 0.4981893480673195, "learning_rate": 9.253713812668432e-06, "loss": 0.978, "step": 10790 }, { "epoch": 0.65268628754457, "grad_norm": 0.5229191206206769, "learning_rate": 9.251962195110961e-06, "loss": 0.9806, "step": 10800 }, { "epoch": 0.6532906266997038, "grad_norm": 0.4999239946554239, "learning_rate": 9.250208690525392e-06, "loss": 1.0027, "step": 10810 }, { "epoch": 0.6538949658548378, "grad_norm": 0.4848409840368509, "learning_rate": 9.24845329968993e-06, "loss": 0.9724, "step": 10820 }, { "epoch": 0.6544993050099716, "grad_norm": 0.5023465648967292, "learning_rate": 9.246696023383618e-06, "loss": 1.0, "step": 10830 }, { "epoch": 0.6551036441651055, "grad_norm": 0.5589989100230456, "learning_rate": 9.244936862386343e-06, "loss": 0.9863, "step": 10840 }, { "epoch": 0.6557079833202393, "grad_norm": 0.5136965662227517, "learning_rate": 9.243175817478822e-06, "loss": 0.9911, "step": 10850 }, { "epoch": 0.6563123224753732, "grad_norm": 0.5160872271921505, "learning_rate": 9.241412889442608e-06, "loss": 0.9873, "step": 10860 }, { "epoch": 0.656916661630507, "grad_norm": 0.5141559394274802, "learning_rate": 9.239648079060095e-06, "loss": 0.9845, "step": 10870 }, { "epoch": 0.6575210007856409, "grad_norm": 0.4933269971227344, "learning_rate": 9.237881387114506e-06, "loss": 0.9721, "step": 10880 }, { "epoch": 0.6581253399407747, "grad_norm": 0.5753448886446458, "learning_rate": 9.2361128143899e-06, "loss": 0.9709, "step": 10890 }, { "epoch": 0.6587296790959086, "grad_norm": 0.5705198321046384, "learning_rate": 9.23434236167118e-06, "loss": 0.9922, "step": 10900 }, { "epoch": 0.6593340182510424, "grad_norm": 0.595507704789605, "learning_rate": 9.232570029744068e-06, "loss": 0.9783, "step": 10910 }, { "epoch": 0.6599383574061763, "grad_norm": 0.5686412147669858, "learning_rate": 9.230795819395132e-06, "loss": 1.0247, "step": 10920 }, { "epoch": 0.6605426965613103, "grad_norm": 0.6141000987688027, "learning_rate": 9.229019731411769e-06, "loss": 1.0003, "step": 10930 }, { "epoch": 0.6611470357164441, "grad_norm": 0.5601687919154574, "learning_rate": 9.22724176658221e-06, "loss": 1.0045, "step": 10940 }, { "epoch": 0.661751374871578, "grad_norm": 0.6536199635611087, "learning_rate": 9.225461925695518e-06, "loss": 0.987, "step": 10950 }, { "epoch": 0.6623557140267118, "grad_norm": 0.7237551481360515, "learning_rate": 9.22368020954159e-06, "loss": 1.0059, "step": 10960 }, { "epoch": 0.6629600531818457, "grad_norm": 0.6426615334396304, "learning_rate": 9.221896618911155e-06, "loss": 0.9852, "step": 10970 }, { "epoch": 0.6635643923369795, "grad_norm": 0.6895427524504979, "learning_rate": 9.220111154595773e-06, "loss": 0.9708, "step": 10980 }, { "epoch": 0.6641687314921134, "grad_norm": 0.6603713954626483, "learning_rate": 9.218323817387835e-06, "loss": 0.9681, "step": 10990 }, { "epoch": 0.6647730706472472, "grad_norm": 0.6131720081140489, "learning_rate": 9.216534608080567e-06, "loss": 0.9863, "step": 11000 }, { "epoch": 0.6653774098023811, "grad_norm": 0.6334715648239038, "learning_rate": 9.214743527468022e-06, "loss": 0.9938, "step": 11010 }, { "epoch": 0.6659817489575149, "grad_norm": 0.6093963682364176, "learning_rate": 9.212950576345083e-06, "loss": 0.9778, "step": 11020 }, { "epoch": 0.6665860881126489, "grad_norm": 0.6036885481806127, "learning_rate": 9.211155755507469e-06, "loss": 1.001, "step": 11030 }, { "epoch": 0.6671904272677827, "grad_norm": 0.6072954521914475, "learning_rate": 9.20935906575172e-06, "loss": 1.0085, "step": 11040 }, { "epoch": 0.6677947664229166, "grad_norm": 0.7483391104250456, "learning_rate": 9.207560507875211e-06, "loss": 0.9834, "step": 11050 }, { "epoch": 0.6683991055780504, "grad_norm": 0.8143711848042002, "learning_rate": 9.205760082676146e-06, "loss": 0.9707, "step": 11060 }, { "epoch": 0.6690034447331843, "grad_norm": 0.8024408970263641, "learning_rate": 9.203957790953557e-06, "loss": 0.9966, "step": 11070 }, { "epoch": 0.6696077838883181, "grad_norm": 0.7489943244195018, "learning_rate": 9.202153633507305e-06, "loss": 0.9829, "step": 11080 }, { "epoch": 0.670212123043452, "grad_norm": 0.7732069777318934, "learning_rate": 9.200347611138076e-06, "loss": 0.9986, "step": 11090 }, { "epoch": 0.6708164621985858, "grad_norm": 0.7656310561319766, "learning_rate": 9.198539724647386e-06, "loss": 0.9949, "step": 11100 }, { "epoch": 0.6714208013537197, "grad_norm": 0.7255754082020898, "learning_rate": 9.19672997483758e-06, "loss": 1.008, "step": 11110 }, { "epoch": 0.6720251405088535, "grad_norm": 0.7745252880711284, "learning_rate": 9.194918362511824e-06, "loss": 0.9626, "step": 11120 }, { "epoch": 0.6726294796639875, "grad_norm": 0.7779136958327447, "learning_rate": 9.19310488847412e-06, "loss": 0.9878, "step": 11130 }, { "epoch": 0.6732338188191213, "grad_norm": 0.7194593861264345, "learning_rate": 9.19128955352929e-06, "loss": 1.0045, "step": 11140 }, { "epoch": 0.6738381579742552, "grad_norm": 1.1425815135973652, "learning_rate": 9.189472358482979e-06, "loss": 0.9893, "step": 11150 }, { "epoch": 0.674442497129389, "grad_norm": 1.2757276541801958, "learning_rate": 9.187653304141664e-06, "loss": 0.9717, "step": 11160 }, { "epoch": 0.6750468362845229, "grad_norm": 1.2396726750203204, "learning_rate": 9.185832391312644e-06, "loss": 0.9782, "step": 11170 }, { "epoch": 0.6756511754396567, "grad_norm": 1.2196346393467958, "learning_rate": 9.184009620804042e-06, "loss": 0.9921, "step": 11180 }, { "epoch": 0.6762555145947906, "grad_norm": 1.2102711188485422, "learning_rate": 9.182184993424809e-06, "loss": 1.0139, "step": 11190 }, { "epoch": 0.6768598537499244, "grad_norm": 1.0518544448686369, "learning_rate": 9.180358509984717e-06, "loss": 0.9913, "step": 11200 }, { "epoch": 0.6774641929050583, "grad_norm": 1.0281459667255863, "learning_rate": 9.178530171294362e-06, "loss": 0.9845, "step": 11210 }, { "epoch": 0.6780685320601921, "grad_norm": 0.9494328978376024, "learning_rate": 9.176699978165162e-06, "loss": 0.962, "step": 11220 }, { "epoch": 0.678672871215326, "grad_norm": 1.2616756497665285, "learning_rate": 9.174867931409364e-06, "loss": 0.9726, "step": 11230 }, { "epoch": 0.6792772103704598, "grad_norm": 0.9602385959102203, "learning_rate": 9.173034031840031e-06, "loss": 0.9968, "step": 11240 }, { "epoch": 0.6798815495255938, "grad_norm": 2.360136304782415, "learning_rate": 9.17119828027105e-06, "loss": 1.0026, "step": 11250 }, { "epoch": 0.6804858886807277, "grad_norm": 2.5682312858457377, "learning_rate": 9.169360677517132e-06, "loss": 1.004, "step": 11260 }, { "epoch": 0.6810902278358615, "grad_norm": 2.496816033745603, "learning_rate": 9.167521224393807e-06, "loss": 0.9988, "step": 11270 }, { "epoch": 0.6816945669909954, "grad_norm": 2.4382175754864672, "learning_rate": 9.16567992171743e-06, "loss": 0.9995, "step": 11280 }, { "epoch": 0.6822989061461292, "grad_norm": 2.59311472916446, "learning_rate": 9.163836770305173e-06, "loss": 1.0325, "step": 11290 }, { "epoch": 0.6829032453012631, "grad_norm": 0.9643666589556045, "learning_rate": 9.161991770975027e-06, "loss": 0.9827, "step": 11300 }, { "epoch": 0.6835075844563969, "grad_norm": 0.8990940350464417, "learning_rate": 9.160144924545809e-06, "loss": 0.9751, "step": 11310 }, { "epoch": 0.6841119236115308, "grad_norm": 0.99468382355489, "learning_rate": 9.158296231837152e-06, "loss": 0.9865, "step": 11320 }, { "epoch": 0.6847162627666646, "grad_norm": 0.8907197485462354, "learning_rate": 9.15644569366951e-06, "loss": 1.0031, "step": 11330 }, { "epoch": 0.6853206019217986, "grad_norm": 0.9086466011719967, "learning_rate": 9.154593310864152e-06, "loss": 0.9745, "step": 11340 }, { "epoch": 0.6859249410769324, "grad_norm": 0.9451981754943589, "learning_rate": 9.152739084243173e-06, "loss": 0.9804, "step": 11350 }, { "epoch": 0.6865292802320663, "grad_norm": 1.0670467949148794, "learning_rate": 9.150883014629478e-06, "loss": 1.0091, "step": 11360 }, { "epoch": 0.6871336193872001, "grad_norm": 0.8625833050272264, "learning_rate": 9.149025102846796e-06, "loss": 0.9576, "step": 11370 }, { "epoch": 0.687737958542334, "grad_norm": 0.9589680159000987, "learning_rate": 9.14716534971967e-06, "loss": 0.9996, "step": 11380 }, { "epoch": 0.6883422976974678, "grad_norm": 1.1221394032909637, "learning_rate": 9.145303756073462e-06, "loss": 1.0121, "step": 11390 }, { "epoch": 0.6889466368526017, "grad_norm": 1.0797298739274597, "learning_rate": 9.143440322734351e-06, "loss": 0.986, "step": 11400 }, { "epoch": 0.6895509760077355, "grad_norm": 1.0855279220793714, "learning_rate": 9.141575050529333e-06, "loss": 0.9977, "step": 11410 }, { "epoch": 0.6901553151628694, "grad_norm": 1.080811344042126, "learning_rate": 9.139707940286217e-06, "loss": 0.9786, "step": 11420 }, { "epoch": 0.6907596543180032, "grad_norm": 1.1494757087179435, "learning_rate": 9.13783899283363e-06, "loss": 0.9741, "step": 11430 }, { "epoch": 0.6913639934731372, "grad_norm": 0.9931475693939104, "learning_rate": 9.135968209001015e-06, "loss": 0.979, "step": 11440 }, { "epoch": 0.691968332628271, "grad_norm": 1.0369874046079264, "learning_rate": 9.13409558961863e-06, "loss": 1.0099, "step": 11450 }, { "epoch": 0.6925726717834049, "grad_norm": 0.9394206413278563, "learning_rate": 9.132221135517543e-06, "loss": 1.0058, "step": 11460 }, { "epoch": 0.6931770109385387, "grad_norm": 1.1204964671511828, "learning_rate": 9.130344847529642e-06, "loss": 0.9835, "step": 11470 }, { "epoch": 0.6937813500936726, "grad_norm": 1.044209823656874, "learning_rate": 9.128466726487629e-06, "loss": 0.9953, "step": 11480 }, { "epoch": 0.6943856892488064, "grad_norm": 1.019261995820744, "learning_rate": 9.126586773225015e-06, "loss": 1.0044, "step": 11490 }, { "epoch": 0.6949900284039403, "grad_norm": 1.2126115483222022, "learning_rate": 9.124704988576126e-06, "loss": 0.9862, "step": 11500 }, { "epoch": 0.6955943675590741, "grad_norm": 1.3053405603580308, "learning_rate": 9.122821373376103e-06, "loss": 0.995, "step": 11510 }, { "epoch": 0.696198706714208, "grad_norm": 1.0808497663578513, "learning_rate": 9.120935928460896e-06, "loss": 0.9759, "step": 11520 }, { "epoch": 0.6968030458693418, "grad_norm": 1.156907606411265, "learning_rate": 9.11904865466727e-06, "loss": 1.0087, "step": 11530 }, { "epoch": 0.6974073850244757, "grad_norm": 1.1856215445428815, "learning_rate": 9.1171595528328e-06, "loss": 1.0009, "step": 11540 }, { "epoch": 0.6980117241796095, "grad_norm": 0.9628398251299797, "learning_rate": 9.115268623795872e-06, "loss": 1.0248, "step": 11550 }, { "epoch": 0.6986160633347435, "grad_norm": 0.9392412711871685, "learning_rate": 9.113375868395684e-06, "loss": 1.0008, "step": 11560 }, { "epoch": 0.6992204024898773, "grad_norm": 0.936700435997837, "learning_rate": 9.111481287472244e-06, "loss": 0.9865, "step": 11570 }, { "epoch": 0.6998247416450112, "grad_norm": 0.9798770844112202, "learning_rate": 9.10958488186637e-06, "loss": 0.9682, "step": 11580 }, { "epoch": 0.700429080800145, "grad_norm": 1.0060994875143237, "learning_rate": 9.10768665241969e-06, "loss": 0.9872, "step": 11590 }, { "epoch": 0.7010334199552789, "grad_norm": 1.096508664211486, "learning_rate": 9.105786599974643e-06, "loss": 0.9898, "step": 11600 }, { "epoch": 0.7016377591104128, "grad_norm": 1.1152411037856387, "learning_rate": 9.103884725374475e-06, "loss": 0.9824, "step": 11610 }, { "epoch": 0.7022420982655466, "grad_norm": 1.0871318979322648, "learning_rate": 9.101981029463238e-06, "loss": 1.0027, "step": 11620 }, { "epoch": 0.7028464374206805, "grad_norm": 0.9910815261221368, "learning_rate": 9.1000755130858e-06, "loss": 0.9738, "step": 11630 }, { "epoch": 0.7034507765758143, "grad_norm": 0.9908666882350229, "learning_rate": 9.09816817708783e-06, "loss": 0.981, "step": 11640 }, { "epoch": 0.7040551157309483, "grad_norm": 0.959226320905126, "learning_rate": 9.096259022315807e-06, "loss": 1.0114, "step": 11650 }, { "epoch": 0.7046594548860821, "grad_norm": 1.021741168779716, "learning_rate": 9.094348049617018e-06, "loss": 0.9905, "step": 11660 }, { "epoch": 0.705263794041216, "grad_norm": 0.9611665847497872, "learning_rate": 9.092435259839556e-06, "loss": 0.9831, "step": 11670 }, { "epoch": 0.7058681331963498, "grad_norm": 0.9744434453061777, "learning_rate": 9.09052065383232e-06, "loss": 0.9955, "step": 11680 }, { "epoch": 0.7064724723514837, "grad_norm": 0.9913316169570978, "learning_rate": 9.088604232445015e-06, "loss": 1.0069, "step": 11690 }, { "epoch": 0.7070768115066175, "grad_norm": 1.1143855494751294, "learning_rate": 9.086685996528153e-06, "loss": 0.9878, "step": 11700 }, { "epoch": 0.7076811506617514, "grad_norm": 1.1206678887411492, "learning_rate": 9.084765946933049e-06, "loss": 0.9763, "step": 11710 }, { "epoch": 0.7082854898168852, "grad_norm": 1.147611908208286, "learning_rate": 9.082844084511826e-06, "loss": 0.993, "step": 11720 }, { "epoch": 0.7088898289720191, "grad_norm": 1.0563099789647354, "learning_rate": 9.080920410117408e-06, "loss": 0.9674, "step": 11730 }, { "epoch": 0.7094941681271529, "grad_norm": 1.1238285670454469, "learning_rate": 9.078994924603528e-06, "loss": 0.9699, "step": 11740 }, { "epoch": 0.7100985072822869, "grad_norm": 1.0348347262416522, "learning_rate": 9.077067628824717e-06, "loss": 0.9928, "step": 11750 }, { "epoch": 0.7107028464374207, "grad_norm": 1.0877791267266723, "learning_rate": 9.075138523636311e-06, "loss": 0.9852, "step": 11760 }, { "epoch": 0.7113071855925546, "grad_norm": 1.0290753796759835, "learning_rate": 9.073207609894453e-06, "loss": 0.9978, "step": 11770 }, { "epoch": 0.7119115247476884, "grad_norm": 1.0119374235792082, "learning_rate": 9.071274888456085e-06, "loss": 0.968, "step": 11780 }, { "epoch": 0.7125158639028223, "grad_norm": 1.084554789136185, "learning_rate": 9.069340360178954e-06, "loss": 0.98, "step": 11790 }, { "epoch": 0.7131202030579561, "grad_norm": 1.292945247968578, "learning_rate": 9.067404025921602e-06, "loss": 0.989, "step": 11800 }, { "epoch": 0.71372454221309, "grad_norm": 1.2325045084164217, "learning_rate": 9.065465886543383e-06, "loss": 0.9903, "step": 11810 }, { "epoch": 0.7143288813682238, "grad_norm": 1.1954982793614004, "learning_rate": 9.063525942904441e-06, "loss": 1.0155, "step": 11820 }, { "epoch": 0.7149332205233577, "grad_norm": 1.207349366987723, "learning_rate": 9.061584195865731e-06, "loss": 1.0011, "step": 11830 }, { "epoch": 0.7155375596784915, "grad_norm": 1.1640751486901595, "learning_rate": 9.059640646289003e-06, "loss": 0.9834, "step": 11840 }, { "epoch": 0.7161418988336254, "grad_norm": 1.383110403803805, "learning_rate": 9.057695295036806e-06, "loss": 0.9718, "step": 11850 }, { "epoch": 0.7167462379887592, "grad_norm": 1.40955164725621, "learning_rate": 9.055748142972492e-06, "loss": 0.9896, "step": 11860 }, { "epoch": 0.7173505771438932, "grad_norm": 1.3416954089177544, "learning_rate": 9.053799190960207e-06, "loss": 1.0037, "step": 11870 }, { "epoch": 0.717954916299027, "grad_norm": 1.326429827227262, "learning_rate": 9.0518484398649e-06, "loss": 0.9944, "step": 11880 }, { "epoch": 0.7185592554541609, "grad_norm": 1.3703176412803695, "learning_rate": 9.049895890552322e-06, "loss": 0.9749, "step": 11890 }, { "epoch": 0.7191635946092947, "grad_norm": 0.8697629241180893, "learning_rate": 9.047941543889014e-06, "loss": 1.0026, "step": 11900 }, { "epoch": 0.7197679337644286, "grad_norm": 0.86933569591174, "learning_rate": 9.045985400742321e-06, "loss": 0.9954, "step": 11910 }, { "epoch": 0.7203722729195624, "grad_norm": 0.8375890804454419, "learning_rate": 9.044027461980381e-06, "loss": 0.9912, "step": 11920 }, { "epoch": 0.7209766120746963, "grad_norm": 0.8707385495020277, "learning_rate": 9.042067728472129e-06, "loss": 1.0009, "step": 11930 }, { "epoch": 0.7215809512298301, "grad_norm": 0.8135350789072329, "learning_rate": 9.040106201087297e-06, "loss": 0.9781, "step": 11940 }, { "epoch": 0.722185290384964, "grad_norm": 0.7407151800562409, "learning_rate": 9.03814288069642e-06, "loss": 0.9838, "step": 11950 }, { "epoch": 0.722789629540098, "grad_norm": 0.8452069511713939, "learning_rate": 9.03617776817082e-06, "loss": 0.9963, "step": 11960 }, { "epoch": 0.7233939686952318, "grad_norm": 0.8312204615221693, "learning_rate": 9.034210864382616e-06, "loss": 1.0057, "step": 11970 }, { "epoch": 0.7239983078503657, "grad_norm": 0.7671606856646014, "learning_rate": 9.032242170204724e-06, "loss": 0.988, "step": 11980 }, { "epoch": 0.7246026470054995, "grad_norm": 0.7870259571936258, "learning_rate": 9.030271686510857e-06, "loss": 0.9838, "step": 11990 }, { "epoch": 0.7252069861606334, "grad_norm": 0.7722349099886145, "learning_rate": 9.028299414175513e-06, "loss": 0.9821, "step": 12000 }, { "epoch": 0.7258113253157672, "grad_norm": 0.7507826755962207, "learning_rate": 9.026325354073994e-06, "loss": 0.9839, "step": 12010 }, { "epoch": 0.7264156644709011, "grad_norm": 0.7804021600145328, "learning_rate": 9.024349507082391e-06, "loss": 0.9735, "step": 12020 }, { "epoch": 0.7270200036260349, "grad_norm": 0.7444413921959204, "learning_rate": 9.022371874077587e-06, "loss": 0.9643, "step": 12030 }, { "epoch": 0.7276243427811688, "grad_norm": 0.8052815652514865, "learning_rate": 9.020392455937258e-06, "loss": 0.969, "step": 12040 }, { "epoch": 0.7282286819363026, "grad_norm": 0.570848511247743, "learning_rate": 9.018411253539878e-06, "loss": 1.0035, "step": 12050 }, { "epoch": 0.7288330210914366, "grad_norm": 0.547092389803415, "learning_rate": 9.016428267764703e-06, "loss": 0.992, "step": 12060 }, { "epoch": 0.7294373602465704, "grad_norm": 0.5479318492657143, "learning_rate": 9.014443499491787e-06, "loss": 1.0049, "step": 12070 }, { "epoch": 0.7300416994017043, "grad_norm": 0.5392362781407463, "learning_rate": 9.012456949601973e-06, "loss": 0.9923, "step": 12080 }, { "epoch": 0.7306460385568381, "grad_norm": 0.5184908872445889, "learning_rate": 9.010468618976897e-06, "loss": 0.9784, "step": 12090 }, { "epoch": 0.731250377711972, "grad_norm": 0.5660185365497624, "learning_rate": 9.008478508498983e-06, "loss": 0.9948, "step": 12100 }, { "epoch": 0.7318547168671058, "grad_norm": 0.5976364002032862, "learning_rate": 9.006486619051444e-06, "loss": 0.9854, "step": 12110 }, { "epoch": 0.7324590560222397, "grad_norm": 0.6173132792024844, "learning_rate": 9.004492951518286e-06, "loss": 0.9768, "step": 12120 }, { "epoch": 0.7330633951773735, "grad_norm": 0.5808668053590242, "learning_rate": 9.0024975067843e-06, "loss": 0.9951, "step": 12130 }, { "epoch": 0.7336677343325074, "grad_norm": 0.5879149739609679, "learning_rate": 9.000500285735068e-06, "loss": 0.9587, "step": 12140 }, { "epoch": 0.7342720734876412, "grad_norm": 0.7645631638809431, "learning_rate": 8.998501289256962e-06, "loss": 1.0147, "step": 12150 }, { "epoch": 0.7348764126427751, "grad_norm": 0.8201230793337498, "learning_rate": 8.996500518237138e-06, "loss": 0.9862, "step": 12160 }, { "epoch": 0.735480751797909, "grad_norm": 0.8259271342722222, "learning_rate": 8.994497973563546e-06, "loss": 0.9916, "step": 12170 }, { "epoch": 0.7360850909530429, "grad_norm": 0.8422042361479396, "learning_rate": 8.99249365612491e-06, "loss": 1.003, "step": 12180 }, { "epoch": 0.7366894301081767, "grad_norm": 0.8094208736844749, "learning_rate": 8.990487566810758e-06, "loss": 1.0045, "step": 12190 }, { "epoch": 0.7372937692633106, "grad_norm": 0.495061085816348, "learning_rate": 8.988479706511392e-06, "loss": 0.9684, "step": 12200 }, { "epoch": 0.7378981084184444, "grad_norm": 0.46688358966768023, "learning_rate": 8.986470076117903e-06, "loss": 0.981, "step": 12210 }, { "epoch": 0.7385024475735783, "grad_norm": 0.5491548419505625, "learning_rate": 8.984458676522173e-06, "loss": 1.0009, "step": 12220 }, { "epoch": 0.7391067867287121, "grad_norm": 0.5004912743622855, "learning_rate": 8.98244550861686e-06, "loss": 0.9775, "step": 12230 }, { "epoch": 0.739711125883846, "grad_norm": 0.5037586346099409, "learning_rate": 8.980430573295412e-06, "loss": 0.9937, "step": 12240 }, { "epoch": 0.7403154650389798, "grad_norm": 0.50782572699603, "learning_rate": 8.978413871452063e-06, "loss": 0.9743, "step": 12250 }, { "epoch": 0.7409198041941137, "grad_norm": 0.48623540179183794, "learning_rate": 8.976395403981826e-06, "loss": 0.963, "step": 12260 }, { "epoch": 0.7415241433492475, "grad_norm": 0.525237134634794, "learning_rate": 8.974375171780503e-06, "loss": 0.9786, "step": 12270 }, { "epoch": 0.7421284825043815, "grad_norm": 0.4968514246767906, "learning_rate": 8.972353175744673e-06, "loss": 0.9776, "step": 12280 }, { "epoch": 0.7427328216595154, "grad_norm": 0.4954694245732145, "learning_rate": 8.970329416771707e-06, "loss": 0.9487, "step": 12290 }, { "epoch": 0.7433371608146492, "grad_norm": 0.486922501516735, "learning_rate": 8.968303895759746e-06, "loss": 0.9761, "step": 12300 }, { "epoch": 0.7439414999697831, "grad_norm": 0.4824293904099712, "learning_rate": 8.966276613607723e-06, "loss": 0.9815, "step": 12310 }, { "epoch": 0.7445458391249169, "grad_norm": 0.47489455049298224, "learning_rate": 8.96424757121535e-06, "loss": 0.9642, "step": 12320 }, { "epoch": 0.7451501782800508, "grad_norm": 0.5166013727865318, "learning_rate": 8.96221676948312e-06, "loss": 0.9966, "step": 12330 }, { "epoch": 0.7457545174351846, "grad_norm": 0.5397581526337417, "learning_rate": 8.960184209312303e-06, "loss": 0.972, "step": 12340 }, { "epoch": 0.7463588565903185, "grad_norm": 0.519150812071339, "learning_rate": 8.958149891604955e-06, "loss": 0.9657, "step": 12350 }, { "epoch": 0.7469631957454523, "grad_norm": 0.48926311701962183, "learning_rate": 8.95611381726391e-06, "loss": 0.9734, "step": 12360 }, { "epoch": 0.7475675349005863, "grad_norm": 0.4748196966353509, "learning_rate": 8.95407598719278e-06, "loss": 0.9832, "step": 12370 }, { "epoch": 0.74817187405572, "grad_norm": 0.49403967934107046, "learning_rate": 8.952036402295957e-06, "loss": 0.9835, "step": 12380 }, { "epoch": 0.748776213210854, "grad_norm": 0.47845085355691896, "learning_rate": 8.949995063478613e-06, "loss": 0.9782, "step": 12390 }, { "epoch": 0.7493805523659878, "grad_norm": 0.5183590080599418, "learning_rate": 8.9479519716467e-06, "loss": 0.9688, "step": 12400 }, { "epoch": 0.7499848915211217, "grad_norm": 0.5083758736917221, "learning_rate": 8.945907127706943e-06, "loss": 0.9809, "step": 12410 }, { "epoch": 0.7505892306762555, "grad_norm": 0.5217360128388254, "learning_rate": 8.943860532566848e-06, "loss": 0.9687, "step": 12420 }, { "epoch": 0.7511935698313894, "grad_norm": 0.4792187304605603, "learning_rate": 8.941812187134697e-06, "loss": 0.9694, "step": 12430 }, { "epoch": 0.7517979089865232, "grad_norm": 0.4938774550681597, "learning_rate": 8.93976209231955e-06, "loss": 0.9824, "step": 12440 }, { "epoch": 0.7524022481416571, "grad_norm": 0.5427203765930836, "learning_rate": 8.937710249031242e-06, "loss": 1.008, "step": 12450 }, { "epoch": 0.7530065872967909, "grad_norm": 0.5023301042273396, "learning_rate": 8.935656658180383e-06, "loss": 0.9724, "step": 12460 }, { "epoch": 0.7536109264519248, "grad_norm": 0.5194539153980096, "learning_rate": 8.933601320678364e-06, "loss": 0.9874, "step": 12470 }, { "epoch": 0.7542152656070586, "grad_norm": 0.49529764598306425, "learning_rate": 8.931544237437345e-06, "loss": 0.9902, "step": 12480 }, { "epoch": 0.7548196047621926, "grad_norm": 0.5494985257211211, "learning_rate": 8.929485409370262e-06, "loss": 0.9817, "step": 12490 }, { "epoch": 0.7554239439173264, "grad_norm": 0.6035382428142145, "learning_rate": 8.927424837390827e-06, "loss": 1.0071, "step": 12500 }, { "epoch": 0.7560282830724603, "grad_norm": 0.594329703039584, "learning_rate": 8.925362522413525e-06, "loss": 0.9952, "step": 12510 }, { "epoch": 0.7566326222275941, "grad_norm": 0.6182196854564175, "learning_rate": 8.923298465353619e-06, "loss": 0.972, "step": 12520 }, { "epoch": 0.757236961382728, "grad_norm": 0.5803582135895718, "learning_rate": 8.921232667127135e-06, "loss": 0.9784, "step": 12530 }, { "epoch": 0.7578413005378618, "grad_norm": 0.6042346067545898, "learning_rate": 8.919165128650883e-06, "loss": 0.9726, "step": 12540 }, { "epoch": 0.7584456396929957, "grad_norm": 0.6487320621608026, "learning_rate": 8.917095850842437e-06, "loss": 0.9997, "step": 12550 }, { "epoch": 0.7590499788481295, "grad_norm": 0.6322104977852444, "learning_rate": 8.915024834620148e-06, "loss": 0.9972, "step": 12560 }, { "epoch": 0.7596543180032634, "grad_norm": 0.6265604748891366, "learning_rate": 8.912952080903133e-06, "loss": 0.9646, "step": 12570 }, { "epoch": 0.7602586571583972, "grad_norm": 0.6315980662337992, "learning_rate": 8.910877590611287e-06, "loss": 0.974, "step": 12580 }, { "epoch": 0.7608629963135312, "grad_norm": 0.6525734752535817, "learning_rate": 8.908801364665271e-06, "loss": 0.9962, "step": 12590 }, { "epoch": 0.761467335468665, "grad_norm": 0.5984498114217938, "learning_rate": 8.90672340398652e-06, "loss": 0.992, "step": 12600 }, { "epoch": 0.7620716746237989, "grad_norm": 0.5847134619059274, "learning_rate": 8.904643709497232e-06, "loss": 0.988, "step": 12610 }, { "epoch": 0.7626760137789327, "grad_norm": 0.6243311340661921, "learning_rate": 8.902562282120384e-06, "loss": 0.9838, "step": 12620 }, { "epoch": 0.7632803529340666, "grad_norm": 0.6136992415016654, "learning_rate": 8.900479122779712e-06, "loss": 0.9649, "step": 12630 }, { "epoch": 0.7638846920892005, "grad_norm": 0.6181032997009042, "learning_rate": 8.89839423239973e-06, "loss": 0.9873, "step": 12640 }, { "epoch": 0.7644890312443343, "grad_norm": 0.7571176720444768, "learning_rate": 8.896307611905712e-06, "loss": 0.9689, "step": 12650 }, { "epoch": 0.7650933703994682, "grad_norm": 0.8257227649852571, "learning_rate": 8.894219262223708e-06, "loss": 0.9649, "step": 12660 }, { "epoch": 0.765697709554602, "grad_norm": 0.7573265597093679, "learning_rate": 8.892129184280529e-06, "loss": 0.9717, "step": 12670 }, { "epoch": 0.766302048709736, "grad_norm": 0.7765239035915268, "learning_rate": 8.890037379003753e-06, "loss": 0.9811, "step": 12680 }, { "epoch": 0.7669063878648698, "grad_norm": 0.772057018563983, "learning_rate": 8.887943847321732e-06, "loss": 0.9859, "step": 12690 }, { "epoch": 0.7675107270200037, "grad_norm": 0.7485074173903123, "learning_rate": 8.885848590163576e-06, "loss": 0.9792, "step": 12700 }, { "epoch": 0.7681150661751375, "grad_norm": 0.7391935357756217, "learning_rate": 8.88375160845916e-06, "loss": 0.9741, "step": 12710 }, { "epoch": 0.7687194053302714, "grad_norm": 0.7627188576521519, "learning_rate": 8.881652903139133e-06, "loss": 0.9726, "step": 12720 }, { "epoch": 0.7693237444854052, "grad_norm": 0.7719007616210114, "learning_rate": 8.879552475134905e-06, "loss": 0.9749, "step": 12730 }, { "epoch": 0.7699280836405391, "grad_norm": 0.7557094347350467, "learning_rate": 8.877450325378644e-06, "loss": 0.9872, "step": 12740 }, { "epoch": 0.7705324227956729, "grad_norm": 1.1654136615150479, "learning_rate": 8.875346454803293e-06, "loss": 0.9761, "step": 12750 }, { "epoch": 0.7711367619508068, "grad_norm": 1.1643518181661552, "learning_rate": 8.873240864342551e-06, "loss": 0.9834, "step": 12760 }, { "epoch": 0.7717411011059406, "grad_norm": 1.152879144672955, "learning_rate": 8.871133554930883e-06, "loss": 0.9773, "step": 12770 }, { "epoch": 0.7723454402610745, "grad_norm": 1.2105008183838115, "learning_rate": 8.869024527503515e-06, "loss": 0.9967, "step": 12780 }, { "epoch": 0.7729497794162083, "grad_norm": 1.156277855829216, "learning_rate": 8.86691378299644e-06, "loss": 0.9796, "step": 12790 }, { "epoch": 0.7735541185713423, "grad_norm": 0.871129940192965, "learning_rate": 8.864801322346409e-06, "loss": 0.9751, "step": 12800 }, { "epoch": 0.7741584577264761, "grad_norm": 1.0971713398537382, "learning_rate": 8.862687146490933e-06, "loss": 0.96, "step": 12810 }, { "epoch": 0.77476279688161, "grad_norm": 0.9603285046436115, "learning_rate": 8.860571256368291e-06, "loss": 0.9719, "step": 12820 }, { "epoch": 0.7753671360367438, "grad_norm": 1.0284086486563349, "learning_rate": 8.858453652917515e-06, "loss": 0.9822, "step": 12830 }, { "epoch": 0.7759714751918777, "grad_norm": 1.108532051114074, "learning_rate": 8.856334337078405e-06, "loss": 0.989, "step": 12840 }, { "epoch": 0.7765758143470115, "grad_norm": 2.288443083966407, "learning_rate": 8.854213309791512e-06, "loss": 0.9914, "step": 12850 }, { "epoch": 0.7771801535021454, "grad_norm": 2.2618259532924387, "learning_rate": 8.852090571998156e-06, "loss": 0.9672, "step": 12860 }, { "epoch": 0.7777844926572792, "grad_norm": 3.2938088308449385, "learning_rate": 8.84996612464041e-06, "loss": 0.9994, "step": 12870 }, { "epoch": 0.7783888318124131, "grad_norm": 2.4549008753153765, "learning_rate": 8.847839968661107e-06, "loss": 1.0199, "step": 12880 }, { "epoch": 0.7789931709675469, "grad_norm": 2.2624847474969516, "learning_rate": 8.845712105003839e-06, "loss": 0.9753, "step": 12890 }, { "epoch": 0.7795975101226809, "grad_norm": 0.9079545883891622, "learning_rate": 8.843582534612953e-06, "loss": 0.9892, "step": 12900 }, { "epoch": 0.7802018492778147, "grad_norm": 0.8898374237683913, "learning_rate": 8.841451258433561e-06, "loss": 0.9685, "step": 12910 }, { "epoch": 0.7808061884329486, "grad_norm": 1.022380287712484, "learning_rate": 8.839318277411525e-06, "loss": 0.9908, "step": 12920 }, { "epoch": 0.7814105275880824, "grad_norm": 0.8965094544144594, "learning_rate": 8.837183592493464e-06, "loss": 1.0301, "step": 12930 }, { "epoch": 0.7820148667432163, "grad_norm": 1.051180753939061, "learning_rate": 8.835047204626754e-06, "loss": 0.9689, "step": 12940 }, { "epoch": 0.7826192058983501, "grad_norm": 0.9220824188974558, "learning_rate": 8.832909114759531e-06, "loss": 0.985, "step": 12950 }, { "epoch": 0.783223545053484, "grad_norm": 0.9956000678829586, "learning_rate": 8.830769323840683e-06, "loss": 0.9718, "step": 12960 }, { "epoch": 0.7838278842086179, "grad_norm": 0.9865907366189631, "learning_rate": 8.828627832819849e-06, "loss": 0.9713, "step": 12970 }, { "epoch": 0.7844322233637517, "grad_norm": 0.9885251681361801, "learning_rate": 8.82648464264743e-06, "loss": 1.0016, "step": 12980 }, { "epoch": 0.7850365625188856, "grad_norm": 0.8647638258869463, "learning_rate": 8.824339754274575e-06, "loss": 0.9711, "step": 12990 }, { "epoch": 0.7856409016740195, "grad_norm": 0.9766631680171941, "learning_rate": 8.822193168653192e-06, "loss": 0.9913, "step": 13000 }, { "epoch": 0.7862452408291534, "grad_norm": 1.1066695703897402, "learning_rate": 8.820044886735937e-06, "loss": 0.9758, "step": 13010 }, { "epoch": 0.7868495799842872, "grad_norm": 1.0346901679097578, "learning_rate": 8.81789490947622e-06, "loss": 0.9784, "step": 13020 }, { "epoch": 0.7874539191394211, "grad_norm": 1.0138395969210046, "learning_rate": 8.815743237828209e-06, "loss": 0.9931, "step": 13030 }, { "epoch": 0.7880582582945549, "grad_norm": 0.9486189044453003, "learning_rate": 8.813589872746817e-06, "loss": 0.9708, "step": 13040 }, { "epoch": 0.7886625974496888, "grad_norm": 0.9412574776706261, "learning_rate": 8.81143481518771e-06, "loss": 0.974, "step": 13050 }, { "epoch": 0.7892669366048226, "grad_norm": 1.0842521628414044, "learning_rate": 8.809278066107307e-06, "loss": 0.9794, "step": 13060 }, { "epoch": 0.7898712757599565, "grad_norm": 1.0057938488081792, "learning_rate": 8.807119626462779e-06, "loss": 0.9756, "step": 13070 }, { "epoch": 0.7904756149150903, "grad_norm": 0.9875812606361327, "learning_rate": 8.804959497212042e-06, "loss": 0.9762, "step": 13080 }, { "epoch": 0.7910799540702242, "grad_norm": 1.0159773205094567, "learning_rate": 8.802797679313766e-06, "loss": 0.9839, "step": 13090 }, { "epoch": 0.791684293225358, "grad_norm": 1.1402570452683067, "learning_rate": 8.800634173727372e-06, "loss": 1.0008, "step": 13100 }, { "epoch": 0.792288632380492, "grad_norm": 1.2044459552875266, "learning_rate": 8.798468981413025e-06, "loss": 0.9879, "step": 13110 }, { "epoch": 0.7928929715356258, "grad_norm": 1.0625292941885056, "learning_rate": 8.79630210333164e-06, "loss": 0.9781, "step": 13120 }, { "epoch": 0.7934973106907597, "grad_norm": 1.137201790184712, "learning_rate": 8.794133540444884e-06, "loss": 0.9886, "step": 13130 }, { "epoch": 0.7941016498458935, "grad_norm": 1.0583191666183143, "learning_rate": 8.791963293715166e-06, "loss": 0.9732, "step": 13140 }, { "epoch": 0.7947059890010274, "grad_norm": 1.0450695896110342, "learning_rate": 8.78979136410565e-06, "loss": 0.9813, "step": 13150 }, { "epoch": 0.7953103281561612, "grad_norm": 1.0682241880229884, "learning_rate": 8.787617752580237e-06, "loss": 0.9665, "step": 13160 }, { "epoch": 0.7959146673112951, "grad_norm": 1.0131171225012323, "learning_rate": 8.785442460103581e-06, "loss": 0.9942, "step": 13170 }, { "epoch": 0.7965190064664289, "grad_norm": 0.9723856845729653, "learning_rate": 8.783265487641083e-06, "loss": 0.9935, "step": 13180 }, { "epoch": 0.7971233456215628, "grad_norm": 0.9588773108284964, "learning_rate": 8.781086836158884e-06, "loss": 0.9899, "step": 13190 }, { "epoch": 0.7977276847766966, "grad_norm": 1.067355261100459, "learning_rate": 8.778906506623875e-06, "loss": 0.9924, "step": 13200 }, { "epoch": 0.7983320239318306, "grad_norm": 0.9887635902127057, "learning_rate": 8.776724500003689e-06, "loss": 1.0129, "step": 13210 }, { "epoch": 0.7989363630869644, "grad_norm": 1.0662466768371086, "learning_rate": 8.774540817266703e-06, "loss": 1.001, "step": 13220 }, { "epoch": 0.7995407022420983, "grad_norm": 1.01603496144635, "learning_rate": 8.772355459382042e-06, "loss": 0.98, "step": 13230 }, { "epoch": 0.8001450413972321, "grad_norm": 1.0423178226386356, "learning_rate": 8.77016842731957e-06, "loss": 0.9536, "step": 13240 }, { "epoch": 0.800749380552366, "grad_norm": 0.9937477886555593, "learning_rate": 8.767979722049898e-06, "loss": 0.9841, "step": 13250 }, { "epoch": 0.8013537197074998, "grad_norm": 0.9948481782744802, "learning_rate": 8.765789344544373e-06, "loss": 0.97, "step": 13260 }, { "epoch": 0.8019580588626337, "grad_norm": 0.9202628224318222, "learning_rate": 8.76359729577509e-06, "loss": 0.9814, "step": 13270 }, { "epoch": 0.8025623980177675, "grad_norm": 0.9496722799701562, "learning_rate": 8.761403576714886e-06, "loss": 0.9918, "step": 13280 }, { "epoch": 0.8031667371729014, "grad_norm": 0.9319272570842513, "learning_rate": 8.759208188337335e-06, "loss": 0.9761, "step": 13290 }, { "epoch": 0.8037710763280352, "grad_norm": 1.2242296288204617, "learning_rate": 8.757011131616754e-06, "loss": 0.9912, "step": 13300 }, { "epoch": 0.8043754154831692, "grad_norm": 1.1882086142734938, "learning_rate": 8.7548124075282e-06, "loss": 0.9878, "step": 13310 }, { "epoch": 0.8049797546383031, "grad_norm": 1.1022888980543828, "learning_rate": 8.752612017047474e-06, "loss": 0.9833, "step": 13320 }, { "epoch": 0.8055840937934369, "grad_norm": 1.1801299846477953, "learning_rate": 8.750409961151113e-06, "loss": 0.9836, "step": 13330 }, { "epoch": 0.8061884329485708, "grad_norm": 1.146882476748507, "learning_rate": 8.748206240816388e-06, "loss": 0.9575, "step": 13340 }, { "epoch": 0.8067927721037046, "grad_norm": 1.087458660597489, "learning_rate": 8.746000857021318e-06, "loss": 0.9692, "step": 13350 }, { "epoch": 0.8073971112588385, "grad_norm": 1.049939773835628, "learning_rate": 8.743793810744655e-06, "loss": 1.0044, "step": 13360 }, { "epoch": 0.8080014504139723, "grad_norm": 1.075727090349215, "learning_rate": 8.741585102965888e-06, "loss": 0.9848, "step": 13370 }, { "epoch": 0.8086057895691062, "grad_norm": 1.0630622608807763, "learning_rate": 8.739374734665248e-06, "loss": 0.9773, "step": 13380 }, { "epoch": 0.80921012872424, "grad_norm": 1.0728569187017845, "learning_rate": 8.737162706823701e-06, "loss": 0.995, "step": 13390 }, { "epoch": 0.809814467879374, "grad_norm": 1.152574026721421, "learning_rate": 8.734949020422946e-06, "loss": 0.9752, "step": 13400 }, { "epoch": 0.8104188070345077, "grad_norm": 1.1812830246249495, "learning_rate": 8.732733676445422e-06, "loss": 0.9756, "step": 13410 }, { "epoch": 0.8110231461896417, "grad_norm": 1.1278256143246983, "learning_rate": 8.730516675874303e-06, "loss": 0.9949, "step": 13420 }, { "epoch": 0.8116274853447755, "grad_norm": 1.132258464057653, "learning_rate": 8.728298019693496e-06, "loss": 0.9939, "step": 13430 }, { "epoch": 0.8122318244999094, "grad_norm": 1.1585054158947528, "learning_rate": 8.726077708887645e-06, "loss": 0.9805, "step": 13440 }, { "epoch": 0.8128361636550432, "grad_norm": 1.3183845519856896, "learning_rate": 8.723855744442131e-06, "loss": 0.9891, "step": 13450 }, { "epoch": 0.8134405028101771, "grad_norm": 1.294442025174831, "learning_rate": 8.721632127343061e-06, "loss": 0.9827, "step": 13460 }, { "epoch": 0.8140448419653109, "grad_norm": 1.3180376381226397, "learning_rate": 8.719406858577283e-06, "loss": 0.9779, "step": 13470 }, { "epoch": 0.8146491811204448, "grad_norm": 1.3836647676847191, "learning_rate": 8.717179939132374e-06, "loss": 0.9808, "step": 13480 }, { "epoch": 0.8152535202755786, "grad_norm": 1.2321249849783131, "learning_rate": 8.714951369996642e-06, "loss": 0.9633, "step": 13490 }, { "epoch": 0.8158578594307125, "grad_norm": 0.8340901038421106, "learning_rate": 8.712721152159136e-06, "loss": 0.9618, "step": 13500 }, { "epoch": 0.8164621985858463, "grad_norm": 0.8177150348613327, "learning_rate": 8.710489286609624e-06, "loss": 0.9847, "step": 13510 }, { "epoch": 0.8170665377409803, "grad_norm": 0.788853435450462, "learning_rate": 8.708255774338616e-06, "loss": 0.9649, "step": 13520 }, { "epoch": 0.8176708768961141, "grad_norm": 0.8625179587825503, "learning_rate": 8.706020616337348e-06, "loss": 0.9696, "step": 13530 }, { "epoch": 0.818275216051248, "grad_norm": 0.8239071561559175, "learning_rate": 8.703783813597785e-06, "loss": 0.9589, "step": 13540 }, { "epoch": 0.8188795552063818, "grad_norm": 0.7234791914525768, "learning_rate": 8.701545367112627e-06, "loss": 0.9867, "step": 13550 }, { "epoch": 0.8194838943615157, "grad_norm": 0.7324782438751588, "learning_rate": 8.699305277875298e-06, "loss": 0.9687, "step": 13560 }, { "epoch": 0.8200882335166495, "grad_norm": 0.7579427313245379, "learning_rate": 8.697063546879952e-06, "loss": 0.9833, "step": 13570 }, { "epoch": 0.8206925726717834, "grad_norm": 0.7226863976371414, "learning_rate": 8.694820175121477e-06, "loss": 0.9894, "step": 13580 }, { "epoch": 0.8212969118269172, "grad_norm": 0.7178822526135513, "learning_rate": 8.692575163595483e-06, "loss": 0.9868, "step": 13590 }, { "epoch": 0.8219012509820511, "grad_norm": 0.8309495385533435, "learning_rate": 8.690328513298309e-06, "loss": 0.9874, "step": 13600 }, { "epoch": 0.8225055901371849, "grad_norm": 0.7964068295482079, "learning_rate": 8.688080225227024e-06, "loss": 0.9927, "step": 13610 }, { "epoch": 0.8231099292923189, "grad_norm": 0.7134736196963872, "learning_rate": 8.685830300379423e-06, "loss": 0.9846, "step": 13620 }, { "epoch": 0.8237142684474527, "grad_norm": 0.8016370223478746, "learning_rate": 8.683578739754023e-06, "loss": 0.972, "step": 13630 }, { "epoch": 0.8243186076025866, "grad_norm": 0.7828803915266184, "learning_rate": 8.681325544350073e-06, "loss": 0.9748, "step": 13640 }, { "epoch": 0.8249229467577205, "grad_norm": 0.525963738795146, "learning_rate": 8.679070715167545e-06, "loss": 0.9802, "step": 13650 }, { "epoch": 0.8255272859128543, "grad_norm": 0.5119496778046251, "learning_rate": 8.676814253207135e-06, "loss": 0.9954, "step": 13660 }, { "epoch": 0.8261316250679882, "grad_norm": 0.5320327454561167, "learning_rate": 8.674556159470266e-06, "loss": 0.9531, "step": 13670 }, { "epoch": 0.826735964223122, "grad_norm": 0.5298844005491382, "learning_rate": 8.672296434959081e-06, "loss": 0.9894, "step": 13680 }, { "epoch": 0.8273403033782559, "grad_norm": 0.568192299757382, "learning_rate": 8.670035080676455e-06, "loss": 1.0102, "step": 13690 }, { "epoch": 0.8279446425333897, "grad_norm": 0.5795311032175636, "learning_rate": 8.667772097625975e-06, "loss": 0.9979, "step": 13700 }, { "epoch": 0.8285489816885236, "grad_norm": 0.5482009557717502, "learning_rate": 8.665507486811962e-06, "loss": 0.9798, "step": 13710 }, { "epoch": 0.8291533208436574, "grad_norm": 0.5803001889632351, "learning_rate": 8.663241249239449e-06, "loss": 0.9676, "step": 13720 }, { "epoch": 0.8297576599987914, "grad_norm": 0.5585460629729855, "learning_rate": 8.6609733859142e-06, "loss": 0.9665, "step": 13730 }, { "epoch": 0.8303619991539252, "grad_norm": 0.6044475964382725, "learning_rate": 8.658703897842693e-06, "loss": 0.9917, "step": 13740 }, { "epoch": 0.8309663383090591, "grad_norm": 0.8075921621568113, "learning_rate": 8.656432786032135e-06, "loss": 0.9585, "step": 13750 }, { "epoch": 0.8315706774641929, "grad_norm": 0.7213302008928867, "learning_rate": 8.654160051490447e-06, "loss": 0.9847, "step": 13760 }, { "epoch": 0.8321750166193268, "grad_norm": 0.8258436481699448, "learning_rate": 8.651885695226273e-06, "loss": 0.9876, "step": 13770 }, { "epoch": 0.8327793557744606, "grad_norm": 0.7296502453047593, "learning_rate": 8.649609718248974e-06, "loss": 0.9693, "step": 13780 }, { "epoch": 0.8333836949295945, "grad_norm": 0.7565117159939226, "learning_rate": 8.647332121568635e-06, "loss": 0.9653, "step": 13790 }, { "epoch": 0.8339880340847283, "grad_norm": 0.4668739376583184, "learning_rate": 8.645052906196059e-06, "loss": 0.9756, "step": 13800 }, { "epoch": 0.8345923732398622, "grad_norm": 0.512767536719212, "learning_rate": 8.64277207314276e-06, "loss": 0.9805, "step": 13810 }, { "epoch": 0.835196712394996, "grad_norm": 0.513216989615094, "learning_rate": 8.640489623420982e-06, "loss": 0.9763, "step": 13820 }, { "epoch": 0.83580105155013, "grad_norm": 0.5159392361381759, "learning_rate": 8.638205558043674e-06, "loss": 0.9885, "step": 13830 }, { "epoch": 0.8364053907052638, "grad_norm": 0.507925826130658, "learning_rate": 8.635919878024514e-06, "loss": 0.9812, "step": 13840 }, { "epoch": 0.8370097298603977, "grad_norm": 0.5105816874123786, "learning_rate": 8.633632584377887e-06, "loss": 0.9351, "step": 13850 }, { "epoch": 0.8376140690155315, "grad_norm": 0.4843146889888788, "learning_rate": 8.6313436781189e-06, "loss": 0.9785, "step": 13860 }, { "epoch": 0.8382184081706654, "grad_norm": 0.4997169719585392, "learning_rate": 8.62905316026337e-06, "loss": 0.9948, "step": 13870 }, { "epoch": 0.8388227473257992, "grad_norm": 0.5167673178229548, "learning_rate": 8.626761031827836e-06, "loss": 0.9614, "step": 13880 }, { "epoch": 0.8394270864809331, "grad_norm": 0.4983261577437121, "learning_rate": 8.624467293829548e-06, "loss": 0.9869, "step": 13890 }, { "epoch": 0.8400314256360669, "grad_norm": 0.517457209055249, "learning_rate": 8.622171947286468e-06, "loss": 0.9734, "step": 13900 }, { "epoch": 0.8406357647912008, "grad_norm": 0.4624168463259461, "learning_rate": 8.61987499321728e-06, "loss": 1.0018, "step": 13910 }, { "epoch": 0.8412401039463346, "grad_norm": 0.548196103565086, "learning_rate": 8.617576432641373e-06, "loss": 1.0105, "step": 13920 }, { "epoch": 0.8418444431014686, "grad_norm": 0.5151892472790248, "learning_rate": 8.615276266578851e-06, "loss": 0.989, "step": 13930 }, { "epoch": 0.8424487822566024, "grad_norm": 0.5135347974928427, "learning_rate": 8.612974496050533e-06, "loss": 0.9914, "step": 13940 }, { "epoch": 0.8430531214117363, "grad_norm": 0.45531070788787553, "learning_rate": 8.610671122077949e-06, "loss": 0.9602, "step": 13950 }, { "epoch": 0.8436574605668701, "grad_norm": 0.5286391714421553, "learning_rate": 8.60836614568334e-06, "loss": 0.9808, "step": 13960 }, { "epoch": 0.844261799722004, "grad_norm": 0.5216868629946407, "learning_rate": 8.60605956788966e-06, "loss": 0.9918, "step": 13970 }, { "epoch": 0.8448661388771378, "grad_norm": 0.529974168619616, "learning_rate": 8.603751389720569e-06, "loss": 0.9892, "step": 13980 }, { "epoch": 0.8454704780322717, "grad_norm": 0.4881199578200851, "learning_rate": 8.601441612200443e-06, "loss": 0.983, "step": 13990 }, { "epoch": 0.8460748171874056, "grad_norm": 0.5157680614443185, "learning_rate": 8.599130236354365e-06, "loss": 1.006, "step": 14000 }, { "epoch": 0.8466791563425394, "grad_norm": 0.4870445244532973, "learning_rate": 8.596817263208125e-06, "loss": 0.9823, "step": 14010 }, { "epoch": 0.8472834954976733, "grad_norm": 0.5251658980807535, "learning_rate": 8.594502693788227e-06, "loss": 1.0009, "step": 14020 }, { "epoch": 0.8478878346528071, "grad_norm": 0.4754799389422651, "learning_rate": 8.592186529121879e-06, "loss": 0.9973, "step": 14030 }, { "epoch": 0.8484921738079411, "grad_norm": 0.540879750874813, "learning_rate": 8.589868770237001e-06, "loss": 1.0054, "step": 14040 }, { "epoch": 0.8490965129630749, "grad_norm": 0.5270743610518237, "learning_rate": 8.58754941816222e-06, "loss": 0.9632, "step": 14050 }, { "epoch": 0.8497008521182088, "grad_norm": 0.529225460967496, "learning_rate": 8.585228473926864e-06, "loss": 0.9779, "step": 14060 }, { "epoch": 0.8503051912733426, "grad_norm": 0.4733574050188894, "learning_rate": 8.582905938560972e-06, "loss": 0.9821, "step": 14070 }, { "epoch": 0.8509095304284765, "grad_norm": 0.506945004798917, "learning_rate": 8.580581813095292e-06, "loss": 0.9825, "step": 14080 }, { "epoch": 0.8515138695836103, "grad_norm": 0.5504746195997688, "learning_rate": 8.578256098561276e-06, "loss": 0.9795, "step": 14090 }, { "epoch": 0.8521182087387442, "grad_norm": 0.5597103599368471, "learning_rate": 8.575928795991076e-06, "loss": 0.9658, "step": 14100 }, { "epoch": 0.852722547893878, "grad_norm": 0.5759872420755888, "learning_rate": 8.573599906417554e-06, "loss": 0.9894, "step": 14110 }, { "epoch": 0.8533268870490119, "grad_norm": 0.5963806452644624, "learning_rate": 8.571269430874277e-06, "loss": 0.9641, "step": 14120 }, { "epoch": 0.8539312262041457, "grad_norm": 0.6096464374638144, "learning_rate": 8.568937370395514e-06, "loss": 0.9873, "step": 14130 }, { "epoch": 0.8545355653592797, "grad_norm": 0.5738626573172579, "learning_rate": 8.566603726016236e-06, "loss": 0.9717, "step": 14140 }, { "epoch": 0.8551399045144135, "grad_norm": 0.635145077266177, "learning_rate": 8.564268498772116e-06, "loss": 0.9915, "step": 14150 }, { "epoch": 0.8557442436695474, "grad_norm": 0.669204900845668, "learning_rate": 8.561931689699538e-06, "loss": 1.0097, "step": 14160 }, { "epoch": 0.8563485828246812, "grad_norm": 0.6218427952078261, "learning_rate": 8.559593299835578e-06, "loss": 0.961, "step": 14170 }, { "epoch": 0.8569529219798151, "grad_norm": 0.6461805763290213, "learning_rate": 8.557253330218019e-06, "loss": 0.9888, "step": 14180 }, { "epoch": 0.8575572611349489, "grad_norm": 0.701577584718705, "learning_rate": 8.55491178188534e-06, "loss": 0.9865, "step": 14190 }, { "epoch": 0.8581616002900828, "grad_norm": 0.6138595871903099, "learning_rate": 8.552568655876728e-06, "loss": 0.9571, "step": 14200 }, { "epoch": 0.8587659394452166, "grad_norm": 0.5891773854121215, "learning_rate": 8.550223953232064e-06, "loss": 0.9946, "step": 14210 }, { "epoch": 0.8593702786003505, "grad_norm": 0.6039962466404325, "learning_rate": 8.547877674991932e-06, "loss": 0.9823, "step": 14220 }, { "epoch": 0.8599746177554843, "grad_norm": 0.5587377829841708, "learning_rate": 8.545529822197615e-06, "loss": 0.9743, "step": 14230 }, { "epoch": 0.8605789569106183, "grad_norm": 0.6052969075414736, "learning_rate": 8.543180395891094e-06, "loss": 0.9777, "step": 14240 }, { "epoch": 0.8611832960657521, "grad_norm": 0.7745570749066291, "learning_rate": 8.540829397115047e-06, "loss": 0.9684, "step": 14250 }, { "epoch": 0.861787635220886, "grad_norm": 0.7856283138123235, "learning_rate": 8.538476826912854e-06, "loss": 0.9652, "step": 14260 }, { "epoch": 0.8623919743760198, "grad_norm": 0.7686319996115566, "learning_rate": 8.536122686328586e-06, "loss": 0.9611, "step": 14270 }, { "epoch": 0.8629963135311537, "grad_norm": 0.7320404575102034, "learning_rate": 8.533766976407016e-06, "loss": 0.9808, "step": 14280 }, { "epoch": 0.8636006526862875, "grad_norm": 0.7499438577831408, "learning_rate": 8.531409698193613e-06, "loss": 0.9588, "step": 14290 }, { "epoch": 0.8642049918414214, "grad_norm": 0.760692798217358, "learning_rate": 8.529050852734542e-06, "loss": 0.9815, "step": 14300 }, { "epoch": 0.8648093309965552, "grad_norm": 0.7612773578772055, "learning_rate": 8.52669044107666e-06, "loss": 0.9952, "step": 14310 }, { "epoch": 0.8654136701516891, "grad_norm": 0.7304535865932663, "learning_rate": 8.524328464267522e-06, "loss": 0.9758, "step": 14320 }, { "epoch": 0.866018009306823, "grad_norm": 0.7710648651713178, "learning_rate": 8.521964923355382e-06, "loss": 0.9613, "step": 14330 }, { "epoch": 0.8666223484619568, "grad_norm": 0.7772496666920414, "learning_rate": 8.519599819389174e-06, "loss": 1.012, "step": 14340 }, { "epoch": 0.8672266876170908, "grad_norm": 1.2236421890411022, "learning_rate": 8.517233153418547e-06, "loss": 0.9844, "step": 14350 }, { "epoch": 0.8678310267722246, "grad_norm": 1.1564478451423705, "learning_rate": 8.514864926493822e-06, "loss": 0.9683, "step": 14360 }, { "epoch": 0.8684353659273585, "grad_norm": 1.1754332130103715, "learning_rate": 8.512495139666026e-06, "loss": 0.9908, "step": 14370 }, { "epoch": 0.8690397050824923, "grad_norm": 1.1573009779585544, "learning_rate": 8.510123793986875e-06, "loss": 0.9863, "step": 14380 }, { "epoch": 0.8696440442376262, "grad_norm": 1.187821551420355, "learning_rate": 8.507750890508777e-06, "loss": 0.9468, "step": 14390 }, { "epoch": 0.87024838339276, "grad_norm": 0.7814586127959184, "learning_rate": 8.505376430284827e-06, "loss": 0.9609, "step": 14400 }, { "epoch": 0.8708527225478939, "grad_norm": 0.9420540458090267, "learning_rate": 8.503000414368816e-06, "loss": 0.9523, "step": 14410 }, { "epoch": 0.8714570617030277, "grad_norm": 1.0162098669137272, "learning_rate": 8.500622843815225e-06, "loss": 0.9735, "step": 14420 }, { "epoch": 0.8720614008581616, "grad_norm": 1.2102364811172532, "learning_rate": 8.498243719679221e-06, "loss": 0.9566, "step": 14430 }, { "epoch": 0.8726657400132954, "grad_norm": 0.7628880249012625, "learning_rate": 8.49586304301667e-06, "loss": 0.9707, "step": 14440 }, { "epoch": 0.8732700791684294, "grad_norm": 2.473223653843743, "learning_rate": 8.493480814884113e-06, "loss": 0.9935, "step": 14450 }, { "epoch": 0.8738744183235632, "grad_norm": 2.4960605710619737, "learning_rate": 8.491097036338792e-06, "loss": 0.9972, "step": 14460 }, { "epoch": 0.8744787574786971, "grad_norm": 2.5440214608363627, "learning_rate": 8.488711708438629e-06, "loss": 0.975, "step": 14470 }, { "epoch": 0.8750830966338309, "grad_norm": 2.427756414539397, "learning_rate": 8.486324832242237e-06, "loss": 0.9755, "step": 14480 }, { "epoch": 0.8756874357889648, "grad_norm": 2.270768583605908, "learning_rate": 8.483936408808917e-06, "loss": 0.9847, "step": 14490 }, { "epoch": 0.8762917749440986, "grad_norm": 0.9276414380370431, "learning_rate": 8.481546439198654e-06, "loss": 1.0007, "step": 14500 }, { "epoch": 0.8768961140992325, "grad_norm": 0.9322671026768691, "learning_rate": 8.479154924472122e-06, "loss": 0.9933, "step": 14510 }, { "epoch": 0.8775004532543663, "grad_norm": 0.9077576688579291, "learning_rate": 8.476761865690679e-06, "loss": 0.9487, "step": 14520 }, { "epoch": 0.8781047924095002, "grad_norm": 1.0740130002574293, "learning_rate": 8.474367263916367e-06, "loss": 0.9921, "step": 14530 }, { "epoch": 0.878709131564634, "grad_norm": 0.8862733981331156, "learning_rate": 8.471971120211917e-06, "loss": 1.0023, "step": 14540 }, { "epoch": 0.879313470719768, "grad_norm": 1.0373397333430026, "learning_rate": 8.46957343564074e-06, "loss": 0.9765, "step": 14550 }, { "epoch": 0.8799178098749018, "grad_norm": 1.037263547393421, "learning_rate": 8.467174211266934e-06, "loss": 1.0, "step": 14560 }, { "epoch": 0.8805221490300357, "grad_norm": 1.00567796094567, "learning_rate": 8.464773448155275e-06, "loss": 0.9714, "step": 14570 }, { "epoch": 0.8811264881851695, "grad_norm": 0.9418816848410064, "learning_rate": 8.46237114737123e-06, "loss": 0.9515, "step": 14580 }, { "epoch": 0.8817308273403034, "grad_norm": 0.9409106173195751, "learning_rate": 8.459967309980943e-06, "loss": 0.9732, "step": 14590 }, { "epoch": 0.8823351664954372, "grad_norm": 1.019162987597295, "learning_rate": 8.45756193705124e-06, "loss": 0.9657, "step": 14600 }, { "epoch": 0.8829395056505711, "grad_norm": 1.0700939690954803, "learning_rate": 8.45515502964963e-06, "loss": 0.9953, "step": 14610 }, { "epoch": 0.8835438448057049, "grad_norm": 1.0264694737703495, "learning_rate": 8.452746588844303e-06, "loss": 0.9909, "step": 14620 }, { "epoch": 0.8841481839608388, "grad_norm": 1.084242423671368, "learning_rate": 8.45033661570413e-06, "loss": 0.989, "step": 14630 }, { "epoch": 0.8847525231159726, "grad_norm": 1.0984857314914027, "learning_rate": 8.447925111298661e-06, "loss": 0.9851, "step": 14640 }, { "epoch": 0.8853568622711065, "grad_norm": 1.0742926234423478, "learning_rate": 8.445512076698125e-06, "loss": 0.96, "step": 14650 }, { "epoch": 0.8859612014262404, "grad_norm": 1.002027953341183, "learning_rate": 8.44309751297343e-06, "loss": 0.9951, "step": 14660 }, { "epoch": 0.8865655405813743, "grad_norm": 0.9725366946088984, "learning_rate": 8.440681421196166e-06, "loss": 0.9684, "step": 14670 }, { "epoch": 0.8871698797365082, "grad_norm": 1.0216916213642229, "learning_rate": 8.438263802438596e-06, "loss": 0.9767, "step": 14680 }, { "epoch": 0.887774218891642, "grad_norm": 0.998327880324408, "learning_rate": 8.435844657773666e-06, "loss": 0.9507, "step": 14690 }, { "epoch": 0.8883785580467759, "grad_norm": 1.0952169416767867, "learning_rate": 8.433423988274994e-06, "loss": 0.9838, "step": 14700 }, { "epoch": 0.8889828972019097, "grad_norm": 1.2251788461943667, "learning_rate": 8.431001795016878e-06, "loss": 0.9977, "step": 14710 }, { "epoch": 0.8895872363570436, "grad_norm": 1.181693250057835, "learning_rate": 8.428578079074294e-06, "loss": 0.9537, "step": 14720 }, { "epoch": 0.8901915755121774, "grad_norm": 1.0959573473565298, "learning_rate": 8.426152841522887e-06, "loss": 0.9519, "step": 14730 }, { "epoch": 0.8907959146673113, "grad_norm": 1.001507992246485, "learning_rate": 8.423726083438986e-06, "loss": 0.9588, "step": 14740 }, { "epoch": 0.8914002538224451, "grad_norm": 0.9517339318631465, "learning_rate": 8.421297805899586e-06, "loss": 0.9931, "step": 14750 }, { "epoch": 0.8920045929775791, "grad_norm": 0.9001871773562664, "learning_rate": 8.418868009982364e-06, "loss": 0.988, "step": 14760 }, { "epoch": 0.8926089321327129, "grad_norm": 0.9356960550403014, "learning_rate": 8.416436696765667e-06, "loss": 0.9927, "step": 14770 }, { "epoch": 0.8932132712878468, "grad_norm": 0.9811393595806533, "learning_rate": 8.414003867328515e-06, "loss": 1.0087, "step": 14780 }, { "epoch": 0.8938176104429806, "grad_norm": 0.9649555352069363, "learning_rate": 8.411569522750603e-06, "loss": 0.9988, "step": 14790 }, { "epoch": 0.8944219495981145, "grad_norm": 0.9646987723282942, "learning_rate": 8.409133664112295e-06, "loss": 0.9584, "step": 14800 }, { "epoch": 0.8950262887532483, "grad_norm": 0.9961419680305934, "learning_rate": 8.406696292494632e-06, "loss": 0.9834, "step": 14810 }, { "epoch": 0.8956306279083822, "grad_norm": 1.0542166242440507, "learning_rate": 8.404257408979322e-06, "loss": 0.977, "step": 14820 }, { "epoch": 0.896234967063516, "grad_norm": 1.0533188303783554, "learning_rate": 8.401817014648748e-06, "loss": 0.9689, "step": 14830 }, { "epoch": 0.8968393062186499, "grad_norm": 1.0131002601556658, "learning_rate": 8.399375110585956e-06, "loss": 0.9674, "step": 14840 }, { "epoch": 0.8974436453737837, "grad_norm": 0.9709924962791653, "learning_rate": 8.396931697874673e-06, "loss": 0.9633, "step": 14850 }, { "epoch": 0.8980479845289177, "grad_norm": 0.936565669426908, "learning_rate": 8.394486777599287e-06, "loss": 0.9866, "step": 14860 }, { "epoch": 0.8986523236840515, "grad_norm": 0.962284143408487, "learning_rate": 8.392040350844858e-06, "loss": 0.9735, "step": 14870 }, { "epoch": 0.8992566628391854, "grad_norm": 0.9946695017782787, "learning_rate": 8.389592418697117e-06, "loss": 0.9786, "step": 14880 }, { "epoch": 0.8998610019943192, "grad_norm": 0.9691296166065627, "learning_rate": 8.387142982242455e-06, "loss": 0.9734, "step": 14890 }, { "epoch": 0.9004653411494531, "grad_norm": 1.1259013990647193, "learning_rate": 8.384692042567942e-06, "loss": 0.9981, "step": 14900 }, { "epoch": 0.9010696803045869, "grad_norm": 1.1465770908465234, "learning_rate": 8.382239600761302e-06, "loss": 0.9617, "step": 14910 }, { "epoch": 0.9016740194597208, "grad_norm": 1.1654058563708993, "learning_rate": 8.379785657910939e-06, "loss": 0.9988, "step": 14920 }, { "epoch": 0.9022783586148546, "grad_norm": 1.0990433646220246, "learning_rate": 8.377330215105917e-06, "loss": 0.9676, "step": 14930 }, { "epoch": 0.9028826977699885, "grad_norm": 1.0878428754781606, "learning_rate": 8.374873273435964e-06, "loss": 0.9787, "step": 14940 }, { "epoch": 0.9034870369251223, "grad_norm": 1.0830366501084334, "learning_rate": 8.372414833991472e-06, "loss": 0.9835, "step": 14950 }, { "epoch": 0.9040913760802562, "grad_norm": 1.0228113040934184, "learning_rate": 8.369954897863507e-06, "loss": 0.999, "step": 14960 }, { "epoch": 0.90469571523539, "grad_norm": 0.9791965032152091, "learning_rate": 8.367493466143786e-06, "loss": 0.9921, "step": 14970 }, { "epoch": 0.905300054390524, "grad_norm": 1.083387497785294, "learning_rate": 8.365030539924704e-06, "loss": 0.994, "step": 14980 }, { "epoch": 0.9059043935456578, "grad_norm": 0.9877531161131822, "learning_rate": 8.362566120299304e-06, "loss": 0.9917, "step": 14990 }, { "epoch": 0.9065087327007917, "grad_norm": 1.2101812681075603, "learning_rate": 8.360100208361306e-06, "loss": 0.994, "step": 15000 }, { "epoch": 0.9071130718559255, "grad_norm": 1.1916898119621775, "learning_rate": 8.357632805205083e-06, "loss": 0.9745, "step": 15010 }, { "epoch": 0.9077174110110594, "grad_norm": 1.1528099146691606, "learning_rate": 8.355163911925672e-06, "loss": 0.9811, "step": 15020 }, { "epoch": 0.9083217501661933, "grad_norm": 1.1868930183977866, "learning_rate": 8.352693529618773e-06, "loss": 1.0003, "step": 15030 }, { "epoch": 0.9089260893213271, "grad_norm": 1.1441811008731064, "learning_rate": 8.350221659380749e-06, "loss": 0.9765, "step": 15040 }, { "epoch": 0.909530428476461, "grad_norm": 1.2854872430282722, "learning_rate": 8.347748302308616e-06, "loss": 0.9679, "step": 15050 }, { "epoch": 0.9101347676315948, "grad_norm": 1.305410285187446, "learning_rate": 8.345273459500055e-06, "loss": 0.9696, "step": 15060 }, { "epoch": 0.9107391067867288, "grad_norm": 1.4112752533252417, "learning_rate": 8.342797132053406e-06, "loss": 0.981, "step": 15070 }, { "epoch": 0.9113434459418626, "grad_norm": 1.3399073634525698, "learning_rate": 8.340319321067668e-06, "loss": 0.9652, "step": 15080 }, { "epoch": 0.9119477850969965, "grad_norm": 1.3063670610413907, "learning_rate": 8.337840027642496e-06, "loss": 0.9695, "step": 15090 }, { "epoch": 0.9125521242521303, "grad_norm": 0.8581451886576797, "learning_rate": 8.335359252878208e-06, "loss": 0.9829, "step": 15100 }, { "epoch": 0.9131564634072642, "grad_norm": 0.7855715403061322, "learning_rate": 8.332876997875772e-06, "loss": 0.9806, "step": 15110 }, { "epoch": 0.913760802562398, "grad_norm": 0.7467948355223356, "learning_rate": 8.33039326373682e-06, "loss": 0.965, "step": 15120 }, { "epoch": 0.9143651417175319, "grad_norm": 0.7382353306044588, "learning_rate": 8.327908051563636e-06, "loss": 0.9748, "step": 15130 }, { "epoch": 0.9149694808726657, "grad_norm": 0.8474447213633335, "learning_rate": 8.325421362459161e-06, "loss": 0.9932, "step": 15140 }, { "epoch": 0.9155738200277996, "grad_norm": 0.7317917032082506, "learning_rate": 8.322933197526994e-06, "loss": 0.9835, "step": 15150 }, { "epoch": 0.9161781591829334, "grad_norm": 0.7530942217530358, "learning_rate": 8.320443557871385e-06, "loss": 0.9829, "step": 15160 }, { "epoch": 0.9167824983380674, "grad_norm": 0.7154828727948112, "learning_rate": 8.31795244459724e-06, "loss": 0.9773, "step": 15170 }, { "epoch": 0.9173868374932012, "grad_norm": 0.7637348413098434, "learning_rate": 8.31545985881012e-06, "loss": 0.9862, "step": 15180 }, { "epoch": 0.9179911766483351, "grad_norm": 0.7479647119333702, "learning_rate": 8.312965801616239e-06, "loss": 0.9525, "step": 15190 }, { "epoch": 0.9185955158034689, "grad_norm": 0.7942328833650443, "learning_rate": 8.310470274122466e-06, "loss": 0.9609, "step": 15200 }, { "epoch": 0.9191998549586028, "grad_norm": 0.805313807375061, "learning_rate": 8.307973277436316e-06, "loss": 0.9721, "step": 15210 }, { "epoch": 0.9198041941137366, "grad_norm": 0.8151444437111519, "learning_rate": 8.305474812665964e-06, "loss": 0.9808, "step": 15220 }, { "epoch": 0.9204085332688705, "grad_norm": 0.7739572996198226, "learning_rate": 8.302974880920232e-06, "loss": 0.9535, "step": 15230 }, { "epoch": 0.9210128724240043, "grad_norm": 0.7971473423220925, "learning_rate": 8.300473483308593e-06, "loss": 0.9848, "step": 15240 }, { "epoch": 0.9216172115791382, "grad_norm": 0.5593937608199296, "learning_rate": 8.297970620941172e-06, "loss": 0.9633, "step": 15250 }, { "epoch": 0.922221550734272, "grad_norm": 0.5615530705752211, "learning_rate": 8.295466294928742e-06, "loss": 0.9906, "step": 15260 }, { "epoch": 0.922825889889406, "grad_norm": 0.5590789840513106, "learning_rate": 8.292960506382732e-06, "loss": 0.9785, "step": 15270 }, { "epoch": 0.9234302290445398, "grad_norm": 0.5291157134336621, "learning_rate": 8.290453256415209e-06, "loss": 0.9562, "step": 15280 }, { "epoch": 0.9240345681996737, "grad_norm": 0.5409264386781765, "learning_rate": 8.287944546138902e-06, "loss": 0.9809, "step": 15290 }, { "epoch": 0.9246389073548075, "grad_norm": 0.5968634586693193, "learning_rate": 8.285434376667173e-06, "loss": 0.9751, "step": 15300 }, { "epoch": 0.9252432465099414, "grad_norm": 0.6052672579343464, "learning_rate": 8.282922749114045e-06, "loss": 0.9704, "step": 15310 }, { "epoch": 0.9258475856650752, "grad_norm": 0.5767452870876945, "learning_rate": 8.280409664594182e-06, "loss": 0.9871, "step": 15320 }, { "epoch": 0.9264519248202091, "grad_norm": 0.5943979835389582, "learning_rate": 8.277895124222893e-06, "loss": 0.9507, "step": 15330 }, { "epoch": 0.9270562639753429, "grad_norm": 0.5826660040013454, "learning_rate": 8.275379129116137e-06, "loss": 0.9594, "step": 15340 }, { "epoch": 0.9276606031304768, "grad_norm": 0.9398837479641462, "learning_rate": 8.272861680390516e-06, "loss": 0.9773, "step": 15350 }, { "epoch": 0.9282649422856107, "grad_norm": 0.8369343658449666, "learning_rate": 8.270342779163278e-06, "loss": 0.9626, "step": 15360 }, { "epoch": 0.9288692814407445, "grad_norm": 0.7929105923486776, "learning_rate": 8.267822426552319e-06, "loss": 0.951, "step": 15370 }, { "epoch": 0.9294736205958785, "grad_norm": 0.7720117715997297, "learning_rate": 8.265300623676168e-06, "loss": 0.9613, "step": 15380 }, { "epoch": 0.9300779597510123, "grad_norm": 0.737724355421029, "learning_rate": 8.262777371654015e-06, "loss": 0.9825, "step": 15390 }, { "epoch": 0.9306822989061462, "grad_norm": 0.4792498970466182, "learning_rate": 8.260252671605678e-06, "loss": 0.9938, "step": 15400 }, { "epoch": 0.93128663806128, "grad_norm": 0.49639666083524897, "learning_rate": 8.257726524651623e-06, "loss": 0.989, "step": 15410 }, { "epoch": 0.9318909772164139, "grad_norm": 0.4879334070101319, "learning_rate": 8.25519893191296e-06, "loss": 0.9862, "step": 15420 }, { "epoch": 0.9324953163715477, "grad_norm": 0.5236490326331864, "learning_rate": 8.25266989451144e-06, "loss": 0.9712, "step": 15430 }, { "epoch": 0.9330996555266816, "grad_norm": 0.49138545746913104, "learning_rate": 8.250139413569452e-06, "loss": 0.9815, "step": 15440 }, { "epoch": 0.9337039946818154, "grad_norm": 0.4874412389398828, "learning_rate": 8.247607490210026e-06, "loss": 0.9818, "step": 15450 }, { "epoch": 0.9343083338369493, "grad_norm": 0.5483962683415574, "learning_rate": 8.24507412555684e-06, "loss": 0.9542, "step": 15460 }, { "epoch": 0.9349126729920831, "grad_norm": 0.45861596009021244, "learning_rate": 8.2425393207342e-06, "loss": 0.9741, "step": 15470 }, { "epoch": 0.935517012147217, "grad_norm": 0.48667606147713494, "learning_rate": 8.240003076867058e-06, "loss": 0.9665, "step": 15480 }, { "epoch": 0.9361213513023509, "grad_norm": 0.4862374408145362, "learning_rate": 8.237465395081006e-06, "loss": 0.9853, "step": 15490 }, { "epoch": 0.9367256904574848, "grad_norm": 0.49319242139667385, "learning_rate": 8.234926276502267e-06, "loss": 1.0055, "step": 15500 }, { "epoch": 0.9373300296126186, "grad_norm": 0.48575364669378046, "learning_rate": 8.232385722257711e-06, "loss": 0.9786, "step": 15510 }, { "epoch": 0.9379343687677525, "grad_norm": 0.5064431072585477, "learning_rate": 8.229843733474838e-06, "loss": 0.9661, "step": 15520 }, { "epoch": 0.9385387079228863, "grad_norm": 0.5210995718594443, "learning_rate": 8.227300311281786e-06, "loss": 0.9867, "step": 15530 }, { "epoch": 0.9391430470780202, "grad_norm": 0.520543236604751, "learning_rate": 8.224755456807331e-06, "loss": 0.9645, "step": 15540 }, { "epoch": 0.939747386233154, "grad_norm": 0.510032001989098, "learning_rate": 8.222209171180883e-06, "loss": 0.9837, "step": 15550 }, { "epoch": 0.9403517253882879, "grad_norm": 0.5066937166384391, "learning_rate": 8.21966145553249e-06, "loss": 0.9769, "step": 15560 }, { "epoch": 0.9409560645434217, "grad_norm": 0.47529590926820436, "learning_rate": 8.217112310992831e-06, "loss": 0.964, "step": 15570 }, { "epoch": 0.9415604036985556, "grad_norm": 0.5110087145423996, "learning_rate": 8.214561738693223e-06, "loss": 1.0113, "step": 15580 }, { "epoch": 0.9421647428536895, "grad_norm": 0.48570477342435137, "learning_rate": 8.212009739765611e-06, "loss": 0.9636, "step": 15590 }, { "epoch": 0.9427690820088234, "grad_norm": 0.49986980981019213, "learning_rate": 8.209456315342578e-06, "loss": 0.9752, "step": 15600 }, { "epoch": 0.9433734211639572, "grad_norm": 0.4982489644343123, "learning_rate": 8.206901466557339e-06, "loss": 0.9833, "step": 15610 }, { "epoch": 0.9439777603190911, "grad_norm": 0.4925601414432979, "learning_rate": 8.204345194543737e-06, "loss": 0.9526, "step": 15620 }, { "epoch": 0.9445820994742249, "grad_norm": 0.502427741366534, "learning_rate": 8.201787500436255e-06, "loss": 0.9791, "step": 15630 }, { "epoch": 0.9451864386293588, "grad_norm": 0.4837334195618305, "learning_rate": 8.199228385369997e-06, "loss": 0.9782, "step": 15640 }, { "epoch": 0.9457907777844926, "grad_norm": 0.5118359169968741, "learning_rate": 8.196667850480705e-06, "loss": 0.9898, "step": 15650 }, { "epoch": 0.9463951169396265, "grad_norm": 0.5063133883805043, "learning_rate": 8.194105896904752e-06, "loss": 0.9777, "step": 15660 }, { "epoch": 0.9469994560947603, "grad_norm": 0.4863301039086336, "learning_rate": 8.19154252577913e-06, "loss": 0.9626, "step": 15670 }, { "epoch": 0.9476037952498942, "grad_norm": 0.6063423950239919, "learning_rate": 8.188977738241475e-06, "loss": 0.9454, "step": 15680 }, { "epoch": 0.948208134405028, "grad_norm": 0.5343329334189985, "learning_rate": 8.18641153543004e-06, "loss": 0.967, "step": 15690 }, { "epoch": 0.948812473560162, "grad_norm": 0.555430421579258, "learning_rate": 8.183843918483707e-06, "loss": 1.0006, "step": 15700 }, { "epoch": 0.9494168127152959, "grad_norm": 0.6011406882443133, "learning_rate": 8.181274888541996e-06, "loss": 0.9739, "step": 15710 }, { "epoch": 0.9500211518704297, "grad_norm": 0.6132581360553538, "learning_rate": 8.178704446745042e-06, "loss": 1.0113, "step": 15720 }, { "epoch": 0.9506254910255636, "grad_norm": 0.5644882398869236, "learning_rate": 8.176132594233615e-06, "loss": 0.9852, "step": 15730 }, { "epoch": 0.9512298301806974, "grad_norm": 0.5631525429681971, "learning_rate": 8.173559332149105e-06, "loss": 0.9667, "step": 15740 }, { "epoch": 0.9518341693358313, "grad_norm": 0.7062211408913227, "learning_rate": 8.17098466163353e-06, "loss": 0.9962, "step": 15750 }, { "epoch": 0.9524385084909651, "grad_norm": 0.6601969274334052, "learning_rate": 8.168408583829534e-06, "loss": 0.9723, "step": 15760 }, { "epoch": 0.953042847646099, "grad_norm": 0.6670435824440197, "learning_rate": 8.165831099880384e-06, "loss": 0.9648, "step": 15770 }, { "epoch": 0.9536471868012328, "grad_norm": 0.6619443844641391, "learning_rate": 8.163252210929973e-06, "loss": 0.9806, "step": 15780 }, { "epoch": 0.9542515259563668, "grad_norm": 0.6414183110946925, "learning_rate": 8.160671918122815e-06, "loss": 0.973, "step": 15790 }, { "epoch": 0.9548558651115006, "grad_norm": 0.5766717035562999, "learning_rate": 8.158090222604052e-06, "loss": 0.9957, "step": 15800 }, { "epoch": 0.9554602042666345, "grad_norm": 0.5956217786900823, "learning_rate": 8.155507125519438e-06, "loss": 0.9618, "step": 15810 }, { "epoch": 0.9560645434217683, "grad_norm": 0.5895176817887027, "learning_rate": 8.152922628015363e-06, "loss": 0.964, "step": 15820 }, { "epoch": 0.9566688825769022, "grad_norm": 0.6001932027160904, "learning_rate": 8.150336731238826e-06, "loss": 0.9883, "step": 15830 }, { "epoch": 0.957273221732036, "grad_norm": 0.5776169600082943, "learning_rate": 8.147749436337455e-06, "loss": 0.9806, "step": 15840 }, { "epoch": 0.9578775608871699, "grad_norm": 0.7670392590266749, "learning_rate": 8.145160744459497e-06, "loss": 0.9665, "step": 15850 }, { "epoch": 0.9584819000423037, "grad_norm": 0.7478739127084156, "learning_rate": 8.142570656753816e-06, "loss": 0.9838, "step": 15860 }, { "epoch": 0.9590862391974376, "grad_norm": 0.7515754514708176, "learning_rate": 8.139979174369898e-06, "loss": 0.9662, "step": 15870 }, { "epoch": 0.9596905783525714, "grad_norm": 0.791726205386205, "learning_rate": 8.137386298457847e-06, "loss": 0.9794, "step": 15880 }, { "epoch": 0.9602949175077053, "grad_norm": 0.7376190131207087, "learning_rate": 8.134792030168386e-06, "loss": 0.9614, "step": 15890 }, { "epoch": 0.9608992566628392, "grad_norm": 0.7986479819326139, "learning_rate": 8.132196370652855e-06, "loss": 1.0005, "step": 15900 }, { "epoch": 0.9615035958179731, "grad_norm": 0.7598894944051914, "learning_rate": 8.129599321063214e-06, "loss": 0.9723, "step": 15910 }, { "epoch": 0.9621079349731069, "grad_norm": 0.7661687428972335, "learning_rate": 8.127000882552036e-06, "loss": 0.971, "step": 15920 }, { "epoch": 0.9627122741282408, "grad_norm": 0.7380062715591414, "learning_rate": 8.124401056272513e-06, "loss": 0.9664, "step": 15930 }, { "epoch": 0.9633166132833746, "grad_norm": 0.7148929039572601, "learning_rate": 8.121799843378452e-06, "loss": 0.9599, "step": 15940 }, { "epoch": 0.9639209524385085, "grad_norm": 1.1548965140777823, "learning_rate": 8.119197245024279e-06, "loss": 0.9704, "step": 15950 }, { "epoch": 0.9645252915936423, "grad_norm": 1.1355189202619795, "learning_rate": 8.116593262365027e-06, "loss": 0.9514, "step": 15960 }, { "epoch": 0.9651296307487762, "grad_norm": 1.1744731134148332, "learning_rate": 8.11398789655635e-06, "loss": 0.961, "step": 15970 }, { "epoch": 0.96573396990391, "grad_norm": 1.2228545136055289, "learning_rate": 8.111381148754515e-06, "loss": 0.9806, "step": 15980 }, { "epoch": 0.9663383090590439, "grad_norm": 1.1663052920384902, "learning_rate": 8.108773020116398e-06, "loss": 0.9583, "step": 15990 }, { "epoch": 0.9669426482141777, "grad_norm": 0.9580314185195974, "learning_rate": 8.106163511799495e-06, "loss": 0.9837, "step": 16000 }, { "epoch": 0.9675469873693117, "grad_norm": 0.9008400535684915, "learning_rate": 8.103552624961907e-06, "loss": 0.9615, "step": 16010 }, { "epoch": 0.9681513265244455, "grad_norm": 1.240721520907658, "learning_rate": 8.100940360762352e-06, "loss": 0.9666, "step": 16020 }, { "epoch": 0.9687556656795794, "grad_norm": 0.6993395308165153, "learning_rate": 8.098326720360155e-06, "loss": 0.9863, "step": 16030 }, { "epoch": 0.9693600048347133, "grad_norm": 1.1569674427667267, "learning_rate": 8.095711704915257e-06, "loss": 0.9613, "step": 16040 }, { "epoch": 0.9699643439898471, "grad_norm": 2.5312024947088116, "learning_rate": 8.093095315588205e-06, "loss": 0.9975, "step": 16050 }, { "epoch": 0.970568683144981, "grad_norm": 2.6365482277837304, "learning_rate": 8.090477553540155e-06, "loss": 0.9748, "step": 16060 }, { "epoch": 0.9711730223001148, "grad_norm": 2.306362014830411, "learning_rate": 8.087858419932876e-06, "loss": 1.0037, "step": 16070 }, { "epoch": 0.9717773614552487, "grad_norm": 2.633887786204463, "learning_rate": 8.085237915928747e-06, "loss": 0.9898, "step": 16080 }, { "epoch": 0.9723817006103825, "grad_norm": 2.2361695271247406, "learning_rate": 8.08261604269075e-06, "loss": 0.9712, "step": 16090 }, { "epoch": 0.9729860397655165, "grad_norm": 0.9027673205533038, "learning_rate": 8.079992801382473e-06, "loss": 0.9864, "step": 16100 }, { "epoch": 0.9735903789206503, "grad_norm": 0.9485300360821619, "learning_rate": 8.07736819316812e-06, "loss": 0.9863, "step": 16110 }, { "epoch": 0.9741947180757842, "grad_norm": 0.8996714752716645, "learning_rate": 8.074742219212495e-06, "loss": 0.9629, "step": 16120 }, { "epoch": 0.974799057230918, "grad_norm": 1.0196912605373736, "learning_rate": 8.072114880681008e-06, "loss": 0.9647, "step": 16130 }, { "epoch": 0.9754033963860519, "grad_norm": 0.8731143251895254, "learning_rate": 8.069486178739677e-06, "loss": 0.9737, "step": 16140 }, { "epoch": 0.9760077355411857, "grad_norm": 0.9399427677200887, "learning_rate": 8.066856114555128e-06, "loss": 0.9727, "step": 16150 }, { "epoch": 0.9766120746963196, "grad_norm": 0.9481263278394423, "learning_rate": 8.064224689294584e-06, "loss": 1.0035, "step": 16160 }, { "epoch": 0.9772164138514534, "grad_norm": 0.9791490851971432, "learning_rate": 8.061591904125878e-06, "loss": 0.9685, "step": 16170 }, { "epoch": 0.9778207530065873, "grad_norm": 0.9356403527625788, "learning_rate": 8.058957760217445e-06, "loss": 0.9725, "step": 16180 }, { "epoch": 0.9784250921617211, "grad_norm": 0.9882829100865731, "learning_rate": 8.05632225873832e-06, "loss": 0.9703, "step": 16190 }, { "epoch": 0.979029431316855, "grad_norm": 1.1818364373883143, "learning_rate": 8.053685400858145e-06, "loss": 0.9428, "step": 16200 }, { "epoch": 0.9796337704719889, "grad_norm": 1.0711103626098628, "learning_rate": 8.05104718774716e-06, "loss": 0.9872, "step": 16210 }, { "epoch": 0.9802381096271228, "grad_norm": 1.0681541962507033, "learning_rate": 8.048407620576214e-06, "loss": 0.965, "step": 16220 }, { "epoch": 0.9808424487822566, "grad_norm": 1.0982852366265872, "learning_rate": 8.045766700516744e-06, "loss": 0.9889, "step": 16230 }, { "epoch": 0.9814467879373905, "grad_norm": 1.1195015634134953, "learning_rate": 8.043124428740798e-06, "loss": 0.9883, "step": 16240 }, { "epoch": 0.9820511270925243, "grad_norm": 1.0497449594996697, "learning_rate": 8.040480806421023e-06, "loss": 1.0138, "step": 16250 }, { "epoch": 0.9826554662476582, "grad_norm": 1.0369912334742362, "learning_rate": 8.037835834730661e-06, "loss": 0.9835, "step": 16260 }, { "epoch": 0.983259805402792, "grad_norm": 0.9974874217628756, "learning_rate": 8.035189514843553e-06, "loss": 0.9568, "step": 16270 }, { "epoch": 0.9838641445579259, "grad_norm": 1.071505687372954, "learning_rate": 8.032541847934145e-06, "loss": 0.963, "step": 16280 }, { "epoch": 0.9844684837130597, "grad_norm": 1.006831916981404, "learning_rate": 8.029892835177473e-06, "loss": 0.9906, "step": 16290 }, { "epoch": 0.9850728228681936, "grad_norm": 1.0728103298911376, "learning_rate": 8.027242477749172e-06, "loss": 0.9765, "step": 16300 }, { "epoch": 0.9856771620233274, "grad_norm": 1.1279953282506783, "learning_rate": 8.02459077682548e-06, "loss": 1.0026, "step": 16310 }, { "epoch": 0.9862815011784614, "grad_norm": 1.1377071729682078, "learning_rate": 8.021937733583223e-06, "loss": 0.9762, "step": 16320 }, { "epoch": 0.9868858403335952, "grad_norm": 1.1023832208894342, "learning_rate": 8.019283349199827e-06, "loss": 0.9777, "step": 16330 }, { "epoch": 0.9874901794887291, "grad_norm": 1.1830416503673535, "learning_rate": 8.016627624853311e-06, "loss": 0.9582, "step": 16340 }, { "epoch": 0.9880945186438629, "grad_norm": 0.9270252595130956, "learning_rate": 8.01397056172229e-06, "loss": 0.9931, "step": 16350 }, { "epoch": 0.9886988577989968, "grad_norm": 0.917664965218076, "learning_rate": 8.011312160985976e-06, "loss": 0.9845, "step": 16360 }, { "epoch": 0.9893031969541306, "grad_norm": 1.0182963867610455, "learning_rate": 8.00865242382417e-06, "loss": 0.9831, "step": 16370 }, { "epoch": 0.9899075361092645, "grad_norm": 0.9677862270377444, "learning_rate": 8.005991351417266e-06, "loss": 0.9783, "step": 16380 }, { "epoch": 0.9905118752643984, "grad_norm": 0.9756658582285264, "learning_rate": 8.003328944946256e-06, "loss": 0.98, "step": 16390 }, { "epoch": 0.9911162144195322, "grad_norm": 0.9898636351662876, "learning_rate": 8.00066520559272e-06, "loss": 0.9975, "step": 16400 }, { "epoch": 0.9917205535746662, "grad_norm": 1.1825132286439701, "learning_rate": 7.998000134538826e-06, "loss": 0.9757, "step": 16410 }, { "epoch": 0.9923248927298, "grad_norm": 1.0438803315668894, "learning_rate": 7.995333732967342e-06, "loss": 0.995, "step": 16420 }, { "epoch": 0.9929292318849339, "grad_norm": 1.0611379895197603, "learning_rate": 7.99266600206162e-06, "loss": 0.9482, "step": 16430 }, { "epoch": 0.9935335710400677, "grad_norm": 0.968777919224612, "learning_rate": 7.989996943005602e-06, "loss": 0.9752, "step": 16440 }, { "epoch": 0.9941379101952016, "grad_norm": 1.0257813939329161, "learning_rate": 7.987326556983823e-06, "loss": 0.9553, "step": 16450 }, { "epoch": 0.9947422493503354, "grad_norm": 1.0253543776364944, "learning_rate": 7.984654845181404e-06, "loss": 0.9808, "step": 16460 }, { "epoch": 0.9953465885054693, "grad_norm": 0.9311508087482454, "learning_rate": 7.981981808784054e-06, "loss": 1.0002, "step": 16470 }, { "epoch": 0.9959509276606031, "grad_norm": 0.8827879448771927, "learning_rate": 7.979307448978075e-06, "loss": 0.9839, "step": 16480 }, { "epoch": 0.996555266815737, "grad_norm": 0.8781862377620872, "learning_rate": 7.976631766950348e-06, "loss": 0.9873, "step": 16490 }, { "epoch": 0.9971596059708708, "grad_norm": 1.117524151394854, "learning_rate": 7.973954763888347e-06, "loss": 0.9604, "step": 16500 }, { "epoch": 0.9977639451260047, "grad_norm": 1.1513608596965998, "learning_rate": 7.971276440980133e-06, "loss": 0.9668, "step": 16510 }, { "epoch": 0.9983682842811386, "grad_norm": 1.1082309067546794, "learning_rate": 7.968596799414347e-06, "loss": 0.9759, "step": 16520 }, { "epoch": 0.9989726234362725, "grad_norm": 1.116730405972857, "learning_rate": 7.965915840380219e-06, "loss": 1.0019, "step": 16530 }, { "epoch": 0.9995769625914063, "grad_norm": 1.097591218971702, "learning_rate": 7.963233565067565e-06, "loss": 0.9832, "step": 16540 }, { "epoch": 1.0001813017465402, "grad_norm": 1.0222076559815056, "learning_rate": 7.96054997466678e-06, "loss": 0.9656, "step": 16550 }, { "epoch": 1.000785640901674, "grad_norm": 1.1324279892713336, "learning_rate": 7.95786507036885e-06, "loss": 0.9392, "step": 16560 }, { "epoch": 1.0013899800568078, "grad_norm": 1.1583391945287789, "learning_rate": 7.955178853365338e-06, "loss": 0.9357, "step": 16570 }, { "epoch": 1.0019943192119418, "grad_norm": 1.0553472594753273, "learning_rate": 7.952491324848392e-06, "loss": 0.9361, "step": 16580 }, { "epoch": 1.0025986583670756, "grad_norm": 0.9992459487305569, "learning_rate": 7.94980248601074e-06, "loss": 0.9129, "step": 16590 }, { "epoch": 1.0032029975222094, "grad_norm": 1.1827744975887033, "learning_rate": 7.947112338045698e-06, "loss": 0.9187, "step": 16600 }, { "epoch": 1.0038073366773432, "grad_norm": 1.106611197017621, "learning_rate": 7.944420882147153e-06, "loss": 0.9344, "step": 16610 }, { "epoch": 1.0044116758324773, "grad_norm": 1.215207228383985, "learning_rate": 7.941728119509582e-06, "loss": 0.9387, "step": 16620 }, { "epoch": 1.005016014987611, "grad_norm": 1.1347656254059526, "learning_rate": 7.939034051328034e-06, "loss": 0.9402, "step": 16630 }, { "epoch": 1.0056203541427449, "grad_norm": 1.2085534661289723, "learning_rate": 7.936338678798143e-06, "loss": 0.9462, "step": 16640 }, { "epoch": 1.0062246932978787, "grad_norm": 1.4027098713260302, "learning_rate": 7.93364200311612e-06, "loss": 0.9377, "step": 16650 }, { "epoch": 1.0068290324530127, "grad_norm": 1.384610909662134, "learning_rate": 7.930944025478755e-06, "loss": 0.9426, "step": 16660 }, { "epoch": 1.0074333716081465, "grad_norm": 1.4810983091198728, "learning_rate": 7.928244747083414e-06, "loss": 0.924, "step": 16670 }, { "epoch": 1.0080377107632803, "grad_norm": 1.3867969630647643, "learning_rate": 7.92554416912804e-06, "loss": 0.9265, "step": 16680 }, { "epoch": 1.008642049918414, "grad_norm": 1.4354481910321326, "learning_rate": 7.922842292811155e-06, "loss": 0.9474, "step": 16690 }, { "epoch": 1.0092463890735481, "grad_norm": 0.8792760693545016, "learning_rate": 7.920139119331859e-06, "loss": 0.9709, "step": 16700 }, { "epoch": 1.009850728228682, "grad_norm": 0.877800870610991, "learning_rate": 7.917434649889823e-06, "loss": 0.9437, "step": 16710 }, { "epoch": 1.0104550673838157, "grad_norm": 0.830735823659445, "learning_rate": 7.914728885685294e-06, "loss": 0.9241, "step": 16720 }, { "epoch": 1.0110594065389498, "grad_norm": 0.8530426531503883, "learning_rate": 7.912021827919097e-06, "loss": 0.9646, "step": 16730 }, { "epoch": 1.0116637456940836, "grad_norm": 0.88664475936253, "learning_rate": 7.909313477792627e-06, "loss": 0.9387, "step": 16740 }, { "epoch": 1.0122680848492174, "grad_norm": 0.7367816222313015, "learning_rate": 7.906603836507853e-06, "loss": 0.9344, "step": 16750 }, { "epoch": 1.0128724240043512, "grad_norm": 0.7859491510043656, "learning_rate": 7.903892905267323e-06, "loss": 0.9416, "step": 16760 }, { "epoch": 1.0134767631594852, "grad_norm": 0.7543168368386722, "learning_rate": 7.90118068527415e-06, "loss": 0.9353, "step": 16770 }, { "epoch": 1.014081102314619, "grad_norm": 0.7930047723096081, "learning_rate": 7.898467177732022e-06, "loss": 0.9544, "step": 16780 }, { "epoch": 1.0146854414697528, "grad_norm": 0.793617635036497, "learning_rate": 7.895752383845201e-06, "loss": 0.9378, "step": 16790 }, { "epoch": 1.0152897806248866, "grad_norm": 0.8769535221494227, "learning_rate": 7.893036304818513e-06, "loss": 0.9196, "step": 16800 }, { "epoch": 1.0158941197800206, "grad_norm": 0.8206136583910835, "learning_rate": 7.890318941857361e-06, "loss": 0.925, "step": 16810 }, { "epoch": 1.0164984589351544, "grad_norm": 0.7938365201088614, "learning_rate": 7.887600296167715e-06, "loss": 0.9397, "step": 16820 }, { "epoch": 1.0171027980902883, "grad_norm": 0.7925782897904219, "learning_rate": 7.884880368956113e-06, "loss": 0.9332, "step": 16830 }, { "epoch": 1.017707137245422, "grad_norm": 0.7908502049314249, "learning_rate": 7.882159161429667e-06, "loss": 0.9308, "step": 16840 }, { "epoch": 1.018311476400556, "grad_norm": 0.5810920604705304, "learning_rate": 7.879436674796051e-06, "loss": 0.9241, "step": 16850 }, { "epoch": 1.0189158155556899, "grad_norm": 0.5660654629161752, "learning_rate": 7.876712910263506e-06, "loss": 0.9467, "step": 16860 }, { "epoch": 1.0195201547108237, "grad_norm": 0.6036814742101025, "learning_rate": 7.873987869040851e-06, "loss": 0.9597, "step": 16870 }, { "epoch": 1.0201244938659575, "grad_norm": 0.5723685025171404, "learning_rate": 7.871261552337458e-06, "loss": 0.9177, "step": 16880 }, { "epoch": 1.0207288330210915, "grad_norm": 0.6122230853641085, "learning_rate": 7.868533961363272e-06, "loss": 0.933, "step": 16890 }, { "epoch": 1.0213331721762253, "grad_norm": 0.6092007285623798, "learning_rate": 7.865805097328804e-06, "loss": 0.9155, "step": 16900 }, { "epoch": 1.0219375113313591, "grad_norm": 0.6326732563963001, "learning_rate": 7.86307496144513e-06, "loss": 0.9256, "step": 16910 }, { "epoch": 1.022541850486493, "grad_norm": 0.6236000190709885, "learning_rate": 7.860343554923884e-06, "loss": 0.9498, "step": 16920 }, { "epoch": 1.023146189641627, "grad_norm": 0.6126817686032936, "learning_rate": 7.857610878977273e-06, "loss": 0.95, "step": 16930 }, { "epoch": 1.0237505287967608, "grad_norm": 0.6285983823142869, "learning_rate": 7.854876934818064e-06, "loss": 0.9264, "step": 16940 }, { "epoch": 1.0243548679518946, "grad_norm": 0.8143835637091942, "learning_rate": 7.85214172365958e-06, "loss": 0.9494, "step": 16950 }, { "epoch": 1.0249592071070284, "grad_norm": 0.8357300452676808, "learning_rate": 7.84940524671572e-06, "loss": 0.9428, "step": 16960 }, { "epoch": 1.0255635462621624, "grad_norm": 0.8789353994651676, "learning_rate": 7.846667505200931e-06, "loss": 0.9587, "step": 16970 }, { "epoch": 1.0261678854172962, "grad_norm": 0.8293078770478578, "learning_rate": 7.843928500330234e-06, "loss": 0.9203, "step": 16980 }, { "epoch": 1.02677222457243, "grad_norm": 0.8425941491537405, "learning_rate": 7.841188233319197e-06, "loss": 0.942, "step": 16990 }, { "epoch": 1.0273765637275638, "grad_norm": 0.5866636262704408, "learning_rate": 7.838446705383958e-06, "loss": 0.9254, "step": 17000 }, { "epoch": 1.0279809028826978, "grad_norm": 0.5659161865147383, "learning_rate": 7.835703917741213e-06, "loss": 0.9424, "step": 17010 }, { "epoch": 1.0285852420378316, "grad_norm": 0.5326252176534534, "learning_rate": 7.832959871608216e-06, "loss": 0.9425, "step": 17020 }, { "epoch": 1.0291895811929654, "grad_norm": 0.5291294681712005, "learning_rate": 7.830214568202777e-06, "loss": 0.948, "step": 17030 }, { "epoch": 1.0297939203480992, "grad_norm": 0.5418078807948797, "learning_rate": 7.827468008743269e-06, "loss": 0.9635, "step": 17040 }, { "epoch": 1.0303982595032333, "grad_norm": 0.5305533046485595, "learning_rate": 7.824720194448617e-06, "loss": 0.9186, "step": 17050 }, { "epoch": 1.031002598658367, "grad_norm": 0.5329434727869929, "learning_rate": 7.821971126538308e-06, "loss": 0.9312, "step": 17060 }, { "epoch": 1.0316069378135009, "grad_norm": 0.5502276447201018, "learning_rate": 7.819220806232381e-06, "loss": 0.9439, "step": 17070 }, { "epoch": 1.032211276968635, "grad_norm": 0.5500431855661826, "learning_rate": 7.816469234751436e-06, "loss": 0.9628, "step": 17080 }, { "epoch": 1.0328156161237687, "grad_norm": 0.5290719256049004, "learning_rate": 7.813716413316621e-06, "loss": 0.9596, "step": 17090 }, { "epoch": 1.0334199552789025, "grad_norm": 0.5387557996000338, "learning_rate": 7.810962343149647e-06, "loss": 0.9512, "step": 17100 }, { "epoch": 1.0340242944340363, "grad_norm": 0.5536744671060361, "learning_rate": 7.808207025472773e-06, "loss": 0.9343, "step": 17110 }, { "epoch": 1.0346286335891703, "grad_norm": 0.5307958362077472, "learning_rate": 7.805450461508812e-06, "loss": 0.9232, "step": 17120 }, { "epoch": 1.0352329727443041, "grad_norm": 0.5141975412793283, "learning_rate": 7.802692652481135e-06, "loss": 0.937, "step": 17130 }, { "epoch": 1.035837311899438, "grad_norm": 0.5673089058993276, "learning_rate": 7.799933599613661e-06, "loss": 0.9383, "step": 17140 }, { "epoch": 1.0364416510545718, "grad_norm": 0.4963958181023315, "learning_rate": 7.797173304130861e-06, "loss": 0.9198, "step": 17150 }, { "epoch": 1.0370459902097058, "grad_norm": 0.5188908573139808, "learning_rate": 7.794411767257763e-06, "loss": 0.904, "step": 17160 }, { "epoch": 1.0376503293648396, "grad_norm": 0.5139287859896396, "learning_rate": 7.791648990219936e-06, "loss": 0.9144, "step": 17170 }, { "epoch": 1.0382546685199734, "grad_norm": 0.5580327050618912, "learning_rate": 7.78888497424351e-06, "loss": 0.9203, "step": 17180 }, { "epoch": 1.0388590076751072, "grad_norm": 0.5278198038472391, "learning_rate": 7.786119720555158e-06, "loss": 0.9252, "step": 17190 }, { "epoch": 1.0394633468302412, "grad_norm": 0.5591315816805235, "learning_rate": 7.783353230382105e-06, "loss": 0.9067, "step": 17200 }, { "epoch": 1.040067685985375, "grad_norm": 0.5357427652993897, "learning_rate": 7.780585504952122e-06, "loss": 0.9395, "step": 17210 }, { "epoch": 1.0406720251405088, "grad_norm": 0.5134222025354997, "learning_rate": 7.777816545493534e-06, "loss": 0.9435, "step": 17220 }, { "epoch": 1.0412763642956426, "grad_norm": 0.5493819771337071, "learning_rate": 7.775046353235206e-06, "loss": 0.919, "step": 17230 }, { "epoch": 1.0418807034507767, "grad_norm": 0.534374848403984, "learning_rate": 7.772274929406556e-06, "loss": 0.945, "step": 17240 }, { "epoch": 1.0424850426059105, "grad_norm": 0.5265338350839955, "learning_rate": 7.769502275237548e-06, "loss": 0.9466, "step": 17250 }, { "epoch": 1.0430893817610443, "grad_norm": 0.5657623021518893, "learning_rate": 7.766728391958689e-06, "loss": 0.9243, "step": 17260 }, { "epoch": 1.043693720916178, "grad_norm": 0.5416678425204373, "learning_rate": 7.763953280801033e-06, "loss": 0.9394, "step": 17270 }, { "epoch": 1.044298060071312, "grad_norm": 0.5540016282585845, "learning_rate": 7.761176942996178e-06, "loss": 0.9398, "step": 17280 }, { "epoch": 1.044902399226446, "grad_norm": 0.5735650702516203, "learning_rate": 7.75839937977627e-06, "loss": 0.9326, "step": 17290 }, { "epoch": 1.0455067383815797, "grad_norm": 0.6312286886898297, "learning_rate": 7.755620592373996e-06, "loss": 0.9282, "step": 17300 }, { "epoch": 1.0461110775367135, "grad_norm": 0.6334737381977422, "learning_rate": 7.752840582022585e-06, "loss": 0.9255, "step": 17310 }, { "epoch": 1.0467154166918475, "grad_norm": 0.654917849959172, "learning_rate": 7.750059349955813e-06, "loss": 0.9452, "step": 17320 }, { "epoch": 1.0473197558469813, "grad_norm": 0.6271134174635833, "learning_rate": 7.747276897407991e-06, "loss": 0.931, "step": 17330 }, { "epoch": 1.0479240950021151, "grad_norm": 0.6213932038944121, "learning_rate": 7.74449322561398e-06, "loss": 0.9367, "step": 17340 }, { "epoch": 1.048528434157249, "grad_norm": 0.6842261683524189, "learning_rate": 7.741708335809178e-06, "loss": 0.9407, "step": 17350 }, { "epoch": 1.049132773312383, "grad_norm": 0.7512335733068148, "learning_rate": 7.738922229229523e-06, "loss": 0.9677, "step": 17360 }, { "epoch": 1.0497371124675168, "grad_norm": 0.6636142611915227, "learning_rate": 7.736134907111492e-06, "loss": 0.9028, "step": 17370 }, { "epoch": 1.0503414516226506, "grad_norm": 0.719226792283849, "learning_rate": 7.733346370692108e-06, "loss": 0.9278, "step": 17380 }, { "epoch": 1.0509457907777846, "grad_norm": 0.652187295856004, "learning_rate": 7.730556621208925e-06, "loss": 0.9351, "step": 17390 }, { "epoch": 1.0515501299329184, "grad_norm": 0.6589839148089985, "learning_rate": 7.727765659900037e-06, "loss": 0.9654, "step": 17400 }, { "epoch": 1.0521544690880522, "grad_norm": 0.6159802991459832, "learning_rate": 7.724973488004081e-06, "loss": 0.9108, "step": 17410 }, { "epoch": 1.052758808243186, "grad_norm": 0.6811609900169269, "learning_rate": 7.722180106760228e-06, "loss": 0.9287, "step": 17420 }, { "epoch": 1.05336314739832, "grad_norm": 0.6310949608454279, "learning_rate": 7.719385517408182e-06, "loss": 0.9068, "step": 17430 }, { "epoch": 1.0539674865534538, "grad_norm": 0.6288201714726321, "learning_rate": 7.716589721188188e-06, "loss": 0.945, "step": 17440 }, { "epoch": 1.0545718257085877, "grad_norm": 0.7816625286245904, "learning_rate": 7.713792719341025e-06, "loss": 0.9545, "step": 17450 }, { "epoch": 1.0551761648637215, "grad_norm": 0.8997991728124461, "learning_rate": 7.710994513108007e-06, "loss": 0.919, "step": 17460 }, { "epoch": 1.0557805040188555, "grad_norm": 0.8067305936420595, "learning_rate": 7.708195103730985e-06, "loss": 0.9348, "step": 17470 }, { "epoch": 1.0563848431739893, "grad_norm": 0.7829921953321952, "learning_rate": 7.705394492452337e-06, "loss": 0.9414, "step": 17480 }, { "epoch": 1.056989182329123, "grad_norm": 0.7771452511030948, "learning_rate": 7.702592680514987e-06, "loss": 0.9425, "step": 17490 }, { "epoch": 1.057593521484257, "grad_norm": 0.8287772552764033, "learning_rate": 7.699789669162375e-06, "loss": 0.9423, "step": 17500 }, { "epoch": 1.058197860639391, "grad_norm": 0.7865863061813924, "learning_rate": 7.696985459638487e-06, "loss": 0.9389, "step": 17510 }, { "epoch": 1.0588021997945247, "grad_norm": 0.7929486218402757, "learning_rate": 7.694180053187835e-06, "loss": 0.9106, "step": 17520 }, { "epoch": 1.0594065389496585, "grad_norm": 0.7467555816310237, "learning_rate": 7.691373451055465e-06, "loss": 0.9133, "step": 17530 }, { "epoch": 1.0600108781047923, "grad_norm": 0.768235156442969, "learning_rate": 7.688565654486946e-06, "loss": 0.9181, "step": 17540 }, { "epoch": 1.0606152172599264, "grad_norm": 1.2191750462306667, "learning_rate": 7.68575666472839e-06, "loss": 0.9342, "step": 17550 }, { "epoch": 1.0612195564150602, "grad_norm": 1.2345557906807694, "learning_rate": 7.68294648302643e-06, "loss": 0.9288, "step": 17560 }, { "epoch": 1.061823895570194, "grad_norm": 1.2710411362888787, "learning_rate": 7.680135110628226e-06, "loss": 0.9343, "step": 17570 }, { "epoch": 1.0624282347253278, "grad_norm": 1.2138994266559717, "learning_rate": 7.677322548781471e-06, "loss": 0.9271, "step": 17580 }, { "epoch": 1.0630325738804618, "grad_norm": 1.2243402093882982, "learning_rate": 7.674508798734388e-06, "loss": 0.9165, "step": 17590 }, { "epoch": 1.0636369130355956, "grad_norm": 0.9175987045396671, "learning_rate": 7.67169386173572e-06, "loss": 0.9404, "step": 17600 }, { "epoch": 1.0642412521907294, "grad_norm": 0.9958627314941471, "learning_rate": 7.668877739034743e-06, "loss": 0.9311, "step": 17610 }, { "epoch": 1.0648455913458632, "grad_norm": 0.9076448485095544, "learning_rate": 7.666060431881258e-06, "loss": 0.9257, "step": 17620 }, { "epoch": 1.0654499305009972, "grad_norm": 0.9631456639674933, "learning_rate": 7.663241941525587e-06, "loss": 0.9285, "step": 17630 }, { "epoch": 1.066054269656131, "grad_norm": 0.8543313921634847, "learning_rate": 7.660422269218584e-06, "loss": 0.9267, "step": 17640 }, { "epoch": 1.0666586088112648, "grad_norm": 2.401943331264358, "learning_rate": 7.657601416211625e-06, "loss": 0.9548, "step": 17650 }, { "epoch": 1.0672629479663986, "grad_norm": 2.323754105260101, "learning_rate": 7.654779383756606e-06, "loss": 0.9367, "step": 17660 }, { "epoch": 1.0678672871215327, "grad_norm": 2.532305996465686, "learning_rate": 7.651956173105952e-06, "loss": 0.9371, "step": 17670 }, { "epoch": 1.0684716262766665, "grad_norm": 2.6212061626863994, "learning_rate": 7.64913178551261e-06, "loss": 0.9183, "step": 17680 }, { "epoch": 1.0690759654318003, "grad_norm": 2.19291191968829, "learning_rate": 7.646306222230046e-06, "loss": 0.9642, "step": 17690 }, { "epoch": 1.0696803045869343, "grad_norm": 0.9355035176067491, "learning_rate": 7.643479484512251e-06, "loss": 0.9443, "step": 17700 }, { "epoch": 1.070284643742068, "grad_norm": 0.9186575857568213, "learning_rate": 7.640651573613735e-06, "loss": 0.9281, "step": 17710 }, { "epoch": 1.070888982897202, "grad_norm": 0.944859914491086, "learning_rate": 7.63782249078953e-06, "loss": 0.9608, "step": 17720 }, { "epoch": 1.0714933220523357, "grad_norm": 0.9321307889049993, "learning_rate": 7.634992237295188e-06, "loss": 0.9243, "step": 17730 }, { "epoch": 1.0720976612074695, "grad_norm": 0.8925562864372932, "learning_rate": 7.63216081438678e-06, "loss": 0.9356, "step": 17740 }, { "epoch": 1.0727020003626035, "grad_norm": 0.9319896823185124, "learning_rate": 7.629328223320898e-06, "loss": 0.9441, "step": 17750 }, { "epoch": 1.0733063395177374, "grad_norm": 0.9632602171112727, "learning_rate": 7.626494465354648e-06, "loss": 0.9413, "step": 17760 }, { "epoch": 1.0739106786728712, "grad_norm": 0.9679103853513814, "learning_rate": 7.623659541745658e-06, "loss": 0.9113, "step": 17770 }, { "epoch": 1.0745150178280052, "grad_norm": 1.0326943645942648, "learning_rate": 7.620823453752073e-06, "loss": 0.9324, "step": 17780 }, { "epoch": 1.075119356983139, "grad_norm": 1.0656998837801366, "learning_rate": 7.617986202632553e-06, "loss": 0.9476, "step": 17790 }, { "epoch": 1.0757236961382728, "grad_norm": 1.1655298605395468, "learning_rate": 7.615147789646272e-06, "loss": 0.9116, "step": 17800 }, { "epoch": 1.0763280352934066, "grad_norm": 1.0036326709028562, "learning_rate": 7.612308216052929e-06, "loss": 0.9515, "step": 17810 }, { "epoch": 1.0769323744485406, "grad_norm": 1.0340336615446928, "learning_rate": 7.6094674831127245e-06, "loss": 0.9511, "step": 17820 }, { "epoch": 1.0775367136036744, "grad_norm": 0.98383497117508, "learning_rate": 7.606625592086385e-06, "loss": 0.9323, "step": 17830 }, { "epoch": 1.0781410527588082, "grad_norm": 1.0825964160397739, "learning_rate": 7.603782544235146e-06, "loss": 0.9203, "step": 17840 }, { "epoch": 1.078745391913942, "grad_norm": 1.0986722008342131, "learning_rate": 7.600938340820756e-06, "loss": 0.9309, "step": 17850 }, { "epoch": 1.079349731069076, "grad_norm": 1.2457489182430421, "learning_rate": 7.598092983105478e-06, "loss": 0.9285, "step": 17860 }, { "epoch": 1.0799540702242099, "grad_norm": 1.0850639495062309, "learning_rate": 7.595246472352088e-06, "loss": 0.9296, "step": 17870 }, { "epoch": 1.0805584093793437, "grad_norm": 1.0203780363698607, "learning_rate": 7.592398809823869e-06, "loss": 0.9422, "step": 17880 }, { "epoch": 1.0811627485344775, "grad_norm": 1.0045630153118468, "learning_rate": 7.589549996784622e-06, "loss": 0.9469, "step": 17890 }, { "epoch": 1.0817670876896115, "grad_norm": 1.2239430174571362, "learning_rate": 7.586700034498652e-06, "loss": 0.9596, "step": 17900 }, { "epoch": 1.0823714268447453, "grad_norm": 1.1464164762917124, "learning_rate": 7.5838489242307785e-06, "loss": 0.9251, "step": 17910 }, { "epoch": 1.082975765999879, "grad_norm": 1.163584774202159, "learning_rate": 7.58099666724633e-06, "loss": 0.9421, "step": 17920 }, { "epoch": 1.083580105155013, "grad_norm": 1.127372288276386, "learning_rate": 7.578143264811142e-06, "loss": 0.9379, "step": 17930 }, { "epoch": 1.084184444310147, "grad_norm": 1.1477872466713324, "learning_rate": 7.575288718191559e-06, "loss": 0.9132, "step": 17940 }, { "epoch": 1.0847887834652807, "grad_norm": 1.0134197870887063, "learning_rate": 7.572433028654436e-06, "loss": 0.9392, "step": 17950 }, { "epoch": 1.0853931226204145, "grad_norm": 1.0832553901018576, "learning_rate": 7.56957619746713e-06, "loss": 0.9257, "step": 17960 }, { "epoch": 1.0859974617755483, "grad_norm": 1.017673550134316, "learning_rate": 7.566718225897509e-06, "loss": 0.9117, "step": 17970 }, { "epoch": 1.0866018009306824, "grad_norm": 0.9358024022533614, "learning_rate": 7.563859115213945e-06, "loss": 0.9194, "step": 17980 }, { "epoch": 1.0872061400858162, "grad_norm": 0.9594514118907036, "learning_rate": 7.560998866685317e-06, "loss": 0.9089, "step": 17990 }, { "epoch": 1.08781047924095, "grad_norm": 0.9888000686801851, "learning_rate": 7.558137481581008e-06, "loss": 0.9493, "step": 18000 }, { "epoch": 1.0884148183960838, "grad_norm": 1.026695318025682, "learning_rate": 7.555274961170906e-06, "loss": 0.9409, "step": 18010 }, { "epoch": 1.0890191575512178, "grad_norm": 0.9677848897743511, "learning_rate": 7.5524113067254e-06, "loss": 0.9216, "step": 18020 }, { "epoch": 1.0896234967063516, "grad_norm": 1.0562920205313786, "learning_rate": 7.549546519515389e-06, "loss": 0.9432, "step": 18030 }, { "epoch": 1.0902278358614854, "grad_norm": 0.9954802932504392, "learning_rate": 7.546680600812267e-06, "loss": 0.9435, "step": 18040 }, { "epoch": 1.0908321750166192, "grad_norm": 1.0126452259594012, "learning_rate": 7.5438135518879355e-06, "loss": 0.9378, "step": 18050 }, { "epoch": 1.0914365141717532, "grad_norm": 1.013427097497143, "learning_rate": 7.540945374014794e-06, "loss": 0.9463, "step": 18060 }, { "epoch": 1.092040853326887, "grad_norm": 0.9956295192601601, "learning_rate": 7.538076068465746e-06, "loss": 0.9291, "step": 18070 }, { "epoch": 1.0926451924820209, "grad_norm": 0.9452206477910722, "learning_rate": 7.535205636514194e-06, "loss": 0.9426, "step": 18080 }, { "epoch": 1.0932495316371549, "grad_norm": 1.006683965899687, "learning_rate": 7.53233407943404e-06, "loss": 0.9511, "step": 18090 }, { "epoch": 1.0938538707922887, "grad_norm": 1.1452663795381461, "learning_rate": 7.529461398499689e-06, "loss": 0.9258, "step": 18100 }, { "epoch": 1.0944582099474225, "grad_norm": 1.2181680291939803, "learning_rate": 7.526587594986037e-06, "loss": 0.9289, "step": 18110 }, { "epoch": 1.0950625491025563, "grad_norm": 1.0759594597451736, "learning_rate": 7.523712670168486e-06, "loss": 0.9191, "step": 18120 }, { "epoch": 1.0956668882576903, "grad_norm": 1.2041323183873132, "learning_rate": 7.5208366253229316e-06, "loss": 0.9258, "step": 18130 }, { "epoch": 1.0962712274128241, "grad_norm": 1.1827484285363192, "learning_rate": 7.517959461725767e-06, "loss": 0.908, "step": 18140 }, { "epoch": 1.096875566567958, "grad_norm": 1.157332960067366, "learning_rate": 7.5150811806538825e-06, "loss": 0.9362, "step": 18150 }, { "epoch": 1.0974799057230917, "grad_norm": 1.169992858850769, "learning_rate": 7.512201783384665e-06, "loss": 0.9313, "step": 18160 }, { "epoch": 1.0980842448782258, "grad_norm": 1.1282561411447811, "learning_rate": 7.509321271195996e-06, "loss": 0.9665, "step": 18170 }, { "epoch": 1.0986885840333596, "grad_norm": 1.140677578190521, "learning_rate": 7.5064396453662515e-06, "loss": 0.9547, "step": 18180 }, { "epoch": 1.0992929231884934, "grad_norm": 1.0836950332806154, "learning_rate": 7.503556907174301e-06, "loss": 0.9438, "step": 18190 }, { "epoch": 1.0998972623436272, "grad_norm": 1.2107318221539178, "learning_rate": 7.500673057899509e-06, "loss": 0.9382, "step": 18200 }, { "epoch": 1.1005016014987612, "grad_norm": 1.1518057842264933, "learning_rate": 7.497788098821734e-06, "loss": 0.9082, "step": 18210 }, { "epoch": 1.101105940653895, "grad_norm": 1.2321108621606867, "learning_rate": 7.494902031221325e-06, "loss": 0.9689, "step": 18220 }, { "epoch": 1.1017102798090288, "grad_norm": 1.2688642849578091, "learning_rate": 7.492014856379123e-06, "loss": 0.9489, "step": 18230 }, { "epoch": 1.1023146189641626, "grad_norm": 1.1660540709104281, "learning_rate": 7.489126575576462e-06, "loss": 0.9349, "step": 18240 }, { "epoch": 1.1029189581192966, "grad_norm": 1.4189099891583747, "learning_rate": 7.486237190095165e-06, "loss": 0.9549, "step": 18250 }, { "epoch": 1.1035232972744304, "grad_norm": 1.4092109380096982, "learning_rate": 7.483346701217548e-06, "loss": 0.9426, "step": 18260 }, { "epoch": 1.1041276364295642, "grad_norm": 1.388194491260783, "learning_rate": 7.4804551102264125e-06, "loss": 0.9275, "step": 18270 }, { "epoch": 1.104731975584698, "grad_norm": 1.3794214625191168, "learning_rate": 7.477562418405053e-06, "loss": 0.9363, "step": 18280 }, { "epoch": 1.105336314739832, "grad_norm": 1.339498916009595, "learning_rate": 7.474668627037251e-06, "loss": 0.9384, "step": 18290 }, { "epoch": 1.1059406538949659, "grad_norm": 0.8755874501739986, "learning_rate": 7.471773737407277e-06, "loss": 0.925, "step": 18300 }, { "epoch": 1.1065449930500997, "grad_norm": 0.9110506355136735, "learning_rate": 7.468877750799887e-06, "loss": 0.9342, "step": 18310 }, { "epoch": 1.1071493322052335, "grad_norm": 0.8660264542621957, "learning_rate": 7.4659806685003245e-06, "loss": 0.9439, "step": 18320 }, { "epoch": 1.1077536713603675, "grad_norm": 0.8759157981394622, "learning_rate": 7.4630824917943186e-06, "loss": 0.9426, "step": 18330 }, { "epoch": 1.1083580105155013, "grad_norm": 0.8532046255984694, "learning_rate": 7.460183221968088e-06, "loss": 0.938, "step": 18340 }, { "epoch": 1.1089623496706351, "grad_norm": 0.7509445769856646, "learning_rate": 7.4572828603083325e-06, "loss": 0.9475, "step": 18350 }, { "epoch": 1.109566688825769, "grad_norm": 0.8094425513662415, "learning_rate": 7.454381408102236e-06, "loss": 0.9433, "step": 18360 }, { "epoch": 1.110171027980903, "grad_norm": 0.7666016460687293, "learning_rate": 7.451478866637469e-06, "loss": 0.9206, "step": 18370 }, { "epoch": 1.1107753671360368, "grad_norm": 0.7527321293860785, "learning_rate": 7.448575237202185e-06, "loss": 0.9427, "step": 18380 }, { "epoch": 1.1113797062911706, "grad_norm": 0.816718070340287, "learning_rate": 7.445670521085017e-06, "loss": 0.933, "step": 18390 }, { "epoch": 1.1119840454463046, "grad_norm": 0.7946087949364989, "learning_rate": 7.442764719575088e-06, "loss": 0.9562, "step": 18400 }, { "epoch": 1.1125883846014384, "grad_norm": 0.8368392538723526, "learning_rate": 7.4398578339619935e-06, "loss": 0.9347, "step": 18410 }, { "epoch": 1.1131927237565722, "grad_norm": 0.8676632114239737, "learning_rate": 7.436949865535814e-06, "loss": 0.9412, "step": 18420 }, { "epoch": 1.113797062911706, "grad_norm": 0.7942275826430807, "learning_rate": 7.434040815587114e-06, "loss": 0.9443, "step": 18430 }, { "epoch": 1.1144014020668398, "grad_norm": 0.8746305292978679, "learning_rate": 7.431130685406933e-06, "loss": 0.9267, "step": 18440 }, { "epoch": 1.1150057412219738, "grad_norm": 0.5921675189226722, "learning_rate": 7.428219476286792e-06, "loss": 0.9375, "step": 18450 }, { "epoch": 1.1156100803771076, "grad_norm": 0.6047915413956512, "learning_rate": 7.425307189518691e-06, "loss": 0.9683, "step": 18460 }, { "epoch": 1.1162144195322414, "grad_norm": 0.5641124287732586, "learning_rate": 7.4223938263951075e-06, "loss": 0.9376, "step": 18470 }, { "epoch": 1.1168187586873755, "grad_norm": 0.6032880788133669, "learning_rate": 7.419479388208997e-06, "loss": 0.9294, "step": 18480 }, { "epoch": 1.1174230978425093, "grad_norm": 0.5768160398383214, "learning_rate": 7.416563876253793e-06, "loss": 0.9356, "step": 18490 }, { "epoch": 1.118027436997643, "grad_norm": 0.6074259013678874, "learning_rate": 7.413647291823402e-06, "loss": 0.9392, "step": 18500 }, { "epoch": 1.1186317761527769, "grad_norm": 0.6019726796676416, "learning_rate": 7.410729636212212e-06, "loss": 0.9203, "step": 18510 }, { "epoch": 1.119236115307911, "grad_norm": 0.6425817645653109, "learning_rate": 7.4078109107150834e-06, "loss": 0.918, "step": 18520 }, { "epoch": 1.1198404544630447, "grad_norm": 0.6292809675648877, "learning_rate": 7.404891116627349e-06, "loss": 0.9391, "step": 18530 }, { "epoch": 1.1204447936181785, "grad_norm": 0.5967513731120074, "learning_rate": 7.401970255244821e-06, "loss": 0.9438, "step": 18540 }, { "epoch": 1.1210491327733123, "grad_norm": 0.8391683127683519, "learning_rate": 7.3990483278637825e-06, "loss": 0.9276, "step": 18550 }, { "epoch": 1.1216534719284463, "grad_norm": 0.8187967151357829, "learning_rate": 7.396125335780988e-06, "loss": 0.9311, "step": 18560 }, { "epoch": 1.1222578110835801, "grad_norm": 0.8336153283271945, "learning_rate": 7.393201280293668e-06, "loss": 0.9278, "step": 18570 }, { "epoch": 1.122862150238714, "grad_norm": 0.8640297047521126, "learning_rate": 7.390276162699524e-06, "loss": 0.9316, "step": 18580 }, { "epoch": 1.1234664893938477, "grad_norm": 0.8728493256719332, "learning_rate": 7.387349984296725e-06, "loss": 0.9429, "step": 18590 }, { "epoch": 1.1240708285489818, "grad_norm": 0.5105072076303535, "learning_rate": 7.384422746383918e-06, "loss": 0.9342, "step": 18600 }, { "epoch": 1.1246751677041156, "grad_norm": 0.5284821926637777, "learning_rate": 7.381494450260214e-06, "loss": 0.9371, "step": 18610 }, { "epoch": 1.1252795068592494, "grad_norm": 0.5383736889487204, "learning_rate": 7.378565097225196e-06, "loss": 0.9442, "step": 18620 }, { "epoch": 1.1258838460143832, "grad_norm": 0.5487929806086651, "learning_rate": 7.375634688578916e-06, "loss": 0.9305, "step": 18630 }, { "epoch": 1.1264881851695172, "grad_norm": 0.5174109381174217, "learning_rate": 7.372703225621896e-06, "loss": 0.9543, "step": 18640 }, { "epoch": 1.127092524324651, "grad_norm": 0.5238752933855901, "learning_rate": 7.36977070965512e-06, "loss": 0.9169, "step": 18650 }, { "epoch": 1.1276968634797848, "grad_norm": 0.5611088769581871, "learning_rate": 7.366837141980048e-06, "loss": 0.9332, "step": 18660 }, { "epoch": 1.1283012026349186, "grad_norm": 0.5157733749348441, "learning_rate": 7.363902523898601e-06, "loss": 0.9377, "step": 18670 }, { "epoch": 1.1289055417900526, "grad_norm": 0.541693977630682, "learning_rate": 7.360966856713166e-06, "loss": 0.9345, "step": 18680 }, { "epoch": 1.1295098809451865, "grad_norm": 0.5772872167113711, "learning_rate": 7.358030141726599e-06, "loss": 0.9131, "step": 18690 }, { "epoch": 1.1301142201003203, "grad_norm": 0.593293592996897, "learning_rate": 7.355092380242217e-06, "loss": 0.9362, "step": 18700 }, { "epoch": 1.1307185592554543, "grad_norm": 0.4977876548116746, "learning_rate": 7.352153573563807e-06, "loss": 0.9268, "step": 18710 }, { "epoch": 1.131322898410588, "grad_norm": 0.5187828309416557, "learning_rate": 7.349213722995614e-06, "loss": 0.9382, "step": 18720 }, { "epoch": 1.1319272375657219, "grad_norm": 0.6057896594417571, "learning_rate": 7.346272829842348e-06, "loss": 0.9652, "step": 18730 }, { "epoch": 1.1325315767208557, "grad_norm": 0.5627037988929109, "learning_rate": 7.343330895409185e-06, "loss": 0.9322, "step": 18740 }, { "epoch": 1.1331359158759895, "grad_norm": 0.5938214083120487, "learning_rate": 7.340387921001759e-06, "loss": 0.9507, "step": 18750 }, { "epoch": 1.1337402550311235, "grad_norm": 0.497879296733788, "learning_rate": 7.337443907926167e-06, "loss": 0.9451, "step": 18760 }, { "epoch": 1.1343445941862573, "grad_norm": 0.5291685520192279, "learning_rate": 7.334498857488968e-06, "loss": 0.9368, "step": 18770 }, { "epoch": 1.1349489333413911, "grad_norm": 0.5169483284744473, "learning_rate": 7.331552770997179e-06, "loss": 0.9176, "step": 18780 }, { "epoch": 1.1355532724965252, "grad_norm": 0.5121865324391289, "learning_rate": 7.328605649758278e-06, "loss": 0.9386, "step": 18790 }, { "epoch": 1.136157611651659, "grad_norm": 0.5030996319434998, "learning_rate": 7.325657495080205e-06, "loss": 0.9391, "step": 18800 }, { "epoch": 1.1367619508067928, "grad_norm": 0.5134502893425638, "learning_rate": 7.322708308271353e-06, "loss": 0.9092, "step": 18810 }, { "epoch": 1.1373662899619266, "grad_norm": 0.5378254096527265, "learning_rate": 7.319758090640579e-06, "loss": 0.9194, "step": 18820 }, { "epoch": 1.1379706291170604, "grad_norm": 0.5646526258128092, "learning_rate": 7.316806843497192e-06, "loss": 0.9556, "step": 18830 }, { "epoch": 1.1385749682721944, "grad_norm": 0.5201525360244731, "learning_rate": 7.313854568150962e-06, "loss": 0.9135, "step": 18840 }, { "epoch": 1.1391793074273282, "grad_norm": 0.571178709083454, "learning_rate": 7.3109012659121134e-06, "loss": 0.9372, "step": 18850 }, { "epoch": 1.139783646582462, "grad_norm": 0.5924905430176384, "learning_rate": 7.3079469380913285e-06, "loss": 0.9122, "step": 18860 }, { "epoch": 1.140387985737596, "grad_norm": 0.5478826754841538, "learning_rate": 7.304991585999741e-06, "loss": 0.9494, "step": 18870 }, { "epoch": 1.1409923248927298, "grad_norm": 0.5760825458033325, "learning_rate": 7.302035210948941e-06, "loss": 0.9458, "step": 18880 }, { "epoch": 1.1415966640478636, "grad_norm": 0.5914022187486925, "learning_rate": 7.2990778142509745e-06, "loss": 0.965, "step": 18890 }, { "epoch": 1.1422010032029974, "grad_norm": 0.6763849306396178, "learning_rate": 7.2961193972183374e-06, "loss": 0.9597, "step": 18900 }, { "epoch": 1.1428053423581315, "grad_norm": 0.6048262417231475, "learning_rate": 7.293159961163983e-06, "loss": 0.9086, "step": 18910 }, { "epoch": 1.1434096815132653, "grad_norm": 0.6222698001208778, "learning_rate": 7.29019950740131e-06, "loss": 0.93, "step": 18920 }, { "epoch": 1.144014020668399, "grad_norm": 0.6685430569916258, "learning_rate": 7.287238037244175e-06, "loss": 0.9285, "step": 18930 }, { "epoch": 1.1446183598235329, "grad_norm": 0.6072658212484155, "learning_rate": 7.284275552006886e-06, "loss": 0.9363, "step": 18940 }, { "epoch": 1.145222698978667, "grad_norm": 0.6732557639309836, "learning_rate": 7.281312053004195e-06, "loss": 0.925, "step": 18950 }, { "epoch": 1.1458270381338007, "grad_norm": 0.6931346464609389, "learning_rate": 7.278347541551309e-06, "loss": 0.9415, "step": 18960 }, { "epoch": 1.1464313772889345, "grad_norm": 0.6928384248316889, "learning_rate": 7.275382018963885e-06, "loss": 0.9474, "step": 18970 }, { "epoch": 1.1470357164440683, "grad_norm": 0.6798218765972116, "learning_rate": 7.272415486558026e-06, "loss": 0.952, "step": 18980 }, { "epoch": 1.1476400555992023, "grad_norm": 0.6960719947984609, "learning_rate": 7.269447945650282e-06, "loss": 0.9613, "step": 18990 }, { "epoch": 1.1482443947543362, "grad_norm": 0.6430908521862926, "learning_rate": 7.266479397557656e-06, "loss": 0.9289, "step": 19000 }, { "epoch": 1.14884873390947, "grad_norm": 0.6543530075263003, "learning_rate": 7.263509843597595e-06, "loss": 0.9268, "step": 19010 }, { "epoch": 1.1494530730646038, "grad_norm": 0.6470296770223022, "learning_rate": 7.260539285087988e-06, "loss": 0.9456, "step": 19020 }, { "epoch": 1.1500574122197378, "grad_norm": 0.635482263462426, "learning_rate": 7.257567723347179e-06, "loss": 0.9155, "step": 19030 }, { "epoch": 1.1506617513748716, "grad_norm": 0.6649600152854649, "learning_rate": 7.254595159693948e-06, "loss": 0.9409, "step": 19040 }, { "epoch": 1.1512660905300054, "grad_norm": 0.831945972839819, "learning_rate": 7.251621595447526e-06, "loss": 0.9188, "step": 19050 }, { "epoch": 1.1518704296851392, "grad_norm": 0.7504975278972764, "learning_rate": 7.248647031927586e-06, "loss": 0.9537, "step": 19060 }, { "epoch": 1.1524747688402732, "grad_norm": 0.7936176246908174, "learning_rate": 7.2456714704542434e-06, "loss": 0.9351, "step": 19070 }, { "epoch": 1.153079107995407, "grad_norm": 0.8218128454207213, "learning_rate": 7.242694912348057e-06, "loss": 0.9312, "step": 19080 }, { "epoch": 1.1536834471505408, "grad_norm": 0.8156632103920143, "learning_rate": 7.23971735893003e-06, "loss": 0.9326, "step": 19090 }, { "epoch": 1.1542877863056749, "grad_norm": 0.772009988766304, "learning_rate": 7.236738811521605e-06, "loss": 0.9334, "step": 19100 }, { "epoch": 1.1548921254608087, "grad_norm": 0.8043428640497754, "learning_rate": 7.233759271444667e-06, "loss": 0.9492, "step": 19110 }, { "epoch": 1.1554964646159425, "grad_norm": 0.7813595861102836, "learning_rate": 7.23077874002154e-06, "loss": 0.929, "step": 19120 }, { "epoch": 1.1561008037710763, "grad_norm": 0.7419627370864481, "learning_rate": 7.22779721857499e-06, "loss": 0.93, "step": 19130 }, { "epoch": 1.15670514292621, "grad_norm": 0.7824465114149218, "learning_rate": 7.22481470842822e-06, "loss": 0.9304, "step": 19140 }, { "epoch": 1.157309482081344, "grad_norm": 1.1738793303501924, "learning_rate": 7.221831210904874e-06, "loss": 0.9341, "step": 19150 }, { "epoch": 1.157913821236478, "grad_norm": 1.2367200919460224, "learning_rate": 7.218846727329033e-06, "loss": 0.9379, "step": 19160 }, { "epoch": 1.1585181603916117, "grad_norm": 1.2387832930701335, "learning_rate": 7.215861259025216e-06, "loss": 0.9347, "step": 19170 }, { "epoch": 1.1591224995467457, "grad_norm": 1.3907061165091044, "learning_rate": 7.212874807318378e-06, "loss": 0.9331, "step": 19180 }, { "epoch": 1.1597268387018795, "grad_norm": 1.1598510527775059, "learning_rate": 7.2098873735339145e-06, "loss": 0.9452, "step": 19190 }, { "epoch": 1.1603311778570133, "grad_norm": 0.927557777311021, "learning_rate": 7.20689895899765e-06, "loss": 0.9247, "step": 19200 }, { "epoch": 1.1609355170121471, "grad_norm": 0.8370659921602016, "learning_rate": 7.2039095650358494e-06, "loss": 0.9399, "step": 19210 }, { "epoch": 1.1615398561672812, "grad_norm": 0.9688812200462812, "learning_rate": 7.200919192975212e-06, "loss": 0.928, "step": 19220 }, { "epoch": 1.162144195322415, "grad_norm": 1.264778437936213, "learning_rate": 7.197927844142869e-06, "loss": 0.9392, "step": 19230 }, { "epoch": 1.1627485344775488, "grad_norm": 0.9055547884474286, "learning_rate": 7.194935519866386e-06, "loss": 0.922, "step": 19240 }, { "epoch": 1.1633528736326826, "grad_norm": 2.4215463145885345, "learning_rate": 7.191942221473762e-06, "loss": 0.9328, "step": 19250 }, { "epoch": 1.1639572127878166, "grad_norm": 2.504318524119521, "learning_rate": 7.188947950293428e-06, "loss": 0.9594, "step": 19260 }, { "epoch": 1.1645615519429504, "grad_norm": 2.630624707801629, "learning_rate": 7.1859527076542465e-06, "loss": 0.9327, "step": 19270 }, { "epoch": 1.1651658910980842, "grad_norm": 2.548969903935177, "learning_rate": 7.1829564948855116e-06, "loss": 0.9448, "step": 19280 }, { "epoch": 1.165770230253218, "grad_norm": 2.772859776142628, "learning_rate": 7.179959313316946e-06, "loss": 0.939, "step": 19290 }, { "epoch": 1.166374569408352, "grad_norm": 0.9708992928620127, "learning_rate": 7.176961164278705e-06, "loss": 0.9366, "step": 19300 }, { "epoch": 1.1669789085634859, "grad_norm": 0.9322807228718224, "learning_rate": 7.173962049101374e-06, "loss": 0.9242, "step": 19310 }, { "epoch": 1.1675832477186197, "grad_norm": 0.9071571047705957, "learning_rate": 7.170961969115964e-06, "loss": 0.9272, "step": 19320 }, { "epoch": 1.1681875868737535, "grad_norm": 0.9064026522166889, "learning_rate": 7.167960925653915e-06, "loss": 0.9143, "step": 19330 }, { "epoch": 1.1687919260288875, "grad_norm": 0.8856958909723631, "learning_rate": 7.164958920047096e-06, "loss": 0.9505, "step": 19340 }, { "epoch": 1.1693962651840213, "grad_norm": 0.9827473727432444, "learning_rate": 7.161955953627801e-06, "loss": 0.9409, "step": 19350 }, { "epoch": 1.170000604339155, "grad_norm": 1.1319777866603369, "learning_rate": 7.158952027728756e-06, "loss": 0.9585, "step": 19360 }, { "epoch": 1.170604943494289, "grad_norm": 0.9307210123248505, "learning_rate": 7.155947143683104e-06, "loss": 0.944, "step": 19370 }, { "epoch": 1.171209282649423, "grad_norm": 1.0866587614294696, "learning_rate": 7.152941302824419e-06, "loss": 0.9298, "step": 19380 }, { "epoch": 1.1718136218045567, "grad_norm": 0.9725624097208035, "learning_rate": 7.149934506486698e-06, "loss": 0.9095, "step": 19390 }, { "epoch": 1.1724179609596905, "grad_norm": 0.9764327494403443, "learning_rate": 7.146926756004362e-06, "loss": 0.9074, "step": 19400 }, { "epoch": 1.1730223001148246, "grad_norm": 1.0752513846267016, "learning_rate": 7.143918052712256e-06, "loss": 0.9389, "step": 19410 }, { "epoch": 1.1736266392699584, "grad_norm": 0.993877767810036, "learning_rate": 7.140908397945649e-06, "loss": 0.9595, "step": 19420 }, { "epoch": 1.1742309784250922, "grad_norm": 1.0741905300725103, "learning_rate": 7.13789779304023e-06, "loss": 0.951, "step": 19430 }, { "epoch": 1.174835317580226, "grad_norm": 1.0680461873299278, "learning_rate": 7.13488623933211e-06, "loss": 0.9613, "step": 19440 }, { "epoch": 1.1754396567353598, "grad_norm": 1.057180064547026, "learning_rate": 7.131873738157823e-06, "loss": 0.9306, "step": 19450 }, { "epoch": 1.1760439958904938, "grad_norm": 1.0007222482890263, "learning_rate": 7.128860290854321e-06, "loss": 0.9476, "step": 19460 }, { "epoch": 1.1766483350456276, "grad_norm": 1.0306466903209341, "learning_rate": 7.125845898758979e-06, "loss": 0.9239, "step": 19470 }, { "epoch": 1.1772526742007614, "grad_norm": 1.030204917596983, "learning_rate": 7.122830563209586e-06, "loss": 0.9203, "step": 19480 }, { "epoch": 1.1778570133558954, "grad_norm": 1.0916464812747928, "learning_rate": 7.119814285544355e-06, "loss": 0.9428, "step": 19490 }, { "epoch": 1.1784613525110292, "grad_norm": 1.113306921597439, "learning_rate": 7.116797067101917e-06, "loss": 0.9497, "step": 19500 }, { "epoch": 1.179065691666163, "grad_norm": 1.1805570984543383, "learning_rate": 7.113778909221316e-06, "loss": 0.9494, "step": 19510 }, { "epoch": 1.1796700308212968, "grad_norm": 1.1938755354106976, "learning_rate": 7.110759813242017e-06, "loss": 0.9199, "step": 19520 }, { "epoch": 1.1802743699764306, "grad_norm": 1.1266511284695175, "learning_rate": 7.1077397805038995e-06, "loss": 0.9359, "step": 19530 }, { "epoch": 1.1808787091315647, "grad_norm": 1.1392131018580411, "learning_rate": 7.10471881234726e-06, "loss": 0.9194, "step": 19540 }, { "epoch": 1.1814830482866985, "grad_norm": 0.9726978558822639, "learning_rate": 7.101696910112806e-06, "loss": 0.9195, "step": 19550 }, { "epoch": 1.1820873874418323, "grad_norm": 0.9318360286868769, "learning_rate": 7.0986740751416675e-06, "loss": 0.925, "step": 19560 }, { "epoch": 1.1826917265969663, "grad_norm": 0.9999454546800447, "learning_rate": 7.095650308775381e-06, "loss": 0.9658, "step": 19570 }, { "epoch": 1.1832960657521001, "grad_norm": 0.9926969226682162, "learning_rate": 7.092625612355901e-06, "loss": 0.944, "step": 19580 }, { "epoch": 1.183900404907234, "grad_norm": 0.9413123629939818, "learning_rate": 7.08959998722559e-06, "loss": 0.9432, "step": 19590 }, { "epoch": 1.1845047440623677, "grad_norm": 1.0186516597912494, "learning_rate": 7.086573434727231e-06, "loss": 0.9319, "step": 19600 }, { "epoch": 1.1851090832175017, "grad_norm": 1.0415123348345905, "learning_rate": 7.083545956204006e-06, "loss": 0.9109, "step": 19610 }, { "epoch": 1.1857134223726356, "grad_norm": 1.0024155603339588, "learning_rate": 7.08051755299952e-06, "loss": 0.9388, "step": 19620 }, { "epoch": 1.1863177615277694, "grad_norm": 1.1673005148467968, "learning_rate": 7.077488226457784e-06, "loss": 0.909, "step": 19630 }, { "epoch": 1.1869221006829032, "grad_norm": 1.0260071759869203, "learning_rate": 7.074457977923214e-06, "loss": 0.9143, "step": 19640 }, { "epoch": 1.1875264398380372, "grad_norm": 1.0489411635023616, "learning_rate": 7.071426808740644e-06, "loss": 0.9245, "step": 19650 }, { "epoch": 1.188130778993171, "grad_norm": 1.0212859157356728, "learning_rate": 7.068394720255311e-06, "loss": 0.919, "step": 19660 }, { "epoch": 1.1887351181483048, "grad_norm": 1.0004858463039392, "learning_rate": 7.065361713812859e-06, "loss": 0.9558, "step": 19670 }, { "epoch": 1.1893394573034386, "grad_norm": 1.0055819914161925, "learning_rate": 7.062327790759344e-06, "loss": 0.9453, "step": 19680 }, { "epoch": 1.1899437964585726, "grad_norm": 0.9899774815858461, "learning_rate": 7.059292952441224e-06, "loss": 0.9371, "step": 19690 }, { "epoch": 1.1905481356137064, "grad_norm": 1.2108514969546031, "learning_rate": 7.056257200205367e-06, "loss": 0.9425, "step": 19700 }, { "epoch": 1.1911524747688402, "grad_norm": 1.1980472870625456, "learning_rate": 7.053220535399043e-06, "loss": 0.9336, "step": 19710 }, { "epoch": 1.1917568139239743, "grad_norm": 1.1027450959423797, "learning_rate": 7.050182959369931e-06, "loss": 0.9447, "step": 19720 }, { "epoch": 1.192361153079108, "grad_norm": 1.1906947204228908, "learning_rate": 7.047144473466114e-06, "loss": 0.9286, "step": 19730 }, { "epoch": 1.1929654922342419, "grad_norm": 1.1083372382582157, "learning_rate": 7.044105079036075e-06, "loss": 0.9604, "step": 19740 }, { "epoch": 1.1935698313893757, "grad_norm": 1.0228367065038593, "learning_rate": 7.0410647774287e-06, "loss": 0.9332, "step": 19750 }, { "epoch": 1.1941741705445095, "grad_norm": 1.0723768135489609, "learning_rate": 7.038023569993284e-06, "loss": 0.9349, "step": 19760 }, { "epoch": 1.1947785096996435, "grad_norm": 1.0704030557256032, "learning_rate": 7.034981458079519e-06, "loss": 0.9431, "step": 19770 }, { "epoch": 1.1953828488547773, "grad_norm": 1.0754003471430496, "learning_rate": 7.0319384430375e-06, "loss": 0.9096, "step": 19780 }, { "epoch": 1.195987188009911, "grad_norm": 1.0611102234186296, "learning_rate": 7.028894526217722e-06, "loss": 0.9428, "step": 19790 }, { "epoch": 1.1965915271650451, "grad_norm": 1.2388424023247573, "learning_rate": 7.02584970897108e-06, "loss": 0.9018, "step": 19800 }, { "epoch": 1.197195866320179, "grad_norm": 1.243105411071229, "learning_rate": 7.022803992648867e-06, "loss": 0.922, "step": 19810 }, { "epoch": 1.1978002054753127, "grad_norm": 1.1943670106298478, "learning_rate": 7.01975737860278e-06, "loss": 0.9322, "step": 19820 }, { "epoch": 1.1984045446304465, "grad_norm": 1.2435586994894567, "learning_rate": 7.016709868184911e-06, "loss": 0.9354, "step": 19830 }, { "epoch": 1.1990088837855803, "grad_norm": 1.2061295025068992, "learning_rate": 7.013661462747751e-06, "loss": 0.9454, "step": 19840 }, { "epoch": 1.1996132229407144, "grad_norm": 1.3949446935351246, "learning_rate": 7.010612163644186e-06, "loss": 0.9441, "step": 19850 }, { "epoch": 1.2002175620958482, "grad_norm": 1.4460511324635015, "learning_rate": 7.007561972227501e-06, "loss": 0.9383, "step": 19860 }, { "epoch": 1.200821901250982, "grad_norm": 1.4539877051434926, "learning_rate": 7.004510889851376e-06, "loss": 0.9406, "step": 19870 }, { "epoch": 1.201426240406116, "grad_norm": 1.4008324687161817, "learning_rate": 7.001458917869887e-06, "loss": 0.9286, "step": 19880 }, { "epoch": 1.2020305795612498, "grad_norm": 1.4221315242319108, "learning_rate": 6.998406057637503e-06, "loss": 0.9516, "step": 19890 }, { "epoch": 1.2026349187163836, "grad_norm": 0.8890058067800306, "learning_rate": 6.995352310509091e-06, "loss": 0.9411, "step": 19900 }, { "epoch": 1.2032392578715174, "grad_norm": 0.8392562994216797, "learning_rate": 6.992297677839908e-06, "loss": 0.928, "step": 19910 }, { "epoch": 1.2038435970266514, "grad_norm": 0.8909889995816207, "learning_rate": 6.989242160985603e-06, "loss": 0.9388, "step": 19920 }, { "epoch": 1.2044479361817853, "grad_norm": 0.8642233967849721, "learning_rate": 6.986185761302224e-06, "loss": 0.9408, "step": 19930 }, { "epoch": 1.205052275336919, "grad_norm": 0.9460195355213684, "learning_rate": 6.983128480146203e-06, "loss": 0.9324, "step": 19940 }, { "epoch": 1.2056566144920529, "grad_norm": 0.8029051507326859, "learning_rate": 6.980070318874367e-06, "loss": 0.9353, "step": 19950 }, { "epoch": 1.2062609536471869, "grad_norm": 0.7858799944406785, "learning_rate": 6.977011278843934e-06, "loss": 0.9214, "step": 19960 }, { "epoch": 1.2068652928023207, "grad_norm": 0.7510011450562873, "learning_rate": 6.973951361412512e-06, "loss": 0.9139, "step": 19970 }, { "epoch": 1.2074696319574545, "grad_norm": 0.718384309839229, "learning_rate": 6.9708905679380956e-06, "loss": 0.9307, "step": 19980 }, { "epoch": 1.2080739711125883, "grad_norm": 0.7283282319790186, "learning_rate": 6.9678288997790725e-06, "loss": 0.9201, "step": 19990 }, { "epoch": 1.2086783102677223, "grad_norm": 0.8121757501780279, "learning_rate": 6.964766358294213e-06, "loss": 0.9533, "step": 20000 }, { "epoch": 1.2092826494228561, "grad_norm": 0.8782745966915562, "learning_rate": 6.961702944842682e-06, "loss": 0.9491, "step": 20010 }, { "epoch": 1.20988698857799, "grad_norm": 0.8059374276263742, "learning_rate": 6.958638660784026e-06, "loss": 0.9114, "step": 20020 }, { "epoch": 1.2104913277331237, "grad_norm": 0.7598256207588705, "learning_rate": 6.955573507478179e-06, "loss": 0.9135, "step": 20030 }, { "epoch": 1.2110956668882578, "grad_norm": 0.891771194767523, "learning_rate": 6.952507486285462e-06, "loss": 0.9427, "step": 20040 }, { "epoch": 1.2117000060433916, "grad_norm": 0.5764111661821675, "learning_rate": 6.94944059856658e-06, "loss": 0.9314, "step": 20050 }, { "epoch": 1.2123043451985254, "grad_norm": 0.5604924088026544, "learning_rate": 6.946372845682622e-06, "loss": 0.9319, "step": 20060 }, { "epoch": 1.2129086843536592, "grad_norm": 0.5784348320334823, "learning_rate": 6.943304228995064e-06, "loss": 0.934, "step": 20070 }, { "epoch": 1.2135130235087932, "grad_norm": 0.5956243090438809, "learning_rate": 6.940234749865763e-06, "loss": 0.9385, "step": 20080 }, { "epoch": 1.214117362663927, "grad_norm": 0.5837537568852901, "learning_rate": 6.937164409656958e-06, "loss": 0.9306, "step": 20090 }, { "epoch": 1.2147217018190608, "grad_norm": 0.6100500608247255, "learning_rate": 6.934093209731272e-06, "loss": 0.9442, "step": 20100 }, { "epoch": 1.2153260409741948, "grad_norm": 0.6824101665164553, "learning_rate": 6.931021151451709e-06, "loss": 0.9435, "step": 20110 }, { "epoch": 1.2159303801293286, "grad_norm": 0.6058840737726875, "learning_rate": 6.9279482361816516e-06, "loss": 0.9555, "step": 20120 }, { "epoch": 1.2165347192844624, "grad_norm": 0.620996171237021, "learning_rate": 6.924874465284869e-06, "loss": 0.9101, "step": 20130 }, { "epoch": 1.2171390584395962, "grad_norm": 0.6179480078942203, "learning_rate": 6.921799840125503e-06, "loss": 0.9458, "step": 20140 }, { "epoch": 1.21774339759473, "grad_norm": 0.8318185655300202, "learning_rate": 6.9187243620680765e-06, "loss": 0.9352, "step": 20150 }, { "epoch": 1.218347736749864, "grad_norm": 0.8301274637834942, "learning_rate": 6.9156480324774946e-06, "loss": 0.9199, "step": 20160 }, { "epoch": 1.2189520759049979, "grad_norm": 0.7714690574444714, "learning_rate": 6.912570852719036e-06, "loss": 0.9414, "step": 20170 }, { "epoch": 1.2195564150601317, "grad_norm": 0.8167329708260073, "learning_rate": 6.90949282415836e-06, "loss": 0.9314, "step": 20180 }, { "epoch": 1.2201607542152657, "grad_norm": 0.7844816059194251, "learning_rate": 6.9064139481615e-06, "loss": 0.9349, "step": 20190 }, { "epoch": 1.2207650933703995, "grad_norm": 0.5359931103425383, "learning_rate": 6.903334226094864e-06, "loss": 0.9443, "step": 20200 }, { "epoch": 1.2213694325255333, "grad_norm": 0.5407871108862375, "learning_rate": 6.900253659325242e-06, "loss": 0.9313, "step": 20210 }, { "epoch": 1.2219737716806671, "grad_norm": 0.5836312461149332, "learning_rate": 6.8971722492197934e-06, "loss": 0.9441, "step": 20220 }, { "epoch": 1.2225781108358011, "grad_norm": 0.5717076627819502, "learning_rate": 6.894089997146052e-06, "loss": 0.9215, "step": 20230 }, { "epoch": 1.223182449990935, "grad_norm": 0.5421335823155865, "learning_rate": 6.891006904471927e-06, "loss": 0.9224, "step": 20240 }, { "epoch": 1.2237867891460688, "grad_norm": 0.5283379070988189, "learning_rate": 6.887922972565702e-06, "loss": 0.9173, "step": 20250 }, { "epoch": 1.2243911283012026, "grad_norm": 0.515825534070461, "learning_rate": 6.884838202796028e-06, "loss": 0.9275, "step": 20260 }, { "epoch": 1.2249954674563366, "grad_norm": 0.5889256599898429, "learning_rate": 6.8817525965319345e-06, "loss": 0.9368, "step": 20270 }, { "epoch": 1.2255998066114704, "grad_norm": 0.5372474037594597, "learning_rate": 6.878666155142818e-06, "loss": 0.9296, "step": 20280 }, { "epoch": 1.2262041457666042, "grad_norm": 0.5077302684028948, "learning_rate": 6.8755788799984435e-06, "loss": 0.9367, "step": 20290 }, { "epoch": 1.226808484921738, "grad_norm": 0.5538548884538033, "learning_rate": 6.872490772468954e-06, "loss": 0.9261, "step": 20300 }, { "epoch": 1.227412824076872, "grad_norm": 0.5595078322602588, "learning_rate": 6.869401833924853e-06, "loss": 0.9317, "step": 20310 }, { "epoch": 1.2280171632320058, "grad_norm": 0.5356233206774714, "learning_rate": 6.866312065737018e-06, "loss": 0.9188, "step": 20320 }, { "epoch": 1.2286215023871396, "grad_norm": 0.5282016800917761, "learning_rate": 6.863221469276693e-06, "loss": 0.9466, "step": 20330 }, { "epoch": 1.2292258415422734, "grad_norm": 0.5504446506700361, "learning_rate": 6.8601300459154895e-06, "loss": 0.9336, "step": 20340 }, { "epoch": 1.2298301806974075, "grad_norm": 0.5503710980562528, "learning_rate": 6.8570377970253885e-06, "loss": 0.9008, "step": 20350 }, { "epoch": 1.2304345198525413, "grad_norm": 0.5133951305911965, "learning_rate": 6.853944723978735e-06, "loss": 0.9251, "step": 20360 }, { "epoch": 1.231038859007675, "grad_norm": 0.6032558781030611, "learning_rate": 6.8508508281482365e-06, "loss": 0.9208, "step": 20370 }, { "epoch": 1.2316431981628089, "grad_norm": 0.5318308450247039, "learning_rate": 6.847756110906974e-06, "loss": 0.9241, "step": 20380 }, { "epoch": 1.232247537317943, "grad_norm": 0.5349276043564706, "learning_rate": 6.844660573628387e-06, "loss": 0.925, "step": 20390 }, { "epoch": 1.2328518764730767, "grad_norm": 0.5965937715615065, "learning_rate": 6.841564217686279e-06, "loss": 0.9321, "step": 20400 }, { "epoch": 1.2334562156282105, "grad_norm": 0.5105433259348409, "learning_rate": 6.838467044454819e-06, "loss": 0.9255, "step": 20410 }, { "epoch": 1.2340605547833445, "grad_norm": 0.5401507283057976, "learning_rate": 6.835369055308536e-06, "loss": 0.9587, "step": 20420 }, { "epoch": 1.2346648939384783, "grad_norm": 0.529052759553093, "learning_rate": 6.832270251622326e-06, "loss": 0.9293, "step": 20430 }, { "epoch": 1.2352692330936121, "grad_norm": 0.6063008576277052, "learning_rate": 6.829170634771442e-06, "loss": 0.9491, "step": 20440 }, { "epoch": 1.235873572248746, "grad_norm": 0.5676452156183475, "learning_rate": 6.826070206131498e-06, "loss": 0.947, "step": 20450 }, { "epoch": 1.2364779114038797, "grad_norm": 0.5662977986267751, "learning_rate": 6.8229689670784695e-06, "loss": 0.9373, "step": 20460 }, { "epoch": 1.2370822505590138, "grad_norm": 0.531159891950673, "learning_rate": 6.8198669189886935e-06, "loss": 0.9183, "step": 20470 }, { "epoch": 1.2376865897141476, "grad_norm": 0.5932563623654429, "learning_rate": 6.8167640632388644e-06, "loss": 0.911, "step": 20480 }, { "epoch": 1.2382909288692814, "grad_norm": 0.5198335034293413, "learning_rate": 6.813660401206033e-06, "loss": 0.9282, "step": 20490 }, { "epoch": 1.2388952680244154, "grad_norm": 0.5938794346616102, "learning_rate": 6.810555934267611e-06, "loss": 0.9418, "step": 20500 }, { "epoch": 1.2394996071795492, "grad_norm": 0.6935352548280033, "learning_rate": 6.807450663801365e-06, "loss": 0.9421, "step": 20510 }, { "epoch": 1.240103946334683, "grad_norm": 0.6198638091006703, "learning_rate": 6.804344591185422e-06, "loss": 0.9375, "step": 20520 }, { "epoch": 1.2407082854898168, "grad_norm": 0.6465662753867024, "learning_rate": 6.8012377177982605e-06, "loss": 0.942, "step": 20530 }, { "epoch": 1.2413126246449506, "grad_norm": 0.622290026542129, "learning_rate": 6.798130045018715e-06, "loss": 0.9313, "step": 20540 }, { "epoch": 1.2419169638000847, "grad_norm": 0.7142818767669845, "learning_rate": 6.795021574225978e-06, "loss": 0.9381, "step": 20550 }, { "epoch": 1.2425213029552185, "grad_norm": 0.6939265203905661, "learning_rate": 6.791912306799594e-06, "loss": 0.944, "step": 20560 }, { "epoch": 1.2431256421103523, "grad_norm": 0.7295199942711916, "learning_rate": 6.7888022441194615e-06, "loss": 0.9421, "step": 20570 }, { "epoch": 1.2437299812654863, "grad_norm": 0.6712086757588075, "learning_rate": 6.785691387565829e-06, "loss": 0.9615, "step": 20580 }, { "epoch": 1.24433432042062, "grad_norm": 0.7734944438436397, "learning_rate": 6.782579738519304e-06, "loss": 0.9538, "step": 20590 }, { "epoch": 1.244938659575754, "grad_norm": 0.6285076237184368, "learning_rate": 6.779467298360837e-06, "loss": 0.9103, "step": 20600 }, { "epoch": 1.2455429987308877, "grad_norm": 0.6226915890652817, "learning_rate": 6.776354068471738e-06, "loss": 0.9355, "step": 20610 }, { "epoch": 1.2461473378860217, "grad_norm": 0.6456314600247918, "learning_rate": 6.7732400502336625e-06, "loss": 0.9304, "step": 20620 }, { "epoch": 1.2467516770411555, "grad_norm": 0.6399642965634079, "learning_rate": 6.770125245028617e-06, "loss": 0.928, "step": 20630 }, { "epoch": 1.2473560161962893, "grad_norm": 0.6213078373894296, "learning_rate": 6.7670096542389576e-06, "loss": 0.9167, "step": 20640 }, { "epoch": 1.2479603553514231, "grad_norm": 0.7801327277490291, "learning_rate": 6.763893279247389e-06, "loss": 0.9331, "step": 20650 }, { "epoch": 1.2485646945065572, "grad_norm": 0.7996747131481201, "learning_rate": 6.760776121436963e-06, "loss": 0.9478, "step": 20660 }, { "epoch": 1.249169033661691, "grad_norm": 0.8265767279808282, "learning_rate": 6.75765818219108e-06, "loss": 0.9329, "step": 20670 }, { "epoch": 1.2497733728168248, "grad_norm": 0.75948255351935, "learning_rate": 6.754539462893488e-06, "loss": 0.9371, "step": 20680 }, { "epoch": 1.2503777119719586, "grad_norm": 0.8198662867232713, "learning_rate": 6.751419964928279e-06, "loss": 0.9294, "step": 20690 }, { "epoch": 1.2509820511270926, "grad_norm": 0.7870382637790324, "learning_rate": 6.74829968967989e-06, "loss": 0.9589, "step": 20700 }, { "epoch": 1.2515863902822264, "grad_norm": 0.7670199166420851, "learning_rate": 6.745178638533106e-06, "loss": 0.9447, "step": 20710 }, { "epoch": 1.2521907294373602, "grad_norm": 0.7902133057307972, "learning_rate": 6.7420568128730545e-06, "loss": 0.9126, "step": 20720 }, { "epoch": 1.2527950685924942, "grad_norm": 0.7974377089002933, "learning_rate": 6.738934214085209e-06, "loss": 0.9413, "step": 20730 }, { "epoch": 1.253399407747628, "grad_norm": 0.7715117234540104, "learning_rate": 6.7358108435553804e-06, "loss": 0.9368, "step": 20740 }, { "epoch": 1.2540037469027618, "grad_norm": 1.20003798642118, "learning_rate": 6.732686702669729e-06, "loss": 0.9557, "step": 20750 }, { "epoch": 1.2546080860578956, "grad_norm": 1.2226903386953512, "learning_rate": 6.729561792814753e-06, "loss": 0.9278, "step": 20760 }, { "epoch": 1.2552124252130294, "grad_norm": 1.2217402732192495, "learning_rate": 6.726436115377291e-06, "loss": 0.9397, "step": 20770 }, { "epoch": 1.2558167643681635, "grad_norm": 1.2137601271828522, "learning_rate": 6.723309671744526e-06, "loss": 0.9468, "step": 20780 }, { "epoch": 1.2564211035232973, "grad_norm": 1.2663172756265997, "learning_rate": 6.7201824633039816e-06, "loss": 0.9395, "step": 20790 }, { "epoch": 1.257025442678431, "grad_norm": 0.9863430977961953, "learning_rate": 6.717054491443511e-06, "loss": 0.9478, "step": 20800 }, { "epoch": 1.257629781833565, "grad_norm": 1.061233152512319, "learning_rate": 6.7139257575513206e-06, "loss": 0.9326, "step": 20810 }, { "epoch": 1.258234120988699, "grad_norm": 1.0805984098847896, "learning_rate": 6.710796263015944e-06, "loss": 0.9586, "step": 20820 }, { "epoch": 1.2588384601438327, "grad_norm": 1.1130806362391232, "learning_rate": 6.707666009226258e-06, "loss": 0.9422, "step": 20830 }, { "epoch": 1.2594427992989665, "grad_norm": 0.9591251379760726, "learning_rate": 6.704534997571473e-06, "loss": 0.9274, "step": 20840 }, { "epoch": 1.2600471384541003, "grad_norm": 2.5530808287871634, "learning_rate": 6.701403229441138e-06, "loss": 0.9289, "step": 20850 }, { "epoch": 1.2606514776092343, "grad_norm": 2.4412256026625485, "learning_rate": 6.698270706225137e-06, "loss": 0.9335, "step": 20860 }, { "epoch": 1.2612558167643682, "grad_norm": 2.5725695170307636, "learning_rate": 6.695137429313691e-06, "loss": 0.8941, "step": 20870 }, { "epoch": 1.261860155919502, "grad_norm": 2.3312643495843393, "learning_rate": 6.69200340009735e-06, "loss": 0.927, "step": 20880 }, { "epoch": 1.262464495074636, "grad_norm": 3.1210480975513413, "learning_rate": 6.6888686199670035e-06, "loss": 0.9336, "step": 20890 }, { "epoch": 1.2630688342297698, "grad_norm": 0.9626136990919115, "learning_rate": 6.6857330903138754e-06, "loss": 0.9393, "step": 20900 }, { "epoch": 1.2636731733849036, "grad_norm": 1.1870849395147782, "learning_rate": 6.6825968125295125e-06, "loss": 0.936, "step": 20910 }, { "epoch": 1.2642775125400374, "grad_norm": 0.8252889775966359, "learning_rate": 6.679459788005805e-06, "loss": 0.9573, "step": 20920 }, { "epoch": 1.2648818516951712, "grad_norm": 0.9435531385065613, "learning_rate": 6.67632201813497e-06, "loss": 0.9324, "step": 20930 }, { "epoch": 1.2654861908503052, "grad_norm": 1.0158470995515982, "learning_rate": 6.673183504309553e-06, "loss": 0.9399, "step": 20940 }, { "epoch": 1.266090530005439, "grad_norm": 0.9879970036591537, "learning_rate": 6.670044247922431e-06, "loss": 0.9178, "step": 20950 }, { "epoch": 1.2666948691605728, "grad_norm": 1.0109463775278411, "learning_rate": 6.666904250366815e-06, "loss": 0.9419, "step": 20960 }, { "epoch": 1.2672992083157069, "grad_norm": 1.1555863088766083, "learning_rate": 6.663763513036237e-06, "loss": 0.9269, "step": 20970 }, { "epoch": 1.2679035474708407, "grad_norm": 0.9613787263799334, "learning_rate": 6.660622037324566e-06, "loss": 0.912, "step": 20980 }, { "epoch": 1.2685078866259745, "grad_norm": 1.0560038276585342, "learning_rate": 6.657479824625989e-06, "loss": 0.9286, "step": 20990 }, { "epoch": 1.2691122257811083, "grad_norm": 1.074153064961318, "learning_rate": 6.6543368763350315e-06, "loss": 0.9275, "step": 21000 }, { "epoch": 1.2697165649362423, "grad_norm": 0.9760807118304226, "learning_rate": 6.651193193846535e-06, "loss": 0.9269, "step": 21010 }, { "epoch": 1.270320904091376, "grad_norm": 1.058068280322029, "learning_rate": 6.648048778555672e-06, "loss": 0.9358, "step": 21020 }, { "epoch": 1.27092524324651, "grad_norm": 1.0926792005542167, "learning_rate": 6.644903631857941e-06, "loss": 0.9439, "step": 21030 }, { "epoch": 1.271529582401644, "grad_norm": 1.071420110327903, "learning_rate": 6.641757755149161e-06, "loss": 0.9275, "step": 21040 }, { "epoch": 1.2721339215567777, "grad_norm": 1.0070963072937444, "learning_rate": 6.638611149825479e-06, "loss": 0.9159, "step": 21050 }, { "epoch": 1.2727382607119115, "grad_norm": 1.040406979699207, "learning_rate": 6.635463817283365e-06, "loss": 0.9322, "step": 21060 }, { "epoch": 1.2733425998670453, "grad_norm": 1.0978359394302144, "learning_rate": 6.632315758919608e-06, "loss": 0.9459, "step": 21070 }, { "epoch": 1.2739469390221791, "grad_norm": 1.058434784024176, "learning_rate": 6.629166976131324e-06, "loss": 0.9314, "step": 21080 }, { "epoch": 1.2745512781773132, "grad_norm": 1.036345841163535, "learning_rate": 6.6260174703159465e-06, "loss": 0.924, "step": 21090 }, { "epoch": 1.275155617332447, "grad_norm": 1.154005199065569, "learning_rate": 6.622867242871231e-06, "loss": 0.9297, "step": 21100 }, { "epoch": 1.2757599564875808, "grad_norm": 1.1524729230836825, "learning_rate": 6.619716295195256e-06, "loss": 0.9265, "step": 21110 }, { "epoch": 1.2763642956427148, "grad_norm": 1.1713282910504288, "learning_rate": 6.616564628686417e-06, "loss": 0.911, "step": 21120 }, { "epoch": 1.2769686347978486, "grad_norm": 1.1139782251033674, "learning_rate": 6.613412244743428e-06, "loss": 0.9454, "step": 21130 }, { "epoch": 1.2775729739529824, "grad_norm": 1.1869427759294304, "learning_rate": 6.610259144765322e-06, "loss": 0.9345, "step": 21140 }, { "epoch": 1.2781773131081162, "grad_norm": 0.978351285050308, "learning_rate": 6.607105330151452e-06, "loss": 0.9123, "step": 21150 }, { "epoch": 1.27878165226325, "grad_norm": 1.0021079532976485, "learning_rate": 6.603950802301485e-06, "loss": 0.9241, "step": 21160 }, { "epoch": 1.279385991418384, "grad_norm": 1.0861909583578744, "learning_rate": 6.600795562615405e-06, "loss": 0.9244, "step": 21170 }, { "epoch": 1.2799903305735179, "grad_norm": 1.0018027000194207, "learning_rate": 6.597639612493516e-06, "loss": 0.923, "step": 21180 }, { "epoch": 1.2805946697286517, "grad_norm": 1.0369608770113576, "learning_rate": 6.594482953336429e-06, "loss": 0.9205, "step": 21190 }, { "epoch": 1.2811990088837857, "grad_norm": 1.0018049905608608, "learning_rate": 6.591325586545081e-06, "loss": 0.9666, "step": 21200 }, { "epoch": 1.2818033480389195, "grad_norm": 1.0542119094067566, "learning_rate": 6.588167513520714e-06, "loss": 0.936, "step": 21210 }, { "epoch": 1.2824076871940533, "grad_norm": 1.0257850303118394, "learning_rate": 6.585008735664884e-06, "loss": 0.9171, "step": 21220 }, { "epoch": 1.283012026349187, "grad_norm": 1.0629264313566607, "learning_rate": 6.581849254379464e-06, "loss": 0.9407, "step": 21230 }, { "epoch": 1.283616365504321, "grad_norm": 1.0645272350209467, "learning_rate": 6.578689071066637e-06, "loss": 0.9174, "step": 21240 }, { "epoch": 1.284220704659455, "grad_norm": 0.9942172899664239, "learning_rate": 6.575528187128897e-06, "loss": 0.9428, "step": 21250 }, { "epoch": 1.2848250438145887, "grad_norm": 0.9958710522566719, "learning_rate": 6.5723666039690515e-06, "loss": 0.9182, "step": 21260 }, { "epoch": 1.2854293829697225, "grad_norm": 0.9543256694020993, "learning_rate": 6.569204322990216e-06, "loss": 0.943, "step": 21270 }, { "epoch": 1.2860337221248566, "grad_norm": 0.9939418030273146, "learning_rate": 6.566041345595814e-06, "loss": 0.9319, "step": 21280 }, { "epoch": 1.2866380612799904, "grad_norm": 1.0547380875161039, "learning_rate": 6.562877673189583e-06, "loss": 0.9376, "step": 21290 }, { "epoch": 1.2872424004351242, "grad_norm": 1.171081254921601, "learning_rate": 6.5597133071755655e-06, "loss": 0.9326, "step": 21300 }, { "epoch": 1.287846739590258, "grad_norm": 1.1725934465440044, "learning_rate": 6.5565482489581115e-06, "loss": 0.9466, "step": 21310 }, { "epoch": 1.2884510787453918, "grad_norm": 1.2326777667202249, "learning_rate": 6.55338249994188e-06, "loss": 0.9367, "step": 21320 }, { "epoch": 1.2890554179005258, "grad_norm": 1.1597071943780024, "learning_rate": 6.550216061531837e-06, "loss": 0.9361, "step": 21330 }, { "epoch": 1.2896597570556596, "grad_norm": 1.095543226881079, "learning_rate": 6.547048935133251e-06, "loss": 0.9294, "step": 21340 }, { "epoch": 1.2902640962107936, "grad_norm": 1.089831708969902, "learning_rate": 6.5438811221516994e-06, "loss": 0.9526, "step": 21350 }, { "epoch": 1.2908684353659274, "grad_norm": 1.0514813037236386, "learning_rate": 6.540712623993063e-06, "loss": 0.9274, "step": 21360 }, { "epoch": 1.2914727745210612, "grad_norm": 0.995499283392343, "learning_rate": 6.537543442063526e-06, "loss": 0.9406, "step": 21370 }, { "epoch": 1.292077113676195, "grad_norm": 1.1740467112075939, "learning_rate": 6.534373577769578e-06, "loss": 0.9463, "step": 21380 }, { "epoch": 1.2926814528313288, "grad_norm": 1.107057930823461, "learning_rate": 6.53120303251801e-06, "loss": 0.9476, "step": 21390 }, { "epoch": 1.2932857919864629, "grad_norm": 1.1227755034068285, "learning_rate": 6.528031807715913e-06, "loss": 0.9267, "step": 21400 }, { "epoch": 1.2938901311415967, "grad_norm": 1.1532011204220003, "learning_rate": 6.524859904770686e-06, "loss": 0.9186, "step": 21410 }, { "epoch": 1.2944944702967305, "grad_norm": 1.2236358389287254, "learning_rate": 6.521687325090023e-06, "loss": 0.9327, "step": 21420 }, { "epoch": 1.2950988094518645, "grad_norm": 1.2654715751089771, "learning_rate": 6.518514070081918e-06, "loss": 0.9343, "step": 21430 }, { "epoch": 1.2957031486069983, "grad_norm": 1.1260807209923318, "learning_rate": 6.5153401411546715e-06, "loss": 0.9198, "step": 21440 }, { "epoch": 1.2963074877621321, "grad_norm": 1.3782201507384069, "learning_rate": 6.512165539716875e-06, "loss": 0.9419, "step": 21450 }, { "epoch": 1.296911826917266, "grad_norm": 1.4566152824347334, "learning_rate": 6.508990267177424e-06, "loss": 0.9142, "step": 21460 }, { "epoch": 1.2975161660723997, "grad_norm": 1.3611489497109344, "learning_rate": 6.505814324945509e-06, "loss": 0.9649, "step": 21470 }, { "epoch": 1.2981205052275337, "grad_norm": 1.3857152810604922, "learning_rate": 6.502637714430619e-06, "loss": 0.9406, "step": 21480 }, { "epoch": 1.2987248443826676, "grad_norm": 1.4554878354894845, "learning_rate": 6.499460437042539e-06, "loss": 0.9316, "step": 21490 }, { "epoch": 1.2993291835378014, "grad_norm": 0.9060526311398772, "learning_rate": 6.496282494191351e-06, "loss": 0.9375, "step": 21500 }, { "epoch": 1.2999335226929354, "grad_norm": 0.8434628343707185, "learning_rate": 6.493103887287429e-06, "loss": 0.9107, "step": 21510 }, { "epoch": 1.3005378618480692, "grad_norm": 0.9087772227016638, "learning_rate": 6.489924617741446e-06, "loss": 0.931, "step": 21520 }, { "epoch": 1.301142201003203, "grad_norm": 0.8517540136578798, "learning_rate": 6.486744686964367e-06, "loss": 0.9192, "step": 21530 }, { "epoch": 1.3017465401583368, "grad_norm": 0.8659201869544333, "learning_rate": 6.483564096367452e-06, "loss": 0.9543, "step": 21540 }, { "epoch": 1.3023508793134706, "grad_norm": 0.7780229044615488, "learning_rate": 6.480382847362251e-06, "loss": 0.9592, "step": 21550 }, { "epoch": 1.3029552184686046, "grad_norm": 0.7487894351169517, "learning_rate": 6.477200941360606e-06, "loss": 0.9359, "step": 21560 }, { "epoch": 1.3035595576237384, "grad_norm": 0.7718657395863984, "learning_rate": 6.474018379774658e-06, "loss": 0.954, "step": 21570 }, { "epoch": 1.3041638967788722, "grad_norm": 0.7804785430559377, "learning_rate": 6.470835164016828e-06, "loss": 0.912, "step": 21580 }, { "epoch": 1.3047682359340063, "grad_norm": 0.7710334144819115, "learning_rate": 6.467651295499835e-06, "loss": 0.9562, "step": 21590 }, { "epoch": 1.30537257508914, "grad_norm": 0.812678079427488, "learning_rate": 6.464466775636684e-06, "loss": 0.9337, "step": 21600 }, { "epoch": 1.3059769142442739, "grad_norm": 0.8467816023389082, "learning_rate": 6.461281605840671e-06, "loss": 0.9598, "step": 21610 }, { "epoch": 1.3065812533994077, "grad_norm": 0.934919839022193, "learning_rate": 6.458095787525379e-06, "loss": 0.9299, "step": 21620 }, { "epoch": 1.3071855925545415, "grad_norm": 0.8199809488776156, "learning_rate": 6.454909322104682e-06, "loss": 0.9308, "step": 21630 }, { "epoch": 1.3077899317096755, "grad_norm": 0.8362409965440692, "learning_rate": 6.4517222109927365e-06, "loss": 0.9401, "step": 21640 }, { "epoch": 1.3083942708648093, "grad_norm": 0.5492779152089806, "learning_rate": 6.448534455603987e-06, "loss": 0.9186, "step": 21650 }, { "epoch": 1.308998610019943, "grad_norm": 0.5831708502837989, "learning_rate": 6.445346057353169e-06, "loss": 0.9238, "step": 21660 }, { "epoch": 1.3096029491750771, "grad_norm": 0.569977602112014, "learning_rate": 6.442157017655296e-06, "loss": 0.9309, "step": 21670 }, { "epoch": 1.310207288330211, "grad_norm": 0.5492876954598281, "learning_rate": 6.438967337925668e-06, "loss": 0.94, "step": 21680 }, { "epoch": 1.3108116274853447, "grad_norm": 0.560845223605572, "learning_rate": 6.435777019579874e-06, "loss": 0.94, "step": 21690 }, { "epoch": 1.3114159666404785, "grad_norm": 0.6371161033969285, "learning_rate": 6.432586064033779e-06, "loss": 0.904, "step": 21700 }, { "epoch": 1.3120203057956126, "grad_norm": 0.5786682172760027, "learning_rate": 6.429394472703539e-06, "loss": 0.9374, "step": 21710 }, { "epoch": 1.3126246449507464, "grad_norm": 0.6442147002487556, "learning_rate": 6.426202247005584e-06, "loss": 0.9345, "step": 21720 }, { "epoch": 1.3132289841058802, "grad_norm": 0.621374919533826, "learning_rate": 6.423009388356629e-06, "loss": 0.9392, "step": 21730 }, { "epoch": 1.3138333232610142, "grad_norm": 0.6395976854719905, "learning_rate": 6.419815898173674e-06, "loss": 0.9349, "step": 21740 }, { "epoch": 1.314437662416148, "grad_norm": 0.7928798710105224, "learning_rate": 6.416621777873992e-06, "loss": 0.9164, "step": 21750 }, { "epoch": 1.3150420015712818, "grad_norm": 0.8258525432311563, "learning_rate": 6.413427028875138e-06, "loss": 0.8963, "step": 21760 }, { "epoch": 1.3156463407264156, "grad_norm": 0.8690828653097746, "learning_rate": 6.41023165259495e-06, "loss": 0.9446, "step": 21770 }, { "epoch": 1.3162506798815494, "grad_norm": 0.9729642447657476, "learning_rate": 6.407035650451541e-06, "loss": 0.9444, "step": 21780 }, { "epoch": 1.3168550190366834, "grad_norm": 0.7846911672514052, "learning_rate": 6.4038390238633e-06, "loss": 0.9351, "step": 21790 }, { "epoch": 1.3174593581918173, "grad_norm": 0.5747159695914157, "learning_rate": 6.400641774248899e-06, "loss": 0.9379, "step": 21800 }, { "epoch": 1.318063697346951, "grad_norm": 0.5362650986243326, "learning_rate": 6.397443903027281e-06, "loss": 0.96, "step": 21810 }, { "epoch": 1.318668036502085, "grad_norm": 0.5475275595662574, "learning_rate": 6.394245411617664e-06, "loss": 0.9388, "step": 21820 }, { "epoch": 1.3192723756572189, "grad_norm": 0.5570424810631336, "learning_rate": 6.3910463014395496e-06, "loss": 0.9409, "step": 21830 }, { "epoch": 1.3198767148123527, "grad_norm": 0.5283875446225601, "learning_rate": 6.387846573912704e-06, "loss": 0.9327, "step": 21840 }, { "epoch": 1.3204810539674865, "grad_norm": 0.5182538742220805, "learning_rate": 6.384646230457173e-06, "loss": 0.9101, "step": 21850 }, { "epoch": 1.3210853931226203, "grad_norm": 0.5428016082066797, "learning_rate": 6.381445272493276e-06, "loss": 0.936, "step": 21860 }, { "epoch": 1.3216897322777543, "grad_norm": 0.549710069424863, "learning_rate": 6.3782437014416e-06, "loss": 0.9511, "step": 21870 }, { "epoch": 1.3222940714328881, "grad_norm": 0.5480501706869516, "learning_rate": 6.37504151872301e-06, "loss": 0.9673, "step": 21880 }, { "epoch": 1.322898410588022, "grad_norm": 0.5167824004896747, "learning_rate": 6.371838725758642e-06, "loss": 0.9276, "step": 21890 }, { "epoch": 1.323502749743156, "grad_norm": 0.5570338893498351, "learning_rate": 6.3686353239698974e-06, "loss": 0.9353, "step": 21900 }, { "epoch": 1.3241070888982898, "grad_norm": 0.5545426919234545, "learning_rate": 6.365431314778454e-06, "loss": 0.9216, "step": 21910 }, { "epoch": 1.3247114280534236, "grad_norm": 0.5604850610266743, "learning_rate": 6.3622266996062564e-06, "loss": 0.9615, "step": 21920 }, { "epoch": 1.3253157672085574, "grad_norm": 0.5483329374231105, "learning_rate": 6.359021479875515e-06, "loss": 0.958, "step": 21930 }, { "epoch": 1.3259201063636912, "grad_norm": 0.5664060019149888, "learning_rate": 6.355815657008717e-06, "loss": 0.94, "step": 21940 }, { "epoch": 1.3265244455188252, "grad_norm": 0.5447665700072896, "learning_rate": 6.352609232428608e-06, "loss": 0.9598, "step": 21950 }, { "epoch": 1.327128784673959, "grad_norm": 0.5112442863219152, "learning_rate": 6.349402207558206e-06, "loss": 0.9008, "step": 21960 }, { "epoch": 1.3277331238290928, "grad_norm": 0.541148629513141, "learning_rate": 6.346194583820795e-06, "loss": 0.9335, "step": 21970 }, { "epoch": 1.3283374629842268, "grad_norm": 0.5415610638277757, "learning_rate": 6.3429863626399245e-06, "loss": 0.9308, "step": 21980 }, { "epoch": 1.3289418021393606, "grad_norm": 0.5223660122979568, "learning_rate": 6.339777545439405e-06, "loss": 0.8859, "step": 21990 }, { "epoch": 1.3295461412944944, "grad_norm": 0.5690876138385812, "learning_rate": 6.33656813364332e-06, "loss": 0.94, "step": 22000 }, { "epoch": 1.3301504804496282, "grad_norm": 0.5365625441405761, "learning_rate": 6.333358128676006e-06, "loss": 0.9512, "step": 22010 }, { "epoch": 1.3307548196047623, "grad_norm": 0.5493141422193557, "learning_rate": 6.330147531962073e-06, "loss": 0.9371, "step": 22020 }, { "epoch": 1.331359158759896, "grad_norm": 0.5586144619198927, "learning_rate": 6.326936344926389e-06, "loss": 0.9107, "step": 22030 }, { "epoch": 1.3319634979150299, "grad_norm": 0.5249760688552998, "learning_rate": 6.323724568994083e-06, "loss": 0.9414, "step": 22040 }, { "epoch": 1.332567837070164, "grad_norm": 0.5266686959945355, "learning_rate": 6.320512205590548e-06, "loss": 0.914, "step": 22050 }, { "epoch": 1.3331721762252977, "grad_norm": 0.5404602508706192, "learning_rate": 6.3172992561414345e-06, "loss": 0.9135, "step": 22060 }, { "epoch": 1.3337765153804315, "grad_norm": 0.5629736014165325, "learning_rate": 6.314085722072656e-06, "loss": 0.9095, "step": 22070 }, { "epoch": 1.3343808545355653, "grad_norm": 0.5225209520001106, "learning_rate": 6.310871604810386e-06, "loss": 0.9101, "step": 22080 }, { "epoch": 1.3349851936906991, "grad_norm": 0.5800260535790562, "learning_rate": 6.307656905781053e-06, "loss": 0.9151, "step": 22090 }, { "epoch": 1.3355895328458331, "grad_norm": 0.6439688632145437, "learning_rate": 6.3044416264113465e-06, "loss": 0.9512, "step": 22100 }, { "epoch": 1.336193872000967, "grad_norm": 0.5761871565203135, "learning_rate": 6.301225768128213e-06, "loss": 0.9263, "step": 22110 }, { "epoch": 1.3367982111561008, "grad_norm": 0.630189294795375, "learning_rate": 6.298009332358857e-06, "loss": 0.9544, "step": 22120 }, { "epoch": 1.3374025503112348, "grad_norm": 0.5845267401828218, "learning_rate": 6.294792320530736e-06, "loss": 0.911, "step": 22130 }, { "epoch": 1.3380068894663686, "grad_norm": 0.6126744642079395, "learning_rate": 6.2915747340715684e-06, "loss": 0.9287, "step": 22140 }, { "epoch": 1.3386112286215024, "grad_norm": 0.6835343502301523, "learning_rate": 6.288356574409321e-06, "loss": 0.9261, "step": 22150 }, { "epoch": 1.3392155677766362, "grad_norm": 0.7743374738155538, "learning_rate": 6.285137842972221e-06, "loss": 0.94, "step": 22160 }, { "epoch": 1.33981990693177, "grad_norm": 0.6497260732966652, "learning_rate": 6.281918541188746e-06, "loss": 0.9238, "step": 22170 }, { "epoch": 1.340424246086904, "grad_norm": 0.7557369605602523, "learning_rate": 6.278698670487628e-06, "loss": 0.9287, "step": 22180 }, { "epoch": 1.3410285852420378, "grad_norm": 0.6030564395859551, "learning_rate": 6.275478232297852e-06, "loss": 0.9123, "step": 22190 }, { "epoch": 1.3416329243971716, "grad_norm": 0.6327443539400669, "learning_rate": 6.272257228048653e-06, "loss": 0.9348, "step": 22200 }, { "epoch": 1.3422372635523057, "grad_norm": 0.5813519839285867, "learning_rate": 6.269035659169515e-06, "loss": 0.9157, "step": 22210 }, { "epoch": 1.3428416027074395, "grad_norm": 0.6167215685631435, "learning_rate": 6.2658135270901834e-06, "loss": 0.9327, "step": 22220 }, { "epoch": 1.3434459418625733, "grad_norm": 0.6906512066095957, "learning_rate": 6.2625908332406404e-06, "loss": 0.9225, "step": 22230 }, { "epoch": 1.344050281017707, "grad_norm": 0.6067945734676988, "learning_rate": 6.2593675790511235e-06, "loss": 0.9667, "step": 22240 }, { "epoch": 1.3446546201728409, "grad_norm": 0.8204204225475179, "learning_rate": 6.25614376595212e-06, "loss": 0.9319, "step": 22250 }, { "epoch": 1.345258959327975, "grad_norm": 0.7973932270460289, "learning_rate": 6.2529193953743615e-06, "loss": 0.9263, "step": 22260 }, { "epoch": 1.3458632984831087, "grad_norm": 0.8121349243326759, "learning_rate": 6.24969446874883e-06, "loss": 0.9359, "step": 22270 }, { "epoch": 1.3464676376382425, "grad_norm": 0.8015237906389019, "learning_rate": 6.246468987506757e-06, "loss": 0.9211, "step": 22280 }, { "epoch": 1.3470719767933765, "grad_norm": 0.8161251810751748, "learning_rate": 6.243242953079611e-06, "loss": 0.942, "step": 22290 }, { "epoch": 1.3476763159485103, "grad_norm": 0.7685221795581088, "learning_rate": 6.240016366899114e-06, "loss": 0.9415, "step": 22300 }, { "epoch": 1.3482806551036441, "grad_norm": 0.7676335412252995, "learning_rate": 6.2367892303972325e-06, "loss": 0.9058, "step": 22310 }, { "epoch": 1.348884994258778, "grad_norm": 0.8601381268208241, "learning_rate": 6.233561545006173e-06, "loss": 0.9434, "step": 22320 }, { "epoch": 1.3494893334139118, "grad_norm": 0.8024577878501143, "learning_rate": 6.230333312158385e-06, "loss": 0.9353, "step": 22330 }, { "epoch": 1.3500936725690458, "grad_norm": 0.7592737141146152, "learning_rate": 6.22710453328657e-06, "loss": 0.9227, "step": 22340 }, { "epoch": 1.3506980117241796, "grad_norm": 1.185353453725579, "learning_rate": 6.223875209823661e-06, "loss": 0.9226, "step": 22350 }, { "epoch": 1.3513023508793136, "grad_norm": 1.2427121701483452, "learning_rate": 6.22064534320284e-06, "loss": 0.9243, "step": 22360 }, { "epoch": 1.3519066900344474, "grad_norm": 1.2496198440663855, "learning_rate": 6.217414934857526e-06, "loss": 0.9414, "step": 22370 }, { "epoch": 1.3525110291895812, "grad_norm": 1.1937846542128254, "learning_rate": 6.21418398622138e-06, "loss": 0.9171, "step": 22380 }, { "epoch": 1.353115368344715, "grad_norm": 1.1813710547520198, "learning_rate": 6.210952498728302e-06, "loss": 0.9362, "step": 22390 }, { "epoch": 1.3537197074998488, "grad_norm": 1.1441829380985113, "learning_rate": 6.2077204738124316e-06, "loss": 0.8943, "step": 22400 }, { "epoch": 1.3543240466549828, "grad_norm": 0.830010673067638, "learning_rate": 6.204487912908148e-06, "loss": 0.9165, "step": 22410 }, { "epoch": 1.3549283858101167, "grad_norm": 0.8596609071558763, "learning_rate": 6.201254817450066e-06, "loss": 0.9326, "step": 22420 }, { "epoch": 1.3555327249652505, "grad_norm": 0.778697432172059, "learning_rate": 6.198021188873041e-06, "loss": 0.9492, "step": 22430 }, { "epoch": 1.3561370641203845, "grad_norm": 0.8219141931965135, "learning_rate": 6.19478702861216e-06, "loss": 0.9429, "step": 22440 }, { "epoch": 1.3567414032755183, "grad_norm": 2.7015732155787213, "learning_rate": 6.191552338102752e-06, "loss": 0.9313, "step": 22450 }, { "epoch": 1.357345742430652, "grad_norm": 2.247523982015999, "learning_rate": 6.188317118780376e-06, "loss": 0.9138, "step": 22460 }, { "epoch": 1.357950081585786, "grad_norm": 2.5207787575513745, "learning_rate": 6.1850813720808265e-06, "loss": 0.911, "step": 22470 }, { "epoch": 1.3585544207409197, "grad_norm": 2.5163477113093853, "learning_rate": 6.181845099440138e-06, "loss": 0.9319, "step": 22480 }, { "epoch": 1.3591587598960537, "grad_norm": 2.393759477116278, "learning_rate": 6.1786083022945695e-06, "loss": 0.9434, "step": 22490 }, { "epoch": 1.3597630990511875, "grad_norm": 1.0232534242601, "learning_rate": 6.175370982080621e-06, "loss": 0.9421, "step": 22500 }, { "epoch": 1.3603674382063213, "grad_norm": 1.005498263751018, "learning_rate": 6.172133140235017e-06, "loss": 0.9166, "step": 22510 }, { "epoch": 1.3609717773614554, "grad_norm": 0.8964003157565335, "learning_rate": 6.168894778194718e-06, "loss": 0.9587, "step": 22520 }, { "epoch": 1.3615761165165892, "grad_norm": 0.9831363965925539, "learning_rate": 6.165655897396916e-06, "loss": 0.9371, "step": 22530 }, { "epoch": 1.362180455671723, "grad_norm": 1.0242360823936525, "learning_rate": 6.1624164992790324e-06, "loss": 0.9461, "step": 22540 }, { "epoch": 1.3627847948268568, "grad_norm": 0.9251969235022647, "learning_rate": 6.159176585278716e-06, "loss": 0.9385, "step": 22550 }, { "epoch": 1.3633891339819906, "grad_norm": 1.0123875241828533, "learning_rate": 6.155936156833846e-06, "loss": 0.9301, "step": 22560 }, { "epoch": 1.3639934731371246, "grad_norm": 1.0013237217353859, "learning_rate": 6.152695215382532e-06, "loss": 0.9518, "step": 22570 }, { "epoch": 1.3645978122922584, "grad_norm": 1.006583334831408, "learning_rate": 6.149453762363106e-06, "loss": 0.9242, "step": 22580 }, { "epoch": 1.3652021514473922, "grad_norm": 1.0363878873251027, "learning_rate": 6.146211799214135e-06, "loss": 0.9292, "step": 22590 }, { "epoch": 1.3658064906025262, "grad_norm": 1.0217693890776098, "learning_rate": 6.142969327374404e-06, "loss": 0.9578, "step": 22600 }, { "epoch": 1.36641082975766, "grad_norm": 1.052903444545231, "learning_rate": 6.139726348282929e-06, "loss": 0.929, "step": 22610 }, { "epoch": 1.3670151689127938, "grad_norm": 1.0903198118444486, "learning_rate": 6.13648286337895e-06, "loss": 0.9399, "step": 22620 }, { "epoch": 1.3676195080679276, "grad_norm": 1.0931020828190434, "learning_rate": 6.13323887410193e-06, "loss": 0.9501, "step": 22630 }, { "epoch": 1.3682238472230615, "grad_norm": 1.058246207747843, "learning_rate": 6.1299943818915585e-06, "loss": 0.912, "step": 22640 }, { "epoch": 1.3688281863781955, "grad_norm": 0.9972810901398478, "learning_rate": 6.126749388187746e-06, "loss": 0.9241, "step": 22650 }, { "epoch": 1.3694325255333293, "grad_norm": 1.009710890428033, "learning_rate": 6.1235038944306255e-06, "loss": 0.9346, "step": 22660 }, { "epoch": 1.370036864688463, "grad_norm": 1.1395487232627965, "learning_rate": 6.120257902060553e-06, "loss": 0.9735, "step": 22670 }, { "epoch": 1.3706412038435971, "grad_norm": 1.0126362471346355, "learning_rate": 6.117011412518107e-06, "loss": 0.9277, "step": 22680 }, { "epoch": 1.371245542998731, "grad_norm": 1.081490879804867, "learning_rate": 6.113764427244082e-06, "loss": 0.9024, "step": 22690 }, { "epoch": 1.3718498821538647, "grad_norm": 1.1515774599267152, "learning_rate": 6.1105169476795e-06, "loss": 0.9742, "step": 22700 }, { "epoch": 1.3724542213089985, "grad_norm": 1.148094541529228, "learning_rate": 6.1072689752655946e-06, "loss": 0.9274, "step": 22710 }, { "epoch": 1.3730585604641325, "grad_norm": 1.1897230912869639, "learning_rate": 6.104020511443822e-06, "loss": 0.9272, "step": 22720 }, { "epoch": 1.3736628996192664, "grad_norm": 1.1238871800753303, "learning_rate": 6.100771557655857e-06, "loss": 0.9516, "step": 22730 }, { "epoch": 1.3742672387744002, "grad_norm": 1.2501363161240353, "learning_rate": 6.097522115343591e-06, "loss": 0.9383, "step": 22740 }, { "epoch": 1.3748715779295342, "grad_norm": 1.0858864801854375, "learning_rate": 6.0942721859491305e-06, "loss": 0.9186, "step": 22750 }, { "epoch": 1.375475917084668, "grad_norm": 1.0054737063992778, "learning_rate": 6.091021770914801e-06, "loss": 0.9413, "step": 22760 }, { "epoch": 1.3760802562398018, "grad_norm": 1.0130363302378544, "learning_rate": 6.087770871683142e-06, "loss": 0.9357, "step": 22770 }, { "epoch": 1.3766845953949356, "grad_norm": 1.0346428589517516, "learning_rate": 6.084519489696907e-06, "loss": 0.9363, "step": 22780 }, { "epoch": 1.3772889345500694, "grad_norm": 0.9612419586153981, "learning_rate": 6.081267626399067e-06, "loss": 0.9515, "step": 22790 }, { "epoch": 1.3778932737052034, "grad_norm": 1.002256217744848, "learning_rate": 6.078015283232803e-06, "loss": 0.9199, "step": 22800 }, { "epoch": 1.3784976128603372, "grad_norm": 1.038285108587607, "learning_rate": 6.0747624616415105e-06, "loss": 0.9351, "step": 22810 }, { "epoch": 1.379101952015471, "grad_norm": 1.069573709718411, "learning_rate": 6.071509163068798e-06, "loss": 0.931, "step": 22820 }, { "epoch": 1.379706291170605, "grad_norm": 1.1036864995574835, "learning_rate": 6.068255388958485e-06, "loss": 0.9292, "step": 22830 }, { "epoch": 1.3803106303257389, "grad_norm": 1.0905563157319802, "learning_rate": 6.0650011407546e-06, "loss": 0.944, "step": 22840 }, { "epoch": 1.3809149694808727, "grad_norm": 1.043855993639079, "learning_rate": 6.061746419901389e-06, "loss": 0.9394, "step": 22850 }, { "epoch": 1.3815193086360065, "grad_norm": 1.043080650091453, "learning_rate": 6.058491227843295e-06, "loss": 0.9339, "step": 22860 }, { "epoch": 1.3821236477911403, "grad_norm": 0.9964559186294396, "learning_rate": 6.0552355660249855e-06, "loss": 0.9151, "step": 22870 }, { "epoch": 1.3827279869462743, "grad_norm": 1.031683146749357, "learning_rate": 6.051979435891324e-06, "loss": 0.9368, "step": 22880 }, { "epoch": 1.383332326101408, "grad_norm": 0.9972540871676556, "learning_rate": 6.048722838887388e-06, "loss": 0.9447, "step": 22890 }, { "epoch": 1.383936665256542, "grad_norm": 1.1069890579146098, "learning_rate": 6.045465776458462e-06, "loss": 0.9524, "step": 22900 }, { "epoch": 1.384541004411676, "grad_norm": 1.1312219577350453, "learning_rate": 6.042208250050035e-06, "loss": 0.9265, "step": 22910 }, { "epoch": 1.3851453435668097, "grad_norm": 1.106722136380151, "learning_rate": 6.0389502611078e-06, "loss": 0.9296, "step": 22920 }, { "epoch": 1.3857496827219435, "grad_norm": 1.128971079243757, "learning_rate": 6.035691811077662e-06, "loss": 0.9169, "step": 22930 }, { "epoch": 1.3863540218770773, "grad_norm": 1.0760346175620863, "learning_rate": 6.032432901405724e-06, "loss": 0.9555, "step": 22940 }, { "epoch": 1.3869583610322112, "grad_norm": 1.0687632502766016, "learning_rate": 6.029173533538298e-06, "loss": 0.9219, "step": 22950 }, { "epoch": 1.3875627001873452, "grad_norm": 1.0552049540779944, "learning_rate": 6.025913708921897e-06, "loss": 0.9248, "step": 22960 }, { "epoch": 1.388167039342479, "grad_norm": 1.0759021138327665, "learning_rate": 6.022653429003234e-06, "loss": 0.9546, "step": 22970 }, { "epoch": 1.3887713784976128, "grad_norm": 1.1756032399045384, "learning_rate": 6.019392695229229e-06, "loss": 0.9385, "step": 22980 }, { "epoch": 1.3893757176527468, "grad_norm": 1.0496975412144864, "learning_rate": 6.016131509047002e-06, "loss": 0.9466, "step": 22990 }, { "epoch": 1.3899800568078806, "grad_norm": 1.2054130693796088, "learning_rate": 6.0128698719038715e-06, "loss": 0.9247, "step": 23000 }, { "epoch": 1.3905843959630144, "grad_norm": 1.2071874990069786, "learning_rate": 6.009607785247356e-06, "loss": 0.9208, "step": 23010 }, { "epoch": 1.3911887351181482, "grad_norm": 1.1972046327312493, "learning_rate": 6.00634525052518e-06, "loss": 0.9507, "step": 23020 }, { "epoch": 1.3917930742732822, "grad_norm": 1.1407249294411796, "learning_rate": 6.003082269185257e-06, "loss": 0.9325, "step": 23030 }, { "epoch": 1.392397413428416, "grad_norm": 1.22365586421668, "learning_rate": 5.999818842675706e-06, "loss": 0.9231, "step": 23040 }, { "epoch": 1.3930017525835499, "grad_norm": 1.3928813699944156, "learning_rate": 5.996554972444842e-06, "loss": 0.9358, "step": 23050 }, { "epoch": 1.3936060917386839, "grad_norm": 1.3713560730860463, "learning_rate": 5.993290659941175e-06, "loss": 0.9461, "step": 23060 }, { "epoch": 1.3942104308938177, "grad_norm": 1.3616165234332318, "learning_rate": 5.990025906613413e-06, "loss": 0.9321, "step": 23070 }, { "epoch": 1.3948147700489515, "grad_norm": 1.3069118442311392, "learning_rate": 5.986760713910458e-06, "loss": 0.9523, "step": 23080 }, { "epoch": 1.3954191092040853, "grad_norm": 1.4267578130905407, "learning_rate": 5.9834950832814085e-06, "loss": 0.9243, "step": 23090 }, { "epoch": 1.396023448359219, "grad_norm": 0.8370332104615356, "learning_rate": 5.980229016175558e-06, "loss": 0.9032, "step": 23100 }, { "epoch": 1.3966277875143531, "grad_norm": 0.8651910531255127, "learning_rate": 5.976962514042392e-06, "loss": 0.9363, "step": 23110 }, { "epoch": 1.397232126669487, "grad_norm": 0.8778088344901273, "learning_rate": 5.9736955783315905e-06, "loss": 0.943, "step": 23120 }, { "epoch": 1.3978364658246207, "grad_norm": 0.8349847974894837, "learning_rate": 5.970428210493024e-06, "loss": 0.925, "step": 23130 }, { "epoch": 1.3984408049797548, "grad_norm": 0.8478423563184788, "learning_rate": 5.967160411976757e-06, "loss": 0.9446, "step": 23140 }, { "epoch": 1.3990451441348886, "grad_norm": 0.7579262321762854, "learning_rate": 5.9638921842330435e-06, "loss": 0.9225, "step": 23150 }, { "epoch": 1.3996494832900224, "grad_norm": 0.7604510132674285, "learning_rate": 5.960623528712328e-06, "loss": 0.912, "step": 23160 }, { "epoch": 1.4002538224451562, "grad_norm": 0.7626086586725821, "learning_rate": 5.957354446865247e-06, "loss": 0.937, "step": 23170 }, { "epoch": 1.40085816160029, "grad_norm": 0.7236251629362629, "learning_rate": 5.954084940142621e-06, "loss": 0.9577, "step": 23180 }, { "epoch": 1.401462500755424, "grad_norm": 0.832589112317401, "learning_rate": 5.950815009995469e-06, "loss": 0.9352, "step": 23190 }, { "epoch": 1.4020668399105578, "grad_norm": 0.8127616193532974, "learning_rate": 5.947544657874984e-06, "loss": 0.9426, "step": 23200 }, { "epoch": 1.4026711790656916, "grad_norm": 0.8209702452228631, "learning_rate": 5.944273885232561e-06, "loss": 0.9372, "step": 23210 }, { "epoch": 1.4032755182208256, "grad_norm": 0.9392017093993318, "learning_rate": 5.94100269351977e-06, "loss": 0.9408, "step": 23220 }, { "epoch": 1.4038798573759594, "grad_norm": 0.8472141843693116, "learning_rate": 5.937731084188372e-06, "loss": 0.9187, "step": 23230 }, { "epoch": 1.4044841965310932, "grad_norm": 0.7879784182064267, "learning_rate": 5.934459058690314e-06, "loss": 0.9419, "step": 23240 }, { "epoch": 1.405088535686227, "grad_norm": 0.5672758982938461, "learning_rate": 5.931186618477727e-06, "loss": 0.9197, "step": 23250 }, { "epoch": 1.4056928748413609, "grad_norm": 0.5628166222146257, "learning_rate": 5.9279137650029226e-06, "loss": 0.9241, "step": 23260 }, { "epoch": 1.4062972139964949, "grad_norm": 0.5793938583559338, "learning_rate": 5.924640499718404e-06, "loss": 0.916, "step": 23270 }, { "epoch": 1.4069015531516287, "grad_norm": 0.6043414574811208, "learning_rate": 5.921366824076846e-06, "loss": 0.9331, "step": 23280 }, { "epoch": 1.4075058923067625, "grad_norm": 0.5808669327490961, "learning_rate": 5.918092739531116e-06, "loss": 0.9142, "step": 23290 }, { "epoch": 1.4081102314618965, "grad_norm": 0.6602905051195931, "learning_rate": 5.914818247534256e-06, "loss": 0.9222, "step": 23300 }, { "epoch": 1.4087145706170303, "grad_norm": 0.63981220871686, "learning_rate": 5.911543349539491e-06, "loss": 0.9179, "step": 23310 }, { "epoch": 1.4093189097721641, "grad_norm": 0.6635073430666767, "learning_rate": 5.9082680470002275e-06, "loss": 0.9636, "step": 23320 }, { "epoch": 1.409923248927298, "grad_norm": 0.6034233326503531, "learning_rate": 5.90499234137005e-06, "loss": 0.924, "step": 23330 }, { "epoch": 1.4105275880824317, "grad_norm": 0.6030559521565237, "learning_rate": 5.901716234102722e-06, "loss": 0.9155, "step": 23340 }, { "epoch": 1.4111319272375658, "grad_norm": 0.8554880372229265, "learning_rate": 5.898439726652185e-06, "loss": 0.928, "step": 23350 }, { "epoch": 1.4117362663926996, "grad_norm": 0.8497726167709079, "learning_rate": 5.895162820472561e-06, "loss": 0.9226, "step": 23360 }, { "epoch": 1.4123406055478334, "grad_norm": 0.752648043247242, "learning_rate": 5.891885517018144e-06, "loss": 0.9266, "step": 23370 }, { "epoch": 1.4129449447029674, "grad_norm": 0.8587357615882126, "learning_rate": 5.888607817743407e-06, "loss": 0.9114, "step": 23380 }, { "epoch": 1.4135492838581012, "grad_norm": 0.7827360882332269, "learning_rate": 5.885329724103e-06, "loss": 0.9107, "step": 23390 }, { "epoch": 1.414153623013235, "grad_norm": 0.5532713154866552, "learning_rate": 5.882051237551747e-06, "loss": 0.949, "step": 23400 }, { "epoch": 1.4147579621683688, "grad_norm": 0.5370093816865774, "learning_rate": 5.878772359544644e-06, "loss": 0.9226, "step": 23410 }, { "epoch": 1.4153623013235028, "grad_norm": 0.5255402087035534, "learning_rate": 5.875493091536863e-06, "loss": 0.9219, "step": 23420 }, { "epoch": 1.4159666404786366, "grad_norm": 0.5698333088239176, "learning_rate": 5.872213434983749e-06, "loss": 0.9312, "step": 23430 }, { "epoch": 1.4165709796337704, "grad_norm": 0.5639465058228315, "learning_rate": 5.86893339134082e-06, "loss": 0.9191, "step": 23440 }, { "epoch": 1.4171753187889045, "grad_norm": 0.5380120211799528, "learning_rate": 5.8656529620637645e-06, "loss": 0.9119, "step": 23450 }, { "epoch": 1.4177796579440383, "grad_norm": 0.5197435168817296, "learning_rate": 5.862372148608442e-06, "loss": 0.9255, "step": 23460 }, { "epoch": 1.418383997099172, "grad_norm": 0.5276321836999159, "learning_rate": 5.859090952430884e-06, "loss": 0.9384, "step": 23470 }, { "epoch": 1.4189883362543059, "grad_norm": 0.563623102037306, "learning_rate": 5.855809374987292e-06, "loss": 0.9288, "step": 23480 }, { "epoch": 1.4195926754094397, "grad_norm": 0.553209711824374, "learning_rate": 5.85252741773403e-06, "loss": 0.9575, "step": 23490 }, { "epoch": 1.4201970145645737, "grad_norm": 0.5224332909694155, "learning_rate": 5.849245082127645e-06, "loss": 0.9478, "step": 23500 }, { "epoch": 1.4208013537197075, "grad_norm": 0.5567811248645171, "learning_rate": 5.845962369624838e-06, "loss": 0.9595, "step": 23510 }, { "epoch": 1.4214056928748413, "grad_norm": 0.594796326940543, "learning_rate": 5.842679281682482e-06, "loss": 0.9275, "step": 23520 }, { "epoch": 1.4220100320299753, "grad_norm": 0.568043704523285, "learning_rate": 5.839395819757618e-06, "loss": 0.9243, "step": 23530 }, { "epoch": 1.4226143711851091, "grad_norm": 0.536059227424461, "learning_rate": 5.836111985307454e-06, "loss": 0.9362, "step": 23540 }, { "epoch": 1.423218710340243, "grad_norm": 0.519457316071563, "learning_rate": 5.832827779789359e-06, "loss": 0.9406, "step": 23550 }, { "epoch": 1.4238230494953767, "grad_norm": 0.5389675848892215, "learning_rate": 5.82954320466087e-06, "loss": 0.9209, "step": 23560 }, { "epoch": 1.4244273886505106, "grad_norm": 0.5368733627433508, "learning_rate": 5.826258261379685e-06, "loss": 0.9359, "step": 23570 }, { "epoch": 1.4250317278056446, "grad_norm": 0.5247160243873157, "learning_rate": 5.82297295140367e-06, "loss": 0.9251, "step": 23580 }, { "epoch": 1.4256360669607784, "grad_norm": 0.5499268238784991, "learning_rate": 5.8196872761908505e-06, "loss": 0.9119, "step": 23590 }, { "epoch": 1.4262404061159122, "grad_norm": 0.5483747462857096, "learning_rate": 5.816401237199415e-06, "loss": 0.9131, "step": 23600 }, { "epoch": 1.4268447452710462, "grad_norm": 0.553352366434354, "learning_rate": 5.813114835887712e-06, "loss": 0.9162, "step": 23610 }, { "epoch": 1.42744908442618, "grad_norm": 0.5308617286168494, "learning_rate": 5.809828073714254e-06, "loss": 0.9189, "step": 23620 }, { "epoch": 1.4280534235813138, "grad_norm": 0.5472399866509499, "learning_rate": 5.8065409521377095e-06, "loss": 0.9304, "step": 23630 }, { "epoch": 1.4286577627364476, "grad_norm": 0.5343379434090632, "learning_rate": 5.80325347261691e-06, "loss": 0.9527, "step": 23640 }, { "epoch": 1.4292621018915814, "grad_norm": 0.5619202319151952, "learning_rate": 5.799965636610844e-06, "loss": 0.95, "step": 23650 }, { "epoch": 1.4298664410467155, "grad_norm": 0.5710364577741498, "learning_rate": 5.796677445578661e-06, "loss": 0.9146, "step": 23660 }, { "epoch": 1.4304707802018493, "grad_norm": 0.5584617636158474, "learning_rate": 5.7933889009796625e-06, "loss": 0.927, "step": 23670 }, { "epoch": 1.431075119356983, "grad_norm": 0.5435218036747607, "learning_rate": 5.7901000042733116e-06, "loss": 0.9373, "step": 23680 }, { "epoch": 1.431679458512117, "grad_norm": 0.5266136045708651, "learning_rate": 5.7868107569192254e-06, "loss": 0.9262, "step": 23690 }, { "epoch": 1.432283797667251, "grad_norm": 0.6227605025639936, "learning_rate": 5.783521160377179e-06, "loss": 0.9157, "step": 23700 }, { "epoch": 1.4328881368223847, "grad_norm": 0.6550486181648428, "learning_rate": 5.7802312161071e-06, "loss": 0.9148, "step": 23710 }, { "epoch": 1.4334924759775185, "grad_norm": 0.6152904797965045, "learning_rate": 5.776940925569072e-06, "loss": 0.9301, "step": 23720 }, { "epoch": 1.4340968151326525, "grad_norm": 0.6516510877489632, "learning_rate": 5.7736502902233314e-06, "loss": 0.921, "step": 23730 }, { "epoch": 1.4347011542877863, "grad_norm": 0.6418468792766503, "learning_rate": 5.770359311530267e-06, "loss": 0.9355, "step": 23740 }, { "epoch": 1.4353054934429201, "grad_norm": 0.7392416154689126, "learning_rate": 5.767067990950422e-06, "loss": 0.9358, "step": 23750 }, { "epoch": 1.4359098325980542, "grad_norm": 0.6804505711286888, "learning_rate": 5.763776329944491e-06, "loss": 0.9289, "step": 23760 }, { "epoch": 1.436514171753188, "grad_norm": 0.6804828549369927, "learning_rate": 5.760484329973314e-06, "loss": 0.9448, "step": 23770 }, { "epoch": 1.4371185109083218, "grad_norm": 0.7085763300938479, "learning_rate": 5.757191992497891e-06, "loss": 0.9622, "step": 23780 }, { "epoch": 1.4377228500634556, "grad_norm": 0.7162386094733575, "learning_rate": 5.753899318979366e-06, "loss": 0.9206, "step": 23790 }, { "epoch": 1.4383271892185894, "grad_norm": 0.6099898676532602, "learning_rate": 5.750606310879034e-06, "loss": 0.9333, "step": 23800 }, { "epoch": 1.4389315283737234, "grad_norm": 0.6765274733055479, "learning_rate": 5.747312969658334e-06, "loss": 0.9541, "step": 23810 }, { "epoch": 1.4395358675288572, "grad_norm": 0.6447121314069736, "learning_rate": 5.74401929677886e-06, "loss": 0.9212, "step": 23820 }, { "epoch": 1.440140206683991, "grad_norm": 0.6960786656162969, "learning_rate": 5.740725293702346e-06, "loss": 0.9259, "step": 23830 }, { "epoch": 1.440744545839125, "grad_norm": 0.645977388970727, "learning_rate": 5.737430961890681e-06, "loss": 0.9554, "step": 23840 }, { "epoch": 1.4413488849942588, "grad_norm": 0.7459973063815706, "learning_rate": 5.734136302805893e-06, "loss": 0.9015, "step": 23850 }, { "epoch": 1.4419532241493926, "grad_norm": 0.7873226922195327, "learning_rate": 5.730841317910155e-06, "loss": 0.9378, "step": 23860 }, { "epoch": 1.4425575633045264, "grad_norm": 0.7899715681165357, "learning_rate": 5.727546008665791e-06, "loss": 0.9209, "step": 23870 }, { "epoch": 1.4431619024596603, "grad_norm": 0.8172282363303137, "learning_rate": 5.72425037653526e-06, "loss": 0.9372, "step": 23880 }, { "epoch": 1.4437662416147943, "grad_norm": 0.8182090464292833, "learning_rate": 5.720954422981175e-06, "loss": 0.9328, "step": 23890 }, { "epoch": 1.444370580769928, "grad_norm": 0.7834800642413587, "learning_rate": 5.717658149466283e-06, "loss": 0.9331, "step": 23900 }, { "epoch": 1.4449749199250619, "grad_norm": 0.7937840402422004, "learning_rate": 5.7143615574534736e-06, "loss": 0.9545, "step": 23910 }, { "epoch": 1.445579259080196, "grad_norm": 0.8091386230879632, "learning_rate": 5.711064648405784e-06, "loss": 0.95, "step": 23920 }, { "epoch": 1.4461835982353297, "grad_norm": 0.8002209844544873, "learning_rate": 5.707767423786385e-06, "loss": 0.9071, "step": 23930 }, { "epoch": 1.4467879373904635, "grad_norm": 0.7677156548723814, "learning_rate": 5.704469885058594e-06, "loss": 0.9257, "step": 23940 }, { "epoch": 1.4473922765455973, "grad_norm": 1.117799176143842, "learning_rate": 5.70117203368586e-06, "loss": 0.92, "step": 23950 }, { "epoch": 1.4479966157007311, "grad_norm": 1.1564055606972556, "learning_rate": 5.697873871131778e-06, "loss": 0.9579, "step": 23960 }, { "epoch": 1.4486009548558652, "grad_norm": 1.199502041846667, "learning_rate": 5.694575398860076e-06, "loss": 0.9362, "step": 23970 }, { "epoch": 1.449205294010999, "grad_norm": 1.1048179768832782, "learning_rate": 5.6912766183346235e-06, "loss": 0.9363, "step": 23980 }, { "epoch": 1.4498096331661328, "grad_norm": 1.1810427476759027, "learning_rate": 5.687977531019425e-06, "loss": 0.9381, "step": 23990 }, { "epoch": 1.4504139723212668, "grad_norm": 0.9493817329333853, "learning_rate": 5.6846781383786196e-06, "loss": 0.9377, "step": 24000 }, { "epoch": 1.4510183114764006, "grad_norm": 1.0181251025469211, "learning_rate": 5.681378441876485e-06, "loss": 0.9284, "step": 24010 }, { "epoch": 1.4516226506315344, "grad_norm": 0.8571959376934667, "learning_rate": 5.678078442977431e-06, "loss": 0.9244, "step": 24020 }, { "epoch": 1.4522269897866682, "grad_norm": 1.064963072459699, "learning_rate": 5.674778143146003e-06, "loss": 0.9441, "step": 24030 }, { "epoch": 1.452831328941802, "grad_norm": 0.8561229919400791, "learning_rate": 5.6714775438468815e-06, "loss": 0.9303, "step": 24040 }, { "epoch": 1.453435668096936, "grad_norm": 2.4463143228903017, "learning_rate": 5.668176646544876e-06, "loss": 0.9066, "step": 24050 }, { "epoch": 1.4540400072520698, "grad_norm": 2.6358690553350805, "learning_rate": 5.66487545270493e-06, "loss": 0.9271, "step": 24060 }, { "epoch": 1.4546443464072039, "grad_norm": 2.42934031766331, "learning_rate": 5.6615739637921205e-06, "loss": 0.9306, "step": 24070 }, { "epoch": 1.4552486855623377, "grad_norm": 2.7277840927160364, "learning_rate": 5.658272181271653e-06, "loss": 0.8911, "step": 24080 }, { "epoch": 1.4558530247174715, "grad_norm": 2.5010606102575075, "learning_rate": 5.654970106608865e-06, "loss": 0.9302, "step": 24090 }, { "epoch": 1.4564573638726053, "grad_norm": 0.9949991228860524, "learning_rate": 5.651667741269222e-06, "loss": 0.9133, "step": 24100 }, { "epoch": 1.457061703027739, "grad_norm": 1.0082999516017692, "learning_rate": 5.64836508671832e-06, "loss": 0.9183, "step": 24110 }, { "epoch": 1.457666042182873, "grad_norm": 1.0073580807753402, "learning_rate": 5.645062144421884e-06, "loss": 0.9345, "step": 24120 }, { "epoch": 1.458270381338007, "grad_norm": 0.9525189911860953, "learning_rate": 5.641758915845762e-06, "loss": 0.96, "step": 24130 }, { "epoch": 1.4588747204931407, "grad_norm": 1.3000267953353162, "learning_rate": 5.638455402455934e-06, "loss": 0.9433, "step": 24140 }, { "epoch": 1.4594790596482747, "grad_norm": 0.9329578236185453, "learning_rate": 5.635151605718507e-06, "loss": 0.9184, "step": 24150 }, { "epoch": 1.4600833988034085, "grad_norm": 0.9442844028556734, "learning_rate": 5.631847527099711e-06, "loss": 0.9425, "step": 24160 }, { "epoch": 1.4606877379585423, "grad_norm": 0.970976383184132, "learning_rate": 5.628543168065899e-06, "loss": 0.9327, "step": 24170 }, { "epoch": 1.4612920771136761, "grad_norm": 1.0174704510277752, "learning_rate": 5.625238530083554e-06, "loss": 0.9423, "step": 24180 }, { "epoch": 1.46189641626881, "grad_norm": 0.9775932818965495, "learning_rate": 5.621933614619279e-06, "loss": 0.942, "step": 24190 }, { "epoch": 1.462500755423944, "grad_norm": 1.0600828461815932, "learning_rate": 5.618628423139801e-06, "loss": 0.9406, "step": 24200 }, { "epoch": 1.4631050945790778, "grad_norm": 1.0739367834506144, "learning_rate": 5.6153229571119706e-06, "loss": 0.9423, "step": 24210 }, { "epoch": 1.4637094337342116, "grad_norm": 1.0312842310827948, "learning_rate": 5.6120172180027565e-06, "loss": 0.9104, "step": 24220 }, { "epoch": 1.4643137728893456, "grad_norm": 1.0635678647373001, "learning_rate": 5.608711207279254e-06, "loss": 0.9368, "step": 24230 }, { "epoch": 1.4649181120444794, "grad_norm": 1.0344959989680858, "learning_rate": 5.6054049264086765e-06, "loss": 0.9097, "step": 24240 }, { "epoch": 1.4655224511996132, "grad_norm": 1.005835618202722, "learning_rate": 5.602098376858356e-06, "loss": 0.952, "step": 24250 }, { "epoch": 1.466126790354747, "grad_norm": 1.131875223252975, "learning_rate": 5.598791560095744e-06, "loss": 0.9186, "step": 24260 }, { "epoch": 1.4667311295098808, "grad_norm": 1.086194952288562, "learning_rate": 5.595484477588415e-06, "loss": 0.9403, "step": 24270 }, { "epoch": 1.4673354686650149, "grad_norm": 1.0270700446528251, "learning_rate": 5.592177130804053e-06, "loss": 0.9443, "step": 24280 }, { "epoch": 1.4679398078201487, "grad_norm": 1.0910641604620384, "learning_rate": 5.588869521210468e-06, "loss": 0.947, "step": 24290 }, { "epoch": 1.4685441469752825, "grad_norm": 1.0796712728801525, "learning_rate": 5.585561650275581e-06, "loss": 0.9248, "step": 24300 }, { "epoch": 1.4691484861304165, "grad_norm": 1.0995957262770726, "learning_rate": 5.582253519467432e-06, "loss": 0.8944, "step": 24310 }, { "epoch": 1.4697528252855503, "grad_norm": 1.206669149227736, "learning_rate": 5.578945130254174e-06, "loss": 0.9121, "step": 24320 }, { "epoch": 1.470357164440684, "grad_norm": 1.1237455542866643, "learning_rate": 5.575636484104075e-06, "loss": 0.9272, "step": 24330 }, { "epoch": 1.470961503595818, "grad_norm": 1.090345073196265, "learning_rate": 5.57232758248552e-06, "loss": 0.9206, "step": 24340 }, { "epoch": 1.4715658427509517, "grad_norm": 1.0094949587322917, "learning_rate": 5.5690184268670035e-06, "loss": 0.9283, "step": 24350 }, { "epoch": 1.4721701819060857, "grad_norm": 1.0165623788715539, "learning_rate": 5.565709018717136e-06, "loss": 0.9284, "step": 24360 }, { "epoch": 1.4727745210612195, "grad_norm": 1.211764548105409, "learning_rate": 5.562399359504637e-06, "loss": 0.9333, "step": 24370 }, { "epoch": 1.4733788602163533, "grad_norm": 0.998326085486439, "learning_rate": 5.559089450698338e-06, "loss": 0.9101, "step": 24380 }, { "epoch": 1.4739831993714874, "grad_norm": 0.9532644690759089, "learning_rate": 5.555779293767185e-06, "loss": 0.9267, "step": 24390 }, { "epoch": 1.4745875385266212, "grad_norm": 1.1335260433592877, "learning_rate": 5.55246889018023e-06, "loss": 0.9377, "step": 24400 }, { "epoch": 1.475191877681755, "grad_norm": 1.0757158715265807, "learning_rate": 5.5491582414066366e-06, "loss": 0.9085, "step": 24410 }, { "epoch": 1.4757962168368888, "grad_norm": 1.057926732626364, "learning_rate": 5.545847348915674e-06, "loss": 0.9686, "step": 24420 }, { "epoch": 1.4764005559920228, "grad_norm": 1.0654649545148709, "learning_rate": 5.542536214176725e-06, "loss": 0.9299, "step": 24430 }, { "epoch": 1.4770048951471566, "grad_norm": 1.0084903494852366, "learning_rate": 5.539224838659275e-06, "loss": 0.9437, "step": 24440 }, { "epoch": 1.4776092343022904, "grad_norm": 0.9313667488629244, "learning_rate": 5.53591322383292e-06, "loss": 0.9431, "step": 24450 }, { "epoch": 1.4782135734574244, "grad_norm": 1.032862261985383, "learning_rate": 5.532601371167356e-06, "loss": 0.9411, "step": 24460 }, { "epoch": 1.4788179126125582, "grad_norm": 0.9464095432894105, "learning_rate": 5.529289282132394e-06, "loss": 0.9299, "step": 24470 }, { "epoch": 1.479422251767692, "grad_norm": 0.9831548913519023, "learning_rate": 5.52597695819794e-06, "loss": 0.9284, "step": 24480 }, { "epoch": 1.4800265909228258, "grad_norm": 1.0092236694155683, "learning_rate": 5.522664400834013e-06, "loss": 0.9374, "step": 24490 }, { "epoch": 1.4806309300779597, "grad_norm": 1.0630829895520733, "learning_rate": 5.519351611510731e-06, "loss": 0.9289, "step": 24500 }, { "epoch": 1.4812352692330937, "grad_norm": 1.2132130764191726, "learning_rate": 5.5160385916983136e-06, "loss": 0.9134, "step": 24510 }, { "epoch": 1.4818396083882275, "grad_norm": 1.1136866536032546, "learning_rate": 5.512725342867087e-06, "loss": 0.9187, "step": 24520 }, { "epoch": 1.4824439475433613, "grad_norm": 1.1271814258216801, "learning_rate": 5.509411866487474e-06, "loss": 0.9243, "step": 24530 }, { "epoch": 1.4830482866984953, "grad_norm": 1.1097661823471676, "learning_rate": 5.506098164030003e-06, "loss": 0.9422, "step": 24540 }, { "epoch": 1.4836526258536291, "grad_norm": 1.0779943607533187, "learning_rate": 5.502784236965301e-06, "loss": 0.9351, "step": 24550 }, { "epoch": 1.484256965008763, "grad_norm": 1.11532446683327, "learning_rate": 5.499470086764094e-06, "loss": 0.9371, "step": 24560 }, { "epoch": 1.4848613041638967, "grad_norm": 1.114111938319899, "learning_rate": 5.496155714897207e-06, "loss": 0.9527, "step": 24570 }, { "epoch": 1.4854656433190305, "grad_norm": 1.0720829313632556, "learning_rate": 5.492841122835565e-06, "loss": 0.9186, "step": 24580 }, { "epoch": 1.4860699824741646, "grad_norm": 1.1325486265286429, "learning_rate": 5.489526312050187e-06, "loss": 0.9089, "step": 24590 }, { "epoch": 1.4866743216292984, "grad_norm": 1.234902681852651, "learning_rate": 5.486211284012196e-06, "loss": 0.9357, "step": 24600 }, { "epoch": 1.4872786607844322, "grad_norm": 1.153268063464747, "learning_rate": 5.482896040192805e-06, "loss": 0.884, "step": 24610 }, { "epoch": 1.4878829999395662, "grad_norm": 1.1971313011206486, "learning_rate": 5.479580582063322e-06, "loss": 0.9306, "step": 24620 }, { "epoch": 1.4884873390947, "grad_norm": 1.0973762613588873, "learning_rate": 5.476264911095158e-06, "loss": 0.9316, "step": 24630 }, { "epoch": 1.4890916782498338, "grad_norm": 1.212149959098993, "learning_rate": 5.472949028759811e-06, "loss": 0.9163, "step": 24640 }, { "epoch": 1.4896960174049676, "grad_norm": 1.4200996829837007, "learning_rate": 5.469632936528875e-06, "loss": 0.9177, "step": 24650 }, { "epoch": 1.4903003565601014, "grad_norm": 1.4829504571141172, "learning_rate": 5.466316635874037e-06, "loss": 0.9258, "step": 24660 }, { "epoch": 1.4909046957152354, "grad_norm": 1.4280627667598678, "learning_rate": 5.463000128267079e-06, "loss": 0.9178, "step": 24670 }, { "epoch": 1.4915090348703692, "grad_norm": 1.362757596864265, "learning_rate": 5.459683415179871e-06, "loss": 0.9232, "step": 24680 }, { "epoch": 1.492113374025503, "grad_norm": 1.4033937713313966, "learning_rate": 5.4563664980843765e-06, "loss": 0.9192, "step": 24690 }, { "epoch": 1.492717713180637, "grad_norm": 0.7871530104856718, "learning_rate": 5.453049378452648e-06, "loss": 0.9364, "step": 24700 }, { "epoch": 1.4933220523357709, "grad_norm": 0.8376815217719871, "learning_rate": 5.449732057756831e-06, "loss": 0.9373, "step": 24710 }, { "epoch": 1.4939263914909047, "grad_norm": 0.8335830707549651, "learning_rate": 5.446414537469158e-06, "loss": 0.8991, "step": 24720 }, { "epoch": 1.4945307306460385, "grad_norm": 0.8996257796761521, "learning_rate": 5.443096819061946e-06, "loss": 0.9226, "step": 24730 }, { "epoch": 1.4951350698011725, "grad_norm": 0.8344490672476051, "learning_rate": 5.43977890400761e-06, "loss": 0.9146, "step": 24740 }, { "epoch": 1.4957394089563063, "grad_norm": 0.7919301571373561, "learning_rate": 5.436460793778642e-06, "loss": 0.9407, "step": 24750 }, { "epoch": 1.49634374811144, "grad_norm": 0.8424038899184205, "learning_rate": 5.433142489847626e-06, "loss": 0.9455, "step": 24760 }, { "epoch": 1.4969480872665741, "grad_norm": 0.7754519638914394, "learning_rate": 5.429823993687234e-06, "loss": 0.9509, "step": 24770 }, { "epoch": 1.497552426421708, "grad_norm": 0.788339896315405, "learning_rate": 5.4265053067702156e-06, "loss": 0.9052, "step": 24780 }, { "epoch": 1.4981567655768417, "grad_norm": 0.806122611292681, "learning_rate": 5.423186430569411e-06, "loss": 0.9119, "step": 24790 }, { "epoch": 1.4987611047319755, "grad_norm": 0.8061608990913078, "learning_rate": 5.419867366557746e-06, "loss": 0.942, "step": 24800 }, { "epoch": 1.4993654438871094, "grad_norm": 0.8371038554107869, "learning_rate": 5.4165481162082255e-06, "loss": 0.9446, "step": 24810 }, { "epoch": 1.4999697830422434, "grad_norm": 0.8055214948696328, "learning_rate": 5.413228680993936e-06, "loss": 0.9368, "step": 24820 }, { "epoch": 1.5005741221973772, "grad_norm": 0.8338406655848178, "learning_rate": 5.409909062388053e-06, "loss": 0.9434, "step": 24830 }, { "epoch": 1.501178461352511, "grad_norm": 0.8765348187200152, "learning_rate": 5.406589261863824e-06, "loss": 0.9349, "step": 24840 }, { "epoch": 1.501782800507645, "grad_norm": 0.605094709523364, "learning_rate": 5.403269280894586e-06, "loss": 0.913, "step": 24850 }, { "epoch": 1.5023871396627788, "grad_norm": 0.5646927157546151, "learning_rate": 5.39994912095375e-06, "loss": 0.9299, "step": 24860 }, { "epoch": 1.5029914788179126, "grad_norm": 0.5587592563848658, "learning_rate": 5.39662878351481e-06, "loss": 0.938, "step": 24870 }, { "epoch": 1.5035958179730464, "grad_norm": 0.6182669339387755, "learning_rate": 5.393308270051336e-06, "loss": 0.9581, "step": 24880 }, { "epoch": 1.5042001571281802, "grad_norm": 0.5565209161500577, "learning_rate": 5.38998758203698e-06, "loss": 0.9265, "step": 24890 }, { "epoch": 1.504804496283314, "grad_norm": 0.6217507169907869, "learning_rate": 5.386666720945466e-06, "loss": 0.9345, "step": 24900 }, { "epoch": 1.505408835438448, "grad_norm": 0.6406947531859823, "learning_rate": 5.383345688250599e-06, "loss": 0.9478, "step": 24910 }, { "epoch": 1.506013174593582, "grad_norm": 0.6206465542025668, "learning_rate": 5.380024485426262e-06, "loss": 0.9323, "step": 24920 }, { "epoch": 1.5066175137487159, "grad_norm": 0.6410990777965546, "learning_rate": 5.376703113946405e-06, "loss": 0.924, "step": 24930 }, { "epoch": 1.5072218529038497, "grad_norm": 0.6273856086626944, "learning_rate": 5.373381575285063e-06, "loss": 0.941, "step": 24940 }, { "epoch": 1.5078261920589835, "grad_norm": 0.8347017688219457, "learning_rate": 5.370059870916338e-06, "loss": 0.9294, "step": 24950 }, { "epoch": 1.5084305312141173, "grad_norm": 0.8068542653111551, "learning_rate": 5.366738002314409e-06, "loss": 0.9373, "step": 24960 }, { "epoch": 1.509034870369251, "grad_norm": 0.8329047474556546, "learning_rate": 5.3634159709535265e-06, "loss": 0.939, "step": 24970 }, { "epoch": 1.5096392095243851, "grad_norm": 0.8036045907060764, "learning_rate": 5.360093778308014e-06, "loss": 0.9557, "step": 24980 }, { "epoch": 1.510243548679519, "grad_norm": 0.8724936878030212, "learning_rate": 5.356771425852265e-06, "loss": 0.9445, "step": 24990 }, { "epoch": 1.510847887834653, "grad_norm": 0.5718648496158292, "learning_rate": 5.353448915060748e-06, "loss": 0.932, "step": 25000 }, { "epoch": 1.5114522269897868, "grad_norm": 0.5361320371221349, "learning_rate": 5.350126247407997e-06, "loss": 0.9312, "step": 25010 }, { "epoch": 1.5120565661449206, "grad_norm": 0.5433642153068129, "learning_rate": 5.3468034243686185e-06, "loss": 0.9275, "step": 25020 }, { "epoch": 1.5126609053000544, "grad_norm": 0.5327652215574483, "learning_rate": 5.343480447417286e-06, "loss": 0.9311, "step": 25030 }, { "epoch": 1.5132652444551882, "grad_norm": 0.5300971442516206, "learning_rate": 5.340157318028743e-06, "loss": 0.9274, "step": 25040 }, { "epoch": 1.513869583610322, "grad_norm": 0.5568574526881223, "learning_rate": 5.336834037677799e-06, "loss": 0.9213, "step": 25050 }, { "epoch": 1.514473922765456, "grad_norm": 0.5700724867063462, "learning_rate": 5.333510607839335e-06, "loss": 0.9555, "step": 25060 }, { "epoch": 1.5150782619205898, "grad_norm": 0.5264939713962447, "learning_rate": 5.33018702998829e-06, "loss": 0.9353, "step": 25070 }, { "epoch": 1.5156826010757238, "grad_norm": 0.5495008937206775, "learning_rate": 5.326863305599676e-06, "loss": 0.9292, "step": 25080 }, { "epoch": 1.5162869402308576, "grad_norm": 0.511724912429098, "learning_rate": 5.323539436148569e-06, "loss": 0.9322, "step": 25090 }, { "epoch": 1.5168912793859914, "grad_norm": 0.6295904820107148, "learning_rate": 5.320215423110106e-06, "loss": 0.9234, "step": 25100 }, { "epoch": 1.5174956185411252, "grad_norm": 0.5641467392529417, "learning_rate": 5.31689126795949e-06, "loss": 0.9086, "step": 25110 }, { "epoch": 1.518099957696259, "grad_norm": 0.5574782585023906, "learning_rate": 5.313566972171986e-06, "loss": 0.9093, "step": 25120 }, { "epoch": 1.5187042968513929, "grad_norm": 0.5961576601894568, "learning_rate": 5.310242537222923e-06, "loss": 0.939, "step": 25130 }, { "epoch": 1.5193086360065269, "grad_norm": 0.5765480262979534, "learning_rate": 5.306917964587692e-06, "loss": 0.9469, "step": 25140 }, { "epoch": 1.5199129751616607, "grad_norm": 0.5681235217743901, "learning_rate": 5.303593255741743e-06, "loss": 0.9292, "step": 25150 }, { "epoch": 1.5205173143167947, "grad_norm": 0.5480531718419404, "learning_rate": 5.300268412160585e-06, "loss": 0.92, "step": 25160 }, { "epoch": 1.5211216534719285, "grad_norm": 0.5478385749808464, "learning_rate": 5.296943435319793e-06, "loss": 0.9355, "step": 25170 }, { "epoch": 1.5217259926270623, "grad_norm": 0.5405497931577712, "learning_rate": 5.293618326694997e-06, "loss": 0.9074, "step": 25180 }, { "epoch": 1.5223303317821961, "grad_norm": 0.5264046387987237, "learning_rate": 5.290293087761884e-06, "loss": 0.9423, "step": 25190 }, { "epoch": 1.52293467093733, "grad_norm": 0.5529984620973368, "learning_rate": 5.286967719996202e-06, "loss": 0.9102, "step": 25200 }, { "epoch": 1.5235390100924637, "grad_norm": 0.5520220959776307, "learning_rate": 5.283642224873755e-06, "loss": 0.9308, "step": 25210 }, { "epoch": 1.5241433492475978, "grad_norm": 0.5270088411338348, "learning_rate": 5.280316603870402e-06, "loss": 0.9345, "step": 25220 }, { "epoch": 1.5247476884027316, "grad_norm": 0.5604217066139217, "learning_rate": 5.2769908584620625e-06, "loss": 0.9636, "step": 25230 }, { "epoch": 1.5253520275578656, "grad_norm": 0.5531553773663259, "learning_rate": 5.273664990124703e-06, "loss": 0.9239, "step": 25240 }, { "epoch": 1.5259563667129994, "grad_norm": 0.5355421801389098, "learning_rate": 5.270339000334354e-06, "loss": 0.9179, "step": 25250 }, { "epoch": 1.5265607058681332, "grad_norm": 0.615318033585799, "learning_rate": 5.267012890567093e-06, "loss": 0.9159, "step": 25260 }, { "epoch": 1.527165045023267, "grad_norm": 0.5474946878706693, "learning_rate": 5.2636866622990545e-06, "loss": 0.9374, "step": 25270 }, { "epoch": 1.5277693841784008, "grad_norm": 0.53694373818359, "learning_rate": 5.260360317006424e-06, "loss": 0.9332, "step": 25280 }, { "epoch": 1.5283737233335348, "grad_norm": 0.5750388536969242, "learning_rate": 5.25703385616544e-06, "loss": 0.9409, "step": 25290 }, { "epoch": 1.5289780624886686, "grad_norm": 0.6342670899913346, "learning_rate": 5.253707281252389e-06, "loss": 0.9439, "step": 25300 }, { "epoch": 1.5295824016438027, "grad_norm": 0.610913971867008, "learning_rate": 5.2503805937436136e-06, "loss": 0.9373, "step": 25310 }, { "epoch": 1.5301867407989365, "grad_norm": 0.6435650310477017, "learning_rate": 5.247053795115502e-06, "loss": 0.9065, "step": 25320 }, { "epoch": 1.5307910799540703, "grad_norm": 0.6064943663841538, "learning_rate": 5.243726886844492e-06, "loss": 0.9406, "step": 25330 }, { "epoch": 1.531395419109204, "grad_norm": 0.656846394886137, "learning_rate": 5.240399870407073e-06, "loss": 0.9144, "step": 25340 }, { "epoch": 1.5319997582643379, "grad_norm": 0.7094892004234634, "learning_rate": 5.23707274727978e-06, "loss": 0.9299, "step": 25350 }, { "epoch": 1.5326040974194717, "grad_norm": 0.6663549201986683, "learning_rate": 5.2337455189391975e-06, "loss": 0.9162, "step": 25360 }, { "epoch": 1.5332084365746057, "grad_norm": 0.680862343698712, "learning_rate": 5.230418186861952e-06, "loss": 0.9291, "step": 25370 }, { "epoch": 1.5338127757297395, "grad_norm": 0.6830789485686696, "learning_rate": 5.22709075252472e-06, "loss": 0.9371, "step": 25380 }, { "epoch": 1.5344171148848735, "grad_norm": 0.7158287834715885, "learning_rate": 5.223763217404224e-06, "loss": 0.918, "step": 25390 }, { "epoch": 1.5350214540400073, "grad_norm": 0.6771683047073732, "learning_rate": 5.220435582977229e-06, "loss": 0.9202, "step": 25400 }, { "epoch": 1.5356257931951411, "grad_norm": 0.6626398846368972, "learning_rate": 5.2171078507205445e-06, "loss": 0.9409, "step": 25410 }, { "epoch": 1.536230132350275, "grad_norm": 0.6462441093685363, "learning_rate": 5.2137800221110245e-06, "loss": 0.9356, "step": 25420 }, { "epoch": 1.5368344715054088, "grad_norm": 0.6960277359787035, "learning_rate": 5.210452098625566e-06, "loss": 0.9192, "step": 25430 }, { "epoch": 1.5374388106605426, "grad_norm": 0.6286253596716214, "learning_rate": 5.207124081741104e-06, "loss": 0.9247, "step": 25440 }, { "epoch": 1.5380431498156766, "grad_norm": 0.7684302944711767, "learning_rate": 5.203795972934621e-06, "loss": 0.922, "step": 25450 }, { "epoch": 1.5386474889708104, "grad_norm": 0.7947482004036137, "learning_rate": 5.200467773683137e-06, "loss": 0.94, "step": 25460 }, { "epoch": 1.5392518281259444, "grad_norm": 0.838804320386143, "learning_rate": 5.197139485463709e-06, "loss": 0.9519, "step": 25470 }, { "epoch": 1.5398561672810782, "grad_norm": 0.7658220466284251, "learning_rate": 5.193811109753443e-06, "loss": 0.9228, "step": 25480 }, { "epoch": 1.540460506436212, "grad_norm": 0.7647489548855564, "learning_rate": 5.190482648029474e-06, "loss": 0.9222, "step": 25490 }, { "epoch": 1.5410648455913458, "grad_norm": 0.7563827069031019, "learning_rate": 5.18715410176898e-06, "loss": 0.9153, "step": 25500 }, { "epoch": 1.5416691847464796, "grad_norm": 0.8365780095920619, "learning_rate": 5.183825472449174e-06, "loss": 0.929, "step": 25510 }, { "epoch": 1.5422735239016134, "grad_norm": 0.7991861123225084, "learning_rate": 5.18049676154731e-06, "loss": 0.9076, "step": 25520 }, { "epoch": 1.5428778630567475, "grad_norm": 0.8048621616490019, "learning_rate": 5.177167970540672e-06, "loss": 0.9237, "step": 25530 }, { "epoch": 1.5434822022118813, "grad_norm": 0.7734975598678984, "learning_rate": 5.173839100906587e-06, "loss": 0.9163, "step": 25540 }, { "epoch": 1.5440865413670153, "grad_norm": 1.119163306485506, "learning_rate": 5.170510154122412e-06, "loss": 0.9383, "step": 25550 }, { "epoch": 1.544690880522149, "grad_norm": 1.2355977381533663, "learning_rate": 5.167181131665537e-06, "loss": 0.9101, "step": 25560 }, { "epoch": 1.545295219677283, "grad_norm": 1.1578603675013999, "learning_rate": 5.163852035013391e-06, "loss": 0.9232, "step": 25570 }, { "epoch": 1.5458995588324167, "grad_norm": 1.1605758822996983, "learning_rate": 5.16052286564343e-06, "loss": 0.9378, "step": 25580 }, { "epoch": 1.5465038979875505, "grad_norm": 1.1404758708343652, "learning_rate": 5.157193625033148e-06, "loss": 0.9273, "step": 25590 }, { "epoch": 1.5471082371426845, "grad_norm": 0.9145071291879595, "learning_rate": 5.1538643146600655e-06, "loss": 0.9444, "step": 25600 }, { "epoch": 1.5477125762978183, "grad_norm": 1.2991484525941206, "learning_rate": 5.150534936001736e-06, "loss": 0.9343, "step": 25610 }, { "epoch": 1.5483169154529524, "grad_norm": 0.8944752930190453, "learning_rate": 5.147205490535746e-06, "loss": 0.9198, "step": 25620 }, { "epoch": 1.5489212546080862, "grad_norm": 1.3259442828515904, "learning_rate": 5.1438759797397065e-06, "loss": 0.9613, "step": 25630 }, { "epoch": 1.54952559376322, "grad_norm": 1.236584863804695, "learning_rate": 5.14054640509126e-06, "loss": 0.9494, "step": 25640 }, { "epoch": 1.5501299329183538, "grad_norm": 2.1651514961345177, "learning_rate": 5.13721676806808e-06, "loss": 0.9213, "step": 25650 }, { "epoch": 1.5507342720734876, "grad_norm": 2.1903286706820073, "learning_rate": 5.1338870701478614e-06, "loss": 0.9128, "step": 25660 }, { "epoch": 1.5513386112286214, "grad_norm": 2.2900094008757663, "learning_rate": 5.130557312808333e-06, "loss": 0.9342, "step": 25670 }, { "epoch": 1.5519429503837554, "grad_norm": 2.5834108998599925, "learning_rate": 5.127227497527245e-06, "loss": 0.9273, "step": 25680 }, { "epoch": 1.5525472895388892, "grad_norm": 2.376168287666955, "learning_rate": 5.123897625782377e-06, "loss": 0.9293, "step": 25690 }, { "epoch": 1.5531516286940232, "grad_norm": 0.870189873118243, "learning_rate": 5.12056769905153e-06, "loss": 0.9295, "step": 25700 }, { "epoch": 1.553755967849157, "grad_norm": 1.051007151173739, "learning_rate": 5.117237718812531e-06, "loss": 0.9546, "step": 25710 }, { "epoch": 1.5543603070042908, "grad_norm": 0.8648837502793729, "learning_rate": 5.1139076865432315e-06, "loss": 0.9283, "step": 25720 }, { "epoch": 1.5549646461594246, "grad_norm": 0.8946069289994244, "learning_rate": 5.110577603721507e-06, "loss": 0.9286, "step": 25730 }, { "epoch": 1.5555689853145585, "grad_norm": 1.0195353939218657, "learning_rate": 5.107247471825253e-06, "loss": 0.9302, "step": 25740 }, { "epoch": 1.5561733244696923, "grad_norm": 0.9520057049318175, "learning_rate": 5.103917292332387e-06, "loss": 0.9325, "step": 25750 }, { "epoch": 1.5567776636248263, "grad_norm": 0.9999656492662283, "learning_rate": 5.100587066720848e-06, "loss": 0.9455, "step": 25760 }, { "epoch": 1.55738200277996, "grad_norm": 0.9988157968221073, "learning_rate": 5.097256796468598e-06, "loss": 0.9233, "step": 25770 }, { "epoch": 1.557986341935094, "grad_norm": 0.8971390514863067, "learning_rate": 5.093926483053613e-06, "loss": 0.8944, "step": 25780 }, { "epoch": 1.558590681090228, "grad_norm": 0.9408964147464126, "learning_rate": 5.0905961279538955e-06, "loss": 0.9236, "step": 25790 }, { "epoch": 1.5591950202453617, "grad_norm": 1.0587713843456572, "learning_rate": 5.087265732647462e-06, "loss": 0.9308, "step": 25800 }, { "epoch": 1.5597993594004955, "grad_norm": 1.133974684422884, "learning_rate": 5.083935298612344e-06, "loss": 0.9056, "step": 25810 }, { "epoch": 1.5604036985556293, "grad_norm": 1.0537213682437596, "learning_rate": 5.0806048273266e-06, "loss": 0.9274, "step": 25820 }, { "epoch": 1.5610080377107631, "grad_norm": 1.0244925212422469, "learning_rate": 5.077274320268295e-06, "loss": 0.9218, "step": 25830 }, { "epoch": 1.5616123768658972, "grad_norm": 1.0331662872298382, "learning_rate": 5.073943778915513e-06, "loss": 0.9412, "step": 25840 }, { "epoch": 1.562216716021031, "grad_norm": 1.073812897284331, "learning_rate": 5.0706132047463566e-06, "loss": 0.934, "step": 25850 }, { "epoch": 1.562821055176165, "grad_norm": 0.9866836211663912, "learning_rate": 5.0672825992389385e-06, "loss": 0.9251, "step": 25860 }, { "epoch": 1.5634253943312988, "grad_norm": 1.0231239993437653, "learning_rate": 5.063951963871388e-06, "loss": 0.9305, "step": 25870 }, { "epoch": 1.5640297334864326, "grad_norm": 1.0385677366708708, "learning_rate": 5.0606213001218474e-06, "loss": 0.9206, "step": 25880 }, { "epoch": 1.5646340726415664, "grad_norm": 1.036345186275833, "learning_rate": 5.0572906094684685e-06, "loss": 0.9253, "step": 25890 }, { "epoch": 1.5652384117967002, "grad_norm": 1.142363563968631, "learning_rate": 5.0539598933894195e-06, "loss": 0.918, "step": 25900 }, { "epoch": 1.565842750951834, "grad_norm": 1.1499087774908776, "learning_rate": 5.050629153362877e-06, "loss": 0.9283, "step": 25910 }, { "epoch": 1.566447090106968, "grad_norm": 1.2066881531981957, "learning_rate": 5.04729839086703e-06, "loss": 0.9665, "step": 25920 }, { "epoch": 1.567051429262102, "grad_norm": 1.0886914769098222, "learning_rate": 5.043967607380076e-06, "loss": 0.9144, "step": 25930 }, { "epoch": 1.5676557684172359, "grad_norm": 1.085450693017567, "learning_rate": 5.0406368043802235e-06, "loss": 0.9093, "step": 25940 }, { "epoch": 1.5682601075723697, "grad_norm": 0.9891846310233481, "learning_rate": 5.037305983345686e-06, "loss": 0.9367, "step": 25950 }, { "epoch": 1.5688644467275035, "grad_norm": 0.9963790541513797, "learning_rate": 5.033975145754691e-06, "loss": 0.93, "step": 25960 }, { "epoch": 1.5694687858826373, "grad_norm": 0.9865967084762404, "learning_rate": 5.030644293085467e-06, "loss": 0.9306, "step": 25970 }, { "epoch": 1.570073125037771, "grad_norm": 0.9973800819386068, "learning_rate": 5.027313426816253e-06, "loss": 0.9236, "step": 25980 }, { "epoch": 1.570677464192905, "grad_norm": 1.025192235084345, "learning_rate": 5.023982548425294e-06, "loss": 0.9217, "step": 25990 }, { "epoch": 1.571281803348039, "grad_norm": 1.003606142469775, "learning_rate": 5.02065165939084e-06, "loss": 0.9196, "step": 26000 }, { "epoch": 1.571886142503173, "grad_norm": 1.042744971044762, "learning_rate": 5.017320761191142e-06, "loss": 0.9181, "step": 26010 }, { "epoch": 1.5724904816583067, "grad_norm": 1.0393084883227832, "learning_rate": 5.013989855304463e-06, "loss": 0.9285, "step": 26020 }, { "epoch": 1.5730948208134405, "grad_norm": 1.0740141005569745, "learning_rate": 5.010658943209061e-06, "loss": 0.9157, "step": 26030 }, { "epoch": 1.5736991599685743, "grad_norm": 1.0353080282987033, "learning_rate": 5.007328026383201e-06, "loss": 0.9453, "step": 26040 }, { "epoch": 1.5743034991237081, "grad_norm": 0.9779490300492935, "learning_rate": 5.0039971063051506e-06, "loss": 0.9464, "step": 26050 }, { "epoch": 1.574907838278842, "grad_norm": 1.0734494690786855, "learning_rate": 5.000666184453178e-06, "loss": 0.9097, "step": 26060 }, { "epoch": 1.575512177433976, "grad_norm": 0.9897734471954108, "learning_rate": 4.997335262305552e-06, "loss": 0.9171, "step": 26070 }, { "epoch": 1.5761165165891098, "grad_norm": 0.9603379912784669, "learning_rate": 4.99400434134054e-06, "loss": 0.9248, "step": 26080 }, { "epoch": 1.5767208557442438, "grad_norm": 0.9228970233585315, "learning_rate": 4.990673423036413e-06, "loss": 0.9202, "step": 26090 }, { "epoch": 1.5773251948993776, "grad_norm": 1.1794627083838227, "learning_rate": 4.987342508871435e-06, "loss": 0.916, "step": 26100 }, { "epoch": 1.5779295340545114, "grad_norm": 1.1121431300687952, "learning_rate": 4.9840116003238755e-06, "loss": 0.9596, "step": 26110 }, { "epoch": 1.5785338732096452, "grad_norm": 1.1510909770573559, "learning_rate": 4.980680698871994e-06, "loss": 0.9446, "step": 26120 }, { "epoch": 1.579138212364779, "grad_norm": 1.206898411951541, "learning_rate": 4.977349805994052e-06, "loss": 0.9255, "step": 26130 }, { "epoch": 1.5797425515199128, "grad_norm": 1.1342969650408612, "learning_rate": 4.974018923168304e-06, "loss": 0.9299, "step": 26140 }, { "epoch": 1.5803468906750469, "grad_norm": 1.170289375363151, "learning_rate": 4.9706880518730035e-06, "loss": 0.9391, "step": 26150 }, { "epoch": 1.5809512298301807, "grad_norm": 1.0496278904169514, "learning_rate": 4.967357193586394e-06, "loss": 0.9238, "step": 26160 }, { "epoch": 1.5815555689853147, "grad_norm": 1.126234443121289, "learning_rate": 4.964026349786718e-06, "loss": 0.899, "step": 26170 }, { "epoch": 1.5821599081404485, "grad_norm": 1.1347189258346306, "learning_rate": 4.960695521952212e-06, "loss": 0.9275, "step": 26180 }, { "epoch": 1.5827642472955823, "grad_norm": 1.1009282133778555, "learning_rate": 4.957364711561098e-06, "loss": 0.9168, "step": 26190 }, { "epoch": 1.583368586450716, "grad_norm": 1.1225844815673176, "learning_rate": 4.954033920091599e-06, "loss": 0.939, "step": 26200 }, { "epoch": 1.58397292560585, "grad_norm": 1.185183737250558, "learning_rate": 4.950703149021926e-06, "loss": 0.9269, "step": 26210 }, { "epoch": 1.5845772647609837, "grad_norm": 1.170194770014627, "learning_rate": 4.947372399830278e-06, "loss": 0.9388, "step": 26220 }, { "epoch": 1.5851816039161177, "grad_norm": 1.1168819248849862, "learning_rate": 4.944041673994849e-06, "loss": 0.9574, "step": 26230 }, { "epoch": 1.5857859430712515, "grad_norm": 1.1103202715717237, "learning_rate": 4.940710972993823e-06, "loss": 0.9166, "step": 26240 }, { "epoch": 1.5863902822263856, "grad_norm": 1.3970417725624233, "learning_rate": 4.937380298305367e-06, "loss": 0.9142, "step": 26250 }, { "epoch": 1.5869946213815194, "grad_norm": 1.325250298028764, "learning_rate": 4.9340496514076426e-06, "loss": 0.9168, "step": 26260 }, { "epoch": 1.5875989605366532, "grad_norm": 1.4355224563834648, "learning_rate": 4.930719033778796e-06, "loss": 0.9387, "step": 26270 }, { "epoch": 1.588203299691787, "grad_norm": 1.341306523944089, "learning_rate": 4.92738844689696e-06, "loss": 0.9662, "step": 26280 }, { "epoch": 1.5888076388469208, "grad_norm": 1.4293212041784922, "learning_rate": 4.924057892240255e-06, "loss": 0.8993, "step": 26290 }, { "epoch": 1.5894119780020548, "grad_norm": 0.8246833103032736, "learning_rate": 4.920727371286787e-06, "loss": 0.942, "step": 26300 }, { "epoch": 1.5900163171571886, "grad_norm": 0.8227680165610858, "learning_rate": 4.917396885514651e-06, "loss": 0.9107, "step": 26310 }, { "epoch": 1.5906206563123226, "grad_norm": 0.8555478160224953, "learning_rate": 4.914066436401915e-06, "loss": 0.9282, "step": 26320 }, { "epoch": 1.5912249954674564, "grad_norm": 0.8525732833201712, "learning_rate": 4.910736025426642e-06, "loss": 0.931, "step": 26330 }, { "epoch": 1.5918293346225902, "grad_norm": 0.8520978853241207, "learning_rate": 4.907405654066876e-06, "loss": 0.939, "step": 26340 }, { "epoch": 1.592433673777724, "grad_norm": 0.7981743725438954, "learning_rate": 4.904075323800637e-06, "loss": 0.9308, "step": 26350 }, { "epoch": 1.5930380129328578, "grad_norm": 0.7702941241513368, "learning_rate": 4.900745036105933e-06, "loss": 0.9482, "step": 26360 }, { "epoch": 1.5936423520879917, "grad_norm": 0.7776441582442999, "learning_rate": 4.897414792460755e-06, "loss": 0.9312, "step": 26370 }, { "epoch": 1.5942466912431257, "grad_norm": 0.7778515280276301, "learning_rate": 4.894084594343065e-06, "loss": 0.925, "step": 26380 }, { "epoch": 1.5948510303982595, "grad_norm": 0.7723257748622294, "learning_rate": 4.890754443230815e-06, "loss": 0.9236, "step": 26390 }, { "epoch": 1.5954553695533935, "grad_norm": 0.7658686467998368, "learning_rate": 4.887424340601929e-06, "loss": 0.9251, "step": 26400 }, { "epoch": 1.5960597087085273, "grad_norm": 0.7750451266165372, "learning_rate": 4.884094287934314e-06, "loss": 0.9126, "step": 26410 }, { "epoch": 1.5966640478636611, "grad_norm": 0.7497944965314435, "learning_rate": 4.880764286705851e-06, "loss": 0.9299, "step": 26420 }, { "epoch": 1.597268387018795, "grad_norm": 0.8150259219210402, "learning_rate": 4.877434338394405e-06, "loss": 0.9061, "step": 26430 }, { "epoch": 1.5978727261739287, "grad_norm": 0.8855265785179022, "learning_rate": 4.874104444477806e-06, "loss": 0.9407, "step": 26440 }, { "epoch": 1.5984770653290625, "grad_norm": 0.5895912372705706, "learning_rate": 4.870774606433871e-06, "loss": 0.9172, "step": 26450 }, { "epoch": 1.5990814044841966, "grad_norm": 0.5856860585357047, "learning_rate": 4.867444825740389e-06, "loss": 0.9307, "step": 26460 }, { "epoch": 1.5996857436393304, "grad_norm": 0.5948474788306042, "learning_rate": 4.864115103875117e-06, "loss": 0.9093, "step": 26470 }, { "epoch": 1.6002900827944644, "grad_norm": 0.5702327706232243, "learning_rate": 4.8607854423157955e-06, "loss": 0.9269, "step": 26480 }, { "epoch": 1.6008944219495982, "grad_norm": 0.5417826012953707, "learning_rate": 4.8574558425401345e-06, "loss": 0.9341, "step": 26490 }, { "epoch": 1.601498761104732, "grad_norm": 0.646404666609463, "learning_rate": 4.854126306025813e-06, "loss": 0.8996, "step": 26500 }, { "epoch": 1.6021031002598658, "grad_norm": 0.6457680874161628, "learning_rate": 4.8507968342504875e-06, "loss": 0.9525, "step": 26510 }, { "epoch": 1.6027074394149996, "grad_norm": 0.7100155684707448, "learning_rate": 4.847467428691783e-06, "loss": 0.9508, "step": 26520 }, { "epoch": 1.6033117785701334, "grad_norm": 0.6657314539082194, "learning_rate": 4.8441380908272935e-06, "loss": 0.9223, "step": 26530 }, { "epoch": 1.6039161177252674, "grad_norm": 0.6506751308645118, "learning_rate": 4.840808822134585e-06, "loss": 0.9315, "step": 26540 }, { "epoch": 1.6045204568804012, "grad_norm": 0.8521558152037761, "learning_rate": 4.837479624091197e-06, "loss": 0.9347, "step": 26550 }, { "epoch": 1.6051247960355353, "grad_norm": 0.8520698279818828, "learning_rate": 4.834150498174628e-06, "loss": 0.907, "step": 26560 }, { "epoch": 1.605729135190669, "grad_norm": 0.8331079106737275, "learning_rate": 4.83082144586235e-06, "loss": 0.9355, "step": 26570 }, { "epoch": 1.6063334743458029, "grad_norm": 0.8750659000337639, "learning_rate": 4.827492468631808e-06, "loss": 0.9497, "step": 26580 }, { "epoch": 1.6069378135009367, "grad_norm": 0.9443038340742554, "learning_rate": 4.824163567960401e-06, "loss": 0.9464, "step": 26590 }, { "epoch": 1.6075421526560705, "grad_norm": 0.5442946433615261, "learning_rate": 4.8208347453255035e-06, "loss": 0.8995, "step": 26600 }, { "epoch": 1.6081464918112043, "grad_norm": 0.4956088241809183, "learning_rate": 4.817506002204455e-06, "loss": 0.9353, "step": 26610 }, { "epoch": 1.6087508309663383, "grad_norm": 0.529590223032268, "learning_rate": 4.814177340074552e-06, "loss": 0.9674, "step": 26620 }, { "epoch": 1.6093551701214723, "grad_norm": 0.5440362373993227, "learning_rate": 4.8108487604130645e-06, "loss": 0.9511, "step": 26630 }, { "epoch": 1.6099595092766061, "grad_norm": 0.5373674639904178, "learning_rate": 4.8075202646972216e-06, "loss": 0.908, "step": 26640 }, { "epoch": 1.61056384843174, "grad_norm": 0.5233883726919283, "learning_rate": 4.804191854404215e-06, "loss": 0.9278, "step": 26650 }, { "epoch": 1.6111681875868737, "grad_norm": 0.509363665624484, "learning_rate": 4.8008635310111995e-06, "loss": 0.9426, "step": 26660 }, { "epoch": 1.6117725267420075, "grad_norm": 0.5234340054230651, "learning_rate": 4.7975352959952875e-06, "loss": 0.9186, "step": 26670 }, { "epoch": 1.6123768658971414, "grad_norm": 0.5517044750143772, "learning_rate": 4.7942071508335605e-06, "loss": 0.9571, "step": 26680 }, { "epoch": 1.6129812050522754, "grad_norm": 0.5182221386890951, "learning_rate": 4.79087909700305e-06, "loss": 0.9594, "step": 26690 }, { "epoch": 1.6135855442074092, "grad_norm": 0.5921367162961881, "learning_rate": 4.787551135980753e-06, "loss": 0.9279, "step": 26700 }, { "epoch": 1.6141898833625432, "grad_norm": 0.5603120231246512, "learning_rate": 4.78422326924363e-06, "loss": 0.9334, "step": 26710 }, { "epoch": 1.614794222517677, "grad_norm": 0.5437730051893302, "learning_rate": 4.7808954982685855e-06, "loss": 0.907, "step": 26720 }, { "epoch": 1.6153985616728108, "grad_norm": 0.5226481124834631, "learning_rate": 4.777567824532495e-06, "loss": 0.9209, "step": 26730 }, { "epoch": 1.6160029008279446, "grad_norm": 0.5759004935924458, "learning_rate": 4.774240249512186e-06, "loss": 0.912, "step": 26740 }, { "epoch": 1.6166072399830784, "grad_norm": 0.5317313425080706, "learning_rate": 4.770912774684439e-06, "loss": 0.9251, "step": 26750 }, { "epoch": 1.6172115791382122, "grad_norm": 0.5219969850059324, "learning_rate": 4.767585401525996e-06, "loss": 0.9164, "step": 26760 }, { "epoch": 1.6178159182933463, "grad_norm": 0.5398824708514468, "learning_rate": 4.76425813151355e-06, "loss": 0.9268, "step": 26770 }, { "epoch": 1.61842025744848, "grad_norm": 0.5426289328942654, "learning_rate": 4.7609309661237485e-06, "loss": 0.925, "step": 26780 }, { "epoch": 1.619024596603614, "grad_norm": 0.5412258014841264, "learning_rate": 4.757603906833192e-06, "loss": 0.907, "step": 26790 }, { "epoch": 1.619628935758748, "grad_norm": 0.5371693293357467, "learning_rate": 4.754276955118439e-06, "loss": 0.9224, "step": 26800 }, { "epoch": 1.6202332749138817, "grad_norm": 0.5374754509218702, "learning_rate": 4.750950112455992e-06, "loss": 0.9242, "step": 26810 }, { "epoch": 1.6208376140690155, "grad_norm": 0.5361171486338071, "learning_rate": 4.747623380322312e-06, "loss": 0.9308, "step": 26820 }, { "epoch": 1.6214419532241493, "grad_norm": 0.5279326225702382, "learning_rate": 4.744296760193809e-06, "loss": 0.9221, "step": 26830 }, { "epoch": 1.622046292379283, "grad_norm": 0.5294536035839044, "learning_rate": 4.740970253546839e-06, "loss": 0.9427, "step": 26840 }, { "epoch": 1.6226506315344171, "grad_norm": 0.5444931966987818, "learning_rate": 4.737643861857715e-06, "loss": 0.9213, "step": 26850 }, { "epoch": 1.623254970689551, "grad_norm": 0.5492706543591206, "learning_rate": 4.734317586602696e-06, "loss": 0.9322, "step": 26860 }, { "epoch": 1.623859309844685, "grad_norm": 0.5420210930149884, "learning_rate": 4.7309914292579845e-06, "loss": 0.9302, "step": 26870 }, { "epoch": 1.6244636489998188, "grad_norm": 0.5202852539861623, "learning_rate": 4.727665391299737e-06, "loss": 0.935, "step": 26880 }, { "epoch": 1.6250679881549526, "grad_norm": 0.5813698065906621, "learning_rate": 4.724339474204058e-06, "loss": 0.9117, "step": 26890 }, { "epoch": 1.6256723273100864, "grad_norm": 0.6582789140299272, "learning_rate": 4.72101367944699e-06, "loss": 0.9151, "step": 26900 }, { "epoch": 1.6262766664652202, "grad_norm": 0.5730638098774922, "learning_rate": 4.717688008504529e-06, "loss": 0.9384, "step": 26910 }, { "epoch": 1.626881005620354, "grad_norm": 0.7523414520843882, "learning_rate": 4.714362462852614e-06, "loss": 0.9276, "step": 26920 }, { "epoch": 1.627485344775488, "grad_norm": 0.6482797417045263, "learning_rate": 4.711037043967126e-06, "loss": 0.9324, "step": 26930 }, { "epoch": 1.6280896839306218, "grad_norm": 0.6901000510769487, "learning_rate": 4.707711753323892e-06, "loss": 0.9413, "step": 26940 }, { "epoch": 1.6286940230857558, "grad_norm": 0.686288940202985, "learning_rate": 4.704386592398684e-06, "loss": 0.9447, "step": 26950 }, { "epoch": 1.6292983622408896, "grad_norm": 0.7053826906832571, "learning_rate": 4.701061562667211e-06, "loss": 0.9164, "step": 26960 }, { "epoch": 1.6299027013960234, "grad_norm": 0.633892867019406, "learning_rate": 4.697736665605127e-06, "loss": 0.9141, "step": 26970 }, { "epoch": 1.6305070405511572, "grad_norm": 0.7279680293555136, "learning_rate": 4.69441190268803e-06, "loss": 0.9329, "step": 26980 }, { "epoch": 1.631111379706291, "grad_norm": 0.6738772796273035, "learning_rate": 4.691087275391454e-06, "loss": 0.911, "step": 26990 }, { "epoch": 1.631715718861425, "grad_norm": 0.660674340450183, "learning_rate": 4.687762785190873e-06, "loss": 0.9352, "step": 27000 }, { "epoch": 1.6323200580165589, "grad_norm": 0.6186126346725852, "learning_rate": 4.6844384335617035e-06, "loss": 0.9354, "step": 27010 }, { "epoch": 1.632924397171693, "grad_norm": 0.6637131287284315, "learning_rate": 4.681114221979297e-06, "loss": 0.9383, "step": 27020 }, { "epoch": 1.6335287363268267, "grad_norm": 0.6709107255116934, "learning_rate": 4.677790151918945e-06, "loss": 0.9233, "step": 27030 }, { "epoch": 1.6341330754819605, "grad_norm": 0.6410581472298236, "learning_rate": 4.674466224855874e-06, "loss": 0.9213, "step": 27040 }, { "epoch": 1.6347374146370943, "grad_norm": 0.8034656058111185, "learning_rate": 4.671142442265252e-06, "loss": 0.9119, "step": 27050 }, { "epoch": 1.6353417537922281, "grad_norm": 0.7828599156256191, "learning_rate": 4.6678188056221744e-06, "loss": 0.9412, "step": 27060 }, { "epoch": 1.635946092947362, "grad_norm": 0.7989635418415998, "learning_rate": 4.6644953164016795e-06, "loss": 0.928, "step": 27070 }, { "epoch": 1.636550432102496, "grad_norm": 0.7915148899045632, "learning_rate": 4.66117197607874e-06, "loss": 0.9443, "step": 27080 }, { "epoch": 1.6371547712576298, "grad_norm": 0.8030523164757561, "learning_rate": 4.6578487861282555e-06, "loss": 0.9197, "step": 27090 }, { "epoch": 1.6377591104127638, "grad_norm": 0.8126809376864675, "learning_rate": 4.654525748025065e-06, "loss": 0.932, "step": 27100 }, { "epoch": 1.6383634495678976, "grad_norm": 0.7545653873681402, "learning_rate": 4.651202863243942e-06, "loss": 0.9009, "step": 27110 }, { "epoch": 1.6389677887230314, "grad_norm": 0.7669329066945321, "learning_rate": 4.6478801332595815e-06, "loss": 0.9149, "step": 27120 }, { "epoch": 1.6395721278781652, "grad_norm": 0.8014471866527094, "learning_rate": 4.644557559546622e-06, "loss": 0.9025, "step": 27130 }, { "epoch": 1.640176467033299, "grad_norm": 0.7725432760728462, "learning_rate": 4.641235143579628e-06, "loss": 0.9138, "step": 27140 }, { "epoch": 1.6407808061884328, "grad_norm": 1.2191053411337243, "learning_rate": 4.6379128868330896e-06, "loss": 0.9251, "step": 27150 }, { "epoch": 1.6413851453435668, "grad_norm": 1.1848172413315827, "learning_rate": 4.634590790781432e-06, "loss": 0.9322, "step": 27160 }, { "epoch": 1.6419894844987006, "grad_norm": 1.2722634508905812, "learning_rate": 4.6312688568990074e-06, "loss": 0.9096, "step": 27170 }, { "epoch": 1.6425938236538347, "grad_norm": 1.2060298131419236, "learning_rate": 4.6279470866600954e-06, "loss": 0.928, "step": 27180 }, { "epoch": 1.6431981628089685, "grad_norm": 1.2175083913934246, "learning_rate": 4.624625481538903e-06, "loss": 0.9265, "step": 27190 }, { "epoch": 1.6438025019641023, "grad_norm": 0.8684648078225902, "learning_rate": 4.621304043009566e-06, "loss": 0.929, "step": 27200 }, { "epoch": 1.644406841119236, "grad_norm": 0.9919958929756366, "learning_rate": 4.61798277254614e-06, "loss": 0.9368, "step": 27210 }, { "epoch": 1.6450111802743699, "grad_norm": 1.2010800461854234, "learning_rate": 4.6146616716226146e-06, "loss": 0.926, "step": 27220 }, { "epoch": 1.6456155194295037, "grad_norm": 0.7788699100934598, "learning_rate": 4.611340741712901e-06, "loss": 0.9472, "step": 27230 }, { "epoch": 1.6462198585846377, "grad_norm": 1.05596151064457, "learning_rate": 4.608019984290828e-06, "loss": 0.9349, "step": 27240 }, { "epoch": 1.6468241977397715, "grad_norm": 2.497988355890021, "learning_rate": 4.604699400830159e-06, "loss": 0.9559, "step": 27250 }, { "epoch": 1.6474285368949055, "grad_norm": 2.374901210182557, "learning_rate": 4.6013789928045734e-06, "loss": 0.9379, "step": 27260 }, { "epoch": 1.6480328760500393, "grad_norm": 2.3239279043846492, "learning_rate": 4.598058761687672e-06, "loss": 0.9271, "step": 27270 }, { "epoch": 1.6486372152051731, "grad_norm": 2.289244062461535, "learning_rate": 4.5947387089529795e-06, "loss": 0.9186, "step": 27280 }, { "epoch": 1.649241554360307, "grad_norm": 2.227535949967949, "learning_rate": 4.591418836073944e-06, "loss": 0.9411, "step": 27290 }, { "epoch": 1.6498458935154408, "grad_norm": 0.9153778250320579, "learning_rate": 4.588099144523927e-06, "loss": 0.9096, "step": 27300 }, { "epoch": 1.6504502326705748, "grad_norm": 0.9442181791376509, "learning_rate": 4.5847796357762155e-06, "loss": 0.9383, "step": 27310 }, { "epoch": 1.6510545718257086, "grad_norm": 0.8803522298189124, "learning_rate": 4.581460311304011e-06, "loss": 0.943, "step": 27320 }, { "epoch": 1.6516589109808426, "grad_norm": 0.8596887539071623, "learning_rate": 4.57814117258044e-06, "loss": 0.9484, "step": 27330 }, { "epoch": 1.6522632501359764, "grad_norm": 0.8858725049618714, "learning_rate": 4.574822221078535e-06, "loss": 0.9309, "step": 27340 }, { "epoch": 1.6528675892911102, "grad_norm": 1.0154273228509834, "learning_rate": 4.571503458271257e-06, "loss": 0.9156, "step": 27350 }, { "epoch": 1.653471928446244, "grad_norm": 0.9534906211499345, "learning_rate": 4.56818488563148e-06, "loss": 0.9227, "step": 27360 }, { "epoch": 1.6540762676013778, "grad_norm": 0.971604518397412, "learning_rate": 4.564866504631987e-06, "loss": 0.9372, "step": 27370 }, { "epoch": 1.6546806067565116, "grad_norm": 1.010528136988153, "learning_rate": 4.561548316745485e-06, "loss": 0.9431, "step": 27380 }, { "epoch": 1.6552849459116457, "grad_norm": 0.9629646922497487, "learning_rate": 4.558230323444592e-06, "loss": 0.9488, "step": 27390 }, { "epoch": 1.6558892850667795, "grad_norm": 0.984580801392413, "learning_rate": 4.554912526201837e-06, "loss": 0.9255, "step": 27400 }, { "epoch": 1.6564936242219135, "grad_norm": 1.063952951109643, "learning_rate": 4.551594926489665e-06, "loss": 0.9399, "step": 27410 }, { "epoch": 1.6570979633770473, "grad_norm": 1.0083497633968181, "learning_rate": 4.548277525780431e-06, "loss": 0.9228, "step": 27420 }, { "epoch": 1.657702302532181, "grad_norm": 1.0292318641142133, "learning_rate": 4.544960325546406e-06, "loss": 0.9318, "step": 27430 }, { "epoch": 1.658306641687315, "grad_norm": 1.0557518101294667, "learning_rate": 4.541643327259766e-06, "loss": 0.9208, "step": 27440 }, { "epoch": 1.6589109808424487, "grad_norm": 1.1012237967007068, "learning_rate": 4.538326532392604e-06, "loss": 0.9097, "step": 27450 }, { "epoch": 1.6595153199975825, "grad_norm": 1.0371946133413965, "learning_rate": 4.535009942416916e-06, "loss": 0.9256, "step": 27460 }, { "epoch": 1.6601196591527165, "grad_norm": 1.0545601402786666, "learning_rate": 4.531693558804611e-06, "loss": 0.925, "step": 27470 }, { "epoch": 1.6607239983078503, "grad_norm": 1.0306633799473977, "learning_rate": 4.528377383027508e-06, "loss": 0.9466, "step": 27480 }, { "epoch": 1.6613283374629844, "grad_norm": 1.101002036492239, "learning_rate": 4.525061416557327e-06, "loss": 0.9315, "step": 27490 }, { "epoch": 1.6619326766181182, "grad_norm": 1.1638059208900207, "learning_rate": 4.521745660865704e-06, "loss": 0.9476, "step": 27500 }, { "epoch": 1.662537015773252, "grad_norm": 1.1046363665358636, "learning_rate": 4.518430117424176e-06, "loss": 0.9358, "step": 27510 }, { "epoch": 1.6631413549283858, "grad_norm": 1.0586402476588566, "learning_rate": 4.515114787704186e-06, "loss": 0.9076, "step": 27520 }, { "epoch": 1.6637456940835196, "grad_norm": 1.0417259591512342, "learning_rate": 4.511799673177081e-06, "loss": 0.9257, "step": 27530 }, { "epoch": 1.6643500332386534, "grad_norm": 1.0628045004289932, "learning_rate": 4.508484775314121e-06, "loss": 0.9152, "step": 27540 }, { "epoch": 1.6649543723937874, "grad_norm": 0.9754013878320819, "learning_rate": 4.505170095586456e-06, "loss": 0.9181, "step": 27550 }, { "epoch": 1.6655587115489212, "grad_norm": 1.0003173477848608, "learning_rate": 4.501855635465151e-06, "loss": 0.9003, "step": 27560 }, { "epoch": 1.6661630507040552, "grad_norm": 0.9434455139856954, "learning_rate": 4.498541396421167e-06, "loss": 0.903, "step": 27570 }, { "epoch": 1.666767389859189, "grad_norm": 0.9498678319953696, "learning_rate": 4.49522737992537e-06, "loss": 0.9278, "step": 27580 }, { "epoch": 1.6673717290143228, "grad_norm": 0.9903893223510205, "learning_rate": 4.4919135874485255e-06, "loss": 0.9358, "step": 27590 }, { "epoch": 1.6679760681694566, "grad_norm": 1.0547366304881938, "learning_rate": 4.488600020461302e-06, "loss": 0.9257, "step": 27600 }, { "epoch": 1.6685804073245905, "grad_norm": 1.0855267447896846, "learning_rate": 4.485286680434263e-06, "loss": 0.9188, "step": 27610 }, { "epoch": 1.6691847464797243, "grad_norm": 1.14583175579124, "learning_rate": 4.481973568837877e-06, "loss": 0.9056, "step": 27620 }, { "epoch": 1.6697890856348583, "grad_norm": 1.0383279579670204, "learning_rate": 4.478660687142509e-06, "loss": 0.938, "step": 27630 }, { "epoch": 1.6703934247899923, "grad_norm": 0.9973117444619705, "learning_rate": 4.47534803681842e-06, "loss": 0.9387, "step": 27640 }, { "epoch": 1.6709977639451261, "grad_norm": 1.0781597736983577, "learning_rate": 4.472035619335768e-06, "loss": 0.9365, "step": 27650 }, { "epoch": 1.67160210310026, "grad_norm": 1.0086605048833426, "learning_rate": 4.468723436164615e-06, "loss": 0.9419, "step": 27660 }, { "epoch": 1.6722064422553937, "grad_norm": 0.9904528479548492, "learning_rate": 4.46541148877491e-06, "loss": 0.929, "step": 27670 }, { "epoch": 1.6728107814105275, "grad_norm": 1.0233910939852584, "learning_rate": 4.462099778636501e-06, "loss": 0.9385, "step": 27680 }, { "epoch": 1.6734151205656613, "grad_norm": 0.9639341387716622, "learning_rate": 4.45878830721913e-06, "loss": 0.9355, "step": 27690 }, { "epoch": 1.6740194597207954, "grad_norm": 1.1717558786884754, "learning_rate": 4.455477075992437e-06, "loss": 0.9106, "step": 27700 }, { "epoch": 1.6746237988759292, "grad_norm": 1.1384265237157696, "learning_rate": 4.452166086425948e-06, "loss": 0.9241, "step": 27710 }, { "epoch": 1.6752281380310632, "grad_norm": 1.1633279402551957, "learning_rate": 4.448855339989088e-06, "loss": 0.9319, "step": 27720 }, { "epoch": 1.675832477186197, "grad_norm": 1.1612404304794983, "learning_rate": 4.445544838151173e-06, "loss": 0.8954, "step": 27730 }, { "epoch": 1.6764368163413308, "grad_norm": 1.1340392899417444, "learning_rate": 4.442234582381405e-06, "loss": 0.9116, "step": 27740 }, { "epoch": 1.6770411554964646, "grad_norm": 1.0933124543260229, "learning_rate": 4.438924574148886e-06, "loss": 0.9376, "step": 27750 }, { "epoch": 1.6776454946515984, "grad_norm": 1.0506221165887413, "learning_rate": 4.435614814922603e-06, "loss": 0.9241, "step": 27760 }, { "epoch": 1.6782498338067322, "grad_norm": 1.0731449323418225, "learning_rate": 4.432305306171429e-06, "loss": 0.943, "step": 27770 }, { "epoch": 1.6788541729618662, "grad_norm": 1.069398047672288, "learning_rate": 4.428996049364132e-06, "loss": 0.943, "step": 27780 }, { "epoch": 1.679458512117, "grad_norm": 1.1520911903455413, "learning_rate": 4.4256870459693675e-06, "loss": 0.9369, "step": 27790 }, { "epoch": 1.680062851272134, "grad_norm": 1.0763261852922708, "learning_rate": 4.422378297455673e-06, "loss": 0.8949, "step": 27800 }, { "epoch": 1.6806671904272679, "grad_norm": 1.2195842914892054, "learning_rate": 4.41906980529148e-06, "loss": 0.9251, "step": 27810 }, { "epoch": 1.6812715295824017, "grad_norm": 1.1275208778935877, "learning_rate": 4.415761570945103e-06, "loss": 0.9299, "step": 27820 }, { "epoch": 1.6818758687375355, "grad_norm": 1.2393702085132507, "learning_rate": 4.4124535958847395e-06, "loss": 0.9311, "step": 27830 }, { "epoch": 1.6824802078926693, "grad_norm": 1.1479771102291836, "learning_rate": 4.409145881578476e-06, "loss": 0.9035, "step": 27840 }, { "epoch": 1.683084547047803, "grad_norm": 1.375477897581971, "learning_rate": 4.405838429494283e-06, "loss": 0.9689, "step": 27850 }, { "epoch": 1.683688886202937, "grad_norm": 1.2962495106155394, "learning_rate": 4.402531241100011e-06, "loss": 0.9061, "step": 27860 }, { "epoch": 1.684293225358071, "grad_norm": 1.3505056164295135, "learning_rate": 4.399224317863396e-06, "loss": 0.9073, "step": 27870 }, { "epoch": 1.684897564513205, "grad_norm": 1.4685163455523527, "learning_rate": 4.39591766125206e-06, "loss": 0.951, "step": 27880 }, { "epoch": 1.6855019036683387, "grad_norm": 1.3792194479756674, "learning_rate": 4.392611272733498e-06, "loss": 0.9541, "step": 27890 }, { "epoch": 1.6861062428234725, "grad_norm": 0.8647969899655263, "learning_rate": 4.389305153775092e-06, "loss": 0.9628, "step": 27900 }, { "epoch": 1.6867105819786063, "grad_norm": 0.8363252518643338, "learning_rate": 4.385999305844106e-06, "loss": 0.934, "step": 27910 }, { "epoch": 1.6873149211337402, "grad_norm": 0.8748052027046616, "learning_rate": 4.382693730407678e-06, "loss": 0.9233, "step": 27920 }, { "epoch": 1.687919260288874, "grad_norm": 0.8952598102240186, "learning_rate": 4.3793884289328305e-06, "loss": 0.9224, "step": 27930 }, { "epoch": 1.688523599444008, "grad_norm": 0.8234164956760309, "learning_rate": 4.37608340288646e-06, "loss": 0.9077, "step": 27940 }, { "epoch": 1.6891279385991418, "grad_norm": 0.7696182428019646, "learning_rate": 4.372778653735344e-06, "loss": 0.912, "step": 27950 }, { "epoch": 1.6897322777542758, "grad_norm": 0.7701060705873991, "learning_rate": 4.369474182946133e-06, "loss": 0.9189, "step": 27960 }, { "epoch": 1.6903366169094096, "grad_norm": 0.7608107666338965, "learning_rate": 4.366169991985363e-06, "loss": 0.915, "step": 27970 }, { "epoch": 1.6909409560645434, "grad_norm": 0.7742137049554035, "learning_rate": 4.362866082319432e-06, "loss": 0.9466, "step": 27980 }, { "epoch": 1.6915452952196772, "grad_norm": 0.7679991902779509, "learning_rate": 4.359562455414624e-06, "loss": 0.9095, "step": 27990 }, { "epoch": 1.692149634374811, "grad_norm": 0.8544752041076246, "learning_rate": 4.356259112737096e-06, "loss": 0.9157, "step": 28000 }, { "epoch": 1.692753973529945, "grad_norm": 0.7837174651831658, "learning_rate": 4.352956055752877e-06, "loss": 0.9313, "step": 28010 }, { "epoch": 1.6933583126850789, "grad_norm": 0.7913855009297621, "learning_rate": 4.349653285927866e-06, "loss": 0.9173, "step": 28020 }, { "epoch": 1.6939626518402129, "grad_norm": 0.853914467183262, "learning_rate": 4.3463508047278404e-06, "loss": 0.9082, "step": 28030 }, { "epoch": 1.6945669909953467, "grad_norm": 0.8147627434128741, "learning_rate": 4.343048613618448e-06, "loss": 0.9238, "step": 28040 }, { "epoch": 1.6951713301504805, "grad_norm": 0.6046438435047382, "learning_rate": 4.3397467140652046e-06, "loss": 0.9409, "step": 28050 }, { "epoch": 1.6957756693056143, "grad_norm": 0.5683928370681905, "learning_rate": 4.3364451075335e-06, "loss": 0.9142, "step": 28060 }, { "epoch": 1.696380008460748, "grad_norm": 0.548058025415299, "learning_rate": 4.333143795488592e-06, "loss": 0.9382, "step": 28070 }, { "epoch": 1.696984347615882, "grad_norm": 0.5464462211858346, "learning_rate": 4.329842779395611e-06, "loss": 0.9317, "step": 28080 }, { "epoch": 1.697588686771016, "grad_norm": 0.5727019712332703, "learning_rate": 4.326542060719549e-06, "loss": 0.9278, "step": 28090 }, { "epoch": 1.6981930259261497, "grad_norm": 0.6238429816951516, "learning_rate": 4.323241640925276e-06, "loss": 0.9513, "step": 28100 }, { "epoch": 1.6987973650812838, "grad_norm": 0.6336548613948914, "learning_rate": 4.319941521477519e-06, "loss": 0.9291, "step": 28110 }, { "epoch": 1.6994017042364176, "grad_norm": 0.62575323069499, "learning_rate": 4.316641703840879e-06, "loss": 0.9034, "step": 28120 }, { "epoch": 1.7000060433915514, "grad_norm": 0.6296687627610604, "learning_rate": 4.313342189479823e-06, "loss": 0.9548, "step": 28130 }, { "epoch": 1.7006103825466852, "grad_norm": 0.6170402035746204, "learning_rate": 4.310042979858677e-06, "loss": 0.9655, "step": 28140 }, { "epoch": 1.701214721701819, "grad_norm": 0.8211625468584101, "learning_rate": 4.306744076441637e-06, "loss": 0.9369, "step": 28150 }, { "epoch": 1.7018190608569528, "grad_norm": 0.8124031639679851, "learning_rate": 4.3034454806927645e-06, "loss": 0.9052, "step": 28160 }, { "epoch": 1.7024234000120868, "grad_norm": 0.8013036597678181, "learning_rate": 4.3001471940759784e-06, "loss": 0.9189, "step": 28170 }, { "epoch": 1.7030277391672206, "grad_norm": 0.8097437174096903, "learning_rate": 4.296849218055066e-06, "loss": 0.9561, "step": 28180 }, { "epoch": 1.7036320783223546, "grad_norm": 0.847539700590884, "learning_rate": 4.293551554093676e-06, "loss": 0.9369, "step": 28190 }, { "epoch": 1.7042364174774884, "grad_norm": 0.5622933389120811, "learning_rate": 4.290254203655313e-06, "loss": 0.921, "step": 28200 }, { "epoch": 1.7048407566326222, "grad_norm": 0.5273192963739084, "learning_rate": 4.286957168203351e-06, "loss": 0.9157, "step": 28210 }, { "epoch": 1.705445095787756, "grad_norm": 0.519120823987634, "learning_rate": 4.283660449201019e-06, "loss": 0.9164, "step": 28220 }, { "epoch": 1.7060494349428899, "grad_norm": 0.5598572393526107, "learning_rate": 4.280364048111405e-06, "loss": 0.9161, "step": 28230 }, { "epoch": 1.7066537740980237, "grad_norm": 0.5597854324729941, "learning_rate": 4.277067966397458e-06, "loss": 0.9408, "step": 28240 }, { "epoch": 1.7072581132531577, "grad_norm": 0.5216003791321182, "learning_rate": 4.273772205521987e-06, "loss": 0.9409, "step": 28250 }, { "epoch": 1.7078624524082915, "grad_norm": 0.5319526539254827, "learning_rate": 4.270476766947652e-06, "loss": 0.9162, "step": 28260 }, { "epoch": 1.7084667915634255, "grad_norm": 0.5627786996045264, "learning_rate": 4.267181652136977e-06, "loss": 0.9056, "step": 28270 }, { "epoch": 1.7090711307185593, "grad_norm": 0.545375117083162, "learning_rate": 4.263886862552341e-06, "loss": 0.9209, "step": 28280 }, { "epoch": 1.7096754698736931, "grad_norm": 0.5229453450887879, "learning_rate": 4.260592399655973e-06, "loss": 0.9248, "step": 28290 }, { "epoch": 1.710279809028827, "grad_norm": 0.5224402876622195, "learning_rate": 4.2572982649099645e-06, "loss": 0.9522, "step": 28300 }, { "epoch": 1.7108841481839607, "grad_norm": 0.5169319635994611, "learning_rate": 4.2540044597762596e-06, "loss": 0.9126, "step": 28310 }, { "epoch": 1.7114884873390948, "grad_norm": 0.5345270198463166, "learning_rate": 4.25071098571665e-06, "loss": 0.926, "step": 28320 }, { "epoch": 1.7120928264942286, "grad_norm": 0.5314485029825494, "learning_rate": 4.247417844192789e-06, "loss": 0.9294, "step": 28330 }, { "epoch": 1.7126971656493626, "grad_norm": 0.5407995469612804, "learning_rate": 4.2441250366661765e-06, "loss": 0.9329, "step": 28340 }, { "epoch": 1.7133015048044964, "grad_norm": 0.5400833004929693, "learning_rate": 4.240832564598168e-06, "loss": 0.9247, "step": 28350 }, { "epoch": 1.7139058439596302, "grad_norm": 0.5359789955879173, "learning_rate": 4.237540429449965e-06, "loss": 0.9211, "step": 28360 }, { "epoch": 1.714510183114764, "grad_norm": 0.5384733668120537, "learning_rate": 4.234248632682625e-06, "loss": 0.9368, "step": 28370 }, { "epoch": 1.7151145222698978, "grad_norm": 0.5448593145911083, "learning_rate": 4.230957175757054e-06, "loss": 0.9359, "step": 28380 }, { "epoch": 1.7157188614250316, "grad_norm": 0.5494418718750299, "learning_rate": 4.2276660601340035e-06, "loss": 0.9167, "step": 28390 }, { "epoch": 1.7163232005801656, "grad_norm": 0.5134345107264927, "learning_rate": 4.224375287274077e-06, "loss": 0.9369, "step": 28400 }, { "epoch": 1.7169275397352994, "grad_norm": 0.5225699328073128, "learning_rate": 4.221084858637729e-06, "loss": 0.9326, "step": 28410 }, { "epoch": 1.7175318788904335, "grad_norm": 0.5533382379104111, "learning_rate": 4.217794775685251e-06, "loss": 0.9612, "step": 28420 }, { "epoch": 1.7181362180455673, "grad_norm": 0.5230799665949161, "learning_rate": 4.21450503987679e-06, "loss": 0.9313, "step": 28430 }, { "epoch": 1.718740557200701, "grad_norm": 0.522582413903175, "learning_rate": 4.211215652672341e-06, "loss": 0.9377, "step": 28440 }, { "epoch": 1.7193448963558349, "grad_norm": 0.5847133430275941, "learning_rate": 4.207926615531732e-06, "loss": 0.9233, "step": 28450 }, { "epoch": 1.7199492355109687, "grad_norm": 0.5222721781588695, "learning_rate": 4.204637929914649e-06, "loss": 0.9345, "step": 28460 }, { "epoch": 1.7205535746661025, "grad_norm": 0.5471993669412948, "learning_rate": 4.201349597280614e-06, "loss": 0.9289, "step": 28470 }, { "epoch": 1.7211579138212365, "grad_norm": 0.5503192523531073, "learning_rate": 4.198061619088995e-06, "loss": 0.9509, "step": 28480 }, { "epoch": 1.7217622529763703, "grad_norm": 0.5719271620417735, "learning_rate": 4.194773996799e-06, "loss": 0.9525, "step": 28490 }, { "epoch": 1.7223665921315043, "grad_norm": 0.6593825695807976, "learning_rate": 4.1914867318696875e-06, "loss": 0.9437, "step": 28500 }, { "epoch": 1.7229709312866381, "grad_norm": 0.6288954149181362, "learning_rate": 4.188199825759944e-06, "loss": 0.9183, "step": 28510 }, { "epoch": 1.723575270441772, "grad_norm": 0.6646357931438769, "learning_rate": 4.184913279928508e-06, "loss": 0.9414, "step": 28520 }, { "epoch": 1.7241796095969057, "grad_norm": 0.6795611526732371, "learning_rate": 4.181627095833955e-06, "loss": 0.9253, "step": 28530 }, { "epoch": 1.7247839487520396, "grad_norm": 0.6452554054803482, "learning_rate": 4.178341274934697e-06, "loss": 0.9385, "step": 28540 }, { "epoch": 1.7253882879071734, "grad_norm": 0.6954579019471295, "learning_rate": 4.175055818688988e-06, "loss": 0.9197, "step": 28550 }, { "epoch": 1.7259926270623074, "grad_norm": 0.7210926932118932, "learning_rate": 4.17177072855492e-06, "loss": 0.9176, "step": 28560 }, { "epoch": 1.7265969662174412, "grad_norm": 0.7147529530363084, "learning_rate": 4.168486005990419e-06, "loss": 0.9292, "step": 28570 }, { "epoch": 1.7272013053725752, "grad_norm": 0.678852288447761, "learning_rate": 4.1652016524532524e-06, "loss": 0.9083, "step": 28580 }, { "epoch": 1.727805644527709, "grad_norm": 0.6898873988239523, "learning_rate": 4.161917669401021e-06, "loss": 0.9383, "step": 28590 }, { "epoch": 1.7284099836828428, "grad_norm": 0.6146637217519962, "learning_rate": 4.158634058291164e-06, "loss": 0.8997, "step": 28600 }, { "epoch": 1.7290143228379766, "grad_norm": 0.6219568538250213, "learning_rate": 4.15535082058095e-06, "loss": 0.9241, "step": 28610 }, { "epoch": 1.7296186619931104, "grad_norm": 0.6190849352140058, "learning_rate": 4.152067957727491e-06, "loss": 0.9377, "step": 28620 }, { "epoch": 1.7302230011482442, "grad_norm": 0.624568985291949, "learning_rate": 4.1487854711877215e-06, "loss": 0.9087, "step": 28630 }, { "epoch": 1.7308273403033783, "grad_norm": 0.6285974544687487, "learning_rate": 4.145503362418416e-06, "loss": 0.9186, "step": 28640 }, { "epoch": 1.731431679458512, "grad_norm": 0.7786380732845095, "learning_rate": 4.142221632876184e-06, "loss": 0.9262, "step": 28650 }, { "epoch": 1.732036018613646, "grad_norm": 0.7658717757022581, "learning_rate": 4.138940284017458e-06, "loss": 0.9699, "step": 28660 }, { "epoch": 1.73264035776878, "grad_norm": 0.7982729082319859, "learning_rate": 4.135659317298508e-06, "loss": 0.92, "step": 28670 }, { "epoch": 1.7332446969239137, "grad_norm": 0.8501974444355619, "learning_rate": 4.132378734175432e-06, "loss": 0.9233, "step": 28680 }, { "epoch": 1.7338490360790475, "grad_norm": 0.7876602712029557, "learning_rate": 4.129098536104161e-06, "loss": 0.9178, "step": 28690 }, { "epoch": 1.7344533752341813, "grad_norm": 0.753480921958508, "learning_rate": 4.1258187245404495e-06, "loss": 0.9044, "step": 28700 }, { "epoch": 1.7350577143893153, "grad_norm": 0.7808076662796362, "learning_rate": 4.1225393009398845e-06, "loss": 0.9173, "step": 28710 }, { "epoch": 1.7356620535444491, "grad_norm": 0.7909649978679867, "learning_rate": 4.11926026675788e-06, "loss": 0.9312, "step": 28720 }, { "epoch": 1.7362663926995832, "grad_norm": 0.8319812970621725, "learning_rate": 4.115981623449677e-06, "loss": 0.9449, "step": 28730 }, { "epoch": 1.736870731854717, "grad_norm": 0.7961357862062574, "learning_rate": 4.112703372470342e-06, "loss": 0.9136, "step": 28740 }, { "epoch": 1.7374750710098508, "grad_norm": 1.1975285217244283, "learning_rate": 4.109425515274771e-06, "loss": 0.9304, "step": 28750 }, { "epoch": 1.7380794101649846, "grad_norm": 1.1847021062700358, "learning_rate": 4.106148053317679e-06, "loss": 0.9223, "step": 28760 }, { "epoch": 1.7386837493201184, "grad_norm": 1.219984396493548, "learning_rate": 4.102870988053611e-06, "loss": 0.9238, "step": 28770 }, { "epoch": 1.7392880884752522, "grad_norm": 1.20594493272711, "learning_rate": 4.099594320936936e-06, "loss": 0.9099, "step": 28780 }, { "epoch": 1.7398924276303862, "grad_norm": 1.160492299566175, "learning_rate": 4.096318053421841e-06, "loss": 0.9097, "step": 28790 }, { "epoch": 1.74049676678552, "grad_norm": 0.8955515791682743, "learning_rate": 4.093042186962341e-06, "loss": 0.9198, "step": 28800 }, { "epoch": 1.741101105940654, "grad_norm": 0.8412730627715108, "learning_rate": 4.0897667230122725e-06, "loss": 0.9378, "step": 28810 }, { "epoch": 1.7417054450957878, "grad_norm": 1.0874227687016704, "learning_rate": 4.0864916630252895e-06, "loss": 0.9376, "step": 28820 }, { "epoch": 1.7423097842509216, "grad_norm": 0.9910514511056169, "learning_rate": 4.08321700845487e-06, "loss": 0.9506, "step": 28830 }, { "epoch": 1.7429141234060554, "grad_norm": 0.9682200652630557, "learning_rate": 4.079942760754313e-06, "loss": 0.9301, "step": 28840 }, { "epoch": 1.7435184625611893, "grad_norm": 2.209609858110814, "learning_rate": 4.076668921376733e-06, "loss": 0.936, "step": 28850 }, { "epoch": 1.744122801716323, "grad_norm": 2.4471751842831977, "learning_rate": 4.073395491775065e-06, "loss": 0.9287, "step": 28860 }, { "epoch": 1.744727140871457, "grad_norm": 2.2251776326386654, "learning_rate": 4.070122473402067e-06, "loss": 0.9101, "step": 28870 }, { "epoch": 1.7453314800265909, "grad_norm": 2.572562552049361, "learning_rate": 4.066849867710304e-06, "loss": 0.927, "step": 28880 }, { "epoch": 1.745935819181725, "grad_norm": 2.427725408261804, "learning_rate": 4.063577676152168e-06, "loss": 0.9615, "step": 28890 }, { "epoch": 1.7465401583368587, "grad_norm": 0.8817407460860198, "learning_rate": 4.060305900179864e-06, "loss": 0.9511, "step": 28900 }, { "epoch": 1.7471444974919925, "grad_norm": 1.050339568385794, "learning_rate": 4.0570345412454084e-06, "loss": 0.9193, "step": 28910 }, { "epoch": 1.7477488366471263, "grad_norm": 0.9012741801894236, "learning_rate": 4.053763600800637e-06, "loss": 0.9326, "step": 28920 }, { "epoch": 1.7483531758022601, "grad_norm": 0.9800412900520905, "learning_rate": 4.050493080297202e-06, "loss": 0.898, "step": 28930 }, { "epoch": 1.748957514957394, "grad_norm": 0.8971395239471041, "learning_rate": 4.04722298118656e-06, "loss": 0.9188, "step": 28940 }, { "epoch": 1.749561854112528, "grad_norm": 0.973458149617079, "learning_rate": 4.043953304919992e-06, "loss": 0.9156, "step": 28950 }, { "epoch": 1.7501661932676618, "grad_norm": 1.052742223871775, "learning_rate": 4.040684052948585e-06, "loss": 0.9191, "step": 28960 }, { "epoch": 1.7507705324227958, "grad_norm": 0.9611456134382498, "learning_rate": 4.037415226723235e-06, "loss": 0.9251, "step": 28970 }, { "epoch": 1.7513748715779296, "grad_norm": 0.9966884991248691, "learning_rate": 4.034146827694657e-06, "loss": 0.9452, "step": 28980 }, { "epoch": 1.7519792107330634, "grad_norm": 0.8969999396104078, "learning_rate": 4.030878857313369e-06, "loss": 0.9136, "step": 28990 }, { "epoch": 1.7525835498881972, "grad_norm": 1.0350502990855937, "learning_rate": 4.027611317029705e-06, "loss": 0.9434, "step": 29000 }, { "epoch": 1.753187889043331, "grad_norm": 1.1586626710428998, "learning_rate": 4.024344208293802e-06, "loss": 0.9008, "step": 29010 }, { "epoch": 1.753792228198465, "grad_norm": 1.0205193790008382, "learning_rate": 4.0210775325556085e-06, "loss": 0.9299, "step": 29020 }, { "epoch": 1.7543965673535988, "grad_norm": 1.0719006845399093, "learning_rate": 4.017811291264884e-06, "loss": 0.9464, "step": 29030 }, { "epoch": 1.7550009065087329, "grad_norm": 1.0047855323878288, "learning_rate": 4.014545485871188e-06, "loss": 0.9241, "step": 29040 }, { "epoch": 1.7556052456638667, "grad_norm": 0.9992269022985253, "learning_rate": 4.011280117823892e-06, "loss": 0.9105, "step": 29050 }, { "epoch": 1.7562095848190005, "grad_norm": 1.0874729697241927, "learning_rate": 4.008015188572175e-06, "loss": 0.9412, "step": 29060 }, { "epoch": 1.7568139239741343, "grad_norm": 0.9958719412959735, "learning_rate": 4.004750699565013e-06, "loss": 0.9237, "step": 29070 }, { "epoch": 1.757418263129268, "grad_norm": 1.0005849729762109, "learning_rate": 4.001486652251194e-06, "loss": 0.9353, "step": 29080 }, { "epoch": 1.7580226022844019, "grad_norm": 1.061390336705989, "learning_rate": 3.99822304807931e-06, "loss": 0.9494, "step": 29090 }, { "epoch": 1.758626941439536, "grad_norm": 1.1619495194693477, "learning_rate": 3.9949598884977505e-06, "loss": 0.9428, "step": 29100 }, { "epoch": 1.7592312805946697, "grad_norm": 1.187098886512169, "learning_rate": 3.991697174954715e-06, "loss": 0.9059, "step": 29110 }, { "epoch": 1.7598356197498037, "grad_norm": 1.1184433101080973, "learning_rate": 3.988434908898198e-06, "loss": 0.9486, "step": 29120 }, { "epoch": 1.7604399589049375, "grad_norm": 1.078999904905132, "learning_rate": 3.985173091776e-06, "loss": 0.95, "step": 29130 }, { "epoch": 1.7610442980600713, "grad_norm": 1.0633109201823634, "learning_rate": 3.981911725035721e-06, "loss": 0.912, "step": 29140 }, { "epoch": 1.7616486372152051, "grad_norm": 1.04109343203107, "learning_rate": 3.978650810124763e-06, "loss": 0.9326, "step": 29150 }, { "epoch": 1.762252976370339, "grad_norm": 0.9767783925535196, "learning_rate": 3.975390348490321e-06, "loss": 0.9285, "step": 29160 }, { "epoch": 1.7628573155254728, "grad_norm": 0.9541021152711103, "learning_rate": 3.972130341579397e-06, "loss": 0.9375, "step": 29170 }, { "epoch": 1.7634616546806068, "grad_norm": 0.9962720333217153, "learning_rate": 3.968870790838789e-06, "loss": 0.9357, "step": 29180 }, { "epoch": 1.7640659938357406, "grad_norm": 0.9353997194295306, "learning_rate": 3.965611697715087e-06, "loss": 0.9261, "step": 29190 }, { "epoch": 1.7646703329908746, "grad_norm": 0.953982881743324, "learning_rate": 3.9623530636546845e-06, "loss": 0.9219, "step": 29200 }, { "epoch": 1.7652746721460084, "grad_norm": 1.057404587595876, "learning_rate": 3.9590948901037715e-06, "loss": 0.9209, "step": 29210 }, { "epoch": 1.7658790113011422, "grad_norm": 1.0200504242372836, "learning_rate": 3.955837178508325e-06, "loss": 0.9431, "step": 29220 }, { "epoch": 1.766483350456276, "grad_norm": 1.0265144195798135, "learning_rate": 3.952579930314128e-06, "loss": 0.9393, "step": 29230 }, { "epoch": 1.7670876896114098, "grad_norm": 1.0893425750456653, "learning_rate": 3.9493231469667495e-06, "loss": 0.9288, "step": 29240 }, { "epoch": 1.7676920287665436, "grad_norm": 0.9947770454973495, "learning_rate": 3.946066829911558e-06, "loss": 0.9394, "step": 29250 }, { "epoch": 1.7682963679216777, "grad_norm": 1.0256234731425078, "learning_rate": 3.942810980593711e-06, "loss": 0.9118, "step": 29260 }, { "epoch": 1.7689007070768115, "grad_norm": 0.9498250051806073, "learning_rate": 3.939555600458162e-06, "loss": 0.935, "step": 29270 }, { "epoch": 1.7695050462319455, "grad_norm": 0.9382613784732785, "learning_rate": 3.936300690949651e-06, "loss": 0.9164, "step": 29280 }, { "epoch": 1.7701093853870793, "grad_norm": 0.9036835464689572, "learning_rate": 3.933046253512714e-06, "loss": 0.9168, "step": 29290 }, { "epoch": 1.770713724542213, "grad_norm": 1.1260063614778104, "learning_rate": 3.929792289591678e-06, "loss": 0.9362, "step": 29300 }, { "epoch": 1.771318063697347, "grad_norm": 1.112082283564898, "learning_rate": 3.926538800630655e-06, "loss": 0.9274, "step": 29310 }, { "epoch": 1.7719224028524807, "grad_norm": 1.1487598047703247, "learning_rate": 3.9232857880735495e-06, "loss": 0.9207, "step": 29320 }, { "epoch": 1.7725267420076145, "grad_norm": 1.105047230704751, "learning_rate": 3.920033253364054e-06, "loss": 0.8914, "step": 29330 }, { "epoch": 1.7731310811627485, "grad_norm": 1.1355765610830528, "learning_rate": 3.916781197945652e-06, "loss": 0.9338, "step": 29340 }, { "epoch": 1.7737354203178826, "grad_norm": 1.0743764475386823, "learning_rate": 3.913529623261606e-06, "loss": 0.9306, "step": 29350 }, { "epoch": 1.7743397594730164, "grad_norm": 1.0345611859648796, "learning_rate": 3.910278530754974e-06, "loss": 0.9074, "step": 29360 }, { "epoch": 1.7749440986281502, "grad_norm": 1.073161976092642, "learning_rate": 3.907027921868596e-06, "loss": 0.9304, "step": 29370 }, { "epoch": 1.775548437783284, "grad_norm": 1.1161521036350304, "learning_rate": 3.903777798045097e-06, "loss": 0.9362, "step": 29380 }, { "epoch": 1.7761527769384178, "grad_norm": 1.099926306775401, "learning_rate": 3.900528160726887e-06, "loss": 0.9266, "step": 29390 }, { "epoch": 1.7767571160935516, "grad_norm": 1.1106376869829082, "learning_rate": 3.897279011356164e-06, "loss": 0.9056, "step": 29400 }, { "epoch": 1.7773614552486856, "grad_norm": 1.1998338745463926, "learning_rate": 3.8940303513749e-06, "loss": 0.9048, "step": 29410 }, { "epoch": 1.7779657944038194, "grad_norm": 1.1290712572608752, "learning_rate": 3.89078218222486e-06, "loss": 0.9064, "step": 29420 }, { "epoch": 1.7785701335589534, "grad_norm": 1.1499888068921444, "learning_rate": 3.887534505347589e-06, "loss": 0.8799, "step": 29430 }, { "epoch": 1.7791744727140872, "grad_norm": 1.155229460266719, "learning_rate": 3.8842873221844065e-06, "loss": 0.9328, "step": 29440 }, { "epoch": 1.779778811869221, "grad_norm": 1.4312733567142073, "learning_rate": 3.8810406341764204e-06, "loss": 0.9336, "step": 29450 }, { "epoch": 1.7803831510243548, "grad_norm": 1.4028306837952726, "learning_rate": 3.877794442764519e-06, "loss": 0.9495, "step": 29460 }, { "epoch": 1.7809874901794887, "grad_norm": 1.4269343928293714, "learning_rate": 3.874548749389362e-06, "loss": 0.922, "step": 29470 }, { "epoch": 1.7815918293346225, "grad_norm": 1.3734190732144356, "learning_rate": 3.871303555491398e-06, "loss": 0.9483, "step": 29480 }, { "epoch": 1.7821961684897565, "grad_norm": 1.3969604695593751, "learning_rate": 3.868058862510848e-06, "loss": 0.9114, "step": 29490 }, { "epoch": 1.7828005076448903, "grad_norm": 0.8289691556286879, "learning_rate": 3.864814671887713e-06, "loss": 0.9175, "step": 29500 }, { "epoch": 1.7834048468000243, "grad_norm": 0.8170960549761034, "learning_rate": 3.861570985061769e-06, "loss": 0.907, "step": 29510 }, { "epoch": 1.7840091859551581, "grad_norm": 0.8548432678197053, "learning_rate": 3.858327803472572e-06, "loss": 0.9264, "step": 29520 }, { "epoch": 1.784613525110292, "grad_norm": 0.8558348024869225, "learning_rate": 3.855085128559448e-06, "loss": 0.9356, "step": 29530 }, { "epoch": 1.7852178642654257, "grad_norm": 0.8739095987409106, "learning_rate": 3.851842961761505e-06, "loss": 0.9312, "step": 29540 }, { "epoch": 1.7858222034205595, "grad_norm": 0.745830819800408, "learning_rate": 3.848601304517622e-06, "loss": 0.9196, "step": 29550 }, { "epoch": 1.7864265425756933, "grad_norm": 0.7888270545204027, "learning_rate": 3.8453601582664505e-06, "loss": 0.9096, "step": 29560 }, { "epoch": 1.7870308817308274, "grad_norm": 0.8158106354151947, "learning_rate": 3.842119524446416e-06, "loss": 0.9411, "step": 29570 }, { "epoch": 1.7876352208859612, "grad_norm": 0.7702930550041105, "learning_rate": 3.838879404495721e-06, "loss": 0.9249, "step": 29580 }, { "epoch": 1.7882395600410952, "grad_norm": 0.7729868215873599, "learning_rate": 3.835639799852332e-06, "loss": 0.9203, "step": 29590 }, { "epoch": 1.788843899196229, "grad_norm": 0.7555366467832316, "learning_rate": 3.832400711953993e-06, "loss": 0.9332, "step": 29600 }, { "epoch": 1.7894482383513628, "grad_norm": 0.759661534444941, "learning_rate": 3.829162142238219e-06, "loss": 0.9235, "step": 29610 }, { "epoch": 1.7900525775064966, "grad_norm": 0.7744893226266466, "learning_rate": 3.825924092142289e-06, "loss": 0.9193, "step": 29620 }, { "epoch": 1.7906569166616304, "grad_norm": 0.8580460263875727, "learning_rate": 3.822686563103257e-06, "loss": 0.9258, "step": 29630 }, { "epoch": 1.7912612558167642, "grad_norm": 0.8390815248522523, "learning_rate": 3.819449556557944e-06, "loss": 0.9055, "step": 29640 }, { "epoch": 1.7918655949718982, "grad_norm": 0.5784632011880171, "learning_rate": 3.816213073942939e-06, "loss": 0.9033, "step": 29650 }, { "epoch": 1.792469934127032, "grad_norm": 0.5822405172874409, "learning_rate": 3.8129771166945973e-06, "loss": 0.918, "step": 29660 }, { "epoch": 1.793074273282166, "grad_norm": 0.5745599296947681, "learning_rate": 3.809741686249043e-06, "loss": 0.9184, "step": 29670 }, { "epoch": 1.7936786124372999, "grad_norm": 0.5814953579888945, "learning_rate": 3.8065067840421678e-06, "loss": 0.9241, "step": 29680 }, { "epoch": 1.7942829515924337, "grad_norm": 0.6131249058045749, "learning_rate": 3.803272411509623e-06, "loss": 0.9463, "step": 29690 }, { "epoch": 1.7948872907475675, "grad_norm": 0.6544800732532693, "learning_rate": 3.80003857008683e-06, "loss": 0.9398, "step": 29700 }, { "epoch": 1.7954916299027013, "grad_norm": 0.6235371676133413, "learning_rate": 3.7968052612089767e-06, "loss": 0.9374, "step": 29710 }, { "epoch": 1.7960959690578353, "grad_norm": 0.6424185197063359, "learning_rate": 3.7935724863110063e-06, "loss": 0.9493, "step": 29720 }, { "epoch": 1.796700308212969, "grad_norm": 0.6578890377391214, "learning_rate": 3.7903402468276313e-06, "loss": 0.9155, "step": 29730 }, { "epoch": 1.7973046473681031, "grad_norm": 0.636163900304483, "learning_rate": 3.7871085441933274e-06, "loss": 0.9263, "step": 29740 }, { "epoch": 1.797908986523237, "grad_norm": 0.8474354965583315, "learning_rate": 3.7838773798423274e-06, "loss": 0.9182, "step": 29750 }, { "epoch": 1.7985133256783707, "grad_norm": 0.8137597660358254, "learning_rate": 3.780646755208627e-06, "loss": 0.9044, "step": 29760 }, { "epoch": 1.7991176648335045, "grad_norm": 0.8104204356966251, "learning_rate": 3.7774166717259852e-06, "loss": 0.924, "step": 29770 }, { "epoch": 1.7997220039886384, "grad_norm": 0.8995237577266365, "learning_rate": 3.774187130827916e-06, "loss": 0.9126, "step": 29780 }, { "epoch": 1.8003263431437722, "grad_norm": 0.8485054889944411, "learning_rate": 3.7709581339476965e-06, "loss": 0.9097, "step": 29790 }, { "epoch": 1.8009306822989062, "grad_norm": 0.5582675352448696, "learning_rate": 3.767729682518363e-06, "loss": 0.9325, "step": 29800 }, { "epoch": 1.80153502145404, "grad_norm": 0.5299149026002937, "learning_rate": 3.7645017779727044e-06, "loss": 0.936, "step": 29810 }, { "epoch": 1.802139360609174, "grad_norm": 0.549243609096665, "learning_rate": 3.761274421743271e-06, "loss": 0.9255, "step": 29820 }, { "epoch": 1.8027436997643078, "grad_norm": 0.5734958902150582, "learning_rate": 3.7580476152623726e-06, "loss": 0.9436, "step": 29830 }, { "epoch": 1.8033480389194416, "grad_norm": 0.5129167765861892, "learning_rate": 3.754821359962066e-06, "loss": 0.9043, "step": 29840 }, { "epoch": 1.8039523780745754, "grad_norm": 0.5439238123126162, "learning_rate": 3.7515956572741715e-06, "loss": 0.9355, "step": 29850 }, { "epoch": 1.8045567172297092, "grad_norm": 0.5425997075853455, "learning_rate": 3.7483705086302634e-06, "loss": 0.9224, "step": 29860 }, { "epoch": 1.805161056384843, "grad_norm": 0.5666056204932807, "learning_rate": 3.7451459154616645e-06, "loss": 0.9265, "step": 29870 }, { "epoch": 1.805765395539977, "grad_norm": 0.5411669870680204, "learning_rate": 3.741921879199457e-06, "loss": 0.919, "step": 29880 }, { "epoch": 1.8063697346951109, "grad_norm": 0.5466863759604617, "learning_rate": 3.7386984012744733e-06, "loss": 0.9302, "step": 29890 }, { "epoch": 1.8069740738502449, "grad_norm": 0.5334813296440465, "learning_rate": 3.7354754831172986e-06, "loss": 0.9364, "step": 29900 }, { "epoch": 1.8075784130053787, "grad_norm": 0.5420136916352104, "learning_rate": 3.732253126158268e-06, "loss": 0.9093, "step": 29910 }, { "epoch": 1.8081827521605125, "grad_norm": 0.5218457704359801, "learning_rate": 3.7290313318274716e-06, "loss": 0.9287, "step": 29920 }, { "epoch": 1.8087870913156463, "grad_norm": 0.5281767381873376, "learning_rate": 3.725810101554745e-06, "loss": 0.9239, "step": 29930 }, { "epoch": 1.80939143047078, "grad_norm": 0.5066693381495344, "learning_rate": 3.722589436769676e-06, "loss": 0.9131, "step": 29940 }, { "epoch": 1.809995769625914, "grad_norm": 0.5529085145395609, "learning_rate": 3.7193693389016038e-06, "loss": 0.9193, "step": 29950 }, { "epoch": 1.810600108781048, "grad_norm": 0.5456503050449465, "learning_rate": 3.716149809379609e-06, "loss": 0.9121, "step": 29960 }, { "epoch": 1.8112044479361817, "grad_norm": 0.5200752140586525, "learning_rate": 3.7129308496325266e-06, "loss": 0.921, "step": 29970 }, { "epoch": 1.8118087870913158, "grad_norm": 0.5672171447850323, "learning_rate": 3.709712461088939e-06, "loss": 0.941, "step": 29980 }, { "epoch": 1.8124131262464496, "grad_norm": 0.5418840252388737, "learning_rate": 3.706494645177168e-06, "loss": 0.9063, "step": 29990 }, { "epoch": 1.8130174654015834, "grad_norm": 0.5669367221438031, "learning_rate": 3.703277403325287e-06, "loss": 0.9235, "step": 30000 }, { "epoch": 1.8136218045567172, "grad_norm": 0.5397365739890289, "learning_rate": 3.7000607369611154e-06, "loss": 0.918, "step": 30010 }, { "epoch": 1.814226143711851, "grad_norm": 0.5471272823103205, "learning_rate": 3.6968446475122146e-06, "loss": 0.9394, "step": 30020 }, { "epoch": 1.814830482866985, "grad_norm": 0.5022206240626622, "learning_rate": 3.6936291364058896e-06, "loss": 0.9018, "step": 30030 }, { "epoch": 1.8154348220221188, "grad_norm": 0.5689934571594994, "learning_rate": 3.6904142050691895e-06, "loss": 0.9272, "step": 30040 }, { "epoch": 1.8160391611772528, "grad_norm": 0.5836049423164198, "learning_rate": 3.687199854928909e-06, "loss": 0.9528, "step": 30050 }, { "epoch": 1.8166435003323866, "grad_norm": 0.5345911301300967, "learning_rate": 3.6839860874115774e-06, "loss": 0.9256, "step": 30060 }, { "epoch": 1.8172478394875204, "grad_norm": 0.5528500728852526, "learning_rate": 3.680772903943474e-06, "loss": 0.8837, "step": 30070 }, { "epoch": 1.8178521786426542, "grad_norm": 0.5582100586078845, "learning_rate": 3.677560305950616e-06, "loss": 0.9386, "step": 30080 }, { "epoch": 1.818456517797788, "grad_norm": 0.5750340346612108, "learning_rate": 3.674348294858756e-06, "loss": 0.9268, "step": 30090 }, { "epoch": 1.8190608569529219, "grad_norm": 0.5711408078346777, "learning_rate": 3.671136872093392e-06, "loss": 0.9222, "step": 30100 }, { "epoch": 1.8196651961080559, "grad_norm": 0.6112269646455496, "learning_rate": 3.667926039079761e-06, "loss": 0.9002, "step": 30110 }, { "epoch": 1.8202695352631897, "grad_norm": 0.6140953401552849, "learning_rate": 3.6647157972428328e-06, "loss": 0.9179, "step": 30120 }, { "epoch": 1.8208738744183237, "grad_norm": 0.6661035494700247, "learning_rate": 3.66150614800732e-06, "loss": 0.9118, "step": 30130 }, { "epoch": 1.8214782135734575, "grad_norm": 0.637954177846652, "learning_rate": 3.6582970927976702e-06, "loss": 0.9516, "step": 30140 }, { "epoch": 1.8220825527285913, "grad_norm": 0.7360743073942373, "learning_rate": 3.655088633038067e-06, "loss": 0.9471, "step": 30150 }, { "epoch": 1.8226868918837251, "grad_norm": 0.6994191092481861, "learning_rate": 3.65188077015243e-06, "loss": 0.9444, "step": 30160 }, { "epoch": 1.823291231038859, "grad_norm": 0.6573074414989548, "learning_rate": 3.6486735055644173e-06, "loss": 0.9032, "step": 30170 }, { "epoch": 1.8238955701939927, "grad_norm": 0.6775804757434722, "learning_rate": 3.6454668406974135e-06, "loss": 0.9447, "step": 30180 }, { "epoch": 1.8244999093491268, "grad_norm": 0.708305126261626, "learning_rate": 3.6422607769745456e-06, "loss": 0.9432, "step": 30190 }, { "epoch": 1.8251042485042606, "grad_norm": 0.6076070351746073, "learning_rate": 3.6390553158186703e-06, "loss": 0.9376, "step": 30200 }, { "epoch": 1.8257085876593946, "grad_norm": 0.637275892662838, "learning_rate": 3.6358504586523736e-06, "loss": 0.9233, "step": 30210 }, { "epoch": 1.8263129268145284, "grad_norm": 0.6130783908031918, "learning_rate": 3.6326462068979797e-06, "loss": 0.8998, "step": 30220 }, { "epoch": 1.8269172659696622, "grad_norm": 0.665250724142056, "learning_rate": 3.629442561977541e-06, "loss": 0.9545, "step": 30230 }, { "epoch": 1.827521605124796, "grad_norm": 0.654058771807477, "learning_rate": 3.626239525312837e-06, "loss": 0.9266, "step": 30240 }, { "epoch": 1.8281259442799298, "grad_norm": 0.7780445923338133, "learning_rate": 3.623037098325384e-06, "loss": 0.9113, "step": 30250 }, { "epoch": 1.8287302834350636, "grad_norm": 0.8166429109439057, "learning_rate": 3.6198352824364256e-06, "loss": 0.9344, "step": 30260 }, { "epoch": 1.8293346225901976, "grad_norm": 0.7998785126830879, "learning_rate": 3.6166340790669303e-06, "loss": 0.9297, "step": 30270 }, { "epoch": 1.8299389617453314, "grad_norm": 0.7842408660186287, "learning_rate": 3.613433489637599e-06, "loss": 0.914, "step": 30280 }, { "epoch": 1.8305433009004655, "grad_norm": 0.7974565081699884, "learning_rate": 3.6102335155688596e-06, "loss": 0.9102, "step": 30290 }, { "epoch": 1.8311476400555993, "grad_norm": 0.7869548592747461, "learning_rate": 3.6070341582808642e-06, "loss": 0.9346, "step": 30300 }, { "epoch": 1.831751979210733, "grad_norm": 0.8505850628567252, "learning_rate": 3.603835419193493e-06, "loss": 0.9115, "step": 30310 }, { "epoch": 1.8323563183658669, "grad_norm": 0.8235690567516089, "learning_rate": 3.600637299726355e-06, "loss": 0.947, "step": 30320 }, { "epoch": 1.8329606575210007, "grad_norm": 0.835277289975126, "learning_rate": 3.5974398012987772e-06, "loss": 0.9301, "step": 30330 }, { "epoch": 1.8335649966761345, "grad_norm": 0.8111892465325004, "learning_rate": 3.594242925329816e-06, "loss": 0.9283, "step": 30340 }, { "epoch": 1.8341693358312685, "grad_norm": 1.1618986078975626, "learning_rate": 3.591046673238251e-06, "loss": 0.9071, "step": 30350 }, { "epoch": 1.8347736749864023, "grad_norm": 1.1745416774634267, "learning_rate": 3.587851046442587e-06, "loss": 0.91, "step": 30360 }, { "epoch": 1.8353780141415363, "grad_norm": 1.1765824367765059, "learning_rate": 3.5846560463610437e-06, "loss": 0.9182, "step": 30370 }, { "epoch": 1.8359823532966701, "grad_norm": 1.2134141546475001, "learning_rate": 3.5814616744115704e-06, "loss": 0.9351, "step": 30380 }, { "epoch": 1.836586692451804, "grad_norm": 1.2168181895369916, "learning_rate": 3.578267932011835e-06, "loss": 0.9345, "step": 30390 }, { "epoch": 1.8371910316069378, "grad_norm": 0.9185490107846815, "learning_rate": 3.5750748205792253e-06, "loss": 0.9358, "step": 30400 }, { "epoch": 1.8377953707620716, "grad_norm": 1.0675845787646152, "learning_rate": 3.571882341530848e-06, "loss": 0.9402, "step": 30410 }, { "epoch": 1.8383997099172056, "grad_norm": 0.8571488978672929, "learning_rate": 3.5686904962835346e-06, "loss": 0.9211, "step": 30420 }, { "epoch": 1.8390040490723394, "grad_norm": 0.9220315575467384, "learning_rate": 3.5654992862538276e-06, "loss": 0.9232, "step": 30430 }, { "epoch": 1.8396083882274734, "grad_norm": 0.8264268468980654, "learning_rate": 3.562308712857992e-06, "loss": 0.9162, "step": 30440 }, { "epoch": 1.8402127273826072, "grad_norm": 2.2574627287404274, "learning_rate": 3.5591187775120134e-06, "loss": 0.9166, "step": 30450 }, { "epoch": 1.840817066537741, "grad_norm": 2.2135004127350095, "learning_rate": 3.555929481631586e-06, "loss": 0.9076, "step": 30460 }, { "epoch": 1.8414214056928748, "grad_norm": 2.677380574768971, "learning_rate": 3.5527408266321262e-06, "loss": 0.9314, "step": 30470 }, { "epoch": 1.8420257448480086, "grad_norm": 2.447771349329891, "learning_rate": 3.549552813928767e-06, "loss": 0.9296, "step": 30480 }, { "epoch": 1.8426300840031424, "grad_norm": 2.651620784471785, "learning_rate": 3.5463654449363505e-06, "loss": 0.9351, "step": 30490 }, { "epoch": 1.8432344231582765, "grad_norm": 1.0185018940566928, "learning_rate": 3.543178721069438e-06, "loss": 0.9163, "step": 30500 }, { "epoch": 1.8438387623134103, "grad_norm": 0.9436772332216543, "learning_rate": 3.539992643742305e-06, "loss": 0.9188, "step": 30510 }, { "epoch": 1.8444431014685443, "grad_norm": 1.0513732532776594, "learning_rate": 3.5368072143689335e-06, "loss": 0.9181, "step": 30520 }, { "epoch": 1.845047440623678, "grad_norm": 0.9461286171780051, "learning_rate": 3.5336224343630267e-06, "loss": 0.9295, "step": 30530 }, { "epoch": 1.845651779778812, "grad_norm": 0.88231914964534, "learning_rate": 3.530438305137993e-06, "loss": 0.9117, "step": 30540 }, { "epoch": 1.8462561189339457, "grad_norm": 0.9438100821375899, "learning_rate": 3.5272548281069563e-06, "loss": 0.9268, "step": 30550 }, { "epoch": 1.8468604580890795, "grad_norm": 0.9553533991613056, "learning_rate": 3.5240720046827458e-06, "loss": 0.9232, "step": 30560 }, { "epoch": 1.8474647972442133, "grad_norm": 0.9836899162349655, "learning_rate": 3.5208898362779086e-06, "loss": 0.9366, "step": 30570 }, { "epoch": 1.8480691363993473, "grad_norm": 0.9719354566636513, "learning_rate": 3.517708324304693e-06, "loss": 0.927, "step": 30580 }, { "epoch": 1.8486734755544811, "grad_norm": 0.965896212983014, "learning_rate": 3.51452747017506e-06, "loss": 0.915, "step": 30590 }, { "epoch": 1.8492778147096152, "grad_norm": 1.2662065726308087, "learning_rate": 3.51134727530068e-06, "loss": 0.9268, "step": 30600 }, { "epoch": 1.849882153864749, "grad_norm": 1.060891510417651, "learning_rate": 3.508167741092926e-06, "loss": 0.9491, "step": 30610 }, { "epoch": 1.8504864930198828, "grad_norm": 0.9586644529022527, "learning_rate": 3.5049888689628827e-06, "loss": 0.9221, "step": 30620 }, { "epoch": 1.8510908321750166, "grad_norm": 1.0214462184512128, "learning_rate": 3.5018106603213404e-06, "loss": 0.942, "step": 30630 }, { "epoch": 1.8516951713301504, "grad_norm": 1.077764225001201, "learning_rate": 3.4986331165787896e-06, "loss": 0.906, "step": 30640 }, { "epoch": 1.8522995104852842, "grad_norm": 1.0375479696404344, "learning_rate": 3.4954562391454317e-06, "loss": 0.9185, "step": 30650 }, { "epoch": 1.8529038496404182, "grad_norm": 1.0183334429773074, "learning_rate": 3.4922800294311727e-06, "loss": 0.9199, "step": 30660 }, { "epoch": 1.853508188795552, "grad_norm": 1.0247050118179013, "learning_rate": 3.4891044888456154e-06, "loss": 0.8909, "step": 30670 }, { "epoch": 1.854112527950686, "grad_norm": 1.01157509584948, "learning_rate": 3.485929618798074e-06, "loss": 0.9212, "step": 30680 }, { "epoch": 1.8547168671058198, "grad_norm": 1.0678949102994058, "learning_rate": 3.482755420697558e-06, "loss": 0.8864, "step": 30690 }, { "epoch": 1.8553212062609536, "grad_norm": 1.0933914107847948, "learning_rate": 3.479581895952786e-06, "loss": 0.9284, "step": 30700 }, { "epoch": 1.8559255454160875, "grad_norm": 1.113446707454662, "learning_rate": 3.476409045972169e-06, "loss": 0.9523, "step": 30710 }, { "epoch": 1.8565298845712213, "grad_norm": 1.1045607837128297, "learning_rate": 3.473236872163826e-06, "loss": 0.9193, "step": 30720 }, { "epoch": 1.8571342237263553, "grad_norm": 1.527476823151143, "learning_rate": 3.4700653759355745e-06, "loss": 0.9364, "step": 30730 }, { "epoch": 1.857738562881489, "grad_norm": 1.1566134019644148, "learning_rate": 3.4668945586949263e-06, "loss": 0.93, "step": 30740 }, { "epoch": 1.8583429020366231, "grad_norm": 0.9978520510823714, "learning_rate": 3.463724421849097e-06, "loss": 0.9384, "step": 30750 }, { "epoch": 1.858947241191757, "grad_norm": 0.9322939975599035, "learning_rate": 3.460554966805002e-06, "loss": 0.9166, "step": 30760 }, { "epoch": 1.8595515803468907, "grad_norm": 0.9883399437836303, "learning_rate": 3.4573861949692456e-06, "loss": 0.9175, "step": 30770 }, { "epoch": 1.8601559195020245, "grad_norm": 0.9417747363846148, "learning_rate": 3.4542181077481372e-06, "loss": 0.9653, "step": 30780 }, { "epoch": 1.8607602586571583, "grad_norm": 0.9538677075385023, "learning_rate": 3.4510507065476796e-06, "loss": 0.934, "step": 30790 }, { "epoch": 1.8613645978122921, "grad_norm": 1.019560210074955, "learning_rate": 3.44788399277357e-06, "loss": 0.9129, "step": 30800 }, { "epoch": 1.8619689369674262, "grad_norm": 0.9560064580599331, "learning_rate": 3.4447179678311997e-06, "loss": 0.9131, "step": 30810 }, { "epoch": 1.86257327612256, "grad_norm": 1.0449354786666403, "learning_rate": 3.4415526331256605e-06, "loss": 0.9166, "step": 30820 }, { "epoch": 1.863177615277694, "grad_norm": 1.024719714605216, "learning_rate": 3.4383879900617278e-06, "loss": 0.9068, "step": 30830 }, { "epoch": 1.8637819544328278, "grad_norm": 1.0446297480005429, "learning_rate": 3.4352240400438785e-06, "loss": 0.9253, "step": 30840 }, { "epoch": 1.8643862935879616, "grad_norm": 0.9550403219574327, "learning_rate": 3.4320607844762817e-06, "loss": 0.9093, "step": 30850 }, { "epoch": 1.8649906327430954, "grad_norm": 1.00531406267595, "learning_rate": 3.42889822476279e-06, "loss": 0.9174, "step": 30860 }, { "epoch": 1.8655949718982292, "grad_norm": 0.9404242416485341, "learning_rate": 3.4257363623069563e-06, "loss": 0.956, "step": 30870 }, { "epoch": 1.866199311053363, "grad_norm": 0.9447630325252553, "learning_rate": 3.4225751985120213e-06, "loss": 0.9141, "step": 30880 }, { "epoch": 1.866803650208497, "grad_norm": 0.999555036542683, "learning_rate": 3.419414734780912e-06, "loss": 0.9312, "step": 30890 }, { "epoch": 1.8674079893636308, "grad_norm": 1.1077082767806796, "learning_rate": 3.416254972516249e-06, "loss": 0.9047, "step": 30900 }, { "epoch": 1.8680123285187649, "grad_norm": 1.0959927693105036, "learning_rate": 3.413095913120342e-06, "loss": 0.9432, "step": 30910 }, { "epoch": 1.8686166676738987, "grad_norm": 1.1033794414580878, "learning_rate": 3.4099375579951844e-06, "loss": 0.9132, "step": 30920 }, { "epoch": 1.8692210068290325, "grad_norm": 1.1074681876020167, "learning_rate": 3.4067799085424608e-06, "loss": 0.9266, "step": 30930 }, { "epoch": 1.8698253459841663, "grad_norm": 1.1408154445524386, "learning_rate": 3.4036229661635413e-06, "loss": 0.9297, "step": 30940 }, { "epoch": 1.8704296851393, "grad_norm": 1.0881833617425594, "learning_rate": 3.400466732259482e-06, "loss": 0.8956, "step": 30950 }, { "epoch": 1.8710340242944339, "grad_norm": 1.0945169020029208, "learning_rate": 3.3973112082310245e-06, "loss": 0.9257, "step": 30960 }, { "epoch": 1.871638363449568, "grad_norm": 1.0416844339914666, "learning_rate": 3.394156395478598e-06, "loss": 0.9064, "step": 30970 }, { "epoch": 1.8722427026047017, "grad_norm": 1.060926738099387, "learning_rate": 3.3910022954023103e-06, "loss": 0.9211, "step": 30980 }, { "epoch": 1.8728470417598357, "grad_norm": 1.1132565375562784, "learning_rate": 3.3878489094019585e-06, "loss": 0.9295, "step": 30990 }, { "epoch": 1.8734513809149695, "grad_norm": 1.2230042833869643, "learning_rate": 3.3846962388770222e-06, "loss": 0.912, "step": 31000 }, { "epoch": 1.8740557200701033, "grad_norm": 1.1457856142182463, "learning_rate": 3.381544285226658e-06, "loss": 0.9169, "step": 31010 }, { "epoch": 1.8746600592252372, "grad_norm": 1.11155203698507, "learning_rate": 3.3783930498497106e-06, "loss": 0.9132, "step": 31020 }, { "epoch": 1.875264398380371, "grad_norm": 1.1447256528594774, "learning_rate": 3.375242534144706e-06, "loss": 0.9093, "step": 31030 }, { "epoch": 1.8758687375355048, "grad_norm": 1.1454029496747797, "learning_rate": 3.3720927395098447e-06, "loss": 0.9209, "step": 31040 }, { "epoch": 1.8764730766906388, "grad_norm": 1.4096350282334087, "learning_rate": 3.3689436673430134e-06, "loss": 0.9353, "step": 31050 }, { "epoch": 1.8770774158457728, "grad_norm": 1.3979284987475882, "learning_rate": 3.3657953190417724e-06, "loss": 0.9267, "step": 31060 }, { "epoch": 1.8776817550009066, "grad_norm": 1.4316298495958675, "learning_rate": 3.3626476960033695e-06, "loss": 0.9514, "step": 31070 }, { "epoch": 1.8782860941560404, "grad_norm": 1.3647796504721923, "learning_rate": 3.3595007996247196e-06, "loss": 0.917, "step": 31080 }, { "epoch": 1.8788904333111742, "grad_norm": 1.3748002302568774, "learning_rate": 3.356354631302423e-06, "loss": 0.9336, "step": 31090 }, { "epoch": 1.879494772466308, "grad_norm": 0.9362148447253937, "learning_rate": 3.353209192432757e-06, "loss": 0.9304, "step": 31100 }, { "epoch": 1.8800991116214418, "grad_norm": 0.8104977880293032, "learning_rate": 3.350064484411668e-06, "loss": 0.935, "step": 31110 }, { "epoch": 1.8807034507765759, "grad_norm": 0.8236573016929736, "learning_rate": 3.3469205086347855e-06, "loss": 0.9156, "step": 31120 }, { "epoch": 1.8813077899317097, "grad_norm": 0.9094453948721859, "learning_rate": 3.3437772664974133e-06, "loss": 0.9147, "step": 31130 }, { "epoch": 1.8819121290868437, "grad_norm": 0.8413589420763598, "learning_rate": 3.3406347593945236e-06, "loss": 0.9009, "step": 31140 }, { "epoch": 1.8825164682419775, "grad_norm": 0.8132417851350491, "learning_rate": 3.3374929887207685e-06, "loss": 0.9128, "step": 31150 }, { "epoch": 1.8831208073971113, "grad_norm": 0.78740111180985, "learning_rate": 3.334351955870472e-06, "loss": 0.9158, "step": 31160 }, { "epoch": 1.883725146552245, "grad_norm": 0.760767733312395, "learning_rate": 3.331211662237628e-06, "loss": 0.9234, "step": 31170 }, { "epoch": 1.884329485707379, "grad_norm": 0.726505885919191, "learning_rate": 3.328072109215905e-06, "loss": 0.9045, "step": 31180 }, { "epoch": 1.8849338248625127, "grad_norm": 0.780864972848366, "learning_rate": 3.3249332981986428e-06, "loss": 0.9231, "step": 31190 }, { "epoch": 1.8855381640176467, "grad_norm": 0.8307752643958988, "learning_rate": 3.3217952305788504e-06, "loss": 0.9031, "step": 31200 }, { "epoch": 1.8861425031727805, "grad_norm": 0.7988999242594705, "learning_rate": 3.3186579077492065e-06, "loss": 0.9288, "step": 31210 }, { "epoch": 1.8867468423279146, "grad_norm": 0.8253620466099063, "learning_rate": 3.315521331102063e-06, "loss": 0.9258, "step": 31220 }, { "epoch": 1.8873511814830484, "grad_norm": 0.8132882462442603, "learning_rate": 3.3123855020294344e-06, "loss": 0.9201, "step": 31230 }, { "epoch": 1.8879555206381822, "grad_norm": 0.815256218425578, "learning_rate": 3.309250421923009e-06, "loss": 0.9367, "step": 31240 }, { "epoch": 1.888559859793316, "grad_norm": 0.5957479022040869, "learning_rate": 3.306116092174143e-06, "loss": 0.9337, "step": 31250 }, { "epoch": 1.8891641989484498, "grad_norm": 0.5555668046882692, "learning_rate": 3.3029825141738525e-06, "loss": 0.9293, "step": 31260 }, { "epoch": 1.8897685381035836, "grad_norm": 0.6161523499411489, "learning_rate": 3.2998496893128274e-06, "loss": 0.9179, "step": 31270 }, { "epoch": 1.8903728772587176, "grad_norm": 0.551205275812608, "learning_rate": 3.296717618981423e-06, "loss": 0.9509, "step": 31280 }, { "epoch": 1.8909772164138514, "grad_norm": 0.5853362971975156, "learning_rate": 3.2935863045696525e-06, "loss": 0.931, "step": 31290 }, { "epoch": 1.8915815555689854, "grad_norm": 0.6315686524480645, "learning_rate": 3.290455747467203e-06, "loss": 0.935, "step": 31300 }, { "epoch": 1.8921858947241192, "grad_norm": 0.6538485953542557, "learning_rate": 3.287325949063419e-06, "loss": 0.9343, "step": 31310 }, { "epoch": 1.892790233879253, "grad_norm": 0.6710681251782127, "learning_rate": 3.2841969107473103e-06, "loss": 0.9383, "step": 31320 }, { "epoch": 1.8933945730343869, "grad_norm": 0.5931635692804301, "learning_rate": 3.281068633907549e-06, "loss": 0.9117, "step": 31330 }, { "epoch": 1.8939989121895207, "grad_norm": 0.6270552248394079, "learning_rate": 3.277941119932473e-06, "loss": 0.8916, "step": 31340 }, { "epoch": 1.8946032513446545, "grad_norm": 0.7927137373887444, "learning_rate": 3.2748143702100733e-06, "loss": 0.9119, "step": 31350 }, { "epoch": 1.8952075904997885, "grad_norm": 0.8757437613610626, "learning_rate": 3.2716883861280095e-06, "loss": 0.9516, "step": 31360 }, { "epoch": 1.8958119296549223, "grad_norm": 0.8059562565636221, "learning_rate": 3.2685631690735987e-06, "loss": 0.9228, "step": 31370 }, { "epoch": 1.8964162688100563, "grad_norm": 0.876840530022881, "learning_rate": 3.2654387204338193e-06, "loss": 0.9386, "step": 31380 }, { "epoch": 1.8970206079651901, "grad_norm": 0.9382330964159984, "learning_rate": 3.262315041595303e-06, "loss": 0.92, "step": 31390 }, { "epoch": 1.897624947120324, "grad_norm": 0.5300094989498622, "learning_rate": 3.2591921339443467e-06, "loss": 0.9173, "step": 31400 }, { "epoch": 1.8982292862754577, "grad_norm": 0.5590988149399575, "learning_rate": 3.256069998866903e-06, "loss": 0.9247, "step": 31410 }, { "epoch": 1.8988336254305915, "grad_norm": 0.5378023720053102, "learning_rate": 3.2529486377485767e-06, "loss": 0.9448, "step": 31420 }, { "epoch": 1.8994379645857256, "grad_norm": 0.5165235671647236, "learning_rate": 3.249828051974637e-06, "loss": 0.9086, "step": 31430 }, { "epoch": 1.9000423037408594, "grad_norm": 0.5722286288813279, "learning_rate": 3.2467082429300047e-06, "loss": 0.9351, "step": 31440 }, { "epoch": 1.9006466428959934, "grad_norm": 0.5417172030527615, "learning_rate": 3.243589211999254e-06, "loss": 0.9089, "step": 31450 }, { "epoch": 1.9012509820511272, "grad_norm": 0.5927939206647315, "learning_rate": 3.240470960566618e-06, "loss": 0.9262, "step": 31460 }, { "epoch": 1.901855321206261, "grad_norm": 0.5226270742463195, "learning_rate": 3.2373534900159833e-06, "loss": 0.9084, "step": 31470 }, { "epoch": 1.9024596603613948, "grad_norm": 0.567034355947198, "learning_rate": 3.2342368017308855e-06, "loss": 0.9158, "step": 31480 }, { "epoch": 1.9030639995165286, "grad_norm": 0.5339493036133449, "learning_rate": 3.231120897094517e-06, "loss": 0.9234, "step": 31490 }, { "epoch": 1.9036683386716624, "grad_norm": 0.5726067936928292, "learning_rate": 3.2280057774897256e-06, "loss": 0.9208, "step": 31500 }, { "epoch": 1.9042726778267964, "grad_norm": 0.5207599374433414, "learning_rate": 3.224891444299001e-06, "loss": 0.8899, "step": 31510 }, { "epoch": 1.9048770169819302, "grad_norm": 0.5191165272309807, "learning_rate": 3.221777898904492e-06, "loss": 0.9287, "step": 31520 }, { "epoch": 1.9054813561370643, "grad_norm": 0.5486190737368021, "learning_rate": 3.2186651426879976e-06, "loss": 0.9298, "step": 31530 }, { "epoch": 1.906085695292198, "grad_norm": 0.5500060209410718, "learning_rate": 3.215553177030961e-06, "loss": 0.9397, "step": 31540 }, { "epoch": 1.9066900344473319, "grad_norm": 0.4900360459171041, "learning_rate": 3.2124420033144783e-06, "loss": 0.9346, "step": 31550 }, { "epoch": 1.9072943736024657, "grad_norm": 0.5677897663859682, "learning_rate": 3.2093316229192968e-06, "loss": 0.916, "step": 31560 }, { "epoch": 1.9078987127575995, "grad_norm": 0.571046731051788, "learning_rate": 3.2062220372258047e-06, "loss": 0.9402, "step": 31570 }, { "epoch": 1.9085030519127333, "grad_norm": 0.5675328164879303, "learning_rate": 3.2031132476140436e-06, "loss": 0.9332, "step": 31580 }, { "epoch": 1.9091073910678673, "grad_norm": 0.5550489226848586, "learning_rate": 3.2000052554636997e-06, "loss": 0.9312, "step": 31590 }, { "epoch": 1.9097117302230011, "grad_norm": 0.5598651286430216, "learning_rate": 3.1968980621541047e-06, "loss": 0.9067, "step": 31600 }, { "epoch": 1.9103160693781351, "grad_norm": 0.5435394896623816, "learning_rate": 3.1937916690642356e-06, "loss": 0.9021, "step": 31610 }, { "epoch": 1.910920408533269, "grad_norm": 0.5270720182040158, "learning_rate": 3.190686077572718e-06, "loss": 0.9449, "step": 31620 }, { "epoch": 1.9115247476884027, "grad_norm": 0.5531124347734957, "learning_rate": 3.1875812890578138e-06, "loss": 0.9117, "step": 31630 }, { "epoch": 1.9121290868435366, "grad_norm": 0.5626353241899941, "learning_rate": 3.1844773048974365e-06, "loss": 0.9048, "step": 31640 }, { "epoch": 1.9127334259986704, "grad_norm": 0.5357859581854699, "learning_rate": 3.181374126469141e-06, "loss": 0.9118, "step": 31650 }, { "epoch": 1.9133377651538042, "grad_norm": 0.5551032153901071, "learning_rate": 3.1782717551501195e-06, "loss": 0.9089, "step": 31660 }, { "epoch": 1.9139421043089382, "grad_norm": 0.5643872963285096, "learning_rate": 3.1751701923172117e-06, "loss": 0.9099, "step": 31670 }, { "epoch": 1.914546443464072, "grad_norm": 0.5475899852929832, "learning_rate": 3.1720694393468976e-06, "loss": 0.9397, "step": 31680 }, { "epoch": 1.915150782619206, "grad_norm": 0.5910932896915573, "learning_rate": 3.168969497615295e-06, "loss": 0.9064, "step": 31690 }, { "epoch": 1.9157551217743398, "grad_norm": 0.6582655237850477, "learning_rate": 3.165870368498164e-06, "loss": 0.9493, "step": 31700 }, { "epoch": 1.9163594609294736, "grad_norm": 0.5871662553314744, "learning_rate": 3.1627720533709028e-06, "loss": 0.8935, "step": 31710 }, { "epoch": 1.9169638000846074, "grad_norm": 0.6627514787628234, "learning_rate": 3.1596745536085503e-06, "loss": 0.9068, "step": 31720 }, { "epoch": 1.9175681392397412, "grad_norm": 0.6638914032720941, "learning_rate": 3.1565778705857804e-06, "loss": 0.8906, "step": 31730 }, { "epoch": 1.9181724783948753, "grad_norm": 0.6386018044940778, "learning_rate": 3.1534820056769066e-06, "loss": 0.9355, "step": 31740 }, { "epoch": 1.918776817550009, "grad_norm": 0.6921545337885953, "learning_rate": 3.1503869602558824e-06, "loss": 0.9241, "step": 31750 }, { "epoch": 1.919381156705143, "grad_norm": 0.7130650941805561, "learning_rate": 3.1472927356962885e-06, "loss": 0.9401, "step": 31760 }, { "epoch": 1.919985495860277, "grad_norm": 0.7041319097258212, "learning_rate": 3.1441993333713506e-06, "loss": 0.9269, "step": 31770 }, { "epoch": 1.9205898350154107, "grad_norm": 0.6685905670555268, "learning_rate": 3.1411067546539277e-06, "loss": 0.9088, "step": 31780 }, { "epoch": 1.9211941741705445, "grad_norm": 0.6793384253852527, "learning_rate": 3.1380150009165066e-06, "loss": 0.9367, "step": 31790 }, { "epoch": 1.9217985133256783, "grad_norm": 0.6417536972951596, "learning_rate": 3.1349240735312157e-06, "loss": 0.9253, "step": 31800 }, { "epoch": 1.922402852480812, "grad_norm": 0.6182334685408064, "learning_rate": 3.1318339738698166e-06, "loss": 0.9275, "step": 31810 }, { "epoch": 1.9230071916359461, "grad_norm": 0.6451272615634099, "learning_rate": 3.1287447033036955e-06, "loss": 0.9416, "step": 31820 }, { "epoch": 1.92361153079108, "grad_norm": 0.6708339279732021, "learning_rate": 3.125656263203879e-06, "loss": 0.9096, "step": 31830 }, { "epoch": 1.924215869946214, "grad_norm": 0.6310388425930847, "learning_rate": 3.122568654941022e-06, "loss": 0.9281, "step": 31840 }, { "epoch": 1.9248202091013478, "grad_norm": 0.8092513423955408, "learning_rate": 3.1194818798854094e-06, "loss": 0.9269, "step": 31850 }, { "epoch": 1.9254245482564816, "grad_norm": 0.8020465918270393, "learning_rate": 3.116395939406957e-06, "loss": 0.9286, "step": 31860 }, { "epoch": 1.9260288874116154, "grad_norm": 0.7906053954025566, "learning_rate": 3.113310834875213e-06, "loss": 0.9216, "step": 31870 }, { "epoch": 1.9266332265667492, "grad_norm": 0.8188340326221923, "learning_rate": 3.1102265676593485e-06, "loss": 0.9476, "step": 31880 }, { "epoch": 1.927237565721883, "grad_norm": 0.750355734497124, "learning_rate": 3.107143139128168e-06, "loss": 0.9321, "step": 31890 }, { "epoch": 1.927841904877017, "grad_norm": 0.7863238217817754, "learning_rate": 3.1040605506501053e-06, "loss": 0.9259, "step": 31900 }, { "epoch": 1.9284462440321508, "grad_norm": 0.741469798061842, "learning_rate": 3.100978803593213e-06, "loss": 0.9148, "step": 31910 }, { "epoch": 1.9290505831872848, "grad_norm": 0.7765148417313548, "learning_rate": 3.097897899325179e-06, "loss": 0.9407, "step": 31920 }, { "epoch": 1.9296549223424186, "grad_norm": 0.7856535721489776, "learning_rate": 3.0948178392133155e-06, "loss": 0.9345, "step": 31930 }, { "epoch": 1.9302592614975524, "grad_norm": 0.7946613756760031, "learning_rate": 3.091738624624554e-06, "loss": 0.9231, "step": 31940 }, { "epoch": 1.9308636006526863, "grad_norm": 1.128315483264186, "learning_rate": 3.088660256925459e-06, "loss": 0.9214, "step": 31950 }, { "epoch": 1.93146793980782, "grad_norm": 1.1836628003029233, "learning_rate": 3.0855827374822134e-06, "loss": 0.9404, "step": 31960 }, { "epoch": 1.9320722789629539, "grad_norm": 1.1618924226568106, "learning_rate": 3.0825060676606266e-06, "loss": 0.9083, "step": 31970 }, { "epoch": 1.9326766181180879, "grad_norm": 1.199156684298891, "learning_rate": 3.0794302488261283e-06, "loss": 0.9241, "step": 31980 }, { "epoch": 1.9332809572732217, "grad_norm": 1.2470218375580546, "learning_rate": 3.076355282343776e-06, "loss": 0.9297, "step": 31990 }, { "epoch": 1.9338852964283557, "grad_norm": 0.8216792201198375, "learning_rate": 3.0732811695782394e-06, "loss": 0.9244, "step": 32000 }, { "epoch": 1.9344896355834895, "grad_norm": 0.8290002407210196, "learning_rate": 3.0702079118938182e-06, "loss": 0.9312, "step": 32010 }, { "epoch": 1.9350939747386233, "grad_norm": 1.4219101536771561, "learning_rate": 3.06713551065443e-06, "loss": 0.9356, "step": 32020 }, { "epoch": 1.9356983138937571, "grad_norm": 0.8077893072465951, "learning_rate": 3.0640639672236123e-06, "loss": 0.9127, "step": 32030 }, { "epoch": 1.936302653048891, "grad_norm": 0.93829282607337, "learning_rate": 3.0609932829645185e-06, "loss": 0.9397, "step": 32040 }, { "epoch": 1.9369069922040247, "grad_norm": 2.394815372434293, "learning_rate": 3.0579234592399243e-06, "loss": 0.9344, "step": 32050 }, { "epoch": 1.9375113313591588, "grad_norm": 2.4282761480067343, "learning_rate": 3.0548544974122257e-06, "loss": 0.9401, "step": 32060 }, { "epoch": 1.9381156705142926, "grad_norm": 2.25535593102297, "learning_rate": 3.0517863988434294e-06, "loss": 0.9177, "step": 32070 }, { "epoch": 1.9387200096694266, "grad_norm": 2.348962629243723, "learning_rate": 3.0487191648951654e-06, "loss": 0.9266, "step": 32080 }, { "epoch": 1.9393243488245604, "grad_norm": 2.840143765784924, "learning_rate": 3.0456527969286757e-06, "loss": 0.9464, "step": 32090 }, { "epoch": 1.9399286879796942, "grad_norm": 0.875474637443051, "learning_rate": 3.042587296304821e-06, "loss": 0.9023, "step": 32100 }, { "epoch": 1.940533027134828, "grad_norm": 0.9977103761292808, "learning_rate": 3.0395226643840735e-06, "loss": 0.9262, "step": 32110 }, { "epoch": 1.9411373662899618, "grad_norm": 0.9233311417519634, "learning_rate": 3.0364589025265257e-06, "loss": 0.9267, "step": 32120 }, { "epoch": 1.9417417054450958, "grad_norm": 0.8686267646710731, "learning_rate": 3.0333960120918772e-06, "loss": 0.914, "step": 32130 }, { "epoch": 1.9423460446002296, "grad_norm": 0.9021681688215947, "learning_rate": 3.030333994439444e-06, "loss": 0.938, "step": 32140 }, { "epoch": 1.9429503837553637, "grad_norm": 0.897337222074805, "learning_rate": 3.0272728509281577e-06, "loss": 0.901, "step": 32150 }, { "epoch": 1.9435547229104975, "grad_norm": 0.9915272990335809, "learning_rate": 3.024212582916556e-06, "loss": 0.9353, "step": 32160 }, { "epoch": 1.9441590620656313, "grad_norm": 0.9920408114625213, "learning_rate": 3.0211531917627922e-06, "loss": 0.9181, "step": 32170 }, { "epoch": 1.944763401220765, "grad_norm": 0.922240495540258, "learning_rate": 3.0180946788246314e-06, "loss": 0.907, "step": 32180 }, { "epoch": 1.9453677403758989, "grad_norm": 0.8906856500303493, "learning_rate": 3.0150370454594425e-06, "loss": 0.9126, "step": 32190 }, { "epoch": 1.9459720795310327, "grad_norm": 1.0091078276857819, "learning_rate": 3.011980293024212e-06, "loss": 0.91, "step": 32200 }, { "epoch": 1.9465764186861667, "grad_norm": 0.9846864296048661, "learning_rate": 3.008924422875531e-06, "loss": 0.9033, "step": 32210 }, { "epoch": 1.9471807578413005, "grad_norm": 1.1174820841487947, "learning_rate": 3.0058694363695994e-06, "loss": 0.9246, "step": 32220 }, { "epoch": 1.9477850969964345, "grad_norm": 1.055839686448261, "learning_rate": 3.0028153348622247e-06, "loss": 0.9287, "step": 32230 }, { "epoch": 1.9483894361515683, "grad_norm": 0.9914626314915221, "learning_rate": 2.999762119708825e-06, "loss": 0.9239, "step": 32240 }, { "epoch": 1.9489937753067021, "grad_norm": 1.1239900727366239, "learning_rate": 2.996709792264419e-06, "loss": 0.9302, "step": 32250 }, { "epoch": 1.949598114461836, "grad_norm": 0.9709258613994516, "learning_rate": 2.993658353883636e-06, "loss": 0.9203, "step": 32260 }, { "epoch": 1.9502024536169698, "grad_norm": 0.9804946366958124, "learning_rate": 2.9906078059207126e-06, "loss": 0.9179, "step": 32270 }, { "epoch": 1.9508067927721036, "grad_norm": 1.0676886964074659, "learning_rate": 2.987558149729483e-06, "loss": 0.905, "step": 32280 }, { "epoch": 1.9514111319272376, "grad_norm": 0.9875442842327805, "learning_rate": 2.984509386663391e-06, "loss": 0.914, "step": 32290 }, { "epoch": 1.9520154710823714, "grad_norm": 1.059226065297373, "learning_rate": 2.9814615180754856e-06, "loss": 0.9078, "step": 32300 }, { "epoch": 1.9526198102375054, "grad_norm": 1.1558804797938624, "learning_rate": 2.9784145453184127e-06, "loss": 0.9257, "step": 32310 }, { "epoch": 1.9532241493926392, "grad_norm": 1.2244021868212303, "learning_rate": 2.9753684697444244e-06, "loss": 0.9269, "step": 32320 }, { "epoch": 1.953828488547773, "grad_norm": 1.106647973666506, "learning_rate": 2.972323292705378e-06, "loss": 0.9207, "step": 32330 }, { "epoch": 1.9544328277029068, "grad_norm": 1.1065831486996487, "learning_rate": 2.9692790155527228e-06, "loss": 0.9149, "step": 32340 }, { "epoch": 1.9550371668580406, "grad_norm": 0.9404833343662748, "learning_rate": 2.966235639637518e-06, "loss": 0.9361, "step": 32350 }, { "epoch": 1.9556415060131744, "grad_norm": 0.9973618253511899, "learning_rate": 2.9631931663104162e-06, "loss": 0.9395, "step": 32360 }, { "epoch": 1.9562458451683085, "grad_norm": 0.9324200023926333, "learning_rate": 2.9601515969216767e-06, "loss": 0.9041, "step": 32370 }, { "epoch": 1.9568501843234423, "grad_norm": 0.9260919412899182, "learning_rate": 2.957110932821148e-06, "loss": 0.9484, "step": 32380 }, { "epoch": 1.9574545234785763, "grad_norm": 1.0172926808066802, "learning_rate": 2.954071175358285e-06, "loss": 0.92, "step": 32390 }, { "epoch": 1.95805886263371, "grad_norm": 1.0559499410005995, "learning_rate": 2.9510323258821383e-06, "loss": 0.9218, "step": 32400 }, { "epoch": 1.958663201788844, "grad_norm": 1.1322889084296959, "learning_rate": 2.9479943857413506e-06, "loss": 0.9323, "step": 32410 }, { "epoch": 1.9592675409439777, "grad_norm": 1.0526214508943277, "learning_rate": 2.944957356284168e-06, "loss": 0.921, "step": 32420 }, { "epoch": 1.9598718800991115, "grad_norm": 1.0208651283529961, "learning_rate": 2.9419212388584305e-06, "loss": 0.9079, "step": 32430 }, { "epoch": 1.9604762192542455, "grad_norm": 1.0171728187289686, "learning_rate": 2.9388860348115684e-06, "loss": 0.9198, "step": 32440 }, { "epoch": 1.9610805584093793, "grad_norm": 0.99155183818389, "learning_rate": 2.935851745490612e-06, "loss": 0.9116, "step": 32450 }, { "epoch": 1.9616848975645134, "grad_norm": 0.9673745494452112, "learning_rate": 2.9328183722421865e-06, "loss": 0.9281, "step": 32460 }, { "epoch": 1.9622892367196472, "grad_norm": 0.9474513139669102, "learning_rate": 2.9297859164125047e-06, "loss": 0.9152, "step": 32470 }, { "epoch": 1.962893575874781, "grad_norm": 1.0083598815518564, "learning_rate": 2.926754379347378e-06, "loss": 0.9249, "step": 32480 }, { "epoch": 1.9634979150299148, "grad_norm": 1.003871754276492, "learning_rate": 2.9237237623922064e-06, "loss": 0.9322, "step": 32490 }, { "epoch": 1.9641022541850486, "grad_norm": 1.1358619924346611, "learning_rate": 2.920694066891984e-06, "loss": 0.9181, "step": 32500 }, { "epoch": 1.9647065933401824, "grad_norm": 1.1846682303453873, "learning_rate": 2.9176652941912924e-06, "loss": 0.919, "step": 32510 }, { "epoch": 1.9653109324953164, "grad_norm": 1.1408791980114923, "learning_rate": 2.9146374456343106e-06, "loss": 0.9302, "step": 32520 }, { "epoch": 1.9659152716504502, "grad_norm": 1.1349605319128238, "learning_rate": 2.9116105225647982e-06, "loss": 0.9402, "step": 32530 }, { "epoch": 1.9665196108055842, "grad_norm": 1.1148278751903036, "learning_rate": 2.90858452632611e-06, "loss": 0.9158, "step": 32540 }, { "epoch": 1.967123949960718, "grad_norm": 1.1477944399757234, "learning_rate": 2.9055594582611922e-06, "loss": 0.9219, "step": 32550 }, { "epoch": 1.9677282891158518, "grad_norm": 1.0187724455338298, "learning_rate": 2.9025353197125703e-06, "loss": 0.9258, "step": 32560 }, { "epoch": 1.9683326282709857, "grad_norm": 1.0752010407910595, "learning_rate": 2.899512112022364e-06, "loss": 0.9244, "step": 32570 }, { "epoch": 1.9689369674261195, "grad_norm": 1.100695413139785, "learning_rate": 2.8964898365322803e-06, "loss": 0.9364, "step": 32580 }, { "epoch": 1.9695413065812533, "grad_norm": 1.137557064747928, "learning_rate": 2.8934684945836055e-06, "loss": 0.898, "step": 32590 }, { "epoch": 1.9701456457363873, "grad_norm": 1.2278894678868826, "learning_rate": 2.890448087517219e-06, "loss": 0.931, "step": 32600 }, { "epoch": 1.970749984891521, "grad_norm": 1.107111899524758, "learning_rate": 2.8874286166735853e-06, "loss": 0.9035, "step": 32610 }, { "epoch": 1.9713543240466551, "grad_norm": 1.1943072974599098, "learning_rate": 2.884410083392745e-06, "loss": 0.9118, "step": 32620 }, { "epoch": 1.971958663201789, "grad_norm": 1.1005789652895215, "learning_rate": 2.881392489014332e-06, "loss": 0.9114, "step": 32630 }, { "epoch": 1.9725630023569227, "grad_norm": 1.1998377559811042, "learning_rate": 2.8783758348775605e-06, "loss": 0.8738, "step": 32640 }, { "epoch": 1.9731673415120565, "grad_norm": 1.4153646027691513, "learning_rate": 2.875360122321224e-06, "loss": 0.9223, "step": 32650 }, { "epoch": 1.9737716806671903, "grad_norm": 1.3440989087855313, "learning_rate": 2.872345352683704e-06, "loss": 0.9398, "step": 32660 }, { "epoch": 1.9743760198223241, "grad_norm": 1.3037362096791534, "learning_rate": 2.869331527302957e-06, "loss": 0.9232, "step": 32670 }, { "epoch": 1.9749803589774582, "grad_norm": 1.3995213591528144, "learning_rate": 2.866318647516527e-06, "loss": 0.9195, "step": 32680 }, { "epoch": 1.975584698132592, "grad_norm": 1.5130545928590327, "learning_rate": 2.8633067146615325e-06, "loss": 0.9323, "step": 32690 }, { "epoch": 1.976189037287726, "grad_norm": 0.7913265420283543, "learning_rate": 2.860295730074676e-06, "loss": 0.9215, "step": 32700 }, { "epoch": 1.9767933764428598, "grad_norm": 0.8435315743548842, "learning_rate": 2.857285695092239e-06, "loss": 0.923, "step": 32710 }, { "epoch": 1.9773977155979936, "grad_norm": 0.8185880430242493, "learning_rate": 2.8542766110500765e-06, "loss": 0.9248, "step": 32720 }, { "epoch": 1.9780020547531274, "grad_norm": 0.8836776245724742, "learning_rate": 2.8512684792836267e-06, "loss": 0.915, "step": 32730 }, { "epoch": 1.9786063939082612, "grad_norm": 0.8340818409642348, "learning_rate": 2.848261301127907e-06, "loss": 0.9122, "step": 32740 }, { "epoch": 1.979210733063395, "grad_norm": 0.7854443586623282, "learning_rate": 2.8452550779175024e-06, "loss": 0.9056, "step": 32750 }, { "epoch": 1.979815072218529, "grad_norm": 0.7529096818552463, "learning_rate": 2.842249810986584e-06, "loss": 0.933, "step": 32760 }, { "epoch": 1.980419411373663, "grad_norm": 0.7652955543004387, "learning_rate": 2.8392455016688948e-06, "loss": 0.9283, "step": 32770 }, { "epoch": 1.9810237505287969, "grad_norm": 0.7798261430277603, "learning_rate": 2.8362421512977483e-06, "loss": 0.9265, "step": 32780 }, { "epoch": 1.9816280896839307, "grad_norm": 0.7764552270940536, "learning_rate": 2.83323976120604e-06, "loss": 0.9463, "step": 32790 }, { "epoch": 1.9822324288390645, "grad_norm": 0.7727308479756552, "learning_rate": 2.830238332726236e-06, "loss": 0.9165, "step": 32800 }, { "epoch": 1.9828367679941983, "grad_norm": 0.8486144258989697, "learning_rate": 2.8272378671903732e-06, "loss": 0.9136, "step": 32810 }, { "epoch": 1.983441107149332, "grad_norm": 0.7747073893964028, "learning_rate": 2.824238365930064e-06, "loss": 0.9222, "step": 32820 }, { "epoch": 1.984045446304466, "grad_norm": 0.8308668580416605, "learning_rate": 2.8212398302764945e-06, "loss": 0.9349, "step": 32830 }, { "epoch": 1.9846497854596, "grad_norm": 0.9216706197598951, "learning_rate": 2.818242261560416e-06, "loss": 0.9387, "step": 32840 }, { "epoch": 1.985254124614734, "grad_norm": 0.6535766427815558, "learning_rate": 2.815245661112157e-06, "loss": 0.9105, "step": 32850 }, { "epoch": 1.9858584637698677, "grad_norm": 0.5853580374722734, "learning_rate": 2.812250030261615e-06, "loss": 0.9029, "step": 32860 }, { "epoch": 1.9864628029250015, "grad_norm": 0.5444700147799568, "learning_rate": 2.809255370338254e-06, "loss": 0.9296, "step": 32870 }, { "epoch": 1.9870671420801354, "grad_norm": 0.609405547699801, "learning_rate": 2.8062616826711087e-06, "loss": 0.9414, "step": 32880 }, { "epoch": 1.9876714812352692, "grad_norm": 0.5727461239091808, "learning_rate": 2.8032689685887866e-06, "loss": 0.9269, "step": 32890 }, { "epoch": 1.988275820390403, "grad_norm": 0.6674327437341456, "learning_rate": 2.8002772294194545e-06, "loss": 0.9202, "step": 32900 }, { "epoch": 1.988880159545537, "grad_norm": 0.687543998492259, "learning_rate": 2.797286466490854e-06, "loss": 0.9396, "step": 32910 }, { "epoch": 1.9894844987006708, "grad_norm": 0.6250925155005832, "learning_rate": 2.7942966811302917e-06, "loss": 0.9112, "step": 32920 }, { "epoch": 1.9900888378558048, "grad_norm": 0.6006309650330449, "learning_rate": 2.7913078746646374e-06, "loss": 0.9172, "step": 32930 }, { "epoch": 1.9906931770109386, "grad_norm": 0.5888906987637169, "learning_rate": 2.78832004842033e-06, "loss": 0.9433, "step": 32940 }, { "epoch": 1.9912975161660724, "grad_norm": 0.8400342315851975, "learning_rate": 2.78533320372337e-06, "loss": 0.9066, "step": 32950 }, { "epoch": 1.9919018553212062, "grad_norm": 0.8195413255021915, "learning_rate": 2.7823473418993276e-06, "loss": 0.9111, "step": 32960 }, { "epoch": 1.99250619447634, "grad_norm": 0.7897071085945362, "learning_rate": 2.7793624642733284e-06, "loss": 0.9022, "step": 32970 }, { "epoch": 1.9931105336314738, "grad_norm": 0.8736510745012994, "learning_rate": 2.776378572170071e-06, "loss": 0.9081, "step": 32980 }, { "epoch": 1.9937148727866079, "grad_norm": 0.8179960109206366, "learning_rate": 2.7733956669138085e-06, "loss": 0.9204, "step": 32990 }, { "epoch": 1.9943192119417417, "grad_norm": 0.5443723734672266, "learning_rate": 2.77041374982836e-06, "loss": 0.9338, "step": 33000 }, { "epoch": 1.9949235510968757, "grad_norm": 0.5681284356864479, "learning_rate": 2.7674328222371085e-06, "loss": 0.9135, "step": 33010 }, { "epoch": 1.9955278902520095, "grad_norm": 0.5791545706207375, "learning_rate": 2.7644528854629904e-06, "loss": 0.9029, "step": 33020 }, { "epoch": 1.9961322294071433, "grad_norm": 0.5180478658915624, "learning_rate": 2.7614739408285084e-06, "loss": 0.9349, "step": 33030 }, { "epoch": 1.996736568562277, "grad_norm": 0.5630728117922584, "learning_rate": 2.758495989655724e-06, "loss": 0.9332, "step": 33040 }, { "epoch": 1.997340907717411, "grad_norm": 0.5768387302422116, "learning_rate": 2.7555190332662585e-06, "loss": 0.9387, "step": 33050 }, { "epoch": 1.9979452468725447, "grad_norm": 0.5318731060149366, "learning_rate": 2.752543072981286e-06, "loss": 0.9529, "step": 33060 }, { "epoch": 1.9985495860276787, "grad_norm": 0.5904056298317066, "learning_rate": 2.749568110121545e-06, "loss": 0.9411, "step": 33070 }, { "epoch": 1.9991539251828125, "grad_norm": 0.5724855952547209, "learning_rate": 2.746594146007331e-06, "loss": 0.9129, "step": 33080 }, { "epoch": 1.9997582643379466, "grad_norm": 0.5828335575439317, "learning_rate": 2.7436211819584914e-06, "loss": 0.9302, "step": 33090 }, { "epoch": 2.0003626034930804, "grad_norm": 0.5234494648789862, "learning_rate": 2.740649219294433e-06, "loss": 0.9076, "step": 33100 }, { "epoch": 2.000966942648214, "grad_norm": 0.5628551805580915, "learning_rate": 2.737678259334121e-06, "loss": 0.8893, "step": 33110 }, { "epoch": 2.001571281803348, "grad_norm": 0.5467440707166451, "learning_rate": 2.7347083033960688e-06, "loss": 0.8621, "step": 33120 }, { "epoch": 2.002175620958482, "grad_norm": 0.5192012510442349, "learning_rate": 2.731739352798348e-06, "loss": 0.8876, "step": 33130 }, { "epoch": 2.0027799601136156, "grad_norm": 0.5528029113129463, "learning_rate": 2.728771408858589e-06, "loss": 0.8903, "step": 33140 }, { "epoch": 2.00338429926875, "grad_norm": 0.5347676345274723, "learning_rate": 2.725804472893964e-06, "loss": 0.8721, "step": 33150 }, { "epoch": 2.0039886384238836, "grad_norm": 0.5321168639117014, "learning_rate": 2.7228385462212076e-06, "loss": 0.9036, "step": 33160 }, { "epoch": 2.0045929775790174, "grad_norm": 0.5365723743891695, "learning_rate": 2.7198736301566044e-06, "loss": 0.8944, "step": 33170 }, { "epoch": 2.0051973167341512, "grad_norm": 0.5045542509670169, "learning_rate": 2.716909726015985e-06, "loss": 0.8903, "step": 33180 }, { "epoch": 2.005801655889285, "grad_norm": 0.5539428151647481, "learning_rate": 2.7139468351147396e-06, "loss": 0.8869, "step": 33190 }, { "epoch": 2.006405995044419, "grad_norm": 0.5481305980648615, "learning_rate": 2.7109849587678005e-06, "loss": 0.8765, "step": 33200 }, { "epoch": 2.0070103341995527, "grad_norm": 0.5247873972754932, "learning_rate": 2.708024098289657e-06, "loss": 0.8677, "step": 33210 }, { "epoch": 2.0076146733546865, "grad_norm": 0.5199961803702887, "learning_rate": 2.7050642549943407e-06, "loss": 0.867, "step": 33220 }, { "epoch": 2.0082190125098207, "grad_norm": 0.546377177548141, "learning_rate": 2.702105430195438e-06, "loss": 0.8812, "step": 33230 }, { "epoch": 2.0088233516649545, "grad_norm": 0.5367156471007768, "learning_rate": 2.6991476252060776e-06, "loss": 0.8928, "step": 33240 }, { "epoch": 2.0094276908200883, "grad_norm": 0.5662789050973269, "learning_rate": 2.6961908413389392e-06, "loss": 0.8831, "step": 33250 }, { "epoch": 2.010032029975222, "grad_norm": 0.5645438196801373, "learning_rate": 2.693235079906252e-06, "loss": 0.9111, "step": 33260 }, { "epoch": 2.010636369130356, "grad_norm": 0.5568149281941507, "learning_rate": 2.690280342219783e-06, "loss": 0.8841, "step": 33270 }, { "epoch": 2.0112407082854897, "grad_norm": 0.5602450264231862, "learning_rate": 2.6873266295908517e-06, "loss": 0.8919, "step": 33280 }, { "epoch": 2.0118450474406235, "grad_norm": 0.5643730607117805, "learning_rate": 2.684373943330324e-06, "loss": 0.9105, "step": 33290 }, { "epoch": 2.0124493865957573, "grad_norm": 0.6433892215861963, "learning_rate": 2.6814222847486016e-06, "loss": 0.8791, "step": 33300 }, { "epoch": 2.0130537257508916, "grad_norm": 0.621168110091114, "learning_rate": 2.6784716551556377e-06, "loss": 0.9081, "step": 33310 }, { "epoch": 2.0136580649060254, "grad_norm": 0.639073527533395, "learning_rate": 2.6755220558609308e-06, "loss": 0.8562, "step": 33320 }, { "epoch": 2.014262404061159, "grad_norm": 0.6400031150970298, "learning_rate": 2.672573488173512e-06, "loss": 0.9029, "step": 33330 }, { "epoch": 2.014866743216293, "grad_norm": 0.6280523070689569, "learning_rate": 2.669625953401963e-06, "loss": 0.8873, "step": 33340 }, { "epoch": 2.015471082371427, "grad_norm": 0.7477962293475895, "learning_rate": 2.6666794528544083e-06, "loss": 0.8924, "step": 33350 }, { "epoch": 2.0160754215265606, "grad_norm": 0.6903162201229224, "learning_rate": 2.663733987838504e-06, "loss": 0.8944, "step": 33360 }, { "epoch": 2.0166797606816944, "grad_norm": 0.7261448503178681, "learning_rate": 2.660789559661455e-06, "loss": 0.8977, "step": 33370 }, { "epoch": 2.017284099836828, "grad_norm": 0.6786592217617367, "learning_rate": 2.6578461696300043e-06, "loss": 0.895, "step": 33380 }, { "epoch": 2.0178884389919625, "grad_norm": 0.6412472848510454, "learning_rate": 2.654903819050435e-06, "loss": 0.8645, "step": 33390 }, { "epoch": 2.0184927781470963, "grad_norm": 0.6444920737505094, "learning_rate": 2.651962509228563e-06, "loss": 0.9081, "step": 33400 }, { "epoch": 2.01909711730223, "grad_norm": 0.5933777225042867, "learning_rate": 2.6490222414697497e-06, "loss": 0.8892, "step": 33410 }, { "epoch": 2.019701456457364, "grad_norm": 0.6306811462696658, "learning_rate": 2.646083017078891e-06, "loss": 0.9064, "step": 33420 }, { "epoch": 2.0203057956124977, "grad_norm": 0.6293017800076647, "learning_rate": 2.643144837360418e-06, "loss": 0.8565, "step": 33430 }, { "epoch": 2.0209101347676315, "grad_norm": 0.6105091733849285, "learning_rate": 2.640207703618302e-06, "loss": 0.8871, "step": 33440 }, { "epoch": 2.0215144739227653, "grad_norm": 0.8366734512635472, "learning_rate": 2.6372716171560455e-06, "loss": 0.884, "step": 33450 }, { "epoch": 2.0221188130778995, "grad_norm": 0.829892759850703, "learning_rate": 2.634336579276692e-06, "loss": 0.8935, "step": 33460 }, { "epoch": 2.0227231522330333, "grad_norm": 0.7751375567901571, "learning_rate": 2.6314025912828113e-06, "loss": 0.9244, "step": 33470 }, { "epoch": 2.023327491388167, "grad_norm": 0.7849102813434207, "learning_rate": 2.6284696544765176e-06, "loss": 0.9112, "step": 33480 }, { "epoch": 2.023931830543301, "grad_norm": 0.8071163824775153, "learning_rate": 2.62553777015945e-06, "loss": 0.8801, "step": 33490 }, { "epoch": 2.0245361696984348, "grad_norm": 0.8377023058225088, "learning_rate": 2.6226069396327836e-06, "loss": 0.9004, "step": 33500 }, { "epoch": 2.0251405088535686, "grad_norm": 0.7428017653108226, "learning_rate": 2.6196771641972298e-06, "loss": 0.8914, "step": 33510 }, { "epoch": 2.0257448480087024, "grad_norm": 0.8355728375626714, "learning_rate": 2.6167484451530234e-06, "loss": 0.8964, "step": 33520 }, { "epoch": 2.026349187163836, "grad_norm": 0.788687173689808, "learning_rate": 2.613820783799936e-06, "loss": 0.8756, "step": 33530 }, { "epoch": 2.0269535263189704, "grad_norm": 0.7985096227824965, "learning_rate": 2.610894181437273e-06, "loss": 0.881, "step": 33540 }, { "epoch": 2.027557865474104, "grad_norm": 1.2744338711871706, "learning_rate": 2.6079686393638592e-06, "loss": 0.8874, "step": 33550 }, { "epoch": 2.028162204629238, "grad_norm": 1.2668373590520514, "learning_rate": 2.6050441588780584e-06, "loss": 0.8756, "step": 33560 }, { "epoch": 2.028766543784372, "grad_norm": 1.249836076359573, "learning_rate": 2.602120741277762e-06, "loss": 0.8883, "step": 33570 }, { "epoch": 2.0293708829395056, "grad_norm": 1.1500137432809445, "learning_rate": 2.599198387860383e-06, "loss": 0.8656, "step": 33580 }, { "epoch": 2.0299752220946394, "grad_norm": 1.225715597692489, "learning_rate": 2.596277099922869e-06, "loss": 0.8868, "step": 33590 }, { "epoch": 2.0305795612497732, "grad_norm": 0.7786545909646788, "learning_rate": 2.5933568787616953e-06, "loss": 0.8695, "step": 33600 }, { "epoch": 2.031183900404907, "grad_norm": 1.0826214339964257, "learning_rate": 2.5904377256728565e-06, "loss": 0.897, "step": 33610 }, { "epoch": 2.0317882395600413, "grad_norm": 1.0644701152236413, "learning_rate": 2.58751964195188e-06, "loss": 0.9061, "step": 33620 }, { "epoch": 2.032392578715175, "grad_norm": 0.7746719706115537, "learning_rate": 2.584602628893819e-06, "loss": 0.9032, "step": 33630 }, { "epoch": 2.032996917870309, "grad_norm": 0.9177230023310055, "learning_rate": 2.5816866877932433e-06, "loss": 0.9111, "step": 33640 }, { "epoch": 2.0336012570254427, "grad_norm": 2.3606877000249202, "learning_rate": 2.5787718199442555e-06, "loss": 0.9004, "step": 33650 }, { "epoch": 2.0342055961805765, "grad_norm": 2.444030987959464, "learning_rate": 2.5758580266404808e-06, "loss": 0.894, "step": 33660 }, { "epoch": 2.0348099353357103, "grad_norm": 2.2691657418387385, "learning_rate": 2.5729453091750617e-06, "loss": 0.8995, "step": 33670 }, { "epoch": 2.035414274490844, "grad_norm": 2.536120913511076, "learning_rate": 2.570033668840669e-06, "loss": 0.8879, "step": 33680 }, { "epoch": 2.036018613645978, "grad_norm": 2.4728535889157865, "learning_rate": 2.567123106929497e-06, "loss": 0.8919, "step": 33690 }, { "epoch": 2.036622952801112, "grad_norm": 0.9213739755567852, "learning_rate": 2.5642136247332517e-06, "loss": 0.8649, "step": 33700 }, { "epoch": 2.037227291956246, "grad_norm": 0.9047894836272231, "learning_rate": 2.5613052235431712e-06, "loss": 0.8937, "step": 33710 }, { "epoch": 2.0378316311113798, "grad_norm": 0.9934360767452772, "learning_rate": 2.5583979046500063e-06, "loss": 0.8887, "step": 33720 }, { "epoch": 2.0384359702665136, "grad_norm": 0.9132731047293965, "learning_rate": 2.5554916693440323e-06, "loss": 0.8796, "step": 33730 }, { "epoch": 2.0390403094216474, "grad_norm": 0.906634365101205, "learning_rate": 2.552586518915038e-06, "loss": 0.9015, "step": 33740 }, { "epoch": 2.039644648576781, "grad_norm": 1.0039848914728786, "learning_rate": 2.549682454652337e-06, "loss": 0.8686, "step": 33750 }, { "epoch": 2.040248987731915, "grad_norm": 0.9960613291996244, "learning_rate": 2.5467794778447586e-06, "loss": 0.8727, "step": 33760 }, { "epoch": 2.0408533268870492, "grad_norm": 1.0491001038892025, "learning_rate": 2.543877589780646e-06, "loss": 0.8899, "step": 33770 }, { "epoch": 2.041457666042183, "grad_norm": 1.019584010266302, "learning_rate": 2.5409767917478633e-06, "loss": 0.8816, "step": 33780 }, { "epoch": 2.042062005197317, "grad_norm": 0.9578093684340653, "learning_rate": 2.538077085033792e-06, "loss": 0.8759, "step": 33790 }, { "epoch": 2.0426663443524506, "grad_norm": 1.1118673861825716, "learning_rate": 2.535178470925323e-06, "loss": 0.8812, "step": 33800 }, { "epoch": 2.0432706835075845, "grad_norm": 1.0435113535243707, "learning_rate": 2.532280950708868e-06, "loss": 0.8941, "step": 33810 }, { "epoch": 2.0438750226627183, "grad_norm": 1.0694698018499207, "learning_rate": 2.5293845256703533e-06, "loss": 0.8945, "step": 33820 }, { "epoch": 2.044479361817852, "grad_norm": 1.1059337705513197, "learning_rate": 2.5264891970952137e-06, "loss": 0.8945, "step": 33830 }, { "epoch": 2.045083700972986, "grad_norm": 1.0589327270025761, "learning_rate": 2.5235949662684027e-06, "loss": 0.8863, "step": 33840 }, { "epoch": 2.04568804012812, "grad_norm": 1.0755303399925493, "learning_rate": 2.520701834474387e-06, "loss": 0.8662, "step": 33850 }, { "epoch": 2.046292379283254, "grad_norm": 1.1532568856177698, "learning_rate": 2.517809802997139e-06, "loss": 0.9012, "step": 33860 }, { "epoch": 2.0468967184383877, "grad_norm": 1.0807277105029338, "learning_rate": 2.51491887312015e-06, "loss": 0.8672, "step": 33870 }, { "epoch": 2.0475010575935215, "grad_norm": 1.002487627145067, "learning_rate": 2.5120290461264213e-06, "loss": 0.873, "step": 33880 }, { "epoch": 2.0481053967486553, "grad_norm": 1.0167593572073805, "learning_rate": 2.509140323298459e-06, "loss": 0.8977, "step": 33890 }, { "epoch": 2.048709735903789, "grad_norm": 1.1223595145458065, "learning_rate": 2.5062527059182853e-06, "loss": 0.8774, "step": 33900 }, { "epoch": 2.049314075058923, "grad_norm": 1.101351498470469, "learning_rate": 2.5033661952674315e-06, "loss": 0.8822, "step": 33910 }, { "epoch": 2.0499184142140567, "grad_norm": 1.0782198581993534, "learning_rate": 2.500480792626932e-06, "loss": 0.8993, "step": 33920 }, { "epoch": 2.050522753369191, "grad_norm": 1.162541636524804, "learning_rate": 2.4975964992773357e-06, "loss": 0.887, "step": 33930 }, { "epoch": 2.051127092524325, "grad_norm": 1.1544992755469565, "learning_rate": 2.4947133164986987e-06, "loss": 0.882, "step": 33940 }, { "epoch": 2.0517314316794586, "grad_norm": 1.02484742504229, "learning_rate": 2.491831245570579e-06, "loss": 0.8888, "step": 33950 }, { "epoch": 2.0523357708345924, "grad_norm": 1.005488902778665, "learning_rate": 2.4889502877720477e-06, "loss": 0.8949, "step": 33960 }, { "epoch": 2.052940109989726, "grad_norm": 0.9549913290537595, "learning_rate": 2.4860704443816753e-06, "loss": 0.8779, "step": 33970 }, { "epoch": 2.05354444914486, "grad_norm": 1.0144438246472387, "learning_rate": 2.4831917166775453e-06, "loss": 0.9049, "step": 33980 }, { "epoch": 2.054148788299994, "grad_norm": 0.9934721776547466, "learning_rate": 2.480314105937238e-06, "loss": 0.9205, "step": 33990 }, { "epoch": 2.0547531274551276, "grad_norm": 1.1612390010795213, "learning_rate": 2.477437613437846e-06, "loss": 0.9014, "step": 34000 }, { "epoch": 2.055357466610262, "grad_norm": 1.1732583877051292, "learning_rate": 2.4745622404559572e-06, "loss": 0.9087, "step": 34010 }, { "epoch": 2.0559618057653957, "grad_norm": 1.136773696330377, "learning_rate": 2.4716879882676692e-06, "loss": 0.9088, "step": 34020 }, { "epoch": 2.0565661449205295, "grad_norm": 1.0098662837013497, "learning_rate": 2.468814858148582e-06, "loss": 0.8616, "step": 34030 }, { "epoch": 2.0571704840756633, "grad_norm": 1.0702287416437863, "learning_rate": 2.465942851373792e-06, "loss": 0.8859, "step": 34040 }, { "epoch": 2.057774823230797, "grad_norm": 1.1198816440791115, "learning_rate": 2.4630719692179027e-06, "loss": 0.9049, "step": 34050 }, { "epoch": 2.058379162385931, "grad_norm": 1.0496563566526718, "learning_rate": 2.4602022129550163e-06, "loss": 0.8962, "step": 34060 }, { "epoch": 2.0589835015410647, "grad_norm": 1.0561577415803982, "learning_rate": 2.4573335838587374e-06, "loss": 0.8999, "step": 34070 }, { "epoch": 2.0595878406961985, "grad_norm": 1.0572024573012317, "learning_rate": 2.454466083202165e-06, "loss": 0.8979, "step": 34080 }, { "epoch": 2.0601921798513327, "grad_norm": 1.0238078496867966, "learning_rate": 2.4515997122579022e-06, "loss": 0.8915, "step": 34090 }, { "epoch": 2.0607965190064665, "grad_norm": 1.23372882607432, "learning_rate": 2.4487344722980523e-06, "loss": 0.8691, "step": 34100 }, { "epoch": 2.0614008581616003, "grad_norm": 1.1909270512188104, "learning_rate": 2.4458703645942082e-06, "loss": 0.8984, "step": 34110 }, { "epoch": 2.062005197316734, "grad_norm": 1.267788850990393, "learning_rate": 2.4430073904174687e-06, "loss": 0.9125, "step": 34120 }, { "epoch": 2.062609536471868, "grad_norm": 1.296894589043979, "learning_rate": 2.4401455510384285e-06, "loss": 0.8903, "step": 34130 }, { "epoch": 2.0632138756270018, "grad_norm": 1.1630203149076703, "learning_rate": 2.4372848477271737e-06, "loss": 0.8723, "step": 34140 }, { "epoch": 2.0638182147821356, "grad_norm": 1.081595331338194, "learning_rate": 2.4344252817532897e-06, "loss": 0.8787, "step": 34150 }, { "epoch": 2.06442255393727, "grad_norm": 1.0969701820912907, "learning_rate": 2.43156685438586e-06, "loss": 0.9216, "step": 34160 }, { "epoch": 2.0650268930924036, "grad_norm": 1.0998530233601764, "learning_rate": 2.4287095668934553e-06, "loss": 0.8803, "step": 34170 }, { "epoch": 2.0656312322475374, "grad_norm": 1.1125106531216604, "learning_rate": 2.4258534205441458e-06, "loss": 0.888, "step": 34180 }, { "epoch": 2.0662355714026712, "grad_norm": 1.062084138335021, "learning_rate": 2.422998416605498e-06, "loss": 0.9056, "step": 34190 }, { "epoch": 2.066839910557805, "grad_norm": 1.2819693410705182, "learning_rate": 2.4201445563445613e-06, "loss": 0.8901, "step": 34200 }, { "epoch": 2.067444249712939, "grad_norm": 1.287427389826221, "learning_rate": 2.417291841027889e-06, "loss": 0.9035, "step": 34210 }, { "epoch": 2.0680485888680726, "grad_norm": 1.2114528892128853, "learning_rate": 2.414440271921517e-06, "loss": 0.9046, "step": 34220 }, { "epoch": 2.0686529280232064, "grad_norm": 1.224477279906845, "learning_rate": 2.4115898502909805e-06, "loss": 0.8811, "step": 34230 }, { "epoch": 2.0692572671783407, "grad_norm": 1.2633808837607525, "learning_rate": 2.408740577401298e-06, "loss": 0.9019, "step": 34240 }, { "epoch": 2.0698616063334745, "grad_norm": 1.4904601859270488, "learning_rate": 2.405892454516984e-06, "loss": 0.8796, "step": 34250 }, { "epoch": 2.0704659454886083, "grad_norm": 1.4584899747748776, "learning_rate": 2.403045482902039e-06, "loss": 0.8947, "step": 34260 }, { "epoch": 2.071070284643742, "grad_norm": 1.5586887362112432, "learning_rate": 2.4001996638199534e-06, "loss": 0.8916, "step": 34270 }, { "epoch": 2.071674623798876, "grad_norm": 1.5127390082420138, "learning_rate": 2.3973549985337092e-06, "loss": 0.9092, "step": 34280 }, { "epoch": 2.0722789629540097, "grad_norm": 1.4118918722534175, "learning_rate": 2.3945114883057707e-06, "loss": 0.8828, "step": 34290 }, { "epoch": 2.0728833021091435, "grad_norm": 0.942857580073176, "learning_rate": 2.3916691343980936e-06, "loss": 0.8985, "step": 34300 }, { "epoch": 2.0734876412642773, "grad_norm": 0.8738297772014342, "learning_rate": 2.3888279380721217e-06, "loss": 0.8711, "step": 34310 }, { "epoch": 2.0740919804194116, "grad_norm": 0.9276469481384416, "learning_rate": 2.3859879005887792e-06, "loss": 0.8812, "step": 34320 }, { "epoch": 2.0746963195745454, "grad_norm": 0.9057986511621184, "learning_rate": 2.3831490232084812e-06, "loss": 0.9055, "step": 34330 }, { "epoch": 2.075300658729679, "grad_norm": 0.9343820841731831, "learning_rate": 2.3803113071911283e-06, "loss": 0.9118, "step": 34340 }, { "epoch": 2.075904997884813, "grad_norm": 0.8443879267785902, "learning_rate": 2.3774747537960995e-06, "loss": 0.8846, "step": 34350 }, { "epoch": 2.076509337039947, "grad_norm": 0.8045289642936068, "learning_rate": 2.374639364282265e-06, "loss": 0.8741, "step": 34360 }, { "epoch": 2.0771136761950806, "grad_norm": 0.8861799181137933, "learning_rate": 2.3718051399079765e-06, "loss": 0.9018, "step": 34370 }, { "epoch": 2.0777180153502144, "grad_norm": 0.8179872100872368, "learning_rate": 2.368972081931064e-06, "loss": 0.8882, "step": 34380 }, { "epoch": 2.078322354505348, "grad_norm": 0.8141840998347964, "learning_rate": 2.3661401916088463e-06, "loss": 0.882, "step": 34390 }, { "epoch": 2.0789266936604824, "grad_norm": 0.8730579412882683, "learning_rate": 2.3633094701981195e-06, "loss": 0.8807, "step": 34400 }, { "epoch": 2.0795310328156162, "grad_norm": 0.8680190170134608, "learning_rate": 2.360479918955166e-06, "loss": 0.9077, "step": 34410 }, { "epoch": 2.08013537197075, "grad_norm": 0.8948758943263898, "learning_rate": 2.3576515391357404e-06, "loss": 0.8825, "step": 34420 }, { "epoch": 2.080739711125884, "grad_norm": 0.8817060767926409, "learning_rate": 2.354824331995085e-06, "loss": 0.9152, "step": 34430 }, { "epoch": 2.0813440502810177, "grad_norm": 0.8115647503714777, "learning_rate": 2.351998298787922e-06, "loss": 0.8594, "step": 34440 }, { "epoch": 2.0819483894361515, "grad_norm": 0.6287151130532005, "learning_rate": 2.3491734407684445e-06, "loss": 0.8864, "step": 34450 }, { "epoch": 2.0825527285912853, "grad_norm": 0.6523360667762224, "learning_rate": 2.346349759190332e-06, "loss": 0.8833, "step": 34460 }, { "epoch": 2.083157067746419, "grad_norm": 0.6381124356689684, "learning_rate": 2.343527255306741e-06, "loss": 0.8829, "step": 34470 }, { "epoch": 2.0837614069015533, "grad_norm": 0.609362180472817, "learning_rate": 2.340705930370302e-06, "loss": 0.8946, "step": 34480 }, { "epoch": 2.084365746056687, "grad_norm": 0.6220008590249745, "learning_rate": 2.3378857856331212e-06, "loss": 0.866, "step": 34490 }, { "epoch": 2.084970085211821, "grad_norm": 0.6599735606133348, "learning_rate": 2.335066822346788e-06, "loss": 0.9025, "step": 34500 }, { "epoch": 2.0855744243669547, "grad_norm": 0.6549383329723182, "learning_rate": 2.332249041762359e-06, "loss": 0.88, "step": 34510 }, { "epoch": 2.0861787635220885, "grad_norm": 0.6316181413129887, "learning_rate": 2.3294324451303723e-06, "loss": 0.8542, "step": 34520 }, { "epoch": 2.0867831026772223, "grad_norm": 0.7063560196363463, "learning_rate": 2.32661703370084e-06, "loss": 0.8755, "step": 34530 }, { "epoch": 2.087387441832356, "grad_norm": 0.6888164321034947, "learning_rate": 2.323802808723242e-06, "loss": 0.8887, "step": 34540 }, { "epoch": 2.0879917809874904, "grad_norm": 0.8676496105652605, "learning_rate": 2.3209897714465394e-06, "loss": 0.8867, "step": 34550 }, { "epoch": 2.088596120142624, "grad_norm": 0.8836167392541048, "learning_rate": 2.318177923119164e-06, "loss": 0.872, "step": 34560 }, { "epoch": 2.089200459297758, "grad_norm": 0.8751056678380355, "learning_rate": 2.3153672649890157e-06, "loss": 0.8689, "step": 34570 }, { "epoch": 2.089804798452892, "grad_norm": 0.8946280319058736, "learning_rate": 2.312557798303471e-06, "loss": 0.886, "step": 34580 }, { "epoch": 2.0904091376080256, "grad_norm": 0.8520897938908295, "learning_rate": 2.3097495243093777e-06, "loss": 0.8828, "step": 34590 }, { "epoch": 2.0910134767631594, "grad_norm": 0.5982091311112707, "learning_rate": 2.3069424442530487e-06, "loss": 0.906, "step": 34600 }, { "epoch": 2.091617815918293, "grad_norm": 0.584204158757196, "learning_rate": 2.304136559380273e-06, "loss": 0.8645, "step": 34610 }, { "epoch": 2.092222155073427, "grad_norm": 0.6134684130382023, "learning_rate": 2.30133187093631e-06, "loss": 0.8838, "step": 34620 }, { "epoch": 2.0928264942285613, "grad_norm": 0.5829245271987381, "learning_rate": 2.2985283801658797e-06, "loss": 0.904, "step": 34630 }, { "epoch": 2.093430833383695, "grad_norm": 0.6092198710457203, "learning_rate": 2.2957260883131795e-06, "loss": 0.8722, "step": 34640 }, { "epoch": 2.094035172538829, "grad_norm": 0.5747940891469823, "learning_rate": 2.292924996621872e-06, "loss": 0.8808, "step": 34650 }, { "epoch": 2.0946395116939627, "grad_norm": 0.6177267529143505, "learning_rate": 2.2901251063350834e-06, "loss": 0.8722, "step": 34660 }, { "epoch": 2.0952438508490965, "grad_norm": 0.5893416430517816, "learning_rate": 2.2873264186954115e-06, "loss": 0.887, "step": 34670 }, { "epoch": 2.0958481900042303, "grad_norm": 0.6093742520217635, "learning_rate": 2.2845289349449208e-06, "loss": 0.8737, "step": 34680 }, { "epoch": 2.096452529159364, "grad_norm": 0.5943031808818438, "learning_rate": 2.2817326563251353e-06, "loss": 0.8789, "step": 34690 }, { "epoch": 2.097056868314498, "grad_norm": 0.5829180628244706, "learning_rate": 2.2789375840770502e-06, "loss": 0.887, "step": 34700 }, { "epoch": 2.097661207469632, "grad_norm": 0.5635717878567686, "learning_rate": 2.2761437194411233e-06, "loss": 0.8804, "step": 34710 }, { "epoch": 2.098265546624766, "grad_norm": 0.5653942999983343, "learning_rate": 2.2733510636572787e-06, "loss": 0.8903, "step": 34720 }, { "epoch": 2.0988698857798997, "grad_norm": 0.6849553168792376, "learning_rate": 2.2705596179648974e-06, "loss": 0.8962, "step": 34730 }, { "epoch": 2.0994742249350336, "grad_norm": 0.5926090645034051, "learning_rate": 2.2677693836028326e-06, "loss": 0.8993, "step": 34740 }, { "epoch": 2.1000785640901674, "grad_norm": 0.6006080839306511, "learning_rate": 2.26498036180939e-06, "loss": 0.8497, "step": 34750 }, { "epoch": 2.100682903245301, "grad_norm": 0.6345567413536951, "learning_rate": 2.262192553822347e-06, "loss": 0.8802, "step": 34760 }, { "epoch": 2.101287242400435, "grad_norm": 0.5882297662846905, "learning_rate": 2.259405960878934e-06, "loss": 0.9012, "step": 34770 }, { "epoch": 2.101891581555569, "grad_norm": 0.5951227148797412, "learning_rate": 2.2566205842158484e-06, "loss": 0.8796, "step": 34780 }, { "epoch": 2.102495920710703, "grad_norm": 0.5980703035207874, "learning_rate": 2.253836425069242e-06, "loss": 0.8707, "step": 34790 }, { "epoch": 2.103100259865837, "grad_norm": 0.5884591143374369, "learning_rate": 2.251053484674732e-06, "loss": 0.8852, "step": 34800 }, { "epoch": 2.1037045990209706, "grad_norm": 0.600476775170353, "learning_rate": 2.2482717642673924e-06, "loss": 0.8713, "step": 34810 }, { "epoch": 2.1043089381761044, "grad_norm": 0.603379914327248, "learning_rate": 2.245491265081752e-06, "loss": 0.8797, "step": 34820 }, { "epoch": 2.1049132773312382, "grad_norm": 0.5745195459892538, "learning_rate": 2.242711988351805e-06, "loss": 0.9032, "step": 34830 }, { "epoch": 2.105517616486372, "grad_norm": 0.5580419534985216, "learning_rate": 2.239933935310998e-06, "loss": 0.8726, "step": 34840 }, { "epoch": 2.106121955641506, "grad_norm": 0.5902441602622466, "learning_rate": 2.237157107192235e-06, "loss": 0.8939, "step": 34850 }, { "epoch": 2.10672629479664, "grad_norm": 0.6086353032737659, "learning_rate": 2.2343815052278767e-06, "loss": 0.88, "step": 34860 }, { "epoch": 2.107330633951774, "grad_norm": 0.6058424196575252, "learning_rate": 2.2316071306497426e-06, "loss": 0.88, "step": 34870 }, { "epoch": 2.1079349731069077, "grad_norm": 0.6119039244035755, "learning_rate": 2.228833984689101e-06, "loss": 0.9177, "step": 34880 }, { "epoch": 2.1085393122620415, "grad_norm": 0.5851167193550637, "learning_rate": 2.2260620685766814e-06, "loss": 0.8856, "step": 34890 }, { "epoch": 2.1091436514171753, "grad_norm": 0.6488738094814008, "learning_rate": 2.2232913835426658e-06, "loss": 0.9006, "step": 34900 }, { "epoch": 2.109747990572309, "grad_norm": 0.7054329810556527, "learning_rate": 2.2205219308166858e-06, "loss": 0.8914, "step": 34910 }, { "epoch": 2.110352329727443, "grad_norm": 0.6907042944017625, "learning_rate": 2.2177537116278306e-06, "loss": 0.8717, "step": 34920 }, { "epoch": 2.1109566688825767, "grad_norm": 0.6617661198131073, "learning_rate": 2.214986727204644e-06, "loss": 0.8978, "step": 34930 }, { "epoch": 2.111561008037711, "grad_norm": 0.7010738125662763, "learning_rate": 2.2122209787751125e-06, "loss": 0.8847, "step": 34940 }, { "epoch": 2.1121653471928448, "grad_norm": 0.7493688535112966, "learning_rate": 2.2094564675666836e-06, "loss": 0.8846, "step": 34950 }, { "epoch": 2.1127696863479786, "grad_norm": 0.7392628770817403, "learning_rate": 2.206693194806253e-06, "loss": 0.8885, "step": 34960 }, { "epoch": 2.1133740255031124, "grad_norm": 0.7175873337179419, "learning_rate": 2.2039311617201627e-06, "loss": 0.8825, "step": 34970 }, { "epoch": 2.113978364658246, "grad_norm": 0.7198687173415584, "learning_rate": 2.201170369534209e-06, "loss": 0.8987, "step": 34980 }, { "epoch": 2.11458270381338, "grad_norm": 0.7177763240792459, "learning_rate": 2.198410819473638e-06, "loss": 0.8855, "step": 34990 }, { "epoch": 2.115187042968514, "grad_norm": 0.6489087987318607, "learning_rate": 2.19565251276314e-06, "loss": 0.8662, "step": 35000 }, { "epoch": 2.1157913821236476, "grad_norm": 0.6837499991695534, "learning_rate": 2.1928954506268595e-06, "loss": 0.8978, "step": 35010 }, { "epoch": 2.116395721278782, "grad_norm": 0.7071408791622961, "learning_rate": 2.1901396342883813e-06, "loss": 0.8871, "step": 35020 }, { "epoch": 2.1170000604339156, "grad_norm": 0.6910637024332569, "learning_rate": 2.1873850649707455e-06, "loss": 0.897, "step": 35030 }, { "epoch": 2.1176043995890494, "grad_norm": 0.6640508129953245, "learning_rate": 2.1846317438964305e-06, "loss": 0.9004, "step": 35040 }, { "epoch": 2.1182087387441833, "grad_norm": 0.8775869764246423, "learning_rate": 2.1818796722873675e-06, "loss": 0.8889, "step": 35050 }, { "epoch": 2.118813077899317, "grad_norm": 0.8289244115055131, "learning_rate": 2.1791288513649316e-06, "loss": 0.8961, "step": 35060 }, { "epoch": 2.119417417054451, "grad_norm": 0.8565348925654535, "learning_rate": 2.176379282349939e-06, "loss": 0.9148, "step": 35070 }, { "epoch": 2.1200217562095847, "grad_norm": 0.858558783533691, "learning_rate": 2.173630966462654e-06, "loss": 0.8817, "step": 35080 }, { "epoch": 2.1206260953647185, "grad_norm": 0.8202094397338572, "learning_rate": 2.170883904922787e-06, "loss": 0.8831, "step": 35090 }, { "epoch": 2.1212304345198527, "grad_norm": 0.8272451174395579, "learning_rate": 2.1681380989494845e-06, "loss": 0.8676, "step": 35100 }, { "epoch": 2.1218347736749865, "grad_norm": 0.7990505840913317, "learning_rate": 2.1653935497613406e-06, "loss": 0.8951, "step": 35110 }, { "epoch": 2.1224391128301203, "grad_norm": 0.8059993974861749, "learning_rate": 2.1626502585763935e-06, "loss": 0.8892, "step": 35120 }, { "epoch": 2.123043451985254, "grad_norm": 0.8208193456155347, "learning_rate": 2.159908226612118e-06, "loss": 0.8762, "step": 35130 }, { "epoch": 2.123647791140388, "grad_norm": 0.8260571564340111, "learning_rate": 2.1571674550854327e-06, "loss": 0.8734, "step": 35140 }, { "epoch": 2.1242521302955217, "grad_norm": 1.188727260442727, "learning_rate": 2.1544279452126994e-06, "loss": 0.8925, "step": 35150 }, { "epoch": 2.1248564694506555, "grad_norm": 1.2985667446071452, "learning_rate": 2.151689698209713e-06, "loss": 0.8922, "step": 35160 }, { "epoch": 2.12546080860579, "grad_norm": 1.2490945899209687, "learning_rate": 2.148952715291715e-06, "loss": 0.9058, "step": 35170 }, { "epoch": 2.1260651477609236, "grad_norm": 1.252217327619332, "learning_rate": 2.1462169976733844e-06, "loss": 0.8681, "step": 35180 }, { "epoch": 2.1266694869160574, "grad_norm": 1.292078563896392, "learning_rate": 2.143482546568833e-06, "loss": 0.8904, "step": 35190 }, { "epoch": 2.127273826071191, "grad_norm": 1.0783379426951425, "learning_rate": 2.140749363191618e-06, "loss": 0.9092, "step": 35200 }, { "epoch": 2.127878165226325, "grad_norm": 1.1343899003506481, "learning_rate": 2.138017448754732e-06, "loss": 0.8855, "step": 35210 }, { "epoch": 2.128482504381459, "grad_norm": 1.148027935851704, "learning_rate": 2.1352868044705998e-06, "loss": 0.8948, "step": 35220 }, { "epoch": 2.1290868435365926, "grad_norm": 1.037062192508951, "learning_rate": 2.1325574315510877e-06, "loss": 0.9043, "step": 35230 }, { "epoch": 2.1296911826917264, "grad_norm": 1.0306703816982747, "learning_rate": 2.129829331207498e-06, "loss": 0.8828, "step": 35240 }, { "epoch": 2.1302955218468607, "grad_norm": 2.443198515554324, "learning_rate": 2.127102504650563e-06, "loss": 0.8912, "step": 35250 }, { "epoch": 2.1308998610019945, "grad_norm": 2.471623746885376, "learning_rate": 2.1243769530904564e-06, "loss": 0.9089, "step": 35260 }, { "epoch": 2.1315042001571283, "grad_norm": 2.4917903575177496, "learning_rate": 2.1216526777367778e-06, "loss": 0.8595, "step": 35270 }, { "epoch": 2.132108539312262, "grad_norm": 2.352940743333999, "learning_rate": 2.1189296797985707e-06, "loss": 0.8812, "step": 35280 }, { "epoch": 2.132712878467396, "grad_norm": 2.5801568056401427, "learning_rate": 2.1162079604843023e-06, "loss": 0.8844, "step": 35290 }, { "epoch": 2.1333172176225297, "grad_norm": 0.9694831430766778, "learning_rate": 2.113487521001879e-06, "loss": 0.8973, "step": 35300 }, { "epoch": 2.1339215567776635, "grad_norm": 0.9794414191024234, "learning_rate": 2.1107683625586335e-06, "loss": 0.89, "step": 35310 }, { "epoch": 2.1345258959327973, "grad_norm": 1.0859209140806365, "learning_rate": 2.1080504863613345e-06, "loss": 0.8736, "step": 35320 }, { "epoch": 2.1351302350879315, "grad_norm": 0.9369050892777468, "learning_rate": 2.105333893616182e-06, "loss": 0.8775, "step": 35330 }, { "epoch": 2.1357345742430653, "grad_norm": 0.9046471684755009, "learning_rate": 2.102618585528802e-06, "loss": 0.8821, "step": 35340 }, { "epoch": 2.136338913398199, "grad_norm": 0.979791704621407, "learning_rate": 2.0999045633042527e-06, "loss": 0.8746, "step": 35350 }, { "epoch": 2.136943252553333, "grad_norm": 1.0094618054026403, "learning_rate": 2.097191828147024e-06, "loss": 0.8865, "step": 35360 }, { "epoch": 2.1375475917084668, "grad_norm": 1.0224876271721504, "learning_rate": 2.094480381261031e-06, "loss": 0.8979, "step": 35370 }, { "epoch": 2.1381519308636006, "grad_norm": 1.0160810745856237, "learning_rate": 2.0917702238496174e-06, "loss": 0.8887, "step": 35380 }, { "epoch": 2.1387562700187344, "grad_norm": 1.0269846979152855, "learning_rate": 2.0890613571155577e-06, "loss": 0.892, "step": 35390 }, { "epoch": 2.1393606091738686, "grad_norm": 1.0152712709937863, "learning_rate": 2.0863537822610525e-06, "loss": 0.9, "step": 35400 }, { "epoch": 2.1399649483290024, "grad_norm": 1.0511487372225052, "learning_rate": 2.0836475004877243e-06, "loss": 0.8701, "step": 35410 }, { "epoch": 2.140569287484136, "grad_norm": 1.2066776744248506, "learning_rate": 2.080942512996628e-06, "loss": 0.8761, "step": 35420 }, { "epoch": 2.14117362663927, "grad_norm": 1.0498445939232066, "learning_rate": 2.0782388209882436e-06, "loss": 0.8938, "step": 35430 }, { "epoch": 2.141777965794404, "grad_norm": 1.0460228250232422, "learning_rate": 2.0755364256624697e-06, "loss": 0.9017, "step": 35440 }, { "epoch": 2.1423823049495376, "grad_norm": 1.102782905117123, "learning_rate": 2.072835328218637e-06, "loss": 0.8755, "step": 35450 }, { "epoch": 2.1429866441046714, "grad_norm": 1.0861099682857072, "learning_rate": 2.070135529855498e-06, "loss": 0.8727, "step": 35460 }, { "epoch": 2.1435909832598052, "grad_norm": 1.045015745250695, "learning_rate": 2.067437031771225e-06, "loss": 0.9005, "step": 35470 }, { "epoch": 2.144195322414939, "grad_norm": 1.1578892261007294, "learning_rate": 2.0647398351634173e-06, "loss": 0.8935, "step": 35480 }, { "epoch": 2.1447996615700733, "grad_norm": 1.1193433874703707, "learning_rate": 2.062043941229097e-06, "loss": 0.8957, "step": 35490 }, { "epoch": 2.145404000725207, "grad_norm": 1.2570433934283072, "learning_rate": 2.0593493511647033e-06, "loss": 0.9045, "step": 35500 }, { "epoch": 2.146008339880341, "grad_norm": 1.189471684351895, "learning_rate": 2.0566560661661034e-06, "loss": 0.9022, "step": 35510 }, { "epoch": 2.1466126790354747, "grad_norm": 1.1179705995987261, "learning_rate": 2.0539640874285783e-06, "loss": 0.8733, "step": 35520 }, { "epoch": 2.1472170181906085, "grad_norm": 1.2253792804387909, "learning_rate": 2.051273416146836e-06, "loss": 0.9032, "step": 35530 }, { "epoch": 2.1478213573457423, "grad_norm": 1.1463727472970437, "learning_rate": 2.048584053514998e-06, "loss": 0.8923, "step": 35540 }, { "epoch": 2.148425696500876, "grad_norm": 1.011497982764017, "learning_rate": 2.045896000726611e-06, "loss": 0.8925, "step": 35550 }, { "epoch": 2.1490300356560104, "grad_norm": 1.0391118031559496, "learning_rate": 2.0432092589746343e-06, "loss": 0.8798, "step": 35560 }, { "epoch": 2.149634374811144, "grad_norm": 1.0260494795038764, "learning_rate": 2.04052382945145e-06, "loss": 0.8806, "step": 35570 }, { "epoch": 2.150238713966278, "grad_norm": 1.0343086600007603, "learning_rate": 2.0378397133488583e-06, "loss": 0.861, "step": 35580 }, { "epoch": 2.1508430531214118, "grad_norm": 0.9777778462235185, "learning_rate": 2.0351569118580707e-06, "loss": 0.8888, "step": 35590 }, { "epoch": 2.1514473922765456, "grad_norm": 1.0429594046718926, "learning_rate": 2.032475426169721e-06, "loss": 0.8786, "step": 35600 }, { "epoch": 2.1520517314316794, "grad_norm": 1.0806821193356724, "learning_rate": 2.0297952574738587e-06, "loss": 0.8897, "step": 35610 }, { "epoch": 2.152656070586813, "grad_norm": 1.0914073512978002, "learning_rate": 2.0271164069599446e-06, "loss": 0.8781, "step": 35620 }, { "epoch": 2.153260409741947, "grad_norm": 1.1159188438262881, "learning_rate": 2.024438875816858e-06, "loss": 0.8784, "step": 35630 }, { "epoch": 2.1538647488970812, "grad_norm": 1.017125102181146, "learning_rate": 2.0217626652328937e-06, "loss": 0.8812, "step": 35640 }, { "epoch": 2.154469088052215, "grad_norm": 1.0499298551121643, "learning_rate": 2.019087776395756e-06, "loss": 0.8917, "step": 35650 }, { "epoch": 2.155073427207349, "grad_norm": 1.087448243552565, "learning_rate": 2.016414210492565e-06, "loss": 0.8841, "step": 35660 }, { "epoch": 2.1556777663624827, "grad_norm": 1.0237217579134834, "learning_rate": 2.013741968709857e-06, "loss": 0.9002, "step": 35670 }, { "epoch": 2.1562821055176165, "grad_norm": 1.0033457295406665, "learning_rate": 2.0110710522335737e-06, "loss": 0.8871, "step": 35680 }, { "epoch": 2.1568864446727503, "grad_norm": 1.0562248760644997, "learning_rate": 2.008401462249074e-06, "loss": 0.8781, "step": 35690 }, { "epoch": 2.157490783827884, "grad_norm": 1.1886738581592653, "learning_rate": 2.0057331999411277e-06, "loss": 0.8887, "step": 35700 }, { "epoch": 2.158095122983018, "grad_norm": 1.2908916142027507, "learning_rate": 2.0030662664939123e-06, "loss": 0.8745, "step": 35710 }, { "epoch": 2.158699462138152, "grad_norm": 1.236655812966903, "learning_rate": 2.000400663091018e-06, "loss": 0.8862, "step": 35720 }, { "epoch": 2.159303801293286, "grad_norm": 1.2233466413012823, "learning_rate": 1.997736390915444e-06, "loss": 0.8987, "step": 35730 }, { "epoch": 2.1599081404484197, "grad_norm": 1.269474755309957, "learning_rate": 1.995073451149601e-06, "loss": 0.8662, "step": 35740 }, { "epoch": 2.1605124796035535, "grad_norm": 1.1562498483985872, "learning_rate": 1.992411844975303e-06, "loss": 0.8848, "step": 35750 }, { "epoch": 2.1611168187586873, "grad_norm": 1.0306184405875465, "learning_rate": 1.9897515735737776e-06, "loss": 0.8704, "step": 35760 }, { "epoch": 2.161721157913821, "grad_norm": 1.185211482651265, "learning_rate": 1.9870926381256563e-06, "loss": 0.9017, "step": 35770 }, { "epoch": 2.162325497068955, "grad_norm": 1.1722510370499983, "learning_rate": 1.9844350398109813e-06, "loss": 0.8911, "step": 35780 }, { "epoch": 2.162929836224089, "grad_norm": 1.0806332237913934, "learning_rate": 1.981778779809196e-06, "loss": 0.9087, "step": 35790 }, { "epoch": 2.163534175379223, "grad_norm": 1.29751618661275, "learning_rate": 1.979123859299157e-06, "loss": 0.9006, "step": 35800 }, { "epoch": 2.164138514534357, "grad_norm": 1.2403192627717603, "learning_rate": 1.976470279459119e-06, "loss": 0.9099, "step": 35810 }, { "epoch": 2.1647428536894906, "grad_norm": 1.2601712753650698, "learning_rate": 1.9738180414667462e-06, "loss": 0.8912, "step": 35820 }, { "epoch": 2.1653471928446244, "grad_norm": 1.2293630561367561, "learning_rate": 1.9711671464991094e-06, "loss": 0.8708, "step": 35830 }, { "epoch": 2.165951531999758, "grad_norm": 1.3115604141213448, "learning_rate": 1.9685175957326757e-06, "loss": 0.8968, "step": 35840 }, { "epoch": 2.166555871154892, "grad_norm": 1.3863014066553783, "learning_rate": 1.965869390343323e-06, "loss": 0.8795, "step": 35850 }, { "epoch": 2.167160210310026, "grad_norm": 1.5049387392185003, "learning_rate": 1.9632225315063302e-06, "loss": 0.9095, "step": 35860 }, { "epoch": 2.1677645494651596, "grad_norm": 1.5260768768434285, "learning_rate": 1.9605770203963754e-06, "loss": 0.8952, "step": 35870 }, { "epoch": 2.168368888620294, "grad_norm": 1.4635600908834379, "learning_rate": 1.957932858187541e-06, "loss": 0.8764, "step": 35880 }, { "epoch": 2.1689732277754277, "grad_norm": 1.5583928916014775, "learning_rate": 1.955290046053314e-06, "loss": 0.8696, "step": 35890 }, { "epoch": 2.1695775669305615, "grad_norm": 0.9236971476454152, "learning_rate": 1.9526485851665755e-06, "loss": 0.8734, "step": 35900 }, { "epoch": 2.1701819060856953, "grad_norm": 0.9411413085299765, "learning_rate": 1.950008476699611e-06, "loss": 0.904, "step": 35910 }, { "epoch": 2.170786245240829, "grad_norm": 0.896966845381397, "learning_rate": 1.947369721824108e-06, "loss": 0.8965, "step": 35920 }, { "epoch": 2.171390584395963, "grad_norm": 0.9013676365513028, "learning_rate": 1.9447323217111456e-06, "loss": 0.8783, "step": 35930 }, { "epoch": 2.1719949235510967, "grad_norm": 0.9459254683936665, "learning_rate": 1.9420962775312095e-06, "loss": 0.8958, "step": 35940 }, { "epoch": 2.172599262706231, "grad_norm": 0.8271281170881429, "learning_rate": 1.939461590454182e-06, "loss": 0.8932, "step": 35950 }, { "epoch": 2.1732036018613647, "grad_norm": 0.8605198446006623, "learning_rate": 1.9368282616493377e-06, "loss": 0.8852, "step": 35960 }, { "epoch": 2.1738079410164985, "grad_norm": 0.8469156772830047, "learning_rate": 1.9341962922853554e-06, "loss": 0.8766, "step": 35970 }, { "epoch": 2.1744122801716324, "grad_norm": 0.8252595662449176, "learning_rate": 1.9315656835303074e-06, "loss": 0.8877, "step": 35980 }, { "epoch": 2.175016619326766, "grad_norm": 0.8067127772748928, "learning_rate": 1.928936436551661e-06, "loss": 0.887, "step": 35990 }, { "epoch": 2.1756209584819, "grad_norm": 0.9187445385367577, "learning_rate": 1.926308552516281e-06, "loss": 0.8866, "step": 36000 }, { "epoch": 2.1762252976370338, "grad_norm": 0.8328957201464536, "learning_rate": 1.9236820325904293e-06, "loss": 0.8924, "step": 36010 }, { "epoch": 2.1768296367921676, "grad_norm": 0.8257160681123686, "learning_rate": 1.9210568779397554e-06, "loss": 0.8867, "step": 36020 }, { "epoch": 2.177433975947302, "grad_norm": 0.8645447246584572, "learning_rate": 1.9184330897293117e-06, "loss": 0.871, "step": 36030 }, { "epoch": 2.1780383151024356, "grad_norm": 0.8949221317083683, "learning_rate": 1.9158106691235368e-06, "loss": 0.8901, "step": 36040 }, { "epoch": 2.1786426542575694, "grad_norm": 0.6738737702382099, "learning_rate": 1.913189617286268e-06, "loss": 0.8578, "step": 36050 }, { "epoch": 2.1792469934127032, "grad_norm": 0.606883174927583, "learning_rate": 1.9105699353807293e-06, "loss": 0.8807, "step": 36060 }, { "epoch": 2.179851332567837, "grad_norm": 0.6781936866183955, "learning_rate": 1.9079516245695425e-06, "loss": 0.8804, "step": 36070 }, { "epoch": 2.180455671722971, "grad_norm": 0.6492750059631206, "learning_rate": 1.9053346860147193e-06, "loss": 0.8844, "step": 36080 }, { "epoch": 2.1810600108781046, "grad_norm": 0.6279252365467862, "learning_rate": 1.9027191208776586e-06, "loss": 0.8936, "step": 36090 }, { "epoch": 2.1816643500332384, "grad_norm": 0.6969525667350275, "learning_rate": 1.9001049303191537e-06, "loss": 0.8891, "step": 36100 }, { "epoch": 2.1822686891883727, "grad_norm": 0.6642236673433908, "learning_rate": 1.8974921154993898e-06, "loss": 0.892, "step": 36110 }, { "epoch": 2.1828730283435065, "grad_norm": 0.7019656482815692, "learning_rate": 1.8948806775779337e-06, "loss": 0.8801, "step": 36120 }, { "epoch": 2.1834773674986403, "grad_norm": 0.6793730750545127, "learning_rate": 1.892270617713749e-06, "loss": 0.8739, "step": 36130 }, { "epoch": 2.184081706653774, "grad_norm": 0.6654197545780667, "learning_rate": 1.889661937065186e-06, "loss": 0.8927, "step": 36140 }, { "epoch": 2.184686045808908, "grad_norm": 0.8536512125544913, "learning_rate": 1.8870546367899773e-06, "loss": 0.8843, "step": 36150 }, { "epoch": 2.1852903849640417, "grad_norm": 0.9011878101054452, "learning_rate": 1.8844487180452502e-06, "loss": 0.8835, "step": 36160 }, { "epoch": 2.1858947241191755, "grad_norm": 0.9387183601169968, "learning_rate": 1.8818441819875172e-06, "loss": 0.8852, "step": 36170 }, { "epoch": 2.1864990632743098, "grad_norm": 0.9327607919395237, "learning_rate": 1.8792410297726726e-06, "loss": 0.8864, "step": 36180 }, { "epoch": 2.1871034024294436, "grad_norm": 0.8755628055035676, "learning_rate": 1.876639262556001e-06, "loss": 0.8733, "step": 36190 }, { "epoch": 2.1877077415845774, "grad_norm": 0.6338858720646463, "learning_rate": 1.8740388814921728e-06, "loss": 0.8686, "step": 36200 }, { "epoch": 2.188312080739711, "grad_norm": 0.6630273915354651, "learning_rate": 1.8714398877352392e-06, "loss": 0.8881, "step": 36210 }, { "epoch": 2.188916419894845, "grad_norm": 0.6159520225542523, "learning_rate": 1.8688422824386388e-06, "loss": 0.8839, "step": 36220 }, { "epoch": 2.189520759049979, "grad_norm": 0.6385636877440544, "learning_rate": 1.8662460667551958e-06, "loss": 0.8831, "step": 36230 }, { "epoch": 2.1901250982051126, "grad_norm": 0.6467284846128416, "learning_rate": 1.8636512418371105e-06, "loss": 0.8855, "step": 36240 }, { "epoch": 2.1907294373602464, "grad_norm": 0.5977882139510758, "learning_rate": 1.8610578088359731e-06, "loss": 0.8963, "step": 36250 }, { "epoch": 2.1913337765153806, "grad_norm": 0.6083594760850606, "learning_rate": 1.8584657689027558e-06, "loss": 0.9004, "step": 36260 }, { "epoch": 2.1919381156705144, "grad_norm": 0.6018744711703098, "learning_rate": 1.8558751231878064e-06, "loss": 0.9022, "step": 36270 }, { "epoch": 2.1925424548256482, "grad_norm": 0.564524932783717, "learning_rate": 1.8532858728408592e-06, "loss": 0.8963, "step": 36280 }, { "epoch": 2.193146793980782, "grad_norm": 0.5897282314184277, "learning_rate": 1.8506980190110307e-06, "loss": 0.8824, "step": 36290 }, { "epoch": 2.193751133135916, "grad_norm": 0.617904741544674, "learning_rate": 1.8481115628468105e-06, "loss": 0.8809, "step": 36300 }, { "epoch": 2.1943554722910497, "grad_norm": 0.6179938951967063, "learning_rate": 1.845526505496076e-06, "loss": 0.8952, "step": 36310 }, { "epoch": 2.1949598114461835, "grad_norm": 0.6379393710503313, "learning_rate": 1.8429428481060769e-06, "loss": 0.8773, "step": 36320 }, { "epoch": 2.1955641506013173, "grad_norm": 0.5857841852551722, "learning_rate": 1.840360591823448e-06, "loss": 0.903, "step": 36330 }, { "epoch": 2.1961684897564515, "grad_norm": 0.5669331872883006, "learning_rate": 1.8377797377941953e-06, "loss": 0.8837, "step": 36340 }, { "epoch": 2.1967728289115853, "grad_norm": 0.5885944120463181, "learning_rate": 1.8352002871637104e-06, "loss": 0.8986, "step": 36350 }, { "epoch": 2.197377168066719, "grad_norm": 0.5931377916438505, "learning_rate": 1.8326222410767536e-06, "loss": 0.9044, "step": 36360 }, { "epoch": 2.197981507221853, "grad_norm": 0.5748942824991078, "learning_rate": 1.8300456006774687e-06, "loss": 0.8794, "step": 36370 }, { "epoch": 2.1985858463769867, "grad_norm": 0.6105965524242724, "learning_rate": 1.827470367109374e-06, "loss": 0.9033, "step": 36380 }, { "epoch": 2.1991901855321205, "grad_norm": 0.5917392433348321, "learning_rate": 1.8248965415153602e-06, "loss": 0.8853, "step": 36390 }, { "epoch": 2.1997945246872543, "grad_norm": 0.5775980237344719, "learning_rate": 1.822324125037696e-06, "loss": 0.8947, "step": 36400 }, { "epoch": 2.200398863842388, "grad_norm": 0.5873058355473882, "learning_rate": 1.8197531188180256e-06, "loss": 0.9063, "step": 36410 }, { "epoch": 2.2010032029975224, "grad_norm": 0.5701127562582692, "learning_rate": 1.8171835239973667e-06, "loss": 0.8796, "step": 36420 }, { "epoch": 2.201607542152656, "grad_norm": 0.5825694048979243, "learning_rate": 1.8146153417161067e-06, "loss": 0.8782, "step": 36430 }, { "epoch": 2.20221188130779, "grad_norm": 0.5885962454219273, "learning_rate": 1.8120485731140107e-06, "loss": 0.8796, "step": 36440 }, { "epoch": 2.202816220462924, "grad_norm": 0.6351575672730985, "learning_rate": 1.8094832193302175e-06, "loss": 0.899, "step": 36450 }, { "epoch": 2.2034205596180576, "grad_norm": 0.6352902982756858, "learning_rate": 1.8069192815032311e-06, "loss": 0.8768, "step": 36460 }, { "epoch": 2.2040248987731914, "grad_norm": 0.6743008800834629, "learning_rate": 1.804356760770934e-06, "loss": 0.8969, "step": 36470 }, { "epoch": 2.204629237928325, "grad_norm": 0.6510438100399902, "learning_rate": 1.8017956582705787e-06, "loss": 0.8915, "step": 36480 }, { "epoch": 2.205233577083459, "grad_norm": 0.5813652587460086, "learning_rate": 1.7992359751387834e-06, "loss": 0.8706, "step": 36490 }, { "epoch": 2.2058379162385933, "grad_norm": 0.6953466438171458, "learning_rate": 1.7966777125115415e-06, "loss": 0.8721, "step": 36500 }, { "epoch": 2.206442255393727, "grad_norm": 0.6616065017926829, "learning_rate": 1.7941208715242165e-06, "loss": 0.9004, "step": 36510 }, { "epoch": 2.207046594548861, "grad_norm": 0.5938364396116497, "learning_rate": 1.7915654533115346e-06, "loss": 0.8714, "step": 36520 }, { "epoch": 2.2076509337039947, "grad_norm": 0.7179989755939891, "learning_rate": 1.7890114590075974e-06, "loss": 0.8767, "step": 36530 }, { "epoch": 2.2082552728591285, "grad_norm": 0.7358894922676781, "learning_rate": 1.7864588897458729e-06, "loss": 0.8993, "step": 36540 }, { "epoch": 2.2088596120142623, "grad_norm": 0.746367742265286, "learning_rate": 1.7839077466591925e-06, "loss": 0.8864, "step": 36550 }, { "epoch": 2.209463951169396, "grad_norm": 0.7817523231637606, "learning_rate": 1.7813580308797613e-06, "loss": 0.8861, "step": 36560 }, { "epoch": 2.2100682903245303, "grad_norm": 0.729496105777396, "learning_rate": 1.7788097435391444e-06, "loss": 0.8989, "step": 36570 }, { "epoch": 2.210672629479664, "grad_norm": 0.6852444065561719, "learning_rate": 1.7762628857682796e-06, "loss": 0.884, "step": 36580 }, { "epoch": 2.211276968634798, "grad_norm": 0.7175292497845551, "learning_rate": 1.7737174586974627e-06, "loss": 0.903, "step": 36590 }, { "epoch": 2.2118813077899317, "grad_norm": 0.6770318813166235, "learning_rate": 1.7711734634563621e-06, "loss": 0.9013, "step": 36600 }, { "epoch": 2.2124856469450656, "grad_norm": 0.6475624360900526, "learning_rate": 1.7686309011740044e-06, "loss": 0.8808, "step": 36610 }, { "epoch": 2.2130899861001994, "grad_norm": 0.6934941463311876, "learning_rate": 1.7660897729787841e-06, "loss": 0.8751, "step": 36620 }, { "epoch": 2.213694325255333, "grad_norm": 0.6828939367024258, "learning_rate": 1.76355007999846e-06, "loss": 0.9083, "step": 36630 }, { "epoch": 2.214298664410467, "grad_norm": 0.668867271778218, "learning_rate": 1.7610118233601497e-06, "loss": 0.8602, "step": 36640 }, { "epoch": 2.214903003565601, "grad_norm": 0.8615036294875504, "learning_rate": 1.758475004190337e-06, "loss": 0.8921, "step": 36650 }, { "epoch": 2.215507342720735, "grad_norm": 0.848344021406913, "learning_rate": 1.7559396236148678e-06, "loss": 0.8949, "step": 36660 }, { "epoch": 2.216111681875869, "grad_norm": 0.8447896454662474, "learning_rate": 1.7534056827589452e-06, "loss": 0.9174, "step": 36670 }, { "epoch": 2.2167160210310026, "grad_norm": 0.8611124789595431, "learning_rate": 1.7508731827471387e-06, "loss": 0.8858, "step": 36680 }, { "epoch": 2.2173203601861364, "grad_norm": 0.8484346659557226, "learning_rate": 1.7483421247033776e-06, "loss": 0.9027, "step": 36690 }, { "epoch": 2.2179246993412702, "grad_norm": 0.8012577424675125, "learning_rate": 1.745812509750946e-06, "loss": 0.8949, "step": 36700 }, { "epoch": 2.218529038496404, "grad_norm": 0.8111003175741118, "learning_rate": 1.7432843390124933e-06, "loss": 0.8841, "step": 36710 }, { "epoch": 2.219133377651538, "grad_norm": 0.8692906827599644, "learning_rate": 1.7407576136100278e-06, "loss": 0.9, "step": 36720 }, { "epoch": 2.219737716806672, "grad_norm": 0.9031370287133852, "learning_rate": 1.738232334664911e-06, "loss": 0.8948, "step": 36730 }, { "epoch": 2.220342055961806, "grad_norm": 0.8294289203178514, "learning_rate": 1.7357085032978676e-06, "loss": 0.8735, "step": 36740 }, { "epoch": 2.2209463951169397, "grad_norm": 1.232663255690472, "learning_rate": 1.733186120628979e-06, "loss": 0.8982, "step": 36750 }, { "epoch": 2.2215507342720735, "grad_norm": 1.1938677755502394, "learning_rate": 1.7306651877776836e-06, "loss": 0.8894, "step": 36760 }, { "epoch": 2.2221550734272073, "grad_norm": 1.271398043818262, "learning_rate": 1.7281457058627733e-06, "loss": 0.8972, "step": 36770 }, { "epoch": 2.222759412582341, "grad_norm": 1.231630452020186, "learning_rate": 1.7256276760023993e-06, "loss": 0.89, "step": 36780 }, { "epoch": 2.223363751737475, "grad_norm": 1.1888978954029776, "learning_rate": 1.7231110993140699e-06, "loss": 0.8711, "step": 36790 }, { "epoch": 2.223968090892609, "grad_norm": 1.0612803309642933, "learning_rate": 1.7205959769146424e-06, "loss": 0.9119, "step": 36800 }, { "epoch": 2.224572430047743, "grad_norm": 0.8424229073925843, "learning_rate": 1.7180823099203365e-06, "loss": 0.9146, "step": 36810 }, { "epoch": 2.2251767692028768, "grad_norm": 0.8622728513728528, "learning_rate": 1.7155700994467173e-06, "loss": 0.8964, "step": 36820 }, { "epoch": 2.2257811083580106, "grad_norm": 0.9560136887571115, "learning_rate": 1.7130593466087125e-06, "loss": 0.896, "step": 36830 }, { "epoch": 2.2263854475131444, "grad_norm": 0.8185085188081441, "learning_rate": 1.7105500525205943e-06, "loss": 0.8907, "step": 36840 }, { "epoch": 2.226989786668278, "grad_norm": 2.4229031740319957, "learning_rate": 1.7080422182959961e-06, "loss": 0.8731, "step": 36850 }, { "epoch": 2.227594125823412, "grad_norm": 2.7314356927294705, "learning_rate": 1.7055358450478948e-06, "loss": 0.8778, "step": 36860 }, { "epoch": 2.228198464978546, "grad_norm": 2.722034080264612, "learning_rate": 1.7030309338886258e-06, "loss": 0.8964, "step": 36870 }, { "epoch": 2.2288028041336796, "grad_norm": 2.48289751055475, "learning_rate": 1.7005274859298731e-06, "loss": 0.914, "step": 36880 }, { "epoch": 2.229407143288814, "grad_norm": 2.480870019229309, "learning_rate": 1.6980255022826691e-06, "loss": 0.8956, "step": 36890 }, { "epoch": 2.2300114824439476, "grad_norm": 0.9282967713244585, "learning_rate": 1.6955249840574001e-06, "loss": 0.879, "step": 36900 }, { "epoch": 2.2306158215990814, "grad_norm": 0.9122029164231888, "learning_rate": 1.6930259323638015e-06, "loss": 0.8966, "step": 36910 }, { "epoch": 2.2312201607542153, "grad_norm": 0.8933742193952547, "learning_rate": 1.6905283483109535e-06, "loss": 0.9075, "step": 36920 }, { "epoch": 2.231824499909349, "grad_norm": 0.9780188667155987, "learning_rate": 1.6880322330072897e-06, "loss": 0.8958, "step": 36930 }, { "epoch": 2.232428839064483, "grad_norm": 0.8602150819446924, "learning_rate": 1.6855375875605923e-06, "loss": 0.8611, "step": 36940 }, { "epoch": 2.2330331782196167, "grad_norm": 1.0075398662983217, "learning_rate": 1.683044413077986e-06, "loss": 0.8743, "step": 36950 }, { "epoch": 2.233637517374751, "grad_norm": 1.1288389264643472, "learning_rate": 1.6805527106659469e-06, "loss": 0.8961, "step": 36960 }, { "epoch": 2.2342418565298847, "grad_norm": 1.0082437861676605, "learning_rate": 1.6780624814302993e-06, "loss": 0.9121, "step": 36970 }, { "epoch": 2.2348461956850185, "grad_norm": 0.9897001307573534, "learning_rate": 1.675573726476208e-06, "loss": 0.8894, "step": 36980 }, { "epoch": 2.2354505348401523, "grad_norm": 0.9485602597674926, "learning_rate": 1.673086446908187e-06, "loss": 0.8748, "step": 36990 }, { "epoch": 2.236054873995286, "grad_norm": 1.1339738049158197, "learning_rate": 1.670600643830098e-06, "loss": 0.8893, "step": 37000 }, { "epoch": 2.23665921315042, "grad_norm": 1.0643826525869515, "learning_rate": 1.668116318345141e-06, "loss": 0.8931, "step": 37010 }, { "epoch": 2.2372635523055537, "grad_norm": 1.121287043341767, "learning_rate": 1.6656334715558658e-06, "loss": 0.8763, "step": 37020 }, { "epoch": 2.2378678914606875, "grad_norm": 1.0536902328507198, "learning_rate": 1.6631521045641658e-06, "loss": 0.8792, "step": 37030 }, { "epoch": 2.238472230615822, "grad_norm": 0.9963660692357627, "learning_rate": 1.660672218471272e-06, "loss": 0.8624, "step": 37040 }, { "epoch": 2.2390765697709556, "grad_norm": 1.1169132798114147, "learning_rate": 1.6581938143777643e-06, "loss": 0.8918, "step": 37050 }, { "epoch": 2.2396809089260894, "grad_norm": 1.0804784448940277, "learning_rate": 1.6557168933835643e-06, "loss": 0.8906, "step": 37060 }, { "epoch": 2.240285248081223, "grad_norm": 1.0086513349254966, "learning_rate": 1.6532414565879302e-06, "loss": 0.8807, "step": 37070 }, { "epoch": 2.240889587236357, "grad_norm": 1.0226198102864512, "learning_rate": 1.650767505089469e-06, "loss": 0.8878, "step": 37080 }, { "epoch": 2.241493926391491, "grad_norm": 1.0240458853836232, "learning_rate": 1.6482950399861203e-06, "loss": 0.8862, "step": 37090 }, { "epoch": 2.2420982655466246, "grad_norm": 1.1888935201367858, "learning_rate": 1.645824062375172e-06, "loss": 0.8796, "step": 37100 }, { "epoch": 2.2427026047017584, "grad_norm": 1.1268260617394485, "learning_rate": 1.6433545733532452e-06, "loss": 0.8798, "step": 37110 }, { "epoch": 2.2433069438568927, "grad_norm": 1.1905736824302668, "learning_rate": 1.6408865740163054e-06, "loss": 0.8906, "step": 37120 }, { "epoch": 2.2439112830120265, "grad_norm": 1.140063230054329, "learning_rate": 1.6384200654596555e-06, "loss": 0.8873, "step": 37130 }, { "epoch": 2.2445156221671603, "grad_norm": 1.2265198915264868, "learning_rate": 1.6359550487779336e-06, "loss": 0.8933, "step": 37140 }, { "epoch": 2.245119961322294, "grad_norm": 1.0563880127005834, "learning_rate": 1.63349152506512e-06, "loss": 0.8994, "step": 37150 }, { "epoch": 2.245724300477428, "grad_norm": 1.0037048836714033, "learning_rate": 1.6310294954145323e-06, "loss": 0.8786, "step": 37160 }, { "epoch": 2.2463286396325617, "grad_norm": 1.029701021736369, "learning_rate": 1.6285689609188199e-06, "loss": 0.8863, "step": 37170 }, { "epoch": 2.2469329787876955, "grad_norm": 1.042120107209839, "learning_rate": 1.6261099226699738e-06, "loss": 0.9061, "step": 37180 }, { "epoch": 2.2475373179428297, "grad_norm": 1.0140323104236189, "learning_rate": 1.623652381759322e-06, "loss": 0.915, "step": 37190 }, { "epoch": 2.2481416570979635, "grad_norm": 1.1034948303221233, "learning_rate": 1.621196339277521e-06, "loss": 0.8811, "step": 37200 }, { "epoch": 2.2487459962530973, "grad_norm": 1.0019590675621992, "learning_rate": 1.618741796314569e-06, "loss": 0.8914, "step": 37210 }, { "epoch": 2.249350335408231, "grad_norm": 1.1056444831702081, "learning_rate": 1.6162887539597977e-06, "loss": 0.8927, "step": 37220 }, { "epoch": 2.249954674563365, "grad_norm": 1.0650229486425582, "learning_rate": 1.6138372133018676e-06, "loss": 0.8964, "step": 37230 }, { "epoch": 2.2505590137184988, "grad_norm": 1.0637077422666117, "learning_rate": 1.6113871754287796e-06, "loss": 0.863, "step": 37240 }, { "epoch": 2.2511633528736326, "grad_norm": 1.0660076532336324, "learning_rate": 1.6089386414278652e-06, "loss": 0.8971, "step": 37250 }, { "epoch": 2.2517676920287664, "grad_norm": 1.0247836824547185, "learning_rate": 1.6064916123857854e-06, "loss": 0.8988, "step": 37260 }, { "epoch": 2.2523720311839, "grad_norm": 1.0730542327450574, "learning_rate": 1.6040460893885374e-06, "loss": 0.8575, "step": 37270 }, { "epoch": 2.2529763703390344, "grad_norm": 1.1349725238081176, "learning_rate": 1.6016020735214494e-06, "loss": 0.882, "step": 37280 }, { "epoch": 2.253580709494168, "grad_norm": 1.025046365954902, "learning_rate": 1.5991595658691778e-06, "loss": 0.8891, "step": 37290 }, { "epoch": 2.254185048649302, "grad_norm": 1.2105527844032822, "learning_rate": 1.596718567515712e-06, "loss": 0.8921, "step": 37300 }, { "epoch": 2.254789387804436, "grad_norm": 1.3201846423105932, "learning_rate": 1.594279079544374e-06, "loss": 0.879, "step": 37310 }, { "epoch": 2.2553937269595696, "grad_norm": 1.2862273845874301, "learning_rate": 1.5918411030378089e-06, "loss": 0.8921, "step": 37320 }, { "epoch": 2.2559980661147034, "grad_norm": 1.174249035134414, "learning_rate": 1.5894046390779982e-06, "loss": 0.89, "step": 37330 }, { "epoch": 2.2566024052698372, "grad_norm": 1.2148050798508878, "learning_rate": 1.5869696887462455e-06, "loss": 0.8968, "step": 37340 }, { "epoch": 2.2572067444249715, "grad_norm": 1.1568354974623272, "learning_rate": 1.5845362531231894e-06, "loss": 0.9086, "step": 37350 }, { "epoch": 2.2578110835801053, "grad_norm": 1.1810387400072275, "learning_rate": 1.5821043332887891e-06, "loss": 0.8857, "step": 37360 }, { "epoch": 2.258415422735239, "grad_norm": 1.0797959905169021, "learning_rate": 1.5796739303223386e-06, "loss": 0.8713, "step": 37370 }, { "epoch": 2.259019761890373, "grad_norm": 1.0869542518005186, "learning_rate": 1.5772450453024513e-06, "loss": 0.8874, "step": 37380 }, { "epoch": 2.2596241010455067, "grad_norm": 1.1098372976244586, "learning_rate": 1.574817679307072e-06, "loss": 0.884, "step": 37390 }, { "epoch": 2.2602284402006405, "grad_norm": 1.1942924562134536, "learning_rate": 1.5723918334134703e-06, "loss": 0.8913, "step": 37400 }, { "epoch": 2.2608327793557743, "grad_norm": 1.2174895155080612, "learning_rate": 1.569967508698242e-06, "loss": 0.8973, "step": 37410 }, { "epoch": 2.2614371185109086, "grad_norm": 1.2268678157474902, "learning_rate": 1.5675447062373035e-06, "loss": 0.8915, "step": 37420 }, { "epoch": 2.2620414576660424, "grad_norm": 1.21756793256655, "learning_rate": 1.5651234271059e-06, "loss": 0.8804, "step": 37430 }, { "epoch": 2.262645796821176, "grad_norm": 1.2738766299183828, "learning_rate": 1.5627036723786022e-06, "loss": 0.8803, "step": 37440 }, { "epoch": 2.26325013597631, "grad_norm": 1.4434433678876375, "learning_rate": 1.5602854431292963e-06, "loss": 0.8871, "step": 37450 }, { "epoch": 2.2638544751314438, "grad_norm": 1.4766335072600694, "learning_rate": 1.5578687404311993e-06, "loss": 0.8921, "step": 37460 }, { "epoch": 2.2644588142865776, "grad_norm": 1.4760296309205438, "learning_rate": 1.5554535653568492e-06, "loss": 0.871, "step": 37470 }, { "epoch": 2.2650631534417114, "grad_norm": 1.422964959497385, "learning_rate": 1.553039918978102e-06, "loss": 0.8874, "step": 37480 }, { "epoch": 2.265667492596845, "grad_norm": 1.498394552454921, "learning_rate": 1.5506278023661398e-06, "loss": 0.9058, "step": 37490 }, { "epoch": 2.266271831751979, "grad_norm": 0.9678208636011018, "learning_rate": 1.548217216591465e-06, "loss": 0.8726, "step": 37500 }, { "epoch": 2.2668761709071132, "grad_norm": 0.8876549650275329, "learning_rate": 1.5458081627238969e-06, "loss": 0.8996, "step": 37510 }, { "epoch": 2.267480510062247, "grad_norm": 0.9028933332394528, "learning_rate": 1.5434006418325792e-06, "loss": 0.8744, "step": 37520 }, { "epoch": 2.268084849217381, "grad_norm": 0.9120746950777309, "learning_rate": 1.5409946549859756e-06, "loss": 0.8988, "step": 37530 }, { "epoch": 2.2686891883725147, "grad_norm": 0.8974977166314854, "learning_rate": 1.538590203251864e-06, "loss": 0.8863, "step": 37540 }, { "epoch": 2.2692935275276485, "grad_norm": 0.8247723106141008, "learning_rate": 1.5361872876973454e-06, "loss": 0.887, "step": 37550 }, { "epoch": 2.2698978666827823, "grad_norm": 0.8506953062544385, "learning_rate": 1.5337859093888403e-06, "loss": 0.877, "step": 37560 }, { "epoch": 2.270502205837916, "grad_norm": 0.8308649285460079, "learning_rate": 1.5313860693920802e-06, "loss": 0.8833, "step": 37570 }, { "epoch": 2.2711065449930503, "grad_norm": 0.8586671539291736, "learning_rate": 1.5289877687721222e-06, "loss": 0.8823, "step": 37580 }, { "epoch": 2.271710884148184, "grad_norm": 0.8705147847147894, "learning_rate": 1.5265910085933323e-06, "loss": 0.8728, "step": 37590 }, { "epoch": 2.272315223303318, "grad_norm": 0.8386276651409899, "learning_rate": 1.524195789919401e-06, "loss": 0.8894, "step": 37600 }, { "epoch": 2.2729195624584517, "grad_norm": 0.8884548096993902, "learning_rate": 1.521802113813326e-06, "loss": 0.8633, "step": 37610 }, { "epoch": 2.2735239016135855, "grad_norm": 0.9003853829388102, "learning_rate": 1.519409981337429e-06, "loss": 0.8729, "step": 37620 }, { "epoch": 2.2741282407687193, "grad_norm": 0.8834963767649261, "learning_rate": 1.5170193935533389e-06, "loss": 0.9032, "step": 37630 }, { "epoch": 2.274732579923853, "grad_norm": 0.8235260953922231, "learning_rate": 1.5146303515220045e-06, "loss": 0.8913, "step": 37640 }, { "epoch": 2.275336919078987, "grad_norm": 0.6353030770335787, "learning_rate": 1.5122428563036883e-06, "loss": 0.8934, "step": 37650 }, { "epoch": 2.2759412582341207, "grad_norm": 0.6080089849420665, "learning_rate": 1.5098569089579611e-06, "loss": 0.8735, "step": 37660 }, { "epoch": 2.276545597389255, "grad_norm": 0.6717457742950187, "learning_rate": 1.5074725105437128e-06, "loss": 0.8612, "step": 37670 }, { "epoch": 2.277149936544389, "grad_norm": 0.674529580795632, "learning_rate": 1.5050896621191447e-06, "loss": 0.8731, "step": 37680 }, { "epoch": 2.2777542756995226, "grad_norm": 0.6385370616480971, "learning_rate": 1.5027083647417657e-06, "loss": 0.8854, "step": 37690 }, { "epoch": 2.2783586148546564, "grad_norm": 0.6640793926567452, "learning_rate": 1.5003286194684008e-06, "loss": 0.8795, "step": 37700 }, { "epoch": 2.27896295400979, "grad_norm": 0.717967984453388, "learning_rate": 1.4979504273551875e-06, "loss": 0.8733, "step": 37710 }, { "epoch": 2.279567293164924, "grad_norm": 0.6924389178321663, "learning_rate": 1.4955737894575683e-06, "loss": 0.867, "step": 37720 }, { "epoch": 2.280171632320058, "grad_norm": 0.6516681011651431, "learning_rate": 1.4931987068303e-06, "loss": 0.8627, "step": 37730 }, { "epoch": 2.280775971475192, "grad_norm": 0.6436091883452515, "learning_rate": 1.4908251805274487e-06, "loss": 0.8655, "step": 37740 }, { "epoch": 2.281380310630326, "grad_norm": 0.848333541809155, "learning_rate": 1.4884532116023914e-06, "loss": 0.9009, "step": 37750 }, { "epoch": 2.2819846497854597, "grad_norm": 0.8779172399496122, "learning_rate": 1.4860828011078087e-06, "loss": 0.9046, "step": 37760 }, { "epoch": 2.2825889889405935, "grad_norm": 0.8522566568300387, "learning_rate": 1.4837139500956938e-06, "loss": 0.887, "step": 37770 }, { "epoch": 2.2831933280957273, "grad_norm": 0.9434503850020198, "learning_rate": 1.4813466596173492e-06, "loss": 0.8911, "step": 37780 }, { "epoch": 2.283797667250861, "grad_norm": 0.9076135953936881, "learning_rate": 1.4789809307233789e-06, "loss": 0.8916, "step": 37790 }, { "epoch": 2.284402006405995, "grad_norm": 0.5705320045257928, "learning_rate": 1.476616764463699e-06, "loss": 0.8702, "step": 37800 }, { "epoch": 2.285006345561129, "grad_norm": 0.5728535275398304, "learning_rate": 1.4742541618875322e-06, "loss": 0.8806, "step": 37810 }, { "epoch": 2.285610684716263, "grad_norm": 0.5973995018392249, "learning_rate": 1.471893124043402e-06, "loss": 0.8824, "step": 37820 }, { "epoch": 2.2862150238713967, "grad_norm": 0.5630313767600951, "learning_rate": 1.4695336519791454e-06, "loss": 0.8699, "step": 37830 }, { "epoch": 2.2868193630265305, "grad_norm": 0.5959663572778476, "learning_rate": 1.4671757467418956e-06, "loss": 0.88, "step": 37840 }, { "epoch": 2.2874237021816644, "grad_norm": 0.5726324475985635, "learning_rate": 1.4648194093780987e-06, "loss": 0.8905, "step": 37850 }, { "epoch": 2.288028041336798, "grad_norm": 0.5698584527590744, "learning_rate": 1.462464640933498e-06, "loss": 0.8842, "step": 37860 }, { "epoch": 2.288632380491932, "grad_norm": 0.6409374671040744, "learning_rate": 1.4601114424531476e-06, "loss": 0.8718, "step": 37870 }, { "epoch": 2.2892367196470658, "grad_norm": 0.6190529434845924, "learning_rate": 1.4577598149813965e-06, "loss": 0.8774, "step": 37880 }, { "epoch": 2.2898410588021996, "grad_norm": 0.5652780643712649, "learning_rate": 1.4554097595619039e-06, "loss": 0.8477, "step": 37890 }, { "epoch": 2.290445397957334, "grad_norm": 0.6160125339586162, "learning_rate": 1.4530612772376295e-06, "loss": 0.9001, "step": 37900 }, { "epoch": 2.2910497371124676, "grad_norm": 0.6093875474013092, "learning_rate": 1.45071436905083e-06, "loss": 0.898, "step": 37910 }, { "epoch": 2.2916540762676014, "grad_norm": 0.6026820628589228, "learning_rate": 1.4483690360430702e-06, "loss": 0.8778, "step": 37920 }, { "epoch": 2.2922584154227352, "grad_norm": 0.6052810117891112, "learning_rate": 1.4460252792552133e-06, "loss": 0.882, "step": 37930 }, { "epoch": 2.292862754577869, "grad_norm": 0.5933166114948633, "learning_rate": 1.4436830997274199e-06, "loss": 0.8737, "step": 37940 }, { "epoch": 2.293467093733003, "grad_norm": 0.5887759488402405, "learning_rate": 1.4413424984991547e-06, "loss": 0.8861, "step": 37950 }, { "epoch": 2.2940714328881366, "grad_norm": 0.6175401040264422, "learning_rate": 1.439003476609182e-06, "loss": 0.8962, "step": 37960 }, { "epoch": 2.294675772043271, "grad_norm": 0.6058417503897862, "learning_rate": 1.4366660350955613e-06, "loss": 0.8963, "step": 37970 }, { "epoch": 2.2952801111984047, "grad_norm": 0.584681810947484, "learning_rate": 1.434330174995654e-06, "loss": 0.8829, "step": 37980 }, { "epoch": 2.2958844503535385, "grad_norm": 0.6356538065996326, "learning_rate": 1.4319958973461202e-06, "loss": 0.8875, "step": 37990 }, { "epoch": 2.2964887895086723, "grad_norm": 0.6085316972920589, "learning_rate": 1.4296632031829139e-06, "loss": 0.8757, "step": 38000 }, { "epoch": 2.297093128663806, "grad_norm": 0.6136341535863586, "learning_rate": 1.42733209354129e-06, "loss": 0.8837, "step": 38010 }, { "epoch": 2.29769746781894, "grad_norm": 0.6124372399641269, "learning_rate": 1.4250025694558e-06, "loss": 0.9062, "step": 38020 }, { "epoch": 2.2983018069740737, "grad_norm": 0.5718842647616003, "learning_rate": 1.4226746319602886e-06, "loss": 0.8762, "step": 38030 }, { "epoch": 2.2989061461292075, "grad_norm": 0.637893617290052, "learning_rate": 1.4203482820878994e-06, "loss": 0.8783, "step": 38040 }, { "epoch": 2.2995104852843418, "grad_norm": 0.6259416948328, "learning_rate": 1.418023520871072e-06, "loss": 0.879, "step": 38050 }, { "epoch": 2.3001148244394756, "grad_norm": 0.5962914511084001, "learning_rate": 1.4157003493415361e-06, "loss": 0.885, "step": 38060 }, { "epoch": 2.3007191635946094, "grad_norm": 0.6372198512368646, "learning_rate": 1.4133787685303213e-06, "loss": 0.8727, "step": 38070 }, { "epoch": 2.301323502749743, "grad_norm": 0.6117439150049708, "learning_rate": 1.4110587794677487e-06, "loss": 0.8631, "step": 38080 }, { "epoch": 2.301927841904877, "grad_norm": 0.6385353486274702, "learning_rate": 1.4087403831834352e-06, "loss": 0.8996, "step": 38090 }, { "epoch": 2.302532181060011, "grad_norm": 0.6919054891369676, "learning_rate": 1.4064235807062853e-06, "loss": 0.8966, "step": 38100 }, { "epoch": 2.3031365202151446, "grad_norm": 0.6617623573733691, "learning_rate": 1.4041083730645043e-06, "loss": 0.9033, "step": 38110 }, { "epoch": 2.3037408593702784, "grad_norm": 0.6634089563771768, "learning_rate": 1.4017947612855814e-06, "loss": 0.9012, "step": 38120 }, { "epoch": 2.3043451985254126, "grad_norm": 0.6433921208736895, "learning_rate": 1.3994827463963045e-06, "loss": 0.8721, "step": 38130 }, { "epoch": 2.3049495376805464, "grad_norm": 0.6770576897545616, "learning_rate": 1.3971723294227468e-06, "loss": 0.8775, "step": 38140 }, { "epoch": 2.3055538768356802, "grad_norm": 0.7360078605631015, "learning_rate": 1.3948635113902781e-06, "loss": 0.8926, "step": 38150 }, { "epoch": 2.306158215990814, "grad_norm": 0.724226262026073, "learning_rate": 1.3925562933235536e-06, "loss": 0.8669, "step": 38160 }, { "epoch": 2.306762555145948, "grad_norm": 0.7325213655099648, "learning_rate": 1.3902506762465212e-06, "loss": 0.8991, "step": 38170 }, { "epoch": 2.3073668943010817, "grad_norm": 0.7278384354492624, "learning_rate": 1.38794666118242e-06, "loss": 0.8719, "step": 38180 }, { "epoch": 2.3079712334562155, "grad_norm": 0.7897648685006671, "learning_rate": 1.3856442491537719e-06, "loss": 0.8972, "step": 38190 }, { "epoch": 2.3085755726113497, "grad_norm": 0.634141413340625, "learning_rate": 1.3833434411823938e-06, "loss": 0.8924, "step": 38200 }, { "epoch": 2.3091799117664835, "grad_norm": 0.6748259700113949, "learning_rate": 1.381044238289389e-06, "loss": 0.8769, "step": 38210 }, { "epoch": 2.3097842509216173, "grad_norm": 0.6861944331681215, "learning_rate": 1.378746641495145e-06, "loss": 0.8601, "step": 38220 }, { "epoch": 2.310388590076751, "grad_norm": 0.6522450464471466, "learning_rate": 1.3764506518193404e-06, "loss": 0.9039, "step": 38230 }, { "epoch": 2.310992929231885, "grad_norm": 0.6660109867395618, "learning_rate": 1.3741562702809414e-06, "loss": 0.9032, "step": 38240 }, { "epoch": 2.3115972683870187, "grad_norm": 0.8858639158926562, "learning_rate": 1.3718634978981948e-06, "loss": 0.864, "step": 38250 }, { "epoch": 2.3122016075421525, "grad_norm": 0.8688711857797916, "learning_rate": 1.3695723356886387e-06, "loss": 0.8709, "step": 38260 }, { "epoch": 2.3128059466972863, "grad_norm": 0.807033033866591, "learning_rate": 1.3672827846690968e-06, "loss": 0.8719, "step": 38270 }, { "epoch": 2.31341028585242, "grad_norm": 0.8156677950414035, "learning_rate": 1.3649948458556717e-06, "loss": 0.918, "step": 38280 }, { "epoch": 2.3140146250075544, "grad_norm": 0.8481972079943875, "learning_rate": 1.3627085202637568e-06, "loss": 0.8854, "step": 38290 }, { "epoch": 2.314618964162688, "grad_norm": 0.8581066073457636, "learning_rate": 1.360423808908029e-06, "loss": 0.8695, "step": 38300 }, { "epoch": 2.315223303317822, "grad_norm": 0.8723934072113854, "learning_rate": 1.3581407128024437e-06, "loss": 0.878, "step": 38310 }, { "epoch": 2.315827642472956, "grad_norm": 0.854675385101025, "learning_rate": 1.3558592329602443e-06, "loss": 0.8923, "step": 38320 }, { "epoch": 2.3164319816280896, "grad_norm": 0.8577976898983138, "learning_rate": 1.3535793703939571e-06, "loss": 0.8832, "step": 38330 }, { "epoch": 2.3170363207832234, "grad_norm": 0.8515784710989001, "learning_rate": 1.3513011261153864e-06, "loss": 0.888, "step": 38340 }, { "epoch": 2.317640659938357, "grad_norm": 1.1906397458516886, "learning_rate": 1.3490245011356212e-06, "loss": 0.8631, "step": 38350 }, { "epoch": 2.3182449990934915, "grad_norm": 1.3576366931725123, "learning_rate": 1.3467494964650346e-06, "loss": 0.8744, "step": 38360 }, { "epoch": 2.3188493382486253, "grad_norm": 1.2433628693586447, "learning_rate": 1.3444761131132738e-06, "loss": 0.8631, "step": 38370 }, { "epoch": 2.319453677403759, "grad_norm": 1.2087813079594782, "learning_rate": 1.342204352089273e-06, "loss": 0.8955, "step": 38380 }, { "epoch": 2.320058016558893, "grad_norm": 1.2789827972554675, "learning_rate": 1.339934214401241e-06, "loss": 0.9016, "step": 38390 }, { "epoch": 2.3206623557140267, "grad_norm": 1.2571337809254906, "learning_rate": 1.337665701056672e-06, "loss": 0.8881, "step": 38400 }, { "epoch": 2.3212666948691605, "grad_norm": 1.0415830978911638, "learning_rate": 1.3353988130623319e-06, "loss": 0.9036, "step": 38410 }, { "epoch": 2.3218710340242943, "grad_norm": 0.9286746988296491, "learning_rate": 1.3331335514242727e-06, "loss": 0.9233, "step": 38420 }, { "epoch": 2.3224753731794285, "grad_norm": 0.9196636600332607, "learning_rate": 1.3308699171478217e-06, "loss": 0.8494, "step": 38430 }, { "epoch": 2.3230797123345623, "grad_norm": 1.0855610557346511, "learning_rate": 1.3286079112375816e-06, "loss": 0.9141, "step": 38440 }, { "epoch": 2.323684051489696, "grad_norm": 2.3883763207069872, "learning_rate": 1.3263475346974346e-06, "loss": 0.8796, "step": 38450 }, { "epoch": 2.32428839064483, "grad_norm": 2.4485917854873995, "learning_rate": 1.3240887885305426e-06, "loss": 0.8829, "step": 38460 }, { "epoch": 2.3248927297999638, "grad_norm": 2.5911538762668576, "learning_rate": 1.3218316737393372e-06, "loss": 0.9142, "step": 38470 }, { "epoch": 2.3254970689550976, "grad_norm": 2.425973762702792, "learning_rate": 1.3195761913255317e-06, "loss": 0.8762, "step": 38480 }, { "epoch": 2.3261014081102314, "grad_norm": 2.678089605295009, "learning_rate": 1.3173223422901143e-06, "loss": 0.9103, "step": 38490 }, { "epoch": 2.326705747265365, "grad_norm": 0.9620608279141157, "learning_rate": 1.3150701276333444e-06, "loss": 0.9105, "step": 38500 }, { "epoch": 2.327310086420499, "grad_norm": 0.869689154709553, "learning_rate": 1.3128195483547596e-06, "loss": 0.8866, "step": 38510 }, { "epoch": 2.327914425575633, "grad_norm": 0.9514243182061801, "learning_rate": 1.3105706054531726e-06, "loss": 0.8887, "step": 38520 }, { "epoch": 2.328518764730767, "grad_norm": 0.920439849185892, "learning_rate": 1.3083232999266654e-06, "loss": 0.8732, "step": 38530 }, { "epoch": 2.329123103885901, "grad_norm": 0.9496562982987091, "learning_rate": 1.3060776327725976e-06, "loss": 0.8518, "step": 38540 }, { "epoch": 2.3297274430410346, "grad_norm": 1.007130116180434, "learning_rate": 1.303833604987601e-06, "loss": 0.9023, "step": 38550 }, { "epoch": 2.3303317821961684, "grad_norm": 1.0127214602493766, "learning_rate": 1.3015912175675766e-06, "loss": 0.8947, "step": 38560 }, { "epoch": 2.3309361213513022, "grad_norm": 0.9654032967972956, "learning_rate": 1.299350471507701e-06, "loss": 0.8869, "step": 38570 }, { "epoch": 2.331540460506436, "grad_norm": 0.9213049583388885, "learning_rate": 1.2971113678024228e-06, "loss": 0.8803, "step": 38580 }, { "epoch": 2.3321447996615703, "grad_norm": 0.972742456399869, "learning_rate": 1.2948739074454563e-06, "loss": 0.86, "step": 38590 }, { "epoch": 2.332749138816704, "grad_norm": 1.0832606148539412, "learning_rate": 1.2926380914297931e-06, "loss": 0.8878, "step": 38600 }, { "epoch": 2.333353477971838, "grad_norm": 1.0287336097995405, "learning_rate": 1.2904039207476927e-06, "loss": 0.8918, "step": 38610 }, { "epoch": 2.3339578171269717, "grad_norm": 1.1057106361436366, "learning_rate": 1.288171396390681e-06, "loss": 0.8749, "step": 38620 }, { "epoch": 2.3345621562821055, "grad_norm": 1.0174293623061006, "learning_rate": 1.2859405193495594e-06, "loss": 0.8629, "step": 38630 }, { "epoch": 2.3351664954372393, "grad_norm": 1.0853736579345017, "learning_rate": 1.2837112906143917e-06, "loss": 0.8862, "step": 38640 }, { "epoch": 2.335770834592373, "grad_norm": 1.1257346695427284, "learning_rate": 1.2814837111745166e-06, "loss": 0.8547, "step": 38650 }, { "epoch": 2.336375173747507, "grad_norm": 1.0250955148952747, "learning_rate": 1.2792577820185343e-06, "loss": 0.8891, "step": 38660 }, { "epoch": 2.3369795129026407, "grad_norm": 1.0579691840048668, "learning_rate": 1.2770335041343195e-06, "loss": 0.8891, "step": 38670 }, { "epoch": 2.337583852057775, "grad_norm": 1.1262128268216802, "learning_rate": 1.2748108785090074e-06, "loss": 0.8621, "step": 38680 }, { "epoch": 2.3381881912129088, "grad_norm": 1.0215376361463935, "learning_rate": 1.2725899061290043e-06, "loss": 0.887, "step": 38690 }, { "epoch": 2.3387925303680426, "grad_norm": 1.217297751405057, "learning_rate": 1.2703705879799833e-06, "loss": 0.898, "step": 38700 }, { "epoch": 2.3393968695231764, "grad_norm": 1.0979621224373344, "learning_rate": 1.2681529250468782e-06, "loss": 0.881, "step": 38710 }, { "epoch": 2.34000120867831, "grad_norm": 1.2070410107461766, "learning_rate": 1.2659369183138936e-06, "loss": 0.8649, "step": 38720 }, { "epoch": 2.340605547833444, "grad_norm": 1.2016328096140823, "learning_rate": 1.2637225687644989e-06, "loss": 0.8853, "step": 38730 }, { "epoch": 2.341209886988578, "grad_norm": 1.0962450781221647, "learning_rate": 1.2615098773814222e-06, "loss": 0.8962, "step": 38740 }, { "epoch": 2.341814226143712, "grad_norm": 1.0704058699263035, "learning_rate": 1.2592988451466615e-06, "loss": 0.888, "step": 38750 }, { "epoch": 2.342418565298846, "grad_norm": 1.0449913846281933, "learning_rate": 1.2570894730414773e-06, "loss": 0.8846, "step": 38760 }, { "epoch": 2.3430229044539796, "grad_norm": 1.0257455429089837, "learning_rate": 1.2548817620463932e-06, "loss": 0.8793, "step": 38770 }, { "epoch": 2.3436272436091135, "grad_norm": 1.1340190647976194, "learning_rate": 1.2526757131411932e-06, "loss": 0.8892, "step": 38780 }, { "epoch": 2.3442315827642473, "grad_norm": 1.0348871480890511, "learning_rate": 1.2504713273049268e-06, "loss": 0.8638, "step": 38790 }, { "epoch": 2.344835921919381, "grad_norm": 1.1083706930609398, "learning_rate": 1.2482686055159054e-06, "loss": 0.8747, "step": 38800 }, { "epoch": 2.345440261074515, "grad_norm": 1.0484467676271267, "learning_rate": 1.2460675487516977e-06, "loss": 0.8729, "step": 38810 }, { "epoch": 2.346044600229649, "grad_norm": 1.1050274060423304, "learning_rate": 1.2438681579891383e-06, "loss": 0.8801, "step": 38820 }, { "epoch": 2.346648939384783, "grad_norm": 1.167344705455927, "learning_rate": 1.2416704342043217e-06, "loss": 0.8908, "step": 38830 }, { "epoch": 2.3472532785399167, "grad_norm": 1.1648404252980153, "learning_rate": 1.2394743783725987e-06, "loss": 0.8693, "step": 38840 }, { "epoch": 2.3478576176950505, "grad_norm": 1.075533100150982, "learning_rate": 1.2372799914685845e-06, "loss": 0.89, "step": 38850 }, { "epoch": 2.3484619568501843, "grad_norm": 1.0312528503652874, "learning_rate": 1.2350872744661523e-06, "loss": 0.8822, "step": 38860 }, { "epoch": 2.349066296005318, "grad_norm": 0.9737887922759815, "learning_rate": 1.2328962283384315e-06, "loss": 0.8678, "step": 38870 }, { "epoch": 2.349670635160452, "grad_norm": 1.031132776002923, "learning_rate": 1.2307068540578149e-06, "loss": 0.8839, "step": 38880 }, { "epoch": 2.3502749743155857, "grad_norm": 1.0654572364935515, "learning_rate": 1.2285191525959478e-06, "loss": 0.872, "step": 38890 }, { "epoch": 2.3508793134707195, "grad_norm": 1.2851219673422374, "learning_rate": 1.226333124923738e-06, "loss": 0.8933, "step": 38900 }, { "epoch": 2.351483652625854, "grad_norm": 1.309461044217282, "learning_rate": 1.224148772011346e-06, "loss": 0.9095, "step": 38910 }, { "epoch": 2.3520879917809876, "grad_norm": 1.2617117027930045, "learning_rate": 1.2219660948281942e-06, "loss": 0.9156, "step": 38920 }, { "epoch": 2.3526923309361214, "grad_norm": 1.1616053041216488, "learning_rate": 1.2197850943429557e-06, "loss": 0.9113, "step": 38930 }, { "epoch": 2.353296670091255, "grad_norm": 1.256998491359396, "learning_rate": 1.2176057715235634e-06, "loss": 0.8755, "step": 38940 }, { "epoch": 2.353901009246389, "grad_norm": 1.1092277008399234, "learning_rate": 1.2154281273372065e-06, "loss": 0.8806, "step": 38950 }, { "epoch": 2.354505348401523, "grad_norm": 1.2028756460990289, "learning_rate": 1.2132521627503236e-06, "loss": 0.895, "step": 38960 }, { "epoch": 2.3551096875566566, "grad_norm": 1.2170443345590407, "learning_rate": 1.2110778787286132e-06, "loss": 0.8923, "step": 38970 }, { "epoch": 2.355714026711791, "grad_norm": 1.183272829135899, "learning_rate": 1.2089052762370278e-06, "loss": 0.8597, "step": 38980 }, { "epoch": 2.3563183658669247, "grad_norm": 1.1522304044103695, "learning_rate": 1.2067343562397693e-06, "loss": 0.9039, "step": 38990 }, { "epoch": 2.3569227050220585, "grad_norm": 1.1732744962455999, "learning_rate": 1.2045651197002973e-06, "loss": 0.9066, "step": 39000 }, { "epoch": 2.3575270441771923, "grad_norm": 1.2154805582034967, "learning_rate": 1.2023975675813237e-06, "loss": 0.8804, "step": 39010 }, { "epoch": 2.358131383332326, "grad_norm": 1.1846220976986728, "learning_rate": 1.2002317008448088e-06, "loss": 0.8888, "step": 39020 }, { "epoch": 2.35873572248746, "grad_norm": 1.2588579728657623, "learning_rate": 1.1980675204519704e-06, "loss": 0.8778, "step": 39030 }, { "epoch": 2.3593400616425937, "grad_norm": 1.2217056019604282, "learning_rate": 1.195905027363276e-06, "loss": 0.8897, "step": 39040 }, { "epoch": 2.3599444007977275, "grad_norm": 1.3717312497810175, "learning_rate": 1.193744222538441e-06, "loss": 0.8652, "step": 39050 }, { "epoch": 2.3605487399528613, "grad_norm": 1.461396880656895, "learning_rate": 1.191585106936436e-06, "loss": 0.8894, "step": 39060 }, { "epoch": 2.3611530791079955, "grad_norm": 1.4992617779409008, "learning_rate": 1.1894276815154815e-06, "loss": 0.8884, "step": 39070 }, { "epoch": 2.3617574182631293, "grad_norm": 1.415271971506978, "learning_rate": 1.187271947233044e-06, "loss": 0.8973, "step": 39080 }, { "epoch": 2.362361757418263, "grad_norm": 1.4582796354205354, "learning_rate": 1.1851179050458428e-06, "loss": 0.8746, "step": 39090 }, { "epoch": 2.362966096573397, "grad_norm": 0.826188634656245, "learning_rate": 1.1829655559098458e-06, "loss": 0.8922, "step": 39100 }, { "epoch": 2.3635704357285308, "grad_norm": 0.9309698722708558, "learning_rate": 1.1808149007802711e-06, "loss": 0.8779, "step": 39110 }, { "epoch": 2.3641747748836646, "grad_norm": 0.9876997859770517, "learning_rate": 1.1786659406115798e-06, "loss": 0.8926, "step": 39120 }, { "epoch": 2.3647791140387984, "grad_norm": 0.9522632663271489, "learning_rate": 1.1765186763574871e-06, "loss": 0.8981, "step": 39130 }, { "epoch": 2.3653834531939326, "grad_norm": 0.9241515739771305, "learning_rate": 1.1743731089709498e-06, "loss": 0.896, "step": 39140 }, { "epoch": 2.3659877923490664, "grad_norm": 0.8092522033893093, "learning_rate": 1.1722292394041769e-06, "loss": 0.8717, "step": 39150 }, { "epoch": 2.3665921315042002, "grad_norm": 0.8583278389170031, "learning_rate": 1.1700870686086191e-06, "loss": 0.8783, "step": 39160 }, { "epoch": 2.367196470659334, "grad_norm": 0.9271202916869054, "learning_rate": 1.167946597534978e-06, "loss": 0.8922, "step": 39170 }, { "epoch": 2.367800809814468, "grad_norm": 0.8260098433579567, "learning_rate": 1.1658078271331951e-06, "loss": 0.9027, "step": 39180 }, { "epoch": 2.3684051489696016, "grad_norm": 0.8597411978291764, "learning_rate": 1.163670758352462e-06, "loss": 0.8818, "step": 39190 }, { "epoch": 2.3690094881247354, "grad_norm": 0.8610779457857143, "learning_rate": 1.161535392141216e-06, "loss": 0.8663, "step": 39200 }, { "epoch": 2.3696138272798697, "grad_norm": 0.8968116179996009, "learning_rate": 1.1594017294471316e-06, "loss": 0.8824, "step": 39210 }, { "epoch": 2.3702181664350035, "grad_norm": 0.8531697052247004, "learning_rate": 1.1572697712171344e-06, "loss": 0.8829, "step": 39220 }, { "epoch": 2.3708225055901373, "grad_norm": 0.8091335145529137, "learning_rate": 1.1551395183973924e-06, "loss": 0.8673, "step": 39230 }, { "epoch": 2.371426844745271, "grad_norm": 0.8609162752883601, "learning_rate": 1.1530109719333127e-06, "loss": 0.8782, "step": 39240 }, { "epoch": 2.372031183900405, "grad_norm": 0.6486798281733559, "learning_rate": 1.1508841327695497e-06, "loss": 0.8823, "step": 39250 }, { "epoch": 2.3726355230555387, "grad_norm": 0.6535240832818486, "learning_rate": 1.1487590018499984e-06, "loss": 0.881, "step": 39260 }, { "epoch": 2.3732398622106725, "grad_norm": 0.631833198038447, "learning_rate": 1.1466355801177942e-06, "loss": 0.8714, "step": 39270 }, { "epoch": 2.3738442013658063, "grad_norm": 0.6600240251580102, "learning_rate": 1.144513868515315e-06, "loss": 0.9039, "step": 39280 }, { "epoch": 2.37444854052094, "grad_norm": 0.6775966793398034, "learning_rate": 1.1423938679841834e-06, "loss": 0.8978, "step": 39290 }, { "epoch": 2.3750528796760744, "grad_norm": 0.6575718128032229, "learning_rate": 1.1402755794652554e-06, "loss": 0.886, "step": 39300 }, { "epoch": 2.375657218831208, "grad_norm": 0.6841897395708637, "learning_rate": 1.1381590038986319e-06, "loss": 0.8884, "step": 39310 }, { "epoch": 2.376261557986342, "grad_norm": 0.7246653885076295, "learning_rate": 1.136044142223655e-06, "loss": 0.8934, "step": 39320 }, { "epoch": 2.376865897141476, "grad_norm": 0.6588294289115753, "learning_rate": 1.133930995378902e-06, "loss": 0.9049, "step": 39330 }, { "epoch": 2.3774702362966096, "grad_norm": 0.6488178551631584, "learning_rate": 1.1318195643021902e-06, "loss": 0.8884, "step": 39340 }, { "epoch": 2.3780745754517434, "grad_norm": 0.83890880199567, "learning_rate": 1.1297098499305798e-06, "loss": 0.8731, "step": 39350 }, { "epoch": 2.378678914606877, "grad_norm": 0.8835444414153574, "learning_rate": 1.1276018532003613e-06, "loss": 0.8721, "step": 39360 }, { "epoch": 2.3792832537620114, "grad_norm": 0.8018377844467994, "learning_rate": 1.1254955750470698e-06, "loss": 0.8777, "step": 39370 }, { "epoch": 2.3798875929171452, "grad_norm": 1.0536293340339764, "learning_rate": 1.1233910164054763e-06, "loss": 0.8708, "step": 39380 }, { "epoch": 2.380491932072279, "grad_norm": 0.9106179478752364, "learning_rate": 1.1212881782095842e-06, "loss": 0.8702, "step": 39390 }, { "epoch": 2.381096271227413, "grad_norm": 0.6186197212473284, "learning_rate": 1.1191870613926398e-06, "loss": 0.8834, "step": 39400 }, { "epoch": 2.3817006103825467, "grad_norm": 0.5803289380955238, "learning_rate": 1.11708766688712e-06, "loss": 0.9057, "step": 39410 }, { "epoch": 2.3823049495376805, "grad_norm": 0.5947504156256807, "learning_rate": 1.1149899956247418e-06, "loss": 0.889, "step": 39420 }, { "epoch": 2.3829092886928143, "grad_norm": 0.5722482574353475, "learning_rate": 1.112894048536453e-06, "loss": 0.8937, "step": 39430 }, { "epoch": 2.3835136278479485, "grad_norm": 0.6317931416178177, "learning_rate": 1.1107998265524394e-06, "loss": 0.9303, "step": 39440 }, { "epoch": 2.3841179670030823, "grad_norm": 0.6046702724342344, "learning_rate": 1.1087073306021222e-06, "loss": 0.871, "step": 39450 }, { "epoch": 2.384722306158216, "grad_norm": 0.5660446702728901, "learning_rate": 1.1066165616141516e-06, "loss": 0.8761, "step": 39460 }, { "epoch": 2.38532664531335, "grad_norm": 0.5881350501581389, "learning_rate": 1.1045275205164157e-06, "loss": 0.8777, "step": 39470 }, { "epoch": 2.3859309844684837, "grad_norm": 0.6062244346918683, "learning_rate": 1.1024402082360363e-06, "loss": 0.8708, "step": 39480 }, { "epoch": 2.3865353236236175, "grad_norm": 0.6617265414934231, "learning_rate": 1.100354625699363e-06, "loss": 0.8849, "step": 39490 }, { "epoch": 2.3871396627787513, "grad_norm": 0.5730605062061782, "learning_rate": 1.0982707738319832e-06, "loss": 0.8811, "step": 39500 }, { "epoch": 2.387744001933885, "grad_norm": 0.6072893979708712, "learning_rate": 1.0961886535587142e-06, "loss": 0.888, "step": 39510 }, { "epoch": 2.388348341089019, "grad_norm": 0.612568042548487, "learning_rate": 1.0941082658036018e-06, "loss": 0.8733, "step": 39520 }, { "epoch": 2.388952680244153, "grad_norm": 0.6192737226747179, "learning_rate": 1.0920296114899275e-06, "loss": 0.8851, "step": 39530 }, { "epoch": 2.389557019399287, "grad_norm": 0.6149845734577145, "learning_rate": 1.089952691540203e-06, "loss": 0.8726, "step": 39540 }, { "epoch": 2.390161358554421, "grad_norm": 0.6106027207908674, "learning_rate": 1.0878775068761666e-06, "loss": 0.9031, "step": 39550 }, { "epoch": 2.3907656977095546, "grad_norm": 0.5817264493192338, "learning_rate": 1.085804058418789e-06, "loss": 0.8889, "step": 39560 }, { "epoch": 2.3913700368646884, "grad_norm": 0.5719520120056023, "learning_rate": 1.0837323470882727e-06, "loss": 0.874, "step": 39570 }, { "epoch": 2.391974376019822, "grad_norm": 0.60121580275604, "learning_rate": 1.0816623738040432e-06, "loss": 0.8928, "step": 39580 }, { "epoch": 2.392578715174956, "grad_norm": 0.5424523460294342, "learning_rate": 1.0795941394847604e-06, "loss": 0.887, "step": 39590 }, { "epoch": 2.3931830543300903, "grad_norm": 0.5837087164209533, "learning_rate": 1.0775276450483108e-06, "loss": 0.8938, "step": 39600 }, { "epoch": 2.393787393485224, "grad_norm": 0.545301989808708, "learning_rate": 1.0754628914118055e-06, "loss": 0.8564, "step": 39610 }, { "epoch": 2.394391732640358, "grad_norm": 0.5676700215894412, "learning_rate": 1.0733998794915873e-06, "loss": 0.8823, "step": 39620 }, { "epoch": 2.3949960717954917, "grad_norm": 0.5776790473308407, "learning_rate": 1.0713386102032258e-06, "loss": 0.8815, "step": 39630 }, { "epoch": 2.3956004109506255, "grad_norm": 0.5829049078485785, "learning_rate": 1.0692790844615131e-06, "loss": 0.8918, "step": 39640 }, { "epoch": 2.3962047501057593, "grad_norm": 0.6222032243704987, "learning_rate": 1.0672213031804718e-06, "loss": 0.8856, "step": 39650 }, { "epoch": 2.396809089260893, "grad_norm": 0.6482413632166244, "learning_rate": 1.0651652672733497e-06, "loss": 0.8862, "step": 39660 }, { "epoch": 2.397413428416027, "grad_norm": 0.6234854921688038, "learning_rate": 1.0631109776526166e-06, "loss": 0.8738, "step": 39670 }, { "epoch": 2.3980177675711607, "grad_norm": 0.6025737863212187, "learning_rate": 1.0610584352299724e-06, "loss": 0.9005, "step": 39680 }, { "epoch": 2.398622106726295, "grad_norm": 0.5943390356962067, "learning_rate": 1.059007640916338e-06, "loss": 0.8815, "step": 39690 }, { "epoch": 2.3992264458814287, "grad_norm": 0.6665130767977476, "learning_rate": 1.056958595621857e-06, "loss": 0.8803, "step": 39700 }, { "epoch": 2.3998307850365626, "grad_norm": 0.6586745122537215, "learning_rate": 1.0549113002559013e-06, "loss": 0.9071, "step": 39710 }, { "epoch": 2.4004351241916964, "grad_norm": 0.6972941100277277, "learning_rate": 1.052865755727066e-06, "loss": 0.887, "step": 39720 }, { "epoch": 2.40103946334683, "grad_norm": 0.6553379223778856, "learning_rate": 1.0508219629431631e-06, "loss": 0.9029, "step": 39730 }, { "epoch": 2.401643802501964, "grad_norm": 0.7160502767925534, "learning_rate": 1.0487799228112344e-06, "loss": 0.892, "step": 39740 }, { "epoch": 2.4022481416570978, "grad_norm": 0.6923967937788605, "learning_rate": 1.046739636237541e-06, "loss": 0.8695, "step": 39750 }, { "epoch": 2.402852480812232, "grad_norm": 0.7185550380353858, "learning_rate": 1.0447011041275629e-06, "loss": 0.8833, "step": 39760 }, { "epoch": 2.403456819967366, "grad_norm": 0.7149292759973592, "learning_rate": 1.0426643273860055e-06, "loss": 0.876, "step": 39770 }, { "epoch": 2.4040611591224996, "grad_norm": 0.6958120885525948, "learning_rate": 1.0406293069167944e-06, "loss": 0.8786, "step": 39780 }, { "epoch": 2.4046654982776334, "grad_norm": 0.7545752472801516, "learning_rate": 1.0385960436230762e-06, "loss": 0.8963, "step": 39790 }, { "epoch": 2.4052698374327672, "grad_norm": 0.7096128021795056, "learning_rate": 1.0365645384072137e-06, "loss": 0.8793, "step": 39800 }, { "epoch": 2.405874176587901, "grad_norm": 0.6790031370634885, "learning_rate": 1.0345347921707942e-06, "loss": 0.8669, "step": 39810 }, { "epoch": 2.406478515743035, "grad_norm": 0.6656644782473132, "learning_rate": 1.0325068058146232e-06, "loss": 0.901, "step": 39820 }, { "epoch": 2.407082854898169, "grad_norm": 0.6415335294265717, "learning_rate": 1.0304805802387225e-06, "loss": 0.8818, "step": 39830 }, { "epoch": 2.407687194053303, "grad_norm": 0.7207569061893038, "learning_rate": 1.0284561163423356e-06, "loss": 0.8773, "step": 39840 }, { "epoch": 2.4082915332084367, "grad_norm": 0.8244458612890934, "learning_rate": 1.0264334150239247e-06, "loss": 0.8803, "step": 39850 }, { "epoch": 2.4088958723635705, "grad_norm": 0.894079612005884, "learning_rate": 1.0244124771811648e-06, "loss": 0.9099, "step": 39860 }, { "epoch": 2.4095002115187043, "grad_norm": 0.938144724272759, "learning_rate": 1.022393303710953e-06, "loss": 0.9016, "step": 39870 }, { "epoch": 2.410104550673838, "grad_norm": 0.8668587694656729, "learning_rate": 1.020375895509404e-06, "loss": 0.8839, "step": 39880 }, { "epoch": 2.410708889828972, "grad_norm": 0.8593237257260798, "learning_rate": 1.0183602534718435e-06, "loss": 0.8946, "step": 39890 }, { "epoch": 2.4113132289841057, "grad_norm": 0.812151659495348, "learning_rate": 1.0163463784928184e-06, "loss": 0.8803, "step": 39900 }, { "epoch": 2.4119175681392395, "grad_norm": 0.7971137937921556, "learning_rate": 1.0143342714660914e-06, "loss": 0.8713, "step": 39910 }, { "epoch": 2.4125219072943738, "grad_norm": 0.8368856847598072, "learning_rate": 1.0123239332846362e-06, "loss": 0.8829, "step": 39920 }, { "epoch": 2.4131262464495076, "grad_norm": 0.8022886361565047, "learning_rate": 1.0103153648406477e-06, "loss": 0.9055, "step": 39930 }, { "epoch": 2.4137305856046414, "grad_norm": 0.8243992837945359, "learning_rate": 1.0083085670255283e-06, "loss": 0.8922, "step": 39940 }, { "epoch": 2.414334924759775, "grad_norm": 1.2176967055080188, "learning_rate": 1.0063035407299016e-06, "loss": 0.8728, "step": 39950 }, { "epoch": 2.414939263914909, "grad_norm": 1.253705674463632, "learning_rate": 1.0043002868435991e-06, "loss": 0.8907, "step": 39960 }, { "epoch": 2.415543603070043, "grad_norm": 1.2232482221576086, "learning_rate": 1.0022988062556704e-06, "loss": 0.8921, "step": 39970 }, { "epoch": 2.4161479422251766, "grad_norm": 1.2568056364655433, "learning_rate": 1.0002990998543737e-06, "loss": 0.8961, "step": 39980 }, { "epoch": 2.416752281380311, "grad_norm": 1.228900319126794, "learning_rate": 9.983011685271832e-07, "loss": 0.8949, "step": 39990 }, { "epoch": 2.4173566205354446, "grad_norm": 0.7977047349015124, "learning_rate": 9.963050131607865e-07, "loss": 0.8875, "step": 40000 }, { "epoch": 2.4179609596905784, "grad_norm": 0.9469623879299512, "learning_rate": 9.943106346410769e-07, "loss": 0.8691, "step": 40010 }, { "epoch": 2.4185652988457123, "grad_norm": 1.01828106855766, "learning_rate": 9.923180338531651e-07, "loss": 0.8754, "step": 40020 }, { "epoch": 2.419169638000846, "grad_norm": 1.019067869588305, "learning_rate": 9.903272116813717e-07, "loss": 0.8693, "step": 40030 }, { "epoch": 2.41977397715598, "grad_norm": 1.2427818593476803, "learning_rate": 9.883381690092248e-07, "loss": 0.8637, "step": 40040 }, { "epoch": 2.4203783163111137, "grad_norm": 2.51751257552718, "learning_rate": 9.863509067194653e-07, "loss": 0.8688, "step": 40050 }, { "epoch": 2.4209826554662475, "grad_norm": 2.577179746751611, "learning_rate": 9.84365425694046e-07, "loss": 0.9009, "step": 40060 }, { "epoch": 2.4215869946213813, "grad_norm": 2.501405424447074, "learning_rate": 9.823817268141233e-07, "loss": 0.8959, "step": 40070 }, { "epoch": 2.4221913337765155, "grad_norm": 2.88040941430708, "learning_rate": 9.803998109600681e-07, "loss": 0.8703, "step": 40080 }, { "epoch": 2.4227956729316493, "grad_norm": 2.5613001107528564, "learning_rate": 9.78419679011458e-07, "loss": 0.8895, "step": 40090 }, { "epoch": 2.423400012086783, "grad_norm": 0.9349367631368284, "learning_rate": 9.7644133184708e-07, "loss": 0.8759, "step": 40100 }, { "epoch": 2.424004351241917, "grad_norm": 1.009146913955769, "learning_rate": 9.744647703449256e-07, "loss": 0.9112, "step": 40110 }, { "epoch": 2.4246086903970507, "grad_norm": 1.013591341517496, "learning_rate": 9.72489995382197e-07, "loss": 0.881, "step": 40120 }, { "epoch": 2.4252130295521845, "grad_norm": 0.9612964801090413, "learning_rate": 9.705170078353043e-07, "loss": 0.8847, "step": 40130 }, { "epoch": 2.4258173687073183, "grad_norm": 0.9528065448293137, "learning_rate": 9.685458085798604e-07, "loss": 0.8781, "step": 40140 }, { "epoch": 2.4264217078624526, "grad_norm": 1.0473877532677942, "learning_rate": 9.665763984906878e-07, "loss": 0.88, "step": 40150 }, { "epoch": 2.4270260470175864, "grad_norm": 1.0291073107156665, "learning_rate": 9.646087784418157e-07, "loss": 0.8805, "step": 40160 }, { "epoch": 2.42763038617272, "grad_norm": 1.0003495739477302, "learning_rate": 9.626429493064743e-07, "loss": 0.885, "step": 40170 }, { "epoch": 2.428234725327854, "grad_norm": 1.0532050241340538, "learning_rate": 9.606789119571047e-07, "loss": 0.8901, "step": 40180 }, { "epoch": 2.428839064482988, "grad_norm": 0.9979484167797138, "learning_rate": 9.587166672653475e-07, "loss": 0.888, "step": 40190 }, { "epoch": 2.4294434036381216, "grad_norm": 1.0325258340420072, "learning_rate": 9.567562161020534e-07, "loss": 0.8843, "step": 40200 }, { "epoch": 2.4300477427932554, "grad_norm": 1.1206055926553746, "learning_rate": 9.5479755933727e-07, "loss": 0.9071, "step": 40210 }, { "epoch": 2.4306520819483897, "grad_norm": 1.0530992969734247, "learning_rate": 9.52840697840256e-07, "loss": 0.88, "step": 40220 }, { "epoch": 2.4312564211035235, "grad_norm": 0.9979372713695156, "learning_rate": 9.508856324794674e-07, "loss": 0.8765, "step": 40230 }, { "epoch": 2.4318607602586573, "grad_norm": 1.0181891717185223, "learning_rate": 9.489323641225667e-07, "loss": 0.8849, "step": 40240 }, { "epoch": 2.432465099413791, "grad_norm": 1.1239773592157096, "learning_rate": 9.469808936364189e-07, "loss": 0.8871, "step": 40250 }, { "epoch": 2.433069438568925, "grad_norm": 1.0714133073879752, "learning_rate": 9.450312218870878e-07, "loss": 0.8983, "step": 40260 }, { "epoch": 2.4336737777240587, "grad_norm": 1.1042796970049513, "learning_rate": 9.430833497398417e-07, "loss": 0.8827, "step": 40270 }, { "epoch": 2.4342781168791925, "grad_norm": 1.085088893401994, "learning_rate": 9.41137278059151e-07, "loss": 0.9013, "step": 40280 }, { "epoch": 2.4348824560343263, "grad_norm": 1.037640719786448, "learning_rate": 9.391930077086837e-07, "loss": 0.8811, "step": 40290 }, { "epoch": 2.43548679518946, "grad_norm": 1.1339613588055377, "learning_rate": 9.372505395513104e-07, "loss": 0.8824, "step": 40300 }, { "epoch": 2.4360911343445943, "grad_norm": 1.1968594944459072, "learning_rate": 9.353098744491046e-07, "loss": 0.8694, "step": 40310 }, { "epoch": 2.436695473499728, "grad_norm": 1.1535253230895357, "learning_rate": 9.333710132633328e-07, "loss": 0.8801, "step": 40320 }, { "epoch": 2.437299812654862, "grad_norm": 1.1101329881597528, "learning_rate": 9.314339568544673e-07, "loss": 0.8847, "step": 40330 }, { "epoch": 2.4379041518099958, "grad_norm": 1.1202104735969172, "learning_rate": 9.294987060821775e-07, "loss": 0.8817, "step": 40340 }, { "epoch": 2.4385084909651296, "grad_norm": 1.1472877529301357, "learning_rate": 9.275652618053294e-07, "loss": 0.8971, "step": 40350 }, { "epoch": 2.4391128301202634, "grad_norm": 1.14996637819073, "learning_rate": 9.256336248819891e-07, "loss": 0.8818, "step": 40360 }, { "epoch": 2.439717169275397, "grad_norm": 0.9841776466953165, "learning_rate": 9.237037961694223e-07, "loss": 0.8831, "step": 40370 }, { "epoch": 2.4403215084305314, "grad_norm": 1.0384358238508726, "learning_rate": 9.217757765240876e-07, "loss": 0.8866, "step": 40380 }, { "epoch": 2.440925847585665, "grad_norm": 0.9709337804878487, "learning_rate": 9.19849566801645e-07, "loss": 0.8893, "step": 40390 }, { "epoch": 2.441530186740799, "grad_norm": 1.002710911705235, "learning_rate": 9.179251678569501e-07, "loss": 0.9143, "step": 40400 }, { "epoch": 2.442134525895933, "grad_norm": 1.0570992077629024, "learning_rate": 9.160025805440526e-07, "loss": 0.8816, "step": 40410 }, { "epoch": 2.4427388650510666, "grad_norm": 1.0381080922769719, "learning_rate": 9.14081805716201e-07, "loss": 0.8748, "step": 40420 }, { "epoch": 2.4433432042062004, "grad_norm": 1.1464506880121867, "learning_rate": 9.121628442258396e-07, "loss": 0.8737, "step": 40430 }, { "epoch": 2.4439475433613342, "grad_norm": 1.024202977282041, "learning_rate": 9.102456969246048e-07, "loss": 0.8936, "step": 40440 }, { "epoch": 2.4445518825164685, "grad_norm": 1.0392841490146056, "learning_rate": 9.083303646633323e-07, "loss": 0.8824, "step": 40450 }, { "epoch": 2.4451562216716023, "grad_norm": 0.9892722915030926, "learning_rate": 9.064168482920472e-07, "loss": 0.8893, "step": 40460 }, { "epoch": 2.445760560826736, "grad_norm": 1.0548562648819615, "learning_rate": 9.045051486599732e-07, "loss": 0.881, "step": 40470 }, { "epoch": 2.44636489998187, "grad_norm": 1.1086944823651066, "learning_rate": 9.025952666155242e-07, "loss": 0.8829, "step": 40480 }, { "epoch": 2.4469692391370037, "grad_norm": 1.0127080923354776, "learning_rate": 9.006872030063101e-07, "loss": 0.906, "step": 40490 }, { "epoch": 2.4475735782921375, "grad_norm": 1.2698491151972442, "learning_rate": 8.987809586791341e-07, "loss": 0.8713, "step": 40500 }, { "epoch": 2.4481779174472713, "grad_norm": 1.1991252703491568, "learning_rate": 8.968765344799879e-07, "loss": 0.8815, "step": 40510 }, { "epoch": 2.448782256602405, "grad_norm": 1.2431552513571238, "learning_rate": 8.949739312540601e-07, "loss": 0.8935, "step": 40520 }, { "epoch": 2.449386595757539, "grad_norm": 1.1432295897574343, "learning_rate": 8.930731498457296e-07, "loss": 0.844, "step": 40530 }, { "epoch": 2.449990934912673, "grad_norm": 1.2028449447048442, "learning_rate": 8.91174191098565e-07, "loss": 0.888, "step": 40540 }, { "epoch": 2.450595274067807, "grad_norm": 1.1523326084546026, "learning_rate": 8.892770558553276e-07, "loss": 0.8849, "step": 40550 }, { "epoch": 2.4511996132229408, "grad_norm": 1.1790556802267356, "learning_rate": 8.873817449579713e-07, "loss": 0.8823, "step": 40560 }, { "epoch": 2.4518039523780746, "grad_norm": 1.1555191092906105, "learning_rate": 8.85488259247636e-07, "loss": 0.9068, "step": 40570 }, { "epoch": 2.4524082915332084, "grad_norm": 1.082071714979443, "learning_rate": 8.835965995646544e-07, "loss": 0.8872, "step": 40580 }, { "epoch": 2.453012630688342, "grad_norm": 1.1836829788332326, "learning_rate": 8.817067667485496e-07, "loss": 0.8915, "step": 40590 }, { "epoch": 2.453616969843476, "grad_norm": 1.274098974946871, "learning_rate": 8.798187616380311e-07, "loss": 0.8847, "step": 40600 }, { "epoch": 2.4542213089986102, "grad_norm": 1.2647313993593425, "learning_rate": 8.779325850709991e-07, "loss": 0.8868, "step": 40610 }, { "epoch": 2.454825648153744, "grad_norm": 1.2301879884850013, "learning_rate": 8.760482378845431e-07, "loss": 0.8937, "step": 40620 }, { "epoch": 2.455429987308878, "grad_norm": 1.173147950355316, "learning_rate": 8.741657209149379e-07, "loss": 0.8926, "step": 40630 }, { "epoch": 2.4560343264640117, "grad_norm": 1.1804737786496446, "learning_rate": 8.722850349976492e-07, "loss": 0.8568, "step": 40640 }, { "epoch": 2.4566386656191455, "grad_norm": 1.4567761868521123, "learning_rate": 8.704061809673286e-07, "loss": 0.8648, "step": 40650 }, { "epoch": 2.4572430047742793, "grad_norm": 1.532856362103709, "learning_rate": 8.685291596578132e-07, "loss": 0.917, "step": 40660 }, { "epoch": 2.457847343929413, "grad_norm": 1.4067824423288466, "learning_rate": 8.666539719021288e-07, "loss": 0.8937, "step": 40670 }, { "epoch": 2.458451683084547, "grad_norm": 1.4894543584209463, "learning_rate": 8.647806185324892e-07, "loss": 0.9166, "step": 40680 }, { "epoch": 2.4590560222396807, "grad_norm": 1.4080180127720432, "learning_rate": 8.629091003802892e-07, "loss": 0.8674, "step": 40690 }, { "epoch": 2.459660361394815, "grad_norm": 0.9550241482039177, "learning_rate": 8.610394182761128e-07, "loss": 0.8789, "step": 40700 }, { "epoch": 2.4602647005499487, "grad_norm": 0.9170315185809859, "learning_rate": 8.591715730497275e-07, "loss": 0.8914, "step": 40710 }, { "epoch": 2.4608690397050825, "grad_norm": 0.9114295602852198, "learning_rate": 8.573055655300877e-07, "loss": 0.8684, "step": 40720 }, { "epoch": 2.4614733788602163, "grad_norm": 0.905964321453953, "learning_rate": 8.554413965453284e-07, "loss": 0.8666, "step": 40730 }, { "epoch": 2.46207771801535, "grad_norm": 0.9862078466810149, "learning_rate": 8.535790669227734e-07, "loss": 0.873, "step": 40740 }, { "epoch": 2.462682057170484, "grad_norm": 0.8102293660945225, "learning_rate": 8.517185774889253e-07, "loss": 0.9058, "step": 40750 }, { "epoch": 2.4632863963256177, "grad_norm": 0.8671735721374867, "learning_rate": 8.498599290694737e-07, "loss": 0.8935, "step": 40760 }, { "epoch": 2.463890735480752, "grad_norm": 0.9155077692543427, "learning_rate": 8.480031224892903e-07, "loss": 0.8999, "step": 40770 }, { "epoch": 2.464495074635886, "grad_norm": 0.8800269468239498, "learning_rate": 8.461481585724302e-07, "loss": 0.9024, "step": 40780 }, { "epoch": 2.4650994137910196, "grad_norm": 0.7967158437233781, "learning_rate": 8.442950381421266e-07, "loss": 0.8722, "step": 40790 }, { "epoch": 2.4657037529461534, "grad_norm": 0.8383316828539763, "learning_rate": 8.424437620207999e-07, "loss": 0.869, "step": 40800 }, { "epoch": 2.466308092101287, "grad_norm": 0.8556933488620937, "learning_rate": 8.40594331030049e-07, "loss": 0.8976, "step": 40810 }, { "epoch": 2.466912431256421, "grad_norm": 0.8890821834570544, "learning_rate": 8.387467459906534e-07, "loss": 0.8897, "step": 40820 }, { "epoch": 2.467516770411555, "grad_norm": 0.9267038280657559, "learning_rate": 8.369010077225759e-07, "loss": 0.8711, "step": 40830 }, { "epoch": 2.468121109566689, "grad_norm": 0.758990858193993, "learning_rate": 8.350571170449584e-07, "loss": 0.8776, "step": 40840 }, { "epoch": 2.468725448721823, "grad_norm": 0.6752216215519213, "learning_rate": 8.332150747761214e-07, "loss": 0.9009, "step": 40850 }, { "epoch": 2.4693297878769567, "grad_norm": 0.6350222527973223, "learning_rate": 8.313748817335671e-07, "loss": 0.8785, "step": 40860 }, { "epoch": 2.4699341270320905, "grad_norm": 0.6090962901780818, "learning_rate": 8.295365387339777e-07, "loss": 0.857, "step": 40870 }, { "epoch": 2.4705384661872243, "grad_norm": 0.6857908201667366, "learning_rate": 8.277000465932106e-07, "loss": 0.8858, "step": 40880 }, { "epoch": 2.471142805342358, "grad_norm": 0.6642855637090372, "learning_rate": 8.25865406126305e-07, "loss": 0.8839, "step": 40890 }, { "epoch": 2.471747144497492, "grad_norm": 0.7539847947975113, "learning_rate": 8.240326181474789e-07, "loss": 0.8873, "step": 40900 }, { "epoch": 2.4723514836526257, "grad_norm": 0.6629985866564397, "learning_rate": 8.222016834701247e-07, "loss": 0.8835, "step": 40910 }, { "epoch": 2.4729558228077595, "grad_norm": 0.7263953374517576, "learning_rate": 8.203726029068149e-07, "loss": 0.9003, "step": 40920 }, { "epoch": 2.4735601619628937, "grad_norm": 0.6831832263349958, "learning_rate": 8.185453772693003e-07, "loss": 0.8883, "step": 40930 }, { "epoch": 2.4741645011180275, "grad_norm": 0.6567853640438026, "learning_rate": 8.167200073685039e-07, "loss": 0.8729, "step": 40940 }, { "epoch": 2.4747688402731614, "grad_norm": 0.8772422402015878, "learning_rate": 8.14896494014531e-07, "loss": 0.8885, "step": 40950 }, { "epoch": 2.475373179428295, "grad_norm": 0.9163396307998091, "learning_rate": 8.13074838016657e-07, "loss": 0.9038, "step": 40960 }, { "epoch": 2.475977518583429, "grad_norm": 0.9489757718026602, "learning_rate": 8.112550401833385e-07, "loss": 0.898, "step": 40970 }, { "epoch": 2.4765818577385628, "grad_norm": 0.8717296378538169, "learning_rate": 8.09437101322203e-07, "loss": 0.8783, "step": 40980 }, { "epoch": 2.4771861968936966, "grad_norm": 0.8241932637516207, "learning_rate": 8.076210222400566e-07, "loss": 0.8838, "step": 40990 }, { "epoch": 2.477790536048831, "grad_norm": 0.6112766384950763, "learning_rate": 8.058068037428757e-07, "loss": 0.8835, "step": 41000 }, { "epoch": 2.4783948752039646, "grad_norm": 0.5568956321952637, "learning_rate": 8.039944466358157e-07, "loss": 0.8875, "step": 41010 }, { "epoch": 2.4789992143590984, "grad_norm": 0.6046399911718908, "learning_rate": 8.021839517232044e-07, "loss": 0.8825, "step": 41020 }, { "epoch": 2.4796035535142322, "grad_norm": 0.5788564076338315, "learning_rate": 8.003753198085401e-07, "loss": 0.8876, "step": 41030 }, { "epoch": 2.480207892669366, "grad_norm": 0.571656490361716, "learning_rate": 7.98568551694498e-07, "loss": 0.8819, "step": 41040 }, { "epoch": 2.4808122318245, "grad_norm": 0.6007697435233759, "learning_rate": 7.967636481829261e-07, "loss": 0.9112, "step": 41050 }, { "epoch": 2.4814165709796336, "grad_norm": 0.58364374425912, "learning_rate": 7.949606100748414e-07, "loss": 0.8892, "step": 41060 }, { "epoch": 2.4820209101347674, "grad_norm": 0.6208364335185198, "learning_rate": 7.931594381704366e-07, "loss": 0.9108, "step": 41070 }, { "epoch": 2.4826252492899012, "grad_norm": 0.601545399669037, "learning_rate": 7.913601332690757e-07, "loss": 0.8772, "step": 41080 }, { "epoch": 2.4832295884450355, "grad_norm": 0.600802974725024, "learning_rate": 7.895626961692915e-07, "loss": 0.8986, "step": 41090 }, { "epoch": 2.4838339276001693, "grad_norm": 0.607832178324593, "learning_rate": 7.877671276687899e-07, "loss": 0.8651, "step": 41100 }, { "epoch": 2.484438266755303, "grad_norm": 0.6082153709049353, "learning_rate": 7.859734285644483e-07, "loss": 0.8808, "step": 41110 }, { "epoch": 2.485042605910437, "grad_norm": 0.574824514043198, "learning_rate": 7.841815996523138e-07, "loss": 0.8842, "step": 41120 }, { "epoch": 2.4856469450655707, "grad_norm": 0.6439760165299043, "learning_rate": 7.823916417276012e-07, "loss": 0.8874, "step": 41130 }, { "epoch": 2.4862512842207045, "grad_norm": 0.5947019040844654, "learning_rate": 7.80603555584698e-07, "loss": 0.8586, "step": 41140 }, { "epoch": 2.4868556233758383, "grad_norm": 0.6374303381339865, "learning_rate": 7.788173420171613e-07, "loss": 0.9056, "step": 41150 }, { "epoch": 2.4874599625309726, "grad_norm": 0.6085257937966161, "learning_rate": 7.77033001817713e-07, "loss": 0.9013, "step": 41160 }, { "epoch": 2.4880643016861064, "grad_norm": 0.5877577075095965, "learning_rate": 7.752505357782475e-07, "loss": 0.8769, "step": 41170 }, { "epoch": 2.48866864084124, "grad_norm": 0.5851397229057675, "learning_rate": 7.734699446898275e-07, "loss": 0.8672, "step": 41180 }, { "epoch": 2.489272979996374, "grad_norm": 0.6239213774782675, "learning_rate": 7.716912293426804e-07, "loss": 0.877, "step": 41190 }, { "epoch": 2.489877319151508, "grad_norm": 0.5607932588278917, "learning_rate": 7.699143905262052e-07, "loss": 0.873, "step": 41200 }, { "epoch": 2.4904816583066416, "grad_norm": 0.5684729240308406, "learning_rate": 7.681394290289634e-07, "loss": 0.8824, "step": 41210 }, { "epoch": 2.4910859974617754, "grad_norm": 0.5746386240729316, "learning_rate": 7.663663456386889e-07, "loss": 0.8799, "step": 41220 }, { "epoch": 2.4916903366169096, "grad_norm": 0.6256627291758511, "learning_rate": 7.645951411422764e-07, "loss": 0.8767, "step": 41230 }, { "epoch": 2.4922946757720434, "grad_norm": 0.5429073102730873, "learning_rate": 7.628258163257918e-07, "loss": 0.8826, "step": 41240 }, { "epoch": 2.4928990149271772, "grad_norm": 0.6055282159058301, "learning_rate": 7.610583719744625e-07, "loss": 0.8782, "step": 41250 }, { "epoch": 2.493503354082311, "grad_norm": 0.6130839462133468, "learning_rate": 7.592928088726837e-07, "loss": 0.8743, "step": 41260 }, { "epoch": 2.494107693237445, "grad_norm": 0.5888237331924623, "learning_rate": 7.57529127804017e-07, "loss": 0.8615, "step": 41270 }, { "epoch": 2.4947120323925787, "grad_norm": 0.5806026114001572, "learning_rate": 7.557673295511853e-07, "loss": 0.8764, "step": 41280 }, { "epoch": 2.4953163715477125, "grad_norm": 0.5888561537060585, "learning_rate": 7.54007414896078e-07, "loss": 0.8927, "step": 41290 }, { "epoch": 2.4959207107028463, "grad_norm": 0.6635875796227327, "learning_rate": 7.522493846197498e-07, "loss": 0.8931, "step": 41300 }, { "epoch": 2.49652504985798, "grad_norm": 0.7117084599458885, "learning_rate": 7.50493239502415e-07, "loss": 0.867, "step": 41310 }, { "epoch": 2.4971293890131143, "grad_norm": 0.6801507717611001, "learning_rate": 7.487389803234552e-07, "loss": 0.8589, "step": 41320 }, { "epoch": 2.497733728168248, "grad_norm": 0.6690993993859737, "learning_rate": 7.469866078614146e-07, "loss": 0.8773, "step": 41330 }, { "epoch": 2.498338067323382, "grad_norm": 0.6857810396814289, "learning_rate": 7.452361228939975e-07, "loss": 0.8977, "step": 41340 }, { "epoch": 2.4989424064785157, "grad_norm": 0.7193851079727911, "learning_rate": 7.434875261980717e-07, "loss": 0.893, "step": 41350 }, { "epoch": 2.4995467456336495, "grad_norm": 0.7771860953543476, "learning_rate": 7.417408185496699e-07, "loss": 0.8719, "step": 41360 }, { "epoch": 2.5001510847887833, "grad_norm": 0.720979980458029, "learning_rate": 7.399960007239815e-07, "loss": 0.8971, "step": 41370 }, { "epoch": 2.500755423943917, "grad_norm": 0.7508571703625692, "learning_rate": 7.382530734953602e-07, "loss": 0.8547, "step": 41380 }, { "epoch": 2.5013597630990514, "grad_norm": 0.7042127820803035, "learning_rate": 7.365120376373219e-07, "loss": 0.8799, "step": 41390 }, { "epoch": 2.501964102254185, "grad_norm": 0.6328680950770649, "learning_rate": 7.347728939225385e-07, "loss": 0.8674, "step": 41400 }, { "epoch": 2.502568441409319, "grad_norm": 0.6865609669511411, "learning_rate": 7.330356431228464e-07, "loss": 0.8969, "step": 41410 }, { "epoch": 2.503172780564453, "grad_norm": 0.690132915780125, "learning_rate": 7.313002860092411e-07, "loss": 0.8752, "step": 41420 }, { "epoch": 2.5037771197195866, "grad_norm": 0.7021186612842231, "learning_rate": 7.295668233518755e-07, "loss": 0.894, "step": 41430 }, { "epoch": 2.5043814588747204, "grad_norm": 0.6851340951751752, "learning_rate": 7.27835255920064e-07, "loss": 0.8744, "step": 41440 }, { "epoch": 2.504985798029854, "grad_norm": 0.8410684795933202, "learning_rate": 7.261055844822795e-07, "loss": 0.871, "step": 41450 }, { "epoch": 2.5055901371849885, "grad_norm": 0.9147063038305515, "learning_rate": 7.243778098061544e-07, "loss": 0.8814, "step": 41460 }, { "epoch": 2.506194476340122, "grad_norm": 0.8451077764037075, "learning_rate": 7.226519326584746e-07, "loss": 0.8592, "step": 41470 }, { "epoch": 2.506798815495256, "grad_norm": 0.8623229815034531, "learning_rate": 7.209279538051911e-07, "loss": 0.8701, "step": 41480 }, { "epoch": 2.50740315465039, "grad_norm": 0.8513258955684797, "learning_rate": 7.192058740114055e-07, "loss": 0.8706, "step": 41490 }, { "epoch": 2.5080074938055237, "grad_norm": 0.7778377032944958, "learning_rate": 7.174856940413826e-07, "loss": 0.8934, "step": 41500 }, { "epoch": 2.5086118329606575, "grad_norm": 0.8225250114310346, "learning_rate": 7.157674146585386e-07, "loss": 0.8808, "step": 41510 }, { "epoch": 2.5092161721157913, "grad_norm": 0.8286421025559088, "learning_rate": 7.14051036625451e-07, "loss": 0.8767, "step": 41520 }, { "epoch": 2.509820511270925, "grad_norm": 0.8723314979501335, "learning_rate": 7.12336560703849e-07, "loss": 0.8843, "step": 41530 }, { "epoch": 2.510424850426059, "grad_norm": 0.8217767532029071, "learning_rate": 7.106239876546217e-07, "loss": 0.9186, "step": 41540 }, { "epoch": 2.511029189581193, "grad_norm": 1.3176567746767818, "learning_rate": 7.089133182378122e-07, "loss": 0.8891, "step": 41550 }, { "epoch": 2.511633528736327, "grad_norm": 1.2262934350892705, "learning_rate": 7.07204553212617e-07, "loss": 0.91, "step": 41560 }, { "epoch": 2.5122378678914608, "grad_norm": 1.3440158903501784, "learning_rate": 7.054976933373897e-07, "loss": 0.8893, "step": 41570 }, { "epoch": 2.5128422070465946, "grad_norm": 1.233015173445002, "learning_rate": 7.037927393696392e-07, "loss": 0.8624, "step": 41580 }, { "epoch": 2.5134465462017284, "grad_norm": 1.2550374059816882, "learning_rate": 7.020896920660242e-07, "loss": 0.8986, "step": 41590 }, { "epoch": 2.514050885356862, "grad_norm": 0.9814955904656175, "learning_rate": 7.003885521823611e-07, "loss": 0.8833, "step": 41600 }, { "epoch": 2.514655224511996, "grad_norm": 0.905318800146908, "learning_rate": 6.9868932047362e-07, "loss": 0.8778, "step": 41610 }, { "epoch": 2.51525956366713, "grad_norm": 1.1544488775909953, "learning_rate": 6.969919976939205e-07, "loss": 0.8628, "step": 41620 }, { "epoch": 2.515863902822264, "grad_norm": 1.1138147357852284, "learning_rate": 6.952965845965393e-07, "loss": 0.874, "step": 41630 }, { "epoch": 2.516468241977398, "grad_norm": 1.0481084841215758, "learning_rate": 6.936030819339035e-07, "loss": 0.8833, "step": 41640 }, { "epoch": 2.5170725811325316, "grad_norm": 2.4505905400521235, "learning_rate": 6.919114904575908e-07, "loss": 0.8879, "step": 41650 }, { "epoch": 2.5176769202876654, "grad_norm": 2.6286040442490655, "learning_rate": 6.902218109183334e-07, "loss": 0.8895, "step": 41660 }, { "epoch": 2.5182812594427992, "grad_norm": 2.4166159298620835, "learning_rate": 6.885340440660154e-07, "loss": 0.8793, "step": 41670 }, { "epoch": 2.518885598597933, "grad_norm": 2.8147112186088026, "learning_rate": 6.868481906496683e-07, "loss": 0.8787, "step": 41680 }, { "epoch": 2.5194899377530673, "grad_norm": 2.374540844592351, "learning_rate": 6.851642514174778e-07, "loss": 0.9291, "step": 41690 }, { "epoch": 2.5200942769082006, "grad_norm": 0.9637552808959402, "learning_rate": 6.834822271167801e-07, "loss": 0.8771, "step": 41700 }, { "epoch": 2.520698616063335, "grad_norm": 0.9432855559774069, "learning_rate": 6.818021184940582e-07, "loss": 0.8665, "step": 41710 }, { "epoch": 2.5213029552184687, "grad_norm": 0.9156381976104924, "learning_rate": 6.801239262949477e-07, "loss": 0.8757, "step": 41720 }, { "epoch": 2.5219072943736025, "grad_norm": 1.010610201588485, "learning_rate": 6.784476512642357e-07, "loss": 0.8848, "step": 41730 }, { "epoch": 2.5225116335287363, "grad_norm": 0.9177307480678155, "learning_rate": 6.767732941458521e-07, "loss": 0.8508, "step": 41740 }, { "epoch": 2.52311597268387, "grad_norm": 1.0095202052263494, "learning_rate": 6.751008556828831e-07, "loss": 0.8722, "step": 41750 }, { "epoch": 2.523720311839004, "grad_norm": 0.963805904401234, "learning_rate": 6.734303366175565e-07, "loss": 0.8864, "step": 41760 }, { "epoch": 2.5243246509941377, "grad_norm": 1.0159372246572098, "learning_rate": 6.717617376912545e-07, "loss": 0.8843, "step": 41770 }, { "epoch": 2.524928990149272, "grad_norm": 1.0213444690102842, "learning_rate": 6.70095059644501e-07, "loss": 0.8677, "step": 41780 }, { "epoch": 2.5255333293044058, "grad_norm": 1.0811498914152846, "learning_rate": 6.684303032169732e-07, "loss": 0.891, "step": 41790 }, { "epoch": 2.5261376684595396, "grad_norm": 1.059908932999426, "learning_rate": 6.667674691474923e-07, "loss": 0.8971, "step": 41800 }, { "epoch": 2.5267420076146734, "grad_norm": 1.1106257810913194, "learning_rate": 6.651065581740257e-07, "loss": 0.9171, "step": 41810 }, { "epoch": 2.527346346769807, "grad_norm": 1.0327366653796355, "learning_rate": 6.634475710336891e-07, "loss": 0.881, "step": 41820 }, { "epoch": 2.527950685924941, "grad_norm": 0.9996554903594473, "learning_rate": 6.617905084627452e-07, "loss": 0.8947, "step": 41830 }, { "epoch": 2.528555025080075, "grad_norm": 1.0182575251216492, "learning_rate": 6.601353711965991e-07, "loss": 0.8815, "step": 41840 }, { "epoch": 2.529159364235209, "grad_norm": 1.052178439056579, "learning_rate": 6.584821599698043e-07, "loss": 0.8888, "step": 41850 }, { "epoch": 2.5297637033903424, "grad_norm": 1.1635945387098245, "learning_rate": 6.568308755160607e-07, "loss": 0.8844, "step": 41860 }, { "epoch": 2.5303680425454766, "grad_norm": 1.039975162307649, "learning_rate": 6.551815185682076e-07, "loss": 0.8893, "step": 41870 }, { "epoch": 2.5309723817006105, "grad_norm": 1.1949681295089856, "learning_rate": 6.535340898582348e-07, "loss": 0.9057, "step": 41880 }, { "epoch": 2.5315767208557443, "grad_norm": 1.1497988985003074, "learning_rate": 6.518885901172744e-07, "loss": 0.9093, "step": 41890 }, { "epoch": 2.532181060010878, "grad_norm": 1.212665791374069, "learning_rate": 6.502450200756005e-07, "loss": 0.8834, "step": 41900 }, { "epoch": 2.532785399166012, "grad_norm": 1.101735350294155, "learning_rate": 6.486033804626324e-07, "loss": 0.8831, "step": 41910 }, { "epoch": 2.5333897383211457, "grad_norm": 1.098056204967435, "learning_rate": 6.469636720069344e-07, "loss": 0.8884, "step": 41920 }, { "epoch": 2.5339940774762795, "grad_norm": 1.0972572497456845, "learning_rate": 6.453258954362091e-07, "loss": 0.8803, "step": 41930 }, { "epoch": 2.5345984166314137, "grad_norm": 1.1375922649610486, "learning_rate": 6.436900514773064e-07, "loss": 0.882, "step": 41940 }, { "epoch": 2.5352027557865475, "grad_norm": 1.0008639292983053, "learning_rate": 6.420561408562176e-07, "loss": 0.8748, "step": 41950 }, { "epoch": 2.5358070949416813, "grad_norm": 1.0625468113826864, "learning_rate": 6.404241642980719e-07, "loss": 0.8733, "step": 41960 }, { "epoch": 2.536411434096815, "grad_norm": 1.0124885960100483, "learning_rate": 6.387941225271454e-07, "loss": 0.8765, "step": 41970 }, { "epoch": 2.537015773251949, "grad_norm": 1.0346608631156706, "learning_rate": 6.371660162668547e-07, "loss": 0.9161, "step": 41980 }, { "epoch": 2.5376201124070827, "grad_norm": 1.0502572790662368, "learning_rate": 6.355398462397533e-07, "loss": 0.8675, "step": 41990 }, { "epoch": 2.5382244515622165, "grad_norm": 1.1584241430956357, "learning_rate": 6.3391561316754e-07, "loss": 0.8997, "step": 42000 }, { "epoch": 2.538828790717351, "grad_norm": 1.1483278089411093, "learning_rate": 6.322933177710511e-07, "loss": 0.876, "step": 42010 }, { "epoch": 2.5394331298724846, "grad_norm": 1.0485668810213669, "learning_rate": 6.30672960770266e-07, "loss": 0.881, "step": 42020 }, { "epoch": 2.5400374690276184, "grad_norm": 1.1212436329522641, "learning_rate": 6.290545428842992e-07, "loss": 0.8906, "step": 42030 }, { "epoch": 2.540641808182752, "grad_norm": 1.0908228516654648, "learning_rate": 6.274380648314099e-07, "loss": 0.8777, "step": 42040 }, { "epoch": 2.541246147337886, "grad_norm": 1.023040896025016, "learning_rate": 6.25823527328992e-07, "loss": 0.8981, "step": 42050 }, { "epoch": 2.54185048649302, "grad_norm": 1.1038122647020336, "learning_rate": 6.242109310935801e-07, "loss": 0.9005, "step": 42060 }, { "epoch": 2.5424548256481536, "grad_norm": 1.095077367570081, "learning_rate": 6.22600276840849e-07, "loss": 0.889, "step": 42070 }, { "epoch": 2.543059164803288, "grad_norm": 1.0331561983218978, "learning_rate": 6.209915652856074e-07, "loss": 0.9056, "step": 42080 }, { "epoch": 2.5436635039584212, "grad_norm": 1.0257322683385564, "learning_rate": 6.19384797141806e-07, "loss": 0.8698, "step": 42090 }, { "epoch": 2.5442678431135555, "grad_norm": 1.2304851630039246, "learning_rate": 6.177799731225315e-07, "loss": 0.8865, "step": 42100 }, { "epoch": 2.5448721822686893, "grad_norm": 1.1305342969891252, "learning_rate": 6.161770939400052e-07, "loss": 0.8599, "step": 42110 }, { "epoch": 2.545476521423823, "grad_norm": 1.2080891922429504, "learning_rate": 6.145761603055894e-07, "loss": 0.8815, "step": 42120 }, { "epoch": 2.546080860578957, "grad_norm": 1.2639039914736894, "learning_rate": 6.129771729297806e-07, "loss": 0.8802, "step": 42130 }, { "epoch": 2.5466851997340907, "grad_norm": 1.222373367090069, "learning_rate": 6.11380132522214e-07, "loss": 0.8758, "step": 42140 }, { "epoch": 2.5472895388892245, "grad_norm": 1.2147533764602698, "learning_rate": 6.097850397916561e-07, "loss": 0.8916, "step": 42150 }, { "epoch": 2.5478938780443583, "grad_norm": 1.1105430745799367, "learning_rate": 6.081918954460131e-07, "loss": 0.8801, "step": 42160 }, { "epoch": 2.5484982171994925, "grad_norm": 1.1327420871134162, "learning_rate": 6.066007001923263e-07, "loss": 0.8708, "step": 42170 }, { "epoch": 2.5491025563546263, "grad_norm": 1.0907110284284494, "learning_rate": 6.050114547367681e-07, "loss": 0.8772, "step": 42180 }, { "epoch": 2.54970689550976, "grad_norm": 1.1736055008192041, "learning_rate": 6.034241597846502e-07, "loss": 0.8844, "step": 42190 }, { "epoch": 2.550311234664894, "grad_norm": 1.1778309924846977, "learning_rate": 6.018388160404176e-07, "loss": 0.8752, "step": 42200 }, { "epoch": 2.5509155738200278, "grad_norm": 1.2295835842125538, "learning_rate": 6.002554242076464e-07, "loss": 0.8853, "step": 42210 }, { "epoch": 2.5515199129751616, "grad_norm": 1.1878022088962958, "learning_rate": 5.986739849890499e-07, "loss": 0.903, "step": 42220 }, { "epoch": 2.5521242521302954, "grad_norm": 1.272235646274948, "learning_rate": 5.970944990864747e-07, "loss": 0.8968, "step": 42230 }, { "epoch": 2.5527285912854296, "grad_norm": 1.1609381699830803, "learning_rate": 5.955169672008965e-07, "loss": 0.8673, "step": 42240 }, { "epoch": 2.553332930440563, "grad_norm": 1.436489165843789, "learning_rate": 5.939413900324292e-07, "loss": 0.865, "step": 42250 }, { "epoch": 2.5539372695956972, "grad_norm": 1.557865762308893, "learning_rate": 5.923677682803148e-07, "loss": 0.8939, "step": 42260 }, { "epoch": 2.554541608750831, "grad_norm": 1.4514644540897934, "learning_rate": 5.907961026429304e-07, "loss": 0.8742, "step": 42270 }, { "epoch": 2.555145947905965, "grad_norm": 1.4775760792547543, "learning_rate": 5.892263938177833e-07, "loss": 0.897, "step": 42280 }, { "epoch": 2.5557502870610986, "grad_norm": 1.5837154702920653, "learning_rate": 5.87658642501514e-07, "loss": 0.8827, "step": 42290 }, { "epoch": 2.5563546262162324, "grad_norm": 0.975337285406159, "learning_rate": 5.860928493898921e-07, "loss": 0.8759, "step": 42300 }, { "epoch": 2.5569589653713667, "grad_norm": 0.9434887563252474, "learning_rate": 5.845290151778188e-07, "loss": 0.8999, "step": 42310 }, { "epoch": 2.5575633045265, "grad_norm": 0.9366206325953544, "learning_rate": 5.82967140559329e-07, "loss": 0.8619, "step": 42320 }, { "epoch": 2.5581676436816343, "grad_norm": 0.9107071311363936, "learning_rate": 5.814072262275822e-07, "loss": 0.8843, "step": 42330 }, { "epoch": 2.558771982836768, "grad_norm": 0.9325762069290423, "learning_rate": 5.798492728748729e-07, "loss": 0.8765, "step": 42340 }, { "epoch": 2.559376321991902, "grad_norm": 0.9158779710565561, "learning_rate": 5.782932811926234e-07, "loss": 0.8764, "step": 42350 }, { "epoch": 2.5599806611470357, "grad_norm": 0.848763542101446, "learning_rate": 5.767392518713844e-07, "loss": 0.8982, "step": 42360 }, { "epoch": 2.5605850003021695, "grad_norm": 0.8122214444589134, "learning_rate": 5.751871856008373e-07, "loss": 0.8838, "step": 42370 }, { "epoch": 2.5611893394573033, "grad_norm": 0.8983281544339264, "learning_rate": 5.736370830697929e-07, "loss": 0.894, "step": 42380 }, { "epoch": 2.561793678612437, "grad_norm": 0.9133833263149227, "learning_rate": 5.720889449661876e-07, "loss": 0.8904, "step": 42390 }, { "epoch": 2.5623980177675714, "grad_norm": 0.8947611375204686, "learning_rate": 5.705427719770884e-07, "loss": 0.8994, "step": 42400 }, { "epoch": 2.563002356922705, "grad_norm": 0.8521615667422683, "learning_rate": 5.689985647886909e-07, "loss": 0.8798, "step": 42410 }, { "epoch": 2.563606696077839, "grad_norm": 0.9114040080793261, "learning_rate": 5.674563240863146e-07, "loss": 0.8889, "step": 42420 }, { "epoch": 2.564211035232973, "grad_norm": 0.8876995648282615, "learning_rate": 5.659160505544093e-07, "loss": 0.8976, "step": 42430 }, { "epoch": 2.5648153743881066, "grad_norm": 0.9589346831806169, "learning_rate": 5.643777448765525e-07, "loss": 0.8813, "step": 42440 }, { "epoch": 2.5654197135432404, "grad_norm": 0.6397163371256055, "learning_rate": 5.628414077354444e-07, "loss": 0.8673, "step": 42450 }, { "epoch": 2.566024052698374, "grad_norm": 0.6794446596086937, "learning_rate": 5.613070398129156e-07, "loss": 0.8842, "step": 42460 }, { "epoch": 2.5666283918535084, "grad_norm": 0.6133173771038953, "learning_rate": 5.597746417899214e-07, "loss": 0.8731, "step": 42470 }, { "epoch": 2.567232731008642, "grad_norm": 0.7074697632168433, "learning_rate": 5.582442143465428e-07, "loss": 0.9014, "step": 42480 }, { "epoch": 2.567837070163776, "grad_norm": 0.6494350346170464, "learning_rate": 5.567157581619848e-07, "loss": 0.8825, "step": 42490 }, { "epoch": 2.56844140931891, "grad_norm": 0.6523773100339134, "learning_rate": 5.551892739145804e-07, "loss": 0.8811, "step": 42500 }, { "epoch": 2.5690457484740437, "grad_norm": 0.8013430256427662, "learning_rate": 5.536647622817842e-07, "loss": 0.8806, "step": 42510 }, { "epoch": 2.5696500876291775, "grad_norm": 0.6772055551867058, "learning_rate": 5.521422239401792e-07, "loss": 0.8851, "step": 42520 }, { "epoch": 2.5702544267843113, "grad_norm": 0.7228541547978615, "learning_rate": 5.506216595654685e-07, "loss": 0.8705, "step": 42530 }, { "epoch": 2.570858765939445, "grad_norm": 0.7224840143607193, "learning_rate": 5.491030698324828e-07, "loss": 0.8966, "step": 42540 }, { "epoch": 2.571463105094579, "grad_norm": 0.8434937449033829, "learning_rate": 5.475864554151727e-07, "loss": 0.8889, "step": 42550 }, { "epoch": 2.572067444249713, "grad_norm": 0.894371293908137, "learning_rate": 5.460718169866163e-07, "loss": 0.8678, "step": 42560 }, { "epoch": 2.572671783404847, "grad_norm": 0.8435505511396212, "learning_rate": 5.445591552190127e-07, "loss": 0.8889, "step": 42570 }, { "epoch": 2.5732761225599807, "grad_norm": 0.8484808947948894, "learning_rate": 5.43048470783682e-07, "loss": 0.9071, "step": 42580 }, { "epoch": 2.5738804617151145, "grad_norm": 0.8577720676967568, "learning_rate": 5.4153976435107e-07, "loss": 0.8899, "step": 42590 }, { "epoch": 2.5744848008702483, "grad_norm": 0.6387411783552193, "learning_rate": 5.400330365907442e-07, "loss": 0.9024, "step": 42600 }, { "epoch": 2.575089140025382, "grad_norm": 0.6130786577685777, "learning_rate": 5.385282881713899e-07, "loss": 0.8784, "step": 42610 }, { "epoch": 2.575693479180516, "grad_norm": 0.5966086700331396, "learning_rate": 5.370255197608193e-07, "loss": 0.8861, "step": 42620 }, { "epoch": 2.57629781833565, "grad_norm": 0.6386617379497778, "learning_rate": 5.355247320259638e-07, "loss": 0.8766, "step": 42630 }, { "epoch": 2.5769021574907836, "grad_norm": 0.5941256155595729, "learning_rate": 5.340259256328734e-07, "loss": 0.8766, "step": 42640 }, { "epoch": 2.577506496645918, "grad_norm": 0.581963111734866, "learning_rate": 5.325291012467232e-07, "loss": 0.8734, "step": 42650 }, { "epoch": 2.5781108358010516, "grad_norm": 0.6033359411971223, "learning_rate": 5.310342595318063e-07, "loss": 0.8724, "step": 42660 }, { "epoch": 2.5787151749561854, "grad_norm": 0.5910341602160661, "learning_rate": 5.295414011515338e-07, "loss": 0.8856, "step": 42670 }, { "epoch": 2.579319514111319, "grad_norm": 0.5770435225913192, "learning_rate": 5.280505267684411e-07, "loss": 0.8805, "step": 42680 }, { "epoch": 2.579923853266453, "grad_norm": 0.5851012761348724, "learning_rate": 5.265616370441806e-07, "loss": 0.8776, "step": 42690 }, { "epoch": 2.5805281924215873, "grad_norm": 0.6417043600261595, "learning_rate": 5.250747326395223e-07, "loss": 0.9072, "step": 42700 }, { "epoch": 2.5811325315767206, "grad_norm": 0.5720364423893937, "learning_rate": 5.235898142143586e-07, "loss": 0.8591, "step": 42710 }, { "epoch": 2.581736870731855, "grad_norm": 0.589051717202406, "learning_rate": 5.221068824276987e-07, "loss": 0.899, "step": 42720 }, { "epoch": 2.5823412098869887, "grad_norm": 0.6071983701254237, "learning_rate": 5.206259379376699e-07, "loss": 0.8821, "step": 42730 }, { "epoch": 2.5829455490421225, "grad_norm": 0.6067004615332431, "learning_rate": 5.191469814015171e-07, "loss": 0.8705, "step": 42740 }, { "epoch": 2.5835498881972563, "grad_norm": 0.6099898173104981, "learning_rate": 5.176700134756057e-07, "loss": 0.8987, "step": 42750 }, { "epoch": 2.58415422735239, "grad_norm": 0.6123237351748546, "learning_rate": 5.161950348154149e-07, "loss": 0.8897, "step": 42760 }, { "epoch": 2.584758566507524, "grad_norm": 0.5549704567261496, "learning_rate": 5.147220460755442e-07, "loss": 0.8783, "step": 42770 }, { "epoch": 2.5853629056626577, "grad_norm": 0.6176553921963164, "learning_rate": 5.132510479097064e-07, "loss": 0.8954, "step": 42780 }, { "epoch": 2.585967244817792, "grad_norm": 0.6285660020211064, "learning_rate": 5.117820409707358e-07, "loss": 0.8943, "step": 42790 }, { "epoch": 2.5865715839729257, "grad_norm": 0.5749615393245653, "learning_rate": 5.103150259105771e-07, "loss": 0.8872, "step": 42800 }, { "epoch": 2.5871759231280596, "grad_norm": 0.6029564978556262, "learning_rate": 5.088500033802957e-07, "loss": 0.8717, "step": 42810 }, { "epoch": 2.5877802622831934, "grad_norm": 0.606613001232314, "learning_rate": 5.073869740300724e-07, "loss": 0.8564, "step": 42820 }, { "epoch": 2.588384601438327, "grad_norm": 0.6068726818586329, "learning_rate": 5.059259385091996e-07, "loss": 0.9041, "step": 42830 }, { "epoch": 2.588988940593461, "grad_norm": 0.5783701098280523, "learning_rate": 5.044668974660888e-07, "loss": 0.8963, "step": 42840 }, { "epoch": 2.5895932797485948, "grad_norm": 0.5965022551131236, "learning_rate": 5.030098515482651e-07, "loss": 0.8941, "step": 42850 }, { "epoch": 2.590197618903729, "grad_norm": 0.6503457402794336, "learning_rate": 5.01554801402367e-07, "loss": 0.8986, "step": 42860 }, { "epoch": 2.5908019580588624, "grad_norm": 0.6270907151806185, "learning_rate": 5.001017476741487e-07, "loss": 0.8827, "step": 42870 }, { "epoch": 2.5914062972139966, "grad_norm": 0.6263799501859979, "learning_rate": 4.986506910084787e-07, "loss": 0.8706, "step": 42880 }, { "epoch": 2.5920106363691304, "grad_norm": 0.61376824013978, "learning_rate": 4.972016320493372e-07, "loss": 0.8906, "step": 42890 }, { "epoch": 2.5926149755242642, "grad_norm": 0.6611348083256532, "learning_rate": 4.957545714398194e-07, "loss": 0.897, "step": 42900 }, { "epoch": 2.593219314679398, "grad_norm": 0.6661591757414965, "learning_rate": 4.943095098221346e-07, "loss": 0.8693, "step": 42910 }, { "epoch": 2.593823653834532, "grad_norm": 0.6596231070624817, "learning_rate": 4.92866447837601e-07, "loss": 0.8869, "step": 42920 }, { "epoch": 2.5944279929896656, "grad_norm": 0.6718971592695029, "learning_rate": 4.914253861266538e-07, "loss": 0.8877, "step": 42930 }, { "epoch": 2.5950323321447994, "grad_norm": 0.6669725691680815, "learning_rate": 4.899863253288395e-07, "loss": 0.8919, "step": 42940 }, { "epoch": 2.5956366712999337, "grad_norm": 0.7057916200133024, "learning_rate": 4.885492660828134e-07, "loss": 0.8965, "step": 42950 }, { "epoch": 2.5962410104550675, "grad_norm": 0.6905292609717943, "learning_rate": 4.871142090263458e-07, "loss": 0.872, "step": 42960 }, { "epoch": 2.5968453496102013, "grad_norm": 0.7457587929021203, "learning_rate": 4.856811547963192e-07, "loss": 0.8821, "step": 42970 }, { "epoch": 2.597449688765335, "grad_norm": 0.7605233775464636, "learning_rate": 4.842501040287229e-07, "loss": 0.9034, "step": 42980 }, { "epoch": 2.598054027920469, "grad_norm": 0.752038056447917, "learning_rate": 4.828210573586605e-07, "loss": 0.8651, "step": 42990 }, { "epoch": 2.5986583670756027, "grad_norm": 0.6880145727339527, "learning_rate": 4.813940154203467e-07, "loss": 0.8921, "step": 43000 }, { "epoch": 2.5992627062307365, "grad_norm": 0.710689834578039, "learning_rate": 4.799689788471034e-07, "loss": 0.8807, "step": 43010 }, { "epoch": 2.5998670453858708, "grad_norm": 0.6637555466759679, "learning_rate": 4.785459482713656e-07, "loss": 0.8862, "step": 43020 }, { "epoch": 2.6004713845410046, "grad_norm": 0.6753966033832595, "learning_rate": 4.771249243246756e-07, "loss": 0.869, "step": 43030 }, { "epoch": 2.6010757236961384, "grad_norm": 0.6701160009662632, "learning_rate": 4.757059076376874e-07, "loss": 0.9101, "step": 43040 }, { "epoch": 2.601680062851272, "grad_norm": 0.8042100074168831, "learning_rate": 4.7428889884016115e-07, "loss": 0.8652, "step": 43050 }, { "epoch": 2.602284402006406, "grad_norm": 0.8934394930441069, "learning_rate": 4.728738985609704e-07, "loss": 0.8922, "step": 43060 }, { "epoch": 2.60288874116154, "grad_norm": 0.8480391096702197, "learning_rate": 4.714609074280924e-07, "loss": 0.8703, "step": 43070 }, { "epoch": 2.6034930803166736, "grad_norm": 0.8358965834916362, "learning_rate": 4.7004992606861586e-07, "loss": 0.8897, "step": 43080 }, { "epoch": 2.604097419471808, "grad_norm": 0.7900502515232571, "learning_rate": 4.686409551087373e-07, "loss": 0.8607, "step": 43090 }, { "epoch": 2.604701758626941, "grad_norm": 0.7887683312661142, "learning_rate": 4.672339951737598e-07, "loss": 0.8673, "step": 43100 }, { "epoch": 2.6053060977820754, "grad_norm": 0.8198006789645077, "learning_rate": 4.6582904688809437e-07, "loss": 0.8758, "step": 43110 }, { "epoch": 2.6059104369372093, "grad_norm": 0.858222868784393, "learning_rate": 4.644261108752596e-07, "loss": 0.9041, "step": 43120 }, { "epoch": 2.606514776092343, "grad_norm": 0.7841385620649725, "learning_rate": 4.630251877578823e-07, "loss": 0.8695, "step": 43130 }, { "epoch": 2.607119115247477, "grad_norm": 0.815516764291645, "learning_rate": 4.616262781576925e-07, "loss": 0.9113, "step": 43140 }, { "epoch": 2.6077234544026107, "grad_norm": 1.214019958266389, "learning_rate": 4.6022938269552953e-07, "loss": 0.8726, "step": 43150 }, { "epoch": 2.6083277935577445, "grad_norm": 1.2316790980663552, "learning_rate": 4.5883450199133873e-07, "loss": 0.8578, "step": 43160 }, { "epoch": 2.6089321327128783, "grad_norm": 1.187961228354091, "learning_rate": 4.5744163666416876e-07, "loss": 0.8599, "step": 43170 }, { "epoch": 2.6095364718680125, "grad_norm": 1.2259177245333102, "learning_rate": 4.560507873321773e-07, "loss": 0.9077, "step": 43180 }, { "epoch": 2.6101408110231463, "grad_norm": 1.2843016836868726, "learning_rate": 4.5466195461262575e-07, "loss": 0.8863, "step": 43190 }, { "epoch": 2.61074515017828, "grad_norm": 1.1662908017562952, "learning_rate": 4.532751391218787e-07, "loss": 0.8797, "step": 43200 }, { "epoch": 2.611349489333414, "grad_norm": 1.2220459602247087, "learning_rate": 4.5189034147540833e-07, "loss": 0.8874, "step": 43210 }, { "epoch": 2.6119538284885477, "grad_norm": 0.8546693001503075, "learning_rate": 4.505075622877908e-07, "loss": 0.906, "step": 43220 }, { "epoch": 2.6125581676436815, "grad_norm": 0.9619785719140915, "learning_rate": 4.4912680217270466e-07, "loss": 0.8729, "step": 43230 }, { "epoch": 2.6131625067988153, "grad_norm": 0.9340927571487166, "learning_rate": 4.4774806174293416e-07, "loss": 0.8821, "step": 43240 }, { "epoch": 2.6137668459539496, "grad_norm": 2.914872472865227, "learning_rate": 4.463713416103682e-07, "loss": 0.8898, "step": 43250 }, { "epoch": 2.614371185109083, "grad_norm": 2.5256069278283553, "learning_rate": 4.4499664238599415e-07, "loss": 0.8769, "step": 43260 }, { "epoch": 2.614975524264217, "grad_norm": 2.5193237010766163, "learning_rate": 4.436239646799084e-07, "loss": 0.8784, "step": 43270 }, { "epoch": 2.615579863419351, "grad_norm": 2.621712967379699, "learning_rate": 4.422533091013076e-07, "loss": 0.8881, "step": 43280 }, { "epoch": 2.616184202574485, "grad_norm": 2.5873680815964684, "learning_rate": 4.408846762584901e-07, "loss": 0.8741, "step": 43290 }, { "epoch": 2.6167885417296186, "grad_norm": 0.9555779685644418, "learning_rate": 4.3951806675885833e-07, "loss": 0.8755, "step": 43300 }, { "epoch": 2.6173928808847524, "grad_norm": 0.9278433601172189, "learning_rate": 4.38153481208915e-07, "loss": 0.9185, "step": 43310 }, { "epoch": 2.617997220039886, "grad_norm": 0.9825172755445509, "learning_rate": 4.367909202142662e-07, "loss": 0.8912, "step": 43320 }, { "epoch": 2.61860155919502, "grad_norm": 0.9651057214865958, "learning_rate": 4.3543038437961814e-07, "loss": 0.8777, "step": 43330 }, { "epoch": 2.6192058983501543, "grad_norm": 0.9440098579120147, "learning_rate": 4.340718743087802e-07, "loss": 0.8783, "step": 43340 }, { "epoch": 2.619810237505288, "grad_norm": 1.0782410743858655, "learning_rate": 4.3271539060465995e-07, "loss": 0.8582, "step": 43350 }, { "epoch": 2.620414576660422, "grad_norm": 1.0768914130535585, "learning_rate": 4.3136093386926816e-07, "loss": 0.8689, "step": 43360 }, { "epoch": 2.6210189158155557, "grad_norm": 1.0856387250961594, "learning_rate": 4.300085047037156e-07, "loss": 0.8794, "step": 43370 }, { "epoch": 2.6216232549706895, "grad_norm": 1.088311777727129, "learning_rate": 4.2865810370821047e-07, "loss": 0.8775, "step": 43380 }, { "epoch": 2.6222275941258233, "grad_norm": 0.9637473309610657, "learning_rate": 4.273097314820651e-07, "loss": 0.8921, "step": 43390 }, { "epoch": 2.622831933280957, "grad_norm": 1.099713445025246, "learning_rate": 4.2596338862368914e-07, "loss": 0.8715, "step": 43400 }, { "epoch": 2.6234362724360913, "grad_norm": 1.115343801867195, "learning_rate": 4.2461907573059067e-07, "loss": 0.8768, "step": 43410 }, { "epoch": 2.624040611591225, "grad_norm": 1.0546264339069136, "learning_rate": 4.2327679339937924e-07, "loss": 0.8948, "step": 43420 }, { "epoch": 2.624644950746359, "grad_norm": 1.1413067443560065, "learning_rate": 4.2193654222576206e-07, "loss": 0.8917, "step": 43430 }, { "epoch": 2.6252492899014928, "grad_norm": 1.035788299578993, "learning_rate": 4.20598322804544e-07, "loss": 0.9011, "step": 43440 }, { "epoch": 2.6258536290566266, "grad_norm": 1.0783188574807157, "learning_rate": 4.192621357296295e-07, "loss": 0.8849, "step": 43450 }, { "epoch": 2.6264579682117604, "grad_norm": 1.0561851845741754, "learning_rate": 4.179279815940207e-07, "loss": 0.8742, "step": 43460 }, { "epoch": 2.627062307366894, "grad_norm": 1.1756603887056123, "learning_rate": 4.1659586098981854e-07, "loss": 0.9047, "step": 43470 }, { "epoch": 2.6276666465220284, "grad_norm": 1.162210507205473, "learning_rate": 4.1526577450821835e-07, "loss": 0.8824, "step": 43480 }, { "epoch": 2.6282709856771618, "grad_norm": 1.126249856683156, "learning_rate": 4.1393772273951583e-07, "loss": 0.8689, "step": 43490 }, { "epoch": 2.628875324832296, "grad_norm": 1.089311651312746, "learning_rate": 4.126117062731039e-07, "loss": 0.8807, "step": 43500 }, { "epoch": 2.62947966398743, "grad_norm": 1.1112939970484306, "learning_rate": 4.1128772569746845e-07, "loss": 0.8915, "step": 43510 }, { "epoch": 2.6300840031425636, "grad_norm": 1.180490640411348, "learning_rate": 4.0996578160019617e-07, "loss": 0.8845, "step": 43520 }, { "epoch": 2.6306883422976974, "grad_norm": 1.197374232502728, "learning_rate": 4.086458745679678e-07, "loss": 0.8817, "step": 43530 }, { "epoch": 2.6312926814528312, "grad_norm": 1.1542862155577798, "learning_rate": 4.073280051865597e-07, "loss": 0.8955, "step": 43540 }, { "epoch": 2.631897020607965, "grad_norm": 1.0446893088197602, "learning_rate": 4.060121740408457e-07, "loss": 0.8953, "step": 43550 }, { "epoch": 2.632501359763099, "grad_norm": 1.0192611349747334, "learning_rate": 4.04698381714792e-07, "loss": 0.8775, "step": 43560 }, { "epoch": 2.633105698918233, "grad_norm": 1.0228223880126996, "learning_rate": 4.0338662879146453e-07, "loss": 0.9087, "step": 43570 }, { "epoch": 2.633710038073367, "grad_norm": 1.0225605695194147, "learning_rate": 4.0207691585301855e-07, "loss": 0.8836, "step": 43580 }, { "epoch": 2.6343143772285007, "grad_norm": 1.0866276505740302, "learning_rate": 4.007692434807098e-07, "loss": 0.888, "step": 43590 }, { "epoch": 2.6349187163836345, "grad_norm": 0.9970753469976296, "learning_rate": 3.9946361225488283e-07, "loss": 0.8826, "step": 43600 }, { "epoch": 2.6355230555387683, "grad_norm": 1.0409089782300487, "learning_rate": 3.9816002275498036e-07, "loss": 0.883, "step": 43610 }, { "epoch": 2.636127394693902, "grad_norm": 1.0217474566699665, "learning_rate": 3.9685847555953784e-07, "loss": 0.8677, "step": 43620 }, { "epoch": 2.636731733849036, "grad_norm": 1.0013617352147786, "learning_rate": 3.9555897124618383e-07, "loss": 0.8838, "step": 43630 }, { "epoch": 2.63733607300417, "grad_norm": 1.0664941232312735, "learning_rate": 3.942615103916403e-07, "loss": 0.87, "step": 43640 }, { "epoch": 2.6379404121593035, "grad_norm": 1.0355345230974287, "learning_rate": 3.9296609357172333e-07, "loss": 0.8669, "step": 43650 }, { "epoch": 2.6385447513144378, "grad_norm": 1.0596592072884867, "learning_rate": 3.9167272136134027e-07, "loss": 0.8864, "step": 43660 }, { "epoch": 2.6391490904695716, "grad_norm": 1.0823909732558148, "learning_rate": 3.9038139433449194e-07, "loss": 0.884, "step": 43670 }, { "epoch": 2.6397534296247054, "grad_norm": 1.013513754511572, "learning_rate": 3.8909211306427274e-07, "loss": 0.8765, "step": 43680 }, { "epoch": 2.640357768779839, "grad_norm": 1.0390876464100904, "learning_rate": 3.878048781228666e-07, "loss": 0.9131, "step": 43690 }, { "epoch": 2.640962107934973, "grad_norm": 1.144995132013146, "learning_rate": 3.86519690081551e-07, "loss": 0.8762, "step": 43700 }, { "epoch": 2.6415664470901072, "grad_norm": 1.2570537438358822, "learning_rate": 3.852365495106952e-07, "loss": 0.8672, "step": 43710 }, { "epoch": 2.6421707862452406, "grad_norm": 1.322219189103477, "learning_rate": 3.8395545697975765e-07, "loss": 0.9006, "step": 43720 }, { "epoch": 2.642775125400375, "grad_norm": 1.1290146874942655, "learning_rate": 3.8267641305729076e-07, "loss": 0.8995, "step": 43730 }, { "epoch": 2.6433794645555087, "grad_norm": 1.2326782837518608, "learning_rate": 3.813994183109365e-07, "loss": 0.8499, "step": 43740 }, { "epoch": 2.6439838037106425, "grad_norm": 1.1724561433574527, "learning_rate": 3.8012447330742607e-07, "loss": 0.8974, "step": 43750 }, { "epoch": 2.6445881428657763, "grad_norm": 1.0728561939648071, "learning_rate": 3.7885157861258295e-07, "loss": 0.8862, "step": 43760 }, { "epoch": 2.64519248202091, "grad_norm": 1.1908841490343347, "learning_rate": 3.775807347913207e-07, "loss": 0.8719, "step": 43770 }, { "epoch": 2.645796821176044, "grad_norm": 1.184293877908691, "learning_rate": 3.7631194240764113e-07, "loss": 0.8736, "step": 43780 }, { "epoch": 2.6464011603311777, "grad_norm": 1.0766110307497285, "learning_rate": 3.750452020246359e-07, "loss": 0.9241, "step": 43790 }, { "epoch": 2.647005499486312, "grad_norm": 1.2148393304143459, "learning_rate": 3.737805142044887e-07, "loss": 0.8712, "step": 43800 }, { "epoch": 2.6476098386414457, "grad_norm": 1.220223015092784, "learning_rate": 3.725178795084677e-07, "loss": 0.8798, "step": 43810 }, { "epoch": 2.6482141777965795, "grad_norm": 1.2335915789870364, "learning_rate": 3.7125729849693427e-07, "loss": 0.8833, "step": 43820 }, { "epoch": 2.6488185169517133, "grad_norm": 1.168483822697791, "learning_rate": 3.6999877172933517e-07, "loss": 0.8762, "step": 43830 }, { "epoch": 2.649422856106847, "grad_norm": 1.2127118443731855, "learning_rate": 3.687422997642071e-07, "loss": 0.8826, "step": 43840 }, { "epoch": 2.650027195261981, "grad_norm": 1.4665009668811448, "learning_rate": 3.674878831591744e-07, "loss": 0.8889, "step": 43850 }, { "epoch": 2.6506315344171147, "grad_norm": 1.4576280159055806, "learning_rate": 3.662355224709491e-07, "loss": 0.8869, "step": 43860 }, { "epoch": 2.651235873572249, "grad_norm": 1.5118235914781029, "learning_rate": 3.6498521825533205e-07, "loss": 0.8879, "step": 43870 }, { "epoch": 2.6518402127273824, "grad_norm": 1.512515345737943, "learning_rate": 3.6373697106720896e-07, "loss": 0.8716, "step": 43880 }, { "epoch": 2.6524445518825166, "grad_norm": 1.4423984504415237, "learning_rate": 3.6249078146055493e-07, "loss": 0.9107, "step": 43890 }, { "epoch": 2.6530488910376504, "grad_norm": 0.9584133630320645, "learning_rate": 3.612466499884315e-07, "loss": 0.9106, "step": 43900 }, { "epoch": 2.653653230192784, "grad_norm": 0.992578958991291, "learning_rate": 3.6000457720298533e-07, "loss": 0.8947, "step": 43910 }, { "epoch": 2.654257569347918, "grad_norm": 0.9178430826637624, "learning_rate": 3.5876456365545045e-07, "loss": 0.8865, "step": 43920 }, { "epoch": 2.654861908503052, "grad_norm": 0.9150553484729788, "learning_rate": 3.575266098961483e-07, "loss": 0.864, "step": 43930 }, { "epoch": 2.6554662476581856, "grad_norm": 0.9124823216881625, "learning_rate": 3.562907164744833e-07, "loss": 0.8972, "step": 43940 }, { "epoch": 2.6560705868133194, "grad_norm": 0.8724897906392935, "learning_rate": 3.550568839389473e-07, "loss": 0.8749, "step": 43950 }, { "epoch": 2.6566749259684537, "grad_norm": 0.852852251174776, "learning_rate": 3.5382511283711895e-07, "loss": 0.8836, "step": 43960 }, { "epoch": 2.6572792651235875, "grad_norm": 0.7995311589105277, "learning_rate": 3.5259540371565816e-07, "loss": 0.8756, "step": 43970 }, { "epoch": 2.6578836042787213, "grad_norm": 0.8614177345583277, "learning_rate": 3.5136775712031337e-07, "loss": 0.8895, "step": 43980 }, { "epoch": 2.658487943433855, "grad_norm": 0.8870556743029577, "learning_rate": 3.50142173595916e-07, "loss": 0.8741, "step": 43990 }, { "epoch": 2.659092282588989, "grad_norm": 0.8167234999583096, "learning_rate": 3.4891865368638146e-07, "loss": 0.8791, "step": 44000 }, { "epoch": 2.6596966217441227, "grad_norm": 0.9378867466529495, "learning_rate": 3.4769719793471046e-07, "loss": 0.8743, "step": 44010 }, { "epoch": 2.6603009608992565, "grad_norm": 0.9029683678887714, "learning_rate": 3.464778068829883e-07, "loss": 0.8942, "step": 44020 }, { "epoch": 2.6609053000543907, "grad_norm": 0.8960525968167036, "learning_rate": 3.452604810723803e-07, "loss": 0.8717, "step": 44030 }, { "epoch": 2.6615096392095245, "grad_norm": 0.8767426540642903, "learning_rate": 3.4404522104313943e-07, "loss": 0.9051, "step": 44040 }, { "epoch": 2.6621139783646584, "grad_norm": 0.6662176844034844, "learning_rate": 3.428320273346003e-07, "loss": 0.8832, "step": 44050 }, { "epoch": 2.662718317519792, "grad_norm": 0.5742983863112889, "learning_rate": 3.4162090048517946e-07, "loss": 0.8631, "step": 44060 }, { "epoch": 2.663322656674926, "grad_norm": 0.6743823834285331, "learning_rate": 3.404118410323781e-07, "loss": 0.9191, "step": 44070 }, { "epoch": 2.6639269958300598, "grad_norm": 0.6424213514599196, "learning_rate": 3.3920484951277753e-07, "loss": 0.8545, "step": 44080 }, { "epoch": 2.6645313349851936, "grad_norm": 0.6378953679641663, "learning_rate": 3.379999264620443e-07, "loss": 0.8853, "step": 44090 }, { "epoch": 2.665135674140328, "grad_norm": 0.7228214564536835, "learning_rate": 3.3679707241492334e-07, "loss": 0.8864, "step": 44100 }, { "epoch": 2.665740013295461, "grad_norm": 0.6955572224582335, "learning_rate": 3.3559628790524544e-07, "loss": 0.8942, "step": 44110 }, { "epoch": 2.6663443524505954, "grad_norm": 0.7149970854094722, "learning_rate": 3.343975734659194e-07, "loss": 0.885, "step": 44120 }, { "epoch": 2.6669486916057292, "grad_norm": 0.6964161244538115, "learning_rate": 3.3320092962893623e-07, "loss": 0.9111, "step": 44130 }, { "epoch": 2.667553030760863, "grad_norm": 0.66458410337221, "learning_rate": 3.320063569253701e-07, "loss": 0.8625, "step": 44140 }, { "epoch": 2.668157369915997, "grad_norm": 0.9769026065755836, "learning_rate": 3.308138558853746e-07, "loss": 0.8595, "step": 44150 }, { "epoch": 2.6687617090711306, "grad_norm": 0.8453732839139658, "learning_rate": 3.296234270381821e-07, "loss": 0.8915, "step": 44160 }, { "epoch": 2.6693660482262644, "grad_norm": 0.8869206614138567, "learning_rate": 3.284350709121076e-07, "loss": 0.8803, "step": 44170 }, { "epoch": 2.6699703873813982, "grad_norm": 0.8997987974377994, "learning_rate": 3.272487880345465e-07, "loss": 0.8486, "step": 44180 }, { "epoch": 2.6705747265365325, "grad_norm": 0.8727690731104922, "learning_rate": 3.2606457893197164e-07, "loss": 0.8935, "step": 44190 }, { "epoch": 2.6711790656916663, "grad_norm": 0.6311554818261784, "learning_rate": 3.248824441299375e-07, "loss": 0.8877, "step": 44200 }, { "epoch": 2.6717834048468, "grad_norm": 0.6123527286505754, "learning_rate": 3.2370238415307896e-07, "loss": 0.8723, "step": 44210 }, { "epoch": 2.672387744001934, "grad_norm": 0.5859084938790945, "learning_rate": 3.2252439952510704e-07, "loss": 0.9013, "step": 44220 }, { "epoch": 2.6729920831570677, "grad_norm": 0.6217074710737848, "learning_rate": 3.2134849076881303e-07, "loss": 0.8574, "step": 44230 }, { "epoch": 2.6735964223122015, "grad_norm": 0.6026215002550583, "learning_rate": 3.2017465840606944e-07, "loss": 0.8838, "step": 44240 }, { "epoch": 2.6742007614673353, "grad_norm": 0.5772391787459733, "learning_rate": 3.190029029578229e-07, "loss": 0.8674, "step": 44250 }, { "epoch": 2.6748051006224696, "grad_norm": 0.5874932384324928, "learning_rate": 3.178332249441013e-07, "loss": 0.8982, "step": 44260 }, { "epoch": 2.675409439777603, "grad_norm": 0.5791916985148398, "learning_rate": 3.1666562488401054e-07, "loss": 0.8939, "step": 44270 }, { "epoch": 2.676013778932737, "grad_norm": 0.5647132992319281, "learning_rate": 3.1550010329573164e-07, "loss": 0.8773, "step": 44280 }, { "epoch": 2.676618118087871, "grad_norm": 0.6378354821034709, "learning_rate": 3.143366606965259e-07, "loss": 0.8915, "step": 44290 }, { "epoch": 2.677222457243005, "grad_norm": 0.603575427971721, "learning_rate": 3.13175297602733e-07, "loss": 0.899, "step": 44300 }, { "epoch": 2.6778267963981386, "grad_norm": 0.5995792293066782, "learning_rate": 3.120160145297646e-07, "loss": 0.8828, "step": 44310 }, { "epoch": 2.6784311355532724, "grad_norm": 0.5512564719336104, "learning_rate": 3.1085881199211576e-07, "loss": 0.8638, "step": 44320 }, { "epoch": 2.679035474708406, "grad_norm": 0.5958366573850185, "learning_rate": 3.097036905033518e-07, "loss": 0.853, "step": 44330 }, { "epoch": 2.67963981386354, "grad_norm": 0.59003121469836, "learning_rate": 3.0855065057612044e-07, "loss": 0.9033, "step": 44340 }, { "epoch": 2.6802441530186742, "grad_norm": 0.5710681931885819, "learning_rate": 3.0739969272214065e-07, "loss": 0.8762, "step": 44350 }, { "epoch": 2.680848492173808, "grad_norm": 0.6160427067410187, "learning_rate": 3.0625081745221107e-07, "loss": 0.889, "step": 44360 }, { "epoch": 2.681452831328942, "grad_norm": 0.6452060402213153, "learning_rate": 3.0510402527620277e-07, "loss": 0.9081, "step": 44370 }, { "epoch": 2.6820571704840757, "grad_norm": 0.5753217129010045, "learning_rate": 3.0395931670306523e-07, "loss": 0.8761, "step": 44380 }, { "epoch": 2.6826615096392095, "grad_norm": 0.6068957855421832, "learning_rate": 3.0281669224082266e-07, "loss": 0.8763, "step": 44390 }, { "epoch": 2.6832658487943433, "grad_norm": 0.568098102051787, "learning_rate": 3.016761523965728e-07, "loss": 0.871, "step": 44400 }, { "epoch": 2.683870187949477, "grad_norm": 0.5885370471373237, "learning_rate": 3.00537697676489e-07, "loss": 0.8936, "step": 44410 }, { "epoch": 2.6844745271046113, "grad_norm": 0.6300200683297026, "learning_rate": 2.994013285858205e-07, "loss": 0.9193, "step": 44420 }, { "epoch": 2.685078866259745, "grad_norm": 0.6383619882100581, "learning_rate": 2.982670456288883e-07, "loss": 0.9028, "step": 44430 }, { "epoch": 2.685683205414879, "grad_norm": 0.5682472920606244, "learning_rate": 2.971348493090903e-07, "loss": 0.8699, "step": 44440 }, { "epoch": 2.6862875445700127, "grad_norm": 0.5809286243912853, "learning_rate": 2.960047401288979e-07, "loss": 0.8814, "step": 44450 }, { "epoch": 2.6868918837251465, "grad_norm": 0.5964210974375643, "learning_rate": 2.948767185898532e-07, "loss": 0.9095, "step": 44460 }, { "epoch": 2.6874962228802803, "grad_norm": 0.6269819978363419, "learning_rate": 2.9375078519257525e-07, "loss": 0.8782, "step": 44470 }, { "epoch": 2.688100562035414, "grad_norm": 0.6112862451973233, "learning_rate": 2.9262694043675545e-07, "loss": 0.8683, "step": 44480 }, { "epoch": 2.6887049011905484, "grad_norm": 0.6450045891635381, "learning_rate": 2.915051848211592e-07, "loss": 0.8693, "step": 44490 }, { "epoch": 2.6893092403456818, "grad_norm": 0.6429482315688143, "learning_rate": 2.9038551884362056e-07, "loss": 0.8825, "step": 44500 }, { "epoch": 2.689913579500816, "grad_norm": 0.677920814906551, "learning_rate": 2.8926794300105143e-07, "loss": 0.8721, "step": 44510 }, { "epoch": 2.69051791865595, "grad_norm": 0.6701491222126793, "learning_rate": 2.8815245778943403e-07, "loss": 0.8963, "step": 44520 }, { "epoch": 2.6911222578110836, "grad_norm": 0.698093394607616, "learning_rate": 2.8703906370382117e-07, "loss": 0.8962, "step": 44530 }, { "epoch": 2.6917265969662174, "grad_norm": 0.6838647105102715, "learning_rate": 2.859277612383399e-07, "loss": 0.8791, "step": 44540 }, { "epoch": 2.692330936121351, "grad_norm": 0.7423617329646944, "learning_rate": 2.848185508861889e-07, "loss": 0.8867, "step": 44550 }, { "epoch": 2.692935275276485, "grad_norm": 0.7438536977114678, "learning_rate": 2.8371143313963566e-07, "loss": 0.9049, "step": 44560 }, { "epoch": 2.693539614431619, "grad_norm": 0.773209881306369, "learning_rate": 2.826064084900232e-07, "loss": 0.8889, "step": 44570 }, { "epoch": 2.694143953586753, "grad_norm": 0.7129533364584698, "learning_rate": 2.815034774277614e-07, "loss": 0.8677, "step": 44580 }, { "epoch": 2.694748292741887, "grad_norm": 0.7511512987060771, "learning_rate": 2.8040264044233436e-07, "loss": 0.9059, "step": 44590 }, { "epoch": 2.6953526318970207, "grad_norm": 0.6833981161092568, "learning_rate": 2.79303898022294e-07, "loss": 0.8738, "step": 44600 }, { "epoch": 2.6959569710521545, "grad_norm": 0.7292251167777419, "learning_rate": 2.782072506552658e-07, "loss": 0.8777, "step": 44610 }, { "epoch": 2.6965613102072883, "grad_norm": 0.7130669561404509, "learning_rate": 2.771126988279421e-07, "loss": 0.899, "step": 44620 }, { "epoch": 2.697165649362422, "grad_norm": 0.6802975319099562, "learning_rate": 2.760202430260872e-07, "loss": 0.9004, "step": 44630 }, { "epoch": 2.697769988517556, "grad_norm": 0.6302671018604316, "learning_rate": 2.7492988373453653e-07, "loss": 0.8678, "step": 44640 }, { "epoch": 2.69837432767269, "grad_norm": 0.8747536511635973, "learning_rate": 2.7384162143719137e-07, "loss": 0.8874, "step": 44650 }, { "epoch": 2.6989786668278235, "grad_norm": 0.8461739229698654, "learning_rate": 2.7275545661702485e-07, "loss": 0.88, "step": 44660 }, { "epoch": 2.6995830059829578, "grad_norm": 0.8363056138119715, "learning_rate": 2.716713897560802e-07, "loss": 0.8871, "step": 44670 }, { "epoch": 2.7001873451380916, "grad_norm": 0.7926902738712706, "learning_rate": 2.705894213354665e-07, "loss": 0.8753, "step": 44680 }, { "epoch": 2.7007916842932254, "grad_norm": 0.8376912435964293, "learning_rate": 2.69509551835363e-07, "loss": 0.9015, "step": 44690 }, { "epoch": 2.701396023448359, "grad_norm": 0.8350907894895543, "learning_rate": 2.684317817350196e-07, "loss": 0.896, "step": 44700 }, { "epoch": 2.702000362603493, "grad_norm": 0.8464860003006531, "learning_rate": 2.6735611151275045e-07, "loss": 0.8931, "step": 44710 }, { "epoch": 2.702604701758627, "grad_norm": 0.9090875024563327, "learning_rate": 2.662825416459408e-07, "loss": 0.9092, "step": 44720 }, { "epoch": 2.7032090409137606, "grad_norm": 0.8212991164160085, "learning_rate": 2.652110726110435e-07, "loss": 0.8921, "step": 44730 }, { "epoch": 2.703813380068895, "grad_norm": 0.8379777426125715, "learning_rate": 2.641417048835765e-07, "loss": 0.8857, "step": 44740 }, { "epoch": 2.7044177192240286, "grad_norm": 1.148210674383701, "learning_rate": 2.6307443893812847e-07, "loss": 0.8621, "step": 44750 }, { "epoch": 2.7050220583791624, "grad_norm": 1.2261794452709474, "learning_rate": 2.620092752483533e-07, "loss": 0.8967, "step": 44760 }, { "epoch": 2.7056263975342962, "grad_norm": 1.240665228130346, "learning_rate": 2.6094621428697285e-07, "loss": 0.8668, "step": 44770 }, { "epoch": 2.70623073668943, "grad_norm": 1.208723166751661, "learning_rate": 2.5988525652577424e-07, "loss": 0.882, "step": 44780 }, { "epoch": 2.706835075844564, "grad_norm": 1.2517706033107603, "learning_rate": 2.588264024356146e-07, "loss": 0.8762, "step": 44790 }, { "epoch": 2.7074394149996976, "grad_norm": 0.9902902650343767, "learning_rate": 2.577696524864132e-07, "loss": 0.9, "step": 44800 }, { "epoch": 2.708043754154832, "grad_norm": 0.9776933450606815, "learning_rate": 2.5671500714715745e-07, "loss": 0.8984, "step": 44810 }, { "epoch": 2.7086480933099657, "grad_norm": 0.9092755011009269, "learning_rate": 2.55662466885902e-07, "loss": 0.8988, "step": 44820 }, { "epoch": 2.7092524324650995, "grad_norm": 1.0110562276040946, "learning_rate": 2.5461203216976593e-07, "loss": 0.9084, "step": 44830 }, { "epoch": 2.7098567716202333, "grad_norm": 1.0129500837342975, "learning_rate": 2.535637034649324e-07, "loss": 0.8799, "step": 44840 }, { "epoch": 2.710461110775367, "grad_norm": 2.7061545308058244, "learning_rate": 2.5251748123665376e-07, "loss": 0.8893, "step": 44850 }, { "epoch": 2.711065449930501, "grad_norm": 2.143043160935743, "learning_rate": 2.51473365949243e-07, "loss": 0.8886, "step": 44860 }, { "epoch": 2.7116697890856347, "grad_norm": 2.4614840484862843, "learning_rate": 2.5043135806608223e-07, "loss": 0.8875, "step": 44870 }, { "epoch": 2.712274128240769, "grad_norm": 2.6432759170363687, "learning_rate": 2.493914580496143e-07, "loss": 0.8858, "step": 44880 }, { "epoch": 2.7128784673959023, "grad_norm": 2.4554370560241487, "learning_rate": 2.483536663613506e-07, "loss": 0.8749, "step": 44890 }, { "epoch": 2.7134828065510366, "grad_norm": 0.9459670926428778, "learning_rate": 2.473179834618639e-07, "loss": 0.88, "step": 44900 }, { "epoch": 2.7140871457061704, "grad_norm": 0.9580246587333454, "learning_rate": 2.462844098107914e-07, "loss": 0.8697, "step": 44910 }, { "epoch": 2.714691484861304, "grad_norm": 0.9955754639792951, "learning_rate": 2.4525294586683625e-07, "loss": 0.8816, "step": 44920 }, { "epoch": 2.715295824016438, "grad_norm": 0.919740970255983, "learning_rate": 2.442235920877628e-07, "loss": 0.8999, "step": 44930 }, { "epoch": 2.715900163171572, "grad_norm": 1.0107015176502936, "learning_rate": 2.431963489304001e-07, "loss": 0.8485, "step": 44940 }, { "epoch": 2.7165045023267056, "grad_norm": 1.0184455406520803, "learning_rate": 2.421712168506418e-07, "loss": 0.8958, "step": 44950 }, { "epoch": 2.7171088414818394, "grad_norm": 0.9825152806957556, "learning_rate": 2.411481963034412e-07, "loss": 0.892, "step": 44960 }, { "epoch": 2.7177131806369736, "grad_norm": 0.9392867403119376, "learning_rate": 2.401272877428179e-07, "loss": 0.8554, "step": 44970 }, { "epoch": 2.7183175197921075, "grad_norm": 1.088299104437561, "learning_rate": 2.391084916218533e-07, "loss": 0.8907, "step": 44980 }, { "epoch": 2.7189218589472413, "grad_norm": 0.9794911351986175, "learning_rate": 2.380918083926892e-07, "loss": 0.8677, "step": 44990 }, { "epoch": 2.719526198102375, "grad_norm": 1.1727643450334087, "learning_rate": 2.370772385065323e-07, "loss": 0.8818, "step": 45000 }, { "epoch": 2.720130537257509, "grad_norm": 1.0177159573572903, "learning_rate": 2.360647824136514e-07, "loss": 0.8842, "step": 45010 }, { "epoch": 2.7207348764126427, "grad_norm": 0.9805753057868669, "learning_rate": 2.3505444056337478e-07, "loss": 0.8814, "step": 45020 }, { "epoch": 2.7213392155677765, "grad_norm": 1.5267478949796094, "learning_rate": 2.3404621340409383e-07, "loss": 0.8929, "step": 45030 }, { "epoch": 2.7219435547229107, "grad_norm": 1.0990778320044334, "learning_rate": 2.3304010138326282e-07, "loss": 0.8991, "step": 45040 }, { "epoch": 2.7225478938780445, "grad_norm": 1.0014123143834517, "learning_rate": 2.3203610494739514e-07, "loss": 0.9033, "step": 45050 }, { "epoch": 2.7231522330331783, "grad_norm": 1.11621162461148, "learning_rate": 2.3103422454206548e-07, "loss": 0.8799, "step": 45060 }, { "epoch": 2.723756572188312, "grad_norm": 1.149514812100179, "learning_rate": 2.3003446061191204e-07, "loss": 0.8965, "step": 45070 }, { "epoch": 2.724360911343446, "grad_norm": 1.0592214190440168, "learning_rate": 2.2903681360062934e-07, "loss": 0.8391, "step": 45080 }, { "epoch": 2.7249652504985797, "grad_norm": 1.045205437885987, "learning_rate": 2.2804128395097659e-07, "loss": 0.8756, "step": 45090 }, { "epoch": 2.7255695896537135, "grad_norm": 1.1695799392856787, "learning_rate": 2.270478721047714e-07, "loss": 0.9016, "step": 45100 }, { "epoch": 2.726173928808848, "grad_norm": 1.2092449162116197, "learning_rate": 2.2605657850289064e-07, "loss": 0.9097, "step": 45110 }, { "epoch": 2.726778267963981, "grad_norm": 1.1930576511306474, "learning_rate": 2.250674035852729e-07, "loss": 0.8626, "step": 45120 }, { "epoch": 2.7273826071191154, "grad_norm": 1.106632166487915, "learning_rate": 2.2408034779091537e-07, "loss": 0.9029, "step": 45130 }, { "epoch": 2.727986946274249, "grad_norm": 1.1287378717292094, "learning_rate": 2.2309541155787595e-07, "loss": 0.8802, "step": 45140 }, { "epoch": 2.728591285429383, "grad_norm": 1.0761266096183384, "learning_rate": 2.2211259532326945e-07, "loss": 0.9043, "step": 45150 }, { "epoch": 2.729195624584517, "grad_norm": 0.9740322270351403, "learning_rate": 2.2113189952327197e-07, "loss": 0.884, "step": 45160 }, { "epoch": 2.7297999637396506, "grad_norm": 1.114333437812144, "learning_rate": 2.2015332459311921e-07, "loss": 0.872, "step": 45170 }, { "epoch": 2.7304043028947844, "grad_norm": 1.0057318949706762, "learning_rate": 2.191768709671027e-07, "loss": 0.8771, "step": 45180 }, { "epoch": 2.7310086420499182, "grad_norm": 0.9943468202093073, "learning_rate": 2.1820253907857526e-07, "loss": 0.8777, "step": 45190 }, { "epoch": 2.7316129812050525, "grad_norm": 1.037643341148123, "learning_rate": 2.172303293599476e-07, "loss": 0.8726, "step": 45200 }, { "epoch": 2.7322173203601863, "grad_norm": 1.058745903853557, "learning_rate": 2.1626024224268628e-07, "loss": 0.8731, "step": 45210 }, { "epoch": 2.73282165951532, "grad_norm": 1.110355961210582, "learning_rate": 2.1529227815731858e-07, "loss": 0.8574, "step": 45220 }, { "epoch": 2.733425998670454, "grad_norm": 1.0564040481363202, "learning_rate": 2.143264375334292e-07, "loss": 0.8697, "step": 45230 }, { "epoch": 2.7340303378255877, "grad_norm": 1.0220958855142999, "learning_rate": 2.1336272079965857e-07, "loss": 0.9043, "step": 45240 }, { "epoch": 2.7346346769807215, "grad_norm": 1.0016145974739459, "learning_rate": 2.1240112838370686e-07, "loss": 0.8948, "step": 45250 }, { "epoch": 2.7352390161358553, "grad_norm": 1.163346595977779, "learning_rate": 2.1144166071233042e-07, "loss": 0.8947, "step": 45260 }, { "epoch": 2.7358433552909895, "grad_norm": 1.0993512992455559, "learning_rate": 2.1048431821134196e-07, "loss": 0.8819, "step": 45270 }, { "epoch": 2.736447694446123, "grad_norm": 1.005474838410543, "learning_rate": 2.0952910130561165e-07, "loss": 0.8862, "step": 45280 }, { "epoch": 2.737052033601257, "grad_norm": 1.0095370829479087, "learning_rate": 2.0857601041906816e-07, "loss": 0.9066, "step": 45290 }, { "epoch": 2.737656372756391, "grad_norm": 1.139606821521737, "learning_rate": 2.0762504597469258e-07, "loss": 0.894, "step": 45300 }, { "epoch": 2.7382607119115248, "grad_norm": 1.1910161161297665, "learning_rate": 2.0667620839452508e-07, "loss": 0.8878, "step": 45310 }, { "epoch": 2.7388650510666586, "grad_norm": 1.1200032044713917, "learning_rate": 2.0572949809966326e-07, "loss": 0.9016, "step": 45320 }, { "epoch": 2.7394693902217924, "grad_norm": 1.1618684672210273, "learning_rate": 2.0478491551025604e-07, "loss": 0.903, "step": 45330 }, { "epoch": 2.740073729376926, "grad_norm": 1.2146107547250926, "learning_rate": 2.0384246104551252e-07, "loss": 0.8837, "step": 45340 }, { "epoch": 2.74067806853206, "grad_norm": 1.1648456698217036, "learning_rate": 2.0290213512369594e-07, "loss": 0.8893, "step": 45350 }, { "epoch": 2.7412824076871942, "grad_norm": 1.1862111453265956, "learning_rate": 2.0196393816212246e-07, "loss": 0.8706, "step": 45360 }, { "epoch": 2.741886746842328, "grad_norm": 1.1763491035258002, "learning_rate": 2.0102787057716788e-07, "loss": 0.8933, "step": 45370 }, { "epoch": 2.742491085997462, "grad_norm": 1.157534575240225, "learning_rate": 2.0009393278425882e-07, "loss": 0.8698, "step": 45380 }, { "epoch": 2.7430954251525956, "grad_norm": 1.277193657313442, "learning_rate": 1.9916212519788037e-07, "loss": 0.8913, "step": 45390 }, { "epoch": 2.7436997643077294, "grad_norm": 1.155230518209369, "learning_rate": 1.9823244823156783e-07, "loss": 0.8665, "step": 45400 }, { "epoch": 2.7443041034628632, "grad_norm": 1.248104079354863, "learning_rate": 1.9730490229791567e-07, "loss": 0.8799, "step": 45410 }, { "epoch": 2.744908442617997, "grad_norm": 1.141762068994002, "learning_rate": 1.9637948780856898e-07, "loss": 0.8693, "step": 45420 }, { "epoch": 2.7455127817731313, "grad_norm": 1.2753267008002964, "learning_rate": 1.954562051742287e-07, "loss": 0.8851, "step": 45430 }, { "epoch": 2.746117120928265, "grad_norm": 1.2235220363401962, "learning_rate": 1.9453505480464985e-07, "loss": 0.8827, "step": 45440 }, { "epoch": 2.746721460083399, "grad_norm": 1.4858072217758969, "learning_rate": 1.936160371086393e-07, "loss": 0.8857, "step": 45450 }, { "epoch": 2.7473257992385327, "grad_norm": 1.5156490892263077, "learning_rate": 1.9269915249405912e-07, "loss": 0.9048, "step": 45460 }, { "epoch": 2.7479301383936665, "grad_norm": 1.4707327254645535, "learning_rate": 1.917844013678255e-07, "loss": 0.8756, "step": 45470 }, { "epoch": 2.7485344775488003, "grad_norm": 1.4603464920498683, "learning_rate": 1.908717841359048e-07, "loss": 0.9005, "step": 45480 }, { "epoch": 2.749138816703934, "grad_norm": 1.4919514207653686, "learning_rate": 1.8996130120331914e-07, "loss": 0.9005, "step": 45490 }, { "epoch": 2.7497431558590684, "grad_norm": 1.0327461395228872, "learning_rate": 1.8905295297414194e-07, "loss": 0.8768, "step": 45500 }, { "epoch": 2.7503474950142017, "grad_norm": 0.9820293648659041, "learning_rate": 1.8814673985150078e-07, "loss": 0.8936, "step": 45510 }, { "epoch": 2.750951834169336, "grad_norm": 0.9016692349610924, "learning_rate": 1.872426622375728e-07, "loss": 0.8827, "step": 45520 }, { "epoch": 2.75155617332447, "grad_norm": 0.937904234239, "learning_rate": 1.8634072053359042e-07, "loss": 0.8903, "step": 45530 }, { "epoch": 2.7521605124796036, "grad_norm": 0.9393857965950156, "learning_rate": 1.854409151398373e-07, "loss": 0.8987, "step": 45540 }, { "epoch": 2.7527648516347374, "grad_norm": 0.8576501034287979, "learning_rate": 1.845432464556468e-07, "loss": 0.8759, "step": 45550 }, { "epoch": 2.753369190789871, "grad_norm": 0.8383746397929337, "learning_rate": 1.8364771487940747e-07, "loss": 0.8778, "step": 45560 }, { "epoch": 2.753973529945005, "grad_norm": 0.8676877785678067, "learning_rate": 1.8275432080855747e-07, "loss": 0.8918, "step": 45570 }, { "epoch": 2.754577869100139, "grad_norm": 0.8245797010954128, "learning_rate": 1.8186306463958636e-07, "loss": 0.8806, "step": 45580 }, { "epoch": 2.755182208255273, "grad_norm": 0.7980094066911018, "learning_rate": 1.8097394676803437e-07, "loss": 0.8833, "step": 45590 }, { "epoch": 2.755786547410407, "grad_norm": 0.8814633362664858, "learning_rate": 1.800869675884953e-07, "loss": 0.8948, "step": 45600 }, { "epoch": 2.7563908865655407, "grad_norm": 0.8482749726288346, "learning_rate": 1.7920212749461097e-07, "loss": 0.9, "step": 45610 }, { "epoch": 2.7569952257206745, "grad_norm": 0.8838429290291643, "learning_rate": 1.78319426879075e-07, "loss": 0.8853, "step": 45620 }, { "epoch": 2.7575995648758083, "grad_norm": 0.9175655452716482, "learning_rate": 1.7743886613363125e-07, "loss": 0.8832, "step": 45630 }, { "epoch": 2.758203904030942, "grad_norm": 0.8627250630323202, "learning_rate": 1.7656044564907437e-07, "loss": 0.8966, "step": 45640 }, { "epoch": 2.758808243186076, "grad_norm": 0.6406229430466438, "learning_rate": 1.756841658152486e-07, "loss": 0.8836, "step": 45650 }, { "epoch": 2.75941258234121, "grad_norm": 0.6381578530896204, "learning_rate": 1.74810027021049e-07, "loss": 0.8661, "step": 45660 }, { "epoch": 2.7600169214963435, "grad_norm": 0.6559950695303506, "learning_rate": 1.739380296544191e-07, "loss": 0.8814, "step": 45670 }, { "epoch": 2.7606212606514777, "grad_norm": 0.6323016875435511, "learning_rate": 1.730681741023532e-07, "loss": 0.8697, "step": 45680 }, { "epoch": 2.7612255998066115, "grad_norm": 0.6496213978397954, "learning_rate": 1.7220046075089536e-07, "loss": 0.914, "step": 45690 }, { "epoch": 2.7618299389617453, "grad_norm": 0.6226583041147095, "learning_rate": 1.7133488998513692e-07, "loss": 0.871, "step": 45700 }, { "epoch": 2.762434278116879, "grad_norm": 0.708741715191671, "learning_rate": 1.7047146218922062e-07, "loss": 0.8799, "step": 45710 }, { "epoch": 2.763038617272013, "grad_norm": 0.7229626178654985, "learning_rate": 1.6961017774633714e-07, "loss": 0.893, "step": 45720 }, { "epoch": 2.763642956427147, "grad_norm": 0.6892131594267331, "learning_rate": 1.6875103703872518e-07, "loss": 0.8807, "step": 45730 }, { "epoch": 2.7642472955822806, "grad_norm": 0.7166797767440123, "learning_rate": 1.6789404044767366e-07, "loss": 0.8758, "step": 45740 }, { "epoch": 2.764851634737415, "grad_norm": 0.8957928780555156, "learning_rate": 1.670391883535194e-07, "loss": 0.8889, "step": 45750 }, { "epoch": 2.7654559738925486, "grad_norm": 0.9388549397024907, "learning_rate": 1.6618648113564616e-07, "loss": 0.8551, "step": 45760 }, { "epoch": 2.7660603130476824, "grad_norm": 0.8756157168474912, "learning_rate": 1.6533591917248737e-07, "loss": 0.9027, "step": 45770 }, { "epoch": 2.766664652202816, "grad_norm": 0.9666364572877677, "learning_rate": 1.6448750284152438e-07, "loss": 0.8836, "step": 45780 }, { "epoch": 2.76726899135795, "grad_norm": 0.9060640149069816, "learning_rate": 1.6364123251928487e-07, "loss": 0.8699, "step": 45790 }, { "epoch": 2.767873330513084, "grad_norm": 0.5688728041654626, "learning_rate": 1.6279710858134623e-07, "loss": 0.8861, "step": 45800 }, { "epoch": 2.7684776696682176, "grad_norm": 0.5856732913104161, "learning_rate": 1.6195513140233098e-07, "loss": 0.8692, "step": 45810 }, { "epoch": 2.769082008823352, "grad_norm": 0.5667721982668187, "learning_rate": 1.611153013559108e-07, "loss": 0.8675, "step": 45820 }, { "epoch": 2.7696863479784857, "grad_norm": 0.5945509810361672, "learning_rate": 1.6027761881480307e-07, "loss": 0.8905, "step": 45830 }, { "epoch": 2.7702906871336195, "grad_norm": 0.6282107389067669, "learning_rate": 1.5944208415077323e-07, "loss": 0.8943, "step": 45840 }, { "epoch": 2.7708950262887533, "grad_norm": 0.5977481253744285, "learning_rate": 1.586086977346335e-07, "loss": 0.8843, "step": 45850 }, { "epoch": 2.771499365443887, "grad_norm": 0.571685794633865, "learning_rate": 1.5777745993624028e-07, "loss": 0.8882, "step": 45860 }, { "epoch": 2.772103704599021, "grad_norm": 0.6065710704633975, "learning_rate": 1.5694837112450067e-07, "loss": 0.8631, "step": 45870 }, { "epoch": 2.7727080437541547, "grad_norm": 0.5964393739660016, "learning_rate": 1.5612143166736426e-07, "loss": 0.8975, "step": 45880 }, { "epoch": 2.773312382909289, "grad_norm": 0.562478649284115, "learning_rate": 1.5529664193182858e-07, "loss": 0.8839, "step": 45890 }, { "epoch": 2.7739167220644223, "grad_norm": 0.5672029556953706, "learning_rate": 1.5447400228393695e-07, "loss": 0.872, "step": 45900 }, { "epoch": 2.7745210612195566, "grad_norm": 0.5942124996834438, "learning_rate": 1.5365351308877785e-07, "loss": 0.8649, "step": 45910 }, { "epoch": 2.7751254003746904, "grad_norm": 0.6086671159664907, "learning_rate": 1.528351747104856e-07, "loss": 0.8903, "step": 45920 }, { "epoch": 2.775729739529824, "grad_norm": 0.5986512656298831, "learning_rate": 1.5201898751224075e-07, "loss": 0.896, "step": 45930 }, { "epoch": 2.776334078684958, "grad_norm": 0.5987795832523434, "learning_rate": 1.512049518562686e-07, "loss": 0.8869, "step": 45940 }, { "epoch": 2.7769384178400918, "grad_norm": 0.5903942970017256, "learning_rate": 1.5039306810383848e-07, "loss": 0.8771, "step": 45950 }, { "epoch": 2.7775427569952256, "grad_norm": 0.5643675712703505, "learning_rate": 1.4958333661526713e-07, "loss": 0.8796, "step": 45960 }, { "epoch": 2.7781470961503594, "grad_norm": 0.5488452337788334, "learning_rate": 1.4877575774991493e-07, "loss": 0.8992, "step": 45970 }, { "epoch": 2.7787514353054936, "grad_norm": 0.5680377663391452, "learning_rate": 1.4797033186618513e-07, "loss": 0.8511, "step": 45980 }, { "epoch": 2.7793557744606274, "grad_norm": 0.5759190022229727, "learning_rate": 1.4716705932152798e-07, "loss": 0.8712, "step": 45990 }, { "epoch": 2.7799601136157612, "grad_norm": 0.598760258147706, "learning_rate": 1.4636594047243823e-07, "loss": 0.8885, "step": 46000 }, { "epoch": 2.780564452770895, "grad_norm": 0.5826932912802716, "learning_rate": 1.455669756744521e-07, "loss": 0.888, "step": 46010 }, { "epoch": 2.781168791926029, "grad_norm": 0.561753710232754, "learning_rate": 1.4477016528215259e-07, "loss": 0.88, "step": 46020 }, { "epoch": 2.7817731310811626, "grad_norm": 0.5581961395981817, "learning_rate": 1.4397550964916574e-07, "loss": 0.904, "step": 46030 }, { "epoch": 2.7823774702362964, "grad_norm": 0.568751185955918, "learning_rate": 1.4318300912815997e-07, "loss": 0.8561, "step": 46040 }, { "epoch": 2.7829818093914307, "grad_norm": 0.5907567741634793, "learning_rate": 1.4239266407084895e-07, "loss": 0.8942, "step": 46050 }, { "epoch": 2.7835861485465645, "grad_norm": 0.597394406216666, "learning_rate": 1.416044748279899e-07, "loss": 0.9099, "step": 46060 }, { "epoch": 2.7841904877016983, "grad_norm": 0.6043521594637002, "learning_rate": 1.4081844174938076e-07, "loss": 0.8799, "step": 46070 }, { "epoch": 2.784794826856832, "grad_norm": 0.5979420961631614, "learning_rate": 1.4003456518386582e-07, "loss": 0.8755, "step": 46080 }, { "epoch": 2.785399166011966, "grad_norm": 0.6415000168194408, "learning_rate": 1.3925284547933126e-07, "loss": 0.8785, "step": 46090 }, { "epoch": 2.7860035051670997, "grad_norm": 0.7312409200714377, "learning_rate": 1.38473282982704e-07, "loss": 0.9052, "step": 46100 }, { "epoch": 2.7866078443222335, "grad_norm": 0.676311515345982, "learning_rate": 1.376958780399562e-07, "loss": 0.8847, "step": 46110 }, { "epoch": 2.7872121834773678, "grad_norm": 0.7411261653744863, "learning_rate": 1.3692063099610187e-07, "loss": 0.8878, "step": 46120 }, { "epoch": 2.787816522632501, "grad_norm": 0.6782575006435092, "learning_rate": 1.3614754219519577e-07, "loss": 0.8748, "step": 46130 }, { "epoch": 2.7884208617876354, "grad_norm": 0.6904883125058752, "learning_rate": 1.3537661198033735e-07, "loss": 0.8878, "step": 46140 }, { "epoch": 2.789025200942769, "grad_norm": 0.7617510133608474, "learning_rate": 1.3460784069366573e-07, "loss": 0.8855, "step": 46150 }, { "epoch": 2.789629540097903, "grad_norm": 0.7200044659977112, "learning_rate": 1.338412286763635e-07, "loss": 0.9089, "step": 46160 }, { "epoch": 2.790233879253037, "grad_norm": 0.7772490473931158, "learning_rate": 1.3307677626865413e-07, "loss": 0.8796, "step": 46170 }, { "epoch": 2.7908382184081706, "grad_norm": 0.7099030513901076, "learning_rate": 1.3231448380980284e-07, "loss": 0.8831, "step": 46180 }, { "epoch": 2.7914425575633044, "grad_norm": 0.7022387939697349, "learning_rate": 1.3155435163811681e-07, "loss": 0.8799, "step": 46190 }, { "epoch": 2.792046896718438, "grad_norm": 0.7088038703085388, "learning_rate": 1.3079638009094286e-07, "loss": 0.9003, "step": 46200 }, { "epoch": 2.7926512358735724, "grad_norm": 0.6788177111866023, "learning_rate": 1.3004056950467135e-07, "loss": 0.897, "step": 46210 }, { "epoch": 2.7932555750287063, "grad_norm": 0.7283795620806107, "learning_rate": 1.2928692021473231e-07, "loss": 0.8723, "step": 46220 }, { "epoch": 2.79385991418384, "grad_norm": 0.6838402674598276, "learning_rate": 1.2853543255559542e-07, "loss": 0.8841, "step": 46230 }, { "epoch": 2.794464253338974, "grad_norm": 0.7045179880227102, "learning_rate": 1.2778610686077274e-07, "loss": 0.8916, "step": 46240 }, { "epoch": 2.7950685924941077, "grad_norm": 0.7993308797096246, "learning_rate": 1.270389434628172e-07, "loss": 0.8572, "step": 46250 }, { "epoch": 2.7956729316492415, "grad_norm": 0.8571924094544897, "learning_rate": 1.262939426933202e-07, "loss": 0.8867, "step": 46260 }, { "epoch": 2.7962772708043753, "grad_norm": 0.8306968904325757, "learning_rate": 1.255511048829139e-07, "loss": 0.8717, "step": 46270 }, { "epoch": 2.7968816099595095, "grad_norm": 0.8465291540343134, "learning_rate": 1.2481043036127238e-07, "loss": 0.8838, "step": 46280 }, { "epoch": 2.797485949114643, "grad_norm": 0.8117943044821256, "learning_rate": 1.2407191945710718e-07, "loss": 0.8646, "step": 46290 }, { "epoch": 2.798090288269777, "grad_norm": 0.8373645276096434, "learning_rate": 1.2333557249817107e-07, "loss": 0.8978, "step": 46300 }, { "epoch": 2.798694627424911, "grad_norm": 0.8100752631612739, "learning_rate": 1.226013898112566e-07, "loss": 0.8922, "step": 46310 }, { "epoch": 2.7992989665800447, "grad_norm": 0.8183693837632486, "learning_rate": 1.218693717221947e-07, "loss": 0.9128, "step": 46320 }, { "epoch": 2.7999033057351785, "grad_norm": 0.8012229286627883, "learning_rate": 1.211395185558556e-07, "loss": 0.8729, "step": 46330 }, { "epoch": 2.8005076448903123, "grad_norm": 0.8390485626989796, "learning_rate": 1.2041183063615126e-07, "loss": 0.8774, "step": 46340 }, { "epoch": 2.801111984045446, "grad_norm": 1.3003713487067985, "learning_rate": 1.196863082860289e-07, "loss": 0.8884, "step": 46350 }, { "epoch": 2.80171632320058, "grad_norm": 1.1966820725481797, "learning_rate": 1.1896295182747709e-07, "loss": 0.8729, "step": 46360 }, { "epoch": 2.802320662355714, "grad_norm": 1.2283574882036994, "learning_rate": 1.1824176158152346e-07, "loss": 0.9012, "step": 46370 }, { "epoch": 2.802925001510848, "grad_norm": 1.2925235894327627, "learning_rate": 1.175227378682331e-07, "loss": 0.8767, "step": 46380 }, { "epoch": 2.803529340665982, "grad_norm": 1.3404863705472452, "learning_rate": 1.1680588100670964e-07, "loss": 0.8973, "step": 46390 }, { "epoch": 2.8041336798211156, "grad_norm": 0.8819776330937119, "learning_rate": 1.1609119131509583e-07, "loss": 0.8977, "step": 46400 }, { "epoch": 2.8047380189762494, "grad_norm": 1.3152709206599904, "learning_rate": 1.1537866911057183e-07, "loss": 0.8782, "step": 46410 }, { "epoch": 2.805342358131383, "grad_norm": 1.1882071994148973, "learning_rate": 1.146683147093558e-07, "loss": 0.8647, "step": 46420 }, { "epoch": 2.805946697286517, "grad_norm": 0.7900719459903853, "learning_rate": 1.1396012842670556e-07, "loss": 0.8659, "step": 46430 }, { "epoch": 2.8065510364416513, "grad_norm": 1.024499184682017, "learning_rate": 1.132541105769136e-07, "loss": 0.8795, "step": 46440 }, { "epoch": 2.807155375596785, "grad_norm": 2.2376591793600547, "learning_rate": 1.1255026147331316e-07, "loss": 0.8906, "step": 46450 }, { "epoch": 2.807759714751919, "grad_norm": 2.556276121537409, "learning_rate": 1.1184858142827382e-07, "loss": 0.8678, "step": 46460 }, { "epoch": 2.8083640539070527, "grad_norm": 2.3118055345112056, "learning_rate": 1.1114907075320147e-07, "loss": 0.8959, "step": 46470 }, { "epoch": 2.8089683930621865, "grad_norm": 2.5783981980041775, "learning_rate": 1.1045172975854057e-07, "loss": 0.8735, "step": 46480 }, { "epoch": 2.8095727322173203, "grad_norm": 2.346724448745516, "learning_rate": 1.0975655875377244e-07, "loss": 0.8707, "step": 46490 }, { "epoch": 2.810177071372454, "grad_norm": 1.043861123232615, "learning_rate": 1.090635580474153e-07, "loss": 0.8718, "step": 46500 }, { "epoch": 2.8107814105275883, "grad_norm": 0.9994293790810496, "learning_rate": 1.0837272794702314e-07, "loss": 0.9039, "step": 46510 }, { "epoch": 2.8113857496827217, "grad_norm": 0.9405530801990281, "learning_rate": 1.0768406875918847e-07, "loss": 0.8895, "step": 46520 }, { "epoch": 2.811990088837856, "grad_norm": 0.8708076821707352, "learning_rate": 1.0699758078953904e-07, "loss": 0.8929, "step": 46530 }, { "epoch": 2.8125944279929898, "grad_norm": 0.9292307378624338, "learning_rate": 1.0631326434273948e-07, "loss": 0.873, "step": 46540 }, { "epoch": 2.8131987671481236, "grad_norm": 0.9734444244612962, "learning_rate": 1.0563111972249018e-07, "loss": 0.8856, "step": 46550 }, { "epoch": 2.8138031063032574, "grad_norm": 0.9675862271215961, "learning_rate": 1.04951147231529e-07, "loss": 0.8655, "step": 46560 }, { "epoch": 2.814407445458391, "grad_norm": 0.9877806037397211, "learning_rate": 1.0427334717162841e-07, "loss": 0.8882, "step": 46570 }, { "epoch": 2.815011784613525, "grad_norm": 0.9748356763372517, "learning_rate": 1.0359771984359668e-07, "loss": 0.855, "step": 46580 }, { "epoch": 2.8156161237686588, "grad_norm": 1.1603181515046335, "learning_rate": 1.0292426554727953e-07, "loss": 0.8828, "step": 46590 }, { "epoch": 2.816220462923793, "grad_norm": 1.0489701291607032, "learning_rate": 1.0225298458155563e-07, "loss": 0.8747, "step": 46600 }, { "epoch": 2.816824802078927, "grad_norm": 1.1263964630292564, "learning_rate": 1.0158387724434171e-07, "loss": 0.8943, "step": 46610 }, { "epoch": 2.8174291412340606, "grad_norm": 1.0173615825873077, "learning_rate": 1.009169438325891e-07, "loss": 0.8525, "step": 46620 }, { "epoch": 2.8180334803891944, "grad_norm": 1.1670095734263473, "learning_rate": 1.002521846422827e-07, "loss": 0.8893, "step": 46630 }, { "epoch": 2.8186378195443282, "grad_norm": 1.0737130949007663, "learning_rate": 9.958959996844486e-08, "loss": 0.8706, "step": 46640 }, { "epoch": 2.819242158699462, "grad_norm": 1.168097788390359, "learning_rate": 9.892919010513147e-08, "loss": 0.8948, "step": 46650 }, { "epoch": 2.819846497854596, "grad_norm": 1.1803821624601578, "learning_rate": 9.827095534543307e-08, "loss": 0.9075, "step": 46660 }, { "epoch": 2.82045083700973, "grad_norm": 1.0261548376509666, "learning_rate": 9.761489598147655e-08, "loss": 0.8846, "step": 46670 }, { "epoch": 2.8210551761648635, "grad_norm": 1.1563884522704913, "learning_rate": 9.696101230442067e-08, "loss": 0.8839, "step": 46680 }, { "epoch": 2.8216595153199977, "grad_norm": 1.1706999991284068, "learning_rate": 9.630930460446163e-08, "loss": 0.8911, "step": 46690 }, { "epoch": 2.8222638544751315, "grad_norm": 1.311721728642041, "learning_rate": 9.565977317082698e-08, "loss": 0.8861, "step": 46700 }, { "epoch": 2.8228681936302653, "grad_norm": 1.2769329052758165, "learning_rate": 9.501241829178109e-08, "loss": 0.9037, "step": 46710 }, { "epoch": 2.823472532785399, "grad_norm": 1.2206326222224786, "learning_rate": 9.43672402546203e-08, "loss": 0.9067, "step": 46720 }, { "epoch": 2.824076871940533, "grad_norm": 1.181323136426952, "learning_rate": 9.37242393456761e-08, "loss": 0.8912, "step": 46730 }, { "epoch": 2.8246812110956667, "grad_norm": 1.2200265128827024, "learning_rate": 9.308341585031356e-08, "loss": 0.875, "step": 46740 }, { "epoch": 2.8252855502508005, "grad_norm": 1.014588840435437, "learning_rate": 9.244477005293129e-08, "loss": 0.8915, "step": 46750 }, { "epoch": 2.8258898894059348, "grad_norm": 0.9571812270972648, "learning_rate": 9.180830223696036e-08, "loss": 0.9121, "step": 46760 }, { "epoch": 2.8264942285610686, "grad_norm": 1.0493186897081646, "learning_rate": 9.117401268486814e-08, "loss": 0.8826, "step": 46770 }, { "epoch": 2.8270985677162024, "grad_norm": 1.0642303083861215, "learning_rate": 9.054190167815114e-08, "loss": 0.8859, "step": 46780 }, { "epoch": 2.827702906871336, "grad_norm": 1.0278710984688342, "learning_rate": 8.991196949734327e-08, "loss": 0.8623, "step": 46790 }, { "epoch": 2.82830724602647, "grad_norm": 1.0879670282751883, "learning_rate": 8.928421642200869e-08, "loss": 0.8858, "step": 46800 }, { "epoch": 2.828911585181604, "grad_norm": 1.0288407306604375, "learning_rate": 8.865864273074565e-08, "loss": 0.8781, "step": 46810 }, { "epoch": 2.8295159243367376, "grad_norm": 1.0787730109493485, "learning_rate": 8.803524870118374e-08, "loss": 0.885, "step": 46820 }, { "epoch": 2.830120263491872, "grad_norm": 1.077160638388282, "learning_rate": 8.741403460998721e-08, "loss": 0.8704, "step": 46830 }, { "epoch": 2.8307246026470056, "grad_norm": 1.0204152608102433, "learning_rate": 8.679500073285274e-08, "loss": 0.8982, "step": 46840 }, { "epoch": 2.8313289418021395, "grad_norm": 1.021120619901927, "learning_rate": 8.617814734450669e-08, "loss": 0.895, "step": 46850 }, { "epoch": 2.8319332809572733, "grad_norm": 1.0219136918527307, "learning_rate": 8.556347471871118e-08, "loss": 0.8772, "step": 46860 }, { "epoch": 2.832537620112407, "grad_norm": 1.0673649062757689, "learning_rate": 8.495098312825856e-08, "loss": 0.8896, "step": 46870 }, { "epoch": 2.833141959267541, "grad_norm": 1.1195832357987203, "learning_rate": 8.434067284497304e-08, "loss": 0.9054, "step": 46880 }, { "epoch": 2.8337462984226747, "grad_norm": 1.0242138680746011, "learning_rate": 8.373254413971243e-08, "loss": 0.8871, "step": 46890 }, { "epoch": 2.834350637577809, "grad_norm": 1.230775567341354, "learning_rate": 8.312659728236471e-08, "loss": 0.8855, "step": 46900 }, { "epoch": 2.8349549767329423, "grad_norm": 1.2705193051277628, "learning_rate": 8.252283254184979e-08, "loss": 0.8959, "step": 46910 }, { "epoch": 2.8355593158880765, "grad_norm": 1.170057308394656, "learning_rate": 8.192125018611996e-08, "loss": 0.9018, "step": 46920 }, { "epoch": 2.8361636550432103, "grad_norm": 1.2200485884000922, "learning_rate": 8.132185048215835e-08, "loss": 0.9074, "step": 46930 }, { "epoch": 2.836767994198344, "grad_norm": 1.360498759458052, "learning_rate": 8.072463369597994e-08, "loss": 0.8915, "step": 46940 }, { "epoch": 2.837372333353478, "grad_norm": 1.1551758157361498, "learning_rate": 8.012960009262993e-08, "loss": 0.874, "step": 46950 }, { "epoch": 2.8379766725086117, "grad_norm": 1.1353881574746296, "learning_rate": 7.953674993618599e-08, "loss": 0.8864, "step": 46960 }, { "epoch": 2.8385810116637455, "grad_norm": 1.1125432873158816, "learning_rate": 7.894608348975541e-08, "loss": 0.8791, "step": 46970 }, { "epoch": 2.8391853508188793, "grad_norm": 1.2469623921709778, "learning_rate": 7.835760101547684e-08, "loss": 0.8754, "step": 46980 }, { "epoch": 2.8397896899740136, "grad_norm": 1.1181508124100181, "learning_rate": 7.777130277452082e-08, "loss": 0.868, "step": 46990 }, { "epoch": 2.8403940291291474, "grad_norm": 1.2532438025038035, "learning_rate": 7.718718902708589e-08, "loss": 0.8857, "step": 47000 }, { "epoch": 2.840998368284281, "grad_norm": 1.2132650148866018, "learning_rate": 7.66052600324041e-08, "loss": 0.8832, "step": 47010 }, { "epoch": 2.841602707439415, "grad_norm": 1.2552440728721452, "learning_rate": 7.602551604873665e-08, "loss": 0.8784, "step": 47020 }, { "epoch": 2.842207046594549, "grad_norm": 1.249658242160488, "learning_rate": 7.544795733337384e-08, "loss": 0.9048, "step": 47030 }, { "epoch": 2.8428113857496826, "grad_norm": 1.2234935983664768, "learning_rate": 7.487258414263732e-08, "loss": 0.8924, "step": 47040 }, { "epoch": 2.8434157249048164, "grad_norm": 1.4385600249385777, "learning_rate": 7.429939673188003e-08, "loss": 0.8917, "step": 47050 }, { "epoch": 2.8440200640599507, "grad_norm": 1.4487850586067208, "learning_rate": 7.372839535548182e-08, "loss": 0.8882, "step": 47060 }, { "epoch": 2.844624403215084, "grad_norm": 1.4570299716789237, "learning_rate": 7.315958026685555e-08, "loss": 0.9054, "step": 47070 }, { "epoch": 2.8452287423702183, "grad_norm": 1.3553314367497842, "learning_rate": 7.25929517184415e-08, "loss": 0.8788, "step": 47080 }, { "epoch": 2.845833081525352, "grad_norm": 1.506834707658984, "learning_rate": 7.202850996171018e-08, "loss": 0.8908, "step": 47090 }, { "epoch": 2.846437420680486, "grad_norm": 0.907679281978945, "learning_rate": 7.146625524716233e-08, "loss": 0.8972, "step": 47100 }, { "epoch": 2.8470417598356197, "grad_norm": 0.920633696020761, "learning_rate": 7.090618782432778e-08, "loss": 0.9007, "step": 47110 }, { "epoch": 2.8476460989907535, "grad_norm": 0.9474354164229694, "learning_rate": 7.034830794176439e-08, "loss": 0.8777, "step": 47120 }, { "epoch": 2.8482504381458877, "grad_norm": 0.8976442716170314, "learning_rate": 6.979261584706132e-08, "loss": 0.8723, "step": 47130 }, { "epoch": 2.848854777301021, "grad_norm": 0.9841409651818649, "learning_rate": 6.923911178683518e-08, "loss": 0.8794, "step": 47140 }, { "epoch": 2.8494591164561553, "grad_norm": 0.8804013532486229, "learning_rate": 6.868779600673226e-08, "loss": 0.889, "step": 47150 }, { "epoch": 2.850063455611289, "grad_norm": 0.8852889331177264, "learning_rate": 6.813866875142683e-08, "loss": 0.8951, "step": 47160 }, { "epoch": 2.850667794766423, "grad_norm": 0.8098427558182276, "learning_rate": 6.759173026462285e-08, "loss": 0.8771, "step": 47170 }, { "epoch": 2.8512721339215568, "grad_norm": 0.8850582175581926, "learning_rate": 6.704698078905281e-08, "loss": 0.9061, "step": 47180 }, { "epoch": 2.8518764730766906, "grad_norm": 0.8148389397287765, "learning_rate": 6.650442056647721e-08, "loss": 0.8766, "step": 47190 }, { "epoch": 2.8524808122318244, "grad_norm": 0.8667722304988269, "learning_rate": 6.596404983768512e-08, "loss": 0.859, "step": 47200 }, { "epoch": 2.853085151386958, "grad_norm": 0.8414301200296546, "learning_rate": 6.54258688424947e-08, "loss": 0.8863, "step": 47210 }, { "epoch": 2.8536894905420924, "grad_norm": 0.8366656816231236, "learning_rate": 6.488987781975042e-08, "loss": 0.9028, "step": 47220 }, { "epoch": 2.8542938296972262, "grad_norm": 0.7745611905990553, "learning_rate": 6.435607700732649e-08, "loss": 0.8702, "step": 47230 }, { "epoch": 2.85489816885236, "grad_norm": 0.8242775179155521, "learning_rate": 6.38244666421256e-08, "loss": 0.8738, "step": 47240 }, { "epoch": 2.855502508007494, "grad_norm": 0.6496646518401458, "learning_rate": 6.329504696007571e-08, "loss": 0.8632, "step": 47250 }, { "epoch": 2.8561068471626276, "grad_norm": 0.6345989415982591, "learning_rate": 6.276781819613498e-08, "loss": 0.8774, "step": 47260 }, { "epoch": 2.8567111863177614, "grad_norm": 0.6235959709723341, "learning_rate": 6.224278058428901e-08, "loss": 0.8847, "step": 47270 }, { "epoch": 2.8573155254728952, "grad_norm": 0.6753358926569923, "learning_rate": 6.171993435754974e-08, "loss": 0.9056, "step": 47280 }, { "epoch": 2.8579198646280295, "grad_norm": 0.6275729174849449, "learning_rate": 6.119927974795659e-08, "loss": 0.8847, "step": 47290 }, { "epoch": 2.858524203783163, "grad_norm": 0.7072188642614241, "learning_rate": 6.068081698657857e-08, "loss": 0.8856, "step": 47300 }, { "epoch": 2.859128542938297, "grad_norm": 0.6865434161367749, "learning_rate": 6.016454630350888e-08, "loss": 0.908, "step": 47310 }, { "epoch": 2.859732882093431, "grad_norm": 0.6583015188338257, "learning_rate": 5.965046792786977e-08, "loss": 0.8702, "step": 47320 }, { "epoch": 2.8603372212485647, "grad_norm": 0.7072343862163731, "learning_rate": 5.913858208781043e-08, "loss": 0.8943, "step": 47330 }, { "epoch": 2.8609415604036985, "grad_norm": 0.6780368034978116, "learning_rate": 5.862888901050634e-08, "loss": 0.8667, "step": 47340 }, { "epoch": 2.8615458995588323, "grad_norm": 0.8229330433280384, "learning_rate": 5.812138892216046e-08, "loss": 0.9096, "step": 47350 }, { "epoch": 2.862150238713966, "grad_norm": 0.8321494776305733, "learning_rate": 5.761608204800206e-08, "loss": 0.8775, "step": 47360 }, { "epoch": 2.8627545778691, "grad_norm": 0.8588862094950755, "learning_rate": 5.7112968612286765e-08, "loss": 0.8831, "step": 47370 }, { "epoch": 2.863358917024234, "grad_norm": 0.9435169344902588, "learning_rate": 5.661204883829763e-08, "loss": 0.8716, "step": 47380 }, { "epoch": 2.863963256179368, "grad_norm": 0.9734734147603962, "learning_rate": 5.61133229483446e-08, "loss": 0.8815, "step": 47390 }, { "epoch": 2.864567595334502, "grad_norm": 0.573786678514249, "learning_rate": 5.561679116376117e-08, "loss": 0.8752, "step": 47400 }, { "epoch": 2.8651719344896356, "grad_norm": 0.6191535919663005, "learning_rate": 5.512245370490998e-08, "loss": 0.8826, "step": 47410 }, { "epoch": 2.8657762736447694, "grad_norm": 0.6142616597429817, "learning_rate": 5.463031079117942e-08, "loss": 0.8775, "step": 47420 }, { "epoch": 2.866380612799903, "grad_norm": 0.6139070308326352, "learning_rate": 5.414036264098199e-08, "loss": 0.8661, "step": 47430 }, { "epoch": 2.866984951955037, "grad_norm": 0.6339430008827052, "learning_rate": 5.365260947175877e-08, "loss": 0.86, "step": 47440 }, { "epoch": 2.8675892911101712, "grad_norm": 0.6014077080311432, "learning_rate": 5.316705149997492e-08, "loss": 0.888, "step": 47450 }, { "epoch": 2.868193630265305, "grad_norm": 0.5961247251355861, "learning_rate": 5.2683688941121966e-08, "loss": 0.8806, "step": 47460 }, { "epoch": 2.868797969420439, "grad_norm": 0.5974730287761562, "learning_rate": 5.2202522009716647e-08, "loss": 0.8811, "step": 47470 }, { "epoch": 2.8694023085755727, "grad_norm": 0.564147768905625, "learning_rate": 5.172355091930259e-08, "loss": 0.8977, "step": 47480 }, { "epoch": 2.8700066477307065, "grad_norm": 0.590412262492525, "learning_rate": 5.124677588244698e-08, "loss": 0.8791, "step": 47490 }, { "epoch": 2.8706109868858403, "grad_norm": 0.5636910791058279, "learning_rate": 5.077219711074388e-08, "loss": 0.8761, "step": 47500 }, { "epoch": 2.871215326040974, "grad_norm": 0.586483313663193, "learning_rate": 5.029981481481261e-08, "loss": 0.887, "step": 47510 }, { "epoch": 2.8718196651961083, "grad_norm": 0.5932535536691216, "learning_rate": 4.982962920429657e-08, "loss": 0.9017, "step": 47520 }, { "epoch": 2.8724240043512417, "grad_norm": 0.6065631818589902, "learning_rate": 4.93616404878644e-08, "loss": 0.8784, "step": 47530 }, { "epoch": 2.873028343506376, "grad_norm": 0.6011846516671374, "learning_rate": 4.889584887321164e-08, "loss": 0.8696, "step": 47540 }, { "epoch": 2.8736326826615097, "grad_norm": 0.5803400381946026, "learning_rate": 4.84322545670568e-08, "loss": 0.8775, "step": 47550 }, { "epoch": 2.8742370218166435, "grad_norm": 0.5703820696290958, "learning_rate": 4.797085777514365e-08, "loss": 0.8762, "step": 47560 }, { "epoch": 2.8748413609717773, "grad_norm": 0.5768360571919828, "learning_rate": 4.751165870224117e-08, "loss": 0.8815, "step": 47570 }, { "epoch": 2.875445700126911, "grad_norm": 0.5850061461151962, "learning_rate": 4.7054657552143e-08, "loss": 0.8584, "step": 47580 }, { "epoch": 2.876050039282045, "grad_norm": 0.5914027341537174, "learning_rate": 4.65998545276658e-08, "loss": 0.8738, "step": 47590 }, { "epoch": 2.8766543784371787, "grad_norm": 0.6017551131997185, "learning_rate": 4.614724983065311e-08, "loss": 0.913, "step": 47600 }, { "epoch": 2.877258717592313, "grad_norm": 0.5979821184315797, "learning_rate": 4.569684366197147e-08, "loss": 0.8994, "step": 47610 }, { "epoch": 2.877863056747447, "grad_norm": 0.5962390370357233, "learning_rate": 4.5248636221511543e-08, "loss": 0.8763, "step": 47620 }, { "epoch": 2.8784673959025806, "grad_norm": 0.6200529278695698, "learning_rate": 4.4802627708188105e-08, "loss": 0.8984, "step": 47630 }, { "epoch": 2.8790717350577144, "grad_norm": 0.6336552635198061, "learning_rate": 4.4358818319942266e-08, "loss": 0.8778, "step": 47640 }, { "epoch": 2.879676074212848, "grad_norm": 0.6037715073948364, "learning_rate": 4.391720825373535e-08, "loss": 0.8739, "step": 47650 }, { "epoch": 2.880280413367982, "grad_norm": 0.6212368508294195, "learning_rate": 4.3477797705556156e-08, "loss": 0.8719, "step": 47660 }, { "epoch": 2.880884752523116, "grad_norm": 0.6252612210117532, "learning_rate": 4.3040586870415346e-08, "loss": 0.8989, "step": 47670 }, { "epoch": 2.88148909167825, "grad_norm": 0.5922084095551822, "learning_rate": 4.2605575942347154e-08, "loss": 0.8888, "step": 47680 }, { "epoch": 2.8820934308333834, "grad_norm": 0.6152783038963051, "learning_rate": 4.217276511441104e-08, "loss": 0.8819, "step": 47690 }, { "epoch": 2.8826977699885177, "grad_norm": 0.6756744366354618, "learning_rate": 4.1742154578688906e-08, "loss": 0.8694, "step": 47700 }, { "epoch": 2.8833021091436515, "grad_norm": 0.6987691784481876, "learning_rate": 4.131374452628678e-08, "loss": 0.8725, "step": 47710 }, { "epoch": 2.8839064482987853, "grad_norm": 0.6437287514862596, "learning_rate": 4.0887535147333125e-08, "loss": 0.9037, "step": 47720 }, { "epoch": 2.884510787453919, "grad_norm": 0.6385553672862551, "learning_rate": 4.046352663098107e-08, "loss": 0.8581, "step": 47730 }, { "epoch": 2.885115126609053, "grad_norm": 0.691475279436729, "learning_rate": 4.004171916540567e-08, "loss": 0.8842, "step": 47740 }, { "epoch": 2.8857194657641867, "grad_norm": 0.7065817536846365, "learning_rate": 3.962211293780549e-08, "loss": 0.8901, "step": 47750 }, { "epoch": 2.8863238049193205, "grad_norm": 0.7850325936349539, "learning_rate": 3.9204708134404355e-08, "loss": 0.8884, "step": 47760 }, { "epoch": 2.8869281440744547, "grad_norm": 0.7554187281668162, "learning_rate": 3.87895049404452e-08, "loss": 0.8723, "step": 47770 }, { "epoch": 2.8875324832295886, "grad_norm": 0.7844783819668697, "learning_rate": 3.837650354019673e-08, "loss": 0.8867, "step": 47780 }, { "epoch": 2.8881368223847224, "grad_norm": 0.7187229970698894, "learning_rate": 3.796570411694955e-08, "loss": 0.87, "step": 47790 }, { "epoch": 2.888741161539856, "grad_norm": 0.6568169544101209, "learning_rate": 3.755710685301728e-08, "loss": 0.8736, "step": 47800 }, { "epoch": 2.88934550069499, "grad_norm": 0.7062961033925668, "learning_rate": 3.7150711929735964e-08, "loss": 0.8994, "step": 47810 }, { "epoch": 2.8899498398501238, "grad_norm": 0.6620164459446279, "learning_rate": 3.674651952746522e-08, "loss": 0.8818, "step": 47820 }, { "epoch": 2.8905541790052576, "grad_norm": 0.6931280661951346, "learning_rate": 3.634452982558489e-08, "loss": 0.8825, "step": 47830 }, { "epoch": 2.891158518160392, "grad_norm": 0.6840717502640693, "learning_rate": 3.594474300249895e-08, "loss": 0.8766, "step": 47840 }, { "epoch": 2.8917628573155256, "grad_norm": 0.866572671607689, "learning_rate": 3.5547159235634344e-08, "loss": 0.893, "step": 47850 }, { "epoch": 2.8923671964706594, "grad_norm": 0.8739253775433515, "learning_rate": 3.515177870143938e-08, "loss": 0.9044, "step": 47860 }, { "epoch": 2.8929715356257932, "grad_norm": 0.8193532824668441, "learning_rate": 3.4758601575383686e-08, "loss": 0.8765, "step": 47870 }, { "epoch": 2.893575874780927, "grad_norm": 0.8031726221399412, "learning_rate": 3.4367628031959924e-08, "loss": 0.8693, "step": 47880 }, { "epoch": 2.894180213936061, "grad_norm": 0.8798552597475637, "learning_rate": 3.397885824468428e-08, "loss": 0.8782, "step": 47890 }, { "epoch": 2.8947845530911946, "grad_norm": 0.8233590042699582, "learning_rate": 3.359229238609207e-08, "loss": 0.8886, "step": 47900 }, { "epoch": 2.895388892246329, "grad_norm": 0.7913200132565852, "learning_rate": 3.320793062774275e-08, "loss": 0.882, "step": 47910 }, { "epoch": 2.8959932314014623, "grad_norm": 0.8136735262509437, "learning_rate": 3.282577314021596e-08, "loss": 0.8817, "step": 47920 }, { "epoch": 2.8965975705565965, "grad_norm": 0.8197318711129136, "learning_rate": 3.2445820093114365e-08, "loss": 0.8688, "step": 47930 }, { "epoch": 2.8972019097117303, "grad_norm": 0.8022732632023085, "learning_rate": 3.2068071655061426e-08, "loss": 0.8935, "step": 47940 }, { "epoch": 2.897806248866864, "grad_norm": 1.3033310600292838, "learning_rate": 3.169252799370304e-08, "loss": 0.8767, "step": 47950 }, { "epoch": 2.898410588021998, "grad_norm": 1.1757968660352565, "learning_rate": 3.131918927570532e-08, "loss": 0.8692, "step": 47960 }, { "epoch": 2.8990149271771317, "grad_norm": 1.2695810916233092, "learning_rate": 3.0948055666757404e-08, "loss": 0.8999, "step": 47970 }, { "epoch": 2.8996192663322655, "grad_norm": 1.283341008376628, "learning_rate": 3.0579127331569204e-08, "loss": 0.8636, "step": 47980 }, { "epoch": 2.9002236054873993, "grad_norm": 1.1818688417448828, "learning_rate": 3.02124044338703e-08, "loss": 0.887, "step": 47990 }, { "epoch": 2.9008279446425336, "grad_norm": 0.9026133091409173, "learning_rate": 2.98478871364144e-08, "loss": 0.9065, "step": 48000 }, { "epoch": 2.9014322837976674, "grad_norm": 1.047480065952849, "learning_rate": 2.948557560097487e-08, "loss": 0.8782, "step": 48010 }, { "epoch": 2.902036622952801, "grad_norm": 0.9303797384411031, "learning_rate": 2.9125469988345312e-08, "loss": 0.8851, "step": 48020 }, { "epoch": 2.902640962107935, "grad_norm": 0.9802651332582745, "learning_rate": 2.8767570458341775e-08, "loss": 0.8957, "step": 48030 }, { "epoch": 2.903245301263069, "grad_norm": 0.8896807070238727, "learning_rate": 2.841187716980054e-08, "loss": 0.8687, "step": 48040 }, { "epoch": 2.9038496404182026, "grad_norm": 2.500513041071473, "learning_rate": 2.8058390280578662e-08, "loss": 0.8643, "step": 48050 }, { "epoch": 2.9044539795733364, "grad_norm": 2.3990638653765615, "learning_rate": 2.7707109947554545e-08, "loss": 0.8858, "step": 48060 }, { "epoch": 2.9050583187284706, "grad_norm": 2.7711905370017194, "learning_rate": 2.735803632662737e-08, "loss": 0.8753, "step": 48070 }, { "epoch": 2.905662657883604, "grad_norm": 2.7376534823592937, "learning_rate": 2.701116957271599e-08, "loss": 0.8835, "step": 48080 }, { "epoch": 2.9062669970387383, "grad_norm": 2.57357269387293, "learning_rate": 2.6666509839760602e-08, "loss": 0.9, "step": 48090 }, { "epoch": 2.906871336193872, "grad_norm": 1.020523450808735, "learning_rate": 2.6324057280721627e-08, "loss": 0.869, "step": 48100 }, { "epoch": 2.907475675349006, "grad_norm": 0.8953639467395756, "learning_rate": 2.5983812047580824e-08, "loss": 0.8761, "step": 48110 }, { "epoch": 2.9080800145041397, "grad_norm": 0.9106429177907563, "learning_rate": 2.5645774291338522e-08, "loss": 0.8938, "step": 48120 }, { "epoch": 2.9086843536592735, "grad_norm": 0.9257714897382723, "learning_rate": 2.5309944162017486e-08, "loss": 0.8866, "step": 48130 }, { "epoch": 2.9092886928144077, "grad_norm": 0.9245962575317234, "learning_rate": 2.4976321808659054e-08, "loss": 0.87, "step": 48140 }, { "epoch": 2.909893031969541, "grad_norm": 1.0882567199495, "learning_rate": 2.4644907379325345e-08, "loss": 0.903, "step": 48150 }, { "epoch": 2.9104973711246753, "grad_norm": 1.0279581207737698, "learning_rate": 2.4315701021099258e-08, "loss": 0.8672, "step": 48160 }, { "epoch": 2.911101710279809, "grad_norm": 1.0636935510244523, "learning_rate": 2.3988702880082815e-08, "loss": 0.8889, "step": 48170 }, { "epoch": 2.911706049434943, "grad_norm": 0.9586831525230265, "learning_rate": 2.3663913101397707e-08, "loss": 0.8729, "step": 48180 }, { "epoch": 2.9123103885900767, "grad_norm": 1.1843819294379931, "learning_rate": 2.3341331829187518e-08, "loss": 0.8678, "step": 48190 }, { "epoch": 2.9129147277452105, "grad_norm": 0.9836143279433839, "learning_rate": 2.302095920661329e-08, "loss": 0.8816, "step": 48200 }, { "epoch": 2.9135190669003443, "grad_norm": 1.028957684726055, "learning_rate": 2.27027953758574e-08, "loss": 0.8645, "step": 48210 }, { "epoch": 2.914123406055478, "grad_norm": 1.089780173699639, "learning_rate": 2.2386840478121342e-08, "loss": 0.8551, "step": 48220 }, { "epoch": 2.9147277452106124, "grad_norm": 1.0001868740979094, "learning_rate": 2.2073094653626836e-08, "loss": 0.8663, "step": 48230 }, { "epoch": 2.915332084365746, "grad_norm": 1.0603245706445, "learning_rate": 2.1761558041614174e-08, "loss": 0.8895, "step": 48240 }, { "epoch": 2.91593642352088, "grad_norm": 1.1025176359618816, "learning_rate": 2.1452230780343863e-08, "loss": 0.8776, "step": 48250 }, { "epoch": 2.916540762676014, "grad_norm": 1.1304699483778677, "learning_rate": 2.1145113007096652e-08, "loss": 0.8793, "step": 48260 }, { "epoch": 2.9171451018311476, "grad_norm": 1.1529694127797536, "learning_rate": 2.0840204858171287e-08, "loss": 0.9053, "step": 48270 }, { "epoch": 2.9177494409862814, "grad_norm": 1.1131795130265054, "learning_rate": 2.0537506468886192e-08, "loss": 0.896, "step": 48280 }, { "epoch": 2.918353780141415, "grad_norm": 1.1483771805940317, "learning_rate": 2.0237017973580576e-08, "loss": 0.8878, "step": 48290 }, { "epoch": 2.9189581192965495, "grad_norm": 1.2378147073709478, "learning_rate": 1.9938739505610537e-08, "loss": 0.9153, "step": 48300 }, { "epoch": 2.919562458451683, "grad_norm": 1.1276109034490809, "learning_rate": 1.9642671197353525e-08, "loss": 0.8782, "step": 48310 }, { "epoch": 2.920166797606817, "grad_norm": 1.1804055671026048, "learning_rate": 1.934881318020443e-08, "loss": 0.8926, "step": 48320 }, { "epoch": 2.920771136761951, "grad_norm": 1.1453111272567047, "learning_rate": 1.9057165584578375e-08, "loss": 0.8842, "step": 48330 }, { "epoch": 2.9213754759170847, "grad_norm": 1.1895832535428406, "learning_rate": 1.8767728539909047e-08, "loss": 0.8727, "step": 48340 }, { "epoch": 2.9219798150722185, "grad_norm": 1.054963680278825, "learning_rate": 1.8480502174649252e-08, "loss": 0.8765, "step": 48350 }, { "epoch": 2.9225841542273523, "grad_norm": 1.1353672251489595, "learning_rate": 1.8195486616269797e-08, "loss": 0.881, "step": 48360 }, { "epoch": 2.923188493382486, "grad_norm": 0.9942376652737575, "learning_rate": 1.7912681991261728e-08, "loss": 0.887, "step": 48370 }, { "epoch": 2.92379283253762, "grad_norm": 1.041258106423287, "learning_rate": 1.763208842513464e-08, "loss": 0.8939, "step": 48380 }, { "epoch": 2.924397171692754, "grad_norm": 1.0094817412444108, "learning_rate": 1.7353706042415042e-08, "loss": 0.8919, "step": 48390 }, { "epoch": 2.925001510847888, "grad_norm": 1.1270933287404508, "learning_rate": 1.7077534966650767e-08, "loss": 0.8719, "step": 48400 }, { "epoch": 2.9256058500030218, "grad_norm": 1.1054662519323466, "learning_rate": 1.680357532040655e-08, "loss": 0.866, "step": 48410 }, { "epoch": 2.9262101891581556, "grad_norm": 1.043973034769781, "learning_rate": 1.653182722526625e-08, "loss": 0.8775, "step": 48420 }, { "epoch": 2.9268145283132894, "grad_norm": 1.0751898591350448, "learning_rate": 1.6262290801831726e-08, "loss": 0.8867, "step": 48430 }, { "epoch": 2.927418867468423, "grad_norm": 1.0749551779575743, "learning_rate": 1.5994966169724512e-08, "loss": 0.8628, "step": 48440 }, { "epoch": 2.928023206623557, "grad_norm": 1.1076709077630909, "learning_rate": 1.5729853447583044e-08, "loss": 0.8811, "step": 48450 }, { "epoch": 2.928627545778691, "grad_norm": 1.0501163117814052, "learning_rate": 1.546695275306487e-08, "loss": 0.8985, "step": 48460 }, { "epoch": 2.929231884933825, "grad_norm": 1.057995129125902, "learning_rate": 1.52062642028461e-08, "loss": 0.8875, "step": 48470 }, { "epoch": 2.929836224088959, "grad_norm": 1.0435990242800204, "learning_rate": 1.4947787912620304e-08, "loss": 0.8826, "step": 48480 }, { "epoch": 2.9304405632440926, "grad_norm": 1.0624525626936585, "learning_rate": 1.4691523997100165e-08, "loss": 0.8561, "step": 48490 }, { "epoch": 2.9310449023992264, "grad_norm": 1.2022924755368345, "learning_rate": 1.4437472570016376e-08, "loss": 0.8812, "step": 48500 }, { "epoch": 2.9316492415543602, "grad_norm": 1.2850191129122155, "learning_rate": 1.4185633744116523e-08, "loss": 0.8835, "step": 48510 }, { "epoch": 2.932253580709494, "grad_norm": 1.2049387789591448, "learning_rate": 1.3936007631167314e-08, "loss": 0.8672, "step": 48520 }, { "epoch": 2.9328579198646283, "grad_norm": 1.29483095187272, "learning_rate": 1.3688594341953465e-08, "loss": 0.8955, "step": 48530 }, { "epoch": 2.9334622590197617, "grad_norm": 1.195476951143966, "learning_rate": 1.3443393986277143e-08, "loss": 0.8993, "step": 48540 }, { "epoch": 2.934066598174896, "grad_norm": 1.1108318720046964, "learning_rate": 1.3200406672959077e-08, "loss": 0.8686, "step": 48550 }, { "epoch": 2.9346709373300297, "grad_norm": 1.1775040355041206, "learning_rate": 1.2959632509837449e-08, "loss": 0.9028, "step": 48560 }, { "epoch": 2.9352752764851635, "grad_norm": 1.0719683668734474, "learning_rate": 1.2721071603767899e-08, "loss": 0.878, "step": 48570 }, { "epoch": 2.9358796156402973, "grad_norm": 1.1538732366865068, "learning_rate": 1.248472406062462e-08, "loss": 0.8844, "step": 48580 }, { "epoch": 2.936483954795431, "grad_norm": 1.146548691738027, "learning_rate": 1.2250589985298155e-08, "loss": 0.8938, "step": 48590 }, { "epoch": 2.937088293950565, "grad_norm": 1.2013913050531153, "learning_rate": 1.2018669481698719e-08, "loss": 0.873, "step": 48600 }, { "epoch": 2.9376926331056987, "grad_norm": 1.2745246508822037, "learning_rate": 1.1788962652752312e-08, "loss": 0.8971, "step": 48610 }, { "epoch": 2.938296972260833, "grad_norm": 1.3084135195341944, "learning_rate": 1.1561469600403496e-08, "loss": 0.908, "step": 48620 }, { "epoch": 2.9389013114159668, "grad_norm": 1.2582654235480115, "learning_rate": 1.133619042561429e-08, "loss": 0.8644, "step": 48630 }, { "epoch": 2.9395056505711006, "grad_norm": 1.2688377667219746, "learning_rate": 1.1113125228363608e-08, "loss": 0.8864, "step": 48640 }, { "epoch": 2.9401099897262344, "grad_norm": 1.4564923069321634, "learning_rate": 1.0892274107647816e-08, "loss": 0.8878, "step": 48650 }, { "epoch": 2.940714328881368, "grad_norm": 1.4155443907845306, "learning_rate": 1.0673637161481842e-08, "loss": 0.8757, "step": 48660 }, { "epoch": 2.941318668036502, "grad_norm": 1.4997561993402064, "learning_rate": 1.045721448689696e-08, "loss": 0.9184, "step": 48670 }, { "epoch": 2.941923007191636, "grad_norm": 1.4623064230799816, "learning_rate": 1.024300617994134e-08, "loss": 0.8879, "step": 48680 }, { "epoch": 2.94252734634677, "grad_norm": 1.514055213760748, "learning_rate": 1.003101233568171e-08, "loss": 0.9062, "step": 48690 }, { "epoch": 2.9431316855019034, "grad_norm": 0.8963237880393616, "learning_rate": 9.821233048200596e-09, "loss": 0.8772, "step": 48700 }, { "epoch": 2.9437360246570377, "grad_norm": 0.8952764990034612, "learning_rate": 9.613668410599075e-09, "loss": 0.8718, "step": 48710 }, { "epoch": 2.9443403638121715, "grad_norm": 0.8975042641198281, "learning_rate": 9.408318514994574e-09, "loss": 0.9018, "step": 48720 }, { "epoch": 2.9449447029673053, "grad_norm": 0.9170974345942527, "learning_rate": 9.205183452520861e-09, "loss": 0.8969, "step": 48730 }, { "epoch": 2.945549042122439, "grad_norm": 0.9367375294242539, "learning_rate": 9.004263313330818e-09, "loss": 0.8792, "step": 48740 }, { "epoch": 2.946153381277573, "grad_norm": 0.813725206000509, "learning_rate": 8.805558186592566e-09, "loss": 0.9126, "step": 48750 }, { "epoch": 2.9467577204327067, "grad_norm": 0.8199137608373107, "learning_rate": 8.609068160492229e-09, "loss": 0.8703, "step": 48760 }, { "epoch": 2.9473620595878405, "grad_norm": 0.8002124846668378, "learning_rate": 8.414793322231718e-09, "loss": 0.8982, "step": 48770 }, { "epoch": 2.9479663987429747, "grad_norm": 0.8350032478867254, "learning_rate": 8.222733758030955e-09, "loss": 0.8955, "step": 48780 }, { "epoch": 2.9485707378981085, "grad_norm": 0.8259746165134852, "learning_rate": 8.032889553125644e-09, "loss": 0.8691, "step": 48790 }, { "epoch": 2.9491750770532423, "grad_norm": 0.8701461749155597, "learning_rate": 7.84526079177006e-09, "loss": 0.9011, "step": 48800 }, { "epoch": 2.949779416208376, "grad_norm": 0.8032006560678746, "learning_rate": 7.659847557233702e-09, "loss": 0.8733, "step": 48810 }, { "epoch": 2.95038375536351, "grad_norm": 0.8425075793643152, "learning_rate": 7.476649931802971e-09, "loss": 0.8741, "step": 48820 }, { "epoch": 2.9509880945186437, "grad_norm": 0.8846479838314857, "learning_rate": 7.295667996781719e-09, "loss": 0.8907, "step": 48830 }, { "epoch": 2.9515924336737775, "grad_norm": 0.9065296894692451, "learning_rate": 7.116901832489587e-09, "loss": 0.8846, "step": 48840 }, { "epoch": 2.952196772828912, "grad_norm": 0.6548291879523653, "learning_rate": 6.9403515182642215e-09, "loss": 0.8959, "step": 48850 }, { "epoch": 2.9528011119840456, "grad_norm": 0.6751675350049904, "learning_rate": 6.766017132457947e-09, "loss": 0.8735, "step": 48860 }, { "epoch": 2.9534054511391794, "grad_norm": 0.6479372730098051, "learning_rate": 6.593898752440542e-09, "loss": 0.8913, "step": 48870 }, { "epoch": 2.954009790294313, "grad_norm": 0.6775292365042954, "learning_rate": 6.42399645459979e-09, "loss": 0.8717, "step": 48880 }, { "epoch": 2.954614129449447, "grad_norm": 0.5785354213231347, "learning_rate": 6.256310314337044e-09, "loss": 0.8861, "step": 48890 }, { "epoch": 2.955218468604581, "grad_norm": 0.7234655571692659, "learning_rate": 6.090840406072773e-09, "loss": 0.8894, "step": 48900 }, { "epoch": 2.9558228077597146, "grad_norm": 0.6965411068752153, "learning_rate": 5.9275868032426795e-09, "loss": 0.8905, "step": 48910 }, { "epoch": 2.956427146914849, "grad_norm": 0.668334419531779, "learning_rate": 5.766549578298252e-09, "loss": 0.8803, "step": 48920 }, { "epoch": 2.9570314860699822, "grad_norm": 0.6929757132506598, "learning_rate": 5.607728802708434e-09, "loss": 0.8901, "step": 48930 }, { "epoch": 2.9576358252251165, "grad_norm": 0.6791935173477074, "learning_rate": 5.4511245469590635e-09, "loss": 0.8911, "step": 48940 }, { "epoch": 2.9582401643802503, "grad_norm": 0.8245425632724706, "learning_rate": 5.296736880550102e-09, "loss": 0.8655, "step": 48950 }, { "epoch": 2.958844503535384, "grad_norm": 0.8980330785952828, "learning_rate": 5.144565871999518e-09, "loss": 0.8862, "step": 48960 }, { "epoch": 2.959448842690518, "grad_norm": 0.95339056150385, "learning_rate": 4.994611588841624e-09, "loss": 0.8908, "step": 48970 }, { "epoch": 2.9600531818456517, "grad_norm": 0.8792979966516095, "learning_rate": 4.846874097625409e-09, "loss": 0.8749, "step": 48980 }, { "epoch": 2.9606575210007855, "grad_norm": 0.8419969610509209, "learning_rate": 4.701353463917868e-09, "loss": 0.9028, "step": 48990 }, { "epoch": 2.9612618601559193, "grad_norm": 0.5745110143833063, "learning_rate": 4.558049752300675e-09, "loss": 0.8741, "step": 49000 }, { "epoch": 2.9618661993110535, "grad_norm": 0.5823552936408872, "learning_rate": 4.416963026372955e-09, "loss": 0.8848, "step": 49010 }, { "epoch": 2.9624705384661874, "grad_norm": 0.5926183017853066, "learning_rate": 4.278093348748513e-09, "loss": 0.8727, "step": 49020 }, { "epoch": 2.963074877621321, "grad_norm": 0.6096019853074819, "learning_rate": 4.141440781058048e-09, "loss": 0.8729, "step": 49030 }, { "epoch": 2.963679216776455, "grad_norm": 0.5786432726139871, "learning_rate": 4.007005383948603e-09, "loss": 0.8788, "step": 49040 }, { "epoch": 2.9642835559315888, "grad_norm": 0.5666405146170521, "learning_rate": 3.874787217083009e-09, "loss": 0.9124, "step": 49050 }, { "epoch": 2.9648878950867226, "grad_norm": 0.5930436680869728, "learning_rate": 3.744786339139328e-09, "loss": 0.8833, "step": 49060 }, { "epoch": 2.9654922342418564, "grad_norm": 0.6061420937100483, "learning_rate": 3.6170028078119647e-09, "loss": 0.8826, "step": 49070 }, { "epoch": 2.9660965733969906, "grad_norm": 0.6213514978877125, "learning_rate": 3.491436679812221e-09, "loss": 0.9099, "step": 49080 }, { "epoch": 2.966700912552124, "grad_norm": 0.5847448638982462, "learning_rate": 3.3680880108666327e-09, "loss": 0.8754, "step": 49090 }, { "epoch": 2.9673052517072582, "grad_norm": 0.5895139162136194, "learning_rate": 3.246956855716965e-09, "loss": 0.8768, "step": 49100 }, { "epoch": 2.967909590862392, "grad_norm": 0.5746931854544143, "learning_rate": 3.128043268121328e-09, "loss": 0.8735, "step": 49110 }, { "epoch": 2.968513930017526, "grad_norm": 0.5931281529327385, "learning_rate": 3.0113473008541727e-09, "loss": 0.872, "step": 49120 }, { "epoch": 2.9691182691726596, "grad_norm": 0.603838851133666, "learning_rate": 2.896869005705183e-09, "loss": 0.9015, "step": 49130 }, { "epoch": 2.9697226083277934, "grad_norm": 0.5827662201168953, "learning_rate": 2.7846084334803846e-09, "loss": 0.8803, "step": 49140 }, { "epoch": 2.9703269474829277, "grad_norm": 0.5778573144543692, "learning_rate": 2.674565634001036e-09, "loss": 0.8891, "step": 49150 }, { "epoch": 2.970931286638061, "grad_norm": 0.5735814384398921, "learning_rate": 2.566740656103628e-09, "loss": 0.8775, "step": 49160 }, { "epoch": 2.9715356257931953, "grad_norm": 0.6061507113458374, "learning_rate": 2.4611335476421027e-09, "loss": 0.8833, "step": 49170 }, { "epoch": 2.972139964948329, "grad_norm": 0.6176135240296653, "learning_rate": 2.357744355484526e-09, "loss": 0.8965, "step": 49180 }, { "epoch": 2.972744304103463, "grad_norm": 0.5741498086221855, "learning_rate": 2.2565731255153045e-09, "loss": 0.879, "step": 49190 }, { "epoch": 2.9733486432585967, "grad_norm": 0.6250699824204273, "learning_rate": 2.1576199026346333e-09, "loss": 0.895, "step": 49200 }, { "epoch": 2.9739529824137305, "grad_norm": 0.6011562023441367, "learning_rate": 2.060884730757384e-09, "loss": 0.9049, "step": 49210 }, { "epoch": 2.9745573215688643, "grad_norm": 0.6023919580072014, "learning_rate": 1.9663676528158814e-09, "loss": 0.8621, "step": 49220 }, { "epoch": 2.975161660723998, "grad_norm": 0.6182059212995448, "learning_rate": 1.8740687107560164e-09, "loss": 0.8642, "step": 49230 }, { "epoch": 2.9757659998791324, "grad_norm": 0.585521058002708, "learning_rate": 1.7839879455411325e-09, "loss": 0.8711, "step": 49240 }, { "epoch": 2.976370339034266, "grad_norm": 0.6068836425468965, "learning_rate": 1.6961253971481406e-09, "loss": 0.9029, "step": 49250 }, { "epoch": 2.9769746781894, "grad_norm": 0.6135498475674649, "learning_rate": 1.610481104571404e-09, "loss": 0.8818, "step": 49260 }, { "epoch": 2.977579017344534, "grad_norm": 0.6011288764524885, "learning_rate": 1.5270551058194083e-09, "loss": 0.8619, "step": 49270 }, { "epoch": 2.9781833564996676, "grad_norm": 0.6289414057745571, "learning_rate": 1.4458474379169806e-09, "loss": 0.9104, "step": 49280 }, { "epoch": 2.9787876956548014, "grad_norm": 0.6623175758365316, "learning_rate": 1.3668581369047362e-09, "loss": 0.8659, "step": 49290 }, { "epoch": 2.979392034809935, "grad_norm": 0.6602552204338815, "learning_rate": 1.290087237837967e-09, "loss": 0.8834, "step": 49300 }, { "epoch": 2.9799963739650694, "grad_norm": 0.6990209477739195, "learning_rate": 1.2155347747871971e-09, "loss": 0.893, "step": 49310 }, { "epoch": 2.980600713120203, "grad_norm": 0.6904709034302106, "learning_rate": 1.1432007808398482e-09, "loss": 0.9, "step": 49320 }, { "epoch": 2.981205052275337, "grad_norm": 0.7048984983315532, "learning_rate": 1.0730852880969088e-09, "loss": 0.9005, "step": 49330 }, { "epoch": 2.981809391430471, "grad_norm": 0.6966414093410382, "learning_rate": 1.0051883276762653e-09, "loss": 0.8624, "step": 49340 }, { "epoch": 2.9824137305856047, "grad_norm": 0.8403987749537117, "learning_rate": 9.395099297110355e-10, "loss": 0.8767, "step": 49350 }, { "epoch": 2.9830180697407385, "grad_norm": 0.7515781844389828, "learning_rate": 8.760501233484597e-10, "loss": 0.8738, "step": 49360 }, { "epoch": 2.9836224088958723, "grad_norm": 0.7397597873393668, "learning_rate": 8.148089367532308e-10, "loss": 0.8998, "step": 49370 }, { "epoch": 2.984226748051006, "grad_norm": 0.7297769888916055, "learning_rate": 7.557863971036084e-10, "loss": 0.883, "step": 49380 }, { "epoch": 2.98483108720614, "grad_norm": 0.7152450326570774, "learning_rate": 6.989825305936393e-10, "loss": 0.8995, "step": 49390 }, { "epoch": 2.985435426361274, "grad_norm": 0.7379537255250919, "learning_rate": 6.44397362433713e-10, "loss": 0.8877, "step": 49400 }, { "epoch": 2.986039765516408, "grad_norm": 0.8258330177937314, "learning_rate": 5.920309168483407e-10, "loss": 0.8945, "step": 49410 }, { "epoch": 2.9866441046715417, "grad_norm": 0.721822043141609, "learning_rate": 5.418832170783761e-10, "loss": 0.8922, "step": 49420 }, { "epoch": 2.9872484438266755, "grad_norm": 0.6690799268238671, "learning_rate": 4.939542853787948e-10, "loss": 0.8755, "step": 49430 }, { "epoch": 2.9878527829818093, "grad_norm": 0.6239633496165857, "learning_rate": 4.482441430203599e-10, "loss": 0.8715, "step": 49440 }, { "epoch": 2.988457122136943, "grad_norm": 0.8337487714495238, "learning_rate": 4.0475281029017654e-10, "loss": 0.8863, "step": 49450 }, { "epoch": 2.989061461292077, "grad_norm": 0.862269789654611, "learning_rate": 3.6348030648947206e-10, "loss": 0.8955, "step": 49460 }, { "epoch": 2.989665800447211, "grad_norm": 0.8550932641644109, "learning_rate": 3.24426649934706e-10, "loss": 0.8948, "step": 49470 }, { "epoch": 2.990270139602345, "grad_norm": 0.8354210161578469, "learning_rate": 2.875918579581249e-10, "loss": 0.8778, "step": 49480 }, { "epoch": 2.990874478757479, "grad_norm": 0.9036768662399685, "learning_rate": 2.5297594690720797e-10, "loss": 0.8821, "step": 49490 }, { "epoch": 2.9914788179126126, "grad_norm": 0.8855291970675718, "learning_rate": 2.205789321446661e-10, "loss": 0.8829, "step": 49500 }, { "epoch": 2.9920831570677464, "grad_norm": 0.8539480963425093, "learning_rate": 1.904008280478875e-10, "loss": 0.8799, "step": 49510 }, { "epoch": 2.99268749622288, "grad_norm": 0.8335814596473017, "learning_rate": 1.624416480106028e-10, "loss": 0.8832, "step": 49520 }, { "epoch": 2.993291835378014, "grad_norm": 0.8132456530146003, "learning_rate": 1.3670140444010937e-10, "loss": 0.8832, "step": 49530 }, { "epoch": 2.9938961745331483, "grad_norm": 0.8449260546807574, "learning_rate": 1.1318010876115725e-10, "loss": 0.8905, "step": 49540 }, { "epoch": 2.9945005136882816, "grad_norm": 1.2398274573494132, "learning_rate": 9.187777141206333e-11, "loss": 0.896, "step": 49550 }, { "epoch": 2.995104852843416, "grad_norm": 1.0779526193921622, "learning_rate": 7.279440184637665e-11, "loss": 0.883, "step": 49560 }, { "epoch": 2.9957091919985497, "grad_norm": 1.3347277897980097, "learning_rate": 5.5930008533988667e-11, "loss": 0.9023, "step": 49570 }, { "epoch": 2.9963135311536835, "grad_norm": 1.240232862369335, "learning_rate": 4.12845989594679e-11, "loss": 0.8884, "step": 49580 }, { "epoch": 2.9969178703088173, "grad_norm": 1.222159608521956, "learning_rate": 2.8858179621504835e-11, "loss": 0.8993, "step": 49590 }, { "epoch": 2.997522209463951, "grad_norm": 0.8417692759125358, "learning_rate": 1.8650756036242555e-11, "loss": 0.8806, "step": 49600 }, { "epoch": 2.998126548619085, "grad_norm": 0.9887157162262514, "learning_rate": 1.0662332732280789e-11, "loss": 0.8635, "step": 49610 }, { "epoch": 2.9987308877742187, "grad_norm": 0.9703831343529619, "learning_rate": 4.892913256226984e-12, "loss": 0.8827, "step": 49620 }, { "epoch": 2.999335226929353, "grad_norm": 1.0022784710492407, "learning_rate": 1.3425001677003224e-12, "loss": 0.8881, "step": 49630 }, { "epoch": 2.9999395660844868, "grad_norm": 0.951152020027502, "learning_rate": 1.1095043217501655e-14, "loss": 0.8982, "step": 49640 }, { "epoch": 3.0, "step": 49641, "total_flos": 1475049923543040.0, "train_loss": 0.949762989045323, "train_runtime": 45551.2119, "train_samples_per_second": 139.491, "train_steps_per_second": 1.09 } ], "logging_steps": 10, "max_steps": 49641, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1475049923543040.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }