diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,66533 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3838120209924975, + "eval_steps": 500, + "global_step": 95000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.040126536763131e-05, + "grad_norm": 720.1376953125, + "learning_rate": 8.080808080808081e-09, + "loss": 97.7333, + "step": 10 + }, + { + "epoch": 8.080253073526263e-05, + "grad_norm": 765.2322387695312, + "learning_rate": 1.6161616161616162e-08, + "loss": 64.9714, + "step": 20 + }, + { + "epoch": 0.00012120379610289395, + "grad_norm": 1526.16015625, + "learning_rate": 2.4242424242424243e-08, + "loss": 141.7881, + "step": 30 + }, + { + "epoch": 0.00016160506147052525, + "grad_norm": 1654.4871826171875, + "learning_rate": 3.2323232323232324e-08, + "loss": 94.7421, + "step": 40 + }, + { + "epoch": 0.00020200632683815657, + "grad_norm": 706.5513305664062, + "learning_rate": 4.040404040404041e-08, + "loss": 79.4392, + "step": 50 + }, + { + "epoch": 0.0002424075922057879, + "grad_norm": 1566.697998046875, + "learning_rate": 4.8484848484848486e-08, + "loss": 167.6519, + "step": 60 + }, + { + "epoch": 0.0002828088575734192, + "grad_norm": 1094.059326171875, + "learning_rate": 5.656565656565657e-08, + "loss": 74.5029, + "step": 70 + }, + { + "epoch": 0.0003232101229410505, + "grad_norm": 2690.986328125, + "learning_rate": 6.464646464646465e-08, + "loss": 101.7366, + "step": 80 + }, + { + "epoch": 0.0003636113883086818, + "grad_norm": 845.0662231445312, + "learning_rate": 7.272727272727274e-08, + "loss": 109.2449, + "step": 90 + }, + { + "epoch": 0.00040401265367631315, + "grad_norm": 1515.5186767578125, + "learning_rate": 8.080808080808082e-08, + "loss": 68.3294, + "step": 100 + }, + { + "epoch": 0.00044441391904394446, + "grad_norm": 875.564453125, + "learning_rate": 8.88888888888889e-08, + "loss": 96.3639, + "step": 110 + }, + { + "epoch": 0.0004848151844115758, + "grad_norm": 342.3182067871094, + "learning_rate": 9.696969696969697e-08, + "loss": 112.5309, + "step": 120 + }, + { + "epoch": 0.000525216449779207, + "grad_norm": 1264.6597900390625, + "learning_rate": 1.0505050505050506e-07, + "loss": 118.3208, + "step": 130 + }, + { + "epoch": 0.0005656177151468384, + "grad_norm": 1327.964111328125, + "learning_rate": 1.1313131313131314e-07, + "loss": 75.7366, + "step": 140 + }, + { + "epoch": 0.0006060189805144697, + "grad_norm": 1309.533447265625, + "learning_rate": 1.2121212121212122e-07, + "loss": 109.1942, + "step": 150 + }, + { + "epoch": 0.000646420245882101, + "grad_norm": 683.5692749023438, + "learning_rate": 1.292929292929293e-07, + "loss": 129.5559, + "step": 160 + }, + { + "epoch": 0.0006868215112497323, + "grad_norm": 965.5596923828125, + "learning_rate": 1.3737373737373738e-07, + "loss": 67.0329, + "step": 170 + }, + { + "epoch": 0.0007272227766173637, + "grad_norm": 1409.6192626953125, + "learning_rate": 1.4545454545454548e-07, + "loss": 100.9175, + "step": 180 + }, + { + "epoch": 0.000767624041984995, + "grad_norm": 936.8455810546875, + "learning_rate": 1.5353535353535356e-07, + "loss": 114.6267, + "step": 190 + }, + { + "epoch": 0.0008080253073526263, + "grad_norm": 1041.8524169921875, + "learning_rate": 1.6161616161616163e-07, + "loss": 100.7278, + "step": 200 + }, + { + "epoch": 0.0008484265727202576, + "grad_norm": 1001.06982421875, + "learning_rate": 1.6969696969696974e-07, + "loss": 83.2612, + "step": 210 + }, + { + "epoch": 0.0008888278380878889, + "grad_norm": 542.4169921875, + "learning_rate": 1.777777777777778e-07, + "loss": 65.0905, + "step": 220 + }, + { + "epoch": 0.0009292291034555202, + "grad_norm": 1276.65185546875, + "learning_rate": 1.858585858585859e-07, + "loss": 95.6987, + "step": 230 + }, + { + "epoch": 0.0009696303688231516, + "grad_norm": 550.302734375, + "learning_rate": 1.9393939393939395e-07, + "loss": 118.7955, + "step": 240 + }, + { + "epoch": 0.0010100316341907828, + "grad_norm": 1489.1029052734375, + "learning_rate": 2.0202020202020205e-07, + "loss": 124.1141, + "step": 250 + }, + { + "epoch": 0.001050432899558414, + "grad_norm": 1103.9412841796875, + "learning_rate": 2.1010101010101013e-07, + "loss": 101.3522, + "step": 260 + }, + { + "epoch": 0.0010908341649260454, + "grad_norm": 1566.489013671875, + "learning_rate": 2.181818181818182e-07, + "loss": 97.4709, + "step": 270 + }, + { + "epoch": 0.0011312354302936767, + "grad_norm": 1549.5975341796875, + "learning_rate": 2.2626262626262628e-07, + "loss": 74.9938, + "step": 280 + }, + { + "epoch": 0.001171636695661308, + "grad_norm": 1458.78564453125, + "learning_rate": 2.343434343434344e-07, + "loss": 86.4949, + "step": 290 + }, + { + "epoch": 0.0012120379610289394, + "grad_norm": 767.0146484375, + "learning_rate": 2.4242424242424244e-07, + "loss": 64.3565, + "step": 300 + }, + { + "epoch": 0.0012524392263965707, + "grad_norm": 1175.2060546875, + "learning_rate": 2.505050505050505e-07, + "loss": 58.2902, + "step": 310 + }, + { + "epoch": 0.001292840491764202, + "grad_norm": 666.94580078125, + "learning_rate": 2.585858585858586e-07, + "loss": 65.0699, + "step": 320 + }, + { + "epoch": 0.0013332417571318333, + "grad_norm": 411.40283203125, + "learning_rate": 2.666666666666667e-07, + "loss": 79.5953, + "step": 330 + }, + { + "epoch": 0.0013736430224994647, + "grad_norm": 800.8609619140625, + "learning_rate": 2.7474747474747475e-07, + "loss": 113.2127, + "step": 340 + }, + { + "epoch": 0.001414044287867096, + "grad_norm": 438.9530029296875, + "learning_rate": 2.828282828282829e-07, + "loss": 77.6877, + "step": 350 + }, + { + "epoch": 0.0014544455532347273, + "grad_norm": 708.1030883789062, + "learning_rate": 2.9090909090909096e-07, + "loss": 59.7696, + "step": 360 + }, + { + "epoch": 0.0014948468186023586, + "grad_norm": 472.1529235839844, + "learning_rate": 2.9898989898989904e-07, + "loss": 94.6443, + "step": 370 + }, + { + "epoch": 0.00153524808396999, + "grad_norm": 1017.7798461914062, + "learning_rate": 3.070707070707071e-07, + "loss": 80.9425, + "step": 380 + }, + { + "epoch": 0.0015756493493376213, + "grad_norm": 874.1619262695312, + "learning_rate": 3.151515151515152e-07, + "loss": 78.2897, + "step": 390 + }, + { + "epoch": 0.0016160506147052526, + "grad_norm": 738.783203125, + "learning_rate": 3.2323232323232327e-07, + "loss": 124.2281, + "step": 400 + }, + { + "epoch": 0.001656451880072884, + "grad_norm": 1742.195556640625, + "learning_rate": 3.3131313131313135e-07, + "loss": 90.681, + "step": 410 + }, + { + "epoch": 0.0016968531454405152, + "grad_norm": 495.2870178222656, + "learning_rate": 3.393939393939395e-07, + "loss": 53.7599, + "step": 420 + }, + { + "epoch": 0.0017372544108081465, + "grad_norm": 599.09033203125, + "learning_rate": 3.474747474747475e-07, + "loss": 73.8856, + "step": 430 + }, + { + "epoch": 0.0017776556761757779, + "grad_norm": 701.4446411132812, + "learning_rate": 3.555555555555556e-07, + "loss": 104.5027, + "step": 440 + }, + { + "epoch": 0.0018180569415434092, + "grad_norm": 1392.83642578125, + "learning_rate": 3.6363636363636366e-07, + "loss": 82.1324, + "step": 450 + }, + { + "epoch": 0.0018584582069110405, + "grad_norm": 368.55596923828125, + "learning_rate": 3.717171717171718e-07, + "loss": 90.4273, + "step": 460 + }, + { + "epoch": 0.0018988594722786718, + "grad_norm": 966.1232299804688, + "learning_rate": 3.7979797979797987e-07, + "loss": 80.2726, + "step": 470 + }, + { + "epoch": 0.0019392607376463031, + "grad_norm": 727.8269653320312, + "learning_rate": 3.878787878787879e-07, + "loss": 72.9839, + "step": 480 + }, + { + "epoch": 0.0019796620030139342, + "grad_norm": 1065.9033203125, + "learning_rate": 3.9595959595959597e-07, + "loss": 76.9423, + "step": 490 + }, + { + "epoch": 0.0020200632683815656, + "grad_norm": 487.7821960449219, + "learning_rate": 4.040404040404041e-07, + "loss": 85.3323, + "step": 500 + }, + { + "epoch": 0.002060464533749197, + "grad_norm": 943.0349731445312, + "learning_rate": 4.121212121212122e-07, + "loss": 103.3431, + "step": 510 + }, + { + "epoch": 0.002100865799116828, + "grad_norm": 1148.1529541015625, + "learning_rate": 4.2020202020202026e-07, + "loss": 94.2576, + "step": 520 + }, + { + "epoch": 0.0021412670644844595, + "grad_norm": 751.3715209960938, + "learning_rate": 4.282828282828283e-07, + "loss": 74.1272, + "step": 530 + }, + { + "epoch": 0.002181668329852091, + "grad_norm": 940.9969482421875, + "learning_rate": 4.363636363636364e-07, + "loss": 57.4998, + "step": 540 + }, + { + "epoch": 0.002222069595219722, + "grad_norm": 1486.8199462890625, + "learning_rate": 4.444444444444445e-07, + "loss": 86.582, + "step": 550 + }, + { + "epoch": 0.0022624708605873535, + "grad_norm": 794.8299560546875, + "learning_rate": 4.5252525252525257e-07, + "loss": 100.8996, + "step": 560 + }, + { + "epoch": 0.002302872125954985, + "grad_norm": 475.3540344238281, + "learning_rate": 4.6060606060606064e-07, + "loss": 52.334, + "step": 570 + }, + { + "epoch": 0.002343273391322616, + "grad_norm": 1649.77392578125, + "learning_rate": 4.686868686868688e-07, + "loss": 120.0299, + "step": 580 + }, + { + "epoch": 0.0023836746566902474, + "grad_norm": 1001.852294921875, + "learning_rate": 4.767676767676768e-07, + "loss": 99.2672, + "step": 590 + }, + { + "epoch": 0.0024240759220578788, + "grad_norm": 755.6719970703125, + "learning_rate": 4.848484848484849e-07, + "loss": 60.3841, + "step": 600 + }, + { + "epoch": 0.00246447718742551, + "grad_norm": 1413.067626953125, + "learning_rate": 4.929292929292929e-07, + "loss": 99.4986, + "step": 610 + }, + { + "epoch": 0.0025048784527931414, + "grad_norm": 514.0801391601562, + "learning_rate": 5.01010101010101e-07, + "loss": 82.1478, + "step": 620 + }, + { + "epoch": 0.0025452797181607727, + "grad_norm": 446.5071105957031, + "learning_rate": 5.090909090909092e-07, + "loss": 90.5055, + "step": 630 + }, + { + "epoch": 0.002585680983528404, + "grad_norm": 1363.0565185546875, + "learning_rate": 5.171717171717172e-07, + "loss": 75.8123, + "step": 640 + }, + { + "epoch": 0.0026260822488960354, + "grad_norm": 1498.7562255859375, + "learning_rate": 5.252525252525253e-07, + "loss": 97.2051, + "step": 650 + }, + { + "epoch": 0.0026664835142636667, + "grad_norm": 819.4781494140625, + "learning_rate": 5.333333333333335e-07, + "loss": 92.2455, + "step": 660 + }, + { + "epoch": 0.002706884779631298, + "grad_norm": 739.6301879882812, + "learning_rate": 5.414141414141415e-07, + "loss": 60.5293, + "step": 670 + }, + { + "epoch": 0.0027472860449989293, + "grad_norm": 610.8301391601562, + "learning_rate": 5.494949494949495e-07, + "loss": 76.4783, + "step": 680 + }, + { + "epoch": 0.0027876873103665606, + "grad_norm": 796.4779663085938, + "learning_rate": 5.575757575757576e-07, + "loss": 76.7366, + "step": 690 + }, + { + "epoch": 0.002828088575734192, + "grad_norm": 372.6942443847656, + "learning_rate": 5.656565656565658e-07, + "loss": 55.3669, + "step": 700 + }, + { + "epoch": 0.0028684898411018233, + "grad_norm": 887.7366333007812, + "learning_rate": 5.737373737373738e-07, + "loss": 91.2889, + "step": 710 + }, + { + "epoch": 0.0029088911064694546, + "grad_norm": 869.73486328125, + "learning_rate": 5.818181818181819e-07, + "loss": 90.853, + "step": 720 + }, + { + "epoch": 0.002949292371837086, + "grad_norm": 900.1129760742188, + "learning_rate": 5.898989898989899e-07, + "loss": 99.8146, + "step": 730 + }, + { + "epoch": 0.0029896936372047172, + "grad_norm": 882.5169067382812, + "learning_rate": 5.979797979797981e-07, + "loss": 94.4538, + "step": 740 + }, + { + "epoch": 0.0030300949025723486, + "grad_norm": 599.550537109375, + "learning_rate": 6.060606060606061e-07, + "loss": 66.0833, + "step": 750 + }, + { + "epoch": 0.00307049616793998, + "grad_norm": 513.547607421875, + "learning_rate": 6.141414141414142e-07, + "loss": 51.7566, + "step": 760 + }, + { + "epoch": 0.003110897433307611, + "grad_norm": 645.5187377929688, + "learning_rate": 6.222222222222223e-07, + "loss": 68.8667, + "step": 770 + }, + { + "epoch": 0.0031512986986752425, + "grad_norm": 473.2290344238281, + "learning_rate": 6.303030303030304e-07, + "loss": 58.1628, + "step": 780 + }, + { + "epoch": 0.003191699964042874, + "grad_norm": 912.64404296875, + "learning_rate": 6.383838383838384e-07, + "loss": 90.716, + "step": 790 + }, + { + "epoch": 0.003232101229410505, + "grad_norm": 647.8580322265625, + "learning_rate": 6.464646464646465e-07, + "loss": 55.9714, + "step": 800 + }, + { + "epoch": 0.0032725024947781365, + "grad_norm": 768.3733520507812, + "learning_rate": 6.545454545454547e-07, + "loss": 83.1954, + "step": 810 + }, + { + "epoch": 0.003312903760145768, + "grad_norm": 482.00775146484375, + "learning_rate": 6.626262626262627e-07, + "loss": 63.0688, + "step": 820 + }, + { + "epoch": 0.003353305025513399, + "grad_norm": 411.4869079589844, + "learning_rate": 6.707070707070708e-07, + "loss": 58.3106, + "step": 830 + }, + { + "epoch": 0.0033937062908810304, + "grad_norm": 590.60546875, + "learning_rate": 6.78787878787879e-07, + "loss": 81.1757, + "step": 840 + }, + { + "epoch": 0.0034341075562486618, + "grad_norm": 267.18963623046875, + "learning_rate": 6.868686868686869e-07, + "loss": 74.0499, + "step": 850 + }, + { + "epoch": 0.003474508821616293, + "grad_norm": 553.4140625, + "learning_rate": 6.94949494949495e-07, + "loss": 93.1003, + "step": 860 + }, + { + "epoch": 0.0035149100869839244, + "grad_norm": 624.134033203125, + "learning_rate": 7.03030303030303e-07, + "loss": 53.7593, + "step": 870 + }, + { + "epoch": 0.0035553113523515557, + "grad_norm": 529.945556640625, + "learning_rate": 7.111111111111112e-07, + "loss": 48.8026, + "step": 880 + }, + { + "epoch": 0.003595712617719187, + "grad_norm": 564.1666870117188, + "learning_rate": 7.191919191919193e-07, + "loss": 76.9729, + "step": 890 + }, + { + "epoch": 0.0036361138830868184, + "grad_norm": 433.373779296875, + "learning_rate": 7.272727272727273e-07, + "loss": 64.9687, + "step": 900 + }, + { + "epoch": 0.0036765151484544497, + "grad_norm": 671.010986328125, + "learning_rate": 7.353535353535354e-07, + "loss": 86.2554, + "step": 910 + }, + { + "epoch": 0.003716916413822081, + "grad_norm": 481.1733093261719, + "learning_rate": 7.434343434343436e-07, + "loss": 54.4597, + "step": 920 + }, + { + "epoch": 0.0037573176791897123, + "grad_norm": 643.103271484375, + "learning_rate": 7.515151515151516e-07, + "loss": 64.1825, + "step": 930 + }, + { + "epoch": 0.0037977189445573436, + "grad_norm": 732.6908569335938, + "learning_rate": 7.595959595959597e-07, + "loss": 67.7345, + "step": 940 + }, + { + "epoch": 0.003838120209924975, + "grad_norm": 401.0261535644531, + "learning_rate": 7.676767676767677e-07, + "loss": 61.4696, + "step": 950 + }, + { + "epoch": 0.0038785214752926063, + "grad_norm": 491.2078552246094, + "learning_rate": 7.757575757575758e-07, + "loss": 54.7679, + "step": 960 + }, + { + "epoch": 0.003918922740660237, + "grad_norm": 496.06292724609375, + "learning_rate": 7.838383838383839e-07, + "loss": 74.1232, + "step": 970 + }, + { + "epoch": 0.0039593240060278685, + "grad_norm": 612.330810546875, + "learning_rate": 7.919191919191919e-07, + "loss": 48.3253, + "step": 980 + }, + { + "epoch": 0.0039997252713955, + "grad_norm": 961.5505981445312, + "learning_rate": 8.000000000000001e-07, + "loss": 97.4158, + "step": 990 + }, + { + "epoch": 0.004040126536763131, + "grad_norm": 458.24176025390625, + "learning_rate": 8.080808080808082e-07, + "loss": 73.8477, + "step": 1000 + }, + { + "epoch": 0.0040805278021307624, + "grad_norm": 902.3980712890625, + "learning_rate": 8.161616161616162e-07, + "loss": 84.1415, + "step": 1010 + }, + { + "epoch": 0.004120929067498394, + "grad_norm": 1041.8564453125, + "learning_rate": 8.242424242424244e-07, + "loss": 91.096, + "step": 1020 + }, + { + "epoch": 0.004161330332866025, + "grad_norm": 483.5507507324219, + "learning_rate": 8.323232323232324e-07, + "loss": 71.8416, + "step": 1030 + }, + { + "epoch": 0.004201731598233656, + "grad_norm": 461.9169006347656, + "learning_rate": 8.404040404040405e-07, + "loss": 109.2555, + "step": 1040 + }, + { + "epoch": 0.004242132863601288, + "grad_norm": 716.3146362304688, + "learning_rate": 8.484848484848486e-07, + "loss": 67.7536, + "step": 1050 + }, + { + "epoch": 0.004282534128968919, + "grad_norm": 583.708251953125, + "learning_rate": 8.565656565656566e-07, + "loss": 79.6363, + "step": 1060 + }, + { + "epoch": 0.00432293539433655, + "grad_norm": 644.1818237304688, + "learning_rate": 8.646464646464647e-07, + "loss": 86.4302, + "step": 1070 + }, + { + "epoch": 0.004363336659704182, + "grad_norm": 549.10400390625, + "learning_rate": 8.727272727272728e-07, + "loss": 48.8068, + "step": 1080 + }, + { + "epoch": 0.004403737925071813, + "grad_norm": 651.5703735351562, + "learning_rate": 8.808080808080808e-07, + "loss": 62.0918, + "step": 1090 + }, + { + "epoch": 0.004444139190439444, + "grad_norm": 553.933837890625, + "learning_rate": 8.88888888888889e-07, + "loss": 51.3118, + "step": 1100 + }, + { + "epoch": 0.004484540455807076, + "grad_norm": 559.3411254882812, + "learning_rate": 8.96969696969697e-07, + "loss": 78.899, + "step": 1110 + }, + { + "epoch": 0.004524941721174707, + "grad_norm": 1178.9420166015625, + "learning_rate": 9.050505050505051e-07, + "loss": 59.7121, + "step": 1120 + }, + { + "epoch": 0.004565342986542338, + "grad_norm": 470.557373046875, + "learning_rate": 9.131313131313133e-07, + "loss": 77.5851, + "step": 1130 + }, + { + "epoch": 0.00460574425190997, + "grad_norm": 823.9404907226562, + "learning_rate": 9.212121212121213e-07, + "loss": 88.5502, + "step": 1140 + }, + { + "epoch": 0.004646145517277601, + "grad_norm": 858.840087890625, + "learning_rate": 9.292929292929294e-07, + "loss": 103.8964, + "step": 1150 + }, + { + "epoch": 0.004686546782645232, + "grad_norm": 914.1184692382812, + "learning_rate": 9.373737373737376e-07, + "loss": 77.9602, + "step": 1160 + }, + { + "epoch": 0.0047269480480128636, + "grad_norm": 686.4456787109375, + "learning_rate": 9.454545454545455e-07, + "loss": 76.4946, + "step": 1170 + }, + { + "epoch": 0.004767349313380495, + "grad_norm": 487.4496765136719, + "learning_rate": 9.535353535353536e-07, + "loss": 99.8757, + "step": 1180 + }, + { + "epoch": 0.004807750578748126, + "grad_norm": 520.9862670898438, + "learning_rate": 9.616161616161617e-07, + "loss": 93.658, + "step": 1190 + }, + { + "epoch": 0.0048481518441157575, + "grad_norm": 390.9288635253906, + "learning_rate": 9.696969696969698e-07, + "loss": 87.906, + "step": 1200 + }, + { + "epoch": 0.004888553109483389, + "grad_norm": 436.38543701171875, + "learning_rate": 9.77777777777778e-07, + "loss": 47.0411, + "step": 1210 + }, + { + "epoch": 0.00492895437485102, + "grad_norm": 671.6222534179688, + "learning_rate": 9.858585858585858e-07, + "loss": 51.2458, + "step": 1220 + }, + { + "epoch": 0.0049693556402186515, + "grad_norm": 691.9678344726562, + "learning_rate": 9.93939393939394e-07, + "loss": 54.2135, + "step": 1230 + }, + { + "epoch": 0.005009756905586283, + "grad_norm": 849.5625610351562, + "learning_rate": 1.002020202020202e-06, + "loss": 85.6906, + "step": 1240 + }, + { + "epoch": 0.005050158170953914, + "grad_norm": 790.7261352539062, + "learning_rate": 1.01010101010101e-06, + "loss": 118.3505, + "step": 1250 + }, + { + "epoch": 0.0050905594363215454, + "grad_norm": 838.802734375, + "learning_rate": 1.0181818181818183e-06, + "loss": 66.9146, + "step": 1260 + }, + { + "epoch": 0.005130960701689177, + "grad_norm": 871.91162109375, + "learning_rate": 1.0262626262626264e-06, + "loss": 94.0071, + "step": 1270 + }, + { + "epoch": 0.005171361967056808, + "grad_norm": 321.37939453125, + "learning_rate": 1.0343434343434344e-06, + "loss": 74.266, + "step": 1280 + }, + { + "epoch": 0.005211763232424439, + "grad_norm": 1083.537353515625, + "learning_rate": 1.0424242424242426e-06, + "loss": 84.3127, + "step": 1290 + }, + { + "epoch": 0.005252164497792071, + "grad_norm": 755.6991577148438, + "learning_rate": 1.0505050505050506e-06, + "loss": 67.5171, + "step": 1300 + }, + { + "epoch": 0.005292565763159702, + "grad_norm": 496.10772705078125, + "learning_rate": 1.0585858585858587e-06, + "loss": 59.7423, + "step": 1310 + }, + { + "epoch": 0.005332967028527333, + "grad_norm": 840.3679809570312, + "learning_rate": 1.066666666666667e-06, + "loss": 69.084, + "step": 1320 + }, + { + "epoch": 0.005373368293894965, + "grad_norm": 913.7398681640625, + "learning_rate": 1.0747474747474747e-06, + "loss": 54.4863, + "step": 1330 + }, + { + "epoch": 0.005413769559262596, + "grad_norm": 1077.27392578125, + "learning_rate": 1.082828282828283e-06, + "loss": 90.3723, + "step": 1340 + }, + { + "epoch": 0.005454170824630227, + "grad_norm": 587.8721313476562, + "learning_rate": 1.090909090909091e-06, + "loss": 61.4766, + "step": 1350 + }, + { + "epoch": 0.005494572089997859, + "grad_norm": 651.0157470703125, + "learning_rate": 1.098989898989899e-06, + "loss": 58.8504, + "step": 1360 + }, + { + "epoch": 0.00553497335536549, + "grad_norm": 1200.4676513671875, + "learning_rate": 1.1070707070707072e-06, + "loss": 78.9724, + "step": 1370 + }, + { + "epoch": 0.005575374620733121, + "grad_norm": 1426.4935302734375, + "learning_rate": 1.1151515151515153e-06, + "loss": 97.0092, + "step": 1380 + }, + { + "epoch": 0.005615775886100753, + "grad_norm": 668.6116333007812, + "learning_rate": 1.1232323232323233e-06, + "loss": 78.3742, + "step": 1390 + }, + { + "epoch": 0.005656177151468384, + "grad_norm": 213.35760498046875, + "learning_rate": 1.1313131313131315e-06, + "loss": 101.9843, + "step": 1400 + }, + { + "epoch": 0.005696578416836015, + "grad_norm": 407.4592590332031, + "learning_rate": 1.1393939393939395e-06, + "loss": 67.6436, + "step": 1410 + }, + { + "epoch": 0.0057369796822036466, + "grad_norm": 1257.8907470703125, + "learning_rate": 1.1474747474747476e-06, + "loss": 85.1267, + "step": 1420 + }, + { + "epoch": 0.005777380947571278, + "grad_norm": 671.1593017578125, + "learning_rate": 1.1555555555555556e-06, + "loss": 71.1813, + "step": 1430 + }, + { + "epoch": 0.005817782212938909, + "grad_norm": 592.0625610351562, + "learning_rate": 1.1636363636363638e-06, + "loss": 70.4293, + "step": 1440 + }, + { + "epoch": 0.0058581834783065405, + "grad_norm": 425.52978515625, + "learning_rate": 1.1717171717171719e-06, + "loss": 60.6906, + "step": 1450 + }, + { + "epoch": 0.005898584743674172, + "grad_norm": 1379.921630859375, + "learning_rate": 1.1797979797979799e-06, + "loss": 69.5714, + "step": 1460 + }, + { + "epoch": 0.005938986009041803, + "grad_norm": 246.26141357421875, + "learning_rate": 1.187878787878788e-06, + "loss": 64.0959, + "step": 1470 + }, + { + "epoch": 0.0059793872744094345, + "grad_norm": 399.0498046875, + "learning_rate": 1.1959595959595961e-06, + "loss": 58.6223, + "step": 1480 + }, + { + "epoch": 0.006019788539777066, + "grad_norm": 723.107421875, + "learning_rate": 1.2040404040404042e-06, + "loss": 99.6874, + "step": 1490 + }, + { + "epoch": 0.006060189805144697, + "grad_norm": 556.9105224609375, + "learning_rate": 1.2121212121212122e-06, + "loss": 48.2795, + "step": 1500 + }, + { + "epoch": 0.0061005910705123284, + "grad_norm": 1261.1217041015625, + "learning_rate": 1.2202020202020202e-06, + "loss": 63.467, + "step": 1510 + }, + { + "epoch": 0.00614099233587996, + "grad_norm": 832.5606689453125, + "learning_rate": 1.2282828282828285e-06, + "loss": 56.5807, + "step": 1520 + }, + { + "epoch": 0.006181393601247591, + "grad_norm": 561.32275390625, + "learning_rate": 1.2363636363636365e-06, + "loss": 50.7661, + "step": 1530 + }, + { + "epoch": 0.006221794866615222, + "grad_norm": 607.7224731445312, + "learning_rate": 1.2444444444444445e-06, + "loss": 68.9423, + "step": 1540 + }, + { + "epoch": 0.006262196131982854, + "grad_norm": 728.4628295898438, + "learning_rate": 1.2525252525252527e-06, + "loss": 70.7681, + "step": 1550 + }, + { + "epoch": 0.006302597397350485, + "grad_norm": 666.349853515625, + "learning_rate": 1.2606060606060608e-06, + "loss": 56.895, + "step": 1560 + }, + { + "epoch": 0.006342998662718116, + "grad_norm": 1740.4351806640625, + "learning_rate": 1.268686868686869e-06, + "loss": 60.3912, + "step": 1570 + }, + { + "epoch": 0.006383399928085748, + "grad_norm": 1566.6124267578125, + "learning_rate": 1.2767676767676768e-06, + "loss": 93.7447, + "step": 1580 + }, + { + "epoch": 0.006423801193453379, + "grad_norm": 462.4256591796875, + "learning_rate": 1.2848484848484848e-06, + "loss": 73.4026, + "step": 1590 + }, + { + "epoch": 0.00646420245882101, + "grad_norm": 1016.7862548828125, + "learning_rate": 1.292929292929293e-06, + "loss": 76.6085, + "step": 1600 + }, + { + "epoch": 0.006504603724188642, + "grad_norm": 657.1649780273438, + "learning_rate": 1.301010101010101e-06, + "loss": 75.5045, + "step": 1610 + }, + { + "epoch": 0.006545004989556273, + "grad_norm": 958.7669067382812, + "learning_rate": 1.3090909090909093e-06, + "loss": 65.2289, + "step": 1620 + }, + { + "epoch": 0.006585406254923904, + "grad_norm": 542.748779296875, + "learning_rate": 1.3171717171717172e-06, + "loss": 87.3519, + "step": 1630 + }, + { + "epoch": 0.006625807520291536, + "grad_norm": 719.8927612304688, + "learning_rate": 1.3252525252525254e-06, + "loss": 75.2894, + "step": 1640 + }, + { + "epoch": 0.006666208785659167, + "grad_norm": 909.3445434570312, + "learning_rate": 1.3333333333333334e-06, + "loss": 82.4471, + "step": 1650 + }, + { + "epoch": 0.006706610051026798, + "grad_norm": 589.6493530273438, + "learning_rate": 1.3414141414141417e-06, + "loss": 40.8706, + "step": 1660 + }, + { + "epoch": 0.0067470113163944296, + "grad_norm": 409.3217468261719, + "learning_rate": 1.3494949494949497e-06, + "loss": 60.2111, + "step": 1670 + }, + { + "epoch": 0.006787412581762061, + "grad_norm": 750.4218139648438, + "learning_rate": 1.357575757575758e-06, + "loss": 53.4837, + "step": 1680 + }, + { + "epoch": 0.006827813847129692, + "grad_norm": 807.2741088867188, + "learning_rate": 1.3656565656565657e-06, + "loss": 65.8104, + "step": 1690 + }, + { + "epoch": 0.0068682151124973235, + "grad_norm": 909.712890625, + "learning_rate": 1.3737373737373738e-06, + "loss": 54.6645, + "step": 1700 + }, + { + "epoch": 0.006908616377864955, + "grad_norm": 736.7755126953125, + "learning_rate": 1.381818181818182e-06, + "loss": 74.1443, + "step": 1710 + }, + { + "epoch": 0.006949017643232586, + "grad_norm": 1265.934814453125, + "learning_rate": 1.38989898989899e-06, + "loss": 64.3544, + "step": 1720 + }, + { + "epoch": 0.0069894189086002175, + "grad_norm": 587.2948608398438, + "learning_rate": 1.3979797979797982e-06, + "loss": 51.3367, + "step": 1730 + }, + { + "epoch": 0.007029820173967849, + "grad_norm": 580.3670043945312, + "learning_rate": 1.406060606060606e-06, + "loss": 79.2176, + "step": 1740 + }, + { + "epoch": 0.00707022143933548, + "grad_norm": 727.06298828125, + "learning_rate": 1.4141414141414143e-06, + "loss": 85.4666, + "step": 1750 + }, + { + "epoch": 0.0071106227047031114, + "grad_norm": 370.42242431640625, + "learning_rate": 1.4222222222222223e-06, + "loss": 76.675, + "step": 1760 + }, + { + "epoch": 0.007151023970070743, + "grad_norm": 316.96478271484375, + "learning_rate": 1.4303030303030306e-06, + "loss": 64.5112, + "step": 1770 + }, + { + "epoch": 0.007191425235438374, + "grad_norm": 630.2029418945312, + "learning_rate": 1.4383838383838386e-06, + "loss": 86.1591, + "step": 1780 + }, + { + "epoch": 0.007231826500806005, + "grad_norm": 313.6539611816406, + "learning_rate": 1.4464646464646464e-06, + "loss": 43.7239, + "step": 1790 + }, + { + "epoch": 0.007272227766173637, + "grad_norm": 443.1033630371094, + "learning_rate": 1.4545454545454546e-06, + "loss": 45.1063, + "step": 1800 + }, + { + "epoch": 0.007312629031541268, + "grad_norm": 435.49420166015625, + "learning_rate": 1.4626262626262627e-06, + "loss": 83.8737, + "step": 1810 + }, + { + "epoch": 0.007353030296908899, + "grad_norm": 600.2357177734375, + "learning_rate": 1.470707070707071e-06, + "loss": 92.0006, + "step": 1820 + }, + { + "epoch": 0.007393431562276531, + "grad_norm": 720.475341796875, + "learning_rate": 1.478787878787879e-06, + "loss": 83.276, + "step": 1830 + }, + { + "epoch": 0.007433832827644162, + "grad_norm": 837.3829956054688, + "learning_rate": 1.4868686868686872e-06, + "loss": 63.679, + "step": 1840 + }, + { + "epoch": 0.007474234093011793, + "grad_norm": 749.28564453125, + "learning_rate": 1.494949494949495e-06, + "loss": 66.9486, + "step": 1850 + }, + { + "epoch": 0.007514635358379425, + "grad_norm": 447.3569030761719, + "learning_rate": 1.5030303030303032e-06, + "loss": 75.0599, + "step": 1860 + }, + { + "epoch": 0.007555036623747056, + "grad_norm": 687.4884033203125, + "learning_rate": 1.5111111111111112e-06, + "loss": 93.3957, + "step": 1870 + }, + { + "epoch": 0.007595437889114687, + "grad_norm": 926.2616577148438, + "learning_rate": 1.5191919191919195e-06, + "loss": 54.0039, + "step": 1880 + }, + { + "epoch": 0.007635839154482319, + "grad_norm": 1626.1761474609375, + "learning_rate": 1.5272727272727275e-06, + "loss": 78.25, + "step": 1890 + }, + { + "epoch": 0.00767624041984995, + "grad_norm": 894.1566772460938, + "learning_rate": 1.5353535353535353e-06, + "loss": 72.2371, + "step": 1900 + }, + { + "epoch": 0.007716641685217581, + "grad_norm": 1152.4473876953125, + "learning_rate": 1.5434343434343435e-06, + "loss": 63.9651, + "step": 1910 + }, + { + "epoch": 0.0077570429505852126, + "grad_norm": 378.2879333496094, + "learning_rate": 1.5515151515151516e-06, + "loss": 69.7281, + "step": 1920 + }, + { + "epoch": 0.007797444215952844, + "grad_norm": 668.77294921875, + "learning_rate": 1.5595959595959598e-06, + "loss": 75.3944, + "step": 1930 + }, + { + "epoch": 0.007837845481320474, + "grad_norm": 1068.0228271484375, + "learning_rate": 1.5676767676767678e-06, + "loss": 70.1825, + "step": 1940 + }, + { + "epoch": 0.007878246746688106, + "grad_norm": 1168.907958984375, + "learning_rate": 1.5757575757575759e-06, + "loss": 76.5451, + "step": 1950 + }, + { + "epoch": 0.007918648012055737, + "grad_norm": 556.515625, + "learning_rate": 1.5838383838383839e-06, + "loss": 75.1146, + "step": 1960 + }, + { + "epoch": 0.007959049277423368, + "grad_norm": 4213.0146484375, + "learning_rate": 1.5919191919191921e-06, + "loss": 86.7434, + "step": 1970 + }, + { + "epoch": 0.007999450542791, + "grad_norm": 190.214111328125, + "learning_rate": 1.6000000000000001e-06, + "loss": 87.13, + "step": 1980 + }, + { + "epoch": 0.008039851808158631, + "grad_norm": 732.1063232421875, + "learning_rate": 1.6080808080808084e-06, + "loss": 62.2306, + "step": 1990 + }, + { + "epoch": 0.008080253073526262, + "grad_norm": 601.2501220703125, + "learning_rate": 1.6161616161616164e-06, + "loss": 50.099, + "step": 2000 + }, + { + "epoch": 0.008120654338893894, + "grad_norm": 982.2610473632812, + "learning_rate": 1.6242424242424242e-06, + "loss": 83.7663, + "step": 2010 + }, + { + "epoch": 0.008161055604261525, + "grad_norm": 738.6604614257812, + "learning_rate": 1.6323232323232325e-06, + "loss": 79.6428, + "step": 2020 + }, + { + "epoch": 0.008201456869629156, + "grad_norm": 517.3074951171875, + "learning_rate": 1.6404040404040405e-06, + "loss": 65.7427, + "step": 2030 + }, + { + "epoch": 0.008241858134996788, + "grad_norm": 908.445556640625, + "learning_rate": 1.6484848484848487e-06, + "loss": 78.0444, + "step": 2040 + }, + { + "epoch": 0.008282259400364419, + "grad_norm": 1011.9039306640625, + "learning_rate": 1.6565656565656567e-06, + "loss": 51.7269, + "step": 2050 + }, + { + "epoch": 0.00832266066573205, + "grad_norm": 917.6417236328125, + "learning_rate": 1.6646464646464648e-06, + "loss": 86.5933, + "step": 2060 + }, + { + "epoch": 0.008363061931099681, + "grad_norm": 795.33544921875, + "learning_rate": 1.6727272727272728e-06, + "loss": 108.7873, + "step": 2070 + }, + { + "epoch": 0.008403463196467313, + "grad_norm": 554.3195190429688, + "learning_rate": 1.680808080808081e-06, + "loss": 60.6646, + "step": 2080 + }, + { + "epoch": 0.008443864461834944, + "grad_norm": 674.0402221679688, + "learning_rate": 1.688888888888889e-06, + "loss": 61.3736, + "step": 2090 + }, + { + "epoch": 0.008484265727202575, + "grad_norm": 620.1227416992188, + "learning_rate": 1.6969696969696973e-06, + "loss": 46.0749, + "step": 2100 + }, + { + "epoch": 0.008524666992570207, + "grad_norm": 664.8109741210938, + "learning_rate": 1.705050505050505e-06, + "loss": 47.4902, + "step": 2110 + }, + { + "epoch": 0.008565068257937838, + "grad_norm": 698.6871337890625, + "learning_rate": 1.7131313131313131e-06, + "loss": 73.7654, + "step": 2120 + }, + { + "epoch": 0.00860546952330547, + "grad_norm": 557.4563598632812, + "learning_rate": 1.7212121212121214e-06, + "loss": 74.8754, + "step": 2130 + }, + { + "epoch": 0.0086458707886731, + "grad_norm": 951.68701171875, + "learning_rate": 1.7292929292929294e-06, + "loss": 75.8206, + "step": 2140 + }, + { + "epoch": 0.008686272054040732, + "grad_norm": 607.4713134765625, + "learning_rate": 1.7373737373737376e-06, + "loss": 65.7856, + "step": 2150 + }, + { + "epoch": 0.008726673319408363, + "grad_norm": 561.9832763671875, + "learning_rate": 1.7454545454545456e-06, + "loss": 71.6938, + "step": 2160 + }, + { + "epoch": 0.008767074584775995, + "grad_norm": 576.9796142578125, + "learning_rate": 1.7535353535353537e-06, + "loss": 69.7876, + "step": 2170 + }, + { + "epoch": 0.008807475850143626, + "grad_norm": 774.2403564453125, + "learning_rate": 1.7616161616161617e-06, + "loss": 70.9791, + "step": 2180 + }, + { + "epoch": 0.008847877115511257, + "grad_norm": 826.15625, + "learning_rate": 1.76969696969697e-06, + "loss": 46.3716, + "step": 2190 + }, + { + "epoch": 0.008888278380878889, + "grad_norm": 614.9705200195312, + "learning_rate": 1.777777777777778e-06, + "loss": 55.0711, + "step": 2200 + }, + { + "epoch": 0.00892867964624652, + "grad_norm": 385.35986328125, + "learning_rate": 1.7858585858585862e-06, + "loss": 55.8608, + "step": 2210 + }, + { + "epoch": 0.008969080911614151, + "grad_norm": 900.0477905273438, + "learning_rate": 1.793939393939394e-06, + "loss": 63.3841, + "step": 2220 + }, + { + "epoch": 0.009009482176981783, + "grad_norm": 484.96966552734375, + "learning_rate": 1.802020202020202e-06, + "loss": 80.3578, + "step": 2230 + }, + { + "epoch": 0.009049883442349414, + "grad_norm": 598.5676879882812, + "learning_rate": 1.8101010101010103e-06, + "loss": 72.6313, + "step": 2240 + }, + { + "epoch": 0.009090284707717045, + "grad_norm": 540.750244140625, + "learning_rate": 1.8181818181818183e-06, + "loss": 57.7766, + "step": 2250 + }, + { + "epoch": 0.009130685973084677, + "grad_norm": 1184.3426513671875, + "learning_rate": 1.8262626262626265e-06, + "loss": 48.5757, + "step": 2260 + }, + { + "epoch": 0.009171087238452308, + "grad_norm": 752.9002075195312, + "learning_rate": 1.8343434343434343e-06, + "loss": 97.9035, + "step": 2270 + }, + { + "epoch": 0.00921148850381994, + "grad_norm": 420.1020202636719, + "learning_rate": 1.8424242424242426e-06, + "loss": 75.47, + "step": 2280 + }, + { + "epoch": 0.00925188976918757, + "grad_norm": 605.6198120117188, + "learning_rate": 1.8505050505050506e-06, + "loss": 54.201, + "step": 2290 + }, + { + "epoch": 0.009292291034555202, + "grad_norm": 908.86376953125, + "learning_rate": 1.8585858585858588e-06, + "loss": 42.1971, + "step": 2300 + }, + { + "epoch": 0.009332692299922833, + "grad_norm": 719.4215087890625, + "learning_rate": 1.8666666666666669e-06, + "loss": 75.1888, + "step": 2310 + }, + { + "epoch": 0.009373093565290464, + "grad_norm": 834.7787475585938, + "learning_rate": 1.874747474747475e-06, + "loss": 69.2066, + "step": 2320 + }, + { + "epoch": 0.009413494830658096, + "grad_norm": 1182.236572265625, + "learning_rate": 1.882828282828283e-06, + "loss": 58.0279, + "step": 2330 + }, + { + "epoch": 0.009453896096025727, + "grad_norm": 804.4097900390625, + "learning_rate": 1.890909090909091e-06, + "loss": 65.1326, + "step": 2340 + }, + { + "epoch": 0.009494297361393358, + "grad_norm": 503.9187316894531, + "learning_rate": 1.8989898989898992e-06, + "loss": 90.3938, + "step": 2350 + }, + { + "epoch": 0.00953469862676099, + "grad_norm": 667.1215209960938, + "learning_rate": 1.9070707070707072e-06, + "loss": 92.9535, + "step": 2360 + }, + { + "epoch": 0.009575099892128621, + "grad_norm": 962.7706909179688, + "learning_rate": 1.9151515151515154e-06, + "loss": 62.9176, + "step": 2370 + }, + { + "epoch": 0.009615501157496252, + "grad_norm": 644.2401733398438, + "learning_rate": 1.9232323232323235e-06, + "loss": 52.7077, + "step": 2380 + }, + { + "epoch": 0.009655902422863884, + "grad_norm": 651.3950805664062, + "learning_rate": 1.9313131313131315e-06, + "loss": 65.4711, + "step": 2390 + }, + { + "epoch": 0.009696303688231515, + "grad_norm": 240.46392822265625, + "learning_rate": 1.9393939393939395e-06, + "loss": 63.148, + "step": 2400 + }, + { + "epoch": 0.009736704953599146, + "grad_norm": 590.9519653320312, + "learning_rate": 1.9474747474747475e-06, + "loss": 46.7516, + "step": 2410 + }, + { + "epoch": 0.009777106218966778, + "grad_norm": 698.1563110351562, + "learning_rate": 1.955555555555556e-06, + "loss": 52.9271, + "step": 2420 + }, + { + "epoch": 0.009817507484334409, + "grad_norm": 657.922119140625, + "learning_rate": 1.9636363636363636e-06, + "loss": 70.142, + "step": 2430 + }, + { + "epoch": 0.00985790874970204, + "grad_norm": 598.1620483398438, + "learning_rate": 1.9717171717171716e-06, + "loss": 47.5373, + "step": 2440 + }, + { + "epoch": 0.009898310015069672, + "grad_norm": 277.993408203125, + "learning_rate": 1.97979797979798e-06, + "loss": 51.4112, + "step": 2450 + }, + { + "epoch": 0.009938711280437303, + "grad_norm": 552.108642578125, + "learning_rate": 1.987878787878788e-06, + "loss": 34.7859, + "step": 2460 + }, + { + "epoch": 0.009979112545804934, + "grad_norm": 859.3973999023438, + "learning_rate": 1.995959595959596e-06, + "loss": 72.8732, + "step": 2470 + }, + { + "epoch": 0.010019513811172566, + "grad_norm": 838.69140625, + "learning_rate": 2.004040404040404e-06, + "loss": 57.4993, + "step": 2480 + }, + { + "epoch": 0.010059915076540197, + "grad_norm": 505.9194030761719, + "learning_rate": 2.012121212121212e-06, + "loss": 85.4388, + "step": 2490 + }, + { + "epoch": 0.010100316341907828, + "grad_norm": 369.4288330078125, + "learning_rate": 2.02020202020202e-06, + "loss": 32.7626, + "step": 2500 + }, + { + "epoch": 0.01014071760727546, + "grad_norm": 877.2677612304688, + "learning_rate": 2.0282828282828286e-06, + "loss": 60.406, + "step": 2510 + }, + { + "epoch": 0.010181118872643091, + "grad_norm": 718.6683959960938, + "learning_rate": 2.0363636363636367e-06, + "loss": 58.8499, + "step": 2520 + }, + { + "epoch": 0.010221520138010722, + "grad_norm": 620.8602905273438, + "learning_rate": 2.0444444444444447e-06, + "loss": 40.9196, + "step": 2530 + }, + { + "epoch": 0.010261921403378354, + "grad_norm": 448.3660583496094, + "learning_rate": 2.0525252525252527e-06, + "loss": 62.7339, + "step": 2540 + }, + { + "epoch": 0.010302322668745985, + "grad_norm": 698.1956787109375, + "learning_rate": 2.0606060606060607e-06, + "loss": 94.1049, + "step": 2550 + }, + { + "epoch": 0.010342723934113616, + "grad_norm": 422.0584716796875, + "learning_rate": 2.0686868686868688e-06, + "loss": 52.2436, + "step": 2560 + }, + { + "epoch": 0.010383125199481247, + "grad_norm": 1026.939697265625, + "learning_rate": 2.0767676767676768e-06, + "loss": 81.9264, + "step": 2570 + }, + { + "epoch": 0.010423526464848879, + "grad_norm": 672.6967163085938, + "learning_rate": 2.0848484848484852e-06, + "loss": 49.8656, + "step": 2580 + }, + { + "epoch": 0.01046392773021651, + "grad_norm": 1137.779052734375, + "learning_rate": 2.092929292929293e-06, + "loss": 49.7138, + "step": 2590 + }, + { + "epoch": 0.010504328995584141, + "grad_norm": 709.0454711914062, + "learning_rate": 2.1010101010101013e-06, + "loss": 50.5514, + "step": 2600 + }, + { + "epoch": 0.010544730260951773, + "grad_norm": 1094.946044921875, + "learning_rate": 2.1090909090909093e-06, + "loss": 54.4812, + "step": 2610 + }, + { + "epoch": 0.010585131526319404, + "grad_norm": 695.2977294921875, + "learning_rate": 2.1171717171717173e-06, + "loss": 58.17, + "step": 2620 + }, + { + "epoch": 0.010625532791687035, + "grad_norm": 639.0679931640625, + "learning_rate": 2.1252525252525254e-06, + "loss": 66.6043, + "step": 2630 + }, + { + "epoch": 0.010665934057054667, + "grad_norm": 443.9601135253906, + "learning_rate": 2.133333333333334e-06, + "loss": 88.869, + "step": 2640 + }, + { + "epoch": 0.010706335322422298, + "grad_norm": 601.3858032226562, + "learning_rate": 2.1414141414141414e-06, + "loss": 63.8113, + "step": 2650 + }, + { + "epoch": 0.01074673658778993, + "grad_norm": 216.58883666992188, + "learning_rate": 2.1494949494949494e-06, + "loss": 53.4207, + "step": 2660 + }, + { + "epoch": 0.01078713785315756, + "grad_norm": 796.6296997070312, + "learning_rate": 2.157575757575758e-06, + "loss": 73.9355, + "step": 2670 + }, + { + "epoch": 0.010827539118525192, + "grad_norm": 429.7906188964844, + "learning_rate": 2.165656565656566e-06, + "loss": 52.2577, + "step": 2680 + }, + { + "epoch": 0.010867940383892823, + "grad_norm": 961.33740234375, + "learning_rate": 2.173737373737374e-06, + "loss": 63.543, + "step": 2690 + }, + { + "epoch": 0.010908341649260455, + "grad_norm": 1032.55810546875, + "learning_rate": 2.181818181818182e-06, + "loss": 75.2953, + "step": 2700 + }, + { + "epoch": 0.010948742914628086, + "grad_norm": 707.4940185546875, + "learning_rate": 2.18989898989899e-06, + "loss": 155.0811, + "step": 2710 + }, + { + "epoch": 0.010989144179995717, + "grad_norm": 632.8768310546875, + "learning_rate": 2.197979797979798e-06, + "loss": 111.3545, + "step": 2720 + }, + { + "epoch": 0.011029545445363349, + "grad_norm": 422.77349853515625, + "learning_rate": 2.2060606060606064e-06, + "loss": 48.8855, + "step": 2730 + }, + { + "epoch": 0.01106994671073098, + "grad_norm": 1005.6026611328125, + "learning_rate": 2.2141414141414145e-06, + "loss": 71.3869, + "step": 2740 + }, + { + "epoch": 0.011110347976098611, + "grad_norm": 618.6168212890625, + "learning_rate": 2.222222222222222e-06, + "loss": 50.2819, + "step": 2750 + }, + { + "epoch": 0.011150749241466243, + "grad_norm": 661.1751708984375, + "learning_rate": 2.2303030303030305e-06, + "loss": 65.9274, + "step": 2760 + }, + { + "epoch": 0.011191150506833874, + "grad_norm": 718.5821533203125, + "learning_rate": 2.2383838383838385e-06, + "loss": 45.7298, + "step": 2770 + }, + { + "epoch": 0.011231551772201505, + "grad_norm": 375.16595458984375, + "learning_rate": 2.2464646464646466e-06, + "loss": 64.9801, + "step": 2780 + }, + { + "epoch": 0.011271953037569137, + "grad_norm": 290.95556640625, + "learning_rate": 2.254545454545455e-06, + "loss": 53.4231, + "step": 2790 + }, + { + "epoch": 0.011312354302936768, + "grad_norm": 753.8040771484375, + "learning_rate": 2.262626262626263e-06, + "loss": 54.899, + "step": 2800 + }, + { + "epoch": 0.0113527555683044, + "grad_norm": 948.993896484375, + "learning_rate": 2.2707070707070706e-06, + "loss": 139.1114, + "step": 2810 + }, + { + "epoch": 0.01139315683367203, + "grad_norm": 564.3215942382812, + "learning_rate": 2.278787878787879e-06, + "loss": 55.8284, + "step": 2820 + }, + { + "epoch": 0.011433558099039662, + "grad_norm": 884.8452758789062, + "learning_rate": 2.286868686868687e-06, + "loss": 74.0817, + "step": 2830 + }, + { + "epoch": 0.011473959364407293, + "grad_norm": 691.4619750976562, + "learning_rate": 2.294949494949495e-06, + "loss": 65.5481, + "step": 2840 + }, + { + "epoch": 0.011514360629774924, + "grad_norm": 513.1780395507812, + "learning_rate": 2.303030303030303e-06, + "loss": 62.6853, + "step": 2850 + }, + { + "epoch": 0.011554761895142556, + "grad_norm": 446.5616760253906, + "learning_rate": 2.311111111111111e-06, + "loss": 64.6615, + "step": 2860 + }, + { + "epoch": 0.011595163160510187, + "grad_norm": 1361.8853759765625, + "learning_rate": 2.3191919191919192e-06, + "loss": 74.9464, + "step": 2870 + }, + { + "epoch": 0.011635564425877818, + "grad_norm": 511.1175537109375, + "learning_rate": 2.3272727272727277e-06, + "loss": 40.8995, + "step": 2880 + }, + { + "epoch": 0.01167596569124545, + "grad_norm": 935.4153442382812, + "learning_rate": 2.3353535353535357e-06, + "loss": 72.9418, + "step": 2890 + }, + { + "epoch": 0.011716366956613081, + "grad_norm": 504.27166748046875, + "learning_rate": 2.3434343434343437e-06, + "loss": 77.0979, + "step": 2900 + }, + { + "epoch": 0.011756768221980712, + "grad_norm": 407.9814147949219, + "learning_rate": 2.3515151515151517e-06, + "loss": 65.9649, + "step": 2910 + }, + { + "epoch": 0.011797169487348344, + "grad_norm": 463.7010192871094, + "learning_rate": 2.3595959595959598e-06, + "loss": 51.9431, + "step": 2920 + }, + { + "epoch": 0.011837570752715975, + "grad_norm": 493.0938720703125, + "learning_rate": 2.367676767676768e-06, + "loss": 57.3446, + "step": 2930 + }, + { + "epoch": 0.011877972018083606, + "grad_norm": 465.1329345703125, + "learning_rate": 2.375757575757576e-06, + "loss": 56.2858, + "step": 2940 + }, + { + "epoch": 0.011918373283451238, + "grad_norm": 853.31396484375, + "learning_rate": 2.3838383838383843e-06, + "loss": 66.7674, + "step": 2950 + }, + { + "epoch": 0.011958774548818869, + "grad_norm": 1173.5980224609375, + "learning_rate": 2.3919191919191923e-06, + "loss": 85.9143, + "step": 2960 + }, + { + "epoch": 0.0119991758141865, + "grad_norm": 1093.7786865234375, + "learning_rate": 2.4000000000000003e-06, + "loss": 55.0169, + "step": 2970 + }, + { + "epoch": 0.012039577079554132, + "grad_norm": 525.7014770507812, + "learning_rate": 2.4080808080808083e-06, + "loss": 46.8544, + "step": 2980 + }, + { + "epoch": 0.012079978344921763, + "grad_norm": 1659.3507080078125, + "learning_rate": 2.4161616161616164e-06, + "loss": 95.7587, + "step": 2990 + }, + { + "epoch": 0.012120379610289394, + "grad_norm": 520.4998779296875, + "learning_rate": 2.4242424242424244e-06, + "loss": 51.6112, + "step": 3000 + }, + { + "epoch": 0.012160780875657026, + "grad_norm": 1072.08447265625, + "learning_rate": 2.432323232323233e-06, + "loss": 80.0246, + "step": 3010 + }, + { + "epoch": 0.012201182141024657, + "grad_norm": 371.0710144042969, + "learning_rate": 2.4404040404040404e-06, + "loss": 59.9696, + "step": 3020 + }, + { + "epoch": 0.012241583406392288, + "grad_norm": 1110.0040283203125, + "learning_rate": 2.4484848484848485e-06, + "loss": 51.8267, + "step": 3030 + }, + { + "epoch": 0.01228198467175992, + "grad_norm": 664.203369140625, + "learning_rate": 2.456565656565657e-06, + "loss": 72.4027, + "step": 3040 + }, + { + "epoch": 0.01232238593712755, + "grad_norm": 804.1693725585938, + "learning_rate": 2.464646464646465e-06, + "loss": 82.5, + "step": 3050 + }, + { + "epoch": 0.012362787202495182, + "grad_norm": 426.992919921875, + "learning_rate": 2.472727272727273e-06, + "loss": 51.2883, + "step": 3060 + }, + { + "epoch": 0.012403188467862813, + "grad_norm": 946.348876953125, + "learning_rate": 2.480808080808081e-06, + "loss": 53.2797, + "step": 3070 + }, + { + "epoch": 0.012443589733230445, + "grad_norm": 925.2742309570312, + "learning_rate": 2.488888888888889e-06, + "loss": 64.6453, + "step": 3080 + }, + { + "epoch": 0.012483990998598076, + "grad_norm": 773.533203125, + "learning_rate": 2.496969696969697e-06, + "loss": 64.05, + "step": 3090 + }, + { + "epoch": 0.012524392263965707, + "grad_norm": 1721.887939453125, + "learning_rate": 2.5050505050505055e-06, + "loss": 89.4751, + "step": 3100 + }, + { + "epoch": 0.012564793529333339, + "grad_norm": 502.3291320800781, + "learning_rate": 2.5131313131313135e-06, + "loss": 73.6394, + "step": 3110 + }, + { + "epoch": 0.01260519479470097, + "grad_norm": 951.5177612304688, + "learning_rate": 2.5212121212121215e-06, + "loss": 69.3922, + "step": 3120 + }, + { + "epoch": 0.012645596060068601, + "grad_norm": 528.2192993164062, + "learning_rate": 2.5292929292929296e-06, + "loss": 55.9364, + "step": 3130 + }, + { + "epoch": 0.012685997325436233, + "grad_norm": 760.628662109375, + "learning_rate": 2.537373737373738e-06, + "loss": 51.1709, + "step": 3140 + }, + { + "epoch": 0.012726398590803864, + "grad_norm": 718.6551513671875, + "learning_rate": 2.5454545454545456e-06, + "loss": 88.1984, + "step": 3150 + }, + { + "epoch": 0.012766799856171495, + "grad_norm": 1814.95654296875, + "learning_rate": 2.5535353535353536e-06, + "loss": 82.0715, + "step": 3160 + }, + { + "epoch": 0.012807201121539127, + "grad_norm": 724.8761596679688, + "learning_rate": 2.5616161616161617e-06, + "loss": 50.4141, + "step": 3170 + }, + { + "epoch": 0.012847602386906758, + "grad_norm": 720.0292358398438, + "learning_rate": 2.5696969696969697e-06, + "loss": 79.8761, + "step": 3180 + }, + { + "epoch": 0.01288800365227439, + "grad_norm": 543.6854858398438, + "learning_rate": 2.577777777777778e-06, + "loss": 57.894, + "step": 3190 + }, + { + "epoch": 0.01292840491764202, + "grad_norm": 1304.784912109375, + "learning_rate": 2.585858585858586e-06, + "loss": 61.9256, + "step": 3200 + }, + { + "epoch": 0.012968806183009652, + "grad_norm": 749.076904296875, + "learning_rate": 2.593939393939394e-06, + "loss": 86.3883, + "step": 3210 + }, + { + "epoch": 0.013009207448377283, + "grad_norm": 744.2839965820312, + "learning_rate": 2.602020202020202e-06, + "loss": 58.4392, + "step": 3220 + }, + { + "epoch": 0.013049608713744915, + "grad_norm": 642.6310424804688, + "learning_rate": 2.6101010101010107e-06, + "loss": 74.5423, + "step": 3230 + }, + { + "epoch": 0.013090009979112546, + "grad_norm": 1457.65380859375, + "learning_rate": 2.6181818181818187e-06, + "loss": 68.783, + "step": 3240 + }, + { + "epoch": 0.013130411244480177, + "grad_norm": 900.1226196289062, + "learning_rate": 2.6262626262626267e-06, + "loss": 63.59, + "step": 3250 + }, + { + "epoch": 0.013170812509847809, + "grad_norm": 727.9959716796875, + "learning_rate": 2.6343434343434343e-06, + "loss": 74.8924, + "step": 3260 + }, + { + "epoch": 0.01321121377521544, + "grad_norm": 569.1546630859375, + "learning_rate": 2.6424242424242423e-06, + "loss": 75.5204, + "step": 3270 + }, + { + "epoch": 0.013251615040583071, + "grad_norm": 579.5902099609375, + "learning_rate": 2.6505050505050508e-06, + "loss": 52.7214, + "step": 3280 + }, + { + "epoch": 0.013292016305950703, + "grad_norm": 725.4396362304688, + "learning_rate": 2.658585858585859e-06, + "loss": 80.6275, + "step": 3290 + }, + { + "epoch": 0.013332417571318334, + "grad_norm": 752.127685546875, + "learning_rate": 2.666666666666667e-06, + "loss": 62.3925, + "step": 3300 + }, + { + "epoch": 0.013372818836685965, + "grad_norm": 730.8565063476562, + "learning_rate": 2.674747474747475e-06, + "loss": 46.3678, + "step": 3310 + }, + { + "epoch": 0.013413220102053596, + "grad_norm": 706.3470458984375, + "learning_rate": 2.6828282828282833e-06, + "loss": 55.6273, + "step": 3320 + }, + { + "epoch": 0.013453621367421228, + "grad_norm": 850.8480834960938, + "learning_rate": 2.6909090909090913e-06, + "loss": 76.168, + "step": 3330 + }, + { + "epoch": 0.013494022632788859, + "grad_norm": 547.6309814453125, + "learning_rate": 2.6989898989898994e-06, + "loss": 75.9509, + "step": 3340 + }, + { + "epoch": 0.01353442389815649, + "grad_norm": 2081.936279296875, + "learning_rate": 2.7070707070707074e-06, + "loss": 74.0594, + "step": 3350 + }, + { + "epoch": 0.013574825163524122, + "grad_norm": 795.8720703125, + "learning_rate": 2.715151515151516e-06, + "loss": 51.1241, + "step": 3360 + }, + { + "epoch": 0.013615226428891753, + "grad_norm": 741.885498046875, + "learning_rate": 2.7232323232323234e-06, + "loss": 93.6878, + "step": 3370 + }, + { + "epoch": 0.013655627694259384, + "grad_norm": 540.7803955078125, + "learning_rate": 2.7313131313131315e-06, + "loss": 41.2008, + "step": 3380 + }, + { + "epoch": 0.013696028959627016, + "grad_norm": 510.04937744140625, + "learning_rate": 2.7393939393939395e-06, + "loss": 64.7001, + "step": 3390 + }, + { + "epoch": 0.013736430224994647, + "grad_norm": 743.4458618164062, + "learning_rate": 2.7474747474747475e-06, + "loss": 59.3923, + "step": 3400 + }, + { + "epoch": 0.013776831490362278, + "grad_norm": 824.0260009765625, + "learning_rate": 2.755555555555556e-06, + "loss": 103.2661, + "step": 3410 + }, + { + "epoch": 0.01381723275572991, + "grad_norm": 809.0377807617188, + "learning_rate": 2.763636363636364e-06, + "loss": 65.098, + "step": 3420 + }, + { + "epoch": 0.013857634021097541, + "grad_norm": 1693.0699462890625, + "learning_rate": 2.771717171717172e-06, + "loss": 50.6526, + "step": 3430 + }, + { + "epoch": 0.013898035286465172, + "grad_norm": 519.3567504882812, + "learning_rate": 2.77979797979798e-06, + "loss": 32.3857, + "step": 3440 + }, + { + "epoch": 0.013938436551832804, + "grad_norm": 293.765380859375, + "learning_rate": 2.7878787878787885e-06, + "loss": 44.1302, + "step": 3450 + }, + { + "epoch": 0.013978837817200435, + "grad_norm": 879.6158447265625, + "learning_rate": 2.7959595959595965e-06, + "loss": 49.3538, + "step": 3460 + }, + { + "epoch": 0.014019239082568066, + "grad_norm": 616.3663330078125, + "learning_rate": 2.804040404040404e-06, + "loss": 53.0307, + "step": 3470 + }, + { + "epoch": 0.014059640347935698, + "grad_norm": 987.908447265625, + "learning_rate": 2.812121212121212e-06, + "loss": 63.739, + "step": 3480 + }, + { + "epoch": 0.014100041613303329, + "grad_norm": 767.0664672851562, + "learning_rate": 2.82020202020202e-06, + "loss": 59.8414, + "step": 3490 + }, + { + "epoch": 0.01414044287867096, + "grad_norm": 1181.593994140625, + "learning_rate": 2.8282828282828286e-06, + "loss": 55.1145, + "step": 3500 + }, + { + "epoch": 0.014180844144038592, + "grad_norm": 512.4702758789062, + "learning_rate": 2.8363636363636366e-06, + "loss": 49.1521, + "step": 3510 + }, + { + "epoch": 0.014221245409406223, + "grad_norm": 889.2345581054688, + "learning_rate": 2.8444444444444446e-06, + "loss": 46.9908, + "step": 3520 + }, + { + "epoch": 0.014261646674773854, + "grad_norm": 616.6620483398438, + "learning_rate": 2.8525252525252527e-06, + "loss": 55.8178, + "step": 3530 + }, + { + "epoch": 0.014302047940141486, + "grad_norm": 950.4310302734375, + "learning_rate": 2.860606060606061e-06, + "loss": 44.9625, + "step": 3540 + }, + { + "epoch": 0.014342449205509117, + "grad_norm": 678.1963500976562, + "learning_rate": 2.868686868686869e-06, + "loss": 47.2252, + "step": 3550 + }, + { + "epoch": 0.014382850470876748, + "grad_norm": 274.08941650390625, + "learning_rate": 2.876767676767677e-06, + "loss": 54.4971, + "step": 3560 + }, + { + "epoch": 0.01442325173624438, + "grad_norm": 702.7638549804688, + "learning_rate": 2.884848484848485e-06, + "loss": 51.9829, + "step": 3570 + }, + { + "epoch": 0.01446365300161201, + "grad_norm": 377.993408203125, + "learning_rate": 2.892929292929293e-06, + "loss": 38.773, + "step": 3580 + }, + { + "epoch": 0.014504054266979642, + "grad_norm": 606.4713745117188, + "learning_rate": 2.9010101010101012e-06, + "loss": 48.4606, + "step": 3590 + }, + { + "epoch": 0.014544455532347273, + "grad_norm": 941.6731567382812, + "learning_rate": 2.9090909090909093e-06, + "loss": 49.9611, + "step": 3600 + }, + { + "epoch": 0.014584856797714905, + "grad_norm": 576.7796630859375, + "learning_rate": 2.9171717171717173e-06, + "loss": 77.4996, + "step": 3610 + }, + { + "epoch": 0.014625258063082536, + "grad_norm": 582.7783203125, + "learning_rate": 2.9252525252525253e-06, + "loss": 65.8574, + "step": 3620 + }, + { + "epoch": 0.014665659328450167, + "grad_norm": 340.5007019042969, + "learning_rate": 2.9333333333333338e-06, + "loss": 56.075, + "step": 3630 + }, + { + "epoch": 0.014706060593817799, + "grad_norm": 1056.101806640625, + "learning_rate": 2.941414141414142e-06, + "loss": 54.8923, + "step": 3640 + }, + { + "epoch": 0.01474646185918543, + "grad_norm": 697.7027587890625, + "learning_rate": 2.94949494949495e-06, + "loss": 68.0702, + "step": 3650 + }, + { + "epoch": 0.014786863124553061, + "grad_norm": 711.9849243164062, + "learning_rate": 2.957575757575758e-06, + "loss": 57.5173, + "step": 3660 + }, + { + "epoch": 0.014827264389920693, + "grad_norm": 645.8955078125, + "learning_rate": 2.9656565656565663e-06, + "loss": 77.3891, + "step": 3670 + }, + { + "epoch": 0.014867665655288324, + "grad_norm": 836.5162353515625, + "learning_rate": 2.9737373737373743e-06, + "loss": 54.3535, + "step": 3680 + }, + { + "epoch": 0.014908066920655955, + "grad_norm": 756.8634643554688, + "learning_rate": 2.981818181818182e-06, + "loss": 73.0932, + "step": 3690 + }, + { + "epoch": 0.014948468186023587, + "grad_norm": 840.5918579101562, + "learning_rate": 2.98989898989899e-06, + "loss": 119.6765, + "step": 3700 + }, + { + "epoch": 0.014988869451391218, + "grad_norm": 485.3866271972656, + "learning_rate": 2.997979797979798e-06, + "loss": 57.1866, + "step": 3710 + }, + { + "epoch": 0.01502927071675885, + "grad_norm": 1064.0423583984375, + "learning_rate": 3.0060606060606064e-06, + "loss": 65.6966, + "step": 3720 + }, + { + "epoch": 0.01506967198212648, + "grad_norm": 899.6082763671875, + "learning_rate": 3.0141414141414144e-06, + "loss": 77.0435, + "step": 3730 + }, + { + "epoch": 0.015110073247494112, + "grad_norm": 542.0571899414062, + "learning_rate": 3.0222222222222225e-06, + "loss": 61.2996, + "step": 3740 + }, + { + "epoch": 0.015150474512861743, + "grad_norm": 1062.953369140625, + "learning_rate": 3.0303030303030305e-06, + "loss": 81.9733, + "step": 3750 + }, + { + "epoch": 0.015190875778229375, + "grad_norm": 423.0398864746094, + "learning_rate": 3.038383838383839e-06, + "loss": 49.3342, + "step": 3760 + }, + { + "epoch": 0.015231277043597006, + "grad_norm": 587.5816650390625, + "learning_rate": 3.046464646464647e-06, + "loss": 71.2666, + "step": 3770 + }, + { + "epoch": 0.015271678308964637, + "grad_norm": 827.7647705078125, + "learning_rate": 3.054545454545455e-06, + "loss": 58.1244, + "step": 3780 + }, + { + "epoch": 0.015312079574332269, + "grad_norm": 819.0059814453125, + "learning_rate": 3.0626262626262626e-06, + "loss": 69.2099, + "step": 3790 + }, + { + "epoch": 0.0153524808396999, + "grad_norm": 538.2634887695312, + "learning_rate": 3.0707070707070706e-06, + "loss": 41.0661, + "step": 3800 + }, + { + "epoch": 0.015392882105067531, + "grad_norm": 755.6787109375, + "learning_rate": 3.078787878787879e-06, + "loss": 63.0487, + "step": 3810 + }, + { + "epoch": 0.015433283370435162, + "grad_norm": 507.8043518066406, + "learning_rate": 3.086868686868687e-06, + "loss": 49.7004, + "step": 3820 + }, + { + "epoch": 0.015473684635802794, + "grad_norm": 783.618896484375, + "learning_rate": 3.094949494949495e-06, + "loss": 63.6786, + "step": 3830 + }, + { + "epoch": 0.015514085901170425, + "grad_norm": 573.7366943359375, + "learning_rate": 3.103030303030303e-06, + "loss": 51.0182, + "step": 3840 + }, + { + "epoch": 0.015554487166538056, + "grad_norm": 435.3841247558594, + "learning_rate": 3.1111111111111116e-06, + "loss": 58.01, + "step": 3850 + }, + { + "epoch": 0.015594888431905688, + "grad_norm": 979.0087890625, + "learning_rate": 3.1191919191919196e-06, + "loss": 49.2491, + "step": 3860 + }, + { + "epoch": 0.01563528969727332, + "grad_norm": 361.8955078125, + "learning_rate": 3.1272727272727276e-06, + "loss": 56.3938, + "step": 3870 + }, + { + "epoch": 0.01567569096264095, + "grad_norm": 323.97381591796875, + "learning_rate": 3.1353535353535357e-06, + "loss": 49.1725, + "step": 3880 + }, + { + "epoch": 0.01571609222800858, + "grad_norm": 501.0435485839844, + "learning_rate": 3.143434343434344e-06, + "loss": 52.2216, + "step": 3890 + }, + { + "epoch": 0.01575649349337621, + "grad_norm": 1032.385009765625, + "learning_rate": 3.1515151515151517e-06, + "loss": 60.1772, + "step": 3900 + }, + { + "epoch": 0.015796894758743844, + "grad_norm": 400.3407287597656, + "learning_rate": 3.1595959595959597e-06, + "loss": 53.0336, + "step": 3910 + }, + { + "epoch": 0.015837296024111474, + "grad_norm": 961.6312866210938, + "learning_rate": 3.1676767676767678e-06, + "loss": 46.5452, + "step": 3920 + }, + { + "epoch": 0.015877697289479107, + "grad_norm": 697.0581665039062, + "learning_rate": 3.1757575757575758e-06, + "loss": 69.2422, + "step": 3930 + }, + { + "epoch": 0.015918098554846737, + "grad_norm": 744.8246459960938, + "learning_rate": 3.1838383838383842e-06, + "loss": 46.6851, + "step": 3940 + }, + { + "epoch": 0.01595849982021437, + "grad_norm": 454.99267578125, + "learning_rate": 3.1919191919191923e-06, + "loss": 69.5364, + "step": 3950 + }, + { + "epoch": 0.015998901085582, + "grad_norm": 890.9744262695312, + "learning_rate": 3.2000000000000003e-06, + "loss": 70.4928, + "step": 3960 + }, + { + "epoch": 0.016039302350949632, + "grad_norm": 381.5486755371094, + "learning_rate": 3.2080808080808083e-06, + "loss": 52.5701, + "step": 3970 + }, + { + "epoch": 0.016079703616317262, + "grad_norm": 722.4985961914062, + "learning_rate": 3.2161616161616168e-06, + "loss": 39.8047, + "step": 3980 + }, + { + "epoch": 0.016120104881684895, + "grad_norm": 457.2010498046875, + "learning_rate": 3.2242424242424248e-06, + "loss": 73.1557, + "step": 3990 + }, + { + "epoch": 0.016160506147052525, + "grad_norm": 412.5587463378906, + "learning_rate": 3.232323232323233e-06, + "loss": 79.8474, + "step": 4000 + }, + { + "epoch": 0.016200907412420158, + "grad_norm": 432.9417419433594, + "learning_rate": 3.2404040404040404e-06, + "loss": 58.3798, + "step": 4010 + }, + { + "epoch": 0.016241308677787787, + "grad_norm": 883.053466796875, + "learning_rate": 3.2484848484848484e-06, + "loss": 56.1412, + "step": 4020 + }, + { + "epoch": 0.01628170994315542, + "grad_norm": 433.01910400390625, + "learning_rate": 3.256565656565657e-06, + "loss": 78.8629, + "step": 4030 + }, + { + "epoch": 0.01632211120852305, + "grad_norm": 590.1842651367188, + "learning_rate": 3.264646464646465e-06, + "loss": 74.5015, + "step": 4040 + }, + { + "epoch": 0.016362512473890683, + "grad_norm": 1087.239501953125, + "learning_rate": 3.272727272727273e-06, + "loss": 47.2005, + "step": 4050 + }, + { + "epoch": 0.016402913739258312, + "grad_norm": 547.9854736328125, + "learning_rate": 3.280808080808081e-06, + "loss": 88.357, + "step": 4060 + }, + { + "epoch": 0.016443315004625945, + "grad_norm": 1154.8641357421875, + "learning_rate": 3.2888888888888894e-06, + "loss": 55.9507, + "step": 4070 + }, + { + "epoch": 0.016483716269993575, + "grad_norm": 696.9087524414062, + "learning_rate": 3.2969696969696974e-06, + "loss": 56.5099, + "step": 4080 + }, + { + "epoch": 0.016524117535361208, + "grad_norm": 612.0421142578125, + "learning_rate": 3.3050505050505054e-06, + "loss": 73.3457, + "step": 4090 + }, + { + "epoch": 0.016564518800728838, + "grad_norm": 935.5057373046875, + "learning_rate": 3.3131313131313135e-06, + "loss": 50.4606, + "step": 4100 + }, + { + "epoch": 0.01660492006609647, + "grad_norm": 888.5339965820312, + "learning_rate": 3.321212121212121e-06, + "loss": 56.5149, + "step": 4110 + }, + { + "epoch": 0.0166453213314641, + "grad_norm": 392.083740234375, + "learning_rate": 3.3292929292929295e-06, + "loss": 66.5389, + "step": 4120 + }, + { + "epoch": 0.016685722596831733, + "grad_norm": 340.5673522949219, + "learning_rate": 3.3373737373737375e-06, + "loss": 75.7454, + "step": 4130 + }, + { + "epoch": 0.016726123862199363, + "grad_norm": 668.51025390625, + "learning_rate": 3.3454545454545456e-06, + "loss": 52.3896, + "step": 4140 + }, + { + "epoch": 0.016766525127566996, + "grad_norm": 728.2340698242188, + "learning_rate": 3.3535353535353536e-06, + "loss": 80.3662, + "step": 4150 + }, + { + "epoch": 0.016806926392934626, + "grad_norm": 497.7481384277344, + "learning_rate": 3.361616161616162e-06, + "loss": 68.5763, + "step": 4160 + }, + { + "epoch": 0.01684732765830226, + "grad_norm": 589.8648071289062, + "learning_rate": 3.36969696969697e-06, + "loss": 87.7933, + "step": 4170 + }, + { + "epoch": 0.016887728923669888, + "grad_norm": 1049.1983642578125, + "learning_rate": 3.377777777777778e-06, + "loss": 58.2885, + "step": 4180 + }, + { + "epoch": 0.01692813018903752, + "grad_norm": 472.6341247558594, + "learning_rate": 3.385858585858586e-06, + "loss": 48.8271, + "step": 4190 + }, + { + "epoch": 0.01696853145440515, + "grad_norm": 648.27734375, + "learning_rate": 3.3939393939393946e-06, + "loss": 64.2691, + "step": 4200 + }, + { + "epoch": 0.017008932719772784, + "grad_norm": 446.6534729003906, + "learning_rate": 3.4020202020202026e-06, + "loss": 65.1291, + "step": 4210 + }, + { + "epoch": 0.017049333985140414, + "grad_norm": 212.90321350097656, + "learning_rate": 3.41010101010101e-06, + "loss": 41.2398, + "step": 4220 + }, + { + "epoch": 0.017089735250508047, + "grad_norm": 1060.3450927734375, + "learning_rate": 3.4181818181818182e-06, + "loss": 74.0737, + "step": 4230 + }, + { + "epoch": 0.017130136515875676, + "grad_norm": 622.3221435546875, + "learning_rate": 3.4262626262626262e-06, + "loss": 63.4254, + "step": 4240 + }, + { + "epoch": 0.01717053778124331, + "grad_norm": 1164.900634765625, + "learning_rate": 3.4343434343434347e-06, + "loss": 76.6035, + "step": 4250 + }, + { + "epoch": 0.01721093904661094, + "grad_norm": 513.646728515625, + "learning_rate": 3.4424242424242427e-06, + "loss": 66.3594, + "step": 4260 + }, + { + "epoch": 0.017251340311978572, + "grad_norm": 457.9349060058594, + "learning_rate": 3.4505050505050507e-06, + "loss": 55.117, + "step": 4270 + }, + { + "epoch": 0.0172917415773462, + "grad_norm": 786.7095336914062, + "learning_rate": 3.4585858585858588e-06, + "loss": 68.2138, + "step": 4280 + }, + { + "epoch": 0.017332142842713835, + "grad_norm": 131.28732299804688, + "learning_rate": 3.4666666666666672e-06, + "loss": 33.2112, + "step": 4290 + }, + { + "epoch": 0.017372544108081464, + "grad_norm": 1183.358642578125, + "learning_rate": 3.4747474747474752e-06, + "loss": 57.2951, + "step": 4300 + }, + { + "epoch": 0.017412945373449097, + "grad_norm": 770.9044799804688, + "learning_rate": 3.4828282828282833e-06, + "loss": 73.1749, + "step": 4310 + }, + { + "epoch": 0.017453346638816727, + "grad_norm": 920.8295288085938, + "learning_rate": 3.4909090909090913e-06, + "loss": 43.6484, + "step": 4320 + }, + { + "epoch": 0.01749374790418436, + "grad_norm": 613.7767333984375, + "learning_rate": 3.498989898989899e-06, + "loss": 50.3909, + "step": 4330 + }, + { + "epoch": 0.01753414916955199, + "grad_norm": 936.6824951171875, + "learning_rate": 3.5070707070707073e-06, + "loss": 54.8212, + "step": 4340 + }, + { + "epoch": 0.017574550434919622, + "grad_norm": 512.0675048828125, + "learning_rate": 3.5151515151515154e-06, + "loss": 67.7541, + "step": 4350 + }, + { + "epoch": 0.017614951700287252, + "grad_norm": 522.534912109375, + "learning_rate": 3.5232323232323234e-06, + "loss": 59.2163, + "step": 4360 + }, + { + "epoch": 0.017655352965654885, + "grad_norm": 696.8623046875, + "learning_rate": 3.5313131313131314e-06, + "loss": 60.4836, + "step": 4370 + }, + { + "epoch": 0.017695754231022515, + "grad_norm": 370.84930419921875, + "learning_rate": 3.53939393939394e-06, + "loss": 54.7472, + "step": 4380 + }, + { + "epoch": 0.017736155496390148, + "grad_norm": 624.1700439453125, + "learning_rate": 3.547474747474748e-06, + "loss": 41.7864, + "step": 4390 + }, + { + "epoch": 0.017776556761757777, + "grad_norm": 512.7784423828125, + "learning_rate": 3.555555555555556e-06, + "loss": 78.3068, + "step": 4400 + }, + { + "epoch": 0.01781695802712541, + "grad_norm": 596.0513916015625, + "learning_rate": 3.563636363636364e-06, + "loss": 64.5097, + "step": 4410 + }, + { + "epoch": 0.01785735929249304, + "grad_norm": 603.3121948242188, + "learning_rate": 3.5717171717171724e-06, + "loss": 44.8857, + "step": 4420 + }, + { + "epoch": 0.017897760557860673, + "grad_norm": 702.3116455078125, + "learning_rate": 3.57979797979798e-06, + "loss": 66.0556, + "step": 4430 + }, + { + "epoch": 0.017938161823228303, + "grad_norm": 762.5350952148438, + "learning_rate": 3.587878787878788e-06, + "loss": 54.1978, + "step": 4440 + }, + { + "epoch": 0.017978563088595936, + "grad_norm": 578.8521728515625, + "learning_rate": 3.595959595959596e-06, + "loss": 63.9503, + "step": 4450 + }, + { + "epoch": 0.018018964353963565, + "grad_norm": 514.289306640625, + "learning_rate": 3.604040404040404e-06, + "loss": 63.0047, + "step": 4460 + }, + { + "epoch": 0.0180593656193312, + "grad_norm": 760.0272216796875, + "learning_rate": 3.6121212121212125e-06, + "loss": 77.3559, + "step": 4470 + }, + { + "epoch": 0.018099766884698828, + "grad_norm": 1489.2890625, + "learning_rate": 3.6202020202020205e-06, + "loss": 67.4747, + "step": 4480 + }, + { + "epoch": 0.01814016815006646, + "grad_norm": 639.5055541992188, + "learning_rate": 3.6282828282828286e-06, + "loss": 62.7511, + "step": 4490 + }, + { + "epoch": 0.01818056941543409, + "grad_norm": 459.6601257324219, + "learning_rate": 3.6363636363636366e-06, + "loss": 60.6712, + "step": 4500 + }, + { + "epoch": 0.018220970680801724, + "grad_norm": 614.9654541015625, + "learning_rate": 3.644444444444445e-06, + "loss": 54.301, + "step": 4510 + }, + { + "epoch": 0.018261371946169353, + "grad_norm": 439.7833557128906, + "learning_rate": 3.652525252525253e-06, + "loss": 43.6239, + "step": 4520 + }, + { + "epoch": 0.018301773211536986, + "grad_norm": 796.3854370117188, + "learning_rate": 3.660606060606061e-06, + "loss": 61.3992, + "step": 4530 + }, + { + "epoch": 0.018342174476904616, + "grad_norm": 343.8157653808594, + "learning_rate": 3.6686868686868687e-06, + "loss": 65.5689, + "step": 4540 + }, + { + "epoch": 0.01838257574227225, + "grad_norm": 1090.6224365234375, + "learning_rate": 3.6767676767676767e-06, + "loss": 76.7682, + "step": 4550 + }, + { + "epoch": 0.01842297700763988, + "grad_norm": 599.2227783203125, + "learning_rate": 3.684848484848485e-06, + "loss": 68.6533, + "step": 4560 + }, + { + "epoch": 0.01846337827300751, + "grad_norm": 679.0711669921875, + "learning_rate": 3.692929292929293e-06, + "loss": 65.9553, + "step": 4570 + }, + { + "epoch": 0.01850377953837514, + "grad_norm": 979.4942626953125, + "learning_rate": 3.701010101010101e-06, + "loss": 67.687, + "step": 4580 + }, + { + "epoch": 0.018544180803742774, + "grad_norm": 210.69126892089844, + "learning_rate": 3.7090909090909092e-06, + "loss": 59.4653, + "step": 4590 + }, + { + "epoch": 0.018584582069110404, + "grad_norm": 434.259033203125, + "learning_rate": 3.7171717171717177e-06, + "loss": 65.9339, + "step": 4600 + }, + { + "epoch": 0.018624983334478037, + "grad_norm": 854.1275024414062, + "learning_rate": 3.7252525252525257e-06, + "loss": 45.3619, + "step": 4610 + }, + { + "epoch": 0.018665384599845666, + "grad_norm": 311.56829833984375, + "learning_rate": 3.7333333333333337e-06, + "loss": 38.0432, + "step": 4620 + }, + { + "epoch": 0.0187057858652133, + "grad_norm": 498.4246520996094, + "learning_rate": 3.7414141414141418e-06, + "loss": 48.5221, + "step": 4630 + }, + { + "epoch": 0.01874618713058093, + "grad_norm": 801.4674072265625, + "learning_rate": 3.74949494949495e-06, + "loss": 58.1732, + "step": 4640 + }, + { + "epoch": 0.018786588395948562, + "grad_norm": 1160.4736328125, + "learning_rate": 3.757575757575758e-06, + "loss": 53.0421, + "step": 4650 + }, + { + "epoch": 0.01882698966131619, + "grad_norm": 897.707763671875, + "learning_rate": 3.765656565656566e-06, + "loss": 79.2011, + "step": 4660 + }, + { + "epoch": 0.018867390926683825, + "grad_norm": 400.8449401855469, + "learning_rate": 3.773737373737374e-06, + "loss": 60.0867, + "step": 4670 + }, + { + "epoch": 0.018907792192051454, + "grad_norm": 739.8035888671875, + "learning_rate": 3.781818181818182e-06, + "loss": 44.1463, + "step": 4680 + }, + { + "epoch": 0.018948193457419087, + "grad_norm": 1055.0322265625, + "learning_rate": 3.7898989898989903e-06, + "loss": 57.8223, + "step": 4690 + }, + { + "epoch": 0.018988594722786717, + "grad_norm": 1063.527587890625, + "learning_rate": 3.7979797979797984e-06, + "loss": 77.483, + "step": 4700 + }, + { + "epoch": 0.01902899598815435, + "grad_norm": 560.8001708984375, + "learning_rate": 3.8060606060606064e-06, + "loss": 53.0922, + "step": 4710 + }, + { + "epoch": 0.01906939725352198, + "grad_norm": 692.53466796875, + "learning_rate": 3.8141414141414144e-06, + "loss": 51.8766, + "step": 4720 + }, + { + "epoch": 0.019109798518889613, + "grad_norm": 1038.6710205078125, + "learning_rate": 3.8222222222222224e-06, + "loss": 40.9016, + "step": 4730 + }, + { + "epoch": 0.019150199784257242, + "grad_norm": 739.777099609375, + "learning_rate": 3.830303030303031e-06, + "loss": 71.4101, + "step": 4740 + }, + { + "epoch": 0.019190601049624875, + "grad_norm": 540.2933959960938, + "learning_rate": 3.8383838383838385e-06, + "loss": 47.5912, + "step": 4750 + }, + { + "epoch": 0.019231002314992505, + "grad_norm": 454.8396911621094, + "learning_rate": 3.846464646464647e-06, + "loss": 49.2231, + "step": 4760 + }, + { + "epoch": 0.019271403580360138, + "grad_norm": 538.3920288085938, + "learning_rate": 3.8545454545454545e-06, + "loss": 56.6818, + "step": 4770 + }, + { + "epoch": 0.019311804845727767, + "grad_norm": 567.5830078125, + "learning_rate": 3.862626262626263e-06, + "loss": 69.7806, + "step": 4780 + }, + { + "epoch": 0.0193522061110954, + "grad_norm": 234.95065307617188, + "learning_rate": 3.8707070707070706e-06, + "loss": 40.516, + "step": 4790 + }, + { + "epoch": 0.01939260737646303, + "grad_norm": 947.2410888671875, + "learning_rate": 3.878787878787879e-06, + "loss": 52.9704, + "step": 4800 + }, + { + "epoch": 0.019433008641830663, + "grad_norm": 435.2585754394531, + "learning_rate": 3.8868686868686875e-06, + "loss": 48.7174, + "step": 4810 + }, + { + "epoch": 0.019473409907198293, + "grad_norm": 815.7978515625, + "learning_rate": 3.894949494949495e-06, + "loss": 55.1616, + "step": 4820 + }, + { + "epoch": 0.019513811172565926, + "grad_norm": 1187.990478515625, + "learning_rate": 3.9030303030303035e-06, + "loss": 67.6024, + "step": 4830 + }, + { + "epoch": 0.019554212437933555, + "grad_norm": 1282.373046875, + "learning_rate": 3.911111111111112e-06, + "loss": 50.7462, + "step": 4840 + }, + { + "epoch": 0.01959461370330119, + "grad_norm": 662.55908203125, + "learning_rate": 3.9191919191919196e-06, + "loss": 64.9835, + "step": 4850 + }, + { + "epoch": 0.019635014968668818, + "grad_norm": 443.5242614746094, + "learning_rate": 3.927272727272727e-06, + "loss": 37.5332, + "step": 4860 + }, + { + "epoch": 0.01967541623403645, + "grad_norm": 1131.5301513671875, + "learning_rate": 3.935353535353536e-06, + "loss": 82.1557, + "step": 4870 + }, + { + "epoch": 0.01971581749940408, + "grad_norm": 799.3508911132812, + "learning_rate": 3.943434343434343e-06, + "loss": 51.2066, + "step": 4880 + }, + { + "epoch": 0.019756218764771714, + "grad_norm": 510.3349914550781, + "learning_rate": 3.951515151515152e-06, + "loss": 43.9406, + "step": 4890 + }, + { + "epoch": 0.019796620030139343, + "grad_norm": 549.172119140625, + "learning_rate": 3.95959595959596e-06, + "loss": 65.3976, + "step": 4900 + }, + { + "epoch": 0.019837021295506976, + "grad_norm": 763.05224609375, + "learning_rate": 3.967676767676768e-06, + "loss": 55.32, + "step": 4910 + }, + { + "epoch": 0.019877422560874606, + "grad_norm": 427.2909851074219, + "learning_rate": 3.975757575757576e-06, + "loss": 39.9464, + "step": 4920 + }, + { + "epoch": 0.01991782382624224, + "grad_norm": 838.1200561523438, + "learning_rate": 3.983838383838385e-06, + "loss": 50.7108, + "step": 4930 + }, + { + "epoch": 0.01995822509160987, + "grad_norm": 784.1903686523438, + "learning_rate": 3.991919191919192e-06, + "loss": 75.4968, + "step": 4940 + }, + { + "epoch": 0.0199986263569775, + "grad_norm": 336.4251708984375, + "learning_rate": 4.000000000000001e-06, + "loss": 60.2289, + "step": 4950 + }, + { + "epoch": 0.02003902762234513, + "grad_norm": 976.969970703125, + "learning_rate": 4.008080808080808e-06, + "loss": 50.0926, + "step": 4960 + }, + { + "epoch": 0.020079428887712764, + "grad_norm": 1279.132568359375, + "learning_rate": 4.016161616161616e-06, + "loss": 60.1504, + "step": 4970 + }, + { + "epoch": 0.020119830153080394, + "grad_norm": 711.7320556640625, + "learning_rate": 4.024242424242424e-06, + "loss": 49.4476, + "step": 4980 + }, + { + "epoch": 0.020160231418448027, + "grad_norm": 423.693603515625, + "learning_rate": 4.032323232323233e-06, + "loss": 62.753, + "step": 4990 + }, + { + "epoch": 0.020200632683815656, + "grad_norm": 613.124267578125, + "learning_rate": 4.04040404040404e-06, + "loss": 49.9839, + "step": 5000 + }, + { + "epoch": 0.02024103394918329, + "grad_norm": 984.8370971679688, + "learning_rate": 4.048484848484849e-06, + "loss": 60.4485, + "step": 5010 + }, + { + "epoch": 0.02028143521455092, + "grad_norm": 614.8654174804688, + "learning_rate": 4.056565656565657e-06, + "loss": 50.103, + "step": 5020 + }, + { + "epoch": 0.020321836479918552, + "grad_norm": 766.6077270507812, + "learning_rate": 4.064646464646465e-06, + "loss": 71.7173, + "step": 5030 + }, + { + "epoch": 0.020362237745286182, + "grad_norm": 1153.7392578125, + "learning_rate": 4.072727272727273e-06, + "loss": 70.789, + "step": 5040 + }, + { + "epoch": 0.020402639010653815, + "grad_norm": 788.94189453125, + "learning_rate": 4.080808080808081e-06, + "loss": 54.5462, + "step": 5050 + }, + { + "epoch": 0.020443040276021444, + "grad_norm": 1231.907958984375, + "learning_rate": 4.088888888888889e-06, + "loss": 62.8956, + "step": 5060 + }, + { + "epoch": 0.020483441541389077, + "grad_norm": 335.8381652832031, + "learning_rate": 4.096969696969697e-06, + "loss": 48.22, + "step": 5070 + }, + { + "epoch": 0.020523842806756707, + "grad_norm": 2388.536376953125, + "learning_rate": 4.105050505050505e-06, + "loss": 68.5397, + "step": 5080 + }, + { + "epoch": 0.02056424407212434, + "grad_norm": 880.5589599609375, + "learning_rate": 4.113131313131313e-06, + "loss": 51.2133, + "step": 5090 + }, + { + "epoch": 0.02060464533749197, + "grad_norm": 444.1842956542969, + "learning_rate": 4.1212121212121215e-06, + "loss": 45.3639, + "step": 5100 + }, + { + "epoch": 0.020645046602859603, + "grad_norm": 497.71258544921875, + "learning_rate": 4.12929292929293e-06, + "loss": 65.4038, + "step": 5110 + }, + { + "epoch": 0.020685447868227232, + "grad_norm": 620.34912109375, + "learning_rate": 4.1373737373737375e-06, + "loss": 49.9131, + "step": 5120 + }, + { + "epoch": 0.020725849133594865, + "grad_norm": 902.3446655273438, + "learning_rate": 4.145454545454546e-06, + "loss": 45.9957, + "step": 5130 + }, + { + "epoch": 0.020766250398962495, + "grad_norm": 505.1466064453125, + "learning_rate": 4.1535353535353536e-06, + "loss": 43.1903, + "step": 5140 + }, + { + "epoch": 0.020806651664330128, + "grad_norm": 401.0746765136719, + "learning_rate": 4.161616161616162e-06, + "loss": 59.4582, + "step": 5150 + }, + { + "epoch": 0.020847052929697758, + "grad_norm": 622.1741943359375, + "learning_rate": 4.1696969696969705e-06, + "loss": 42.9341, + "step": 5160 + }, + { + "epoch": 0.02088745419506539, + "grad_norm": 633.169921875, + "learning_rate": 4.177777777777778e-06, + "loss": 64.0073, + "step": 5170 + }, + { + "epoch": 0.02092785546043302, + "grad_norm": 709.371337890625, + "learning_rate": 4.185858585858586e-06, + "loss": 55.1911, + "step": 5180 + }, + { + "epoch": 0.020968256725800653, + "grad_norm": 536.787841796875, + "learning_rate": 4.193939393939394e-06, + "loss": 59.7044, + "step": 5190 + }, + { + "epoch": 0.021008657991168283, + "grad_norm": 710.5322875976562, + "learning_rate": 4.2020202020202026e-06, + "loss": 55.8415, + "step": 5200 + }, + { + "epoch": 0.021049059256535916, + "grad_norm": 747.7606811523438, + "learning_rate": 4.21010101010101e-06, + "loss": 46.1033, + "step": 5210 + }, + { + "epoch": 0.021089460521903546, + "grad_norm": 470.2503356933594, + "learning_rate": 4.218181818181819e-06, + "loss": 51.2424, + "step": 5220 + }, + { + "epoch": 0.02112986178727118, + "grad_norm": 723.833984375, + "learning_rate": 4.226262626262626e-06, + "loss": 51.1689, + "step": 5230 + }, + { + "epoch": 0.021170263052638808, + "grad_norm": 692.9503173828125, + "learning_rate": 4.234343434343435e-06, + "loss": 66.355, + "step": 5240 + }, + { + "epoch": 0.02121066431800644, + "grad_norm": 3034.588623046875, + "learning_rate": 4.242424242424243e-06, + "loss": 85.8846, + "step": 5250 + }, + { + "epoch": 0.02125106558337407, + "grad_norm": 912.686767578125, + "learning_rate": 4.250505050505051e-06, + "loss": 69.7366, + "step": 5260 + }, + { + "epoch": 0.021291466848741704, + "grad_norm": 941.4260864257812, + "learning_rate": 4.258585858585859e-06, + "loss": 67.8435, + "step": 5270 + }, + { + "epoch": 0.021331868114109333, + "grad_norm": 484.5862731933594, + "learning_rate": 4.266666666666668e-06, + "loss": 56.8078, + "step": 5280 + }, + { + "epoch": 0.021372269379476967, + "grad_norm": 703.8028564453125, + "learning_rate": 4.274747474747475e-06, + "loss": 56.7962, + "step": 5290 + }, + { + "epoch": 0.021412670644844596, + "grad_norm": 406.5282897949219, + "learning_rate": 4.282828282828283e-06, + "loss": 50.0921, + "step": 5300 + }, + { + "epoch": 0.02145307191021223, + "grad_norm": 909.0770263671875, + "learning_rate": 4.290909090909091e-06, + "loss": 68.9491, + "step": 5310 + }, + { + "epoch": 0.02149347317557986, + "grad_norm": 605.3233642578125, + "learning_rate": 4.298989898989899e-06, + "loss": 48.6208, + "step": 5320 + }, + { + "epoch": 0.021533874440947492, + "grad_norm": 360.8162536621094, + "learning_rate": 4.307070707070707e-06, + "loss": 67.9922, + "step": 5330 + }, + { + "epoch": 0.02157427570631512, + "grad_norm": 547.1395263671875, + "learning_rate": 4.315151515151516e-06, + "loss": 49.9828, + "step": 5340 + }, + { + "epoch": 0.021614676971682754, + "grad_norm": 298.53076171875, + "learning_rate": 4.323232323232323e-06, + "loss": 51.9223, + "step": 5350 + }, + { + "epoch": 0.021655078237050384, + "grad_norm": 239.08287048339844, + "learning_rate": 4.331313131313132e-06, + "loss": 93.9693, + "step": 5360 + }, + { + "epoch": 0.021695479502418017, + "grad_norm": 400.78326416015625, + "learning_rate": 4.33939393939394e-06, + "loss": 61.8255, + "step": 5370 + }, + { + "epoch": 0.021735880767785647, + "grad_norm": 844.6863403320312, + "learning_rate": 4.347474747474748e-06, + "loss": 80.0776, + "step": 5380 + }, + { + "epoch": 0.02177628203315328, + "grad_norm": 1113.2698974609375, + "learning_rate": 4.3555555555555555e-06, + "loss": 54.753, + "step": 5390 + }, + { + "epoch": 0.02181668329852091, + "grad_norm": 776.0969848632812, + "learning_rate": 4.363636363636364e-06, + "loss": 63.0497, + "step": 5400 + }, + { + "epoch": 0.021857084563888542, + "grad_norm": 448.30889892578125, + "learning_rate": 4.3717171717171715e-06, + "loss": 67.8514, + "step": 5410 + }, + { + "epoch": 0.021897485829256172, + "grad_norm": 1007.500732421875, + "learning_rate": 4.37979797979798e-06, + "loss": 63.2784, + "step": 5420 + }, + { + "epoch": 0.021937887094623805, + "grad_norm": 755.4855346679688, + "learning_rate": 4.387878787878788e-06, + "loss": 64.7918, + "step": 5430 + }, + { + "epoch": 0.021978288359991435, + "grad_norm": 468.41009521484375, + "learning_rate": 4.395959595959596e-06, + "loss": 64.8188, + "step": 5440 + }, + { + "epoch": 0.022018689625359068, + "grad_norm": 952.0288696289062, + "learning_rate": 4.4040404040404044e-06, + "loss": 66.8628, + "step": 5450 + }, + { + "epoch": 0.022059090890726697, + "grad_norm": 921.4072875976562, + "learning_rate": 4.412121212121213e-06, + "loss": 69.8722, + "step": 5460 + }, + { + "epoch": 0.02209949215609433, + "grad_norm": 971.798583984375, + "learning_rate": 4.4202020202020205e-06, + "loss": 45.01, + "step": 5470 + }, + { + "epoch": 0.02213989342146196, + "grad_norm": 2802.30419921875, + "learning_rate": 4.428282828282829e-06, + "loss": 70.787, + "step": 5480 + }, + { + "epoch": 0.022180294686829593, + "grad_norm": 528.7456665039062, + "learning_rate": 4.436363636363637e-06, + "loss": 52.1586, + "step": 5490 + }, + { + "epoch": 0.022220695952197222, + "grad_norm": 775.1383056640625, + "learning_rate": 4.444444444444444e-06, + "loss": 62.1388, + "step": 5500 + }, + { + "epoch": 0.022261097217564856, + "grad_norm": 653.5880737304688, + "learning_rate": 4.452525252525253e-06, + "loss": 39.7751, + "step": 5510 + }, + { + "epoch": 0.022301498482932485, + "grad_norm": 493.3662109375, + "learning_rate": 4.460606060606061e-06, + "loss": 49.2763, + "step": 5520 + }, + { + "epoch": 0.022341899748300118, + "grad_norm": 717.1765747070312, + "learning_rate": 4.468686868686869e-06, + "loss": 56.1161, + "step": 5530 + }, + { + "epoch": 0.022382301013667748, + "grad_norm": 907.2646484375, + "learning_rate": 4.476767676767677e-06, + "loss": 52.5763, + "step": 5540 + }, + { + "epoch": 0.02242270227903538, + "grad_norm": 680.5430908203125, + "learning_rate": 4.4848484848484855e-06, + "loss": 64.6257, + "step": 5550 + }, + { + "epoch": 0.02246310354440301, + "grad_norm": 587.7493896484375, + "learning_rate": 4.492929292929293e-06, + "loss": 66.1097, + "step": 5560 + }, + { + "epoch": 0.022503504809770643, + "grad_norm": 351.2096252441406, + "learning_rate": 4.501010101010102e-06, + "loss": 35.0575, + "step": 5570 + }, + { + "epoch": 0.022543906075138273, + "grad_norm": 868.7583618164062, + "learning_rate": 4.50909090909091e-06, + "loss": 52.3459, + "step": 5580 + }, + { + "epoch": 0.022584307340505906, + "grad_norm": 413.28155517578125, + "learning_rate": 4.517171717171718e-06, + "loss": 56.9695, + "step": 5590 + }, + { + "epoch": 0.022624708605873536, + "grad_norm": 649.5818481445312, + "learning_rate": 4.525252525252526e-06, + "loss": 49.7981, + "step": 5600 + }, + { + "epoch": 0.02266510987124117, + "grad_norm": 1128.8115234375, + "learning_rate": 4.533333333333334e-06, + "loss": 55.7752, + "step": 5610 + }, + { + "epoch": 0.0227055111366088, + "grad_norm": 1171.7386474609375, + "learning_rate": 4.541414141414141e-06, + "loss": 78.1653, + "step": 5620 + }, + { + "epoch": 0.02274591240197643, + "grad_norm": 988.4644775390625, + "learning_rate": 4.54949494949495e-06, + "loss": 52.3605, + "step": 5630 + }, + { + "epoch": 0.02278631366734406, + "grad_norm": 667.2089233398438, + "learning_rate": 4.557575757575758e-06, + "loss": 58.1055, + "step": 5640 + }, + { + "epoch": 0.022826714932711694, + "grad_norm": 1140.114990234375, + "learning_rate": 4.565656565656566e-06, + "loss": 68.9946, + "step": 5650 + }, + { + "epoch": 0.022867116198079324, + "grad_norm": 975.0390014648438, + "learning_rate": 4.573737373737374e-06, + "loss": 65.694, + "step": 5660 + }, + { + "epoch": 0.022907517463446957, + "grad_norm": 974.479736328125, + "learning_rate": 4.581818181818183e-06, + "loss": 48.7883, + "step": 5670 + }, + { + "epoch": 0.022947918728814586, + "grad_norm": 474.9225158691406, + "learning_rate": 4.58989898989899e-06, + "loss": 47.1188, + "step": 5680 + }, + { + "epoch": 0.02298831999418222, + "grad_norm": 1234.3985595703125, + "learning_rate": 4.597979797979799e-06, + "loss": 76.4564, + "step": 5690 + }, + { + "epoch": 0.02302872125954985, + "grad_norm": 937.4863891601562, + "learning_rate": 4.606060606060606e-06, + "loss": 53.0915, + "step": 5700 + }, + { + "epoch": 0.023069122524917482, + "grad_norm": 687.7938842773438, + "learning_rate": 4.614141414141414e-06, + "loss": 58.9133, + "step": 5710 + }, + { + "epoch": 0.02310952379028511, + "grad_norm": 445.2677001953125, + "learning_rate": 4.622222222222222e-06, + "loss": 47.943, + "step": 5720 + }, + { + "epoch": 0.023149925055652745, + "grad_norm": 380.9918212890625, + "learning_rate": 4.630303030303031e-06, + "loss": 54.5093, + "step": 5730 + }, + { + "epoch": 0.023190326321020374, + "grad_norm": 399.1734924316406, + "learning_rate": 4.6383838383838384e-06, + "loss": 60.505, + "step": 5740 + }, + { + "epoch": 0.023230727586388007, + "grad_norm": 1005.51953125, + "learning_rate": 4.646464646464647e-06, + "loss": 53.1637, + "step": 5750 + }, + { + "epoch": 0.023271128851755637, + "grad_norm": 1141.5557861328125, + "learning_rate": 4.654545454545455e-06, + "loss": 85.439, + "step": 5760 + }, + { + "epoch": 0.02331153011712327, + "grad_norm": 719.6810913085938, + "learning_rate": 4.662626262626263e-06, + "loss": 53.3294, + "step": 5770 + }, + { + "epoch": 0.0233519313824909, + "grad_norm": 804.4564208984375, + "learning_rate": 4.670707070707071e-06, + "loss": 58.7554, + "step": 5780 + }, + { + "epoch": 0.023392332647858533, + "grad_norm": 738.3641967773438, + "learning_rate": 4.678787878787879e-06, + "loss": 58.9601, + "step": 5790 + }, + { + "epoch": 0.023432733913226162, + "grad_norm": 442.52630615234375, + "learning_rate": 4.6868686868686874e-06, + "loss": 58.0687, + "step": 5800 + }, + { + "epoch": 0.023473135178593795, + "grad_norm": 684.3228759765625, + "learning_rate": 4.694949494949496e-06, + "loss": 57.9766, + "step": 5810 + }, + { + "epoch": 0.023513536443961425, + "grad_norm": 1048.6868896484375, + "learning_rate": 4.7030303030303035e-06, + "loss": 65.6569, + "step": 5820 + }, + { + "epoch": 0.023553937709329058, + "grad_norm": 417.07696533203125, + "learning_rate": 4.711111111111111e-06, + "loss": 49.7707, + "step": 5830 + }, + { + "epoch": 0.023594338974696687, + "grad_norm": 1276.1221923828125, + "learning_rate": 4.7191919191919195e-06, + "loss": 51.5833, + "step": 5840 + }, + { + "epoch": 0.02363474024006432, + "grad_norm": 1004.6600341796875, + "learning_rate": 4.727272727272728e-06, + "loss": 84.6636, + "step": 5850 + }, + { + "epoch": 0.02367514150543195, + "grad_norm": 709.0186157226562, + "learning_rate": 4.735353535353536e-06, + "loss": 62.0867, + "step": 5860 + }, + { + "epoch": 0.023715542770799583, + "grad_norm": 817.0234375, + "learning_rate": 4.743434343434344e-06, + "loss": 52.1775, + "step": 5870 + }, + { + "epoch": 0.023755944036167213, + "grad_norm": 246.22628784179688, + "learning_rate": 4.751515151515152e-06, + "loss": 62.8754, + "step": 5880 + }, + { + "epoch": 0.023796345301534846, + "grad_norm": 927.6898193359375, + "learning_rate": 4.75959595959596e-06, + "loss": 54.2966, + "step": 5890 + }, + { + "epoch": 0.023836746566902475, + "grad_norm": 1191.2314453125, + "learning_rate": 4.7676767676767685e-06, + "loss": 102.11, + "step": 5900 + }, + { + "epoch": 0.02387714783227011, + "grad_norm": 573.662109375, + "learning_rate": 4.775757575757576e-06, + "loss": 63.7849, + "step": 5910 + }, + { + "epoch": 0.023917549097637738, + "grad_norm": 677.00732421875, + "learning_rate": 4.783838383838385e-06, + "loss": 32.899, + "step": 5920 + }, + { + "epoch": 0.02395795036300537, + "grad_norm": 739.2052001953125, + "learning_rate": 4.791919191919192e-06, + "loss": 42.3564, + "step": 5930 + }, + { + "epoch": 0.023998351628373, + "grad_norm": 591.5802001953125, + "learning_rate": 4.800000000000001e-06, + "loss": 54.1494, + "step": 5940 + }, + { + "epoch": 0.024038752893740634, + "grad_norm": 676.9740600585938, + "learning_rate": 4.808080808080808e-06, + "loss": 51.5359, + "step": 5950 + }, + { + "epoch": 0.024079154159108263, + "grad_norm": 688.4370727539062, + "learning_rate": 4.816161616161617e-06, + "loss": 65.8977, + "step": 5960 + }, + { + "epoch": 0.024119555424475896, + "grad_norm": 881.8679809570312, + "learning_rate": 4.824242424242424e-06, + "loss": 55.0991, + "step": 5970 + }, + { + "epoch": 0.024159956689843526, + "grad_norm": 994.0008544921875, + "learning_rate": 4.832323232323233e-06, + "loss": 59.3008, + "step": 5980 + }, + { + "epoch": 0.02420035795521116, + "grad_norm": 653.2454223632812, + "learning_rate": 4.840404040404041e-06, + "loss": 27.4822, + "step": 5990 + }, + { + "epoch": 0.02424075922057879, + "grad_norm": 288.7889404296875, + "learning_rate": 4.848484848484849e-06, + "loss": 50.8106, + "step": 6000 + }, + { + "epoch": 0.02428116048594642, + "grad_norm": 615.9229125976562, + "learning_rate": 4.856565656565657e-06, + "loss": 53.6111, + "step": 6010 + }, + { + "epoch": 0.02432156175131405, + "grad_norm": 957.2933349609375, + "learning_rate": 4.864646464646466e-06, + "loss": 58.5743, + "step": 6020 + }, + { + "epoch": 0.024361963016681684, + "grad_norm": 1033.9827880859375, + "learning_rate": 4.872727272727273e-06, + "loss": 69.179, + "step": 6030 + }, + { + "epoch": 0.024402364282049314, + "grad_norm": 730.6578979492188, + "learning_rate": 4.880808080808081e-06, + "loss": 46.347, + "step": 6040 + }, + { + "epoch": 0.024442765547416947, + "grad_norm": 446.7236022949219, + "learning_rate": 4.888888888888889e-06, + "loss": 46.9909, + "step": 6050 + }, + { + "epoch": 0.024483166812784576, + "grad_norm": 411.31451416015625, + "learning_rate": 4.896969696969697e-06, + "loss": 50.5173, + "step": 6060 + }, + { + "epoch": 0.02452356807815221, + "grad_norm": 962.7041625976562, + "learning_rate": 4.905050505050505e-06, + "loss": 66.097, + "step": 6070 + }, + { + "epoch": 0.02456396934351984, + "grad_norm": 950.1348876953125, + "learning_rate": 4.913131313131314e-06, + "loss": 66.6417, + "step": 6080 + }, + { + "epoch": 0.02460437060888747, + "grad_norm": 651.8181762695312, + "learning_rate": 4.9212121212121214e-06, + "loss": 46.0087, + "step": 6090 + }, + { + "epoch": 0.0246447718742551, + "grad_norm": 925.5591430664062, + "learning_rate": 4.92929292929293e-06, + "loss": 60.3818, + "step": 6100 + }, + { + "epoch": 0.02468517313962273, + "grad_norm": 683.6544799804688, + "learning_rate": 4.937373737373738e-06, + "loss": 61.0053, + "step": 6110 + }, + { + "epoch": 0.024725574404990364, + "grad_norm": 627.9014892578125, + "learning_rate": 4.945454545454546e-06, + "loss": 66.3312, + "step": 6120 + }, + { + "epoch": 0.024765975670357994, + "grad_norm": 835.2926025390625, + "learning_rate": 4.953535353535354e-06, + "loss": 64.1998, + "step": 6130 + }, + { + "epoch": 0.024806376935725627, + "grad_norm": 979.1827392578125, + "learning_rate": 4.961616161616162e-06, + "loss": 62.5498, + "step": 6140 + }, + { + "epoch": 0.024846778201093257, + "grad_norm": 491.91552734375, + "learning_rate": 4.9696969696969696e-06, + "loss": 59.011, + "step": 6150 + }, + { + "epoch": 0.02488717946646089, + "grad_norm": 494.9542541503906, + "learning_rate": 4.977777777777778e-06, + "loss": 76.8512, + "step": 6160 + }, + { + "epoch": 0.02492758073182852, + "grad_norm": 627.1669311523438, + "learning_rate": 4.9858585858585865e-06, + "loss": 64.9759, + "step": 6170 + }, + { + "epoch": 0.024967981997196152, + "grad_norm": 668.6937866210938, + "learning_rate": 4.993939393939394e-06, + "loss": 64.4498, + "step": 6180 + }, + { + "epoch": 0.025008383262563782, + "grad_norm": 561.9140014648438, + "learning_rate": 5.0020202020202025e-06, + "loss": 49.3964, + "step": 6190 + }, + { + "epoch": 0.025048784527931415, + "grad_norm": 549.7935180664062, + "learning_rate": 5.010101010101011e-06, + "loss": 64.5368, + "step": 6200 + }, + { + "epoch": 0.025089185793299044, + "grad_norm": 2008.2236328125, + "learning_rate": 5.0181818181818186e-06, + "loss": 53.3066, + "step": 6210 + }, + { + "epoch": 0.025129587058666678, + "grad_norm": 431.6338806152344, + "learning_rate": 5.026262626262627e-06, + "loss": 78.1808, + "step": 6220 + }, + { + "epoch": 0.025169988324034307, + "grad_norm": 277.0874328613281, + "learning_rate": 5.034343434343435e-06, + "loss": 51.0862, + "step": 6230 + }, + { + "epoch": 0.02521038958940194, + "grad_norm": 601.8732299804688, + "learning_rate": 5.042424242424243e-06, + "loss": 43.3031, + "step": 6240 + }, + { + "epoch": 0.02525079085476957, + "grad_norm": 357.4873352050781, + "learning_rate": 5.0505050505050515e-06, + "loss": 71.1351, + "step": 6250 + }, + { + "epoch": 0.025291192120137203, + "grad_norm": 666.0474243164062, + "learning_rate": 5.058585858585859e-06, + "loss": 48.8687, + "step": 6260 + }, + { + "epoch": 0.025331593385504832, + "grad_norm": 2769.57568359375, + "learning_rate": 5.0666666666666676e-06, + "loss": 85.213, + "step": 6270 + }, + { + "epoch": 0.025371994650872465, + "grad_norm": 1185.662841796875, + "learning_rate": 5.074747474747476e-06, + "loss": 64.6072, + "step": 6280 + }, + { + "epoch": 0.025412395916240095, + "grad_norm": 593.606689453125, + "learning_rate": 5.082828282828284e-06, + "loss": 45.3778, + "step": 6290 + }, + { + "epoch": 0.025452797181607728, + "grad_norm": 685.597900390625, + "learning_rate": 5.090909090909091e-06, + "loss": 63.415, + "step": 6300 + }, + { + "epoch": 0.025493198446975358, + "grad_norm": 779.54150390625, + "learning_rate": 5.098989898989899e-06, + "loss": 42.4922, + "step": 6310 + }, + { + "epoch": 0.02553359971234299, + "grad_norm": 748.0287475585938, + "learning_rate": 5.107070707070707e-06, + "loss": 54.5869, + "step": 6320 + }, + { + "epoch": 0.02557400097771062, + "grad_norm": 1047.96630859375, + "learning_rate": 5.115151515151515e-06, + "loss": 71.8537, + "step": 6330 + }, + { + "epoch": 0.025614402243078253, + "grad_norm": 481.7173156738281, + "learning_rate": 5.123232323232323e-06, + "loss": 53.4357, + "step": 6340 + }, + { + "epoch": 0.025654803508445883, + "grad_norm": 469.4927673339844, + "learning_rate": 5.131313131313132e-06, + "loss": 55.997, + "step": 6350 + }, + { + "epoch": 0.025695204773813516, + "grad_norm": 882.4183349609375, + "learning_rate": 5.139393939393939e-06, + "loss": 61.6789, + "step": 6360 + }, + { + "epoch": 0.025735606039181146, + "grad_norm": 710.2070922851562, + "learning_rate": 5.147474747474748e-06, + "loss": 50.3809, + "step": 6370 + }, + { + "epoch": 0.02577600730454878, + "grad_norm": 979.2042846679688, + "learning_rate": 5.155555555555556e-06, + "loss": 57.65, + "step": 6380 + }, + { + "epoch": 0.025816408569916408, + "grad_norm": 3295.435302734375, + "learning_rate": 5.163636363636364e-06, + "loss": 95.8132, + "step": 6390 + }, + { + "epoch": 0.02585680983528404, + "grad_norm": 612.2908935546875, + "learning_rate": 5.171717171717172e-06, + "loss": 44.9435, + "step": 6400 + }, + { + "epoch": 0.02589721110065167, + "grad_norm": 163.25262451171875, + "learning_rate": 5.17979797979798e-06, + "loss": 43.0925, + "step": 6410 + }, + { + "epoch": 0.025937612366019304, + "grad_norm": 727.7279663085938, + "learning_rate": 5.187878787878788e-06, + "loss": 50.2254, + "step": 6420 + }, + { + "epoch": 0.025978013631386934, + "grad_norm": 561.295654296875, + "learning_rate": 5.195959595959597e-06, + "loss": 55.9661, + "step": 6430 + }, + { + "epoch": 0.026018414896754567, + "grad_norm": 1451.63916015625, + "learning_rate": 5.204040404040404e-06, + "loss": 69.4304, + "step": 6440 + }, + { + "epoch": 0.026058816162122196, + "grad_norm": 1265.6790771484375, + "learning_rate": 5.212121212121213e-06, + "loss": 66.7605, + "step": 6450 + }, + { + "epoch": 0.02609921742748983, + "grad_norm": 646.0961303710938, + "learning_rate": 5.220202020202021e-06, + "loss": 45.969, + "step": 6460 + }, + { + "epoch": 0.02613961869285746, + "grad_norm": 676.3788452148438, + "learning_rate": 5.228282828282829e-06, + "loss": 56.7987, + "step": 6470 + }, + { + "epoch": 0.026180019958225092, + "grad_norm": 510.5483093261719, + "learning_rate": 5.236363636363637e-06, + "loss": 62.0078, + "step": 6480 + }, + { + "epoch": 0.02622042122359272, + "grad_norm": 1271.3424072265625, + "learning_rate": 5.244444444444445e-06, + "loss": 53.2245, + "step": 6490 + }, + { + "epoch": 0.026260822488960354, + "grad_norm": 408.92352294921875, + "learning_rate": 5.252525252525253e-06, + "loss": 61.5336, + "step": 6500 + }, + { + "epoch": 0.026301223754327984, + "grad_norm": 644.672119140625, + "learning_rate": 5.26060606060606e-06, + "loss": 54.1448, + "step": 6510 + }, + { + "epoch": 0.026341625019695617, + "grad_norm": 477.9904479980469, + "learning_rate": 5.268686868686869e-06, + "loss": 71.9779, + "step": 6520 + }, + { + "epoch": 0.026382026285063247, + "grad_norm": 963.8379516601562, + "learning_rate": 5.276767676767677e-06, + "loss": 77.2526, + "step": 6530 + }, + { + "epoch": 0.02642242755043088, + "grad_norm": 1079.2596435546875, + "learning_rate": 5.284848484848485e-06, + "loss": 55.9919, + "step": 6540 + }, + { + "epoch": 0.02646282881579851, + "grad_norm": 949.0852661132812, + "learning_rate": 5.292929292929293e-06, + "loss": 48.6409, + "step": 6550 + }, + { + "epoch": 0.026503230081166142, + "grad_norm": 684.5231323242188, + "learning_rate": 5.3010101010101016e-06, + "loss": 75.0948, + "step": 6560 + }, + { + "epoch": 0.026543631346533772, + "grad_norm": 534.61474609375, + "learning_rate": 5.309090909090909e-06, + "loss": 43.8272, + "step": 6570 + }, + { + "epoch": 0.026584032611901405, + "grad_norm": 593.8630981445312, + "learning_rate": 5.317171717171718e-06, + "loss": 61.2505, + "step": 6580 + }, + { + "epoch": 0.026624433877269035, + "grad_norm": 500.44586181640625, + "learning_rate": 5.325252525252525e-06, + "loss": 36.6023, + "step": 6590 + }, + { + "epoch": 0.026664835142636668, + "grad_norm": 534.3717651367188, + "learning_rate": 5.333333333333334e-06, + "loss": 60.64, + "step": 6600 + }, + { + "epoch": 0.026705236408004297, + "grad_norm": 948.6107788085938, + "learning_rate": 5.341414141414142e-06, + "loss": 57.0894, + "step": 6610 + }, + { + "epoch": 0.02674563767337193, + "grad_norm": 684.484619140625, + "learning_rate": 5.34949494949495e-06, + "loss": 53.3891, + "step": 6620 + }, + { + "epoch": 0.02678603893873956, + "grad_norm": 613.2217407226562, + "learning_rate": 5.357575757575758e-06, + "loss": 71.4286, + "step": 6630 + }, + { + "epoch": 0.026826440204107193, + "grad_norm": 568.6145629882812, + "learning_rate": 5.365656565656567e-06, + "loss": 50.2748, + "step": 6640 + }, + { + "epoch": 0.026866841469474823, + "grad_norm": 398.4335632324219, + "learning_rate": 5.373737373737374e-06, + "loss": 42.2858, + "step": 6650 + }, + { + "epoch": 0.026907242734842456, + "grad_norm": 712.74755859375, + "learning_rate": 5.381818181818183e-06, + "loss": 44.9092, + "step": 6660 + }, + { + "epoch": 0.026947644000210085, + "grad_norm": 739.2601318359375, + "learning_rate": 5.38989898989899e-06, + "loss": 51.0345, + "step": 6670 + }, + { + "epoch": 0.026988045265577718, + "grad_norm": 663.2088623046875, + "learning_rate": 5.397979797979799e-06, + "loss": 42.3987, + "step": 6680 + }, + { + "epoch": 0.027028446530945348, + "grad_norm": 567.5929565429688, + "learning_rate": 5.406060606060607e-06, + "loss": 43.0136, + "step": 6690 + }, + { + "epoch": 0.02706884779631298, + "grad_norm": 1150.5491943359375, + "learning_rate": 5.414141414141415e-06, + "loss": 76.9021, + "step": 6700 + }, + { + "epoch": 0.02710924906168061, + "grad_norm": 568.4838256835938, + "learning_rate": 5.422222222222223e-06, + "loss": 69.4932, + "step": 6710 + }, + { + "epoch": 0.027149650327048244, + "grad_norm": 589.513671875, + "learning_rate": 5.430303030303032e-06, + "loss": 60.2286, + "step": 6720 + }, + { + "epoch": 0.027190051592415873, + "grad_norm": 948.9300537109375, + "learning_rate": 5.438383838383838e-06, + "loss": 81.8324, + "step": 6730 + }, + { + "epoch": 0.027230452857783506, + "grad_norm": 470.6671447753906, + "learning_rate": 5.446464646464647e-06, + "loss": 56.1316, + "step": 6740 + }, + { + "epoch": 0.027270854123151136, + "grad_norm": 1675.1475830078125, + "learning_rate": 5.4545454545454545e-06, + "loss": 73.5525, + "step": 6750 + }, + { + "epoch": 0.02731125538851877, + "grad_norm": 754.5110473632812, + "learning_rate": 5.462626262626263e-06, + "loss": 61.0356, + "step": 6760 + }, + { + "epoch": 0.0273516566538864, + "grad_norm": 430.7125244140625, + "learning_rate": 5.4707070707070705e-06, + "loss": 53.5754, + "step": 6770 + }, + { + "epoch": 0.02739205791925403, + "grad_norm": 847.1322631835938, + "learning_rate": 5.478787878787879e-06, + "loss": 65.6341, + "step": 6780 + }, + { + "epoch": 0.02743245918462166, + "grad_norm": 977.4957275390625, + "learning_rate": 5.486868686868687e-06, + "loss": 70.9679, + "step": 6790 + }, + { + "epoch": 0.027472860449989294, + "grad_norm": 2050.4521484375, + "learning_rate": 5.494949494949495e-06, + "loss": 99.4739, + "step": 6800 + }, + { + "epoch": 0.027513261715356924, + "grad_norm": 1008.5648193359375, + "learning_rate": 5.5030303030303034e-06, + "loss": 58.5655, + "step": 6810 + }, + { + "epoch": 0.027553662980724557, + "grad_norm": 401.62506103515625, + "learning_rate": 5.511111111111112e-06, + "loss": 61.3661, + "step": 6820 + }, + { + "epoch": 0.027594064246092186, + "grad_norm": 997.3637084960938, + "learning_rate": 5.5191919191919195e-06, + "loss": 49.5676, + "step": 6830 + }, + { + "epoch": 0.02763446551145982, + "grad_norm": 576.8031616210938, + "learning_rate": 5.527272727272728e-06, + "loss": 41.5905, + "step": 6840 + }, + { + "epoch": 0.02767486677682745, + "grad_norm": 550.9244384765625, + "learning_rate": 5.5353535353535355e-06, + "loss": 61.1382, + "step": 6850 + }, + { + "epoch": 0.027715268042195082, + "grad_norm": 626.9200439453125, + "learning_rate": 5.543434343434344e-06, + "loss": 53.2902, + "step": 6860 + }, + { + "epoch": 0.02775566930756271, + "grad_norm": 752.208984375, + "learning_rate": 5.5515151515151524e-06, + "loss": 57.3701, + "step": 6870 + }, + { + "epoch": 0.027796070572930345, + "grad_norm": 772.6936645507812, + "learning_rate": 5.55959595959596e-06, + "loss": 60.7988, + "step": 6880 + }, + { + "epoch": 0.027836471838297974, + "grad_norm": 533.5892944335938, + "learning_rate": 5.5676767676767685e-06, + "loss": 60.7892, + "step": 6890 + }, + { + "epoch": 0.027876873103665607, + "grad_norm": 756.2522583007812, + "learning_rate": 5.575757575757577e-06, + "loss": 48.367, + "step": 6900 + }, + { + "epoch": 0.027917274369033237, + "grad_norm": 600.169921875, + "learning_rate": 5.5838383838383845e-06, + "loss": 57.2744, + "step": 6910 + }, + { + "epoch": 0.02795767563440087, + "grad_norm": 800.9529418945312, + "learning_rate": 5.591919191919193e-06, + "loss": 56.2487, + "step": 6920 + }, + { + "epoch": 0.0279980768997685, + "grad_norm": 691.3702392578125, + "learning_rate": 5.600000000000001e-06, + "loss": 42.3235, + "step": 6930 + }, + { + "epoch": 0.028038478165136133, + "grad_norm": 625.58642578125, + "learning_rate": 5.608080808080808e-06, + "loss": 51.2433, + "step": 6940 + }, + { + "epoch": 0.028078879430503762, + "grad_norm": 591.58837890625, + "learning_rate": 5.616161616161616e-06, + "loss": 56.8397, + "step": 6950 + }, + { + "epoch": 0.028119280695871395, + "grad_norm": 828.33935546875, + "learning_rate": 5.624242424242424e-06, + "loss": 40.0593, + "step": 6960 + }, + { + "epoch": 0.028159681961239025, + "grad_norm": 0.0, + "learning_rate": 5.632323232323233e-06, + "loss": 32.3671, + "step": 6970 + }, + { + "epoch": 0.028200083226606658, + "grad_norm": 1996.341796875, + "learning_rate": 5.64040404040404e-06, + "loss": 85.5505, + "step": 6980 + }, + { + "epoch": 0.028240484491974287, + "grad_norm": 596.9549560546875, + "learning_rate": 5.648484848484849e-06, + "loss": 52.3962, + "step": 6990 + }, + { + "epoch": 0.02828088575734192, + "grad_norm": 404.708740234375, + "learning_rate": 5.656565656565657e-06, + "loss": 47.1833, + "step": 7000 + }, + { + "epoch": 0.02832128702270955, + "grad_norm": 912.1199340820312, + "learning_rate": 5.664646464646465e-06, + "loss": 55.7482, + "step": 7010 + }, + { + "epoch": 0.028361688288077183, + "grad_norm": 833.735107421875, + "learning_rate": 5.672727272727273e-06, + "loss": 81.9062, + "step": 7020 + }, + { + "epoch": 0.028402089553444813, + "grad_norm": 486.2171630859375, + "learning_rate": 5.680808080808081e-06, + "loss": 44.0852, + "step": 7030 + }, + { + "epoch": 0.028442490818812446, + "grad_norm": 755.2271728515625, + "learning_rate": 5.688888888888889e-06, + "loss": 52.617, + "step": 7040 + }, + { + "epoch": 0.028482892084180075, + "grad_norm": 734.7620849609375, + "learning_rate": 5.696969696969698e-06, + "loss": 63.221, + "step": 7050 + }, + { + "epoch": 0.02852329334954771, + "grad_norm": 699.6790771484375, + "learning_rate": 5.705050505050505e-06, + "loss": 48.4481, + "step": 7060 + }, + { + "epoch": 0.028563694614915338, + "grad_norm": 819.5504760742188, + "learning_rate": 5.713131313131314e-06, + "loss": 51.6106, + "step": 7070 + }, + { + "epoch": 0.02860409588028297, + "grad_norm": 597.2007446289062, + "learning_rate": 5.721212121212122e-06, + "loss": 61.2867, + "step": 7080 + }, + { + "epoch": 0.0286444971456506, + "grad_norm": 970.8818969726562, + "learning_rate": 5.72929292929293e-06, + "loss": 47.3044, + "step": 7090 + }, + { + "epoch": 0.028684898411018234, + "grad_norm": 459.5702819824219, + "learning_rate": 5.737373737373738e-06, + "loss": 57.9656, + "step": 7100 + }, + { + "epoch": 0.028725299676385863, + "grad_norm": 581.9468383789062, + "learning_rate": 5.745454545454546e-06, + "loss": 47.3413, + "step": 7110 + }, + { + "epoch": 0.028765700941753496, + "grad_norm": 467.80316162109375, + "learning_rate": 5.753535353535354e-06, + "loss": 67.7175, + "step": 7120 + }, + { + "epoch": 0.028806102207121126, + "grad_norm": 797.3532104492188, + "learning_rate": 5.761616161616163e-06, + "loss": 47.0771, + "step": 7130 + }, + { + "epoch": 0.02884650347248876, + "grad_norm": 725.4122314453125, + "learning_rate": 5.76969696969697e-06, + "loss": 76.4885, + "step": 7140 + }, + { + "epoch": 0.02888690473785639, + "grad_norm": 1118.2633056640625, + "learning_rate": 5.777777777777778e-06, + "loss": 63.7839, + "step": 7150 + }, + { + "epoch": 0.02892730600322402, + "grad_norm": 626.3515014648438, + "learning_rate": 5.785858585858586e-06, + "loss": 74.4487, + "step": 7160 + }, + { + "epoch": 0.02896770726859165, + "grad_norm": 886.15234375, + "learning_rate": 5.793939393939394e-06, + "loss": 45.6227, + "step": 7170 + }, + { + "epoch": 0.029008108533959284, + "grad_norm": 1027.4027099609375, + "learning_rate": 5.8020202020202025e-06, + "loss": 68.2278, + "step": 7180 + }, + { + "epoch": 0.029048509799326914, + "grad_norm": 725.6253051757812, + "learning_rate": 5.81010101010101e-06, + "loss": 60.2867, + "step": 7190 + }, + { + "epoch": 0.029088911064694547, + "grad_norm": 689.9947509765625, + "learning_rate": 5.8181818181818185e-06, + "loss": 37.6077, + "step": 7200 + }, + { + "epoch": 0.029129312330062176, + "grad_norm": 1027.0823974609375, + "learning_rate": 5.826262626262626e-06, + "loss": 50.7096, + "step": 7210 + }, + { + "epoch": 0.02916971359542981, + "grad_norm": 226.7436981201172, + "learning_rate": 5.834343434343435e-06, + "loss": 64.8211, + "step": 7220 + }, + { + "epoch": 0.02921011486079744, + "grad_norm": 675.5316162109375, + "learning_rate": 5.842424242424243e-06, + "loss": 63.2848, + "step": 7230 + }, + { + "epoch": 0.029250516126165072, + "grad_norm": 564.1721801757812, + "learning_rate": 5.850505050505051e-06, + "loss": 49.1681, + "step": 7240 + }, + { + "epoch": 0.029290917391532702, + "grad_norm": 891.8140869140625, + "learning_rate": 5.858585858585859e-06, + "loss": 58.443, + "step": 7250 + }, + { + "epoch": 0.029331318656900335, + "grad_norm": 782.2761840820312, + "learning_rate": 5.8666666666666675e-06, + "loss": 48.1456, + "step": 7260 + }, + { + "epoch": 0.029371719922267964, + "grad_norm": 1056.0394287109375, + "learning_rate": 5.874747474747475e-06, + "loss": 48.6662, + "step": 7270 + }, + { + "epoch": 0.029412121187635597, + "grad_norm": 440.3533630371094, + "learning_rate": 5.882828282828284e-06, + "loss": 66.0266, + "step": 7280 + }, + { + "epoch": 0.029452522453003227, + "grad_norm": 819.3785400390625, + "learning_rate": 5.890909090909091e-06, + "loss": 70.9092, + "step": 7290 + }, + { + "epoch": 0.02949292371837086, + "grad_norm": 1346.183349609375, + "learning_rate": 5.8989898989899e-06, + "loss": 54.89, + "step": 7300 + }, + { + "epoch": 0.02953332498373849, + "grad_norm": 1040.2781982421875, + "learning_rate": 5.907070707070708e-06, + "loss": 52.2293, + "step": 7310 + }, + { + "epoch": 0.029573726249106123, + "grad_norm": 609.7448120117188, + "learning_rate": 5.915151515151516e-06, + "loss": 46.7856, + "step": 7320 + }, + { + "epoch": 0.029614127514473752, + "grad_norm": 2038.486328125, + "learning_rate": 5.923232323232324e-06, + "loss": 60.1004, + "step": 7330 + }, + { + "epoch": 0.029654528779841385, + "grad_norm": 626.9315185546875, + "learning_rate": 5.9313131313131326e-06, + "loss": 62.3969, + "step": 7340 + }, + { + "epoch": 0.029694930045209015, + "grad_norm": 464.0664978027344, + "learning_rate": 5.93939393939394e-06, + "loss": 46.8368, + "step": 7350 + }, + { + "epoch": 0.029735331310576648, + "grad_norm": 830.5698852539062, + "learning_rate": 5.947474747474749e-06, + "loss": 47.1381, + "step": 7360 + }, + { + "epoch": 0.029775732575944278, + "grad_norm": 924.8589477539062, + "learning_rate": 5.955555555555555e-06, + "loss": 42.5227, + "step": 7370 + }, + { + "epoch": 0.02981613384131191, + "grad_norm": 627.990478515625, + "learning_rate": 5.963636363636364e-06, + "loss": 47.545, + "step": 7380 + }, + { + "epoch": 0.02985653510667954, + "grad_norm": 1128.0264892578125, + "learning_rate": 5.9717171717171714e-06, + "loss": 65.1939, + "step": 7390 + }, + { + "epoch": 0.029896936372047173, + "grad_norm": 680.4015502929688, + "learning_rate": 5.97979797979798e-06, + "loss": 77.6617, + "step": 7400 + }, + { + "epoch": 0.029937337637414803, + "grad_norm": 1081.4503173828125, + "learning_rate": 5.987878787878788e-06, + "loss": 55.418, + "step": 7410 + }, + { + "epoch": 0.029977738902782436, + "grad_norm": 789.0983276367188, + "learning_rate": 5.995959595959596e-06, + "loss": 71.1661, + "step": 7420 + }, + { + "epoch": 0.030018140168150065, + "grad_norm": 1137.5018310546875, + "learning_rate": 6.004040404040404e-06, + "loss": 54.8057, + "step": 7430 + }, + { + "epoch": 0.0300585414335177, + "grad_norm": 728.9630737304688, + "learning_rate": 6.012121212121213e-06, + "loss": 45.3661, + "step": 7440 + }, + { + "epoch": 0.030098942698885328, + "grad_norm": 769.5979614257812, + "learning_rate": 6.0202020202020204e-06, + "loss": 58.8569, + "step": 7450 + }, + { + "epoch": 0.03013934396425296, + "grad_norm": 620.3338012695312, + "learning_rate": 6.028282828282829e-06, + "loss": 48.3413, + "step": 7460 + }, + { + "epoch": 0.03017974522962059, + "grad_norm": 1105.1793212890625, + "learning_rate": 6.0363636363636365e-06, + "loss": 49.1405, + "step": 7470 + }, + { + "epoch": 0.030220146494988224, + "grad_norm": 873.118896484375, + "learning_rate": 6.044444444444445e-06, + "loss": 74.7576, + "step": 7480 + }, + { + "epoch": 0.030260547760355853, + "grad_norm": 657.729248046875, + "learning_rate": 6.052525252525253e-06, + "loss": 59.2427, + "step": 7490 + }, + { + "epoch": 0.030300949025723486, + "grad_norm": 1126.5247802734375, + "learning_rate": 6.060606060606061e-06, + "loss": 62.4204, + "step": 7500 + }, + { + "epoch": 0.030341350291091116, + "grad_norm": 751.9967041015625, + "learning_rate": 6.068686868686869e-06, + "loss": 46.9996, + "step": 7510 + }, + { + "epoch": 0.03038175155645875, + "grad_norm": 620.5112915039062, + "learning_rate": 6.076767676767678e-06, + "loss": 50.917, + "step": 7520 + }, + { + "epoch": 0.03042215282182638, + "grad_norm": 566.5572509765625, + "learning_rate": 6.0848484848484855e-06, + "loss": 63.4834, + "step": 7530 + }, + { + "epoch": 0.030462554087194012, + "grad_norm": 622.100830078125, + "learning_rate": 6.092929292929294e-06, + "loss": 63.3863, + "step": 7540 + }, + { + "epoch": 0.03050295535256164, + "grad_norm": 990.3203125, + "learning_rate": 6.1010101010101015e-06, + "loss": 41.6357, + "step": 7550 + }, + { + "epoch": 0.030543356617929274, + "grad_norm": 746.3700561523438, + "learning_rate": 6.10909090909091e-06, + "loss": 50.8004, + "step": 7560 + }, + { + "epoch": 0.030583757883296904, + "grad_norm": 684.2229614257812, + "learning_rate": 6.117171717171718e-06, + "loss": 41.9685, + "step": 7570 + }, + { + "epoch": 0.030624159148664537, + "grad_norm": 642.2493896484375, + "learning_rate": 6.125252525252525e-06, + "loss": 61.6354, + "step": 7580 + }, + { + "epoch": 0.030664560414032167, + "grad_norm": 554.5538330078125, + "learning_rate": 6.133333333333334e-06, + "loss": 87.8135, + "step": 7590 + }, + { + "epoch": 0.0307049616793998, + "grad_norm": 1042.8443603515625, + "learning_rate": 6.141414141414141e-06, + "loss": 62.6359, + "step": 7600 + }, + { + "epoch": 0.03074536294476743, + "grad_norm": 473.3791198730469, + "learning_rate": 6.14949494949495e-06, + "loss": 57.5903, + "step": 7610 + }, + { + "epoch": 0.030785764210135062, + "grad_norm": 466.8446960449219, + "learning_rate": 6.157575757575758e-06, + "loss": 60.731, + "step": 7620 + }, + { + "epoch": 0.030826165475502692, + "grad_norm": 891.0825805664062, + "learning_rate": 6.165656565656566e-06, + "loss": 60.5271, + "step": 7630 + }, + { + "epoch": 0.030866566740870325, + "grad_norm": 262.8304138183594, + "learning_rate": 6.173737373737374e-06, + "loss": 55.9255, + "step": 7640 + }, + { + "epoch": 0.030906968006237955, + "grad_norm": 946.0525512695312, + "learning_rate": 6.181818181818182e-06, + "loss": 53.6557, + "step": 7650 + }, + { + "epoch": 0.030947369271605588, + "grad_norm": 556.9738159179688, + "learning_rate": 6.18989898989899e-06, + "loss": 50.9775, + "step": 7660 + }, + { + "epoch": 0.030987770536973217, + "grad_norm": 555.402099609375, + "learning_rate": 6.197979797979799e-06, + "loss": 60.4395, + "step": 7670 + }, + { + "epoch": 0.03102817180234085, + "grad_norm": 961.5715942382812, + "learning_rate": 6.206060606060606e-06, + "loss": 74.3546, + "step": 7680 + }, + { + "epoch": 0.03106857306770848, + "grad_norm": 2129.817626953125, + "learning_rate": 6.214141414141415e-06, + "loss": 83.1414, + "step": 7690 + }, + { + "epoch": 0.031108974333076113, + "grad_norm": 704.78564453125, + "learning_rate": 6.222222222222223e-06, + "loss": 49.4473, + "step": 7700 + }, + { + "epoch": 0.031149375598443742, + "grad_norm": 881.543701171875, + "learning_rate": 6.230303030303031e-06, + "loss": 67.96, + "step": 7710 + }, + { + "epoch": 0.031189776863811376, + "grad_norm": 648.9303588867188, + "learning_rate": 6.238383838383839e-06, + "loss": 52.5546, + "step": 7720 + }, + { + "epoch": 0.031230178129179005, + "grad_norm": 561.0586547851562, + "learning_rate": 6.246464646464647e-06, + "loss": 59.6244, + "step": 7730 + }, + { + "epoch": 0.03127057939454664, + "grad_norm": 509.1589050292969, + "learning_rate": 6.254545454545455e-06, + "loss": 45.3432, + "step": 7740 + }, + { + "epoch": 0.03131098065991427, + "grad_norm": 648.4699096679688, + "learning_rate": 6.262626262626264e-06, + "loss": 67.8853, + "step": 7750 + }, + { + "epoch": 0.0313513819252819, + "grad_norm": 1544.9857177734375, + "learning_rate": 6.270707070707071e-06, + "loss": 55.752, + "step": 7760 + }, + { + "epoch": 0.03139178319064953, + "grad_norm": 654.1318969726562, + "learning_rate": 6.27878787878788e-06, + "loss": 51.3248, + "step": 7770 + }, + { + "epoch": 0.03143218445601716, + "grad_norm": 464.28411865234375, + "learning_rate": 6.286868686868688e-06, + "loss": 58.6119, + "step": 7780 + }, + { + "epoch": 0.031472585721384796, + "grad_norm": 543.3377075195312, + "learning_rate": 6.294949494949495e-06, + "loss": 63.7197, + "step": 7790 + }, + { + "epoch": 0.03151298698675242, + "grad_norm": 1178.969970703125, + "learning_rate": 6.303030303030303e-06, + "loss": 56.3949, + "step": 7800 + }, + { + "epoch": 0.031553388252120056, + "grad_norm": 534.0205688476562, + "learning_rate": 6.311111111111111e-06, + "loss": 41.2432, + "step": 7810 + }, + { + "epoch": 0.03159378951748769, + "grad_norm": 965.5466918945312, + "learning_rate": 6.3191919191919195e-06, + "loss": 56.5476, + "step": 7820 + }, + { + "epoch": 0.03163419078285532, + "grad_norm": 429.6799011230469, + "learning_rate": 6.327272727272727e-06, + "loss": 49.7253, + "step": 7830 + }, + { + "epoch": 0.03167459204822295, + "grad_norm": 1150.05126953125, + "learning_rate": 6.3353535353535355e-06, + "loss": 55.4475, + "step": 7840 + }, + { + "epoch": 0.03171499331359058, + "grad_norm": 996.5704956054688, + "learning_rate": 6.343434343434344e-06, + "loss": 53.3563, + "step": 7850 + }, + { + "epoch": 0.031755394578958214, + "grad_norm": 896.9970092773438, + "learning_rate": 6.3515151515151516e-06, + "loss": 51.2789, + "step": 7860 + }, + { + "epoch": 0.03179579584432585, + "grad_norm": 833.8233642578125, + "learning_rate": 6.35959595959596e-06, + "loss": 71.7647, + "step": 7870 + }, + { + "epoch": 0.03183619710969347, + "grad_norm": 2155.960205078125, + "learning_rate": 6.3676767676767685e-06, + "loss": 67.7663, + "step": 7880 + }, + { + "epoch": 0.031876598375061106, + "grad_norm": 685.2928466796875, + "learning_rate": 6.375757575757576e-06, + "loss": 42.8484, + "step": 7890 + }, + { + "epoch": 0.03191699964042874, + "grad_norm": 633.02978515625, + "learning_rate": 6.3838383838383845e-06, + "loss": 46.8481, + "step": 7900 + }, + { + "epoch": 0.03195740090579637, + "grad_norm": 413.897216796875, + "learning_rate": 6.391919191919192e-06, + "loss": 69.5535, + "step": 7910 + }, + { + "epoch": 0.031997802171164, + "grad_norm": 367.6637878417969, + "learning_rate": 6.4000000000000006e-06, + "loss": 62.3668, + "step": 7920 + }, + { + "epoch": 0.03203820343653163, + "grad_norm": 580.1402587890625, + "learning_rate": 6.408080808080809e-06, + "loss": 58.8996, + "step": 7930 + }, + { + "epoch": 0.032078604701899265, + "grad_norm": 779.8268432617188, + "learning_rate": 6.416161616161617e-06, + "loss": 49.8146, + "step": 7940 + }, + { + "epoch": 0.0321190059672669, + "grad_norm": 712.888427734375, + "learning_rate": 6.424242424242425e-06, + "loss": 61.1355, + "step": 7950 + }, + { + "epoch": 0.032159407232634524, + "grad_norm": 760.9315185546875, + "learning_rate": 6.4323232323232335e-06, + "loss": 96.3001, + "step": 7960 + }, + { + "epoch": 0.03219980849800216, + "grad_norm": 949.9454345703125, + "learning_rate": 6.440404040404041e-06, + "loss": 54.5059, + "step": 7970 + }, + { + "epoch": 0.03224020976336979, + "grad_norm": 1105.3018798828125, + "learning_rate": 6.4484848484848496e-06, + "loss": 86.9141, + "step": 7980 + }, + { + "epoch": 0.03228061102873742, + "grad_norm": 622.3108520507812, + "learning_rate": 6.456565656565658e-06, + "loss": 52.3702, + "step": 7990 + }, + { + "epoch": 0.03232101229410505, + "grad_norm": 511.1920166015625, + "learning_rate": 6.464646464646466e-06, + "loss": 65.486, + "step": 8000 + }, + { + "epoch": 0.03236141355947268, + "grad_norm": 787.6452026367188, + "learning_rate": 6.472727272727272e-06, + "loss": 63.4064, + "step": 8010 + }, + { + "epoch": 0.032401814824840315, + "grad_norm": 641.1221923828125, + "learning_rate": 6.480808080808081e-06, + "loss": 67.2346, + "step": 8020 + }, + { + "epoch": 0.03244221609020795, + "grad_norm": 849.4371948242188, + "learning_rate": 6.488888888888889e-06, + "loss": 74.775, + "step": 8030 + }, + { + "epoch": 0.032482617355575574, + "grad_norm": 281.5003662109375, + "learning_rate": 6.496969696969697e-06, + "loss": 48.3455, + "step": 8040 + }, + { + "epoch": 0.03252301862094321, + "grad_norm": 635.1185302734375, + "learning_rate": 6.505050505050505e-06, + "loss": 58.5666, + "step": 8050 + }, + { + "epoch": 0.03256341988631084, + "grad_norm": 930.7824096679688, + "learning_rate": 6.513131313131314e-06, + "loss": 73.5296, + "step": 8060 + }, + { + "epoch": 0.03260382115167847, + "grad_norm": 582.126708984375, + "learning_rate": 6.521212121212121e-06, + "loss": 34.6027, + "step": 8070 + }, + { + "epoch": 0.0326442224170461, + "grad_norm": 562.6720581054688, + "learning_rate": 6.52929292929293e-06, + "loss": 60.8401, + "step": 8080 + }, + { + "epoch": 0.03268462368241373, + "grad_norm": 306.123046875, + "learning_rate": 6.537373737373737e-06, + "loss": 44.4158, + "step": 8090 + }, + { + "epoch": 0.032725024947781366, + "grad_norm": 991.8218994140625, + "learning_rate": 6.545454545454546e-06, + "loss": 45.6267, + "step": 8100 + }, + { + "epoch": 0.032765426213149, + "grad_norm": 835.2438354492188, + "learning_rate": 6.553535353535354e-06, + "loss": 53.1318, + "step": 8110 + }, + { + "epoch": 0.032805827478516625, + "grad_norm": 1006.3707885742188, + "learning_rate": 6.561616161616162e-06, + "loss": 53.602, + "step": 8120 + }, + { + "epoch": 0.03284622874388426, + "grad_norm": 350.9178466796875, + "learning_rate": 6.56969696969697e-06, + "loss": 49.0478, + "step": 8130 + }, + { + "epoch": 0.03288663000925189, + "grad_norm": 542.7490844726562, + "learning_rate": 6.577777777777779e-06, + "loss": 42.3374, + "step": 8140 + }, + { + "epoch": 0.032927031274619524, + "grad_norm": 453.57965087890625, + "learning_rate": 6.585858585858586e-06, + "loss": 50.3855, + "step": 8150 + }, + { + "epoch": 0.03296743253998715, + "grad_norm": 887.0755615234375, + "learning_rate": 6.593939393939395e-06, + "loss": 43.7275, + "step": 8160 + }, + { + "epoch": 0.03300783380535478, + "grad_norm": 388.6866760253906, + "learning_rate": 6.602020202020203e-06, + "loss": 44.9985, + "step": 8170 + }, + { + "epoch": 0.033048235070722416, + "grad_norm": 672.9983520507812, + "learning_rate": 6.610101010101011e-06, + "loss": 53.5537, + "step": 8180 + }, + { + "epoch": 0.03308863633609005, + "grad_norm": 634.128173828125, + "learning_rate": 6.618181818181819e-06, + "loss": 52.5657, + "step": 8190 + }, + { + "epoch": 0.033129037601457675, + "grad_norm": 735.7168579101562, + "learning_rate": 6.626262626262627e-06, + "loss": 61.5097, + "step": 8200 + }, + { + "epoch": 0.03316943886682531, + "grad_norm": 506.9273376464844, + "learning_rate": 6.634343434343435e-06, + "loss": 59.6884, + "step": 8210 + }, + { + "epoch": 0.03320984013219294, + "grad_norm": 568.7894287109375, + "learning_rate": 6.642424242424242e-06, + "loss": 54.3135, + "step": 8220 + }, + { + "epoch": 0.033250241397560575, + "grad_norm": 916.6472778320312, + "learning_rate": 6.650505050505051e-06, + "loss": 46.6363, + "step": 8230 + }, + { + "epoch": 0.0332906426629282, + "grad_norm": 511.55718994140625, + "learning_rate": 6.658585858585859e-06, + "loss": 45.2044, + "step": 8240 + }, + { + "epoch": 0.033331043928295834, + "grad_norm": 680.4920654296875, + "learning_rate": 6.666666666666667e-06, + "loss": 51.8027, + "step": 8250 + }, + { + "epoch": 0.03337144519366347, + "grad_norm": 524.2860107421875, + "learning_rate": 6.674747474747475e-06, + "loss": 46.2791, + "step": 8260 + }, + { + "epoch": 0.0334118464590311, + "grad_norm": 1008.0855712890625, + "learning_rate": 6.682828282828283e-06, + "loss": 70.7169, + "step": 8270 + }, + { + "epoch": 0.033452247724398726, + "grad_norm": 586.5045776367188, + "learning_rate": 6.690909090909091e-06, + "loss": 77.9191, + "step": 8280 + }, + { + "epoch": 0.03349264898976636, + "grad_norm": 613.2836303710938, + "learning_rate": 6.6989898989899e-06, + "loss": 63.8899, + "step": 8290 + }, + { + "epoch": 0.03353305025513399, + "grad_norm": 339.6265869140625, + "learning_rate": 6.707070707070707e-06, + "loss": 71.7176, + "step": 8300 + }, + { + "epoch": 0.033573451520501625, + "grad_norm": 341.3080749511719, + "learning_rate": 6.715151515151516e-06, + "loss": 49.7896, + "step": 8310 + }, + { + "epoch": 0.03361385278586925, + "grad_norm": 1058.944580078125, + "learning_rate": 6.723232323232324e-06, + "loss": 61.7188, + "step": 8320 + }, + { + "epoch": 0.033654254051236884, + "grad_norm": 392.35272216796875, + "learning_rate": 6.731313131313132e-06, + "loss": 54.2229, + "step": 8330 + }, + { + "epoch": 0.03369465531660452, + "grad_norm": 725.1533813476562, + "learning_rate": 6.73939393939394e-06, + "loss": 48.29, + "step": 8340 + }, + { + "epoch": 0.03373505658197215, + "grad_norm": 741.072265625, + "learning_rate": 6.747474747474749e-06, + "loss": 64.0867, + "step": 8350 + }, + { + "epoch": 0.033775457847339777, + "grad_norm": 450.0721130371094, + "learning_rate": 6.755555555555556e-06, + "loss": 49.962, + "step": 8360 + }, + { + "epoch": 0.03381585911270741, + "grad_norm": 967.4086303710938, + "learning_rate": 6.763636363636365e-06, + "loss": 46.306, + "step": 8370 + }, + { + "epoch": 0.03385626037807504, + "grad_norm": 537.5621337890625, + "learning_rate": 6.771717171717172e-06, + "loss": 29.0172, + "step": 8380 + }, + { + "epoch": 0.033896661643442676, + "grad_norm": 1187.185546875, + "learning_rate": 6.779797979797981e-06, + "loss": 67.4143, + "step": 8390 + }, + { + "epoch": 0.0339370629088103, + "grad_norm": 893.6007690429688, + "learning_rate": 6.787878787878789e-06, + "loss": 39.529, + "step": 8400 + }, + { + "epoch": 0.033977464174177935, + "grad_norm": 713.060546875, + "learning_rate": 6.795959595959597e-06, + "loss": 55.9818, + "step": 8410 + }, + { + "epoch": 0.03401786543954557, + "grad_norm": 406.208251953125, + "learning_rate": 6.804040404040405e-06, + "loss": 49.2516, + "step": 8420 + }, + { + "epoch": 0.0340582667049132, + "grad_norm": 644.3673095703125, + "learning_rate": 6.812121212121212e-06, + "loss": 41.1035, + "step": 8430 + }, + { + "epoch": 0.03409866797028083, + "grad_norm": 759.6295166015625, + "learning_rate": 6.82020202020202e-06, + "loss": 58.1698, + "step": 8440 + }, + { + "epoch": 0.03413906923564846, + "grad_norm": 564.6557006835938, + "learning_rate": 6.828282828282828e-06, + "loss": 31.1043, + "step": 8450 + }, + { + "epoch": 0.03417947050101609, + "grad_norm": 787.0310668945312, + "learning_rate": 6.8363636363636364e-06, + "loss": 50.7432, + "step": 8460 + }, + { + "epoch": 0.034219871766383726, + "grad_norm": 373.0039367675781, + "learning_rate": 6.844444444444445e-06, + "loss": 37.4919, + "step": 8470 + }, + { + "epoch": 0.03426027303175135, + "grad_norm": 747.8372192382812, + "learning_rate": 6.8525252525252525e-06, + "loss": 63.8979, + "step": 8480 + }, + { + "epoch": 0.034300674297118985, + "grad_norm": 407.1216125488281, + "learning_rate": 6.860606060606061e-06, + "loss": 69.6129, + "step": 8490 + }, + { + "epoch": 0.03434107556248662, + "grad_norm": 70.61812591552734, + "learning_rate": 6.868686868686869e-06, + "loss": 78.9777, + "step": 8500 + }, + { + "epoch": 0.03438147682785425, + "grad_norm": 599.4298095703125, + "learning_rate": 6.876767676767677e-06, + "loss": 68.5256, + "step": 8510 + }, + { + "epoch": 0.03442187809322188, + "grad_norm": 726.5167236328125, + "learning_rate": 6.8848484848484854e-06, + "loss": 89.8193, + "step": 8520 + }, + { + "epoch": 0.03446227935858951, + "grad_norm": 612.718505859375, + "learning_rate": 6.892929292929294e-06, + "loss": 50.8533, + "step": 8530 + }, + { + "epoch": 0.034502680623957144, + "grad_norm": 1743.5341796875, + "learning_rate": 6.9010101010101015e-06, + "loss": 50.107, + "step": 8540 + }, + { + "epoch": 0.03454308188932478, + "grad_norm": 702.13671875, + "learning_rate": 6.90909090909091e-06, + "loss": 74.444, + "step": 8550 + }, + { + "epoch": 0.0345834831546924, + "grad_norm": 875.4811401367188, + "learning_rate": 6.9171717171717175e-06, + "loss": 44.2529, + "step": 8560 + }, + { + "epoch": 0.034623884420060036, + "grad_norm": 697.1666870117188, + "learning_rate": 6.925252525252526e-06, + "loss": 87.5896, + "step": 8570 + }, + { + "epoch": 0.03466428568542767, + "grad_norm": 751.409423828125, + "learning_rate": 6.9333333333333344e-06, + "loss": 46.6455, + "step": 8580 + }, + { + "epoch": 0.0347046869507953, + "grad_norm": 2989.8251953125, + "learning_rate": 6.941414141414142e-06, + "loss": 58.6519, + "step": 8590 + }, + { + "epoch": 0.03474508821616293, + "grad_norm": 701.8047485351562, + "learning_rate": 6.9494949494949505e-06, + "loss": 89.5045, + "step": 8600 + }, + { + "epoch": 0.03478548948153056, + "grad_norm": 460.89923095703125, + "learning_rate": 6.957575757575759e-06, + "loss": 85.3149, + "step": 8610 + }, + { + "epoch": 0.034825890746898194, + "grad_norm": 860.1593627929688, + "learning_rate": 6.9656565656565665e-06, + "loss": 45.1932, + "step": 8620 + }, + { + "epoch": 0.03486629201226583, + "grad_norm": 987.288330078125, + "learning_rate": 6.973737373737375e-06, + "loss": 64.3151, + "step": 8630 + }, + { + "epoch": 0.034906693277633453, + "grad_norm": 1131.6240234375, + "learning_rate": 6.981818181818183e-06, + "loss": 61.3024, + "step": 8640 + }, + { + "epoch": 0.03494709454300109, + "grad_norm": 621.4078979492188, + "learning_rate": 6.98989898989899e-06, + "loss": 43.6811, + "step": 8650 + }, + { + "epoch": 0.03498749580836872, + "grad_norm": 588.7035522460938, + "learning_rate": 6.997979797979798e-06, + "loss": 52.8447, + "step": 8660 + }, + { + "epoch": 0.03502789707373635, + "grad_norm": 549.5409545898438, + "learning_rate": 7.006060606060606e-06, + "loss": 54.9615, + "step": 8670 + }, + { + "epoch": 0.03506829833910398, + "grad_norm": 900.7230834960938, + "learning_rate": 7.014141414141415e-06, + "loss": 36.6513, + "step": 8680 + }, + { + "epoch": 0.03510869960447161, + "grad_norm": 306.8768310546875, + "learning_rate": 7.022222222222222e-06, + "loss": 39.3492, + "step": 8690 + }, + { + "epoch": 0.035149100869839245, + "grad_norm": 581.7448120117188, + "learning_rate": 7.030303030303031e-06, + "loss": 62.2904, + "step": 8700 + }, + { + "epoch": 0.03518950213520688, + "grad_norm": 781.9658813476562, + "learning_rate": 7.038383838383839e-06, + "loss": 53.9546, + "step": 8710 + }, + { + "epoch": 0.035229903400574504, + "grad_norm": 751.5908813476562, + "learning_rate": 7.046464646464647e-06, + "loss": 54.8535, + "step": 8720 + }, + { + "epoch": 0.03527030466594214, + "grad_norm": 1163.4237060546875, + "learning_rate": 7.054545454545455e-06, + "loss": 55.7728, + "step": 8730 + }, + { + "epoch": 0.03531070593130977, + "grad_norm": 1224.25634765625, + "learning_rate": 7.062626262626263e-06, + "loss": 70.6655, + "step": 8740 + }, + { + "epoch": 0.0353511071966774, + "grad_norm": 679.2452392578125, + "learning_rate": 7.070707070707071e-06, + "loss": 53.1984, + "step": 8750 + }, + { + "epoch": 0.03539150846204503, + "grad_norm": 1009.7100830078125, + "learning_rate": 7.07878787878788e-06, + "loss": 53.5107, + "step": 8760 + }, + { + "epoch": 0.03543190972741266, + "grad_norm": 429.02496337890625, + "learning_rate": 7.086868686868687e-06, + "loss": 50.9108, + "step": 8770 + }, + { + "epoch": 0.035472310992780295, + "grad_norm": 705.5595092773438, + "learning_rate": 7.094949494949496e-06, + "loss": 39.2461, + "step": 8780 + }, + { + "epoch": 0.03551271225814793, + "grad_norm": 774.8412475585938, + "learning_rate": 7.103030303030304e-06, + "loss": 58.4899, + "step": 8790 + }, + { + "epoch": 0.035553113523515555, + "grad_norm": 746.4476318359375, + "learning_rate": 7.111111111111112e-06, + "loss": 49.5306, + "step": 8800 + }, + { + "epoch": 0.03559351478888319, + "grad_norm": 1140.700927734375, + "learning_rate": 7.11919191919192e-06, + "loss": 98.9858, + "step": 8810 + }, + { + "epoch": 0.03563391605425082, + "grad_norm": 822.6146240234375, + "learning_rate": 7.127272727272728e-06, + "loss": 40.658, + "step": 8820 + }, + { + "epoch": 0.035674317319618454, + "grad_norm": 305.0389709472656, + "learning_rate": 7.135353535353536e-06, + "loss": 65.953, + "step": 8830 + }, + { + "epoch": 0.03571471858498608, + "grad_norm": 983.6098022460938, + "learning_rate": 7.143434343434345e-06, + "loss": 55.4964, + "step": 8840 + }, + { + "epoch": 0.03575511985035371, + "grad_norm": 801.6506958007812, + "learning_rate": 7.151515151515152e-06, + "loss": 52.7301, + "step": 8850 + }, + { + "epoch": 0.035795521115721346, + "grad_norm": 346.94573974609375, + "learning_rate": 7.15959595959596e-06, + "loss": 55.3774, + "step": 8860 + }, + { + "epoch": 0.03583592238108898, + "grad_norm": 660.070556640625, + "learning_rate": 7.1676767676767676e-06, + "loss": 46.3488, + "step": 8870 + }, + { + "epoch": 0.035876323646456605, + "grad_norm": 420.3671569824219, + "learning_rate": 7.175757575757576e-06, + "loss": 57.9034, + "step": 8880 + }, + { + "epoch": 0.03591672491182424, + "grad_norm": 734.4971313476562, + "learning_rate": 7.1838383838383845e-06, + "loss": 51.1084, + "step": 8890 + }, + { + "epoch": 0.03595712617719187, + "grad_norm": 622.8480224609375, + "learning_rate": 7.191919191919192e-06, + "loss": 63.6594, + "step": 8900 + }, + { + "epoch": 0.035997527442559504, + "grad_norm": 1083.3779296875, + "learning_rate": 7.2000000000000005e-06, + "loss": 48.5284, + "step": 8910 + }, + { + "epoch": 0.03603792870792713, + "grad_norm": 795.3521118164062, + "learning_rate": 7.208080808080808e-06, + "loss": 60.9741, + "step": 8920 + }, + { + "epoch": 0.036078329973294763, + "grad_norm": 763.7685546875, + "learning_rate": 7.2161616161616166e-06, + "loss": 67.9574, + "step": 8930 + }, + { + "epoch": 0.0361187312386624, + "grad_norm": 299.5180969238281, + "learning_rate": 7.224242424242425e-06, + "loss": 44.8309, + "step": 8940 + }, + { + "epoch": 0.03615913250403003, + "grad_norm": 793.3358154296875, + "learning_rate": 7.232323232323233e-06, + "loss": 57.0138, + "step": 8950 + }, + { + "epoch": 0.036199533769397656, + "grad_norm": 698.6807861328125, + "learning_rate": 7.240404040404041e-06, + "loss": 50.4727, + "step": 8960 + }, + { + "epoch": 0.03623993503476529, + "grad_norm": 1897.521484375, + "learning_rate": 7.2484848484848495e-06, + "loss": 66.5708, + "step": 8970 + }, + { + "epoch": 0.03628033630013292, + "grad_norm": 390.52337646484375, + "learning_rate": 7.256565656565657e-06, + "loss": 32.5167, + "step": 8980 + }, + { + "epoch": 0.036320737565500555, + "grad_norm": 1146.5927734375, + "learning_rate": 7.2646464646464656e-06, + "loss": 74.7776, + "step": 8990 + }, + { + "epoch": 0.03636113883086818, + "grad_norm": 698.8363037109375, + "learning_rate": 7.272727272727273e-06, + "loss": 52.9729, + "step": 9000 + }, + { + "epoch": 0.036401540096235814, + "grad_norm": 787.6583862304688, + "learning_rate": 7.280808080808082e-06, + "loss": 58.5929, + "step": 9010 + }, + { + "epoch": 0.03644194136160345, + "grad_norm": 1728.040771484375, + "learning_rate": 7.28888888888889e-06, + "loss": 58.7803, + "step": 9020 + }, + { + "epoch": 0.03648234262697108, + "grad_norm": 504.9664001464844, + "learning_rate": 7.296969696969698e-06, + "loss": 28.0724, + "step": 9030 + }, + { + "epoch": 0.036522743892338706, + "grad_norm": 1262.524169921875, + "learning_rate": 7.305050505050506e-06, + "loss": 67.8205, + "step": 9040 + }, + { + "epoch": 0.03656314515770634, + "grad_norm": 425.6719970703125, + "learning_rate": 7.3131313131313146e-06, + "loss": 39.1797, + "step": 9050 + }, + { + "epoch": 0.03660354642307397, + "grad_norm": 1008.1500244140625, + "learning_rate": 7.321212121212122e-06, + "loss": 70.5822, + "step": 9060 + }, + { + "epoch": 0.036643947688441605, + "grad_norm": 1135.142333984375, + "learning_rate": 7.32929292929293e-06, + "loss": 63.7957, + "step": 9070 + }, + { + "epoch": 0.03668434895380923, + "grad_norm": 610.756103515625, + "learning_rate": 7.337373737373737e-06, + "loss": 48.8291, + "step": 9080 + }, + { + "epoch": 0.036724750219176865, + "grad_norm": 295.875, + "learning_rate": 7.345454545454546e-06, + "loss": 42.3916, + "step": 9090 + }, + { + "epoch": 0.0367651514845445, + "grad_norm": 814.1820068359375, + "learning_rate": 7.353535353535353e-06, + "loss": 81.7657, + "step": 9100 + }, + { + "epoch": 0.03680555274991213, + "grad_norm": 800.7321166992188, + "learning_rate": 7.361616161616162e-06, + "loss": 38.7879, + "step": 9110 + }, + { + "epoch": 0.03684595401527976, + "grad_norm": 1052.885009765625, + "learning_rate": 7.36969696969697e-06, + "loss": 51.3777, + "step": 9120 + }, + { + "epoch": 0.03688635528064739, + "grad_norm": 571.583984375, + "learning_rate": 7.377777777777778e-06, + "loss": 43.6872, + "step": 9130 + }, + { + "epoch": 0.03692675654601502, + "grad_norm": 452.5201110839844, + "learning_rate": 7.385858585858586e-06, + "loss": 45.5216, + "step": 9140 + }, + { + "epoch": 0.03696715781138265, + "grad_norm": 636.5348510742188, + "learning_rate": 7.393939393939395e-06, + "loss": 55.7327, + "step": 9150 + }, + { + "epoch": 0.03700755907675028, + "grad_norm": 746.4474487304688, + "learning_rate": 7.402020202020202e-06, + "loss": 88.9879, + "step": 9160 + }, + { + "epoch": 0.037047960342117915, + "grad_norm": 1030.46044921875, + "learning_rate": 7.410101010101011e-06, + "loss": 67.4612, + "step": 9170 + }, + { + "epoch": 0.03708836160748555, + "grad_norm": 632.0867919921875, + "learning_rate": 7.4181818181818185e-06, + "loss": 48.0889, + "step": 9180 + }, + { + "epoch": 0.037128762872853174, + "grad_norm": 709.0557250976562, + "learning_rate": 7.426262626262627e-06, + "loss": 56.7526, + "step": 9190 + }, + { + "epoch": 0.03716916413822081, + "grad_norm": 910.6807250976562, + "learning_rate": 7.434343434343435e-06, + "loss": 61.0249, + "step": 9200 + }, + { + "epoch": 0.03720956540358844, + "grad_norm": 1631.1051025390625, + "learning_rate": 7.442424242424243e-06, + "loss": 53.2277, + "step": 9210 + }, + { + "epoch": 0.037249966668956074, + "grad_norm": 637.2019653320312, + "learning_rate": 7.450505050505051e-06, + "loss": 48.1968, + "step": 9220 + }, + { + "epoch": 0.0372903679343237, + "grad_norm": 881.9324951171875, + "learning_rate": 7.45858585858586e-06, + "loss": 56.8194, + "step": 9230 + }, + { + "epoch": 0.03733076919969133, + "grad_norm": 986.95263671875, + "learning_rate": 7.4666666666666675e-06, + "loss": 53.6152, + "step": 9240 + }, + { + "epoch": 0.037371170465058966, + "grad_norm": 767.5629272460938, + "learning_rate": 7.474747474747476e-06, + "loss": 49.1764, + "step": 9250 + }, + { + "epoch": 0.0374115717304266, + "grad_norm": 2124.801513671875, + "learning_rate": 7.4828282828282835e-06, + "loss": 42.4815, + "step": 9260 + }, + { + "epoch": 0.037451972995794225, + "grad_norm": 409.00555419921875, + "learning_rate": 7.490909090909092e-06, + "loss": 60.798, + "step": 9270 + }, + { + "epoch": 0.03749237426116186, + "grad_norm": 271.09124755859375, + "learning_rate": 7.4989898989899e-06, + "loss": 50.9011, + "step": 9280 + }, + { + "epoch": 0.03753277552652949, + "grad_norm": 974.7747192382812, + "learning_rate": 7.507070707070707e-06, + "loss": 62.7403, + "step": 9290 + }, + { + "epoch": 0.037573176791897124, + "grad_norm": 697.45166015625, + "learning_rate": 7.515151515151516e-06, + "loss": 71.2546, + "step": 9300 + }, + { + "epoch": 0.03761357805726475, + "grad_norm": 536.0864868164062, + "learning_rate": 7.523232323232323e-06, + "loss": 46.562, + "step": 9310 + }, + { + "epoch": 0.03765397932263238, + "grad_norm": 801.9717407226562, + "learning_rate": 7.531313131313132e-06, + "loss": 68.1762, + "step": 9320 + }, + { + "epoch": 0.037694380588000016, + "grad_norm": 780.1590576171875, + "learning_rate": 7.53939393939394e-06, + "loss": 72.8615, + "step": 9330 + }, + { + "epoch": 0.03773478185336765, + "grad_norm": 597.0780029296875, + "learning_rate": 7.547474747474748e-06, + "loss": 51.8024, + "step": 9340 + }, + { + "epoch": 0.037775183118735275, + "grad_norm": 392.8757019042969, + "learning_rate": 7.555555555555556e-06, + "loss": 25.0151, + "step": 9350 + }, + { + "epoch": 0.03781558438410291, + "grad_norm": 908.3223266601562, + "learning_rate": 7.563636363636364e-06, + "loss": 85.6096, + "step": 9360 + }, + { + "epoch": 0.03785598564947054, + "grad_norm": 596.0296020507812, + "learning_rate": 7.571717171717172e-06, + "loss": 35.1797, + "step": 9370 + }, + { + "epoch": 0.037896386914838175, + "grad_norm": 587.8231201171875, + "learning_rate": 7.579797979797981e-06, + "loss": 60.454, + "step": 9380 + }, + { + "epoch": 0.0379367881802058, + "grad_norm": 616.9268798828125, + "learning_rate": 7.587878787878788e-06, + "loss": 50.2102, + "step": 9390 + }, + { + "epoch": 0.037977189445573434, + "grad_norm": 486.89788818359375, + "learning_rate": 7.595959595959597e-06, + "loss": 49.0462, + "step": 9400 + }, + { + "epoch": 0.03801759071094107, + "grad_norm": 655.9788208007812, + "learning_rate": 7.604040404040405e-06, + "loss": 51.1414, + "step": 9410 + }, + { + "epoch": 0.0380579919763087, + "grad_norm": 652.9426879882812, + "learning_rate": 7.612121212121213e-06, + "loss": 44.0585, + "step": 9420 + }, + { + "epoch": 0.038098393241676326, + "grad_norm": 315.17327880859375, + "learning_rate": 7.620202020202021e-06, + "loss": 46.1256, + "step": 9430 + }, + { + "epoch": 0.03813879450704396, + "grad_norm": 1290.96533203125, + "learning_rate": 7.628282828282829e-06, + "loss": 51.2887, + "step": 9440 + }, + { + "epoch": 0.03817919577241159, + "grad_norm": 945.685302734375, + "learning_rate": 7.636363636363638e-06, + "loss": 51.7154, + "step": 9450 + }, + { + "epoch": 0.038219597037779225, + "grad_norm": 572.252685546875, + "learning_rate": 7.644444444444445e-06, + "loss": 55.3707, + "step": 9460 + }, + { + "epoch": 0.03825999830314685, + "grad_norm": 1046.2398681640625, + "learning_rate": 7.652525252525253e-06, + "loss": 54.6761, + "step": 9470 + }, + { + "epoch": 0.038300399568514484, + "grad_norm": 964.0022583007812, + "learning_rate": 7.660606060606062e-06, + "loss": 65.9791, + "step": 9480 + }, + { + "epoch": 0.03834080083388212, + "grad_norm": 619.1194458007812, + "learning_rate": 7.66868686868687e-06, + "loss": 43.4894, + "step": 9490 + }, + { + "epoch": 0.03838120209924975, + "grad_norm": 337.8772277832031, + "learning_rate": 7.676767676767677e-06, + "loss": 58.457, + "step": 9500 + }, + { + "epoch": 0.03842160336461738, + "grad_norm": 542.2553100585938, + "learning_rate": 7.684848484848485e-06, + "loss": 43.3362, + "step": 9510 + }, + { + "epoch": 0.03846200462998501, + "grad_norm": 740.5171508789062, + "learning_rate": 7.692929292929294e-06, + "loss": 74.0348, + "step": 9520 + }, + { + "epoch": 0.03850240589535264, + "grad_norm": 974.9364624023438, + "learning_rate": 7.7010101010101e-06, + "loss": 44.931, + "step": 9530 + }, + { + "epoch": 0.038542807160720276, + "grad_norm": 519.744140625, + "learning_rate": 7.709090909090909e-06, + "loss": 33.287, + "step": 9540 + }, + { + "epoch": 0.0385832084260879, + "grad_norm": 451.38677978515625, + "learning_rate": 7.717171717171717e-06, + "loss": 40.3233, + "step": 9550 + }, + { + "epoch": 0.038623609691455535, + "grad_norm": 675.4090576171875, + "learning_rate": 7.725252525252526e-06, + "loss": 41.2818, + "step": 9560 + }, + { + "epoch": 0.03866401095682317, + "grad_norm": 4203.88623046875, + "learning_rate": 7.733333333333334e-06, + "loss": 79.4917, + "step": 9570 + }, + { + "epoch": 0.0387044122221908, + "grad_norm": 688.3527221679688, + "learning_rate": 7.741414141414141e-06, + "loss": 46.4714, + "step": 9580 + }, + { + "epoch": 0.03874481348755843, + "grad_norm": 985.7579345703125, + "learning_rate": 7.74949494949495e-06, + "loss": 49.4049, + "step": 9590 + }, + { + "epoch": 0.03878521475292606, + "grad_norm": 498.4870300292969, + "learning_rate": 7.757575757575758e-06, + "loss": 54.2156, + "step": 9600 + }, + { + "epoch": 0.03882561601829369, + "grad_norm": 474.2466125488281, + "learning_rate": 7.765656565656566e-06, + "loss": 43.1809, + "step": 9610 + }, + { + "epoch": 0.038866017283661326, + "grad_norm": 777.8629150390625, + "learning_rate": 7.773737373737375e-06, + "loss": 50.0144, + "step": 9620 + }, + { + "epoch": 0.03890641854902895, + "grad_norm": 911.9942016601562, + "learning_rate": 7.781818181818183e-06, + "loss": 71.0392, + "step": 9630 + }, + { + "epoch": 0.038946819814396585, + "grad_norm": 745.5965576171875, + "learning_rate": 7.78989898989899e-06, + "loss": 52.3377, + "step": 9640 + }, + { + "epoch": 0.03898722107976422, + "grad_norm": 655.3526611328125, + "learning_rate": 7.797979797979799e-06, + "loss": 56.1896, + "step": 9650 + }, + { + "epoch": 0.03902762234513185, + "grad_norm": 986.2277221679688, + "learning_rate": 7.806060606060607e-06, + "loss": 66.5987, + "step": 9660 + }, + { + "epoch": 0.03906802361049948, + "grad_norm": 2253.599365234375, + "learning_rate": 7.814141414141415e-06, + "loss": 54.8736, + "step": 9670 + }, + { + "epoch": 0.03910842487586711, + "grad_norm": 809.8319091796875, + "learning_rate": 7.822222222222224e-06, + "loss": 60.677, + "step": 9680 + }, + { + "epoch": 0.039148826141234744, + "grad_norm": 745.8842163085938, + "learning_rate": 7.83030303030303e-06, + "loss": 54.0499, + "step": 9690 + }, + { + "epoch": 0.03918922740660238, + "grad_norm": 392.3405456542969, + "learning_rate": 7.838383838383839e-06, + "loss": 57.1388, + "step": 9700 + }, + { + "epoch": 0.03922962867197, + "grad_norm": 391.06524658203125, + "learning_rate": 7.846464646464646e-06, + "loss": 67.0702, + "step": 9710 + }, + { + "epoch": 0.039270029937337636, + "grad_norm": 964.3352661132812, + "learning_rate": 7.854545454545454e-06, + "loss": 49.3004, + "step": 9720 + }, + { + "epoch": 0.03931043120270527, + "grad_norm": 1257.4691162109375, + "learning_rate": 7.862626262626263e-06, + "loss": 34.1305, + "step": 9730 + }, + { + "epoch": 0.0393508324680729, + "grad_norm": 408.74609375, + "learning_rate": 7.870707070707071e-06, + "loss": 52.1865, + "step": 9740 + }, + { + "epoch": 0.03939123373344053, + "grad_norm": 1199.1552734375, + "learning_rate": 7.87878787878788e-06, + "loss": 54.2215, + "step": 9750 + }, + { + "epoch": 0.03943163499880816, + "grad_norm": 1096.5802001953125, + "learning_rate": 7.886868686868686e-06, + "loss": 61.8289, + "step": 9760 + }, + { + "epoch": 0.039472036264175794, + "grad_norm": 1617.402587890625, + "learning_rate": 7.894949494949495e-06, + "loss": 55.392, + "step": 9770 + }, + { + "epoch": 0.03951243752954343, + "grad_norm": 603.028076171875, + "learning_rate": 7.903030303030303e-06, + "loss": 38.076, + "step": 9780 + }, + { + "epoch": 0.039552838794911054, + "grad_norm": 712.2247314453125, + "learning_rate": 7.911111111111112e-06, + "loss": 59.4955, + "step": 9790 + }, + { + "epoch": 0.03959324006027869, + "grad_norm": 474.9757995605469, + "learning_rate": 7.91919191919192e-06, + "loss": 41.3934, + "step": 9800 + }, + { + "epoch": 0.03963364132564632, + "grad_norm": 895.0010375976562, + "learning_rate": 7.927272727272729e-06, + "loss": 71.613, + "step": 9810 + }, + { + "epoch": 0.03967404259101395, + "grad_norm": 343.2088928222656, + "learning_rate": 7.935353535353535e-06, + "loss": 56.6161, + "step": 9820 + }, + { + "epoch": 0.03971444385638158, + "grad_norm": 317.080078125, + "learning_rate": 7.943434343434344e-06, + "loss": 58.7523, + "step": 9830 + }, + { + "epoch": 0.03975484512174921, + "grad_norm": 1206.803955078125, + "learning_rate": 7.951515151515152e-06, + "loss": 60.3558, + "step": 9840 + }, + { + "epoch": 0.039795246387116845, + "grad_norm": 568.8405151367188, + "learning_rate": 7.95959595959596e-06, + "loss": 50.1119, + "step": 9850 + }, + { + "epoch": 0.03983564765248448, + "grad_norm": 1495.424560546875, + "learning_rate": 7.96767676767677e-06, + "loss": 63.5327, + "step": 9860 + }, + { + "epoch": 0.039876048917852104, + "grad_norm": 889.3040771484375, + "learning_rate": 7.975757575757576e-06, + "loss": 59.4066, + "step": 9870 + }, + { + "epoch": 0.03991645018321974, + "grad_norm": 515.6483154296875, + "learning_rate": 7.983838383838384e-06, + "loss": 44.5552, + "step": 9880 + }, + { + "epoch": 0.03995685144858737, + "grad_norm": 422.6609802246094, + "learning_rate": 7.991919191919193e-06, + "loss": 34.7969, + "step": 9890 + }, + { + "epoch": 0.039997252713955, + "grad_norm": 504.5275573730469, + "learning_rate": 8.000000000000001e-06, + "loss": 42.1655, + "step": 9900 + }, + { + "epoch": 0.04003765397932263, + "grad_norm": 388.7497253417969, + "learning_rate": 8.00808080808081e-06, + "loss": 64.0802, + "step": 9910 + }, + { + "epoch": 0.04007805524469026, + "grad_norm": 692.829345703125, + "learning_rate": 8.016161616161617e-06, + "loss": 56.8257, + "step": 9920 + }, + { + "epoch": 0.040118456510057895, + "grad_norm": 646.9595336914062, + "learning_rate": 8.024242424242425e-06, + "loss": 68.7429, + "step": 9930 + }, + { + "epoch": 0.04015885777542553, + "grad_norm": 870.9974365234375, + "learning_rate": 8.032323232323232e-06, + "loss": 28.9334, + "step": 9940 + }, + { + "epoch": 0.040199259040793155, + "grad_norm": 696.292236328125, + "learning_rate": 8.04040404040404e-06, + "loss": 38.459, + "step": 9950 + }, + { + "epoch": 0.04023966030616079, + "grad_norm": 849.521728515625, + "learning_rate": 8.048484848484849e-06, + "loss": 53.8687, + "step": 9960 + }, + { + "epoch": 0.04028006157152842, + "grad_norm": 874.6473388671875, + "learning_rate": 8.056565656565657e-06, + "loss": 77.5796, + "step": 9970 + }, + { + "epoch": 0.040320462836896054, + "grad_norm": 419.1984558105469, + "learning_rate": 8.064646464646466e-06, + "loss": 62.392, + "step": 9980 + }, + { + "epoch": 0.04036086410226368, + "grad_norm": 518.1126098632812, + "learning_rate": 8.072727272727274e-06, + "loss": 50.2856, + "step": 9990 + }, + { + "epoch": 0.04040126536763131, + "grad_norm": 429.58343505859375, + "learning_rate": 8.08080808080808e-06, + "loss": 44.5308, + "step": 10000 + }, + { + "epoch": 0.040441666632998946, + "grad_norm": 321.3651123046875, + "learning_rate": 8.08888888888889e-06, + "loss": 43.4247, + "step": 10010 + }, + { + "epoch": 0.04048206789836658, + "grad_norm": 651.4296264648438, + "learning_rate": 8.096969696969698e-06, + "loss": 46.7586, + "step": 10020 + }, + { + "epoch": 0.040522469163734205, + "grad_norm": 424.0990905761719, + "learning_rate": 8.105050505050506e-06, + "loss": 66.6896, + "step": 10030 + }, + { + "epoch": 0.04056287042910184, + "grad_norm": 675.9149780273438, + "learning_rate": 8.113131313131315e-06, + "loss": 43.7758, + "step": 10040 + }, + { + "epoch": 0.04060327169446947, + "grad_norm": 139.72412109375, + "learning_rate": 8.121212121212121e-06, + "loss": 55.9011, + "step": 10050 + }, + { + "epoch": 0.040643672959837104, + "grad_norm": 748.906005859375, + "learning_rate": 8.12929292929293e-06, + "loss": 43.4391, + "step": 10060 + }, + { + "epoch": 0.04068407422520473, + "grad_norm": 669.5646362304688, + "learning_rate": 8.137373737373738e-06, + "loss": 58.3727, + "step": 10070 + }, + { + "epoch": 0.040724475490572364, + "grad_norm": 742.7377319335938, + "learning_rate": 8.145454545454547e-06, + "loss": 65.0417, + "step": 10080 + }, + { + "epoch": 0.04076487675594, + "grad_norm": 404.1474304199219, + "learning_rate": 8.153535353535355e-06, + "loss": 68.0475, + "step": 10090 + }, + { + "epoch": 0.04080527802130763, + "grad_norm": 676.8277587890625, + "learning_rate": 8.161616161616162e-06, + "loss": 50.2154, + "step": 10100 + }, + { + "epoch": 0.040845679286675256, + "grad_norm": 904.6346435546875, + "learning_rate": 8.16969696969697e-06, + "loss": 61.044, + "step": 10110 + }, + { + "epoch": 0.04088608055204289, + "grad_norm": 542.9082641601562, + "learning_rate": 8.177777777777779e-06, + "loss": 40.1249, + "step": 10120 + }, + { + "epoch": 0.04092648181741052, + "grad_norm": 664.3563842773438, + "learning_rate": 8.185858585858587e-06, + "loss": 80.5697, + "step": 10130 + }, + { + "epoch": 0.040966883082778155, + "grad_norm": 893.8529663085938, + "learning_rate": 8.193939393939394e-06, + "loss": 48.2468, + "step": 10140 + }, + { + "epoch": 0.04100728434814578, + "grad_norm": 582.698486328125, + "learning_rate": 8.202020202020202e-06, + "loss": 53.0039, + "step": 10150 + }, + { + "epoch": 0.041047685613513414, + "grad_norm": 1301.6468505859375, + "learning_rate": 8.21010101010101e-06, + "loss": 62.4941, + "step": 10160 + }, + { + "epoch": 0.04108808687888105, + "grad_norm": 643.629150390625, + "learning_rate": 8.21818181818182e-06, + "loss": 56.7416, + "step": 10170 + }, + { + "epoch": 0.04112848814424868, + "grad_norm": 599.5493774414062, + "learning_rate": 8.226262626262626e-06, + "loss": 71.8773, + "step": 10180 + }, + { + "epoch": 0.041168889409616306, + "grad_norm": 1008.1700439453125, + "learning_rate": 8.234343434343434e-06, + "loss": 59.6597, + "step": 10190 + }, + { + "epoch": 0.04120929067498394, + "grad_norm": 476.3126525878906, + "learning_rate": 8.242424242424243e-06, + "loss": 37.4634, + "step": 10200 + }, + { + "epoch": 0.04124969194035157, + "grad_norm": 644.0550537109375, + "learning_rate": 8.250505050505051e-06, + "loss": 55.9344, + "step": 10210 + }, + { + "epoch": 0.041290093205719205, + "grad_norm": 624.4482421875, + "learning_rate": 8.25858585858586e-06, + "loss": 65.2902, + "step": 10220 + }, + { + "epoch": 0.04133049447108683, + "grad_norm": 636.2765502929688, + "learning_rate": 8.266666666666667e-06, + "loss": 37.1124, + "step": 10230 + }, + { + "epoch": 0.041370895736454465, + "grad_norm": 1039.8856201171875, + "learning_rate": 8.274747474747475e-06, + "loss": 55.8901, + "step": 10240 + }, + { + "epoch": 0.0414112970018221, + "grad_norm": 1239.593505859375, + "learning_rate": 8.282828282828283e-06, + "loss": 79.6421, + "step": 10250 + }, + { + "epoch": 0.04145169826718973, + "grad_norm": 909.7822875976562, + "learning_rate": 8.290909090909092e-06, + "loss": 63.3099, + "step": 10260 + }, + { + "epoch": 0.04149209953255736, + "grad_norm": 340.58721923828125, + "learning_rate": 8.2989898989899e-06, + "loss": 55.8349, + "step": 10270 + }, + { + "epoch": 0.04153250079792499, + "grad_norm": 674.6702270507812, + "learning_rate": 8.307070707070707e-06, + "loss": 87.4816, + "step": 10280 + }, + { + "epoch": 0.04157290206329262, + "grad_norm": 802.2882690429688, + "learning_rate": 8.315151515151516e-06, + "loss": 40.7146, + "step": 10290 + }, + { + "epoch": 0.041613303328660256, + "grad_norm": 707.7451171875, + "learning_rate": 8.323232323232324e-06, + "loss": 63.5479, + "step": 10300 + }, + { + "epoch": 0.04165370459402788, + "grad_norm": 777.0239868164062, + "learning_rate": 8.331313131313132e-06, + "loss": 43.5335, + "step": 10310 + }, + { + "epoch": 0.041694105859395515, + "grad_norm": 514.3717041015625, + "learning_rate": 8.339393939393941e-06, + "loss": 83.1662, + "step": 10320 + }, + { + "epoch": 0.04173450712476315, + "grad_norm": 292.78375244140625, + "learning_rate": 8.34747474747475e-06, + "loss": 37.0555, + "step": 10330 + }, + { + "epoch": 0.04177490839013078, + "grad_norm": 664.9932861328125, + "learning_rate": 8.355555555555556e-06, + "loss": 38.5216, + "step": 10340 + }, + { + "epoch": 0.04181530965549841, + "grad_norm": 735.2276611328125, + "learning_rate": 8.363636363636365e-06, + "loss": 54.7254, + "step": 10350 + }, + { + "epoch": 0.04185571092086604, + "grad_norm": 1044.8675537109375, + "learning_rate": 8.371717171717171e-06, + "loss": 52.2735, + "step": 10360 + }, + { + "epoch": 0.041896112186233674, + "grad_norm": 1418.629150390625, + "learning_rate": 8.37979797979798e-06, + "loss": 85.6306, + "step": 10370 + }, + { + "epoch": 0.04193651345160131, + "grad_norm": 560.5042114257812, + "learning_rate": 8.387878787878788e-06, + "loss": 71.9104, + "step": 10380 + }, + { + "epoch": 0.04197691471696893, + "grad_norm": 305.0915222167969, + "learning_rate": 8.395959595959597e-06, + "loss": 54.2431, + "step": 10390 + }, + { + "epoch": 0.042017315982336566, + "grad_norm": 502.9292297363281, + "learning_rate": 8.404040404040405e-06, + "loss": 55.7189, + "step": 10400 + }, + { + "epoch": 0.0420577172477042, + "grad_norm": 408.15557861328125, + "learning_rate": 8.412121212121212e-06, + "loss": 55.843, + "step": 10410 + }, + { + "epoch": 0.04209811851307183, + "grad_norm": 480.6004638671875, + "learning_rate": 8.42020202020202e-06, + "loss": 48.1143, + "step": 10420 + }, + { + "epoch": 0.04213851977843946, + "grad_norm": 990.3383178710938, + "learning_rate": 8.428282828282829e-06, + "loss": 55.1379, + "step": 10430 + }, + { + "epoch": 0.04217892104380709, + "grad_norm": 629.5912475585938, + "learning_rate": 8.436363636363637e-06, + "loss": 61.4927, + "step": 10440 + }, + { + "epoch": 0.042219322309174724, + "grad_norm": 1151.8690185546875, + "learning_rate": 8.444444444444446e-06, + "loss": 73.6629, + "step": 10450 + }, + { + "epoch": 0.04225972357454236, + "grad_norm": 1047.275634765625, + "learning_rate": 8.452525252525252e-06, + "loss": 50.8134, + "step": 10460 + }, + { + "epoch": 0.04230012483990998, + "grad_norm": 480.5417175292969, + "learning_rate": 8.460606060606061e-06, + "loss": 42.938, + "step": 10470 + }, + { + "epoch": 0.042340526105277616, + "grad_norm": 547.515869140625, + "learning_rate": 8.46868686868687e-06, + "loss": 39.9703, + "step": 10480 + }, + { + "epoch": 0.04238092737064525, + "grad_norm": 678.3375244140625, + "learning_rate": 8.476767676767678e-06, + "loss": 62.9904, + "step": 10490 + }, + { + "epoch": 0.04242132863601288, + "grad_norm": 849.853759765625, + "learning_rate": 8.484848484848486e-06, + "loss": 53.3405, + "step": 10500 + }, + { + "epoch": 0.04246172990138051, + "grad_norm": 493.50836181640625, + "learning_rate": 8.492929292929295e-06, + "loss": 52.3254, + "step": 10510 + }, + { + "epoch": 0.04250213116674814, + "grad_norm": 891.0341796875, + "learning_rate": 8.501010101010101e-06, + "loss": 53.4351, + "step": 10520 + }, + { + "epoch": 0.042542532432115775, + "grad_norm": 343.7546691894531, + "learning_rate": 8.50909090909091e-06, + "loss": 51.0014, + "step": 10530 + }, + { + "epoch": 0.04258293369748341, + "grad_norm": 500.97979736328125, + "learning_rate": 8.517171717171718e-06, + "loss": 50.3782, + "step": 10540 + }, + { + "epoch": 0.042623334962851034, + "grad_norm": 294.78076171875, + "learning_rate": 8.525252525252527e-06, + "loss": 33.7601, + "step": 10550 + }, + { + "epoch": 0.04266373622821867, + "grad_norm": 993.7897338867188, + "learning_rate": 8.533333333333335e-06, + "loss": 44.46, + "step": 10560 + }, + { + "epoch": 0.0427041374935863, + "grad_norm": 837.2728881835938, + "learning_rate": 8.541414141414142e-06, + "loss": 52.4295, + "step": 10570 + }, + { + "epoch": 0.04274453875895393, + "grad_norm": 1006.4978637695312, + "learning_rate": 8.54949494949495e-06, + "loss": 46.5704, + "step": 10580 + }, + { + "epoch": 0.04278494002432156, + "grad_norm": 1683.9364013671875, + "learning_rate": 8.557575757575757e-06, + "loss": 61.555, + "step": 10590 + }, + { + "epoch": 0.04282534128968919, + "grad_norm": 664.7928466796875, + "learning_rate": 8.565656565656566e-06, + "loss": 45.5131, + "step": 10600 + }, + { + "epoch": 0.042865742555056825, + "grad_norm": 706.4365234375, + "learning_rate": 8.573737373737374e-06, + "loss": 71.1823, + "step": 10610 + }, + { + "epoch": 0.04290614382042446, + "grad_norm": 1057.157470703125, + "learning_rate": 8.581818181818183e-06, + "loss": 66.9543, + "step": 10620 + }, + { + "epoch": 0.042946545085792084, + "grad_norm": 763.0455322265625, + "learning_rate": 8.589898989898991e-06, + "loss": 49.7286, + "step": 10630 + }, + { + "epoch": 0.04298694635115972, + "grad_norm": 917.1838989257812, + "learning_rate": 8.597979797979798e-06, + "loss": 61.1907, + "step": 10640 + }, + { + "epoch": 0.04302734761652735, + "grad_norm": 821.6635131835938, + "learning_rate": 8.606060606060606e-06, + "loss": 40.5262, + "step": 10650 + }, + { + "epoch": 0.043067748881894984, + "grad_norm": 557.9569091796875, + "learning_rate": 8.614141414141415e-06, + "loss": 62.2163, + "step": 10660 + }, + { + "epoch": 0.04310815014726261, + "grad_norm": 374.8048095703125, + "learning_rate": 8.622222222222223e-06, + "loss": 52.1193, + "step": 10670 + }, + { + "epoch": 0.04314855141263024, + "grad_norm": 622.052978515625, + "learning_rate": 8.630303030303032e-06, + "loss": 39.8787, + "step": 10680 + }, + { + "epoch": 0.043188952677997876, + "grad_norm": 496.65679931640625, + "learning_rate": 8.63838383838384e-06, + "loss": 41.2797, + "step": 10690 + }, + { + "epoch": 0.04322935394336551, + "grad_norm": 773.8624877929688, + "learning_rate": 8.646464646464647e-06, + "loss": 47.26, + "step": 10700 + }, + { + "epoch": 0.043269755208733135, + "grad_norm": 397.5426025390625, + "learning_rate": 8.654545454545455e-06, + "loss": 55.0543, + "step": 10710 + }, + { + "epoch": 0.04331015647410077, + "grad_norm": 890.8135375976562, + "learning_rate": 8.662626262626264e-06, + "loss": 59.0316, + "step": 10720 + }, + { + "epoch": 0.0433505577394684, + "grad_norm": 1218.11376953125, + "learning_rate": 8.670707070707072e-06, + "loss": 59.9817, + "step": 10730 + }, + { + "epoch": 0.043390959004836034, + "grad_norm": 981.9630737304688, + "learning_rate": 8.67878787878788e-06, + "loss": 50.446, + "step": 10740 + }, + { + "epoch": 0.04343136027020366, + "grad_norm": 421.39764404296875, + "learning_rate": 8.686868686868687e-06, + "loss": 59.1779, + "step": 10750 + }, + { + "epoch": 0.04347176153557129, + "grad_norm": 994.2972412109375, + "learning_rate": 8.694949494949496e-06, + "loss": 71.2787, + "step": 10760 + }, + { + "epoch": 0.043512162800938926, + "grad_norm": 926.8892211914062, + "learning_rate": 8.703030303030304e-06, + "loss": 67.1323, + "step": 10770 + }, + { + "epoch": 0.04355256406630656, + "grad_norm": 652.7195434570312, + "learning_rate": 8.711111111111111e-06, + "loss": 45.3532, + "step": 10780 + }, + { + "epoch": 0.043592965331674186, + "grad_norm": 632.1166381835938, + "learning_rate": 8.71919191919192e-06, + "loss": 57.5382, + "step": 10790 + }, + { + "epoch": 0.04363336659704182, + "grad_norm": 785.2573852539062, + "learning_rate": 8.727272727272728e-06, + "loss": 64.4019, + "step": 10800 + }, + { + "epoch": 0.04367376786240945, + "grad_norm": 859.1141357421875, + "learning_rate": 8.735353535353536e-06, + "loss": 77.0335, + "step": 10810 + }, + { + "epoch": 0.043714169127777085, + "grad_norm": 894.4269409179688, + "learning_rate": 8.743434343434343e-06, + "loss": 49.1961, + "step": 10820 + }, + { + "epoch": 0.04375457039314471, + "grad_norm": 564.3938598632812, + "learning_rate": 8.751515151515151e-06, + "loss": 51.9728, + "step": 10830 + }, + { + "epoch": 0.043794971658512344, + "grad_norm": 1009.34521484375, + "learning_rate": 8.75959595959596e-06, + "loss": 65.0657, + "step": 10840 + }, + { + "epoch": 0.04383537292387998, + "grad_norm": 631.89111328125, + "learning_rate": 8.767676767676768e-06, + "loss": 61.9598, + "step": 10850 + }, + { + "epoch": 0.04387577418924761, + "grad_norm": 716.18994140625, + "learning_rate": 8.775757575757577e-06, + "loss": 49.4842, + "step": 10860 + }, + { + "epoch": 0.043916175454615236, + "grad_norm": 1062.8853759765625, + "learning_rate": 8.783838383838385e-06, + "loss": 71.7064, + "step": 10870 + }, + { + "epoch": 0.04395657671998287, + "grad_norm": 729.2897338867188, + "learning_rate": 8.791919191919192e-06, + "loss": 63.324, + "step": 10880 + }, + { + "epoch": 0.0439969779853505, + "grad_norm": 528.548828125, + "learning_rate": 8.8e-06, + "loss": 66.7493, + "step": 10890 + }, + { + "epoch": 0.044037379250718135, + "grad_norm": 838.0880737304688, + "learning_rate": 8.808080808080809e-06, + "loss": 54.1123, + "step": 10900 + }, + { + "epoch": 0.04407778051608576, + "grad_norm": 623.3431396484375, + "learning_rate": 8.816161616161617e-06, + "loss": 43.8357, + "step": 10910 + }, + { + "epoch": 0.044118181781453394, + "grad_norm": 415.62939453125, + "learning_rate": 8.824242424242426e-06, + "loss": 53.8524, + "step": 10920 + }, + { + "epoch": 0.04415858304682103, + "grad_norm": 618.20654296875, + "learning_rate": 8.832323232323233e-06, + "loss": 47.9192, + "step": 10930 + }, + { + "epoch": 0.04419898431218866, + "grad_norm": 708.5919799804688, + "learning_rate": 8.840404040404041e-06, + "loss": 50.4365, + "step": 10940 + }, + { + "epoch": 0.04423938557755629, + "grad_norm": 791.4614868164062, + "learning_rate": 8.84848484848485e-06, + "loss": 35.4549, + "step": 10950 + }, + { + "epoch": 0.04427978684292392, + "grad_norm": 508.3524169921875, + "learning_rate": 8.856565656565658e-06, + "loss": 36.9709, + "step": 10960 + }, + { + "epoch": 0.04432018810829155, + "grad_norm": 152.70664978027344, + "learning_rate": 8.864646464646466e-06, + "loss": 41.8174, + "step": 10970 + }, + { + "epoch": 0.044360589373659186, + "grad_norm": 322.36065673828125, + "learning_rate": 8.872727272727275e-06, + "loss": 51.6953, + "step": 10980 + }, + { + "epoch": 0.04440099063902681, + "grad_norm": 553.2557983398438, + "learning_rate": 8.880808080808082e-06, + "loss": 75.7669, + "step": 10990 + }, + { + "epoch": 0.044441391904394445, + "grad_norm": 490.9820251464844, + "learning_rate": 8.888888888888888e-06, + "loss": 50.3666, + "step": 11000 + }, + { + "epoch": 0.04448179316976208, + "grad_norm": 680.9080810546875, + "learning_rate": 8.896969696969697e-06, + "loss": 50.7332, + "step": 11010 + }, + { + "epoch": 0.04452219443512971, + "grad_norm": 994.4353637695312, + "learning_rate": 8.905050505050505e-06, + "loss": 54.6636, + "step": 11020 + }, + { + "epoch": 0.04456259570049734, + "grad_norm": 879.1863403320312, + "learning_rate": 8.913131313131314e-06, + "loss": 55.7514, + "step": 11030 + }, + { + "epoch": 0.04460299696586497, + "grad_norm": 453.1380310058594, + "learning_rate": 8.921212121212122e-06, + "loss": 76.496, + "step": 11040 + }, + { + "epoch": 0.0446433982312326, + "grad_norm": 364.94403076171875, + "learning_rate": 8.92929292929293e-06, + "loss": 49.3788, + "step": 11050 + }, + { + "epoch": 0.044683799496600236, + "grad_norm": 522.0982055664062, + "learning_rate": 8.937373737373737e-06, + "loss": 35.623, + "step": 11060 + }, + { + "epoch": 0.04472420076196786, + "grad_norm": 362.5142822265625, + "learning_rate": 8.945454545454546e-06, + "loss": 43.0607, + "step": 11070 + }, + { + "epoch": 0.044764602027335496, + "grad_norm": 925.5531616210938, + "learning_rate": 8.953535353535354e-06, + "loss": 57.3, + "step": 11080 + }, + { + "epoch": 0.04480500329270313, + "grad_norm": 797.6778564453125, + "learning_rate": 8.961616161616163e-06, + "loss": 44.5526, + "step": 11090 + }, + { + "epoch": 0.04484540455807076, + "grad_norm": 772.5079345703125, + "learning_rate": 8.969696969696971e-06, + "loss": 58.54, + "step": 11100 + }, + { + "epoch": 0.04488580582343839, + "grad_norm": 803.4046630859375, + "learning_rate": 8.977777777777778e-06, + "loss": 40.9027, + "step": 11110 + }, + { + "epoch": 0.04492620708880602, + "grad_norm": 814.4190063476562, + "learning_rate": 8.985858585858586e-06, + "loss": 54.8803, + "step": 11120 + }, + { + "epoch": 0.044966608354173654, + "grad_norm": 554.0245971679688, + "learning_rate": 8.993939393939395e-06, + "loss": 30.8268, + "step": 11130 + }, + { + "epoch": 0.04500700961954129, + "grad_norm": 861.0400390625, + "learning_rate": 9.002020202020203e-06, + "loss": 95.88, + "step": 11140 + }, + { + "epoch": 0.04504741088490891, + "grad_norm": 626.3646850585938, + "learning_rate": 9.010101010101012e-06, + "loss": 51.77, + "step": 11150 + }, + { + "epoch": 0.045087812150276546, + "grad_norm": 1681.829345703125, + "learning_rate": 9.01818181818182e-06, + "loss": 88.4558, + "step": 11160 + }, + { + "epoch": 0.04512821341564418, + "grad_norm": 1307.4827880859375, + "learning_rate": 9.026262626262627e-06, + "loss": 49.2158, + "step": 11170 + }, + { + "epoch": 0.04516861468101181, + "grad_norm": 690.13623046875, + "learning_rate": 9.034343434343435e-06, + "loss": 46.9865, + "step": 11180 + }, + { + "epoch": 0.04520901594637944, + "grad_norm": 1026.7313232421875, + "learning_rate": 9.042424242424244e-06, + "loss": 41.4207, + "step": 11190 + }, + { + "epoch": 0.04524941721174707, + "grad_norm": 1039.1217041015625, + "learning_rate": 9.050505050505052e-06, + "loss": 65.3547, + "step": 11200 + }, + { + "epoch": 0.045289818477114704, + "grad_norm": 605.8031616210938, + "learning_rate": 9.058585858585859e-06, + "loss": 51.7349, + "step": 11210 + }, + { + "epoch": 0.04533021974248234, + "grad_norm": 567.4804077148438, + "learning_rate": 9.066666666666667e-06, + "loss": 68.7462, + "step": 11220 + }, + { + "epoch": 0.045370621007849964, + "grad_norm": 801.2859497070312, + "learning_rate": 9.074747474747476e-06, + "loss": 38.204, + "step": 11230 + }, + { + "epoch": 0.0454110222732176, + "grad_norm": 715.8931274414062, + "learning_rate": 9.082828282828283e-06, + "loss": 78.7292, + "step": 11240 + }, + { + "epoch": 0.04545142353858523, + "grad_norm": 383.57635498046875, + "learning_rate": 9.090909090909091e-06, + "loss": 52.5061, + "step": 11250 + }, + { + "epoch": 0.04549182480395286, + "grad_norm": 658.3374633789062, + "learning_rate": 9.0989898989899e-06, + "loss": 60.3061, + "step": 11260 + }, + { + "epoch": 0.04553222606932049, + "grad_norm": 825.5767822265625, + "learning_rate": 9.107070707070708e-06, + "loss": 61.6082, + "step": 11270 + }, + { + "epoch": 0.04557262733468812, + "grad_norm": 673.5419921875, + "learning_rate": 9.115151515151516e-06, + "loss": 52.7299, + "step": 11280 + }, + { + "epoch": 0.045613028600055755, + "grad_norm": 2270.1708984375, + "learning_rate": 9.123232323232323e-06, + "loss": 87.2664, + "step": 11290 + }, + { + "epoch": 0.04565342986542339, + "grad_norm": 682.08544921875, + "learning_rate": 9.131313131313132e-06, + "loss": 49.0, + "step": 11300 + }, + { + "epoch": 0.045693831130791014, + "grad_norm": 435.1982727050781, + "learning_rate": 9.13939393939394e-06, + "loss": 46.8469, + "step": 11310 + }, + { + "epoch": 0.04573423239615865, + "grad_norm": 513.4351806640625, + "learning_rate": 9.147474747474748e-06, + "loss": 59.5729, + "step": 11320 + }, + { + "epoch": 0.04577463366152628, + "grad_norm": 701.8012084960938, + "learning_rate": 9.155555555555557e-06, + "loss": 50.215, + "step": 11330 + }, + { + "epoch": 0.04581503492689391, + "grad_norm": 1011.1109619140625, + "learning_rate": 9.163636363636365e-06, + "loss": 61.7965, + "step": 11340 + }, + { + "epoch": 0.04585543619226154, + "grad_norm": 1000.3032836914062, + "learning_rate": 9.171717171717172e-06, + "loss": 66.5579, + "step": 11350 + }, + { + "epoch": 0.04589583745762917, + "grad_norm": 892.2225341796875, + "learning_rate": 9.17979797979798e-06, + "loss": 77.5938, + "step": 11360 + }, + { + "epoch": 0.045936238722996806, + "grad_norm": 473.8362121582031, + "learning_rate": 9.187878787878789e-06, + "loss": 43.2337, + "step": 11370 + }, + { + "epoch": 0.04597663998836444, + "grad_norm": 602.120361328125, + "learning_rate": 9.195959595959597e-06, + "loss": 47.1484, + "step": 11380 + }, + { + "epoch": 0.046017041253732065, + "grad_norm": 481.7225341796875, + "learning_rate": 9.204040404040406e-06, + "loss": 44.948, + "step": 11390 + }, + { + "epoch": 0.0460574425190997, + "grad_norm": 617.6996459960938, + "learning_rate": 9.212121212121213e-06, + "loss": 45.0998, + "step": 11400 + }, + { + "epoch": 0.04609784378446733, + "grad_norm": 921.5098876953125, + "learning_rate": 9.220202020202021e-06, + "loss": 49.7482, + "step": 11410 + }, + { + "epoch": 0.046138245049834964, + "grad_norm": 1062.4364013671875, + "learning_rate": 9.228282828282828e-06, + "loss": 60.8331, + "step": 11420 + }, + { + "epoch": 0.04617864631520259, + "grad_norm": 991.2988891601562, + "learning_rate": 9.236363636363636e-06, + "loss": 62.9908, + "step": 11430 + }, + { + "epoch": 0.04621904758057022, + "grad_norm": 694.0717163085938, + "learning_rate": 9.244444444444445e-06, + "loss": 55.5939, + "step": 11440 + }, + { + "epoch": 0.046259448845937856, + "grad_norm": 468.04827880859375, + "learning_rate": 9.252525252525253e-06, + "loss": 42.1948, + "step": 11450 + }, + { + "epoch": 0.04629985011130549, + "grad_norm": 618.5086669921875, + "learning_rate": 9.260606060606062e-06, + "loss": 57.7817, + "step": 11460 + }, + { + "epoch": 0.046340251376673115, + "grad_norm": 668.7138061523438, + "learning_rate": 9.268686868686868e-06, + "loss": 47.0043, + "step": 11470 + }, + { + "epoch": 0.04638065264204075, + "grad_norm": 611.4515380859375, + "learning_rate": 9.276767676767677e-06, + "loss": 65.1178, + "step": 11480 + }, + { + "epoch": 0.04642105390740838, + "grad_norm": 465.3556823730469, + "learning_rate": 9.284848484848485e-06, + "loss": 49.5862, + "step": 11490 + }, + { + "epoch": 0.046461455172776014, + "grad_norm": 646.4593505859375, + "learning_rate": 9.292929292929294e-06, + "loss": 47.2889, + "step": 11500 + }, + { + "epoch": 0.04650185643814364, + "grad_norm": 465.8161926269531, + "learning_rate": 9.301010101010102e-06, + "loss": 49.2758, + "step": 11510 + }, + { + "epoch": 0.046542257703511274, + "grad_norm": 124.87579345703125, + "learning_rate": 9.30909090909091e-06, + "loss": 51.9798, + "step": 11520 + }, + { + "epoch": 0.04658265896887891, + "grad_norm": 455.3066711425781, + "learning_rate": 9.317171717171717e-06, + "loss": 48.4502, + "step": 11530 + }, + { + "epoch": 0.04662306023424654, + "grad_norm": 561.8794555664062, + "learning_rate": 9.325252525252526e-06, + "loss": 60.7198, + "step": 11540 + }, + { + "epoch": 0.046663461499614166, + "grad_norm": 1191.8280029296875, + "learning_rate": 9.333333333333334e-06, + "loss": 51.1963, + "step": 11550 + }, + { + "epoch": 0.0467038627649818, + "grad_norm": 757.8594360351562, + "learning_rate": 9.341414141414143e-06, + "loss": 67.2604, + "step": 11560 + }, + { + "epoch": 0.04674426403034943, + "grad_norm": 581.9176025390625, + "learning_rate": 9.349494949494951e-06, + "loss": 48.4778, + "step": 11570 + }, + { + "epoch": 0.046784665295717065, + "grad_norm": 326.5763244628906, + "learning_rate": 9.357575757575758e-06, + "loss": 35.4996, + "step": 11580 + }, + { + "epoch": 0.04682506656108469, + "grad_norm": 985.1808471679688, + "learning_rate": 9.365656565656566e-06, + "loss": 55.5719, + "step": 11590 + }, + { + "epoch": 0.046865467826452324, + "grad_norm": 626.5885009765625, + "learning_rate": 9.373737373737375e-06, + "loss": 37.6512, + "step": 11600 + }, + { + "epoch": 0.04690586909181996, + "grad_norm": 634.4691162109375, + "learning_rate": 9.381818181818183e-06, + "loss": 34.5778, + "step": 11610 + }, + { + "epoch": 0.04694627035718759, + "grad_norm": 934.1278686523438, + "learning_rate": 9.389898989898992e-06, + "loss": 46.384, + "step": 11620 + }, + { + "epoch": 0.046986671622555216, + "grad_norm": 784.7304077148438, + "learning_rate": 9.397979797979799e-06, + "loss": 68.3392, + "step": 11630 + }, + { + "epoch": 0.04702707288792285, + "grad_norm": 336.8243408203125, + "learning_rate": 9.406060606060607e-06, + "loss": 52.2806, + "step": 11640 + }, + { + "epoch": 0.04706747415329048, + "grad_norm": 854.4815063476562, + "learning_rate": 9.414141414141414e-06, + "loss": 56.4582, + "step": 11650 + }, + { + "epoch": 0.047107875418658116, + "grad_norm": 478.9867858886719, + "learning_rate": 9.422222222222222e-06, + "loss": 38.2133, + "step": 11660 + }, + { + "epoch": 0.04714827668402574, + "grad_norm": 349.7056884765625, + "learning_rate": 9.43030303030303e-06, + "loss": 42.0987, + "step": 11670 + }, + { + "epoch": 0.047188677949393375, + "grad_norm": 771.971923828125, + "learning_rate": 9.438383838383839e-06, + "loss": 92.2668, + "step": 11680 + }, + { + "epoch": 0.04722907921476101, + "grad_norm": 1389.6817626953125, + "learning_rate": 9.446464646464648e-06, + "loss": 93.1496, + "step": 11690 + }, + { + "epoch": 0.04726948048012864, + "grad_norm": 484.66351318359375, + "learning_rate": 9.454545454545456e-06, + "loss": 44.8321, + "step": 11700 + }, + { + "epoch": 0.04730988174549627, + "grad_norm": 1661.9954833984375, + "learning_rate": 9.462626262626263e-06, + "loss": 38.4946, + "step": 11710 + }, + { + "epoch": 0.0473502830108639, + "grad_norm": 616.4440307617188, + "learning_rate": 9.470707070707071e-06, + "loss": 51.6212, + "step": 11720 + }, + { + "epoch": 0.04739068427623153, + "grad_norm": 665.550048828125, + "learning_rate": 9.47878787878788e-06, + "loss": 63.4329, + "step": 11730 + }, + { + "epoch": 0.047431085541599166, + "grad_norm": 837.4865112304688, + "learning_rate": 9.486868686868688e-06, + "loss": 52.9131, + "step": 11740 + }, + { + "epoch": 0.04747148680696679, + "grad_norm": 695.9431762695312, + "learning_rate": 9.494949494949497e-06, + "loss": 49.8683, + "step": 11750 + }, + { + "epoch": 0.047511888072334425, + "grad_norm": 584.3406982421875, + "learning_rate": 9.503030303030303e-06, + "loss": 32.603, + "step": 11760 + }, + { + "epoch": 0.04755228933770206, + "grad_norm": 1272.3975830078125, + "learning_rate": 9.511111111111112e-06, + "loss": 56.4731, + "step": 11770 + }, + { + "epoch": 0.04759269060306969, + "grad_norm": 849.8159790039062, + "learning_rate": 9.51919191919192e-06, + "loss": 44.4619, + "step": 11780 + }, + { + "epoch": 0.04763309186843732, + "grad_norm": 967.6990356445312, + "learning_rate": 9.527272727272729e-06, + "loss": 52.3391, + "step": 11790 + }, + { + "epoch": 0.04767349313380495, + "grad_norm": 563.64990234375, + "learning_rate": 9.535353535353537e-06, + "loss": 58.6493, + "step": 11800 + }, + { + "epoch": 0.047713894399172584, + "grad_norm": 410.8050537109375, + "learning_rate": 9.543434343434344e-06, + "loss": 70.926, + "step": 11810 + }, + { + "epoch": 0.04775429566454022, + "grad_norm": 932.15185546875, + "learning_rate": 9.551515151515152e-06, + "loss": 54.6105, + "step": 11820 + }, + { + "epoch": 0.04779469692990784, + "grad_norm": 741.9338989257812, + "learning_rate": 9.55959595959596e-06, + "loss": 62.3345, + "step": 11830 + }, + { + "epoch": 0.047835098195275476, + "grad_norm": 920.0084228515625, + "learning_rate": 9.56767676767677e-06, + "loss": 62.9895, + "step": 11840 + }, + { + "epoch": 0.04787549946064311, + "grad_norm": 612.9961547851562, + "learning_rate": 9.575757575757576e-06, + "loss": 48.8344, + "step": 11850 + }, + { + "epoch": 0.04791590072601074, + "grad_norm": 679.0940551757812, + "learning_rate": 9.583838383838384e-06, + "loss": 50.3198, + "step": 11860 + }, + { + "epoch": 0.04795630199137837, + "grad_norm": 680.0853881835938, + "learning_rate": 9.591919191919193e-06, + "loss": 71.3714, + "step": 11870 + }, + { + "epoch": 0.047996703256746, + "grad_norm": 651.5006103515625, + "learning_rate": 9.600000000000001e-06, + "loss": 83.7444, + "step": 11880 + }, + { + "epoch": 0.048037104522113634, + "grad_norm": 803.4596557617188, + "learning_rate": 9.608080808080808e-06, + "loss": 69.2039, + "step": 11890 + }, + { + "epoch": 0.04807750578748127, + "grad_norm": 5523.23193359375, + "learning_rate": 9.616161616161616e-06, + "loss": 72.4104, + "step": 11900 + }, + { + "epoch": 0.04811790705284889, + "grad_norm": 638.7211303710938, + "learning_rate": 9.624242424242425e-06, + "loss": 50.5673, + "step": 11910 + }, + { + "epoch": 0.048158308318216526, + "grad_norm": 909.378173828125, + "learning_rate": 9.632323232323233e-06, + "loss": 39.5691, + "step": 11920 + }, + { + "epoch": 0.04819870958358416, + "grad_norm": 615.8162841796875, + "learning_rate": 9.640404040404042e-06, + "loss": 37.2438, + "step": 11930 + }, + { + "epoch": 0.04823911084895179, + "grad_norm": 655.8267822265625, + "learning_rate": 9.648484848484849e-06, + "loss": 52.3094, + "step": 11940 + }, + { + "epoch": 0.04827951211431942, + "grad_norm": 766.4174194335938, + "learning_rate": 9.656565656565657e-06, + "loss": 68.0634, + "step": 11950 + }, + { + "epoch": 0.04831991337968705, + "grad_norm": 637.963623046875, + "learning_rate": 9.664646464646465e-06, + "loss": 48.027, + "step": 11960 + }, + { + "epoch": 0.048360314645054685, + "grad_norm": 570.1321411132812, + "learning_rate": 9.672727272727274e-06, + "loss": 43.3436, + "step": 11970 + }, + { + "epoch": 0.04840071591042232, + "grad_norm": 970.7443237304688, + "learning_rate": 9.680808080808082e-06, + "loss": 55.089, + "step": 11980 + }, + { + "epoch": 0.048441117175789944, + "grad_norm": 1095.2872314453125, + "learning_rate": 9.688888888888889e-06, + "loss": 95.5435, + "step": 11990 + }, + { + "epoch": 0.04848151844115758, + "grad_norm": 219.54226684570312, + "learning_rate": 9.696969696969698e-06, + "loss": 60.4978, + "step": 12000 + }, + { + "epoch": 0.04852191970652521, + "grad_norm": 605.0892944335938, + "learning_rate": 9.705050505050506e-06, + "loss": 50.5544, + "step": 12010 + }, + { + "epoch": 0.04856232097189284, + "grad_norm": 574.8591918945312, + "learning_rate": 9.713131313131314e-06, + "loss": 48.281, + "step": 12020 + }, + { + "epoch": 0.04860272223726047, + "grad_norm": 529.4436645507812, + "learning_rate": 9.721212121212123e-06, + "loss": 65.4045, + "step": 12030 + }, + { + "epoch": 0.0486431235026281, + "grad_norm": 1770.2958984375, + "learning_rate": 9.729292929292931e-06, + "loss": 59.0963, + "step": 12040 + }, + { + "epoch": 0.048683524767995735, + "grad_norm": 462.4886169433594, + "learning_rate": 9.737373737373738e-06, + "loss": 66.9072, + "step": 12050 + }, + { + "epoch": 0.04872392603336337, + "grad_norm": 541.0017700195312, + "learning_rate": 9.745454545454547e-06, + "loss": 53.1198, + "step": 12060 + }, + { + "epoch": 0.048764327298730994, + "grad_norm": 901.8868408203125, + "learning_rate": 9.753535353535353e-06, + "loss": 47.6384, + "step": 12070 + }, + { + "epoch": 0.04880472856409863, + "grad_norm": 1244.428955078125, + "learning_rate": 9.761616161616162e-06, + "loss": 48.4008, + "step": 12080 + }, + { + "epoch": 0.04884512982946626, + "grad_norm": 1056.5343017578125, + "learning_rate": 9.76969696969697e-06, + "loss": 89.3727, + "step": 12090 + }, + { + "epoch": 0.048885531094833894, + "grad_norm": 539.5419921875, + "learning_rate": 9.777777777777779e-06, + "loss": 60.6288, + "step": 12100 + }, + { + "epoch": 0.04892593236020152, + "grad_norm": 662.9339599609375, + "learning_rate": 9.785858585858587e-06, + "loss": 43.1786, + "step": 12110 + }, + { + "epoch": 0.04896633362556915, + "grad_norm": 789.1748657226562, + "learning_rate": 9.793939393939394e-06, + "loss": 55.6451, + "step": 12120 + }, + { + "epoch": 0.049006734890936786, + "grad_norm": 712.3914184570312, + "learning_rate": 9.802020202020202e-06, + "loss": 55.9028, + "step": 12130 + }, + { + "epoch": 0.04904713615630442, + "grad_norm": 552.1458740234375, + "learning_rate": 9.81010101010101e-06, + "loss": 55.985, + "step": 12140 + }, + { + "epoch": 0.049087537421672045, + "grad_norm": 785.1761474609375, + "learning_rate": 9.81818181818182e-06, + "loss": 67.8702, + "step": 12150 + }, + { + "epoch": 0.04912793868703968, + "grad_norm": 2131.36669921875, + "learning_rate": 9.826262626262628e-06, + "loss": 70.2431, + "step": 12160 + }, + { + "epoch": 0.04916833995240731, + "grad_norm": 712.6942138671875, + "learning_rate": 9.834343434343434e-06, + "loss": 76.463, + "step": 12170 + }, + { + "epoch": 0.04920874121777494, + "grad_norm": 720.9066162109375, + "learning_rate": 9.842424242424243e-06, + "loss": 51.4022, + "step": 12180 + }, + { + "epoch": 0.04924914248314257, + "grad_norm": 563.6546020507812, + "learning_rate": 9.850505050505051e-06, + "loss": 57.7309, + "step": 12190 + }, + { + "epoch": 0.0492895437485102, + "grad_norm": 1423.6729736328125, + "learning_rate": 9.85858585858586e-06, + "loss": 45.7969, + "step": 12200 + }, + { + "epoch": 0.049329945013877836, + "grad_norm": 649.6131591796875, + "learning_rate": 9.866666666666668e-06, + "loss": 58.7549, + "step": 12210 + }, + { + "epoch": 0.04937034627924546, + "grad_norm": 614.8030395507812, + "learning_rate": 9.874747474747477e-06, + "loss": 44.4937, + "step": 12220 + }, + { + "epoch": 0.049410747544613096, + "grad_norm": 1071.0455322265625, + "learning_rate": 9.882828282828283e-06, + "loss": 64.4793, + "step": 12230 + }, + { + "epoch": 0.04945114880998073, + "grad_norm": 502.5149230957031, + "learning_rate": 9.890909090909092e-06, + "loss": 51.8935, + "step": 12240 + }, + { + "epoch": 0.04949155007534836, + "grad_norm": 635.7072143554688, + "learning_rate": 9.8989898989899e-06, + "loss": 77.3302, + "step": 12250 + }, + { + "epoch": 0.04953195134071599, + "grad_norm": 243.91246032714844, + "learning_rate": 9.907070707070709e-06, + "loss": 59.5737, + "step": 12260 + }, + { + "epoch": 0.04957235260608362, + "grad_norm": 769.7122802734375, + "learning_rate": 9.915151515151515e-06, + "loss": 48.6593, + "step": 12270 + }, + { + "epoch": 0.049612753871451254, + "grad_norm": 923.63330078125, + "learning_rate": 9.923232323232324e-06, + "loss": 51.4512, + "step": 12280 + }, + { + "epoch": 0.04965315513681889, + "grad_norm": 374.1686096191406, + "learning_rate": 9.931313131313132e-06, + "loss": 37.1888, + "step": 12290 + }, + { + "epoch": 0.04969355640218651, + "grad_norm": 1896.8494873046875, + "learning_rate": 9.939393939393939e-06, + "loss": 62.5259, + "step": 12300 + }, + { + "epoch": 0.049733957667554146, + "grad_norm": 845.9722290039062, + "learning_rate": 9.947474747474748e-06, + "loss": 45.9669, + "step": 12310 + }, + { + "epoch": 0.04977435893292178, + "grad_norm": 628.6317138671875, + "learning_rate": 9.955555555555556e-06, + "loss": 48.4875, + "step": 12320 + }, + { + "epoch": 0.04981476019828941, + "grad_norm": 661.7048950195312, + "learning_rate": 9.963636363636364e-06, + "loss": 50.1969, + "step": 12330 + }, + { + "epoch": 0.04985516146365704, + "grad_norm": 302.84808349609375, + "learning_rate": 9.971717171717173e-06, + "loss": 78.5323, + "step": 12340 + }, + { + "epoch": 0.04989556272902467, + "grad_norm": 778.6891479492188, + "learning_rate": 9.97979797979798e-06, + "loss": 58.8246, + "step": 12350 + }, + { + "epoch": 0.049935963994392304, + "grad_norm": 918.199951171875, + "learning_rate": 9.987878787878788e-06, + "loss": 49.6424, + "step": 12360 + }, + { + "epoch": 0.04997636525975994, + "grad_norm": 1064.10302734375, + "learning_rate": 9.995959595959597e-06, + "loss": 61.3562, + "step": 12370 + }, + { + "epoch": 0.050016766525127564, + "grad_norm": 1215.5438232421875, + "learning_rate": 9.99999995027162e-06, + "loss": 59.4952, + "step": 12380 + }, + { + "epoch": 0.0500571677904952, + "grad_norm": 1015.760986328125, + "learning_rate": 9.99999955244457e-06, + "loss": 58.8701, + "step": 12390 + }, + { + "epoch": 0.05009756905586283, + "grad_norm": 547.9041748046875, + "learning_rate": 9.999998756790503e-06, + "loss": 63.0136, + "step": 12400 + }, + { + "epoch": 0.05013797032123046, + "grad_norm": 871.2403564453125, + "learning_rate": 9.999997563309483e-06, + "loss": 69.3615, + "step": 12410 + }, + { + "epoch": 0.05017837158659809, + "grad_norm": 801.6481323242188, + "learning_rate": 9.999995972001602e-06, + "loss": 46.739, + "step": 12420 + }, + { + "epoch": 0.05021877285196572, + "grad_norm": 704.8037109375, + "learning_rate": 9.99999398286699e-06, + "loss": 74.7781, + "step": 12430 + }, + { + "epoch": 0.050259174117333355, + "grad_norm": 741.2653198242188, + "learning_rate": 9.999991595905803e-06, + "loss": 68.1409, + "step": 12440 + }, + { + "epoch": 0.05029957538270099, + "grad_norm": 575.7168579101562, + "learning_rate": 9.999988811118232e-06, + "loss": 48.7718, + "step": 12450 + }, + { + "epoch": 0.050339976648068614, + "grad_norm": 867.4459838867188, + "learning_rate": 9.999985628504498e-06, + "loss": 61.8401, + "step": 12460 + }, + { + "epoch": 0.05038037791343625, + "grad_norm": 525.7904663085938, + "learning_rate": 9.999982048064854e-06, + "loss": 36.3457, + "step": 12470 + }, + { + "epoch": 0.05042077917880388, + "grad_norm": 480.1571044921875, + "learning_rate": 9.999978069799585e-06, + "loss": 75.6131, + "step": 12480 + }, + { + "epoch": 0.05046118044417151, + "grad_norm": 545.6644897460938, + "learning_rate": 9.999973693709008e-06, + "loss": 60.0911, + "step": 12490 + }, + { + "epoch": 0.05050158170953914, + "grad_norm": 547.3186645507812, + "learning_rate": 9.99996891979347e-06, + "loss": 51.344, + "step": 12500 + }, + { + "epoch": 0.05054198297490677, + "grad_norm": 934.025146484375, + "learning_rate": 9.999963748053354e-06, + "loss": 54.9807, + "step": 12510 + }, + { + "epoch": 0.050582384240274406, + "grad_norm": 963.864501953125, + "learning_rate": 9.999958178489069e-06, + "loss": 56.3869, + "step": 12520 + }, + { + "epoch": 0.05062278550564204, + "grad_norm": 762.842041015625, + "learning_rate": 9.999952211101056e-06, + "loss": 44.2664, + "step": 12530 + }, + { + "epoch": 0.050663186771009665, + "grad_norm": 1445.8870849609375, + "learning_rate": 9.999945845889795e-06, + "loss": 73.7543, + "step": 12540 + }, + { + "epoch": 0.0507035880363773, + "grad_norm": 1076.540283203125, + "learning_rate": 9.999939082855788e-06, + "loss": 53.5912, + "step": 12550 + }, + { + "epoch": 0.05074398930174493, + "grad_norm": 438.9979248046875, + "learning_rate": 9.999931921999575e-06, + "loss": 58.8434, + "step": 12560 + }, + { + "epoch": 0.050784390567112564, + "grad_norm": 381.0602111816406, + "learning_rate": 9.999924363321726e-06, + "loss": 76.014, + "step": 12570 + }, + { + "epoch": 0.05082479183248019, + "grad_norm": 394.7668762207031, + "learning_rate": 9.999916406822843e-06, + "loss": 73.4046, + "step": 12580 + }, + { + "epoch": 0.05086519309784782, + "grad_norm": 225.88870239257812, + "learning_rate": 9.999908052503557e-06, + "loss": 69.8275, + "step": 12590 + }, + { + "epoch": 0.050905594363215456, + "grad_norm": 512.4158325195312, + "learning_rate": 9.999899300364534e-06, + "loss": 40.6297, + "step": 12600 + }, + { + "epoch": 0.05094599562858309, + "grad_norm": 404.0837707519531, + "learning_rate": 9.99989015040647e-06, + "loss": 48.8008, + "step": 12610 + }, + { + "epoch": 0.050986396893950715, + "grad_norm": 693.5153198242188, + "learning_rate": 9.999880602630092e-06, + "loss": 61.1755, + "step": 12620 + }, + { + "epoch": 0.05102679815931835, + "grad_norm": 750.3567504882812, + "learning_rate": 9.999870657036161e-06, + "loss": 59.9361, + "step": 12630 + }, + { + "epoch": 0.05106719942468598, + "grad_norm": 229.11830139160156, + "learning_rate": 9.99986031362547e-06, + "loss": 49.4575, + "step": 12640 + }, + { + "epoch": 0.051107600690053615, + "grad_norm": 1311.7059326171875, + "learning_rate": 9.99984957239884e-06, + "loss": 71.9959, + "step": 12650 + }, + { + "epoch": 0.05114800195542124, + "grad_norm": 416.9798889160156, + "learning_rate": 9.999838433357124e-06, + "loss": 51.3517, + "step": 12660 + }, + { + "epoch": 0.051188403220788874, + "grad_norm": 955.900634765625, + "learning_rate": 9.99982689650121e-06, + "loss": 43.4182, + "step": 12670 + }, + { + "epoch": 0.05122880448615651, + "grad_norm": 705.177978515625, + "learning_rate": 9.999814961832018e-06, + "loss": 49.622, + "step": 12680 + }, + { + "epoch": 0.05126920575152414, + "grad_norm": 713.4158935546875, + "learning_rate": 9.999802629350492e-06, + "loss": 54.9794, + "step": 12690 + }, + { + "epoch": 0.051309607016891766, + "grad_norm": 408.42449951171875, + "learning_rate": 9.99978989905762e-06, + "loss": 59.9696, + "step": 12700 + }, + { + "epoch": 0.0513500082822594, + "grad_norm": 513.5841064453125, + "learning_rate": 9.999776770954411e-06, + "loss": 51.2767, + "step": 12710 + }, + { + "epoch": 0.05139040954762703, + "grad_norm": 621.69140625, + "learning_rate": 9.99976324504191e-06, + "loss": 43.1716, + "step": 12720 + }, + { + "epoch": 0.051430810812994665, + "grad_norm": 666.1728515625, + "learning_rate": 9.999749321321192e-06, + "loss": 54.0102, + "step": 12730 + }, + { + "epoch": 0.05147121207836229, + "grad_norm": 600.5795288085938, + "learning_rate": 9.999734999793369e-06, + "loss": 47.6661, + "step": 12740 + }, + { + "epoch": 0.051511613343729924, + "grad_norm": 628.9915161132812, + "learning_rate": 9.999720280459576e-06, + "loss": 43.5194, + "step": 12750 + }, + { + "epoch": 0.05155201460909756, + "grad_norm": 417.6671447753906, + "learning_rate": 9.999705163320987e-06, + "loss": 54.9336, + "step": 12760 + }, + { + "epoch": 0.05159241587446519, + "grad_norm": 1952.5086669921875, + "learning_rate": 9.999689648378801e-06, + "loss": 79.7189, + "step": 12770 + }, + { + "epoch": 0.051632817139832816, + "grad_norm": 537.900634765625, + "learning_rate": 9.999673735634259e-06, + "loss": 72.6476, + "step": 12780 + }, + { + "epoch": 0.05167321840520045, + "grad_norm": 757.7594604492188, + "learning_rate": 9.99965742508862e-06, + "loss": 71.4085, + "step": 12790 + }, + { + "epoch": 0.05171361967056808, + "grad_norm": 515.7596435546875, + "learning_rate": 9.999640716743186e-06, + "loss": 89.0427, + "step": 12800 + }, + { + "epoch": 0.051754020935935716, + "grad_norm": 644.1132202148438, + "learning_rate": 9.999623610599287e-06, + "loss": 62.6025, + "step": 12810 + }, + { + "epoch": 0.05179442220130334, + "grad_norm": 652.4735107421875, + "learning_rate": 9.999606106658282e-06, + "loss": 50.8262, + "step": 12820 + }, + { + "epoch": 0.051834823466670975, + "grad_norm": 646.9786987304688, + "learning_rate": 9.999588204921562e-06, + "loss": 70.1996, + "step": 12830 + }, + { + "epoch": 0.05187522473203861, + "grad_norm": 792.312744140625, + "learning_rate": 9.999569905390556e-06, + "loss": 55.6008, + "step": 12840 + }, + { + "epoch": 0.05191562599740624, + "grad_norm": 635.0096435546875, + "learning_rate": 9.999551208066716e-06, + "loss": 47.4141, + "step": 12850 + }, + { + "epoch": 0.05195602726277387, + "grad_norm": 364.86016845703125, + "learning_rate": 9.99953211295153e-06, + "loss": 43.5858, + "step": 12860 + }, + { + "epoch": 0.0519964285281415, + "grad_norm": 928.7925415039062, + "learning_rate": 9.999512620046523e-06, + "loss": 60.4772, + "step": 12870 + }, + { + "epoch": 0.05203682979350913, + "grad_norm": 1354.784912109375, + "learning_rate": 9.999492729353238e-06, + "loss": 58.8818, + "step": 12880 + }, + { + "epoch": 0.052077231058876766, + "grad_norm": 715.9904174804688, + "learning_rate": 9.999472440873261e-06, + "loss": 46.8977, + "step": 12890 + }, + { + "epoch": 0.05211763232424439, + "grad_norm": 543.8614501953125, + "learning_rate": 9.999451754608208e-06, + "loss": 71.3133, + "step": 12900 + }, + { + "epoch": 0.052158033589612025, + "grad_norm": 497.0710754394531, + "learning_rate": 9.999430670559723e-06, + "loss": 59.7093, + "step": 12910 + }, + { + "epoch": 0.05219843485497966, + "grad_norm": 863.6559448242188, + "learning_rate": 9.999409188729484e-06, + "loss": 60.5322, + "step": 12920 + }, + { + "epoch": 0.05223883612034729, + "grad_norm": 2148.5078125, + "learning_rate": 9.999387309119198e-06, + "loss": 56.2072, + "step": 12930 + }, + { + "epoch": 0.05227923738571492, + "grad_norm": 1144.13037109375, + "learning_rate": 9.999365031730609e-06, + "loss": 58.272, + "step": 12940 + }, + { + "epoch": 0.05231963865108255, + "grad_norm": 2413.07763671875, + "learning_rate": 9.99934235656549e-06, + "loss": 70.858, + "step": 12950 + }, + { + "epoch": 0.052360039916450184, + "grad_norm": 906.7877197265625, + "learning_rate": 9.999319283625641e-06, + "loss": 62.3502, + "step": 12960 + }, + { + "epoch": 0.05240044118181782, + "grad_norm": 534.9559326171875, + "learning_rate": 9.999295812912902e-06, + "loss": 51.2167, + "step": 12970 + }, + { + "epoch": 0.05244084244718544, + "grad_norm": 652.7603759765625, + "learning_rate": 9.999271944429139e-06, + "loss": 48.0079, + "step": 12980 + }, + { + "epoch": 0.052481243712553076, + "grad_norm": 381.3694763183594, + "learning_rate": 9.99924767817625e-06, + "loss": 35.3519, + "step": 12990 + }, + { + "epoch": 0.05252164497792071, + "grad_norm": 1747.3424072265625, + "learning_rate": 9.999223014156167e-06, + "loss": 49.7469, + "step": 13000 + }, + { + "epoch": 0.05256204624328834, + "grad_norm": 862.7362670898438, + "learning_rate": 9.999197952370851e-06, + "loss": 55.1828, + "step": 13010 + }, + { + "epoch": 0.05260244750865597, + "grad_norm": 786.5454711914062, + "learning_rate": 9.9991724928223e-06, + "loss": 37.0467, + "step": 13020 + }, + { + "epoch": 0.0526428487740236, + "grad_norm": 314.32000732421875, + "learning_rate": 9.999146635512535e-06, + "loss": 45.9797, + "step": 13030 + }, + { + "epoch": 0.052683250039391234, + "grad_norm": 694.8716430664062, + "learning_rate": 9.999120380443614e-06, + "loss": 46.7638, + "step": 13040 + }, + { + "epoch": 0.05272365130475887, + "grad_norm": 303.11627197265625, + "learning_rate": 9.99909372761763e-06, + "loss": 36.8872, + "step": 13050 + }, + { + "epoch": 0.05276405257012649, + "grad_norm": 517.9519653320312, + "learning_rate": 9.9990666770367e-06, + "loss": 45.6657, + "step": 13060 + }, + { + "epoch": 0.052804453835494126, + "grad_norm": 1371.8948974609375, + "learning_rate": 9.999039228702975e-06, + "loss": 89.3382, + "step": 13070 + }, + { + "epoch": 0.05284485510086176, + "grad_norm": 864.37890625, + "learning_rate": 9.999011382618644e-06, + "loss": 58.1169, + "step": 13080 + }, + { + "epoch": 0.05288525636622939, + "grad_norm": 522.5619506835938, + "learning_rate": 9.998983138785919e-06, + "loss": 42.7408, + "step": 13090 + }, + { + "epoch": 0.05292565763159702, + "grad_norm": 912.5321044921875, + "learning_rate": 9.998954497207045e-06, + "loss": 50.1915, + "step": 13100 + }, + { + "epoch": 0.05296605889696465, + "grad_norm": 584.2015380859375, + "learning_rate": 9.998925457884307e-06, + "loss": 46.6389, + "step": 13110 + }, + { + "epoch": 0.053006460162332285, + "grad_norm": 835.9763793945312, + "learning_rate": 9.99889602082001e-06, + "loss": 38.9002, + "step": 13120 + }, + { + "epoch": 0.05304686142769992, + "grad_norm": 533.8273315429688, + "learning_rate": 9.998866186016501e-06, + "loss": 56.9394, + "step": 13130 + }, + { + "epoch": 0.053087262693067544, + "grad_norm": 463.4489440917969, + "learning_rate": 9.99883595347615e-06, + "loss": 56.6824, + "step": 13140 + }, + { + "epoch": 0.05312766395843518, + "grad_norm": 407.6231994628906, + "learning_rate": 9.998805323201364e-06, + "loss": 47.9757, + "step": 13150 + }, + { + "epoch": 0.05316806522380281, + "grad_norm": 490.29644775390625, + "learning_rate": 9.998774295194579e-06, + "loss": 58.1626, + "step": 13160 + }, + { + "epoch": 0.05320846648917044, + "grad_norm": 1248.9224853515625, + "learning_rate": 9.998742869458264e-06, + "loss": 55.3795, + "step": 13170 + }, + { + "epoch": 0.05324886775453807, + "grad_norm": 1146.1629638671875, + "learning_rate": 9.998711045994922e-06, + "loss": 79.3144, + "step": 13180 + }, + { + "epoch": 0.0532892690199057, + "grad_norm": 511.3272705078125, + "learning_rate": 9.998678824807082e-06, + "loss": 40.7286, + "step": 13190 + }, + { + "epoch": 0.053329670285273335, + "grad_norm": 711.6715087890625, + "learning_rate": 9.99864620589731e-06, + "loss": 44.7338, + "step": 13200 + }, + { + "epoch": 0.05337007155064097, + "grad_norm": 602.4719848632812, + "learning_rate": 9.998613189268197e-06, + "loss": 59.2179, + "step": 13210 + }, + { + "epoch": 0.053410472816008595, + "grad_norm": 600.1394653320312, + "learning_rate": 9.998579774922377e-06, + "loss": 39.6892, + "step": 13220 + }, + { + "epoch": 0.05345087408137623, + "grad_norm": 232.3237762451172, + "learning_rate": 9.998545962862503e-06, + "loss": 49.2781, + "step": 13230 + }, + { + "epoch": 0.05349127534674386, + "grad_norm": 589.8071899414062, + "learning_rate": 9.998511753091267e-06, + "loss": 52.6975, + "step": 13240 + }, + { + "epoch": 0.053531676612111494, + "grad_norm": 620.8717651367188, + "learning_rate": 9.998477145611389e-06, + "loss": 93.4042, + "step": 13250 + }, + { + "epoch": 0.05357207787747912, + "grad_norm": 435.4456787109375, + "learning_rate": 9.998442140425625e-06, + "loss": 46.4562, + "step": 13260 + }, + { + "epoch": 0.05361247914284675, + "grad_norm": 427.4245910644531, + "learning_rate": 9.998406737536761e-06, + "loss": 48.8901, + "step": 13270 + }, + { + "epoch": 0.053652880408214386, + "grad_norm": 708.8045654296875, + "learning_rate": 9.998370936947614e-06, + "loss": 57.0132, + "step": 13280 + }, + { + "epoch": 0.05369328167358202, + "grad_norm": 746.7619018554688, + "learning_rate": 9.998334738661028e-06, + "loss": 51.5482, + "step": 13290 + }, + { + "epoch": 0.053733682938949645, + "grad_norm": 613.72265625, + "learning_rate": 9.998298142679888e-06, + "loss": 62.2609, + "step": 13300 + }, + { + "epoch": 0.05377408420431728, + "grad_norm": 507.6068420410156, + "learning_rate": 9.998261149007104e-06, + "loss": 36.7143, + "step": 13310 + }, + { + "epoch": 0.05381448546968491, + "grad_norm": 515.9215698242188, + "learning_rate": 9.998223757645618e-06, + "loss": 57.8858, + "step": 13320 + }, + { + "epoch": 0.053854886735052544, + "grad_norm": 480.4765625, + "learning_rate": 9.998185968598407e-06, + "loss": 73.2252, + "step": 13330 + }, + { + "epoch": 0.05389528800042017, + "grad_norm": 892.7890014648438, + "learning_rate": 9.998147781868477e-06, + "loss": 58.0664, + "step": 13340 + }, + { + "epoch": 0.0539356892657878, + "grad_norm": 459.78326416015625, + "learning_rate": 9.998109197458865e-06, + "loss": 42.0378, + "step": 13350 + }, + { + "epoch": 0.053976090531155436, + "grad_norm": 864.340576171875, + "learning_rate": 9.998070215372645e-06, + "loss": 63.3425, + "step": 13360 + }, + { + "epoch": 0.05401649179652307, + "grad_norm": 818.0133666992188, + "learning_rate": 9.998030835612914e-06, + "loss": 54.6549, + "step": 13370 + }, + { + "epoch": 0.054056893061890696, + "grad_norm": 750.6216430664062, + "learning_rate": 9.997991058182807e-06, + "loss": 42.7049, + "step": 13380 + }, + { + "epoch": 0.05409729432725833, + "grad_norm": 749.8779907226562, + "learning_rate": 9.997950883085492e-06, + "loss": 74.4298, + "step": 13390 + }, + { + "epoch": 0.05413769559262596, + "grad_norm": 454.1107482910156, + "learning_rate": 9.99791031032416e-06, + "loss": 45.0433, + "step": 13400 + }, + { + "epoch": 0.054178096857993595, + "grad_norm": 867.2178955078125, + "learning_rate": 9.997869339902043e-06, + "loss": 47.866, + "step": 13410 + }, + { + "epoch": 0.05421849812336122, + "grad_norm": 679.61962890625, + "learning_rate": 9.9978279718224e-06, + "loss": 67.6122, + "step": 13420 + }, + { + "epoch": 0.054258899388728854, + "grad_norm": 528.4546508789062, + "learning_rate": 9.99778620608852e-06, + "loss": 40.4731, + "step": 13430 + }, + { + "epoch": 0.05429930065409649, + "grad_norm": 557.4871826171875, + "learning_rate": 9.997744042703731e-06, + "loss": 62.1209, + "step": 13440 + }, + { + "epoch": 0.05433970191946412, + "grad_norm": 1284.357177734375, + "learning_rate": 9.997701481671384e-06, + "loss": 65.1331, + "step": 13450 + }, + { + "epoch": 0.054380103184831746, + "grad_norm": 566.5718383789062, + "learning_rate": 9.997658522994867e-06, + "loss": 56.0474, + "step": 13460 + }, + { + "epoch": 0.05442050445019938, + "grad_norm": 1788.369873046875, + "learning_rate": 9.997615166677597e-06, + "loss": 67.9537, + "step": 13470 + }, + { + "epoch": 0.05446090571556701, + "grad_norm": 524.1817016601562, + "learning_rate": 9.997571412723024e-06, + "loss": 58.4599, + "step": 13480 + }, + { + "epoch": 0.054501306980934645, + "grad_norm": 730.0363159179688, + "learning_rate": 9.99752726113463e-06, + "loss": 56.8917, + "step": 13490 + }, + { + "epoch": 0.05454170824630227, + "grad_norm": 287.1725158691406, + "learning_rate": 9.997482711915926e-06, + "loss": 54.6312, + "step": 13500 + }, + { + "epoch": 0.054582109511669905, + "grad_norm": 1575.38916015625, + "learning_rate": 9.99743776507046e-06, + "loss": 54.69, + "step": 13510 + }, + { + "epoch": 0.05462251077703754, + "grad_norm": 533.496337890625, + "learning_rate": 9.997392420601804e-06, + "loss": 42.1878, + "step": 13520 + }, + { + "epoch": 0.05466291204240517, + "grad_norm": 590.1005249023438, + "learning_rate": 9.99734667851357e-06, + "loss": 41.1869, + "step": 13530 + }, + { + "epoch": 0.0547033133077728, + "grad_norm": 997.8026733398438, + "learning_rate": 9.997300538809394e-06, + "loss": 65.3091, + "step": 13540 + }, + { + "epoch": 0.05474371457314043, + "grad_norm": 539.83935546875, + "learning_rate": 9.99725400149295e-06, + "loss": 60.0879, + "step": 13550 + }, + { + "epoch": 0.05478411583850806, + "grad_norm": 683.99267578125, + "learning_rate": 9.997207066567939e-06, + "loss": 54.2406, + "step": 13560 + }, + { + "epoch": 0.054824517103875696, + "grad_norm": 767.1815185546875, + "learning_rate": 9.997159734038096e-06, + "loss": 58.345, + "step": 13570 + }, + { + "epoch": 0.05486491836924332, + "grad_norm": 0.0, + "learning_rate": 9.997112003907186e-06, + "loss": 34.0798, + "step": 13580 + }, + { + "epoch": 0.054905319634610955, + "grad_norm": 378.8706970214844, + "learning_rate": 9.997063876179007e-06, + "loss": 49.5251, + "step": 13590 + }, + { + "epoch": 0.05494572089997859, + "grad_norm": 901.79736328125, + "learning_rate": 9.997015350857391e-06, + "loss": 41.9663, + "step": 13600 + }, + { + "epoch": 0.05498612216534622, + "grad_norm": 800.0772705078125, + "learning_rate": 9.996966427946195e-06, + "loss": 66.8479, + "step": 13610 + }, + { + "epoch": 0.05502652343071385, + "grad_norm": 1176.25537109375, + "learning_rate": 9.996917107449313e-06, + "loss": 39.7329, + "step": 13620 + }, + { + "epoch": 0.05506692469608148, + "grad_norm": 1161.61767578125, + "learning_rate": 9.99686738937067e-06, + "loss": 68.4346, + "step": 13630 + }, + { + "epoch": 0.05510732596144911, + "grad_norm": 690.8731689453125, + "learning_rate": 9.996817273714222e-06, + "loss": 53.2709, + "step": 13640 + }, + { + "epoch": 0.055147727226816746, + "grad_norm": 691.3014526367188, + "learning_rate": 9.996766760483955e-06, + "loss": 75.7811, + "step": 13650 + }, + { + "epoch": 0.05518812849218437, + "grad_norm": 556.6007690429688, + "learning_rate": 9.996715849683889e-06, + "loss": 75.581, + "step": 13660 + }, + { + "epoch": 0.055228529757552006, + "grad_norm": 480.1729431152344, + "learning_rate": 9.996664541318076e-06, + "loss": 51.7071, + "step": 13670 + }, + { + "epoch": 0.05526893102291964, + "grad_norm": 533.1707153320312, + "learning_rate": 9.996612835390596e-06, + "loss": 35.2482, + "step": 13680 + }, + { + "epoch": 0.05530933228828727, + "grad_norm": 604.6953125, + "learning_rate": 9.996560731905565e-06, + "loss": 59.7485, + "step": 13690 + }, + { + "epoch": 0.0553497335536549, + "grad_norm": 634.8004760742188, + "learning_rate": 9.996508230867126e-06, + "loss": 59.5475, + "step": 13700 + }, + { + "epoch": 0.05539013481902253, + "grad_norm": 742.4111938476562, + "learning_rate": 9.996455332279458e-06, + "loss": 49.5485, + "step": 13710 + }, + { + "epoch": 0.055430536084390164, + "grad_norm": 513.7229614257812, + "learning_rate": 9.99640203614677e-06, + "loss": 53.4414, + "step": 13720 + }, + { + "epoch": 0.0554709373497578, + "grad_norm": 717.775390625, + "learning_rate": 9.996348342473304e-06, + "loss": 45.8425, + "step": 13730 + }, + { + "epoch": 0.05551133861512542, + "grad_norm": 696.4607543945312, + "learning_rate": 9.99629425126333e-06, + "loss": 78.0228, + "step": 13740 + }, + { + "epoch": 0.055551739880493056, + "grad_norm": 477.80926513671875, + "learning_rate": 9.996239762521152e-06, + "loss": 61.2231, + "step": 13750 + }, + { + "epoch": 0.05559214114586069, + "grad_norm": 970.312744140625, + "learning_rate": 9.996184876251105e-06, + "loss": 44.8878, + "step": 13760 + }, + { + "epoch": 0.05563254241122832, + "grad_norm": 692.9142456054688, + "learning_rate": 9.996129592457558e-06, + "loss": 42.4326, + "step": 13770 + }, + { + "epoch": 0.05567294367659595, + "grad_norm": 437.6714782714844, + "learning_rate": 9.996073911144907e-06, + "loss": 45.8802, + "step": 13780 + }, + { + "epoch": 0.05571334494196358, + "grad_norm": 466.60186767578125, + "learning_rate": 9.996017832317583e-06, + "loss": 49.2506, + "step": 13790 + }, + { + "epoch": 0.055753746207331215, + "grad_norm": 712.9346313476562, + "learning_rate": 9.995961355980052e-06, + "loss": 77.4249, + "step": 13800 + }, + { + "epoch": 0.05579414747269885, + "grad_norm": 711.3295288085938, + "learning_rate": 9.995904482136803e-06, + "loss": 38.404, + "step": 13810 + }, + { + "epoch": 0.055834548738066474, + "grad_norm": 1094.4012451171875, + "learning_rate": 9.99584721079236e-06, + "loss": 60.306, + "step": 13820 + }, + { + "epoch": 0.05587495000343411, + "grad_norm": 259.01220703125, + "learning_rate": 9.995789541951287e-06, + "loss": 57.5724, + "step": 13830 + }, + { + "epoch": 0.05591535126880174, + "grad_norm": 930.34521484375, + "learning_rate": 9.995731475618163e-06, + "loss": 44.2708, + "step": 13840 + }, + { + "epoch": 0.05595575253416937, + "grad_norm": 635.6669311523438, + "learning_rate": 9.995673011797615e-06, + "loss": 55.8103, + "step": 13850 + }, + { + "epoch": 0.055996153799537, + "grad_norm": 1038.2628173828125, + "learning_rate": 9.995614150494293e-06, + "loss": 56.7732, + "step": 13860 + }, + { + "epoch": 0.05603655506490463, + "grad_norm": 646.6795654296875, + "learning_rate": 9.995554891712879e-06, + "loss": 67.5958, + "step": 13870 + }, + { + "epoch": 0.056076956330272265, + "grad_norm": 635.2930908203125, + "learning_rate": 9.995495235458087e-06, + "loss": 56.4254, + "step": 13880 + }, + { + "epoch": 0.0561173575956399, + "grad_norm": 477.6964111328125, + "learning_rate": 9.99543518173467e-06, + "loss": 33.5029, + "step": 13890 + }, + { + "epoch": 0.056157758861007524, + "grad_norm": 750.3272094726562, + "learning_rate": 9.995374730547397e-06, + "loss": 43.8864, + "step": 13900 + }, + { + "epoch": 0.05619816012637516, + "grad_norm": 421.46722412109375, + "learning_rate": 9.995313881901085e-06, + "loss": 44.306, + "step": 13910 + }, + { + "epoch": 0.05623856139174279, + "grad_norm": 1255.6578369140625, + "learning_rate": 9.995252635800572e-06, + "loss": 69.9456, + "step": 13920 + }, + { + "epoch": 0.05627896265711042, + "grad_norm": 935.6283569335938, + "learning_rate": 9.995190992250732e-06, + "loss": 50.333, + "step": 13930 + }, + { + "epoch": 0.05631936392247805, + "grad_norm": 394.6123046875, + "learning_rate": 9.995128951256469e-06, + "loss": 49.9401, + "step": 13940 + }, + { + "epoch": 0.05635976518784568, + "grad_norm": 439.6722412109375, + "learning_rate": 9.99506651282272e-06, + "loss": 70.1002, + "step": 13950 + }, + { + "epoch": 0.056400166453213316, + "grad_norm": 432.78179931640625, + "learning_rate": 9.995003676954454e-06, + "loss": 50.2052, + "step": 13960 + }, + { + "epoch": 0.05644056771858095, + "grad_norm": 581.228271484375, + "learning_rate": 9.994940443656668e-06, + "loss": 64.6687, + "step": 13970 + }, + { + "epoch": 0.056480968983948575, + "grad_norm": 927.3905639648438, + "learning_rate": 9.994876812934395e-06, + "loss": 47.3174, + "step": 13980 + }, + { + "epoch": 0.05652137024931621, + "grad_norm": 336.7020568847656, + "learning_rate": 9.994812784792698e-06, + "loss": 65.6403, + "step": 13990 + }, + { + "epoch": 0.05656177151468384, + "grad_norm": 407.6746520996094, + "learning_rate": 9.99474835923667e-06, + "loss": 47.5244, + "step": 14000 + }, + { + "epoch": 0.056602172780051474, + "grad_norm": 1338.405517578125, + "learning_rate": 9.994683536271437e-06, + "loss": 67.1354, + "step": 14010 + }, + { + "epoch": 0.0566425740454191, + "grad_norm": 967.8305053710938, + "learning_rate": 9.994618315902161e-06, + "loss": 50.1595, + "step": 14020 + }, + { + "epoch": 0.05668297531078673, + "grad_norm": 447.31695556640625, + "learning_rate": 9.994552698134023e-06, + "loss": 44.7543, + "step": 14030 + }, + { + "epoch": 0.056723376576154366, + "grad_norm": 630.2139892578125, + "learning_rate": 9.994486682972253e-06, + "loss": 60.6165, + "step": 14040 + }, + { + "epoch": 0.056763777841522, + "grad_norm": 676.4772338867188, + "learning_rate": 9.994420270422096e-06, + "loss": 63.8667, + "step": 14050 + }, + { + "epoch": 0.056804179106889625, + "grad_norm": 724.5189819335938, + "learning_rate": 9.994353460488842e-06, + "loss": 61.9814, + "step": 14060 + }, + { + "epoch": 0.05684458037225726, + "grad_norm": 885.9684448242188, + "learning_rate": 9.994286253177803e-06, + "loss": 54.9514, + "step": 14070 + }, + { + "epoch": 0.05688498163762489, + "grad_norm": 431.4734191894531, + "learning_rate": 9.994218648494327e-06, + "loss": 42.4939, + "step": 14080 + }, + { + "epoch": 0.056925382902992525, + "grad_norm": 875.316650390625, + "learning_rate": 9.994150646443793e-06, + "loss": 38.2235, + "step": 14090 + }, + { + "epoch": 0.05696578416836015, + "grad_norm": 1274.8563232421875, + "learning_rate": 9.994082247031613e-06, + "loss": 68.8717, + "step": 14100 + }, + { + "epoch": 0.057006185433727784, + "grad_norm": 1349.3104248046875, + "learning_rate": 9.99401345026323e-06, + "loss": 39.0644, + "step": 14110 + }, + { + "epoch": 0.05704658669909542, + "grad_norm": 648.0847778320312, + "learning_rate": 9.993944256144115e-06, + "loss": 49.0192, + "step": 14120 + }, + { + "epoch": 0.05708698796446305, + "grad_norm": 655.9918823242188, + "learning_rate": 9.993874664679774e-06, + "loss": 53.7475, + "step": 14130 + }, + { + "epoch": 0.057127389229830676, + "grad_norm": 857.4654541015625, + "learning_rate": 9.993804675875744e-06, + "loss": 76.6148, + "step": 14140 + }, + { + "epoch": 0.05716779049519831, + "grad_norm": 238.42205810546875, + "learning_rate": 9.993734289737596e-06, + "loss": 47.7305, + "step": 14150 + }, + { + "epoch": 0.05720819176056594, + "grad_norm": 334.1250915527344, + "learning_rate": 9.993663506270928e-06, + "loss": 51.6321, + "step": 14160 + }, + { + "epoch": 0.057248593025933575, + "grad_norm": 1000.7257080078125, + "learning_rate": 9.993592325481373e-06, + "loss": 49.4137, + "step": 14170 + }, + { + "epoch": 0.0572889942913012, + "grad_norm": 605.1682739257812, + "learning_rate": 9.993520747374594e-06, + "loss": 50.634, + "step": 14180 + }, + { + "epoch": 0.057329395556668834, + "grad_norm": 473.75982666015625, + "learning_rate": 9.993448771956285e-06, + "loss": 49.1249, + "step": 14190 + }, + { + "epoch": 0.05736979682203647, + "grad_norm": 443.055419921875, + "learning_rate": 9.993376399232175e-06, + "loss": 34.1855, + "step": 14200 + }, + { + "epoch": 0.0574101980874041, + "grad_norm": 857.4747924804688, + "learning_rate": 9.993303629208023e-06, + "loss": 50.4225, + "step": 14210 + }, + { + "epoch": 0.057450599352771727, + "grad_norm": 372.2861328125, + "learning_rate": 9.993230461889616e-06, + "loss": 41.4766, + "step": 14220 + }, + { + "epoch": 0.05749100061813936, + "grad_norm": 622.736328125, + "learning_rate": 9.993156897282776e-06, + "loss": 58.5451, + "step": 14230 + }, + { + "epoch": 0.05753140188350699, + "grad_norm": 574.2984619140625, + "learning_rate": 9.99308293539336e-06, + "loss": 53.2113, + "step": 14240 + }, + { + "epoch": 0.057571803148874626, + "grad_norm": 706.0330810546875, + "learning_rate": 9.993008576227248e-06, + "loss": 54.6104, + "step": 14250 + }, + { + "epoch": 0.05761220441424225, + "grad_norm": 802.1617431640625, + "learning_rate": 9.992933819790358e-06, + "loss": 71.0195, + "step": 14260 + }, + { + "epoch": 0.057652605679609885, + "grad_norm": 197.7603302001953, + "learning_rate": 9.992858666088638e-06, + "loss": 45.4237, + "step": 14270 + }, + { + "epoch": 0.05769300694497752, + "grad_norm": 407.2517395019531, + "learning_rate": 9.992783115128072e-06, + "loss": 58.2364, + "step": 14280 + }, + { + "epoch": 0.05773340821034515, + "grad_norm": 300.0452575683594, + "learning_rate": 9.992707166914662e-06, + "loss": 65.9405, + "step": 14290 + }, + { + "epoch": 0.05777380947571278, + "grad_norm": 937.140625, + "learning_rate": 9.992630821454458e-06, + "loss": 48.6805, + "step": 14300 + }, + { + "epoch": 0.05781421074108041, + "grad_norm": 1485.9613037109375, + "learning_rate": 9.992554078753534e-06, + "loss": 58.1787, + "step": 14310 + }, + { + "epoch": 0.05785461200644804, + "grad_norm": 659.9443359375, + "learning_rate": 9.992476938817994e-06, + "loss": 43.7871, + "step": 14320 + }, + { + "epoch": 0.057895013271815676, + "grad_norm": 609.6917114257812, + "learning_rate": 9.992399401653976e-06, + "loss": 37.274, + "step": 14330 + }, + { + "epoch": 0.0579354145371833, + "grad_norm": 126.08211517333984, + "learning_rate": 9.99232146726765e-06, + "loss": 57.0369, + "step": 14340 + }, + { + "epoch": 0.057975815802550935, + "grad_norm": 256.6801452636719, + "learning_rate": 9.992243135665217e-06, + "loss": 59.119, + "step": 14350 + }, + { + "epoch": 0.05801621706791857, + "grad_norm": 1137.1373291015625, + "learning_rate": 9.992164406852908e-06, + "loss": 80.0415, + "step": 14360 + }, + { + "epoch": 0.0580566183332862, + "grad_norm": 1009.8486938476562, + "learning_rate": 9.992085280836988e-06, + "loss": 50.1561, + "step": 14370 + }, + { + "epoch": 0.05809701959865383, + "grad_norm": 829.55712890625, + "learning_rate": 9.992005757623753e-06, + "loss": 54.28, + "step": 14380 + }, + { + "epoch": 0.05813742086402146, + "grad_norm": 812.1973876953125, + "learning_rate": 9.991925837219532e-06, + "loss": 58.4078, + "step": 14390 + }, + { + "epoch": 0.058177822129389094, + "grad_norm": 364.135498046875, + "learning_rate": 9.991845519630679e-06, + "loss": 40.6019, + "step": 14400 + }, + { + "epoch": 0.05821822339475673, + "grad_norm": 855.4879150390625, + "learning_rate": 9.991764804863588e-06, + "loss": 76.5647, + "step": 14410 + }, + { + "epoch": 0.05825862466012435, + "grad_norm": 405.6043395996094, + "learning_rate": 9.991683692924682e-06, + "loss": 47.4223, + "step": 14420 + }, + { + "epoch": 0.058299025925491986, + "grad_norm": 449.4226989746094, + "learning_rate": 9.991602183820412e-06, + "loss": 40.5298, + "step": 14430 + }, + { + "epoch": 0.05833942719085962, + "grad_norm": 823.6030883789062, + "learning_rate": 9.991520277557266e-06, + "loss": 42.4834, + "step": 14440 + }, + { + "epoch": 0.05837982845622725, + "grad_norm": 757.8836669921875, + "learning_rate": 9.991437974141759e-06, + "loss": 47.1578, + "step": 14450 + }, + { + "epoch": 0.05842022972159488, + "grad_norm": 873.563720703125, + "learning_rate": 9.99135527358044e-06, + "loss": 52.538, + "step": 14460 + }, + { + "epoch": 0.05846063098696251, + "grad_norm": 670.3134765625, + "learning_rate": 9.991272175879888e-06, + "loss": 39.9001, + "step": 14470 + }, + { + "epoch": 0.058501032252330144, + "grad_norm": 329.8004455566406, + "learning_rate": 9.991188681046718e-06, + "loss": 82.909, + "step": 14480 + }, + { + "epoch": 0.05854143351769778, + "grad_norm": 725.716064453125, + "learning_rate": 9.991104789087568e-06, + "loss": 66.8185, + "step": 14490 + }, + { + "epoch": 0.058581834783065403, + "grad_norm": 359.1539611816406, + "learning_rate": 9.991020500009118e-06, + "loss": 54.1102, + "step": 14500 + }, + { + "epoch": 0.05862223604843304, + "grad_norm": 618.91650390625, + "learning_rate": 9.990935813818073e-06, + "loss": 39.747, + "step": 14510 + }, + { + "epoch": 0.05866263731380067, + "grad_norm": 668.1556396484375, + "learning_rate": 9.99085073052117e-06, + "loss": 48.4924, + "step": 14520 + }, + { + "epoch": 0.0587030385791683, + "grad_norm": 480.24945068359375, + "learning_rate": 9.990765250125179e-06, + "loss": 50.9185, + "step": 14530 + }, + { + "epoch": 0.05874343984453593, + "grad_norm": 811.7920532226562, + "learning_rate": 9.990679372636902e-06, + "loss": 53.3772, + "step": 14540 + }, + { + "epoch": 0.05878384110990356, + "grad_norm": 577.9950561523438, + "learning_rate": 9.99059309806317e-06, + "loss": 51.9823, + "step": 14550 + }, + { + "epoch": 0.058824242375271195, + "grad_norm": 699.646728515625, + "learning_rate": 9.990506426410851e-06, + "loss": 56.4946, + "step": 14560 + }, + { + "epoch": 0.05886464364063883, + "grad_norm": 371.0194091796875, + "learning_rate": 9.990419357686839e-06, + "loss": 41.3928, + "step": 14570 + }, + { + "epoch": 0.058905044906006454, + "grad_norm": 644.16796875, + "learning_rate": 9.99033189189806e-06, + "loss": 44.1936, + "step": 14580 + }, + { + "epoch": 0.05894544617137409, + "grad_norm": 817.6756591796875, + "learning_rate": 9.990244029051475e-06, + "loss": 52.6435, + "step": 14590 + }, + { + "epoch": 0.05898584743674172, + "grad_norm": 966.6154174804688, + "learning_rate": 9.990155769154077e-06, + "loss": 65.1172, + "step": 14600 + }, + { + "epoch": 0.05902624870210935, + "grad_norm": 668.8129272460938, + "learning_rate": 9.990067112212884e-06, + "loss": 47.6556, + "step": 14610 + }, + { + "epoch": 0.05906664996747698, + "grad_norm": 334.03900146484375, + "learning_rate": 9.989978058234952e-06, + "loss": 63.2436, + "step": 14620 + }, + { + "epoch": 0.05910705123284461, + "grad_norm": 658.3740844726562, + "learning_rate": 9.989888607227369e-06, + "loss": 40.0384, + "step": 14630 + }, + { + "epoch": 0.059147452498212245, + "grad_norm": 637.34716796875, + "learning_rate": 9.989798759197247e-06, + "loss": 51.4579, + "step": 14640 + }, + { + "epoch": 0.05918785376357988, + "grad_norm": 511.2720642089844, + "learning_rate": 9.989708514151739e-06, + "loss": 52.719, + "step": 14650 + }, + { + "epoch": 0.059228255028947505, + "grad_norm": 412.48040771484375, + "learning_rate": 9.989617872098026e-06, + "loss": 38.988, + "step": 14660 + }, + { + "epoch": 0.05926865629431514, + "grad_norm": 1462.632080078125, + "learning_rate": 9.989526833043316e-06, + "loss": 56.2337, + "step": 14670 + }, + { + "epoch": 0.05930905755968277, + "grad_norm": 746.0077514648438, + "learning_rate": 9.989435396994856e-06, + "loss": 50.376, + "step": 14680 + }, + { + "epoch": 0.059349458825050404, + "grad_norm": 894.7488403320312, + "learning_rate": 9.989343563959919e-06, + "loss": 51.9415, + "step": 14690 + }, + { + "epoch": 0.05938986009041803, + "grad_norm": 378.21697998046875, + "learning_rate": 9.989251333945813e-06, + "loss": 63.1732, + "step": 14700 + }, + { + "epoch": 0.05943026135578566, + "grad_norm": 433.5731201171875, + "learning_rate": 9.989158706959875e-06, + "loss": 57.7156, + "step": 14710 + }, + { + "epoch": 0.059470662621153296, + "grad_norm": 931.3536987304688, + "learning_rate": 9.989065683009477e-06, + "loss": 70.65, + "step": 14720 + }, + { + "epoch": 0.05951106388652093, + "grad_norm": 1730.477783203125, + "learning_rate": 9.988972262102018e-06, + "loss": 58.9854, + "step": 14730 + }, + { + "epoch": 0.059551465151888555, + "grad_norm": 1209.090576171875, + "learning_rate": 9.988878444244937e-06, + "loss": 47.5035, + "step": 14740 + }, + { + "epoch": 0.05959186641725619, + "grad_norm": 221.4032440185547, + "learning_rate": 9.988784229445689e-06, + "loss": 45.2581, + "step": 14750 + }, + { + "epoch": 0.05963226768262382, + "grad_norm": 404.3892822265625, + "learning_rate": 9.988689617711777e-06, + "loss": 55.4668, + "step": 14760 + }, + { + "epoch": 0.059672668947991454, + "grad_norm": 582.4869384765625, + "learning_rate": 9.988594609050726e-06, + "loss": 56.3967, + "step": 14770 + }, + { + "epoch": 0.05971307021335908, + "grad_norm": 248.2957305908203, + "learning_rate": 9.988499203470097e-06, + "loss": 44.0815, + "step": 14780 + }, + { + "epoch": 0.059753471478726713, + "grad_norm": 945.0496826171875, + "learning_rate": 9.988403400977482e-06, + "loss": 54.5601, + "step": 14790 + }, + { + "epoch": 0.05979387274409435, + "grad_norm": 446.46405029296875, + "learning_rate": 9.9883072015805e-06, + "loss": 49.9845, + "step": 14800 + }, + { + "epoch": 0.05983427400946198, + "grad_norm": 347.37139892578125, + "learning_rate": 9.98821060528681e-06, + "loss": 65.609, + "step": 14810 + }, + { + "epoch": 0.059874675274829606, + "grad_norm": 461.4508972167969, + "learning_rate": 9.988113612104093e-06, + "loss": 52.9646, + "step": 14820 + }, + { + "epoch": 0.05991507654019724, + "grad_norm": 736.7156982421875, + "learning_rate": 9.988016222040067e-06, + "loss": 42.4082, + "step": 14830 + }, + { + "epoch": 0.05995547780556487, + "grad_norm": 729.3582763671875, + "learning_rate": 9.987918435102484e-06, + "loss": 44.0376, + "step": 14840 + }, + { + "epoch": 0.059995879070932505, + "grad_norm": 404.2114562988281, + "learning_rate": 9.987820251299121e-06, + "loss": 77.3696, + "step": 14850 + }, + { + "epoch": 0.06003628033630013, + "grad_norm": 954.4871215820312, + "learning_rate": 9.987721670637794e-06, + "loss": 48.0571, + "step": 14860 + }, + { + "epoch": 0.060076681601667764, + "grad_norm": 511.55133056640625, + "learning_rate": 9.987622693126342e-06, + "loss": 55.6837, + "step": 14870 + }, + { + "epoch": 0.0601170828670354, + "grad_norm": 572.0296020507812, + "learning_rate": 9.987523318772644e-06, + "loss": 51.224, + "step": 14880 + }, + { + "epoch": 0.06015748413240303, + "grad_norm": 771.312255859375, + "learning_rate": 9.987423547584605e-06, + "loss": 35.857, + "step": 14890 + }, + { + "epoch": 0.060197885397770656, + "grad_norm": 634.609375, + "learning_rate": 9.987323379570161e-06, + "loss": 63.9532, + "step": 14900 + }, + { + "epoch": 0.06023828666313829, + "grad_norm": 719.4342651367188, + "learning_rate": 9.987222814737287e-06, + "loss": 51.5199, + "step": 14910 + }, + { + "epoch": 0.06027868792850592, + "grad_norm": 544.3135375976562, + "learning_rate": 9.987121853093982e-06, + "loss": 58.8402, + "step": 14920 + }, + { + "epoch": 0.060319089193873555, + "grad_norm": 1171.758056640625, + "learning_rate": 9.987020494648279e-06, + "loss": 54.2668, + "step": 14930 + }, + { + "epoch": 0.06035949045924118, + "grad_norm": 688.6409912109375, + "learning_rate": 9.986918739408241e-06, + "loss": 52.9917, + "step": 14940 + }, + { + "epoch": 0.060399891724608815, + "grad_norm": 450.6348876953125, + "learning_rate": 9.986816587381966e-06, + "loss": 53.4728, + "step": 14950 + }, + { + "epoch": 0.06044029298997645, + "grad_norm": 403.31341552734375, + "learning_rate": 9.986714038577582e-06, + "loss": 36.6673, + "step": 14960 + }, + { + "epoch": 0.06048069425534408, + "grad_norm": 844.4390258789062, + "learning_rate": 9.986611093003249e-06, + "loss": 52.0443, + "step": 14970 + }, + { + "epoch": 0.06052109552071171, + "grad_norm": 254.19454956054688, + "learning_rate": 9.986507750667157e-06, + "loss": 56.2579, + "step": 14980 + }, + { + "epoch": 0.06056149678607934, + "grad_norm": 566.4449462890625, + "learning_rate": 9.986404011577525e-06, + "loss": 39.8158, + "step": 14990 + }, + { + "epoch": 0.06060189805144697, + "grad_norm": 648.5628662109375, + "learning_rate": 9.986299875742612e-06, + "loss": 53.3486, + "step": 15000 + }, + { + "epoch": 0.060642299316814606, + "grad_norm": 856.6651000976562, + "learning_rate": 9.986195343170703e-06, + "loss": 53.0422, + "step": 15010 + }, + { + "epoch": 0.06068270058218223, + "grad_norm": 770.17431640625, + "learning_rate": 9.986090413870114e-06, + "loss": 47.2132, + "step": 15020 + }, + { + "epoch": 0.060723101847549865, + "grad_norm": 400.9591979980469, + "learning_rate": 9.985985087849193e-06, + "loss": 67.7665, + "step": 15030 + }, + { + "epoch": 0.0607635031129175, + "grad_norm": 694.5767211914062, + "learning_rate": 9.98587936511632e-06, + "loss": 43.7988, + "step": 15040 + }, + { + "epoch": 0.06080390437828513, + "grad_norm": 466.62896728515625, + "learning_rate": 9.98577324567991e-06, + "loss": 50.6092, + "step": 15050 + }, + { + "epoch": 0.06084430564365276, + "grad_norm": 737.1053466796875, + "learning_rate": 9.985666729548404e-06, + "loss": 47.2564, + "step": 15060 + }, + { + "epoch": 0.06088470690902039, + "grad_norm": 696.4453125, + "learning_rate": 9.985559816730277e-06, + "loss": 54.4192, + "step": 15070 + }, + { + "epoch": 0.060925108174388024, + "grad_norm": 1044.3509521484375, + "learning_rate": 9.985452507234037e-06, + "loss": 60.5334, + "step": 15080 + }, + { + "epoch": 0.06096550943975566, + "grad_norm": 1355.4449462890625, + "learning_rate": 9.98534480106822e-06, + "loss": 57.1139, + "step": 15090 + }, + { + "epoch": 0.06100591070512328, + "grad_norm": 586.2362060546875, + "learning_rate": 9.985236698241396e-06, + "loss": 57.273, + "step": 15100 + }, + { + "epoch": 0.061046311970490916, + "grad_norm": 781.879150390625, + "learning_rate": 9.985128198762168e-06, + "loss": 40.2266, + "step": 15110 + }, + { + "epoch": 0.06108671323585855, + "grad_norm": 831.5631713867188, + "learning_rate": 9.98501930263917e-06, + "loss": 61.6051, + "step": 15120 + }, + { + "epoch": 0.06112711450122618, + "grad_norm": 538.1463012695312, + "learning_rate": 9.984910009881062e-06, + "loss": 34.2463, + "step": 15130 + }, + { + "epoch": 0.06116751576659381, + "grad_norm": 1016.048828125, + "learning_rate": 9.984800320496542e-06, + "loss": 56.4194, + "step": 15140 + }, + { + "epoch": 0.06120791703196144, + "grad_norm": 537.7840576171875, + "learning_rate": 9.984690234494338e-06, + "loss": 70.5479, + "step": 15150 + }, + { + "epoch": 0.061248318297329074, + "grad_norm": 625.3380737304688, + "learning_rate": 9.98457975188321e-06, + "loss": 77.5726, + "step": 15160 + }, + { + "epoch": 0.06128871956269671, + "grad_norm": 1212.03857421875, + "learning_rate": 9.984468872671945e-06, + "loss": 49.5992, + "step": 15170 + }, + { + "epoch": 0.06132912082806433, + "grad_norm": 1049.4886474609375, + "learning_rate": 9.984357596869369e-06, + "loss": 62.8757, + "step": 15180 + }, + { + "epoch": 0.061369522093431966, + "grad_norm": 1094.6876220703125, + "learning_rate": 9.984245924484334e-06, + "loss": 56.43, + "step": 15190 + }, + { + "epoch": 0.0614099233587996, + "grad_norm": 503.16534423828125, + "learning_rate": 9.984133855525723e-06, + "loss": 55.6275, + "step": 15200 + }, + { + "epoch": 0.061450324624167225, + "grad_norm": 431.4841613769531, + "learning_rate": 9.984021390002458e-06, + "loss": 64.4734, + "step": 15210 + }, + { + "epoch": 0.06149072588953486, + "grad_norm": 968.3142700195312, + "learning_rate": 9.983908527923486e-06, + "loss": 62.6523, + "step": 15220 + }, + { + "epoch": 0.06153112715490249, + "grad_norm": 652.2515869140625, + "learning_rate": 9.983795269297782e-06, + "loss": 30.2004, + "step": 15230 + }, + { + "epoch": 0.061571528420270125, + "grad_norm": 660.2927856445312, + "learning_rate": 9.983681614134363e-06, + "loss": 43.4263, + "step": 15240 + }, + { + "epoch": 0.06161192968563775, + "grad_norm": 536.5686645507812, + "learning_rate": 9.98356756244227e-06, + "loss": 55.396, + "step": 15250 + }, + { + "epoch": 0.061652330951005384, + "grad_norm": 542.5938720703125, + "learning_rate": 9.983453114230575e-06, + "loss": 41.3936, + "step": 15260 + }, + { + "epoch": 0.06169273221637302, + "grad_norm": 971.2131958007812, + "learning_rate": 9.98333826950839e-06, + "loss": 66.6024, + "step": 15270 + }, + { + "epoch": 0.06173313348174065, + "grad_norm": 422.6085510253906, + "learning_rate": 9.983223028284847e-06, + "loss": 37.8949, + "step": 15280 + }, + { + "epoch": 0.061773534747108276, + "grad_norm": 339.5025634765625, + "learning_rate": 9.983107390569118e-06, + "loss": 38.7237, + "step": 15290 + }, + { + "epoch": 0.06181393601247591, + "grad_norm": 916.392333984375, + "learning_rate": 9.982991356370404e-06, + "loss": 39.1873, + "step": 15300 + }, + { + "epoch": 0.06185433727784354, + "grad_norm": 678.418212890625, + "learning_rate": 9.982874925697937e-06, + "loss": 64.8346, + "step": 15310 + }, + { + "epoch": 0.061894738543211175, + "grad_norm": 854.1286010742188, + "learning_rate": 9.982758098560978e-06, + "loss": 52.7229, + "step": 15320 + }, + { + "epoch": 0.0619351398085788, + "grad_norm": 747.940673828125, + "learning_rate": 9.982640874968827e-06, + "loss": 40.8252, + "step": 15330 + }, + { + "epoch": 0.061975541073946434, + "grad_norm": 982.86181640625, + "learning_rate": 9.98252325493081e-06, + "loss": 65.4864, + "step": 15340 + }, + { + "epoch": 0.06201594233931407, + "grad_norm": 356.97271728515625, + "learning_rate": 9.982405238456281e-06, + "loss": 52.2651, + "step": 15350 + }, + { + "epoch": 0.0620563436046817, + "grad_norm": 685.7958374023438, + "learning_rate": 9.982286825554636e-06, + "loss": 44.4038, + "step": 15360 + }, + { + "epoch": 0.06209674487004933, + "grad_norm": 834.1498413085938, + "learning_rate": 9.982168016235292e-06, + "loss": 44.5143, + "step": 15370 + }, + { + "epoch": 0.06213714613541696, + "grad_norm": 433.9917907714844, + "learning_rate": 9.982048810507706e-06, + "loss": 44.4605, + "step": 15380 + }, + { + "epoch": 0.06217754740078459, + "grad_norm": 908.0674438476562, + "learning_rate": 9.98192920838136e-06, + "loss": 43.4592, + "step": 15390 + }, + { + "epoch": 0.062217948666152226, + "grad_norm": 1588.20703125, + "learning_rate": 9.98180920986577e-06, + "loss": 56.2675, + "step": 15400 + }, + { + "epoch": 0.06225834993151985, + "grad_norm": 886.8815307617188, + "learning_rate": 9.981688814970485e-06, + "loss": 55.398, + "step": 15410 + }, + { + "epoch": 0.062298751196887485, + "grad_norm": 1576.02685546875, + "learning_rate": 9.981568023705085e-06, + "loss": 50.5813, + "step": 15420 + }, + { + "epoch": 0.06233915246225512, + "grad_norm": 594.6544799804688, + "learning_rate": 9.981446836079178e-06, + "loss": 49.5407, + "step": 15430 + }, + { + "epoch": 0.06237955372762275, + "grad_norm": 942.7835083007812, + "learning_rate": 9.981325252102408e-06, + "loss": 75.0559, + "step": 15440 + }, + { + "epoch": 0.06241995499299038, + "grad_norm": 762.1746826171875, + "learning_rate": 9.98120327178445e-06, + "loss": 42.7199, + "step": 15450 + }, + { + "epoch": 0.06246035625835801, + "grad_norm": 575.0107421875, + "learning_rate": 9.981080895135007e-06, + "loss": 40.931, + "step": 15460 + }, + { + "epoch": 0.06250075752372564, + "grad_norm": 765.7384033203125, + "learning_rate": 9.980958122163818e-06, + "loss": 47.4123, + "step": 15470 + }, + { + "epoch": 0.06254115878909328, + "grad_norm": 1211.600341796875, + "learning_rate": 9.980834952880652e-06, + "loss": 51.5481, + "step": 15480 + }, + { + "epoch": 0.0625815600544609, + "grad_norm": 678.6597290039062, + "learning_rate": 9.980711387295306e-06, + "loss": 79.6206, + "step": 15490 + }, + { + "epoch": 0.06262196131982854, + "grad_norm": 808.155517578125, + "learning_rate": 9.980587425417612e-06, + "loss": 75.9096, + "step": 15500 + }, + { + "epoch": 0.06266236258519617, + "grad_norm": 1321.018310546875, + "learning_rate": 9.980463067257437e-06, + "loss": 47.1829, + "step": 15510 + }, + { + "epoch": 0.0627027638505638, + "grad_norm": 705.9486083984375, + "learning_rate": 9.980338312824672e-06, + "loss": 38.1095, + "step": 15520 + }, + { + "epoch": 0.06274316511593143, + "grad_norm": 801.2616577148438, + "learning_rate": 9.980213162129244e-06, + "loss": 53.8346, + "step": 15530 + }, + { + "epoch": 0.06278356638129906, + "grad_norm": 499.783935546875, + "learning_rate": 9.980087615181111e-06, + "loss": 50.7611, + "step": 15540 + }, + { + "epoch": 0.06282396764666669, + "grad_norm": 978.2482299804688, + "learning_rate": 9.979961671990263e-06, + "loss": 54.354, + "step": 15550 + }, + { + "epoch": 0.06286436891203433, + "grad_norm": 611.9962768554688, + "learning_rate": 9.979835332566719e-06, + "loss": 49.5433, + "step": 15560 + }, + { + "epoch": 0.06290477017740195, + "grad_norm": 2557.07958984375, + "learning_rate": 9.97970859692053e-06, + "loss": 69.103, + "step": 15570 + }, + { + "epoch": 0.06294517144276959, + "grad_norm": 484.30908203125, + "learning_rate": 9.979581465061784e-06, + "loss": 60.9328, + "step": 15580 + }, + { + "epoch": 0.06298557270813722, + "grad_norm": 1626.028564453125, + "learning_rate": 9.979453937000594e-06, + "loss": 77.7794, + "step": 15590 + }, + { + "epoch": 0.06302597397350485, + "grad_norm": 600.4717407226562, + "learning_rate": 9.979326012747106e-06, + "loss": 62.6274, + "step": 15600 + }, + { + "epoch": 0.06306637523887249, + "grad_norm": 863.0751953125, + "learning_rate": 9.9791976923115e-06, + "loss": 44.0777, + "step": 15610 + }, + { + "epoch": 0.06310677650424011, + "grad_norm": 1112.2489013671875, + "learning_rate": 9.979068975703984e-06, + "loss": 54.6989, + "step": 15620 + }, + { + "epoch": 0.06314717776960774, + "grad_norm": 579.8980712890625, + "learning_rate": 9.978939862934802e-06, + "loss": 51.3433, + "step": 15630 + }, + { + "epoch": 0.06318757903497538, + "grad_norm": 577.5842895507812, + "learning_rate": 9.978810354014223e-06, + "loss": 52.9068, + "step": 15640 + }, + { + "epoch": 0.063227980300343, + "grad_norm": 1737.6309814453125, + "learning_rate": 9.978680448952556e-06, + "loss": 60.6608, + "step": 15650 + }, + { + "epoch": 0.06326838156571064, + "grad_norm": 931.930908203125, + "learning_rate": 9.978550147760133e-06, + "loss": 65.9854, + "step": 15660 + }, + { + "epoch": 0.06330878283107827, + "grad_norm": 847.0628662109375, + "learning_rate": 9.978419450447325e-06, + "loss": 47.0742, + "step": 15670 + }, + { + "epoch": 0.0633491840964459, + "grad_norm": 538.1990356445312, + "learning_rate": 9.978288357024527e-06, + "loss": 69.5365, + "step": 15680 + }, + { + "epoch": 0.06338958536181354, + "grad_norm": 920.2020263671875, + "learning_rate": 9.978156867502173e-06, + "loss": 49.8007, + "step": 15690 + }, + { + "epoch": 0.06342998662718116, + "grad_norm": 922.1722412109375, + "learning_rate": 9.978024981890724e-06, + "loss": 45.6619, + "step": 15700 + }, + { + "epoch": 0.06347038789254879, + "grad_norm": 262.210205078125, + "learning_rate": 9.977892700200673e-06, + "loss": 47.8528, + "step": 15710 + }, + { + "epoch": 0.06351078915791643, + "grad_norm": 781.1572875976562, + "learning_rate": 9.977760022442545e-06, + "loss": 66.2854, + "step": 15720 + }, + { + "epoch": 0.06355119042328405, + "grad_norm": 808.654052734375, + "learning_rate": 9.977626948626897e-06, + "loss": 46.0415, + "step": 15730 + }, + { + "epoch": 0.0635915916886517, + "grad_norm": 924.575927734375, + "learning_rate": 9.977493478764316e-06, + "loss": 63.1324, + "step": 15740 + }, + { + "epoch": 0.06363199295401932, + "grad_norm": 1043.2413330078125, + "learning_rate": 9.977359612865424e-06, + "loss": 60.2796, + "step": 15750 + }, + { + "epoch": 0.06367239421938695, + "grad_norm": 331.38427734375, + "learning_rate": 9.97722535094087e-06, + "loss": 50.0805, + "step": 15760 + }, + { + "epoch": 0.06371279548475459, + "grad_norm": 742.5581665039062, + "learning_rate": 9.977090693001336e-06, + "loss": 50.485, + "step": 15770 + }, + { + "epoch": 0.06375319675012221, + "grad_norm": 692.9002075195312, + "learning_rate": 9.976955639057539e-06, + "loss": 48.5586, + "step": 15780 + }, + { + "epoch": 0.06379359801548984, + "grad_norm": 700.7842407226562, + "learning_rate": 9.976820189120223e-06, + "loss": 44.3135, + "step": 15790 + }, + { + "epoch": 0.06383399928085748, + "grad_norm": 693.6561889648438, + "learning_rate": 9.976684343200164e-06, + "loss": 65.8412, + "step": 15800 + }, + { + "epoch": 0.0638744005462251, + "grad_norm": 785.6405029296875, + "learning_rate": 9.976548101308173e-06, + "loss": 48.4629, + "step": 15810 + }, + { + "epoch": 0.06391480181159274, + "grad_norm": 640.048828125, + "learning_rate": 9.976411463455088e-06, + "loss": 37.2934, + "step": 15820 + }, + { + "epoch": 0.06395520307696037, + "grad_norm": 874.2314453125, + "learning_rate": 9.976274429651783e-06, + "loss": 41.366, + "step": 15830 + }, + { + "epoch": 0.063995604342328, + "grad_norm": 1200.9598388671875, + "learning_rate": 9.976136999909156e-06, + "loss": 34.9769, + "step": 15840 + }, + { + "epoch": 0.06403600560769564, + "grad_norm": 639.4553833007812, + "learning_rate": 9.97599917423815e-06, + "loss": 47.3031, + "step": 15850 + }, + { + "epoch": 0.06407640687306326, + "grad_norm": 745.8543090820312, + "learning_rate": 9.975860952649724e-06, + "loss": 65.3151, + "step": 15860 + }, + { + "epoch": 0.06411680813843089, + "grad_norm": 644.9210815429688, + "learning_rate": 9.975722335154876e-06, + "loss": 54.8319, + "step": 15870 + }, + { + "epoch": 0.06415720940379853, + "grad_norm": 836.5652465820312, + "learning_rate": 9.975583321764638e-06, + "loss": 39.6489, + "step": 15880 + }, + { + "epoch": 0.06419761066916616, + "grad_norm": 776.029052734375, + "learning_rate": 9.975443912490073e-06, + "loss": 83.7785, + "step": 15890 + }, + { + "epoch": 0.0642380119345338, + "grad_norm": 620.9443359375, + "learning_rate": 9.975304107342268e-06, + "loss": 49.9498, + "step": 15900 + }, + { + "epoch": 0.06427841319990142, + "grad_norm": 780.52099609375, + "learning_rate": 9.97516390633235e-06, + "loss": 45.0322, + "step": 15910 + }, + { + "epoch": 0.06431881446526905, + "grad_norm": 570.9188232421875, + "learning_rate": 9.975023309471473e-06, + "loss": 35.3322, + "step": 15920 + }, + { + "epoch": 0.06435921573063669, + "grad_norm": 650.2386474609375, + "learning_rate": 9.974882316770823e-06, + "loss": 60.9373, + "step": 15930 + }, + { + "epoch": 0.06439961699600431, + "grad_norm": 369.92730712890625, + "learning_rate": 9.974740928241617e-06, + "loss": 41.9204, + "step": 15940 + }, + { + "epoch": 0.06444001826137194, + "grad_norm": 427.5985412597656, + "learning_rate": 9.974599143895107e-06, + "loss": 47.1646, + "step": 15950 + }, + { + "epoch": 0.06448041952673958, + "grad_norm": 552.6906127929688, + "learning_rate": 9.974456963742573e-06, + "loss": 50.5909, + "step": 15960 + }, + { + "epoch": 0.0645208207921072, + "grad_norm": 895.8667602539062, + "learning_rate": 9.97431438779533e-06, + "loss": 59.4875, + "step": 15970 + }, + { + "epoch": 0.06456122205747485, + "grad_norm": 756.911376953125, + "learning_rate": 9.974171416064719e-06, + "loss": 45.0479, + "step": 15980 + }, + { + "epoch": 0.06460162332284247, + "grad_norm": 492.8658142089844, + "learning_rate": 9.974028048562118e-06, + "loss": 59.8826, + "step": 15990 + }, + { + "epoch": 0.0646420245882101, + "grad_norm": 1029.951904296875, + "learning_rate": 9.973884285298932e-06, + "loss": 62.1363, + "step": 16000 + }, + { + "epoch": 0.06468242585357774, + "grad_norm": 1486.369873046875, + "learning_rate": 9.9737401262866e-06, + "loss": 58.4914, + "step": 16010 + }, + { + "epoch": 0.06472282711894536, + "grad_norm": 532.9174194335938, + "learning_rate": 9.973595571536593e-06, + "loss": 58.1295, + "step": 16020 + }, + { + "epoch": 0.06476322838431299, + "grad_norm": 768.2650756835938, + "learning_rate": 9.973450621060412e-06, + "loss": 58.3537, + "step": 16030 + }, + { + "epoch": 0.06480362964968063, + "grad_norm": 626.2802734375, + "learning_rate": 9.97330527486959e-06, + "loss": 74.6434, + "step": 16040 + }, + { + "epoch": 0.06484403091504826, + "grad_norm": 805.8840942382812, + "learning_rate": 9.973159532975691e-06, + "loss": 50.8522, + "step": 16050 + }, + { + "epoch": 0.0648844321804159, + "grad_norm": 612.8935546875, + "learning_rate": 9.973013395390314e-06, + "loss": 43.9846, + "step": 16060 + }, + { + "epoch": 0.06492483344578352, + "grad_norm": 711.977294921875, + "learning_rate": 9.972866862125083e-06, + "loss": 54.3085, + "step": 16070 + }, + { + "epoch": 0.06496523471115115, + "grad_norm": 1257.6363525390625, + "learning_rate": 9.972719933191657e-06, + "loss": 45.2739, + "step": 16080 + }, + { + "epoch": 0.06500563597651879, + "grad_norm": 828.5887451171875, + "learning_rate": 9.97257260860173e-06, + "loss": 51.8254, + "step": 16090 + }, + { + "epoch": 0.06504603724188641, + "grad_norm": 885.0379638671875, + "learning_rate": 9.972424888367019e-06, + "loss": 44.1245, + "step": 16100 + }, + { + "epoch": 0.06508643850725404, + "grad_norm": 699.1367797851562, + "learning_rate": 9.972276772499281e-06, + "loss": 49.8983, + "step": 16110 + }, + { + "epoch": 0.06512683977262168, + "grad_norm": 619.27099609375, + "learning_rate": 9.9721282610103e-06, + "loss": 43.9222, + "step": 16120 + }, + { + "epoch": 0.0651672410379893, + "grad_norm": 1254.580322265625, + "learning_rate": 9.971979353911891e-06, + "loss": 65.6907, + "step": 16130 + }, + { + "epoch": 0.06520764230335695, + "grad_norm": 856.28759765625, + "learning_rate": 9.971830051215905e-06, + "loss": 73.6939, + "step": 16140 + }, + { + "epoch": 0.06524804356872457, + "grad_norm": 703.4512329101562, + "learning_rate": 9.97168035293422e-06, + "loss": 53.0731, + "step": 16150 + }, + { + "epoch": 0.0652884448340922, + "grad_norm": 1020.3666381835938, + "learning_rate": 9.971530259078743e-06, + "loss": 42.901, + "step": 16160 + }, + { + "epoch": 0.06532884609945984, + "grad_norm": 570.0337524414062, + "learning_rate": 9.971379769661422e-06, + "loss": 68.6924, + "step": 16170 + }, + { + "epoch": 0.06536924736482747, + "grad_norm": 586.2837524414062, + "learning_rate": 9.971228884694228e-06, + "loss": 43.7809, + "step": 16180 + }, + { + "epoch": 0.06540964863019509, + "grad_norm": 636.9314575195312, + "learning_rate": 9.971077604189166e-06, + "loss": 68.6276, + "step": 16190 + }, + { + "epoch": 0.06545004989556273, + "grad_norm": 369.6883850097656, + "learning_rate": 9.970925928158275e-06, + "loss": 38.0615, + "step": 16200 + }, + { + "epoch": 0.06549045116093036, + "grad_norm": 545.9331665039062, + "learning_rate": 9.970773856613617e-06, + "loss": 60.5622, + "step": 16210 + }, + { + "epoch": 0.065530852426298, + "grad_norm": 390.1427001953125, + "learning_rate": 9.970621389567301e-06, + "loss": 51.0867, + "step": 16220 + }, + { + "epoch": 0.06557125369166562, + "grad_norm": 473.8525085449219, + "learning_rate": 9.97046852703145e-06, + "loss": 72.4376, + "step": 16230 + }, + { + "epoch": 0.06561165495703325, + "grad_norm": 807.6546630859375, + "learning_rate": 9.970315269018231e-06, + "loss": 48.2503, + "step": 16240 + }, + { + "epoch": 0.06565205622240089, + "grad_norm": 255.02505493164062, + "learning_rate": 9.970161615539837e-06, + "loss": 40.2138, + "step": 16250 + }, + { + "epoch": 0.06569245748776852, + "grad_norm": 471.5314636230469, + "learning_rate": 9.970007566608492e-06, + "loss": 55.3855, + "step": 16260 + }, + { + "epoch": 0.06573285875313614, + "grad_norm": 512.8792724609375, + "learning_rate": 9.969853122236455e-06, + "loss": 62.5609, + "step": 16270 + }, + { + "epoch": 0.06577326001850378, + "grad_norm": 733.0510864257812, + "learning_rate": 9.969698282436013e-06, + "loss": 87.3586, + "step": 16280 + }, + { + "epoch": 0.06581366128387141, + "grad_norm": 451.9583740234375, + "learning_rate": 9.969543047219487e-06, + "loss": 50.8946, + "step": 16290 + }, + { + "epoch": 0.06585406254923905, + "grad_norm": 638.0576171875, + "learning_rate": 9.969387416599227e-06, + "loss": 77.3127, + "step": 16300 + }, + { + "epoch": 0.06589446381460667, + "grad_norm": 542.7348022460938, + "learning_rate": 9.969231390587618e-06, + "loss": 53.3758, + "step": 16310 + }, + { + "epoch": 0.0659348650799743, + "grad_norm": 553.0634155273438, + "learning_rate": 9.969074969197072e-06, + "loss": 57.5647, + "step": 16320 + }, + { + "epoch": 0.06597526634534194, + "grad_norm": 298.8785400390625, + "learning_rate": 9.968918152440036e-06, + "loss": 52.9646, + "step": 16330 + }, + { + "epoch": 0.06601566761070957, + "grad_norm": 943.7760009765625, + "learning_rate": 9.968760940328987e-06, + "loss": 52.4363, + "step": 16340 + }, + { + "epoch": 0.06605606887607719, + "grad_norm": 841.0722045898438, + "learning_rate": 9.968603332876435e-06, + "loss": 65.1616, + "step": 16350 + }, + { + "epoch": 0.06609647014144483, + "grad_norm": 310.6723327636719, + "learning_rate": 9.968445330094915e-06, + "loss": 49.5367, + "step": 16360 + }, + { + "epoch": 0.06613687140681246, + "grad_norm": 364.29095458984375, + "learning_rate": 9.968286931997004e-06, + "loss": 42.1819, + "step": 16370 + }, + { + "epoch": 0.0661772726721801, + "grad_norm": 806.6517333984375, + "learning_rate": 9.968128138595304e-06, + "loss": 59.3689, + "step": 16380 + }, + { + "epoch": 0.06621767393754772, + "grad_norm": 540.7401733398438, + "learning_rate": 9.967968949902448e-06, + "loss": 50.0984, + "step": 16390 + }, + { + "epoch": 0.06625807520291535, + "grad_norm": 734.2467041015625, + "learning_rate": 9.967809365931102e-06, + "loss": 39.6099, + "step": 16400 + }, + { + "epoch": 0.06629847646828299, + "grad_norm": 934.7858276367188, + "learning_rate": 9.967649386693964e-06, + "loss": 46.0471, + "step": 16410 + }, + { + "epoch": 0.06633887773365062, + "grad_norm": 823.3958129882812, + "learning_rate": 9.967489012203765e-06, + "loss": 40.7156, + "step": 16420 + }, + { + "epoch": 0.06637927899901824, + "grad_norm": 1129.1982421875, + "learning_rate": 9.967328242473261e-06, + "loss": 42.2821, + "step": 16430 + }, + { + "epoch": 0.06641968026438588, + "grad_norm": 509.66717529296875, + "learning_rate": 9.967167077515246e-06, + "loss": 54.3899, + "step": 16440 + }, + { + "epoch": 0.06646008152975351, + "grad_norm": 269.69049072265625, + "learning_rate": 9.967005517342544e-06, + "loss": 41.5783, + "step": 16450 + }, + { + "epoch": 0.06650048279512115, + "grad_norm": 178.01014709472656, + "learning_rate": 9.966843561968005e-06, + "loss": 43.4332, + "step": 16460 + }, + { + "epoch": 0.06654088406048878, + "grad_norm": 854.1151733398438, + "learning_rate": 9.966681211404521e-06, + "loss": 57.2772, + "step": 16470 + }, + { + "epoch": 0.0665812853258564, + "grad_norm": 602.7614135742188, + "learning_rate": 9.966518465665007e-06, + "loss": 35.6911, + "step": 16480 + }, + { + "epoch": 0.06662168659122404, + "grad_norm": 882.269775390625, + "learning_rate": 9.966355324762412e-06, + "loss": 44.7084, + "step": 16490 + }, + { + "epoch": 0.06666208785659167, + "grad_norm": 649.4912719726562, + "learning_rate": 9.966191788709716e-06, + "loss": 63.0644, + "step": 16500 + }, + { + "epoch": 0.0667024891219593, + "grad_norm": 466.97381591796875, + "learning_rate": 9.966027857519931e-06, + "loss": 29.1349, + "step": 16510 + }, + { + "epoch": 0.06674289038732693, + "grad_norm": 271.84954833984375, + "learning_rate": 9.9658635312061e-06, + "loss": 58.9536, + "step": 16520 + }, + { + "epoch": 0.06678329165269456, + "grad_norm": 553.5524291992188, + "learning_rate": 9.965698809781298e-06, + "loss": 46.6408, + "step": 16530 + }, + { + "epoch": 0.0668236929180622, + "grad_norm": 872.4886474609375, + "learning_rate": 9.965533693258632e-06, + "loss": 45.775, + "step": 16540 + }, + { + "epoch": 0.06686409418342983, + "grad_norm": 670.1477661132812, + "learning_rate": 9.965368181651239e-06, + "loss": 75.9548, + "step": 16550 + }, + { + "epoch": 0.06690449544879745, + "grad_norm": 708.0873413085938, + "learning_rate": 9.965202274972288e-06, + "loss": 53.3991, + "step": 16560 + }, + { + "epoch": 0.06694489671416509, + "grad_norm": 487.0908203125, + "learning_rate": 9.965035973234977e-06, + "loss": 50.9436, + "step": 16570 + }, + { + "epoch": 0.06698529797953272, + "grad_norm": 1012.3402099609375, + "learning_rate": 9.964869276452542e-06, + "loss": 61.3239, + "step": 16580 + }, + { + "epoch": 0.06702569924490034, + "grad_norm": 987.9320068359375, + "learning_rate": 9.964702184638244e-06, + "loss": 65.6153, + "step": 16590 + }, + { + "epoch": 0.06706610051026798, + "grad_norm": 512.2110595703125, + "learning_rate": 9.964534697805377e-06, + "loss": 48.5166, + "step": 16600 + }, + { + "epoch": 0.06710650177563561, + "grad_norm": 300.725341796875, + "learning_rate": 9.96436681596727e-06, + "loss": 53.5148, + "step": 16610 + }, + { + "epoch": 0.06714690304100325, + "grad_norm": 512.7298583984375, + "learning_rate": 9.964198539137277e-06, + "loss": 59.9783, + "step": 16620 + }, + { + "epoch": 0.06718730430637088, + "grad_norm": 1161.68994140625, + "learning_rate": 9.964029867328791e-06, + "loss": 74.497, + "step": 16630 + }, + { + "epoch": 0.0672277055717385, + "grad_norm": 1012.3746337890625, + "learning_rate": 9.963860800555228e-06, + "loss": 78.1185, + "step": 16640 + }, + { + "epoch": 0.06726810683710614, + "grad_norm": 596.1810302734375, + "learning_rate": 9.963691338830045e-06, + "loss": 46.9206, + "step": 16650 + }, + { + "epoch": 0.06730850810247377, + "grad_norm": 375.6921081542969, + "learning_rate": 9.963521482166718e-06, + "loss": 42.1555, + "step": 16660 + }, + { + "epoch": 0.0673489093678414, + "grad_norm": 482.91064453125, + "learning_rate": 9.96335123057877e-06, + "loss": 54.3135, + "step": 16670 + }, + { + "epoch": 0.06738931063320903, + "grad_norm": 398.011962890625, + "learning_rate": 9.963180584079741e-06, + "loss": 41.0378, + "step": 16680 + }, + { + "epoch": 0.06742971189857666, + "grad_norm": 317.195068359375, + "learning_rate": 9.963009542683214e-06, + "loss": 53.7055, + "step": 16690 + }, + { + "epoch": 0.0674701131639443, + "grad_norm": 198.84494018554688, + "learning_rate": 9.962838106402791e-06, + "loss": 43.8947, + "step": 16700 + }, + { + "epoch": 0.06751051442931193, + "grad_norm": 284.9634704589844, + "learning_rate": 9.962666275252117e-06, + "loss": 58.5298, + "step": 16710 + }, + { + "epoch": 0.06755091569467955, + "grad_norm": 675.8781127929688, + "learning_rate": 9.962494049244866e-06, + "loss": 54.8316, + "step": 16720 + }, + { + "epoch": 0.06759131696004719, + "grad_norm": 852.4727783203125, + "learning_rate": 9.962321428394735e-06, + "loss": 52.3011, + "step": 16730 + }, + { + "epoch": 0.06763171822541482, + "grad_norm": 344.2173767089844, + "learning_rate": 9.962148412715464e-06, + "loss": 43.6536, + "step": 16740 + }, + { + "epoch": 0.06767211949078245, + "grad_norm": 601.1047973632812, + "learning_rate": 9.961975002220816e-06, + "loss": 44.1749, + "step": 16750 + }, + { + "epoch": 0.06771252075615009, + "grad_norm": 740.0291137695312, + "learning_rate": 9.96180119692459e-06, + "loss": 42.4861, + "step": 16760 + }, + { + "epoch": 0.06775292202151771, + "grad_norm": 573.695556640625, + "learning_rate": 9.961626996840613e-06, + "loss": 39.4819, + "step": 16770 + }, + { + "epoch": 0.06779332328688535, + "grad_norm": 616.494384765625, + "learning_rate": 9.961452401982748e-06, + "loss": 54.9687, + "step": 16780 + }, + { + "epoch": 0.06783372455225298, + "grad_norm": 780.5487060546875, + "learning_rate": 9.961277412364884e-06, + "loss": 54.1724, + "step": 16790 + }, + { + "epoch": 0.0678741258176206, + "grad_norm": 1156.9290771484375, + "learning_rate": 9.961102028000948e-06, + "loss": 68.4075, + "step": 16800 + }, + { + "epoch": 0.06791452708298824, + "grad_norm": 1188.955810546875, + "learning_rate": 9.96092624890489e-06, + "loss": 67.3586, + "step": 16810 + }, + { + "epoch": 0.06795492834835587, + "grad_norm": 912.7185668945312, + "learning_rate": 9.960750075090698e-06, + "loss": 51.0991, + "step": 16820 + }, + { + "epoch": 0.0679953296137235, + "grad_norm": 918.0380859375, + "learning_rate": 9.960573506572391e-06, + "loss": 46.7497, + "step": 16830 + }, + { + "epoch": 0.06803573087909114, + "grad_norm": 1461.614501953125, + "learning_rate": 9.960396543364013e-06, + "loss": 52.6995, + "step": 16840 + }, + { + "epoch": 0.06807613214445876, + "grad_norm": 1134.6422119140625, + "learning_rate": 9.96021918547965e-06, + "loss": 52.5664, + "step": 16850 + }, + { + "epoch": 0.0681165334098264, + "grad_norm": 1380.5745849609375, + "learning_rate": 9.96004143293341e-06, + "loss": 50.9108, + "step": 16860 + }, + { + "epoch": 0.06815693467519403, + "grad_norm": 706.7164306640625, + "learning_rate": 9.959863285739436e-06, + "loss": 75.2603, + "step": 16870 + }, + { + "epoch": 0.06819733594056165, + "grad_norm": 847.0613403320312, + "learning_rate": 9.959684743911904e-06, + "loss": 56.6586, + "step": 16880 + }, + { + "epoch": 0.0682377372059293, + "grad_norm": 560.4253540039062, + "learning_rate": 9.959505807465018e-06, + "loss": 47.3038, + "step": 16890 + }, + { + "epoch": 0.06827813847129692, + "grad_norm": 759.576171875, + "learning_rate": 9.959326476413016e-06, + "loss": 60.7899, + "step": 16900 + }, + { + "epoch": 0.06831853973666455, + "grad_norm": 742.4689331054688, + "learning_rate": 9.959146750770167e-06, + "loss": 56.8813, + "step": 16910 + }, + { + "epoch": 0.06835894100203219, + "grad_norm": 1035.506103515625, + "learning_rate": 9.95896663055077e-06, + "loss": 47.8974, + "step": 16920 + }, + { + "epoch": 0.06839934226739981, + "grad_norm": 734.8804931640625, + "learning_rate": 9.958786115769157e-06, + "loss": 59.3255, + "step": 16930 + }, + { + "epoch": 0.06843974353276745, + "grad_norm": 514.1586303710938, + "learning_rate": 9.958605206439692e-06, + "loss": 56.9457, + "step": 16940 + }, + { + "epoch": 0.06848014479813508, + "grad_norm": 523.6434326171875, + "learning_rate": 9.958423902576764e-06, + "loss": 37.5847, + "step": 16950 + }, + { + "epoch": 0.0685205460635027, + "grad_norm": 855.633056640625, + "learning_rate": 9.958242204194804e-06, + "loss": 62.8069, + "step": 16960 + }, + { + "epoch": 0.06856094732887034, + "grad_norm": 1315.413818359375, + "learning_rate": 9.958060111308267e-06, + "loss": 63.7991, + "step": 16970 + }, + { + "epoch": 0.06860134859423797, + "grad_norm": 460.4294738769531, + "learning_rate": 9.957877623931642e-06, + "loss": 55.9995, + "step": 16980 + }, + { + "epoch": 0.0686417498596056, + "grad_norm": 2325.47412109375, + "learning_rate": 9.95769474207945e-06, + "loss": 81.635, + "step": 16990 + }, + { + "epoch": 0.06868215112497324, + "grad_norm": 521.9066162109375, + "learning_rate": 9.957511465766236e-06, + "loss": 120.9248, + "step": 17000 + }, + { + "epoch": 0.06872255239034086, + "grad_norm": 650.1915893554688, + "learning_rate": 9.957327795006589e-06, + "loss": 61.7534, + "step": 17010 + }, + { + "epoch": 0.0687629536557085, + "grad_norm": 781.476806640625, + "learning_rate": 9.95714372981512e-06, + "loss": 52.5771, + "step": 17020 + }, + { + "epoch": 0.06880335492107613, + "grad_norm": 1087.2088623046875, + "learning_rate": 9.956959270206474e-06, + "loss": 55.3713, + "step": 17030 + }, + { + "epoch": 0.06884375618644376, + "grad_norm": 891.9490356445312, + "learning_rate": 9.956774416195329e-06, + "loss": 50.7214, + "step": 17040 + }, + { + "epoch": 0.0688841574518114, + "grad_norm": 289.277587890625, + "learning_rate": 9.956589167796392e-06, + "loss": 58.5371, + "step": 17050 + }, + { + "epoch": 0.06892455871717902, + "grad_norm": 641.5277709960938, + "learning_rate": 9.956403525024402e-06, + "loss": 46.9823, + "step": 17060 + }, + { + "epoch": 0.06896495998254665, + "grad_norm": 1088.70458984375, + "learning_rate": 9.956217487894131e-06, + "loss": 49.6527, + "step": 17070 + }, + { + "epoch": 0.06900536124791429, + "grad_norm": 795.5383911132812, + "learning_rate": 9.95603105642038e-06, + "loss": 41.3176, + "step": 17080 + }, + { + "epoch": 0.06904576251328191, + "grad_norm": 531.6299438476562, + "learning_rate": 9.955844230617985e-06, + "loss": 48.9703, + "step": 17090 + }, + { + "epoch": 0.06908616377864955, + "grad_norm": 1016.78271484375, + "learning_rate": 9.955657010501807e-06, + "loss": 54.1437, + "step": 17100 + }, + { + "epoch": 0.06912656504401718, + "grad_norm": 447.7895202636719, + "learning_rate": 9.955469396086743e-06, + "loss": 62.9068, + "step": 17110 + }, + { + "epoch": 0.0691669663093848, + "grad_norm": 729.5142822265625, + "learning_rate": 9.955281387387724e-06, + "loss": 39.2298, + "step": 17120 + }, + { + "epoch": 0.06920736757475245, + "grad_norm": 582.5546875, + "learning_rate": 9.955092984419705e-06, + "loss": 52.08, + "step": 17130 + }, + { + "epoch": 0.06924776884012007, + "grad_norm": 921.3667602539062, + "learning_rate": 9.954904187197679e-06, + "loss": 57.2562, + "step": 17140 + }, + { + "epoch": 0.0692881701054877, + "grad_norm": 556.9216918945312, + "learning_rate": 9.954714995736667e-06, + "loss": 47.0871, + "step": 17150 + }, + { + "epoch": 0.06932857137085534, + "grad_norm": 1060.7427978515625, + "learning_rate": 9.95452541005172e-06, + "loss": 54.6456, + "step": 17160 + }, + { + "epoch": 0.06936897263622296, + "grad_norm": 492.2557678222656, + "learning_rate": 9.954335430157926e-06, + "loss": 57.3385, + "step": 17170 + }, + { + "epoch": 0.0694093739015906, + "grad_norm": 812.3903198242188, + "learning_rate": 9.9541450560704e-06, + "loss": 46.2137, + "step": 17180 + }, + { + "epoch": 0.06944977516695823, + "grad_norm": 1028.221923828125, + "learning_rate": 9.953954287804286e-06, + "loss": 77.8132, + "step": 17190 + }, + { + "epoch": 0.06949017643232586, + "grad_norm": 511.1866149902344, + "learning_rate": 9.953763125374767e-06, + "loss": 53.9452, + "step": 17200 + }, + { + "epoch": 0.0695305776976935, + "grad_norm": 617.6327514648438, + "learning_rate": 9.953571568797049e-06, + "loss": 49.3006, + "step": 17210 + }, + { + "epoch": 0.06957097896306112, + "grad_norm": 725.1810913085938, + "learning_rate": 9.953379618086377e-06, + "loss": 53.0108, + "step": 17220 + }, + { + "epoch": 0.06961138022842875, + "grad_norm": 1026.266357421875, + "learning_rate": 9.95318727325802e-06, + "loss": 60.5115, + "step": 17230 + }, + { + "epoch": 0.06965178149379639, + "grad_norm": 457.9506530761719, + "learning_rate": 9.952994534327283e-06, + "loss": 40.233, + "step": 17240 + }, + { + "epoch": 0.06969218275916401, + "grad_norm": 95.95256042480469, + "learning_rate": 9.952801401309504e-06, + "loss": 54.0698, + "step": 17250 + }, + { + "epoch": 0.06973258402453165, + "grad_norm": 369.8098449707031, + "learning_rate": 9.952607874220048e-06, + "loss": 29.6986, + "step": 17260 + }, + { + "epoch": 0.06977298528989928, + "grad_norm": 605.2461547851562, + "learning_rate": 9.952413953074312e-06, + "loss": 48.9784, + "step": 17270 + }, + { + "epoch": 0.06981338655526691, + "grad_norm": 1227.542724609375, + "learning_rate": 9.952219637887725e-06, + "loss": 63.727, + "step": 17280 + }, + { + "epoch": 0.06985378782063455, + "grad_norm": 554.7730102539062, + "learning_rate": 9.952024928675752e-06, + "loss": 55.0353, + "step": 17290 + }, + { + "epoch": 0.06989418908600217, + "grad_norm": 1237.1422119140625, + "learning_rate": 9.951829825453881e-06, + "loss": 41.5354, + "step": 17300 + }, + { + "epoch": 0.0699345903513698, + "grad_norm": 508.1934814453125, + "learning_rate": 9.951634328237635e-06, + "loss": 54.589, + "step": 17310 + }, + { + "epoch": 0.06997499161673744, + "grad_norm": 568.03076171875, + "learning_rate": 9.951438437042572e-06, + "loss": 50.1381, + "step": 17320 + }, + { + "epoch": 0.07001539288210507, + "grad_norm": 330.10528564453125, + "learning_rate": 9.951242151884275e-06, + "loss": 33.1387, + "step": 17330 + }, + { + "epoch": 0.0700557941474727, + "grad_norm": 426.9512634277344, + "learning_rate": 9.951045472778365e-06, + "loss": 48.9173, + "step": 17340 + }, + { + "epoch": 0.07009619541284033, + "grad_norm": 860.360595703125, + "learning_rate": 9.950848399740488e-06, + "loss": 52.8169, + "step": 17350 + }, + { + "epoch": 0.07013659667820796, + "grad_norm": 694.7147827148438, + "learning_rate": 9.950650932786325e-06, + "loss": 53.9027, + "step": 17360 + }, + { + "epoch": 0.0701769979435756, + "grad_norm": 2214.466552734375, + "learning_rate": 9.95045307193159e-06, + "loss": 72.7385, + "step": 17370 + }, + { + "epoch": 0.07021739920894322, + "grad_norm": 922.2384033203125, + "learning_rate": 9.95025481719202e-06, + "loss": 69.9777, + "step": 17380 + }, + { + "epoch": 0.07025780047431085, + "grad_norm": 1792.91162109375, + "learning_rate": 9.950056168583395e-06, + "loss": 48.3632, + "step": 17390 + }, + { + "epoch": 0.07029820173967849, + "grad_norm": 718.42431640625, + "learning_rate": 9.949857126121519e-06, + "loss": 30.7003, + "step": 17400 + }, + { + "epoch": 0.07033860300504612, + "grad_norm": 378.0965881347656, + "learning_rate": 9.949657689822226e-06, + "loss": 66.112, + "step": 17410 + }, + { + "epoch": 0.07037900427041376, + "grad_norm": 893.2484130859375, + "learning_rate": 9.949457859701388e-06, + "loss": 71.9019, + "step": 17420 + }, + { + "epoch": 0.07041940553578138, + "grad_norm": 536.0899658203125, + "learning_rate": 9.949257635774903e-06, + "loss": 57.2, + "step": 17430 + }, + { + "epoch": 0.07045980680114901, + "grad_norm": 724.5510864257812, + "learning_rate": 9.9490570180587e-06, + "loss": 41.6391, + "step": 17440 + }, + { + "epoch": 0.07050020806651665, + "grad_norm": 1140.1925048828125, + "learning_rate": 9.948856006568746e-06, + "loss": 34.9596, + "step": 17450 + }, + { + "epoch": 0.07054060933188427, + "grad_norm": 1027.14697265625, + "learning_rate": 9.94865460132103e-06, + "loss": 47.5769, + "step": 17460 + }, + { + "epoch": 0.0705810105972519, + "grad_norm": 1001.2021484375, + "learning_rate": 9.948452802331578e-06, + "loss": 63.4763, + "step": 17470 + }, + { + "epoch": 0.07062141186261954, + "grad_norm": 751.27978515625, + "learning_rate": 9.948250609616449e-06, + "loss": 55.1303, + "step": 17480 + }, + { + "epoch": 0.07066181312798717, + "grad_norm": 355.3070068359375, + "learning_rate": 9.948048023191728e-06, + "loss": 43.9042, + "step": 17490 + }, + { + "epoch": 0.0707022143933548, + "grad_norm": 516.75244140625, + "learning_rate": 9.947845043073533e-06, + "loss": 44.1306, + "step": 17500 + }, + { + "epoch": 0.07074261565872243, + "grad_norm": 1007.9006958007812, + "learning_rate": 9.947641669278016e-06, + "loss": 61.7492, + "step": 17510 + }, + { + "epoch": 0.07078301692409006, + "grad_norm": 462.08758544921875, + "learning_rate": 9.947437901821358e-06, + "loss": 38.7414, + "step": 17520 + }, + { + "epoch": 0.0708234181894577, + "grad_norm": 893.343505859375, + "learning_rate": 9.947233740719772e-06, + "loss": 50.9898, + "step": 17530 + }, + { + "epoch": 0.07086381945482532, + "grad_norm": 501.1763916015625, + "learning_rate": 9.947029185989501e-06, + "loss": 49.4258, + "step": 17540 + }, + { + "epoch": 0.07090422072019295, + "grad_norm": 495.0986022949219, + "learning_rate": 9.946824237646823e-06, + "loss": 68.988, + "step": 17550 + }, + { + "epoch": 0.07094462198556059, + "grad_norm": 327.663818359375, + "learning_rate": 9.946618895708043e-06, + "loss": 73.2408, + "step": 17560 + }, + { + "epoch": 0.07098502325092822, + "grad_norm": 346.1497497558594, + "learning_rate": 9.946413160189498e-06, + "loss": 39.7363, + "step": 17570 + }, + { + "epoch": 0.07102542451629586, + "grad_norm": 592.0716552734375, + "learning_rate": 9.946207031107562e-06, + "loss": 49.571, + "step": 17580 + }, + { + "epoch": 0.07106582578166348, + "grad_norm": 463.5035400390625, + "learning_rate": 9.94600050847863e-06, + "loss": 58.9509, + "step": 17590 + }, + { + "epoch": 0.07110622704703111, + "grad_norm": 518.01904296875, + "learning_rate": 9.945793592319137e-06, + "loss": 57.2311, + "step": 17600 + }, + { + "epoch": 0.07114662831239875, + "grad_norm": 1007.0841064453125, + "learning_rate": 9.945586282645545e-06, + "loss": 50.4529, + "step": 17610 + }, + { + "epoch": 0.07118702957776638, + "grad_norm": 822.48583984375, + "learning_rate": 9.945378579474351e-06, + "loss": 62.1675, + "step": 17620 + }, + { + "epoch": 0.071227430843134, + "grad_norm": 550.595458984375, + "learning_rate": 9.945170482822079e-06, + "loss": 44.7671, + "step": 17630 + }, + { + "epoch": 0.07126783210850164, + "grad_norm": 550.9453125, + "learning_rate": 9.944961992705288e-06, + "loss": 40.3771, + "step": 17640 + }, + { + "epoch": 0.07130823337386927, + "grad_norm": 926.0831298828125, + "learning_rate": 9.944753109140564e-06, + "loss": 83.9147, + "step": 17650 + }, + { + "epoch": 0.07134863463923691, + "grad_norm": 303.1770324707031, + "learning_rate": 9.94454383214453e-06, + "loss": 47.9595, + "step": 17660 + }, + { + "epoch": 0.07138903590460453, + "grad_norm": 736.1362915039062, + "learning_rate": 9.944334161733835e-06, + "loss": 58.8802, + "step": 17670 + }, + { + "epoch": 0.07142943716997216, + "grad_norm": 927.33935546875, + "learning_rate": 9.944124097925161e-06, + "loss": 65.2757, + "step": 17680 + }, + { + "epoch": 0.0714698384353398, + "grad_norm": 337.0379638671875, + "learning_rate": 9.943913640735224e-06, + "loss": 52.1521, + "step": 17690 + }, + { + "epoch": 0.07151023970070743, + "grad_norm": 814.1279907226562, + "learning_rate": 9.94370279018077e-06, + "loss": 54.99, + "step": 17700 + }, + { + "epoch": 0.07155064096607505, + "grad_norm": 695.0791625976562, + "learning_rate": 9.94349154627857e-06, + "loss": 48.8546, + "step": 17710 + }, + { + "epoch": 0.07159104223144269, + "grad_norm": 629.86279296875, + "learning_rate": 9.943279909045438e-06, + "loss": 45.39, + "step": 17720 + }, + { + "epoch": 0.07163144349681032, + "grad_norm": 572.5151977539062, + "learning_rate": 9.94306787849821e-06, + "loss": 72.4478, + "step": 17730 + }, + { + "epoch": 0.07167184476217796, + "grad_norm": 473.1834411621094, + "learning_rate": 9.942855454653755e-06, + "loss": 44.4805, + "step": 17740 + }, + { + "epoch": 0.07171224602754558, + "grad_norm": 653.8013305664062, + "learning_rate": 9.942642637528977e-06, + "loss": 49.7141, + "step": 17750 + }, + { + "epoch": 0.07175264729291321, + "grad_norm": 724.1669921875, + "learning_rate": 9.942429427140807e-06, + "loss": 42.9303, + "step": 17760 + }, + { + "epoch": 0.07179304855828085, + "grad_norm": 848.2758178710938, + "learning_rate": 9.942215823506211e-06, + "loss": 71.6385, + "step": 17770 + }, + { + "epoch": 0.07183344982364848, + "grad_norm": 537.5114135742188, + "learning_rate": 9.942001826642184e-06, + "loss": 55.1928, + "step": 17780 + }, + { + "epoch": 0.0718738510890161, + "grad_norm": 517.9396362304688, + "learning_rate": 9.941787436565751e-06, + "loss": 42.2545, + "step": 17790 + }, + { + "epoch": 0.07191425235438374, + "grad_norm": 1042.4097900390625, + "learning_rate": 9.941572653293974e-06, + "loss": 41.3418, + "step": 17800 + }, + { + "epoch": 0.07195465361975137, + "grad_norm": 686.4190673828125, + "learning_rate": 9.941357476843938e-06, + "loss": 32.3788, + "step": 17810 + }, + { + "epoch": 0.07199505488511901, + "grad_norm": 411.7096862792969, + "learning_rate": 9.941141907232766e-06, + "loss": 59.7465, + "step": 17820 + }, + { + "epoch": 0.07203545615048663, + "grad_norm": 838.7464599609375, + "learning_rate": 9.940925944477608e-06, + "loss": 51.814, + "step": 17830 + }, + { + "epoch": 0.07207585741585426, + "grad_norm": 1071.185546875, + "learning_rate": 9.940709588595649e-06, + "loss": 53.1841, + "step": 17840 + }, + { + "epoch": 0.0721162586812219, + "grad_norm": 970.0137939453125, + "learning_rate": 9.940492839604103e-06, + "loss": 59.9117, + "step": 17850 + }, + { + "epoch": 0.07215665994658953, + "grad_norm": 441.033935546875, + "learning_rate": 9.940275697520216e-06, + "loss": 38.621, + "step": 17860 + }, + { + "epoch": 0.07219706121195715, + "grad_norm": 347.8889465332031, + "learning_rate": 9.940058162361264e-06, + "loss": 41.1021, + "step": 17870 + }, + { + "epoch": 0.0722374624773248, + "grad_norm": 579.1629028320312, + "learning_rate": 9.939840234144556e-06, + "loss": 54.0829, + "step": 17880 + }, + { + "epoch": 0.07227786374269242, + "grad_norm": 941.6212158203125, + "learning_rate": 9.939621912887431e-06, + "loss": 50.8714, + "step": 17890 + }, + { + "epoch": 0.07231826500806006, + "grad_norm": 860.2476196289062, + "learning_rate": 9.93940319860726e-06, + "loss": 56.4514, + "step": 17900 + }, + { + "epoch": 0.07235866627342769, + "grad_norm": 678.0944213867188, + "learning_rate": 9.939184091321445e-06, + "loss": 51.4201, + "step": 17910 + }, + { + "epoch": 0.07239906753879531, + "grad_norm": 1071.39013671875, + "learning_rate": 9.938964591047421e-06, + "loss": 35.9462, + "step": 17920 + }, + { + "epoch": 0.07243946880416295, + "grad_norm": 311.69110107421875, + "learning_rate": 9.938744697802651e-06, + "loss": 53.9317, + "step": 17930 + }, + { + "epoch": 0.07247987006953058, + "grad_norm": 1138.3892822265625, + "learning_rate": 9.938524411604631e-06, + "loss": 71.4564, + "step": 17940 + }, + { + "epoch": 0.0725202713348982, + "grad_norm": 670.2659301757812, + "learning_rate": 9.938303732470888e-06, + "loss": 54.5628, + "step": 17950 + }, + { + "epoch": 0.07256067260026584, + "grad_norm": 587.255615234375, + "learning_rate": 9.938082660418981e-06, + "loss": 70.5084, + "step": 17960 + }, + { + "epoch": 0.07260107386563347, + "grad_norm": 466.6063537597656, + "learning_rate": 9.937861195466498e-06, + "loss": 53.5537, + "step": 17970 + }, + { + "epoch": 0.07264147513100111, + "grad_norm": 783.6781616210938, + "learning_rate": 9.937639337631064e-06, + "loss": 36.8183, + "step": 17980 + }, + { + "epoch": 0.07268187639636874, + "grad_norm": 849.5465698242188, + "learning_rate": 9.937417086930328e-06, + "loss": 42.0539, + "step": 17990 + }, + { + "epoch": 0.07272227766173636, + "grad_norm": 1289.6552734375, + "learning_rate": 9.937194443381972e-06, + "loss": 46.7966, + "step": 18000 + }, + { + "epoch": 0.072762678927104, + "grad_norm": 865.15283203125, + "learning_rate": 9.936971407003714e-06, + "loss": 60.911, + "step": 18010 + }, + { + "epoch": 0.07280308019247163, + "grad_norm": 406.6366882324219, + "learning_rate": 9.936747977813299e-06, + "loss": 46.7317, + "step": 18020 + }, + { + "epoch": 0.07284348145783925, + "grad_norm": 571.2393188476562, + "learning_rate": 9.936524155828503e-06, + "loss": 40.7895, + "step": 18030 + }, + { + "epoch": 0.0728838827232069, + "grad_norm": 671.0640869140625, + "learning_rate": 9.936299941067137e-06, + "loss": 37.075, + "step": 18040 + }, + { + "epoch": 0.07292428398857452, + "grad_norm": 735.3406372070312, + "learning_rate": 9.93607533354704e-06, + "loss": 64.778, + "step": 18050 + }, + { + "epoch": 0.07296468525394216, + "grad_norm": 1079.908447265625, + "learning_rate": 9.935850333286081e-06, + "loss": 64.6785, + "step": 18060 + }, + { + "epoch": 0.07300508651930979, + "grad_norm": 553.8833618164062, + "learning_rate": 9.935624940302165e-06, + "loss": 43.7391, + "step": 18070 + }, + { + "epoch": 0.07304548778467741, + "grad_norm": 821.9501342773438, + "learning_rate": 9.93539915461322e-06, + "loss": 42.4855, + "step": 18080 + }, + { + "epoch": 0.07308588905004505, + "grad_norm": 380.239013671875, + "learning_rate": 9.935172976237218e-06, + "loss": 51.201, + "step": 18090 + }, + { + "epoch": 0.07312629031541268, + "grad_norm": 550.2764282226562, + "learning_rate": 9.934946405192152e-06, + "loss": 35.0583, + "step": 18100 + }, + { + "epoch": 0.0731666915807803, + "grad_norm": 510.43701171875, + "learning_rate": 9.934719441496048e-06, + "loss": 45.7056, + "step": 18110 + }, + { + "epoch": 0.07320709284614794, + "grad_norm": 601.5700073242188, + "learning_rate": 9.934492085166965e-06, + "loss": 42.423, + "step": 18120 + }, + { + "epoch": 0.07324749411151557, + "grad_norm": 621.2982177734375, + "learning_rate": 9.934264336222992e-06, + "loss": 55.7587, + "step": 18130 + }, + { + "epoch": 0.07328789537688321, + "grad_norm": 913.1992797851562, + "learning_rate": 9.934036194682253e-06, + "loss": 48.2963, + "step": 18140 + }, + { + "epoch": 0.07332829664225084, + "grad_norm": 993.3046264648438, + "learning_rate": 9.933807660562898e-06, + "loss": 52.526, + "step": 18150 + }, + { + "epoch": 0.07336869790761846, + "grad_norm": 387.90020751953125, + "learning_rate": 9.933578733883109e-06, + "loss": 51.2602, + "step": 18160 + }, + { + "epoch": 0.0734090991729861, + "grad_norm": 622.7506103515625, + "learning_rate": 9.933349414661103e-06, + "loss": 53.1697, + "step": 18170 + }, + { + "epoch": 0.07344950043835373, + "grad_norm": 767.485595703125, + "learning_rate": 9.933119702915125e-06, + "loss": 47.9134, + "step": 18180 + }, + { + "epoch": 0.07348990170372136, + "grad_norm": 542.6588134765625, + "learning_rate": 9.932889598663452e-06, + "loss": 53.2502, + "step": 18190 + }, + { + "epoch": 0.073530302969089, + "grad_norm": 543.2275390625, + "learning_rate": 9.932659101924393e-06, + "loss": 50.1185, + "step": 18200 + }, + { + "epoch": 0.07357070423445662, + "grad_norm": 629.7283935546875, + "learning_rate": 9.932428212716287e-06, + "loss": 58.6988, + "step": 18210 + }, + { + "epoch": 0.07361110549982426, + "grad_norm": 523.290283203125, + "learning_rate": 9.932196931057505e-06, + "loss": 51.7237, + "step": 18220 + }, + { + "epoch": 0.07365150676519189, + "grad_norm": 1247.166259765625, + "learning_rate": 9.931965256966449e-06, + "loss": 49.4384, + "step": 18230 + }, + { + "epoch": 0.07369190803055951, + "grad_norm": 862.195068359375, + "learning_rate": 9.931733190461552e-06, + "loss": 59.3807, + "step": 18240 + }, + { + "epoch": 0.07373230929592715, + "grad_norm": 806.7147827148438, + "learning_rate": 9.931500731561279e-06, + "loss": 57.1338, + "step": 18250 + }, + { + "epoch": 0.07377271056129478, + "grad_norm": 417.43731689453125, + "learning_rate": 9.931267880284124e-06, + "loss": 43.645, + "step": 18260 + }, + { + "epoch": 0.0738131118266624, + "grad_norm": 608.3236694335938, + "learning_rate": 9.931034636648616e-06, + "loss": 57.7599, + "step": 18270 + }, + { + "epoch": 0.07385351309203005, + "grad_norm": 921.3759765625, + "learning_rate": 9.930801000673314e-06, + "loss": 53.7194, + "step": 18280 + }, + { + "epoch": 0.07389391435739767, + "grad_norm": 536.0777587890625, + "learning_rate": 9.930566972376803e-06, + "loss": 59.2549, + "step": 18290 + }, + { + "epoch": 0.0739343156227653, + "grad_norm": 792.5118408203125, + "learning_rate": 9.930332551777709e-06, + "loss": 60.8778, + "step": 18300 + }, + { + "epoch": 0.07397471688813294, + "grad_norm": 699.9194946289062, + "learning_rate": 9.930097738894679e-06, + "loss": 60.5394, + "step": 18310 + }, + { + "epoch": 0.07401511815350056, + "grad_norm": 360.6986389160156, + "learning_rate": 9.929862533746398e-06, + "loss": 53.1862, + "step": 18320 + }, + { + "epoch": 0.0740555194188682, + "grad_norm": 572.6876220703125, + "learning_rate": 9.92962693635158e-06, + "loss": 81.4816, + "step": 18330 + }, + { + "epoch": 0.07409592068423583, + "grad_norm": 703.1517944335938, + "learning_rate": 9.929390946728972e-06, + "loss": 56.8753, + "step": 18340 + }, + { + "epoch": 0.07413632194960346, + "grad_norm": 594.4182739257812, + "learning_rate": 9.929154564897347e-06, + "loss": 37.7829, + "step": 18350 + }, + { + "epoch": 0.0741767232149711, + "grad_norm": 536.6315307617188, + "learning_rate": 9.928917790875519e-06, + "loss": 41.1827, + "step": 18360 + }, + { + "epoch": 0.07421712448033872, + "grad_norm": 790.8310546875, + "learning_rate": 9.92868062468232e-06, + "loss": 67.6239, + "step": 18370 + }, + { + "epoch": 0.07425752574570635, + "grad_norm": 640.1522216796875, + "learning_rate": 9.928443066336624e-06, + "loss": 51.2941, + "step": 18380 + }, + { + "epoch": 0.07429792701107399, + "grad_norm": 588.3639526367188, + "learning_rate": 9.92820511585733e-06, + "loss": 63.7813, + "step": 18390 + }, + { + "epoch": 0.07433832827644161, + "grad_norm": 385.4635925292969, + "learning_rate": 9.927966773263375e-06, + "loss": 33.1393, + "step": 18400 + }, + { + "epoch": 0.07437872954180925, + "grad_norm": 1008.00439453125, + "learning_rate": 9.92772803857372e-06, + "loss": 64.6085, + "step": 18410 + }, + { + "epoch": 0.07441913080717688, + "grad_norm": 670.56787109375, + "learning_rate": 9.927488911807359e-06, + "loss": 38.1293, + "step": 18420 + }, + { + "epoch": 0.07445953207254451, + "grad_norm": 943.517333984375, + "learning_rate": 9.927249392983319e-06, + "loss": 53.9865, + "step": 18430 + }, + { + "epoch": 0.07449993333791215, + "grad_norm": 616.2203979492188, + "learning_rate": 9.927009482120658e-06, + "loss": 30.1008, + "step": 18440 + }, + { + "epoch": 0.07454033460327977, + "grad_norm": 559.6461181640625, + "learning_rate": 9.926769179238467e-06, + "loss": 47.3283, + "step": 18450 + }, + { + "epoch": 0.0745807358686474, + "grad_norm": 646.236572265625, + "learning_rate": 9.926528484355859e-06, + "loss": 77.804, + "step": 18460 + }, + { + "epoch": 0.07462113713401504, + "grad_norm": 509.07177734375, + "learning_rate": 9.926287397491992e-06, + "loss": 50.2232, + "step": 18470 + }, + { + "epoch": 0.07466153839938267, + "grad_norm": 522.69580078125, + "learning_rate": 9.926045918666045e-06, + "loss": 44.2353, + "step": 18480 + }, + { + "epoch": 0.0747019396647503, + "grad_norm": 729.4260864257812, + "learning_rate": 9.925804047897231e-06, + "loss": 69.1156, + "step": 18490 + }, + { + "epoch": 0.07474234093011793, + "grad_norm": 730.46142578125, + "learning_rate": 9.925561785204797e-06, + "loss": 46.6243, + "step": 18500 + }, + { + "epoch": 0.07478274219548556, + "grad_norm": 406.8343505859375, + "learning_rate": 9.925319130608015e-06, + "loss": 47.7084, + "step": 18510 + }, + { + "epoch": 0.0748231434608532, + "grad_norm": 547.1348876953125, + "learning_rate": 9.925076084126194e-06, + "loss": 40.9297, + "step": 18520 + }, + { + "epoch": 0.07486354472622082, + "grad_norm": 466.9611511230469, + "learning_rate": 9.924832645778674e-06, + "loss": 46.5333, + "step": 18530 + }, + { + "epoch": 0.07490394599158845, + "grad_norm": 1125.0511474609375, + "learning_rate": 9.924588815584822e-06, + "loss": 49.7393, + "step": 18540 + }, + { + "epoch": 0.07494434725695609, + "grad_norm": 287.8588562011719, + "learning_rate": 9.924344593564038e-06, + "loss": 49.0368, + "step": 18550 + }, + { + "epoch": 0.07498474852232372, + "grad_norm": 753.5078125, + "learning_rate": 9.924099979735754e-06, + "loss": 33.4634, + "step": 18560 + }, + { + "epoch": 0.07502514978769136, + "grad_norm": 688.8632202148438, + "learning_rate": 9.923854974119434e-06, + "loss": 69.1773, + "step": 18570 + }, + { + "epoch": 0.07506555105305898, + "grad_norm": 571.031005859375, + "learning_rate": 9.92360957673457e-06, + "loss": 54.5468, + "step": 18580 + }, + { + "epoch": 0.07510595231842661, + "grad_norm": 305.73028564453125, + "learning_rate": 9.923363787600688e-06, + "loss": 34.359, + "step": 18590 + }, + { + "epoch": 0.07514635358379425, + "grad_norm": 647.7716674804688, + "learning_rate": 9.923117606737347e-06, + "loss": 84.9632, + "step": 18600 + }, + { + "epoch": 0.07518675484916187, + "grad_norm": 385.5935363769531, + "learning_rate": 9.92287103416413e-06, + "loss": 53.0548, + "step": 18610 + }, + { + "epoch": 0.0752271561145295, + "grad_norm": 567.6202392578125, + "learning_rate": 9.922624069900658e-06, + "loss": 43.9075, + "step": 18620 + }, + { + "epoch": 0.07526755737989714, + "grad_norm": 758.9103393554688, + "learning_rate": 9.922376713966581e-06, + "loss": 47.5629, + "step": 18630 + }, + { + "epoch": 0.07530795864526477, + "grad_norm": 764.8362426757812, + "learning_rate": 9.92212896638158e-06, + "loss": 42.1043, + "step": 18640 + }, + { + "epoch": 0.0753483599106324, + "grad_norm": 551.5079345703125, + "learning_rate": 9.921880827165367e-06, + "loss": 38.0223, + "step": 18650 + }, + { + "epoch": 0.07538876117600003, + "grad_norm": 1067.0855712890625, + "learning_rate": 9.921632296337683e-06, + "loss": 73.4578, + "step": 18660 + }, + { + "epoch": 0.07542916244136766, + "grad_norm": 715.7173461914062, + "learning_rate": 9.921383373918305e-06, + "loss": 60.8801, + "step": 18670 + }, + { + "epoch": 0.0754695637067353, + "grad_norm": 515.8720092773438, + "learning_rate": 9.92113405992704e-06, + "loss": 43.01, + "step": 18680 + }, + { + "epoch": 0.07550996497210292, + "grad_norm": 863.5546264648438, + "learning_rate": 9.92088435438372e-06, + "loss": 42.6416, + "step": 18690 + }, + { + "epoch": 0.07555036623747055, + "grad_norm": 549.4111938476562, + "learning_rate": 9.920634257308217e-06, + "loss": 59.0651, + "step": 18700 + }, + { + "epoch": 0.07559076750283819, + "grad_norm": 493.9446105957031, + "learning_rate": 9.920383768720429e-06, + "loss": 58.4141, + "step": 18710 + }, + { + "epoch": 0.07563116876820582, + "grad_norm": 807.5221557617188, + "learning_rate": 9.920132888640286e-06, + "loss": 72.1741, + "step": 18720 + }, + { + "epoch": 0.07567157003357346, + "grad_norm": 319.9321594238281, + "learning_rate": 9.91988161708775e-06, + "loss": 58.0552, + "step": 18730 + }, + { + "epoch": 0.07571197129894108, + "grad_norm": 585.4686279296875, + "learning_rate": 9.919629954082813e-06, + "loss": 47.1483, + "step": 18740 + }, + { + "epoch": 0.07575237256430871, + "grad_norm": 393.1877136230469, + "learning_rate": 9.919377899645497e-06, + "loss": 38.655, + "step": 18750 + }, + { + "epoch": 0.07579277382967635, + "grad_norm": 889.9515380859375, + "learning_rate": 9.91912545379586e-06, + "loss": 42.3758, + "step": 18760 + }, + { + "epoch": 0.07583317509504398, + "grad_norm": 468.6612548828125, + "learning_rate": 9.918872616553986e-06, + "loss": 45.4651, + "step": 18770 + }, + { + "epoch": 0.0758735763604116, + "grad_norm": 976.6599731445312, + "learning_rate": 9.918619387939991e-06, + "loss": 40.7768, + "step": 18780 + }, + { + "epoch": 0.07591397762577924, + "grad_norm": 862.0733032226562, + "learning_rate": 9.918365767974025e-06, + "loss": 34.8569, + "step": 18790 + }, + { + "epoch": 0.07595437889114687, + "grad_norm": 2882.824951171875, + "learning_rate": 9.91811175667627e-06, + "loss": 62.0223, + "step": 18800 + }, + { + "epoch": 0.07599478015651451, + "grad_norm": 709.4342651367188, + "learning_rate": 9.91785735406693e-06, + "loss": 58.1651, + "step": 18810 + }, + { + "epoch": 0.07603518142188213, + "grad_norm": 626.4570922851562, + "learning_rate": 9.917602560166253e-06, + "loss": 55.8888, + "step": 18820 + }, + { + "epoch": 0.07607558268724976, + "grad_norm": 685.3681030273438, + "learning_rate": 9.917347374994507e-06, + "loss": 54.2592, + "step": 18830 + }, + { + "epoch": 0.0761159839526174, + "grad_norm": 559.7869873046875, + "learning_rate": 9.917091798571998e-06, + "loss": 40.9733, + "step": 18840 + }, + { + "epoch": 0.07615638521798503, + "grad_norm": 249.24891662597656, + "learning_rate": 9.916835830919062e-06, + "loss": 51.8981, + "step": 18850 + }, + { + "epoch": 0.07619678648335265, + "grad_norm": 913.8907470703125, + "learning_rate": 9.916579472056064e-06, + "loss": 61.8862, + "step": 18860 + }, + { + "epoch": 0.07623718774872029, + "grad_norm": 861.2070922851562, + "learning_rate": 9.916322722003402e-06, + "loss": 52.7893, + "step": 18870 + }, + { + "epoch": 0.07627758901408792, + "grad_norm": 541.4371337890625, + "learning_rate": 9.916065580781504e-06, + "loss": 44.1705, + "step": 18880 + }, + { + "epoch": 0.07631799027945556, + "grad_norm": 567.7680053710938, + "learning_rate": 9.91580804841083e-06, + "loss": 49.3585, + "step": 18890 + }, + { + "epoch": 0.07635839154482318, + "grad_norm": 402.49945068359375, + "learning_rate": 9.915550124911866e-06, + "loss": 56.762, + "step": 18900 + }, + { + "epoch": 0.07639879281019081, + "grad_norm": 649.5604248046875, + "learning_rate": 9.915291810305141e-06, + "loss": 40.8407, + "step": 18910 + }, + { + "epoch": 0.07643919407555845, + "grad_norm": 680.6880493164062, + "learning_rate": 9.915033104611204e-06, + "loss": 80.9502, + "step": 18920 + }, + { + "epoch": 0.07647959534092608, + "grad_norm": 558.8545532226562, + "learning_rate": 9.914774007850641e-06, + "loss": 44.2588, + "step": 18930 + }, + { + "epoch": 0.0765199966062937, + "grad_norm": 451.441162109375, + "learning_rate": 9.914514520044065e-06, + "loss": 36.8717, + "step": 18940 + }, + { + "epoch": 0.07656039787166134, + "grad_norm": 356.0220947265625, + "learning_rate": 9.914254641212124e-06, + "loss": 52.1456, + "step": 18950 + }, + { + "epoch": 0.07660079913702897, + "grad_norm": 519.8460693359375, + "learning_rate": 9.913994371375494e-06, + "loss": 66.9399, + "step": 18960 + }, + { + "epoch": 0.07664120040239661, + "grad_norm": 247.09814453125, + "learning_rate": 9.913733710554886e-06, + "loss": 42.8186, + "step": 18970 + }, + { + "epoch": 0.07668160166776423, + "grad_norm": 1293.5992431640625, + "learning_rate": 9.913472658771034e-06, + "loss": 55.2946, + "step": 18980 + }, + { + "epoch": 0.07672200293313186, + "grad_norm": 826.9439086914062, + "learning_rate": 9.913211216044715e-06, + "loss": 64.7492, + "step": 18990 + }, + { + "epoch": 0.0767624041984995, + "grad_norm": 1064.3778076171875, + "learning_rate": 9.912949382396728e-06, + "loss": 80.7947, + "step": 19000 + }, + { + "epoch": 0.07680280546386713, + "grad_norm": 521.4830322265625, + "learning_rate": 9.912687157847905e-06, + "loss": 39.9748, + "step": 19010 + }, + { + "epoch": 0.07684320672923475, + "grad_norm": 837.2169799804688, + "learning_rate": 9.91242454241911e-06, + "loss": 52.3863, + "step": 19020 + }, + { + "epoch": 0.0768836079946024, + "grad_norm": 650.2366333007812, + "learning_rate": 9.912161536131242e-06, + "loss": 47.731, + "step": 19030 + }, + { + "epoch": 0.07692400925997002, + "grad_norm": 887.7250366210938, + "learning_rate": 9.911898139005222e-06, + "loss": 62.2921, + "step": 19040 + }, + { + "epoch": 0.07696441052533766, + "grad_norm": 911.5108032226562, + "learning_rate": 9.91163435106201e-06, + "loss": 71.3875, + "step": 19050 + }, + { + "epoch": 0.07700481179070529, + "grad_norm": 821.865478515625, + "learning_rate": 9.911370172322595e-06, + "loss": 70.9358, + "step": 19060 + }, + { + "epoch": 0.07704521305607291, + "grad_norm": 741.1298217773438, + "learning_rate": 9.911105602807996e-06, + "loss": 48.0397, + "step": 19070 + }, + { + "epoch": 0.07708561432144055, + "grad_norm": 423.426025390625, + "learning_rate": 9.910840642539261e-06, + "loss": 49.4986, + "step": 19080 + }, + { + "epoch": 0.07712601558680818, + "grad_norm": 509.99359130859375, + "learning_rate": 9.910575291537476e-06, + "loss": 60.3422, + "step": 19090 + }, + { + "epoch": 0.0771664168521758, + "grad_norm": 507.8599548339844, + "learning_rate": 9.91030954982375e-06, + "loss": 47.7275, + "step": 19100 + }, + { + "epoch": 0.07720681811754344, + "grad_norm": 711.5784912109375, + "learning_rate": 9.910043417419228e-06, + "loss": 52.5352, + "step": 19110 + }, + { + "epoch": 0.07724721938291107, + "grad_norm": 597.524169921875, + "learning_rate": 9.909776894345086e-06, + "loss": 43.659, + "step": 19120 + }, + { + "epoch": 0.07728762064827871, + "grad_norm": 603.5820922851562, + "learning_rate": 9.909509980622532e-06, + "loss": 60.1314, + "step": 19130 + }, + { + "epoch": 0.07732802191364634, + "grad_norm": 570.4426879882812, + "learning_rate": 9.909242676272797e-06, + "loss": 50.9336, + "step": 19140 + }, + { + "epoch": 0.07736842317901396, + "grad_norm": 450.529296875, + "learning_rate": 9.908974981317155e-06, + "loss": 32.5167, + "step": 19150 + }, + { + "epoch": 0.0774088244443816, + "grad_norm": 423.9222106933594, + "learning_rate": 9.9087068957769e-06, + "loss": 33.1907, + "step": 19160 + }, + { + "epoch": 0.07744922570974923, + "grad_norm": 509.5530700683594, + "learning_rate": 9.908438419673367e-06, + "loss": 25.4662, + "step": 19170 + }, + { + "epoch": 0.07748962697511685, + "grad_norm": 1166.0137939453125, + "learning_rate": 9.908169553027916e-06, + "loss": 57.6205, + "step": 19180 + }, + { + "epoch": 0.0775300282404845, + "grad_norm": 660.1497802734375, + "learning_rate": 9.90790029586194e-06, + "loss": 85.3185, + "step": 19190 + }, + { + "epoch": 0.07757042950585212, + "grad_norm": 768.8164672851562, + "learning_rate": 9.907630648196857e-06, + "loss": 43.815, + "step": 19200 + }, + { + "epoch": 0.07761083077121976, + "grad_norm": 623.1567993164062, + "learning_rate": 9.907360610054132e-06, + "loss": 65.0855, + "step": 19210 + }, + { + "epoch": 0.07765123203658739, + "grad_norm": 430.18414306640625, + "learning_rate": 9.907090181455241e-06, + "loss": 65.7018, + "step": 19220 + }, + { + "epoch": 0.07769163330195501, + "grad_norm": 758.6072387695312, + "learning_rate": 9.906819362421707e-06, + "loss": 44.9825, + "step": 19230 + }, + { + "epoch": 0.07773203456732265, + "grad_norm": 684.4268188476562, + "learning_rate": 9.906548152975076e-06, + "loss": 65.7489, + "step": 19240 + }, + { + "epoch": 0.07777243583269028, + "grad_norm": 976.61767578125, + "learning_rate": 9.906276553136924e-06, + "loss": 55.8617, + "step": 19250 + }, + { + "epoch": 0.0778128370980579, + "grad_norm": 886.5833129882812, + "learning_rate": 9.906004562928865e-06, + "loss": 70.1672, + "step": 19260 + }, + { + "epoch": 0.07785323836342554, + "grad_norm": 1017.2815551757812, + "learning_rate": 9.905732182372538e-06, + "loss": 48.9207, + "step": 19270 + }, + { + "epoch": 0.07789363962879317, + "grad_norm": 805.864013671875, + "learning_rate": 9.905459411489617e-06, + "loss": 43.7626, + "step": 19280 + }, + { + "epoch": 0.07793404089416081, + "grad_norm": 319.4452209472656, + "learning_rate": 9.905186250301802e-06, + "loss": 27.7071, + "step": 19290 + }, + { + "epoch": 0.07797444215952844, + "grad_norm": 660.695068359375, + "learning_rate": 9.904912698830828e-06, + "loss": 55.5006, + "step": 19300 + }, + { + "epoch": 0.07801484342489606, + "grad_norm": 516.4996948242188, + "learning_rate": 9.904638757098464e-06, + "loss": 43.2097, + "step": 19310 + }, + { + "epoch": 0.0780552446902637, + "grad_norm": 2025.7215576171875, + "learning_rate": 9.9043644251265e-06, + "loss": 77.0012, + "step": 19320 + }, + { + "epoch": 0.07809564595563133, + "grad_norm": 510.74237060546875, + "learning_rate": 9.90408970293677e-06, + "loss": 50.4969, + "step": 19330 + }, + { + "epoch": 0.07813604722099896, + "grad_norm": 1480.844970703125, + "learning_rate": 9.903814590551127e-06, + "loss": 72.9755, + "step": 19340 + }, + { + "epoch": 0.0781764484863666, + "grad_norm": 656.4638671875, + "learning_rate": 9.903539087991462e-06, + "loss": 48.8749, + "step": 19350 + }, + { + "epoch": 0.07821684975173422, + "grad_norm": 558.57177734375, + "learning_rate": 9.903263195279698e-06, + "loss": 64.6075, + "step": 19360 + }, + { + "epoch": 0.07825725101710186, + "grad_norm": 626.6182861328125, + "learning_rate": 9.902986912437784e-06, + "loss": 44.9975, + "step": 19370 + }, + { + "epoch": 0.07829765228246949, + "grad_norm": 2065.308349609375, + "learning_rate": 9.902710239487702e-06, + "loss": 66.3444, + "step": 19380 + }, + { + "epoch": 0.07833805354783711, + "grad_norm": 1237.13134765625, + "learning_rate": 9.902433176451466e-06, + "loss": 68.9546, + "step": 19390 + }, + { + "epoch": 0.07837845481320475, + "grad_norm": 989.8242797851562, + "learning_rate": 9.902155723351124e-06, + "loss": 66.7895, + "step": 19400 + }, + { + "epoch": 0.07841885607857238, + "grad_norm": 771.996826171875, + "learning_rate": 9.901877880208747e-06, + "loss": 60.2761, + "step": 19410 + }, + { + "epoch": 0.07845925734394, + "grad_norm": 981.9373168945312, + "learning_rate": 9.901599647046443e-06, + "loss": 69.1419, + "step": 19420 + }, + { + "epoch": 0.07849965860930765, + "grad_norm": 709.3343505859375, + "learning_rate": 9.901321023886351e-06, + "loss": 65.5082, + "step": 19430 + }, + { + "epoch": 0.07854005987467527, + "grad_norm": 411.25970458984375, + "learning_rate": 9.901042010750641e-06, + "loss": 49.9508, + "step": 19440 + }, + { + "epoch": 0.07858046114004291, + "grad_norm": 709.1156616210938, + "learning_rate": 9.900762607661509e-06, + "loss": 50.5245, + "step": 19450 + }, + { + "epoch": 0.07862086240541054, + "grad_norm": 137.40362548828125, + "learning_rate": 9.900482814641188e-06, + "loss": 47.2816, + "step": 19460 + }, + { + "epoch": 0.07866126367077816, + "grad_norm": 792.5342407226562, + "learning_rate": 9.90020263171194e-06, + "loss": 65.7329, + "step": 19470 + }, + { + "epoch": 0.0787016649361458, + "grad_norm": 1262.7125244140625, + "learning_rate": 9.899922058896058e-06, + "loss": 34.923, + "step": 19480 + }, + { + "epoch": 0.07874206620151343, + "grad_norm": 1053.7841796875, + "learning_rate": 9.899641096215865e-06, + "loss": 67.22, + "step": 19490 + }, + { + "epoch": 0.07878246746688106, + "grad_norm": 619.6085205078125, + "learning_rate": 9.899359743693715e-06, + "loss": 43.8119, + "step": 19500 + }, + { + "epoch": 0.0788228687322487, + "grad_norm": 406.4720458984375, + "learning_rate": 9.899078001351996e-06, + "loss": 38.7725, + "step": 19510 + }, + { + "epoch": 0.07886326999761632, + "grad_norm": 821.3504638671875, + "learning_rate": 9.898795869213125e-06, + "loss": 50.2778, + "step": 19520 + }, + { + "epoch": 0.07890367126298396, + "grad_norm": 948.050048828125, + "learning_rate": 9.898513347299549e-06, + "loss": 55.1774, + "step": 19530 + }, + { + "epoch": 0.07894407252835159, + "grad_norm": 1106.9036865234375, + "learning_rate": 9.898230435633747e-06, + "loss": 51.0458, + "step": 19540 + }, + { + "epoch": 0.07898447379371921, + "grad_norm": 676.792236328125, + "learning_rate": 9.897947134238228e-06, + "loss": 53.7979, + "step": 19550 + }, + { + "epoch": 0.07902487505908685, + "grad_norm": 544.3801879882812, + "learning_rate": 9.897663443135534e-06, + "loss": 55.7435, + "step": 19560 + }, + { + "epoch": 0.07906527632445448, + "grad_norm": 374.0148010253906, + "learning_rate": 9.897379362348239e-06, + "loss": 49.8657, + "step": 19570 + }, + { + "epoch": 0.07910567758982211, + "grad_norm": 647.5410766601562, + "learning_rate": 9.897094891898942e-06, + "loss": 39.7925, + "step": 19580 + }, + { + "epoch": 0.07914607885518975, + "grad_norm": 770.840576171875, + "learning_rate": 9.89681003181028e-06, + "loss": 33.4735, + "step": 19590 + }, + { + "epoch": 0.07918648012055737, + "grad_norm": 732.4638671875, + "learning_rate": 9.896524782104917e-06, + "loss": 64.5618, + "step": 19600 + }, + { + "epoch": 0.07922688138592501, + "grad_norm": 487.14825439453125, + "learning_rate": 9.89623914280555e-06, + "loss": 45.1367, + "step": 19610 + }, + { + "epoch": 0.07926728265129264, + "grad_norm": 540.7517700195312, + "learning_rate": 9.895953113934904e-06, + "loss": 48.6443, + "step": 19620 + }, + { + "epoch": 0.07930768391666027, + "grad_norm": 639.5513916015625, + "learning_rate": 9.895666695515739e-06, + "loss": 55.7388, + "step": 19630 + }, + { + "epoch": 0.0793480851820279, + "grad_norm": 936.28857421875, + "learning_rate": 9.895379887570842e-06, + "loss": 55.8839, + "step": 19640 + }, + { + "epoch": 0.07938848644739553, + "grad_norm": 513.8567504882812, + "learning_rate": 9.895092690123036e-06, + "loss": 45.4252, + "step": 19650 + }, + { + "epoch": 0.07942888771276316, + "grad_norm": 274.3640441894531, + "learning_rate": 9.894805103195168e-06, + "loss": 47.4719, + "step": 19660 + }, + { + "epoch": 0.0794692889781308, + "grad_norm": 879.132568359375, + "learning_rate": 9.894517126810122e-06, + "loss": 57.8774, + "step": 19670 + }, + { + "epoch": 0.07950969024349842, + "grad_norm": 460.71124267578125, + "learning_rate": 9.894228760990811e-06, + "loss": 44.0254, + "step": 19680 + }, + { + "epoch": 0.07955009150886606, + "grad_norm": 655.6797485351562, + "learning_rate": 9.893940005760181e-06, + "loss": 67.7663, + "step": 19690 + }, + { + "epoch": 0.07959049277423369, + "grad_norm": 320.0049133300781, + "learning_rate": 9.893650861141204e-06, + "loss": 39.7949, + "step": 19700 + }, + { + "epoch": 0.07963089403960132, + "grad_norm": 940.2349853515625, + "learning_rate": 9.893361327156887e-06, + "loss": 50.6715, + "step": 19710 + }, + { + "epoch": 0.07967129530496896, + "grad_norm": 772.8973388671875, + "learning_rate": 9.893071403830265e-06, + "loss": 45.953, + "step": 19720 + }, + { + "epoch": 0.07971169657033658, + "grad_norm": 950.0260009765625, + "learning_rate": 9.892781091184409e-06, + "loss": 56.717, + "step": 19730 + }, + { + "epoch": 0.07975209783570421, + "grad_norm": 1299.931396484375, + "learning_rate": 9.892490389242417e-06, + "loss": 50.0108, + "step": 19740 + }, + { + "epoch": 0.07979249910107185, + "grad_norm": 764.660888671875, + "learning_rate": 9.892199298027416e-06, + "loss": 41.7661, + "step": 19750 + }, + { + "epoch": 0.07983290036643947, + "grad_norm": 2445.427490234375, + "learning_rate": 9.891907817562572e-06, + "loss": 59.8738, + "step": 19760 + }, + { + "epoch": 0.07987330163180711, + "grad_norm": 1094.4820556640625, + "learning_rate": 9.891615947871072e-06, + "loss": 80.5574, + "step": 19770 + }, + { + "epoch": 0.07991370289717474, + "grad_norm": 910.0176391601562, + "learning_rate": 9.89132368897614e-06, + "loss": 42.2061, + "step": 19780 + }, + { + "epoch": 0.07995410416254237, + "grad_norm": 740.8135986328125, + "learning_rate": 9.891031040901031e-06, + "loss": 53.9417, + "step": 19790 + }, + { + "epoch": 0.07999450542791, + "grad_norm": 852.3710327148438, + "learning_rate": 9.890738003669029e-06, + "loss": 45.9227, + "step": 19800 + }, + { + "epoch": 0.08003490669327763, + "grad_norm": 653.4926147460938, + "learning_rate": 9.890444577303448e-06, + "loss": 50.5123, + "step": 19810 + }, + { + "epoch": 0.08007530795864526, + "grad_norm": 474.41845703125, + "learning_rate": 9.890150761827639e-06, + "loss": 51.1544, + "step": 19820 + }, + { + "epoch": 0.0801157092240129, + "grad_norm": 1015.6337890625, + "learning_rate": 9.889856557264975e-06, + "loss": 79.7073, + "step": 19830 + }, + { + "epoch": 0.08015611048938052, + "grad_norm": 645.1337890625, + "learning_rate": 9.889561963638866e-06, + "loss": 69.1597, + "step": 19840 + }, + { + "epoch": 0.08019651175474816, + "grad_norm": 784.7030029296875, + "learning_rate": 9.889266980972752e-06, + "loss": 73.7766, + "step": 19850 + }, + { + "epoch": 0.08023691302011579, + "grad_norm": 779.0071411132812, + "learning_rate": 9.888971609290103e-06, + "loss": 34.1661, + "step": 19860 + }, + { + "epoch": 0.08027731428548342, + "grad_norm": 639.2666015625, + "learning_rate": 9.88867584861442e-06, + "loss": 40.5408, + "step": 19870 + }, + { + "epoch": 0.08031771555085106, + "grad_norm": 564.05224609375, + "learning_rate": 9.888379698969236e-06, + "loss": 70.8067, + "step": 19880 + }, + { + "epoch": 0.08035811681621868, + "grad_norm": 371.7476806640625, + "learning_rate": 9.888083160378114e-06, + "loss": 59.5962, + "step": 19890 + }, + { + "epoch": 0.08039851808158631, + "grad_norm": 625.6640014648438, + "learning_rate": 9.887786232864648e-06, + "loss": 40.9848, + "step": 19900 + }, + { + "epoch": 0.08043891934695395, + "grad_norm": 1052.5758056640625, + "learning_rate": 9.887488916452463e-06, + "loss": 77.8972, + "step": 19910 + }, + { + "epoch": 0.08047932061232158, + "grad_norm": 565.8422241210938, + "learning_rate": 9.887191211165217e-06, + "loss": 44.9603, + "step": 19920 + }, + { + "epoch": 0.08051972187768922, + "grad_norm": 747.18603515625, + "learning_rate": 9.886893117026593e-06, + "loss": 38.7456, + "step": 19930 + }, + { + "epoch": 0.08056012314305684, + "grad_norm": 567.5298461914062, + "learning_rate": 9.886594634060314e-06, + "loss": 44.1234, + "step": 19940 + }, + { + "epoch": 0.08060052440842447, + "grad_norm": 441.0209655761719, + "learning_rate": 9.886295762290125e-06, + "loss": 39.1584, + "step": 19950 + }, + { + "epoch": 0.08064092567379211, + "grad_norm": 574.0647583007812, + "learning_rate": 9.885996501739808e-06, + "loss": 75.4105, + "step": 19960 + }, + { + "epoch": 0.08068132693915973, + "grad_norm": 1946.658447265625, + "learning_rate": 9.885696852433174e-06, + "loss": 96.2204, + "step": 19970 + }, + { + "epoch": 0.08072172820452736, + "grad_norm": 719.7885131835938, + "learning_rate": 9.885396814394062e-06, + "loss": 50.1303, + "step": 19980 + }, + { + "epoch": 0.080762129469895, + "grad_norm": 526.1619262695312, + "learning_rate": 9.885096387646346e-06, + "loss": 42.3297, + "step": 19990 + }, + { + "epoch": 0.08080253073526263, + "grad_norm": 761.8712768554688, + "learning_rate": 9.88479557221393e-06, + "loss": 60.0437, + "step": 20000 + }, + { + "epoch": 0.08084293200063027, + "grad_norm": 653.3501586914062, + "learning_rate": 9.88449436812075e-06, + "loss": 55.2075, + "step": 20010 + }, + { + "epoch": 0.08088333326599789, + "grad_norm": 293.372314453125, + "learning_rate": 9.88419277539077e-06, + "loss": 46.4932, + "step": 20020 + }, + { + "epoch": 0.08092373453136552, + "grad_norm": 561.39208984375, + "learning_rate": 9.883890794047985e-06, + "loss": 64.5671, + "step": 20030 + }, + { + "epoch": 0.08096413579673316, + "grad_norm": 581.0809936523438, + "learning_rate": 9.883588424116424e-06, + "loss": 66.5158, + "step": 20040 + }, + { + "epoch": 0.08100453706210078, + "grad_norm": 554.6759033203125, + "learning_rate": 9.883285665620145e-06, + "loss": 59.3087, + "step": 20050 + }, + { + "epoch": 0.08104493832746841, + "grad_norm": 387.6012268066406, + "learning_rate": 9.882982518583238e-06, + "loss": 56.412, + "step": 20060 + }, + { + "epoch": 0.08108533959283605, + "grad_norm": 1148.7908935546875, + "learning_rate": 9.882678983029819e-06, + "loss": 82.5292, + "step": 20070 + }, + { + "epoch": 0.08112574085820368, + "grad_norm": 892.7615356445312, + "learning_rate": 9.882375058984044e-06, + "loss": 65.469, + "step": 20080 + }, + { + "epoch": 0.08116614212357132, + "grad_norm": 790.3026123046875, + "learning_rate": 9.882070746470092e-06, + "loss": 51.3404, + "step": 20090 + }, + { + "epoch": 0.08120654338893894, + "grad_norm": 743.3460083007812, + "learning_rate": 9.881766045512176e-06, + "loss": 50.7929, + "step": 20100 + }, + { + "epoch": 0.08124694465430657, + "grad_norm": 916.11474609375, + "learning_rate": 9.88146095613454e-06, + "loss": 52.6558, + "step": 20110 + }, + { + "epoch": 0.08128734591967421, + "grad_norm": 452.7943420410156, + "learning_rate": 9.881155478361459e-06, + "loss": 37.1106, + "step": 20120 + }, + { + "epoch": 0.08132774718504183, + "grad_norm": 318.2351379394531, + "learning_rate": 9.880849612217238e-06, + "loss": 65.855, + "step": 20130 + }, + { + "epoch": 0.08136814845040946, + "grad_norm": 1544.2833251953125, + "learning_rate": 9.880543357726214e-06, + "loss": 66.3424, + "step": 20140 + }, + { + "epoch": 0.0814085497157771, + "grad_norm": 201.214111328125, + "learning_rate": 9.880236714912754e-06, + "loss": 64.8169, + "step": 20150 + }, + { + "epoch": 0.08144895098114473, + "grad_norm": 290.1680908203125, + "learning_rate": 9.879929683801254e-06, + "loss": 46.0704, + "step": 20160 + }, + { + "epoch": 0.08148935224651237, + "grad_norm": 661.8184814453125, + "learning_rate": 9.879622264416147e-06, + "loss": 46.8915, + "step": 20170 + }, + { + "epoch": 0.08152975351188, + "grad_norm": 481.2290954589844, + "learning_rate": 9.87931445678189e-06, + "loss": 50.5384, + "step": 20180 + }, + { + "epoch": 0.08157015477724762, + "grad_norm": 854.9109497070312, + "learning_rate": 9.879006260922975e-06, + "loss": 47.7714, + "step": 20190 + }, + { + "epoch": 0.08161055604261526, + "grad_norm": 550.2037353515625, + "learning_rate": 9.878697676863922e-06, + "loss": 56.8208, + "step": 20200 + }, + { + "epoch": 0.08165095730798289, + "grad_norm": 560.0890502929688, + "learning_rate": 9.878388704629286e-06, + "loss": 43.5966, + "step": 20210 + }, + { + "epoch": 0.08169135857335051, + "grad_norm": 470.9778137207031, + "learning_rate": 9.87807934424365e-06, + "loss": 72.7084, + "step": 20220 + }, + { + "epoch": 0.08173175983871815, + "grad_norm": 1365.6583251953125, + "learning_rate": 9.877769595731629e-06, + "loss": 73.7771, + "step": 20230 + }, + { + "epoch": 0.08177216110408578, + "grad_norm": 0.0, + "learning_rate": 9.877459459117864e-06, + "loss": 60.936, + "step": 20240 + }, + { + "epoch": 0.08181256236945342, + "grad_norm": 881.4310302734375, + "learning_rate": 9.877148934427037e-06, + "loss": 44.2945, + "step": 20250 + }, + { + "epoch": 0.08185296363482104, + "grad_norm": 779.4917602539062, + "learning_rate": 9.87683802168385e-06, + "loss": 57.1076, + "step": 20260 + }, + { + "epoch": 0.08189336490018867, + "grad_norm": 602.3546142578125, + "learning_rate": 9.876526720913045e-06, + "loss": 51.2614, + "step": 20270 + }, + { + "epoch": 0.08193376616555631, + "grad_norm": 657.62890625, + "learning_rate": 9.87621503213939e-06, + "loss": 65.6103, + "step": 20280 + }, + { + "epoch": 0.08197416743092394, + "grad_norm": 487.8164978027344, + "learning_rate": 9.875902955387682e-06, + "loss": 47.4494, + "step": 20290 + }, + { + "epoch": 0.08201456869629156, + "grad_norm": 679.0338745117188, + "learning_rate": 9.875590490682754e-06, + "loss": 41.2857, + "step": 20300 + }, + { + "epoch": 0.0820549699616592, + "grad_norm": 840.4469604492188, + "learning_rate": 9.875277638049466e-06, + "loss": 71.3646, + "step": 20310 + }, + { + "epoch": 0.08209537122702683, + "grad_norm": 874.2022705078125, + "learning_rate": 9.87496439751271e-06, + "loss": 40.4883, + "step": 20320 + }, + { + "epoch": 0.08213577249239447, + "grad_norm": 795.24853515625, + "learning_rate": 9.87465076909741e-06, + "loss": 60.0892, + "step": 20330 + }, + { + "epoch": 0.0821761737577621, + "grad_norm": 517.8695678710938, + "learning_rate": 9.874336752828523e-06, + "loss": 40.2634, + "step": 20340 + }, + { + "epoch": 0.08221657502312972, + "grad_norm": 2106.507080078125, + "learning_rate": 9.87402234873103e-06, + "loss": 57.6708, + "step": 20350 + }, + { + "epoch": 0.08225697628849736, + "grad_norm": 570.333251953125, + "learning_rate": 9.873707556829945e-06, + "loss": 45.0013, + "step": 20360 + }, + { + "epoch": 0.08229737755386499, + "grad_norm": 633.8534545898438, + "learning_rate": 9.873392377150318e-06, + "loss": 73.9094, + "step": 20370 + }, + { + "epoch": 0.08233777881923261, + "grad_norm": 831.1759643554688, + "learning_rate": 9.873076809717226e-06, + "loss": 43.2788, + "step": 20380 + }, + { + "epoch": 0.08237818008460025, + "grad_norm": 975.9166870117188, + "learning_rate": 9.872760854555776e-06, + "loss": 57.6913, + "step": 20390 + }, + { + "epoch": 0.08241858134996788, + "grad_norm": 687.0908203125, + "learning_rate": 9.872444511691108e-06, + "loss": 54.1361, + "step": 20400 + }, + { + "epoch": 0.08245898261533552, + "grad_norm": 812.077880859375, + "learning_rate": 9.872127781148392e-06, + "loss": 46.2579, + "step": 20410 + }, + { + "epoch": 0.08249938388070314, + "grad_norm": 1039.45947265625, + "learning_rate": 9.871810662952828e-06, + "loss": 60.9721, + "step": 20420 + }, + { + "epoch": 0.08253978514607077, + "grad_norm": 350.81365966796875, + "learning_rate": 9.87149315712965e-06, + "loss": 54.0255, + "step": 20430 + }, + { + "epoch": 0.08258018641143841, + "grad_norm": 1040.960205078125, + "learning_rate": 9.871175263704116e-06, + "loss": 47.519, + "step": 20440 + }, + { + "epoch": 0.08262058767680604, + "grad_norm": 903.642578125, + "learning_rate": 9.870856982701522e-06, + "loss": 42.4366, + "step": 20450 + }, + { + "epoch": 0.08266098894217366, + "grad_norm": 268.2828674316406, + "learning_rate": 9.870538314147194e-06, + "loss": 65.6971, + "step": 20460 + }, + { + "epoch": 0.0827013902075413, + "grad_norm": 728.4773559570312, + "learning_rate": 9.870219258066485e-06, + "loss": 49.7082, + "step": 20470 + }, + { + "epoch": 0.08274179147290893, + "grad_norm": 579.0664672851562, + "learning_rate": 9.86989981448478e-06, + "loss": 44.6814, + "step": 20480 + }, + { + "epoch": 0.08278219273827657, + "grad_norm": 944.3462524414062, + "learning_rate": 9.869579983427497e-06, + "loss": 68.3373, + "step": 20490 + }, + { + "epoch": 0.0828225940036442, + "grad_norm": 1739.9251708984375, + "learning_rate": 9.869259764920081e-06, + "loss": 61.5346, + "step": 20500 + }, + { + "epoch": 0.08286299526901182, + "grad_norm": 323.2250061035156, + "learning_rate": 9.868939158988016e-06, + "loss": 66.8795, + "step": 20510 + }, + { + "epoch": 0.08290339653437946, + "grad_norm": 610.7361450195312, + "learning_rate": 9.868618165656805e-06, + "loss": 44.2779, + "step": 20520 + }, + { + "epoch": 0.08294379779974709, + "grad_norm": 766.6193237304688, + "learning_rate": 9.868296784951992e-06, + "loss": 68.555, + "step": 20530 + }, + { + "epoch": 0.08298419906511471, + "grad_norm": 251.73240661621094, + "learning_rate": 9.867975016899145e-06, + "loss": 51.9702, + "step": 20540 + }, + { + "epoch": 0.08302460033048235, + "grad_norm": 753.54345703125, + "learning_rate": 9.867652861523866e-06, + "loss": 48.5123, + "step": 20550 + }, + { + "epoch": 0.08306500159584998, + "grad_norm": 655.2874145507812, + "learning_rate": 9.86733031885179e-06, + "loss": 56.5569, + "step": 20560 + }, + { + "epoch": 0.08310540286121762, + "grad_norm": 675.6414184570312, + "learning_rate": 9.867007388908579e-06, + "loss": 29.6619, + "step": 20570 + }, + { + "epoch": 0.08314580412658525, + "grad_norm": 1051.6103515625, + "learning_rate": 9.866684071719926e-06, + "loss": 71.0548, + "step": 20580 + }, + { + "epoch": 0.08318620539195287, + "grad_norm": 482.6436767578125, + "learning_rate": 9.866360367311557e-06, + "loss": 53.7929, + "step": 20590 + }, + { + "epoch": 0.08322660665732051, + "grad_norm": 714.1856689453125, + "learning_rate": 9.866036275709226e-06, + "loss": 81.5517, + "step": 20600 + }, + { + "epoch": 0.08326700792268814, + "grad_norm": 697.500244140625, + "learning_rate": 9.86571179693872e-06, + "loss": 92.779, + "step": 20610 + }, + { + "epoch": 0.08330740918805576, + "grad_norm": 888.16455078125, + "learning_rate": 9.865386931025858e-06, + "loss": 70.6248, + "step": 20620 + }, + { + "epoch": 0.0833478104534234, + "grad_norm": 1028.4215087890625, + "learning_rate": 9.865061677996487e-06, + "loss": 59.6395, + "step": 20630 + }, + { + "epoch": 0.08338821171879103, + "grad_norm": 930.8424072265625, + "learning_rate": 9.864736037876487e-06, + "loss": 47.4096, + "step": 20640 + }, + { + "epoch": 0.08342861298415867, + "grad_norm": 644.8805541992188, + "learning_rate": 9.864410010691766e-06, + "loss": 41.8741, + "step": 20650 + }, + { + "epoch": 0.0834690142495263, + "grad_norm": 1114.886474609375, + "learning_rate": 9.864083596468263e-06, + "loss": 70.8659, + "step": 20660 + }, + { + "epoch": 0.08350941551489392, + "grad_norm": 840.799072265625, + "learning_rate": 9.863756795231953e-06, + "loss": 73.1389, + "step": 20670 + }, + { + "epoch": 0.08354981678026156, + "grad_norm": 654.5409545898438, + "learning_rate": 9.863429607008837e-06, + "loss": 47.1239, + "step": 20680 + }, + { + "epoch": 0.08359021804562919, + "grad_norm": 833.7312622070312, + "learning_rate": 9.863102031824946e-06, + "loss": 60.6132, + "step": 20690 + }, + { + "epoch": 0.08363061931099681, + "grad_norm": 735.7879028320312, + "learning_rate": 9.862774069706346e-06, + "loss": 62.9169, + "step": 20700 + }, + { + "epoch": 0.08367102057636445, + "grad_norm": 428.26934814453125, + "learning_rate": 9.86244572067913e-06, + "loss": 50.549, + "step": 20710 + }, + { + "epoch": 0.08371142184173208, + "grad_norm": 561.2461547851562, + "learning_rate": 9.862116984769424e-06, + "loss": 56.5235, + "step": 20720 + }, + { + "epoch": 0.08375182310709972, + "grad_norm": 728.88525390625, + "learning_rate": 9.861787862003384e-06, + "loss": 70.6504, + "step": 20730 + }, + { + "epoch": 0.08379222437246735, + "grad_norm": 2484.103271484375, + "learning_rate": 9.861458352407196e-06, + "loss": 63.5527, + "step": 20740 + }, + { + "epoch": 0.08383262563783497, + "grad_norm": 1101.0321044921875, + "learning_rate": 9.861128456007076e-06, + "loss": 29.5614, + "step": 20750 + }, + { + "epoch": 0.08387302690320261, + "grad_norm": 739.0653076171875, + "learning_rate": 9.860798172829277e-06, + "loss": 60.2193, + "step": 20760 + }, + { + "epoch": 0.08391342816857024, + "grad_norm": 771.9661865234375, + "learning_rate": 9.860467502900076e-06, + "loss": 41.4003, + "step": 20770 + }, + { + "epoch": 0.08395382943393787, + "grad_norm": 412.1797790527344, + "learning_rate": 9.860136446245779e-06, + "loss": 47.9847, + "step": 20780 + }, + { + "epoch": 0.0839942306993055, + "grad_norm": 547.4696044921875, + "learning_rate": 9.859805002892733e-06, + "loss": 60.2853, + "step": 20790 + }, + { + "epoch": 0.08403463196467313, + "grad_norm": 2845.0302734375, + "learning_rate": 9.859473172867304e-06, + "loss": 90.4173, + "step": 20800 + }, + { + "epoch": 0.08407503323004077, + "grad_norm": 758.51171875, + "learning_rate": 9.859140956195898e-06, + "loss": 50.2199, + "step": 20810 + }, + { + "epoch": 0.0841154344954084, + "grad_norm": 598.7674560546875, + "learning_rate": 9.858808352904946e-06, + "loss": 56.4163, + "step": 20820 + }, + { + "epoch": 0.08415583576077602, + "grad_norm": 595.4998168945312, + "learning_rate": 9.858475363020913e-06, + "loss": 50.6997, + "step": 20830 + }, + { + "epoch": 0.08419623702614366, + "grad_norm": 992.6197509765625, + "learning_rate": 9.858141986570294e-06, + "loss": 60.4768, + "step": 20840 + }, + { + "epoch": 0.08423663829151129, + "grad_norm": 451.0296325683594, + "learning_rate": 9.85780822357961e-06, + "loss": 34.9621, + "step": 20850 + }, + { + "epoch": 0.08427703955687892, + "grad_norm": 731.828857421875, + "learning_rate": 9.857474074075422e-06, + "loss": 40.2803, + "step": 20860 + }, + { + "epoch": 0.08431744082224656, + "grad_norm": 371.8586730957031, + "learning_rate": 9.857139538084313e-06, + "loss": 45.1926, + "step": 20870 + }, + { + "epoch": 0.08435784208761418, + "grad_norm": 822.1978149414062, + "learning_rate": 9.856804615632904e-06, + "loss": 62.567, + "step": 20880 + }, + { + "epoch": 0.08439824335298182, + "grad_norm": 592.7335205078125, + "learning_rate": 9.85646930674784e-06, + "loss": 47.0134, + "step": 20890 + }, + { + "epoch": 0.08443864461834945, + "grad_norm": 1128.1734619140625, + "learning_rate": 9.856133611455802e-06, + "loss": 58.2488, + "step": 20900 + }, + { + "epoch": 0.08447904588371707, + "grad_norm": 974.8399658203125, + "learning_rate": 9.855797529783499e-06, + "loss": 73.3392, + "step": 20910 + }, + { + "epoch": 0.08451944714908471, + "grad_norm": 734.9163818359375, + "learning_rate": 9.855461061757673e-06, + "loss": 55.1066, + "step": 20920 + }, + { + "epoch": 0.08455984841445234, + "grad_norm": 572.8345336914062, + "learning_rate": 9.855124207405093e-06, + "loss": 47.7488, + "step": 20930 + }, + { + "epoch": 0.08460024967981997, + "grad_norm": 506.1241149902344, + "learning_rate": 9.854786966752561e-06, + "loss": 49.7169, + "step": 20940 + }, + { + "epoch": 0.0846406509451876, + "grad_norm": 776.8680419921875, + "learning_rate": 9.854449339826912e-06, + "loss": 59.9915, + "step": 20950 + }, + { + "epoch": 0.08468105221055523, + "grad_norm": 680.7813110351562, + "learning_rate": 9.854111326655006e-06, + "loss": 34.6824, + "step": 20960 + }, + { + "epoch": 0.08472145347592287, + "grad_norm": 672.206787109375, + "learning_rate": 9.85377292726374e-06, + "loss": 62.063, + "step": 20970 + }, + { + "epoch": 0.0847618547412905, + "grad_norm": 1285.8265380859375, + "learning_rate": 9.85343414168004e-06, + "loss": 40.8714, + "step": 20980 + }, + { + "epoch": 0.08480225600665812, + "grad_norm": 404.3912353515625, + "learning_rate": 9.853094969930857e-06, + "loss": 55.0305, + "step": 20990 + }, + { + "epoch": 0.08484265727202576, + "grad_norm": 620.1910400390625, + "learning_rate": 9.85275541204318e-06, + "loss": 46.7199, + "step": 21000 + }, + { + "epoch": 0.08488305853739339, + "grad_norm": 4275.12841796875, + "learning_rate": 9.852415468044027e-06, + "loss": 65.9217, + "step": 21010 + }, + { + "epoch": 0.08492345980276102, + "grad_norm": 619.6917114257812, + "learning_rate": 9.852075137960446e-06, + "loss": 59.9521, + "step": 21020 + }, + { + "epoch": 0.08496386106812866, + "grad_norm": 161.13560485839844, + "learning_rate": 9.851734421819511e-06, + "loss": 46.9062, + "step": 21030 + }, + { + "epoch": 0.08500426233349628, + "grad_norm": 729.6267700195312, + "learning_rate": 9.851393319648338e-06, + "loss": 45.645, + "step": 21040 + }, + { + "epoch": 0.08504466359886392, + "grad_norm": 1013.0626220703125, + "learning_rate": 9.851051831474062e-06, + "loss": 53.9054, + "step": 21050 + }, + { + "epoch": 0.08508506486423155, + "grad_norm": 613.8585205078125, + "learning_rate": 9.850709957323855e-06, + "loss": 36.5241, + "step": 21060 + }, + { + "epoch": 0.08512546612959918, + "grad_norm": 339.6336364746094, + "learning_rate": 9.85036769722492e-06, + "loss": 62.6872, + "step": 21070 + }, + { + "epoch": 0.08516586739496682, + "grad_norm": 334.4331359863281, + "learning_rate": 9.850025051204484e-06, + "loss": 48.3355, + "step": 21080 + }, + { + "epoch": 0.08520626866033444, + "grad_norm": 987.1373901367188, + "learning_rate": 9.849682019289816e-06, + "loss": 55.5567, + "step": 21090 + }, + { + "epoch": 0.08524666992570207, + "grad_norm": 737.2757568359375, + "learning_rate": 9.849338601508204e-06, + "loss": 49.2449, + "step": 21100 + }, + { + "epoch": 0.08528707119106971, + "grad_norm": 780.7937622070312, + "learning_rate": 9.848994797886978e-06, + "loss": 51.7602, + "step": 21110 + }, + { + "epoch": 0.08532747245643733, + "grad_norm": 505.43365478515625, + "learning_rate": 9.84865060845349e-06, + "loss": 48.2497, + "step": 21120 + }, + { + "epoch": 0.08536787372180497, + "grad_norm": 416.4381103515625, + "learning_rate": 9.848306033235123e-06, + "loss": 49.8744, + "step": 21130 + }, + { + "epoch": 0.0854082749871726, + "grad_norm": 1224.279541015625, + "learning_rate": 9.847961072259298e-06, + "loss": 51.9309, + "step": 21140 + }, + { + "epoch": 0.08544867625254023, + "grad_norm": 855.0914306640625, + "learning_rate": 9.847615725553457e-06, + "loss": 45.4371, + "step": 21150 + }, + { + "epoch": 0.08548907751790787, + "grad_norm": 518.2168579101562, + "learning_rate": 9.847269993145082e-06, + "loss": 50.2552, + "step": 21160 + }, + { + "epoch": 0.08552947878327549, + "grad_norm": 387.5550231933594, + "learning_rate": 9.84692387506168e-06, + "loss": 51.4854, + "step": 21170 + }, + { + "epoch": 0.08556988004864312, + "grad_norm": 325.9864501953125, + "learning_rate": 9.846577371330788e-06, + "loss": 48.9065, + "step": 21180 + }, + { + "epoch": 0.08561028131401076, + "grad_norm": 643.303466796875, + "learning_rate": 9.846230481979978e-06, + "loss": 61.2373, + "step": 21190 + }, + { + "epoch": 0.08565068257937838, + "grad_norm": 1041.485107421875, + "learning_rate": 9.84588320703685e-06, + "loss": 52.6828, + "step": 21200 + }, + { + "epoch": 0.08569108384474602, + "grad_norm": 945.1188354492188, + "learning_rate": 9.845535546529036e-06, + "loss": 59.1794, + "step": 21210 + }, + { + "epoch": 0.08573148511011365, + "grad_norm": 1870.1568603515625, + "learning_rate": 9.845187500484194e-06, + "loss": 79.922, + "step": 21220 + }, + { + "epoch": 0.08577188637548128, + "grad_norm": 1248.2161865234375, + "learning_rate": 9.844839068930021e-06, + "loss": 58.7054, + "step": 21230 + }, + { + "epoch": 0.08581228764084892, + "grad_norm": 326.83563232421875, + "learning_rate": 9.844490251894237e-06, + "loss": 46.6798, + "step": 21240 + }, + { + "epoch": 0.08585268890621654, + "grad_norm": 860.9239501953125, + "learning_rate": 9.844141049404598e-06, + "loss": 50.0194, + "step": 21250 + }, + { + "epoch": 0.08589309017158417, + "grad_norm": 877.5747680664062, + "learning_rate": 9.843791461488887e-06, + "loss": 50.6901, + "step": 21260 + }, + { + "epoch": 0.08593349143695181, + "grad_norm": 942.8595581054688, + "learning_rate": 9.843441488174918e-06, + "loss": 49.5507, + "step": 21270 + }, + { + "epoch": 0.08597389270231943, + "grad_norm": 1001.177734375, + "learning_rate": 9.843091129490539e-06, + "loss": 65.9635, + "step": 21280 + }, + { + "epoch": 0.08601429396768706, + "grad_norm": 583.5938110351562, + "learning_rate": 9.842740385463628e-06, + "loss": 69.3124, + "step": 21290 + }, + { + "epoch": 0.0860546952330547, + "grad_norm": 808.9459838867188, + "learning_rate": 9.842389256122086e-06, + "loss": 42.665, + "step": 21300 + }, + { + "epoch": 0.08609509649842233, + "grad_norm": 1406.5587158203125, + "learning_rate": 9.842037741493856e-06, + "loss": 49.0096, + "step": 21310 + }, + { + "epoch": 0.08613549776378997, + "grad_norm": 591.5014038085938, + "learning_rate": 9.841685841606905e-06, + "loss": 48.2358, + "step": 21320 + }, + { + "epoch": 0.0861758990291576, + "grad_norm": 405.5210876464844, + "learning_rate": 9.841333556489232e-06, + "loss": 42.8229, + "step": 21330 + }, + { + "epoch": 0.08621630029452522, + "grad_norm": 945.532470703125, + "learning_rate": 9.840980886168866e-06, + "loss": 55.2916, + "step": 21340 + }, + { + "epoch": 0.08625670155989286, + "grad_norm": 670.8292236328125, + "learning_rate": 9.840627830673867e-06, + "loss": 61.8779, + "step": 21350 + }, + { + "epoch": 0.08629710282526049, + "grad_norm": 547.29345703125, + "learning_rate": 9.84027439003233e-06, + "loss": 37.4086, + "step": 21360 + }, + { + "epoch": 0.08633750409062811, + "grad_norm": 583.8782958984375, + "learning_rate": 9.839920564272372e-06, + "loss": 38.6236, + "step": 21370 + }, + { + "epoch": 0.08637790535599575, + "grad_norm": 1493.264404296875, + "learning_rate": 9.839566353422148e-06, + "loss": 52.7415, + "step": 21380 + }, + { + "epoch": 0.08641830662136338, + "grad_norm": 1489.428466796875, + "learning_rate": 9.839211757509838e-06, + "loss": 44.0712, + "step": 21390 + }, + { + "epoch": 0.08645870788673102, + "grad_norm": 1181.785400390625, + "learning_rate": 9.83885677656366e-06, + "loss": 79.1896, + "step": 21400 + }, + { + "epoch": 0.08649910915209864, + "grad_norm": 346.3993225097656, + "learning_rate": 9.838501410611852e-06, + "loss": 66.8448, + "step": 21410 + }, + { + "epoch": 0.08653951041746627, + "grad_norm": 749.7047119140625, + "learning_rate": 9.838145659682695e-06, + "loss": 46.5892, + "step": 21420 + }, + { + "epoch": 0.08657991168283391, + "grad_norm": 698.231201171875, + "learning_rate": 9.837789523804491e-06, + "loss": 46.5881, + "step": 21430 + }, + { + "epoch": 0.08662031294820154, + "grad_norm": 788.7677612304688, + "learning_rate": 9.837433003005578e-06, + "loss": 73.2478, + "step": 21440 + }, + { + "epoch": 0.08666071421356916, + "grad_norm": 494.36346435546875, + "learning_rate": 9.83707609731432e-06, + "loss": 43.4364, + "step": 21450 + }, + { + "epoch": 0.0867011154789368, + "grad_norm": 1333.8416748046875, + "learning_rate": 9.836718806759119e-06, + "loss": 59.773, + "step": 21460 + }, + { + "epoch": 0.08674151674430443, + "grad_norm": 10082.6943359375, + "learning_rate": 9.836361131368398e-06, + "loss": 79.0009, + "step": 21470 + }, + { + "epoch": 0.08678191800967207, + "grad_norm": 478.3102722167969, + "learning_rate": 9.836003071170617e-06, + "loss": 30.4878, + "step": 21480 + }, + { + "epoch": 0.0868223192750397, + "grad_norm": 1929.5330810546875, + "learning_rate": 9.835644626194268e-06, + "loss": 86.2987, + "step": 21490 + }, + { + "epoch": 0.08686272054040732, + "grad_norm": 860.5169677734375, + "learning_rate": 9.835285796467867e-06, + "loss": 46.1074, + "step": 21500 + }, + { + "epoch": 0.08690312180577496, + "grad_norm": 410.758056640625, + "learning_rate": 9.834926582019968e-06, + "loss": 51.9143, + "step": 21510 + }, + { + "epoch": 0.08694352307114259, + "grad_norm": 547.5929565429688, + "learning_rate": 9.834566982879149e-06, + "loss": 43.2907, + "step": 21520 + }, + { + "epoch": 0.08698392433651021, + "grad_norm": 878.74560546875, + "learning_rate": 9.83420699907402e-06, + "loss": 57.618, + "step": 21530 + }, + { + "epoch": 0.08702432560187785, + "grad_norm": 1142.8790283203125, + "learning_rate": 9.83384663063323e-06, + "loss": 43.2136, + "step": 21540 + }, + { + "epoch": 0.08706472686724548, + "grad_norm": 483.8853454589844, + "learning_rate": 9.833485877585447e-06, + "loss": 59.4731, + "step": 21550 + }, + { + "epoch": 0.08710512813261312, + "grad_norm": 716.4909057617188, + "learning_rate": 9.833124739959375e-06, + "loss": 54.2022, + "step": 21560 + }, + { + "epoch": 0.08714552939798074, + "grad_norm": 863.1256713867188, + "learning_rate": 9.83276321778375e-06, + "loss": 29.776, + "step": 21570 + }, + { + "epoch": 0.08718593066334837, + "grad_norm": 319.7196350097656, + "learning_rate": 9.832401311087334e-06, + "loss": 71.2109, + "step": 21580 + }, + { + "epoch": 0.08722633192871601, + "grad_norm": 491.4449462890625, + "learning_rate": 9.832039019898922e-06, + "loss": 42.9584, + "step": 21590 + }, + { + "epoch": 0.08726673319408364, + "grad_norm": 962.9393310546875, + "learning_rate": 9.831676344247343e-06, + "loss": 68.2823, + "step": 21600 + }, + { + "epoch": 0.08730713445945126, + "grad_norm": 371.64996337890625, + "learning_rate": 9.831313284161452e-06, + "loss": 44.8427, + "step": 21610 + }, + { + "epoch": 0.0873475357248189, + "grad_norm": 706.4290161132812, + "learning_rate": 9.830949839670134e-06, + "loss": 61.4159, + "step": 21620 + }, + { + "epoch": 0.08738793699018653, + "grad_norm": 251.71263122558594, + "learning_rate": 9.83058601080231e-06, + "loss": 54.2862, + "step": 21630 + }, + { + "epoch": 0.08742833825555417, + "grad_norm": 636.603759765625, + "learning_rate": 9.830221797586925e-06, + "loss": 40.2427, + "step": 21640 + }, + { + "epoch": 0.0874687395209218, + "grad_norm": 1012.7183837890625, + "learning_rate": 9.829857200052961e-06, + "loss": 46.5919, + "step": 21650 + }, + { + "epoch": 0.08750914078628942, + "grad_norm": 537.0764770507812, + "learning_rate": 9.829492218229426e-06, + "loss": 48.8234, + "step": 21660 + }, + { + "epoch": 0.08754954205165706, + "grad_norm": 236.4057159423828, + "learning_rate": 9.829126852145357e-06, + "loss": 56.6592, + "step": 21670 + }, + { + "epoch": 0.08758994331702469, + "grad_norm": 500.1606140136719, + "learning_rate": 9.82876110182983e-06, + "loss": 41.0559, + "step": 21680 + }, + { + "epoch": 0.08763034458239231, + "grad_norm": 633.2708740234375, + "learning_rate": 9.82839496731194e-06, + "loss": 49.9588, + "step": 21690 + }, + { + "epoch": 0.08767074584775995, + "grad_norm": 324.40264892578125, + "learning_rate": 9.828028448620824e-06, + "loss": 56.6327, + "step": 21700 + }, + { + "epoch": 0.08771114711312758, + "grad_norm": 1261.2022705078125, + "learning_rate": 9.827661545785641e-06, + "loss": 57.7509, + "step": 21710 + }, + { + "epoch": 0.08775154837849522, + "grad_norm": 438.5520324707031, + "learning_rate": 9.827294258835584e-06, + "loss": 53.7419, + "step": 21720 + }, + { + "epoch": 0.08779194964386285, + "grad_norm": 446.0879211425781, + "learning_rate": 9.82692658779988e-06, + "loss": 39.9361, + "step": 21730 + }, + { + "epoch": 0.08783235090923047, + "grad_norm": 1023.3678588867188, + "learning_rate": 9.826558532707777e-06, + "loss": 37.8205, + "step": 21740 + }, + { + "epoch": 0.08787275217459811, + "grad_norm": 331.857666015625, + "learning_rate": 9.826190093588564e-06, + "loss": 43.6651, + "step": 21750 + }, + { + "epoch": 0.08791315343996574, + "grad_norm": 654.5843505859375, + "learning_rate": 9.825821270471555e-06, + "loss": 43.0031, + "step": 21760 + }, + { + "epoch": 0.08795355470533336, + "grad_norm": 286.42913818359375, + "learning_rate": 9.825452063386094e-06, + "loss": 41.2635, + "step": 21770 + }, + { + "epoch": 0.087993955970701, + "grad_norm": 411.1146240234375, + "learning_rate": 9.825082472361558e-06, + "loss": 57.5872, + "step": 21780 + }, + { + "epoch": 0.08803435723606863, + "grad_norm": 326.46392822265625, + "learning_rate": 9.824712497427354e-06, + "loss": 31.2261, + "step": 21790 + }, + { + "epoch": 0.08807475850143627, + "grad_norm": 820.8731079101562, + "learning_rate": 9.824342138612918e-06, + "loss": 55.781, + "step": 21800 + }, + { + "epoch": 0.0881151597668039, + "grad_norm": 576.8391723632812, + "learning_rate": 9.823971395947723e-06, + "loss": 42.2346, + "step": 21810 + }, + { + "epoch": 0.08815556103217152, + "grad_norm": 830.1489868164062, + "learning_rate": 9.823600269461259e-06, + "loss": 31.6791, + "step": 21820 + }, + { + "epoch": 0.08819596229753916, + "grad_norm": 1346.4920654296875, + "learning_rate": 9.823228759183058e-06, + "loss": 57.8082, + "step": 21830 + }, + { + "epoch": 0.08823636356290679, + "grad_norm": 488.5693054199219, + "learning_rate": 9.822856865142683e-06, + "loss": 56.2668, + "step": 21840 + }, + { + "epoch": 0.08827676482827441, + "grad_norm": 417.81512451171875, + "learning_rate": 9.822484587369721e-06, + "loss": 35.235, + "step": 21850 + }, + { + "epoch": 0.08831716609364205, + "grad_norm": 837.1052856445312, + "learning_rate": 9.822111925893792e-06, + "loss": 57.6994, + "step": 21860 + }, + { + "epoch": 0.08835756735900968, + "grad_norm": 380.0332946777344, + "learning_rate": 9.821738880744549e-06, + "loss": 50.7504, + "step": 21870 + }, + { + "epoch": 0.08839796862437732, + "grad_norm": 821.6565551757812, + "learning_rate": 9.82136545195167e-06, + "loss": 46.5275, + "step": 21880 + }, + { + "epoch": 0.08843836988974495, + "grad_norm": 1301.0826416015625, + "learning_rate": 9.82099163954487e-06, + "loss": 41.9282, + "step": 21890 + }, + { + "epoch": 0.08847877115511257, + "grad_norm": 526.063232421875, + "learning_rate": 9.820617443553889e-06, + "loss": 57.149, + "step": 21900 + }, + { + "epoch": 0.08851917242048021, + "grad_norm": 743.5262451171875, + "learning_rate": 9.820242864008503e-06, + "loss": 61.0266, + "step": 21910 + }, + { + "epoch": 0.08855957368584784, + "grad_norm": 1047.2117919921875, + "learning_rate": 9.819867900938514e-06, + "loss": 39.4777, + "step": 21920 + }, + { + "epoch": 0.08859997495121547, + "grad_norm": 1059.58447265625, + "learning_rate": 9.819492554373758e-06, + "loss": 51.7157, + "step": 21930 + }, + { + "epoch": 0.0886403762165831, + "grad_norm": 539.9701538085938, + "learning_rate": 9.819116824344095e-06, + "loss": 71.8611, + "step": 21940 + }, + { + "epoch": 0.08868077748195073, + "grad_norm": 1279.98193359375, + "learning_rate": 9.818740710879424e-06, + "loss": 58.2858, + "step": 21950 + }, + { + "epoch": 0.08872117874731837, + "grad_norm": 972.816162109375, + "learning_rate": 9.81836421400967e-06, + "loss": 60.5523, + "step": 21960 + }, + { + "epoch": 0.088761580012686, + "grad_norm": 512.8277587890625, + "learning_rate": 9.81798733376479e-06, + "loss": 62.7751, + "step": 21970 + }, + { + "epoch": 0.08880198127805362, + "grad_norm": 531.3379516601562, + "learning_rate": 9.817610070174768e-06, + "loss": 38.6099, + "step": 21980 + }, + { + "epoch": 0.08884238254342126, + "grad_norm": 1373.3828125, + "learning_rate": 9.817232423269622e-06, + "loss": 55.1818, + "step": 21990 + }, + { + "epoch": 0.08888278380878889, + "grad_norm": 410.4630126953125, + "learning_rate": 9.816854393079402e-06, + "loss": 43.0107, + "step": 22000 + }, + { + "epoch": 0.08892318507415652, + "grad_norm": 502.2277526855469, + "learning_rate": 9.816475979634183e-06, + "loss": 52.2099, + "step": 22010 + }, + { + "epoch": 0.08896358633952416, + "grad_norm": 706.2998657226562, + "learning_rate": 9.816097182964076e-06, + "loss": 42.4107, + "step": 22020 + }, + { + "epoch": 0.08900398760489178, + "grad_norm": 916.6030883789062, + "learning_rate": 9.81571800309922e-06, + "loss": 59.517, + "step": 22030 + }, + { + "epoch": 0.08904438887025942, + "grad_norm": 514.5048828125, + "learning_rate": 9.815338440069782e-06, + "loss": 44.6423, + "step": 22040 + }, + { + "epoch": 0.08908479013562705, + "grad_norm": 859.5369262695312, + "learning_rate": 9.814958493905962e-06, + "loss": 46.8622, + "step": 22050 + }, + { + "epoch": 0.08912519140099467, + "grad_norm": 538.0739135742188, + "learning_rate": 9.814578164637996e-06, + "loss": 56.694, + "step": 22060 + }, + { + "epoch": 0.08916559266636231, + "grad_norm": 470.18182373046875, + "learning_rate": 9.81419745229614e-06, + "loss": 36.0551, + "step": 22070 + }, + { + "epoch": 0.08920599393172994, + "grad_norm": 1527.13720703125, + "learning_rate": 9.813816356910685e-06, + "loss": 46.0836, + "step": 22080 + }, + { + "epoch": 0.08924639519709757, + "grad_norm": 1404.5830078125, + "learning_rate": 9.813434878511956e-06, + "loss": 59.701, + "step": 22090 + }, + { + "epoch": 0.0892867964624652, + "grad_norm": 439.1456298828125, + "learning_rate": 9.813053017130305e-06, + "loss": 50.6273, + "step": 22100 + }, + { + "epoch": 0.08932719772783283, + "grad_norm": 3546.374267578125, + "learning_rate": 9.812670772796113e-06, + "loss": 63.3059, + "step": 22110 + }, + { + "epoch": 0.08936759899320047, + "grad_norm": 836.252197265625, + "learning_rate": 9.812288145539796e-06, + "loss": 47.2886, + "step": 22120 + }, + { + "epoch": 0.0894080002585681, + "grad_norm": 1181.7552490234375, + "learning_rate": 9.811905135391796e-06, + "loss": 54.5237, + "step": 22130 + }, + { + "epoch": 0.08944840152393572, + "grad_norm": 661.9832763671875, + "learning_rate": 9.81152174238259e-06, + "loss": 38.4662, + "step": 22140 + }, + { + "epoch": 0.08948880278930336, + "grad_norm": 883.0792846679688, + "learning_rate": 9.81113796654268e-06, + "loss": 57.6954, + "step": 22150 + }, + { + "epoch": 0.08952920405467099, + "grad_norm": 748.608642578125, + "learning_rate": 9.810753807902603e-06, + "loss": 36.5849, + "step": 22160 + }, + { + "epoch": 0.08956960532003862, + "grad_norm": 627.541259765625, + "learning_rate": 9.81036926649292e-06, + "loss": 53.9005, + "step": 22170 + }, + { + "epoch": 0.08961000658540626, + "grad_norm": 563.03662109375, + "learning_rate": 9.809984342344234e-06, + "loss": 41.9332, + "step": 22180 + }, + { + "epoch": 0.08965040785077388, + "grad_norm": 908.7540283203125, + "learning_rate": 9.80959903548717e-06, + "loss": 73.8175, + "step": 22190 + }, + { + "epoch": 0.08969080911614152, + "grad_norm": 650.3843994140625, + "learning_rate": 9.80921334595238e-06, + "loss": 59.4379, + "step": 22200 + }, + { + "epoch": 0.08973121038150915, + "grad_norm": 443.2868347167969, + "learning_rate": 9.808827273770558e-06, + "loss": 47.3863, + "step": 22210 + }, + { + "epoch": 0.08977161164687678, + "grad_norm": 411.5378112792969, + "learning_rate": 9.80844081897242e-06, + "loss": 82.0899, + "step": 22220 + }, + { + "epoch": 0.08981201291224442, + "grad_norm": 210.01197814941406, + "learning_rate": 9.808053981588712e-06, + "loss": 31.6324, + "step": 22230 + }, + { + "epoch": 0.08985241417761204, + "grad_norm": 722.1740112304688, + "learning_rate": 9.807666761650215e-06, + "loss": 69.5212, + "step": 22240 + }, + { + "epoch": 0.08989281544297967, + "grad_norm": 947.6121215820312, + "learning_rate": 9.80727915918774e-06, + "loss": 59.0137, + "step": 22250 + }, + { + "epoch": 0.08993321670834731, + "grad_norm": 697.5064086914062, + "learning_rate": 9.806891174232122e-06, + "loss": 49.3849, + "step": 22260 + }, + { + "epoch": 0.08997361797371493, + "grad_norm": 555.270263671875, + "learning_rate": 9.806502806814236e-06, + "loss": 44.8319, + "step": 22270 + }, + { + "epoch": 0.09001401923908257, + "grad_norm": 414.53692626953125, + "learning_rate": 9.806114056964977e-06, + "loss": 41.9408, + "step": 22280 + }, + { + "epoch": 0.0900544205044502, + "grad_norm": 324.5998840332031, + "learning_rate": 9.805724924715283e-06, + "loss": 65.8233, + "step": 22290 + }, + { + "epoch": 0.09009482176981783, + "grad_norm": 705.962646484375, + "learning_rate": 9.80533541009611e-06, + "loss": 64.0755, + "step": 22300 + }, + { + "epoch": 0.09013522303518547, + "grad_norm": 465.8612976074219, + "learning_rate": 9.804945513138454e-06, + "loss": 44.9652, + "step": 22310 + }, + { + "epoch": 0.09017562430055309, + "grad_norm": 360.5195617675781, + "learning_rate": 9.804555233873335e-06, + "loss": 64.9888, + "step": 22320 + }, + { + "epoch": 0.09021602556592072, + "grad_norm": 339.8669128417969, + "learning_rate": 9.804164572331804e-06, + "loss": 52.764, + "step": 22330 + }, + { + "epoch": 0.09025642683128836, + "grad_norm": 821.659423828125, + "learning_rate": 9.80377352854495e-06, + "loss": 51.951, + "step": 22340 + }, + { + "epoch": 0.09029682809665598, + "grad_norm": 384.43115234375, + "learning_rate": 9.80338210254388e-06, + "loss": 50.9012, + "step": 22350 + }, + { + "epoch": 0.09033722936202362, + "grad_norm": 520.3011474609375, + "learning_rate": 9.80299029435974e-06, + "loss": 53.0986, + "step": 22360 + }, + { + "epoch": 0.09037763062739125, + "grad_norm": 806.8153076171875, + "learning_rate": 9.802598104023706e-06, + "loss": 67.7043, + "step": 22370 + }, + { + "epoch": 0.09041803189275888, + "grad_norm": 443.0950927734375, + "learning_rate": 9.80220553156698e-06, + "loss": 42.6991, + "step": 22380 + }, + { + "epoch": 0.09045843315812652, + "grad_norm": 601.2862548828125, + "learning_rate": 9.801812577020802e-06, + "loss": 61.9289, + "step": 22390 + }, + { + "epoch": 0.09049883442349414, + "grad_norm": 611.3983764648438, + "learning_rate": 9.801419240416432e-06, + "loss": 43.7061, + "step": 22400 + }, + { + "epoch": 0.09053923568886177, + "grad_norm": 507.5821533203125, + "learning_rate": 9.80102552178517e-06, + "loss": 49.6816, + "step": 22410 + }, + { + "epoch": 0.09057963695422941, + "grad_norm": 262.9966735839844, + "learning_rate": 9.800631421158341e-06, + "loss": 39.2033, + "step": 22420 + }, + { + "epoch": 0.09062003821959703, + "grad_norm": 320.47772216796875, + "learning_rate": 9.800236938567302e-06, + "loss": 48.6889, + "step": 22430 + }, + { + "epoch": 0.09066043948496467, + "grad_norm": 678.9915771484375, + "learning_rate": 9.799842074043438e-06, + "loss": 48.05, + "step": 22440 + }, + { + "epoch": 0.0907008407503323, + "grad_norm": 647.835693359375, + "learning_rate": 9.799446827618172e-06, + "loss": 59.4923, + "step": 22450 + }, + { + "epoch": 0.09074124201569993, + "grad_norm": 1000.3504028320312, + "learning_rate": 9.799051199322944e-06, + "loss": 53.8829, + "step": 22460 + }, + { + "epoch": 0.09078164328106757, + "grad_norm": 543.1785278320312, + "learning_rate": 9.798655189189239e-06, + "loss": 53.5274, + "step": 22470 + }, + { + "epoch": 0.0908220445464352, + "grad_norm": 206.3665771484375, + "learning_rate": 9.798258797248563e-06, + "loss": 49.3775, + "step": 22480 + }, + { + "epoch": 0.09086244581180282, + "grad_norm": 499.0605773925781, + "learning_rate": 9.797862023532457e-06, + "loss": 53.9699, + "step": 22490 + }, + { + "epoch": 0.09090284707717046, + "grad_norm": 654.6769409179688, + "learning_rate": 9.797464868072489e-06, + "loss": 46.1002, + "step": 22500 + }, + { + "epoch": 0.09094324834253809, + "grad_norm": 800.8095703125, + "learning_rate": 9.797067330900256e-06, + "loss": 43.4725, + "step": 22510 + }, + { + "epoch": 0.09098364960790573, + "grad_norm": 574.1755981445312, + "learning_rate": 9.796669412047392e-06, + "loss": 44.0503, + "step": 22520 + }, + { + "epoch": 0.09102405087327335, + "grad_norm": 986.699462890625, + "learning_rate": 9.796271111545559e-06, + "loss": 43.2854, + "step": 22530 + }, + { + "epoch": 0.09106445213864098, + "grad_norm": 1443.898193359375, + "learning_rate": 9.795872429426443e-06, + "loss": 61.5884, + "step": 22540 + }, + { + "epoch": 0.09110485340400862, + "grad_norm": 523.7254638671875, + "learning_rate": 9.79547336572177e-06, + "loss": 40.8429, + "step": 22550 + }, + { + "epoch": 0.09114525466937624, + "grad_norm": 681.1943359375, + "learning_rate": 9.795073920463289e-06, + "loss": 68.2972, + "step": 22560 + }, + { + "epoch": 0.09118565593474387, + "grad_norm": 564.5675048828125, + "learning_rate": 9.794674093682781e-06, + "loss": 63.7268, + "step": 22570 + }, + { + "epoch": 0.09122605720011151, + "grad_norm": 379.54693603515625, + "learning_rate": 9.79427388541206e-06, + "loss": 57.5344, + "step": 22580 + }, + { + "epoch": 0.09126645846547914, + "grad_norm": 368.9071044921875, + "learning_rate": 9.79387329568297e-06, + "loss": 44.1385, + "step": 22590 + }, + { + "epoch": 0.09130685973084678, + "grad_norm": 672.96240234375, + "learning_rate": 9.793472324527383e-06, + "loss": 65.3485, + "step": 22600 + }, + { + "epoch": 0.0913472609962144, + "grad_norm": 553.6704711914062, + "learning_rate": 9.793070971977203e-06, + "loss": 51.0067, + "step": 22610 + }, + { + "epoch": 0.09138766226158203, + "grad_norm": 699.769287109375, + "learning_rate": 9.79266923806436e-06, + "loss": 56.594, + "step": 22620 + }, + { + "epoch": 0.09142806352694967, + "grad_norm": 565.623291015625, + "learning_rate": 9.792267122820823e-06, + "loss": 38.9469, + "step": 22630 + }, + { + "epoch": 0.0914684647923173, + "grad_norm": 319.3910217285156, + "learning_rate": 9.791864626278584e-06, + "loss": 57.7853, + "step": 22640 + }, + { + "epoch": 0.09150886605768492, + "grad_norm": 328.24212646484375, + "learning_rate": 9.791461748469669e-06, + "loss": 44.6982, + "step": 22650 + }, + { + "epoch": 0.09154926732305256, + "grad_norm": 479.53546142578125, + "learning_rate": 9.791058489426134e-06, + "loss": 51.2582, + "step": 22660 + }, + { + "epoch": 0.09158966858842019, + "grad_norm": 621.2388305664062, + "learning_rate": 9.790654849180059e-06, + "loss": 40.81, + "step": 22670 + }, + { + "epoch": 0.09163006985378783, + "grad_norm": 640.1640625, + "learning_rate": 9.790250827763566e-06, + "loss": 51.9004, + "step": 22680 + }, + { + "epoch": 0.09167047111915545, + "grad_norm": 623.7976684570312, + "learning_rate": 9.7898464252088e-06, + "loss": 40.9685, + "step": 22690 + }, + { + "epoch": 0.09171087238452308, + "grad_norm": 611.9249267578125, + "learning_rate": 9.789441641547935e-06, + "loss": 60.35, + "step": 22700 + }, + { + "epoch": 0.09175127364989072, + "grad_norm": 923.3186645507812, + "learning_rate": 9.789036476813178e-06, + "loss": 49.1127, + "step": 22710 + }, + { + "epoch": 0.09179167491525834, + "grad_norm": 357.7413635253906, + "learning_rate": 9.788630931036769e-06, + "loss": 44.903, + "step": 22720 + }, + { + "epoch": 0.09183207618062597, + "grad_norm": 742.0269775390625, + "learning_rate": 9.788225004250974e-06, + "loss": 63.4893, + "step": 22730 + }, + { + "epoch": 0.09187247744599361, + "grad_norm": 815.6424560546875, + "learning_rate": 9.78781869648809e-06, + "loss": 48.6374, + "step": 22740 + }, + { + "epoch": 0.09191287871136124, + "grad_norm": 599.6590576171875, + "learning_rate": 9.787412007780445e-06, + "loss": 32.8504, + "step": 22750 + }, + { + "epoch": 0.09195327997672888, + "grad_norm": 545.2987670898438, + "learning_rate": 9.787004938160398e-06, + "loss": 58.4387, + "step": 22760 + }, + { + "epoch": 0.0919936812420965, + "grad_norm": 643.601806640625, + "learning_rate": 9.786597487660336e-06, + "loss": 54.0817, + "step": 22770 + }, + { + "epoch": 0.09203408250746413, + "grad_norm": 852.6585693359375, + "learning_rate": 9.78618965631268e-06, + "loss": 53.0852, + "step": 22780 + }, + { + "epoch": 0.09207448377283177, + "grad_norm": 1306.4365234375, + "learning_rate": 9.785781444149883e-06, + "loss": 34.2664, + "step": 22790 + }, + { + "epoch": 0.0921148850381994, + "grad_norm": 850.7540893554688, + "learning_rate": 9.785372851204415e-06, + "loss": 52.8783, + "step": 22800 + }, + { + "epoch": 0.09215528630356702, + "grad_norm": 1039.580810546875, + "learning_rate": 9.784963877508794e-06, + "loss": 49.9097, + "step": 22810 + }, + { + "epoch": 0.09219568756893466, + "grad_norm": 544.6483764648438, + "learning_rate": 9.784554523095554e-06, + "loss": 49.3491, + "step": 22820 + }, + { + "epoch": 0.09223608883430229, + "grad_norm": 694.8217163085938, + "learning_rate": 9.784144787997272e-06, + "loss": 43.204, + "step": 22830 + }, + { + "epoch": 0.09227649009966993, + "grad_norm": 836.0192260742188, + "learning_rate": 9.783734672246545e-06, + "loss": 43.338, + "step": 22840 + }, + { + "epoch": 0.09231689136503755, + "grad_norm": 443.9679870605469, + "learning_rate": 9.783324175876004e-06, + "loss": 46.913, + "step": 22850 + }, + { + "epoch": 0.09235729263040518, + "grad_norm": 754.3963623046875, + "learning_rate": 9.782913298918311e-06, + "loss": 53.1269, + "step": 22860 + }, + { + "epoch": 0.09239769389577282, + "grad_norm": 625.8417358398438, + "learning_rate": 9.782502041406157e-06, + "loss": 43.964, + "step": 22870 + }, + { + "epoch": 0.09243809516114045, + "grad_norm": 405.47113037109375, + "learning_rate": 9.782090403372263e-06, + "loss": 37.2729, + "step": 22880 + }, + { + "epoch": 0.09247849642650807, + "grad_norm": 881.507568359375, + "learning_rate": 9.781678384849385e-06, + "loss": 58.9882, + "step": 22890 + }, + { + "epoch": 0.09251889769187571, + "grad_norm": 546.2739868164062, + "learning_rate": 9.7812659858703e-06, + "loss": 44.2559, + "step": 22900 + }, + { + "epoch": 0.09255929895724334, + "grad_norm": 428.82354736328125, + "learning_rate": 9.780853206467826e-06, + "loss": 59.4935, + "step": 22910 + }, + { + "epoch": 0.09259970022261098, + "grad_norm": 784.0477294921875, + "learning_rate": 9.780440046674803e-06, + "loss": 56.4658, + "step": 22920 + }, + { + "epoch": 0.0926401014879786, + "grad_norm": 428.0048828125, + "learning_rate": 9.780026506524106e-06, + "loss": 58.3504, + "step": 22930 + }, + { + "epoch": 0.09268050275334623, + "grad_norm": 1084.146728515625, + "learning_rate": 9.779612586048635e-06, + "loss": 54.8185, + "step": 22940 + }, + { + "epoch": 0.09272090401871387, + "grad_norm": 569.2861938476562, + "learning_rate": 9.779198285281326e-06, + "loss": 53.3557, + "step": 22950 + }, + { + "epoch": 0.0927613052840815, + "grad_norm": 486.71405029296875, + "learning_rate": 9.778783604255145e-06, + "loss": 48.3609, + "step": 22960 + }, + { + "epoch": 0.09280170654944912, + "grad_norm": 504.45513916015625, + "learning_rate": 9.778368543003083e-06, + "loss": 55.2413, + "step": 22970 + }, + { + "epoch": 0.09284210781481676, + "grad_norm": 940.0879516601562, + "learning_rate": 9.777953101558164e-06, + "loss": 53.3876, + "step": 22980 + }, + { + "epoch": 0.09288250908018439, + "grad_norm": 656.7711181640625, + "learning_rate": 9.777537279953448e-06, + "loss": 58.7076, + "step": 22990 + }, + { + "epoch": 0.09292291034555203, + "grad_norm": 744.0709838867188, + "learning_rate": 9.777121078222015e-06, + "loss": 45.4177, + "step": 23000 + }, + { + "epoch": 0.09296331161091966, + "grad_norm": 823.136474609375, + "learning_rate": 9.77670449639698e-06, + "loss": 47.3975, + "step": 23010 + }, + { + "epoch": 0.09300371287628728, + "grad_norm": 625.787353515625, + "learning_rate": 9.776287534511492e-06, + "loss": 31.936, + "step": 23020 + }, + { + "epoch": 0.09304411414165492, + "grad_norm": 562.3572387695312, + "learning_rate": 9.775870192598726e-06, + "loss": 68.5727, + "step": 23030 + }, + { + "epoch": 0.09308451540702255, + "grad_norm": 584.5991821289062, + "learning_rate": 9.775452470691886e-06, + "loss": 56.3662, + "step": 23040 + }, + { + "epoch": 0.09312491667239017, + "grad_norm": 476.9333801269531, + "learning_rate": 9.77503436882421e-06, + "loss": 121.5844, + "step": 23050 + }, + { + "epoch": 0.09316531793775781, + "grad_norm": 380.27105712890625, + "learning_rate": 9.774615887028964e-06, + "loss": 45.8855, + "step": 23060 + }, + { + "epoch": 0.09320571920312544, + "grad_norm": 820.0983276367188, + "learning_rate": 9.774197025339442e-06, + "loss": 53.6872, + "step": 23070 + }, + { + "epoch": 0.09324612046849308, + "grad_norm": 1121.6595458984375, + "learning_rate": 9.773777783788976e-06, + "loss": 65.0373, + "step": 23080 + }, + { + "epoch": 0.0932865217338607, + "grad_norm": 942.5689697265625, + "learning_rate": 9.77335816241092e-06, + "loss": 57.7903, + "step": 23090 + }, + { + "epoch": 0.09332692299922833, + "grad_norm": 469.630859375, + "learning_rate": 9.77293816123866e-06, + "loss": 44.2538, + "step": 23100 + }, + { + "epoch": 0.09336732426459597, + "grad_norm": 305.6590270996094, + "learning_rate": 9.772517780305618e-06, + "loss": 54.9539, + "step": 23110 + }, + { + "epoch": 0.0934077255299636, + "grad_norm": 469.95758056640625, + "learning_rate": 9.772097019645236e-06, + "loss": 39.7449, + "step": 23120 + }, + { + "epoch": 0.09344812679533122, + "grad_norm": 896.3937377929688, + "learning_rate": 9.771675879290998e-06, + "loss": 53.7042, + "step": 23130 + }, + { + "epoch": 0.09348852806069886, + "grad_norm": 543.0301513671875, + "learning_rate": 9.771254359276407e-06, + "loss": 44.7906, + "step": 23140 + }, + { + "epoch": 0.09352892932606649, + "grad_norm": 766.5215454101562, + "learning_rate": 9.770832459635004e-06, + "loss": 79.5679, + "step": 23150 + }, + { + "epoch": 0.09356933059143413, + "grad_norm": 1159.5113525390625, + "learning_rate": 9.77041018040036e-06, + "loss": 51.9176, + "step": 23160 + }, + { + "epoch": 0.09360973185680176, + "grad_norm": 884.1097412109375, + "learning_rate": 9.769987521606068e-06, + "loss": 41.639, + "step": 23170 + }, + { + "epoch": 0.09365013312216938, + "grad_norm": 527.7691040039062, + "learning_rate": 9.769564483285761e-06, + "loss": 43.4502, + "step": 23180 + }, + { + "epoch": 0.09369053438753702, + "grad_norm": 605.3202514648438, + "learning_rate": 9.769141065473099e-06, + "loss": 53.9118, + "step": 23190 + }, + { + "epoch": 0.09373093565290465, + "grad_norm": 1220.0347900390625, + "learning_rate": 9.768717268201768e-06, + "loss": 59.4184, + "step": 23200 + }, + { + "epoch": 0.09377133691827227, + "grad_norm": 1246.2518310546875, + "learning_rate": 9.768293091505491e-06, + "loss": 68.2398, + "step": 23210 + }, + { + "epoch": 0.09381173818363991, + "grad_norm": 608.1319580078125, + "learning_rate": 9.767868535418014e-06, + "loss": 61.0542, + "step": 23220 + }, + { + "epoch": 0.09385213944900754, + "grad_norm": 839.668701171875, + "learning_rate": 9.767443599973122e-06, + "loss": 125.2062, + "step": 23230 + }, + { + "epoch": 0.09389254071437518, + "grad_norm": 303.346435546875, + "learning_rate": 9.76701828520462e-06, + "loss": 43.7648, + "step": 23240 + }, + { + "epoch": 0.0939329419797428, + "grad_norm": 724.7330932617188, + "learning_rate": 9.766592591146353e-06, + "loss": 26.6217, + "step": 23250 + }, + { + "epoch": 0.09397334324511043, + "grad_norm": 236.2427978515625, + "learning_rate": 9.766166517832188e-06, + "loss": 45.8864, + "step": 23260 + }, + { + "epoch": 0.09401374451047807, + "grad_norm": 245.2400665283203, + "learning_rate": 9.765740065296025e-06, + "loss": 35.4692, + "step": 23270 + }, + { + "epoch": 0.0940541457758457, + "grad_norm": 612.867431640625, + "learning_rate": 9.765313233571798e-06, + "loss": 48.1673, + "step": 23280 + }, + { + "epoch": 0.09409454704121333, + "grad_norm": 440.6855773925781, + "learning_rate": 9.76488602269347e-06, + "loss": 39.7642, + "step": 23290 + }, + { + "epoch": 0.09413494830658097, + "grad_norm": 500.4783935546875, + "learning_rate": 9.764458432695026e-06, + "loss": 44.2278, + "step": 23300 + }, + { + "epoch": 0.09417534957194859, + "grad_norm": 243.58364868164062, + "learning_rate": 9.76403046361049e-06, + "loss": 31.7886, + "step": 23310 + }, + { + "epoch": 0.09421575083731623, + "grad_norm": 1071.7940673828125, + "learning_rate": 9.763602115473914e-06, + "loss": 66.7946, + "step": 23320 + }, + { + "epoch": 0.09425615210268386, + "grad_norm": 483.341552734375, + "learning_rate": 9.763173388319381e-06, + "loss": 43.0986, + "step": 23330 + }, + { + "epoch": 0.09429655336805148, + "grad_norm": 346.4289245605469, + "learning_rate": 9.762744282181e-06, + "loss": 50.6579, + "step": 23340 + }, + { + "epoch": 0.09433695463341912, + "grad_norm": 708.1380004882812, + "learning_rate": 9.762314797092916e-06, + "loss": 38.8699, + "step": 23350 + }, + { + "epoch": 0.09437735589878675, + "grad_norm": 536.1306762695312, + "learning_rate": 9.761884933089301e-06, + "loss": 43.5115, + "step": 23360 + }, + { + "epoch": 0.09441775716415438, + "grad_norm": 487.45355224609375, + "learning_rate": 9.761454690204352e-06, + "loss": 44.2896, + "step": 23370 + }, + { + "epoch": 0.09445815842952202, + "grad_norm": 896.3792724609375, + "learning_rate": 9.76102406847231e-06, + "loss": 54.5288, + "step": 23380 + }, + { + "epoch": 0.09449855969488964, + "grad_norm": 830.8819580078125, + "learning_rate": 9.760593067927428e-06, + "loss": 97.8602, + "step": 23390 + }, + { + "epoch": 0.09453896096025728, + "grad_norm": 769.9625244140625, + "learning_rate": 9.760161688604008e-06, + "loss": 96.4636, + "step": 23400 + }, + { + "epoch": 0.09457936222562491, + "grad_norm": 772.0631103515625, + "learning_rate": 9.759729930536367e-06, + "loss": 55.2128, + "step": 23410 + }, + { + "epoch": 0.09461976349099253, + "grad_norm": 713.3424072265625, + "learning_rate": 9.75929779375886e-06, + "loss": 43.9229, + "step": 23420 + }, + { + "epoch": 0.09466016475636017, + "grad_norm": 606.2196655273438, + "learning_rate": 9.75886527830587e-06, + "loss": 30.7504, + "step": 23430 + }, + { + "epoch": 0.0947005660217278, + "grad_norm": 384.1034240722656, + "learning_rate": 9.75843238421181e-06, + "loss": 47.3888, + "step": 23440 + }, + { + "epoch": 0.09474096728709543, + "grad_norm": 678.3400268554688, + "learning_rate": 9.757999111511121e-06, + "loss": 52.3418, + "step": 23450 + }, + { + "epoch": 0.09478136855246307, + "grad_norm": 493.89691162109375, + "learning_rate": 9.757565460238281e-06, + "loss": 48.9043, + "step": 23460 + }, + { + "epoch": 0.09482176981783069, + "grad_norm": 785.3358764648438, + "learning_rate": 9.757131430427791e-06, + "loss": 40.5328, + "step": 23470 + }, + { + "epoch": 0.09486217108319833, + "grad_norm": 525.2156372070312, + "learning_rate": 9.756697022114185e-06, + "loss": 35.2212, + "step": 23480 + }, + { + "epoch": 0.09490257234856596, + "grad_norm": 1074.95556640625, + "learning_rate": 9.756262235332029e-06, + "loss": 63.1825, + "step": 23490 + }, + { + "epoch": 0.09494297361393358, + "grad_norm": 756.4033203125, + "learning_rate": 9.755827070115915e-06, + "loss": 60.1457, + "step": 23500 + }, + { + "epoch": 0.09498337487930122, + "grad_norm": 655.587890625, + "learning_rate": 9.755391526500466e-06, + "loss": 58.1549, + "step": 23510 + }, + { + "epoch": 0.09502377614466885, + "grad_norm": 876.078857421875, + "learning_rate": 9.75495560452034e-06, + "loss": 50.9512, + "step": 23520 + }, + { + "epoch": 0.09506417741003648, + "grad_norm": 643.4996337890625, + "learning_rate": 9.754519304210214e-06, + "loss": 45.0397, + "step": 23530 + }, + { + "epoch": 0.09510457867540412, + "grad_norm": 799.6237182617188, + "learning_rate": 9.754082625604812e-06, + "loss": 36.293, + "step": 23540 + }, + { + "epoch": 0.09514497994077174, + "grad_norm": 1540.503173828125, + "learning_rate": 9.753645568738872e-06, + "loss": 61.4238, + "step": 23550 + }, + { + "epoch": 0.09518538120613938, + "grad_norm": 634.837890625, + "learning_rate": 9.75320813364717e-06, + "loss": 48.8427, + "step": 23560 + }, + { + "epoch": 0.09522578247150701, + "grad_norm": 1066.7530517578125, + "learning_rate": 9.752770320364512e-06, + "loss": 45.408, + "step": 23570 + }, + { + "epoch": 0.09526618373687464, + "grad_norm": 431.2601623535156, + "learning_rate": 9.752332128925732e-06, + "loss": 45.2199, + "step": 23580 + }, + { + "epoch": 0.09530658500224228, + "grad_norm": 505.9089660644531, + "learning_rate": 9.751893559365693e-06, + "loss": 38.7723, + "step": 23590 + }, + { + "epoch": 0.0953469862676099, + "grad_norm": 540.0280151367188, + "learning_rate": 9.751454611719294e-06, + "loss": 70.2385, + "step": 23600 + }, + { + "epoch": 0.09538738753297753, + "grad_norm": 233.38671875, + "learning_rate": 9.751015286021455e-06, + "loss": 48.6521, + "step": 23610 + }, + { + "epoch": 0.09542778879834517, + "grad_norm": 429.7454833984375, + "learning_rate": 9.750575582307136e-06, + "loss": 63.9905, + "step": 23620 + }, + { + "epoch": 0.0954681900637128, + "grad_norm": 808.2809448242188, + "learning_rate": 9.75013550061132e-06, + "loss": 67.099, + "step": 23630 + }, + { + "epoch": 0.09550859132908043, + "grad_norm": 462.7481384277344, + "learning_rate": 9.749695040969022e-06, + "loss": 38.5078, + "step": 23640 + }, + { + "epoch": 0.09554899259444806, + "grad_norm": 1053.72607421875, + "learning_rate": 9.749254203415288e-06, + "loss": 60.8307, + "step": 23650 + }, + { + "epoch": 0.09558939385981569, + "grad_norm": 527.76513671875, + "learning_rate": 9.748812987985193e-06, + "loss": 49.8133, + "step": 23660 + }, + { + "epoch": 0.09562979512518333, + "grad_norm": 660.1892700195312, + "learning_rate": 9.748371394713842e-06, + "loss": 42.2738, + "step": 23670 + }, + { + "epoch": 0.09567019639055095, + "grad_norm": 435.3009948730469, + "learning_rate": 9.747929423636372e-06, + "loss": 60.2794, + "step": 23680 + }, + { + "epoch": 0.09571059765591858, + "grad_norm": 705.1487426757812, + "learning_rate": 9.74748707478795e-06, + "loss": 48.4879, + "step": 23690 + }, + { + "epoch": 0.09575099892128622, + "grad_norm": 624.763916015625, + "learning_rate": 9.747044348203766e-06, + "loss": 55.8783, + "step": 23700 + }, + { + "epoch": 0.09579140018665384, + "grad_norm": 243.1126708984375, + "learning_rate": 9.74660124391905e-06, + "loss": 55.0685, + "step": 23710 + }, + { + "epoch": 0.09583180145202148, + "grad_norm": 553.0624389648438, + "learning_rate": 9.746157761969058e-06, + "loss": 64.8828, + "step": 23720 + }, + { + "epoch": 0.09587220271738911, + "grad_norm": 3757.382080078125, + "learning_rate": 9.745713902389074e-06, + "loss": 74.2841, + "step": 23730 + }, + { + "epoch": 0.09591260398275674, + "grad_norm": 869.4652709960938, + "learning_rate": 9.745269665214415e-06, + "loss": 73.5666, + "step": 23740 + }, + { + "epoch": 0.09595300524812438, + "grad_norm": 885.2520751953125, + "learning_rate": 9.744825050480425e-06, + "loss": 54.041, + "step": 23750 + }, + { + "epoch": 0.095993406513492, + "grad_norm": 602.6681518554688, + "learning_rate": 9.744380058222483e-06, + "loss": 58.2484, + "step": 23760 + }, + { + "epoch": 0.09603380777885963, + "grad_norm": 327.1231384277344, + "learning_rate": 9.743934688475994e-06, + "loss": 53.0578, + "step": 23770 + }, + { + "epoch": 0.09607420904422727, + "grad_norm": 537.817626953125, + "learning_rate": 9.743488941276394e-06, + "loss": 45.4333, + "step": 23780 + }, + { + "epoch": 0.0961146103095949, + "grad_norm": 657.9298706054688, + "learning_rate": 9.743042816659147e-06, + "loss": 70.0273, + "step": 23790 + }, + { + "epoch": 0.09615501157496253, + "grad_norm": 439.4751281738281, + "learning_rate": 9.742596314659751e-06, + "loss": 57.6084, + "step": 23800 + }, + { + "epoch": 0.09619541284033016, + "grad_norm": 246.55235290527344, + "learning_rate": 9.742149435313732e-06, + "loss": 47.0762, + "step": 23810 + }, + { + "epoch": 0.09623581410569779, + "grad_norm": 485.2574768066406, + "learning_rate": 9.741702178656647e-06, + "loss": 47.7125, + "step": 23820 + }, + { + "epoch": 0.09627621537106543, + "grad_norm": 946.404052734375, + "learning_rate": 9.74125454472408e-06, + "loss": 64.874, + "step": 23830 + }, + { + "epoch": 0.09631661663643305, + "grad_norm": 368.9576110839844, + "learning_rate": 9.740806533551647e-06, + "loss": 58.668, + "step": 23840 + }, + { + "epoch": 0.09635701790180068, + "grad_norm": 882.6640014648438, + "learning_rate": 9.740358145174999e-06, + "loss": 36.2069, + "step": 23850 + }, + { + "epoch": 0.09639741916716832, + "grad_norm": 496.58367919921875, + "learning_rate": 9.739909379629805e-06, + "loss": 49.835, + "step": 23860 + }, + { + "epoch": 0.09643782043253595, + "grad_norm": 400.39715576171875, + "learning_rate": 9.739460236951778e-06, + "loss": 50.0755, + "step": 23870 + }, + { + "epoch": 0.09647822169790359, + "grad_norm": 576.3343505859375, + "learning_rate": 9.739010717176649e-06, + "loss": 38.722, + "step": 23880 + }, + { + "epoch": 0.09651862296327121, + "grad_norm": 463.68853759765625, + "learning_rate": 9.738560820340189e-06, + "loss": 48.1985, + "step": 23890 + }, + { + "epoch": 0.09655902422863884, + "grad_norm": 504.19940185546875, + "learning_rate": 9.738110546478188e-06, + "loss": 59.8322, + "step": 23900 + }, + { + "epoch": 0.09659942549400648, + "grad_norm": 1040.739501953125, + "learning_rate": 9.737659895626478e-06, + "loss": 63.9173, + "step": 23910 + }, + { + "epoch": 0.0966398267593741, + "grad_norm": 671.8014526367188, + "learning_rate": 9.737208867820914e-06, + "loss": 38.5287, + "step": 23920 + }, + { + "epoch": 0.09668022802474173, + "grad_norm": 643.8292236328125, + "learning_rate": 9.736757463097378e-06, + "loss": 57.7698, + "step": 23930 + }, + { + "epoch": 0.09672062929010937, + "grad_norm": 479.278564453125, + "learning_rate": 9.736305681491792e-06, + "loss": 53.5585, + "step": 23940 + }, + { + "epoch": 0.096761030555477, + "grad_norm": 839.4384765625, + "learning_rate": 9.735853523040098e-06, + "loss": 57.4403, + "step": 23950 + }, + { + "epoch": 0.09680143182084464, + "grad_norm": 667.2028198242188, + "learning_rate": 9.735400987778274e-06, + "loss": 50.5064, + "step": 23960 + }, + { + "epoch": 0.09684183308621226, + "grad_norm": 474.6343078613281, + "learning_rate": 9.734948075742328e-06, + "loss": 65.1481, + "step": 23970 + }, + { + "epoch": 0.09688223435157989, + "grad_norm": 284.0242614746094, + "learning_rate": 9.734494786968293e-06, + "loss": 38.4558, + "step": 23980 + }, + { + "epoch": 0.09692263561694753, + "grad_norm": 335.21466064453125, + "learning_rate": 9.734041121492235e-06, + "loss": 51.5069, + "step": 23990 + }, + { + "epoch": 0.09696303688231515, + "grad_norm": 1233.53369140625, + "learning_rate": 9.733587079350254e-06, + "loss": 58.2334, + "step": 24000 + }, + { + "epoch": 0.09700343814768278, + "grad_norm": 846.1797485351562, + "learning_rate": 9.73313266057847e-06, + "loss": 57.5334, + "step": 24010 + }, + { + "epoch": 0.09704383941305042, + "grad_norm": 553.3931884765625, + "learning_rate": 9.732677865213044e-06, + "loss": 69.9946, + "step": 24020 + }, + { + "epoch": 0.09708424067841805, + "grad_norm": 881.1492309570312, + "learning_rate": 9.73222269329016e-06, + "loss": 55.2486, + "step": 24030 + }, + { + "epoch": 0.09712464194378569, + "grad_norm": 423.26806640625, + "learning_rate": 9.731767144846034e-06, + "loss": 43.9426, + "step": 24040 + }, + { + "epoch": 0.09716504320915331, + "grad_norm": 849.846923828125, + "learning_rate": 9.731311219916912e-06, + "loss": 45.18, + "step": 24050 + }, + { + "epoch": 0.09720544447452094, + "grad_norm": 550.5770874023438, + "learning_rate": 9.730854918539072e-06, + "loss": 48.6627, + "step": 24060 + }, + { + "epoch": 0.09724584573988858, + "grad_norm": 213.45596313476562, + "learning_rate": 9.730398240748816e-06, + "loss": 28.9641, + "step": 24070 + }, + { + "epoch": 0.0972862470052562, + "grad_norm": 736.7731323242188, + "learning_rate": 9.729941186582482e-06, + "loss": 56.5135, + "step": 24080 + }, + { + "epoch": 0.09732664827062383, + "grad_norm": 574.6141967773438, + "learning_rate": 9.729483756076436e-06, + "loss": 39.7891, + "step": 24090 + }, + { + "epoch": 0.09736704953599147, + "grad_norm": 418.90606689453125, + "learning_rate": 9.729025949267072e-06, + "loss": 61.8591, + "step": 24100 + }, + { + "epoch": 0.0974074508013591, + "grad_norm": 603.3834838867188, + "learning_rate": 9.728567766190817e-06, + "loss": 51.6418, + "step": 24110 + }, + { + "epoch": 0.09744785206672674, + "grad_norm": 576.2706298828125, + "learning_rate": 9.728109206884125e-06, + "loss": 48.2781, + "step": 24120 + }, + { + "epoch": 0.09748825333209436, + "grad_norm": 609.32861328125, + "learning_rate": 9.727650271383485e-06, + "loss": 57.3191, + "step": 24130 + }, + { + "epoch": 0.09752865459746199, + "grad_norm": 1344.430908203125, + "learning_rate": 9.727190959725407e-06, + "loss": 51.7622, + "step": 24140 + }, + { + "epoch": 0.09756905586282963, + "grad_norm": 996.9224853515625, + "learning_rate": 9.72673127194644e-06, + "loss": 49.3176, + "step": 24150 + }, + { + "epoch": 0.09760945712819726, + "grad_norm": 469.8564147949219, + "learning_rate": 9.72627120808316e-06, + "loss": 50.8814, + "step": 24160 + }, + { + "epoch": 0.09764985839356488, + "grad_norm": 656.7633666992188, + "learning_rate": 9.725810768172169e-06, + "loss": 42.3984, + "step": 24170 + }, + { + "epoch": 0.09769025965893252, + "grad_norm": 842.8346557617188, + "learning_rate": 9.725349952250105e-06, + "loss": 68.7092, + "step": 24180 + }, + { + "epoch": 0.09773066092430015, + "grad_norm": 724.8569946289062, + "learning_rate": 9.724888760353631e-06, + "loss": 54.5133, + "step": 24190 + }, + { + "epoch": 0.09777106218966779, + "grad_norm": 1168.2147216796875, + "learning_rate": 9.72442719251944e-06, + "loss": 44.4517, + "step": 24200 + }, + { + "epoch": 0.09781146345503541, + "grad_norm": 905.7360229492188, + "learning_rate": 9.723965248784264e-06, + "loss": 67.6947, + "step": 24210 + }, + { + "epoch": 0.09785186472040304, + "grad_norm": 591.1112670898438, + "learning_rate": 9.723502929184851e-06, + "loss": 45.0093, + "step": 24220 + }, + { + "epoch": 0.09789226598577068, + "grad_norm": 1430.4012451171875, + "learning_rate": 9.723040233757987e-06, + "loss": 71.5288, + "step": 24230 + }, + { + "epoch": 0.0979326672511383, + "grad_norm": 675.6157836914062, + "learning_rate": 9.722577162540489e-06, + "loss": 46.933, + "step": 24240 + }, + { + "epoch": 0.09797306851650593, + "grad_norm": 528.8775024414062, + "learning_rate": 9.7221137155692e-06, + "loss": 50.8753, + "step": 24250 + }, + { + "epoch": 0.09801346978187357, + "grad_norm": 880.9699096679688, + "learning_rate": 9.721649892880995e-06, + "loss": 69.2766, + "step": 24260 + }, + { + "epoch": 0.0980538710472412, + "grad_norm": 1034.8077392578125, + "learning_rate": 9.721185694512776e-06, + "loss": 41.0543, + "step": 24270 + }, + { + "epoch": 0.09809427231260884, + "grad_norm": 1290.8067626953125, + "learning_rate": 9.720721120501478e-06, + "loss": 41.7326, + "step": 24280 + }, + { + "epoch": 0.09813467357797646, + "grad_norm": 617.8194580078125, + "learning_rate": 9.720256170884066e-06, + "loss": 45.3977, + "step": 24290 + }, + { + "epoch": 0.09817507484334409, + "grad_norm": 480.2484436035156, + "learning_rate": 9.719790845697534e-06, + "loss": 37.6541, + "step": 24300 + }, + { + "epoch": 0.09821547610871173, + "grad_norm": 569.1683959960938, + "learning_rate": 9.719325144978907e-06, + "loss": 50.1147, + "step": 24310 + }, + { + "epoch": 0.09825587737407936, + "grad_norm": 638.9716796875, + "learning_rate": 9.718859068765234e-06, + "loss": 56.0363, + "step": 24320 + }, + { + "epoch": 0.09829627863944698, + "grad_norm": 638.63525390625, + "learning_rate": 9.718392617093602e-06, + "loss": 44.4817, + "step": 24330 + }, + { + "epoch": 0.09833667990481462, + "grad_norm": 538.0601806640625, + "learning_rate": 9.717925790001125e-06, + "loss": 35.1493, + "step": 24340 + }, + { + "epoch": 0.09837708117018225, + "grad_norm": 655.0274047851562, + "learning_rate": 9.717458587524946e-06, + "loss": 35.8556, + "step": 24350 + }, + { + "epoch": 0.09841748243554987, + "grad_norm": 2356.542724609375, + "learning_rate": 9.716991009702236e-06, + "loss": 77.9995, + "step": 24360 + }, + { + "epoch": 0.09845788370091751, + "grad_norm": 423.50372314453125, + "learning_rate": 9.7165230565702e-06, + "loss": 52.0112, + "step": 24370 + }, + { + "epoch": 0.09849828496628514, + "grad_norm": 596.177001953125, + "learning_rate": 9.71605472816607e-06, + "loss": 68.7233, + "step": 24380 + }, + { + "epoch": 0.09853868623165278, + "grad_norm": 848.5178833007812, + "learning_rate": 9.71558602452711e-06, + "loss": 56.1421, + "step": 24390 + }, + { + "epoch": 0.0985790874970204, + "grad_norm": 548.5185546875, + "learning_rate": 9.71511694569061e-06, + "loss": 57.7374, + "step": 24400 + }, + { + "epoch": 0.09861948876238803, + "grad_norm": 959.719970703125, + "learning_rate": 9.714647491693897e-06, + "loss": 48.1545, + "step": 24410 + }, + { + "epoch": 0.09865989002775567, + "grad_norm": 1168.14794921875, + "learning_rate": 9.714177662574316e-06, + "loss": 60.9899, + "step": 24420 + }, + { + "epoch": 0.0987002912931233, + "grad_norm": 834.342041015625, + "learning_rate": 9.713707458369258e-06, + "loss": 37.8764, + "step": 24430 + }, + { + "epoch": 0.09874069255849093, + "grad_norm": 506.5158386230469, + "learning_rate": 9.713236879116127e-06, + "loss": 63.5037, + "step": 24440 + }, + { + "epoch": 0.09878109382385857, + "grad_norm": 651.78759765625, + "learning_rate": 9.71276592485237e-06, + "loss": 72.3737, + "step": 24450 + }, + { + "epoch": 0.09882149508922619, + "grad_norm": 760.8204345703125, + "learning_rate": 9.712294595615458e-06, + "loss": 52.8254, + "step": 24460 + }, + { + "epoch": 0.09886189635459383, + "grad_norm": 928.4266967773438, + "learning_rate": 9.711822891442887e-06, + "loss": 47.5478, + "step": 24470 + }, + { + "epoch": 0.09890229761996146, + "grad_norm": 553.4059448242188, + "learning_rate": 9.711350812372198e-06, + "loss": 35.1253, + "step": 24480 + }, + { + "epoch": 0.09894269888532908, + "grad_norm": 774.9642944335938, + "learning_rate": 9.710878358440945e-06, + "loss": 90.113, + "step": 24490 + }, + { + "epoch": 0.09898310015069672, + "grad_norm": 762.3937377929688, + "learning_rate": 9.710405529686722e-06, + "loss": 60.1524, + "step": 24500 + }, + { + "epoch": 0.09902350141606435, + "grad_norm": 754.8513793945312, + "learning_rate": 9.709932326147147e-06, + "loss": 57.3479, + "step": 24510 + }, + { + "epoch": 0.09906390268143198, + "grad_norm": 2144.4775390625, + "learning_rate": 9.709458747859874e-06, + "loss": 57.9094, + "step": 24520 + }, + { + "epoch": 0.09910430394679962, + "grad_norm": 404.76617431640625, + "learning_rate": 9.708984794862581e-06, + "loss": 50.33, + "step": 24530 + }, + { + "epoch": 0.09914470521216724, + "grad_norm": 587.0454711914062, + "learning_rate": 9.708510467192981e-06, + "loss": 42.9969, + "step": 24540 + }, + { + "epoch": 0.09918510647753488, + "grad_norm": 457.4508056640625, + "learning_rate": 9.70803576488881e-06, + "loss": 47.6615, + "step": 24550 + }, + { + "epoch": 0.09922550774290251, + "grad_norm": 676.8587646484375, + "learning_rate": 9.707560687987843e-06, + "loss": 48.9946, + "step": 24560 + }, + { + "epoch": 0.09926590900827013, + "grad_norm": 1258.223388671875, + "learning_rate": 9.707085236527873e-06, + "loss": 67.3204, + "step": 24570 + }, + { + "epoch": 0.09930631027363777, + "grad_norm": 503.6045837402344, + "learning_rate": 9.706609410546736e-06, + "loss": 40.1334, + "step": 24580 + }, + { + "epoch": 0.0993467115390054, + "grad_norm": 786.1532592773438, + "learning_rate": 9.706133210082288e-06, + "loss": 54.2325, + "step": 24590 + }, + { + "epoch": 0.09938711280437303, + "grad_norm": 456.151123046875, + "learning_rate": 9.705656635172418e-06, + "loss": 62.9975, + "step": 24600 + }, + { + "epoch": 0.09942751406974067, + "grad_norm": 651.8788452148438, + "learning_rate": 9.705179685855048e-06, + "loss": 44.2353, + "step": 24610 + }, + { + "epoch": 0.09946791533510829, + "grad_norm": 685.383056640625, + "learning_rate": 9.704702362168121e-06, + "loss": 38.0583, + "step": 24620 + }, + { + "epoch": 0.09950831660047593, + "grad_norm": 366.0917053222656, + "learning_rate": 9.704224664149621e-06, + "loss": 37.4004, + "step": 24630 + }, + { + "epoch": 0.09954871786584356, + "grad_norm": 546.1963500976562, + "learning_rate": 9.703746591837552e-06, + "loss": 50.105, + "step": 24640 + }, + { + "epoch": 0.09958911913121118, + "grad_norm": 707.4985961914062, + "learning_rate": 9.703268145269957e-06, + "loss": 45.0067, + "step": 24650 + }, + { + "epoch": 0.09962952039657882, + "grad_norm": 918.8980712890625, + "learning_rate": 9.702789324484898e-06, + "loss": 38.0209, + "step": 24660 + }, + { + "epoch": 0.09966992166194645, + "grad_norm": 1020.766845703125, + "learning_rate": 9.702310129520476e-06, + "loss": 62.4222, + "step": 24670 + }, + { + "epoch": 0.09971032292731408, + "grad_norm": 1279.9139404296875, + "learning_rate": 9.701830560414817e-06, + "loss": 49.9268, + "step": 24680 + }, + { + "epoch": 0.09975072419268172, + "grad_norm": 896.4984130859375, + "learning_rate": 9.701350617206081e-06, + "loss": 60.7759, + "step": 24690 + }, + { + "epoch": 0.09979112545804934, + "grad_norm": 583.581298828125, + "learning_rate": 9.700870299932453e-06, + "loss": 33.8629, + "step": 24700 + }, + { + "epoch": 0.09983152672341698, + "grad_norm": 683.1466674804688, + "learning_rate": 9.700389608632146e-06, + "loss": 65.077, + "step": 24710 + }, + { + "epoch": 0.09987192798878461, + "grad_norm": 742.3271484375, + "learning_rate": 9.699908543343413e-06, + "loss": 44.0271, + "step": 24720 + }, + { + "epoch": 0.09991232925415224, + "grad_norm": 280.29315185546875, + "learning_rate": 9.699427104104525e-06, + "loss": 53.0423, + "step": 24730 + }, + { + "epoch": 0.09995273051951988, + "grad_norm": 834.8994750976562, + "learning_rate": 9.698945290953789e-06, + "loss": 65.0682, + "step": 24740 + }, + { + "epoch": 0.0999931317848875, + "grad_norm": 1069.6033935546875, + "learning_rate": 9.698463103929542e-06, + "loss": 61.3382, + "step": 24750 + }, + { + "epoch": 0.10003353305025513, + "grad_norm": 301.9818115234375, + "learning_rate": 9.69798054307015e-06, + "loss": 42.7463, + "step": 24760 + }, + { + "epoch": 0.10007393431562277, + "grad_norm": 563.1431884765625, + "learning_rate": 9.697497608414007e-06, + "loss": 42.7955, + "step": 24770 + }, + { + "epoch": 0.1001143355809904, + "grad_norm": 715.0123291015625, + "learning_rate": 9.697014299999536e-06, + "loss": 37.4068, + "step": 24780 + }, + { + "epoch": 0.10015473684635803, + "grad_norm": 946.2361450195312, + "learning_rate": 9.696530617865197e-06, + "loss": 61.5986, + "step": 24790 + }, + { + "epoch": 0.10019513811172566, + "grad_norm": 630.42626953125, + "learning_rate": 9.696046562049469e-06, + "loss": 56.6305, + "step": 24800 + }, + { + "epoch": 0.10023553937709329, + "grad_norm": 725.95947265625, + "learning_rate": 9.695562132590865e-06, + "loss": 60.9256, + "step": 24810 + }, + { + "epoch": 0.10027594064246093, + "grad_norm": 868.1839599609375, + "learning_rate": 9.695077329527936e-06, + "loss": 53.6544, + "step": 24820 + }, + { + "epoch": 0.10031634190782855, + "grad_norm": 817.20751953125, + "learning_rate": 9.694592152899249e-06, + "loss": 89.8513, + "step": 24830 + }, + { + "epoch": 0.10035674317319618, + "grad_norm": 603.2654418945312, + "learning_rate": 9.694106602743411e-06, + "loss": 62.9391, + "step": 24840 + }, + { + "epoch": 0.10039714443856382, + "grad_norm": 190.94393920898438, + "learning_rate": 9.693620679099055e-06, + "loss": 50.3173, + "step": 24850 + }, + { + "epoch": 0.10043754570393144, + "grad_norm": 761.8203735351562, + "learning_rate": 9.693134382004839e-06, + "loss": 74.0534, + "step": 24860 + }, + { + "epoch": 0.10047794696929908, + "grad_norm": 860.0404663085938, + "learning_rate": 9.69264771149946e-06, + "loss": 58.5714, + "step": 24870 + }, + { + "epoch": 0.10051834823466671, + "grad_norm": 734.0036010742188, + "learning_rate": 9.692160667621639e-06, + "loss": 52.3132, + "step": 24880 + }, + { + "epoch": 0.10055874950003434, + "grad_norm": 894.4503784179688, + "learning_rate": 9.69167325041013e-06, + "loss": 28.4199, + "step": 24890 + }, + { + "epoch": 0.10059915076540198, + "grad_norm": 394.6111145019531, + "learning_rate": 9.69118545990371e-06, + "loss": 49.8315, + "step": 24900 + }, + { + "epoch": 0.1006395520307696, + "grad_norm": 600.8136596679688, + "learning_rate": 9.690697296141194e-06, + "loss": 40.0911, + "step": 24910 + }, + { + "epoch": 0.10067995329613723, + "grad_norm": 710.9945678710938, + "learning_rate": 9.690208759161418e-06, + "loss": 42.2445, + "step": 24920 + }, + { + "epoch": 0.10072035456150487, + "grad_norm": 516.5250244140625, + "learning_rate": 9.689719849003261e-06, + "loss": 40.4775, + "step": 24930 + }, + { + "epoch": 0.1007607558268725, + "grad_norm": 948.2924194335938, + "learning_rate": 9.689230565705617e-06, + "loss": 54.2192, + "step": 24940 + }, + { + "epoch": 0.10080115709224013, + "grad_norm": 692.30712890625, + "learning_rate": 9.688740909307416e-06, + "loss": 42.3075, + "step": 24950 + }, + { + "epoch": 0.10084155835760776, + "grad_norm": 279.9989013671875, + "learning_rate": 9.68825087984762e-06, + "loss": 44.2146, + "step": 24960 + }, + { + "epoch": 0.10088195962297539, + "grad_norm": 425.18505859375, + "learning_rate": 9.687760477365217e-06, + "loss": 26.8136, + "step": 24970 + }, + { + "epoch": 0.10092236088834303, + "grad_norm": 1031.68115234375, + "learning_rate": 9.687269701899228e-06, + "loss": 73.9425, + "step": 24980 + }, + { + "epoch": 0.10096276215371065, + "grad_norm": 873.32763671875, + "learning_rate": 9.6867785534887e-06, + "loss": 50.5244, + "step": 24990 + }, + { + "epoch": 0.10100316341907828, + "grad_norm": 332.6380920410156, + "learning_rate": 9.686287032172712e-06, + "loss": 49.424, + "step": 25000 + }, + { + "epoch": 0.10104356468444592, + "grad_norm": 6846.8583984375, + "learning_rate": 9.685795137990372e-06, + "loss": 97.2676, + "step": 25010 + }, + { + "epoch": 0.10108396594981355, + "grad_norm": 860.862548828125, + "learning_rate": 9.685302870980819e-06, + "loss": 80.9092, + "step": 25020 + }, + { + "epoch": 0.10112436721518119, + "grad_norm": 712.3943481445312, + "learning_rate": 9.684810231183218e-06, + "loss": 48.4841, + "step": 25030 + }, + { + "epoch": 0.10116476848054881, + "grad_norm": 820.1293334960938, + "learning_rate": 9.684317218636767e-06, + "loss": 42.9877, + "step": 25040 + }, + { + "epoch": 0.10120516974591644, + "grad_norm": 782.7633056640625, + "learning_rate": 9.683823833380692e-06, + "loss": 59.5399, + "step": 25050 + }, + { + "epoch": 0.10124557101128408, + "grad_norm": 718.3815307617188, + "learning_rate": 9.683330075454252e-06, + "loss": 53.6291, + "step": 25060 + }, + { + "epoch": 0.1012859722766517, + "grad_norm": 488.6388854980469, + "learning_rate": 9.68283594489673e-06, + "loss": 53.4475, + "step": 25070 + }, + { + "epoch": 0.10132637354201933, + "grad_norm": 604.8165283203125, + "learning_rate": 9.682341441747446e-06, + "loss": 48.3369, + "step": 25080 + }, + { + "epoch": 0.10136677480738697, + "grad_norm": 1349.2950439453125, + "learning_rate": 9.68184656604574e-06, + "loss": 58.4348, + "step": 25090 + }, + { + "epoch": 0.1014071760727546, + "grad_norm": 1390.267333984375, + "learning_rate": 9.681351317830991e-06, + "loss": 47.6931, + "step": 25100 + }, + { + "epoch": 0.10144757733812224, + "grad_norm": 280.08746337890625, + "learning_rate": 9.680855697142601e-06, + "loss": 64.118, + "step": 25110 + }, + { + "epoch": 0.10148797860348986, + "grad_norm": 441.5658264160156, + "learning_rate": 9.680359704020005e-06, + "loss": 48.1863, + "step": 25120 + }, + { + "epoch": 0.10152837986885749, + "grad_norm": 354.169921875, + "learning_rate": 9.67986333850267e-06, + "loss": 45.0124, + "step": 25130 + }, + { + "epoch": 0.10156878113422513, + "grad_norm": 281.8157043457031, + "learning_rate": 9.679366600630085e-06, + "loss": 74.9676, + "step": 25140 + }, + { + "epoch": 0.10160918239959275, + "grad_norm": 355.419921875, + "learning_rate": 9.678869490441775e-06, + "loss": 57.2231, + "step": 25150 + }, + { + "epoch": 0.10164958366496038, + "grad_norm": 700.468505859375, + "learning_rate": 9.678372007977292e-06, + "loss": 58.7536, + "step": 25160 + }, + { + "epoch": 0.10168998493032802, + "grad_norm": 935.990478515625, + "learning_rate": 9.67787415327622e-06, + "loss": 60.8175, + "step": 25170 + }, + { + "epoch": 0.10173038619569565, + "grad_norm": 727.43994140625, + "learning_rate": 9.67737592637817e-06, + "loss": 61.9682, + "step": 25180 + }, + { + "epoch": 0.10177078746106329, + "grad_norm": 507.2603759765625, + "learning_rate": 9.676877327322785e-06, + "loss": 67.5783, + "step": 25190 + }, + { + "epoch": 0.10181118872643091, + "grad_norm": 652.1765747070312, + "learning_rate": 9.676378356149733e-06, + "loss": 41.8282, + "step": 25200 + }, + { + "epoch": 0.10185158999179854, + "grad_norm": 751.2033081054688, + "learning_rate": 9.675879012898719e-06, + "loss": 55.6442, + "step": 25210 + }, + { + "epoch": 0.10189199125716618, + "grad_norm": 429.15814208984375, + "learning_rate": 9.67537929760947e-06, + "loss": 31.6516, + "step": 25220 + }, + { + "epoch": 0.1019323925225338, + "grad_norm": 612.541015625, + "learning_rate": 9.674879210321747e-06, + "loss": 58.0954, + "step": 25230 + }, + { + "epoch": 0.10197279378790143, + "grad_norm": 3285.99853515625, + "learning_rate": 9.67437875107534e-06, + "loss": 90.851, + "step": 25240 + }, + { + "epoch": 0.10201319505326907, + "grad_norm": 482.5252990722656, + "learning_rate": 9.673877919910069e-06, + "loss": 43.1322, + "step": 25250 + }, + { + "epoch": 0.1020535963186367, + "grad_norm": 661.02294921875, + "learning_rate": 9.673376716865781e-06, + "loss": 59.0004, + "step": 25260 + }, + { + "epoch": 0.10209399758400434, + "grad_norm": 734.8023681640625, + "learning_rate": 9.672875141982358e-06, + "loss": 55.4186, + "step": 25270 + }, + { + "epoch": 0.10213439884937196, + "grad_norm": 605.2401123046875, + "learning_rate": 9.672373195299704e-06, + "loss": 43.0386, + "step": 25280 + }, + { + "epoch": 0.10217480011473959, + "grad_norm": 489.87982177734375, + "learning_rate": 9.67187087685776e-06, + "loss": 48.6595, + "step": 25290 + }, + { + "epoch": 0.10221520138010723, + "grad_norm": 468.3460693359375, + "learning_rate": 9.671368186696488e-06, + "loss": 44.0622, + "step": 25300 + }, + { + "epoch": 0.10225560264547486, + "grad_norm": 1122.7103271484375, + "learning_rate": 9.670865124855889e-06, + "loss": 53.3322, + "step": 25310 + }, + { + "epoch": 0.10229600391084248, + "grad_norm": 437.603515625, + "learning_rate": 9.67036169137599e-06, + "loss": 58.5753, + "step": 25320 + }, + { + "epoch": 0.10233640517621012, + "grad_norm": 671.1591796875, + "learning_rate": 9.669857886296842e-06, + "loss": 52.2499, + "step": 25330 + }, + { + "epoch": 0.10237680644157775, + "grad_norm": 412.7358703613281, + "learning_rate": 9.669353709658537e-06, + "loss": 75.7357, + "step": 25340 + }, + { + "epoch": 0.10241720770694539, + "grad_norm": 769.44775390625, + "learning_rate": 9.668849161501186e-06, + "loss": 50.5775, + "step": 25350 + }, + { + "epoch": 0.10245760897231301, + "grad_norm": 321.61749267578125, + "learning_rate": 9.668344241864934e-06, + "loss": 42.0402, + "step": 25360 + }, + { + "epoch": 0.10249801023768064, + "grad_norm": 645.4732055664062, + "learning_rate": 9.667838950789957e-06, + "loss": 77.0858, + "step": 25370 + }, + { + "epoch": 0.10253841150304828, + "grad_norm": 922.1859130859375, + "learning_rate": 9.667333288316454e-06, + "loss": 69.5223, + "step": 25380 + }, + { + "epoch": 0.1025788127684159, + "grad_norm": 683.8822631835938, + "learning_rate": 9.666827254484663e-06, + "loss": 43.2432, + "step": 25390 + }, + { + "epoch": 0.10261921403378353, + "grad_norm": 686.84423828125, + "learning_rate": 9.666320849334846e-06, + "loss": 67.6858, + "step": 25400 + }, + { + "epoch": 0.10265961529915117, + "grad_norm": 376.72894287109375, + "learning_rate": 9.665814072907293e-06, + "loss": 45.7562, + "step": 25410 + }, + { + "epoch": 0.1027000165645188, + "grad_norm": 690.8438720703125, + "learning_rate": 9.665306925242329e-06, + "loss": 50.5314, + "step": 25420 + }, + { + "epoch": 0.10274041782988644, + "grad_norm": 519.7843017578125, + "learning_rate": 9.664799406380302e-06, + "loss": 35.6388, + "step": 25430 + }, + { + "epoch": 0.10278081909525406, + "grad_norm": 739.2352294921875, + "learning_rate": 9.664291516361597e-06, + "loss": 56.1373, + "step": 25440 + }, + { + "epoch": 0.10282122036062169, + "grad_norm": 564.990966796875, + "learning_rate": 9.663783255226622e-06, + "loss": 71.4753, + "step": 25450 + }, + { + "epoch": 0.10286162162598933, + "grad_norm": 643.6279907226562, + "learning_rate": 9.663274623015816e-06, + "loss": 59.0967, + "step": 25460 + }, + { + "epoch": 0.10290202289135696, + "grad_norm": 794.2958984375, + "learning_rate": 9.662765619769651e-06, + "loss": 58.3527, + "step": 25470 + }, + { + "epoch": 0.10294242415672458, + "grad_norm": 868.2996826171875, + "learning_rate": 9.662256245528622e-06, + "loss": 43.9523, + "step": 25480 + }, + { + "epoch": 0.10298282542209222, + "grad_norm": 301.7526550292969, + "learning_rate": 9.661746500333265e-06, + "loss": 36.3696, + "step": 25490 + }, + { + "epoch": 0.10302322668745985, + "grad_norm": 802.8497924804688, + "learning_rate": 9.66123638422413e-06, + "loss": 58.9308, + "step": 25500 + }, + { + "epoch": 0.10306362795282749, + "grad_norm": 747.5589599609375, + "learning_rate": 9.66072589724181e-06, + "loss": 45.8074, + "step": 25510 + }, + { + "epoch": 0.10310402921819511, + "grad_norm": 474.6321716308594, + "learning_rate": 9.66021503942692e-06, + "loss": 58.1529, + "step": 25520 + }, + { + "epoch": 0.10314443048356274, + "grad_norm": 289.06378173828125, + "learning_rate": 9.659703810820105e-06, + "loss": 57.4549, + "step": 25530 + }, + { + "epoch": 0.10318483174893038, + "grad_norm": 699.4983520507812, + "learning_rate": 9.659192211462043e-06, + "loss": 72.7736, + "step": 25540 + }, + { + "epoch": 0.103225233014298, + "grad_norm": 823.0641479492188, + "learning_rate": 9.658680241393441e-06, + "loss": 53.6581, + "step": 25550 + }, + { + "epoch": 0.10326563427966563, + "grad_norm": 486.10650634765625, + "learning_rate": 9.658167900655032e-06, + "loss": 36.218, + "step": 25560 + }, + { + "epoch": 0.10330603554503327, + "grad_norm": 566.1380004882812, + "learning_rate": 9.657655189287582e-06, + "loss": 44.0325, + "step": 25570 + }, + { + "epoch": 0.1033464368104009, + "grad_norm": 264.7056884765625, + "learning_rate": 9.657142107331883e-06, + "loss": 41.3755, + "step": 25580 + }, + { + "epoch": 0.10338683807576854, + "grad_norm": 781.30712890625, + "learning_rate": 9.65662865482876e-06, + "loss": 51.5806, + "step": 25590 + }, + { + "epoch": 0.10342723934113617, + "grad_norm": 468.07794189453125, + "learning_rate": 9.656114831819067e-06, + "loss": 49.1324, + "step": 25600 + }, + { + "epoch": 0.10346764060650379, + "grad_norm": 287.0177917480469, + "learning_rate": 9.655600638343685e-06, + "loss": 51.1372, + "step": 25610 + }, + { + "epoch": 0.10350804187187143, + "grad_norm": 979.0823364257812, + "learning_rate": 9.655086074443527e-06, + "loss": 46.1757, + "step": 25620 + }, + { + "epoch": 0.10354844313723906, + "grad_norm": 790.9038696289062, + "learning_rate": 9.654571140159534e-06, + "loss": 60.9249, + "step": 25630 + }, + { + "epoch": 0.10358884440260668, + "grad_norm": 796.7022705078125, + "learning_rate": 9.654055835532676e-06, + "loss": 71.7788, + "step": 25640 + }, + { + "epoch": 0.10362924566797432, + "grad_norm": 573.487548828125, + "learning_rate": 9.653540160603956e-06, + "loss": 61.6155, + "step": 25650 + }, + { + "epoch": 0.10366964693334195, + "grad_norm": 654.7489013671875, + "learning_rate": 9.653024115414402e-06, + "loss": 46.4036, + "step": 25660 + }, + { + "epoch": 0.10371004819870959, + "grad_norm": 797.578369140625, + "learning_rate": 9.652507700005072e-06, + "loss": 45.1721, + "step": 25670 + }, + { + "epoch": 0.10375044946407722, + "grad_norm": 274.95050048828125, + "learning_rate": 9.651990914417057e-06, + "loss": 34.9745, + "step": 25680 + }, + { + "epoch": 0.10379085072944484, + "grad_norm": 552.0946044921875, + "learning_rate": 9.651473758691477e-06, + "loss": 53.0471, + "step": 25690 + }, + { + "epoch": 0.10383125199481248, + "grad_norm": 955.2523803710938, + "learning_rate": 9.650956232869475e-06, + "loss": 52.053, + "step": 25700 + }, + { + "epoch": 0.10387165326018011, + "grad_norm": 625.6551513671875, + "learning_rate": 9.650438336992231e-06, + "loss": 56.149, + "step": 25710 + }, + { + "epoch": 0.10391205452554773, + "grad_norm": 296.4748229980469, + "learning_rate": 9.64992007110095e-06, + "loss": 41.5693, + "step": 25720 + }, + { + "epoch": 0.10395245579091537, + "grad_norm": 962.1800537109375, + "learning_rate": 9.64940143523687e-06, + "loss": 86.3123, + "step": 25730 + }, + { + "epoch": 0.103992857056283, + "grad_norm": 618.9432373046875, + "learning_rate": 9.648882429441258e-06, + "loss": 49.0561, + "step": 25740 + }, + { + "epoch": 0.10403325832165064, + "grad_norm": 846.5286254882812, + "learning_rate": 9.648363053755406e-06, + "loss": 54.6641, + "step": 25750 + }, + { + "epoch": 0.10407365958701827, + "grad_norm": 963.1185913085938, + "learning_rate": 9.647843308220636e-06, + "loss": 55.3922, + "step": 25760 + }, + { + "epoch": 0.10411406085238589, + "grad_norm": 1572.095703125, + "learning_rate": 9.647323192878306e-06, + "loss": 59.8284, + "step": 25770 + }, + { + "epoch": 0.10415446211775353, + "grad_norm": 809.3494262695312, + "learning_rate": 9.646802707769798e-06, + "loss": 42.1801, + "step": 25780 + }, + { + "epoch": 0.10419486338312116, + "grad_norm": 745.4800415039062, + "learning_rate": 9.646281852936525e-06, + "loss": 42.9003, + "step": 25790 + }, + { + "epoch": 0.10423526464848878, + "grad_norm": 335.8274841308594, + "learning_rate": 9.64576062841993e-06, + "loss": 51.5139, + "step": 25800 + }, + { + "epoch": 0.10427566591385642, + "grad_norm": 529.6254272460938, + "learning_rate": 9.64523903426148e-06, + "loss": 40.3054, + "step": 25810 + }, + { + "epoch": 0.10431606717922405, + "grad_norm": 672.9226684570312, + "learning_rate": 9.64471707050268e-06, + "loss": 52.9082, + "step": 25820 + }, + { + "epoch": 0.10435646844459169, + "grad_norm": 506.335693359375, + "learning_rate": 9.644194737185058e-06, + "loss": 62.1664, + "step": 25830 + }, + { + "epoch": 0.10439686970995932, + "grad_norm": 320.349853515625, + "learning_rate": 9.643672034350177e-06, + "loss": 56.1261, + "step": 25840 + }, + { + "epoch": 0.10443727097532694, + "grad_norm": 425.2008056640625, + "learning_rate": 9.643148962039622e-06, + "loss": 46.4004, + "step": 25850 + }, + { + "epoch": 0.10447767224069458, + "grad_norm": 435.9482727050781, + "learning_rate": 9.642625520295014e-06, + "loss": 46.6332, + "step": 25860 + }, + { + "epoch": 0.10451807350606221, + "grad_norm": 808.4017333984375, + "learning_rate": 9.642101709158001e-06, + "loss": 50.6643, + "step": 25870 + }, + { + "epoch": 0.10455847477142984, + "grad_norm": 464.9044494628906, + "learning_rate": 9.641577528670257e-06, + "loss": 55.6681, + "step": 25880 + }, + { + "epoch": 0.10459887603679748, + "grad_norm": 979.295166015625, + "learning_rate": 9.641052978873494e-06, + "loss": 68.0048, + "step": 25890 + }, + { + "epoch": 0.1046392773021651, + "grad_norm": 732.1350708007812, + "learning_rate": 9.640528059809442e-06, + "loss": 57.266, + "step": 25900 + }, + { + "epoch": 0.10467967856753274, + "grad_norm": 1971.1300048828125, + "learning_rate": 9.640002771519872e-06, + "loss": 54.0411, + "step": 25910 + }, + { + "epoch": 0.10472007983290037, + "grad_norm": 994.9387817382812, + "learning_rate": 9.639477114046575e-06, + "loss": 42.02, + "step": 25920 + }, + { + "epoch": 0.104760481098268, + "grad_norm": 358.152587890625, + "learning_rate": 9.638951087431376e-06, + "loss": 40.5045, + "step": 25930 + }, + { + "epoch": 0.10480088236363563, + "grad_norm": 386.483154296875, + "learning_rate": 9.638424691716129e-06, + "loss": 66.2506, + "step": 25940 + }, + { + "epoch": 0.10484128362900326, + "grad_norm": 737.9490356445312, + "learning_rate": 9.637897926942716e-06, + "loss": 69.654, + "step": 25950 + }, + { + "epoch": 0.10488168489437089, + "grad_norm": 597.8994750976562, + "learning_rate": 9.637370793153051e-06, + "loss": 50.6889, + "step": 25960 + }, + { + "epoch": 0.10492208615973853, + "grad_norm": 912.8892822265625, + "learning_rate": 9.636843290389076e-06, + "loss": 53.4189, + "step": 25970 + }, + { + "epoch": 0.10496248742510615, + "grad_norm": 1086.555419921875, + "learning_rate": 9.636315418692759e-06, + "loss": 62.2072, + "step": 25980 + }, + { + "epoch": 0.10500288869047379, + "grad_norm": 701.8748779296875, + "learning_rate": 9.635787178106102e-06, + "loss": 48.1612, + "step": 25990 + }, + { + "epoch": 0.10504328995584142, + "grad_norm": 594.7928466796875, + "learning_rate": 9.635258568671135e-06, + "loss": 39.6997, + "step": 26000 + }, + { + "epoch": 0.10508369122120904, + "grad_norm": 475.304443359375, + "learning_rate": 9.634729590429917e-06, + "loss": 42.5213, + "step": 26010 + }, + { + "epoch": 0.10512409248657668, + "grad_norm": 553.9869995117188, + "learning_rate": 9.634200243424535e-06, + "loss": 40.3878, + "step": 26020 + }, + { + "epoch": 0.10516449375194431, + "grad_norm": 477.8350830078125, + "learning_rate": 9.633670527697108e-06, + "loss": 52.4336, + "step": 26030 + }, + { + "epoch": 0.10520489501731194, + "grad_norm": 876.521484375, + "learning_rate": 9.633140443289784e-06, + "loss": 49.6513, + "step": 26040 + }, + { + "epoch": 0.10524529628267958, + "grad_norm": 918.1558227539062, + "learning_rate": 9.632609990244737e-06, + "loss": 45.0479, + "step": 26050 + }, + { + "epoch": 0.1052856975480472, + "grad_norm": 1112.887939453125, + "learning_rate": 9.632079168604175e-06, + "loss": 48.0344, + "step": 26060 + }, + { + "epoch": 0.10532609881341484, + "grad_norm": 494.50140380859375, + "learning_rate": 9.63154797841033e-06, + "loss": 43.9376, + "step": 26070 + }, + { + "epoch": 0.10536650007878247, + "grad_norm": 639.7578735351562, + "learning_rate": 9.63101641970547e-06, + "loss": 57.9903, + "step": 26080 + }, + { + "epoch": 0.1054069013441501, + "grad_norm": 959.657470703125, + "learning_rate": 9.630484492531886e-06, + "loss": 50.5194, + "step": 26090 + }, + { + "epoch": 0.10544730260951773, + "grad_norm": 541.671875, + "learning_rate": 9.629952196931902e-06, + "loss": 29.0569, + "step": 26100 + }, + { + "epoch": 0.10548770387488536, + "grad_norm": 867.1376342773438, + "learning_rate": 9.629419532947872e-06, + "loss": 45.2668, + "step": 26110 + }, + { + "epoch": 0.10552810514025299, + "grad_norm": 859.1763305664062, + "learning_rate": 9.628886500622174e-06, + "loss": 38.3891, + "step": 26120 + }, + { + "epoch": 0.10556850640562063, + "grad_norm": 656.4653930664062, + "learning_rate": 9.62835309999722e-06, + "loss": 39.2543, + "step": 26130 + }, + { + "epoch": 0.10560890767098825, + "grad_norm": 630.9413452148438, + "learning_rate": 9.627819331115453e-06, + "loss": 45.4982, + "step": 26140 + }, + { + "epoch": 0.10564930893635589, + "grad_norm": 902.5018310546875, + "learning_rate": 9.627285194019342e-06, + "loss": 76.9234, + "step": 26150 + }, + { + "epoch": 0.10568971020172352, + "grad_norm": 460.1519775390625, + "learning_rate": 9.626750688751382e-06, + "loss": 49.7342, + "step": 26160 + }, + { + "epoch": 0.10573011146709115, + "grad_norm": 1033.299560546875, + "learning_rate": 9.626215815354104e-06, + "loss": 61.9341, + "step": 26170 + }, + { + "epoch": 0.10577051273245879, + "grad_norm": 726.0341186523438, + "learning_rate": 9.625680573870067e-06, + "loss": 42.1929, + "step": 26180 + }, + { + "epoch": 0.10581091399782641, + "grad_norm": 466.17462158203125, + "learning_rate": 9.625144964341853e-06, + "loss": 42.927, + "step": 26190 + }, + { + "epoch": 0.10585131526319404, + "grad_norm": 163.19650268554688, + "learning_rate": 9.624608986812082e-06, + "loss": 50.3991, + "step": 26200 + }, + { + "epoch": 0.10589171652856168, + "grad_norm": 844.4330444335938, + "learning_rate": 9.624072641323398e-06, + "loss": 47.5053, + "step": 26210 + }, + { + "epoch": 0.1059321177939293, + "grad_norm": 477.6076354980469, + "learning_rate": 9.623535927918474e-06, + "loss": 54.595, + "step": 26220 + }, + { + "epoch": 0.10597251905929694, + "grad_norm": 629.3731079101562, + "learning_rate": 9.622998846640018e-06, + "loss": 51.9192, + "step": 26230 + }, + { + "epoch": 0.10601292032466457, + "grad_norm": 1155.002197265625, + "learning_rate": 9.62246139753076e-06, + "loss": 39.1533, + "step": 26240 + }, + { + "epoch": 0.1060533215900322, + "grad_norm": 685.2892456054688, + "learning_rate": 9.621923580633462e-06, + "loss": 25.9385, + "step": 26250 + }, + { + "epoch": 0.10609372285539984, + "grad_norm": 351.75091552734375, + "learning_rate": 9.621385395990915e-06, + "loss": 33.7549, + "step": 26260 + }, + { + "epoch": 0.10613412412076746, + "grad_norm": 994.1544189453125, + "learning_rate": 9.620846843645944e-06, + "loss": 65.0634, + "step": 26270 + }, + { + "epoch": 0.10617452538613509, + "grad_norm": 448.69256591796875, + "learning_rate": 9.620307923641395e-06, + "loss": 33.9385, + "step": 26280 + }, + { + "epoch": 0.10621492665150273, + "grad_norm": 653.5578002929688, + "learning_rate": 9.61976863602015e-06, + "loss": 64.1324, + "step": 26290 + }, + { + "epoch": 0.10625532791687035, + "grad_norm": 1237.69140625, + "learning_rate": 9.619228980825114e-06, + "loss": 57.0205, + "step": 26300 + }, + { + "epoch": 0.106295729182238, + "grad_norm": 518.8150634765625, + "learning_rate": 9.61868895809923e-06, + "loss": 42.3386, + "step": 26310 + }, + { + "epoch": 0.10633613044760562, + "grad_norm": 1088.1500244140625, + "learning_rate": 9.618148567885462e-06, + "loss": 38.555, + "step": 26320 + }, + { + "epoch": 0.10637653171297325, + "grad_norm": 643.849365234375, + "learning_rate": 9.617607810226806e-06, + "loss": 61.6261, + "step": 26330 + }, + { + "epoch": 0.10641693297834089, + "grad_norm": 997.7747192382812, + "learning_rate": 9.61706668516629e-06, + "loss": 51.5593, + "step": 26340 + }, + { + "epoch": 0.10645733424370851, + "grad_norm": 939.482177734375, + "learning_rate": 9.616525192746965e-06, + "loss": 62.9643, + "step": 26350 + }, + { + "epoch": 0.10649773550907614, + "grad_norm": 840.9242553710938, + "learning_rate": 9.61598333301192e-06, + "loss": 49.4016, + "step": 26360 + }, + { + "epoch": 0.10653813677444378, + "grad_norm": 824.6075439453125, + "learning_rate": 9.615441106004264e-06, + "loss": 40.5007, + "step": 26370 + }, + { + "epoch": 0.1065785380398114, + "grad_norm": 383.22613525390625, + "learning_rate": 9.614898511767142e-06, + "loss": 42.2523, + "step": 26380 + }, + { + "epoch": 0.10661893930517904, + "grad_norm": 478.3547668457031, + "learning_rate": 9.614355550343724e-06, + "loss": 56.1983, + "step": 26390 + }, + { + "epoch": 0.10665934057054667, + "grad_norm": 608.23291015625, + "learning_rate": 9.613812221777212e-06, + "loss": 46.3041, + "step": 26400 + }, + { + "epoch": 0.1066997418359143, + "grad_norm": 820.3971557617188, + "learning_rate": 9.613268526110838e-06, + "loss": 48.1949, + "step": 26410 + }, + { + "epoch": 0.10674014310128194, + "grad_norm": 712.67529296875, + "learning_rate": 9.612724463387857e-06, + "loss": 67.0135, + "step": 26420 + }, + { + "epoch": 0.10678054436664956, + "grad_norm": 780.4419555664062, + "learning_rate": 9.612180033651561e-06, + "loss": 49.2101, + "step": 26430 + }, + { + "epoch": 0.10682094563201719, + "grad_norm": 1020.588623046875, + "learning_rate": 9.611635236945267e-06, + "loss": 65.4931, + "step": 26440 + }, + { + "epoch": 0.10686134689738483, + "grad_norm": 933.3222045898438, + "learning_rate": 9.61109007331232e-06, + "loss": 60.496, + "step": 26450 + }, + { + "epoch": 0.10690174816275246, + "grad_norm": 775.654296875, + "learning_rate": 9.610544542796101e-06, + "loss": 54.8142, + "step": 26460 + }, + { + "epoch": 0.1069421494281201, + "grad_norm": 244.994384765625, + "learning_rate": 9.609998645440011e-06, + "loss": 37.4775, + "step": 26470 + }, + { + "epoch": 0.10698255069348772, + "grad_norm": 1276.1300048828125, + "learning_rate": 9.609452381287486e-06, + "loss": 52.7323, + "step": 26480 + }, + { + "epoch": 0.10702295195885535, + "grad_norm": 626.2915649414062, + "learning_rate": 9.608905750381988e-06, + "loss": 65.2879, + "step": 26490 + }, + { + "epoch": 0.10706335322422299, + "grad_norm": 544.8095092773438, + "learning_rate": 9.608358752767013e-06, + "loss": 41.748, + "step": 26500 + }, + { + "epoch": 0.10710375448959061, + "grad_norm": 863.2129516601562, + "learning_rate": 9.60781138848608e-06, + "loss": 68.7763, + "step": 26510 + }, + { + "epoch": 0.10714415575495824, + "grad_norm": 262.70263671875, + "learning_rate": 9.607263657582744e-06, + "loss": 39.4715, + "step": 26520 + }, + { + "epoch": 0.10718455702032588, + "grad_norm": 399.425537109375, + "learning_rate": 9.60671556010058e-06, + "loss": 50.2638, + "step": 26530 + }, + { + "epoch": 0.1072249582856935, + "grad_norm": 637.1754150390625, + "learning_rate": 9.606167096083205e-06, + "loss": 51.3568, + "step": 26540 + }, + { + "epoch": 0.10726535955106115, + "grad_norm": 692.915283203125, + "learning_rate": 9.60561826557425e-06, + "loss": 43.2627, + "step": 26550 + }, + { + "epoch": 0.10730576081642877, + "grad_norm": 634.8587646484375, + "learning_rate": 9.60506906861739e-06, + "loss": 37.3307, + "step": 26560 + }, + { + "epoch": 0.1073461620817964, + "grad_norm": 641.59228515625, + "learning_rate": 9.604519505256316e-06, + "loss": 57.2253, + "step": 26570 + }, + { + "epoch": 0.10738656334716404, + "grad_norm": 755.43115234375, + "learning_rate": 9.603969575534757e-06, + "loss": 65.6552, + "step": 26580 + }, + { + "epoch": 0.10742696461253166, + "grad_norm": 508.3285827636719, + "learning_rate": 9.60341927949647e-06, + "loss": 62.2325, + "step": 26590 + }, + { + "epoch": 0.10746736587789929, + "grad_norm": 1276.345703125, + "learning_rate": 9.602868617185238e-06, + "loss": 53.7871, + "step": 26600 + }, + { + "epoch": 0.10750776714326693, + "grad_norm": 620.2528686523438, + "learning_rate": 9.602317588644872e-06, + "loss": 59.2076, + "step": 26610 + }, + { + "epoch": 0.10754816840863456, + "grad_norm": 284.98577880859375, + "learning_rate": 9.601766193919217e-06, + "loss": 34.0635, + "step": 26620 + }, + { + "epoch": 0.1075885696740022, + "grad_norm": 777.9571533203125, + "learning_rate": 9.601214433052147e-06, + "loss": 50.2766, + "step": 26630 + }, + { + "epoch": 0.10762897093936982, + "grad_norm": 818.9276733398438, + "learning_rate": 9.600662306087562e-06, + "loss": 62.3097, + "step": 26640 + }, + { + "epoch": 0.10766937220473745, + "grad_norm": 866.0750122070312, + "learning_rate": 9.600109813069389e-06, + "loss": 35.7204, + "step": 26650 + }, + { + "epoch": 0.10770977347010509, + "grad_norm": 968.46484375, + "learning_rate": 9.599556954041591e-06, + "loss": 57.4643, + "step": 26660 + }, + { + "epoch": 0.10775017473547271, + "grad_norm": 1058.4351806640625, + "learning_rate": 9.599003729048157e-06, + "loss": 48.7317, + "step": 26670 + }, + { + "epoch": 0.10779057600084034, + "grad_norm": 965.6939086914062, + "learning_rate": 9.598450138133101e-06, + "loss": 36.7754, + "step": 26680 + }, + { + "epoch": 0.10783097726620798, + "grad_norm": 713.4266967773438, + "learning_rate": 9.597896181340471e-06, + "loss": 44.2337, + "step": 26690 + }, + { + "epoch": 0.1078713785315756, + "grad_norm": 731.5632934570312, + "learning_rate": 9.597341858714344e-06, + "loss": 56.622, + "step": 26700 + }, + { + "epoch": 0.10791177979694325, + "grad_norm": 828.4664306640625, + "learning_rate": 9.596787170298824e-06, + "loss": 45.5558, + "step": 26710 + }, + { + "epoch": 0.10795218106231087, + "grad_norm": 523.5830688476562, + "learning_rate": 9.596232116138047e-06, + "loss": 45.0617, + "step": 26720 + }, + { + "epoch": 0.1079925823276785, + "grad_norm": 1065.1573486328125, + "learning_rate": 9.595676696276173e-06, + "loss": 50.7657, + "step": 26730 + }, + { + "epoch": 0.10803298359304614, + "grad_norm": 664.0277709960938, + "learning_rate": 9.595120910757396e-06, + "loss": 63.9061, + "step": 26740 + }, + { + "epoch": 0.10807338485841377, + "grad_norm": 518.4659423828125, + "learning_rate": 9.594564759625936e-06, + "loss": 48.3799, + "step": 26750 + }, + { + "epoch": 0.10811378612378139, + "grad_norm": 591.0075073242188, + "learning_rate": 9.594008242926046e-06, + "loss": 52.9573, + "step": 26760 + }, + { + "epoch": 0.10815418738914903, + "grad_norm": 374.7685852050781, + "learning_rate": 9.593451360702003e-06, + "loss": 49.838, + "step": 26770 + }, + { + "epoch": 0.10819458865451666, + "grad_norm": 893.8427734375, + "learning_rate": 9.592894112998115e-06, + "loss": 61.3094, + "step": 26780 + }, + { + "epoch": 0.1082349899198843, + "grad_norm": 234.3647003173828, + "learning_rate": 9.592336499858721e-06, + "loss": 64.3124, + "step": 26790 + }, + { + "epoch": 0.10827539118525192, + "grad_norm": 341.9205627441406, + "learning_rate": 9.59177852132819e-06, + "loss": 52.1276, + "step": 26800 + }, + { + "epoch": 0.10831579245061955, + "grad_norm": 509.6340026855469, + "learning_rate": 9.591220177450912e-06, + "loss": 51.8163, + "step": 26810 + }, + { + "epoch": 0.10835619371598719, + "grad_norm": 896.3446655273438, + "learning_rate": 9.590661468271319e-06, + "loss": 49.8318, + "step": 26820 + }, + { + "epoch": 0.10839659498135482, + "grad_norm": 545.89794921875, + "learning_rate": 9.59010239383386e-06, + "loss": 56.6707, + "step": 26830 + }, + { + "epoch": 0.10843699624672244, + "grad_norm": 472.1811828613281, + "learning_rate": 9.589542954183018e-06, + "loss": 39.5817, + "step": 26840 + }, + { + "epoch": 0.10847739751209008, + "grad_norm": 374.705322265625, + "learning_rate": 9.588983149363307e-06, + "loss": 47.7198, + "step": 26850 + }, + { + "epoch": 0.10851779877745771, + "grad_norm": 537.0018310546875, + "learning_rate": 9.588422979419267e-06, + "loss": 59.9525, + "step": 26860 + }, + { + "epoch": 0.10855820004282535, + "grad_norm": 441.2084655761719, + "learning_rate": 9.587862444395471e-06, + "loss": 37.375, + "step": 26870 + }, + { + "epoch": 0.10859860130819297, + "grad_norm": 827.1460571289062, + "learning_rate": 9.587301544336513e-06, + "loss": 51.7789, + "step": 26880 + }, + { + "epoch": 0.1086390025735606, + "grad_norm": 621.7218627929688, + "learning_rate": 9.586740279287024e-06, + "loss": 48.793, + "step": 26890 + }, + { + "epoch": 0.10867940383892824, + "grad_norm": 1153.9176025390625, + "learning_rate": 9.586178649291664e-06, + "loss": 74.7823, + "step": 26900 + }, + { + "epoch": 0.10871980510429587, + "grad_norm": 1106.1214599609375, + "learning_rate": 9.585616654395113e-06, + "loss": 70.2931, + "step": 26910 + }, + { + "epoch": 0.10876020636966349, + "grad_norm": 258.2606201171875, + "learning_rate": 9.585054294642093e-06, + "loss": 47.435, + "step": 26920 + }, + { + "epoch": 0.10880060763503113, + "grad_norm": 509.2825622558594, + "learning_rate": 9.584491570077343e-06, + "loss": 59.2019, + "step": 26930 + }, + { + "epoch": 0.10884100890039876, + "grad_norm": 454.8273620605469, + "learning_rate": 9.58392848074564e-06, + "loss": 40.4965, + "step": 26940 + }, + { + "epoch": 0.1088814101657664, + "grad_norm": 216.57943725585938, + "learning_rate": 9.583365026691785e-06, + "loss": 51.4199, + "step": 26950 + }, + { + "epoch": 0.10892181143113402, + "grad_norm": 636.304931640625, + "learning_rate": 9.58280120796061e-06, + "loss": 36.1534, + "step": 26960 + }, + { + "epoch": 0.10896221269650165, + "grad_norm": 385.84832763671875, + "learning_rate": 9.582237024596974e-06, + "loss": 53.6006, + "step": 26970 + }, + { + "epoch": 0.10900261396186929, + "grad_norm": 586.0780639648438, + "learning_rate": 9.581672476645768e-06, + "loss": 50.0682, + "step": 26980 + }, + { + "epoch": 0.10904301522723692, + "grad_norm": 331.61419677734375, + "learning_rate": 9.58110756415191e-06, + "loss": 54.5409, + "step": 26990 + }, + { + "epoch": 0.10908341649260454, + "grad_norm": 237.11599731445312, + "learning_rate": 9.580542287160348e-06, + "loss": 39.9448, + "step": 27000 + }, + { + "epoch": 0.10912381775797218, + "grad_norm": 481.5954284667969, + "learning_rate": 9.579976645716058e-06, + "loss": 61.8992, + "step": 27010 + }, + { + "epoch": 0.10916421902333981, + "grad_norm": 654.479248046875, + "learning_rate": 9.579410639864046e-06, + "loss": 67.2557, + "step": 27020 + }, + { + "epoch": 0.10920462028870745, + "grad_norm": 524.7271728515625, + "learning_rate": 9.578844269649345e-06, + "loss": 66.595, + "step": 27030 + }, + { + "epoch": 0.10924502155407508, + "grad_norm": 451.2859802246094, + "learning_rate": 9.578277535117022e-06, + "loss": 47.5412, + "step": 27040 + }, + { + "epoch": 0.1092854228194427, + "grad_norm": 448.099853515625, + "learning_rate": 9.577710436312164e-06, + "loss": 58.6692, + "step": 27050 + }, + { + "epoch": 0.10932582408481034, + "grad_norm": 505.60125732421875, + "learning_rate": 9.577142973279896e-06, + "loss": 42.8677, + "step": 27060 + }, + { + "epoch": 0.10936622535017797, + "grad_norm": 724.5872192382812, + "learning_rate": 9.576575146065369e-06, + "loss": 61.896, + "step": 27070 + }, + { + "epoch": 0.1094066266155456, + "grad_norm": 1077.6009521484375, + "learning_rate": 9.576006954713762e-06, + "loss": 60.5044, + "step": 27080 + }, + { + "epoch": 0.10944702788091323, + "grad_norm": 604.5072631835938, + "learning_rate": 9.57543839927028e-06, + "loss": 44.5489, + "step": 27090 + }, + { + "epoch": 0.10948742914628086, + "grad_norm": 524.4970703125, + "learning_rate": 9.574869479780165e-06, + "loss": 85.2106, + "step": 27100 + }, + { + "epoch": 0.1095278304116485, + "grad_norm": 732.091796875, + "learning_rate": 9.57430019628868e-06, + "loss": 36.05, + "step": 27110 + }, + { + "epoch": 0.10956823167701613, + "grad_norm": 695.1461181640625, + "learning_rate": 9.573730548841122e-06, + "loss": 35.9356, + "step": 27120 + }, + { + "epoch": 0.10960863294238375, + "grad_norm": 511.49029541015625, + "learning_rate": 9.573160537482816e-06, + "loss": 46.0325, + "step": 27130 + }, + { + "epoch": 0.10964903420775139, + "grad_norm": 2797.2470703125, + "learning_rate": 9.572590162259112e-06, + "loss": 56.4708, + "step": 27140 + }, + { + "epoch": 0.10968943547311902, + "grad_norm": 478.7276611328125, + "learning_rate": 9.572019423215395e-06, + "loss": 52.5807, + "step": 27150 + }, + { + "epoch": 0.10972983673848664, + "grad_norm": 350.75634765625, + "learning_rate": 9.571448320397076e-06, + "loss": 68.0859, + "step": 27160 + }, + { + "epoch": 0.10977023800385428, + "grad_norm": 709.3019409179688, + "learning_rate": 9.570876853849593e-06, + "loss": 68.0508, + "step": 27170 + }, + { + "epoch": 0.10981063926922191, + "grad_norm": 629.4264526367188, + "learning_rate": 9.570305023618417e-06, + "loss": 42.6662, + "step": 27180 + }, + { + "epoch": 0.10985104053458955, + "grad_norm": 747.0570678710938, + "learning_rate": 9.569732829749045e-06, + "loss": 39.6871, + "step": 27190 + }, + { + "epoch": 0.10989144179995718, + "grad_norm": 605.722412109375, + "learning_rate": 9.569160272287003e-06, + "loss": 40.4398, + "step": 27200 + }, + { + "epoch": 0.1099318430653248, + "grad_norm": 266.65997314453125, + "learning_rate": 9.56858735127785e-06, + "loss": 42.8571, + "step": 27210 + }, + { + "epoch": 0.10997224433069244, + "grad_norm": 1407.9671630859375, + "learning_rate": 9.568014066767166e-06, + "loss": 67.4599, + "step": 27220 + }, + { + "epoch": 0.11001264559606007, + "grad_norm": 347.4352722167969, + "learning_rate": 9.567440418800569e-06, + "loss": 39.2598, + "step": 27230 + }, + { + "epoch": 0.1100530468614277, + "grad_norm": 843.9002685546875, + "learning_rate": 9.566866407423698e-06, + "loss": 48.4884, + "step": 27240 + }, + { + "epoch": 0.11009344812679533, + "grad_norm": 428.3054504394531, + "learning_rate": 9.566292032682228e-06, + "loss": 36.8349, + "step": 27250 + }, + { + "epoch": 0.11013384939216296, + "grad_norm": 707.8931274414062, + "learning_rate": 9.565717294621856e-06, + "loss": 59.8093, + "step": 27260 + }, + { + "epoch": 0.1101742506575306, + "grad_norm": 828.3231811523438, + "learning_rate": 9.565142193288313e-06, + "loss": 51.0172, + "step": 27270 + }, + { + "epoch": 0.11021465192289823, + "grad_norm": 589.5343627929688, + "learning_rate": 9.564566728727358e-06, + "loss": 40.5171, + "step": 27280 + }, + { + "epoch": 0.11025505318826585, + "grad_norm": 1542.44140625, + "learning_rate": 9.563990900984775e-06, + "loss": 40.179, + "step": 27290 + }, + { + "epoch": 0.11029545445363349, + "grad_norm": 592.8526611328125, + "learning_rate": 9.563414710106382e-06, + "loss": 53.8067, + "step": 27300 + }, + { + "epoch": 0.11033585571900112, + "grad_norm": 588.4796752929688, + "learning_rate": 9.562838156138025e-06, + "loss": 41.335, + "step": 27310 + }, + { + "epoch": 0.11037625698436875, + "grad_norm": 491.3796081542969, + "learning_rate": 9.562261239125575e-06, + "loss": 64.4173, + "step": 27320 + }, + { + "epoch": 0.11041665824973639, + "grad_norm": 712.91796875, + "learning_rate": 9.561683959114938e-06, + "loss": 45.0702, + "step": 27330 + }, + { + "epoch": 0.11045705951510401, + "grad_norm": 844.9070434570312, + "learning_rate": 9.561106316152043e-06, + "loss": 64.8769, + "step": 27340 + }, + { + "epoch": 0.11049746078047164, + "grad_norm": 886.7588500976562, + "learning_rate": 9.56052831028285e-06, + "loss": 68.3311, + "step": 27350 + }, + { + "epoch": 0.11053786204583928, + "grad_norm": 494.68353271484375, + "learning_rate": 9.559949941553351e-06, + "loss": 100.2485, + "step": 27360 + }, + { + "epoch": 0.1105782633112069, + "grad_norm": 414.14691162109375, + "learning_rate": 9.559371210009562e-06, + "loss": 59.7353, + "step": 27370 + }, + { + "epoch": 0.11061866457657454, + "grad_norm": 317.25750732421875, + "learning_rate": 9.55879211569753e-06, + "loss": 42.0364, + "step": 27380 + }, + { + "epoch": 0.11065906584194217, + "grad_norm": 559.4423828125, + "learning_rate": 9.55821265866333e-06, + "loss": 57.4018, + "step": 27390 + }, + { + "epoch": 0.1106994671073098, + "grad_norm": 521.9277954101562, + "learning_rate": 9.55763283895307e-06, + "loss": 73.1083, + "step": 27400 + }, + { + "epoch": 0.11073986837267744, + "grad_norm": 671.8775634765625, + "learning_rate": 9.557052656612882e-06, + "loss": 60.0441, + "step": 27410 + }, + { + "epoch": 0.11078026963804506, + "grad_norm": 3136.1943359375, + "learning_rate": 9.556472111688928e-06, + "loss": 67.2099, + "step": 27420 + }, + { + "epoch": 0.11082067090341269, + "grad_norm": 810.9188842773438, + "learning_rate": 9.555891204227399e-06, + "loss": 47.5018, + "step": 27430 + }, + { + "epoch": 0.11086107216878033, + "grad_norm": 692.3886108398438, + "learning_rate": 9.555309934274515e-06, + "loss": 39.6191, + "step": 27440 + }, + { + "epoch": 0.11090147343414795, + "grad_norm": 672.2152099609375, + "learning_rate": 9.554728301876525e-06, + "loss": 56.8934, + "step": 27450 + }, + { + "epoch": 0.1109418746995156, + "grad_norm": 684.3619995117188, + "learning_rate": 9.554146307079711e-06, + "loss": 54.4783, + "step": 27460 + }, + { + "epoch": 0.11098227596488322, + "grad_norm": 448.3148498535156, + "learning_rate": 9.553563949930374e-06, + "loss": 53.4588, + "step": 27470 + }, + { + "epoch": 0.11102267723025085, + "grad_norm": 957.7836303710938, + "learning_rate": 9.552981230474849e-06, + "loss": 58.2424, + "step": 27480 + }, + { + "epoch": 0.11106307849561849, + "grad_norm": 363.869384765625, + "learning_rate": 9.552398148759506e-06, + "loss": 39.7609, + "step": 27490 + }, + { + "epoch": 0.11110347976098611, + "grad_norm": 365.90118408203125, + "learning_rate": 9.551814704830734e-06, + "loss": 36.9783, + "step": 27500 + }, + { + "epoch": 0.11114388102635374, + "grad_norm": 503.6572570800781, + "learning_rate": 9.551230898734955e-06, + "loss": 30.8108, + "step": 27510 + }, + { + "epoch": 0.11118428229172138, + "grad_norm": 128.0505828857422, + "learning_rate": 9.550646730518623e-06, + "loss": 48.5108, + "step": 27520 + }, + { + "epoch": 0.111224683557089, + "grad_norm": 435.7357177734375, + "learning_rate": 9.550062200228214e-06, + "loss": 35.7215, + "step": 27530 + }, + { + "epoch": 0.11126508482245664, + "grad_norm": 975.533447265625, + "learning_rate": 9.549477307910238e-06, + "loss": 49.5548, + "step": 27540 + }, + { + "epoch": 0.11130548608782427, + "grad_norm": 533.999267578125, + "learning_rate": 9.548892053611232e-06, + "loss": 67.928, + "step": 27550 + }, + { + "epoch": 0.1113458873531919, + "grad_norm": 621.736328125, + "learning_rate": 9.54830643737776e-06, + "loss": 35.6264, + "step": 27560 + }, + { + "epoch": 0.11138628861855954, + "grad_norm": 805.3984985351562, + "learning_rate": 9.54772045925642e-06, + "loss": 59.4797, + "step": 27570 + }, + { + "epoch": 0.11142668988392716, + "grad_norm": 538.3480224609375, + "learning_rate": 9.547134119293835e-06, + "loss": 51.2988, + "step": 27580 + }, + { + "epoch": 0.11146709114929479, + "grad_norm": 699.3734741210938, + "learning_rate": 9.546547417536656e-06, + "loss": 43.1402, + "step": 27590 + }, + { + "epoch": 0.11150749241466243, + "grad_norm": 773.6231689453125, + "learning_rate": 9.545960354031564e-06, + "loss": 69.3904, + "step": 27600 + }, + { + "epoch": 0.11154789368003006, + "grad_norm": 1388.7728271484375, + "learning_rate": 9.545372928825271e-06, + "loss": 58.2387, + "step": 27610 + }, + { + "epoch": 0.1115882949453977, + "grad_norm": 633.869384765625, + "learning_rate": 9.544785141964514e-06, + "loss": 72.711, + "step": 27620 + }, + { + "epoch": 0.11162869621076532, + "grad_norm": 1325.7919921875, + "learning_rate": 9.544196993496062e-06, + "loss": 52.6202, + "step": 27630 + }, + { + "epoch": 0.11166909747613295, + "grad_norm": 556.158935546875, + "learning_rate": 9.54360848346671e-06, + "loss": 35.6353, + "step": 27640 + }, + { + "epoch": 0.11170949874150059, + "grad_norm": 634.7527465820312, + "learning_rate": 9.543019611923283e-06, + "loss": 57.7024, + "step": 27650 + }, + { + "epoch": 0.11174990000686821, + "grad_norm": 603.2935791015625, + "learning_rate": 9.542430378912634e-06, + "loss": 54.0695, + "step": 27660 + }, + { + "epoch": 0.11179030127223584, + "grad_norm": 494.1707458496094, + "learning_rate": 9.541840784481648e-06, + "loss": 36.8178, + "step": 27670 + }, + { + "epoch": 0.11183070253760348, + "grad_norm": 764.1178588867188, + "learning_rate": 9.541250828677235e-06, + "loss": 47.1555, + "step": 27680 + }, + { + "epoch": 0.1118711038029711, + "grad_norm": 681.1707153320312, + "learning_rate": 9.540660511546335e-06, + "loss": 42.6035, + "step": 27690 + }, + { + "epoch": 0.11191150506833875, + "grad_norm": 1050.41015625, + "learning_rate": 9.540069833135917e-06, + "loss": 57.2844, + "step": 27700 + }, + { + "epoch": 0.11195190633370637, + "grad_norm": 494.7498779296875, + "learning_rate": 9.539478793492978e-06, + "loss": 44.8998, + "step": 27710 + }, + { + "epoch": 0.111992307599074, + "grad_norm": 657.7085571289062, + "learning_rate": 9.538887392664544e-06, + "loss": 35.1478, + "step": 27720 + }, + { + "epoch": 0.11203270886444164, + "grad_norm": 864.594970703125, + "learning_rate": 9.53829563069767e-06, + "loss": 54.5924, + "step": 27730 + }, + { + "epoch": 0.11207311012980926, + "grad_norm": 439.203857421875, + "learning_rate": 9.537703507639444e-06, + "loss": 49.3219, + "step": 27740 + }, + { + "epoch": 0.11211351139517689, + "grad_norm": 404.7133483886719, + "learning_rate": 9.537111023536973e-06, + "loss": 55.6897, + "step": 27750 + }, + { + "epoch": 0.11215391266054453, + "grad_norm": 669.194091796875, + "learning_rate": 9.536518178437402e-06, + "loss": 39.5461, + "step": 27760 + }, + { + "epoch": 0.11219431392591216, + "grad_norm": 591.7159423828125, + "learning_rate": 9.535924972387898e-06, + "loss": 34.4681, + "step": 27770 + }, + { + "epoch": 0.1122347151912798, + "grad_norm": 478.49212646484375, + "learning_rate": 9.535331405435662e-06, + "loss": 40.2554, + "step": 27780 + }, + { + "epoch": 0.11227511645664742, + "grad_norm": 549.56787109375, + "learning_rate": 9.534737477627918e-06, + "loss": 28.9227, + "step": 27790 + }, + { + "epoch": 0.11231551772201505, + "grad_norm": 686.5111083984375, + "learning_rate": 9.534143189011928e-06, + "loss": 63.8506, + "step": 27800 + }, + { + "epoch": 0.11235591898738269, + "grad_norm": 730.6181030273438, + "learning_rate": 9.533548539634971e-06, + "loss": 65.5437, + "step": 27810 + }, + { + "epoch": 0.11239632025275031, + "grad_norm": 625.7398681640625, + "learning_rate": 9.532953529544365e-06, + "loss": 49.1752, + "step": 27820 + }, + { + "epoch": 0.11243672151811794, + "grad_norm": 332.5801696777344, + "learning_rate": 9.532358158787446e-06, + "loss": 50.4811, + "step": 27830 + }, + { + "epoch": 0.11247712278348558, + "grad_norm": 855.0786743164062, + "learning_rate": 9.531762427411592e-06, + "loss": 32.4726, + "step": 27840 + }, + { + "epoch": 0.1125175240488532, + "grad_norm": 464.85125732421875, + "learning_rate": 9.531166335464198e-06, + "loss": 57.4719, + "step": 27850 + }, + { + "epoch": 0.11255792531422085, + "grad_norm": 464.2534484863281, + "learning_rate": 9.530569882992698e-06, + "loss": 56.7898, + "step": 27860 + }, + { + "epoch": 0.11259832657958847, + "grad_norm": 338.19952392578125, + "learning_rate": 9.52997307004454e-06, + "loss": 45.8968, + "step": 27870 + }, + { + "epoch": 0.1126387278449561, + "grad_norm": 974.372314453125, + "learning_rate": 9.529375896667218e-06, + "loss": 58.9688, + "step": 27880 + }, + { + "epoch": 0.11267912911032374, + "grad_norm": 679.4461059570312, + "learning_rate": 9.528778362908241e-06, + "loss": 39.7818, + "step": 27890 + }, + { + "epoch": 0.11271953037569137, + "grad_norm": 752.9938354492188, + "learning_rate": 9.528180468815155e-06, + "loss": 52.0521, + "step": 27900 + }, + { + "epoch": 0.11275993164105899, + "grad_norm": 577.4219970703125, + "learning_rate": 9.527582214435531e-06, + "loss": 35.0712, + "step": 27910 + }, + { + "epoch": 0.11280033290642663, + "grad_norm": 1068.46337890625, + "learning_rate": 9.526983599816968e-06, + "loss": 53.9641, + "step": 27920 + }, + { + "epoch": 0.11284073417179426, + "grad_norm": 313.15789794921875, + "learning_rate": 9.526384625007096e-06, + "loss": 46.0763, + "step": 27930 + }, + { + "epoch": 0.1128811354371619, + "grad_norm": 842.06591796875, + "learning_rate": 9.525785290053573e-06, + "loss": 47.3168, + "step": 27940 + }, + { + "epoch": 0.11292153670252952, + "grad_norm": 699.9207153320312, + "learning_rate": 9.525185595004085e-06, + "loss": 90.293, + "step": 27950 + }, + { + "epoch": 0.11296193796789715, + "grad_norm": 576.01220703125, + "learning_rate": 9.524585539906345e-06, + "loss": 45.2152, + "step": 27960 + }, + { + "epoch": 0.11300233923326479, + "grad_norm": 606.6934204101562, + "learning_rate": 9.523985124808102e-06, + "loss": 55.5013, + "step": 27970 + }, + { + "epoch": 0.11304274049863242, + "grad_norm": 891.949462890625, + "learning_rate": 9.523384349757123e-06, + "loss": 43.9636, + "step": 27980 + }, + { + "epoch": 0.11308314176400004, + "grad_norm": 659.2472534179688, + "learning_rate": 9.522783214801213e-06, + "loss": 71.8418, + "step": 27990 + }, + { + "epoch": 0.11312354302936768, + "grad_norm": 873.6649169921875, + "learning_rate": 9.522181719988196e-06, + "loss": 47.9444, + "step": 28000 + }, + { + "epoch": 0.11316394429473531, + "grad_norm": 907.7310180664062, + "learning_rate": 9.521579865365935e-06, + "loss": 52.8025, + "step": 28010 + }, + { + "epoch": 0.11320434556010295, + "grad_norm": 656.29248046875, + "learning_rate": 9.520977650982316e-06, + "loss": 43.1285, + "step": 28020 + }, + { + "epoch": 0.11324474682547057, + "grad_norm": 368.1136474609375, + "learning_rate": 9.520375076885253e-06, + "loss": 57.7966, + "step": 28030 + }, + { + "epoch": 0.1132851480908382, + "grad_norm": 934.730224609375, + "learning_rate": 9.519772143122691e-06, + "loss": 64.5761, + "step": 28040 + }, + { + "epoch": 0.11332554935620584, + "grad_norm": 1095.819580078125, + "learning_rate": 9.519168849742603e-06, + "loss": 67.1447, + "step": 28050 + }, + { + "epoch": 0.11336595062157347, + "grad_norm": 500.99053955078125, + "learning_rate": 9.51856519679299e-06, + "loss": 38.3834, + "step": 28060 + }, + { + "epoch": 0.11340635188694109, + "grad_norm": 684.0264892578125, + "learning_rate": 9.517961184321882e-06, + "loss": 59.6902, + "step": 28070 + }, + { + "epoch": 0.11344675315230873, + "grad_norm": 419.83099365234375, + "learning_rate": 9.517356812377336e-06, + "loss": 43.5778, + "step": 28080 + }, + { + "epoch": 0.11348715441767636, + "grad_norm": 859.154052734375, + "learning_rate": 9.516752081007441e-06, + "loss": 60.8287, + "step": 28090 + }, + { + "epoch": 0.113527555683044, + "grad_norm": 688.739501953125, + "learning_rate": 9.51614699026031e-06, + "loss": 51.3651, + "step": 28100 + }, + { + "epoch": 0.11356795694841162, + "grad_norm": 646.2841796875, + "learning_rate": 9.515541540184093e-06, + "loss": 49.2372, + "step": 28110 + }, + { + "epoch": 0.11360835821377925, + "grad_norm": 857.5767211914062, + "learning_rate": 9.514935730826957e-06, + "loss": 46.2163, + "step": 28120 + }, + { + "epoch": 0.11364875947914689, + "grad_norm": 553.1755981445312, + "learning_rate": 9.514329562237107e-06, + "loss": 42.8402, + "step": 28130 + }, + { + "epoch": 0.11368916074451452, + "grad_norm": 509.92474365234375, + "learning_rate": 9.51372303446277e-06, + "loss": 44.156, + "step": 28140 + }, + { + "epoch": 0.11372956200988214, + "grad_norm": 554.966552734375, + "learning_rate": 9.513116147552207e-06, + "loss": 42.856, + "step": 28150 + }, + { + "epoch": 0.11376996327524978, + "grad_norm": 784.25830078125, + "learning_rate": 9.512508901553703e-06, + "loss": 67.9408, + "step": 28160 + }, + { + "epoch": 0.11381036454061741, + "grad_norm": 406.4463806152344, + "learning_rate": 9.511901296515578e-06, + "loss": 57.0736, + "step": 28170 + }, + { + "epoch": 0.11385076580598505, + "grad_norm": 957.32080078125, + "learning_rate": 9.511293332486172e-06, + "loss": 55.6416, + "step": 28180 + }, + { + "epoch": 0.11389116707135268, + "grad_norm": 840.68310546875, + "learning_rate": 9.51068500951386e-06, + "loss": 56.1937, + "step": 28190 + }, + { + "epoch": 0.1139315683367203, + "grad_norm": 607.9652709960938, + "learning_rate": 9.510076327647043e-06, + "loss": 50.4717, + "step": 28200 + }, + { + "epoch": 0.11397196960208794, + "grad_norm": 921.6870727539062, + "learning_rate": 9.509467286934151e-06, + "loss": 35.7569, + "step": 28210 + }, + { + "epoch": 0.11401237086745557, + "grad_norm": 740.13232421875, + "learning_rate": 9.508857887423644e-06, + "loss": 46.1539, + "step": 28220 + }, + { + "epoch": 0.1140527721328232, + "grad_norm": 872.6014404296875, + "learning_rate": 9.508248129164006e-06, + "loss": 51.5815, + "step": 28230 + }, + { + "epoch": 0.11409317339819083, + "grad_norm": 684.9937744140625, + "learning_rate": 9.507638012203755e-06, + "loss": 55.3623, + "step": 28240 + }, + { + "epoch": 0.11413357466355846, + "grad_norm": 487.1953430175781, + "learning_rate": 9.507027536591436e-06, + "loss": 37.2326, + "step": 28250 + }, + { + "epoch": 0.1141739759289261, + "grad_norm": 896.9238891601562, + "learning_rate": 9.506416702375618e-06, + "loss": 45.9022, + "step": 28260 + }, + { + "epoch": 0.11421437719429373, + "grad_norm": 454.0743408203125, + "learning_rate": 9.505805509604906e-06, + "loss": 57.2897, + "step": 28270 + }, + { + "epoch": 0.11425477845966135, + "grad_norm": 1185.238525390625, + "learning_rate": 9.505193958327927e-06, + "loss": 74.1116, + "step": 28280 + }, + { + "epoch": 0.11429517972502899, + "grad_norm": 518.6088256835938, + "learning_rate": 9.504582048593343e-06, + "loss": 52.5233, + "step": 28290 + }, + { + "epoch": 0.11433558099039662, + "grad_norm": 625.5014038085938, + "learning_rate": 9.503969780449838e-06, + "loss": 32.3718, + "step": 28300 + }, + { + "epoch": 0.11437598225576424, + "grad_norm": 1069.5562744140625, + "learning_rate": 9.503357153946126e-06, + "loss": 50.4978, + "step": 28310 + }, + { + "epoch": 0.11441638352113188, + "grad_norm": 333.4046325683594, + "learning_rate": 9.502744169130955e-06, + "loss": 50.467, + "step": 28320 + }, + { + "epoch": 0.11445678478649951, + "grad_norm": 344.731201171875, + "learning_rate": 9.502130826053095e-06, + "loss": 49.283, + "step": 28330 + }, + { + "epoch": 0.11449718605186715, + "grad_norm": 707.1812744140625, + "learning_rate": 9.501517124761347e-06, + "loss": 35.1441, + "step": 28340 + }, + { + "epoch": 0.11453758731723478, + "grad_norm": 1012.0341796875, + "learning_rate": 9.50090306530454e-06, + "loss": 61.0239, + "step": 28350 + }, + { + "epoch": 0.1145779885826024, + "grad_norm": 489.1926574707031, + "learning_rate": 9.500288647731533e-06, + "loss": 40.3379, + "step": 28360 + }, + { + "epoch": 0.11461838984797004, + "grad_norm": 698.6495361328125, + "learning_rate": 9.49967387209121e-06, + "loss": 53.1061, + "step": 28370 + }, + { + "epoch": 0.11465879111333767, + "grad_norm": 1728.1700439453125, + "learning_rate": 9.499058738432492e-06, + "loss": 51.7011, + "step": 28380 + }, + { + "epoch": 0.1146991923787053, + "grad_norm": 505.17584228515625, + "learning_rate": 9.498443246804314e-06, + "loss": 56.5864, + "step": 28390 + }, + { + "epoch": 0.11473959364407293, + "grad_norm": 463.1519470214844, + "learning_rate": 9.497827397255655e-06, + "loss": 53.7967, + "step": 28400 + }, + { + "epoch": 0.11477999490944056, + "grad_norm": 1365.9278564453125, + "learning_rate": 9.49721118983551e-06, + "loss": 63.5547, + "step": 28410 + }, + { + "epoch": 0.1148203961748082, + "grad_norm": 1068.4659423828125, + "learning_rate": 9.49659462459291e-06, + "loss": 46.5801, + "step": 28420 + }, + { + "epoch": 0.11486079744017583, + "grad_norm": 507.3717956542969, + "learning_rate": 9.495977701576913e-06, + "loss": 38.8723, + "step": 28430 + }, + { + "epoch": 0.11490119870554345, + "grad_norm": 637.6323852539062, + "learning_rate": 9.495360420836603e-06, + "loss": 63.2256, + "step": 28440 + }, + { + "epoch": 0.11494159997091109, + "grad_norm": 1089.265869140625, + "learning_rate": 9.494742782421099e-06, + "loss": 51.0123, + "step": 28450 + }, + { + "epoch": 0.11498200123627872, + "grad_norm": 891.3598022460938, + "learning_rate": 9.494124786379535e-06, + "loss": 43.423, + "step": 28460 + }, + { + "epoch": 0.11502240250164635, + "grad_norm": 604.9508056640625, + "learning_rate": 9.49350643276109e-06, + "loss": 46.5848, + "step": 28470 + }, + { + "epoch": 0.11506280376701399, + "grad_norm": 635.8272094726562, + "learning_rate": 9.49288772161496e-06, + "loss": 37.0862, + "step": 28480 + }, + { + "epoch": 0.11510320503238161, + "grad_norm": 371.99444580078125, + "learning_rate": 9.492268652990374e-06, + "loss": 61.0252, + "step": 28490 + }, + { + "epoch": 0.11514360629774925, + "grad_norm": 503.8235778808594, + "learning_rate": 9.491649226936586e-06, + "loss": 56.4696, + "step": 28500 + }, + { + "epoch": 0.11518400756311688, + "grad_norm": 692.4237060546875, + "learning_rate": 9.491029443502884e-06, + "loss": 56.2257, + "step": 28510 + }, + { + "epoch": 0.1152244088284845, + "grad_norm": 224.27877807617188, + "learning_rate": 9.490409302738582e-06, + "loss": 39.3771, + "step": 28520 + }, + { + "epoch": 0.11526481009385214, + "grad_norm": 678.6007080078125, + "learning_rate": 9.489788804693017e-06, + "loss": 42.1856, + "step": 28530 + }, + { + "epoch": 0.11530521135921977, + "grad_norm": 1114.4183349609375, + "learning_rate": 9.489167949415563e-06, + "loss": 58.9925, + "step": 28540 + }, + { + "epoch": 0.1153456126245874, + "grad_norm": 557.2171630859375, + "learning_rate": 9.48854673695562e-06, + "loss": 55.2478, + "step": 28550 + }, + { + "epoch": 0.11538601388995504, + "grad_norm": 998.3175048828125, + "learning_rate": 9.48792516736261e-06, + "loss": 35.496, + "step": 28560 + }, + { + "epoch": 0.11542641515532266, + "grad_norm": 794.63232421875, + "learning_rate": 9.487303240685992e-06, + "loss": 55.0467, + "step": 28570 + }, + { + "epoch": 0.1154668164206903, + "grad_norm": 455.69879150390625, + "learning_rate": 9.48668095697525e-06, + "loss": 41.8241, + "step": 28580 + }, + { + "epoch": 0.11550721768605793, + "grad_norm": 712.5789794921875, + "learning_rate": 9.486058316279894e-06, + "loss": 59.8318, + "step": 28590 + }, + { + "epoch": 0.11554761895142555, + "grad_norm": 707.5386962890625, + "learning_rate": 9.485435318649468e-06, + "loss": 50.9216, + "step": 28600 + }, + { + "epoch": 0.1155880202167932, + "grad_norm": 722.671875, + "learning_rate": 9.484811964133537e-06, + "loss": 65.316, + "step": 28610 + }, + { + "epoch": 0.11562842148216082, + "grad_norm": 509.6846923828125, + "learning_rate": 9.484188252781701e-06, + "loss": 50.3926, + "step": 28620 + }, + { + "epoch": 0.11566882274752845, + "grad_norm": 1625.15869140625, + "learning_rate": 9.483564184643586e-06, + "loss": 37.5687, + "step": 28630 + }, + { + "epoch": 0.11570922401289609, + "grad_norm": 425.3319396972656, + "learning_rate": 9.482939759768845e-06, + "loss": 37.632, + "step": 28640 + }, + { + "epoch": 0.11574962527826371, + "grad_norm": 526.8452758789062, + "learning_rate": 9.48231497820716e-06, + "loss": 55.4761, + "step": 28650 + }, + { + "epoch": 0.11579002654363135, + "grad_norm": 524.2207641601562, + "learning_rate": 9.481689840008246e-06, + "loss": 54.3112, + "step": 28660 + }, + { + "epoch": 0.11583042780899898, + "grad_norm": 793.691162109375, + "learning_rate": 9.481064345221838e-06, + "loss": 60.8318, + "step": 28670 + }, + { + "epoch": 0.1158708290743666, + "grad_norm": 453.846435546875, + "learning_rate": 9.480438493897707e-06, + "loss": 56.1913, + "step": 28680 + }, + { + "epoch": 0.11591123033973424, + "grad_norm": 836.8286743164062, + "learning_rate": 9.479812286085645e-06, + "loss": 49.0991, + "step": 28690 + }, + { + "epoch": 0.11595163160510187, + "grad_norm": 656.8587036132812, + "learning_rate": 9.47918572183548e-06, + "loss": 66.3525, + "step": 28700 + }, + { + "epoch": 0.1159920328704695, + "grad_norm": 918.6471557617188, + "learning_rate": 9.478558801197065e-06, + "loss": 54.7832, + "step": 28710 + }, + { + "epoch": 0.11603243413583714, + "grad_norm": 702.2949829101562, + "learning_rate": 9.47793152422028e-06, + "loss": 43.806, + "step": 28720 + }, + { + "epoch": 0.11607283540120476, + "grad_norm": 683.9447021484375, + "learning_rate": 9.477303890955032e-06, + "loss": 41.7837, + "step": 28730 + }, + { + "epoch": 0.1161132366665724, + "grad_norm": 881.1528930664062, + "learning_rate": 9.476675901451264e-06, + "loss": 62.3749, + "step": 28740 + }, + { + "epoch": 0.11615363793194003, + "grad_norm": 235.79928588867188, + "learning_rate": 9.476047555758938e-06, + "loss": 34.0945, + "step": 28750 + }, + { + "epoch": 0.11619403919730766, + "grad_norm": 1202.2252197265625, + "learning_rate": 9.475418853928051e-06, + "loss": 40.956, + "step": 28760 + }, + { + "epoch": 0.1162344404626753, + "grad_norm": 723.0114135742188, + "learning_rate": 9.474789796008625e-06, + "loss": 59.2273, + "step": 28770 + }, + { + "epoch": 0.11627484172804292, + "grad_norm": 948.4896850585938, + "learning_rate": 9.474160382050711e-06, + "loss": 43.0877, + "step": 28780 + }, + { + "epoch": 0.11631524299341055, + "grad_norm": 665.72998046875, + "learning_rate": 9.47353061210439e-06, + "loss": 58.8115, + "step": 28790 + }, + { + "epoch": 0.11635564425877819, + "grad_norm": 928.5210571289062, + "learning_rate": 9.47290048621977e-06, + "loss": 72.7469, + "step": 28800 + }, + { + "epoch": 0.11639604552414581, + "grad_norm": 837.2612915039062, + "learning_rate": 9.472270004446984e-06, + "loss": 62.3605, + "step": 28810 + }, + { + "epoch": 0.11643644678951345, + "grad_norm": 404.30841064453125, + "learning_rate": 9.4716391668362e-06, + "loss": 43.8358, + "step": 28820 + }, + { + "epoch": 0.11647684805488108, + "grad_norm": 522.2630615234375, + "learning_rate": 9.471007973437607e-06, + "loss": 35.7472, + "step": 28830 + }, + { + "epoch": 0.1165172493202487, + "grad_norm": 826.51708984375, + "learning_rate": 9.470376424301432e-06, + "loss": 68.2935, + "step": 28840 + }, + { + "epoch": 0.11655765058561635, + "grad_norm": 969.8021240234375, + "learning_rate": 9.46974451947792e-06, + "loss": 44.8838, + "step": 28850 + }, + { + "epoch": 0.11659805185098397, + "grad_norm": 907.9220581054688, + "learning_rate": 9.469112259017349e-06, + "loss": 37.908, + "step": 28860 + }, + { + "epoch": 0.1166384531163516, + "grad_norm": 1271.998779296875, + "learning_rate": 9.468479642970027e-06, + "loss": 65.6431, + "step": 28870 + }, + { + "epoch": 0.11667885438171924, + "grad_norm": 616.036865234375, + "learning_rate": 9.467846671386287e-06, + "loss": 39.8803, + "step": 28880 + }, + { + "epoch": 0.11671925564708686, + "grad_norm": 800.3985595703125, + "learning_rate": 9.467213344316493e-06, + "loss": 60.458, + "step": 28890 + }, + { + "epoch": 0.1167596569124545, + "grad_norm": 982.2677612304688, + "learning_rate": 9.466579661811032e-06, + "loss": 75.691, + "step": 28900 + }, + { + "epoch": 0.11680005817782213, + "grad_norm": 649.3153076171875, + "learning_rate": 9.46594562392033e-06, + "loss": 44.2065, + "step": 28910 + }, + { + "epoch": 0.11684045944318976, + "grad_norm": 982.4515380859375, + "learning_rate": 9.465311230694828e-06, + "loss": 45.287, + "step": 28920 + }, + { + "epoch": 0.1168808607085574, + "grad_norm": 727.6909790039062, + "learning_rate": 9.464676482185005e-06, + "loss": 30.1195, + "step": 28930 + }, + { + "epoch": 0.11692126197392502, + "grad_norm": 301.429931640625, + "learning_rate": 9.464041378441365e-06, + "loss": 58.9985, + "step": 28940 + }, + { + "epoch": 0.11696166323929265, + "grad_norm": 854.9701538085938, + "learning_rate": 9.46340591951444e-06, + "loss": 72.6457, + "step": 28950 + }, + { + "epoch": 0.11700206450466029, + "grad_norm": 518.9381713867188, + "learning_rate": 9.462770105454789e-06, + "loss": 51.8344, + "step": 28960 + }, + { + "epoch": 0.11704246577002791, + "grad_norm": 741.2862548828125, + "learning_rate": 9.462133936313002e-06, + "loss": 40.7278, + "step": 28970 + }, + { + "epoch": 0.11708286703539555, + "grad_norm": 752.0146484375, + "learning_rate": 9.461497412139697e-06, + "loss": 63.2356, + "step": 28980 + }, + { + "epoch": 0.11712326830076318, + "grad_norm": 656.5631103515625, + "learning_rate": 9.46086053298552e-06, + "loss": 55.1799, + "step": 28990 + }, + { + "epoch": 0.11716366956613081, + "grad_norm": 655.6858520507812, + "learning_rate": 9.460223298901138e-06, + "loss": 46.3702, + "step": 29000 + }, + { + "epoch": 0.11720407083149845, + "grad_norm": 367.1216735839844, + "learning_rate": 9.459585709937262e-06, + "loss": 82.6018, + "step": 29010 + }, + { + "epoch": 0.11724447209686607, + "grad_norm": 822.807373046875, + "learning_rate": 9.458947766144617e-06, + "loss": 87.3932, + "step": 29020 + }, + { + "epoch": 0.1172848733622337, + "grad_norm": 728.6187133789062, + "learning_rate": 9.458309467573963e-06, + "loss": 40.4143, + "step": 29030 + }, + { + "epoch": 0.11732527462760134, + "grad_norm": 690.3950805664062, + "learning_rate": 9.457670814276083e-06, + "loss": 53.4531, + "step": 29040 + }, + { + "epoch": 0.11736567589296897, + "grad_norm": 833.6300048828125, + "learning_rate": 9.457031806301795e-06, + "loss": 49.5981, + "step": 29050 + }, + { + "epoch": 0.1174060771583366, + "grad_norm": 394.2637939453125, + "learning_rate": 9.456392443701943e-06, + "loss": 28.4206, + "step": 29060 + }, + { + "epoch": 0.11744647842370423, + "grad_norm": 447.1683349609375, + "learning_rate": 9.455752726527395e-06, + "loss": 42.3745, + "step": 29070 + }, + { + "epoch": 0.11748687968907186, + "grad_norm": 747.0037841796875, + "learning_rate": 9.45511265482905e-06, + "loss": 67.5624, + "step": 29080 + }, + { + "epoch": 0.1175272809544395, + "grad_norm": 619.459228515625, + "learning_rate": 9.454472228657841e-06, + "loss": 49.5098, + "step": 29090 + }, + { + "epoch": 0.11756768221980712, + "grad_norm": 461.28607177734375, + "learning_rate": 9.453831448064717e-06, + "loss": 34.5057, + "step": 29100 + }, + { + "epoch": 0.11760808348517475, + "grad_norm": 478.06207275390625, + "learning_rate": 9.453190313100666e-06, + "loss": 60.1882, + "step": 29110 + }, + { + "epoch": 0.11764848475054239, + "grad_norm": 740.0570068359375, + "learning_rate": 9.4525488238167e-06, + "loss": 45.3488, + "step": 29120 + }, + { + "epoch": 0.11768888601591002, + "grad_norm": 622.1351928710938, + "learning_rate": 9.451906980263857e-06, + "loss": 40.4671, + "step": 29130 + }, + { + "epoch": 0.11772928728127766, + "grad_norm": 646.7392578125, + "learning_rate": 9.451264782493208e-06, + "loss": 51.3332, + "step": 29140 + }, + { + "epoch": 0.11776968854664528, + "grad_norm": 364.55999755859375, + "learning_rate": 9.450622230555849e-06, + "loss": 50.0434, + "step": 29150 + }, + { + "epoch": 0.11781008981201291, + "grad_norm": 586.4746704101562, + "learning_rate": 9.449979324502905e-06, + "loss": 33.2186, + "step": 29160 + }, + { + "epoch": 0.11785049107738055, + "grad_norm": 899.0317993164062, + "learning_rate": 9.449336064385529e-06, + "loss": 62.2745, + "step": 29170 + }, + { + "epoch": 0.11789089234274817, + "grad_norm": 466.4114990234375, + "learning_rate": 9.4486924502549e-06, + "loss": 57.8692, + "step": 29180 + }, + { + "epoch": 0.1179312936081158, + "grad_norm": 708.7315063476562, + "learning_rate": 9.448048482162231e-06, + "loss": 109.0333, + "step": 29190 + }, + { + "epoch": 0.11797169487348344, + "grad_norm": 617.0620727539062, + "learning_rate": 9.447404160158758e-06, + "loss": 37.3076, + "step": 29200 + }, + { + "epoch": 0.11801209613885107, + "grad_norm": 1074.2401123046875, + "learning_rate": 9.446759484295745e-06, + "loss": 63.679, + "step": 29210 + }, + { + "epoch": 0.1180524974042187, + "grad_norm": 859.2569580078125, + "learning_rate": 9.44611445462449e-06, + "loss": 47.2913, + "step": 29220 + }, + { + "epoch": 0.11809289866958633, + "grad_norm": 357.2909851074219, + "learning_rate": 9.445469071196312e-06, + "loss": 63.1842, + "step": 29230 + }, + { + "epoch": 0.11813329993495396, + "grad_norm": 916.20068359375, + "learning_rate": 9.444823334062562e-06, + "loss": 60.4991, + "step": 29240 + }, + { + "epoch": 0.1181737012003216, + "grad_norm": 238.49969482421875, + "learning_rate": 9.444177243274619e-06, + "loss": 62.8598, + "step": 29250 + }, + { + "epoch": 0.11821410246568922, + "grad_norm": 761.7315063476562, + "learning_rate": 9.443530798883887e-06, + "loss": 39.2142, + "step": 29260 + }, + { + "epoch": 0.11825450373105685, + "grad_norm": 2124.98779296875, + "learning_rate": 9.442884000941803e-06, + "loss": 51.3082, + "step": 29270 + }, + { + "epoch": 0.11829490499642449, + "grad_norm": 922.1412353515625, + "learning_rate": 9.44223684949983e-06, + "loss": 51.5363, + "step": 29280 + }, + { + "epoch": 0.11833530626179212, + "grad_norm": 718.6259155273438, + "learning_rate": 9.441589344609457e-06, + "loss": 65.476, + "step": 29290 + }, + { + "epoch": 0.11837570752715976, + "grad_norm": 972.5775146484375, + "learning_rate": 9.440941486322205e-06, + "loss": 65.9795, + "step": 29300 + }, + { + "epoch": 0.11841610879252738, + "grad_norm": 566.7423706054688, + "learning_rate": 9.44029327468962e-06, + "loss": 45.4314, + "step": 29310 + }, + { + "epoch": 0.11845651005789501, + "grad_norm": 269.9037780761719, + "learning_rate": 9.439644709763276e-06, + "loss": 57.1088, + "step": 29320 + }, + { + "epoch": 0.11849691132326265, + "grad_norm": 1288.29150390625, + "learning_rate": 9.43899579159478e-06, + "loss": 40.9538, + "step": 29330 + }, + { + "epoch": 0.11853731258863028, + "grad_norm": 2228.189453125, + "learning_rate": 9.438346520235759e-06, + "loss": 73.1681, + "step": 29340 + }, + { + "epoch": 0.1185777138539979, + "grad_norm": 754.686767578125, + "learning_rate": 9.437696895737876e-06, + "loss": 80.8364, + "step": 29350 + }, + { + "epoch": 0.11861811511936554, + "grad_norm": 423.5769958496094, + "learning_rate": 9.437046918152817e-06, + "loss": 43.7477, + "step": 29360 + }, + { + "epoch": 0.11865851638473317, + "grad_norm": 539.15234375, + "learning_rate": 9.436396587532297e-06, + "loss": 44.1189, + "step": 29370 + }, + { + "epoch": 0.11869891765010081, + "grad_norm": 809.685791015625, + "learning_rate": 9.435745903928062e-06, + "loss": 50.4817, + "step": 29380 + }, + { + "epoch": 0.11873931891546843, + "grad_norm": 692.5665283203125, + "learning_rate": 9.435094867391881e-06, + "loss": 58.9802, + "step": 29390 + }, + { + "epoch": 0.11877972018083606, + "grad_norm": 363.3084411621094, + "learning_rate": 9.434443477975557e-06, + "loss": 38.1004, + "step": 29400 + }, + { + "epoch": 0.1188201214462037, + "grad_norm": 835.03515625, + "learning_rate": 9.433791735730917e-06, + "loss": 44.2224, + "step": 29410 + }, + { + "epoch": 0.11886052271157133, + "grad_norm": 734.1571044921875, + "learning_rate": 9.433139640709817e-06, + "loss": 53.3648, + "step": 29420 + }, + { + "epoch": 0.11890092397693895, + "grad_norm": 669.4501953125, + "learning_rate": 9.432487192964142e-06, + "loss": 58.705, + "step": 29430 + }, + { + "epoch": 0.11894132524230659, + "grad_norm": 521.8321533203125, + "learning_rate": 9.431834392545803e-06, + "loss": 58.5444, + "step": 29440 + }, + { + "epoch": 0.11898172650767422, + "grad_norm": 370.6477355957031, + "learning_rate": 9.43118123950674e-06, + "loss": 46.7026, + "step": 29450 + }, + { + "epoch": 0.11902212777304186, + "grad_norm": 558.437255859375, + "learning_rate": 9.430527733898922e-06, + "loss": 47.0624, + "step": 29460 + }, + { + "epoch": 0.11906252903840948, + "grad_norm": 691.9668579101562, + "learning_rate": 9.429873875774344e-06, + "loss": 46.8082, + "step": 29470 + }, + { + "epoch": 0.11910293030377711, + "grad_norm": 739.7124633789062, + "learning_rate": 9.429219665185034e-06, + "loss": 57.6623, + "step": 29480 + }, + { + "epoch": 0.11914333156914475, + "grad_norm": 1001.8974609375, + "learning_rate": 9.428565102183043e-06, + "loss": 46.2761, + "step": 29490 + }, + { + "epoch": 0.11918373283451238, + "grad_norm": 427.9935607910156, + "learning_rate": 9.42791018682045e-06, + "loss": 78.2117, + "step": 29500 + }, + { + "epoch": 0.11922413409988, + "grad_norm": 1094.6697998046875, + "learning_rate": 9.427254919149367e-06, + "loss": 68.6234, + "step": 29510 + }, + { + "epoch": 0.11926453536524764, + "grad_norm": 436.7558898925781, + "learning_rate": 9.426599299221925e-06, + "loss": 45.9419, + "step": 29520 + }, + { + "epoch": 0.11930493663061527, + "grad_norm": 570.2569580078125, + "learning_rate": 9.425943327090295e-06, + "loss": 61.1487, + "step": 29530 + }, + { + "epoch": 0.11934533789598291, + "grad_norm": 1047.4046630859375, + "learning_rate": 9.425287002806666e-06, + "loss": 60.9063, + "step": 29540 + }, + { + "epoch": 0.11938573916135053, + "grad_norm": 661.2189331054688, + "learning_rate": 9.42463032642326e-06, + "loss": 44.0193, + "step": 29550 + }, + { + "epoch": 0.11942614042671816, + "grad_norm": 543.251708984375, + "learning_rate": 9.423973297992324e-06, + "loss": 41.5051, + "step": 29560 + }, + { + "epoch": 0.1194665416920858, + "grad_norm": 870.4523315429688, + "learning_rate": 9.423315917566137e-06, + "loss": 47.8213, + "step": 29570 + }, + { + "epoch": 0.11950694295745343, + "grad_norm": 733.741943359375, + "learning_rate": 9.422658185197002e-06, + "loss": 44.6389, + "step": 29580 + }, + { + "epoch": 0.11954734422282105, + "grad_norm": 424.02923583984375, + "learning_rate": 9.422000100937253e-06, + "loss": 53.1652, + "step": 29590 + }, + { + "epoch": 0.1195877454881887, + "grad_norm": 812.8602905273438, + "learning_rate": 9.42134166483925e-06, + "loss": 36.4047, + "step": 29600 + }, + { + "epoch": 0.11962814675355632, + "grad_norm": 1044.6739501953125, + "learning_rate": 9.420682876955382e-06, + "loss": 63.9026, + "step": 29610 + }, + { + "epoch": 0.11966854801892396, + "grad_norm": 752.5007934570312, + "learning_rate": 9.420023737338065e-06, + "loss": 54.0017, + "step": 29620 + }, + { + "epoch": 0.11970894928429159, + "grad_norm": 322.84503173828125, + "learning_rate": 9.419364246039745e-06, + "loss": 60.8512, + "step": 29630 + }, + { + "epoch": 0.11974935054965921, + "grad_norm": 407.95428466796875, + "learning_rate": 9.418704403112894e-06, + "loss": 31.9523, + "step": 29640 + }, + { + "epoch": 0.11978975181502685, + "grad_norm": 855.3703002929688, + "learning_rate": 9.418044208610013e-06, + "loss": 85.5561, + "step": 29650 + }, + { + "epoch": 0.11983015308039448, + "grad_norm": 801.6728515625, + "learning_rate": 9.41738366258363e-06, + "loss": 40.399, + "step": 29660 + }, + { + "epoch": 0.1198705543457621, + "grad_norm": 874.2803344726562, + "learning_rate": 9.416722765086304e-06, + "loss": 50.6003, + "step": 29670 + }, + { + "epoch": 0.11991095561112974, + "grad_norm": 1156.8819580078125, + "learning_rate": 9.416061516170615e-06, + "loss": 49.4536, + "step": 29680 + }, + { + "epoch": 0.11995135687649737, + "grad_norm": 627.3614501953125, + "learning_rate": 9.415399915889179e-06, + "loss": 45.6941, + "step": 29690 + }, + { + "epoch": 0.11999175814186501, + "grad_norm": 559.5956420898438, + "learning_rate": 9.414737964294636e-06, + "loss": 56.8244, + "step": 29700 + }, + { + "epoch": 0.12003215940723264, + "grad_norm": 653.4930419921875, + "learning_rate": 9.414075661439653e-06, + "loss": 57.2013, + "step": 29710 + }, + { + "epoch": 0.12007256067260026, + "grad_norm": 3804.873291015625, + "learning_rate": 9.413413007376928e-06, + "loss": 67.4335, + "step": 29720 + }, + { + "epoch": 0.1201129619379679, + "grad_norm": 703.4122924804688, + "learning_rate": 9.412750002159186e-06, + "loss": 60.5386, + "step": 29730 + }, + { + "epoch": 0.12015336320333553, + "grad_norm": 396.68994140625, + "learning_rate": 9.412086645839177e-06, + "loss": 50.3618, + "step": 29740 + }, + { + "epoch": 0.12019376446870315, + "grad_norm": 1265.5760498046875, + "learning_rate": 9.411422938469683e-06, + "loss": 47.9156, + "step": 29750 + }, + { + "epoch": 0.1202341657340708, + "grad_norm": 402.34283447265625, + "learning_rate": 9.41075888010351e-06, + "loss": 51.1713, + "step": 29760 + }, + { + "epoch": 0.12027456699943842, + "grad_norm": 712.1184692382812, + "learning_rate": 9.410094470793497e-06, + "loss": 52.6493, + "step": 29770 + }, + { + "epoch": 0.12031496826480606, + "grad_norm": 632.7574462890625, + "learning_rate": 9.409429710592505e-06, + "loss": 46.334, + "step": 29780 + }, + { + "epoch": 0.12035536953017369, + "grad_norm": 186.35232543945312, + "learning_rate": 9.408764599553429e-06, + "loss": 43.2821, + "step": 29790 + }, + { + "epoch": 0.12039577079554131, + "grad_norm": 747.8989868164062, + "learning_rate": 9.408099137729188e-06, + "loss": 45.6112, + "step": 29800 + }, + { + "epoch": 0.12043617206090895, + "grad_norm": 1416.9267578125, + "learning_rate": 9.407433325172727e-06, + "loss": 39.188, + "step": 29810 + }, + { + "epoch": 0.12047657332627658, + "grad_norm": 594.1754760742188, + "learning_rate": 9.406767161937025e-06, + "loss": 47.1414, + "step": 29820 + }, + { + "epoch": 0.1205169745916442, + "grad_norm": 420.6256103515625, + "learning_rate": 9.406100648075084e-06, + "loss": 47.8546, + "step": 29830 + }, + { + "epoch": 0.12055737585701184, + "grad_norm": 911.7869262695312, + "learning_rate": 9.405433783639936e-06, + "loss": 43.1674, + "step": 29840 + }, + { + "epoch": 0.12059777712237947, + "grad_norm": 920.2177124023438, + "learning_rate": 9.40476656868464e-06, + "loss": 44.3151, + "step": 29850 + }, + { + "epoch": 0.12063817838774711, + "grad_norm": 648.54833984375, + "learning_rate": 9.404099003262282e-06, + "loss": 35.8031, + "step": 29860 + }, + { + "epoch": 0.12067857965311474, + "grad_norm": 658.5719604492188, + "learning_rate": 9.40343108742598e-06, + "loss": 46.2772, + "step": 29870 + }, + { + "epoch": 0.12071898091848236, + "grad_norm": 662.2303466796875, + "learning_rate": 9.402762821228875e-06, + "loss": 45.1511, + "step": 29880 + }, + { + "epoch": 0.12075938218385, + "grad_norm": 829.085205078125, + "learning_rate": 9.402094204724138e-06, + "loss": 56.8606, + "step": 29890 + }, + { + "epoch": 0.12079978344921763, + "grad_norm": 356.94500732421875, + "learning_rate": 9.401425237964966e-06, + "loss": 64.053, + "step": 29900 + }, + { + "epoch": 0.12084018471458526, + "grad_norm": 726.9462890625, + "learning_rate": 9.400755921004592e-06, + "loss": 55.208, + "step": 29910 + }, + { + "epoch": 0.1208805859799529, + "grad_norm": 389.7232666015625, + "learning_rate": 9.400086253896264e-06, + "loss": 54.952, + "step": 29920 + }, + { + "epoch": 0.12092098724532052, + "grad_norm": 2380.118896484375, + "learning_rate": 9.399416236693264e-06, + "loss": 80.5505, + "step": 29930 + }, + { + "epoch": 0.12096138851068816, + "grad_norm": 451.5255432128906, + "learning_rate": 9.398745869448909e-06, + "loss": 44.9507, + "step": 29940 + }, + { + "epoch": 0.12100178977605579, + "grad_norm": 728.6965942382812, + "learning_rate": 9.39807515221653e-06, + "loss": 76.1519, + "step": 29950 + }, + { + "epoch": 0.12104219104142341, + "grad_norm": 1228.5821533203125, + "learning_rate": 9.397404085049496e-06, + "loss": 64.1188, + "step": 29960 + }, + { + "epoch": 0.12108259230679105, + "grad_norm": 668.8536987304688, + "learning_rate": 9.3967326680012e-06, + "loss": 56.7463, + "step": 29970 + }, + { + "epoch": 0.12112299357215868, + "grad_norm": 1020.1240234375, + "learning_rate": 9.396060901125064e-06, + "loss": 52.0907, + "step": 29980 + }, + { + "epoch": 0.1211633948375263, + "grad_norm": 1008.449951171875, + "learning_rate": 9.395388784474538e-06, + "loss": 38.816, + "step": 29990 + }, + { + "epoch": 0.12120379610289395, + "grad_norm": 828.15234375, + "learning_rate": 9.394716318103098e-06, + "loss": 67.8823, + "step": 30000 + }, + { + "epoch": 0.12124419736826157, + "grad_norm": 242.62425231933594, + "learning_rate": 9.394043502064249e-06, + "loss": 47.3324, + "step": 30010 + }, + { + "epoch": 0.12128459863362921, + "grad_norm": 615.1395263671875, + "learning_rate": 9.393370336411527e-06, + "loss": 63.4642, + "step": 30020 + }, + { + "epoch": 0.12132499989899684, + "grad_norm": 610.5171508789062, + "learning_rate": 9.392696821198488e-06, + "loss": 53.2339, + "step": 30030 + }, + { + "epoch": 0.12136540116436446, + "grad_norm": 685.2667846679688, + "learning_rate": 9.392022956478724e-06, + "loss": 47.9081, + "step": 30040 + }, + { + "epoch": 0.1214058024297321, + "grad_norm": 585.4192504882812, + "learning_rate": 9.391348742305849e-06, + "loss": 66.8327, + "step": 30050 + }, + { + "epoch": 0.12144620369509973, + "grad_norm": 737.0886840820312, + "learning_rate": 9.390674178733508e-06, + "loss": 51.1965, + "step": 30060 + }, + { + "epoch": 0.12148660496046736, + "grad_norm": 450.2093505859375, + "learning_rate": 9.389999265815373e-06, + "loss": 59.4211, + "step": 30070 + }, + { + "epoch": 0.121527006225835, + "grad_norm": 134.6776885986328, + "learning_rate": 9.389324003605144e-06, + "loss": 67.8788, + "step": 30080 + }, + { + "epoch": 0.12156740749120262, + "grad_norm": 995.7404174804688, + "learning_rate": 9.388648392156547e-06, + "loss": 49.0113, + "step": 30090 + }, + { + "epoch": 0.12160780875657026, + "grad_norm": 561.1322021484375, + "learning_rate": 9.387972431523341e-06, + "loss": 39.4004, + "step": 30100 + }, + { + "epoch": 0.12164821002193789, + "grad_norm": 746.4854125976562, + "learning_rate": 9.387296121759305e-06, + "loss": 45.6675, + "step": 30110 + }, + { + "epoch": 0.12168861128730551, + "grad_norm": 412.273193359375, + "learning_rate": 9.386619462918254e-06, + "loss": 47.4065, + "step": 30120 + }, + { + "epoch": 0.12172901255267315, + "grad_norm": 2498.973876953125, + "learning_rate": 9.385942455054022e-06, + "loss": 54.9776, + "step": 30130 + }, + { + "epoch": 0.12176941381804078, + "grad_norm": 584.4258422851562, + "learning_rate": 9.385265098220478e-06, + "loss": 47.1616, + "step": 30140 + }, + { + "epoch": 0.12180981508340841, + "grad_norm": 768.3419799804688, + "learning_rate": 9.384587392471516e-06, + "loss": 50.8677, + "step": 30150 + }, + { + "epoch": 0.12185021634877605, + "grad_norm": 722.3310546875, + "learning_rate": 9.383909337861058e-06, + "loss": 68.8301, + "step": 30160 + }, + { + "epoch": 0.12189061761414367, + "grad_norm": 347.5932922363281, + "learning_rate": 9.383230934443053e-06, + "loss": 51.4054, + "step": 30170 + }, + { + "epoch": 0.12193101887951131, + "grad_norm": 373.2593688964844, + "learning_rate": 9.382552182271478e-06, + "loss": 49.6567, + "step": 30180 + }, + { + "epoch": 0.12197142014487894, + "grad_norm": 1203.6734619140625, + "learning_rate": 9.38187308140034e-06, + "loss": 56.8176, + "step": 30190 + }, + { + "epoch": 0.12201182141024657, + "grad_norm": 727.6742553710938, + "learning_rate": 9.381193631883672e-06, + "loss": 39.6272, + "step": 30200 + }, + { + "epoch": 0.1220522226756142, + "grad_norm": 619.8937377929688, + "learning_rate": 9.380513833775531e-06, + "loss": 44.3137, + "step": 30210 + }, + { + "epoch": 0.12209262394098183, + "grad_norm": 540.3925170898438, + "learning_rate": 9.37983368713001e-06, + "loss": 43.6342, + "step": 30220 + }, + { + "epoch": 0.12213302520634946, + "grad_norm": 5220.17626953125, + "learning_rate": 9.379153192001223e-06, + "loss": 59.757, + "step": 30230 + }, + { + "epoch": 0.1221734264717171, + "grad_norm": 963.0521850585938, + "learning_rate": 9.378472348443315e-06, + "loss": 34.9574, + "step": 30240 + }, + { + "epoch": 0.12221382773708472, + "grad_norm": 399.6465759277344, + "learning_rate": 9.377791156510456e-06, + "loss": 35.0663, + "step": 30250 + }, + { + "epoch": 0.12225422900245236, + "grad_norm": 524.9255981445312, + "learning_rate": 9.377109616256846e-06, + "loss": 53.069, + "step": 30260 + }, + { + "epoch": 0.12229463026781999, + "grad_norm": 573.6826782226562, + "learning_rate": 9.37642772773671e-06, + "loss": 31.281, + "step": 30270 + }, + { + "epoch": 0.12233503153318762, + "grad_norm": 430.6109313964844, + "learning_rate": 9.375745491004307e-06, + "loss": 35.5072, + "step": 30280 + }, + { + "epoch": 0.12237543279855526, + "grad_norm": 309.88275146484375, + "learning_rate": 9.375062906113916e-06, + "loss": 47.0233, + "step": 30290 + }, + { + "epoch": 0.12241583406392288, + "grad_norm": 873.5589599609375, + "learning_rate": 9.37437997311985e-06, + "loss": 49.2769, + "step": 30300 + }, + { + "epoch": 0.12245623532929051, + "grad_norm": 958.1603393554688, + "learning_rate": 9.373696692076446e-06, + "loss": 64.9906, + "step": 30310 + }, + { + "epoch": 0.12249663659465815, + "grad_norm": 935.3135375976562, + "learning_rate": 9.373013063038066e-06, + "loss": 46.1228, + "step": 30320 + }, + { + "epoch": 0.12253703786002577, + "grad_norm": 632.3682861328125, + "learning_rate": 9.372329086059108e-06, + "loss": 61.2088, + "step": 30330 + }, + { + "epoch": 0.12257743912539341, + "grad_norm": 914.8970336914062, + "learning_rate": 9.37164476119399e-06, + "loss": 58.5277, + "step": 30340 + }, + { + "epoch": 0.12261784039076104, + "grad_norm": 900.7513427734375, + "learning_rate": 9.370960088497162e-06, + "loss": 45.0407, + "step": 30350 + }, + { + "epoch": 0.12265824165612867, + "grad_norm": 491.3097229003906, + "learning_rate": 9.370275068023097e-06, + "loss": 64.0002, + "step": 30360 + }, + { + "epoch": 0.1226986429214963, + "grad_norm": 579.8344116210938, + "learning_rate": 9.369589699826306e-06, + "loss": 60.9695, + "step": 30370 + }, + { + "epoch": 0.12273904418686393, + "grad_norm": 410.3919677734375, + "learning_rate": 9.368903983961315e-06, + "loss": 43.6325, + "step": 30380 + }, + { + "epoch": 0.12277944545223156, + "grad_norm": 201.71817016601562, + "learning_rate": 9.368217920482684e-06, + "loss": 53.0613, + "step": 30390 + }, + { + "epoch": 0.1228198467175992, + "grad_norm": 408.74169921875, + "learning_rate": 9.367531509445001e-06, + "loss": 55.3284, + "step": 30400 + }, + { + "epoch": 0.12286024798296682, + "grad_norm": 355.0393981933594, + "learning_rate": 9.366844750902878e-06, + "loss": 43.9765, + "step": 30410 + }, + { + "epoch": 0.12290064924833445, + "grad_norm": 513.59765625, + "learning_rate": 9.36615764491096e-06, + "loss": 34.4743, + "step": 30420 + }, + { + "epoch": 0.12294105051370209, + "grad_norm": 894.3479614257812, + "learning_rate": 9.365470191523917e-06, + "loss": 41.8912, + "step": 30430 + }, + { + "epoch": 0.12298145177906972, + "grad_norm": 601.699462890625, + "learning_rate": 9.364782390796446e-06, + "loss": 49.0976, + "step": 30440 + }, + { + "epoch": 0.12302185304443736, + "grad_norm": 399.9588317871094, + "learning_rate": 9.364094242783272e-06, + "loss": 34.56, + "step": 30450 + }, + { + "epoch": 0.12306225430980498, + "grad_norm": 1018.5534057617188, + "learning_rate": 9.363405747539147e-06, + "loss": 50.4698, + "step": 30460 + }, + { + "epoch": 0.12310265557517261, + "grad_norm": 898.0135498046875, + "learning_rate": 9.362716905118851e-06, + "loss": 53.0371, + "step": 30470 + }, + { + "epoch": 0.12314305684054025, + "grad_norm": 941.5891723632812, + "learning_rate": 9.362027715577195e-06, + "loss": 57.5847, + "step": 30480 + }, + { + "epoch": 0.12318345810590788, + "grad_norm": 507.96929931640625, + "learning_rate": 9.361338178969012e-06, + "loss": 60.7366, + "step": 30490 + }, + { + "epoch": 0.1232238593712755, + "grad_norm": 668.412353515625, + "learning_rate": 9.360648295349165e-06, + "loss": 59.3322, + "step": 30500 + }, + { + "epoch": 0.12326426063664314, + "grad_norm": 1075.7337646484375, + "learning_rate": 9.359958064772547e-06, + "loss": 59.0285, + "step": 30510 + }, + { + "epoch": 0.12330466190201077, + "grad_norm": 760.0520629882812, + "learning_rate": 9.359267487294075e-06, + "loss": 51.6215, + "step": 30520 + }, + { + "epoch": 0.12334506316737841, + "grad_norm": 1032.9842529296875, + "learning_rate": 9.358576562968695e-06, + "loss": 45.0181, + "step": 30530 + }, + { + "epoch": 0.12338546443274603, + "grad_norm": 369.5265197753906, + "learning_rate": 9.357885291851382e-06, + "loss": 50.1647, + "step": 30540 + }, + { + "epoch": 0.12342586569811366, + "grad_norm": 518.7932739257812, + "learning_rate": 9.357193673997133e-06, + "loss": 58.2824, + "step": 30550 + }, + { + "epoch": 0.1234662669634813, + "grad_norm": 828.255859375, + "learning_rate": 9.356501709460984e-06, + "loss": 39.197, + "step": 30560 + }, + { + "epoch": 0.12350666822884893, + "grad_norm": 1110.9344482421875, + "learning_rate": 9.355809398297986e-06, + "loss": 52.1149, + "step": 30570 + }, + { + "epoch": 0.12354706949421655, + "grad_norm": 973.9660034179688, + "learning_rate": 9.355116740563225e-06, + "loss": 37.8704, + "step": 30580 + }, + { + "epoch": 0.12358747075958419, + "grad_norm": 919.6453247070312, + "learning_rate": 9.354423736311813e-06, + "loss": 43.1774, + "step": 30590 + }, + { + "epoch": 0.12362787202495182, + "grad_norm": 295.9122009277344, + "learning_rate": 9.353730385598887e-06, + "loss": 49.3843, + "step": 30600 + }, + { + "epoch": 0.12366827329031946, + "grad_norm": 513.4649047851562, + "learning_rate": 9.353036688479615e-06, + "loss": 57.7642, + "step": 30610 + }, + { + "epoch": 0.12370867455568708, + "grad_norm": 777.9099731445312, + "learning_rate": 9.352342645009193e-06, + "loss": 55.8895, + "step": 30620 + }, + { + "epoch": 0.12374907582105471, + "grad_norm": 917.2174072265625, + "learning_rate": 9.35164825524284e-06, + "loss": 59.0916, + "step": 30630 + }, + { + "epoch": 0.12378947708642235, + "grad_norm": 1081.7359619140625, + "learning_rate": 9.350953519235807e-06, + "loss": 52.1535, + "step": 30640 + }, + { + "epoch": 0.12382987835178998, + "grad_norm": 960.4583740234375, + "learning_rate": 9.35025843704337e-06, + "loss": 46.3987, + "step": 30650 + }, + { + "epoch": 0.1238702796171576, + "grad_norm": 569.8470458984375, + "learning_rate": 9.349563008720836e-06, + "loss": 42.4655, + "step": 30660 + }, + { + "epoch": 0.12391068088252524, + "grad_norm": 456.7442932128906, + "learning_rate": 9.348867234323534e-06, + "loss": 35.9404, + "step": 30670 + }, + { + "epoch": 0.12395108214789287, + "grad_norm": 580.541748046875, + "learning_rate": 9.348171113906826e-06, + "loss": 40.7233, + "step": 30680 + }, + { + "epoch": 0.12399148341326051, + "grad_norm": 542.5335083007812, + "learning_rate": 9.347474647526095e-06, + "loss": 63.2249, + "step": 30690 + }, + { + "epoch": 0.12403188467862813, + "grad_norm": 822.7742919921875, + "learning_rate": 9.34677783523676e-06, + "loss": 46.3272, + "step": 30700 + }, + { + "epoch": 0.12407228594399576, + "grad_norm": 1118.610595703125, + "learning_rate": 9.346080677094262e-06, + "loss": 42.0597, + "step": 30710 + }, + { + "epoch": 0.1241126872093634, + "grad_norm": 1029.7388916015625, + "learning_rate": 9.345383173154072e-06, + "loss": 45.3306, + "step": 30720 + }, + { + "epoch": 0.12415308847473103, + "grad_norm": 314.6160583496094, + "learning_rate": 9.344685323471682e-06, + "loss": 24.6263, + "step": 30730 + }, + { + "epoch": 0.12419348974009865, + "grad_norm": 2012.316650390625, + "learning_rate": 9.343987128102624e-06, + "loss": 65.5848, + "step": 30740 + }, + { + "epoch": 0.1242338910054663, + "grad_norm": 1248.8795166015625, + "learning_rate": 9.343288587102444e-06, + "loss": 42.0409, + "step": 30750 + }, + { + "epoch": 0.12427429227083392, + "grad_norm": 509.92193603515625, + "learning_rate": 9.342589700526725e-06, + "loss": 36.0369, + "step": 30760 + }, + { + "epoch": 0.12431469353620156, + "grad_norm": 649.0388793945312, + "learning_rate": 9.341890468431072e-06, + "loss": 41.3814, + "step": 30770 + }, + { + "epoch": 0.12435509480156919, + "grad_norm": 743.8023681640625, + "learning_rate": 9.341190890871123e-06, + "loss": 47.9775, + "step": 30780 + }, + { + "epoch": 0.12439549606693681, + "grad_norm": 1114.2352294921875, + "learning_rate": 9.340490967902535e-06, + "loss": 50.7062, + "step": 30790 + }, + { + "epoch": 0.12443589733230445, + "grad_norm": 940.1904907226562, + "learning_rate": 9.339790699581004e-06, + "loss": 63.2531, + "step": 30800 + }, + { + "epoch": 0.12447629859767208, + "grad_norm": 1059.23193359375, + "learning_rate": 9.339090085962244e-06, + "loss": 39.3221, + "step": 30810 + }, + { + "epoch": 0.1245166998630397, + "grad_norm": 397.91900634765625, + "learning_rate": 9.338389127101998e-06, + "loss": 44.4031, + "step": 30820 + }, + { + "epoch": 0.12455710112840734, + "grad_norm": 765.754638671875, + "learning_rate": 9.337687823056041e-06, + "loss": 44.932, + "step": 30830 + }, + { + "epoch": 0.12459750239377497, + "grad_norm": 554.5352172851562, + "learning_rate": 9.336986173880169e-06, + "loss": 54.2205, + "step": 30840 + }, + { + "epoch": 0.12463790365914261, + "grad_norm": 546.3801879882812, + "learning_rate": 9.336284179630215e-06, + "loss": 37.6779, + "step": 30850 + }, + { + "epoch": 0.12467830492451024, + "grad_norm": 84.68031311035156, + "learning_rate": 9.335581840362026e-06, + "loss": 53.455, + "step": 30860 + }, + { + "epoch": 0.12471870618987786, + "grad_norm": 656.2481689453125, + "learning_rate": 9.33487915613149e-06, + "loss": 42.2726, + "step": 30870 + }, + { + "epoch": 0.1247591074552455, + "grad_norm": 810.590087890625, + "learning_rate": 9.334176126994512e-06, + "loss": 40.2841, + "step": 30880 + }, + { + "epoch": 0.12479950872061313, + "grad_norm": 514.6560668945312, + "learning_rate": 9.333472753007031e-06, + "loss": 91.8756, + "step": 30890 + }, + { + "epoch": 0.12483990998598075, + "grad_norm": 838.5552368164062, + "learning_rate": 9.332769034225012e-06, + "loss": 38.2255, + "step": 30900 + }, + { + "epoch": 0.1248803112513484, + "grad_norm": 335.7632141113281, + "learning_rate": 9.332064970704445e-06, + "loss": 49.4308, + "step": 30910 + }, + { + "epoch": 0.12492071251671602, + "grad_norm": 540.861572265625, + "learning_rate": 9.33136056250135e-06, + "loss": 39.3426, + "step": 30920 + }, + { + "epoch": 0.12496111378208366, + "grad_norm": 622.8423461914062, + "learning_rate": 9.330655809671773e-06, + "loss": 47.4215, + "step": 30930 + }, + { + "epoch": 0.12500151504745127, + "grad_norm": 360.4191589355469, + "learning_rate": 9.32995071227179e-06, + "loss": 50.474, + "step": 30940 + }, + { + "epoch": 0.1250419163128189, + "grad_norm": 407.1855773925781, + "learning_rate": 9.3292452703575e-06, + "loss": 39.8224, + "step": 30950 + }, + { + "epoch": 0.12508231757818655, + "grad_norm": 773.7116088867188, + "learning_rate": 9.328539483985031e-06, + "loss": 110.1445, + "step": 30960 + }, + { + "epoch": 0.1251227188435542, + "grad_norm": 305.62103271484375, + "learning_rate": 9.327833353210541e-06, + "loss": 53.3745, + "step": 30970 + }, + { + "epoch": 0.1251631201089218, + "grad_norm": 477.38482666015625, + "learning_rate": 9.327126878090214e-06, + "loss": 42.7223, + "step": 30980 + }, + { + "epoch": 0.12520352137428944, + "grad_norm": 635.5834350585938, + "learning_rate": 9.32642005868026e-06, + "loss": 47.8754, + "step": 30990 + }, + { + "epoch": 0.12524392263965708, + "grad_norm": 721.0254516601562, + "learning_rate": 9.325712895036916e-06, + "loss": 57.4999, + "step": 31000 + }, + { + "epoch": 0.1252843239050247, + "grad_norm": 443.795166015625, + "learning_rate": 9.32500538721645e-06, + "loss": 41.7979, + "step": 31010 + }, + { + "epoch": 0.12532472517039234, + "grad_norm": 863.4866333007812, + "learning_rate": 9.324297535275156e-06, + "loss": 60.9883, + "step": 31020 + }, + { + "epoch": 0.12536512643575998, + "grad_norm": 582.2689208984375, + "learning_rate": 9.323589339269352e-06, + "loss": 53.3317, + "step": 31030 + }, + { + "epoch": 0.1254055277011276, + "grad_norm": 799.9541625976562, + "learning_rate": 9.322880799255385e-06, + "loss": 73.9565, + "step": 31040 + }, + { + "epoch": 0.12544592896649523, + "grad_norm": 1019.82861328125, + "learning_rate": 9.322171915289635e-06, + "loss": 37.8343, + "step": 31050 + }, + { + "epoch": 0.12548633023186287, + "grad_norm": 759.5281372070312, + "learning_rate": 9.321462687428499e-06, + "loss": 47.9594, + "step": 31060 + }, + { + "epoch": 0.12552673149723048, + "grad_norm": 686.42822265625, + "learning_rate": 9.320753115728413e-06, + "loss": 49.3588, + "step": 31070 + }, + { + "epoch": 0.12556713276259812, + "grad_norm": 761.8621215820312, + "learning_rate": 9.320043200245829e-06, + "loss": 59.4951, + "step": 31080 + }, + { + "epoch": 0.12560753402796576, + "grad_norm": 1079.365234375, + "learning_rate": 9.319332941037235e-06, + "loss": 71.4847, + "step": 31090 + }, + { + "epoch": 0.12564793529333337, + "grad_norm": 302.6757507324219, + "learning_rate": 9.31862233815914e-06, + "loss": 51.828, + "step": 31100 + }, + { + "epoch": 0.125688336558701, + "grad_norm": 644.4342651367188, + "learning_rate": 9.317911391668087e-06, + "loss": 43.484, + "step": 31110 + }, + { + "epoch": 0.12572873782406865, + "grad_norm": 829.1041259765625, + "learning_rate": 9.317200101620641e-06, + "loss": 44.3326, + "step": 31120 + }, + { + "epoch": 0.1257691390894363, + "grad_norm": 398.044921875, + "learning_rate": 9.316488468073397e-06, + "loss": 50.6066, + "step": 31130 + }, + { + "epoch": 0.1258095403548039, + "grad_norm": 592.14208984375, + "learning_rate": 9.315776491082973e-06, + "loss": 52.6301, + "step": 31140 + }, + { + "epoch": 0.12584994162017155, + "grad_norm": 201.01861572265625, + "learning_rate": 9.315064170706023e-06, + "loss": 48.0798, + "step": 31150 + }, + { + "epoch": 0.12589034288553919, + "grad_norm": 738.100830078125, + "learning_rate": 9.31435150699922e-06, + "loss": 51.338, + "step": 31160 + }, + { + "epoch": 0.1259307441509068, + "grad_norm": 756.7727661132812, + "learning_rate": 9.313638500019267e-06, + "loss": 54.9391, + "step": 31170 + }, + { + "epoch": 0.12597114541627444, + "grad_norm": 287.8023986816406, + "learning_rate": 9.312925149822895e-06, + "loss": 28.0471, + "step": 31180 + }, + { + "epoch": 0.12601154668164208, + "grad_norm": 723.8349609375, + "learning_rate": 9.312211456466862e-06, + "loss": 36.3156, + "step": 31190 + }, + { + "epoch": 0.1260519479470097, + "grad_norm": 878.0177612304688, + "learning_rate": 9.311497420007955e-06, + "loss": 61.6736, + "step": 31200 + }, + { + "epoch": 0.12609234921237733, + "grad_norm": 1783.608154296875, + "learning_rate": 9.310783040502987e-06, + "loss": 45.5743, + "step": 31210 + }, + { + "epoch": 0.12613275047774497, + "grad_norm": 1379.0848388671875, + "learning_rate": 9.310068318008794e-06, + "loss": 72.9638, + "step": 31220 + }, + { + "epoch": 0.12617315174311258, + "grad_norm": 477.46966552734375, + "learning_rate": 9.309353252582246e-06, + "loss": 45.4072, + "step": 31230 + }, + { + "epoch": 0.12621355300848022, + "grad_norm": 495.3040466308594, + "learning_rate": 9.308637844280236e-06, + "loss": 34.8518, + "step": 31240 + }, + { + "epoch": 0.12625395427384786, + "grad_norm": 834.5982055664062, + "learning_rate": 9.307922093159688e-06, + "loss": 53.4758, + "step": 31250 + }, + { + "epoch": 0.12629435553921547, + "grad_norm": 2230.323486328125, + "learning_rate": 9.30720599927755e-06, + "loss": 78.3272, + "step": 31260 + }, + { + "epoch": 0.12633475680458311, + "grad_norm": 433.3248291015625, + "learning_rate": 9.306489562690797e-06, + "loss": 35.0247, + "step": 31270 + }, + { + "epoch": 0.12637515806995075, + "grad_norm": 868.4600830078125, + "learning_rate": 9.305772783456435e-06, + "loss": 35.0469, + "step": 31280 + }, + { + "epoch": 0.1264155593353184, + "grad_norm": 406.2452697753906, + "learning_rate": 9.305055661631493e-06, + "loss": 76.8342, + "step": 31290 + }, + { + "epoch": 0.126455960600686, + "grad_norm": 443.42449951171875, + "learning_rate": 9.304338197273029e-06, + "loss": 39.4005, + "step": 31300 + }, + { + "epoch": 0.12649636186605365, + "grad_norm": 713.2732543945312, + "learning_rate": 9.303620390438128e-06, + "loss": 55.5563, + "step": 31310 + }, + { + "epoch": 0.1265367631314213, + "grad_norm": 710.0195922851562, + "learning_rate": 9.302902241183905e-06, + "loss": 45.8039, + "step": 31320 + }, + { + "epoch": 0.1265771643967889, + "grad_norm": 808.3165283203125, + "learning_rate": 9.302183749567498e-06, + "loss": 31.8709, + "step": 31330 + }, + { + "epoch": 0.12661756566215654, + "grad_norm": 558.5325927734375, + "learning_rate": 9.301464915646074e-06, + "loss": 40.9993, + "step": 31340 + }, + { + "epoch": 0.12665796692752418, + "grad_norm": 374.66131591796875, + "learning_rate": 9.30074573947683e-06, + "loss": 44.0041, + "step": 31350 + }, + { + "epoch": 0.1266983681928918, + "grad_norm": 549.226318359375, + "learning_rate": 9.30002622111698e-06, + "loss": 44.0095, + "step": 31360 + }, + { + "epoch": 0.12673876945825943, + "grad_norm": 198.41749572753906, + "learning_rate": 9.299306360623782e-06, + "loss": 51.1259, + "step": 31370 + }, + { + "epoch": 0.12677917072362707, + "grad_norm": 907.3374633789062, + "learning_rate": 9.298586158054508e-06, + "loss": 62.2416, + "step": 31380 + }, + { + "epoch": 0.12681957198899468, + "grad_norm": 1143.6590576171875, + "learning_rate": 9.297865613466459e-06, + "loss": 57.0977, + "step": 31390 + }, + { + "epoch": 0.12685997325436232, + "grad_norm": 301.8861389160156, + "learning_rate": 9.29714472691697e-06, + "loss": 37.5276, + "step": 31400 + }, + { + "epoch": 0.12690037451972996, + "grad_norm": 1299.40087890625, + "learning_rate": 9.296423498463396e-06, + "loss": 64.7403, + "step": 31410 + }, + { + "epoch": 0.12694077578509758, + "grad_norm": 624.0057983398438, + "learning_rate": 9.29570192816312e-06, + "loss": 52.6827, + "step": 31420 + }, + { + "epoch": 0.12698117705046522, + "grad_norm": 185.71693420410156, + "learning_rate": 9.29498001607356e-06, + "loss": 52.4407, + "step": 31430 + }, + { + "epoch": 0.12702157831583286, + "grad_norm": 527.6602783203125, + "learning_rate": 9.294257762252148e-06, + "loss": 37.781, + "step": 31440 + }, + { + "epoch": 0.1270619795812005, + "grad_norm": 1054.0302734375, + "learning_rate": 9.293535166756356e-06, + "loss": 66.2366, + "step": 31450 + }, + { + "epoch": 0.1271023808465681, + "grad_norm": 1055.7821044921875, + "learning_rate": 9.292812229643674e-06, + "loss": 45.0993, + "step": 31460 + }, + { + "epoch": 0.12714278211193575, + "grad_norm": 1077.979736328125, + "learning_rate": 9.292088950971624e-06, + "loss": 54.6229, + "step": 31470 + }, + { + "epoch": 0.1271831833773034, + "grad_norm": 678.5938110351562, + "learning_rate": 9.291365330797755e-06, + "loss": 47.38, + "step": 31480 + }, + { + "epoch": 0.127223584642671, + "grad_norm": 712.6802978515625, + "learning_rate": 9.290641369179643e-06, + "loss": 48.0058, + "step": 31490 + }, + { + "epoch": 0.12726398590803864, + "grad_norm": 626.124267578125, + "learning_rate": 9.289917066174887e-06, + "loss": 52.2015, + "step": 31500 + }, + { + "epoch": 0.12730438717340628, + "grad_norm": 573.7828979492188, + "learning_rate": 9.289192421841116e-06, + "loss": 70.4851, + "step": 31510 + }, + { + "epoch": 0.1273447884387739, + "grad_norm": 888.5181884765625, + "learning_rate": 9.288467436235992e-06, + "loss": 40.2558, + "step": 31520 + }, + { + "epoch": 0.12738518970414153, + "grad_norm": 995.8738403320312, + "learning_rate": 9.287742109417194e-06, + "loss": 48.9089, + "step": 31530 + }, + { + "epoch": 0.12742559096950917, + "grad_norm": 302.162353515625, + "learning_rate": 9.287016441442435e-06, + "loss": 60.6778, + "step": 31540 + }, + { + "epoch": 0.12746599223487678, + "grad_norm": 576.8596801757812, + "learning_rate": 9.28629043236945e-06, + "loss": 48.8662, + "step": 31550 + }, + { + "epoch": 0.12750639350024442, + "grad_norm": 474.55120849609375, + "learning_rate": 9.285564082256011e-06, + "loss": 53.2105, + "step": 31560 + }, + { + "epoch": 0.12754679476561206, + "grad_norm": 593.5822143554688, + "learning_rate": 9.284837391159904e-06, + "loss": 61.1313, + "step": 31570 + }, + { + "epoch": 0.12758719603097968, + "grad_norm": 434.08734130859375, + "learning_rate": 9.284110359138951e-06, + "loss": 51.8877, + "step": 31580 + }, + { + "epoch": 0.12762759729634732, + "grad_norm": 1331.6153564453125, + "learning_rate": 9.283382986250997e-06, + "loss": 52.297, + "step": 31590 + }, + { + "epoch": 0.12766799856171496, + "grad_norm": 464.4027099609375, + "learning_rate": 9.282655272553917e-06, + "loss": 39.4069, + "step": 31600 + }, + { + "epoch": 0.1277083998270826, + "grad_norm": 354.9674377441406, + "learning_rate": 9.281927218105613e-06, + "loss": 48.6394, + "step": 31610 + }, + { + "epoch": 0.1277488010924502, + "grad_norm": 1142.4324951171875, + "learning_rate": 9.281198822964011e-06, + "loss": 54.2571, + "step": 31620 + }, + { + "epoch": 0.12778920235781785, + "grad_norm": 416.7544250488281, + "learning_rate": 9.280470087187066e-06, + "loss": 53.5044, + "step": 31630 + }, + { + "epoch": 0.1278296036231855, + "grad_norm": 765.3726806640625, + "learning_rate": 9.279741010832761e-06, + "loss": 56.0496, + "step": 31640 + }, + { + "epoch": 0.1278700048885531, + "grad_norm": 884.37109375, + "learning_rate": 9.279011593959107e-06, + "loss": 75.0785, + "step": 31650 + }, + { + "epoch": 0.12791040615392074, + "grad_norm": 718.0591430664062, + "learning_rate": 9.278281836624137e-06, + "loss": 68.1496, + "step": 31660 + }, + { + "epoch": 0.12795080741928838, + "grad_norm": 598.039794921875, + "learning_rate": 9.277551738885915e-06, + "loss": 49.5493, + "step": 31670 + }, + { + "epoch": 0.127991208684656, + "grad_norm": 669.0643920898438, + "learning_rate": 9.276821300802535e-06, + "loss": 44.912, + "step": 31680 + }, + { + "epoch": 0.12803160995002363, + "grad_norm": 1183.2327880859375, + "learning_rate": 9.276090522432109e-06, + "loss": 53.1531, + "step": 31690 + }, + { + "epoch": 0.12807201121539127, + "grad_norm": 278.2745666503906, + "learning_rate": 9.275359403832787e-06, + "loss": 57.979, + "step": 31700 + }, + { + "epoch": 0.12811241248075889, + "grad_norm": 545.3709106445312, + "learning_rate": 9.274627945062738e-06, + "loss": 42.9828, + "step": 31710 + }, + { + "epoch": 0.12815281374612653, + "grad_norm": 426.2801818847656, + "learning_rate": 9.27389614618016e-06, + "loss": 69.7885, + "step": 31720 + }, + { + "epoch": 0.12819321501149417, + "grad_norm": 691.3192749023438, + "learning_rate": 9.273164007243281e-06, + "loss": 50.9559, + "step": 31730 + }, + { + "epoch": 0.12823361627686178, + "grad_norm": 3655.259033203125, + "learning_rate": 9.272431528310354e-06, + "loss": 90.1712, + "step": 31740 + }, + { + "epoch": 0.12827401754222942, + "grad_norm": 812.1537475585938, + "learning_rate": 9.271698709439658e-06, + "loss": 52.1837, + "step": 31750 + }, + { + "epoch": 0.12831441880759706, + "grad_norm": 490.5054931640625, + "learning_rate": 9.2709655506895e-06, + "loss": 64.8262, + "step": 31760 + }, + { + "epoch": 0.1283548200729647, + "grad_norm": 1356.9622802734375, + "learning_rate": 9.270232052118214e-06, + "loss": 61.913, + "step": 31770 + }, + { + "epoch": 0.1283952213383323, + "grad_norm": 413.27093505859375, + "learning_rate": 9.26949821378416e-06, + "loss": 35.5495, + "step": 31780 + }, + { + "epoch": 0.12843562260369995, + "grad_norm": 477.1658935546875, + "learning_rate": 9.268764035745727e-06, + "loss": 49.7717, + "step": 31790 + }, + { + "epoch": 0.1284760238690676, + "grad_norm": 755.6824951171875, + "learning_rate": 9.268029518061335e-06, + "loss": 42.3219, + "step": 31800 + }, + { + "epoch": 0.1285164251344352, + "grad_norm": 637.2919921875, + "learning_rate": 9.267294660789417e-06, + "loss": 60.4771, + "step": 31810 + }, + { + "epoch": 0.12855682639980284, + "grad_norm": 363.55499267578125, + "learning_rate": 9.26655946398845e-06, + "loss": 60.0659, + "step": 31820 + }, + { + "epoch": 0.12859722766517048, + "grad_norm": 745.2439575195312, + "learning_rate": 9.265823927716927e-06, + "loss": 40.7839, + "step": 31830 + }, + { + "epoch": 0.1286376289305381, + "grad_norm": 627.36865234375, + "learning_rate": 9.26508805203337e-06, + "loss": 48.2277, + "step": 31840 + }, + { + "epoch": 0.12867803019590573, + "grad_norm": 484.4584655761719, + "learning_rate": 9.264351836996332e-06, + "loss": 45.7992, + "step": 31850 + }, + { + "epoch": 0.12871843146127337, + "grad_norm": 292.0769958496094, + "learning_rate": 9.26361528266439e-06, + "loss": 47.8684, + "step": 31860 + }, + { + "epoch": 0.128758832726641, + "grad_norm": 441.6422424316406, + "learning_rate": 9.262878389096147e-06, + "loss": 29.7647, + "step": 31870 + }, + { + "epoch": 0.12879923399200863, + "grad_norm": 736.0634765625, + "learning_rate": 9.262141156350233e-06, + "loss": 37.1215, + "step": 31880 + }, + { + "epoch": 0.12883963525737627, + "grad_norm": 552.0074462890625, + "learning_rate": 9.261403584485308e-06, + "loss": 50.1483, + "step": 31890 + }, + { + "epoch": 0.12888003652274388, + "grad_norm": 629.8731689453125, + "learning_rate": 9.260665673560058e-06, + "loss": 47.3221, + "step": 31900 + }, + { + "epoch": 0.12892043778811152, + "grad_norm": 1326.4381103515625, + "learning_rate": 9.259927423633193e-06, + "loss": 36.9234, + "step": 31910 + }, + { + "epoch": 0.12896083905347916, + "grad_norm": 526.2960815429688, + "learning_rate": 9.259188834763455e-06, + "loss": 43.9766, + "step": 31920 + }, + { + "epoch": 0.1290012403188468, + "grad_norm": 1196.8380126953125, + "learning_rate": 9.258449907009607e-06, + "loss": 65.413, + "step": 31930 + }, + { + "epoch": 0.1290416415842144, + "grad_norm": 277.989990234375, + "learning_rate": 9.257710640430444e-06, + "loss": 36.3897, + "step": 31940 + }, + { + "epoch": 0.12908204284958205, + "grad_norm": 544.9930419921875, + "learning_rate": 9.256971035084786e-06, + "loss": 53.2941, + "step": 31950 + }, + { + "epoch": 0.1291224441149497, + "grad_norm": 517.1935424804688, + "learning_rate": 9.256231091031477e-06, + "loss": 39.4429, + "step": 31960 + }, + { + "epoch": 0.1291628453803173, + "grad_norm": 396.2089538574219, + "learning_rate": 9.255490808329397e-06, + "loss": 43.8766, + "step": 31970 + }, + { + "epoch": 0.12920324664568494, + "grad_norm": 330.9097900390625, + "learning_rate": 9.254750187037443e-06, + "loss": 31.7952, + "step": 31980 + }, + { + "epoch": 0.12924364791105258, + "grad_norm": 805.3584594726562, + "learning_rate": 9.254009227214543e-06, + "loss": 52.6609, + "step": 31990 + }, + { + "epoch": 0.1292840491764202, + "grad_norm": 981.2225341796875, + "learning_rate": 9.253267928919652e-06, + "loss": 40.1239, + "step": 32000 + }, + { + "epoch": 0.12932445044178784, + "grad_norm": 765.0771484375, + "learning_rate": 9.25252629221175e-06, + "loss": 63.7369, + "step": 32010 + }, + { + "epoch": 0.12936485170715548, + "grad_norm": 1077.512451171875, + "learning_rate": 9.251784317149848e-06, + "loss": 62.9928, + "step": 32020 + }, + { + "epoch": 0.1294052529725231, + "grad_norm": 1018.3309326171875, + "learning_rate": 9.251042003792983e-06, + "loss": 51.2373, + "step": 32030 + }, + { + "epoch": 0.12944565423789073, + "grad_norm": 554.8124389648438, + "learning_rate": 9.250299352200214e-06, + "loss": 54.4565, + "step": 32040 + }, + { + "epoch": 0.12948605550325837, + "grad_norm": 843.983642578125, + "learning_rate": 9.249556362430631e-06, + "loss": 41.286, + "step": 32050 + }, + { + "epoch": 0.12952645676862598, + "grad_norm": 2317.033447265625, + "learning_rate": 9.248813034543353e-06, + "loss": 48.9243, + "step": 32060 + }, + { + "epoch": 0.12956685803399362, + "grad_norm": 656.9749755859375, + "learning_rate": 9.24806936859752e-06, + "loss": 43.4692, + "step": 32070 + }, + { + "epoch": 0.12960725929936126, + "grad_norm": 491.047119140625, + "learning_rate": 9.247325364652304e-06, + "loss": 66.3645, + "step": 32080 + }, + { + "epoch": 0.1296476605647289, + "grad_norm": 646.3383178710938, + "learning_rate": 9.2465810227669e-06, + "loss": 52.3016, + "step": 32090 + }, + { + "epoch": 0.1296880618300965, + "grad_norm": 765.2969360351562, + "learning_rate": 9.245836343000534e-06, + "loss": 47.1274, + "step": 32100 + }, + { + "epoch": 0.12972846309546415, + "grad_norm": 600.1698608398438, + "learning_rate": 9.245091325412456e-06, + "loss": 34.7515, + "step": 32110 + }, + { + "epoch": 0.1297688643608318, + "grad_norm": 986.0180053710938, + "learning_rate": 9.244345970061944e-06, + "loss": 56.5286, + "step": 32120 + }, + { + "epoch": 0.1298092656261994, + "grad_norm": 521.9662475585938, + "learning_rate": 9.243600277008301e-06, + "loss": 45.0921, + "step": 32130 + }, + { + "epoch": 0.12984966689156704, + "grad_norm": 212.4668731689453, + "learning_rate": 9.24285424631086e-06, + "loss": 28.9113, + "step": 32140 + }, + { + "epoch": 0.12989006815693468, + "grad_norm": 526.3524169921875, + "learning_rate": 9.242107878028978e-06, + "loss": 39.1954, + "step": 32150 + }, + { + "epoch": 0.1299304694223023, + "grad_norm": 538.303955078125, + "learning_rate": 9.241361172222043e-06, + "loss": 54.2439, + "step": 32160 + }, + { + "epoch": 0.12997087068766994, + "grad_norm": 860.3079223632812, + "learning_rate": 9.240614128949463e-06, + "loss": 64.1403, + "step": 32170 + }, + { + "epoch": 0.13001127195303758, + "grad_norm": 777.4006958007812, + "learning_rate": 9.239866748270679e-06, + "loss": 43.971, + "step": 32180 + }, + { + "epoch": 0.1300516732184052, + "grad_norm": 841.7437744140625, + "learning_rate": 9.239119030245156e-06, + "loss": 59.4323, + "step": 32190 + }, + { + "epoch": 0.13009207448377283, + "grad_norm": 711.6234741210938, + "learning_rate": 9.238370974932387e-06, + "loss": 57.6824, + "step": 32200 + }, + { + "epoch": 0.13013247574914047, + "grad_norm": 512.6424560546875, + "learning_rate": 9.23762258239189e-06, + "loss": 60.2203, + "step": 32210 + }, + { + "epoch": 0.13017287701450808, + "grad_norm": 710.1792602539062, + "learning_rate": 9.236873852683213e-06, + "loss": 59.2537, + "step": 32220 + }, + { + "epoch": 0.13021327827987572, + "grad_norm": 533.3241577148438, + "learning_rate": 9.23612478586593e-06, + "loss": 62.686, + "step": 32230 + }, + { + "epoch": 0.13025367954524336, + "grad_norm": 2313.620849609375, + "learning_rate": 9.235375381999636e-06, + "loss": 56.1435, + "step": 32240 + }, + { + "epoch": 0.130294080810611, + "grad_norm": 292.7706298828125, + "learning_rate": 9.234625641143962e-06, + "loss": 49.0173, + "step": 32250 + }, + { + "epoch": 0.1303344820759786, + "grad_norm": 811.357666015625, + "learning_rate": 9.233875563358559e-06, + "loss": 35.5564, + "step": 32260 + }, + { + "epoch": 0.13037488334134625, + "grad_norm": 1006.41796875, + "learning_rate": 9.23312514870311e-06, + "loss": 35.7152, + "step": 32270 + }, + { + "epoch": 0.1304152846067139, + "grad_norm": 709.2618408203125, + "learning_rate": 9.232374397237318e-06, + "loss": 53.3593, + "step": 32280 + }, + { + "epoch": 0.1304556858720815, + "grad_norm": 870.2730712890625, + "learning_rate": 9.231623309020922e-06, + "loss": 65.6405, + "step": 32290 + }, + { + "epoch": 0.13049608713744915, + "grad_norm": 475.84686279296875, + "learning_rate": 9.230871884113679e-06, + "loss": 32.081, + "step": 32300 + }, + { + "epoch": 0.13053648840281679, + "grad_norm": 588.6025390625, + "learning_rate": 9.230120122575376e-06, + "loss": 57.8686, + "step": 32310 + }, + { + "epoch": 0.1305768896681844, + "grad_norm": 481.3664245605469, + "learning_rate": 9.22936802446583e-06, + "loss": 56.8029, + "step": 32320 + }, + { + "epoch": 0.13061729093355204, + "grad_norm": 1488.2110595703125, + "learning_rate": 9.228615589844879e-06, + "loss": 60.244, + "step": 32330 + }, + { + "epoch": 0.13065769219891968, + "grad_norm": 422.4720153808594, + "learning_rate": 9.227862818772392e-06, + "loss": 35.8967, + "step": 32340 + }, + { + "epoch": 0.1306980934642873, + "grad_norm": 587.341064453125, + "learning_rate": 9.227109711308265e-06, + "loss": 60.7431, + "step": 32350 + }, + { + "epoch": 0.13073849472965493, + "grad_norm": 770.119384765625, + "learning_rate": 9.226356267512417e-06, + "loss": 67.5445, + "step": 32360 + }, + { + "epoch": 0.13077889599502257, + "grad_norm": 426.0884704589844, + "learning_rate": 9.225602487444799e-06, + "loss": 34.8377, + "step": 32370 + }, + { + "epoch": 0.13081929726039018, + "grad_norm": 339.0352783203125, + "learning_rate": 9.224848371165382e-06, + "loss": 45.3635, + "step": 32380 + }, + { + "epoch": 0.13085969852575782, + "grad_norm": 0.0, + "learning_rate": 9.224093918734172e-06, + "loss": 38.837, + "step": 32390 + }, + { + "epoch": 0.13090009979112546, + "grad_norm": 488.4760437011719, + "learning_rate": 9.223339130211194e-06, + "loss": 50.7496, + "step": 32400 + }, + { + "epoch": 0.1309405010564931, + "grad_norm": 940.7568969726562, + "learning_rate": 9.222584005656501e-06, + "loss": 65.9121, + "step": 32410 + }, + { + "epoch": 0.13098090232186071, + "grad_norm": 796.3623657226562, + "learning_rate": 9.22182854513018e-06, + "loss": 47.539, + "step": 32420 + }, + { + "epoch": 0.13102130358722835, + "grad_norm": 684.649169921875, + "learning_rate": 9.221072748692336e-06, + "loss": 42.9674, + "step": 32430 + }, + { + "epoch": 0.131061704852596, + "grad_norm": 555.63232421875, + "learning_rate": 9.220316616403109e-06, + "loss": 41.5548, + "step": 32440 + }, + { + "epoch": 0.1311021061179636, + "grad_norm": 801.7620239257812, + "learning_rate": 9.219560148322655e-06, + "loss": 45.9207, + "step": 32450 + }, + { + "epoch": 0.13114250738333125, + "grad_norm": 624.9111938476562, + "learning_rate": 9.218803344511165e-06, + "loss": 59.801, + "step": 32460 + }, + { + "epoch": 0.1311829086486989, + "grad_norm": 673.0691528320312, + "learning_rate": 9.218046205028854e-06, + "loss": 57.1818, + "step": 32470 + }, + { + "epoch": 0.1312233099140665, + "grad_norm": 817.8880615234375, + "learning_rate": 9.217288729935966e-06, + "loss": 40.0779, + "step": 32480 + }, + { + "epoch": 0.13126371117943414, + "grad_norm": 759.4141845703125, + "learning_rate": 9.216530919292768e-06, + "loss": 50.3778, + "step": 32490 + }, + { + "epoch": 0.13130411244480178, + "grad_norm": 936.8798217773438, + "learning_rate": 9.215772773159556e-06, + "loss": 69.4996, + "step": 32500 + }, + { + "epoch": 0.1313445137101694, + "grad_norm": 995.8248901367188, + "learning_rate": 9.215014291596653e-06, + "loss": 55.1168, + "step": 32510 + }, + { + "epoch": 0.13138491497553703, + "grad_norm": 858.1106567382812, + "learning_rate": 9.214255474664405e-06, + "loss": 45.871, + "step": 32520 + }, + { + "epoch": 0.13142531624090467, + "grad_norm": 720.53955078125, + "learning_rate": 9.213496322423193e-06, + "loss": 63.9526, + "step": 32530 + }, + { + "epoch": 0.13146571750627228, + "grad_norm": 2278.630615234375, + "learning_rate": 9.212736834933413e-06, + "loss": 76.4255, + "step": 32540 + }, + { + "epoch": 0.13150611877163992, + "grad_norm": 560.2059936523438, + "learning_rate": 9.211977012255497e-06, + "loss": 43.5413, + "step": 32550 + }, + { + "epoch": 0.13154652003700756, + "grad_norm": 1519.486572265625, + "learning_rate": 9.211216854449903e-06, + "loss": 77.558, + "step": 32560 + }, + { + "epoch": 0.1315869213023752, + "grad_norm": 337.37933349609375, + "learning_rate": 9.210456361577109e-06, + "loss": 51.9883, + "step": 32570 + }, + { + "epoch": 0.13162732256774282, + "grad_norm": 601.6719360351562, + "learning_rate": 9.209695533697624e-06, + "loss": 40.5055, + "step": 32580 + }, + { + "epoch": 0.13166772383311046, + "grad_norm": 541.4317626953125, + "learning_rate": 9.208934370871989e-06, + "loss": 50.7977, + "step": 32590 + }, + { + "epoch": 0.1317081250984781, + "grad_norm": 1253.9888916015625, + "learning_rate": 9.20817287316076e-06, + "loss": 45.5538, + "step": 32600 + }, + { + "epoch": 0.1317485263638457, + "grad_norm": 489.9399108886719, + "learning_rate": 9.20741104062453e-06, + "loss": 53.2811, + "step": 32610 + }, + { + "epoch": 0.13178892762921335, + "grad_norm": 561.935791015625, + "learning_rate": 9.206648873323912e-06, + "loss": 49.4477, + "step": 32620 + }, + { + "epoch": 0.131829328894581, + "grad_norm": 408.33892822265625, + "learning_rate": 9.205886371319548e-06, + "loss": 50.0402, + "step": 32630 + }, + { + "epoch": 0.1318697301599486, + "grad_norm": 391.8433532714844, + "learning_rate": 9.20512353467211e-06, + "loss": 48.5688, + "step": 32640 + }, + { + "epoch": 0.13191013142531624, + "grad_norm": 710.4462890625, + "learning_rate": 9.204360363442288e-06, + "loss": 56.6522, + "step": 32650 + }, + { + "epoch": 0.13195053269068388, + "grad_norm": 644.66943359375, + "learning_rate": 9.20359685769081e-06, + "loss": 57.3297, + "step": 32660 + }, + { + "epoch": 0.1319909339560515, + "grad_norm": 622.2406616210938, + "learning_rate": 9.202833017478421e-06, + "loss": 36.6121, + "step": 32670 + }, + { + "epoch": 0.13203133522141913, + "grad_norm": 559.0179443359375, + "learning_rate": 9.2020688428659e-06, + "loss": 43.6517, + "step": 32680 + }, + { + "epoch": 0.13207173648678677, + "grad_norm": 543.2446899414062, + "learning_rate": 9.201304333914042e-06, + "loss": 45.9316, + "step": 32690 + }, + { + "epoch": 0.13211213775215438, + "grad_norm": 493.87835693359375, + "learning_rate": 9.200539490683682e-06, + "loss": 48.7048, + "step": 32700 + }, + { + "epoch": 0.13215253901752202, + "grad_norm": 705.3010864257812, + "learning_rate": 9.19977431323567e-06, + "loss": 43.7614, + "step": 32710 + }, + { + "epoch": 0.13219294028288966, + "grad_norm": 530.714111328125, + "learning_rate": 9.199008801630893e-06, + "loss": 58.9209, + "step": 32720 + }, + { + "epoch": 0.1322333415482573, + "grad_norm": 529.1024780273438, + "learning_rate": 9.198242955930257e-06, + "loss": 51.1792, + "step": 32730 + }, + { + "epoch": 0.13227374281362492, + "grad_norm": 714.8733520507812, + "learning_rate": 9.197476776194693e-06, + "loss": 38.4417, + "step": 32740 + }, + { + "epoch": 0.13231414407899256, + "grad_norm": 903.4959106445312, + "learning_rate": 9.196710262485168e-06, + "loss": 58.4088, + "step": 32750 + }, + { + "epoch": 0.1323545453443602, + "grad_norm": 694.1265869140625, + "learning_rate": 9.195943414862667e-06, + "loss": 62.1965, + "step": 32760 + }, + { + "epoch": 0.1323949466097278, + "grad_norm": 653.4110107421875, + "learning_rate": 9.195176233388206e-06, + "loss": 50.7297, + "step": 32770 + }, + { + "epoch": 0.13243534787509545, + "grad_norm": 956.4697265625, + "learning_rate": 9.194408718122825e-06, + "loss": 36.8561, + "step": 32780 + }, + { + "epoch": 0.1324757491404631, + "grad_norm": 761.3573608398438, + "learning_rate": 9.193640869127592e-06, + "loss": 66.8244, + "step": 32790 + }, + { + "epoch": 0.1325161504058307, + "grad_norm": 444.17547607421875, + "learning_rate": 9.192872686463601e-06, + "loss": 44.2107, + "step": 32800 + }, + { + "epoch": 0.13255655167119834, + "grad_norm": 1537.195556640625, + "learning_rate": 9.192104170191973e-06, + "loss": 69.7521, + "step": 32810 + }, + { + "epoch": 0.13259695293656598, + "grad_norm": 513.7176513671875, + "learning_rate": 9.191335320373856e-06, + "loss": 45.5239, + "step": 32820 + }, + { + "epoch": 0.1326373542019336, + "grad_norm": 545.9681396484375, + "learning_rate": 9.190566137070422e-06, + "loss": 50.4583, + "step": 32830 + }, + { + "epoch": 0.13267775546730123, + "grad_norm": 1057.2578125, + "learning_rate": 9.189796620342875e-06, + "loss": 64.9317, + "step": 32840 + }, + { + "epoch": 0.13271815673266887, + "grad_norm": 528.858642578125, + "learning_rate": 9.189026770252437e-06, + "loss": 65.4341, + "step": 32850 + }, + { + "epoch": 0.13275855799803649, + "grad_norm": 465.4150695800781, + "learning_rate": 9.188256586860365e-06, + "loss": 63.703, + "step": 32860 + }, + { + "epoch": 0.13279895926340413, + "grad_norm": 471.5633850097656, + "learning_rate": 9.187486070227938e-06, + "loss": 44.7681, + "step": 32870 + }, + { + "epoch": 0.13283936052877177, + "grad_norm": 1108.5830078125, + "learning_rate": 9.186715220416463e-06, + "loss": 55.0163, + "step": 32880 + }, + { + "epoch": 0.1328797617941394, + "grad_norm": 829.570068359375, + "learning_rate": 9.185944037487271e-06, + "loss": 71.4634, + "step": 32890 + }, + { + "epoch": 0.13292016305950702, + "grad_norm": 404.9236755371094, + "learning_rate": 9.185172521501723e-06, + "loss": 39.8276, + "step": 32900 + }, + { + "epoch": 0.13296056432487466, + "grad_norm": 623.8037109375, + "learning_rate": 9.184400672521204e-06, + "loss": 44.3128, + "step": 32910 + }, + { + "epoch": 0.1330009655902423, + "grad_norm": 622.3230590820312, + "learning_rate": 9.183628490607129e-06, + "loss": 38.3063, + "step": 32920 + }, + { + "epoch": 0.1330413668556099, + "grad_norm": 629.4878540039062, + "learning_rate": 9.182855975820934e-06, + "loss": 61.7241, + "step": 32930 + }, + { + "epoch": 0.13308176812097755, + "grad_norm": 456.10931396484375, + "learning_rate": 9.182083128224086e-06, + "loss": 64.5606, + "step": 32940 + }, + { + "epoch": 0.1331221693863452, + "grad_norm": 600.0465087890625, + "learning_rate": 9.181309947878077e-06, + "loss": 50.6354, + "step": 32950 + }, + { + "epoch": 0.1331625706517128, + "grad_norm": 352.6278381347656, + "learning_rate": 9.180536434844426e-06, + "loss": 49.3764, + "step": 32960 + }, + { + "epoch": 0.13320297191708044, + "grad_norm": 474.4338073730469, + "learning_rate": 9.179762589184676e-06, + "loss": 47.952, + "step": 32970 + }, + { + "epoch": 0.13324337318244808, + "grad_norm": 722.0818481445312, + "learning_rate": 9.1789884109604e-06, + "loss": 46.6468, + "step": 32980 + }, + { + "epoch": 0.1332837744478157, + "grad_norm": 687.3217163085938, + "learning_rate": 9.178213900233193e-06, + "loss": 44.9744, + "step": 32990 + }, + { + "epoch": 0.13332417571318333, + "grad_norm": 494.89044189453125, + "learning_rate": 9.177439057064684e-06, + "loss": 53.1962, + "step": 33000 + }, + { + "epoch": 0.13336457697855097, + "grad_norm": 262.19580078125, + "learning_rate": 9.17666388151652e-06, + "loss": 42.9505, + "step": 33010 + }, + { + "epoch": 0.1334049782439186, + "grad_norm": 502.8430480957031, + "learning_rate": 9.175888373650377e-06, + "loss": 51.0244, + "step": 33020 + }, + { + "epoch": 0.13344537950928623, + "grad_norm": 749.8898315429688, + "learning_rate": 9.175112533527963e-06, + "loss": 40.869, + "step": 33030 + }, + { + "epoch": 0.13348578077465387, + "grad_norm": 364.7505187988281, + "learning_rate": 9.174336361211007e-06, + "loss": 63.2581, + "step": 33040 + }, + { + "epoch": 0.1335261820400215, + "grad_norm": 688.9202880859375, + "learning_rate": 9.173559856761262e-06, + "loss": 39.9341, + "step": 33050 + }, + { + "epoch": 0.13356658330538912, + "grad_norm": 1022.2750854492188, + "learning_rate": 9.172783020240514e-06, + "loss": 59.776, + "step": 33060 + }, + { + "epoch": 0.13360698457075676, + "grad_norm": 461.04437255859375, + "learning_rate": 9.172005851710573e-06, + "loss": 40.5875, + "step": 33070 + }, + { + "epoch": 0.1336473858361244, + "grad_norm": 552.5075073242188, + "learning_rate": 9.171228351233272e-06, + "loss": 63.339, + "step": 33080 + }, + { + "epoch": 0.133687787101492, + "grad_norm": 204.60736083984375, + "learning_rate": 9.170450518870475e-06, + "loss": 53.6323, + "step": 33090 + }, + { + "epoch": 0.13372818836685965, + "grad_norm": 597.3870239257812, + "learning_rate": 9.169672354684069e-06, + "loss": 58.8157, + "step": 33100 + }, + { + "epoch": 0.1337685896322273, + "grad_norm": 626.3027954101562, + "learning_rate": 9.168893858735972e-06, + "loss": 53.9851, + "step": 33110 + }, + { + "epoch": 0.1338089908975949, + "grad_norm": 631.9942626953125, + "learning_rate": 9.168115031088122e-06, + "loss": 52.9761, + "step": 33120 + }, + { + "epoch": 0.13384939216296254, + "grad_norm": 863.2987670898438, + "learning_rate": 9.167335871802488e-06, + "loss": 47.3985, + "step": 33130 + }, + { + "epoch": 0.13388979342833018, + "grad_norm": 734.11181640625, + "learning_rate": 9.166556380941063e-06, + "loss": 53.5019, + "step": 33140 + }, + { + "epoch": 0.1339301946936978, + "grad_norm": 669.5849609375, + "learning_rate": 9.16577655856587e-06, + "loss": 42.8189, + "step": 33150 + }, + { + "epoch": 0.13397059595906544, + "grad_norm": 447.07562255859375, + "learning_rate": 9.164996404738955e-06, + "loss": 31.3628, + "step": 33160 + }, + { + "epoch": 0.13401099722443308, + "grad_norm": 822.84716796875, + "learning_rate": 9.16421591952239e-06, + "loss": 49.1292, + "step": 33170 + }, + { + "epoch": 0.1340513984898007, + "grad_norm": 489.0323486328125, + "learning_rate": 9.163435102978276e-06, + "loss": 40.1574, + "step": 33180 + }, + { + "epoch": 0.13409179975516833, + "grad_norm": 808.0147094726562, + "learning_rate": 9.162653955168739e-06, + "loss": 48.2493, + "step": 33190 + }, + { + "epoch": 0.13413220102053597, + "grad_norm": 679.6731567382812, + "learning_rate": 9.161872476155929e-06, + "loss": 44.6656, + "step": 33200 + }, + { + "epoch": 0.1341726022859036, + "grad_norm": 703.5978393554688, + "learning_rate": 9.161090666002029e-06, + "loss": 33.7541, + "step": 33210 + }, + { + "epoch": 0.13421300355127122, + "grad_norm": 405.0184020996094, + "learning_rate": 9.16030852476924e-06, + "loss": 35.5508, + "step": 33220 + }, + { + "epoch": 0.13425340481663886, + "grad_norm": 326.9285583496094, + "learning_rate": 9.159526052519794e-06, + "loss": 48.6239, + "step": 33230 + }, + { + "epoch": 0.1342938060820065, + "grad_norm": 1562.007080078125, + "learning_rate": 9.15874324931595e-06, + "loss": 53.0464, + "step": 33240 + }, + { + "epoch": 0.1343342073473741, + "grad_norm": 1214.361328125, + "learning_rate": 9.157960115219993e-06, + "loss": 54.1673, + "step": 33250 + }, + { + "epoch": 0.13437460861274175, + "grad_norm": 589.89013671875, + "learning_rate": 9.157176650294231e-06, + "loss": 39.7849, + "step": 33260 + }, + { + "epoch": 0.1344150098781094, + "grad_norm": 561.1547241210938, + "learning_rate": 9.156392854601001e-06, + "loss": 60.4532, + "step": 33270 + }, + { + "epoch": 0.134455411143477, + "grad_norm": 407.2488708496094, + "learning_rate": 9.155608728202669e-06, + "loss": 46.0791, + "step": 33280 + }, + { + "epoch": 0.13449581240884464, + "grad_norm": 546.0625, + "learning_rate": 9.154824271161621e-06, + "loss": 47.7987, + "step": 33290 + }, + { + "epoch": 0.13453621367421228, + "grad_norm": 709.1144409179688, + "learning_rate": 9.154039483540273e-06, + "loss": 62.7406, + "step": 33300 + }, + { + "epoch": 0.1345766149395799, + "grad_norm": 706.6863403320312, + "learning_rate": 9.153254365401069e-06, + "loss": 36.7554, + "step": 33310 + }, + { + "epoch": 0.13461701620494754, + "grad_norm": 619.6150512695312, + "learning_rate": 9.152468916806477e-06, + "loss": 60.1805, + "step": 33320 + }, + { + "epoch": 0.13465741747031518, + "grad_norm": 244.01255798339844, + "learning_rate": 9.151683137818989e-06, + "loss": 51.3976, + "step": 33330 + }, + { + "epoch": 0.1346978187356828, + "grad_norm": 3886.03857421875, + "learning_rate": 9.150897028501126e-06, + "loss": 81.1586, + "step": 33340 + }, + { + "epoch": 0.13473822000105043, + "grad_norm": 556.2538452148438, + "learning_rate": 9.15011058891544e-06, + "loss": 50.1933, + "step": 33350 + }, + { + "epoch": 0.13477862126641807, + "grad_norm": 807.3983764648438, + "learning_rate": 9.149323819124498e-06, + "loss": 41.1168, + "step": 33360 + }, + { + "epoch": 0.1348190225317857, + "grad_norm": 325.5909423828125, + "learning_rate": 9.148536719190904e-06, + "loss": 41.2033, + "step": 33370 + }, + { + "epoch": 0.13485942379715332, + "grad_norm": 613.8952026367188, + "learning_rate": 9.147749289177282e-06, + "loss": 41.9996, + "step": 33380 + }, + { + "epoch": 0.13489982506252096, + "grad_norm": 361.23492431640625, + "learning_rate": 9.146961529146285e-06, + "loss": 42.0668, + "step": 33390 + }, + { + "epoch": 0.1349402263278886, + "grad_norm": 648.5516357421875, + "learning_rate": 9.146173439160591e-06, + "loss": 41.9859, + "step": 33400 + }, + { + "epoch": 0.1349806275932562, + "grad_norm": 296.4998474121094, + "learning_rate": 9.145385019282904e-06, + "loss": 39.537, + "step": 33410 + }, + { + "epoch": 0.13502102885862385, + "grad_norm": 1153.4014892578125, + "learning_rate": 9.144596269575957e-06, + "loss": 61.5977, + "step": 33420 + }, + { + "epoch": 0.1350614301239915, + "grad_norm": 532.4719848632812, + "learning_rate": 9.143807190102504e-06, + "loss": 58.7296, + "step": 33430 + }, + { + "epoch": 0.1351018313893591, + "grad_norm": 764.9578247070312, + "learning_rate": 9.143017780925331e-06, + "loss": 52.383, + "step": 33440 + }, + { + "epoch": 0.13514223265472675, + "grad_norm": 722.291748046875, + "learning_rate": 9.142228042107248e-06, + "loss": 49.525, + "step": 33450 + }, + { + "epoch": 0.13518263392009439, + "grad_norm": 0.0, + "learning_rate": 9.141437973711092e-06, + "loss": 36.1941, + "step": 33460 + }, + { + "epoch": 0.135223035185462, + "grad_norm": 920.4172973632812, + "learning_rate": 9.14064757579972e-06, + "loss": 45.6598, + "step": 33470 + }, + { + "epoch": 0.13526343645082964, + "grad_norm": 359.2383117675781, + "learning_rate": 9.139856848436023e-06, + "loss": 37.3868, + "step": 33480 + }, + { + "epoch": 0.13530383771619728, + "grad_norm": 668.7049560546875, + "learning_rate": 9.139065791682916e-06, + "loss": 39.9217, + "step": 33490 + }, + { + "epoch": 0.1353442389815649, + "grad_norm": 674.8594360351562, + "learning_rate": 9.138274405603342e-06, + "loss": 33.1557, + "step": 33500 + }, + { + "epoch": 0.13538464024693253, + "grad_norm": 396.17352294921875, + "learning_rate": 9.137482690260265e-06, + "loss": 64.2327, + "step": 33510 + }, + { + "epoch": 0.13542504151230017, + "grad_norm": 560.4313354492188, + "learning_rate": 9.13669064571668e-06, + "loss": 71.6313, + "step": 33520 + }, + { + "epoch": 0.13546544277766778, + "grad_norm": 537.1707153320312, + "learning_rate": 9.135898272035601e-06, + "loss": 67.0521, + "step": 33530 + }, + { + "epoch": 0.13550584404303542, + "grad_norm": 557.8136596679688, + "learning_rate": 9.13510556928008e-06, + "loss": 37.5089, + "step": 33540 + }, + { + "epoch": 0.13554624530840306, + "grad_norm": 407.9730529785156, + "learning_rate": 9.134312537513188e-06, + "loss": 34.4554, + "step": 33550 + }, + { + "epoch": 0.1355866465737707, + "grad_norm": 1424.865478515625, + "learning_rate": 9.133519176798021e-06, + "loss": 39.1332, + "step": 33560 + }, + { + "epoch": 0.13562704783913831, + "grad_norm": 669.2245483398438, + "learning_rate": 9.132725487197701e-06, + "loss": 43.7718, + "step": 33570 + }, + { + "epoch": 0.13566744910450595, + "grad_norm": 712.7960815429688, + "learning_rate": 9.131931468775382e-06, + "loss": 54.1853, + "step": 33580 + }, + { + "epoch": 0.1357078503698736, + "grad_norm": 473.095458984375, + "learning_rate": 9.131137121594239e-06, + "loss": 52.0267, + "step": 33590 + }, + { + "epoch": 0.1357482516352412, + "grad_norm": 494.1900329589844, + "learning_rate": 9.130342445717474e-06, + "loss": 49.4793, + "step": 33600 + }, + { + "epoch": 0.13578865290060885, + "grad_norm": 597.4051513671875, + "learning_rate": 9.129547441208317e-06, + "loss": 67.9233, + "step": 33610 + }, + { + "epoch": 0.1358290541659765, + "grad_norm": 254.22824096679688, + "learning_rate": 9.128752108130022e-06, + "loss": 62.3962, + "step": 33620 + }, + { + "epoch": 0.1358694554313441, + "grad_norm": 312.43621826171875, + "learning_rate": 9.12795644654587e-06, + "loss": 43.4682, + "step": 33630 + }, + { + "epoch": 0.13590985669671174, + "grad_norm": 676.4065551757812, + "learning_rate": 9.127160456519168e-06, + "loss": 45.085, + "step": 33640 + }, + { + "epoch": 0.13595025796207938, + "grad_norm": 594.743408203125, + "learning_rate": 9.126364138113251e-06, + "loss": 106.4617, + "step": 33650 + }, + { + "epoch": 0.135990659227447, + "grad_norm": 800.3324584960938, + "learning_rate": 9.125567491391476e-06, + "loss": 49.7073, + "step": 33660 + }, + { + "epoch": 0.13603106049281463, + "grad_norm": 819.260498046875, + "learning_rate": 9.12477051641723e-06, + "loss": 53.5647, + "step": 33670 + }, + { + "epoch": 0.13607146175818227, + "grad_norm": 1157.8453369140625, + "learning_rate": 9.123973213253923e-06, + "loss": 52.8083, + "step": 33680 + }, + { + "epoch": 0.13611186302354988, + "grad_norm": 471.611572265625, + "learning_rate": 9.123175581964995e-06, + "loss": 50.6554, + "step": 33690 + }, + { + "epoch": 0.13615226428891752, + "grad_norm": 428.2491149902344, + "learning_rate": 9.122377622613909e-06, + "loss": 39.169, + "step": 33700 + }, + { + "epoch": 0.13619266555428516, + "grad_norm": 852.0259399414062, + "learning_rate": 9.121579335264155e-06, + "loss": 35.4563, + "step": 33710 + }, + { + "epoch": 0.1362330668196528, + "grad_norm": 266.2728271484375, + "learning_rate": 9.120780719979248e-06, + "loss": 63.9524, + "step": 33720 + }, + { + "epoch": 0.13627346808502042, + "grad_norm": 562.6665649414062, + "learning_rate": 9.11998177682273e-06, + "loss": 79.5502, + "step": 33730 + }, + { + "epoch": 0.13631386935038806, + "grad_norm": 866.7197875976562, + "learning_rate": 9.11918250585817e-06, + "loss": 48.4737, + "step": 33740 + }, + { + "epoch": 0.1363542706157557, + "grad_norm": 410.7391662597656, + "learning_rate": 9.118382907149164e-06, + "loss": 38.7582, + "step": 33750 + }, + { + "epoch": 0.1363946718811233, + "grad_norm": 1614.3094482421875, + "learning_rate": 9.117582980759332e-06, + "loss": 59.5404, + "step": 33760 + }, + { + "epoch": 0.13643507314649095, + "grad_norm": 583.3519897460938, + "learning_rate": 9.116782726752317e-06, + "loss": 49.7827, + "step": 33770 + }, + { + "epoch": 0.1364754744118586, + "grad_norm": 873.2832641601562, + "learning_rate": 9.115982145191796e-06, + "loss": 51.7136, + "step": 33780 + }, + { + "epoch": 0.1365158756772262, + "grad_norm": 726.3242797851562, + "learning_rate": 9.115181236141463e-06, + "loss": 35.4949, + "step": 33790 + }, + { + "epoch": 0.13655627694259384, + "grad_norm": 571.6852416992188, + "learning_rate": 9.114379999665047e-06, + "loss": 51.8902, + "step": 33800 + }, + { + "epoch": 0.13659667820796148, + "grad_norm": 685.9703369140625, + "learning_rate": 9.113578435826295e-06, + "loss": 78.0631, + "step": 33810 + }, + { + "epoch": 0.1366370794733291, + "grad_norm": 728.8062744140625, + "learning_rate": 9.112776544688988e-06, + "loss": 41.2744, + "step": 33820 + }, + { + "epoch": 0.13667748073869673, + "grad_norm": 941.1890869140625, + "learning_rate": 9.111974326316926e-06, + "loss": 61.4087, + "step": 33830 + }, + { + "epoch": 0.13671788200406437, + "grad_norm": 472.9835205078125, + "learning_rate": 9.111171780773938e-06, + "loss": 57.9784, + "step": 33840 + }, + { + "epoch": 0.13675828326943198, + "grad_norm": 248.1824188232422, + "learning_rate": 9.110368908123878e-06, + "loss": 48.1912, + "step": 33850 + }, + { + "epoch": 0.13679868453479962, + "grad_norm": 577.3104858398438, + "learning_rate": 9.10956570843063e-06, + "loss": 45.3517, + "step": 33860 + }, + { + "epoch": 0.13683908580016726, + "grad_norm": 400.1105041503906, + "learning_rate": 9.108762181758096e-06, + "loss": 35.0138, + "step": 33870 + }, + { + "epoch": 0.1368794870655349, + "grad_norm": 369.667236328125, + "learning_rate": 9.107958328170215e-06, + "loss": 57.6431, + "step": 33880 + }, + { + "epoch": 0.13691988833090252, + "grad_norm": 403.0173645019531, + "learning_rate": 9.10715414773094e-06, + "loss": 48.3835, + "step": 33890 + }, + { + "epoch": 0.13696028959627016, + "grad_norm": 383.2605895996094, + "learning_rate": 9.10634964050426e-06, + "loss": 43.6177, + "step": 33900 + }, + { + "epoch": 0.1370006908616378, + "grad_norm": 677.1846313476562, + "learning_rate": 9.105544806554184e-06, + "loss": 57.0482, + "step": 33910 + }, + { + "epoch": 0.1370410921270054, + "grad_norm": 1978.128662109375, + "learning_rate": 9.104739645944752e-06, + "loss": 59.6453, + "step": 33920 + }, + { + "epoch": 0.13708149339237305, + "grad_norm": 668.5293579101562, + "learning_rate": 9.103934158740023e-06, + "loss": 37.7574, + "step": 33930 + }, + { + "epoch": 0.1371218946577407, + "grad_norm": 763.6205444335938, + "learning_rate": 9.10312834500409e-06, + "loss": 61.1796, + "step": 33940 + }, + { + "epoch": 0.1371622959231083, + "grad_norm": 504.1656494140625, + "learning_rate": 9.102322204801062e-06, + "loss": 50.8536, + "step": 33950 + }, + { + "epoch": 0.13720269718847594, + "grad_norm": 1202.7027587890625, + "learning_rate": 9.101515738195084e-06, + "loss": 70.192, + "step": 33960 + }, + { + "epoch": 0.13724309845384358, + "grad_norm": 633.1737060546875, + "learning_rate": 9.100708945250322e-06, + "loss": 62.8866, + "step": 33970 + }, + { + "epoch": 0.1372834997192112, + "grad_norm": 1090.31103515625, + "learning_rate": 9.099901826030969e-06, + "loss": 55.2263, + "step": 33980 + }, + { + "epoch": 0.13732390098457883, + "grad_norm": 678.004150390625, + "learning_rate": 9.099094380601244e-06, + "loss": 67.2974, + "step": 33990 + }, + { + "epoch": 0.13736430224994647, + "grad_norm": 1015.7597045898438, + "learning_rate": 9.098286609025392e-06, + "loss": 61.169, + "step": 34000 + }, + { + "epoch": 0.13740470351531409, + "grad_norm": 460.1115417480469, + "learning_rate": 9.097478511367682e-06, + "loss": 41.7142, + "step": 34010 + }, + { + "epoch": 0.13744510478068173, + "grad_norm": 558.1769409179688, + "learning_rate": 9.096670087692413e-06, + "loss": 62.2378, + "step": 34020 + }, + { + "epoch": 0.13748550604604937, + "grad_norm": 420.15423583984375, + "learning_rate": 9.095861338063906e-06, + "loss": 33.5995, + "step": 34030 + }, + { + "epoch": 0.137525907311417, + "grad_norm": 855.2376708984375, + "learning_rate": 9.09505226254651e-06, + "loss": 41.2862, + "step": 34040 + }, + { + "epoch": 0.13756630857678462, + "grad_norm": 668.4373168945312, + "learning_rate": 9.094242861204598e-06, + "loss": 51.963, + "step": 34050 + }, + { + "epoch": 0.13760670984215226, + "grad_norm": 1038.2156982421875, + "learning_rate": 9.093433134102572e-06, + "loss": 49.4119, + "step": 34060 + }, + { + "epoch": 0.1376471111075199, + "grad_norm": 781.8707275390625, + "learning_rate": 9.09262308130486e-06, + "loss": 46.6765, + "step": 34070 + }, + { + "epoch": 0.1376875123728875, + "grad_norm": 443.0128173828125, + "learning_rate": 9.091812702875908e-06, + "loss": 45.1318, + "step": 34080 + }, + { + "epoch": 0.13772791363825515, + "grad_norm": 2211.4814453125, + "learning_rate": 9.0910019988802e-06, + "loss": 53.6052, + "step": 34090 + }, + { + "epoch": 0.1377683149036228, + "grad_norm": 769.3303833007812, + "learning_rate": 9.09019096938224e-06, + "loss": 41.2156, + "step": 34100 + }, + { + "epoch": 0.1378087161689904, + "grad_norm": 540.2257080078125, + "learning_rate": 9.089379614446554e-06, + "loss": 53.6039, + "step": 34110 + }, + { + "epoch": 0.13784911743435804, + "grad_norm": 684.0350341796875, + "learning_rate": 9.0885679341377e-06, + "loss": 43.7098, + "step": 34120 + }, + { + "epoch": 0.13788951869972568, + "grad_norm": 524.279296875, + "learning_rate": 9.08775592852026e-06, + "loss": 38.2165, + "step": 34130 + }, + { + "epoch": 0.1379299199650933, + "grad_norm": 805.431396484375, + "learning_rate": 9.08694359765884e-06, + "loss": 83.9917, + "step": 34140 + }, + { + "epoch": 0.13797032123046093, + "grad_norm": 611.6764526367188, + "learning_rate": 9.086130941618075e-06, + "loss": 41.6229, + "step": 34150 + }, + { + "epoch": 0.13801072249582857, + "grad_norm": 606.640869140625, + "learning_rate": 9.085317960462625e-06, + "loss": 59.065, + "step": 34160 + }, + { + "epoch": 0.1380511237611962, + "grad_norm": 598.1301879882812, + "learning_rate": 9.084504654257173e-06, + "loss": 49.4473, + "step": 34170 + }, + { + "epoch": 0.13809152502656383, + "grad_norm": 430.78424072265625, + "learning_rate": 9.08369102306643e-06, + "loss": 50.9787, + "step": 34180 + }, + { + "epoch": 0.13813192629193147, + "grad_norm": 1345.611572265625, + "learning_rate": 9.082877066955135e-06, + "loss": 65.4843, + "step": 34190 + }, + { + "epoch": 0.1381723275572991, + "grad_norm": 874.1762084960938, + "learning_rate": 9.08206278598805e-06, + "loss": 46.0913, + "step": 34200 + }, + { + "epoch": 0.13821272882266672, + "grad_norm": 1135.8431396484375, + "learning_rate": 9.081248180229963e-06, + "loss": 73.032, + "step": 34210 + }, + { + "epoch": 0.13825313008803436, + "grad_norm": 507.51470947265625, + "learning_rate": 9.080433249745688e-06, + "loss": 39.2776, + "step": 34220 + }, + { + "epoch": 0.138293531353402, + "grad_norm": 789.2049560546875, + "learning_rate": 9.079617994600066e-06, + "loss": 37.8734, + "step": 34230 + }, + { + "epoch": 0.1383339326187696, + "grad_norm": 470.8572998046875, + "learning_rate": 9.078802414857963e-06, + "loss": 43.4763, + "step": 34240 + }, + { + "epoch": 0.13837433388413725, + "grad_norm": 899.4437866210938, + "learning_rate": 9.077986510584273e-06, + "loss": 48.1022, + "step": 34250 + }, + { + "epoch": 0.1384147351495049, + "grad_norm": 511.8283386230469, + "learning_rate": 9.07717028184391e-06, + "loss": 89.8688, + "step": 34260 + }, + { + "epoch": 0.1384551364148725, + "grad_norm": 485.81951904296875, + "learning_rate": 9.07635372870182e-06, + "loss": 38.4757, + "step": 34270 + }, + { + "epoch": 0.13849553768024014, + "grad_norm": 507.5504455566406, + "learning_rate": 9.07553685122297e-06, + "loss": 48.0852, + "step": 34280 + }, + { + "epoch": 0.13853593894560778, + "grad_norm": 752.1076049804688, + "learning_rate": 9.074719649472358e-06, + "loss": 40.384, + "step": 34290 + }, + { + "epoch": 0.1385763402109754, + "grad_norm": 714.6204833984375, + "learning_rate": 9.073902123515005e-06, + "loss": 63.1098, + "step": 34300 + }, + { + "epoch": 0.13861674147634304, + "grad_norm": 1068.350341796875, + "learning_rate": 9.073084273415956e-06, + "loss": 63.3068, + "step": 34310 + }, + { + "epoch": 0.13865714274171068, + "grad_norm": 1284.7296142578125, + "learning_rate": 9.072266099240286e-06, + "loss": 47.1397, + "step": 34320 + }, + { + "epoch": 0.1386975440070783, + "grad_norm": 1022.5322875976562, + "learning_rate": 9.07144760105309e-06, + "loss": 56.532, + "step": 34330 + }, + { + "epoch": 0.13873794527244593, + "grad_norm": 382.51300048828125, + "learning_rate": 9.070628778919493e-06, + "loss": 53.3356, + "step": 34340 + }, + { + "epoch": 0.13877834653781357, + "grad_norm": 430.1619873046875, + "learning_rate": 9.069809632904647e-06, + "loss": 39.5561, + "step": 34350 + }, + { + "epoch": 0.1388187478031812, + "grad_norm": 416.1811218261719, + "learning_rate": 9.068990163073726e-06, + "loss": 53.2887, + "step": 34360 + }, + { + "epoch": 0.13885914906854882, + "grad_norm": 787.1919555664062, + "learning_rate": 9.068170369491932e-06, + "loss": 57.9072, + "step": 34370 + }, + { + "epoch": 0.13889955033391646, + "grad_norm": 498.4957580566406, + "learning_rate": 9.067350252224491e-06, + "loss": 61.3324, + "step": 34380 + }, + { + "epoch": 0.1389399515992841, + "grad_norm": 690.4000244140625, + "learning_rate": 9.066529811336658e-06, + "loss": 52.2355, + "step": 34390 + }, + { + "epoch": 0.1389803528646517, + "grad_norm": 523.7412719726562, + "learning_rate": 9.06570904689371e-06, + "loss": 44.2429, + "step": 34400 + }, + { + "epoch": 0.13902075413001935, + "grad_norm": 558.5323486328125, + "learning_rate": 9.064887958960953e-06, + "loss": 47.6126, + "step": 34410 + }, + { + "epoch": 0.139061155395387, + "grad_norm": 838.606201171875, + "learning_rate": 9.064066547603716e-06, + "loss": 48.0207, + "step": 34420 + }, + { + "epoch": 0.1391015566607546, + "grad_norm": 698.4739379882812, + "learning_rate": 9.063244812887357e-06, + "loss": 55.4509, + "step": 34430 + }, + { + "epoch": 0.13914195792612225, + "grad_norm": 639.7809448242188, + "learning_rate": 9.062422754877253e-06, + "loss": 60.6359, + "step": 34440 + }, + { + "epoch": 0.13918235919148988, + "grad_norm": 806.6522827148438, + "learning_rate": 9.061600373638816e-06, + "loss": 36.7169, + "step": 34450 + }, + { + "epoch": 0.1392227604568575, + "grad_norm": 737.7337036132812, + "learning_rate": 9.06077766923748e-06, + "loss": 43.2489, + "step": 34460 + }, + { + "epoch": 0.13926316172222514, + "grad_norm": 441.1222839355469, + "learning_rate": 9.059954641738697e-06, + "loss": 71.4044, + "step": 34470 + }, + { + "epoch": 0.13930356298759278, + "grad_norm": 800.96337890625, + "learning_rate": 9.059131291207958e-06, + "loss": 50.1446, + "step": 34480 + }, + { + "epoch": 0.1393439642529604, + "grad_norm": 669.9575805664062, + "learning_rate": 9.058307617710771e-06, + "loss": 50.5971, + "step": 34490 + }, + { + "epoch": 0.13938436551832803, + "grad_norm": 544.849609375, + "learning_rate": 9.057483621312671e-06, + "loss": 40.0475, + "step": 34500 + }, + { + "epoch": 0.13942476678369567, + "grad_norm": 424.7563781738281, + "learning_rate": 9.056659302079222e-06, + "loss": 38.7654, + "step": 34510 + }, + { + "epoch": 0.1394651680490633, + "grad_norm": 240.59954833984375, + "learning_rate": 9.055834660076008e-06, + "loss": 55.3523, + "step": 34520 + }, + { + "epoch": 0.13950556931443092, + "grad_norm": 877.2344360351562, + "learning_rate": 9.055009695368646e-06, + "loss": 54.841, + "step": 34530 + }, + { + "epoch": 0.13954597057979856, + "grad_norm": 714.7890014648438, + "learning_rate": 9.054184408022772e-06, + "loss": 43.7608, + "step": 34540 + }, + { + "epoch": 0.1395863718451662, + "grad_norm": 710.421142578125, + "learning_rate": 9.05335879810405e-06, + "loss": 78.2367, + "step": 34550 + }, + { + "epoch": 0.13962677311053381, + "grad_norm": 763.3213500976562, + "learning_rate": 9.052532865678171e-06, + "loss": 53.5301, + "step": 34560 + }, + { + "epoch": 0.13966717437590145, + "grad_norm": 262.8904724121094, + "learning_rate": 9.05170661081085e-06, + "loss": 34.3858, + "step": 34570 + }, + { + "epoch": 0.1397075756412691, + "grad_norm": 391.6971740722656, + "learning_rate": 9.050880033567831e-06, + "loss": 41.5089, + "step": 34580 + }, + { + "epoch": 0.1397479769066367, + "grad_norm": 320.8378601074219, + "learning_rate": 9.050053134014878e-06, + "loss": 47.4074, + "step": 34590 + }, + { + "epoch": 0.13978837817200435, + "grad_norm": 656.7279663085938, + "learning_rate": 9.049225912217782e-06, + "loss": 59.3808, + "step": 34600 + }, + { + "epoch": 0.13982877943737199, + "grad_norm": 976.0907592773438, + "learning_rate": 9.048398368242365e-06, + "loss": 39.1846, + "step": 34610 + }, + { + "epoch": 0.1398691807027396, + "grad_norm": 622.8963623046875, + "learning_rate": 9.047570502154471e-06, + "loss": 65.1156, + "step": 34620 + }, + { + "epoch": 0.13990958196810724, + "grad_norm": 553.8403930664062, + "learning_rate": 9.046742314019968e-06, + "loss": 30.1408, + "step": 34630 + }, + { + "epoch": 0.13994998323347488, + "grad_norm": 690.28076171875, + "learning_rate": 9.045913803904748e-06, + "loss": 61.2043, + "step": 34640 + }, + { + "epoch": 0.1399903844988425, + "grad_norm": 546.177001953125, + "learning_rate": 9.045084971874738e-06, + "loss": 38.7078, + "step": 34650 + }, + { + "epoch": 0.14003078576421013, + "grad_norm": 677.779296875, + "learning_rate": 9.04425581799588e-06, + "loss": 66.1053, + "step": 34660 + }, + { + "epoch": 0.14007118702957777, + "grad_norm": 377.52117919921875, + "learning_rate": 9.043426342334147e-06, + "loss": 43.3946, + "step": 34670 + }, + { + "epoch": 0.1401115882949454, + "grad_norm": 1007.6997680664062, + "learning_rate": 9.042596544955538e-06, + "loss": 55.9989, + "step": 34680 + }, + { + "epoch": 0.14015198956031302, + "grad_norm": 1100.5362548828125, + "learning_rate": 9.041766425926073e-06, + "loss": 60.7844, + "step": 34690 + }, + { + "epoch": 0.14019239082568066, + "grad_norm": 1078.2347412109375, + "learning_rate": 9.040935985311804e-06, + "loss": 50.4251, + "step": 34700 + }, + { + "epoch": 0.1402327920910483, + "grad_norm": 951.633056640625, + "learning_rate": 9.040105223178803e-06, + "loss": 49.7869, + "step": 34710 + }, + { + "epoch": 0.14027319335641592, + "grad_norm": 499.31390380859375, + "learning_rate": 9.039274139593173e-06, + "loss": 61.8163, + "step": 34720 + }, + { + "epoch": 0.14031359462178356, + "grad_norm": 720.83544921875, + "learning_rate": 9.038442734621034e-06, + "loss": 49.9191, + "step": 34730 + }, + { + "epoch": 0.1403539958871512, + "grad_norm": 591.9390258789062, + "learning_rate": 9.037611008328544e-06, + "loss": 60.4522, + "step": 34740 + }, + { + "epoch": 0.1403943971525188, + "grad_norm": 1439.417724609375, + "learning_rate": 9.036778960781874e-06, + "loss": 65.0005, + "step": 34750 + }, + { + "epoch": 0.14043479841788645, + "grad_norm": 1044.05517578125, + "learning_rate": 9.03594659204723e-06, + "loss": 53.8747, + "step": 34760 + }, + { + "epoch": 0.1404751996832541, + "grad_norm": 708.0410766601562, + "learning_rate": 9.035113902190838e-06, + "loss": 41.8351, + "step": 34770 + }, + { + "epoch": 0.1405156009486217, + "grad_norm": 660.4917602539062, + "learning_rate": 9.03428089127895e-06, + "loss": 52.9228, + "step": 34780 + }, + { + "epoch": 0.14055600221398934, + "grad_norm": 654.495361328125, + "learning_rate": 9.033447559377847e-06, + "loss": 54.6773, + "step": 34790 + }, + { + "epoch": 0.14059640347935698, + "grad_norm": 417.1293029785156, + "learning_rate": 9.032613906553833e-06, + "loss": 44.8771, + "step": 34800 + }, + { + "epoch": 0.1406368047447246, + "grad_norm": 663.234375, + "learning_rate": 9.031779932873238e-06, + "loss": 46.9563, + "step": 34810 + }, + { + "epoch": 0.14067720601009223, + "grad_norm": 714.1453857421875, + "learning_rate": 9.030945638402415e-06, + "loss": 51.3227, + "step": 34820 + }, + { + "epoch": 0.14071760727545987, + "grad_norm": 971.0957641601562, + "learning_rate": 9.030111023207751e-06, + "loss": 59.2087, + "step": 34830 + }, + { + "epoch": 0.1407580085408275, + "grad_norm": 619.7679443359375, + "learning_rate": 9.029276087355646e-06, + "loss": 54.8381, + "step": 34840 + }, + { + "epoch": 0.14079840980619512, + "grad_norm": 895.6705932617188, + "learning_rate": 9.028440830912536e-06, + "loss": 53.2132, + "step": 34850 + }, + { + "epoch": 0.14083881107156276, + "grad_norm": 1039.65576171875, + "learning_rate": 9.027605253944874e-06, + "loss": 42.7459, + "step": 34860 + }, + { + "epoch": 0.1408792123369304, + "grad_norm": 603.7433471679688, + "learning_rate": 9.026769356519149e-06, + "loss": 41.1029, + "step": 34870 + }, + { + "epoch": 0.14091961360229802, + "grad_norm": 770.4713134765625, + "learning_rate": 9.025933138701865e-06, + "loss": 42.8621, + "step": 34880 + }, + { + "epoch": 0.14096001486766566, + "grad_norm": 511.4858093261719, + "learning_rate": 9.02509660055956e-06, + "loss": 56.1045, + "step": 34890 + }, + { + "epoch": 0.1410004161330333, + "grad_norm": 436.2823486328125, + "learning_rate": 9.02425974215879e-06, + "loss": 42.6911, + "step": 34900 + }, + { + "epoch": 0.1410408173984009, + "grad_norm": 537.3641967773438, + "learning_rate": 9.02342256356614e-06, + "loss": 53.8692, + "step": 34910 + }, + { + "epoch": 0.14108121866376855, + "grad_norm": 832.6757202148438, + "learning_rate": 9.022585064848222e-06, + "loss": 40.6428, + "step": 34920 + }, + { + "epoch": 0.1411216199291362, + "grad_norm": 300.685546875, + "learning_rate": 9.021747246071673e-06, + "loss": 37.7268, + "step": 34930 + }, + { + "epoch": 0.1411620211945038, + "grad_norm": 360.01593017578125, + "learning_rate": 9.020909107303152e-06, + "loss": 67.776, + "step": 34940 + }, + { + "epoch": 0.14120242245987144, + "grad_norm": 817.0067138671875, + "learning_rate": 9.020070648609347e-06, + "loss": 54.2647, + "step": 34950 + }, + { + "epoch": 0.14124282372523908, + "grad_norm": 697.2791137695312, + "learning_rate": 9.01923187005697e-06, + "loss": 42.1943, + "step": 34960 + }, + { + "epoch": 0.1412832249906067, + "grad_norm": 492.3368835449219, + "learning_rate": 9.018392771712758e-06, + "loss": 38.4684, + "step": 34970 + }, + { + "epoch": 0.14132362625597433, + "grad_norm": 611.7986450195312, + "learning_rate": 9.017553353643479e-06, + "loss": 61.7044, + "step": 34980 + }, + { + "epoch": 0.14136402752134197, + "grad_norm": 373.13775634765625, + "learning_rate": 9.016713615915913e-06, + "loss": 48.7291, + "step": 34990 + }, + { + "epoch": 0.1414044287867096, + "grad_norm": 854.9056396484375, + "learning_rate": 9.01587355859688e-06, + "loss": 141.1521, + "step": 35000 + }, + { + "epoch": 0.14144483005207723, + "grad_norm": 270.6610107421875, + "learning_rate": 9.015033181753219e-06, + "loss": 38.4525, + "step": 35010 + }, + { + "epoch": 0.14148523131744487, + "grad_norm": 889.5081787109375, + "learning_rate": 9.014192485451794e-06, + "loss": 59.1941, + "step": 35020 + }, + { + "epoch": 0.1415256325828125, + "grad_norm": 612.941650390625, + "learning_rate": 9.013351469759497e-06, + "loss": 64.6319, + "step": 35030 + }, + { + "epoch": 0.14156603384818012, + "grad_norm": 803.6441040039062, + "learning_rate": 9.01251013474324e-06, + "loss": 48.9887, + "step": 35040 + }, + { + "epoch": 0.14160643511354776, + "grad_norm": 948.8998413085938, + "learning_rate": 9.011668480469969e-06, + "loss": 51.4555, + "step": 35050 + }, + { + "epoch": 0.1416468363789154, + "grad_norm": 1026.9730224609375, + "learning_rate": 9.010826507006644e-06, + "loss": 49.5063, + "step": 35060 + }, + { + "epoch": 0.141687237644283, + "grad_norm": 800.8943481445312, + "learning_rate": 9.009984214420265e-06, + "loss": 37.3596, + "step": 35070 + }, + { + "epoch": 0.14172763890965065, + "grad_norm": 725.1683349609375, + "learning_rate": 9.009141602777845e-06, + "loss": 80.1707, + "step": 35080 + }, + { + "epoch": 0.1417680401750183, + "grad_norm": 860.2307739257812, + "learning_rate": 9.008298672146425e-06, + "loss": 46.548, + "step": 35090 + }, + { + "epoch": 0.1418084414403859, + "grad_norm": 493.4331359863281, + "learning_rate": 9.007455422593077e-06, + "loss": 55.3125, + "step": 35100 + }, + { + "epoch": 0.14184884270575354, + "grad_norm": 725.03662109375, + "learning_rate": 9.006611854184893e-06, + "loss": 49.1633, + "step": 35110 + }, + { + "epoch": 0.14188924397112118, + "grad_norm": 1479.2064208984375, + "learning_rate": 9.00576796698899e-06, + "loss": 63.2753, + "step": 35120 + }, + { + "epoch": 0.1419296452364888, + "grad_norm": 584.6494140625, + "learning_rate": 9.004923761072515e-06, + "loss": 49.4957, + "step": 35130 + }, + { + "epoch": 0.14197004650185643, + "grad_norm": 293.61279296875, + "learning_rate": 9.004079236502636e-06, + "loss": 51.4557, + "step": 35140 + }, + { + "epoch": 0.14201044776722407, + "grad_norm": 397.71746826171875, + "learning_rate": 9.00323439334655e-06, + "loss": 52.3445, + "step": 35150 + }, + { + "epoch": 0.14205084903259171, + "grad_norm": 522.1874389648438, + "learning_rate": 9.002389231671474e-06, + "loss": 50.3419, + "step": 35160 + }, + { + "epoch": 0.14209125029795933, + "grad_norm": 985.80419921875, + "learning_rate": 9.001543751544654e-06, + "loss": 61.9348, + "step": 35170 + }, + { + "epoch": 0.14213165156332697, + "grad_norm": 755.8569946289062, + "learning_rate": 9.000697953033364e-06, + "loss": 43.5234, + "step": 35180 + }, + { + "epoch": 0.1421720528286946, + "grad_norm": 329.2603454589844, + "learning_rate": 8.999851836204901e-06, + "loss": 90.2732, + "step": 35190 + }, + { + "epoch": 0.14221245409406222, + "grad_norm": 514.22119140625, + "learning_rate": 8.99900540112658e-06, + "loss": 54.4841, + "step": 35200 + }, + { + "epoch": 0.14225285535942986, + "grad_norm": 372.5816955566406, + "learning_rate": 8.998158647865753e-06, + "loss": 31.2375, + "step": 35210 + }, + { + "epoch": 0.1422932566247975, + "grad_norm": 625.9616088867188, + "learning_rate": 8.997311576489793e-06, + "loss": 58.9871, + "step": 35220 + }, + { + "epoch": 0.1423336578901651, + "grad_norm": 702.8914184570312, + "learning_rate": 8.996464187066096e-06, + "loss": 44.5763, + "step": 35230 + }, + { + "epoch": 0.14237405915553275, + "grad_norm": 953.0204467773438, + "learning_rate": 8.995616479662084e-06, + "loss": 56.9604, + "step": 35240 + }, + { + "epoch": 0.1424144604209004, + "grad_norm": 1026.1070556640625, + "learning_rate": 8.994768454345207e-06, + "loss": 46.73, + "step": 35250 + }, + { + "epoch": 0.142454861686268, + "grad_norm": 630.1910400390625, + "learning_rate": 8.993920111182937e-06, + "loss": 43.647, + "step": 35260 + }, + { + "epoch": 0.14249526295163564, + "grad_norm": 320.8638000488281, + "learning_rate": 8.993071450242775e-06, + "loss": 50.5799, + "step": 35270 + }, + { + "epoch": 0.14253566421700328, + "grad_norm": 939.0507202148438, + "learning_rate": 8.99222247159224e-06, + "loss": 50.8784, + "step": 35280 + }, + { + "epoch": 0.1425760654823709, + "grad_norm": 1079.3389892578125, + "learning_rate": 8.991373175298887e-06, + "loss": 47.8058, + "step": 35290 + }, + { + "epoch": 0.14261646674773854, + "grad_norm": 784.1708374023438, + "learning_rate": 8.99052356143029e-06, + "loss": 43.6722, + "step": 35300 + }, + { + "epoch": 0.14265686801310618, + "grad_norm": 781.5846557617188, + "learning_rate": 8.989673630054044e-06, + "loss": 52.9857, + "step": 35310 + }, + { + "epoch": 0.14269726927847382, + "grad_norm": 1505.8087158203125, + "learning_rate": 8.988823381237778e-06, + "loss": 57.9318, + "step": 35320 + }, + { + "epoch": 0.14273767054384143, + "grad_norm": 427.637451171875, + "learning_rate": 8.987972815049144e-06, + "loss": 51.4073, + "step": 35330 + }, + { + "epoch": 0.14277807180920907, + "grad_norm": 641.6649780273438, + "learning_rate": 8.987121931555814e-06, + "loss": 59.3153, + "step": 35340 + }, + { + "epoch": 0.1428184730745767, + "grad_norm": 840.8336181640625, + "learning_rate": 8.986270730825489e-06, + "loss": 46.8198, + "step": 35350 + }, + { + "epoch": 0.14285887433994432, + "grad_norm": 585.4122924804688, + "learning_rate": 8.985419212925898e-06, + "loss": 54.9574, + "step": 35360 + }, + { + "epoch": 0.14289927560531196, + "grad_norm": 700.5527954101562, + "learning_rate": 8.98456737792479e-06, + "loss": 42.6561, + "step": 35370 + }, + { + "epoch": 0.1429396768706796, + "grad_norm": 518.1506958007812, + "learning_rate": 8.983715225889942e-06, + "loss": 50.0722, + "step": 35380 + }, + { + "epoch": 0.1429800781360472, + "grad_norm": 812.709228515625, + "learning_rate": 8.982862756889158e-06, + "loss": 66.0057, + "step": 35390 + }, + { + "epoch": 0.14302047940141485, + "grad_norm": 660.6160888671875, + "learning_rate": 8.982009970990262e-06, + "loss": 55.8418, + "step": 35400 + }, + { + "epoch": 0.1430608806667825, + "grad_norm": 636.583740234375, + "learning_rate": 8.98115686826111e-06, + "loss": 54.9813, + "step": 35410 + }, + { + "epoch": 0.1431012819321501, + "grad_norm": 570.1336059570312, + "learning_rate": 8.980303448769574e-06, + "loss": 56.2874, + "step": 35420 + }, + { + "epoch": 0.14314168319751774, + "grad_norm": 587.9059448242188, + "learning_rate": 8.979449712583562e-06, + "loss": 67.4113, + "step": 35430 + }, + { + "epoch": 0.14318208446288538, + "grad_norm": 845.802001953125, + "learning_rate": 8.978595659770997e-06, + "loss": 54.5296, + "step": 35440 + }, + { + "epoch": 0.143222485728253, + "grad_norm": 873.96044921875, + "learning_rate": 8.977741290399836e-06, + "loss": 57.2999, + "step": 35450 + }, + { + "epoch": 0.14326288699362064, + "grad_norm": 992.7213134765625, + "learning_rate": 8.976886604538055e-06, + "loss": 52.2296, + "step": 35460 + }, + { + "epoch": 0.14330328825898828, + "grad_norm": 628.52587890625, + "learning_rate": 8.976031602253661e-06, + "loss": 45.6108, + "step": 35470 + }, + { + "epoch": 0.14334368952435592, + "grad_norm": 736.7313232421875, + "learning_rate": 8.975176283614677e-06, + "loss": 52.8563, + "step": 35480 + }, + { + "epoch": 0.14338409078972353, + "grad_norm": 614.1516723632812, + "learning_rate": 8.97432064868916e-06, + "loss": 58.3903, + "step": 35490 + }, + { + "epoch": 0.14342449205509117, + "grad_norm": 695.76513671875, + "learning_rate": 8.973464697545191e-06, + "loss": 45.3606, + "step": 35500 + }, + { + "epoch": 0.1434648933204588, + "grad_norm": 527.5994873046875, + "learning_rate": 8.97260843025087e-06, + "loss": 36.107, + "step": 35510 + }, + { + "epoch": 0.14350529458582642, + "grad_norm": 721.7711791992188, + "learning_rate": 8.971751846874329e-06, + "loss": 47.1207, + "step": 35520 + }, + { + "epoch": 0.14354569585119406, + "grad_norm": 338.0601806640625, + "learning_rate": 8.97089494748372e-06, + "loss": 46.7291, + "step": 35530 + }, + { + "epoch": 0.1435860971165617, + "grad_norm": 1139.4949951171875, + "learning_rate": 8.970037732147226e-06, + "loss": 55.5211, + "step": 35540 + }, + { + "epoch": 0.1436264983819293, + "grad_norm": 633.7717895507812, + "learning_rate": 8.969180200933048e-06, + "loss": 60.3075, + "step": 35550 + }, + { + "epoch": 0.14366689964729695, + "grad_norm": 490.70562744140625, + "learning_rate": 8.968322353909417e-06, + "loss": 86.5231, + "step": 35560 + }, + { + "epoch": 0.1437073009126646, + "grad_norm": 558.4280395507812, + "learning_rate": 8.96746419114459e-06, + "loss": 42.8284, + "step": 35570 + }, + { + "epoch": 0.1437477021780322, + "grad_norm": 636.939453125, + "learning_rate": 8.966605712706844e-06, + "loss": 45.6461, + "step": 35580 + }, + { + "epoch": 0.14378810344339985, + "grad_norm": 507.1147766113281, + "learning_rate": 8.965746918664486e-06, + "loss": 44.9939, + "step": 35590 + }, + { + "epoch": 0.14382850470876749, + "grad_norm": 2863.917724609375, + "learning_rate": 8.964887809085846e-06, + "loss": 66.9657, + "step": 35600 + }, + { + "epoch": 0.1438689059741351, + "grad_norm": 489.3071594238281, + "learning_rate": 8.96402838403928e-06, + "loss": 43.2519, + "step": 35610 + }, + { + "epoch": 0.14390930723950274, + "grad_norm": 783.5155639648438, + "learning_rate": 8.96316864359317e-06, + "loss": 49.5435, + "step": 35620 + }, + { + "epoch": 0.14394970850487038, + "grad_norm": 283.0666198730469, + "learning_rate": 8.962308587815916e-06, + "loss": 41.0137, + "step": 35630 + }, + { + "epoch": 0.14399010977023802, + "grad_norm": 1222.13037109375, + "learning_rate": 8.961448216775955e-06, + "loss": 45.5039, + "step": 35640 + }, + { + "epoch": 0.14403051103560563, + "grad_norm": 1376.6578369140625, + "learning_rate": 8.960587530541737e-06, + "loss": 41.2508, + "step": 35650 + }, + { + "epoch": 0.14407091230097327, + "grad_norm": 634.9231567382812, + "learning_rate": 8.959726529181748e-06, + "loss": 45.087, + "step": 35660 + }, + { + "epoch": 0.1441113135663409, + "grad_norm": 559.2318725585938, + "learning_rate": 8.95886521276449e-06, + "loss": 70.1771, + "step": 35670 + }, + { + "epoch": 0.14415171483170852, + "grad_norm": 302.1412048339844, + "learning_rate": 8.958003581358498e-06, + "loss": 41.9856, + "step": 35680 + }, + { + "epoch": 0.14419211609707616, + "grad_norm": 543.0943603515625, + "learning_rate": 8.957141635032325e-06, + "loss": 47.1019, + "step": 35690 + }, + { + "epoch": 0.1442325173624438, + "grad_norm": 298.73309326171875, + "learning_rate": 8.956279373854553e-06, + "loss": 31.6609, + "step": 35700 + }, + { + "epoch": 0.14427291862781141, + "grad_norm": 1010.9532470703125, + "learning_rate": 8.955416797893787e-06, + "loss": 77.5079, + "step": 35710 + }, + { + "epoch": 0.14431331989317905, + "grad_norm": 515.0786743164062, + "learning_rate": 8.95455390721866e-06, + "loss": 50.1513, + "step": 35720 + }, + { + "epoch": 0.1443537211585467, + "grad_norm": 577.6357421875, + "learning_rate": 8.953690701897827e-06, + "loss": 35.7391, + "step": 35730 + }, + { + "epoch": 0.1443941224239143, + "grad_norm": 648.1744995117188, + "learning_rate": 8.952827181999973e-06, + "loss": 46.5073, + "step": 35740 + }, + { + "epoch": 0.14443452368928195, + "grad_norm": 412.1001281738281, + "learning_rate": 8.951963347593797e-06, + "loss": 65.8637, + "step": 35750 + }, + { + "epoch": 0.1444749249546496, + "grad_norm": 602.5824584960938, + "learning_rate": 8.951099198748036e-06, + "loss": 59.1762, + "step": 35760 + }, + { + "epoch": 0.1445153262200172, + "grad_norm": 550.7648315429688, + "learning_rate": 8.950234735531445e-06, + "loss": 45.6496, + "step": 35770 + }, + { + "epoch": 0.14455572748538484, + "grad_norm": 650.7678833007812, + "learning_rate": 8.949369958012806e-06, + "loss": 57.3394, + "step": 35780 + }, + { + "epoch": 0.14459612875075248, + "grad_norm": 167.07852172851562, + "learning_rate": 8.948504866260924e-06, + "loss": 64.0975, + "step": 35790 + }, + { + "epoch": 0.14463653001612012, + "grad_norm": 541.3006591796875, + "learning_rate": 8.94763946034463e-06, + "loss": 77.2448, + "step": 35800 + }, + { + "epoch": 0.14467693128148773, + "grad_norm": 476.4756774902344, + "learning_rate": 8.946773740332781e-06, + "loss": 57.2655, + "step": 35810 + }, + { + "epoch": 0.14471733254685537, + "grad_norm": 255.90960693359375, + "learning_rate": 8.945907706294262e-06, + "loss": 51.7911, + "step": 35820 + }, + { + "epoch": 0.144757733812223, + "grad_norm": 429.4159240722656, + "learning_rate": 8.945041358297973e-06, + "loss": 46.7109, + "step": 35830 + }, + { + "epoch": 0.14479813507759062, + "grad_norm": 570.712646484375, + "learning_rate": 8.94417469641285e-06, + "loss": 45.2215, + "step": 35840 + }, + { + "epoch": 0.14483853634295826, + "grad_norm": 549.2108154296875, + "learning_rate": 8.943307720707846e-06, + "loss": 50.7016, + "step": 35850 + }, + { + "epoch": 0.1448789376083259, + "grad_norm": 1001.9048461914062, + "learning_rate": 8.942440431251947e-06, + "loss": 54.3763, + "step": 35860 + }, + { + "epoch": 0.14491933887369352, + "grad_norm": 418.1084899902344, + "learning_rate": 8.941572828114154e-06, + "loss": 40.7367, + "step": 35870 + }, + { + "epoch": 0.14495974013906116, + "grad_norm": 402.5210876464844, + "learning_rate": 8.9407049113635e-06, + "loss": 87.0937, + "step": 35880 + }, + { + "epoch": 0.1450001414044288, + "grad_norm": 763.7208862304688, + "learning_rate": 8.939836681069042e-06, + "loss": 50.3678, + "step": 35890 + }, + { + "epoch": 0.1450405426697964, + "grad_norm": 1092.267333984375, + "learning_rate": 8.938968137299861e-06, + "loss": 50.2257, + "step": 35900 + }, + { + "epoch": 0.14508094393516405, + "grad_norm": 605.1162109375, + "learning_rate": 8.938099280125064e-06, + "loss": 44.4844, + "step": 35910 + }, + { + "epoch": 0.1451213452005317, + "grad_norm": 440.0594787597656, + "learning_rate": 8.937230109613778e-06, + "loss": 46.008, + "step": 35920 + }, + { + "epoch": 0.1451617464658993, + "grad_norm": 1271.16259765625, + "learning_rate": 8.936360625835164e-06, + "loss": 55.5143, + "step": 35930 + }, + { + "epoch": 0.14520214773126694, + "grad_norm": 631.4669799804688, + "learning_rate": 8.935490828858399e-06, + "loss": 49.6649, + "step": 35940 + }, + { + "epoch": 0.14524254899663458, + "grad_norm": 765.8115234375, + "learning_rate": 8.934620718752691e-06, + "loss": 36.8788, + "step": 35950 + }, + { + "epoch": 0.14528295026200222, + "grad_norm": 1090.13916015625, + "learning_rate": 8.933750295587269e-06, + "loss": 36.7493, + "step": 35960 + }, + { + "epoch": 0.14532335152736983, + "grad_norm": 830.1979370117188, + "learning_rate": 8.932879559431392e-06, + "loss": 41.6039, + "step": 35970 + }, + { + "epoch": 0.14536375279273747, + "grad_norm": 925.3457641601562, + "learning_rate": 8.932008510354336e-06, + "loss": 50.1517, + "step": 35980 + }, + { + "epoch": 0.1454041540581051, + "grad_norm": 359.84588623046875, + "learning_rate": 8.931137148425407e-06, + "loss": 53.3795, + "step": 35990 + }, + { + "epoch": 0.14544455532347272, + "grad_norm": 807.447021484375, + "learning_rate": 8.930265473713939e-06, + "loss": 58.659, + "step": 36000 + }, + { + "epoch": 0.14548495658884036, + "grad_norm": 303.23321533203125, + "learning_rate": 8.929393486289283e-06, + "loss": 36.5493, + "step": 36010 + }, + { + "epoch": 0.145525357854208, + "grad_norm": 621.742431640625, + "learning_rate": 8.928521186220822e-06, + "loss": 46.3721, + "step": 36020 + }, + { + "epoch": 0.14556575911957562, + "grad_norm": 442.9004211425781, + "learning_rate": 8.92764857357796e-06, + "loss": 36.3563, + "step": 36030 + }, + { + "epoch": 0.14560616038494326, + "grad_norm": 357.2429504394531, + "learning_rate": 8.926775648430124e-06, + "loss": 53.3133, + "step": 36040 + }, + { + "epoch": 0.1456465616503109, + "grad_norm": 805.5604858398438, + "learning_rate": 8.925902410846774e-06, + "loss": 49.645, + "step": 36050 + }, + { + "epoch": 0.1456869629156785, + "grad_norm": 658.550537109375, + "learning_rate": 8.925028860897384e-06, + "loss": 61.7202, + "step": 36060 + }, + { + "epoch": 0.14572736418104615, + "grad_norm": 505.9190979003906, + "learning_rate": 8.924154998651461e-06, + "loss": 49.1229, + "step": 36070 + }, + { + "epoch": 0.1457677654464138, + "grad_norm": 418.9546203613281, + "learning_rate": 8.923280824178538e-06, + "loss": 39.7103, + "step": 36080 + }, + { + "epoch": 0.1458081667117814, + "grad_norm": 642.6342163085938, + "learning_rate": 8.922406337548162e-06, + "loss": 46.5422, + "step": 36090 + }, + { + "epoch": 0.14584856797714904, + "grad_norm": 412.61297607421875, + "learning_rate": 8.921531538829917e-06, + "loss": 45.7388, + "step": 36100 + }, + { + "epoch": 0.14588896924251668, + "grad_norm": 1076.3936767578125, + "learning_rate": 8.920656428093403e-06, + "loss": 37.6155, + "step": 36110 + }, + { + "epoch": 0.14592937050788432, + "grad_norm": 515.2711181640625, + "learning_rate": 8.919781005408251e-06, + "loss": 66.2909, + "step": 36120 + }, + { + "epoch": 0.14596977177325193, + "grad_norm": 1106.967529296875, + "learning_rate": 8.918905270844113e-06, + "loss": 51.8518, + "step": 36130 + }, + { + "epoch": 0.14601017303861957, + "grad_norm": 619.3397216796875, + "learning_rate": 8.918029224470671e-06, + "loss": 53.6405, + "step": 36140 + }, + { + "epoch": 0.1460505743039872, + "grad_norm": 633.2496948242188, + "learning_rate": 8.917152866357621e-06, + "loss": 42.5888, + "step": 36150 + }, + { + "epoch": 0.14609097556935483, + "grad_norm": 764.9520263671875, + "learning_rate": 8.916276196574698e-06, + "loss": 35.7627, + "step": 36160 + }, + { + "epoch": 0.14613137683472247, + "grad_norm": 1141.3271484375, + "learning_rate": 8.91539921519165e-06, + "loss": 48.7545, + "step": 36170 + }, + { + "epoch": 0.1461717781000901, + "grad_norm": 1107.6766357421875, + "learning_rate": 8.914521922278255e-06, + "loss": 39.765, + "step": 36180 + }, + { + "epoch": 0.14621217936545772, + "grad_norm": 597.0726318359375, + "learning_rate": 8.913644317904317e-06, + "loss": 74.6474, + "step": 36190 + }, + { + "epoch": 0.14625258063082536, + "grad_norm": 713.440185546875, + "learning_rate": 8.912766402139662e-06, + "loss": 44.1024, + "step": 36200 + }, + { + "epoch": 0.146292981896193, + "grad_norm": 466.7696533203125, + "learning_rate": 8.91188817505414e-06, + "loss": 40.7422, + "step": 36210 + }, + { + "epoch": 0.1463333831615606, + "grad_norm": 993.3900756835938, + "learning_rate": 8.91100963671763e-06, + "loss": 53.3883, + "step": 36220 + }, + { + "epoch": 0.14637378442692825, + "grad_norm": 736.5969848632812, + "learning_rate": 8.910130787200032e-06, + "loss": 50.1694, + "step": 36230 + }, + { + "epoch": 0.1464141856922959, + "grad_norm": 2283.6953125, + "learning_rate": 8.909251626571273e-06, + "loss": 57.6746, + "step": 36240 + }, + { + "epoch": 0.1464545869576635, + "grad_norm": 379.3497619628906, + "learning_rate": 8.908372154901302e-06, + "loss": 38.0152, + "step": 36250 + }, + { + "epoch": 0.14649498822303114, + "grad_norm": 441.6664123535156, + "learning_rate": 8.907492372260096e-06, + "loss": 48.2084, + "step": 36260 + }, + { + "epoch": 0.14653538948839878, + "grad_norm": 816.5291137695312, + "learning_rate": 8.906612278717657e-06, + "loss": 56.6522, + "step": 36270 + }, + { + "epoch": 0.14657579075376642, + "grad_norm": 851.9935913085938, + "learning_rate": 8.905731874344005e-06, + "loss": 66.8219, + "step": 36280 + }, + { + "epoch": 0.14661619201913403, + "grad_norm": 553.1090087890625, + "learning_rate": 8.904851159209193e-06, + "loss": 50.3202, + "step": 36290 + }, + { + "epoch": 0.14665659328450167, + "grad_norm": 752.8753662109375, + "learning_rate": 8.903970133383297e-06, + "loss": 37.1439, + "step": 36300 + }, + { + "epoch": 0.14669699454986931, + "grad_norm": 837.05859375, + "learning_rate": 8.903088796936414e-06, + "loss": 62.281, + "step": 36310 + }, + { + "epoch": 0.14673739581523693, + "grad_norm": 162.0035858154297, + "learning_rate": 8.902207149938667e-06, + "loss": 44.1739, + "step": 36320 + }, + { + "epoch": 0.14677779708060457, + "grad_norm": 979.8157958984375, + "learning_rate": 8.901325192460206e-06, + "loss": 64.2416, + "step": 36330 + }, + { + "epoch": 0.1468181983459722, + "grad_norm": 603.8624267578125, + "learning_rate": 8.900442924571204e-06, + "loss": 44.7346, + "step": 36340 + }, + { + "epoch": 0.14685859961133982, + "grad_norm": 530.7752685546875, + "learning_rate": 8.89956034634186e-06, + "loss": 47.8523, + "step": 36350 + }, + { + "epoch": 0.14689900087670746, + "grad_norm": 816.960205078125, + "learning_rate": 8.898677457842394e-06, + "loss": 50.7503, + "step": 36360 + }, + { + "epoch": 0.1469394021420751, + "grad_norm": 1267.8363037109375, + "learning_rate": 8.897794259143057e-06, + "loss": 59.9809, + "step": 36370 + }, + { + "epoch": 0.1469798034074427, + "grad_norm": 404.8268127441406, + "learning_rate": 8.896910750314118e-06, + "loss": 37.5561, + "step": 36380 + }, + { + "epoch": 0.14702020467281035, + "grad_norm": 235.9456024169922, + "learning_rate": 8.896026931425876e-06, + "loss": 65.6808, + "step": 36390 + }, + { + "epoch": 0.147060605938178, + "grad_norm": 507.3228454589844, + "learning_rate": 8.895142802548653e-06, + "loss": 40.5951, + "step": 36400 + }, + { + "epoch": 0.1471010072035456, + "grad_norm": 842.0059204101562, + "learning_rate": 8.89425836375279e-06, + "loss": 66.0475, + "step": 36410 + }, + { + "epoch": 0.14714140846891324, + "grad_norm": 366.0971984863281, + "learning_rate": 8.893373615108663e-06, + "loss": 37.7263, + "step": 36420 + }, + { + "epoch": 0.14718180973428088, + "grad_norm": 629.5830688476562, + "learning_rate": 8.892488556686665e-06, + "loss": 46.7629, + "step": 36430 + }, + { + "epoch": 0.14722221099964852, + "grad_norm": 944.98291015625, + "learning_rate": 8.891603188557218e-06, + "loss": 48.1869, + "step": 36440 + }, + { + "epoch": 0.14726261226501614, + "grad_norm": 791.6475830078125, + "learning_rate": 8.890717510790763e-06, + "loss": 55.2396, + "step": 36450 + }, + { + "epoch": 0.14730301353038378, + "grad_norm": 631.4806518554688, + "learning_rate": 8.889831523457773e-06, + "loss": 55.5351, + "step": 36460 + }, + { + "epoch": 0.14734341479575142, + "grad_norm": 740.2923583984375, + "learning_rate": 8.888945226628742e-06, + "loss": 45.0742, + "step": 36470 + }, + { + "epoch": 0.14738381606111903, + "grad_norm": 294.7581787109375, + "learning_rate": 8.888058620374185e-06, + "loss": 43.5822, + "step": 36480 + }, + { + "epoch": 0.14742421732648667, + "grad_norm": 1752.1192626953125, + "learning_rate": 8.887171704764647e-06, + "loss": 54.3944, + "step": 36490 + }, + { + "epoch": 0.1474646185918543, + "grad_norm": 292.4616394042969, + "learning_rate": 8.8862844798707e-06, + "loss": 32.088, + "step": 36500 + }, + { + "epoch": 0.14750501985722192, + "grad_norm": 411.1035461425781, + "learning_rate": 8.885396945762928e-06, + "loss": 52.1857, + "step": 36510 + }, + { + "epoch": 0.14754542112258956, + "grad_norm": 854.386474609375, + "learning_rate": 8.884509102511956e-06, + "loss": 65.9514, + "step": 36520 + }, + { + "epoch": 0.1475858223879572, + "grad_norm": 438.49725341796875, + "learning_rate": 8.883620950188422e-06, + "loss": 42.1549, + "step": 36530 + }, + { + "epoch": 0.1476262236533248, + "grad_norm": 482.4796447753906, + "learning_rate": 8.882732488862988e-06, + "loss": 49.8962, + "step": 36540 + }, + { + "epoch": 0.14766662491869245, + "grad_norm": 671.1175537109375, + "learning_rate": 8.881843718606353e-06, + "loss": 66.4913, + "step": 36550 + }, + { + "epoch": 0.1477070261840601, + "grad_norm": 496.26824951171875, + "learning_rate": 8.880954639489227e-06, + "loss": 36.7379, + "step": 36560 + }, + { + "epoch": 0.1477474274494277, + "grad_norm": 560.552490234375, + "learning_rate": 8.880065251582354e-06, + "loss": 66.3091, + "step": 36570 + }, + { + "epoch": 0.14778782871479534, + "grad_norm": 558.4953002929688, + "learning_rate": 8.879175554956495e-06, + "loss": 60.9963, + "step": 36580 + }, + { + "epoch": 0.14782822998016298, + "grad_norm": 400.2424621582031, + "learning_rate": 8.87828554968244e-06, + "loss": 37.0019, + "step": 36590 + }, + { + "epoch": 0.1478686312455306, + "grad_norm": 1002.6381225585938, + "learning_rate": 8.877395235831002e-06, + "loss": 51.9359, + "step": 36600 + }, + { + "epoch": 0.14790903251089824, + "grad_norm": 455.9054260253906, + "learning_rate": 8.876504613473019e-06, + "loss": 66.0401, + "step": 36610 + }, + { + "epoch": 0.14794943377626588, + "grad_norm": 345.1482849121094, + "learning_rate": 8.875613682679356e-06, + "loss": 60.0617, + "step": 36620 + }, + { + "epoch": 0.14798983504163352, + "grad_norm": 840.730224609375, + "learning_rate": 8.874722443520898e-06, + "loss": 56.1831, + "step": 36630 + }, + { + "epoch": 0.14803023630700113, + "grad_norm": 566.4497680664062, + "learning_rate": 8.873830896068559e-06, + "loss": 44.0531, + "step": 36640 + }, + { + "epoch": 0.14807063757236877, + "grad_norm": 720.5826416015625, + "learning_rate": 8.872939040393274e-06, + "loss": 76.8549, + "step": 36650 + }, + { + "epoch": 0.1481110388377364, + "grad_norm": 1977.6263427734375, + "learning_rate": 8.872046876566003e-06, + "loss": 65.1014, + "step": 36660 + }, + { + "epoch": 0.14815144010310402, + "grad_norm": 617.149658203125, + "learning_rate": 8.871154404657734e-06, + "loss": 55.5341, + "step": 36670 + }, + { + "epoch": 0.14819184136847166, + "grad_norm": 433.0403137207031, + "learning_rate": 8.870261624739474e-06, + "loss": 46.8973, + "step": 36680 + }, + { + "epoch": 0.1482322426338393, + "grad_norm": 637.2809448242188, + "learning_rate": 8.869368536882258e-06, + "loss": 40.903, + "step": 36690 + }, + { + "epoch": 0.1482726438992069, + "grad_norm": 505.6259460449219, + "learning_rate": 8.868475141157146e-06, + "loss": 47.5848, + "step": 36700 + }, + { + "epoch": 0.14831304516457455, + "grad_norm": 929.6765747070312, + "learning_rate": 8.867581437635221e-06, + "loss": 64.581, + "step": 36710 + }, + { + "epoch": 0.1483534464299422, + "grad_norm": 293.0738525390625, + "learning_rate": 8.866687426387592e-06, + "loss": 50.6497, + "step": 36720 + }, + { + "epoch": 0.1483938476953098, + "grad_norm": 758.82666015625, + "learning_rate": 8.86579310748539e-06, + "loss": 45.4429, + "step": 36730 + }, + { + "epoch": 0.14843424896067745, + "grad_norm": 482.9668884277344, + "learning_rate": 8.86489848099977e-06, + "loss": 52.9492, + "step": 36740 + }, + { + "epoch": 0.14847465022604509, + "grad_norm": 637.962158203125, + "learning_rate": 8.864003547001916e-06, + "loss": 34.2872, + "step": 36750 + }, + { + "epoch": 0.1485150514914127, + "grad_norm": 288.27679443359375, + "learning_rate": 8.863108305563035e-06, + "loss": 75.5196, + "step": 36760 + }, + { + "epoch": 0.14855545275678034, + "grad_norm": 463.9593200683594, + "learning_rate": 8.862212756754354e-06, + "loss": 45.1242, + "step": 36770 + }, + { + "epoch": 0.14859585402214798, + "grad_norm": 1444.18798828125, + "learning_rate": 8.861316900647129e-06, + "loss": 52.0546, + "step": 36780 + }, + { + "epoch": 0.14863625528751562, + "grad_norm": 2431.045654296875, + "learning_rate": 8.860420737312638e-06, + "loss": 79.4979, + "step": 36790 + }, + { + "epoch": 0.14867665655288323, + "grad_norm": 451.2615051269531, + "learning_rate": 8.859524266822188e-06, + "loss": 36.7135, + "step": 36800 + }, + { + "epoch": 0.14871705781825087, + "grad_norm": 350.17462158203125, + "learning_rate": 8.858627489247105e-06, + "loss": 46.1026, + "step": 36810 + }, + { + "epoch": 0.1487574590836185, + "grad_norm": 908.991455078125, + "learning_rate": 8.85773040465874e-06, + "loss": 46.8176, + "step": 36820 + }, + { + "epoch": 0.14879786034898612, + "grad_norm": 1497.7669677734375, + "learning_rate": 8.856833013128472e-06, + "loss": 60.939, + "step": 36830 + }, + { + "epoch": 0.14883826161435376, + "grad_norm": 740.0812377929688, + "learning_rate": 8.855935314727702e-06, + "loss": 65.2613, + "step": 36840 + }, + { + "epoch": 0.1488786628797214, + "grad_norm": 394.499755859375, + "learning_rate": 8.855037309527854e-06, + "loss": 34.5515, + "step": 36850 + }, + { + "epoch": 0.14891906414508901, + "grad_norm": 317.7518005371094, + "learning_rate": 8.854138997600382e-06, + "loss": 42.0422, + "step": 36860 + }, + { + "epoch": 0.14895946541045665, + "grad_norm": 445.2290344238281, + "learning_rate": 8.853240379016757e-06, + "loss": 70.6691, + "step": 36870 + }, + { + "epoch": 0.1489998666758243, + "grad_norm": 675.0117797851562, + "learning_rate": 8.852341453848477e-06, + "loss": 55.443, + "step": 36880 + }, + { + "epoch": 0.1490402679411919, + "grad_norm": 491.013671875, + "learning_rate": 8.851442222167068e-06, + "loss": 49.8648, + "step": 36890 + }, + { + "epoch": 0.14908066920655955, + "grad_norm": 708.0944213867188, + "learning_rate": 8.850542684044078e-06, + "loss": 28.9624, + "step": 36900 + }, + { + "epoch": 0.1491210704719272, + "grad_norm": 384.8024597167969, + "learning_rate": 8.849642839551079e-06, + "loss": 40.8231, + "step": 36910 + }, + { + "epoch": 0.1491614717372948, + "grad_norm": 462.9109191894531, + "learning_rate": 8.848742688759666e-06, + "loss": 46.5803, + "step": 36920 + }, + { + "epoch": 0.14920187300266244, + "grad_norm": 207.1934814453125, + "learning_rate": 8.847842231741462e-06, + "loss": 41.873, + "step": 36930 + }, + { + "epoch": 0.14924227426803008, + "grad_norm": 603.2200317382812, + "learning_rate": 8.846941468568108e-06, + "loss": 48.2413, + "step": 36940 + }, + { + "epoch": 0.14928267553339772, + "grad_norm": 399.51690673828125, + "learning_rate": 8.846040399311278e-06, + "loss": 48.9574, + "step": 36950 + }, + { + "epoch": 0.14932307679876533, + "grad_norm": 1572.202392578125, + "learning_rate": 8.845139024042664e-06, + "loss": 68.8805, + "step": 36960 + }, + { + "epoch": 0.14936347806413297, + "grad_norm": 1128.0, + "learning_rate": 8.844237342833985e-06, + "loss": 54.9947, + "step": 36970 + }, + { + "epoch": 0.1494038793295006, + "grad_norm": 360.4505920410156, + "learning_rate": 8.843335355756983e-06, + "loss": 53.7954, + "step": 36980 + }, + { + "epoch": 0.14944428059486822, + "grad_norm": 4446.03125, + "learning_rate": 8.842433062883427e-06, + "loss": 73.6097, + "step": 36990 + }, + { + "epoch": 0.14948468186023586, + "grad_norm": 595.9097900390625, + "learning_rate": 8.841530464285105e-06, + "loss": 47.5093, + "step": 37000 + }, + { + "epoch": 0.1495250831256035, + "grad_norm": 248.24876403808594, + "learning_rate": 8.840627560033833e-06, + "loss": 46.4306, + "step": 37010 + }, + { + "epoch": 0.14956548439097112, + "grad_norm": 907.060546875, + "learning_rate": 8.839724350201452e-06, + "loss": 54.754, + "step": 37020 + }, + { + "epoch": 0.14960588565633876, + "grad_norm": 925.5053100585938, + "learning_rate": 8.838820834859829e-06, + "loss": 60.8077, + "step": 37030 + }, + { + "epoch": 0.1496462869217064, + "grad_norm": 633.4927978515625, + "learning_rate": 8.837917014080849e-06, + "loss": 43.2474, + "step": 37040 + }, + { + "epoch": 0.149686688187074, + "grad_norm": 612.7200317382812, + "learning_rate": 8.837012887936426e-06, + "loss": 44.9851, + "step": 37050 + }, + { + "epoch": 0.14972708945244165, + "grad_norm": 738.667724609375, + "learning_rate": 8.836108456498497e-06, + "loss": 55.44, + "step": 37060 + }, + { + "epoch": 0.1497674907178093, + "grad_norm": 415.4226379394531, + "learning_rate": 8.835203719839024e-06, + "loss": 52.5291, + "step": 37070 + }, + { + "epoch": 0.1498078919831769, + "grad_norm": 367.70318603515625, + "learning_rate": 8.834298678029988e-06, + "loss": 43.8007, + "step": 37080 + }, + { + "epoch": 0.14984829324854454, + "grad_norm": 418.97137451171875, + "learning_rate": 8.833393331143409e-06, + "loss": 30.1141, + "step": 37090 + }, + { + "epoch": 0.14988869451391218, + "grad_norm": 1005.45263671875, + "learning_rate": 8.832487679251311e-06, + "loss": 50.5335, + "step": 37100 + }, + { + "epoch": 0.14992909577927982, + "grad_norm": 639.8682250976562, + "learning_rate": 8.831581722425761e-06, + "loss": 49.5506, + "step": 37110 + }, + { + "epoch": 0.14996949704464743, + "grad_norm": 1013.299072265625, + "learning_rate": 8.830675460738835e-06, + "loss": 45.148, + "step": 37120 + }, + { + "epoch": 0.15000989831001507, + "grad_norm": 595.0233154296875, + "learning_rate": 8.829768894262644e-06, + "loss": 65.652, + "step": 37130 + }, + { + "epoch": 0.1500502995753827, + "grad_norm": 849.735595703125, + "learning_rate": 8.82886202306932e-06, + "loss": 50.526, + "step": 37140 + }, + { + "epoch": 0.15009070084075032, + "grad_norm": 346.1163635253906, + "learning_rate": 8.827954847231016e-06, + "loss": 59.2245, + "step": 37150 + }, + { + "epoch": 0.15013110210611796, + "grad_norm": 610.0313720703125, + "learning_rate": 8.82704736681991e-06, + "loss": 53.6018, + "step": 37160 + }, + { + "epoch": 0.1501715033714856, + "grad_norm": 753.264892578125, + "learning_rate": 8.826139581908211e-06, + "loss": 50.999, + "step": 37170 + }, + { + "epoch": 0.15021190463685322, + "grad_norm": 397.34228515625, + "learning_rate": 8.825231492568146e-06, + "loss": 39.9716, + "step": 37180 + }, + { + "epoch": 0.15025230590222086, + "grad_norm": 787.6273803710938, + "learning_rate": 8.824323098871966e-06, + "loss": 59.7436, + "step": 37190 + }, + { + "epoch": 0.1502927071675885, + "grad_norm": 794.3360595703125, + "learning_rate": 8.823414400891948e-06, + "loss": 55.8264, + "step": 37200 + }, + { + "epoch": 0.1503331084329561, + "grad_norm": 556.7963256835938, + "learning_rate": 8.822505398700395e-06, + "loss": 62.2807, + "step": 37210 + }, + { + "epoch": 0.15037350969832375, + "grad_norm": 652.981201171875, + "learning_rate": 8.821596092369627e-06, + "loss": 63.0537, + "step": 37220 + }, + { + "epoch": 0.1504139109636914, + "grad_norm": 525.3092041015625, + "learning_rate": 8.820686481971998e-06, + "loss": 40.2604, + "step": 37230 + }, + { + "epoch": 0.150454312229059, + "grad_norm": 302.7001647949219, + "learning_rate": 8.81977656757988e-06, + "loss": 34.4532, + "step": 37240 + }, + { + "epoch": 0.15049471349442664, + "grad_norm": 541.8635864257812, + "learning_rate": 8.81886634926567e-06, + "loss": 55.9025, + "step": 37250 + }, + { + "epoch": 0.15053511475979428, + "grad_norm": 504.7498474121094, + "learning_rate": 8.817955827101794e-06, + "loss": 59.6146, + "step": 37260 + }, + { + "epoch": 0.15057551602516192, + "grad_norm": 657.9691772460938, + "learning_rate": 8.817045001160693e-06, + "loss": 59.897, + "step": 37270 + }, + { + "epoch": 0.15061591729052953, + "grad_norm": 662.6056518554688, + "learning_rate": 8.816133871514838e-06, + "loss": 48.5258, + "step": 37280 + }, + { + "epoch": 0.15065631855589717, + "grad_norm": 480.95965576171875, + "learning_rate": 8.815222438236726e-06, + "loss": 62.4884, + "step": 37290 + }, + { + "epoch": 0.1506967198212648, + "grad_norm": 1077.7459716796875, + "learning_rate": 8.814310701398873e-06, + "loss": 38.3544, + "step": 37300 + }, + { + "epoch": 0.15073712108663243, + "grad_norm": 389.59796142578125, + "learning_rate": 8.813398661073823e-06, + "loss": 38.4094, + "step": 37310 + }, + { + "epoch": 0.15077752235200007, + "grad_norm": 521.3574829101562, + "learning_rate": 8.812486317334145e-06, + "loss": 30.9333, + "step": 37320 + }, + { + "epoch": 0.1508179236173677, + "grad_norm": 846.1622314453125, + "learning_rate": 8.811573670252426e-06, + "loss": 60.0239, + "step": 37330 + }, + { + "epoch": 0.15085832488273532, + "grad_norm": 554.4835815429688, + "learning_rate": 8.810660719901283e-06, + "loss": 33.6676, + "step": 37340 + }, + { + "epoch": 0.15089872614810296, + "grad_norm": 316.88494873046875, + "learning_rate": 8.809747466353356e-06, + "loss": 40.518, + "step": 37350 + }, + { + "epoch": 0.1509391274134706, + "grad_norm": 601.1395874023438, + "learning_rate": 8.808833909681305e-06, + "loss": 40.8812, + "step": 37360 + }, + { + "epoch": 0.1509795286788382, + "grad_norm": 449.86737060546875, + "learning_rate": 8.80792004995782e-06, + "loss": 49.2103, + "step": 37370 + }, + { + "epoch": 0.15101992994420585, + "grad_norm": 998.6974487304688, + "learning_rate": 8.807005887255615e-06, + "loss": 40.9746, + "step": 37380 + }, + { + "epoch": 0.1510603312095735, + "grad_norm": 662.0831909179688, + "learning_rate": 8.806091421647423e-06, + "loss": 42.3315, + "step": 37390 + }, + { + "epoch": 0.1511007324749411, + "grad_norm": 697.000244140625, + "learning_rate": 8.805176653206004e-06, + "loss": 57.2413, + "step": 37400 + }, + { + "epoch": 0.15114113374030874, + "grad_norm": 287.0083923339844, + "learning_rate": 8.80426158200414e-06, + "loss": 57.0611, + "step": 37410 + }, + { + "epoch": 0.15118153500567638, + "grad_norm": 656.7665405273438, + "learning_rate": 8.803346208114643e-06, + "loss": 59.7992, + "step": 37420 + }, + { + "epoch": 0.15122193627104402, + "grad_norm": 535.0011596679688, + "learning_rate": 8.802430531610344e-06, + "loss": 37.0664, + "step": 37430 + }, + { + "epoch": 0.15126233753641163, + "grad_norm": 401.31475830078125, + "learning_rate": 8.801514552564097e-06, + "loss": 41.4795, + "step": 37440 + }, + { + "epoch": 0.15130273880177927, + "grad_norm": 525.552490234375, + "learning_rate": 8.800598271048784e-06, + "loss": 42.3817, + "step": 37450 + }, + { + "epoch": 0.15134314006714691, + "grad_norm": 858.5873413085938, + "learning_rate": 8.799681687137309e-06, + "loss": 56.0704, + "step": 37460 + }, + { + "epoch": 0.15138354133251453, + "grad_norm": 531.9202270507812, + "learning_rate": 8.7987648009026e-06, + "loss": 44.5954, + "step": 37470 + }, + { + "epoch": 0.15142394259788217, + "grad_norm": 686.9660034179688, + "learning_rate": 8.79784761241761e-06, + "loss": 29.8809, + "step": 37480 + }, + { + "epoch": 0.1514643438632498, + "grad_norm": 616.3722534179688, + "learning_rate": 8.796930121755315e-06, + "loss": 51.981, + "step": 37490 + }, + { + "epoch": 0.15150474512861742, + "grad_norm": 538.9678344726562, + "learning_rate": 8.796012328988716e-06, + "loss": 63.4237, + "step": 37500 + }, + { + "epoch": 0.15154514639398506, + "grad_norm": 567.8744506835938, + "learning_rate": 8.795094234190837e-06, + "loss": 41.3883, + "step": 37510 + }, + { + "epoch": 0.1515855476593527, + "grad_norm": 544.1000366210938, + "learning_rate": 8.794175837434729e-06, + "loss": 55.9233, + "step": 37520 + }, + { + "epoch": 0.1516259489247203, + "grad_norm": 735.0166625976562, + "learning_rate": 8.79325713879346e-06, + "loss": 51.2997, + "step": 37530 + }, + { + "epoch": 0.15166635019008795, + "grad_norm": 494.5010681152344, + "learning_rate": 8.792338138340131e-06, + "loss": 39.8792, + "step": 37540 + }, + { + "epoch": 0.1517067514554556, + "grad_norm": 971.3145141601562, + "learning_rate": 8.791418836147858e-06, + "loss": 38.6304, + "step": 37550 + }, + { + "epoch": 0.1517471527208232, + "grad_norm": 639.4276733398438, + "learning_rate": 8.790499232289793e-06, + "loss": 49.2647, + "step": 37560 + }, + { + "epoch": 0.15178755398619084, + "grad_norm": 260.048095703125, + "learning_rate": 8.789579326839097e-06, + "loss": 55.1944, + "step": 37570 + }, + { + "epoch": 0.15182795525155848, + "grad_norm": 534.6347045898438, + "learning_rate": 8.788659119868966e-06, + "loss": 40.731, + "step": 37580 + }, + { + "epoch": 0.15186835651692612, + "grad_norm": 695.5957641601562, + "learning_rate": 8.787738611452616e-06, + "loss": 42.3443, + "step": 37590 + }, + { + "epoch": 0.15190875778229374, + "grad_norm": 640.8202514648438, + "learning_rate": 8.78681780166329e-06, + "loss": 53.9494, + "step": 37600 + }, + { + "epoch": 0.15194915904766138, + "grad_norm": 815.8932495117188, + "learning_rate": 8.785896690574248e-06, + "loss": 39.8566, + "step": 37610 + }, + { + "epoch": 0.15198956031302902, + "grad_norm": 292.79315185546875, + "learning_rate": 8.784975278258783e-06, + "loss": 40.0339, + "step": 37620 + }, + { + "epoch": 0.15202996157839663, + "grad_norm": 973.7109375, + "learning_rate": 8.784053564790205e-06, + "loss": 60.2069, + "step": 37630 + }, + { + "epoch": 0.15207036284376427, + "grad_norm": 748.5640258789062, + "learning_rate": 8.783131550241853e-06, + "loss": 64.9437, + "step": 37640 + }, + { + "epoch": 0.1521107641091319, + "grad_norm": 1388.9613037109375, + "learning_rate": 8.782209234687083e-06, + "loss": 55.6427, + "step": 37650 + }, + { + "epoch": 0.15215116537449952, + "grad_norm": 592.8505249023438, + "learning_rate": 8.781286618199285e-06, + "loss": 45.0461, + "step": 37660 + }, + { + "epoch": 0.15219156663986716, + "grad_norm": 513.772705078125, + "learning_rate": 8.780363700851863e-06, + "loss": 42.5973, + "step": 37670 + }, + { + "epoch": 0.1522319679052348, + "grad_norm": 502.7109069824219, + "learning_rate": 8.779440482718251e-06, + "loss": 50.6042, + "step": 37680 + }, + { + "epoch": 0.1522723691706024, + "grad_norm": 385.12823486328125, + "learning_rate": 8.778516963871904e-06, + "loss": 30.2378, + "step": 37690 + }, + { + "epoch": 0.15231277043597005, + "grad_norm": 677.427734375, + "learning_rate": 8.777593144386305e-06, + "loss": 53.152, + "step": 37700 + }, + { + "epoch": 0.1523531717013377, + "grad_norm": 401.56939697265625, + "learning_rate": 8.776669024334955e-06, + "loss": 43.2506, + "step": 37710 + }, + { + "epoch": 0.1523935729667053, + "grad_norm": 674.8109741210938, + "learning_rate": 8.775744603791385e-06, + "loss": 44.2603, + "step": 37720 + }, + { + "epoch": 0.15243397423207294, + "grad_norm": 442.9088439941406, + "learning_rate": 8.774819882829144e-06, + "loss": 30.5279, + "step": 37730 + }, + { + "epoch": 0.15247437549744058, + "grad_norm": 689.316650390625, + "learning_rate": 8.77389486152181e-06, + "loss": 45.8222, + "step": 37740 + }, + { + "epoch": 0.15251477676280822, + "grad_norm": 490.8953552246094, + "learning_rate": 8.772969539942981e-06, + "loss": 41.8513, + "step": 37750 + }, + { + "epoch": 0.15255517802817584, + "grad_norm": 1018.9712524414062, + "learning_rate": 8.772043918166282e-06, + "loss": 43.3342, + "step": 37760 + }, + { + "epoch": 0.15259557929354348, + "grad_norm": 679.5645141601562, + "learning_rate": 8.771117996265358e-06, + "loss": 87.8444, + "step": 37770 + }, + { + "epoch": 0.15263598055891112, + "grad_norm": 623.6113891601562, + "learning_rate": 8.770191774313883e-06, + "loss": 48.4947, + "step": 37780 + }, + { + "epoch": 0.15267638182427873, + "grad_norm": 605.280517578125, + "learning_rate": 8.769265252385552e-06, + "loss": 47.7166, + "step": 37790 + }, + { + "epoch": 0.15271678308964637, + "grad_norm": 575.60791015625, + "learning_rate": 8.768338430554083e-06, + "loss": 29.7103, + "step": 37800 + }, + { + "epoch": 0.152757184355014, + "grad_norm": 729.2935180664062, + "learning_rate": 8.76741130889322e-06, + "loss": 44.2859, + "step": 37810 + }, + { + "epoch": 0.15279758562038162, + "grad_norm": 816.4547729492188, + "learning_rate": 8.766483887476727e-06, + "loss": 52.9964, + "step": 37820 + }, + { + "epoch": 0.15283798688574926, + "grad_norm": 1206.9049072265625, + "learning_rate": 8.7655561663784e-06, + "loss": 63.2851, + "step": 37830 + }, + { + "epoch": 0.1528783881511169, + "grad_norm": 947.2355346679688, + "learning_rate": 8.764628145672048e-06, + "loss": 60.9178, + "step": 37840 + }, + { + "epoch": 0.1529187894164845, + "grad_norm": 1392.46923828125, + "learning_rate": 8.763699825431513e-06, + "loss": 52.3686, + "step": 37850 + }, + { + "epoch": 0.15295919068185215, + "grad_norm": 374.6267395019531, + "learning_rate": 8.762771205730656e-06, + "loss": 32.7278, + "step": 37860 + }, + { + "epoch": 0.1529995919472198, + "grad_norm": 583.3077392578125, + "learning_rate": 8.761842286643362e-06, + "loss": 42.1567, + "step": 37870 + }, + { + "epoch": 0.1530399932125874, + "grad_norm": 671.8289184570312, + "learning_rate": 8.760913068243542e-06, + "loss": 56.332, + "step": 37880 + }, + { + "epoch": 0.15308039447795505, + "grad_norm": 514.0142211914062, + "learning_rate": 8.759983550605132e-06, + "loss": 37.4075, + "step": 37890 + }, + { + "epoch": 0.15312079574332269, + "grad_norm": 417.3742980957031, + "learning_rate": 8.759053733802083e-06, + "loss": 32.1351, + "step": 37900 + }, + { + "epoch": 0.15316119700869033, + "grad_norm": 363.85223388671875, + "learning_rate": 8.758123617908383e-06, + "loss": 43.9274, + "step": 37910 + }, + { + "epoch": 0.15320159827405794, + "grad_norm": 392.4961242675781, + "learning_rate": 8.757193202998033e-06, + "loss": 40.1141, + "step": 37920 + }, + { + "epoch": 0.15324199953942558, + "grad_norm": 572.553955078125, + "learning_rate": 8.756262489145061e-06, + "loss": 47.0318, + "step": 37930 + }, + { + "epoch": 0.15328240080479322, + "grad_norm": 440.2536926269531, + "learning_rate": 8.755331476423526e-06, + "loss": 45.7451, + "step": 37940 + }, + { + "epoch": 0.15332280207016083, + "grad_norm": 506.631591796875, + "learning_rate": 8.754400164907496e-06, + "loss": 45.7129, + "step": 37950 + }, + { + "epoch": 0.15336320333552847, + "grad_norm": 315.4403991699219, + "learning_rate": 8.753468554671078e-06, + "loss": 45.1952, + "step": 37960 + }, + { + "epoch": 0.1534036046008961, + "grad_norm": 469.86474609375, + "learning_rate": 8.752536645788391e-06, + "loss": 57.6503, + "step": 37970 + }, + { + "epoch": 0.15344400586626372, + "grad_norm": 456.01617431640625, + "learning_rate": 8.751604438333587e-06, + "loss": 53.0288, + "step": 37980 + }, + { + "epoch": 0.15348440713163136, + "grad_norm": 560.4326782226562, + "learning_rate": 8.750671932380834e-06, + "loss": 49.6529, + "step": 37990 + }, + { + "epoch": 0.153524808396999, + "grad_norm": 843.6622924804688, + "learning_rate": 8.749739128004329e-06, + "loss": 34.5586, + "step": 38000 + }, + { + "epoch": 0.15356520966236661, + "grad_norm": 478.7063293457031, + "learning_rate": 8.748806025278292e-06, + "loss": 36.0544, + "step": 38010 + }, + { + "epoch": 0.15360561092773425, + "grad_norm": 455.8020324707031, + "learning_rate": 8.747872624276963e-06, + "loss": 47.236, + "step": 38020 + }, + { + "epoch": 0.1536460121931019, + "grad_norm": 915.6499633789062, + "learning_rate": 8.746938925074609e-06, + "loss": 45.514, + "step": 38030 + }, + { + "epoch": 0.1536864134584695, + "grad_norm": 931.0418701171875, + "learning_rate": 8.746004927745522e-06, + "loss": 44.9785, + "step": 38040 + }, + { + "epoch": 0.15372681472383715, + "grad_norm": 626.0626831054688, + "learning_rate": 8.745070632364014e-06, + "loss": 39.1456, + "step": 38050 + }, + { + "epoch": 0.1537672159892048, + "grad_norm": 537.35400390625, + "learning_rate": 8.744136039004422e-06, + "loss": 55.098, + "step": 38060 + }, + { + "epoch": 0.15380761725457243, + "grad_norm": 492.8907470703125, + "learning_rate": 8.743201147741112e-06, + "loss": 29.6499, + "step": 38070 + }, + { + "epoch": 0.15384801851994004, + "grad_norm": 386.3055114746094, + "learning_rate": 8.742265958648464e-06, + "loss": 55.5486, + "step": 38080 + }, + { + "epoch": 0.15388841978530768, + "grad_norm": 189.93057250976562, + "learning_rate": 8.741330471800888e-06, + "loss": 56.8159, + "step": 38090 + }, + { + "epoch": 0.15392882105067532, + "grad_norm": 689.0518188476562, + "learning_rate": 8.740394687272817e-06, + "loss": 57.4512, + "step": 38100 + }, + { + "epoch": 0.15396922231604293, + "grad_norm": 759.5892944335938, + "learning_rate": 8.739458605138706e-06, + "loss": 45.8897, + "step": 38110 + }, + { + "epoch": 0.15400962358141057, + "grad_norm": 470.87738037109375, + "learning_rate": 8.738522225473036e-06, + "loss": 57.6412, + "step": 38120 + }, + { + "epoch": 0.1540500248467782, + "grad_norm": 711.9561157226562, + "learning_rate": 8.737585548350312e-06, + "loss": 45.1267, + "step": 38130 + }, + { + "epoch": 0.15409042611214582, + "grad_norm": 525.4124755859375, + "learning_rate": 8.736648573845057e-06, + "loss": 47.2233, + "step": 38140 + }, + { + "epoch": 0.15413082737751346, + "grad_norm": 572.8314819335938, + "learning_rate": 8.735711302031824e-06, + "loss": 47.1612, + "step": 38150 + }, + { + "epoch": 0.1541712286428811, + "grad_norm": 466.9100341796875, + "learning_rate": 8.734773732985186e-06, + "loss": 41.9769, + "step": 38160 + }, + { + "epoch": 0.15421162990824872, + "grad_norm": 582.3335571289062, + "learning_rate": 8.733835866779745e-06, + "loss": 53.4043, + "step": 38170 + }, + { + "epoch": 0.15425203117361636, + "grad_norm": 561.9642944335938, + "learning_rate": 8.73289770349012e-06, + "loss": 38.1644, + "step": 38180 + }, + { + "epoch": 0.154292432438984, + "grad_norm": 542.4348754882812, + "learning_rate": 8.731959243190955e-06, + "loss": 42.0566, + "step": 38190 + }, + { + "epoch": 0.1543328337043516, + "grad_norm": 735.38330078125, + "learning_rate": 8.73102048595692e-06, + "loss": 31.1532, + "step": 38200 + }, + { + "epoch": 0.15437323496971925, + "grad_norm": 585.3547973632812, + "learning_rate": 8.730081431862709e-06, + "loss": 49.3706, + "step": 38210 + }, + { + "epoch": 0.1544136362350869, + "grad_norm": 439.5715637207031, + "learning_rate": 8.729142080983037e-06, + "loss": 43.1502, + "step": 38220 + }, + { + "epoch": 0.15445403750045453, + "grad_norm": 453.2962646484375, + "learning_rate": 8.728202433392645e-06, + "loss": 43.9217, + "step": 38230 + }, + { + "epoch": 0.15449443876582214, + "grad_norm": 836.9139404296875, + "learning_rate": 8.727262489166295e-06, + "loss": 78.5818, + "step": 38240 + }, + { + "epoch": 0.15453484003118978, + "grad_norm": 1474.2198486328125, + "learning_rate": 8.726322248378775e-06, + "loss": 72.0549, + "step": 38250 + }, + { + "epoch": 0.15457524129655742, + "grad_norm": 603.5780029296875, + "learning_rate": 8.725381711104894e-06, + "loss": 52.8777, + "step": 38260 + }, + { + "epoch": 0.15461564256192503, + "grad_norm": 850.0660400390625, + "learning_rate": 8.724440877419487e-06, + "loss": 76.4468, + "step": 38270 + }, + { + "epoch": 0.15465604382729267, + "grad_norm": 1414.240478515625, + "learning_rate": 8.723499747397415e-06, + "loss": 46.7855, + "step": 38280 + }, + { + "epoch": 0.1546964450926603, + "grad_norm": 704.088134765625, + "learning_rate": 8.722558321113555e-06, + "loss": 59.9468, + "step": 38290 + }, + { + "epoch": 0.15473684635802792, + "grad_norm": 369.7774963378906, + "learning_rate": 8.721616598642812e-06, + "loss": 39.3916, + "step": 38300 + }, + { + "epoch": 0.15477724762339556, + "grad_norm": 648.994384765625, + "learning_rate": 8.720674580060117e-06, + "loss": 48.405, + "step": 38310 + }, + { + "epoch": 0.1548176488887632, + "grad_norm": 367.8060607910156, + "learning_rate": 8.719732265440423e-06, + "loss": 65.3975, + "step": 38320 + }, + { + "epoch": 0.15485805015413082, + "grad_norm": 682.9230346679688, + "learning_rate": 8.718789654858702e-06, + "loss": 41.5378, + "step": 38330 + }, + { + "epoch": 0.15489845141949846, + "grad_norm": 341.1941223144531, + "learning_rate": 8.717846748389956e-06, + "loss": 40.6628, + "step": 38340 + }, + { + "epoch": 0.1549388526848661, + "grad_norm": 508.8382263183594, + "learning_rate": 8.716903546109208e-06, + "loss": 38.0144, + "step": 38350 + }, + { + "epoch": 0.1549792539502337, + "grad_norm": 480.63970947265625, + "learning_rate": 8.715960048091502e-06, + "loss": 45.1181, + "step": 38360 + }, + { + "epoch": 0.15501965521560135, + "grad_norm": 730.0436401367188, + "learning_rate": 8.715016254411908e-06, + "loss": 43.1608, + "step": 38370 + }, + { + "epoch": 0.155060056480969, + "grad_norm": 744.2866821289062, + "learning_rate": 8.714072165145521e-06, + "loss": 55.8971, + "step": 38380 + }, + { + "epoch": 0.15510045774633663, + "grad_norm": 422.5709533691406, + "learning_rate": 8.713127780367458e-06, + "loss": 47.1621, + "step": 38390 + }, + { + "epoch": 0.15514085901170424, + "grad_norm": 428.45452880859375, + "learning_rate": 8.712183100152858e-06, + "loss": 42.7035, + "step": 38400 + }, + { + "epoch": 0.15518126027707188, + "grad_norm": 491.593505859375, + "learning_rate": 8.711238124576884e-06, + "loss": 93.1383, + "step": 38410 + }, + { + "epoch": 0.15522166154243952, + "grad_norm": 1070.5885009765625, + "learning_rate": 8.710292853714726e-06, + "loss": 30.7639, + "step": 38420 + }, + { + "epoch": 0.15526206280780713, + "grad_norm": 1554.32666015625, + "learning_rate": 8.709347287641593e-06, + "loss": 53.9866, + "step": 38430 + }, + { + "epoch": 0.15530246407317477, + "grad_norm": 472.5458068847656, + "learning_rate": 8.70840142643272e-06, + "loss": 55.8533, + "step": 38440 + }, + { + "epoch": 0.1553428653385424, + "grad_norm": 448.1315612792969, + "learning_rate": 8.707455270163365e-06, + "loss": 63.6653, + "step": 38450 + }, + { + "epoch": 0.15538326660391003, + "grad_norm": 704.42138671875, + "learning_rate": 8.70650881890881e-06, + "loss": 42.9613, + "step": 38460 + }, + { + "epoch": 0.15542366786927767, + "grad_norm": 246.81031799316406, + "learning_rate": 8.705562072744358e-06, + "loss": 47.2647, + "step": 38470 + }, + { + "epoch": 0.1554640691346453, + "grad_norm": 652.4033203125, + "learning_rate": 8.704615031745337e-06, + "loss": 66.3134, + "step": 38480 + }, + { + "epoch": 0.15550447040001292, + "grad_norm": 790.2584838867188, + "learning_rate": 8.703667695987102e-06, + "loss": 58.0497, + "step": 38490 + }, + { + "epoch": 0.15554487166538056, + "grad_norm": 718.5376586914062, + "learning_rate": 8.702720065545024e-06, + "loss": 63.9357, + "step": 38500 + }, + { + "epoch": 0.1555852729307482, + "grad_norm": 754.3954467773438, + "learning_rate": 8.701772140494504e-06, + "loss": 62.0079, + "step": 38510 + }, + { + "epoch": 0.1556256741961158, + "grad_norm": 531.1658325195312, + "learning_rate": 8.700823920910964e-06, + "loss": 58.6769, + "step": 38520 + }, + { + "epoch": 0.15566607546148345, + "grad_norm": 1036.1522216796875, + "learning_rate": 8.699875406869848e-06, + "loss": 40.6459, + "step": 38530 + }, + { + "epoch": 0.1557064767268511, + "grad_norm": 411.3389587402344, + "learning_rate": 8.69892659844663e-06, + "loss": 58.2496, + "step": 38540 + }, + { + "epoch": 0.15574687799221873, + "grad_norm": 317.8341979980469, + "learning_rate": 8.697977495716794e-06, + "loss": 42.8371, + "step": 38550 + }, + { + "epoch": 0.15578727925758634, + "grad_norm": 837.8154907226562, + "learning_rate": 8.697028098755863e-06, + "loss": 47.0967, + "step": 38560 + }, + { + "epoch": 0.15582768052295398, + "grad_norm": 408.35845947265625, + "learning_rate": 8.69607840763937e-06, + "loss": 59.6451, + "step": 38570 + }, + { + "epoch": 0.15586808178832162, + "grad_norm": 421.7928771972656, + "learning_rate": 8.695128422442882e-06, + "loss": 37.1028, + "step": 38580 + }, + { + "epoch": 0.15590848305368923, + "grad_norm": 1320.11962890625, + "learning_rate": 8.694178143241984e-06, + "loss": 51.9049, + "step": 38590 + }, + { + "epoch": 0.15594888431905687, + "grad_norm": 702.5349731445312, + "learning_rate": 8.693227570112285e-06, + "loss": 59.5722, + "step": 38600 + }, + { + "epoch": 0.15598928558442451, + "grad_norm": 827.35888671875, + "learning_rate": 8.692276703129421e-06, + "loss": 55.1379, + "step": 38610 + }, + { + "epoch": 0.15602968684979213, + "grad_norm": 387.04266357421875, + "learning_rate": 8.691325542369041e-06, + "loss": 52.5193, + "step": 38620 + }, + { + "epoch": 0.15607008811515977, + "grad_norm": 271.24432373046875, + "learning_rate": 8.69037408790683e-06, + "loss": 55.1027, + "step": 38630 + }, + { + "epoch": 0.1561104893805274, + "grad_norm": 448.82666015625, + "learning_rate": 8.689422339818489e-06, + "loss": 38.2809, + "step": 38640 + }, + { + "epoch": 0.15615089064589502, + "grad_norm": 531.73828125, + "learning_rate": 8.688470298179746e-06, + "loss": 46.1902, + "step": 38650 + }, + { + "epoch": 0.15619129191126266, + "grad_norm": 103.58868408203125, + "learning_rate": 8.687517963066347e-06, + "loss": 37.2087, + "step": 38660 + }, + { + "epoch": 0.1562316931766303, + "grad_norm": 845.83251953125, + "learning_rate": 8.686565334554069e-06, + "loss": 67.7298, + "step": 38670 + }, + { + "epoch": 0.1562720944419979, + "grad_norm": 475.2627258300781, + "learning_rate": 8.685612412718704e-06, + "loss": 57.5004, + "step": 38680 + }, + { + "epoch": 0.15631249570736555, + "grad_norm": 812.0715942382812, + "learning_rate": 8.684659197636076e-06, + "loss": 46.925, + "step": 38690 + }, + { + "epoch": 0.1563528969727332, + "grad_norm": 531.0429077148438, + "learning_rate": 8.683705689382025e-06, + "loss": 59.5186, + "step": 38700 + }, + { + "epoch": 0.15639329823810083, + "grad_norm": 593.1111450195312, + "learning_rate": 8.682751888032419e-06, + "loss": 39.3045, + "step": 38710 + }, + { + "epoch": 0.15643369950346844, + "grad_norm": 720.40869140625, + "learning_rate": 8.681797793663147e-06, + "loss": 32.7965, + "step": 38720 + }, + { + "epoch": 0.15647410076883608, + "grad_norm": 1218.6812744140625, + "learning_rate": 8.680843406350122e-06, + "loss": 53.3814, + "step": 38730 + }, + { + "epoch": 0.15651450203420372, + "grad_norm": 1180.31884765625, + "learning_rate": 8.679888726169277e-06, + "loss": 56.8287, + "step": 38740 + }, + { + "epoch": 0.15655490329957134, + "grad_norm": 377.81390380859375, + "learning_rate": 8.678933753196577e-06, + "loss": 58.477, + "step": 38750 + }, + { + "epoch": 0.15659530456493898, + "grad_norm": 1002.2963256835938, + "learning_rate": 8.677978487508002e-06, + "loss": 44.0508, + "step": 38760 + }, + { + "epoch": 0.15663570583030662, + "grad_norm": 768.4991455078125, + "learning_rate": 8.677022929179558e-06, + "loss": 57.5769, + "step": 38770 + }, + { + "epoch": 0.15667610709567423, + "grad_norm": 1618.297607421875, + "learning_rate": 8.676067078287276e-06, + "loss": 39.0754, + "step": 38780 + }, + { + "epoch": 0.15671650836104187, + "grad_norm": 723.4669189453125, + "learning_rate": 8.675110934907206e-06, + "loss": 45.2615, + "step": 38790 + }, + { + "epoch": 0.1567569096264095, + "grad_norm": 397.9026184082031, + "learning_rate": 8.674154499115426e-06, + "loss": 37.6675, + "step": 38800 + }, + { + "epoch": 0.15679731089177712, + "grad_norm": 943.8479614257812, + "learning_rate": 8.673197770988034e-06, + "loss": 47.4713, + "step": 38810 + }, + { + "epoch": 0.15683771215714476, + "grad_norm": 940.014892578125, + "learning_rate": 8.672240750601152e-06, + "loss": 60.0122, + "step": 38820 + }, + { + "epoch": 0.1568781134225124, + "grad_norm": 605.8595581054688, + "learning_rate": 8.67128343803093e-06, + "loss": 48.2564, + "step": 38830 + }, + { + "epoch": 0.15691851468788, + "grad_norm": 613.501708984375, + "learning_rate": 8.670325833353532e-06, + "loss": 36.4502, + "step": 38840 + }, + { + "epoch": 0.15695891595324765, + "grad_norm": 357.8223876953125, + "learning_rate": 8.669367936645152e-06, + "loss": 45.7456, + "step": 38850 + }, + { + "epoch": 0.1569993172186153, + "grad_norm": 934.835693359375, + "learning_rate": 8.668409747982005e-06, + "loss": 43.5836, + "step": 38860 + }, + { + "epoch": 0.15703971848398293, + "grad_norm": 421.37188720703125, + "learning_rate": 8.667451267440332e-06, + "loss": 53.3812, + "step": 38870 + }, + { + "epoch": 0.15708011974935054, + "grad_norm": 176.40232849121094, + "learning_rate": 8.666492495096391e-06, + "loss": 49.2198, + "step": 38880 + }, + { + "epoch": 0.15712052101471818, + "grad_norm": 688.4175415039062, + "learning_rate": 8.66553343102647e-06, + "loss": 37.7146, + "step": 38890 + }, + { + "epoch": 0.15716092228008582, + "grad_norm": 498.5278015136719, + "learning_rate": 8.664574075306876e-06, + "loss": 37.7639, + "step": 38900 + }, + { + "epoch": 0.15720132354545344, + "grad_norm": 554.88232421875, + "learning_rate": 8.66361442801394e-06, + "loss": 41.1534, + "step": 38910 + }, + { + "epoch": 0.15724172481082108, + "grad_norm": 591.5156860351562, + "learning_rate": 8.662654489224018e-06, + "loss": 55.6884, + "step": 38920 + }, + { + "epoch": 0.15728212607618872, + "grad_norm": 915.4189453125, + "learning_rate": 8.661694259013489e-06, + "loss": 40.387, + "step": 38930 + }, + { + "epoch": 0.15732252734155633, + "grad_norm": 844.4616088867188, + "learning_rate": 8.660733737458751e-06, + "loss": 63.7767, + "step": 38940 + }, + { + "epoch": 0.15736292860692397, + "grad_norm": 492.8940124511719, + "learning_rate": 8.659772924636232e-06, + "loss": 48.9174, + "step": 38950 + }, + { + "epoch": 0.1574033298722916, + "grad_norm": 746.3994140625, + "learning_rate": 8.658811820622376e-06, + "loss": 42.9431, + "step": 38960 + }, + { + "epoch": 0.15744373113765922, + "grad_norm": 796.9805908203125, + "learning_rate": 8.657850425493656e-06, + "loss": 60.7916, + "step": 38970 + }, + { + "epoch": 0.15748413240302686, + "grad_norm": 392.3251647949219, + "learning_rate": 8.656888739326564e-06, + "loss": 51.2107, + "step": 38980 + }, + { + "epoch": 0.1575245336683945, + "grad_norm": 887.1876831054688, + "learning_rate": 8.65592676219762e-06, + "loss": 53.673, + "step": 38990 + }, + { + "epoch": 0.1575649349337621, + "grad_norm": 435.13690185546875, + "learning_rate": 8.65496449418336e-06, + "loss": 57.3104, + "step": 39000 + }, + { + "epoch": 0.15760533619912975, + "grad_norm": 769.52099609375, + "learning_rate": 8.654001935360349e-06, + "loss": 53.4194, + "step": 39010 + }, + { + "epoch": 0.1576457374644974, + "grad_norm": 691.1365356445312, + "learning_rate": 8.653039085805174e-06, + "loss": 28.5163, + "step": 39020 + }, + { + "epoch": 0.15768613872986503, + "grad_norm": 1318.233642578125, + "learning_rate": 8.652075945594444e-06, + "loss": 63.2581, + "step": 39030 + }, + { + "epoch": 0.15772653999523265, + "grad_norm": 338.80902099609375, + "learning_rate": 8.651112514804793e-06, + "loss": 51.1088, + "step": 39040 + }, + { + "epoch": 0.15776694126060029, + "grad_norm": 690.9937133789062, + "learning_rate": 8.650148793512874e-06, + "loss": 54.7363, + "step": 39050 + }, + { + "epoch": 0.15780734252596793, + "grad_norm": 672.234375, + "learning_rate": 8.649184781795367e-06, + "loss": 50.9707, + "step": 39060 + }, + { + "epoch": 0.15784774379133554, + "grad_norm": 353.51617431640625, + "learning_rate": 8.648220479728976e-06, + "loss": 67.0655, + "step": 39070 + }, + { + "epoch": 0.15788814505670318, + "grad_norm": 481.1822814941406, + "learning_rate": 8.647255887390425e-06, + "loss": 35.7537, + "step": 39080 + }, + { + "epoch": 0.15792854632207082, + "grad_norm": 845.3275756835938, + "learning_rate": 8.64629100485646e-06, + "loss": 74.3657, + "step": 39090 + }, + { + "epoch": 0.15796894758743843, + "grad_norm": 400.9262390136719, + "learning_rate": 8.645325832203855e-06, + "loss": 35.4863, + "step": 39100 + }, + { + "epoch": 0.15800934885280607, + "grad_norm": 611.3833618164062, + "learning_rate": 8.644360369509403e-06, + "loss": 50.1126, + "step": 39110 + }, + { + "epoch": 0.1580497501181737, + "grad_norm": 635.46142578125, + "learning_rate": 8.64339461684992e-06, + "loss": 50.9561, + "step": 39120 + }, + { + "epoch": 0.15809015138354132, + "grad_norm": 695.3146362304688, + "learning_rate": 8.64242857430225e-06, + "loss": 42.4911, + "step": 39130 + }, + { + "epoch": 0.15813055264890896, + "grad_norm": 320.0533142089844, + "learning_rate": 8.641462241943255e-06, + "loss": 42.3086, + "step": 39140 + }, + { + "epoch": 0.1581709539142766, + "grad_norm": 606.509521484375, + "learning_rate": 8.640495619849821e-06, + "loss": 45.3936, + "step": 39150 + }, + { + "epoch": 0.15821135517964421, + "grad_norm": 566.2433471679688, + "learning_rate": 8.639528708098858e-06, + "loss": 44.108, + "step": 39160 + }, + { + "epoch": 0.15825175644501185, + "grad_norm": 869.4197998046875, + "learning_rate": 8.6385615067673e-06, + "loss": 34.082, + "step": 39170 + }, + { + "epoch": 0.1582921577103795, + "grad_norm": 335.2037048339844, + "learning_rate": 8.6375940159321e-06, + "loss": 41.5822, + "step": 39180 + }, + { + "epoch": 0.15833255897574713, + "grad_norm": 629.8877563476562, + "learning_rate": 8.63662623567024e-06, + "loss": 31.4693, + "step": 39190 + }, + { + "epoch": 0.15837296024111475, + "grad_norm": 1557.7572021484375, + "learning_rate": 8.63565816605872e-06, + "loss": 41.352, + "step": 39200 + }, + { + "epoch": 0.1584133615064824, + "grad_norm": 666.0990600585938, + "learning_rate": 8.634689807174564e-06, + "loss": 47.8496, + "step": 39210 + }, + { + "epoch": 0.15845376277185003, + "grad_norm": 704.407958984375, + "learning_rate": 8.633721159094823e-06, + "loss": 46.6244, + "step": 39220 + }, + { + "epoch": 0.15849416403721764, + "grad_norm": 390.741455078125, + "learning_rate": 8.632752221896562e-06, + "loss": 45.0767, + "step": 39230 + }, + { + "epoch": 0.15853456530258528, + "grad_norm": 470.3848876953125, + "learning_rate": 8.631782995656884e-06, + "loss": 53.1645, + "step": 39240 + }, + { + "epoch": 0.15857496656795292, + "grad_norm": 357.0032043457031, + "learning_rate": 8.630813480452898e-06, + "loss": 34.2059, + "step": 39250 + }, + { + "epoch": 0.15861536783332053, + "grad_norm": 483.2372131347656, + "learning_rate": 8.629843676361747e-06, + "loss": 46.7471, + "step": 39260 + }, + { + "epoch": 0.15865576909868817, + "grad_norm": 889.9521484375, + "learning_rate": 8.628873583460593e-06, + "loss": 48.7918, + "step": 39270 + }, + { + "epoch": 0.1586961703640558, + "grad_norm": 338.2070007324219, + "learning_rate": 8.627903201826622e-06, + "loss": 45.2854, + "step": 39280 + }, + { + "epoch": 0.15873657162942342, + "grad_norm": 810.640380859375, + "learning_rate": 8.626932531537042e-06, + "loss": 32.9458, + "step": 39290 + }, + { + "epoch": 0.15877697289479106, + "grad_norm": 1532.687255859375, + "learning_rate": 8.625961572669087e-06, + "loss": 59.3802, + "step": 39300 + }, + { + "epoch": 0.1588173741601587, + "grad_norm": 657.8353271484375, + "learning_rate": 8.62499032530001e-06, + "loss": 37.4358, + "step": 39310 + }, + { + "epoch": 0.15885777542552632, + "grad_norm": 960.59521484375, + "learning_rate": 8.624018789507091e-06, + "loss": 40.7411, + "step": 39320 + }, + { + "epoch": 0.15889817669089396, + "grad_norm": 439.2926025390625, + "learning_rate": 8.62304696536763e-06, + "loss": 40.3107, + "step": 39330 + }, + { + "epoch": 0.1589385779562616, + "grad_norm": 1047.429931640625, + "learning_rate": 8.622074852958946e-06, + "loss": 44.9087, + "step": 39340 + }, + { + "epoch": 0.15897897922162924, + "grad_norm": 1334.8177490234375, + "learning_rate": 8.621102452358393e-06, + "loss": 64.5529, + "step": 39350 + }, + { + "epoch": 0.15901938048699685, + "grad_norm": 996.2518920898438, + "learning_rate": 8.620129763643333e-06, + "loss": 56.1161, + "step": 39360 + }, + { + "epoch": 0.1590597817523645, + "grad_norm": 610.1491088867188, + "learning_rate": 8.619156786891162e-06, + "loss": 60.381, + "step": 39370 + }, + { + "epoch": 0.15910018301773213, + "grad_norm": 994.11328125, + "learning_rate": 8.618183522179295e-06, + "loss": 42.3613, + "step": 39380 + }, + { + "epoch": 0.15914058428309974, + "grad_norm": 455.6463317871094, + "learning_rate": 8.617209969585171e-06, + "loss": 55.0342, + "step": 39390 + }, + { + "epoch": 0.15918098554846738, + "grad_norm": 661.022216796875, + "learning_rate": 8.616236129186252e-06, + "loss": 43.6494, + "step": 39400 + }, + { + "epoch": 0.15922138681383502, + "grad_norm": 139.87229919433594, + "learning_rate": 8.615262001060019e-06, + "loss": 48.6457, + "step": 39410 + }, + { + "epoch": 0.15926178807920263, + "grad_norm": 856.7398071289062, + "learning_rate": 8.61428758528398e-06, + "loss": 50.8592, + "step": 39420 + }, + { + "epoch": 0.15930218934457027, + "grad_norm": 848.9993896484375, + "learning_rate": 8.613312881935667e-06, + "loss": 69.9591, + "step": 39430 + }, + { + "epoch": 0.1593425906099379, + "grad_norm": 313.719482421875, + "learning_rate": 8.61233789109263e-06, + "loss": 43.3973, + "step": 39440 + }, + { + "epoch": 0.15938299187530552, + "grad_norm": 494.93194580078125, + "learning_rate": 8.611362612832445e-06, + "loss": 52.3588, + "step": 39450 + }, + { + "epoch": 0.15942339314067316, + "grad_norm": 493.6256103515625, + "learning_rate": 8.610387047232711e-06, + "loss": 68.9036, + "step": 39460 + }, + { + "epoch": 0.1594637944060408, + "grad_norm": 767.64208984375, + "learning_rate": 8.609411194371049e-06, + "loss": 58.1112, + "step": 39470 + }, + { + "epoch": 0.15950419567140842, + "grad_norm": 1218.093017578125, + "learning_rate": 8.608435054325103e-06, + "loss": 48.0329, + "step": 39480 + }, + { + "epoch": 0.15954459693677606, + "grad_norm": 615.8851318359375, + "learning_rate": 8.60745862717254e-06, + "loss": 44.5316, + "step": 39490 + }, + { + "epoch": 0.1595849982021437, + "grad_norm": 380.9693908691406, + "learning_rate": 8.606481912991052e-06, + "loss": 33.8241, + "step": 39500 + }, + { + "epoch": 0.1596253994675113, + "grad_norm": 859.0559692382812, + "learning_rate": 8.605504911858347e-06, + "loss": 44.5087, + "step": 39510 + }, + { + "epoch": 0.15966580073287895, + "grad_norm": 492.1607360839844, + "learning_rate": 8.604527623852165e-06, + "loss": 41.605, + "step": 39520 + }, + { + "epoch": 0.1597062019982466, + "grad_norm": 611.9906616210938, + "learning_rate": 8.603550049050262e-06, + "loss": 55.0675, + "step": 39530 + }, + { + "epoch": 0.15974660326361423, + "grad_norm": 469.29364013671875, + "learning_rate": 8.602572187530421e-06, + "loss": 44.7947, + "step": 39540 + }, + { + "epoch": 0.15978700452898184, + "grad_norm": 398.1039123535156, + "learning_rate": 8.601594039370441e-06, + "loss": 49.7298, + "step": 39550 + }, + { + "epoch": 0.15982740579434948, + "grad_norm": 601.5389404296875, + "learning_rate": 8.600615604648155e-06, + "loss": 50.9114, + "step": 39560 + }, + { + "epoch": 0.15986780705971712, + "grad_norm": 342.42767333984375, + "learning_rate": 8.599636883441408e-06, + "loss": 52.5872, + "step": 39570 + }, + { + "epoch": 0.15990820832508473, + "grad_norm": 857.3191528320312, + "learning_rate": 8.598657875828078e-06, + "loss": 77.0436, + "step": 39580 + }, + { + "epoch": 0.15994860959045237, + "grad_norm": 543.1047973632812, + "learning_rate": 8.597678581886055e-06, + "loss": 45.0541, + "step": 39590 + }, + { + "epoch": 0.15998901085582, + "grad_norm": 712.693115234375, + "learning_rate": 8.596699001693257e-06, + "loss": 42.4967, + "step": 39600 + }, + { + "epoch": 0.16002941212118763, + "grad_norm": 691.2177734375, + "learning_rate": 8.595719135327627e-06, + "loss": 50.3935, + "step": 39610 + }, + { + "epoch": 0.16006981338655527, + "grad_norm": 345.3578186035156, + "learning_rate": 8.594738982867126e-06, + "loss": 61.9182, + "step": 39620 + }, + { + "epoch": 0.1601102146519229, + "grad_norm": 494.4696960449219, + "learning_rate": 8.593758544389743e-06, + "loss": 57.9511, + "step": 39630 + }, + { + "epoch": 0.16015061591729052, + "grad_norm": 1123.7225341796875, + "learning_rate": 8.592777819973486e-06, + "loss": 59.5531, + "step": 39640 + }, + { + "epoch": 0.16019101718265816, + "grad_norm": 641.5360107421875, + "learning_rate": 8.591796809696386e-06, + "loss": 46.7559, + "step": 39650 + }, + { + "epoch": 0.1602314184480258, + "grad_norm": 862.7567138671875, + "learning_rate": 8.590815513636498e-06, + "loss": 47.3019, + "step": 39660 + }, + { + "epoch": 0.1602718197133934, + "grad_norm": 354.60186767578125, + "learning_rate": 8.5898339318719e-06, + "loss": 50.8752, + "step": 39670 + }, + { + "epoch": 0.16031222097876105, + "grad_norm": 875.8455810546875, + "learning_rate": 8.58885206448069e-06, + "loss": 41.4256, + "step": 39680 + }, + { + "epoch": 0.1603526222441287, + "grad_norm": 362.48443603515625, + "learning_rate": 8.587869911540993e-06, + "loss": 42.8521, + "step": 39690 + }, + { + "epoch": 0.16039302350949633, + "grad_norm": 679.5111694335938, + "learning_rate": 8.586887473130951e-06, + "loss": 34.4303, + "step": 39700 + }, + { + "epoch": 0.16043342477486394, + "grad_norm": 457.1158447265625, + "learning_rate": 8.585904749328736e-06, + "loss": 33.436, + "step": 39710 + }, + { + "epoch": 0.16047382604023158, + "grad_norm": 583.4326782226562, + "learning_rate": 8.584921740212537e-06, + "loss": 87.6192, + "step": 39720 + }, + { + "epoch": 0.16051422730559922, + "grad_norm": 542.652587890625, + "learning_rate": 8.583938445860569e-06, + "loss": 42.0307, + "step": 39730 + }, + { + "epoch": 0.16055462857096683, + "grad_norm": 989.8584594726562, + "learning_rate": 8.582954866351065e-06, + "loss": 49.7806, + "step": 39740 + }, + { + "epoch": 0.16059502983633447, + "grad_norm": 821.0096435546875, + "learning_rate": 8.581971001762287e-06, + "loss": 69.7385, + "step": 39750 + }, + { + "epoch": 0.16063543110170211, + "grad_norm": 461.29443359375, + "learning_rate": 8.580986852172514e-06, + "loss": 41.6745, + "step": 39760 + }, + { + "epoch": 0.16067583236706973, + "grad_norm": 520.6972045898438, + "learning_rate": 8.580002417660054e-06, + "loss": 64.0036, + "step": 39770 + }, + { + "epoch": 0.16071623363243737, + "grad_norm": 730.748046875, + "learning_rate": 8.579017698303228e-06, + "loss": 63.5609, + "step": 39780 + }, + { + "epoch": 0.160756634897805, + "grad_norm": 1039.7550048828125, + "learning_rate": 8.578032694180394e-06, + "loss": 39.672, + "step": 39790 + }, + { + "epoch": 0.16079703616317262, + "grad_norm": 965.9579467773438, + "learning_rate": 8.577047405369916e-06, + "loss": 39.7766, + "step": 39800 + }, + { + "epoch": 0.16083743742854026, + "grad_norm": 559.5202026367188, + "learning_rate": 8.576061831950193e-06, + "loss": 45.9991, + "step": 39810 + }, + { + "epoch": 0.1608778386939079, + "grad_norm": 724.7987060546875, + "learning_rate": 8.575075973999642e-06, + "loss": 48.9104, + "step": 39820 + }, + { + "epoch": 0.1609182399592755, + "grad_norm": 732.09326171875, + "learning_rate": 8.574089831596703e-06, + "loss": 72.9903, + "step": 39830 + }, + { + "epoch": 0.16095864122464315, + "grad_norm": 351.67486572265625, + "learning_rate": 8.57310340481984e-06, + "loss": 53.9298, + "step": 39840 + }, + { + "epoch": 0.1609990424900108, + "grad_norm": 646.2775268554688, + "learning_rate": 8.572116693747537e-06, + "loss": 30.1154, + "step": 39850 + }, + { + "epoch": 0.16103944375537843, + "grad_norm": 576.468017578125, + "learning_rate": 8.571129698458302e-06, + "loss": 40.7335, + "step": 39860 + }, + { + "epoch": 0.16107984502074604, + "grad_norm": 874.0496826171875, + "learning_rate": 8.570142419030668e-06, + "loss": 60.7948, + "step": 39870 + }, + { + "epoch": 0.16112024628611368, + "grad_norm": 466.7607727050781, + "learning_rate": 8.569154855543184e-06, + "loss": 44.0453, + "step": 39880 + }, + { + "epoch": 0.16116064755148132, + "grad_norm": 450.4464111328125, + "learning_rate": 8.56816700807443e-06, + "loss": 52.5402, + "step": 39890 + }, + { + "epoch": 0.16120104881684894, + "grad_norm": 416.64697265625, + "learning_rate": 8.567178876703002e-06, + "loss": 50.5467, + "step": 39900 + }, + { + "epoch": 0.16124145008221658, + "grad_norm": 388.1261901855469, + "learning_rate": 8.566190461507521e-06, + "loss": 57.1608, + "step": 39910 + }, + { + "epoch": 0.16128185134758422, + "grad_norm": 348.6123046875, + "learning_rate": 8.565201762566632e-06, + "loss": 46.7085, + "step": 39920 + }, + { + "epoch": 0.16132225261295183, + "grad_norm": 862.7435913085938, + "learning_rate": 8.564212779959003e-06, + "loss": 67.4059, + "step": 39930 + }, + { + "epoch": 0.16136265387831947, + "grad_norm": 422.3407287597656, + "learning_rate": 8.563223513763319e-06, + "loss": 46.8044, + "step": 39940 + }, + { + "epoch": 0.1614030551436871, + "grad_norm": 502.47418212890625, + "learning_rate": 8.562233964058294e-06, + "loss": 53.1609, + "step": 39950 + }, + { + "epoch": 0.16144345640905472, + "grad_norm": 851.7847290039062, + "learning_rate": 8.561244130922658e-06, + "loss": 69.4814, + "step": 39960 + }, + { + "epoch": 0.16148385767442236, + "grad_norm": 962.0199584960938, + "learning_rate": 8.560254014435172e-06, + "loss": 62.8515, + "step": 39970 + }, + { + "epoch": 0.16152425893979, + "grad_norm": 794.7582397460938, + "learning_rate": 8.559263614674615e-06, + "loss": 62.2661, + "step": 39980 + }, + { + "epoch": 0.1615646602051576, + "grad_norm": 484.3645324707031, + "learning_rate": 8.558272931719785e-06, + "loss": 62.7066, + "step": 39990 + }, + { + "epoch": 0.16160506147052525, + "grad_norm": 574.9622802734375, + "learning_rate": 8.557281965649508e-06, + "loss": 43.5384, + "step": 40000 + }, + { + "epoch": 0.1616454627358929, + "grad_norm": 381.67010498046875, + "learning_rate": 8.556290716542632e-06, + "loss": 43.6, + "step": 40010 + }, + { + "epoch": 0.16168586400126053, + "grad_norm": 632.470947265625, + "learning_rate": 8.555299184478026e-06, + "loss": 45.3281, + "step": 40020 + }, + { + "epoch": 0.16172626526662814, + "grad_norm": 614.8099975585938, + "learning_rate": 8.554307369534577e-06, + "loss": 41.8744, + "step": 40030 + }, + { + "epoch": 0.16176666653199578, + "grad_norm": 612.2963256835938, + "learning_rate": 8.553315271791207e-06, + "loss": 46.7159, + "step": 40040 + }, + { + "epoch": 0.16180706779736342, + "grad_norm": 912.1803588867188, + "learning_rate": 8.552322891326846e-06, + "loss": 61.35, + "step": 40050 + }, + { + "epoch": 0.16184746906273104, + "grad_norm": 866.2830810546875, + "learning_rate": 8.551330228220454e-06, + "loss": 44.3065, + "step": 40060 + }, + { + "epoch": 0.16188787032809868, + "grad_norm": 272.114013671875, + "learning_rate": 8.550337282551016e-06, + "loss": 36.4487, + "step": 40070 + }, + { + "epoch": 0.16192827159346632, + "grad_norm": 1175.4171142578125, + "learning_rate": 8.549344054397533e-06, + "loss": 50.1568, + "step": 40080 + }, + { + "epoch": 0.16196867285883393, + "grad_norm": 626.5860595703125, + "learning_rate": 8.548350543839034e-06, + "loss": 43.3123, + "step": 40090 + }, + { + "epoch": 0.16200907412420157, + "grad_norm": 618.5504760742188, + "learning_rate": 8.547356750954568e-06, + "loss": 39.5396, + "step": 40100 + }, + { + "epoch": 0.1620494753895692, + "grad_norm": 537.752685546875, + "learning_rate": 8.546362675823204e-06, + "loss": 45.1189, + "step": 40110 + }, + { + "epoch": 0.16208987665493682, + "grad_norm": 390.0348205566406, + "learning_rate": 8.545368318524036e-06, + "loss": 50.7311, + "step": 40120 + }, + { + "epoch": 0.16213027792030446, + "grad_norm": 645.032958984375, + "learning_rate": 8.544373679136184e-06, + "loss": 60.2118, + "step": 40130 + }, + { + "epoch": 0.1621706791856721, + "grad_norm": 706.8973388671875, + "learning_rate": 8.543378757738785e-06, + "loss": 86.3639, + "step": 40140 + }, + { + "epoch": 0.1622110804510397, + "grad_norm": 612.7260131835938, + "learning_rate": 8.542383554411e-06, + "loss": 48.4019, + "step": 40150 + }, + { + "epoch": 0.16225148171640735, + "grad_norm": 763.453857421875, + "learning_rate": 8.541388069232012e-06, + "loss": 62.7933, + "step": 40160 + }, + { + "epoch": 0.162291882981775, + "grad_norm": 622.7904663085938, + "learning_rate": 8.54039230228103e-06, + "loss": 66.0159, + "step": 40170 + }, + { + "epoch": 0.16233228424714263, + "grad_norm": 405.7510070800781, + "learning_rate": 8.53939625363728e-06, + "loss": 47.5229, + "step": 40180 + }, + { + "epoch": 0.16237268551251025, + "grad_norm": 812.6790771484375, + "learning_rate": 8.538399923380011e-06, + "loss": 49.0495, + "step": 40190 + }, + { + "epoch": 0.16241308677787789, + "grad_norm": 485.32135009765625, + "learning_rate": 8.537403311588502e-06, + "loss": 53.9039, + "step": 40200 + }, + { + "epoch": 0.16245348804324553, + "grad_norm": 724.760986328125, + "learning_rate": 8.536406418342044e-06, + "loss": 56.8525, + "step": 40210 + }, + { + "epoch": 0.16249388930861314, + "grad_norm": 400.77783203125, + "learning_rate": 8.53540924371996e-06, + "loss": 46.7015, + "step": 40220 + }, + { + "epoch": 0.16253429057398078, + "grad_norm": 319.41314697265625, + "learning_rate": 8.534411787801586e-06, + "loss": 40.1834, + "step": 40230 + }, + { + "epoch": 0.16257469183934842, + "grad_norm": 486.13262939453125, + "learning_rate": 8.533414050666287e-06, + "loss": 47.1607, + "step": 40240 + }, + { + "epoch": 0.16261509310471603, + "grad_norm": 579.2314453125, + "learning_rate": 8.532416032393447e-06, + "loss": 34.5006, + "step": 40250 + }, + { + "epoch": 0.16265549437008367, + "grad_norm": 368.7862548828125, + "learning_rate": 8.531417733062476e-06, + "loss": 34.5283, + "step": 40260 + }, + { + "epoch": 0.1626958956354513, + "grad_norm": 631.860595703125, + "learning_rate": 8.530419152752804e-06, + "loss": 80.3774, + "step": 40270 + }, + { + "epoch": 0.16273629690081892, + "grad_norm": 405.42877197265625, + "learning_rate": 8.529420291543882e-06, + "loss": 51.1724, + "step": 40280 + }, + { + "epoch": 0.16277669816618656, + "grad_norm": 282.8262634277344, + "learning_rate": 8.528421149515185e-06, + "loss": 41.0309, + "step": 40290 + }, + { + "epoch": 0.1628170994315542, + "grad_norm": 566.5156860351562, + "learning_rate": 8.52742172674621e-06, + "loss": 40.4973, + "step": 40300 + }, + { + "epoch": 0.16285750069692181, + "grad_norm": 911.9764404296875, + "learning_rate": 8.526422023316478e-06, + "loss": 55.5752, + "step": 40310 + }, + { + "epoch": 0.16289790196228945, + "grad_norm": 1154.181640625, + "learning_rate": 8.525422039305529e-06, + "loss": 44.3412, + "step": 40320 + }, + { + "epoch": 0.1629383032276571, + "grad_norm": 526.5676879882812, + "learning_rate": 8.524421774792926e-06, + "loss": 79.0259, + "step": 40330 + }, + { + "epoch": 0.16297870449302473, + "grad_norm": 1047.71875, + "learning_rate": 8.52342122985826e-06, + "loss": 59.4561, + "step": 40340 + }, + { + "epoch": 0.16301910575839235, + "grad_norm": 636.7918701171875, + "learning_rate": 8.522420404581135e-06, + "loss": 32.4132, + "step": 40350 + }, + { + "epoch": 0.16305950702376, + "grad_norm": 505.8265686035156, + "learning_rate": 8.521419299041185e-06, + "loss": 56.5309, + "step": 40360 + }, + { + "epoch": 0.16309990828912763, + "grad_norm": 757.0598754882812, + "learning_rate": 8.520417913318065e-06, + "loss": 36.9938, + "step": 40370 + }, + { + "epoch": 0.16314030955449524, + "grad_norm": 277.91778564453125, + "learning_rate": 8.519416247491445e-06, + "loss": 67.4497, + "step": 40380 + }, + { + "epoch": 0.16318071081986288, + "grad_norm": 335.56793212890625, + "learning_rate": 8.518414301641027e-06, + "loss": 44.4462, + "step": 40390 + }, + { + "epoch": 0.16322111208523052, + "grad_norm": 603.6694946289062, + "learning_rate": 8.517412075846529e-06, + "loss": 73.2141, + "step": 40400 + }, + { + "epoch": 0.16326151335059813, + "grad_norm": 638.0531005859375, + "learning_rate": 8.516409570187698e-06, + "loss": 34.8373, + "step": 40410 + }, + { + "epoch": 0.16330191461596577, + "grad_norm": 764.2570190429688, + "learning_rate": 8.515406784744294e-06, + "loss": 60.3965, + "step": 40420 + }, + { + "epoch": 0.1633423158813334, + "grad_norm": 740.9988403320312, + "learning_rate": 8.514403719596104e-06, + "loss": 46.767, + "step": 40430 + }, + { + "epoch": 0.16338271714670102, + "grad_norm": 835.2094116210938, + "learning_rate": 8.513400374822942e-06, + "loss": 68.2253, + "step": 40440 + }, + { + "epoch": 0.16342311841206866, + "grad_norm": 489.5925598144531, + "learning_rate": 8.512396750504635e-06, + "loss": 44.3674, + "step": 40450 + }, + { + "epoch": 0.1634635196774363, + "grad_norm": 527.5684204101562, + "learning_rate": 8.511392846721037e-06, + "loss": 38.6535, + "step": 40460 + }, + { + "epoch": 0.16350392094280392, + "grad_norm": 665.7122802734375, + "learning_rate": 8.510388663552027e-06, + "loss": 43.2655, + "step": 40470 + }, + { + "epoch": 0.16354432220817156, + "grad_norm": 463.0517883300781, + "learning_rate": 8.509384201077502e-06, + "loss": 51.3164, + "step": 40480 + }, + { + "epoch": 0.1635847234735392, + "grad_norm": 660.7234497070312, + "learning_rate": 8.508379459377381e-06, + "loss": 46.4135, + "step": 40490 + }, + { + "epoch": 0.16362512473890684, + "grad_norm": 2250.38330078125, + "learning_rate": 8.507374438531606e-06, + "loss": 85.4677, + "step": 40500 + }, + { + "epoch": 0.16366552600427445, + "grad_norm": 371.8035888671875, + "learning_rate": 8.506369138620148e-06, + "loss": 23.6598, + "step": 40510 + }, + { + "epoch": 0.1637059272696421, + "grad_norm": 377.6024475097656, + "learning_rate": 8.505363559722985e-06, + "loss": 60.3731, + "step": 40520 + }, + { + "epoch": 0.16374632853500973, + "grad_norm": 391.7501525878906, + "learning_rate": 8.504357701920134e-06, + "loss": 57.3929, + "step": 40530 + }, + { + "epoch": 0.16378672980037734, + "grad_norm": 557.2824096679688, + "learning_rate": 8.503351565291622e-06, + "loss": 35.8355, + "step": 40540 + }, + { + "epoch": 0.16382713106574498, + "grad_norm": 527.2345581054688, + "learning_rate": 8.502345149917506e-06, + "loss": 64.468, + "step": 40550 + }, + { + "epoch": 0.16386753233111262, + "grad_norm": 543.4524536132812, + "learning_rate": 8.501338455877859e-06, + "loss": 55.0868, + "step": 40560 + }, + { + "epoch": 0.16390793359648023, + "grad_norm": 664.7778930664062, + "learning_rate": 8.50033148325278e-06, + "loss": 45.5921, + "step": 40570 + }, + { + "epoch": 0.16394833486184787, + "grad_norm": 690.2338256835938, + "learning_rate": 8.499324232122389e-06, + "loss": 39.4778, + "step": 40580 + }, + { + "epoch": 0.1639887361272155, + "grad_norm": 1139.7445068359375, + "learning_rate": 8.498316702566828e-06, + "loss": 67.1272, + "step": 40590 + }, + { + "epoch": 0.16402913739258312, + "grad_norm": 1078.213623046875, + "learning_rate": 8.497308894666263e-06, + "loss": 47.1635, + "step": 40600 + }, + { + "epoch": 0.16406953865795076, + "grad_norm": 528.3421020507812, + "learning_rate": 8.496300808500878e-06, + "loss": 54.4817, + "step": 40610 + }, + { + "epoch": 0.1641099399233184, + "grad_norm": 1129.3258056640625, + "learning_rate": 8.495292444150887e-06, + "loss": 49.5488, + "step": 40620 + }, + { + "epoch": 0.16415034118868602, + "grad_norm": 995.45361328125, + "learning_rate": 8.494283801696514e-06, + "loss": 90.1325, + "step": 40630 + }, + { + "epoch": 0.16419074245405366, + "grad_norm": 287.7896423339844, + "learning_rate": 8.493274881218017e-06, + "loss": 50.7454, + "step": 40640 + }, + { + "epoch": 0.1642311437194213, + "grad_norm": 800.5263671875, + "learning_rate": 8.49226568279567e-06, + "loss": 72.1485, + "step": 40650 + }, + { + "epoch": 0.16427154498478894, + "grad_norm": 760.0538940429688, + "learning_rate": 8.49125620650977e-06, + "loss": 55.6863, + "step": 40660 + }, + { + "epoch": 0.16431194625015655, + "grad_norm": 778.16357421875, + "learning_rate": 8.490246452440636e-06, + "loss": 34.3954, + "step": 40670 + }, + { + "epoch": 0.1643523475155242, + "grad_norm": 500.4969482421875, + "learning_rate": 8.48923642066861e-06, + "loss": 48.982, + "step": 40680 + }, + { + "epoch": 0.16439274878089183, + "grad_norm": 604.9345092773438, + "learning_rate": 8.488226111274055e-06, + "loss": 50.3341, + "step": 40690 + }, + { + "epoch": 0.16443315004625944, + "grad_norm": 712.126708984375, + "learning_rate": 8.487215524337357e-06, + "loss": 57.6024, + "step": 40700 + }, + { + "epoch": 0.16447355131162708, + "grad_norm": 1261.656494140625, + "learning_rate": 8.486204659938924e-06, + "loss": 52.6965, + "step": 40710 + }, + { + "epoch": 0.16451395257699472, + "grad_norm": 830.56591796875, + "learning_rate": 8.485193518159186e-06, + "loss": 45.7652, + "step": 40720 + }, + { + "epoch": 0.16455435384236233, + "grad_norm": 352.6490478515625, + "learning_rate": 8.484182099078596e-06, + "loss": 67.0111, + "step": 40730 + }, + { + "epoch": 0.16459475510772997, + "grad_norm": 185.65826416015625, + "learning_rate": 8.483170402777624e-06, + "loss": 67.0452, + "step": 40740 + }, + { + "epoch": 0.1646351563730976, + "grad_norm": 894.4474487304688, + "learning_rate": 8.482158429336769e-06, + "loss": 41.1207, + "step": 40750 + }, + { + "epoch": 0.16467555763846523, + "grad_norm": 683.1304321289062, + "learning_rate": 8.48114617883655e-06, + "loss": 51.1379, + "step": 40760 + }, + { + "epoch": 0.16471595890383287, + "grad_norm": 625.189453125, + "learning_rate": 8.480133651357507e-06, + "loss": 39.9019, + "step": 40770 + }, + { + "epoch": 0.1647563601692005, + "grad_norm": 395.3323059082031, + "learning_rate": 8.479120846980197e-06, + "loss": 45.6717, + "step": 40780 + }, + { + "epoch": 0.16479676143456812, + "grad_norm": 786.5029907226562, + "learning_rate": 8.478107765785212e-06, + "loss": 45.9822, + "step": 40790 + }, + { + "epoch": 0.16483716269993576, + "grad_norm": 693.7678833007812, + "learning_rate": 8.477094407853153e-06, + "loss": 32.4612, + "step": 40800 + }, + { + "epoch": 0.1648775639653034, + "grad_norm": 445.7911071777344, + "learning_rate": 8.47608077326465e-06, + "loss": 71.7955, + "step": 40810 + }, + { + "epoch": 0.16491796523067104, + "grad_norm": 655.4782104492188, + "learning_rate": 8.475066862100352e-06, + "loss": 56.675, + "step": 40820 + }, + { + "epoch": 0.16495836649603865, + "grad_norm": 741.1582641601562, + "learning_rate": 8.474052674440934e-06, + "loss": 60.2743, + "step": 40830 + }, + { + "epoch": 0.1649987677614063, + "grad_norm": 646.4039916992188, + "learning_rate": 8.473038210367086e-06, + "loss": 48.6313, + "step": 40840 + }, + { + "epoch": 0.16503916902677393, + "grad_norm": 405.1526794433594, + "learning_rate": 8.47202346995953e-06, + "loss": 29.084, + "step": 40850 + }, + { + "epoch": 0.16507957029214154, + "grad_norm": 559.633544921875, + "learning_rate": 8.471008453298998e-06, + "loss": 48.7567, + "step": 40860 + }, + { + "epoch": 0.16511997155750918, + "grad_norm": 612.960205078125, + "learning_rate": 8.469993160466254e-06, + "loss": 33.431, + "step": 40870 + }, + { + "epoch": 0.16516037282287682, + "grad_norm": 855.2415161132812, + "learning_rate": 8.46897759154208e-06, + "loss": 50.173, + "step": 40880 + }, + { + "epoch": 0.16520077408824443, + "grad_norm": 730.0927124023438, + "learning_rate": 8.467961746607279e-06, + "loss": 57.6002, + "step": 40890 + }, + { + "epoch": 0.16524117535361207, + "grad_norm": 523.8104858398438, + "learning_rate": 8.466945625742678e-06, + "loss": 29.2724, + "step": 40900 + }, + { + "epoch": 0.16528157661897971, + "grad_norm": 572.157470703125, + "learning_rate": 8.465929229029124e-06, + "loss": 51.1289, + "step": 40910 + }, + { + "epoch": 0.16532197788434733, + "grad_norm": 851.248779296875, + "learning_rate": 8.464912556547486e-06, + "loss": 64.4108, + "step": 40920 + }, + { + "epoch": 0.16536237914971497, + "grad_norm": 1449.7430419921875, + "learning_rate": 8.46389560837866e-06, + "loss": 75.141, + "step": 40930 + }, + { + "epoch": 0.1654027804150826, + "grad_norm": 608.1896362304688, + "learning_rate": 8.462878384603558e-06, + "loss": 57.1169, + "step": 40940 + }, + { + "epoch": 0.16544318168045022, + "grad_norm": 887.8237915039062, + "learning_rate": 8.461860885303116e-06, + "loss": 43.4107, + "step": 40950 + }, + { + "epoch": 0.16548358294581786, + "grad_norm": 716.5380249023438, + "learning_rate": 8.460843110558287e-06, + "loss": 48.526, + "step": 40960 + }, + { + "epoch": 0.1655239842111855, + "grad_norm": 552.896240234375, + "learning_rate": 8.459825060450058e-06, + "loss": 37.3961, + "step": 40970 + }, + { + "epoch": 0.16556438547655314, + "grad_norm": 841.8020629882812, + "learning_rate": 8.458806735059428e-06, + "loss": 47.3296, + "step": 40980 + }, + { + "epoch": 0.16560478674192075, + "grad_norm": 617.2305908203125, + "learning_rate": 8.45778813446742e-06, + "loss": 32.6389, + "step": 40990 + }, + { + "epoch": 0.1656451880072884, + "grad_norm": 457.9773864746094, + "learning_rate": 8.456769258755078e-06, + "loss": 40.7037, + "step": 41000 + }, + { + "epoch": 0.16568558927265603, + "grad_norm": 453.1566162109375, + "learning_rate": 8.455750108003468e-06, + "loss": 43.0002, + "step": 41010 + }, + { + "epoch": 0.16572599053802364, + "grad_norm": 782.6980590820312, + "learning_rate": 8.454730682293686e-06, + "loss": 42.0216, + "step": 41020 + }, + { + "epoch": 0.16576639180339128, + "grad_norm": 697.3536376953125, + "learning_rate": 8.453710981706838e-06, + "loss": 31.8444, + "step": 41030 + }, + { + "epoch": 0.16580679306875892, + "grad_norm": 289.1650390625, + "learning_rate": 8.452691006324055e-06, + "loss": 56.3204, + "step": 41040 + }, + { + "epoch": 0.16584719433412654, + "grad_norm": 595.3594970703125, + "learning_rate": 8.451670756226496e-06, + "loss": 60.4295, + "step": 41050 + }, + { + "epoch": 0.16588759559949418, + "grad_norm": 486.653076171875, + "learning_rate": 8.450650231495336e-06, + "loss": 35.8233, + "step": 41060 + }, + { + "epoch": 0.16592799686486182, + "grad_norm": 399.5616455078125, + "learning_rate": 8.449629432211774e-06, + "loss": 57.242, + "step": 41070 + }, + { + "epoch": 0.16596839813022943, + "grad_norm": 421.82159423828125, + "learning_rate": 8.44860835845703e-06, + "loss": 41.3208, + "step": 41080 + }, + { + "epoch": 0.16600879939559707, + "grad_norm": 376.24420166015625, + "learning_rate": 8.447587010312343e-06, + "loss": 47.7066, + "step": 41090 + }, + { + "epoch": 0.1660492006609647, + "grad_norm": 383.0332336425781, + "learning_rate": 8.44656538785898e-06, + "loss": 42.9964, + "step": 41100 + }, + { + "epoch": 0.16608960192633232, + "grad_norm": 395.98724365234375, + "learning_rate": 8.44554349117823e-06, + "loss": 33.3155, + "step": 41110 + }, + { + "epoch": 0.16613000319169996, + "grad_norm": 660.3027954101562, + "learning_rate": 8.444521320351397e-06, + "loss": 88.4446, + "step": 41120 + }, + { + "epoch": 0.1661704044570676, + "grad_norm": 882.0906372070312, + "learning_rate": 8.44349887545981e-06, + "loss": 30.5517, + "step": 41130 + }, + { + "epoch": 0.16621080572243524, + "grad_norm": 308.0935363769531, + "learning_rate": 8.442476156584818e-06, + "loss": 36.8402, + "step": 41140 + }, + { + "epoch": 0.16625120698780285, + "grad_norm": 530.5660400390625, + "learning_rate": 8.4414531638078e-06, + "loss": 48.6802, + "step": 41150 + }, + { + "epoch": 0.1662916082531705, + "grad_norm": 313.61029052734375, + "learning_rate": 8.440429897210148e-06, + "loss": 57.5153, + "step": 41160 + }, + { + "epoch": 0.16633200951853813, + "grad_norm": 470.394775390625, + "learning_rate": 8.439406356873279e-06, + "loss": 38.5236, + "step": 41170 + }, + { + "epoch": 0.16637241078390574, + "grad_norm": 632.5592651367188, + "learning_rate": 8.43838254287863e-06, + "loss": 60.4704, + "step": 41180 + }, + { + "epoch": 0.16641281204927338, + "grad_norm": 387.6313781738281, + "learning_rate": 8.43735845530766e-06, + "loss": 48.1446, + "step": 41190 + }, + { + "epoch": 0.16645321331464102, + "grad_norm": 919.393310546875, + "learning_rate": 8.436334094241855e-06, + "loss": 70.1436, + "step": 41200 + }, + { + "epoch": 0.16649361458000864, + "grad_norm": 864.226806640625, + "learning_rate": 8.435309459762718e-06, + "loss": 51.2987, + "step": 41210 + }, + { + "epoch": 0.16653401584537628, + "grad_norm": 210.6269073486328, + "learning_rate": 8.434284551951772e-06, + "loss": 47.4699, + "step": 41220 + }, + { + "epoch": 0.16657441711074392, + "grad_norm": 479.52294921875, + "learning_rate": 8.433259370890565e-06, + "loss": 40.4656, + "step": 41230 + }, + { + "epoch": 0.16661481837611153, + "grad_norm": 975.6790771484375, + "learning_rate": 8.432233916660669e-06, + "loss": 46.843, + "step": 41240 + }, + { + "epoch": 0.16665521964147917, + "grad_norm": 774.6649780273438, + "learning_rate": 8.43120818934367e-06, + "loss": 40.471, + "step": 41250 + }, + { + "epoch": 0.1666956209068468, + "grad_norm": 423.52178955078125, + "learning_rate": 8.43018218902118e-06, + "loss": 48.6622, + "step": 41260 + }, + { + "epoch": 0.16673602217221442, + "grad_norm": 424.2727355957031, + "learning_rate": 8.429155915774839e-06, + "loss": 44.5995, + "step": 41270 + }, + { + "epoch": 0.16677642343758206, + "grad_norm": 1353.1103515625, + "learning_rate": 8.428129369686299e-06, + "loss": 107.302, + "step": 41280 + }, + { + "epoch": 0.1668168247029497, + "grad_norm": 476.1059265136719, + "learning_rate": 8.427102550837238e-06, + "loss": 52.2604, + "step": 41290 + }, + { + "epoch": 0.16685722596831734, + "grad_norm": 820.45068359375, + "learning_rate": 8.426075459309356e-06, + "loss": 59.7153, + "step": 41300 + }, + { + "epoch": 0.16689762723368495, + "grad_norm": 811.30322265625, + "learning_rate": 8.42504809518437e-06, + "loss": 43.2647, + "step": 41310 + }, + { + "epoch": 0.1669380284990526, + "grad_norm": 541.5391845703125, + "learning_rate": 8.42402045854403e-06, + "loss": 43.6814, + "step": 41320 + }, + { + "epoch": 0.16697842976442023, + "grad_norm": 931.9412231445312, + "learning_rate": 8.422992549470094e-06, + "loss": 61.0118, + "step": 41330 + }, + { + "epoch": 0.16701883102978785, + "grad_norm": 779.17724609375, + "learning_rate": 8.42196436804435e-06, + "loss": 53.9492, + "step": 41340 + }, + { + "epoch": 0.16705923229515549, + "grad_norm": 917.96142578125, + "learning_rate": 8.420935914348607e-06, + "loss": 48.7795, + "step": 41350 + }, + { + "epoch": 0.16709963356052313, + "grad_norm": 1356.6431884765625, + "learning_rate": 8.419907188464691e-06, + "loss": 62.2583, + "step": 41360 + }, + { + "epoch": 0.16714003482589074, + "grad_norm": 622.7725830078125, + "learning_rate": 8.418878190474459e-06, + "loss": 46.7867, + "step": 41370 + }, + { + "epoch": 0.16718043609125838, + "grad_norm": 498.8053894042969, + "learning_rate": 8.417848920459778e-06, + "loss": 52.3582, + "step": 41380 + }, + { + "epoch": 0.16722083735662602, + "grad_norm": 1115.1090087890625, + "learning_rate": 8.416819378502543e-06, + "loss": 61.8992, + "step": 41390 + }, + { + "epoch": 0.16726123862199363, + "grad_norm": 290.065185546875, + "learning_rate": 8.415789564684673e-06, + "loss": 49.5622, + "step": 41400 + }, + { + "epoch": 0.16730163988736127, + "grad_norm": 712.3739624023438, + "learning_rate": 8.414759479088102e-06, + "loss": 43.2107, + "step": 41410 + }, + { + "epoch": 0.1673420411527289, + "grad_norm": 562.4710083007812, + "learning_rate": 8.413729121794794e-06, + "loss": 41.9459, + "step": 41420 + }, + { + "epoch": 0.16738244241809652, + "grad_norm": 529.8401489257812, + "learning_rate": 8.412698492886723e-06, + "loss": 36.3189, + "step": 41430 + }, + { + "epoch": 0.16742284368346416, + "grad_norm": 653.1558227539062, + "learning_rate": 8.411667592445898e-06, + "loss": 67.4094, + "step": 41440 + }, + { + "epoch": 0.1674632449488318, + "grad_norm": 786.386962890625, + "learning_rate": 8.410636420554337e-06, + "loss": 54.185, + "step": 41450 + }, + { + "epoch": 0.16750364621419944, + "grad_norm": 522.7491455078125, + "learning_rate": 8.409604977294093e-06, + "loss": 45.0625, + "step": 41460 + }, + { + "epoch": 0.16754404747956705, + "grad_norm": 5738.32568359375, + "learning_rate": 8.408573262747225e-06, + "loss": 87.3419, + "step": 41470 + }, + { + "epoch": 0.1675844487449347, + "grad_norm": 768.86328125, + "learning_rate": 8.407541276995828e-06, + "loss": 50.3956, + "step": 41480 + }, + { + "epoch": 0.16762485001030233, + "grad_norm": 688.6503295898438, + "learning_rate": 8.40650902012201e-06, + "loss": 47.1603, + "step": 41490 + }, + { + "epoch": 0.16766525127566995, + "grad_norm": 650.5867309570312, + "learning_rate": 8.405476492207902e-06, + "loss": 39.5879, + "step": 41500 + }, + { + "epoch": 0.1677056525410376, + "grad_norm": 1079.1339111328125, + "learning_rate": 8.404443693335658e-06, + "loss": 45.5985, + "step": 41510 + }, + { + "epoch": 0.16774605380640523, + "grad_norm": 601.2516479492188, + "learning_rate": 8.403410623587454e-06, + "loss": 46.3612, + "step": 41520 + }, + { + "epoch": 0.16778645507177284, + "grad_norm": 781.9537963867188, + "learning_rate": 8.402377283045487e-06, + "loss": 60.7914, + "step": 41530 + }, + { + "epoch": 0.16782685633714048, + "grad_norm": 607.5199584960938, + "learning_rate": 8.401343671791974e-06, + "loss": 50.5169, + "step": 41540 + }, + { + "epoch": 0.16786725760250812, + "grad_norm": 413.96392822265625, + "learning_rate": 8.400309789909155e-06, + "loss": 57.9376, + "step": 41550 + }, + { + "epoch": 0.16790765886787573, + "grad_norm": 483.5220642089844, + "learning_rate": 8.399275637479291e-06, + "loss": 57.2534, + "step": 41560 + }, + { + "epoch": 0.16794806013324337, + "grad_norm": 1478.341796875, + "learning_rate": 8.398241214584666e-06, + "loss": 53.3458, + "step": 41570 + }, + { + "epoch": 0.167988461398611, + "grad_norm": 552.9909057617188, + "learning_rate": 8.397206521307584e-06, + "loss": 58.3804, + "step": 41580 + }, + { + "epoch": 0.16802886266397862, + "grad_norm": 608.0415649414062, + "learning_rate": 8.396171557730369e-06, + "loss": 62.6251, + "step": 41590 + }, + { + "epoch": 0.16806926392934626, + "grad_norm": 645.6259155273438, + "learning_rate": 8.39513632393537e-06, + "loss": 41.6197, + "step": 41600 + }, + { + "epoch": 0.1681096651947139, + "grad_norm": 648.4116821289062, + "learning_rate": 8.394100820004954e-06, + "loss": 62.4968, + "step": 41610 + }, + { + "epoch": 0.16815006646008154, + "grad_norm": 373.94134521484375, + "learning_rate": 8.393065046021513e-06, + "loss": 46.2327, + "step": 41620 + }, + { + "epoch": 0.16819046772544916, + "grad_norm": 190.22682189941406, + "learning_rate": 8.39202900206746e-06, + "loss": 50.5545, + "step": 41630 + }, + { + "epoch": 0.1682308689908168, + "grad_norm": 678.0693359375, + "learning_rate": 8.390992688225226e-06, + "loss": 41.0278, + "step": 41640 + }, + { + "epoch": 0.16827127025618444, + "grad_norm": 417.6565246582031, + "learning_rate": 8.389956104577265e-06, + "loss": 50.3119, + "step": 41650 + }, + { + "epoch": 0.16831167152155205, + "grad_norm": 270.62994384765625, + "learning_rate": 8.388919251206054e-06, + "loss": 57.1411, + "step": 41660 + }, + { + "epoch": 0.1683520727869197, + "grad_norm": 1314.5177001953125, + "learning_rate": 8.387882128194094e-06, + "loss": 52.9899, + "step": 41670 + }, + { + "epoch": 0.16839247405228733, + "grad_norm": 751.9255981445312, + "learning_rate": 8.3868447356239e-06, + "loss": 61.0519, + "step": 41680 + }, + { + "epoch": 0.16843287531765494, + "grad_norm": 831.6484375, + "learning_rate": 8.385807073578014e-06, + "loss": 43.6853, + "step": 41690 + }, + { + "epoch": 0.16847327658302258, + "grad_norm": 493.83502197265625, + "learning_rate": 8.384769142138998e-06, + "loss": 43.3712, + "step": 41700 + }, + { + "epoch": 0.16851367784839022, + "grad_norm": 663.5391845703125, + "learning_rate": 8.383730941389434e-06, + "loss": 58.759, + "step": 41710 + }, + { + "epoch": 0.16855407911375783, + "grad_norm": 201.29359436035156, + "learning_rate": 8.382692471411931e-06, + "loss": 47.675, + "step": 41720 + }, + { + "epoch": 0.16859448037912547, + "grad_norm": 660.2062377929688, + "learning_rate": 8.38165373228911e-06, + "loss": 69.8102, + "step": 41730 + }, + { + "epoch": 0.1686348816444931, + "grad_norm": 246.1966094970703, + "learning_rate": 8.380614724103622e-06, + "loss": 43.068, + "step": 41740 + }, + { + "epoch": 0.16867528290986072, + "grad_norm": 548.47998046875, + "learning_rate": 8.379575446938136e-06, + "loss": 57.0117, + "step": 41750 + }, + { + "epoch": 0.16871568417522836, + "grad_norm": 715.6472778320312, + "learning_rate": 8.37853590087534e-06, + "loss": 50.1982, + "step": 41760 + }, + { + "epoch": 0.168756085440596, + "grad_norm": 469.2695007324219, + "learning_rate": 8.377496085997949e-06, + "loss": 36.8721, + "step": 41770 + }, + { + "epoch": 0.16879648670596364, + "grad_norm": 609.3477783203125, + "learning_rate": 8.376456002388695e-06, + "loss": 51.9407, + "step": 41780 + }, + { + "epoch": 0.16883688797133126, + "grad_norm": 948.3466796875, + "learning_rate": 8.375415650130332e-06, + "loss": 39.1852, + "step": 41790 + }, + { + "epoch": 0.1688772892366989, + "grad_norm": 518.4425659179688, + "learning_rate": 8.37437502930564e-06, + "loss": 41.667, + "step": 41800 + }, + { + "epoch": 0.16891769050206654, + "grad_norm": 616.8099365234375, + "learning_rate": 8.373334139997409e-06, + "loss": 43.1468, + "step": 41810 + }, + { + "epoch": 0.16895809176743415, + "grad_norm": 675.777099609375, + "learning_rate": 8.372292982288463e-06, + "loss": 41.971, + "step": 41820 + }, + { + "epoch": 0.1689984930328018, + "grad_norm": 523.8914794921875, + "learning_rate": 8.371251556261642e-06, + "loss": 53.4606, + "step": 41830 + }, + { + "epoch": 0.16903889429816943, + "grad_norm": 511.80206298828125, + "learning_rate": 8.370209861999807e-06, + "loss": 47.4166, + "step": 41840 + }, + { + "epoch": 0.16907929556353704, + "grad_norm": 361.1617736816406, + "learning_rate": 8.36916789958584e-06, + "loss": 41.0987, + "step": 41850 + }, + { + "epoch": 0.16911969682890468, + "grad_norm": 6810.2587890625, + "learning_rate": 8.368125669102645e-06, + "loss": 76.2174, + "step": 41860 + }, + { + "epoch": 0.16916009809427232, + "grad_norm": 751.0748291015625, + "learning_rate": 8.36708317063315e-06, + "loss": 52.5193, + "step": 41870 + }, + { + "epoch": 0.16920049935963993, + "grad_norm": 572.6092529296875, + "learning_rate": 8.366040404260298e-06, + "loss": 48.3041, + "step": 41880 + }, + { + "epoch": 0.16924090062500757, + "grad_norm": 570.5328369140625, + "learning_rate": 8.36499737006706e-06, + "loss": 45.7522, + "step": 41890 + }, + { + "epoch": 0.1692813018903752, + "grad_norm": 529.4615478515625, + "learning_rate": 8.363954068136424e-06, + "loss": 53.588, + "step": 41900 + }, + { + "epoch": 0.16932170315574283, + "grad_norm": 1069.1943359375, + "learning_rate": 8.362910498551402e-06, + "loss": 77.0546, + "step": 41910 + }, + { + "epoch": 0.16936210442111047, + "grad_norm": 505.5280456542969, + "learning_rate": 8.361866661395024e-06, + "loss": 43.8829, + "step": 41920 + }, + { + "epoch": 0.1694025056864781, + "grad_norm": 902.7269897460938, + "learning_rate": 8.360822556750345e-06, + "loss": 42.4727, + "step": 41930 + }, + { + "epoch": 0.16944290695184575, + "grad_norm": 466.8572082519531, + "learning_rate": 8.35977818470044e-06, + "loss": 46.5572, + "step": 41940 + }, + { + "epoch": 0.16948330821721336, + "grad_norm": 894.6742553710938, + "learning_rate": 8.358733545328404e-06, + "loss": 58.5088, + "step": 41950 + }, + { + "epoch": 0.169523709482581, + "grad_norm": 285.3533935546875, + "learning_rate": 8.357688638717354e-06, + "loss": 54.5953, + "step": 41960 + }, + { + "epoch": 0.16956411074794864, + "grad_norm": 1186.5799560546875, + "learning_rate": 8.356643464950428e-06, + "loss": 54.535, + "step": 41970 + }, + { + "epoch": 0.16960451201331625, + "grad_norm": 445.8567199707031, + "learning_rate": 8.355598024110789e-06, + "loss": 41.7435, + "step": 41980 + }, + { + "epoch": 0.1696449132786839, + "grad_norm": 597.5289916992188, + "learning_rate": 8.354552316281613e-06, + "loss": 77.5292, + "step": 41990 + }, + { + "epoch": 0.16968531454405153, + "grad_norm": 226.13951110839844, + "learning_rate": 8.353506341546106e-06, + "loss": 45.1414, + "step": 42000 + }, + { + "epoch": 0.16972571580941914, + "grad_norm": 499.8537292480469, + "learning_rate": 8.352460099987488e-06, + "loss": 44.9113, + "step": 42010 + }, + { + "epoch": 0.16976611707478678, + "grad_norm": 264.3408508300781, + "learning_rate": 8.351413591689007e-06, + "loss": 32.2931, + "step": 42020 + }, + { + "epoch": 0.16980651834015442, + "grad_norm": 928.5385131835938, + "learning_rate": 8.350366816733927e-06, + "loss": 64.2158, + "step": 42030 + }, + { + "epoch": 0.16984691960552203, + "grad_norm": 859.626953125, + "learning_rate": 8.349319775205536e-06, + "loss": 43.5746, + "step": 42040 + }, + { + "epoch": 0.16988732087088967, + "grad_norm": 419.03350830078125, + "learning_rate": 8.34827246718714e-06, + "loss": 41.5845, + "step": 42050 + }, + { + "epoch": 0.16992772213625731, + "grad_norm": 453.50531005859375, + "learning_rate": 8.347224892762072e-06, + "loss": 34.3044, + "step": 42060 + }, + { + "epoch": 0.16996812340162493, + "grad_norm": 500.0800476074219, + "learning_rate": 8.346177052013681e-06, + "loss": 49.3354, + "step": 42070 + }, + { + "epoch": 0.17000852466699257, + "grad_norm": 579.340576171875, + "learning_rate": 8.345128945025338e-06, + "loss": 49.3049, + "step": 42080 + }, + { + "epoch": 0.1700489259323602, + "grad_norm": 755.8071899414062, + "learning_rate": 8.344080571880438e-06, + "loss": 53.1518, + "step": 42090 + }, + { + "epoch": 0.17008932719772785, + "grad_norm": 605.3427124023438, + "learning_rate": 8.343031932662394e-06, + "loss": 38.2326, + "step": 42100 + }, + { + "epoch": 0.17012972846309546, + "grad_norm": 600.05615234375, + "learning_rate": 8.341983027454641e-06, + "loss": 31.4536, + "step": 42110 + }, + { + "epoch": 0.1701701297284631, + "grad_norm": 986.0425415039062, + "learning_rate": 8.340933856340637e-06, + "loss": 52.7103, + "step": 42120 + }, + { + "epoch": 0.17021053099383074, + "grad_norm": 737.8534545898438, + "learning_rate": 8.339884419403857e-06, + "loss": 55.2349, + "step": 42130 + }, + { + "epoch": 0.17025093225919835, + "grad_norm": 334.7961730957031, + "learning_rate": 8.338834716727801e-06, + "loss": 44.5493, + "step": 42140 + }, + { + "epoch": 0.170291333524566, + "grad_norm": 610.7578125, + "learning_rate": 8.337784748395992e-06, + "loss": 48.5967, + "step": 42150 + }, + { + "epoch": 0.17033173478993363, + "grad_norm": 839.372802734375, + "learning_rate": 8.336734514491968e-06, + "loss": 45.0206, + "step": 42160 + }, + { + "epoch": 0.17037213605530124, + "grad_norm": 1020.9551391601562, + "learning_rate": 8.335684015099294e-06, + "loss": 55.0339, + "step": 42170 + }, + { + "epoch": 0.17041253732066888, + "grad_norm": 823.2727661132812, + "learning_rate": 8.33463325030155e-06, + "loss": 51.4678, + "step": 42180 + }, + { + "epoch": 0.17045293858603652, + "grad_norm": 524.025146484375, + "learning_rate": 8.333582220182344e-06, + "loss": 46.4598, + "step": 42190 + }, + { + "epoch": 0.17049333985140414, + "grad_norm": 473.0232849121094, + "learning_rate": 8.332530924825297e-06, + "loss": 57.2351, + "step": 42200 + }, + { + "epoch": 0.17053374111677178, + "grad_norm": 1434.7774658203125, + "learning_rate": 8.33147936431406e-06, + "loss": 56.9665, + "step": 42210 + }, + { + "epoch": 0.17057414238213942, + "grad_norm": 647.9461669921875, + "learning_rate": 8.3304275387323e-06, + "loss": 60.3315, + "step": 42220 + }, + { + "epoch": 0.17061454364750703, + "grad_norm": 1044.830078125, + "learning_rate": 8.329375448163703e-06, + "loss": 40.1567, + "step": 42230 + }, + { + "epoch": 0.17065494491287467, + "grad_norm": 894.5424194335938, + "learning_rate": 8.328323092691985e-06, + "loss": 46.116, + "step": 42240 + }, + { + "epoch": 0.1706953461782423, + "grad_norm": 698.2495727539062, + "learning_rate": 8.32727047240087e-06, + "loss": 50.6057, + "step": 42250 + }, + { + "epoch": 0.17073574744360995, + "grad_norm": 1016.3301391601562, + "learning_rate": 8.326217587374115e-06, + "loss": 59.295, + "step": 42260 + }, + { + "epoch": 0.17077614870897756, + "grad_norm": 549.875732421875, + "learning_rate": 8.325164437695493e-06, + "loss": 57.8668, + "step": 42270 + }, + { + "epoch": 0.1708165499743452, + "grad_norm": 565.3195190429688, + "learning_rate": 8.324111023448795e-06, + "loss": 48.7341, + "step": 42280 + }, + { + "epoch": 0.17085695123971284, + "grad_norm": 885.8643188476562, + "learning_rate": 8.32305734471784e-06, + "loss": 45.0074, + "step": 42290 + }, + { + "epoch": 0.17089735250508045, + "grad_norm": 1960.1605224609375, + "learning_rate": 8.322003401586463e-06, + "loss": 68.4631, + "step": 42300 + }, + { + "epoch": 0.1709377537704481, + "grad_norm": 509.7738037109375, + "learning_rate": 8.32094919413852e-06, + "loss": 57.2811, + "step": 42310 + }, + { + "epoch": 0.17097815503581573, + "grad_norm": 491.28338623046875, + "learning_rate": 8.319894722457892e-06, + "loss": 45.6266, + "step": 42320 + }, + { + "epoch": 0.17101855630118334, + "grad_norm": 430.2795104980469, + "learning_rate": 8.318839986628477e-06, + "loss": 43.9842, + "step": 42330 + }, + { + "epoch": 0.17105895756655098, + "grad_norm": 236.9319305419922, + "learning_rate": 8.317784986734194e-06, + "loss": 45.3445, + "step": 42340 + }, + { + "epoch": 0.17109935883191862, + "grad_norm": 1582.1502685546875, + "learning_rate": 8.316729722858987e-06, + "loss": 60.803, + "step": 42350 + }, + { + "epoch": 0.17113976009728624, + "grad_norm": 422.8695983886719, + "learning_rate": 8.31567419508682e-06, + "loss": 59.6711, + "step": 42360 + }, + { + "epoch": 0.17118016136265388, + "grad_norm": 1095.4122314453125, + "learning_rate": 8.31461840350167e-06, + "loss": 59.5847, + "step": 42370 + }, + { + "epoch": 0.17122056262802152, + "grad_norm": 346.7009582519531, + "learning_rate": 8.313562348187549e-06, + "loss": 60.8975, + "step": 42380 + }, + { + "epoch": 0.17126096389338913, + "grad_norm": 632.48974609375, + "learning_rate": 8.312506029228478e-06, + "loss": 47.6719, + "step": 42390 + }, + { + "epoch": 0.17130136515875677, + "grad_norm": 419.339111328125, + "learning_rate": 8.311449446708506e-06, + "loss": 49.8142, + "step": 42400 + }, + { + "epoch": 0.1713417664241244, + "grad_norm": 518.4583740234375, + "learning_rate": 8.310392600711698e-06, + "loss": 63.9603, + "step": 42410 + }, + { + "epoch": 0.17138216768949205, + "grad_norm": 645.0120239257812, + "learning_rate": 8.309335491322143e-06, + "loss": 54.8118, + "step": 42420 + }, + { + "epoch": 0.17142256895485966, + "grad_norm": 926.7328491210938, + "learning_rate": 8.30827811862395e-06, + "loss": 45.266, + "step": 42430 + }, + { + "epoch": 0.1714629702202273, + "grad_norm": 267.48138427734375, + "learning_rate": 8.307220482701251e-06, + "loss": 55.2687, + "step": 42440 + }, + { + "epoch": 0.17150337148559494, + "grad_norm": 754.5764770507812, + "learning_rate": 8.306162583638197e-06, + "loss": 39.9945, + "step": 42450 + }, + { + "epoch": 0.17154377275096255, + "grad_norm": 396.7652893066406, + "learning_rate": 8.305104421518959e-06, + "loss": 61.9433, + "step": 42460 + }, + { + "epoch": 0.1715841740163302, + "grad_norm": 1039.3521728515625, + "learning_rate": 8.30404599642773e-06, + "loss": 47.3649, + "step": 42470 + }, + { + "epoch": 0.17162457528169783, + "grad_norm": 678.3857421875, + "learning_rate": 8.302987308448724e-06, + "loss": 43.0866, + "step": 42480 + }, + { + "epoch": 0.17166497654706545, + "grad_norm": 368.931396484375, + "learning_rate": 8.301928357666178e-06, + "loss": 28.1968, + "step": 42490 + }, + { + "epoch": 0.17170537781243309, + "grad_norm": 455.97747802734375, + "learning_rate": 8.300869144164346e-06, + "loss": 72.2107, + "step": 42500 + }, + { + "epoch": 0.17174577907780073, + "grad_norm": 470.565673828125, + "learning_rate": 8.299809668027505e-06, + "loss": 51.0551, + "step": 42510 + }, + { + "epoch": 0.17178618034316834, + "grad_norm": 944.7191772460938, + "learning_rate": 8.298749929339953e-06, + "loss": 52.9875, + "step": 42520 + }, + { + "epoch": 0.17182658160853598, + "grad_norm": 566.98193359375, + "learning_rate": 8.297689928186009e-06, + "loss": 38.8439, + "step": 42530 + }, + { + "epoch": 0.17186698287390362, + "grad_norm": 336.9647521972656, + "learning_rate": 8.29662966465001e-06, + "loss": 59.2766, + "step": 42540 + }, + { + "epoch": 0.17190738413927123, + "grad_norm": 551.6116943359375, + "learning_rate": 8.295569138816319e-06, + "loss": 52.5348, + "step": 42550 + }, + { + "epoch": 0.17194778540463887, + "grad_norm": 659.5198974609375, + "learning_rate": 8.294508350769315e-06, + "loss": 34.3035, + "step": 42560 + }, + { + "epoch": 0.1719881866700065, + "grad_norm": 840.8699340820312, + "learning_rate": 8.293447300593402e-06, + "loss": 46.1301, + "step": 42570 + }, + { + "epoch": 0.17202858793537412, + "grad_norm": 1800.16748046875, + "learning_rate": 8.292385988373005e-06, + "loss": 56.3059, + "step": 42580 + }, + { + "epoch": 0.17206898920074176, + "grad_norm": 462.5638122558594, + "learning_rate": 8.29132441419256e-06, + "loss": 44.6035, + "step": 42590 + }, + { + "epoch": 0.1721093904661094, + "grad_norm": 684.3241577148438, + "learning_rate": 8.290262578136541e-06, + "loss": 58.0993, + "step": 42600 + }, + { + "epoch": 0.17214979173147704, + "grad_norm": 450.2313537597656, + "learning_rate": 8.289200480289426e-06, + "loss": 67.3204, + "step": 42610 + }, + { + "epoch": 0.17219019299684465, + "grad_norm": 498.5903015136719, + "learning_rate": 8.288138120735726e-06, + "loss": 28.4738, + "step": 42620 + }, + { + "epoch": 0.1722305942622123, + "grad_norm": 758.5321655273438, + "learning_rate": 8.287075499559965e-06, + "loss": 60.2802, + "step": 42630 + }, + { + "epoch": 0.17227099552757993, + "grad_norm": 351.36859130859375, + "learning_rate": 8.286012616846693e-06, + "loss": 52.4755, + "step": 42640 + }, + { + "epoch": 0.17231139679294755, + "grad_norm": 539.5321655273438, + "learning_rate": 8.284949472680477e-06, + "loss": 56.0628, + "step": 42650 + }, + { + "epoch": 0.1723517980583152, + "grad_norm": 2152.857177734375, + "learning_rate": 8.283886067145908e-06, + "loss": 56.3268, + "step": 42660 + }, + { + "epoch": 0.17239219932368283, + "grad_norm": 395.3343811035156, + "learning_rate": 8.282822400327595e-06, + "loss": 44.1626, + "step": 42670 + }, + { + "epoch": 0.17243260058905044, + "grad_norm": 594.5778198242188, + "learning_rate": 8.28175847231017e-06, + "loss": 51.8114, + "step": 42680 + }, + { + "epoch": 0.17247300185441808, + "grad_norm": 803.4881591796875, + "learning_rate": 8.280694283178285e-06, + "loss": 41.6613, + "step": 42690 + }, + { + "epoch": 0.17251340311978572, + "grad_norm": 1555.0865478515625, + "learning_rate": 8.27962983301661e-06, + "loss": 65.1994, + "step": 42700 + }, + { + "epoch": 0.17255380438515333, + "grad_norm": 695.3710327148438, + "learning_rate": 8.278565121909845e-06, + "loss": 66.7208, + "step": 42710 + }, + { + "epoch": 0.17259420565052097, + "grad_norm": 540.1356811523438, + "learning_rate": 8.277500149942697e-06, + "loss": 43.4715, + "step": 42720 + }, + { + "epoch": 0.1726346069158886, + "grad_norm": 683.9020385742188, + "learning_rate": 8.276434917199904e-06, + "loss": 37.1363, + "step": 42730 + }, + { + "epoch": 0.17267500818125622, + "grad_norm": 2300.76220703125, + "learning_rate": 8.275369423766222e-06, + "loss": 47.8163, + "step": 42740 + }, + { + "epoch": 0.17271540944662386, + "grad_norm": 752.5711059570312, + "learning_rate": 8.274303669726427e-06, + "loss": 39.8013, + "step": 42750 + }, + { + "epoch": 0.1727558107119915, + "grad_norm": 830.5774536132812, + "learning_rate": 8.273237655165314e-06, + "loss": 61.424, + "step": 42760 + }, + { + "epoch": 0.17279621197735914, + "grad_norm": 748.5219116210938, + "learning_rate": 8.272171380167705e-06, + "loss": 62.0189, + "step": 42770 + }, + { + "epoch": 0.17283661324272676, + "grad_norm": 423.2668762207031, + "learning_rate": 8.271104844818436e-06, + "loss": 46.4281, + "step": 42780 + }, + { + "epoch": 0.1728770145080944, + "grad_norm": 359.3676452636719, + "learning_rate": 8.270038049202366e-06, + "loss": 53.1651, + "step": 42790 + }, + { + "epoch": 0.17291741577346204, + "grad_norm": 264.7219543457031, + "learning_rate": 8.268970993404377e-06, + "loss": 46.8862, + "step": 42800 + }, + { + "epoch": 0.17295781703882965, + "grad_norm": 677.9569702148438, + "learning_rate": 8.267903677509368e-06, + "loss": 47.7671, + "step": 42810 + }, + { + "epoch": 0.1729982183041973, + "grad_norm": 898.0774536132812, + "learning_rate": 8.266836101602263e-06, + "loss": 52.6813, + "step": 42820 + }, + { + "epoch": 0.17303861956956493, + "grad_norm": 387.7664489746094, + "learning_rate": 8.265768265767999e-06, + "loss": 48.4607, + "step": 42830 + }, + { + "epoch": 0.17307902083493254, + "grad_norm": 914.6298828125, + "learning_rate": 8.264700170091543e-06, + "loss": 76.5356, + "step": 42840 + }, + { + "epoch": 0.17311942210030018, + "grad_norm": 419.8518981933594, + "learning_rate": 8.263631814657879e-06, + "loss": 33.402, + "step": 42850 + }, + { + "epoch": 0.17315982336566782, + "grad_norm": 523.0492553710938, + "learning_rate": 8.262563199552007e-06, + "loss": 41.1054, + "step": 42860 + }, + { + "epoch": 0.17320022463103543, + "grad_norm": 1040.5003662109375, + "learning_rate": 8.261494324858956e-06, + "loss": 54.1803, + "step": 42870 + }, + { + "epoch": 0.17324062589640307, + "grad_norm": 591.4171142578125, + "learning_rate": 8.26042519066377e-06, + "loss": 43.2964, + "step": 42880 + }, + { + "epoch": 0.1732810271617707, + "grad_norm": 359.90386962890625, + "learning_rate": 8.259355797051515e-06, + "loss": 28.9607, + "step": 42890 + }, + { + "epoch": 0.17332142842713832, + "grad_norm": 656.0526733398438, + "learning_rate": 8.258286144107277e-06, + "loss": 27.343, + "step": 42900 + }, + { + "epoch": 0.17336182969250596, + "grad_norm": 810.3823852539062, + "learning_rate": 8.257216231916162e-06, + "loss": 71.7967, + "step": 42910 + }, + { + "epoch": 0.1734022309578736, + "grad_norm": 662.1875610351562, + "learning_rate": 8.256146060563304e-06, + "loss": 46.7155, + "step": 42920 + }, + { + "epoch": 0.17344263222324124, + "grad_norm": 268.1656494140625, + "learning_rate": 8.255075630133847e-06, + "loss": 53.0825, + "step": 42930 + }, + { + "epoch": 0.17348303348860886, + "grad_norm": 458.50628662109375, + "learning_rate": 8.254004940712958e-06, + "loss": 52.152, + "step": 42940 + }, + { + "epoch": 0.1735234347539765, + "grad_norm": 639.825439453125, + "learning_rate": 8.252933992385833e-06, + "loss": 63.8295, + "step": 42950 + }, + { + "epoch": 0.17356383601934414, + "grad_norm": 506.6952209472656, + "learning_rate": 8.251862785237676e-06, + "loss": 54.3871, + "step": 42960 + }, + { + "epoch": 0.17360423728471175, + "grad_norm": 168.96426391601562, + "learning_rate": 8.250791319353723e-06, + "loss": 31.244, + "step": 42970 + }, + { + "epoch": 0.1736446385500794, + "grad_norm": 333.39715576171875, + "learning_rate": 8.249719594819225e-06, + "loss": 51.3396, + "step": 42980 + }, + { + "epoch": 0.17368503981544703, + "grad_norm": 512.88134765625, + "learning_rate": 8.248647611719452e-06, + "loss": 52.5945, + "step": 42990 + }, + { + "epoch": 0.17372544108081464, + "grad_norm": 451.7750549316406, + "learning_rate": 8.247575370139695e-06, + "loss": 36.7286, + "step": 43000 + }, + { + "epoch": 0.17376584234618228, + "grad_norm": 875.9497680664062, + "learning_rate": 8.246502870165273e-06, + "loss": 45.5384, + "step": 43010 + }, + { + "epoch": 0.17380624361154992, + "grad_norm": 1002.91455078125, + "learning_rate": 8.245430111881519e-06, + "loss": 47.9184, + "step": 43020 + }, + { + "epoch": 0.17384664487691753, + "grad_norm": 727.3613891601562, + "learning_rate": 8.244357095373783e-06, + "loss": 57.4964, + "step": 43030 + }, + { + "epoch": 0.17388704614228517, + "grad_norm": 462.3572082519531, + "learning_rate": 8.243283820727441e-06, + "loss": 37.5159, + "step": 43040 + }, + { + "epoch": 0.1739274474076528, + "grad_norm": 738.8742065429688, + "learning_rate": 8.242210288027893e-06, + "loss": 57.068, + "step": 43050 + }, + { + "epoch": 0.17396784867302043, + "grad_norm": 593.364990234375, + "learning_rate": 8.241136497360552e-06, + "loss": 61.0344, + "step": 43060 + }, + { + "epoch": 0.17400824993838807, + "grad_norm": 892.4779052734375, + "learning_rate": 8.240062448810853e-06, + "loss": 48.9156, + "step": 43070 + }, + { + "epoch": 0.1740486512037557, + "grad_norm": 783.9124755859375, + "learning_rate": 8.238988142464254e-06, + "loss": 53.5666, + "step": 43080 + }, + { + "epoch": 0.17408905246912335, + "grad_norm": 389.2962951660156, + "learning_rate": 8.237913578406236e-06, + "loss": 40.7518, + "step": 43090 + }, + { + "epoch": 0.17412945373449096, + "grad_norm": 536.7239990234375, + "learning_rate": 8.236838756722294e-06, + "loss": 53.6748, + "step": 43100 + }, + { + "epoch": 0.1741698549998586, + "grad_norm": 1080.3280029296875, + "learning_rate": 8.235763677497945e-06, + "loss": 151.0997, + "step": 43110 + }, + { + "epoch": 0.17421025626522624, + "grad_norm": 554.4981689453125, + "learning_rate": 8.234688340818732e-06, + "loss": 61.6212, + "step": 43120 + }, + { + "epoch": 0.17425065753059385, + "grad_norm": 828.6658935546875, + "learning_rate": 8.233612746770214e-06, + "loss": 55.1259, + "step": 43130 + }, + { + "epoch": 0.1742910587959615, + "grad_norm": 249.32948303222656, + "learning_rate": 8.232536895437968e-06, + "loss": 52.4912, + "step": 43140 + }, + { + "epoch": 0.17433146006132913, + "grad_norm": 770.260009765625, + "learning_rate": 8.231460786907597e-06, + "loss": 35.9362, + "step": 43150 + }, + { + "epoch": 0.17437186132669674, + "grad_norm": 637.1809692382812, + "learning_rate": 8.230384421264722e-06, + "loss": 55.1247, + "step": 43160 + }, + { + "epoch": 0.17441226259206438, + "grad_norm": 587.2365112304688, + "learning_rate": 8.229307798594985e-06, + "loss": 57.6803, + "step": 43170 + }, + { + "epoch": 0.17445266385743202, + "grad_norm": 431.0023498535156, + "learning_rate": 8.228230918984046e-06, + "loss": 40.9327, + "step": 43180 + }, + { + "epoch": 0.17449306512279963, + "grad_norm": 471.2975158691406, + "learning_rate": 8.22715378251759e-06, + "loss": 43.1537, + "step": 43190 + }, + { + "epoch": 0.17453346638816727, + "grad_norm": 738.6006469726562, + "learning_rate": 8.226076389281316e-06, + "loss": 68.729, + "step": 43200 + }, + { + "epoch": 0.17457386765353491, + "grad_norm": 496.95184326171875, + "learning_rate": 8.22499873936095e-06, + "loss": 46.558, + "step": 43210 + }, + { + "epoch": 0.17461426891890253, + "grad_norm": 473.88580322265625, + "learning_rate": 8.223920832842236e-06, + "loss": 139.2572, + "step": 43220 + }, + { + "epoch": 0.17465467018427017, + "grad_norm": 989.8582763671875, + "learning_rate": 8.222842669810936e-06, + "loss": 38.1966, + "step": 43230 + }, + { + "epoch": 0.1746950714496378, + "grad_norm": 934.4788208007812, + "learning_rate": 8.221764250352835e-06, + "loss": 48.2463, + "step": 43240 + }, + { + "epoch": 0.17473547271500545, + "grad_norm": 654.1959228515625, + "learning_rate": 8.220685574553739e-06, + "loss": 50.7117, + "step": 43250 + }, + { + "epoch": 0.17477587398037306, + "grad_norm": 736.326171875, + "learning_rate": 8.219606642499474e-06, + "loss": 47.1028, + "step": 43260 + }, + { + "epoch": 0.1748162752457407, + "grad_norm": 892.4651489257812, + "learning_rate": 8.218527454275884e-06, + "loss": 57.5517, + "step": 43270 + }, + { + "epoch": 0.17485667651110834, + "grad_norm": 749.7070922851562, + "learning_rate": 8.217448009968834e-06, + "loss": 63.03, + "step": 43280 + }, + { + "epoch": 0.17489707777647595, + "grad_norm": 809.4559326171875, + "learning_rate": 8.216368309664213e-06, + "loss": 46.0392, + "step": 43290 + }, + { + "epoch": 0.1749374790418436, + "grad_norm": 685.7556762695312, + "learning_rate": 8.215288353447927e-06, + "loss": 40.1804, + "step": 43300 + }, + { + "epoch": 0.17497788030721123, + "grad_norm": 751.0911865234375, + "learning_rate": 8.214208141405903e-06, + "loss": 42.3572, + "step": 43310 + }, + { + "epoch": 0.17501828157257884, + "grad_norm": 504.02471923828125, + "learning_rate": 8.213127673624088e-06, + "loss": 47.8609, + "step": 43320 + }, + { + "epoch": 0.17505868283794648, + "grad_norm": 544.6509399414062, + "learning_rate": 8.212046950188451e-06, + "loss": 60.7416, + "step": 43330 + }, + { + "epoch": 0.17509908410331412, + "grad_norm": 190.57473754882812, + "learning_rate": 8.21096597118498e-06, + "loss": 47.4481, + "step": 43340 + }, + { + "epoch": 0.17513948536868174, + "grad_norm": 336.1890869140625, + "learning_rate": 8.209884736699681e-06, + "loss": 43.6772, + "step": 43350 + }, + { + "epoch": 0.17517988663404938, + "grad_norm": 1088.29736328125, + "learning_rate": 8.208803246818586e-06, + "loss": 57.5273, + "step": 43360 + }, + { + "epoch": 0.17522028789941702, + "grad_norm": 1386.392578125, + "learning_rate": 8.207721501627743e-06, + "loss": 57.2031, + "step": 43370 + }, + { + "epoch": 0.17526068916478463, + "grad_norm": 499.1271667480469, + "learning_rate": 8.20663950121322e-06, + "loss": 42.4209, + "step": 43380 + }, + { + "epoch": 0.17530109043015227, + "grad_norm": 660.9666748046875, + "learning_rate": 8.20555724566111e-06, + "loss": 54.9783, + "step": 43390 + }, + { + "epoch": 0.1753414916955199, + "grad_norm": 267.87103271484375, + "learning_rate": 8.204474735057522e-06, + "loss": 52.6259, + "step": 43400 + }, + { + "epoch": 0.17538189296088755, + "grad_norm": 808.8639526367188, + "learning_rate": 8.203391969488586e-06, + "loss": 40.2631, + "step": 43410 + }, + { + "epoch": 0.17542229422625516, + "grad_norm": 353.7890625, + "learning_rate": 8.20230894904045e-06, + "loss": 37.8534, + "step": 43420 + }, + { + "epoch": 0.1754626954916228, + "grad_norm": 859.0033569335938, + "learning_rate": 8.20122567379929e-06, + "loss": 48.4326, + "step": 43430 + }, + { + "epoch": 0.17550309675699044, + "grad_norm": 704.4515991210938, + "learning_rate": 8.200142143851295e-06, + "loss": 46.7167, + "step": 43440 + }, + { + "epoch": 0.17554349802235805, + "grad_norm": 651.6087646484375, + "learning_rate": 8.199058359282675e-06, + "loss": 52.8648, + "step": 43450 + }, + { + "epoch": 0.1755838992877257, + "grad_norm": 514.385498046875, + "learning_rate": 8.197974320179664e-06, + "loss": 49.6813, + "step": 43460 + }, + { + "epoch": 0.17562430055309333, + "grad_norm": 477.1377258300781, + "learning_rate": 8.19689002662851e-06, + "loss": 58.1234, + "step": 43470 + }, + { + "epoch": 0.17566470181846094, + "grad_norm": 570.377685546875, + "learning_rate": 8.195805478715492e-06, + "loss": 33.3943, + "step": 43480 + }, + { + "epoch": 0.17570510308382858, + "grad_norm": 436.5005798339844, + "learning_rate": 8.194720676526898e-06, + "loss": 42.9418, + "step": 43490 + }, + { + "epoch": 0.17574550434919622, + "grad_norm": 737.1203002929688, + "learning_rate": 8.193635620149041e-06, + "loss": 45.1717, + "step": 43500 + }, + { + "epoch": 0.17578590561456384, + "grad_norm": 1095.8653564453125, + "learning_rate": 8.192550309668254e-06, + "loss": 53.5207, + "step": 43510 + }, + { + "epoch": 0.17582630687993148, + "grad_norm": 298.76434326171875, + "learning_rate": 8.191464745170892e-06, + "loss": 33.359, + "step": 43520 + }, + { + "epoch": 0.17586670814529912, + "grad_norm": 531.721435546875, + "learning_rate": 8.190378926743327e-06, + "loss": 42.1844, + "step": 43530 + }, + { + "epoch": 0.17590710941066673, + "grad_norm": 881.6190185546875, + "learning_rate": 8.189292854471953e-06, + "loss": 49.1135, + "step": 43540 + }, + { + "epoch": 0.17594751067603437, + "grad_norm": 239.04486083984375, + "learning_rate": 8.188206528443182e-06, + "loss": 34.2073, + "step": 43550 + }, + { + "epoch": 0.175987911941402, + "grad_norm": 390.4149169921875, + "learning_rate": 8.18711994874345e-06, + "loss": 59.7111, + "step": 43560 + }, + { + "epoch": 0.17602831320676965, + "grad_norm": 306.6148986816406, + "learning_rate": 8.186033115459211e-06, + "loss": 41.3283, + "step": 43570 + }, + { + "epoch": 0.17606871447213726, + "grad_norm": 480.1251220703125, + "learning_rate": 8.184946028676937e-06, + "loss": 85.4738, + "step": 43580 + }, + { + "epoch": 0.1761091157375049, + "grad_norm": 458.3759765625, + "learning_rate": 8.183858688483126e-06, + "loss": 36.9035, + "step": 43590 + }, + { + "epoch": 0.17614951700287254, + "grad_norm": 878.4601440429688, + "learning_rate": 8.182771094964292e-06, + "loss": 53.6184, + "step": 43600 + }, + { + "epoch": 0.17618991826824015, + "grad_norm": 918.716796875, + "learning_rate": 8.181683248206968e-06, + "loss": 46.9538, + "step": 43610 + }, + { + "epoch": 0.1762303195336078, + "grad_norm": 734.4132690429688, + "learning_rate": 8.180595148297709e-06, + "loss": 51.9942, + "step": 43620 + }, + { + "epoch": 0.17627072079897543, + "grad_norm": 648.378662109375, + "learning_rate": 8.179506795323092e-06, + "loss": 45.4542, + "step": 43630 + }, + { + "epoch": 0.17631112206434305, + "grad_norm": 629.5515747070312, + "learning_rate": 8.17841818936971e-06, + "loss": 58.7435, + "step": 43640 + }, + { + "epoch": 0.17635152332971069, + "grad_norm": 1205.067626953125, + "learning_rate": 8.177329330524182e-06, + "loss": 41.3752, + "step": 43650 + }, + { + "epoch": 0.17639192459507833, + "grad_norm": 569.39404296875, + "learning_rate": 8.17624021887314e-06, + "loss": 39.0071, + "step": 43660 + }, + { + "epoch": 0.17643232586044594, + "grad_norm": 829.2988891601562, + "learning_rate": 8.17515085450324e-06, + "loss": 44.5112, + "step": 43670 + }, + { + "epoch": 0.17647272712581358, + "grad_norm": 292.11273193359375, + "learning_rate": 8.174061237501159e-06, + "loss": 41.0113, + "step": 43680 + }, + { + "epoch": 0.17651312839118122, + "grad_norm": 632.9418334960938, + "learning_rate": 8.172971367953593e-06, + "loss": 38.4412, + "step": 43690 + }, + { + "epoch": 0.17655352965654883, + "grad_norm": 539.7322998046875, + "learning_rate": 8.171881245947257e-06, + "loss": 33.8846, + "step": 43700 + }, + { + "epoch": 0.17659393092191647, + "grad_norm": 755.9725341796875, + "learning_rate": 8.170790871568887e-06, + "loss": 53.129, + "step": 43710 + }, + { + "epoch": 0.1766343321872841, + "grad_norm": 616.8136596679688, + "learning_rate": 8.169700244905239e-06, + "loss": 47.7439, + "step": 43720 + }, + { + "epoch": 0.17667473345265175, + "grad_norm": 508.9373474121094, + "learning_rate": 8.168609366043089e-06, + "loss": 42.9057, + "step": 43730 + }, + { + "epoch": 0.17671513471801936, + "grad_norm": 416.2492980957031, + "learning_rate": 8.167518235069234e-06, + "loss": 42.0221, + "step": 43740 + }, + { + "epoch": 0.176755535983387, + "grad_norm": 648.8447265625, + "learning_rate": 8.16642685207049e-06, + "loss": 71.3161, + "step": 43750 + }, + { + "epoch": 0.17679593724875464, + "grad_norm": 587.8074951171875, + "learning_rate": 8.165335217133695e-06, + "loss": 48.2666, + "step": 43760 + }, + { + "epoch": 0.17683633851412225, + "grad_norm": 752.8903198242188, + "learning_rate": 8.164243330345702e-06, + "loss": 44.9774, + "step": 43770 + }, + { + "epoch": 0.1768767397794899, + "grad_norm": 984.27734375, + "learning_rate": 8.16315119179339e-06, + "loss": 44.6318, + "step": 43780 + }, + { + "epoch": 0.17691714104485753, + "grad_norm": 620.818359375, + "learning_rate": 8.162058801563652e-06, + "loss": 49.2427, + "step": 43790 + }, + { + "epoch": 0.17695754231022515, + "grad_norm": 289.87615966796875, + "learning_rate": 8.160966159743411e-06, + "loss": 43.1271, + "step": 43800 + }, + { + "epoch": 0.1769979435755928, + "grad_norm": 828.4949951171875, + "learning_rate": 8.159873266419598e-06, + "loss": 68.2872, + "step": 43810 + }, + { + "epoch": 0.17703834484096043, + "grad_norm": 859.2119750976562, + "learning_rate": 8.15878012167917e-06, + "loss": 36.2162, + "step": 43820 + }, + { + "epoch": 0.17707874610632804, + "grad_norm": 583.408935546875, + "learning_rate": 8.157686725609105e-06, + "loss": 40.5383, + "step": 43830 + }, + { + "epoch": 0.17711914737169568, + "grad_norm": 480.7803649902344, + "learning_rate": 8.1565930782964e-06, + "loss": 39.6614, + "step": 43840 + }, + { + "epoch": 0.17715954863706332, + "grad_norm": 730.824462890625, + "learning_rate": 8.155499179828068e-06, + "loss": 47.8291, + "step": 43850 + }, + { + "epoch": 0.17719994990243093, + "grad_norm": 521.2890014648438, + "learning_rate": 8.15440503029115e-06, + "loss": 44.0991, + "step": 43860 + }, + { + "epoch": 0.17724035116779857, + "grad_norm": 1121.678466796875, + "learning_rate": 8.153310629772702e-06, + "loss": 60.6592, + "step": 43870 + }, + { + "epoch": 0.1772807524331662, + "grad_norm": 459.24322509765625, + "learning_rate": 8.152215978359796e-06, + "loss": 62.694, + "step": 43880 + }, + { + "epoch": 0.17732115369853385, + "grad_norm": 771.0817260742188, + "learning_rate": 8.151121076139534e-06, + "loss": 49.6407, + "step": 43890 + }, + { + "epoch": 0.17736155496390146, + "grad_norm": 378.06890869140625, + "learning_rate": 8.150025923199027e-06, + "loss": 45.2549, + "step": 43900 + }, + { + "epoch": 0.1774019562292691, + "grad_norm": 655.2774047851562, + "learning_rate": 8.148930519625417e-06, + "loss": 33.8218, + "step": 43910 + }, + { + "epoch": 0.17744235749463674, + "grad_norm": 415.8019104003906, + "learning_rate": 8.147834865505855e-06, + "loss": 42.6838, + "step": 43920 + }, + { + "epoch": 0.17748275876000436, + "grad_norm": 436.5208740234375, + "learning_rate": 8.14673896092752e-06, + "loss": 42.1472, + "step": 43930 + }, + { + "epoch": 0.177523160025372, + "grad_norm": 785.6007080078125, + "learning_rate": 8.145642805977608e-06, + "loss": 50.6509, + "step": 43940 + }, + { + "epoch": 0.17756356129073964, + "grad_norm": 644.1143798828125, + "learning_rate": 8.144546400743334e-06, + "loss": 40.0659, + "step": 43950 + }, + { + "epoch": 0.17760396255610725, + "grad_norm": 288.6166687011719, + "learning_rate": 8.143449745311934e-06, + "loss": 65.2577, + "step": 43960 + }, + { + "epoch": 0.1776443638214749, + "grad_norm": 741.4886474609375, + "learning_rate": 8.142352839770663e-06, + "loss": 45.3209, + "step": 43970 + }, + { + "epoch": 0.17768476508684253, + "grad_norm": 541.5047607421875, + "learning_rate": 8.1412556842068e-06, + "loss": 44.5242, + "step": 43980 + }, + { + "epoch": 0.17772516635221014, + "grad_norm": 752.431640625, + "learning_rate": 8.140158278707637e-06, + "loss": 46.5009, + "step": 43990 + }, + { + "epoch": 0.17776556761757778, + "grad_norm": 816.7749633789062, + "learning_rate": 8.139060623360494e-06, + "loss": 48.2635, + "step": 44000 + }, + { + "epoch": 0.17780596888294542, + "grad_norm": 699.2485961914062, + "learning_rate": 8.1379627182527e-06, + "loss": 40.0845, + "step": 44010 + }, + { + "epoch": 0.17784637014831303, + "grad_norm": 676.007080078125, + "learning_rate": 8.136864563471617e-06, + "loss": 80.3815, + "step": 44020 + }, + { + "epoch": 0.17788677141368067, + "grad_norm": 200.27894592285156, + "learning_rate": 8.135766159104615e-06, + "loss": 48.3457, + "step": 44030 + }, + { + "epoch": 0.1779271726790483, + "grad_norm": 359.0489807128906, + "learning_rate": 8.134667505239092e-06, + "loss": 66.8701, + "step": 44040 + }, + { + "epoch": 0.17796757394441595, + "grad_norm": 601.3418579101562, + "learning_rate": 8.133568601962462e-06, + "loss": 22.5121, + "step": 44050 + }, + { + "epoch": 0.17800797520978356, + "grad_norm": 754.0125732421875, + "learning_rate": 8.132469449362158e-06, + "loss": 63.1259, + "step": 44060 + }, + { + "epoch": 0.1780483764751512, + "grad_norm": 632.214111328125, + "learning_rate": 8.131370047525637e-06, + "loss": 41.4233, + "step": 44070 + }, + { + "epoch": 0.17808877774051884, + "grad_norm": 370.5418701171875, + "learning_rate": 8.130270396540372e-06, + "loss": 58.6016, + "step": 44080 + }, + { + "epoch": 0.17812917900588646, + "grad_norm": 385.2149963378906, + "learning_rate": 8.129170496493857e-06, + "loss": 41.7259, + "step": 44090 + }, + { + "epoch": 0.1781695802712541, + "grad_norm": 492.43988037109375, + "learning_rate": 8.128070347473609e-06, + "loss": 140.3884, + "step": 44100 + }, + { + "epoch": 0.17820998153662174, + "grad_norm": 308.92840576171875, + "learning_rate": 8.126969949567157e-06, + "loss": 44.9308, + "step": 44110 + }, + { + "epoch": 0.17825038280198935, + "grad_norm": 822.5704345703125, + "learning_rate": 8.125869302862058e-06, + "loss": 40.5698, + "step": 44120 + }, + { + "epoch": 0.178290784067357, + "grad_norm": 576.1128540039062, + "learning_rate": 8.124768407445883e-06, + "loss": 65.7646, + "step": 44130 + }, + { + "epoch": 0.17833118533272463, + "grad_norm": 445.1959228515625, + "learning_rate": 8.123667263406228e-06, + "loss": 47.3017, + "step": 44140 + }, + { + "epoch": 0.17837158659809224, + "grad_norm": 853.5143432617188, + "learning_rate": 8.122565870830704e-06, + "loss": 48.5397, + "step": 44150 + }, + { + "epoch": 0.17841198786345988, + "grad_norm": 609.802734375, + "learning_rate": 8.121464229806944e-06, + "loss": 35.6042, + "step": 44160 + }, + { + "epoch": 0.17845238912882752, + "grad_norm": 701.1467895507812, + "learning_rate": 8.120362340422601e-06, + "loss": 45.621, + "step": 44170 + }, + { + "epoch": 0.17849279039419513, + "grad_norm": 717.2283325195312, + "learning_rate": 8.119260202765347e-06, + "loss": 39.1522, + "step": 44180 + }, + { + "epoch": 0.17853319165956277, + "grad_norm": 816.3579711914062, + "learning_rate": 8.118157816922874e-06, + "loss": 53.5569, + "step": 44190 + }, + { + "epoch": 0.1785735929249304, + "grad_norm": 711.2218627929688, + "learning_rate": 8.117055182982895e-06, + "loss": 35.55, + "step": 44200 + }, + { + "epoch": 0.17861399419029805, + "grad_norm": 971.1217651367188, + "learning_rate": 8.115952301033141e-06, + "loss": 62.0396, + "step": 44210 + }, + { + "epoch": 0.17865439545566567, + "grad_norm": 474.5082092285156, + "learning_rate": 8.11484917116136e-06, + "loss": 61.6075, + "step": 44220 + }, + { + "epoch": 0.1786947967210333, + "grad_norm": 424.6689453125, + "learning_rate": 8.113745793455328e-06, + "loss": 41.4968, + "step": 44230 + }, + { + "epoch": 0.17873519798640095, + "grad_norm": 1078.0762939453125, + "learning_rate": 8.112642168002831e-06, + "loss": 39.6164, + "step": 44240 + }, + { + "epoch": 0.17877559925176856, + "grad_norm": 1485.0335693359375, + "learning_rate": 8.111538294891684e-06, + "loss": 43.4463, + "step": 44250 + }, + { + "epoch": 0.1788160005171362, + "grad_norm": 733.3353881835938, + "learning_rate": 8.110434174209714e-06, + "loss": 58.078, + "step": 44260 + }, + { + "epoch": 0.17885640178250384, + "grad_norm": 1062.302001953125, + "learning_rate": 8.109329806044772e-06, + "loss": 78.6044, + "step": 44270 + }, + { + "epoch": 0.17889680304787145, + "grad_norm": 706.5615844726562, + "learning_rate": 8.108225190484728e-06, + "loss": 49.5474, + "step": 44280 + }, + { + "epoch": 0.1789372043132391, + "grad_norm": 834.2645263671875, + "learning_rate": 8.107120327617469e-06, + "loss": 33.1368, + "step": 44290 + }, + { + "epoch": 0.17897760557860673, + "grad_norm": 1417.78173828125, + "learning_rate": 8.106015217530906e-06, + "loss": 57.1304, + "step": 44300 + }, + { + "epoch": 0.17901800684397434, + "grad_norm": 302.16729736328125, + "learning_rate": 8.104909860312968e-06, + "loss": 39.4532, + "step": 44310 + }, + { + "epoch": 0.17905840810934198, + "grad_norm": 737.5609130859375, + "learning_rate": 8.1038042560516e-06, + "loss": 42.557, + "step": 44320 + }, + { + "epoch": 0.17909880937470962, + "grad_norm": 407.8067626953125, + "learning_rate": 8.102698404834773e-06, + "loss": 48.2338, + "step": 44330 + }, + { + "epoch": 0.17913921064007723, + "grad_norm": 584.7301025390625, + "learning_rate": 8.101592306750472e-06, + "loss": 50.2676, + "step": 44340 + }, + { + "epoch": 0.17917961190544487, + "grad_norm": 549.4971923828125, + "learning_rate": 8.100485961886707e-06, + "loss": 53.5906, + "step": 44350 + }, + { + "epoch": 0.17922001317081251, + "grad_norm": 341.3735656738281, + "learning_rate": 8.099379370331502e-06, + "loss": 41.9313, + "step": 44360 + }, + { + "epoch": 0.17926041443618015, + "grad_norm": 894.8284912109375, + "learning_rate": 8.098272532172906e-06, + "loss": 38.5997, + "step": 44370 + }, + { + "epoch": 0.17930081570154777, + "grad_norm": 509.7801513671875, + "learning_rate": 8.097165447498985e-06, + "loss": 53.9619, + "step": 44380 + }, + { + "epoch": 0.1793412169669154, + "grad_norm": 643.4464111328125, + "learning_rate": 8.09605811639782e-06, + "loss": 39.9498, + "step": 44390 + }, + { + "epoch": 0.17938161823228305, + "grad_norm": 721.2262573242188, + "learning_rate": 8.094950538957523e-06, + "loss": 33.5365, + "step": 44400 + }, + { + "epoch": 0.17942201949765066, + "grad_norm": 410.3100891113281, + "learning_rate": 8.093842715266214e-06, + "loss": 54.1215, + "step": 44410 + }, + { + "epoch": 0.1794624207630183, + "grad_norm": 634.4172973632812, + "learning_rate": 8.092734645412037e-06, + "loss": 61.592, + "step": 44420 + }, + { + "epoch": 0.17950282202838594, + "grad_norm": 538.3599243164062, + "learning_rate": 8.09162632948316e-06, + "loss": 39.2725, + "step": 44430 + }, + { + "epoch": 0.17954322329375355, + "grad_norm": 1388.5975341796875, + "learning_rate": 8.090517767567765e-06, + "loss": 61.4013, + "step": 44440 + }, + { + "epoch": 0.1795836245591212, + "grad_norm": 555.4326782226562, + "learning_rate": 8.089408959754055e-06, + "loss": 42.8655, + "step": 44450 + }, + { + "epoch": 0.17962402582448883, + "grad_norm": 727.263427734375, + "learning_rate": 8.088299906130252e-06, + "loss": 43.4522, + "step": 44460 + }, + { + "epoch": 0.17966442708985644, + "grad_norm": 462.22186279296875, + "learning_rate": 8.087190606784598e-06, + "loss": 48.6978, + "step": 44470 + }, + { + "epoch": 0.17970482835522408, + "grad_norm": 723.4546508789062, + "learning_rate": 8.086081061805357e-06, + "loss": 44.1117, + "step": 44480 + }, + { + "epoch": 0.17974522962059172, + "grad_norm": 670.451171875, + "learning_rate": 8.084971271280808e-06, + "loss": 67.2479, + "step": 44490 + }, + { + "epoch": 0.17978563088595934, + "grad_norm": 477.13946533203125, + "learning_rate": 8.083861235299253e-06, + "loss": 36.4865, + "step": 44500 + }, + { + "epoch": 0.17982603215132698, + "grad_norm": 683.2493286132812, + "learning_rate": 8.082750953949015e-06, + "loss": 52.2884, + "step": 44510 + }, + { + "epoch": 0.17986643341669462, + "grad_norm": 832.3070068359375, + "learning_rate": 8.081640427318429e-06, + "loss": 59.4732, + "step": 44520 + }, + { + "epoch": 0.17990683468206226, + "grad_norm": 610.156494140625, + "learning_rate": 8.080529655495856e-06, + "loss": 37.1259, + "step": 44530 + }, + { + "epoch": 0.17994723594742987, + "grad_norm": 602.242431640625, + "learning_rate": 8.079418638569679e-06, + "loss": 54.5028, + "step": 44540 + }, + { + "epoch": 0.1799876372127975, + "grad_norm": 884.65869140625, + "learning_rate": 8.078307376628292e-06, + "loss": 28.0929, + "step": 44550 + }, + { + "epoch": 0.18002803847816515, + "grad_norm": 436.0967102050781, + "learning_rate": 8.077195869760114e-06, + "loss": 45.9886, + "step": 44560 + }, + { + "epoch": 0.18006843974353276, + "grad_norm": 1172.73974609375, + "learning_rate": 8.076084118053584e-06, + "loss": 52.4016, + "step": 44570 + }, + { + "epoch": 0.1801088410089004, + "grad_norm": 488.72637939453125, + "learning_rate": 8.074972121597158e-06, + "loss": 56.7316, + "step": 44580 + }, + { + "epoch": 0.18014924227426804, + "grad_norm": 632.3367919921875, + "learning_rate": 8.073859880479314e-06, + "loss": 49.8892, + "step": 44590 + }, + { + "epoch": 0.18018964353963565, + "grad_norm": 302.1227722167969, + "learning_rate": 8.072747394788545e-06, + "loss": 34.2227, + "step": 44600 + }, + { + "epoch": 0.1802300448050033, + "grad_norm": 776.5565185546875, + "learning_rate": 8.071634664613367e-06, + "loss": 52.676, + "step": 44610 + }, + { + "epoch": 0.18027044607037093, + "grad_norm": 733.58251953125, + "learning_rate": 8.070521690042317e-06, + "loss": 60.9468, + "step": 44620 + }, + { + "epoch": 0.18031084733573854, + "grad_norm": 1145.7186279296875, + "learning_rate": 8.069408471163947e-06, + "loss": 65.979, + "step": 44630 + }, + { + "epoch": 0.18035124860110618, + "grad_norm": 509.70428466796875, + "learning_rate": 8.068295008066832e-06, + "loss": 40.7422, + "step": 44640 + }, + { + "epoch": 0.18039164986647382, + "grad_norm": 576.9971313476562, + "learning_rate": 8.067181300839565e-06, + "loss": 32.6827, + "step": 44650 + }, + { + "epoch": 0.18043205113184144, + "grad_norm": 710.1195678710938, + "learning_rate": 8.066067349570757e-06, + "loss": 58.6771, + "step": 44660 + }, + { + "epoch": 0.18047245239720908, + "grad_norm": 458.78106689453125, + "learning_rate": 8.064953154349042e-06, + "loss": 31.6616, + "step": 44670 + }, + { + "epoch": 0.18051285366257672, + "grad_norm": 411.84686279296875, + "learning_rate": 8.063838715263072e-06, + "loss": 33.9766, + "step": 44680 + }, + { + "epoch": 0.18055325492794436, + "grad_norm": 1309.62255859375, + "learning_rate": 8.062724032401515e-06, + "loss": 51.9341, + "step": 44690 + }, + { + "epoch": 0.18059365619331197, + "grad_norm": 537.8983154296875, + "learning_rate": 8.061609105853062e-06, + "loss": 46.2525, + "step": 44700 + }, + { + "epoch": 0.1806340574586796, + "grad_norm": 1111.2152099609375, + "learning_rate": 8.060493935706425e-06, + "loss": 55.7999, + "step": 44710 + }, + { + "epoch": 0.18067445872404725, + "grad_norm": 578.8212890625, + "learning_rate": 8.059378522050332e-06, + "loss": 64.9187, + "step": 44720 + }, + { + "epoch": 0.18071485998941486, + "grad_norm": 549.1094360351562, + "learning_rate": 8.05826286497353e-06, + "loss": 41.3403, + "step": 44730 + }, + { + "epoch": 0.1807552612547825, + "grad_norm": 641.1884765625, + "learning_rate": 8.057146964564786e-06, + "loss": 52.3945, + "step": 44740 + }, + { + "epoch": 0.18079566252015014, + "grad_norm": 886.13330078125, + "learning_rate": 8.05603082091289e-06, + "loss": 42.8315, + "step": 44750 + }, + { + "epoch": 0.18083606378551775, + "grad_norm": 639.1670532226562, + "learning_rate": 8.054914434106647e-06, + "loss": 56.5641, + "step": 44760 + }, + { + "epoch": 0.1808764650508854, + "grad_norm": 548.1455078125, + "learning_rate": 8.053797804234882e-06, + "loss": 52.2677, + "step": 44770 + }, + { + "epoch": 0.18091686631625303, + "grad_norm": 410.5792541503906, + "learning_rate": 8.052680931386441e-06, + "loss": 30.4637, + "step": 44780 + }, + { + "epoch": 0.18095726758162065, + "grad_norm": 578.6219482421875, + "learning_rate": 8.051563815650187e-06, + "loss": 35.4263, + "step": 44790 + }, + { + "epoch": 0.18099766884698829, + "grad_norm": 409.59832763671875, + "learning_rate": 8.050446457115005e-06, + "loss": 56.8702, + "step": 44800 + }, + { + "epoch": 0.18103807011235593, + "grad_norm": 621.2161865234375, + "learning_rate": 8.0493288558698e-06, + "loss": 45.1692, + "step": 44810 + }, + { + "epoch": 0.18107847137772354, + "grad_norm": 284.7547912597656, + "learning_rate": 8.04821101200349e-06, + "loss": 32.8964, + "step": 44820 + }, + { + "epoch": 0.18111887264309118, + "grad_norm": 200.6179962158203, + "learning_rate": 8.047092925605022e-06, + "loss": 51.5494, + "step": 44830 + }, + { + "epoch": 0.18115927390845882, + "grad_norm": 628.4204711914062, + "learning_rate": 8.045974596763352e-06, + "loss": 34.833, + "step": 44840 + }, + { + "epoch": 0.18119967517382646, + "grad_norm": 1149.173583984375, + "learning_rate": 8.044856025567464e-06, + "loss": 48.7898, + "step": 44850 + }, + { + "epoch": 0.18124007643919407, + "grad_norm": 605.9561157226562, + "learning_rate": 8.043737212106356e-06, + "loss": 46.0257, + "step": 44860 + }, + { + "epoch": 0.1812804777045617, + "grad_norm": 735.3001708984375, + "learning_rate": 8.042618156469045e-06, + "loss": 52.739, + "step": 44870 + }, + { + "epoch": 0.18132087896992935, + "grad_norm": 996.4307250976562, + "learning_rate": 8.041498858744572e-06, + "loss": 61.982, + "step": 44880 + }, + { + "epoch": 0.18136128023529696, + "grad_norm": 677.4853515625, + "learning_rate": 8.040379319021994e-06, + "loss": 51.9419, + "step": 44890 + }, + { + "epoch": 0.1814016815006646, + "grad_norm": 691.8176879882812, + "learning_rate": 8.039259537390388e-06, + "loss": 47.6419, + "step": 44900 + }, + { + "epoch": 0.18144208276603224, + "grad_norm": 1121.185302734375, + "learning_rate": 8.038139513938847e-06, + "loss": 31.3486, + "step": 44910 + }, + { + "epoch": 0.18148248403139985, + "grad_norm": 721.5740966796875, + "learning_rate": 8.037019248756488e-06, + "loss": 39.1443, + "step": 44920 + }, + { + "epoch": 0.1815228852967675, + "grad_norm": 791.95654296875, + "learning_rate": 8.035898741932447e-06, + "loss": 41.8861, + "step": 44930 + }, + { + "epoch": 0.18156328656213513, + "grad_norm": 499.211669921875, + "learning_rate": 8.034777993555875e-06, + "loss": 49.4517, + "step": 44940 + }, + { + "epoch": 0.18160368782750275, + "grad_norm": 342.6352844238281, + "learning_rate": 8.033657003715945e-06, + "loss": 26.7375, + "step": 44950 + }, + { + "epoch": 0.1816440890928704, + "grad_norm": 562.2408447265625, + "learning_rate": 8.032535772501851e-06, + "loss": 44.3456, + "step": 44960 + }, + { + "epoch": 0.18168449035823803, + "grad_norm": 707.8630981445312, + "learning_rate": 8.031414300002802e-06, + "loss": 37.7815, + "step": 44970 + }, + { + "epoch": 0.18172489162360564, + "grad_norm": 434.7294921875, + "learning_rate": 8.03029258630803e-06, + "loss": 38.9817, + "step": 44980 + }, + { + "epoch": 0.18176529288897328, + "grad_norm": 336.34130859375, + "learning_rate": 8.029170631506785e-06, + "loss": 37.1305, + "step": 44990 + }, + { + "epoch": 0.18180569415434092, + "grad_norm": 495.4858093261719, + "learning_rate": 8.028048435688333e-06, + "loss": 50.3649, + "step": 45000 + }, + { + "epoch": 0.18184609541970856, + "grad_norm": 708.4286499023438, + "learning_rate": 8.026925998941965e-06, + "loss": 54.8248, + "step": 45010 + }, + { + "epoch": 0.18188649668507617, + "grad_norm": 1438.5784912109375, + "learning_rate": 8.025803321356989e-06, + "loss": 61.7399, + "step": 45020 + }, + { + "epoch": 0.1819268979504438, + "grad_norm": 389.3792724609375, + "learning_rate": 8.024680403022726e-06, + "loss": 47.3041, + "step": 45030 + }, + { + "epoch": 0.18196729921581145, + "grad_norm": 544.342041015625, + "learning_rate": 8.023557244028526e-06, + "loss": 36.6487, + "step": 45040 + }, + { + "epoch": 0.18200770048117906, + "grad_norm": 765.8178100585938, + "learning_rate": 8.022433844463752e-06, + "loss": 46.9147, + "step": 45050 + }, + { + "epoch": 0.1820481017465467, + "grad_norm": 607.2188110351562, + "learning_rate": 8.02131020441779e-06, + "loss": 48.6501, + "step": 45060 + }, + { + "epoch": 0.18208850301191434, + "grad_norm": 337.20623779296875, + "learning_rate": 8.02018632398004e-06, + "loss": 74.1569, + "step": 45070 + }, + { + "epoch": 0.18212890427728196, + "grad_norm": 698.919677734375, + "learning_rate": 8.019062203239923e-06, + "loss": 51.9308, + "step": 45080 + }, + { + "epoch": 0.1821693055426496, + "grad_norm": 455.0402526855469, + "learning_rate": 8.017937842286882e-06, + "loss": 46.6393, + "step": 45090 + }, + { + "epoch": 0.18220970680801724, + "grad_norm": 697.3123779296875, + "learning_rate": 8.01681324121038e-06, + "loss": 57.6411, + "step": 45100 + }, + { + "epoch": 0.18225010807338485, + "grad_norm": 883.2453002929688, + "learning_rate": 8.015688400099893e-06, + "loss": 51.9202, + "step": 45110 + }, + { + "epoch": 0.1822905093387525, + "grad_norm": 489.3941345214844, + "learning_rate": 8.014563319044919e-06, + "loss": 64.1878, + "step": 45120 + }, + { + "epoch": 0.18233091060412013, + "grad_norm": 485.7003479003906, + "learning_rate": 8.013437998134978e-06, + "loss": 38.5387, + "step": 45130 + }, + { + "epoch": 0.18237131186948774, + "grad_norm": 423.7239074707031, + "learning_rate": 8.012312437459604e-06, + "loss": 28.8249, + "step": 45140 + }, + { + "epoch": 0.18241171313485538, + "grad_norm": 503.5894775390625, + "learning_rate": 8.011186637108354e-06, + "loss": 46.0242, + "step": 45150 + }, + { + "epoch": 0.18245211440022302, + "grad_norm": 731.257080078125, + "learning_rate": 8.010060597170805e-06, + "loss": 58.772, + "step": 45160 + }, + { + "epoch": 0.18249251566559066, + "grad_norm": 326.68988037109375, + "learning_rate": 8.008934317736546e-06, + "loss": 34.3258, + "step": 45170 + }, + { + "epoch": 0.18253291693095827, + "grad_norm": 654.277099609375, + "learning_rate": 8.007807798895195e-06, + "loss": 44.5783, + "step": 45180 + }, + { + "epoch": 0.1825733181963259, + "grad_norm": 456.9231872558594, + "learning_rate": 8.00668104073638e-06, + "loss": 48.7379, + "step": 45190 + }, + { + "epoch": 0.18261371946169355, + "grad_norm": 581.152099609375, + "learning_rate": 8.005554043349753e-06, + "loss": 69.1283, + "step": 45200 + }, + { + "epoch": 0.18265412072706116, + "grad_norm": 472.1055908203125, + "learning_rate": 8.004426806824985e-06, + "loss": 40.6944, + "step": 45210 + }, + { + "epoch": 0.1826945219924288, + "grad_norm": 305.08331298828125, + "learning_rate": 8.003299331251764e-06, + "loss": 26.3944, + "step": 45220 + }, + { + "epoch": 0.18273492325779644, + "grad_norm": 631.8370971679688, + "learning_rate": 8.002171616719798e-06, + "loss": 45.73, + "step": 45230 + }, + { + "epoch": 0.18277532452316406, + "grad_norm": 581.5281372070312, + "learning_rate": 8.001043663318815e-06, + "loss": 44.2141, + "step": 45240 + }, + { + "epoch": 0.1828157257885317, + "grad_norm": 546.5279541015625, + "learning_rate": 7.999915471138562e-06, + "loss": 32.7438, + "step": 45250 + }, + { + "epoch": 0.18285612705389934, + "grad_norm": 790.43701171875, + "learning_rate": 7.9987870402688e-06, + "loss": 39.4014, + "step": 45260 + }, + { + "epoch": 0.18289652831926695, + "grad_norm": 1440.0347900390625, + "learning_rate": 7.997658370799318e-06, + "loss": 59.1243, + "step": 45270 + }, + { + "epoch": 0.1829369295846346, + "grad_norm": 826.537841796875, + "learning_rate": 7.996529462819915e-06, + "loss": 52.4962, + "step": 45280 + }, + { + "epoch": 0.18297733085000223, + "grad_norm": 734.6253662109375, + "learning_rate": 7.995400316420416e-06, + "loss": 38.2799, + "step": 45290 + }, + { + "epoch": 0.18301773211536984, + "grad_norm": 674.3758544921875, + "learning_rate": 7.994270931690662e-06, + "loss": 47.8541, + "step": 45300 + }, + { + "epoch": 0.18305813338073748, + "grad_norm": 521.4122314453125, + "learning_rate": 7.993141308720511e-06, + "loss": 36.1015, + "step": 45310 + }, + { + "epoch": 0.18309853464610512, + "grad_norm": 738.7835083007812, + "learning_rate": 7.99201144759984e-06, + "loss": 44.7773, + "step": 45320 + }, + { + "epoch": 0.18313893591147276, + "grad_norm": 1291.3709716796875, + "learning_rate": 7.990881348418554e-06, + "loss": 53.2255, + "step": 45330 + }, + { + "epoch": 0.18317933717684037, + "grad_norm": 493.1036071777344, + "learning_rate": 7.989751011266565e-06, + "loss": 42.3629, + "step": 45340 + }, + { + "epoch": 0.183219738442208, + "grad_norm": 657.96826171875, + "learning_rate": 7.988620436233806e-06, + "loss": 50.5465, + "step": 45350 + }, + { + "epoch": 0.18326013970757565, + "grad_norm": 1039.5506591796875, + "learning_rate": 7.987489623410236e-06, + "loss": 66.5563, + "step": 45360 + }, + { + "epoch": 0.18330054097294327, + "grad_norm": 375.0739440917969, + "learning_rate": 7.986358572885828e-06, + "loss": 37.0097, + "step": 45370 + }, + { + "epoch": 0.1833409422383109, + "grad_norm": 509.0332946777344, + "learning_rate": 7.985227284750574e-06, + "loss": 42.3172, + "step": 45380 + }, + { + "epoch": 0.18338134350367855, + "grad_norm": 906.0650024414062, + "learning_rate": 7.984095759094485e-06, + "loss": 54.2499, + "step": 45390 + }, + { + "epoch": 0.18342174476904616, + "grad_norm": 832.644287109375, + "learning_rate": 7.982963996007591e-06, + "loss": 59.8838, + "step": 45400 + }, + { + "epoch": 0.1834621460344138, + "grad_norm": 371.72076416015625, + "learning_rate": 7.981831995579943e-06, + "loss": 48.7457, + "step": 45410 + }, + { + "epoch": 0.18350254729978144, + "grad_norm": 285.2592468261719, + "learning_rate": 7.980699757901607e-06, + "loss": 46.093, + "step": 45420 + }, + { + "epoch": 0.18354294856514905, + "grad_norm": 712.9854736328125, + "learning_rate": 7.97956728306267e-06, + "loss": 33.3265, + "step": 45430 + }, + { + "epoch": 0.1835833498305167, + "grad_norm": 175.07998657226562, + "learning_rate": 7.97843457115324e-06, + "loss": 33.9601, + "step": 45440 + }, + { + "epoch": 0.18362375109588433, + "grad_norm": 219.32000732421875, + "learning_rate": 7.97730162226344e-06, + "loss": 51.4527, + "step": 45450 + }, + { + "epoch": 0.18366415236125194, + "grad_norm": 399.9309997558594, + "learning_rate": 7.976168436483415e-06, + "loss": 66.9279, + "step": 45460 + }, + { + "epoch": 0.18370455362661958, + "grad_norm": 1032.3216552734375, + "learning_rate": 7.975035013903326e-06, + "loss": 46.2986, + "step": 45470 + }, + { + "epoch": 0.18374495489198722, + "grad_norm": 759.1256103515625, + "learning_rate": 7.973901354613353e-06, + "loss": 47.9423, + "step": 45480 + }, + { + "epoch": 0.18378535615735486, + "grad_norm": 829.003173828125, + "learning_rate": 7.972767458703697e-06, + "loss": 51.7219, + "step": 45490 + }, + { + "epoch": 0.18382575742272247, + "grad_norm": 728.3862915039062, + "learning_rate": 7.971633326264581e-06, + "loss": 64.3442, + "step": 45500 + }, + { + "epoch": 0.18386615868809011, + "grad_norm": 699.3826293945312, + "learning_rate": 7.970498957386237e-06, + "loss": 43.2976, + "step": 45510 + }, + { + "epoch": 0.18390655995345775, + "grad_norm": 634.3785400390625, + "learning_rate": 7.969364352158922e-06, + "loss": 40.1747, + "step": 45520 + }, + { + "epoch": 0.18394696121882537, + "grad_norm": 837.7490844726562, + "learning_rate": 7.968229510672915e-06, + "loss": 53.3326, + "step": 45530 + }, + { + "epoch": 0.183987362484193, + "grad_norm": 896.8665771484375, + "learning_rate": 7.967094433018508e-06, + "loss": 48.5009, + "step": 45540 + }, + { + "epoch": 0.18402776374956065, + "grad_norm": 616.2903442382812, + "learning_rate": 7.965959119286013e-06, + "loss": 52.8024, + "step": 45550 + }, + { + "epoch": 0.18406816501492826, + "grad_norm": 555.3331909179688, + "learning_rate": 7.964823569565765e-06, + "loss": 58.4551, + "step": 45560 + }, + { + "epoch": 0.1841085662802959, + "grad_norm": 592.5578002929688, + "learning_rate": 7.963687783948111e-06, + "loss": 42.5069, + "step": 45570 + }, + { + "epoch": 0.18414896754566354, + "grad_norm": 665.7092895507812, + "learning_rate": 7.96255176252342e-06, + "loss": 69.5754, + "step": 45580 + }, + { + "epoch": 0.18418936881103115, + "grad_norm": 947.595703125, + "learning_rate": 7.961415505382083e-06, + "loss": 39.7731, + "step": 45590 + }, + { + "epoch": 0.1842297700763988, + "grad_norm": 1249.362548828125, + "learning_rate": 7.960279012614508e-06, + "loss": 57.2685, + "step": 45600 + }, + { + "epoch": 0.18427017134176643, + "grad_norm": 501.44244384765625, + "learning_rate": 7.959142284311115e-06, + "loss": 41.9633, + "step": 45610 + }, + { + "epoch": 0.18431057260713404, + "grad_norm": 530.5111083984375, + "learning_rate": 7.958005320562349e-06, + "loss": 65.3412, + "step": 45620 + }, + { + "epoch": 0.18435097387250168, + "grad_norm": 804.0465087890625, + "learning_rate": 7.95686812145868e-06, + "loss": 76.4228, + "step": 45630 + }, + { + "epoch": 0.18439137513786932, + "grad_norm": 816.2349243164062, + "learning_rate": 7.955730687090582e-06, + "loss": 48.9861, + "step": 45640 + }, + { + "epoch": 0.18443177640323694, + "grad_norm": 346.8124084472656, + "learning_rate": 7.954593017548557e-06, + "loss": 43.3233, + "step": 45650 + }, + { + "epoch": 0.18447217766860458, + "grad_norm": 787.0435791015625, + "learning_rate": 7.953455112923127e-06, + "loss": 50.9048, + "step": 45660 + }, + { + "epoch": 0.18451257893397222, + "grad_norm": 842.18212890625, + "learning_rate": 7.952316973304828e-06, + "loss": 49.7624, + "step": 45670 + }, + { + "epoch": 0.18455298019933986, + "grad_norm": 945.1439208984375, + "learning_rate": 7.951178598784217e-06, + "loss": 34.3345, + "step": 45680 + }, + { + "epoch": 0.18459338146470747, + "grad_norm": 454.4792175292969, + "learning_rate": 7.950039989451868e-06, + "loss": 59.2897, + "step": 45690 + }, + { + "epoch": 0.1846337827300751, + "grad_norm": 581.51806640625, + "learning_rate": 7.948901145398376e-06, + "loss": 36.0325, + "step": 45700 + }, + { + "epoch": 0.18467418399544275, + "grad_norm": 453.9153747558594, + "learning_rate": 7.947762066714353e-06, + "loss": 39.5895, + "step": 45710 + }, + { + "epoch": 0.18471458526081036, + "grad_norm": 727.990234375, + "learning_rate": 7.946622753490433e-06, + "loss": 35.5361, + "step": 45720 + }, + { + "epoch": 0.184754986526178, + "grad_norm": 576.713623046875, + "learning_rate": 7.945483205817262e-06, + "loss": 36.7247, + "step": 45730 + }, + { + "epoch": 0.18479538779154564, + "grad_norm": 1433.50830078125, + "learning_rate": 7.94434342378551e-06, + "loss": 35.3859, + "step": 45740 + }, + { + "epoch": 0.18483578905691325, + "grad_norm": 90.16148376464844, + "learning_rate": 7.943203407485864e-06, + "loss": 49.1724, + "step": 45750 + }, + { + "epoch": 0.1848761903222809, + "grad_norm": 442.8776550292969, + "learning_rate": 7.942063157009033e-06, + "loss": 56.5173, + "step": 45760 + }, + { + "epoch": 0.18491659158764853, + "grad_norm": 409.0257568359375, + "learning_rate": 7.940922672445737e-06, + "loss": 37.2671, + "step": 45770 + }, + { + "epoch": 0.18495699285301614, + "grad_norm": 1186.24560546875, + "learning_rate": 7.939781953886722e-06, + "loss": 53.6409, + "step": 45780 + }, + { + "epoch": 0.18499739411838378, + "grad_norm": 218.8214111328125, + "learning_rate": 7.938641001422747e-06, + "loss": 32.4635, + "step": 45790 + }, + { + "epoch": 0.18503779538375142, + "grad_norm": 342.5951232910156, + "learning_rate": 7.937499815144597e-06, + "loss": 50.4769, + "step": 45800 + }, + { + "epoch": 0.18507819664911904, + "grad_norm": 891.3570556640625, + "learning_rate": 7.936358395143065e-06, + "loss": 67.8622, + "step": 45810 + }, + { + "epoch": 0.18511859791448668, + "grad_norm": 768.8310546875, + "learning_rate": 7.935216741508971e-06, + "loss": 57.9334, + "step": 45820 + }, + { + "epoch": 0.18515899917985432, + "grad_norm": 299.90533447265625, + "learning_rate": 7.934074854333153e-06, + "loss": 31.2045, + "step": 45830 + }, + { + "epoch": 0.18519940044522196, + "grad_norm": 343.8869323730469, + "learning_rate": 7.932932733706467e-06, + "loss": 50.6509, + "step": 45840 + }, + { + "epoch": 0.18523980171058957, + "grad_norm": 722.1644897460938, + "learning_rate": 7.931790379719781e-06, + "loss": 51.3919, + "step": 45850 + }, + { + "epoch": 0.1852802029759572, + "grad_norm": 559.4667358398438, + "learning_rate": 7.93064779246399e-06, + "loss": 51.729, + "step": 45860 + }, + { + "epoch": 0.18532060424132485, + "grad_norm": 623.2672119140625, + "learning_rate": 7.929504972030003e-06, + "loss": 36.6839, + "step": 45870 + }, + { + "epoch": 0.18536100550669246, + "grad_norm": 706.392578125, + "learning_rate": 7.928361918508752e-06, + "loss": 43.8657, + "step": 45880 + }, + { + "epoch": 0.1854014067720601, + "grad_norm": 499.6767578125, + "learning_rate": 7.927218631991182e-06, + "loss": 34.8604, + "step": 45890 + }, + { + "epoch": 0.18544180803742774, + "grad_norm": 524.6019897460938, + "learning_rate": 7.92607511256826e-06, + "loss": 39.3707, + "step": 45900 + }, + { + "epoch": 0.18548220930279535, + "grad_norm": 429.8359680175781, + "learning_rate": 7.924931360330968e-06, + "loss": 47.6919, + "step": 45910 + }, + { + "epoch": 0.185522610568163, + "grad_norm": 569.885498046875, + "learning_rate": 7.92378737537031e-06, + "loss": 44.3475, + "step": 45920 + }, + { + "epoch": 0.18556301183353063, + "grad_norm": 947.2518310546875, + "learning_rate": 7.922643157777314e-06, + "loss": 53.1495, + "step": 45930 + }, + { + "epoch": 0.18560341309889825, + "grad_norm": 860.892578125, + "learning_rate": 7.921498707643011e-06, + "loss": 60.5503, + "step": 45940 + }, + { + "epoch": 0.18564381436426589, + "grad_norm": 1184.7371826171875, + "learning_rate": 7.920354025058467e-06, + "loss": 54.1781, + "step": 45950 + }, + { + "epoch": 0.18568421562963353, + "grad_norm": 666.4442138671875, + "learning_rate": 7.919209110114752e-06, + "loss": 46.4685, + "step": 45960 + }, + { + "epoch": 0.18572461689500114, + "grad_norm": 800.7621459960938, + "learning_rate": 7.918063962902968e-06, + "loss": 43.6424, + "step": 45970 + }, + { + "epoch": 0.18576501816036878, + "grad_norm": 602.9782104492188, + "learning_rate": 7.916918583514227e-06, + "loss": 38.4737, + "step": 45980 + }, + { + "epoch": 0.18580541942573642, + "grad_norm": 361.55755615234375, + "learning_rate": 7.91577297203966e-06, + "loss": 46.0553, + "step": 45990 + }, + { + "epoch": 0.18584582069110406, + "grad_norm": 915.6522216796875, + "learning_rate": 7.91462712857042e-06, + "loss": 46.0892, + "step": 46000 + }, + { + "epoch": 0.18588622195647167, + "grad_norm": 965.8405151367188, + "learning_rate": 7.913481053197673e-06, + "loss": 33.8431, + "step": 46010 + }, + { + "epoch": 0.1859266232218393, + "grad_norm": 701.4907836914062, + "learning_rate": 7.912334746012613e-06, + "loss": 31.4115, + "step": 46020 + }, + { + "epoch": 0.18596702448720695, + "grad_norm": 470.70623779296875, + "learning_rate": 7.911188207106442e-06, + "loss": 29.5224, + "step": 46030 + }, + { + "epoch": 0.18600742575257456, + "grad_norm": 877.6990966796875, + "learning_rate": 7.910041436570386e-06, + "loss": 56.7888, + "step": 46040 + }, + { + "epoch": 0.1860478270179422, + "grad_norm": 787.839111328125, + "learning_rate": 7.90889443449569e-06, + "loss": 69.0265, + "step": 46050 + }, + { + "epoch": 0.18608822828330984, + "grad_norm": 740.5379638671875, + "learning_rate": 7.90774720097361e-06, + "loss": 49.0101, + "step": 46060 + }, + { + "epoch": 0.18612862954867745, + "grad_norm": 436.9485778808594, + "learning_rate": 7.906599736095433e-06, + "loss": 62.774, + "step": 46070 + }, + { + "epoch": 0.1861690308140451, + "grad_norm": 461.7222595214844, + "learning_rate": 7.905452039952453e-06, + "loss": 51.2547, + "step": 46080 + }, + { + "epoch": 0.18620943207941273, + "grad_norm": 967.5213623046875, + "learning_rate": 7.904304112635987e-06, + "loss": 46.2335, + "step": 46090 + }, + { + "epoch": 0.18624983334478035, + "grad_norm": 642.5325317382812, + "learning_rate": 7.903155954237375e-06, + "loss": 46.4207, + "step": 46100 + }, + { + "epoch": 0.186290234610148, + "grad_norm": 1148.9150390625, + "learning_rate": 7.902007564847967e-06, + "loss": 67.827, + "step": 46110 + }, + { + "epoch": 0.18633063587551563, + "grad_norm": 737.8858032226562, + "learning_rate": 7.900858944559133e-06, + "loss": 52.1405, + "step": 46120 + }, + { + "epoch": 0.18637103714088324, + "grad_norm": 762.6358642578125, + "learning_rate": 7.899710093462267e-06, + "loss": 38.3844, + "step": 46130 + }, + { + "epoch": 0.18641143840625088, + "grad_norm": 412.6938781738281, + "learning_rate": 7.898561011648777e-06, + "loss": 53.3843, + "step": 46140 + }, + { + "epoch": 0.18645183967161852, + "grad_norm": 606.4049072265625, + "learning_rate": 7.89741169921009e-06, + "loss": 40.317, + "step": 46150 + }, + { + "epoch": 0.18649224093698616, + "grad_norm": 684.5941772460938, + "learning_rate": 7.896262156237652e-06, + "loss": 53.5236, + "step": 46160 + }, + { + "epoch": 0.18653264220235377, + "grad_norm": 751.0098266601562, + "learning_rate": 7.895112382822925e-06, + "loss": 71.0752, + "step": 46170 + }, + { + "epoch": 0.1865730434677214, + "grad_norm": 555.09521484375, + "learning_rate": 7.893962379057393e-06, + "loss": 41.15, + "step": 46180 + }, + { + "epoch": 0.18661344473308905, + "grad_norm": 830.2692260742188, + "learning_rate": 7.892812145032557e-06, + "loss": 41.5861, + "step": 46190 + }, + { + "epoch": 0.18665384599845666, + "grad_norm": 775.268310546875, + "learning_rate": 7.891661680839932e-06, + "loss": 61.9445, + "step": 46200 + }, + { + "epoch": 0.1866942472638243, + "grad_norm": 706.5367431640625, + "learning_rate": 7.89051098657106e-06, + "loss": 78.4231, + "step": 46210 + }, + { + "epoch": 0.18673464852919194, + "grad_norm": 700.9517822265625, + "learning_rate": 7.889360062317495e-06, + "loss": 35.4704, + "step": 46220 + }, + { + "epoch": 0.18677504979455956, + "grad_norm": 944.4683837890625, + "learning_rate": 7.888208908170812e-06, + "loss": 51.0714, + "step": 46230 + }, + { + "epoch": 0.1868154510599272, + "grad_norm": 866.54443359375, + "learning_rate": 7.887057524222596e-06, + "loss": 59.0249, + "step": 46240 + }, + { + "epoch": 0.18685585232529484, + "grad_norm": 799.7461547851562, + "learning_rate": 7.885905910564466e-06, + "loss": 65.52, + "step": 46250 + }, + { + "epoch": 0.18689625359066245, + "grad_norm": 1012.56396484375, + "learning_rate": 7.884754067288047e-06, + "loss": 55.5126, + "step": 46260 + }, + { + "epoch": 0.1869366548560301, + "grad_norm": 729.2407836914062, + "learning_rate": 7.883601994484986e-06, + "loss": 47.3084, + "step": 46270 + }, + { + "epoch": 0.18697705612139773, + "grad_norm": 505.71856689453125, + "learning_rate": 7.882449692246948e-06, + "loss": 41.2005, + "step": 46280 + }, + { + "epoch": 0.18701745738676534, + "grad_norm": 1154.6778564453125, + "learning_rate": 7.881297160665616e-06, + "loss": 49.5151, + "step": 46290 + }, + { + "epoch": 0.18705785865213298, + "grad_norm": 525.9098510742188, + "learning_rate": 7.880144399832693e-06, + "loss": 42.0621, + "step": 46300 + }, + { + "epoch": 0.18709825991750062, + "grad_norm": 970.4302978515625, + "learning_rate": 7.878991409839897e-06, + "loss": 56.0448, + "step": 46310 + }, + { + "epoch": 0.18713866118286826, + "grad_norm": 808.189697265625, + "learning_rate": 7.87783819077897e-06, + "loss": 47.8887, + "step": 46320 + }, + { + "epoch": 0.18717906244823587, + "grad_norm": 357.2666015625, + "learning_rate": 7.876684742741665e-06, + "loss": 28.5624, + "step": 46330 + }, + { + "epoch": 0.1872194637136035, + "grad_norm": 428.8252868652344, + "learning_rate": 7.875531065819755e-06, + "loss": 31.8156, + "step": 46340 + }, + { + "epoch": 0.18725986497897115, + "grad_norm": 831.3470458984375, + "learning_rate": 7.874377160105037e-06, + "loss": 50.5764, + "step": 46350 + }, + { + "epoch": 0.18730026624433876, + "grad_norm": 724.0596313476562, + "learning_rate": 7.873223025689319e-06, + "loss": 50.731, + "step": 46360 + }, + { + "epoch": 0.1873406675097064, + "grad_norm": 865.5615844726562, + "learning_rate": 7.872068662664432e-06, + "loss": 58.5636, + "step": 46370 + }, + { + "epoch": 0.18738106877507404, + "grad_norm": 537.1841430664062, + "learning_rate": 7.870914071122222e-06, + "loss": 63.4505, + "step": 46380 + }, + { + "epoch": 0.18742147004044166, + "grad_norm": 715.6597900390625, + "learning_rate": 7.869759251154554e-06, + "loss": 45.5798, + "step": 46390 + }, + { + "epoch": 0.1874618713058093, + "grad_norm": 418.3079833984375, + "learning_rate": 7.868604202853314e-06, + "loss": 37.2996, + "step": 46400 + }, + { + "epoch": 0.18750227257117694, + "grad_norm": 661.2510375976562, + "learning_rate": 7.867448926310403e-06, + "loss": 75.2654, + "step": 46410 + }, + { + "epoch": 0.18754267383654455, + "grad_norm": 398.23077392578125, + "learning_rate": 7.866293421617741e-06, + "loss": 46.4417, + "step": 46420 + }, + { + "epoch": 0.1875830751019122, + "grad_norm": 1762.594482421875, + "learning_rate": 7.865137688867264e-06, + "loss": 45.2139, + "step": 46430 + }, + { + "epoch": 0.18762347636727983, + "grad_norm": 663.5921630859375, + "learning_rate": 7.86398172815093e-06, + "loss": 36.9223, + "step": 46440 + }, + { + "epoch": 0.18766387763264744, + "grad_norm": 613.1920776367188, + "learning_rate": 7.862825539560716e-06, + "loss": 46.3909, + "step": 46450 + }, + { + "epoch": 0.18770427889801508, + "grad_norm": 401.7522888183594, + "learning_rate": 7.861669123188613e-06, + "loss": 36.1008, + "step": 46460 + }, + { + "epoch": 0.18774468016338272, + "grad_norm": 319.8637390136719, + "learning_rate": 7.86051247912663e-06, + "loss": 51.4729, + "step": 46470 + }, + { + "epoch": 0.18778508142875036, + "grad_norm": 397.6440124511719, + "learning_rate": 7.859355607466797e-06, + "loss": 55.7406, + "step": 46480 + }, + { + "epoch": 0.18782548269411797, + "grad_norm": 306.9148254394531, + "learning_rate": 7.858198508301161e-06, + "loss": 47.0562, + "step": 46490 + }, + { + "epoch": 0.1878658839594856, + "grad_norm": 568.0166625976562, + "learning_rate": 7.857041181721788e-06, + "loss": 31.964, + "step": 46500 + }, + { + "epoch": 0.18790628522485325, + "grad_norm": 439.1131286621094, + "learning_rate": 7.855883627820757e-06, + "loss": 25.5811, + "step": 46510 + }, + { + "epoch": 0.18794668649022087, + "grad_norm": 988.5216064453125, + "learning_rate": 7.854725846690175e-06, + "loss": 40.9094, + "step": 46520 + }, + { + "epoch": 0.1879870877555885, + "grad_norm": 401.2980041503906, + "learning_rate": 7.85356783842216e-06, + "loss": 46.3652, + "step": 46530 + }, + { + "epoch": 0.18802748902095615, + "grad_norm": 349.0110168457031, + "learning_rate": 7.852409603108845e-06, + "loss": 33.5314, + "step": 46540 + }, + { + "epoch": 0.18806789028632376, + "grad_norm": 694.8333129882812, + "learning_rate": 7.85125114084239e-06, + "loss": 40.1035, + "step": 46550 + }, + { + "epoch": 0.1881082915516914, + "grad_norm": 521.3746337890625, + "learning_rate": 7.850092451714967e-06, + "loss": 33.4153, + "step": 46560 + }, + { + "epoch": 0.18814869281705904, + "grad_norm": 1033.23486328125, + "learning_rate": 7.84893353581877e-06, + "loss": 38.9485, + "step": 46570 + }, + { + "epoch": 0.18818909408242665, + "grad_norm": 380.1110534667969, + "learning_rate": 7.847774393246005e-06, + "loss": 73.8259, + "step": 46580 + }, + { + "epoch": 0.1882294953477943, + "grad_norm": 336.256103515625, + "learning_rate": 7.8466150240889e-06, + "loss": 43.2453, + "step": 46590 + }, + { + "epoch": 0.18826989661316193, + "grad_norm": 512.811279296875, + "learning_rate": 7.845455428439703e-06, + "loss": 59.6738, + "step": 46600 + }, + { + "epoch": 0.18831029787852954, + "grad_norm": 419.74517822265625, + "learning_rate": 7.844295606390675e-06, + "loss": 49.9523, + "step": 46610 + }, + { + "epoch": 0.18835069914389718, + "grad_norm": 469.2853088378906, + "learning_rate": 7.843135558034101e-06, + "loss": 31.1946, + "step": 46620 + }, + { + "epoch": 0.18839110040926482, + "grad_norm": 805.4540405273438, + "learning_rate": 7.841975283462278e-06, + "loss": 42.8343, + "step": 46630 + }, + { + "epoch": 0.18843150167463246, + "grad_norm": 423.8988342285156, + "learning_rate": 7.840814782767525e-06, + "loss": 45.9534, + "step": 46640 + }, + { + "epoch": 0.18847190294000007, + "grad_norm": 618.1028442382812, + "learning_rate": 7.839654056042176e-06, + "loss": 55.0553, + "step": 46650 + }, + { + "epoch": 0.18851230420536771, + "grad_norm": 100.71240234375, + "learning_rate": 7.838493103378588e-06, + "loss": 45.5022, + "step": 46660 + }, + { + "epoch": 0.18855270547073535, + "grad_norm": 350.67462158203125, + "learning_rate": 7.83733192486913e-06, + "loss": 41.2146, + "step": 46670 + }, + { + "epoch": 0.18859310673610297, + "grad_norm": 791.0911254882812, + "learning_rate": 7.836170520606191e-06, + "loss": 46.5223, + "step": 46680 + }, + { + "epoch": 0.1886335080014706, + "grad_norm": 696.6979370117188, + "learning_rate": 7.83500889068218e-06, + "loss": 33.1812, + "step": 46690 + }, + { + "epoch": 0.18867390926683825, + "grad_norm": 492.4786682128906, + "learning_rate": 7.833847035189524e-06, + "loss": 46.7815, + "step": 46700 + }, + { + "epoch": 0.18871431053220586, + "grad_norm": 376.7648620605469, + "learning_rate": 7.832684954220664e-06, + "loss": 69.6158, + "step": 46710 + }, + { + "epoch": 0.1887547117975735, + "grad_norm": 572.7025146484375, + "learning_rate": 7.831522647868064e-06, + "loss": 51.5028, + "step": 46720 + }, + { + "epoch": 0.18879511306294114, + "grad_norm": 703.757568359375, + "learning_rate": 7.8303601162242e-06, + "loss": 42.5955, + "step": 46730 + }, + { + "epoch": 0.18883551432830875, + "grad_norm": 538.8714599609375, + "learning_rate": 7.829197359381571e-06, + "loss": 33.9557, + "step": 46740 + }, + { + "epoch": 0.1888759155936764, + "grad_norm": 1617.576416015625, + "learning_rate": 7.828034377432694e-06, + "loss": 50.4682, + "step": 46750 + }, + { + "epoch": 0.18891631685904403, + "grad_norm": 376.0511474609375, + "learning_rate": 7.826871170470099e-06, + "loss": 36.5483, + "step": 46760 + }, + { + "epoch": 0.18895671812441164, + "grad_norm": 975.2518920898438, + "learning_rate": 7.82570773858634e-06, + "loss": 64.4105, + "step": 46770 + }, + { + "epoch": 0.18899711938977928, + "grad_norm": 1208.195556640625, + "learning_rate": 7.824544081873984e-06, + "loss": 49.3749, + "step": 46780 + }, + { + "epoch": 0.18903752065514692, + "grad_norm": 1309.72607421875, + "learning_rate": 7.823380200425618e-06, + "loss": 69.8779, + "step": 46790 + }, + { + "epoch": 0.18907792192051456, + "grad_norm": 560.5164794921875, + "learning_rate": 7.822216094333847e-06, + "loss": 38.4529, + "step": 46800 + }, + { + "epoch": 0.18911832318588218, + "grad_norm": 631.4193115234375, + "learning_rate": 7.821051763691293e-06, + "loss": 36.4182, + "step": 46810 + }, + { + "epoch": 0.18915872445124982, + "grad_norm": 1111.424560546875, + "learning_rate": 7.819887208590597e-06, + "loss": 45.4261, + "step": 46820 + }, + { + "epoch": 0.18919912571661746, + "grad_norm": 431.0059814453125, + "learning_rate": 7.818722429124418e-06, + "loss": 36.5693, + "step": 46830 + }, + { + "epoch": 0.18923952698198507, + "grad_norm": 721.0582885742188, + "learning_rate": 7.817557425385433e-06, + "loss": 54.937, + "step": 46840 + }, + { + "epoch": 0.1892799282473527, + "grad_norm": 179.35610961914062, + "learning_rate": 7.816392197466333e-06, + "loss": 49.9349, + "step": 46850 + }, + { + "epoch": 0.18932032951272035, + "grad_norm": 661.6287231445312, + "learning_rate": 7.815226745459831e-06, + "loss": 44.7497, + "step": 46860 + }, + { + "epoch": 0.18936073077808796, + "grad_norm": 802.9453125, + "learning_rate": 7.814061069458657e-06, + "loss": 42.5215, + "step": 46870 + }, + { + "epoch": 0.1894011320434556, + "grad_norm": 1228.8883056640625, + "learning_rate": 7.81289516955556e-06, + "loss": 59.6201, + "step": 46880 + }, + { + "epoch": 0.18944153330882324, + "grad_norm": 555.4932861328125, + "learning_rate": 7.811729045843303e-06, + "loss": 51.9421, + "step": 46890 + }, + { + "epoch": 0.18948193457419085, + "grad_norm": 495.4557189941406, + "learning_rate": 7.81056269841467e-06, + "loss": 82.952, + "step": 46900 + }, + { + "epoch": 0.1895223358395585, + "grad_norm": 354.2893371582031, + "learning_rate": 7.80939612736246e-06, + "loss": 43.3217, + "step": 46910 + }, + { + "epoch": 0.18956273710492613, + "grad_norm": 1203.706787109375, + "learning_rate": 7.808229332779496e-06, + "loss": 46.9553, + "step": 46920 + }, + { + "epoch": 0.18960313837029374, + "grad_norm": 460.4730224609375, + "learning_rate": 7.807062314758612e-06, + "loss": 33.5704, + "step": 46930 + }, + { + "epoch": 0.18964353963566138, + "grad_norm": 309.24981689453125, + "learning_rate": 7.80589507339266e-06, + "loss": 54.543, + "step": 46940 + }, + { + "epoch": 0.18968394090102902, + "grad_norm": 573.7108154296875, + "learning_rate": 7.804727608774516e-06, + "loss": 53.2966, + "step": 46950 + }, + { + "epoch": 0.18972434216639666, + "grad_norm": 988.41259765625, + "learning_rate": 7.803559920997067e-06, + "loss": 46.6455, + "step": 46960 + }, + { + "epoch": 0.18976474343176428, + "grad_norm": 534.046630859375, + "learning_rate": 7.802392010153223e-06, + "loss": 32.7703, + "step": 46970 + }, + { + "epoch": 0.18980514469713192, + "grad_norm": 544.7523803710938, + "learning_rate": 7.801223876335907e-06, + "loss": 63.0975, + "step": 46980 + }, + { + "epoch": 0.18984554596249956, + "grad_norm": 608.649169921875, + "learning_rate": 7.800055519638064e-06, + "loss": 49.6523, + "step": 46990 + }, + { + "epoch": 0.18988594722786717, + "grad_norm": 977.2344360351562, + "learning_rate": 7.798886940152654e-06, + "loss": 50.4927, + "step": 47000 + }, + { + "epoch": 0.1899263484932348, + "grad_norm": 653.7332153320312, + "learning_rate": 7.797718137972654e-06, + "loss": 56.2325, + "step": 47010 + }, + { + "epoch": 0.18996674975860245, + "grad_norm": 859.51806640625, + "learning_rate": 7.79654911319106e-06, + "loss": 46.1248, + "step": 47020 + }, + { + "epoch": 0.19000715102397006, + "grad_norm": 285.8470764160156, + "learning_rate": 7.795379865900892e-06, + "loss": 35.2613, + "step": 47030 + }, + { + "epoch": 0.1900475522893377, + "grad_norm": 723.26904296875, + "learning_rate": 7.794210396195175e-06, + "loss": 48.0343, + "step": 47040 + }, + { + "epoch": 0.19008795355470534, + "grad_norm": 846.8826904296875, + "learning_rate": 7.79304070416696e-06, + "loss": 44.9846, + "step": 47050 + }, + { + "epoch": 0.19012835482007295, + "grad_norm": 375.7879638671875, + "learning_rate": 7.791870789909315e-06, + "loss": 54.1298, + "step": 47060 + }, + { + "epoch": 0.1901687560854406, + "grad_norm": 944.4963989257812, + "learning_rate": 7.790700653515324e-06, + "loss": 40.4307, + "step": 47070 + }, + { + "epoch": 0.19020915735080823, + "grad_norm": 455.2538146972656, + "learning_rate": 7.789530295078089e-06, + "loss": 52.4162, + "step": 47080 + }, + { + "epoch": 0.19024955861617585, + "grad_norm": 548.3023071289062, + "learning_rate": 7.788359714690732e-06, + "loss": 35.7722, + "step": 47090 + }, + { + "epoch": 0.19028995988154349, + "grad_norm": 358.2738037109375, + "learning_rate": 7.787188912446389e-06, + "loss": 51.5841, + "step": 47100 + }, + { + "epoch": 0.19033036114691113, + "grad_norm": 1344.7706298828125, + "learning_rate": 7.786017888438214e-06, + "loss": 30.4217, + "step": 47110 + }, + { + "epoch": 0.19037076241227877, + "grad_norm": 593.27685546875, + "learning_rate": 7.784846642759383e-06, + "loss": 42.8882, + "step": 47120 + }, + { + "epoch": 0.19041116367764638, + "grad_norm": 757.25, + "learning_rate": 7.783675175503087e-06, + "loss": 60.5505, + "step": 47130 + }, + { + "epoch": 0.19045156494301402, + "grad_norm": 427.2942199707031, + "learning_rate": 7.78250348676253e-06, + "loss": 46.353, + "step": 47140 + }, + { + "epoch": 0.19049196620838166, + "grad_norm": 302.1863098144531, + "learning_rate": 7.781331576630941e-06, + "loss": 52.2239, + "step": 47150 + }, + { + "epoch": 0.19053236747374927, + "grad_norm": 444.2868347167969, + "learning_rate": 7.780159445201562e-06, + "loss": 52.4015, + "step": 47160 + }, + { + "epoch": 0.1905727687391169, + "grad_norm": 411.0354309082031, + "learning_rate": 7.778987092567658e-06, + "loss": 34.217, + "step": 47170 + }, + { + "epoch": 0.19061317000448455, + "grad_norm": 359.02069091796875, + "learning_rate": 7.777814518822504e-06, + "loss": 47.4867, + "step": 47180 + }, + { + "epoch": 0.19065357126985216, + "grad_norm": 673.830810546875, + "learning_rate": 7.776641724059398e-06, + "loss": 40.569, + "step": 47190 + }, + { + "epoch": 0.1906939725352198, + "grad_norm": 902.4163208007812, + "learning_rate": 7.77546870837165e-06, + "loss": 36.7457, + "step": 47200 + }, + { + "epoch": 0.19073437380058744, + "grad_norm": 663.3661499023438, + "learning_rate": 7.774295471852596e-06, + "loss": 66.2683, + "step": 47210 + }, + { + "epoch": 0.19077477506595505, + "grad_norm": 489.5060119628906, + "learning_rate": 7.773122014595584e-06, + "loss": 55.9361, + "step": 47220 + }, + { + "epoch": 0.1908151763313227, + "grad_norm": 572.3447875976562, + "learning_rate": 7.771948336693983e-06, + "loss": 58.0868, + "step": 47230 + }, + { + "epoch": 0.19085557759669033, + "grad_norm": 777.2191772460938, + "learning_rate": 7.770774438241168e-06, + "loss": 58.8669, + "step": 47240 + }, + { + "epoch": 0.19089597886205795, + "grad_norm": 754.6911010742188, + "learning_rate": 7.769600319330553e-06, + "loss": 47.5421, + "step": 47250 + }, + { + "epoch": 0.1909363801274256, + "grad_norm": 693.4794311523438, + "learning_rate": 7.768425980055548e-06, + "loss": 35.347, + "step": 47260 + }, + { + "epoch": 0.19097678139279323, + "grad_norm": 724.5881958007812, + "learning_rate": 7.767251420509593e-06, + "loss": 55.401, + "step": 47270 + }, + { + "epoch": 0.19101718265816087, + "grad_norm": 370.7134704589844, + "learning_rate": 7.766076640786145e-06, + "loss": 39.1298, + "step": 47280 + }, + { + "epoch": 0.19105758392352848, + "grad_norm": 688.7514038085938, + "learning_rate": 7.764901640978671e-06, + "loss": 57.3265, + "step": 47290 + }, + { + "epoch": 0.19109798518889612, + "grad_norm": 725.8629150390625, + "learning_rate": 7.763726421180664e-06, + "loss": 43.4557, + "step": 47300 + }, + { + "epoch": 0.19113838645426376, + "grad_norm": 408.5218811035156, + "learning_rate": 7.762550981485629e-06, + "loss": 48.6489, + "step": 47310 + }, + { + "epoch": 0.19117878771963137, + "grad_norm": 475.49114990234375, + "learning_rate": 7.76137532198709e-06, + "loss": 42.917, + "step": 47320 + }, + { + "epoch": 0.191219188984999, + "grad_norm": 921.5711059570312, + "learning_rate": 7.76019944277859e-06, + "loss": 35.9836, + "step": 47330 + }, + { + "epoch": 0.19125959025036665, + "grad_norm": 696.03369140625, + "learning_rate": 7.759023343953689e-06, + "loss": 56.2156, + "step": 47340 + }, + { + "epoch": 0.19129999151573426, + "grad_norm": 962.43603515625, + "learning_rate": 7.757847025605963e-06, + "loss": 54.9843, + "step": 47350 + }, + { + "epoch": 0.1913403927811019, + "grad_norm": 471.5603942871094, + "learning_rate": 7.756670487829005e-06, + "loss": 54.7021, + "step": 47360 + }, + { + "epoch": 0.19138079404646954, + "grad_norm": 202.61300659179688, + "learning_rate": 7.755493730716428e-06, + "loss": 46.6755, + "step": 47370 + }, + { + "epoch": 0.19142119531183716, + "grad_norm": 397.8414611816406, + "learning_rate": 7.75431675436186e-06, + "loss": 43.3254, + "step": 47380 + }, + { + "epoch": 0.1914615965772048, + "grad_norm": 439.717529296875, + "learning_rate": 7.753139558858949e-06, + "loss": 45.3347, + "step": 47390 + }, + { + "epoch": 0.19150199784257244, + "grad_norm": 431.6274108886719, + "learning_rate": 7.751962144301359e-06, + "loss": 44.974, + "step": 47400 + }, + { + "epoch": 0.19154239910794005, + "grad_norm": 301.6814880371094, + "learning_rate": 7.75078451078277e-06, + "loss": 36.2488, + "step": 47410 + }, + { + "epoch": 0.1915828003733077, + "grad_norm": 379.6395568847656, + "learning_rate": 7.749606658396883e-06, + "loss": 34.9849, + "step": 47420 + }, + { + "epoch": 0.19162320163867533, + "grad_norm": 1423.8985595703125, + "learning_rate": 7.748428587237412e-06, + "loss": 45.026, + "step": 47430 + }, + { + "epoch": 0.19166360290404297, + "grad_norm": 437.26409912109375, + "learning_rate": 7.747250297398092e-06, + "loss": 48.0475, + "step": 47440 + }, + { + "epoch": 0.19170400416941058, + "grad_norm": 1188.09619140625, + "learning_rate": 7.746071788972675e-06, + "loss": 63.0107, + "step": 47450 + }, + { + "epoch": 0.19174440543477822, + "grad_norm": 596.3973388671875, + "learning_rate": 7.744893062054928e-06, + "loss": 37.3981, + "step": 47460 + }, + { + "epoch": 0.19178480670014586, + "grad_norm": 1094.22021484375, + "learning_rate": 7.743714116738636e-06, + "loss": 51.8161, + "step": 47470 + }, + { + "epoch": 0.19182520796551347, + "grad_norm": 527.591552734375, + "learning_rate": 7.742534953117607e-06, + "loss": 63.8491, + "step": 47480 + }, + { + "epoch": 0.1918656092308811, + "grad_norm": 1503.0655517578125, + "learning_rate": 7.741355571285656e-06, + "loss": 64.103, + "step": 47490 + }, + { + "epoch": 0.19190601049624875, + "grad_norm": 335.1996154785156, + "learning_rate": 7.740175971336624e-06, + "loss": 33.4057, + "step": 47500 + }, + { + "epoch": 0.19194641176161636, + "grad_norm": 675.87451171875, + "learning_rate": 7.738996153364364e-06, + "loss": 48.068, + "step": 47510 + }, + { + "epoch": 0.191986813026984, + "grad_norm": 501.4929504394531, + "learning_rate": 7.737816117462752e-06, + "loss": 43.2299, + "step": 47520 + }, + { + "epoch": 0.19202721429235164, + "grad_norm": 358.48583984375, + "learning_rate": 7.736635863725677e-06, + "loss": 55.4011, + "step": 47530 + }, + { + "epoch": 0.19206761555771926, + "grad_norm": 241.45448303222656, + "learning_rate": 7.735455392247044e-06, + "loss": 61.9485, + "step": 47540 + }, + { + "epoch": 0.1921080168230869, + "grad_norm": 323.69732666015625, + "learning_rate": 7.73427470312078e-06, + "loss": 38.1733, + "step": 47550 + }, + { + "epoch": 0.19214841808845454, + "grad_norm": 330.3637390136719, + "learning_rate": 7.733093796440828e-06, + "loss": 46.7346, + "step": 47560 + }, + { + "epoch": 0.19218881935382215, + "grad_norm": 1063.1988525390625, + "learning_rate": 7.731912672301145e-06, + "loss": 50.6998, + "step": 47570 + }, + { + "epoch": 0.1922292206191898, + "grad_norm": 655.8283081054688, + "learning_rate": 7.730731330795707e-06, + "loss": 46.5782, + "step": 47580 + }, + { + "epoch": 0.19226962188455743, + "grad_norm": 961.4747314453125, + "learning_rate": 7.72954977201851e-06, + "loss": 59.9621, + "step": 47590 + }, + { + "epoch": 0.19231002314992507, + "grad_norm": 322.00726318359375, + "learning_rate": 7.728367996063566e-06, + "loss": 57.8829, + "step": 47600 + }, + { + "epoch": 0.19235042441529268, + "grad_norm": 896.306396484375, + "learning_rate": 7.727186003024902e-06, + "loss": 62.804, + "step": 47610 + }, + { + "epoch": 0.19239082568066032, + "grad_norm": 582.4077758789062, + "learning_rate": 7.726003792996562e-06, + "loss": 58.3848, + "step": 47620 + }, + { + "epoch": 0.19243122694602796, + "grad_norm": 1208.078125, + "learning_rate": 7.724821366072612e-06, + "loss": 54.5546, + "step": 47630 + }, + { + "epoch": 0.19247162821139557, + "grad_norm": 807.9841918945312, + "learning_rate": 7.723638722347132e-06, + "loss": 29.3268, + "step": 47640 + }, + { + "epoch": 0.1925120294767632, + "grad_norm": 1568.94580078125, + "learning_rate": 7.722455861914218e-06, + "loss": 67.2609, + "step": 47650 + }, + { + "epoch": 0.19255243074213085, + "grad_norm": 918.30224609375, + "learning_rate": 7.721272784867983e-06, + "loss": 45.9969, + "step": 47660 + }, + { + "epoch": 0.19259283200749847, + "grad_norm": 733.860595703125, + "learning_rate": 7.720089491302565e-06, + "loss": 43.838, + "step": 47670 + }, + { + "epoch": 0.1926332332728661, + "grad_norm": 403.791748046875, + "learning_rate": 7.718905981312108e-06, + "loss": 47.9569, + "step": 47680 + }, + { + "epoch": 0.19267363453823375, + "grad_norm": 344.37738037109375, + "learning_rate": 7.71772225499078e-06, + "loss": 43.2986, + "step": 47690 + }, + { + "epoch": 0.19271403580360136, + "grad_norm": 959.1226196289062, + "learning_rate": 7.716538312432767e-06, + "loss": 57.1505, + "step": 47700 + }, + { + "epoch": 0.192754437068969, + "grad_norm": 765.4625854492188, + "learning_rate": 7.715354153732265e-06, + "loss": 44.0606, + "step": 47710 + }, + { + "epoch": 0.19279483833433664, + "grad_norm": 910.7199096679688, + "learning_rate": 7.714169778983496e-06, + "loss": 40.9219, + "step": 47720 + }, + { + "epoch": 0.19283523959970425, + "grad_norm": 561.5447387695312, + "learning_rate": 7.712985188280694e-06, + "loss": 75.7906, + "step": 47730 + }, + { + "epoch": 0.1928756408650719, + "grad_norm": 406.99566650390625, + "learning_rate": 7.711800381718111e-06, + "loss": 40.4035, + "step": 47740 + }, + { + "epoch": 0.19291604213043953, + "grad_norm": 757.7989501953125, + "learning_rate": 7.710615359390018e-06, + "loss": 43.6714, + "step": 47750 + }, + { + "epoch": 0.19295644339580717, + "grad_norm": 532.935546875, + "learning_rate": 7.7094301213907e-06, + "loss": 40.9307, + "step": 47760 + }, + { + "epoch": 0.19299684466117478, + "grad_norm": 393.7739562988281, + "learning_rate": 7.708244667814463e-06, + "loss": 53.9688, + "step": 47770 + }, + { + "epoch": 0.19303724592654242, + "grad_norm": 952.9371948242188, + "learning_rate": 7.707058998755626e-06, + "loss": 40.4923, + "step": 47780 + }, + { + "epoch": 0.19307764719191006, + "grad_norm": 470.9456787109375, + "learning_rate": 7.705873114308529e-06, + "loss": 41.2027, + "step": 47790 + }, + { + "epoch": 0.19311804845727767, + "grad_norm": 593.4171142578125, + "learning_rate": 7.704687014567524e-06, + "loss": 36.4702, + "step": 47800 + }, + { + "epoch": 0.19315844972264531, + "grad_norm": 538.1138916015625, + "learning_rate": 7.703500699626988e-06, + "loss": 52.4323, + "step": 47810 + }, + { + "epoch": 0.19319885098801295, + "grad_norm": 861.8800048828125, + "learning_rate": 7.702314169581311e-06, + "loss": 26.6001, + "step": 47820 + }, + { + "epoch": 0.19323925225338057, + "grad_norm": 396.1697082519531, + "learning_rate": 7.701127424524894e-06, + "loss": 43.8624, + "step": 47830 + }, + { + "epoch": 0.1932796535187482, + "grad_norm": 665.240234375, + "learning_rate": 7.699940464552166e-06, + "loss": 72.4191, + "step": 47840 + }, + { + "epoch": 0.19332005478411585, + "grad_norm": 516.3006591796875, + "learning_rate": 7.698753289757565e-06, + "loss": 76.2827, + "step": 47850 + }, + { + "epoch": 0.19336045604948346, + "grad_norm": 3109.86767578125, + "learning_rate": 7.69756590023555e-06, + "loss": 81.7451, + "step": 47860 + }, + { + "epoch": 0.1934008573148511, + "grad_norm": 441.69775390625, + "learning_rate": 7.696378296080598e-06, + "loss": 42.5113, + "step": 47870 + }, + { + "epoch": 0.19344125858021874, + "grad_norm": 449.0380554199219, + "learning_rate": 7.6951904773872e-06, + "loss": 39.6808, + "step": 47880 + }, + { + "epoch": 0.19348165984558635, + "grad_norm": 702.9412841796875, + "learning_rate": 7.694002444249863e-06, + "loss": 50.4996, + "step": 47890 + }, + { + "epoch": 0.193522061110954, + "grad_norm": 923.0763549804688, + "learning_rate": 7.692814196763118e-06, + "loss": 49.8511, + "step": 47900 + }, + { + "epoch": 0.19356246237632163, + "grad_norm": 581.8436279296875, + "learning_rate": 7.691625735021505e-06, + "loss": 61.1597, + "step": 47910 + }, + { + "epoch": 0.19360286364168927, + "grad_norm": 906.2317504882812, + "learning_rate": 7.690437059119584e-06, + "loss": 52.1174, + "step": 47920 + }, + { + "epoch": 0.19364326490705688, + "grad_norm": 812.0711059570312, + "learning_rate": 7.689248169151935e-06, + "loss": 66.8462, + "step": 47930 + }, + { + "epoch": 0.19368366617242452, + "grad_norm": 762.5548095703125, + "learning_rate": 7.68805906521315e-06, + "loss": 42.8686, + "step": 47940 + }, + { + "epoch": 0.19372406743779216, + "grad_norm": 832.9835205078125, + "learning_rate": 7.686869747397843e-06, + "loss": 43.76, + "step": 47950 + }, + { + "epoch": 0.19376446870315978, + "grad_norm": 536.840576171875, + "learning_rate": 7.685680215800639e-06, + "loss": 52.4641, + "step": 47960 + }, + { + "epoch": 0.19380486996852742, + "grad_norm": 303.6435546875, + "learning_rate": 7.684490470516185e-06, + "loss": 42.1546, + "step": 47970 + }, + { + "epoch": 0.19384527123389506, + "grad_norm": 684.5233764648438, + "learning_rate": 7.683300511639149e-06, + "loss": 49.268, + "step": 47980 + }, + { + "epoch": 0.19388567249926267, + "grad_norm": 818.1359252929688, + "learning_rate": 7.682110339264203e-06, + "loss": 52.341, + "step": 47990 + }, + { + "epoch": 0.1939260737646303, + "grad_norm": 322.98992919921875, + "learning_rate": 7.680919953486047e-06, + "loss": 46.4746, + "step": 48000 + }, + { + "epoch": 0.19396647502999795, + "grad_norm": 484.1806640625, + "learning_rate": 7.679729354399395e-06, + "loss": 30.4402, + "step": 48010 + }, + { + "epoch": 0.19400687629536556, + "grad_norm": 594.4285278320312, + "learning_rate": 7.678538542098974e-06, + "loss": 46.43, + "step": 48020 + }, + { + "epoch": 0.1940472775607332, + "grad_norm": 2891.762939453125, + "learning_rate": 7.677347516679536e-06, + "loss": 73.9085, + "step": 48030 + }, + { + "epoch": 0.19408767882610084, + "grad_norm": 977.7078857421875, + "learning_rate": 7.676156278235845e-06, + "loss": 48.965, + "step": 48040 + }, + { + "epoch": 0.19412808009146845, + "grad_norm": 448.16082763671875, + "learning_rate": 7.674964826862679e-06, + "loss": 38.0282, + "step": 48050 + }, + { + "epoch": 0.1941684813568361, + "grad_norm": 597.8922119140625, + "learning_rate": 7.673773162654836e-06, + "loss": 62.7354, + "step": 48060 + }, + { + "epoch": 0.19420888262220373, + "grad_norm": 469.99371337890625, + "learning_rate": 7.672581285707135e-06, + "loss": 45.9692, + "step": 48070 + }, + { + "epoch": 0.19424928388757137, + "grad_norm": 415.48046875, + "learning_rate": 7.67138919611441e-06, + "loss": 62.1821, + "step": 48080 + }, + { + "epoch": 0.19428968515293898, + "grad_norm": 435.82470703125, + "learning_rate": 7.670196893971502e-06, + "loss": 46.7921, + "step": 48090 + }, + { + "epoch": 0.19433008641830662, + "grad_norm": 1045.2288818359375, + "learning_rate": 7.669004379373284e-06, + "loss": 54.4301, + "step": 48100 + }, + { + "epoch": 0.19437048768367426, + "grad_norm": 983.9075927734375, + "learning_rate": 7.667811652414637e-06, + "loss": 37.8054, + "step": 48110 + }, + { + "epoch": 0.19441088894904188, + "grad_norm": 609.9669799804688, + "learning_rate": 7.666618713190459e-06, + "loss": 35.3491, + "step": 48120 + }, + { + "epoch": 0.19445129021440952, + "grad_norm": 632.4072265625, + "learning_rate": 7.665425561795669e-06, + "loss": 35.028, + "step": 48130 + }, + { + "epoch": 0.19449169147977716, + "grad_norm": 749.6817626953125, + "learning_rate": 7.664232198325198e-06, + "loss": 45.446, + "step": 48140 + }, + { + "epoch": 0.19453209274514477, + "grad_norm": 1017.4483032226562, + "learning_rate": 7.663038622873999e-06, + "loss": 36.2746, + "step": 48150 + }, + { + "epoch": 0.1945724940105124, + "grad_norm": 710.2518310546875, + "learning_rate": 7.66184483553704e-06, + "loss": 86.899, + "step": 48160 + }, + { + "epoch": 0.19461289527588005, + "grad_norm": 1045.6429443359375, + "learning_rate": 7.660650836409302e-06, + "loss": 64.3826, + "step": 48170 + }, + { + "epoch": 0.19465329654124766, + "grad_norm": 784.1268310546875, + "learning_rate": 7.65945662558579e-06, + "loss": 36.2017, + "step": 48180 + }, + { + "epoch": 0.1946936978066153, + "grad_norm": 178.67416381835938, + "learning_rate": 7.658262203161517e-06, + "loss": 45.8245, + "step": 48190 + }, + { + "epoch": 0.19473409907198294, + "grad_norm": 812.579833984375, + "learning_rate": 7.65706756923152e-06, + "loss": 41.051, + "step": 48200 + }, + { + "epoch": 0.19477450033735055, + "grad_norm": 1162.462158203125, + "learning_rate": 7.655872723890854e-06, + "loss": 51.0171, + "step": 48210 + }, + { + "epoch": 0.1948149016027182, + "grad_norm": 877.551513671875, + "learning_rate": 7.654677667234582e-06, + "loss": 47.0945, + "step": 48220 + }, + { + "epoch": 0.19485530286808583, + "grad_norm": 325.6798400878906, + "learning_rate": 7.65348239935779e-06, + "loss": 59.1071, + "step": 48230 + }, + { + "epoch": 0.19489570413345347, + "grad_norm": 385.6723937988281, + "learning_rate": 7.652286920355583e-06, + "loss": 46.9059, + "step": 48240 + }, + { + "epoch": 0.19493610539882109, + "grad_norm": 684.6091918945312, + "learning_rate": 7.651091230323079e-06, + "loss": 48.4449, + "step": 48250 + }, + { + "epoch": 0.19497650666418873, + "grad_norm": 925.9144287109375, + "learning_rate": 7.649895329355411e-06, + "loss": 31.311, + "step": 48260 + }, + { + "epoch": 0.19501690792955637, + "grad_norm": 257.57537841796875, + "learning_rate": 7.648699217547733e-06, + "loss": 24.9668, + "step": 48270 + }, + { + "epoch": 0.19505730919492398, + "grad_norm": 833.0692138671875, + "learning_rate": 7.647502894995215e-06, + "loss": 44.589, + "step": 48280 + }, + { + "epoch": 0.19509771046029162, + "grad_norm": 995.8534545898438, + "learning_rate": 7.646306361793042e-06, + "loss": 49.3848, + "step": 48290 + }, + { + "epoch": 0.19513811172565926, + "grad_norm": 739.4403076171875, + "learning_rate": 7.645109618036416e-06, + "loss": 49.1146, + "step": 48300 + }, + { + "epoch": 0.19517851299102687, + "grad_norm": 772.0132446289062, + "learning_rate": 7.643912663820559e-06, + "loss": 54.9199, + "step": 48310 + }, + { + "epoch": 0.1952189142563945, + "grad_norm": 949.4467163085938, + "learning_rate": 7.642715499240702e-06, + "loss": 53.1441, + "step": 48320 + }, + { + "epoch": 0.19525931552176215, + "grad_norm": 283.2403564453125, + "learning_rate": 7.641518124392105e-06, + "loss": 34.6873, + "step": 48330 + }, + { + "epoch": 0.19529971678712976, + "grad_norm": 493.2290954589844, + "learning_rate": 7.640320539370032e-06, + "loss": 39.8798, + "step": 48340 + }, + { + "epoch": 0.1953401180524974, + "grad_norm": 636.8895874023438, + "learning_rate": 7.63912274426977e-06, + "loss": 68.3165, + "step": 48350 + }, + { + "epoch": 0.19538051931786504, + "grad_norm": 429.77777099609375, + "learning_rate": 7.637924739186624e-06, + "loss": 38.3885, + "step": 48360 + }, + { + "epoch": 0.19542092058323265, + "grad_norm": 761.0277709960938, + "learning_rate": 7.636726524215913e-06, + "loss": 50.0891, + "step": 48370 + }, + { + "epoch": 0.1954613218486003, + "grad_norm": 821.7488403320312, + "learning_rate": 7.635528099452974e-06, + "loss": 62.0171, + "step": 48380 + }, + { + "epoch": 0.19550172311396793, + "grad_norm": 724.60693359375, + "learning_rate": 7.634329464993158e-06, + "loss": 49.4184, + "step": 48390 + }, + { + "epoch": 0.19554212437933557, + "grad_norm": 890.350341796875, + "learning_rate": 7.633130620931837e-06, + "loss": 38.8696, + "step": 48400 + }, + { + "epoch": 0.1955825256447032, + "grad_norm": 762.0298461914062, + "learning_rate": 7.631931567364398e-06, + "loss": 45.8956, + "step": 48410 + }, + { + "epoch": 0.19562292691007083, + "grad_norm": 1499.270263671875, + "learning_rate": 7.630732304386244e-06, + "loss": 48.7371, + "step": 48420 + }, + { + "epoch": 0.19566332817543847, + "grad_norm": 606.8875122070312, + "learning_rate": 7.629532832092792e-06, + "loss": 35.9768, + "step": 48430 + }, + { + "epoch": 0.19570372944080608, + "grad_norm": 697.2933349609375, + "learning_rate": 7.62833315057948e-06, + "loss": 42.1523, + "step": 48440 + }, + { + "epoch": 0.19574413070617372, + "grad_norm": 1122.26123046875, + "learning_rate": 7.627133259941762e-06, + "loss": 43.5487, + "step": 48450 + }, + { + "epoch": 0.19578453197154136, + "grad_norm": 1514.467529296875, + "learning_rate": 7.625933160275109e-06, + "loss": 60.6185, + "step": 48460 + }, + { + "epoch": 0.19582493323690897, + "grad_norm": 891.0689086914062, + "learning_rate": 7.6247328516750055e-06, + "loss": 52.881, + "step": 48470 + }, + { + "epoch": 0.1958653345022766, + "grad_norm": 572.1576538085938, + "learning_rate": 7.623532334236954e-06, + "loss": 60.1336, + "step": 48480 + }, + { + "epoch": 0.19590573576764425, + "grad_norm": 1341.4376220703125, + "learning_rate": 7.622331608056474e-06, + "loss": 44.1682, + "step": 48490 + }, + { + "epoch": 0.19594613703301186, + "grad_norm": 665.36962890625, + "learning_rate": 7.621130673229105e-06, + "loss": 58.796, + "step": 48500 + }, + { + "epoch": 0.1959865382983795, + "grad_norm": 861.0709228515625, + "learning_rate": 7.619929529850397e-06, + "loss": 53.8948, + "step": 48510 + }, + { + "epoch": 0.19602693956374714, + "grad_norm": 489.127197265625, + "learning_rate": 7.618728178015919e-06, + "loss": 38.3744, + "step": 48520 + }, + { + "epoch": 0.19606734082911476, + "grad_norm": 653.3378295898438, + "learning_rate": 7.617526617821259e-06, + "loss": 35.1475, + "step": 48530 + }, + { + "epoch": 0.1961077420944824, + "grad_norm": 673.4270629882812, + "learning_rate": 7.616324849362019e-06, + "loss": 50.4059, + "step": 48540 + }, + { + "epoch": 0.19614814335985004, + "grad_norm": 505.6314392089844, + "learning_rate": 7.615122872733819e-06, + "loss": 54.5434, + "step": 48550 + }, + { + "epoch": 0.19618854462521768, + "grad_norm": 905.9286499023438, + "learning_rate": 7.613920688032293e-06, + "loss": 54.7946, + "step": 48560 + }, + { + "epoch": 0.1962289458905853, + "grad_norm": 450.91143798828125, + "learning_rate": 7.612718295353094e-06, + "loss": 33.1633, + "step": 48570 + }, + { + "epoch": 0.19626934715595293, + "grad_norm": 285.4925842285156, + "learning_rate": 7.61151569479189e-06, + "loss": 83.4821, + "step": 48580 + }, + { + "epoch": 0.19630974842132057, + "grad_norm": 1145.9517822265625, + "learning_rate": 7.610312886444369e-06, + "loss": 69.2726, + "step": 48590 + }, + { + "epoch": 0.19635014968668818, + "grad_norm": 343.5018615722656, + "learning_rate": 7.60910987040623e-06, + "loss": 45.3421, + "step": 48600 + }, + { + "epoch": 0.19639055095205582, + "grad_norm": 571.7493896484375, + "learning_rate": 7.607906646773195e-06, + "loss": 51.0718, + "step": 48610 + }, + { + "epoch": 0.19643095221742346, + "grad_norm": 1071.421875, + "learning_rate": 7.606703215640995e-06, + "loss": 51.5955, + "step": 48620 + }, + { + "epoch": 0.19647135348279107, + "grad_norm": 727.3692016601562, + "learning_rate": 7.605499577105382e-06, + "loss": 56.2631, + "step": 48630 + }, + { + "epoch": 0.1965117547481587, + "grad_norm": 555.5590209960938, + "learning_rate": 7.604295731262128e-06, + "loss": 47.5872, + "step": 48640 + }, + { + "epoch": 0.19655215601352635, + "grad_norm": 1017.7325439453125, + "learning_rate": 7.603091678207013e-06, + "loss": 43.9335, + "step": 48650 + }, + { + "epoch": 0.19659255727889396, + "grad_norm": 919.7353515625, + "learning_rate": 7.60188741803584e-06, + "loss": 57.0481, + "step": 48660 + }, + { + "epoch": 0.1966329585442616, + "grad_norm": 814.9369506835938, + "learning_rate": 7.600682950844428e-06, + "loss": 32.845, + "step": 48670 + }, + { + "epoch": 0.19667335980962924, + "grad_norm": 609.3458251953125, + "learning_rate": 7.599478276728607e-06, + "loss": 30.4059, + "step": 48680 + }, + { + "epoch": 0.19671376107499686, + "grad_norm": 458.807861328125, + "learning_rate": 7.5982733957842304e-06, + "loss": 29.1032, + "step": 48690 + }, + { + "epoch": 0.1967541623403645, + "grad_norm": 330.7550354003906, + "learning_rate": 7.597068308107165e-06, + "loss": 44.6583, + "step": 48700 + }, + { + "epoch": 0.19679456360573214, + "grad_norm": 788.6397705078125, + "learning_rate": 7.595863013793292e-06, + "loss": 63.0783, + "step": 48710 + }, + { + "epoch": 0.19683496487109975, + "grad_norm": 796.1050415039062, + "learning_rate": 7.594657512938513e-06, + "loss": 44.8516, + "step": 48720 + }, + { + "epoch": 0.1968753661364674, + "grad_norm": 555.8182983398438, + "learning_rate": 7.593451805638743e-06, + "loss": 41.593, + "step": 48730 + }, + { + "epoch": 0.19691576740183503, + "grad_norm": 864.0370483398438, + "learning_rate": 7.592245891989914e-06, + "loss": 40.4903, + "step": 48740 + }, + { + "epoch": 0.19695616866720267, + "grad_norm": 816.231689453125, + "learning_rate": 7.5910397720879785e-06, + "loss": 63.3931, + "step": 48750 + }, + { + "epoch": 0.19699656993257028, + "grad_norm": 503.22900390625, + "learning_rate": 7.589833446028898e-06, + "loss": 76.2282, + "step": 48760 + }, + { + "epoch": 0.19703697119793792, + "grad_norm": 488.8203430175781, + "learning_rate": 7.5886269139086565e-06, + "loss": 52.7377, + "step": 48770 + }, + { + "epoch": 0.19707737246330556, + "grad_norm": 693.25, + "learning_rate": 7.587420175823252e-06, + "loss": 46.3758, + "step": 48780 + }, + { + "epoch": 0.19711777372867317, + "grad_norm": 998.3317260742188, + "learning_rate": 7.586213231868699e-06, + "loss": 65.0064, + "step": 48790 + }, + { + "epoch": 0.1971581749940408, + "grad_norm": 2683.894775390625, + "learning_rate": 7.585006082141028e-06, + "loss": 71.4827, + "step": 48800 + }, + { + "epoch": 0.19719857625940845, + "grad_norm": 496.90478515625, + "learning_rate": 7.583798726736286e-06, + "loss": 35.5153, + "step": 48810 + }, + { + "epoch": 0.19723897752477607, + "grad_norm": 419.7237243652344, + "learning_rate": 7.5825911657505365e-06, + "loss": 60.3092, + "step": 48820 + }, + { + "epoch": 0.1972793787901437, + "grad_norm": 523.56982421875, + "learning_rate": 7.581383399279863e-06, + "loss": 61.9478, + "step": 48830 + }, + { + "epoch": 0.19731978005551135, + "grad_norm": 812.9863891601562, + "learning_rate": 7.580175427420358e-06, + "loss": 52.6858, + "step": 48840 + }, + { + "epoch": 0.19736018132087896, + "grad_norm": 430.5237731933594, + "learning_rate": 7.578967250268137e-06, + "loss": 36.8604, + "step": 48850 + }, + { + "epoch": 0.1974005825862466, + "grad_norm": 576.6433715820312, + "learning_rate": 7.577758867919325e-06, + "loss": 31.4436, + "step": 48860 + }, + { + "epoch": 0.19744098385161424, + "grad_norm": 369.1871032714844, + "learning_rate": 7.576550280470072e-06, + "loss": 37.8606, + "step": 48870 + }, + { + "epoch": 0.19748138511698185, + "grad_norm": 373.350830078125, + "learning_rate": 7.5753414880165365e-06, + "loss": 44.6778, + "step": 48880 + }, + { + "epoch": 0.1975217863823495, + "grad_norm": 330.2793273925781, + "learning_rate": 7.5741324906548996e-06, + "loss": 40.3745, + "step": 48890 + }, + { + "epoch": 0.19756218764771713, + "grad_norm": 365.4303283691406, + "learning_rate": 7.572923288481355e-06, + "loss": 43.1003, + "step": 48900 + }, + { + "epoch": 0.19760258891308477, + "grad_norm": 363.63287353515625, + "learning_rate": 7.571713881592109e-06, + "loss": 30.675, + "step": 48910 + }, + { + "epoch": 0.19764299017845238, + "grad_norm": 387.30242919921875, + "learning_rate": 7.570504270083394e-06, + "loss": 44.3049, + "step": 48920 + }, + { + "epoch": 0.19768339144382002, + "grad_norm": 514.7061157226562, + "learning_rate": 7.569294454051452e-06, + "loss": 42.9896, + "step": 48930 + }, + { + "epoch": 0.19772379270918766, + "grad_norm": 803.383544921875, + "learning_rate": 7.568084433592542e-06, + "loss": 51.6624, + "step": 48940 + }, + { + "epoch": 0.19776419397455527, + "grad_norm": 410.1513671875, + "learning_rate": 7.566874208802939e-06, + "loss": 44.9291, + "step": 48950 + }, + { + "epoch": 0.19780459523992291, + "grad_norm": 790.0667724609375, + "learning_rate": 7.5656637797789335e-06, + "loss": 65.2939, + "step": 48960 + }, + { + "epoch": 0.19784499650529055, + "grad_norm": 403.3978271484375, + "learning_rate": 7.564453146616837e-06, + "loss": 31.9904, + "step": 48970 + }, + { + "epoch": 0.19788539777065817, + "grad_norm": 1210.605224609375, + "learning_rate": 7.563242309412975e-06, + "loss": 58.8124, + "step": 48980 + }, + { + "epoch": 0.1979257990360258, + "grad_norm": 963.6305541992188, + "learning_rate": 7.562031268263686e-06, + "loss": 55.9536, + "step": 48990 + }, + { + "epoch": 0.19796620030139345, + "grad_norm": 704.73388671875, + "learning_rate": 7.5608200232653254e-06, + "loss": 63.4184, + "step": 49000 + }, + { + "epoch": 0.19800660156676106, + "grad_norm": 729.5773315429688, + "learning_rate": 7.5596085745142654e-06, + "loss": 50.0997, + "step": 49010 + }, + { + "epoch": 0.1980470028321287, + "grad_norm": 961.2758178710938, + "learning_rate": 7.558396922106903e-06, + "loss": 48.7712, + "step": 49020 + }, + { + "epoch": 0.19808740409749634, + "grad_norm": 398.1002197265625, + "learning_rate": 7.557185066139638e-06, + "loss": 53.2708, + "step": 49030 + }, + { + "epoch": 0.19812780536286395, + "grad_norm": 802.1510009765625, + "learning_rate": 7.555973006708892e-06, + "loss": 39.0903, + "step": 49040 + }, + { + "epoch": 0.1981682066282316, + "grad_norm": 373.4618835449219, + "learning_rate": 7.554760743911104e-06, + "loss": 61.9456, + "step": 49050 + }, + { + "epoch": 0.19820860789359923, + "grad_norm": 475.2350158691406, + "learning_rate": 7.553548277842729e-06, + "loss": 54.6043, + "step": 49060 + }, + { + "epoch": 0.19824900915896687, + "grad_norm": 274.4440002441406, + "learning_rate": 7.5523356086002364e-06, + "loss": 34.3625, + "step": 49070 + }, + { + "epoch": 0.19828941042433448, + "grad_norm": 786.3656616210938, + "learning_rate": 7.551122736280113e-06, + "loss": 44.7099, + "step": 49080 + }, + { + "epoch": 0.19832981168970212, + "grad_norm": 145.26461791992188, + "learning_rate": 7.549909660978863e-06, + "loss": 44.07, + "step": 49090 + }, + { + "epoch": 0.19837021295506976, + "grad_norm": 560.5723266601562, + "learning_rate": 7.548696382793002e-06, + "loss": 41.1856, + "step": 49100 + }, + { + "epoch": 0.19841061422043738, + "grad_norm": 351.9812316894531, + "learning_rate": 7.547482901819066e-06, + "loss": 30.168, + "step": 49110 + }, + { + "epoch": 0.19845101548580502, + "grad_norm": 607.218994140625, + "learning_rate": 7.5462692181536094e-06, + "loss": 34.1062, + "step": 49120 + }, + { + "epoch": 0.19849141675117266, + "grad_norm": 1311.45166015625, + "learning_rate": 7.545055331893195e-06, + "loss": 34.9294, + "step": 49130 + }, + { + "epoch": 0.19853181801654027, + "grad_norm": 1162.9915771484375, + "learning_rate": 7.543841243134409e-06, + "loss": 40.0276, + "step": 49140 + }, + { + "epoch": 0.1985722192819079, + "grad_norm": 785.1849975585938, + "learning_rate": 7.5426269519738495e-06, + "loss": 63.0314, + "step": 49150 + }, + { + "epoch": 0.19861262054727555, + "grad_norm": 686.400634765625, + "learning_rate": 7.541412458508133e-06, + "loss": 64.1595, + "step": 49160 + }, + { + "epoch": 0.19865302181264316, + "grad_norm": 589.6557006835938, + "learning_rate": 7.54019776283389e-06, + "loss": 48.8167, + "step": 49170 + }, + { + "epoch": 0.1986934230780108, + "grad_norm": 700.9690551757812, + "learning_rate": 7.53898286504777e-06, + "loss": 52.2654, + "step": 49180 + }, + { + "epoch": 0.19873382434337844, + "grad_norm": 361.7765197753906, + "learning_rate": 7.537767765246436e-06, + "loss": 28.0827, + "step": 49190 + }, + { + "epoch": 0.19877422560874605, + "grad_norm": 1053.419677734375, + "learning_rate": 7.536552463526565e-06, + "loss": 82.5294, + "step": 49200 + }, + { + "epoch": 0.1988146268741137, + "grad_norm": 401.47967529296875, + "learning_rate": 7.535336959984858e-06, + "loss": 40.8976, + "step": 49210 + }, + { + "epoch": 0.19885502813948133, + "grad_norm": 695.1689453125, + "learning_rate": 7.5341212547180246e-06, + "loss": 37.4906, + "step": 49220 + }, + { + "epoch": 0.19889542940484897, + "grad_norm": 522.3072509765625, + "learning_rate": 7.532905347822792e-06, + "loss": 56.5834, + "step": 49230 + }, + { + "epoch": 0.19893583067021658, + "grad_norm": 484.8719787597656, + "learning_rate": 7.5316892393959064e-06, + "loss": 83.0188, + "step": 49240 + }, + { + "epoch": 0.19897623193558422, + "grad_norm": 372.8064880371094, + "learning_rate": 7.530472929534126e-06, + "loss": 47.5193, + "step": 49250 + }, + { + "epoch": 0.19901663320095186, + "grad_norm": 275.8352355957031, + "learning_rate": 7.529256418334228e-06, + "loss": 48.3916, + "step": 49260 + }, + { + "epoch": 0.19905703446631948, + "grad_norm": 383.5005187988281, + "learning_rate": 7.528039705893006e-06, + "loss": 39.6332, + "step": 49270 + }, + { + "epoch": 0.19909743573168712, + "grad_norm": 810.511474609375, + "learning_rate": 7.5268227923072665e-06, + "loss": 42.8357, + "step": 49280 + }, + { + "epoch": 0.19913783699705476, + "grad_norm": 441.7340087890625, + "learning_rate": 7.525605677673831e-06, + "loss": 112.611, + "step": 49290 + }, + { + "epoch": 0.19917823826242237, + "grad_norm": 373.8868408203125, + "learning_rate": 7.524388362089545e-06, + "loss": 31.0369, + "step": 49300 + }, + { + "epoch": 0.19921863952779, + "grad_norm": 386.7220153808594, + "learning_rate": 7.523170845651263e-06, + "loss": 39.6656, + "step": 49310 + }, + { + "epoch": 0.19925904079315765, + "grad_norm": 604.7882690429688, + "learning_rate": 7.521953128455856e-06, + "loss": 37.7455, + "step": 49320 + }, + { + "epoch": 0.19929944205852526, + "grad_norm": 498.83953857421875, + "learning_rate": 7.520735210600213e-06, + "loss": 42.6832, + "step": 49330 + }, + { + "epoch": 0.1993398433238929, + "grad_norm": 430.9537353515625, + "learning_rate": 7.519517092181237e-06, + "loss": 56.7781, + "step": 49340 + }, + { + "epoch": 0.19938024458926054, + "grad_norm": 435.7265625, + "learning_rate": 7.518298773295849e-06, + "loss": 58.8235, + "step": 49350 + }, + { + "epoch": 0.19942064585462815, + "grad_norm": 358.5899353027344, + "learning_rate": 7.517080254040985e-06, + "loss": 37.2343, + "step": 49360 + }, + { + "epoch": 0.1994610471199958, + "grad_norm": 412.10986328125, + "learning_rate": 7.5158615345136e-06, + "loss": 29.2832, + "step": 49370 + }, + { + "epoch": 0.19950144838536343, + "grad_norm": 416.3394470214844, + "learning_rate": 7.514642614810655e-06, + "loss": 42.876, + "step": 49380 + }, + { + "epoch": 0.19954184965073107, + "grad_norm": 811.4172973632812, + "learning_rate": 7.51342349502914e-06, + "loss": 94.7614, + "step": 49390 + }, + { + "epoch": 0.19958225091609869, + "grad_norm": 792.0985717773438, + "learning_rate": 7.512204175266052e-06, + "loss": 45.6878, + "step": 49400 + }, + { + "epoch": 0.19962265218146633, + "grad_norm": 809.537353515625, + "learning_rate": 7.510984655618407e-06, + "loss": 37.4703, + "step": 49410 + }, + { + "epoch": 0.19966305344683397, + "grad_norm": 552.7601928710938, + "learning_rate": 7.509764936183237e-06, + "loss": 44.3597, + "step": 49420 + }, + { + "epoch": 0.19970345471220158, + "grad_norm": 576.9872436523438, + "learning_rate": 7.5085450170575876e-06, + "loss": 51.2273, + "step": 49430 + }, + { + "epoch": 0.19974385597756922, + "grad_norm": 422.26397705078125, + "learning_rate": 7.5073248983385265e-06, + "loss": 48.0467, + "step": 49440 + }, + { + "epoch": 0.19978425724293686, + "grad_norm": 221.23141479492188, + "learning_rate": 7.50610458012313e-06, + "loss": 38.9246, + "step": 49450 + }, + { + "epoch": 0.19982465850830447, + "grad_norm": 475.1904296875, + "learning_rate": 7.504884062508493e-06, + "loss": 47.4083, + "step": 49460 + }, + { + "epoch": 0.1998650597736721, + "grad_norm": 901.5618896484375, + "learning_rate": 7.503663345591726e-06, + "loss": 29.5857, + "step": 49470 + }, + { + "epoch": 0.19990546103903975, + "grad_norm": 417.44183349609375, + "learning_rate": 7.502442429469956e-06, + "loss": 38.4475, + "step": 49480 + }, + { + "epoch": 0.19994586230440736, + "grad_norm": 512.4078369140625, + "learning_rate": 7.501221314240329e-06, + "loss": 27.7243, + "step": 49490 + }, + { + "epoch": 0.199986263569775, + "grad_norm": 617.75390625, + "learning_rate": 7.500000000000001e-06, + "loss": 33.2255, + "step": 49500 + }, + { + "epoch": 0.20002666483514264, + "grad_norm": 1441.7674560546875, + "learning_rate": 7.4987784868461455e-06, + "loss": 67.9786, + "step": 49510 + }, + { + "epoch": 0.20006706610051025, + "grad_norm": 520.3792724609375, + "learning_rate": 7.497556774875953e-06, + "loss": 41.5277, + "step": 49520 + }, + { + "epoch": 0.2001074673658779, + "grad_norm": 852.6936645507812, + "learning_rate": 7.496334864186632e-06, + "loss": 34.2242, + "step": 49530 + }, + { + "epoch": 0.20014786863124553, + "grad_norm": 488.62506103515625, + "learning_rate": 7.4951127548754025e-06, + "loss": 52.5192, + "step": 49540 + }, + { + "epoch": 0.20018826989661317, + "grad_norm": 405.8353576660156, + "learning_rate": 7.4938904470395e-06, + "loss": 52.7841, + "step": 49550 + }, + { + "epoch": 0.2002286711619808, + "grad_norm": 660.2997436523438, + "learning_rate": 7.492667940776182e-06, + "loss": 64.3081, + "step": 49560 + }, + { + "epoch": 0.20026907242734843, + "grad_norm": 783.1737670898438, + "learning_rate": 7.491445236182715e-06, + "loss": 50.6159, + "step": 49570 + }, + { + "epoch": 0.20030947369271607, + "grad_norm": 392.0615234375, + "learning_rate": 7.490222333356384e-06, + "loss": 41.7744, + "step": 49580 + }, + { + "epoch": 0.20034987495808368, + "grad_norm": 486.9169616699219, + "learning_rate": 7.488999232394492e-06, + "loss": 37.5612, + "step": 49590 + }, + { + "epoch": 0.20039027622345132, + "grad_norm": 674.69775390625, + "learning_rate": 7.487775933394353e-06, + "loss": 58.0785, + "step": 49600 + }, + { + "epoch": 0.20043067748881896, + "grad_norm": 372.39263916015625, + "learning_rate": 7.4865524364533e-06, + "loss": 54.9935, + "step": 49610 + }, + { + "epoch": 0.20047107875418657, + "grad_norm": 583.2684936523438, + "learning_rate": 7.485328741668683e-06, + "loss": 52.8313, + "step": 49620 + }, + { + "epoch": 0.2005114800195542, + "grad_norm": 674.1826171875, + "learning_rate": 7.484104849137862e-06, + "loss": 45.282, + "step": 49630 + }, + { + "epoch": 0.20055188128492185, + "grad_norm": 478.70257568359375, + "learning_rate": 7.482880758958219e-06, + "loss": 40.7054, + "step": 49640 + }, + { + "epoch": 0.20059228255028946, + "grad_norm": 557.6510620117188, + "learning_rate": 7.48165647122715e-06, + "loss": 53.2588, + "step": 49650 + }, + { + "epoch": 0.2006326838156571, + "grad_norm": 708.6511840820312, + "learning_rate": 7.480431986042065e-06, + "loss": 48.5664, + "step": 49660 + }, + { + "epoch": 0.20067308508102474, + "grad_norm": 655.1844482421875, + "learning_rate": 7.47920730350039e-06, + "loss": 45.7343, + "step": 49670 + }, + { + "epoch": 0.20071348634639236, + "grad_norm": 1270.8260498046875, + "learning_rate": 7.477982423699568e-06, + "loss": 40.3143, + "step": 49680 + }, + { + "epoch": 0.20075388761176, + "grad_norm": 591.7367553710938, + "learning_rate": 7.476757346737057e-06, + "loss": 38.9331, + "step": 49690 + }, + { + "epoch": 0.20079428887712764, + "grad_norm": 591.572509765625, + "learning_rate": 7.47553207271033e-06, + "loss": 44.8786, + "step": 49700 + }, + { + "epoch": 0.20083469014249528, + "grad_norm": 402.4839172363281, + "learning_rate": 7.474306601716877e-06, + "loss": 51.7751, + "step": 49710 + }, + { + "epoch": 0.2008750914078629, + "grad_norm": 595.6873168945312, + "learning_rate": 7.473080933854205e-06, + "loss": 44.1173, + "step": 49720 + }, + { + "epoch": 0.20091549267323053, + "grad_norm": 1096.7359619140625, + "learning_rate": 7.471855069219831e-06, + "loss": 46.3273, + "step": 49730 + }, + { + "epoch": 0.20095589393859817, + "grad_norm": 590.0582275390625, + "learning_rate": 7.470629007911294e-06, + "loss": 54.4458, + "step": 49740 + }, + { + "epoch": 0.20099629520396578, + "grad_norm": 453.25115966796875, + "learning_rate": 7.469402750026147e-06, + "loss": 56.3232, + "step": 49750 + }, + { + "epoch": 0.20103669646933342, + "grad_norm": 566.2916870117188, + "learning_rate": 7.468176295661955e-06, + "loss": 55.3626, + "step": 49760 + }, + { + "epoch": 0.20107709773470106, + "grad_norm": 454.93035888671875, + "learning_rate": 7.466949644916301e-06, + "loss": 52.3334, + "step": 49770 + }, + { + "epoch": 0.20111749900006867, + "grad_norm": 637.5740356445312, + "learning_rate": 7.465722797886788e-06, + "loss": 51.3216, + "step": 49780 + }, + { + "epoch": 0.2011579002654363, + "grad_norm": 509.33929443359375, + "learning_rate": 7.464495754671027e-06, + "loss": 41.1044, + "step": 49790 + }, + { + "epoch": 0.20119830153080395, + "grad_norm": 600.6532592773438, + "learning_rate": 7.4632685153666505e-06, + "loss": 54.9956, + "step": 49800 + }, + { + "epoch": 0.20123870279617156, + "grad_norm": 800.8745727539062, + "learning_rate": 7.462041080071301e-06, + "loss": 78.0207, + "step": 49810 + }, + { + "epoch": 0.2012791040615392, + "grad_norm": 823.1187744140625, + "learning_rate": 7.460813448882643e-06, + "loss": 51.9847, + "step": 49820 + }, + { + "epoch": 0.20131950532690684, + "grad_norm": 647.9494018554688, + "learning_rate": 7.459585621898353e-06, + "loss": 35.7294, + "step": 49830 + }, + { + "epoch": 0.20135990659227446, + "grad_norm": 567.6367797851562, + "learning_rate": 7.4583575992161235e-06, + "loss": 35.5241, + "step": 49840 + }, + { + "epoch": 0.2014003078576421, + "grad_norm": 414.89678955078125, + "learning_rate": 7.457129380933662e-06, + "loss": 56.1163, + "step": 49850 + }, + { + "epoch": 0.20144070912300974, + "grad_norm": 270.87213134765625, + "learning_rate": 7.4559009671486906e-06, + "loss": 47.756, + "step": 49860 + }, + { + "epoch": 0.20148111038837738, + "grad_norm": 447.1685791015625, + "learning_rate": 7.454672357958951e-06, + "loss": 29.9517, + "step": 49870 + }, + { + "epoch": 0.201521511653745, + "grad_norm": 537.8594970703125, + "learning_rate": 7.453443553462198e-06, + "loss": 32.8862, + "step": 49880 + }, + { + "epoch": 0.20156191291911263, + "grad_norm": 505.7225036621094, + "learning_rate": 7.4522145537562015e-06, + "loss": 43.0821, + "step": 49890 + }, + { + "epoch": 0.20160231418448027, + "grad_norm": 372.20123291015625, + "learning_rate": 7.450985358938747e-06, + "loss": 36.8695, + "step": 49900 + }, + { + "epoch": 0.20164271544984788, + "grad_norm": 403.6573791503906, + "learning_rate": 7.449755969107635e-06, + "loss": 42.9503, + "step": 49910 + }, + { + "epoch": 0.20168311671521552, + "grad_norm": 328.0631408691406, + "learning_rate": 7.4485263843606835e-06, + "loss": 70.2635, + "step": 49920 + }, + { + "epoch": 0.20172351798058316, + "grad_norm": 356.9211120605469, + "learning_rate": 7.447296604795726e-06, + "loss": 40.2945, + "step": 49930 + }, + { + "epoch": 0.20176391924595077, + "grad_norm": 574.4561157226562, + "learning_rate": 7.4460666305106084e-06, + "loss": 40.958, + "step": 49940 + }, + { + "epoch": 0.2018043205113184, + "grad_norm": 480.0083923339844, + "learning_rate": 7.444836461603195e-06, + "loss": 66.8758, + "step": 49950 + }, + { + "epoch": 0.20184472177668605, + "grad_norm": 632.5971069335938, + "learning_rate": 7.443606098171363e-06, + "loss": 69.6456, + "step": 49960 + }, + { + "epoch": 0.20188512304205367, + "grad_norm": 394.3634033203125, + "learning_rate": 7.442375540313012e-06, + "loss": 42.7379, + "step": 49970 + }, + { + "epoch": 0.2019255243074213, + "grad_norm": 620.8236083984375, + "learning_rate": 7.441144788126045e-06, + "loss": 53.8149, + "step": 49980 + }, + { + "epoch": 0.20196592557278895, + "grad_norm": 620.8430786132812, + "learning_rate": 7.4399138417083925e-06, + "loss": 73.245, + "step": 49990 + }, + { + "epoch": 0.20200632683815656, + "grad_norm": 610.7430419921875, + "learning_rate": 7.438682701157993e-06, + "loss": 46.0496, + "step": 50000 + }, + { + "epoch": 0.2020467281035242, + "grad_norm": 587.0072021484375, + "learning_rate": 7.437451366572803e-06, + "loss": 49.8662, + "step": 50010 + }, + { + "epoch": 0.20208712936889184, + "grad_norm": 126.13793182373047, + "learning_rate": 7.436219838050793e-06, + "loss": 46.2343, + "step": 50020 + }, + { + "epoch": 0.20212753063425948, + "grad_norm": 680.1400146484375, + "learning_rate": 7.4349881156899525e-06, + "loss": 49.6154, + "step": 50030 + }, + { + "epoch": 0.2021679318996271, + "grad_norm": 800.9889526367188, + "learning_rate": 7.433756199588282e-06, + "loss": 63.2274, + "step": 50040 + }, + { + "epoch": 0.20220833316499473, + "grad_norm": 1124.2677001953125, + "learning_rate": 7.4325240898438e-06, + "loss": 52.991, + "step": 50050 + }, + { + "epoch": 0.20224873443036237, + "grad_norm": 466.38226318359375, + "learning_rate": 7.4312917865545406e-06, + "loss": 34.7683, + "step": 50060 + }, + { + "epoch": 0.20228913569572998, + "grad_norm": 294.4744567871094, + "learning_rate": 7.430059289818552e-06, + "loss": 38.1768, + "step": 50070 + }, + { + "epoch": 0.20232953696109762, + "grad_norm": 1064.6177978515625, + "learning_rate": 7.4288265997338985e-06, + "loss": 49.4151, + "step": 50080 + }, + { + "epoch": 0.20236993822646526, + "grad_norm": 502.5269775390625, + "learning_rate": 7.427593716398658e-06, + "loss": 42.8233, + "step": 50090 + }, + { + "epoch": 0.20241033949183287, + "grad_norm": 834.306884765625, + "learning_rate": 7.426360639910927e-06, + "loss": 46.4767, + "step": 50100 + }, + { + "epoch": 0.20245074075720051, + "grad_norm": 543.8235473632812, + "learning_rate": 7.425127370368815e-06, + "loss": 47.3622, + "step": 50110 + }, + { + "epoch": 0.20249114202256815, + "grad_norm": 519.2015380859375, + "learning_rate": 7.423893907870449e-06, + "loss": 65.9216, + "step": 50120 + }, + { + "epoch": 0.20253154328793577, + "grad_norm": 728.6759643554688, + "learning_rate": 7.422660252513969e-06, + "loss": 88.8377, + "step": 50130 + }, + { + "epoch": 0.2025719445533034, + "grad_norm": 733.7372436523438, + "learning_rate": 7.421426404397531e-06, + "loss": 52.0609, + "step": 50140 + }, + { + "epoch": 0.20261234581867105, + "grad_norm": 624.3734130859375, + "learning_rate": 7.420192363619305e-06, + "loss": 58.3511, + "step": 50150 + }, + { + "epoch": 0.20265274708403866, + "grad_norm": 634.50634765625, + "learning_rate": 7.418958130277483e-06, + "loss": 53.5587, + "step": 50160 + }, + { + "epoch": 0.2026931483494063, + "grad_norm": 273.2563781738281, + "learning_rate": 7.417723704470261e-06, + "loss": 44.3106, + "step": 50170 + }, + { + "epoch": 0.20273354961477394, + "grad_norm": 729.7420043945312, + "learning_rate": 7.4164890862958615e-06, + "loss": 38.8169, + "step": 50180 + }, + { + "epoch": 0.20277395088014158, + "grad_norm": 476.1905822753906, + "learning_rate": 7.415254275852515e-06, + "loss": 40.2199, + "step": 50190 + }, + { + "epoch": 0.2028143521455092, + "grad_norm": 646.1971435546875, + "learning_rate": 7.414019273238471e-06, + "loss": 39.1268, + "step": 50200 + }, + { + "epoch": 0.20285475341087683, + "grad_norm": 422.8984680175781, + "learning_rate": 7.4127840785519915e-06, + "loss": 40.8058, + "step": 50210 + }, + { + "epoch": 0.20289515467624447, + "grad_norm": 502.76092529296875, + "learning_rate": 7.411548691891357e-06, + "loss": 49.4966, + "step": 50220 + }, + { + "epoch": 0.20293555594161208, + "grad_norm": 832.3966674804688, + "learning_rate": 7.41031311335486e-06, + "loss": 85.8858, + "step": 50230 + }, + { + "epoch": 0.20297595720697972, + "grad_norm": 478.6716613769531, + "learning_rate": 7.409077343040809e-06, + "loss": 58.2753, + "step": 50240 + }, + { + "epoch": 0.20301635847234736, + "grad_norm": 610.2609252929688, + "learning_rate": 7.407841381047533e-06, + "loss": 53.1357, + "step": 50250 + }, + { + "epoch": 0.20305675973771498, + "grad_norm": 498.36273193359375, + "learning_rate": 7.406605227473367e-06, + "loss": 32.1867, + "step": 50260 + }, + { + "epoch": 0.20309716100308262, + "grad_norm": 374.0442810058594, + "learning_rate": 7.405368882416668e-06, + "loss": 48.0912, + "step": 50270 + }, + { + "epoch": 0.20313756226845026, + "grad_norm": 329.8494567871094, + "learning_rate": 7.404132345975806e-06, + "loss": 45.6471, + "step": 50280 + }, + { + "epoch": 0.20317796353381787, + "grad_norm": 340.72100830078125, + "learning_rate": 7.4028956182491665e-06, + "loss": 36.8876, + "step": 50290 + }, + { + "epoch": 0.2032183647991855, + "grad_norm": 695.551025390625, + "learning_rate": 7.401658699335151e-06, + "loss": 39.3887, + "step": 50300 + }, + { + "epoch": 0.20325876606455315, + "grad_norm": 879.5848999023438, + "learning_rate": 7.400421589332175e-06, + "loss": 46.7177, + "step": 50310 + }, + { + "epoch": 0.20329916732992076, + "grad_norm": 482.7911682128906, + "learning_rate": 7.39918428833867e-06, + "loss": 60.0693, + "step": 50320 + }, + { + "epoch": 0.2033395685952884, + "grad_norm": 692.8958129882812, + "learning_rate": 7.397946796453081e-06, + "loss": 52.0577, + "step": 50330 + }, + { + "epoch": 0.20337996986065604, + "grad_norm": 384.43829345703125, + "learning_rate": 7.39670911377387e-06, + "loss": 27.1583, + "step": 50340 + }, + { + "epoch": 0.20342037112602368, + "grad_norm": 122.59984588623047, + "learning_rate": 7.395471240399515e-06, + "loss": 50.0477, + "step": 50350 + }, + { + "epoch": 0.2034607723913913, + "grad_norm": 545.1742553710938, + "learning_rate": 7.394233176428508e-06, + "loss": 29.567, + "step": 50360 + }, + { + "epoch": 0.20350117365675893, + "grad_norm": 628.303955078125, + "learning_rate": 7.3929949219593545e-06, + "loss": 48.9455, + "step": 50370 + }, + { + "epoch": 0.20354157492212657, + "grad_norm": 540.7025756835938, + "learning_rate": 7.391756477090577e-06, + "loss": 43.8203, + "step": 50380 + }, + { + "epoch": 0.20358197618749418, + "grad_norm": 628.2692260742188, + "learning_rate": 7.3905178419207126e-06, + "loss": 34.8731, + "step": 50390 + }, + { + "epoch": 0.20362237745286182, + "grad_norm": 638.9990844726562, + "learning_rate": 7.3892790165483164e-06, + "loss": 36.1334, + "step": 50400 + }, + { + "epoch": 0.20366277871822946, + "grad_norm": 684.90380859375, + "learning_rate": 7.388040001071953e-06, + "loss": 53.3651, + "step": 50410 + }, + { + "epoch": 0.20370317998359708, + "grad_norm": 378.3131103515625, + "learning_rate": 7.386800795590208e-06, + "loss": 37.3771, + "step": 50420 + }, + { + "epoch": 0.20374358124896472, + "grad_norm": 987.3731079101562, + "learning_rate": 7.385561400201675e-06, + "loss": 36.6902, + "step": 50430 + }, + { + "epoch": 0.20378398251433236, + "grad_norm": 1104.762451171875, + "learning_rate": 7.384321815004971e-06, + "loss": 56.715, + "step": 50440 + }, + { + "epoch": 0.20382438377969997, + "grad_norm": 616.8389282226562, + "learning_rate": 7.383082040098723e-06, + "loss": 39.1386, + "step": 50450 + }, + { + "epoch": 0.2038647850450676, + "grad_norm": 618.7867431640625, + "learning_rate": 7.381842075581573e-06, + "loss": 40.5047, + "step": 50460 + }, + { + "epoch": 0.20390518631043525, + "grad_norm": 629.0042114257812, + "learning_rate": 7.380601921552181e-06, + "loss": 51.9558, + "step": 50470 + }, + { + "epoch": 0.20394558757580286, + "grad_norm": 1132.140869140625, + "learning_rate": 7.379361578109218e-06, + "loss": 62.1229, + "step": 50480 + }, + { + "epoch": 0.2039859888411705, + "grad_norm": 673.3324584960938, + "learning_rate": 7.378121045351378e-06, + "loss": 44.218, + "step": 50490 + }, + { + "epoch": 0.20402639010653814, + "grad_norm": 583.1757202148438, + "learning_rate": 7.376880323377357e-06, + "loss": 41.8356, + "step": 50500 + }, + { + "epoch": 0.20406679137190578, + "grad_norm": 732.2437744140625, + "learning_rate": 7.375639412285877e-06, + "loss": 41.8455, + "step": 50510 + }, + { + "epoch": 0.2041071926372734, + "grad_norm": 180.8105926513672, + "learning_rate": 7.374398312175674e-06, + "loss": 30.912, + "step": 50520 + }, + { + "epoch": 0.20414759390264103, + "grad_norm": 564.68603515625, + "learning_rate": 7.373157023145493e-06, + "loss": 50.1993, + "step": 50530 + }, + { + "epoch": 0.20418799516800867, + "grad_norm": 738.6881713867188, + "learning_rate": 7.371915545294098e-06, + "loss": 47.1647, + "step": 50540 + }, + { + "epoch": 0.20422839643337629, + "grad_norm": 524.3673706054688, + "learning_rate": 7.37067387872027e-06, + "loss": 62.6645, + "step": 50550 + }, + { + "epoch": 0.20426879769874393, + "grad_norm": 527.126953125, + "learning_rate": 7.369432023522801e-06, + "loss": 43.1103, + "step": 50560 + }, + { + "epoch": 0.20430919896411157, + "grad_norm": 677.6494140625, + "learning_rate": 7.3681899798005006e-06, + "loss": 43.2993, + "step": 50570 + }, + { + "epoch": 0.20434960022947918, + "grad_norm": 464.6173095703125, + "learning_rate": 7.366947747652191e-06, + "loss": 33.6024, + "step": 50580 + }, + { + "epoch": 0.20439000149484682, + "grad_norm": 548.3930053710938, + "learning_rate": 7.365705327176713e-06, + "loss": 36.8351, + "step": 50590 + }, + { + "epoch": 0.20443040276021446, + "grad_norm": 853.4623413085938, + "learning_rate": 7.364462718472919e-06, + "loss": 44.7392, + "step": 50600 + }, + { + "epoch": 0.20447080402558207, + "grad_norm": 466.6961364746094, + "learning_rate": 7.363219921639677e-06, + "loss": 43.5595, + "step": 50610 + }, + { + "epoch": 0.2045112052909497, + "grad_norm": 765.1495971679688, + "learning_rate": 7.361976936775872e-06, + "loss": 47.2941, + "step": 50620 + }, + { + "epoch": 0.20455160655631735, + "grad_norm": 338.2070007324219, + "learning_rate": 7.360733763980404e-06, + "loss": 40.3301, + "step": 50630 + }, + { + "epoch": 0.20459200782168496, + "grad_norm": 421.01458740234375, + "learning_rate": 7.3594904033521815e-06, + "loss": 54.3122, + "step": 50640 + }, + { + "epoch": 0.2046324090870526, + "grad_norm": 319.7207946777344, + "learning_rate": 7.358246854990138e-06, + "loss": 50.9342, + "step": 50650 + }, + { + "epoch": 0.20467281035242024, + "grad_norm": 311.44586181640625, + "learning_rate": 7.357003118993215e-06, + "loss": 34.9208, + "step": 50660 + }, + { + "epoch": 0.20471321161778788, + "grad_norm": 540.4470825195312, + "learning_rate": 7.355759195460371e-06, + "loss": 41.9387, + "step": 50670 + }, + { + "epoch": 0.2047536128831555, + "grad_norm": 697.3502807617188, + "learning_rate": 7.354515084490579e-06, + "loss": 33.2285, + "step": 50680 + }, + { + "epoch": 0.20479401414852313, + "grad_norm": 971.903076171875, + "learning_rate": 7.353270786182828e-06, + "loss": 69.069, + "step": 50690 + }, + { + "epoch": 0.20483441541389077, + "grad_norm": 821.0972900390625, + "learning_rate": 7.352026300636121e-06, + "loss": 66.8979, + "step": 50700 + }, + { + "epoch": 0.2048748166792584, + "grad_norm": 438.63519287109375, + "learning_rate": 7.350781627949475e-06, + "loss": 43.2084, + "step": 50710 + }, + { + "epoch": 0.20491521794462603, + "grad_norm": 492.3321533203125, + "learning_rate": 7.3495367682219236e-06, + "loss": 56.6399, + "step": 50720 + }, + { + "epoch": 0.20495561920999367, + "grad_norm": 241.5388946533203, + "learning_rate": 7.348291721552514e-06, + "loss": 31.3594, + "step": 50730 + }, + { + "epoch": 0.20499602047536128, + "grad_norm": 635.99609375, + "learning_rate": 7.3470464880403105e-06, + "loss": 36.4675, + "step": 50740 + }, + { + "epoch": 0.20503642174072892, + "grad_norm": 885.6656494140625, + "learning_rate": 7.345801067784388e-06, + "loss": 44.9877, + "step": 50750 + }, + { + "epoch": 0.20507682300609656, + "grad_norm": 323.6302185058594, + "learning_rate": 7.34455546088384e-06, + "loss": 42.1206, + "step": 50760 + }, + { + "epoch": 0.20511722427146417, + "grad_norm": 579.3381958007812, + "learning_rate": 7.343309667437775e-06, + "loss": 37.2963, + "step": 50770 + }, + { + "epoch": 0.2051576255368318, + "grad_norm": 773.6015014648438, + "learning_rate": 7.3420636875453135e-06, + "loss": 65.9994, + "step": 50780 + }, + { + "epoch": 0.20519802680219945, + "grad_norm": 685.1111450195312, + "learning_rate": 7.340817521305595e-06, + "loss": 51.553, + "step": 50790 + }, + { + "epoch": 0.20523842806756706, + "grad_norm": 333.6510009765625, + "learning_rate": 7.3395711688177676e-06, + "loss": 39.827, + "step": 50800 + }, + { + "epoch": 0.2052788293329347, + "grad_norm": 543.8275756835938, + "learning_rate": 7.3383246301809985e-06, + "loss": 52.7167, + "step": 50810 + }, + { + "epoch": 0.20531923059830234, + "grad_norm": 568.8641967773438, + "learning_rate": 7.337077905494472e-06, + "loss": 31.9992, + "step": 50820 + }, + { + "epoch": 0.20535963186366998, + "grad_norm": 643.5866088867188, + "learning_rate": 7.335830994857382e-06, + "loss": 38.0165, + "step": 50830 + }, + { + "epoch": 0.2054000331290376, + "grad_norm": 956.9198608398438, + "learning_rate": 7.334583898368939e-06, + "loss": 57.5262, + "step": 50840 + }, + { + "epoch": 0.20544043439440524, + "grad_norm": 383.0577697753906, + "learning_rate": 7.333336616128369e-06, + "loss": 52.8704, + "step": 50850 + }, + { + "epoch": 0.20548083565977288, + "grad_norm": 554.4602661132812, + "learning_rate": 7.332089148234913e-06, + "loss": 61.3882, + "step": 50860 + }, + { + "epoch": 0.2055212369251405, + "grad_norm": 623.8341064453125, + "learning_rate": 7.330841494787828e-06, + "loss": 51.7853, + "step": 50870 + }, + { + "epoch": 0.20556163819050813, + "grad_norm": 1747.639404296875, + "learning_rate": 7.329593655886382e-06, + "loss": 58.8053, + "step": 50880 + }, + { + "epoch": 0.20560203945587577, + "grad_norm": 613.093505859375, + "learning_rate": 7.3283456316298595e-06, + "loss": 45.6349, + "step": 50890 + }, + { + "epoch": 0.20564244072124338, + "grad_norm": 570.6395263671875, + "learning_rate": 7.32709742211756e-06, + "loss": 44.1308, + "step": 50900 + }, + { + "epoch": 0.20568284198661102, + "grad_norm": 717.2446899414062, + "learning_rate": 7.325849027448799e-06, + "loss": 54.2538, + "step": 50910 + }, + { + "epoch": 0.20572324325197866, + "grad_norm": 314.36724853515625, + "learning_rate": 7.324600447722907e-06, + "loss": 48.2092, + "step": 50920 + }, + { + "epoch": 0.20576364451734627, + "grad_norm": 372.06494140625, + "learning_rate": 7.323351683039224e-06, + "loss": 96.2132, + "step": 50930 + }, + { + "epoch": 0.2058040457827139, + "grad_norm": 483.8934326171875, + "learning_rate": 7.32210273349711e-06, + "loss": 47.4042, + "step": 50940 + }, + { + "epoch": 0.20584444704808155, + "grad_norm": 411.3906555175781, + "learning_rate": 7.32085359919594e-06, + "loss": 50.984, + "step": 50950 + }, + { + "epoch": 0.20588484831344916, + "grad_norm": 892.8008422851562, + "learning_rate": 7.3196042802350995e-06, + "loss": 59.2704, + "step": 50960 + }, + { + "epoch": 0.2059252495788168, + "grad_norm": 605.1826171875, + "learning_rate": 7.3183547767139916e-06, + "loss": 42.0492, + "step": 50970 + }, + { + "epoch": 0.20596565084418444, + "grad_norm": 1213.0029296875, + "learning_rate": 7.317105088732035e-06, + "loss": 38.7694, + "step": 50980 + }, + { + "epoch": 0.20600605210955208, + "grad_norm": 384.1051330566406, + "learning_rate": 7.31585521638866e-06, + "loss": 41.8644, + "step": 50990 + }, + { + "epoch": 0.2060464533749197, + "grad_norm": 767.3082885742188, + "learning_rate": 7.314605159783313e-06, + "loss": 47.898, + "step": 51000 + }, + { + "epoch": 0.20608685464028734, + "grad_norm": 593.4794921875, + "learning_rate": 7.313354919015457e-06, + "loss": 50.6159, + "step": 51010 + }, + { + "epoch": 0.20612725590565498, + "grad_norm": 833.2760009765625, + "learning_rate": 7.312104494184566e-06, + "loss": 42.8232, + "step": 51020 + }, + { + "epoch": 0.2061676571710226, + "grad_norm": 488.1883544921875, + "learning_rate": 7.310853885390133e-06, + "loss": 44.1013, + "step": 51030 + }, + { + "epoch": 0.20620805843639023, + "grad_norm": 771.5651245117188, + "learning_rate": 7.309603092731661e-06, + "loss": 33.7422, + "step": 51040 + }, + { + "epoch": 0.20624845970175787, + "grad_norm": 1035.781982421875, + "learning_rate": 7.30835211630867e-06, + "loss": 58.5417, + "step": 51050 + }, + { + "epoch": 0.20628886096712548, + "grad_norm": 504.0133056640625, + "learning_rate": 7.3071009562206965e-06, + "loss": 50.3386, + "step": 51060 + }, + { + "epoch": 0.20632926223249312, + "grad_norm": 500.3634338378906, + "learning_rate": 7.305849612567287e-06, + "loss": 47.1454, + "step": 51070 + }, + { + "epoch": 0.20636966349786076, + "grad_norm": 419.568603515625, + "learning_rate": 7.304598085448007e-06, + "loss": 39.7767, + "step": 51080 + }, + { + "epoch": 0.20641006476322837, + "grad_norm": 741.7049560546875, + "learning_rate": 7.303346374962433e-06, + "loss": 85.6226, + "step": 51090 + }, + { + "epoch": 0.206450466028596, + "grad_norm": 868.6483154296875, + "learning_rate": 7.302094481210159e-06, + "loss": 71.4091, + "step": 51100 + }, + { + "epoch": 0.20649086729396365, + "grad_norm": 810.5807495117188, + "learning_rate": 7.300842404290792e-06, + "loss": 33.1312, + "step": 51110 + }, + { + "epoch": 0.20653126855933127, + "grad_norm": 339.3719177246094, + "learning_rate": 7.2995901443039554e-06, + "loss": 48.107, + "step": 51120 + }, + { + "epoch": 0.2065716698246989, + "grad_norm": 1150.4307861328125, + "learning_rate": 7.298337701349285e-06, + "loss": 53.5217, + "step": 51130 + }, + { + "epoch": 0.20661207109006655, + "grad_norm": 590.6275634765625, + "learning_rate": 7.29708507552643e-06, + "loss": 59.1666, + "step": 51140 + }, + { + "epoch": 0.20665247235543419, + "grad_norm": 721.5498046875, + "learning_rate": 7.295832266935059e-06, + "loss": 43.1562, + "step": 51150 + }, + { + "epoch": 0.2066928736208018, + "grad_norm": 483.1606750488281, + "learning_rate": 7.2945792756748505e-06, + "loss": 41.8515, + "step": 51160 + }, + { + "epoch": 0.20673327488616944, + "grad_norm": 531.7870483398438, + "learning_rate": 7.2933261018455005e-06, + "loss": 46.7922, + "step": 51170 + }, + { + "epoch": 0.20677367615153708, + "grad_norm": 763.1092529296875, + "learning_rate": 7.292072745546716e-06, + "loss": 63.6227, + "step": 51180 + }, + { + "epoch": 0.2068140774169047, + "grad_norm": 611.9227294921875, + "learning_rate": 7.290819206878223e-06, + "loss": 41.897, + "step": 51190 + }, + { + "epoch": 0.20685447868227233, + "grad_norm": 729.5850830078125, + "learning_rate": 7.289565485939759e-06, + "loss": 55.8601, + "step": 51200 + }, + { + "epoch": 0.20689487994763997, + "grad_norm": 318.1732482910156, + "learning_rate": 7.288311582831078e-06, + "loss": 50.0635, + "step": 51210 + }, + { + "epoch": 0.20693528121300758, + "grad_norm": 309.36285400390625, + "learning_rate": 7.2870574976519455e-06, + "loss": 47.5563, + "step": 51220 + }, + { + "epoch": 0.20697568247837522, + "grad_norm": 418.387451171875, + "learning_rate": 7.2858032305021455e-06, + "loss": 73.4059, + "step": 51230 + }, + { + "epoch": 0.20701608374374286, + "grad_norm": 468.94757080078125, + "learning_rate": 7.28454878148147e-06, + "loss": 48.932, + "step": 51240 + }, + { + "epoch": 0.20705648500911047, + "grad_norm": 493.0873718261719, + "learning_rate": 7.283294150689735e-06, + "loss": 46.617, + "step": 51250 + }, + { + "epoch": 0.20709688627447811, + "grad_norm": 480.55999755859375, + "learning_rate": 7.282039338226763e-06, + "loss": 40.8503, + "step": 51260 + }, + { + "epoch": 0.20713728753984575, + "grad_norm": 458.3509521484375, + "learning_rate": 7.280784344192393e-06, + "loss": 51.6308, + "step": 51270 + }, + { + "epoch": 0.20717768880521337, + "grad_norm": 463.3767395019531, + "learning_rate": 7.279529168686481e-06, + "loss": 46.1649, + "step": 51280 + }, + { + "epoch": 0.207218090070581, + "grad_norm": 568.6404418945312, + "learning_rate": 7.278273811808894e-06, + "loss": 33.446, + "step": 51290 + }, + { + "epoch": 0.20725849133594865, + "grad_norm": 465.77655029296875, + "learning_rate": 7.2770182736595164e-06, + "loss": 53.7961, + "step": 51300 + }, + { + "epoch": 0.2072988926013163, + "grad_norm": 362.6551513671875, + "learning_rate": 7.275762554338244e-06, + "loss": 37.3343, + "step": 51310 + }, + { + "epoch": 0.2073392938666839, + "grad_norm": 717.495849609375, + "learning_rate": 7.2745066539449905e-06, + "loss": 47.863, + "step": 51320 + }, + { + "epoch": 0.20737969513205154, + "grad_norm": 368.6878662109375, + "learning_rate": 7.27325057257968e-06, + "loss": 40.8304, + "step": 51330 + }, + { + "epoch": 0.20742009639741918, + "grad_norm": 560.428955078125, + "learning_rate": 7.271994310342254e-06, + "loss": 50.8628, + "step": 51340 + }, + { + "epoch": 0.2074604976627868, + "grad_norm": 770.505615234375, + "learning_rate": 7.270737867332669e-06, + "loss": 37.3534, + "step": 51350 + }, + { + "epoch": 0.20750089892815443, + "grad_norm": 920.7933959960938, + "learning_rate": 7.2694812436508934e-06, + "loss": 41.0482, + "step": 51360 + }, + { + "epoch": 0.20754130019352207, + "grad_norm": 788.7203369140625, + "learning_rate": 7.268224439396909e-06, + "loss": 48.9918, + "step": 51370 + }, + { + "epoch": 0.20758170145888968, + "grad_norm": 1051.6204833984375, + "learning_rate": 7.266967454670717e-06, + "loss": 42.9526, + "step": 51380 + }, + { + "epoch": 0.20762210272425732, + "grad_norm": 474.6462707519531, + "learning_rate": 7.265710289572328e-06, + "loss": 50.7116, + "step": 51390 + }, + { + "epoch": 0.20766250398962496, + "grad_norm": 414.03656005859375, + "learning_rate": 7.264452944201771e-06, + "loss": 33.1895, + "step": 51400 + }, + { + "epoch": 0.20770290525499258, + "grad_norm": 550.8810424804688, + "learning_rate": 7.263195418659083e-06, + "loss": 68.3899, + "step": 51410 + }, + { + "epoch": 0.20774330652036022, + "grad_norm": 681.5443725585938, + "learning_rate": 7.261937713044325e-06, + "loss": 54.9852, + "step": 51420 + }, + { + "epoch": 0.20778370778572786, + "grad_norm": 647.8605346679688, + "learning_rate": 7.260679827457562e-06, + "loss": 48.1843, + "step": 51430 + }, + { + "epoch": 0.20782410905109547, + "grad_norm": 504.14276123046875, + "learning_rate": 7.259421761998881e-06, + "loss": 33.8833, + "step": 51440 + }, + { + "epoch": 0.2078645103164631, + "grad_norm": 750.7991943359375, + "learning_rate": 7.2581635167683805e-06, + "loss": 38.5598, + "step": 51450 + }, + { + "epoch": 0.20790491158183075, + "grad_norm": 728.8818359375, + "learning_rate": 7.256905091866171e-06, + "loss": 40.8459, + "step": 51460 + }, + { + "epoch": 0.2079453128471984, + "grad_norm": 1682.726318359375, + "learning_rate": 7.255646487392382e-06, + "loss": 72.4861, + "step": 51470 + }, + { + "epoch": 0.207985714112566, + "grad_norm": 847.2395629882812, + "learning_rate": 7.254387703447154e-06, + "loss": 41.9428, + "step": 51480 + }, + { + "epoch": 0.20802611537793364, + "grad_norm": 511.3044738769531, + "learning_rate": 7.2531287401306435e-06, + "loss": 60.2857, + "step": 51490 + }, + { + "epoch": 0.20806651664330128, + "grad_norm": 354.39312744140625, + "learning_rate": 7.251869597543019e-06, + "loss": 34.2236, + "step": 51500 + }, + { + "epoch": 0.2081069179086689, + "grad_norm": 611.3307495117188, + "learning_rate": 7.250610275784464e-06, + "loss": 45.4532, + "step": 51510 + }, + { + "epoch": 0.20814731917403653, + "grad_norm": 648.39404296875, + "learning_rate": 7.2493507749551795e-06, + "loss": 49.6291, + "step": 51520 + }, + { + "epoch": 0.20818772043940417, + "grad_norm": 706.2927856445312, + "learning_rate": 7.248091095155378e-06, + "loss": 64.6364, + "step": 51530 + }, + { + "epoch": 0.20822812170477178, + "grad_norm": 389.3920593261719, + "learning_rate": 7.246831236485283e-06, + "loss": 25.9292, + "step": 51540 + }, + { + "epoch": 0.20826852297013942, + "grad_norm": 664.6102294921875, + "learning_rate": 7.245571199045139e-06, + "loss": 45.7861, + "step": 51550 + }, + { + "epoch": 0.20830892423550706, + "grad_norm": 500.1164855957031, + "learning_rate": 7.244310982935202e-06, + "loss": 30.2852, + "step": 51560 + }, + { + "epoch": 0.20834932550087468, + "grad_norm": 460.7940368652344, + "learning_rate": 7.243050588255738e-06, + "loss": 42.1839, + "step": 51570 + }, + { + "epoch": 0.20838972676624232, + "grad_norm": 703.8712768554688, + "learning_rate": 7.241790015107034e-06, + "loss": 65.8007, + "step": 51580 + }, + { + "epoch": 0.20843012803160996, + "grad_norm": 368.80810546875, + "learning_rate": 7.240529263589386e-06, + "loss": 39.5731, + "step": 51590 + }, + { + "epoch": 0.20847052929697757, + "grad_norm": 783.1382446289062, + "learning_rate": 7.239268333803109e-06, + "loss": 37.1502, + "step": 51600 + }, + { + "epoch": 0.2085109305623452, + "grad_norm": 409.4545593261719, + "learning_rate": 7.2380072258485265e-06, + "loss": 55.8675, + "step": 51610 + }, + { + "epoch": 0.20855133182771285, + "grad_norm": 508.4222106933594, + "learning_rate": 7.2367459398259795e-06, + "loss": 33.9861, + "step": 51620 + }, + { + "epoch": 0.20859173309308046, + "grad_norm": 473.3620300292969, + "learning_rate": 7.2354844758358234e-06, + "loss": 52.3211, + "step": 51630 + }, + { + "epoch": 0.2086321343584481, + "grad_norm": 787.4571533203125, + "learning_rate": 7.234222833978427e-06, + "loss": 29.9313, + "step": 51640 + }, + { + "epoch": 0.20867253562381574, + "grad_norm": 279.2911071777344, + "learning_rate": 7.232961014354175e-06, + "loss": 37.1078, + "step": 51650 + }, + { + "epoch": 0.20871293688918338, + "grad_norm": 603.6970825195312, + "learning_rate": 7.23169901706346e-06, + "loss": 43.0854, + "step": 51660 + }, + { + "epoch": 0.208753338154551, + "grad_norm": 899.671875, + "learning_rate": 7.2304368422067e-06, + "loss": 48.4667, + "step": 51670 + }, + { + "epoch": 0.20879373941991863, + "grad_norm": 795.8191528320312, + "learning_rate": 7.2291744898843145e-06, + "loss": 27.7021, + "step": 51680 + }, + { + "epoch": 0.20883414068528627, + "grad_norm": 940.0831909179688, + "learning_rate": 7.227911960196746e-06, + "loss": 59.3487, + "step": 51690 + }, + { + "epoch": 0.20887454195065389, + "grad_norm": 430.9837341308594, + "learning_rate": 7.226649253244448e-06, + "loss": 39.4674, + "step": 51700 + }, + { + "epoch": 0.20891494321602153, + "grad_norm": 714.0769653320312, + "learning_rate": 7.225386369127886e-06, + "loss": 39.4765, + "step": 51710 + }, + { + "epoch": 0.20895534448138917, + "grad_norm": 369.2260437011719, + "learning_rate": 7.224123307947545e-06, + "loss": 37.7946, + "step": 51720 + }, + { + "epoch": 0.20899574574675678, + "grad_norm": 393.2871398925781, + "learning_rate": 7.2228600698039205e-06, + "loss": 47.0149, + "step": 51730 + }, + { + "epoch": 0.20903614701212442, + "grad_norm": 284.41650390625, + "learning_rate": 7.221596654797522e-06, + "loss": 48.9927, + "step": 51740 + }, + { + "epoch": 0.20907654827749206, + "grad_norm": 800.0281982421875, + "learning_rate": 7.2203330630288714e-06, + "loss": 64.5519, + "step": 51750 + }, + { + "epoch": 0.20911694954285967, + "grad_norm": 445.57330322265625, + "learning_rate": 7.21906929459851e-06, + "loss": 42.594, + "step": 51760 + }, + { + "epoch": 0.2091573508082273, + "grad_norm": 471.7644348144531, + "learning_rate": 7.217805349606988e-06, + "loss": 32.7521, + "step": 51770 + }, + { + "epoch": 0.20919775207359495, + "grad_norm": 541.0687866210938, + "learning_rate": 7.216541228154875e-06, + "loss": 111.9155, + "step": 51780 + }, + { + "epoch": 0.20923815333896256, + "grad_norm": 513.6289672851562, + "learning_rate": 7.215276930342747e-06, + "loss": 29.2734, + "step": 51790 + }, + { + "epoch": 0.2092785546043302, + "grad_norm": 402.09423828125, + "learning_rate": 7.214012456271202e-06, + "loss": 37.2545, + "step": 51800 + }, + { + "epoch": 0.20931895586969784, + "grad_norm": 878.948974609375, + "learning_rate": 7.212747806040845e-06, + "loss": 49.578, + "step": 51810 + }, + { + "epoch": 0.20935935713506548, + "grad_norm": 462.6136474609375, + "learning_rate": 7.211482979752302e-06, + "loss": 37.1633, + "step": 51820 + }, + { + "epoch": 0.2093997584004331, + "grad_norm": 1256.9906005859375, + "learning_rate": 7.210217977506207e-06, + "loss": 46.0968, + "step": 51830 + }, + { + "epoch": 0.20944015966580073, + "grad_norm": 496.5869445800781, + "learning_rate": 7.208952799403211e-06, + "loss": 48.2958, + "step": 51840 + }, + { + "epoch": 0.20948056093116837, + "grad_norm": 795.1804809570312, + "learning_rate": 7.207687445543977e-06, + "loss": 52.5797, + "step": 51850 + }, + { + "epoch": 0.209520962196536, + "grad_norm": 694.8826904296875, + "learning_rate": 7.206421916029187e-06, + "loss": 47.4546, + "step": 51860 + }, + { + "epoch": 0.20956136346190363, + "grad_norm": 905.2157592773438, + "learning_rate": 7.205156210959529e-06, + "loss": 40.945, + "step": 51870 + }, + { + "epoch": 0.20960176472727127, + "grad_norm": 572.3497924804688, + "learning_rate": 7.203890330435715e-06, + "loss": 57.2266, + "step": 51880 + }, + { + "epoch": 0.20964216599263888, + "grad_norm": 538.5952758789062, + "learning_rate": 7.202624274558458e-06, + "loss": 37.1721, + "step": 51890 + }, + { + "epoch": 0.20968256725800652, + "grad_norm": 330.7435607910156, + "learning_rate": 7.201358043428499e-06, + "loss": 32.4022, + "step": 51900 + }, + { + "epoch": 0.20972296852337416, + "grad_norm": 847.5732421875, + "learning_rate": 7.200091637146582e-06, + "loss": 60.8524, + "step": 51910 + }, + { + "epoch": 0.20976336978874177, + "grad_norm": 599.7764892578125, + "learning_rate": 7.198825055813471e-06, + "loss": 68.1129, + "step": 51920 + }, + { + "epoch": 0.2098037710541094, + "grad_norm": 442.0505065917969, + "learning_rate": 7.197558299529941e-06, + "loss": 42.2968, + "step": 51930 + }, + { + "epoch": 0.20984417231947705, + "grad_norm": 387.84210205078125, + "learning_rate": 7.196291368396784e-06, + "loss": 43.1034, + "step": 51940 + }, + { + "epoch": 0.20988457358484466, + "grad_norm": 896.5294189453125, + "learning_rate": 7.1950242625148e-06, + "loss": 55.5236, + "step": 51950 + }, + { + "epoch": 0.2099249748502123, + "grad_norm": 604.0323486328125, + "learning_rate": 7.1937569819848115e-06, + "loss": 40.1479, + "step": 51960 + }, + { + "epoch": 0.20996537611557994, + "grad_norm": 284.2196350097656, + "learning_rate": 7.192489526907646e-06, + "loss": 32.0391, + "step": 51970 + }, + { + "epoch": 0.21000577738094758, + "grad_norm": 549.9973754882812, + "learning_rate": 7.191221897384153e-06, + "loss": 37.4856, + "step": 51980 + }, + { + "epoch": 0.2100461786463152, + "grad_norm": 819.0157470703125, + "learning_rate": 7.189954093515189e-06, + "loss": 45.887, + "step": 51990 + }, + { + "epoch": 0.21008657991168284, + "grad_norm": 814.2695922851562, + "learning_rate": 7.188686115401628e-06, + "loss": 42.8499, + "step": 52000 + }, + { + "epoch": 0.21012698117705048, + "grad_norm": 803.1535034179688, + "learning_rate": 7.187417963144358e-06, + "loss": 40.0624, + "step": 52010 + }, + { + "epoch": 0.2101673824424181, + "grad_norm": 545.9307861328125, + "learning_rate": 7.18614963684428e-06, + "loss": 46.7121, + "step": 52020 + }, + { + "epoch": 0.21020778370778573, + "grad_norm": 747.6693115234375, + "learning_rate": 7.184881136602309e-06, + "loss": 38.1845, + "step": 52030 + }, + { + "epoch": 0.21024818497315337, + "grad_norm": 607.844970703125, + "learning_rate": 7.183612462519371e-06, + "loss": 48.4949, + "step": 52040 + }, + { + "epoch": 0.21028858623852098, + "grad_norm": 536.132568359375, + "learning_rate": 7.182343614696412e-06, + "loss": 39.4975, + "step": 52050 + }, + { + "epoch": 0.21032898750388862, + "grad_norm": 684.9125366210938, + "learning_rate": 7.181074593234387e-06, + "loss": 40.7165, + "step": 52060 + }, + { + "epoch": 0.21036938876925626, + "grad_norm": 539.2525024414062, + "learning_rate": 7.179805398234266e-06, + "loss": 42.7935, + "step": 52070 + }, + { + "epoch": 0.21040979003462387, + "grad_norm": 431.33050537109375, + "learning_rate": 7.178536029797035e-06, + "loss": 41.8031, + "step": 52080 + }, + { + "epoch": 0.2104501912999915, + "grad_norm": 742.8863525390625, + "learning_rate": 7.177266488023688e-06, + "loss": 49.8048, + "step": 52090 + }, + { + "epoch": 0.21049059256535915, + "grad_norm": 1108.1619873046875, + "learning_rate": 7.17599677301524e-06, + "loss": 64.1411, + "step": 52100 + }, + { + "epoch": 0.21053099383072676, + "grad_norm": 691.4251708984375, + "learning_rate": 7.174726884872716e-06, + "loss": 35.8404, + "step": 52110 + }, + { + "epoch": 0.2105713950960944, + "grad_norm": 752.3907470703125, + "learning_rate": 7.173456823697154e-06, + "loss": 43.8726, + "step": 52120 + }, + { + "epoch": 0.21061179636146204, + "grad_norm": 594.6886596679688, + "learning_rate": 7.172186589589607e-06, + "loss": 89.6082, + "step": 52130 + }, + { + "epoch": 0.21065219762682968, + "grad_norm": 550.98193359375, + "learning_rate": 7.170916182651141e-06, + "loss": 31.0973, + "step": 52140 + }, + { + "epoch": 0.2106925988921973, + "grad_norm": 527.2732543945312, + "learning_rate": 7.1696456029828386e-06, + "loss": 55.1561, + "step": 52150 + }, + { + "epoch": 0.21073300015756494, + "grad_norm": 1399.20166015625, + "learning_rate": 7.168374850685794e-06, + "loss": 52.6242, + "step": 52160 + }, + { + "epoch": 0.21077340142293258, + "grad_norm": 689.70751953125, + "learning_rate": 7.167103925861113e-06, + "loss": 67.5702, + "step": 52170 + }, + { + "epoch": 0.2108138026883002, + "grad_norm": 3195.75244140625, + "learning_rate": 7.165832828609918e-06, + "loss": 45.5313, + "step": 52180 + }, + { + "epoch": 0.21085420395366783, + "grad_norm": 767.225830078125, + "learning_rate": 7.164561559033344e-06, + "loss": 67.4989, + "step": 52190 + }, + { + "epoch": 0.21089460521903547, + "grad_norm": 933.3297119140625, + "learning_rate": 7.163290117232542e-06, + "loss": 40.6139, + "step": 52200 + }, + { + "epoch": 0.21093500648440308, + "grad_norm": 416.0960388183594, + "learning_rate": 7.162018503308674e-06, + "loss": 61.1358, + "step": 52210 + }, + { + "epoch": 0.21097540774977072, + "grad_norm": 527.6151733398438, + "learning_rate": 7.1607467173629145e-06, + "loss": 49.4484, + "step": 52220 + }, + { + "epoch": 0.21101580901513836, + "grad_norm": 259.9767761230469, + "learning_rate": 7.1594747594964564e-06, + "loss": 36.1799, + "step": 52230 + }, + { + "epoch": 0.21105621028050597, + "grad_norm": 416.2400207519531, + "learning_rate": 7.1582026298105e-06, + "loss": 40.1228, + "step": 52240 + }, + { + "epoch": 0.2110966115458736, + "grad_norm": 469.9295349121094, + "learning_rate": 7.156930328406268e-06, + "loss": 43.5409, + "step": 52250 + }, + { + "epoch": 0.21113701281124125, + "grad_norm": 446.4311828613281, + "learning_rate": 7.1556578553849875e-06, + "loss": 88.1864, + "step": 52260 + }, + { + "epoch": 0.21117741407660887, + "grad_norm": 576.50732421875, + "learning_rate": 7.154385210847905e-06, + "loss": 43.5277, + "step": 52270 + }, + { + "epoch": 0.2112178153419765, + "grad_norm": 395.7337341308594, + "learning_rate": 7.153112394896279e-06, + "loss": 34.336, + "step": 52280 + }, + { + "epoch": 0.21125821660734415, + "grad_norm": 319.70098876953125, + "learning_rate": 7.15183940763138e-06, + "loss": 69.1773, + "step": 52290 + }, + { + "epoch": 0.21129861787271179, + "grad_norm": 509.9462585449219, + "learning_rate": 7.150566249154496e-06, + "loss": 45.0828, + "step": 52300 + }, + { + "epoch": 0.2113390191380794, + "grad_norm": 244.54661560058594, + "learning_rate": 7.149292919566924e-06, + "loss": 38.4245, + "step": 52310 + }, + { + "epoch": 0.21137942040344704, + "grad_norm": 701.5598754882812, + "learning_rate": 7.148019418969979e-06, + "loss": 56.945, + "step": 52320 + }, + { + "epoch": 0.21141982166881468, + "grad_norm": 378.5613098144531, + "learning_rate": 7.146745747464987e-06, + "loss": 30.5057, + "step": 52330 + }, + { + "epoch": 0.2114602229341823, + "grad_norm": 755.4013671875, + "learning_rate": 7.145471905153288e-06, + "loss": 53.356, + "step": 52340 + }, + { + "epoch": 0.21150062419954993, + "grad_norm": 254.3358612060547, + "learning_rate": 7.1441978921362365e-06, + "loss": 40.9182, + "step": 52350 + }, + { + "epoch": 0.21154102546491757, + "grad_norm": 688.7052001953125, + "learning_rate": 7.142923708515199e-06, + "loss": 68.2775, + "step": 52360 + }, + { + "epoch": 0.21158142673028518, + "grad_norm": 329.7192077636719, + "learning_rate": 7.141649354391556e-06, + "loss": 55.6541, + "step": 52370 + }, + { + "epoch": 0.21162182799565282, + "grad_norm": 591.1115112304688, + "learning_rate": 7.140374829866703e-06, + "loss": 76.5433, + "step": 52380 + }, + { + "epoch": 0.21166222926102046, + "grad_norm": 1178.5982666015625, + "learning_rate": 7.1391001350420486e-06, + "loss": 49.616, + "step": 52390 + }, + { + "epoch": 0.21170263052638807, + "grad_norm": 694.3931884765625, + "learning_rate": 7.137825270019012e-06, + "loss": 42.3436, + "step": 52400 + }, + { + "epoch": 0.21174303179175571, + "grad_norm": 1042.828857421875, + "learning_rate": 7.1365502348990315e-06, + "loss": 49.1559, + "step": 52410 + }, + { + "epoch": 0.21178343305712335, + "grad_norm": 440.44696044921875, + "learning_rate": 7.135275029783554e-06, + "loss": 43.2997, + "step": 52420 + }, + { + "epoch": 0.21182383432249097, + "grad_norm": 441.6261291503906, + "learning_rate": 7.133999654774041e-06, + "loss": 52.5607, + "step": 52430 + }, + { + "epoch": 0.2118642355878586, + "grad_norm": 643.361328125, + "learning_rate": 7.13272410997197e-06, + "loss": 60.0243, + "step": 52440 + }, + { + "epoch": 0.21190463685322625, + "grad_norm": 840.8086547851562, + "learning_rate": 7.13144839547883e-06, + "loss": 37.9704, + "step": 52450 + }, + { + "epoch": 0.2119450381185939, + "grad_norm": 772.6046752929688, + "learning_rate": 7.130172511396123e-06, + "loss": 50.9666, + "step": 52460 + }, + { + "epoch": 0.2119854393839615, + "grad_norm": 465.1317138671875, + "learning_rate": 7.128896457825364e-06, + "loss": 33.6535, + "step": 52470 + }, + { + "epoch": 0.21202584064932914, + "grad_norm": 420.5824890136719, + "learning_rate": 7.127620234868085e-06, + "loss": 46.2685, + "step": 52480 + }, + { + "epoch": 0.21206624191469678, + "grad_norm": 675.41845703125, + "learning_rate": 7.126343842625828e-06, + "loss": 86.1336, + "step": 52490 + }, + { + "epoch": 0.2121066431800644, + "grad_norm": 523.8388671875, + "learning_rate": 7.1250672812001505e-06, + "loss": 40.6279, + "step": 52500 + }, + { + "epoch": 0.21214704444543203, + "grad_norm": 675.9490966796875, + "learning_rate": 7.123790550692624e-06, + "loss": 53.4468, + "step": 52510 + }, + { + "epoch": 0.21218744571079967, + "grad_norm": 550.1663208007812, + "learning_rate": 7.1225136512048275e-06, + "loss": 49.8968, + "step": 52520 + }, + { + "epoch": 0.21222784697616728, + "grad_norm": 504.02569580078125, + "learning_rate": 7.1212365828383615e-06, + "loss": 36.7957, + "step": 52530 + }, + { + "epoch": 0.21226824824153492, + "grad_norm": 462.62005615234375, + "learning_rate": 7.119959345694835e-06, + "loss": 38.4813, + "step": 52540 + }, + { + "epoch": 0.21230864950690256, + "grad_norm": 591.6526489257812, + "learning_rate": 7.118681939875875e-06, + "loss": 40.4088, + "step": 52550 + }, + { + "epoch": 0.21234905077227018, + "grad_norm": 949.3772583007812, + "learning_rate": 7.117404365483116e-06, + "loss": 53.9539, + "step": 52560 + }, + { + "epoch": 0.21238945203763782, + "grad_norm": 612.3878173828125, + "learning_rate": 7.116126622618207e-06, + "loss": 37.0243, + "step": 52570 + }, + { + "epoch": 0.21242985330300546, + "grad_norm": 2274.1650390625, + "learning_rate": 7.114848711382816e-06, + "loss": 68.6226, + "step": 52580 + }, + { + "epoch": 0.21247025456837307, + "grad_norm": 200.64561462402344, + "learning_rate": 7.1135706318786195e-06, + "loss": 48.2132, + "step": 52590 + }, + { + "epoch": 0.2125106558337407, + "grad_norm": 579.8665161132812, + "learning_rate": 7.112292384207306e-06, + "loss": 53.6652, + "step": 52600 + }, + { + "epoch": 0.21255105709910835, + "grad_norm": 1212.4814453125, + "learning_rate": 7.111013968470581e-06, + "loss": 52.715, + "step": 52610 + }, + { + "epoch": 0.212591458364476, + "grad_norm": 550.4876098632812, + "learning_rate": 7.109735384770166e-06, + "loss": 42.4386, + "step": 52620 + }, + { + "epoch": 0.2126318596298436, + "grad_norm": 1110.4034423828125, + "learning_rate": 7.108456633207787e-06, + "loss": 60.6928, + "step": 52630 + }, + { + "epoch": 0.21267226089521124, + "grad_norm": 907.2188110351562, + "learning_rate": 7.10717771388519e-06, + "loss": 60.7178, + "step": 52640 + }, + { + "epoch": 0.21271266216057888, + "grad_norm": 127.77151489257812, + "learning_rate": 7.105898626904134e-06, + "loss": 47.8759, + "step": 52650 + }, + { + "epoch": 0.2127530634259465, + "grad_norm": 1381.4952392578125, + "learning_rate": 7.104619372366387e-06, + "loss": 51.4232, + "step": 52660 + }, + { + "epoch": 0.21279346469131413, + "grad_norm": 756.6724243164062, + "learning_rate": 7.103339950373737e-06, + "loss": 81.3154, + "step": 52670 + }, + { + "epoch": 0.21283386595668177, + "grad_norm": 514.89453125, + "learning_rate": 7.102060361027981e-06, + "loss": 71.0428, + "step": 52680 + }, + { + "epoch": 0.21287426722204938, + "grad_norm": 542.7091674804688, + "learning_rate": 7.100780604430928e-06, + "loss": 49.2722, + "step": 52690 + }, + { + "epoch": 0.21291466848741702, + "grad_norm": 815.9537963867188, + "learning_rate": 7.099500680684404e-06, + "loss": 46.4009, + "step": 52700 + }, + { + "epoch": 0.21295506975278466, + "grad_norm": 361.59857177734375, + "learning_rate": 7.0982205898902444e-06, + "loss": 40.411, + "step": 52710 + }, + { + "epoch": 0.21299547101815228, + "grad_norm": 439.58587646484375, + "learning_rate": 7.096940332150305e-06, + "loss": 36.6399, + "step": 52720 + }, + { + "epoch": 0.21303587228351992, + "grad_norm": 663.2642822265625, + "learning_rate": 7.095659907566446e-06, + "loss": 57.5411, + "step": 52730 + }, + { + "epoch": 0.21307627354888756, + "grad_norm": 1188.1744384765625, + "learning_rate": 7.094379316240545e-06, + "loss": 46.7822, + "step": 52740 + }, + { + "epoch": 0.21311667481425517, + "grad_norm": 667.1629638671875, + "learning_rate": 7.093098558274494e-06, + "loss": 56.3298, + "step": 52750 + }, + { + "epoch": 0.2131570760796228, + "grad_norm": 797.8674926757812, + "learning_rate": 7.091817633770197e-06, + "loss": 43.1346, + "step": 52760 + }, + { + "epoch": 0.21319747734499045, + "grad_norm": 355.7433776855469, + "learning_rate": 7.090536542829571e-06, + "loss": 49.5407, + "step": 52770 + }, + { + "epoch": 0.2132378786103581, + "grad_norm": 712.264892578125, + "learning_rate": 7.089255285554546e-06, + "loss": 49.0901, + "step": 52780 + }, + { + "epoch": 0.2132782798757257, + "grad_norm": 755.2996215820312, + "learning_rate": 7.087973862047067e-06, + "loss": 55.9708, + "step": 52790 + }, + { + "epoch": 0.21331868114109334, + "grad_norm": 283.7481689453125, + "learning_rate": 7.08669227240909e-06, + "loss": 40.528, + "step": 52800 + }, + { + "epoch": 0.21335908240646098, + "grad_norm": 784.1795654296875, + "learning_rate": 7.085410516742586e-06, + "loss": 31.2034, + "step": 52810 + }, + { + "epoch": 0.2133994836718286, + "grad_norm": 790.679443359375, + "learning_rate": 7.084128595149538e-06, + "loss": 46.8797, + "step": 52820 + }, + { + "epoch": 0.21343988493719623, + "grad_norm": 773.3251953125, + "learning_rate": 7.082846507731942e-06, + "loss": 53.9314, + "step": 52830 + }, + { + "epoch": 0.21348028620256387, + "grad_norm": 443.99810791015625, + "learning_rate": 7.081564254591809e-06, + "loss": 41.7004, + "step": 52840 + }, + { + "epoch": 0.21352068746793149, + "grad_norm": 634.8325805664062, + "learning_rate": 7.08028183583116e-06, + "loss": 37.2924, + "step": 52850 + }, + { + "epoch": 0.21356108873329913, + "grad_norm": 728.5630493164062, + "learning_rate": 7.078999251552034e-06, + "loss": 31.9136, + "step": 52860 + }, + { + "epoch": 0.21360148999866677, + "grad_norm": 731.4481811523438, + "learning_rate": 7.077716501856478e-06, + "loss": 33.3455, + "step": 52870 + }, + { + "epoch": 0.21364189126403438, + "grad_norm": 861.4498901367188, + "learning_rate": 7.076433586846555e-06, + "loss": 64.4692, + "step": 52880 + }, + { + "epoch": 0.21368229252940202, + "grad_norm": 600.2234497070312, + "learning_rate": 7.075150506624342e-06, + "loss": 48.3308, + "step": 52890 + }, + { + "epoch": 0.21372269379476966, + "grad_norm": 527.2857666015625, + "learning_rate": 7.073867261291926e-06, + "loss": 50.0515, + "step": 52900 + }, + { + "epoch": 0.21376309506013727, + "grad_norm": 594.2874755859375, + "learning_rate": 7.0725838509514115e-06, + "loss": 39.6426, + "step": 52910 + }, + { + "epoch": 0.2138034963255049, + "grad_norm": 1075.2667236328125, + "learning_rate": 7.07130027570491e-06, + "loss": 68.3749, + "step": 52920 + }, + { + "epoch": 0.21384389759087255, + "grad_norm": 585.2786865234375, + "learning_rate": 7.070016535654551e-06, + "loss": 36.8384, + "step": 52930 + }, + { + "epoch": 0.2138842988562402, + "grad_norm": 740.2340087890625, + "learning_rate": 7.068732630902479e-06, + "loss": 55.1437, + "step": 52940 + }, + { + "epoch": 0.2139247001216078, + "grad_norm": 682.5702514648438, + "learning_rate": 7.067448561550844e-06, + "loss": 43.2231, + "step": 52950 + }, + { + "epoch": 0.21396510138697544, + "grad_norm": 727.7985229492188, + "learning_rate": 7.066164327701815e-06, + "loss": 58.4932, + "step": 52960 + }, + { + "epoch": 0.21400550265234308, + "grad_norm": 408.12554931640625, + "learning_rate": 7.064879929457573e-06, + "loss": 52.6824, + "step": 52970 + }, + { + "epoch": 0.2140459039177107, + "grad_norm": 678.7532348632812, + "learning_rate": 7.063595366920314e-06, + "loss": 43.5814, + "step": 52980 + }, + { + "epoch": 0.21408630518307833, + "grad_norm": 554.4685668945312, + "learning_rate": 7.062310640192239e-06, + "loss": 54.5951, + "step": 52990 + }, + { + "epoch": 0.21412670644844597, + "grad_norm": 759.0743408203125, + "learning_rate": 7.061025749375572e-06, + "loss": 40.8169, + "step": 53000 + }, + { + "epoch": 0.2141671077138136, + "grad_norm": 751.3245239257812, + "learning_rate": 7.059740694572545e-06, + "loss": 55.642, + "step": 53010 + }, + { + "epoch": 0.21420750897918123, + "grad_norm": 823.6691284179688, + "learning_rate": 7.058455475885405e-06, + "loss": 35.4595, + "step": 53020 + }, + { + "epoch": 0.21424791024454887, + "grad_norm": 808.4596557617188, + "learning_rate": 7.05717009341641e-06, + "loss": 43.8247, + "step": 53030 + }, + { + "epoch": 0.21428831150991648, + "grad_norm": 435.7613220214844, + "learning_rate": 7.05588454726783e-06, + "loss": 39.8654, + "step": 53040 + }, + { + "epoch": 0.21432871277528412, + "grad_norm": 761.8973999023438, + "learning_rate": 7.054598837541951e-06, + "loss": 43.7041, + "step": 53050 + }, + { + "epoch": 0.21436911404065176, + "grad_norm": 1002.57177734375, + "learning_rate": 7.053312964341075e-06, + "loss": 56.8311, + "step": 53060 + }, + { + "epoch": 0.21440951530601937, + "grad_norm": 725.5249633789062, + "learning_rate": 7.052026927767508e-06, + "loss": 53.9829, + "step": 53070 + }, + { + "epoch": 0.214449916571387, + "grad_norm": 832.4341430664062, + "learning_rate": 7.050740727923576e-06, + "loss": 42.1309, + "step": 53080 + }, + { + "epoch": 0.21449031783675465, + "grad_norm": 2103.50830078125, + "learning_rate": 7.049454364911615e-06, + "loss": 51.145, + "step": 53090 + }, + { + "epoch": 0.2145307191021223, + "grad_norm": 579.0474853515625, + "learning_rate": 7.048167838833977e-06, + "loss": 29.026, + "step": 53100 + }, + { + "epoch": 0.2145711203674899, + "grad_norm": 646.6517333984375, + "learning_rate": 7.046881149793026e-06, + "loss": 67.8125, + "step": 53110 + }, + { + "epoch": 0.21461152163285754, + "grad_norm": 900.6437377929688, + "learning_rate": 7.045594297891133e-06, + "loss": 46.3305, + "step": 53120 + }, + { + "epoch": 0.21465192289822518, + "grad_norm": 954.8676147460938, + "learning_rate": 7.04430728323069e-06, + "loss": 38.0878, + "step": 53130 + }, + { + "epoch": 0.2146923241635928, + "grad_norm": 607.4523315429688, + "learning_rate": 7.043020105914098e-06, + "loss": 40.7413, + "step": 53140 + }, + { + "epoch": 0.21473272542896044, + "grad_norm": 743.0084838867188, + "learning_rate": 7.041732766043775e-06, + "loss": 59.0576, + "step": 53150 + }, + { + "epoch": 0.21477312669432808, + "grad_norm": 422.3906555175781, + "learning_rate": 7.040445263722145e-06, + "loss": 42.1732, + "step": 53160 + }, + { + "epoch": 0.2148135279596957, + "grad_norm": 568.6240234375, + "learning_rate": 7.039157599051648e-06, + "loss": 33.5946, + "step": 53170 + }, + { + "epoch": 0.21485392922506333, + "grad_norm": 944.6744995117188, + "learning_rate": 7.037869772134741e-06, + "loss": 36.3536, + "step": 53180 + }, + { + "epoch": 0.21489433049043097, + "grad_norm": 779.457275390625, + "learning_rate": 7.036581783073888e-06, + "loss": 44.802, + "step": 53190 + }, + { + "epoch": 0.21493473175579858, + "grad_norm": 993.3648071289062, + "learning_rate": 7.035293631971569e-06, + "loss": 73.5553, + "step": 53200 + }, + { + "epoch": 0.21497513302116622, + "grad_norm": 738.565185546875, + "learning_rate": 7.034005318930277e-06, + "loss": 41.3091, + "step": 53210 + }, + { + "epoch": 0.21501553428653386, + "grad_norm": 818.487060546875, + "learning_rate": 7.032716844052517e-06, + "loss": 51.1636, + "step": 53220 + }, + { + "epoch": 0.21505593555190147, + "grad_norm": 591.581787109375, + "learning_rate": 7.031428207440807e-06, + "loss": 61.1763, + "step": 53230 + }, + { + "epoch": 0.2150963368172691, + "grad_norm": 590.1148681640625, + "learning_rate": 7.030139409197676e-06, + "loss": 36.4519, + "step": 53240 + }, + { + "epoch": 0.21513673808263675, + "grad_norm": 357.1758728027344, + "learning_rate": 7.02885044942567e-06, + "loss": 46.619, + "step": 53250 + }, + { + "epoch": 0.2151771393480044, + "grad_norm": 982.3209228515625, + "learning_rate": 7.027561328227345e-06, + "loss": 42.6811, + "step": 53260 + }, + { + "epoch": 0.215217540613372, + "grad_norm": 764.6962890625, + "learning_rate": 7.02627204570527e-06, + "loss": 30.1889, + "step": 53270 + }, + { + "epoch": 0.21525794187873964, + "grad_norm": 556.0413208007812, + "learning_rate": 7.024982601962027e-06, + "loss": 38.2327, + "step": 53280 + }, + { + "epoch": 0.21529834314410728, + "grad_norm": 604.3740844726562, + "learning_rate": 7.023692997100213e-06, + "loss": 52.7227, + "step": 53290 + }, + { + "epoch": 0.2153387444094749, + "grad_norm": 735.7214965820312, + "learning_rate": 7.0224032312224345e-06, + "loss": 49.6408, + "step": 53300 + }, + { + "epoch": 0.21537914567484254, + "grad_norm": 4784.7998046875, + "learning_rate": 7.021113304431313e-06, + "loss": 105.0257, + "step": 53310 + }, + { + "epoch": 0.21541954694021018, + "grad_norm": 563.2162475585938, + "learning_rate": 7.01982321682948e-06, + "loss": 47.3066, + "step": 53320 + }, + { + "epoch": 0.2154599482055778, + "grad_norm": 506.74755859375, + "learning_rate": 7.018532968519584e-06, + "loss": 62.6589, + "step": 53330 + }, + { + "epoch": 0.21550034947094543, + "grad_norm": 249.8182830810547, + "learning_rate": 7.0172425596042846e-06, + "loss": 29.9312, + "step": 53340 + }, + { + "epoch": 0.21554075073631307, + "grad_norm": 1026.1318359375, + "learning_rate": 7.0159519901862515e-06, + "loss": 55.5077, + "step": 53350 + }, + { + "epoch": 0.21558115200168068, + "grad_norm": 447.23590087890625, + "learning_rate": 7.014661260368171e-06, + "loss": 38.1175, + "step": 53360 + }, + { + "epoch": 0.21562155326704832, + "grad_norm": 608.2694702148438, + "learning_rate": 7.01337037025274e-06, + "loss": 38.778, + "step": 53370 + }, + { + "epoch": 0.21566195453241596, + "grad_norm": 430.6813659667969, + "learning_rate": 7.012079319942668e-06, + "loss": 45.4868, + "step": 53380 + }, + { + "epoch": 0.21570235579778357, + "grad_norm": 899.20556640625, + "learning_rate": 7.01078810954068e-06, + "loss": 56.9402, + "step": 53390 + }, + { + "epoch": 0.2157427570631512, + "grad_norm": 434.4818115234375, + "learning_rate": 7.0094967391495095e-06, + "loss": 47.1917, + "step": 53400 + }, + { + "epoch": 0.21578315832851885, + "grad_norm": 891.284423828125, + "learning_rate": 7.008205208871906e-06, + "loss": 65.6954, + "step": 53410 + }, + { + "epoch": 0.2158235595938865, + "grad_norm": 715.593017578125, + "learning_rate": 7.00691351881063e-06, + "loss": 55.7541, + "step": 53420 + }, + { + "epoch": 0.2158639608592541, + "grad_norm": 615.5816650390625, + "learning_rate": 7.005621669068456e-06, + "loss": 56.0196, + "step": 53430 + }, + { + "epoch": 0.21590436212462175, + "grad_norm": 655.7932739257812, + "learning_rate": 7.004329659748172e-06, + "loss": 38.692, + "step": 53440 + }, + { + "epoch": 0.21594476338998939, + "grad_norm": 279.7879638671875, + "learning_rate": 7.003037490952574e-06, + "loss": 51.2097, + "step": 53450 + }, + { + "epoch": 0.215985164655357, + "grad_norm": 533.7430419921875, + "learning_rate": 7.0017451627844765e-06, + "loss": 42.7369, + "step": 53460 + }, + { + "epoch": 0.21602556592072464, + "grad_norm": 509.7603759765625, + "learning_rate": 7.0004526753467004e-06, + "loss": 38.7973, + "step": 53470 + }, + { + "epoch": 0.21606596718609228, + "grad_norm": 376.4740295410156, + "learning_rate": 6.999160028742089e-06, + "loss": 50.9646, + "step": 53480 + }, + { + "epoch": 0.2161063684514599, + "grad_norm": 787.3605346679688, + "learning_rate": 6.997867223073487e-06, + "loss": 51.2587, + "step": 53490 + }, + { + "epoch": 0.21614676971682753, + "grad_norm": 595.9751586914062, + "learning_rate": 6.996574258443761e-06, + "loss": 45.6339, + "step": 53500 + }, + { + "epoch": 0.21618717098219517, + "grad_norm": 446.9659729003906, + "learning_rate": 6.995281134955784e-06, + "loss": 44.5907, + "step": 53510 + }, + { + "epoch": 0.21622757224756278, + "grad_norm": 366.5422058105469, + "learning_rate": 6.993987852712442e-06, + "loss": 33.3201, + "step": 53520 + }, + { + "epoch": 0.21626797351293042, + "grad_norm": 417.02825927734375, + "learning_rate": 6.992694411816638e-06, + "loss": 61.968, + "step": 53530 + }, + { + "epoch": 0.21630837477829806, + "grad_norm": 453.3203430175781, + "learning_rate": 6.991400812371287e-06, + "loss": 38.1036, + "step": 53540 + }, + { + "epoch": 0.21634877604366567, + "grad_norm": 592.3311767578125, + "learning_rate": 6.990107054479313e-06, + "loss": 53.7675, + "step": 53550 + }, + { + "epoch": 0.21638917730903331, + "grad_norm": 1016.1787109375, + "learning_rate": 6.988813138243652e-06, + "loss": 52.4604, + "step": 53560 + }, + { + "epoch": 0.21642957857440095, + "grad_norm": 586.8818359375, + "learning_rate": 6.987519063767257e-06, + "loss": 39.3914, + "step": 53570 + }, + { + "epoch": 0.2164699798397686, + "grad_norm": 458.52301025390625, + "learning_rate": 6.986224831153092e-06, + "loss": 41.3697, + "step": 53580 + }, + { + "epoch": 0.2165103811051362, + "grad_norm": 536.421630859375, + "learning_rate": 6.984930440504134e-06, + "loss": 52.9655, + "step": 53590 + }, + { + "epoch": 0.21655078237050385, + "grad_norm": 463.3578186035156, + "learning_rate": 6.9836358919233695e-06, + "loss": 45.0425, + "step": 53600 + }, + { + "epoch": 0.2165911836358715, + "grad_norm": 930.7780151367188, + "learning_rate": 6.982341185513799e-06, + "loss": 54.5087, + "step": 53610 + }, + { + "epoch": 0.2166315849012391, + "grad_norm": 346.7239074707031, + "learning_rate": 6.981046321378441e-06, + "loss": 46.0492, + "step": 53620 + }, + { + "epoch": 0.21667198616660674, + "grad_norm": 660.0195922851562, + "learning_rate": 6.979751299620318e-06, + "loss": 109.7135, + "step": 53630 + }, + { + "epoch": 0.21671238743197438, + "grad_norm": 440.25726318359375, + "learning_rate": 6.978456120342469e-06, + "loss": 37.689, + "step": 53640 + }, + { + "epoch": 0.216752788697342, + "grad_norm": 614.2511596679688, + "learning_rate": 6.977160783647947e-06, + "loss": 45.4165, + "step": 53650 + }, + { + "epoch": 0.21679318996270963, + "grad_norm": 1187.5201416015625, + "learning_rate": 6.975865289639815e-06, + "loss": 42.2823, + "step": 53660 + }, + { + "epoch": 0.21683359122807727, + "grad_norm": 571.8782348632812, + "learning_rate": 6.974569638421151e-06, + "loss": 59.7416, + "step": 53670 + }, + { + "epoch": 0.21687399249344488, + "grad_norm": 830.4623413085938, + "learning_rate": 6.973273830095042e-06, + "loss": 47.285, + "step": 53680 + }, + { + "epoch": 0.21691439375881252, + "grad_norm": 614.6996459960938, + "learning_rate": 6.971977864764591e-06, + "loss": 38.1775, + "step": 53690 + }, + { + "epoch": 0.21695479502418016, + "grad_norm": 789.254150390625, + "learning_rate": 6.970681742532911e-06, + "loss": 66.3674, + "step": 53700 + }, + { + "epoch": 0.21699519628954778, + "grad_norm": 991.6810302734375, + "learning_rate": 6.969385463503129e-06, + "loss": 47.4346, + "step": 53710 + }, + { + "epoch": 0.21703559755491542, + "grad_norm": 14252.0068359375, + "learning_rate": 6.968089027778384e-06, + "loss": 99.8792, + "step": 53720 + }, + { + "epoch": 0.21707599882028306, + "grad_norm": 790.77001953125, + "learning_rate": 6.9667924354618275e-06, + "loss": 46.2285, + "step": 53730 + }, + { + "epoch": 0.2171164000856507, + "grad_norm": 666.9031372070312, + "learning_rate": 6.965495686656623e-06, + "loss": 46.2733, + "step": 53740 + }, + { + "epoch": 0.2171568013510183, + "grad_norm": 669.7155151367188, + "learning_rate": 6.964198781465948e-06, + "loss": 41.1374, + "step": 53750 + }, + { + "epoch": 0.21719720261638595, + "grad_norm": 406.15411376953125, + "learning_rate": 6.962901719992989e-06, + "loss": 35.3804, + "step": 53760 + }, + { + "epoch": 0.2172376038817536, + "grad_norm": 755.256103515625, + "learning_rate": 6.961604502340949e-06, + "loss": 46.1026, + "step": 53770 + }, + { + "epoch": 0.2172780051471212, + "grad_norm": 810.091064453125, + "learning_rate": 6.960307128613042e-06, + "loss": 35.6879, + "step": 53780 + }, + { + "epoch": 0.21731840641248884, + "grad_norm": 615.5449829101562, + "learning_rate": 6.959009598912493e-06, + "loss": 61.5976, + "step": 53790 + }, + { + "epoch": 0.21735880767785648, + "grad_norm": 769.8095703125, + "learning_rate": 6.957711913342541e-06, + "loss": 56.504, + "step": 53800 + }, + { + "epoch": 0.2173992089432241, + "grad_norm": 325.61114501953125, + "learning_rate": 6.956414072006437e-06, + "loss": 53.6313, + "step": 53810 + }, + { + "epoch": 0.21743961020859173, + "grad_norm": 642.9010009765625, + "learning_rate": 6.955116075007443e-06, + "loss": 34.7719, + "step": 53820 + }, + { + "epoch": 0.21748001147395937, + "grad_norm": 524.3311157226562, + "learning_rate": 6.953817922448837e-06, + "loss": 64.898, + "step": 53830 + }, + { + "epoch": 0.21752041273932698, + "grad_norm": 912.242919921875, + "learning_rate": 6.9525196144339055e-06, + "loss": 70.4545, + "step": 53840 + }, + { + "epoch": 0.21756081400469462, + "grad_norm": 668.8618774414062, + "learning_rate": 6.951221151065948e-06, + "loss": 63.9754, + "step": 53850 + }, + { + "epoch": 0.21760121527006226, + "grad_norm": 320.6151428222656, + "learning_rate": 6.949922532448279e-06, + "loss": 30.3866, + "step": 53860 + }, + { + "epoch": 0.21764161653542988, + "grad_norm": 510.21746826171875, + "learning_rate": 6.948623758684223e-06, + "loss": 27.041, + "step": 53870 + }, + { + "epoch": 0.21768201780079752, + "grad_norm": 875.3880004882812, + "learning_rate": 6.9473248298771176e-06, + "loss": 60.386, + "step": 53880 + }, + { + "epoch": 0.21772241906616516, + "grad_norm": 181.17555236816406, + "learning_rate": 6.946025746130312e-06, + "loss": 47.359, + "step": 53890 + }, + { + "epoch": 0.2177628203315328, + "grad_norm": 909.2413940429688, + "learning_rate": 6.944726507547169e-06, + "loss": 55.6297, + "step": 53900 + }, + { + "epoch": 0.2178032215969004, + "grad_norm": 518.627685546875, + "learning_rate": 6.943427114231064e-06, + "loss": 30.3371, + "step": 53910 + }, + { + "epoch": 0.21784362286226805, + "grad_norm": 435.1603088378906, + "learning_rate": 6.942127566285382e-06, + "loss": 37.2483, + "step": 53920 + }, + { + "epoch": 0.2178840241276357, + "grad_norm": 759.238037109375, + "learning_rate": 6.940827863813523e-06, + "loss": 42.3018, + "step": 53930 + }, + { + "epoch": 0.2179244253930033, + "grad_norm": 2238.64794921875, + "learning_rate": 6.9395280069188964e-06, + "loss": 74.1954, + "step": 53940 + }, + { + "epoch": 0.21796482665837094, + "grad_norm": 242.63772583007812, + "learning_rate": 6.9382279957049295e-06, + "loss": 36.7179, + "step": 53950 + }, + { + "epoch": 0.21800522792373858, + "grad_norm": 550.920166015625, + "learning_rate": 6.936927830275055e-06, + "loss": 74.0684, + "step": 53960 + }, + { + "epoch": 0.2180456291891062, + "grad_norm": 537.9491577148438, + "learning_rate": 6.935627510732724e-06, + "loss": 33.9198, + "step": 53970 + }, + { + "epoch": 0.21808603045447383, + "grad_norm": 1171.784423828125, + "learning_rate": 6.934327037181394e-06, + "loss": 49.3926, + "step": 53980 + }, + { + "epoch": 0.21812643171984147, + "grad_norm": 811.305419921875, + "learning_rate": 6.933026409724538e-06, + "loss": 40.5747, + "step": 53990 + }, + { + "epoch": 0.21816683298520909, + "grad_norm": 492.3915710449219, + "learning_rate": 6.931725628465643e-06, + "loss": 34.5997, + "step": 54000 + }, + { + "epoch": 0.21820723425057673, + "grad_norm": 575.7453002929688, + "learning_rate": 6.9304246935082065e-06, + "loss": 62.0087, + "step": 54010 + }, + { + "epoch": 0.21824763551594437, + "grad_norm": 700.669921875, + "learning_rate": 6.929123604955735e-06, + "loss": 47.6949, + "step": 54020 + }, + { + "epoch": 0.21828803678131198, + "grad_norm": 573.3309326171875, + "learning_rate": 6.927822362911753e-06, + "loss": 36.7347, + "step": 54030 + }, + { + "epoch": 0.21832843804667962, + "grad_norm": 756.4100341796875, + "learning_rate": 6.926520967479791e-06, + "loss": 46.6211, + "step": 54040 + }, + { + "epoch": 0.21836883931204726, + "grad_norm": 755.5197143554688, + "learning_rate": 6.9252194187634e-06, + "loss": 72.3489, + "step": 54050 + }, + { + "epoch": 0.2184092405774149, + "grad_norm": 590.9158935546875, + "learning_rate": 6.923917716866133e-06, + "loss": 46.8761, + "step": 54060 + }, + { + "epoch": 0.2184496418427825, + "grad_norm": 490.1414794921875, + "learning_rate": 6.922615861891564e-06, + "loss": 35.4525, + "step": 54070 + }, + { + "epoch": 0.21849004310815015, + "grad_norm": 672.7666015625, + "learning_rate": 6.921313853943275e-06, + "loss": 45.0449, + "step": 54080 + }, + { + "epoch": 0.2185304443735178, + "grad_norm": 775.6009521484375, + "learning_rate": 6.9200116931248575e-06, + "loss": 55.573, + "step": 54090 + }, + { + "epoch": 0.2185708456388854, + "grad_norm": 469.2464294433594, + "learning_rate": 6.918709379539924e-06, + "loss": 53.0544, + "step": 54100 + }, + { + "epoch": 0.21861124690425304, + "grad_norm": 762.6869506835938, + "learning_rate": 6.917406913292089e-06, + "loss": 71.2748, + "step": 54110 + }, + { + "epoch": 0.21865164816962068, + "grad_norm": 407.9251708984375, + "learning_rate": 6.916104294484988e-06, + "loss": 70.6682, + "step": 54120 + }, + { + "epoch": 0.2186920494349883, + "grad_norm": 1123.1390380859375, + "learning_rate": 6.91480152322226e-06, + "loss": 67.6451, + "step": 54130 + }, + { + "epoch": 0.21873245070035593, + "grad_norm": 662.4995727539062, + "learning_rate": 6.913498599607563e-06, + "loss": 49.0049, + "step": 54140 + }, + { + "epoch": 0.21877285196572357, + "grad_norm": 684.1514892578125, + "learning_rate": 6.9121955237445644e-06, + "loss": 46.24, + "step": 54150 + }, + { + "epoch": 0.2188132532310912, + "grad_norm": 591.7760009765625, + "learning_rate": 6.910892295736944e-06, + "loss": 35.831, + "step": 54160 + }, + { + "epoch": 0.21885365449645883, + "grad_norm": 423.4569396972656, + "learning_rate": 6.9095889156883934e-06, + "loss": 33.5558, + "step": 54170 + }, + { + "epoch": 0.21889405576182647, + "grad_norm": 420.7939758300781, + "learning_rate": 6.908285383702617e-06, + "loss": 48.0798, + "step": 54180 + }, + { + "epoch": 0.21893445702719408, + "grad_norm": 716.4933471679688, + "learning_rate": 6.906981699883329e-06, + "loss": 57.6833, + "step": 54190 + }, + { + "epoch": 0.21897485829256172, + "grad_norm": 564.2745971679688, + "learning_rate": 6.90567786433426e-06, + "loss": 37.345, + "step": 54200 + }, + { + "epoch": 0.21901525955792936, + "grad_norm": 570.277099609375, + "learning_rate": 6.904373877159149e-06, + "loss": 46.2849, + "step": 54210 + }, + { + "epoch": 0.219055660823297, + "grad_norm": 768.31591796875, + "learning_rate": 6.903069738461749e-06, + "loss": 45.064, + "step": 54220 + }, + { + "epoch": 0.2190960620886646, + "grad_norm": 1730.7943115234375, + "learning_rate": 6.901765448345823e-06, + "loss": 54.5519, + "step": 54230 + }, + { + "epoch": 0.21913646335403225, + "grad_norm": 450.1281433105469, + "learning_rate": 6.900461006915149e-06, + "loss": 37.9456, + "step": 54240 + }, + { + "epoch": 0.2191768646193999, + "grad_norm": 929.2560424804688, + "learning_rate": 6.899156414273514e-06, + "loss": 59.381, + "step": 54250 + }, + { + "epoch": 0.2192172658847675, + "grad_norm": 986.7847290039062, + "learning_rate": 6.89785167052472e-06, + "loss": 47.791, + "step": 54260 + }, + { + "epoch": 0.21925766715013514, + "grad_norm": 658.82568359375, + "learning_rate": 6.896546775772577e-06, + "loss": 43.5379, + "step": 54270 + }, + { + "epoch": 0.21929806841550278, + "grad_norm": 1264.8040771484375, + "learning_rate": 6.8952417301209114e-06, + "loss": 50.3495, + "step": 54280 + }, + { + "epoch": 0.2193384696808704, + "grad_norm": 505.1601867675781, + "learning_rate": 6.893936533673561e-06, + "loss": 45.3253, + "step": 54290 + }, + { + "epoch": 0.21937887094623804, + "grad_norm": 1080.662353515625, + "learning_rate": 6.892631186534371e-06, + "loss": 44.8194, + "step": 54300 + }, + { + "epoch": 0.21941927221160568, + "grad_norm": 44.70790481567383, + "learning_rate": 6.891325688807204e-06, + "loss": 34.7908, + "step": 54310 + }, + { + "epoch": 0.2194596734769733, + "grad_norm": 584.1826171875, + "learning_rate": 6.890020040595932e-06, + "loss": 34.2276, + "step": 54320 + }, + { + "epoch": 0.21950007474234093, + "grad_norm": 531.162353515625, + "learning_rate": 6.88871424200444e-06, + "loss": 40.3016, + "step": 54330 + }, + { + "epoch": 0.21954047600770857, + "grad_norm": 662.1864624023438, + "learning_rate": 6.887408293136621e-06, + "loss": 42.8401, + "step": 54340 + }, + { + "epoch": 0.21958087727307618, + "grad_norm": 794.0258178710938, + "learning_rate": 6.886102194096389e-06, + "loss": 48.1658, + "step": 54350 + }, + { + "epoch": 0.21962127853844382, + "grad_norm": 871.9588623046875, + "learning_rate": 6.884795944987661e-06, + "loss": 37.4216, + "step": 54360 + }, + { + "epoch": 0.21966167980381146, + "grad_norm": 313.5548400878906, + "learning_rate": 6.8834895459143694e-06, + "loss": 59.3293, + "step": 54370 + }, + { + "epoch": 0.2197020810691791, + "grad_norm": 611.6298828125, + "learning_rate": 6.882182996980457e-06, + "loss": 49.3419, + "step": 54380 + }, + { + "epoch": 0.2197424823345467, + "grad_norm": 558.3862915039062, + "learning_rate": 6.880876298289885e-06, + "loss": 27.1308, + "step": 54390 + }, + { + "epoch": 0.21978288359991435, + "grad_norm": 360.9166259765625, + "learning_rate": 6.879569449946617e-06, + "loss": 33.9725, + "step": 54400 + }, + { + "epoch": 0.219823284865282, + "grad_norm": 1138.4752197265625, + "learning_rate": 6.878262452054632e-06, + "loss": 69.0244, + "step": 54410 + }, + { + "epoch": 0.2198636861306496, + "grad_norm": 871.0791015625, + "learning_rate": 6.876955304717925e-06, + "loss": 59.2687, + "step": 54420 + }, + { + "epoch": 0.21990408739601724, + "grad_norm": 590.9871215820312, + "learning_rate": 6.875648008040499e-06, + "loss": 55.6743, + "step": 54430 + }, + { + "epoch": 0.21994448866138488, + "grad_norm": 423.3692626953125, + "learning_rate": 6.874340562126368e-06, + "loss": 52.967, + "step": 54440 + }, + { + "epoch": 0.2199848899267525, + "grad_norm": 711.0861206054688, + "learning_rate": 6.873032967079562e-06, + "loss": 37.9035, + "step": 54450 + }, + { + "epoch": 0.22002529119212014, + "grad_norm": 636.9541015625, + "learning_rate": 6.871725223004118e-06, + "loss": 44.2598, + "step": 54460 + }, + { + "epoch": 0.22006569245748778, + "grad_norm": 857.3299560546875, + "learning_rate": 6.870417330004086e-06, + "loss": 40.3004, + "step": 54470 + }, + { + "epoch": 0.2201060937228554, + "grad_norm": 810.0281372070312, + "learning_rate": 6.869109288183534e-06, + "loss": 42.4852, + "step": 54480 + }, + { + "epoch": 0.22014649498822303, + "grad_norm": 1230.8558349609375, + "learning_rate": 6.867801097646534e-06, + "loss": 56.5736, + "step": 54490 + }, + { + "epoch": 0.22018689625359067, + "grad_norm": 435.6243896484375, + "learning_rate": 6.866492758497171e-06, + "loss": 42.9811, + "step": 54500 + }, + { + "epoch": 0.22022729751895828, + "grad_norm": 570.0313110351562, + "learning_rate": 6.865184270839546e-06, + "loss": 25.4508, + "step": 54510 + }, + { + "epoch": 0.22026769878432592, + "grad_norm": 658.4278564453125, + "learning_rate": 6.863875634777767e-06, + "loss": 46.7509, + "step": 54520 + }, + { + "epoch": 0.22030810004969356, + "grad_norm": 608.4541625976562, + "learning_rate": 6.86256685041596e-06, + "loss": 39.0254, + "step": 54530 + }, + { + "epoch": 0.2203485013150612, + "grad_norm": 216.2620086669922, + "learning_rate": 6.861257917858257e-06, + "loss": 35.8314, + "step": 54540 + }, + { + "epoch": 0.2203889025804288, + "grad_norm": 772.1763916015625, + "learning_rate": 6.859948837208802e-06, + "loss": 47.3703, + "step": 54550 + }, + { + "epoch": 0.22042930384579645, + "grad_norm": 872.4230346679688, + "learning_rate": 6.8586396085717536e-06, + "loss": 46.5018, + "step": 54560 + }, + { + "epoch": 0.2204697051111641, + "grad_norm": 907.5761108398438, + "learning_rate": 6.8573302320512836e-06, + "loss": 34.8815, + "step": 54570 + }, + { + "epoch": 0.2205101063765317, + "grad_norm": 423.8174743652344, + "learning_rate": 6.85602070775157e-06, + "loss": 33.9442, + "step": 54580 + }, + { + "epoch": 0.22055050764189935, + "grad_norm": 582.1429443359375, + "learning_rate": 6.854711035776806e-06, + "loss": 65.9389, + "step": 54590 + }, + { + "epoch": 0.22059090890726699, + "grad_norm": 550.8157348632812, + "learning_rate": 6.853401216231198e-06, + "loss": 72.7223, + "step": 54600 + }, + { + "epoch": 0.2206313101726346, + "grad_norm": 623.2466430664062, + "learning_rate": 6.8520912492189605e-06, + "loss": 37.4688, + "step": 54610 + }, + { + "epoch": 0.22067171143800224, + "grad_norm": 3190.148681640625, + "learning_rate": 6.850781134844323e-06, + "loss": 57.139, + "step": 54620 + }, + { + "epoch": 0.22071211270336988, + "grad_norm": 926.8209838867188, + "learning_rate": 6.8494708732115235e-06, + "loss": 46.188, + "step": 54630 + }, + { + "epoch": 0.2207525139687375, + "grad_norm": 757.6647338867188, + "learning_rate": 6.8481604644248155e-06, + "loss": 51.8081, + "step": 54640 + }, + { + "epoch": 0.22079291523410513, + "grad_norm": 351.1914367675781, + "learning_rate": 6.846849908588461e-06, + "loss": 47.889, + "step": 54650 + }, + { + "epoch": 0.22083331649947277, + "grad_norm": 605.3834838867188, + "learning_rate": 6.845539205806735e-06, + "loss": 36.6061, + "step": 54660 + }, + { + "epoch": 0.22087371776484038, + "grad_norm": 424.5836486816406, + "learning_rate": 6.844228356183924e-06, + "loss": 58.7972, + "step": 54670 + }, + { + "epoch": 0.22091411903020802, + "grad_norm": 1018.7999877929688, + "learning_rate": 6.842917359824326e-06, + "loss": 46.3858, + "step": 54680 + }, + { + "epoch": 0.22095452029557566, + "grad_norm": 740.5365600585938, + "learning_rate": 6.841606216832253e-06, + "loss": 37.1616, + "step": 54690 + }, + { + "epoch": 0.22099492156094327, + "grad_norm": 974.8652954101562, + "learning_rate": 6.840294927312024e-06, + "loss": 38.5104, + "step": 54700 + }, + { + "epoch": 0.22103532282631091, + "grad_norm": 816.814453125, + "learning_rate": 6.838983491367974e-06, + "loss": 47.8214, + "step": 54710 + }, + { + "epoch": 0.22107572409167855, + "grad_norm": 525.71044921875, + "learning_rate": 6.837671909104447e-06, + "loss": 48.3607, + "step": 54720 + }, + { + "epoch": 0.2211161253570462, + "grad_norm": 3194.286376953125, + "learning_rate": 6.836360180625801e-06, + "loss": 74.2742, + "step": 54730 + }, + { + "epoch": 0.2211565266224138, + "grad_norm": 414.290283203125, + "learning_rate": 6.835048306036404e-06, + "loss": 35.6531, + "step": 54740 + }, + { + "epoch": 0.22119692788778145, + "grad_norm": 409.2863464355469, + "learning_rate": 6.833736285440632e-06, + "loss": 44.9234, + "step": 54750 + }, + { + "epoch": 0.2212373291531491, + "grad_norm": 958.4154663085938, + "learning_rate": 6.832424118942881e-06, + "loss": 69.0475, + "step": 54760 + }, + { + "epoch": 0.2212777304185167, + "grad_norm": 655.8173828125, + "learning_rate": 6.831111806647552e-06, + "loss": 61.3611, + "step": 54770 + }, + { + "epoch": 0.22131813168388434, + "grad_norm": 481.6536865234375, + "learning_rate": 6.829799348659061e-06, + "loss": 33.9049, + "step": 54780 + }, + { + "epoch": 0.22135853294925198, + "grad_norm": 809.3758544921875, + "learning_rate": 6.828486745081835e-06, + "loss": 53.7308, + "step": 54790 + }, + { + "epoch": 0.2213989342146196, + "grad_norm": 624.48974609375, + "learning_rate": 6.8271739960203065e-06, + "loss": 45.1322, + "step": 54800 + }, + { + "epoch": 0.22143933547998723, + "grad_norm": 911.08203125, + "learning_rate": 6.825861101578931e-06, + "loss": 57.4674, + "step": 54810 + }, + { + "epoch": 0.22147973674535487, + "grad_norm": 1179.452392578125, + "learning_rate": 6.824548061862166e-06, + "loss": 36.5426, + "step": 54820 + }, + { + "epoch": 0.22152013801072248, + "grad_norm": 564.9418334960938, + "learning_rate": 6.823234876974489e-06, + "loss": 42.9665, + "step": 54830 + }, + { + "epoch": 0.22156053927609012, + "grad_norm": 409.09228515625, + "learning_rate": 6.8219215470203756e-06, + "loss": 51.0909, + "step": 54840 + }, + { + "epoch": 0.22160094054145776, + "grad_norm": 764.6719360351562, + "learning_rate": 6.820608072104329e-06, + "loss": 74.6919, + "step": 54850 + }, + { + "epoch": 0.22164134180682538, + "grad_norm": 267.8291931152344, + "learning_rate": 6.819294452330853e-06, + "loss": 35.3009, + "step": 54860 + }, + { + "epoch": 0.22168174307219302, + "grad_norm": 774.4483032226562, + "learning_rate": 6.817980687804467e-06, + "loss": 53.1735, + "step": 54870 + }, + { + "epoch": 0.22172214433756066, + "grad_norm": 427.7686767578125, + "learning_rate": 6.8166667786297e-06, + "loss": 48.7483, + "step": 54880 + }, + { + "epoch": 0.2217625456029283, + "grad_norm": 412.5601501464844, + "learning_rate": 6.815352724911095e-06, + "loss": 58.3952, + "step": 54890 + }, + { + "epoch": 0.2218029468682959, + "grad_norm": 472.51348876953125, + "learning_rate": 6.814038526753205e-06, + "loss": 37.2425, + "step": 54900 + }, + { + "epoch": 0.22184334813366355, + "grad_norm": 715.309814453125, + "learning_rate": 6.812724184260596e-06, + "loss": 39.9846, + "step": 54910 + }, + { + "epoch": 0.2218837493990312, + "grad_norm": 476.5248107910156, + "learning_rate": 6.811409697537843e-06, + "loss": 44.4196, + "step": 54920 + }, + { + "epoch": 0.2219241506643988, + "grad_norm": 695.730712890625, + "learning_rate": 6.810095066689533e-06, + "loss": 63.0258, + "step": 54930 + }, + { + "epoch": 0.22196455192976644, + "grad_norm": 694.9153442382812, + "learning_rate": 6.808780291820264e-06, + "loss": 60.7545, + "step": 54940 + }, + { + "epoch": 0.22200495319513408, + "grad_norm": 619.1652221679688, + "learning_rate": 6.80746537303465e-06, + "loss": 32.6867, + "step": 54950 + }, + { + "epoch": 0.2220453544605017, + "grad_norm": 453.65350341796875, + "learning_rate": 6.806150310437312e-06, + "loss": 47.0478, + "step": 54960 + }, + { + "epoch": 0.22208575572586933, + "grad_norm": 825.5439453125, + "learning_rate": 6.804835104132883e-06, + "loss": 45.5884, + "step": 54970 + }, + { + "epoch": 0.22212615699123697, + "grad_norm": 599.8566284179688, + "learning_rate": 6.803519754226007e-06, + "loss": 54.5817, + "step": 54980 + }, + { + "epoch": 0.22216655825660458, + "grad_norm": 888.5885620117188, + "learning_rate": 6.80220426082134e-06, + "loss": 25.1306, + "step": 54990 + }, + { + "epoch": 0.22220695952197222, + "grad_norm": 1242.8448486328125, + "learning_rate": 6.800888624023552e-06, + "loss": 76.5537, + "step": 55000 + }, + { + "epoch": 0.22224736078733986, + "grad_norm": 503.8450622558594, + "learning_rate": 6.799572843937322e-06, + "loss": 42.9535, + "step": 55010 + }, + { + "epoch": 0.22228776205270748, + "grad_norm": 819.0281372070312, + "learning_rate": 6.79825692066734e-06, + "loss": 45.8547, + "step": 55020 + }, + { + "epoch": 0.22232816331807512, + "grad_norm": 668.2685546875, + "learning_rate": 6.796940854318306e-06, + "loss": 52.2774, + "step": 55030 + }, + { + "epoch": 0.22236856458344276, + "grad_norm": 479.3298645019531, + "learning_rate": 6.795624644994936e-06, + "loss": 46.8034, + "step": 55040 + }, + { + "epoch": 0.2224089658488104, + "grad_norm": 128.8311309814453, + "learning_rate": 6.794308292801954e-06, + "loss": 54.5953, + "step": 55050 + }, + { + "epoch": 0.222449367114178, + "grad_norm": 621.6553955078125, + "learning_rate": 6.792991797844095e-06, + "loss": 46.0012, + "step": 55060 + }, + { + "epoch": 0.22248976837954565, + "grad_norm": 336.5859069824219, + "learning_rate": 6.791675160226109e-06, + "loss": 31.5495, + "step": 55070 + }, + { + "epoch": 0.2225301696449133, + "grad_norm": 518.2781372070312, + "learning_rate": 6.790358380052752e-06, + "loss": 42.9662, + "step": 55080 + }, + { + "epoch": 0.2225705709102809, + "grad_norm": 368.1739807128906, + "learning_rate": 6.789041457428796e-06, + "loss": 53.6045, + "step": 55090 + }, + { + "epoch": 0.22261097217564854, + "grad_norm": 631.5914916992188, + "learning_rate": 6.7877243924590205e-06, + "loss": 62.7045, + "step": 55100 + }, + { + "epoch": 0.22265137344101618, + "grad_norm": 674.2762451171875, + "learning_rate": 6.7864071852482205e-06, + "loss": 49.4443, + "step": 55110 + }, + { + "epoch": 0.2226917747063838, + "grad_norm": 569.96142578125, + "learning_rate": 6.7850898359012e-06, + "loss": 50.1245, + "step": 55120 + }, + { + "epoch": 0.22273217597175143, + "grad_norm": 821.7766723632812, + "learning_rate": 6.7837723445227724e-06, + "loss": 46.7975, + "step": 55130 + }, + { + "epoch": 0.22277257723711907, + "grad_norm": 724.6019897460938, + "learning_rate": 6.782454711217767e-06, + "loss": 43.0379, + "step": 55140 + }, + { + "epoch": 0.22281297850248669, + "grad_norm": 363.89599609375, + "learning_rate": 6.78113693609102e-06, + "loss": 66.6985, + "step": 55150 + }, + { + "epoch": 0.22285337976785433, + "grad_norm": 766.9359741210938, + "learning_rate": 6.77981901924738e-06, + "loss": 60.0962, + "step": 55160 + }, + { + "epoch": 0.22289378103322197, + "grad_norm": 478.49761962890625, + "learning_rate": 6.7785009607917095e-06, + "loss": 42.5678, + "step": 55170 + }, + { + "epoch": 0.22293418229858958, + "grad_norm": 531.2022705078125, + "learning_rate": 6.777182760828881e-06, + "loss": 45.1689, + "step": 55180 + }, + { + "epoch": 0.22297458356395722, + "grad_norm": 576.1405639648438, + "learning_rate": 6.7758644194637755e-06, + "loss": 78.0099, + "step": 55190 + }, + { + "epoch": 0.22301498482932486, + "grad_norm": 385.7039489746094, + "learning_rate": 6.774545936801289e-06, + "loss": 36.7261, + "step": 55200 + }, + { + "epoch": 0.2230553860946925, + "grad_norm": 606.2195434570312, + "learning_rate": 6.773227312946327e-06, + "loss": 31.0199, + "step": 55210 + }, + { + "epoch": 0.2230957873600601, + "grad_norm": 485.9703063964844, + "learning_rate": 6.771908548003803e-06, + "loss": 40.6115, + "step": 55220 + }, + { + "epoch": 0.22313618862542775, + "grad_norm": 896.8988037109375, + "learning_rate": 6.77058964207865e-06, + "loss": 41.6147, + "step": 55230 + }, + { + "epoch": 0.2231765898907954, + "grad_norm": 245.3710479736328, + "learning_rate": 6.769270595275804e-06, + "loss": 38.9666, + "step": 55240 + }, + { + "epoch": 0.223216991156163, + "grad_norm": 1829.5657958984375, + "learning_rate": 6.767951407700217e-06, + "loss": 51.5566, + "step": 55250 + }, + { + "epoch": 0.22325739242153064, + "grad_norm": 606.4932861328125, + "learning_rate": 6.766632079456852e-06, + "loss": 48.8662, + "step": 55260 + }, + { + "epoch": 0.22329779368689828, + "grad_norm": 446.11346435546875, + "learning_rate": 6.765312610650677e-06, + "loss": 33.2803, + "step": 55270 + }, + { + "epoch": 0.2233381949522659, + "grad_norm": 476.4100036621094, + "learning_rate": 6.763993001386681e-06, + "loss": 60.5214, + "step": 55280 + }, + { + "epoch": 0.22337859621763353, + "grad_norm": 462.5067443847656, + "learning_rate": 6.762673251769858e-06, + "loss": 40.2899, + "step": 55290 + }, + { + "epoch": 0.22341899748300117, + "grad_norm": 504.2951965332031, + "learning_rate": 6.761353361905214e-06, + "loss": 52.4844, + "step": 55300 + }, + { + "epoch": 0.2234593987483688, + "grad_norm": 783.8707885742188, + "learning_rate": 6.7600333318977655e-06, + "loss": 45.4845, + "step": 55310 + }, + { + "epoch": 0.22349980001373643, + "grad_norm": 938.263427734375, + "learning_rate": 6.758713161852541e-06, + "loss": 53.5728, + "step": 55320 + }, + { + "epoch": 0.22354020127910407, + "grad_norm": 319.70440673828125, + "learning_rate": 6.757392851874584e-06, + "loss": 26.493, + "step": 55330 + }, + { + "epoch": 0.22358060254447168, + "grad_norm": 903.2843627929688, + "learning_rate": 6.756072402068943e-06, + "loss": 64.1053, + "step": 55340 + }, + { + "epoch": 0.22362100380983932, + "grad_norm": 1832.6444091796875, + "learning_rate": 6.75475181254068e-06, + "loss": 66.4452, + "step": 55350 + }, + { + "epoch": 0.22366140507520696, + "grad_norm": 469.994140625, + "learning_rate": 6.753431083394868e-06, + "loss": 66.304, + "step": 55360 + }, + { + "epoch": 0.2237018063405746, + "grad_norm": 573.45068359375, + "learning_rate": 6.75211021473659e-06, + "loss": 40.0917, + "step": 55370 + }, + { + "epoch": 0.2237422076059422, + "grad_norm": 661.8993530273438, + "learning_rate": 6.750789206670945e-06, + "loss": 42.8067, + "step": 55380 + }, + { + "epoch": 0.22378260887130985, + "grad_norm": 445.6514892578125, + "learning_rate": 6.749468059303039e-06, + "loss": 43.0675, + "step": 55390 + }, + { + "epoch": 0.2238230101366775, + "grad_norm": 566.9107055664062, + "learning_rate": 6.748146772737988e-06, + "loss": 39.0637, + "step": 55400 + }, + { + "epoch": 0.2238634114020451, + "grad_norm": 346.462646484375, + "learning_rate": 6.7468253470809205e-06, + "loss": 31.7741, + "step": 55410 + }, + { + "epoch": 0.22390381266741274, + "grad_norm": 325.1048889160156, + "learning_rate": 6.745503782436976e-06, + "loss": 52.311, + "step": 55420 + }, + { + "epoch": 0.22394421393278038, + "grad_norm": 282.2523498535156, + "learning_rate": 6.7441820789113085e-06, + "loss": 39.3717, + "step": 55430 + }, + { + "epoch": 0.223984615198148, + "grad_norm": 498.2767639160156, + "learning_rate": 6.7428602366090764e-06, + "loss": 43.0168, + "step": 55440 + }, + { + "epoch": 0.22402501646351564, + "grad_norm": 548.7891845703125, + "learning_rate": 6.741538255635454e-06, + "loss": 42.5905, + "step": 55450 + }, + { + "epoch": 0.22406541772888328, + "grad_norm": 208.3527069091797, + "learning_rate": 6.740216136095626e-06, + "loss": 31.6547, + "step": 55460 + }, + { + "epoch": 0.2241058189942509, + "grad_norm": 321.1961669921875, + "learning_rate": 6.738893878094786e-06, + "loss": 45.5326, + "step": 55470 + }, + { + "epoch": 0.22414622025961853, + "grad_norm": 581.4257202148438, + "learning_rate": 6.737571481738141e-06, + "loss": 48.1782, + "step": 55480 + }, + { + "epoch": 0.22418662152498617, + "grad_norm": 536.7522583007812, + "learning_rate": 6.736248947130907e-06, + "loss": 41.7099, + "step": 55490 + }, + { + "epoch": 0.22422702279035378, + "grad_norm": 870.3583374023438, + "learning_rate": 6.734926274378313e-06, + "loss": 44.808, + "step": 55500 + }, + { + "epoch": 0.22426742405572142, + "grad_norm": 1111.5572509765625, + "learning_rate": 6.733603463585598e-06, + "loss": 54.9259, + "step": 55510 + }, + { + "epoch": 0.22430782532108906, + "grad_norm": 477.1290588378906, + "learning_rate": 6.73228051485801e-06, + "loss": 50.8869, + "step": 55520 + }, + { + "epoch": 0.2243482265864567, + "grad_norm": 615.2061157226562, + "learning_rate": 6.7309574283008125e-06, + "loss": 56.2442, + "step": 55530 + }, + { + "epoch": 0.2243886278518243, + "grad_norm": 728.2523803710938, + "learning_rate": 6.729634204019277e-06, + "loss": 53.0035, + "step": 55540 + }, + { + "epoch": 0.22442902911719195, + "grad_norm": 743.2389526367188, + "learning_rate": 6.7283108421186835e-06, + "loss": 62.9054, + "step": 55550 + }, + { + "epoch": 0.2244694303825596, + "grad_norm": 616.263916015625, + "learning_rate": 6.726987342704331e-06, + "loss": 40.6529, + "step": 55560 + }, + { + "epoch": 0.2245098316479272, + "grad_norm": 774.1998901367188, + "learning_rate": 6.72566370588152e-06, + "loss": 46.4587, + "step": 55570 + }, + { + "epoch": 0.22455023291329484, + "grad_norm": 532.6005859375, + "learning_rate": 6.724339931755568e-06, + "loss": 25.9825, + "step": 55580 + }, + { + "epoch": 0.22459063417866248, + "grad_norm": 672.2699584960938, + "learning_rate": 6.7230160204318e-06, + "loss": 65.6449, + "step": 55590 + }, + { + "epoch": 0.2246310354440301, + "grad_norm": 675.6005249023438, + "learning_rate": 6.721691972015557e-06, + "loss": 28.1062, + "step": 55600 + }, + { + "epoch": 0.22467143670939774, + "grad_norm": 1068.322265625, + "learning_rate": 6.720367786612185e-06, + "loss": 53.7093, + "step": 55610 + }, + { + "epoch": 0.22471183797476538, + "grad_norm": 431.6038818359375, + "learning_rate": 6.719043464327043e-06, + "loss": 29.514, + "step": 55620 + }, + { + "epoch": 0.224752239240133, + "grad_norm": 425.9440002441406, + "learning_rate": 6.717719005265502e-06, + "loss": 66.4312, + "step": 55630 + }, + { + "epoch": 0.22479264050550063, + "grad_norm": 509.307373046875, + "learning_rate": 6.716394409532944e-06, + "loss": 46.8041, + "step": 55640 + }, + { + "epoch": 0.22483304177086827, + "grad_norm": 439.1161804199219, + "learning_rate": 6.715069677234758e-06, + "loss": 41.741, + "step": 55650 + }, + { + "epoch": 0.22487344303623588, + "grad_norm": 896.5545654296875, + "learning_rate": 6.713744808476349e-06, + "loss": 54.8341, + "step": 55660 + }, + { + "epoch": 0.22491384430160352, + "grad_norm": 802.5460205078125, + "learning_rate": 6.712419803363132e-06, + "loss": 47.4479, + "step": 55670 + }, + { + "epoch": 0.22495424556697116, + "grad_norm": 182.49765014648438, + "learning_rate": 6.711094662000529e-06, + "loss": 47.8153, + "step": 55680 + }, + { + "epoch": 0.2249946468323388, + "grad_norm": 677.7386474609375, + "learning_rate": 6.709769384493978e-06, + "loss": 74.3336, + "step": 55690 + }, + { + "epoch": 0.2250350480977064, + "grad_norm": 2979.65283203125, + "learning_rate": 6.708443970948923e-06, + "loss": 56.4762, + "step": 55700 + }, + { + "epoch": 0.22507544936307405, + "grad_norm": 885.204345703125, + "learning_rate": 6.707118421470822e-06, + "loss": 51.7887, + "step": 55710 + }, + { + "epoch": 0.2251158506284417, + "grad_norm": 1178.65185546875, + "learning_rate": 6.705792736165142e-06, + "loss": 49.2251, + "step": 55720 + }, + { + "epoch": 0.2251562518938093, + "grad_norm": 518.1280517578125, + "learning_rate": 6.7044669151373645e-06, + "loss": 61.4405, + "step": 55730 + }, + { + "epoch": 0.22519665315917695, + "grad_norm": 538.97265625, + "learning_rate": 6.7031409584929765e-06, + "loss": 51.3991, + "step": 55740 + }, + { + "epoch": 0.22523705442454459, + "grad_norm": 790.5933227539062, + "learning_rate": 6.701814866337477e-06, + "loss": 51.5159, + "step": 55750 + }, + { + "epoch": 0.2252774556899122, + "grad_norm": 669.2061157226562, + "learning_rate": 6.700488638776379e-06, + "loss": 49.5673, + "step": 55760 + }, + { + "epoch": 0.22531785695527984, + "grad_norm": 428.3297119140625, + "learning_rate": 6.699162275915208e-06, + "loss": 37.612, + "step": 55770 + }, + { + "epoch": 0.22535825822064748, + "grad_norm": 211.5701904296875, + "learning_rate": 6.6978357778594896e-06, + "loss": 61.2684, + "step": 55780 + }, + { + "epoch": 0.2253986594860151, + "grad_norm": 550.987548828125, + "learning_rate": 6.69650914471477e-06, + "loss": 42.0971, + "step": 55790 + }, + { + "epoch": 0.22543906075138273, + "grad_norm": 529.25439453125, + "learning_rate": 6.695182376586603e-06, + "loss": 39.4691, + "step": 55800 + }, + { + "epoch": 0.22547946201675037, + "grad_norm": 201.97569274902344, + "learning_rate": 6.6938554735805565e-06, + "loss": 30.0844, + "step": 55810 + }, + { + "epoch": 0.22551986328211798, + "grad_norm": 409.1141357421875, + "learning_rate": 6.6925284358022035e-06, + "loss": 49.2336, + "step": 55820 + }, + { + "epoch": 0.22556026454748562, + "grad_norm": 649.8071899414062, + "learning_rate": 6.69120126335713e-06, + "loss": 44.6068, + "step": 55830 + }, + { + "epoch": 0.22560066581285326, + "grad_norm": 803.1749267578125, + "learning_rate": 6.689873956350932e-06, + "loss": 35.6512, + "step": 55840 + }, + { + "epoch": 0.2256410670782209, + "grad_norm": 536.291015625, + "learning_rate": 6.688546514889221e-06, + "loss": 40.9719, + "step": 55850 + }, + { + "epoch": 0.22568146834358851, + "grad_norm": 1277.4453125, + "learning_rate": 6.687218939077613e-06, + "loss": 51.9526, + "step": 55860 + }, + { + "epoch": 0.22572186960895615, + "grad_norm": 839.084228515625, + "learning_rate": 6.685891229021736e-06, + "loss": 47.9897, + "step": 55870 + }, + { + "epoch": 0.2257622708743238, + "grad_norm": 375.78631591796875, + "learning_rate": 6.6845633848272315e-06, + "loss": 46.8047, + "step": 55880 + }, + { + "epoch": 0.2258026721396914, + "grad_norm": 804.9295654296875, + "learning_rate": 6.68323540659975e-06, + "loss": 50.1948, + "step": 55890 + }, + { + "epoch": 0.22584307340505905, + "grad_norm": 969.8311767578125, + "learning_rate": 6.681907294444952e-06, + "loss": 44.3812, + "step": 55900 + }, + { + "epoch": 0.2258834746704267, + "grad_norm": 413.19512939453125, + "learning_rate": 6.6805790484685094e-06, + "loss": 40.0582, + "step": 55910 + }, + { + "epoch": 0.2259238759357943, + "grad_norm": 1078.072021484375, + "learning_rate": 6.679250668776105e-06, + "loss": 39.8081, + "step": 55920 + }, + { + "epoch": 0.22596427720116194, + "grad_norm": 622.490234375, + "learning_rate": 6.677922155473432e-06, + "loss": 51.5911, + "step": 55930 + }, + { + "epoch": 0.22600467846652958, + "grad_norm": 369.42498779296875, + "learning_rate": 6.676593508666192e-06, + "loss": 41.1671, + "step": 55940 + }, + { + "epoch": 0.2260450797318972, + "grad_norm": 433.25811767578125, + "learning_rate": 6.675264728460103e-06, + "loss": 37.081, + "step": 55950 + }, + { + "epoch": 0.22608548099726483, + "grad_norm": 568.7027587890625, + "learning_rate": 6.673935814960887e-06, + "loss": 57.7069, + "step": 55960 + }, + { + "epoch": 0.22612588226263247, + "grad_norm": 603.1103515625, + "learning_rate": 6.672606768274281e-06, + "loss": 40.4617, + "step": 55970 + }, + { + "epoch": 0.22616628352800008, + "grad_norm": 1139.7769775390625, + "learning_rate": 6.67127758850603e-06, + "loss": 40.6864, + "step": 55980 + }, + { + "epoch": 0.22620668479336772, + "grad_norm": 589.6168212890625, + "learning_rate": 6.669948275761893e-06, + "loss": 34.2788, + "step": 55990 + }, + { + "epoch": 0.22624708605873536, + "grad_norm": 230.50637817382812, + "learning_rate": 6.668618830147634e-06, + "loss": 41.0629, + "step": 56000 + }, + { + "epoch": 0.226287487324103, + "grad_norm": 468.4389953613281, + "learning_rate": 6.667289251769033e-06, + "loss": 41.7365, + "step": 56010 + }, + { + "epoch": 0.22632788858947062, + "grad_norm": 454.0594787597656, + "learning_rate": 6.6659595407318775e-06, + "loss": 41.983, + "step": 56020 + }, + { + "epoch": 0.22636828985483826, + "grad_norm": 830.2134399414062, + "learning_rate": 6.664629697141969e-06, + "loss": 38.8899, + "step": 56030 + }, + { + "epoch": 0.2264086911202059, + "grad_norm": 1085.2601318359375, + "learning_rate": 6.663299721105113e-06, + "loss": 42.8715, + "step": 56040 + }, + { + "epoch": 0.2264490923855735, + "grad_norm": 590.5885620117188, + "learning_rate": 6.661969612727133e-06, + "loss": 48.8924, + "step": 56050 + }, + { + "epoch": 0.22648949365094115, + "grad_norm": 850.6708374023438, + "learning_rate": 6.660639372113858e-06, + "loss": 61.1752, + "step": 56060 + }, + { + "epoch": 0.2265298949163088, + "grad_norm": 309.1644287109375, + "learning_rate": 6.65930899937113e-06, + "loss": 52.0125, + "step": 56070 + }, + { + "epoch": 0.2265702961816764, + "grad_norm": 535.253662109375, + "learning_rate": 6.657978494604799e-06, + "loss": 58.5695, + "step": 56080 + }, + { + "epoch": 0.22661069744704404, + "grad_norm": 688.1857299804688, + "learning_rate": 6.656647857920728e-06, + "loss": 49.479, + "step": 56090 + }, + { + "epoch": 0.22665109871241168, + "grad_norm": 718.3746948242188, + "learning_rate": 6.655317089424791e-06, + "loss": 48.7625, + "step": 56100 + }, + { + "epoch": 0.2266914999777793, + "grad_norm": 609.1416625976562, + "learning_rate": 6.6539861892228695e-06, + "loss": 36.7984, + "step": 56110 + }, + { + "epoch": 0.22673190124314693, + "grad_norm": 245.2581329345703, + "learning_rate": 6.652655157420859e-06, + "loss": 29.8116, + "step": 56120 + }, + { + "epoch": 0.22677230250851457, + "grad_norm": 521.8516845703125, + "learning_rate": 6.651323994124661e-06, + "loss": 46.0755, + "step": 56130 + }, + { + "epoch": 0.22681270377388218, + "grad_norm": 714.7499389648438, + "learning_rate": 6.649992699440191e-06, + "loss": 42.7365, + "step": 56140 + }, + { + "epoch": 0.22685310503924982, + "grad_norm": 560.3165283203125, + "learning_rate": 6.648661273473375e-06, + "loss": 35.3493, + "step": 56150 + }, + { + "epoch": 0.22689350630461746, + "grad_norm": 602.651611328125, + "learning_rate": 6.6473297163301485e-06, + "loss": 50.3261, + "step": 56160 + }, + { + "epoch": 0.2269339075699851, + "grad_norm": 1904.8004150390625, + "learning_rate": 6.645998028116455e-06, + "loss": 57.5763, + "step": 56170 + }, + { + "epoch": 0.22697430883535272, + "grad_norm": 1206.6541748046875, + "learning_rate": 6.6446662089382545e-06, + "loss": 41.4296, + "step": 56180 + }, + { + "epoch": 0.22701471010072036, + "grad_norm": 435.66119384765625, + "learning_rate": 6.643334258901511e-06, + "loss": 39.747, + "step": 56190 + }, + { + "epoch": 0.227055111366088, + "grad_norm": 762.4393310546875, + "learning_rate": 6.642002178112202e-06, + "loss": 57.2834, + "step": 56200 + }, + { + "epoch": 0.2270955126314556, + "grad_norm": 483.8197937011719, + "learning_rate": 6.640669966676316e-06, + "loss": 31.7781, + "step": 56210 + }, + { + "epoch": 0.22713591389682325, + "grad_norm": 1017.3893432617188, + "learning_rate": 6.6393376246998485e-06, + "loss": 52.463, + "step": 56220 + }, + { + "epoch": 0.2271763151621909, + "grad_norm": 728.10693359375, + "learning_rate": 6.638005152288811e-06, + "loss": 40.0086, + "step": 56230 + }, + { + "epoch": 0.2272167164275585, + "grad_norm": 703.792724609375, + "learning_rate": 6.636672549549221e-06, + "loss": 37.7492, + "step": 56240 + }, + { + "epoch": 0.22725711769292614, + "grad_norm": 445.5412292480469, + "learning_rate": 6.635339816587109e-06, + "loss": 40.1628, + "step": 56250 + }, + { + "epoch": 0.22729751895829378, + "grad_norm": 941.9268798828125, + "learning_rate": 6.634006953508512e-06, + "loss": 49.6389, + "step": 56260 + }, + { + "epoch": 0.2273379202236614, + "grad_norm": 877.1407470703125, + "learning_rate": 6.63267396041948e-06, + "loss": 44.4499, + "step": 56270 + }, + { + "epoch": 0.22737832148902903, + "grad_norm": 1341.0885009765625, + "learning_rate": 6.631340837426075e-06, + "loss": 47.847, + "step": 56280 + }, + { + "epoch": 0.22741872275439667, + "grad_norm": 495.6767578125, + "learning_rate": 6.630007584634366e-06, + "loss": 37.3601, + "step": 56290 + }, + { + "epoch": 0.22745912401976429, + "grad_norm": 737.677978515625, + "learning_rate": 6.628674202150434e-06, + "loss": 40.301, + "step": 56300 + }, + { + "epoch": 0.22749952528513193, + "grad_norm": 879.7401733398438, + "learning_rate": 6.627340690080371e-06, + "loss": 69.7933, + "step": 56310 + }, + { + "epoch": 0.22753992655049957, + "grad_norm": 583.7142944335938, + "learning_rate": 6.626007048530276e-06, + "loss": 39.7344, + "step": 56320 + }, + { + "epoch": 0.2275803278158672, + "grad_norm": 588.3863525390625, + "learning_rate": 6.624673277606264e-06, + "loss": 71.7574, + "step": 56330 + }, + { + "epoch": 0.22762072908123482, + "grad_norm": 739.7526245117188, + "learning_rate": 6.623339377414456e-06, + "loss": 31.9641, + "step": 56340 + }, + { + "epoch": 0.22766113034660246, + "grad_norm": 562.2548828125, + "learning_rate": 6.622005348060983e-06, + "loss": 31.7818, + "step": 56350 + }, + { + "epoch": 0.2277015316119701, + "grad_norm": 488.791748046875, + "learning_rate": 6.620671189651988e-06, + "loss": 41.5536, + "step": 56360 + }, + { + "epoch": 0.2277419328773377, + "grad_norm": 1060.41796875, + "learning_rate": 6.6193369022936245e-06, + "loss": 53.0142, + "step": 56370 + }, + { + "epoch": 0.22778233414270535, + "grad_norm": 1061.703369140625, + "learning_rate": 6.618002486092056e-06, + "loss": 36.131, + "step": 56380 + }, + { + "epoch": 0.227822735408073, + "grad_norm": 552.3648681640625, + "learning_rate": 6.616667941153456e-06, + "loss": 43.893, + "step": 56390 + }, + { + "epoch": 0.2278631366734406, + "grad_norm": 816.7445068359375, + "learning_rate": 6.615333267584007e-06, + "loss": 66.4245, + "step": 56400 + }, + { + "epoch": 0.22790353793880824, + "grad_norm": 527.3714599609375, + "learning_rate": 6.613998465489902e-06, + "loss": 42.1384, + "step": 56410 + }, + { + "epoch": 0.22794393920417588, + "grad_norm": 400.6829833984375, + "learning_rate": 6.612663534977347e-06, + "loss": 53.9582, + "step": 56420 + }, + { + "epoch": 0.2279843404695435, + "grad_norm": 1087.0968017578125, + "learning_rate": 6.611328476152557e-06, + "loss": 50.3561, + "step": 56430 + }, + { + "epoch": 0.22802474173491113, + "grad_norm": 498.5974426269531, + "learning_rate": 6.609993289121753e-06, + "loss": 54.2461, + "step": 56440 + }, + { + "epoch": 0.22806514300027877, + "grad_norm": 467.4459228515625, + "learning_rate": 6.608657973991172e-06, + "loss": 37.9477, + "step": 56450 + }, + { + "epoch": 0.2281055442656464, + "grad_norm": 851.5455322265625, + "learning_rate": 6.607322530867061e-06, + "loss": 49.9047, + "step": 56460 + }, + { + "epoch": 0.22814594553101403, + "grad_norm": 640.7950439453125, + "learning_rate": 6.605986959855672e-06, + "loss": 51.9378, + "step": 56470 + }, + { + "epoch": 0.22818634679638167, + "grad_norm": 726.4421997070312, + "learning_rate": 6.60465126106327e-06, + "loss": 43.164, + "step": 56480 + }, + { + "epoch": 0.2282267480617493, + "grad_norm": 347.83416748046875, + "learning_rate": 6.6033154345961314e-06, + "loss": 79.5957, + "step": 56490 + }, + { + "epoch": 0.22826714932711692, + "grad_norm": 665.018798828125, + "learning_rate": 6.601979480560543e-06, + "loss": 80.1648, + "step": 56500 + }, + { + "epoch": 0.22830755059248456, + "grad_norm": 478.4368896484375, + "learning_rate": 6.6006433990627985e-06, + "loss": 40.2869, + "step": 56510 + }, + { + "epoch": 0.2283479518578522, + "grad_norm": 442.74713134765625, + "learning_rate": 6.599307190209206e-06, + "loss": 47.7656, + "step": 56520 + }, + { + "epoch": 0.2283883531232198, + "grad_norm": 1278.633544921875, + "learning_rate": 6.5979708541060796e-06, + "loss": 51.5611, + "step": 56530 + }, + { + "epoch": 0.22842875438858745, + "grad_norm": 477.2268981933594, + "learning_rate": 6.596634390859745e-06, + "loss": 54.8893, + "step": 56540 + }, + { + "epoch": 0.2284691556539551, + "grad_norm": 531.1737060546875, + "learning_rate": 6.59529780057654e-06, + "loss": 32.3914, + "step": 56550 + }, + { + "epoch": 0.2285095569193227, + "grad_norm": 690.1337890625, + "learning_rate": 6.593961083362811e-06, + "loss": 44.5659, + "step": 56560 + }, + { + "epoch": 0.22854995818469034, + "grad_norm": 358.41119384765625, + "learning_rate": 6.592624239324914e-06, + "loss": 42.6093, + "step": 56570 + }, + { + "epoch": 0.22859035945005798, + "grad_norm": 794.7353515625, + "learning_rate": 6.591287268569215e-06, + "loss": 41.1062, + "step": 56580 + }, + { + "epoch": 0.2286307607154256, + "grad_norm": 623.95361328125, + "learning_rate": 6.589950171202092e-06, + "loss": 49.7269, + "step": 56590 + }, + { + "epoch": 0.22867116198079324, + "grad_norm": 727.10693359375, + "learning_rate": 6.588612947329929e-06, + "loss": 48.3992, + "step": 56600 + }, + { + "epoch": 0.22871156324616088, + "grad_norm": 829.1666259765625, + "learning_rate": 6.587275597059125e-06, + "loss": 25.1917, + "step": 56610 + }, + { + "epoch": 0.2287519645115285, + "grad_norm": 734.826416015625, + "learning_rate": 6.585938120496087e-06, + "loss": 44.4312, + "step": 56620 + }, + { + "epoch": 0.22879236577689613, + "grad_norm": 937.6222534179688, + "learning_rate": 6.584600517747232e-06, + "loss": 46.1989, + "step": 56630 + }, + { + "epoch": 0.22883276704226377, + "grad_norm": 714.8348388671875, + "learning_rate": 6.583262788918985e-06, + "loss": 47.9685, + "step": 56640 + }, + { + "epoch": 0.2288731683076314, + "grad_norm": 470.21832275390625, + "learning_rate": 6.581924934117783e-06, + "loss": 57.1887, + "step": 56650 + }, + { + "epoch": 0.22891356957299902, + "grad_norm": 594.2630615234375, + "learning_rate": 6.580586953450076e-06, + "loss": 41.4932, + "step": 56660 + }, + { + "epoch": 0.22895397083836666, + "grad_norm": 613.1303100585938, + "learning_rate": 6.579248847022317e-06, + "loss": 39.0782, + "step": 56670 + }, + { + "epoch": 0.2289943721037343, + "grad_norm": 756.616455078125, + "learning_rate": 6.577910614940978e-06, + "loss": 48.7595, + "step": 56680 + }, + { + "epoch": 0.2290347733691019, + "grad_norm": 333.4111633300781, + "learning_rate": 6.576572257312531e-06, + "loss": 41.8574, + "step": 56690 + }, + { + "epoch": 0.22907517463446955, + "grad_norm": 813.4775390625, + "learning_rate": 6.5752337742434644e-06, + "loss": 34.6469, + "step": 56700 + }, + { + "epoch": 0.2291155758998372, + "grad_norm": 498.3751220703125, + "learning_rate": 6.573895165840276e-06, + "loss": 47.3905, + "step": 56710 + }, + { + "epoch": 0.2291559771652048, + "grad_norm": 803.7112426757812, + "learning_rate": 6.5725564322094745e-06, + "loss": 59.7862, + "step": 56720 + }, + { + "epoch": 0.22919637843057244, + "grad_norm": 1082.3946533203125, + "learning_rate": 6.571217573457573e-06, + "loss": 53.9844, + "step": 56730 + }, + { + "epoch": 0.22923677969594008, + "grad_norm": 276.7662353515625, + "learning_rate": 6.569878589691101e-06, + "loss": 42.0922, + "step": 56740 + }, + { + "epoch": 0.2292771809613077, + "grad_norm": 1344.534912109375, + "learning_rate": 6.568539481016593e-06, + "loss": 47.5675, + "step": 56750 + }, + { + "epoch": 0.22931758222667534, + "grad_norm": 832.77490234375, + "learning_rate": 6.567200247540599e-06, + "loss": 68.7369, + "step": 56760 + }, + { + "epoch": 0.22935798349204298, + "grad_norm": 260.89385986328125, + "learning_rate": 6.5658608893696714e-06, + "loss": 45.8488, + "step": 56770 + }, + { + "epoch": 0.2293983847574106, + "grad_norm": 691.3558349609375, + "learning_rate": 6.564521406610382e-06, + "loss": 49.6963, + "step": 56780 + }, + { + "epoch": 0.22943878602277823, + "grad_norm": 866.0549926757812, + "learning_rate": 6.563181799369301e-06, + "loss": 32.6304, + "step": 56790 + }, + { + "epoch": 0.22947918728814587, + "grad_norm": 760.6442260742188, + "learning_rate": 6.561842067753021e-06, + "loss": 41.1299, + "step": 56800 + }, + { + "epoch": 0.2295195885535135, + "grad_norm": 1116.607666015625, + "learning_rate": 6.560502211868135e-06, + "loss": 56.4624, + "step": 56810 + }, + { + "epoch": 0.22955998981888112, + "grad_norm": 805.6516723632812, + "learning_rate": 6.55916223182125e-06, + "loss": 35.0971, + "step": 56820 + }, + { + "epoch": 0.22960039108424876, + "grad_norm": 378.9771728515625, + "learning_rate": 6.55782212771898e-06, + "loss": 43.8403, + "step": 56830 + }, + { + "epoch": 0.2296407923496164, + "grad_norm": 850.3655395507812, + "learning_rate": 6.5564818996679536e-06, + "loss": 50.1022, + "step": 56840 + }, + { + "epoch": 0.229681193614984, + "grad_norm": 827.5731811523438, + "learning_rate": 6.555141547774807e-06, + "loss": 66.2093, + "step": 56850 + }, + { + "epoch": 0.22972159488035165, + "grad_norm": 383.89361572265625, + "learning_rate": 6.553801072146184e-06, + "loss": 43.9942, + "step": 56860 + }, + { + "epoch": 0.2297619961457193, + "grad_norm": 634.6798095703125, + "learning_rate": 6.55246047288874e-06, + "loss": 63.0002, + "step": 56870 + }, + { + "epoch": 0.2298023974110869, + "grad_norm": 617.5804443359375, + "learning_rate": 6.551119750109142e-06, + "loss": 62.7401, + "step": 56880 + }, + { + "epoch": 0.22984279867645455, + "grad_norm": 757.3110961914062, + "learning_rate": 6.5497789039140635e-06, + "loss": 54.9512, + "step": 56890 + }, + { + "epoch": 0.22988319994182219, + "grad_norm": 897.7661743164062, + "learning_rate": 6.54843793441019e-06, + "loss": 42.9392, + "step": 56900 + }, + { + "epoch": 0.2299236012071898, + "grad_norm": 560.5343017578125, + "learning_rate": 6.547096841704217e-06, + "loss": 49.1479, + "step": 56910 + }, + { + "epoch": 0.22996400247255744, + "grad_norm": 169.0382537841797, + "learning_rate": 6.545755625902848e-06, + "loss": 40.0921, + "step": 56920 + }, + { + "epoch": 0.23000440373792508, + "grad_norm": 1067.6502685546875, + "learning_rate": 6.544414287112798e-06, + "loss": 62.6796, + "step": 56930 + }, + { + "epoch": 0.2300448050032927, + "grad_norm": 717.0487670898438, + "learning_rate": 6.54307282544079e-06, + "loss": 51.3947, + "step": 56940 + }, + { + "epoch": 0.23008520626866033, + "grad_norm": 572.951904296875, + "learning_rate": 6.5417312409935606e-06, + "loss": 52.8754, + "step": 56950 + }, + { + "epoch": 0.23012560753402797, + "grad_norm": 958.3705444335938, + "learning_rate": 6.540389533877852e-06, + "loss": 72.8596, + "step": 56960 + }, + { + "epoch": 0.2301660087993956, + "grad_norm": 515.6321411132812, + "learning_rate": 6.539047704200417e-06, + "loss": 47.7139, + "step": 56970 + }, + { + "epoch": 0.23020641006476322, + "grad_norm": 645.0633544921875, + "learning_rate": 6.53770575206802e-06, + "loss": 45.1984, + "step": 56980 + }, + { + "epoch": 0.23024681133013086, + "grad_norm": 382.80560302734375, + "learning_rate": 6.536363677587433e-06, + "loss": 42.9186, + "step": 56990 + }, + { + "epoch": 0.2302872125954985, + "grad_norm": 316.0374755859375, + "learning_rate": 6.535021480865439e-06, + "loss": 39.2293, + "step": 57000 + }, + { + "epoch": 0.23032761386086611, + "grad_norm": 455.7964782714844, + "learning_rate": 6.5336791620088306e-06, + "loss": 38.0706, + "step": 57010 + }, + { + "epoch": 0.23036801512623375, + "grad_norm": 625.7360229492188, + "learning_rate": 6.53233672112441e-06, + "loss": 38.7002, + "step": 57020 + }, + { + "epoch": 0.2304084163916014, + "grad_norm": 525.91845703125, + "learning_rate": 6.530994158318988e-06, + "loss": 46.5832, + "step": 57030 + }, + { + "epoch": 0.230448817656969, + "grad_norm": 578.9883422851562, + "learning_rate": 6.529651473699389e-06, + "loss": 35.1653, + "step": 57040 + }, + { + "epoch": 0.23048921892233665, + "grad_norm": 756.6240844726562, + "learning_rate": 6.528308667372441e-06, + "loss": 47.4836, + "step": 57050 + }, + { + "epoch": 0.2305296201877043, + "grad_norm": 375.4329528808594, + "learning_rate": 6.526965739444988e-06, + "loss": 49.6254, + "step": 57060 + }, + { + "epoch": 0.2305700214530719, + "grad_norm": 856.4522705078125, + "learning_rate": 6.525622690023878e-06, + "loss": 54.8425, + "step": 57070 + }, + { + "epoch": 0.23061042271843954, + "grad_norm": 648.9761352539062, + "learning_rate": 6.524279519215972e-06, + "loss": 49.5565, + "step": 57080 + }, + { + "epoch": 0.23065082398380718, + "grad_norm": 279.9421081542969, + "learning_rate": 6.522936227128139e-06, + "loss": 29.2129, + "step": 57090 + }, + { + "epoch": 0.2306912252491748, + "grad_norm": 434.59619140625, + "learning_rate": 6.521592813867261e-06, + "loss": 38.2154, + "step": 57100 + }, + { + "epoch": 0.23073162651454243, + "grad_norm": 904.1709594726562, + "learning_rate": 6.520249279540227e-06, + "loss": 48.9872, + "step": 57110 + }, + { + "epoch": 0.23077202777991007, + "grad_norm": 2435.330810546875, + "learning_rate": 6.5189056242539325e-06, + "loss": 60.082, + "step": 57120 + }, + { + "epoch": 0.2308124290452777, + "grad_norm": 279.1988525390625, + "learning_rate": 6.51756184811529e-06, + "loss": 40.6881, + "step": 57130 + }, + { + "epoch": 0.23085283031064532, + "grad_norm": 811.0878295898438, + "learning_rate": 6.516217951231215e-06, + "loss": 51.1541, + "step": 57140 + }, + { + "epoch": 0.23089323157601296, + "grad_norm": 386.1441955566406, + "learning_rate": 6.514873933708637e-06, + "loss": 33.2295, + "step": 57150 + }, + { + "epoch": 0.2309336328413806, + "grad_norm": 745.3314819335938, + "learning_rate": 6.513529795654493e-06, + "loss": 48.7001, + "step": 57160 + }, + { + "epoch": 0.23097403410674822, + "grad_norm": 761.8244018554688, + "learning_rate": 6.512185537175727e-06, + "loss": 34.5987, + "step": 57170 + }, + { + "epoch": 0.23101443537211586, + "grad_norm": 357.27984619140625, + "learning_rate": 6.5108411583793e-06, + "loss": 63.6459, + "step": 57180 + }, + { + "epoch": 0.2310548366374835, + "grad_norm": 698.4451293945312, + "learning_rate": 6.509496659372175e-06, + "loss": 31.1018, + "step": 57190 + }, + { + "epoch": 0.2310952379028511, + "grad_norm": 948.5643310546875, + "learning_rate": 6.508152040261329e-06, + "loss": 49.5578, + "step": 57200 + }, + { + "epoch": 0.23113563916821875, + "grad_norm": 1980.6900634765625, + "learning_rate": 6.506807301153746e-06, + "loss": 47.9453, + "step": 57210 + }, + { + "epoch": 0.2311760404335864, + "grad_norm": 1299.60595703125, + "learning_rate": 6.5054624421564204e-06, + "loss": 50.8445, + "step": 57220 + }, + { + "epoch": 0.231216441698954, + "grad_norm": 1376.9063720703125, + "learning_rate": 6.504117463376358e-06, + "loss": 56.114, + "step": 57230 + }, + { + "epoch": 0.23125684296432164, + "grad_norm": 463.1986999511719, + "learning_rate": 6.502772364920573e-06, + "loss": 48.9991, + "step": 57240 + }, + { + "epoch": 0.23129724422968928, + "grad_norm": 194.3710479736328, + "learning_rate": 6.501427146896087e-06, + "loss": 31.2601, + "step": 57250 + }, + { + "epoch": 0.2313376454950569, + "grad_norm": 565.7164306640625, + "learning_rate": 6.5000818094099345e-06, + "loss": 39.7205, + "step": 57260 + }, + { + "epoch": 0.23137804676042453, + "grad_norm": 511.2424011230469, + "learning_rate": 6.498736352569155e-06, + "loss": 58.0947, + "step": 57270 + }, + { + "epoch": 0.23141844802579217, + "grad_norm": 545.729248046875, + "learning_rate": 6.497390776480804e-06, + "loss": 43.7937, + "step": 57280 + }, + { + "epoch": 0.2314588492911598, + "grad_norm": 1081.4024658203125, + "learning_rate": 6.49604508125194e-06, + "loss": 57.2301, + "step": 57290 + }, + { + "epoch": 0.23149925055652743, + "grad_norm": 858.1848754882812, + "learning_rate": 6.4946992669896355e-06, + "loss": 42.6667, + "step": 57300 + }, + { + "epoch": 0.23153965182189506, + "grad_norm": 176.55555725097656, + "learning_rate": 6.493353333800969e-06, + "loss": 41.2503, + "step": 57310 + }, + { + "epoch": 0.2315800530872627, + "grad_norm": 835.7853393554688, + "learning_rate": 6.492007281793032e-06, + "loss": 69.4198, + "step": 57320 + }, + { + "epoch": 0.23162045435263032, + "grad_norm": 532.6726684570312, + "learning_rate": 6.490661111072923e-06, + "loss": 43.0533, + "step": 57330 + }, + { + "epoch": 0.23166085561799796, + "grad_norm": 395.8429260253906, + "learning_rate": 6.489314821747751e-06, + "loss": 45.0708, + "step": 57340 + }, + { + "epoch": 0.2317012568833656, + "grad_norm": 424.9057312011719, + "learning_rate": 6.487968413924634e-06, + "loss": 51.2508, + "step": 57350 + }, + { + "epoch": 0.2317416581487332, + "grad_norm": 750.9067993164062, + "learning_rate": 6.486621887710698e-06, + "loss": 43.8176, + "step": 57360 + }, + { + "epoch": 0.23178205941410085, + "grad_norm": 454.9244079589844, + "learning_rate": 6.485275243213081e-06, + "loss": 43.997, + "step": 57370 + }, + { + "epoch": 0.2318224606794685, + "grad_norm": 515.5105590820312, + "learning_rate": 6.4839284805389305e-06, + "loss": 44.7541, + "step": 57380 + }, + { + "epoch": 0.2318628619448361, + "grad_norm": 854.0843505859375, + "learning_rate": 6.4825815997954e-06, + "loss": 51.4215, + "step": 57390 + }, + { + "epoch": 0.23190326321020374, + "grad_norm": 554.3998413085938, + "learning_rate": 6.481234601089655e-06, + "loss": 50.3499, + "step": 57400 + }, + { + "epoch": 0.23194366447557138, + "grad_norm": 656.9430541992188, + "learning_rate": 6.4798874845288725e-06, + "loss": 53.9586, + "step": 57410 + }, + { + "epoch": 0.231984065740939, + "grad_norm": 429.04681396484375, + "learning_rate": 6.4785402502202345e-06, + "loss": 58.049, + "step": 57420 + }, + { + "epoch": 0.23202446700630663, + "grad_norm": 669.2875366210938, + "learning_rate": 6.477192898270934e-06, + "loss": 33.2239, + "step": 57430 + }, + { + "epoch": 0.23206486827167427, + "grad_norm": 382.91522216796875, + "learning_rate": 6.475845428788173e-06, + "loss": 50.4403, + "step": 57440 + }, + { + "epoch": 0.23210526953704191, + "grad_norm": 650.1845092773438, + "learning_rate": 6.474497841879166e-06, + "loss": 65.9487, + "step": 57450 + }, + { + "epoch": 0.23214567080240953, + "grad_norm": 991.1076049804688, + "learning_rate": 6.473150137651132e-06, + "loss": 57.2718, + "step": 57460 + }, + { + "epoch": 0.23218607206777717, + "grad_norm": 714.9050903320312, + "learning_rate": 6.471802316211302e-06, + "loss": 57.2066, + "step": 57470 + }, + { + "epoch": 0.2322264733331448, + "grad_norm": 483.4032897949219, + "learning_rate": 6.4704543776669174e-06, + "loss": 45.9662, + "step": 57480 + }, + { + "epoch": 0.23226687459851242, + "grad_norm": 353.4619445800781, + "learning_rate": 6.469106322125227e-06, + "loss": 51.166, + "step": 57490 + }, + { + "epoch": 0.23230727586388006, + "grad_norm": 700.385986328125, + "learning_rate": 6.467758149693486e-06, + "loss": 49.1312, + "step": 57500 + }, + { + "epoch": 0.2323476771292477, + "grad_norm": 429.9539794921875, + "learning_rate": 6.466409860478967e-06, + "loss": 48.9266, + "step": 57510 + }, + { + "epoch": 0.2323880783946153, + "grad_norm": 865.5701293945312, + "learning_rate": 6.465061454588946e-06, + "loss": 51.0269, + "step": 57520 + }, + { + "epoch": 0.23242847965998295, + "grad_norm": 457.370361328125, + "learning_rate": 6.463712932130708e-06, + "loss": 38.1473, + "step": 57530 + }, + { + "epoch": 0.2324688809253506, + "grad_norm": 432.3177795410156, + "learning_rate": 6.462364293211549e-06, + "loss": 33.1413, + "step": 57540 + }, + { + "epoch": 0.2325092821907182, + "grad_norm": 454.0198669433594, + "learning_rate": 6.4610155379387755e-06, + "loss": 36.9611, + "step": 57550 + }, + { + "epoch": 0.23254968345608584, + "grad_norm": 459.07940673828125, + "learning_rate": 6.459666666419699e-06, + "loss": 53.3877, + "step": 57560 + }, + { + "epoch": 0.23259008472145348, + "grad_norm": 552.9193115234375, + "learning_rate": 6.4583176787616466e-06, + "loss": 55.7331, + "step": 57570 + }, + { + "epoch": 0.2326304859868211, + "grad_norm": 187.45762634277344, + "learning_rate": 6.456968575071951e-06, + "loss": 68.7932, + "step": 57580 + }, + { + "epoch": 0.23267088725218874, + "grad_norm": 276.4294128417969, + "learning_rate": 6.45561935545795e-06, + "loss": 36.9424, + "step": 57590 + }, + { + "epoch": 0.23271128851755638, + "grad_norm": 488.11151123046875, + "learning_rate": 6.454270020026996e-06, + "loss": 59.5057, + "step": 57600 + }, + { + "epoch": 0.23275168978292402, + "grad_norm": 763.0408325195312, + "learning_rate": 6.452920568886452e-06, + "loss": 34.2426, + "step": 57610 + }, + { + "epoch": 0.23279209104829163, + "grad_norm": 325.465576171875, + "learning_rate": 6.451571002143687e-06, + "loss": 47.0651, + "step": 57620 + }, + { + "epoch": 0.23283249231365927, + "grad_norm": 1015.4959716796875, + "learning_rate": 6.450221319906079e-06, + "loss": 41.7232, + "step": 57630 + }, + { + "epoch": 0.2328728935790269, + "grad_norm": 1036.8895263671875, + "learning_rate": 6.448871522281016e-06, + "loss": 48.3217, + "step": 57640 + }, + { + "epoch": 0.23291329484439452, + "grad_norm": 424.6831359863281, + "learning_rate": 6.447521609375894e-06, + "loss": 38.218, + "step": 57650 + }, + { + "epoch": 0.23295369610976216, + "grad_norm": 742.8676147460938, + "learning_rate": 6.446171581298123e-06, + "loss": 38.9838, + "step": 57660 + }, + { + "epoch": 0.2329940973751298, + "grad_norm": 821.5770874023438, + "learning_rate": 6.444821438155115e-06, + "loss": 45.9229, + "step": 57670 + }, + { + "epoch": 0.2330344986404974, + "grad_norm": 606.6029052734375, + "learning_rate": 6.443471180054297e-06, + "loss": 53.2201, + "step": 57680 + }, + { + "epoch": 0.23307489990586505, + "grad_norm": 684.6781005859375, + "learning_rate": 6.442120807103102e-06, + "loss": 59.1518, + "step": 57690 + }, + { + "epoch": 0.2331153011712327, + "grad_norm": 678.3739624023438, + "learning_rate": 6.440770319408971e-06, + "loss": 44.6293, + "step": 57700 + }, + { + "epoch": 0.2331557024366003, + "grad_norm": 648.0844116210938, + "learning_rate": 6.43941971707936e-06, + "loss": 30.0908, + "step": 57710 + }, + { + "epoch": 0.23319610370196794, + "grad_norm": 406.64862060546875, + "learning_rate": 6.438069000221727e-06, + "loss": 50.2008, + "step": 57720 + }, + { + "epoch": 0.23323650496733558, + "grad_norm": 673.765380859375, + "learning_rate": 6.4367181689435434e-06, + "loss": 46.924, + "step": 57730 + }, + { + "epoch": 0.2332769062327032, + "grad_norm": 3202.600341796875, + "learning_rate": 6.435367223352289e-06, + "loss": 63.5285, + "step": 57740 + }, + { + "epoch": 0.23331730749807084, + "grad_norm": 1331.566162109375, + "learning_rate": 6.434016163555452e-06, + "loss": 50.6696, + "step": 57750 + }, + { + "epoch": 0.23335770876343848, + "grad_norm": 383.6949768066406, + "learning_rate": 6.432664989660531e-06, + "loss": 37.0474, + "step": 57760 + }, + { + "epoch": 0.2333981100288061, + "grad_norm": 224.4041748046875, + "learning_rate": 6.43131370177503e-06, + "loss": 46.556, + "step": 57770 + }, + { + "epoch": 0.23343851129417373, + "grad_norm": 1214.294677734375, + "learning_rate": 6.429962300006468e-06, + "loss": 72.8442, + "step": 57780 + }, + { + "epoch": 0.23347891255954137, + "grad_norm": 615.74560546875, + "learning_rate": 6.428610784462368e-06, + "loss": 46.897, + "step": 57790 + }, + { + "epoch": 0.233519313824909, + "grad_norm": 600.8942260742188, + "learning_rate": 6.427259155250265e-06, + "loss": 56.1212, + "step": 57800 + }, + { + "epoch": 0.23355971509027662, + "grad_norm": 1422.342041015625, + "learning_rate": 6.4259074124777e-06, + "loss": 53.6335, + "step": 57810 + }, + { + "epoch": 0.23360011635564426, + "grad_norm": 1549.554443359375, + "learning_rate": 6.4245555562522265e-06, + "loss": 34.9436, + "step": 57820 + }, + { + "epoch": 0.2336405176210119, + "grad_norm": 604.1146850585938, + "learning_rate": 6.423203586681406e-06, + "loss": 43.1287, + "step": 57830 + }, + { + "epoch": 0.2336809188863795, + "grad_norm": 181.16397094726562, + "learning_rate": 6.421851503872807e-06, + "loss": 42.3401, + "step": 57840 + }, + { + "epoch": 0.23372132015174715, + "grad_norm": 438.2908935546875, + "learning_rate": 6.42049930793401e-06, + "loss": 58.2065, + "step": 57850 + }, + { + "epoch": 0.2337617214171148, + "grad_norm": 865.8299560546875, + "learning_rate": 6.419146998972602e-06, + "loss": 74.7755, + "step": 57860 + }, + { + "epoch": 0.2338021226824824, + "grad_norm": 399.0161437988281, + "learning_rate": 6.417794577096179e-06, + "loss": 41.3667, + "step": 57870 + }, + { + "epoch": 0.23384252394785005, + "grad_norm": 1323.824951171875, + "learning_rate": 6.41644204241235e-06, + "loss": 48.4313, + "step": 57880 + }, + { + "epoch": 0.23388292521321769, + "grad_norm": 1061.3271484375, + "learning_rate": 6.4150893950287275e-06, + "loss": 61.6943, + "step": 57890 + }, + { + "epoch": 0.2339233264785853, + "grad_norm": 631.509521484375, + "learning_rate": 6.413736635052936e-06, + "loss": 39.0582, + "step": 57900 + }, + { + "epoch": 0.23396372774395294, + "grad_norm": 904.318359375, + "learning_rate": 6.41238376259261e-06, + "loss": 46.4853, + "step": 57910 + }, + { + "epoch": 0.23400412900932058, + "grad_norm": 380.4770812988281, + "learning_rate": 6.411030777755389e-06, + "loss": 53.1108, + "step": 57920 + }, + { + "epoch": 0.2340445302746882, + "grad_norm": 1202.594970703125, + "learning_rate": 6.409677680648925e-06, + "loss": 68.2252, + "step": 57930 + }, + { + "epoch": 0.23408493154005583, + "grad_norm": 1343.1907958984375, + "learning_rate": 6.4083244713808765e-06, + "loss": 32.3128, + "step": 57940 + }, + { + "epoch": 0.23412533280542347, + "grad_norm": 413.1898498535156, + "learning_rate": 6.406971150058914e-06, + "loss": 37.7907, + "step": 57950 + }, + { + "epoch": 0.2341657340707911, + "grad_norm": 864.8408813476562, + "learning_rate": 6.405617716790714e-06, + "loss": 37.8921, + "step": 57960 + }, + { + "epoch": 0.23420613533615872, + "grad_norm": 854.4756469726562, + "learning_rate": 6.404264171683965e-06, + "loss": 47.9397, + "step": 57970 + }, + { + "epoch": 0.23424653660152636, + "grad_norm": 743.707275390625, + "learning_rate": 6.402910514846358e-06, + "loss": 30.2608, + "step": 57980 + }, + { + "epoch": 0.234286937866894, + "grad_norm": 734.5051879882812, + "learning_rate": 6.4015567463856e-06, + "loss": 41.0997, + "step": 57990 + }, + { + "epoch": 0.23432733913226161, + "grad_norm": 744.6089477539062, + "learning_rate": 6.400202866409405e-06, + "loss": 44.8084, + "step": 58000 + }, + { + "epoch": 0.23436774039762925, + "grad_norm": 631.4942626953125, + "learning_rate": 6.398848875025494e-06, + "loss": 41.5169, + "step": 58010 + }, + { + "epoch": 0.2344081416629969, + "grad_norm": 924.2283325195312, + "learning_rate": 6.3974947723415985e-06, + "loss": 57.7236, + "step": 58020 + }, + { + "epoch": 0.2344485429283645, + "grad_norm": 748.7590942382812, + "learning_rate": 6.396140558465456e-06, + "loss": 39.0069, + "step": 58030 + }, + { + "epoch": 0.23448894419373215, + "grad_norm": 1556.5047607421875, + "learning_rate": 6.394786233504816e-06, + "loss": 67.497, + "step": 58040 + }, + { + "epoch": 0.2345293454590998, + "grad_norm": 560.0980834960938, + "learning_rate": 6.39343179756744e-06, + "loss": 36.9168, + "step": 58050 + }, + { + "epoch": 0.2345697467244674, + "grad_norm": 695.441650390625, + "learning_rate": 6.392077250761088e-06, + "loss": 43.3128, + "step": 58060 + }, + { + "epoch": 0.23461014798983504, + "grad_norm": 722.3070678710938, + "learning_rate": 6.390722593193538e-06, + "loss": 53.5355, + "step": 58070 + }, + { + "epoch": 0.23465054925520268, + "grad_norm": 390.59124755859375, + "learning_rate": 6.389367824972575e-06, + "loss": 52.2937, + "step": 58080 + }, + { + "epoch": 0.2346909505205703, + "grad_norm": 448.1783142089844, + "learning_rate": 6.388012946205991e-06, + "loss": 44.6697, + "step": 58090 + }, + { + "epoch": 0.23473135178593793, + "grad_norm": 252.83447265625, + "learning_rate": 6.386657957001585e-06, + "loss": 43.3268, + "step": 58100 + }, + { + "epoch": 0.23477175305130557, + "grad_norm": 572.3157348632812, + "learning_rate": 6.38530285746717e-06, + "loss": 32.4306, + "step": 58110 + }, + { + "epoch": 0.2348121543166732, + "grad_norm": 326.7099609375, + "learning_rate": 6.383947647710565e-06, + "loss": 30.3495, + "step": 58120 + }, + { + "epoch": 0.23485255558204082, + "grad_norm": 636.6470336914062, + "learning_rate": 6.382592327839596e-06, + "loss": 34.1573, + "step": 58130 + }, + { + "epoch": 0.23489295684740846, + "grad_norm": 648.021484375, + "learning_rate": 6.381236897962102e-06, + "loss": 45.2606, + "step": 58140 + }, + { + "epoch": 0.2349333581127761, + "grad_norm": 3382.697998046875, + "learning_rate": 6.379881358185926e-06, + "loss": 48.3623, + "step": 58150 + }, + { + "epoch": 0.23497375937814372, + "grad_norm": 376.2268981933594, + "learning_rate": 6.378525708618924e-06, + "loss": 43.6891, + "step": 58160 + }, + { + "epoch": 0.23501416064351136, + "grad_norm": 846.15625, + "learning_rate": 6.377169949368956e-06, + "loss": 55.0067, + "step": 58170 + }, + { + "epoch": 0.235054561908879, + "grad_norm": 336.41802978515625, + "learning_rate": 6.375814080543899e-06, + "loss": 33.695, + "step": 58180 + }, + { + "epoch": 0.2350949631742466, + "grad_norm": 436.46197509765625, + "learning_rate": 6.3744581022516285e-06, + "loss": 242.9059, + "step": 58190 + }, + { + "epoch": 0.23513536443961425, + "grad_norm": 740.7402954101562, + "learning_rate": 6.373102014600033e-06, + "loss": 43.7324, + "step": 58200 + }, + { + "epoch": 0.2351757657049819, + "grad_norm": 866.8933715820312, + "learning_rate": 6.371745817697012e-06, + "loss": 49.1007, + "step": 58210 + }, + { + "epoch": 0.2352161669703495, + "grad_norm": 681.402099609375, + "learning_rate": 6.370389511650474e-06, + "loss": 48.2255, + "step": 58220 + }, + { + "epoch": 0.23525656823571714, + "grad_norm": 598.37255859375, + "learning_rate": 6.3690330965683304e-06, + "loss": 43.6901, + "step": 58230 + }, + { + "epoch": 0.23529696950108478, + "grad_norm": 616.9476928710938, + "learning_rate": 6.367676572558506e-06, + "loss": 60.4343, + "step": 58240 + }, + { + "epoch": 0.2353373707664524, + "grad_norm": 881.7716674804688, + "learning_rate": 6.366319939728934e-06, + "loss": 114.1712, + "step": 58250 + }, + { + "epoch": 0.23537777203182003, + "grad_norm": 505.7012023925781, + "learning_rate": 6.364963198187555e-06, + "loss": 54.7326, + "step": 58260 + }, + { + "epoch": 0.23541817329718767, + "grad_norm": 464.7174072265625, + "learning_rate": 6.363606348042318e-06, + "loss": 38.2772, + "step": 58270 + }, + { + "epoch": 0.2354585745625553, + "grad_norm": 602.1082763671875, + "learning_rate": 6.362249389401183e-06, + "loss": 37.5626, + "step": 58280 + }, + { + "epoch": 0.23549897582792292, + "grad_norm": 578.7637939453125, + "learning_rate": 6.360892322372115e-06, + "loss": 46.3746, + "step": 58290 + }, + { + "epoch": 0.23553937709329056, + "grad_norm": 298.9007873535156, + "learning_rate": 6.359535147063092e-06, + "loss": 48.7875, + "step": 58300 + }, + { + "epoch": 0.2355797783586582, + "grad_norm": 433.61517333984375, + "learning_rate": 6.358177863582095e-06, + "loss": 41.4706, + "step": 58310 + }, + { + "epoch": 0.23562017962402582, + "grad_norm": 636.701416015625, + "learning_rate": 6.35682047203712e-06, + "loss": 37.8823, + "step": 58320 + }, + { + "epoch": 0.23566058088939346, + "grad_norm": 681.9437255859375, + "learning_rate": 6.355462972536166e-06, + "loss": 49.5436, + "step": 58330 + }, + { + "epoch": 0.2357009821547611, + "grad_norm": 658.2206420898438, + "learning_rate": 6.354105365187244e-06, + "loss": 46.6898, + "step": 58340 + }, + { + "epoch": 0.2357413834201287, + "grad_norm": 502.2849426269531, + "learning_rate": 6.352747650098373e-06, + "loss": 40.1756, + "step": 58350 + }, + { + "epoch": 0.23578178468549635, + "grad_norm": 2149.325927734375, + "learning_rate": 6.35138982737758e-06, + "loss": 61.8135, + "step": 58360 + }, + { + "epoch": 0.235822185950864, + "grad_norm": 403.00921630859375, + "learning_rate": 6.3500318971329e-06, + "loss": 40.8118, + "step": 58370 + }, + { + "epoch": 0.2358625872162316, + "grad_norm": 667.1368408203125, + "learning_rate": 6.348673859472378e-06, + "loss": 52.6098, + "step": 58380 + }, + { + "epoch": 0.23590298848159924, + "grad_norm": 488.9213562011719, + "learning_rate": 6.347315714504066e-06, + "loss": 44.4715, + "step": 58390 + }, + { + "epoch": 0.23594338974696688, + "grad_norm": 560.7942504882812, + "learning_rate": 6.345957462336026e-06, + "loss": 62.3928, + "step": 58400 + }, + { + "epoch": 0.2359837910123345, + "grad_norm": 658.2381591796875, + "learning_rate": 6.344599103076329e-06, + "loss": 47.269, + "step": 58410 + }, + { + "epoch": 0.23602419227770213, + "grad_norm": 415.65850830078125, + "learning_rate": 6.343240636833051e-06, + "loss": 47.3057, + "step": 58420 + }, + { + "epoch": 0.23606459354306977, + "grad_norm": 1434.79150390625, + "learning_rate": 6.341882063714282e-06, + "loss": 50.7882, + "step": 58430 + }, + { + "epoch": 0.2361049948084374, + "grad_norm": 574.5676879882812, + "learning_rate": 6.340523383828115e-06, + "loss": 44.911, + "step": 58440 + }, + { + "epoch": 0.23614539607380503, + "grad_norm": 289.6334533691406, + "learning_rate": 6.339164597282652e-06, + "loss": 41.7514, + "step": 58450 + }, + { + "epoch": 0.23618579733917267, + "grad_norm": 990.1171264648438, + "learning_rate": 6.337805704186011e-06, + "loss": 59.5212, + "step": 58460 + }, + { + "epoch": 0.2362261986045403, + "grad_norm": 394.655029296875, + "learning_rate": 6.336446704646307e-06, + "loss": 41.2014, + "step": 58470 + }, + { + "epoch": 0.23626659986990792, + "grad_norm": 407.64886474609375, + "learning_rate": 6.335087598771676e-06, + "loss": 34.4346, + "step": 58480 + }, + { + "epoch": 0.23630700113527556, + "grad_norm": 199.44761657714844, + "learning_rate": 6.333728386670249e-06, + "loss": 40.7228, + "step": 58490 + }, + { + "epoch": 0.2363474024006432, + "grad_norm": 615.704345703125, + "learning_rate": 6.332369068450175e-06, + "loss": 46.4799, + "step": 58500 + }, + { + "epoch": 0.2363878036660108, + "grad_norm": 356.4165954589844, + "learning_rate": 6.33100964421961e-06, + "loss": 49.1076, + "step": 58510 + }, + { + "epoch": 0.23642820493137845, + "grad_norm": 745.1015625, + "learning_rate": 6.329650114086717e-06, + "loss": 39.9381, + "step": 58520 + }, + { + "epoch": 0.2364686061967461, + "grad_norm": 222.75173950195312, + "learning_rate": 6.328290478159666e-06, + "loss": 56.0289, + "step": 58530 + }, + { + "epoch": 0.2365090074621137, + "grad_norm": 823.6661987304688, + "learning_rate": 6.326930736546637e-06, + "loss": 52.2594, + "step": 58540 + }, + { + "epoch": 0.23654940872748134, + "grad_norm": 817.8341674804688, + "learning_rate": 6.325570889355819e-06, + "loss": 38.3977, + "step": 58550 + }, + { + "epoch": 0.23658980999284898, + "grad_norm": 1076.7332763671875, + "learning_rate": 6.32421093669541e-06, + "loss": 60.3006, + "step": 58560 + }, + { + "epoch": 0.2366302112582166, + "grad_norm": 1023.4537353515625, + "learning_rate": 6.322850878673614e-06, + "loss": 55.8991, + "step": 58570 + }, + { + "epoch": 0.23667061252358423, + "grad_norm": 495.56573486328125, + "learning_rate": 6.321490715398644e-06, + "loss": 48.1315, + "step": 58580 + }, + { + "epoch": 0.23671101378895187, + "grad_norm": 806.6759033203125, + "learning_rate": 6.320130446978722e-06, + "loss": 74.4598, + "step": 58590 + }, + { + "epoch": 0.23675141505431951, + "grad_norm": 673.5408935546875, + "learning_rate": 6.31877007352208e-06, + "loss": 51.3672, + "step": 58600 + }, + { + "epoch": 0.23679181631968713, + "grad_norm": 794.4425659179688, + "learning_rate": 6.317409595136956e-06, + "loss": 34.9962, + "step": 58610 + }, + { + "epoch": 0.23683221758505477, + "grad_norm": 905.353271484375, + "learning_rate": 6.316049011931595e-06, + "loss": 53.9744, + "step": 58620 + }, + { + "epoch": 0.2368726188504224, + "grad_norm": 524.4614868164062, + "learning_rate": 6.314688324014255e-06, + "loss": 48.7819, + "step": 58630 + }, + { + "epoch": 0.23691302011579002, + "grad_norm": 865.238037109375, + "learning_rate": 6.3133275314931995e-06, + "loss": 37.5079, + "step": 58640 + }, + { + "epoch": 0.23695342138115766, + "grad_norm": 643.4755249023438, + "learning_rate": 6.311966634476698e-06, + "loss": 55.7277, + "step": 58650 + }, + { + "epoch": 0.2369938226465253, + "grad_norm": 467.0180358886719, + "learning_rate": 6.3106056330730335e-06, + "loss": 41.7435, + "step": 58660 + }, + { + "epoch": 0.2370342239118929, + "grad_norm": 504.303955078125, + "learning_rate": 6.309244527390493e-06, + "loss": 35.1575, + "step": 58670 + }, + { + "epoch": 0.23707462517726055, + "grad_norm": 573.1863403320312, + "learning_rate": 6.307883317537375e-06, + "loss": 45.4351, + "step": 58680 + }, + { + "epoch": 0.2371150264426282, + "grad_norm": 567.236328125, + "learning_rate": 6.306522003621983e-06, + "loss": 38.8636, + "step": 58690 + }, + { + "epoch": 0.2371554277079958, + "grad_norm": 846.465087890625, + "learning_rate": 6.305160585752632e-06, + "loss": 33.141, + "step": 58700 + }, + { + "epoch": 0.23719582897336344, + "grad_norm": 710.2720947265625, + "learning_rate": 6.303799064037643e-06, + "loss": 38.8927, + "step": 58710 + }, + { + "epoch": 0.23723623023873108, + "grad_norm": 320.60308837890625, + "learning_rate": 6.302437438585345e-06, + "loss": 35.5578, + "step": 58720 + }, + { + "epoch": 0.2372766315040987, + "grad_norm": 1115.669189453125, + "learning_rate": 6.301075709504077e-06, + "loss": 64.423, + "step": 58730 + }, + { + "epoch": 0.23731703276946634, + "grad_norm": 828.960205078125, + "learning_rate": 6.299713876902188e-06, + "loss": 59.0923, + "step": 58740 + }, + { + "epoch": 0.23735743403483398, + "grad_norm": 619.6551513671875, + "learning_rate": 6.29835194088803e-06, + "loss": 42.279, + "step": 58750 + }, + { + "epoch": 0.23739783530020162, + "grad_norm": 628.1817016601562, + "learning_rate": 6.296989901569966e-06, + "loss": 59.1546, + "step": 58760 + }, + { + "epoch": 0.23743823656556923, + "grad_norm": 412.5716857910156, + "learning_rate": 6.295627759056368e-06, + "loss": 42.7949, + "step": 58770 + }, + { + "epoch": 0.23747863783093687, + "grad_norm": 380.8799743652344, + "learning_rate": 6.294265513455616e-06, + "loss": 37.1053, + "step": 58780 + }, + { + "epoch": 0.2375190390963045, + "grad_norm": 796.0271606445312, + "learning_rate": 6.292903164876097e-06, + "loss": 45.2027, + "step": 58790 + }, + { + "epoch": 0.23755944036167212, + "grad_norm": 669.234619140625, + "learning_rate": 6.291540713426206e-06, + "loss": 47.7954, + "step": 58800 + }, + { + "epoch": 0.23759984162703976, + "grad_norm": 907.8953857421875, + "learning_rate": 6.290178159214349e-06, + "loss": 54.457, + "step": 58810 + }, + { + "epoch": 0.2376402428924074, + "grad_norm": 587.7103881835938, + "learning_rate": 6.288815502348935e-06, + "loss": 36.6312, + "step": 58820 + }, + { + "epoch": 0.237680644157775, + "grad_norm": 594.3585815429688, + "learning_rate": 6.287452742938388e-06, + "loss": 48.9698, + "step": 58830 + }, + { + "epoch": 0.23772104542314265, + "grad_norm": 800.2922973632812, + "learning_rate": 6.286089881091134e-06, + "loss": 48.804, + "step": 58840 + }, + { + "epoch": 0.2377614466885103, + "grad_norm": 101.07825469970703, + "learning_rate": 6.284726916915611e-06, + "loss": 37.5152, + "step": 58850 + }, + { + "epoch": 0.2378018479538779, + "grad_norm": 333.7406005859375, + "learning_rate": 6.2833638505202635e-06, + "loss": 45.3329, + "step": 58860 + }, + { + "epoch": 0.23784224921924554, + "grad_norm": 613.3673095703125, + "learning_rate": 6.282000682013545e-06, + "loss": 45.9315, + "step": 58870 + }, + { + "epoch": 0.23788265048461318, + "grad_norm": 387.0548400878906, + "learning_rate": 6.280637411503913e-06, + "loss": 40.9148, + "step": 58880 + }, + { + "epoch": 0.2379230517499808, + "grad_norm": 867.9426879882812, + "learning_rate": 6.279274039099842e-06, + "loss": 30.7292, + "step": 58890 + }, + { + "epoch": 0.23796345301534844, + "grad_norm": 942.0332641601562, + "learning_rate": 6.277910564909806e-06, + "loss": 48.0671, + "step": 58900 + }, + { + "epoch": 0.23800385428071608, + "grad_norm": 1058.768798828125, + "learning_rate": 6.276546989042292e-06, + "loss": 49.5743, + "step": 58910 + }, + { + "epoch": 0.23804425554608372, + "grad_norm": 559.1909790039062, + "learning_rate": 6.275183311605793e-06, + "loss": 33.5974, + "step": 58920 + }, + { + "epoch": 0.23808465681145133, + "grad_norm": 642.1489868164062, + "learning_rate": 6.273819532708807e-06, + "loss": 47.9694, + "step": 58930 + }, + { + "epoch": 0.23812505807681897, + "grad_norm": 478.4878234863281, + "learning_rate": 6.27245565245985e-06, + "loss": 40.388, + "step": 58940 + }, + { + "epoch": 0.2381654593421866, + "grad_norm": 506.3686218261719, + "learning_rate": 6.271091670967437e-06, + "loss": 42.4386, + "step": 58950 + }, + { + "epoch": 0.23820586060755422, + "grad_norm": 899.9596557617188, + "learning_rate": 6.269727588340091e-06, + "loss": 39.7451, + "step": 58960 + }, + { + "epoch": 0.23824626187292186, + "grad_norm": 438.31927490234375, + "learning_rate": 6.268363404686348e-06, + "loss": 44.1548, + "step": 58970 + }, + { + "epoch": 0.2382866631382895, + "grad_norm": 428.0214538574219, + "learning_rate": 6.26699912011475e-06, + "loss": 43.6406, + "step": 58980 + }, + { + "epoch": 0.2383270644036571, + "grad_norm": 491.67291259765625, + "learning_rate": 6.265634734733848e-06, + "loss": 72.9044, + "step": 58990 + }, + { + "epoch": 0.23836746566902475, + "grad_norm": 1070.6533203125, + "learning_rate": 6.264270248652199e-06, + "loss": 61.3305, + "step": 59000 + }, + { + "epoch": 0.2384078669343924, + "grad_norm": 303.5849609375, + "learning_rate": 6.262905661978367e-06, + "loss": 44.1885, + "step": 59010 + }, + { + "epoch": 0.23844826819976, + "grad_norm": 756.376708984375, + "learning_rate": 6.261540974820928e-06, + "loss": 31.1812, + "step": 59020 + }, + { + "epoch": 0.23848866946512765, + "grad_norm": 976.0094604492188, + "learning_rate": 6.260176187288463e-06, + "loss": 34.7344, + "step": 59030 + }, + { + "epoch": 0.23852907073049529, + "grad_norm": 488.3143005371094, + "learning_rate": 6.2588112994895636e-06, + "loss": 47.71, + "step": 59040 + }, + { + "epoch": 0.2385694719958629, + "grad_norm": 646.7849731445312, + "learning_rate": 6.257446311532824e-06, + "loss": 43.175, + "step": 59050 + }, + { + "epoch": 0.23860987326123054, + "grad_norm": 594.6917114257812, + "learning_rate": 6.256081223526854e-06, + "loss": 34.8907, + "step": 59060 + }, + { + "epoch": 0.23865027452659818, + "grad_norm": 275.3476867675781, + "learning_rate": 6.254716035580264e-06, + "loss": 36.1814, + "step": 59070 + }, + { + "epoch": 0.23869067579196582, + "grad_norm": 343.4564514160156, + "learning_rate": 6.25335074780168e-06, + "loss": 54.2813, + "step": 59080 + }, + { + "epoch": 0.23873107705733343, + "grad_norm": 504.92205810546875, + "learning_rate": 6.251985360299728e-06, + "loss": 39.9313, + "step": 59090 + }, + { + "epoch": 0.23877147832270107, + "grad_norm": 794.7259521484375, + "learning_rate": 6.250619873183046e-06, + "loss": 52.7031, + "step": 59100 + }, + { + "epoch": 0.2388118795880687, + "grad_norm": 610.1290283203125, + "learning_rate": 6.249254286560281e-06, + "loss": 37.6571, + "step": 59110 + }, + { + "epoch": 0.23885228085343632, + "grad_norm": 411.7903137207031, + "learning_rate": 6.247888600540084e-06, + "loss": 72.4403, + "step": 59120 + }, + { + "epoch": 0.23889268211880396, + "grad_norm": 479.38494873046875, + "learning_rate": 6.246522815231121e-06, + "loss": 60.42, + "step": 59130 + }, + { + "epoch": 0.2389330833841716, + "grad_norm": 504.35345458984375, + "learning_rate": 6.245156930742057e-06, + "loss": 42.7403, + "step": 59140 + }, + { + "epoch": 0.23897348464953921, + "grad_norm": 419.8155517578125, + "learning_rate": 6.24379094718157e-06, + "loss": 52.5672, + "step": 59150 + }, + { + "epoch": 0.23901388591490685, + "grad_norm": 340.598388671875, + "learning_rate": 6.2424248646583455e-06, + "loss": 39.9453, + "step": 59160 + }, + { + "epoch": 0.2390542871802745, + "grad_norm": 142.64559936523438, + "learning_rate": 6.241058683281077e-06, + "loss": 44.7031, + "step": 59170 + }, + { + "epoch": 0.2390946884456421, + "grad_norm": 685.390380859375, + "learning_rate": 6.239692403158465e-06, + "loss": 27.1027, + "step": 59180 + }, + { + "epoch": 0.23913508971100975, + "grad_norm": 920.209228515625, + "learning_rate": 6.238326024399217e-06, + "loss": 49.9231, + "step": 59190 + }, + { + "epoch": 0.2391754909763774, + "grad_norm": 424.9049377441406, + "learning_rate": 6.236959547112051e-06, + "loss": 45.3293, + "step": 59200 + }, + { + "epoch": 0.239215892241745, + "grad_norm": 1177.8206787109375, + "learning_rate": 6.235592971405691e-06, + "loss": 46.2852, + "step": 59210 + }, + { + "epoch": 0.23925629350711264, + "grad_norm": 496.20458984375, + "learning_rate": 6.234226297388869e-06, + "loss": 39.4153, + "step": 59220 + }, + { + "epoch": 0.23929669477248028, + "grad_norm": 635.3526000976562, + "learning_rate": 6.232859525170324e-06, + "loss": 36.4867, + "step": 59230 + }, + { + "epoch": 0.23933709603784792, + "grad_norm": 697.213134765625, + "learning_rate": 6.231492654858805e-06, + "loss": 60.3189, + "step": 59240 + }, + { + "epoch": 0.23937749730321553, + "grad_norm": 347.87237548828125, + "learning_rate": 6.230125686563068e-06, + "loss": 52.1539, + "step": 59250 + }, + { + "epoch": 0.23941789856858317, + "grad_norm": 338.2275085449219, + "learning_rate": 6.2287586203918745e-06, + "loss": 35.6523, + "step": 59260 + }, + { + "epoch": 0.2394582998339508, + "grad_norm": 843.0849609375, + "learning_rate": 6.227391456453997e-06, + "loss": 37.2032, + "step": 59270 + }, + { + "epoch": 0.23949870109931842, + "grad_norm": 558.8504638671875, + "learning_rate": 6.226024194858214e-06, + "loss": 39.231, + "step": 59280 + }, + { + "epoch": 0.23953910236468606, + "grad_norm": 1432.7646484375, + "learning_rate": 6.224656835713313e-06, + "loss": 34.3905, + "step": 59290 + }, + { + "epoch": 0.2395795036300537, + "grad_norm": 1002.6559448242188, + "learning_rate": 6.223289379128088e-06, + "loss": 46.1089, + "step": 59300 + }, + { + "epoch": 0.23961990489542132, + "grad_norm": 232.8958740234375, + "learning_rate": 6.221921825211342e-06, + "loss": 51.2115, + "step": 59310 + }, + { + "epoch": 0.23966030616078896, + "grad_norm": 811.0142822265625, + "learning_rate": 6.220554174071884e-06, + "loss": 49.028, + "step": 59320 + }, + { + "epoch": 0.2397007074261566, + "grad_norm": 1509.3494873046875, + "learning_rate": 6.219186425818531e-06, + "loss": 50.5631, + "step": 59330 + }, + { + "epoch": 0.2397411086915242, + "grad_norm": 602.2036743164062, + "learning_rate": 6.217818580560111e-06, + "loss": 56.5959, + "step": 59340 + }, + { + "epoch": 0.23978150995689185, + "grad_norm": 474.7471618652344, + "learning_rate": 6.216450638405454e-06, + "loss": 43.5263, + "step": 59350 + }, + { + "epoch": 0.2398219112222595, + "grad_norm": 537.8904418945312, + "learning_rate": 6.2150825994634025e-06, + "loss": 43.5742, + "step": 59360 + }, + { + "epoch": 0.2398623124876271, + "grad_norm": 1052.5858154296875, + "learning_rate": 6.2137144638428045e-06, + "loss": 60.8518, + "step": 59370 + }, + { + "epoch": 0.23990271375299474, + "grad_norm": 774.1162719726562, + "learning_rate": 6.21234623165252e-06, + "loss": 40.4853, + "step": 59380 + }, + { + "epoch": 0.23994311501836238, + "grad_norm": 553.7500610351562, + "learning_rate": 6.210977903001406e-06, + "loss": 47.8429, + "step": 59390 + }, + { + "epoch": 0.23998351628373002, + "grad_norm": 465.7264709472656, + "learning_rate": 6.209609477998339e-06, + "loss": 51.5527, + "step": 59400 + }, + { + "epoch": 0.24002391754909763, + "grad_norm": 535.73779296875, + "learning_rate": 6.2082409567521975e-06, + "loss": 32.3945, + "step": 59410 + }, + { + "epoch": 0.24006431881446527, + "grad_norm": 346.88336181640625, + "learning_rate": 6.206872339371867e-06, + "loss": 44.4696, + "step": 59420 + }, + { + "epoch": 0.2401047200798329, + "grad_norm": 839.6456909179688, + "learning_rate": 6.205503625966247e-06, + "loss": 43.1945, + "step": 59430 + }, + { + "epoch": 0.24014512134520052, + "grad_norm": 419.2178039550781, + "learning_rate": 6.204134816644233e-06, + "loss": 44.3108, + "step": 59440 + }, + { + "epoch": 0.24018552261056816, + "grad_norm": 528.9591064453125, + "learning_rate": 6.2027659115147375e-06, + "loss": 30.9298, + "step": 59450 + }, + { + "epoch": 0.2402259238759358, + "grad_norm": 719.5377197265625, + "learning_rate": 6.201396910686679e-06, + "loss": 44.7331, + "step": 59460 + }, + { + "epoch": 0.24026632514130342, + "grad_norm": 370.1326599121094, + "learning_rate": 6.200027814268984e-06, + "loss": 31.5706, + "step": 59470 + }, + { + "epoch": 0.24030672640667106, + "grad_norm": 719.8978881835938, + "learning_rate": 6.198658622370582e-06, + "loss": 41.7919, + "step": 59480 + }, + { + "epoch": 0.2403471276720387, + "grad_norm": 727.5550537109375, + "learning_rate": 6.197289335100412e-06, + "loss": 46.8141, + "step": 59490 + }, + { + "epoch": 0.2403875289374063, + "grad_norm": 708.8463134765625, + "learning_rate": 6.195919952567426e-06, + "loss": 50.2198, + "step": 59500 + }, + { + "epoch": 0.24042793020277395, + "grad_norm": 681.5879516601562, + "learning_rate": 6.194550474880579e-06, + "loss": 48.9477, + "step": 59510 + }, + { + "epoch": 0.2404683314681416, + "grad_norm": 456.62774658203125, + "learning_rate": 6.193180902148833e-06, + "loss": 32.21, + "step": 59520 + }, + { + "epoch": 0.2405087327335092, + "grad_norm": 308.8553466796875, + "learning_rate": 6.1918112344811575e-06, + "loss": 48.589, + "step": 59530 + }, + { + "epoch": 0.24054913399887684, + "grad_norm": 372.4393005371094, + "learning_rate": 6.190441471986533e-06, + "loss": 45.7649, + "step": 59540 + }, + { + "epoch": 0.24058953526424448, + "grad_norm": 452.86187744140625, + "learning_rate": 6.18907161477394e-06, + "loss": 51.8513, + "step": 59550 + }, + { + "epoch": 0.24062993652961212, + "grad_norm": 409.22991943359375, + "learning_rate": 6.187701662952381e-06, + "loss": 32.4357, + "step": 59560 + }, + { + "epoch": 0.24067033779497973, + "grad_norm": 589.134521484375, + "learning_rate": 6.18633161663085e-06, + "loss": 51.7794, + "step": 59570 + }, + { + "epoch": 0.24071073906034737, + "grad_norm": 250.7372283935547, + "learning_rate": 6.184961475918355e-06, + "loss": 36.2709, + "step": 59580 + }, + { + "epoch": 0.240751140325715, + "grad_norm": 337.1333312988281, + "learning_rate": 6.183591240923914e-06, + "loss": 43.1022, + "step": 59590 + }, + { + "epoch": 0.24079154159108263, + "grad_norm": 515.5701293945312, + "learning_rate": 6.182220911756551e-06, + "loss": 36.8048, + "step": 59600 + }, + { + "epoch": 0.24083194285645027, + "grad_norm": 393.62774658203125, + "learning_rate": 6.1808504885252955e-06, + "loss": 58.8733, + "step": 59610 + }, + { + "epoch": 0.2408723441218179, + "grad_norm": 486.2384033203125, + "learning_rate": 6.179479971339186e-06, + "loss": 49.6456, + "step": 59620 + }, + { + "epoch": 0.24091274538718552, + "grad_norm": 244.34669494628906, + "learning_rate": 6.178109360307267e-06, + "loss": 35.7055, + "step": 59630 + }, + { + "epoch": 0.24095314665255316, + "grad_norm": 892.8306274414062, + "learning_rate": 6.176738655538594e-06, + "loss": 45.6613, + "step": 59640 + }, + { + "epoch": 0.2409935479179208, + "grad_norm": 196.5320281982422, + "learning_rate": 6.175367857142227e-06, + "loss": 46.8177, + "step": 59650 + }, + { + "epoch": 0.2410339491832884, + "grad_norm": 404.7183532714844, + "learning_rate": 6.173996965227234e-06, + "loss": 50.3921, + "step": 59660 + }, + { + "epoch": 0.24107435044865605, + "grad_norm": 592.3640747070312, + "learning_rate": 6.17262597990269e-06, + "loss": 36.2447, + "step": 59670 + }, + { + "epoch": 0.2411147517140237, + "grad_norm": 379.3549499511719, + "learning_rate": 6.171254901277678e-06, + "loss": 36.7108, + "step": 59680 + }, + { + "epoch": 0.2411551529793913, + "grad_norm": 521.3427734375, + "learning_rate": 6.169883729461289e-06, + "loss": 52.4567, + "step": 59690 + }, + { + "epoch": 0.24119555424475894, + "grad_norm": 661.0156860351562, + "learning_rate": 6.16851246456262e-06, + "loss": 28.96, + "step": 59700 + }, + { + "epoch": 0.24123595551012658, + "grad_norm": 732.6842651367188, + "learning_rate": 6.167141106690778e-06, + "loss": 55.4517, + "step": 59710 + }, + { + "epoch": 0.24127635677549422, + "grad_norm": 612.0284423828125, + "learning_rate": 6.1657696559548755e-06, + "loss": 49.9304, + "step": 59720 + }, + { + "epoch": 0.24131675804086183, + "grad_norm": 416.08673095703125, + "learning_rate": 6.16439811246403e-06, + "loss": 50.356, + "step": 59730 + }, + { + "epoch": 0.24135715930622947, + "grad_norm": 1435.585693359375, + "learning_rate": 6.163026476327371e-06, + "loss": 48.206, + "step": 59740 + }, + { + "epoch": 0.24139756057159711, + "grad_norm": 472.382568359375, + "learning_rate": 6.161654747654033e-06, + "loss": 40.12, + "step": 59750 + }, + { + "epoch": 0.24143796183696473, + "grad_norm": 170.5372314453125, + "learning_rate": 6.1602829265531585e-06, + "loss": 30.2096, + "step": 59760 + }, + { + "epoch": 0.24147836310233237, + "grad_norm": 429.81610107421875, + "learning_rate": 6.158911013133896e-06, + "loss": 54.4487, + "step": 59770 + }, + { + "epoch": 0.2415187643677, + "grad_norm": 324.32037353515625, + "learning_rate": 6.157539007505402e-06, + "loss": 62.2513, + "step": 59780 + }, + { + "epoch": 0.24155916563306762, + "grad_norm": 808.5761108398438, + "learning_rate": 6.156166909776842e-06, + "loss": 51.42, + "step": 59790 + }, + { + "epoch": 0.24159956689843526, + "grad_norm": 782.04296875, + "learning_rate": 6.154794720057388e-06, + "loss": 40.6022, + "step": 59800 + }, + { + "epoch": 0.2416399681638029, + "grad_norm": 1242.795654296875, + "learning_rate": 6.153422438456218e-06, + "loss": 47.4253, + "step": 59810 + }, + { + "epoch": 0.2416803694291705, + "grad_norm": 647.463623046875, + "learning_rate": 6.1520500650825175e-06, + "loss": 28.0886, + "step": 59820 + }, + { + "epoch": 0.24172077069453815, + "grad_norm": 2162.615234375, + "learning_rate": 6.150677600045479e-06, + "loss": 84.5893, + "step": 59830 + }, + { + "epoch": 0.2417611719599058, + "grad_norm": 999.169677734375, + "learning_rate": 6.1493050434543065e-06, + "loss": 48.0991, + "step": 59840 + }, + { + "epoch": 0.2418015732252734, + "grad_norm": 583.0239868164062, + "learning_rate": 6.1479323954182055e-06, + "loss": 44.8052, + "step": 59850 + }, + { + "epoch": 0.24184197449064104, + "grad_norm": 596.6414184570312, + "learning_rate": 6.146559656046394e-06, + "loss": 55.0618, + "step": 59860 + }, + { + "epoch": 0.24188237575600868, + "grad_norm": 850.3500366210938, + "learning_rate": 6.1451868254480914e-06, + "loss": 51.9113, + "step": 59870 + }, + { + "epoch": 0.24192277702137632, + "grad_norm": 854.9940185546875, + "learning_rate": 6.143813903732527e-06, + "loss": 45.9024, + "step": 59880 + }, + { + "epoch": 0.24196317828674394, + "grad_norm": 819.890869140625, + "learning_rate": 6.142440891008941e-06, + "loss": 53.9581, + "step": 59890 + }, + { + "epoch": 0.24200357955211158, + "grad_norm": 587.396728515625, + "learning_rate": 6.141067787386579e-06, + "loss": 54.0361, + "step": 59900 + }, + { + "epoch": 0.24204398081747922, + "grad_norm": 547.8096923828125, + "learning_rate": 6.139694592974687e-06, + "loss": 45.5808, + "step": 59910 + }, + { + "epoch": 0.24208438208284683, + "grad_norm": 634.807373046875, + "learning_rate": 6.1383213078825275e-06, + "loss": 58.0814, + "step": 59920 + }, + { + "epoch": 0.24212478334821447, + "grad_norm": 718.9409790039062, + "learning_rate": 6.136947932219365e-06, + "loss": 43.585, + "step": 59930 + }, + { + "epoch": 0.2421651846135821, + "grad_norm": 689.8276977539062, + "learning_rate": 6.135574466094475e-06, + "loss": 53.0661, + "step": 59940 + }, + { + "epoch": 0.24220558587894972, + "grad_norm": 813.9505615234375, + "learning_rate": 6.134200909617135e-06, + "loss": 48.7048, + "step": 59950 + }, + { + "epoch": 0.24224598714431736, + "grad_norm": 703.8113403320312, + "learning_rate": 6.132827262896634e-06, + "loss": 55.3374, + "step": 59960 + }, + { + "epoch": 0.242286388409685, + "grad_norm": 574.634765625, + "learning_rate": 6.131453526042267e-06, + "loss": 47.4171, + "step": 59970 + }, + { + "epoch": 0.2423267896750526, + "grad_norm": 1104.1204833984375, + "learning_rate": 6.130079699163335e-06, + "loss": 60.4575, + "step": 59980 + }, + { + "epoch": 0.24236719094042025, + "grad_norm": 377.7409973144531, + "learning_rate": 6.128705782369149e-06, + "loss": 54.446, + "step": 59990 + }, + { + "epoch": 0.2424075922057879, + "grad_norm": 792.7305908203125, + "learning_rate": 6.127331775769023e-06, + "loss": 69.839, + "step": 60000 + }, + { + "epoch": 0.2424479934711555, + "grad_norm": 1081.14404296875, + "learning_rate": 6.125957679472282e-06, + "loss": 37.0099, + "step": 60010 + }, + { + "epoch": 0.24248839473652314, + "grad_norm": 612.3765869140625, + "learning_rate": 6.124583493588254e-06, + "loss": 41.2812, + "step": 60020 + }, + { + "epoch": 0.24252879600189078, + "grad_norm": 482.0962219238281, + "learning_rate": 6.123209218226282e-06, + "loss": 56.972, + "step": 60030 + }, + { + "epoch": 0.24256919726725842, + "grad_norm": 410.4952392578125, + "learning_rate": 6.121834853495704e-06, + "loss": 31.3736, + "step": 60040 + }, + { + "epoch": 0.24260959853262604, + "grad_norm": 522.3032836914062, + "learning_rate": 6.120460399505876e-06, + "loss": 40.1662, + "step": 60050 + }, + { + "epoch": 0.24264999979799368, + "grad_norm": 473.8763122558594, + "learning_rate": 6.119085856366158e-06, + "loss": 45.7846, + "step": 60060 + }, + { + "epoch": 0.24269040106336132, + "grad_norm": 537.3663940429688, + "learning_rate": 6.117711224185913e-06, + "loss": 58.7338, + "step": 60070 + }, + { + "epoch": 0.24273080232872893, + "grad_norm": 577.8731689453125, + "learning_rate": 6.116336503074516e-06, + "loss": 50.324, + "step": 60080 + }, + { + "epoch": 0.24277120359409657, + "grad_norm": 450.7883605957031, + "learning_rate": 6.114961693141346e-06, + "loss": 48.6675, + "step": 60090 + }, + { + "epoch": 0.2428116048594642, + "grad_norm": 739.99169921875, + "learning_rate": 6.113586794495792e-06, + "loss": 46.6423, + "step": 60100 + }, + { + "epoch": 0.24285200612483182, + "grad_norm": 830.27880859375, + "learning_rate": 6.112211807247246e-06, + "loss": 42.0815, + "step": 60110 + }, + { + "epoch": 0.24289240739019946, + "grad_norm": 787.69091796875, + "learning_rate": 6.110836731505112e-06, + "loss": 61.397, + "step": 60120 + }, + { + "epoch": 0.2429328086555671, + "grad_norm": 335.32855224609375, + "learning_rate": 6.109461567378796e-06, + "loss": 43.3532, + "step": 60130 + }, + { + "epoch": 0.2429732099209347, + "grad_norm": 610.6149291992188, + "learning_rate": 6.108086314977717e-06, + "loss": 44.857, + "step": 60140 + }, + { + "epoch": 0.24301361118630235, + "grad_norm": 545.5875244140625, + "learning_rate": 6.106710974411294e-06, + "loss": 51.1943, + "step": 60150 + }, + { + "epoch": 0.24305401245167, + "grad_norm": 548.0823974609375, + "learning_rate": 6.105335545788957e-06, + "loss": 40.8381, + "step": 60160 + }, + { + "epoch": 0.2430944137170376, + "grad_norm": 871.58251953125, + "learning_rate": 6.103960029220145e-06, + "loss": 42.5754, + "step": 60170 + }, + { + "epoch": 0.24313481498240525, + "grad_norm": 591.369140625, + "learning_rate": 6.102584424814299e-06, + "loss": 43.5635, + "step": 60180 + }, + { + "epoch": 0.24317521624777289, + "grad_norm": 768.7680053710938, + "learning_rate": 6.101208732680872e-06, + "loss": 52.6281, + "step": 60190 + }, + { + "epoch": 0.24321561751314053, + "grad_norm": 489.9885559082031, + "learning_rate": 6.09983295292932e-06, + "loss": 58.9169, + "step": 60200 + }, + { + "epoch": 0.24325601877850814, + "grad_norm": 323.9236145019531, + "learning_rate": 6.0984570856691046e-06, + "loss": 31.9827, + "step": 60210 + }, + { + "epoch": 0.24329642004387578, + "grad_norm": 422.1755676269531, + "learning_rate": 6.097081131009703e-06, + "loss": 29.6285, + "step": 60220 + }, + { + "epoch": 0.24333682130924342, + "grad_norm": 323.1979064941406, + "learning_rate": 6.095705089060589e-06, + "loss": 42.9744, + "step": 60230 + }, + { + "epoch": 0.24337722257461103, + "grad_norm": 1390.3934326171875, + "learning_rate": 6.094328959931252e-06, + "loss": 45.7392, + "step": 60240 + }, + { + "epoch": 0.24341762383997867, + "grad_norm": 841.0800170898438, + "learning_rate": 6.092952743731179e-06, + "loss": 48.7019, + "step": 60250 + }, + { + "epoch": 0.2434580251053463, + "grad_norm": 698.6349487304688, + "learning_rate": 6.091576440569873e-06, + "loss": 58.3474, + "step": 60260 + }, + { + "epoch": 0.24349842637071392, + "grad_norm": 622.3505859375, + "learning_rate": 6.09020005055684e-06, + "loss": 35.8427, + "step": 60270 + }, + { + "epoch": 0.24353882763608156, + "grad_norm": 661.6207885742188, + "learning_rate": 6.088823573801592e-06, + "loss": 56.9386, + "step": 60280 + }, + { + "epoch": 0.2435792289014492, + "grad_norm": 241.02731323242188, + "learning_rate": 6.087447010413651e-06, + "loss": 33.8124, + "step": 60290 + }, + { + "epoch": 0.24361963016681681, + "grad_norm": 544.45263671875, + "learning_rate": 6.08607036050254e-06, + "loss": 42.6043, + "step": 60300 + }, + { + "epoch": 0.24366003143218445, + "grad_norm": 353.69683837890625, + "learning_rate": 6.084693624177794e-06, + "loss": 31.6919, + "step": 60310 + }, + { + "epoch": 0.2437004326975521, + "grad_norm": 566.5994873046875, + "learning_rate": 6.083316801548956e-06, + "loss": 37.8967, + "step": 60320 + }, + { + "epoch": 0.2437408339629197, + "grad_norm": 383.8753967285156, + "learning_rate": 6.081939892725572e-06, + "loss": 46.3118, + "step": 60330 + }, + { + "epoch": 0.24378123522828735, + "grad_norm": 634.250244140625, + "learning_rate": 6.080562897817196e-06, + "loss": 37.6377, + "step": 60340 + }, + { + "epoch": 0.243821636493655, + "grad_norm": 595.0110473632812, + "learning_rate": 6.079185816933388e-06, + "loss": 39.7675, + "step": 60350 + }, + { + "epoch": 0.24386203775902263, + "grad_norm": 784.342041015625, + "learning_rate": 6.077808650183718e-06, + "loss": 46.149, + "step": 60360 + }, + { + "epoch": 0.24390243902439024, + "grad_norm": 654.6844482421875, + "learning_rate": 6.076431397677762e-06, + "loss": 46.0708, + "step": 60370 + }, + { + "epoch": 0.24394284028975788, + "grad_norm": 735.3973388671875, + "learning_rate": 6.0750540595250986e-06, + "loss": 40.0463, + "step": 60380 + }, + { + "epoch": 0.24398324155512552, + "grad_norm": 690.0650634765625, + "learning_rate": 6.073676635835317e-06, + "loss": 37.8187, + "step": 60390 + }, + { + "epoch": 0.24402364282049313, + "grad_norm": 479.63134765625, + "learning_rate": 6.072299126718012e-06, + "loss": 38.4419, + "step": 60400 + }, + { + "epoch": 0.24406404408586077, + "grad_norm": 654.6773071289062, + "learning_rate": 6.070921532282788e-06, + "loss": 34.0201, + "step": 60410 + }, + { + "epoch": 0.2441044453512284, + "grad_norm": 833.0028686523438, + "learning_rate": 6.0695438526392536e-06, + "loss": 49.9405, + "step": 60420 + }, + { + "epoch": 0.24414484661659602, + "grad_norm": 768.3114013671875, + "learning_rate": 6.068166087897022e-06, + "loss": 48.4438, + "step": 60430 + }, + { + "epoch": 0.24418524788196366, + "grad_norm": 539.3959350585938, + "learning_rate": 6.066788238165717e-06, + "loss": 45.1048, + "step": 60440 + }, + { + "epoch": 0.2442256491473313, + "grad_norm": 413.73114013671875, + "learning_rate": 6.0654103035549686e-06, + "loss": 50.7065, + "step": 60450 + }, + { + "epoch": 0.24426605041269892, + "grad_norm": 666.993408203125, + "learning_rate": 6.064032284174411e-06, + "loss": 40.3299, + "step": 60460 + }, + { + "epoch": 0.24430645167806656, + "grad_norm": 420.68548583984375, + "learning_rate": 6.062654180133689e-06, + "loss": 34.3669, + "step": 60470 + }, + { + "epoch": 0.2443468529434342, + "grad_norm": 687.9544067382812, + "learning_rate": 6.06127599154245e-06, + "loss": 38.3136, + "step": 60480 + }, + { + "epoch": 0.2443872542088018, + "grad_norm": 524.0534057617188, + "learning_rate": 6.059897718510351e-06, + "loss": 39.4131, + "step": 60490 + }, + { + "epoch": 0.24442765547416945, + "grad_norm": 639.9952392578125, + "learning_rate": 6.058519361147055e-06, + "loss": 41.8146, + "step": 60500 + }, + { + "epoch": 0.2444680567395371, + "grad_norm": 392.12811279296875, + "learning_rate": 6.057140919562231e-06, + "loss": 23.9516, + "step": 60510 + }, + { + "epoch": 0.24450845800490473, + "grad_norm": 813.45849609375, + "learning_rate": 6.055762393865555e-06, + "loss": 45.8369, + "step": 60520 + }, + { + "epoch": 0.24454885927027234, + "grad_norm": 1308.13671875, + "learning_rate": 6.054383784166712e-06, + "loss": 43.4649, + "step": 60530 + }, + { + "epoch": 0.24458926053563998, + "grad_norm": 1111.109130859375, + "learning_rate": 6.05300509057539e-06, + "loss": 61.8905, + "step": 60540 + }, + { + "epoch": 0.24462966180100762, + "grad_norm": 1149.5960693359375, + "learning_rate": 6.051626313201285e-06, + "loss": 53.8758, + "step": 60550 + }, + { + "epoch": 0.24467006306637523, + "grad_norm": 603.8845825195312, + "learning_rate": 6.0502474521541014e-06, + "loss": 53.2023, + "step": 60560 + }, + { + "epoch": 0.24471046433174287, + "grad_norm": 435.4098205566406, + "learning_rate": 6.048868507543547e-06, + "loss": 24.9156, + "step": 60570 + }, + { + "epoch": 0.2447508655971105, + "grad_norm": 702.36083984375, + "learning_rate": 6.047489479479339e-06, + "loss": 30.666, + "step": 60580 + }, + { + "epoch": 0.24479126686247812, + "grad_norm": 482.1972351074219, + "learning_rate": 6.046110368071201e-06, + "loss": 45.1941, + "step": 60590 + }, + { + "epoch": 0.24483166812784576, + "grad_norm": 290.9931945800781, + "learning_rate": 6.044731173428862e-06, + "loss": 23.8702, + "step": 60600 + }, + { + "epoch": 0.2448720693932134, + "grad_norm": 466.46673583984375, + "learning_rate": 6.043351895662059e-06, + "loss": 42.5676, + "step": 60610 + }, + { + "epoch": 0.24491247065858102, + "grad_norm": 490.4240417480469, + "learning_rate": 6.041972534880533e-06, + "loss": 21.8341, + "step": 60620 + }, + { + "epoch": 0.24495287192394866, + "grad_norm": 286.0077209472656, + "learning_rate": 6.040593091194035e-06, + "loss": 59.126, + "step": 60630 + }, + { + "epoch": 0.2449932731893163, + "grad_norm": 332.6376953125, + "learning_rate": 6.039213564712319e-06, + "loss": 50.8156, + "step": 60640 + }, + { + "epoch": 0.2450336744546839, + "grad_norm": 509.3329162597656, + "learning_rate": 6.03783395554515e-06, + "loss": 36.6754, + "step": 60650 + }, + { + "epoch": 0.24507407572005155, + "grad_norm": 362.27398681640625, + "learning_rate": 6.036454263802297e-06, + "loss": 24.3201, + "step": 60660 + }, + { + "epoch": 0.2451144769854192, + "grad_norm": 482.9433288574219, + "learning_rate": 6.035074489593536e-06, + "loss": 51.8037, + "step": 60670 + }, + { + "epoch": 0.24515487825078683, + "grad_norm": 437.08489990234375, + "learning_rate": 6.033694633028644e-06, + "loss": 43.0535, + "step": 60680 + }, + { + "epoch": 0.24519527951615444, + "grad_norm": 999.8873901367188, + "learning_rate": 6.032314694217416e-06, + "loss": 51.4657, + "step": 60690 + }, + { + "epoch": 0.24523568078152208, + "grad_norm": 1049.546142578125, + "learning_rate": 6.030934673269646e-06, + "loss": 43.613, + "step": 60700 + }, + { + "epoch": 0.24527608204688972, + "grad_norm": 786.54150390625, + "learning_rate": 6.029554570295135e-06, + "loss": 40.1556, + "step": 60710 + }, + { + "epoch": 0.24531648331225733, + "grad_norm": 773.1795043945312, + "learning_rate": 6.028174385403693e-06, + "loss": 56.3238, + "step": 60720 + }, + { + "epoch": 0.24535688457762497, + "grad_norm": 431.96539306640625, + "learning_rate": 6.026794118705133e-06, + "loss": 56.1787, + "step": 60730 + }, + { + "epoch": 0.2453972858429926, + "grad_norm": 789.9153442382812, + "learning_rate": 6.025413770309278e-06, + "loss": 53.2711, + "step": 60740 + }, + { + "epoch": 0.24543768710836023, + "grad_norm": 771.6865234375, + "learning_rate": 6.024033340325954e-06, + "loss": 45.3818, + "step": 60750 + }, + { + "epoch": 0.24547808837372787, + "grad_norm": 642.1569213867188, + "learning_rate": 6.022652828864999e-06, + "loss": 36.2313, + "step": 60760 + }, + { + "epoch": 0.2455184896390955, + "grad_norm": 1129.879638671875, + "learning_rate": 6.0212722360362496e-06, + "loss": 67.5433, + "step": 60770 + }, + { + "epoch": 0.24555889090446312, + "grad_norm": 652.3465576171875, + "learning_rate": 6.019891561949554e-06, + "loss": 44.3988, + "step": 60780 + }, + { + "epoch": 0.24559929216983076, + "grad_norm": 691.905517578125, + "learning_rate": 6.01851080671477e-06, + "loss": 43.6003, + "step": 60790 + }, + { + "epoch": 0.2456396934351984, + "grad_norm": 836.9973754882812, + "learning_rate": 6.017129970441756e-06, + "loss": 41.0295, + "step": 60800 + }, + { + "epoch": 0.245680094700566, + "grad_norm": 537.411376953125, + "learning_rate": 6.015749053240378e-06, + "loss": 35.7805, + "step": 60810 + }, + { + "epoch": 0.24572049596593365, + "grad_norm": 811.6790771484375, + "learning_rate": 6.0143680552205075e-06, + "loss": 44.9824, + "step": 60820 + }, + { + "epoch": 0.2457608972313013, + "grad_norm": 748.29541015625, + "learning_rate": 6.012986976492025e-06, + "loss": 53.0722, + "step": 60830 + }, + { + "epoch": 0.2458012984966689, + "grad_norm": 752.9380493164062, + "learning_rate": 6.011605817164822e-06, + "loss": 44.365, + "step": 60840 + }, + { + "epoch": 0.24584169976203654, + "grad_norm": 539.8973999023438, + "learning_rate": 6.0102245773487855e-06, + "loss": 38.9263, + "step": 60850 + }, + { + "epoch": 0.24588210102740418, + "grad_norm": 748.1033325195312, + "learning_rate": 6.008843257153815e-06, + "loss": 52.9087, + "step": 60860 + }, + { + "epoch": 0.24592250229277182, + "grad_norm": 397.9111328125, + "learning_rate": 6.007461856689815e-06, + "loss": 38.5012, + "step": 60870 + }, + { + "epoch": 0.24596290355813943, + "grad_norm": 532.90771484375, + "learning_rate": 6.0060803760667e-06, + "loss": 53.9542, + "step": 60880 + }, + { + "epoch": 0.24600330482350707, + "grad_norm": 331.3194885253906, + "learning_rate": 6.004698815394389e-06, + "loss": 55.5145, + "step": 60890 + }, + { + "epoch": 0.24604370608887471, + "grad_norm": 1173.3389892578125, + "learning_rate": 6.003317174782801e-06, + "loss": 59.5596, + "step": 60900 + }, + { + "epoch": 0.24608410735424233, + "grad_norm": 390.91949462890625, + "learning_rate": 6.001935454341872e-06, + "loss": 42.8467, + "step": 60910 + }, + { + "epoch": 0.24612450861960997, + "grad_norm": 771.1336669921875, + "learning_rate": 6.000553654181536e-06, + "loss": 36.5511, + "step": 60920 + }, + { + "epoch": 0.2461649098849776, + "grad_norm": 504.6431884765625, + "learning_rate": 5.999171774411737e-06, + "loss": 48.6999, + "step": 60930 + }, + { + "epoch": 0.24620531115034522, + "grad_norm": 1690.32568359375, + "learning_rate": 5.997789815142427e-06, + "loss": 46.5689, + "step": 60940 + }, + { + "epoch": 0.24624571241571286, + "grad_norm": 435.9308776855469, + "learning_rate": 5.99640777648356e-06, + "loss": 59.5126, + "step": 60950 + }, + { + "epoch": 0.2462861136810805, + "grad_norm": 508.0345458984375, + "learning_rate": 5.9950256585450995e-06, + "loss": 42.083, + "step": 60960 + }, + { + "epoch": 0.2463265149464481, + "grad_norm": 600.52392578125, + "learning_rate": 5.993643461437013e-06, + "loss": 37.8109, + "step": 60970 + }, + { + "epoch": 0.24636691621181575, + "grad_norm": 456.70550537109375, + "learning_rate": 5.992261185269278e-06, + "loss": 45.6742, + "step": 60980 + }, + { + "epoch": 0.2464073174771834, + "grad_norm": 373.98712158203125, + "learning_rate": 5.990878830151873e-06, + "loss": 38.2048, + "step": 60990 + }, + { + "epoch": 0.246447718742551, + "grad_norm": 192.475341796875, + "learning_rate": 5.989496396194787e-06, + "loss": 43.5954, + "step": 61000 + }, + { + "epoch": 0.24648812000791864, + "grad_norm": 836.5695190429688, + "learning_rate": 5.988113883508016e-06, + "loss": 79.2422, + "step": 61010 + }, + { + "epoch": 0.24652852127328628, + "grad_norm": 487.8687744140625, + "learning_rate": 5.986731292201555e-06, + "loss": 51.7228, + "step": 61020 + }, + { + "epoch": 0.24656892253865392, + "grad_norm": 342.183349609375, + "learning_rate": 5.985348622385415e-06, + "loss": 33.0495, + "step": 61030 + }, + { + "epoch": 0.24660932380402154, + "grad_norm": 1103.4874267578125, + "learning_rate": 5.9839658741696085e-06, + "loss": 64.6234, + "step": 61040 + }, + { + "epoch": 0.24664972506938918, + "grad_norm": 683.1010131835938, + "learning_rate": 5.982583047664151e-06, + "loss": 35.5353, + "step": 61050 + }, + { + "epoch": 0.24669012633475682, + "grad_norm": 318.55364990234375, + "learning_rate": 5.981200142979071e-06, + "loss": 34.0901, + "step": 61060 + }, + { + "epoch": 0.24673052760012443, + "grad_norm": 737.1567993164062, + "learning_rate": 5.9798171602244e-06, + "loss": 40.2012, + "step": 61070 + }, + { + "epoch": 0.24677092886549207, + "grad_norm": 692.0882568359375, + "learning_rate": 5.978434099510172e-06, + "loss": 47.4886, + "step": 61080 + }, + { + "epoch": 0.2468113301308597, + "grad_norm": 655.0506591796875, + "learning_rate": 5.977050960946433e-06, + "loss": 49.641, + "step": 61090 + }, + { + "epoch": 0.24685173139622732, + "grad_norm": 564.3485717773438, + "learning_rate": 5.975667744643235e-06, + "loss": 44.6221, + "step": 61100 + }, + { + "epoch": 0.24689213266159496, + "grad_norm": 896.7650146484375, + "learning_rate": 5.974284450710631e-06, + "loss": 56.5591, + "step": 61110 + }, + { + "epoch": 0.2469325339269626, + "grad_norm": 1133.868896484375, + "learning_rate": 5.972901079258685e-06, + "loss": 55.2751, + "step": 61120 + }, + { + "epoch": 0.2469729351923302, + "grad_norm": 467.185546875, + "learning_rate": 5.971517630397465e-06, + "loss": 45.7085, + "step": 61130 + }, + { + "epoch": 0.24701333645769785, + "grad_norm": 87.59776306152344, + "learning_rate": 5.970134104237046e-06, + "loss": 51.0002, + "step": 61140 + }, + { + "epoch": 0.2470537377230655, + "grad_norm": 475.14825439453125, + "learning_rate": 5.96875050088751e-06, + "loss": 50.5819, + "step": 61150 + }, + { + "epoch": 0.2470941389884331, + "grad_norm": 673.6754150390625, + "learning_rate": 5.9673668204589396e-06, + "loss": 51.5235, + "step": 61160 + }, + { + "epoch": 0.24713454025380074, + "grad_norm": 265.7752380371094, + "learning_rate": 5.965983063061432e-06, + "loss": 53.3672, + "step": 61170 + }, + { + "epoch": 0.24717494151916838, + "grad_norm": 732.6128540039062, + "learning_rate": 5.964599228805087e-06, + "loss": 49.7361, + "step": 61180 + }, + { + "epoch": 0.24721534278453602, + "grad_norm": 686.7088012695312, + "learning_rate": 5.963215317800008e-06, + "loss": 46.626, + "step": 61190 + }, + { + "epoch": 0.24725574404990364, + "grad_norm": 532.4640502929688, + "learning_rate": 5.961831330156306e-06, + "loss": 37.8897, + "step": 61200 + }, + { + "epoch": 0.24729614531527128, + "grad_norm": 495.2468566894531, + "learning_rate": 5.960447265984098e-06, + "loss": 45.9627, + "step": 61210 + }, + { + "epoch": 0.24733654658063892, + "grad_norm": 328.6434631347656, + "learning_rate": 5.95906312539351e-06, + "loss": 39.4499, + "step": 61220 + }, + { + "epoch": 0.24737694784600653, + "grad_norm": 836.5567626953125, + "learning_rate": 5.9576789084946705e-06, + "loss": 52.5922, + "step": 61230 + }, + { + "epoch": 0.24741734911137417, + "grad_norm": 379.64019775390625, + "learning_rate": 5.956294615397716e-06, + "loss": 52.1735, + "step": 61240 + }, + { + "epoch": 0.2474577503767418, + "grad_norm": 545.490966796875, + "learning_rate": 5.954910246212787e-06, + "loss": 43.6922, + "step": 61250 + }, + { + "epoch": 0.24749815164210942, + "grad_norm": 1560.308349609375, + "learning_rate": 5.953525801050032e-06, + "loss": 46.3509, + "step": 61260 + }, + { + "epoch": 0.24753855290747706, + "grad_norm": 458.22314453125, + "learning_rate": 5.952141280019605e-06, + "loss": 46.8482, + "step": 61270 + }, + { + "epoch": 0.2475789541728447, + "grad_norm": 852.26953125, + "learning_rate": 5.950756683231667e-06, + "loss": 49.4784, + "step": 61280 + }, + { + "epoch": 0.2476193554382123, + "grad_norm": 506.84912109375, + "learning_rate": 5.949372010796384e-06, + "loss": 37.7671, + "step": 61290 + }, + { + "epoch": 0.24765975670357995, + "grad_norm": 221.2855224609375, + "learning_rate": 5.947987262823924e-06, + "loss": 32.4322, + "step": 61300 + }, + { + "epoch": 0.2477001579689476, + "grad_norm": 794.8474731445312, + "learning_rate": 5.94660243942447e-06, + "loss": 44.5728, + "step": 61310 + }, + { + "epoch": 0.2477405592343152, + "grad_norm": 841.0701293945312, + "learning_rate": 5.945217540708206e-06, + "loss": 67.0786, + "step": 61320 + }, + { + "epoch": 0.24778096049968285, + "grad_norm": 450.19146728515625, + "learning_rate": 5.9438325667853185e-06, + "loss": 44.422, + "step": 61330 + }, + { + "epoch": 0.24782136176505049, + "grad_norm": 431.77667236328125, + "learning_rate": 5.942447517766005e-06, + "loss": 46.848, + "step": 61340 + }, + { + "epoch": 0.24786176303041813, + "grad_norm": 796.6408081054688, + "learning_rate": 5.941062393760467e-06, + "loss": 67.5153, + "step": 61350 + }, + { + "epoch": 0.24790216429578574, + "grad_norm": 417.2357177734375, + "learning_rate": 5.939677194878915e-06, + "loss": 36.6902, + "step": 61360 + }, + { + "epoch": 0.24794256556115338, + "grad_norm": 501.64276123046875, + "learning_rate": 5.93829192123156e-06, + "loss": 36.0204, + "step": 61370 + }, + { + "epoch": 0.24798296682652102, + "grad_norm": 1153.709228515625, + "learning_rate": 5.936906572928625e-06, + "loss": 53.7668, + "step": 61380 + }, + { + "epoch": 0.24802336809188863, + "grad_norm": 445.2611999511719, + "learning_rate": 5.935521150080331e-06, + "loss": 35.9362, + "step": 61390 + }, + { + "epoch": 0.24806376935725627, + "grad_norm": 768.938720703125, + "learning_rate": 5.934135652796914e-06, + "loss": 62.0834, + "step": 61400 + }, + { + "epoch": 0.2481041706226239, + "grad_norm": 351.4441833496094, + "learning_rate": 5.9327500811886095e-06, + "loss": 46.8324, + "step": 61410 + }, + { + "epoch": 0.24814457188799152, + "grad_norm": 872.1058959960938, + "learning_rate": 5.931364435365663e-06, + "loss": 32.937, + "step": 61420 + }, + { + "epoch": 0.24818497315335916, + "grad_norm": 1003.3562622070312, + "learning_rate": 5.929978715438322e-06, + "loss": 50.35, + "step": 61430 + }, + { + "epoch": 0.2482253744187268, + "grad_norm": 1165.629150390625, + "learning_rate": 5.928592921516843e-06, + "loss": 54.3017, + "step": 61440 + }, + { + "epoch": 0.24826577568409441, + "grad_norm": 402.1977844238281, + "learning_rate": 5.9272070537114855e-06, + "loss": 43.1218, + "step": 61450 + }, + { + "epoch": 0.24830617694946205, + "grad_norm": 525.96875, + "learning_rate": 5.92582111213252e-06, + "loss": 42.6614, + "step": 61460 + }, + { + "epoch": 0.2483465782148297, + "grad_norm": 894.915283203125, + "learning_rate": 5.924435096890216e-06, + "loss": 38.2639, + "step": 61470 + }, + { + "epoch": 0.2483869794801973, + "grad_norm": 677.1754760742188, + "learning_rate": 5.923049008094855e-06, + "loss": 34.2235, + "step": 61480 + }, + { + "epoch": 0.24842738074556495, + "grad_norm": 265.16180419921875, + "learning_rate": 5.921662845856719e-06, + "loss": 25.4316, + "step": 61490 + }, + { + "epoch": 0.2484677820109326, + "grad_norm": 627.289794921875, + "learning_rate": 5.920276610286102e-06, + "loss": 52.2437, + "step": 61500 + }, + { + "epoch": 0.24850818327630023, + "grad_norm": 433.43951416015625, + "learning_rate": 5.918890301493299e-06, + "loss": 42.8496, + "step": 61510 + }, + { + "epoch": 0.24854858454166784, + "grad_norm": 684.3920288085938, + "learning_rate": 5.91750391958861e-06, + "loss": 55.9738, + "step": 61520 + }, + { + "epoch": 0.24858898580703548, + "grad_norm": 277.8285217285156, + "learning_rate": 5.916117464682346e-06, + "loss": 57.1054, + "step": 61530 + }, + { + "epoch": 0.24862938707240312, + "grad_norm": 539.8350830078125, + "learning_rate": 5.914730936884819e-06, + "loss": 47.499, + "step": 61540 + }, + { + "epoch": 0.24866978833777073, + "grad_norm": 364.66021728515625, + "learning_rate": 5.91334433630635e-06, + "loss": 31.1749, + "step": 61550 + }, + { + "epoch": 0.24871018960313837, + "grad_norm": 655.7823486328125, + "learning_rate": 5.911957663057264e-06, + "loss": 64.6635, + "step": 61560 + }, + { + "epoch": 0.248750590868506, + "grad_norm": 418.73699951171875, + "learning_rate": 5.910570917247892e-06, + "loss": 31.336, + "step": 61570 + }, + { + "epoch": 0.24879099213387362, + "grad_norm": 576.4503173828125, + "learning_rate": 5.909184098988571e-06, + "loss": 40.6577, + "step": 61580 + }, + { + "epoch": 0.24883139339924126, + "grad_norm": 576.8017578125, + "learning_rate": 5.907797208389644e-06, + "loss": 38.9858, + "step": 61590 + }, + { + "epoch": 0.2488717946646089, + "grad_norm": 699.4044189453125, + "learning_rate": 5.906410245561459e-06, + "loss": 43.753, + "step": 61600 + }, + { + "epoch": 0.24891219592997652, + "grad_norm": 892.1261596679688, + "learning_rate": 5.90502321061437e-06, + "loss": 33.1653, + "step": 61610 + }, + { + "epoch": 0.24895259719534416, + "grad_norm": 424.92169189453125, + "learning_rate": 5.90363610365874e-06, + "loss": 60.2741, + "step": 61620 + }, + { + "epoch": 0.2489929984607118, + "grad_norm": 613.3280639648438, + "learning_rate": 5.9022489248049295e-06, + "loss": 53.3307, + "step": 61630 + }, + { + "epoch": 0.2490333997260794, + "grad_norm": 638.9053344726562, + "learning_rate": 5.900861674163314e-06, + "loss": 46.2113, + "step": 61640 + }, + { + "epoch": 0.24907380099144705, + "grad_norm": 791.4915771484375, + "learning_rate": 5.89947435184427e-06, + "loss": 38.609, + "step": 61650 + }, + { + "epoch": 0.2491142022568147, + "grad_norm": 549.6468505859375, + "learning_rate": 5.89808695795818e-06, + "loss": 36.1331, + "step": 61660 + }, + { + "epoch": 0.24915460352218233, + "grad_norm": 709.71044921875, + "learning_rate": 5.896699492615432e-06, + "loss": 43.4479, + "step": 61670 + }, + { + "epoch": 0.24919500478754994, + "grad_norm": 972.5881958007812, + "learning_rate": 5.895311955926419e-06, + "loss": 38.0126, + "step": 61680 + }, + { + "epoch": 0.24923540605291758, + "grad_norm": 423.6819152832031, + "learning_rate": 5.893924348001544e-06, + "loss": 24.6695, + "step": 61690 + }, + { + "epoch": 0.24927580731828522, + "grad_norm": 215.3943634033203, + "learning_rate": 5.8925366689512124e-06, + "loss": 69.2972, + "step": 61700 + }, + { + "epoch": 0.24931620858365283, + "grad_norm": 580.0707397460938, + "learning_rate": 5.891148918885834e-06, + "loss": 47.4895, + "step": 61710 + }, + { + "epoch": 0.24935660984902047, + "grad_norm": 405.8501892089844, + "learning_rate": 5.8897610979158245e-06, + "loss": 53.3295, + "step": 61720 + }, + { + "epoch": 0.2493970111143881, + "grad_norm": 465.32843017578125, + "learning_rate": 5.888373206151608e-06, + "loss": 46.2481, + "step": 61730 + }, + { + "epoch": 0.24943741237975572, + "grad_norm": 550.0686645507812, + "learning_rate": 5.886985243703612e-06, + "loss": 46.1988, + "step": 61740 + }, + { + "epoch": 0.24947781364512336, + "grad_norm": 691.0383911132812, + "learning_rate": 5.885597210682273e-06, + "loss": 41.3984, + "step": 61750 + }, + { + "epoch": 0.249518214910491, + "grad_norm": 709.067138671875, + "learning_rate": 5.884209107198027e-06, + "loss": 48.7272, + "step": 61760 + }, + { + "epoch": 0.24955861617585862, + "grad_norm": 583.8157348632812, + "learning_rate": 5.882820933361321e-06, + "loss": 39.1306, + "step": 61770 + }, + { + "epoch": 0.24959901744122626, + "grad_norm": 467.2850036621094, + "learning_rate": 5.881432689282604e-06, + "loss": 52.9776, + "step": 61780 + }, + { + "epoch": 0.2496394187065939, + "grad_norm": 327.1951904296875, + "learning_rate": 5.880044375072333e-06, + "loss": 53.9046, + "step": 61790 + }, + { + "epoch": 0.2496798199719615, + "grad_norm": 1364.8809814453125, + "learning_rate": 5.8786559908409715e-06, + "loss": 61.291, + "step": 61800 + }, + { + "epoch": 0.24972022123732915, + "grad_norm": 549.9939575195312, + "learning_rate": 5.877267536698984e-06, + "loss": 47.2693, + "step": 61810 + }, + { + "epoch": 0.2497606225026968, + "grad_norm": 715.7103881835938, + "learning_rate": 5.875879012756845e-06, + "loss": 89.7749, + "step": 61820 + }, + { + "epoch": 0.24980102376806443, + "grad_norm": 697.6795654296875, + "learning_rate": 5.8744904191250326e-06, + "loss": 57.4593, + "step": 61830 + }, + { + "epoch": 0.24984142503343204, + "grad_norm": 594.6947631835938, + "learning_rate": 5.873101755914031e-06, + "loss": 40.0505, + "step": 61840 + }, + { + "epoch": 0.24988182629879968, + "grad_norm": 465.7455749511719, + "learning_rate": 5.87171302323433e-06, + "loss": 26.6923, + "step": 61850 + }, + { + "epoch": 0.24992222756416732, + "grad_norm": 852.9819946289062, + "learning_rate": 5.870324221196424e-06, + "loss": 51.0079, + "step": 61860 + }, + { + "epoch": 0.24996262882953493, + "grad_norm": 619.8259887695312, + "learning_rate": 5.868935349910814e-06, + "loss": 35.0291, + "step": 61870 + }, + { + "epoch": 0.25000303009490255, + "grad_norm": 347.346923828125, + "learning_rate": 5.867546409488006e-06, + "loss": 56.9394, + "step": 61880 + }, + { + "epoch": 0.2500434313602702, + "grad_norm": 579.9848022460938, + "learning_rate": 5.8661574000385115e-06, + "loss": 34.7265, + "step": 61890 + }, + { + "epoch": 0.2500838326256378, + "grad_norm": 201.31919860839844, + "learning_rate": 5.864768321672848e-06, + "loss": 39.8269, + "step": 61900 + }, + { + "epoch": 0.25012423389100547, + "grad_norm": 767.70068359375, + "learning_rate": 5.863379174501538e-06, + "loss": 42.9614, + "step": 61910 + }, + { + "epoch": 0.2501646351563731, + "grad_norm": 682.972900390625, + "learning_rate": 5.861989958635109e-06, + "loss": 63.0275, + "step": 61920 + }, + { + "epoch": 0.25020503642174075, + "grad_norm": 606.4254150390625, + "learning_rate": 5.860600674184096e-06, + "loss": 35.5546, + "step": 61930 + }, + { + "epoch": 0.2502454376871084, + "grad_norm": 640.8799438476562, + "learning_rate": 5.859211321259036e-06, + "loss": 72.0527, + "step": 61940 + }, + { + "epoch": 0.25028583895247597, + "grad_norm": 770.442626953125, + "learning_rate": 5.857821899970475e-06, + "loss": 38.6754, + "step": 61950 + }, + { + "epoch": 0.2503262402178436, + "grad_norm": 906.4054565429688, + "learning_rate": 5.856432410428963e-06, + "loss": 38.3737, + "step": 61960 + }, + { + "epoch": 0.25036664148321125, + "grad_norm": 776.6735229492188, + "learning_rate": 5.8550428527450534e-06, + "loss": 31.9661, + "step": 61970 + }, + { + "epoch": 0.2504070427485789, + "grad_norm": 845.9974365234375, + "learning_rate": 5.8536532270293076e-06, + "loss": 44.6735, + "step": 61980 + }, + { + "epoch": 0.25044744401394653, + "grad_norm": 597.65625, + "learning_rate": 5.852263533392294e-06, + "loss": 34.4646, + "step": 61990 + }, + { + "epoch": 0.25048784527931417, + "grad_norm": 483.18707275390625, + "learning_rate": 5.850873771944581e-06, + "loss": 53.7734, + "step": 62000 + }, + { + "epoch": 0.25052824654468175, + "grad_norm": 2730.670654296875, + "learning_rate": 5.849483942796747e-06, + "loss": 51.1974, + "step": 62010 + }, + { + "epoch": 0.2505686478100494, + "grad_norm": 716.452392578125, + "learning_rate": 5.848094046059375e-06, + "loss": 41.6772, + "step": 62020 + }, + { + "epoch": 0.25060904907541703, + "grad_norm": 1254.0281982421875, + "learning_rate": 5.846704081843052e-06, + "loss": 41.3832, + "step": 62030 + }, + { + "epoch": 0.2506494503407847, + "grad_norm": 746.8511962890625, + "learning_rate": 5.84531405025837e-06, + "loss": 51.5164, + "step": 62040 + }, + { + "epoch": 0.2506898516061523, + "grad_norm": 845.3733520507812, + "learning_rate": 5.843923951415931e-06, + "loss": 56.3143, + "step": 62050 + }, + { + "epoch": 0.25073025287151995, + "grad_norm": 210.1995086669922, + "learning_rate": 5.842533785426334e-06, + "loss": 37.6338, + "step": 62060 + }, + { + "epoch": 0.2507706541368876, + "grad_norm": 706.4373779296875, + "learning_rate": 5.84114355240019e-06, + "loss": 49.6547, + "step": 62070 + }, + { + "epoch": 0.2508110554022552, + "grad_norm": 511.6235046386719, + "learning_rate": 5.839753252448115e-06, + "loss": 43.0069, + "step": 62080 + }, + { + "epoch": 0.2508514566676228, + "grad_norm": 723.2483520507812, + "learning_rate": 5.838362885680728e-06, + "loss": 49.3892, + "step": 62090 + }, + { + "epoch": 0.25089185793299046, + "grad_norm": 458.5704345703125, + "learning_rate": 5.8369724522086545e-06, + "loss": 34.8136, + "step": 62100 + }, + { + "epoch": 0.2509322591983581, + "grad_norm": 336.73876953125, + "learning_rate": 5.835581952142522e-06, + "loss": 37.1676, + "step": 62110 + }, + { + "epoch": 0.25097266046372574, + "grad_norm": 255.3216552734375, + "learning_rate": 5.834191385592969e-06, + "loss": 36.8512, + "step": 62120 + }, + { + "epoch": 0.2510130617290934, + "grad_norm": 1595.05517578125, + "learning_rate": 5.8328007526706354e-06, + "loss": 53.1324, + "step": 62130 + }, + { + "epoch": 0.25105346299446096, + "grad_norm": 484.79193115234375, + "learning_rate": 5.83141005348617e-06, + "loss": 30.9311, + "step": 62140 + }, + { + "epoch": 0.2510938642598286, + "grad_norm": 821.14990234375, + "learning_rate": 5.830019288150222e-06, + "loss": 45.7125, + "step": 62150 + }, + { + "epoch": 0.25113426552519624, + "grad_norm": 794.8558959960938, + "learning_rate": 5.8286284567734456e-06, + "loss": 53.2357, + "step": 62160 + }, + { + "epoch": 0.2511746667905639, + "grad_norm": 789.1674194335938, + "learning_rate": 5.827237559466508e-06, + "loss": 41.914, + "step": 62170 + }, + { + "epoch": 0.2512150680559315, + "grad_norm": 786.6305541992188, + "learning_rate": 5.825846596340075e-06, + "loss": 47.0211, + "step": 62180 + }, + { + "epoch": 0.25125546932129916, + "grad_norm": 574.0487060546875, + "learning_rate": 5.824455567504817e-06, + "loss": 44.7584, + "step": 62190 + }, + { + "epoch": 0.25129587058666675, + "grad_norm": 918.026123046875, + "learning_rate": 5.823064473071414e-06, + "loss": 54.0754, + "step": 62200 + }, + { + "epoch": 0.2513362718520344, + "grad_norm": 546.5556640625, + "learning_rate": 5.821673313150546e-06, + "loss": 39.635, + "step": 62210 + }, + { + "epoch": 0.251376673117402, + "grad_norm": 761.0563354492188, + "learning_rate": 5.820282087852906e-06, + "loss": 34.2317, + "step": 62220 + }, + { + "epoch": 0.25141707438276967, + "grad_norm": 604.6393432617188, + "learning_rate": 5.818890797289185e-06, + "loss": 32.208, + "step": 62230 + }, + { + "epoch": 0.2514574756481373, + "grad_norm": 803.0030517578125, + "learning_rate": 5.81749944157008e-06, + "loss": 44.4094, + "step": 62240 + }, + { + "epoch": 0.25149787691350495, + "grad_norm": 558.0752563476562, + "learning_rate": 5.816108020806297e-06, + "loss": 42.962, + "step": 62250 + }, + { + "epoch": 0.2515382781788726, + "grad_norm": 873.9310913085938, + "learning_rate": 5.814716535108545e-06, + "loss": 41.4081, + "step": 62260 + }, + { + "epoch": 0.25157867944424017, + "grad_norm": 419.5707702636719, + "learning_rate": 5.813324984587536e-06, + "loss": 58.7208, + "step": 62270 + }, + { + "epoch": 0.2516190807096078, + "grad_norm": 680.248046875, + "learning_rate": 5.811933369353992e-06, + "loss": 51.4198, + "step": 62280 + }, + { + "epoch": 0.25165948197497545, + "grad_norm": 878.7736206054688, + "learning_rate": 5.810541689518634e-06, + "loss": 53.1204, + "step": 62290 + }, + { + "epoch": 0.2516998832403431, + "grad_norm": 466.78271484375, + "learning_rate": 5.809149945192194e-06, + "loss": 42.8364, + "step": 62300 + }, + { + "epoch": 0.25174028450571073, + "grad_norm": 532.00537109375, + "learning_rate": 5.807758136485409e-06, + "loss": 48.3024, + "step": 62310 + }, + { + "epoch": 0.25178068577107837, + "grad_norm": 312.7720642089844, + "learning_rate": 5.8063662635090136e-06, + "loss": 36.9993, + "step": 62320 + }, + { + "epoch": 0.25182108703644596, + "grad_norm": 897.0571899414062, + "learning_rate": 5.804974326373756e-06, + "loss": 42.1273, + "step": 62330 + }, + { + "epoch": 0.2518614883018136, + "grad_norm": 702.832275390625, + "learning_rate": 5.803582325190387e-06, + "loss": 64.0742, + "step": 62340 + }, + { + "epoch": 0.25190188956718124, + "grad_norm": 544.4562377929688, + "learning_rate": 5.802190260069657e-06, + "loss": 41.5808, + "step": 62350 + }, + { + "epoch": 0.2519422908325489, + "grad_norm": 328.93084716796875, + "learning_rate": 5.800798131122332e-06, + "loss": 52.4388, + "step": 62360 + }, + { + "epoch": 0.2519826920979165, + "grad_norm": 516.4020385742188, + "learning_rate": 5.799405938459175e-06, + "loss": 37.1916, + "step": 62370 + }, + { + "epoch": 0.25202309336328416, + "grad_norm": 458.3615417480469, + "learning_rate": 5.7980136821909565e-06, + "loss": 34.8423, + "step": 62380 + }, + { + "epoch": 0.2520634946286518, + "grad_norm": 1137.943115234375, + "learning_rate": 5.79662136242845e-06, + "loss": 49.3595, + "step": 62390 + }, + { + "epoch": 0.2521038958940194, + "grad_norm": 421.2976379394531, + "learning_rate": 5.795228979282439e-06, + "loss": 34.1204, + "step": 62400 + }, + { + "epoch": 0.252144297159387, + "grad_norm": 634.6295776367188, + "learning_rate": 5.793836532863707e-06, + "loss": 44.5411, + "step": 62410 + }, + { + "epoch": 0.25218469842475466, + "grad_norm": 969.116943359375, + "learning_rate": 5.792444023283046e-06, + "loss": 60.2988, + "step": 62420 + }, + { + "epoch": 0.2522250996901223, + "grad_norm": 321.36590576171875, + "learning_rate": 5.791051450651251e-06, + "loss": 48.6717, + "step": 62430 + }, + { + "epoch": 0.25226550095548994, + "grad_norm": 618.8597412109375, + "learning_rate": 5.789658815079121e-06, + "loss": 47.9615, + "step": 62440 + }, + { + "epoch": 0.2523059022208576, + "grad_norm": 327.97607421875, + "learning_rate": 5.788266116677464e-06, + "loss": 32.009, + "step": 62450 + }, + { + "epoch": 0.25234630348622517, + "grad_norm": 552.7090454101562, + "learning_rate": 5.78687335555709e-06, + "loss": 38.7742, + "step": 62460 + }, + { + "epoch": 0.2523867047515928, + "grad_norm": 699.7271118164062, + "learning_rate": 5.785480531828815e-06, + "loss": 68.4585, + "step": 62470 + }, + { + "epoch": 0.25242710601696045, + "grad_norm": 448.2286376953125, + "learning_rate": 5.784087645603459e-06, + "loss": 55.5299, + "step": 62480 + }, + { + "epoch": 0.2524675072823281, + "grad_norm": 744.1871337890625, + "learning_rate": 5.782694696991845e-06, + "loss": 55.4688, + "step": 62490 + }, + { + "epoch": 0.2525079085476957, + "grad_norm": 565.6279907226562, + "learning_rate": 5.781301686104808e-06, + "loss": 43.324, + "step": 62500 + }, + { + "epoch": 0.25254830981306337, + "grad_norm": 497.8145446777344, + "learning_rate": 5.779908613053181e-06, + "loss": 37.782, + "step": 62510 + }, + { + "epoch": 0.25258871107843095, + "grad_norm": 483.60723876953125, + "learning_rate": 5.778515477947807e-06, + "loss": 44.4805, + "step": 62520 + }, + { + "epoch": 0.2526291123437986, + "grad_norm": 1020.0092163085938, + "learning_rate": 5.777122280899527e-06, + "loss": 48.6439, + "step": 62530 + }, + { + "epoch": 0.25266951360916623, + "grad_norm": 146.36549377441406, + "learning_rate": 5.775729022019193e-06, + "loss": 53.2947, + "step": 62540 + }, + { + "epoch": 0.25270991487453387, + "grad_norm": 779.2500610351562, + "learning_rate": 5.774335701417662e-06, + "loss": 58.0035, + "step": 62550 + }, + { + "epoch": 0.2527503161399015, + "grad_norm": 741.26171875, + "learning_rate": 5.7729423192057936e-06, + "loss": 54.0345, + "step": 62560 + }, + { + "epoch": 0.25279071740526915, + "grad_norm": 1187.714599609375, + "learning_rate": 5.771548875494453e-06, + "loss": 65.7193, + "step": 62570 + }, + { + "epoch": 0.2528311186706368, + "grad_norm": 312.81060791015625, + "learning_rate": 5.7701553703945055e-06, + "loss": 51.9163, + "step": 62580 + }, + { + "epoch": 0.2528715199360044, + "grad_norm": 698.7396240234375, + "learning_rate": 5.768761804016833e-06, + "loss": 39.4483, + "step": 62590 + }, + { + "epoch": 0.252911921201372, + "grad_norm": 794.9736328125, + "learning_rate": 5.767368176472311e-06, + "loss": 47.7515, + "step": 62600 + }, + { + "epoch": 0.25295232246673965, + "grad_norm": 687.7762451171875, + "learning_rate": 5.765974487871826e-06, + "loss": 65.9322, + "step": 62610 + }, + { + "epoch": 0.2529927237321073, + "grad_norm": 678.4483032226562, + "learning_rate": 5.764580738326265e-06, + "loss": 55.1801, + "step": 62620 + }, + { + "epoch": 0.25303312499747493, + "grad_norm": 329.08294677734375, + "learning_rate": 5.763186927946523e-06, + "loss": 46.4785, + "step": 62630 + }, + { + "epoch": 0.2530735262628426, + "grad_norm": 529.1029663085938, + "learning_rate": 5.761793056843501e-06, + "loss": 41.3061, + "step": 62640 + }, + { + "epoch": 0.25311392752821016, + "grad_norm": 708.1193237304688, + "learning_rate": 5.760399125128102e-06, + "loss": 47.0046, + "step": 62650 + }, + { + "epoch": 0.2531543287935778, + "grad_norm": 448.0501403808594, + "learning_rate": 5.759005132911233e-06, + "loss": 61.7254, + "step": 62660 + }, + { + "epoch": 0.25319473005894544, + "grad_norm": 955.7879028320312, + "learning_rate": 5.75761108030381e-06, + "loss": 57.2991, + "step": 62670 + }, + { + "epoch": 0.2532351313243131, + "grad_norm": 1111.9713134765625, + "learning_rate": 5.756216967416749e-06, + "loss": 58.0533, + "step": 62680 + }, + { + "epoch": 0.2532755325896807, + "grad_norm": 564.1481323242188, + "learning_rate": 5.754822794360976e-06, + "loss": 46.9675, + "step": 62690 + }, + { + "epoch": 0.25331593385504836, + "grad_norm": 683.9739990234375, + "learning_rate": 5.753428561247416e-06, + "loss": 42.0367, + "step": 62700 + }, + { + "epoch": 0.253356335120416, + "grad_norm": 409.0925598144531, + "learning_rate": 5.752034268187005e-06, + "loss": 27.2585, + "step": 62710 + }, + { + "epoch": 0.2533967363857836, + "grad_norm": 356.8011779785156, + "learning_rate": 5.750639915290677e-06, + "loss": 32.9736, + "step": 62720 + }, + { + "epoch": 0.2534371376511512, + "grad_norm": 684.747802734375, + "learning_rate": 5.749245502669375e-06, + "loss": 41.3751, + "step": 62730 + }, + { + "epoch": 0.25347753891651886, + "grad_norm": 872.2908935546875, + "learning_rate": 5.747851030434049e-06, + "loss": 63.0237, + "step": 62740 + }, + { + "epoch": 0.2535179401818865, + "grad_norm": 647.8345336914062, + "learning_rate": 5.746456498695648e-06, + "loss": 51.3102, + "step": 62750 + }, + { + "epoch": 0.25355834144725414, + "grad_norm": 264.8699035644531, + "learning_rate": 5.7450619075651305e-06, + "loss": 36.7021, + "step": 62760 + }, + { + "epoch": 0.2535987427126218, + "grad_norm": 1874.4996337890625, + "learning_rate": 5.743667257153454e-06, + "loss": 83.4588, + "step": 62770 + }, + { + "epoch": 0.25363914397798937, + "grad_norm": 350.74017333984375, + "learning_rate": 5.742272547571588e-06, + "loss": 37.704, + "step": 62780 + }, + { + "epoch": 0.253679545243357, + "grad_norm": 151.1192169189453, + "learning_rate": 5.740877778930503e-06, + "loss": 35.6705, + "step": 62790 + }, + { + "epoch": 0.25371994650872465, + "grad_norm": 1044.9840087890625, + "learning_rate": 5.739482951341172e-06, + "loss": 68.5232, + "step": 62800 + }, + { + "epoch": 0.2537603477740923, + "grad_norm": 796.45751953125, + "learning_rate": 5.738088064914576e-06, + "loss": 42.7602, + "step": 62810 + }, + { + "epoch": 0.2538007490394599, + "grad_norm": 487.5058288574219, + "learning_rate": 5.7366931197617e-06, + "loss": 34.9116, + "step": 62820 + }, + { + "epoch": 0.25384115030482757, + "grad_norm": 398.38519287109375, + "learning_rate": 5.735298115993535e-06, + "loss": 51.7369, + "step": 62830 + }, + { + "epoch": 0.25388155157019515, + "grad_norm": 546.4779663085938, + "learning_rate": 5.733903053721072e-06, + "loss": 48.8135, + "step": 62840 + }, + { + "epoch": 0.2539219528355628, + "grad_norm": 575.5430297851562, + "learning_rate": 5.732507933055311e-06, + "loss": 44.4907, + "step": 62850 + }, + { + "epoch": 0.25396235410093043, + "grad_norm": 727.0908813476562, + "learning_rate": 5.731112754107257e-06, + "loss": 57.1689, + "step": 62860 + }, + { + "epoch": 0.25400275536629807, + "grad_norm": 836.796875, + "learning_rate": 5.729717516987916e-06, + "loss": 36.4253, + "step": 62870 + }, + { + "epoch": 0.2540431566316657, + "grad_norm": 412.9502258300781, + "learning_rate": 5.7283222218083e-06, + "loss": 51.407, + "step": 62880 + }, + { + "epoch": 0.25408355789703335, + "grad_norm": 531.1953735351562, + "learning_rate": 5.726926868679429e-06, + "loss": 41.1174, + "step": 62890 + }, + { + "epoch": 0.254123959162401, + "grad_norm": 586.2398071289062, + "learning_rate": 5.725531457712321e-06, + "loss": 51.4301, + "step": 62900 + }, + { + "epoch": 0.2541643604277686, + "grad_norm": 497.56488037109375, + "learning_rate": 5.724135989018007e-06, + "loss": 61.23, + "step": 62910 + }, + { + "epoch": 0.2542047616931362, + "grad_norm": 604.0518798828125, + "learning_rate": 5.722740462707515e-06, + "loss": 55.4451, + "step": 62920 + }, + { + "epoch": 0.25424516295850386, + "grad_norm": 517.4009399414062, + "learning_rate": 5.72134487889188e-06, + "loss": 34.8546, + "step": 62930 + }, + { + "epoch": 0.2542855642238715, + "grad_norm": 219.57757568359375, + "learning_rate": 5.719949237682145e-06, + "loss": 51.7369, + "step": 62940 + }, + { + "epoch": 0.25432596548923914, + "grad_norm": 404.9817810058594, + "learning_rate": 5.718553539189353e-06, + "loss": 56.0131, + "step": 62950 + }, + { + "epoch": 0.2543663667546068, + "grad_norm": 568.6887817382812, + "learning_rate": 5.717157783524553e-06, + "loss": 38.1931, + "step": 62960 + }, + { + "epoch": 0.25440676801997436, + "grad_norm": 579.7442626953125, + "learning_rate": 5.7157619707988e-06, + "loss": 52.0563, + "step": 62970 + }, + { + "epoch": 0.254447169285342, + "grad_norm": 423.65606689453125, + "learning_rate": 5.714366101123152e-06, + "loss": 31.922, + "step": 62980 + }, + { + "epoch": 0.25448757055070964, + "grad_norm": 58.358238220214844, + "learning_rate": 5.712970174608671e-06, + "loss": 40.1323, + "step": 62990 + }, + { + "epoch": 0.2545279718160773, + "grad_norm": 1131.2806396484375, + "learning_rate": 5.711574191366427e-06, + "loss": 60.6359, + "step": 63000 + }, + { + "epoch": 0.2545683730814449, + "grad_norm": 706.688232421875, + "learning_rate": 5.710178151507488e-06, + "loss": 46.877, + "step": 63010 + }, + { + "epoch": 0.25460877434681256, + "grad_norm": 465.1105041503906, + "learning_rate": 5.708782055142934e-06, + "loss": 29.5096, + "step": 63020 + }, + { + "epoch": 0.2546491756121802, + "grad_norm": 686.3890991210938, + "learning_rate": 5.707385902383845e-06, + "loss": 38.0391, + "step": 63030 + }, + { + "epoch": 0.2546895768775478, + "grad_norm": 522.968994140625, + "learning_rate": 5.7059896933413076e-06, + "loss": 43.3018, + "step": 63040 + }, + { + "epoch": 0.2547299781429154, + "grad_norm": 521.99951171875, + "learning_rate": 5.7045934281264085e-06, + "loss": 40.3964, + "step": 63050 + }, + { + "epoch": 0.25477037940828307, + "grad_norm": 600.0154418945312, + "learning_rate": 5.7031971068502425e-06, + "loss": 39.3763, + "step": 63060 + }, + { + "epoch": 0.2548107806736507, + "grad_norm": 262.6509704589844, + "learning_rate": 5.701800729623911e-06, + "loss": 34.9332, + "step": 63070 + }, + { + "epoch": 0.25485118193901835, + "grad_norm": 471.0314025878906, + "learning_rate": 5.700404296558518e-06, + "loss": 43.9025, + "step": 63080 + }, + { + "epoch": 0.254891583204386, + "grad_norm": 612.1912231445312, + "learning_rate": 5.699007807765169e-06, + "loss": 31.0212, + "step": 63090 + }, + { + "epoch": 0.25493198446975357, + "grad_norm": 760.9694213867188, + "learning_rate": 5.6976112633549764e-06, + "loss": 56.0173, + "step": 63100 + }, + { + "epoch": 0.2549723857351212, + "grad_norm": 692.7157592773438, + "learning_rate": 5.696214663439055e-06, + "loss": 41.3706, + "step": 63110 + }, + { + "epoch": 0.25501278700048885, + "grad_norm": 385.0296630859375, + "learning_rate": 5.694818008128531e-06, + "loss": 46.966, + "step": 63120 + }, + { + "epoch": 0.2550531882658565, + "grad_norm": 423.5424499511719, + "learning_rate": 5.693421297534526e-06, + "loss": 40.4821, + "step": 63130 + }, + { + "epoch": 0.25509358953122413, + "grad_norm": 817.661376953125, + "learning_rate": 5.69202453176817e-06, + "loss": 55.8041, + "step": 63140 + }, + { + "epoch": 0.25513399079659177, + "grad_norm": 604.6948852539062, + "learning_rate": 5.6906277109406e-06, + "loss": 41.3391, + "step": 63150 + }, + { + "epoch": 0.25517439206195935, + "grad_norm": 427.2970886230469, + "learning_rate": 5.689230835162949e-06, + "loss": 36.6582, + "step": 63160 + }, + { + "epoch": 0.255214793327327, + "grad_norm": 563.9060668945312, + "learning_rate": 5.687833904546367e-06, + "loss": 53.3264, + "step": 63170 + }, + { + "epoch": 0.25525519459269463, + "grad_norm": 569.6155395507812, + "learning_rate": 5.686436919201996e-06, + "loss": 29.1016, + "step": 63180 + }, + { + "epoch": 0.2552955958580623, + "grad_norm": 760.7314453125, + "learning_rate": 5.68503987924099e-06, + "loss": 51.8447, + "step": 63190 + }, + { + "epoch": 0.2553359971234299, + "grad_norm": 929.677734375, + "learning_rate": 5.683642784774506e-06, + "loss": 50.9056, + "step": 63200 + }, + { + "epoch": 0.25537639838879755, + "grad_norm": 324.9360656738281, + "learning_rate": 5.682245635913701e-06, + "loss": 33.1183, + "step": 63210 + }, + { + "epoch": 0.2554167996541652, + "grad_norm": 309.93939208984375, + "learning_rate": 5.680848432769743e-06, + "loss": 61.7535, + "step": 63220 + }, + { + "epoch": 0.2554572009195328, + "grad_norm": 295.8665771484375, + "learning_rate": 5.6794511754538005e-06, + "loss": 44.2926, + "step": 63230 + }, + { + "epoch": 0.2554976021849004, + "grad_norm": 474.9871826171875, + "learning_rate": 5.6780538640770455e-06, + "loss": 53.1915, + "step": 63240 + }, + { + "epoch": 0.25553800345026806, + "grad_norm": 822.3848876953125, + "learning_rate": 5.6766564987506564e-06, + "loss": 35.7922, + "step": 63250 + }, + { + "epoch": 0.2555784047156357, + "grad_norm": 837.141357421875, + "learning_rate": 5.675259079585816e-06, + "loss": 44.0483, + "step": 63260 + }, + { + "epoch": 0.25561880598100334, + "grad_norm": 708.4387817382812, + "learning_rate": 5.673861606693708e-06, + "loss": 49.6165, + "step": 63270 + }, + { + "epoch": 0.255659207246371, + "grad_norm": 714.2989501953125, + "learning_rate": 5.672464080185526e-06, + "loss": 58.6901, + "step": 63280 + }, + { + "epoch": 0.25569960851173856, + "grad_norm": 832.5451049804688, + "learning_rate": 5.671066500172462e-06, + "loss": 44.7032, + "step": 63290 + }, + { + "epoch": 0.2557400097771062, + "grad_norm": 330.7470703125, + "learning_rate": 5.669668866765717e-06, + "loss": 47.6335, + "step": 63300 + }, + { + "epoch": 0.25578041104247384, + "grad_norm": 682.3972778320312, + "learning_rate": 5.6682711800764935e-06, + "loss": 38.2685, + "step": 63310 + }, + { + "epoch": 0.2558208123078415, + "grad_norm": 819.9092407226562, + "learning_rate": 5.6668734402159994e-06, + "loss": 49.0632, + "step": 63320 + }, + { + "epoch": 0.2558612135732091, + "grad_norm": 716.800048828125, + "learning_rate": 5.6654756472954464e-06, + "loss": 42.4433, + "step": 63330 + }, + { + "epoch": 0.25590161483857676, + "grad_norm": 730.6797485351562, + "learning_rate": 5.66407780142605e-06, + "loss": 39.3221, + "step": 63340 + }, + { + "epoch": 0.2559420161039444, + "grad_norm": 726.9446411132812, + "learning_rate": 5.66267990271903e-06, + "loss": 30.0828, + "step": 63350 + }, + { + "epoch": 0.255982417369312, + "grad_norm": 454.9652404785156, + "learning_rate": 5.661281951285613e-06, + "loss": 37.4903, + "step": 63360 + }, + { + "epoch": 0.2560228186346796, + "grad_norm": 798.0877075195312, + "learning_rate": 5.6598839472370245e-06, + "loss": 34.9171, + "step": 63370 + }, + { + "epoch": 0.25606321990004727, + "grad_norm": 621.2152099609375, + "learning_rate": 5.6584858906845e-06, + "loss": 48.2733, + "step": 63380 + }, + { + "epoch": 0.2561036211654149, + "grad_norm": 580.8334350585938, + "learning_rate": 5.657087781739274e-06, + "loss": 51.7214, + "step": 63390 + }, + { + "epoch": 0.25614402243078255, + "grad_norm": 728.9893188476562, + "learning_rate": 5.6556896205125896e-06, + "loss": 50.1375, + "step": 63400 + }, + { + "epoch": 0.2561844236961502, + "grad_norm": 608.4055786132812, + "learning_rate": 5.654291407115692e-06, + "loss": 37.3258, + "step": 63410 + }, + { + "epoch": 0.25622482496151777, + "grad_norm": 480.6260681152344, + "learning_rate": 5.652893141659829e-06, + "loss": 45.5747, + "step": 63420 + }, + { + "epoch": 0.2562652262268854, + "grad_norm": 906.700927734375, + "learning_rate": 5.651494824256256e-06, + "loss": 42.8529, + "step": 63430 + }, + { + "epoch": 0.25630562749225305, + "grad_norm": 660.1846923828125, + "learning_rate": 5.650096455016227e-06, + "loss": 44.0007, + "step": 63440 + }, + { + "epoch": 0.2563460287576207, + "grad_norm": 352.1722412109375, + "learning_rate": 5.648698034051009e-06, + "loss": 41.2812, + "step": 63450 + }, + { + "epoch": 0.25638643002298833, + "grad_norm": 862.4525146484375, + "learning_rate": 5.647299561471865e-06, + "loss": 94.515, + "step": 63460 + }, + { + "epoch": 0.25642683128835597, + "grad_norm": 502.7945556640625, + "learning_rate": 5.645901037390067e-06, + "loss": 53.6442, + "step": 63470 + }, + { + "epoch": 0.25646723255372356, + "grad_norm": 263.6398620605469, + "learning_rate": 5.644502461916886e-06, + "loss": 54.9007, + "step": 63480 + }, + { + "epoch": 0.2565076338190912, + "grad_norm": 570.159423828125, + "learning_rate": 5.643103835163602e-06, + "loss": 60.2439, + "step": 63490 + }, + { + "epoch": 0.25654803508445884, + "grad_norm": 954.9754638671875, + "learning_rate": 5.641705157241497e-06, + "loss": 42.2908, + "step": 63500 + }, + { + "epoch": 0.2565884363498265, + "grad_norm": 841.0116577148438, + "learning_rate": 5.64030642826186e-06, + "loss": 53.6023, + "step": 63510 + }, + { + "epoch": 0.2566288376151941, + "grad_norm": 349.643310546875, + "learning_rate": 5.6389076483359774e-06, + "loss": 45.1764, + "step": 63520 + }, + { + "epoch": 0.25666923888056176, + "grad_norm": 291.5067138671875, + "learning_rate": 5.637508817575145e-06, + "loss": 58.82, + "step": 63530 + }, + { + "epoch": 0.2567096401459294, + "grad_norm": 462.4788818359375, + "learning_rate": 5.636109936090661e-06, + "loss": 38.8552, + "step": 63540 + }, + { + "epoch": 0.256750041411297, + "grad_norm": 437.5688171386719, + "learning_rate": 5.634711003993832e-06, + "loss": 44.9762, + "step": 63550 + }, + { + "epoch": 0.2567904426766646, + "grad_norm": 559.8381958007812, + "learning_rate": 5.633312021395959e-06, + "loss": 51.4478, + "step": 63560 + }, + { + "epoch": 0.25683084394203226, + "grad_norm": 442.395263671875, + "learning_rate": 5.631912988408356e-06, + "loss": 31.9547, + "step": 63570 + }, + { + "epoch": 0.2568712452073999, + "grad_norm": 431.67193603515625, + "learning_rate": 5.630513905142334e-06, + "loss": 45.5192, + "step": 63580 + }, + { + "epoch": 0.25691164647276754, + "grad_norm": 524.9291381835938, + "learning_rate": 5.629114771709217e-06, + "loss": 42.338, + "step": 63590 + }, + { + "epoch": 0.2569520477381352, + "grad_norm": 986.5203857421875, + "learning_rate": 5.627715588220325e-06, + "loss": 46.758, + "step": 63600 + }, + { + "epoch": 0.25699244900350277, + "grad_norm": 263.0068664550781, + "learning_rate": 5.626316354786982e-06, + "loss": 54.736, + "step": 63610 + }, + { + "epoch": 0.2570328502688704, + "grad_norm": 528.8858642578125, + "learning_rate": 5.624917071520524e-06, + "loss": 46.2457, + "step": 63620 + }, + { + "epoch": 0.25707325153423805, + "grad_norm": 592.2925415039062, + "learning_rate": 5.62351773853228e-06, + "loss": 50.5073, + "step": 63630 + }, + { + "epoch": 0.2571136527996057, + "grad_norm": 473.4315490722656, + "learning_rate": 5.6221183559335935e-06, + "loss": 40.8695, + "step": 63640 + }, + { + "epoch": 0.2571540540649733, + "grad_norm": 681.96435546875, + "learning_rate": 5.6207189238358025e-06, + "loss": 40.739, + "step": 63650 + }, + { + "epoch": 0.25719445533034097, + "grad_norm": 411.7383728027344, + "learning_rate": 5.619319442350256e-06, + "loss": 39.9328, + "step": 63660 + }, + { + "epoch": 0.2572348565957086, + "grad_norm": 375.4176330566406, + "learning_rate": 5.617919911588304e-06, + "loss": 47.2908, + "step": 63670 + }, + { + "epoch": 0.2572752578610762, + "grad_norm": 451.3565979003906, + "learning_rate": 5.616520331661301e-06, + "loss": 42.4083, + "step": 63680 + }, + { + "epoch": 0.25731565912644383, + "grad_norm": 469.7231750488281, + "learning_rate": 5.615120702680604e-06, + "loss": 31.2935, + "step": 63690 + }, + { + "epoch": 0.25735606039181147, + "grad_norm": 539.3211669921875, + "learning_rate": 5.6137210247575754e-06, + "loss": 54.5643, + "step": 63700 + }, + { + "epoch": 0.2573964616571791, + "grad_norm": 791.468505859375, + "learning_rate": 5.6123212980035825e-06, + "loss": 50.1051, + "step": 63710 + }, + { + "epoch": 0.25743686292254675, + "grad_norm": 847.0855712890625, + "learning_rate": 5.610921522529994e-06, + "loss": 67.9444, + "step": 63720 + }, + { + "epoch": 0.2574772641879144, + "grad_norm": 657.5780639648438, + "learning_rate": 5.609521698448183e-06, + "loss": 53.8216, + "step": 63730 + }, + { + "epoch": 0.257517665453282, + "grad_norm": 761.1411743164062, + "learning_rate": 5.608121825869528e-06, + "loss": 53.258, + "step": 63740 + }, + { + "epoch": 0.2575580667186496, + "grad_norm": 774.0830688476562, + "learning_rate": 5.60672190490541e-06, + "loss": 38.646, + "step": 63750 + }, + { + "epoch": 0.25759846798401725, + "grad_norm": 567.2658081054688, + "learning_rate": 5.6053219356672155e-06, + "loss": 52.5933, + "step": 63760 + }, + { + "epoch": 0.2576388692493849, + "grad_norm": 410.2081298828125, + "learning_rate": 5.603921918266332e-06, + "loss": 37.3336, + "step": 63770 + }, + { + "epoch": 0.25767927051475253, + "grad_norm": 1132.9581298828125, + "learning_rate": 5.602521852814152e-06, + "loss": 65.356, + "step": 63780 + }, + { + "epoch": 0.2577196717801202, + "grad_norm": 413.339599609375, + "learning_rate": 5.6011217394220755e-06, + "loss": 62.1223, + "step": 63790 + }, + { + "epoch": 0.25776007304548776, + "grad_norm": 771.2860107421875, + "learning_rate": 5.599721578201499e-06, + "loss": 43.2339, + "step": 63800 + }, + { + "epoch": 0.2578004743108554, + "grad_norm": 581.4656372070312, + "learning_rate": 5.59832136926383e-06, + "loss": 38.4429, + "step": 63810 + }, + { + "epoch": 0.25784087557622304, + "grad_norm": 510.95404052734375, + "learning_rate": 5.5969211127204744e-06, + "loss": 29.4889, + "step": 63820 + }, + { + "epoch": 0.2578812768415907, + "grad_norm": 1371.288818359375, + "learning_rate": 5.595520808682848e-06, + "loss": 61.4885, + "step": 63830 + }, + { + "epoch": 0.2579216781069583, + "grad_norm": 395.72900390625, + "learning_rate": 5.594120457262361e-06, + "loss": 42.5042, + "step": 63840 + }, + { + "epoch": 0.25796207937232596, + "grad_norm": 435.349365234375, + "learning_rate": 5.592720058570438e-06, + "loss": 41.518, + "step": 63850 + }, + { + "epoch": 0.2580024806376936, + "grad_norm": 511.0625915527344, + "learning_rate": 5.591319612718498e-06, + "loss": 40.8742, + "step": 63860 + }, + { + "epoch": 0.2580428819030612, + "grad_norm": 502.5708312988281, + "learning_rate": 5.589919119817971e-06, + "loss": 53.2404, + "step": 63870 + }, + { + "epoch": 0.2580832831684288, + "grad_norm": 723.0562133789062, + "learning_rate": 5.588518579980288e-06, + "loss": 40.6113, + "step": 63880 + }, + { + "epoch": 0.25812368443379646, + "grad_norm": 1327.1318359375, + "learning_rate": 5.587117993316882e-06, + "loss": 37.1122, + "step": 63890 + }, + { + "epoch": 0.2581640856991641, + "grad_norm": 433.02532958984375, + "learning_rate": 5.585717359939192e-06, + "loss": 31.3232, + "step": 63900 + }, + { + "epoch": 0.25820448696453174, + "grad_norm": 538.7310791015625, + "learning_rate": 5.584316679958659e-06, + "loss": 29.2123, + "step": 63910 + }, + { + "epoch": 0.2582448882298994, + "grad_norm": 606.9814453125, + "learning_rate": 5.58291595348673e-06, + "loss": 46.0158, + "step": 63920 + }, + { + "epoch": 0.25828528949526697, + "grad_norm": 933.4443359375, + "learning_rate": 5.581515180634853e-06, + "loss": 41.5524, + "step": 63930 + }, + { + "epoch": 0.2583256907606346, + "grad_norm": 620.8282470703125, + "learning_rate": 5.580114361514484e-06, + "loss": 38.4091, + "step": 63940 + }, + { + "epoch": 0.25836609202600225, + "grad_norm": 478.485107421875, + "learning_rate": 5.5787134962370755e-06, + "loss": 35.9659, + "step": 63950 + }, + { + "epoch": 0.2584064932913699, + "grad_norm": 771.5250854492188, + "learning_rate": 5.57731258491409e-06, + "loss": 50.4317, + "step": 63960 + }, + { + "epoch": 0.2584468945567375, + "grad_norm": 213.68081665039062, + "learning_rate": 5.575911627656993e-06, + "loss": 30.2937, + "step": 63970 + }, + { + "epoch": 0.25848729582210517, + "grad_norm": 783.5086669921875, + "learning_rate": 5.5745106245772506e-06, + "loss": 54.8814, + "step": 63980 + }, + { + "epoch": 0.25852769708747275, + "grad_norm": 498.461669921875, + "learning_rate": 5.573109575786334e-06, + "loss": 32.4557, + "step": 63990 + }, + { + "epoch": 0.2585680983528404, + "grad_norm": 562.4568481445312, + "learning_rate": 5.571708481395719e-06, + "loss": 39.6354, + "step": 64000 + }, + { + "epoch": 0.25860849961820803, + "grad_norm": 724.8565673828125, + "learning_rate": 5.570307341516882e-06, + "loss": 42.1159, + "step": 64010 + }, + { + "epoch": 0.25864890088357567, + "grad_norm": 687.0962524414062, + "learning_rate": 5.568906156261309e-06, + "loss": 45.9281, + "step": 64020 + }, + { + "epoch": 0.2586893021489433, + "grad_norm": 924.2523803710938, + "learning_rate": 5.567504925740484e-06, + "loss": 36.815, + "step": 64030 + }, + { + "epoch": 0.25872970341431095, + "grad_norm": 449.19097900390625, + "learning_rate": 5.566103650065897e-06, + "loss": 45.8584, + "step": 64040 + }, + { + "epoch": 0.2587701046796786, + "grad_norm": 406.7481689453125, + "learning_rate": 5.564702329349041e-06, + "loss": 43.2192, + "step": 64050 + }, + { + "epoch": 0.2588105059450462, + "grad_norm": 845.9132080078125, + "learning_rate": 5.56330096370141e-06, + "loss": 57.1466, + "step": 64060 + }, + { + "epoch": 0.2588509072104138, + "grad_norm": 503.916748046875, + "learning_rate": 5.561899553234509e-06, + "loss": 43.5526, + "step": 64070 + }, + { + "epoch": 0.25889130847578146, + "grad_norm": 410.317138671875, + "learning_rate": 5.560498098059838e-06, + "loss": 39.6964, + "step": 64080 + }, + { + "epoch": 0.2589317097411491, + "grad_norm": 661.5330810546875, + "learning_rate": 5.559096598288906e-06, + "loss": 43.7603, + "step": 64090 + }, + { + "epoch": 0.25897211100651674, + "grad_norm": 1017.6605834960938, + "learning_rate": 5.557695054033223e-06, + "loss": 42.1348, + "step": 64100 + }, + { + "epoch": 0.2590125122718844, + "grad_norm": 738.2880859375, + "learning_rate": 5.556293465404304e-06, + "loss": 52.5627, + "step": 64110 + }, + { + "epoch": 0.25905291353725196, + "grad_norm": 281.1907043457031, + "learning_rate": 5.554891832513668e-06, + "loss": 44.3268, + "step": 64120 + }, + { + "epoch": 0.2590933148026196, + "grad_norm": 557.4099731445312, + "learning_rate": 5.553490155472835e-06, + "loss": 53.3458, + "step": 64130 + }, + { + "epoch": 0.25913371606798724, + "grad_norm": 569.7567138671875, + "learning_rate": 5.55208843439333e-06, + "loss": 46.0401, + "step": 64140 + }, + { + "epoch": 0.2591741173333549, + "grad_norm": 503.8868713378906, + "learning_rate": 5.550686669386683e-06, + "loss": 41.1913, + "step": 64150 + }, + { + "epoch": 0.2592145185987225, + "grad_norm": 928.1849975585938, + "learning_rate": 5.549284860564425e-06, + "loss": 31.0713, + "step": 64160 + }, + { + "epoch": 0.25925491986409016, + "grad_norm": 339.114013671875, + "learning_rate": 5.547883008038091e-06, + "loss": 33.9224, + "step": 64170 + }, + { + "epoch": 0.2592953211294578, + "grad_norm": 647.3558349609375, + "learning_rate": 5.54648111191922e-06, + "loss": 52.8394, + "step": 64180 + }, + { + "epoch": 0.2593357223948254, + "grad_norm": 598.630859375, + "learning_rate": 5.545079172319355e-06, + "loss": 60.7535, + "step": 64190 + }, + { + "epoch": 0.259376123660193, + "grad_norm": 746.982666015625, + "learning_rate": 5.543677189350043e-06, + "loss": 39.941, + "step": 64200 + }, + { + "epoch": 0.25941652492556067, + "grad_norm": 382.3617858886719, + "learning_rate": 5.542275163122831e-06, + "loss": 42.0451, + "step": 64210 + }, + { + "epoch": 0.2594569261909283, + "grad_norm": 486.2451171875, + "learning_rate": 5.540873093749274e-06, + "loss": 34.248, + "step": 64220 + }, + { + "epoch": 0.25949732745629595, + "grad_norm": 392.88134765625, + "learning_rate": 5.539470981340926e-06, + "loss": 56.3614, + "step": 64230 + }, + { + "epoch": 0.2595377287216636, + "grad_norm": 1441.9869384765625, + "learning_rate": 5.538068826009349e-06, + "loss": 52.6453, + "step": 64240 + }, + { + "epoch": 0.25957812998703117, + "grad_norm": 695.2682495117188, + "learning_rate": 5.536666627866104e-06, + "loss": 56.2888, + "step": 64250 + }, + { + "epoch": 0.2596185312523988, + "grad_norm": 629.8070678710938, + "learning_rate": 5.53526438702276e-06, + "loss": 58.3548, + "step": 64260 + }, + { + "epoch": 0.25965893251776645, + "grad_norm": 518.64013671875, + "learning_rate": 5.533862103590883e-06, + "loss": 53.5079, + "step": 64270 + }, + { + "epoch": 0.2596993337831341, + "grad_norm": 733.4268188476562, + "learning_rate": 5.532459777682051e-06, + "loss": 56.0399, + "step": 64280 + }, + { + "epoch": 0.25973973504850173, + "grad_norm": 455.0276184082031, + "learning_rate": 5.5310574094078365e-06, + "loss": 41.9341, + "step": 64290 + }, + { + "epoch": 0.25978013631386937, + "grad_norm": 284.0489196777344, + "learning_rate": 5.529654998879821e-06, + "loss": 34.0637, + "step": 64300 + }, + { + "epoch": 0.25982053757923695, + "grad_norm": 0.0, + "learning_rate": 5.528252546209588e-06, + "loss": 37.8221, + "step": 64310 + }, + { + "epoch": 0.2598609388446046, + "grad_norm": 401.0384521484375, + "learning_rate": 5.526850051508725e-06, + "loss": 68.1539, + "step": 64320 + }, + { + "epoch": 0.25990134010997223, + "grad_norm": 904.4043579101562, + "learning_rate": 5.525447514888822e-06, + "loss": 50.8169, + "step": 64330 + }, + { + "epoch": 0.2599417413753399, + "grad_norm": 356.97705078125, + "learning_rate": 5.52404493646147e-06, + "loss": 41.6639, + "step": 64340 + }, + { + "epoch": 0.2599821426407075, + "grad_norm": 577.2202758789062, + "learning_rate": 5.522642316338268e-06, + "loss": 44.3976, + "step": 64350 + }, + { + "epoch": 0.26002254390607515, + "grad_norm": 219.48606872558594, + "learning_rate": 5.521239654630816e-06, + "loss": 45.1079, + "step": 64360 + }, + { + "epoch": 0.2600629451714428, + "grad_norm": 567.2487182617188, + "learning_rate": 5.519836951450716e-06, + "loss": 43.7239, + "step": 64370 + }, + { + "epoch": 0.2601033464368104, + "grad_norm": 759.8638305664062, + "learning_rate": 5.518434206909577e-06, + "loss": 49.3401, + "step": 64380 + }, + { + "epoch": 0.260143747702178, + "grad_norm": 494.2590637207031, + "learning_rate": 5.517031421119006e-06, + "loss": 40.7932, + "step": 64390 + }, + { + "epoch": 0.26018414896754566, + "grad_norm": 621.8897705078125, + "learning_rate": 5.5156285941906175e-06, + "loss": 58.2495, + "step": 64400 + }, + { + "epoch": 0.2602245502329133, + "grad_norm": 336.2132263183594, + "learning_rate": 5.51422572623603e-06, + "loss": 41.9188, + "step": 64410 + }, + { + "epoch": 0.26026495149828094, + "grad_norm": 544.7830200195312, + "learning_rate": 5.512822817366859e-06, + "loss": 32.5637, + "step": 64420 + }, + { + "epoch": 0.2603053527636486, + "grad_norm": 395.4822082519531, + "learning_rate": 5.511419867694733e-06, + "loss": 38.594, + "step": 64430 + }, + { + "epoch": 0.26034575402901616, + "grad_norm": 653.3141479492188, + "learning_rate": 5.510016877331271e-06, + "loss": 62.9466, + "step": 64440 + }, + { + "epoch": 0.2603861552943838, + "grad_norm": 697.96826171875, + "learning_rate": 5.50861384638811e-06, + "loss": 40.165, + "step": 64450 + }, + { + "epoch": 0.26042655655975144, + "grad_norm": 451.45068359375, + "learning_rate": 5.50721077497688e-06, + "loss": 40.0697, + "step": 64460 + }, + { + "epoch": 0.2604669578251191, + "grad_norm": 496.3128662109375, + "learning_rate": 5.505807663209215e-06, + "loss": 42.665, + "step": 64470 + }, + { + "epoch": 0.2605073590904867, + "grad_norm": 808.9034423828125, + "learning_rate": 5.504404511196755e-06, + "loss": 33.7379, + "step": 64480 + }, + { + "epoch": 0.26054776035585436, + "grad_norm": 502.8928527832031, + "learning_rate": 5.503001319051142e-06, + "loss": 52.4628, + "step": 64490 + }, + { + "epoch": 0.260588161621222, + "grad_norm": 700.6588745117188, + "learning_rate": 5.5015980868840254e-06, + "loss": 44.8512, + "step": 64500 + }, + { + "epoch": 0.2606285628865896, + "grad_norm": 538.7782592773438, + "learning_rate": 5.500194814807051e-06, + "loss": 54.6171, + "step": 64510 + }, + { + "epoch": 0.2606689641519572, + "grad_norm": 871.9270629882812, + "learning_rate": 5.498791502931868e-06, + "loss": 57.9137, + "step": 64520 + }, + { + "epoch": 0.26070936541732487, + "grad_norm": 764.1802368164062, + "learning_rate": 5.497388151370136e-06, + "loss": 64.9328, + "step": 64530 + }, + { + "epoch": 0.2607497666826925, + "grad_norm": 487.97076416015625, + "learning_rate": 5.495984760233511e-06, + "loss": 46.9365, + "step": 64540 + }, + { + "epoch": 0.26079016794806015, + "grad_norm": 731.7493896484375, + "learning_rate": 5.494581329633656e-06, + "loss": 71.8972, + "step": 64550 + }, + { + "epoch": 0.2608305692134278, + "grad_norm": 730.07861328125, + "learning_rate": 5.493177859682234e-06, + "loss": 64.3233, + "step": 64560 + }, + { + "epoch": 0.26087097047879537, + "grad_norm": 439.54656982421875, + "learning_rate": 5.491774350490912e-06, + "loss": 39.8704, + "step": 64570 + }, + { + "epoch": 0.260911371744163, + "grad_norm": 558.0665283203125, + "learning_rate": 5.490370802171362e-06, + "loss": 45.7664, + "step": 64580 + }, + { + "epoch": 0.26095177300953065, + "grad_norm": 517.4381713867188, + "learning_rate": 5.488967214835259e-06, + "loss": 38.9769, + "step": 64590 + }, + { + "epoch": 0.2609921742748983, + "grad_norm": 306.56439208984375, + "learning_rate": 5.487563588594278e-06, + "loss": 60.6436, + "step": 64600 + }, + { + "epoch": 0.26103257554026593, + "grad_norm": 289.06494140625, + "learning_rate": 5.4861599235601e-06, + "loss": 47.6477, + "step": 64610 + }, + { + "epoch": 0.26107297680563357, + "grad_norm": 446.7611083984375, + "learning_rate": 5.484756219844408e-06, + "loss": 63.2884, + "step": 64620 + }, + { + "epoch": 0.26111337807100116, + "grad_norm": 599.0672607421875, + "learning_rate": 5.483352477558889e-06, + "loss": 36.3158, + "step": 64630 + }, + { + "epoch": 0.2611537793363688, + "grad_norm": 531.96337890625, + "learning_rate": 5.48194869681523e-06, + "loss": 67.8228, + "step": 64640 + }, + { + "epoch": 0.26119418060173644, + "grad_norm": 485.176025390625, + "learning_rate": 5.480544877725127e-06, + "loss": 41.1143, + "step": 64650 + }, + { + "epoch": 0.2612345818671041, + "grad_norm": 217.22512817382812, + "learning_rate": 5.479141020400271e-06, + "loss": 36.4919, + "step": 64660 + }, + { + "epoch": 0.2612749831324717, + "grad_norm": 906.6617431640625, + "learning_rate": 5.477737124952366e-06, + "loss": 33.1911, + "step": 64670 + }, + { + "epoch": 0.26131538439783936, + "grad_norm": 629.6244506835938, + "learning_rate": 5.476333191493108e-06, + "loss": 38.5584, + "step": 64680 + }, + { + "epoch": 0.261355785663207, + "grad_norm": 404.6357421875, + "learning_rate": 5.474929220134205e-06, + "loss": 61.765, + "step": 64690 + }, + { + "epoch": 0.2613961869285746, + "grad_norm": 490.454345703125, + "learning_rate": 5.473525210987363e-06, + "loss": 63.0138, + "step": 64700 + }, + { + "epoch": 0.2614365881939422, + "grad_norm": 570.0198974609375, + "learning_rate": 5.472121164164295e-06, + "loss": 47.7393, + "step": 64710 + }, + { + "epoch": 0.26147698945930986, + "grad_norm": 475.7129821777344, + "learning_rate": 5.47071707977671e-06, + "loss": 30.8131, + "step": 64720 + }, + { + "epoch": 0.2615173907246775, + "grad_norm": 791.043212890625, + "learning_rate": 5.46931295793633e-06, + "loss": 53.3681, + "step": 64730 + }, + { + "epoch": 0.26155779199004514, + "grad_norm": 850.3129272460938, + "learning_rate": 5.46790879875487e-06, + "loss": 41.8812, + "step": 64740 + }, + { + "epoch": 0.2615981932554128, + "grad_norm": 735.2415771484375, + "learning_rate": 5.466504602344055e-06, + "loss": 49.5888, + "step": 64750 + }, + { + "epoch": 0.26163859452078037, + "grad_norm": 838.309814453125, + "learning_rate": 5.465100368815609e-06, + "loss": 32.8048, + "step": 64760 + }, + { + "epoch": 0.261678995786148, + "grad_norm": 426.8292541503906, + "learning_rate": 5.463696098281262e-06, + "loss": 49.3965, + "step": 64770 + }, + { + "epoch": 0.26171939705151565, + "grad_norm": 447.4613952636719, + "learning_rate": 5.462291790852744e-06, + "loss": 51.6955, + "step": 64780 + }, + { + "epoch": 0.2617597983168833, + "grad_norm": 713.3033447265625, + "learning_rate": 5.46088744664179e-06, + "loss": 43.2434, + "step": 64790 + }, + { + "epoch": 0.2618001995822509, + "grad_norm": 333.4674987792969, + "learning_rate": 5.459483065760138e-06, + "loss": 37.371, + "step": 64800 + }, + { + "epoch": 0.26184060084761857, + "grad_norm": 687.7012329101562, + "learning_rate": 5.458078648319526e-06, + "loss": 52.3234, + "step": 64810 + }, + { + "epoch": 0.2618810021129862, + "grad_norm": 554.49560546875, + "learning_rate": 5.456674194431698e-06, + "loss": 42.5262, + "step": 64820 + }, + { + "epoch": 0.2619214033783538, + "grad_norm": 389.0173645019531, + "learning_rate": 5.455269704208401e-06, + "loss": 34.1464, + "step": 64830 + }, + { + "epoch": 0.26196180464372143, + "grad_norm": 333.84906005859375, + "learning_rate": 5.453865177761384e-06, + "loss": 55.9638, + "step": 64840 + }, + { + "epoch": 0.26200220590908907, + "grad_norm": 480.15155029296875, + "learning_rate": 5.4524606152023975e-06, + "loss": 49.9895, + "step": 64850 + }, + { + "epoch": 0.2620426071744567, + "grad_norm": 240.2779541015625, + "learning_rate": 5.4510560166431935e-06, + "loss": 38.5802, + "step": 64860 + }, + { + "epoch": 0.26208300843982435, + "grad_norm": 437.9780578613281, + "learning_rate": 5.449651382195535e-06, + "loss": 37.2159, + "step": 64870 + }, + { + "epoch": 0.262123409705192, + "grad_norm": 667.8101196289062, + "learning_rate": 5.448246711971178e-06, + "loss": 64.4867, + "step": 64880 + }, + { + "epoch": 0.2621638109705596, + "grad_norm": 525.559326171875, + "learning_rate": 5.44684200608189e-06, + "loss": 41.0538, + "step": 64890 + }, + { + "epoch": 0.2622042122359272, + "grad_norm": 797.489013671875, + "learning_rate": 5.445437264639433e-06, + "loss": 40.2842, + "step": 64900 + }, + { + "epoch": 0.26224461350129485, + "grad_norm": 539.8469848632812, + "learning_rate": 5.444032487755575e-06, + "loss": 39.7015, + "step": 64910 + }, + { + "epoch": 0.2622850147666625, + "grad_norm": 598.1268310546875, + "learning_rate": 5.442627675542092e-06, + "loss": 38.8527, + "step": 64920 + }, + { + "epoch": 0.26232541603203013, + "grad_norm": 509.8528137207031, + "learning_rate": 5.441222828110756e-06, + "loss": 40.2096, + "step": 64930 + }, + { + "epoch": 0.2623658172973978, + "grad_norm": 793.486083984375, + "learning_rate": 5.439817945573345e-06, + "loss": 47.9723, + "step": 64940 + }, + { + "epoch": 0.26240621856276536, + "grad_norm": 1765.65771484375, + "learning_rate": 5.438413028041637e-06, + "loss": 53.3736, + "step": 64950 + }, + { + "epoch": 0.262446619828133, + "grad_norm": 871.4049682617188, + "learning_rate": 5.4370080756274155e-06, + "loss": 74.8709, + "step": 64960 + }, + { + "epoch": 0.26248702109350064, + "grad_norm": 340.2928771972656, + "learning_rate": 5.435603088442471e-06, + "loss": 45.8343, + "step": 64970 + }, + { + "epoch": 0.2625274223588683, + "grad_norm": 465.7066345214844, + "learning_rate": 5.434198066598585e-06, + "loss": 50.4548, + "step": 64980 + }, + { + "epoch": 0.2625678236242359, + "grad_norm": 299.4384460449219, + "learning_rate": 5.4327930102075525e-06, + "loss": 36.6418, + "step": 64990 + }, + { + "epoch": 0.26260822488960356, + "grad_norm": 597.560302734375, + "learning_rate": 5.431387919381166e-06, + "loss": 51.8495, + "step": 65000 + }, + { + "epoch": 0.2626486261549712, + "grad_norm": 583.4240112304688, + "learning_rate": 5.429982794231221e-06, + "loss": 42.3373, + "step": 65010 + }, + { + "epoch": 0.2626890274203388, + "grad_norm": 256.0840148925781, + "learning_rate": 5.428577634869521e-06, + "loss": 41.6526, + "step": 65020 + }, + { + "epoch": 0.2627294286857064, + "grad_norm": 569.7841186523438, + "learning_rate": 5.427172441407864e-06, + "loss": 29.5803, + "step": 65030 + }, + { + "epoch": 0.26276982995107406, + "grad_norm": 622.056884765625, + "learning_rate": 5.425767213958057e-06, + "loss": 55.9951, + "step": 65040 + }, + { + "epoch": 0.2628102312164417, + "grad_norm": 455.84320068359375, + "learning_rate": 5.424361952631907e-06, + "loss": 61.679, + "step": 65050 + }, + { + "epoch": 0.26285063248180934, + "grad_norm": 233.52798461914062, + "learning_rate": 5.422956657541224e-06, + "loss": 43.4715, + "step": 65060 + }, + { + "epoch": 0.262891033747177, + "grad_norm": 1021.7113037109375, + "learning_rate": 5.421551328797821e-06, + "loss": 47.4148, + "step": 65070 + }, + { + "epoch": 0.26293143501254457, + "grad_norm": 489.813232421875, + "learning_rate": 5.420145966513513e-06, + "loss": 41.6413, + "step": 65080 + }, + { + "epoch": 0.2629718362779122, + "grad_norm": 449.0360412597656, + "learning_rate": 5.418740570800117e-06, + "loss": 39.4725, + "step": 65090 + }, + { + "epoch": 0.26301223754327985, + "grad_norm": 737.3682250976562, + "learning_rate": 5.4173351417694575e-06, + "loss": 41.1901, + "step": 65100 + }, + { + "epoch": 0.2630526388086475, + "grad_norm": 249.3413848876953, + "learning_rate": 5.415929679533356e-06, + "loss": 28.5086, + "step": 65110 + }, + { + "epoch": 0.2630930400740151, + "grad_norm": 436.5331726074219, + "learning_rate": 5.414524184203638e-06, + "loss": 34.6025, + "step": 65120 + }, + { + "epoch": 0.26313344133938277, + "grad_norm": 617.8345947265625, + "learning_rate": 5.4131186558921335e-06, + "loss": 44.1296, + "step": 65130 + }, + { + "epoch": 0.2631738426047504, + "grad_norm": 291.6791076660156, + "learning_rate": 5.411713094710673e-06, + "loss": 39.0024, + "step": 65140 + }, + { + "epoch": 0.263214243870118, + "grad_norm": 1020.7041625976562, + "learning_rate": 5.410307500771092e-06, + "loss": 45.0434, + "step": 65150 + }, + { + "epoch": 0.26325464513548563, + "grad_norm": 360.7496643066406, + "learning_rate": 5.4089018741852264e-06, + "loss": 32.6432, + "step": 65160 + }, + { + "epoch": 0.26329504640085327, + "grad_norm": 577.2459716796875, + "learning_rate": 5.407496215064915e-06, + "loss": 56.6949, + "step": 65170 + }, + { + "epoch": 0.2633354476662209, + "grad_norm": 660.3246459960938, + "learning_rate": 5.406090523521999e-06, + "loss": 33.376, + "step": 65180 + }, + { + "epoch": 0.26337584893158855, + "grad_norm": 493.9849548339844, + "learning_rate": 5.404684799668325e-06, + "loss": 59.4395, + "step": 65190 + }, + { + "epoch": 0.2634162501969562, + "grad_norm": 951.3013916015625, + "learning_rate": 5.403279043615738e-06, + "loss": 43.7015, + "step": 65200 + }, + { + "epoch": 0.2634566514623238, + "grad_norm": 797.259765625, + "learning_rate": 5.4018732554760875e-06, + "loss": 53.7176, + "step": 65210 + }, + { + "epoch": 0.2634970527276914, + "grad_norm": 555.95166015625, + "learning_rate": 5.400467435361227e-06, + "loss": 39.394, + "step": 65220 + }, + { + "epoch": 0.26353745399305906, + "grad_norm": 962.2315063476562, + "learning_rate": 5.399061583383013e-06, + "loss": 50.1824, + "step": 65230 + }, + { + "epoch": 0.2635778552584267, + "grad_norm": 855.8276977539062, + "learning_rate": 5.3976556996532965e-06, + "loss": 41.223, + "step": 65240 + }, + { + "epoch": 0.26361825652379434, + "grad_norm": 402.3208923339844, + "learning_rate": 5.396249784283943e-06, + "loss": 43.6482, + "step": 65250 + }, + { + "epoch": 0.263658657789162, + "grad_norm": 281.7325744628906, + "learning_rate": 5.394843837386812e-06, + "loss": 41.1225, + "step": 65260 + }, + { + "epoch": 0.26369905905452956, + "grad_norm": 998.9751586914062, + "learning_rate": 5.39343785907377e-06, + "loss": 39.7433, + "step": 65270 + }, + { + "epoch": 0.2637394603198972, + "grad_norm": 305.1476745605469, + "learning_rate": 5.392031849456683e-06, + "loss": 41.8851, + "step": 65280 + }, + { + "epoch": 0.26377986158526484, + "grad_norm": 642.9463500976562, + "learning_rate": 5.39062580864742e-06, + "loss": 33.0668, + "step": 65290 + }, + { + "epoch": 0.2638202628506325, + "grad_norm": 427.274658203125, + "learning_rate": 5.3892197367578535e-06, + "loss": 34.9511, + "step": 65300 + }, + { + "epoch": 0.2638606641160001, + "grad_norm": 281.24468994140625, + "learning_rate": 5.38781363389986e-06, + "loss": 31.0351, + "step": 65310 + }, + { + "epoch": 0.26390106538136776, + "grad_norm": 542.3818969726562, + "learning_rate": 5.386407500185316e-06, + "loss": 50.7807, + "step": 65320 + }, + { + "epoch": 0.2639414666467354, + "grad_norm": 630.6954956054688, + "learning_rate": 5.3850013357261e-06, + "loss": 37.06, + "step": 65330 + }, + { + "epoch": 0.263981867912103, + "grad_norm": 566.5939331054688, + "learning_rate": 5.383595140634093e-06, + "loss": 40.1069, + "step": 65340 + }, + { + "epoch": 0.2640222691774706, + "grad_norm": 625.3710327148438, + "learning_rate": 5.382188915021182e-06, + "loss": 46.6871, + "step": 65350 + }, + { + "epoch": 0.26406267044283827, + "grad_norm": 676.7681274414062, + "learning_rate": 5.380782658999256e-06, + "loss": 45.0811, + "step": 65360 + }, + { + "epoch": 0.2641030717082059, + "grad_norm": 297.6866149902344, + "learning_rate": 5.379376372680199e-06, + "loss": 45.0418, + "step": 65370 + }, + { + "epoch": 0.26414347297357355, + "grad_norm": 513.9822998046875, + "learning_rate": 5.377970056175905e-06, + "loss": 57.6066, + "step": 65380 + }, + { + "epoch": 0.2641838742389412, + "grad_norm": 232.52333068847656, + "learning_rate": 5.376563709598267e-06, + "loss": 30.8778, + "step": 65390 + }, + { + "epoch": 0.26422427550430877, + "grad_norm": 665.5692749023438, + "learning_rate": 5.3751573330591855e-06, + "loss": 65.3008, + "step": 65400 + }, + { + "epoch": 0.2642646767696764, + "grad_norm": 789.9412841796875, + "learning_rate": 5.3737509266705555e-06, + "loss": 49.8782, + "step": 65410 + }, + { + "epoch": 0.26430507803504405, + "grad_norm": 532.4793090820312, + "learning_rate": 5.37234449054428e-06, + "loss": 29.4968, + "step": 65420 + }, + { + "epoch": 0.2643454793004117, + "grad_norm": 121.6689224243164, + "learning_rate": 5.370938024792262e-06, + "loss": 43.7967, + "step": 65430 + }, + { + "epoch": 0.26438588056577933, + "grad_norm": 1438.095458984375, + "learning_rate": 5.369531529526406e-06, + "loss": 49.5552, + "step": 65440 + }, + { + "epoch": 0.26442628183114697, + "grad_norm": 381.73846435546875, + "learning_rate": 5.3681250048586246e-06, + "loss": 39.8286, + "step": 65450 + }, + { + "epoch": 0.2644666830965146, + "grad_norm": 1419.7532958984375, + "learning_rate": 5.366718450900825e-06, + "loss": 47.5621, + "step": 65460 + }, + { + "epoch": 0.2645070843618822, + "grad_norm": 883.9287109375, + "learning_rate": 5.365311867764922e-06, + "loss": 44.4383, + "step": 65470 + }, + { + "epoch": 0.26454748562724983, + "grad_norm": 572.197509765625, + "learning_rate": 5.363905255562828e-06, + "loss": 54.7081, + "step": 65480 + }, + { + "epoch": 0.2645878868926175, + "grad_norm": 397.60784912109375, + "learning_rate": 5.362498614406466e-06, + "loss": 34.6413, + "step": 65490 + }, + { + "epoch": 0.2646282881579851, + "grad_norm": 561.8424682617188, + "learning_rate": 5.361091944407751e-06, + "loss": 33.9671, + "step": 65500 + }, + { + "epoch": 0.26466868942335275, + "grad_norm": 459.93792724609375, + "learning_rate": 5.3596852456786075e-06, + "loss": 40.9463, + "step": 65510 + }, + { + "epoch": 0.2647090906887204, + "grad_norm": 588.5122680664062, + "learning_rate": 5.35827851833096e-06, + "loss": 37.2149, + "step": 65520 + }, + { + "epoch": 0.264749491954088, + "grad_norm": 389.89019775390625, + "learning_rate": 5.356871762476735e-06, + "loss": 36.1884, + "step": 65530 + }, + { + "epoch": 0.2647898932194556, + "grad_norm": 387.892578125, + "learning_rate": 5.355464978227861e-06, + "loss": 35.113, + "step": 65540 + }, + { + "epoch": 0.26483029448482326, + "grad_norm": 621.6873168945312, + "learning_rate": 5.354058165696271e-06, + "loss": 35.7391, + "step": 65550 + }, + { + "epoch": 0.2648706957501909, + "grad_norm": 382.91497802734375, + "learning_rate": 5.352651324993897e-06, + "loss": 52.0832, + "step": 65560 + }, + { + "epoch": 0.26491109701555854, + "grad_norm": 902.5263061523438, + "learning_rate": 5.351244456232676e-06, + "loss": 51.0694, + "step": 65570 + }, + { + "epoch": 0.2649514982809262, + "grad_norm": 356.6571960449219, + "learning_rate": 5.349837559524546e-06, + "loss": 53.7859, + "step": 65580 + }, + { + "epoch": 0.26499189954629376, + "grad_norm": 486.88458251953125, + "learning_rate": 5.3484306349814455e-06, + "loss": 43.9213, + "step": 65590 + }, + { + "epoch": 0.2650323008116614, + "grad_norm": 660.9340209960938, + "learning_rate": 5.34702368271532e-06, + "loss": 58.3023, + "step": 65600 + }, + { + "epoch": 0.26507270207702904, + "grad_norm": 559.8971557617188, + "learning_rate": 5.345616702838111e-06, + "loss": 40.1814, + "step": 65610 + }, + { + "epoch": 0.2651131033423967, + "grad_norm": 541.8464965820312, + "learning_rate": 5.344209695461768e-06, + "loss": 35.0644, + "step": 65620 + }, + { + "epoch": 0.2651535046077643, + "grad_norm": 663.918701171875, + "learning_rate": 5.3428026606982396e-06, + "loss": 60.276, + "step": 65630 + }, + { + "epoch": 0.26519390587313196, + "grad_norm": 262.1153869628906, + "learning_rate": 5.341395598659477e-06, + "loss": 47.1914, + "step": 65640 + }, + { + "epoch": 0.2652343071384996, + "grad_norm": 1048.3524169921875, + "learning_rate": 5.339988509457432e-06, + "loss": 48.2345, + "step": 65650 + }, + { + "epoch": 0.2652747084038672, + "grad_norm": 839.9951171875, + "learning_rate": 5.338581393204064e-06, + "loss": 60.1043, + "step": 65660 + }, + { + "epoch": 0.2653151096692348, + "grad_norm": 798.4373779296875, + "learning_rate": 5.337174250011326e-06, + "loss": 51.318, + "step": 65670 + }, + { + "epoch": 0.26535551093460247, + "grad_norm": 267.1536560058594, + "learning_rate": 5.3357670799911805e-06, + "loss": 37.1753, + "step": 65680 + }, + { + "epoch": 0.2653959121999701, + "grad_norm": 775.5031127929688, + "learning_rate": 5.334359883255591e-06, + "loss": 41.825, + "step": 65690 + }, + { + "epoch": 0.26543631346533775, + "grad_norm": 552.0604858398438, + "learning_rate": 5.33295265991652e-06, + "loss": 40.3956, + "step": 65700 + }, + { + "epoch": 0.2654767147307054, + "grad_norm": 814.4728393554688, + "learning_rate": 5.331545410085933e-06, + "loss": 32.473, + "step": 65710 + }, + { + "epoch": 0.26551711599607297, + "grad_norm": 706.5919799804688, + "learning_rate": 5.330138133875799e-06, + "loss": 42.804, + "step": 65720 + }, + { + "epoch": 0.2655575172614406, + "grad_norm": 1015.4716186523438, + "learning_rate": 5.328730831398089e-06, + "loss": 61.0056, + "step": 65730 + }, + { + "epoch": 0.26559791852680825, + "grad_norm": 525.5064697265625, + "learning_rate": 5.3273235027647764e-06, + "loss": 37.2049, + "step": 65740 + }, + { + "epoch": 0.2656383197921759, + "grad_norm": 556.1926879882812, + "learning_rate": 5.3259161480878354e-06, + "loss": 51.9938, + "step": 65750 + }, + { + "epoch": 0.26567872105754353, + "grad_norm": 1342.614990234375, + "learning_rate": 5.324508767479239e-06, + "loss": 34.7253, + "step": 65760 + }, + { + "epoch": 0.26571912232291117, + "grad_norm": 541.5657958984375, + "learning_rate": 5.323101361050972e-06, + "loss": 47.4547, + "step": 65770 + }, + { + "epoch": 0.2657595235882788, + "grad_norm": 827.6627197265625, + "learning_rate": 5.321693928915012e-06, + "loss": 72.2926, + "step": 65780 + }, + { + "epoch": 0.2657999248536464, + "grad_norm": 1144.3697509765625, + "learning_rate": 5.320286471183343e-06, + "loss": 42.7462, + "step": 65790 + }, + { + "epoch": 0.26584032611901404, + "grad_norm": 361.05267333984375, + "learning_rate": 5.3188789879679496e-06, + "loss": 56.6374, + "step": 65800 + }, + { + "epoch": 0.2658807273843817, + "grad_norm": 448.1735534667969, + "learning_rate": 5.317471479380816e-06, + "loss": 55.7517, + "step": 65810 + }, + { + "epoch": 0.2659211286497493, + "grad_norm": 377.99957275390625, + "learning_rate": 5.3160639455339355e-06, + "loss": 48.2162, + "step": 65820 + }, + { + "epoch": 0.26596152991511696, + "grad_norm": 236.86538696289062, + "learning_rate": 5.314656386539298e-06, + "loss": 38.0658, + "step": 65830 + }, + { + "epoch": 0.2660019311804846, + "grad_norm": 929.2343139648438, + "learning_rate": 5.313248802508896e-06, + "loss": 41.3343, + "step": 65840 + }, + { + "epoch": 0.2660423324458522, + "grad_norm": 566.799072265625, + "learning_rate": 5.311841193554723e-06, + "loss": 36.5541, + "step": 65850 + }, + { + "epoch": 0.2660827337112198, + "grad_norm": 549.5016479492188, + "learning_rate": 5.310433559788778e-06, + "loss": 44.7664, + "step": 65860 + }, + { + "epoch": 0.26612313497658746, + "grad_norm": 476.6116638183594, + "learning_rate": 5.309025901323059e-06, + "loss": 32.1816, + "step": 65870 + }, + { + "epoch": 0.2661635362419551, + "grad_norm": 607.8992919921875, + "learning_rate": 5.307618218269569e-06, + "loss": 43.7821, + "step": 65880 + }, + { + "epoch": 0.26620393750732274, + "grad_norm": 1422.3590087890625, + "learning_rate": 5.306210510740307e-06, + "loss": 44.0493, + "step": 65890 + }, + { + "epoch": 0.2662443387726904, + "grad_norm": 651.806396484375, + "learning_rate": 5.304802778847281e-06, + "loss": 46.3512, + "step": 65900 + }, + { + "epoch": 0.26628474003805797, + "grad_norm": 414.97845458984375, + "learning_rate": 5.303395022702495e-06, + "loss": 59.5663, + "step": 65910 + }, + { + "epoch": 0.2663251413034256, + "grad_norm": 1217.07958984375, + "learning_rate": 5.301987242417963e-06, + "loss": 45.3674, + "step": 65920 + }, + { + "epoch": 0.26636554256879325, + "grad_norm": 584.6605224609375, + "learning_rate": 5.300579438105689e-06, + "loss": 47.7693, + "step": 65930 + }, + { + "epoch": 0.2664059438341609, + "grad_norm": 273.66412353515625, + "learning_rate": 5.29917160987769e-06, + "loss": 48.6378, + "step": 65940 + }, + { + "epoch": 0.2664463450995285, + "grad_norm": 497.5111999511719, + "learning_rate": 5.297763757845979e-06, + "loss": 30.083, + "step": 65950 + }, + { + "epoch": 0.26648674636489617, + "grad_norm": 574.8904418945312, + "learning_rate": 5.296355882122572e-06, + "loss": 36.8044, + "step": 65960 + }, + { + "epoch": 0.2665271476302638, + "grad_norm": 411.0905456542969, + "learning_rate": 5.294947982819488e-06, + "loss": 48.731, + "step": 65970 + }, + { + "epoch": 0.2665675488956314, + "grad_norm": 511.0521545410156, + "learning_rate": 5.293540060048746e-06, + "loss": 41.3318, + "step": 65980 + }, + { + "epoch": 0.26660795016099903, + "grad_norm": 740.4434204101562, + "learning_rate": 5.292132113922369e-06, + "loss": 47.0271, + "step": 65990 + }, + { + "epoch": 0.26664835142636667, + "grad_norm": 319.0716247558594, + "learning_rate": 5.290724144552379e-06, + "loss": 30.8917, + "step": 66000 + }, + { + "epoch": 0.2666887526917343, + "grad_norm": 979.0654907226562, + "learning_rate": 5.2893161520508055e-06, + "loss": 44.61, + "step": 66010 + }, + { + "epoch": 0.26672915395710195, + "grad_norm": 736.4896240234375, + "learning_rate": 5.287908136529671e-06, + "loss": 61.8157, + "step": 66020 + }, + { + "epoch": 0.2667695552224696, + "grad_norm": 523.2166137695312, + "learning_rate": 5.28650009810101e-06, + "loss": 49.0017, + "step": 66030 + }, + { + "epoch": 0.2668099564878372, + "grad_norm": 593.52685546875, + "learning_rate": 5.28509203687685e-06, + "loss": 42.2598, + "step": 66040 + }, + { + "epoch": 0.2668503577532048, + "grad_norm": 732.2353515625, + "learning_rate": 5.283683952969224e-06, + "loss": 37.1897, + "step": 66050 + }, + { + "epoch": 0.26689075901857245, + "grad_norm": 608.0689086914062, + "learning_rate": 5.282275846490169e-06, + "loss": 42.6663, + "step": 66060 + }, + { + "epoch": 0.2669311602839401, + "grad_norm": 514.1216430664062, + "learning_rate": 5.280867717551719e-06, + "loss": 41.3538, + "step": 66070 + }, + { + "epoch": 0.26697156154930773, + "grad_norm": 489.35546875, + "learning_rate": 5.279459566265915e-06, + "loss": 42.4444, + "step": 66080 + }, + { + "epoch": 0.2670119628146754, + "grad_norm": 850.6774291992188, + "learning_rate": 5.278051392744796e-06, + "loss": 40.5687, + "step": 66090 + }, + { + "epoch": 0.267052364080043, + "grad_norm": 546.3876953125, + "learning_rate": 5.2766431971004025e-06, + "loss": 43.6863, + "step": 66100 + }, + { + "epoch": 0.2670927653454106, + "grad_norm": 303.21832275390625, + "learning_rate": 5.275234979444781e-06, + "loss": 58.7996, + "step": 66110 + }, + { + "epoch": 0.26713316661077824, + "grad_norm": 436.77923583984375, + "learning_rate": 5.273826739889975e-06, + "loss": 37.3343, + "step": 66120 + }, + { + "epoch": 0.2671735678761459, + "grad_norm": 225.6940460205078, + "learning_rate": 5.272418478548031e-06, + "loss": 51.1029, + "step": 66130 + }, + { + "epoch": 0.2672139691415135, + "grad_norm": 1763.029296875, + "learning_rate": 5.271010195530999e-06, + "loss": 47.1112, + "step": 66140 + }, + { + "epoch": 0.26725437040688116, + "grad_norm": 422.47314453125, + "learning_rate": 5.26960189095093e-06, + "loss": 31.8788, + "step": 66150 + }, + { + "epoch": 0.2672947716722488, + "grad_norm": 424.02606201171875, + "learning_rate": 5.268193564919876e-06, + "loss": 37.1531, + "step": 66160 + }, + { + "epoch": 0.2673351729376164, + "grad_norm": 672.9403686523438, + "learning_rate": 5.26678521754989e-06, + "loss": 34.8339, + "step": 66170 + }, + { + "epoch": 0.267375574202984, + "grad_norm": 486.558349609375, + "learning_rate": 5.265376848953031e-06, + "loss": 35.5, + "step": 66180 + }, + { + "epoch": 0.26741597546835166, + "grad_norm": 749.306640625, + "learning_rate": 5.263968459241351e-06, + "loss": 52.8159, + "step": 66190 + }, + { + "epoch": 0.2674563767337193, + "grad_norm": 346.21722412109375, + "learning_rate": 5.262560048526913e-06, + "loss": 40.2258, + "step": 66200 + }, + { + "epoch": 0.26749677799908694, + "grad_norm": 826.6575317382812, + "learning_rate": 5.261151616921778e-06, + "loss": 36.5579, + "step": 66210 + }, + { + "epoch": 0.2675371792644546, + "grad_norm": 593.7576293945312, + "learning_rate": 5.259743164538008e-06, + "loss": 55.5614, + "step": 66220 + }, + { + "epoch": 0.26757758052982217, + "grad_norm": 535.8445434570312, + "learning_rate": 5.2583346914876655e-06, + "loss": 45.7979, + "step": 66230 + }, + { + "epoch": 0.2676179817951898, + "grad_norm": 521.997314453125, + "learning_rate": 5.2569261978828155e-06, + "loss": 48.649, + "step": 66240 + }, + { + "epoch": 0.26765838306055745, + "grad_norm": 961.8884887695312, + "learning_rate": 5.255517683835528e-06, + "loss": 52.0874, + "step": 66250 + }, + { + "epoch": 0.2676987843259251, + "grad_norm": 573.283935546875, + "learning_rate": 5.254109149457873e-06, + "loss": 40.6045, + "step": 66260 + }, + { + "epoch": 0.2677391855912927, + "grad_norm": 510.8491516113281, + "learning_rate": 5.252700594861918e-06, + "loss": 48.0254, + "step": 66270 + }, + { + "epoch": 0.26777958685666037, + "grad_norm": 4054.195556640625, + "learning_rate": 5.251292020159736e-06, + "loss": 66.1508, + "step": 66280 + }, + { + "epoch": 0.267819988122028, + "grad_norm": 983.9483642578125, + "learning_rate": 5.2498834254634005e-06, + "loss": 59.9476, + "step": 66290 + }, + { + "epoch": 0.2678603893873956, + "grad_norm": 531.3615112304688, + "learning_rate": 5.248474810884988e-06, + "loss": 62.1785, + "step": 66300 + }, + { + "epoch": 0.26790079065276323, + "grad_norm": 438.48516845703125, + "learning_rate": 5.247066176536577e-06, + "loss": 33.2004, + "step": 66310 + }, + { + "epoch": 0.26794119191813087, + "grad_norm": 0.0, + "learning_rate": 5.245657522530243e-06, + "loss": 44.228, + "step": 66320 + }, + { + "epoch": 0.2679815931834985, + "grad_norm": 299.0640563964844, + "learning_rate": 5.244248848978067e-06, + "loss": 40.0723, + "step": 66330 + }, + { + "epoch": 0.26802199444886615, + "grad_norm": 297.197509765625, + "learning_rate": 5.242840155992131e-06, + "loss": 53.5966, + "step": 66340 + }, + { + "epoch": 0.2680623957142338, + "grad_norm": 772.6343994140625, + "learning_rate": 5.24143144368452e-06, + "loss": 50.8811, + "step": 66350 + }, + { + "epoch": 0.2681027969796014, + "grad_norm": 720.2068481445312, + "learning_rate": 5.240022712167315e-06, + "loss": 69.1438, + "step": 66360 + }, + { + "epoch": 0.268143198244969, + "grad_norm": 718.2180786132812, + "learning_rate": 5.2386139615526046e-06, + "loss": 37.676, + "step": 66370 + }, + { + "epoch": 0.26818359951033666, + "grad_norm": 858.3838500976562, + "learning_rate": 5.237205191952477e-06, + "loss": 31.7104, + "step": 66380 + }, + { + "epoch": 0.2682240007757043, + "grad_norm": 808.7693481445312, + "learning_rate": 5.235796403479021e-06, + "loss": 43.2298, + "step": 66390 + }, + { + "epoch": 0.26826440204107194, + "grad_norm": 3157.16015625, + "learning_rate": 5.2343875962443255e-06, + "loss": 61.2995, + "step": 66400 + }, + { + "epoch": 0.2683048033064396, + "grad_norm": 784.2205810546875, + "learning_rate": 5.2329787703604875e-06, + "loss": 50.4512, + "step": 66410 + }, + { + "epoch": 0.2683452045718072, + "grad_norm": 592.7188720703125, + "learning_rate": 5.231569925939596e-06, + "loss": 37.0912, + "step": 66420 + }, + { + "epoch": 0.2683856058371748, + "grad_norm": 523.109619140625, + "learning_rate": 5.230161063093749e-06, + "loss": 52.2018, + "step": 66430 + }, + { + "epoch": 0.26842600710254244, + "grad_norm": 605.025146484375, + "learning_rate": 5.228752181935042e-06, + "loss": 52.8809, + "step": 66440 + }, + { + "epoch": 0.2684664083679101, + "grad_norm": 201.62841796875, + "learning_rate": 5.227343282575574e-06, + "loss": 39.6615, + "step": 66450 + }, + { + "epoch": 0.2685068096332777, + "grad_norm": 899.1422729492188, + "learning_rate": 5.225934365127445e-06, + "loss": 44.044, + "step": 66460 + }, + { + "epoch": 0.26854721089864536, + "grad_norm": 980.2147216796875, + "learning_rate": 5.224525429702755e-06, + "loss": 35.6335, + "step": 66470 + }, + { + "epoch": 0.268587612164013, + "grad_norm": 800.3404541015625, + "learning_rate": 5.223116476413606e-06, + "loss": 34.9568, + "step": 66480 + }, + { + "epoch": 0.2686280134293806, + "grad_norm": 373.1787109375, + "learning_rate": 5.221707505372105e-06, + "loss": 50.0092, + "step": 66490 + }, + { + "epoch": 0.2686684146947482, + "grad_norm": 548.1878051757812, + "learning_rate": 5.220298516690353e-06, + "loss": 51.9183, + "step": 66500 + }, + { + "epoch": 0.26870881596011587, + "grad_norm": 546.2302856445312, + "learning_rate": 5.21888951048046e-06, + "loss": 46.6924, + "step": 66510 + }, + { + "epoch": 0.2687492172254835, + "grad_norm": 404.8308410644531, + "learning_rate": 5.217480486854534e-06, + "loss": 42.3545, + "step": 66520 + }, + { + "epoch": 0.26878961849085115, + "grad_norm": 787.6769409179688, + "learning_rate": 5.216071445924683e-06, + "loss": 50.6945, + "step": 66530 + }, + { + "epoch": 0.2688300197562188, + "grad_norm": 637.3775634765625, + "learning_rate": 5.214662387803019e-06, + "loss": 34.9455, + "step": 66540 + }, + { + "epoch": 0.26887042102158637, + "grad_norm": 840.2587280273438, + "learning_rate": 5.213253312601654e-06, + "loss": 48.897, + "step": 66550 + }, + { + "epoch": 0.268910822286954, + "grad_norm": 668.3641967773438, + "learning_rate": 5.211844220432702e-06, + "loss": 60.593, + "step": 66560 + }, + { + "epoch": 0.26895122355232165, + "grad_norm": 520.17626953125, + "learning_rate": 5.210435111408276e-06, + "loss": 47.4215, + "step": 66570 + }, + { + "epoch": 0.2689916248176893, + "grad_norm": 718.3225708007812, + "learning_rate": 5.209025985640496e-06, + "loss": 43.8603, + "step": 66580 + }, + { + "epoch": 0.26903202608305693, + "grad_norm": 857.1797485351562, + "learning_rate": 5.207616843241476e-06, + "loss": 59.8737, + "step": 66590 + }, + { + "epoch": 0.26907242734842457, + "grad_norm": 0.0, + "learning_rate": 5.206207684323337e-06, + "loss": 47.3329, + "step": 66600 + }, + { + "epoch": 0.2691128286137922, + "grad_norm": 804.732666015625, + "learning_rate": 5.2047985089982e-06, + "loss": 50.3234, + "step": 66610 + }, + { + "epoch": 0.2691532298791598, + "grad_norm": 600.4869995117188, + "learning_rate": 5.203389317378183e-06, + "loss": 35.6028, + "step": 66620 + }, + { + "epoch": 0.26919363114452743, + "grad_norm": 507.7207336425781, + "learning_rate": 5.201980109575414e-06, + "loss": 37.8279, + "step": 66630 + }, + { + "epoch": 0.2692340324098951, + "grad_norm": 1408.29248046875, + "learning_rate": 5.200570885702013e-06, + "loss": 49.3994, + "step": 66640 + }, + { + "epoch": 0.2692744336752627, + "grad_norm": 756.8723754882812, + "learning_rate": 5.19916164587011e-06, + "loss": 50.934, + "step": 66650 + }, + { + "epoch": 0.26931483494063035, + "grad_norm": 715.3894653320312, + "learning_rate": 5.197752390191827e-06, + "loss": 53.1523, + "step": 66660 + }, + { + "epoch": 0.269355236205998, + "grad_norm": 523.8912353515625, + "learning_rate": 5.196343118779292e-06, + "loss": 35.7084, + "step": 66670 + }, + { + "epoch": 0.2693956374713656, + "grad_norm": 721.2276000976562, + "learning_rate": 5.194933831744637e-06, + "loss": 47.4896, + "step": 66680 + }, + { + "epoch": 0.2694360387367332, + "grad_norm": 588.867919921875, + "learning_rate": 5.1935245291999945e-06, + "loss": 40.2639, + "step": 66690 + }, + { + "epoch": 0.26947644000210086, + "grad_norm": 624.7872924804688, + "learning_rate": 5.192115211257491e-06, + "loss": 44.807, + "step": 66700 + }, + { + "epoch": 0.2695168412674685, + "grad_norm": 458.06085205078125, + "learning_rate": 5.19070587802926e-06, + "loss": 36.0, + "step": 66710 + }, + { + "epoch": 0.26955724253283614, + "grad_norm": 344.6346435546875, + "learning_rate": 5.189296529627441e-06, + "loss": 40.4112, + "step": 66720 + }, + { + "epoch": 0.2695976437982038, + "grad_norm": 421.5151062011719, + "learning_rate": 5.187887166164165e-06, + "loss": 45.2732, + "step": 66730 + }, + { + "epoch": 0.2696380450635714, + "grad_norm": 1014.9851684570312, + "learning_rate": 5.186477787751569e-06, + "loss": 68.0941, + "step": 66740 + }, + { + "epoch": 0.269678446328939, + "grad_norm": 277.7436828613281, + "learning_rate": 5.185068394501791e-06, + "loss": 33.1461, + "step": 66750 + }, + { + "epoch": 0.26971884759430664, + "grad_norm": 788.6626586914062, + "learning_rate": 5.183658986526969e-06, + "loss": 59.109, + "step": 66760 + }, + { + "epoch": 0.2697592488596743, + "grad_norm": 823.033935546875, + "learning_rate": 5.1822495639392465e-06, + "loss": 43.8207, + "step": 66770 + }, + { + "epoch": 0.2697996501250419, + "grad_norm": 388.10076904296875, + "learning_rate": 5.180840126850764e-06, + "loss": 26.7396, + "step": 66780 + }, + { + "epoch": 0.26984005139040956, + "grad_norm": 616.7316284179688, + "learning_rate": 5.179430675373659e-06, + "loss": 41.6738, + "step": 66790 + }, + { + "epoch": 0.2698804526557772, + "grad_norm": 395.72637939453125, + "learning_rate": 5.17802120962008e-06, + "loss": 36.2188, + "step": 66800 + }, + { + "epoch": 0.2699208539211448, + "grad_norm": 534.4721069335938, + "learning_rate": 5.17661172970217e-06, + "loss": 55.1734, + "step": 66810 + }, + { + "epoch": 0.2699612551865124, + "grad_norm": 718.8760986328125, + "learning_rate": 5.175202235732077e-06, + "loss": 47.2039, + "step": 66820 + }, + { + "epoch": 0.27000165645188007, + "grad_norm": 181.80419921875, + "learning_rate": 5.1737927278219446e-06, + "loss": 54.5886, + "step": 66830 + }, + { + "epoch": 0.2700420577172477, + "grad_norm": 657.7708740234375, + "learning_rate": 5.1723832060839216e-06, + "loss": 43.6877, + "step": 66840 + }, + { + "epoch": 0.27008245898261535, + "grad_norm": 582.9547119140625, + "learning_rate": 5.170973670630159e-06, + "loss": 32.2856, + "step": 66850 + }, + { + "epoch": 0.270122860247983, + "grad_norm": 908.3861083984375, + "learning_rate": 5.169564121572806e-06, + "loss": 51.184, + "step": 66860 + }, + { + "epoch": 0.27016326151335057, + "grad_norm": 933.0931396484375, + "learning_rate": 5.168154559024014e-06, + "loss": 37.9435, + "step": 66870 + }, + { + "epoch": 0.2702036627787182, + "grad_norm": 288.22430419921875, + "learning_rate": 5.166744983095937e-06, + "loss": 56.0103, + "step": 66880 + }, + { + "epoch": 0.27024406404408585, + "grad_norm": 686.4309692382812, + "learning_rate": 5.165335393900726e-06, + "loss": 34.439, + "step": 66890 + }, + { + "epoch": 0.2702844653094535, + "grad_norm": 558.5487060546875, + "learning_rate": 5.163925791550536e-06, + "loss": 33.7994, + "step": 66900 + }, + { + "epoch": 0.27032486657482113, + "grad_norm": 720.1372680664062, + "learning_rate": 5.162516176157523e-06, + "loss": 53.6696, + "step": 66910 + }, + { + "epoch": 0.27036526784018877, + "grad_norm": 512.8276977539062, + "learning_rate": 5.161106547833843e-06, + "loss": 38.8246, + "step": 66920 + }, + { + "epoch": 0.2704056691055564, + "grad_norm": 397.11785888671875, + "learning_rate": 5.159696906691656e-06, + "loss": 35.0052, + "step": 66930 + }, + { + "epoch": 0.270446070370924, + "grad_norm": 753.915771484375, + "learning_rate": 5.158287252843118e-06, + "loss": 48.151, + "step": 66940 + }, + { + "epoch": 0.27048647163629164, + "grad_norm": 870.271240234375, + "learning_rate": 5.1568775864003894e-06, + "loss": 64.0858, + "step": 66950 + }, + { + "epoch": 0.2705268729016593, + "grad_norm": 715.2255859375, + "learning_rate": 5.155467907475632e-06, + "loss": 42.3206, + "step": 66960 + }, + { + "epoch": 0.2705672741670269, + "grad_norm": 336.386962890625, + "learning_rate": 5.154058216181007e-06, + "loss": 32.85, + "step": 66970 + }, + { + "epoch": 0.27060767543239456, + "grad_norm": 1711.7962646484375, + "learning_rate": 5.1526485126286766e-06, + "loss": 45.585, + "step": 66980 + }, + { + "epoch": 0.2706480766977622, + "grad_norm": 689.0267944335938, + "learning_rate": 5.151238796930804e-06, + "loss": 42.3475, + "step": 66990 + }, + { + "epoch": 0.2706884779631298, + "grad_norm": 0.0, + "learning_rate": 5.149829069199555e-06, + "loss": 36.7057, + "step": 67000 + }, + { + "epoch": 0.2707288792284974, + "grad_norm": 492.4189758300781, + "learning_rate": 5.148419329547094e-06, + "loss": 41.9212, + "step": 67010 + }, + { + "epoch": 0.27076928049386506, + "grad_norm": 746.7906494140625, + "learning_rate": 5.147009578085589e-06, + "loss": 37.3403, + "step": 67020 + }, + { + "epoch": 0.2708096817592327, + "grad_norm": 696.8953247070312, + "learning_rate": 5.145599814927205e-06, + "loss": 48.6399, + "step": 67030 + }, + { + "epoch": 0.27085008302460034, + "grad_norm": 1013.6634521484375, + "learning_rate": 5.144190040184114e-06, + "loss": 36.1067, + "step": 67040 + }, + { + "epoch": 0.270890484289968, + "grad_norm": 962.2171630859375, + "learning_rate": 5.142780253968481e-06, + "loss": 29.0633, + "step": 67050 + }, + { + "epoch": 0.27093088555533557, + "grad_norm": 683.1814575195312, + "learning_rate": 5.14137045639248e-06, + "loss": 36.5833, + "step": 67060 + }, + { + "epoch": 0.2709712868207032, + "grad_norm": 419.5286560058594, + "learning_rate": 5.13996064756828e-06, + "loss": 36.7421, + "step": 67070 + }, + { + "epoch": 0.27101168808607085, + "grad_norm": 428.304931640625, + "learning_rate": 5.138550827608055e-06, + "loss": 28.9419, + "step": 67080 + }, + { + "epoch": 0.2710520893514385, + "grad_norm": 347.9562072753906, + "learning_rate": 5.137140996623975e-06, + "loss": 49.9478, + "step": 67090 + }, + { + "epoch": 0.2710924906168061, + "grad_norm": 354.6157531738281, + "learning_rate": 5.135731154728215e-06, + "loss": 46.6038, + "step": 67100 + }, + { + "epoch": 0.27113289188217377, + "grad_norm": 589.3129272460938, + "learning_rate": 5.134321302032951e-06, + "loss": 42.932, + "step": 67110 + }, + { + "epoch": 0.2711732931475414, + "grad_norm": 361.30029296875, + "learning_rate": 5.1329114386503585e-06, + "loss": 42.2918, + "step": 67120 + }, + { + "epoch": 0.271213694412909, + "grad_norm": 420.7300720214844, + "learning_rate": 5.131501564692611e-06, + "loss": 51.0995, + "step": 67130 + }, + { + "epoch": 0.27125409567827663, + "grad_norm": 478.6790466308594, + "learning_rate": 5.130091680271887e-06, + "loss": 64.9722, + "step": 67140 + }, + { + "epoch": 0.27129449694364427, + "grad_norm": 850.8605346679688, + "learning_rate": 5.128681785500365e-06, + "loss": 60.1893, + "step": 67150 + }, + { + "epoch": 0.2713348982090119, + "grad_norm": 451.3896484375, + "learning_rate": 5.127271880490227e-06, + "loss": 31.8813, + "step": 67160 + }, + { + "epoch": 0.27137529947437955, + "grad_norm": 980.57177734375, + "learning_rate": 5.125861965353647e-06, + "loss": 42.3975, + "step": 67170 + }, + { + "epoch": 0.2714157007397472, + "grad_norm": 517.5213623046875, + "learning_rate": 5.124452040202809e-06, + "loss": 52.2581, + "step": 67180 + }, + { + "epoch": 0.2714561020051148, + "grad_norm": 550.5774536132812, + "learning_rate": 5.1230421051498914e-06, + "loss": 36.1163, + "step": 67190 + }, + { + "epoch": 0.2714965032704824, + "grad_norm": 427.63421630859375, + "learning_rate": 5.121632160307078e-06, + "loss": 48.8638, + "step": 67200 + }, + { + "epoch": 0.27153690453585005, + "grad_norm": 471.3889465332031, + "learning_rate": 5.120222205786556e-06, + "loss": 44.1868, + "step": 67210 + }, + { + "epoch": 0.2715773058012177, + "grad_norm": 610.775390625, + "learning_rate": 5.118812241700501e-06, + "loss": 45.4912, + "step": 67220 + }, + { + "epoch": 0.27161770706658533, + "grad_norm": 654.1630859375, + "learning_rate": 5.117402268161103e-06, + "loss": 39.7674, + "step": 67230 + }, + { + "epoch": 0.271658108331953, + "grad_norm": 311.9112243652344, + "learning_rate": 5.115992285280543e-06, + "loss": 46.7622, + "step": 67240 + }, + { + "epoch": 0.2716985095973206, + "grad_norm": 916.1666870117188, + "learning_rate": 5.114582293171012e-06, + "loss": 45.3463, + "step": 67250 + }, + { + "epoch": 0.2717389108626882, + "grad_norm": 538.2660522460938, + "learning_rate": 5.113172291944693e-06, + "loss": 37.8112, + "step": 67260 + }, + { + "epoch": 0.27177931212805584, + "grad_norm": 652.853515625, + "learning_rate": 5.111762281713773e-06, + "loss": 50.5335, + "step": 67270 + }, + { + "epoch": 0.2718197133934235, + "grad_norm": 459.0245056152344, + "learning_rate": 5.110352262590442e-06, + "loss": 53.5647, + "step": 67280 + }, + { + "epoch": 0.2718601146587911, + "grad_norm": 322.7192687988281, + "learning_rate": 5.108942234686889e-06, + "loss": 28.7373, + "step": 67290 + }, + { + "epoch": 0.27190051592415876, + "grad_norm": 364.63555908203125, + "learning_rate": 5.1075321981153014e-06, + "loss": 39.3614, + "step": 67300 + }, + { + "epoch": 0.2719409171895264, + "grad_norm": 842.6339111328125, + "learning_rate": 5.106122152987869e-06, + "loss": 45.2849, + "step": 67310 + }, + { + "epoch": 0.271981318454894, + "grad_norm": 477.67950439453125, + "learning_rate": 5.1047120994167855e-06, + "loss": 51.1875, + "step": 67320 + }, + { + "epoch": 0.2720217197202616, + "grad_norm": 544.8759155273438, + "learning_rate": 5.103302037514241e-06, + "loss": 44.9741, + "step": 67330 + }, + { + "epoch": 0.27206212098562926, + "grad_norm": 393.1825256347656, + "learning_rate": 5.101891967392426e-06, + "loss": 41.374, + "step": 67340 + }, + { + "epoch": 0.2721025222509969, + "grad_norm": 307.2347106933594, + "learning_rate": 5.100481889163535e-06, + "loss": 43.7367, + "step": 67350 + }, + { + "epoch": 0.27214292351636454, + "grad_norm": 328.61029052734375, + "learning_rate": 5.099071802939763e-06, + "loss": 38.4899, + "step": 67360 + }, + { + "epoch": 0.2721833247817322, + "grad_norm": 253.48068237304688, + "learning_rate": 5.097661708833302e-06, + "loss": 42.7777, + "step": 67370 + }, + { + "epoch": 0.27222372604709977, + "grad_norm": 648.9194946289062, + "learning_rate": 5.096251606956345e-06, + "loss": 49.0186, + "step": 67380 + }, + { + "epoch": 0.2722641273124674, + "grad_norm": 743.0326538085938, + "learning_rate": 5.0948414974210906e-06, + "loss": 52.1385, + "step": 67390 + }, + { + "epoch": 0.27230452857783505, + "grad_norm": 369.1799621582031, + "learning_rate": 5.093431380339734e-06, + "loss": 47.9512, + "step": 67400 + }, + { + "epoch": 0.2723449298432027, + "grad_norm": 240.17050170898438, + "learning_rate": 5.092021255824471e-06, + "loss": 73.6545, + "step": 67410 + }, + { + "epoch": 0.2723853311085703, + "grad_norm": 577.8941040039062, + "learning_rate": 5.090611123987498e-06, + "loss": 46.5082, + "step": 67420 + }, + { + "epoch": 0.27242573237393797, + "grad_norm": 952.72216796875, + "learning_rate": 5.089200984941014e-06, + "loss": 37.1385, + "step": 67430 + }, + { + "epoch": 0.2724661336393056, + "grad_norm": 367.08306884765625, + "learning_rate": 5.087790838797217e-06, + "loss": 44.5161, + "step": 67440 + }, + { + "epoch": 0.2725065349046732, + "grad_norm": 593.40478515625, + "learning_rate": 5.0863806856683076e-06, + "loss": 40.6398, + "step": 67450 + }, + { + "epoch": 0.27254693617004083, + "grad_norm": 474.2576599121094, + "learning_rate": 5.084970525666481e-06, + "loss": 46.1414, + "step": 67460 + }, + { + "epoch": 0.27258733743540847, + "grad_norm": 354.111572265625, + "learning_rate": 5.083560358903942e-06, + "loss": 28.794, + "step": 67470 + }, + { + "epoch": 0.2726277387007761, + "grad_norm": 985.1436157226562, + "learning_rate": 5.082150185492887e-06, + "loss": 41.4091, + "step": 67480 + }, + { + "epoch": 0.27266813996614375, + "grad_norm": 744.9746704101562, + "learning_rate": 5.080740005545519e-06, + "loss": 46.0218, + "step": 67490 + }, + { + "epoch": 0.2727085412315114, + "grad_norm": 549.0433959960938, + "learning_rate": 5.07932981917404e-06, + "loss": 41.1397, + "step": 67500 + }, + { + "epoch": 0.272748942496879, + "grad_norm": 403.9040222167969, + "learning_rate": 5.077919626490651e-06, + "loss": 38.3747, + "step": 67510 + }, + { + "epoch": 0.2727893437622466, + "grad_norm": 690.0769653320312, + "learning_rate": 5.076509427607555e-06, + "loss": 33.2111, + "step": 67520 + }, + { + "epoch": 0.27282974502761426, + "grad_norm": 209.73768615722656, + "learning_rate": 5.075099222636954e-06, + "loss": 41.2934, + "step": 67530 + }, + { + "epoch": 0.2728701462929819, + "grad_norm": 397.9496154785156, + "learning_rate": 5.073689011691054e-06, + "loss": 35.0581, + "step": 67540 + }, + { + "epoch": 0.27291054755834954, + "grad_norm": 731.2933959960938, + "learning_rate": 5.072278794882058e-06, + "loss": 47.9663, + "step": 67550 + }, + { + "epoch": 0.2729509488237172, + "grad_norm": 845.2896118164062, + "learning_rate": 5.07086857232217e-06, + "loss": 33.0361, + "step": 67560 + }, + { + "epoch": 0.2729913500890848, + "grad_norm": 670.6997680664062, + "learning_rate": 5.069458344123592e-06, + "loss": 43.4707, + "step": 67570 + }, + { + "epoch": 0.2730317513544524, + "grad_norm": 782.651123046875, + "learning_rate": 5.068048110398535e-06, + "loss": 48.6419, + "step": 67580 + }, + { + "epoch": 0.27307215261982004, + "grad_norm": 327.63372802734375, + "learning_rate": 5.066637871259201e-06, + "loss": 37.3878, + "step": 67590 + }, + { + "epoch": 0.2731125538851877, + "grad_norm": 942.0525512695312, + "learning_rate": 5.065227626817798e-06, + "loss": 49.2512, + "step": 67600 + }, + { + "epoch": 0.2731529551505553, + "grad_norm": 563.7671508789062, + "learning_rate": 5.063817377186531e-06, + "loss": 39.5605, + "step": 67610 + }, + { + "epoch": 0.27319335641592296, + "grad_norm": 550.6803588867188, + "learning_rate": 5.062407122477609e-06, + "loss": 43.2036, + "step": 67620 + }, + { + "epoch": 0.2732337576812906, + "grad_norm": 556.4356689453125, + "learning_rate": 5.060996862803239e-06, + "loss": 36.3034, + "step": 67630 + }, + { + "epoch": 0.2732741589466582, + "grad_norm": 498.069091796875, + "learning_rate": 5.0595865982756284e-06, + "loss": 44.1085, + "step": 67640 + }, + { + "epoch": 0.2733145602120258, + "grad_norm": 525.10693359375, + "learning_rate": 5.0581763290069865e-06, + "loss": 48.0721, + "step": 67650 + }, + { + "epoch": 0.27335496147739347, + "grad_norm": 1076.3779296875, + "learning_rate": 5.05676605510952e-06, + "loss": 55.7806, + "step": 67660 + }, + { + "epoch": 0.2733953627427611, + "grad_norm": 956.6840209960938, + "learning_rate": 5.055355776695437e-06, + "loss": 50.8037, + "step": 67670 + }, + { + "epoch": 0.27343576400812875, + "grad_norm": 934.687255859375, + "learning_rate": 5.0539454938769525e-06, + "loss": 53.6633, + "step": 67680 + }, + { + "epoch": 0.2734761652734964, + "grad_norm": 789.1433715820312, + "learning_rate": 5.052535206766271e-06, + "loss": 67.9196, + "step": 67690 + }, + { + "epoch": 0.27351656653886397, + "grad_norm": 509.7837219238281, + "learning_rate": 5.051124915475604e-06, + "loss": 47.9745, + "step": 67700 + }, + { + "epoch": 0.2735569678042316, + "grad_norm": 806.1221313476562, + "learning_rate": 5.049714620117162e-06, + "loss": 54.7842, + "step": 67710 + }, + { + "epoch": 0.27359736906959925, + "grad_norm": 789.8609008789062, + "learning_rate": 5.0483043208031575e-06, + "loss": 42.8648, + "step": 67720 + }, + { + "epoch": 0.2736377703349669, + "grad_norm": 378.3641662597656, + "learning_rate": 5.0468940176458e-06, + "loss": 25.4453, + "step": 67730 + }, + { + "epoch": 0.27367817160033453, + "grad_norm": 377.1431579589844, + "learning_rate": 5.045483710757298e-06, + "loss": 44.5361, + "step": 67740 + }, + { + "epoch": 0.27371857286570217, + "grad_norm": 717.8798217773438, + "learning_rate": 5.044073400249867e-06, + "loss": 60.3492, + "step": 67750 + }, + { + "epoch": 0.2737589741310698, + "grad_norm": 612.9976806640625, + "learning_rate": 5.0426630862357176e-06, + "loss": 34.6233, + "step": 67760 + }, + { + "epoch": 0.2737993753964374, + "grad_norm": 242.31356811523438, + "learning_rate": 5.041252768827064e-06, + "loss": 32.6884, + "step": 67770 + }, + { + "epoch": 0.27383977666180503, + "grad_norm": 875.4049072265625, + "learning_rate": 5.039842448136115e-06, + "loss": 58.8626, + "step": 67780 + }, + { + "epoch": 0.2738801779271727, + "grad_norm": 347.9784240722656, + "learning_rate": 5.038432124275087e-06, + "loss": 38.7157, + "step": 67790 + }, + { + "epoch": 0.2739205791925403, + "grad_norm": 528.5045166015625, + "learning_rate": 5.03702179735619e-06, + "loss": 62.6629, + "step": 67800 + }, + { + "epoch": 0.27396098045790795, + "grad_norm": 613.0690307617188, + "learning_rate": 5.035611467491638e-06, + "loss": 30.0438, + "step": 67810 + }, + { + "epoch": 0.2740013817232756, + "grad_norm": 510.3870544433594, + "learning_rate": 5.034201134793646e-06, + "loss": 35.476, + "step": 67820 + }, + { + "epoch": 0.2740417829886432, + "grad_norm": 491.538330078125, + "learning_rate": 5.032790799374426e-06, + "loss": 31.3243, + "step": 67830 + }, + { + "epoch": 0.2740821842540108, + "grad_norm": 360.6121520996094, + "learning_rate": 5.0313804613461925e-06, + "loss": 50.7477, + "step": 67840 + }, + { + "epoch": 0.27412258551937846, + "grad_norm": 502.55706787109375, + "learning_rate": 5.0299701208211605e-06, + "loss": 32.5469, + "step": 67850 + }, + { + "epoch": 0.2741629867847461, + "grad_norm": 524.4901733398438, + "learning_rate": 5.028559777911543e-06, + "loss": 37.7005, + "step": 67860 + }, + { + "epoch": 0.27420338805011374, + "grad_norm": 647.3909912109375, + "learning_rate": 5.027149432729555e-06, + "loss": 69.5209, + "step": 67870 + }, + { + "epoch": 0.2742437893154814, + "grad_norm": 1294.49365234375, + "learning_rate": 5.025739085387411e-06, + "loss": 41.7499, + "step": 67880 + }, + { + "epoch": 0.274284190580849, + "grad_norm": 801.7745361328125, + "learning_rate": 5.024328735997327e-06, + "loss": 59.8358, + "step": 67890 + }, + { + "epoch": 0.2743245918462166, + "grad_norm": 262.2741394042969, + "learning_rate": 5.0229183846715154e-06, + "loss": 26.463, + "step": 67900 + }, + { + "epoch": 0.27436499311158424, + "grad_norm": 978.8836059570312, + "learning_rate": 5.021508031522195e-06, + "loss": 66.1999, + "step": 67910 + }, + { + "epoch": 0.2744053943769519, + "grad_norm": 960.217529296875, + "learning_rate": 5.0200976766615785e-06, + "loss": 47.8755, + "step": 67920 + }, + { + "epoch": 0.2744457956423195, + "grad_norm": 899.3736572265625, + "learning_rate": 5.018687320201882e-06, + "loss": 57.0194, + "step": 67930 + }, + { + "epoch": 0.27448619690768716, + "grad_norm": 572.0177612304688, + "learning_rate": 5.017276962255323e-06, + "loss": 38.2684, + "step": 67940 + }, + { + "epoch": 0.2745265981730548, + "grad_norm": 650.45361328125, + "learning_rate": 5.015866602934112e-06, + "loss": 38.218, + "step": 67950 + }, + { + "epoch": 0.2745669994384224, + "grad_norm": 491.1336669921875, + "learning_rate": 5.01445624235047e-06, + "loss": 46.4445, + "step": 67960 + }, + { + "epoch": 0.27460740070379, + "grad_norm": 622.7539672851562, + "learning_rate": 5.013045880616612e-06, + "loss": 60.7711, + "step": 67970 + }, + { + "epoch": 0.27464780196915767, + "grad_norm": 467.92230224609375, + "learning_rate": 5.011635517844753e-06, + "loss": 49.4, + "step": 67980 + }, + { + "epoch": 0.2746882032345253, + "grad_norm": 635.2073364257812, + "learning_rate": 5.010225154147107e-06, + "loss": 59.7034, + "step": 67990 + }, + { + "epoch": 0.27472860449989295, + "grad_norm": 533.5791015625, + "learning_rate": 5.008814789635894e-06, + "loss": 39.1137, + "step": 68000 + }, + { + "epoch": 0.2747690057652606, + "grad_norm": 494.2832336425781, + "learning_rate": 5.007404424423329e-06, + "loss": 35.455, + "step": 68010 + }, + { + "epoch": 0.27480940703062817, + "grad_norm": 331.5168151855469, + "learning_rate": 5.0059940586216284e-06, + "loss": 33.3954, + "step": 68020 + }, + { + "epoch": 0.2748498082959958, + "grad_norm": 687.5106811523438, + "learning_rate": 5.004583692343007e-06, + "loss": 37.4024, + "step": 68030 + }, + { + "epoch": 0.27489020956136345, + "grad_norm": 685.0592651367188, + "learning_rate": 5.003173325699682e-06, + "loss": 50.4846, + "step": 68040 + }, + { + "epoch": 0.2749306108267311, + "grad_norm": 316.0040588378906, + "learning_rate": 5.00176295880387e-06, + "loss": 43.7119, + "step": 68050 + }, + { + "epoch": 0.27497101209209873, + "grad_norm": 368.1940612792969, + "learning_rate": 5.000352591767787e-06, + "loss": 40.2011, + "step": 68060 + }, + { + "epoch": 0.27501141335746637, + "grad_norm": 483.9440002441406, + "learning_rate": 4.998942224703651e-06, + "loss": 40.0359, + "step": 68070 + }, + { + "epoch": 0.275051814622834, + "grad_norm": 1012.7457275390625, + "learning_rate": 4.997531857723678e-06, + "loss": 39.9802, + "step": 68080 + }, + { + "epoch": 0.2750922158882016, + "grad_norm": 481.0828857421875, + "learning_rate": 4.996121490940084e-06, + "loss": 41.3946, + "step": 68090 + }, + { + "epoch": 0.27513261715356924, + "grad_norm": 619.2692260742188, + "learning_rate": 4.994711124465084e-06, + "loss": 44.6533, + "step": 68100 + }, + { + "epoch": 0.2751730184189369, + "grad_norm": 598.6572875976562, + "learning_rate": 4.993300758410895e-06, + "loss": 45.2937, + "step": 68110 + }, + { + "epoch": 0.2752134196843045, + "grad_norm": 475.28607177734375, + "learning_rate": 4.991890392889735e-06, + "loss": 26.7339, + "step": 68120 + }, + { + "epoch": 0.27525382094967216, + "grad_norm": 296.6506042480469, + "learning_rate": 4.990480028013818e-06, + "loss": 49.4723, + "step": 68130 + }, + { + "epoch": 0.2752942222150398, + "grad_norm": 319.9155578613281, + "learning_rate": 4.989069663895361e-06, + "loss": 45.966, + "step": 68140 + }, + { + "epoch": 0.2753346234804074, + "grad_norm": 629.8634033203125, + "learning_rate": 4.9876593006465825e-06, + "loss": 48.5483, + "step": 68150 + }, + { + "epoch": 0.275375024745775, + "grad_norm": 704.5947265625, + "learning_rate": 4.986248938379696e-06, + "loss": 46.9476, + "step": 68160 + }, + { + "epoch": 0.27541542601114266, + "grad_norm": 696.213134765625, + "learning_rate": 4.984838577206921e-06, + "loss": 58.3571, + "step": 68170 + }, + { + "epoch": 0.2754558272765103, + "grad_norm": 624.2259521484375, + "learning_rate": 4.9834282172404665e-06, + "loss": 49.6634, + "step": 68180 + }, + { + "epoch": 0.27549622854187794, + "grad_norm": 592.13623046875, + "learning_rate": 4.982017858592555e-06, + "loss": 35.5057, + "step": 68190 + }, + { + "epoch": 0.2755366298072456, + "grad_norm": 311.4937438964844, + "learning_rate": 4.980607501375399e-06, + "loss": 48.7469, + "step": 68200 + }, + { + "epoch": 0.2755770310726132, + "grad_norm": 721.6624755859375, + "learning_rate": 4.979197145701216e-06, + "loss": 47.0013, + "step": 68210 + }, + { + "epoch": 0.2756174323379808, + "grad_norm": 306.4320068359375, + "learning_rate": 4.977786791682221e-06, + "loss": 36.7569, + "step": 68220 + }, + { + "epoch": 0.27565783360334845, + "grad_norm": 378.13616943359375, + "learning_rate": 4.976376439430627e-06, + "loss": 50.454, + "step": 68230 + }, + { + "epoch": 0.2756982348687161, + "grad_norm": 651.3588256835938, + "learning_rate": 4.974966089058652e-06, + "loss": 36.0825, + "step": 68240 + }, + { + "epoch": 0.2757386361340837, + "grad_norm": 538.3560180664062, + "learning_rate": 4.973555740678512e-06, + "loss": 33.1485, + "step": 68250 + }, + { + "epoch": 0.27577903739945137, + "grad_norm": 349.70184326171875, + "learning_rate": 4.972145394402421e-06, + "loss": 73.0145, + "step": 68260 + }, + { + "epoch": 0.275819438664819, + "grad_norm": 448.4791259765625, + "learning_rate": 4.9707350503425905e-06, + "loss": 41.0152, + "step": 68270 + }, + { + "epoch": 0.2758598399301866, + "grad_norm": 353.9982604980469, + "learning_rate": 4.969324708611239e-06, + "loss": 40.9351, + "step": 68280 + }, + { + "epoch": 0.27590024119555423, + "grad_norm": 779.5863647460938, + "learning_rate": 4.9679143693205785e-06, + "loss": 52.2675, + "step": 68290 + }, + { + "epoch": 0.27594064246092187, + "grad_norm": 714.5828857421875, + "learning_rate": 4.966504032582826e-06, + "loss": 36.8268, + "step": 68300 + }, + { + "epoch": 0.2759810437262895, + "grad_norm": 930.7247314453125, + "learning_rate": 4.965093698510192e-06, + "loss": 62.9139, + "step": 68310 + }, + { + "epoch": 0.27602144499165715, + "grad_norm": 710.6253051757812, + "learning_rate": 4.963683367214895e-06, + "loss": 59.0211, + "step": 68320 + }, + { + "epoch": 0.2760618462570248, + "grad_norm": 716.7061157226562, + "learning_rate": 4.962273038809143e-06, + "loss": 75.9322, + "step": 68330 + }, + { + "epoch": 0.2761022475223924, + "grad_norm": 957.3465576171875, + "learning_rate": 4.960862713405153e-06, + "loss": 46.2151, + "step": 68340 + }, + { + "epoch": 0.27614264878776, + "grad_norm": 339.1300048828125, + "learning_rate": 4.95945239111514e-06, + "loss": 36.6535, + "step": 68350 + }, + { + "epoch": 0.27618305005312765, + "grad_norm": 740.7291259765625, + "learning_rate": 4.9580420720513115e-06, + "loss": 55.0259, + "step": 68360 + }, + { + "epoch": 0.2762234513184953, + "grad_norm": 521.9520263671875, + "learning_rate": 4.956631756325882e-06, + "loss": 39.7061, + "step": 68370 + }, + { + "epoch": 0.27626385258386293, + "grad_norm": 475.54473876953125, + "learning_rate": 4.955221444051066e-06, + "loss": 37.5069, + "step": 68380 + }, + { + "epoch": 0.2763042538492306, + "grad_norm": 665.6690063476562, + "learning_rate": 4.953811135339073e-06, + "loss": 51.0758, + "step": 68390 + }, + { + "epoch": 0.2763446551145982, + "grad_norm": 360.49273681640625, + "learning_rate": 4.952400830302117e-06, + "loss": 47.2758, + "step": 68400 + }, + { + "epoch": 0.2763850563799658, + "grad_norm": 306.6684265136719, + "learning_rate": 4.950990529052409e-06, + "loss": 38.9826, + "step": 68410 + }, + { + "epoch": 0.27642545764533344, + "grad_norm": 776.9755249023438, + "learning_rate": 4.949580231702158e-06, + "loss": 45.6411, + "step": 68420 + }, + { + "epoch": 0.2764658589107011, + "grad_norm": 275.6937561035156, + "learning_rate": 4.94816993836358e-06, + "loss": 25.9277, + "step": 68430 + }, + { + "epoch": 0.2765062601760687, + "grad_norm": 131.82835388183594, + "learning_rate": 4.946759649148879e-06, + "loss": 30.0371, + "step": 68440 + }, + { + "epoch": 0.27654666144143636, + "grad_norm": 733.06103515625, + "learning_rate": 4.945349364170269e-06, + "loss": 45.6553, + "step": 68450 + }, + { + "epoch": 0.276587062706804, + "grad_norm": 650.412353515625, + "learning_rate": 4.94393908353996e-06, + "loss": 37.3156, + "step": 68460 + }, + { + "epoch": 0.2766274639721716, + "grad_norm": 412.7141418457031, + "learning_rate": 4.942528807370158e-06, + "loss": 38.6044, + "step": 68470 + }, + { + "epoch": 0.2766678652375392, + "grad_norm": 331.10858154296875, + "learning_rate": 4.941118535773078e-06, + "loss": 39.9055, + "step": 68480 + }, + { + "epoch": 0.27670826650290686, + "grad_norm": 708.8507690429688, + "learning_rate": 4.9397082688609245e-06, + "loss": 52.6693, + "step": 68490 + }, + { + "epoch": 0.2767486677682745, + "grad_norm": 411.26239013671875, + "learning_rate": 4.938298006745909e-06, + "loss": 36.8732, + "step": 68500 + }, + { + "epoch": 0.27678906903364214, + "grad_norm": 518.560791015625, + "learning_rate": 4.936887749540236e-06, + "loss": 31.6104, + "step": 68510 + }, + { + "epoch": 0.2768294702990098, + "grad_norm": 1007.0764770507812, + "learning_rate": 4.935477497356118e-06, + "loss": 45.9277, + "step": 68520 + }, + { + "epoch": 0.2768698715643774, + "grad_norm": 5712.80810546875, + "learning_rate": 4.934067250305757e-06, + "loss": 59.7893, + "step": 68530 + }, + { + "epoch": 0.276910272829745, + "grad_norm": 718.469970703125, + "learning_rate": 4.932657008501362e-06, + "loss": 63.6629, + "step": 68540 + }, + { + "epoch": 0.27695067409511265, + "grad_norm": 361.53131103515625, + "learning_rate": 4.931246772055141e-06, + "loss": 47.2617, + "step": 68550 + }, + { + "epoch": 0.2769910753604803, + "grad_norm": 730.575439453125, + "learning_rate": 4.9298365410792985e-06, + "loss": 39.6719, + "step": 68560 + }, + { + "epoch": 0.27703147662584793, + "grad_norm": 630.3861694335938, + "learning_rate": 4.928426315686039e-06, + "loss": 52.2734, + "step": 68570 + }, + { + "epoch": 0.27707187789121557, + "grad_norm": 597.8245849609375, + "learning_rate": 4.92701609598757e-06, + "loss": 42.7517, + "step": 68580 + }, + { + "epoch": 0.2771122791565832, + "grad_norm": 628.2814331054688, + "learning_rate": 4.925605882096096e-06, + "loss": 40.1165, + "step": 68590 + }, + { + "epoch": 0.2771526804219508, + "grad_norm": 862.098876953125, + "learning_rate": 4.924195674123821e-06, + "loss": 45.4163, + "step": 68600 + }, + { + "epoch": 0.27719308168731843, + "grad_norm": 431.5798034667969, + "learning_rate": 4.922785472182948e-06, + "loss": 32.5109, + "step": 68610 + }, + { + "epoch": 0.27723348295268607, + "grad_norm": 568.9783935546875, + "learning_rate": 4.92137527638568e-06, + "loss": 38.1525, + "step": 68620 + }, + { + "epoch": 0.2772738842180537, + "grad_norm": 349.13421630859375, + "learning_rate": 4.919965086844221e-06, + "loss": 49.8859, + "step": 68630 + }, + { + "epoch": 0.27731428548342135, + "grad_norm": 524.2000122070312, + "learning_rate": 4.9185549036707715e-06, + "loss": 40.2317, + "step": 68640 + }, + { + "epoch": 0.277354686748789, + "grad_norm": 556.9549560546875, + "learning_rate": 4.917144726977535e-06, + "loss": 40.5639, + "step": 68650 + }, + { + "epoch": 0.2773950880141566, + "grad_norm": 464.0063781738281, + "learning_rate": 4.915734556876713e-06, + "loss": 41.1225, + "step": 68660 + }, + { + "epoch": 0.2774354892795242, + "grad_norm": 646.3726196289062, + "learning_rate": 4.914324393480504e-06, + "loss": 42.3612, + "step": 68670 + }, + { + "epoch": 0.27747589054489186, + "grad_norm": 475.73748779296875, + "learning_rate": 4.9129142369011105e-06, + "loss": 39.3261, + "step": 68680 + }, + { + "epoch": 0.2775162918102595, + "grad_norm": 429.1013488769531, + "learning_rate": 4.911504087250735e-06, + "loss": 44.6936, + "step": 68690 + }, + { + "epoch": 0.27755669307562714, + "grad_norm": 395.2467041015625, + "learning_rate": 4.910093944641569e-06, + "loss": 49.2282, + "step": 68700 + }, + { + "epoch": 0.2775970943409948, + "grad_norm": 1080.4151611328125, + "learning_rate": 4.9086838091858155e-06, + "loss": 50.2219, + "step": 68710 + }, + { + "epoch": 0.2776374956063624, + "grad_norm": 725.0263671875, + "learning_rate": 4.9072736809956735e-06, + "loss": 41.7178, + "step": 68720 + }, + { + "epoch": 0.27767789687173, + "grad_norm": 240.93812561035156, + "learning_rate": 4.9058635601833384e-06, + "loss": 34.3757, + "step": 68730 + }, + { + "epoch": 0.27771829813709764, + "grad_norm": 422.4692687988281, + "learning_rate": 4.904453446861008e-06, + "loss": 47.2706, + "step": 68740 + }, + { + "epoch": 0.2777586994024653, + "grad_norm": 992.8607177734375, + "learning_rate": 4.903043341140879e-06, + "loss": 40.3693, + "step": 68750 + }, + { + "epoch": 0.2777991006678329, + "grad_norm": 671.8703002929688, + "learning_rate": 4.901633243135144e-06, + "loss": 41.3787, + "step": 68760 + }, + { + "epoch": 0.27783950193320056, + "grad_norm": 748.8638916015625, + "learning_rate": 4.900223152956003e-06, + "loss": 47.3397, + "step": 68770 + }, + { + "epoch": 0.2778799031985682, + "grad_norm": 290.8716735839844, + "learning_rate": 4.898813070715649e-06, + "loss": 37.6664, + "step": 68780 + }, + { + "epoch": 0.2779203044639358, + "grad_norm": 565.4688720703125, + "learning_rate": 4.897402996526273e-06, + "loss": 51.1212, + "step": 68790 + }, + { + "epoch": 0.2779607057293034, + "grad_norm": 898.2797241210938, + "learning_rate": 4.895992930500068e-06, + "loss": 42.9865, + "step": 68800 + }, + { + "epoch": 0.27800110699467107, + "grad_norm": 316.641845703125, + "learning_rate": 4.894582872749229e-06, + "loss": 37.7981, + "step": 68810 + }, + { + "epoch": 0.2780415082600387, + "grad_norm": 701.3886108398438, + "learning_rate": 4.893172823385947e-06, + "loss": 55.1056, + "step": 68820 + }, + { + "epoch": 0.27808190952540635, + "grad_norm": 780.6195068359375, + "learning_rate": 4.8917627825224146e-06, + "loss": 45.9489, + "step": 68830 + }, + { + "epoch": 0.278122310790774, + "grad_norm": 767.5986328125, + "learning_rate": 4.89035275027082e-06, + "loss": 42.4477, + "step": 68840 + }, + { + "epoch": 0.2781627120561416, + "grad_norm": 665.1085205078125, + "learning_rate": 4.888942726743353e-06, + "loss": 42.3077, + "step": 68850 + }, + { + "epoch": 0.2782031133215092, + "grad_norm": 823.7915649414062, + "learning_rate": 4.887532712052206e-06, + "loss": 48.6418, + "step": 68860 + }, + { + "epoch": 0.27824351458687685, + "grad_norm": 601.9495239257812, + "learning_rate": 4.886122706309563e-06, + "loss": 36.7191, + "step": 68870 + }, + { + "epoch": 0.2782839158522445, + "grad_norm": 538.6893310546875, + "learning_rate": 4.884712709627614e-06, + "loss": 42.2536, + "step": 68880 + }, + { + "epoch": 0.27832431711761213, + "grad_norm": 619.4857177734375, + "learning_rate": 4.8833027221185455e-06, + "loss": 37.4783, + "step": 68890 + }, + { + "epoch": 0.27836471838297977, + "grad_norm": 803.7899169921875, + "learning_rate": 4.881892743894543e-06, + "loss": 39.6659, + "step": 68900 + }, + { + "epoch": 0.2784051196483474, + "grad_norm": 377.7758483886719, + "learning_rate": 4.880482775067794e-06, + "loss": 35.0229, + "step": 68910 + }, + { + "epoch": 0.278445520913715, + "grad_norm": 546.9606323242188, + "learning_rate": 4.879072815750481e-06, + "loss": 38.2003, + "step": 68920 + }, + { + "epoch": 0.27848592217908263, + "grad_norm": 751.6867065429688, + "learning_rate": 4.87766286605479e-06, + "loss": 45.7661, + "step": 68930 + }, + { + "epoch": 0.2785263234444503, + "grad_norm": 623.1182250976562, + "learning_rate": 4.876252926092903e-06, + "loss": 45.6871, + "step": 68940 + }, + { + "epoch": 0.2785667247098179, + "grad_norm": 698.1951904296875, + "learning_rate": 4.874842995977004e-06, + "loss": 38.3063, + "step": 68950 + }, + { + "epoch": 0.27860712597518555, + "grad_norm": 1337.2913818359375, + "learning_rate": 4.873433075819272e-06, + "loss": 44.2702, + "step": 68960 + }, + { + "epoch": 0.2786475272405532, + "grad_norm": 386.43536376953125, + "learning_rate": 4.87202316573189e-06, + "loss": 31.441, + "step": 68970 + }, + { + "epoch": 0.2786879285059208, + "grad_norm": 242.0117645263672, + "learning_rate": 4.870613265827037e-06, + "loss": 38.5598, + "step": 68980 + }, + { + "epoch": 0.2787283297712884, + "grad_norm": 598.38818359375, + "learning_rate": 4.869203376216891e-06, + "loss": 33.6361, + "step": 68990 + }, + { + "epoch": 0.27876873103665606, + "grad_norm": 527.7412109375, + "learning_rate": 4.867793497013634e-06, + "loss": 46.4232, + "step": 69000 + }, + { + "epoch": 0.2788091323020237, + "grad_norm": 782.029541015625, + "learning_rate": 4.866383628329442e-06, + "loss": 49.8267, + "step": 69010 + }, + { + "epoch": 0.27884953356739134, + "grad_norm": 409.10711669921875, + "learning_rate": 4.86497377027649e-06, + "loss": 43.6214, + "step": 69020 + }, + { + "epoch": 0.278889934832759, + "grad_norm": 471.22906494140625, + "learning_rate": 4.863563922966957e-06, + "loss": 67.8977, + "step": 69030 + }, + { + "epoch": 0.2789303360981266, + "grad_norm": 400.7550354003906, + "learning_rate": 4.862154086513016e-06, + "loss": 59.4513, + "step": 69040 + }, + { + "epoch": 0.2789707373634942, + "grad_norm": 381.7715148925781, + "learning_rate": 4.860744261026841e-06, + "loss": 38.826, + "step": 69050 + }, + { + "epoch": 0.27901113862886184, + "grad_norm": 245.989501953125, + "learning_rate": 4.8593344466206075e-06, + "loss": 38.2717, + "step": 69060 + }, + { + "epoch": 0.2790515398942295, + "grad_norm": 419.75469970703125, + "learning_rate": 4.857924643406485e-06, + "loss": 27.1463, + "step": 69070 + }, + { + "epoch": 0.2790919411595971, + "grad_norm": 1163.119873046875, + "learning_rate": 4.856514851496647e-06, + "loss": 55.6151, + "step": 69080 + }, + { + "epoch": 0.27913234242496476, + "grad_norm": 999.8130493164062, + "learning_rate": 4.8551050710032625e-06, + "loss": 36.9399, + "step": 69090 + }, + { + "epoch": 0.2791727436903324, + "grad_norm": 463.41046142578125, + "learning_rate": 4.853695302038504e-06, + "loss": 47.6074, + "step": 69100 + }, + { + "epoch": 0.2792131449557, + "grad_norm": 614.2119140625, + "learning_rate": 4.8522855447145385e-06, + "loss": 54.542, + "step": 69110 + }, + { + "epoch": 0.27925354622106763, + "grad_norm": 488.19732666015625, + "learning_rate": 4.850875799143537e-06, + "loss": 38.1682, + "step": 69120 + }, + { + "epoch": 0.27929394748643527, + "grad_norm": 302.4384765625, + "learning_rate": 4.84946606543766e-06, + "loss": 38.7543, + "step": 69130 + }, + { + "epoch": 0.2793343487518029, + "grad_norm": 514.2261352539062, + "learning_rate": 4.848056343709079e-06, + "loss": 41.4556, + "step": 69140 + }, + { + "epoch": 0.27937475001717055, + "grad_norm": 966.3445434570312, + "learning_rate": 4.846646634069957e-06, + "loss": 53.6878, + "step": 69150 + }, + { + "epoch": 0.2794151512825382, + "grad_norm": 1981.818603515625, + "learning_rate": 4.845236936632458e-06, + "loss": 62.9226, + "step": 69160 + }, + { + "epoch": 0.27945555254790583, + "grad_norm": 507.5809631347656, + "learning_rate": 4.843827251508747e-06, + "loss": 63.0771, + "step": 69170 + }, + { + "epoch": 0.2794959538132734, + "grad_norm": 562.9615478515625, + "learning_rate": 4.842417578810984e-06, + "loss": 59.4227, + "step": 69180 + }, + { + "epoch": 0.27953635507864105, + "grad_norm": 459.4322814941406, + "learning_rate": 4.841007918651329e-06, + "loss": 47.2068, + "step": 69190 + }, + { + "epoch": 0.2795767563440087, + "grad_norm": 816.6414184570312, + "learning_rate": 4.839598271141947e-06, + "loss": 45.1818, + "step": 69200 + }, + { + "epoch": 0.27961715760937633, + "grad_norm": 435.2837829589844, + "learning_rate": 4.8381886363949956e-06, + "loss": 50.4104, + "step": 69210 + }, + { + "epoch": 0.27965755887474397, + "grad_norm": 3312.6845703125, + "learning_rate": 4.83677901452263e-06, + "loss": 59.2026, + "step": 69220 + }, + { + "epoch": 0.2796979601401116, + "grad_norm": 560.1244506835938, + "learning_rate": 4.835369405637009e-06, + "loss": 42.2866, + "step": 69230 + }, + { + "epoch": 0.2797383614054792, + "grad_norm": 674.087646484375, + "learning_rate": 4.833959809850288e-06, + "loss": 34.2167, + "step": 69240 + }, + { + "epoch": 0.27977876267084684, + "grad_norm": 978.073974609375, + "learning_rate": 4.832550227274624e-06, + "loss": 44.3173, + "step": 69250 + }, + { + "epoch": 0.2798191639362145, + "grad_norm": 613.9957275390625, + "learning_rate": 4.83114065802217e-06, + "loss": 41.8678, + "step": 69260 + }, + { + "epoch": 0.2798595652015821, + "grad_norm": 899.7896118164062, + "learning_rate": 4.829731102205079e-06, + "loss": 61.8224, + "step": 69270 + }, + { + "epoch": 0.27989996646694976, + "grad_norm": 672.7186279296875, + "learning_rate": 4.828321559935502e-06, + "loss": 52.7221, + "step": 69280 + }, + { + "epoch": 0.2799403677323174, + "grad_norm": 515.7698974609375, + "learning_rate": 4.826912031325592e-06, + "loss": 57.5608, + "step": 69290 + }, + { + "epoch": 0.279980768997685, + "grad_norm": 702.9324951171875, + "learning_rate": 4.825502516487497e-06, + "loss": 36.5247, + "step": 69300 + }, + { + "epoch": 0.2800211702630526, + "grad_norm": 620.7726440429688, + "learning_rate": 4.824093015533365e-06, + "loss": 47.5829, + "step": 69310 + }, + { + "epoch": 0.28006157152842026, + "grad_norm": 100.11849212646484, + "learning_rate": 4.822683528575344e-06, + "loss": 39.2016, + "step": 69320 + }, + { + "epoch": 0.2801019727937879, + "grad_norm": 275.9478759765625, + "learning_rate": 4.8212740557255815e-06, + "loss": 47.3615, + "step": 69330 + }, + { + "epoch": 0.28014237405915554, + "grad_norm": 548.790771484375, + "learning_rate": 4.819864597096222e-06, + "loss": 40.4179, + "step": 69340 + }, + { + "epoch": 0.2801827753245232, + "grad_norm": 593.6949462890625, + "learning_rate": 4.81845515279941e-06, + "loss": 55.0802, + "step": 69350 + }, + { + "epoch": 0.2802231765898908, + "grad_norm": 691.48193359375, + "learning_rate": 4.817045722947288e-06, + "loss": 47.7153, + "step": 69360 + }, + { + "epoch": 0.2802635778552584, + "grad_norm": 613.411376953125, + "learning_rate": 4.815636307651998e-06, + "loss": 40.1139, + "step": 69370 + }, + { + "epoch": 0.28030397912062605, + "grad_norm": 517.376220703125, + "learning_rate": 4.814226907025683e-06, + "loss": 35.4418, + "step": 69380 + }, + { + "epoch": 0.2803443803859937, + "grad_norm": 319.6499328613281, + "learning_rate": 4.812817521180479e-06, + "loss": 45.9838, + "step": 69390 + }, + { + "epoch": 0.2803847816513613, + "grad_norm": 973.4129028320312, + "learning_rate": 4.811408150228526e-06, + "loss": 54.5951, + "step": 69400 + }, + { + "epoch": 0.28042518291672897, + "grad_norm": 366.27972412109375, + "learning_rate": 4.80999879428196e-06, + "loss": 39.5605, + "step": 69410 + }, + { + "epoch": 0.2804655841820966, + "grad_norm": 646.7029418945312, + "learning_rate": 4.808589453452918e-06, + "loss": 42.2973, + "step": 69420 + }, + { + "epoch": 0.2805059854474642, + "grad_norm": 615.7694702148438, + "learning_rate": 4.807180127853535e-06, + "loss": 53.6376, + "step": 69430 + }, + { + "epoch": 0.28054638671283183, + "grad_norm": 758.0111083984375, + "learning_rate": 4.8057708175959446e-06, + "loss": 36.8911, + "step": 69440 + }, + { + "epoch": 0.28058678797819947, + "grad_norm": 847.0225830078125, + "learning_rate": 4.804361522792278e-06, + "loss": 43.2223, + "step": 69450 + }, + { + "epoch": 0.2806271892435671, + "grad_norm": 1029.685791015625, + "learning_rate": 4.8029522435546695e-06, + "loss": 40.3139, + "step": 69460 + }, + { + "epoch": 0.28066759050893475, + "grad_norm": 920.887451171875, + "learning_rate": 4.801542979995245e-06, + "loss": 40.4103, + "step": 69470 + }, + { + "epoch": 0.2807079917743024, + "grad_norm": 520.3526000976562, + "learning_rate": 4.800133732226135e-06, + "loss": 46.0557, + "step": 69480 + }, + { + "epoch": 0.28074839303967003, + "grad_norm": 504.0417175292969, + "learning_rate": 4.798724500359467e-06, + "loss": 32.6267, + "step": 69490 + }, + { + "epoch": 0.2807887943050376, + "grad_norm": 643.9005126953125, + "learning_rate": 4.7973152845073666e-06, + "loss": 88.861, + "step": 69500 + }, + { + "epoch": 0.28082919557040525, + "grad_norm": 539.6871948242188, + "learning_rate": 4.795906084781958e-06, + "loss": 43.0512, + "step": 69510 + }, + { + "epoch": 0.2808695968357729, + "grad_norm": 347.9564208984375, + "learning_rate": 4.7944969012953656e-06, + "loss": 53.9477, + "step": 69520 + }, + { + "epoch": 0.28090999810114053, + "grad_norm": 591.0164794921875, + "learning_rate": 4.793087734159711e-06, + "loss": 49.4503, + "step": 69530 + }, + { + "epoch": 0.2809503993665082, + "grad_norm": 681.713623046875, + "learning_rate": 4.791678583487118e-06, + "loss": 48.3232, + "step": 69540 + }, + { + "epoch": 0.2809908006318758, + "grad_norm": 392.1138610839844, + "learning_rate": 4.790269449389703e-06, + "loss": 43.5036, + "step": 69550 + }, + { + "epoch": 0.2810312018972434, + "grad_norm": 160.6968231201172, + "learning_rate": 4.788860331979586e-06, + "loss": 59.5492, + "step": 69560 + }, + { + "epoch": 0.28107160316261104, + "grad_norm": 382.0798034667969, + "learning_rate": 4.787451231368883e-06, + "loss": 35.5043, + "step": 69570 + }, + { + "epoch": 0.2811120044279787, + "grad_norm": 1062.69970703125, + "learning_rate": 4.786042147669709e-06, + "loss": 46.8691, + "step": 69580 + }, + { + "epoch": 0.2811524056933463, + "grad_norm": 397.5672607421875, + "learning_rate": 4.784633080994181e-06, + "loss": 29.8181, + "step": 69590 + }, + { + "epoch": 0.28119280695871396, + "grad_norm": 1378.0069580078125, + "learning_rate": 4.783224031454409e-06, + "loss": 55.9483, + "step": 69600 + }, + { + "epoch": 0.2812332082240816, + "grad_norm": 929.8328857421875, + "learning_rate": 4.781814999162507e-06, + "loss": 50.3427, + "step": 69610 + }, + { + "epoch": 0.2812736094894492, + "grad_norm": 401.11334228515625, + "learning_rate": 4.780405984230582e-06, + "loss": 37.6489, + "step": 69620 + }, + { + "epoch": 0.2813140107548168, + "grad_norm": 762.3184814453125, + "learning_rate": 4.778996986770747e-06, + "loss": 45.4038, + "step": 69630 + }, + { + "epoch": 0.28135441202018446, + "grad_norm": 762.9951171875, + "learning_rate": 4.777588006895109e-06, + "loss": 53.0929, + "step": 69640 + }, + { + "epoch": 0.2813948132855521, + "grad_norm": 726.9749755859375, + "learning_rate": 4.77617904471577e-06, + "loss": 40.7251, + "step": 69650 + }, + { + "epoch": 0.28143521455091974, + "grad_norm": 578.7279663085938, + "learning_rate": 4.774770100344838e-06, + "loss": 48.0613, + "step": 69660 + }, + { + "epoch": 0.2814756158162874, + "grad_norm": 322.8981628417969, + "learning_rate": 4.7733611738944155e-06, + "loss": 55.718, + "step": 69670 + }, + { + "epoch": 0.281516017081655, + "grad_norm": 658.8078002929688, + "learning_rate": 4.7719522654766044e-06, + "loss": 26.6201, + "step": 69680 + }, + { + "epoch": 0.2815564183470226, + "grad_norm": 553.6875, + "learning_rate": 4.7705433752035045e-06, + "loss": 59.374, + "step": 69690 + }, + { + "epoch": 0.28159681961239025, + "grad_norm": 843.7166748046875, + "learning_rate": 4.7691345031872156e-06, + "loss": 41.141, + "step": 69700 + }, + { + "epoch": 0.2816372208777579, + "grad_norm": 402.2727355957031, + "learning_rate": 4.767725649539833e-06, + "loss": 37.5811, + "step": 69710 + }, + { + "epoch": 0.28167762214312553, + "grad_norm": 288.26202392578125, + "learning_rate": 4.766316814373458e-06, + "loss": 55.0614, + "step": 69720 + }, + { + "epoch": 0.28171802340849317, + "grad_norm": 320.1476745605469, + "learning_rate": 4.76490799780018e-06, + "loss": 39.0787, + "step": 69730 + }, + { + "epoch": 0.2817584246738608, + "grad_norm": 546.8268432617188, + "learning_rate": 4.763499199932093e-06, + "loss": 64.9588, + "step": 69740 + }, + { + "epoch": 0.2817988259392284, + "grad_norm": 594.4404907226562, + "learning_rate": 4.762090420881289e-06, + "loss": 40.4867, + "step": 69750 + }, + { + "epoch": 0.28183922720459603, + "grad_norm": 742.597412109375, + "learning_rate": 4.760681660759859e-06, + "loss": 46.6788, + "step": 69760 + }, + { + "epoch": 0.2818796284699637, + "grad_norm": 452.48114013671875, + "learning_rate": 4.7592729196798905e-06, + "loss": 44.5193, + "step": 69770 + }, + { + "epoch": 0.2819200297353313, + "grad_norm": 762.345703125, + "learning_rate": 4.757864197753472e-06, + "loss": 45.2634, + "step": 69780 + }, + { + "epoch": 0.28196043100069895, + "grad_norm": 723.4268188476562, + "learning_rate": 4.7564554950926876e-06, + "loss": 47.0695, + "step": 69790 + }, + { + "epoch": 0.2820008322660666, + "grad_norm": 645.5535888671875, + "learning_rate": 4.755046811809621e-06, + "loss": 41.8543, + "step": 69800 + }, + { + "epoch": 0.28204123353143423, + "grad_norm": 886.3253784179688, + "learning_rate": 4.7536381480163575e-06, + "loss": 63.1216, + "step": 69810 + }, + { + "epoch": 0.2820816347968018, + "grad_norm": 304.3733825683594, + "learning_rate": 4.752229503824974e-06, + "loss": 27.179, + "step": 69820 + }, + { + "epoch": 0.28212203606216946, + "grad_norm": 712.4049072265625, + "learning_rate": 4.7508208793475515e-06, + "loss": 50.8489, + "step": 69830 + }, + { + "epoch": 0.2821624373275371, + "grad_norm": 627.2713012695312, + "learning_rate": 4.749412274696169e-06, + "loss": 65.211, + "step": 69840 + }, + { + "epoch": 0.28220283859290474, + "grad_norm": 517.7521362304688, + "learning_rate": 4.748003689982901e-06, + "loss": 38.2601, + "step": 69850 + }, + { + "epoch": 0.2822432398582724, + "grad_norm": 901.2315063476562, + "learning_rate": 4.746595125319823e-06, + "loss": 38.5679, + "step": 69860 + }, + { + "epoch": 0.28228364112364, + "grad_norm": 483.2677917480469, + "learning_rate": 4.745186580819008e-06, + "loss": 51.5532, + "step": 69870 + }, + { + "epoch": 0.2823240423890076, + "grad_norm": 455.71343994140625, + "learning_rate": 4.743778056592528e-06, + "loss": 50.9613, + "step": 69880 + }, + { + "epoch": 0.28236444365437524, + "grad_norm": 576.9340209960938, + "learning_rate": 4.742369552752453e-06, + "loss": 61.8175, + "step": 69890 + }, + { + "epoch": 0.2824048449197429, + "grad_norm": 586.7548217773438, + "learning_rate": 4.740961069410848e-06, + "loss": 38.9002, + "step": 69900 + }, + { + "epoch": 0.2824452461851105, + "grad_norm": 273.7797546386719, + "learning_rate": 4.7395526066797835e-06, + "loss": 39.9881, + "step": 69910 + }, + { + "epoch": 0.28248564745047816, + "grad_norm": 414.7781677246094, + "learning_rate": 4.738144164671322e-06, + "loss": 32.6235, + "step": 69920 + }, + { + "epoch": 0.2825260487158458, + "grad_norm": 523.3553466796875, + "learning_rate": 4.736735743497528e-06, + "loss": 35.1946, + "step": 69930 + }, + { + "epoch": 0.2825664499812134, + "grad_norm": 704.5969848632812, + "learning_rate": 4.735327343270461e-06, + "loss": 43.2801, + "step": 69940 + }, + { + "epoch": 0.282606851246581, + "grad_norm": 715.0891723632812, + "learning_rate": 4.733918964102185e-06, + "loss": 47.1886, + "step": 69950 + }, + { + "epoch": 0.28264725251194867, + "grad_norm": 824.6829833984375, + "learning_rate": 4.732510606104754e-06, + "loss": 37.4036, + "step": 69960 + }, + { + "epoch": 0.2826876537773163, + "grad_norm": 535.701904296875, + "learning_rate": 4.731102269390227e-06, + "loss": 66.5, + "step": 69970 + }, + { + "epoch": 0.28272805504268395, + "grad_norm": 602.7445678710938, + "learning_rate": 4.729693954070661e-06, + "loss": 34.4767, + "step": 69980 + }, + { + "epoch": 0.2827684563080516, + "grad_norm": 102.27546691894531, + "learning_rate": 4.728285660258104e-06, + "loss": 25.0424, + "step": 69990 + }, + { + "epoch": 0.2828088575734192, + "grad_norm": 678.1873168945312, + "learning_rate": 4.726877388064609e-06, + "loss": 50.8831, + "step": 70000 + }, + { + "epoch": 0.2828492588387868, + "grad_norm": 401.9143981933594, + "learning_rate": 4.725469137602229e-06, + "loss": 49.9991, + "step": 70010 + }, + { + "epoch": 0.28288966010415445, + "grad_norm": 745.3547973632812, + "learning_rate": 4.724060908983008e-06, + "loss": 39.7754, + "step": 70020 + }, + { + "epoch": 0.2829300613695221, + "grad_norm": 663.1378173828125, + "learning_rate": 4.7226527023189954e-06, + "loss": 42.3307, + "step": 70030 + }, + { + "epoch": 0.28297046263488973, + "grad_norm": 273.6854248046875, + "learning_rate": 4.721244517722233e-06, + "loss": 42.688, + "step": 70040 + }, + { + "epoch": 0.28301086390025737, + "grad_norm": 568.3843994140625, + "learning_rate": 4.719836355304766e-06, + "loss": 56.1069, + "step": 70050 + }, + { + "epoch": 0.283051265165625, + "grad_norm": 619.1213989257812, + "learning_rate": 4.718428215178634e-06, + "loss": 40.7905, + "step": 70060 + }, + { + "epoch": 0.2830916664309926, + "grad_norm": 857.2811889648438, + "learning_rate": 4.717020097455879e-06, + "loss": 54.3016, + "step": 70070 + }, + { + "epoch": 0.28313206769636023, + "grad_norm": 300.54571533203125, + "learning_rate": 4.715612002248533e-06, + "loss": 39.4976, + "step": 70080 + }, + { + "epoch": 0.2831724689617279, + "grad_norm": 1189.8184814453125, + "learning_rate": 4.714203929668637e-06, + "loss": 43.9997, + "step": 70090 + }, + { + "epoch": 0.2832128702270955, + "grad_norm": 450.262939453125, + "learning_rate": 4.712795879828221e-06, + "loss": 42.1777, + "step": 70100 + }, + { + "epoch": 0.28325327149246315, + "grad_norm": 565.9345092773438, + "learning_rate": 4.71138785283932e-06, + "loss": 44.1722, + "step": 70110 + }, + { + "epoch": 0.2832936727578308, + "grad_norm": 372.7854309082031, + "learning_rate": 4.709979848813963e-06, + "loss": 34.5973, + "step": 70120 + }, + { + "epoch": 0.2833340740231984, + "grad_norm": 623.8758544921875, + "learning_rate": 4.7085718678641776e-06, + "loss": 43.4469, + "step": 70130 + }, + { + "epoch": 0.283374475288566, + "grad_norm": 970.5692138671875, + "learning_rate": 4.70716391010199e-06, + "loss": 50.1333, + "step": 70140 + }, + { + "epoch": 0.28341487655393366, + "grad_norm": 793.2423095703125, + "learning_rate": 4.70575597563943e-06, + "loss": 39.2793, + "step": 70150 + }, + { + "epoch": 0.2834552778193013, + "grad_norm": 482.6946716308594, + "learning_rate": 4.704348064588514e-06, + "loss": 41.613, + "step": 70160 + }, + { + "epoch": 0.28349567908466894, + "grad_norm": 557.0717163085938, + "learning_rate": 4.702940177061266e-06, + "loss": 43.9438, + "step": 70170 + }, + { + "epoch": 0.2835360803500366, + "grad_norm": 970.6007690429688, + "learning_rate": 4.7015323131697035e-06, + "loss": 44.938, + "step": 70180 + }, + { + "epoch": 0.2835764816154042, + "grad_norm": 463.7577209472656, + "learning_rate": 4.700124473025846e-06, + "loss": 33.1951, + "step": 70190 + }, + { + "epoch": 0.2836168828807718, + "grad_norm": 236.90660095214844, + "learning_rate": 4.6987166567417085e-06, + "loss": 24.4781, + "step": 70200 + }, + { + "epoch": 0.28365728414613944, + "grad_norm": 600.7906494140625, + "learning_rate": 4.697308864429303e-06, + "loss": 35.3102, + "step": 70210 + }, + { + "epoch": 0.2836976854115071, + "grad_norm": 244.70358276367188, + "learning_rate": 4.695901096200643e-06, + "loss": 35.8941, + "step": 70220 + }, + { + "epoch": 0.2837380866768747, + "grad_norm": 543.962646484375, + "learning_rate": 4.694493352167736e-06, + "loss": 57.4858, + "step": 70230 + }, + { + "epoch": 0.28377848794224236, + "grad_norm": 584.345947265625, + "learning_rate": 4.693085632442593e-06, + "loss": 33.6563, + "step": 70240 + }, + { + "epoch": 0.28381888920761, + "grad_norm": 627.2140502929688, + "learning_rate": 4.691677937137217e-06, + "loss": 47.8382, + "step": 70250 + }, + { + "epoch": 0.2838592904729776, + "grad_norm": 156.89602661132812, + "learning_rate": 4.690270266363612e-06, + "loss": 48.3714, + "step": 70260 + }, + { + "epoch": 0.28389969173834523, + "grad_norm": 591.1685791015625, + "learning_rate": 4.688862620233779e-06, + "loss": 31.6053, + "step": 70270 + }, + { + "epoch": 0.28394009300371287, + "grad_norm": 174.83114624023438, + "learning_rate": 4.687454998859721e-06, + "loss": 27.5564, + "step": 70280 + }, + { + "epoch": 0.2839804942690805, + "grad_norm": 489.0082092285156, + "learning_rate": 4.686047402353433e-06, + "loss": 51.9983, + "step": 70290 + }, + { + "epoch": 0.28402089553444815, + "grad_norm": 3722.665771484375, + "learning_rate": 4.684639830826913e-06, + "loss": 57.5925, + "step": 70300 + }, + { + "epoch": 0.2840612967998158, + "grad_norm": 668.0916137695312, + "learning_rate": 4.683232284392155e-06, + "loss": 53.0558, + "step": 70310 + }, + { + "epoch": 0.28410169806518343, + "grad_norm": 560.221923828125, + "learning_rate": 4.681824763161151e-06, + "loss": 46.5698, + "step": 70320 + }, + { + "epoch": 0.284142099330551, + "grad_norm": 267.6634826660156, + "learning_rate": 4.6804172672458905e-06, + "loss": 37.9598, + "step": 70330 + }, + { + "epoch": 0.28418250059591865, + "grad_norm": 385.9459533691406, + "learning_rate": 4.67900979675836e-06, + "loss": 44.058, + "step": 70340 + }, + { + "epoch": 0.2842229018612863, + "grad_norm": 889.5348510742188, + "learning_rate": 4.677602351810547e-06, + "loss": 49.9251, + "step": 70350 + }, + { + "epoch": 0.28426330312665393, + "grad_norm": 780.3435668945312, + "learning_rate": 4.676194932514435e-06, + "loss": 35.594, + "step": 70360 + }, + { + "epoch": 0.2843037043920216, + "grad_norm": 920.3884887695312, + "learning_rate": 4.674787538982006e-06, + "loss": 66.1759, + "step": 70370 + }, + { + "epoch": 0.2843441056573892, + "grad_norm": 562.5645751953125, + "learning_rate": 4.6733801713252405e-06, + "loss": 33.8848, + "step": 70380 + }, + { + "epoch": 0.2843845069227568, + "grad_norm": 711.589599609375, + "learning_rate": 4.671972829656116e-06, + "loss": 55.0332, + "step": 70390 + }, + { + "epoch": 0.28442490818812444, + "grad_norm": 682.6295776367188, + "learning_rate": 4.670565514086607e-06, + "loss": 47.3515, + "step": 70400 + }, + { + "epoch": 0.2844653094534921, + "grad_norm": 227.3060302734375, + "learning_rate": 4.669158224728691e-06, + "loss": 36.4143, + "step": 70410 + }, + { + "epoch": 0.2845057107188597, + "grad_norm": 2018.75048828125, + "learning_rate": 4.667750961694334e-06, + "loss": 30.8619, + "step": 70420 + }, + { + "epoch": 0.28454611198422736, + "grad_norm": 277.3977355957031, + "learning_rate": 4.666343725095509e-06, + "loss": 41.897, + "step": 70430 + }, + { + "epoch": 0.284586513249595, + "grad_norm": 90.26355743408203, + "learning_rate": 4.6649365150441825e-06, + "loss": 40.3659, + "step": 70440 + }, + { + "epoch": 0.2846269145149626, + "grad_norm": 364.5423583984375, + "learning_rate": 4.66352933165232e-06, + "loss": 37.3343, + "step": 70450 + }, + { + "epoch": 0.2846673157803302, + "grad_norm": 640.2406005859375, + "learning_rate": 4.6621221750318835e-06, + "loss": 51.8274, + "step": 70460 + }, + { + "epoch": 0.28470771704569786, + "grad_norm": 935.7151489257812, + "learning_rate": 4.660715045294834e-06, + "loss": 33.1354, + "step": 70470 + }, + { + "epoch": 0.2847481183110655, + "grad_norm": 367.4470520019531, + "learning_rate": 4.659307942553133e-06, + "loss": 32.8019, + "step": 70480 + }, + { + "epoch": 0.28478851957643314, + "grad_norm": 305.1088562011719, + "learning_rate": 4.657900866918735e-06, + "loss": 50.5888, + "step": 70490 + }, + { + "epoch": 0.2848289208418008, + "grad_norm": 319.4598388671875, + "learning_rate": 4.6564938185035954e-06, + "loss": 42.0434, + "step": 70500 + }, + { + "epoch": 0.2848693221071684, + "grad_norm": 491.0206604003906, + "learning_rate": 4.655086797419666e-06, + "loss": 50.2816, + "step": 70510 + }, + { + "epoch": 0.284909723372536, + "grad_norm": 647.287353515625, + "learning_rate": 4.653679803778897e-06, + "loss": 58.8979, + "step": 70520 + }, + { + "epoch": 0.28495012463790365, + "grad_norm": 276.81549072265625, + "learning_rate": 4.652272837693237e-06, + "loss": 54.2775, + "step": 70530 + }, + { + "epoch": 0.2849905259032713, + "grad_norm": 768.6326904296875, + "learning_rate": 4.650865899274632e-06, + "loss": 61.2162, + "step": 70540 + }, + { + "epoch": 0.2850309271686389, + "grad_norm": 990.27294921875, + "learning_rate": 4.649458988635023e-06, + "loss": 60.081, + "step": 70550 + }, + { + "epoch": 0.28507132843400657, + "grad_norm": 754.9923706054688, + "learning_rate": 4.6480521058863546e-06, + "loss": 43.4132, + "step": 70560 + }, + { + "epoch": 0.2851117296993742, + "grad_norm": 635.0836791992188, + "learning_rate": 4.646645251140564e-06, + "loss": 26.996, + "step": 70570 + }, + { + "epoch": 0.2851521309647418, + "grad_norm": 1053.806640625, + "learning_rate": 4.6452384245095924e-06, + "loss": 30.2651, + "step": 70580 + }, + { + "epoch": 0.28519253223010943, + "grad_norm": 500.2285461425781, + "learning_rate": 4.643831626105369e-06, + "loss": 45.7198, + "step": 70590 + }, + { + "epoch": 0.28523293349547707, + "grad_norm": 801.8435668945312, + "learning_rate": 4.642424856039827e-06, + "loss": 43.3361, + "step": 70600 + }, + { + "epoch": 0.2852733347608447, + "grad_norm": 685.59033203125, + "learning_rate": 4.6410181144249e-06, + "loss": 54.9417, + "step": 70610 + }, + { + "epoch": 0.28531373602621235, + "grad_norm": 471.1318054199219, + "learning_rate": 4.639611401372514e-06, + "loss": 44.3713, + "step": 70620 + }, + { + "epoch": 0.28535413729158, + "grad_norm": 1329.538818359375, + "learning_rate": 4.638204716994594e-06, + "loss": 52.105, + "step": 70630 + }, + { + "epoch": 0.28539453855694763, + "grad_norm": 437.1035461425781, + "learning_rate": 4.636798061403065e-06, + "loss": 38.8913, + "step": 70640 + }, + { + "epoch": 0.2854349398223152, + "grad_norm": 517.3322143554688, + "learning_rate": 4.635391434709847e-06, + "loss": 69.1764, + "step": 70650 + }, + { + "epoch": 0.28547534108768285, + "grad_norm": 502.039306640625, + "learning_rate": 4.6339848370268585e-06, + "loss": 37.4621, + "step": 70660 + }, + { + "epoch": 0.2855157423530505, + "grad_norm": 329.94049072265625, + "learning_rate": 4.63257826846602e-06, + "loss": 41.4155, + "step": 70670 + }, + { + "epoch": 0.28555614361841813, + "grad_norm": 1370.1334228515625, + "learning_rate": 4.6311717291392396e-06, + "loss": 33.5599, + "step": 70680 + }, + { + "epoch": 0.2855965448837858, + "grad_norm": 773.607177734375, + "learning_rate": 4.629765219158433e-06, + "loss": 48.0365, + "step": 70690 + }, + { + "epoch": 0.2856369461491534, + "grad_norm": 497.14300537109375, + "learning_rate": 4.628358738635507e-06, + "loss": 41.8579, + "step": 70700 + }, + { + "epoch": 0.285677347414521, + "grad_norm": 1049.352294921875, + "learning_rate": 4.626952287682372e-06, + "loss": 44.9301, + "step": 70710 + }, + { + "epoch": 0.28571774867988864, + "grad_norm": 446.8216552734375, + "learning_rate": 4.6255458664109306e-06, + "loss": 38.319, + "step": 70720 + }, + { + "epoch": 0.2857581499452563, + "grad_norm": 256.68109130859375, + "learning_rate": 4.624139474933087e-06, + "loss": 41.6502, + "step": 70730 + }, + { + "epoch": 0.2857985512106239, + "grad_norm": 548.9017944335938, + "learning_rate": 4.62273311336074e-06, + "loss": 37.4628, + "step": 70740 + }, + { + "epoch": 0.28583895247599156, + "grad_norm": 468.376708984375, + "learning_rate": 4.62132678180579e-06, + "loss": 51.4482, + "step": 70750 + }, + { + "epoch": 0.2858793537413592, + "grad_norm": 866.9331665039062, + "learning_rate": 4.619920480380127e-06, + "loss": 52.7439, + "step": 70760 + }, + { + "epoch": 0.2859197550067268, + "grad_norm": 532.3231201171875, + "learning_rate": 4.618514209195648e-06, + "loss": 54.0081, + "step": 70770 + }, + { + "epoch": 0.2859601562720944, + "grad_norm": 326.88629150390625, + "learning_rate": 4.617107968364243e-06, + "loss": 78.2161, + "step": 70780 + }, + { + "epoch": 0.28600055753746206, + "grad_norm": 436.6565856933594, + "learning_rate": 4.615701757997799e-06, + "loss": 53.2956, + "step": 70790 + }, + { + "epoch": 0.2860409588028297, + "grad_norm": 329.3333435058594, + "learning_rate": 4.614295578208202e-06, + "loss": 43.7113, + "step": 70800 + }, + { + "epoch": 0.28608136006819734, + "grad_norm": 401.3062438964844, + "learning_rate": 4.612889429107337e-06, + "loss": 61.199, + "step": 70810 + }, + { + "epoch": 0.286121761333565, + "grad_norm": 983.342529296875, + "learning_rate": 4.611483310807082e-06, + "loss": 51.3303, + "step": 70820 + }, + { + "epoch": 0.2861621625989326, + "grad_norm": 571.834716796875, + "learning_rate": 4.610077223419319e-06, + "loss": 37.479, + "step": 70830 + }, + { + "epoch": 0.2862025638643002, + "grad_norm": 622.2101440429688, + "learning_rate": 4.608671167055922e-06, + "loss": 41.2861, + "step": 70840 + }, + { + "epoch": 0.28624296512966785, + "grad_norm": 771.7720336914062, + "learning_rate": 4.607265141828762e-06, + "loss": 46.7046, + "step": 70850 + }, + { + "epoch": 0.2862833663950355, + "grad_norm": 733.5690307617188, + "learning_rate": 4.605859147849713e-06, + "loss": 50.6617, + "step": 70860 + }, + { + "epoch": 0.28632376766040313, + "grad_norm": 564.01513671875, + "learning_rate": 4.604453185230643e-06, + "loss": 35.724, + "step": 70870 + }, + { + "epoch": 0.28636416892577077, + "grad_norm": 383.891845703125, + "learning_rate": 4.603047254083418e-06, + "loss": 26.681, + "step": 70880 + }, + { + "epoch": 0.2864045701911384, + "grad_norm": 464.1899719238281, + "learning_rate": 4.601641354519901e-06, + "loss": 39.4786, + "step": 70890 + }, + { + "epoch": 0.286444971456506, + "grad_norm": 455.7287292480469, + "learning_rate": 4.6002354866519526e-06, + "loss": 53.9625, + "step": 70900 + }, + { + "epoch": 0.28648537272187363, + "grad_norm": 873.5855102539062, + "learning_rate": 4.598829650591432e-06, + "loss": 36.5101, + "step": 70910 + }, + { + "epoch": 0.2865257739872413, + "grad_norm": 599.9312133789062, + "learning_rate": 4.597423846450196e-06, + "loss": 40.5595, + "step": 70920 + }, + { + "epoch": 0.2865661752526089, + "grad_norm": 925.1723022460938, + "learning_rate": 4.596018074340097e-06, + "loss": 55.3253, + "step": 70930 + }, + { + "epoch": 0.28660657651797655, + "grad_norm": 484.8789367675781, + "learning_rate": 4.594612334372985e-06, + "loss": 52.8668, + "step": 70940 + }, + { + "epoch": 0.2866469777833442, + "grad_norm": 645.4853515625, + "learning_rate": 4.59320662666071e-06, + "loss": 42.3059, + "step": 70950 + }, + { + "epoch": 0.28668737904871183, + "grad_norm": 333.7181701660156, + "learning_rate": 4.591800951315116e-06, + "loss": 77.4099, + "step": 70960 + }, + { + "epoch": 0.2867277803140794, + "grad_norm": 438.87396240234375, + "learning_rate": 4.590395308448046e-06, + "loss": 53.1856, + "step": 70970 + }, + { + "epoch": 0.28676818157944706, + "grad_norm": 698.4015502929688, + "learning_rate": 4.588989698171343e-06, + "loss": 54.2699, + "step": 70980 + }, + { + "epoch": 0.2868085828448147, + "grad_norm": 607.1814575195312, + "learning_rate": 4.587584120596842e-06, + "loss": 39.6917, + "step": 70990 + }, + { + "epoch": 0.28684898411018234, + "grad_norm": 1104.1712646484375, + "learning_rate": 4.58617857583638e-06, + "loss": 41.4022, + "step": 71000 + }, + { + "epoch": 0.28688938537555, + "grad_norm": 380.30560302734375, + "learning_rate": 4.5847730640017926e-06, + "loss": 43.8242, + "step": 71010 + }, + { + "epoch": 0.2869297866409176, + "grad_norm": 425.9400329589844, + "learning_rate": 4.5833675852049045e-06, + "loss": 46.4129, + "step": 71020 + }, + { + "epoch": 0.2869701879062852, + "grad_norm": 443.4739074707031, + "learning_rate": 4.5819621395575445e-06, + "loss": 37.8977, + "step": 71030 + }, + { + "epoch": 0.28701058917165284, + "grad_norm": 384.0914001464844, + "learning_rate": 4.5805567271715395e-06, + "loss": 28.2375, + "step": 71040 + }, + { + "epoch": 0.2870509904370205, + "grad_norm": 606.654541015625, + "learning_rate": 4.5791513481587105e-06, + "loss": 40.5883, + "step": 71050 + }, + { + "epoch": 0.2870913917023881, + "grad_norm": 816.3211669921875, + "learning_rate": 4.577746002630878e-06, + "loss": 38.1152, + "step": 71060 + }, + { + "epoch": 0.28713179296775576, + "grad_norm": 1249.8865966796875, + "learning_rate": 4.576340690699857e-06, + "loss": 26.9667, + "step": 71070 + }, + { + "epoch": 0.2871721942331234, + "grad_norm": 846.2115478515625, + "learning_rate": 4.574935412477464e-06, + "loss": 34.3009, + "step": 71080 + }, + { + "epoch": 0.287212595498491, + "grad_norm": 278.2926940917969, + "learning_rate": 4.573530168075508e-06, + "loss": 38.9183, + "step": 71090 + }, + { + "epoch": 0.2872529967638586, + "grad_norm": 823.379150390625, + "learning_rate": 4.572124957605803e-06, + "loss": 47.3542, + "step": 71100 + }, + { + "epoch": 0.28729339802922627, + "grad_norm": 279.0650939941406, + "learning_rate": 4.5707197811801484e-06, + "loss": 36.309, + "step": 71110 + }, + { + "epoch": 0.2873337992945939, + "grad_norm": 255.07122802734375, + "learning_rate": 4.569314638910352e-06, + "loss": 22.5709, + "step": 71120 + }, + { + "epoch": 0.28737420055996155, + "grad_norm": 752.4155883789062, + "learning_rate": 4.56790953090821e-06, + "loss": 65.827, + "step": 71130 + }, + { + "epoch": 0.2874146018253292, + "grad_norm": 731.970458984375, + "learning_rate": 4.566504457285527e-06, + "loss": 46.3308, + "step": 71140 + }, + { + "epoch": 0.2874550030906968, + "grad_norm": 5122.3427734375, + "learning_rate": 4.565099418154093e-06, + "loss": 75.2653, + "step": 71150 + }, + { + "epoch": 0.2874954043560644, + "grad_norm": 815.7695922851562, + "learning_rate": 4.563694413625703e-06, + "loss": 47.1574, + "step": 71160 + }, + { + "epoch": 0.28753580562143205, + "grad_norm": 724.8348999023438, + "learning_rate": 4.5622894438121465e-06, + "loss": 30.6963, + "step": 71170 + }, + { + "epoch": 0.2875762068867997, + "grad_norm": 459.6385498046875, + "learning_rate": 4.560884508825212e-06, + "loss": 49.0495, + "step": 71180 + }, + { + "epoch": 0.28761660815216733, + "grad_norm": 1621.1512451171875, + "learning_rate": 4.559479608776679e-06, + "loss": 74.2594, + "step": 71190 + }, + { + "epoch": 0.28765700941753497, + "grad_norm": 684.2808227539062, + "learning_rate": 4.558074743778333e-06, + "loss": 46.0973, + "step": 71200 + }, + { + "epoch": 0.2876974106829026, + "grad_norm": 600.880859375, + "learning_rate": 4.556669913941951e-06, + "loss": 40.5554, + "step": 71210 + }, + { + "epoch": 0.2877378119482702, + "grad_norm": 1257.7164306640625, + "learning_rate": 4.555265119379308e-06, + "loss": 42.7592, + "step": 71220 + }, + { + "epoch": 0.28777821321363783, + "grad_norm": 493.02850341796875, + "learning_rate": 4.55386036020218e-06, + "loss": 32.3741, + "step": 71230 + }, + { + "epoch": 0.2878186144790055, + "grad_norm": 273.1938171386719, + "learning_rate": 4.552455636522335e-06, + "loss": 54.3847, + "step": 71240 + }, + { + "epoch": 0.2878590157443731, + "grad_norm": 698.703369140625, + "learning_rate": 4.551050948451542e-06, + "loss": 54.6396, + "step": 71250 + }, + { + "epoch": 0.28789941700974075, + "grad_norm": 141.8015594482422, + "learning_rate": 4.549646296101564e-06, + "loss": 53.9065, + "step": 71260 + }, + { + "epoch": 0.2879398182751084, + "grad_norm": 335.04571533203125, + "learning_rate": 4.548241679584165e-06, + "loss": 40.8237, + "step": 71270 + }, + { + "epoch": 0.28798021954047603, + "grad_norm": 483.5780029296875, + "learning_rate": 4.546837099011101e-06, + "loss": 63.2328, + "step": 71280 + }, + { + "epoch": 0.2880206208058436, + "grad_norm": 878.0452270507812, + "learning_rate": 4.545432554494128e-06, + "loss": 39.5612, + "step": 71290 + }, + { + "epoch": 0.28806102207121126, + "grad_norm": 244.78079223632812, + "learning_rate": 4.544028046145002e-06, + "loss": 41.3371, + "step": 71300 + }, + { + "epoch": 0.2881014233365789, + "grad_norm": 905.211669921875, + "learning_rate": 4.542623574075471e-06, + "loss": 42.8943, + "step": 71310 + }, + { + "epoch": 0.28814182460194654, + "grad_norm": 414.203125, + "learning_rate": 4.541219138397283e-06, + "loss": 40.4702, + "step": 71320 + }, + { + "epoch": 0.2881822258673142, + "grad_norm": 591.1458129882812, + "learning_rate": 4.539814739222182e-06, + "loss": 38.5632, + "step": 71330 + }, + { + "epoch": 0.2882226271326818, + "grad_norm": 525.4293212890625, + "learning_rate": 4.538410376661912e-06, + "loss": 35.0831, + "step": 71340 + }, + { + "epoch": 0.2882630283980494, + "grad_norm": 491.466796875, + "learning_rate": 4.537006050828209e-06, + "loss": 49.4348, + "step": 71350 + }, + { + "epoch": 0.28830342966341704, + "grad_norm": 768.8724975585938, + "learning_rate": 4.535601761832811e-06, + "loss": 46.8842, + "step": 71360 + }, + { + "epoch": 0.2883438309287847, + "grad_norm": 522.435791015625, + "learning_rate": 4.534197509787448e-06, + "loss": 40.2206, + "step": 71370 + }, + { + "epoch": 0.2883842321941523, + "grad_norm": 519.8639526367188, + "learning_rate": 4.5327932948038525e-06, + "loss": 34.5767, + "step": 71380 + }, + { + "epoch": 0.28842463345951996, + "grad_norm": 626.932861328125, + "learning_rate": 4.5313891169937495e-06, + "loss": 42.644, + "step": 71390 + }, + { + "epoch": 0.2884650347248876, + "grad_norm": 629.30517578125, + "learning_rate": 4.529984976468864e-06, + "loss": 46.1373, + "step": 71400 + }, + { + "epoch": 0.2885054359902552, + "grad_norm": 1649.0540771484375, + "learning_rate": 4.528580873340916e-06, + "loss": 65.9521, + "step": 71410 + }, + { + "epoch": 0.28854583725562283, + "grad_norm": 472.5841369628906, + "learning_rate": 4.5271768077216245e-06, + "loss": 45.2228, + "step": 71420 + }, + { + "epoch": 0.28858623852099047, + "grad_norm": 546.1096801757812, + "learning_rate": 4.525772779722705e-06, + "loss": 43.5789, + "step": 71430 + }, + { + "epoch": 0.2886266397863581, + "grad_norm": 660.35302734375, + "learning_rate": 4.524368789455872e-06, + "loss": 36.7619, + "step": 71440 + }, + { + "epoch": 0.28866704105172575, + "grad_norm": 575.0551147460938, + "learning_rate": 4.5229648370328276e-06, + "loss": 62.3495, + "step": 71450 + }, + { + "epoch": 0.2887074423170934, + "grad_norm": 673.6522827148438, + "learning_rate": 4.521560922565282e-06, + "loss": 35.792, + "step": 71460 + }, + { + "epoch": 0.28874784358246103, + "grad_norm": 661.4155883789062, + "learning_rate": 4.52015704616494e-06, + "loss": 36.4599, + "step": 71470 + }, + { + "epoch": 0.2887882448478286, + "grad_norm": 745.6897583007812, + "learning_rate": 4.518753207943498e-06, + "loss": 47.3433, + "step": 71480 + }, + { + "epoch": 0.28882864611319625, + "grad_norm": 543.9266967773438, + "learning_rate": 4.517349408012656e-06, + "loss": 42.4632, + "step": 71490 + }, + { + "epoch": 0.2888690473785639, + "grad_norm": 1051.2540283203125, + "learning_rate": 4.515945646484105e-06, + "loss": 37.7069, + "step": 71500 + }, + { + "epoch": 0.28890944864393153, + "grad_norm": 561.8253173828125, + "learning_rate": 4.514541923469538e-06, + "loss": 43.3329, + "step": 71510 + }, + { + "epoch": 0.2889498499092992, + "grad_norm": 700.4878540039062, + "learning_rate": 4.513138239080641e-06, + "loss": 37.6151, + "step": 71520 + }, + { + "epoch": 0.2889902511746668, + "grad_norm": 837.8364868164062, + "learning_rate": 4.511734593429104e-06, + "loss": 57.4711, + "step": 71530 + }, + { + "epoch": 0.2890306524400344, + "grad_norm": 277.2738342285156, + "learning_rate": 4.510330986626602e-06, + "loss": 30.9063, + "step": 71540 + }, + { + "epoch": 0.28907105370540204, + "grad_norm": 717.367431640625, + "learning_rate": 4.5089274187848144e-06, + "loss": 29.4986, + "step": 71550 + }, + { + "epoch": 0.2891114549707697, + "grad_norm": 545.1079711914062, + "learning_rate": 4.507523890015421e-06, + "loss": 42.2378, + "step": 71560 + }, + { + "epoch": 0.2891518562361373, + "grad_norm": 694.6339721679688, + "learning_rate": 4.5061204004300905e-06, + "loss": 57.6911, + "step": 71570 + }, + { + "epoch": 0.28919225750150496, + "grad_norm": 1314.3990478515625, + "learning_rate": 4.504716950140492e-06, + "loss": 41.1658, + "step": 71580 + }, + { + "epoch": 0.2892326587668726, + "grad_norm": 1044.653076171875, + "learning_rate": 4.503313539258294e-06, + "loss": 50.9684, + "step": 71590 + }, + { + "epoch": 0.28927306003224024, + "grad_norm": 604.3131713867188, + "learning_rate": 4.501910167895158e-06, + "loss": 42.9722, + "step": 71600 + }, + { + "epoch": 0.2893134612976078, + "grad_norm": 585.9352416992188, + "learning_rate": 4.500506836162746e-06, + "loss": 27.7333, + "step": 71610 + }, + { + "epoch": 0.28935386256297546, + "grad_norm": 377.50897216796875, + "learning_rate": 4.499103544172711e-06, + "loss": 41.019, + "step": 71620 + }, + { + "epoch": 0.2893942638283431, + "grad_norm": 694.3080444335938, + "learning_rate": 4.497700292036708e-06, + "loss": 52.2126, + "step": 71630 + }, + { + "epoch": 0.28943466509371074, + "grad_norm": 3354.697265625, + "learning_rate": 4.4962970798663865e-06, + "loss": 63.4105, + "step": 71640 + }, + { + "epoch": 0.2894750663590784, + "grad_norm": 435.9419250488281, + "learning_rate": 4.494893907773394e-06, + "loss": 53.5595, + "step": 71650 + }, + { + "epoch": 0.289515467624446, + "grad_norm": 662.7257080078125, + "learning_rate": 4.493490775869377e-06, + "loss": 32.0002, + "step": 71660 + }, + { + "epoch": 0.2895558688898136, + "grad_norm": 323.3299560546875, + "learning_rate": 4.492087684265975e-06, + "loss": 38.7141, + "step": 71670 + }, + { + "epoch": 0.28959627015518125, + "grad_norm": 1028.1826171875, + "learning_rate": 4.490684633074824e-06, + "loss": 41.6504, + "step": 71680 + }, + { + "epoch": 0.2896366714205489, + "grad_norm": 346.1958312988281, + "learning_rate": 4.489281622407559e-06, + "loss": 58.8113, + "step": 71690 + }, + { + "epoch": 0.2896770726859165, + "grad_norm": 248.37896728515625, + "learning_rate": 4.487878652375813e-06, + "loss": 36.2968, + "step": 71700 + }, + { + "epoch": 0.28971747395128417, + "grad_norm": 625.3243408203125, + "learning_rate": 4.486475723091211e-06, + "loss": 57.1991, + "step": 71710 + }, + { + "epoch": 0.2897578752166518, + "grad_norm": 796.6832885742188, + "learning_rate": 4.485072834665379e-06, + "loss": 52.9969, + "step": 71720 + }, + { + "epoch": 0.2897982764820194, + "grad_norm": 711.8778076171875, + "learning_rate": 4.483669987209938e-06, + "loss": 52.1264, + "step": 71730 + }, + { + "epoch": 0.28983867774738703, + "grad_norm": 399.5564880371094, + "learning_rate": 4.482267180836508e-06, + "loss": 49.1718, + "step": 71740 + }, + { + "epoch": 0.28987907901275467, + "grad_norm": 529.950439453125, + "learning_rate": 4.4808644156567e-06, + "loss": 40.0581, + "step": 71750 + }, + { + "epoch": 0.2899194802781223, + "grad_norm": 269.11956787109375, + "learning_rate": 4.479461691782129e-06, + "loss": 25.8915, + "step": 71760 + }, + { + "epoch": 0.28995988154348995, + "grad_norm": 417.9612121582031, + "learning_rate": 4.478059009324403e-06, + "loss": 37.0698, + "step": 71770 + }, + { + "epoch": 0.2900002828088576, + "grad_norm": 345.50360107421875, + "learning_rate": 4.476656368395126e-06, + "loss": 44.7259, + "step": 71780 + }, + { + "epoch": 0.29004068407422523, + "grad_norm": 623.320068359375, + "learning_rate": 4.4752537691059e-06, + "loss": 40.9761, + "step": 71790 + }, + { + "epoch": 0.2900810853395928, + "grad_norm": 718.0443115234375, + "learning_rate": 4.473851211568323e-06, + "loss": 47.5845, + "step": 71800 + }, + { + "epoch": 0.29012148660496045, + "grad_norm": 710.3790893554688, + "learning_rate": 4.472448695893991e-06, + "loss": 37.0356, + "step": 71810 + }, + { + "epoch": 0.2901618878703281, + "grad_norm": 1138.6190185546875, + "learning_rate": 4.471046222194494e-06, + "loss": 44.9854, + "step": 71820 + }, + { + "epoch": 0.29020228913569573, + "grad_norm": 415.5585021972656, + "learning_rate": 4.469643790581422e-06, + "loss": 34.5813, + "step": 71830 + }, + { + "epoch": 0.2902426904010634, + "grad_norm": 749.3125, + "learning_rate": 4.468241401166359e-06, + "loss": 44.3407, + "step": 71840 + }, + { + "epoch": 0.290283091666431, + "grad_norm": 1295.281494140625, + "learning_rate": 4.466839054060888e-06, + "loss": 62.4487, + "step": 71850 + }, + { + "epoch": 0.2903234929317986, + "grad_norm": 762.2303466796875, + "learning_rate": 4.465436749376586e-06, + "loss": 35.7702, + "step": 71860 + }, + { + "epoch": 0.29036389419716624, + "grad_norm": 366.7358703613281, + "learning_rate": 4.464034487225031e-06, + "loss": 63.1885, + "step": 71870 + }, + { + "epoch": 0.2904042954625339, + "grad_norm": 332.02301025390625, + "learning_rate": 4.462632267717789e-06, + "loss": 49.8979, + "step": 71880 + }, + { + "epoch": 0.2904446967279015, + "grad_norm": 388.0854187011719, + "learning_rate": 4.461230090966433e-06, + "loss": 35.094, + "step": 71890 + }, + { + "epoch": 0.29048509799326916, + "grad_norm": 580.3858642578125, + "learning_rate": 4.4598279570825244e-06, + "loss": 41.0283, + "step": 71900 + }, + { + "epoch": 0.2905254992586368, + "grad_norm": 396.2524108886719, + "learning_rate": 4.458425866177628e-06, + "loss": 31.7255, + "step": 71910 + }, + { + "epoch": 0.29056590052400444, + "grad_norm": 331.2504577636719, + "learning_rate": 4.457023818363299e-06, + "loss": 53.3679, + "step": 71920 + }, + { + "epoch": 0.290606301789372, + "grad_norm": 744.08544921875, + "learning_rate": 4.455621813751093e-06, + "loss": 41.8191, + "step": 71930 + }, + { + "epoch": 0.29064670305473966, + "grad_norm": 645.9574584960938, + "learning_rate": 4.45421985245256e-06, + "loss": 42.5509, + "step": 71940 + }, + { + "epoch": 0.2906871043201073, + "grad_norm": 233.99517822265625, + "learning_rate": 4.452817934579249e-06, + "loss": 59.6218, + "step": 71950 + }, + { + "epoch": 0.29072750558547494, + "grad_norm": 557.802490234375, + "learning_rate": 4.451416060242707e-06, + "loss": 42.233, + "step": 71960 + }, + { + "epoch": 0.2907679068508426, + "grad_norm": 1119.917236328125, + "learning_rate": 4.450014229554468e-06, + "loss": 60.5082, + "step": 71970 + }, + { + "epoch": 0.2908083081162102, + "grad_norm": 366.4770812988281, + "learning_rate": 4.448612442626073e-06, + "loss": 34.5282, + "step": 71980 + }, + { + "epoch": 0.2908487093815778, + "grad_norm": 1179.2496337890625, + "learning_rate": 4.447210699569055e-06, + "loss": 61.8394, + "step": 71990 + }, + { + "epoch": 0.29088911064694545, + "grad_norm": 833.4341430664062, + "learning_rate": 4.445809000494945e-06, + "loss": 56.3729, + "step": 72000 + }, + { + "epoch": 0.2909295119123131, + "grad_norm": 417.32440185546875, + "learning_rate": 4.4444073455152705e-06, + "loss": 36.397, + "step": 72010 + }, + { + "epoch": 0.29096991317768073, + "grad_norm": 324.6310119628906, + "learning_rate": 4.443005734741553e-06, + "loss": 46.3118, + "step": 72020 + }, + { + "epoch": 0.29101031444304837, + "grad_norm": 386.0984191894531, + "learning_rate": 4.441604168285313e-06, + "loss": 27.7411, + "step": 72030 + }, + { + "epoch": 0.291050715708416, + "grad_norm": 444.1304626464844, + "learning_rate": 4.440202646258067e-06, + "loss": 47.9088, + "step": 72040 + }, + { + "epoch": 0.2910911169737836, + "grad_norm": 783.587890625, + "learning_rate": 4.4388011687713274e-06, + "loss": 57.362, + "step": 72050 + }, + { + "epoch": 0.29113151823915123, + "grad_norm": 1120.068115234375, + "learning_rate": 4.437399735936603e-06, + "loss": 68.9344, + "step": 72060 + }, + { + "epoch": 0.2911719195045189, + "grad_norm": 496.18292236328125, + "learning_rate": 4.435998347865399e-06, + "loss": 31.082, + "step": 72070 + }, + { + "epoch": 0.2912123207698865, + "grad_norm": 427.5905456542969, + "learning_rate": 4.4345970046692174e-06, + "loss": 39.1392, + "step": 72080 + }, + { + "epoch": 0.29125272203525415, + "grad_norm": 828.7031860351562, + "learning_rate": 4.433195706459558e-06, + "loss": 54.4549, + "step": 72090 + }, + { + "epoch": 0.2912931233006218, + "grad_norm": 974.0640869140625, + "learning_rate": 4.431794453347915e-06, + "loss": 45.0464, + "step": 72100 + }, + { + "epoch": 0.29133352456598943, + "grad_norm": 227.1929931640625, + "learning_rate": 4.430393245445781e-06, + "loss": 38.7168, + "step": 72110 + }, + { + "epoch": 0.291373925831357, + "grad_norm": 416.6019287109375, + "learning_rate": 4.42899208286464e-06, + "loss": 44.9181, + "step": 72120 + }, + { + "epoch": 0.29141432709672466, + "grad_norm": 688.7039184570312, + "learning_rate": 4.427590965715981e-06, + "loss": 43.0894, + "step": 72130 + }, + { + "epoch": 0.2914547283620923, + "grad_norm": 250.0460205078125, + "learning_rate": 4.426189894111281e-06, + "loss": 29.5613, + "step": 72140 + }, + { + "epoch": 0.29149512962745994, + "grad_norm": 1028.0450439453125, + "learning_rate": 4.4247888681620165e-06, + "loss": 59.4425, + "step": 72150 + }, + { + "epoch": 0.2915355308928276, + "grad_norm": 1062.1776123046875, + "learning_rate": 4.423387887979663e-06, + "loss": 45.5027, + "step": 72160 + }, + { + "epoch": 0.2915759321581952, + "grad_norm": 967.5091552734375, + "learning_rate": 4.421986953675687e-06, + "loss": 57.5422, + "step": 72170 + }, + { + "epoch": 0.2916163334235628, + "grad_norm": 281.9029541015625, + "learning_rate": 4.420586065361558e-06, + "loss": 33.8364, + "step": 72180 + }, + { + "epoch": 0.29165673468893044, + "grad_norm": 475.996826171875, + "learning_rate": 4.419185223148737e-06, + "loss": 56.7115, + "step": 72190 + }, + { + "epoch": 0.2916971359542981, + "grad_norm": 900.3701782226562, + "learning_rate": 4.417784427148681e-06, + "loss": 35.606, + "step": 72200 + }, + { + "epoch": 0.2917375372196657, + "grad_norm": 495.081787109375, + "learning_rate": 4.4163836774728466e-06, + "loss": 52.596, + "step": 72210 + }, + { + "epoch": 0.29177793848503336, + "grad_norm": 385.602294921875, + "learning_rate": 4.414982974232686e-06, + "loss": 33.3124, + "step": 72220 + }, + { + "epoch": 0.291818339750401, + "grad_norm": 741.7224731445312, + "learning_rate": 4.413582317539644e-06, + "loss": 35.2805, + "step": 72230 + }, + { + "epoch": 0.29185874101576864, + "grad_norm": 842.2901611328125, + "learning_rate": 4.412181707505167e-06, + "loss": 56.8002, + "step": 72240 + }, + { + "epoch": 0.2918991422811362, + "grad_norm": 537.3816528320312, + "learning_rate": 4.410781144240692e-06, + "loss": 48.8103, + "step": 72250 + }, + { + "epoch": 0.29193954354650387, + "grad_norm": 677.678466796875, + "learning_rate": 4.409380627857658e-06, + "loss": 70.5054, + "step": 72260 + }, + { + "epoch": 0.2919799448118715, + "grad_norm": 445.90057373046875, + "learning_rate": 4.4079801584674955e-06, + "loss": 39.0066, + "step": 72270 + }, + { + "epoch": 0.29202034607723915, + "grad_norm": 579.4309692382812, + "learning_rate": 4.406579736181636e-06, + "loss": 47.5335, + "step": 72280 + }, + { + "epoch": 0.2920607473426068, + "grad_norm": 373.58270263671875, + "learning_rate": 4.405179361111503e-06, + "loss": 46.5012, + "step": 72290 + }, + { + "epoch": 0.2921011486079744, + "grad_norm": 292.4507141113281, + "learning_rate": 4.403779033368521e-06, + "loss": 39.3859, + "step": 72300 + }, + { + "epoch": 0.292141549873342, + "grad_norm": 446.4667663574219, + "learning_rate": 4.402378753064102e-06, + "loss": 59.9139, + "step": 72310 + }, + { + "epoch": 0.29218195113870965, + "grad_norm": 756.2067260742188, + "learning_rate": 4.400978520309663e-06, + "loss": 40.9019, + "step": 72320 + }, + { + "epoch": 0.2922223524040773, + "grad_norm": 707.1049194335938, + "learning_rate": 4.399578335216615e-06, + "loss": 59.0028, + "step": 72330 + }, + { + "epoch": 0.29226275366944493, + "grad_norm": 583.9354248046875, + "learning_rate": 4.3981781978963625e-06, + "loss": 35.8286, + "step": 72340 + }, + { + "epoch": 0.29230315493481257, + "grad_norm": 637.59130859375, + "learning_rate": 4.39677810846031e-06, + "loss": 63.2968, + "step": 72350 + }, + { + "epoch": 0.2923435562001802, + "grad_norm": 811.93408203125, + "learning_rate": 4.395378067019854e-06, + "loss": 47.6136, + "step": 72360 + }, + { + "epoch": 0.2923839574655478, + "grad_norm": 1073.439697265625, + "learning_rate": 4.39397807368639e-06, + "loss": 57.2526, + "step": 72370 + }, + { + "epoch": 0.29242435873091543, + "grad_norm": 374.0229797363281, + "learning_rate": 4.39257812857131e-06, + "loss": 41.0583, + "step": 72380 + }, + { + "epoch": 0.2924647599962831, + "grad_norm": 757.6024780273438, + "learning_rate": 4.391178231786003e-06, + "loss": 38.2484, + "step": 72390 + }, + { + "epoch": 0.2925051612616507, + "grad_norm": 797.3472290039062, + "learning_rate": 4.389778383441847e-06, + "loss": 38.5529, + "step": 72400 + }, + { + "epoch": 0.29254556252701835, + "grad_norm": 927.91796875, + "learning_rate": 4.388378583650225e-06, + "loss": 38.3963, + "step": 72410 + }, + { + "epoch": 0.292585963792386, + "grad_norm": 467.8027648925781, + "learning_rate": 4.386978832522512e-06, + "loss": 37.6053, + "step": 72420 + }, + { + "epoch": 0.29262636505775363, + "grad_norm": 491.3323059082031, + "learning_rate": 4.38557913017008e-06, + "loss": 55.3208, + "step": 72430 + }, + { + "epoch": 0.2926667663231212, + "grad_norm": 931.24609375, + "learning_rate": 4.384179476704297e-06, + "loss": 49.4638, + "step": 72440 + }, + { + "epoch": 0.29270716758848886, + "grad_norm": 854.8884887695312, + "learning_rate": 4.382779872236527e-06, + "loss": 44.3615, + "step": 72450 + }, + { + "epoch": 0.2927475688538565, + "grad_norm": 498.6365966796875, + "learning_rate": 4.3813803168781295e-06, + "loss": 39.2645, + "step": 72460 + }, + { + "epoch": 0.29278797011922414, + "grad_norm": 658.3452758789062, + "learning_rate": 4.379980810740463e-06, + "loss": 41.7759, + "step": 72470 + }, + { + "epoch": 0.2928283713845918, + "grad_norm": 596.740478515625, + "learning_rate": 4.378581353934876e-06, + "loss": 49.1443, + "step": 72480 + }, + { + "epoch": 0.2928687726499594, + "grad_norm": 496.052734375, + "learning_rate": 4.3771819465727185e-06, + "loss": 40.2331, + "step": 72490 + }, + { + "epoch": 0.292909173915327, + "grad_norm": 340.43646240234375, + "learning_rate": 4.3757825887653345e-06, + "loss": 38.848, + "step": 72500 + }, + { + "epoch": 0.29294957518069464, + "grad_norm": 613.5763549804688, + "learning_rate": 4.374383280624066e-06, + "loss": 36.6635, + "step": 72510 + }, + { + "epoch": 0.2929899764460623, + "grad_norm": 450.40399169921875, + "learning_rate": 4.372984022260249e-06, + "loss": 37.3646, + "step": 72520 + }, + { + "epoch": 0.2930303777114299, + "grad_norm": 751.0297241210938, + "learning_rate": 4.371584813785216e-06, + "loss": 30.7279, + "step": 72530 + }, + { + "epoch": 0.29307077897679756, + "grad_norm": 347.6400146484375, + "learning_rate": 4.370185655310295e-06, + "loss": 41.535, + "step": 72540 + }, + { + "epoch": 0.2931111802421652, + "grad_norm": 928.6194458007812, + "learning_rate": 4.368786546946811e-06, + "loss": 41.9351, + "step": 72550 + }, + { + "epoch": 0.29315158150753284, + "grad_norm": 542.6160278320312, + "learning_rate": 4.367387488806086e-06, + "loss": 49.6833, + "step": 72560 + }, + { + "epoch": 0.29319198277290043, + "grad_norm": 351.0897216796875, + "learning_rate": 4.365988480999434e-06, + "loss": 32.4432, + "step": 72570 + }, + { + "epoch": 0.29323238403826807, + "grad_norm": 381.7917785644531, + "learning_rate": 4.364589523638168e-06, + "loss": 34.2274, + "step": 72580 + }, + { + "epoch": 0.2932727853036357, + "grad_norm": 1160.76611328125, + "learning_rate": 4.363190616833598e-06, + "loss": 41.1181, + "step": 72590 + }, + { + "epoch": 0.29331318656900335, + "grad_norm": 766.3687744140625, + "learning_rate": 4.361791760697027e-06, + "loss": 42.0144, + "step": 72600 + }, + { + "epoch": 0.293353587834371, + "grad_norm": 619.919677734375, + "learning_rate": 4.360392955339758e-06, + "loss": 31.6507, + "step": 72610 + }, + { + "epoch": 0.29339398909973863, + "grad_norm": 236.89402770996094, + "learning_rate": 4.358994200873085e-06, + "loss": 32.084, + "step": 72620 + }, + { + "epoch": 0.2934343903651062, + "grad_norm": 587.992919921875, + "learning_rate": 4.357595497408303e-06, + "loss": 40.2523, + "step": 72630 + }, + { + "epoch": 0.29347479163047385, + "grad_norm": 640.2550048828125, + "learning_rate": 4.356196845056699e-06, + "loss": 61.5953, + "step": 72640 + }, + { + "epoch": 0.2935151928958415, + "grad_norm": 653.5460205078125, + "learning_rate": 4.3547982439295576e-06, + "loss": 60.019, + "step": 72650 + }, + { + "epoch": 0.29355559416120913, + "grad_norm": 462.1523132324219, + "learning_rate": 4.353399694138158e-06, + "loss": 41.3274, + "step": 72660 + }, + { + "epoch": 0.2935959954265768, + "grad_norm": 541.9874877929688, + "learning_rate": 4.352001195793778e-06, + "loss": 48.6624, + "step": 72670 + }, + { + "epoch": 0.2936363966919444, + "grad_norm": 190.7097625732422, + "learning_rate": 4.350602749007688e-06, + "loss": 34.3381, + "step": 72680 + }, + { + "epoch": 0.293676797957312, + "grad_norm": 630.536376953125, + "learning_rate": 4.349204353891158e-06, + "loss": 39.0707, + "step": 72690 + }, + { + "epoch": 0.29371719922267964, + "grad_norm": 421.9498596191406, + "learning_rate": 4.347806010555448e-06, + "loss": 34.1856, + "step": 72700 + }, + { + "epoch": 0.2937576004880473, + "grad_norm": 679.4033203125, + "learning_rate": 4.346407719111823e-06, + "loss": 49.3176, + "step": 72710 + }, + { + "epoch": 0.2937980017534149, + "grad_norm": 749.3023681640625, + "learning_rate": 4.3450094796715354e-06, + "loss": 66.0755, + "step": 72720 + }, + { + "epoch": 0.29383840301878256, + "grad_norm": 614.3016357421875, + "learning_rate": 4.343611292345839e-06, + "loss": 49.1222, + "step": 72730 + }, + { + "epoch": 0.2938788042841502, + "grad_norm": 644.041748046875, + "learning_rate": 4.342213157245979e-06, + "loss": 40.4218, + "step": 72740 + }, + { + "epoch": 0.29391920554951784, + "grad_norm": 681.5894775390625, + "learning_rate": 4.340815074483199e-06, + "loss": 56.7761, + "step": 72750 + }, + { + "epoch": 0.2939596068148854, + "grad_norm": 474.3656311035156, + "learning_rate": 4.339417044168738e-06, + "loss": 49.5246, + "step": 72760 + }, + { + "epoch": 0.29400000808025306, + "grad_norm": 701.384033203125, + "learning_rate": 4.338019066413832e-06, + "loss": 47.6203, + "step": 72770 + }, + { + "epoch": 0.2940404093456207, + "grad_norm": 322.1119384765625, + "learning_rate": 4.33662114132971e-06, + "loss": 37.8673, + "step": 72780 + }, + { + "epoch": 0.29408081061098834, + "grad_norm": 406.5722351074219, + "learning_rate": 4.335223269027599e-06, + "loss": 44.9244, + "step": 72790 + }, + { + "epoch": 0.294121211876356, + "grad_norm": 520.870361328125, + "learning_rate": 4.333825449618721e-06, + "loss": 48.5541, + "step": 72800 + }, + { + "epoch": 0.2941616131417236, + "grad_norm": 768.7017822265625, + "learning_rate": 4.332427683214295e-06, + "loss": 41.3215, + "step": 72810 + }, + { + "epoch": 0.2942020144070912, + "grad_norm": 882.6221313476562, + "learning_rate": 4.331029969925538e-06, + "loss": 39.3176, + "step": 72820 + }, + { + "epoch": 0.29424241567245885, + "grad_norm": 4053.163818359375, + "learning_rate": 4.329632309863652e-06, + "loss": 61.8202, + "step": 72830 + }, + { + "epoch": 0.2942828169378265, + "grad_norm": 517.92724609375, + "learning_rate": 4.328234703139847e-06, + "loss": 43.2393, + "step": 72840 + }, + { + "epoch": 0.2943232182031941, + "grad_norm": 284.5147399902344, + "learning_rate": 4.326837149865325e-06, + "loss": 27.8085, + "step": 72850 + }, + { + "epoch": 0.29436361946856177, + "grad_norm": 856.0078125, + "learning_rate": 4.325439650151281e-06, + "loss": 57.4546, + "step": 72860 + }, + { + "epoch": 0.2944040207339294, + "grad_norm": 613.6627807617188, + "learning_rate": 4.324042204108908e-06, + "loss": 44.772, + "step": 72870 + }, + { + "epoch": 0.29444442199929705, + "grad_norm": 905.0805053710938, + "learning_rate": 4.322644811849395e-06, + "loss": 47.959, + "step": 72880 + }, + { + "epoch": 0.29448482326466463, + "grad_norm": 596.2250366210938, + "learning_rate": 4.321247473483924e-06, + "loss": 29.8382, + "step": 72890 + }, + { + "epoch": 0.29452522453003227, + "grad_norm": 327.65338134765625, + "learning_rate": 4.319850189123681e-06, + "loss": 47.0369, + "step": 72900 + }, + { + "epoch": 0.2945656257953999, + "grad_norm": 716.1499633789062, + "learning_rate": 4.3184529588798335e-06, + "loss": 46.2548, + "step": 72910 + }, + { + "epoch": 0.29460602706076755, + "grad_norm": 270.2619934082031, + "learning_rate": 4.3170557828635565e-06, + "loss": 53.3165, + "step": 72920 + }, + { + "epoch": 0.2946464283261352, + "grad_norm": 517.9827880859375, + "learning_rate": 4.315658661186016e-06, + "loss": 37.7342, + "step": 72930 + }, + { + "epoch": 0.29468682959150283, + "grad_norm": 508.7879638671875, + "learning_rate": 4.314261593958376e-06, + "loss": 30.2865, + "step": 72940 + }, + { + "epoch": 0.2947272308568704, + "grad_norm": 840.9309692382812, + "learning_rate": 4.3128645812917935e-06, + "loss": 43.7234, + "step": 72950 + }, + { + "epoch": 0.29476763212223805, + "grad_norm": 649.1232299804688, + "learning_rate": 4.311467623297423e-06, + "loss": 43.8122, + "step": 72960 + }, + { + "epoch": 0.2948080333876057, + "grad_norm": 886.0907592773438, + "learning_rate": 4.310070720086414e-06, + "loss": 51.4335, + "step": 72970 + }, + { + "epoch": 0.29484843465297333, + "grad_norm": 1010.0867919921875, + "learning_rate": 4.30867387176991e-06, + "loss": 54.0462, + "step": 72980 + }, + { + "epoch": 0.294888835918341, + "grad_norm": 563.8475341796875, + "learning_rate": 4.307277078459057e-06, + "loss": 33.6128, + "step": 72990 + }, + { + "epoch": 0.2949292371837086, + "grad_norm": 507.4144287109375, + "learning_rate": 4.305880340264985e-06, + "loss": 41.6365, + "step": 73000 + }, + { + "epoch": 0.2949696384490762, + "grad_norm": 901.0919189453125, + "learning_rate": 4.3044836572988285e-06, + "loss": 53.9174, + "step": 73010 + }, + { + "epoch": 0.29501003971444384, + "grad_norm": 384.7436828613281, + "learning_rate": 4.3030870296717155e-06, + "loss": 52.3031, + "step": 73020 + }, + { + "epoch": 0.2950504409798115, + "grad_norm": 578.6005249023438, + "learning_rate": 4.301690457494769e-06, + "loss": 40.9771, + "step": 73030 + }, + { + "epoch": 0.2950908422451791, + "grad_norm": 544.58642578125, + "learning_rate": 4.300293940879108e-06, + "loss": 42.7261, + "step": 73040 + }, + { + "epoch": 0.29513124351054676, + "grad_norm": 279.6160583496094, + "learning_rate": 4.298897479935847e-06, + "loss": 66.8447, + "step": 73050 + }, + { + "epoch": 0.2951716447759144, + "grad_norm": 217.38037109375, + "learning_rate": 4.297501074776097e-06, + "loss": 33.5354, + "step": 73060 + }, + { + "epoch": 0.29521204604128204, + "grad_norm": 0.0, + "learning_rate": 4.296104725510961e-06, + "loss": 49.2657, + "step": 73070 + }, + { + "epoch": 0.2952524473066496, + "grad_norm": 860.3460693359375, + "learning_rate": 4.294708432251544e-06, + "loss": 50.6563, + "step": 73080 + }, + { + "epoch": 0.29529284857201726, + "grad_norm": 500.3200988769531, + "learning_rate": 4.293312195108938e-06, + "loss": 35.9054, + "step": 73090 + }, + { + "epoch": 0.2953332498373849, + "grad_norm": 642.476318359375, + "learning_rate": 4.291916014194238e-06, + "loss": 42.9805, + "step": 73100 + }, + { + "epoch": 0.29537365110275254, + "grad_norm": 453.5120544433594, + "learning_rate": 4.290519889618531e-06, + "loss": 44.4742, + "step": 73110 + }, + { + "epoch": 0.2954140523681202, + "grad_norm": 604.7741088867188, + "learning_rate": 4.2891238214928995e-06, + "loss": 49.2943, + "step": 73120 + }, + { + "epoch": 0.2954544536334878, + "grad_norm": 632.0167846679688, + "learning_rate": 4.287727809928423e-06, + "loss": 48.4525, + "step": 73130 + }, + { + "epoch": 0.2954948548988554, + "grad_norm": 1167.5364990234375, + "learning_rate": 4.286331855036177e-06, + "loss": 40.5812, + "step": 73140 + }, + { + "epoch": 0.29553525616422305, + "grad_norm": 569.1380004882812, + "learning_rate": 4.284935956927229e-06, + "loss": 38.5044, + "step": 73150 + }, + { + "epoch": 0.2955756574295907, + "grad_norm": 628.6519775390625, + "learning_rate": 4.283540115712647e-06, + "loss": 46.5134, + "step": 73160 + }, + { + "epoch": 0.29561605869495833, + "grad_norm": 384.6105041503906, + "learning_rate": 4.282144331503488e-06, + "loss": 59.5178, + "step": 73170 + }, + { + "epoch": 0.29565645996032597, + "grad_norm": 526.2188720703125, + "learning_rate": 4.280748604410811e-06, + "loss": 81.0941, + "step": 73180 + }, + { + "epoch": 0.2956968612256936, + "grad_norm": 550.6795654296875, + "learning_rate": 4.279352934545666e-06, + "loss": 34.5867, + "step": 73190 + }, + { + "epoch": 0.2957372624910612, + "grad_norm": 1194.502197265625, + "learning_rate": 4.277957322019101e-06, + "loss": 61.5651, + "step": 73200 + }, + { + "epoch": 0.29577766375642883, + "grad_norm": 566.8056640625, + "learning_rate": 4.276561766942158e-06, + "loss": 40.1345, + "step": 73210 + }, + { + "epoch": 0.2958180650217965, + "grad_norm": 623.376953125, + "learning_rate": 4.275166269425874e-06, + "loss": 34.6147, + "step": 73220 + }, + { + "epoch": 0.2958584662871641, + "grad_norm": 596.8927001953125, + "learning_rate": 4.273770829581285e-06, + "loss": 62.5187, + "step": 73230 + }, + { + "epoch": 0.29589886755253175, + "grad_norm": 770.3164672851562, + "learning_rate": 4.272375447519418e-06, + "loss": 54.9159, + "step": 73240 + }, + { + "epoch": 0.2959392688178994, + "grad_norm": 425.0506286621094, + "learning_rate": 4.270980123351299e-06, + "loss": 54.7439, + "step": 73250 + }, + { + "epoch": 0.29597967008326703, + "grad_norm": 690.937255859375, + "learning_rate": 4.269584857187942e-06, + "loss": 38.0473, + "step": 73260 + }, + { + "epoch": 0.2960200713486346, + "grad_norm": 606.042236328125, + "learning_rate": 4.268189649140369e-06, + "loss": 40.5556, + "step": 73270 + }, + { + "epoch": 0.29606047261400226, + "grad_norm": 1045.22119140625, + "learning_rate": 4.266794499319585e-06, + "loss": 54.4675, + "step": 73280 + }, + { + "epoch": 0.2961008738793699, + "grad_norm": 514.9558715820312, + "learning_rate": 4.265399407836598e-06, + "loss": 47.2978, + "step": 73290 + }, + { + "epoch": 0.29614127514473754, + "grad_norm": 976.2498779296875, + "learning_rate": 4.26400437480241e-06, + "loss": 41.2603, + "step": 73300 + }, + { + "epoch": 0.2961816764101052, + "grad_norm": 423.5660705566406, + "learning_rate": 4.262609400328015e-06, + "loss": 43.1175, + "step": 73310 + }, + { + "epoch": 0.2962220776754728, + "grad_norm": 505.2185974121094, + "learning_rate": 4.2612144845244044e-06, + "loss": 31.8775, + "step": 73320 + }, + { + "epoch": 0.2962624789408404, + "grad_norm": 577.039794921875, + "learning_rate": 4.259819627502571e-06, + "loss": 40.0951, + "step": 73330 + }, + { + "epoch": 0.29630288020620804, + "grad_norm": 580.8576049804688, + "learning_rate": 4.258424829373491e-06, + "loss": 63.3412, + "step": 73340 + }, + { + "epoch": 0.2963432814715757, + "grad_norm": 413.0155944824219, + "learning_rate": 4.257030090248142e-06, + "loss": 31.0133, + "step": 73350 + }, + { + "epoch": 0.2963836827369433, + "grad_norm": 473.4555358886719, + "learning_rate": 4.2556354102374994e-06, + "loss": 45.6765, + "step": 73360 + }, + { + "epoch": 0.29642408400231096, + "grad_norm": 777.5567626953125, + "learning_rate": 4.254240789452532e-06, + "loss": 42.2331, + "step": 73370 + }, + { + "epoch": 0.2964644852676786, + "grad_norm": 639.9664306640625, + "learning_rate": 4.252846228004203e-06, + "loss": 77.8432, + "step": 73380 + }, + { + "epoch": 0.29650488653304624, + "grad_norm": 884.723388671875, + "learning_rate": 4.25145172600347e-06, + "loss": 46.5431, + "step": 73390 + }, + { + "epoch": 0.2965452877984138, + "grad_norm": 92.27436828613281, + "learning_rate": 4.2500572835612876e-06, + "loss": 31.1916, + "step": 73400 + }, + { + "epoch": 0.29658568906378147, + "grad_norm": 469.61456298828125, + "learning_rate": 4.248662900788605e-06, + "loss": 36.7089, + "step": 73410 + }, + { + "epoch": 0.2966260903291491, + "grad_norm": 654.217041015625, + "learning_rate": 4.247268577796368e-06, + "loss": 41.0459, + "step": 73420 + }, + { + "epoch": 0.29666649159451675, + "grad_norm": 433.7368469238281, + "learning_rate": 4.245874314695516e-06, + "loss": 35.8187, + "step": 73430 + }, + { + "epoch": 0.2967068928598844, + "grad_norm": 413.3462219238281, + "learning_rate": 4.244480111596984e-06, + "loss": 37.6828, + "step": 73440 + }, + { + "epoch": 0.296747294125252, + "grad_norm": 837.417236328125, + "learning_rate": 4.2430859686117e-06, + "loss": 44.1146, + "step": 73450 + }, + { + "epoch": 0.2967876953906196, + "grad_norm": 599.1912841796875, + "learning_rate": 4.241691885850593e-06, + "loss": 42.8803, + "step": 73460 + }, + { + "epoch": 0.29682809665598725, + "grad_norm": 382.2321472167969, + "learning_rate": 4.240297863424582e-06, + "loss": 36.6295, + "step": 73470 + }, + { + "epoch": 0.2968684979213549, + "grad_norm": 680.3961791992188, + "learning_rate": 4.2389039014445846e-06, + "loss": 52.8627, + "step": 73480 + }, + { + "epoch": 0.29690889918672253, + "grad_norm": 654.2417602539062, + "learning_rate": 4.23751000002151e-06, + "loss": 37.8739, + "step": 73490 + }, + { + "epoch": 0.29694930045209017, + "grad_norm": 235.81236267089844, + "learning_rate": 4.2361161592662655e-06, + "loss": 28.3701, + "step": 73500 + }, + { + "epoch": 0.2969897017174578, + "grad_norm": 1637.2437744140625, + "learning_rate": 4.234722379289753e-06, + "loss": 49.2392, + "step": 73510 + }, + { + "epoch": 0.2970301029828254, + "grad_norm": 315.6921081542969, + "learning_rate": 4.233328660202869e-06, + "loss": 34.3896, + "step": 73520 + }, + { + "epoch": 0.29707050424819303, + "grad_norm": 500.3575134277344, + "learning_rate": 4.231935002116504e-06, + "loss": 49.8805, + "step": 73530 + }, + { + "epoch": 0.2971109055135607, + "grad_norm": 137.28250122070312, + "learning_rate": 4.230541405141546e-06, + "loss": 36.5432, + "step": 73540 + }, + { + "epoch": 0.2971513067789283, + "grad_norm": 558.1651000976562, + "learning_rate": 4.229147869388875e-06, + "loss": 44.4456, + "step": 73550 + }, + { + "epoch": 0.29719170804429595, + "grad_norm": 243.798828125, + "learning_rate": 4.227754394969373e-06, + "loss": 45.8601, + "step": 73560 + }, + { + "epoch": 0.2972321093096636, + "grad_norm": 1039.82177734375, + "learning_rate": 4.226360981993909e-06, + "loss": 57.3914, + "step": 73570 + }, + { + "epoch": 0.29727251057503123, + "grad_norm": 504.4435729980469, + "learning_rate": 4.224967630573351e-06, + "loss": 51.3584, + "step": 73580 + }, + { + "epoch": 0.2973129118403988, + "grad_norm": 357.03570556640625, + "learning_rate": 4.2235743408185635e-06, + "loss": 55.9675, + "step": 73590 + }, + { + "epoch": 0.29735331310576646, + "grad_norm": 398.0602722167969, + "learning_rate": 4.222181112840401e-06, + "loss": 47.9505, + "step": 73600 + }, + { + "epoch": 0.2973937143711341, + "grad_norm": 774.7249145507812, + "learning_rate": 4.220787946749717e-06, + "loss": 45.4625, + "step": 73610 + }, + { + "epoch": 0.29743411563650174, + "grad_norm": 754.9330444335938, + "learning_rate": 4.219394842657361e-06, + "loss": 42.2232, + "step": 73620 + }, + { + "epoch": 0.2974745169018694, + "grad_norm": 583.9215698242188, + "learning_rate": 4.218001800674174e-06, + "loss": 34.6186, + "step": 73630 + }, + { + "epoch": 0.297514918167237, + "grad_norm": 481.25177001953125, + "learning_rate": 4.216608820910995e-06, + "loss": 42.5184, + "step": 73640 + }, + { + "epoch": 0.2975553194326046, + "grad_norm": 1010.7393188476562, + "learning_rate": 4.2152159034786554e-06, + "loss": 49.1567, + "step": 73650 + }, + { + "epoch": 0.29759572069797224, + "grad_norm": 429.19256591796875, + "learning_rate": 4.213823048487987e-06, + "loss": 45.6626, + "step": 73660 + }, + { + "epoch": 0.2976361219633399, + "grad_norm": 602.9405517578125, + "learning_rate": 4.212430256049809e-06, + "loss": 40.8663, + "step": 73670 + }, + { + "epoch": 0.2976765232287075, + "grad_norm": 341.2919921875, + "learning_rate": 4.2110375262749435e-06, + "loss": 42.1535, + "step": 73680 + }, + { + "epoch": 0.29771692449407516, + "grad_norm": 795.6687622070312, + "learning_rate": 4.209644859274199e-06, + "loss": 60.3106, + "step": 73690 + }, + { + "epoch": 0.2977573257594428, + "grad_norm": 534.8817138671875, + "learning_rate": 4.208252255158387e-06, + "loss": 31.561, + "step": 73700 + }, + { + "epoch": 0.29779772702481044, + "grad_norm": 672.9677734375, + "learning_rate": 4.2068597140383084e-06, + "loss": 38.9706, + "step": 73710 + }, + { + "epoch": 0.29783812829017803, + "grad_norm": 590.738037109375, + "learning_rate": 4.205467236024763e-06, + "loss": 44.2501, + "step": 73720 + }, + { + "epoch": 0.29787852955554567, + "grad_norm": 580.1815185546875, + "learning_rate": 4.204074821228542e-06, + "loss": 50.2648, + "step": 73730 + }, + { + "epoch": 0.2979189308209133, + "grad_norm": 895.4475708007812, + "learning_rate": 4.202682469760436e-06, + "loss": 57.8472, + "step": 73740 + }, + { + "epoch": 0.29795933208628095, + "grad_norm": 537.7899780273438, + "learning_rate": 4.2012901817312255e-06, + "loss": 48.3372, + "step": 73750 + }, + { + "epoch": 0.2979997333516486, + "grad_norm": 437.3421936035156, + "learning_rate": 4.199897957251693e-06, + "loss": 36.2092, + "step": 73760 + }, + { + "epoch": 0.29804013461701623, + "grad_norm": 567.8798828125, + "learning_rate": 4.198505796432605e-06, + "loss": 35.1477, + "step": 73770 + }, + { + "epoch": 0.2980805358823838, + "grad_norm": 838.5132446289062, + "learning_rate": 4.197113699384732e-06, + "loss": 50.7975, + "step": 73780 + }, + { + "epoch": 0.29812093714775145, + "grad_norm": 676.006103515625, + "learning_rate": 4.1957216662188385e-06, + "loss": 47.005, + "step": 73790 + }, + { + "epoch": 0.2981613384131191, + "grad_norm": 473.2902526855469, + "learning_rate": 4.194329697045681e-06, + "loss": 57.9121, + "step": 73800 + }, + { + "epoch": 0.29820173967848673, + "grad_norm": 315.8070983886719, + "learning_rate": 4.19293779197601e-06, + "loss": 37.5221, + "step": 73810 + }, + { + "epoch": 0.2982421409438544, + "grad_norm": 545.0514526367188, + "learning_rate": 4.191545951120577e-06, + "loss": 44.9015, + "step": 73820 + }, + { + "epoch": 0.298282542209222, + "grad_norm": 588.2539672851562, + "learning_rate": 4.190154174590122e-06, + "loss": 35.0877, + "step": 73830 + }, + { + "epoch": 0.2983229434745896, + "grad_norm": 513.2553100585938, + "learning_rate": 4.188762462495381e-06, + "loss": 34.4793, + "step": 73840 + }, + { + "epoch": 0.29836334473995724, + "grad_norm": 1437.9708251953125, + "learning_rate": 4.187370814947091e-06, + "loss": 47.6676, + "step": 73850 + }, + { + "epoch": 0.2984037460053249, + "grad_norm": 2023.705810546875, + "learning_rate": 4.185979232055975e-06, + "loss": 77.7692, + "step": 73860 + }, + { + "epoch": 0.2984441472706925, + "grad_norm": 292.62713623046875, + "learning_rate": 4.184587713932755e-06, + "loss": 38.4317, + "step": 73870 + }, + { + "epoch": 0.29848454853606016, + "grad_norm": 679.334716796875, + "learning_rate": 4.183196260688147e-06, + "loss": 38.1238, + "step": 73880 + }, + { + "epoch": 0.2985249498014278, + "grad_norm": 554.282958984375, + "learning_rate": 4.1818048724328646e-06, + "loss": 41.1552, + "step": 73890 + }, + { + "epoch": 0.29856535106679544, + "grad_norm": 1359.9815673828125, + "learning_rate": 4.180413549277614e-06, + "loss": 42.1656, + "step": 73900 + }, + { + "epoch": 0.298605752332163, + "grad_norm": 544.4028930664062, + "learning_rate": 4.1790222913330955e-06, + "loss": 31.6292, + "step": 73910 + }, + { + "epoch": 0.29864615359753066, + "grad_norm": 779.8333740234375, + "learning_rate": 4.1776310987100054e-06, + "loss": 59.4306, + "step": 73920 + }, + { + "epoch": 0.2986865548628983, + "grad_norm": 299.58441162109375, + "learning_rate": 4.1762399715190366e-06, + "loss": 34.8977, + "step": 73930 + }, + { + "epoch": 0.29872695612826594, + "grad_norm": 1023.2490234375, + "learning_rate": 4.1748489098708715e-06, + "loss": 61.7636, + "step": 73940 + }, + { + "epoch": 0.2987673573936336, + "grad_norm": 496.14874267578125, + "learning_rate": 4.173457913876191e-06, + "loss": 37.2103, + "step": 73950 + }, + { + "epoch": 0.2988077586590012, + "grad_norm": 419.87164306640625, + "learning_rate": 4.172066983645671e-06, + "loss": 26.5351, + "step": 73960 + }, + { + "epoch": 0.2988481599243688, + "grad_norm": 309.26873779296875, + "learning_rate": 4.170676119289982e-06, + "loss": 45.8094, + "step": 73970 + }, + { + "epoch": 0.29888856118973645, + "grad_norm": 278.8150329589844, + "learning_rate": 4.1692853209197865e-06, + "loss": 49.6217, + "step": 73980 + }, + { + "epoch": 0.2989289624551041, + "grad_norm": 314.2029724121094, + "learning_rate": 4.167894588645746e-06, + "loss": 28.5693, + "step": 73990 + }, + { + "epoch": 0.2989693637204717, + "grad_norm": 554.4902954101562, + "learning_rate": 4.166503922578516e-06, + "loss": 39.707, + "step": 74000 + }, + { + "epoch": 0.29900976498583937, + "grad_norm": 635.1714477539062, + "learning_rate": 4.165113322828743e-06, + "loss": 43.1086, + "step": 74010 + }, + { + "epoch": 0.299050166251207, + "grad_norm": 404.27142333984375, + "learning_rate": 4.163722789507071e-06, + "loss": 33.8543, + "step": 74020 + }, + { + "epoch": 0.29909056751657465, + "grad_norm": 376.3638916015625, + "learning_rate": 4.162332322724139e-06, + "loss": 37.309, + "step": 74030 + }, + { + "epoch": 0.29913096878194223, + "grad_norm": 647.9591064453125, + "learning_rate": 4.16094192259058e-06, + "loss": 37.8924, + "step": 74040 + }, + { + "epoch": 0.29917137004730987, + "grad_norm": 824.0821533203125, + "learning_rate": 4.15955158921702e-06, + "loss": 48.8199, + "step": 74050 + }, + { + "epoch": 0.2992117713126775, + "grad_norm": 1072.1903076171875, + "learning_rate": 4.158161322714085e-06, + "loss": 50.3598, + "step": 74060 + }, + { + "epoch": 0.29925217257804515, + "grad_norm": 660.3749389648438, + "learning_rate": 4.1567711231923876e-06, + "loss": 36.4483, + "step": 74070 + }, + { + "epoch": 0.2992925738434128, + "grad_norm": 907.34423828125, + "learning_rate": 4.155380990762542e-06, + "loss": 56.8118, + "step": 74080 + }, + { + "epoch": 0.29933297510878043, + "grad_norm": 1100.1082763671875, + "learning_rate": 4.153990925535157e-06, + "loss": 34.7482, + "step": 74090 + }, + { + "epoch": 0.299373376374148, + "grad_norm": 784.7665405273438, + "learning_rate": 4.15260092762083e-06, + "loss": 64.8724, + "step": 74100 + }, + { + "epoch": 0.29941377763951565, + "grad_norm": 332.3941955566406, + "learning_rate": 4.151210997130159e-06, + "loss": 44.2105, + "step": 74110 + }, + { + "epoch": 0.2994541789048833, + "grad_norm": 1009.3142700195312, + "learning_rate": 4.1498211341737335e-06, + "loss": 68.1092, + "step": 74120 + }, + { + "epoch": 0.29949458017025093, + "grad_norm": 584.8860473632812, + "learning_rate": 4.148431338862138e-06, + "loss": 54.0953, + "step": 74130 + }, + { + "epoch": 0.2995349814356186, + "grad_norm": 696.730224609375, + "learning_rate": 4.147041611305952e-06, + "loss": 40.4448, + "step": 74140 + }, + { + "epoch": 0.2995753827009862, + "grad_norm": 566.9742431640625, + "learning_rate": 4.145651951615752e-06, + "loss": 40.5708, + "step": 74150 + }, + { + "epoch": 0.2996157839663538, + "grad_norm": 766.1707153320312, + "learning_rate": 4.144262359902104e-06, + "loss": 43.4644, + "step": 74160 + }, + { + "epoch": 0.29965618523172144, + "grad_norm": 870.1135864257812, + "learning_rate": 4.142872836275572e-06, + "loss": 50.9456, + "step": 74170 + }, + { + "epoch": 0.2996965864970891, + "grad_norm": 333.26947021484375, + "learning_rate": 4.141483380846716e-06, + "loss": 50.0214, + "step": 74180 + }, + { + "epoch": 0.2997369877624567, + "grad_norm": 222.6185302734375, + "learning_rate": 4.1400939937260894e-06, + "loss": 25.4425, + "step": 74190 + }, + { + "epoch": 0.29977738902782436, + "grad_norm": 474.7249755859375, + "learning_rate": 4.138704675024235e-06, + "loss": 39.1855, + "step": 74200 + }, + { + "epoch": 0.299817790293192, + "grad_norm": 416.831298828125, + "learning_rate": 4.137315424851696e-06, + "loss": 29.6196, + "step": 74210 + }, + { + "epoch": 0.29985819155855964, + "grad_norm": 377.79144287109375, + "learning_rate": 4.1359262433190105e-06, + "loss": 46.4644, + "step": 74220 + }, + { + "epoch": 0.2998985928239272, + "grad_norm": 1291.645263671875, + "learning_rate": 4.134537130536708e-06, + "loss": 46.8967, + "step": 74230 + }, + { + "epoch": 0.29993899408929486, + "grad_norm": 511.9327392578125, + "learning_rate": 4.133148086615314e-06, + "loss": 37.661, + "step": 74240 + }, + { + "epoch": 0.2999793953546625, + "grad_norm": 490.469970703125, + "learning_rate": 4.131759111665349e-06, + "loss": 37.2974, + "step": 74250 + }, + { + "epoch": 0.30001979662003014, + "grad_norm": 643.108154296875, + "learning_rate": 4.130370205797326e-06, + "loss": 36.1692, + "step": 74260 + }, + { + "epoch": 0.3000601978853978, + "grad_norm": 365.9134521484375, + "learning_rate": 4.128981369121754e-06, + "loss": 40.0982, + "step": 74270 + }, + { + "epoch": 0.3001005991507654, + "grad_norm": 735.3226318359375, + "learning_rate": 4.127592601749141e-06, + "loss": 48.7191, + "step": 74280 + }, + { + "epoch": 0.300141000416133, + "grad_norm": 362.46533203125, + "learning_rate": 4.1262039037899775e-06, + "loss": 38.9229, + "step": 74290 + }, + { + "epoch": 0.30018140168150065, + "grad_norm": 391.4541931152344, + "learning_rate": 4.12481527535476e-06, + "loss": 44.5621, + "step": 74300 + }, + { + "epoch": 0.3002218029468683, + "grad_norm": 1557.863525390625, + "learning_rate": 4.123426716553972e-06, + "loss": 61.9819, + "step": 74310 + }, + { + "epoch": 0.30026220421223593, + "grad_norm": 845.4327392578125, + "learning_rate": 4.122038227498101e-06, + "loss": 58.8395, + "step": 74320 + }, + { + "epoch": 0.30030260547760357, + "grad_norm": 483.0872802734375, + "learning_rate": 4.120649808297616e-06, + "loss": 84.1862, + "step": 74330 + }, + { + "epoch": 0.3003430067429712, + "grad_norm": 752.4678344726562, + "learning_rate": 4.119261459062992e-06, + "loss": 49.809, + "step": 74340 + }, + { + "epoch": 0.30038340800833885, + "grad_norm": 527.9071655273438, + "learning_rate": 4.1178731799046915e-06, + "loss": 48.4955, + "step": 74350 + }, + { + "epoch": 0.30042380927370643, + "grad_norm": 1314.4130859375, + "learning_rate": 4.116484970933174e-06, + "loss": 41.3964, + "step": 74360 + }, + { + "epoch": 0.3004642105390741, + "grad_norm": 586.785400390625, + "learning_rate": 4.1150968322588915e-06, + "loss": 56.8744, + "step": 74370 + }, + { + "epoch": 0.3005046118044417, + "grad_norm": 906.0205078125, + "learning_rate": 4.113708763992294e-06, + "loss": 45.647, + "step": 74380 + }, + { + "epoch": 0.30054501306980935, + "grad_norm": 271.83453369140625, + "learning_rate": 4.1123207662438216e-06, + "loss": 53.6612, + "step": 74390 + }, + { + "epoch": 0.300585414335177, + "grad_norm": 772.8023681640625, + "learning_rate": 4.110932839123911e-06, + "loss": 45.7652, + "step": 74400 + }, + { + "epoch": 0.30062581560054463, + "grad_norm": 1087.2738037109375, + "learning_rate": 4.109544982742995e-06, + "loss": 47.2202, + "step": 74410 + }, + { + "epoch": 0.3006662168659122, + "grad_norm": 195.87841796875, + "learning_rate": 4.108157197211499e-06, + "loss": 37.906, + "step": 74420 + }, + { + "epoch": 0.30070661813127986, + "grad_norm": 625.649169921875, + "learning_rate": 4.1067694826398405e-06, + "loss": 57.2702, + "step": 74430 + }, + { + "epoch": 0.3007470193966475, + "grad_norm": 1060.251220703125, + "learning_rate": 4.105381839138436e-06, + "loss": 54.2827, + "step": 74440 + }, + { + "epoch": 0.30078742066201514, + "grad_norm": 236.252197265625, + "learning_rate": 4.103994266817694e-06, + "loss": 29.2483, + "step": 74450 + }, + { + "epoch": 0.3008278219273828, + "grad_norm": 607.3275756835938, + "learning_rate": 4.102606765788014e-06, + "loss": 41.7655, + "step": 74460 + }, + { + "epoch": 0.3008682231927504, + "grad_norm": 469.4609069824219, + "learning_rate": 4.101219336159795e-06, + "loss": 39.0194, + "step": 74470 + }, + { + "epoch": 0.300908624458118, + "grad_norm": 527.9530639648438, + "learning_rate": 4.09983197804343e-06, + "loss": 49.2814, + "step": 74480 + }, + { + "epoch": 0.30094902572348564, + "grad_norm": 707.5580444335938, + "learning_rate": 4.098444691549302e-06, + "loss": 48.481, + "step": 74490 + }, + { + "epoch": 0.3009894269888533, + "grad_norm": 266.0844421386719, + "learning_rate": 4.097057476787792e-06, + "loss": 37.1013, + "step": 74500 + }, + { + "epoch": 0.3010298282542209, + "grad_norm": 565.5172119140625, + "learning_rate": 4.0956703338692755e-06, + "loss": 50.1374, + "step": 74510 + }, + { + "epoch": 0.30107022951958856, + "grad_norm": 756.9215087890625, + "learning_rate": 4.09428326290412e-06, + "loss": 47.4215, + "step": 74520 + }, + { + "epoch": 0.3011106307849562, + "grad_norm": 368.2695617675781, + "learning_rate": 4.092896264002689e-06, + "loss": 66.5132, + "step": 74530 + }, + { + "epoch": 0.30115103205032384, + "grad_norm": 397.1333923339844, + "learning_rate": 4.09150933727534e-06, + "loss": 43.6294, + "step": 74540 + }, + { + "epoch": 0.3011914333156914, + "grad_norm": 443.4328918457031, + "learning_rate": 4.0901224828324225e-06, + "loss": 55.6375, + "step": 74550 + }, + { + "epoch": 0.30123183458105907, + "grad_norm": 1249.0457763671875, + "learning_rate": 4.088735700784283e-06, + "loss": 51.6747, + "step": 74560 + }, + { + "epoch": 0.3012722358464267, + "grad_norm": 779.8118896484375, + "learning_rate": 4.087348991241262e-06, + "loss": 32.0234, + "step": 74570 + }, + { + "epoch": 0.30131263711179435, + "grad_norm": 902.4772338867188, + "learning_rate": 4.0859623543136935e-06, + "loss": 39.6898, + "step": 74580 + }, + { + "epoch": 0.301353038377162, + "grad_norm": 556.6895751953125, + "learning_rate": 4.084575790111905e-06, + "loss": 38.2554, + "step": 74590 + }, + { + "epoch": 0.3013934396425296, + "grad_norm": 1023.3602294921875, + "learning_rate": 4.08318929874622e-06, + "loss": 46.229, + "step": 74600 + }, + { + "epoch": 0.3014338409078972, + "grad_norm": 625.0224609375, + "learning_rate": 4.081802880326955e-06, + "loss": 52.6646, + "step": 74610 + }, + { + "epoch": 0.30147424217326485, + "grad_norm": 291.3082580566406, + "learning_rate": 4.080416534964422e-06, + "loss": 42.6597, + "step": 74620 + }, + { + "epoch": 0.3015146434386325, + "grad_norm": 2209.510009765625, + "learning_rate": 4.079030262768924e-06, + "loss": 50.5197, + "step": 74630 + }, + { + "epoch": 0.30155504470400013, + "grad_norm": 542.4468994140625, + "learning_rate": 4.077644063850761e-06, + "loss": 39.2088, + "step": 74640 + }, + { + "epoch": 0.30159544596936777, + "grad_norm": 584.303466796875, + "learning_rate": 4.076257938320226e-06, + "loss": 46.4614, + "step": 74650 + }, + { + "epoch": 0.3016358472347354, + "grad_norm": 516.731689453125, + "learning_rate": 4.074871886287609e-06, + "loss": 31.4396, + "step": 74660 + }, + { + "epoch": 0.30167624850010305, + "grad_norm": 464.3906555175781, + "learning_rate": 4.073485907863189e-06, + "loss": 59.6245, + "step": 74670 + }, + { + "epoch": 0.30171664976547063, + "grad_norm": 535.5070190429688, + "learning_rate": 4.0721000031572445e-06, + "loss": 42.0167, + "step": 74680 + }, + { + "epoch": 0.3017570510308383, + "grad_norm": 697.2291870117188, + "learning_rate": 4.070714172280043e-06, + "loss": 51.2677, + "step": 74690 + }, + { + "epoch": 0.3017974522962059, + "grad_norm": 596.2420654296875, + "learning_rate": 4.06932841534185e-06, + "loss": 36.7729, + "step": 74700 + }, + { + "epoch": 0.30183785356157355, + "grad_norm": 797.6749267578125, + "learning_rate": 4.067942732452926e-06, + "loss": 30.443, + "step": 74710 + }, + { + "epoch": 0.3018782548269412, + "grad_norm": 984.3765869140625, + "learning_rate": 4.06655712372352e-06, + "loss": 42.8845, + "step": 74720 + }, + { + "epoch": 0.30191865609230883, + "grad_norm": 435.9882507324219, + "learning_rate": 4.065171589263878e-06, + "loss": 41.4888, + "step": 74730 + }, + { + "epoch": 0.3019590573576764, + "grad_norm": 613.9766845703125, + "learning_rate": 4.063786129184243e-06, + "loss": 47.646, + "step": 74740 + }, + { + "epoch": 0.30199945862304406, + "grad_norm": 795.3075561523438, + "learning_rate": 4.06240074359485e-06, + "loss": 64.4297, + "step": 74750 + }, + { + "epoch": 0.3020398598884117, + "grad_norm": 880.7652587890625, + "learning_rate": 4.061015432605927e-06, + "loss": 35.325, + "step": 74760 + }, + { + "epoch": 0.30208026115377934, + "grad_norm": 693.7550048828125, + "learning_rate": 4.059630196327696e-06, + "loss": 57.8272, + "step": 74770 + }, + { + "epoch": 0.302120662419147, + "grad_norm": 653.6392211914062, + "learning_rate": 4.058245034870375e-06, + "loss": 42.8275, + "step": 74780 + }, + { + "epoch": 0.3021610636845146, + "grad_norm": 270.9452819824219, + "learning_rate": 4.056859948344175e-06, + "loss": 31.6941, + "step": 74790 + }, + { + "epoch": 0.3022014649498822, + "grad_norm": 675.5482788085938, + "learning_rate": 4.0554749368593e-06, + "loss": 36.3624, + "step": 74800 + }, + { + "epoch": 0.30224186621524984, + "grad_norm": 372.732177734375, + "learning_rate": 4.054090000525949e-06, + "loss": 39.691, + "step": 74810 + }, + { + "epoch": 0.3022822674806175, + "grad_norm": 543.2750244140625, + "learning_rate": 4.052705139454316e-06, + "loss": 27.9947, + "step": 74820 + }, + { + "epoch": 0.3023226687459851, + "grad_norm": 817.23779296875, + "learning_rate": 4.051320353754586e-06, + "loss": 52.1108, + "step": 74830 + }, + { + "epoch": 0.30236307001135276, + "grad_norm": 704.6561279296875, + "learning_rate": 4.049935643536943e-06, + "loss": 53.9041, + "step": 74840 + }, + { + "epoch": 0.3024034712767204, + "grad_norm": 456.3458251953125, + "learning_rate": 4.048551008911561e-06, + "loss": 33.8443, + "step": 74850 + }, + { + "epoch": 0.30244387254208804, + "grad_norm": 758.6323852539062, + "learning_rate": 4.0471664499886074e-06, + "loss": 58.1973, + "step": 74860 + }, + { + "epoch": 0.30248427380745563, + "grad_norm": 336.8901062011719, + "learning_rate": 4.045781966878247e-06, + "loss": 28.6064, + "step": 74870 + }, + { + "epoch": 0.30252467507282327, + "grad_norm": 528.4429931640625, + "learning_rate": 4.044397559690638e-06, + "loss": 33.9238, + "step": 74880 + }, + { + "epoch": 0.3025650763381909, + "grad_norm": 1521.8575439453125, + "learning_rate": 4.043013228535928e-06, + "loss": 54.3209, + "step": 74890 + }, + { + "epoch": 0.30260547760355855, + "grad_norm": 787.5988159179688, + "learning_rate": 4.041628973524264e-06, + "loss": 36.775, + "step": 74900 + }, + { + "epoch": 0.3026458788689262, + "grad_norm": 830.396240234375, + "learning_rate": 4.040244794765783e-06, + "loss": 47.7584, + "step": 74910 + }, + { + "epoch": 0.30268628013429383, + "grad_norm": 459.3642272949219, + "learning_rate": 4.03886069237062e-06, + "loss": 38.857, + "step": 74920 + }, + { + "epoch": 0.3027266813996614, + "grad_norm": 396.26116943359375, + "learning_rate": 4.037476666448899e-06, + "loss": 40.7984, + "step": 74930 + }, + { + "epoch": 0.30276708266502905, + "grad_norm": 744.2449340820312, + "learning_rate": 4.0360927171107436e-06, + "loss": 69.9115, + "step": 74940 + }, + { + "epoch": 0.3028074839303967, + "grad_norm": 788.3013916015625, + "learning_rate": 4.034708844466267e-06, + "loss": 42.2636, + "step": 74950 + }, + { + "epoch": 0.30284788519576433, + "grad_norm": 576.2045288085938, + "learning_rate": 4.033325048625578e-06, + "loss": 48.013, + "step": 74960 + }, + { + "epoch": 0.302888286461132, + "grad_norm": 430.5680847167969, + "learning_rate": 4.031941329698778e-06, + "loss": 40.4983, + "step": 74970 + }, + { + "epoch": 0.3029286877264996, + "grad_norm": 563.1224365234375, + "learning_rate": 4.030557687795965e-06, + "loss": 33.5722, + "step": 74980 + }, + { + "epoch": 0.30296908899186725, + "grad_norm": 424.4299011230469, + "learning_rate": 4.029174123027226e-06, + "loss": 47.6854, + "step": 74990 + }, + { + "epoch": 0.30300949025723484, + "grad_norm": 1378.790283203125, + "learning_rate": 4.027790635502646e-06, + "loss": 37.4708, + "step": 75000 + }, + { + "epoch": 0.3030498915226025, + "grad_norm": 1003.11865234375, + "learning_rate": 4.026407225332305e-06, + "loss": 71.0551, + "step": 75010 + }, + { + "epoch": 0.3030902927879701, + "grad_norm": 544.5147705078125, + "learning_rate": 4.025023892626272e-06, + "loss": 33.1692, + "step": 75020 + }, + { + "epoch": 0.30313069405333776, + "grad_norm": 786.7648315429688, + "learning_rate": 4.023640637494612e-06, + "loss": 44.8312, + "step": 75030 + }, + { + "epoch": 0.3031710953187054, + "grad_norm": 748.3566284179688, + "learning_rate": 4.022257460047387e-06, + "loss": 35.4565, + "step": 75040 + }, + { + "epoch": 0.30321149658407304, + "grad_norm": 452.1760559082031, + "learning_rate": 4.0208743603946505e-06, + "loss": 42.6222, + "step": 75050 + }, + { + "epoch": 0.3032518978494406, + "grad_norm": 388.5080871582031, + "learning_rate": 4.0194913386464445e-06, + "loss": 59.1521, + "step": 75060 + }, + { + "epoch": 0.30329229911480826, + "grad_norm": 1736.18408203125, + "learning_rate": 4.018108394912814e-06, + "loss": 73.9259, + "step": 75070 + }, + { + "epoch": 0.3033327003801759, + "grad_norm": 324.61883544921875, + "learning_rate": 4.016725529303792e-06, + "loss": 37.6378, + "step": 75080 + }, + { + "epoch": 0.30337310164554354, + "grad_norm": 844.913818359375, + "learning_rate": 4.015342741929407e-06, + "loss": 29.1962, + "step": 75090 + }, + { + "epoch": 0.3034135029109112, + "grad_norm": 572.6766357421875, + "learning_rate": 4.013960032899681e-06, + "loss": 39.9053, + "step": 75100 + }, + { + "epoch": 0.3034539041762788, + "grad_norm": 394.77142333984375, + "learning_rate": 4.012577402324631e-06, + "loss": 47.6606, + "step": 75110 + }, + { + "epoch": 0.3034943054416464, + "grad_norm": 731.8491821289062, + "learning_rate": 4.011194850314263e-06, + "loss": 47.0361, + "step": 75120 + }, + { + "epoch": 0.30353470670701405, + "grad_norm": 450.2945251464844, + "learning_rate": 4.009812376978585e-06, + "loss": 45.0056, + "step": 75130 + }, + { + "epoch": 0.3035751079723817, + "grad_norm": 512.0255126953125, + "learning_rate": 4.0084299824275926e-06, + "loss": 29.1424, + "step": 75140 + }, + { + "epoch": 0.3036155092377493, + "grad_norm": 599.2891845703125, + "learning_rate": 4.007047666771274e-06, + "loss": 47.6622, + "step": 75150 + }, + { + "epoch": 0.30365591050311697, + "grad_norm": 533.6417236328125, + "learning_rate": 4.005665430119615e-06, + "loss": 30.358, + "step": 75160 + }, + { + "epoch": 0.3036963117684846, + "grad_norm": 729.7269897460938, + "learning_rate": 4.0042832725825954e-06, + "loss": 37.5167, + "step": 75170 + }, + { + "epoch": 0.30373671303385225, + "grad_norm": 487.214599609375, + "learning_rate": 4.002901194270186e-06, + "loss": 50.324, + "step": 75180 + }, + { + "epoch": 0.30377711429921983, + "grad_norm": 566.6151123046875, + "learning_rate": 4.001519195292352e-06, + "loss": 41.4339, + "step": 75190 + }, + { + "epoch": 0.30381751556458747, + "grad_norm": 470.7705078125, + "learning_rate": 4.000137275759053e-06, + "loss": 50.9527, + "step": 75200 + }, + { + "epoch": 0.3038579168299551, + "grad_norm": 518.1776733398438, + "learning_rate": 3.9987554357802435e-06, + "loss": 51.7486, + "step": 75210 + }, + { + "epoch": 0.30389831809532275, + "grad_norm": 374.93310546875, + "learning_rate": 3.997373675465869e-06, + "loss": 42.1258, + "step": 75220 + }, + { + "epoch": 0.3039387193606904, + "grad_norm": 643.6387329101562, + "learning_rate": 3.995991994925869e-06, + "loss": 41.6441, + "step": 75230 + }, + { + "epoch": 0.30397912062605803, + "grad_norm": 419.3672790527344, + "learning_rate": 3.994610394270178e-06, + "loss": 52.551, + "step": 75240 + }, + { + "epoch": 0.3040195218914256, + "grad_norm": 828.9144897460938, + "learning_rate": 3.993228873608724e-06, + "loss": 32.2149, + "step": 75250 + }, + { + "epoch": 0.30405992315679325, + "grad_norm": 451.1707458496094, + "learning_rate": 3.991847433051427e-06, + "loss": 43.9339, + "step": 75260 + }, + { + "epoch": 0.3041003244221609, + "grad_norm": 533.591552734375, + "learning_rate": 3.990466072708204e-06, + "loss": 39.6879, + "step": 75270 + }, + { + "epoch": 0.30414072568752853, + "grad_norm": 701.5623779296875, + "learning_rate": 3.989084792688962e-06, + "loss": 39.0662, + "step": 75280 + }, + { + "epoch": 0.3041811269528962, + "grad_norm": 263.5152893066406, + "learning_rate": 3.987703593103604e-06, + "loss": 40.9297, + "step": 75290 + }, + { + "epoch": 0.3042215282182638, + "grad_norm": 351.6845703125, + "learning_rate": 3.986322474062025e-06, + "loss": 26.069, + "step": 75300 + }, + { + "epoch": 0.30426192948363145, + "grad_norm": 593.352783203125, + "learning_rate": 3.9849414356741165e-06, + "loss": 44.6515, + "step": 75310 + }, + { + "epoch": 0.30430233074899904, + "grad_norm": 724.2471313476562, + "learning_rate": 3.9835604780497575e-06, + "loss": 37.1545, + "step": 75320 + }, + { + "epoch": 0.3043427320143667, + "grad_norm": 689.8421020507812, + "learning_rate": 3.982179601298827e-06, + "loss": 41.7073, + "step": 75330 + }, + { + "epoch": 0.3043831332797343, + "grad_norm": 335.9830627441406, + "learning_rate": 3.9807988055311946e-06, + "loss": 44.3201, + "step": 75340 + }, + { + "epoch": 0.30442353454510196, + "grad_norm": 1735.399658203125, + "learning_rate": 3.979418090856723e-06, + "loss": 52.7617, + "step": 75350 + }, + { + "epoch": 0.3044639358104696, + "grad_norm": 764.639892578125, + "learning_rate": 3.978037457385268e-06, + "loss": 44.3283, + "step": 75360 + }, + { + "epoch": 0.30450433707583724, + "grad_norm": 583.4554443359375, + "learning_rate": 3.976656905226686e-06, + "loss": 49.1295, + "step": 75370 + }, + { + "epoch": 0.3045447383412048, + "grad_norm": 756.2467041015625, + "learning_rate": 3.9752764344908155e-06, + "loss": 52.4948, + "step": 75380 + }, + { + "epoch": 0.30458513960657246, + "grad_norm": 580.8175048828125, + "learning_rate": 3.9738960452874975e-06, + "loss": 57.1519, + "step": 75390 + }, + { + "epoch": 0.3046255408719401, + "grad_norm": 538.5950317382812, + "learning_rate": 3.972515737726563e-06, + "loss": 43.3488, + "step": 75400 + }, + { + "epoch": 0.30466594213730774, + "grad_norm": 716.4103393554688, + "learning_rate": 3.9711355119178345e-06, + "loss": 47.843, + "step": 75410 + }, + { + "epoch": 0.3047063434026754, + "grad_norm": 1004.1705932617188, + "learning_rate": 3.969755367971131e-06, + "loss": 38.9337, + "step": 75420 + }, + { + "epoch": 0.304746744668043, + "grad_norm": 560.037109375, + "learning_rate": 3.9683753059962646e-06, + "loss": 33.0987, + "step": 75430 + }, + { + "epoch": 0.3047871459334106, + "grad_norm": 945.2614135742188, + "learning_rate": 3.966995326103041e-06, + "loss": 54.9676, + "step": 75440 + }, + { + "epoch": 0.30482754719877825, + "grad_norm": 543.81787109375, + "learning_rate": 3.965615428401257e-06, + "loss": 47.2744, + "step": 75450 + }, + { + "epoch": 0.3048679484641459, + "grad_norm": 773.4525756835938, + "learning_rate": 3.964235613000708e-06, + "loss": 57.7128, + "step": 75460 + }, + { + "epoch": 0.30490834972951353, + "grad_norm": 652.48046875, + "learning_rate": 3.962855880011177e-06, + "loss": 23.7307, + "step": 75470 + }, + { + "epoch": 0.30494875099488117, + "grad_norm": 485.0013122558594, + "learning_rate": 3.961476229542446e-06, + "loss": 38.741, + "step": 75480 + }, + { + "epoch": 0.3049891522602488, + "grad_norm": 599.990234375, + "learning_rate": 3.9600966617042825e-06, + "loss": 36.2357, + "step": 75490 + }, + { + "epoch": 0.30502955352561645, + "grad_norm": 1090.4490966796875, + "learning_rate": 3.958717176606456e-06, + "loss": 69.718, + "step": 75500 + }, + { + "epoch": 0.30506995479098403, + "grad_norm": 520.7333374023438, + "learning_rate": 3.957337774358725e-06, + "loss": 53.6938, + "step": 75510 + }, + { + "epoch": 0.3051103560563517, + "grad_norm": 568.2504272460938, + "learning_rate": 3.955958455070842e-06, + "loss": 38.5345, + "step": 75520 + }, + { + "epoch": 0.3051507573217193, + "grad_norm": 397.0588073730469, + "learning_rate": 3.954579218852553e-06, + "loss": 44.8067, + "step": 75530 + }, + { + "epoch": 0.30519115858708695, + "grad_norm": 592.46875, + "learning_rate": 3.953200065813597e-06, + "loss": 45.6429, + "step": 75540 + }, + { + "epoch": 0.3052315598524546, + "grad_norm": 597.1698608398438, + "learning_rate": 3.951820996063708e-06, + "loss": 50.6538, + "step": 75550 + }, + { + "epoch": 0.30527196111782223, + "grad_norm": 148.9978790283203, + "learning_rate": 3.950442009712612e-06, + "loss": 54.7872, + "step": 75560 + }, + { + "epoch": 0.3053123623831898, + "grad_norm": 614.67333984375, + "learning_rate": 3.949063106870031e-06, + "loss": 39.9307, + "step": 75570 + }, + { + "epoch": 0.30535276364855746, + "grad_norm": 268.4046325683594, + "learning_rate": 3.9476842876456735e-06, + "loss": 24.0442, + "step": 75580 + }, + { + "epoch": 0.3053931649139251, + "grad_norm": 314.32305908203125, + "learning_rate": 3.946305552149247e-06, + "loss": 34.3205, + "step": 75590 + }, + { + "epoch": 0.30543356617929274, + "grad_norm": 629.6476440429688, + "learning_rate": 3.944926900490452e-06, + "loss": 35.2416, + "step": 75600 + }, + { + "epoch": 0.3054739674446604, + "grad_norm": 873.33984375, + "learning_rate": 3.943548332778982e-06, + "loss": 44.9323, + "step": 75610 + }, + { + "epoch": 0.305514368710028, + "grad_norm": 588.8949584960938, + "learning_rate": 3.942169849124523e-06, + "loss": 38.1177, + "step": 75620 + }, + { + "epoch": 0.30555476997539566, + "grad_norm": 594.586669921875, + "learning_rate": 3.940791449636753e-06, + "loss": 26.6151, + "step": 75630 + }, + { + "epoch": 0.30559517124076324, + "grad_norm": 1274.4742431640625, + "learning_rate": 3.939413134425347e-06, + "loss": 62.9132, + "step": 75640 + }, + { + "epoch": 0.3056355725061309, + "grad_norm": 434.52105712890625, + "learning_rate": 3.938034903599972e-06, + "loss": 44.5439, + "step": 75650 + }, + { + "epoch": 0.3056759737714985, + "grad_norm": 701.6295166015625, + "learning_rate": 3.9366567572702845e-06, + "loss": 45.023, + "step": 75660 + }, + { + "epoch": 0.30571637503686616, + "grad_norm": 408.67327880859375, + "learning_rate": 3.935278695545939e-06, + "loss": 50.6655, + "step": 75670 + }, + { + "epoch": 0.3057567763022338, + "grad_norm": 536.2960815429688, + "learning_rate": 3.933900718536579e-06, + "loss": 38.9414, + "step": 75680 + }, + { + "epoch": 0.30579717756760144, + "grad_norm": 608.7130126953125, + "learning_rate": 3.932522826351849e-06, + "loss": 44.9229, + "step": 75690 + }, + { + "epoch": 0.305837578832969, + "grad_norm": 874.279052734375, + "learning_rate": 3.9311450191013774e-06, + "loss": 32.2318, + "step": 75700 + }, + { + "epoch": 0.30587798009833667, + "grad_norm": 882.767578125, + "learning_rate": 3.929767296894792e-06, + "loss": 35.0614, + "step": 75710 + }, + { + "epoch": 0.3059183813637043, + "grad_norm": 480.5823974609375, + "learning_rate": 3.9283896598417104e-06, + "loss": 31.6882, + "step": 75720 + }, + { + "epoch": 0.30595878262907195, + "grad_norm": 481.7046203613281, + "learning_rate": 3.927012108051746e-06, + "loss": 39.5504, + "step": 75730 + }, + { + "epoch": 0.3059991838944396, + "grad_norm": 164.4939727783203, + "learning_rate": 3.925634641634505e-06, + "loss": 31.9604, + "step": 75740 + }, + { + "epoch": 0.3060395851598072, + "grad_norm": 249.40625, + "learning_rate": 3.924257260699583e-06, + "loss": 36.5495, + "step": 75750 + }, + { + "epoch": 0.3060799864251748, + "grad_norm": 398.69757080078125, + "learning_rate": 3.922879965356574e-06, + "loss": 49.0819, + "step": 75760 + }, + { + "epoch": 0.30612038769054245, + "grad_norm": 1438.6021728515625, + "learning_rate": 3.921502755715064e-06, + "loss": 57.2429, + "step": 75770 + }, + { + "epoch": 0.3061607889559101, + "grad_norm": 600.7191162109375, + "learning_rate": 3.920125631884628e-06, + "loss": 38.778, + "step": 75780 + }, + { + "epoch": 0.30620119022127773, + "grad_norm": 894.872802734375, + "learning_rate": 3.918748593974841e-06, + "loss": 55.2803, + "step": 75790 + }, + { + "epoch": 0.30624159148664537, + "grad_norm": 968.234130859375, + "learning_rate": 3.917371642095265e-06, + "loss": 66.3114, + "step": 75800 + }, + { + "epoch": 0.306281992752013, + "grad_norm": 483.552734375, + "learning_rate": 3.91599477635546e-06, + "loss": 59.5434, + "step": 75810 + }, + { + "epoch": 0.30632239401738065, + "grad_norm": 442.4198303222656, + "learning_rate": 3.914617996864976e-06, + "loss": 49.4996, + "step": 75820 + }, + { + "epoch": 0.30636279528274823, + "grad_norm": 1012.351318359375, + "learning_rate": 3.9132413037333565e-06, + "loss": 59.5431, + "step": 75830 + }, + { + "epoch": 0.3064031965481159, + "grad_norm": 269.81292724609375, + "learning_rate": 3.911864697070139e-06, + "loss": 30.562, + "step": 75840 + }, + { + "epoch": 0.3064435978134835, + "grad_norm": 939.7927856445312, + "learning_rate": 3.910488176984853e-06, + "loss": 39.42, + "step": 75850 + }, + { + "epoch": 0.30648399907885115, + "grad_norm": 710.1830444335938, + "learning_rate": 3.909111743587023e-06, + "loss": 55.374, + "step": 75860 + }, + { + "epoch": 0.3065244003442188, + "grad_norm": 811.8646850585938, + "learning_rate": 3.907735396986166e-06, + "loss": 36.7296, + "step": 75870 + }, + { + "epoch": 0.30656480160958643, + "grad_norm": 520.3335571289062, + "learning_rate": 3.9063591372917875e-06, + "loss": 32.1305, + "step": 75880 + }, + { + "epoch": 0.306605202874954, + "grad_norm": 369.95404052734375, + "learning_rate": 3.904982964613395e-06, + "loss": 50.2928, + "step": 75890 + }, + { + "epoch": 0.30664560414032166, + "grad_norm": 723.0009155273438, + "learning_rate": 3.903606879060483e-06, + "loss": 31.4758, + "step": 75900 + }, + { + "epoch": 0.3066860054056893, + "grad_norm": 626.061767578125, + "learning_rate": 3.902230880742541e-06, + "loss": 26.1713, + "step": 75910 + }, + { + "epoch": 0.30672640667105694, + "grad_norm": 463.85137939453125, + "learning_rate": 3.900854969769049e-06, + "loss": 41.5159, + "step": 75920 + }, + { + "epoch": 0.3067668079364246, + "grad_norm": 1027.1707763671875, + "learning_rate": 3.899479146249482e-06, + "loss": 45.4365, + "step": 75930 + }, + { + "epoch": 0.3068072092017922, + "grad_norm": 424.14703369140625, + "learning_rate": 3.898103410293309e-06, + "loss": 49.6982, + "step": 75940 + }, + { + "epoch": 0.3068476104671598, + "grad_norm": 546.7954711914062, + "learning_rate": 3.89672776200999e-06, + "loss": 46.8047, + "step": 75950 + }, + { + "epoch": 0.30688801173252744, + "grad_norm": 440.770751953125, + "learning_rate": 3.895352201508981e-06, + "loss": 56.3813, + "step": 75960 + }, + { + "epoch": 0.3069284129978951, + "grad_norm": 568.0527954101562, + "learning_rate": 3.893976728899726e-06, + "loss": 38.7457, + "step": 75970 + }, + { + "epoch": 0.3069688142632627, + "grad_norm": 691.5803833007812, + "learning_rate": 3.892601344291667e-06, + "loss": 44.3474, + "step": 75980 + }, + { + "epoch": 0.30700921552863036, + "grad_norm": 467.8775939941406, + "learning_rate": 3.891226047794237e-06, + "loss": 54.3494, + "step": 75990 + }, + { + "epoch": 0.307049616793998, + "grad_norm": 608.9412231445312, + "learning_rate": 3.8898508395168645e-06, + "loss": 52.8284, + "step": 76000 + }, + { + "epoch": 0.30709001805936564, + "grad_norm": 727.375, + "learning_rate": 3.888475719568961e-06, + "loss": 45.566, + "step": 76010 + }, + { + "epoch": 0.30713041932473323, + "grad_norm": 687.283935546875, + "learning_rate": 3.887100688059947e-06, + "loss": 32.538, + "step": 76020 + }, + { + "epoch": 0.30717082059010087, + "grad_norm": 316.9288024902344, + "learning_rate": 3.885725745099222e-06, + "loss": 36.429, + "step": 76030 + }, + { + "epoch": 0.3072112218554685, + "grad_norm": 504.8291015625, + "learning_rate": 3.8843508907961855e-06, + "loss": 48.7635, + "step": 76040 + }, + { + "epoch": 0.30725162312083615, + "grad_norm": 709.7803344726562, + "learning_rate": 3.882976125260229e-06, + "loss": 49.7421, + "step": 76050 + }, + { + "epoch": 0.3072920243862038, + "grad_norm": 788.4102172851562, + "learning_rate": 3.881601448600736e-06, + "loss": 40.394, + "step": 76060 + }, + { + "epoch": 0.30733242565157143, + "grad_norm": 542.3693237304688, + "learning_rate": 3.880226860927082e-06, + "loss": 49.177, + "step": 76070 + }, + { + "epoch": 0.307372826916939, + "grad_norm": 520.00048828125, + "learning_rate": 3.8788523623486405e-06, + "loss": 43.6388, + "step": 76080 + }, + { + "epoch": 0.30741322818230665, + "grad_norm": 415.659912109375, + "learning_rate": 3.877477952974768e-06, + "loss": 27.0779, + "step": 76090 + }, + { + "epoch": 0.3074536294476743, + "grad_norm": 1292.7166748046875, + "learning_rate": 3.876103632914825e-06, + "loss": 44.3588, + "step": 76100 + }, + { + "epoch": 0.30749403071304193, + "grad_norm": 520.9183959960938, + "learning_rate": 3.8747294022781555e-06, + "loss": 33.352, + "step": 76110 + }, + { + "epoch": 0.3075344319784096, + "grad_norm": 587.1658325195312, + "learning_rate": 3.873355261174105e-06, + "loss": 54.5753, + "step": 76120 + }, + { + "epoch": 0.3075748332437772, + "grad_norm": 754.8411254882812, + "learning_rate": 3.871981209712006e-06, + "loss": 50.3107, + "step": 76130 + }, + { + "epoch": 0.30761523450914485, + "grad_norm": 493.7840881347656, + "learning_rate": 3.870607248001184e-06, + "loss": 37.3539, + "step": 76140 + }, + { + "epoch": 0.30765563577451244, + "grad_norm": 714.5708618164062, + "learning_rate": 3.869233376150961e-06, + "loss": 41.1049, + "step": 76150 + }, + { + "epoch": 0.3076960370398801, + "grad_norm": 833.8482055664062, + "learning_rate": 3.867859594270649e-06, + "loss": 58.7063, + "step": 76160 + }, + { + "epoch": 0.3077364383052477, + "grad_norm": 620.5109252929688, + "learning_rate": 3.866485902469554e-06, + "loss": 36.8431, + "step": 76170 + }, + { + "epoch": 0.30777683957061536, + "grad_norm": 558.1962280273438, + "learning_rate": 3.865112300856972e-06, + "loss": 42.8776, + "step": 76180 + }, + { + "epoch": 0.307817240835983, + "grad_norm": 631.0726318359375, + "learning_rate": 3.863738789542196e-06, + "loss": 34.5687, + "step": 76190 + }, + { + "epoch": 0.30785764210135064, + "grad_norm": 667.056640625, + "learning_rate": 3.86236536863451e-06, + "loss": 40.6587, + "step": 76200 + }, + { + "epoch": 0.3078980433667182, + "grad_norm": 284.8694152832031, + "learning_rate": 3.860992038243189e-06, + "loss": 70.5536, + "step": 76210 + }, + { + "epoch": 0.30793844463208586, + "grad_norm": 476.61114501953125, + "learning_rate": 3.8596187984775064e-06, + "loss": 35.2596, + "step": 76220 + }, + { + "epoch": 0.3079788458974535, + "grad_norm": 729.2440185546875, + "learning_rate": 3.8582456494467214e-06, + "loss": 50.2296, + "step": 76230 + }, + { + "epoch": 0.30801924716282114, + "grad_norm": 963.9013061523438, + "learning_rate": 3.8568725912600904e-06, + "loss": 48.3303, + "step": 76240 + }, + { + "epoch": 0.3080596484281888, + "grad_norm": 659.7503051757812, + "learning_rate": 3.855499624026861e-06, + "loss": 34.8169, + "step": 76250 + }, + { + "epoch": 0.3081000496935564, + "grad_norm": 397.644775390625, + "learning_rate": 3.854126747856275e-06, + "loss": 59.7892, + "step": 76260 + }, + { + "epoch": 0.308140450958924, + "grad_norm": 847.7427978515625, + "learning_rate": 3.8527539628575635e-06, + "loss": 56.8665, + "step": 76270 + }, + { + "epoch": 0.30818085222429165, + "grad_norm": 489.5332336425781, + "learning_rate": 3.851381269139955e-06, + "loss": 37.4831, + "step": 76280 + }, + { + "epoch": 0.3082212534896593, + "grad_norm": 817.7260131835938, + "learning_rate": 3.8500086668126666e-06, + "loss": 48.8452, + "step": 76290 + }, + { + "epoch": 0.3082616547550269, + "grad_norm": 610.6978759765625, + "learning_rate": 3.848636155984912e-06, + "loss": 51.255, + "step": 76300 + }, + { + "epoch": 0.30830205602039457, + "grad_norm": 567.9066162109375, + "learning_rate": 3.847263736765892e-06, + "loss": 57.638, + "step": 76310 + }, + { + "epoch": 0.3083424572857622, + "grad_norm": 912.1642456054688, + "learning_rate": 3.8458914092648074e-06, + "loss": 40.1567, + "step": 76320 + }, + { + "epoch": 0.30838285855112985, + "grad_norm": 742.2052612304688, + "learning_rate": 3.844519173590847e-06, + "loss": 29.8157, + "step": 76330 + }, + { + "epoch": 0.30842325981649743, + "grad_norm": 748.3175048828125, + "learning_rate": 3.843147029853194e-06, + "loss": 50.3242, + "step": 76340 + }, + { + "epoch": 0.30846366108186507, + "grad_norm": 515.7076416015625, + "learning_rate": 3.841774978161022e-06, + "loss": 38.1882, + "step": 76350 + }, + { + "epoch": 0.3085040623472327, + "grad_norm": 294.39068603515625, + "learning_rate": 3.840403018623499e-06, + "loss": 36.2087, + "step": 76360 + }, + { + "epoch": 0.30854446361260035, + "grad_norm": 644.3060913085938, + "learning_rate": 3.839031151349786e-06, + "loss": 54.5945, + "step": 76370 + }, + { + "epoch": 0.308584864877968, + "grad_norm": 278.3051452636719, + "learning_rate": 3.837659376449036e-06, + "loss": 40.8898, + "step": 76380 + }, + { + "epoch": 0.30862526614333563, + "grad_norm": 542.3574829101562, + "learning_rate": 3.836287694030395e-06, + "loss": 40.5055, + "step": 76390 + }, + { + "epoch": 0.3086656674087032, + "grad_norm": 800.6304321289062, + "learning_rate": 3.834916104203e-06, + "loss": 58.5879, + "step": 76400 + }, + { + "epoch": 0.30870606867407085, + "grad_norm": 350.9243469238281, + "learning_rate": 3.833544607075986e-06, + "loss": 32.7555, + "step": 76410 + }, + { + "epoch": 0.3087464699394385, + "grad_norm": 389.3909606933594, + "learning_rate": 3.8321732027584734e-06, + "loss": 52.5082, + "step": 76420 + }, + { + "epoch": 0.30878687120480613, + "grad_norm": 806.8661499023438, + "learning_rate": 3.830801891359582e-06, + "loss": 37.8104, + "step": 76430 + }, + { + "epoch": 0.3088272724701738, + "grad_norm": 169.90428161621094, + "learning_rate": 3.829430672988414e-06, + "loss": 42.1398, + "step": 76440 + }, + { + "epoch": 0.3088676737355414, + "grad_norm": 662.7548828125, + "learning_rate": 3.828059547754078e-06, + "loss": 34.0822, + "step": 76450 + }, + { + "epoch": 0.30890807500090905, + "grad_norm": 853.8652954101562, + "learning_rate": 3.826688515765664e-06, + "loss": 49.2767, + "step": 76460 + }, + { + "epoch": 0.30894847626627664, + "grad_norm": 509.5336608886719, + "learning_rate": 3.82531757713226e-06, + "loss": 50.4154, + "step": 76470 + }, + { + "epoch": 0.3089888775316443, + "grad_norm": 701.5994262695312, + "learning_rate": 3.823946731962945e-06, + "loss": 35.8648, + "step": 76480 + }, + { + "epoch": 0.3090292787970119, + "grad_norm": 315.60101318359375, + "learning_rate": 3.8225759803667925e-06, + "loss": 31.0514, + "step": 76490 + }, + { + "epoch": 0.30906968006237956, + "grad_norm": 365.6060791015625, + "learning_rate": 3.821205322452863e-06, + "loss": 44.3825, + "step": 76500 + }, + { + "epoch": 0.3091100813277472, + "grad_norm": 290.24053955078125, + "learning_rate": 3.81983475833022e-06, + "loss": 38.933, + "step": 76510 + }, + { + "epoch": 0.30915048259311484, + "grad_norm": 501.88043212890625, + "learning_rate": 3.818464288107908e-06, + "loss": 52.8634, + "step": 76520 + }, + { + "epoch": 0.3091908838584824, + "grad_norm": 809.6956176757812, + "learning_rate": 3.817093911894968e-06, + "loss": 40.3679, + "step": 76530 + }, + { + "epoch": 0.30923128512385006, + "grad_norm": 764.794677734375, + "learning_rate": 3.8157236298004375e-06, + "loss": 48.3359, + "step": 76540 + }, + { + "epoch": 0.3092716863892177, + "grad_norm": 888.4735717773438, + "learning_rate": 3.814353441933343e-06, + "loss": 52.2985, + "step": 76550 + }, + { + "epoch": 0.30931208765458534, + "grad_norm": 623.149169921875, + "learning_rate": 3.812983348402703e-06, + "loss": 66.8884, + "step": 76560 + }, + { + "epoch": 0.309352488919953, + "grad_norm": 762.5501098632812, + "learning_rate": 3.811613349317531e-06, + "loss": 43.1388, + "step": 76570 + }, + { + "epoch": 0.3093928901853206, + "grad_norm": 920.5236206054688, + "learning_rate": 3.810243444786831e-06, + "loss": 49.3094, + "step": 76580 + }, + { + "epoch": 0.3094332914506882, + "grad_norm": 735.0086059570312, + "learning_rate": 3.8088736349195995e-06, + "loss": 36.29, + "step": 76590 + }, + { + "epoch": 0.30947369271605585, + "grad_norm": 2006.9073486328125, + "learning_rate": 3.8075039198248274e-06, + "loss": 64.1557, + "step": 76600 + }, + { + "epoch": 0.3095140939814235, + "grad_norm": 397.25018310546875, + "learning_rate": 3.8061342996114946e-06, + "loss": 48.698, + "step": 76610 + }, + { + "epoch": 0.30955449524679113, + "grad_norm": 694.335205078125, + "learning_rate": 3.8047647743885762e-06, + "loss": 53.1335, + "step": 76620 + }, + { + "epoch": 0.30959489651215877, + "grad_norm": 1197.7288818359375, + "learning_rate": 3.8033953442650382e-06, + "loss": 60.2836, + "step": 76630 + }, + { + "epoch": 0.3096352977775264, + "grad_norm": 780.9642944335938, + "learning_rate": 3.802026009349843e-06, + "loss": 51.8936, + "step": 76640 + }, + { + "epoch": 0.30967569904289405, + "grad_norm": 882.420166015625, + "learning_rate": 3.800656769751939e-06, + "loss": 46.3737, + "step": 76650 + }, + { + "epoch": 0.30971610030826163, + "grad_norm": 623.1157836914062, + "learning_rate": 3.799287625580273e-06, + "loss": 53.2067, + "step": 76660 + }, + { + "epoch": 0.3097565015736293, + "grad_norm": 851.390869140625, + "learning_rate": 3.7979185769437795e-06, + "loss": 33.3779, + "step": 76670 + }, + { + "epoch": 0.3097969028389969, + "grad_norm": 323.7216796875, + "learning_rate": 3.7965496239513875e-06, + "loss": 35.5668, + "step": 76680 + }, + { + "epoch": 0.30983730410436455, + "grad_norm": 810.2097778320312, + "learning_rate": 3.79518076671202e-06, + "loss": 42.1789, + "step": 76690 + }, + { + "epoch": 0.3098777053697322, + "grad_norm": 715.9926147460938, + "learning_rate": 3.793812005334589e-06, + "loss": 54.3969, + "step": 76700 + }, + { + "epoch": 0.30991810663509983, + "grad_norm": 341.7637939453125, + "learning_rate": 3.792443339928001e-06, + "loss": 45.9676, + "step": 76710 + }, + { + "epoch": 0.3099585079004674, + "grad_norm": 652.6634521484375, + "learning_rate": 3.7910747706011543e-06, + "loss": 43.1486, + "step": 76720 + }, + { + "epoch": 0.30999890916583506, + "grad_norm": 288.5016174316406, + "learning_rate": 3.7897062974629384e-06, + "loss": 27.6082, + "step": 76730 + }, + { + "epoch": 0.3100393104312027, + "grad_norm": 672.7640991210938, + "learning_rate": 3.78833792062224e-06, + "loss": 40.3693, + "step": 76740 + }, + { + "epoch": 0.31007971169657034, + "grad_norm": 559.7825317382812, + "learning_rate": 3.786969640187932e-06, + "loss": 34.8804, + "step": 76750 + }, + { + "epoch": 0.310120112961938, + "grad_norm": 1037.9384765625, + "learning_rate": 3.785601456268882e-06, + "loss": 49.6401, + "step": 76760 + }, + { + "epoch": 0.3101605142273056, + "grad_norm": 614.68115234375, + "learning_rate": 3.7842333689739524e-06, + "loss": 43.7798, + "step": 76770 + }, + { + "epoch": 0.31020091549267326, + "grad_norm": 1368.322021484375, + "learning_rate": 3.782865378411993e-06, + "loss": 49.2492, + "step": 76780 + }, + { + "epoch": 0.31024131675804084, + "grad_norm": 436.1568298339844, + "learning_rate": 3.7814974846918496e-06, + "loss": 55.0184, + "step": 76790 + }, + { + "epoch": 0.3102817180234085, + "grad_norm": 635.700927734375, + "learning_rate": 3.7801296879223594e-06, + "loss": 46.0046, + "step": 76800 + }, + { + "epoch": 0.3103221192887761, + "grad_norm": 366.9886779785156, + "learning_rate": 3.7787619882123506e-06, + "loss": 29.4515, + "step": 76810 + }, + { + "epoch": 0.31036252055414376, + "grad_norm": 640.870849609375, + "learning_rate": 3.7773943856706463e-06, + "loss": 50.9594, + "step": 76820 + }, + { + "epoch": 0.3104029218195114, + "grad_norm": 816.541015625, + "learning_rate": 3.7760268804060583e-06, + "loss": 39.7423, + "step": 76830 + }, + { + "epoch": 0.31044332308487904, + "grad_norm": 704.5332641601562, + "learning_rate": 3.774659472527396e-06, + "loss": 49.7774, + "step": 76840 + }, + { + "epoch": 0.3104837243502466, + "grad_norm": 621.05419921875, + "learning_rate": 3.7732921621434553e-06, + "loss": 36.368, + "step": 76850 + }, + { + "epoch": 0.31052412561561427, + "grad_norm": 326.81329345703125, + "learning_rate": 3.77192494936303e-06, + "loss": 65.5857, + "step": 76860 + }, + { + "epoch": 0.3105645268809819, + "grad_norm": 564.2059326171875, + "learning_rate": 3.7705578342948967e-06, + "loss": 37.76, + "step": 76870 + }, + { + "epoch": 0.31060492814634955, + "grad_norm": 49.22088623046875, + "learning_rate": 3.7691908170478352e-06, + "loss": 53.3579, + "step": 76880 + }, + { + "epoch": 0.3106453294117172, + "grad_norm": 522.652587890625, + "learning_rate": 3.767823897730612e-06, + "loss": 35.3693, + "step": 76890 + }, + { + "epoch": 0.3106857306770848, + "grad_norm": 507.7791442871094, + "learning_rate": 3.7664570764519865e-06, + "loss": 33.2377, + "step": 76900 + }, + { + "epoch": 0.3107261319424524, + "grad_norm": 318.0624694824219, + "learning_rate": 3.76509035332071e-06, + "loss": 49.797, + "step": 76910 + }, + { + "epoch": 0.31076653320782005, + "grad_norm": 701.2640991210938, + "learning_rate": 3.7637237284455264e-06, + "loss": 37.4456, + "step": 76920 + }, + { + "epoch": 0.3108069344731877, + "grad_norm": 441.8192443847656, + "learning_rate": 3.762357201935171e-06, + "loss": 37.457, + "step": 76930 + }, + { + "epoch": 0.31084733573855533, + "grad_norm": 556.957763671875, + "learning_rate": 3.7609907738983762e-06, + "loss": 27.0198, + "step": 76940 + }, + { + "epoch": 0.31088773700392297, + "grad_norm": 758.1605834960938, + "learning_rate": 3.7596244444438577e-06, + "loss": 68.1611, + "step": 76950 + }, + { + "epoch": 0.3109281382692906, + "grad_norm": 503.20672607421875, + "learning_rate": 3.758258213680328e-06, + "loss": 40.469, + "step": 76960 + }, + { + "epoch": 0.31096853953465825, + "grad_norm": 579.7887573242188, + "learning_rate": 3.7568920817164945e-06, + "loss": 45.6774, + "step": 76970 + }, + { + "epoch": 0.31100894080002583, + "grad_norm": 799.3428344726562, + "learning_rate": 3.755526048661053e-06, + "loss": 40.041, + "step": 76980 + }, + { + "epoch": 0.3110493420653935, + "grad_norm": 619.9786987304688, + "learning_rate": 3.7541601146226924e-06, + "loss": 38.2264, + "step": 76990 + }, + { + "epoch": 0.3110897433307611, + "grad_norm": 1017.5667724609375, + "learning_rate": 3.752794279710094e-06, + "loss": 65.4237, + "step": 77000 + }, + { + "epoch": 0.31113014459612875, + "grad_norm": 744.3829956054688, + "learning_rate": 3.751428544031931e-06, + "loss": 53.0718, + "step": 77010 + }, + { + "epoch": 0.3111705458614964, + "grad_norm": 1114.8953857421875, + "learning_rate": 3.750062907696868e-06, + "loss": 46.9796, + "step": 77020 + }, + { + "epoch": 0.31121094712686403, + "grad_norm": 789.780517578125, + "learning_rate": 3.7486973708135643e-06, + "loss": 73.1727, + "step": 77030 + }, + { + "epoch": 0.3112513483922316, + "grad_norm": 281.07928466796875, + "learning_rate": 3.7473319334906678e-06, + "loss": 25.536, + "step": 77040 + }, + { + "epoch": 0.31129174965759926, + "grad_norm": 378.5385437011719, + "learning_rate": 3.7459665958368197e-06, + "loss": 52.1635, + "step": 77050 + }, + { + "epoch": 0.3113321509229669, + "grad_norm": 972.1957397460938, + "learning_rate": 3.7446013579606534e-06, + "loss": 55.4378, + "step": 77060 + }, + { + "epoch": 0.31137255218833454, + "grad_norm": 585.01171875, + "learning_rate": 3.743236219970796e-06, + "loss": 44.6457, + "step": 77070 + }, + { + "epoch": 0.3114129534537022, + "grad_norm": 391.7244873046875, + "learning_rate": 3.741871181975866e-06, + "loss": 40.0719, + "step": 77080 + }, + { + "epoch": 0.3114533547190698, + "grad_norm": 1258.5592041015625, + "learning_rate": 3.740506244084471e-06, + "loss": 44.3012, + "step": 77090 + }, + { + "epoch": 0.31149375598443746, + "grad_norm": 786.6157836914062, + "learning_rate": 3.7391414064052138e-06, + "loss": 56.0882, + "step": 77100 + }, + { + "epoch": 0.31153415724980504, + "grad_norm": 362.84814453125, + "learning_rate": 3.737776669046689e-06, + "loss": 40.2787, + "step": 77110 + }, + { + "epoch": 0.3115745585151727, + "grad_norm": 436.129150390625, + "learning_rate": 3.7364120321174826e-06, + "loss": 40.8267, + "step": 77120 + }, + { + "epoch": 0.3116149597805403, + "grad_norm": 357.44561767578125, + "learning_rate": 3.7350474957261705e-06, + "loss": 39.113, + "step": 77130 + }, + { + "epoch": 0.31165536104590796, + "grad_norm": 732.1150512695312, + "learning_rate": 3.7336830599813245e-06, + "loss": 39.6991, + "step": 77140 + }, + { + "epoch": 0.3116957623112756, + "grad_norm": 336.4695129394531, + "learning_rate": 3.732318724991505e-06, + "loss": 26.2738, + "step": 77150 + }, + { + "epoch": 0.31173616357664324, + "grad_norm": 959.9412841796875, + "learning_rate": 3.730954490865266e-06, + "loss": 58.7199, + "step": 77160 + }, + { + "epoch": 0.31177656484201083, + "grad_norm": 521.5714721679688, + "learning_rate": 3.7295903577111548e-06, + "loss": 37.6181, + "step": 77170 + }, + { + "epoch": 0.31181696610737847, + "grad_norm": 231.63853454589844, + "learning_rate": 3.728226325637709e-06, + "loss": 64.1254, + "step": 77180 + }, + { + "epoch": 0.3118573673727461, + "grad_norm": 1082.9320068359375, + "learning_rate": 3.726862394753457e-06, + "loss": 43.6313, + "step": 77190 + }, + { + "epoch": 0.31189776863811375, + "grad_norm": 525.0565185546875, + "learning_rate": 3.725498565166923e-06, + "loss": 40.8923, + "step": 77200 + }, + { + "epoch": 0.3119381699034814, + "grad_norm": 248.5260467529297, + "learning_rate": 3.7241348369866183e-06, + "loss": 34.3744, + "step": 77210 + }, + { + "epoch": 0.31197857116884903, + "grad_norm": 994.7620239257812, + "learning_rate": 3.7227712103210485e-06, + "loss": 52.4718, + "step": 77220 + }, + { + "epoch": 0.3120189724342166, + "grad_norm": 979.7284545898438, + "learning_rate": 3.721407685278712e-06, + "loss": 45.3776, + "step": 77230 + }, + { + "epoch": 0.31205937369958425, + "grad_norm": 410.4310302734375, + "learning_rate": 3.7200442619680976e-06, + "loss": 51.1721, + "step": 77240 + }, + { + "epoch": 0.3120997749649519, + "grad_norm": 821.8783569335938, + "learning_rate": 3.7186809404976877e-06, + "loss": 37.6959, + "step": 77250 + }, + { + "epoch": 0.31214017623031953, + "grad_norm": 598.2932739257812, + "learning_rate": 3.7173177209759538e-06, + "loss": 78.5719, + "step": 77260 + }, + { + "epoch": 0.3121805774956872, + "grad_norm": 599.0471801757812, + "learning_rate": 3.715954603511363e-06, + "loss": 40.2954, + "step": 77270 + }, + { + "epoch": 0.3122209787610548, + "grad_norm": 778.6463623046875, + "learning_rate": 3.714591588212372e-06, + "loss": 28.0776, + "step": 77280 + }, + { + "epoch": 0.31226138002642245, + "grad_norm": 922.8678588867188, + "learning_rate": 3.713228675187429e-06, + "loss": 37.1462, + "step": 77290 + }, + { + "epoch": 0.31230178129179004, + "grad_norm": 521.8157958984375, + "learning_rate": 3.7118658645449745e-06, + "loss": 47.8419, + "step": 77300 + }, + { + "epoch": 0.3123421825571577, + "grad_norm": 1121.2880859375, + "learning_rate": 3.710503156393441e-06, + "loss": 52.1181, + "step": 77310 + }, + { + "epoch": 0.3123825838225253, + "grad_norm": 557.4747314453125, + "learning_rate": 3.7091405508412538e-06, + "loss": 47.8416, + "step": 77320 + }, + { + "epoch": 0.31242298508789296, + "grad_norm": 654.5795288085938, + "learning_rate": 3.7077780479968286e-06, + "loss": 44.9925, + "step": 77330 + }, + { + "epoch": 0.3124633863532606, + "grad_norm": 550.8849487304688, + "learning_rate": 3.7064156479685736e-06, + "loss": 46.5616, + "step": 77340 + }, + { + "epoch": 0.31250378761862824, + "grad_norm": 441.02020263671875, + "learning_rate": 3.705053350864888e-06, + "loss": 58.3744, + "step": 77350 + }, + { + "epoch": 0.3125441888839958, + "grad_norm": 981.9597778320312, + "learning_rate": 3.703691156794165e-06, + "loss": 38.7906, + "step": 77360 + }, + { + "epoch": 0.31258459014936346, + "grad_norm": 517.7142333984375, + "learning_rate": 3.7023290658647893e-06, + "loss": 43.4523, + "step": 77370 + }, + { + "epoch": 0.3126249914147311, + "grad_norm": 432.1444091796875, + "learning_rate": 3.7009670781851326e-06, + "loss": 34.6007, + "step": 77380 + }, + { + "epoch": 0.31266539268009874, + "grad_norm": 5524.80029296875, + "learning_rate": 3.6996051938635626e-06, + "loss": 57.5598, + "step": 77390 + }, + { + "epoch": 0.3127057939454664, + "grad_norm": 469.6958312988281, + "learning_rate": 3.69824341300844e-06, + "loss": 54.4597, + "step": 77400 + }, + { + "epoch": 0.312746195210834, + "grad_norm": 568.2667236328125, + "learning_rate": 3.696881735728115e-06, + "loss": 72.8601, + "step": 77410 + }, + { + "epoch": 0.31278659647620166, + "grad_norm": 342.8549499511719, + "learning_rate": 3.6955201621309302e-06, + "loss": 33.34, + "step": 77420 + }, + { + "epoch": 0.31282699774156925, + "grad_norm": 553.44921875, + "learning_rate": 3.6941586923252194e-06, + "loss": 39.574, + "step": 77430 + }, + { + "epoch": 0.3128673990069369, + "grad_norm": 322.31494140625, + "learning_rate": 3.6927973264193074e-06, + "loss": 42.0066, + "step": 77440 + }, + { + "epoch": 0.3129078002723045, + "grad_norm": 746.9494018554688, + "learning_rate": 3.691436064521513e-06, + "loss": 50.2764, + "step": 77450 + }, + { + "epoch": 0.31294820153767217, + "grad_norm": 535.7817993164062, + "learning_rate": 3.6900749067401474e-06, + "loss": 45.719, + "step": 77460 + }, + { + "epoch": 0.3129886028030398, + "grad_norm": 494.1651306152344, + "learning_rate": 3.6887138531835085e-06, + "loss": 47.2059, + "step": 77470 + }, + { + "epoch": 0.31302900406840745, + "grad_norm": 458.833740234375, + "learning_rate": 3.6873529039598903e-06, + "loss": 83.885, + "step": 77480 + }, + { + "epoch": 0.31306940533377503, + "grad_norm": 452.3143615722656, + "learning_rate": 3.6859920591775763e-06, + "loss": 28.6129, + "step": 77490 + }, + { + "epoch": 0.31310980659914267, + "grad_norm": 1721.304443359375, + "learning_rate": 3.6846313189448447e-06, + "loss": 69.1576, + "step": 77500 + }, + { + "epoch": 0.3131502078645103, + "grad_norm": 1018.3944702148438, + "learning_rate": 3.6832706833699616e-06, + "loss": 58.151, + "step": 77510 + }, + { + "epoch": 0.31319060912987795, + "grad_norm": 710.81494140625, + "learning_rate": 3.681910152561188e-06, + "loss": 41.7799, + "step": 77520 + }, + { + "epoch": 0.3132310103952456, + "grad_norm": 244.9594268798828, + "learning_rate": 3.6805497266267742e-06, + "loss": 33.3409, + "step": 77530 + }, + { + "epoch": 0.31327141166061323, + "grad_norm": 736.140625, + "learning_rate": 3.679189405674963e-06, + "loss": 40.06, + "step": 77540 + }, + { + "epoch": 0.3133118129259808, + "grad_norm": 528.4320678710938, + "learning_rate": 3.6778291898139907e-06, + "loss": 46.3387, + "step": 77550 + }, + { + "epoch": 0.31335221419134845, + "grad_norm": 563.1051635742188, + "learning_rate": 3.6764690791520797e-06, + "loss": 38.3095, + "step": 77560 + }, + { + "epoch": 0.3133926154567161, + "grad_norm": 330.9205627441406, + "learning_rate": 3.6751090737974506e-06, + "loss": 36.1311, + "step": 77570 + }, + { + "epoch": 0.31343301672208373, + "grad_norm": 987.652099609375, + "learning_rate": 3.673749173858312e-06, + "loss": 62.4743, + "step": 77580 + }, + { + "epoch": 0.3134734179874514, + "grad_norm": 787.0823364257812, + "learning_rate": 3.672389379442864e-06, + "loss": 41.8553, + "step": 77590 + }, + { + "epoch": 0.313513819252819, + "grad_norm": 606.4526977539062, + "learning_rate": 3.6710296906593012e-06, + "loss": 53.3784, + "step": 77600 + }, + { + "epoch": 0.31355422051818665, + "grad_norm": 313.4430847167969, + "learning_rate": 3.6696701076158064e-06, + "loss": 37.6288, + "step": 77610 + }, + { + "epoch": 0.31359462178355424, + "grad_norm": 472.91925048828125, + "learning_rate": 3.6683106304205564e-06, + "loss": 35.023, + "step": 77620 + }, + { + "epoch": 0.3136350230489219, + "grad_norm": 933.0067138671875, + "learning_rate": 3.666951259181718e-06, + "loss": 47.8511, + "step": 77630 + }, + { + "epoch": 0.3136754243142895, + "grad_norm": 593.52001953125, + "learning_rate": 3.6655919940074497e-06, + "loss": 46.1541, + "step": 77640 + }, + { + "epoch": 0.31371582557965716, + "grad_norm": 1112.1434326171875, + "learning_rate": 3.664232835005902e-06, + "loss": 43.5285, + "step": 77650 + }, + { + "epoch": 0.3137562268450248, + "grad_norm": 630.2261352539062, + "learning_rate": 3.6628737822852177e-06, + "loss": 46.3687, + "step": 77660 + }, + { + "epoch": 0.31379662811039244, + "grad_norm": 676.2584838867188, + "learning_rate": 3.66151483595353e-06, + "loss": 36.21, + "step": 77670 + }, + { + "epoch": 0.31383702937576, + "grad_norm": 206.3310089111328, + "learning_rate": 3.6601559961189626e-06, + "loss": 52.2429, + "step": 77680 + }, + { + "epoch": 0.31387743064112766, + "grad_norm": 439.7651672363281, + "learning_rate": 3.6587972628896345e-06, + "loss": 44.6034, + "step": 77690 + }, + { + "epoch": 0.3139178319064953, + "grad_norm": 848.1891479492188, + "learning_rate": 3.6574386363736532e-06, + "loss": 31.7829, + "step": 77700 + }, + { + "epoch": 0.31395823317186294, + "grad_norm": 673.7623901367188, + "learning_rate": 3.6560801166791183e-06, + "loss": 50.483, + "step": 77710 + }, + { + "epoch": 0.3139986344372306, + "grad_norm": 611.6978149414062, + "learning_rate": 3.654721703914121e-06, + "loss": 62.5249, + "step": 77720 + }, + { + "epoch": 0.3140390357025982, + "grad_norm": 628.8622436523438, + "learning_rate": 3.6533633981867433e-06, + "loss": 56.999, + "step": 77730 + }, + { + "epoch": 0.31407943696796586, + "grad_norm": 655.521728515625, + "learning_rate": 3.652005199605059e-06, + "loss": 49.1772, + "step": 77740 + }, + { + "epoch": 0.31411983823333345, + "grad_norm": 607.6434326171875, + "learning_rate": 3.6506471082771357e-06, + "loss": 42.7941, + "step": 77750 + }, + { + "epoch": 0.3141602394987011, + "grad_norm": 562.9298095703125, + "learning_rate": 3.6492891243110283e-06, + "loss": 41.0694, + "step": 77760 + }, + { + "epoch": 0.31420064076406873, + "grad_norm": 867.0905151367188, + "learning_rate": 3.6479312478147866e-06, + "loss": 42.9423, + "step": 77770 + }, + { + "epoch": 0.31424104202943637, + "grad_norm": 363.48370361328125, + "learning_rate": 3.64657347889645e-06, + "loss": 32.8319, + "step": 77780 + }, + { + "epoch": 0.314281443294804, + "grad_norm": 822.6988525390625, + "learning_rate": 3.6452158176640505e-06, + "loss": 58.5034, + "step": 77790 + }, + { + "epoch": 0.31432184456017165, + "grad_norm": 778.3346557617188, + "learning_rate": 3.6438582642256138e-06, + "loss": 46.2177, + "step": 77800 + }, + { + "epoch": 0.31436224582553923, + "grad_norm": 823.2440185546875, + "learning_rate": 3.642500818689148e-06, + "loss": 30.1332, + "step": 77810 + }, + { + "epoch": 0.3144026470909069, + "grad_norm": 582.5382690429688, + "learning_rate": 3.641143481162661e-06, + "loss": 35.8509, + "step": 77820 + }, + { + "epoch": 0.3144430483562745, + "grad_norm": 512.2465209960938, + "learning_rate": 3.639786251754153e-06, + "loss": 45.7103, + "step": 77830 + }, + { + "epoch": 0.31448344962164215, + "grad_norm": 445.96392822265625, + "learning_rate": 3.638429130571609e-06, + "loss": 26.201, + "step": 77840 + }, + { + "epoch": 0.3145238508870098, + "grad_norm": 388.62579345703125, + "learning_rate": 3.637072117723012e-06, + "loss": 50.9161, + "step": 77850 + }, + { + "epoch": 0.31456425215237743, + "grad_norm": 1324.3643798828125, + "learning_rate": 3.6357152133163297e-06, + "loss": 60.3968, + "step": 77860 + }, + { + "epoch": 0.314604653417745, + "grad_norm": 485.1225891113281, + "learning_rate": 3.634358417459528e-06, + "loss": 38.8524, + "step": 77870 + }, + { + "epoch": 0.31464505468311266, + "grad_norm": 712.991943359375, + "learning_rate": 3.633001730260558e-06, + "loss": 40.6697, + "step": 77880 + }, + { + "epoch": 0.3146854559484803, + "grad_norm": 565.8276977539062, + "learning_rate": 3.63164515182737e-06, + "loss": 54.9966, + "step": 77890 + }, + { + "epoch": 0.31472585721384794, + "grad_norm": 826.9208984375, + "learning_rate": 3.630288682267895e-06, + "loss": 43.2319, + "step": 77900 + }, + { + "epoch": 0.3147662584792156, + "grad_norm": 603.3365478515625, + "learning_rate": 3.628932321690063e-06, + "loss": 42.2038, + "step": 77910 + }, + { + "epoch": 0.3148066597445832, + "grad_norm": 751.0957641601562, + "learning_rate": 3.6275760702017938e-06, + "loss": 54.4473, + "step": 77920 + }, + { + "epoch": 0.31484706100995086, + "grad_norm": 419.89166259765625, + "learning_rate": 3.626219927910999e-06, + "loss": 38.9901, + "step": 77930 + }, + { + "epoch": 0.31488746227531844, + "grad_norm": 467.87078857421875, + "learning_rate": 3.6248638949255795e-06, + "loss": 49.0393, + "step": 77940 + }, + { + "epoch": 0.3149278635406861, + "grad_norm": 711.9083251953125, + "learning_rate": 3.6235079713534287e-06, + "loss": 44.0929, + "step": 77950 + }, + { + "epoch": 0.3149682648060537, + "grad_norm": 811.3135986328125, + "learning_rate": 3.6221521573024316e-06, + "loss": 40.8963, + "step": 77960 + }, + { + "epoch": 0.31500866607142136, + "grad_norm": 466.0732421875, + "learning_rate": 3.620796452880464e-06, + "loss": 30.8488, + "step": 77970 + }, + { + "epoch": 0.315049067336789, + "grad_norm": 291.6542663574219, + "learning_rate": 3.6194408581953934e-06, + "loss": 22.6698, + "step": 77980 + }, + { + "epoch": 0.31508946860215664, + "grad_norm": 639.0294189453125, + "learning_rate": 3.618085373355077e-06, + "loss": 45.7208, + "step": 77990 + }, + { + "epoch": 0.3151298698675242, + "grad_norm": 1079.23828125, + "learning_rate": 3.6167299984673655e-06, + "loss": 55.0113, + "step": 78000 + }, + { + "epoch": 0.31517027113289187, + "grad_norm": 269.6738586425781, + "learning_rate": 3.615374733640099e-06, + "loss": 35.7515, + "step": 78010 + }, + { + "epoch": 0.3152106723982595, + "grad_norm": 657.4027709960938, + "learning_rate": 3.6140195789811108e-06, + "loss": 35.9039, + "step": 78020 + }, + { + "epoch": 0.31525107366362715, + "grad_norm": 291.2002258300781, + "learning_rate": 3.6126645345982243e-06, + "loss": 57.9338, + "step": 78030 + }, + { + "epoch": 0.3152914749289948, + "grad_norm": 812.0730590820312, + "learning_rate": 3.611309600599253e-06, + "loss": 43.1736, + "step": 78040 + }, + { + "epoch": 0.3153318761943624, + "grad_norm": 754.0714111328125, + "learning_rate": 3.6099547770920046e-06, + "loss": 48.0824, + "step": 78050 + }, + { + "epoch": 0.31537227745973007, + "grad_norm": 477.45733642578125, + "learning_rate": 3.6086000641842757e-06, + "loss": 27.9684, + "step": 78060 + }, + { + "epoch": 0.31541267872509765, + "grad_norm": 324.0244140625, + "learning_rate": 3.6072454619838525e-06, + "loss": 34.7192, + "step": 78070 + }, + { + "epoch": 0.3154530799904653, + "grad_norm": 680.8942260742188, + "learning_rate": 3.6058909705985166e-06, + "loss": 38.3838, + "step": 78080 + }, + { + "epoch": 0.31549348125583293, + "grad_norm": 566.3120727539062, + "learning_rate": 3.6045365901360385e-06, + "loss": 36.4708, + "step": 78090 + }, + { + "epoch": 0.31553388252120057, + "grad_norm": 453.73834228515625, + "learning_rate": 3.603182320704179e-06, + "loss": 47.0383, + "step": 78100 + }, + { + "epoch": 0.3155742837865682, + "grad_norm": 1445.5745849609375, + "learning_rate": 3.601828162410691e-06, + "loss": 51.0274, + "step": 78110 + }, + { + "epoch": 0.31561468505193585, + "grad_norm": 367.10394287109375, + "learning_rate": 3.6004741153633194e-06, + "loss": 30.2823, + "step": 78120 + }, + { + "epoch": 0.31565508631730343, + "grad_norm": 385.9931640625, + "learning_rate": 3.5991201796698006e-06, + "loss": 31.6421, + "step": 78130 + }, + { + "epoch": 0.3156954875826711, + "grad_norm": 457.8427429199219, + "learning_rate": 3.5977663554378594e-06, + "loss": 55.4395, + "step": 78140 + }, + { + "epoch": 0.3157358888480387, + "grad_norm": 644.6337280273438, + "learning_rate": 3.5964126427752155e-06, + "loss": 36.9936, + "step": 78150 + }, + { + "epoch": 0.31577629011340635, + "grad_norm": 459.3667907714844, + "learning_rate": 3.595059041789575e-06, + "loss": 39.2877, + "step": 78160 + }, + { + "epoch": 0.315816691378774, + "grad_norm": 527.1011352539062, + "learning_rate": 3.5937055525886377e-06, + "loss": 46.0837, + "step": 78170 + }, + { + "epoch": 0.31585709264414163, + "grad_norm": 789.7222900390625, + "learning_rate": 3.592352175280096e-06, + "loss": 51.5472, + "step": 78180 + }, + { + "epoch": 0.3158974939095092, + "grad_norm": 703.373046875, + "learning_rate": 3.5909989099716325e-06, + "loss": 51.672, + "step": 78190 + }, + { + "epoch": 0.31593789517487686, + "grad_norm": 1366.6173095703125, + "learning_rate": 3.589645756770918e-06, + "loss": 54.4186, + "step": 78200 + }, + { + "epoch": 0.3159782964402445, + "grad_norm": 651.6315307617188, + "learning_rate": 3.5882927157856175e-06, + "loss": 37.2753, + "step": 78210 + }, + { + "epoch": 0.31601869770561214, + "grad_norm": 457.5976867675781, + "learning_rate": 3.586939787123388e-06, + "loss": 58.4296, + "step": 78220 + }, + { + "epoch": 0.3160590989709798, + "grad_norm": 463.1532287597656, + "learning_rate": 3.585586970891876e-06, + "loss": 42.9541, + "step": 78230 + }, + { + "epoch": 0.3160995002363474, + "grad_norm": 802.164794921875, + "learning_rate": 3.584234267198715e-06, + "loss": 37.8752, + "step": 78240 + }, + { + "epoch": 0.31613990150171506, + "grad_norm": 438.13385009765625, + "learning_rate": 3.582881676151536e-06, + "loss": 35.8725, + "step": 78250 + }, + { + "epoch": 0.31618030276708264, + "grad_norm": 605.7740478515625, + "learning_rate": 3.581529197857959e-06, + "loss": 34.4586, + "step": 78260 + }, + { + "epoch": 0.3162207040324503, + "grad_norm": 503.4420471191406, + "learning_rate": 3.580176832425594e-06, + "loss": 46.2822, + "step": 78270 + }, + { + "epoch": 0.3162611052978179, + "grad_norm": 762.1167602539062, + "learning_rate": 3.5788245799620425e-06, + "loss": 51.9271, + "step": 78280 + }, + { + "epoch": 0.31630150656318556, + "grad_norm": 671.9507446289062, + "learning_rate": 3.577472440574896e-06, + "loss": 41.3695, + "step": 78290 + }, + { + "epoch": 0.3163419078285532, + "grad_norm": 408.4527282714844, + "learning_rate": 3.5761204143717387e-06, + "loss": 56.6144, + "step": 78300 + }, + { + "epoch": 0.31638230909392084, + "grad_norm": 357.1184387207031, + "learning_rate": 3.5747685014601456e-06, + "loss": 33.9895, + "step": 78310 + }, + { + "epoch": 0.31642271035928843, + "grad_norm": 260.8604431152344, + "learning_rate": 3.5734167019476845e-06, + "loss": 32.9133, + "step": 78320 + }, + { + "epoch": 0.31646311162465607, + "grad_norm": 756.7543334960938, + "learning_rate": 3.572065015941907e-06, + "loss": 46.5736, + "step": 78330 + }, + { + "epoch": 0.3165035128900237, + "grad_norm": 694.8875732421875, + "learning_rate": 3.570713443550362e-06, + "loss": 68.6546, + "step": 78340 + }, + { + "epoch": 0.31654391415539135, + "grad_norm": 347.2257385253906, + "learning_rate": 3.5693619848805892e-06, + "loss": 32.6081, + "step": 78350 + }, + { + "epoch": 0.316584315420759, + "grad_norm": 580.4989624023438, + "learning_rate": 3.568010640040118e-06, + "loss": 55.1243, + "step": 78360 + }, + { + "epoch": 0.31662471668612663, + "grad_norm": 487.68292236328125, + "learning_rate": 3.566659409136468e-06, + "loss": 56.2001, + "step": 78370 + }, + { + "epoch": 0.31666511795149427, + "grad_norm": 441.1833190917969, + "learning_rate": 3.565308292277151e-06, + "loss": 49.3039, + "step": 78380 + }, + { + "epoch": 0.31670551921686185, + "grad_norm": 497.8100891113281, + "learning_rate": 3.563957289569669e-06, + "loss": 38.5456, + "step": 78390 + }, + { + "epoch": 0.3167459204822295, + "grad_norm": 685.7149047851562, + "learning_rate": 3.5626064011215135e-06, + "loss": 47.0403, + "step": 78400 + }, + { + "epoch": 0.31678632174759713, + "grad_norm": 540.7225341796875, + "learning_rate": 3.5612556270401733e-06, + "loss": 44.1251, + "step": 78410 + }, + { + "epoch": 0.3168267230129648, + "grad_norm": 810.5453491210938, + "learning_rate": 3.5599049674331175e-06, + "loss": 44.7801, + "step": 78420 + }, + { + "epoch": 0.3168671242783324, + "grad_norm": 172.29164123535156, + "learning_rate": 3.5585544224078143e-06, + "loss": 38.7469, + "step": 78430 + }, + { + "epoch": 0.31690752554370005, + "grad_norm": 433.4833984375, + "learning_rate": 3.5572039920717192e-06, + "loss": 45.1474, + "step": 78440 + }, + { + "epoch": 0.31694792680906764, + "grad_norm": 456.20703125, + "learning_rate": 3.5558536765322825e-06, + "loss": 56.6234, + "step": 78450 + }, + { + "epoch": 0.3169883280744353, + "grad_norm": 547.5340576171875, + "learning_rate": 3.554503475896941e-06, + "loss": 49.693, + "step": 78460 + }, + { + "epoch": 0.3170287293398029, + "grad_norm": 616.480712890625, + "learning_rate": 3.553153390273124e-06, + "loss": 46.7905, + "step": 78470 + }, + { + "epoch": 0.31706913060517056, + "grad_norm": 618.6358642578125, + "learning_rate": 3.551803419768251e-06, + "loss": 41.6367, + "step": 78480 + }, + { + "epoch": 0.3171095318705382, + "grad_norm": 359.9627685546875, + "learning_rate": 3.5504535644897352e-06, + "loss": 34.8955, + "step": 78490 + }, + { + "epoch": 0.31714993313590584, + "grad_norm": 928.6984252929688, + "learning_rate": 3.549103824544975e-06, + "loss": 39.994, + "step": 78500 + }, + { + "epoch": 0.3171903344012734, + "grad_norm": 594.8177490234375, + "learning_rate": 3.5477542000413657e-06, + "loss": 64.4823, + "step": 78510 + }, + { + "epoch": 0.31723073566664106, + "grad_norm": 427.8562927246094, + "learning_rate": 3.546404691086289e-06, + "loss": 34.0224, + "step": 78520 + }, + { + "epoch": 0.3172711369320087, + "grad_norm": 298.39031982421875, + "learning_rate": 3.5450552977871207e-06, + "loss": 46.3302, + "step": 78530 + }, + { + "epoch": 0.31731153819737634, + "grad_norm": 604.6382446289062, + "learning_rate": 3.543706020251223e-06, + "loss": 32.3792, + "step": 78540 + }, + { + "epoch": 0.317351939462744, + "grad_norm": 1150.4559326171875, + "learning_rate": 3.542356858585956e-06, + "loss": 36.3491, + "step": 78550 + }, + { + "epoch": 0.3173923407281116, + "grad_norm": 1116.10888671875, + "learning_rate": 3.541007812898663e-06, + "loss": 37.8419, + "step": 78560 + }, + { + "epoch": 0.31743274199347926, + "grad_norm": 477.97076416015625, + "learning_rate": 3.539658883296683e-06, + "loss": 31.0409, + "step": 78570 + }, + { + "epoch": 0.31747314325884685, + "grad_norm": 743.943603515625, + "learning_rate": 3.5383100698873446e-06, + "loss": 49.6261, + "step": 78580 + }, + { + "epoch": 0.3175135445242145, + "grad_norm": 767.6546020507812, + "learning_rate": 3.536961372777965e-06, + "loss": 41.5514, + "step": 78590 + }, + { + "epoch": 0.3175539457895821, + "grad_norm": 705.5023193359375, + "learning_rate": 3.535612792075854e-06, + "loss": 32.2963, + "step": 78600 + }, + { + "epoch": 0.31759434705494977, + "grad_norm": 384.8016662597656, + "learning_rate": 3.5342643278883127e-06, + "loss": 29.7352, + "step": 78610 + }, + { + "epoch": 0.3176347483203174, + "grad_norm": 411.0057678222656, + "learning_rate": 3.532915980322632e-06, + "loss": 46.1627, + "step": 78620 + }, + { + "epoch": 0.31767514958568505, + "grad_norm": 466.7144775390625, + "learning_rate": 3.5315677494860923e-06, + "loss": 44.4728, + "step": 78630 + }, + { + "epoch": 0.31771555085105263, + "grad_norm": 560.2733154296875, + "learning_rate": 3.5302196354859693e-06, + "loss": 38.7301, + "step": 78640 + }, + { + "epoch": 0.31775595211642027, + "grad_norm": 541.011474609375, + "learning_rate": 3.528871638429524e-06, + "loss": 61.4878, + "step": 78650 + }, + { + "epoch": 0.3177963533817879, + "grad_norm": 497.6824645996094, + "learning_rate": 3.527523758424013e-06, + "loss": 32.1103, + "step": 78660 + }, + { + "epoch": 0.31783675464715555, + "grad_norm": 418.5374755859375, + "learning_rate": 3.526175995576676e-06, + "loss": 42.6755, + "step": 78670 + }, + { + "epoch": 0.3178771559125232, + "grad_norm": 686.6862182617188, + "learning_rate": 3.524828349994752e-06, + "loss": 63.9243, + "step": 78680 + }, + { + "epoch": 0.31791755717789083, + "grad_norm": 908.7049560546875, + "learning_rate": 3.523480821785466e-06, + "loss": 66.8907, + "step": 78690 + }, + { + "epoch": 0.31795795844325847, + "grad_norm": 904.7369384765625, + "learning_rate": 3.5221334110560345e-06, + "loss": 37.5363, + "step": 78700 + }, + { + "epoch": 0.31799835970862605, + "grad_norm": 797.6988525390625, + "learning_rate": 3.5207861179136654e-06, + "loss": 35.7445, + "step": 78710 + }, + { + "epoch": 0.3180387609739937, + "grad_norm": 441.76275634765625, + "learning_rate": 3.519438942465556e-06, + "loss": 47.8045, + "step": 78720 + }, + { + "epoch": 0.31807916223936133, + "grad_norm": 709.9418334960938, + "learning_rate": 3.5180918848188937e-06, + "loss": 62.3529, + "step": 78730 + }, + { + "epoch": 0.318119563504729, + "grad_norm": 540.959228515625, + "learning_rate": 3.516744945080861e-06, + "loss": 40.3376, + "step": 78740 + }, + { + "epoch": 0.3181599647700966, + "grad_norm": 539.8733520507812, + "learning_rate": 3.5153981233586277e-06, + "loss": 57.6363, + "step": 78750 + }, + { + "epoch": 0.31820036603546425, + "grad_norm": 602.8059692382812, + "learning_rate": 3.5140514197593494e-06, + "loss": 41.2759, + "step": 78760 + }, + { + "epoch": 0.31824076730083184, + "grad_norm": 464.77081298828125, + "learning_rate": 3.512704834390179e-06, + "loss": 44.2819, + "step": 78770 + }, + { + "epoch": 0.3182811685661995, + "grad_norm": 691.4537963867188, + "learning_rate": 3.5113583673582613e-06, + "loss": 49.4957, + "step": 78780 + }, + { + "epoch": 0.3183215698315671, + "grad_norm": 294.8431701660156, + "learning_rate": 3.510012018770726e-06, + "loss": 62.4848, + "step": 78790 + }, + { + "epoch": 0.31836197109693476, + "grad_norm": 987.3035888671875, + "learning_rate": 3.508665788734696e-06, + "loss": 49.3034, + "step": 78800 + }, + { + "epoch": 0.3184023723623024, + "grad_norm": 410.7585144042969, + "learning_rate": 3.507319677357285e-06, + "loss": 34.8022, + "step": 78810 + }, + { + "epoch": 0.31844277362767004, + "grad_norm": 519.374755859375, + "learning_rate": 3.5059736847455967e-06, + "loss": 37.6209, + "step": 78820 + }, + { + "epoch": 0.3184831748930376, + "grad_norm": 925.8695068359375, + "learning_rate": 3.5046278110067242e-06, + "loss": 33.3094, + "step": 78830 + }, + { + "epoch": 0.31852357615840526, + "grad_norm": 1023.0116577148438, + "learning_rate": 3.5032820562477577e-06, + "loss": 59.342, + "step": 78840 + }, + { + "epoch": 0.3185639774237729, + "grad_norm": 725.557861328125, + "learning_rate": 3.5019364205757667e-06, + "loss": 38.1884, + "step": 78850 + }, + { + "epoch": 0.31860437868914054, + "grad_norm": 432.7353820800781, + "learning_rate": 3.5005909040978188e-06, + "loss": 47.6009, + "step": 78860 + }, + { + "epoch": 0.3186447799545082, + "grad_norm": 505.9002380371094, + "learning_rate": 3.4992455069209717e-06, + "loss": 52.5041, + "step": 78870 + }, + { + "epoch": 0.3186851812198758, + "grad_norm": 526.93701171875, + "learning_rate": 3.4979002291522723e-06, + "loss": 54.7521, + "step": 78880 + }, + { + "epoch": 0.31872558248524346, + "grad_norm": 586.6494750976562, + "learning_rate": 3.4965550708987583e-06, + "loss": 43.5702, + "step": 78890 + }, + { + "epoch": 0.31876598375061105, + "grad_norm": 226.22586059570312, + "learning_rate": 3.4952100322674574e-06, + "loss": 41.4644, + "step": 78900 + }, + { + "epoch": 0.3188063850159787, + "grad_norm": 1272.3206787109375, + "learning_rate": 3.4938651133653877e-06, + "loss": 45.8692, + "step": 78910 + }, + { + "epoch": 0.31884678628134633, + "grad_norm": 577.4758911132812, + "learning_rate": 3.49252031429956e-06, + "loss": 41.5907, + "step": 78920 + }, + { + "epoch": 0.31888718754671397, + "grad_norm": 622.424560546875, + "learning_rate": 3.4911756351769722e-06, + "loss": 39.5042, + "step": 78930 + }, + { + "epoch": 0.3189275888120816, + "grad_norm": 486.1512451171875, + "learning_rate": 3.4898310761046133e-06, + "loss": 43.4679, + "step": 78940 + }, + { + "epoch": 0.31896799007744925, + "grad_norm": 465.1907958984375, + "learning_rate": 3.4884866371894654e-06, + "loss": 34.0905, + "step": 78950 + }, + { + "epoch": 0.31900839134281683, + "grad_norm": 488.0005798339844, + "learning_rate": 3.487142318538498e-06, + "loss": 42.7252, + "step": 78960 + }, + { + "epoch": 0.3190487926081845, + "grad_norm": 430.1434020996094, + "learning_rate": 3.4857981202586742e-06, + "loss": 35.6475, + "step": 78970 + }, + { + "epoch": 0.3190891938735521, + "grad_norm": 789.178955078125, + "learning_rate": 3.4844540424569453e-06, + "loss": 56.79, + "step": 78980 + }, + { + "epoch": 0.31912959513891975, + "grad_norm": 1318.2086181640625, + "learning_rate": 3.483110085240252e-06, + "loss": 51.1702, + "step": 78990 + }, + { + "epoch": 0.3191699964042874, + "grad_norm": 588.171142578125, + "learning_rate": 3.481766248715528e-06, + "loss": 37.9113, + "step": 79000 + }, + { + "epoch": 0.31921039766965503, + "grad_norm": 609.7747802734375, + "learning_rate": 3.4804225329896963e-06, + "loss": 25.8852, + "step": 79010 + }, + { + "epoch": 0.3192507989350226, + "grad_norm": 442.40960693359375, + "learning_rate": 3.4790789381696686e-06, + "loss": 54.1903, + "step": 79020 + }, + { + "epoch": 0.31929120020039026, + "grad_norm": 548.3629150390625, + "learning_rate": 3.4777354643623506e-06, + "loss": 39.2755, + "step": 79030 + }, + { + "epoch": 0.3193316014657579, + "grad_norm": 675.93603515625, + "learning_rate": 3.4763921116746352e-06, + "loss": 60.0762, + "step": 79040 + }, + { + "epoch": 0.31937200273112554, + "grad_norm": 426.993408203125, + "learning_rate": 3.475048880213407e-06, + "loss": 53.1464, + "step": 79050 + }, + { + "epoch": 0.3194124039964932, + "grad_norm": 585.4061279296875, + "learning_rate": 3.473705770085539e-06, + "loss": 29.5674, + "step": 79060 + }, + { + "epoch": 0.3194528052618608, + "grad_norm": 322.57574462890625, + "learning_rate": 3.4723627813979005e-06, + "loss": 41.8893, + "step": 79070 + }, + { + "epoch": 0.31949320652722846, + "grad_norm": 962.873046875, + "learning_rate": 3.471019914257344e-06, + "loss": 50.1003, + "step": 79080 + }, + { + "epoch": 0.31953360779259604, + "grad_norm": 496.5466613769531, + "learning_rate": 3.4696771687707176e-06, + "loss": 34.1614, + "step": 79090 + }, + { + "epoch": 0.3195740090579637, + "grad_norm": 351.3705749511719, + "learning_rate": 3.468334545044853e-06, + "loss": 36.1759, + "step": 79100 + }, + { + "epoch": 0.3196144103233313, + "grad_norm": 515.8349609375, + "learning_rate": 3.46699204318658e-06, + "loss": 57.3094, + "step": 79110 + }, + { + "epoch": 0.31965481158869896, + "grad_norm": 215.4729766845703, + "learning_rate": 3.465649663302715e-06, + "loss": 20.6461, + "step": 79120 + }, + { + "epoch": 0.3196952128540666, + "grad_norm": 395.2944641113281, + "learning_rate": 3.464307405500064e-06, + "loss": 45.1714, + "step": 79130 + }, + { + "epoch": 0.31973561411943424, + "grad_norm": 424.1663818359375, + "learning_rate": 3.4629652698854254e-06, + "loss": 43.4122, + "step": 79140 + }, + { + "epoch": 0.3197760153848018, + "grad_norm": 641.1630859375, + "learning_rate": 3.461623256565586e-06, + "loss": 45.5314, + "step": 79150 + }, + { + "epoch": 0.31981641665016947, + "grad_norm": 721.894775390625, + "learning_rate": 3.4602813656473223e-06, + "loss": 48.3435, + "step": 79160 + }, + { + "epoch": 0.3198568179155371, + "grad_norm": 1735.0408935546875, + "learning_rate": 3.4589395972374055e-06, + "loss": 65.0877, + "step": 79170 + }, + { + "epoch": 0.31989721918090475, + "grad_norm": 844.60205078125, + "learning_rate": 3.457597951442595e-06, + "loss": 28.3744, + "step": 79180 + }, + { + "epoch": 0.3199376204462724, + "grad_norm": 402.255859375, + "learning_rate": 3.456256428369633e-06, + "loss": 39.7704, + "step": 79190 + }, + { + "epoch": 0.31997802171164, + "grad_norm": 719.4677124023438, + "learning_rate": 3.4549150281252635e-06, + "loss": 60.1323, + "step": 79200 + }, + { + "epoch": 0.32001842297700767, + "grad_norm": 797.6649780273438, + "learning_rate": 3.453573750816214e-06, + "loss": 50.052, + "step": 79210 + }, + { + "epoch": 0.32005882424237525, + "grad_norm": 1009.09716796875, + "learning_rate": 3.452232596549204e-06, + "loss": 51.3804, + "step": 79220 + }, + { + "epoch": 0.3200992255077429, + "grad_norm": 684.1012573242188, + "learning_rate": 3.4508915654309438e-06, + "loss": 42.5982, + "step": 79230 + }, + { + "epoch": 0.32013962677311053, + "grad_norm": 571.1311645507812, + "learning_rate": 3.4495506575681313e-06, + "loss": 39.391, + "step": 79240 + }, + { + "epoch": 0.32018002803847817, + "grad_norm": 363.0473937988281, + "learning_rate": 3.4482098730674577e-06, + "loss": 39.8878, + "step": 79250 + }, + { + "epoch": 0.3202204293038458, + "grad_norm": 476.1780700683594, + "learning_rate": 3.4468692120356017e-06, + "loss": 42.8986, + "step": 79260 + }, + { + "epoch": 0.32026083056921345, + "grad_norm": 883.4768676757812, + "learning_rate": 3.4455286745792383e-06, + "loss": 53.311, + "step": 79270 + }, + { + "epoch": 0.32030123183458103, + "grad_norm": 594.4937133789062, + "learning_rate": 3.4441882608050216e-06, + "loss": 35.4785, + "step": 79280 + }, + { + "epoch": 0.3203416330999487, + "grad_norm": 534.9506225585938, + "learning_rate": 3.442847970819604e-06, + "loss": 42.1486, + "step": 79290 + }, + { + "epoch": 0.3203820343653163, + "grad_norm": 3948.400634765625, + "learning_rate": 3.441507804729627e-06, + "loss": 73.9457, + "step": 79300 + }, + { + "epoch": 0.32042243563068395, + "grad_norm": 287.04266357421875, + "learning_rate": 3.440167762641722e-06, + "loss": 64.0647, + "step": 79310 + }, + { + "epoch": 0.3204628368960516, + "grad_norm": 1426.3807373046875, + "learning_rate": 3.43882784466251e-06, + "loss": 37.0213, + "step": 79320 + }, + { + "epoch": 0.32050323816141923, + "grad_norm": 613.2059936523438, + "learning_rate": 3.4374880508986013e-06, + "loss": 38.8521, + "step": 79330 + }, + { + "epoch": 0.3205436394267868, + "grad_norm": 494.06988525390625, + "learning_rate": 3.436148381456598e-06, + "loss": 44.2171, + "step": 79340 + }, + { + "epoch": 0.32058404069215446, + "grad_norm": 764.4878540039062, + "learning_rate": 3.434808836443091e-06, + "loss": 58.2755, + "step": 79350 + }, + { + "epoch": 0.3206244419575221, + "grad_norm": 1218.543212890625, + "learning_rate": 3.4334694159646608e-06, + "loss": 40.1468, + "step": 79360 + }, + { + "epoch": 0.32066484322288974, + "grad_norm": 611.8609008789062, + "learning_rate": 3.43213012012788e-06, + "loss": 35.2115, + "step": 79370 + }, + { + "epoch": 0.3207052444882574, + "grad_norm": 702.8851928710938, + "learning_rate": 3.43079094903931e-06, + "loss": 41.0661, + "step": 79380 + }, + { + "epoch": 0.320745645753625, + "grad_norm": 959.5906982421875, + "learning_rate": 3.4294519028055014e-06, + "loss": 51.1332, + "step": 79390 + }, + { + "epoch": 0.32078604701899266, + "grad_norm": 552.6378784179688, + "learning_rate": 3.428112981532998e-06, + "loss": 63.2162, + "step": 79400 + }, + { + "epoch": 0.32082644828436024, + "grad_norm": 819.8400268554688, + "learning_rate": 3.4267741853283305e-06, + "loss": 41.3017, + "step": 79410 + }, + { + "epoch": 0.3208668495497279, + "grad_norm": 797.3480834960938, + "learning_rate": 3.425435514298021e-06, + "loss": 70.3864, + "step": 79420 + }, + { + "epoch": 0.3209072508150955, + "grad_norm": 281.98406982421875, + "learning_rate": 3.4240969685485813e-06, + "loss": 45.3279, + "step": 79430 + }, + { + "epoch": 0.32094765208046316, + "grad_norm": 497.9682922363281, + "learning_rate": 3.422758548186515e-06, + "loss": 49.0692, + "step": 79440 + }, + { + "epoch": 0.3209880533458308, + "grad_norm": 532.9519653320312, + "learning_rate": 3.4214202533183104e-06, + "loss": 47.7864, + "step": 79450 + }, + { + "epoch": 0.32102845461119844, + "grad_norm": 327.18487548828125, + "learning_rate": 3.420082084050453e-06, + "loss": 62.1244, + "step": 79460 + }, + { + "epoch": 0.32106885587656603, + "grad_norm": 522.5250244140625, + "learning_rate": 3.4187440404894123e-06, + "loss": 33.9887, + "step": 79470 + }, + { + "epoch": 0.32110925714193367, + "grad_norm": 465.98486328125, + "learning_rate": 3.417406122741651e-06, + "loss": 50.0454, + "step": 79480 + }, + { + "epoch": 0.3211496584073013, + "grad_norm": 641.7701416015625, + "learning_rate": 3.416068330913621e-06, + "loss": 45.9428, + "step": 79490 + }, + { + "epoch": 0.32119005967266895, + "grad_norm": 220.279296875, + "learning_rate": 3.4147306651117663e-06, + "loss": 37.3395, + "step": 79500 + }, + { + "epoch": 0.3212304609380366, + "grad_norm": 474.749267578125, + "learning_rate": 3.4133931254425156e-06, + "loss": 45.0709, + "step": 79510 + }, + { + "epoch": 0.32127086220340423, + "grad_norm": 253.58042907714844, + "learning_rate": 3.4120557120122944e-06, + "loss": 44.7656, + "step": 79520 + }, + { + "epoch": 0.32131126346877187, + "grad_norm": 597.0999145507812, + "learning_rate": 3.4107184249275114e-06, + "loss": 52.3735, + "step": 79530 + }, + { + "epoch": 0.32135166473413945, + "grad_norm": 484.10125732421875, + "learning_rate": 3.4093812642945694e-06, + "loss": 39.8811, + "step": 79540 + }, + { + "epoch": 0.3213920659995071, + "grad_norm": 523.0706176757812, + "learning_rate": 3.40804423021986e-06, + "loss": 48.5399, + "step": 79550 + }, + { + "epoch": 0.32143246726487473, + "grad_norm": 797.7967529296875, + "learning_rate": 3.4067073228097656e-06, + "loss": 36.3043, + "step": 79560 + }, + { + "epoch": 0.3214728685302424, + "grad_norm": 534.5451049804688, + "learning_rate": 3.4053705421706574e-06, + "loss": 36.7537, + "step": 79570 + }, + { + "epoch": 0.32151326979561, + "grad_norm": 664.1915893554688, + "learning_rate": 3.4040338884088955e-06, + "loss": 44.3147, + "step": 79580 + }, + { + "epoch": 0.32155367106097765, + "grad_norm": 295.6415710449219, + "learning_rate": 3.4026973616308334e-06, + "loss": 40.6299, + "step": 79590 + }, + { + "epoch": 0.32159407232634524, + "grad_norm": 624.4505004882812, + "learning_rate": 3.401360961942812e-06, + "loss": 53.7231, + "step": 79600 + }, + { + "epoch": 0.3216344735917129, + "grad_norm": 735.50244140625, + "learning_rate": 3.4000246894511634e-06, + "loss": 32.1379, + "step": 79610 + }, + { + "epoch": 0.3216748748570805, + "grad_norm": 268.3006896972656, + "learning_rate": 3.398688544262205e-06, + "loss": 30.7915, + "step": 79620 + }, + { + "epoch": 0.32171527612244816, + "grad_norm": 868.1389770507812, + "learning_rate": 3.397352526482251e-06, + "loss": 39.305, + "step": 79630 + }, + { + "epoch": 0.3217556773878158, + "grad_norm": 487.7118225097656, + "learning_rate": 3.396016636217601e-06, + "loss": 40.3482, + "step": 79640 + }, + { + "epoch": 0.32179607865318344, + "grad_norm": 551.2245483398438, + "learning_rate": 3.394680873574546e-06, + "loss": 46.2264, + "step": 79650 + }, + { + "epoch": 0.321836479918551, + "grad_norm": 634.7960205078125, + "learning_rate": 3.3933452386593666e-06, + "loss": 42.2122, + "step": 79660 + }, + { + "epoch": 0.32187688118391866, + "grad_norm": 272.61962890625, + "learning_rate": 3.392009731578334e-06, + "loss": 28.0738, + "step": 79670 + }, + { + "epoch": 0.3219172824492863, + "grad_norm": 273.2590026855469, + "learning_rate": 3.3906743524377053e-06, + "loss": 33.9681, + "step": 79680 + }, + { + "epoch": 0.32195768371465394, + "grad_norm": 400.4376220703125, + "learning_rate": 3.3893391013437338e-06, + "loss": 53.93, + "step": 79690 + }, + { + "epoch": 0.3219980849800216, + "grad_norm": 381.15850830078125, + "learning_rate": 3.38800397840266e-06, + "loss": 38.3037, + "step": 79700 + }, + { + "epoch": 0.3220384862453892, + "grad_norm": 668.4859008789062, + "learning_rate": 3.3866689837207094e-06, + "loss": 43.7831, + "step": 79710 + }, + { + "epoch": 0.32207888751075686, + "grad_norm": 599.7156372070312, + "learning_rate": 3.3853341174041025e-06, + "loss": 36.1802, + "step": 79720 + }, + { + "epoch": 0.32211928877612445, + "grad_norm": 673.56005859375, + "learning_rate": 3.3839993795590507e-06, + "loss": 33.4147, + "step": 79730 + }, + { + "epoch": 0.3221596900414921, + "grad_norm": 613.9022216796875, + "learning_rate": 3.3826647702917526e-06, + "loss": 51.1556, + "step": 79740 + }, + { + "epoch": 0.3222000913068597, + "grad_norm": 726.7110595703125, + "learning_rate": 3.3813302897083955e-06, + "loss": 31.6607, + "step": 79750 + }, + { + "epoch": 0.32224049257222737, + "grad_norm": 848.6845092773438, + "learning_rate": 3.379995937915158e-06, + "loss": 45.0145, + "step": 79760 + }, + { + "epoch": 0.322280893837595, + "grad_norm": 804.3870849609375, + "learning_rate": 3.37866171501821e-06, + "loss": 35.8172, + "step": 79770 + }, + { + "epoch": 0.32232129510296265, + "grad_norm": 282.2019958496094, + "learning_rate": 3.3773276211237087e-06, + "loss": 38.8783, + "step": 79780 + }, + { + "epoch": 0.32236169636833023, + "grad_norm": 501.5666198730469, + "learning_rate": 3.3759936563378004e-06, + "loss": 37.0371, + "step": 79790 + }, + { + "epoch": 0.32240209763369787, + "grad_norm": 299.2339782714844, + "learning_rate": 3.374659820766625e-06, + "loss": 35.9554, + "step": 79800 + }, + { + "epoch": 0.3224424988990655, + "grad_norm": 376.84515380859375, + "learning_rate": 3.3733261145163064e-06, + "loss": 68.8255, + "step": 79810 + }, + { + "epoch": 0.32248290016443315, + "grad_norm": 342.44073486328125, + "learning_rate": 3.371992537692964e-06, + "loss": 30.2029, + "step": 79820 + }, + { + "epoch": 0.3225233014298008, + "grad_norm": 251.28659057617188, + "learning_rate": 3.370659090402704e-06, + "loss": 31.2192, + "step": 79830 + }, + { + "epoch": 0.32256370269516843, + "grad_norm": 563.5650024414062, + "learning_rate": 3.3693257727516227e-06, + "loss": 40.0149, + "step": 79840 + }, + { + "epoch": 0.32260410396053607, + "grad_norm": 611.80419921875, + "learning_rate": 3.367992584845806e-06, + "loss": 31.1052, + "step": 79850 + }, + { + "epoch": 0.32264450522590365, + "grad_norm": 707.4664916992188, + "learning_rate": 3.3666595267913293e-06, + "loss": 50.2898, + "step": 79860 + }, + { + "epoch": 0.3226849064912713, + "grad_norm": 855.7467041015625, + "learning_rate": 3.365326598694259e-06, + "loss": 38.313, + "step": 79870 + }, + { + "epoch": 0.32272530775663893, + "grad_norm": 1066.11865234375, + "learning_rate": 3.3639938006606483e-06, + "loss": 57.3661, + "step": 79880 + }, + { + "epoch": 0.3227657090220066, + "grad_norm": 662.3603515625, + "learning_rate": 3.3626611327965418e-06, + "loss": 55.8432, + "step": 79890 + }, + { + "epoch": 0.3228061102873742, + "grad_norm": 557.5881958007812, + "learning_rate": 3.3613285952079754e-06, + "loss": 31.4979, + "step": 79900 + }, + { + "epoch": 0.32284651155274185, + "grad_norm": 336.6396789550781, + "learning_rate": 3.3599961880009713e-06, + "loss": 39.1249, + "step": 79910 + }, + { + "epoch": 0.32288691281810944, + "grad_norm": 611.0641479492188, + "learning_rate": 3.3586639112815446e-06, + "loss": 47.8904, + "step": 79920 + }, + { + "epoch": 0.3229273140834771, + "grad_norm": 436.7526550292969, + "learning_rate": 3.357331765155698e-06, + "loss": 38.4208, + "step": 79930 + }, + { + "epoch": 0.3229677153488447, + "grad_norm": 791.1549072265625, + "learning_rate": 3.355999749729424e-06, + "loss": 54.1118, + "step": 79940 + }, + { + "epoch": 0.32300811661421236, + "grad_norm": 308.76812744140625, + "learning_rate": 3.354667865108706e-06, + "loss": 45.5083, + "step": 79950 + }, + { + "epoch": 0.32304851787958, + "grad_norm": 392.00079345703125, + "learning_rate": 3.353336111399513e-06, + "loss": 41.9508, + "step": 79960 + }, + { + "epoch": 0.32308891914494764, + "grad_norm": 682.669921875, + "learning_rate": 3.3520044887078096e-06, + "loss": 56.1205, + "step": 79970 + }, + { + "epoch": 0.3231293204103152, + "grad_norm": 477.31451416015625, + "learning_rate": 3.350672997139546e-06, + "loss": 37.7137, + "step": 79980 + }, + { + "epoch": 0.32316972167568286, + "grad_norm": 604.2123413085938, + "learning_rate": 3.3493416368006614e-06, + "loss": 45.0942, + "step": 79990 + }, + { + "epoch": 0.3232101229410505, + "grad_norm": 340.8969421386719, + "learning_rate": 3.348010407797088e-06, + "loss": 27.6048, + "step": 80000 + }, + { + "epoch": 0.32325052420641814, + "grad_norm": 520.0179443359375, + "learning_rate": 3.346679310234744e-06, + "loss": 55.725, + "step": 80010 + }, + { + "epoch": 0.3232909254717858, + "grad_norm": 773.4434204101562, + "learning_rate": 3.34534834421954e-06, + "loss": 28.6225, + "step": 80020 + }, + { + "epoch": 0.3233313267371534, + "grad_norm": 632.611328125, + "learning_rate": 3.3440175098573748e-06, + "loss": 39.8326, + "step": 80030 + }, + { + "epoch": 0.32337172800252106, + "grad_norm": 1086.8714599609375, + "learning_rate": 3.3426868072541386e-06, + "loss": 44.8706, + "step": 80040 + }, + { + "epoch": 0.32341212926788865, + "grad_norm": 742.6467895507812, + "learning_rate": 3.3413562365157037e-06, + "loss": 51.3557, + "step": 80050 + }, + { + "epoch": 0.3234525305332563, + "grad_norm": 493.33428955078125, + "learning_rate": 3.340025797747942e-06, + "loss": 29.4505, + "step": 80060 + }, + { + "epoch": 0.32349293179862393, + "grad_norm": 574.6460571289062, + "learning_rate": 3.3386954910567094e-06, + "loss": 48.0383, + "step": 80070 + }, + { + "epoch": 0.32353333306399157, + "grad_norm": 314.26397705078125, + "learning_rate": 3.337365316547852e-06, + "loss": 35.6812, + "step": 80080 + }, + { + "epoch": 0.3235737343293592, + "grad_norm": 521.7124633789062, + "learning_rate": 3.336035274327206e-06, + "loss": 27.6825, + "step": 80090 + }, + { + "epoch": 0.32361413559472685, + "grad_norm": 745.5689697265625, + "learning_rate": 3.3347053645005965e-06, + "loss": 48.5578, + "step": 80100 + }, + { + "epoch": 0.32365453686009443, + "grad_norm": 226.4112091064453, + "learning_rate": 3.333375587173838e-06, + "loss": 41.1653, + "step": 80110 + }, + { + "epoch": 0.3236949381254621, + "grad_norm": 737.1196899414062, + "learning_rate": 3.332045942452738e-06, + "loss": 65.6083, + "step": 80120 + }, + { + "epoch": 0.3237353393908297, + "grad_norm": 573.3949584960938, + "learning_rate": 3.330716430443085e-06, + "loss": 44.0588, + "step": 80130 + }, + { + "epoch": 0.32377574065619735, + "grad_norm": 943.7135009765625, + "learning_rate": 3.329387051250664e-06, + "loss": 62.2634, + "step": 80140 + }, + { + "epoch": 0.323816141921565, + "grad_norm": 686.1159057617188, + "learning_rate": 3.3280578049812493e-06, + "loss": 34.7635, + "step": 80150 + }, + { + "epoch": 0.32385654318693263, + "grad_norm": 514.3086547851562, + "learning_rate": 3.3267286917406027e-06, + "loss": 27.8198, + "step": 80160 + }, + { + "epoch": 0.3238969444523003, + "grad_norm": 286.08856201171875, + "learning_rate": 3.3253997116344737e-06, + "loss": 52.2875, + "step": 80170 + }, + { + "epoch": 0.32393734571766786, + "grad_norm": 1013.5391845703125, + "learning_rate": 3.3240708647686047e-06, + "loss": 43.7235, + "step": 80180 + }, + { + "epoch": 0.3239777469830355, + "grad_norm": 311.2264404296875, + "learning_rate": 3.322742151248726e-06, + "loss": 48.4024, + "step": 80190 + }, + { + "epoch": 0.32401814824840314, + "grad_norm": 620.4436645507812, + "learning_rate": 3.3214135711805555e-06, + "loss": 48.492, + "step": 80200 + }, + { + "epoch": 0.3240585495137708, + "grad_norm": 514.2919311523438, + "learning_rate": 3.3200851246698053e-06, + "loss": 35.2087, + "step": 80210 + }, + { + "epoch": 0.3240989507791384, + "grad_norm": 673.8460083007812, + "learning_rate": 3.318756811822171e-06, + "loss": 53.043, + "step": 80220 + }, + { + "epoch": 0.32413935204450606, + "grad_norm": 529.0668334960938, + "learning_rate": 3.3174286327433408e-06, + "loss": 48.762, + "step": 80230 + }, + { + "epoch": 0.32417975330987364, + "grad_norm": 274.8592834472656, + "learning_rate": 3.3161005875389916e-06, + "loss": 44.827, + "step": 80240 + }, + { + "epoch": 0.3242201545752413, + "grad_norm": 572.7136840820312, + "learning_rate": 3.3147726763147913e-06, + "loss": 39.5248, + "step": 80250 + }, + { + "epoch": 0.3242605558406089, + "grad_norm": 772.9658813476562, + "learning_rate": 3.3134448991763957e-06, + "loss": 39.5004, + "step": 80260 + }, + { + "epoch": 0.32430095710597656, + "grad_norm": 361.0927734375, + "learning_rate": 3.312117256229449e-06, + "loss": 38.6941, + "step": 80270 + }, + { + "epoch": 0.3243413583713442, + "grad_norm": 563.4647216796875, + "learning_rate": 3.310789747579586e-06, + "loss": 73.1953, + "step": 80280 + }, + { + "epoch": 0.32438175963671184, + "grad_norm": 383.66326904296875, + "learning_rate": 3.30946237333243e-06, + "loss": 34.4704, + "step": 80290 + }, + { + "epoch": 0.3244221609020794, + "grad_norm": 652.5866088867188, + "learning_rate": 3.308135133593595e-06, + "loss": 41.3068, + "step": 80300 + }, + { + "epoch": 0.32446256216744707, + "grad_norm": 180.12716674804688, + "learning_rate": 3.3068080284686825e-06, + "loss": 51.6751, + "step": 80310 + }, + { + "epoch": 0.3245029634328147, + "grad_norm": 239.9126739501953, + "learning_rate": 3.3054810580632844e-06, + "loss": 33.6634, + "step": 80320 + }, + { + "epoch": 0.32454336469818235, + "grad_norm": 615.7425537109375, + "learning_rate": 3.304154222482982e-06, + "loss": 41.3266, + "step": 80330 + }, + { + "epoch": 0.32458376596355, + "grad_norm": 391.79119873046875, + "learning_rate": 3.3028275218333438e-06, + "loss": 34.0825, + "step": 80340 + }, + { + "epoch": 0.3246241672289176, + "grad_norm": 380.4940490722656, + "learning_rate": 3.301500956219932e-06, + "loss": 64.4449, + "step": 80350 + }, + { + "epoch": 0.32466456849428527, + "grad_norm": 569.7992553710938, + "learning_rate": 3.3001745257482935e-06, + "loss": 45.7908, + "step": 80360 + }, + { + "epoch": 0.32470496975965285, + "grad_norm": 666.3257446289062, + "learning_rate": 3.2988482305239673e-06, + "loss": 45.82, + "step": 80370 + }, + { + "epoch": 0.3247453710250205, + "grad_norm": 590.6626586914062, + "learning_rate": 3.2975220706524813e-06, + "loss": 38.1958, + "step": 80380 + }, + { + "epoch": 0.32478577229038813, + "grad_norm": 621.2801513671875, + "learning_rate": 3.2961960462393492e-06, + "loss": 37.6412, + "step": 80390 + }, + { + "epoch": 0.32482617355575577, + "grad_norm": 708.3571166992188, + "learning_rate": 3.2948701573900786e-06, + "loss": 43.4666, + "step": 80400 + }, + { + "epoch": 0.3248665748211234, + "grad_norm": 481.2866516113281, + "learning_rate": 3.2935444042101646e-06, + "loss": 26.2208, + "step": 80410 + }, + { + "epoch": 0.32490697608649105, + "grad_norm": 521.428955078125, + "learning_rate": 3.29221878680509e-06, + "loss": 52.5316, + "step": 80420 + }, + { + "epoch": 0.32494737735185864, + "grad_norm": 793.7952270507812, + "learning_rate": 3.2908933052803292e-06, + "loss": 64.951, + "step": 80430 + }, + { + "epoch": 0.3249877786172263, + "grad_norm": 466.8500671386719, + "learning_rate": 3.2895679597413433e-06, + "loss": 28.5023, + "step": 80440 + }, + { + "epoch": 0.3250281798825939, + "grad_norm": 404.64739990234375, + "learning_rate": 3.2882427502935867e-06, + "loss": 37.0899, + "step": 80450 + }, + { + "epoch": 0.32506858114796156, + "grad_norm": 475.4322509765625, + "learning_rate": 3.2869176770424976e-06, + "loss": 41.2995, + "step": 80460 + }, + { + "epoch": 0.3251089824133292, + "grad_norm": 679.5100708007812, + "learning_rate": 3.2855927400935085e-06, + "loss": 36.223, + "step": 80470 + }, + { + "epoch": 0.32514938367869683, + "grad_norm": 426.2971496582031, + "learning_rate": 3.2842679395520363e-06, + "loss": 44.444, + "step": 80480 + }, + { + "epoch": 0.3251897849440645, + "grad_norm": 727.0317993164062, + "learning_rate": 3.282943275523489e-06, + "loss": 46.9828, + "step": 80490 + }, + { + "epoch": 0.32523018620943206, + "grad_norm": 522.9331665039062, + "learning_rate": 3.2816187481132655e-06, + "loss": 47.7081, + "step": 80500 + }, + { + "epoch": 0.3252705874747997, + "grad_norm": 675.2789916992188, + "learning_rate": 3.280294357426752e-06, + "loss": 50.2561, + "step": 80510 + }, + { + "epoch": 0.32531098874016734, + "grad_norm": 735.783447265625, + "learning_rate": 3.2789701035693242e-06, + "loss": 58.5167, + "step": 80520 + }, + { + "epoch": 0.325351390005535, + "grad_norm": 601.2733154296875, + "learning_rate": 3.277645986646346e-06, + "loss": 52.778, + "step": 80530 + }, + { + "epoch": 0.3253917912709026, + "grad_norm": 649.8107299804688, + "learning_rate": 3.276322006763172e-06, + "loss": 44.8131, + "step": 80540 + }, + { + "epoch": 0.32543219253627026, + "grad_norm": 731.3230590820312, + "learning_rate": 3.274998164025148e-06, + "loss": 46.8925, + "step": 80550 + }, + { + "epoch": 0.32547259380163784, + "grad_norm": 824.4846801757812, + "learning_rate": 3.2736744585376016e-06, + "loss": 41.6556, + "step": 80560 + }, + { + "epoch": 0.3255129950670055, + "grad_norm": 402.4475402832031, + "learning_rate": 3.2723508904058547e-06, + "loss": 41.3893, + "step": 80570 + }, + { + "epoch": 0.3255533963323731, + "grad_norm": 762.2625732421875, + "learning_rate": 3.27102745973522e-06, + "loss": 45.3046, + "step": 80580 + }, + { + "epoch": 0.32559379759774076, + "grad_norm": 526.9542236328125, + "learning_rate": 3.269704166630995e-06, + "loss": 30.6245, + "step": 80590 + }, + { + "epoch": 0.3256341988631084, + "grad_norm": 1798.8516845703125, + "learning_rate": 3.268381011198468e-06, + "loss": 55.7041, + "step": 80600 + }, + { + "epoch": 0.32567460012847604, + "grad_norm": 701.319091796875, + "learning_rate": 3.2670579935429176e-06, + "loss": 34.5113, + "step": 80610 + }, + { + "epoch": 0.32571500139384363, + "grad_norm": 708.3885498046875, + "learning_rate": 3.265735113769609e-06, + "loss": 41.173, + "step": 80620 + }, + { + "epoch": 0.32575540265921127, + "grad_norm": 384.1140441894531, + "learning_rate": 3.264412371983797e-06, + "loss": 49.1882, + "step": 80630 + }, + { + "epoch": 0.3257958039245789, + "grad_norm": 618.54931640625, + "learning_rate": 3.2630897682907312e-06, + "loss": 44.0357, + "step": 80640 + }, + { + "epoch": 0.32583620518994655, + "grad_norm": 781.7505493164062, + "learning_rate": 3.261767302795639e-06, + "loss": 45.6742, + "step": 80650 + }, + { + "epoch": 0.3258766064553142, + "grad_norm": 602.9747924804688, + "learning_rate": 3.2604449756037447e-06, + "loss": 33.2267, + "step": 80660 + }, + { + "epoch": 0.32591700772068183, + "grad_norm": 307.5460205078125, + "learning_rate": 3.2591227868202592e-06, + "loss": 40.4157, + "step": 80670 + }, + { + "epoch": 0.32595740898604947, + "grad_norm": 867.035400390625, + "learning_rate": 3.257800736550385e-06, + "loss": 47.2259, + "step": 80680 + }, + { + "epoch": 0.32599781025141705, + "grad_norm": 432.8591613769531, + "learning_rate": 3.2564788248993105e-06, + "loss": 48.1717, + "step": 80690 + }, + { + "epoch": 0.3260382115167847, + "grad_norm": 551.8834838867188, + "learning_rate": 3.2551570519722155e-06, + "loss": 37.013, + "step": 80700 + }, + { + "epoch": 0.32607861278215233, + "grad_norm": 527.9805908203125, + "learning_rate": 3.2538354178742648e-06, + "loss": 53.8853, + "step": 80710 + }, + { + "epoch": 0.32611901404752, + "grad_norm": 1692.8126220703125, + "learning_rate": 3.2525139227106163e-06, + "loss": 59.593, + "step": 80720 + }, + { + "epoch": 0.3261594153128876, + "grad_norm": 61.837989807128906, + "learning_rate": 3.2511925665864164e-06, + "loss": 42.8212, + "step": 80730 + }, + { + "epoch": 0.32619981657825525, + "grad_norm": 617.0887451171875, + "learning_rate": 3.2498713496067963e-06, + "loss": 40.8015, + "step": 80740 + }, + { + "epoch": 0.32624021784362284, + "grad_norm": 558.3300170898438, + "learning_rate": 3.2485502718768814e-06, + "loss": 38.1439, + "step": 80750 + }, + { + "epoch": 0.3262806191089905, + "grad_norm": 410.2859802246094, + "learning_rate": 3.2472293335017836e-06, + "loss": 42.2704, + "step": 80760 + }, + { + "epoch": 0.3263210203743581, + "grad_norm": 666.5332641601562, + "learning_rate": 3.245908534586602e-06, + "loss": 40.3663, + "step": 80770 + }, + { + "epoch": 0.32636142163972576, + "grad_norm": 737.5230712890625, + "learning_rate": 3.2445878752364298e-06, + "loss": 33.5636, + "step": 80780 + }, + { + "epoch": 0.3264018229050934, + "grad_norm": 496.13604736328125, + "learning_rate": 3.2432673555563433e-06, + "loss": 38.1529, + "step": 80790 + }, + { + "epoch": 0.32644222417046104, + "grad_norm": 966.6318359375, + "learning_rate": 3.2419469756514116e-06, + "loss": 49.5031, + "step": 80800 + }, + { + "epoch": 0.3264826254358287, + "grad_norm": 447.5719299316406, + "learning_rate": 3.2406267356266918e-06, + "loss": 29.6554, + "step": 80810 + }, + { + "epoch": 0.32652302670119626, + "grad_norm": 769.01025390625, + "learning_rate": 3.2393066355872264e-06, + "loss": 57.3569, + "step": 80820 + }, + { + "epoch": 0.3265634279665639, + "grad_norm": 454.7648010253906, + "learning_rate": 3.237986675638052e-06, + "loss": 46.1115, + "step": 80830 + }, + { + "epoch": 0.32660382923193154, + "grad_norm": 398.7479248046875, + "learning_rate": 3.236666855884192e-06, + "loss": 27.5807, + "step": 80840 + }, + { + "epoch": 0.3266442304972992, + "grad_norm": 687.7910766601562, + "learning_rate": 3.2353471764306567e-06, + "loss": 44.8174, + "step": 80850 + }, + { + "epoch": 0.3266846317626668, + "grad_norm": 510.6954345703125, + "learning_rate": 3.234027637382447e-06, + "loss": 42.4717, + "step": 80860 + }, + { + "epoch": 0.32672503302803446, + "grad_norm": 430.68328857421875, + "learning_rate": 3.2327082388445545e-06, + "loss": 41.8108, + "step": 80870 + }, + { + "epoch": 0.32676543429340205, + "grad_norm": 56.9016227722168, + "learning_rate": 3.2313889809219568e-06, + "loss": 30.2116, + "step": 80880 + }, + { + "epoch": 0.3268058355587697, + "grad_norm": 467.46502685546875, + "learning_rate": 3.2300698637196217e-06, + "loss": 31.7899, + "step": 80890 + }, + { + "epoch": 0.3268462368241373, + "grad_norm": 572.5570068359375, + "learning_rate": 3.2287508873425043e-06, + "loss": 43.237, + "step": 80900 + }, + { + "epoch": 0.32688663808950497, + "grad_norm": 874.2039794921875, + "learning_rate": 3.22743205189555e-06, + "loss": 55.5983, + "step": 80910 + }, + { + "epoch": 0.3269270393548726, + "grad_norm": 823.0765380859375, + "learning_rate": 3.2261133574836918e-06, + "loss": 37.0331, + "step": 80920 + }, + { + "epoch": 0.32696744062024025, + "grad_norm": 701.4224243164062, + "learning_rate": 3.2247948042118525e-06, + "loss": 50.2638, + "step": 80930 + }, + { + "epoch": 0.32700784188560783, + "grad_norm": 280.49932861328125, + "learning_rate": 3.223476392184944e-06, + "loss": 37.2814, + "step": 80940 + }, + { + "epoch": 0.32704824315097547, + "grad_norm": 562.7162475585938, + "learning_rate": 3.2221581215078656e-06, + "loss": 36.172, + "step": 80950 + }, + { + "epoch": 0.3270886444163431, + "grad_norm": 1017.1797485351562, + "learning_rate": 3.2208399922855055e-06, + "loss": 40.6515, + "step": 80960 + }, + { + "epoch": 0.32712904568171075, + "grad_norm": 565.8228149414062, + "learning_rate": 3.2195220046227425e-06, + "loss": 35.703, + "step": 80970 + }, + { + "epoch": 0.3271694469470784, + "grad_norm": 852.8961181640625, + "learning_rate": 3.218204158624445e-06, + "loss": 60.7625, + "step": 80980 + }, + { + "epoch": 0.32720984821244603, + "grad_norm": 808.6995239257812, + "learning_rate": 3.216886454395463e-06, + "loss": 53.9434, + "step": 80990 + }, + { + "epoch": 0.32725024947781367, + "grad_norm": 386.1480407714844, + "learning_rate": 3.2155688920406415e-06, + "loss": 28.4667, + "step": 81000 + }, + { + "epoch": 0.32729065074318126, + "grad_norm": 798.99755859375, + "learning_rate": 3.2142514716648143e-06, + "loss": 39.1096, + "step": 81010 + }, + { + "epoch": 0.3273310520085489, + "grad_norm": 242.5078582763672, + "learning_rate": 3.212934193372803e-06, + "loss": 37.2829, + "step": 81020 + }, + { + "epoch": 0.32737145327391654, + "grad_norm": 237.69207763671875, + "learning_rate": 3.2116170572694156e-06, + "loss": 41.0571, + "step": 81030 + }, + { + "epoch": 0.3274118545392842, + "grad_norm": 570.440673828125, + "learning_rate": 3.2103000634594518e-06, + "loss": 51.5456, + "step": 81040 + }, + { + "epoch": 0.3274522558046518, + "grad_norm": 616.5958862304688, + "learning_rate": 3.2089832120476983e-06, + "loss": 30.9374, + "step": 81050 + }, + { + "epoch": 0.32749265707001946, + "grad_norm": 810.25927734375, + "learning_rate": 3.2076665031389294e-06, + "loss": 30.6905, + "step": 81060 + }, + { + "epoch": 0.32753305833538704, + "grad_norm": 584.2343139648438, + "learning_rate": 3.2063499368379146e-06, + "loss": 38.2822, + "step": 81070 + }, + { + "epoch": 0.3275734596007547, + "grad_norm": 442.2859802246094, + "learning_rate": 3.2050335132494014e-06, + "loss": 54.2211, + "step": 81080 + }, + { + "epoch": 0.3276138608661223, + "grad_norm": 422.188232421875, + "learning_rate": 3.203717232478133e-06, + "loss": 26.3646, + "step": 81090 + }, + { + "epoch": 0.32765426213148996, + "grad_norm": 462.74261474609375, + "learning_rate": 3.2024010946288415e-06, + "loss": 42.2259, + "step": 81100 + }, + { + "epoch": 0.3276946633968576, + "grad_norm": 511.82464599609375, + "learning_rate": 3.201085099806245e-06, + "loss": 47.9144, + "step": 81110 + }, + { + "epoch": 0.32773506466222524, + "grad_norm": 1044.2135009765625, + "learning_rate": 3.199769248115051e-06, + "loss": 36.4315, + "step": 81120 + }, + { + "epoch": 0.3277754659275929, + "grad_norm": 2148.4111328125, + "learning_rate": 3.1984535396599565e-06, + "loss": 51.7218, + "step": 81130 + }, + { + "epoch": 0.32781586719296046, + "grad_norm": 278.0746154785156, + "learning_rate": 3.1971379745456452e-06, + "loss": 54.2099, + "step": 81140 + }, + { + "epoch": 0.3278562684583281, + "grad_norm": 1001.82568359375, + "learning_rate": 3.1958225528767918e-06, + "loss": 44.9904, + "step": 81150 + }, + { + "epoch": 0.32789666972369574, + "grad_norm": 594.7664794921875, + "learning_rate": 3.1945072747580585e-06, + "loss": 46.4409, + "step": 81160 + }, + { + "epoch": 0.3279370709890634, + "grad_norm": 623.8257446289062, + "learning_rate": 3.1931921402940946e-06, + "loss": 42.915, + "step": 81170 + }, + { + "epoch": 0.327977472254431, + "grad_norm": 392.2928466796875, + "learning_rate": 3.1918771495895395e-06, + "loss": 40.8185, + "step": 81180 + }, + { + "epoch": 0.32801787351979866, + "grad_norm": 710.558349609375, + "learning_rate": 3.1905623027490205e-06, + "loss": 42.129, + "step": 81190 + }, + { + "epoch": 0.32805827478516625, + "grad_norm": 529.6282958984375, + "learning_rate": 3.1892475998771567e-06, + "loss": 39.6081, + "step": 81200 + }, + { + "epoch": 0.3280986760505339, + "grad_norm": 546.3908081054688, + "learning_rate": 3.1879330410785503e-06, + "loss": 42.8713, + "step": 81210 + }, + { + "epoch": 0.32813907731590153, + "grad_norm": 811.4768676757812, + "learning_rate": 3.186618626457796e-06, + "loss": 26.8995, + "step": 81220 + }, + { + "epoch": 0.32817947858126917, + "grad_norm": 190.24868774414062, + "learning_rate": 3.1853043561194748e-06, + "loss": 35.0625, + "step": 81230 + }, + { + "epoch": 0.3282198798466368, + "grad_norm": 684.8766479492188, + "learning_rate": 3.183990230168159e-06, + "loss": 42.4473, + "step": 81240 + }, + { + "epoch": 0.32826028111200445, + "grad_norm": 831.6922607421875, + "learning_rate": 3.1826762487084053e-06, + "loss": 57.3726, + "step": 81250 + }, + { + "epoch": 0.32830068237737203, + "grad_norm": 587.2714233398438, + "learning_rate": 3.1813624118447615e-06, + "loss": 29.0623, + "step": 81260 + }, + { + "epoch": 0.3283410836427397, + "grad_norm": 541.7936401367188, + "learning_rate": 3.180048719681765e-06, + "loss": 41.3275, + "step": 81270 + }, + { + "epoch": 0.3283814849081073, + "grad_norm": 532.4156494140625, + "learning_rate": 3.178735172323939e-06, + "loss": 37.0309, + "step": 81280 + }, + { + "epoch": 0.32842188617347495, + "grad_norm": 144.05352783203125, + "learning_rate": 3.177421769875796e-06, + "loss": 29.7735, + "step": 81290 + }, + { + "epoch": 0.3284622874388426, + "grad_norm": 662.11376953125, + "learning_rate": 3.176108512441839e-06, + "loss": 49.1473, + "step": 81300 + }, + { + "epoch": 0.32850268870421023, + "grad_norm": 401.6919860839844, + "learning_rate": 3.174795400126557e-06, + "loss": 47.5695, + "step": 81310 + }, + { + "epoch": 0.3285430899695779, + "grad_norm": 732.7622680664062, + "learning_rate": 3.173482433034429e-06, + "loss": 41.8201, + "step": 81320 + }, + { + "epoch": 0.32858349123494546, + "grad_norm": 596.14794921875, + "learning_rate": 3.1721696112699217e-06, + "loss": 39.5877, + "step": 81330 + }, + { + "epoch": 0.3286238925003131, + "grad_norm": 818.538818359375, + "learning_rate": 3.1708569349374896e-06, + "loss": 43.1352, + "step": 81340 + }, + { + "epoch": 0.32866429376568074, + "grad_norm": 1098.4739990234375, + "learning_rate": 3.1695444041415757e-06, + "loss": 32.2592, + "step": 81350 + }, + { + "epoch": 0.3287046950310484, + "grad_norm": 402.6197509765625, + "learning_rate": 3.1682320189866133e-06, + "loss": 51.2497, + "step": 81360 + }, + { + "epoch": 0.328745096296416, + "grad_norm": 793.1287841796875, + "learning_rate": 3.1669197795770225e-06, + "loss": 59.4796, + "step": 81370 + }, + { + "epoch": 0.32878549756178366, + "grad_norm": 379.2230224609375, + "learning_rate": 3.165607686017212e-06, + "loss": 36.5757, + "step": 81380 + }, + { + "epoch": 0.32882589882715124, + "grad_norm": 1003.5530395507812, + "learning_rate": 3.164295738411578e-06, + "loss": 40.0238, + "step": 81390 + }, + { + "epoch": 0.3288663000925189, + "grad_norm": 501.1110534667969, + "learning_rate": 3.1629839368645087e-06, + "loss": 41.732, + "step": 81400 + }, + { + "epoch": 0.3289067013578865, + "grad_norm": 8355.318359375, + "learning_rate": 3.161672281480379e-06, + "loss": 61.845, + "step": 81410 + }, + { + "epoch": 0.32894710262325416, + "grad_norm": 376.4793701171875, + "learning_rate": 3.1603607723635455e-06, + "loss": 44.7023, + "step": 81420 + }, + { + "epoch": 0.3289875038886218, + "grad_norm": 326.89520263671875, + "learning_rate": 3.1590494096183643e-06, + "loss": 41.9557, + "step": 81430 + }, + { + "epoch": 0.32902790515398944, + "grad_norm": 659.9878540039062, + "learning_rate": 3.1577381933491718e-06, + "loss": 45.3585, + "step": 81440 + }, + { + "epoch": 0.3290683064193571, + "grad_norm": 671.0325927734375, + "learning_rate": 3.156427123660297e-06, + "loss": 47.1818, + "step": 81450 + }, + { + "epoch": 0.32910870768472467, + "grad_norm": 991.5573120117188, + "learning_rate": 3.1551162006560554e-06, + "loss": 55.3784, + "step": 81460 + }, + { + "epoch": 0.3291491089500923, + "grad_norm": 628.5300903320312, + "learning_rate": 3.15380542444075e-06, + "loss": 53.9291, + "step": 81470 + }, + { + "epoch": 0.32918951021545995, + "grad_norm": 505.6431884765625, + "learning_rate": 3.1524947951186746e-06, + "loss": 31.4605, + "step": 81480 + }, + { + "epoch": 0.3292299114808276, + "grad_norm": 439.1938171386719, + "learning_rate": 3.1511843127941085e-06, + "loss": 53.568, + "step": 81490 + }, + { + "epoch": 0.3292703127461952, + "grad_norm": 600.3242797851562, + "learning_rate": 3.149873977571324e-06, + "loss": 45.2601, + "step": 81500 + }, + { + "epoch": 0.32931071401156287, + "grad_norm": 869.1049194335938, + "learning_rate": 3.148563789554575e-06, + "loss": 42.587, + "step": 81510 + }, + { + "epoch": 0.32935111527693045, + "grad_norm": 645.8557739257812, + "learning_rate": 3.147253748848107e-06, + "loss": 64.908, + "step": 81520 + }, + { + "epoch": 0.3293915165422981, + "grad_norm": 309.96282958984375, + "learning_rate": 3.1459438555561565e-06, + "loss": 40.8375, + "step": 81530 + }, + { + "epoch": 0.32943191780766573, + "grad_norm": 725.4754028320312, + "learning_rate": 3.1446341097829446e-06, + "loss": 37.4378, + "step": 81540 + }, + { + "epoch": 0.32947231907303337, + "grad_norm": 490.93060302734375, + "learning_rate": 3.1433245116326812e-06, + "loss": 46.1421, + "step": 81550 + }, + { + "epoch": 0.329512720338401, + "grad_norm": 952.149658203125, + "learning_rate": 3.1420150612095653e-06, + "loss": 44.979, + "step": 81560 + }, + { + "epoch": 0.32955312160376865, + "grad_norm": 567.7958984375, + "learning_rate": 3.140705758617784e-06, + "loss": 32.8766, + "step": 81570 + }, + { + "epoch": 0.32959352286913624, + "grad_norm": 887.6821899414062, + "learning_rate": 3.139396603961512e-06, + "loss": 41.5468, + "step": 81580 + }, + { + "epoch": 0.3296339241345039, + "grad_norm": 384.929931640625, + "learning_rate": 3.1380875973449155e-06, + "loss": 50.3088, + "step": 81590 + }, + { + "epoch": 0.3296743253998715, + "grad_norm": 1463.9766845703125, + "learning_rate": 3.1367787388721427e-06, + "loss": 42.5035, + "step": 81600 + }, + { + "epoch": 0.32971472666523916, + "grad_norm": 222.37277221679688, + "learning_rate": 3.135470028647334e-06, + "loss": 47.8513, + "step": 81610 + }, + { + "epoch": 0.3297551279306068, + "grad_norm": 515.9407348632812, + "learning_rate": 3.134161466774617e-06, + "loss": 29.2294, + "step": 81620 + }, + { + "epoch": 0.32979552919597444, + "grad_norm": 489.002197265625, + "learning_rate": 3.1328530533581102e-06, + "loss": 43.5699, + "step": 81630 + }, + { + "epoch": 0.3298359304613421, + "grad_norm": 396.6682434082031, + "learning_rate": 3.131544788501917e-06, + "loss": 42.7945, + "step": 81640 + }, + { + "epoch": 0.32987633172670966, + "grad_norm": 813.0165405273438, + "learning_rate": 3.1302366723101294e-06, + "loss": 43.1957, + "step": 81650 + }, + { + "epoch": 0.3299167329920773, + "grad_norm": 106.5440673828125, + "learning_rate": 3.12892870488683e-06, + "loss": 38.8655, + "step": 81660 + }, + { + "epoch": 0.32995713425744494, + "grad_norm": 814.0963134765625, + "learning_rate": 3.1276208863360862e-06, + "loss": 43.5237, + "step": 81670 + }, + { + "epoch": 0.3299975355228126, + "grad_norm": 731.88330078125, + "learning_rate": 3.126313216761955e-06, + "loss": 57.3485, + "step": 81680 + }, + { + "epoch": 0.3300379367881802, + "grad_norm": 706.98779296875, + "learning_rate": 3.125005696268482e-06, + "loss": 59.5241, + "step": 81690 + }, + { + "epoch": 0.33007833805354786, + "grad_norm": 991.2210693359375, + "learning_rate": 3.1236983249597007e-06, + "loss": 54.4068, + "step": 81700 + }, + { + "epoch": 0.33011873931891544, + "grad_norm": 610.1210327148438, + "learning_rate": 3.1223911029396324e-06, + "loss": 53.5363, + "step": 81710 + }, + { + "epoch": 0.3301591405842831, + "grad_norm": 763.7863159179688, + "learning_rate": 3.121084030312286e-06, + "loss": 39.8277, + "step": 81720 + }, + { + "epoch": 0.3301995418496507, + "grad_norm": 429.45367431640625, + "learning_rate": 3.1197771071816617e-06, + "loss": 46.1526, + "step": 81730 + }, + { + "epoch": 0.33023994311501836, + "grad_norm": 561.5693969726562, + "learning_rate": 3.118470333651744e-06, + "loss": 54.1054, + "step": 81740 + }, + { + "epoch": 0.330280344380386, + "grad_norm": 761.9989624023438, + "learning_rate": 3.1171637098265063e-06, + "loss": 50.2312, + "step": 81750 + }, + { + "epoch": 0.33032074564575364, + "grad_norm": 901.3028564453125, + "learning_rate": 3.1158572358099127e-06, + "loss": 49.2848, + "step": 81760 + }, + { + "epoch": 0.3303611469111213, + "grad_norm": 768.6314086914062, + "learning_rate": 3.11455091170591e-06, + "loss": 44.6113, + "step": 81770 + }, + { + "epoch": 0.33040154817648887, + "grad_norm": 368.1278076171875, + "learning_rate": 3.1132447376184383e-06, + "loss": 57.5698, + "step": 81780 + }, + { + "epoch": 0.3304419494418565, + "grad_norm": 644.8842163085938, + "learning_rate": 3.1119387136514246e-06, + "loss": 38.2045, + "step": 81790 + }, + { + "epoch": 0.33048235070722415, + "grad_norm": 889.154296875, + "learning_rate": 3.1106328399087814e-06, + "loss": 47.1308, + "step": 81800 + }, + { + "epoch": 0.3305227519725918, + "grad_norm": 339.5101013183594, + "learning_rate": 3.1093271164944116e-06, + "loss": 48.5115, + "step": 81810 + }, + { + "epoch": 0.33056315323795943, + "grad_norm": 555.5851440429688, + "learning_rate": 3.1080215435122072e-06, + "loss": 45.9561, + "step": 81820 + }, + { + "epoch": 0.33060355450332707, + "grad_norm": 558.2139282226562, + "learning_rate": 3.106716121066046e-06, + "loss": 48.2627, + "step": 81830 + }, + { + "epoch": 0.33064395576869465, + "grad_norm": 889.7554931640625, + "learning_rate": 3.105410849259796e-06, + "loss": 57.0113, + "step": 81840 + }, + { + "epoch": 0.3306843570340623, + "grad_norm": 928.018798828125, + "learning_rate": 3.104105728197306e-06, + "loss": 39.6977, + "step": 81850 + }, + { + "epoch": 0.33072475829942993, + "grad_norm": 594.8426513671875, + "learning_rate": 3.1028007579824234e-06, + "loss": 43.1991, + "step": 81860 + }, + { + "epoch": 0.3307651595647976, + "grad_norm": 367.9489440917969, + "learning_rate": 3.1014959387189774e-06, + "loss": 51.1528, + "step": 81870 + }, + { + "epoch": 0.3308055608301652, + "grad_norm": 727.341552734375, + "learning_rate": 3.1001912705107874e-06, + "loss": 49.7708, + "step": 81880 + }, + { + "epoch": 0.33084596209553285, + "grad_norm": 572.1885375976562, + "learning_rate": 3.0988867534616586e-06, + "loss": 23.4917, + "step": 81890 + }, + { + "epoch": 0.33088636336090044, + "grad_norm": 244.5078125, + "learning_rate": 3.097582387675385e-06, + "loss": 45.9483, + "step": 81900 + }, + { + "epoch": 0.3309267646262681, + "grad_norm": 250.58753967285156, + "learning_rate": 3.09627817325575e-06, + "loss": 48.8653, + "step": 81910 + }, + { + "epoch": 0.3309671658916357, + "grad_norm": 494.48101806640625, + "learning_rate": 3.0949741103065246e-06, + "loss": 48.1861, + "step": 81920 + }, + { + "epoch": 0.33100756715700336, + "grad_norm": 641.30908203125, + "learning_rate": 3.093670198931469e-06, + "loss": 35.6989, + "step": 81930 + }, + { + "epoch": 0.331047968422371, + "grad_norm": 886.3203125, + "learning_rate": 3.0923664392343233e-06, + "loss": 59.5055, + "step": 81940 + }, + { + "epoch": 0.33108836968773864, + "grad_norm": 666.1203002929688, + "learning_rate": 3.091062831318825e-06, + "loss": 37.953, + "step": 81950 + }, + { + "epoch": 0.3311287709531063, + "grad_norm": 341.7474060058594, + "learning_rate": 3.089759375288698e-06, + "loss": 40.19, + "step": 81960 + }, + { + "epoch": 0.33116917221847386, + "grad_norm": 473.49053955078125, + "learning_rate": 3.0884560712476497e-06, + "loss": 48.5036, + "step": 81970 + }, + { + "epoch": 0.3312095734838415, + "grad_norm": 964.3297119140625, + "learning_rate": 3.0871529192993794e-06, + "loss": 56.6274, + "step": 81980 + }, + { + "epoch": 0.33124997474920914, + "grad_norm": 469.9378662109375, + "learning_rate": 3.085849919547572e-06, + "loss": 36.335, + "step": 81990 + }, + { + "epoch": 0.3312903760145768, + "grad_norm": 692.3630981445312, + "learning_rate": 3.0845470720959027e-06, + "loss": 45.9318, + "step": 82000 + }, + { + "epoch": 0.3313307772799444, + "grad_norm": 696.519775390625, + "learning_rate": 3.08324437704803e-06, + "loss": 48.5088, + "step": 82010 + }, + { + "epoch": 0.33137117854531206, + "grad_norm": 556.7713012695312, + "learning_rate": 3.0819418345076095e-06, + "loss": 20.585, + "step": 82020 + }, + { + "epoch": 0.33141157981067965, + "grad_norm": 387.6054992675781, + "learning_rate": 3.080639444578272e-06, + "loss": 42.5861, + "step": 82030 + }, + { + "epoch": 0.3314519810760473, + "grad_norm": 733.8251953125, + "learning_rate": 3.0793372073636455e-06, + "loss": 45.5413, + "step": 82040 + }, + { + "epoch": 0.3314923823414149, + "grad_norm": 1118.459228515625, + "learning_rate": 3.0780351229673423e-06, + "loss": 47.3603, + "step": 82050 + }, + { + "epoch": 0.33153278360678257, + "grad_norm": 620.4292602539062, + "learning_rate": 3.0767331914929638e-06, + "loss": 39.6754, + "step": 82060 + }, + { + "epoch": 0.3315731848721502, + "grad_norm": 351.4848327636719, + "learning_rate": 3.075431413044099e-06, + "loss": 43.1012, + "step": 82070 + }, + { + "epoch": 0.33161358613751785, + "grad_norm": 887.3251342773438, + "learning_rate": 3.074129787724324e-06, + "loss": 44.2201, + "step": 82080 + }, + { + "epoch": 0.33165398740288543, + "grad_norm": 575.6647338867188, + "learning_rate": 3.072828315637203e-06, + "loss": 41.9672, + "step": 82090 + }, + { + "epoch": 0.33169438866825307, + "grad_norm": 275.0605163574219, + "learning_rate": 3.0715269968862898e-06, + "loss": 42.3283, + "step": 82100 + }, + { + "epoch": 0.3317347899336207, + "grad_norm": 374.13812255859375, + "learning_rate": 3.0702258315751223e-06, + "loss": 72.7533, + "step": 82110 + }, + { + "epoch": 0.33177519119898835, + "grad_norm": 677.5405883789062, + "learning_rate": 3.0689248198072282e-06, + "loss": 30.5168, + "step": 82120 + }, + { + "epoch": 0.331815592464356, + "grad_norm": 456.5289306640625, + "learning_rate": 3.0676239616861234e-06, + "loss": 34.6827, + "step": 82130 + }, + { + "epoch": 0.33185599372972363, + "grad_norm": 652.3780517578125, + "learning_rate": 3.066323257315311e-06, + "loss": 44.3979, + "step": 82140 + }, + { + "epoch": 0.33189639499509127, + "grad_norm": 304.3308410644531, + "learning_rate": 3.065022706798284e-06, + "loss": 47.26, + "step": 82150 + }, + { + "epoch": 0.33193679626045886, + "grad_norm": 446.0997314453125, + "learning_rate": 3.06372231023852e-06, + "loss": 36.9697, + "step": 82160 + }, + { + "epoch": 0.3319771975258265, + "grad_norm": 525.1025390625, + "learning_rate": 3.0624220677394854e-06, + "loss": 34.6405, + "step": 82170 + }, + { + "epoch": 0.33201759879119414, + "grad_norm": 621.7910766601562, + "learning_rate": 3.0611219794046344e-06, + "loss": 39.364, + "step": 82180 + }, + { + "epoch": 0.3320580000565618, + "grad_norm": 484.9457702636719, + "learning_rate": 3.05982204533741e-06, + "loss": 39.2439, + "step": 82190 + }, + { + "epoch": 0.3320984013219294, + "grad_norm": 609.784912109375, + "learning_rate": 3.0585222656412406e-06, + "loss": 43.1885, + "step": 82200 + }, + { + "epoch": 0.33213880258729706, + "grad_norm": 746.94580078125, + "learning_rate": 3.0572226404195436e-06, + "loss": 28.1384, + "step": 82210 + }, + { + "epoch": 0.33217920385266464, + "grad_norm": 416.6310119628906, + "learning_rate": 3.055923169775726e-06, + "loss": 71.9208, + "step": 82220 + }, + { + "epoch": 0.3322196051180323, + "grad_norm": 378.2218017578125, + "learning_rate": 3.054623853813179e-06, + "loss": 37.3235, + "step": 82230 + }, + { + "epoch": 0.3322600063833999, + "grad_norm": 425.2323913574219, + "learning_rate": 3.0533246926352834e-06, + "loss": 46.6814, + "step": 82240 + }, + { + "epoch": 0.33230040764876756, + "grad_norm": 664.06982421875, + "learning_rate": 3.0520256863454077e-06, + "loss": 46.2079, + "step": 82250 + }, + { + "epoch": 0.3323408089141352, + "grad_norm": 527.8890380859375, + "learning_rate": 3.05072683504691e-06, + "loss": 33.8584, + "step": 82260 + }, + { + "epoch": 0.33238121017950284, + "grad_norm": 854.1585693359375, + "learning_rate": 3.049428138843133e-06, + "loss": 35.6598, + "step": 82270 + }, + { + "epoch": 0.3324216114448705, + "grad_norm": 596.1838989257812, + "learning_rate": 3.0481295978374037e-06, + "loss": 66.4695, + "step": 82280 + }, + { + "epoch": 0.33246201271023806, + "grad_norm": 691.150146484375, + "learning_rate": 3.0468312121330464e-06, + "loss": 37.7867, + "step": 82290 + }, + { + "epoch": 0.3325024139756057, + "grad_norm": 436.70391845703125, + "learning_rate": 3.0455329818333652e-06, + "loss": 31.3879, + "step": 82300 + }, + { + "epoch": 0.33254281524097334, + "grad_norm": 331.2305603027344, + "learning_rate": 3.044234907041655e-06, + "loss": 39.9435, + "step": 82310 + }, + { + "epoch": 0.332583216506341, + "grad_norm": 519.8748779296875, + "learning_rate": 3.0429369878611968e-06, + "loss": 34.4187, + "step": 82320 + }, + { + "epoch": 0.3326236177717086, + "grad_norm": 582.2870483398438, + "learning_rate": 3.041639224395262e-06, + "loss": 47.4416, + "step": 82330 + }, + { + "epoch": 0.33266401903707626, + "grad_norm": 534.1697998046875, + "learning_rate": 3.0403416167471044e-06, + "loss": 45.1939, + "step": 82340 + }, + { + "epoch": 0.33270442030244385, + "grad_norm": 538.1617431640625, + "learning_rate": 3.0390441650199727e-06, + "loss": 38.0616, + "step": 82350 + }, + { + "epoch": 0.3327448215678115, + "grad_norm": 930.120849609375, + "learning_rate": 3.0377468693170985e-06, + "loss": 48.7219, + "step": 82360 + }, + { + "epoch": 0.33278522283317913, + "grad_norm": 571.488037109375, + "learning_rate": 3.0364497297416973e-06, + "loss": 43.8716, + "step": 82370 + }, + { + "epoch": 0.33282562409854677, + "grad_norm": 642.69970703125, + "learning_rate": 3.035152746396981e-06, + "loss": 50.2554, + "step": 82380 + }, + { + "epoch": 0.3328660253639144, + "grad_norm": 424.82440185546875, + "learning_rate": 3.0338559193861434e-06, + "loss": 40.0815, + "step": 82390 + }, + { + "epoch": 0.33290642662928205, + "grad_norm": 641.3731079101562, + "learning_rate": 3.032559248812367e-06, + "loss": 49.77, + "step": 82400 + }, + { + "epoch": 0.33294682789464963, + "grad_norm": 903.556640625, + "learning_rate": 3.0312627347788208e-06, + "loss": 67.3948, + "step": 82410 + }, + { + "epoch": 0.3329872291600173, + "grad_norm": 711.0341186523438, + "learning_rate": 3.0299663773886646e-06, + "loss": 37.6624, + "step": 82420 + }, + { + "epoch": 0.3330276304253849, + "grad_norm": 830.165283203125, + "learning_rate": 3.0286701767450423e-06, + "loss": 49.855, + "step": 82430 + }, + { + "epoch": 0.33306803169075255, + "grad_norm": 729.2254638671875, + "learning_rate": 3.0273741329510852e-06, + "loss": 45.3208, + "step": 82440 + }, + { + "epoch": 0.3331084329561202, + "grad_norm": 316.6321716308594, + "learning_rate": 3.0260782461099192e-06, + "loss": 43.257, + "step": 82450 + }, + { + "epoch": 0.33314883422148783, + "grad_norm": 592.7379760742188, + "learning_rate": 3.024782516324645e-06, + "loss": 44.4966, + "step": 82460 + }, + { + "epoch": 0.3331892354868555, + "grad_norm": 571.3594360351562, + "learning_rate": 3.0234869436983606e-06, + "loss": 56.4878, + "step": 82470 + }, + { + "epoch": 0.33322963675222306, + "grad_norm": 375.4861145019531, + "learning_rate": 3.02219152833415e-06, + "loss": 62.9724, + "step": 82480 + }, + { + "epoch": 0.3332700380175907, + "grad_norm": 466.45416259765625, + "learning_rate": 3.0208962703350832e-06, + "loss": 59.3834, + "step": 82490 + }, + { + "epoch": 0.33331043928295834, + "grad_norm": 544.7888793945312, + "learning_rate": 3.019601169804216e-06, + "loss": 35.7769, + "step": 82500 + }, + { + "epoch": 0.333350840548326, + "grad_norm": 743.954833984375, + "learning_rate": 3.0183062268445964e-06, + "loss": 44.2142, + "step": 82510 + }, + { + "epoch": 0.3333912418136936, + "grad_norm": 448.221923828125, + "learning_rate": 3.0170114415592543e-06, + "loss": 39.1324, + "step": 82520 + }, + { + "epoch": 0.33343164307906126, + "grad_norm": 653.385986328125, + "learning_rate": 3.015716814051213e-06, + "loss": 40.8635, + "step": 82530 + }, + { + "epoch": 0.33347204434442884, + "grad_norm": 776.0372924804688, + "learning_rate": 3.0144223444234767e-06, + "loss": 34.3393, + "step": 82540 + }, + { + "epoch": 0.3335124456097965, + "grad_norm": 789.0279541015625, + "learning_rate": 3.0131280327790412e-06, + "loss": 59.5271, + "step": 82550 + }, + { + "epoch": 0.3335528468751641, + "grad_norm": 619.6022338867188, + "learning_rate": 3.0118338792208912e-06, + "loss": 29.2193, + "step": 82560 + }, + { + "epoch": 0.33359324814053176, + "grad_norm": 437.3066101074219, + "learning_rate": 3.010539883851993e-06, + "loss": 42.7655, + "step": 82570 + }, + { + "epoch": 0.3336336494058994, + "grad_norm": 492.7397766113281, + "learning_rate": 3.009246046775307e-06, + "loss": 30.9015, + "step": 82580 + }, + { + "epoch": 0.33367405067126704, + "grad_norm": 699.6427001953125, + "learning_rate": 3.0079523680937766e-06, + "loss": 60.2017, + "step": 82590 + }, + { + "epoch": 0.3337144519366347, + "grad_norm": 835.25244140625, + "learning_rate": 3.006658847910334e-06, + "loss": 43.552, + "step": 82600 + }, + { + "epoch": 0.33375485320200227, + "grad_norm": 489.4479064941406, + "learning_rate": 3.005365486327899e-06, + "loss": 29.7535, + "step": 82610 + }, + { + "epoch": 0.3337952544673699, + "grad_norm": 289.3746032714844, + "learning_rate": 3.004072283449379e-06, + "loss": 34.3409, + "step": 82620 + }, + { + "epoch": 0.33383565573273755, + "grad_norm": 358.6887512207031, + "learning_rate": 3.0027792393776666e-06, + "loss": 42.0769, + "step": 82630 + }, + { + "epoch": 0.3338760569981052, + "grad_norm": 478.79754638671875, + "learning_rate": 3.001486354215644e-06, + "loss": 41.234, + "step": 82640 + }, + { + "epoch": 0.3339164582634728, + "grad_norm": 336.5907897949219, + "learning_rate": 3.0001936280661794e-06, + "loss": 49.105, + "step": 82650 + }, + { + "epoch": 0.33395685952884047, + "grad_norm": 326.53662109375, + "learning_rate": 2.998901061032131e-06, + "loss": 46.3112, + "step": 82660 + }, + { + "epoch": 0.33399726079420805, + "grad_norm": 877.6404418945312, + "learning_rate": 2.9976086532163397e-06, + "loss": 46.8901, + "step": 82670 + }, + { + "epoch": 0.3340376620595757, + "grad_norm": 372.0155334472656, + "learning_rate": 2.9963164047216397e-06, + "loss": 39.4705, + "step": 82680 + }, + { + "epoch": 0.33407806332494333, + "grad_norm": 940.6842651367188, + "learning_rate": 2.9950243156508473e-06, + "loss": 39.6171, + "step": 82690 + }, + { + "epoch": 0.33411846459031097, + "grad_norm": 433.3446350097656, + "learning_rate": 2.9937323861067695e-06, + "loss": 25.4052, + "step": 82700 + }, + { + "epoch": 0.3341588658556786, + "grad_norm": 868.9320678710938, + "learning_rate": 2.992440616192197e-06, + "loss": 36.8388, + "step": 82710 + }, + { + "epoch": 0.33419926712104625, + "grad_norm": 717.5938720703125, + "learning_rate": 2.9911490060099117e-06, + "loss": 53.1356, + "step": 82720 + }, + { + "epoch": 0.33423966838641384, + "grad_norm": 458.17218017578125, + "learning_rate": 2.9898575556626807e-06, + "loss": 60.6068, + "step": 82730 + }, + { + "epoch": 0.3342800696517815, + "grad_norm": 542.5181274414062, + "learning_rate": 2.9885662652532586e-06, + "loss": 37.2215, + "step": 82740 + }, + { + "epoch": 0.3343204709171491, + "grad_norm": 611.09228515625, + "learning_rate": 2.9872751348843875e-06, + "loss": 38.3881, + "step": 82750 + }, + { + "epoch": 0.33436087218251676, + "grad_norm": 958.848876953125, + "learning_rate": 2.985984164658796e-06, + "loss": 43.9771, + "step": 82760 + }, + { + "epoch": 0.3344012734478844, + "grad_norm": 751.4885864257812, + "learning_rate": 2.9846933546792012e-06, + "loss": 45.087, + "step": 82770 + }, + { + "epoch": 0.33444167471325204, + "grad_norm": 444.13800048828125, + "learning_rate": 2.9834027050483085e-06, + "loss": 33.4151, + "step": 82780 + }, + { + "epoch": 0.3344820759786197, + "grad_norm": 975.753662109375, + "learning_rate": 2.9821122158688086e-06, + "loss": 37.9123, + "step": 82790 + }, + { + "epoch": 0.33452247724398726, + "grad_norm": 350.3032531738281, + "learning_rate": 2.980821887243377e-06, + "loss": 41.8596, + "step": 82800 + }, + { + "epoch": 0.3345628785093549, + "grad_norm": 494.2857360839844, + "learning_rate": 2.979531719274681e-06, + "loss": 42.1246, + "step": 82810 + }, + { + "epoch": 0.33460327977472254, + "grad_norm": 386.30780029296875, + "learning_rate": 2.978241712065374e-06, + "loss": 44.5044, + "step": 82820 + }, + { + "epoch": 0.3346436810400902, + "grad_norm": 1870.94287109375, + "learning_rate": 2.9769518657180953e-06, + "loss": 47.9243, + "step": 82830 + }, + { + "epoch": 0.3346840823054578, + "grad_norm": 412.831298828125, + "learning_rate": 2.9756621803354722e-06, + "loss": 44.2663, + "step": 82840 + }, + { + "epoch": 0.33472448357082546, + "grad_norm": 748.1416015625, + "learning_rate": 2.9743726560201185e-06, + "loss": 47.1023, + "step": 82850 + }, + { + "epoch": 0.33476488483619304, + "grad_norm": 672.1915893554688, + "learning_rate": 2.9730832928746355e-06, + "loss": 40.4036, + "step": 82860 + }, + { + "epoch": 0.3348052861015607, + "grad_norm": 468.0141296386719, + "learning_rate": 2.9717940910016135e-06, + "loss": 45.7574, + "step": 82870 + }, + { + "epoch": 0.3348456873669283, + "grad_norm": 888.3388671875, + "learning_rate": 2.9705050505036294e-06, + "loss": 38.8895, + "step": 82880 + }, + { + "epoch": 0.33488608863229596, + "grad_norm": 371.0845947265625, + "learning_rate": 2.9692161714832422e-06, + "loss": 51.6445, + "step": 82890 + }, + { + "epoch": 0.3349264898976636, + "grad_norm": 931.8453979492188, + "learning_rate": 2.9679274540430037e-06, + "loss": 38.1988, + "step": 82900 + }, + { + "epoch": 0.33496689116303124, + "grad_norm": 438.6957702636719, + "learning_rate": 2.966638898285452e-06, + "loss": 40.7775, + "step": 82910 + }, + { + "epoch": 0.3350072924283989, + "grad_norm": 355.041015625, + "learning_rate": 2.9653505043131125e-06, + "loss": 36.4231, + "step": 82920 + }, + { + "epoch": 0.33504769369376647, + "grad_norm": 220.08999633789062, + "learning_rate": 2.9640622722284944e-06, + "loss": 44.5257, + "step": 82930 + }, + { + "epoch": 0.3350880949591341, + "grad_norm": 300.3365783691406, + "learning_rate": 2.962774202134098e-06, + "loss": 44.7493, + "step": 82940 + }, + { + "epoch": 0.33512849622450175, + "grad_norm": 364.51422119140625, + "learning_rate": 2.961486294132409e-06, + "loss": 47.445, + "step": 82950 + }, + { + "epoch": 0.3351688974898694, + "grad_norm": 754.2266235351562, + "learning_rate": 2.960198548325901e-06, + "loss": 40.6013, + "step": 82960 + }, + { + "epoch": 0.33520929875523703, + "grad_norm": 781.6005859375, + "learning_rate": 2.958910964817032e-06, + "loss": 39.7547, + "step": 82970 + }, + { + "epoch": 0.33524970002060467, + "grad_norm": 538.5778198242188, + "learning_rate": 2.9576235437082502e-06, + "loss": 37.5946, + "step": 82980 + }, + { + "epoch": 0.33529010128597225, + "grad_norm": 514.644287109375, + "learning_rate": 2.9563362851019893e-06, + "loss": 68.5898, + "step": 82990 + }, + { + "epoch": 0.3353305025513399, + "grad_norm": 700.5049438476562, + "learning_rate": 2.9550491891006704e-06, + "loss": 45.753, + "step": 83000 + }, + { + "epoch": 0.33537090381670753, + "grad_norm": 443.4197998046875, + "learning_rate": 2.9537622558067036e-06, + "loss": 42.1891, + "step": 83010 + }, + { + "epoch": 0.3354113050820752, + "grad_norm": 298.11846923828125, + "learning_rate": 2.9524754853224837e-06, + "loss": 46.5696, + "step": 83020 + }, + { + "epoch": 0.3354517063474428, + "grad_norm": 699.7203369140625, + "learning_rate": 2.9511888777503916e-06, + "loss": 41.8185, + "step": 83030 + }, + { + "epoch": 0.33549210761281045, + "grad_norm": 1089.260009765625, + "learning_rate": 2.949902433192798e-06, + "loss": 38.7898, + "step": 83040 + }, + { + "epoch": 0.33553250887817804, + "grad_norm": 848.50244140625, + "learning_rate": 2.94861615175206e-06, + "loss": 36.6572, + "step": 83050 + }, + { + "epoch": 0.3355729101435457, + "grad_norm": 503.3612976074219, + "learning_rate": 2.9473300335305193e-06, + "loss": 41.0212, + "step": 83060 + }, + { + "epoch": 0.3356133114089133, + "grad_norm": 485.5074768066406, + "learning_rate": 2.946044078630508e-06, + "loss": 59.2754, + "step": 83070 + }, + { + "epoch": 0.33565371267428096, + "grad_norm": 322.2576599121094, + "learning_rate": 2.9447582871543423e-06, + "loss": 35.568, + "step": 83080 + }, + { + "epoch": 0.3356941139396486, + "grad_norm": 496.3887939453125, + "learning_rate": 2.9434726592043263e-06, + "loss": 45.2664, + "step": 83090 + }, + { + "epoch": 0.33573451520501624, + "grad_norm": 578.519287109375, + "learning_rate": 2.942187194882754e-06, + "loss": 39.4166, + "step": 83100 + }, + { + "epoch": 0.3357749164703839, + "grad_norm": 889.9583740234375, + "learning_rate": 2.940901894291902e-06, + "loss": 43.7717, + "step": 83110 + }, + { + "epoch": 0.33581531773575146, + "grad_norm": 479.1361083984375, + "learning_rate": 2.939616757534037e-06, + "loss": 44.8988, + "step": 83120 + }, + { + "epoch": 0.3358557190011191, + "grad_norm": 570.2633666992188, + "learning_rate": 2.938331784711411e-06, + "loss": 39.3279, + "step": 83130 + }, + { + "epoch": 0.33589612026648674, + "grad_norm": 812.4302368164062, + "learning_rate": 2.937046975926262e-06, + "loss": 54.3701, + "step": 83140 + }, + { + "epoch": 0.3359365215318544, + "grad_norm": 727.7770385742188, + "learning_rate": 2.9357623312808183e-06, + "loss": 44.7289, + "step": 83150 + }, + { + "epoch": 0.335976922797222, + "grad_norm": 206.3565216064453, + "learning_rate": 2.934477850877292e-06, + "loss": 41.506, + "step": 83160 + }, + { + "epoch": 0.33601732406258966, + "grad_norm": 480.6734924316406, + "learning_rate": 2.9331935348178838e-06, + "loss": 33.4442, + "step": 83170 + }, + { + "epoch": 0.33605772532795725, + "grad_norm": 1374.9521484375, + "learning_rate": 2.931909383204781e-06, + "loss": 71.191, + "step": 83180 + }, + { + "epoch": 0.3360981265933249, + "grad_norm": 594.8909301757812, + "learning_rate": 2.9306253961401553e-06, + "loss": 38.9371, + "step": 83190 + }, + { + "epoch": 0.3361385278586925, + "grad_norm": 477.9680480957031, + "learning_rate": 2.929341573726171e-06, + "loss": 39.7385, + "step": 83200 + }, + { + "epoch": 0.33617892912406017, + "grad_norm": 692.1814575195312, + "learning_rate": 2.928057916064975e-06, + "loss": 45.9146, + "step": 83210 + }, + { + "epoch": 0.3362193303894278, + "grad_norm": 755.2357788085938, + "learning_rate": 2.9267744232587035e-06, + "loss": 51.6069, + "step": 83220 + }, + { + "epoch": 0.33625973165479545, + "grad_norm": 175.2359619140625, + "learning_rate": 2.925491095409473e-06, + "loss": 33.7543, + "step": 83230 + }, + { + "epoch": 0.3363001329201631, + "grad_norm": 1155.9080810546875, + "learning_rate": 2.924207932619397e-06, + "loss": 40.8623, + "step": 83240 + }, + { + "epoch": 0.33634053418553067, + "grad_norm": 795.6212768554688, + "learning_rate": 2.9229249349905686e-06, + "loss": 38.2946, + "step": 83250 + }, + { + "epoch": 0.3363809354508983, + "grad_norm": 507.70013427734375, + "learning_rate": 2.9216421026250707e-06, + "loss": 58.5413, + "step": 83260 + }, + { + "epoch": 0.33642133671626595, + "grad_norm": 418.37408447265625, + "learning_rate": 2.9203594356249726e-06, + "loss": 40.5312, + "step": 83270 + }, + { + "epoch": 0.3364617379816336, + "grad_norm": 339.6582946777344, + "learning_rate": 2.919076934092329e-06, + "loss": 43.9843, + "step": 83280 + }, + { + "epoch": 0.33650213924700123, + "grad_norm": 544.9318237304688, + "learning_rate": 2.9177945981291843e-06, + "loss": 31.948, + "step": 83290 + }, + { + "epoch": 0.33654254051236887, + "grad_norm": 1640.63818359375, + "learning_rate": 2.916512427837568e-06, + "loss": 47.5451, + "step": 83300 + }, + { + "epoch": 0.33658294177773646, + "grad_norm": 394.36474609375, + "learning_rate": 2.9152304233194974e-06, + "loss": 36.8862, + "step": 83310 + }, + { + "epoch": 0.3366233430431041, + "grad_norm": 240.1521759033203, + "learning_rate": 2.9139485846769723e-06, + "loss": 33.977, + "step": 83320 + }, + { + "epoch": 0.33666374430847174, + "grad_norm": 683.187255859375, + "learning_rate": 2.9126669120119846e-06, + "loss": 45.2668, + "step": 83330 + }, + { + "epoch": 0.3367041455738394, + "grad_norm": 715.0475463867188, + "learning_rate": 2.9113854054265112e-06, + "loss": 46.4243, + "step": 83340 + }, + { + "epoch": 0.336744546839207, + "grad_norm": 333.6781921386719, + "learning_rate": 2.9101040650225155e-06, + "loss": 43.0255, + "step": 83350 + }, + { + "epoch": 0.33678494810457466, + "grad_norm": 460.63641357421875, + "learning_rate": 2.9088228909019455e-06, + "loss": 44.8078, + "step": 83360 + }, + { + "epoch": 0.33682534936994224, + "grad_norm": 1160.0489501953125, + "learning_rate": 2.9075418831667436e-06, + "loss": 56.1693, + "step": 83370 + }, + { + "epoch": 0.3368657506353099, + "grad_norm": 558.1758422851562, + "learning_rate": 2.906261041918831e-06, + "loss": 43.3211, + "step": 83380 + }, + { + "epoch": 0.3369061519006775, + "grad_norm": 640.573974609375, + "learning_rate": 2.90498036726012e-06, + "loss": 39.1272, + "step": 83390 + }, + { + "epoch": 0.33694655316604516, + "grad_norm": 498.0477294921875, + "learning_rate": 2.903699859292505e-06, + "loss": 35.0058, + "step": 83400 + }, + { + "epoch": 0.3369869544314128, + "grad_norm": 343.3585510253906, + "learning_rate": 2.9024195181178704e-06, + "loss": 54.491, + "step": 83410 + }, + { + "epoch": 0.33702735569678044, + "grad_norm": 537.1847534179688, + "learning_rate": 2.9011393438380884e-06, + "loss": 28.7002, + "step": 83420 + }, + { + "epoch": 0.3370677569621481, + "grad_norm": 502.0111999511719, + "learning_rate": 2.8998593365550178e-06, + "loss": 29.3126, + "step": 83430 + }, + { + "epoch": 0.33710815822751566, + "grad_norm": 477.94482421875, + "learning_rate": 2.8985794963704992e-06, + "loss": 44.7282, + "step": 83440 + }, + { + "epoch": 0.3371485594928833, + "grad_norm": 167.1127166748047, + "learning_rate": 2.8972998233863657e-06, + "loss": 31.71, + "step": 83450 + }, + { + "epoch": 0.33718896075825094, + "grad_norm": 982.8727416992188, + "learning_rate": 2.8960203177044364e-06, + "loss": 52.744, + "step": 83460 + }, + { + "epoch": 0.3372293620236186, + "grad_norm": 587.1280517578125, + "learning_rate": 2.8947409794265146e-06, + "loss": 45.7435, + "step": 83470 + }, + { + "epoch": 0.3372697632889862, + "grad_norm": 859.0236206054688, + "learning_rate": 2.893461808654393e-06, + "loss": 36.1571, + "step": 83480 + }, + { + "epoch": 0.33731016455435386, + "grad_norm": 489.2738037109375, + "learning_rate": 2.892182805489846e-06, + "loss": 51.3487, + "step": 83490 + }, + { + "epoch": 0.33735056581972145, + "grad_norm": 716.7078857421875, + "learning_rate": 2.8909039700346385e-06, + "loss": 33.1731, + "step": 83500 + }, + { + "epoch": 0.3373909670850891, + "grad_norm": 479.42559814453125, + "learning_rate": 2.889625302390524e-06, + "loss": 32.8824, + "step": 83510 + }, + { + "epoch": 0.33743136835045673, + "grad_norm": 645.82275390625, + "learning_rate": 2.8883468026592382e-06, + "loss": 48.3588, + "step": 83520 + }, + { + "epoch": 0.33747176961582437, + "grad_norm": 285.9710388183594, + "learning_rate": 2.8870684709425063e-06, + "loss": 46.8221, + "step": 83530 + }, + { + "epoch": 0.337512170881192, + "grad_norm": 698.7156982421875, + "learning_rate": 2.885790307342039e-06, + "loss": 39.8056, + "step": 83540 + }, + { + "epoch": 0.33755257214655965, + "grad_norm": 644.73974609375, + "learning_rate": 2.884512311959532e-06, + "loss": 55.1326, + "step": 83550 + }, + { + "epoch": 0.3375929734119273, + "grad_norm": 595.5823974609375, + "learning_rate": 2.8832344848966758e-06, + "loss": 48.1498, + "step": 83560 + }, + { + "epoch": 0.3376333746772949, + "grad_norm": 777.5430908203125, + "learning_rate": 2.8819568262551344e-06, + "loss": 47.8501, + "step": 83570 + }, + { + "epoch": 0.3376737759426625, + "grad_norm": 695.3916015625, + "learning_rate": 2.8806793361365686e-06, + "loss": 63.2564, + "step": 83580 + }, + { + "epoch": 0.33771417720803015, + "grad_norm": 802.4063720703125, + "learning_rate": 2.8794020146426217e-06, + "loss": 48.957, + "step": 83590 + }, + { + "epoch": 0.3377545784733978, + "grad_norm": 430.29315185546875, + "learning_rate": 2.8781248618749235e-06, + "loss": 40.1431, + "step": 83600 + }, + { + "epoch": 0.33779497973876543, + "grad_norm": 359.31207275390625, + "learning_rate": 2.8768478779350927e-06, + "loss": 33.8353, + "step": 83610 + }, + { + "epoch": 0.3378353810041331, + "grad_norm": 204.70376586914062, + "learning_rate": 2.875571062924732e-06, + "loss": 46.5473, + "step": 83620 + }, + { + "epoch": 0.33787578226950066, + "grad_norm": 780.0933227539062, + "learning_rate": 2.874294416945432e-06, + "loss": 40.4211, + "step": 83630 + }, + { + "epoch": 0.3379161835348683, + "grad_norm": 535.6966552734375, + "learning_rate": 2.8730179400987697e-06, + "loss": 67.5744, + "step": 83640 + }, + { + "epoch": 0.33795658480023594, + "grad_norm": 576.902587890625, + "learning_rate": 2.871741632486308e-06, + "loss": 34.0992, + "step": 83650 + }, + { + "epoch": 0.3379969860656036, + "grad_norm": 611.9027709960938, + "learning_rate": 2.8704654942095977e-06, + "loss": 26.9851, + "step": 83660 + }, + { + "epoch": 0.3380373873309712, + "grad_norm": 414.39654541015625, + "learning_rate": 2.869189525370174e-06, + "loss": 43.7683, + "step": 83670 + }, + { + "epoch": 0.33807778859633886, + "grad_norm": 864.5162963867188, + "learning_rate": 2.8679137260695614e-06, + "loss": 53.398, + "step": 83680 + }, + { + "epoch": 0.33811818986170644, + "grad_norm": 492.2377624511719, + "learning_rate": 2.866638096409269e-06, + "loss": 34.3411, + "step": 83690 + }, + { + "epoch": 0.3381585911270741, + "grad_norm": 759.2435302734375, + "learning_rate": 2.8653626364907918e-06, + "loss": 41.1706, + "step": 83700 + }, + { + "epoch": 0.3381989923924417, + "grad_norm": 688.8447265625, + "learning_rate": 2.8640873464156127e-06, + "loss": 49.1247, + "step": 83710 + }, + { + "epoch": 0.33823939365780936, + "grad_norm": 655.9478759765625, + "learning_rate": 2.8628122262852015e-06, + "loss": 43.5474, + "step": 83720 + }, + { + "epoch": 0.338279794923177, + "grad_norm": 255.67202758789062, + "learning_rate": 2.861537276201013e-06, + "loss": 28.4624, + "step": 83730 + }, + { + "epoch": 0.33832019618854464, + "grad_norm": 987.0891723632812, + "learning_rate": 2.860262496264489e-06, + "loss": 47.4445, + "step": 83740 + }, + { + "epoch": 0.3383605974539123, + "grad_norm": 204.87025451660156, + "learning_rate": 2.858987886577058e-06, + "loss": 69.179, + "step": 83750 + }, + { + "epoch": 0.33840099871927987, + "grad_norm": 296.1997375488281, + "learning_rate": 2.857713447240135e-06, + "loss": 64.5847, + "step": 83760 + }, + { + "epoch": 0.3384413999846475, + "grad_norm": 541.6371459960938, + "learning_rate": 2.8564391783551214e-06, + "loss": 43.0074, + "step": 83770 + }, + { + "epoch": 0.33848180125001515, + "grad_norm": 551.6318359375, + "learning_rate": 2.855165080023405e-06, + "loss": 34.2838, + "step": 83780 + }, + { + "epoch": 0.3385222025153828, + "grad_norm": 357.3740234375, + "learning_rate": 2.85389115234636e-06, + "loss": 33.5051, + "step": 83790 + }, + { + "epoch": 0.3385626037807504, + "grad_norm": 280.4405822753906, + "learning_rate": 2.8526173954253458e-06, + "loss": 35.4893, + "step": 83800 + }, + { + "epoch": 0.33860300504611807, + "grad_norm": 968.9623413085938, + "learning_rate": 2.8513438093617107e-06, + "loss": 53.0172, + "step": 83810 + }, + { + "epoch": 0.33864340631148565, + "grad_norm": 247.39732360839844, + "learning_rate": 2.8500703942567874e-06, + "loss": 36.6919, + "step": 83820 + }, + { + "epoch": 0.3386838075768533, + "grad_norm": 554.3946533203125, + "learning_rate": 2.848797150211896e-06, + "loss": 39.9293, + "step": 83830 + }, + { + "epoch": 0.33872420884222093, + "grad_norm": 329.2684020996094, + "learning_rate": 2.847524077328343e-06, + "loss": 44.9997, + "step": 83840 + }, + { + "epoch": 0.33876461010758857, + "grad_norm": 585.6620483398438, + "learning_rate": 2.8462511757074205e-06, + "loss": 42.0342, + "step": 83850 + }, + { + "epoch": 0.3388050113729562, + "grad_norm": 481.63720703125, + "learning_rate": 2.844978445450408e-06, + "loss": 39.1469, + "step": 83860 + }, + { + "epoch": 0.33884541263832385, + "grad_norm": 857.4867553710938, + "learning_rate": 2.8437058866585698e-06, + "loss": 46.5103, + "step": 83870 + }, + { + "epoch": 0.3388858139036915, + "grad_norm": 676.7787475585938, + "learning_rate": 2.842433499433158e-06, + "loss": 52.469, + "step": 83880 + }, + { + "epoch": 0.3389262151690591, + "grad_norm": 746.2361450195312, + "learning_rate": 2.841161283875411e-06, + "loss": 30.7549, + "step": 83890 + }, + { + "epoch": 0.3389666164344267, + "grad_norm": 416.3562316894531, + "learning_rate": 2.8398892400865537e-06, + "loss": 41.4338, + "step": 83900 + }, + { + "epoch": 0.33900701769979436, + "grad_norm": 398.10546875, + "learning_rate": 2.838617368167797e-06, + "loss": 56.6096, + "step": 83910 + }, + { + "epoch": 0.339047418965162, + "grad_norm": 443.0265808105469, + "learning_rate": 2.837345668220333e-06, + "loss": 57.8073, + "step": 83920 + }, + { + "epoch": 0.33908782023052964, + "grad_norm": 297.83135986328125, + "learning_rate": 2.836074140345352e-06, + "loss": 22.5456, + "step": 83930 + }, + { + "epoch": 0.3391282214958973, + "grad_norm": 1189.527587890625, + "learning_rate": 2.834802784644019e-06, + "loss": 78.1559, + "step": 83940 + }, + { + "epoch": 0.33916862276126486, + "grad_norm": 482.82147216796875, + "learning_rate": 2.8335316012174925e-06, + "loss": 21.9731, + "step": 83950 + }, + { + "epoch": 0.3392090240266325, + "grad_norm": 192.12002563476562, + "learning_rate": 2.8322605901669133e-06, + "loss": 42.8519, + "step": 83960 + }, + { + "epoch": 0.33924942529200014, + "grad_norm": 912.255126953125, + "learning_rate": 2.8309897515934104e-06, + "loss": 74.6317, + "step": 83970 + }, + { + "epoch": 0.3392898265573678, + "grad_norm": 405.94451904296875, + "learning_rate": 2.8297190855980987e-06, + "loss": 48.1218, + "step": 83980 + }, + { + "epoch": 0.3393302278227354, + "grad_norm": 685.3220825195312, + "learning_rate": 2.8284485922820814e-06, + "loss": 42.0125, + "step": 83990 + }, + { + "epoch": 0.33937062908810306, + "grad_norm": 376.51837158203125, + "learning_rate": 2.8271782717464413e-06, + "loss": 41.4176, + "step": 84000 + }, + { + "epoch": 0.33941103035347064, + "grad_norm": 305.7981872558594, + "learning_rate": 2.8259081240922522e-06, + "loss": 30.9832, + "step": 84010 + }, + { + "epoch": 0.3394514316188383, + "grad_norm": 537.4862670898438, + "learning_rate": 2.8246381494205775e-06, + "loss": 56.0932, + "step": 84020 + }, + { + "epoch": 0.3394918328842059, + "grad_norm": 712.7220458984375, + "learning_rate": 2.8233683478324627e-06, + "loss": 52.1589, + "step": 84030 + }, + { + "epoch": 0.33953223414957356, + "grad_norm": 647.290283203125, + "learning_rate": 2.822098719428938e-06, + "loss": 42.8148, + "step": 84040 + }, + { + "epoch": 0.3395726354149412, + "grad_norm": 1051.1763916015625, + "learning_rate": 2.8208292643110237e-06, + "loss": 49.4385, + "step": 84050 + }, + { + "epoch": 0.33961303668030884, + "grad_norm": 846.8150024414062, + "learning_rate": 2.8195599825797233e-06, + "loss": 46.9994, + "step": 84060 + }, + { + "epoch": 0.3396534379456765, + "grad_norm": 455.5355224609375, + "learning_rate": 2.818290874336028e-06, + "loss": 37.593, + "step": 84070 + }, + { + "epoch": 0.33969383921104407, + "grad_norm": 1034.6007080078125, + "learning_rate": 2.817021939680918e-06, + "loss": 38.3971, + "step": 84080 + }, + { + "epoch": 0.3397342404764117, + "grad_norm": 585.2464599609375, + "learning_rate": 2.8157531787153515e-06, + "loss": 46.1575, + "step": 84090 + }, + { + "epoch": 0.33977464174177935, + "grad_norm": 437.1051940917969, + "learning_rate": 2.8144845915402796e-06, + "loss": 51.5693, + "step": 84100 + }, + { + "epoch": 0.339815043007147, + "grad_norm": 694.668701171875, + "learning_rate": 2.813216178256637e-06, + "loss": 40.9471, + "step": 84110 + }, + { + "epoch": 0.33985544427251463, + "grad_norm": 698.83544921875, + "learning_rate": 2.8119479389653492e-06, + "loss": 44.3773, + "step": 84120 + }, + { + "epoch": 0.33989584553788227, + "grad_norm": 734.264892578125, + "learning_rate": 2.8106798737673223e-06, + "loss": 35.5071, + "step": 84130 + }, + { + "epoch": 0.33993624680324985, + "grad_norm": 631.1513671875, + "learning_rate": 2.8094119827634496e-06, + "loss": 38.4813, + "step": 84140 + }, + { + "epoch": 0.3399766480686175, + "grad_norm": 497.7986145019531, + "learning_rate": 2.8081442660546126e-06, + "loss": 32.6974, + "step": 84150 + }, + { + "epoch": 0.34001704933398513, + "grad_norm": 440.0523376464844, + "learning_rate": 2.806876723741677e-06, + "loss": 38.9369, + "step": 84160 + }, + { + "epoch": 0.3400574505993528, + "grad_norm": 932.92578125, + "learning_rate": 2.805609355925497e-06, + "loss": 35.6964, + "step": 84170 + }, + { + "epoch": 0.3400978518647204, + "grad_norm": 119.84386444091797, + "learning_rate": 2.8043421627069077e-06, + "loss": 43.8795, + "step": 84180 + }, + { + "epoch": 0.34013825313008805, + "grad_norm": 854.2145385742188, + "learning_rate": 2.8030751441867364e-06, + "loss": 44.9602, + "step": 84190 + }, + { + "epoch": 0.3401786543954557, + "grad_norm": 480.2217712402344, + "learning_rate": 2.8018083004657924e-06, + "loss": 43.9618, + "step": 84200 + }, + { + "epoch": 0.3402190556608233, + "grad_norm": 560.4413452148438, + "learning_rate": 2.800541631644873e-06, + "loss": 48.4142, + "step": 84210 + }, + { + "epoch": 0.3402594569261909, + "grad_norm": 828.934326171875, + "learning_rate": 2.7992751378247627e-06, + "loss": 37.967, + "step": 84220 + }, + { + "epoch": 0.34029985819155856, + "grad_norm": 965.2238159179688, + "learning_rate": 2.79800881910623e-06, + "loss": 46.0749, + "step": 84230 + }, + { + "epoch": 0.3403402594569262, + "grad_norm": 784.2520141601562, + "learning_rate": 2.7967426755900293e-06, + "loss": 32.5352, + "step": 84240 + }, + { + "epoch": 0.34038066072229384, + "grad_norm": 514.36328125, + "learning_rate": 2.795476707376905e-06, + "loss": 38.8823, + "step": 84250 + }, + { + "epoch": 0.3404210619876615, + "grad_norm": 858.2100219726562, + "learning_rate": 2.79421091456758e-06, + "loss": 61.8575, + "step": 84260 + }, + { + "epoch": 0.34046146325302906, + "grad_norm": 581.9533081054688, + "learning_rate": 2.7929452972627685e-06, + "loss": 35.4017, + "step": 84270 + }, + { + "epoch": 0.3405018645183967, + "grad_norm": 777.8386840820312, + "learning_rate": 2.791679855563171e-06, + "loss": 31.8655, + "step": 84280 + }, + { + "epoch": 0.34054226578376434, + "grad_norm": 607.4931640625, + "learning_rate": 2.790414589569473e-06, + "loss": 46.2957, + "step": 84290 + }, + { + "epoch": 0.340582667049132, + "grad_norm": 752.4151611328125, + "learning_rate": 2.789149499382345e-06, + "loss": 50.8046, + "step": 84300 + }, + { + "epoch": 0.3406230683144996, + "grad_norm": 243.44146728515625, + "learning_rate": 2.7878845851024426e-06, + "loss": 56.0194, + "step": 84310 + }, + { + "epoch": 0.34066346957986726, + "grad_norm": 934.1202392578125, + "learning_rate": 2.786619846830414e-06, + "loss": 60.4464, + "step": 84320 + }, + { + "epoch": 0.34070387084523485, + "grad_norm": 461.3244934082031, + "learning_rate": 2.7853552846668865e-06, + "loss": 50.0204, + "step": 84330 + }, + { + "epoch": 0.3407442721106025, + "grad_norm": 704.9860229492188, + "learning_rate": 2.784090898712476e-06, + "loss": 39.5185, + "step": 84340 + }, + { + "epoch": 0.3407846733759701, + "grad_norm": 841.1322021484375, + "learning_rate": 2.7828266890677825e-06, + "loss": 48.853, + "step": 84350 + }, + { + "epoch": 0.34082507464133777, + "grad_norm": 707.6873779296875, + "learning_rate": 2.781562655833393e-06, + "loss": 41.3904, + "step": 84360 + }, + { + "epoch": 0.3408654759067054, + "grad_norm": 622.2953491210938, + "learning_rate": 2.7802987991098816e-06, + "loss": 61.1507, + "step": 84370 + }, + { + "epoch": 0.34090587717207305, + "grad_norm": 278.2420959472656, + "learning_rate": 2.7790351189978083e-06, + "loss": 43.5655, + "step": 84380 + }, + { + "epoch": 0.3409462784374407, + "grad_norm": 440.2988586425781, + "learning_rate": 2.777771615597717e-06, + "loss": 34.7467, + "step": 84390 + }, + { + "epoch": 0.34098667970280827, + "grad_norm": 794.9293212890625, + "learning_rate": 2.776508289010138e-06, + "loss": 36.8944, + "step": 84400 + }, + { + "epoch": 0.3410270809681759, + "grad_norm": 393.08367919921875, + "learning_rate": 2.7752451393355916e-06, + "loss": 44.2507, + "step": 84410 + }, + { + "epoch": 0.34106748223354355, + "grad_norm": 574.3208618164062, + "learning_rate": 2.773982166674582e-06, + "loss": 28.7962, + "step": 84420 + }, + { + "epoch": 0.3411078834989112, + "grad_norm": 557.8585205078125, + "learning_rate": 2.772719371127593e-06, + "loss": 36.2364, + "step": 84430 + }, + { + "epoch": 0.34114828476427883, + "grad_norm": 377.4037170410156, + "learning_rate": 2.771456752795102e-06, + "loss": 36.0346, + "step": 84440 + }, + { + "epoch": 0.34118868602964647, + "grad_norm": 555.8394775390625, + "learning_rate": 2.7701943117775686e-06, + "loss": 40.5826, + "step": 84450 + }, + { + "epoch": 0.34122908729501406, + "grad_norm": 604.6018676757812, + "learning_rate": 2.7689320481754414e-06, + "loss": 38.2319, + "step": 84460 + }, + { + "epoch": 0.3412694885603817, + "grad_norm": 1093.6312255859375, + "learning_rate": 2.7676699620891514e-06, + "loss": 46.0036, + "step": 84470 + }, + { + "epoch": 0.34130988982574934, + "grad_norm": 574.8224487304688, + "learning_rate": 2.7664080536191178e-06, + "loss": 44.9199, + "step": 84480 + }, + { + "epoch": 0.341350291091117, + "grad_norm": 312.3489074707031, + "learning_rate": 2.7651463228657444e-06, + "loss": 43.5578, + "step": 84490 + }, + { + "epoch": 0.3413906923564846, + "grad_norm": 869.42138671875, + "learning_rate": 2.7638847699294196e-06, + "loss": 47.0098, + "step": 84500 + }, + { + "epoch": 0.34143109362185226, + "grad_norm": 417.8177795410156, + "learning_rate": 2.7626233949105252e-06, + "loss": 33.5986, + "step": 84510 + }, + { + "epoch": 0.3414714948872199, + "grad_norm": 552.8901977539062, + "learning_rate": 2.7613621979094173e-06, + "loss": 44.5389, + "step": 84520 + }, + { + "epoch": 0.3415118961525875, + "grad_norm": 702.9528198242188, + "learning_rate": 2.7601011790264454e-06, + "loss": 45.9088, + "step": 84530 + }, + { + "epoch": 0.3415522974179551, + "grad_norm": 388.2279357910156, + "learning_rate": 2.758840338361942e-06, + "loss": 52.9619, + "step": 84540 + }, + { + "epoch": 0.34159269868332276, + "grad_norm": 322.6661071777344, + "learning_rate": 2.7575796760162288e-06, + "loss": 33.669, + "step": 84550 + }, + { + "epoch": 0.3416330999486904, + "grad_norm": 429.1786193847656, + "learning_rate": 2.7563191920896084e-06, + "loss": 49.6612, + "step": 84560 + }, + { + "epoch": 0.34167350121405804, + "grad_norm": 371.429931640625, + "learning_rate": 2.755058886682373e-06, + "loss": 32.2629, + "step": 84570 + }, + { + "epoch": 0.3417139024794257, + "grad_norm": 440.96319580078125, + "learning_rate": 2.753798759894799e-06, + "loss": 49.125, + "step": 84580 + }, + { + "epoch": 0.34175430374479326, + "grad_norm": 227.27528381347656, + "learning_rate": 2.7525388118271495e-06, + "loss": 32.9526, + "step": 84590 + }, + { + "epoch": 0.3417947050101609, + "grad_norm": 755.2015380859375, + "learning_rate": 2.751279042579672e-06, + "loss": 39.707, + "step": 84600 + }, + { + "epoch": 0.34183510627552854, + "grad_norm": 572.055419921875, + "learning_rate": 2.7500194522526007e-06, + "loss": 60.7821, + "step": 84610 + }, + { + "epoch": 0.3418755075408962, + "grad_norm": 568.6116943359375, + "learning_rate": 2.748760040946156e-06, + "loss": 32.712, + "step": 84620 + }, + { + "epoch": 0.3419159088062638, + "grad_norm": 421.6330261230469, + "learning_rate": 2.7475008087605428e-06, + "loss": 42.8288, + "step": 84630 + }, + { + "epoch": 0.34195631007163146, + "grad_norm": 512.63427734375, + "learning_rate": 2.746241755795952e-06, + "loss": 52.5931, + "step": 84640 + }, + { + "epoch": 0.34199671133699905, + "grad_norm": 439.4118957519531, + "learning_rate": 2.7449828821525624e-06, + "loss": 38.7648, + "step": 84650 + }, + { + "epoch": 0.3420371126023667, + "grad_norm": 661.09765625, + "learning_rate": 2.7437241879305354e-06, + "loss": 51.7134, + "step": 84660 + }, + { + "epoch": 0.34207751386773433, + "grad_norm": 643.4342651367188, + "learning_rate": 2.7424656732300193e-06, + "loss": 49.4388, + "step": 84670 + }, + { + "epoch": 0.34211791513310197, + "grad_norm": 667.8513793945312, + "learning_rate": 2.7412073381511495e-06, + "loss": 34.0474, + "step": 84680 + }, + { + "epoch": 0.3421583163984696, + "grad_norm": 191.4943084716797, + "learning_rate": 2.739949182794045e-06, + "loss": 41.1864, + "step": 84690 + }, + { + "epoch": 0.34219871766383725, + "grad_norm": 528.9896850585938, + "learning_rate": 2.7386912072588123e-06, + "loss": 42.0747, + "step": 84700 + }, + { + "epoch": 0.3422391189292049, + "grad_norm": 625.2055053710938, + "learning_rate": 2.737433411645542e-06, + "loss": 38.8828, + "step": 84710 + }, + { + "epoch": 0.3422795201945725, + "grad_norm": 581.0269775390625, + "learning_rate": 2.7361757960543114e-06, + "loss": 44.3446, + "step": 84720 + }, + { + "epoch": 0.3423199214599401, + "grad_norm": 819.0857543945312, + "learning_rate": 2.7349183605851824e-06, + "loss": 35.713, + "step": 84730 + }, + { + "epoch": 0.34236032272530775, + "grad_norm": 745.9638671875, + "learning_rate": 2.733661105338205e-06, + "loss": 37.4942, + "step": 84740 + }, + { + "epoch": 0.3424007239906754, + "grad_norm": 733.6754150390625, + "learning_rate": 2.7324040304134125e-06, + "loss": 51.0984, + "step": 84750 + }, + { + "epoch": 0.34244112525604303, + "grad_norm": 247.33990478515625, + "learning_rate": 2.731147135910824e-06, + "loss": 33.1103, + "step": 84760 + }, + { + "epoch": 0.3424815265214107, + "grad_norm": 876.4989013671875, + "learning_rate": 2.729890421930445e-06, + "loss": 43.8137, + "step": 84770 + }, + { + "epoch": 0.34252192778677826, + "grad_norm": 448.0574951171875, + "learning_rate": 2.7286338885722674e-06, + "loss": 34.5072, + "step": 84780 + }, + { + "epoch": 0.3425623290521459, + "grad_norm": 567.6174926757812, + "learning_rate": 2.7273775359362665e-06, + "loss": 29.5398, + "step": 84790 + }, + { + "epoch": 0.34260273031751354, + "grad_norm": 390.6498107910156, + "learning_rate": 2.7261213641224056e-06, + "loss": 45.2981, + "step": 84800 + }, + { + "epoch": 0.3426431315828812, + "grad_norm": 614.93359375, + "learning_rate": 2.724865373230632e-06, + "loss": 38.1664, + "step": 84810 + }, + { + "epoch": 0.3426835328482488, + "grad_norm": 1281.5938720703125, + "learning_rate": 2.723609563360879e-06, + "loss": 45.2071, + "step": 84820 + }, + { + "epoch": 0.34272393411361646, + "grad_norm": 871.1010131835938, + "learning_rate": 2.7223539346130655e-06, + "loss": 46.9967, + "step": 84830 + }, + { + "epoch": 0.3427643353789841, + "grad_norm": 876.0147094726562, + "learning_rate": 2.7210984870870972e-06, + "loss": 41.4255, + "step": 84840 + }, + { + "epoch": 0.3428047366443517, + "grad_norm": 284.36773681640625, + "learning_rate": 2.7198432208828653e-06, + "loss": 33.469, + "step": 84850 + }, + { + "epoch": 0.3428451379097193, + "grad_norm": 416.6922607421875, + "learning_rate": 2.7185881361002415e-06, + "loss": 49.1144, + "step": 84860 + }, + { + "epoch": 0.34288553917508696, + "grad_norm": 864.996337890625, + "learning_rate": 2.717333232839088e-06, + "loss": 44.824, + "step": 84870 + }, + { + "epoch": 0.3429259404404546, + "grad_norm": 680.2763671875, + "learning_rate": 2.7160785111992546e-06, + "loss": 46.4814, + "step": 84880 + }, + { + "epoch": 0.34296634170582224, + "grad_norm": 371.52783203125, + "learning_rate": 2.7148239712805725e-06, + "loss": 42.2676, + "step": 84890 + }, + { + "epoch": 0.3430067429711899, + "grad_norm": 485.0791931152344, + "learning_rate": 2.713569613182859e-06, + "loss": 51.0596, + "step": 84900 + }, + { + "epoch": 0.34304714423655747, + "grad_norm": 598.8778076171875, + "learning_rate": 2.7123154370059185e-06, + "loss": 39.7807, + "step": 84910 + }, + { + "epoch": 0.3430875455019251, + "grad_norm": 786.4949340820312, + "learning_rate": 2.7110614428495396e-06, + "loss": 49.6095, + "step": 84920 + }, + { + "epoch": 0.34312794676729275, + "grad_norm": 443.7628479003906, + "learning_rate": 2.709807630813497e-06, + "loss": 44.824, + "step": 84930 + }, + { + "epoch": 0.3431683480326604, + "grad_norm": 480.0895080566406, + "learning_rate": 2.7085540009975526e-06, + "loss": 42.2488, + "step": 84940 + }, + { + "epoch": 0.343208749298028, + "grad_norm": 1161.1788330078125, + "learning_rate": 2.707300553501448e-06, + "loss": 65.0683, + "step": 84950 + }, + { + "epoch": 0.34324915056339567, + "grad_norm": 620.4024658203125, + "learning_rate": 2.7060472884249145e-06, + "loss": 53.8362, + "step": 84960 + }, + { + "epoch": 0.34328955182876325, + "grad_norm": 409.4412841796875, + "learning_rate": 2.7047942058676717e-06, + "loss": 32.1152, + "step": 84970 + }, + { + "epoch": 0.3433299530941309, + "grad_norm": 478.91217041015625, + "learning_rate": 2.703541305929421e-06, + "loss": 33.8495, + "step": 84980 + }, + { + "epoch": 0.34337035435949853, + "grad_norm": 581.8789672851562, + "learning_rate": 2.7022885887098492e-06, + "loss": 28.2309, + "step": 84990 + }, + { + "epoch": 0.34341075562486617, + "grad_norm": 454.3522033691406, + "learning_rate": 2.701036054308629e-06, + "loss": 41.7049, + "step": 85000 + }, + { + "epoch": 0.3434511568902338, + "grad_norm": 500.46746826171875, + "learning_rate": 2.699783702825419e-06, + "loss": 45.1027, + "step": 85010 + }, + { + "epoch": 0.34349155815560145, + "grad_norm": 626.4382934570312, + "learning_rate": 2.698531534359864e-06, + "loss": 41.5983, + "step": 85020 + }, + { + "epoch": 0.3435319594209691, + "grad_norm": 800.3005981445312, + "learning_rate": 2.6972795490115944e-06, + "loss": 46.0404, + "step": 85030 + }, + { + "epoch": 0.3435723606863367, + "grad_norm": 644.60986328125, + "learning_rate": 2.6960277468802203e-06, + "loss": 52.5239, + "step": 85040 + }, + { + "epoch": 0.3436127619517043, + "grad_norm": 430.5956115722656, + "learning_rate": 2.694776128065345e-06, + "loss": 43.1113, + "step": 85050 + }, + { + "epoch": 0.34365316321707196, + "grad_norm": 516.9263305664062, + "learning_rate": 2.6935246926665513e-06, + "loss": 47.5073, + "step": 85060 + }, + { + "epoch": 0.3436935644824396, + "grad_norm": 911.3888549804688, + "learning_rate": 2.692273440783415e-06, + "loss": 48.9684, + "step": 85070 + }, + { + "epoch": 0.34373396574780724, + "grad_norm": 1070.81201171875, + "learning_rate": 2.6910223725154903e-06, + "loss": 51.5814, + "step": 85080 + }, + { + "epoch": 0.3437743670131749, + "grad_norm": 786.39013671875, + "learning_rate": 2.6897714879623184e-06, + "loss": 49.3049, + "step": 85090 + }, + { + "epoch": 0.34381476827854246, + "grad_norm": 882.5459594726562, + "learning_rate": 2.688520787223426e-06, + "loss": 36.6178, + "step": 85100 + }, + { + "epoch": 0.3438551695439101, + "grad_norm": 631.80517578125, + "learning_rate": 2.6872702703983287e-06, + "loss": 41.4636, + "step": 85110 + }, + { + "epoch": 0.34389557080927774, + "grad_norm": 607.47119140625, + "learning_rate": 2.6860199375865203e-06, + "loss": 32.1181, + "step": 85120 + }, + { + "epoch": 0.3439359720746454, + "grad_norm": 692.5158081054688, + "learning_rate": 2.6847697888874853e-06, + "loss": 43.0668, + "step": 85130 + }, + { + "epoch": 0.343976373340013, + "grad_norm": 360.70697021484375, + "learning_rate": 2.683519824400693e-06, + "loss": 51.3872, + "step": 85140 + }, + { + "epoch": 0.34401677460538066, + "grad_norm": 551.6885375976562, + "learning_rate": 2.6822700442255965e-06, + "loss": 30.0492, + "step": 85150 + }, + { + "epoch": 0.34405717587074824, + "grad_norm": 700.1077880859375, + "learning_rate": 2.681020448461634e-06, + "loss": 43.414, + "step": 85160 + }, + { + "epoch": 0.3440975771361159, + "grad_norm": 409.4812316894531, + "learning_rate": 2.679771037208234e-06, + "loss": 49.9183, + "step": 85170 + }, + { + "epoch": 0.3441379784014835, + "grad_norm": 522.0422973632812, + "learning_rate": 2.678521810564804e-06, + "loss": 33.3543, + "step": 85180 + }, + { + "epoch": 0.34417837966685116, + "grad_norm": 918.2327270507812, + "learning_rate": 2.6772727686307398e-06, + "loss": 55.8203, + "step": 85190 + }, + { + "epoch": 0.3442187809322188, + "grad_norm": 532.8182373046875, + "learning_rate": 2.676023911505423e-06, + "loss": 43.2552, + "step": 85200 + }, + { + "epoch": 0.34425918219758644, + "grad_norm": 448.65093994140625, + "learning_rate": 2.674775239288216e-06, + "loss": 90.8124, + "step": 85210 + }, + { + "epoch": 0.3442995834629541, + "grad_norm": 640.6787719726562, + "learning_rate": 2.673526752078472e-06, + "loss": 29.4502, + "step": 85220 + }, + { + "epoch": 0.34433998472832167, + "grad_norm": 580.9517211914062, + "learning_rate": 2.6722784499755273e-06, + "loss": 44.5188, + "step": 85230 + }, + { + "epoch": 0.3443803859936893, + "grad_norm": 927.4952392578125, + "learning_rate": 2.6710303330787035e-06, + "loss": 42.2006, + "step": 85240 + }, + { + "epoch": 0.34442078725905695, + "grad_norm": 873.9500732421875, + "learning_rate": 2.6697824014873076e-06, + "loss": 45.5853, + "step": 85250 + }, + { + "epoch": 0.3444611885244246, + "grad_norm": 587.55126953125, + "learning_rate": 2.6685346553006293e-06, + "loss": 51.6585, + "step": 85260 + }, + { + "epoch": 0.34450158978979223, + "grad_norm": 814.5745239257812, + "learning_rate": 2.6672870946179506e-06, + "loss": 34.1723, + "step": 85270 + }, + { + "epoch": 0.34454199105515987, + "grad_norm": 784.3179931640625, + "learning_rate": 2.6660397195385344e-06, + "loss": 46.9616, + "step": 85280 + }, + { + "epoch": 0.34458239232052745, + "grad_norm": 558.2401733398438, + "learning_rate": 2.664792530161624e-06, + "loss": 35.8878, + "step": 85290 + }, + { + "epoch": 0.3446227935858951, + "grad_norm": 741.8731079101562, + "learning_rate": 2.6635455265864553e-06, + "loss": 39.7892, + "step": 85300 + }, + { + "epoch": 0.34466319485126273, + "grad_norm": 650.9552001953125, + "learning_rate": 2.662298708912246e-06, + "loss": 54.3428, + "step": 85310 + }, + { + "epoch": 0.3447035961166304, + "grad_norm": 0.0, + "learning_rate": 2.6610520772382e-06, + "loss": 31.7625, + "step": 85320 + }, + { + "epoch": 0.344743997381998, + "grad_norm": 1837.4625244140625, + "learning_rate": 2.659805631663505e-06, + "loss": 54.597, + "step": 85330 + }, + { + "epoch": 0.34478439864736565, + "grad_norm": 561.35302734375, + "learning_rate": 2.658559372287337e-06, + "loss": 39.6425, + "step": 85340 + }, + { + "epoch": 0.3448247999127333, + "grad_norm": 373.4984130859375, + "learning_rate": 2.6573132992088534e-06, + "loss": 25.0325, + "step": 85350 + }, + { + "epoch": 0.3448652011781009, + "grad_norm": 1178.658935546875, + "learning_rate": 2.656067412527197e-06, + "loss": 59.2496, + "step": 85360 + }, + { + "epoch": 0.3449056024434685, + "grad_norm": 391.9385681152344, + "learning_rate": 2.6548217123415033e-06, + "loss": 51.5498, + "step": 85370 + }, + { + "epoch": 0.34494600370883616, + "grad_norm": 933.973876953125, + "learning_rate": 2.6535761987508813e-06, + "loss": 43.9957, + "step": 85380 + }, + { + "epoch": 0.3449864049742038, + "grad_norm": 526.8036499023438, + "learning_rate": 2.652330871854433e-06, + "loss": 48.7626, + "step": 85390 + }, + { + "epoch": 0.34502680623957144, + "grad_norm": 312.12994384765625, + "learning_rate": 2.651085731751242e-06, + "loss": 47.8764, + "step": 85400 + }, + { + "epoch": 0.3450672075049391, + "grad_norm": 357.3439025878906, + "learning_rate": 2.6498407785403794e-06, + "loss": 35.1316, + "step": 85410 + }, + { + "epoch": 0.34510760877030666, + "grad_norm": 447.86126708984375, + "learning_rate": 2.648596012320901e-06, + "loss": 44.3176, + "step": 85420 + }, + { + "epoch": 0.3451480100356743, + "grad_norm": 734.8046264648438, + "learning_rate": 2.647351433191846e-06, + "loss": 32.598, + "step": 85430 + }, + { + "epoch": 0.34518841130104194, + "grad_norm": 710.0639038085938, + "learning_rate": 2.64610704125224e-06, + "loss": 41.4129, + "step": 85440 + }, + { + "epoch": 0.3452288125664096, + "grad_norm": 1368.0191650390625, + "learning_rate": 2.644862836601092e-06, + "loss": 62.2333, + "step": 85450 + }, + { + "epoch": 0.3452692138317772, + "grad_norm": 411.8841857910156, + "learning_rate": 2.6436188193374035e-06, + "loss": 32.3257, + "step": 85460 + }, + { + "epoch": 0.34530961509714486, + "grad_norm": 720.62890625, + "learning_rate": 2.6423749895601494e-06, + "loss": 38.4964, + "step": 85470 + }, + { + "epoch": 0.34535001636251245, + "grad_norm": 444.2539367675781, + "learning_rate": 2.6411313473682966e-06, + "loss": 52.2975, + "step": 85480 + }, + { + "epoch": 0.3453904176278801, + "grad_norm": 674.4296264648438, + "learning_rate": 2.6398878928607973e-06, + "loss": 46.51, + "step": 85490 + }, + { + "epoch": 0.3454308188932477, + "grad_norm": 707.7427978515625, + "learning_rate": 2.6386446261365874e-06, + "loss": 42.681, + "step": 85500 + }, + { + "epoch": 0.34547122015861537, + "grad_norm": 959.9139404296875, + "learning_rate": 2.6374015472945868e-06, + "loss": 42.9944, + "step": 85510 + }, + { + "epoch": 0.345511621423983, + "grad_norm": 620.0403442382812, + "learning_rate": 2.6361586564337023e-06, + "loss": 43.4066, + "step": 85520 + }, + { + "epoch": 0.34555202268935065, + "grad_norm": 894.035400390625, + "learning_rate": 2.6349159536528245e-06, + "loss": 44.0186, + "step": 85530 + }, + { + "epoch": 0.3455924239547183, + "grad_norm": 703.9459228515625, + "learning_rate": 2.633673439050831e-06, + "loss": 42.7953, + "step": 85540 + }, + { + "epoch": 0.34563282522008587, + "grad_norm": 1000.5932006835938, + "learning_rate": 2.6324311127265812e-06, + "loss": 54.5857, + "step": 85550 + }, + { + "epoch": 0.3456732264854535, + "grad_norm": 4672.994140625, + "learning_rate": 2.6311889747789225e-06, + "loss": 52.1665, + "step": 85560 + }, + { + "epoch": 0.34571362775082115, + "grad_norm": 723.791748046875, + "learning_rate": 2.6299470253066863e-06, + "loss": 44.0532, + "step": 85570 + }, + { + "epoch": 0.3457540290161888, + "grad_norm": 531.83740234375, + "learning_rate": 2.628705264408687e-06, + "loss": 41.6017, + "step": 85580 + }, + { + "epoch": 0.34579443028155643, + "grad_norm": 622.5613403320312, + "learning_rate": 2.6274636921837272e-06, + "loss": 42.075, + "step": 85590 + }, + { + "epoch": 0.34583483154692407, + "grad_norm": 635.7496948242188, + "learning_rate": 2.626222308730594e-06, + "loss": 44.8181, + "step": 85600 + }, + { + "epoch": 0.34587523281229166, + "grad_norm": 520.9039306640625, + "learning_rate": 2.6249811141480564e-06, + "loss": 37.7106, + "step": 85610 + }, + { + "epoch": 0.3459156340776593, + "grad_norm": 809.7235717773438, + "learning_rate": 2.6237401085348723e-06, + "loss": 55.4172, + "step": 85620 + }, + { + "epoch": 0.34595603534302694, + "grad_norm": 748.8391723632812, + "learning_rate": 2.6224992919897817e-06, + "loss": 60.3569, + "step": 85630 + }, + { + "epoch": 0.3459964366083946, + "grad_norm": 422.0188903808594, + "learning_rate": 2.6212586646115114e-06, + "loss": 32.4093, + "step": 85640 + }, + { + "epoch": 0.3460368378737622, + "grad_norm": 386.18365478515625, + "learning_rate": 2.620018226498772e-06, + "loss": 56.6944, + "step": 85650 + }, + { + "epoch": 0.34607723913912986, + "grad_norm": 704.84765625, + "learning_rate": 2.61877797775026e-06, + "loss": 39.3209, + "step": 85660 + }, + { + "epoch": 0.3461176404044975, + "grad_norm": 497.8809509277344, + "learning_rate": 2.6175379184646565e-06, + "loss": 33.6977, + "step": 85670 + }, + { + "epoch": 0.3461580416698651, + "grad_norm": 733.8531494140625, + "learning_rate": 2.616298048740626e-06, + "loss": 41.8165, + "step": 85680 + }, + { + "epoch": 0.3461984429352327, + "grad_norm": 733.565185546875, + "learning_rate": 2.6150583686768203e-06, + "loss": 33.9747, + "step": 85690 + }, + { + "epoch": 0.34623884420060036, + "grad_norm": 598.9244384765625, + "learning_rate": 2.6138188783718745e-06, + "loss": 57.9881, + "step": 85700 + }, + { + "epoch": 0.346279245465968, + "grad_norm": 471.6096496582031, + "learning_rate": 2.6125795779244125e-06, + "loss": 37.6456, + "step": 85710 + }, + { + "epoch": 0.34631964673133564, + "grad_norm": 816.8072509765625, + "learning_rate": 2.611340467433031e-06, + "loss": 48.4208, + "step": 85720 + }, + { + "epoch": 0.3463600479967033, + "grad_norm": 368.94610595703125, + "learning_rate": 2.61010154699633e-06, + "loss": 37.9098, + "step": 85730 + }, + { + "epoch": 0.34640044926207086, + "grad_norm": 1260.583984375, + "learning_rate": 2.6088628167128794e-06, + "loss": 55.5421, + "step": 85740 + }, + { + "epoch": 0.3464408505274385, + "grad_norm": 934.0879516601562, + "learning_rate": 2.607624276681241e-06, + "loss": 43.342, + "step": 85750 + }, + { + "epoch": 0.34648125179280614, + "grad_norm": 547.6620483398438, + "learning_rate": 2.6063859269999594e-06, + "loss": 42.7008, + "step": 85760 + }, + { + "epoch": 0.3465216530581738, + "grad_norm": 475.74371337890625, + "learning_rate": 2.605147767767564e-06, + "loss": 34.0014, + "step": 85770 + }, + { + "epoch": 0.3465620543235414, + "grad_norm": 366.348388671875, + "learning_rate": 2.6039097990825703e-06, + "loss": 31.9253, + "step": 85780 + }, + { + "epoch": 0.34660245558890906, + "grad_norm": 999.9945678710938, + "learning_rate": 2.602672021043477e-06, + "loss": 84.3831, + "step": 85790 + }, + { + "epoch": 0.34664285685427665, + "grad_norm": 1005.42578125, + "learning_rate": 2.601434433748771e-06, + "loss": 37.5773, + "step": 85800 + }, + { + "epoch": 0.3466832581196443, + "grad_norm": 458.24029541015625, + "learning_rate": 2.600197037296917e-06, + "loss": 50.0763, + "step": 85810 + }, + { + "epoch": 0.34672365938501193, + "grad_norm": 732.8392944335938, + "learning_rate": 2.5989598317863694e-06, + "loss": 46.9295, + "step": 85820 + }, + { + "epoch": 0.34676406065037957, + "grad_norm": 282.8758544921875, + "learning_rate": 2.59772281731557e-06, + "loss": 38.0548, + "step": 85830 + }, + { + "epoch": 0.3468044619157472, + "grad_norm": 1127.44384765625, + "learning_rate": 2.5964859939829423e-06, + "loss": 52.8228, + "step": 85840 + }, + { + "epoch": 0.34684486318111485, + "grad_norm": 1677.3736572265625, + "learning_rate": 2.595249361886892e-06, + "loss": 53.9349, + "step": 85850 + }, + { + "epoch": 0.3468852644464825, + "grad_norm": 543.578369140625, + "learning_rate": 2.5940129211258147e-06, + "loss": 42.8509, + "step": 85860 + }, + { + "epoch": 0.3469256657118501, + "grad_norm": 700.8523559570312, + "learning_rate": 2.5927766717980873e-06, + "loss": 51.0615, + "step": 85870 + }, + { + "epoch": 0.3469660669772177, + "grad_norm": 297.7542419433594, + "learning_rate": 2.5915406140020738e-06, + "loss": 29.6853, + "step": 85880 + }, + { + "epoch": 0.34700646824258535, + "grad_norm": 316.5313415527344, + "learning_rate": 2.590304747836119e-06, + "loss": 34.6187, + "step": 85890 + }, + { + "epoch": 0.347046869507953, + "grad_norm": 995.6156005859375, + "learning_rate": 2.5890690733985555e-06, + "loss": 48.3624, + "step": 85900 + }, + { + "epoch": 0.34708727077332063, + "grad_norm": 438.5686950683594, + "learning_rate": 2.5878335907876997e-06, + "loss": 51.1746, + "step": 85910 + }, + { + "epoch": 0.3471276720386883, + "grad_norm": 1318.769775390625, + "learning_rate": 2.5865983001018567e-06, + "loss": 44.332, + "step": 85920 + }, + { + "epoch": 0.34716807330405586, + "grad_norm": 188.23451232910156, + "learning_rate": 2.5853632014393108e-06, + "loss": 28.3907, + "step": 85930 + }, + { + "epoch": 0.3472084745694235, + "grad_norm": 539.7623291015625, + "learning_rate": 2.584128294898334e-06, + "loss": 43.8977, + "step": 85940 + }, + { + "epoch": 0.34724887583479114, + "grad_norm": 445.7605285644531, + "learning_rate": 2.5828935805771804e-06, + "loss": 46.062, + "step": 85950 + }, + { + "epoch": 0.3472892771001588, + "grad_norm": 504.9610290527344, + "learning_rate": 2.581659058574092e-06, + "loss": 77.293, + "step": 85960 + }, + { + "epoch": 0.3473296783655264, + "grad_norm": 569.9614868164062, + "learning_rate": 2.580424728987296e-06, + "loss": 42.8085, + "step": 85970 + }, + { + "epoch": 0.34737007963089406, + "grad_norm": 786.86767578125, + "learning_rate": 2.5791905919149973e-06, + "loss": 35.5293, + "step": 85980 + }, + { + "epoch": 0.3474104808962617, + "grad_norm": 899.1405639648438, + "learning_rate": 2.5779566474553934e-06, + "loss": 54.769, + "step": 85990 + }, + { + "epoch": 0.3474508821616293, + "grad_norm": 331.9537353515625, + "learning_rate": 2.5767228957066635e-06, + "loss": 31.8642, + "step": 86000 + }, + { + "epoch": 0.3474912834269969, + "grad_norm": 952.2335205078125, + "learning_rate": 2.5754893367669697e-06, + "loss": 36.8524, + "step": 86010 + }, + { + "epoch": 0.34753168469236456, + "grad_norm": 415.8359680175781, + "learning_rate": 2.5742559707344638e-06, + "loss": 35.8399, + "step": 86020 + }, + { + "epoch": 0.3475720859577322, + "grad_norm": 397.8514709472656, + "learning_rate": 2.573022797707278e-06, + "loss": 42.1008, + "step": 86030 + }, + { + "epoch": 0.34761248722309984, + "grad_norm": 588.5228271484375, + "learning_rate": 2.57178981778353e-06, + "loss": 44.473, + "step": 86040 + }, + { + "epoch": 0.3476528884884675, + "grad_norm": 790.072998046875, + "learning_rate": 2.5705570310613215e-06, + "loss": 39.8272, + "step": 86050 + }, + { + "epoch": 0.34769328975383507, + "grad_norm": 400.7402038574219, + "learning_rate": 2.5693244376387435e-06, + "loss": 30.9018, + "step": 86060 + }, + { + "epoch": 0.3477336910192027, + "grad_norm": 293.9530944824219, + "learning_rate": 2.568092037613862e-06, + "loss": 25.6735, + "step": 86070 + }, + { + "epoch": 0.34777409228457035, + "grad_norm": 873.0491333007812, + "learning_rate": 2.566859831084736e-06, + "loss": 60.489, + "step": 86080 + }, + { + "epoch": 0.347814493549938, + "grad_norm": 487.86639404296875, + "learning_rate": 2.5656278181494072e-06, + "loss": 34.5589, + "step": 86090 + }, + { + "epoch": 0.3478548948153056, + "grad_norm": 350.3863830566406, + "learning_rate": 2.5643959989058997e-06, + "loss": 37.4708, + "step": 86100 + }, + { + "epoch": 0.34789529608067327, + "grad_norm": 1139.77294921875, + "learning_rate": 2.563164373452224e-06, + "loss": 34.991, + "step": 86110 + }, + { + "epoch": 0.34793569734604085, + "grad_norm": 422.9922180175781, + "learning_rate": 2.561932941886377e-06, + "loss": 63.9281, + "step": 86120 + }, + { + "epoch": 0.3479760986114085, + "grad_norm": 423.64495849609375, + "learning_rate": 2.560701704306336e-06, + "loss": 39.6468, + "step": 86130 + }, + { + "epoch": 0.34801649987677613, + "grad_norm": 496.6314392089844, + "learning_rate": 2.5594706608100677e-06, + "loss": 35.5593, + "step": 86140 + }, + { + "epoch": 0.34805690114214377, + "grad_norm": 201.16738891601562, + "learning_rate": 2.5582398114955164e-06, + "loss": 39.8651, + "step": 86150 + }, + { + "epoch": 0.3480973024075114, + "grad_norm": 464.44390869140625, + "learning_rate": 2.5570091564606182e-06, + "loss": 41.5413, + "step": 86160 + }, + { + "epoch": 0.34813770367287905, + "grad_norm": 518.0660400390625, + "learning_rate": 2.555778695803288e-06, + "loss": 46.4022, + "step": 86170 + }, + { + "epoch": 0.3481781049382467, + "grad_norm": 617.8944091796875, + "learning_rate": 2.554548429621431e-06, + "loss": 53.7246, + "step": 86180 + }, + { + "epoch": 0.3482185062036143, + "grad_norm": 409.9566345214844, + "learning_rate": 2.5533183580129317e-06, + "loss": 43.618, + "step": 86190 + }, + { + "epoch": 0.3482589074689819, + "grad_norm": 608.5611572265625, + "learning_rate": 2.5520884810756614e-06, + "loss": 39.9297, + "step": 86200 + }, + { + "epoch": 0.34829930873434956, + "grad_norm": 672.5448608398438, + "learning_rate": 2.550858798907475e-06, + "loss": 39.3397, + "step": 86210 + }, + { + "epoch": 0.3483397099997172, + "grad_norm": 183.29920959472656, + "learning_rate": 2.5496293116062154e-06, + "loss": 41.2196, + "step": 86220 + }, + { + "epoch": 0.34838011126508484, + "grad_norm": 903.1591796875, + "learning_rate": 2.5484000192697078e-06, + "loss": 56.9694, + "step": 86230 + }, + { + "epoch": 0.3484205125304525, + "grad_norm": 2445.053955078125, + "learning_rate": 2.5471709219957573e-06, + "loss": 68.2111, + "step": 86240 + }, + { + "epoch": 0.34846091379582006, + "grad_norm": 504.24951171875, + "learning_rate": 2.5459420198821604e-06, + "loss": 57.0127, + "step": 86250 + }, + { + "epoch": 0.3485013150611877, + "grad_norm": 447.6020812988281, + "learning_rate": 2.5447133130266937e-06, + "loss": 38.3485, + "step": 86260 + }, + { + "epoch": 0.34854171632655534, + "grad_norm": 619.9303588867188, + "learning_rate": 2.5434848015271206e-06, + "loss": 39.8455, + "step": 86270 + }, + { + "epoch": 0.348582117591923, + "grad_norm": 1106.0968017578125, + "learning_rate": 2.542256485481188e-06, + "loss": 51.2592, + "step": 86280 + }, + { + "epoch": 0.3486225188572906, + "grad_norm": 550.4239501953125, + "learning_rate": 2.5410283649866272e-06, + "loss": 56.5999, + "step": 86290 + }, + { + "epoch": 0.34866292012265826, + "grad_norm": 353.9264221191406, + "learning_rate": 2.539800440141154e-06, + "loss": 38.7674, + "step": 86300 + }, + { + "epoch": 0.3487033213880259, + "grad_norm": 819.6434326171875, + "learning_rate": 2.5385727110424697e-06, + "loss": 38.6414, + "step": 86310 + }, + { + "epoch": 0.3487437226533935, + "grad_norm": 751.7317504882812, + "learning_rate": 2.5373451777882575e-06, + "loss": 50.2327, + "step": 86320 + }, + { + "epoch": 0.3487841239187611, + "grad_norm": 264.96881103515625, + "learning_rate": 2.5361178404761876e-06, + "loss": 34.7408, + "step": 86330 + }, + { + "epoch": 0.34882452518412876, + "grad_norm": 508.8009948730469, + "learning_rate": 2.534890699203914e-06, + "loss": 37.9676, + "step": 86340 + }, + { + "epoch": 0.3488649264494964, + "grad_norm": 450.41363525390625, + "learning_rate": 2.533663754069074e-06, + "loss": 36.202, + "step": 86350 + }, + { + "epoch": 0.34890532771486404, + "grad_norm": 1836.413330078125, + "learning_rate": 2.5324370051692905e-06, + "loss": 51.1927, + "step": 86360 + }, + { + "epoch": 0.3489457289802317, + "grad_norm": 431.05322265625, + "learning_rate": 2.5312104526021687e-06, + "loss": 37.8777, + "step": 86370 + }, + { + "epoch": 0.34898613024559927, + "grad_norm": 606.6944580078125, + "learning_rate": 2.529984096465302e-06, + "loss": 46.8281, + "step": 86380 + }, + { + "epoch": 0.3490265315109669, + "grad_norm": 449.3538513183594, + "learning_rate": 2.528757936856264e-06, + "loss": 47.6959, + "step": 86390 + }, + { + "epoch": 0.34906693277633455, + "grad_norm": 682.8927001953125, + "learning_rate": 2.527531973872617e-06, + "loss": 44.2995, + "step": 86400 + }, + { + "epoch": 0.3491073340417022, + "grad_norm": 621.0684814453125, + "learning_rate": 2.5263062076119026e-06, + "loss": 55.0341, + "step": 86410 + }, + { + "epoch": 0.34914773530706983, + "grad_norm": 959.4788818359375, + "learning_rate": 2.525080638171651e-06, + "loss": 38.6664, + "step": 86420 + }, + { + "epoch": 0.34918813657243747, + "grad_norm": 329.4792175292969, + "learning_rate": 2.5238552656493743e-06, + "loss": 54.4498, + "step": 86430 + }, + { + "epoch": 0.34922853783780505, + "grad_norm": 547.2073974609375, + "learning_rate": 2.52263009014257e-06, + "loss": 50.6669, + "step": 86440 + }, + { + "epoch": 0.3492689391031727, + "grad_norm": 348.137939453125, + "learning_rate": 2.5214051117487205e-06, + "loss": 53.2841, + "step": 86450 + }, + { + "epoch": 0.34930934036854033, + "grad_norm": 838.4050903320312, + "learning_rate": 2.52018033056529e-06, + "loss": 46.9813, + "step": 86460 + }, + { + "epoch": 0.349349741633908, + "grad_norm": 662.2925415039062, + "learning_rate": 2.5189557466897306e-06, + "loss": 41.6777, + "step": 86470 + }, + { + "epoch": 0.3493901428992756, + "grad_norm": 541.9384155273438, + "learning_rate": 2.517731360219476e-06, + "loss": 41.1058, + "step": 86480 + }, + { + "epoch": 0.34943054416464325, + "grad_norm": 446.6821594238281, + "learning_rate": 2.5165071712519447e-06, + "loss": 46.4978, + "step": 86490 + }, + { + "epoch": 0.3494709454300109, + "grad_norm": 493.4598083496094, + "learning_rate": 2.51528317988454e-06, + "loss": 34.3595, + "step": 86500 + }, + { + "epoch": 0.3495113466953785, + "grad_norm": 368.7260437011719, + "learning_rate": 2.5140593862146496e-06, + "loss": 33.893, + "step": 86510 + }, + { + "epoch": 0.3495517479607461, + "grad_norm": 360.5600891113281, + "learning_rate": 2.512835790339645e-06, + "loss": 91.0386, + "step": 86520 + }, + { + "epoch": 0.34959214922611376, + "grad_norm": 759.7279052734375, + "learning_rate": 2.5116123923568815e-06, + "loss": 61.5651, + "step": 86530 + }, + { + "epoch": 0.3496325504914814, + "grad_norm": 482.3156433105469, + "learning_rate": 2.5103891923637e-06, + "loss": 35.2281, + "step": 86540 + }, + { + "epoch": 0.34967295175684904, + "grad_norm": 1186.7445068359375, + "learning_rate": 2.509166190457425e-06, + "loss": 40.6471, + "step": 86550 + }, + { + "epoch": 0.3497133530222167, + "grad_norm": 926.4749145507812, + "learning_rate": 2.5079433867353646e-06, + "loss": 52.9848, + "step": 86560 + }, + { + "epoch": 0.34975375428758426, + "grad_norm": 282.3522644042969, + "learning_rate": 2.5067207812948123e-06, + "loss": 35.276, + "step": 86570 + }, + { + "epoch": 0.3497941555529519, + "grad_norm": 592.4163818359375, + "learning_rate": 2.505498374233044e-06, + "loss": 50.2927, + "step": 86580 + }, + { + "epoch": 0.34983455681831954, + "grad_norm": 749.9614868164062, + "learning_rate": 2.5042761656473226e-06, + "loss": 58.0474, + "step": 86590 + }, + { + "epoch": 0.3498749580836872, + "grad_norm": 476.7281799316406, + "learning_rate": 2.503054155634893e-06, + "loss": 46.2431, + "step": 86600 + }, + { + "epoch": 0.3499153593490548, + "grad_norm": 544.9255981445312, + "learning_rate": 2.5018323442929844e-06, + "loss": 36.0948, + "step": 86610 + }, + { + "epoch": 0.34995576061442246, + "grad_norm": 615.7764892578125, + "learning_rate": 2.500610731718811e-06, + "loss": 44.5007, + "step": 86620 + }, + { + "epoch": 0.3499961618797901, + "grad_norm": 615.6270141601562, + "learning_rate": 2.499389318009571e-06, + "loss": 32.2692, + "step": 86630 + }, + { + "epoch": 0.3500365631451577, + "grad_norm": 724.1041259765625, + "learning_rate": 2.4981681032624473e-06, + "loss": 47.3459, + "step": 86640 + }, + { + "epoch": 0.3500769644105253, + "grad_norm": 1264.620849609375, + "learning_rate": 2.4969470875746055e-06, + "loss": 41.2133, + "step": 86650 + }, + { + "epoch": 0.35011736567589297, + "grad_norm": 616.659423828125, + "learning_rate": 2.495726271043198e-06, + "loss": 41.7627, + "step": 86660 + }, + { + "epoch": 0.3501577669412606, + "grad_norm": 519.1875610351562, + "learning_rate": 2.4945056537653545e-06, + "loss": 54.5645, + "step": 86670 + }, + { + "epoch": 0.35019816820662825, + "grad_norm": 255.1110382080078, + "learning_rate": 2.493285235838199e-06, + "loss": 35.6709, + "step": 86680 + }, + { + "epoch": 0.3502385694719959, + "grad_norm": 702.441650390625, + "learning_rate": 2.492065017358834e-06, + "loss": 46.2376, + "step": 86690 + }, + { + "epoch": 0.35027897073736347, + "grad_norm": 773.6206665039062, + "learning_rate": 2.4908449984243448e-06, + "loss": 47.9963, + "step": 86700 + }, + { + "epoch": 0.3503193720027311, + "grad_norm": 1610.5216064453125, + "learning_rate": 2.4896251791318036e-06, + "loss": 73.3314, + "step": 86710 + }, + { + "epoch": 0.35035977326809875, + "grad_norm": 515.2232666015625, + "learning_rate": 2.4884055595782666e-06, + "loss": 62.2861, + "step": 86720 + }, + { + "epoch": 0.3504001745334664, + "grad_norm": 603.9746704101562, + "learning_rate": 2.487186139860772e-06, + "loss": 69.002, + "step": 86730 + }, + { + "epoch": 0.35044057579883403, + "grad_norm": 410.34613037109375, + "learning_rate": 2.485966920076346e-06, + "loss": 29.3686, + "step": 86740 + }, + { + "epoch": 0.35048097706420167, + "grad_norm": 396.1544189453125, + "learning_rate": 2.4847479003219926e-06, + "loss": 29.2824, + "step": 86750 + }, + { + "epoch": 0.35052137832956926, + "grad_norm": 501.6451110839844, + "learning_rate": 2.4835290806947047e-06, + "loss": 51.5449, + "step": 86760 + }, + { + "epoch": 0.3505617795949369, + "grad_norm": 617.4530639648438, + "learning_rate": 2.4823104612914578e-06, + "loss": 41.4633, + "step": 86770 + }, + { + "epoch": 0.35060218086030454, + "grad_norm": 1717.3043212890625, + "learning_rate": 2.4810920422092137e-06, + "loss": 58.29, + "step": 86780 + }, + { + "epoch": 0.3506425821256722, + "grad_norm": 481.8612365722656, + "learning_rate": 2.4798738235449164e-06, + "loss": 52.251, + "step": 86790 + }, + { + "epoch": 0.3506829833910398, + "grad_norm": 303.1776123046875, + "learning_rate": 2.478655805395493e-06, + "loss": 29.4278, + "step": 86800 + }, + { + "epoch": 0.35072338465640746, + "grad_norm": 697.6649169921875, + "learning_rate": 2.477437987857856e-06, + "loss": 28.0373, + "step": 86810 + }, + { + "epoch": 0.3507637859217751, + "grad_norm": 433.3942565917969, + "learning_rate": 2.4762203710289008e-06, + "loss": 43.1259, + "step": 86820 + }, + { + "epoch": 0.3508041871871427, + "grad_norm": 377.6124267578125, + "learning_rate": 2.4750029550055098e-06, + "loss": 41.783, + "step": 86830 + }, + { + "epoch": 0.3508445884525103, + "grad_norm": 617.4815063476562, + "learning_rate": 2.473785739884544e-06, + "loss": 31.3333, + "step": 86840 + }, + { + "epoch": 0.35088498971787796, + "grad_norm": 614.5011596679688, + "learning_rate": 2.4725687257628533e-06, + "loss": 49.7944, + "step": 86850 + }, + { + "epoch": 0.3509253909832456, + "grad_norm": 322.9035949707031, + "learning_rate": 2.47135191273727e-06, + "loss": 47.4259, + "step": 86860 + }, + { + "epoch": 0.35096579224861324, + "grad_norm": 1108.1063232421875, + "learning_rate": 2.4701353009046075e-06, + "loss": 54.4417, + "step": 86870 + }, + { + "epoch": 0.3510061935139809, + "grad_norm": 719.0805053710938, + "learning_rate": 2.4689188903616707e-06, + "loss": 39.9168, + "step": 86880 + }, + { + "epoch": 0.35104659477934846, + "grad_norm": 545.470947265625, + "learning_rate": 2.467702681205241e-06, + "loss": 51.4684, + "step": 86890 + }, + { + "epoch": 0.3510869960447161, + "grad_norm": 158.2139129638672, + "learning_rate": 2.4664866735320886e-06, + "loss": 31.18, + "step": 86900 + }, + { + "epoch": 0.35112739731008374, + "grad_norm": 437.4032287597656, + "learning_rate": 2.4652708674389636e-06, + "loss": 29.5146, + "step": 86910 + }, + { + "epoch": 0.3511677985754514, + "grad_norm": 606.1836547851562, + "learning_rate": 2.464055263022605e-06, + "loss": 39.0574, + "step": 86920 + }, + { + "epoch": 0.351208199840819, + "grad_norm": 485.1193542480469, + "learning_rate": 2.462839860379729e-06, + "loss": 38.1961, + "step": 86930 + }, + { + "epoch": 0.35124860110618666, + "grad_norm": 377.4209899902344, + "learning_rate": 2.46162465960704e-06, + "loss": 34.7745, + "step": 86940 + }, + { + "epoch": 0.3512890023715543, + "grad_norm": 717.3018188476562, + "learning_rate": 2.460409660801229e-06, + "loss": 44.4682, + "step": 86950 + }, + { + "epoch": 0.3513294036369219, + "grad_norm": 621.1063842773438, + "learning_rate": 2.459194864058963e-06, + "loss": 64.5044, + "step": 86960 + }, + { + "epoch": 0.35136980490228953, + "grad_norm": 244.81198120117188, + "learning_rate": 2.457980269476903e-06, + "loss": 46.7034, + "step": 86970 + }, + { + "epoch": 0.35141020616765717, + "grad_norm": 1095.035888671875, + "learning_rate": 2.4567658771516876e-06, + "loss": 57.7799, + "step": 86980 + }, + { + "epoch": 0.3514506074330248, + "grad_norm": 163.78842163085938, + "learning_rate": 2.455551687179939e-06, + "loss": 27.2865, + "step": 86990 + }, + { + "epoch": 0.35149100869839245, + "grad_norm": 539.2749633789062, + "learning_rate": 2.454337699658267e-06, + "loss": 38.7721, + "step": 87000 + }, + { + "epoch": 0.3515314099637601, + "grad_norm": 544.2691040039062, + "learning_rate": 2.453123914683259e-06, + "loss": 43.5339, + "step": 87010 + }, + { + "epoch": 0.3515718112291277, + "grad_norm": 597.60546875, + "learning_rate": 2.4519103323514932e-06, + "loss": 45.6523, + "step": 87020 + }, + { + "epoch": 0.3516122124944953, + "grad_norm": 546.766845703125, + "learning_rate": 2.4506969527595277e-06, + "loss": 27.7563, + "step": 87030 + }, + { + "epoch": 0.35165261375986295, + "grad_norm": 974.383544921875, + "learning_rate": 2.4494837760039057e-06, + "loss": 39.1787, + "step": 87040 + }, + { + "epoch": 0.3516930150252306, + "grad_norm": 832.134033203125, + "learning_rate": 2.4482708021811546e-06, + "loss": 46.4646, + "step": 87050 + }, + { + "epoch": 0.35173341629059823, + "grad_norm": 668.964111328125, + "learning_rate": 2.4470580313877833e-06, + "loss": 48.1042, + "step": 87060 + }, + { + "epoch": 0.3517738175559659, + "grad_norm": 441.5453186035156, + "learning_rate": 2.44584546372029e-06, + "loss": 58.8005, + "step": 87070 + }, + { + "epoch": 0.35181421882133346, + "grad_norm": 1058.5311279296875, + "learning_rate": 2.4446330992751504e-06, + "loss": 34.4747, + "step": 87080 + }, + { + "epoch": 0.3518546200867011, + "grad_norm": 242.58157348632812, + "learning_rate": 2.44342093814883e-06, + "loss": 42.5668, + "step": 87090 + }, + { + "epoch": 0.35189502135206874, + "grad_norm": 530.7557373046875, + "learning_rate": 2.442208980437771e-06, + "loss": 38.0475, + "step": 87100 + }, + { + "epoch": 0.3519354226174364, + "grad_norm": 694.8820190429688, + "learning_rate": 2.4409972262384037e-06, + "loss": 52.2897, + "step": 87110 + }, + { + "epoch": 0.351975823882804, + "grad_norm": 1054.0111083984375, + "learning_rate": 2.4397856756471435e-06, + "loss": 56.8561, + "step": 87120 + }, + { + "epoch": 0.35201622514817166, + "grad_norm": 287.3025817871094, + "learning_rate": 2.438574328760387e-06, + "loss": 35.5717, + "step": 87130 + }, + { + "epoch": 0.3520566264135393, + "grad_norm": 538.0453491210938, + "learning_rate": 2.437363185674516e-06, + "loss": 47.153, + "step": 87140 + }, + { + "epoch": 0.3520970276789069, + "grad_norm": 286.7646179199219, + "learning_rate": 2.4361522464858956e-06, + "loss": 34.1812, + "step": 87150 + }, + { + "epoch": 0.3521374289442745, + "grad_norm": 717.658935546875, + "learning_rate": 2.434941511290872e-06, + "loss": 44.6221, + "step": 87160 + }, + { + "epoch": 0.35217783020964216, + "grad_norm": 574.4979858398438, + "learning_rate": 2.4337309801857846e-06, + "loss": 70.7453, + "step": 87170 + }, + { + "epoch": 0.3522182314750098, + "grad_norm": 518.8090209960938, + "learning_rate": 2.432520653266943e-06, + "loss": 35.0774, + "step": 87180 + }, + { + "epoch": 0.35225863274037744, + "grad_norm": 457.4261779785156, + "learning_rate": 2.4313105306306505e-06, + "loss": 44.7714, + "step": 87190 + }, + { + "epoch": 0.3522990340057451, + "grad_norm": 740.4395141601562, + "learning_rate": 2.4301006123731908e-06, + "loss": 42.6572, + "step": 87200 + }, + { + "epoch": 0.35233943527111267, + "grad_norm": 760.9652709960938, + "learning_rate": 2.4288908985908304e-06, + "loss": 60.802, + "step": 87210 + }, + { + "epoch": 0.3523798365364803, + "grad_norm": 451.7291564941406, + "learning_rate": 2.4276813893798212e-06, + "loss": 35.5246, + "step": 87220 + }, + { + "epoch": 0.35242023780184795, + "grad_norm": 364.14007568359375, + "learning_rate": 2.4264720848363992e-06, + "loss": 34.915, + "step": 87230 + }, + { + "epoch": 0.3524606390672156, + "grad_norm": 738.9100341796875, + "learning_rate": 2.4252629850567823e-06, + "loss": 42.3692, + "step": 87240 + }, + { + "epoch": 0.3525010403325832, + "grad_norm": 501.7611083984375, + "learning_rate": 2.4240540901371727e-06, + "loss": 40.0392, + "step": 87250 + }, + { + "epoch": 0.35254144159795087, + "grad_norm": 598.90771484375, + "learning_rate": 2.4228454001737576e-06, + "loss": 38.7727, + "step": 87260 + }, + { + "epoch": 0.3525818428633185, + "grad_norm": 807.11181640625, + "learning_rate": 2.421636915262707e-06, + "loss": 35.2477, + "step": 87270 + }, + { + "epoch": 0.3526222441286861, + "grad_norm": 421.2793273925781, + "learning_rate": 2.420428635500173e-06, + "loss": 28.5157, + "step": 87280 + }, + { + "epoch": 0.35266264539405373, + "grad_norm": 662.828369140625, + "learning_rate": 2.419220560982294e-06, + "loss": 55.7317, + "step": 87290 + }, + { + "epoch": 0.35270304665942137, + "grad_norm": 341.786865234375, + "learning_rate": 2.418012691805191e-06, + "loss": 36.8736, + "step": 87300 + }, + { + "epoch": 0.352743447924789, + "grad_norm": 423.5752258300781, + "learning_rate": 2.4168050280649686e-06, + "loss": 43.2021, + "step": 87310 + }, + { + "epoch": 0.35278384919015665, + "grad_norm": 643.5564575195312, + "learning_rate": 2.4155975698577146e-06, + "loss": 43.2181, + "step": 87320 + }, + { + "epoch": 0.3528242504555243, + "grad_norm": 885.1715698242188, + "learning_rate": 2.4143903172795014e-06, + "loss": 39.284, + "step": 87330 + }, + { + "epoch": 0.3528646517208919, + "grad_norm": 827.6829223632812, + "learning_rate": 2.4131832704263842e-06, + "loss": 57.596, + "step": 87340 + }, + { + "epoch": 0.3529050529862595, + "grad_norm": 947.4823608398438, + "learning_rate": 2.411976429394402e-06, + "loss": 29.5421, + "step": 87350 + }, + { + "epoch": 0.35294545425162716, + "grad_norm": 432.5012512207031, + "learning_rate": 2.4107697942795782e-06, + "loss": 95.7835, + "step": 87360 + }, + { + "epoch": 0.3529858555169948, + "grad_norm": 816.228515625, + "learning_rate": 2.4095633651779186e-06, + "loss": 42.3632, + "step": 87370 + }, + { + "epoch": 0.35302625678236244, + "grad_norm": 245.00999450683594, + "learning_rate": 2.4083571421854137e-06, + "loss": 32.5642, + "step": 87380 + }, + { + "epoch": 0.3530666580477301, + "grad_norm": 515.5824584960938, + "learning_rate": 2.407151125398037e-06, + "loss": 42.5482, + "step": 87390 + }, + { + "epoch": 0.35310705931309766, + "grad_norm": 478.2258605957031, + "learning_rate": 2.405945314911746e-06, + "loss": 39.545, + "step": 87400 + }, + { + "epoch": 0.3531474605784653, + "grad_norm": 330.6973876953125, + "learning_rate": 2.4047397108224807e-06, + "loss": 38.6373, + "step": 87410 + }, + { + "epoch": 0.35318786184383294, + "grad_norm": 654.366943359375, + "learning_rate": 2.403534313226166e-06, + "loss": 51.1304, + "step": 87420 + }, + { + "epoch": 0.3532282631092006, + "grad_norm": 235.28494262695312, + "learning_rate": 2.40232912221871e-06, + "loss": 38.8305, + "step": 87430 + }, + { + "epoch": 0.3532686643745682, + "grad_norm": 553.5276489257812, + "learning_rate": 2.4011241378960037e-06, + "loss": 65.192, + "step": 87440 + }, + { + "epoch": 0.35330906563993586, + "grad_norm": 500.3632507324219, + "learning_rate": 2.3999193603539234e-06, + "loss": 23.022, + "step": 87450 + }, + { + "epoch": 0.3533494669053035, + "grad_norm": 544.8861083984375, + "learning_rate": 2.3987147896883263e-06, + "loss": 34.551, + "step": 87460 + }, + { + "epoch": 0.3533898681706711, + "grad_norm": 642.550048828125, + "learning_rate": 2.397510425995055e-06, + "loss": 40.3579, + "step": 87470 + }, + { + "epoch": 0.3534302694360387, + "grad_norm": 371.6269836425781, + "learning_rate": 2.3963062693699353e-06, + "loss": 20.749, + "step": 87480 + }, + { + "epoch": 0.35347067070140636, + "grad_norm": 568.1721801757812, + "learning_rate": 2.3951023199087763e-06, + "loss": 50.5051, + "step": 87490 + }, + { + "epoch": 0.353511071966774, + "grad_norm": 823.19921875, + "learning_rate": 2.393898577707371e-06, + "loss": 25.9185, + "step": 87500 + }, + { + "epoch": 0.35355147323214164, + "grad_norm": 589.0402221679688, + "learning_rate": 2.392695042861495e-06, + "loss": 42.5533, + "step": 87510 + }, + { + "epoch": 0.3535918744975093, + "grad_norm": 417.7690124511719, + "learning_rate": 2.391491715466909e-06, + "loss": 35.2292, + "step": 87520 + }, + { + "epoch": 0.35363227576287687, + "grad_norm": 408.7934875488281, + "learning_rate": 2.390288595619356e-06, + "loss": 56.9616, + "step": 87530 + }, + { + "epoch": 0.3536726770282445, + "grad_norm": 623.8540649414062, + "learning_rate": 2.3890856834145625e-06, + "loss": 33.9323, + "step": 87540 + }, + { + "epoch": 0.35371307829361215, + "grad_norm": 927.5078125, + "learning_rate": 2.3878829789482385e-06, + "loss": 45.9919, + "step": 87550 + }, + { + "epoch": 0.3537534795589798, + "grad_norm": 467.7714538574219, + "learning_rate": 2.3866804823160776e-06, + "loss": 48.9872, + "step": 87560 + }, + { + "epoch": 0.35379388082434743, + "grad_norm": 214.3078155517578, + "learning_rate": 2.385478193613758e-06, + "loss": 44.1506, + "step": 87570 + }, + { + "epoch": 0.35383428208971507, + "grad_norm": 691.3468017578125, + "learning_rate": 2.3842761129369387e-06, + "loss": 37.2927, + "step": 87580 + }, + { + "epoch": 0.3538746833550827, + "grad_norm": 428.5848388671875, + "learning_rate": 2.3830742403812646e-06, + "loss": 29.9571, + "step": 87590 + }, + { + "epoch": 0.3539150846204503, + "grad_norm": 209.60064697265625, + "learning_rate": 2.381872576042365e-06, + "loss": 38.5981, + "step": 87600 + }, + { + "epoch": 0.35395548588581793, + "grad_norm": 737.27587890625, + "learning_rate": 2.3806711200158473e-06, + "loss": 44.0908, + "step": 87610 + }, + { + "epoch": 0.3539958871511856, + "grad_norm": 645.7940063476562, + "learning_rate": 2.3794698723973057e-06, + "loss": 36.5283, + "step": 87620 + }, + { + "epoch": 0.3540362884165532, + "grad_norm": 312.0558776855469, + "learning_rate": 2.3782688332823212e-06, + "loss": 23.5262, + "step": 87630 + }, + { + "epoch": 0.35407668968192085, + "grad_norm": 971.0711059570312, + "learning_rate": 2.3770680027664537e-06, + "loss": 44.3331, + "step": 87640 + }, + { + "epoch": 0.3541170909472885, + "grad_norm": 438.9453125, + "learning_rate": 2.3758673809452484e-06, + "loss": 32.176, + "step": 87650 + }, + { + "epoch": 0.3541574922126561, + "grad_norm": 674.4141845703125, + "learning_rate": 2.3746669679142315e-06, + "loss": 32.0949, + "step": 87660 + }, + { + "epoch": 0.3541978934780237, + "grad_norm": 443.1610412597656, + "learning_rate": 2.373466763768915e-06, + "loss": 47.1898, + "step": 87670 + }, + { + "epoch": 0.35423829474339136, + "grad_norm": 1148.020263671875, + "learning_rate": 2.3722667686047945e-06, + "loss": 40.4775, + "step": 87680 + }, + { + "epoch": 0.354278696008759, + "grad_norm": 792.3489379882812, + "learning_rate": 2.37106698251735e-06, + "loss": 54.9188, + "step": 87690 + }, + { + "epoch": 0.35431909727412664, + "grad_norm": 716.711669921875, + "learning_rate": 2.3698674056020378e-06, + "loss": 57.037, + "step": 87700 + }, + { + "epoch": 0.3543594985394943, + "grad_norm": 751.2996215820312, + "learning_rate": 2.3686680379543057e-06, + "loss": 44.8936, + "step": 87710 + }, + { + "epoch": 0.35439989980486186, + "grad_norm": 607.5596313476562, + "learning_rate": 2.36746887966958e-06, + "loss": 36.4576, + "step": 87720 + }, + { + "epoch": 0.3544403010702295, + "grad_norm": 530.6735229492188, + "learning_rate": 2.366269930843275e-06, + "loss": 47.7748, + "step": 87730 + }, + { + "epoch": 0.35448070233559714, + "grad_norm": 324.5372314453125, + "learning_rate": 2.3650711915707852e-06, + "loss": 29.4486, + "step": 87740 + }, + { + "epoch": 0.3545211036009648, + "grad_norm": 1344.857666015625, + "learning_rate": 2.363872661947488e-06, + "loss": 47.2667, + "step": 87750 + }, + { + "epoch": 0.3545615048663324, + "grad_norm": 412.0323791503906, + "learning_rate": 2.362674342068744e-06, + "loss": 31.9723, + "step": 87760 + }, + { + "epoch": 0.35460190613170006, + "grad_norm": 391.2769470214844, + "learning_rate": 2.3614762320299e-06, + "loss": 38.9474, + "step": 87770 + }, + { + "epoch": 0.3546423073970677, + "grad_norm": 716.4356079101562, + "learning_rate": 2.3602783319262847e-06, + "loss": 42.3854, + "step": 87780 + }, + { + "epoch": 0.3546827086624353, + "grad_norm": 445.8362121582031, + "learning_rate": 2.3590806418532052e-06, + "loss": 43.7234, + "step": 87790 + }, + { + "epoch": 0.3547231099278029, + "grad_norm": 353.9080505371094, + "learning_rate": 2.3578831619059595e-06, + "loss": 36.2803, + "step": 87800 + }, + { + "epoch": 0.35476351119317057, + "grad_norm": 510.5903625488281, + "learning_rate": 2.3566858921798246e-06, + "loss": 40.1896, + "step": 87810 + }, + { + "epoch": 0.3548039124585382, + "grad_norm": 433.15924072265625, + "learning_rate": 2.3554888327700604e-06, + "loss": 46.1273, + "step": 87820 + }, + { + "epoch": 0.35484431372390585, + "grad_norm": 345.576171875, + "learning_rate": 2.3542919837719154e-06, + "loss": 38.3756, + "step": 87830 + }, + { + "epoch": 0.3548847149892735, + "grad_norm": 631.8955688476562, + "learning_rate": 2.3530953452806143e-06, + "loss": 45.1479, + "step": 87840 + }, + { + "epoch": 0.35492511625464107, + "grad_norm": 516.5052490234375, + "learning_rate": 2.351898917391369e-06, + "loss": 39.7578, + "step": 87850 + }, + { + "epoch": 0.3549655175200087, + "grad_norm": 617.366943359375, + "learning_rate": 2.350702700199376e-06, + "loss": 36.3532, + "step": 87860 + }, + { + "epoch": 0.35500591878537635, + "grad_norm": 514.8583984375, + "learning_rate": 2.3495066937998085e-06, + "loss": 47.3744, + "step": 87870 + }, + { + "epoch": 0.355046320050744, + "grad_norm": 856.524658203125, + "learning_rate": 2.3483108982878294e-06, + "loss": 38.0377, + "step": 87880 + }, + { + "epoch": 0.35508672131611163, + "grad_norm": 236.63670349121094, + "learning_rate": 2.3471153137585823e-06, + "loss": 25.1089, + "step": 87890 + }, + { + "epoch": 0.35512712258147927, + "grad_norm": 530.3377075195312, + "learning_rate": 2.345919940307195e-06, + "loss": 51.7881, + "step": 87900 + }, + { + "epoch": 0.3551675238468469, + "grad_norm": 340.4045104980469, + "learning_rate": 2.3447247780287746e-06, + "loss": 47.1669, + "step": 87910 + }, + { + "epoch": 0.3552079251122145, + "grad_norm": 433.123291015625, + "learning_rate": 2.3435298270184204e-06, + "loss": 24.2755, + "step": 87920 + }, + { + "epoch": 0.35524832637758214, + "grad_norm": 393.7866516113281, + "learning_rate": 2.3423350873712057e-06, + "loss": 50.8094, + "step": 87930 + }, + { + "epoch": 0.3552887276429498, + "grad_norm": 695.3616333007812, + "learning_rate": 2.341140559182192e-06, + "loss": 38.8656, + "step": 87940 + }, + { + "epoch": 0.3553291289083174, + "grad_norm": 655.1812133789062, + "learning_rate": 2.339946242546422e-06, + "loss": 58.7881, + "step": 87950 + }, + { + "epoch": 0.35536953017368506, + "grad_norm": 789.6603393554688, + "learning_rate": 2.3387521375589205e-06, + "loss": 45.0847, + "step": 87960 + }, + { + "epoch": 0.3554099314390527, + "grad_norm": 634.9356079101562, + "learning_rate": 2.3375582443146977e-06, + "loss": 35.3675, + "step": 87970 + }, + { + "epoch": 0.3554503327044203, + "grad_norm": 287.3161926269531, + "learning_rate": 2.3363645629087467e-06, + "loss": 30.8269, + "step": 87980 + }, + { + "epoch": 0.3554907339697879, + "grad_norm": 454.5109558105469, + "learning_rate": 2.3351710934360426e-06, + "loss": 37.7506, + "step": 87990 + }, + { + "epoch": 0.35553113523515556, + "grad_norm": 556.6475219726562, + "learning_rate": 2.333977835991545e-06, + "loss": 41.6105, + "step": 88000 + }, + { + "epoch": 0.3555715365005232, + "grad_norm": 367.1153564453125, + "learning_rate": 2.3327847906701932e-06, + "loss": 31.0596, + "step": 88010 + }, + { + "epoch": 0.35561193776589084, + "grad_norm": 910.18798828125, + "learning_rate": 2.3315919575669172e-06, + "loss": 40.8386, + "step": 88020 + }, + { + "epoch": 0.3556523390312585, + "grad_norm": 909.4761352539062, + "learning_rate": 2.330399336776625e-06, + "loss": 28.7883, + "step": 88030 + }, + { + "epoch": 0.35569274029662606, + "grad_norm": 1147.876953125, + "learning_rate": 2.329206928394203e-06, + "loss": 50.8721, + "step": 88040 + }, + { + "epoch": 0.3557331415619937, + "grad_norm": 455.9250793457031, + "learning_rate": 2.3280147325145285e-06, + "loss": 39.5445, + "step": 88050 + }, + { + "epoch": 0.35577354282736134, + "grad_norm": 406.4186706542969, + "learning_rate": 2.3268227492324594e-06, + "loss": 43.1811, + "step": 88060 + }, + { + "epoch": 0.355813944092729, + "grad_norm": 719.1936645507812, + "learning_rate": 2.325630978642836e-06, + "loss": 45.485, + "step": 88070 + }, + { + "epoch": 0.3558543453580966, + "grad_norm": 972.1707153320312, + "learning_rate": 2.3244394208404816e-06, + "loss": 46.7824, + "step": 88080 + }, + { + "epoch": 0.35589474662346426, + "grad_norm": 1423.513427734375, + "learning_rate": 2.3232480759202035e-06, + "loss": 43.499, + "step": 88090 + }, + { + "epoch": 0.3559351478888319, + "grad_norm": 448.8571472167969, + "learning_rate": 2.3220569439767907e-06, + "loss": 58.1903, + "step": 88100 + }, + { + "epoch": 0.3559755491541995, + "grad_norm": 458.1613464355469, + "learning_rate": 2.320866025105016e-06, + "loss": 45.6451, + "step": 88110 + }, + { + "epoch": 0.35601595041956713, + "grad_norm": 691.4515991210938, + "learning_rate": 2.319675319399639e-06, + "loss": 41.7642, + "step": 88120 + }, + { + "epoch": 0.35605635168493477, + "grad_norm": 776.6409301757812, + "learning_rate": 2.3184848269553944e-06, + "loss": 46.5186, + "step": 88130 + }, + { + "epoch": 0.3560967529503024, + "grad_norm": 742.6858520507812, + "learning_rate": 2.3172945478670056e-06, + "loss": 30.7103, + "step": 88140 + }, + { + "epoch": 0.35613715421567005, + "grad_norm": 545.2284545898438, + "learning_rate": 2.316104482229178e-06, + "loss": 50.1192, + "step": 88150 + }, + { + "epoch": 0.3561775554810377, + "grad_norm": 771.6258544921875, + "learning_rate": 2.314914630136599e-06, + "loss": 53.8035, + "step": 88160 + }, + { + "epoch": 0.3562179567464053, + "grad_norm": 383.92913818359375, + "learning_rate": 2.3137249916839394e-06, + "loss": 38.666, + "step": 88170 + }, + { + "epoch": 0.3562583580117729, + "grad_norm": 750.7330322265625, + "learning_rate": 2.3125355669658547e-06, + "loss": 34.8775, + "step": 88180 + }, + { + "epoch": 0.35629875927714055, + "grad_norm": 659.4488525390625, + "learning_rate": 2.3113463560769807e-06, + "loss": 43.7715, + "step": 88190 + }, + { + "epoch": 0.3563391605425082, + "grad_norm": 899.4495239257812, + "learning_rate": 2.310157359111938e-06, + "loss": 44.3542, + "step": 88200 + }, + { + "epoch": 0.35637956180787583, + "grad_norm": 589.4261474609375, + "learning_rate": 2.3089685761653296e-06, + "loss": 40.3078, + "step": 88210 + }, + { + "epoch": 0.3564199630732435, + "grad_norm": 3914.93994140625, + "learning_rate": 2.3077800073317415e-06, + "loss": 73.6616, + "step": 88220 + }, + { + "epoch": 0.35646036433861106, + "grad_norm": 402.8006591796875, + "learning_rate": 2.3065916527057426e-06, + "loss": 45.9411, + "step": 88230 + }, + { + "epoch": 0.3565007656039787, + "grad_norm": 539.3480834960938, + "learning_rate": 2.305403512381884e-06, + "loss": 71.7619, + "step": 88240 + }, + { + "epoch": 0.35654116686934634, + "grad_norm": 945.126708984375, + "learning_rate": 2.3042155864547024e-06, + "loss": 43.7147, + "step": 88250 + }, + { + "epoch": 0.356581568134714, + "grad_norm": 355.734375, + "learning_rate": 2.303027875018714e-06, + "loss": 27.9518, + "step": 88260 + }, + { + "epoch": 0.3566219694000816, + "grad_norm": 208.47483825683594, + "learning_rate": 2.3018403781684205e-06, + "loss": 33.6595, + "step": 88270 + }, + { + "epoch": 0.35666237066544926, + "grad_norm": 313.4412536621094, + "learning_rate": 2.3006530959983055e-06, + "loss": 34.5666, + "step": 88280 + }, + { + "epoch": 0.3567027719308169, + "grad_norm": 224.41334533691406, + "learning_rate": 2.299466028602835e-06, + "loss": 44.1448, + "step": 88290 + }, + { + "epoch": 0.3567431731961845, + "grad_norm": 644.6503295898438, + "learning_rate": 2.298279176076459e-06, + "loss": 39.4249, + "step": 88300 + }, + { + "epoch": 0.3567835744615521, + "grad_norm": 1147.67919921875, + "learning_rate": 2.2970925385136093e-06, + "loss": 65.6626, + "step": 88310 + }, + { + "epoch": 0.35682397572691976, + "grad_norm": 724.1583862304688, + "learning_rate": 2.295906116008702e-06, + "loss": 47.0453, + "step": 88320 + }, + { + "epoch": 0.3568643769922874, + "grad_norm": 757.2796630859375, + "learning_rate": 2.2947199086561346e-06, + "loss": 36.3413, + "step": 88330 + }, + { + "epoch": 0.35690477825765504, + "grad_norm": 926.6180419921875, + "learning_rate": 2.293533916550289e-06, + "loss": 52.0724, + "step": 88340 + }, + { + "epoch": 0.3569451795230227, + "grad_norm": 459.0198059082031, + "learning_rate": 2.292348139785528e-06, + "loss": 44.2652, + "step": 88350 + }, + { + "epoch": 0.35698558078839027, + "grad_norm": 405.31219482421875, + "learning_rate": 2.2911625784562e-06, + "loss": 35.9227, + "step": 88360 + }, + { + "epoch": 0.3570259820537579, + "grad_norm": 568.8250122070312, + "learning_rate": 2.2899772326566327e-06, + "loss": 35.6614, + "step": 88370 + }, + { + "epoch": 0.35706638331912555, + "grad_norm": 277.4350891113281, + "learning_rate": 2.2887921024811405e-06, + "loss": 39.3197, + "step": 88380 + }, + { + "epoch": 0.3571067845844932, + "grad_norm": 483.451171875, + "learning_rate": 2.2876071880240174e-06, + "loss": 41.9486, + "step": 88390 + }, + { + "epoch": 0.3571471858498608, + "grad_norm": 765.7012939453125, + "learning_rate": 2.2864224893795423e-06, + "loss": 42.5878, + "step": 88400 + }, + { + "epoch": 0.35718758711522847, + "grad_norm": 363.0635681152344, + "learning_rate": 2.285238006641976e-06, + "loss": 42.8957, + "step": 88410 + }, + { + "epoch": 0.3572279883805961, + "grad_norm": 417.99847412109375, + "learning_rate": 2.284053739905563e-06, + "loss": 37.0467, + "step": 88420 + }, + { + "epoch": 0.3572683896459637, + "grad_norm": 748.5416870117188, + "learning_rate": 2.282869689264529e-06, + "loss": 42.7998, + "step": 88430 + }, + { + "epoch": 0.35730879091133133, + "grad_norm": 738.539794921875, + "learning_rate": 2.2816858548130837e-06, + "loss": 29.4397, + "step": 88440 + }, + { + "epoch": 0.35734919217669897, + "grad_norm": 297.82244873046875, + "learning_rate": 2.28050223664542e-06, + "loss": 33.6989, + "step": 88450 + }, + { + "epoch": 0.3573895934420666, + "grad_norm": 619.92822265625, + "learning_rate": 2.2793188348557136e-06, + "loss": 57.3341, + "step": 88460 + }, + { + "epoch": 0.35742999470743425, + "grad_norm": 862.1347045898438, + "learning_rate": 2.2781356495381186e-06, + "loss": 43.8701, + "step": 88470 + }, + { + "epoch": 0.3574703959728019, + "grad_norm": 345.45556640625, + "learning_rate": 2.276952680786779e-06, + "loss": 33.5217, + "step": 88480 + }, + { + "epoch": 0.3575107972381695, + "grad_norm": 1218.187744140625, + "learning_rate": 2.2757699286958186e-06, + "loss": 47.879, + "step": 88490 + }, + { + "epoch": 0.3575511985035371, + "grad_norm": 938.0349731445312, + "learning_rate": 2.274587393359342e-06, + "loss": 37.6885, + "step": 88500 + }, + { + "epoch": 0.35759159976890476, + "grad_norm": 448.3917541503906, + "learning_rate": 2.273405074871438e-06, + "loss": 51.9774, + "step": 88510 + }, + { + "epoch": 0.3576320010342724, + "grad_norm": 254.905029296875, + "learning_rate": 2.2722229733261795e-06, + "loss": 35.7465, + "step": 88520 + }, + { + "epoch": 0.35767240229964004, + "grad_norm": 524.2755126953125, + "learning_rate": 2.2710410888176205e-06, + "loss": 51.7829, + "step": 88530 + }, + { + "epoch": 0.3577128035650077, + "grad_norm": 1088.100830078125, + "learning_rate": 2.2698594214397966e-06, + "loss": 48.4789, + "step": 88540 + }, + { + "epoch": 0.35775320483037526, + "grad_norm": 358.4494323730469, + "learning_rate": 2.268677971286732e-06, + "loss": 32.0552, + "step": 88550 + }, + { + "epoch": 0.3577936060957429, + "grad_norm": 286.228515625, + "learning_rate": 2.2674967384524237e-06, + "loss": 49.5649, + "step": 88560 + }, + { + "epoch": 0.35783400736111054, + "grad_norm": 322.6353454589844, + "learning_rate": 2.2663157230308576e-06, + "loss": 38.7459, + "step": 88570 + }, + { + "epoch": 0.3578744086264782, + "grad_norm": 520.438232421875, + "learning_rate": 2.2651349251160055e-06, + "loss": 50.6289, + "step": 88580 + }, + { + "epoch": 0.3579148098918458, + "grad_norm": 638.7794799804688, + "learning_rate": 2.263954344801816e-06, + "loss": 38.1684, + "step": 88590 + }, + { + "epoch": 0.35795521115721346, + "grad_norm": 1405.0643310546875, + "learning_rate": 2.2627739821822226e-06, + "loss": 46.8707, + "step": 88600 + }, + { + "epoch": 0.3579956124225811, + "grad_norm": 1341.0897216796875, + "learning_rate": 2.261593837351141e-06, + "loss": 52.1858, + "step": 88610 + }, + { + "epoch": 0.3580360136879487, + "grad_norm": 425.5408020019531, + "learning_rate": 2.26041391040247e-06, + "loss": 34.8545, + "step": 88620 + }, + { + "epoch": 0.3580764149533163, + "grad_norm": 587.6309814453125, + "learning_rate": 2.259234201430092e-06, + "loss": 28.1153, + "step": 88630 + }, + { + "epoch": 0.35811681621868396, + "grad_norm": 962.7505493164062, + "learning_rate": 2.2580547105278716e-06, + "loss": 48.4188, + "step": 88640 + }, + { + "epoch": 0.3581572174840516, + "grad_norm": 438.6845703125, + "learning_rate": 2.2568754377896516e-06, + "loss": 35.5792, + "step": 88650 + }, + { + "epoch": 0.35819761874941924, + "grad_norm": 464.7343444824219, + "learning_rate": 2.255696383309265e-06, + "loss": 46.6391, + "step": 88660 + }, + { + "epoch": 0.3582380200147869, + "grad_norm": 867.2921752929688, + "learning_rate": 2.2545175471805197e-06, + "loss": 61.4531, + "step": 88670 + }, + { + "epoch": 0.35827842128015447, + "grad_norm": 221.92137145996094, + "learning_rate": 2.2533389294972153e-06, + "loss": 36.2097, + "step": 88680 + }, + { + "epoch": 0.3583188225455221, + "grad_norm": 346.0577087402344, + "learning_rate": 2.2521605303531267e-06, + "loss": 42.6694, + "step": 88690 + }, + { + "epoch": 0.35835922381088975, + "grad_norm": 953.293701171875, + "learning_rate": 2.2509823498420142e-06, + "loss": 41.7837, + "step": 88700 + }, + { + "epoch": 0.3583996250762574, + "grad_norm": 459.3766174316406, + "learning_rate": 2.2498043880576193e-06, + "loss": 35.375, + "step": 88710 + }, + { + "epoch": 0.35844002634162503, + "grad_norm": 735.3365478515625, + "learning_rate": 2.2486266450936695e-06, + "loss": 49.8161, + "step": 88720 + }, + { + "epoch": 0.35848042760699267, + "grad_norm": 488.2784118652344, + "learning_rate": 2.2474491210438687e-06, + "loss": 45.6757, + "step": 88730 + }, + { + "epoch": 0.3585208288723603, + "grad_norm": 918.28515625, + "learning_rate": 2.2462718160019086e-06, + "loss": 69.573, + "step": 88740 + }, + { + "epoch": 0.3585612301377279, + "grad_norm": 472.2066345214844, + "learning_rate": 2.245094730061463e-06, + "loss": 39.619, + "step": 88750 + }, + { + "epoch": 0.35860163140309553, + "grad_norm": 425.4399719238281, + "learning_rate": 2.2439178633161855e-06, + "loss": 46.1708, + "step": 88760 + }, + { + "epoch": 0.3586420326684632, + "grad_norm": 776.6825561523438, + "learning_rate": 2.2427412158597133e-06, + "loss": 49.0356, + "step": 88770 + }, + { + "epoch": 0.3586824339338308, + "grad_norm": 911.3529052734375, + "learning_rate": 2.2415647877856706e-06, + "loss": 59.9118, + "step": 88780 + }, + { + "epoch": 0.35872283519919845, + "grad_norm": 230.06333923339844, + "learning_rate": 2.240388579187658e-06, + "loss": 48.1842, + "step": 88790 + }, + { + "epoch": 0.3587632364645661, + "grad_norm": 549.273193359375, + "learning_rate": 2.2392125901592615e-06, + "loss": 37.3137, + "step": 88800 + }, + { + "epoch": 0.3588036377299337, + "grad_norm": 1127.8165283203125, + "learning_rate": 2.23803682079405e-06, + "loss": 48.1943, + "step": 88810 + }, + { + "epoch": 0.3588440389953013, + "grad_norm": 4227.5986328125, + "learning_rate": 2.236861271185572e-06, + "loss": 75.6474, + "step": 88820 + }, + { + "epoch": 0.35888444026066896, + "grad_norm": 596.3049926757812, + "learning_rate": 2.2356859414273613e-06, + "loss": 34.7695, + "step": 88830 + }, + { + "epoch": 0.3589248415260366, + "grad_norm": 529.9871826171875, + "learning_rate": 2.2345108316129333e-06, + "loss": 53.1124, + "step": 88840 + }, + { + "epoch": 0.35896524279140424, + "grad_norm": 577.8896484375, + "learning_rate": 2.233335941835787e-06, + "loss": 46.6854, + "step": 88850 + }, + { + "epoch": 0.3590056440567719, + "grad_norm": 656.1891479492188, + "learning_rate": 2.232161272189401e-06, + "loss": 48.7999, + "step": 88860 + }, + { + "epoch": 0.35904604532213946, + "grad_norm": 328.6973876953125, + "learning_rate": 2.230986822767241e-06, + "loss": 36.8326, + "step": 88870 + }, + { + "epoch": 0.3590864465875071, + "grad_norm": 777.5918579101562, + "learning_rate": 2.2298125936627517e-06, + "loss": 44.1089, + "step": 88880 + }, + { + "epoch": 0.35912684785287474, + "grad_norm": 395.1194152832031, + "learning_rate": 2.228638584969363e-06, + "loss": 28.7259, + "step": 88890 + }, + { + "epoch": 0.3591672491182424, + "grad_norm": 637.8306274414062, + "learning_rate": 2.227464796780481e-06, + "loss": 40.9933, + "step": 88900 + }, + { + "epoch": 0.35920765038361, + "grad_norm": 685.5401000976562, + "learning_rate": 2.226291229189501e-06, + "loss": 53.4521, + "step": 88910 + }, + { + "epoch": 0.35924805164897766, + "grad_norm": 592.7680053710938, + "learning_rate": 2.225117882289799e-06, + "loss": 35.8013, + "step": 88920 + }, + { + "epoch": 0.3592884529143453, + "grad_norm": 433.8973388671875, + "learning_rate": 2.223944756174731e-06, + "loss": 44.9199, + "step": 88930 + }, + { + "epoch": 0.3593288541797129, + "grad_norm": 706.0247802734375, + "learning_rate": 2.2227718509376395e-06, + "loss": 44.5, + "step": 88940 + }, + { + "epoch": 0.3593692554450805, + "grad_norm": 437.9292907714844, + "learning_rate": 2.221599166671845e-06, + "loss": 35.4113, + "step": 88950 + }, + { + "epoch": 0.35940965671044817, + "grad_norm": 680.7326049804688, + "learning_rate": 2.220426703470653e-06, + "loss": 33.6723, + "step": 88960 + }, + { + "epoch": 0.3594500579758158, + "grad_norm": 340.7998046875, + "learning_rate": 2.2192544614273526e-06, + "loss": 59.2249, + "step": 88970 + }, + { + "epoch": 0.35949045924118345, + "grad_norm": 569.3303833007812, + "learning_rate": 2.218082440635215e-06, + "loss": 51.8298, + "step": 88980 + }, + { + "epoch": 0.3595308605065511, + "grad_norm": 586.7805786132812, + "learning_rate": 2.216910641187488e-06, + "loss": 33.3345, + "step": 88990 + }, + { + "epoch": 0.35957126177191867, + "grad_norm": 660.5562133789062, + "learning_rate": 2.215739063177409e-06, + "loss": 39.8882, + "step": 89000 + }, + { + "epoch": 0.3596116630372863, + "grad_norm": 328.0276794433594, + "learning_rate": 2.2145677066981948e-06, + "loss": 48.4375, + "step": 89010 + }, + { + "epoch": 0.35965206430265395, + "grad_norm": 740.8335571289062, + "learning_rate": 2.213396571843045e-06, + "loss": 41.5751, + "step": 89020 + }, + { + "epoch": 0.3596924655680216, + "grad_norm": 663.4422607421875, + "learning_rate": 2.2122256587051404e-06, + "loss": 44.6787, + "step": 89030 + }, + { + "epoch": 0.35973286683338923, + "grad_norm": 693.177001953125, + "learning_rate": 2.211054967377647e-06, + "loss": 39.8156, + "step": 89040 + }, + { + "epoch": 0.35977326809875687, + "grad_norm": 424.6520690917969, + "learning_rate": 2.2098844979537093e-06, + "loss": 43.0032, + "step": 89050 + }, + { + "epoch": 0.3598136693641245, + "grad_norm": 398.33245849609375, + "learning_rate": 2.208714250526456e-06, + "loss": 51.8167, + "step": 89060 + }, + { + "epoch": 0.3598540706294921, + "grad_norm": 235.34942626953125, + "learning_rate": 2.207544225189003e-06, + "loss": 31.962, + "step": 89070 + }, + { + "epoch": 0.35989447189485974, + "grad_norm": 524.5186767578125, + "learning_rate": 2.2063744220344386e-06, + "loss": 31.0559, + "step": 89080 + }, + { + "epoch": 0.3599348731602274, + "grad_norm": 447.48516845703125, + "learning_rate": 2.2052048411558403e-06, + "loss": 26.2319, + "step": 89090 + }, + { + "epoch": 0.359975274425595, + "grad_norm": 971.1300659179688, + "learning_rate": 2.204035482646267e-06, + "loss": 51.9521, + "step": 89100 + }, + { + "epoch": 0.36001567569096266, + "grad_norm": 617.1143798828125, + "learning_rate": 2.2028663465987576e-06, + "loss": 36.2012, + "step": 89110 + }, + { + "epoch": 0.3600560769563303, + "grad_norm": 752.3304443359375, + "learning_rate": 2.201697433106336e-06, + "loss": 37.9972, + "step": 89120 + }, + { + "epoch": 0.3600964782216979, + "grad_norm": 575.4898681640625, + "learning_rate": 2.2005287422620083e-06, + "loss": 47.5751, + "step": 89130 + }, + { + "epoch": 0.3601368794870655, + "grad_norm": 440.1522521972656, + "learning_rate": 2.19936027415876e-06, + "loss": 29.9773, + "step": 89140 + }, + { + "epoch": 0.36017728075243316, + "grad_norm": 423.9078369140625, + "learning_rate": 2.1981920288895615e-06, + "loss": 36.8926, + "step": 89150 + }, + { + "epoch": 0.3602176820178008, + "grad_norm": 772.270751953125, + "learning_rate": 2.197024006547364e-06, + "loss": 32.4956, + "step": 89160 + }, + { + "epoch": 0.36025808328316844, + "grad_norm": 669.4854736328125, + "learning_rate": 2.1958562072251033e-06, + "loss": 46.3644, + "step": 89170 + }, + { + "epoch": 0.3602984845485361, + "grad_norm": 997.831298828125, + "learning_rate": 2.1946886310156945e-06, + "loss": 45.0814, + "step": 89180 + }, + { + "epoch": 0.36033888581390366, + "grad_norm": 661.4156494140625, + "learning_rate": 2.193521278012037e-06, + "loss": 59.7783, + "step": 89190 + }, + { + "epoch": 0.3603792870792713, + "grad_norm": 539.9761352539062, + "learning_rate": 2.1923541483070114e-06, + "loss": 34.3439, + "step": 89200 + }, + { + "epoch": 0.36041968834463894, + "grad_norm": 385.9214172363281, + "learning_rate": 2.1911872419934804e-06, + "loss": 55.5859, + "step": 89210 + }, + { + "epoch": 0.3604600896100066, + "grad_norm": 266.3016052246094, + "learning_rate": 2.1900205591642904e-06, + "loss": 33.0835, + "step": 89220 + }, + { + "epoch": 0.3605004908753742, + "grad_norm": 682.5214233398438, + "learning_rate": 2.188854099912268e-06, + "loss": 40.3408, + "step": 89230 + }, + { + "epoch": 0.36054089214074186, + "grad_norm": 460.3638916015625, + "learning_rate": 2.187687864330224e-06, + "loss": 30.4106, + "step": 89240 + }, + { + "epoch": 0.3605812934061095, + "grad_norm": 724.8146362304688, + "learning_rate": 2.1865218525109496e-06, + "loss": 37.4181, + "step": 89250 + }, + { + "epoch": 0.3606216946714771, + "grad_norm": 436.9694519042969, + "learning_rate": 2.185356064547219e-06, + "loss": 46.0752, + "step": 89260 + }, + { + "epoch": 0.36066209593684473, + "grad_norm": 517.351806640625, + "learning_rate": 2.1841905005317893e-06, + "loss": 51.404, + "step": 89270 + }, + { + "epoch": 0.36070249720221237, + "grad_norm": 626.5015258789062, + "learning_rate": 2.1830251605573984e-06, + "loss": 68.4701, + "step": 89280 + }, + { + "epoch": 0.36074289846758, + "grad_norm": 686.57275390625, + "learning_rate": 2.181860044716767e-06, + "loss": 53.143, + "step": 89290 + }, + { + "epoch": 0.36078329973294765, + "grad_norm": 6181.7060546875, + "learning_rate": 2.180695153102599e-06, + "loss": 52.3037, + "step": 89300 + }, + { + "epoch": 0.3608237009983153, + "grad_norm": 739.2041625976562, + "learning_rate": 2.179530485807578e-06, + "loss": 46.834, + "step": 89310 + }, + { + "epoch": 0.3608641022636829, + "grad_norm": 340.6275329589844, + "learning_rate": 2.1783660429243747e-06, + "loss": 42.4703, + "step": 89320 + }, + { + "epoch": 0.3609045035290505, + "grad_norm": 594.989501953125, + "learning_rate": 2.177201824545632e-06, + "loss": 35.3637, + "step": 89330 + }, + { + "epoch": 0.36094490479441815, + "grad_norm": 560.0608520507812, + "learning_rate": 2.1760378307639867e-06, + "loss": 35.9234, + "step": 89340 + }, + { + "epoch": 0.3609853060597858, + "grad_norm": 583.403076171875, + "learning_rate": 2.174874061672051e-06, + "loss": 59.6438, + "step": 89350 + }, + { + "epoch": 0.36102570732515343, + "grad_norm": 567.5617065429688, + "learning_rate": 2.173710517362421e-06, + "loss": 37.6246, + "step": 89360 + }, + { + "epoch": 0.3610661085905211, + "grad_norm": 424.9427795410156, + "learning_rate": 2.1725471979276734e-06, + "loss": 24.9531, + "step": 89370 + }, + { + "epoch": 0.3611065098558887, + "grad_norm": 458.8323974609375, + "learning_rate": 2.17138410346037e-06, + "loss": 50.1594, + "step": 89380 + }, + { + "epoch": 0.3611469111212563, + "grad_norm": 488.1316223144531, + "learning_rate": 2.1702212340530515e-06, + "loss": 48.1364, + "step": 89390 + }, + { + "epoch": 0.36118731238662394, + "grad_norm": 711.227294921875, + "learning_rate": 2.1690585897982423e-06, + "loss": 32.577, + "step": 89400 + }, + { + "epoch": 0.3612277136519916, + "grad_norm": 309.82220458984375, + "learning_rate": 2.167896170788451e-06, + "loss": 20.7099, + "step": 89410 + }, + { + "epoch": 0.3612681149173592, + "grad_norm": 744.2542114257812, + "learning_rate": 2.16673397711616e-06, + "loss": 51.5083, + "step": 89420 + }, + { + "epoch": 0.36130851618272686, + "grad_norm": 960.5636596679688, + "learning_rate": 2.165572008873845e-06, + "loss": 36.2448, + "step": 89430 + }, + { + "epoch": 0.3613489174480945, + "grad_norm": 556.6470947265625, + "learning_rate": 2.1644102661539573e-06, + "loss": 50.8981, + "step": 89440 + }, + { + "epoch": 0.3613893187134621, + "grad_norm": 344.86383056640625, + "learning_rate": 2.1632487490489314e-06, + "loss": 35.3776, + "step": 89450 + }, + { + "epoch": 0.3614297199788297, + "grad_norm": 1143.32861328125, + "learning_rate": 2.162087457651183e-06, + "loss": 55.0619, + "step": 89460 + }, + { + "epoch": 0.36147012124419736, + "grad_norm": 555.110107421875, + "learning_rate": 2.1609263920531115e-06, + "loss": 49.6263, + "step": 89470 + }, + { + "epoch": 0.361510522509565, + "grad_norm": 599.9452514648438, + "learning_rate": 2.159765552347098e-06, + "loss": 32.7204, + "step": 89480 + }, + { + "epoch": 0.36155092377493264, + "grad_norm": 696.069091796875, + "learning_rate": 2.1586049386255036e-06, + "loss": 31.2028, + "step": 89490 + }, + { + "epoch": 0.3615913250403003, + "grad_norm": 623.7810668945312, + "learning_rate": 2.1574445509806764e-06, + "loss": 51.1999, + "step": 89500 + }, + { + "epoch": 0.36163172630566787, + "grad_norm": 922.6550903320312, + "learning_rate": 2.156284389504939e-06, + "loss": 50.31, + "step": 89510 + }, + { + "epoch": 0.3616721275710355, + "grad_norm": 480.38385009765625, + "learning_rate": 2.1551244542905995e-06, + "loss": 53.4307, + "step": 89520 + }, + { + "epoch": 0.36171252883640315, + "grad_norm": 381.1392517089844, + "learning_rate": 2.1539647454299535e-06, + "loss": 37.8111, + "step": 89530 + }, + { + "epoch": 0.3617529301017708, + "grad_norm": 807.1481323242188, + "learning_rate": 2.152805263015271e-06, + "loss": 35.2265, + "step": 89540 + }, + { + "epoch": 0.3617933313671384, + "grad_norm": 414.92742919921875, + "learning_rate": 2.1516460071388062e-06, + "loss": 34.6921, + "step": 89550 + }, + { + "epoch": 0.36183373263250607, + "grad_norm": 878.7418823242188, + "learning_rate": 2.1504869778927965e-06, + "loss": 56.5126, + "step": 89560 + }, + { + "epoch": 0.3618741338978737, + "grad_norm": 207.6223602294922, + "learning_rate": 2.149328175369461e-06, + "loss": 23.0313, + "step": 89570 + }, + { + "epoch": 0.3619145351632413, + "grad_norm": 780.982177734375, + "learning_rate": 2.148169599661001e-06, + "loss": 39.0533, + "step": 89580 + }, + { + "epoch": 0.36195493642860893, + "grad_norm": 403.15582275390625, + "learning_rate": 2.147011250859597e-06, + "loss": 32.8454, + "step": 89590 + }, + { + "epoch": 0.36199533769397657, + "grad_norm": 231.27716064453125, + "learning_rate": 2.1458531290574138e-06, + "loss": 28.3538, + "step": 89600 + }, + { + "epoch": 0.3620357389593442, + "grad_norm": 410.24169921875, + "learning_rate": 2.144695234346598e-06, + "loss": 49.4636, + "step": 89610 + }, + { + "epoch": 0.36207614022471185, + "grad_norm": 581.3450317382812, + "learning_rate": 2.143537566819277e-06, + "loss": 52.0878, + "step": 89620 + }, + { + "epoch": 0.3621165414900795, + "grad_norm": 1102.1473388671875, + "learning_rate": 2.1423801265675643e-06, + "loss": 44.8626, + "step": 89630 + }, + { + "epoch": 0.3621569427554471, + "grad_norm": 450.6357727050781, + "learning_rate": 2.14122291368355e-06, + "loss": 65.0009, + "step": 89640 + }, + { + "epoch": 0.3621973440208147, + "grad_norm": 576.7796630859375, + "learning_rate": 2.1400659282593083e-06, + "loss": 37.2811, + "step": 89650 + }, + { + "epoch": 0.36223774528618236, + "grad_norm": 653.2671508789062, + "learning_rate": 2.1389091703868954e-06, + "loss": 46.5598, + "step": 89660 + }, + { + "epoch": 0.36227814655155, + "grad_norm": 701.052001953125, + "learning_rate": 2.137752640158351e-06, + "loss": 47.4967, + "step": 89670 + }, + { + "epoch": 0.36231854781691764, + "grad_norm": 1010.1072998046875, + "learning_rate": 2.136596337665691e-06, + "loss": 38.5774, + "step": 89680 + }, + { + "epoch": 0.3623589490822853, + "grad_norm": 623.6953735351562, + "learning_rate": 2.13544026300092e-06, + "loss": 49.8961, + "step": 89690 + }, + { + "epoch": 0.3623993503476529, + "grad_norm": 1026.1448974609375, + "learning_rate": 2.13428441625602e-06, + "loss": 31.1046, + "step": 89700 + }, + { + "epoch": 0.3624397516130205, + "grad_norm": 621.78076171875, + "learning_rate": 2.1331287975229574e-06, + "loss": 54.1089, + "step": 89710 + }, + { + "epoch": 0.36248015287838814, + "grad_norm": 418.7895812988281, + "learning_rate": 2.131973406893677e-06, + "loss": 37.3024, + "step": 89720 + }, + { + "epoch": 0.3625205541437558, + "grad_norm": 580.6715698242188, + "learning_rate": 2.1308182444601126e-06, + "loss": 33.1834, + "step": 89730 + }, + { + "epoch": 0.3625609554091234, + "grad_norm": 653.8624267578125, + "learning_rate": 2.1296633103141724e-06, + "loss": 48.9852, + "step": 89740 + }, + { + "epoch": 0.36260135667449106, + "grad_norm": 646.8182373046875, + "learning_rate": 2.1285086045477515e-06, + "loss": 41.7328, + "step": 89750 + }, + { + "epoch": 0.3626417579398587, + "grad_norm": 696.7265014648438, + "learning_rate": 2.12735412725272e-06, + "loss": 49.5961, + "step": 89760 + }, + { + "epoch": 0.3626821592052263, + "grad_norm": 735.8131103515625, + "learning_rate": 2.1261998785209382e-06, + "loss": 36.8307, + "step": 89770 + }, + { + "epoch": 0.3627225604705939, + "grad_norm": 528.7770385742188, + "learning_rate": 2.125045858444242e-06, + "loss": 41.6708, + "step": 89780 + }, + { + "epoch": 0.36276296173596156, + "grad_norm": 535.4943237304688, + "learning_rate": 2.1238920671144534e-06, + "loss": 38.8069, + "step": 89790 + }, + { + "epoch": 0.3628033630013292, + "grad_norm": 1034.5419921875, + "learning_rate": 2.122738504623373e-06, + "loss": 37.8137, + "step": 89800 + }, + { + "epoch": 0.36284376426669684, + "grad_norm": 762.473388671875, + "learning_rate": 2.121585171062785e-06, + "loss": 53.8039, + "step": 89810 + }, + { + "epoch": 0.3628841655320645, + "grad_norm": 613.7921142578125, + "learning_rate": 2.1204320665244533e-06, + "loss": 38.9286, + "step": 89820 + }, + { + "epoch": 0.36292456679743207, + "grad_norm": 513.7335205078125, + "learning_rate": 2.119279191100128e-06, + "loss": 33.4057, + "step": 89830 + }, + { + "epoch": 0.3629649680627997, + "grad_norm": 864.5242919921875, + "learning_rate": 2.1181265448815388e-06, + "loss": 42.9364, + "step": 89840 + }, + { + "epoch": 0.36300536932816735, + "grad_norm": 422.1235656738281, + "learning_rate": 2.1169741279603927e-06, + "loss": 40.5869, + "step": 89850 + }, + { + "epoch": 0.363045770593535, + "grad_norm": 693.3779296875, + "learning_rate": 2.1158219404283836e-06, + "loss": 50.8548, + "step": 89860 + }, + { + "epoch": 0.36308617185890263, + "grad_norm": 466.7857971191406, + "learning_rate": 2.1146699823771867e-06, + "loss": 34.1342, + "step": 89870 + }, + { + "epoch": 0.36312657312427027, + "grad_norm": 703.0391235351562, + "learning_rate": 2.1135182538984565e-06, + "loss": 54.1938, + "step": 89880 + }, + { + "epoch": 0.3631669743896379, + "grad_norm": 404.7064208984375, + "learning_rate": 2.1123667550838322e-06, + "loss": 29.0139, + "step": 89890 + }, + { + "epoch": 0.3632073756550055, + "grad_norm": 502.29351806640625, + "learning_rate": 2.1112154860249327e-06, + "loss": 54.9764, + "step": 89900 + }, + { + "epoch": 0.36324777692037313, + "grad_norm": 510.11895751953125, + "learning_rate": 2.1100644468133574e-06, + "loss": 51.0422, + "step": 89910 + }, + { + "epoch": 0.3632881781857408, + "grad_norm": 299.7879333496094, + "learning_rate": 2.1089136375406934e-06, + "loss": 48.2378, + "step": 89920 + }, + { + "epoch": 0.3633285794511084, + "grad_norm": 240.06448364257812, + "learning_rate": 2.107763058298504e-06, + "loss": 31.8403, + "step": 89930 + }, + { + "epoch": 0.36336898071647605, + "grad_norm": 1164.992431640625, + "learning_rate": 2.106612709178333e-06, + "loss": 34.8477, + "step": 89940 + }, + { + "epoch": 0.3634093819818437, + "grad_norm": 565.4199829101562, + "learning_rate": 2.10546259027171e-06, + "loss": 53.9241, + "step": 89950 + }, + { + "epoch": 0.3634497832472113, + "grad_norm": 584.436767578125, + "learning_rate": 2.1043127016701442e-06, + "loss": 45.8077, + "step": 89960 + }, + { + "epoch": 0.3634901845125789, + "grad_norm": 475.01226806640625, + "learning_rate": 2.1031630434651277e-06, + "loss": 44.3417, + "step": 89970 + }, + { + "epoch": 0.36353058577794656, + "grad_norm": 569.577880859375, + "learning_rate": 2.102013615748133e-06, + "loss": 49.0946, + "step": 89980 + }, + { + "epoch": 0.3635709870433142, + "grad_norm": 443.4644775390625, + "learning_rate": 2.1008644186106146e-06, + "loss": 38.6309, + "step": 89990 + }, + { + "epoch": 0.36361138830868184, + "grad_norm": 2156.372314453125, + "learning_rate": 2.09971545214401e-06, + "loss": 51.6774, + "step": 90000 + }, + { + "epoch": 0.3636517895740495, + "grad_norm": 564.2222290039062, + "learning_rate": 2.0985667164397355e-06, + "loss": 50.4205, + "step": 90010 + }, + { + "epoch": 0.3636921908394171, + "grad_norm": 803.0197143554688, + "learning_rate": 2.0974182115891924e-06, + "loss": 48.016, + "step": 90020 + }, + { + "epoch": 0.3637325921047847, + "grad_norm": 715.5997314453125, + "learning_rate": 2.0962699376837604e-06, + "loss": 51.9438, + "step": 90030 + }, + { + "epoch": 0.36377299337015234, + "grad_norm": 553.6121215820312, + "learning_rate": 2.0951218948148034e-06, + "loss": 46.5583, + "step": 90040 + }, + { + "epoch": 0.36381339463552, + "grad_norm": 923.4013061523438, + "learning_rate": 2.093974083073666e-06, + "loss": 51.7148, + "step": 90050 + }, + { + "epoch": 0.3638537959008876, + "grad_norm": 919.7776489257812, + "learning_rate": 2.0928265025516737e-06, + "loss": 55.002, + "step": 90060 + }, + { + "epoch": 0.36389419716625526, + "grad_norm": 522.1885375976562, + "learning_rate": 2.0916791533401344e-06, + "loss": 41.7103, + "step": 90070 + }, + { + "epoch": 0.3639345984316229, + "grad_norm": 721.49609375, + "learning_rate": 2.090532035530337e-06, + "loss": 40.6271, + "step": 90080 + }, + { + "epoch": 0.3639749996969905, + "grad_norm": 361.6112060546875, + "learning_rate": 2.0893851492135536e-06, + "loss": 39.6089, + "step": 90090 + }, + { + "epoch": 0.3640154009623581, + "grad_norm": 666.88720703125, + "learning_rate": 2.0882384944810358e-06, + "loss": 36.2407, + "step": 90100 + }, + { + "epoch": 0.36405580222772577, + "grad_norm": 370.4792175292969, + "learning_rate": 2.087092071424017e-06, + "loss": 39.4503, + "step": 90110 + }, + { + "epoch": 0.3640962034930934, + "grad_norm": 317.5982971191406, + "learning_rate": 2.085945880133715e-06, + "loss": 56.8149, + "step": 90120 + }, + { + "epoch": 0.36413660475846105, + "grad_norm": 743.6160888671875, + "learning_rate": 2.0847999207013247e-06, + "loss": 50.2978, + "step": 90130 + }, + { + "epoch": 0.3641770060238287, + "grad_norm": 482.4396057128906, + "learning_rate": 2.083654193218026e-06, + "loss": 37.2876, + "step": 90140 + }, + { + "epoch": 0.36421740728919627, + "grad_norm": 737.9346923828125, + "learning_rate": 2.0825086977749793e-06, + "loss": 47.1252, + "step": 90150 + }, + { + "epoch": 0.3642578085545639, + "grad_norm": 375.45465087890625, + "learning_rate": 2.0813634344633256e-06, + "loss": 39.6987, + "step": 90160 + }, + { + "epoch": 0.36429820981993155, + "grad_norm": 493.0916748046875, + "learning_rate": 2.0802184033741886e-06, + "loss": 38.7509, + "step": 90170 + }, + { + "epoch": 0.3643386110852992, + "grad_norm": 551.7267456054688, + "learning_rate": 2.0790736045986737e-06, + "loss": 41.7807, + "step": 90180 + }, + { + "epoch": 0.36437901235066683, + "grad_norm": 742.0836181640625, + "learning_rate": 2.077929038227867e-06, + "loss": 36.7443, + "step": 90190 + }, + { + "epoch": 0.36441941361603447, + "grad_norm": 635.392578125, + "learning_rate": 2.076784704352835e-06, + "loss": 37.1757, + "step": 90200 + }, + { + "epoch": 0.3644598148814021, + "grad_norm": 1009.4746704101562, + "learning_rate": 2.075640603064629e-06, + "loss": 32.1593, + "step": 90210 + }, + { + "epoch": 0.3645002161467697, + "grad_norm": 679.4805297851562, + "learning_rate": 2.07449673445428e-06, + "loss": 57.6325, + "step": 90220 + }, + { + "epoch": 0.36454061741213734, + "grad_norm": 404.5597839355469, + "learning_rate": 2.0733530986127985e-06, + "loss": 25.9294, + "step": 90230 + }, + { + "epoch": 0.364581018677505, + "grad_norm": 305.4102478027344, + "learning_rate": 2.07220969563118e-06, + "loss": 36.3092, + "step": 90240 + }, + { + "epoch": 0.3646214199428726, + "grad_norm": 915.2492065429688, + "learning_rate": 2.0710665256003994e-06, + "loss": 54.6068, + "step": 90250 + }, + { + "epoch": 0.36466182120824026, + "grad_norm": 867.6870727539062, + "learning_rate": 2.069923588611413e-06, + "loss": 47.5033, + "step": 90260 + }, + { + "epoch": 0.3647022224736079, + "grad_norm": 616.1192016601562, + "learning_rate": 2.068780884755161e-06, + "loss": 43.4188, + "step": 90270 + }, + { + "epoch": 0.3647426237389755, + "grad_norm": 644.8280029296875, + "learning_rate": 2.0676384141225586e-06, + "loss": 55.7568, + "step": 90280 + }, + { + "epoch": 0.3647830250043431, + "grad_norm": 492.7054443359375, + "learning_rate": 2.066496176804511e-06, + "loss": 47.7336, + "step": 90290 + }, + { + "epoch": 0.36482342626971076, + "grad_norm": 1280.5413818359375, + "learning_rate": 2.0653541728919002e-06, + "loss": 46.9477, + "step": 90300 + }, + { + "epoch": 0.3648638275350784, + "grad_norm": 603.3501586914062, + "learning_rate": 2.0642124024755895e-06, + "loss": 31.9769, + "step": 90310 + }, + { + "epoch": 0.36490422880044604, + "grad_norm": 976.8206176757812, + "learning_rate": 2.0630708656464245e-06, + "loss": 50.2211, + "step": 90320 + }, + { + "epoch": 0.3649446300658137, + "grad_norm": 698.71240234375, + "learning_rate": 2.0619295624952318e-06, + "loss": 55.8766, + "step": 90330 + }, + { + "epoch": 0.3649850313311813, + "grad_norm": 1087.606689453125, + "learning_rate": 2.0607884931128205e-06, + "loss": 50.9868, + "step": 90340 + }, + { + "epoch": 0.3650254325965489, + "grad_norm": 817.5459594726562, + "learning_rate": 2.059647657589979e-06, + "loss": 47.0761, + "step": 90350 + }, + { + "epoch": 0.36506583386191654, + "grad_norm": 929.0218505859375, + "learning_rate": 2.0585070560174807e-06, + "loss": 44.2793, + "step": 90360 + }, + { + "epoch": 0.3651062351272842, + "grad_norm": 316.37432861328125, + "learning_rate": 2.057366688486073e-06, + "loss": 24.7103, + "step": 90370 + }, + { + "epoch": 0.3651466363926518, + "grad_norm": 1726.5743408203125, + "learning_rate": 2.056226555086495e-06, + "loss": 37.4747, + "step": 90380 + }, + { + "epoch": 0.36518703765801946, + "grad_norm": 703.32275390625, + "learning_rate": 2.0550866559094597e-06, + "loss": 49.8901, + "step": 90390 + }, + { + "epoch": 0.3652274389233871, + "grad_norm": 1690.470703125, + "learning_rate": 2.053946991045664e-06, + "loss": 53.2018, + "step": 90400 + }, + { + "epoch": 0.3652678401887547, + "grad_norm": 431.23388671875, + "learning_rate": 2.0528075605857855e-06, + "loss": 33.2038, + "step": 90410 + }, + { + "epoch": 0.36530824145412233, + "grad_norm": 323.9116516113281, + "learning_rate": 2.0516683646204836e-06, + "loss": 55.9418, + "step": 90420 + }, + { + "epoch": 0.36534864271948997, + "grad_norm": 671.396240234375, + "learning_rate": 2.0505294032403987e-06, + "loss": 30.016, + "step": 90430 + }, + { + "epoch": 0.3653890439848576, + "grad_norm": 641.1298217773438, + "learning_rate": 2.0493906765361556e-06, + "loss": 36.2649, + "step": 90440 + }, + { + "epoch": 0.36542944525022525, + "grad_norm": 364.4647521972656, + "learning_rate": 2.0482521845983522e-06, + "loss": 47.5725, + "step": 90450 + }, + { + "epoch": 0.3654698465155929, + "grad_norm": 323.13031005859375, + "learning_rate": 2.047113927517576e-06, + "loss": 40.9744, + "step": 90460 + }, + { + "epoch": 0.3655102477809605, + "grad_norm": 541.819580078125, + "learning_rate": 2.0459759053843913e-06, + "loss": 46.2442, + "step": 90470 + }, + { + "epoch": 0.3655506490463281, + "grad_norm": 424.2214050292969, + "learning_rate": 2.0448381182893485e-06, + "loss": 47.5028, + "step": 90480 + }, + { + "epoch": 0.36559105031169575, + "grad_norm": 330.5768737792969, + "learning_rate": 2.043700566322974e-06, + "loss": 49.1583, + "step": 90490 + }, + { + "epoch": 0.3656314515770634, + "grad_norm": 751.4622192382812, + "learning_rate": 2.0425632495757776e-06, + "loss": 37.6808, + "step": 90500 + }, + { + "epoch": 0.36567185284243103, + "grad_norm": 431.5377502441406, + "learning_rate": 2.0414261681382507e-06, + "loss": 47.902, + "step": 90510 + }, + { + "epoch": 0.3657122541077987, + "grad_norm": 363.0134582519531, + "learning_rate": 2.0402893221008657e-06, + "loss": 30.096, + "step": 90520 + }, + { + "epoch": 0.3657526553731663, + "grad_norm": 270.1390075683594, + "learning_rate": 2.0391527115540777e-06, + "loss": 52.4842, + "step": 90530 + }, + { + "epoch": 0.3657930566385339, + "grad_norm": 316.0973815917969, + "learning_rate": 2.0380163365883188e-06, + "loss": 35.3534, + "step": 90540 + }, + { + "epoch": 0.36583345790390154, + "grad_norm": 871.845947265625, + "learning_rate": 2.0368801972940055e-06, + "loss": 43.3384, + "step": 90550 + }, + { + "epoch": 0.3658738591692692, + "grad_norm": 948.296142578125, + "learning_rate": 2.0357442937615367e-06, + "loss": 37.9077, + "step": 90560 + }, + { + "epoch": 0.3659142604346368, + "grad_norm": 310.0594177246094, + "learning_rate": 2.034608626081288e-06, + "loss": 44.4959, + "step": 90570 + }, + { + "epoch": 0.36595466170000446, + "grad_norm": 783.2125244140625, + "learning_rate": 2.0334731943436235e-06, + "loss": 40.5028, + "step": 90580 + }, + { + "epoch": 0.3659950629653721, + "grad_norm": 963.5101318359375, + "learning_rate": 2.032337998638883e-06, + "loss": 45.5643, + "step": 90590 + }, + { + "epoch": 0.3660354642307397, + "grad_norm": 530.5311889648438, + "learning_rate": 2.031203039057388e-06, + "loss": 26.7716, + "step": 90600 + }, + { + "epoch": 0.3660758654961073, + "grad_norm": 477.712890625, + "learning_rate": 2.0300683156894435e-06, + "loss": 45.5529, + "step": 90610 + }, + { + "epoch": 0.36611626676147496, + "grad_norm": 734.0820922851562, + "learning_rate": 2.028933828625332e-06, + "loss": 42.5537, + "step": 90620 + }, + { + "epoch": 0.3661566680268426, + "grad_norm": 878.6141967773438, + "learning_rate": 2.0277995779553193e-06, + "loss": 40.6559, + "step": 90630 + }, + { + "epoch": 0.36619706929221024, + "grad_norm": 740.7584228515625, + "learning_rate": 2.026665563769655e-06, + "loss": 59.3021, + "step": 90640 + }, + { + "epoch": 0.3662374705575779, + "grad_norm": 406.0100402832031, + "learning_rate": 2.025531786158565e-06, + "loss": 48.8978, + "step": 90650 + }, + { + "epoch": 0.3662778718229455, + "grad_norm": 351.8523254394531, + "learning_rate": 2.02439824521226e-06, + "loss": 28.7422, + "step": 90660 + }, + { + "epoch": 0.3663182730883131, + "grad_norm": 373.048095703125, + "learning_rate": 2.023264941020929e-06, + "loss": 36.5842, + "step": 90670 + }, + { + "epoch": 0.36635867435368075, + "grad_norm": 251.12399291992188, + "learning_rate": 2.022131873674747e-06, + "loss": 47.2558, + "step": 90680 + }, + { + "epoch": 0.3663990756190484, + "grad_norm": 412.8178405761719, + "learning_rate": 2.020999043263865e-06, + "loss": 39.3716, + "step": 90690 + }, + { + "epoch": 0.366439476884416, + "grad_norm": 483.9591369628906, + "learning_rate": 2.0198664498784194e-06, + "loss": 42.5158, + "step": 90700 + }, + { + "epoch": 0.36647987814978367, + "grad_norm": 479.1996765136719, + "learning_rate": 2.018734093608521e-06, + "loss": 42.1002, + "step": 90710 + }, + { + "epoch": 0.3665202794151513, + "grad_norm": 617.8897094726562, + "learning_rate": 2.017601974544269e-06, + "loss": 45.9508, + "step": 90720 + }, + { + "epoch": 0.3665606806805189, + "grad_norm": 807.5328369140625, + "learning_rate": 2.0164700927757407e-06, + "loss": 61.1654, + "step": 90730 + }, + { + "epoch": 0.36660108194588653, + "grad_norm": 610.870361328125, + "learning_rate": 2.0153384483929946e-06, + "loss": 34.2054, + "step": 90740 + }, + { + "epoch": 0.36664148321125417, + "grad_norm": 337.25994873046875, + "learning_rate": 2.0142070414860704e-06, + "loss": 47.6262, + "step": 90750 + }, + { + "epoch": 0.3666818844766218, + "grad_norm": 1225.8677978515625, + "learning_rate": 2.0130758721449887e-06, + "loss": 54.6125, + "step": 90760 + }, + { + "epoch": 0.36672228574198945, + "grad_norm": 583.123291015625, + "learning_rate": 2.01194494045975e-06, + "loss": 41.9564, + "step": 90770 + }, + { + "epoch": 0.3667626870073571, + "grad_norm": 532.25, + "learning_rate": 2.0108142465203413e-06, + "loss": 37.121, + "step": 90780 + }, + { + "epoch": 0.3668030882727247, + "grad_norm": 1506.677490234375, + "learning_rate": 2.0096837904167252e-06, + "loss": 35.0181, + "step": 90790 + }, + { + "epoch": 0.3668434895380923, + "grad_norm": 492.0948486328125, + "learning_rate": 2.0085535722388454e-06, + "loss": 35.3306, + "step": 90800 + }, + { + "epoch": 0.36688389080345996, + "grad_norm": 672.6559448242188, + "learning_rate": 2.007423592076629e-06, + "loss": 34.055, + "step": 90810 + }, + { + "epoch": 0.3669242920688276, + "grad_norm": 543.7750854492188, + "learning_rate": 2.006293850019983e-06, + "loss": 35.9712, + "step": 90820 + }, + { + "epoch": 0.36696469333419524, + "grad_norm": 712.0992431640625, + "learning_rate": 2.005164346158796e-06, + "loss": 40.947, + "step": 90830 + }, + { + "epoch": 0.3670050945995629, + "grad_norm": 722.9002075195312, + "learning_rate": 2.004035080582938e-06, + "loss": 48.6245, + "step": 90840 + }, + { + "epoch": 0.3670454958649305, + "grad_norm": 952.40234375, + "learning_rate": 2.002906053382258e-06, + "loss": 46.5803, + "step": 90850 + }, + { + "epoch": 0.3670858971302981, + "grad_norm": 416.2144470214844, + "learning_rate": 2.001777264646588e-06, + "loss": 40.4567, + "step": 90860 + }, + { + "epoch": 0.36712629839566574, + "grad_norm": 403.0811767578125, + "learning_rate": 2.000648714465744e-06, + "loss": 43.3017, + "step": 90870 + }, + { + "epoch": 0.3671666996610334, + "grad_norm": 580.00048828125, + "learning_rate": 1.9995204029295147e-06, + "loss": 38.2412, + "step": 90880 + }, + { + "epoch": 0.367207100926401, + "grad_norm": 378.8351745605469, + "learning_rate": 1.9983923301276764e-06, + "loss": 36.4264, + "step": 90890 + }, + { + "epoch": 0.36724750219176866, + "grad_norm": 707.3033447265625, + "learning_rate": 1.9972644961499853e-06, + "loss": 19.8426, + "step": 90900 + }, + { + "epoch": 0.3672879034571363, + "grad_norm": 932.6064453125, + "learning_rate": 1.9961369010861777e-06, + "loss": 52.4729, + "step": 90910 + }, + { + "epoch": 0.3673283047225039, + "grad_norm": 568.361572265625, + "learning_rate": 1.995009545025971e-06, + "loss": 32.0684, + "step": 90920 + }, + { + "epoch": 0.3673687059878715, + "grad_norm": 768.6260986328125, + "learning_rate": 1.9938824280590635e-06, + "loss": 53.6914, + "step": 90930 + }, + { + "epoch": 0.36740910725323916, + "grad_norm": 680.199951171875, + "learning_rate": 1.992755550275135e-06, + "loss": 29.9278, + "step": 90940 + }, + { + "epoch": 0.3674495085186068, + "grad_norm": 772.3173217773438, + "learning_rate": 1.991628911763846e-06, + "loss": 62.1594, + "step": 90950 + }, + { + "epoch": 0.36748990978397444, + "grad_norm": 571.9541015625, + "learning_rate": 1.990502512614838e-06, + "loss": 32.7492, + "step": 90960 + }, + { + "epoch": 0.3675303110493421, + "grad_norm": 404.0989685058594, + "learning_rate": 1.989376352917733e-06, + "loss": 37.1862, + "step": 90970 + }, + { + "epoch": 0.3675707123147097, + "grad_norm": 659.4763793945312, + "learning_rate": 1.988250432762135e-06, + "loss": 38.4783, + "step": 90980 + }, + { + "epoch": 0.3676111135800773, + "grad_norm": 665.2921142578125, + "learning_rate": 1.987124752237628e-06, + "loss": 58.7645, + "step": 90990 + }, + { + "epoch": 0.36765151484544495, + "grad_norm": 378.5953369140625, + "learning_rate": 1.9859993114337773e-06, + "loss": 35.1157, + "step": 91000 + }, + { + "epoch": 0.3676919161108126, + "grad_norm": 826.9314575195312, + "learning_rate": 1.984874110440129e-06, + "loss": 48.5521, + "step": 91010 + }, + { + "epoch": 0.36773231737618023, + "grad_norm": 584.8626708984375, + "learning_rate": 1.9837491493462104e-06, + "loss": 50.472, + "step": 91020 + }, + { + "epoch": 0.36777271864154787, + "grad_norm": 381.18341064453125, + "learning_rate": 1.9826244282415285e-06, + "loss": 43.9919, + "step": 91030 + }, + { + "epoch": 0.3678131199069155, + "grad_norm": 365.3213195800781, + "learning_rate": 1.9814999472155736e-06, + "loss": 60.1717, + "step": 91040 + }, + { + "epoch": 0.3678535211722831, + "grad_norm": 525.1259155273438, + "learning_rate": 1.9803757063578146e-06, + "loss": 44.7315, + "step": 91050 + }, + { + "epoch": 0.36789392243765073, + "grad_norm": 526.0704956054688, + "learning_rate": 1.9792517057577026e-06, + "loss": 50.6954, + "step": 91060 + }, + { + "epoch": 0.3679343237030184, + "grad_norm": 279.87548828125, + "learning_rate": 1.978127945504669e-06, + "loss": 38.3538, + "step": 91070 + }, + { + "epoch": 0.367974724968386, + "grad_norm": 474.6632995605469, + "learning_rate": 1.977004425688126e-06, + "loss": 37.0405, + "step": 91080 + }, + { + "epoch": 0.36801512623375365, + "grad_norm": 480.8251037597656, + "learning_rate": 1.9758811463974677e-06, + "loss": 51.2929, + "step": 91090 + }, + { + "epoch": 0.3680555274991213, + "grad_norm": 547.632568359375, + "learning_rate": 1.9747581077220675e-06, + "loss": 28.9646, + "step": 91100 + }, + { + "epoch": 0.3680959287644889, + "grad_norm": 635.3892211914062, + "learning_rate": 1.9736353097512802e-06, + "loss": 45.2826, + "step": 91110 + }, + { + "epoch": 0.3681363300298565, + "grad_norm": 260.98651123046875, + "learning_rate": 1.9725127525744423e-06, + "loss": 77.2748, + "step": 91120 + }, + { + "epoch": 0.36817673129522416, + "grad_norm": 728.8169555664062, + "learning_rate": 1.971390436280871e-06, + "loss": 40.5998, + "step": 91130 + }, + { + "epoch": 0.3682171325605918, + "grad_norm": 411.4289245605469, + "learning_rate": 1.970268360959863e-06, + "loss": 45.5529, + "step": 91140 + }, + { + "epoch": 0.36825753382595944, + "grad_norm": 753.0279541015625, + "learning_rate": 1.9691465267006965e-06, + "loss": 35.7461, + "step": 91150 + }, + { + "epoch": 0.3682979350913271, + "grad_norm": 488.8835144042969, + "learning_rate": 1.9680249335926314e-06, + "loss": 37.8499, + "step": 91160 + }, + { + "epoch": 0.3683383363566947, + "grad_norm": 243.46627807617188, + "learning_rate": 1.9669035817249077e-06, + "loss": 45.215, + "step": 91170 + }, + { + "epoch": 0.3683787376220623, + "grad_norm": 599.044189453125, + "learning_rate": 1.9657824711867457e-06, + "loss": 56.9096, + "step": 91180 + }, + { + "epoch": 0.36841913888742994, + "grad_norm": 932.3475952148438, + "learning_rate": 1.9646616020673474e-06, + "loss": 34.8462, + "step": 91190 + }, + { + "epoch": 0.3684595401527976, + "grad_norm": 707.1528930664062, + "learning_rate": 1.9635409744558953e-06, + "loss": 42.6267, + "step": 91200 + }, + { + "epoch": 0.3684999414181652, + "grad_norm": 377.12896728515625, + "learning_rate": 1.962420588441552e-06, + "loss": 34.0437, + "step": 91210 + }, + { + "epoch": 0.36854034268353286, + "grad_norm": 756.0499877929688, + "learning_rate": 1.9613004441134635e-06, + "loss": 46.7615, + "step": 91220 + }, + { + "epoch": 0.3685807439489005, + "grad_norm": 1075.0089111328125, + "learning_rate": 1.96018054156075e-06, + "loss": 56.312, + "step": 91230 + }, + { + "epoch": 0.3686211452142681, + "grad_norm": 680.0381469726562, + "learning_rate": 1.9590608808725214e-06, + "loss": 44.0678, + "step": 91240 + }, + { + "epoch": 0.3686615464796357, + "grad_norm": 464.0368347167969, + "learning_rate": 1.9579414621378624e-06, + "loss": 45.7525, + "step": 91250 + }, + { + "epoch": 0.36870194774500337, + "grad_norm": 205.71853637695312, + "learning_rate": 1.9568222854458403e-06, + "loss": 49.5888, + "step": 91260 + }, + { + "epoch": 0.368742349010371, + "grad_norm": 599.9993286132812, + "learning_rate": 1.955703350885502e-06, + "loss": 46.9081, + "step": 91270 + }, + { + "epoch": 0.36878275027573865, + "grad_norm": 766.0186767578125, + "learning_rate": 1.954584658545877e-06, + "loss": 43.123, + "step": 91280 + }, + { + "epoch": 0.3688231515411063, + "grad_norm": 579.0977172851562, + "learning_rate": 1.9534662085159746e-06, + "loss": 47.9196, + "step": 91290 + }, + { + "epoch": 0.36886355280647387, + "grad_norm": 710.0382690429688, + "learning_rate": 1.9523480008847856e-06, + "loss": 39.5002, + "step": 91300 + }, + { + "epoch": 0.3689039540718415, + "grad_norm": 449.13397216796875, + "learning_rate": 1.9512300357412778e-06, + "loss": 42.9828, + "step": 91310 + }, + { + "epoch": 0.36894435533720915, + "grad_norm": 657.600830078125, + "learning_rate": 1.950112313174404e-06, + "loss": 36.0095, + "step": 91320 + }, + { + "epoch": 0.3689847566025768, + "grad_norm": 842.6561279296875, + "learning_rate": 1.9489948332730945e-06, + "loss": 44.1807, + "step": 91330 + }, + { + "epoch": 0.36902515786794443, + "grad_norm": 284.8597717285156, + "learning_rate": 1.947877596126266e-06, + "loss": 30.7171, + "step": 91340 + }, + { + "epoch": 0.36906555913331207, + "grad_norm": 921.1884765625, + "learning_rate": 1.946760601822809e-06, + "loss": 47.5682, + "step": 91350 + }, + { + "epoch": 0.3691059603986797, + "grad_norm": 486.9660339355469, + "learning_rate": 1.945643850451599e-06, + "loss": 45.4097, + "step": 91360 + }, + { + "epoch": 0.3691463616640473, + "grad_norm": 481.0872497558594, + "learning_rate": 1.9445273421014903e-06, + "loss": 34.2548, + "step": 91370 + }, + { + "epoch": 0.36918676292941494, + "grad_norm": 297.3924560546875, + "learning_rate": 1.9434110768613184e-06, + "loss": 53.0149, + "step": 91380 + }, + { + "epoch": 0.3692271641947826, + "grad_norm": 721.890869140625, + "learning_rate": 1.9422950548199004e-06, + "loss": 66.6985, + "step": 91390 + }, + { + "epoch": 0.3692675654601502, + "grad_norm": 407.1198425292969, + "learning_rate": 1.941179276066031e-06, + "loss": 42.478, + "step": 91400 + }, + { + "epoch": 0.36930796672551786, + "grad_norm": 982.4834594726562, + "learning_rate": 1.9400637406884875e-06, + "loss": 42.1372, + "step": 91410 + }, + { + "epoch": 0.3693483679908855, + "grad_norm": 547.2681884765625, + "learning_rate": 1.938948448776028e-06, + "loss": 22.6065, + "step": 91420 + }, + { + "epoch": 0.3693887692562531, + "grad_norm": 751.7438354492188, + "learning_rate": 1.9378334004173936e-06, + "loss": 43.4024, + "step": 91430 + }, + { + "epoch": 0.3694291705216207, + "grad_norm": 515.72802734375, + "learning_rate": 1.9367185957013024e-06, + "loss": 38.0327, + "step": 91440 + }, + { + "epoch": 0.36946957178698836, + "grad_norm": 481.98919677734375, + "learning_rate": 1.9356040347164533e-06, + "loss": 71.2502, + "step": 91450 + }, + { + "epoch": 0.369509973052356, + "grad_norm": 274.7797546386719, + "learning_rate": 1.9344897175515283e-06, + "loss": 39.7428, + "step": 91460 + }, + { + "epoch": 0.36955037431772364, + "grad_norm": 390.0133056640625, + "learning_rate": 1.9333756442951886e-06, + "loss": 47.6374, + "step": 91470 + }, + { + "epoch": 0.3695907755830913, + "grad_norm": 556.7567749023438, + "learning_rate": 1.9322618150360732e-06, + "loss": 49.5216, + "step": 91480 + }, + { + "epoch": 0.3696311768484589, + "grad_norm": 442.883056640625, + "learning_rate": 1.931148229862807e-06, + "loss": 36.484, + "step": 91490 + }, + { + "epoch": 0.3696715781138265, + "grad_norm": 499.36090087890625, + "learning_rate": 1.9300348888639915e-06, + "loss": 51.1593, + "step": 91500 + }, + { + "epoch": 0.36971197937919414, + "grad_norm": 750.4673461914062, + "learning_rate": 1.9289217921282104e-06, + "loss": 48.1591, + "step": 91510 + }, + { + "epoch": 0.3697523806445618, + "grad_norm": 307.9422607421875, + "learning_rate": 1.927808939744027e-06, + "loss": 46.4992, + "step": 91520 + }, + { + "epoch": 0.3697927819099294, + "grad_norm": 907.98046875, + "learning_rate": 1.9266963317999884e-06, + "loss": 39.0636, + "step": 91530 + }, + { + "epoch": 0.36983318317529706, + "grad_norm": 797.4759521484375, + "learning_rate": 1.9255839683846174e-06, + "loss": 52.2098, + "step": 91540 + }, + { + "epoch": 0.3698735844406647, + "grad_norm": 372.3993225097656, + "learning_rate": 1.9244718495864206e-06, + "loss": 37.4535, + "step": 91550 + }, + { + "epoch": 0.3699139857060323, + "grad_norm": 488.9723815917969, + "learning_rate": 1.9233599754938857e-06, + "loss": 32.6998, + "step": 91560 + }, + { + "epoch": 0.36995438697139993, + "grad_norm": 843.7347412109375, + "learning_rate": 1.922248346195477e-06, + "loss": 47.1682, + "step": 91570 + }, + { + "epoch": 0.36999478823676757, + "grad_norm": 472.1070251464844, + "learning_rate": 1.921136961779641e-06, + "loss": 38.4525, + "step": 91580 + }, + { + "epoch": 0.3700351895021352, + "grad_norm": 768.330810546875, + "learning_rate": 1.9200258223348072e-06, + "loss": 35.5316, + "step": 91590 + }, + { + "epoch": 0.37007559076750285, + "grad_norm": 917.4964599609375, + "learning_rate": 1.918914927949384e-06, + "loss": 32.2703, + "step": 91600 + }, + { + "epoch": 0.3701159920328705, + "grad_norm": 1343.5494384765625, + "learning_rate": 1.9178042787117594e-06, + "loss": 35.542, + "step": 91610 + }, + { + "epoch": 0.3701563932982381, + "grad_norm": 463.442626953125, + "learning_rate": 1.9166938747103013e-06, + "loss": 33.1795, + "step": 91620 + }, + { + "epoch": 0.3701967945636057, + "grad_norm": 389.9674377441406, + "learning_rate": 1.915583716033363e-06, + "loss": 42.821, + "step": 91630 + }, + { + "epoch": 0.37023719582897335, + "grad_norm": 404.5822448730469, + "learning_rate": 1.9144738027692746e-06, + "loss": 43.7155, + "step": 91640 + }, + { + "epoch": 0.370277597094341, + "grad_norm": 587.7801513671875, + "learning_rate": 1.913364135006343e-06, + "loss": 29.5249, + "step": 91650 + }, + { + "epoch": 0.37031799835970863, + "grad_norm": 835.2510986328125, + "learning_rate": 1.9122547128328616e-06, + "loss": 41.1438, + "step": 91660 + }, + { + "epoch": 0.3703583996250763, + "grad_norm": 342.636962890625, + "learning_rate": 1.9111455363371016e-06, + "loss": 34.5593, + "step": 91670 + }, + { + "epoch": 0.3703988008904439, + "grad_norm": 424.19354248046875, + "learning_rate": 1.910036605607316e-06, + "loss": 32.6238, + "step": 91680 + }, + { + "epoch": 0.3704392021558115, + "grad_norm": 920.641845703125, + "learning_rate": 1.908927920731736e-06, + "loss": 37.6558, + "step": 91690 + }, + { + "epoch": 0.37047960342117914, + "grad_norm": 857.9271240234375, + "learning_rate": 1.9078194817985755e-06, + "loss": 41.7298, + "step": 91700 + }, + { + "epoch": 0.3705200046865468, + "grad_norm": 368.0783996582031, + "learning_rate": 1.9067112888960283e-06, + "loss": 48.4999, + "step": 91710 + }, + { + "epoch": 0.3705604059519144, + "grad_norm": 644.633056640625, + "learning_rate": 1.905603342112265e-06, + "loss": 45.2764, + "step": 91720 + }, + { + "epoch": 0.37060080721728206, + "grad_norm": 310.33648681640625, + "learning_rate": 1.904495641535446e-06, + "loss": 49.4517, + "step": 91730 + }, + { + "epoch": 0.3706412084826497, + "grad_norm": 407.6061096191406, + "learning_rate": 1.9033881872537009e-06, + "loss": 39.223, + "step": 91740 + }, + { + "epoch": 0.3706816097480173, + "grad_norm": 665.1161499023438, + "learning_rate": 1.902280979355146e-06, + "loss": 38.5211, + "step": 91750 + }, + { + "epoch": 0.3707220110133849, + "grad_norm": 392.2351989746094, + "learning_rate": 1.901174017927877e-06, + "loss": 41.5336, + "step": 91760 + }, + { + "epoch": 0.37076241227875256, + "grad_norm": 520.5103759765625, + "learning_rate": 1.9000673030599698e-06, + "loss": 35.4438, + "step": 91770 + }, + { + "epoch": 0.3708028135441202, + "grad_norm": 579.9551391601562, + "learning_rate": 1.89896083483948e-06, + "loss": 39.8775, + "step": 91780 + }, + { + "epoch": 0.37084321480948784, + "grad_norm": 1177.031494140625, + "learning_rate": 1.897854613354445e-06, + "loss": 45.311, + "step": 91790 + }, + { + "epoch": 0.3708836160748555, + "grad_norm": 481.6818542480469, + "learning_rate": 1.8967486386928819e-06, + "loss": 40.572, + "step": 91800 + }, + { + "epoch": 0.3709240173402231, + "grad_norm": 631.0228881835938, + "learning_rate": 1.8956429109427855e-06, + "loss": 59.4919, + "step": 91810 + }, + { + "epoch": 0.3709644186055907, + "grad_norm": 459.4544372558594, + "learning_rate": 1.8945374301921393e-06, + "loss": 32.1201, + "step": 91820 + }, + { + "epoch": 0.37100481987095835, + "grad_norm": 340.3385925292969, + "learning_rate": 1.893432196528896e-06, + "loss": 60.1047, + "step": 91830 + }, + { + "epoch": 0.371045221136326, + "grad_norm": 647.6593017578125, + "learning_rate": 1.892327210040995e-06, + "loss": 34.1518, + "step": 91840 + }, + { + "epoch": 0.3710856224016936, + "grad_norm": 718.9778442382812, + "learning_rate": 1.8912224708163561e-06, + "loss": 34.7443, + "step": 91850 + }, + { + "epoch": 0.37112602366706127, + "grad_norm": 784.8222045898438, + "learning_rate": 1.890117978942878e-06, + "loss": 28.6377, + "step": 91860 + }, + { + "epoch": 0.3711664249324289, + "grad_norm": 482.3733825683594, + "learning_rate": 1.8890137345084392e-06, + "loss": 43.0016, + "step": 91870 + }, + { + "epoch": 0.3712068261977965, + "grad_norm": 1036.407470703125, + "learning_rate": 1.8879097376009009e-06, + "loss": 44.0674, + "step": 91880 + }, + { + "epoch": 0.37124722746316413, + "grad_norm": 554.4319458007812, + "learning_rate": 1.8868059883081015e-06, + "loss": 41.1695, + "step": 91890 + }, + { + "epoch": 0.37128762872853177, + "grad_norm": 642.7158203125, + "learning_rate": 1.8857024867178625e-06, + "loss": 29.2449, + "step": 91900 + }, + { + "epoch": 0.3713280299938994, + "grad_norm": 451.4582214355469, + "learning_rate": 1.8845992329179835e-06, + "loss": 41.4946, + "step": 91910 + }, + { + "epoch": 0.37136843125926705, + "grad_norm": 1148.1690673828125, + "learning_rate": 1.883496226996246e-06, + "loss": 31.4822, + "step": 91920 + }, + { + "epoch": 0.3714088325246347, + "grad_norm": 1108.7608642578125, + "learning_rate": 1.8823934690404106e-06, + "loss": 44.5192, + "step": 91930 + }, + { + "epoch": 0.3714492337900023, + "grad_norm": 407.66729736328125, + "learning_rate": 1.8812909591382195e-06, + "loss": 44.9309, + "step": 91940 + }, + { + "epoch": 0.3714896350553699, + "grad_norm": 833.0994262695312, + "learning_rate": 1.8801886973773936e-06, + "loss": 41.494, + "step": 91950 + }, + { + "epoch": 0.37153003632073756, + "grad_norm": 356.8611755371094, + "learning_rate": 1.8790866838456351e-06, + "loss": 32.0275, + "step": 91960 + }, + { + "epoch": 0.3715704375861052, + "grad_norm": 552.33837890625, + "learning_rate": 1.877984918630626e-06, + "loss": 43.8171, + "step": 91970 + }, + { + "epoch": 0.37161083885147284, + "grad_norm": 882.7359619140625, + "learning_rate": 1.876883401820029e-06, + "loss": 68.7707, + "step": 91980 + }, + { + "epoch": 0.3716512401168405, + "grad_norm": 222.77023315429688, + "learning_rate": 1.8757821335014858e-06, + "loss": 28.0252, + "step": 91990 + }, + { + "epoch": 0.3716916413822081, + "grad_norm": 569.3855590820312, + "learning_rate": 1.8746811137626208e-06, + "loss": 41.8199, + "step": 92000 + }, + { + "epoch": 0.3717320426475757, + "grad_norm": 337.5414123535156, + "learning_rate": 1.8735803426910366e-06, + "loss": 44.0779, + "step": 92010 + }, + { + "epoch": 0.37177244391294334, + "grad_norm": 307.907470703125, + "learning_rate": 1.8724798203743154e-06, + "loss": 34.9715, + "step": 92020 + }, + { + "epoch": 0.371812845178311, + "grad_norm": 410.8703308105469, + "learning_rate": 1.8713795469000218e-06, + "loss": 41.9369, + "step": 92030 + }, + { + "epoch": 0.3718532464436786, + "grad_norm": 526.17138671875, + "learning_rate": 1.8702795223556992e-06, + "loss": 41.1116, + "step": 92040 + }, + { + "epoch": 0.37189364770904626, + "grad_norm": 546.6736450195312, + "learning_rate": 1.8691797468288713e-06, + "loss": 40.5777, + "step": 92050 + }, + { + "epoch": 0.3719340489744139, + "grad_norm": 665.9561157226562, + "learning_rate": 1.8680802204070432e-06, + "loss": 52.4794, + "step": 92060 + }, + { + "epoch": 0.3719744502397815, + "grad_norm": 656.8494262695312, + "learning_rate": 1.8669809431776991e-06, + "loss": 43.0441, + "step": 92070 + }, + { + "epoch": 0.3720148515051491, + "grad_norm": 835.1004638671875, + "learning_rate": 1.8658819152283003e-06, + "loss": 54.4179, + "step": 92080 + }, + { + "epoch": 0.37205525277051676, + "grad_norm": 738.4459838867188, + "learning_rate": 1.8647831366462948e-06, + "loss": 39.4756, + "step": 92090 + }, + { + "epoch": 0.3720956540358844, + "grad_norm": 497.9049377441406, + "learning_rate": 1.8636846075191067e-06, + "loss": 34.005, + "step": 92100 + }, + { + "epoch": 0.37213605530125204, + "grad_norm": 334.0683898925781, + "learning_rate": 1.8625863279341406e-06, + "loss": 36.7819, + "step": 92110 + }, + { + "epoch": 0.3721764565666197, + "grad_norm": 432.1595764160156, + "learning_rate": 1.8614882979787818e-06, + "loss": 42.6485, + "step": 92120 + }, + { + "epoch": 0.3722168578319873, + "grad_norm": 563.0101318359375, + "learning_rate": 1.8603905177403953e-06, + "loss": 40.2115, + "step": 92130 + }, + { + "epoch": 0.3722572590973549, + "grad_norm": 603.9617919921875, + "learning_rate": 1.8592929873063259e-06, + "loss": 47.0243, + "step": 92140 + }, + { + "epoch": 0.37229766036272255, + "grad_norm": 250.31321716308594, + "learning_rate": 1.8581957067639e-06, + "loss": 45.3303, + "step": 92150 + }, + { + "epoch": 0.3723380616280902, + "grad_norm": 486.78363037109375, + "learning_rate": 1.8570986762004246e-06, + "loss": 43.6162, + "step": 92160 + }, + { + "epoch": 0.37237846289345783, + "grad_norm": 798.7283935546875, + "learning_rate": 1.8560018957031816e-06, + "loss": 35.2461, + "step": 92170 + }, + { + "epoch": 0.37241886415882547, + "grad_norm": 576.589599609375, + "learning_rate": 1.8549053653594373e-06, + "loss": 41.0202, + "step": 92180 + }, + { + "epoch": 0.3724592654241931, + "grad_norm": 590.5379028320312, + "learning_rate": 1.8538090852564405e-06, + "loss": 46.1836, + "step": 92190 + }, + { + "epoch": 0.3724996666895607, + "grad_norm": 313.347900390625, + "learning_rate": 1.852713055481416e-06, + "loss": 41.0, + "step": 92200 + }, + { + "epoch": 0.37254006795492833, + "grad_norm": 903.6192626953125, + "learning_rate": 1.8516172761215695e-06, + "loss": 46.3808, + "step": 92210 + }, + { + "epoch": 0.372580469220296, + "grad_norm": 626.2720947265625, + "learning_rate": 1.8505217472640868e-06, + "loss": 53.388, + "step": 92220 + }, + { + "epoch": 0.3726208704856636, + "grad_norm": 644.8720703125, + "learning_rate": 1.849426468996135e-06, + "loss": 38.492, + "step": 92230 + }, + { + "epoch": 0.37266127175103125, + "grad_norm": 750.325439453125, + "learning_rate": 1.8483314414048597e-06, + "loss": 37.5022, + "step": 92240 + }, + { + "epoch": 0.3727016730163989, + "grad_norm": 726.2249145507812, + "learning_rate": 1.8472366645773892e-06, + "loss": 46.9339, + "step": 92250 + }, + { + "epoch": 0.3727420742817665, + "grad_norm": 622.7699584960938, + "learning_rate": 1.846142138600826e-06, + "loss": 37.4274, + "step": 92260 + }, + { + "epoch": 0.3727824755471341, + "grad_norm": 666.1739501953125, + "learning_rate": 1.8450478635622592e-06, + "loss": 43.8214, + "step": 92270 + }, + { + "epoch": 0.37282287681250176, + "grad_norm": 818.9193115234375, + "learning_rate": 1.8439538395487528e-06, + "loss": 57.6135, + "step": 92280 + }, + { + "epoch": 0.3728632780778694, + "grad_norm": 784.80859375, + "learning_rate": 1.842860066647356e-06, + "loss": 40.4329, + "step": 92290 + }, + { + "epoch": 0.37290367934323704, + "grad_norm": 768.7633056640625, + "learning_rate": 1.841766544945095e-06, + "loss": 42.8496, + "step": 92300 + }, + { + "epoch": 0.3729440806086047, + "grad_norm": 563.5238037109375, + "learning_rate": 1.8406732745289757e-06, + "loss": 46.8466, + "step": 92310 + }, + { + "epoch": 0.3729844818739723, + "grad_norm": 678.9468383789062, + "learning_rate": 1.839580255485985e-06, + "loss": 52.8284, + "step": 92320 + }, + { + "epoch": 0.3730248831393399, + "grad_norm": 314.1755676269531, + "learning_rate": 1.83848748790309e-06, + "loss": 56.7375, + "step": 92330 + }, + { + "epoch": 0.37306528440470754, + "grad_norm": 756.5408325195312, + "learning_rate": 1.8373949718672345e-06, + "loss": 51.5074, + "step": 92340 + }, + { + "epoch": 0.3731056856700752, + "grad_norm": 402.6850891113281, + "learning_rate": 1.8363027074653473e-06, + "loss": 32.0473, + "step": 92350 + }, + { + "epoch": 0.3731460869354428, + "grad_norm": 610.68017578125, + "learning_rate": 1.835210694784334e-06, + "loss": 40.5902, + "step": 92360 + }, + { + "epoch": 0.37318648820081046, + "grad_norm": 776.7313842773438, + "learning_rate": 1.8341189339110793e-06, + "loss": 47.7953, + "step": 92370 + }, + { + "epoch": 0.3732268894661781, + "grad_norm": 171.4274444580078, + "learning_rate": 1.8330274249324537e-06, + "loss": 54.6801, + "step": 92380 + }, + { + "epoch": 0.3732672907315457, + "grad_norm": 527.5751953125, + "learning_rate": 1.831936167935301e-06, + "loss": 44.6827, + "step": 92390 + }, + { + "epoch": 0.3733076919969133, + "grad_norm": 542.8246459960938, + "learning_rate": 1.8308451630064484e-06, + "loss": 44.6548, + "step": 92400 + }, + { + "epoch": 0.37334809326228097, + "grad_norm": 558.8591918945312, + "learning_rate": 1.8297544102327014e-06, + "loss": 40.691, + "step": 92410 + }, + { + "epoch": 0.3733884945276486, + "grad_norm": 635.3528442382812, + "learning_rate": 1.8286639097008484e-06, + "loss": 46.9977, + "step": 92420 + }, + { + "epoch": 0.37342889579301625, + "grad_norm": 540.139892578125, + "learning_rate": 1.827573661497652e-06, + "loss": 44.0475, + "step": 92430 + }, + { + "epoch": 0.3734692970583839, + "grad_norm": 400.1092224121094, + "learning_rate": 1.8264836657098595e-06, + "loss": 35.4605, + "step": 92440 + }, + { + "epoch": 0.3735096983237515, + "grad_norm": 504.9743347167969, + "learning_rate": 1.8253939224241974e-06, + "loss": 41.2205, + "step": 92450 + }, + { + "epoch": 0.3735500995891191, + "grad_norm": 403.0384521484375, + "learning_rate": 1.8243044317273717e-06, + "loss": 49.5701, + "step": 92460 + }, + { + "epoch": 0.37359050085448675, + "grad_norm": 214.4297637939453, + "learning_rate": 1.823215193706066e-06, + "loss": 33.6296, + "step": 92470 + }, + { + "epoch": 0.3736309021198544, + "grad_norm": 435.58782958984375, + "learning_rate": 1.82212620844695e-06, + "loss": 60.6322, + "step": 92480 + }, + { + "epoch": 0.37367130338522203, + "grad_norm": 820.32373046875, + "learning_rate": 1.8210374760366662e-06, + "loss": 42.7006, + "step": 92490 + }, + { + "epoch": 0.37371170465058967, + "grad_norm": 488.87689208984375, + "learning_rate": 1.8199489965618433e-06, + "loss": 40.5734, + "step": 92500 + }, + { + "epoch": 0.3737521059159573, + "grad_norm": 1221.661865234375, + "learning_rate": 1.8188607701090827e-06, + "loss": 72.2834, + "step": 92510 + }, + { + "epoch": 0.3737925071813249, + "grad_norm": 712.658447265625, + "learning_rate": 1.8177727967649705e-06, + "loss": 45.4339, + "step": 92520 + }, + { + "epoch": 0.37383290844669254, + "grad_norm": 703.703369140625, + "learning_rate": 1.816685076616073e-06, + "loss": 59.1592, + "step": 92530 + }, + { + "epoch": 0.3738733097120602, + "grad_norm": 384.11309814453125, + "learning_rate": 1.8155976097489342e-06, + "loss": 46.3461, + "step": 92540 + }, + { + "epoch": 0.3739137109774278, + "grad_norm": 322.06011962890625, + "learning_rate": 1.8145103962500792e-06, + "loss": 36.9918, + "step": 92550 + }, + { + "epoch": 0.37395411224279546, + "grad_norm": 349.7514953613281, + "learning_rate": 1.8134234362060128e-06, + "loss": 42.063, + "step": 92560 + }, + { + "epoch": 0.3739945135081631, + "grad_norm": 679.798828125, + "learning_rate": 1.8123367297032175e-06, + "loss": 40.9792, + "step": 92570 + }, + { + "epoch": 0.3740349147735307, + "grad_norm": 891.4302978515625, + "learning_rate": 1.8112502768281608e-06, + "loss": 49.7337, + "step": 92580 + }, + { + "epoch": 0.3740753160388983, + "grad_norm": 359.6378479003906, + "learning_rate": 1.810164077667287e-06, + "loss": 32.4731, + "step": 92590 + }, + { + "epoch": 0.37411571730426596, + "grad_norm": 348.2173767089844, + "learning_rate": 1.809078132307016e-06, + "loss": 29.6351, + "step": 92600 + }, + { + "epoch": 0.3741561185696336, + "grad_norm": 579.4752197265625, + "learning_rate": 1.807992440833754e-06, + "loss": 38.418, + "step": 92610 + }, + { + "epoch": 0.37419651983500124, + "grad_norm": 738.5474853515625, + "learning_rate": 1.8069070033338842e-06, + "loss": 23.2401, + "step": 92620 + }, + { + "epoch": 0.3742369211003689, + "grad_norm": 619.52978515625, + "learning_rate": 1.8058218198937695e-06, + "loss": 51.5335, + "step": 92630 + }, + { + "epoch": 0.3742773223657365, + "grad_norm": 578.2020874023438, + "learning_rate": 1.8047368905997536e-06, + "loss": 30.5904, + "step": 92640 + }, + { + "epoch": 0.3743177236311041, + "grad_norm": 55.084449768066406, + "learning_rate": 1.8036522155381592e-06, + "loss": 44.467, + "step": 92650 + }, + { + "epoch": 0.37435812489647174, + "grad_norm": 323.4886474609375, + "learning_rate": 1.8025677947952879e-06, + "loss": 45.4456, + "step": 92660 + }, + { + "epoch": 0.3743985261618394, + "grad_norm": 235.45973205566406, + "learning_rate": 1.8014836284574223e-06, + "loss": 44.9233, + "step": 92670 + }, + { + "epoch": 0.374438927427207, + "grad_norm": 758.6615600585938, + "learning_rate": 1.8003997166108278e-06, + "loss": 31.2891, + "step": 92680 + }, + { + "epoch": 0.37447932869257466, + "grad_norm": 692.9884033203125, + "learning_rate": 1.7993160593417424e-06, + "loss": 28.9124, + "step": 92690 + }, + { + "epoch": 0.3745197299579423, + "grad_norm": 354.16278076171875, + "learning_rate": 1.798232656736389e-06, + "loss": 29.815, + "step": 92700 + }, + { + "epoch": 0.3745601312233099, + "grad_norm": 709.9598388671875, + "learning_rate": 1.7971495088809688e-06, + "loss": 42.7331, + "step": 92710 + }, + { + "epoch": 0.37460053248867753, + "grad_norm": 788.5820922851562, + "learning_rate": 1.796066615861663e-06, + "loss": 34.2348, + "step": 92720 + }, + { + "epoch": 0.37464093375404517, + "grad_norm": 347.8127136230469, + "learning_rate": 1.7949839777646327e-06, + "loss": 33.6161, + "step": 92730 + }, + { + "epoch": 0.3746813350194128, + "grad_norm": 483.2420349121094, + "learning_rate": 1.7939015946760186e-06, + "loss": 41.9075, + "step": 92740 + }, + { + "epoch": 0.37472173628478045, + "grad_norm": 424.3015441894531, + "learning_rate": 1.7928194666819398e-06, + "loss": 46.9531, + "step": 92750 + }, + { + "epoch": 0.3747621375501481, + "grad_norm": 582.9439697265625, + "learning_rate": 1.7917375938684979e-06, + "loss": 28.903, + "step": 92760 + }, + { + "epoch": 0.37480253881551573, + "grad_norm": 697.3135375976562, + "learning_rate": 1.7906559763217713e-06, + "loss": 43.9791, + "step": 92770 + }, + { + "epoch": 0.3748429400808833, + "grad_norm": 733.6925659179688, + "learning_rate": 1.7895746141278198e-06, + "loss": 45.0568, + "step": 92780 + }, + { + "epoch": 0.37488334134625095, + "grad_norm": 426.135498046875, + "learning_rate": 1.7884935073726822e-06, + "loss": 46.5901, + "step": 92790 + }, + { + "epoch": 0.3749237426116186, + "grad_norm": 500.1527099609375, + "learning_rate": 1.7874126561423771e-06, + "loss": 47.9253, + "step": 92800 + }, + { + "epoch": 0.37496414387698623, + "grad_norm": 129.91612243652344, + "learning_rate": 1.786332060522904e-06, + "loss": 45.9043, + "step": 92810 + }, + { + "epoch": 0.3750045451423539, + "grad_norm": 514.854736328125, + "learning_rate": 1.7852517206002396e-06, + "loss": 32.7294, + "step": 92820 + }, + { + "epoch": 0.3750449464077215, + "grad_norm": 657.1265258789062, + "learning_rate": 1.7841716364603423e-06, + "loss": 37.4753, + "step": 92830 + }, + { + "epoch": 0.3750853476730891, + "grad_norm": 806.763916015625, + "learning_rate": 1.783091808189149e-06, + "loss": 43.7356, + "step": 92840 + }, + { + "epoch": 0.37512574893845674, + "grad_norm": 445.2367858886719, + "learning_rate": 1.7820122358725772e-06, + "loss": 46.6244, + "step": 92850 + }, + { + "epoch": 0.3751661502038244, + "grad_norm": 181.48643493652344, + "learning_rate": 1.780932919596523e-06, + "loss": 27.6955, + "step": 92860 + }, + { + "epoch": 0.375206551469192, + "grad_norm": 853.9912719726562, + "learning_rate": 1.779853859446863e-06, + "loss": 52.9919, + "step": 92870 + }, + { + "epoch": 0.37524695273455966, + "grad_norm": 575.7948608398438, + "learning_rate": 1.778775055509453e-06, + "loss": 35.4131, + "step": 92880 + }, + { + "epoch": 0.3752873539999273, + "grad_norm": 772.3094482421875, + "learning_rate": 1.777696507870128e-06, + "loss": 57.1095, + "step": 92890 + }, + { + "epoch": 0.3753277552652949, + "grad_norm": 674.185546875, + "learning_rate": 1.776618216614704e-06, + "loss": 37.1195, + "step": 92900 + }, + { + "epoch": 0.3753681565306625, + "grad_norm": 735.7659301757812, + "learning_rate": 1.7755401818289748e-06, + "loss": 48.0352, + "step": 92910 + }, + { + "epoch": 0.37540855779603016, + "grad_norm": 256.3131408691406, + "learning_rate": 1.774462403598715e-06, + "loss": 37.8858, + "step": 92920 + }, + { + "epoch": 0.3754489590613978, + "grad_norm": 484.47283935546875, + "learning_rate": 1.7733848820096789e-06, + "loss": 41.405, + "step": 92930 + }, + { + "epoch": 0.37548936032676544, + "grad_norm": 271.2965393066406, + "learning_rate": 1.7723076171475995e-06, + "loss": 32.3131, + "step": 92940 + }, + { + "epoch": 0.3755297615921331, + "grad_norm": 523.7734375, + "learning_rate": 1.7712306090981896e-06, + "loss": 34.7863, + "step": 92950 + }, + { + "epoch": 0.3755701628575007, + "grad_norm": 724.2642822265625, + "learning_rate": 1.7701538579471423e-06, + "loss": 45.1211, + "step": 92960 + }, + { + "epoch": 0.3756105641228683, + "grad_norm": 291.0926513671875, + "learning_rate": 1.7690773637801295e-06, + "loss": 39.4207, + "step": 92970 + }, + { + "epoch": 0.37565096538823595, + "grad_norm": 673.997314453125, + "learning_rate": 1.768001126682803e-06, + "loss": 59.2063, + "step": 92980 + }, + { + "epoch": 0.3756913666536036, + "grad_norm": 1037.6884765625, + "learning_rate": 1.7669251467407938e-06, + "loss": 26.6047, + "step": 92990 + }, + { + "epoch": 0.3757317679189712, + "grad_norm": 931.728759765625, + "learning_rate": 1.7658494240397127e-06, + "loss": 50.3487, + "step": 93000 + }, + { + "epoch": 0.37577216918433887, + "grad_norm": 617.3193969726562, + "learning_rate": 1.7647739586651508e-06, + "loss": 38.8255, + "step": 93010 + }, + { + "epoch": 0.3758125704497065, + "grad_norm": 591.8453979492188, + "learning_rate": 1.7636987507026787e-06, + "loss": 51.4436, + "step": 93020 + }, + { + "epoch": 0.3758529717150741, + "grad_norm": 428.7930603027344, + "learning_rate": 1.762623800237841e-06, + "loss": 44.514, + "step": 93030 + }, + { + "epoch": 0.37589337298044173, + "grad_norm": 644.0867309570312, + "learning_rate": 1.7615491073561714e-06, + "loss": 52.178, + "step": 93040 + }, + { + "epoch": 0.37593377424580937, + "grad_norm": 565.926025390625, + "learning_rate": 1.760474672143177e-06, + "loss": 30.3734, + "step": 93050 + }, + { + "epoch": 0.375974175511177, + "grad_norm": 1001.1051635742188, + "learning_rate": 1.7594004946843458e-06, + "loss": 62.9556, + "step": 93060 + }, + { + "epoch": 0.37601457677654465, + "grad_norm": 1308.181884765625, + "learning_rate": 1.7583265750651446e-06, + "loss": 55.2899, + "step": 93070 + }, + { + "epoch": 0.3760549780419123, + "grad_norm": 611.5211181640625, + "learning_rate": 1.7572529133710204e-06, + "loss": 45.4431, + "step": 93080 + }, + { + "epoch": 0.37609537930727993, + "grad_norm": 416.8019104003906, + "learning_rate": 1.7561795096874002e-06, + "loss": 57.3495, + "step": 93090 + }, + { + "epoch": 0.3761357805726475, + "grad_norm": 222.11227416992188, + "learning_rate": 1.755106364099689e-06, + "loss": 41.9647, + "step": 93100 + }, + { + "epoch": 0.37617618183801516, + "grad_norm": 804.1954345703125, + "learning_rate": 1.7540334766932738e-06, + "loss": 60.3842, + "step": 93110 + }, + { + "epoch": 0.3762165831033828, + "grad_norm": 446.4356994628906, + "learning_rate": 1.7529608475535165e-06, + "loss": 40.3711, + "step": 93120 + }, + { + "epoch": 0.37625698436875044, + "grad_norm": 681.1730346679688, + "learning_rate": 1.7518884767657612e-06, + "loss": 73.1313, + "step": 93130 + }, + { + "epoch": 0.3762973856341181, + "grad_norm": 540.1654663085938, + "learning_rate": 1.7508163644153342e-06, + "loss": 40.5221, + "step": 93140 + }, + { + "epoch": 0.3763377868994857, + "grad_norm": 765.6929321289062, + "learning_rate": 1.7497445105875377e-06, + "loss": 50.165, + "step": 93150 + }, + { + "epoch": 0.3763781881648533, + "grad_norm": 246.20120239257812, + "learning_rate": 1.7486729153676536e-06, + "loss": 21.7108, + "step": 93160 + }, + { + "epoch": 0.37641858943022094, + "grad_norm": 367.9482727050781, + "learning_rate": 1.7476015788409439e-06, + "loss": 36.8506, + "step": 93170 + }, + { + "epoch": 0.3764589906955886, + "grad_norm": 562.669677734375, + "learning_rate": 1.7465305010926503e-06, + "loss": 35.8054, + "step": 93180 + }, + { + "epoch": 0.3764993919609562, + "grad_norm": 644.0034790039062, + "learning_rate": 1.745459682207995e-06, + "loss": 35.9354, + "step": 93190 + }, + { + "epoch": 0.37653979322632386, + "grad_norm": 548.04052734375, + "learning_rate": 1.7443891222721749e-06, + "loss": 25.4771, + "step": 93200 + }, + { + "epoch": 0.3765801944916915, + "grad_norm": 531.5404663085938, + "learning_rate": 1.7433188213703712e-06, + "loss": 40.387, + "step": 93210 + }, + { + "epoch": 0.3766205957570591, + "grad_norm": 481.58502197265625, + "learning_rate": 1.7422487795877424e-06, + "loss": 45.3946, + "step": 93220 + }, + { + "epoch": 0.3766609970224267, + "grad_norm": 1088.45654296875, + "learning_rate": 1.7411789970094257e-06, + "loss": 44.2268, + "step": 93230 + }, + { + "epoch": 0.37670139828779436, + "grad_norm": 716.853271484375, + "learning_rate": 1.7401094737205415e-06, + "loss": 70.572, + "step": 93240 + }, + { + "epoch": 0.376741799553162, + "grad_norm": 666.8670043945312, + "learning_rate": 1.739040209806186e-06, + "loss": 56.1082, + "step": 93250 + }, + { + "epoch": 0.37678220081852964, + "grad_norm": 255.77133178710938, + "learning_rate": 1.7379712053514352e-06, + "loss": 28.3571, + "step": 93260 + }, + { + "epoch": 0.3768226020838973, + "grad_norm": 473.16607666015625, + "learning_rate": 1.736902460441345e-06, + "loss": 20.6927, + "step": 93270 + }, + { + "epoch": 0.3768630033492649, + "grad_norm": 417.4438171386719, + "learning_rate": 1.735833975160952e-06, + "loss": 42.4167, + "step": 93280 + }, + { + "epoch": 0.3769034046146325, + "grad_norm": 463.9417724609375, + "learning_rate": 1.7347657495952675e-06, + "loss": 36.2839, + "step": 93290 + }, + { + "epoch": 0.37694380588000015, + "grad_norm": 874.1891479492188, + "learning_rate": 1.7336977838292867e-06, + "loss": 43.4433, + "step": 93300 + }, + { + "epoch": 0.3769842071453678, + "grad_norm": 634.763916015625, + "learning_rate": 1.7326300779479826e-06, + "loss": 37.5954, + "step": 93310 + }, + { + "epoch": 0.37702460841073543, + "grad_norm": 394.0238342285156, + "learning_rate": 1.731562632036307e-06, + "loss": 38.361, + "step": 93320 + }, + { + "epoch": 0.37706500967610307, + "grad_norm": 667.3516845703125, + "learning_rate": 1.730495446179194e-06, + "loss": 39.4061, + "step": 93330 + }, + { + "epoch": 0.3771054109414707, + "grad_norm": 703.3050537109375, + "learning_rate": 1.7294285204615536e-06, + "loss": 57.0635, + "step": 93340 + }, + { + "epoch": 0.3771458122068383, + "grad_norm": 562.1600341796875, + "learning_rate": 1.7283618549682757e-06, + "loss": 33.8003, + "step": 93350 + }, + { + "epoch": 0.37718621347220593, + "grad_norm": 707.1860961914062, + "learning_rate": 1.727295449784232e-06, + "loss": 35.2757, + "step": 93360 + }, + { + "epoch": 0.3772266147375736, + "grad_norm": 540.1780395507812, + "learning_rate": 1.726229304994268e-06, + "loss": 41.1969, + "step": 93370 + }, + { + "epoch": 0.3772670160029412, + "grad_norm": 282.990234375, + "learning_rate": 1.7251634206832135e-06, + "loss": 44.1028, + "step": 93380 + }, + { + "epoch": 0.37730741726830885, + "grad_norm": 473.59356689453125, + "learning_rate": 1.7240977969358757e-06, + "loss": 27.7578, + "step": 93390 + }, + { + "epoch": 0.3773478185336765, + "grad_norm": 718.213134765625, + "learning_rate": 1.7230324338370425e-06, + "loss": 38.0047, + "step": 93400 + }, + { + "epoch": 0.37738821979904413, + "grad_norm": 972.40283203125, + "learning_rate": 1.721967331471479e-06, + "loss": 41.0903, + "step": 93410 + }, + { + "epoch": 0.3774286210644117, + "grad_norm": 570.8587036132812, + "learning_rate": 1.7209024899239297e-06, + "loss": 46.1088, + "step": 93420 + }, + { + "epoch": 0.37746902232977936, + "grad_norm": 2588.669189453125, + "learning_rate": 1.7198379092791213e-06, + "loss": 43.5806, + "step": 93430 + }, + { + "epoch": 0.377509423595147, + "grad_norm": 710.2998657226562, + "learning_rate": 1.7187735896217567e-06, + "loss": 30.8053, + "step": 93440 + }, + { + "epoch": 0.37754982486051464, + "grad_norm": 534.6858520507812, + "learning_rate": 1.7177095310365205e-06, + "loss": 34.1006, + "step": 93450 + }, + { + "epoch": 0.3775902261258823, + "grad_norm": 371.9433898925781, + "learning_rate": 1.7166457336080716e-06, + "loss": 32.8527, + "step": 93460 + }, + { + "epoch": 0.3776306273912499, + "grad_norm": 568.0192260742188, + "learning_rate": 1.715582197421053e-06, + "loss": 39.4708, + "step": 93470 + }, + { + "epoch": 0.3776710286566175, + "grad_norm": 374.3291320800781, + "learning_rate": 1.7145189225600856e-06, + "loss": 35.2344, + "step": 93480 + }, + { + "epoch": 0.37771142992198514, + "grad_norm": 666.5590209960938, + "learning_rate": 1.7134559091097691e-06, + "loss": 33.7738, + "step": 93490 + }, + { + "epoch": 0.3777518311873528, + "grad_norm": 950.9321899414062, + "learning_rate": 1.7123931571546826e-06, + "loss": 37.9408, + "step": 93500 + }, + { + "epoch": 0.3777922324527204, + "grad_norm": 535.5814208984375, + "learning_rate": 1.711330666779385e-06, + "loss": 57.5564, + "step": 93510 + }, + { + "epoch": 0.37783263371808806, + "grad_norm": 379.9372253417969, + "learning_rate": 1.7102684380684109e-06, + "loss": 37.9954, + "step": 93520 + }, + { + "epoch": 0.3778730349834557, + "grad_norm": 724.26220703125, + "learning_rate": 1.7092064711062816e-06, + "loss": 70.6237, + "step": 93530 + }, + { + "epoch": 0.3779134362488233, + "grad_norm": 755.5435791015625, + "learning_rate": 1.708144765977492e-06, + "loss": 33.0022, + "step": 93540 + }, + { + "epoch": 0.3779538375141909, + "grad_norm": 487.3462219238281, + "learning_rate": 1.7070833227665146e-06, + "loss": 53.9496, + "step": 93550 + }, + { + "epoch": 0.37799423877955857, + "grad_norm": 503.11114501953125, + "learning_rate": 1.7060221415578042e-06, + "loss": 33.7394, + "step": 93560 + }, + { + "epoch": 0.3780346400449262, + "grad_norm": 771.644287109375, + "learning_rate": 1.7049612224357954e-06, + "loss": 40.8785, + "step": 93570 + }, + { + "epoch": 0.37807504131029385, + "grad_norm": 934.612060546875, + "learning_rate": 1.703900565484899e-06, + "loss": 39.6854, + "step": 93580 + }, + { + "epoch": 0.3781154425756615, + "grad_norm": 567.306640625, + "learning_rate": 1.7028401707895082e-06, + "loss": 51.0686, + "step": 93590 + }, + { + "epoch": 0.3781558438410291, + "grad_norm": 150.63609313964844, + "learning_rate": 1.7017800384339928e-06, + "loss": 32.8338, + "step": 93600 + }, + { + "epoch": 0.3781962451063967, + "grad_norm": 863.6294555664062, + "learning_rate": 1.700720168502703e-06, + "loss": 52.1523, + "step": 93610 + }, + { + "epoch": 0.37823664637176435, + "grad_norm": 652.1657104492188, + "learning_rate": 1.6996605610799682e-06, + "loss": 30.39, + "step": 93620 + }, + { + "epoch": 0.378277047637132, + "grad_norm": 402.42376708984375, + "learning_rate": 1.6986012162500953e-06, + "loss": 28.3965, + "step": 93630 + }, + { + "epoch": 0.37831744890249963, + "grad_norm": 489.7448425292969, + "learning_rate": 1.697542134097373e-06, + "loss": 60.2797, + "step": 93640 + }, + { + "epoch": 0.37835785016786727, + "grad_norm": 696.7704467773438, + "learning_rate": 1.6964833147060661e-06, + "loss": 42.7316, + "step": 93650 + }, + { + "epoch": 0.3783982514332349, + "grad_norm": 561.4448852539062, + "learning_rate": 1.6954247581604216e-06, + "loss": 37.4174, + "step": 93660 + }, + { + "epoch": 0.3784386526986025, + "grad_norm": 652.9007568359375, + "learning_rate": 1.6943664645446622e-06, + "loss": 50.4149, + "step": 93670 + }, + { + "epoch": 0.37847905396397014, + "grad_norm": 342.3523864746094, + "learning_rate": 1.6933084339429935e-06, + "loss": 40.2572, + "step": 93680 + }, + { + "epoch": 0.3785194552293378, + "grad_norm": 428.1909484863281, + "learning_rate": 1.692250666439596e-06, + "loss": 25.0063, + "step": 93690 + }, + { + "epoch": 0.3785598564947054, + "grad_norm": 569.578369140625, + "learning_rate": 1.6911931621186329e-06, + "loss": 45.803, + "step": 93700 + }, + { + "epoch": 0.37860025776007306, + "grad_norm": 1233.8858642578125, + "learning_rate": 1.6901359210642444e-06, + "loss": 49.5512, + "step": 93710 + }, + { + "epoch": 0.3786406590254407, + "grad_norm": 615.4265747070312, + "learning_rate": 1.6890789433605508e-06, + "loss": 44.6923, + "step": 93720 + }, + { + "epoch": 0.37868106029080834, + "grad_norm": 457.422119140625, + "learning_rate": 1.6880222290916503e-06, + "loss": 51.6429, + "step": 93730 + }, + { + "epoch": 0.3787214615561759, + "grad_norm": 483.12017822265625, + "learning_rate": 1.686965778341621e-06, + "loss": 54.6015, + "step": 93740 + }, + { + "epoch": 0.37876186282154356, + "grad_norm": 637.249755859375, + "learning_rate": 1.68590959119452e-06, + "loss": 38.6967, + "step": 93750 + }, + { + "epoch": 0.3788022640869112, + "grad_norm": 549.8311767578125, + "learning_rate": 1.6848536677343836e-06, + "loss": 43.5703, + "step": 93760 + }, + { + "epoch": 0.37884266535227884, + "grad_norm": 885.5505981445312, + "learning_rate": 1.683798008045226e-06, + "loss": 37.328, + "step": 93770 + }, + { + "epoch": 0.3788830666176465, + "grad_norm": 344.5069274902344, + "learning_rate": 1.6827426122110412e-06, + "loss": 44.6456, + "step": 93780 + }, + { + "epoch": 0.3789234678830141, + "grad_norm": 464.1234436035156, + "learning_rate": 1.6816874803158034e-06, + "loss": 71.6938, + "step": 93790 + }, + { + "epoch": 0.3789638691483817, + "grad_norm": 640.6181030273438, + "learning_rate": 1.6806326124434634e-06, + "loss": 61.3986, + "step": 93800 + }, + { + "epoch": 0.37900427041374934, + "grad_norm": 549.3226318359375, + "learning_rate": 1.679578008677953e-06, + "loss": 46.0698, + "step": 93810 + }, + { + "epoch": 0.379044671679117, + "grad_norm": 782.4042358398438, + "learning_rate": 1.6785236691031808e-06, + "loss": 57.5303, + "step": 93820 + }, + { + "epoch": 0.3790850729444846, + "grad_norm": 581.6171875, + "learning_rate": 1.6774695938030378e-06, + "loss": 54.6885, + "step": 93830 + }, + { + "epoch": 0.37912547420985226, + "grad_norm": 858.0625, + "learning_rate": 1.6764157828613902e-06, + "loss": 40.6032, + "step": 93840 + }, + { + "epoch": 0.3791658754752199, + "grad_norm": 550.3126220703125, + "learning_rate": 1.675362236362086e-06, + "loss": 80.7207, + "step": 93850 + }, + { + "epoch": 0.3792062767405875, + "grad_norm": 340.3455810546875, + "learning_rate": 1.6743089543889502e-06, + "loss": 35.5949, + "step": 93860 + }, + { + "epoch": 0.37924667800595513, + "grad_norm": 271.68463134765625, + "learning_rate": 1.6732559370257884e-06, + "loss": 42.7064, + "step": 93870 + }, + { + "epoch": 0.37928707927132277, + "grad_norm": 528.7034912109375, + "learning_rate": 1.6722031843563836e-06, + "loss": 43.5807, + "step": 93880 + }, + { + "epoch": 0.3793274805366904, + "grad_norm": 365.6889953613281, + "learning_rate": 1.6711506964644992e-06, + "loss": 41.8635, + "step": 93890 + }, + { + "epoch": 0.37936788180205805, + "grad_norm": 153.63197326660156, + "learning_rate": 1.6700984734338765e-06, + "loss": 30.3246, + "step": 93900 + }, + { + "epoch": 0.3794082830674257, + "grad_norm": 293.03607177734375, + "learning_rate": 1.669046515348236e-06, + "loss": 38.6674, + "step": 93910 + }, + { + "epoch": 0.37944868433279333, + "grad_norm": 653.3015747070312, + "learning_rate": 1.6679948222912773e-06, + "loss": 39.9429, + "step": 93920 + }, + { + "epoch": 0.3794890855981609, + "grad_norm": 658.6498413085938, + "learning_rate": 1.6669433943466789e-06, + "loss": 43.2089, + "step": 93930 + }, + { + "epoch": 0.37952948686352855, + "grad_norm": 335.7240905761719, + "learning_rate": 1.6658922315980975e-06, + "loss": 33.1409, + "step": 93940 + }, + { + "epoch": 0.3795698881288962, + "grad_norm": 508.2664489746094, + "learning_rate": 1.6648413341291703e-06, + "loss": 47.2963, + "step": 93950 + }, + { + "epoch": 0.37961028939426383, + "grad_norm": 690.1048583984375, + "learning_rate": 1.6637907020235117e-06, + "loss": 57.5872, + "step": 93960 + }, + { + "epoch": 0.3796506906596315, + "grad_norm": 659.0160522460938, + "learning_rate": 1.662740335364717e-06, + "loss": 39.8609, + "step": 93970 + }, + { + "epoch": 0.3796910919249991, + "grad_norm": 615.6521606445312, + "learning_rate": 1.661690234236355e-06, + "loss": 29.5961, + "step": 93980 + }, + { + "epoch": 0.3797314931903667, + "grad_norm": 206.93682861328125, + "learning_rate": 1.6606403987219815e-06, + "loss": 38.3237, + "step": 93990 + }, + { + "epoch": 0.37977189445573434, + "grad_norm": 464.7280578613281, + "learning_rate": 1.6595908289051266e-06, + "loss": 53.9895, + "step": 94000 + }, + { + "epoch": 0.379812295721102, + "grad_norm": 509.655517578125, + "learning_rate": 1.6585415248692988e-06, + "loss": 30.5599, + "step": 94010 + }, + { + "epoch": 0.3798526969864696, + "grad_norm": 773.5880126953125, + "learning_rate": 1.6574924866979863e-06, + "loss": 32.8057, + "step": 94020 + }, + { + "epoch": 0.37989309825183726, + "grad_norm": 350.2411193847656, + "learning_rate": 1.6564437144746564e-06, + "loss": 35.1408, + "step": 94030 + }, + { + "epoch": 0.3799334995172049, + "grad_norm": 837.4682006835938, + "learning_rate": 1.6553952082827562e-06, + "loss": 49.9956, + "step": 94040 + }, + { + "epoch": 0.37997390078257254, + "grad_norm": 509.6433410644531, + "learning_rate": 1.6543469682057105e-06, + "loss": 54.7281, + "step": 94050 + }, + { + "epoch": 0.3800143020479401, + "grad_norm": 472.0282897949219, + "learning_rate": 1.6532989943269207e-06, + "loss": 37.2688, + "step": 94060 + }, + { + "epoch": 0.38005470331330776, + "grad_norm": 635.9703979492188, + "learning_rate": 1.6522512867297707e-06, + "loss": 45.623, + "step": 94070 + }, + { + "epoch": 0.3800951045786754, + "grad_norm": 397.69342041015625, + "learning_rate": 1.6512038454976198e-06, + "loss": 43.5267, + "step": 94080 + }, + { + "epoch": 0.38013550584404304, + "grad_norm": 46.28896713256836, + "learning_rate": 1.6501566707138116e-06, + "loss": 22.4504, + "step": 94090 + }, + { + "epoch": 0.3801759071094107, + "grad_norm": 543.3253784179688, + "learning_rate": 1.6491097624616637e-06, + "loss": 35.8103, + "step": 94100 + }, + { + "epoch": 0.3802163083747783, + "grad_norm": 1130.6624755859375, + "learning_rate": 1.6480631208244735e-06, + "loss": 44.4341, + "step": 94110 + }, + { + "epoch": 0.3802567096401459, + "grad_norm": 398.57989501953125, + "learning_rate": 1.6470167458855174e-06, + "loss": 34.3871, + "step": 94120 + }, + { + "epoch": 0.38029711090551355, + "grad_norm": 889.4392700195312, + "learning_rate": 1.645970637728051e-06, + "loss": 75.1114, + "step": 94130 + }, + { + "epoch": 0.3803375121708812, + "grad_norm": 800.8850708007812, + "learning_rate": 1.6449247964353094e-06, + "loss": 43.0828, + "step": 94140 + }, + { + "epoch": 0.3803779134362488, + "grad_norm": 571.4644165039062, + "learning_rate": 1.643879222090502e-06, + "loss": 33.5428, + "step": 94150 + }, + { + "epoch": 0.38041831470161647, + "grad_norm": 795.34716796875, + "learning_rate": 1.642833914776823e-06, + "loss": 34.8854, + "step": 94160 + }, + { + "epoch": 0.3804587159669841, + "grad_norm": 645.9882202148438, + "learning_rate": 1.6417888745774418e-06, + "loss": 35.6082, + "step": 94170 + }, + { + "epoch": 0.3804991172323517, + "grad_norm": 530.788330078125, + "learning_rate": 1.640744101575506e-06, + "loss": 51.9296, + "step": 94180 + }, + { + "epoch": 0.38053951849771933, + "grad_norm": 446.3039855957031, + "learning_rate": 1.6396995958541468e-06, + "loss": 47.5382, + "step": 94190 + }, + { + "epoch": 0.38057991976308697, + "grad_norm": 371.99151611328125, + "learning_rate": 1.6386553574964691e-06, + "loss": 29.8966, + "step": 94200 + }, + { + "epoch": 0.3806203210284546, + "grad_norm": 950.2408447265625, + "learning_rate": 1.6376113865855585e-06, + "loss": 49.887, + "step": 94210 + }, + { + "epoch": 0.38066072229382225, + "grad_norm": 445.62725830078125, + "learning_rate": 1.6365676832044796e-06, + "loss": 45.9934, + "step": 94220 + }, + { + "epoch": 0.3807011235591899, + "grad_norm": 718.4339599609375, + "learning_rate": 1.6355242474362732e-06, + "loss": 30.1935, + "step": 94230 + }, + { + "epoch": 0.38074152482455753, + "grad_norm": 719.7804565429688, + "learning_rate": 1.634481079363961e-06, + "loss": 39.0847, + "step": 94240 + }, + { + "epoch": 0.3807819260899251, + "grad_norm": 529.1168212890625, + "learning_rate": 1.6334381790705439e-06, + "loss": 71.6443, + "step": 94250 + }, + { + "epoch": 0.38082232735529276, + "grad_norm": 499.4634094238281, + "learning_rate": 1.6323955466390001e-06, + "loss": 30.6561, + "step": 94260 + }, + { + "epoch": 0.3808627286206604, + "grad_norm": 327.93621826171875, + "learning_rate": 1.6313531821522876e-06, + "loss": 39.9715, + "step": 94270 + }, + { + "epoch": 0.38090312988602804, + "grad_norm": 888.1876831054688, + "learning_rate": 1.6303110856933413e-06, + "loss": 33.8202, + "step": 94280 + }, + { + "epoch": 0.3809435311513957, + "grad_norm": 438.6484680175781, + "learning_rate": 1.629269257345078e-06, + "loss": 38.3931, + "step": 94290 + }, + { + "epoch": 0.3809839324167633, + "grad_norm": 420.45562744140625, + "learning_rate": 1.628227697190391e-06, + "loss": 42.586, + "step": 94300 + }, + { + "epoch": 0.3810243336821309, + "grad_norm": 732.1066284179688, + "learning_rate": 1.6271864053121528e-06, + "loss": 35.4111, + "step": 94310 + }, + { + "epoch": 0.38106473494749854, + "grad_norm": 625.7940063476562, + "learning_rate": 1.6261453817932122e-06, + "loss": 37.4915, + "step": 94320 + }, + { + "epoch": 0.3811051362128662, + "grad_norm": 635.6824340820312, + "learning_rate": 1.6251046267163988e-06, + "loss": 33.4941, + "step": 94330 + }, + { + "epoch": 0.3811455374782338, + "grad_norm": 484.083984375, + "learning_rate": 1.6240641401645224e-06, + "loss": 42.375, + "step": 94340 + }, + { + "epoch": 0.38118593874360146, + "grad_norm": 810.295654296875, + "learning_rate": 1.6230239222203687e-06, + "loss": 57.4285, + "step": 94350 + }, + { + "epoch": 0.3812263400089691, + "grad_norm": 357.74725341796875, + "learning_rate": 1.621983972966703e-06, + "loss": 44.1577, + "step": 94360 + }, + { + "epoch": 0.3812667412743367, + "grad_norm": 466.22796630859375, + "learning_rate": 1.6209442924862684e-06, + "loss": 48.3063, + "step": 94370 + }, + { + "epoch": 0.3813071425397043, + "grad_norm": 590.7434692382812, + "learning_rate": 1.6199048808617896e-06, + "loss": 39.6602, + "step": 94380 + }, + { + "epoch": 0.38134754380507196, + "grad_norm": 316.4775085449219, + "learning_rate": 1.6188657381759676e-06, + "loss": 31.71, + "step": 94390 + }, + { + "epoch": 0.3813879450704396, + "grad_norm": 1969.42626953125, + "learning_rate": 1.6178268645114826e-06, + "loss": 42.9319, + "step": 94400 + }, + { + "epoch": 0.38142834633580724, + "grad_norm": 799.62744140625, + "learning_rate": 1.6167882599509904e-06, + "loss": 39.8405, + "step": 94410 + }, + { + "epoch": 0.3814687476011749, + "grad_norm": 479.5269775390625, + "learning_rate": 1.6157499245771296e-06, + "loss": 41.1756, + "step": 94420 + }, + { + "epoch": 0.3815091488665425, + "grad_norm": 762.2589721679688, + "learning_rate": 1.6147118584725163e-06, + "loss": 42.6559, + "step": 94430 + }, + { + "epoch": 0.3815495501319101, + "grad_norm": 319.53082275390625, + "learning_rate": 1.6136740617197433e-06, + "loss": 40.6062, + "step": 94440 + }, + { + "epoch": 0.38158995139727775, + "grad_norm": 497.3779296875, + "learning_rate": 1.612636534401384e-06, + "loss": 34.069, + "step": 94450 + }, + { + "epoch": 0.3816303526626454, + "grad_norm": 530.7467041015625, + "learning_rate": 1.61159927659999e-06, + "loss": 28.3256, + "step": 94460 + }, + { + "epoch": 0.38167075392801303, + "grad_norm": 1004.8090209960938, + "learning_rate": 1.6105622883980893e-06, + "loss": 56.0737, + "step": 94470 + }, + { + "epoch": 0.38171115519338067, + "grad_norm": 660.3544311523438, + "learning_rate": 1.6095255698781954e-06, + "loss": 44.8425, + "step": 94480 + }, + { + "epoch": 0.3817515564587483, + "grad_norm": 487.4358215332031, + "learning_rate": 1.6084891211227899e-06, + "loss": 35.9278, + "step": 94490 + }, + { + "epoch": 0.3817919577241159, + "grad_norm": 572.1942749023438, + "learning_rate": 1.6074529422143398e-06, + "loss": 51.2289, + "step": 94500 + }, + { + "epoch": 0.38183235898948353, + "grad_norm": 584.8763427734375, + "learning_rate": 1.6064170332352897e-06, + "loss": 35.8506, + "step": 94510 + }, + { + "epoch": 0.3818727602548512, + "grad_norm": 854.034423828125, + "learning_rate": 1.6053813942680618e-06, + "loss": 60.1789, + "step": 94520 + }, + { + "epoch": 0.3819131615202188, + "grad_norm": 959.2164916992188, + "learning_rate": 1.604346025395057e-06, + "loss": 33.7964, + "step": 94530 + }, + { + "epoch": 0.38195356278558645, + "grad_norm": 408.621337890625, + "learning_rate": 1.6033109266986552e-06, + "loss": 19.004, + "step": 94540 + }, + { + "epoch": 0.3819939640509541, + "grad_norm": 713.4255981445312, + "learning_rate": 1.602276098261214e-06, + "loss": 37.2563, + "step": 94550 + }, + { + "epoch": 0.38203436531632173, + "grad_norm": 722.92724609375, + "learning_rate": 1.6012415401650706e-06, + "loss": 35.3043, + "step": 94560 + }, + { + "epoch": 0.3820747665816893, + "grad_norm": 515.4312744140625, + "learning_rate": 1.6002072524925395e-06, + "loss": 46.4359, + "step": 94570 + }, + { + "epoch": 0.38211516784705696, + "grad_norm": 202.4333953857422, + "learning_rate": 1.5991732353259142e-06, + "loss": 52.5165, + "step": 94580 + }, + { + "epoch": 0.3821555691124246, + "grad_norm": 406.05755615234375, + "learning_rate": 1.598139488747467e-06, + "loss": 48.7197, + "step": 94590 + }, + { + "epoch": 0.38219597037779224, + "grad_norm": 581.1624755859375, + "learning_rate": 1.5971060128394483e-06, + "loss": 34.375, + "step": 94600 + }, + { + "epoch": 0.3822363716431599, + "grad_norm": 655.8013916015625, + "learning_rate": 1.596072807684087e-06, + "loss": 56.5818, + "step": 94610 + }, + { + "epoch": 0.3822767729085275, + "grad_norm": 336.11737060546875, + "learning_rate": 1.5950398733635903e-06, + "loss": 37.6246, + "step": 94620 + }, + { + "epoch": 0.3823171741738951, + "grad_norm": 669.8641357421875, + "learning_rate": 1.5940072099601446e-06, + "loss": 41.239, + "step": 94630 + }, + { + "epoch": 0.38235757543926274, + "grad_norm": 644.0601806640625, + "learning_rate": 1.5929748175559135e-06, + "loss": 30.1086, + "step": 94640 + }, + { + "epoch": 0.3823979767046304, + "grad_norm": 625.1327514648438, + "learning_rate": 1.5919426962330398e-06, + "loss": 35.999, + "step": 94650 + }, + { + "epoch": 0.382438377969998, + "grad_norm": 1454.2506103515625, + "learning_rate": 1.5909108460736455e-06, + "loss": 41.8045, + "step": 94660 + }, + { + "epoch": 0.38247877923536566, + "grad_norm": 456.4172668457031, + "learning_rate": 1.589879267159829e-06, + "loss": 37.1018, + "step": 94670 + }, + { + "epoch": 0.3825191805007333, + "grad_norm": 652.0750122070312, + "learning_rate": 1.5888479595736695e-06, + "loss": 52.8909, + "step": 94680 + }, + { + "epoch": 0.3825595817661009, + "grad_norm": 454.7263488769531, + "learning_rate": 1.5878169233972218e-06, + "loss": 30.1145, + "step": 94690 + }, + { + "epoch": 0.3825999830314685, + "grad_norm": 155.27793884277344, + "learning_rate": 1.5867861587125228e-06, + "loss": 33.1408, + "step": 94700 + }, + { + "epoch": 0.38264038429683617, + "grad_norm": 857.6797485351562, + "learning_rate": 1.5857556656015837e-06, + "loss": 90.8161, + "step": 94710 + }, + { + "epoch": 0.3826807855622038, + "grad_norm": 497.7012634277344, + "learning_rate": 1.5847254441463978e-06, + "loss": 41.9356, + "step": 94720 + }, + { + "epoch": 0.38272118682757145, + "grad_norm": 1495.0457763671875, + "learning_rate": 1.583695494428934e-06, + "loss": 69.8086, + "step": 94730 + }, + { + "epoch": 0.3827615880929391, + "grad_norm": 652.8819580078125, + "learning_rate": 1.5826658165311409e-06, + "loss": 36.6699, + "step": 94740 + }, + { + "epoch": 0.3828019893583067, + "grad_norm": 773.2257690429688, + "learning_rate": 1.5816364105349451e-06, + "loss": 41.6954, + "step": 94750 + }, + { + "epoch": 0.3828423906236743, + "grad_norm": 373.1732177734375, + "learning_rate": 1.5806072765222524e-06, + "loss": 41.5614, + "step": 94760 + }, + { + "epoch": 0.38288279188904195, + "grad_norm": 578.3794555664062, + "learning_rate": 1.5795784145749453e-06, + "loss": 42.0277, + "step": 94770 + }, + { + "epoch": 0.3829231931544096, + "grad_norm": 700.9031372070312, + "learning_rate": 1.5785498247748864e-06, + "loss": 45.0889, + "step": 94780 + }, + { + "epoch": 0.38296359441977723, + "grad_norm": 719.7659301757812, + "learning_rate": 1.5775215072039157e-06, + "loss": 30.6197, + "step": 94790 + }, + { + "epoch": 0.38300399568514487, + "grad_norm": 396.6778259277344, + "learning_rate": 1.5764934619438515e-06, + "loss": 60.4507, + "step": 94800 + }, + { + "epoch": 0.3830443969505125, + "grad_norm": 715.6949462890625, + "learning_rate": 1.5754656890764912e-06, + "loss": 38.4569, + "step": 94810 + }, + { + "epoch": 0.3830847982158801, + "grad_norm": 518.1155395507812, + "learning_rate": 1.5744381886836091e-06, + "loss": 45.3779, + "step": 94820 + }, + { + "epoch": 0.38312519948124774, + "grad_norm": 495.7441101074219, + "learning_rate": 1.5734109608469612e-06, + "loss": 39.1621, + "step": 94830 + }, + { + "epoch": 0.3831656007466154, + "grad_norm": 888.2919311523438, + "learning_rate": 1.5723840056482731e-06, + "loss": 45.9851, + "step": 94840 + }, + { + "epoch": 0.383206002011983, + "grad_norm": 582.6542358398438, + "learning_rate": 1.5713573231692613e-06, + "loss": 31.6152, + "step": 94850 + }, + { + "epoch": 0.38324640327735066, + "grad_norm": 399.1988525390625, + "learning_rate": 1.5703309134916116e-06, + "loss": 29.2875, + "step": 94860 + }, + { + "epoch": 0.3832868045427183, + "grad_norm": 770.6976318359375, + "learning_rate": 1.5693047766969916e-06, + "loss": 44.1505, + "step": 94870 + }, + { + "epoch": 0.38332720580808594, + "grad_norm": 67.18172454833984, + "learning_rate": 1.568278912867045e-06, + "loss": 47.0008, + "step": 94880 + }, + { + "epoch": 0.3833676070734535, + "grad_norm": 550.394775390625, + "learning_rate": 1.5672533220833962e-06, + "loss": 31.0633, + "step": 94890 + }, + { + "epoch": 0.38340800833882116, + "grad_norm": 470.9195251464844, + "learning_rate": 1.5662280044276467e-06, + "loss": 58.1111, + "step": 94900 + }, + { + "epoch": 0.3834484096041888, + "grad_norm": 434.5940246582031, + "learning_rate": 1.5652029599813773e-06, + "loss": 68.5302, + "step": 94910 + }, + { + "epoch": 0.38348881086955644, + "grad_norm": 886.968505859375, + "learning_rate": 1.564178188826143e-06, + "loss": 52.7653, + "step": 94920 + }, + { + "epoch": 0.3835292121349241, + "grad_norm": 858.910888671875, + "learning_rate": 1.5631536910434807e-06, + "loss": 37.843, + "step": 94930 + }, + { + "epoch": 0.3835696134002917, + "grad_norm": 523.4095458984375, + "learning_rate": 1.5621294667149079e-06, + "loss": 44.0606, + "step": 94940 + }, + { + "epoch": 0.3836100146656593, + "grad_norm": 576.0836791992188, + "learning_rate": 1.561105515921915e-06, + "loss": 52.5642, + "step": 94950 + }, + { + "epoch": 0.38365041593102694, + "grad_norm": 454.44219970703125, + "learning_rate": 1.5600818387459748e-06, + "loss": 36.4562, + "step": 94960 + }, + { + "epoch": 0.3836908171963946, + "grad_norm": 704.0636596679688, + "learning_rate": 1.559058435268535e-06, + "loss": 43.8542, + "step": 94970 + }, + { + "epoch": 0.3837312184617622, + "grad_norm": 521.6298217773438, + "learning_rate": 1.558035305571024e-06, + "loss": 38.1112, + "step": 94980 + }, + { + "epoch": 0.38377161972712986, + "grad_norm": 363.50469970703125, + "learning_rate": 1.5570124497348466e-06, + "loss": 38.1875, + "step": 94990 + }, + { + "epoch": 0.3838120209924975, + "grad_norm": 851.0789184570312, + "learning_rate": 1.5559898678413898e-06, + "loss": 43.4976, + "step": 95000 + } + ], + "logging_steps": 10, + "max_steps": 123750, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}