{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 9503, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 36.36130527170318, "learning_rate": 3.4965034965034967e-08, "loss": 1.8874, "step": 1 }, { "epoch": 0.0, "grad_norm": 44.35509081795796, "learning_rate": 6.993006993006993e-08, "loss": 1.9623, "step": 2 }, { "epoch": 0.0, "grad_norm": 113.15059681611625, "learning_rate": 1.048951048951049e-07, "loss": 1.8315, "step": 3 }, { "epoch": 0.0, "grad_norm": 26.480686239957798, "learning_rate": 1.3986013986013987e-07, "loss": 1.8153, "step": 4 }, { "epoch": 0.0, "grad_norm": 73.55538638754199, "learning_rate": 1.7482517482517484e-07, "loss": 1.8261, "step": 5 }, { "epoch": 0.0, "grad_norm": 44.47995785747746, "learning_rate": 2.097902097902098e-07, "loss": 1.7684, "step": 6 }, { "epoch": 0.0, "grad_norm": 41.19958226318581, "learning_rate": 2.447552447552448e-07, "loss": 1.7842, "step": 7 }, { "epoch": 0.0, "grad_norm": 162.3686148569218, "learning_rate": 2.7972027972027973e-07, "loss": 1.8389, "step": 8 }, { "epoch": 0.0, "grad_norm": 37.77584116684587, "learning_rate": 3.1468531468531473e-07, "loss": 1.739, "step": 9 }, { "epoch": 0.0, "grad_norm": 33.567385322447045, "learning_rate": 3.496503496503497e-07, "loss": 1.823, "step": 10 }, { "epoch": 0.0, "grad_norm": 79.25205422236616, "learning_rate": 3.846153846153847e-07, "loss": 1.8705, "step": 11 }, { "epoch": 0.0, "grad_norm": 29.01030979777457, "learning_rate": 4.195804195804196e-07, "loss": 1.8595, "step": 12 }, { "epoch": 0.0, "grad_norm": 32.29577345941253, "learning_rate": 4.5454545454545457e-07, "loss": 1.7776, "step": 13 }, { "epoch": 0.0, "grad_norm": 75.68576026951285, "learning_rate": 4.895104895104896e-07, "loss": 1.7253, "step": 14 }, { "epoch": 0.0, "grad_norm": 44.31708263717978, "learning_rate": 5.244755244755246e-07, "loss": 1.6779, "step": 15 }, { "epoch": 0.0, "grad_norm": 219.5878152798679, "learning_rate": 5.594405594405595e-07, "loss": 1.702, "step": 16 }, { "epoch": 0.0, "grad_norm": 33.33211308321619, "learning_rate": 5.944055944055945e-07, "loss": 1.5494, "step": 17 }, { "epoch": 0.0, "grad_norm": 28.77943791137145, "learning_rate": 6.293706293706295e-07, "loss": 1.5089, "step": 18 }, { "epoch": 0.0, "grad_norm": 60.96425095593503, "learning_rate": 6.643356643356644e-07, "loss": 1.4609, "step": 19 }, { "epoch": 0.0, "grad_norm": 21.119045219567745, "learning_rate": 6.993006993006994e-07, "loss": 1.4632, "step": 20 }, { "epoch": 0.0, "grad_norm": 30.087479668348955, "learning_rate": 7.342657342657343e-07, "loss": 1.5663, "step": 21 }, { "epoch": 0.0, "grad_norm": 22.134522059806123, "learning_rate": 7.692307692307694e-07, "loss": 1.4837, "step": 22 }, { "epoch": 0.0, "grad_norm": 29.13387995825167, "learning_rate": 8.041958041958043e-07, "loss": 1.4038, "step": 23 }, { "epoch": 0.0, "grad_norm": 8.488083114385395, "learning_rate": 8.391608391608393e-07, "loss": 1.2151, "step": 24 }, { "epoch": 0.0, "grad_norm": 10.410898426509407, "learning_rate": 8.741258741258741e-07, "loss": 1.2384, "step": 25 }, { "epoch": 0.0, "grad_norm": 7.196734276367997, "learning_rate": 9.090909090909091e-07, "loss": 1.2946, "step": 26 }, { "epoch": 0.0, "grad_norm": 5.855727873849648, "learning_rate": 9.44055944055944e-07, "loss": 1.3565, "step": 27 }, { "epoch": 0.0, "grad_norm": 9.775814502806083, "learning_rate": 9.790209790209791e-07, "loss": 1.2246, "step": 28 }, { "epoch": 0.0, "grad_norm": 4.850328552766668, "learning_rate": 1.013986013986014e-06, "loss": 0.8368, "step": 29 }, { "epoch": 0.0, "grad_norm": 5.46226966648362, "learning_rate": 1.0489510489510491e-06, "loss": 0.8374, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.878572158964688, "learning_rate": 1.083916083916084e-06, "loss": 1.1998, "step": 31 }, { "epoch": 0.0, "grad_norm": 7.823282919023532, "learning_rate": 1.118881118881119e-06, "loss": 1.3195, "step": 32 }, { "epoch": 0.0, "grad_norm": 10.031172311822704, "learning_rate": 1.153846153846154e-06, "loss": 1.1547, "step": 33 }, { "epoch": 0.0, "grad_norm": 5.192629192579879, "learning_rate": 1.188811188811189e-06, "loss": 1.1146, "step": 34 }, { "epoch": 0.0, "grad_norm": 7.2298322921309905, "learning_rate": 1.2237762237762238e-06, "loss": 1.0787, "step": 35 }, { "epoch": 0.0, "grad_norm": 6.136735379037229, "learning_rate": 1.258741258741259e-06, "loss": 1.1542, "step": 36 }, { "epoch": 0.0, "grad_norm": 5.0826929997956345, "learning_rate": 1.2937062937062938e-06, "loss": 1.1515, "step": 37 }, { "epoch": 0.0, "grad_norm": 9.642838223265395, "learning_rate": 1.3286713286713287e-06, "loss": 1.0195, "step": 38 }, { "epoch": 0.0, "grad_norm": 21.361559216190493, "learning_rate": 1.3636363636363636e-06, "loss": 1.099, "step": 39 }, { "epoch": 0.0, "grad_norm": 4.808653298973119, "learning_rate": 1.3986013986013987e-06, "loss": 1.0705, "step": 40 }, { "epoch": 0.0, "grad_norm": 12.37717034792203, "learning_rate": 1.4335664335664336e-06, "loss": 1.0939, "step": 41 }, { "epoch": 0.0, "grad_norm": 6.615327284739696, "learning_rate": 1.4685314685314685e-06, "loss": 1.0464, "step": 42 }, { "epoch": 0.0, "grad_norm": 4.179391150292978, "learning_rate": 1.5034965034965034e-06, "loss": 1.0878, "step": 43 }, { "epoch": 0.0, "grad_norm": 20.006567323936093, "learning_rate": 1.5384615384615387e-06, "loss": 1.1121, "step": 44 }, { "epoch": 0.0, "grad_norm": 5.213316956884923, "learning_rate": 1.5734265734265736e-06, "loss": 1.0629, "step": 45 }, { "epoch": 0.0, "grad_norm": 4.22220233163102, "learning_rate": 1.6083916083916085e-06, "loss": 1.1175, "step": 46 }, { "epoch": 0.0, "grad_norm": 3.945556190990595, "learning_rate": 1.6433566433566434e-06, "loss": 1.0423, "step": 47 }, { "epoch": 0.01, "grad_norm": 3.6470904938849054, "learning_rate": 1.6783216783216785e-06, "loss": 1.0511, "step": 48 }, { "epoch": 0.01, "grad_norm": 4.446567502839809, "learning_rate": 1.7132867132867134e-06, "loss": 1.0117, "step": 49 }, { "epoch": 0.01, "grad_norm": 6.4133295800026815, "learning_rate": 1.7482517482517483e-06, "loss": 1.0825, "step": 50 }, { "epoch": 0.01, "grad_norm": 9.263781826142708, "learning_rate": 1.7832167832167834e-06, "loss": 1.0928, "step": 51 }, { "epoch": 0.01, "grad_norm": 3.3410522050072218, "learning_rate": 1.8181818181818183e-06, "loss": 0.9511, "step": 52 }, { "epoch": 0.01, "grad_norm": 5.078099442521805, "learning_rate": 1.8531468531468532e-06, "loss": 1.0446, "step": 53 }, { "epoch": 0.01, "grad_norm": 3.7783601770563657, "learning_rate": 1.888111888111888e-06, "loss": 0.9961, "step": 54 }, { "epoch": 0.01, "grad_norm": 5.3245107534214275, "learning_rate": 1.9230769230769234e-06, "loss": 1.002, "step": 55 }, { "epoch": 0.01, "grad_norm": 4.550873551909127, "learning_rate": 1.9580419580419583e-06, "loss": 1.0045, "step": 56 }, { "epoch": 0.01, "grad_norm": 5.298300519654384, "learning_rate": 1.993006993006993e-06, "loss": 0.9899, "step": 57 }, { "epoch": 0.01, "grad_norm": 5.636808401697184, "learning_rate": 2.027972027972028e-06, "loss": 1.0293, "step": 58 }, { "epoch": 0.01, "grad_norm": 4.989845861785016, "learning_rate": 2.0629370629370634e-06, "loss": 0.9782, "step": 59 }, { "epoch": 0.01, "grad_norm": 4.036151597648198, "learning_rate": 2.0979020979020983e-06, "loss": 0.9476, "step": 60 }, { "epoch": 0.01, "grad_norm": 4.748491625435901, "learning_rate": 2.132867132867133e-06, "loss": 1.0607, "step": 61 }, { "epoch": 0.01, "grad_norm": 4.981488543652494, "learning_rate": 2.167832167832168e-06, "loss": 1.0103, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.747396934973974, "learning_rate": 2.202797202797203e-06, "loss": 0.9394, "step": 63 }, { "epoch": 0.01, "grad_norm": 6.959028225064991, "learning_rate": 2.237762237762238e-06, "loss": 1.0223, "step": 64 }, { "epoch": 0.01, "grad_norm": 4.1572731448374745, "learning_rate": 2.2727272727272728e-06, "loss": 0.8474, "step": 65 }, { "epoch": 0.01, "grad_norm": 4.264763279942374, "learning_rate": 2.307692307692308e-06, "loss": 1.0108, "step": 66 }, { "epoch": 0.01, "grad_norm": 6.095523926263317, "learning_rate": 2.342657342657343e-06, "loss": 0.9966, "step": 67 }, { "epoch": 0.01, "grad_norm": 2.0378141611911293, "learning_rate": 2.377622377622378e-06, "loss": 0.6418, "step": 68 }, { "epoch": 0.01, "grad_norm": 12.350034886280204, "learning_rate": 2.4125874125874128e-06, "loss": 0.9833, "step": 69 }, { "epoch": 0.01, "grad_norm": 4.400350695457104, "learning_rate": 2.4475524475524477e-06, "loss": 1.014, "step": 70 }, { "epoch": 0.01, "grad_norm": 8.539091480211638, "learning_rate": 2.4825174825174825e-06, "loss": 0.9588, "step": 71 }, { "epoch": 0.01, "grad_norm": 2.76197207482864, "learning_rate": 2.517482517482518e-06, "loss": 1.0505, "step": 72 }, { "epoch": 0.01, "grad_norm": 4.105444561403831, "learning_rate": 2.5524475524475528e-06, "loss": 1.0268, "step": 73 }, { "epoch": 0.01, "grad_norm": 4.808311827182721, "learning_rate": 2.5874125874125877e-06, "loss": 0.9266, "step": 74 }, { "epoch": 0.01, "grad_norm": 3.3062924637115896, "learning_rate": 2.6223776223776225e-06, "loss": 0.9497, "step": 75 }, { "epoch": 0.01, "grad_norm": 5.359465673010584, "learning_rate": 2.6573426573426574e-06, "loss": 1.0061, "step": 76 }, { "epoch": 0.01, "grad_norm": 3.5894653414802122, "learning_rate": 2.6923076923076923e-06, "loss": 0.9365, "step": 77 }, { "epoch": 0.01, "grad_norm": 5.6964431945565215, "learning_rate": 2.7272727272727272e-06, "loss": 0.9945, "step": 78 }, { "epoch": 0.01, "grad_norm": 3.554710415267673, "learning_rate": 2.762237762237762e-06, "loss": 0.909, "step": 79 }, { "epoch": 0.01, "grad_norm": 3.6224966193912933, "learning_rate": 2.7972027972027974e-06, "loss": 0.9841, "step": 80 }, { "epoch": 0.01, "grad_norm": 4.100484077566709, "learning_rate": 2.8321678321678323e-06, "loss": 0.9785, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.7434549809880306, "learning_rate": 2.8671328671328672e-06, "loss": 0.9091, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.910473094338652, "learning_rate": 2.902097902097902e-06, "loss": 0.9128, "step": 83 }, { "epoch": 0.01, "grad_norm": 4.681590491267232, "learning_rate": 2.937062937062937e-06, "loss": 0.9723, "step": 84 }, { "epoch": 0.01, "grad_norm": 3.8481519631092667, "learning_rate": 2.972027972027972e-06, "loss": 0.9126, "step": 85 }, { "epoch": 0.01, "grad_norm": 3.061017336429524, "learning_rate": 3.006993006993007e-06, "loss": 0.949, "step": 86 }, { "epoch": 0.01, "grad_norm": 3.1054284861421526, "learning_rate": 3.0419580419580425e-06, "loss": 0.954, "step": 87 }, { "epoch": 0.01, "grad_norm": 5.216222090801437, "learning_rate": 3.0769230769230774e-06, "loss": 0.999, "step": 88 }, { "epoch": 0.01, "grad_norm": 9.344055127943243, "learning_rate": 3.1118881118881123e-06, "loss": 0.8865, "step": 89 }, { "epoch": 0.01, "grad_norm": 3.916337109234207, "learning_rate": 3.1468531468531472e-06, "loss": 0.9985, "step": 90 }, { "epoch": 0.01, "grad_norm": 5.646273915281262, "learning_rate": 3.181818181818182e-06, "loss": 0.9368, "step": 91 }, { "epoch": 0.01, "grad_norm": 3.2528928171773566, "learning_rate": 3.216783216783217e-06, "loss": 0.8946, "step": 92 }, { "epoch": 0.01, "grad_norm": 3.670080997940339, "learning_rate": 3.251748251748252e-06, "loss": 0.883, "step": 93 }, { "epoch": 0.01, "grad_norm": 4.3498888254458885, "learning_rate": 3.286713286713287e-06, "loss": 0.8334, "step": 94 }, { "epoch": 0.01, "grad_norm": 3.4044098902580067, "learning_rate": 3.321678321678322e-06, "loss": 0.8982, "step": 95 }, { "epoch": 0.01, "grad_norm": 5.964267535512436, "learning_rate": 3.356643356643357e-06, "loss": 0.901, "step": 96 }, { "epoch": 0.01, "grad_norm": 7.2547749034420175, "learning_rate": 3.391608391608392e-06, "loss": 0.888, "step": 97 }, { "epoch": 0.01, "grad_norm": 3.6445555952684594, "learning_rate": 3.426573426573427e-06, "loss": 0.9357, "step": 98 }, { "epoch": 0.01, "grad_norm": 3.6183641170544676, "learning_rate": 3.4615384615384617e-06, "loss": 0.8898, "step": 99 }, { "epoch": 0.01, "grad_norm": 3.683704401531074, "learning_rate": 3.4965034965034966e-06, "loss": 0.9716, "step": 100 }, { "epoch": 0.01, "grad_norm": 4.642503279279386, "learning_rate": 3.5314685314685315e-06, "loss": 0.8517, "step": 101 }, { "epoch": 0.01, "grad_norm": 3.5861147598949064, "learning_rate": 3.566433566433567e-06, "loss": 0.8891, "step": 102 }, { "epoch": 0.01, "grad_norm": 6.15820179982336, "learning_rate": 3.6013986013986017e-06, "loss": 0.9273, "step": 103 }, { "epoch": 0.01, "grad_norm": 3.0966116013351526, "learning_rate": 3.6363636363636366e-06, "loss": 0.8954, "step": 104 }, { "epoch": 0.01, "grad_norm": 4.590992779608164, "learning_rate": 3.6713286713286715e-06, "loss": 0.8935, "step": 105 }, { "epoch": 0.01, "grad_norm": 6.257062049913251, "learning_rate": 3.7062937062937064e-06, "loss": 0.7903, "step": 106 }, { "epoch": 0.01, "grad_norm": 3.1160354318045598, "learning_rate": 3.7412587412587413e-06, "loss": 0.9888, "step": 107 }, { "epoch": 0.01, "grad_norm": 3.557620314215135, "learning_rate": 3.776223776223776e-06, "loss": 0.8218, "step": 108 }, { "epoch": 0.01, "grad_norm": 3.690586014565839, "learning_rate": 3.811188811188811e-06, "loss": 0.9652, "step": 109 }, { "epoch": 0.01, "grad_norm": 4.649988211859253, "learning_rate": 3.846153846153847e-06, "loss": 0.874, "step": 110 }, { "epoch": 0.01, "grad_norm": 3.4358159594855593, "learning_rate": 3.881118881118881e-06, "loss": 0.8646, "step": 111 }, { "epoch": 0.01, "grad_norm": 4.275525174285379, "learning_rate": 3.916083916083917e-06, "loss": 0.8884, "step": 112 }, { "epoch": 0.01, "grad_norm": 4.163330070858521, "learning_rate": 3.951048951048951e-06, "loss": 0.8856, "step": 113 }, { "epoch": 0.01, "grad_norm": 2.0432246966048093, "learning_rate": 3.986013986013986e-06, "loss": 0.6153, "step": 114 }, { "epoch": 0.01, "grad_norm": 3.6243328469121807, "learning_rate": 4.020979020979021e-06, "loss": 0.9266, "step": 115 }, { "epoch": 0.01, "grad_norm": 3.7038828259874026, "learning_rate": 4.055944055944056e-06, "loss": 0.8613, "step": 116 }, { "epoch": 0.01, "grad_norm": 6.587444425327127, "learning_rate": 4.0909090909090915e-06, "loss": 0.8677, "step": 117 }, { "epoch": 0.01, "grad_norm": 2.8022237219414, "learning_rate": 4.125874125874127e-06, "loss": 0.9397, "step": 118 }, { "epoch": 0.01, "grad_norm": 4.591322074020894, "learning_rate": 4.160839160839161e-06, "loss": 0.8731, "step": 119 }, { "epoch": 0.01, "grad_norm": 4.273093982425321, "learning_rate": 4.195804195804197e-06, "loss": 0.8526, "step": 120 }, { "epoch": 0.01, "grad_norm": 4.197128473907411, "learning_rate": 4.230769230769231e-06, "loss": 0.8944, "step": 121 }, { "epoch": 0.01, "grad_norm": 3.223590603807003, "learning_rate": 4.265734265734266e-06, "loss": 0.8939, "step": 122 }, { "epoch": 0.01, "grad_norm": 3.7474696753810433, "learning_rate": 4.300699300699301e-06, "loss": 0.893, "step": 123 }, { "epoch": 0.01, "grad_norm": 7.509667764665236, "learning_rate": 4.335664335664336e-06, "loss": 0.8958, "step": 124 }, { "epoch": 0.01, "grad_norm": 3.4746005923931715, "learning_rate": 4.3706293706293715e-06, "loss": 0.9511, "step": 125 }, { "epoch": 0.01, "grad_norm": 3.8752998189935024, "learning_rate": 4.405594405594406e-06, "loss": 0.9831, "step": 126 }, { "epoch": 0.01, "grad_norm": 2.6750347835184956, "learning_rate": 4.440559440559441e-06, "loss": 0.8389, "step": 127 }, { "epoch": 0.01, "grad_norm": 3.059225334055095, "learning_rate": 4.475524475524476e-06, "loss": 0.9191, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.8492139079466123, "learning_rate": 4.510489510489511e-06, "loss": 0.9256, "step": 129 }, { "epoch": 0.01, "grad_norm": 3.466866349881368, "learning_rate": 4.5454545454545455e-06, "loss": 0.9199, "step": 130 }, { "epoch": 0.01, "grad_norm": 2.5173267002299946, "learning_rate": 4.580419580419581e-06, "loss": 0.8514, "step": 131 }, { "epoch": 0.01, "grad_norm": 3.6745126525258236, "learning_rate": 4.615384615384616e-06, "loss": 0.9873, "step": 132 }, { "epoch": 0.01, "grad_norm": 2.9338742337892234, "learning_rate": 4.650349650349651e-06, "loss": 0.8569, "step": 133 }, { "epoch": 0.01, "grad_norm": 9.843844549233959, "learning_rate": 4.685314685314686e-06, "loss": 0.9136, "step": 134 }, { "epoch": 0.01, "grad_norm": 2.624661321804515, "learning_rate": 4.72027972027972e-06, "loss": 0.9501, "step": 135 }, { "epoch": 0.01, "grad_norm": 3.175344406567936, "learning_rate": 4.755244755244756e-06, "loss": 0.7571, "step": 136 }, { "epoch": 0.01, "grad_norm": 2.9193507761169832, "learning_rate": 4.79020979020979e-06, "loss": 0.8804, "step": 137 }, { "epoch": 0.01, "grad_norm": 3.136679694610541, "learning_rate": 4.8251748251748255e-06, "loss": 0.8397, "step": 138 }, { "epoch": 0.01, "grad_norm": 4.002722016275543, "learning_rate": 4.86013986013986e-06, "loss": 0.7947, "step": 139 }, { "epoch": 0.01, "grad_norm": 4.059401974397323, "learning_rate": 4.895104895104895e-06, "loss": 0.9127, "step": 140 }, { "epoch": 0.01, "grad_norm": 6.278913674680803, "learning_rate": 4.930069930069931e-06, "loss": 0.88, "step": 141 }, { "epoch": 0.01, "grad_norm": 4.830877808380784, "learning_rate": 4.965034965034965e-06, "loss": 0.9, "step": 142 }, { "epoch": 0.02, "grad_norm": 3.983823394564358, "learning_rate": 5e-06, "loss": 0.9034, "step": 143 }, { "epoch": 0.02, "grad_norm": 5.804644998821451, "learning_rate": 5.034965034965036e-06, "loss": 0.7911, "step": 144 }, { "epoch": 0.02, "grad_norm": 4.991083677118832, "learning_rate": 5.06993006993007e-06, "loss": 0.9021, "step": 145 }, { "epoch": 0.02, "grad_norm": 5.151416376092996, "learning_rate": 5.1048951048951055e-06, "loss": 0.9136, "step": 146 }, { "epoch": 0.02, "grad_norm": 3.2803307360013227, "learning_rate": 5.13986013986014e-06, "loss": 0.862, "step": 147 }, { "epoch": 0.02, "grad_norm": 3.2255731360831343, "learning_rate": 5.174825174825175e-06, "loss": 0.8045, "step": 148 }, { "epoch": 0.02, "grad_norm": 10.688893991857855, "learning_rate": 5.20979020979021e-06, "loss": 0.8606, "step": 149 }, { "epoch": 0.02, "grad_norm": 3.0166064964847963, "learning_rate": 5.244755244755245e-06, "loss": 0.8728, "step": 150 }, { "epoch": 0.02, "grad_norm": 5.697806716640856, "learning_rate": 5.27972027972028e-06, "loss": 0.8277, "step": 151 }, { "epoch": 0.02, "grad_norm": 3.9797528895264196, "learning_rate": 5.314685314685315e-06, "loss": 0.8832, "step": 152 }, { "epoch": 0.02, "grad_norm": 3.5741820214637494, "learning_rate": 5.34965034965035e-06, "loss": 0.7884, "step": 153 }, { "epoch": 0.02, "grad_norm": 3.904019428236043, "learning_rate": 5.384615384615385e-06, "loss": 0.8818, "step": 154 }, { "epoch": 0.02, "grad_norm": 5.113543577877246, "learning_rate": 5.41958041958042e-06, "loss": 0.8515, "step": 155 }, { "epoch": 0.02, "grad_norm": 3.916916159534253, "learning_rate": 5.4545454545454545e-06, "loss": 0.883, "step": 156 }, { "epoch": 0.02, "grad_norm": 3.026332966896275, "learning_rate": 5.48951048951049e-06, "loss": 0.8202, "step": 157 }, { "epoch": 0.02, "grad_norm": 4.2747535576313895, "learning_rate": 5.524475524475524e-06, "loss": 0.8201, "step": 158 }, { "epoch": 0.02, "grad_norm": 4.915673342738748, "learning_rate": 5.5594405594405596e-06, "loss": 0.8054, "step": 159 }, { "epoch": 0.02, "grad_norm": 1.8868567094503492, "learning_rate": 5.594405594405595e-06, "loss": 0.5897, "step": 160 }, { "epoch": 0.02, "grad_norm": 4.031567311252644, "learning_rate": 5.629370629370629e-06, "loss": 0.9305, "step": 161 }, { "epoch": 0.02, "grad_norm": 5.750704520517035, "learning_rate": 5.664335664335665e-06, "loss": 0.8506, "step": 162 }, { "epoch": 0.02, "grad_norm": 3.9833189282740107, "learning_rate": 5.699300699300699e-06, "loss": 0.7708, "step": 163 }, { "epoch": 0.02, "grad_norm": 8.354822607813489, "learning_rate": 5.7342657342657345e-06, "loss": 0.8819, "step": 164 }, { "epoch": 0.02, "grad_norm": 3.130090251045954, "learning_rate": 5.769230769230769e-06, "loss": 0.769, "step": 165 }, { "epoch": 0.02, "grad_norm": 5.0268037848757725, "learning_rate": 5.804195804195804e-06, "loss": 0.8636, "step": 166 }, { "epoch": 0.02, "grad_norm": 4.590445411100409, "learning_rate": 5.83916083916084e-06, "loss": 0.9014, "step": 167 }, { "epoch": 0.02, "grad_norm": 3.9573676244626235, "learning_rate": 5.874125874125874e-06, "loss": 0.9767, "step": 168 }, { "epoch": 0.02, "grad_norm": 3.7976970282517706, "learning_rate": 5.90909090909091e-06, "loss": 0.7677, "step": 169 }, { "epoch": 0.02, "grad_norm": 3.20200552669586, "learning_rate": 5.944055944055944e-06, "loss": 0.8744, "step": 170 }, { "epoch": 0.02, "grad_norm": 5.403103369719815, "learning_rate": 5.97902097902098e-06, "loss": 0.7684, "step": 171 }, { "epoch": 0.02, "grad_norm": 5.688672125161367, "learning_rate": 6.013986013986014e-06, "loss": 0.828, "step": 172 }, { "epoch": 0.02, "grad_norm": 3.300203165530322, "learning_rate": 6.04895104895105e-06, "loss": 0.8845, "step": 173 }, { "epoch": 0.02, "grad_norm": 2.9549829165853074, "learning_rate": 6.083916083916085e-06, "loss": 0.8396, "step": 174 }, { "epoch": 0.02, "grad_norm": 3.137866556517222, "learning_rate": 6.1188811188811196e-06, "loss": 0.8187, "step": 175 }, { "epoch": 0.02, "grad_norm": 4.821237941276991, "learning_rate": 6.153846153846155e-06, "loss": 0.8752, "step": 176 }, { "epoch": 0.02, "grad_norm": 3.1343880638693324, "learning_rate": 6.188811188811189e-06, "loss": 0.8322, "step": 177 }, { "epoch": 0.02, "grad_norm": 3.187172327993686, "learning_rate": 6.223776223776225e-06, "loss": 0.7712, "step": 178 }, { "epoch": 0.02, "grad_norm": 3.822050694898768, "learning_rate": 6.258741258741259e-06, "loss": 0.8855, "step": 179 }, { "epoch": 0.02, "grad_norm": 4.567034848940679, "learning_rate": 6.2937062937062944e-06, "loss": 0.8808, "step": 180 }, { "epoch": 0.02, "grad_norm": 3.878919913487739, "learning_rate": 6.32867132867133e-06, "loss": 0.816, "step": 181 }, { "epoch": 0.02, "grad_norm": 3.235297169720441, "learning_rate": 6.363636363636364e-06, "loss": 0.7188, "step": 182 }, { "epoch": 0.02, "grad_norm": 3.930819892851912, "learning_rate": 6.3986013986013996e-06, "loss": 0.9284, "step": 183 }, { "epoch": 0.02, "grad_norm": 3.670121712591447, "learning_rate": 6.433566433566434e-06, "loss": 0.7796, "step": 184 }, { "epoch": 0.02, "grad_norm": 2.6886770215888207, "learning_rate": 6.468531468531469e-06, "loss": 0.8803, "step": 185 }, { "epoch": 0.02, "grad_norm": 3.5148986203471853, "learning_rate": 6.503496503496504e-06, "loss": 0.8603, "step": 186 }, { "epoch": 0.02, "grad_norm": 8.671074065374444, "learning_rate": 6.538461538461539e-06, "loss": 0.8593, "step": 187 }, { "epoch": 0.02, "grad_norm": 4.011761895974061, "learning_rate": 6.573426573426574e-06, "loss": 0.8902, "step": 188 }, { "epoch": 0.02, "grad_norm": 2.48637995228731, "learning_rate": 6.608391608391609e-06, "loss": 0.7607, "step": 189 }, { "epoch": 0.02, "grad_norm": 3.3132599184181504, "learning_rate": 6.643356643356644e-06, "loss": 0.8014, "step": 190 }, { "epoch": 0.02, "grad_norm": 3.055806698354549, "learning_rate": 6.678321678321679e-06, "loss": 0.9812, "step": 191 }, { "epoch": 0.02, "grad_norm": 3.2543072248631977, "learning_rate": 6.713286713286714e-06, "loss": 0.8687, "step": 192 }, { "epoch": 0.02, "grad_norm": 3.040625426096151, "learning_rate": 6.7482517482517485e-06, "loss": 0.7797, "step": 193 }, { "epoch": 0.02, "grad_norm": 3.215620104060743, "learning_rate": 6.783216783216784e-06, "loss": 0.8274, "step": 194 }, { "epoch": 0.02, "grad_norm": 2.471180655628137, "learning_rate": 6.818181818181818e-06, "loss": 0.6187, "step": 195 }, { "epoch": 0.02, "grad_norm": 4.359167344334515, "learning_rate": 6.853146853146854e-06, "loss": 0.8114, "step": 196 }, { "epoch": 0.02, "grad_norm": 4.290199760313723, "learning_rate": 6.888111888111889e-06, "loss": 0.8188, "step": 197 }, { "epoch": 0.02, "grad_norm": 5.885960883267336, "learning_rate": 6.923076923076923e-06, "loss": 0.8622, "step": 198 }, { "epoch": 0.02, "grad_norm": 2.63144492234704, "learning_rate": 6.958041958041959e-06, "loss": 0.9063, "step": 199 }, { "epoch": 0.02, "grad_norm": 3.3612366109316167, "learning_rate": 6.993006993006993e-06, "loss": 0.862, "step": 200 }, { "epoch": 0.02, "grad_norm": 4.586306665535676, "learning_rate": 7.0279720279720285e-06, "loss": 0.8577, "step": 201 }, { "epoch": 0.02, "grad_norm": 3.2176272960473553, "learning_rate": 7.062937062937063e-06, "loss": 0.8225, "step": 202 }, { "epoch": 0.02, "grad_norm": 3.416044727353836, "learning_rate": 7.097902097902098e-06, "loss": 0.8681, "step": 203 }, { "epoch": 0.02, "grad_norm": 5.4811630787160945, "learning_rate": 7.132867132867134e-06, "loss": 0.8767, "step": 204 }, { "epoch": 0.02, "grad_norm": 3.567725865125878, "learning_rate": 7.167832167832168e-06, "loss": 0.8528, "step": 205 }, { "epoch": 0.02, "grad_norm": 3.2885446735714443, "learning_rate": 7.202797202797203e-06, "loss": 0.756, "step": 206 }, { "epoch": 0.02, "grad_norm": 4.884812749781261, "learning_rate": 7.237762237762238e-06, "loss": 0.8742, "step": 207 }, { "epoch": 0.02, "grad_norm": 3.2827496038865385, "learning_rate": 7.272727272727273e-06, "loss": 0.854, "step": 208 }, { "epoch": 0.02, "grad_norm": 3.8769620411554135, "learning_rate": 7.307692307692308e-06, "loss": 0.9437, "step": 209 }, { "epoch": 0.02, "grad_norm": 3.152946273455516, "learning_rate": 7.342657342657343e-06, "loss": 0.7662, "step": 210 }, { "epoch": 0.02, "grad_norm": 2.7643870516857074, "learning_rate": 7.377622377622379e-06, "loss": 0.9363, "step": 211 }, { "epoch": 0.02, "grad_norm": 3.519504752467641, "learning_rate": 7.412587412587413e-06, "loss": 0.8056, "step": 212 }, { "epoch": 0.02, "grad_norm": 1.7919893275590533, "learning_rate": 7.447552447552449e-06, "loss": 0.6353, "step": 213 }, { "epoch": 0.02, "grad_norm": 2.9052671409624167, "learning_rate": 7.4825174825174825e-06, "loss": 0.8335, "step": 214 }, { "epoch": 0.02, "grad_norm": 3.479591408911271, "learning_rate": 7.517482517482519e-06, "loss": 0.8493, "step": 215 }, { "epoch": 0.02, "grad_norm": 4.377375722973853, "learning_rate": 7.552447552447552e-06, "loss": 0.7782, "step": 216 }, { "epoch": 0.02, "grad_norm": 3.2542755743953102, "learning_rate": 7.5874125874125885e-06, "loss": 0.8053, "step": 217 }, { "epoch": 0.02, "grad_norm": 3.2686387521722158, "learning_rate": 7.622377622377622e-06, "loss": 0.8326, "step": 218 }, { "epoch": 0.02, "grad_norm": 8.023160327593423, "learning_rate": 7.657342657342658e-06, "loss": 0.8383, "step": 219 }, { "epoch": 0.02, "grad_norm": 3.0319106359639614, "learning_rate": 7.692307692307694e-06, "loss": 0.873, "step": 220 }, { "epoch": 0.02, "grad_norm": 3.857091847979987, "learning_rate": 7.727272727272727e-06, "loss": 0.8656, "step": 221 }, { "epoch": 0.02, "grad_norm": 3.137600063788741, "learning_rate": 7.762237762237763e-06, "loss": 0.7638, "step": 222 }, { "epoch": 0.02, "grad_norm": 4.911586562313934, "learning_rate": 7.797202797202798e-06, "loss": 0.7693, "step": 223 }, { "epoch": 0.02, "grad_norm": 2.9977640714571807, "learning_rate": 7.832167832167833e-06, "loss": 0.8815, "step": 224 }, { "epoch": 0.02, "grad_norm": 2.6947791098337848, "learning_rate": 7.867132867132867e-06, "loss": 0.8508, "step": 225 }, { "epoch": 0.02, "grad_norm": 2.70865229471086, "learning_rate": 7.902097902097902e-06, "loss": 0.8039, "step": 226 }, { "epoch": 0.02, "grad_norm": 3.9607344558352198, "learning_rate": 7.937062937062937e-06, "loss": 0.8913, "step": 227 }, { "epoch": 0.02, "grad_norm": 2.7336519000248947, "learning_rate": 7.972027972027973e-06, "loss": 0.855, "step": 228 }, { "epoch": 0.02, "grad_norm": 3.197027691020523, "learning_rate": 8.006993006993008e-06, "loss": 0.8883, "step": 229 }, { "epoch": 0.02, "grad_norm": 2.8607529888322873, "learning_rate": 8.041958041958042e-06, "loss": 0.8003, "step": 230 }, { "epoch": 0.02, "grad_norm": 6.811064736453866, "learning_rate": 8.076923076923077e-06, "loss": 0.9102, "step": 231 }, { "epoch": 0.02, "grad_norm": 1.9400768388006893, "learning_rate": 8.111888111888112e-06, "loss": 0.6545, "step": 232 }, { "epoch": 0.02, "grad_norm": 3.1601907658724624, "learning_rate": 8.146853146853148e-06, "loss": 0.8757, "step": 233 }, { "epoch": 0.02, "grad_norm": 10.646638408245371, "learning_rate": 8.181818181818183e-06, "loss": 0.8677, "step": 234 }, { "epoch": 0.02, "grad_norm": 5.665181271364294, "learning_rate": 8.216783216783217e-06, "loss": 0.7499, "step": 235 }, { "epoch": 0.02, "grad_norm": 4.519763391278135, "learning_rate": 8.251748251748254e-06, "loss": 0.8092, "step": 236 }, { "epoch": 0.02, "grad_norm": 4.799514568767844, "learning_rate": 8.286713286713287e-06, "loss": 0.8237, "step": 237 }, { "epoch": 0.03, "grad_norm": 4.3167393880149625, "learning_rate": 8.321678321678323e-06, "loss": 0.7958, "step": 238 }, { "epoch": 0.03, "grad_norm": 4.207750765393011, "learning_rate": 8.356643356643356e-06, "loss": 0.9301, "step": 239 }, { "epoch": 0.03, "grad_norm": 3.0242485006730004, "learning_rate": 8.391608391608393e-06, "loss": 0.8689, "step": 240 }, { "epoch": 0.03, "grad_norm": 4.061479269990459, "learning_rate": 8.426573426573428e-06, "loss": 0.8528, "step": 241 }, { "epoch": 0.03, "grad_norm": 3.1543956915127973, "learning_rate": 8.461538461538462e-06, "loss": 0.8714, "step": 242 }, { "epoch": 0.03, "grad_norm": 5.237885935376305, "learning_rate": 8.496503496503497e-06, "loss": 0.7606, "step": 243 }, { "epoch": 0.03, "grad_norm": 1.950280179321886, "learning_rate": 8.531468531468533e-06, "loss": 0.6335, "step": 244 }, { "epoch": 0.03, "grad_norm": 1.5896370335044245, "learning_rate": 8.566433566433568e-06, "loss": 0.6082, "step": 245 }, { "epoch": 0.03, "grad_norm": 11.404835463536644, "learning_rate": 8.601398601398602e-06, "loss": 0.8319, "step": 246 }, { "epoch": 0.03, "grad_norm": 3.238800843683949, "learning_rate": 8.636363636363637e-06, "loss": 0.7277, "step": 247 }, { "epoch": 0.03, "grad_norm": 4.816682891374414, "learning_rate": 8.671328671328672e-06, "loss": 0.8987, "step": 248 }, { "epoch": 0.03, "grad_norm": 2.937171146872307, "learning_rate": 8.706293706293708e-06, "loss": 0.8465, "step": 249 }, { "epoch": 0.03, "grad_norm": 2.9152180387496465, "learning_rate": 8.741258741258743e-06, "loss": 0.8588, "step": 250 }, { "epoch": 0.03, "grad_norm": 3.2233086259580013, "learning_rate": 8.776223776223777e-06, "loss": 0.7794, "step": 251 }, { "epoch": 0.03, "grad_norm": 2.2240451951651212, "learning_rate": 8.811188811188812e-06, "loss": 0.6543, "step": 252 }, { "epoch": 0.03, "grad_norm": 3.9198696085904783, "learning_rate": 8.846153846153847e-06, "loss": 0.8946, "step": 253 }, { "epoch": 0.03, "grad_norm": 3.0548127678087447, "learning_rate": 8.881118881118883e-06, "loss": 0.8461, "step": 254 }, { "epoch": 0.03, "grad_norm": 3.0034454684442564, "learning_rate": 8.916083916083916e-06, "loss": 0.836, "step": 255 }, { "epoch": 0.03, "grad_norm": 4.356298512009719, "learning_rate": 8.951048951048951e-06, "loss": 0.8044, "step": 256 }, { "epoch": 0.03, "grad_norm": 3.7719381130534395, "learning_rate": 8.986013986013987e-06, "loss": 0.8598, "step": 257 }, { "epoch": 0.03, "grad_norm": 2.961248303513742, "learning_rate": 9.020979020979022e-06, "loss": 0.8625, "step": 258 }, { "epoch": 0.03, "grad_norm": 2.494139526881577, "learning_rate": 9.055944055944057e-06, "loss": 0.8373, "step": 259 }, { "epoch": 0.03, "grad_norm": 11.62343900060472, "learning_rate": 9.090909090909091e-06, "loss": 0.8142, "step": 260 }, { "epoch": 0.03, "grad_norm": 2.976610214894441, "learning_rate": 9.125874125874126e-06, "loss": 0.8383, "step": 261 }, { "epoch": 0.03, "grad_norm": 3.152334899926377, "learning_rate": 9.160839160839162e-06, "loss": 0.831, "step": 262 }, { "epoch": 0.03, "grad_norm": 2.943305109872976, "learning_rate": 9.195804195804197e-06, "loss": 0.7919, "step": 263 }, { "epoch": 0.03, "grad_norm": 1.6262854108938019, "learning_rate": 9.230769230769232e-06, "loss": 0.6016, "step": 264 }, { "epoch": 0.03, "grad_norm": 2.9820347523442305, "learning_rate": 9.265734265734266e-06, "loss": 0.842, "step": 265 }, { "epoch": 0.03, "grad_norm": 2.965150403672179, "learning_rate": 9.300699300699301e-06, "loss": 0.8197, "step": 266 }, { "epoch": 0.03, "grad_norm": 4.015217382657136, "learning_rate": 9.335664335664337e-06, "loss": 0.7963, "step": 267 }, { "epoch": 0.03, "grad_norm": 3.5798922752668783, "learning_rate": 9.370629370629372e-06, "loss": 0.8153, "step": 268 }, { "epoch": 0.03, "grad_norm": 3.150189471491218, "learning_rate": 9.405594405594406e-06, "loss": 0.7447, "step": 269 }, { "epoch": 0.03, "grad_norm": 2.8869071132238133, "learning_rate": 9.44055944055944e-06, "loss": 0.8291, "step": 270 }, { "epoch": 0.03, "grad_norm": 5.031692940937883, "learning_rate": 9.475524475524476e-06, "loss": 0.7182, "step": 271 }, { "epoch": 0.03, "grad_norm": 3.3293803718839365, "learning_rate": 9.510489510489511e-06, "loss": 0.7867, "step": 272 }, { "epoch": 0.03, "grad_norm": 3.411178547420612, "learning_rate": 9.545454545454547e-06, "loss": 0.849, "step": 273 }, { "epoch": 0.03, "grad_norm": 1.8859064509058623, "learning_rate": 9.58041958041958e-06, "loss": 0.5875, "step": 274 }, { "epoch": 0.03, "grad_norm": 3.9199928735928666, "learning_rate": 9.615384615384616e-06, "loss": 0.8625, "step": 275 }, { "epoch": 0.03, "grad_norm": 3.4204022429701837, "learning_rate": 9.650349650349651e-06, "loss": 0.787, "step": 276 }, { "epoch": 0.03, "grad_norm": 3.280881830881753, "learning_rate": 9.685314685314686e-06, "loss": 0.8484, "step": 277 }, { "epoch": 0.03, "grad_norm": 3.9903254168619435, "learning_rate": 9.72027972027972e-06, "loss": 0.8444, "step": 278 }, { "epoch": 0.03, "grad_norm": 3.2907073267044566, "learning_rate": 9.755244755244755e-06, "loss": 0.7562, "step": 279 }, { "epoch": 0.03, "grad_norm": 3.454956915827184, "learning_rate": 9.79020979020979e-06, "loss": 0.7811, "step": 280 }, { "epoch": 0.03, "grad_norm": 4.564600921480734, "learning_rate": 9.825174825174826e-06, "loss": 0.7783, "step": 281 }, { "epoch": 0.03, "grad_norm": 3.0241945476082597, "learning_rate": 9.860139860139861e-06, "loss": 0.8345, "step": 282 }, { "epoch": 0.03, "grad_norm": 3.548190169203221, "learning_rate": 9.895104895104895e-06, "loss": 0.7788, "step": 283 }, { "epoch": 0.03, "grad_norm": 4.732400299622023, "learning_rate": 9.93006993006993e-06, "loss": 0.7922, "step": 284 }, { "epoch": 0.03, "grad_norm": 2.878974528064686, "learning_rate": 9.965034965034966e-06, "loss": 0.8064, "step": 285 }, { "epoch": 0.03, "grad_norm": 4.264526689698259, "learning_rate": 1e-05, "loss": 0.8588, "step": 286 }, { "epoch": 0.03, "grad_norm": 3.20198908359177, "learning_rate": 9.999999709557228e-06, "loss": 0.8367, "step": 287 }, { "epoch": 0.03, "grad_norm": 20.715771128490395, "learning_rate": 9.999998838228941e-06, "loss": 0.7669, "step": 288 }, { "epoch": 0.03, "grad_norm": 3.3540036271091456, "learning_rate": 9.999997386015244e-06, "loss": 0.863, "step": 289 }, { "epoch": 0.03, "grad_norm": 3.1848307160272125, "learning_rate": 9.999995352916303e-06, "loss": 0.7439, "step": 290 }, { "epoch": 0.03, "grad_norm": 3.152572478387918, "learning_rate": 9.999992738932357e-06, "loss": 0.822, "step": 291 }, { "epoch": 0.03, "grad_norm": 3.5369549798522573, "learning_rate": 9.999989544063708e-06, "loss": 0.7687, "step": 292 }, { "epoch": 0.03, "grad_norm": 1.5501867277358583, "learning_rate": 9.999985768310726e-06, "loss": 0.6133, "step": 293 }, { "epoch": 0.03, "grad_norm": 3.319103455407901, "learning_rate": 9.99998141167385e-06, "loss": 0.7729, "step": 294 }, { "epoch": 0.03, "grad_norm": 3.687148563168577, "learning_rate": 9.999976474153589e-06, "loss": 0.8048, "step": 295 }, { "epoch": 0.03, "grad_norm": 2.984437946192857, "learning_rate": 9.999970955750516e-06, "loss": 0.7737, "step": 296 }, { "epoch": 0.03, "grad_norm": 4.5653321300359915, "learning_rate": 9.999964856465268e-06, "loss": 0.7832, "step": 297 }, { "epoch": 0.03, "grad_norm": 3.6017277018504, "learning_rate": 9.999958176298559e-06, "loss": 0.8802, "step": 298 }, { "epoch": 0.03, "grad_norm": 3.4928074350110396, "learning_rate": 9.99995091525116e-06, "loss": 0.8067, "step": 299 }, { "epoch": 0.03, "grad_norm": 2.7767296913858037, "learning_rate": 9.999943073323919e-06, "loss": 0.7946, "step": 300 }, { "epoch": 0.03, "grad_norm": 3.560126265511432, "learning_rate": 9.999934650517743e-06, "loss": 0.7549, "step": 301 }, { "epoch": 0.03, "grad_norm": 3.1779121129553207, "learning_rate": 9.999925646833614e-06, "loss": 0.8234, "step": 302 }, { "epoch": 0.03, "grad_norm": 2.6269027998737364, "learning_rate": 9.999916062272576e-06, "loss": 0.747, "step": 303 }, { "epoch": 0.03, "grad_norm": 3.0096315209518854, "learning_rate": 9.999905896835745e-06, "loss": 0.8391, "step": 304 }, { "epoch": 0.03, "grad_norm": 3.6570640261588303, "learning_rate": 9.999895150524297e-06, "loss": 0.8514, "step": 305 }, { "epoch": 0.03, "grad_norm": 2.7168969428743295, "learning_rate": 9.999883823339487e-06, "loss": 0.7491, "step": 306 }, { "epoch": 0.03, "grad_norm": 2.929508346366845, "learning_rate": 9.999871915282625e-06, "loss": 0.8162, "step": 307 }, { "epoch": 0.03, "grad_norm": 3.431569543759362, "learning_rate": 9.999859426355098e-06, "loss": 0.8174, "step": 308 }, { "epoch": 0.03, "grad_norm": 3.6342519486169595, "learning_rate": 9.999846356558356e-06, "loss": 0.8412, "step": 309 }, { "epoch": 0.03, "grad_norm": 2.773720032260141, "learning_rate": 9.999832705893919e-06, "loss": 0.8367, "step": 310 }, { "epoch": 0.03, "grad_norm": 2.9812086033455607, "learning_rate": 9.999818474363368e-06, "loss": 0.8288, "step": 311 }, { "epoch": 0.03, "grad_norm": 3.9790057425352723, "learning_rate": 9.999803661968361e-06, "loss": 0.8828, "step": 312 }, { "epoch": 0.03, "grad_norm": 3.06724404084709, "learning_rate": 9.999788268710619e-06, "loss": 0.8277, "step": 313 }, { "epoch": 0.03, "grad_norm": 3.020362580767787, "learning_rate": 9.999772294591927e-06, "loss": 0.7538, "step": 314 }, { "epoch": 0.03, "grad_norm": 3.4612261352729305, "learning_rate": 9.999755739614144e-06, "loss": 0.7895, "step": 315 }, { "epoch": 0.03, "grad_norm": 3.272614641342335, "learning_rate": 9.999738603779192e-06, "loss": 0.6866, "step": 316 }, { "epoch": 0.03, "grad_norm": 1.7980881010106364, "learning_rate": 9.999720887089062e-06, "loss": 0.6429, "step": 317 }, { "epoch": 0.03, "grad_norm": 3.3263471950749786, "learning_rate": 9.99970258954581e-06, "loss": 0.8188, "step": 318 }, { "epoch": 0.03, "grad_norm": 3.0088862503780187, "learning_rate": 9.999683711151565e-06, "loss": 0.7195, "step": 319 }, { "epoch": 0.03, "grad_norm": 2.87971268033177, "learning_rate": 9.99966425190852e-06, "loss": 0.8251, "step": 320 }, { "epoch": 0.03, "grad_norm": 2.8366240584831233, "learning_rate": 9.999644211818934e-06, "loss": 0.8274, "step": 321 }, { "epoch": 0.03, "grad_norm": 3.3752232102724986, "learning_rate": 9.999623590885135e-06, "loss": 0.7519, "step": 322 }, { "epoch": 0.03, "grad_norm": 3.248073936944215, "learning_rate": 9.999602389109521e-06, "loss": 0.8161, "step": 323 }, { "epoch": 0.03, "grad_norm": 2.7887278560705093, "learning_rate": 9.999580606494554e-06, "loss": 0.8526, "step": 324 }, { "epoch": 0.03, "grad_norm": 2.6703840967870525, "learning_rate": 9.999558243042763e-06, "loss": 0.763, "step": 325 }, { "epoch": 0.03, "grad_norm": 3.653691408190642, "learning_rate": 9.999535298756749e-06, "loss": 0.8728, "step": 326 }, { "epoch": 0.03, "grad_norm": 2.6557511065861665, "learning_rate": 9.999511773639177e-06, "loss": 0.8372, "step": 327 }, { "epoch": 0.03, "grad_norm": 2.9851651458699573, "learning_rate": 9.999487667692778e-06, "loss": 0.8749, "step": 328 }, { "epoch": 0.03, "grad_norm": 3.556775744280704, "learning_rate": 9.999462980920353e-06, "loss": 0.9202, "step": 329 }, { "epoch": 0.03, "grad_norm": 3.1780645043985722, "learning_rate": 9.99943771332477e-06, "loss": 0.7793, "step": 330 }, { "epoch": 0.03, "grad_norm": 2.50054261428372, "learning_rate": 9.999411864908967e-06, "loss": 0.7445, "step": 331 }, { "epoch": 0.03, "grad_norm": 8.416951605036898, "learning_rate": 9.999385435675947e-06, "loss": 0.7777, "step": 332 }, { "epoch": 0.04, "grad_norm": 1.7297205360660906, "learning_rate": 9.999358425628777e-06, "loss": 0.6569, "step": 333 }, { "epoch": 0.04, "grad_norm": 2.763919036592778, "learning_rate": 9.999330834770598e-06, "loss": 0.7999, "step": 334 }, { "epoch": 0.04, "grad_norm": 3.3468910031579515, "learning_rate": 9.999302663104611e-06, "loss": 0.8996, "step": 335 }, { "epoch": 0.04, "grad_norm": 2.52656655880427, "learning_rate": 9.999273910634095e-06, "loss": 0.8481, "step": 336 }, { "epoch": 0.04, "grad_norm": 3.0210600507512853, "learning_rate": 9.999244577362388e-06, "loss": 0.7786, "step": 337 }, { "epoch": 0.04, "grad_norm": 2.449552196852401, "learning_rate": 9.999214663292896e-06, "loss": 0.7566, "step": 338 }, { "epoch": 0.04, "grad_norm": 3.5228157643635942, "learning_rate": 9.999184168429095e-06, "loss": 0.8273, "step": 339 }, { "epoch": 0.04, "grad_norm": 2.8127747250358417, "learning_rate": 9.99915309277453e-06, "loss": 0.7628, "step": 340 }, { "epoch": 0.04, "grad_norm": 2.9561302000674052, "learning_rate": 9.999121436332809e-06, "loss": 0.7372, "step": 341 }, { "epoch": 0.04, "grad_norm": 2.635610731073348, "learning_rate": 9.99908919910761e-06, "loss": 0.8471, "step": 342 }, { "epoch": 0.04, "grad_norm": 2.974938379443744, "learning_rate": 9.99905638110268e-06, "loss": 0.8192, "step": 343 }, { "epoch": 0.04, "grad_norm": 3.7322849018695665, "learning_rate": 9.99902298232183e-06, "loss": 0.8051, "step": 344 }, { "epoch": 0.04, "grad_norm": 2.989115971364437, "learning_rate": 9.998989002768939e-06, "loss": 0.7731, "step": 345 }, { "epoch": 0.04, "grad_norm": 2.7573253684038, "learning_rate": 9.99895444244796e-06, "loss": 0.7253, "step": 346 }, { "epoch": 0.04, "grad_norm": 3.0060806455923066, "learning_rate": 9.998919301362902e-06, "loss": 0.8549, "step": 347 }, { "epoch": 0.04, "grad_norm": 2.813781196605346, "learning_rate": 9.998883579517849e-06, "loss": 0.8518, "step": 348 }, { "epoch": 0.04, "grad_norm": 3.0126862479542558, "learning_rate": 9.998847276916953e-06, "loss": 0.7217, "step": 349 }, { "epoch": 0.04, "grad_norm": 2.5842429131791644, "learning_rate": 9.99881039356443e-06, "loss": 0.8056, "step": 350 }, { "epoch": 0.04, "grad_norm": 3.153516334642533, "learning_rate": 9.998772929464567e-06, "loss": 0.827, "step": 351 }, { "epoch": 0.04, "grad_norm": 2.955277320770189, "learning_rate": 9.998734884621714e-06, "loss": 0.8264, "step": 352 }, { "epoch": 0.04, "grad_norm": 2.3636272885829444, "learning_rate": 9.998696259040292e-06, "loss": 0.8436, "step": 353 }, { "epoch": 0.04, "grad_norm": 2.6445985660522586, "learning_rate": 9.99865705272479e-06, "loss": 0.7755, "step": 354 }, { "epoch": 0.04, "grad_norm": 3.237081162467692, "learning_rate": 9.99861726567976e-06, "loss": 0.7811, "step": 355 }, { "epoch": 0.04, "grad_norm": 2.6981850184698, "learning_rate": 9.998576897909826e-06, "loss": 0.7442, "step": 356 }, { "epoch": 0.04, "grad_norm": 3.1062173431704796, "learning_rate": 9.998535949419676e-06, "loss": 0.8203, "step": 357 }, { "epoch": 0.04, "grad_norm": 3.337004723227758, "learning_rate": 9.99849442021407e-06, "loss": 0.8208, "step": 358 }, { "epoch": 0.04, "grad_norm": 7.14701609390736, "learning_rate": 9.99845231029783e-06, "loss": 0.8006, "step": 359 }, { "epoch": 0.04, "grad_norm": 2.19324654121165, "learning_rate": 9.998409619675852e-06, "loss": 0.8317, "step": 360 }, { "epoch": 0.04, "grad_norm": 3.676166242806993, "learning_rate": 9.998366348353092e-06, "loss": 0.7146, "step": 361 }, { "epoch": 0.04, "grad_norm": 1.7837495615392405, "learning_rate": 9.998322496334579e-06, "loss": 0.6307, "step": 362 }, { "epoch": 0.04, "grad_norm": 2.6924764025176575, "learning_rate": 9.998278063625407e-06, "loss": 0.788, "step": 363 }, { "epoch": 0.04, "grad_norm": 2.771669821577446, "learning_rate": 9.998233050230737e-06, "loss": 0.7719, "step": 364 }, { "epoch": 0.04, "grad_norm": 2.7022314056519003, "learning_rate": 9.9981874561558e-06, "loss": 0.7276, "step": 365 }, { "epoch": 0.04, "grad_norm": 3.4443745382241926, "learning_rate": 9.998141281405892e-06, "loss": 0.7159, "step": 366 }, { "epoch": 0.04, "grad_norm": 2.638255448321009, "learning_rate": 9.99809452598638e-06, "loss": 0.8132, "step": 367 }, { "epoch": 0.04, "grad_norm": 3.342150185068803, "learning_rate": 9.998047189902693e-06, "loss": 0.7607, "step": 368 }, { "epoch": 0.04, "grad_norm": 2.415218070936139, "learning_rate": 9.997999273160333e-06, "loss": 0.7152, "step": 369 }, { "epoch": 0.04, "grad_norm": 4.393978658986462, "learning_rate": 9.997950775764862e-06, "loss": 0.8176, "step": 370 }, { "epoch": 0.04, "grad_norm": 2.590618917457282, "learning_rate": 9.99790169772192e-06, "loss": 0.7372, "step": 371 }, { "epoch": 0.04, "grad_norm": 3.4930700657768323, "learning_rate": 9.997852039037206e-06, "loss": 0.823, "step": 372 }, { "epoch": 0.04, "grad_norm": 2.487379499776839, "learning_rate": 9.99780179971649e-06, "loss": 0.6554, "step": 373 }, { "epoch": 0.04, "grad_norm": 2.624919386906931, "learning_rate": 9.997750979765606e-06, "loss": 0.8252, "step": 374 }, { "epoch": 0.04, "grad_norm": 2.6531319266118483, "learning_rate": 9.997699579190462e-06, "loss": 0.9001, "step": 375 }, { "epoch": 0.04, "grad_norm": 2.769920692517521, "learning_rate": 9.997647597997025e-06, "loss": 0.8726, "step": 376 }, { "epoch": 0.04, "grad_norm": 2.8611168209874145, "learning_rate": 9.997595036191338e-06, "loss": 0.8799, "step": 377 }, { "epoch": 0.04, "grad_norm": 2.704800856083731, "learning_rate": 9.997541893779507e-06, "loss": 0.8497, "step": 378 }, { "epoch": 0.04, "grad_norm": 2.4415710077044612, "learning_rate": 9.997488170767706e-06, "loss": 0.8527, "step": 379 }, { "epoch": 0.04, "grad_norm": 3.050991550010565, "learning_rate": 9.997433867162174e-06, "loss": 0.7829, "step": 380 }, { "epoch": 0.04, "grad_norm": 2.869557820847034, "learning_rate": 9.997378982969223e-06, "loss": 0.8413, "step": 381 }, { "epoch": 0.04, "grad_norm": 2.5652433421128276, "learning_rate": 9.997323518195227e-06, "loss": 0.7852, "step": 382 }, { "epoch": 0.04, "grad_norm": 3.0907376752843794, "learning_rate": 9.99726747284663e-06, "loss": 0.8476, "step": 383 }, { "epoch": 0.04, "grad_norm": 2.576052254739924, "learning_rate": 9.997210846929945e-06, "loss": 0.7156, "step": 384 }, { "epoch": 0.04, "grad_norm": 2.957239805878372, "learning_rate": 9.997153640451748e-06, "loss": 0.7981, "step": 385 }, { "epoch": 0.04, "grad_norm": 2.406716857617946, "learning_rate": 9.997095853418685e-06, "loss": 0.7176, "step": 386 }, { "epoch": 0.04, "grad_norm": 2.5783679058938773, "learning_rate": 9.997037485837474e-06, "loss": 0.8001, "step": 387 }, { "epoch": 0.04, "grad_norm": 3.2799386939664985, "learning_rate": 9.996978537714891e-06, "loss": 0.7861, "step": 388 }, { "epoch": 0.04, "grad_norm": 2.6320652922884773, "learning_rate": 9.996919009057787e-06, "loss": 0.844, "step": 389 }, { "epoch": 0.04, "grad_norm": 3.079074473308074, "learning_rate": 9.996858899873076e-06, "loss": 0.8376, "step": 390 }, { "epoch": 0.04, "grad_norm": 2.810909666983869, "learning_rate": 9.996798210167745e-06, "loss": 0.8102, "step": 391 }, { "epoch": 0.04, "grad_norm": 2.822002229623917, "learning_rate": 9.996736939948838e-06, "loss": 0.7358, "step": 392 }, { "epoch": 0.04, "grad_norm": 2.553150551330481, "learning_rate": 9.996675089223481e-06, "loss": 0.8613, "step": 393 }, { "epoch": 0.04, "grad_norm": 2.6973816176709278, "learning_rate": 9.996612657998856e-06, "loss": 0.7688, "step": 394 }, { "epoch": 0.04, "grad_norm": 3.017782068668808, "learning_rate": 9.996549646282214e-06, "loss": 0.8728, "step": 395 }, { "epoch": 0.04, "grad_norm": 2.4233265237128307, "learning_rate": 9.99648605408088e-06, "loss": 0.7584, "step": 396 }, { "epoch": 0.04, "grad_norm": 2.5786786252897844, "learning_rate": 9.996421881402238e-06, "loss": 0.8619, "step": 397 }, { "epoch": 0.04, "grad_norm": 2.733216464863266, "learning_rate": 9.996357128253747e-06, "loss": 0.7994, "step": 398 }, { "epoch": 0.04, "grad_norm": 2.1790269474955832, "learning_rate": 9.996291794642924e-06, "loss": 0.7871, "step": 399 }, { "epoch": 0.04, "grad_norm": 2.506896691185903, "learning_rate": 9.996225880577366e-06, "loss": 0.7843, "step": 400 }, { "epoch": 0.04, "grad_norm": 2.3733740763366336, "learning_rate": 9.996159386064728e-06, "loss": 0.743, "step": 401 }, { "epoch": 0.04, "grad_norm": 2.3332932132080813, "learning_rate": 9.996092311112734e-06, "loss": 0.7422, "step": 402 }, { "epoch": 0.04, "grad_norm": 2.521593140693123, "learning_rate": 9.996024655729177e-06, "loss": 0.8187, "step": 403 }, { "epoch": 0.04, "grad_norm": 2.7935109625374253, "learning_rate": 9.99595641992192e-06, "loss": 0.8251, "step": 404 }, { "epoch": 0.04, "grad_norm": 8.26279670060322, "learning_rate": 9.995887603698886e-06, "loss": 0.7706, "step": 405 }, { "epoch": 0.04, "grad_norm": 2.41303802576548, "learning_rate": 9.99581820706807e-06, "loss": 0.7678, "step": 406 }, { "epoch": 0.04, "grad_norm": 3.0303506168562255, "learning_rate": 9.99574823003754e-06, "loss": 0.7319, "step": 407 }, { "epoch": 0.04, "grad_norm": 3.486164975875137, "learning_rate": 9.99567767261542e-06, "loss": 0.8546, "step": 408 }, { "epoch": 0.04, "grad_norm": 2.9414654462050036, "learning_rate": 9.995606534809909e-06, "loss": 0.77, "step": 409 }, { "epoch": 0.04, "grad_norm": 2.5654915500442366, "learning_rate": 9.995534816629271e-06, "loss": 0.7913, "step": 410 }, { "epoch": 0.04, "grad_norm": 2.6295512034240995, "learning_rate": 9.99546251808184e-06, "loss": 0.8889, "step": 411 }, { "epoch": 0.04, "grad_norm": 2.789997301197883, "learning_rate": 9.995389639176013e-06, "loss": 0.848, "step": 412 }, { "epoch": 0.04, "grad_norm": 2.9172960774366037, "learning_rate": 9.995316179920258e-06, "loss": 0.729, "step": 413 }, { "epoch": 0.04, "grad_norm": 2.5099680383958822, "learning_rate": 9.99524214032311e-06, "loss": 0.7368, "step": 414 }, { "epoch": 0.04, "grad_norm": 2.516008292563799, "learning_rate": 9.99516752039317e-06, "loss": 0.8693, "step": 415 }, { "epoch": 0.04, "grad_norm": 2.2755006768965584, "learning_rate": 9.995092320139106e-06, "loss": 0.7947, "step": 416 }, { "epoch": 0.04, "grad_norm": 2.276675967191438, "learning_rate": 9.995016539569656e-06, "loss": 0.8244, "step": 417 }, { "epoch": 0.04, "grad_norm": 2.4161022069453173, "learning_rate": 9.994940178693624e-06, "loss": 0.8016, "step": 418 }, { "epoch": 0.04, "grad_norm": 3.8339702744181325, "learning_rate": 9.99486323751988e-06, "loss": 0.7548, "step": 419 }, { "epoch": 0.04, "grad_norm": 4.975612430718834, "learning_rate": 9.994785716057364e-06, "loss": 0.7163, "step": 420 }, { "epoch": 0.04, "grad_norm": 2.5503239423291677, "learning_rate": 9.994707614315084e-06, "loss": 0.7693, "step": 421 }, { "epoch": 0.04, "grad_norm": 2.8040417675794083, "learning_rate": 9.99462893230211e-06, "loss": 0.7943, "step": 422 }, { "epoch": 0.04, "grad_norm": 3.6155920470952188, "learning_rate": 9.994549670027584e-06, "loss": 0.8104, "step": 423 }, { "epoch": 0.04, "grad_norm": 3.6688952806199437, "learning_rate": 9.994469827500716e-06, "loss": 0.7914, "step": 424 }, { "epoch": 0.04, "grad_norm": 2.784372060150453, "learning_rate": 9.99438940473078e-06, "loss": 0.7975, "step": 425 }, { "epoch": 0.04, "grad_norm": 2.5929689621810783, "learning_rate": 9.994308401727122e-06, "loss": 0.8029, "step": 426 }, { "epoch": 0.04, "grad_norm": 2.827200496684302, "learning_rate": 9.99422681849915e-06, "loss": 0.8063, "step": 427 }, { "epoch": 0.05, "grad_norm": 2.420731477742907, "learning_rate": 9.994144655056343e-06, "loss": 0.7089, "step": 428 }, { "epoch": 0.05, "grad_norm": 3.1411562499918078, "learning_rate": 9.994061911408245e-06, "loss": 0.8219, "step": 429 }, { "epoch": 0.05, "grad_norm": 5.296664953262252, "learning_rate": 9.993978587564473e-06, "loss": 0.748, "step": 430 }, { "epoch": 0.05, "grad_norm": 3.786778377368977, "learning_rate": 9.993894683534704e-06, "loss": 0.79, "step": 431 }, { "epoch": 0.05, "grad_norm": 4.9069867693590075, "learning_rate": 9.993810199328687e-06, "loss": 0.7826, "step": 432 }, { "epoch": 0.05, "grad_norm": 3.7740885681631293, "learning_rate": 9.993725134956235e-06, "loss": 0.8472, "step": 433 }, { "epoch": 0.05, "grad_norm": 3.0792423971075777, "learning_rate": 9.993639490427235e-06, "loss": 0.7272, "step": 434 }, { "epoch": 0.05, "grad_norm": 2.505248944163071, "learning_rate": 9.993553265751632e-06, "loss": 0.8125, "step": 435 }, { "epoch": 0.05, "grad_norm": 3.1705332082489175, "learning_rate": 9.993466460939447e-06, "loss": 0.7515, "step": 436 }, { "epoch": 0.05, "grad_norm": 3.068737573080172, "learning_rate": 9.993379076000762e-06, "loss": 0.6471, "step": 437 }, { "epoch": 0.05, "grad_norm": 3.0565141356797696, "learning_rate": 9.99329111094573e-06, "loss": 0.7869, "step": 438 }, { "epoch": 0.05, "grad_norm": 2.418500158534013, "learning_rate": 9.993202565784573e-06, "loss": 0.8211, "step": 439 }, { "epoch": 0.05, "grad_norm": 2.423639948102629, "learning_rate": 9.993113440527573e-06, "loss": 0.815, "step": 440 }, { "epoch": 0.05, "grad_norm": 2.2704412010849007, "learning_rate": 9.993023735185088e-06, "loss": 0.7843, "step": 441 }, { "epoch": 0.05, "grad_norm": 2.521200294230519, "learning_rate": 9.992933449767538e-06, "loss": 0.7483, "step": 442 }, { "epoch": 0.05, "grad_norm": 2.706970229721735, "learning_rate": 9.992842584285416e-06, "loss": 0.7187, "step": 443 }, { "epoch": 0.05, "grad_norm": 2.390640870602009, "learning_rate": 9.992751138749273e-06, "loss": 0.7721, "step": 444 }, { "epoch": 0.05, "grad_norm": 2.5982385192863124, "learning_rate": 9.992659113169736e-06, "loss": 0.7153, "step": 445 }, { "epoch": 0.05, "grad_norm": 2.4856303794193093, "learning_rate": 9.992566507557495e-06, "loss": 0.7573, "step": 446 }, { "epoch": 0.05, "grad_norm": 2.5444768288105166, "learning_rate": 9.99247332192331e-06, "loss": 0.7668, "step": 447 }, { "epoch": 0.05, "grad_norm": 2.040260161584014, "learning_rate": 9.992379556278006e-06, "loss": 0.7644, "step": 448 }, { "epoch": 0.05, "grad_norm": 2.3952337813826174, "learning_rate": 9.992285210632476e-06, "loss": 0.7813, "step": 449 }, { "epoch": 0.05, "grad_norm": 2.7875393710927003, "learning_rate": 9.992190284997683e-06, "loss": 0.8625, "step": 450 }, { "epoch": 0.05, "grad_norm": 2.3456211144367436, "learning_rate": 9.992094779384651e-06, "loss": 0.7421, "step": 451 }, { "epoch": 0.05, "grad_norm": 2.2941981241460914, "learning_rate": 9.991998693804482e-06, "loss": 0.786, "step": 452 }, { "epoch": 0.05, "grad_norm": 2.7710253391308113, "learning_rate": 9.991902028268333e-06, "loss": 0.8221, "step": 453 }, { "epoch": 0.05, "grad_norm": 2.8127605742355093, "learning_rate": 9.991804782787435e-06, "loss": 0.8029, "step": 454 }, { "epoch": 0.05, "grad_norm": 2.21579763303864, "learning_rate": 9.991706957373088e-06, "loss": 0.8484, "step": 455 }, { "epoch": 0.05, "grad_norm": 3.0567607175438702, "learning_rate": 9.991608552036659e-06, "loss": 0.7895, "step": 456 }, { "epoch": 0.05, "grad_norm": 2.337491818683799, "learning_rate": 9.991509566789575e-06, "loss": 0.8293, "step": 457 }, { "epoch": 0.05, "grad_norm": 2.0862821534316573, "learning_rate": 9.991410001643338e-06, "loss": 0.7856, "step": 458 }, { "epoch": 0.05, "grad_norm": 2.278358994242391, "learning_rate": 9.991309856609517e-06, "loss": 0.7536, "step": 459 }, { "epoch": 0.05, "grad_norm": 1.9648242121722526, "learning_rate": 9.991209131699745e-06, "loss": 0.7508, "step": 460 }, { "epoch": 0.05, "grad_norm": 2.454545521761428, "learning_rate": 9.991107826925724e-06, "loss": 0.7682, "step": 461 }, { "epoch": 0.05, "grad_norm": 2.3135550329147647, "learning_rate": 9.991005942299224e-06, "loss": 0.794, "step": 462 }, { "epoch": 0.05, "grad_norm": 2.958497853786959, "learning_rate": 9.990903477832081e-06, "loss": 0.884, "step": 463 }, { "epoch": 0.05, "grad_norm": 2.559497699524832, "learning_rate": 9.990800433536198e-06, "loss": 0.7475, "step": 464 }, { "epoch": 0.05, "grad_norm": 2.2979423649720405, "learning_rate": 9.990696809423551e-06, "loss": 0.8139, "step": 465 }, { "epoch": 0.05, "grad_norm": 3.477081620098315, "learning_rate": 9.990592605506172e-06, "loss": 0.7413, "step": 466 }, { "epoch": 0.05, "grad_norm": 4.145253092459398, "learning_rate": 9.990487821796171e-06, "loss": 0.816, "step": 467 }, { "epoch": 0.05, "grad_norm": 2.6671842619211605, "learning_rate": 9.99038245830572e-06, "loss": 0.7634, "step": 468 }, { "epoch": 0.05, "grad_norm": 2.4700914843679307, "learning_rate": 9.990276515047063e-06, "loss": 0.7116, "step": 469 }, { "epoch": 0.05, "grad_norm": 2.2107804543409255, "learning_rate": 9.990169992032506e-06, "loss": 0.8021, "step": 470 }, { "epoch": 0.05, "grad_norm": 2.1867548251004902, "learning_rate": 9.990062889274423e-06, "loss": 0.8221, "step": 471 }, { "epoch": 0.05, "grad_norm": 3.1546753580491793, "learning_rate": 9.989955206785258e-06, "loss": 0.8515, "step": 472 }, { "epoch": 0.05, "grad_norm": 2.845940673194977, "learning_rate": 9.989846944577524e-06, "loss": 0.7823, "step": 473 }, { "epoch": 0.05, "grad_norm": 2.782847174447964, "learning_rate": 9.989738102663792e-06, "loss": 0.8043, "step": 474 }, { "epoch": 0.05, "grad_norm": 1.5304700280693413, "learning_rate": 9.989628681056716e-06, "loss": 0.6264, "step": 475 }, { "epoch": 0.05, "grad_norm": 2.4549669242826684, "learning_rate": 9.989518679769e-06, "loss": 0.8646, "step": 476 }, { "epoch": 0.05, "grad_norm": 2.704340279572477, "learning_rate": 9.989408098813429e-06, "loss": 0.8055, "step": 477 }, { "epoch": 0.05, "grad_norm": 2.5055698906785566, "learning_rate": 9.989296938202846e-06, "loss": 0.7233, "step": 478 }, { "epoch": 0.05, "grad_norm": 3.2218415084935477, "learning_rate": 9.989185197950168e-06, "loss": 0.8227, "step": 479 }, { "epoch": 0.05, "grad_norm": 2.3617802926725933, "learning_rate": 9.989072878068376e-06, "loss": 0.8324, "step": 480 }, { "epoch": 0.05, "grad_norm": 2.4468315450678713, "learning_rate": 9.98895997857052e-06, "loss": 0.7699, "step": 481 }, { "epoch": 0.05, "grad_norm": 2.667742814508917, "learning_rate": 9.988846499469714e-06, "loss": 0.9051, "step": 482 }, { "epoch": 0.05, "grad_norm": 2.7213442322657078, "learning_rate": 9.988732440779145e-06, "loss": 0.7052, "step": 483 }, { "epoch": 0.05, "grad_norm": 2.326845379917123, "learning_rate": 9.98861780251206e-06, "loss": 0.7763, "step": 484 }, { "epoch": 0.05, "grad_norm": 2.9136376815635368, "learning_rate": 9.98850258468178e-06, "loss": 0.7103, "step": 485 }, { "epoch": 0.05, "grad_norm": 2.2455870076795366, "learning_rate": 9.988386787301689e-06, "loss": 0.8498, "step": 486 }, { "epoch": 0.05, "grad_norm": 2.697658986196603, "learning_rate": 9.988270410385242e-06, "loss": 0.7247, "step": 487 }, { "epoch": 0.05, "grad_norm": 2.6037250054010124, "learning_rate": 9.98815345394596e-06, "loss": 0.7998, "step": 488 }, { "epoch": 0.05, "grad_norm": 2.2732857324114515, "learning_rate": 9.988035917997426e-06, "loss": 0.7466, "step": 489 }, { "epoch": 0.05, "grad_norm": 2.447532792950365, "learning_rate": 9.9879178025533e-06, "loss": 0.7684, "step": 490 }, { "epoch": 0.05, "grad_norm": 2.4996822315622005, "learning_rate": 9.987799107627301e-06, "loss": 0.7892, "step": 491 }, { "epoch": 0.05, "grad_norm": 2.4392244915064354, "learning_rate": 9.987679833233219e-06, "loss": 0.7154, "step": 492 }, { "epoch": 0.05, "grad_norm": 3.01944378814734, "learning_rate": 9.987559979384913e-06, "loss": 0.8231, "step": 493 }, { "epoch": 0.05, "grad_norm": 2.3479953891689944, "learning_rate": 9.987439546096309e-06, "loss": 0.7696, "step": 494 }, { "epoch": 0.05, "grad_norm": 2.095707445377812, "learning_rate": 9.987318533381391e-06, "loss": 0.7402, "step": 495 }, { "epoch": 0.05, "grad_norm": 3.6125099373264056, "learning_rate": 9.987196941254226e-06, "loss": 0.8434, "step": 496 }, { "epoch": 0.05, "grad_norm": 2.407463112602799, "learning_rate": 9.987074769728936e-06, "loss": 0.7014, "step": 497 }, { "epoch": 0.05, "grad_norm": 2.2803041227883676, "learning_rate": 9.986952018819715e-06, "loss": 0.7015, "step": 498 }, { "epoch": 0.05, "grad_norm": 2.850670147969541, "learning_rate": 9.986828688540825e-06, "loss": 0.8059, "step": 499 }, { "epoch": 0.05, "grad_norm": 2.5894833396875727, "learning_rate": 9.986704778906594e-06, "loss": 0.8364, "step": 500 }, { "epoch": 0.05, "grad_norm": 2.310359350175941, "learning_rate": 9.986580289931416e-06, "loss": 0.854, "step": 501 }, { "epoch": 0.05, "grad_norm": 1.3368374036518098, "learning_rate": 9.986455221629754e-06, "loss": 0.5978, "step": 502 }, { "epoch": 0.05, "grad_norm": 2.4931722034378425, "learning_rate": 9.98632957401614e-06, "loss": 0.8525, "step": 503 }, { "epoch": 0.05, "grad_norm": 4.644787385474121, "learning_rate": 9.986203347105168e-06, "loss": 0.8037, "step": 504 }, { "epoch": 0.05, "grad_norm": 2.563442064696163, "learning_rate": 9.986076540911507e-06, "loss": 0.8657, "step": 505 }, { "epoch": 0.05, "grad_norm": 2.1165485781851836, "learning_rate": 9.985949155449885e-06, "loss": 0.7919, "step": 506 }, { "epoch": 0.05, "grad_norm": 3.471404486523451, "learning_rate": 9.985821190735104e-06, "loss": 0.7542, "step": 507 }, { "epoch": 0.05, "grad_norm": 2.3249602966837206, "learning_rate": 9.98569264678203e-06, "loss": 0.8402, "step": 508 }, { "epoch": 0.05, "grad_norm": 2.48495778605686, "learning_rate": 9.985563523605597e-06, "loss": 0.721, "step": 509 }, { "epoch": 0.05, "grad_norm": 2.4276988470343577, "learning_rate": 9.985433821220805e-06, "loss": 0.8305, "step": 510 }, { "epoch": 0.05, "grad_norm": 2.266343385980247, "learning_rate": 9.985303539642721e-06, "loss": 0.8618, "step": 511 }, { "epoch": 0.05, "grad_norm": 2.6368944571388737, "learning_rate": 9.985172678886486e-06, "loss": 0.7497, "step": 512 }, { "epoch": 0.05, "grad_norm": 2.3362271184394703, "learning_rate": 9.985041238967297e-06, "loss": 0.7608, "step": 513 }, { "epoch": 0.05, "grad_norm": 2.323494738235158, "learning_rate": 9.984909219900429e-06, "loss": 0.8141, "step": 514 }, { "epoch": 0.05, "grad_norm": 2.2944704925260733, "learning_rate": 9.984776621701218e-06, "loss": 0.7668, "step": 515 }, { "epoch": 0.05, "grad_norm": 2.539104026728401, "learning_rate": 9.984643444385067e-06, "loss": 0.6898, "step": 516 }, { "epoch": 0.05, "grad_norm": 2.2795736059424563, "learning_rate": 9.984509687967451e-06, "loss": 0.8352, "step": 517 }, { "epoch": 0.05, "grad_norm": 2.5063326762752127, "learning_rate": 9.984375352463908e-06, "loss": 0.7219, "step": 518 }, { "epoch": 0.05, "grad_norm": 2.875219661862659, "learning_rate": 9.984240437890045e-06, "loss": 0.7054, "step": 519 }, { "epoch": 0.05, "grad_norm": 2.8779919468667536, "learning_rate": 9.984104944261536e-06, "loss": 0.8438, "step": 520 }, { "epoch": 0.05, "grad_norm": 2.222566644720778, "learning_rate": 9.983968871594121e-06, "loss": 0.7997, "step": 521 }, { "epoch": 0.05, "grad_norm": 2.959982902693521, "learning_rate": 9.98383221990361e-06, "loss": 0.7926, "step": 522 }, { "epoch": 0.06, "grad_norm": 3.1528886906910403, "learning_rate": 9.983694989205882e-06, "loss": 0.8242, "step": 523 }, { "epoch": 0.06, "grad_norm": 2.18977821015223, "learning_rate": 9.983557179516872e-06, "loss": 0.757, "step": 524 }, { "epoch": 0.06, "grad_norm": 1.5734084664110641, "learning_rate": 9.983418790852597e-06, "loss": 0.6629, "step": 525 }, { "epoch": 0.06, "grad_norm": 2.324375311018105, "learning_rate": 9.983279823229132e-06, "loss": 0.8114, "step": 526 }, { "epoch": 0.06, "grad_norm": 3.124013190729634, "learning_rate": 9.983140276662621e-06, "loss": 0.779, "step": 527 }, { "epoch": 0.06, "grad_norm": 2.535001843346559, "learning_rate": 9.98300015116928e-06, "loss": 0.7235, "step": 528 }, { "epoch": 0.06, "grad_norm": 2.1443674722870116, "learning_rate": 9.982859446765385e-06, "loss": 0.8205, "step": 529 }, { "epoch": 0.06, "grad_norm": 3.3022865226413534, "learning_rate": 9.982718163467282e-06, "loss": 0.7919, "step": 530 }, { "epoch": 0.06, "grad_norm": 2.2210575024061376, "learning_rate": 9.982576301291387e-06, "loss": 0.8224, "step": 531 }, { "epoch": 0.06, "grad_norm": 2.266820994801646, "learning_rate": 9.982433860254181e-06, "loss": 0.8027, "step": 532 }, { "epoch": 0.06, "grad_norm": 2.8366928780887593, "learning_rate": 9.982290840372212e-06, "loss": 0.7632, "step": 533 }, { "epoch": 0.06, "grad_norm": 2.7429115320708926, "learning_rate": 9.982147241662097e-06, "loss": 0.8406, "step": 534 }, { "epoch": 0.06, "grad_norm": 2.324430888530594, "learning_rate": 9.982003064140515e-06, "loss": 0.7881, "step": 535 }, { "epoch": 0.06, "grad_norm": 2.519378774537875, "learning_rate": 9.98185830782422e-06, "loss": 0.7492, "step": 536 }, { "epoch": 0.06, "grad_norm": 2.2247325308571977, "learning_rate": 9.981712972730027e-06, "loss": 0.7321, "step": 537 }, { "epoch": 0.06, "grad_norm": 2.367419912672573, "learning_rate": 9.981567058874822e-06, "loss": 0.7559, "step": 538 }, { "epoch": 0.06, "grad_norm": 2.442752211560955, "learning_rate": 9.981420566275554e-06, "loss": 0.7823, "step": 539 }, { "epoch": 0.06, "grad_norm": 2.718090756388608, "learning_rate": 9.981273494949247e-06, "loss": 0.7689, "step": 540 }, { "epoch": 0.06, "grad_norm": 1.3168535978993352, "learning_rate": 9.981125844912985e-06, "loss": 0.6097, "step": 541 }, { "epoch": 0.06, "grad_norm": 2.39423124719434, "learning_rate": 9.98097761618392e-06, "loss": 0.8062, "step": 542 }, { "epoch": 0.06, "grad_norm": 2.6461362569919227, "learning_rate": 9.980828808779275e-06, "loss": 0.8499, "step": 543 }, { "epoch": 0.06, "grad_norm": 2.6944205009407094, "learning_rate": 9.980679422716336e-06, "loss": 0.8563, "step": 544 }, { "epoch": 0.06, "grad_norm": 2.419123667806579, "learning_rate": 9.98052945801246e-06, "loss": 0.8623, "step": 545 }, { "epoch": 0.06, "grad_norm": 2.7435396717539557, "learning_rate": 9.980378914685069e-06, "loss": 0.7444, "step": 546 }, { "epoch": 0.06, "grad_norm": 2.662949276618667, "learning_rate": 9.980227792751653e-06, "loss": 0.7362, "step": 547 }, { "epoch": 0.06, "grad_norm": 2.9210580658144902, "learning_rate": 9.980076092229767e-06, "loss": 0.7805, "step": 548 }, { "epoch": 0.06, "grad_norm": 2.6271159104021105, "learning_rate": 9.979923813137039e-06, "loss": 0.7591, "step": 549 }, { "epoch": 0.06, "grad_norm": 2.1688198714122695, "learning_rate": 9.979770955491154e-06, "loss": 0.7107, "step": 550 }, { "epoch": 0.06, "grad_norm": 3.3806001298954946, "learning_rate": 9.979617519309878e-06, "loss": 0.7611, "step": 551 }, { "epoch": 0.06, "grad_norm": 2.6186345183581548, "learning_rate": 9.97946350461103e-06, "loss": 0.8052, "step": 552 }, { "epoch": 0.06, "grad_norm": 2.434402348311883, "learning_rate": 9.979308911412508e-06, "loss": 0.7546, "step": 553 }, { "epoch": 0.06, "grad_norm": 2.1361701202500973, "learning_rate": 9.979153739732273e-06, "loss": 0.6987, "step": 554 }, { "epoch": 0.06, "grad_norm": 2.638578499216826, "learning_rate": 9.978997989588346e-06, "loss": 0.8184, "step": 555 }, { "epoch": 0.06, "grad_norm": 2.397169992403867, "learning_rate": 9.978841660998827e-06, "loss": 0.748, "step": 556 }, { "epoch": 0.06, "grad_norm": 2.6871052832438886, "learning_rate": 9.978684753981875e-06, "loss": 0.8245, "step": 557 }, { "epoch": 0.06, "grad_norm": 2.127649979618681, "learning_rate": 9.978527268555723e-06, "loss": 0.7135, "step": 558 }, { "epoch": 0.06, "grad_norm": 2.500931002963333, "learning_rate": 9.97836920473866e-06, "loss": 0.7869, "step": 559 }, { "epoch": 0.06, "grad_norm": 2.6065448044913357, "learning_rate": 9.978210562549057e-06, "loss": 0.7589, "step": 560 }, { "epoch": 0.06, "grad_norm": 2.2848105655651274, "learning_rate": 9.978051342005342e-06, "loss": 0.7439, "step": 561 }, { "epoch": 0.06, "grad_norm": 2.3300511317144, "learning_rate": 9.97789154312601e-06, "loss": 0.731, "step": 562 }, { "epoch": 0.06, "grad_norm": 2.53982707617505, "learning_rate": 9.97773116592963e-06, "loss": 0.7874, "step": 563 }, { "epoch": 0.06, "grad_norm": 3.0472971560396367, "learning_rate": 9.977570210434831e-06, "loss": 0.7261, "step": 564 }, { "epoch": 0.06, "grad_norm": 2.883803485999733, "learning_rate": 9.977408676660314e-06, "loss": 0.8079, "step": 565 }, { "epoch": 0.06, "grad_norm": 19.573290818215536, "learning_rate": 9.977246564624845e-06, "loss": 0.7604, "step": 566 }, { "epoch": 0.06, "grad_norm": 2.6716091098325245, "learning_rate": 9.977083874347258e-06, "loss": 0.6414, "step": 567 }, { "epoch": 0.06, "grad_norm": 3.6901090885051846, "learning_rate": 9.976920605846452e-06, "loss": 0.8105, "step": 568 }, { "epoch": 0.06, "grad_norm": 2.7134792536923555, "learning_rate": 9.976756759141399e-06, "loss": 0.7635, "step": 569 }, { "epoch": 0.06, "grad_norm": 2.7086458704405434, "learning_rate": 9.976592334251132e-06, "loss": 0.8302, "step": 570 }, { "epoch": 0.06, "grad_norm": 2.1306874134793508, "learning_rate": 9.976427331194753e-06, "loss": 0.7362, "step": 571 }, { "epoch": 0.06, "grad_norm": 2.45895557799812, "learning_rate": 9.976261749991433e-06, "loss": 0.7854, "step": 572 }, { "epoch": 0.06, "grad_norm": 2.2266882437527475, "learning_rate": 9.976095590660407e-06, "loss": 0.8152, "step": 573 }, { "epoch": 0.06, "grad_norm": 2.363404791350861, "learning_rate": 9.975928853220979e-06, "loss": 0.8157, "step": 574 }, { "epoch": 0.06, "grad_norm": 2.4298149385499475, "learning_rate": 9.975761537692522e-06, "loss": 0.7796, "step": 575 }, { "epoch": 0.06, "grad_norm": 2.2830133914169424, "learning_rate": 9.975593644094472e-06, "loss": 0.7881, "step": 576 }, { "epoch": 0.06, "grad_norm": 5.164741876077585, "learning_rate": 9.975425172446336e-06, "loss": 0.6478, "step": 577 }, { "epoch": 0.06, "grad_norm": 2.1615284561481283, "learning_rate": 9.975256122767687e-06, "loss": 0.7038, "step": 578 }, { "epoch": 0.06, "grad_norm": 2.8554861521662316, "learning_rate": 9.975086495078161e-06, "loss": 0.8082, "step": 579 }, { "epoch": 0.06, "grad_norm": 2.8793525267467426, "learning_rate": 9.974916289397469e-06, "loss": 0.7612, "step": 580 }, { "epoch": 0.06, "grad_norm": 2.2817512397632775, "learning_rate": 9.974745505745385e-06, "loss": 0.7507, "step": 581 }, { "epoch": 0.06, "grad_norm": 2.3293064382183752, "learning_rate": 9.974574144141746e-06, "loss": 0.782, "step": 582 }, { "epoch": 0.06, "grad_norm": 2.420863828032633, "learning_rate": 9.974402204606464e-06, "loss": 0.7904, "step": 583 }, { "epoch": 0.06, "grad_norm": 2.467148177657104, "learning_rate": 9.974229687159515e-06, "loss": 0.7148, "step": 584 }, { "epoch": 0.06, "grad_norm": 2.3692761510748506, "learning_rate": 9.974056591820937e-06, "loss": 0.7572, "step": 585 }, { "epoch": 0.06, "grad_norm": 2.410599414340472, "learning_rate": 9.973882918610845e-06, "loss": 0.7463, "step": 586 }, { "epoch": 0.06, "grad_norm": 2.407706605193204, "learning_rate": 9.973708667549413e-06, "loss": 0.7719, "step": 587 }, { "epoch": 0.06, "grad_norm": 2.1487892620296885, "learning_rate": 9.973533838656886e-06, "loss": 0.737, "step": 588 }, { "epoch": 0.06, "grad_norm": 2.983088881371681, "learning_rate": 9.973358431953574e-06, "loss": 0.8082, "step": 589 }, { "epoch": 0.06, "grad_norm": 4.84274621933108, "learning_rate": 9.973182447459856e-06, "loss": 0.8005, "step": 590 }, { "epoch": 0.06, "grad_norm": 2.514268336749957, "learning_rate": 9.973005885196177e-06, "loss": 0.8384, "step": 591 }, { "epoch": 0.06, "grad_norm": 2.9786767920760076, "learning_rate": 9.97282874518305e-06, "loss": 0.8312, "step": 592 }, { "epoch": 0.06, "grad_norm": 2.5407053182524333, "learning_rate": 9.972651027441053e-06, "loss": 0.8417, "step": 593 }, { "epoch": 0.06, "grad_norm": 2.2034669895851526, "learning_rate": 9.972472731990836e-06, "loss": 0.7066, "step": 594 }, { "epoch": 0.06, "grad_norm": 2.3153560832834725, "learning_rate": 9.972293858853111e-06, "loss": 0.7032, "step": 595 }, { "epoch": 0.06, "grad_norm": 1.1674873013648661, "learning_rate": 9.972114408048658e-06, "loss": 0.6294, "step": 596 }, { "epoch": 0.06, "grad_norm": 2.4983825264182977, "learning_rate": 9.971934379598327e-06, "loss": 0.8138, "step": 597 }, { "epoch": 0.06, "grad_norm": 2.5640167266728517, "learning_rate": 9.971753773523032e-06, "loss": 0.7516, "step": 598 }, { "epoch": 0.06, "grad_norm": 2.819664317552869, "learning_rate": 9.971572589843754e-06, "loss": 0.7164, "step": 599 }, { "epoch": 0.06, "grad_norm": 2.8700762251626, "learning_rate": 9.971390828581546e-06, "loss": 0.7128, "step": 600 }, { "epoch": 0.06, "grad_norm": 3.2350947513381, "learning_rate": 9.971208489757522e-06, "loss": 0.7065, "step": 601 }, { "epoch": 0.06, "grad_norm": 6.78868545134538, "learning_rate": 9.971025573392863e-06, "loss": 0.7491, "step": 602 }, { "epoch": 0.06, "grad_norm": 2.620861295267675, "learning_rate": 9.970842079508827e-06, "loss": 0.8286, "step": 603 }, { "epoch": 0.06, "grad_norm": 2.387001074929076, "learning_rate": 9.970658008126725e-06, "loss": 0.7537, "step": 604 }, { "epoch": 0.06, "grad_norm": 2.356795460296981, "learning_rate": 9.970473359267945e-06, "loss": 0.7542, "step": 605 }, { "epoch": 0.06, "grad_norm": 1.9886458699286347, "learning_rate": 9.970288132953938e-06, "loss": 0.6917, "step": 606 }, { "epoch": 0.06, "grad_norm": 2.2751385664982706, "learning_rate": 9.970102329206221e-06, "loss": 0.7728, "step": 607 }, { "epoch": 0.06, "grad_norm": 2.495226626436587, "learning_rate": 9.969915948046387e-06, "loss": 0.6747, "step": 608 }, { "epoch": 0.06, "grad_norm": 1.4123037303465609, "learning_rate": 9.969728989496081e-06, "loss": 0.6243, "step": 609 }, { "epoch": 0.06, "grad_norm": 2.510919227428744, "learning_rate": 9.96954145357703e-06, "loss": 0.7466, "step": 610 }, { "epoch": 0.06, "grad_norm": 2.4441972726765404, "learning_rate": 9.969353340311017e-06, "loss": 0.7598, "step": 611 }, { "epoch": 0.06, "grad_norm": 2.0740206980367337, "learning_rate": 9.969164649719898e-06, "loss": 0.7293, "step": 612 }, { "epoch": 0.06, "grad_norm": 1.3874311549388647, "learning_rate": 9.968975381825594e-06, "loss": 0.6497, "step": 613 }, { "epoch": 0.06, "grad_norm": 2.2328817638501284, "learning_rate": 9.968785536650095e-06, "loss": 0.7843, "step": 614 }, { "epoch": 0.06, "grad_norm": 2.039098314636674, "learning_rate": 9.968595114215453e-06, "loss": 0.7148, "step": 615 }, { "epoch": 0.06, "grad_norm": 1.8974999023720256, "learning_rate": 9.968404114543796e-06, "loss": 0.7553, "step": 616 }, { "epoch": 0.06, "grad_norm": 2.363258301216911, "learning_rate": 9.968212537657311e-06, "loss": 0.7465, "step": 617 }, { "epoch": 0.07, "grad_norm": 2.5506185481947714, "learning_rate": 9.968020383578253e-06, "loss": 0.8889, "step": 618 }, { "epoch": 0.07, "grad_norm": 2.0478689904138068, "learning_rate": 9.96782765232895e-06, "loss": 0.7014, "step": 619 }, { "epoch": 0.07, "grad_norm": 2.1272161891964703, "learning_rate": 9.967634343931791e-06, "loss": 0.8825, "step": 620 }, { "epoch": 0.07, "grad_norm": 2.3598809615068794, "learning_rate": 9.967440458409232e-06, "loss": 0.7521, "step": 621 }, { "epoch": 0.07, "grad_norm": 2.0759823166768627, "learning_rate": 9.967245995783801e-06, "loss": 0.7767, "step": 622 }, { "epoch": 0.07, "grad_norm": 2.1835848414184875, "learning_rate": 9.96705095607809e-06, "loss": 0.8266, "step": 623 }, { "epoch": 0.07, "grad_norm": 3.6137036631937254, "learning_rate": 9.966855339314756e-06, "loss": 0.857, "step": 624 }, { "epoch": 0.07, "grad_norm": 2.516218552454981, "learning_rate": 9.966659145516527e-06, "loss": 0.8295, "step": 625 }, { "epoch": 0.07, "grad_norm": 2.179275872622473, "learning_rate": 9.966462374706196e-06, "loss": 0.8447, "step": 626 }, { "epoch": 0.07, "grad_norm": 2.0487084736863883, "learning_rate": 9.966265026906622e-06, "loss": 0.7527, "step": 627 }, { "epoch": 0.07, "grad_norm": 2.394315996422968, "learning_rate": 9.966067102140734e-06, "loss": 0.7543, "step": 628 }, { "epoch": 0.07, "grad_norm": 3.0716112519627363, "learning_rate": 9.965868600431525e-06, "loss": 0.7141, "step": 629 }, { "epoch": 0.07, "grad_norm": 4.924340713473136, "learning_rate": 9.965669521802057e-06, "loss": 0.8291, "step": 630 }, { "epoch": 0.07, "grad_norm": 2.8596202937468727, "learning_rate": 9.965469866275457e-06, "loss": 0.7771, "step": 631 }, { "epoch": 0.07, "grad_norm": 2.7307465501368644, "learning_rate": 9.965269633874924e-06, "loss": 0.7157, "step": 632 }, { "epoch": 0.07, "grad_norm": 2.407738691497479, "learning_rate": 9.965068824623718e-06, "loss": 0.6772, "step": 633 }, { "epoch": 0.07, "grad_norm": 2.0971229549109176, "learning_rate": 9.964867438545166e-06, "loss": 0.7709, "step": 634 }, { "epoch": 0.07, "grad_norm": 2.90404822468166, "learning_rate": 9.964665475662668e-06, "loss": 0.7457, "step": 635 }, { "epoch": 0.07, "grad_norm": 2.213670720198414, "learning_rate": 9.964462935999688e-06, "loss": 0.8134, "step": 636 }, { "epoch": 0.07, "grad_norm": 2.8026473036892554, "learning_rate": 9.964259819579754e-06, "loss": 0.7484, "step": 637 }, { "epoch": 0.07, "grad_norm": 2.4715032303759554, "learning_rate": 9.964056126426464e-06, "loss": 0.851, "step": 638 }, { "epoch": 0.07, "grad_norm": 1.9713105204657861, "learning_rate": 9.963851856563483e-06, "loss": 0.7706, "step": 639 }, { "epoch": 0.07, "grad_norm": 2.0696765042050407, "learning_rate": 9.963647010014541e-06, "loss": 0.7526, "step": 640 }, { "epoch": 0.07, "grad_norm": 2.61319517784048, "learning_rate": 9.963441586803439e-06, "loss": 0.7249, "step": 641 }, { "epoch": 0.07, "grad_norm": 35.735839142292704, "learning_rate": 9.963235586954043e-06, "loss": 0.807, "step": 642 }, { "epoch": 0.07, "grad_norm": 2.601165133281576, "learning_rate": 9.963029010490281e-06, "loss": 0.6441, "step": 643 }, { "epoch": 0.07, "grad_norm": 2.700590007062543, "learning_rate": 9.962821857436156e-06, "loss": 0.699, "step": 644 }, { "epoch": 0.07, "grad_norm": 2.83901661028331, "learning_rate": 9.962614127815735e-06, "loss": 0.715, "step": 645 }, { "epoch": 0.07, "grad_norm": 3.2052448381433063, "learning_rate": 9.96240582165315e-06, "loss": 0.7545, "step": 646 }, { "epoch": 0.07, "grad_norm": 3.50569734388418, "learning_rate": 9.962196938972599e-06, "loss": 0.7477, "step": 647 }, { "epoch": 0.07, "grad_norm": 3.3913535336435077, "learning_rate": 9.961987479798354e-06, "loss": 0.6525, "step": 648 }, { "epoch": 0.07, "grad_norm": 3.97776453246874, "learning_rate": 9.961777444154747e-06, "loss": 0.7575, "step": 649 }, { "epoch": 0.07, "grad_norm": 2.6326354312277207, "learning_rate": 9.96156683206618e-06, "loss": 0.7891, "step": 650 }, { "epoch": 0.07, "grad_norm": 2.590711433166387, "learning_rate": 9.96135564355712e-06, "loss": 0.8471, "step": 651 }, { "epoch": 0.07, "grad_norm": 2.694706096312992, "learning_rate": 9.961143878652104e-06, "loss": 0.7534, "step": 652 }, { "epoch": 0.07, "grad_norm": 2.1493792498317736, "learning_rate": 9.960931537375731e-06, "loss": 0.7545, "step": 653 }, { "epoch": 0.07, "grad_norm": 2.547411269212289, "learning_rate": 9.960718619752676e-06, "loss": 0.7998, "step": 654 }, { "epoch": 0.07, "grad_norm": 2.380301424223435, "learning_rate": 9.96050512580767e-06, "loss": 0.6799, "step": 655 }, { "epoch": 0.07, "grad_norm": 2.317215606575152, "learning_rate": 9.960291055565518e-06, "loss": 0.8361, "step": 656 }, { "epoch": 0.07, "grad_norm": 2.76463251057736, "learning_rate": 9.96007640905109e-06, "loss": 0.7474, "step": 657 }, { "epoch": 0.07, "grad_norm": 2.337177345644392, "learning_rate": 9.959861186289324e-06, "loss": 0.7889, "step": 658 }, { "epoch": 0.07, "grad_norm": 2.549090640868925, "learning_rate": 9.95964538730522e-06, "loss": 0.8025, "step": 659 }, { "epoch": 0.07, "grad_norm": 2.530522238161703, "learning_rate": 9.959429012123853e-06, "loss": 0.715, "step": 660 }, { "epoch": 0.07, "grad_norm": 2.515390648368771, "learning_rate": 9.95921206077036e-06, "loss": 0.7851, "step": 661 }, { "epoch": 0.07, "grad_norm": 2.929755415997428, "learning_rate": 9.958994533269947e-06, "loss": 0.7982, "step": 662 }, { "epoch": 0.07, "grad_norm": 2.450266556660888, "learning_rate": 9.958776429647882e-06, "loss": 0.6835, "step": 663 }, { "epoch": 0.07, "grad_norm": 2.5343938278654954, "learning_rate": 9.958557749929507e-06, "loss": 0.7832, "step": 664 }, { "epoch": 0.07, "grad_norm": 2.2961561118476643, "learning_rate": 9.958338494140226e-06, "loss": 0.7488, "step": 665 }, { "epoch": 0.07, "grad_norm": 2.0185829134658593, "learning_rate": 9.958118662305512e-06, "loss": 0.6639, "step": 666 }, { "epoch": 0.07, "grad_norm": 2.6097834515400935, "learning_rate": 9.957898254450904e-06, "loss": 0.8354, "step": 667 }, { "epoch": 0.07, "grad_norm": 2.681329545781412, "learning_rate": 9.957677270602009e-06, "loss": 0.8243, "step": 668 }, { "epoch": 0.07, "grad_norm": 3.0534672454472056, "learning_rate": 9.957455710784499e-06, "loss": 0.7536, "step": 669 }, { "epoch": 0.07, "grad_norm": 2.306313918952685, "learning_rate": 9.957233575024114e-06, "loss": 0.831, "step": 670 }, { "epoch": 0.07, "grad_norm": 2.188405047316281, "learning_rate": 9.957010863346665e-06, "loss": 0.7803, "step": 671 }, { "epoch": 0.07, "grad_norm": 3.0129653737744073, "learning_rate": 9.956787575778022e-06, "loss": 0.721, "step": 672 }, { "epoch": 0.07, "grad_norm": 2.3823769975743683, "learning_rate": 9.956563712344127e-06, "loss": 0.7449, "step": 673 }, { "epoch": 0.07, "grad_norm": 2.5712155219456476, "learning_rate": 9.956339273070988e-06, "loss": 0.7693, "step": 674 }, { "epoch": 0.07, "grad_norm": 2.917447740270616, "learning_rate": 9.95611425798468e-06, "loss": 0.7564, "step": 675 }, { "epoch": 0.07, "grad_norm": 2.441012766615215, "learning_rate": 9.955888667111341e-06, "loss": 0.7311, "step": 676 }, { "epoch": 0.07, "grad_norm": 2.3439122547341844, "learning_rate": 9.955662500477185e-06, "loss": 0.6111, "step": 677 }, { "epoch": 0.07, "grad_norm": 2.3472763771803153, "learning_rate": 9.955435758108488e-06, "loss": 0.7722, "step": 678 }, { "epoch": 0.07, "grad_norm": 3.0118308796961055, "learning_rate": 9.955208440031586e-06, "loss": 0.7479, "step": 679 }, { "epoch": 0.07, "grad_norm": 2.5164721040624083, "learning_rate": 9.954980546272892e-06, "loss": 0.7528, "step": 680 }, { "epoch": 0.07, "grad_norm": 2.349312862665128, "learning_rate": 9.95475207685888e-06, "loss": 0.7895, "step": 681 }, { "epoch": 0.07, "grad_norm": 3.538231437183247, "learning_rate": 9.954523031816096e-06, "loss": 0.7881, "step": 682 }, { "epoch": 0.07, "grad_norm": 2.2748356407545756, "learning_rate": 9.95429341117115e-06, "loss": 0.7373, "step": 683 }, { "epoch": 0.07, "grad_norm": 2.690864131773263, "learning_rate": 9.954063214950715e-06, "loss": 0.7729, "step": 684 }, { "epoch": 0.07, "grad_norm": 2.441689564432529, "learning_rate": 9.953832443181536e-06, "loss": 0.6846, "step": 685 }, { "epoch": 0.07, "grad_norm": 2.7063998770038253, "learning_rate": 9.953601095890425e-06, "loss": 0.7764, "step": 686 }, { "epoch": 0.07, "grad_norm": 2.3897375113199755, "learning_rate": 9.953369173104256e-06, "loss": 0.7468, "step": 687 }, { "epoch": 0.07, "grad_norm": 5.623372216112194, "learning_rate": 9.953136674849978e-06, "loss": 0.7766, "step": 688 }, { "epoch": 0.07, "grad_norm": 2.33678410607246, "learning_rate": 9.952903601154598e-06, "loss": 0.7899, "step": 689 }, { "epoch": 0.07, "grad_norm": 5.7100874309999385, "learning_rate": 9.952669952045196e-06, "loss": 0.6976, "step": 690 }, { "epoch": 0.07, "grad_norm": 2.4385704663154444, "learning_rate": 9.952435727548915e-06, "loss": 0.7834, "step": 691 }, { "epoch": 0.07, "grad_norm": 2.466989152583984, "learning_rate": 9.952200927692965e-06, "loss": 0.7762, "step": 692 }, { "epoch": 0.07, "grad_norm": 2.4093135917267245, "learning_rate": 9.95196555250463e-06, "loss": 0.6769, "step": 693 }, { "epoch": 0.07, "grad_norm": 2.3338351134830537, "learning_rate": 9.95172960201125e-06, "loss": 0.7501, "step": 694 }, { "epoch": 0.07, "grad_norm": 2.3101279059282334, "learning_rate": 9.95149307624024e-06, "loss": 0.7034, "step": 695 }, { "epoch": 0.07, "grad_norm": 2.2681781457320707, "learning_rate": 9.951255975219076e-06, "loss": 0.8163, "step": 696 }, { "epoch": 0.07, "grad_norm": 2.79211360891463, "learning_rate": 9.951018298975306e-06, "loss": 0.7878, "step": 697 }, { "epoch": 0.07, "grad_norm": 1.9232352000611326, "learning_rate": 9.950780047536543e-06, "loss": 0.74, "step": 698 }, { "epoch": 0.07, "grad_norm": 2.750774856582151, "learning_rate": 9.950541220930463e-06, "loss": 0.7653, "step": 699 }, { "epoch": 0.07, "grad_norm": 2.046202452399702, "learning_rate": 9.950301819184816e-06, "loss": 0.6924, "step": 700 }, { "epoch": 0.07, "grad_norm": 2.5862040137998457, "learning_rate": 9.950061842327415e-06, "loss": 0.7198, "step": 701 }, { "epoch": 0.07, "grad_norm": 2.9170844730186487, "learning_rate": 9.949821290386137e-06, "loss": 0.7769, "step": 702 }, { "epoch": 0.07, "grad_norm": 2.638513502534882, "learning_rate": 9.94958016338893e-06, "loss": 0.7465, "step": 703 }, { "epoch": 0.07, "grad_norm": 3.154543573950718, "learning_rate": 9.949338461363807e-06, "loss": 0.7234, "step": 704 }, { "epoch": 0.07, "grad_norm": 2.4170777296764943, "learning_rate": 9.949096184338849e-06, "loss": 0.7579, "step": 705 }, { "epoch": 0.07, "grad_norm": 3.1649853433830772, "learning_rate": 9.948853332342202e-06, "loss": 0.6416, "step": 706 }, { "epoch": 0.07, "grad_norm": 2.2732580001069485, "learning_rate": 9.948609905402082e-06, "loss": 0.7448, "step": 707 }, { "epoch": 0.07, "grad_norm": 3.0109624448260335, "learning_rate": 9.948365903546766e-06, "loss": 0.8066, "step": 708 }, { "epoch": 0.07, "grad_norm": 3.5719103836155117, "learning_rate": 9.948121326804604e-06, "loss": 0.6297, "step": 709 }, { "epoch": 0.07, "grad_norm": 1.6740846319075147, "learning_rate": 9.947876175204013e-06, "loss": 0.6353, "step": 710 }, { "epoch": 0.07, "grad_norm": 2.8270316407346163, "learning_rate": 9.947630448773468e-06, "loss": 0.7751, "step": 711 }, { "epoch": 0.07, "grad_norm": 2.8283132421827895, "learning_rate": 9.94738414754152e-06, "loss": 0.8027, "step": 712 }, { "epoch": 0.08, "grad_norm": 2.332076932637489, "learning_rate": 9.947137271536784e-06, "loss": 0.6781, "step": 713 }, { "epoch": 0.08, "grad_norm": 2.3484004490599073, "learning_rate": 9.94688982078794e-06, "loss": 0.6904, "step": 714 }, { "epoch": 0.08, "grad_norm": 2.1432128787457025, "learning_rate": 9.946641795323737e-06, "loss": 0.7361, "step": 715 }, { "epoch": 0.08, "grad_norm": 1.6418533463624745, "learning_rate": 9.946393195172987e-06, "loss": 0.63, "step": 716 }, { "epoch": 0.08, "grad_norm": 2.2838183539614234, "learning_rate": 9.946144020364576e-06, "loss": 0.7464, "step": 717 }, { "epoch": 0.08, "grad_norm": 2.955408112768042, "learning_rate": 9.945894270927452e-06, "loss": 0.7333, "step": 718 }, { "epoch": 0.08, "grad_norm": 2.565202439751648, "learning_rate": 9.945643946890628e-06, "loss": 0.6788, "step": 719 }, { "epoch": 0.08, "grad_norm": 1.987883523633849, "learning_rate": 9.945393048283186e-06, "loss": 0.7369, "step": 720 }, { "epoch": 0.08, "grad_norm": 2.305645743958634, "learning_rate": 9.945141575134275e-06, "loss": 0.8795, "step": 721 }, { "epoch": 0.08, "grad_norm": 4.342801724950221, "learning_rate": 9.944889527473112e-06, "loss": 0.7749, "step": 722 }, { "epoch": 0.08, "grad_norm": 2.6867592587630953, "learning_rate": 9.944636905328977e-06, "loss": 0.7122, "step": 723 }, { "epoch": 0.08, "grad_norm": 2.7963278085386087, "learning_rate": 9.94438370873122e-06, "loss": 0.7286, "step": 724 }, { "epoch": 0.08, "grad_norm": 3.0500060514327645, "learning_rate": 9.944129937709255e-06, "loss": 0.8238, "step": 725 }, { "epoch": 0.08, "grad_norm": 2.518747853267898, "learning_rate": 9.943875592292569e-06, "loss": 0.7468, "step": 726 }, { "epoch": 0.08, "grad_norm": 2.3307749408071023, "learning_rate": 9.943620672510706e-06, "loss": 0.784, "step": 727 }, { "epoch": 0.08, "grad_norm": 2.2927298280547608, "learning_rate": 9.943365178393283e-06, "loss": 0.6643, "step": 728 }, { "epoch": 0.08, "grad_norm": 5.413030343240209, "learning_rate": 9.943109109969985e-06, "loss": 0.7718, "step": 729 }, { "epoch": 0.08, "grad_norm": 1.985911781478991, "learning_rate": 9.94285246727056e-06, "loss": 0.7437, "step": 730 }, { "epoch": 0.08, "grad_norm": 2.8515953816737416, "learning_rate": 9.942595250324823e-06, "loss": 0.7, "step": 731 }, { "epoch": 0.08, "grad_norm": 2.567107917615768, "learning_rate": 9.942337459162657e-06, "loss": 0.6874, "step": 732 }, { "epoch": 0.08, "grad_norm": 2.389536459691384, "learning_rate": 9.942079093814012e-06, "loss": 0.7786, "step": 733 }, { "epoch": 0.08, "grad_norm": 2.456329041193247, "learning_rate": 9.941820154308905e-06, "loss": 0.6675, "step": 734 }, { "epoch": 0.08, "grad_norm": 2.2902527313975454, "learning_rate": 9.941560640677417e-06, "loss": 0.7431, "step": 735 }, { "epoch": 0.08, "grad_norm": 1.9299279456459286, "learning_rate": 9.941300552949697e-06, "loss": 0.7444, "step": 736 }, { "epoch": 0.08, "grad_norm": 2.894177677033512, "learning_rate": 9.941039891155964e-06, "loss": 0.6389, "step": 737 }, { "epoch": 0.08, "grad_norm": 2.1178139348238716, "learning_rate": 9.940778655326499e-06, "loss": 0.7812, "step": 738 }, { "epoch": 0.08, "grad_norm": 2.6341684151897957, "learning_rate": 9.940516845491653e-06, "loss": 0.7911, "step": 739 }, { "epoch": 0.08, "grad_norm": 2.3572249944985764, "learning_rate": 9.940254461681841e-06, "loss": 0.7095, "step": 740 }, { "epoch": 0.08, "grad_norm": 2.392416707308911, "learning_rate": 9.939991503927548e-06, "loss": 0.7532, "step": 741 }, { "epoch": 0.08, "grad_norm": 2.892105469013813, "learning_rate": 9.939727972259321e-06, "loss": 0.8392, "step": 742 }, { "epoch": 0.08, "grad_norm": 2.5792946557866454, "learning_rate": 9.939463866707777e-06, "loss": 0.7718, "step": 743 }, { "epoch": 0.08, "grad_norm": 2.382428104409083, "learning_rate": 9.939199187303598e-06, "loss": 0.6834, "step": 744 }, { "epoch": 0.08, "grad_norm": 2.963009035348591, "learning_rate": 9.938933934077539e-06, "loss": 0.749, "step": 745 }, { "epoch": 0.08, "grad_norm": 2.3169431674281125, "learning_rate": 9.93866810706041e-06, "loss": 0.7598, "step": 746 }, { "epoch": 0.08, "grad_norm": 2.269068621083456, "learning_rate": 9.938401706283096e-06, "loss": 0.6919, "step": 747 }, { "epoch": 0.08, "grad_norm": 4.520110943561689, "learning_rate": 9.93813473177655e-06, "loss": 0.7574, "step": 748 }, { "epoch": 0.08, "grad_norm": 2.6705782110520553, "learning_rate": 9.937867183571784e-06, "loss": 0.6852, "step": 749 }, { "epoch": 0.08, "grad_norm": 2.394466270287015, "learning_rate": 9.93759906169988e-06, "loss": 0.8214, "step": 750 }, { "epoch": 0.08, "grad_norm": 2.4103257753815956, "learning_rate": 9.937330366191994e-06, "loss": 0.7784, "step": 751 }, { "epoch": 0.08, "grad_norm": 1.2399480859042666, "learning_rate": 9.937061097079337e-06, "loss": 0.6601, "step": 752 }, { "epoch": 0.08, "grad_norm": 3.0481316936580254, "learning_rate": 9.936791254393193e-06, "loss": 0.7356, "step": 753 }, { "epoch": 0.08, "grad_norm": 2.437583015333378, "learning_rate": 9.936520838164912e-06, "loss": 0.7158, "step": 754 }, { "epoch": 0.08, "grad_norm": 2.707441716431831, "learning_rate": 9.93624984842591e-06, "loss": 0.7215, "step": 755 }, { "epoch": 0.08, "grad_norm": 4.532030304474135, "learning_rate": 9.93597828520767e-06, "loss": 0.753, "step": 756 }, { "epoch": 0.08, "grad_norm": 2.7461971545571786, "learning_rate": 9.935706148541742e-06, "loss": 0.7097, "step": 757 }, { "epoch": 0.08, "grad_norm": 2.3627790545916216, "learning_rate": 9.93543343845974e-06, "loss": 0.8169, "step": 758 }, { "epoch": 0.08, "grad_norm": 2.6693014549496814, "learning_rate": 9.93516015499335e-06, "loss": 0.8296, "step": 759 }, { "epoch": 0.08, "grad_norm": 3.7105561050330222, "learning_rate": 9.934886298174317e-06, "loss": 0.7099, "step": 760 }, { "epoch": 0.08, "grad_norm": 2.720683329507592, "learning_rate": 9.93461186803446e-06, "loss": 0.7632, "step": 761 }, { "epoch": 0.08, "grad_norm": 2.588291193800891, "learning_rate": 9.934336864605663e-06, "loss": 0.7789, "step": 762 }, { "epoch": 0.08, "grad_norm": 2.5585071173949863, "learning_rate": 9.934061287919869e-06, "loss": 0.6833, "step": 763 }, { "epoch": 0.08, "grad_norm": 2.2895773779403195, "learning_rate": 9.9337851380091e-06, "loss": 0.7367, "step": 764 }, { "epoch": 0.08, "grad_norm": 3.0219975702999697, "learning_rate": 9.933508414905434e-06, "loss": 0.7717, "step": 765 }, { "epoch": 0.08, "grad_norm": 2.6270513907040094, "learning_rate": 9.933231118641025e-06, "loss": 0.6998, "step": 766 }, { "epoch": 0.08, "grad_norm": 2.620673953552405, "learning_rate": 9.932953249248082e-06, "loss": 0.833, "step": 767 }, { "epoch": 0.08, "grad_norm": 2.5153078631067154, "learning_rate": 9.93267480675889e-06, "loss": 0.7723, "step": 768 }, { "epoch": 0.08, "grad_norm": 2.951354250369123, "learning_rate": 9.9323957912058e-06, "loss": 0.7502, "step": 769 }, { "epoch": 0.08, "grad_norm": 1.4394149168178134, "learning_rate": 9.932116202621224e-06, "loss": 0.6457, "step": 770 }, { "epoch": 0.08, "grad_norm": 2.1095938889246026, "learning_rate": 9.931836041037644e-06, "loss": 0.7061, "step": 771 }, { "epoch": 0.08, "grad_norm": 2.6819242824234957, "learning_rate": 9.931555306487612e-06, "loss": 0.7691, "step": 772 }, { "epoch": 0.08, "grad_norm": 2.2043376051832424, "learning_rate": 9.931273999003738e-06, "loss": 0.6946, "step": 773 }, { "epoch": 0.08, "grad_norm": 2.6902924884956425, "learning_rate": 9.930992118618706e-06, "loss": 0.6695, "step": 774 }, { "epoch": 0.08, "grad_norm": 2.8989924651843326, "learning_rate": 9.930709665365264e-06, "loss": 0.7694, "step": 775 }, { "epoch": 0.08, "grad_norm": 2.224303330241575, "learning_rate": 9.930426639276225e-06, "loss": 0.7487, "step": 776 }, { "epoch": 0.08, "grad_norm": 2.6983836303838826, "learning_rate": 9.930143040384472e-06, "loss": 0.7523, "step": 777 }, { "epoch": 0.08, "grad_norm": 2.3780230591194065, "learning_rate": 9.929858868722954e-06, "loss": 0.7692, "step": 778 }, { "epoch": 0.08, "grad_norm": 2.4634887523343942, "learning_rate": 9.929574124324682e-06, "loss": 0.8036, "step": 779 }, { "epoch": 0.08, "grad_norm": 2.2694290012543052, "learning_rate": 9.929288807222738e-06, "loss": 0.7424, "step": 780 }, { "epoch": 0.08, "grad_norm": 2.6592362586407394, "learning_rate": 9.92900291745027e-06, "loss": 0.6375, "step": 781 }, { "epoch": 0.08, "grad_norm": 3.495897539805403, "learning_rate": 9.92871645504049e-06, "loss": 0.7854, "step": 782 }, { "epoch": 0.08, "grad_norm": 2.0961963685516727, "learning_rate": 9.928429420026682e-06, "loss": 0.7634, "step": 783 }, { "epoch": 0.08, "grad_norm": 2.8509364984526253, "learning_rate": 9.92814181244219e-06, "loss": 0.6458, "step": 784 }, { "epoch": 0.08, "grad_norm": 2.8241576523218956, "learning_rate": 9.927853632320427e-06, "loss": 0.7804, "step": 785 }, { "epoch": 0.08, "grad_norm": 3.0001225664364055, "learning_rate": 9.927564879694874e-06, "loss": 0.7229, "step": 786 }, { "epoch": 0.08, "grad_norm": 3.4475528816764944, "learning_rate": 9.927275554599078e-06, "loss": 0.8362, "step": 787 }, { "epoch": 0.08, "grad_norm": 2.2308232356508526, "learning_rate": 9.926985657066653e-06, "loss": 0.7114, "step": 788 }, { "epoch": 0.08, "grad_norm": 2.341682432680137, "learning_rate": 9.926695187131275e-06, "loss": 0.7783, "step": 789 }, { "epoch": 0.08, "grad_norm": 2.6052199283711874, "learning_rate": 9.92640414482669e-06, "loss": 0.8333, "step": 790 }, { "epoch": 0.08, "grad_norm": 2.8370875665068858, "learning_rate": 9.926112530186715e-06, "loss": 0.7107, "step": 791 }, { "epoch": 0.08, "grad_norm": 2.5275739897207865, "learning_rate": 9.925820343245225e-06, "loss": 0.7965, "step": 792 }, { "epoch": 0.08, "grad_norm": 2.241333301690011, "learning_rate": 9.925527584036167e-06, "loss": 0.8081, "step": 793 }, { "epoch": 0.08, "grad_norm": 2.1980959634447164, "learning_rate": 9.925234252593554e-06, "loss": 0.7231, "step": 794 }, { "epoch": 0.08, "grad_norm": 2.5031300722496965, "learning_rate": 9.92494034895146e-06, "loss": 0.6994, "step": 795 }, { "epoch": 0.08, "grad_norm": 2.1430334991183604, "learning_rate": 9.924645873144035e-06, "loss": 0.6908, "step": 796 }, { "epoch": 0.08, "grad_norm": 2.350620970434163, "learning_rate": 9.924350825205487e-06, "loss": 0.8007, "step": 797 }, { "epoch": 0.08, "grad_norm": 2.579066826686899, "learning_rate": 9.924055205170095e-06, "loss": 0.8572, "step": 798 }, { "epoch": 0.08, "grad_norm": 2.9358520387744473, "learning_rate": 9.923759013072205e-06, "loss": 0.7289, "step": 799 }, { "epoch": 0.08, "grad_norm": 1.4595413230957774, "learning_rate": 9.923462248946224e-06, "loss": 0.6404, "step": 800 }, { "epoch": 0.08, "grad_norm": 2.25406590521839, "learning_rate": 9.923164912826631e-06, "loss": 0.7915, "step": 801 }, { "epoch": 0.08, "grad_norm": 2.5320545260007727, "learning_rate": 9.922867004747971e-06, "loss": 0.7355, "step": 802 }, { "epoch": 0.08, "grad_norm": 1.0795033707901633, "learning_rate": 9.922568524744854e-06, "loss": 0.6239, "step": 803 }, { "epoch": 0.08, "grad_norm": 5.1305263819984, "learning_rate": 9.922269472851953e-06, "loss": 0.7992, "step": 804 }, { "epoch": 0.08, "grad_norm": 2.4229395578571924, "learning_rate": 9.921969849104015e-06, "loss": 0.686, "step": 805 }, { "epoch": 0.08, "grad_norm": 2.925961640710328, "learning_rate": 9.921669653535848e-06, "loss": 0.7174, "step": 806 }, { "epoch": 0.08, "grad_norm": 2.77414781814395, "learning_rate": 9.921368886182328e-06, "loss": 0.7249, "step": 807 }, { "epoch": 0.09, "grad_norm": 2.732166312681211, "learning_rate": 9.921067547078396e-06, "loss": 0.7682, "step": 808 }, { "epoch": 0.09, "grad_norm": 2.6581746298192357, "learning_rate": 9.920765636259062e-06, "loss": 0.7427, "step": 809 }, { "epoch": 0.09, "grad_norm": 3.420618373750652, "learning_rate": 9.9204631537594e-06, "loss": 0.8256, "step": 810 }, { "epoch": 0.09, "grad_norm": 4.452433300784335, "learning_rate": 9.920160099614553e-06, "loss": 0.7742, "step": 811 }, { "epoch": 0.09, "grad_norm": 2.347854862834246, "learning_rate": 9.91985647385973e-06, "loss": 0.6981, "step": 812 }, { "epoch": 0.09, "grad_norm": 2.335973187712152, "learning_rate": 9.919552276530202e-06, "loss": 0.6822, "step": 813 }, { "epoch": 0.09, "grad_norm": 3.0899513351336645, "learning_rate": 9.919247507661313e-06, "loss": 0.6554, "step": 814 }, { "epoch": 0.09, "grad_norm": 2.4579977944848657, "learning_rate": 9.918942167288467e-06, "loss": 0.7347, "step": 815 }, { "epoch": 0.09, "grad_norm": 2.2321378141123316, "learning_rate": 9.918636255447141e-06, "loss": 0.7083, "step": 816 }, { "epoch": 0.09, "grad_norm": 2.337678644880633, "learning_rate": 9.918329772172872e-06, "loss": 0.7353, "step": 817 }, { "epoch": 0.09, "grad_norm": 2.484233611654691, "learning_rate": 9.918022717501268e-06, "loss": 0.7626, "step": 818 }, { "epoch": 0.09, "grad_norm": 2.6233513206248698, "learning_rate": 9.917715091467999e-06, "loss": 0.7422, "step": 819 }, { "epoch": 0.09, "grad_norm": 2.2460279378058137, "learning_rate": 9.91740689410881e-06, "loss": 0.8034, "step": 820 }, { "epoch": 0.09, "grad_norm": 3.014666827028448, "learning_rate": 9.917098125459501e-06, "loss": 0.7201, "step": 821 }, { "epoch": 0.09, "grad_norm": 2.8852937872527904, "learning_rate": 9.916788785555945e-06, "loss": 0.702, "step": 822 }, { "epoch": 0.09, "grad_norm": 2.468109581522842, "learning_rate": 9.91647887443408e-06, "loss": 0.7292, "step": 823 }, { "epoch": 0.09, "grad_norm": 2.0137798678101095, "learning_rate": 9.916168392129914e-06, "loss": 0.7081, "step": 824 }, { "epoch": 0.09, "grad_norm": 2.1356848172886997, "learning_rate": 9.915857338679515e-06, "loss": 0.7477, "step": 825 }, { "epoch": 0.09, "grad_norm": 2.146941252431847, "learning_rate": 9.91554571411902e-06, "loss": 0.7231, "step": 826 }, { "epoch": 0.09, "grad_norm": 1.967160075670586, "learning_rate": 9.915233518484633e-06, "loss": 0.7984, "step": 827 }, { "epoch": 0.09, "grad_norm": 2.0751156185412385, "learning_rate": 9.914920751812626e-06, "loss": 0.8102, "step": 828 }, { "epoch": 0.09, "grad_norm": 2.3472721595318586, "learning_rate": 9.914607414139332e-06, "loss": 0.7313, "step": 829 }, { "epoch": 0.09, "grad_norm": 2.118986957021893, "learning_rate": 9.914293505501155e-06, "loss": 0.8668, "step": 830 }, { "epoch": 0.09, "grad_norm": 2.394587519870367, "learning_rate": 9.913979025934566e-06, "loss": 0.7498, "step": 831 }, { "epoch": 0.09, "grad_norm": 3.0206794969562405, "learning_rate": 9.913663975476099e-06, "loss": 0.7633, "step": 832 }, { "epoch": 0.09, "grad_norm": 2.096598722449407, "learning_rate": 9.913348354162353e-06, "loss": 0.7422, "step": 833 }, { "epoch": 0.09, "grad_norm": 2.4996308686533895, "learning_rate": 9.913032162029999e-06, "loss": 0.7345, "step": 834 }, { "epoch": 0.09, "grad_norm": 2.692833005695984, "learning_rate": 9.91271539911577e-06, "loss": 0.783, "step": 835 }, { "epoch": 0.09, "grad_norm": 3.1228836691300037, "learning_rate": 9.91239806545647e-06, "loss": 0.7425, "step": 836 }, { "epoch": 0.09, "grad_norm": 2.3986505144559427, "learning_rate": 9.91208016108896e-06, "loss": 0.7267, "step": 837 }, { "epoch": 0.09, "grad_norm": 2.7101167395338273, "learning_rate": 9.911761686050177e-06, "loss": 0.7581, "step": 838 }, { "epoch": 0.09, "grad_norm": 2.5806365307450667, "learning_rate": 9.91144264037712e-06, "loss": 0.7847, "step": 839 }, { "epoch": 0.09, "grad_norm": 1.5964088279350312, "learning_rate": 9.911123024106854e-06, "loss": 0.6742, "step": 840 }, { "epoch": 0.09, "grad_norm": 2.0458536491050143, "learning_rate": 9.910802837276514e-06, "loss": 0.6898, "step": 841 }, { "epoch": 0.09, "grad_norm": 1.9738131081216486, "learning_rate": 9.910482079923293e-06, "loss": 0.7097, "step": 842 }, { "epoch": 0.09, "grad_norm": 4.033940638900864, "learning_rate": 9.910160752084461e-06, "loss": 0.6891, "step": 843 }, { "epoch": 0.09, "grad_norm": 2.493418453706031, "learning_rate": 9.909838853797347e-06, "loss": 0.8386, "step": 844 }, { "epoch": 0.09, "grad_norm": 2.3609786228834397, "learning_rate": 9.909516385099346e-06, "loss": 0.6447, "step": 845 }, { "epoch": 0.09, "grad_norm": 1.8826827313822074, "learning_rate": 9.909193346027923e-06, "loss": 0.7634, "step": 846 }, { "epoch": 0.09, "grad_norm": 2.192146232328817, "learning_rate": 9.90886973662061e-06, "loss": 0.6501, "step": 847 }, { "epoch": 0.09, "grad_norm": 4.924566828839797, "learning_rate": 9.908545556915e-06, "loss": 0.7382, "step": 848 }, { "epoch": 0.09, "grad_norm": 2.124722725213922, "learning_rate": 9.908220806948755e-06, "loss": 0.7342, "step": 849 }, { "epoch": 0.09, "grad_norm": 3.149040160809967, "learning_rate": 9.90789548675961e-06, "loss": 0.769, "step": 850 }, { "epoch": 0.09, "grad_norm": 2.016612347468723, "learning_rate": 9.90756959638535e-06, "loss": 0.7533, "step": 851 }, { "epoch": 0.09, "grad_norm": 2.21890626690869, "learning_rate": 9.90724313586384e-06, "loss": 0.7183, "step": 852 }, { "epoch": 0.09, "grad_norm": 2.1739377588424182, "learning_rate": 9.90691610523301e-06, "loss": 0.725, "step": 853 }, { "epoch": 0.09, "grad_norm": 2.4308506352472565, "learning_rate": 9.906588504530852e-06, "loss": 0.7721, "step": 854 }, { "epoch": 0.09, "grad_norm": 2.039428986286277, "learning_rate": 9.906260333795423e-06, "loss": 0.7862, "step": 855 }, { "epoch": 0.09, "grad_norm": 2.072889277401122, "learning_rate": 9.905931593064852e-06, "loss": 0.7057, "step": 856 }, { "epoch": 0.09, "grad_norm": 2.5603746952912583, "learning_rate": 9.905602282377331e-06, "loss": 0.7845, "step": 857 }, { "epoch": 0.09, "grad_norm": 2.2094754050609255, "learning_rate": 9.905272401771115e-06, "loss": 0.6726, "step": 858 }, { "epoch": 0.09, "grad_norm": 2.010211257078767, "learning_rate": 9.904941951284535e-06, "loss": 0.82, "step": 859 }, { "epoch": 0.09, "grad_norm": 2.4294590301163907, "learning_rate": 9.904610930955975e-06, "loss": 0.7225, "step": 860 }, { "epoch": 0.09, "grad_norm": 2.4545417076156157, "learning_rate": 9.904279340823895e-06, "loss": 0.7379, "step": 861 }, { "epoch": 0.09, "grad_norm": 2.0489546331766704, "learning_rate": 9.903947180926819e-06, "loss": 0.7939, "step": 862 }, { "epoch": 0.09, "grad_norm": 4.328322416011042, "learning_rate": 9.903614451303335e-06, "loss": 0.7423, "step": 863 }, { "epoch": 0.09, "grad_norm": 2.673446796533317, "learning_rate": 9.903281151992097e-06, "loss": 0.684, "step": 864 }, { "epoch": 0.09, "grad_norm": 2.6240021544906904, "learning_rate": 9.902947283031833e-06, "loss": 0.6573, "step": 865 }, { "epoch": 0.09, "grad_norm": 2.4194535688154097, "learning_rate": 9.902612844461322e-06, "loss": 0.7671, "step": 866 }, { "epoch": 0.09, "grad_norm": 2.600458794788045, "learning_rate": 9.902277836319424e-06, "loss": 0.686, "step": 867 }, { "epoch": 0.09, "grad_norm": 2.7708332752714226, "learning_rate": 9.90194225864506e-06, "loss": 0.7713, "step": 868 }, { "epoch": 0.09, "grad_norm": 2.345954440796902, "learning_rate": 9.901606111477213e-06, "loss": 0.7127, "step": 869 }, { "epoch": 0.09, "grad_norm": 2.4380277637214958, "learning_rate": 9.901269394854938e-06, "loss": 0.7432, "step": 870 }, { "epoch": 0.09, "grad_norm": 2.535510929005561, "learning_rate": 9.900932108817352e-06, "loss": 0.735, "step": 871 }, { "epoch": 0.09, "grad_norm": 3.0039289042478337, "learning_rate": 9.900594253403642e-06, "loss": 0.76, "step": 872 }, { "epoch": 0.09, "grad_norm": 2.350300158880335, "learning_rate": 9.900255828653057e-06, "loss": 0.6084, "step": 873 }, { "epoch": 0.09, "grad_norm": 2.4958117500634938, "learning_rate": 9.899916834604914e-06, "loss": 0.7951, "step": 874 }, { "epoch": 0.09, "grad_norm": 2.4483497475118843, "learning_rate": 9.899577271298596e-06, "loss": 0.7217, "step": 875 }, { "epoch": 0.09, "grad_norm": 1.9481072329531595, "learning_rate": 9.89923713877356e-06, "loss": 0.6245, "step": 876 }, { "epoch": 0.09, "grad_norm": 2.765051932549656, "learning_rate": 9.89889643706931e-06, "loss": 0.6723, "step": 877 }, { "epoch": 0.09, "grad_norm": 2.739221117936455, "learning_rate": 9.898555166225434e-06, "loss": 0.7703, "step": 878 }, { "epoch": 0.09, "grad_norm": 2.1418498868809768, "learning_rate": 9.89821332628158e-06, "loss": 0.7298, "step": 879 }, { "epoch": 0.09, "grad_norm": 2.2036351653598367, "learning_rate": 9.897870917277461e-06, "loss": 0.8093, "step": 880 }, { "epoch": 0.09, "grad_norm": 2.4323072705596505, "learning_rate": 9.897527939252858e-06, "loss": 0.7399, "step": 881 }, { "epoch": 0.09, "grad_norm": 2.8657728288025477, "learning_rate": 9.897184392247614e-06, "loss": 0.7356, "step": 882 }, { "epoch": 0.09, "grad_norm": 2.5414139234529642, "learning_rate": 9.896840276301645e-06, "loss": 0.6747, "step": 883 }, { "epoch": 0.09, "grad_norm": 2.2994147096204327, "learning_rate": 9.896495591454929e-06, "loss": 0.7335, "step": 884 }, { "epoch": 0.09, "grad_norm": 1.9851136351096752, "learning_rate": 9.896150337747508e-06, "loss": 0.7839, "step": 885 }, { "epoch": 0.09, "grad_norm": 2.475304204640165, "learning_rate": 9.895804515219495e-06, "loss": 0.7442, "step": 886 }, { "epoch": 0.09, "grad_norm": 3.0051133893066178, "learning_rate": 9.895458123911066e-06, "loss": 0.819, "step": 887 }, { "epoch": 0.09, "grad_norm": 3.01882317139601, "learning_rate": 9.895111163862464e-06, "loss": 0.743, "step": 888 }, { "epoch": 0.09, "grad_norm": 1.4869064278045154, "learning_rate": 9.894763635113995e-06, "loss": 0.673, "step": 889 }, { "epoch": 0.09, "grad_norm": 2.0423193152498316, "learning_rate": 9.894415537706036e-06, "loss": 0.7938, "step": 890 }, { "epoch": 0.09, "grad_norm": 2.3877929769561415, "learning_rate": 9.89406687167903e-06, "loss": 0.8399, "step": 891 }, { "epoch": 0.09, "grad_norm": 2.3118433378041283, "learning_rate": 9.893717637073483e-06, "loss": 0.7365, "step": 892 }, { "epoch": 0.09, "grad_norm": 3.1231159883183817, "learning_rate": 9.893367833929965e-06, "loss": 0.6975, "step": 893 }, { "epoch": 0.09, "grad_norm": 2.946732175793273, "learning_rate": 9.893017462289119e-06, "loss": 0.7355, "step": 894 }, { "epoch": 0.09, "grad_norm": 4.3717939300188275, "learning_rate": 9.892666522191648e-06, "loss": 0.6766, "step": 895 }, { "epoch": 0.09, "grad_norm": 3.0279846422346446, "learning_rate": 9.892315013678323e-06, "loss": 0.7597, "step": 896 }, { "epoch": 0.09, "grad_norm": 2.55744056624668, "learning_rate": 9.891962936789983e-06, "loss": 0.7977, "step": 897 }, { "epoch": 0.09, "grad_norm": 2.569713950624268, "learning_rate": 9.891610291567529e-06, "loss": 0.7722, "step": 898 }, { "epoch": 0.09, "grad_norm": 2.8251737732812003, "learning_rate": 9.891257078051932e-06, "loss": 0.8538, "step": 899 }, { "epoch": 0.09, "grad_norm": 3.940064605870108, "learning_rate": 9.890903296284228e-06, "loss": 0.6861, "step": 900 }, { "epoch": 0.09, "grad_norm": 2.3962938305889727, "learning_rate": 9.890548946305516e-06, "loss": 0.7367, "step": 901 }, { "epoch": 0.09, "grad_norm": 2.6270790503951877, "learning_rate": 9.890194028156965e-06, "loss": 0.6927, "step": 902 }, { "epoch": 0.1, "grad_norm": 2.2464932261338486, "learning_rate": 9.889838541879808e-06, "loss": 0.6671, "step": 903 }, { "epoch": 0.1, "grad_norm": 2.2068048828956184, "learning_rate": 9.889482487515344e-06, "loss": 0.6574, "step": 904 }, { "epoch": 0.1, "grad_norm": 4.131286705587521, "learning_rate": 9.889125865104939e-06, "loss": 0.6844, "step": 905 }, { "epoch": 0.1, "grad_norm": 2.4854115277088384, "learning_rate": 9.888768674690023e-06, "loss": 0.7562, "step": 906 }, { "epoch": 0.1, "grad_norm": 2.430105603029439, "learning_rate": 9.888410916312096e-06, "loss": 0.8826, "step": 907 }, { "epoch": 0.1, "grad_norm": 2.3307764886099447, "learning_rate": 9.888052590012719e-06, "loss": 0.6994, "step": 908 }, { "epoch": 0.1, "grad_norm": 2.399232411769506, "learning_rate": 9.887693695833522e-06, "loss": 0.7844, "step": 909 }, { "epoch": 0.1, "grad_norm": 2.4841741299380127, "learning_rate": 9.887334233816199e-06, "loss": 0.7191, "step": 910 }, { "epoch": 0.1, "grad_norm": 2.231644133692005, "learning_rate": 9.886974204002514e-06, "loss": 0.7236, "step": 911 }, { "epoch": 0.1, "grad_norm": 2.589193939890633, "learning_rate": 9.886613606434294e-06, "loss": 0.8006, "step": 912 }, { "epoch": 0.1, "grad_norm": 2.8815065090619294, "learning_rate": 9.886252441153428e-06, "loss": 0.762, "step": 913 }, { "epoch": 0.1, "grad_norm": 2.458751138818673, "learning_rate": 9.885890708201881e-06, "loss": 0.748, "step": 914 }, { "epoch": 0.1, "grad_norm": 2.729352069908257, "learning_rate": 9.885528407621674e-06, "loss": 0.7319, "step": 915 }, { "epoch": 0.1, "grad_norm": 2.7456542817248035, "learning_rate": 9.885165539454898e-06, "loss": 0.7691, "step": 916 }, { "epoch": 0.1, "grad_norm": 2.821954609815299, "learning_rate": 9.884802103743712e-06, "loss": 0.724, "step": 917 }, { "epoch": 0.1, "grad_norm": 2.507495458671125, "learning_rate": 9.88443810053034e-06, "loss": 0.7557, "step": 918 }, { "epoch": 0.1, "grad_norm": 2.813557690867528, "learning_rate": 9.884073529857066e-06, "loss": 0.7378, "step": 919 }, { "epoch": 0.1, "grad_norm": 1.2109392193308994, "learning_rate": 9.883708391766248e-06, "loss": 0.655, "step": 920 }, { "epoch": 0.1, "grad_norm": 2.7797266133676817, "learning_rate": 9.88334268630031e-06, "loss": 0.7568, "step": 921 }, { "epoch": 0.1, "grad_norm": 2.291991455347209, "learning_rate": 9.882976413501733e-06, "loss": 0.6939, "step": 922 }, { "epoch": 0.1, "grad_norm": 2.2553429818506694, "learning_rate": 9.88260957341307e-06, "loss": 0.8572, "step": 923 }, { "epoch": 0.1, "grad_norm": 2.944662889076845, "learning_rate": 9.882242166076942e-06, "loss": 0.7171, "step": 924 }, { "epoch": 0.1, "grad_norm": 2.6005068384270817, "learning_rate": 9.881874191536032e-06, "loss": 0.7304, "step": 925 }, { "epoch": 0.1, "grad_norm": 2.466317488664055, "learning_rate": 9.881505649833091e-06, "loss": 0.7116, "step": 926 }, { "epoch": 0.1, "grad_norm": 2.4452143875236243, "learning_rate": 9.881136541010934e-06, "loss": 0.7629, "step": 927 }, { "epoch": 0.1, "grad_norm": 2.5102243570459595, "learning_rate": 9.880766865112444e-06, "loss": 0.7286, "step": 928 }, { "epoch": 0.1, "grad_norm": 2.7791143392672875, "learning_rate": 9.880396622180567e-06, "loss": 0.7923, "step": 929 }, { "epoch": 0.1, "grad_norm": 2.876441285426497, "learning_rate": 9.880025812258322e-06, "loss": 0.7381, "step": 930 }, { "epoch": 0.1, "grad_norm": 3.2830140025930246, "learning_rate": 9.879654435388781e-06, "loss": 0.7657, "step": 931 }, { "epoch": 0.1, "grad_norm": 2.1392167947242817, "learning_rate": 9.879282491615096e-06, "loss": 0.7584, "step": 932 }, { "epoch": 0.1, "grad_norm": 2.5574554782173244, "learning_rate": 9.878909980980475e-06, "loss": 0.6736, "step": 933 }, { "epoch": 0.1, "grad_norm": 2.482015182428965, "learning_rate": 9.878536903528195e-06, "loss": 0.7855, "step": 934 }, { "epoch": 0.1, "grad_norm": 2.20969188023806, "learning_rate": 9.8781632593016e-06, "loss": 0.7142, "step": 935 }, { "epoch": 0.1, "grad_norm": 2.6096755265839593, "learning_rate": 9.8777890483441e-06, "loss": 0.8059, "step": 936 }, { "epoch": 0.1, "grad_norm": 2.6014399338081122, "learning_rate": 9.877414270699168e-06, "loss": 0.7378, "step": 937 }, { "epoch": 0.1, "grad_norm": 2.1756243191451383, "learning_rate": 9.877038926410346e-06, "loss": 0.7268, "step": 938 }, { "epoch": 0.1, "grad_norm": 2.196316926201555, "learning_rate": 9.876663015521237e-06, "loss": 0.8158, "step": 939 }, { "epoch": 0.1, "grad_norm": 2.383983584002673, "learning_rate": 9.876286538075519e-06, "loss": 0.7215, "step": 940 }, { "epoch": 0.1, "grad_norm": 2.7085992373351204, "learning_rate": 9.875909494116925e-06, "loss": 0.6575, "step": 941 }, { "epoch": 0.1, "grad_norm": 2.5130773928374737, "learning_rate": 9.875531883689262e-06, "loss": 0.6911, "step": 942 }, { "epoch": 0.1, "grad_norm": 3.318178727992876, "learning_rate": 9.875153706836397e-06, "loss": 0.788, "step": 943 }, { "epoch": 0.1, "grad_norm": 4.286494331288682, "learning_rate": 9.874774963602268e-06, "loss": 0.7114, "step": 944 }, { "epoch": 0.1, "grad_norm": 2.1591477947522186, "learning_rate": 9.874395654030876e-06, "loss": 0.7871, "step": 945 }, { "epoch": 0.1, "grad_norm": 2.1547218552212177, "learning_rate": 9.874015778166285e-06, "loss": 0.8348, "step": 946 }, { "epoch": 0.1, "grad_norm": 2.762373245266379, "learning_rate": 9.873635336052633e-06, "loss": 0.7688, "step": 947 }, { "epoch": 0.1, "grad_norm": 2.309708677231106, "learning_rate": 9.873254327734115e-06, "loss": 0.747, "step": 948 }, { "epoch": 0.1, "grad_norm": 2.053888930700921, "learning_rate": 9.872872753254996e-06, "loss": 0.7223, "step": 949 }, { "epoch": 0.1, "grad_norm": 3.5008788542760363, "learning_rate": 9.872490612659607e-06, "loss": 0.6766, "step": 950 }, { "epoch": 0.1, "grad_norm": 2.3863679851555673, "learning_rate": 9.872107905992343e-06, "loss": 0.6972, "step": 951 }, { "epoch": 0.1, "grad_norm": 2.921615156601913, "learning_rate": 9.871724633297666e-06, "loss": 0.6999, "step": 952 }, { "epoch": 0.1, "grad_norm": 3.2644813929698615, "learning_rate": 9.871340794620103e-06, "loss": 0.761, "step": 953 }, { "epoch": 0.1, "grad_norm": 2.1191106722345814, "learning_rate": 9.87095639000425e-06, "loss": 0.7376, "step": 954 }, { "epoch": 0.1, "grad_norm": 6.026837083096435, "learning_rate": 9.870571419494764e-06, "loss": 0.7222, "step": 955 }, { "epoch": 0.1, "grad_norm": 2.336308324288583, "learning_rate": 9.87018588313637e-06, "loss": 0.7229, "step": 956 }, { "epoch": 0.1, "grad_norm": 2.456262394309276, "learning_rate": 9.869799780973856e-06, "loss": 0.7182, "step": 957 }, { "epoch": 0.1, "grad_norm": 2.5276315796370676, "learning_rate": 9.869413113052084e-06, "loss": 0.6835, "step": 958 }, { "epoch": 0.1, "grad_norm": 3.1051858480552497, "learning_rate": 9.86902587941597e-06, "loss": 0.7101, "step": 959 }, { "epoch": 0.1, "grad_norm": 3.3731070604108844, "learning_rate": 9.868638080110507e-06, "loss": 0.7221, "step": 960 }, { "epoch": 0.1, "grad_norm": 2.0867518495145223, "learning_rate": 9.868249715180741e-06, "loss": 0.7046, "step": 961 }, { "epoch": 0.1, "grad_norm": 2.0764983780128166, "learning_rate": 9.8678607846718e-06, "loss": 0.7498, "step": 962 }, { "epoch": 0.1, "grad_norm": 2.1440243110852037, "learning_rate": 9.867471288628863e-06, "loss": 0.6442, "step": 963 }, { "epoch": 0.1, "grad_norm": 2.559996501289641, "learning_rate": 9.867081227097182e-06, "loss": 0.7556, "step": 964 }, { "epoch": 0.1, "grad_norm": 2.546829215817774, "learning_rate": 9.866690600122075e-06, "loss": 0.7098, "step": 965 }, { "epoch": 0.1, "grad_norm": 2.692415588752108, "learning_rate": 9.866299407748921e-06, "loss": 0.694, "step": 966 }, { "epoch": 0.1, "grad_norm": 2.336011704071477, "learning_rate": 9.865907650023167e-06, "loss": 0.7751, "step": 967 }, { "epoch": 0.1, "grad_norm": 2.3030234604337436, "learning_rate": 9.865515326990332e-06, "loss": 0.68, "step": 968 }, { "epoch": 0.1, "grad_norm": 2.3788631634376527, "learning_rate": 9.865122438695988e-06, "loss": 0.7517, "step": 969 }, { "epoch": 0.1, "grad_norm": 2.594025015808721, "learning_rate": 9.864728985185783e-06, "loss": 0.7859, "step": 970 }, { "epoch": 0.1, "grad_norm": 2.2839322712235326, "learning_rate": 9.86433496650543e-06, "loss": 0.7123, "step": 971 }, { "epoch": 0.1, "grad_norm": 2.7189304806754397, "learning_rate": 9.863940382700699e-06, "loss": 0.7495, "step": 972 }, { "epoch": 0.1, "grad_norm": 2.5542757749406033, "learning_rate": 9.863545233817436e-06, "loss": 0.7249, "step": 973 }, { "epoch": 0.1, "grad_norm": 2.2818240056053565, "learning_rate": 9.863149519901545e-06, "loss": 0.7263, "step": 974 }, { "epoch": 0.1, "grad_norm": 5.980498735075583, "learning_rate": 9.862753240999001e-06, "loss": 0.796, "step": 975 }, { "epoch": 0.1, "grad_norm": 2.17696941282096, "learning_rate": 9.862356397155843e-06, "loss": 0.7528, "step": 976 }, { "epoch": 0.1, "grad_norm": 2.7279007297671747, "learning_rate": 9.861958988418174e-06, "loss": 0.7417, "step": 977 }, { "epoch": 0.1, "grad_norm": 2.7775418932594853, "learning_rate": 9.861561014832166e-06, "loss": 0.6685, "step": 978 }, { "epoch": 0.1, "grad_norm": 3.0608876144824255, "learning_rate": 9.86116247644405e-06, "loss": 0.796, "step": 979 }, { "epoch": 0.1, "grad_norm": 2.230200096432408, "learning_rate": 9.860763373300133e-06, "loss": 0.682, "step": 980 }, { "epoch": 0.1, "grad_norm": 2.583892124389053, "learning_rate": 9.860363705446776e-06, "loss": 0.7154, "step": 981 }, { "epoch": 0.1, "grad_norm": 3.420436801225999, "learning_rate": 9.859963472930413e-06, "loss": 0.6849, "step": 982 }, { "epoch": 0.1, "grad_norm": 2.5498244013352545, "learning_rate": 9.859562675797543e-06, "loss": 0.8011, "step": 983 }, { "epoch": 0.1, "grad_norm": 2.1659421053037153, "learning_rate": 9.85916131409473e-06, "loss": 0.7394, "step": 984 }, { "epoch": 0.1, "grad_norm": 3.338196115649606, "learning_rate": 9.858759387868601e-06, "loss": 0.7245, "step": 985 }, { "epoch": 0.1, "grad_norm": 2.7321452634255357, "learning_rate": 9.858356897165853e-06, "loss": 0.7919, "step": 986 }, { "epoch": 0.1, "grad_norm": 2.6253699652801417, "learning_rate": 9.857953842033243e-06, "loss": 0.733, "step": 987 }, { "epoch": 0.1, "grad_norm": 2.59635003190081, "learning_rate": 9.857550222517598e-06, "loss": 0.7092, "step": 988 }, { "epoch": 0.1, "grad_norm": 2.621851871749562, "learning_rate": 9.857146038665812e-06, "loss": 0.7042, "step": 989 }, { "epoch": 0.1, "grad_norm": 2.7774812468209973, "learning_rate": 9.856741290524839e-06, "loss": 0.7218, "step": 990 }, { "epoch": 0.1, "grad_norm": 2.9702400566443012, "learning_rate": 9.856335978141703e-06, "loss": 0.7605, "step": 991 }, { "epoch": 0.1, "grad_norm": 3.229378466858724, "learning_rate": 9.85593010156349e-06, "loss": 0.7731, "step": 992 }, { "epoch": 0.1, "grad_norm": 2.985236389182805, "learning_rate": 9.855523660837355e-06, "loss": 0.7433, "step": 993 }, { "epoch": 0.1, "grad_norm": 2.418368994051385, "learning_rate": 9.855116656010518e-06, "loss": 0.6799, "step": 994 }, { "epoch": 0.1, "grad_norm": 2.6770469216630866, "learning_rate": 9.854709087130261e-06, "loss": 0.6967, "step": 995 }, { "epoch": 0.1, "grad_norm": 1.2596414821764004, "learning_rate": 9.854300954243937e-06, "loss": 0.6321, "step": 996 }, { "epoch": 0.1, "grad_norm": 1.1913642125753563, "learning_rate": 9.853892257398961e-06, "loss": 0.6239, "step": 997 }, { "epoch": 0.11, "grad_norm": 5.038065448125383, "learning_rate": 9.853482996642812e-06, "loss": 0.8232, "step": 998 }, { "epoch": 0.11, "grad_norm": 2.80199548153629, "learning_rate": 9.85307317202304e-06, "loss": 0.7232, "step": 999 }, { "epoch": 0.11, "grad_norm": 3.3246781936353464, "learning_rate": 9.852662783587255e-06, "loss": 0.6723, "step": 1000 }, { "epoch": 0.11, "grad_norm": 2.285393284696264, "learning_rate": 9.852251831383136e-06, "loss": 0.7249, "step": 1001 }, { "epoch": 0.11, "grad_norm": 2.6840301004398026, "learning_rate": 9.851840315458424e-06, "loss": 0.8205, "step": 1002 }, { "epoch": 0.11, "grad_norm": 2.635419321078597, "learning_rate": 9.85142823586093e-06, "loss": 0.7197, "step": 1003 }, { "epoch": 0.11, "grad_norm": 3.832069075664214, "learning_rate": 9.851015592638528e-06, "loss": 0.7241, "step": 1004 }, { "epoch": 0.11, "grad_norm": 2.533811572972949, "learning_rate": 9.850602385839158e-06, "loss": 0.7935, "step": 1005 }, { "epoch": 0.11, "grad_norm": 3.225027430000192, "learning_rate": 9.850188615510824e-06, "loss": 0.6579, "step": 1006 }, { "epoch": 0.11, "grad_norm": 2.3382405131753483, "learning_rate": 9.849774281701597e-06, "loss": 0.7659, "step": 1007 }, { "epoch": 0.11, "grad_norm": 2.097379634081843, "learning_rate": 9.849359384459614e-06, "loss": 0.7244, "step": 1008 }, { "epoch": 0.11, "grad_norm": 2.882248967395376, "learning_rate": 9.848943923833075e-06, "loss": 0.699, "step": 1009 }, { "epoch": 0.11, "grad_norm": 3.1835190905227715, "learning_rate": 9.848527899870249e-06, "loss": 0.729, "step": 1010 }, { "epoch": 0.11, "grad_norm": 3.3344865062688966, "learning_rate": 9.848111312619464e-06, "loss": 0.7155, "step": 1011 }, { "epoch": 0.11, "grad_norm": 2.5563191668434757, "learning_rate": 9.847694162129124e-06, "loss": 0.718, "step": 1012 }, { "epoch": 0.11, "grad_norm": 2.468563374343176, "learning_rate": 9.84727644844769e-06, "loss": 0.7124, "step": 1013 }, { "epoch": 0.11, "grad_norm": 2.7246642917209405, "learning_rate": 9.846858171623687e-06, "loss": 0.6024, "step": 1014 }, { "epoch": 0.11, "grad_norm": 2.0551445765951826, "learning_rate": 9.846439331705715e-06, "loss": 0.6882, "step": 1015 }, { "epoch": 0.11, "grad_norm": 2.9450655022782932, "learning_rate": 9.846019928742432e-06, "loss": 0.7355, "step": 1016 }, { "epoch": 0.11, "grad_norm": 2.1869980803278106, "learning_rate": 9.84559996278256e-06, "loss": 0.6998, "step": 1017 }, { "epoch": 0.11, "grad_norm": 2.0957427212075115, "learning_rate": 9.845179433874891e-06, "loss": 0.7364, "step": 1018 }, { "epoch": 0.11, "grad_norm": 2.2104276939981258, "learning_rate": 9.844758342068284e-06, "loss": 0.7528, "step": 1019 }, { "epoch": 0.11, "grad_norm": 2.2088600503635325, "learning_rate": 9.844336687411657e-06, "loss": 0.637, "step": 1020 }, { "epoch": 0.11, "grad_norm": 2.2733449333519444, "learning_rate": 9.843914469953995e-06, "loss": 0.7377, "step": 1021 }, { "epoch": 0.11, "grad_norm": 6.872012422821911, "learning_rate": 9.843491689744354e-06, "loss": 0.7699, "step": 1022 }, { "epoch": 0.11, "grad_norm": 2.471746142903541, "learning_rate": 9.84306834683185e-06, "loss": 0.7256, "step": 1023 }, { "epoch": 0.11, "grad_norm": 2.5086959706339997, "learning_rate": 9.842644441265664e-06, "loss": 0.8149, "step": 1024 }, { "epoch": 0.11, "grad_norm": 2.3150840456614925, "learning_rate": 9.842219973095045e-06, "loss": 0.7124, "step": 1025 }, { "epoch": 0.11, "grad_norm": 2.3604359593997164, "learning_rate": 9.841794942369309e-06, "loss": 0.6731, "step": 1026 }, { "epoch": 0.11, "grad_norm": 2.439817785760237, "learning_rate": 9.841369349137832e-06, "loss": 0.7622, "step": 1027 }, { "epoch": 0.11, "grad_norm": 2.9509635034152497, "learning_rate": 9.840943193450059e-06, "loss": 0.7069, "step": 1028 }, { "epoch": 0.11, "grad_norm": 2.9934027081797234, "learning_rate": 9.840516475355499e-06, "loss": 0.7784, "step": 1029 }, { "epoch": 0.11, "grad_norm": 2.533628591878186, "learning_rate": 9.840089194903729e-06, "loss": 0.7079, "step": 1030 }, { "epoch": 0.11, "grad_norm": 2.4056446800859272, "learning_rate": 9.839661352144386e-06, "loss": 0.761, "step": 1031 }, { "epoch": 0.11, "grad_norm": 2.515210654037056, "learning_rate": 9.839232947127178e-06, "loss": 0.7748, "step": 1032 }, { "epoch": 0.11, "grad_norm": 2.108290692947581, "learning_rate": 9.838803979901874e-06, "loss": 0.7569, "step": 1033 }, { "epoch": 0.11, "grad_norm": 3.079443665645577, "learning_rate": 9.838374450518311e-06, "loss": 0.7073, "step": 1034 }, { "epoch": 0.11, "grad_norm": 2.3720082565519367, "learning_rate": 9.837944359026392e-06, "loss": 0.735, "step": 1035 }, { "epoch": 0.11, "grad_norm": 2.6597037634218994, "learning_rate": 9.837513705476082e-06, "loss": 0.745, "step": 1036 }, { "epoch": 0.11, "grad_norm": 3.6631008749810534, "learning_rate": 9.837082489917413e-06, "loss": 0.7522, "step": 1037 }, { "epoch": 0.11, "grad_norm": 2.7564725036319344, "learning_rate": 9.836650712400484e-06, "loss": 0.6971, "step": 1038 }, { "epoch": 0.11, "grad_norm": 2.7150369300373387, "learning_rate": 9.836218372975456e-06, "loss": 0.7348, "step": 1039 }, { "epoch": 0.11, "grad_norm": 2.456649521111631, "learning_rate": 9.835785471692559e-06, "loss": 0.7276, "step": 1040 }, { "epoch": 0.11, "grad_norm": 2.4789195883853457, "learning_rate": 9.835352008602081e-06, "loss": 0.7406, "step": 1041 }, { "epoch": 0.11, "grad_norm": 2.240016595350792, "learning_rate": 9.834917983754388e-06, "loss": 0.7213, "step": 1042 }, { "epoch": 0.11, "grad_norm": 2.4292109109445925, "learning_rate": 9.834483397199897e-06, "loss": 0.7573, "step": 1043 }, { "epoch": 0.11, "grad_norm": 2.7009207551734242, "learning_rate": 9.834048248989101e-06, "loss": 0.7654, "step": 1044 }, { "epoch": 0.11, "grad_norm": 3.359205277225582, "learning_rate": 9.833612539172554e-06, "loss": 0.6987, "step": 1045 }, { "epoch": 0.11, "grad_norm": 4.052083146121145, "learning_rate": 9.833176267800874e-06, "loss": 0.8056, "step": 1046 }, { "epoch": 0.11, "grad_norm": 2.7708114222605413, "learning_rate": 9.832739434924747e-06, "loss": 0.7814, "step": 1047 }, { "epoch": 0.11, "grad_norm": 2.4023504113769785, "learning_rate": 9.832302040594923e-06, "loss": 0.7072, "step": 1048 }, { "epoch": 0.11, "grad_norm": 2.926390430878239, "learning_rate": 9.831864084862216e-06, "loss": 0.7338, "step": 1049 }, { "epoch": 0.11, "grad_norm": 2.18607735411363, "learning_rate": 9.831425567777506e-06, "loss": 0.7444, "step": 1050 }, { "epoch": 0.11, "grad_norm": 2.494103967751944, "learning_rate": 9.830986489391743e-06, "loss": 0.7925, "step": 1051 }, { "epoch": 0.11, "grad_norm": 2.65375185592133, "learning_rate": 9.830546849755932e-06, "loss": 0.7176, "step": 1052 }, { "epoch": 0.11, "grad_norm": 2.350034885411002, "learning_rate": 9.830106648921152e-06, "loss": 0.6827, "step": 1053 }, { "epoch": 0.11, "grad_norm": 3.2408010200349135, "learning_rate": 9.829665886938544e-06, "loss": 0.7066, "step": 1054 }, { "epoch": 0.11, "grad_norm": 3.0868922059592543, "learning_rate": 9.829224563859314e-06, "loss": 0.6116, "step": 1055 }, { "epoch": 0.11, "grad_norm": 2.941803666500437, "learning_rate": 9.828782679734737e-06, "loss": 0.8022, "step": 1056 }, { "epoch": 0.11, "grad_norm": 3.823063887767071, "learning_rate": 9.828340234616142e-06, "loss": 0.7427, "step": 1057 }, { "epoch": 0.11, "grad_norm": 2.8844101888229514, "learning_rate": 9.827897228554939e-06, "loss": 0.7707, "step": 1058 }, { "epoch": 0.11, "grad_norm": 3.0160506838501684, "learning_rate": 9.827453661602592e-06, "loss": 0.7326, "step": 1059 }, { "epoch": 0.11, "grad_norm": 2.300778641620372, "learning_rate": 9.827009533810632e-06, "loss": 0.7175, "step": 1060 }, { "epoch": 0.11, "grad_norm": 2.6516868846525616, "learning_rate": 9.82656484523066e-06, "loss": 0.7898, "step": 1061 }, { "epoch": 0.11, "grad_norm": 2.4715363916836623, "learning_rate": 9.826119595914334e-06, "loss": 0.6921, "step": 1062 }, { "epoch": 0.11, "grad_norm": 2.327340583098541, "learning_rate": 9.825673785913385e-06, "loss": 0.6928, "step": 1063 }, { "epoch": 0.11, "grad_norm": 2.759469881660751, "learning_rate": 9.825227415279606e-06, "loss": 0.7855, "step": 1064 }, { "epoch": 0.11, "grad_norm": 2.534088445808419, "learning_rate": 9.824780484064853e-06, "loss": 0.7307, "step": 1065 }, { "epoch": 0.11, "grad_norm": 2.1112574816361924, "learning_rate": 9.824332992321052e-06, "loss": 0.6811, "step": 1066 }, { "epoch": 0.11, "grad_norm": 2.373627167824405, "learning_rate": 9.823884940100188e-06, "loss": 0.7312, "step": 1067 }, { "epoch": 0.11, "grad_norm": 2.7883580813619697, "learning_rate": 9.823436327454318e-06, "loss": 0.7676, "step": 1068 }, { "epoch": 0.11, "grad_norm": 2.1899506229168537, "learning_rate": 9.822987154435557e-06, "loss": 0.7408, "step": 1069 }, { "epoch": 0.11, "grad_norm": 2.0479852936546252, "learning_rate": 9.82253742109609e-06, "loss": 0.6622, "step": 1070 }, { "epoch": 0.11, "grad_norm": 2.7416313050535015, "learning_rate": 9.822087127488167e-06, "loss": 0.7698, "step": 1071 }, { "epoch": 0.11, "grad_norm": 2.105517261367347, "learning_rate": 9.821636273664102e-06, "loss": 0.6482, "step": 1072 }, { "epoch": 0.11, "grad_norm": 3.280144562837707, "learning_rate": 9.821184859676269e-06, "loss": 0.6423, "step": 1073 }, { "epoch": 0.11, "grad_norm": 4.134409942350642, "learning_rate": 9.820732885577117e-06, "loss": 0.6664, "step": 1074 }, { "epoch": 0.11, "grad_norm": 2.912802423096457, "learning_rate": 9.820280351419155e-06, "loss": 0.826, "step": 1075 }, { "epoch": 0.11, "grad_norm": 3.1928081836845386, "learning_rate": 9.819827257254957e-06, "loss": 0.7791, "step": 1076 }, { "epoch": 0.11, "grad_norm": 2.779451497550286, "learning_rate": 9.81937360313716e-06, "loss": 0.8068, "step": 1077 }, { "epoch": 0.11, "grad_norm": 3.32410870620609, "learning_rate": 9.818919389118466e-06, "loss": 0.7509, "step": 1078 }, { "epoch": 0.11, "grad_norm": 2.5323788910344027, "learning_rate": 9.81846461525165e-06, "loss": 0.7576, "step": 1079 }, { "epoch": 0.11, "grad_norm": 2.439458544255558, "learning_rate": 9.818009281589545e-06, "loss": 0.7202, "step": 1080 }, { "epoch": 0.11, "grad_norm": 3.0195258140646195, "learning_rate": 9.817553388185046e-06, "loss": 0.6668, "step": 1081 }, { "epoch": 0.11, "grad_norm": 1.5034092301568605, "learning_rate": 9.817096935091123e-06, "loss": 0.6359, "step": 1082 }, { "epoch": 0.11, "grad_norm": 3.4584143921665347, "learning_rate": 9.816639922360802e-06, "loss": 0.8676, "step": 1083 }, { "epoch": 0.11, "grad_norm": 3.1995319474627197, "learning_rate": 9.816182350047179e-06, "loss": 0.6817, "step": 1084 }, { "epoch": 0.11, "grad_norm": 3.416376258425686, "learning_rate": 9.815724218203411e-06, "loss": 0.7575, "step": 1085 }, { "epoch": 0.11, "grad_norm": 3.4149722424830817, "learning_rate": 9.815265526882726e-06, "loss": 0.7805, "step": 1086 }, { "epoch": 0.11, "grad_norm": 2.3282208941621216, "learning_rate": 9.814806276138412e-06, "loss": 0.8052, "step": 1087 }, { "epoch": 0.11, "grad_norm": 2.4514470562750907, "learning_rate": 9.81434646602382e-06, "loss": 0.7267, "step": 1088 }, { "epoch": 0.11, "grad_norm": 2.976660270631451, "learning_rate": 9.813886096592376e-06, "loss": 0.6789, "step": 1089 }, { "epoch": 0.11, "grad_norm": 2.0977217217797643, "learning_rate": 9.81342516789756e-06, "loss": 0.7351, "step": 1090 }, { "epoch": 0.11, "grad_norm": 16.753908355958515, "learning_rate": 9.81296367999292e-06, "loss": 0.6394, "step": 1091 }, { "epoch": 0.11, "grad_norm": 2.3815484353523138, "learning_rate": 9.812501632932074e-06, "loss": 0.7619, "step": 1092 }, { "epoch": 0.12, "grad_norm": 4.14830234781379, "learning_rate": 9.8120390267687e-06, "loss": 0.7141, "step": 1093 }, { "epoch": 0.12, "grad_norm": 2.4341062988650624, "learning_rate": 9.811575861556541e-06, "loss": 0.776, "step": 1094 }, { "epoch": 0.12, "grad_norm": 2.290585984126302, "learning_rate": 9.811112137349407e-06, "loss": 0.8117, "step": 1095 }, { "epoch": 0.12, "grad_norm": 2.555561103046378, "learning_rate": 9.810647854201174e-06, "loss": 0.748, "step": 1096 }, { "epoch": 0.12, "grad_norm": 2.710466022236052, "learning_rate": 9.81018301216578e-06, "loss": 0.7782, "step": 1097 }, { "epoch": 0.12, "grad_norm": 3.3963252605264445, "learning_rate": 9.809717611297227e-06, "loss": 0.6976, "step": 1098 }, { "epoch": 0.12, "grad_norm": 2.6354731547094254, "learning_rate": 9.809251651649586e-06, "loss": 0.8057, "step": 1099 }, { "epoch": 0.12, "grad_norm": 1.4944044637050682, "learning_rate": 9.80878513327699e-06, "loss": 0.6846, "step": 1100 }, { "epoch": 0.12, "grad_norm": 2.2779646813088203, "learning_rate": 9.80831805623364e-06, "loss": 0.6475, "step": 1101 }, { "epoch": 0.12, "grad_norm": 2.3609991882243317, "learning_rate": 9.807850420573794e-06, "loss": 0.6642, "step": 1102 }, { "epoch": 0.12, "grad_norm": 2.2887645711932185, "learning_rate": 9.807382226351786e-06, "loss": 0.7091, "step": 1103 }, { "epoch": 0.12, "grad_norm": 2.1955085410313404, "learning_rate": 9.806913473622008e-06, "loss": 0.6662, "step": 1104 }, { "epoch": 0.12, "grad_norm": 1.2036596843300829, "learning_rate": 9.806444162438917e-06, "loss": 0.6465, "step": 1105 }, { "epoch": 0.12, "grad_norm": 1.1349422385810124, "learning_rate": 9.805974292857038e-06, "loss": 0.6237, "step": 1106 }, { "epoch": 0.12, "grad_norm": 2.5361645121575336, "learning_rate": 9.805503864930958e-06, "loss": 0.9142, "step": 1107 }, { "epoch": 0.12, "grad_norm": 3.0395622006277123, "learning_rate": 9.80503287871533e-06, "loss": 0.6835, "step": 1108 }, { "epoch": 0.12, "grad_norm": 2.422353959280125, "learning_rate": 9.804561334264872e-06, "loss": 0.7633, "step": 1109 }, { "epoch": 0.12, "grad_norm": 2.5480040358296767, "learning_rate": 9.804089231634368e-06, "loss": 0.8203, "step": 1110 }, { "epoch": 0.12, "grad_norm": 2.8729382273172983, "learning_rate": 9.803616570878664e-06, "loss": 0.6498, "step": 1111 }, { "epoch": 0.12, "grad_norm": 2.7811166027244028, "learning_rate": 9.803143352052674e-06, "loss": 0.5825, "step": 1112 }, { "epoch": 0.12, "grad_norm": 2.12202620735347, "learning_rate": 9.802669575211369e-06, "loss": 0.6428, "step": 1113 }, { "epoch": 0.12, "grad_norm": 5.0006468325620315, "learning_rate": 9.8021952404098e-06, "loss": 0.7803, "step": 1114 }, { "epoch": 0.12, "grad_norm": 3.1558917309253327, "learning_rate": 9.80172034770307e-06, "loss": 0.7181, "step": 1115 }, { "epoch": 0.12, "grad_norm": 2.333799697036943, "learning_rate": 9.801244897146348e-06, "loss": 0.8098, "step": 1116 }, { "epoch": 0.12, "grad_norm": 2.3764518557927974, "learning_rate": 9.800768888794874e-06, "loss": 0.6982, "step": 1117 }, { "epoch": 0.12, "grad_norm": 2.553267042004277, "learning_rate": 9.800292322703949e-06, "loss": 0.7525, "step": 1118 }, { "epoch": 0.12, "grad_norm": 2.418379261387043, "learning_rate": 9.799815198928937e-06, "loss": 0.7675, "step": 1119 }, { "epoch": 0.12, "grad_norm": 2.8872564473960765, "learning_rate": 9.79933751752527e-06, "loss": 0.6478, "step": 1120 }, { "epoch": 0.12, "grad_norm": 4.052357385896143, "learning_rate": 9.798859278548443e-06, "loss": 0.7681, "step": 1121 }, { "epoch": 0.12, "grad_norm": 2.2984285503416286, "learning_rate": 9.798380482054019e-06, "loss": 0.6921, "step": 1122 }, { "epoch": 0.12, "grad_norm": 2.422351898914631, "learning_rate": 9.79790112809762e-06, "loss": 0.7374, "step": 1123 }, { "epoch": 0.12, "grad_norm": 2.2410791102035756, "learning_rate": 9.797421216734938e-06, "loss": 0.664, "step": 1124 }, { "epoch": 0.12, "grad_norm": 3.470129472972923, "learning_rate": 9.796940748021727e-06, "loss": 0.7462, "step": 1125 }, { "epoch": 0.12, "grad_norm": 2.414314965943791, "learning_rate": 9.796459722013804e-06, "loss": 0.7995, "step": 1126 }, { "epoch": 0.12, "grad_norm": 3.270258241210417, "learning_rate": 9.795978138767059e-06, "loss": 0.7509, "step": 1127 }, { "epoch": 0.12, "grad_norm": 2.353498649269498, "learning_rate": 9.795495998337436e-06, "loss": 0.8035, "step": 1128 }, { "epoch": 0.12, "grad_norm": 2.8698790645980807, "learning_rate": 9.795013300780951e-06, "loss": 0.7457, "step": 1129 }, { "epoch": 0.12, "grad_norm": 3.6109946770346673, "learning_rate": 9.794530046153681e-06, "loss": 0.7238, "step": 1130 }, { "epoch": 0.12, "grad_norm": 2.488339655347247, "learning_rate": 9.79404623451177e-06, "loss": 0.7118, "step": 1131 }, { "epoch": 0.12, "grad_norm": 2.9913223737937966, "learning_rate": 9.793561865911425e-06, "loss": 0.76, "step": 1132 }, { "epoch": 0.12, "grad_norm": 5.4767066337380035, "learning_rate": 9.793076940408921e-06, "loss": 0.6867, "step": 1133 }, { "epoch": 0.12, "grad_norm": 2.657626022399911, "learning_rate": 9.792591458060592e-06, "loss": 0.7424, "step": 1134 }, { "epoch": 0.12, "grad_norm": 3.0031975051776887, "learning_rate": 9.792105418922842e-06, "loss": 0.6451, "step": 1135 }, { "epoch": 0.12, "grad_norm": 2.515412387819786, "learning_rate": 9.791618823052137e-06, "loss": 0.654, "step": 1136 }, { "epoch": 0.12, "grad_norm": 2.2988508182868506, "learning_rate": 9.791131670505008e-06, "loss": 0.7053, "step": 1137 }, { "epoch": 0.12, "grad_norm": 3.392212351593364, "learning_rate": 9.790643961338051e-06, "loss": 0.6784, "step": 1138 }, { "epoch": 0.12, "grad_norm": 2.3321628230545013, "learning_rate": 9.790155695607927e-06, "loss": 0.747, "step": 1139 }, { "epoch": 0.12, "grad_norm": 2.5520902735537194, "learning_rate": 9.789666873371361e-06, "loss": 0.7747, "step": 1140 }, { "epoch": 0.12, "grad_norm": 3.0561347446951537, "learning_rate": 9.789177494685146e-06, "loss": 0.7497, "step": 1141 }, { "epoch": 0.12, "grad_norm": 2.42722295869959, "learning_rate": 9.788687559606131e-06, "loss": 0.7141, "step": 1142 }, { "epoch": 0.12, "grad_norm": 3.087792011146286, "learning_rate": 9.788197068191237e-06, "loss": 0.78, "step": 1143 }, { "epoch": 0.12, "grad_norm": 2.742833380122242, "learning_rate": 9.787706020497451e-06, "loss": 0.702, "step": 1144 }, { "epoch": 0.12, "grad_norm": 2.2623934679268323, "learning_rate": 9.787214416581818e-06, "loss": 0.6702, "step": 1145 }, { "epoch": 0.12, "grad_norm": 3.105793673298671, "learning_rate": 9.786722256501454e-06, "loss": 0.742, "step": 1146 }, { "epoch": 0.12, "grad_norm": 2.771703607077318, "learning_rate": 9.786229540313534e-06, "loss": 0.6825, "step": 1147 }, { "epoch": 0.12, "grad_norm": 2.833220908887274, "learning_rate": 9.785736268075303e-06, "loss": 0.6402, "step": 1148 }, { "epoch": 0.12, "grad_norm": 3.252609192328139, "learning_rate": 9.785242439844064e-06, "loss": 0.7542, "step": 1149 }, { "epoch": 0.12, "grad_norm": 2.6298483732855993, "learning_rate": 9.784748055677193e-06, "loss": 0.758, "step": 1150 }, { "epoch": 0.12, "grad_norm": 2.1632996471894916, "learning_rate": 9.784253115632125e-06, "loss": 0.7049, "step": 1151 }, { "epoch": 0.12, "grad_norm": 2.34611962892255, "learning_rate": 9.783757619766359e-06, "loss": 0.6605, "step": 1152 }, { "epoch": 0.12, "grad_norm": 2.3000195548875086, "learning_rate": 9.783261568137461e-06, "loss": 0.6681, "step": 1153 }, { "epoch": 0.12, "grad_norm": 2.644856326527659, "learning_rate": 9.78276496080306e-06, "loss": 0.7254, "step": 1154 }, { "epoch": 0.12, "grad_norm": 3.363577339292242, "learning_rate": 9.782267797820852e-06, "loss": 0.6255, "step": 1155 }, { "epoch": 0.12, "grad_norm": 2.178294432104343, "learning_rate": 9.781770079248597e-06, "loss": 0.6964, "step": 1156 }, { "epoch": 0.12, "grad_norm": 2.351307497143435, "learning_rate": 9.781271805144115e-06, "loss": 0.672, "step": 1157 }, { "epoch": 0.12, "grad_norm": 2.317111010522036, "learning_rate": 9.780772975565297e-06, "loss": 0.716, "step": 1158 }, { "epoch": 0.12, "grad_norm": 8.74680266959224, "learning_rate": 9.780273590570095e-06, "loss": 0.7632, "step": 1159 }, { "epoch": 0.12, "grad_norm": 3.8612186188840654, "learning_rate": 9.779773650216524e-06, "loss": 0.688, "step": 1160 }, { "epoch": 0.12, "grad_norm": 3.73767312113061, "learning_rate": 9.779273154562668e-06, "loss": 0.732, "step": 1161 }, { "epoch": 0.12, "grad_norm": 2.195963737502883, "learning_rate": 9.778772103666672e-06, "loss": 0.7151, "step": 1162 }, { "epoch": 0.12, "grad_norm": 2.842525432869639, "learning_rate": 9.778270497586747e-06, "loss": 0.6845, "step": 1163 }, { "epoch": 0.12, "grad_norm": 2.134809149072051, "learning_rate": 9.77776833638117e-06, "loss": 0.7597, "step": 1164 }, { "epoch": 0.12, "grad_norm": 2.7009490835040846, "learning_rate": 9.777265620108277e-06, "loss": 0.7779, "step": 1165 }, { "epoch": 0.12, "grad_norm": 2.317959306339774, "learning_rate": 9.776762348826474e-06, "loss": 0.7794, "step": 1166 }, { "epoch": 0.12, "grad_norm": 2.109072732996193, "learning_rate": 9.776258522594231e-06, "loss": 0.7258, "step": 1167 }, { "epoch": 0.12, "grad_norm": 2.1691103374743497, "learning_rate": 9.775754141470077e-06, "loss": 0.7235, "step": 1168 }, { "epoch": 0.12, "grad_norm": 3.2658839932977033, "learning_rate": 9.775249205512614e-06, "loss": 0.7238, "step": 1169 }, { "epoch": 0.12, "grad_norm": 2.4497606850003515, "learning_rate": 9.774743714780502e-06, "loss": 0.7825, "step": 1170 }, { "epoch": 0.12, "grad_norm": 2.7201508522085223, "learning_rate": 9.774237669332467e-06, "loss": 0.7096, "step": 1171 }, { "epoch": 0.12, "grad_norm": 3.194344873698593, "learning_rate": 9.7737310692273e-06, "loss": 0.7806, "step": 1172 }, { "epoch": 0.12, "grad_norm": 2.4546802907254675, "learning_rate": 9.77322391452386e-06, "loss": 0.7075, "step": 1173 }, { "epoch": 0.12, "grad_norm": 2.8217799648099753, "learning_rate": 9.772716205281061e-06, "loss": 0.6641, "step": 1174 }, { "epoch": 0.12, "grad_norm": 3.3940574360026314, "learning_rate": 9.772207941557889e-06, "loss": 0.7329, "step": 1175 }, { "epoch": 0.12, "grad_norm": 2.8617824561808356, "learning_rate": 9.771699123413396e-06, "loss": 0.6231, "step": 1176 }, { "epoch": 0.12, "grad_norm": 1.4154376282640087, "learning_rate": 9.77118975090669e-06, "loss": 0.6116, "step": 1177 }, { "epoch": 0.12, "grad_norm": 2.378924743383222, "learning_rate": 9.770679824096952e-06, "loss": 0.6989, "step": 1178 }, { "epoch": 0.12, "grad_norm": 2.7001805333163587, "learning_rate": 9.770169343043423e-06, "loss": 0.7121, "step": 1179 }, { "epoch": 0.12, "grad_norm": 2.5402821969530742, "learning_rate": 9.769658307805408e-06, "loss": 0.7407, "step": 1180 }, { "epoch": 0.12, "grad_norm": 2.4849409843596, "learning_rate": 9.769146718442279e-06, "loss": 0.7695, "step": 1181 }, { "epoch": 0.12, "grad_norm": 2.36462157041008, "learning_rate": 9.76863457501347e-06, "loss": 0.7366, "step": 1182 }, { "epoch": 0.12, "grad_norm": 3.5569947068679664, "learning_rate": 9.76812187757848e-06, "loss": 0.7354, "step": 1183 }, { "epoch": 0.12, "grad_norm": 2.230897787375336, "learning_rate": 9.767608626196877e-06, "loss": 0.6896, "step": 1184 }, { "epoch": 0.12, "grad_norm": 2.322170991929863, "learning_rate": 9.767094820928282e-06, "loss": 0.7377, "step": 1185 }, { "epoch": 0.12, "grad_norm": 4.766972909981728, "learning_rate": 9.766580461832391e-06, "loss": 0.7322, "step": 1186 }, { "epoch": 0.12, "grad_norm": 1.9936899282121772, "learning_rate": 9.766065548968962e-06, "loss": 0.7133, "step": 1187 }, { "epoch": 0.13, "grad_norm": 2.9245738454667074, "learning_rate": 9.765550082397815e-06, "loss": 0.7225, "step": 1188 }, { "epoch": 0.13, "grad_norm": 2.9105025784741505, "learning_rate": 9.765034062178836e-06, "loss": 0.773, "step": 1189 }, { "epoch": 0.13, "grad_norm": 2.3491105179808205, "learning_rate": 9.764517488371971e-06, "loss": 0.7345, "step": 1190 }, { "epoch": 0.13, "grad_norm": 3.042045767945915, "learning_rate": 9.76400036103724e-06, "loss": 0.6984, "step": 1191 }, { "epoch": 0.13, "grad_norm": 2.452386829371169, "learning_rate": 9.763482680234718e-06, "loss": 0.7039, "step": 1192 }, { "epoch": 0.13, "grad_norm": 2.3004751526217735, "learning_rate": 9.762964446024547e-06, "loss": 0.7878, "step": 1193 }, { "epoch": 0.13, "grad_norm": 1.3235197535321637, "learning_rate": 9.762445658466935e-06, "loss": 0.6694, "step": 1194 }, { "epoch": 0.13, "grad_norm": 2.539843716603838, "learning_rate": 9.761926317622154e-06, "loss": 0.741, "step": 1195 }, { "epoch": 0.13, "grad_norm": 2.849640472644429, "learning_rate": 9.761406423550539e-06, "loss": 0.7408, "step": 1196 }, { "epoch": 0.13, "grad_norm": 2.5166258673564514, "learning_rate": 9.760885976312488e-06, "loss": 0.7223, "step": 1197 }, { "epoch": 0.13, "grad_norm": 2.725239057841064, "learning_rate": 9.760364975968469e-06, "loss": 0.7007, "step": 1198 }, { "epoch": 0.13, "grad_norm": 6.7294366804059464, "learning_rate": 9.759843422579005e-06, "loss": 0.6748, "step": 1199 }, { "epoch": 0.13, "grad_norm": 2.7738891360490694, "learning_rate": 9.759321316204693e-06, "loss": 0.6751, "step": 1200 }, { "epoch": 0.13, "grad_norm": 16.17668799017805, "learning_rate": 9.758798656906187e-06, "loss": 0.7039, "step": 1201 }, { "epoch": 0.13, "grad_norm": 2.8538609592861137, "learning_rate": 9.758275444744211e-06, "loss": 0.7457, "step": 1202 }, { "epoch": 0.13, "grad_norm": 2.8412177031201455, "learning_rate": 9.757751679779549e-06, "loss": 0.6996, "step": 1203 }, { "epoch": 0.13, "grad_norm": 2.4794823846058103, "learning_rate": 9.757227362073048e-06, "loss": 0.7336, "step": 1204 }, { "epoch": 0.13, "grad_norm": 1.9919272034069202, "learning_rate": 9.756702491685626e-06, "loss": 0.7235, "step": 1205 }, { "epoch": 0.13, "grad_norm": 2.682936702377847, "learning_rate": 9.756177068678258e-06, "loss": 0.6468, "step": 1206 }, { "epoch": 0.13, "grad_norm": 2.2979583262392227, "learning_rate": 9.755651093111987e-06, "loss": 0.7306, "step": 1207 }, { "epoch": 0.13, "grad_norm": 5.631349641332311, "learning_rate": 9.755124565047918e-06, "loss": 0.7321, "step": 1208 }, { "epoch": 0.13, "grad_norm": 2.09817214920078, "learning_rate": 9.754597484547223e-06, "loss": 0.6638, "step": 1209 }, { "epoch": 0.13, "grad_norm": 2.909553694559456, "learning_rate": 9.754069851671138e-06, "loss": 0.7271, "step": 1210 }, { "epoch": 0.13, "grad_norm": 3.0625344333106828, "learning_rate": 9.753541666480959e-06, "loss": 0.6552, "step": 1211 }, { "epoch": 0.13, "grad_norm": 3.2516687148488987, "learning_rate": 9.75301292903805e-06, "loss": 0.7763, "step": 1212 }, { "epoch": 0.13, "grad_norm": 3.0525856091150914, "learning_rate": 9.752483639403839e-06, "loss": 0.7237, "step": 1213 }, { "epoch": 0.13, "grad_norm": 1.3517703119170414, "learning_rate": 9.751953797639817e-06, "loss": 0.6346, "step": 1214 }, { "epoch": 0.13, "grad_norm": 2.5336050253735287, "learning_rate": 9.751423403807539e-06, "loss": 0.7032, "step": 1215 }, { "epoch": 0.13, "grad_norm": 2.274623839914132, "learning_rate": 9.750892457968626e-06, "loss": 0.7113, "step": 1216 }, { "epoch": 0.13, "grad_norm": 2.502789805441234, "learning_rate": 9.75036096018476e-06, "loss": 0.6948, "step": 1217 }, { "epoch": 0.13, "grad_norm": 2.362716780752687, "learning_rate": 9.749828910517688e-06, "loss": 0.6797, "step": 1218 }, { "epoch": 0.13, "grad_norm": 1.9013855117687954, "learning_rate": 9.749296309029224e-06, "loss": 0.7267, "step": 1219 }, { "epoch": 0.13, "grad_norm": 2.5275074129127404, "learning_rate": 9.748763155781244e-06, "loss": 0.719, "step": 1220 }, { "epoch": 0.13, "grad_norm": 3.7639603698409827, "learning_rate": 9.748229450835689e-06, "loss": 0.6755, "step": 1221 }, { "epoch": 0.13, "grad_norm": 2.8030238737546624, "learning_rate": 9.747695194254561e-06, "loss": 0.7427, "step": 1222 }, { "epoch": 0.13, "grad_norm": 2.820939766808103, "learning_rate": 9.74716038609993e-06, "loss": 0.7745, "step": 1223 }, { "epoch": 0.13, "grad_norm": 2.48939064756324, "learning_rate": 9.746625026433929e-06, "loss": 0.6752, "step": 1224 }, { "epoch": 0.13, "grad_norm": 2.7729898492722955, "learning_rate": 9.746089115318751e-06, "loss": 0.6633, "step": 1225 }, { "epoch": 0.13, "grad_norm": 2.5642579652047615, "learning_rate": 9.745552652816662e-06, "loss": 0.6911, "step": 1226 }, { "epoch": 0.13, "grad_norm": 2.479378462117706, "learning_rate": 9.74501563898998e-06, "loss": 0.6961, "step": 1227 }, { "epoch": 0.13, "grad_norm": 3.517261027107456, "learning_rate": 9.744478073901102e-06, "loss": 0.6875, "step": 1228 }, { "epoch": 0.13, "grad_norm": 4.143063814038163, "learning_rate": 9.743939957612473e-06, "loss": 0.7862, "step": 1229 }, { "epoch": 0.13, "grad_norm": 2.186066134453419, "learning_rate": 9.743401290186615e-06, "loss": 0.6905, "step": 1230 }, { "epoch": 0.13, "grad_norm": 2.4988157638795383, "learning_rate": 9.742862071686105e-06, "loss": 0.6065, "step": 1231 }, { "epoch": 0.13, "grad_norm": 2.944980331009129, "learning_rate": 9.742322302173591e-06, "loss": 0.8424, "step": 1232 }, { "epoch": 0.13, "grad_norm": 2.4840746884722997, "learning_rate": 9.74178198171178e-06, "loss": 0.7242, "step": 1233 }, { "epoch": 0.13, "grad_norm": 2.3564467576176615, "learning_rate": 9.741241110363446e-06, "loss": 0.5671, "step": 1234 }, { "epoch": 0.13, "grad_norm": 2.5428398872084914, "learning_rate": 9.740699688191426e-06, "loss": 0.7558, "step": 1235 }, { "epoch": 0.13, "grad_norm": 2.987401358325348, "learning_rate": 9.74015771525862e-06, "loss": 0.7976, "step": 1236 }, { "epoch": 0.13, "grad_norm": 2.288527677436737, "learning_rate": 9.73961519162799e-06, "loss": 0.7495, "step": 1237 }, { "epoch": 0.13, "grad_norm": 3.203656023458977, "learning_rate": 9.739072117362572e-06, "loss": 0.6546, "step": 1238 }, { "epoch": 0.13, "grad_norm": 2.893448106608211, "learning_rate": 9.738528492525454e-06, "loss": 0.7536, "step": 1239 }, { "epoch": 0.13, "grad_norm": 2.562798821263755, "learning_rate": 9.737984317179792e-06, "loss": 0.6901, "step": 1240 }, { "epoch": 0.13, "grad_norm": 2.525831897954803, "learning_rate": 9.737439591388808e-06, "loss": 0.7072, "step": 1241 }, { "epoch": 0.13, "grad_norm": 2.390376616064533, "learning_rate": 9.736894315215788e-06, "loss": 0.6709, "step": 1242 }, { "epoch": 0.13, "grad_norm": 2.290595026707291, "learning_rate": 9.736348488724078e-06, "loss": 0.7022, "step": 1243 }, { "epoch": 0.13, "grad_norm": 3.0135726421842928, "learning_rate": 9.735802111977093e-06, "loss": 0.7398, "step": 1244 }, { "epoch": 0.13, "grad_norm": 6.4970745285376745, "learning_rate": 9.735255185038308e-06, "loss": 0.7238, "step": 1245 }, { "epoch": 0.13, "grad_norm": 2.5294324112992945, "learning_rate": 9.734707707971265e-06, "loss": 0.6654, "step": 1246 }, { "epoch": 0.13, "grad_norm": 4.769086981727464, "learning_rate": 9.734159680839566e-06, "loss": 0.7625, "step": 1247 }, { "epoch": 0.13, "grad_norm": 2.425192205003877, "learning_rate": 9.733611103706882e-06, "loss": 0.7288, "step": 1248 }, { "epoch": 0.13, "grad_norm": 2.4432588819312016, "learning_rate": 9.73306197663694e-06, "loss": 0.7453, "step": 1249 }, { "epoch": 0.13, "grad_norm": 2.4916017795323224, "learning_rate": 9.732512299693542e-06, "loss": 0.6687, "step": 1250 }, { "epoch": 0.13, "grad_norm": 6.171083266500421, "learning_rate": 9.731962072940545e-06, "loss": 0.7657, "step": 1251 }, { "epoch": 0.13, "grad_norm": 2.2828106585662606, "learning_rate": 9.731411296441873e-06, "loss": 0.7871, "step": 1252 }, { "epoch": 0.13, "grad_norm": 6.2796867996521595, "learning_rate": 9.730859970261514e-06, "loss": 0.7616, "step": 1253 }, { "epoch": 0.13, "grad_norm": 2.9365596197667454, "learning_rate": 9.730308094463519e-06, "loss": 0.7857, "step": 1254 }, { "epoch": 0.13, "grad_norm": 2.571904631777549, "learning_rate": 9.729755669112003e-06, "loss": 0.7116, "step": 1255 }, { "epoch": 0.13, "grad_norm": 2.5467525821131054, "learning_rate": 9.729202694271145e-06, "loss": 0.6199, "step": 1256 }, { "epoch": 0.13, "grad_norm": 2.2962244446782196, "learning_rate": 9.72864917000519e-06, "loss": 0.758, "step": 1257 }, { "epoch": 0.13, "grad_norm": 3.140094567957693, "learning_rate": 9.728095096378443e-06, "loss": 0.7457, "step": 1258 }, { "epoch": 0.13, "grad_norm": 2.4307560819993066, "learning_rate": 9.727540473455277e-06, "loss": 0.7374, "step": 1259 }, { "epoch": 0.13, "grad_norm": 3.4574501390436145, "learning_rate": 9.726985301300122e-06, "loss": 0.7201, "step": 1260 }, { "epoch": 0.13, "grad_norm": 2.5770663455788885, "learning_rate": 9.72642957997748e-06, "loss": 0.6912, "step": 1261 }, { "epoch": 0.13, "grad_norm": 2.6051783139637252, "learning_rate": 9.725873309551915e-06, "loss": 0.7906, "step": 1262 }, { "epoch": 0.13, "grad_norm": 2.6247042237572757, "learning_rate": 9.725316490088046e-06, "loss": 0.7596, "step": 1263 }, { "epoch": 0.13, "grad_norm": 2.208978552737289, "learning_rate": 9.724759121650569e-06, "loss": 0.6893, "step": 1264 }, { "epoch": 0.13, "grad_norm": 2.818237537685943, "learning_rate": 9.724201204304234e-06, "loss": 0.6509, "step": 1265 }, { "epoch": 0.13, "grad_norm": 3.516551806674273, "learning_rate": 9.72364273811386e-06, "loss": 0.8091, "step": 1266 }, { "epoch": 0.13, "grad_norm": 2.8775423326241003, "learning_rate": 9.723083723144326e-06, "loss": 0.6664, "step": 1267 }, { "epoch": 0.13, "grad_norm": 3.1104989369101292, "learning_rate": 9.722524159460579e-06, "loss": 0.7484, "step": 1268 }, { "epoch": 0.13, "grad_norm": 2.284433621967399, "learning_rate": 9.721964047127627e-06, "loss": 0.7694, "step": 1269 }, { "epoch": 0.13, "grad_norm": 3.528762803348098, "learning_rate": 9.721403386210542e-06, "loss": 0.7047, "step": 1270 }, { "epoch": 0.13, "grad_norm": 2.3698967383567053, "learning_rate": 9.720842176774458e-06, "loss": 0.7303, "step": 1271 }, { "epoch": 0.13, "grad_norm": 2.2862180267688195, "learning_rate": 9.720280418884578e-06, "loss": 0.6719, "step": 1272 }, { "epoch": 0.13, "grad_norm": 2.585341957705507, "learning_rate": 9.719718112606163e-06, "loss": 0.6893, "step": 1273 }, { "epoch": 0.13, "grad_norm": 2.931292145704906, "learning_rate": 9.719155258004542e-06, "loss": 0.7129, "step": 1274 }, { "epoch": 0.13, "grad_norm": 2.930050695251463, "learning_rate": 9.718591855145105e-06, "loss": 0.7024, "step": 1275 }, { "epoch": 0.13, "grad_norm": 2.3725890130265377, "learning_rate": 9.718027904093306e-06, "loss": 0.6779, "step": 1276 }, { "epoch": 0.13, "grad_norm": 2.667713730201913, "learning_rate": 9.717463404914661e-06, "loss": 0.7128, "step": 1277 }, { "epoch": 0.13, "grad_norm": 2.4476996596913376, "learning_rate": 9.716898357674757e-06, "loss": 0.725, "step": 1278 }, { "epoch": 0.13, "grad_norm": 2.286403952573892, "learning_rate": 9.716332762439238e-06, "loss": 0.6882, "step": 1279 }, { "epoch": 0.13, "grad_norm": 2.187379321806065, "learning_rate": 9.71576661927381e-06, "loss": 0.6514, "step": 1280 }, { "epoch": 0.13, "grad_norm": 8.048461731248665, "learning_rate": 9.71519992824425e-06, "loss": 0.69, "step": 1281 }, { "epoch": 0.13, "grad_norm": 2.4471121642879634, "learning_rate": 9.714632689416392e-06, "loss": 0.6853, "step": 1282 }, { "epoch": 0.14, "grad_norm": 2.3522287088330436, "learning_rate": 9.714064902856136e-06, "loss": 0.6857, "step": 1283 }, { "epoch": 0.14, "grad_norm": 2.8039331155505693, "learning_rate": 9.713496568629447e-06, "loss": 0.7953, "step": 1284 }, { "epoch": 0.14, "grad_norm": 2.5431568191768257, "learning_rate": 9.71292768680235e-06, "loss": 0.6906, "step": 1285 }, { "epoch": 0.14, "grad_norm": 2.734010132334214, "learning_rate": 9.712358257440942e-06, "loss": 0.7469, "step": 1286 }, { "epoch": 0.14, "grad_norm": 2.766837316455564, "learning_rate": 9.711788280611371e-06, "loss": 0.7425, "step": 1287 }, { "epoch": 0.14, "grad_norm": 2.2607094221443815, "learning_rate": 9.711217756379859e-06, "loss": 0.6745, "step": 1288 }, { "epoch": 0.14, "grad_norm": 2.718072945572998, "learning_rate": 9.710646684812686e-06, "loss": 0.6632, "step": 1289 }, { "epoch": 0.14, "grad_norm": 2.220340692493552, "learning_rate": 9.7100750659762e-06, "loss": 0.7289, "step": 1290 }, { "epoch": 0.14, "grad_norm": 3.630360305784288, "learning_rate": 9.709502899936805e-06, "loss": 0.6569, "step": 1291 }, { "epoch": 0.14, "grad_norm": 2.7311986179292194, "learning_rate": 9.70893018676098e-06, "loss": 0.7763, "step": 1292 }, { "epoch": 0.14, "grad_norm": 2.292026620226449, "learning_rate": 9.708356926515256e-06, "loss": 0.7046, "step": 1293 }, { "epoch": 0.14, "grad_norm": 3.099523665954287, "learning_rate": 9.707783119266236e-06, "loss": 0.7467, "step": 1294 }, { "epoch": 0.14, "grad_norm": 2.9843246706602047, "learning_rate": 9.707208765080583e-06, "loss": 0.7871, "step": 1295 }, { "epoch": 0.14, "grad_norm": 2.603495110130337, "learning_rate": 9.706633864025021e-06, "loss": 0.6149, "step": 1296 }, { "epoch": 0.14, "grad_norm": 2.9957523727453244, "learning_rate": 9.706058416166342e-06, "loss": 0.6866, "step": 1297 }, { "epoch": 0.14, "grad_norm": 3.4581527501195923, "learning_rate": 9.705482421571401e-06, "loss": 0.7306, "step": 1298 }, { "epoch": 0.14, "grad_norm": 2.75978405172835, "learning_rate": 9.704905880307113e-06, "loss": 0.704, "step": 1299 }, { "epoch": 0.14, "grad_norm": 2.6219298892401675, "learning_rate": 9.704328792440462e-06, "loss": 0.7072, "step": 1300 }, { "epoch": 0.14, "grad_norm": 2.1583848744671967, "learning_rate": 9.70375115803849e-06, "loss": 0.6726, "step": 1301 }, { "epoch": 0.14, "grad_norm": 2.7991267040996943, "learning_rate": 9.703172977168307e-06, "loss": 0.6575, "step": 1302 }, { "epoch": 0.14, "grad_norm": 9.725583520285742, "learning_rate": 9.702594249897082e-06, "loss": 0.735, "step": 1303 }, { "epoch": 0.14, "grad_norm": 3.2733331053327124, "learning_rate": 9.70201497629205e-06, "loss": 0.7678, "step": 1304 }, { "epoch": 0.14, "grad_norm": 3.3225063544881346, "learning_rate": 9.701435156420511e-06, "loss": 0.6528, "step": 1305 }, { "epoch": 0.14, "grad_norm": 2.5695618934438342, "learning_rate": 9.700854790349826e-06, "loss": 0.7385, "step": 1306 }, { "epoch": 0.14, "grad_norm": 2.165748397571203, "learning_rate": 9.700273878147419e-06, "loss": 0.6923, "step": 1307 }, { "epoch": 0.14, "grad_norm": 3.946583806133681, "learning_rate": 9.699692419880782e-06, "loss": 0.6818, "step": 1308 }, { "epoch": 0.14, "grad_norm": 2.7377235660563417, "learning_rate": 9.699110415617464e-06, "loss": 0.6344, "step": 1309 }, { "epoch": 0.14, "grad_norm": 4.216540668421752, "learning_rate": 9.698527865425083e-06, "loss": 0.7325, "step": 1310 }, { "epoch": 0.14, "grad_norm": 2.5772415638304755, "learning_rate": 9.697944769371315e-06, "loss": 0.6738, "step": 1311 }, { "epoch": 0.14, "grad_norm": 3.4296412096784854, "learning_rate": 9.697361127523905e-06, "loss": 0.7011, "step": 1312 }, { "epoch": 0.14, "grad_norm": 3.0442601673343894, "learning_rate": 9.696776939950657e-06, "loss": 0.7084, "step": 1313 }, { "epoch": 0.14, "grad_norm": 3.075407781881538, "learning_rate": 9.696192206719441e-06, "loss": 0.7175, "step": 1314 }, { "epoch": 0.14, "grad_norm": 2.190682088347276, "learning_rate": 9.69560692789819e-06, "loss": 0.718, "step": 1315 }, { "epoch": 0.14, "grad_norm": 3.203702468047881, "learning_rate": 9.695021103554901e-06, "loss": 0.78, "step": 1316 }, { "epoch": 0.14, "grad_norm": 2.334461505397333, "learning_rate": 9.694434733757632e-06, "loss": 0.7062, "step": 1317 }, { "epoch": 0.14, "grad_norm": 2.3787385280317968, "learning_rate": 9.693847818574504e-06, "loss": 0.6883, "step": 1318 }, { "epoch": 0.14, "grad_norm": 1.7061551304241709, "learning_rate": 9.693260358073707e-06, "loss": 0.7412, "step": 1319 }, { "epoch": 0.14, "grad_norm": 3.5189339975333427, "learning_rate": 9.692672352323486e-06, "loss": 0.7493, "step": 1320 }, { "epoch": 0.14, "grad_norm": 2.718227304089354, "learning_rate": 9.692083801392158e-06, "loss": 0.7298, "step": 1321 }, { "epoch": 0.14, "grad_norm": 5.863146583225161, "learning_rate": 9.691494705348097e-06, "loss": 0.685, "step": 1322 }, { "epoch": 0.14, "grad_norm": 2.224062335592531, "learning_rate": 9.690905064259744e-06, "loss": 0.7131, "step": 1323 }, { "epoch": 0.14, "grad_norm": 2.3482124963584043, "learning_rate": 9.690314878195599e-06, "loss": 0.7642, "step": 1324 }, { "epoch": 0.14, "grad_norm": 2.802943387618265, "learning_rate": 9.68972414722423e-06, "loss": 0.7402, "step": 1325 }, { "epoch": 0.14, "grad_norm": 6.439325155682455, "learning_rate": 9.689132871414266e-06, "loss": 0.6827, "step": 1326 }, { "epoch": 0.14, "grad_norm": 2.283544545333155, "learning_rate": 9.688541050834402e-06, "loss": 0.7649, "step": 1327 }, { "epoch": 0.14, "grad_norm": 2.8881726166539816, "learning_rate": 9.68794868555339e-06, "loss": 0.7581, "step": 1328 }, { "epoch": 0.14, "grad_norm": 2.5386184576990725, "learning_rate": 9.687355775640052e-06, "loss": 0.7252, "step": 1329 }, { "epoch": 0.14, "grad_norm": 2.6298695027488703, "learning_rate": 9.68676232116327e-06, "loss": 0.7742, "step": 1330 }, { "epoch": 0.14, "grad_norm": 2.084061296828642, "learning_rate": 9.686168322191988e-06, "loss": 0.6, "step": 1331 }, { "epoch": 0.14, "grad_norm": 2.3716239247862054, "learning_rate": 9.685573778795218e-06, "loss": 0.5694, "step": 1332 }, { "epoch": 0.14, "grad_norm": 3.2053716555139404, "learning_rate": 9.684978691042031e-06, "loss": 0.7231, "step": 1333 }, { "epoch": 0.14, "grad_norm": 2.844182707802076, "learning_rate": 9.684383059001562e-06, "loss": 0.6835, "step": 1334 }, { "epoch": 0.14, "grad_norm": 3.390207972128154, "learning_rate": 9.68378688274301e-06, "loss": 0.7385, "step": 1335 }, { "epoch": 0.14, "grad_norm": 2.4980529568022267, "learning_rate": 9.683190162335638e-06, "loss": 0.8148, "step": 1336 }, { "epoch": 0.14, "grad_norm": 4.308242765317922, "learning_rate": 9.68259289784877e-06, "loss": 0.604, "step": 1337 }, { "epoch": 0.14, "grad_norm": 2.5111104713428425, "learning_rate": 9.681995089351797e-06, "loss": 0.6423, "step": 1338 }, { "epoch": 0.14, "grad_norm": 2.184046389884775, "learning_rate": 9.681396736914169e-06, "loss": 0.8212, "step": 1339 }, { "epoch": 0.14, "grad_norm": 2.3349747833725316, "learning_rate": 9.680797840605398e-06, "loss": 0.724, "step": 1340 }, { "epoch": 0.14, "grad_norm": 3.357395284413713, "learning_rate": 9.680198400495067e-06, "loss": 0.7505, "step": 1341 }, { "epoch": 0.14, "grad_norm": 3.237004929584163, "learning_rate": 9.679598416652814e-06, "loss": 0.7294, "step": 1342 }, { "epoch": 0.14, "grad_norm": 2.3140384711893978, "learning_rate": 9.678997889148342e-06, "loss": 0.7354, "step": 1343 }, { "epoch": 0.14, "grad_norm": 2.396103673854701, "learning_rate": 9.678396818051423e-06, "loss": 0.7875, "step": 1344 }, { "epoch": 0.14, "grad_norm": 2.4741313274912935, "learning_rate": 9.677795203431886e-06, "loss": 0.7377, "step": 1345 }, { "epoch": 0.14, "grad_norm": 3.4119548772690003, "learning_rate": 9.677193045359626e-06, "loss": 0.6744, "step": 1346 }, { "epoch": 0.14, "grad_norm": 3.0195566082707535, "learning_rate": 9.676590343904595e-06, "loss": 0.5983, "step": 1347 }, { "epoch": 0.14, "grad_norm": 2.429934472959647, "learning_rate": 9.675987099136817e-06, "loss": 0.6282, "step": 1348 }, { "epoch": 0.14, "grad_norm": 2.399876917459758, "learning_rate": 9.675383311126376e-06, "loss": 0.7758, "step": 1349 }, { "epoch": 0.14, "grad_norm": 2.3597263333081977, "learning_rate": 9.674778979943417e-06, "loss": 0.7706, "step": 1350 }, { "epoch": 0.14, "grad_norm": 2.582735032637409, "learning_rate": 9.67417410565815e-06, "loss": 0.6888, "step": 1351 }, { "epoch": 0.14, "grad_norm": 1.2457596734399106, "learning_rate": 9.673568688340846e-06, "loss": 0.6083, "step": 1352 }, { "epoch": 0.14, "grad_norm": 2.2475959881242735, "learning_rate": 9.672962728061842e-06, "loss": 0.7384, "step": 1353 }, { "epoch": 0.14, "grad_norm": 2.7160143206592346, "learning_rate": 9.672356224891536e-06, "loss": 0.6937, "step": 1354 }, { "epoch": 0.14, "grad_norm": 2.5502591319299843, "learning_rate": 9.671749178900392e-06, "loss": 0.7563, "step": 1355 }, { "epoch": 0.14, "grad_norm": 2.6327155581660455, "learning_rate": 9.67114159015893e-06, "loss": 0.6819, "step": 1356 }, { "epoch": 0.14, "grad_norm": 2.131038039139984, "learning_rate": 9.670533458737744e-06, "loss": 0.7373, "step": 1357 }, { "epoch": 0.14, "grad_norm": 3.385584377854225, "learning_rate": 9.66992478470748e-06, "loss": 0.7657, "step": 1358 }, { "epoch": 0.14, "grad_norm": 7.320376159534197, "learning_rate": 9.669315568138854e-06, "loss": 0.6898, "step": 1359 }, { "epoch": 0.14, "grad_norm": 2.8074892746810742, "learning_rate": 9.668705809102644e-06, "loss": 0.6807, "step": 1360 }, { "epoch": 0.14, "grad_norm": 2.8098437749259717, "learning_rate": 9.668095507669688e-06, "loss": 0.6959, "step": 1361 }, { "epoch": 0.14, "grad_norm": 2.7943446077345855, "learning_rate": 9.66748466391089e-06, "loss": 0.7087, "step": 1362 }, { "epoch": 0.14, "grad_norm": 6.059820137036976, "learning_rate": 9.666873277897216e-06, "loss": 0.6634, "step": 1363 }, { "epoch": 0.14, "grad_norm": 2.575688586030959, "learning_rate": 9.666261349699696e-06, "loss": 0.6982, "step": 1364 }, { "epoch": 0.14, "grad_norm": 2.262608946469177, "learning_rate": 9.66564887938942e-06, "loss": 0.8071, "step": 1365 }, { "epoch": 0.14, "grad_norm": 2.311694063851022, "learning_rate": 9.665035867037545e-06, "loss": 0.7185, "step": 1366 }, { "epoch": 0.14, "grad_norm": 2.1721646707712954, "learning_rate": 9.66442231271529e-06, "loss": 0.6839, "step": 1367 }, { "epoch": 0.14, "grad_norm": 2.056959685399615, "learning_rate": 9.663808216493931e-06, "loss": 0.6246, "step": 1368 }, { "epoch": 0.14, "grad_norm": 3.211417541737079, "learning_rate": 9.663193578444815e-06, "loss": 0.7382, "step": 1369 }, { "epoch": 0.14, "grad_norm": 3.482753174472199, "learning_rate": 9.662578398639353e-06, "loss": 0.7261, "step": 1370 }, { "epoch": 0.14, "grad_norm": 2.8419175901293308, "learning_rate": 9.661962677149007e-06, "loss": 0.7479, "step": 1371 }, { "epoch": 0.14, "grad_norm": 1.562383209374972, "learning_rate": 9.661346414045315e-06, "loss": 0.6338, "step": 1372 }, { "epoch": 0.14, "grad_norm": 2.1133024703227266, "learning_rate": 9.66072960939987e-06, "loss": 0.8398, "step": 1373 }, { "epoch": 0.14, "grad_norm": 2.209697515295642, "learning_rate": 9.660112263284334e-06, "loss": 0.6979, "step": 1374 }, { "epoch": 0.14, "grad_norm": 4.316723331737145, "learning_rate": 9.659494375770424e-06, "loss": 0.7773, "step": 1375 }, { "epoch": 0.14, "grad_norm": 2.3531621544495867, "learning_rate": 9.65887594692993e-06, "loss": 0.7506, "step": 1376 }, { "epoch": 0.14, "grad_norm": 5.6434780012213634, "learning_rate": 9.658256976834692e-06, "loss": 0.7223, "step": 1377 }, { "epoch": 0.15, "grad_norm": 2.899949137691732, "learning_rate": 9.657637465556626e-06, "loss": 0.7059, "step": 1378 }, { "epoch": 0.15, "grad_norm": 3.154298474199794, "learning_rate": 9.657017413167702e-06, "loss": 0.6951, "step": 1379 }, { "epoch": 0.15, "grad_norm": 2.762468831498145, "learning_rate": 9.656396819739959e-06, "loss": 0.7888, "step": 1380 }, { "epoch": 0.15, "grad_norm": 1.3312043862215792, "learning_rate": 9.655775685345493e-06, "loss": 0.6494, "step": 1381 }, { "epoch": 0.15, "grad_norm": 3.5501629949975944, "learning_rate": 9.655154010056464e-06, "loss": 0.8004, "step": 1382 }, { "epoch": 0.15, "grad_norm": 4.64326272620766, "learning_rate": 9.654531793945102e-06, "loss": 0.7567, "step": 1383 }, { "epoch": 0.15, "grad_norm": 2.575524144576267, "learning_rate": 9.653909037083689e-06, "loss": 0.7945, "step": 1384 }, { "epoch": 0.15, "grad_norm": 2.9295057714053, "learning_rate": 9.653285739544578e-06, "loss": 0.7318, "step": 1385 }, { "epoch": 0.15, "grad_norm": 2.0997333286555233, "learning_rate": 9.65266190140018e-06, "loss": 0.6631, "step": 1386 }, { "epoch": 0.15, "grad_norm": 2.2304269290377787, "learning_rate": 9.652037522722974e-06, "loss": 0.6277, "step": 1387 }, { "epoch": 0.15, "grad_norm": 4.44783772978294, "learning_rate": 9.651412603585495e-06, "loss": 0.7528, "step": 1388 }, { "epoch": 0.15, "grad_norm": 2.178234675495518, "learning_rate": 9.650787144060345e-06, "loss": 0.753, "step": 1389 }, { "epoch": 0.15, "grad_norm": 2.2283048639192446, "learning_rate": 9.65016114422019e-06, "loss": 0.7198, "step": 1390 }, { "epoch": 0.15, "grad_norm": 2.4993099361247664, "learning_rate": 9.649534604137755e-06, "loss": 0.7052, "step": 1391 }, { "epoch": 0.15, "grad_norm": 3.1995602061556725, "learning_rate": 9.64890752388583e-06, "loss": 0.7257, "step": 1392 }, { "epoch": 0.15, "grad_norm": 2.2307125057623227, "learning_rate": 9.648279903537268e-06, "loss": 0.7356, "step": 1393 }, { "epoch": 0.15, "grad_norm": 2.1622062738792347, "learning_rate": 9.647651743164983e-06, "loss": 0.6852, "step": 1394 }, { "epoch": 0.15, "grad_norm": 3.3927400290041, "learning_rate": 9.647023042841953e-06, "loss": 0.6911, "step": 1395 }, { "epoch": 0.15, "grad_norm": 4.504113313434371, "learning_rate": 9.64639380264122e-06, "loss": 0.7242, "step": 1396 }, { "epoch": 0.15, "grad_norm": 2.837839765380396, "learning_rate": 9.645764022635886e-06, "loss": 0.6788, "step": 1397 }, { "epoch": 0.15, "grad_norm": 2.2209452355398236, "learning_rate": 9.645133702899116e-06, "loss": 0.6795, "step": 1398 }, { "epoch": 0.15, "grad_norm": 3.3171450711888, "learning_rate": 9.644502843504141e-06, "loss": 0.7098, "step": 1399 }, { "epoch": 0.15, "grad_norm": 2.433039777766387, "learning_rate": 9.643871444524253e-06, "loss": 0.7227, "step": 1400 }, { "epoch": 0.15, "grad_norm": 2.572768002891731, "learning_rate": 9.643239506032805e-06, "loss": 0.7593, "step": 1401 }, { "epoch": 0.15, "grad_norm": 2.125858662151184, "learning_rate": 9.642607028103212e-06, "loss": 0.7119, "step": 1402 }, { "epoch": 0.15, "grad_norm": 2.685061632087422, "learning_rate": 9.641974010808954e-06, "loss": 0.6543, "step": 1403 }, { "epoch": 0.15, "grad_norm": 2.22659599285853, "learning_rate": 9.641340454223576e-06, "loss": 0.7721, "step": 1404 }, { "epoch": 0.15, "grad_norm": 3.4662300157631596, "learning_rate": 9.64070635842068e-06, "loss": 0.7117, "step": 1405 }, { "epoch": 0.15, "grad_norm": 2.643822549271141, "learning_rate": 9.640071723473934e-06, "loss": 0.7299, "step": 1406 }, { "epoch": 0.15, "grad_norm": 2.595323686997289, "learning_rate": 9.639436549457069e-06, "loss": 0.7492, "step": 1407 }, { "epoch": 0.15, "grad_norm": 2.4646272228107784, "learning_rate": 9.638800836443875e-06, "loss": 0.7249, "step": 1408 }, { "epoch": 0.15, "grad_norm": 2.889739658027975, "learning_rate": 9.638164584508211e-06, "loss": 0.7073, "step": 1409 }, { "epoch": 0.15, "grad_norm": 2.731955437980805, "learning_rate": 9.637527793723993e-06, "loss": 0.6637, "step": 1410 }, { "epoch": 0.15, "grad_norm": 7.330236462820327, "learning_rate": 9.6368904641652e-06, "loss": 0.6849, "step": 1411 }, { "epoch": 0.15, "grad_norm": 2.09841411709079, "learning_rate": 9.63625259590588e-06, "loss": 0.7158, "step": 1412 }, { "epoch": 0.15, "grad_norm": 2.629199349370734, "learning_rate": 9.635614189020133e-06, "loss": 0.7321, "step": 1413 }, { "epoch": 0.15, "grad_norm": 2.344946491188394, "learning_rate": 9.63497524358213e-06, "loss": 0.746, "step": 1414 }, { "epoch": 0.15, "grad_norm": 2.6910188223643856, "learning_rate": 9.6343357596661e-06, "loss": 0.6378, "step": 1415 }, { "epoch": 0.15, "grad_norm": 3.2643901325386224, "learning_rate": 9.633695737346341e-06, "loss": 0.7012, "step": 1416 }, { "epoch": 0.15, "grad_norm": 2.741008316052005, "learning_rate": 9.633055176697205e-06, "loss": 0.7338, "step": 1417 }, { "epoch": 0.15, "grad_norm": 3.204256419036011, "learning_rate": 9.632414077793111e-06, "loss": 0.7241, "step": 1418 }, { "epoch": 0.15, "grad_norm": 2.440995779151848, "learning_rate": 9.63177244070854e-06, "loss": 0.6625, "step": 1419 }, { "epoch": 0.15, "grad_norm": 2.5374105809523626, "learning_rate": 9.631130265518036e-06, "loss": 0.7329, "step": 1420 }, { "epoch": 0.15, "grad_norm": 2.46110371173565, "learning_rate": 9.630487552296207e-06, "loss": 0.7371, "step": 1421 }, { "epoch": 0.15, "grad_norm": 3.3468353330936393, "learning_rate": 9.629844301117717e-06, "loss": 0.6405, "step": 1422 }, { "epoch": 0.15, "grad_norm": 2.397830492051804, "learning_rate": 9.6292005120573e-06, "loss": 0.6774, "step": 1423 }, { "epoch": 0.15, "grad_norm": 3.7532545714870977, "learning_rate": 9.62855618518975e-06, "loss": 0.7351, "step": 1424 }, { "epoch": 0.15, "grad_norm": 3.1816654437320833, "learning_rate": 9.627911320589922e-06, "loss": 0.7497, "step": 1425 }, { "epoch": 0.15, "grad_norm": 2.4023123862610016, "learning_rate": 9.627265918332734e-06, "loss": 0.7702, "step": 1426 }, { "epoch": 0.15, "grad_norm": 2.6310701737673368, "learning_rate": 9.626619978493168e-06, "loss": 0.7071, "step": 1427 }, { "epoch": 0.15, "grad_norm": 2.44463817108983, "learning_rate": 9.62597350114627e-06, "loss": 0.6781, "step": 1428 }, { "epoch": 0.15, "grad_norm": 2.4602674966430977, "learning_rate": 9.625326486367139e-06, "loss": 0.7439, "step": 1429 }, { "epoch": 0.15, "grad_norm": 2.4828542109646614, "learning_rate": 9.624678934230948e-06, "loss": 0.7377, "step": 1430 }, { "epoch": 0.15, "grad_norm": 2.846123388535948, "learning_rate": 9.624030844812926e-06, "loss": 0.6388, "step": 1431 }, { "epoch": 0.15, "grad_norm": 2.1757720444416084, "learning_rate": 9.623382218188371e-06, "loss": 0.6985, "step": 1432 }, { "epoch": 0.15, "grad_norm": 3.259786537034406, "learning_rate": 9.622733054432631e-06, "loss": 0.7484, "step": 1433 }, { "epoch": 0.15, "grad_norm": 2.212671951693864, "learning_rate": 9.62208335362113e-06, "loss": 0.6293, "step": 1434 }, { "epoch": 0.15, "grad_norm": 2.818588711614401, "learning_rate": 9.621433115829344e-06, "loss": 0.7193, "step": 1435 }, { "epoch": 0.15, "grad_norm": 4.029835287597727, "learning_rate": 9.62078234113282e-06, "loss": 0.6895, "step": 1436 }, { "epoch": 0.15, "grad_norm": 2.1696773604444783, "learning_rate": 9.62013102960716e-06, "loss": 0.7032, "step": 1437 }, { "epoch": 0.15, "grad_norm": 3.5749029914926895, "learning_rate": 9.619479181328034e-06, "loss": 0.6764, "step": 1438 }, { "epoch": 0.15, "grad_norm": 2.8794399028585693, "learning_rate": 9.618826796371168e-06, "loss": 0.7734, "step": 1439 }, { "epoch": 0.15, "grad_norm": 2.516706221446295, "learning_rate": 9.618173874812357e-06, "loss": 0.7291, "step": 1440 }, { "epoch": 0.15, "grad_norm": 3.1628068758073637, "learning_rate": 9.617520416727456e-06, "loss": 0.7082, "step": 1441 }, { "epoch": 0.15, "grad_norm": 2.8419799887337898, "learning_rate": 9.61686642219238e-06, "loss": 0.7383, "step": 1442 }, { "epoch": 0.15, "grad_norm": 2.6343772160057055, "learning_rate": 9.616211891283108e-06, "loss": 0.6451, "step": 1443 }, { "epoch": 0.15, "grad_norm": 3.1705848350056636, "learning_rate": 9.615556824075684e-06, "loss": 0.7876, "step": 1444 }, { "epoch": 0.15, "grad_norm": 3.483339076402114, "learning_rate": 9.61490122064621e-06, "loss": 0.7049, "step": 1445 }, { "epoch": 0.15, "grad_norm": 2.0799562658988946, "learning_rate": 9.614245081070851e-06, "loss": 0.7073, "step": 1446 }, { "epoch": 0.15, "grad_norm": 2.6861424428613367, "learning_rate": 9.61358840542584e-06, "loss": 0.6411, "step": 1447 }, { "epoch": 0.15, "grad_norm": 2.7191955155420042, "learning_rate": 9.612931193787464e-06, "loss": 0.6745, "step": 1448 }, { "epoch": 0.15, "grad_norm": 6.472474388518654, "learning_rate": 9.612273446232075e-06, "loss": 0.7655, "step": 1449 }, { "epoch": 0.15, "grad_norm": 2.9357438196561434, "learning_rate": 9.61161516283609e-06, "loss": 0.6609, "step": 1450 }, { "epoch": 0.15, "grad_norm": 2.590924049130375, "learning_rate": 9.610956343675988e-06, "loss": 0.7238, "step": 1451 }, { "epoch": 0.15, "grad_norm": 5.879538354937373, "learning_rate": 9.610296988828305e-06, "loss": 0.7834, "step": 1452 }, { "epoch": 0.15, "grad_norm": 3.1041931126773212, "learning_rate": 9.609637098369646e-06, "loss": 0.6807, "step": 1453 }, { "epoch": 0.15, "grad_norm": 2.261974070639065, "learning_rate": 9.608976672376673e-06, "loss": 0.7105, "step": 1454 }, { "epoch": 0.15, "grad_norm": 3.1210663189380314, "learning_rate": 9.608315710926113e-06, "loss": 0.6593, "step": 1455 }, { "epoch": 0.15, "grad_norm": 3.050620721243887, "learning_rate": 9.607654214094757e-06, "loss": 0.6594, "step": 1456 }, { "epoch": 0.15, "grad_norm": 2.7905140014141727, "learning_rate": 9.606992181959451e-06, "loss": 0.7085, "step": 1457 }, { "epoch": 0.15, "grad_norm": 2.7770890627009153, "learning_rate": 9.606329614597114e-06, "loss": 0.7159, "step": 1458 }, { "epoch": 0.15, "grad_norm": 1.9828864391785557, "learning_rate": 9.605666512084716e-06, "loss": 0.6708, "step": 1459 }, { "epoch": 0.15, "grad_norm": 2.266774311682029, "learning_rate": 9.605002874499296e-06, "loss": 0.6211, "step": 1460 }, { "epoch": 0.15, "grad_norm": 11.988026431087897, "learning_rate": 9.604338701917954e-06, "loss": 0.7906, "step": 1461 }, { "epoch": 0.15, "grad_norm": 5.5367891929196, "learning_rate": 9.603673994417853e-06, "loss": 0.6816, "step": 1462 }, { "epoch": 0.15, "grad_norm": 2.3227720167759194, "learning_rate": 9.603008752076213e-06, "loss": 0.7428, "step": 1463 }, { "epoch": 0.15, "grad_norm": 3.309441828787185, "learning_rate": 9.602342974970323e-06, "loss": 0.675, "step": 1464 }, { "epoch": 0.15, "grad_norm": 2.523141442933751, "learning_rate": 9.60167666317753e-06, "loss": 0.6574, "step": 1465 }, { "epoch": 0.15, "grad_norm": 2.9705719635611074, "learning_rate": 9.601009816775244e-06, "loss": 0.6318, "step": 1466 }, { "epoch": 0.15, "grad_norm": 2.4004597591752566, "learning_rate": 9.600342435840939e-06, "loss": 0.7256, "step": 1467 }, { "epoch": 0.15, "grad_norm": 2.9444494206781227, "learning_rate": 9.599674520452148e-06, "loss": 0.7762, "step": 1468 }, { "epoch": 0.15, "grad_norm": 3.7455953837505005, "learning_rate": 9.599006070686467e-06, "loss": 0.7091, "step": 1469 }, { "epoch": 0.15, "grad_norm": 3.6074677438299947, "learning_rate": 9.598337086621555e-06, "loss": 0.6716, "step": 1470 }, { "epoch": 0.15, "grad_norm": 2.9274341045897745, "learning_rate": 9.597667568335133e-06, "loss": 0.6385, "step": 1471 }, { "epoch": 0.15, "grad_norm": 3.7346926939077543, "learning_rate": 9.596997515904983e-06, "loss": 0.7569, "step": 1472 }, { "epoch": 0.16, "grad_norm": 3.5131800878247748, "learning_rate": 9.59632692940895e-06, "loss": 0.8693, "step": 1473 }, { "epoch": 0.16, "grad_norm": 3.097568621283518, "learning_rate": 9.595655808924942e-06, "loss": 0.7473, "step": 1474 }, { "epoch": 0.16, "grad_norm": 3.067535905696128, "learning_rate": 9.594984154530926e-06, "loss": 0.7308, "step": 1475 }, { "epoch": 0.16, "grad_norm": 2.508065359892658, "learning_rate": 9.594311966304933e-06, "loss": 0.7814, "step": 1476 }, { "epoch": 0.16, "grad_norm": 2.2827015265359845, "learning_rate": 9.593639244325057e-06, "loss": 0.7721, "step": 1477 }, { "epoch": 0.16, "grad_norm": 2.738236724185965, "learning_rate": 9.592965988669454e-06, "loss": 0.7184, "step": 1478 }, { "epoch": 0.16, "grad_norm": 2.6198830644856206, "learning_rate": 9.592292199416338e-06, "loss": 0.9262, "step": 1479 }, { "epoch": 0.16, "grad_norm": 2.552236548583453, "learning_rate": 9.591617876643989e-06, "loss": 0.7082, "step": 1480 }, { "epoch": 0.16, "grad_norm": 2.826249720921075, "learning_rate": 9.590943020430747e-06, "loss": 0.6825, "step": 1481 }, { "epoch": 0.16, "grad_norm": 2.860146961164607, "learning_rate": 9.59026763085502e-06, "loss": 0.6227, "step": 1482 }, { "epoch": 0.16, "grad_norm": 1.2203871767796366, "learning_rate": 9.589591707995265e-06, "loss": 0.6181, "step": 1483 }, { "epoch": 0.16, "grad_norm": 2.401274298711083, "learning_rate": 9.588915251930013e-06, "loss": 0.6813, "step": 1484 }, { "epoch": 0.16, "grad_norm": 4.331113197722243, "learning_rate": 9.588238262737853e-06, "loss": 0.6847, "step": 1485 }, { "epoch": 0.16, "grad_norm": 2.7893674560880104, "learning_rate": 9.587560740497434e-06, "loss": 0.6684, "step": 1486 }, { "epoch": 0.16, "grad_norm": 3.812079227043104, "learning_rate": 9.586882685287471e-06, "loss": 0.6859, "step": 1487 }, { "epoch": 0.16, "grad_norm": 2.690594004910145, "learning_rate": 9.586204097186738e-06, "loss": 0.6719, "step": 1488 }, { "epoch": 0.16, "grad_norm": 2.67029996471702, "learning_rate": 9.585524976274068e-06, "loss": 0.6752, "step": 1489 }, { "epoch": 0.16, "grad_norm": 2.783949918181014, "learning_rate": 9.584845322628363e-06, "loss": 0.6744, "step": 1490 }, { "epoch": 0.16, "grad_norm": 2.768161593958554, "learning_rate": 9.58416513632858e-06, "loss": 0.6731, "step": 1491 }, { "epoch": 0.16, "grad_norm": 2.7128750709002785, "learning_rate": 9.583484417453744e-06, "loss": 0.6595, "step": 1492 }, { "epoch": 0.16, "grad_norm": 3.1967481849379173, "learning_rate": 9.582803166082938e-06, "loss": 0.7063, "step": 1493 }, { "epoch": 0.16, "grad_norm": 2.5603570119876258, "learning_rate": 9.582121382295309e-06, "loss": 0.7163, "step": 1494 }, { "epoch": 0.16, "grad_norm": 2.275503794973825, "learning_rate": 9.58143906617006e-06, "loss": 0.7148, "step": 1495 }, { "epoch": 0.16, "grad_norm": 2.4819881710650984, "learning_rate": 9.580756217786466e-06, "loss": 0.6752, "step": 1496 }, { "epoch": 0.16, "grad_norm": 1.274480650090562, "learning_rate": 9.580072837223857e-06, "loss": 0.6004, "step": 1497 }, { "epoch": 0.16, "grad_norm": 2.4619991807752157, "learning_rate": 9.579388924561625e-06, "loss": 0.7528, "step": 1498 }, { "epoch": 0.16, "grad_norm": 2.69869982453989, "learning_rate": 9.578704479879225e-06, "loss": 0.6724, "step": 1499 }, { "epoch": 0.16, "grad_norm": 2.587551021620836, "learning_rate": 9.578019503256175e-06, "loss": 0.661, "step": 1500 }, { "epoch": 0.16, "grad_norm": 2.5984643485005297, "learning_rate": 9.577333994772052e-06, "loss": 0.7543, "step": 1501 }, { "epoch": 0.16, "grad_norm": 3.106286735099979, "learning_rate": 9.576647954506498e-06, "loss": 0.7503, "step": 1502 }, { "epoch": 0.16, "grad_norm": 2.5619268778049284, "learning_rate": 9.575961382539211e-06, "loss": 0.7057, "step": 1503 }, { "epoch": 0.16, "grad_norm": 2.6307425334372945, "learning_rate": 9.575274278949962e-06, "loss": 0.7321, "step": 1504 }, { "epoch": 0.16, "grad_norm": 2.8345146127858487, "learning_rate": 9.574586643818572e-06, "loss": 0.7287, "step": 1505 }, { "epoch": 0.16, "grad_norm": 3.26642048267166, "learning_rate": 9.57389847722493e-06, "loss": 0.7251, "step": 1506 }, { "epoch": 0.16, "grad_norm": 2.5229552614331263, "learning_rate": 9.573209779248985e-06, "loss": 0.7519, "step": 1507 }, { "epoch": 0.16, "grad_norm": 2.80455601855804, "learning_rate": 9.572520549970746e-06, "loss": 0.7422, "step": 1508 }, { "epoch": 0.16, "grad_norm": 2.5636231475624394, "learning_rate": 9.571830789470288e-06, "loss": 0.6824, "step": 1509 }, { "epoch": 0.16, "grad_norm": 2.455344120878093, "learning_rate": 9.571140497827746e-06, "loss": 0.7862, "step": 1510 }, { "epoch": 0.16, "grad_norm": 2.9947982231220225, "learning_rate": 9.570449675123313e-06, "loss": 0.8056, "step": 1511 }, { "epoch": 0.16, "grad_norm": 3.199158359526649, "learning_rate": 9.56975832143725e-06, "loss": 0.7388, "step": 1512 }, { "epoch": 0.16, "grad_norm": 2.481563398604672, "learning_rate": 9.569066436849875e-06, "loss": 0.7722, "step": 1513 }, { "epoch": 0.16, "grad_norm": 2.7913702575178583, "learning_rate": 9.568374021441567e-06, "loss": 0.6772, "step": 1514 }, { "epoch": 0.16, "grad_norm": 2.3892593790051593, "learning_rate": 9.567681075292774e-06, "loss": 0.7162, "step": 1515 }, { "epoch": 0.16, "grad_norm": 2.3903154524770183, "learning_rate": 9.566987598483995e-06, "loss": 0.6662, "step": 1516 }, { "epoch": 0.16, "grad_norm": 2.6872816236265624, "learning_rate": 9.5662935910958e-06, "loss": 0.6746, "step": 1517 }, { "epoch": 0.16, "grad_norm": 2.1845449131190704, "learning_rate": 9.565599053208815e-06, "loss": 0.713, "step": 1518 }, { "epoch": 0.16, "grad_norm": 2.520639847963599, "learning_rate": 9.564903984903731e-06, "loss": 0.6745, "step": 1519 }, { "epoch": 0.16, "grad_norm": 2.3349303925717697, "learning_rate": 9.564208386261296e-06, "loss": 0.6643, "step": 1520 }, { "epoch": 0.16, "grad_norm": 2.883067515708215, "learning_rate": 9.563512257362325e-06, "loss": 0.7143, "step": 1521 }, { "epoch": 0.16, "grad_norm": 2.403562641572306, "learning_rate": 9.56281559828769e-06, "loss": 0.5652, "step": 1522 }, { "epoch": 0.16, "grad_norm": 6.692764536743657, "learning_rate": 9.562118409118334e-06, "loss": 0.7098, "step": 1523 }, { "epoch": 0.16, "grad_norm": 2.24853739667993, "learning_rate": 9.561420689935243e-06, "loss": 0.7346, "step": 1524 }, { "epoch": 0.16, "grad_norm": 4.847945930120676, "learning_rate": 9.560722440819485e-06, "loss": 0.6834, "step": 1525 }, { "epoch": 0.16, "grad_norm": 2.9793899903106524, "learning_rate": 9.560023661852178e-06, "loss": 0.6984, "step": 1526 }, { "epoch": 0.16, "grad_norm": 2.696356145427722, "learning_rate": 9.559324353114503e-06, "loss": 0.7272, "step": 1527 }, { "epoch": 0.16, "grad_norm": 2.2812497097713216, "learning_rate": 9.558624514687703e-06, "loss": 0.761, "step": 1528 }, { "epoch": 0.16, "grad_norm": 2.271576386159374, "learning_rate": 9.557924146653087e-06, "loss": 0.7657, "step": 1529 }, { "epoch": 0.16, "grad_norm": 2.7171270620464365, "learning_rate": 9.557223249092017e-06, "loss": 0.7263, "step": 1530 }, { "epoch": 0.16, "grad_norm": 2.8535792247554834, "learning_rate": 9.556521822085924e-06, "loss": 0.7297, "step": 1531 }, { "epoch": 0.16, "grad_norm": 2.4140439514306706, "learning_rate": 9.555819865716298e-06, "loss": 0.6768, "step": 1532 }, { "epoch": 0.16, "grad_norm": 2.9490982350238957, "learning_rate": 9.555117380064689e-06, "loss": 0.698, "step": 1533 }, { "epoch": 0.16, "grad_norm": 2.49780482074063, "learning_rate": 9.55441436521271e-06, "loss": 0.6453, "step": 1534 }, { "epoch": 0.16, "grad_norm": 3.075811387978172, "learning_rate": 9.553710821242036e-06, "loss": 0.6666, "step": 1535 }, { "epoch": 0.16, "grad_norm": 3.0553855565019585, "learning_rate": 9.553006748234402e-06, "loss": 0.7491, "step": 1536 }, { "epoch": 0.16, "grad_norm": 4.1401242046590845, "learning_rate": 9.552302146271606e-06, "loss": 0.7782, "step": 1537 }, { "epoch": 0.16, "grad_norm": 2.42939226201379, "learning_rate": 9.551597015435505e-06, "loss": 0.7143, "step": 1538 }, { "epoch": 0.16, "grad_norm": 2.82555463707085, "learning_rate": 9.550891355808022e-06, "loss": 0.7285, "step": 1539 }, { "epoch": 0.16, "grad_norm": 2.6022856991586036, "learning_rate": 9.550185167471134e-06, "loss": 0.6323, "step": 1540 }, { "epoch": 0.16, "grad_norm": 2.716216112330096, "learning_rate": 9.549478450506888e-06, "loss": 0.6955, "step": 1541 }, { "epoch": 0.16, "grad_norm": 2.9288414453383163, "learning_rate": 9.548771204997387e-06, "loss": 0.6652, "step": 1542 }, { "epoch": 0.16, "grad_norm": 2.9260455534051664, "learning_rate": 9.548063431024797e-06, "loss": 0.6677, "step": 1543 }, { "epoch": 0.16, "grad_norm": 2.6013304107381066, "learning_rate": 9.547355128671343e-06, "loss": 0.6632, "step": 1544 }, { "epoch": 0.16, "grad_norm": 2.7326280366062523, "learning_rate": 9.546646298019315e-06, "loss": 0.7204, "step": 1545 }, { "epoch": 0.16, "grad_norm": 2.1529382962472567, "learning_rate": 9.545936939151066e-06, "loss": 0.7363, "step": 1546 }, { "epoch": 0.16, "grad_norm": 3.48827212379924, "learning_rate": 9.545227052149002e-06, "loss": 0.761, "step": 1547 }, { "epoch": 0.16, "grad_norm": 2.6924419534835637, "learning_rate": 9.544516637095598e-06, "loss": 0.7262, "step": 1548 }, { "epoch": 0.16, "grad_norm": 2.4210226974371554, "learning_rate": 9.54380569407339e-06, "loss": 0.6652, "step": 1549 }, { "epoch": 0.16, "grad_norm": 2.91586157827423, "learning_rate": 9.543094223164967e-06, "loss": 0.6746, "step": 1550 }, { "epoch": 0.16, "grad_norm": 2.8878136097168228, "learning_rate": 9.542382224452995e-06, "loss": 0.6712, "step": 1551 }, { "epoch": 0.16, "grad_norm": 2.485973778059242, "learning_rate": 9.541669698020184e-06, "loss": 0.7157, "step": 1552 }, { "epoch": 0.16, "grad_norm": 2.6385514951897298, "learning_rate": 9.540956643949317e-06, "loss": 0.7383, "step": 1553 }, { "epoch": 0.16, "grad_norm": 3.2286222175015378, "learning_rate": 9.540243062323234e-06, "loss": 0.6431, "step": 1554 }, { "epoch": 0.16, "grad_norm": 2.6255728686862043, "learning_rate": 9.539528953224835e-06, "loss": 0.6488, "step": 1555 }, { "epoch": 0.16, "grad_norm": 5.294558982143112, "learning_rate": 9.538814316737085e-06, "loss": 0.7291, "step": 1556 }, { "epoch": 0.16, "grad_norm": 2.7649291179617075, "learning_rate": 9.53809915294301e-06, "loss": 0.7938, "step": 1557 }, { "epoch": 0.16, "grad_norm": 2.6080959910423047, "learning_rate": 9.53738346192569e-06, "loss": 0.6932, "step": 1558 }, { "epoch": 0.16, "grad_norm": 3.0121425622421936, "learning_rate": 9.536667243768279e-06, "loss": 0.689, "step": 1559 }, { "epoch": 0.16, "grad_norm": 3.332705210852358, "learning_rate": 9.53595049855398e-06, "loss": 0.7032, "step": 1560 }, { "epoch": 0.16, "grad_norm": 2.5824358159901704, "learning_rate": 9.535233226366067e-06, "loss": 0.6877, "step": 1561 }, { "epoch": 0.16, "grad_norm": 2.7223244944225486, "learning_rate": 9.534515427287865e-06, "loss": 0.6517, "step": 1562 }, { "epoch": 0.16, "grad_norm": 2.0686395234828074, "learning_rate": 9.53379710140277e-06, "loss": 0.7079, "step": 1563 }, { "epoch": 0.16, "grad_norm": 3.0167637777660223, "learning_rate": 9.533078248794232e-06, "loss": 0.6687, "step": 1564 }, { "epoch": 0.16, "grad_norm": 2.432973784171849, "learning_rate": 9.532358869545767e-06, "loss": 0.7802, "step": 1565 }, { "epoch": 0.16, "grad_norm": 2.2926310921200885, "learning_rate": 9.531638963740953e-06, "loss": 0.7231, "step": 1566 }, { "epoch": 0.16, "grad_norm": 2.6393048434940276, "learning_rate": 9.530918531463423e-06, "loss": 0.6623, "step": 1567 }, { "epoch": 0.17, "grad_norm": 2.481069201927267, "learning_rate": 9.530197572796873e-06, "loss": 0.735, "step": 1568 }, { "epoch": 0.17, "grad_norm": 2.6013622318011276, "learning_rate": 9.529476087825067e-06, "loss": 0.6855, "step": 1569 }, { "epoch": 0.17, "grad_norm": 3.4249010091697434, "learning_rate": 9.528754076631821e-06, "loss": 0.7398, "step": 1570 }, { "epoch": 0.17, "grad_norm": 2.2070633925039465, "learning_rate": 9.528031539301016e-06, "loss": 0.7574, "step": 1571 }, { "epoch": 0.17, "grad_norm": 2.555841750294239, "learning_rate": 9.5273084759166e-06, "loss": 0.6432, "step": 1572 }, { "epoch": 0.17, "grad_norm": 1.457477490220107, "learning_rate": 9.526584886562571e-06, "loss": 0.6151, "step": 1573 }, { "epoch": 0.17, "grad_norm": 3.048705570425487, "learning_rate": 9.525860771322995e-06, "loss": 0.7126, "step": 1574 }, { "epoch": 0.17, "grad_norm": 3.438910164173861, "learning_rate": 9.525136130281995e-06, "loss": 0.79, "step": 1575 }, { "epoch": 0.17, "grad_norm": 4.28221697375792, "learning_rate": 9.524410963523763e-06, "loss": 0.7654, "step": 1576 }, { "epoch": 0.17, "grad_norm": 3.0467966030740645, "learning_rate": 9.523685271132543e-06, "loss": 0.7272, "step": 1577 }, { "epoch": 0.17, "grad_norm": 2.3181479200807398, "learning_rate": 9.522959053192646e-06, "loss": 0.7779, "step": 1578 }, { "epoch": 0.17, "grad_norm": 2.571771113992117, "learning_rate": 9.522232309788439e-06, "loss": 0.7544, "step": 1579 }, { "epoch": 0.17, "grad_norm": 3.1183269342095508, "learning_rate": 9.521505041004356e-06, "loss": 0.7422, "step": 1580 }, { "epoch": 0.17, "grad_norm": 2.416853737352066, "learning_rate": 9.520777246924887e-06, "loss": 0.7424, "step": 1581 }, { "epoch": 0.17, "grad_norm": 2.885932690234875, "learning_rate": 9.520048927634587e-06, "loss": 0.6748, "step": 1582 }, { "epoch": 0.17, "grad_norm": 2.615546636686311, "learning_rate": 9.519320083218067e-06, "loss": 0.7059, "step": 1583 }, { "epoch": 0.17, "grad_norm": 2.755154151212538, "learning_rate": 9.518590713760004e-06, "loss": 0.7826, "step": 1584 }, { "epoch": 0.17, "grad_norm": 2.132180252839951, "learning_rate": 9.517860819345136e-06, "loss": 0.6665, "step": 1585 }, { "epoch": 0.17, "grad_norm": 2.255957083552413, "learning_rate": 9.517130400058255e-06, "loss": 0.7323, "step": 1586 }, { "epoch": 0.17, "grad_norm": 2.2483764789100205, "learning_rate": 9.516399455984222e-06, "loss": 0.7601, "step": 1587 }, { "epoch": 0.17, "grad_norm": 3.352631648887012, "learning_rate": 9.515667987207958e-06, "loss": 0.7785, "step": 1588 }, { "epoch": 0.17, "grad_norm": 2.0869685444798676, "learning_rate": 9.514935993814438e-06, "loss": 0.6793, "step": 1589 }, { "epoch": 0.17, "grad_norm": 2.3425785545443816, "learning_rate": 9.514203475888706e-06, "loss": 0.781, "step": 1590 }, { "epoch": 0.17, "grad_norm": 2.86315297570834, "learning_rate": 9.513470433515866e-06, "loss": 0.6503, "step": 1591 }, { "epoch": 0.17, "grad_norm": 3.1652116829667896, "learning_rate": 9.512736866781076e-06, "loss": 0.681, "step": 1592 }, { "epoch": 0.17, "grad_norm": 3.766273085621574, "learning_rate": 9.512002775769562e-06, "loss": 0.7644, "step": 1593 }, { "epoch": 0.17, "grad_norm": 2.2909986490602074, "learning_rate": 9.511268160566609e-06, "loss": 0.6739, "step": 1594 }, { "epoch": 0.17, "grad_norm": 2.075660637359557, "learning_rate": 9.510533021257562e-06, "loss": 0.6721, "step": 1595 }, { "epoch": 0.17, "grad_norm": 2.21371811437727, "learning_rate": 9.509797357927826e-06, "loss": 0.6669, "step": 1596 }, { "epoch": 0.17, "grad_norm": 2.2650689770495087, "learning_rate": 9.50906117066287e-06, "loss": 0.6818, "step": 1597 }, { "epoch": 0.17, "grad_norm": 8.894316490655317, "learning_rate": 9.508324459548221e-06, "loss": 0.6128, "step": 1598 }, { "epoch": 0.17, "grad_norm": 2.363686348557055, "learning_rate": 9.50758722466947e-06, "loss": 0.6766, "step": 1599 }, { "epoch": 0.17, "grad_norm": 2.1204545096681504, "learning_rate": 9.506849466112264e-06, "loss": 0.7487, "step": 1600 }, { "epoch": 0.17, "grad_norm": 2.445721487013043, "learning_rate": 9.506111183962316e-06, "loss": 0.66, "step": 1601 }, { "epoch": 0.17, "grad_norm": 2.9704281101668144, "learning_rate": 9.505372378305398e-06, "loss": 0.6566, "step": 1602 }, { "epoch": 0.17, "grad_norm": 4.032858460518241, "learning_rate": 9.504633049227338e-06, "loss": 0.7025, "step": 1603 }, { "epoch": 0.17, "grad_norm": 2.3197467695242198, "learning_rate": 9.503893196814034e-06, "loss": 0.6593, "step": 1604 }, { "epoch": 0.17, "grad_norm": 3.003637291105467, "learning_rate": 9.503152821151435e-06, "loss": 0.7282, "step": 1605 }, { "epoch": 0.17, "grad_norm": 2.974823409792684, "learning_rate": 9.502411922325561e-06, "loss": 0.644, "step": 1606 }, { "epoch": 0.17, "grad_norm": 2.3999354547850578, "learning_rate": 9.501670500422483e-06, "loss": 0.7695, "step": 1607 }, { "epoch": 0.17, "grad_norm": 3.142048720747408, "learning_rate": 9.500928555528341e-06, "loss": 0.697, "step": 1608 }, { "epoch": 0.17, "grad_norm": 2.1918483417994215, "learning_rate": 9.500186087729331e-06, "loss": 0.6865, "step": 1609 }, { "epoch": 0.17, "grad_norm": 2.3130950683881673, "learning_rate": 9.49944309711171e-06, "loss": 0.6882, "step": 1610 }, { "epoch": 0.17, "grad_norm": 2.2722152426733166, "learning_rate": 9.498699583761795e-06, "loss": 0.7057, "step": 1611 }, { "epoch": 0.17, "grad_norm": 2.3067065149092674, "learning_rate": 9.497955547765966e-06, "loss": 0.6129, "step": 1612 }, { "epoch": 0.17, "grad_norm": 3.000454383678098, "learning_rate": 9.497210989210665e-06, "loss": 0.709, "step": 1613 }, { "epoch": 0.17, "grad_norm": 2.6049845885606016, "learning_rate": 9.49646590818239e-06, "loss": 0.7035, "step": 1614 }, { "epoch": 0.17, "grad_norm": 4.5829176336530795, "learning_rate": 9.495720304767705e-06, "loss": 0.6982, "step": 1615 }, { "epoch": 0.17, "grad_norm": 2.8362441150996003, "learning_rate": 9.494974179053233e-06, "loss": 0.7086, "step": 1616 }, { "epoch": 0.17, "grad_norm": 7.6175372207495045, "learning_rate": 9.494227531125652e-06, "loss": 0.7028, "step": 1617 }, { "epoch": 0.17, "grad_norm": 3.522650359801413, "learning_rate": 9.493480361071707e-06, "loss": 0.7281, "step": 1618 }, { "epoch": 0.17, "grad_norm": 2.4815378042784713, "learning_rate": 9.492732668978205e-06, "loss": 0.6899, "step": 1619 }, { "epoch": 0.17, "grad_norm": 3.439653241146134, "learning_rate": 9.491984454932009e-06, "loss": 0.6359, "step": 1620 }, { "epoch": 0.17, "grad_norm": 2.322479399522989, "learning_rate": 9.491235719020042e-06, "loss": 0.6365, "step": 1621 }, { "epoch": 0.17, "grad_norm": 2.8722680257735367, "learning_rate": 9.490486461329293e-06, "loss": 0.7157, "step": 1622 }, { "epoch": 0.17, "grad_norm": 2.8325212931006036, "learning_rate": 9.489736681946809e-06, "loss": 0.7014, "step": 1623 }, { "epoch": 0.17, "grad_norm": 2.8149300073276424, "learning_rate": 9.488986380959694e-06, "loss": 0.7507, "step": 1624 }, { "epoch": 0.17, "grad_norm": 4.399930194308783, "learning_rate": 9.488235558455118e-06, "loss": 0.7731, "step": 1625 }, { "epoch": 0.17, "grad_norm": 3.4240902602318832, "learning_rate": 9.487484214520308e-06, "loss": 0.6616, "step": 1626 }, { "epoch": 0.17, "grad_norm": 3.3880823758905465, "learning_rate": 9.486732349242556e-06, "loss": 0.6865, "step": 1627 }, { "epoch": 0.17, "grad_norm": 2.434462113134042, "learning_rate": 9.485979962709209e-06, "loss": 0.7546, "step": 1628 }, { "epoch": 0.17, "grad_norm": 3.140235982386299, "learning_rate": 9.485227055007676e-06, "loss": 0.6478, "step": 1629 }, { "epoch": 0.17, "grad_norm": 2.6979154962012895, "learning_rate": 9.48447362622543e-06, "loss": 0.7336, "step": 1630 }, { "epoch": 0.17, "grad_norm": 3.564891032405616, "learning_rate": 9.483719676450003e-06, "loss": 0.6995, "step": 1631 }, { "epoch": 0.17, "grad_norm": 5.186995520230965, "learning_rate": 9.482965205768983e-06, "loss": 0.7284, "step": 1632 }, { "epoch": 0.17, "grad_norm": 2.93274944592198, "learning_rate": 9.482210214270026e-06, "loss": 0.7065, "step": 1633 }, { "epoch": 0.17, "grad_norm": 2.3964166552732427, "learning_rate": 9.481454702040842e-06, "loss": 0.7386, "step": 1634 }, { "epoch": 0.17, "grad_norm": 2.290346577796857, "learning_rate": 9.480698669169207e-06, "loss": 0.6791, "step": 1635 }, { "epoch": 0.17, "grad_norm": 6.335965960819836, "learning_rate": 9.479942115742951e-06, "loss": 0.6365, "step": 1636 }, { "epoch": 0.17, "grad_norm": 1.9605346112705178, "learning_rate": 9.47918504184997e-06, "loss": 0.6899, "step": 1637 }, { "epoch": 0.17, "grad_norm": 2.7418810164216705, "learning_rate": 9.47842744757822e-06, "loss": 0.6344, "step": 1638 }, { "epoch": 0.17, "grad_norm": 2.2563707246896096, "learning_rate": 9.477669333015714e-06, "loss": 0.5757, "step": 1639 }, { "epoch": 0.17, "grad_norm": 3.013900658553373, "learning_rate": 9.476910698250529e-06, "loss": 0.7077, "step": 1640 }, { "epoch": 0.17, "grad_norm": 2.396575665358366, "learning_rate": 9.4761515433708e-06, "loss": 0.7228, "step": 1641 }, { "epoch": 0.17, "grad_norm": 2.40289964027477, "learning_rate": 9.475391868464725e-06, "loss": 0.7134, "step": 1642 }, { "epoch": 0.17, "grad_norm": 2.387400572157838, "learning_rate": 9.474631673620558e-06, "loss": 0.7249, "step": 1643 }, { "epoch": 0.17, "grad_norm": 2.470393309910611, "learning_rate": 9.47387095892662e-06, "loss": 0.7756, "step": 1644 }, { "epoch": 0.17, "grad_norm": 2.318559436180576, "learning_rate": 9.473109724471287e-06, "loss": 0.6502, "step": 1645 }, { "epoch": 0.17, "grad_norm": 2.5401989407597005, "learning_rate": 9.472347970342995e-06, "loss": 0.6817, "step": 1646 }, { "epoch": 0.17, "grad_norm": 3.2594402322006175, "learning_rate": 9.471585696630245e-06, "loss": 0.7104, "step": 1647 }, { "epoch": 0.17, "grad_norm": 1.1769351591530914, "learning_rate": 9.470822903421595e-06, "loss": 0.6472, "step": 1648 }, { "epoch": 0.17, "grad_norm": 2.540121387003377, "learning_rate": 9.470059590805663e-06, "loss": 0.715, "step": 1649 }, { "epoch": 0.17, "grad_norm": 2.3993005705838404, "learning_rate": 9.46929575887113e-06, "loss": 0.7426, "step": 1650 }, { "epoch": 0.17, "grad_norm": 2.4076747230692574, "learning_rate": 9.468531407706733e-06, "loss": 0.6518, "step": 1651 }, { "epoch": 0.17, "grad_norm": 2.3345495527645443, "learning_rate": 9.467766537401278e-06, "loss": 0.6538, "step": 1652 }, { "epoch": 0.17, "grad_norm": 2.2658417056945064, "learning_rate": 9.46700114804362e-06, "loss": 0.6715, "step": 1653 }, { "epoch": 0.17, "grad_norm": 3.2719118199932598, "learning_rate": 9.46623523972268e-06, "loss": 0.6762, "step": 1654 }, { "epoch": 0.17, "grad_norm": 3.0582788277872037, "learning_rate": 9.465468812527443e-06, "loss": 0.629, "step": 1655 }, { "epoch": 0.17, "grad_norm": 2.247235978920937, "learning_rate": 9.464701866546945e-06, "loss": 0.7408, "step": 1656 }, { "epoch": 0.17, "grad_norm": 3.155468792797194, "learning_rate": 9.463934401870292e-06, "loss": 0.613, "step": 1657 }, { "epoch": 0.17, "grad_norm": 2.544248700229081, "learning_rate": 9.463166418586645e-06, "loss": 0.7269, "step": 1658 }, { "epoch": 0.17, "grad_norm": 2.3313482073212315, "learning_rate": 9.462397916785222e-06, "loss": 0.7243, "step": 1659 }, { "epoch": 0.17, "grad_norm": 2.82456167117737, "learning_rate": 9.461628896555312e-06, "loss": 0.7121, "step": 1660 }, { "epoch": 0.17, "grad_norm": 5.726685554503556, "learning_rate": 9.460859357986251e-06, "loss": 0.6316, "step": 1661 }, { "epoch": 0.17, "grad_norm": 3.542875855778497, "learning_rate": 9.460089301167448e-06, "loss": 0.6625, "step": 1662 }, { "epoch": 0.17, "grad_norm": 2.86229240642419, "learning_rate": 9.45931872618836e-06, "loss": 0.6818, "step": 1663 }, { "epoch": 0.18, "grad_norm": 2.656197150207348, "learning_rate": 9.458547633138515e-06, "loss": 0.6801, "step": 1664 }, { "epoch": 0.18, "grad_norm": 3.243631283233658, "learning_rate": 9.457776022107494e-06, "loss": 0.6679, "step": 1665 }, { "epoch": 0.18, "grad_norm": 2.5293787058884636, "learning_rate": 9.45700389318494e-06, "loss": 0.7386, "step": 1666 }, { "epoch": 0.18, "grad_norm": 2.310070540436612, "learning_rate": 9.456231246460557e-06, "loss": 0.774, "step": 1667 }, { "epoch": 0.18, "grad_norm": 2.508606440342495, "learning_rate": 9.455458082024112e-06, "loss": 0.7392, "step": 1668 }, { "epoch": 0.18, "grad_norm": 2.2920775313114405, "learning_rate": 9.454684399965423e-06, "loss": 0.758, "step": 1669 }, { "epoch": 0.18, "grad_norm": 9.041893234843545, "learning_rate": 9.453910200374382e-06, "loss": 0.6369, "step": 1670 }, { "epoch": 0.18, "grad_norm": 1.2931690055491833, "learning_rate": 9.453135483340925e-06, "loss": 0.6547, "step": 1671 }, { "epoch": 0.18, "grad_norm": 1.3115301263653227, "learning_rate": 9.452360248955062e-06, "loss": 0.6449, "step": 1672 }, { "epoch": 0.18, "grad_norm": 2.9637645722219323, "learning_rate": 9.451584497306856e-06, "loss": 0.6999, "step": 1673 }, { "epoch": 0.18, "grad_norm": 2.3068845068680326, "learning_rate": 9.45080822848643e-06, "loss": 0.6948, "step": 1674 }, { "epoch": 0.18, "grad_norm": 2.2231756130525673, "learning_rate": 9.45003144258397e-06, "loss": 0.6791, "step": 1675 }, { "epoch": 0.18, "grad_norm": 3.320104844855623, "learning_rate": 9.449254139689721e-06, "loss": 0.6537, "step": 1676 }, { "epoch": 0.18, "grad_norm": 2.471206074285933, "learning_rate": 9.448476319893989e-06, "loss": 0.7204, "step": 1677 }, { "epoch": 0.18, "grad_norm": 2.4573874653738934, "learning_rate": 9.447697983287136e-06, "loss": 0.6883, "step": 1678 }, { "epoch": 0.18, "grad_norm": 2.2768018435285424, "learning_rate": 9.446919129959589e-06, "loss": 0.6646, "step": 1679 }, { "epoch": 0.18, "grad_norm": 2.5304009939179433, "learning_rate": 9.44613976000183e-06, "loss": 0.6923, "step": 1680 }, { "epoch": 0.18, "grad_norm": 2.3183304739700534, "learning_rate": 9.44535987350441e-06, "loss": 0.82, "step": 1681 }, { "epoch": 0.18, "grad_norm": 2.293573173274393, "learning_rate": 9.44457947055793e-06, "loss": 0.7597, "step": 1682 }, { "epoch": 0.18, "grad_norm": 2.820715534237888, "learning_rate": 9.443798551253052e-06, "loss": 0.7216, "step": 1683 }, { "epoch": 0.18, "grad_norm": 2.122577759545682, "learning_rate": 9.443017115680503e-06, "loss": 0.7303, "step": 1684 }, { "epoch": 0.18, "grad_norm": 2.653511448510485, "learning_rate": 9.442235163931072e-06, "loss": 0.6314, "step": 1685 }, { "epoch": 0.18, "grad_norm": 2.186415024401008, "learning_rate": 9.441452696095601e-06, "loss": 0.755, "step": 1686 }, { "epoch": 0.18, "grad_norm": 2.3274053893757563, "learning_rate": 9.440669712264994e-06, "loss": 0.7157, "step": 1687 }, { "epoch": 0.18, "grad_norm": 1.9206038169379611, "learning_rate": 9.439886212530217e-06, "loss": 0.7278, "step": 1688 }, { "epoch": 0.18, "grad_norm": 2.974321572145424, "learning_rate": 9.439102196982292e-06, "loss": 0.6442, "step": 1689 }, { "epoch": 0.18, "grad_norm": 1.9034939575257075, "learning_rate": 9.438317665712308e-06, "loss": 0.7244, "step": 1690 }, { "epoch": 0.18, "grad_norm": 2.5630336170505945, "learning_rate": 9.437532618811407e-06, "loss": 0.7238, "step": 1691 }, { "epoch": 0.18, "grad_norm": 2.7600588498607768, "learning_rate": 9.436747056370794e-06, "loss": 0.652, "step": 1692 }, { "epoch": 0.18, "grad_norm": 2.4504076582596195, "learning_rate": 9.435960978481734e-06, "loss": 0.7314, "step": 1693 }, { "epoch": 0.18, "grad_norm": 2.51073812241736, "learning_rate": 9.435174385235548e-06, "loss": 0.7227, "step": 1694 }, { "epoch": 0.18, "grad_norm": 2.846356911208405, "learning_rate": 9.434387276723624e-06, "loss": 0.6857, "step": 1695 }, { "epoch": 0.18, "grad_norm": 2.703615406650167, "learning_rate": 9.433599653037406e-06, "loss": 0.5352, "step": 1696 }, { "epoch": 0.18, "grad_norm": 2.3457312870011955, "learning_rate": 9.432811514268396e-06, "loss": 0.7758, "step": 1697 }, { "epoch": 0.18, "grad_norm": 2.271626563468226, "learning_rate": 9.432022860508158e-06, "loss": 0.7422, "step": 1698 }, { "epoch": 0.18, "grad_norm": 2.3311293386557175, "learning_rate": 9.431233691848316e-06, "loss": 0.6922, "step": 1699 }, { "epoch": 0.18, "grad_norm": 3.13275822233704, "learning_rate": 9.430444008380553e-06, "loss": 0.6746, "step": 1700 }, { "epoch": 0.18, "grad_norm": 2.3910164531032594, "learning_rate": 9.429653810196611e-06, "loss": 0.7156, "step": 1701 }, { "epoch": 0.18, "grad_norm": 2.25390169947036, "learning_rate": 9.428863097388295e-06, "loss": 0.7435, "step": 1702 }, { "epoch": 0.18, "grad_norm": 1.9560405228811595, "learning_rate": 9.428071870047469e-06, "loss": 0.7, "step": 1703 }, { "epoch": 0.18, "grad_norm": 1.8824075331234948, "learning_rate": 9.427280128266049e-06, "loss": 0.7596, "step": 1704 }, { "epoch": 0.18, "grad_norm": 2.286053461352808, "learning_rate": 9.426487872136025e-06, "loss": 0.7239, "step": 1705 }, { "epoch": 0.18, "grad_norm": 2.008603418544589, "learning_rate": 9.425695101749435e-06, "loss": 0.7413, "step": 1706 }, { "epoch": 0.18, "grad_norm": 2.10028582452023, "learning_rate": 9.424901817198381e-06, "loss": 0.6829, "step": 1707 }, { "epoch": 0.18, "grad_norm": 2.1535649860037007, "learning_rate": 9.424108018575026e-06, "loss": 0.7089, "step": 1708 }, { "epoch": 0.18, "grad_norm": 2.8660284485190903, "learning_rate": 9.42331370597159e-06, "loss": 0.7544, "step": 1709 }, { "epoch": 0.18, "grad_norm": 2.5095078891354508, "learning_rate": 9.422518879480353e-06, "loss": 0.6405, "step": 1710 }, { "epoch": 0.18, "grad_norm": 2.1188014421663874, "learning_rate": 9.421723539193657e-06, "loss": 0.6818, "step": 1711 }, { "epoch": 0.18, "grad_norm": 3.163280526338815, "learning_rate": 9.420927685203901e-06, "loss": 0.6727, "step": 1712 }, { "epoch": 0.18, "grad_norm": 3.7994284590304366, "learning_rate": 9.42013131760355e-06, "loss": 0.6459, "step": 1713 }, { "epoch": 0.18, "grad_norm": 2.098234987273422, "learning_rate": 9.419334436485117e-06, "loss": 0.7406, "step": 1714 }, { "epoch": 0.18, "grad_norm": 2.204421750962229, "learning_rate": 9.418537041941185e-06, "loss": 0.75, "step": 1715 }, { "epoch": 0.18, "grad_norm": 2.551866812050452, "learning_rate": 9.417739134064392e-06, "loss": 0.7352, "step": 1716 }, { "epoch": 0.18, "grad_norm": 2.1052536649230147, "learning_rate": 9.416940712947436e-06, "loss": 0.7457, "step": 1717 }, { "epoch": 0.18, "grad_norm": 2.552123073690243, "learning_rate": 9.416141778683077e-06, "loss": 0.7008, "step": 1718 }, { "epoch": 0.18, "grad_norm": 2.386726881220311, "learning_rate": 9.415342331364132e-06, "loss": 0.6845, "step": 1719 }, { "epoch": 0.18, "grad_norm": 2.0231356594407064, "learning_rate": 9.414542371083477e-06, "loss": 0.6269, "step": 1720 }, { "epoch": 0.18, "grad_norm": 3.3944077369294376, "learning_rate": 9.413741897934052e-06, "loss": 0.7178, "step": 1721 }, { "epoch": 0.18, "grad_norm": 2.4163679275832997, "learning_rate": 9.412940912008852e-06, "loss": 0.6554, "step": 1722 }, { "epoch": 0.18, "grad_norm": 2.1677504987549554, "learning_rate": 9.412139413400933e-06, "loss": 0.7565, "step": 1723 }, { "epoch": 0.18, "grad_norm": 8.628059217990637, "learning_rate": 9.41133740220341e-06, "loss": 0.6513, "step": 1724 }, { "epoch": 0.18, "grad_norm": 2.2085858844991972, "learning_rate": 9.410534878509461e-06, "loss": 0.6636, "step": 1725 }, { "epoch": 0.18, "grad_norm": 1.9079694945157286, "learning_rate": 9.40973184241232e-06, "loss": 0.6428, "step": 1726 }, { "epoch": 0.18, "grad_norm": 2.2280940372738156, "learning_rate": 9.408928294005279e-06, "loss": 0.6991, "step": 1727 }, { "epoch": 0.18, "grad_norm": 2.2464560200093553, "learning_rate": 9.408124233381695e-06, "loss": 0.7057, "step": 1728 }, { "epoch": 0.18, "grad_norm": 2.41547778019205, "learning_rate": 9.40731966063498e-06, "loss": 0.7175, "step": 1729 }, { "epoch": 0.18, "grad_norm": 1.9493683126801489, "learning_rate": 9.406514575858606e-06, "loss": 0.6848, "step": 1730 }, { "epoch": 0.18, "grad_norm": 2.653450440842846, "learning_rate": 9.405708979146106e-06, "loss": 0.7155, "step": 1731 }, { "epoch": 0.18, "grad_norm": 2.2614446312235077, "learning_rate": 9.404902870591076e-06, "loss": 0.7237, "step": 1732 }, { "epoch": 0.18, "grad_norm": 2.236025418767225, "learning_rate": 9.40409625028716e-06, "loss": 0.7383, "step": 1733 }, { "epoch": 0.18, "grad_norm": 2.0068155617046903, "learning_rate": 9.403289118328074e-06, "loss": 0.624, "step": 1734 }, { "epoch": 0.18, "grad_norm": 2.586872590910335, "learning_rate": 9.402481474807588e-06, "loss": 0.6978, "step": 1735 }, { "epoch": 0.18, "grad_norm": 2.05535830543558, "learning_rate": 9.401673319819529e-06, "loss": 0.6669, "step": 1736 }, { "epoch": 0.18, "grad_norm": 1.8884455652664238, "learning_rate": 9.400864653457789e-06, "loss": 0.697, "step": 1737 }, { "epoch": 0.18, "grad_norm": 2.1851392337369266, "learning_rate": 9.400055475816313e-06, "loss": 0.6524, "step": 1738 }, { "epoch": 0.18, "grad_norm": 2.576173165092392, "learning_rate": 9.399245786989112e-06, "loss": 0.7119, "step": 1739 }, { "epoch": 0.18, "grad_norm": 2.620809902632731, "learning_rate": 9.398435587070254e-06, "loss": 0.7611, "step": 1740 }, { "epoch": 0.18, "grad_norm": 2.8033777586962114, "learning_rate": 9.397624876153862e-06, "loss": 0.6888, "step": 1741 }, { "epoch": 0.18, "grad_norm": 2.1952968707428666, "learning_rate": 9.396813654334124e-06, "loss": 0.729, "step": 1742 }, { "epoch": 0.18, "grad_norm": 2.184788154360516, "learning_rate": 9.396001921705287e-06, "loss": 0.7522, "step": 1743 }, { "epoch": 0.18, "grad_norm": 2.5561173456941186, "learning_rate": 9.395189678361655e-06, "loss": 0.6819, "step": 1744 }, { "epoch": 0.18, "grad_norm": 2.9560468612635016, "learning_rate": 9.39437692439759e-06, "loss": 0.7499, "step": 1745 }, { "epoch": 0.18, "grad_norm": 3.170377346969926, "learning_rate": 9.393563659907516e-06, "loss": 0.7307, "step": 1746 }, { "epoch": 0.18, "grad_norm": 2.0529367396092386, "learning_rate": 9.392749884985918e-06, "loss": 0.6618, "step": 1747 }, { "epoch": 0.18, "grad_norm": 2.5770411994159867, "learning_rate": 9.391935599727336e-06, "loss": 0.702, "step": 1748 }, { "epoch": 0.18, "grad_norm": 2.1159695596217305, "learning_rate": 9.391120804226372e-06, "loss": 0.7339, "step": 1749 }, { "epoch": 0.18, "grad_norm": 2.9557470831906363, "learning_rate": 9.390305498577685e-06, "loss": 0.829, "step": 1750 }, { "epoch": 0.18, "grad_norm": 2.066429118959667, "learning_rate": 9.389489682875999e-06, "loss": 0.7344, "step": 1751 }, { "epoch": 0.18, "grad_norm": 2.3728866613105315, "learning_rate": 9.388673357216088e-06, "loss": 0.6821, "step": 1752 }, { "epoch": 0.18, "grad_norm": 2.5738421703420835, "learning_rate": 9.387856521692795e-06, "loss": 0.6881, "step": 1753 }, { "epoch": 0.18, "grad_norm": 2.2173777484723933, "learning_rate": 9.387039176401013e-06, "loss": 0.6497, "step": 1754 }, { "epoch": 0.18, "grad_norm": 2.8326929223772916, "learning_rate": 9.386221321435702e-06, "loss": 0.7752, "step": 1755 }, { "epoch": 0.18, "grad_norm": 2.342472475816738, "learning_rate": 9.385402956891878e-06, "loss": 0.6642, "step": 1756 }, { "epoch": 0.18, "grad_norm": 1.8421908380492378, "learning_rate": 9.384584082864614e-06, "loss": 0.628, "step": 1757 }, { "epoch": 0.18, "grad_norm": 2.293083670289089, "learning_rate": 9.383764699449047e-06, "loss": 0.7642, "step": 1758 }, { "epoch": 0.19, "grad_norm": 3.1881257410006234, "learning_rate": 9.382944806740369e-06, "loss": 0.697, "step": 1759 }, { "epoch": 0.19, "grad_norm": 2.0454383098040125, "learning_rate": 9.382124404833832e-06, "loss": 0.6683, "step": 1760 }, { "epoch": 0.19, "grad_norm": 2.39746694551876, "learning_rate": 9.38130349382475e-06, "loss": 0.7014, "step": 1761 }, { "epoch": 0.19, "grad_norm": 2.646324899559431, "learning_rate": 9.380482073808493e-06, "loss": 0.6275, "step": 1762 }, { "epoch": 0.19, "grad_norm": 2.31148966058067, "learning_rate": 9.379660144880491e-06, "loss": 0.6855, "step": 1763 }, { "epoch": 0.19, "grad_norm": 2.6610941993286072, "learning_rate": 9.378837707136235e-06, "loss": 0.721, "step": 1764 }, { "epoch": 0.19, "grad_norm": 2.181626035362435, "learning_rate": 9.37801476067127e-06, "loss": 0.7382, "step": 1765 }, { "epoch": 0.19, "grad_norm": 2.5121904009240774, "learning_rate": 9.377191305581208e-06, "loss": 0.6953, "step": 1766 }, { "epoch": 0.19, "grad_norm": 2.324736532829736, "learning_rate": 9.376367341961712e-06, "loss": 0.6536, "step": 1767 }, { "epoch": 0.19, "grad_norm": 2.1731590595672206, "learning_rate": 9.375542869908509e-06, "loss": 0.6795, "step": 1768 }, { "epoch": 0.19, "grad_norm": 2.0441280442692396, "learning_rate": 9.374717889517384e-06, "loss": 0.6707, "step": 1769 }, { "epoch": 0.19, "grad_norm": 2.188945519966826, "learning_rate": 9.373892400884182e-06, "loss": 0.6427, "step": 1770 }, { "epoch": 0.19, "grad_norm": 1.8292074188975915, "learning_rate": 9.373066404104803e-06, "loss": 0.6416, "step": 1771 }, { "epoch": 0.19, "grad_norm": 2.2353048574201884, "learning_rate": 9.37223989927521e-06, "loss": 0.7069, "step": 1772 }, { "epoch": 0.19, "grad_norm": 3.313904910526145, "learning_rate": 9.371412886491424e-06, "loss": 0.7505, "step": 1773 }, { "epoch": 0.19, "grad_norm": 4.177329261346893, "learning_rate": 9.370585365849527e-06, "loss": 0.6287, "step": 1774 }, { "epoch": 0.19, "grad_norm": 2.509687395524509, "learning_rate": 9.369757337445655e-06, "loss": 0.6938, "step": 1775 }, { "epoch": 0.19, "grad_norm": 2.50959271620824, "learning_rate": 9.368928801376009e-06, "loss": 0.7594, "step": 1776 }, { "epoch": 0.19, "grad_norm": 2.3425929238257943, "learning_rate": 9.368099757736843e-06, "loss": 0.6348, "step": 1777 }, { "epoch": 0.19, "grad_norm": 2.571462951149562, "learning_rate": 9.367270206624474e-06, "loss": 0.7839, "step": 1778 }, { "epoch": 0.19, "grad_norm": 2.288592181156132, "learning_rate": 9.366440148135276e-06, "loss": 0.7086, "step": 1779 }, { "epoch": 0.19, "grad_norm": 3.7452409081529328, "learning_rate": 9.365609582365685e-06, "loss": 0.7787, "step": 1780 }, { "epoch": 0.19, "grad_norm": 2.188224299368682, "learning_rate": 9.364778509412191e-06, "loss": 0.7622, "step": 1781 }, { "epoch": 0.19, "grad_norm": 2.7450685765521223, "learning_rate": 9.363946929371349e-06, "loss": 0.6863, "step": 1782 }, { "epoch": 0.19, "grad_norm": 2.5045275357571284, "learning_rate": 9.363114842339767e-06, "loss": 0.7075, "step": 1783 }, { "epoch": 0.19, "grad_norm": 3.0164414465737837, "learning_rate": 9.362282248414114e-06, "loss": 0.6598, "step": 1784 }, { "epoch": 0.19, "grad_norm": 2.35492294691701, "learning_rate": 9.361449147691122e-06, "loss": 0.7235, "step": 1785 }, { "epoch": 0.19, "grad_norm": 2.2619503151970335, "learning_rate": 9.360615540267572e-06, "loss": 0.6997, "step": 1786 }, { "epoch": 0.19, "grad_norm": 2.5899599164267766, "learning_rate": 9.359781426240316e-06, "loss": 0.7257, "step": 1787 }, { "epoch": 0.19, "grad_norm": 2.5634208857345366, "learning_rate": 9.358946805706257e-06, "loss": 0.664, "step": 1788 }, { "epoch": 0.19, "grad_norm": 4.51798972826991, "learning_rate": 9.358111678762359e-06, "loss": 0.6728, "step": 1789 }, { "epoch": 0.19, "grad_norm": 2.3326773791425466, "learning_rate": 9.357276045505643e-06, "loss": 0.7358, "step": 1790 }, { "epoch": 0.19, "grad_norm": 2.2350420427602833, "learning_rate": 9.35643990603319e-06, "loss": 0.6856, "step": 1791 }, { "epoch": 0.19, "grad_norm": 2.608868685684469, "learning_rate": 9.355603260442145e-06, "loss": 0.6177, "step": 1792 }, { "epoch": 0.19, "grad_norm": 1.9713835504144617, "learning_rate": 9.354766108829703e-06, "loss": 0.7943, "step": 1793 }, { "epoch": 0.19, "grad_norm": 2.8062689555008857, "learning_rate": 9.353928451293122e-06, "loss": 0.6725, "step": 1794 }, { "epoch": 0.19, "grad_norm": 2.671272851573433, "learning_rate": 9.35309028792972e-06, "loss": 0.6848, "step": 1795 }, { "epoch": 0.19, "grad_norm": 2.066156049708186, "learning_rate": 9.352251618836872e-06, "loss": 0.7521, "step": 1796 }, { "epoch": 0.19, "grad_norm": 3.0657191943549273, "learning_rate": 9.351412444112013e-06, "loss": 0.6063, "step": 1797 }, { "epoch": 0.19, "grad_norm": 2.647445442676486, "learning_rate": 9.350572763852633e-06, "loss": 0.6984, "step": 1798 }, { "epoch": 0.19, "grad_norm": 2.2824264713651994, "learning_rate": 9.349732578156286e-06, "loss": 0.6747, "step": 1799 }, { "epoch": 0.19, "grad_norm": 2.232278265988478, "learning_rate": 9.348891887120582e-06, "loss": 0.7481, "step": 1800 }, { "epoch": 0.19, "grad_norm": 2.2260356050576937, "learning_rate": 9.348050690843192e-06, "loss": 0.7101, "step": 1801 }, { "epoch": 0.19, "grad_norm": 2.6245692650813734, "learning_rate": 9.347208989421838e-06, "loss": 0.6776, "step": 1802 }, { "epoch": 0.19, "grad_norm": 2.8932084963461766, "learning_rate": 9.346366782954313e-06, "loss": 0.7145, "step": 1803 }, { "epoch": 0.19, "grad_norm": 2.6293013765675806, "learning_rate": 9.345524071538457e-06, "loss": 0.762, "step": 1804 }, { "epoch": 0.19, "grad_norm": 2.4909478746794926, "learning_rate": 9.344680855272178e-06, "loss": 0.7797, "step": 1805 }, { "epoch": 0.19, "grad_norm": 2.678911462141494, "learning_rate": 9.343837134253434e-06, "loss": 0.7017, "step": 1806 }, { "epoch": 0.19, "grad_norm": 3.236697329924541, "learning_rate": 9.342992908580252e-06, "loss": 0.6629, "step": 1807 }, { "epoch": 0.19, "grad_norm": 2.6302188588519484, "learning_rate": 9.342148178350705e-06, "loss": 0.7272, "step": 1808 }, { "epoch": 0.19, "grad_norm": 3.3111476074297292, "learning_rate": 9.341302943662937e-06, "loss": 0.6723, "step": 1809 }, { "epoch": 0.19, "grad_norm": 2.9282424193071446, "learning_rate": 9.34045720461514e-06, "loss": 0.8171, "step": 1810 }, { "epoch": 0.19, "grad_norm": 2.9521986711914656, "learning_rate": 9.339610961305575e-06, "loss": 0.7266, "step": 1811 }, { "epoch": 0.19, "grad_norm": 2.6008867763760595, "learning_rate": 9.33876421383255e-06, "loss": 0.6991, "step": 1812 }, { "epoch": 0.19, "grad_norm": 2.4219540920578213, "learning_rate": 9.337916962294443e-06, "loss": 0.7751, "step": 1813 }, { "epoch": 0.19, "grad_norm": 2.3994526596884787, "learning_rate": 9.337069206789681e-06, "loss": 0.7132, "step": 1814 }, { "epoch": 0.19, "grad_norm": 2.141697670450548, "learning_rate": 9.336220947416757e-06, "loss": 0.6879, "step": 1815 }, { "epoch": 0.19, "grad_norm": 2.442700420921565, "learning_rate": 9.335372184274219e-06, "loss": 0.725, "step": 1816 }, { "epoch": 0.19, "grad_norm": 2.9922843786433275, "learning_rate": 9.334522917460671e-06, "loss": 0.7077, "step": 1817 }, { "epoch": 0.19, "grad_norm": 1.9847132878411518, "learning_rate": 9.33367314707478e-06, "loss": 0.6625, "step": 1818 }, { "epoch": 0.19, "grad_norm": 2.2682583137130137, "learning_rate": 9.332822873215273e-06, "loss": 0.6533, "step": 1819 }, { "epoch": 0.19, "grad_norm": 2.1574351053442125, "learning_rate": 9.331972095980927e-06, "loss": 0.6908, "step": 1820 }, { "epoch": 0.19, "grad_norm": 2.495175865873836, "learning_rate": 9.331120815470586e-06, "loss": 0.69, "step": 1821 }, { "epoch": 0.19, "grad_norm": 2.5556685920907243, "learning_rate": 9.330269031783147e-06, "loss": 0.6904, "step": 1822 }, { "epoch": 0.19, "grad_norm": 2.50380795052326, "learning_rate": 9.329416745017573e-06, "loss": 0.6958, "step": 1823 }, { "epoch": 0.19, "grad_norm": 2.195310629239766, "learning_rate": 9.328563955272873e-06, "loss": 0.718, "step": 1824 }, { "epoch": 0.19, "grad_norm": 2.596171146233937, "learning_rate": 9.327710662648128e-06, "loss": 0.6523, "step": 1825 }, { "epoch": 0.19, "grad_norm": 2.1903089534866957, "learning_rate": 9.326856867242467e-06, "loss": 0.6914, "step": 1826 }, { "epoch": 0.19, "grad_norm": 2.1949315431307785, "learning_rate": 9.326002569155084e-06, "loss": 0.6121, "step": 1827 }, { "epoch": 0.19, "grad_norm": 2.1196053273242206, "learning_rate": 9.325147768485226e-06, "loss": 0.7247, "step": 1828 }, { "epoch": 0.19, "grad_norm": 2.927602122432581, "learning_rate": 9.324292465332205e-06, "loss": 0.7023, "step": 1829 }, { "epoch": 0.19, "grad_norm": 2.59333950514103, "learning_rate": 9.323436659795384e-06, "loss": 0.774, "step": 1830 }, { "epoch": 0.19, "grad_norm": 2.1860597709229794, "learning_rate": 9.32258035197419e-06, "loss": 0.6294, "step": 1831 }, { "epoch": 0.19, "grad_norm": 2.237668840439711, "learning_rate": 9.321723541968106e-06, "loss": 0.6915, "step": 1832 }, { "epoch": 0.19, "grad_norm": 3.013676832988803, "learning_rate": 9.320866229876674e-06, "loss": 0.7642, "step": 1833 }, { "epoch": 0.19, "grad_norm": 2.357450534390374, "learning_rate": 9.320008415799496e-06, "loss": 0.7363, "step": 1834 }, { "epoch": 0.19, "grad_norm": 3.2990217042269863, "learning_rate": 9.319150099836225e-06, "loss": 0.6567, "step": 1835 }, { "epoch": 0.19, "grad_norm": 2.4112889638082033, "learning_rate": 9.318291282086582e-06, "loss": 0.6427, "step": 1836 }, { "epoch": 0.19, "grad_norm": 2.5245440646141706, "learning_rate": 9.317431962650339e-06, "loss": 0.6699, "step": 1837 }, { "epoch": 0.19, "grad_norm": 2.1773949952495566, "learning_rate": 9.316572141627334e-06, "loss": 0.6629, "step": 1838 }, { "epoch": 0.19, "grad_norm": 2.0104341618703967, "learning_rate": 9.315711819117452e-06, "loss": 0.6719, "step": 1839 }, { "epoch": 0.19, "grad_norm": 2.5108935311841467, "learning_rate": 9.31485099522065e-06, "loss": 0.7134, "step": 1840 }, { "epoch": 0.19, "grad_norm": 2.695150335447739, "learning_rate": 9.31398967003693e-06, "loss": 0.8007, "step": 1841 }, { "epoch": 0.19, "grad_norm": 2.7187367089479895, "learning_rate": 9.31312784366636e-06, "loss": 0.6938, "step": 1842 }, { "epoch": 0.19, "grad_norm": 2.266932399300048, "learning_rate": 9.312265516209068e-06, "loss": 0.7279, "step": 1843 }, { "epoch": 0.19, "grad_norm": 2.064288579022452, "learning_rate": 9.311402687765231e-06, "loss": 0.6903, "step": 1844 }, { "epoch": 0.19, "grad_norm": 2.3054354931895165, "learning_rate": 9.310539358435095e-06, "loss": 0.6779, "step": 1845 }, { "epoch": 0.19, "grad_norm": 2.3704699001159244, "learning_rate": 9.309675528318955e-06, "loss": 0.7828, "step": 1846 }, { "epoch": 0.19, "grad_norm": 2.5359614229510274, "learning_rate": 9.308811197517172e-06, "loss": 0.6946, "step": 1847 }, { "epoch": 0.19, "grad_norm": 2.433416978342713, "learning_rate": 9.307946366130158e-06, "loss": 0.8163, "step": 1848 }, { "epoch": 0.19, "grad_norm": 2.6463567241215493, "learning_rate": 9.307081034258389e-06, "loss": 0.7162, "step": 1849 }, { "epoch": 0.19, "grad_norm": 3.2116023992699523, "learning_rate": 9.306215202002396e-06, "loss": 0.6796, "step": 1850 }, { "epoch": 0.19, "grad_norm": 3.0445330680873828, "learning_rate": 9.305348869462768e-06, "loss": 0.7149, "step": 1851 }, { "epoch": 0.19, "grad_norm": 1.881081284323706, "learning_rate": 9.304482036740154e-06, "loss": 0.6436, "step": 1852 }, { "epoch": 0.19, "grad_norm": 2.4175242650695687, "learning_rate": 9.30361470393526e-06, "loss": 0.6238, "step": 1853 }, { "epoch": 0.2, "grad_norm": 2.430329856197137, "learning_rate": 9.302746871148852e-06, "loss": 0.7318, "step": 1854 }, { "epoch": 0.2, "grad_norm": 2.1962368489134803, "learning_rate": 9.301878538481748e-06, "loss": 0.7108, "step": 1855 }, { "epoch": 0.2, "grad_norm": 2.169152259585836, "learning_rate": 9.30100970603483e-06, "loss": 0.7173, "step": 1856 }, { "epoch": 0.2, "grad_norm": 2.463790448876148, "learning_rate": 9.30014037390904e-06, "loss": 0.689, "step": 1857 }, { "epoch": 0.2, "grad_norm": 2.368852330455152, "learning_rate": 9.299270542205372e-06, "loss": 0.6933, "step": 1858 }, { "epoch": 0.2, "grad_norm": 2.444593231468713, "learning_rate": 9.298400211024878e-06, "loss": 0.6799, "step": 1859 }, { "epoch": 0.2, "grad_norm": 1.8680880820924584, "learning_rate": 9.297529380468675e-06, "loss": 0.6574, "step": 1860 }, { "epoch": 0.2, "grad_norm": 2.948905283236975, "learning_rate": 9.29665805063793e-06, "loss": 0.6756, "step": 1861 }, { "epoch": 0.2, "grad_norm": 2.1963367833574643, "learning_rate": 9.295786221633874e-06, "loss": 0.634, "step": 1862 }, { "epoch": 0.2, "grad_norm": 2.139790541038441, "learning_rate": 9.294913893557792e-06, "loss": 0.6734, "step": 1863 }, { "epoch": 0.2, "grad_norm": 2.619706209228136, "learning_rate": 9.294041066511031e-06, "loss": 0.6999, "step": 1864 }, { "epoch": 0.2, "grad_norm": 2.409480108291939, "learning_rate": 9.29316774059499e-06, "loss": 0.6769, "step": 1865 }, { "epoch": 0.2, "grad_norm": 2.410329086374503, "learning_rate": 9.29229391591113e-06, "loss": 0.7089, "step": 1866 }, { "epoch": 0.2, "grad_norm": 5.690751097339351, "learning_rate": 9.291419592560973e-06, "loss": 0.7696, "step": 1867 }, { "epoch": 0.2, "grad_norm": 2.69073549819373, "learning_rate": 9.290544770646092e-06, "loss": 0.6698, "step": 1868 }, { "epoch": 0.2, "grad_norm": 3.9808783369984093, "learning_rate": 9.289669450268122e-06, "loss": 0.6039, "step": 1869 }, { "epoch": 0.2, "grad_norm": 2.15890459589978, "learning_rate": 9.288793631528757e-06, "loss": 0.6616, "step": 1870 }, { "epoch": 0.2, "grad_norm": 2.081034392032343, "learning_rate": 9.287917314529743e-06, "loss": 0.682, "step": 1871 }, { "epoch": 0.2, "grad_norm": 2.1849941363664382, "learning_rate": 9.287040499372893e-06, "loss": 0.7173, "step": 1872 }, { "epoch": 0.2, "grad_norm": 2.4741373441095202, "learning_rate": 9.286163186160067e-06, "loss": 0.6348, "step": 1873 }, { "epoch": 0.2, "grad_norm": 2.4013819058393944, "learning_rate": 9.285285374993195e-06, "loss": 0.641, "step": 1874 }, { "epoch": 0.2, "grad_norm": 2.3332689108954336, "learning_rate": 9.284407065974254e-06, "loss": 0.7153, "step": 1875 }, { "epoch": 0.2, "grad_norm": 2.3415860761989378, "learning_rate": 9.283528259205287e-06, "loss": 0.7111, "step": 1876 }, { "epoch": 0.2, "grad_norm": 2.5710052029402166, "learning_rate": 9.282648954788387e-06, "loss": 0.6825, "step": 1877 }, { "epoch": 0.2, "grad_norm": 2.172242362761334, "learning_rate": 9.281769152825713e-06, "loss": 0.6001, "step": 1878 }, { "epoch": 0.2, "grad_norm": 2.202637528829396, "learning_rate": 9.280888853419476e-06, "loss": 0.6635, "step": 1879 }, { "epoch": 0.2, "grad_norm": 4.4006855761361345, "learning_rate": 9.280008056671947e-06, "loss": 0.694, "step": 1880 }, { "epoch": 0.2, "grad_norm": 2.831327947150695, "learning_rate": 9.279126762685454e-06, "loss": 0.636, "step": 1881 }, { "epoch": 0.2, "grad_norm": 2.6062353877854436, "learning_rate": 9.278244971562382e-06, "loss": 0.8014, "step": 1882 }, { "epoch": 0.2, "grad_norm": 2.870705908895546, "learning_rate": 9.277362683405177e-06, "loss": 0.6577, "step": 1883 }, { "epoch": 0.2, "grad_norm": 2.3116536973353057, "learning_rate": 9.276479898316341e-06, "loss": 0.6627, "step": 1884 }, { "epoch": 0.2, "grad_norm": 2.325166740616653, "learning_rate": 9.275596616398431e-06, "loss": 0.7138, "step": 1885 }, { "epoch": 0.2, "grad_norm": 3.764579252240071, "learning_rate": 9.274712837754068e-06, "loss": 0.6182, "step": 1886 }, { "epoch": 0.2, "grad_norm": 2.610119518405415, "learning_rate": 9.273828562485923e-06, "loss": 0.6369, "step": 1887 }, { "epoch": 0.2, "grad_norm": 2.3604350623510846, "learning_rate": 9.272943790696728e-06, "loss": 0.6644, "step": 1888 }, { "epoch": 0.2, "grad_norm": 2.2652286769748495, "learning_rate": 9.272058522489277e-06, "loss": 0.7804, "step": 1889 }, { "epoch": 0.2, "grad_norm": 2.9511210502310745, "learning_rate": 9.271172757966418e-06, "loss": 0.6344, "step": 1890 }, { "epoch": 0.2, "grad_norm": 2.8169929100268867, "learning_rate": 9.270286497231052e-06, "loss": 0.697, "step": 1891 }, { "epoch": 0.2, "grad_norm": 3.7252767248143597, "learning_rate": 9.269399740386146e-06, "loss": 0.7239, "step": 1892 }, { "epoch": 0.2, "grad_norm": 2.507005417930868, "learning_rate": 9.26851248753472e-06, "loss": 0.6427, "step": 1893 }, { "epoch": 0.2, "grad_norm": 2.8482802966189373, "learning_rate": 9.267624738779853e-06, "loss": 0.6975, "step": 1894 }, { "epoch": 0.2, "grad_norm": 2.8213003925408153, "learning_rate": 9.266736494224677e-06, "loss": 0.6433, "step": 1895 }, { "epoch": 0.2, "grad_norm": 2.436716166307937, "learning_rate": 9.265847753972392e-06, "loss": 0.6206, "step": 1896 }, { "epoch": 0.2, "grad_norm": 3.0067406772274063, "learning_rate": 9.264958518126246e-06, "loss": 0.7178, "step": 1897 }, { "epoch": 0.2, "grad_norm": 2.652398231372856, "learning_rate": 9.264068786789546e-06, "loss": 0.7469, "step": 1898 }, { "epoch": 0.2, "grad_norm": 2.903751071631814, "learning_rate": 9.263178560065664e-06, "loss": 0.6284, "step": 1899 }, { "epoch": 0.2, "grad_norm": 2.233324204587242, "learning_rate": 9.262287838058017e-06, "loss": 0.654, "step": 1900 }, { "epoch": 0.2, "grad_norm": 5.426621816553852, "learning_rate": 9.261396620870092e-06, "loss": 0.6874, "step": 1901 }, { "epoch": 0.2, "grad_norm": 2.9434733409299283, "learning_rate": 9.260504908605425e-06, "loss": 0.6296, "step": 1902 }, { "epoch": 0.2, "grad_norm": 1.370239117918409, "learning_rate": 9.259612701367615e-06, "loss": 0.6566, "step": 1903 }, { "epoch": 0.2, "grad_norm": 2.306513335576572, "learning_rate": 9.258719999260315e-06, "loss": 0.6967, "step": 1904 }, { "epoch": 0.2, "grad_norm": 2.385612710261463, "learning_rate": 9.257826802387234e-06, "loss": 0.6995, "step": 1905 }, { "epoch": 0.2, "grad_norm": 2.8909613530557707, "learning_rate": 9.256933110852145e-06, "loss": 0.6234, "step": 1906 }, { "epoch": 0.2, "grad_norm": 2.956138783299861, "learning_rate": 9.25603892475887e-06, "loss": 0.7436, "step": 1907 }, { "epoch": 0.2, "grad_norm": 3.9130492645144934, "learning_rate": 9.255144244211299e-06, "loss": 0.7194, "step": 1908 }, { "epoch": 0.2, "grad_norm": 2.8433841114866922, "learning_rate": 9.254249069313368e-06, "loss": 0.6685, "step": 1909 }, { "epoch": 0.2, "grad_norm": 3.0403922052487427, "learning_rate": 9.253353400169078e-06, "loss": 0.6988, "step": 1910 }, { "epoch": 0.2, "grad_norm": 2.3083987036017835, "learning_rate": 9.252457236882487e-06, "loss": 0.659, "step": 1911 }, { "epoch": 0.2, "grad_norm": 2.636186973720048, "learning_rate": 9.251560579557705e-06, "loss": 0.6196, "step": 1912 }, { "epoch": 0.2, "grad_norm": 2.3354347387696337, "learning_rate": 9.250663428298906e-06, "loss": 0.7811, "step": 1913 }, { "epoch": 0.2, "grad_norm": 3.3940926366504227, "learning_rate": 9.249765783210316e-06, "loss": 0.6968, "step": 1914 }, { "epoch": 0.2, "grad_norm": 1.9781144300478661, "learning_rate": 9.248867644396224e-06, "loss": 0.7138, "step": 1915 }, { "epoch": 0.2, "grad_norm": 6.221082262034515, "learning_rate": 9.24796901196097e-06, "loss": 0.7758, "step": 1916 }, { "epoch": 0.2, "grad_norm": 2.281625973109882, "learning_rate": 9.247069886008957e-06, "loss": 0.6453, "step": 1917 }, { "epoch": 0.2, "grad_norm": 2.0495291535897127, "learning_rate": 9.24617026664464e-06, "loss": 0.6724, "step": 1918 }, { "epoch": 0.2, "grad_norm": 2.294555365241956, "learning_rate": 9.245270153972537e-06, "loss": 0.6348, "step": 1919 }, { "epoch": 0.2, "grad_norm": 2.6583206062968143, "learning_rate": 9.244369548097218e-06, "loss": 0.7125, "step": 1920 }, { "epoch": 0.2, "grad_norm": 1.2992496345079894, "learning_rate": 9.243468449123316e-06, "loss": 0.6501, "step": 1921 }, { "epoch": 0.2, "grad_norm": 2.294260337838386, "learning_rate": 9.242566857155515e-06, "loss": 0.6783, "step": 1922 }, { "epoch": 0.2, "grad_norm": 2.2677177768740107, "learning_rate": 9.241664772298561e-06, "loss": 0.7314, "step": 1923 }, { "epoch": 0.2, "grad_norm": 2.5676248737182172, "learning_rate": 9.240762194657254e-06, "loss": 0.7354, "step": 1924 }, { "epoch": 0.2, "grad_norm": 2.456584308459011, "learning_rate": 9.239859124336457e-06, "loss": 0.7148, "step": 1925 }, { "epoch": 0.2, "grad_norm": 2.5953088609259134, "learning_rate": 9.23895556144108e-06, "loss": 0.7198, "step": 1926 }, { "epoch": 0.2, "grad_norm": 2.144466625240076, "learning_rate": 9.2380515060761e-06, "loss": 0.6582, "step": 1927 }, { "epoch": 0.2, "grad_norm": 2.821280176023632, "learning_rate": 9.237146958346549e-06, "loss": 0.6836, "step": 1928 }, { "epoch": 0.2, "grad_norm": 2.012627315683036, "learning_rate": 9.236241918357511e-06, "loss": 0.7583, "step": 1929 }, { "epoch": 0.2, "grad_norm": 2.3886139997345825, "learning_rate": 9.235336386214133e-06, "loss": 0.6987, "step": 1930 }, { "epoch": 0.2, "grad_norm": 3.5518418067148287, "learning_rate": 9.234430362021615e-06, "loss": 0.7171, "step": 1931 }, { "epoch": 0.2, "grad_norm": 2.0640715116734265, "learning_rate": 9.233523845885221e-06, "loss": 0.5934, "step": 1932 }, { "epoch": 0.2, "grad_norm": 2.5462408850965303, "learning_rate": 9.232616837910263e-06, "loss": 0.6674, "step": 1933 }, { "epoch": 0.2, "grad_norm": 2.1027514803586316, "learning_rate": 9.231709338202117e-06, "loss": 0.7143, "step": 1934 }, { "epoch": 0.2, "grad_norm": 1.981837777190229, "learning_rate": 9.230801346866212e-06, "loss": 0.6824, "step": 1935 }, { "epoch": 0.2, "grad_norm": 3.0072257637585995, "learning_rate": 9.229892864008037e-06, "loss": 0.6737, "step": 1936 }, { "epoch": 0.2, "grad_norm": 2.404301234252323, "learning_rate": 9.228983889733135e-06, "loss": 0.72, "step": 1937 }, { "epoch": 0.2, "grad_norm": 2.1099503574763405, "learning_rate": 9.228074424147111e-06, "loss": 0.7708, "step": 1938 }, { "epoch": 0.2, "grad_norm": 2.1564706992929987, "learning_rate": 9.227164467355621e-06, "loss": 0.7047, "step": 1939 }, { "epoch": 0.2, "grad_norm": 2.717145887514134, "learning_rate": 9.226254019464384e-06, "loss": 0.7386, "step": 1940 }, { "epoch": 0.2, "grad_norm": 3.434512344610932, "learning_rate": 9.225343080579171e-06, "loss": 0.7586, "step": 1941 }, { "epoch": 0.2, "grad_norm": 2.516975033266961, "learning_rate": 9.224431650805814e-06, "loss": 0.721, "step": 1942 }, { "epoch": 0.2, "grad_norm": 2.0749482225336697, "learning_rate": 9.223519730250198e-06, "loss": 0.6007, "step": 1943 }, { "epoch": 0.2, "grad_norm": 2.5096562394219775, "learning_rate": 9.222607319018271e-06, "loss": 0.7729, "step": 1944 }, { "epoch": 0.2, "grad_norm": 1.9924376463945759, "learning_rate": 9.221694417216031e-06, "loss": 0.7065, "step": 1945 }, { "epoch": 0.2, "grad_norm": 3.4082767933565594, "learning_rate": 9.220781024949536e-06, "loss": 0.7151, "step": 1946 }, { "epoch": 0.2, "grad_norm": 2.157525030220561, "learning_rate": 9.219867142324904e-06, "loss": 0.6323, "step": 1947 }, { "epoch": 0.2, "grad_norm": 2.809345903838342, "learning_rate": 9.218952769448307e-06, "loss": 0.6643, "step": 1948 }, { "epoch": 0.21, "grad_norm": 2.1126315310721426, "learning_rate": 9.218037906425971e-06, "loss": 0.66, "step": 1949 }, { "epoch": 0.21, "grad_norm": 2.106123096050007, "learning_rate": 9.217122553364184e-06, "loss": 0.779, "step": 1950 }, { "epoch": 0.21, "grad_norm": 2.4264702932506927, "learning_rate": 9.21620671036929e-06, "loss": 0.7279, "step": 1951 }, { "epoch": 0.21, "grad_norm": 8.387568989843238, "learning_rate": 9.215290377547688e-06, "loss": 0.576, "step": 1952 }, { "epoch": 0.21, "grad_norm": 2.721569909570348, "learning_rate": 9.214373555005834e-06, "loss": 0.6169, "step": 1953 }, { "epoch": 0.21, "grad_norm": 2.5751826747005553, "learning_rate": 9.213456242850245e-06, "loss": 0.7445, "step": 1954 }, { "epoch": 0.21, "grad_norm": 5.238179252301112, "learning_rate": 9.21253844118749e-06, "loss": 0.6893, "step": 1955 }, { "epoch": 0.21, "grad_norm": 2.230727560495232, "learning_rate": 9.211620150124192e-06, "loss": 0.7169, "step": 1956 }, { "epoch": 0.21, "grad_norm": 2.5021330652535645, "learning_rate": 9.210701369767043e-06, "loss": 0.7869, "step": 1957 }, { "epoch": 0.21, "grad_norm": 2.7468363192392427, "learning_rate": 9.20978210022278e-06, "loss": 0.7359, "step": 1958 }, { "epoch": 0.21, "grad_norm": 2.0826919441428458, "learning_rate": 9.208862341598201e-06, "loss": 0.7324, "step": 1959 }, { "epoch": 0.21, "grad_norm": 2.7555569094425993, "learning_rate": 9.207942094000163e-06, "loss": 0.7457, "step": 1960 }, { "epoch": 0.21, "grad_norm": 3.1278899651708088, "learning_rate": 9.207021357535576e-06, "loss": 0.7093, "step": 1961 }, { "epoch": 0.21, "grad_norm": 2.304923508024911, "learning_rate": 9.206100132311408e-06, "loss": 0.6779, "step": 1962 }, { "epoch": 0.21, "grad_norm": 2.3962706909638007, "learning_rate": 9.205178418434687e-06, "loss": 0.7315, "step": 1963 }, { "epoch": 0.21, "grad_norm": 2.3044315673666205, "learning_rate": 9.204256216012493e-06, "loss": 0.7382, "step": 1964 }, { "epoch": 0.21, "grad_norm": 2.4732648879760863, "learning_rate": 9.203333525151964e-06, "loss": 0.7135, "step": 1965 }, { "epoch": 0.21, "grad_norm": 2.206789161392687, "learning_rate": 9.202410345960298e-06, "loss": 0.7343, "step": 1966 }, { "epoch": 0.21, "grad_norm": 2.4651985133702827, "learning_rate": 9.201486678544745e-06, "loss": 0.718, "step": 1967 }, { "epoch": 0.21, "grad_norm": 2.2568110947549407, "learning_rate": 9.200562523012615e-06, "loss": 0.6678, "step": 1968 }, { "epoch": 0.21, "grad_norm": 2.4466689065325182, "learning_rate": 9.199637879471272e-06, "loss": 0.7278, "step": 1969 }, { "epoch": 0.21, "grad_norm": 2.7412446183420593, "learning_rate": 9.198712748028142e-06, "loss": 0.6746, "step": 1970 }, { "epoch": 0.21, "grad_norm": 2.090127601569364, "learning_rate": 9.197787128790702e-06, "loss": 0.7454, "step": 1971 }, { "epoch": 0.21, "grad_norm": 2.2493962206295457, "learning_rate": 9.19686102186649e-06, "loss": 0.6949, "step": 1972 }, { "epoch": 0.21, "grad_norm": 2.4780855364247603, "learning_rate": 9.195934427363093e-06, "loss": 0.7695, "step": 1973 }, { "epoch": 0.21, "grad_norm": 2.720134076492023, "learning_rate": 9.195007345388165e-06, "loss": 0.7939, "step": 1974 }, { "epoch": 0.21, "grad_norm": 2.6619081847464705, "learning_rate": 9.19407977604941e-06, "loss": 0.7645, "step": 1975 }, { "epoch": 0.21, "grad_norm": 2.1669764589008986, "learning_rate": 9.193151719454591e-06, "loss": 0.658, "step": 1976 }, { "epoch": 0.21, "grad_norm": 2.7606276779843375, "learning_rate": 9.192223175711526e-06, "loss": 0.641, "step": 1977 }, { "epoch": 0.21, "grad_norm": 2.5503781978393776, "learning_rate": 9.191294144928091e-06, "loss": 0.7459, "step": 1978 }, { "epoch": 0.21, "grad_norm": 2.2113194458441345, "learning_rate": 9.190364627212216e-06, "loss": 0.6788, "step": 1979 }, { "epoch": 0.21, "grad_norm": 3.178914025765916, "learning_rate": 9.189434622671894e-06, "loss": 0.6719, "step": 1980 }, { "epoch": 0.21, "grad_norm": 2.684741530038339, "learning_rate": 9.188504131415167e-06, "loss": 0.6779, "step": 1981 }, { "epoch": 0.21, "grad_norm": 2.245253383160478, "learning_rate": 9.187573153550139e-06, "loss": 0.6422, "step": 1982 }, { "epoch": 0.21, "grad_norm": 2.5644319947882055, "learning_rate": 9.186641689184966e-06, "loss": 0.7558, "step": 1983 }, { "epoch": 0.21, "grad_norm": 2.6626949713612134, "learning_rate": 9.185709738427864e-06, "loss": 0.731, "step": 1984 }, { "epoch": 0.21, "grad_norm": 5.227738894627522, "learning_rate": 9.184777301387104e-06, "loss": 0.7663, "step": 1985 }, { "epoch": 0.21, "grad_norm": 2.3446022774136024, "learning_rate": 9.183844378171016e-06, "loss": 0.6564, "step": 1986 }, { "epoch": 0.21, "grad_norm": 2.432195543728687, "learning_rate": 9.182910968887982e-06, "loss": 0.7728, "step": 1987 }, { "epoch": 0.21, "grad_norm": 2.4823577607004146, "learning_rate": 9.181977073646442e-06, "loss": 0.6328, "step": 1988 }, { "epoch": 0.21, "grad_norm": 2.177220583837391, "learning_rate": 9.181042692554894e-06, "loss": 0.6911, "step": 1989 }, { "epoch": 0.21, "grad_norm": 2.483575743653078, "learning_rate": 9.180107825721891e-06, "loss": 0.7007, "step": 1990 }, { "epoch": 0.21, "grad_norm": 2.109985363397772, "learning_rate": 9.179172473256046e-06, "loss": 0.688, "step": 1991 }, { "epoch": 0.21, "grad_norm": 3.0644562546243423, "learning_rate": 9.178236635266025e-06, "loss": 0.6994, "step": 1992 }, { "epoch": 0.21, "grad_norm": 2.1056210922690526, "learning_rate": 9.17730031186055e-06, "loss": 0.7325, "step": 1993 }, { "epoch": 0.21, "grad_norm": 2.249585098272686, "learning_rate": 9.176363503148397e-06, "loss": 0.698, "step": 1994 }, { "epoch": 0.21, "grad_norm": 2.57223165623908, "learning_rate": 9.175426209238407e-06, "loss": 0.6476, "step": 1995 }, { "epoch": 0.21, "grad_norm": 3.6183458052615665, "learning_rate": 9.17448843023947e-06, "loss": 0.8366, "step": 1996 }, { "epoch": 0.21, "grad_norm": 2.072274750411721, "learning_rate": 9.173550166260533e-06, "loss": 0.7383, "step": 1997 }, { "epoch": 0.21, "grad_norm": 2.3127023094424963, "learning_rate": 9.172611417410604e-06, "loss": 0.6212, "step": 1998 }, { "epoch": 0.21, "grad_norm": 2.3387267206899556, "learning_rate": 9.17167218379874e-06, "loss": 0.7201, "step": 1999 }, { "epoch": 0.21, "grad_norm": 2.311361411354398, "learning_rate": 9.170732465534062e-06, "loss": 0.7375, "step": 2000 }, { "epoch": 0.21, "grad_norm": 2.2786653146755618, "learning_rate": 9.169792262725744e-06, "loss": 0.7097, "step": 2001 }, { "epoch": 0.21, "grad_norm": 2.107809653312181, "learning_rate": 9.168851575483013e-06, "loss": 0.6939, "step": 2002 }, { "epoch": 0.21, "grad_norm": 2.057099189874853, "learning_rate": 9.167910403915157e-06, "loss": 0.7254, "step": 2003 }, { "epoch": 0.21, "grad_norm": 2.445940571938724, "learning_rate": 9.16696874813152e-06, "loss": 0.6684, "step": 2004 }, { "epoch": 0.21, "grad_norm": 2.4846164924547747, "learning_rate": 9.166026608241496e-06, "loss": 0.6799, "step": 2005 }, { "epoch": 0.21, "grad_norm": 2.2002524931768814, "learning_rate": 9.165083984354545e-06, "loss": 0.7163, "step": 2006 }, { "epoch": 0.21, "grad_norm": 3.2128685263981924, "learning_rate": 9.164140876580179e-06, "loss": 0.6353, "step": 2007 }, { "epoch": 0.21, "grad_norm": 2.487800260585636, "learning_rate": 9.16319728502796e-06, "loss": 0.7001, "step": 2008 }, { "epoch": 0.21, "grad_norm": 2.3851629284461553, "learning_rate": 9.162253209807517e-06, "loss": 0.6288, "step": 2009 }, { "epoch": 0.21, "grad_norm": 2.4232158597086744, "learning_rate": 9.161308651028527e-06, "loss": 0.7754, "step": 2010 }, { "epoch": 0.21, "grad_norm": 2.154013851517835, "learning_rate": 9.160363608800728e-06, "loss": 0.7117, "step": 2011 }, { "epoch": 0.21, "grad_norm": 3.896276107997591, "learning_rate": 9.159418083233911e-06, "loss": 0.6859, "step": 2012 }, { "epoch": 0.21, "grad_norm": 2.44486977130411, "learning_rate": 9.158472074437923e-06, "loss": 0.6467, "step": 2013 }, { "epoch": 0.21, "grad_norm": 2.0090988720613, "learning_rate": 9.157525582522673e-06, "loss": 0.6484, "step": 2014 }, { "epoch": 0.21, "grad_norm": 2.34089707383189, "learning_rate": 9.156578607598118e-06, "loss": 0.7049, "step": 2015 }, { "epoch": 0.21, "grad_norm": 1.2880641379260889, "learning_rate": 9.155631149774276e-06, "loss": 0.6175, "step": 2016 }, { "epoch": 0.21, "grad_norm": 2.229526829371313, "learning_rate": 9.15468320916122e-06, "loss": 0.6662, "step": 2017 }, { "epoch": 0.21, "grad_norm": 2.156990061837107, "learning_rate": 9.153734785869077e-06, "loss": 0.7708, "step": 2018 }, { "epoch": 0.21, "grad_norm": 1.0523799529588602, "learning_rate": 9.152785880008035e-06, "loss": 0.6244, "step": 2019 }, { "epoch": 0.21, "grad_norm": 7.993605252322678, "learning_rate": 9.151836491688334e-06, "loss": 0.6563, "step": 2020 }, { "epoch": 0.21, "grad_norm": 2.3078838508848922, "learning_rate": 9.15088662102027e-06, "loss": 0.6904, "step": 2021 }, { "epoch": 0.21, "grad_norm": 3.063801331956025, "learning_rate": 9.149936268114199e-06, "loss": 0.676, "step": 2022 }, { "epoch": 0.21, "grad_norm": 2.295192468843143, "learning_rate": 9.148985433080528e-06, "loss": 0.6834, "step": 2023 }, { "epoch": 0.21, "grad_norm": 2.0564302867231383, "learning_rate": 9.148034116029723e-06, "loss": 0.704, "step": 2024 }, { "epoch": 0.21, "grad_norm": 2.5228740207896085, "learning_rate": 9.147082317072305e-06, "loss": 0.6683, "step": 2025 }, { "epoch": 0.21, "grad_norm": 2.7052297386153015, "learning_rate": 9.146130036318853e-06, "loss": 0.7911, "step": 2026 }, { "epoch": 0.21, "grad_norm": 2.601417655514352, "learning_rate": 9.145177273879995e-06, "loss": 0.6382, "step": 2027 }, { "epoch": 0.21, "grad_norm": 1.9751425113181098, "learning_rate": 9.144224029866426e-06, "loss": 0.6693, "step": 2028 }, { "epoch": 0.21, "grad_norm": 2.5895232014896266, "learning_rate": 9.14327030438889e-06, "loss": 0.6712, "step": 2029 }, { "epoch": 0.21, "grad_norm": 3.747457510645605, "learning_rate": 9.142316097558185e-06, "loss": 0.6682, "step": 2030 }, { "epoch": 0.21, "grad_norm": 2.220857892024144, "learning_rate": 9.14136140948517e-06, "loss": 0.7372, "step": 2031 }, { "epoch": 0.21, "grad_norm": 2.3266528015138386, "learning_rate": 9.14040624028076e-06, "loss": 0.7786, "step": 2032 }, { "epoch": 0.21, "grad_norm": 2.348078882997178, "learning_rate": 9.13945059005592e-06, "loss": 0.7961, "step": 2033 }, { "epoch": 0.21, "grad_norm": 2.81669382916974, "learning_rate": 9.138494458921676e-06, "loss": 0.6599, "step": 2034 }, { "epoch": 0.21, "grad_norm": 2.170306767141564, "learning_rate": 9.137537846989111e-06, "loss": 0.6299, "step": 2035 }, { "epoch": 0.21, "grad_norm": 2.5696842899169385, "learning_rate": 9.136580754369357e-06, "loss": 0.6784, "step": 2036 }, { "epoch": 0.21, "grad_norm": 2.3739145799623045, "learning_rate": 9.135623181173609e-06, "loss": 0.7959, "step": 2037 }, { "epoch": 0.21, "grad_norm": 2.3501976834313174, "learning_rate": 9.134665127513116e-06, "loss": 0.6405, "step": 2038 }, { "epoch": 0.21, "grad_norm": 2.7821580766766676, "learning_rate": 9.133706593499181e-06, "loss": 0.7582, "step": 2039 }, { "epoch": 0.21, "grad_norm": 2.0217636722769323, "learning_rate": 9.132747579243163e-06, "loss": 0.7176, "step": 2040 }, { "epoch": 0.21, "grad_norm": 2.149948219126222, "learning_rate": 9.131788084856477e-06, "loss": 0.7303, "step": 2041 }, { "epoch": 0.21, "grad_norm": 2.279544855221473, "learning_rate": 9.130828110450593e-06, "loss": 0.6588, "step": 2042 }, { "epoch": 0.21, "grad_norm": 2.49303969504543, "learning_rate": 9.129867656137044e-06, "loss": 0.7645, "step": 2043 }, { "epoch": 0.22, "grad_norm": 2.143115712697355, "learning_rate": 9.128906722027406e-06, "loss": 0.6674, "step": 2044 }, { "epoch": 0.22, "grad_norm": 2.0564042062634225, "learning_rate": 9.127945308233322e-06, "loss": 0.7074, "step": 2045 }, { "epoch": 0.22, "grad_norm": 2.147223116012419, "learning_rate": 9.126983414866486e-06, "loss": 0.7134, "step": 2046 }, { "epoch": 0.22, "grad_norm": 2.272635221500622, "learning_rate": 9.126021042038644e-06, "loss": 0.7515, "step": 2047 }, { "epoch": 0.22, "grad_norm": 2.7298434180359954, "learning_rate": 9.125058189861607e-06, "loss": 0.7381, "step": 2048 }, { "epoch": 0.22, "grad_norm": 3.0486731591766913, "learning_rate": 9.124094858447233e-06, "loss": 0.6821, "step": 2049 }, { "epoch": 0.22, "grad_norm": 4.6588169832809205, "learning_rate": 9.123131047907439e-06, "loss": 0.8302, "step": 2050 }, { "epoch": 0.22, "grad_norm": 2.07023150767486, "learning_rate": 9.122166758354199e-06, "loss": 0.6876, "step": 2051 }, { "epoch": 0.22, "grad_norm": 2.7807238495464133, "learning_rate": 9.12120198989954e-06, "loss": 0.5919, "step": 2052 }, { "epoch": 0.22, "grad_norm": 2.1579164615666837, "learning_rate": 9.120236742655548e-06, "loss": 0.7351, "step": 2053 }, { "epoch": 0.22, "grad_norm": 2.5527053709438214, "learning_rate": 9.11927101673436e-06, "loss": 0.6993, "step": 2054 }, { "epoch": 0.22, "grad_norm": 2.261502991172867, "learning_rate": 9.118304812248177e-06, "loss": 0.6502, "step": 2055 }, { "epoch": 0.22, "grad_norm": 3.242461395330852, "learning_rate": 9.117338129309243e-06, "loss": 0.6605, "step": 2056 }, { "epoch": 0.22, "grad_norm": 1.914414634765642, "learning_rate": 9.116370968029867e-06, "loss": 0.6981, "step": 2057 }, { "epoch": 0.22, "grad_norm": 2.344979314174602, "learning_rate": 9.115403328522412e-06, "loss": 0.7227, "step": 2058 }, { "epoch": 0.22, "grad_norm": 2.191238107674176, "learning_rate": 9.114435210899296e-06, "loss": 0.6631, "step": 2059 }, { "epoch": 0.22, "grad_norm": 2.4602672291177528, "learning_rate": 9.113466615272988e-06, "loss": 0.7243, "step": 2060 }, { "epoch": 0.22, "grad_norm": 1.44867772558428, "learning_rate": 9.11249754175602e-06, "loss": 0.6982, "step": 2061 }, { "epoch": 0.22, "grad_norm": 2.651963613477152, "learning_rate": 9.111527990460977e-06, "loss": 0.7013, "step": 2062 }, { "epoch": 0.22, "grad_norm": 2.5214633687617325, "learning_rate": 9.110557961500496e-06, "loss": 0.6273, "step": 2063 }, { "epoch": 0.22, "grad_norm": 2.1745678771179233, "learning_rate": 9.109587454987274e-06, "loss": 0.6482, "step": 2064 }, { "epoch": 0.22, "grad_norm": 2.489922591200943, "learning_rate": 9.108616471034061e-06, "loss": 0.6609, "step": 2065 }, { "epoch": 0.22, "grad_norm": 2.0973955866357814, "learning_rate": 9.107645009753663e-06, "loss": 0.6722, "step": 2066 }, { "epoch": 0.22, "grad_norm": 1.9996934192215, "learning_rate": 9.106673071258942e-06, "loss": 0.7301, "step": 2067 }, { "epoch": 0.22, "grad_norm": 2.494129615110545, "learning_rate": 9.105700655662815e-06, "loss": 0.6926, "step": 2068 }, { "epoch": 0.22, "grad_norm": 11.264357578268509, "learning_rate": 9.104727763078253e-06, "loss": 0.7061, "step": 2069 }, { "epoch": 0.22, "grad_norm": 2.8039561431694544, "learning_rate": 9.103754393618287e-06, "loss": 0.6591, "step": 2070 }, { "epoch": 0.22, "grad_norm": 2.2260708554024737, "learning_rate": 9.102780547395997e-06, "loss": 0.7733, "step": 2071 }, { "epoch": 0.22, "grad_norm": 2.6838150314170997, "learning_rate": 9.101806224524524e-06, "loss": 0.7586, "step": 2072 }, { "epoch": 0.22, "grad_norm": 3.5259424740449896, "learning_rate": 9.10083142511706e-06, "loss": 0.6863, "step": 2073 }, { "epoch": 0.22, "grad_norm": 1.894372573153219, "learning_rate": 9.099856149286857e-06, "loss": 0.6572, "step": 2074 }, { "epoch": 0.22, "grad_norm": 3.246841628686618, "learning_rate": 9.098880397147215e-06, "loss": 0.6908, "step": 2075 }, { "epoch": 0.22, "grad_norm": 3.039572983541654, "learning_rate": 9.0979041688115e-06, "loss": 0.6255, "step": 2076 }, { "epoch": 0.22, "grad_norm": 1.5644692341003028, "learning_rate": 9.096927464393123e-06, "loss": 0.6359, "step": 2077 }, { "epoch": 0.22, "grad_norm": 2.305502204546247, "learning_rate": 9.095950284005557e-06, "loss": 0.7748, "step": 2078 }, { "epoch": 0.22, "grad_norm": 2.2388140399919334, "learning_rate": 9.094972627762326e-06, "loss": 0.7147, "step": 2079 }, { "epoch": 0.22, "grad_norm": 2.137643134632365, "learning_rate": 9.093994495777014e-06, "loss": 0.7239, "step": 2080 }, { "epoch": 0.22, "grad_norm": 2.166788507712685, "learning_rate": 9.093015888163255e-06, "loss": 0.7105, "step": 2081 }, { "epoch": 0.22, "grad_norm": 2.811849358016705, "learning_rate": 9.09203680503474e-06, "loss": 0.6821, "step": 2082 }, { "epoch": 0.22, "grad_norm": 2.196127603607177, "learning_rate": 9.091057246505221e-06, "loss": 0.7968, "step": 2083 }, { "epoch": 0.22, "grad_norm": 9.922784029473444, "learning_rate": 9.090077212688496e-06, "loss": 0.7587, "step": 2084 }, { "epoch": 0.22, "grad_norm": 2.039930805628341, "learning_rate": 9.089096703698423e-06, "loss": 0.6852, "step": 2085 }, { "epoch": 0.22, "grad_norm": 30.95824912540301, "learning_rate": 9.088115719648917e-06, "loss": 0.7022, "step": 2086 }, { "epoch": 0.22, "grad_norm": 2.181623054651788, "learning_rate": 9.087134260653943e-06, "loss": 0.6246, "step": 2087 }, { "epoch": 0.22, "grad_norm": 1.9997909852846925, "learning_rate": 9.086152326827527e-06, "loss": 0.6697, "step": 2088 }, { "epoch": 0.22, "grad_norm": 2.395290205085553, "learning_rate": 9.085169918283744e-06, "loss": 0.6567, "step": 2089 }, { "epoch": 0.22, "grad_norm": 2.836594783866522, "learning_rate": 9.084187035136727e-06, "loss": 0.7324, "step": 2090 }, { "epoch": 0.22, "grad_norm": 2.3072720294313496, "learning_rate": 9.08320367750067e-06, "loss": 0.6529, "step": 2091 }, { "epoch": 0.22, "grad_norm": 1.8931936091262302, "learning_rate": 9.08221984548981e-06, "loss": 0.7267, "step": 2092 }, { "epoch": 0.22, "grad_norm": 2.4682881700848234, "learning_rate": 9.081235539218451e-06, "loss": 0.6981, "step": 2093 }, { "epoch": 0.22, "grad_norm": 2.1040997554296026, "learning_rate": 9.080250758800944e-06, "loss": 0.7395, "step": 2094 }, { "epoch": 0.22, "grad_norm": 2.148229839051349, "learning_rate": 9.0792655043517e-06, "loss": 0.6395, "step": 2095 }, { "epoch": 0.22, "grad_norm": 2.008482369488449, "learning_rate": 9.078279775985179e-06, "loss": 0.665, "step": 2096 }, { "epoch": 0.22, "grad_norm": 2.414670215539318, "learning_rate": 9.077293573815905e-06, "loss": 0.7037, "step": 2097 }, { "epoch": 0.22, "grad_norm": 2.6045420628577474, "learning_rate": 9.07630689795845e-06, "loss": 0.6876, "step": 2098 }, { "epoch": 0.22, "grad_norm": 3.089544444738824, "learning_rate": 9.075319748527442e-06, "loss": 0.6634, "step": 2099 }, { "epoch": 0.22, "grad_norm": 7.068471813524876, "learning_rate": 9.074332125637564e-06, "loss": 0.7743, "step": 2100 }, { "epoch": 0.22, "grad_norm": 10.390556215843803, "learning_rate": 9.073344029403562e-06, "loss": 0.6952, "step": 2101 }, { "epoch": 0.22, "grad_norm": 2.5241030806585005, "learning_rate": 9.072355459940222e-06, "loss": 0.7437, "step": 2102 }, { "epoch": 0.22, "grad_norm": 3.752003680927352, "learning_rate": 9.071366417362398e-06, "loss": 0.7585, "step": 2103 }, { "epoch": 0.22, "grad_norm": 2.5306889032364492, "learning_rate": 9.070376901784992e-06, "loss": 0.7722, "step": 2104 }, { "epoch": 0.22, "grad_norm": 2.5152078389519015, "learning_rate": 9.069386913322964e-06, "loss": 0.7526, "step": 2105 }, { "epoch": 0.22, "grad_norm": 2.300437974231803, "learning_rate": 9.068396452091328e-06, "loss": 0.7412, "step": 2106 }, { "epoch": 0.22, "grad_norm": 2.966239911106796, "learning_rate": 9.067405518205153e-06, "loss": 0.6955, "step": 2107 }, { "epoch": 0.22, "grad_norm": 2.374383053660284, "learning_rate": 9.066414111779562e-06, "loss": 0.7065, "step": 2108 }, { "epoch": 0.22, "grad_norm": 1.95341575255716, "learning_rate": 9.065422232929735e-06, "loss": 0.6911, "step": 2109 }, { "epoch": 0.22, "grad_norm": 1.38959249885153, "learning_rate": 9.064429881770905e-06, "loss": 0.5927, "step": 2110 }, { "epoch": 0.22, "grad_norm": 2.354874954642544, "learning_rate": 9.063437058418361e-06, "loss": 0.6709, "step": 2111 }, { "epoch": 0.22, "grad_norm": 3.1182596136285308, "learning_rate": 9.062443762987442e-06, "loss": 0.7256, "step": 2112 }, { "epoch": 0.22, "grad_norm": 2.3451511992294085, "learning_rate": 9.061449995593554e-06, "loss": 0.6163, "step": 2113 }, { "epoch": 0.22, "grad_norm": 2.6148189214617816, "learning_rate": 9.060455756352144e-06, "loss": 0.7164, "step": 2114 }, { "epoch": 0.22, "grad_norm": 2.984293021281382, "learning_rate": 9.059461045378723e-06, "loss": 0.5894, "step": 2115 }, { "epoch": 0.22, "grad_norm": 2.254747474890875, "learning_rate": 9.058465862788852e-06, "loss": 0.6708, "step": 2116 }, { "epoch": 0.22, "grad_norm": 2.363609404179215, "learning_rate": 9.05747020869815e-06, "loss": 0.6888, "step": 2117 }, { "epoch": 0.22, "grad_norm": 2.37996717795513, "learning_rate": 9.056474083222286e-06, "loss": 0.5809, "step": 2118 }, { "epoch": 0.22, "grad_norm": 2.7306453255932848, "learning_rate": 9.055477486476992e-06, "loss": 0.7184, "step": 2119 }, { "epoch": 0.22, "grad_norm": 2.3108089867660713, "learning_rate": 9.054480418578044e-06, "loss": 0.6179, "step": 2120 }, { "epoch": 0.22, "grad_norm": 2.5666873554391665, "learning_rate": 9.053482879641283e-06, "loss": 0.701, "step": 2121 }, { "epoch": 0.22, "grad_norm": 2.492602226878482, "learning_rate": 9.052484869782597e-06, "loss": 0.6191, "step": 2122 }, { "epoch": 0.22, "grad_norm": 9.660745248304112, "learning_rate": 9.051486389117933e-06, "loss": 0.6498, "step": 2123 }, { "epoch": 0.22, "grad_norm": 2.593579048719051, "learning_rate": 9.050487437763294e-06, "loss": 0.7411, "step": 2124 }, { "epoch": 0.22, "grad_norm": 2.193130270383422, "learning_rate": 9.049488015834731e-06, "loss": 0.6509, "step": 2125 }, { "epoch": 0.22, "grad_norm": 2.286545999733999, "learning_rate": 9.048488123448357e-06, "loss": 0.7333, "step": 2126 }, { "epoch": 0.22, "grad_norm": 2.7285901550002145, "learning_rate": 9.047487760720338e-06, "loss": 0.755, "step": 2127 }, { "epoch": 0.22, "grad_norm": 2.5323414148927963, "learning_rate": 9.046486927766889e-06, "loss": 0.6686, "step": 2128 }, { "epoch": 0.22, "grad_norm": 1.2528013077555502, "learning_rate": 9.045485624704287e-06, "loss": 0.6362, "step": 2129 }, { "epoch": 0.22, "grad_norm": 2.170043281237849, "learning_rate": 9.044483851648858e-06, "loss": 0.6531, "step": 2130 }, { "epoch": 0.22, "grad_norm": 2.537814352100077, "learning_rate": 9.043481608716987e-06, "loss": 0.7327, "step": 2131 }, { "epoch": 0.22, "grad_norm": 2.1536774455658683, "learning_rate": 9.042478896025113e-06, "loss": 0.737, "step": 2132 }, { "epoch": 0.22, "grad_norm": 2.4395488467277504, "learning_rate": 9.041475713689725e-06, "loss": 0.7198, "step": 2133 }, { "epoch": 0.22, "grad_norm": 3.612160402864214, "learning_rate": 9.04047206182737e-06, "loss": 0.7047, "step": 2134 }, { "epoch": 0.22, "grad_norm": 2.361860087108175, "learning_rate": 9.039467940554651e-06, "loss": 0.8051, "step": 2135 }, { "epoch": 0.22, "grad_norm": 1.1118384611691083, "learning_rate": 9.038463349988226e-06, "loss": 0.6651, "step": 2136 }, { "epoch": 0.22, "grad_norm": 2.1279878773488656, "learning_rate": 9.0374582902448e-06, "loss": 0.7212, "step": 2137 }, { "epoch": 0.22, "grad_norm": 1.984425840534222, "learning_rate": 9.036452761441143e-06, "loss": 0.6806, "step": 2138 }, { "epoch": 0.23, "grad_norm": 2.2564393133187943, "learning_rate": 9.035446763694073e-06, "loss": 0.6898, "step": 2139 }, { "epoch": 0.23, "grad_norm": 3.2675730698127854, "learning_rate": 9.034440297120461e-06, "loss": 0.6907, "step": 2140 }, { "epoch": 0.23, "grad_norm": 3.1457055260967333, "learning_rate": 9.03343336183724e-06, "loss": 0.6275, "step": 2141 }, { "epoch": 0.23, "grad_norm": 2.699650466296311, "learning_rate": 9.032425957961388e-06, "loss": 0.7142, "step": 2142 }, { "epoch": 0.23, "grad_norm": 2.361799060850231, "learning_rate": 9.031418085609946e-06, "loss": 0.7878, "step": 2143 }, { "epoch": 0.23, "grad_norm": 3.0889387826729044, "learning_rate": 9.030409744900005e-06, "loss": 0.7566, "step": 2144 }, { "epoch": 0.23, "grad_norm": 2.6149850709834492, "learning_rate": 9.029400935948712e-06, "loss": 0.6909, "step": 2145 }, { "epoch": 0.23, "grad_norm": 2.102880043260286, "learning_rate": 9.028391658873264e-06, "loss": 0.6655, "step": 2146 }, { "epoch": 0.23, "grad_norm": 2.6572624912551666, "learning_rate": 9.027381913790916e-06, "loss": 0.627, "step": 2147 }, { "epoch": 0.23, "grad_norm": 2.6045372231550585, "learning_rate": 9.026371700818982e-06, "loss": 0.779, "step": 2148 }, { "epoch": 0.23, "grad_norm": 2.662458429721062, "learning_rate": 9.025361020074823e-06, "loss": 0.7057, "step": 2149 }, { "epoch": 0.23, "grad_norm": 1.8868287840968536, "learning_rate": 9.024349871675855e-06, "loss": 0.7235, "step": 2150 }, { "epoch": 0.23, "grad_norm": 2.2117676794317105, "learning_rate": 9.023338255739553e-06, "loss": 0.7088, "step": 2151 }, { "epoch": 0.23, "grad_norm": 2.2014960756353057, "learning_rate": 9.022326172383444e-06, "loss": 0.7346, "step": 2152 }, { "epoch": 0.23, "grad_norm": 2.14684126625403, "learning_rate": 9.021313621725106e-06, "loss": 0.734, "step": 2153 }, { "epoch": 0.23, "grad_norm": 2.558372722059645, "learning_rate": 9.020300603882178e-06, "loss": 0.6929, "step": 2154 }, { "epoch": 0.23, "grad_norm": 2.442785125856375, "learning_rate": 9.019287118972343e-06, "loss": 0.7285, "step": 2155 }, { "epoch": 0.23, "grad_norm": 2.5672631424976617, "learning_rate": 9.018273167113354e-06, "loss": 0.7115, "step": 2156 }, { "epoch": 0.23, "grad_norm": 3.9219116603164372, "learning_rate": 9.017258748423e-06, "loss": 0.7292, "step": 2157 }, { "epoch": 0.23, "grad_norm": 3.1655630933315155, "learning_rate": 9.01624386301914e-06, "loss": 0.6373, "step": 2158 }, { "epoch": 0.23, "grad_norm": 2.9959938750864805, "learning_rate": 9.015228511019678e-06, "loss": 0.6972, "step": 2159 }, { "epoch": 0.23, "grad_norm": 2.451365705730853, "learning_rate": 9.014212692542573e-06, "loss": 0.7195, "step": 2160 }, { "epoch": 0.23, "grad_norm": 2.277660307422319, "learning_rate": 9.013196407705842e-06, "loss": 0.704, "step": 2161 }, { "epoch": 0.23, "grad_norm": 3.015544240787718, "learning_rate": 9.012179656627553e-06, "loss": 0.6184, "step": 2162 }, { "epoch": 0.23, "grad_norm": 2.046162331434754, "learning_rate": 9.011162439425831e-06, "loss": 0.7308, "step": 2163 }, { "epoch": 0.23, "grad_norm": 2.62859526794707, "learning_rate": 9.010144756218851e-06, "loss": 0.6979, "step": 2164 }, { "epoch": 0.23, "grad_norm": 2.026397922330441, "learning_rate": 9.009126607124844e-06, "loss": 0.6918, "step": 2165 }, { "epoch": 0.23, "grad_norm": 2.3621280368628996, "learning_rate": 9.008107992262098e-06, "loss": 0.7819, "step": 2166 }, { "epoch": 0.23, "grad_norm": 2.176552442201047, "learning_rate": 9.00708891174895e-06, "loss": 0.7668, "step": 2167 }, { "epoch": 0.23, "grad_norm": 3.2070738864736765, "learning_rate": 9.006069365703799e-06, "loss": 0.6924, "step": 2168 }, { "epoch": 0.23, "grad_norm": 2.168031678031707, "learning_rate": 9.005049354245088e-06, "loss": 0.6743, "step": 2169 }, { "epoch": 0.23, "grad_norm": 2.8808305867742656, "learning_rate": 9.004028877491319e-06, "loss": 0.631, "step": 2170 }, { "epoch": 0.23, "grad_norm": 2.295861751174668, "learning_rate": 9.003007935561052e-06, "loss": 0.6949, "step": 2171 }, { "epoch": 0.23, "grad_norm": 2.49760284826448, "learning_rate": 9.001986528572892e-06, "loss": 0.743, "step": 2172 }, { "epoch": 0.23, "grad_norm": 7.296535119642908, "learning_rate": 9.000964656645508e-06, "loss": 0.7973, "step": 2173 }, { "epoch": 0.23, "grad_norm": 2.2214283799588825, "learning_rate": 8.999942319897615e-06, "loss": 0.6888, "step": 2174 }, { "epoch": 0.23, "grad_norm": 3.1319580111833254, "learning_rate": 8.998919518447986e-06, "loss": 0.6926, "step": 2175 }, { "epoch": 0.23, "grad_norm": 2.4770875145538556, "learning_rate": 8.997896252415445e-06, "loss": 0.7683, "step": 2176 }, { "epoch": 0.23, "grad_norm": 2.646693120678769, "learning_rate": 8.996872521918877e-06, "loss": 0.8287, "step": 2177 }, { "epoch": 0.23, "grad_norm": 2.343228561686917, "learning_rate": 8.995848327077211e-06, "loss": 0.7529, "step": 2178 }, { "epoch": 0.23, "grad_norm": 5.810342504369392, "learning_rate": 8.994823668009437e-06, "loss": 0.7357, "step": 2179 }, { "epoch": 0.23, "grad_norm": 2.1482922949969145, "learning_rate": 8.9937985448346e-06, "loss": 0.7304, "step": 2180 }, { "epoch": 0.23, "grad_norm": 1.8551204580841676, "learning_rate": 8.992772957671791e-06, "loss": 0.7101, "step": 2181 }, { "epoch": 0.23, "grad_norm": 2.2466577666192475, "learning_rate": 8.991746906640162e-06, "loss": 0.6895, "step": 2182 }, { "epoch": 0.23, "grad_norm": 2.168650877854395, "learning_rate": 8.990720391858915e-06, "loss": 0.6479, "step": 2183 }, { "epoch": 0.23, "grad_norm": 3.300343690084461, "learning_rate": 8.98969341344731e-06, "loss": 0.6546, "step": 2184 }, { "epoch": 0.23, "grad_norm": 2.3637093422065734, "learning_rate": 8.98866597152466e-06, "loss": 0.6307, "step": 2185 }, { "epoch": 0.23, "grad_norm": 2.754311126650472, "learning_rate": 8.987638066210325e-06, "loss": 0.6912, "step": 2186 }, { "epoch": 0.23, "grad_norm": 2.1562502830328474, "learning_rate": 8.986609697623724e-06, "loss": 0.6785, "step": 2187 }, { "epoch": 0.23, "grad_norm": 2.327649052593783, "learning_rate": 8.985580865884336e-06, "loss": 0.7294, "step": 2188 }, { "epoch": 0.23, "grad_norm": 2.408598312141122, "learning_rate": 8.984551571111683e-06, "loss": 0.6296, "step": 2189 }, { "epoch": 0.23, "grad_norm": 2.142678163175262, "learning_rate": 8.983521813425348e-06, "loss": 0.6944, "step": 2190 }, { "epoch": 0.23, "grad_norm": 2.175847871801838, "learning_rate": 8.982491592944962e-06, "loss": 0.6382, "step": 2191 }, { "epoch": 0.23, "grad_norm": 3.228095121751495, "learning_rate": 8.981460909790216e-06, "loss": 0.5974, "step": 2192 }, { "epoch": 0.23, "grad_norm": 2.052783456234635, "learning_rate": 8.98042976408085e-06, "loss": 0.6731, "step": 2193 }, { "epoch": 0.23, "grad_norm": 2.0986872293091468, "learning_rate": 8.97939815593666e-06, "loss": 0.6891, "step": 2194 }, { "epoch": 0.23, "grad_norm": 2.154720097555765, "learning_rate": 8.978366085477497e-06, "loss": 0.6185, "step": 2195 }, { "epoch": 0.23, "grad_norm": 2.1403969118588067, "learning_rate": 8.977333552823261e-06, "loss": 0.6918, "step": 2196 }, { "epoch": 0.23, "grad_norm": 3.33214529211307, "learning_rate": 8.976300558093911e-06, "loss": 0.6466, "step": 2197 }, { "epoch": 0.23, "grad_norm": 4.260333763778915, "learning_rate": 8.975267101409458e-06, "loss": 0.7482, "step": 2198 }, { "epoch": 0.23, "grad_norm": 2.0914836565162496, "learning_rate": 8.974233182889961e-06, "loss": 0.6254, "step": 2199 }, { "epoch": 0.23, "grad_norm": 2.3762255505538676, "learning_rate": 8.973198802655543e-06, "loss": 0.7553, "step": 2200 }, { "epoch": 0.23, "grad_norm": 2.1282785961850506, "learning_rate": 8.972163960826375e-06, "loss": 0.7332, "step": 2201 }, { "epoch": 0.23, "grad_norm": 1.9694503720386745, "learning_rate": 8.971128657522677e-06, "loss": 0.7163, "step": 2202 }, { "epoch": 0.23, "grad_norm": 2.1801548709290546, "learning_rate": 8.970092892864732e-06, "loss": 0.7532, "step": 2203 }, { "epoch": 0.23, "grad_norm": 2.2491318122078092, "learning_rate": 8.969056666972874e-06, "loss": 0.716, "step": 2204 }, { "epoch": 0.23, "grad_norm": 2.0893548904362413, "learning_rate": 8.968019979967482e-06, "loss": 0.6952, "step": 2205 }, { "epoch": 0.23, "grad_norm": 2.2759911727787907, "learning_rate": 8.966982831969001e-06, "loss": 0.7345, "step": 2206 }, { "epoch": 0.23, "grad_norm": 3.33173403077009, "learning_rate": 8.965945223097922e-06, "loss": 0.7495, "step": 2207 }, { "epoch": 0.23, "grad_norm": 1.9341550603493136, "learning_rate": 8.964907153474791e-06, "loss": 0.7013, "step": 2208 }, { "epoch": 0.23, "grad_norm": 2.1147262266169173, "learning_rate": 8.963868623220208e-06, "loss": 0.6709, "step": 2209 }, { "epoch": 0.23, "grad_norm": 2.8936564339718123, "learning_rate": 8.962829632454829e-06, "loss": 0.7317, "step": 2210 }, { "epoch": 0.23, "grad_norm": 2.4903063833013555, "learning_rate": 8.961790181299354e-06, "loss": 0.8207, "step": 2211 }, { "epoch": 0.23, "grad_norm": 2.2804678844589246, "learning_rate": 8.960750269874552e-06, "loss": 0.6467, "step": 2212 }, { "epoch": 0.23, "grad_norm": 2.4641706147225713, "learning_rate": 8.959709898301232e-06, "loss": 0.7512, "step": 2213 }, { "epoch": 0.23, "grad_norm": 2.6089124316964005, "learning_rate": 8.958669066700261e-06, "loss": 0.6522, "step": 2214 }, { "epoch": 0.23, "grad_norm": 2.19069049516705, "learning_rate": 8.957627775192564e-06, "loss": 0.7337, "step": 2215 }, { "epoch": 0.23, "grad_norm": 2.500190008138319, "learning_rate": 8.956586023899109e-06, "loss": 0.681, "step": 2216 }, { "epoch": 0.23, "grad_norm": 2.159720426862354, "learning_rate": 8.95554381294093e-06, "loss": 0.7571, "step": 2217 }, { "epoch": 0.23, "grad_norm": 2.0686321610309095, "learning_rate": 8.954501142439105e-06, "loss": 0.6577, "step": 2218 }, { "epoch": 0.23, "grad_norm": 2.2475898480773995, "learning_rate": 8.953458012514766e-06, "loss": 0.7263, "step": 2219 }, { "epoch": 0.23, "grad_norm": 3.4884995247883603, "learning_rate": 8.952414423289107e-06, "loss": 0.7057, "step": 2220 }, { "epoch": 0.23, "grad_norm": 4.4448700774798064, "learning_rate": 8.951370374883362e-06, "loss": 0.7204, "step": 2221 }, { "epoch": 0.23, "grad_norm": 2.441270491294434, "learning_rate": 8.950325867418831e-06, "loss": 0.7727, "step": 2222 }, { "epoch": 0.23, "grad_norm": 2.310391536836991, "learning_rate": 8.949280901016859e-06, "loss": 0.7408, "step": 2223 }, { "epoch": 0.23, "grad_norm": 1.894271235429747, "learning_rate": 8.94823547579885e-06, "loss": 0.6713, "step": 2224 }, { "epoch": 0.23, "grad_norm": 2.731116389165696, "learning_rate": 8.947189591886255e-06, "loss": 0.7204, "step": 2225 }, { "epoch": 0.23, "grad_norm": 2.3805815349526687, "learning_rate": 8.946143249400582e-06, "loss": 0.6696, "step": 2226 }, { "epoch": 0.23, "grad_norm": 2.354183378868526, "learning_rate": 8.945096448463397e-06, "loss": 0.7481, "step": 2227 }, { "epoch": 0.23, "grad_norm": 2.189115002118964, "learning_rate": 8.944049189196308e-06, "loss": 0.7049, "step": 2228 }, { "epoch": 0.23, "grad_norm": 15.927462117356988, "learning_rate": 8.943001471720987e-06, "loss": 0.63, "step": 2229 }, { "epoch": 0.23, "grad_norm": 7.588326102390013, "learning_rate": 8.941953296159153e-06, "loss": 0.7003, "step": 2230 }, { "epoch": 0.23, "grad_norm": 3.0744521000216753, "learning_rate": 8.940904662632579e-06, "loss": 0.7521, "step": 2231 }, { "epoch": 0.23, "grad_norm": 3.4628646582770877, "learning_rate": 8.939855571263095e-06, "loss": 0.6593, "step": 2232 }, { "epoch": 0.23, "grad_norm": 7.273569477502985, "learning_rate": 8.938806022172578e-06, "loss": 0.6958, "step": 2233 }, { "epoch": 0.24, "grad_norm": 2.3321350896927435, "learning_rate": 8.937756015482962e-06, "loss": 0.706, "step": 2234 }, { "epoch": 0.24, "grad_norm": 1.249108717782656, "learning_rate": 8.936705551316238e-06, "loss": 0.5939, "step": 2235 }, { "epoch": 0.24, "grad_norm": 2.4287842458282207, "learning_rate": 8.935654629794442e-06, "loss": 0.7158, "step": 2236 }, { "epoch": 0.24, "grad_norm": 2.7772840916896127, "learning_rate": 8.934603251039667e-06, "loss": 0.6861, "step": 2237 }, { "epoch": 0.24, "grad_norm": 2.350481813294236, "learning_rate": 8.93355141517406e-06, "loss": 0.7134, "step": 2238 }, { "epoch": 0.24, "grad_norm": 1.9394185187015496, "learning_rate": 8.932499122319821e-06, "loss": 0.6629, "step": 2239 }, { "epoch": 0.24, "grad_norm": 2.664123857241033, "learning_rate": 8.931446372599202e-06, "loss": 0.6152, "step": 2240 }, { "epoch": 0.24, "grad_norm": 2.349915878479327, "learning_rate": 8.930393166134507e-06, "loss": 0.6989, "step": 2241 }, { "epoch": 0.24, "grad_norm": 2.206930105187941, "learning_rate": 8.929339503048096e-06, "loss": 0.6799, "step": 2242 }, { "epoch": 0.24, "grad_norm": 2.647423217564447, "learning_rate": 8.92828538346238e-06, "loss": 0.7513, "step": 2243 }, { "epoch": 0.24, "grad_norm": 2.4210465633816582, "learning_rate": 8.927230807499824e-06, "loss": 0.6743, "step": 2244 }, { "epoch": 0.24, "grad_norm": 2.5877919600311934, "learning_rate": 8.926175775282946e-06, "loss": 0.6994, "step": 2245 }, { "epoch": 0.24, "grad_norm": 2.158209740707754, "learning_rate": 8.925120286934315e-06, "loss": 0.6448, "step": 2246 }, { "epoch": 0.24, "grad_norm": 2.3832699071525774, "learning_rate": 8.924064342576554e-06, "loss": 0.6684, "step": 2247 }, { "epoch": 0.24, "grad_norm": 2.400928597949794, "learning_rate": 8.923007942332345e-06, "loss": 0.73, "step": 2248 }, { "epoch": 0.24, "grad_norm": 2.9437835667880945, "learning_rate": 8.92195108632441e-06, "loss": 0.7273, "step": 2249 }, { "epoch": 0.24, "grad_norm": 4.870436670961615, "learning_rate": 8.920893774675536e-06, "loss": 0.705, "step": 2250 }, { "epoch": 0.24, "grad_norm": 3.3747630286954555, "learning_rate": 8.919836007508558e-06, "loss": 0.7267, "step": 2251 }, { "epoch": 0.24, "grad_norm": 3.341185883155674, "learning_rate": 8.918777784946364e-06, "loss": 0.712, "step": 2252 }, { "epoch": 0.24, "grad_norm": 2.0954214888627747, "learning_rate": 8.917719107111893e-06, "loss": 0.6333, "step": 2253 }, { "epoch": 0.24, "grad_norm": 1.2833413585564724, "learning_rate": 8.916659974128144e-06, "loss": 0.6779, "step": 2254 }, { "epoch": 0.24, "grad_norm": 2.1835249116285995, "learning_rate": 8.91560038611816e-06, "loss": 0.705, "step": 2255 }, { "epoch": 0.24, "grad_norm": 7.568232696610119, "learning_rate": 8.91454034320504e-06, "loss": 0.6866, "step": 2256 }, { "epoch": 0.24, "grad_norm": 2.7059228367154904, "learning_rate": 8.913479845511942e-06, "loss": 0.6874, "step": 2257 }, { "epoch": 0.24, "grad_norm": 2.1413067070108274, "learning_rate": 8.912418893162066e-06, "loss": 0.6511, "step": 2258 }, { "epoch": 0.24, "grad_norm": 2.553360298084785, "learning_rate": 8.91135748627867e-06, "loss": 0.6896, "step": 2259 }, { "epoch": 0.24, "grad_norm": 2.5267231876409033, "learning_rate": 8.910295624985072e-06, "loss": 0.7684, "step": 2260 }, { "epoch": 0.24, "grad_norm": 2.267588533534899, "learning_rate": 8.909233309404632e-06, "loss": 0.7048, "step": 2261 }, { "epoch": 0.24, "grad_norm": 6.118523741223378, "learning_rate": 8.908170539660766e-06, "loss": 0.7692, "step": 2262 }, { "epoch": 0.24, "grad_norm": 3.7350137185652335, "learning_rate": 8.907107315876942e-06, "loss": 0.5702, "step": 2263 }, { "epoch": 0.24, "grad_norm": 1.2334438560399243, "learning_rate": 8.906043638176686e-06, "loss": 0.6064, "step": 2264 }, { "epoch": 0.24, "grad_norm": 2.408881941929093, "learning_rate": 8.90497950668357e-06, "loss": 0.7069, "step": 2265 }, { "epoch": 0.24, "grad_norm": 0.985892002965994, "learning_rate": 8.903914921521226e-06, "loss": 0.6155, "step": 2266 }, { "epoch": 0.24, "grad_norm": 3.0808888218270587, "learning_rate": 8.90284988281333e-06, "loss": 0.6486, "step": 2267 }, { "epoch": 0.24, "grad_norm": 2.235696390043599, "learning_rate": 8.901784390683616e-06, "loss": 0.7458, "step": 2268 }, { "epoch": 0.24, "grad_norm": 2.545711880315011, "learning_rate": 8.90071844525587e-06, "loss": 0.6872, "step": 2269 }, { "epoch": 0.24, "grad_norm": 3.1494684836406015, "learning_rate": 8.89965204665393e-06, "loss": 0.6339, "step": 2270 }, { "epoch": 0.24, "grad_norm": 1.2914981848806717, "learning_rate": 8.898585195001691e-06, "loss": 0.6513, "step": 2271 }, { "epoch": 0.24, "grad_norm": 2.6252182724430613, "learning_rate": 8.897517890423092e-06, "loss": 0.6416, "step": 2272 }, { "epoch": 0.24, "grad_norm": 2.3145266443168397, "learning_rate": 8.896450133042132e-06, "loss": 0.5625, "step": 2273 }, { "epoch": 0.24, "grad_norm": 2.9425490217808346, "learning_rate": 8.895381922982857e-06, "loss": 0.7486, "step": 2274 }, { "epoch": 0.24, "grad_norm": 2.4409922366780656, "learning_rate": 8.894313260369372e-06, "loss": 0.7289, "step": 2275 }, { "epoch": 0.24, "grad_norm": 2.7296566582591337, "learning_rate": 8.89324414532583e-06, "loss": 0.652, "step": 2276 }, { "epoch": 0.24, "grad_norm": 2.7073730009269488, "learning_rate": 8.892174577976438e-06, "loss": 0.6029, "step": 2277 }, { "epoch": 0.24, "grad_norm": 2.984224980813416, "learning_rate": 8.891104558445454e-06, "loss": 0.6809, "step": 2278 }, { "epoch": 0.24, "grad_norm": 2.5742027243121153, "learning_rate": 8.890034086857189e-06, "loss": 0.6906, "step": 2279 }, { "epoch": 0.24, "grad_norm": 2.0483532696980427, "learning_rate": 8.88896316333601e-06, "loss": 0.7207, "step": 2280 }, { "epoch": 0.24, "grad_norm": 2.7333128521952657, "learning_rate": 8.887891788006334e-06, "loss": 0.7079, "step": 2281 }, { "epoch": 0.24, "grad_norm": 2.860179858149371, "learning_rate": 8.886819960992626e-06, "loss": 0.6179, "step": 2282 }, { "epoch": 0.24, "grad_norm": 2.398502797457853, "learning_rate": 8.885747682419413e-06, "loss": 0.6413, "step": 2283 }, { "epoch": 0.24, "grad_norm": 2.3814307011526807, "learning_rate": 8.884674952411265e-06, "loss": 0.815, "step": 2284 }, { "epoch": 0.24, "grad_norm": 2.91263925062964, "learning_rate": 8.883601771092812e-06, "loss": 0.6907, "step": 2285 }, { "epoch": 0.24, "grad_norm": 2.470495388049324, "learning_rate": 8.882528138588729e-06, "loss": 0.7293, "step": 2286 }, { "epoch": 0.24, "grad_norm": 2.206151479410179, "learning_rate": 8.881454055023752e-06, "loss": 0.7459, "step": 2287 }, { "epoch": 0.24, "grad_norm": 2.432016434550301, "learning_rate": 8.880379520522664e-06, "loss": 0.762, "step": 2288 }, { "epoch": 0.24, "grad_norm": 2.8140952416127694, "learning_rate": 8.879304535210298e-06, "loss": 0.6734, "step": 2289 }, { "epoch": 0.24, "grad_norm": 2.473443750742672, "learning_rate": 8.878229099211548e-06, "loss": 0.75, "step": 2290 }, { "epoch": 0.24, "grad_norm": 4.536037475518659, "learning_rate": 8.87715321265135e-06, "loss": 0.6536, "step": 2291 }, { "epoch": 0.24, "grad_norm": 2.268832339589105, "learning_rate": 8.8760768756547e-06, "loss": 0.7971, "step": 2292 }, { "epoch": 0.24, "grad_norm": 2.784421921853551, "learning_rate": 8.875000088346642e-06, "loss": 0.7665, "step": 2293 }, { "epoch": 0.24, "grad_norm": 2.494207647604574, "learning_rate": 8.873922850852276e-06, "loss": 0.5876, "step": 2294 }, { "epoch": 0.24, "grad_norm": 4.963714518286977, "learning_rate": 8.872845163296752e-06, "loss": 0.7046, "step": 2295 }, { "epoch": 0.24, "grad_norm": 3.1676492228416104, "learning_rate": 8.87176702580527e-06, "loss": 0.6656, "step": 2296 }, { "epoch": 0.24, "grad_norm": 2.5119192830739734, "learning_rate": 8.87068843850309e-06, "loss": 0.7083, "step": 2297 }, { "epoch": 0.24, "grad_norm": 2.943714911949743, "learning_rate": 8.869609401515516e-06, "loss": 0.6959, "step": 2298 }, { "epoch": 0.24, "grad_norm": 2.6887677900783884, "learning_rate": 8.868529914967908e-06, "loss": 0.6394, "step": 2299 }, { "epoch": 0.24, "grad_norm": 2.2402749666716884, "learning_rate": 8.867449978985676e-06, "loss": 0.7139, "step": 2300 }, { "epoch": 0.24, "grad_norm": 2.8829424956949716, "learning_rate": 8.866369593694285e-06, "loss": 0.6697, "step": 2301 }, { "epoch": 0.24, "grad_norm": 2.746440356252218, "learning_rate": 8.865288759219251e-06, "loss": 0.7202, "step": 2302 }, { "epoch": 0.24, "grad_norm": 3.1527046705438075, "learning_rate": 8.864207475686142e-06, "loss": 0.724, "step": 2303 }, { "epoch": 0.24, "grad_norm": 2.95970505604412, "learning_rate": 8.86312574322058e-06, "loss": 0.7037, "step": 2304 }, { "epoch": 0.24, "grad_norm": 2.271026223729123, "learning_rate": 8.862043561948237e-06, "loss": 0.6285, "step": 2305 }, { "epoch": 0.24, "grad_norm": 1.8402186327283752, "learning_rate": 8.860960931994835e-06, "loss": 0.6876, "step": 2306 }, { "epoch": 0.24, "grad_norm": 3.098504551725227, "learning_rate": 8.859877853486154e-06, "loss": 0.7273, "step": 2307 }, { "epoch": 0.24, "grad_norm": 2.2831168072415884, "learning_rate": 8.85879432654802e-06, "loss": 0.7357, "step": 2308 }, { "epoch": 0.24, "grad_norm": 3.372416894210338, "learning_rate": 8.85771035130632e-06, "loss": 0.6096, "step": 2309 }, { "epoch": 0.24, "grad_norm": 7.296320517752463, "learning_rate": 8.85662592788698e-06, "loss": 0.6875, "step": 2310 }, { "epoch": 0.24, "grad_norm": 3.8593869238201894, "learning_rate": 8.855541056415988e-06, "loss": 0.7534, "step": 2311 }, { "epoch": 0.24, "grad_norm": 2.688210282987243, "learning_rate": 8.854455737019381e-06, "loss": 0.7085, "step": 2312 }, { "epoch": 0.24, "grad_norm": 2.2398910749922645, "learning_rate": 8.853369969823249e-06, "loss": 0.7067, "step": 2313 }, { "epoch": 0.24, "grad_norm": 2.5141437787544123, "learning_rate": 8.852283754953734e-06, "loss": 0.6465, "step": 2314 }, { "epoch": 0.24, "grad_norm": 3.3203384815403836, "learning_rate": 8.851197092537027e-06, "loss": 0.6464, "step": 2315 }, { "epoch": 0.24, "grad_norm": 4.071240596546394, "learning_rate": 8.850109982699375e-06, "loss": 0.7057, "step": 2316 }, { "epoch": 0.24, "grad_norm": 5.27586745399632, "learning_rate": 8.849022425567074e-06, "loss": 0.5833, "step": 2317 }, { "epoch": 0.24, "grad_norm": 2.242964035717761, "learning_rate": 8.847934421266475e-06, "loss": 0.6137, "step": 2318 }, { "epoch": 0.24, "grad_norm": 3.005217367036941, "learning_rate": 8.846845969923977e-06, "loss": 0.6083, "step": 2319 }, { "epoch": 0.24, "grad_norm": 2.9145747544167695, "learning_rate": 8.845757071666035e-06, "loss": 0.6655, "step": 2320 }, { "epoch": 0.24, "grad_norm": 3.0295015098864075, "learning_rate": 8.844667726619153e-06, "loss": 0.8086, "step": 2321 }, { "epoch": 0.24, "grad_norm": 2.2677233953465716, "learning_rate": 8.843577934909888e-06, "loss": 0.7134, "step": 2322 }, { "epoch": 0.24, "grad_norm": 3.0387979236232696, "learning_rate": 8.84248769666485e-06, "loss": 0.6452, "step": 2323 }, { "epoch": 0.24, "grad_norm": 2.3092678024403184, "learning_rate": 8.8413970120107e-06, "loss": 0.6905, "step": 2324 }, { "epoch": 0.24, "grad_norm": 2.441938437783016, "learning_rate": 8.840305881074147e-06, "loss": 0.7386, "step": 2325 }, { "epoch": 0.24, "grad_norm": 2.299488064549423, "learning_rate": 8.83921430398196e-06, "loss": 0.5852, "step": 2326 }, { "epoch": 0.24, "grad_norm": 2.346680432521479, "learning_rate": 8.838122280860953e-06, "loss": 0.6816, "step": 2327 }, { "epoch": 0.24, "grad_norm": 1.1971116175516738, "learning_rate": 8.837029811837991e-06, "loss": 0.6475, "step": 2328 }, { "epoch": 0.25, "grad_norm": 3.033820645161564, "learning_rate": 8.83593689704e-06, "loss": 0.6569, "step": 2329 }, { "epoch": 0.25, "grad_norm": 2.2373130942751507, "learning_rate": 8.834843536593949e-06, "loss": 0.7147, "step": 2330 }, { "epoch": 0.25, "grad_norm": 2.462343548899127, "learning_rate": 8.833749730626862e-06, "loss": 0.6969, "step": 2331 }, { "epoch": 0.25, "grad_norm": 3.597236651843794, "learning_rate": 8.832655479265812e-06, "loss": 0.6882, "step": 2332 }, { "epoch": 0.25, "grad_norm": 1.2374268550953933, "learning_rate": 8.831560782637929e-06, "loss": 0.562, "step": 2333 }, { "epoch": 0.25, "grad_norm": 2.383491282270233, "learning_rate": 8.830465640870388e-06, "loss": 0.7145, "step": 2334 }, { "epoch": 0.25, "grad_norm": 2.1925762438333, "learning_rate": 8.829370054090423e-06, "loss": 0.6832, "step": 2335 }, { "epoch": 0.25, "grad_norm": 3.41126511819354, "learning_rate": 8.828274022425316e-06, "loss": 0.7105, "step": 2336 }, { "epoch": 0.25, "grad_norm": 2.863081103637933, "learning_rate": 8.827177546002398e-06, "loss": 0.7167, "step": 2337 }, { "epoch": 0.25, "grad_norm": 2.3220167881162324, "learning_rate": 8.826080624949056e-06, "loss": 0.6032, "step": 2338 }, { "epoch": 0.25, "grad_norm": 2.4710434596501796, "learning_rate": 8.824983259392727e-06, "loss": 0.6848, "step": 2339 }, { "epoch": 0.25, "grad_norm": 2.0728765590634004, "learning_rate": 8.823885449460899e-06, "loss": 0.6752, "step": 2340 }, { "epoch": 0.25, "grad_norm": 2.173323985472954, "learning_rate": 8.822787195281114e-06, "loss": 0.6507, "step": 2341 }, { "epoch": 0.25, "grad_norm": 3.022192983168258, "learning_rate": 8.821688496980964e-06, "loss": 0.5687, "step": 2342 }, { "epoch": 0.25, "grad_norm": 2.321047814509788, "learning_rate": 8.82058935468809e-06, "loss": 0.7194, "step": 2343 }, { "epoch": 0.25, "grad_norm": 2.4035818387743038, "learning_rate": 8.819489768530192e-06, "loss": 0.6671, "step": 2344 }, { "epoch": 0.25, "grad_norm": 3.2287384454438013, "learning_rate": 8.818389738635012e-06, "loss": 0.6707, "step": 2345 }, { "epoch": 0.25, "grad_norm": 2.1051049030090847, "learning_rate": 8.817289265130348e-06, "loss": 0.7092, "step": 2346 }, { "epoch": 0.25, "grad_norm": 2.6891874499459885, "learning_rate": 8.816188348144054e-06, "loss": 0.7263, "step": 2347 }, { "epoch": 0.25, "grad_norm": 1.8695048876833085, "learning_rate": 8.815086987804029e-06, "loss": 0.661, "step": 2348 }, { "epoch": 0.25, "grad_norm": 2.307276114427358, "learning_rate": 8.813985184238226e-06, "loss": 0.7653, "step": 2349 }, { "epoch": 0.25, "grad_norm": 2.273797735104588, "learning_rate": 8.81288293757465e-06, "loss": 0.6886, "step": 2350 }, { "epoch": 0.25, "grad_norm": 2.8942145715992846, "learning_rate": 8.811780247941354e-06, "loss": 0.7078, "step": 2351 }, { "epoch": 0.25, "grad_norm": 2.3063601691994826, "learning_rate": 8.810677115466451e-06, "loss": 0.677, "step": 2352 }, { "epoch": 0.25, "grad_norm": 2.1245376854410516, "learning_rate": 8.809573540278094e-06, "loss": 0.6966, "step": 2353 }, { "epoch": 0.25, "grad_norm": 2.8737504795386495, "learning_rate": 8.808469522504495e-06, "loss": 0.6428, "step": 2354 }, { "epoch": 0.25, "grad_norm": 2.578969771967044, "learning_rate": 8.807365062273917e-06, "loss": 0.7379, "step": 2355 }, { "epoch": 0.25, "grad_norm": 2.771616985464036, "learning_rate": 8.806260159714672e-06, "loss": 0.5943, "step": 2356 }, { "epoch": 0.25, "grad_norm": 2.0923888169384046, "learning_rate": 8.805154814955124e-06, "loss": 0.6965, "step": 2357 }, { "epoch": 0.25, "grad_norm": 2.4647946308004243, "learning_rate": 8.80404902812369e-06, "loss": 0.6628, "step": 2358 }, { "epoch": 0.25, "grad_norm": 2.8740732083977183, "learning_rate": 8.802942799348836e-06, "loss": 0.7136, "step": 2359 }, { "epoch": 0.25, "grad_norm": 2.023161383444029, "learning_rate": 8.80183612875908e-06, "loss": 0.6532, "step": 2360 }, { "epoch": 0.25, "grad_norm": 2.2951972055338783, "learning_rate": 8.800729016482993e-06, "loss": 0.7056, "step": 2361 }, { "epoch": 0.25, "grad_norm": 3.005412561447531, "learning_rate": 8.799621462649198e-06, "loss": 0.6511, "step": 2362 }, { "epoch": 0.25, "grad_norm": 2.347273517470351, "learning_rate": 8.798513467386361e-06, "loss": 0.6972, "step": 2363 }, { "epoch": 0.25, "grad_norm": 1.2061420190350576, "learning_rate": 8.797405030823212e-06, "loss": 0.6577, "step": 2364 }, { "epoch": 0.25, "grad_norm": 2.209573303684799, "learning_rate": 8.796296153088523e-06, "loss": 0.6624, "step": 2365 }, { "epoch": 0.25, "grad_norm": 1.0157084559249987, "learning_rate": 8.79518683431112e-06, "loss": 0.6218, "step": 2366 }, { "epoch": 0.25, "grad_norm": 2.6605469423214507, "learning_rate": 8.794077074619884e-06, "loss": 0.6776, "step": 2367 }, { "epoch": 0.25, "grad_norm": 2.329846735401453, "learning_rate": 8.79296687414374e-06, "loss": 0.6263, "step": 2368 }, { "epoch": 0.25, "grad_norm": 2.16211910196471, "learning_rate": 8.791856233011668e-06, "loss": 0.7662, "step": 2369 }, { "epoch": 0.25, "grad_norm": 2.5132087358973325, "learning_rate": 8.7907451513527e-06, "loss": 0.5662, "step": 2370 }, { "epoch": 0.25, "grad_norm": 2.4691914644939135, "learning_rate": 8.78963362929592e-06, "loss": 0.6809, "step": 2371 }, { "epoch": 0.25, "grad_norm": 2.517049664768459, "learning_rate": 8.788521666970458e-06, "loss": 0.6717, "step": 2372 }, { "epoch": 0.25, "grad_norm": 2.083726661084472, "learning_rate": 8.7874092645055e-06, "loss": 0.6727, "step": 2373 }, { "epoch": 0.25, "grad_norm": 3.259534102965764, "learning_rate": 8.786296422030283e-06, "loss": 0.7514, "step": 2374 }, { "epoch": 0.25, "grad_norm": 5.0201738332244945, "learning_rate": 8.785183139674093e-06, "loss": 0.7253, "step": 2375 }, { "epoch": 0.25, "grad_norm": 2.7540666834409913, "learning_rate": 8.784069417566268e-06, "loss": 0.6923, "step": 2376 }, { "epoch": 0.25, "grad_norm": 2.4727794350012213, "learning_rate": 8.782955255836194e-06, "loss": 0.6761, "step": 2377 }, { "epoch": 0.25, "grad_norm": 2.642090445706794, "learning_rate": 8.781840654613317e-06, "loss": 0.6928, "step": 2378 }, { "epoch": 0.25, "grad_norm": 2.3378650663235514, "learning_rate": 8.780725614027123e-06, "loss": 0.6407, "step": 2379 }, { "epoch": 0.25, "grad_norm": 5.5463507569750945, "learning_rate": 8.779610134207157e-06, "loss": 0.731, "step": 2380 }, { "epoch": 0.25, "grad_norm": 2.476687518226198, "learning_rate": 8.778494215283011e-06, "loss": 0.6693, "step": 2381 }, { "epoch": 0.25, "grad_norm": 2.6190587133475143, "learning_rate": 8.777377857384329e-06, "loss": 0.7043, "step": 2382 }, { "epoch": 0.25, "grad_norm": 2.276304071392182, "learning_rate": 8.776261060640807e-06, "loss": 0.6189, "step": 2383 }, { "epoch": 0.25, "grad_norm": 2.5067148512484305, "learning_rate": 8.775143825182192e-06, "loss": 0.6429, "step": 2384 }, { "epoch": 0.25, "grad_norm": 2.533233559475545, "learning_rate": 8.77402615113828e-06, "loss": 0.7218, "step": 2385 }, { "epoch": 0.25, "grad_norm": 2.3752543251935956, "learning_rate": 8.77290803863892e-06, "loss": 0.6615, "step": 2386 }, { "epoch": 0.25, "grad_norm": 2.6509057573466417, "learning_rate": 8.771789487814009e-06, "loss": 0.6687, "step": 2387 }, { "epoch": 0.25, "grad_norm": 2.322946052629726, "learning_rate": 8.770670498793498e-06, "loss": 0.71, "step": 2388 }, { "epoch": 0.25, "grad_norm": 2.555317772836953, "learning_rate": 8.76955107170739e-06, "loss": 0.6629, "step": 2389 }, { "epoch": 0.25, "grad_norm": 2.0462771901928227, "learning_rate": 8.768431206685735e-06, "loss": 0.6768, "step": 2390 }, { "epoch": 0.25, "grad_norm": 2.4959674203980957, "learning_rate": 8.767310903858635e-06, "loss": 0.7051, "step": 2391 }, { "epoch": 0.25, "grad_norm": 2.400493851416112, "learning_rate": 8.766190163356243e-06, "loss": 0.7141, "step": 2392 }, { "epoch": 0.25, "grad_norm": 2.447160327467119, "learning_rate": 8.765068985308768e-06, "loss": 0.6782, "step": 2393 }, { "epoch": 0.25, "grad_norm": 2.205789371123814, "learning_rate": 8.76394736984646e-06, "loss": 0.6606, "step": 2394 }, { "epoch": 0.25, "grad_norm": 3.2524271530733375, "learning_rate": 8.762825317099628e-06, "loss": 0.623, "step": 2395 }, { "epoch": 0.25, "grad_norm": 2.1224476313578493, "learning_rate": 8.761702827198626e-06, "loss": 0.6808, "step": 2396 }, { "epoch": 0.25, "grad_norm": 1.2653362778423383, "learning_rate": 8.760579900273865e-06, "loss": 0.636, "step": 2397 }, { "epoch": 0.25, "grad_norm": 3.2781604321793023, "learning_rate": 8.759456536455802e-06, "loss": 0.6624, "step": 2398 }, { "epoch": 0.25, "grad_norm": 2.4477894582450905, "learning_rate": 8.758332735874946e-06, "loss": 0.6002, "step": 2399 }, { "epoch": 0.25, "grad_norm": 3.197585400074303, "learning_rate": 8.757208498661857e-06, "loss": 0.6718, "step": 2400 }, { "epoch": 0.25, "grad_norm": 2.278739347444417, "learning_rate": 8.756083824947145e-06, "loss": 0.7983, "step": 2401 }, { "epoch": 0.25, "grad_norm": 2.585442200635953, "learning_rate": 8.754958714861474e-06, "loss": 0.7644, "step": 2402 }, { "epoch": 0.25, "grad_norm": 3.0314353743944635, "learning_rate": 8.753833168535551e-06, "loss": 0.7351, "step": 2403 }, { "epoch": 0.25, "grad_norm": 2.7580999310075356, "learning_rate": 8.752707186100144e-06, "loss": 0.6929, "step": 2404 }, { "epoch": 0.25, "grad_norm": 2.720900436087719, "learning_rate": 8.751580767686063e-06, "loss": 0.6816, "step": 2405 }, { "epoch": 0.25, "grad_norm": 2.7870764847387455, "learning_rate": 8.750453913424172e-06, "loss": 0.6466, "step": 2406 }, { "epoch": 0.25, "grad_norm": 2.5240294889336234, "learning_rate": 8.74932662344539e-06, "loss": 0.6982, "step": 2407 }, { "epoch": 0.25, "grad_norm": 12.532901910391917, "learning_rate": 8.748198897880677e-06, "loss": 0.68, "step": 2408 }, { "epoch": 0.25, "grad_norm": 2.2585279019476845, "learning_rate": 8.747070736861052e-06, "loss": 0.6859, "step": 2409 }, { "epoch": 0.25, "grad_norm": 2.950693720936932, "learning_rate": 8.745942140517579e-06, "loss": 0.6846, "step": 2410 }, { "epoch": 0.25, "grad_norm": 2.8818540494898084, "learning_rate": 8.744813108981377e-06, "loss": 0.7454, "step": 2411 }, { "epoch": 0.25, "grad_norm": 2.03077990529205, "learning_rate": 8.743683642383613e-06, "loss": 0.7515, "step": 2412 }, { "epoch": 0.25, "grad_norm": 2.3199732724761954, "learning_rate": 8.742553740855507e-06, "loss": 0.6335, "step": 2413 }, { "epoch": 0.25, "grad_norm": 2.5030676274429786, "learning_rate": 8.741423404528325e-06, "loss": 0.6373, "step": 2414 }, { "epoch": 0.25, "grad_norm": 3.576788831727748, "learning_rate": 8.740292633533387e-06, "loss": 0.6511, "step": 2415 }, { "epoch": 0.25, "grad_norm": 2.292929750954021, "learning_rate": 8.739161428002061e-06, "loss": 0.6665, "step": 2416 }, { "epoch": 0.25, "grad_norm": 2.495581313970782, "learning_rate": 8.738029788065772e-06, "loss": 0.6986, "step": 2417 }, { "epoch": 0.25, "grad_norm": 2.7267216304430106, "learning_rate": 8.736897713855988e-06, "loss": 0.6805, "step": 2418 }, { "epoch": 0.25, "grad_norm": 2.5310007726717654, "learning_rate": 8.735765205504228e-06, "loss": 0.6876, "step": 2419 }, { "epoch": 0.25, "grad_norm": 1.2473961744466824, "learning_rate": 8.734632263142066e-06, "loss": 0.626, "step": 2420 }, { "epoch": 0.25, "grad_norm": 2.6025083204104194, "learning_rate": 8.733498886901123e-06, "loss": 0.6504, "step": 2421 }, { "epoch": 0.25, "grad_norm": 2.885692680206437, "learning_rate": 8.732365076913072e-06, "loss": 0.6555, "step": 2422 }, { "epoch": 0.25, "grad_norm": 4.032356607171435, "learning_rate": 8.731230833309637e-06, "loss": 0.6401, "step": 2423 }, { "epoch": 0.26, "grad_norm": 2.1972012220479074, "learning_rate": 8.730096156222586e-06, "loss": 0.6757, "step": 2424 }, { "epoch": 0.26, "grad_norm": 2.291089360356186, "learning_rate": 8.728961045783751e-06, "loss": 0.7047, "step": 2425 }, { "epoch": 0.26, "grad_norm": 2.2487186243095816, "learning_rate": 8.727825502124998e-06, "loss": 0.7178, "step": 2426 }, { "epoch": 0.26, "grad_norm": 2.417453807033552, "learning_rate": 8.726689525378254e-06, "loss": 0.6669, "step": 2427 }, { "epoch": 0.26, "grad_norm": 2.3540762968676656, "learning_rate": 8.725553115675496e-06, "loss": 0.5727, "step": 2428 }, { "epoch": 0.26, "grad_norm": 4.006417010869224, "learning_rate": 8.724416273148745e-06, "loss": 0.6692, "step": 2429 }, { "epoch": 0.26, "grad_norm": 2.24813269883343, "learning_rate": 8.723278997930078e-06, "loss": 0.6947, "step": 2430 }, { "epoch": 0.26, "grad_norm": 2.568072157207673, "learning_rate": 8.722141290151618e-06, "loss": 0.6721, "step": 2431 }, { "epoch": 0.26, "grad_norm": 2.681084669318892, "learning_rate": 8.721003149945545e-06, "loss": 0.7217, "step": 2432 }, { "epoch": 0.26, "grad_norm": 2.5296915450142516, "learning_rate": 8.719864577444082e-06, "loss": 0.6596, "step": 2433 }, { "epoch": 0.26, "grad_norm": 2.750107096539297, "learning_rate": 8.718725572779505e-06, "loss": 0.835, "step": 2434 }, { "epoch": 0.26, "grad_norm": 2.444233296643607, "learning_rate": 8.71758613608414e-06, "loss": 0.7186, "step": 2435 }, { "epoch": 0.26, "grad_norm": 2.270574624183063, "learning_rate": 8.716446267490365e-06, "loss": 0.7578, "step": 2436 }, { "epoch": 0.26, "grad_norm": 3.8389444303037235, "learning_rate": 8.715305967130604e-06, "loss": 0.734, "step": 2437 }, { "epoch": 0.26, "grad_norm": 1.1979511502659714, "learning_rate": 8.71416523513734e-06, "loss": 0.6708, "step": 2438 }, { "epoch": 0.26, "grad_norm": 2.8417444339853852, "learning_rate": 8.713024071643092e-06, "loss": 0.6287, "step": 2439 }, { "epoch": 0.26, "grad_norm": 3.0558749007403, "learning_rate": 8.71188247678044e-06, "loss": 0.7621, "step": 2440 }, { "epoch": 0.26, "grad_norm": 2.291175152328059, "learning_rate": 8.710740450682013e-06, "loss": 0.6667, "step": 2441 }, { "epoch": 0.26, "grad_norm": 2.141308772601036, "learning_rate": 8.709597993480489e-06, "loss": 0.6674, "step": 2442 }, { "epoch": 0.26, "grad_norm": 2.0735451596302137, "learning_rate": 8.708455105308591e-06, "loss": 0.6599, "step": 2443 }, { "epoch": 0.26, "grad_norm": 2.481850558377454, "learning_rate": 8.7073117862991e-06, "loss": 0.783, "step": 2444 }, { "epoch": 0.26, "grad_norm": 1.9268542381826397, "learning_rate": 8.706168036584843e-06, "loss": 0.6356, "step": 2445 }, { "epoch": 0.26, "grad_norm": 4.349321958616555, "learning_rate": 8.705023856298695e-06, "loss": 0.611, "step": 2446 }, { "epoch": 0.26, "grad_norm": 3.1941989903979624, "learning_rate": 8.703879245573588e-06, "loss": 0.6716, "step": 2447 }, { "epoch": 0.26, "grad_norm": 2.1349012651457664, "learning_rate": 8.702734204542494e-06, "loss": 0.7377, "step": 2448 }, { "epoch": 0.26, "grad_norm": 2.648744197677024, "learning_rate": 8.701588733338446e-06, "loss": 0.7413, "step": 2449 }, { "epoch": 0.26, "grad_norm": 3.3362813946257623, "learning_rate": 8.700442832094517e-06, "loss": 0.5947, "step": 2450 }, { "epoch": 0.26, "grad_norm": 2.0804046111905614, "learning_rate": 8.699296500943839e-06, "loss": 0.6786, "step": 2451 }, { "epoch": 0.26, "grad_norm": 2.661217136166758, "learning_rate": 8.698149740019587e-06, "loss": 0.7303, "step": 2452 }, { "epoch": 0.26, "grad_norm": 2.5190865956506547, "learning_rate": 8.697002549454988e-06, "loss": 0.7269, "step": 2453 }, { "epoch": 0.26, "grad_norm": 3.070053475596138, "learning_rate": 8.695854929383318e-06, "loss": 0.7228, "step": 2454 }, { "epoch": 0.26, "grad_norm": 2.441251249535768, "learning_rate": 8.694706879937909e-06, "loss": 0.6955, "step": 2455 }, { "epoch": 0.26, "grad_norm": 2.2612646874942746, "learning_rate": 8.693558401252132e-06, "loss": 0.656, "step": 2456 }, { "epoch": 0.26, "grad_norm": 2.4863160385294063, "learning_rate": 8.69240949345942e-06, "loss": 0.7101, "step": 2457 }, { "epoch": 0.26, "grad_norm": 2.43116626808894, "learning_rate": 8.691260156693245e-06, "loss": 0.6744, "step": 2458 }, { "epoch": 0.26, "grad_norm": 2.5200768749768963, "learning_rate": 8.690110391087134e-06, "loss": 0.6949, "step": 2459 }, { "epoch": 0.26, "grad_norm": 2.525389539161813, "learning_rate": 8.688960196774668e-06, "loss": 0.6323, "step": 2460 }, { "epoch": 0.26, "grad_norm": 2.102109783358811, "learning_rate": 8.687809573889467e-06, "loss": 0.6833, "step": 2461 }, { "epoch": 0.26, "grad_norm": 2.222074030086465, "learning_rate": 8.686658522565211e-06, "loss": 0.7645, "step": 2462 }, { "epoch": 0.26, "grad_norm": 2.6743137482807753, "learning_rate": 8.685507042935627e-06, "loss": 0.7335, "step": 2463 }, { "epoch": 0.26, "grad_norm": 2.8113533406525595, "learning_rate": 8.684355135134486e-06, "loss": 0.6922, "step": 2464 }, { "epoch": 0.26, "grad_norm": 1.9458115809108403, "learning_rate": 8.683202799295616e-06, "loss": 0.6556, "step": 2465 }, { "epoch": 0.26, "grad_norm": 2.1842127186245333, "learning_rate": 8.682050035552891e-06, "loss": 0.6102, "step": 2466 }, { "epoch": 0.26, "grad_norm": 2.1705972141925733, "learning_rate": 8.680896844040238e-06, "loss": 0.706, "step": 2467 }, { "epoch": 0.26, "grad_norm": 3.2360348544184308, "learning_rate": 8.67974322489163e-06, "loss": 0.7269, "step": 2468 }, { "epoch": 0.26, "grad_norm": 2.569034689616941, "learning_rate": 8.678589178241092e-06, "loss": 0.737, "step": 2469 }, { "epoch": 0.26, "grad_norm": 2.729945172388795, "learning_rate": 8.677434704222697e-06, "loss": 0.7642, "step": 2470 }, { "epoch": 0.26, "grad_norm": 2.577038874216306, "learning_rate": 8.676279802970566e-06, "loss": 0.6741, "step": 2471 }, { "epoch": 0.26, "grad_norm": 3.477474813381419, "learning_rate": 8.675124474618876e-06, "loss": 0.6524, "step": 2472 }, { "epoch": 0.26, "grad_norm": 2.281055787854727, "learning_rate": 8.673968719301849e-06, "loss": 0.6976, "step": 2473 }, { "epoch": 0.26, "grad_norm": 2.506648427439492, "learning_rate": 8.672812537153757e-06, "loss": 0.6998, "step": 2474 }, { "epoch": 0.26, "grad_norm": 4.568840294148294, "learning_rate": 8.67165592830892e-06, "loss": 0.692, "step": 2475 }, { "epoch": 0.26, "grad_norm": 2.257378434912131, "learning_rate": 8.670498892901712e-06, "loss": 0.7227, "step": 2476 }, { "epoch": 0.26, "grad_norm": 1.9597763138644833, "learning_rate": 8.669341431066552e-06, "loss": 0.6658, "step": 2477 }, { "epoch": 0.26, "grad_norm": 2.4823038278036846, "learning_rate": 8.668183542937912e-06, "loss": 0.6355, "step": 2478 }, { "epoch": 0.26, "grad_norm": 2.2040856463646663, "learning_rate": 8.66702522865031e-06, "loss": 0.6359, "step": 2479 }, { "epoch": 0.26, "grad_norm": 2.7952172523154166, "learning_rate": 8.66586648833832e-06, "loss": 0.684, "step": 2480 }, { "epoch": 0.26, "grad_norm": 2.400485734318114, "learning_rate": 8.664707322136556e-06, "loss": 0.6433, "step": 2481 }, { "epoch": 0.26, "grad_norm": 2.306795903415676, "learning_rate": 8.663547730179692e-06, "loss": 0.651, "step": 2482 }, { "epoch": 0.26, "grad_norm": 4.985126814934233, "learning_rate": 8.662387712602438e-06, "loss": 0.6954, "step": 2483 }, { "epoch": 0.26, "grad_norm": 2.2505071915852075, "learning_rate": 8.661227269539572e-06, "loss": 0.6472, "step": 2484 }, { "epoch": 0.26, "grad_norm": 1.8826833418351787, "learning_rate": 8.660066401125902e-06, "loss": 0.6311, "step": 2485 }, { "epoch": 0.26, "grad_norm": 6.818214783238789, "learning_rate": 8.658905107496299e-06, "loss": 0.6617, "step": 2486 }, { "epoch": 0.26, "grad_norm": 2.6246974001427716, "learning_rate": 8.657743388785676e-06, "loss": 0.7234, "step": 2487 }, { "epoch": 0.26, "grad_norm": 2.5053395649894235, "learning_rate": 8.656581245129e-06, "loss": 0.7851, "step": 2488 }, { "epoch": 0.26, "grad_norm": 2.5788879956613666, "learning_rate": 8.655418676661285e-06, "loss": 0.6737, "step": 2489 }, { "epoch": 0.26, "grad_norm": 2.5695918451833752, "learning_rate": 8.654255683517595e-06, "loss": 0.66, "step": 2490 }, { "epoch": 0.26, "grad_norm": 2.2259903673736456, "learning_rate": 8.653092265833044e-06, "loss": 0.7476, "step": 2491 }, { "epoch": 0.26, "grad_norm": 3.0361177334211376, "learning_rate": 8.651928423742793e-06, "loss": 0.6448, "step": 2492 }, { "epoch": 0.26, "grad_norm": 2.2871956578155253, "learning_rate": 8.650764157382054e-06, "loss": 0.6647, "step": 2493 }, { "epoch": 0.26, "grad_norm": 4.278582756268501, "learning_rate": 8.64959946688609e-06, "loss": 0.663, "step": 2494 }, { "epoch": 0.26, "grad_norm": 2.5143339131874147, "learning_rate": 8.648434352390209e-06, "loss": 0.6837, "step": 2495 }, { "epoch": 0.26, "grad_norm": 2.4594476956792586, "learning_rate": 8.64726881402977e-06, "loss": 0.6644, "step": 2496 }, { "epoch": 0.26, "grad_norm": 3.1170695576353, "learning_rate": 8.646102851940184e-06, "loss": 0.766, "step": 2497 }, { "epoch": 0.26, "grad_norm": 2.2169468874210065, "learning_rate": 8.64493646625691e-06, "loss": 0.664, "step": 2498 }, { "epoch": 0.26, "grad_norm": 3.0516103481287034, "learning_rate": 8.643769657115452e-06, "loss": 0.6429, "step": 2499 }, { "epoch": 0.26, "grad_norm": 3.027841909715622, "learning_rate": 8.642602424651369e-06, "loss": 0.7947, "step": 2500 }, { "epoch": 0.26, "grad_norm": 2.292377856177173, "learning_rate": 8.641434769000267e-06, "loss": 0.6661, "step": 2501 }, { "epoch": 0.26, "grad_norm": 2.591736810360195, "learning_rate": 8.640266690297797e-06, "loss": 0.5915, "step": 2502 }, { "epoch": 0.26, "grad_norm": 2.751465380246021, "learning_rate": 8.639098188679668e-06, "loss": 0.6897, "step": 2503 }, { "epoch": 0.26, "grad_norm": 2.0621395224891077, "learning_rate": 8.637929264281632e-06, "loss": 0.6687, "step": 2504 }, { "epoch": 0.26, "grad_norm": 1.16826930507742, "learning_rate": 8.63675991723949e-06, "loss": 0.6106, "step": 2505 }, { "epoch": 0.26, "grad_norm": 3.7332955065147577, "learning_rate": 8.635590147689092e-06, "loss": 0.7138, "step": 2506 }, { "epoch": 0.26, "grad_norm": 2.7397582619681597, "learning_rate": 8.634419955766342e-06, "loss": 0.6154, "step": 2507 }, { "epoch": 0.26, "grad_norm": 2.6765754635699213, "learning_rate": 8.633249341607186e-06, "loss": 0.6402, "step": 2508 }, { "epoch": 0.26, "grad_norm": 1.147939650717752, "learning_rate": 8.632078305347623e-06, "loss": 0.6187, "step": 2509 }, { "epoch": 0.26, "grad_norm": 2.5410600160091876, "learning_rate": 8.630906847123704e-06, "loss": 0.7342, "step": 2510 }, { "epoch": 0.26, "grad_norm": 2.2309952636873485, "learning_rate": 8.629734967071522e-06, "loss": 0.6337, "step": 2511 }, { "epoch": 0.26, "grad_norm": 2.221441936274778, "learning_rate": 8.628562665327224e-06, "loss": 0.6374, "step": 2512 }, { "epoch": 0.26, "grad_norm": 2.4825649248711654, "learning_rate": 8.627389942027008e-06, "loss": 0.7285, "step": 2513 }, { "epoch": 0.26, "grad_norm": 3.3071050264366133, "learning_rate": 8.62621679730711e-06, "loss": 0.5995, "step": 2514 }, { "epoch": 0.26, "grad_norm": 1.9177350717101802, "learning_rate": 8.62504323130383e-06, "loss": 0.7131, "step": 2515 }, { "epoch": 0.26, "grad_norm": 2.510316012626707, "learning_rate": 8.623869244153504e-06, "loss": 0.7768, "step": 2516 }, { "epoch": 0.26, "grad_norm": 3.497041444497354, "learning_rate": 8.622694835992525e-06, "loss": 0.6689, "step": 2517 }, { "epoch": 0.26, "grad_norm": 2.564165380926844, "learning_rate": 8.621520006957334e-06, "loss": 0.7526, "step": 2518 }, { "epoch": 0.27, "grad_norm": 2.6172118910010704, "learning_rate": 8.620344757184415e-06, "loss": 0.6911, "step": 2519 }, { "epoch": 0.27, "grad_norm": 2.2241203927304367, "learning_rate": 8.619169086810308e-06, "loss": 0.7166, "step": 2520 }, { "epoch": 0.27, "grad_norm": 3.3008760240604422, "learning_rate": 8.6179929959716e-06, "loss": 0.7909, "step": 2521 }, { "epoch": 0.27, "grad_norm": 3.3505911458947626, "learning_rate": 8.616816484804922e-06, "loss": 0.7658, "step": 2522 }, { "epoch": 0.27, "grad_norm": 4.268553301879235, "learning_rate": 8.615639553446961e-06, "loss": 0.642, "step": 2523 }, { "epoch": 0.27, "grad_norm": 2.9320366440133188, "learning_rate": 8.614462202034449e-06, "loss": 0.6443, "step": 2524 }, { "epoch": 0.27, "grad_norm": 2.5459832781821428, "learning_rate": 8.613284430704165e-06, "loss": 0.662, "step": 2525 }, { "epoch": 0.27, "grad_norm": 2.4781573079223422, "learning_rate": 8.612106239592944e-06, "loss": 0.6541, "step": 2526 }, { "epoch": 0.27, "grad_norm": 1.971517376818193, "learning_rate": 8.610927628837658e-06, "loss": 0.6708, "step": 2527 }, { "epoch": 0.27, "grad_norm": 2.465844796112776, "learning_rate": 8.60974859857524e-06, "loss": 0.6641, "step": 2528 }, { "epoch": 0.27, "grad_norm": 2.3666940157342347, "learning_rate": 8.608569148942664e-06, "loss": 0.6648, "step": 2529 }, { "epoch": 0.27, "grad_norm": 3.3906416500624106, "learning_rate": 8.607389280076956e-06, "loss": 0.7074, "step": 2530 }, { "epoch": 0.27, "grad_norm": 2.55985900963281, "learning_rate": 8.606208992115191e-06, "loss": 0.6741, "step": 2531 }, { "epoch": 0.27, "grad_norm": 2.088312059952641, "learning_rate": 8.605028285194487e-06, "loss": 0.6705, "step": 2532 }, { "epoch": 0.27, "grad_norm": 4.4291577802630435, "learning_rate": 8.60384715945202e-06, "loss": 0.7239, "step": 2533 }, { "epoch": 0.27, "grad_norm": 5.431296481740759, "learning_rate": 8.602665615025006e-06, "loss": 0.6131, "step": 2534 }, { "epoch": 0.27, "grad_norm": 1.9635644510309531, "learning_rate": 8.601483652050717e-06, "loss": 0.5776, "step": 2535 }, { "epoch": 0.27, "grad_norm": 2.460305744619151, "learning_rate": 8.600301270666467e-06, "loss": 0.7602, "step": 2536 }, { "epoch": 0.27, "grad_norm": 1.2925216721758674, "learning_rate": 8.599118471009622e-06, "loss": 0.6207, "step": 2537 }, { "epoch": 0.27, "grad_norm": 2.4578397320289453, "learning_rate": 8.597935253217598e-06, "loss": 0.7284, "step": 2538 }, { "epoch": 0.27, "grad_norm": 2.148544072195046, "learning_rate": 8.596751617427856e-06, "loss": 0.609, "step": 2539 }, { "epoch": 0.27, "grad_norm": 3.5641003536407068, "learning_rate": 8.595567563777909e-06, "loss": 0.6304, "step": 2540 }, { "epoch": 0.27, "grad_norm": 2.5993299217825125, "learning_rate": 8.594383092405317e-06, "loss": 0.773, "step": 2541 }, { "epoch": 0.27, "grad_norm": 4.261057636960014, "learning_rate": 8.593198203447685e-06, "loss": 0.6816, "step": 2542 }, { "epoch": 0.27, "grad_norm": 2.7270686627916243, "learning_rate": 8.592012897042677e-06, "loss": 0.6008, "step": 2543 }, { "epoch": 0.27, "grad_norm": 2.593307670860837, "learning_rate": 8.59082717332799e-06, "loss": 0.7299, "step": 2544 }, { "epoch": 0.27, "grad_norm": 2.701277864197115, "learning_rate": 8.589641032441384e-06, "loss": 0.6848, "step": 2545 }, { "epoch": 0.27, "grad_norm": 2.085418911458651, "learning_rate": 8.588454474520657e-06, "loss": 0.6909, "step": 2546 }, { "epoch": 0.27, "grad_norm": 2.8232006350273355, "learning_rate": 8.587267499703667e-06, "loss": 0.6507, "step": 2547 }, { "epoch": 0.27, "grad_norm": 2.4387883280629845, "learning_rate": 8.586080108128304e-06, "loss": 0.7263, "step": 2548 }, { "epoch": 0.27, "grad_norm": 3.2908278171042737, "learning_rate": 8.584892299932523e-06, "loss": 0.6689, "step": 2549 }, { "epoch": 0.27, "grad_norm": 2.573968919899555, "learning_rate": 8.583704075254315e-06, "loss": 0.7485, "step": 2550 }, { "epoch": 0.27, "grad_norm": 1.9765316518612366, "learning_rate": 8.582515434231729e-06, "loss": 0.6095, "step": 2551 }, { "epoch": 0.27, "grad_norm": 1.1051643768949975, "learning_rate": 8.581326377002857e-06, "loss": 0.6412, "step": 2552 }, { "epoch": 0.27, "grad_norm": 2.0858357891432138, "learning_rate": 8.580136903705838e-06, "loss": 0.7277, "step": 2553 }, { "epoch": 0.27, "grad_norm": 2.3915626172098765, "learning_rate": 8.578947014478861e-06, "loss": 0.6705, "step": 2554 }, { "epoch": 0.27, "grad_norm": 2.234544105188988, "learning_rate": 8.577756709460167e-06, "loss": 0.6751, "step": 2555 }, { "epoch": 0.27, "grad_norm": 2.99895822558418, "learning_rate": 8.576565988788042e-06, "loss": 0.6858, "step": 2556 }, { "epoch": 0.27, "grad_norm": 2.176379192047237, "learning_rate": 8.575374852600816e-06, "loss": 0.7022, "step": 2557 }, { "epoch": 0.27, "grad_norm": 2.882909865354107, "learning_rate": 8.574183301036877e-06, "loss": 0.7089, "step": 2558 }, { "epoch": 0.27, "grad_norm": 2.6446090141728487, "learning_rate": 8.572991334234654e-06, "loss": 0.7491, "step": 2559 }, { "epoch": 0.27, "grad_norm": 2.631308560093212, "learning_rate": 8.571798952332625e-06, "loss": 0.7464, "step": 2560 }, { "epoch": 0.27, "grad_norm": 2.3075799632747174, "learning_rate": 8.57060615546932e-06, "loss": 0.6408, "step": 2561 }, { "epoch": 0.27, "grad_norm": 3.280083800016366, "learning_rate": 8.569412943783313e-06, "loss": 0.6251, "step": 2562 }, { "epoch": 0.27, "grad_norm": 2.37287864006488, "learning_rate": 8.56821931741323e-06, "loss": 0.609, "step": 2563 }, { "epoch": 0.27, "grad_norm": 2.5322472513374734, "learning_rate": 8.567025276497739e-06, "loss": 0.7268, "step": 2564 }, { "epoch": 0.27, "grad_norm": 2.4974087371265776, "learning_rate": 8.565830821175563e-06, "loss": 0.7007, "step": 2565 }, { "epoch": 0.27, "grad_norm": 2.042712640061327, "learning_rate": 8.56463595158547e-06, "loss": 0.6754, "step": 2566 }, { "epoch": 0.27, "grad_norm": 3.414866039042617, "learning_rate": 8.563440667866278e-06, "loss": 0.5793, "step": 2567 }, { "epoch": 0.27, "grad_norm": 1.186808534700377, "learning_rate": 8.56224497015685e-06, "loss": 0.6083, "step": 2568 }, { "epoch": 0.27, "grad_norm": 2.7550982917448628, "learning_rate": 8.561048858596097e-06, "loss": 0.6927, "step": 2569 }, { "epoch": 0.27, "grad_norm": 3.307539577711527, "learning_rate": 8.559852333322982e-06, "loss": 0.7618, "step": 2570 }, { "epoch": 0.27, "grad_norm": 2.6597721789148605, "learning_rate": 8.558655394476513e-06, "loss": 0.7079, "step": 2571 }, { "epoch": 0.27, "grad_norm": 2.345267249284243, "learning_rate": 8.557458042195748e-06, "loss": 0.723, "step": 2572 }, { "epoch": 0.27, "grad_norm": 2.194152292832171, "learning_rate": 8.556260276619792e-06, "loss": 0.7222, "step": 2573 }, { "epoch": 0.27, "grad_norm": 2.8628566104318725, "learning_rate": 8.555062097887796e-06, "loss": 0.6131, "step": 2574 }, { "epoch": 0.27, "grad_norm": 2.2175177567930087, "learning_rate": 8.553863506138962e-06, "loss": 0.6847, "step": 2575 }, { "epoch": 0.27, "grad_norm": 5.19010534016988, "learning_rate": 8.55266450151254e-06, "loss": 0.6466, "step": 2576 }, { "epoch": 0.27, "grad_norm": 3.030066766241923, "learning_rate": 8.551465084147826e-06, "loss": 0.5981, "step": 2577 }, { "epoch": 0.27, "grad_norm": 2.5819351901307446, "learning_rate": 8.550265254184163e-06, "loss": 0.6906, "step": 2578 }, { "epoch": 0.27, "grad_norm": 2.837791982234303, "learning_rate": 8.549065011760948e-06, "loss": 0.6177, "step": 2579 }, { "epoch": 0.27, "grad_norm": 4.767739222282302, "learning_rate": 8.547864357017618e-06, "loss": 0.6715, "step": 2580 }, { "epoch": 0.27, "grad_norm": 2.6955870932338923, "learning_rate": 8.546663290093663e-06, "loss": 0.6986, "step": 2581 }, { "epoch": 0.27, "grad_norm": 2.542337333218461, "learning_rate": 8.545461811128618e-06, "loss": 0.7228, "step": 2582 }, { "epoch": 0.27, "grad_norm": 2.345404508684496, "learning_rate": 8.54425992026207e-06, "loss": 0.6988, "step": 2583 }, { "epoch": 0.27, "grad_norm": 3.046812475477053, "learning_rate": 8.54305761763365e-06, "loss": 0.6544, "step": 2584 }, { "epoch": 0.27, "grad_norm": 2.0768028881923986, "learning_rate": 8.541854903383038e-06, "loss": 0.6944, "step": 2585 }, { "epoch": 0.27, "grad_norm": 2.4296065875213158, "learning_rate": 8.54065177764996e-06, "loss": 0.6207, "step": 2586 }, { "epoch": 0.27, "grad_norm": 2.333406207555947, "learning_rate": 8.539448240574196e-06, "loss": 0.7403, "step": 2587 }, { "epoch": 0.27, "grad_norm": 3.2395561665415036, "learning_rate": 8.538244292295565e-06, "loss": 0.6861, "step": 2588 }, { "epoch": 0.27, "grad_norm": 1.1745677467987092, "learning_rate": 8.537039932953941e-06, "loss": 0.6158, "step": 2589 }, { "epoch": 0.27, "grad_norm": 2.7633198958097256, "learning_rate": 8.535835162689243e-06, "loss": 0.6364, "step": 2590 }, { "epoch": 0.27, "grad_norm": 2.5596406892643273, "learning_rate": 8.534629981641435e-06, "loss": 0.6835, "step": 2591 }, { "epoch": 0.27, "grad_norm": 2.182600890353231, "learning_rate": 8.533424389950534e-06, "loss": 0.574, "step": 2592 }, { "epoch": 0.27, "grad_norm": 2.6915694598896676, "learning_rate": 8.532218387756603e-06, "loss": 0.6927, "step": 2593 }, { "epoch": 0.27, "grad_norm": 3.6283644209612125, "learning_rate": 8.531011975199747e-06, "loss": 0.6385, "step": 2594 }, { "epoch": 0.27, "grad_norm": 2.9155113489734292, "learning_rate": 8.52980515242013e-06, "loss": 0.6983, "step": 2595 }, { "epoch": 0.27, "grad_norm": 8.980349217997206, "learning_rate": 8.528597919557953e-06, "loss": 0.7198, "step": 2596 }, { "epoch": 0.27, "grad_norm": 2.810695167253675, "learning_rate": 8.52739027675347e-06, "loss": 0.6109, "step": 2597 }, { "epoch": 0.27, "grad_norm": 2.9757821780269134, "learning_rate": 8.526182224146982e-06, "loss": 0.6589, "step": 2598 }, { "epoch": 0.27, "grad_norm": 3.5030227005870316, "learning_rate": 8.524973761878834e-06, "loss": 0.7574, "step": 2599 }, { "epoch": 0.27, "grad_norm": 1.9534355234534457, "learning_rate": 8.523764890089425e-06, "loss": 0.6311, "step": 2600 }, { "epoch": 0.27, "grad_norm": 2.2968695529272085, "learning_rate": 8.522555608919198e-06, "loss": 0.7159, "step": 2601 }, { "epoch": 0.27, "grad_norm": 2.5862520567000935, "learning_rate": 8.521345918508644e-06, "loss": 0.6494, "step": 2602 }, { "epoch": 0.27, "grad_norm": 2.2346246576533058, "learning_rate": 8.520135818998299e-06, "loss": 0.6798, "step": 2603 }, { "epoch": 0.27, "grad_norm": 2.3638719754838653, "learning_rate": 8.518925310528749e-06, "loss": 0.728, "step": 2604 }, { "epoch": 0.27, "grad_norm": 3.03918196825084, "learning_rate": 8.51771439324063e-06, "loss": 0.7459, "step": 2605 }, { "epoch": 0.27, "grad_norm": 2.3225257145217433, "learning_rate": 8.516503067274622e-06, "loss": 0.7274, "step": 2606 }, { "epoch": 0.27, "grad_norm": 2.186434199840289, "learning_rate": 8.515291332771452e-06, "loss": 0.6888, "step": 2607 }, { "epoch": 0.27, "grad_norm": 1.1134392839374618, "learning_rate": 8.514079189871898e-06, "loss": 0.6312, "step": 2608 }, { "epoch": 0.27, "grad_norm": 2.563735931453355, "learning_rate": 8.51286663871678e-06, "loss": 0.7038, "step": 2609 }, { "epoch": 0.27, "grad_norm": 7.940504117749469, "learning_rate": 8.511653679446972e-06, "loss": 0.7079, "step": 2610 }, { "epoch": 0.27, "grad_norm": 3.2533130654227143, "learning_rate": 8.51044031220339e-06, "loss": 0.6543, "step": 2611 }, { "epoch": 0.27, "grad_norm": 3.8589646098695116, "learning_rate": 8.509226537127e-06, "loss": 0.6759, "step": 2612 }, { "epoch": 0.27, "grad_norm": 3.5253019336589153, "learning_rate": 8.508012354358815e-06, "loss": 0.5805, "step": 2613 }, { "epoch": 0.28, "grad_norm": 3.1367218095988973, "learning_rate": 8.506797764039895e-06, "loss": 0.6941, "step": 2614 }, { "epoch": 0.28, "grad_norm": 3.0713095113351727, "learning_rate": 8.505582766311349e-06, "loss": 0.6673, "step": 2615 }, { "epoch": 0.28, "grad_norm": 2.312239589857587, "learning_rate": 8.504367361314329e-06, "loss": 0.6638, "step": 2616 }, { "epoch": 0.28, "grad_norm": 2.8300865651331475, "learning_rate": 8.50315154919004e-06, "loss": 0.7389, "step": 2617 }, { "epoch": 0.28, "grad_norm": 2.5470160475934462, "learning_rate": 8.501935330079732e-06, "loss": 0.7534, "step": 2618 }, { "epoch": 0.28, "grad_norm": 3.316557953808444, "learning_rate": 8.5007187041247e-06, "loss": 0.7406, "step": 2619 }, { "epoch": 0.28, "grad_norm": 2.146381681148758, "learning_rate": 8.499501671466287e-06, "loss": 0.663, "step": 2620 }, { "epoch": 0.28, "grad_norm": 2.7424691680667523, "learning_rate": 8.498284232245888e-06, "loss": 0.6684, "step": 2621 }, { "epoch": 0.28, "grad_norm": 2.812793415389665, "learning_rate": 8.497066386604937e-06, "loss": 0.7148, "step": 2622 }, { "epoch": 0.28, "grad_norm": 2.4179027330742247, "learning_rate": 8.495848134684924e-06, "loss": 0.5672, "step": 2623 }, { "epoch": 0.28, "grad_norm": 2.9317748456252173, "learning_rate": 8.494629476627378e-06, "loss": 0.6196, "step": 2624 }, { "epoch": 0.28, "grad_norm": 3.8552043750504303, "learning_rate": 8.493410412573883e-06, "loss": 0.76, "step": 2625 }, { "epoch": 0.28, "grad_norm": 2.1671342068280426, "learning_rate": 8.492190942666065e-06, "loss": 0.7228, "step": 2626 }, { "epoch": 0.28, "grad_norm": 3.4361650708105707, "learning_rate": 8.490971067045596e-06, "loss": 0.7221, "step": 2627 }, { "epoch": 0.28, "grad_norm": 2.6358964795479687, "learning_rate": 8.489750785854203e-06, "loss": 0.6363, "step": 2628 }, { "epoch": 0.28, "grad_norm": 3.1358464929580174, "learning_rate": 8.48853009923365e-06, "loss": 0.7216, "step": 2629 }, { "epoch": 0.28, "grad_norm": 2.3541129682126383, "learning_rate": 8.487309007325755e-06, "loss": 0.5821, "step": 2630 }, { "epoch": 0.28, "grad_norm": 2.420390390909207, "learning_rate": 8.48608751027238e-06, "loss": 0.6743, "step": 2631 }, { "epoch": 0.28, "grad_norm": 2.4451760999988363, "learning_rate": 8.484865608215435e-06, "loss": 0.6701, "step": 2632 }, { "epoch": 0.28, "grad_norm": 3.4307024526167944, "learning_rate": 8.483643301296877e-06, "loss": 0.7317, "step": 2633 }, { "epoch": 0.28, "grad_norm": 2.2798188313827032, "learning_rate": 8.482420589658712e-06, "loss": 0.6603, "step": 2634 }, { "epoch": 0.28, "grad_norm": 2.7920937123015785, "learning_rate": 8.481197473442989e-06, "loss": 0.7351, "step": 2635 }, { "epoch": 0.28, "grad_norm": 3.1650536164648404, "learning_rate": 8.479973952791805e-06, "loss": 0.747, "step": 2636 }, { "epoch": 0.28, "grad_norm": 2.4408149875447256, "learning_rate": 8.478750027847308e-06, "loss": 0.7117, "step": 2637 }, { "epoch": 0.28, "grad_norm": 2.5845290741492817, "learning_rate": 8.477525698751688e-06, "loss": 0.6305, "step": 2638 }, { "epoch": 0.28, "grad_norm": 2.311318523859558, "learning_rate": 8.476300965647186e-06, "loss": 0.6609, "step": 2639 }, { "epoch": 0.28, "grad_norm": 3.0266589746186376, "learning_rate": 8.475075828676086e-06, "loss": 0.663, "step": 2640 }, { "epoch": 0.28, "grad_norm": 2.8468358467655803, "learning_rate": 8.473850287980721e-06, "loss": 0.6316, "step": 2641 }, { "epoch": 0.28, "grad_norm": 3.1150563470392996, "learning_rate": 8.472624343703473e-06, "loss": 0.713, "step": 2642 }, { "epoch": 0.28, "grad_norm": 3.3018689988839918, "learning_rate": 8.471397995986766e-06, "loss": 0.5945, "step": 2643 }, { "epoch": 0.28, "grad_norm": 2.299828029802301, "learning_rate": 8.470171244973075e-06, "loss": 0.6362, "step": 2644 }, { "epoch": 0.28, "grad_norm": 2.400265553940191, "learning_rate": 8.46894409080492e-06, "loss": 0.7142, "step": 2645 }, { "epoch": 0.28, "grad_norm": 2.3965587472793275, "learning_rate": 8.467716533624869e-06, "loss": 0.6693, "step": 2646 }, { "epoch": 0.28, "grad_norm": 3.1424468064421527, "learning_rate": 8.466488573575536e-06, "loss": 0.6621, "step": 2647 }, { "epoch": 0.28, "grad_norm": 4.058208255714223, "learning_rate": 8.465260210799579e-06, "loss": 0.5876, "step": 2648 }, { "epoch": 0.28, "grad_norm": 5.051342205579871, "learning_rate": 8.464031445439708e-06, "loss": 0.6876, "step": 2649 }, { "epoch": 0.28, "grad_norm": 2.435428283210832, "learning_rate": 8.462802277638677e-06, "loss": 0.7141, "step": 2650 }, { "epoch": 0.28, "grad_norm": 2.2536304400179357, "learning_rate": 8.461572707539288e-06, "loss": 0.7239, "step": 2651 }, { "epoch": 0.28, "grad_norm": 2.2033188343885404, "learning_rate": 8.460342735284388e-06, "loss": 0.5988, "step": 2652 }, { "epoch": 0.28, "grad_norm": 2.6488901006220136, "learning_rate": 8.459112361016873e-06, "loss": 0.7123, "step": 2653 }, { "epoch": 0.28, "grad_norm": 2.454166766453256, "learning_rate": 8.457881584879681e-06, "loss": 0.7274, "step": 2654 }, { "epoch": 0.28, "grad_norm": 2.338529320134492, "learning_rate": 8.456650407015804e-06, "loss": 0.7723, "step": 2655 }, { "epoch": 0.28, "grad_norm": 2.7241780151773933, "learning_rate": 8.455418827568275e-06, "loss": 0.619, "step": 2656 }, { "epoch": 0.28, "grad_norm": 2.2430549835972053, "learning_rate": 8.454186846680174e-06, "loss": 0.6374, "step": 2657 }, { "epoch": 0.28, "grad_norm": 3.090373805102784, "learning_rate": 8.452954464494631e-06, "loss": 0.7013, "step": 2658 }, { "epoch": 0.28, "grad_norm": 2.50997857984063, "learning_rate": 8.451721681154819e-06, "loss": 0.6987, "step": 2659 }, { "epoch": 0.28, "grad_norm": 2.318114958320992, "learning_rate": 8.45048849680396e-06, "loss": 0.7233, "step": 2660 }, { "epoch": 0.28, "grad_norm": 1.2346055238002462, "learning_rate": 8.449254911585323e-06, "loss": 0.6527, "step": 2661 }, { "epoch": 0.28, "grad_norm": 2.293606280499452, "learning_rate": 8.44802092564222e-06, "loss": 0.6759, "step": 2662 }, { "epoch": 0.28, "grad_norm": 1.206662259361233, "learning_rate": 8.446786539118014e-06, "loss": 0.6139, "step": 2663 }, { "epoch": 0.28, "grad_norm": 12.24962776644313, "learning_rate": 8.445551752156111e-06, "loss": 0.6434, "step": 2664 }, { "epoch": 0.28, "grad_norm": 3.2212865543394447, "learning_rate": 8.444316564899966e-06, "loss": 0.6465, "step": 2665 }, { "epoch": 0.28, "grad_norm": 2.714117520697914, "learning_rate": 8.443080977493078e-06, "loss": 0.5998, "step": 2666 }, { "epoch": 0.28, "grad_norm": 3.336735248734921, "learning_rate": 8.441844990078995e-06, "loss": 0.7573, "step": 2667 }, { "epoch": 0.28, "grad_norm": 2.403703415234515, "learning_rate": 8.44060860280131e-06, "loss": 0.6043, "step": 2668 }, { "epoch": 0.28, "grad_norm": 2.382422434010672, "learning_rate": 8.439371815803666e-06, "loss": 0.7587, "step": 2669 }, { "epoch": 0.28, "grad_norm": 3.0968077866837533, "learning_rate": 8.438134629229746e-06, "loss": 0.6986, "step": 2670 }, { "epoch": 0.28, "grad_norm": 2.43034993732329, "learning_rate": 8.436897043223282e-06, "loss": 0.5956, "step": 2671 }, { "epoch": 0.28, "grad_norm": 2.396150179364192, "learning_rate": 8.435659057928054e-06, "loss": 0.6747, "step": 2672 }, { "epoch": 0.28, "grad_norm": 2.7277764541975973, "learning_rate": 8.434420673487888e-06, "loss": 0.6954, "step": 2673 }, { "epoch": 0.28, "grad_norm": 2.217561657414955, "learning_rate": 8.433181890046658e-06, "loss": 0.6458, "step": 2674 }, { "epoch": 0.28, "grad_norm": 9.236257143877165, "learning_rate": 8.431942707748279e-06, "loss": 0.6915, "step": 2675 }, { "epoch": 0.28, "grad_norm": 3.0699718693696636, "learning_rate": 8.430703126736717e-06, "loss": 0.6652, "step": 2676 }, { "epoch": 0.28, "grad_norm": 3.175699865000509, "learning_rate": 8.429463147155984e-06, "loss": 0.772, "step": 2677 }, { "epoch": 0.28, "grad_norm": 2.7545634656991043, "learning_rate": 8.428222769150137e-06, "loss": 0.6886, "step": 2678 }, { "epoch": 0.28, "grad_norm": 2.448219091164623, "learning_rate": 8.426981992863276e-06, "loss": 0.6719, "step": 2679 }, { "epoch": 0.28, "grad_norm": 2.2382890240649553, "learning_rate": 8.425740818439553e-06, "loss": 0.7212, "step": 2680 }, { "epoch": 0.28, "grad_norm": 2.483770641222447, "learning_rate": 8.424499246023168e-06, "loss": 0.7427, "step": 2681 }, { "epoch": 0.28, "grad_norm": 3.0361221580953, "learning_rate": 8.42325727575836e-06, "loss": 0.7023, "step": 2682 }, { "epoch": 0.28, "grad_norm": 2.1519597344947137, "learning_rate": 8.422014907789413e-06, "loss": 0.7392, "step": 2683 }, { "epoch": 0.28, "grad_norm": 2.1345686803799038, "learning_rate": 8.420772142260667e-06, "loss": 0.6333, "step": 2684 }, { "epoch": 0.28, "grad_norm": 2.287284430544546, "learning_rate": 8.419528979316505e-06, "loss": 0.7573, "step": 2685 }, { "epoch": 0.28, "grad_norm": 2.4381162142757145, "learning_rate": 8.41828541910135e-06, "loss": 0.7724, "step": 2686 }, { "epoch": 0.28, "grad_norm": 2.8332535794197633, "learning_rate": 8.417041461759674e-06, "loss": 0.6979, "step": 2687 }, { "epoch": 0.28, "grad_norm": 1.1728298056179722, "learning_rate": 8.415797107436e-06, "loss": 0.627, "step": 2688 }, { "epoch": 0.28, "grad_norm": 1.1601804909844005, "learning_rate": 8.414552356274891e-06, "loss": 0.6739, "step": 2689 }, { "epoch": 0.28, "grad_norm": 2.679136900723749, "learning_rate": 8.413307208420963e-06, "loss": 0.7404, "step": 2690 }, { "epoch": 0.28, "grad_norm": 2.5824680380348557, "learning_rate": 8.412061664018869e-06, "loss": 0.745, "step": 2691 }, { "epoch": 0.28, "grad_norm": 3.209895645078567, "learning_rate": 8.410815723213312e-06, "loss": 0.6478, "step": 2692 }, { "epoch": 0.28, "grad_norm": 2.1856854173402795, "learning_rate": 8.409569386149046e-06, "loss": 0.6822, "step": 2693 }, { "epoch": 0.28, "grad_norm": 2.3084967091893653, "learning_rate": 8.408322652970866e-06, "loss": 0.7768, "step": 2694 }, { "epoch": 0.28, "grad_norm": 3.9063292951795425, "learning_rate": 8.40707552382361e-06, "loss": 0.6608, "step": 2695 }, { "epoch": 0.28, "grad_norm": 2.051324396490213, "learning_rate": 8.40582799885217e-06, "loss": 0.5923, "step": 2696 }, { "epoch": 0.28, "grad_norm": 2.765515717742255, "learning_rate": 8.404580078201476e-06, "loss": 0.7872, "step": 2697 }, { "epoch": 0.28, "grad_norm": 3.1789517487510066, "learning_rate": 8.403331762016514e-06, "loss": 0.6043, "step": 2698 }, { "epoch": 0.28, "grad_norm": 2.2655735243519515, "learning_rate": 8.402083050442302e-06, "loss": 0.5652, "step": 2699 }, { "epoch": 0.28, "grad_norm": 2.2602508493232687, "learning_rate": 8.400833943623919e-06, "loss": 0.7123, "step": 2700 }, { "epoch": 0.28, "grad_norm": 2.5454383180770535, "learning_rate": 8.399584441706477e-06, "loss": 0.699, "step": 2701 }, { "epoch": 0.28, "grad_norm": 2.2564015333342606, "learning_rate": 8.398334544835143e-06, "loss": 0.6204, "step": 2702 }, { "epoch": 0.28, "grad_norm": 2.418647080107335, "learning_rate": 8.397084253155125e-06, "loss": 0.6295, "step": 2703 }, { "epoch": 0.28, "grad_norm": 3.25654659367241, "learning_rate": 8.395833566811677e-06, "loss": 0.692, "step": 2704 }, { "epoch": 0.28, "grad_norm": 2.666744974325964, "learning_rate": 8.394582485950103e-06, "loss": 0.6038, "step": 2705 }, { "epoch": 0.28, "grad_norm": 2.4348825850755493, "learning_rate": 8.393331010715749e-06, "loss": 0.6997, "step": 2706 }, { "epoch": 0.28, "grad_norm": 2.0592895390345376, "learning_rate": 8.392079141254006e-06, "loss": 0.6366, "step": 2707 }, { "epoch": 0.28, "grad_norm": 2.0843302668304746, "learning_rate": 8.390826877710314e-06, "loss": 0.595, "step": 2708 }, { "epoch": 0.29, "grad_norm": 2.693813971402732, "learning_rate": 8.38957422023016e-06, "loss": 0.6939, "step": 2709 }, { "epoch": 0.29, "grad_norm": 2.4215728777192447, "learning_rate": 8.388321168959068e-06, "loss": 0.7119, "step": 2710 }, { "epoch": 0.29, "grad_norm": 2.4275301307354837, "learning_rate": 8.387067724042618e-06, "loss": 0.6875, "step": 2711 }, { "epoch": 0.29, "grad_norm": 6.524758976644784, "learning_rate": 8.38581388562643e-06, "loss": 0.7328, "step": 2712 }, { "epoch": 0.29, "grad_norm": 2.529001789661384, "learning_rate": 8.384559653856174e-06, "loss": 0.6467, "step": 2713 }, { "epoch": 0.29, "grad_norm": 2.339231251838686, "learning_rate": 8.383305028877559e-06, "loss": 0.6954, "step": 2714 }, { "epoch": 0.29, "grad_norm": 2.422685898720981, "learning_rate": 8.382050010836349e-06, "loss": 0.7583, "step": 2715 }, { "epoch": 0.29, "grad_norm": 5.610823185514183, "learning_rate": 8.380794599878343e-06, "loss": 0.67, "step": 2716 }, { "epoch": 0.29, "grad_norm": 3.642245922603603, "learning_rate": 8.379538796149395e-06, "loss": 0.6711, "step": 2717 }, { "epoch": 0.29, "grad_norm": 2.3135555055352635, "learning_rate": 8.378282599795397e-06, "loss": 0.6705, "step": 2718 }, { "epoch": 0.29, "grad_norm": 2.447841830239155, "learning_rate": 8.377026010962293e-06, "loss": 0.6548, "step": 2719 }, { "epoch": 0.29, "grad_norm": 2.349506243780416, "learning_rate": 8.375769029796068e-06, "loss": 0.6988, "step": 2720 }, { "epoch": 0.29, "grad_norm": 3.4916280933140373, "learning_rate": 8.374511656442756e-06, "loss": 0.6912, "step": 2721 }, { "epoch": 0.29, "grad_norm": 2.008729698463561, "learning_rate": 8.373253891048436e-06, "loss": 0.6498, "step": 2722 }, { "epoch": 0.29, "grad_norm": 2.504995655860519, "learning_rate": 8.371995733759228e-06, "loss": 0.6303, "step": 2723 }, { "epoch": 0.29, "grad_norm": 4.465351579775575, "learning_rate": 8.370737184721305e-06, "loss": 0.6931, "step": 2724 }, { "epoch": 0.29, "grad_norm": 2.786999536883786, "learning_rate": 8.369478244080878e-06, "loss": 0.6444, "step": 2725 }, { "epoch": 0.29, "grad_norm": 3.089894612238134, "learning_rate": 8.368218911984211e-06, "loss": 0.7627, "step": 2726 }, { "epoch": 0.29, "grad_norm": 2.929159775372233, "learning_rate": 8.366959188577606e-06, "loss": 0.6889, "step": 2727 }, { "epoch": 0.29, "grad_norm": 5.440489695426411, "learning_rate": 8.365699074007416e-06, "loss": 0.6793, "step": 2728 }, { "epoch": 0.29, "grad_norm": 3.058496978919441, "learning_rate": 8.364438568420034e-06, "loss": 0.6912, "step": 2729 }, { "epoch": 0.29, "grad_norm": 2.4524683201064064, "learning_rate": 8.363177671961908e-06, "loss": 0.6447, "step": 2730 }, { "epoch": 0.29, "grad_norm": 2.0795075403169156, "learning_rate": 8.36191638477952e-06, "loss": 0.6589, "step": 2731 }, { "epoch": 0.29, "grad_norm": 1.9902950430871291, "learning_rate": 8.360654707019406e-06, "loss": 0.6756, "step": 2732 }, { "epoch": 0.29, "grad_norm": 2.568567286867176, "learning_rate": 8.359392638828142e-06, "loss": 0.7202, "step": 2733 }, { "epoch": 0.29, "grad_norm": 2.7851953645089007, "learning_rate": 8.358130180352353e-06, "loss": 0.6707, "step": 2734 }, { "epoch": 0.29, "grad_norm": 2.7057531159089776, "learning_rate": 8.356867331738706e-06, "loss": 0.6586, "step": 2735 }, { "epoch": 0.29, "grad_norm": 3.2347571171882237, "learning_rate": 8.355604093133916e-06, "loss": 0.6705, "step": 2736 }, { "epoch": 0.29, "grad_norm": 2.625192631188438, "learning_rate": 8.354340464684745e-06, "loss": 0.7096, "step": 2737 }, { "epoch": 0.29, "grad_norm": 2.7137203886388295, "learning_rate": 8.353076446537993e-06, "loss": 0.6789, "step": 2738 }, { "epoch": 0.29, "grad_norm": 3.1425928647546533, "learning_rate": 8.351812038840513e-06, "loss": 0.6174, "step": 2739 }, { "epoch": 0.29, "grad_norm": 3.1199663116285286, "learning_rate": 8.3505472417392e-06, "loss": 0.6875, "step": 2740 }, { "epoch": 0.29, "grad_norm": 2.3558771465349433, "learning_rate": 8.349282055380992e-06, "loss": 0.6542, "step": 2741 }, { "epoch": 0.29, "grad_norm": 2.9753417802732893, "learning_rate": 8.348016479912877e-06, "loss": 0.6566, "step": 2742 }, { "epoch": 0.29, "grad_norm": 2.5685130835209935, "learning_rate": 8.346750515481888e-06, "loss": 0.7154, "step": 2743 }, { "epoch": 0.29, "grad_norm": 3.1983652825774427, "learning_rate": 8.345484162235096e-06, "loss": 0.6878, "step": 2744 }, { "epoch": 0.29, "grad_norm": 8.279656071212713, "learning_rate": 8.344217420319624e-06, "loss": 0.6388, "step": 2745 }, { "epoch": 0.29, "grad_norm": 2.6025787077693563, "learning_rate": 8.342950289882641e-06, "loss": 0.6941, "step": 2746 }, { "epoch": 0.29, "grad_norm": 2.614210374161948, "learning_rate": 8.341682771071357e-06, "loss": 0.6266, "step": 2747 }, { "epoch": 0.29, "grad_norm": 2.1048003157133306, "learning_rate": 8.340414864033028e-06, "loss": 0.6507, "step": 2748 }, { "epoch": 0.29, "grad_norm": 3.1566816578883072, "learning_rate": 8.339146568914958e-06, "loss": 0.7176, "step": 2749 }, { "epoch": 0.29, "grad_norm": 2.6545077340607643, "learning_rate": 8.337877885864489e-06, "loss": 0.7292, "step": 2750 }, { "epoch": 0.29, "grad_norm": 3.9130923410553775, "learning_rate": 8.336608815029018e-06, "loss": 0.7339, "step": 2751 }, { "epoch": 0.29, "grad_norm": 3.184098187059293, "learning_rate": 8.335339356555981e-06, "loss": 0.6965, "step": 2752 }, { "epoch": 0.29, "grad_norm": 2.14723331071444, "learning_rate": 8.334069510592857e-06, "loss": 0.5925, "step": 2753 }, { "epoch": 0.29, "grad_norm": 2.9405681252036464, "learning_rate": 8.332799277287175e-06, "loss": 0.6977, "step": 2754 }, { "epoch": 0.29, "grad_norm": 2.3880455307462882, "learning_rate": 8.331528656786508e-06, "loss": 0.6346, "step": 2755 }, { "epoch": 0.29, "grad_norm": 2.322406078061077, "learning_rate": 8.330257649238472e-06, "loss": 0.7288, "step": 2756 }, { "epoch": 0.29, "grad_norm": 2.1014011587142933, "learning_rate": 8.328986254790729e-06, "loss": 0.6754, "step": 2757 }, { "epoch": 0.29, "grad_norm": 1.3514936631244956, "learning_rate": 8.327714473590986e-06, "loss": 0.646, "step": 2758 }, { "epoch": 0.29, "grad_norm": 2.299770815204609, "learning_rate": 8.326442305786995e-06, "loss": 0.6682, "step": 2759 }, { "epoch": 0.29, "grad_norm": 2.4432174028048284, "learning_rate": 8.325169751526552e-06, "loss": 0.7419, "step": 2760 }, { "epoch": 0.29, "grad_norm": 2.7511522431665765, "learning_rate": 8.323896810957501e-06, "loss": 0.5777, "step": 2761 }, { "epoch": 0.29, "grad_norm": 2.7500144099538226, "learning_rate": 8.322623484227725e-06, "loss": 0.6688, "step": 2762 }, { "epoch": 0.29, "grad_norm": 2.442208106975574, "learning_rate": 8.321349771485159e-06, "loss": 0.7288, "step": 2763 }, { "epoch": 0.29, "grad_norm": 2.4073668296629323, "learning_rate": 8.320075672877776e-06, "loss": 0.6964, "step": 2764 }, { "epoch": 0.29, "grad_norm": 2.973204687169632, "learning_rate": 8.3188011885536e-06, "loss": 0.665, "step": 2765 }, { "epoch": 0.29, "grad_norm": 1.9341022002413333, "learning_rate": 8.317526318660695e-06, "loss": 0.6552, "step": 2766 }, { "epoch": 0.29, "grad_norm": 2.41457288085297, "learning_rate": 8.316251063347175e-06, "loss": 0.7097, "step": 2767 }, { "epoch": 0.29, "grad_norm": 2.5800526852961263, "learning_rate": 8.314975422761187e-06, "loss": 0.5778, "step": 2768 }, { "epoch": 0.29, "grad_norm": 2.752826125090857, "learning_rate": 8.313699397050941e-06, "loss": 0.6198, "step": 2769 }, { "epoch": 0.29, "grad_norm": 2.3181883296463717, "learning_rate": 8.312422986364677e-06, "loss": 0.546, "step": 2770 }, { "epoch": 0.29, "grad_norm": 2.3923531457438534, "learning_rate": 8.311146190850687e-06, "loss": 0.7019, "step": 2771 }, { "epoch": 0.29, "grad_norm": 2.57435657748528, "learning_rate": 8.309869010657303e-06, "loss": 0.6215, "step": 2772 }, { "epoch": 0.29, "grad_norm": 1.9661259285016583, "learning_rate": 8.308591445932905e-06, "loss": 0.6052, "step": 2773 }, { "epoch": 0.29, "grad_norm": 2.1300824586691043, "learning_rate": 8.307313496825918e-06, "loss": 0.6245, "step": 2774 }, { "epoch": 0.29, "grad_norm": 2.7492441683809816, "learning_rate": 8.306035163484806e-06, "loss": 0.6852, "step": 2775 }, { "epoch": 0.29, "grad_norm": 2.4951222437462484, "learning_rate": 8.30475644605809e-06, "loss": 0.6103, "step": 2776 }, { "epoch": 0.29, "grad_norm": 2.2323509496392493, "learning_rate": 8.30347734469432e-06, "loss": 0.6149, "step": 2777 }, { "epoch": 0.29, "grad_norm": 3.702733593456729, "learning_rate": 8.302197859542104e-06, "loss": 0.6772, "step": 2778 }, { "epoch": 0.29, "grad_norm": 2.029812775065599, "learning_rate": 8.300917990750085e-06, "loss": 0.669, "step": 2779 }, { "epoch": 0.29, "grad_norm": 2.436939042536463, "learning_rate": 8.299637738466956e-06, "loss": 0.6703, "step": 2780 }, { "epoch": 0.29, "grad_norm": 7.064859457627149, "learning_rate": 8.298357102841452e-06, "loss": 0.6941, "step": 2781 }, { "epoch": 0.29, "grad_norm": 2.290933554699684, "learning_rate": 8.297076084022355e-06, "loss": 0.585, "step": 2782 }, { "epoch": 0.29, "grad_norm": 2.7292159300881327, "learning_rate": 8.29579468215849e-06, "loss": 0.6463, "step": 2783 }, { "epoch": 0.29, "grad_norm": 2.4076744166382755, "learning_rate": 8.294512897398725e-06, "loss": 0.6891, "step": 2784 }, { "epoch": 0.29, "grad_norm": 2.400183386823079, "learning_rate": 8.293230729891976e-06, "loss": 0.6797, "step": 2785 }, { "epoch": 0.29, "grad_norm": 2.5300566156875215, "learning_rate": 8.2919481797872e-06, "loss": 0.6729, "step": 2786 }, { "epoch": 0.29, "grad_norm": 2.504139135974389, "learning_rate": 8.2906652472334e-06, "loss": 0.7771, "step": 2787 }, { "epoch": 0.29, "grad_norm": 2.4033755762522127, "learning_rate": 8.289381932379625e-06, "loss": 0.7255, "step": 2788 }, { "epoch": 0.29, "grad_norm": 2.130391692034721, "learning_rate": 8.288098235374966e-06, "loss": 0.6504, "step": 2789 }, { "epoch": 0.29, "grad_norm": 2.3119004788902524, "learning_rate": 8.286814156368559e-06, "loss": 0.7532, "step": 2790 }, { "epoch": 0.29, "grad_norm": 12.473032165918204, "learning_rate": 8.285529695509585e-06, "loss": 0.7055, "step": 2791 }, { "epoch": 0.29, "grad_norm": 2.874206287893155, "learning_rate": 8.284244852947265e-06, "loss": 0.7682, "step": 2792 }, { "epoch": 0.29, "grad_norm": 2.3354999888656627, "learning_rate": 8.282959628830875e-06, "loss": 0.7107, "step": 2793 }, { "epoch": 0.29, "grad_norm": 2.6099032292361617, "learning_rate": 8.281674023309725e-06, "loss": 0.6281, "step": 2794 }, { "epoch": 0.29, "grad_norm": 2.232202918075071, "learning_rate": 8.280388036533171e-06, "loss": 0.6399, "step": 2795 }, { "epoch": 0.29, "grad_norm": 2.491569330119017, "learning_rate": 8.27910166865062e-06, "loss": 0.626, "step": 2796 }, { "epoch": 0.29, "grad_norm": 2.2273954138081073, "learning_rate": 8.277814919811516e-06, "loss": 0.6008, "step": 2797 }, { "epoch": 0.29, "grad_norm": 2.2945544456627385, "learning_rate": 8.276527790165349e-06, "loss": 0.6461, "step": 2798 }, { "epoch": 0.29, "grad_norm": 2.277058416467495, "learning_rate": 8.275240279861655e-06, "loss": 0.69, "step": 2799 }, { "epoch": 0.29, "grad_norm": 2.2731253809981653, "learning_rate": 8.273952389050015e-06, "loss": 0.6764, "step": 2800 }, { "epoch": 0.29, "grad_norm": 2.1917315114226636, "learning_rate": 8.272664117880047e-06, "loss": 0.6476, "step": 2801 }, { "epoch": 0.29, "grad_norm": 2.25326375571558, "learning_rate": 8.271375466501424e-06, "loss": 0.7102, "step": 2802 }, { "epoch": 0.29, "grad_norm": 2.302993394481045, "learning_rate": 8.270086435063856e-06, "loss": 0.6434, "step": 2803 }, { "epoch": 0.3, "grad_norm": 2.7166993208874195, "learning_rate": 8.268797023717098e-06, "loss": 0.6369, "step": 2804 }, { "epoch": 0.3, "grad_norm": 2.2576333540166673, "learning_rate": 8.267507232610952e-06, "loss": 0.7091, "step": 2805 }, { "epoch": 0.3, "grad_norm": 2.834052961933635, "learning_rate": 8.26621706189526e-06, "loss": 0.6522, "step": 2806 }, { "epoch": 0.3, "grad_norm": 2.4814364257476966, "learning_rate": 8.264926511719912e-06, "loss": 0.6399, "step": 2807 }, { "epoch": 0.3, "grad_norm": 2.8842459517526953, "learning_rate": 8.26363558223484e-06, "loss": 0.7116, "step": 2808 }, { "epoch": 0.3, "grad_norm": 1.2082080106969904, "learning_rate": 8.26234427359002e-06, "loss": 0.6688, "step": 2809 }, { "epoch": 0.3, "grad_norm": 2.234644633018572, "learning_rate": 8.261052585935471e-06, "loss": 0.6346, "step": 2810 }, { "epoch": 0.3, "grad_norm": 2.4322865695650178, "learning_rate": 8.259760519421263e-06, "loss": 0.7054, "step": 2811 }, { "epoch": 0.3, "grad_norm": 2.6564893401792675, "learning_rate": 8.258468074197499e-06, "loss": 0.6955, "step": 2812 }, { "epoch": 0.3, "grad_norm": 2.266036275854144, "learning_rate": 8.257175250414333e-06, "loss": 0.7159, "step": 2813 }, { "epoch": 0.3, "grad_norm": 2.762553186968921, "learning_rate": 8.255882048221961e-06, "loss": 0.7056, "step": 2814 }, { "epoch": 0.3, "grad_norm": 4.139967136595774, "learning_rate": 8.254588467770628e-06, "loss": 0.707, "step": 2815 }, { "epoch": 0.3, "grad_norm": 3.9568494330619317, "learning_rate": 8.253294509210612e-06, "loss": 0.6702, "step": 2816 }, { "epoch": 0.3, "grad_norm": 3.031085393262171, "learning_rate": 8.252000172692244e-06, "loss": 0.6967, "step": 2817 }, { "epoch": 0.3, "grad_norm": 3.3355616466171094, "learning_rate": 8.250705458365897e-06, "loss": 0.7047, "step": 2818 }, { "epoch": 0.3, "grad_norm": 2.3126709857674936, "learning_rate": 8.249410366381987e-06, "loss": 0.6727, "step": 2819 }, { "epoch": 0.3, "grad_norm": 2.857054433557827, "learning_rate": 8.248114896890975e-06, "loss": 0.6336, "step": 2820 }, { "epoch": 0.3, "grad_norm": 3.0926046214596106, "learning_rate": 8.246819050043363e-06, "loss": 0.6691, "step": 2821 }, { "epoch": 0.3, "grad_norm": 2.618242351375955, "learning_rate": 8.245522825989697e-06, "loss": 0.6733, "step": 2822 }, { "epoch": 0.3, "grad_norm": 2.7640385714230673, "learning_rate": 8.244226224880574e-06, "loss": 0.6313, "step": 2823 }, { "epoch": 0.3, "grad_norm": 2.5354134106997948, "learning_rate": 8.242929246866624e-06, "loss": 0.7069, "step": 2824 }, { "epoch": 0.3, "grad_norm": 2.933933284256052, "learning_rate": 8.24163189209853e-06, "loss": 0.749, "step": 2825 }, { "epoch": 0.3, "grad_norm": 2.8633014033698627, "learning_rate": 8.240334160727013e-06, "loss": 0.6782, "step": 2826 }, { "epoch": 0.3, "grad_norm": 2.7941442636390543, "learning_rate": 8.23903605290284e-06, "loss": 0.6433, "step": 2827 }, { "epoch": 0.3, "grad_norm": 2.3734190039511978, "learning_rate": 8.23773756877682e-06, "loss": 0.6341, "step": 2828 }, { "epoch": 0.3, "grad_norm": 4.306290887870174, "learning_rate": 8.236438708499811e-06, "loss": 0.7399, "step": 2829 }, { "epoch": 0.3, "grad_norm": 3.327313393460941, "learning_rate": 8.235139472222708e-06, "loss": 0.6106, "step": 2830 }, { "epoch": 0.3, "grad_norm": 2.842091527633625, "learning_rate": 8.233839860096453e-06, "loss": 0.6594, "step": 2831 }, { "epoch": 0.3, "grad_norm": 2.427751709970699, "learning_rate": 8.23253987227203e-06, "loss": 0.757, "step": 2832 }, { "epoch": 0.3, "grad_norm": 4.827636901802302, "learning_rate": 8.23123950890047e-06, "loss": 0.7158, "step": 2833 }, { "epoch": 0.3, "grad_norm": 2.310647756260461, "learning_rate": 8.229938770132843e-06, "loss": 0.6142, "step": 2834 }, { "epoch": 0.3, "grad_norm": 3.439737457921449, "learning_rate": 8.228637656120268e-06, "loss": 0.6859, "step": 2835 }, { "epoch": 0.3, "grad_norm": 3.465299362478447, "learning_rate": 8.227336167013901e-06, "loss": 0.619, "step": 2836 }, { "epoch": 0.3, "grad_norm": 5.161811987054101, "learning_rate": 8.22603430296495e-06, "loss": 0.6885, "step": 2837 }, { "epoch": 0.3, "grad_norm": 13.016909893025721, "learning_rate": 8.224732064124658e-06, "loss": 0.7578, "step": 2838 }, { "epoch": 0.3, "grad_norm": 3.29770830365415, "learning_rate": 8.223429450644317e-06, "loss": 0.5983, "step": 2839 }, { "epoch": 0.3, "grad_norm": 2.74582961488932, "learning_rate": 8.222126462675259e-06, "loss": 0.703, "step": 2840 }, { "epoch": 0.3, "grad_norm": 2.839825341685874, "learning_rate": 8.220823100368865e-06, "loss": 0.7577, "step": 2841 }, { "epoch": 0.3, "grad_norm": 2.151612849557347, "learning_rate": 8.219519363876552e-06, "loss": 0.7437, "step": 2842 }, { "epoch": 0.3, "grad_norm": 2.2813563053260157, "learning_rate": 8.218215253349785e-06, "loss": 0.7145, "step": 2843 }, { "epoch": 0.3, "grad_norm": 2.8740576992118227, "learning_rate": 8.216910768940075e-06, "loss": 0.6663, "step": 2844 }, { "epoch": 0.3, "grad_norm": 2.3832922463971293, "learning_rate": 8.215605910798972e-06, "loss": 0.6398, "step": 2845 }, { "epoch": 0.3, "grad_norm": 2.087398052989275, "learning_rate": 8.21430067907807e-06, "loss": 0.6233, "step": 2846 }, { "epoch": 0.3, "grad_norm": 3.1121220094877007, "learning_rate": 8.212995073929002e-06, "loss": 0.7069, "step": 2847 }, { "epoch": 0.3, "grad_norm": 1.1733089591500396, "learning_rate": 8.211689095503457e-06, "loss": 0.6373, "step": 2848 }, { "epoch": 0.3, "grad_norm": 2.53436764490174, "learning_rate": 8.210382743953159e-06, "loss": 0.6675, "step": 2849 }, { "epoch": 0.3, "grad_norm": 2.9962280045699328, "learning_rate": 8.20907601942987e-06, "loss": 0.6339, "step": 2850 }, { "epoch": 0.3, "grad_norm": 2.6687863772007625, "learning_rate": 8.207768922085408e-06, "loss": 0.6427, "step": 2851 }, { "epoch": 0.3, "grad_norm": 2.4367057789328523, "learning_rate": 8.206461452071625e-06, "loss": 0.6382, "step": 2852 }, { "epoch": 0.3, "grad_norm": 2.494084327088906, "learning_rate": 8.20515360954042e-06, "loss": 0.7028, "step": 2853 }, { "epoch": 0.3, "grad_norm": 3.0800899409941738, "learning_rate": 8.203845394643732e-06, "loss": 0.6296, "step": 2854 }, { "epoch": 0.3, "grad_norm": 10.158844023386168, "learning_rate": 8.202536807533548e-06, "loss": 0.7244, "step": 2855 }, { "epoch": 0.3, "grad_norm": 2.667508619561635, "learning_rate": 8.201227848361895e-06, "loss": 0.7001, "step": 2856 }, { "epoch": 0.3, "grad_norm": 2.53043885098298, "learning_rate": 8.199918517280848e-06, "loss": 0.6844, "step": 2857 }, { "epoch": 0.3, "grad_norm": 3.114762138835378, "learning_rate": 8.198608814442513e-06, "loss": 0.6376, "step": 2858 }, { "epoch": 0.3, "grad_norm": 2.9348916221619814, "learning_rate": 8.197298739999055e-06, "loss": 0.6631, "step": 2859 }, { "epoch": 0.3, "grad_norm": 2.2713830464167466, "learning_rate": 8.19598829410267e-06, "loss": 0.5696, "step": 2860 }, { "epoch": 0.3, "grad_norm": 2.640428459107109, "learning_rate": 8.194677476905604e-06, "loss": 0.6494, "step": 2861 }, { "epoch": 0.3, "grad_norm": 2.458537500123125, "learning_rate": 8.193366288560144e-06, "loss": 0.7073, "step": 2862 }, { "epoch": 0.3, "grad_norm": 2.789431886976531, "learning_rate": 8.192054729218621e-06, "loss": 0.6223, "step": 2863 }, { "epoch": 0.3, "grad_norm": 2.0346142539870478, "learning_rate": 8.190742799033404e-06, "loss": 0.6502, "step": 2864 }, { "epoch": 0.3, "grad_norm": 2.523541617507126, "learning_rate": 8.189430498156914e-06, "loss": 0.6346, "step": 2865 }, { "epoch": 0.3, "grad_norm": 2.2087194523547145, "learning_rate": 8.18811782674161e-06, "loss": 0.6108, "step": 2866 }, { "epoch": 0.3, "grad_norm": 2.491402481550859, "learning_rate": 8.18680478493999e-06, "loss": 0.6853, "step": 2867 }, { "epoch": 0.3, "grad_norm": 3.026321341321937, "learning_rate": 8.185491372904604e-06, "loss": 0.6313, "step": 2868 }, { "epoch": 0.3, "grad_norm": 2.6138591078805526, "learning_rate": 8.184177590788038e-06, "loss": 0.6652, "step": 2869 }, { "epoch": 0.3, "grad_norm": 2.519595862218996, "learning_rate": 8.182863438742922e-06, "loss": 0.7254, "step": 2870 }, { "epoch": 0.3, "grad_norm": 2.4026200308540617, "learning_rate": 8.181548916921935e-06, "loss": 0.5704, "step": 2871 }, { "epoch": 0.3, "grad_norm": 2.580908432962159, "learning_rate": 8.180234025477792e-06, "loss": 0.6507, "step": 2872 }, { "epoch": 0.3, "grad_norm": 2.268967709119169, "learning_rate": 8.178918764563251e-06, "loss": 0.718, "step": 2873 }, { "epoch": 0.3, "grad_norm": 2.2396586103792795, "learning_rate": 8.177603134331119e-06, "loss": 0.59, "step": 2874 }, { "epoch": 0.3, "grad_norm": 2.7093881022757165, "learning_rate": 8.17628713493424e-06, "loss": 0.6465, "step": 2875 }, { "epoch": 0.3, "grad_norm": 2.2292433893864105, "learning_rate": 8.174970766525503e-06, "loss": 0.6516, "step": 2876 }, { "epoch": 0.3, "grad_norm": 3.0744331784154943, "learning_rate": 8.17365402925784e-06, "loss": 0.6089, "step": 2877 }, { "epoch": 0.3, "grad_norm": 2.563571554597488, "learning_rate": 8.172336923284225e-06, "loss": 0.6704, "step": 2878 }, { "epoch": 0.3, "grad_norm": 2.8782161919357705, "learning_rate": 8.17101944875768e-06, "loss": 0.7115, "step": 2879 }, { "epoch": 0.3, "grad_norm": 2.662681630857156, "learning_rate": 8.16970160583126e-06, "loss": 0.6443, "step": 2880 }, { "epoch": 0.3, "grad_norm": 2.35955388779492, "learning_rate": 8.16838339465807e-06, "loss": 0.7116, "step": 2881 }, { "epoch": 0.3, "grad_norm": 3.1676445753173974, "learning_rate": 8.167064815391254e-06, "loss": 0.6053, "step": 2882 }, { "epoch": 0.3, "grad_norm": 2.466081183525901, "learning_rate": 8.165745868184006e-06, "loss": 0.6041, "step": 2883 }, { "epoch": 0.3, "grad_norm": 2.0720974841936783, "learning_rate": 8.164426553189553e-06, "loss": 0.6562, "step": 2884 }, { "epoch": 0.3, "grad_norm": 3.740189082503985, "learning_rate": 8.16310687056117e-06, "loss": 0.7227, "step": 2885 }, { "epoch": 0.3, "grad_norm": 2.978902500808749, "learning_rate": 8.161786820452176e-06, "loss": 0.6807, "step": 2886 }, { "epoch": 0.3, "grad_norm": 2.301320445626182, "learning_rate": 8.160466403015928e-06, "loss": 0.726, "step": 2887 }, { "epoch": 0.3, "grad_norm": 2.267037663828458, "learning_rate": 8.159145618405828e-06, "loss": 0.6489, "step": 2888 }, { "epoch": 0.3, "grad_norm": 2.015189747260077, "learning_rate": 8.157824466775324e-06, "loss": 0.6653, "step": 2889 }, { "epoch": 0.3, "grad_norm": 2.521922180245439, "learning_rate": 8.156502948277902e-06, "loss": 0.6805, "step": 2890 }, { "epoch": 0.3, "grad_norm": 2.12799521453685, "learning_rate": 8.15518106306709e-06, "loss": 0.6851, "step": 2891 }, { "epoch": 0.3, "grad_norm": 2.0483331306231944, "learning_rate": 8.153858811296465e-06, "loss": 0.6434, "step": 2892 }, { "epoch": 0.3, "grad_norm": 5.707423136219375, "learning_rate": 8.152536193119638e-06, "loss": 0.577, "step": 2893 }, { "epoch": 0.3, "grad_norm": 3.092278245285496, "learning_rate": 8.151213208690271e-06, "loss": 0.6721, "step": 2894 }, { "epoch": 0.3, "grad_norm": 2.561997031833164, "learning_rate": 8.149889858162062e-06, "loss": 0.6855, "step": 2895 }, { "epoch": 0.3, "grad_norm": 2.0803284506968662, "learning_rate": 8.148566141688755e-06, "loss": 0.6528, "step": 2896 }, { "epoch": 0.3, "grad_norm": 2.9962095481419624, "learning_rate": 8.147242059424134e-06, "loss": 0.7353, "step": 2897 }, { "epoch": 0.3, "grad_norm": 2.5237602734129516, "learning_rate": 8.145917611522029e-06, "loss": 0.6057, "step": 2898 }, { "epoch": 0.31, "grad_norm": 1.8369538414675164, "learning_rate": 8.14459279813631e-06, "loss": 0.7189, "step": 2899 }, { "epoch": 0.31, "grad_norm": 1.2530174295755743, "learning_rate": 8.143267619420892e-06, "loss": 0.605, "step": 2900 }, { "epoch": 0.31, "grad_norm": 2.6147582967437226, "learning_rate": 8.141942075529725e-06, "loss": 0.7003, "step": 2901 }, { "epoch": 0.31, "grad_norm": 2.8824381178183907, "learning_rate": 8.14061616661681e-06, "loss": 0.7087, "step": 2902 }, { "epoch": 0.31, "grad_norm": 3.993274788781763, "learning_rate": 8.13928989283619e-06, "loss": 0.5397, "step": 2903 }, { "epoch": 0.31, "grad_norm": 2.5531708615561, "learning_rate": 8.137963254341944e-06, "loss": 0.6569, "step": 2904 }, { "epoch": 0.31, "grad_norm": 2.666082677328026, "learning_rate": 8.136636251288197e-06, "loss": 0.7032, "step": 2905 }, { "epoch": 0.31, "grad_norm": 2.483321235755457, "learning_rate": 8.135308883829119e-06, "loss": 0.6559, "step": 2906 }, { "epoch": 0.31, "grad_norm": 2.2907791966791415, "learning_rate": 8.133981152118916e-06, "loss": 0.6794, "step": 2907 }, { "epoch": 0.31, "grad_norm": 2.2809901014129657, "learning_rate": 8.132653056311844e-06, "loss": 0.6586, "step": 2908 }, { "epoch": 0.31, "grad_norm": 3.5398481577715, "learning_rate": 8.131324596562195e-06, "loss": 0.6637, "step": 2909 }, { "epoch": 0.31, "grad_norm": 2.7594732513859443, "learning_rate": 8.129995773024306e-06, "loss": 0.7316, "step": 2910 }, { "epoch": 0.31, "grad_norm": 3.30105743088516, "learning_rate": 8.128666585852556e-06, "loss": 0.6668, "step": 2911 }, { "epoch": 0.31, "grad_norm": 2.731674755400316, "learning_rate": 8.127337035201365e-06, "loss": 0.6782, "step": 2912 }, { "epoch": 0.31, "grad_norm": 3.186947648727673, "learning_rate": 8.1260071212252e-06, "loss": 0.6693, "step": 2913 }, { "epoch": 0.31, "grad_norm": 2.198681447100411, "learning_rate": 8.12467684407856e-06, "loss": 0.6396, "step": 2914 }, { "epoch": 0.31, "grad_norm": 2.5485456110616918, "learning_rate": 8.123346203916e-06, "loss": 0.6218, "step": 2915 }, { "epoch": 0.31, "grad_norm": 2.914548621339626, "learning_rate": 8.122015200892106e-06, "loss": 0.6717, "step": 2916 }, { "epoch": 0.31, "grad_norm": 2.593755867982301, "learning_rate": 8.120683835161511e-06, "loss": 0.6373, "step": 2917 }, { "epoch": 0.31, "grad_norm": 2.062797790828946, "learning_rate": 8.11935210687889e-06, "loss": 0.6702, "step": 2918 }, { "epoch": 0.31, "grad_norm": 2.9875147078800683, "learning_rate": 8.118020016198957e-06, "loss": 0.6398, "step": 2919 }, { "epoch": 0.31, "grad_norm": 2.022004557471706, "learning_rate": 8.11668756327647e-06, "loss": 0.7186, "step": 2920 }, { "epoch": 0.31, "grad_norm": 1.2197328167155737, "learning_rate": 8.115354748266233e-06, "loss": 0.652, "step": 2921 }, { "epoch": 0.31, "grad_norm": 2.0617705515592957, "learning_rate": 8.114021571323089e-06, "loss": 0.5757, "step": 2922 }, { "epoch": 0.31, "grad_norm": 2.3145783721939996, "learning_rate": 8.112688032601919e-06, "loss": 0.6625, "step": 2923 }, { "epoch": 0.31, "grad_norm": 2.531303219988851, "learning_rate": 8.111354132257651e-06, "loss": 0.6679, "step": 2924 }, { "epoch": 0.31, "grad_norm": 2.6045952874222555, "learning_rate": 8.110019870445254e-06, "loss": 0.7008, "step": 2925 }, { "epoch": 0.31, "grad_norm": 2.9426229731805704, "learning_rate": 8.10868524731974e-06, "loss": 0.6898, "step": 2926 }, { "epoch": 0.31, "grad_norm": 2.261653088656059, "learning_rate": 8.107350263036157e-06, "loss": 0.6312, "step": 2927 }, { "epoch": 0.31, "grad_norm": 4.259944992592717, "learning_rate": 8.106014917749605e-06, "loss": 0.6676, "step": 2928 }, { "epoch": 0.31, "grad_norm": 2.6421624462915765, "learning_rate": 8.104679211615218e-06, "loss": 0.7101, "step": 2929 }, { "epoch": 0.31, "grad_norm": 2.3871886404739504, "learning_rate": 8.103343144788177e-06, "loss": 0.6416, "step": 2930 }, { "epoch": 0.31, "grad_norm": 2.1379036146977164, "learning_rate": 8.102006717423695e-06, "loss": 0.6405, "step": 2931 }, { "epoch": 0.31, "grad_norm": 3.0829424463195303, "learning_rate": 8.100669929677044e-06, "loss": 0.6991, "step": 2932 }, { "epoch": 0.31, "grad_norm": 2.45544537675686, "learning_rate": 8.099332781703523e-06, "loss": 0.6083, "step": 2933 }, { "epoch": 0.31, "grad_norm": 2.5320119199003224, "learning_rate": 8.097995273658479e-06, "loss": 0.6114, "step": 2934 }, { "epoch": 0.31, "grad_norm": 2.2301392753727916, "learning_rate": 8.0966574056973e-06, "loss": 0.7644, "step": 2935 }, { "epoch": 0.31, "grad_norm": 2.397360362520422, "learning_rate": 8.095319177975412e-06, "loss": 0.5961, "step": 2936 }, { "epoch": 0.31, "grad_norm": 3.5622955344160414, "learning_rate": 8.093980590648291e-06, "loss": 0.6874, "step": 2937 }, { "epoch": 0.31, "grad_norm": 2.481733875250971, "learning_rate": 8.092641643871451e-06, "loss": 0.5639, "step": 2938 }, { "epoch": 0.31, "grad_norm": 2.237192875379186, "learning_rate": 8.091302337800441e-06, "loss": 0.6794, "step": 2939 }, { "epoch": 0.31, "grad_norm": 2.364977168624842, "learning_rate": 8.089962672590865e-06, "loss": 0.6851, "step": 2940 }, { "epoch": 0.31, "grad_norm": 2.155447396944477, "learning_rate": 8.088622648398357e-06, "loss": 0.6662, "step": 2941 }, { "epoch": 0.31, "grad_norm": 2.8646626316625636, "learning_rate": 8.087282265378596e-06, "loss": 0.6547, "step": 2942 }, { "epoch": 0.31, "grad_norm": 2.999962898982827, "learning_rate": 8.085941523687309e-06, "loss": 0.6913, "step": 2943 }, { "epoch": 0.31, "grad_norm": 2.793000783068167, "learning_rate": 8.084600423480253e-06, "loss": 0.7647, "step": 2944 }, { "epoch": 0.31, "grad_norm": 1.294318321894251, "learning_rate": 8.083258964913238e-06, "loss": 0.6202, "step": 2945 }, { "epoch": 0.31, "grad_norm": 2.3681448665076648, "learning_rate": 8.08191714814211e-06, "loss": 0.6897, "step": 2946 }, { "epoch": 0.31, "grad_norm": 3.4035241766844258, "learning_rate": 8.080574973322755e-06, "loss": 0.6347, "step": 2947 }, { "epoch": 0.31, "grad_norm": 10.697359357745482, "learning_rate": 8.079232440611106e-06, "loss": 0.6607, "step": 2948 }, { "epoch": 0.31, "grad_norm": 11.08038730758078, "learning_rate": 8.077889550163133e-06, "loss": 0.6913, "step": 2949 }, { "epoch": 0.31, "grad_norm": 2.289498594594031, "learning_rate": 8.076546302134849e-06, "loss": 0.5815, "step": 2950 }, { "epoch": 0.31, "grad_norm": 3.8411656849036024, "learning_rate": 8.07520269668231e-06, "loss": 0.6598, "step": 2951 }, { "epoch": 0.31, "grad_norm": 3.9514846702421127, "learning_rate": 8.073858733961609e-06, "loss": 0.7163, "step": 2952 }, { "epoch": 0.31, "grad_norm": 3.287925053791738, "learning_rate": 8.072514414128886e-06, "loss": 0.6914, "step": 2953 }, { "epoch": 0.31, "grad_norm": 2.850131167037012, "learning_rate": 8.071169737340322e-06, "loss": 0.6972, "step": 2954 }, { "epoch": 0.31, "grad_norm": 3.3564428711936714, "learning_rate": 8.069824703752136e-06, "loss": 0.6299, "step": 2955 }, { "epoch": 0.31, "grad_norm": 2.5337256748284793, "learning_rate": 8.068479313520589e-06, "loss": 0.7192, "step": 2956 }, { "epoch": 0.31, "grad_norm": 2.7829671234993767, "learning_rate": 8.067133566801986e-06, "loss": 0.6942, "step": 2957 }, { "epoch": 0.31, "grad_norm": 2.0780064728452814, "learning_rate": 8.06578746375267e-06, "loss": 0.699, "step": 2958 }, { "epoch": 0.31, "grad_norm": 2.066654395430205, "learning_rate": 8.06444100452903e-06, "loss": 0.6052, "step": 2959 }, { "epoch": 0.31, "grad_norm": 2.3241867147767636, "learning_rate": 8.063094189287492e-06, "loss": 0.6425, "step": 2960 }, { "epoch": 0.31, "grad_norm": 2.936750856792946, "learning_rate": 8.061747018184525e-06, "loss": 0.6816, "step": 2961 }, { "epoch": 0.31, "grad_norm": 2.564282733037237, "learning_rate": 8.06039949137664e-06, "loss": 0.7117, "step": 2962 }, { "epoch": 0.31, "grad_norm": 35.37441915823146, "learning_rate": 8.05905160902039e-06, "loss": 0.7604, "step": 2963 }, { "epoch": 0.31, "grad_norm": 3.470994439733572, "learning_rate": 8.057703371272368e-06, "loss": 0.6648, "step": 2964 }, { "epoch": 0.31, "grad_norm": 2.653406363973704, "learning_rate": 8.056354778289204e-06, "loss": 0.6818, "step": 2965 }, { "epoch": 0.31, "grad_norm": 2.7162242932530374, "learning_rate": 8.055005830227578e-06, "loss": 0.6176, "step": 2966 }, { "epoch": 0.31, "grad_norm": 2.0336246489108563, "learning_rate": 8.053656527244206e-06, "loss": 0.6957, "step": 2967 }, { "epoch": 0.31, "grad_norm": 2.404566189632472, "learning_rate": 8.052306869495847e-06, "loss": 0.6862, "step": 2968 }, { "epoch": 0.31, "grad_norm": 2.662561717072185, "learning_rate": 8.050956857139298e-06, "loss": 0.7162, "step": 2969 }, { "epoch": 0.31, "grad_norm": 2.5875784364886254, "learning_rate": 8.049606490331403e-06, "loss": 0.6401, "step": 2970 }, { "epoch": 0.31, "grad_norm": 2.978839811791059, "learning_rate": 8.048255769229038e-06, "loss": 0.6423, "step": 2971 }, { "epoch": 0.31, "grad_norm": 3.159696497187548, "learning_rate": 8.046904693989132e-06, "loss": 0.6541, "step": 2972 }, { "epoch": 0.31, "grad_norm": 3.108106082999746, "learning_rate": 8.045553264768645e-06, "loss": 0.7498, "step": 2973 }, { "epoch": 0.31, "grad_norm": 2.4365012913470747, "learning_rate": 8.044201481724582e-06, "loss": 0.6099, "step": 2974 }, { "epoch": 0.31, "grad_norm": 2.9007518834879593, "learning_rate": 8.042849345013995e-06, "loss": 0.6726, "step": 2975 }, { "epoch": 0.31, "grad_norm": 2.2970067282582476, "learning_rate": 8.041496854793964e-06, "loss": 0.5851, "step": 2976 }, { "epoch": 0.31, "grad_norm": 3.006328818924341, "learning_rate": 8.040144011221621e-06, "loss": 0.6791, "step": 2977 }, { "epoch": 0.31, "grad_norm": 2.6939058418287805, "learning_rate": 8.038790814454137e-06, "loss": 0.726, "step": 2978 }, { "epoch": 0.31, "grad_norm": 2.2054039052694705, "learning_rate": 8.037437264648717e-06, "loss": 0.6719, "step": 2979 }, { "epoch": 0.31, "grad_norm": 2.41222955568751, "learning_rate": 8.036083361962616e-06, "loss": 0.6879, "step": 2980 }, { "epoch": 0.31, "grad_norm": 2.4774726350606158, "learning_rate": 8.03472910655313e-06, "loss": 0.7345, "step": 2981 }, { "epoch": 0.31, "grad_norm": 2.3042323688271327, "learning_rate": 8.033374498577586e-06, "loss": 0.7063, "step": 2982 }, { "epoch": 0.31, "grad_norm": 2.7730077023368938, "learning_rate": 8.032019538193363e-06, "loss": 0.7008, "step": 2983 }, { "epoch": 0.31, "grad_norm": 2.7189215222404144, "learning_rate": 8.030664225557873e-06, "loss": 0.6169, "step": 2984 }, { "epoch": 0.31, "grad_norm": 2.845427019172591, "learning_rate": 8.029308560828574e-06, "loss": 0.7042, "step": 2985 }, { "epoch": 0.31, "grad_norm": 2.311773529022882, "learning_rate": 8.027952544162965e-06, "loss": 0.7276, "step": 2986 }, { "epoch": 0.31, "grad_norm": 4.498629343600977, "learning_rate": 8.026596175718582e-06, "loss": 0.65, "step": 2987 }, { "epoch": 0.31, "grad_norm": 2.527545636273197, "learning_rate": 8.025239455653003e-06, "loss": 0.6596, "step": 2988 }, { "epoch": 0.31, "grad_norm": 2.3431814650204354, "learning_rate": 8.023882384123851e-06, "loss": 0.5784, "step": 2989 }, { "epoch": 0.31, "grad_norm": 2.266244506572552, "learning_rate": 8.022524961288783e-06, "loss": 0.6536, "step": 2990 }, { "epoch": 0.31, "grad_norm": 2.4413399629821395, "learning_rate": 8.021167187305504e-06, "loss": 0.648, "step": 2991 }, { "epoch": 0.31, "grad_norm": 3.8044222712960485, "learning_rate": 8.019809062331754e-06, "loss": 0.6785, "step": 2992 }, { "epoch": 0.31, "grad_norm": 2.8009795918380527, "learning_rate": 8.018450586525314e-06, "loss": 0.6825, "step": 2993 }, { "epoch": 0.32, "grad_norm": 2.044733004957816, "learning_rate": 8.017091760044014e-06, "loss": 0.7218, "step": 2994 }, { "epoch": 0.32, "grad_norm": 2.5348478649648625, "learning_rate": 8.015732583045713e-06, "loss": 0.6306, "step": 2995 }, { "epoch": 0.32, "grad_norm": 2.820292578056801, "learning_rate": 8.014373055688319e-06, "loss": 0.6103, "step": 2996 }, { "epoch": 0.32, "grad_norm": 2.83941151917699, "learning_rate": 8.013013178129775e-06, "loss": 0.739, "step": 2997 }, { "epoch": 0.32, "grad_norm": 2.562461642222233, "learning_rate": 8.01165295052807e-06, "loss": 0.6528, "step": 2998 }, { "epoch": 0.32, "grad_norm": 2.2991584011557946, "learning_rate": 8.010292373041233e-06, "loss": 0.6633, "step": 2999 }, { "epoch": 0.32, "grad_norm": 3.0164089815235755, "learning_rate": 8.008931445827329e-06, "loss": 0.6508, "step": 3000 }, { "epoch": 0.32, "grad_norm": 2.2488209086571036, "learning_rate": 8.007570169044467e-06, "loss": 0.6924, "step": 3001 }, { "epoch": 0.32, "grad_norm": 4.067344347818427, "learning_rate": 8.006208542850797e-06, "loss": 0.716, "step": 3002 }, { "epoch": 0.32, "grad_norm": 2.433534837689038, "learning_rate": 8.004846567404509e-06, "loss": 0.6682, "step": 3003 }, { "epoch": 0.32, "grad_norm": 3.1675254720528176, "learning_rate": 8.003484242863833e-06, "loss": 0.744, "step": 3004 }, { "epoch": 0.32, "grad_norm": 2.3134441373453916, "learning_rate": 8.00212156938704e-06, "loss": 0.5428, "step": 3005 }, { "epoch": 0.32, "grad_norm": 2.8236083439191355, "learning_rate": 8.000758547132441e-06, "loss": 0.66, "step": 3006 }, { "epoch": 0.32, "grad_norm": 2.158397208676714, "learning_rate": 7.99939517625839e-06, "loss": 0.7367, "step": 3007 }, { "epoch": 0.32, "grad_norm": 2.2654650330105257, "learning_rate": 7.998031456923274e-06, "loss": 0.5894, "step": 3008 }, { "epoch": 0.32, "grad_norm": 2.5662295257161833, "learning_rate": 7.996667389285532e-06, "loss": 0.6596, "step": 3009 }, { "epoch": 0.32, "grad_norm": 2.677019268255404, "learning_rate": 7.995302973503636e-06, "loss": 0.6978, "step": 3010 }, { "epoch": 0.32, "grad_norm": 2.371436079489015, "learning_rate": 7.993938209736097e-06, "loss": 0.69, "step": 3011 }, { "epoch": 0.32, "grad_norm": 2.1346580337024186, "learning_rate": 7.992573098141472e-06, "loss": 0.644, "step": 3012 }, { "epoch": 0.32, "grad_norm": 1.9295251025511633, "learning_rate": 7.991207638878356e-06, "loss": 0.6617, "step": 3013 }, { "epoch": 0.32, "grad_norm": 2.558479337294168, "learning_rate": 7.989841832105382e-06, "loss": 0.7159, "step": 3014 }, { "epoch": 0.32, "grad_norm": 2.147625038233415, "learning_rate": 7.988475677981229e-06, "loss": 0.6179, "step": 3015 }, { "epoch": 0.32, "grad_norm": 2.1693505791045053, "learning_rate": 7.98710917666461e-06, "loss": 0.6942, "step": 3016 }, { "epoch": 0.32, "grad_norm": 3.1568770622983475, "learning_rate": 7.985742328314279e-06, "loss": 0.7053, "step": 3017 }, { "epoch": 0.32, "grad_norm": 2.1959613312211084, "learning_rate": 7.984375133089038e-06, "loss": 0.637, "step": 3018 }, { "epoch": 0.32, "grad_norm": 9.895723607078637, "learning_rate": 7.98300759114772e-06, "loss": 0.7098, "step": 3019 }, { "epoch": 0.32, "grad_norm": 2.234596285844836, "learning_rate": 7.981639702649204e-06, "loss": 0.6282, "step": 3020 }, { "epoch": 0.32, "grad_norm": 2.2374506416442133, "learning_rate": 7.980271467752405e-06, "loss": 0.6773, "step": 3021 }, { "epoch": 0.32, "grad_norm": 2.6097297477113326, "learning_rate": 7.97890288661628e-06, "loss": 0.7456, "step": 3022 }, { "epoch": 0.32, "grad_norm": 2.8225474093932426, "learning_rate": 7.977533959399833e-06, "loss": 0.68, "step": 3023 }, { "epoch": 0.32, "grad_norm": 2.6648211533685795, "learning_rate": 7.976164686262096e-06, "loss": 0.686, "step": 3024 }, { "epoch": 0.32, "grad_norm": 2.3486556822537206, "learning_rate": 7.974795067362148e-06, "loss": 0.7185, "step": 3025 }, { "epoch": 0.32, "grad_norm": 2.529824254487311, "learning_rate": 7.97342510285911e-06, "loss": 0.6447, "step": 3026 }, { "epoch": 0.32, "grad_norm": 2.7391077394652763, "learning_rate": 7.972054792912138e-06, "loss": 0.6169, "step": 3027 }, { "epoch": 0.32, "grad_norm": 2.237331937538726, "learning_rate": 7.970684137680431e-06, "loss": 0.6488, "step": 3028 }, { "epoch": 0.32, "grad_norm": 2.2823597001812614, "learning_rate": 7.969313137323228e-06, "loss": 0.6577, "step": 3029 }, { "epoch": 0.32, "grad_norm": 2.6046928602923143, "learning_rate": 7.96794179199981e-06, "loss": 0.682, "step": 3030 }, { "epoch": 0.32, "grad_norm": 3.018838373919387, "learning_rate": 7.966570101869494e-06, "loss": 0.6314, "step": 3031 }, { "epoch": 0.32, "grad_norm": 3.190243611941645, "learning_rate": 7.965198067091637e-06, "loss": 0.6224, "step": 3032 }, { "epoch": 0.32, "grad_norm": 2.4033899287674183, "learning_rate": 7.96382568782564e-06, "loss": 0.6538, "step": 3033 }, { "epoch": 0.32, "grad_norm": 2.57052475740044, "learning_rate": 7.962452964230944e-06, "loss": 0.6519, "step": 3034 }, { "epoch": 0.32, "grad_norm": 3.0579037048889752, "learning_rate": 7.961079896467025e-06, "loss": 0.5823, "step": 3035 }, { "epoch": 0.32, "grad_norm": 2.8012393029432645, "learning_rate": 7.959706484693405e-06, "loss": 0.6989, "step": 3036 }, { "epoch": 0.32, "grad_norm": 2.7558779760997525, "learning_rate": 7.95833272906964e-06, "loss": 0.6581, "step": 3037 }, { "epoch": 0.32, "grad_norm": 2.1874011393784665, "learning_rate": 7.95695862975533e-06, "loss": 0.6699, "step": 3038 }, { "epoch": 0.32, "grad_norm": 2.6164914345320107, "learning_rate": 7.955584186910115e-06, "loss": 0.7054, "step": 3039 }, { "epoch": 0.32, "grad_norm": 3.0292864485504594, "learning_rate": 7.954209400693673e-06, "loss": 0.6537, "step": 3040 }, { "epoch": 0.32, "grad_norm": 2.905511291216553, "learning_rate": 7.95283427126572e-06, "loss": 0.6966, "step": 3041 }, { "epoch": 0.32, "grad_norm": 1.208663400211376, "learning_rate": 7.95145879878602e-06, "loss": 0.5702, "step": 3042 }, { "epoch": 0.32, "grad_norm": 2.559598093856362, "learning_rate": 7.950082983414367e-06, "loss": 0.6517, "step": 3043 }, { "epoch": 0.32, "grad_norm": 2.389487342933906, "learning_rate": 7.948706825310601e-06, "loss": 0.6289, "step": 3044 }, { "epoch": 0.32, "grad_norm": 2.675926284212583, "learning_rate": 7.947330324634601e-06, "loss": 0.6088, "step": 3045 }, { "epoch": 0.32, "grad_norm": 2.648358995913935, "learning_rate": 7.945953481546282e-06, "loss": 0.6291, "step": 3046 }, { "epoch": 0.32, "grad_norm": 1.2023556963692583, "learning_rate": 7.944576296205603e-06, "loss": 0.5934, "step": 3047 }, { "epoch": 0.32, "grad_norm": 2.627906298885444, "learning_rate": 7.943198768772565e-06, "loss": 0.743, "step": 3048 }, { "epoch": 0.32, "grad_norm": 3.399467853821857, "learning_rate": 7.9418208994072e-06, "loss": 0.6587, "step": 3049 }, { "epoch": 0.32, "grad_norm": 2.6749330583941258, "learning_rate": 7.940442688269587e-06, "loss": 0.6825, "step": 3050 }, { "epoch": 0.32, "grad_norm": 2.9142489850897517, "learning_rate": 7.939064135519844e-06, "loss": 0.6535, "step": 3051 }, { "epoch": 0.32, "grad_norm": 2.3087341250267763, "learning_rate": 7.937685241318122e-06, "loss": 0.6461, "step": 3052 }, { "epoch": 0.32, "grad_norm": 6.292432896926418, "learning_rate": 7.936306005824624e-06, "loss": 0.6497, "step": 3053 }, { "epoch": 0.32, "grad_norm": 3.3219986590091435, "learning_rate": 7.93492642919958e-06, "loss": 0.6326, "step": 3054 }, { "epoch": 0.32, "grad_norm": 3.119470589054828, "learning_rate": 7.933546511603269e-06, "loss": 0.6417, "step": 3055 }, { "epoch": 0.32, "grad_norm": 6.774960844710111, "learning_rate": 7.932166253196004e-06, "loss": 0.5901, "step": 3056 }, { "epoch": 0.32, "grad_norm": 2.4456975659415847, "learning_rate": 7.93078565413814e-06, "loss": 0.6138, "step": 3057 }, { "epoch": 0.32, "grad_norm": 2.8996471024029513, "learning_rate": 7.92940471459007e-06, "loss": 0.6084, "step": 3058 }, { "epoch": 0.32, "grad_norm": 2.1348588208140797, "learning_rate": 7.928023434712227e-06, "loss": 0.6235, "step": 3059 }, { "epoch": 0.32, "grad_norm": 3.1903589219833037, "learning_rate": 7.926641814665088e-06, "loss": 0.6413, "step": 3060 }, { "epoch": 0.32, "grad_norm": 2.6063098506456126, "learning_rate": 7.925259854609162e-06, "loss": 0.6501, "step": 3061 }, { "epoch": 0.32, "grad_norm": 7.68937491835249, "learning_rate": 7.923877554705002e-06, "loss": 0.632, "step": 3062 }, { "epoch": 0.32, "grad_norm": 3.2423734100409196, "learning_rate": 7.9224949151132e-06, "loss": 0.7238, "step": 3063 }, { "epoch": 0.32, "grad_norm": 2.607658237678769, "learning_rate": 7.921111935994388e-06, "loss": 0.6373, "step": 3064 }, { "epoch": 0.32, "grad_norm": 2.727385481962859, "learning_rate": 7.919728617509233e-06, "loss": 0.6826, "step": 3065 }, { "epoch": 0.32, "grad_norm": 2.6366816990101145, "learning_rate": 7.91834495981845e-06, "loss": 0.6223, "step": 3066 }, { "epoch": 0.32, "grad_norm": 2.388243265491952, "learning_rate": 7.916960963082783e-06, "loss": 0.6517, "step": 3067 }, { "epoch": 0.32, "grad_norm": 2.384651407591471, "learning_rate": 7.915576627463024e-06, "loss": 0.6422, "step": 3068 }, { "epoch": 0.32, "grad_norm": 2.315078446495304, "learning_rate": 7.91419195312e-06, "loss": 0.7492, "step": 3069 }, { "epoch": 0.32, "grad_norm": 2.5016948568104214, "learning_rate": 7.91280694021458e-06, "loss": 0.5617, "step": 3070 }, { "epoch": 0.32, "grad_norm": 2.267739297104988, "learning_rate": 7.91142158890767e-06, "loss": 0.6419, "step": 3071 }, { "epoch": 0.32, "grad_norm": 2.9294081180768607, "learning_rate": 7.910035899360215e-06, "loss": 0.6419, "step": 3072 }, { "epoch": 0.32, "grad_norm": 2.5533958607494083, "learning_rate": 7.908649871733202e-06, "loss": 0.6337, "step": 3073 }, { "epoch": 0.32, "grad_norm": 3.016887449204389, "learning_rate": 7.907263506187655e-06, "loss": 0.577, "step": 3074 }, { "epoch": 0.32, "grad_norm": 2.229569058131735, "learning_rate": 7.905876802884639e-06, "loss": 0.6829, "step": 3075 }, { "epoch": 0.32, "grad_norm": 3.650463843334712, "learning_rate": 7.904489761985254e-06, "loss": 0.6764, "step": 3076 }, { "epoch": 0.32, "grad_norm": 2.5592076303986047, "learning_rate": 7.903102383650645e-06, "loss": 0.623, "step": 3077 }, { "epoch": 0.32, "grad_norm": 5.032748852875668, "learning_rate": 7.901714668041993e-06, "loss": 0.6891, "step": 3078 }, { "epoch": 0.32, "grad_norm": 2.317465920800203, "learning_rate": 7.90032661532052e-06, "loss": 0.6487, "step": 3079 }, { "epoch": 0.32, "grad_norm": 2.305967746812263, "learning_rate": 7.898938225647484e-06, "loss": 0.6823, "step": 3080 }, { "epoch": 0.32, "grad_norm": 2.8884399615654206, "learning_rate": 7.897549499184184e-06, "loss": 0.6296, "step": 3081 }, { "epoch": 0.32, "grad_norm": 3.0770385988445907, "learning_rate": 7.896160436091961e-06, "loss": 0.6943, "step": 3082 }, { "epoch": 0.32, "grad_norm": 2.4056547215114055, "learning_rate": 7.894771036532189e-06, "loss": 0.5929, "step": 3083 }, { "epoch": 0.32, "grad_norm": 2.4323552445732304, "learning_rate": 7.893381300666287e-06, "loss": 0.6887, "step": 3084 }, { "epoch": 0.32, "grad_norm": 2.343998812827096, "learning_rate": 7.89199122865571e-06, "loss": 0.6709, "step": 3085 }, { "epoch": 0.32, "grad_norm": 3.0146219044287164, "learning_rate": 7.89060082066195e-06, "loss": 0.6724, "step": 3086 }, { "epoch": 0.32, "grad_norm": 2.6033290148752357, "learning_rate": 7.889210076846544e-06, "loss": 0.6815, "step": 3087 }, { "epoch": 0.32, "grad_norm": 2.381411272217176, "learning_rate": 7.887818997371062e-06, "loss": 0.6404, "step": 3088 }, { "epoch": 0.33, "grad_norm": 2.904456744874637, "learning_rate": 7.886427582397117e-06, "loss": 0.7417, "step": 3089 }, { "epoch": 0.33, "grad_norm": 2.2786773692260547, "learning_rate": 7.88503583208636e-06, "loss": 0.5864, "step": 3090 }, { "epoch": 0.33, "grad_norm": 2.81932535707139, "learning_rate": 7.88364374660048e-06, "loss": 0.7001, "step": 3091 }, { "epoch": 0.33, "grad_norm": 2.6849729938128997, "learning_rate": 7.882251326101205e-06, "loss": 0.6875, "step": 3092 }, { "epoch": 0.33, "grad_norm": 3.3310488217574488, "learning_rate": 7.8808585707503e-06, "loss": 0.698, "step": 3093 }, { "epoch": 0.33, "grad_norm": 2.6002798711965975, "learning_rate": 7.879465480709577e-06, "loss": 0.6669, "step": 3094 }, { "epoch": 0.33, "grad_norm": 2.8876056113874142, "learning_rate": 7.878072056140878e-06, "loss": 0.6834, "step": 3095 }, { "epoch": 0.33, "grad_norm": 3.537815529224319, "learning_rate": 7.876678297206086e-06, "loss": 0.7348, "step": 3096 }, { "epoch": 0.33, "grad_norm": 2.8620170280847104, "learning_rate": 7.875284204067127e-06, "loss": 0.6779, "step": 3097 }, { "epoch": 0.33, "grad_norm": 3.283985994119008, "learning_rate": 7.873889776885959e-06, "loss": 0.6443, "step": 3098 }, { "epoch": 0.33, "grad_norm": 3.3105599250764297, "learning_rate": 7.872495015824586e-06, "loss": 0.6744, "step": 3099 }, { "epoch": 0.33, "grad_norm": 2.9406649266002853, "learning_rate": 7.871099921045042e-06, "loss": 0.6953, "step": 3100 }, { "epoch": 0.33, "grad_norm": 2.574644681179438, "learning_rate": 7.869704492709412e-06, "loss": 0.6711, "step": 3101 }, { "epoch": 0.33, "grad_norm": 2.449965941126261, "learning_rate": 7.868308730979809e-06, "loss": 0.628, "step": 3102 }, { "epoch": 0.33, "grad_norm": 2.772397515441643, "learning_rate": 7.866912636018389e-06, "loss": 0.7371, "step": 3103 }, { "epoch": 0.33, "grad_norm": 2.39442828453295, "learning_rate": 7.865516207987344e-06, "loss": 0.63, "step": 3104 }, { "epoch": 0.33, "grad_norm": 3.3246795963005713, "learning_rate": 7.864119447048912e-06, "loss": 0.7073, "step": 3105 }, { "epoch": 0.33, "grad_norm": 3.0064966422057773, "learning_rate": 7.862722353365361e-06, "loss": 0.7265, "step": 3106 }, { "epoch": 0.33, "grad_norm": 3.0807358878776006, "learning_rate": 7.861324927099004e-06, "loss": 0.7145, "step": 3107 }, { "epoch": 0.33, "grad_norm": 1.069135073025866, "learning_rate": 7.859927168412186e-06, "loss": 0.6135, "step": 3108 }, { "epoch": 0.33, "grad_norm": 3.286312915224411, "learning_rate": 7.858529077467298e-06, "loss": 0.6363, "step": 3109 }, { "epoch": 0.33, "grad_norm": 2.2389602662700385, "learning_rate": 7.857130654426764e-06, "loss": 0.6131, "step": 3110 }, { "epoch": 0.33, "grad_norm": 3.36015735420768, "learning_rate": 7.85573189945305e-06, "loss": 0.5841, "step": 3111 }, { "epoch": 0.33, "grad_norm": 2.7462815401691096, "learning_rate": 7.854332812708661e-06, "loss": 0.6193, "step": 3112 }, { "epoch": 0.33, "grad_norm": 3.0603590845258815, "learning_rate": 7.852933394356134e-06, "loss": 0.7154, "step": 3113 }, { "epoch": 0.33, "grad_norm": 2.6664238931594584, "learning_rate": 7.851533644558054e-06, "loss": 0.747, "step": 3114 }, { "epoch": 0.33, "grad_norm": 2.859101742503454, "learning_rate": 7.850133563477037e-06, "loss": 0.6958, "step": 3115 }, { "epoch": 0.33, "grad_norm": 2.669270847924662, "learning_rate": 7.848733151275741e-06, "loss": 0.5718, "step": 3116 }, { "epoch": 0.33, "grad_norm": 3.7219654697356863, "learning_rate": 7.847332408116863e-06, "loss": 0.6531, "step": 3117 }, { "epoch": 0.33, "grad_norm": 3.7912563805814945, "learning_rate": 7.845931334163138e-06, "loss": 0.7386, "step": 3118 }, { "epoch": 0.33, "grad_norm": 2.8886193603297348, "learning_rate": 7.844529929577336e-06, "loss": 0.6827, "step": 3119 }, { "epoch": 0.33, "grad_norm": 2.586772606669079, "learning_rate": 7.84312819452227e-06, "loss": 0.6084, "step": 3120 }, { "epoch": 0.33, "grad_norm": 2.6184892406372193, "learning_rate": 7.841726129160789e-06, "loss": 0.6775, "step": 3121 }, { "epoch": 0.33, "grad_norm": 2.3983415224825637, "learning_rate": 7.84032373365578e-06, "loss": 0.6357, "step": 3122 }, { "epoch": 0.33, "grad_norm": 3.1697316223767333, "learning_rate": 7.838921008170171e-06, "loss": 0.7038, "step": 3123 }, { "epoch": 0.33, "grad_norm": 2.538074643605018, "learning_rate": 7.837517952866924e-06, "loss": 0.6816, "step": 3124 }, { "epoch": 0.33, "grad_norm": 2.723960615531358, "learning_rate": 7.836114567909046e-06, "loss": 0.6351, "step": 3125 }, { "epoch": 0.33, "grad_norm": 4.3141201817543715, "learning_rate": 7.834710853459575e-06, "loss": 0.6886, "step": 3126 }, { "epoch": 0.33, "grad_norm": 6.935659578134951, "learning_rate": 7.833306809681593e-06, "loss": 0.6418, "step": 3127 }, { "epoch": 0.33, "grad_norm": 2.8856037758667292, "learning_rate": 7.831902436738215e-06, "loss": 0.6849, "step": 3128 }, { "epoch": 0.33, "grad_norm": 8.563709509210556, "learning_rate": 7.830497734792597e-06, "loss": 0.6117, "step": 3129 }, { "epoch": 0.33, "grad_norm": 2.927718266986384, "learning_rate": 7.829092704007935e-06, "loss": 0.6543, "step": 3130 }, { "epoch": 0.33, "grad_norm": 2.675602881652366, "learning_rate": 7.827687344547459e-06, "loss": 0.6443, "step": 3131 }, { "epoch": 0.33, "grad_norm": 2.732964806627827, "learning_rate": 7.826281656574444e-06, "loss": 0.6704, "step": 3132 }, { "epoch": 0.33, "grad_norm": 2.9065789120991763, "learning_rate": 7.824875640252195e-06, "loss": 0.7202, "step": 3133 }, { "epoch": 0.33, "grad_norm": 2.5054335560181444, "learning_rate": 7.82346929574406e-06, "loss": 0.7212, "step": 3134 }, { "epoch": 0.33, "grad_norm": 2.5085507480711065, "learning_rate": 7.822062623213424e-06, "loss": 0.7014, "step": 3135 }, { "epoch": 0.33, "grad_norm": 8.434812127737116, "learning_rate": 7.820655622823712e-06, "loss": 0.6936, "step": 3136 }, { "epoch": 0.33, "grad_norm": 7.281720661318549, "learning_rate": 7.819248294738381e-06, "loss": 0.6299, "step": 3137 }, { "epoch": 0.33, "grad_norm": 3.336679866157924, "learning_rate": 7.817840639120932e-06, "loss": 0.6982, "step": 3138 }, { "epoch": 0.33, "grad_norm": 2.7369577275240657, "learning_rate": 7.816432656134907e-06, "loss": 0.6836, "step": 3139 }, { "epoch": 0.33, "grad_norm": 3.408523586518461, "learning_rate": 7.815024345943874e-06, "loss": 0.6304, "step": 3140 }, { "epoch": 0.33, "grad_norm": 2.7339532046846244, "learning_rate": 7.81361570871145e-06, "loss": 0.6901, "step": 3141 }, { "epoch": 0.33, "grad_norm": 2.55643397634351, "learning_rate": 7.812206744601288e-06, "loss": 0.7034, "step": 3142 }, { "epoch": 0.33, "grad_norm": 2.7516095557400058, "learning_rate": 7.810797453777076e-06, "loss": 0.5308, "step": 3143 }, { "epoch": 0.33, "grad_norm": 2.5846496687334355, "learning_rate": 7.80938783640254e-06, "loss": 0.7117, "step": 3144 }, { "epoch": 0.33, "grad_norm": 2.8518803010333, "learning_rate": 7.807977892641446e-06, "loss": 0.7384, "step": 3145 }, { "epoch": 0.33, "grad_norm": 3.978801136142066, "learning_rate": 7.806567622657598e-06, "loss": 0.6169, "step": 3146 }, { "epoch": 0.33, "grad_norm": 3.8381379717790853, "learning_rate": 7.805157026614836e-06, "loss": 0.6696, "step": 3147 }, { "epoch": 0.33, "grad_norm": 2.6451501046738706, "learning_rate": 7.80374610467704e-06, "loss": 0.695, "step": 3148 }, { "epoch": 0.33, "grad_norm": 2.6556926713393936, "learning_rate": 7.802334857008127e-06, "loss": 0.5896, "step": 3149 }, { "epoch": 0.33, "grad_norm": 2.3511219203796054, "learning_rate": 7.800923283772051e-06, "loss": 0.6078, "step": 3150 }, { "epoch": 0.33, "grad_norm": 2.3055445730449935, "learning_rate": 7.799511385132803e-06, "loss": 0.7246, "step": 3151 }, { "epoch": 0.33, "grad_norm": 2.807571982019713, "learning_rate": 7.798099161254415e-06, "loss": 0.5812, "step": 3152 }, { "epoch": 0.33, "grad_norm": 2.9073765971064183, "learning_rate": 7.796686612300957e-06, "loss": 0.5974, "step": 3153 }, { "epoch": 0.33, "grad_norm": 2.580983486608408, "learning_rate": 7.795273738436531e-06, "loss": 0.6505, "step": 3154 }, { "epoch": 0.33, "grad_norm": 2.27033419980578, "learning_rate": 7.793860539825282e-06, "loss": 0.6077, "step": 3155 }, { "epoch": 0.33, "grad_norm": 2.736113101860133, "learning_rate": 7.792447016631392e-06, "loss": 0.5951, "step": 3156 }, { "epoch": 0.33, "grad_norm": 2.1064869720748733, "learning_rate": 7.79103316901908e-06, "loss": 0.6632, "step": 3157 }, { "epoch": 0.33, "grad_norm": 3.0989709229973887, "learning_rate": 7.789618997152603e-06, "loss": 0.7188, "step": 3158 }, { "epoch": 0.33, "grad_norm": 3.06812401144027, "learning_rate": 7.788204501196255e-06, "loss": 0.6171, "step": 3159 }, { "epoch": 0.33, "grad_norm": 2.691041503384547, "learning_rate": 7.786789681314368e-06, "loss": 0.604, "step": 3160 }, { "epoch": 0.33, "grad_norm": 2.614511086385431, "learning_rate": 7.785374537671311e-06, "loss": 0.6663, "step": 3161 }, { "epoch": 0.33, "grad_norm": 2.6590381539539054, "learning_rate": 7.783959070431492e-06, "loss": 0.7339, "step": 3162 }, { "epoch": 0.33, "grad_norm": 1.1374038834717708, "learning_rate": 7.782543279759356e-06, "loss": 0.5921, "step": 3163 }, { "epoch": 0.33, "grad_norm": 2.7699969206806414, "learning_rate": 7.781127165819386e-06, "loss": 0.767, "step": 3164 }, { "epoch": 0.33, "grad_norm": 2.0646485786942144, "learning_rate": 7.7797107287761e-06, "loss": 0.657, "step": 3165 }, { "epoch": 0.33, "grad_norm": 3.383226812212046, "learning_rate": 7.778293968794056e-06, "loss": 0.6619, "step": 3166 }, { "epoch": 0.33, "grad_norm": 2.3946861066654246, "learning_rate": 7.776876886037852e-06, "loss": 0.7148, "step": 3167 }, { "epoch": 0.33, "grad_norm": 4.2920273098631965, "learning_rate": 7.775459480672117e-06, "loss": 0.6964, "step": 3168 }, { "epoch": 0.33, "grad_norm": 2.74757852495417, "learning_rate": 7.774041752861524e-06, "loss": 0.6485, "step": 3169 }, { "epoch": 0.33, "grad_norm": 3.8429087492176057, "learning_rate": 7.772623702770779e-06, "loss": 0.7287, "step": 3170 }, { "epoch": 0.33, "grad_norm": 2.6463677367731218, "learning_rate": 7.771205330564626e-06, "loss": 0.6197, "step": 3171 }, { "epoch": 0.33, "grad_norm": 2.3146582536567784, "learning_rate": 7.769786636407849e-06, "loss": 0.673, "step": 3172 }, { "epoch": 0.33, "grad_norm": 3.097668002633075, "learning_rate": 7.768367620465267e-06, "loss": 0.6628, "step": 3173 }, { "epoch": 0.33, "grad_norm": 2.6251746945849934, "learning_rate": 7.766948282901738e-06, "loss": 0.6698, "step": 3174 }, { "epoch": 0.33, "grad_norm": 2.7114682635233325, "learning_rate": 7.765528623882155e-06, "loss": 0.6594, "step": 3175 }, { "epoch": 0.33, "grad_norm": 3.4612588856168838, "learning_rate": 7.76410864357145e-06, "loss": 0.6582, "step": 3176 }, { "epoch": 0.33, "grad_norm": 2.874426831691072, "learning_rate": 7.762688342134597e-06, "loss": 0.6661, "step": 3177 }, { "epoch": 0.33, "grad_norm": 2.9148574564549485, "learning_rate": 7.761267719736593e-06, "loss": 0.6976, "step": 3178 }, { "epoch": 0.33, "grad_norm": 3.0616632712894885, "learning_rate": 7.759846776542492e-06, "loss": 0.6677, "step": 3179 }, { "epoch": 0.33, "grad_norm": 3.561508539134428, "learning_rate": 7.75842551271737e-06, "loss": 0.6977, "step": 3180 }, { "epoch": 0.33, "grad_norm": 3.298240212255653, "learning_rate": 7.757003928426342e-06, "loss": 0.6149, "step": 3181 }, { "epoch": 0.33, "grad_norm": 2.4188530769511982, "learning_rate": 7.755582023834572e-06, "loss": 0.6496, "step": 3182 }, { "epoch": 0.33, "grad_norm": 2.8398810101886345, "learning_rate": 7.754159799107244e-06, "loss": 0.6351, "step": 3183 }, { "epoch": 0.34, "grad_norm": 2.2499424827555927, "learning_rate": 7.752737254409594e-06, "loss": 0.7088, "step": 3184 }, { "epoch": 0.34, "grad_norm": 2.3814957039535374, "learning_rate": 7.751314389906887e-06, "loss": 0.6245, "step": 3185 }, { "epoch": 0.34, "grad_norm": 2.666046394806816, "learning_rate": 7.749891205764427e-06, "loss": 0.6133, "step": 3186 }, { "epoch": 0.34, "grad_norm": 2.7799564388267295, "learning_rate": 7.748467702147555e-06, "loss": 0.605, "step": 3187 }, { "epoch": 0.34, "grad_norm": 5.281029160468379, "learning_rate": 7.747043879221653e-06, "loss": 0.6321, "step": 3188 }, { "epoch": 0.34, "grad_norm": 2.934308558200218, "learning_rate": 7.745619737152133e-06, "loss": 0.7053, "step": 3189 }, { "epoch": 0.34, "grad_norm": 3.1545251666263026, "learning_rate": 7.744195276104447e-06, "loss": 0.6998, "step": 3190 }, { "epoch": 0.34, "grad_norm": 2.642275030044787, "learning_rate": 7.742770496244087e-06, "loss": 0.7263, "step": 3191 }, { "epoch": 0.34, "grad_norm": 1.9956469346760177, "learning_rate": 7.74134539773658e-06, "loss": 0.6707, "step": 3192 }, { "epoch": 0.34, "grad_norm": 4.099408512549327, "learning_rate": 7.73991998074749e-06, "loss": 0.6832, "step": 3193 }, { "epoch": 0.34, "grad_norm": 5.473717647736508, "learning_rate": 7.738494245442415e-06, "loss": 0.5969, "step": 3194 }, { "epoch": 0.34, "grad_norm": 2.67068434985646, "learning_rate": 7.737068191986995e-06, "loss": 0.7505, "step": 3195 }, { "epoch": 0.34, "grad_norm": 2.413731979539875, "learning_rate": 7.735641820546906e-06, "loss": 0.7376, "step": 3196 }, { "epoch": 0.34, "grad_norm": 2.619766771399215, "learning_rate": 7.73421513128786e-06, "loss": 0.712, "step": 3197 }, { "epoch": 0.34, "grad_norm": 2.0705943862250664, "learning_rate": 7.7327881243756e-06, "loss": 0.701, "step": 3198 }, { "epoch": 0.34, "grad_norm": 2.3139029563356512, "learning_rate": 7.731360799975916e-06, "loss": 0.6423, "step": 3199 }, { "epoch": 0.34, "grad_norm": 2.4451859469193082, "learning_rate": 7.72993315825463e-06, "loss": 0.6318, "step": 3200 }, { "epoch": 0.34, "grad_norm": 2.2475814172050104, "learning_rate": 7.728505199377603e-06, "loss": 0.6631, "step": 3201 }, { "epoch": 0.34, "grad_norm": 3.3486986779317154, "learning_rate": 7.727076923510727e-06, "loss": 0.7424, "step": 3202 }, { "epoch": 0.34, "grad_norm": 2.121154876974287, "learning_rate": 7.72564833081994e-06, "loss": 0.6947, "step": 3203 }, { "epoch": 0.34, "grad_norm": 3.4236551024113093, "learning_rate": 7.724219421471206e-06, "loss": 0.727, "step": 3204 }, { "epoch": 0.34, "grad_norm": 2.346428595824038, "learning_rate": 7.722790195630536e-06, "loss": 0.6948, "step": 3205 }, { "epoch": 0.34, "grad_norm": 3.092820968756066, "learning_rate": 7.721360653463971e-06, "loss": 0.6849, "step": 3206 }, { "epoch": 0.34, "grad_norm": 2.5447700966964466, "learning_rate": 7.719930795137592e-06, "loss": 0.7097, "step": 3207 }, { "epoch": 0.34, "grad_norm": 3.2830897925687825, "learning_rate": 7.718500620817517e-06, "loss": 0.6177, "step": 3208 }, { "epoch": 0.34, "grad_norm": 2.8208598121038104, "learning_rate": 7.717070130669896e-06, "loss": 0.6102, "step": 3209 }, { "epoch": 0.34, "grad_norm": 2.9927032819623642, "learning_rate": 7.715639324860925e-06, "loss": 0.6379, "step": 3210 }, { "epoch": 0.34, "grad_norm": 2.6289931425920803, "learning_rate": 7.714208203556825e-06, "loss": 0.7293, "step": 3211 }, { "epoch": 0.34, "grad_norm": 3.899374378134415, "learning_rate": 7.712776766923862e-06, "loss": 0.6116, "step": 3212 }, { "epoch": 0.34, "grad_norm": 3.2414892114848803, "learning_rate": 7.711345015128335e-06, "loss": 0.7262, "step": 3213 }, { "epoch": 0.34, "grad_norm": 3.45926603241094, "learning_rate": 7.709912948336583e-06, "loss": 0.7132, "step": 3214 }, { "epoch": 0.34, "grad_norm": 3.182829948766781, "learning_rate": 7.70848056671498e-06, "loss": 0.6255, "step": 3215 }, { "epoch": 0.34, "grad_norm": 2.7748630647111376, "learning_rate": 7.707047870429931e-06, "loss": 0.6655, "step": 3216 }, { "epoch": 0.34, "grad_norm": 3.9189572306348666, "learning_rate": 7.705614859647888e-06, "loss": 0.6888, "step": 3217 }, { "epoch": 0.34, "grad_norm": 1.1326017663302763, "learning_rate": 7.704181534535332e-06, "loss": 0.6407, "step": 3218 }, { "epoch": 0.34, "grad_norm": 4.352263696228539, "learning_rate": 7.70274789525878e-06, "loss": 0.6605, "step": 3219 }, { "epoch": 0.34, "grad_norm": 8.035499997279176, "learning_rate": 7.701313941984791e-06, "loss": 0.6789, "step": 3220 }, { "epoch": 0.34, "grad_norm": 2.629717191952034, "learning_rate": 7.699879674879958e-06, "loss": 0.5778, "step": 3221 }, { "epoch": 0.34, "grad_norm": 6.130587806018568, "learning_rate": 7.698445094110909e-06, "loss": 0.7025, "step": 3222 }, { "epoch": 0.34, "grad_norm": 2.8760063671669753, "learning_rate": 7.697010199844308e-06, "loss": 0.7081, "step": 3223 }, { "epoch": 0.34, "grad_norm": 2.7331704381503226, "learning_rate": 7.69557499224686e-06, "loss": 0.7452, "step": 3224 }, { "epoch": 0.34, "grad_norm": 2.850116734761613, "learning_rate": 7.694139471485301e-06, "loss": 0.6846, "step": 3225 }, { "epoch": 0.34, "grad_norm": 3.489662975427095, "learning_rate": 7.692703637726407e-06, "loss": 0.7061, "step": 3226 }, { "epoch": 0.34, "grad_norm": 2.903189368179508, "learning_rate": 7.691267491136986e-06, "loss": 0.5947, "step": 3227 }, { "epoch": 0.34, "grad_norm": 4.316176181783629, "learning_rate": 7.689831031883887e-06, "loss": 0.6344, "step": 3228 }, { "epoch": 0.34, "grad_norm": 2.4581575403347564, "learning_rate": 7.688394260133997e-06, "loss": 0.6042, "step": 3229 }, { "epoch": 0.34, "grad_norm": 3.424492880509628, "learning_rate": 7.686957176054231e-06, "loss": 0.6886, "step": 3230 }, { "epoch": 0.34, "grad_norm": 2.4711470411348553, "learning_rate": 7.68551977981155e-06, "loss": 0.5719, "step": 3231 }, { "epoch": 0.34, "grad_norm": 2.4487165637771358, "learning_rate": 7.684082071572943e-06, "loss": 0.7407, "step": 3232 }, { "epoch": 0.34, "grad_norm": 2.8173730592149235, "learning_rate": 7.68264405150544e-06, "loss": 0.6361, "step": 3233 }, { "epoch": 0.34, "grad_norm": 3.721164285756093, "learning_rate": 7.681205719776104e-06, "loss": 0.7631, "step": 3234 }, { "epoch": 0.34, "grad_norm": 3.9251106459657077, "learning_rate": 7.679767076552038e-06, "loss": 0.6352, "step": 3235 }, { "epoch": 0.34, "grad_norm": 1.0737715770377105, "learning_rate": 7.678328122000382e-06, "loss": 0.6233, "step": 3236 }, { "epoch": 0.34, "grad_norm": 2.5541768150933235, "learning_rate": 7.676888856288307e-06, "loss": 0.6348, "step": 3237 }, { "epoch": 0.34, "grad_norm": 2.580142219433405, "learning_rate": 7.67544927958302e-06, "loss": 0.6439, "step": 3238 }, { "epoch": 0.34, "grad_norm": 2.654991640432859, "learning_rate": 7.67400939205177e-06, "loss": 0.6801, "step": 3239 }, { "epoch": 0.34, "grad_norm": 3.6174398276239623, "learning_rate": 7.67256919386184e-06, "loss": 0.7203, "step": 3240 }, { "epoch": 0.34, "grad_norm": 3.5610803553484955, "learning_rate": 7.671128685180547e-06, "loss": 0.6511, "step": 3241 }, { "epoch": 0.34, "grad_norm": 2.938858056226461, "learning_rate": 7.669687866175245e-06, "loss": 0.6039, "step": 3242 }, { "epoch": 0.34, "grad_norm": 2.8043292999636313, "learning_rate": 7.668246737013323e-06, "loss": 0.6662, "step": 3243 }, { "epoch": 0.34, "grad_norm": 2.3949517216338907, "learning_rate": 7.666805297862208e-06, "loss": 0.6496, "step": 3244 }, { "epoch": 0.34, "grad_norm": 3.1462781898491157, "learning_rate": 7.665363548889362e-06, "loss": 0.6178, "step": 3245 }, { "epoch": 0.34, "grad_norm": 2.5506311534896433, "learning_rate": 7.663921490262286e-06, "loss": 0.6798, "step": 3246 }, { "epoch": 0.34, "grad_norm": 9.48469185368361, "learning_rate": 7.66247912214851e-06, "loss": 0.623, "step": 3247 }, { "epoch": 0.34, "grad_norm": 2.841001179637787, "learning_rate": 7.661036444715608e-06, "loss": 0.6621, "step": 3248 }, { "epoch": 0.34, "grad_norm": 1.1276649407769432, "learning_rate": 7.659593458131181e-06, "loss": 0.6079, "step": 3249 }, { "epoch": 0.34, "grad_norm": 2.967235234356677, "learning_rate": 7.658150162562875e-06, "loss": 0.6655, "step": 3250 }, { "epoch": 0.34, "grad_norm": 2.935135934795403, "learning_rate": 7.656706558178368e-06, "loss": 0.6983, "step": 3251 }, { "epoch": 0.34, "grad_norm": 2.7507617496596493, "learning_rate": 7.655262645145374e-06, "loss": 0.6997, "step": 3252 }, { "epoch": 0.34, "grad_norm": 3.6113941756706422, "learning_rate": 7.65381842363164e-06, "loss": 0.6512, "step": 3253 }, { "epoch": 0.34, "grad_norm": 2.9939074711670517, "learning_rate": 7.652373893804952e-06, "loss": 0.6817, "step": 3254 }, { "epoch": 0.34, "grad_norm": 3.5665404033863157, "learning_rate": 7.650929055833135e-06, "loss": 0.6852, "step": 3255 }, { "epoch": 0.34, "grad_norm": 2.7571299007941965, "learning_rate": 7.64948390988404e-06, "loss": 0.6688, "step": 3256 }, { "epoch": 0.34, "grad_norm": 3.93253267201708, "learning_rate": 7.648038456125566e-06, "loss": 0.6498, "step": 3257 }, { "epoch": 0.34, "grad_norm": 1.2564745859826627, "learning_rate": 7.646592694725638e-06, "loss": 0.6059, "step": 3258 }, { "epoch": 0.34, "grad_norm": 2.460398219897918, "learning_rate": 7.64514662585222e-06, "loss": 0.7199, "step": 3259 }, { "epoch": 0.34, "grad_norm": 2.095756389781973, "learning_rate": 7.643700249673315e-06, "loss": 0.6769, "step": 3260 }, { "epoch": 0.34, "grad_norm": 2.6122927168443058, "learning_rate": 7.642253566356957e-06, "loss": 0.6627, "step": 3261 }, { "epoch": 0.34, "grad_norm": 4.825071935901371, "learning_rate": 7.640806576071215e-06, "loss": 0.6343, "step": 3262 }, { "epoch": 0.34, "grad_norm": 2.968048064848337, "learning_rate": 7.639359278984202e-06, "loss": 0.7232, "step": 3263 }, { "epoch": 0.34, "grad_norm": 2.636260757493031, "learning_rate": 7.637911675264056e-06, "loss": 0.6242, "step": 3264 }, { "epoch": 0.34, "grad_norm": 6.452731556132531, "learning_rate": 7.636463765078958e-06, "loss": 0.6242, "step": 3265 }, { "epoch": 0.34, "grad_norm": 1.0833107459100242, "learning_rate": 7.63501554859712e-06, "loss": 0.5713, "step": 3266 }, { "epoch": 0.34, "grad_norm": 3.2859379838193097, "learning_rate": 7.633567025986795e-06, "loss": 0.7321, "step": 3267 }, { "epoch": 0.34, "grad_norm": 2.9027700596440162, "learning_rate": 7.632118197416263e-06, "loss": 0.574, "step": 3268 }, { "epoch": 0.34, "grad_norm": 2.7949610995170224, "learning_rate": 7.630669063053849e-06, "loss": 0.6283, "step": 3269 }, { "epoch": 0.34, "grad_norm": 2.724700144475404, "learning_rate": 7.629219623067907e-06, "loss": 0.6921, "step": 3270 }, { "epoch": 0.34, "grad_norm": 3.384530330225657, "learning_rate": 7.62776987762683e-06, "loss": 0.6618, "step": 3271 }, { "epoch": 0.34, "grad_norm": 2.591079385525758, "learning_rate": 7.626319826899045e-06, "loss": 0.6482, "step": 3272 }, { "epoch": 0.34, "grad_norm": 3.314517492095959, "learning_rate": 7.624869471053014e-06, "loss": 0.6968, "step": 3273 }, { "epoch": 0.34, "grad_norm": 2.71453402590448, "learning_rate": 7.623418810257234e-06, "loss": 0.6831, "step": 3274 }, { "epoch": 0.34, "grad_norm": 3.710436771293437, "learning_rate": 7.621967844680241e-06, "loss": 0.6333, "step": 3275 }, { "epoch": 0.34, "grad_norm": 3.8259084062940265, "learning_rate": 7.620516574490604e-06, "loss": 0.6768, "step": 3276 }, { "epoch": 0.34, "grad_norm": 2.291949490786709, "learning_rate": 7.6190649998569265e-06, "loss": 0.6547, "step": 3277 }, { "epoch": 0.34, "grad_norm": 2.7125389974746223, "learning_rate": 7.617613120947848e-06, "loss": 0.5768, "step": 3278 }, { "epoch": 0.35, "grad_norm": 9.167970886019283, "learning_rate": 7.616160937932045e-06, "loss": 0.7309, "step": 3279 }, { "epoch": 0.35, "grad_norm": 4.549733398070985, "learning_rate": 7.614708450978226e-06, "loss": 0.7135, "step": 3280 }, { "epoch": 0.35, "grad_norm": 2.9374280731445555, "learning_rate": 7.613255660255137e-06, "loss": 0.7211, "step": 3281 }, { "epoch": 0.35, "grad_norm": 2.870278169530705, "learning_rate": 7.611802565931559e-06, "loss": 0.6897, "step": 3282 }, { "epoch": 0.35, "grad_norm": 1.2543736203150375, "learning_rate": 7.610349168176309e-06, "loss": 0.6159, "step": 3283 }, { "epoch": 0.35, "grad_norm": 2.738519475571267, "learning_rate": 7.608895467158241e-06, "loss": 0.7084, "step": 3284 }, { "epoch": 0.35, "grad_norm": 4.479720977262225, "learning_rate": 7.607441463046236e-06, "loss": 0.7224, "step": 3285 }, { "epoch": 0.35, "grad_norm": 3.4942767891910593, "learning_rate": 7.60598715600922e-06, "loss": 0.6773, "step": 3286 }, { "epoch": 0.35, "grad_norm": 4.350906281284041, "learning_rate": 7.60453254621615e-06, "loss": 0.6071, "step": 3287 }, { "epoch": 0.35, "grad_norm": 3.8683834683641827, "learning_rate": 7.603077633836018e-06, "loss": 0.6792, "step": 3288 }, { "epoch": 0.35, "grad_norm": 2.7330042305762703, "learning_rate": 7.601622419037851e-06, "loss": 0.5959, "step": 3289 }, { "epoch": 0.35, "grad_norm": 3.4753065005990313, "learning_rate": 7.600166901990711e-06, "loss": 0.6422, "step": 3290 }, { "epoch": 0.35, "grad_norm": 2.6481317364356585, "learning_rate": 7.5987110828636966e-06, "loss": 0.7076, "step": 3291 }, { "epoch": 0.35, "grad_norm": 4.978965610289502, "learning_rate": 7.5972549618259415e-06, "loss": 0.6281, "step": 3292 }, { "epoch": 0.35, "grad_norm": 3.1836343422551043, "learning_rate": 7.595798539046612e-06, "loss": 0.6259, "step": 3293 }, { "epoch": 0.35, "grad_norm": 2.6636282179606776, "learning_rate": 7.594341814694914e-06, "loss": 0.5633, "step": 3294 }, { "epoch": 0.35, "grad_norm": 2.6492054525812354, "learning_rate": 7.592884788940082e-06, "loss": 0.7039, "step": 3295 }, { "epoch": 0.35, "grad_norm": 2.6199101782391185, "learning_rate": 7.59142746195139e-06, "loss": 0.6398, "step": 3296 }, { "epoch": 0.35, "grad_norm": 2.4865546985556777, "learning_rate": 7.5899698338981475e-06, "loss": 0.625, "step": 3297 }, { "epoch": 0.35, "grad_norm": 2.3823056427702305, "learning_rate": 7.588511904949696e-06, "loss": 0.7082, "step": 3298 }, { "epoch": 0.35, "grad_norm": 2.653522038656345, "learning_rate": 7.587053675275413e-06, "loss": 0.7159, "step": 3299 }, { "epoch": 0.35, "grad_norm": 3.007493250962484, "learning_rate": 7.585595145044714e-06, "loss": 0.6847, "step": 3300 }, { "epoch": 0.35, "grad_norm": 3.9762142271332412, "learning_rate": 7.5841363144270445e-06, "loss": 0.6299, "step": 3301 }, { "epoch": 0.35, "grad_norm": 4.081453490480924, "learning_rate": 7.582677183591889e-06, "loss": 0.6112, "step": 3302 }, { "epoch": 0.35, "grad_norm": 3.0011529464421827, "learning_rate": 7.581217752708763e-06, "loss": 0.6528, "step": 3303 }, { "epoch": 0.35, "grad_norm": 3.8502588551289163, "learning_rate": 7.579758021947221e-06, "loss": 0.7399, "step": 3304 }, { "epoch": 0.35, "grad_norm": 3.6979556515844454, "learning_rate": 7.578297991476848e-06, "loss": 0.7052, "step": 3305 }, { "epoch": 0.35, "grad_norm": 2.409793154362905, "learning_rate": 7.576837661467269e-06, "loss": 0.5654, "step": 3306 }, { "epoch": 0.35, "grad_norm": 3.0297576136517264, "learning_rate": 7.575377032088138e-06, "loss": 0.5874, "step": 3307 }, { "epoch": 0.35, "grad_norm": 2.335566510834448, "learning_rate": 7.573916103509149e-06, "loss": 0.6567, "step": 3308 }, { "epoch": 0.35, "grad_norm": 2.8306846989581675, "learning_rate": 7.572454875900026e-06, "loss": 0.6771, "step": 3309 }, { "epoch": 0.35, "grad_norm": 2.914166159192387, "learning_rate": 7.570993349430533e-06, "loss": 0.6979, "step": 3310 }, { "epoch": 0.35, "grad_norm": 3.172792917556958, "learning_rate": 7.569531524270465e-06, "loss": 0.6625, "step": 3311 }, { "epoch": 0.35, "grad_norm": 5.322440067377819, "learning_rate": 7.568069400589651e-06, "loss": 0.6143, "step": 3312 }, { "epoch": 0.35, "grad_norm": 2.7366214062810466, "learning_rate": 7.566606978557959e-06, "loss": 0.6719, "step": 3313 }, { "epoch": 0.35, "grad_norm": 3.6661060876658205, "learning_rate": 7.565144258345287e-06, "loss": 0.7597, "step": 3314 }, { "epoch": 0.35, "grad_norm": 2.841666028729839, "learning_rate": 7.563681240121569e-06, "loss": 0.6452, "step": 3315 }, { "epoch": 0.35, "grad_norm": 2.9148308706374637, "learning_rate": 7.562217924056777e-06, "loss": 0.7037, "step": 3316 }, { "epoch": 0.35, "grad_norm": 2.493732792998696, "learning_rate": 7.560754310320912e-06, "loss": 0.665, "step": 3317 }, { "epoch": 0.35, "grad_norm": 2.3318417413774633, "learning_rate": 7.559290399084016e-06, "loss": 0.6403, "step": 3318 }, { "epoch": 0.35, "grad_norm": 2.400731362221395, "learning_rate": 7.5578261905161575e-06, "loss": 0.6265, "step": 3319 }, { "epoch": 0.35, "grad_norm": 4.040790993560575, "learning_rate": 7.556361684787446e-06, "loss": 0.6665, "step": 3320 }, { "epoch": 0.35, "grad_norm": 2.2499211076771033, "learning_rate": 7.554896882068025e-06, "loss": 0.594, "step": 3321 }, { "epoch": 0.35, "grad_norm": 2.357387837819163, "learning_rate": 7.5534317825280664e-06, "loss": 0.6814, "step": 3322 }, { "epoch": 0.35, "grad_norm": 6.712351178001371, "learning_rate": 7.551966386337788e-06, "loss": 0.7282, "step": 3323 }, { "epoch": 0.35, "grad_norm": 2.4161878209245478, "learning_rate": 7.5505006936674304e-06, "loss": 0.6134, "step": 3324 }, { "epoch": 0.35, "grad_norm": 2.640259293020909, "learning_rate": 7.5490347046872755e-06, "loss": 0.7243, "step": 3325 }, { "epoch": 0.35, "grad_norm": 3.240206873144137, "learning_rate": 7.547568419567637e-06, "loss": 0.6741, "step": 3326 }, { "epoch": 0.35, "grad_norm": 3.394746942635679, "learning_rate": 7.546101838478864e-06, "loss": 0.6578, "step": 3327 }, { "epoch": 0.35, "grad_norm": 3.144423846324425, "learning_rate": 7.54463496159134e-06, "loss": 0.6511, "step": 3328 }, { "epoch": 0.35, "grad_norm": 2.6453748407725106, "learning_rate": 7.543167789075481e-06, "loss": 0.6275, "step": 3329 }, { "epoch": 0.35, "grad_norm": 2.3080573199067187, "learning_rate": 7.54170032110174e-06, "loss": 0.6676, "step": 3330 }, { "epoch": 0.35, "grad_norm": 2.714660296548131, "learning_rate": 7.540232557840604e-06, "loss": 0.611, "step": 3331 }, { "epoch": 0.35, "grad_norm": 4.047279583079742, "learning_rate": 7.53876449946259e-06, "loss": 0.6867, "step": 3332 }, { "epoch": 0.35, "grad_norm": 3.8795995067294924, "learning_rate": 7.537296146138255e-06, "loss": 0.5692, "step": 3333 }, { "epoch": 0.35, "grad_norm": 2.8734422567926003, "learning_rate": 7.535827498038192e-06, "loss": 0.7021, "step": 3334 }, { "epoch": 0.35, "grad_norm": 5.854617594622321, "learning_rate": 7.534358555333018e-06, "loss": 0.6198, "step": 3335 }, { "epoch": 0.35, "grad_norm": 5.60018430557542, "learning_rate": 7.532889318193393e-06, "loss": 0.6089, "step": 3336 }, { "epoch": 0.35, "grad_norm": 6.2343822540898515, "learning_rate": 7.531419786790011e-06, "loss": 0.7023, "step": 3337 }, { "epoch": 0.35, "grad_norm": 3.095566888586303, "learning_rate": 7.5299499612935934e-06, "loss": 0.6577, "step": 3338 }, { "epoch": 0.35, "grad_norm": 3.136010955006197, "learning_rate": 7.528479841874904e-06, "loss": 0.6715, "step": 3339 }, { "epoch": 0.35, "grad_norm": 2.077841229423998, "learning_rate": 7.527009428704735e-06, "loss": 0.683, "step": 3340 }, { "epoch": 0.35, "grad_norm": 2.840010075056747, "learning_rate": 7.525538721953915e-06, "loss": 0.7238, "step": 3341 }, { "epoch": 0.35, "grad_norm": 1.1576704441730468, "learning_rate": 7.524067721793309e-06, "loss": 0.6221, "step": 3342 }, { "epoch": 0.35, "grad_norm": 2.6260852715338414, "learning_rate": 7.522596428393809e-06, "loss": 0.6732, "step": 3343 }, { "epoch": 0.35, "grad_norm": 3.35119357031428, "learning_rate": 7.521124841926348e-06, "loss": 0.6466, "step": 3344 }, { "epoch": 0.35, "grad_norm": 2.410888681828178, "learning_rate": 7.519652962561894e-06, "loss": 0.7642, "step": 3345 }, { "epoch": 0.35, "grad_norm": 3.7276855469070074, "learning_rate": 7.5181807904714385e-06, "loss": 0.6249, "step": 3346 }, { "epoch": 0.35, "grad_norm": 3.415586524281074, "learning_rate": 7.516708325826021e-06, "loss": 0.5827, "step": 3347 }, { "epoch": 0.35, "grad_norm": 2.8263139482195503, "learning_rate": 7.515235568796704e-06, "loss": 0.662, "step": 3348 }, { "epoch": 0.35, "grad_norm": 2.664326847472684, "learning_rate": 7.513762519554588e-06, "loss": 0.5015, "step": 3349 }, { "epoch": 0.35, "grad_norm": 2.618966692292694, "learning_rate": 7.51228917827081e-06, "loss": 0.6738, "step": 3350 }, { "epoch": 0.35, "grad_norm": 3.633158245759139, "learning_rate": 7.510815545116539e-06, "loss": 0.6204, "step": 3351 }, { "epoch": 0.35, "grad_norm": 2.4185537158238395, "learning_rate": 7.509341620262976e-06, "loss": 0.5918, "step": 3352 }, { "epoch": 0.35, "grad_norm": 2.4156711101816497, "learning_rate": 7.507867403881356e-06, "loss": 0.7105, "step": 3353 }, { "epoch": 0.35, "grad_norm": 8.148153998559598, "learning_rate": 7.506392896142951e-06, "loss": 0.6219, "step": 3354 }, { "epoch": 0.35, "grad_norm": 3.6450108914306396, "learning_rate": 7.5049180972190646e-06, "loss": 0.6179, "step": 3355 }, { "epoch": 0.35, "grad_norm": 2.794686070537919, "learning_rate": 7.503443007281035e-06, "loss": 0.662, "step": 3356 }, { "epoch": 0.35, "grad_norm": 3.1106881667918755, "learning_rate": 7.501967626500231e-06, "loss": 0.695, "step": 3357 }, { "epoch": 0.35, "grad_norm": 3.700458795600681, "learning_rate": 7.500491955048063e-06, "loss": 0.6794, "step": 3358 }, { "epoch": 0.35, "grad_norm": 2.419751161900197, "learning_rate": 7.499015993095968e-06, "loss": 0.6671, "step": 3359 }, { "epoch": 0.35, "grad_norm": 2.8449869838184343, "learning_rate": 7.497539740815419e-06, "loss": 0.7413, "step": 3360 }, { "epoch": 0.35, "grad_norm": 3.4184394602863226, "learning_rate": 7.4960631983779205e-06, "loss": 0.6781, "step": 3361 }, { "epoch": 0.35, "grad_norm": 2.796963292810003, "learning_rate": 7.494586365955017e-06, "loss": 0.667, "step": 3362 }, { "epoch": 0.35, "grad_norm": 3.0483662745104665, "learning_rate": 7.49310924371828e-06, "loss": 0.715, "step": 3363 }, { "epoch": 0.35, "grad_norm": 3.5907131005662767, "learning_rate": 7.491631831839318e-06, "loss": 0.7097, "step": 3364 }, { "epoch": 0.35, "grad_norm": 3.175466104453125, "learning_rate": 7.490154130489773e-06, "loss": 0.6217, "step": 3365 }, { "epoch": 0.35, "grad_norm": 3.423899220213644, "learning_rate": 7.488676139841318e-06, "loss": 0.6247, "step": 3366 }, { "epoch": 0.35, "grad_norm": 2.8572229700965646, "learning_rate": 7.487197860065664e-06, "loss": 0.613, "step": 3367 }, { "epoch": 0.35, "grad_norm": 2.8587163319916042, "learning_rate": 7.485719291334551e-06, "loss": 0.6576, "step": 3368 }, { "epoch": 0.35, "grad_norm": 2.3991473483168626, "learning_rate": 7.484240433819758e-06, "loss": 0.66, "step": 3369 }, { "epoch": 0.35, "grad_norm": 5.965531907514714, "learning_rate": 7.482761287693092e-06, "loss": 0.6466, "step": 3370 }, { "epoch": 0.35, "grad_norm": 3.048642664328739, "learning_rate": 7.481281853126397e-06, "loss": 0.715, "step": 3371 }, { "epoch": 0.35, "grad_norm": 2.6309407059230296, "learning_rate": 7.479802130291548e-06, "loss": 0.6181, "step": 3372 }, { "epoch": 0.35, "grad_norm": 3.222323555003956, "learning_rate": 7.478322119360457e-06, "loss": 0.7331, "step": 3373 }, { "epoch": 0.36, "grad_norm": 2.2573259131196384, "learning_rate": 7.476841820505065e-06, "loss": 0.6684, "step": 3374 }, { "epoch": 0.36, "grad_norm": 3.072317633131639, "learning_rate": 7.475361233897352e-06, "loss": 0.6969, "step": 3375 }, { "epoch": 0.36, "grad_norm": 3.046601351124791, "learning_rate": 7.473880359709324e-06, "loss": 0.6585, "step": 3376 }, { "epoch": 0.36, "grad_norm": 2.4729492389743917, "learning_rate": 7.472399198113029e-06, "loss": 0.677, "step": 3377 }, { "epoch": 0.36, "grad_norm": 3.2903365979880492, "learning_rate": 7.4709177492805405e-06, "loss": 0.6252, "step": 3378 }, { "epoch": 0.36, "grad_norm": 2.9983558599680125, "learning_rate": 7.46943601338397e-06, "loss": 0.7068, "step": 3379 }, { "epoch": 0.36, "grad_norm": 3.0004226327693235, "learning_rate": 7.4679539905954655e-06, "loss": 0.6208, "step": 3380 }, { "epoch": 0.36, "grad_norm": 2.898934649269225, "learning_rate": 7.4664716810871975e-06, "loss": 0.6644, "step": 3381 }, { "epoch": 0.36, "grad_norm": 3.194372324636005, "learning_rate": 7.464989085031381e-06, "loss": 0.6673, "step": 3382 }, { "epoch": 0.36, "grad_norm": 1.1182459004852183, "learning_rate": 7.463506202600257e-06, "loss": 0.5741, "step": 3383 }, { "epoch": 0.36, "grad_norm": 4.36761957421957, "learning_rate": 7.462023033966104e-06, "loss": 0.6101, "step": 3384 }, { "epoch": 0.36, "grad_norm": 2.2861432544796156, "learning_rate": 7.4605395793012325e-06, "loss": 0.6921, "step": 3385 }, { "epoch": 0.36, "grad_norm": 2.7603235128046846, "learning_rate": 7.459055838777984e-06, "loss": 0.6734, "step": 3386 }, { "epoch": 0.36, "grad_norm": 3.9793695446983532, "learning_rate": 7.457571812568738e-06, "loss": 0.6595, "step": 3387 }, { "epoch": 0.36, "grad_norm": 4.311706185746525, "learning_rate": 7.4560875008459035e-06, "loss": 0.7186, "step": 3388 }, { "epoch": 0.36, "grad_norm": 2.597103883565191, "learning_rate": 7.454602903781921e-06, "loss": 0.7305, "step": 3389 }, { "epoch": 0.36, "grad_norm": 2.501333529220921, "learning_rate": 7.45311802154927e-06, "loss": 0.6342, "step": 3390 }, { "epoch": 0.36, "grad_norm": 2.5800190663717655, "learning_rate": 7.451632854320459e-06, "loss": 0.568, "step": 3391 }, { "epoch": 0.36, "grad_norm": 2.3701799197834, "learning_rate": 7.4501474022680265e-06, "loss": 0.6836, "step": 3392 }, { "epoch": 0.36, "grad_norm": 2.3772231724274557, "learning_rate": 7.4486616655645565e-06, "loss": 0.619, "step": 3393 }, { "epoch": 0.36, "grad_norm": 2.8160484269096595, "learning_rate": 7.447175644382648e-06, "loss": 0.7025, "step": 3394 }, { "epoch": 0.36, "grad_norm": 3.7832742183500967, "learning_rate": 7.445689338894949e-06, "loss": 0.7469, "step": 3395 }, { "epoch": 0.36, "grad_norm": 5.0059621903562, "learning_rate": 7.444202749274133e-06, "loss": 0.7397, "step": 3396 }, { "epoch": 0.36, "grad_norm": 3.892558568857166, "learning_rate": 7.442715875692908e-06, "loss": 0.6436, "step": 3397 }, { "epoch": 0.36, "grad_norm": 2.8280194358791824, "learning_rate": 7.4412287183240115e-06, "loss": 0.6798, "step": 3398 }, { "epoch": 0.36, "grad_norm": 2.837303017856792, "learning_rate": 7.43974127734022e-06, "loss": 0.6466, "step": 3399 }, { "epoch": 0.36, "grad_norm": 2.734694065110121, "learning_rate": 7.4382535529143395e-06, "loss": 0.6031, "step": 3400 }, { "epoch": 0.36, "grad_norm": 3.959288671850776, "learning_rate": 7.43676554521921e-06, "loss": 0.6906, "step": 3401 }, { "epoch": 0.36, "grad_norm": 6.198201079133412, "learning_rate": 7.435277254427704e-06, "loss": 0.6888, "step": 3402 }, { "epoch": 0.36, "grad_norm": 4.386419840965464, "learning_rate": 7.4337886807127235e-06, "loss": 0.6252, "step": 3403 }, { "epoch": 0.36, "grad_norm": 4.1534089316760054, "learning_rate": 7.4322998242472135e-06, "loss": 0.6707, "step": 3404 }, { "epoch": 0.36, "grad_norm": 2.7988368901968124, "learning_rate": 7.430810685204137e-06, "loss": 0.6584, "step": 3405 }, { "epoch": 0.36, "grad_norm": 3.919583307019518, "learning_rate": 7.4293212637565045e-06, "loss": 0.6966, "step": 3406 }, { "epoch": 0.36, "grad_norm": 2.508307648411482, "learning_rate": 7.427831560077349e-06, "loss": 0.6444, "step": 3407 }, { "epoch": 0.36, "grad_norm": 1.3042696713402557, "learning_rate": 7.426341574339741e-06, "loss": 0.5891, "step": 3408 }, { "epoch": 0.36, "grad_norm": 3.582107451003599, "learning_rate": 7.424851306716783e-06, "loss": 0.6359, "step": 3409 }, { "epoch": 0.36, "grad_norm": 2.904925684194061, "learning_rate": 7.42336075738161e-06, "loss": 0.6671, "step": 3410 }, { "epoch": 0.36, "grad_norm": 3.5167258077031267, "learning_rate": 7.421869926507389e-06, "loss": 0.7021, "step": 3411 }, { "epoch": 0.36, "grad_norm": 4.358687817951523, "learning_rate": 7.420378814267322e-06, "loss": 0.7419, "step": 3412 }, { "epoch": 0.36, "grad_norm": 3.152148471276924, "learning_rate": 7.41888742083464e-06, "loss": 0.6744, "step": 3413 }, { "epoch": 0.36, "grad_norm": 2.3915603282771976, "learning_rate": 7.417395746382608e-06, "loss": 0.6771, "step": 3414 }, { "epoch": 0.36, "grad_norm": 3.7148070409783536, "learning_rate": 7.415903791084529e-06, "loss": 0.7244, "step": 3415 }, { "epoch": 0.36, "grad_norm": 3.752027193158189, "learning_rate": 7.41441155511373e-06, "loss": 0.6177, "step": 3416 }, { "epoch": 0.36, "grad_norm": 2.62863853424837, "learning_rate": 7.412919038643577e-06, "loss": 0.6457, "step": 3417 }, { "epoch": 0.36, "grad_norm": 2.5794611356979993, "learning_rate": 7.411426241847463e-06, "loss": 0.6303, "step": 3418 }, { "epoch": 0.36, "grad_norm": 3.9676318300037527, "learning_rate": 7.409933164898819e-06, "loss": 0.6817, "step": 3419 }, { "epoch": 0.36, "grad_norm": 2.8550149054743, "learning_rate": 7.408439807971108e-06, "loss": 0.654, "step": 3420 }, { "epoch": 0.36, "grad_norm": 5.195120384669288, "learning_rate": 7.406946171237822e-06, "loss": 0.579, "step": 3421 }, { "epoch": 0.36, "grad_norm": 3.0078016488240107, "learning_rate": 7.4054522548724874e-06, "loss": 0.6311, "step": 3422 }, { "epoch": 0.36, "grad_norm": 2.4860091683873784, "learning_rate": 7.403958059048662e-06, "loss": 0.6168, "step": 3423 }, { "epoch": 0.36, "grad_norm": 2.570809434838443, "learning_rate": 7.40246358393994e-06, "loss": 0.591, "step": 3424 }, { "epoch": 0.36, "grad_norm": 3.7959371110166256, "learning_rate": 7.4009688297199436e-06, "loss": 0.7263, "step": 3425 }, { "epoch": 0.36, "grad_norm": 2.774483241184963, "learning_rate": 7.3994737965623285e-06, "loss": 0.6566, "step": 3426 }, { "epoch": 0.36, "grad_norm": 2.455385395465524, "learning_rate": 7.397978484640783e-06, "loss": 0.5688, "step": 3427 }, { "epoch": 0.36, "grad_norm": 2.422389158395626, "learning_rate": 7.396482894129031e-06, "loss": 0.6501, "step": 3428 }, { "epoch": 0.36, "grad_norm": 2.9579176252075983, "learning_rate": 7.3949870252008215e-06, "loss": 0.5639, "step": 3429 }, { "epoch": 0.36, "grad_norm": 3.2151993534590684, "learning_rate": 7.393490878029945e-06, "loss": 0.6368, "step": 3430 }, { "epoch": 0.36, "grad_norm": 2.4041060387118276, "learning_rate": 7.391994452790217e-06, "loss": 0.7303, "step": 3431 }, { "epoch": 0.36, "grad_norm": 2.7465187882412563, "learning_rate": 7.390497749655487e-06, "loss": 0.6067, "step": 3432 }, { "epoch": 0.36, "grad_norm": 2.714071334159705, "learning_rate": 7.389000768799638e-06, "loss": 0.6604, "step": 3433 }, { "epoch": 0.36, "grad_norm": 2.9662799463771505, "learning_rate": 7.387503510396586e-06, "loss": 0.6275, "step": 3434 }, { "epoch": 0.36, "grad_norm": 2.3746212916638507, "learning_rate": 7.386005974620278e-06, "loss": 0.7533, "step": 3435 }, { "epoch": 0.36, "grad_norm": 3.6759513228513154, "learning_rate": 7.384508161644694e-06, "loss": 0.6726, "step": 3436 }, { "epoch": 0.36, "grad_norm": 4.2759734265835565, "learning_rate": 7.383010071643844e-06, "loss": 0.6219, "step": 3437 }, { "epoch": 0.36, "grad_norm": 1.0676737541014298, "learning_rate": 7.381511704791771e-06, "loss": 0.5884, "step": 3438 }, { "epoch": 0.36, "grad_norm": 3.292189795768927, "learning_rate": 7.380013061262557e-06, "loss": 0.5909, "step": 3439 }, { "epoch": 0.36, "grad_norm": 3.097298037082653, "learning_rate": 7.3785141412303e-06, "loss": 0.6163, "step": 3440 }, { "epoch": 0.36, "grad_norm": 2.816854958679348, "learning_rate": 7.37701494486915e-06, "loss": 0.6845, "step": 3441 }, { "epoch": 0.36, "grad_norm": 4.788505661813447, "learning_rate": 7.375515472353272e-06, "loss": 0.6201, "step": 3442 }, { "epoch": 0.36, "grad_norm": 4.129127828825236, "learning_rate": 7.374015723856873e-06, "loss": 0.6793, "step": 3443 }, { "epoch": 0.36, "grad_norm": 2.3574849469565544, "learning_rate": 7.372515699554191e-06, "loss": 0.6221, "step": 3444 }, { "epoch": 0.36, "grad_norm": 3.3812374504332805, "learning_rate": 7.371015399619494e-06, "loss": 0.6603, "step": 3445 }, { "epoch": 0.36, "grad_norm": 3.0367925120459303, "learning_rate": 7.369514824227082e-06, "loss": 0.6616, "step": 3446 }, { "epoch": 0.36, "grad_norm": 2.6543688097654665, "learning_rate": 7.368013973551286e-06, "loss": 0.7013, "step": 3447 }, { "epoch": 0.36, "grad_norm": 2.1635818274199434, "learning_rate": 7.366512847766472e-06, "loss": 0.6885, "step": 3448 }, { "epoch": 0.36, "grad_norm": 3.085467390645828, "learning_rate": 7.365011447047036e-06, "loss": 0.7165, "step": 3449 }, { "epoch": 0.36, "grad_norm": 2.283937568641037, "learning_rate": 7.363509771567408e-06, "loss": 0.6338, "step": 3450 }, { "epoch": 0.36, "grad_norm": 3.1768009624928166, "learning_rate": 7.362007821502045e-06, "loss": 0.747, "step": 3451 }, { "epoch": 0.36, "grad_norm": 3.0324905424749597, "learning_rate": 7.360505597025442e-06, "loss": 0.685, "step": 3452 }, { "epoch": 0.36, "grad_norm": 2.983148422243415, "learning_rate": 7.359003098312123e-06, "loss": 0.6152, "step": 3453 }, { "epoch": 0.36, "grad_norm": 2.9793497750002658, "learning_rate": 7.357500325536644e-06, "loss": 0.7456, "step": 3454 }, { "epoch": 0.36, "grad_norm": 2.941874609510829, "learning_rate": 7.355997278873589e-06, "loss": 0.6748, "step": 3455 }, { "epoch": 0.36, "grad_norm": 2.1486776506884153, "learning_rate": 7.354493958497583e-06, "loss": 0.661, "step": 3456 }, { "epoch": 0.36, "grad_norm": 1.0170095119117828, "learning_rate": 7.3529903645832744e-06, "loss": 0.5884, "step": 3457 }, { "epoch": 0.36, "grad_norm": 2.4036589735821483, "learning_rate": 7.351486497305347e-06, "loss": 0.5853, "step": 3458 }, { "epoch": 0.36, "grad_norm": 3.310188596068514, "learning_rate": 7.349982356838515e-06, "loss": 0.7171, "step": 3459 }, { "epoch": 0.36, "grad_norm": 2.942433337193868, "learning_rate": 7.348477943357527e-06, "loss": 0.6472, "step": 3460 }, { "epoch": 0.36, "grad_norm": 4.969354980228407, "learning_rate": 7.34697325703716e-06, "loss": 0.6456, "step": 3461 }, { "epoch": 0.36, "grad_norm": 3.8519505648735737, "learning_rate": 7.345468298052224e-06, "loss": 0.6262, "step": 3462 }, { "epoch": 0.36, "grad_norm": 3.685187241926782, "learning_rate": 7.343963066577563e-06, "loss": 0.6571, "step": 3463 }, { "epoch": 0.36, "grad_norm": 3.437533087132148, "learning_rate": 7.342457562788046e-06, "loss": 0.6799, "step": 3464 }, { "epoch": 0.36, "grad_norm": 4.281370556976629, "learning_rate": 7.340951786858583e-06, "loss": 0.688, "step": 3465 }, { "epoch": 0.36, "grad_norm": 3.1063629908728014, "learning_rate": 7.339445738964106e-06, "loss": 0.7737, "step": 3466 }, { "epoch": 0.36, "grad_norm": 2.0889025130238092, "learning_rate": 7.337939419279588e-06, "loss": 0.6248, "step": 3467 }, { "epoch": 0.36, "grad_norm": 2.815104069216375, "learning_rate": 7.336432827980026e-06, "loss": 0.6681, "step": 3468 }, { "epoch": 0.37, "grad_norm": 4.1118170144076265, "learning_rate": 7.334925965240451e-06, "loss": 0.6273, "step": 3469 }, { "epoch": 0.37, "grad_norm": 3.2170988071407147, "learning_rate": 7.333418831235928e-06, "loss": 0.6412, "step": 3470 }, { "epoch": 0.37, "grad_norm": 3.4121748554575984, "learning_rate": 7.33191142614155e-06, "loss": 0.698, "step": 3471 }, { "epoch": 0.37, "grad_norm": 3.0702451497078247, "learning_rate": 7.330403750132443e-06, "loss": 0.5974, "step": 3472 }, { "epoch": 0.37, "grad_norm": 2.896556923363, "learning_rate": 7.328895803383764e-06, "loss": 0.6431, "step": 3473 }, { "epoch": 0.37, "grad_norm": 2.849283645322886, "learning_rate": 7.327387586070705e-06, "loss": 0.6288, "step": 3474 }, { "epoch": 0.37, "grad_norm": 3.6979247535807063, "learning_rate": 7.325879098368483e-06, "loss": 0.6332, "step": 3475 }, { "epoch": 0.37, "grad_norm": 2.7475025526366377, "learning_rate": 7.324370340452351e-06, "loss": 0.6657, "step": 3476 }, { "epoch": 0.37, "grad_norm": 2.9659631728372777, "learning_rate": 7.322861312497591e-06, "loss": 0.6951, "step": 3477 }, { "epoch": 0.37, "grad_norm": 3.1844754621892837, "learning_rate": 7.321352014679522e-06, "loss": 0.5929, "step": 3478 }, { "epoch": 0.37, "grad_norm": 2.974837282969043, "learning_rate": 7.319842447173482e-06, "loss": 0.5501, "step": 3479 }, { "epoch": 0.37, "grad_norm": 2.892368554345106, "learning_rate": 7.318332610154854e-06, "loss": 0.7127, "step": 3480 }, { "epoch": 0.37, "grad_norm": 2.715197258419502, "learning_rate": 7.3168225037990434e-06, "loss": 0.6408, "step": 3481 }, { "epoch": 0.37, "grad_norm": 2.5710173525042554, "learning_rate": 7.315312128281493e-06, "loss": 0.6488, "step": 3482 }, { "epoch": 0.37, "grad_norm": 2.0270385193056204, "learning_rate": 7.313801483777674e-06, "loss": 0.6552, "step": 3483 }, { "epoch": 0.37, "grad_norm": 4.205749249065878, "learning_rate": 7.3122905704630845e-06, "loss": 0.6876, "step": 3484 }, { "epoch": 0.37, "grad_norm": 3.688817976879616, "learning_rate": 7.310779388513263e-06, "loss": 0.6637, "step": 3485 }, { "epoch": 0.37, "grad_norm": 2.401643485510619, "learning_rate": 7.309267938103769e-06, "loss": 0.6614, "step": 3486 }, { "epoch": 0.37, "grad_norm": 2.4560485153621237, "learning_rate": 7.307756219410205e-06, "loss": 0.6329, "step": 3487 }, { "epoch": 0.37, "grad_norm": 2.5820481308855046, "learning_rate": 7.306244232608191e-06, "loss": 0.7006, "step": 3488 }, { "epoch": 0.37, "grad_norm": 1.1020552524421927, "learning_rate": 7.304731977873392e-06, "loss": 0.5822, "step": 3489 }, { "epoch": 0.37, "grad_norm": 2.584430521631613, "learning_rate": 7.303219455381491e-06, "loss": 0.6619, "step": 3490 }, { "epoch": 0.37, "grad_norm": 2.298444021464788, "learning_rate": 7.301706665308212e-06, "loss": 0.7223, "step": 3491 }, { "epoch": 0.37, "grad_norm": 6.985437156669851, "learning_rate": 7.300193607829308e-06, "loss": 0.6624, "step": 3492 }, { "epoch": 0.37, "grad_norm": 2.590700474555624, "learning_rate": 7.298680283120558e-06, "loss": 0.6532, "step": 3493 }, { "epoch": 0.37, "grad_norm": 2.3977866755703614, "learning_rate": 7.29716669135778e-06, "loss": 0.7138, "step": 3494 }, { "epoch": 0.37, "grad_norm": 2.7237583049566525, "learning_rate": 7.295652832716814e-06, "loss": 0.6091, "step": 3495 }, { "epoch": 0.37, "grad_norm": 2.789323251837038, "learning_rate": 7.294138707373539e-06, "loss": 0.6027, "step": 3496 }, { "epoch": 0.37, "grad_norm": 2.1445610603977494, "learning_rate": 7.29262431550386e-06, "loss": 0.6358, "step": 3497 }, { "epoch": 0.37, "grad_norm": 2.7828156452167434, "learning_rate": 7.2911096572837155e-06, "loss": 0.7049, "step": 3498 }, { "epoch": 0.37, "grad_norm": 3.062889194963662, "learning_rate": 7.289594732889073e-06, "loss": 0.6906, "step": 3499 }, { "epoch": 0.37, "grad_norm": 3.187017863959395, "learning_rate": 7.288079542495936e-06, "loss": 0.6046, "step": 3500 }, { "epoch": 0.37, "grad_norm": 2.80365868383857, "learning_rate": 7.286564086280329e-06, "loss": 0.6339, "step": 3501 }, { "epoch": 0.37, "grad_norm": 2.978700832429271, "learning_rate": 7.285048364418319e-06, "loss": 0.6488, "step": 3502 }, { "epoch": 0.37, "grad_norm": 2.9173805232519245, "learning_rate": 7.283532377085992e-06, "loss": 0.6956, "step": 3503 }, { "epoch": 0.37, "grad_norm": 3.020587219160722, "learning_rate": 7.282016124459477e-06, "loss": 0.6508, "step": 3504 }, { "epoch": 0.37, "grad_norm": 2.3101783919909056, "learning_rate": 7.280499606714923e-06, "loss": 0.6795, "step": 3505 }, { "epoch": 0.37, "grad_norm": 3.9706549307214556, "learning_rate": 7.27898282402852e-06, "loss": 0.7841, "step": 3506 }, { "epoch": 0.37, "grad_norm": 3.2123361694164423, "learning_rate": 7.277465776576478e-06, "loss": 0.6485, "step": 3507 }, { "epoch": 0.37, "grad_norm": 2.833849508292802, "learning_rate": 7.275948464535045e-06, "loss": 0.6073, "step": 3508 }, { "epoch": 0.37, "grad_norm": 2.190248056297931, "learning_rate": 7.274430888080502e-06, "loss": 0.6136, "step": 3509 }, { "epoch": 0.37, "grad_norm": 3.801011293418992, "learning_rate": 7.27291304738915e-06, "loss": 0.6847, "step": 3510 }, { "epoch": 0.37, "grad_norm": 6.725920278243489, "learning_rate": 7.271394942637332e-06, "loss": 0.6099, "step": 3511 }, { "epoch": 0.37, "grad_norm": 2.3603811567927466, "learning_rate": 7.269876574001414e-06, "loss": 0.6546, "step": 3512 }, { "epoch": 0.37, "grad_norm": 2.775937881236346, "learning_rate": 7.2683579416578e-06, "loss": 0.6915, "step": 3513 }, { "epoch": 0.37, "grad_norm": 2.542726679655711, "learning_rate": 7.266839045782914e-06, "loss": 0.6684, "step": 3514 }, { "epoch": 0.37, "grad_norm": 2.7964307299094453, "learning_rate": 7.265319886553223e-06, "loss": 0.594, "step": 3515 }, { "epoch": 0.37, "grad_norm": 2.37740356376484, "learning_rate": 7.263800464145214e-06, "loss": 0.7419, "step": 3516 }, { "epoch": 0.37, "grad_norm": 3.485584641652323, "learning_rate": 7.262280778735412e-06, "loss": 0.6703, "step": 3517 }, { "epoch": 0.37, "grad_norm": 3.4839449059905907, "learning_rate": 7.26076083050037e-06, "loss": 0.675, "step": 3518 }, { "epoch": 0.37, "grad_norm": 2.12300936003989, "learning_rate": 7.259240619616668e-06, "loss": 0.6224, "step": 3519 }, { "epoch": 0.37, "grad_norm": 2.6719730804814863, "learning_rate": 7.257720146260923e-06, "loss": 0.659, "step": 3520 }, { "epoch": 0.37, "grad_norm": 3.73292327896022, "learning_rate": 7.256199410609776e-06, "loss": 0.6476, "step": 3521 }, { "epoch": 0.37, "grad_norm": 4.636014633612924, "learning_rate": 7.254678412839905e-06, "loss": 0.6879, "step": 3522 }, { "epoch": 0.37, "grad_norm": 2.5306894259549075, "learning_rate": 7.253157153128012e-06, "loss": 0.6785, "step": 3523 }, { "epoch": 0.37, "grad_norm": 3.976569034282962, "learning_rate": 7.251635631650838e-06, "loss": 0.7064, "step": 3524 }, { "epoch": 0.37, "grad_norm": 2.45250759237423, "learning_rate": 7.250113848585141e-06, "loss": 0.6499, "step": 3525 }, { "epoch": 0.37, "grad_norm": 2.060166639945123, "learning_rate": 7.248591804107724e-06, "loss": 0.6757, "step": 3526 }, { "epoch": 0.37, "grad_norm": 5.716487897796609, "learning_rate": 7.247069498395409e-06, "loss": 0.6512, "step": 3527 }, { "epoch": 0.37, "grad_norm": 3.4915603318297346, "learning_rate": 7.245546931625057e-06, "loss": 0.6518, "step": 3528 }, { "epoch": 0.37, "grad_norm": 2.657417413138783, "learning_rate": 7.244024103973553e-06, "loss": 0.6662, "step": 3529 }, { "epoch": 0.37, "grad_norm": 4.6985543615408805, "learning_rate": 7.242501015617815e-06, "loss": 0.5643, "step": 3530 }, { "epoch": 0.37, "grad_norm": 2.7512005261618566, "learning_rate": 7.240977666734793e-06, "loss": 0.6004, "step": 3531 }, { "epoch": 0.37, "grad_norm": 2.3062901989306015, "learning_rate": 7.239454057501462e-06, "loss": 0.6528, "step": 3532 }, { "epoch": 0.37, "grad_norm": 3.1690879827237803, "learning_rate": 7.237930188094834e-06, "loss": 0.6433, "step": 3533 }, { "epoch": 0.37, "grad_norm": 2.189006985940273, "learning_rate": 7.236406058691944e-06, "loss": 0.6005, "step": 3534 }, { "epoch": 0.37, "grad_norm": 2.5436760311571067, "learning_rate": 7.234881669469864e-06, "loss": 0.6815, "step": 3535 }, { "epoch": 0.37, "grad_norm": 2.5440441226370254, "learning_rate": 7.233357020605692e-06, "loss": 0.5973, "step": 3536 }, { "epoch": 0.37, "grad_norm": 2.8717435554951423, "learning_rate": 7.2318321122765575e-06, "loss": 0.7263, "step": 3537 }, { "epoch": 0.37, "grad_norm": 2.5676017996288674, "learning_rate": 7.230306944659618e-06, "loss": 0.6429, "step": 3538 }, { "epoch": 0.37, "grad_norm": 3.320166184610786, "learning_rate": 7.2287815179320665e-06, "loss": 0.7412, "step": 3539 }, { "epoch": 0.37, "grad_norm": 4.887680699145069, "learning_rate": 7.227255832271122e-06, "loss": 0.7, "step": 3540 }, { "epoch": 0.37, "grad_norm": 2.2913081303644938, "learning_rate": 7.225729887854032e-06, "loss": 0.6098, "step": 3541 }, { "epoch": 0.37, "grad_norm": 4.5364276369948415, "learning_rate": 7.224203684858078e-06, "loss": 0.639, "step": 3542 }, { "epoch": 0.37, "grad_norm": 2.9611456139801065, "learning_rate": 7.222677223460567e-06, "loss": 0.626, "step": 3543 }, { "epoch": 0.37, "grad_norm": 4.607908981424635, "learning_rate": 7.221150503838844e-06, "loss": 0.6371, "step": 3544 }, { "epoch": 0.37, "grad_norm": 2.6691678732909114, "learning_rate": 7.219623526170275e-06, "loss": 0.6519, "step": 3545 }, { "epoch": 0.37, "grad_norm": 2.914049840880877, "learning_rate": 7.218096290632263e-06, "loss": 0.6737, "step": 3546 }, { "epoch": 0.37, "grad_norm": 4.551071513521762, "learning_rate": 7.216568797402232e-06, "loss": 0.616, "step": 3547 }, { "epoch": 0.37, "grad_norm": 2.5813083708314255, "learning_rate": 7.2150410466576495e-06, "loss": 0.7268, "step": 3548 }, { "epoch": 0.37, "grad_norm": 3.0795987810833583, "learning_rate": 7.213513038575999e-06, "loss": 0.6526, "step": 3549 }, { "epoch": 0.37, "grad_norm": 2.2721784087350216, "learning_rate": 7.211984773334803e-06, "loss": 0.6302, "step": 3550 }, { "epoch": 0.37, "grad_norm": 1.928510250413681, "learning_rate": 7.210456251111611e-06, "loss": 0.6411, "step": 3551 }, { "epoch": 0.37, "grad_norm": 4.445810874079125, "learning_rate": 7.208927472084e-06, "loss": 0.6141, "step": 3552 }, { "epoch": 0.37, "grad_norm": 2.3202477177734657, "learning_rate": 7.207398436429581e-06, "loss": 0.6578, "step": 3553 }, { "epoch": 0.37, "grad_norm": 4.305688026429538, "learning_rate": 7.205869144325992e-06, "loss": 0.6092, "step": 3554 }, { "epoch": 0.37, "grad_norm": 2.346775916923627, "learning_rate": 7.204339595950904e-06, "loss": 0.5894, "step": 3555 }, { "epoch": 0.37, "grad_norm": 3.8477133741441674, "learning_rate": 7.202809791482013e-06, "loss": 0.6427, "step": 3556 }, { "epoch": 0.37, "grad_norm": 3.235974425228947, "learning_rate": 7.201279731097048e-06, "loss": 0.6674, "step": 3557 }, { "epoch": 0.37, "grad_norm": 2.2493046168393445, "learning_rate": 7.199749414973767e-06, "loss": 0.6528, "step": 3558 }, { "epoch": 0.37, "grad_norm": 1.107170893614456, "learning_rate": 7.1982188432899595e-06, "loss": 0.5877, "step": 3559 }, { "epoch": 0.37, "grad_norm": 3.3599103332329485, "learning_rate": 7.196688016223439e-06, "loss": 0.7369, "step": 3560 }, { "epoch": 0.37, "grad_norm": 2.4363610701892657, "learning_rate": 7.195156933952055e-06, "loss": 0.6762, "step": 3561 }, { "epoch": 0.37, "grad_norm": 2.7535120357249556, "learning_rate": 7.193625596653684e-06, "loss": 0.6812, "step": 3562 }, { "epoch": 0.37, "grad_norm": 2.099885019771948, "learning_rate": 7.1920940045062335e-06, "loss": 0.6133, "step": 3563 }, { "epoch": 0.38, "grad_norm": 3.93938976849278, "learning_rate": 7.1905621576876375e-06, "loss": 0.6401, "step": 3564 }, { "epoch": 0.38, "grad_norm": 2.4121806622384656, "learning_rate": 7.189030056375862e-06, "loss": 0.629, "step": 3565 }, { "epoch": 0.38, "grad_norm": 2.832137503000308, "learning_rate": 7.187497700748903e-06, "loss": 0.7839, "step": 3566 }, { "epoch": 0.38, "grad_norm": 2.892305433301476, "learning_rate": 7.185965090984783e-06, "loss": 0.6009, "step": 3567 }, { "epoch": 0.38, "grad_norm": 2.701267405207593, "learning_rate": 7.184432227261561e-06, "loss": 0.7288, "step": 3568 }, { "epoch": 0.38, "grad_norm": 3.065332041666608, "learning_rate": 7.182899109757314e-06, "loss": 0.6107, "step": 3569 }, { "epoch": 0.38, "grad_norm": 2.3541441748774177, "learning_rate": 7.181365738650161e-06, "loss": 0.6027, "step": 3570 }, { "epoch": 0.38, "grad_norm": 2.271716533928777, "learning_rate": 7.17983211411824e-06, "loss": 0.6606, "step": 3571 }, { "epoch": 0.38, "grad_norm": 3.88759903004812, "learning_rate": 7.178298236339727e-06, "loss": 0.5967, "step": 3572 }, { "epoch": 0.38, "grad_norm": 5.01835950623007, "learning_rate": 7.176764105492821e-06, "loss": 0.6285, "step": 3573 }, { "epoch": 0.38, "grad_norm": 2.7434215149338397, "learning_rate": 7.175229721755753e-06, "loss": 0.6626, "step": 3574 }, { "epoch": 0.38, "grad_norm": 3.346997092680918, "learning_rate": 7.173695085306785e-06, "loss": 0.7167, "step": 3575 }, { "epoch": 0.38, "grad_norm": 3.140529020266596, "learning_rate": 7.172160196324205e-06, "loss": 0.658, "step": 3576 }, { "epoch": 0.38, "grad_norm": 4.120165751814182, "learning_rate": 7.1706250549863335e-06, "loss": 0.69, "step": 3577 }, { "epoch": 0.38, "grad_norm": 2.2247914008766796, "learning_rate": 7.1690896614715155e-06, "loss": 0.6288, "step": 3578 }, { "epoch": 0.38, "grad_norm": 2.605629864739022, "learning_rate": 7.167554015958133e-06, "loss": 0.6357, "step": 3579 }, { "epoch": 0.38, "grad_norm": 2.3904906015835947, "learning_rate": 7.166018118624588e-06, "loss": 0.5556, "step": 3580 }, { "epoch": 0.38, "grad_norm": 2.6192217487542586, "learning_rate": 7.164481969649323e-06, "loss": 0.6576, "step": 3581 }, { "epoch": 0.38, "grad_norm": 5.076216402779878, "learning_rate": 7.162945569210796e-06, "loss": 0.6749, "step": 3582 }, { "epoch": 0.38, "grad_norm": 4.889072644554949, "learning_rate": 7.161408917487509e-06, "loss": 0.6673, "step": 3583 }, { "epoch": 0.38, "grad_norm": 4.537856847059156, "learning_rate": 7.159872014657978e-06, "loss": 0.6752, "step": 3584 }, { "epoch": 0.38, "grad_norm": 2.221005642218842, "learning_rate": 7.158334860900762e-06, "loss": 0.5711, "step": 3585 }, { "epoch": 0.38, "grad_norm": 2.4533316517178387, "learning_rate": 7.156797456394441e-06, "loss": 0.7351, "step": 3586 }, { "epoch": 0.38, "grad_norm": 2.886040834314495, "learning_rate": 7.1552598013176264e-06, "loss": 0.6767, "step": 3587 }, { "epoch": 0.38, "grad_norm": 2.874147523804201, "learning_rate": 7.1537218958489575e-06, "loss": 0.6642, "step": 3588 }, { "epoch": 0.38, "grad_norm": 2.722531139220571, "learning_rate": 7.152183740167105e-06, "loss": 0.6678, "step": 3589 }, { "epoch": 0.38, "grad_norm": 3.187200392218844, "learning_rate": 7.150645334450767e-06, "loss": 0.7528, "step": 3590 }, { "epoch": 0.38, "grad_norm": 2.3536496010454147, "learning_rate": 7.14910667887867e-06, "loss": 0.6902, "step": 3591 }, { "epoch": 0.38, "grad_norm": 2.0650748967886057, "learning_rate": 7.147567773629573e-06, "loss": 0.6505, "step": 3592 }, { "epoch": 0.38, "grad_norm": 2.86825048250956, "learning_rate": 7.146028618882258e-06, "loss": 0.7501, "step": 3593 }, { "epoch": 0.38, "grad_norm": 2.5434125470032756, "learning_rate": 7.1444892148155445e-06, "loss": 0.7357, "step": 3594 }, { "epoch": 0.38, "grad_norm": 2.4145813864328374, "learning_rate": 7.14294956160827e-06, "loss": 0.613, "step": 3595 }, { "epoch": 0.38, "grad_norm": 2.141187484264333, "learning_rate": 7.141409659439313e-06, "loss": 0.6496, "step": 3596 }, { "epoch": 0.38, "grad_norm": 2.9374788957358136, "learning_rate": 7.139869508487569e-06, "loss": 0.6382, "step": 3597 }, { "epoch": 0.38, "grad_norm": 2.4608281779178722, "learning_rate": 7.138329108931974e-06, "loss": 0.6155, "step": 3598 }, { "epoch": 0.38, "grad_norm": 2.9810869246330727, "learning_rate": 7.136788460951482e-06, "loss": 0.6844, "step": 3599 }, { "epoch": 0.38, "grad_norm": 2.261119179477242, "learning_rate": 7.135247564725085e-06, "loss": 0.6355, "step": 3600 }, { "epoch": 0.38, "grad_norm": 2.655641062191612, "learning_rate": 7.133706420431799e-06, "loss": 0.578, "step": 3601 }, { "epoch": 0.38, "grad_norm": 1.0452192913458778, "learning_rate": 7.132165028250666e-06, "loss": 0.6081, "step": 3602 }, { "epoch": 0.38, "grad_norm": 2.5869892334643882, "learning_rate": 7.130623388360767e-06, "loss": 0.7272, "step": 3603 }, { "epoch": 0.38, "grad_norm": 2.7640836198719785, "learning_rate": 7.129081500941199e-06, "loss": 0.6978, "step": 3604 }, { "epoch": 0.38, "grad_norm": 2.768985249973699, "learning_rate": 7.127539366171099e-06, "loss": 0.5961, "step": 3605 }, { "epoch": 0.38, "grad_norm": 3.9567725278063506, "learning_rate": 7.125996984229623e-06, "loss": 0.685, "step": 3606 }, { "epoch": 0.38, "grad_norm": 2.879031446261374, "learning_rate": 7.124454355295966e-06, "loss": 0.6856, "step": 3607 }, { "epoch": 0.38, "grad_norm": 2.4194323937650752, "learning_rate": 7.1229114795493405e-06, "loss": 0.6894, "step": 3608 }, { "epoch": 0.38, "grad_norm": 0.9796362738749906, "learning_rate": 7.121368357168997e-06, "loss": 0.5798, "step": 3609 }, { "epoch": 0.38, "grad_norm": 2.51120233981379, "learning_rate": 7.11982498833421e-06, "loss": 0.7307, "step": 3610 }, { "epoch": 0.38, "grad_norm": 0.9959995491929295, "learning_rate": 7.1182813732242835e-06, "loss": 0.5706, "step": 3611 }, { "epoch": 0.38, "grad_norm": 2.693687958001663, "learning_rate": 7.116737512018551e-06, "loss": 0.598, "step": 3612 }, { "epoch": 0.38, "grad_norm": 2.298264703609383, "learning_rate": 7.115193404896372e-06, "loss": 0.6928, "step": 3613 }, { "epoch": 0.38, "grad_norm": 4.7510712526228716, "learning_rate": 7.1136490520371394e-06, "loss": 0.6345, "step": 3614 }, { "epoch": 0.38, "grad_norm": 2.545812637717511, "learning_rate": 7.112104453620269e-06, "loss": 0.7136, "step": 3615 }, { "epoch": 0.38, "grad_norm": 2.4425094561499003, "learning_rate": 7.11055960982521e-06, "loss": 0.6611, "step": 3616 }, { "epoch": 0.38, "grad_norm": 3.2838270941484837, "learning_rate": 7.109014520831433e-06, "loss": 0.6004, "step": 3617 }, { "epoch": 0.38, "grad_norm": 3.6464684533156175, "learning_rate": 7.10746918681845e-06, "loss": 0.7205, "step": 3618 }, { "epoch": 0.38, "grad_norm": 2.494247980704131, "learning_rate": 7.105923607965786e-06, "loss": 0.6274, "step": 3619 }, { "epoch": 0.38, "grad_norm": 2.4050078671251107, "learning_rate": 7.104377784453005e-06, "loss": 0.6664, "step": 3620 }, { "epoch": 0.38, "grad_norm": 2.404246434896617, "learning_rate": 7.102831716459696e-06, "loss": 0.5954, "step": 3621 }, { "epoch": 0.38, "grad_norm": 2.623044062785884, "learning_rate": 7.101285404165478e-06, "loss": 0.6359, "step": 3622 }, { "epoch": 0.38, "grad_norm": 2.5546602923700013, "learning_rate": 7.099738847749995e-06, "loss": 0.6142, "step": 3623 }, { "epoch": 0.38, "grad_norm": 2.5511546021541984, "learning_rate": 7.098192047392923e-06, "loss": 0.6326, "step": 3624 }, { "epoch": 0.38, "grad_norm": 2.863354029549815, "learning_rate": 7.096645003273964e-06, "loss": 0.7335, "step": 3625 }, { "epoch": 0.38, "grad_norm": 2.6001421531585835, "learning_rate": 7.095097715572849e-06, "loss": 0.6356, "step": 3626 }, { "epoch": 0.38, "grad_norm": 2.6189887318128875, "learning_rate": 7.093550184469339e-06, "loss": 0.6734, "step": 3627 }, { "epoch": 0.38, "grad_norm": 2.9686532488381214, "learning_rate": 7.092002410143218e-06, "loss": 0.6598, "step": 3628 }, { "epoch": 0.38, "grad_norm": 2.885198141114095, "learning_rate": 7.0904543927743066e-06, "loss": 0.7185, "step": 3629 }, { "epoch": 0.38, "grad_norm": 2.655688880025789, "learning_rate": 7.088906132542446e-06, "loss": 0.6541, "step": 3630 }, { "epoch": 0.38, "grad_norm": 2.7287819033410154, "learning_rate": 7.0873576296275096e-06, "loss": 0.6549, "step": 3631 }, { "epoch": 0.38, "grad_norm": 2.6674782827937946, "learning_rate": 7.085808884209396e-06, "loss": 0.5593, "step": 3632 }, { "epoch": 0.38, "grad_norm": 2.3031490250344473, "learning_rate": 7.084259896468038e-06, "loss": 0.6337, "step": 3633 }, { "epoch": 0.38, "grad_norm": 1.243801593410752, "learning_rate": 7.082710666583389e-06, "loss": 0.5897, "step": 3634 }, { "epoch": 0.38, "grad_norm": 3.3382949550635908, "learning_rate": 7.081161194735435e-06, "loss": 0.7044, "step": 3635 }, { "epoch": 0.38, "grad_norm": 3.2631769374260293, "learning_rate": 7.0796114811041905e-06, "loss": 0.6655, "step": 3636 }, { "epoch": 0.38, "grad_norm": 1.1036281150291933, "learning_rate": 7.078061525869695e-06, "loss": 0.6465, "step": 3637 }, { "epoch": 0.38, "grad_norm": 2.7686792433647867, "learning_rate": 7.076511329212019e-06, "loss": 0.674, "step": 3638 }, { "epoch": 0.38, "grad_norm": 3.7550739067072625, "learning_rate": 7.074960891311258e-06, "loss": 0.6734, "step": 3639 }, { "epoch": 0.38, "grad_norm": 2.177433134571054, "learning_rate": 7.073410212347541e-06, "loss": 0.6277, "step": 3640 }, { "epoch": 0.38, "grad_norm": 2.7882879054667487, "learning_rate": 7.071859292501018e-06, "loss": 0.6493, "step": 3641 }, { "epoch": 0.38, "grad_norm": 3.434366748004624, "learning_rate": 7.070308131951872e-06, "loss": 0.6723, "step": 3642 }, { "epoch": 0.38, "grad_norm": 2.7276152477544935, "learning_rate": 7.068756730880311e-06, "loss": 0.622, "step": 3643 }, { "epoch": 0.38, "grad_norm": 3.0417680257370083, "learning_rate": 7.067205089466574e-06, "loss": 0.6209, "step": 3644 }, { "epoch": 0.38, "grad_norm": 2.321254791228394, "learning_rate": 7.065653207890924e-06, "loss": 0.6661, "step": 3645 }, { "epoch": 0.38, "grad_norm": 2.8612826916827196, "learning_rate": 7.064101086333657e-06, "loss": 0.6949, "step": 3646 }, { "epoch": 0.38, "grad_norm": 3.1520069324380366, "learning_rate": 7.0625487249750915e-06, "loss": 0.6501, "step": 3647 }, { "epoch": 0.38, "grad_norm": 3.964198431485837, "learning_rate": 7.060996123995576e-06, "loss": 0.6407, "step": 3648 }, { "epoch": 0.38, "grad_norm": 2.93236095260122, "learning_rate": 7.059443283575492e-06, "loss": 0.5939, "step": 3649 }, { "epoch": 0.38, "grad_norm": 2.6552920819656887, "learning_rate": 7.0578902038952375e-06, "loss": 0.6932, "step": 3650 }, { "epoch": 0.38, "grad_norm": 2.9297734442881906, "learning_rate": 7.056336885135251e-06, "loss": 0.6419, "step": 3651 }, { "epoch": 0.38, "grad_norm": 2.6224581309833943, "learning_rate": 7.054783327475987e-06, "loss": 0.675, "step": 3652 }, { "epoch": 0.38, "grad_norm": 2.7160780202997263, "learning_rate": 7.053229531097937e-06, "loss": 0.656, "step": 3653 }, { "epoch": 0.38, "grad_norm": 3.3774431250824373, "learning_rate": 7.051675496181614e-06, "loss": 0.5999, "step": 3654 }, { "epoch": 0.38, "grad_norm": 1.2783376063724854, "learning_rate": 7.050121222907564e-06, "loss": 0.5744, "step": 3655 }, { "epoch": 0.38, "grad_norm": 2.3589367318753514, "learning_rate": 7.048566711456355e-06, "loss": 0.6371, "step": 3656 }, { "epoch": 0.38, "grad_norm": 2.6774468417048696, "learning_rate": 7.047011962008589e-06, "loss": 0.6447, "step": 3657 }, { "epoch": 0.38, "grad_norm": 2.4342842413375734, "learning_rate": 7.04545697474489e-06, "loss": 0.6903, "step": 3658 }, { "epoch": 0.39, "grad_norm": 2.252872706576466, "learning_rate": 7.043901749845913e-06, "loss": 0.6952, "step": 3659 }, { "epoch": 0.39, "grad_norm": 2.3175374076123303, "learning_rate": 7.042346287492339e-06, "loss": 0.708, "step": 3660 }, { "epoch": 0.39, "grad_norm": 2.634929578368015, "learning_rate": 7.040790587864875e-06, "loss": 0.6424, "step": 3661 }, { "epoch": 0.39, "grad_norm": 2.618636727946655, "learning_rate": 7.039234651144262e-06, "loss": 0.6591, "step": 3662 }, { "epoch": 0.39, "grad_norm": 2.904861681615746, "learning_rate": 7.037678477511261e-06, "loss": 0.7491, "step": 3663 }, { "epoch": 0.39, "grad_norm": 2.5649713303543047, "learning_rate": 7.036122067146667e-06, "loss": 0.703, "step": 3664 }, { "epoch": 0.39, "grad_norm": 3.220337768555499, "learning_rate": 7.034565420231294e-06, "loss": 0.7034, "step": 3665 }, { "epoch": 0.39, "grad_norm": 2.390572089955541, "learning_rate": 7.033008536945994e-06, "loss": 0.6504, "step": 3666 }, { "epoch": 0.39, "grad_norm": 3.7954821263514296, "learning_rate": 7.031451417471638e-06, "loss": 0.6958, "step": 3667 }, { "epoch": 0.39, "grad_norm": 2.8545469996532997, "learning_rate": 7.029894061989128e-06, "loss": 0.7058, "step": 3668 }, { "epoch": 0.39, "grad_norm": 2.741678996189431, "learning_rate": 7.0283364706793954e-06, "loss": 0.6367, "step": 3669 }, { "epoch": 0.39, "grad_norm": 2.677654834887371, "learning_rate": 7.026778643723393e-06, "loss": 0.6911, "step": 3670 }, { "epoch": 0.39, "grad_norm": 2.061593434569085, "learning_rate": 7.025220581302108e-06, "loss": 0.6083, "step": 3671 }, { "epoch": 0.39, "grad_norm": 4.4271014763245535, "learning_rate": 7.02366228359655e-06, "loss": 0.632, "step": 3672 }, { "epoch": 0.39, "grad_norm": 8.875367995763348, "learning_rate": 7.022103750787759e-06, "loss": 0.7067, "step": 3673 }, { "epoch": 0.39, "grad_norm": 2.7873572210988407, "learning_rate": 7.020544983056796e-06, "loss": 0.7194, "step": 3674 }, { "epoch": 0.39, "grad_norm": 2.662020767735063, "learning_rate": 7.0189859805847615e-06, "loss": 0.6952, "step": 3675 }, { "epoch": 0.39, "grad_norm": 3.8783628852915077, "learning_rate": 7.017426743552769e-06, "loss": 0.6343, "step": 3676 }, { "epoch": 0.39, "grad_norm": 3.5280735743445337, "learning_rate": 7.015867272141972e-06, "loss": 0.6424, "step": 3677 }, { "epoch": 0.39, "grad_norm": 2.457030504295436, "learning_rate": 7.014307566533541e-06, "loss": 0.6985, "step": 3678 }, { "epoch": 0.39, "grad_norm": 2.102622932285392, "learning_rate": 7.0127476269086796e-06, "loss": 0.6618, "step": 3679 }, { "epoch": 0.39, "grad_norm": 2.953298825950946, "learning_rate": 7.011187453448617e-06, "loss": 0.6986, "step": 3680 }, { "epoch": 0.39, "grad_norm": 2.2057102088439478, "learning_rate": 7.009627046334611e-06, "loss": 0.7022, "step": 3681 }, { "epoch": 0.39, "grad_norm": 2.414332975549559, "learning_rate": 7.008066405747943e-06, "loss": 0.6429, "step": 3682 }, { "epoch": 0.39, "grad_norm": 2.484038423521051, "learning_rate": 7.006505531869925e-06, "loss": 0.7395, "step": 3683 }, { "epoch": 0.39, "grad_norm": 2.441293974717065, "learning_rate": 7.004944424881894e-06, "loss": 0.6179, "step": 3684 }, { "epoch": 0.39, "grad_norm": 2.12476887533077, "learning_rate": 7.003383084965215e-06, "loss": 0.5975, "step": 3685 }, { "epoch": 0.39, "grad_norm": 2.4875957020716335, "learning_rate": 7.001821512301283e-06, "loss": 0.701, "step": 3686 }, { "epoch": 0.39, "grad_norm": 2.2622990175164963, "learning_rate": 7.000259707071512e-06, "loss": 0.7091, "step": 3687 }, { "epoch": 0.39, "grad_norm": 3.4195893855491506, "learning_rate": 6.9986976694573515e-06, "loss": 0.6841, "step": 3688 }, { "epoch": 0.39, "grad_norm": 3.6453885611711816, "learning_rate": 6.997135399640273e-06, "loss": 0.6114, "step": 3689 }, { "epoch": 0.39, "grad_norm": 2.129611856788084, "learning_rate": 6.9955728978017775e-06, "loss": 0.6429, "step": 3690 }, { "epoch": 0.39, "grad_norm": 2.2384526562197875, "learning_rate": 6.99401016412339e-06, "loss": 0.7059, "step": 3691 }, { "epoch": 0.39, "grad_norm": 2.206543127801006, "learning_rate": 6.992447198786666e-06, "loss": 0.6889, "step": 3692 }, { "epoch": 0.39, "grad_norm": 2.5820156224344046, "learning_rate": 6.990884001973187e-06, "loss": 0.5918, "step": 3693 }, { "epoch": 0.39, "grad_norm": 2.705800904753045, "learning_rate": 6.98932057386456e-06, "loss": 0.6739, "step": 3694 }, { "epoch": 0.39, "grad_norm": 3.2287782223395998, "learning_rate": 6.987756914642418e-06, "loss": 0.5849, "step": 3695 }, { "epoch": 0.39, "grad_norm": 3.4820384692081654, "learning_rate": 6.986193024488423e-06, "loss": 0.6841, "step": 3696 }, { "epoch": 0.39, "grad_norm": 2.90798808346997, "learning_rate": 6.984628903584266e-06, "loss": 0.6208, "step": 3697 }, { "epoch": 0.39, "grad_norm": 5.045138928005131, "learning_rate": 6.983064552111658e-06, "loss": 0.6761, "step": 3698 }, { "epoch": 0.39, "grad_norm": 2.763765878384353, "learning_rate": 6.981499970252345e-06, "loss": 0.6429, "step": 3699 }, { "epoch": 0.39, "grad_norm": 3.8969396881884393, "learning_rate": 6.979935158188091e-06, "loss": 0.6543, "step": 3700 }, { "epoch": 0.39, "grad_norm": 2.144538256745873, "learning_rate": 6.9783701161006965e-06, "loss": 0.6848, "step": 3701 }, { "epoch": 0.39, "grad_norm": 2.2691334424929632, "learning_rate": 6.976804844171978e-06, "loss": 0.682, "step": 3702 }, { "epoch": 0.39, "grad_norm": 2.853203277677684, "learning_rate": 6.975239342583789e-06, "loss": 0.5798, "step": 3703 }, { "epoch": 0.39, "grad_norm": 0.984156618342594, "learning_rate": 6.973673611518003e-06, "loss": 0.6253, "step": 3704 }, { "epoch": 0.39, "grad_norm": 3.6403380840996964, "learning_rate": 6.972107651156521e-06, "loss": 0.7099, "step": 3705 }, { "epoch": 0.39, "grad_norm": 3.0398511893391715, "learning_rate": 6.970541461681274e-06, "loss": 0.6499, "step": 3706 }, { "epoch": 0.39, "grad_norm": 2.5614405274937453, "learning_rate": 6.968975043274215e-06, "loss": 0.5853, "step": 3707 }, { "epoch": 0.39, "grad_norm": 2.34052750735692, "learning_rate": 6.9674083961173276e-06, "loss": 0.7104, "step": 3708 }, { "epoch": 0.39, "grad_norm": 2.5089454252540015, "learning_rate": 6.96584152039262e-06, "loss": 0.6719, "step": 3709 }, { "epoch": 0.39, "grad_norm": 2.9541879261553783, "learning_rate": 6.964274416282129e-06, "loss": 0.6482, "step": 3710 }, { "epoch": 0.39, "grad_norm": 2.3885655104600265, "learning_rate": 6.962707083967911e-06, "loss": 0.5981, "step": 3711 }, { "epoch": 0.39, "grad_norm": 2.4321001454354527, "learning_rate": 6.9611395236320615e-06, "loss": 0.6967, "step": 3712 }, { "epoch": 0.39, "grad_norm": 1.1448344326564757, "learning_rate": 6.959571735456687e-06, "loss": 0.6432, "step": 3713 }, { "epoch": 0.39, "grad_norm": 2.652298799399858, "learning_rate": 6.958003719623936e-06, "loss": 0.7021, "step": 3714 }, { "epoch": 0.39, "grad_norm": 2.2558034126837194, "learning_rate": 6.956435476315972e-06, "loss": 0.7334, "step": 3715 }, { "epoch": 0.39, "grad_norm": 2.089987806720731, "learning_rate": 6.9548670057149896e-06, "loss": 0.6904, "step": 3716 }, { "epoch": 0.39, "grad_norm": 3.0275655323101884, "learning_rate": 6.953298308003209e-06, "loss": 0.6388, "step": 3717 }, { "epoch": 0.39, "grad_norm": 2.6883335061425595, "learning_rate": 6.9517293833628785e-06, "loss": 0.666, "step": 3718 }, { "epoch": 0.39, "grad_norm": 4.102080658442691, "learning_rate": 6.950160231976269e-06, "loss": 0.6471, "step": 3719 }, { "epoch": 0.39, "grad_norm": 2.547523830477747, "learning_rate": 6.948590854025681e-06, "loss": 0.6384, "step": 3720 }, { "epoch": 0.39, "grad_norm": 2.9940684741992807, "learning_rate": 6.947021249693442e-06, "loss": 0.6313, "step": 3721 }, { "epoch": 0.39, "grad_norm": 3.388166051557115, "learning_rate": 6.9454514191619e-06, "loss": 0.6452, "step": 3722 }, { "epoch": 0.39, "grad_norm": 2.5275961581874453, "learning_rate": 6.9438813626134395e-06, "loss": 0.5957, "step": 3723 }, { "epoch": 0.39, "grad_norm": 2.9456477007058033, "learning_rate": 6.942311080230458e-06, "loss": 0.6231, "step": 3724 }, { "epoch": 0.39, "grad_norm": 2.085258818767328, "learning_rate": 6.940740572195392e-06, "loss": 0.6739, "step": 3725 }, { "epoch": 0.39, "grad_norm": 2.189417657638878, "learning_rate": 6.939169838690695e-06, "loss": 0.6297, "step": 3726 }, { "epoch": 0.39, "grad_norm": 2.502191462085905, "learning_rate": 6.937598879898853e-06, "loss": 0.6238, "step": 3727 }, { "epoch": 0.39, "grad_norm": 2.4771620597343773, "learning_rate": 6.936027696002373e-06, "loss": 0.7406, "step": 3728 }, { "epoch": 0.39, "grad_norm": 2.3821967150760526, "learning_rate": 6.934456287183793e-06, "loss": 0.7131, "step": 3729 }, { "epoch": 0.39, "grad_norm": 2.537080782846546, "learning_rate": 6.932884653625672e-06, "loss": 0.587, "step": 3730 }, { "epoch": 0.39, "grad_norm": 4.044830093724147, "learning_rate": 6.931312795510601e-06, "loss": 0.6819, "step": 3731 }, { "epoch": 0.39, "grad_norm": 2.060390451396102, "learning_rate": 6.929740713021192e-06, "loss": 0.6625, "step": 3732 }, { "epoch": 0.39, "grad_norm": 2.1547981802320297, "learning_rate": 6.928168406340082e-06, "loss": 0.7245, "step": 3733 }, { "epoch": 0.39, "grad_norm": 2.526368800735357, "learning_rate": 6.926595875649944e-06, "loss": 0.6918, "step": 3734 }, { "epoch": 0.39, "grad_norm": 2.751346484300469, "learning_rate": 6.925023121133465e-06, "loss": 0.5705, "step": 3735 }, { "epoch": 0.39, "grad_norm": 3.5756950930937323, "learning_rate": 6.923450142973366e-06, "loss": 0.7081, "step": 3736 }, { "epoch": 0.39, "grad_norm": 2.3081997699324925, "learning_rate": 6.921876941352388e-06, "loss": 0.6519, "step": 3737 }, { "epoch": 0.39, "grad_norm": 3.6782336966893183, "learning_rate": 6.920303516453302e-06, "loss": 0.6644, "step": 3738 }, { "epoch": 0.39, "grad_norm": 2.1786958735173174, "learning_rate": 6.918729868458905e-06, "loss": 0.6086, "step": 3739 }, { "epoch": 0.39, "grad_norm": 1.1080710822626574, "learning_rate": 6.91715599755202e-06, "loss": 0.5619, "step": 3740 }, { "epoch": 0.39, "grad_norm": 3.3882194917466837, "learning_rate": 6.9155819039154914e-06, "loss": 0.6884, "step": 3741 }, { "epoch": 0.39, "grad_norm": 2.2474420615290263, "learning_rate": 6.9140075877321955e-06, "loss": 0.7152, "step": 3742 }, { "epoch": 0.39, "grad_norm": 2.3178490077460467, "learning_rate": 6.91243304918503e-06, "loss": 0.7279, "step": 3743 }, { "epoch": 0.39, "grad_norm": 2.915704362648865, "learning_rate": 6.9108582884569206e-06, "loss": 0.6947, "step": 3744 }, { "epoch": 0.39, "grad_norm": 2.4243198642597314, "learning_rate": 6.909283305730822e-06, "loss": 0.7129, "step": 3745 }, { "epoch": 0.39, "grad_norm": 1.961387262313486, "learning_rate": 6.907708101189705e-06, "loss": 0.7032, "step": 3746 }, { "epoch": 0.39, "grad_norm": 2.7889776855961554, "learning_rate": 6.906132675016577e-06, "loss": 0.5773, "step": 3747 }, { "epoch": 0.39, "grad_norm": 4.106825781647384, "learning_rate": 6.904557027394464e-06, "loss": 0.5643, "step": 3748 }, { "epoch": 0.39, "grad_norm": 4.128759223475058, "learning_rate": 6.902981158506421e-06, "loss": 0.6652, "step": 3749 }, { "epoch": 0.39, "grad_norm": 1.0212995877258428, "learning_rate": 6.90140506853553e-06, "loss": 0.5981, "step": 3750 }, { "epoch": 0.39, "grad_norm": 2.410233497856306, "learning_rate": 6.899828757664892e-06, "loss": 0.6367, "step": 3751 }, { "epoch": 0.39, "grad_norm": 2.5548357809398037, "learning_rate": 6.898252226077642e-06, "loss": 0.6518, "step": 3752 }, { "epoch": 0.39, "grad_norm": 1.8928951894823056, "learning_rate": 6.896675473956935e-06, "loss": 0.6251, "step": 3753 }, { "epoch": 0.4, "grad_norm": 2.8686199304183004, "learning_rate": 6.895098501485955e-06, "loss": 0.6949, "step": 3754 }, { "epoch": 0.4, "grad_norm": 2.4005163798318754, "learning_rate": 6.8935213088479096e-06, "loss": 0.6532, "step": 3755 }, { "epoch": 0.4, "grad_norm": 1.083088075895648, "learning_rate": 6.891943896226031e-06, "loss": 0.6361, "step": 3756 }, { "epoch": 0.4, "grad_norm": 2.3207947203497694, "learning_rate": 6.890366263803579e-06, "loss": 0.6775, "step": 3757 }, { "epoch": 0.4, "grad_norm": 2.8029690208205515, "learning_rate": 6.88878841176384e-06, "loss": 0.614, "step": 3758 }, { "epoch": 0.4, "grad_norm": 1.913955180413663, "learning_rate": 6.887210340290124e-06, "loss": 0.6718, "step": 3759 }, { "epoch": 0.4, "grad_norm": 2.4336755807961237, "learning_rate": 6.885632049565766e-06, "loss": 0.6733, "step": 3760 }, { "epoch": 0.4, "grad_norm": 3.0818816161132827, "learning_rate": 6.884053539774125e-06, "loss": 0.6907, "step": 3761 }, { "epoch": 0.4, "grad_norm": 4.6742613246704785, "learning_rate": 6.882474811098592e-06, "loss": 0.6829, "step": 3762 }, { "epoch": 0.4, "grad_norm": 1.9849851340007725, "learning_rate": 6.880895863722576e-06, "loss": 0.7172, "step": 3763 }, { "epoch": 0.4, "grad_norm": 3.0447314238097247, "learning_rate": 6.8793166978295166e-06, "loss": 0.631, "step": 3764 }, { "epoch": 0.4, "grad_norm": 3.91470175770067, "learning_rate": 6.877737313602876e-06, "loss": 0.6654, "step": 3765 }, { "epoch": 0.4, "grad_norm": 2.1785559232413867, "learning_rate": 6.8761577112261425e-06, "loss": 0.6318, "step": 3766 }, { "epoch": 0.4, "grad_norm": 2.669250198673815, "learning_rate": 6.874577890882829e-06, "loss": 0.6375, "step": 3767 }, { "epoch": 0.4, "grad_norm": 2.9415389760168593, "learning_rate": 6.872997852756474e-06, "loss": 0.6013, "step": 3768 }, { "epoch": 0.4, "grad_norm": 2.4526688080708596, "learning_rate": 6.871417597030644e-06, "loss": 0.6684, "step": 3769 }, { "epoch": 0.4, "grad_norm": 2.788823894332462, "learning_rate": 6.869837123888926e-06, "loss": 0.6253, "step": 3770 }, { "epoch": 0.4, "grad_norm": 4.477851029463746, "learning_rate": 6.868256433514938e-06, "loss": 0.7383, "step": 3771 }, { "epoch": 0.4, "grad_norm": 2.269796038141395, "learning_rate": 6.866675526092317e-06, "loss": 0.7487, "step": 3772 }, { "epoch": 0.4, "grad_norm": 2.648425239153188, "learning_rate": 6.86509440180473e-06, "loss": 0.7038, "step": 3773 }, { "epoch": 0.4, "grad_norm": 2.375374473672006, "learning_rate": 6.863513060835866e-06, "loss": 0.6327, "step": 3774 }, { "epoch": 0.4, "grad_norm": 1.1344879105982528, "learning_rate": 6.861931503369441e-06, "loss": 0.6503, "step": 3775 }, { "epoch": 0.4, "grad_norm": 3.840442440534222, "learning_rate": 6.8603497295891975e-06, "loss": 0.5909, "step": 3776 }, { "epoch": 0.4, "grad_norm": 3.9504848651490403, "learning_rate": 6.8587677396789e-06, "loss": 0.6947, "step": 3777 }, { "epoch": 0.4, "grad_norm": 2.764177497995866, "learning_rate": 6.8571855338223395e-06, "loss": 0.6062, "step": 3778 }, { "epoch": 0.4, "grad_norm": 3.440160369843259, "learning_rate": 6.855603112203333e-06, "loss": 0.6853, "step": 3779 }, { "epoch": 0.4, "grad_norm": 2.2362406157836525, "learning_rate": 6.854020475005719e-06, "loss": 0.6329, "step": 3780 }, { "epoch": 0.4, "grad_norm": 1.1361521836950608, "learning_rate": 6.852437622413366e-06, "loss": 0.5591, "step": 3781 }, { "epoch": 0.4, "grad_norm": 2.2958928068872213, "learning_rate": 6.850854554610167e-06, "loss": 0.7093, "step": 3782 }, { "epoch": 0.4, "grad_norm": 2.311428807633486, "learning_rate": 6.849271271780034e-06, "loss": 0.6284, "step": 3783 }, { "epoch": 0.4, "grad_norm": 2.8763588330667194, "learning_rate": 6.847687774106911e-06, "loss": 0.6576, "step": 3784 }, { "epoch": 0.4, "grad_norm": 2.382119861964838, "learning_rate": 6.846104061774763e-06, "loss": 0.6577, "step": 3785 }, { "epoch": 0.4, "grad_norm": 2.0655614717842052, "learning_rate": 6.8445201349675825e-06, "loss": 0.6832, "step": 3786 }, { "epoch": 0.4, "grad_norm": 2.8485737316689894, "learning_rate": 6.842935993869385e-06, "loss": 0.6554, "step": 3787 }, { "epoch": 0.4, "grad_norm": 2.6641033052353102, "learning_rate": 6.841351638664211e-06, "loss": 0.7066, "step": 3788 }, { "epoch": 0.4, "grad_norm": 2.636994092546543, "learning_rate": 6.839767069536126e-06, "loss": 0.5704, "step": 3789 }, { "epoch": 0.4, "grad_norm": 2.1059693268960524, "learning_rate": 6.838182286669222e-06, "loss": 0.6862, "step": 3790 }, { "epoch": 0.4, "grad_norm": 2.389714565065946, "learning_rate": 6.8365972902476115e-06, "loss": 0.6674, "step": 3791 }, { "epoch": 0.4, "grad_norm": 2.3867920333404187, "learning_rate": 6.835012080455439e-06, "loss": 0.6521, "step": 3792 }, { "epoch": 0.4, "grad_norm": 2.6159918503535615, "learning_rate": 6.833426657476866e-06, "loss": 0.6519, "step": 3793 }, { "epoch": 0.4, "grad_norm": 2.226258850434491, "learning_rate": 6.831841021496084e-06, "loss": 0.7133, "step": 3794 }, { "epoch": 0.4, "grad_norm": 2.899461642291162, "learning_rate": 6.830255172697309e-06, "loss": 0.6671, "step": 3795 }, { "epoch": 0.4, "grad_norm": 2.3305187723569447, "learning_rate": 6.828669111264776e-06, "loss": 0.6724, "step": 3796 }, { "epoch": 0.4, "grad_norm": 2.4055301005112946, "learning_rate": 6.8270828373827536e-06, "loss": 0.8054, "step": 3797 }, { "epoch": 0.4, "grad_norm": 2.2070086468105874, "learning_rate": 6.825496351235528e-06, "loss": 0.6663, "step": 3798 }, { "epoch": 0.4, "grad_norm": 2.465861609693202, "learning_rate": 6.823909653007414e-06, "loss": 0.6668, "step": 3799 }, { "epoch": 0.4, "grad_norm": 2.7446295867469264, "learning_rate": 6.8223227428827485e-06, "loss": 0.6663, "step": 3800 }, { "epoch": 0.4, "grad_norm": 2.2768346355361744, "learning_rate": 6.820735621045895e-06, "loss": 0.6675, "step": 3801 }, { "epoch": 0.4, "grad_norm": 2.3820614356193843, "learning_rate": 6.81914828768124e-06, "loss": 0.689, "step": 3802 }, { "epoch": 0.4, "grad_norm": 2.993505071374713, "learning_rate": 6.817560742973196e-06, "loss": 0.7702, "step": 3803 }, { "epoch": 0.4, "grad_norm": 2.25574301607733, "learning_rate": 6.8159729871061984e-06, "loss": 0.6731, "step": 3804 }, { "epoch": 0.4, "grad_norm": 3.880274384584933, "learning_rate": 6.814385020264708e-06, "loss": 0.7064, "step": 3805 }, { "epoch": 0.4, "grad_norm": 2.6969745237833136, "learning_rate": 6.812796842633213e-06, "loss": 0.7117, "step": 3806 }, { "epoch": 0.4, "grad_norm": 2.10614106669541, "learning_rate": 6.811208454396218e-06, "loss": 0.7491, "step": 3807 }, { "epoch": 0.4, "grad_norm": 4.469718689532213, "learning_rate": 6.809619855738262e-06, "loss": 0.6633, "step": 3808 }, { "epoch": 0.4, "grad_norm": 2.6641844592932244, "learning_rate": 6.8080310468439015e-06, "loss": 0.6784, "step": 3809 }, { "epoch": 0.4, "grad_norm": 6.960823525262656, "learning_rate": 6.806442027897722e-06, "loss": 0.6114, "step": 3810 }, { "epoch": 0.4, "grad_norm": 1.963680773700924, "learning_rate": 6.804852799084329e-06, "loss": 0.6952, "step": 3811 }, { "epoch": 0.4, "grad_norm": 2.327360252561889, "learning_rate": 6.803263360588355e-06, "loss": 0.6871, "step": 3812 }, { "epoch": 0.4, "grad_norm": 2.6946911855419358, "learning_rate": 6.801673712594456e-06, "loss": 0.7192, "step": 3813 }, { "epoch": 0.4, "grad_norm": 2.7849793322013197, "learning_rate": 6.8000838552873135e-06, "loss": 0.6403, "step": 3814 }, { "epoch": 0.4, "grad_norm": 2.3535336364449804, "learning_rate": 6.7984937888516325e-06, "loss": 0.708, "step": 3815 }, { "epoch": 0.4, "grad_norm": 4.483684306418722, "learning_rate": 6.796903513472142e-06, "loss": 0.6605, "step": 3816 }, { "epoch": 0.4, "grad_norm": 3.157785406165776, "learning_rate": 6.795313029333596e-06, "loss": 0.576, "step": 3817 }, { "epoch": 0.4, "grad_norm": 2.243032193761467, "learning_rate": 6.79372233662077e-06, "loss": 0.5717, "step": 3818 }, { "epoch": 0.4, "grad_norm": 3.0101289906232367, "learning_rate": 6.79213143551847e-06, "loss": 0.5542, "step": 3819 }, { "epoch": 0.4, "grad_norm": 3.8401966485891643, "learning_rate": 6.790540326211519e-06, "loss": 0.6712, "step": 3820 }, { "epoch": 0.4, "grad_norm": 2.757074235173517, "learning_rate": 6.78894900888477e-06, "loss": 0.6846, "step": 3821 }, { "epoch": 0.4, "grad_norm": 4.674263473701696, "learning_rate": 6.787357483723096e-06, "loss": 0.7419, "step": 3822 }, { "epoch": 0.4, "grad_norm": 2.606110641324874, "learning_rate": 6.785765750911396e-06, "loss": 0.6317, "step": 3823 }, { "epoch": 0.4, "grad_norm": 2.1675322487659723, "learning_rate": 6.7841738106345935e-06, "loss": 0.6373, "step": 3824 }, { "epoch": 0.4, "grad_norm": 2.2312923273279153, "learning_rate": 6.782581663077637e-06, "loss": 0.6132, "step": 3825 }, { "epoch": 0.4, "grad_norm": 2.6581842644039164, "learning_rate": 6.780989308425493e-06, "loss": 0.6928, "step": 3826 }, { "epoch": 0.4, "grad_norm": 2.246504818054555, "learning_rate": 6.77939674686316e-06, "loss": 0.6127, "step": 3827 }, { "epoch": 0.4, "grad_norm": 2.7698626969798905, "learning_rate": 6.7778039785756575e-06, "loss": 0.6525, "step": 3828 }, { "epoch": 0.4, "grad_norm": 2.4450311084573038, "learning_rate": 6.7762110037480265e-06, "loss": 0.6343, "step": 3829 }, { "epoch": 0.4, "grad_norm": 4.123523847157009, "learning_rate": 6.774617822565338e-06, "loss": 0.6966, "step": 3830 }, { "epoch": 0.4, "grad_norm": 2.7410973711990247, "learning_rate": 6.773024435212678e-06, "loss": 0.6834, "step": 3831 }, { "epoch": 0.4, "grad_norm": 10.729971927019776, "learning_rate": 6.771430841875166e-06, "loss": 0.7335, "step": 3832 }, { "epoch": 0.4, "grad_norm": 2.461919317112508, "learning_rate": 6.7698370427379405e-06, "loss": 0.7462, "step": 3833 }, { "epoch": 0.4, "grad_norm": 2.764361831143325, "learning_rate": 6.7682430379861615e-06, "loss": 0.6316, "step": 3834 }, { "epoch": 0.4, "grad_norm": 2.09261926247929, "learning_rate": 6.766648827805019e-06, "loss": 0.6006, "step": 3835 }, { "epoch": 0.4, "grad_norm": 3.804234407107111, "learning_rate": 6.765054412379722e-06, "loss": 0.6852, "step": 3836 }, { "epoch": 0.4, "grad_norm": 2.798217238649048, "learning_rate": 6.763459791895506e-06, "loss": 0.6397, "step": 3837 }, { "epoch": 0.4, "grad_norm": 2.6552060440599377, "learning_rate": 6.761864966537629e-06, "loss": 0.5773, "step": 3838 }, { "epoch": 0.4, "grad_norm": 2.0551205343106513, "learning_rate": 6.760269936491373e-06, "loss": 0.7127, "step": 3839 }, { "epoch": 0.4, "grad_norm": 2.2555093947538323, "learning_rate": 6.7586747019420444e-06, "loss": 0.6463, "step": 3840 }, { "epoch": 0.4, "grad_norm": 2.508518018543992, "learning_rate": 6.7570792630749725e-06, "loss": 0.6151, "step": 3841 }, { "epoch": 0.4, "grad_norm": 2.2115480509362024, "learning_rate": 6.755483620075509e-06, "loss": 0.6153, "step": 3842 }, { "epoch": 0.4, "grad_norm": 2.5571778252409416, "learning_rate": 6.753887773129036e-06, "loss": 0.6822, "step": 3843 }, { "epoch": 0.4, "grad_norm": 3.3827224492687704, "learning_rate": 6.752291722420951e-06, "loss": 0.6651, "step": 3844 }, { "epoch": 0.4, "grad_norm": 5.068263890882636, "learning_rate": 6.750695468136679e-06, "loss": 0.7441, "step": 3845 }, { "epoch": 0.4, "grad_norm": 2.599488472042048, "learning_rate": 6.7490990104616684e-06, "loss": 0.6053, "step": 3846 }, { "epoch": 0.4, "grad_norm": 5.068319078405129, "learning_rate": 6.74750234958139e-06, "loss": 0.6563, "step": 3847 }, { "epoch": 0.4, "grad_norm": 3.4661100791938293, "learning_rate": 6.745905485681341e-06, "loss": 0.605, "step": 3848 }, { "epoch": 0.41, "grad_norm": 2.8783604735605857, "learning_rate": 6.7443084189470385e-06, "loss": 0.6415, "step": 3849 }, { "epoch": 0.41, "grad_norm": 4.159785400047253, "learning_rate": 6.742711149564028e-06, "loss": 0.7075, "step": 3850 }, { "epoch": 0.41, "grad_norm": 2.5896401663997524, "learning_rate": 6.741113677717872e-06, "loss": 0.7611, "step": 3851 }, { "epoch": 0.41, "grad_norm": 3.423614667213638, "learning_rate": 6.7395160035941624e-06, "loss": 0.7156, "step": 3852 }, { "epoch": 0.41, "grad_norm": 2.874352439060714, "learning_rate": 6.737918127378511e-06, "loss": 0.5727, "step": 3853 }, { "epoch": 0.41, "grad_norm": 2.2310646192841106, "learning_rate": 6.736320049256557e-06, "loss": 0.6234, "step": 3854 }, { "epoch": 0.41, "grad_norm": 3.078116761718466, "learning_rate": 6.734721769413959e-06, "loss": 0.6838, "step": 3855 }, { "epoch": 0.41, "grad_norm": 3.047167191192513, "learning_rate": 6.733123288036399e-06, "loss": 0.739, "step": 3856 }, { "epoch": 0.41, "grad_norm": 2.98595396084378, "learning_rate": 6.731524605309587e-06, "loss": 0.6049, "step": 3857 }, { "epoch": 0.41, "grad_norm": 2.152132778242366, "learning_rate": 6.72992572141925e-06, "loss": 0.6779, "step": 3858 }, { "epoch": 0.41, "grad_norm": 3.411218764669821, "learning_rate": 6.728326636551145e-06, "loss": 0.6093, "step": 3859 }, { "epoch": 0.41, "grad_norm": 2.1157066121453587, "learning_rate": 6.726727350891047e-06, "loss": 0.6165, "step": 3860 }, { "epoch": 0.41, "grad_norm": 2.068433705431225, "learning_rate": 6.725127864624757e-06, "loss": 0.5947, "step": 3861 }, { "epoch": 0.41, "grad_norm": 2.8708815594354373, "learning_rate": 6.723528177938097e-06, "loss": 0.6354, "step": 3862 }, { "epoch": 0.41, "grad_norm": 3.33467858012365, "learning_rate": 6.721928291016917e-06, "loss": 0.7318, "step": 3863 }, { "epoch": 0.41, "grad_norm": 2.803975371742299, "learning_rate": 6.720328204047085e-06, "loss": 0.6814, "step": 3864 }, { "epoch": 0.41, "grad_norm": 1.1642857039266927, "learning_rate": 6.718727917214496e-06, "loss": 0.6067, "step": 3865 }, { "epoch": 0.41, "grad_norm": 2.80771219731655, "learning_rate": 6.7171274307050645e-06, "loss": 0.6535, "step": 3866 }, { "epoch": 0.41, "grad_norm": 2.0379961191421874, "learning_rate": 6.715526744704732e-06, "loss": 0.6385, "step": 3867 }, { "epoch": 0.41, "grad_norm": 2.2538626104452626, "learning_rate": 6.7139258593994625e-06, "loss": 0.6347, "step": 3868 }, { "epoch": 0.41, "grad_norm": 2.5560569234327355, "learning_rate": 6.712324774975241e-06, "loss": 0.6909, "step": 3869 }, { "epoch": 0.41, "grad_norm": 2.4724982923209953, "learning_rate": 6.710723491618077e-06, "loss": 0.676, "step": 3870 }, { "epoch": 0.41, "grad_norm": 2.692509799497406, "learning_rate": 6.709122009514003e-06, "loss": 0.673, "step": 3871 }, { "epoch": 0.41, "grad_norm": 2.2919297459345502, "learning_rate": 6.707520328849074e-06, "loss": 0.6929, "step": 3872 }, { "epoch": 0.41, "grad_norm": 2.882493860740183, "learning_rate": 6.7059184498093696e-06, "loss": 0.6791, "step": 3873 }, { "epoch": 0.41, "grad_norm": 3.6357067311902735, "learning_rate": 6.70431637258099e-06, "loss": 0.7293, "step": 3874 }, { "epoch": 0.41, "grad_norm": 8.322268264406318, "learning_rate": 6.702714097350063e-06, "loss": 0.6628, "step": 3875 }, { "epoch": 0.41, "grad_norm": 2.4783117896301388, "learning_rate": 6.701111624302732e-06, "loss": 0.6576, "step": 3876 }, { "epoch": 0.41, "grad_norm": 2.5768699541037674, "learning_rate": 6.69950895362517e-06, "loss": 0.7493, "step": 3877 }, { "epoch": 0.41, "grad_norm": 5.682147247687123, "learning_rate": 6.697906085503572e-06, "loss": 0.6796, "step": 3878 }, { "epoch": 0.41, "grad_norm": 2.2802199978055913, "learning_rate": 6.696303020124152e-06, "loss": 0.6168, "step": 3879 }, { "epoch": 0.41, "grad_norm": 2.264182484401708, "learning_rate": 6.694699757673151e-06, "loss": 0.6551, "step": 3880 }, { "epoch": 0.41, "grad_norm": 2.5897290859559083, "learning_rate": 6.693096298336832e-06, "loss": 0.6541, "step": 3881 }, { "epoch": 0.41, "grad_norm": 2.05265338675788, "learning_rate": 6.691492642301478e-06, "loss": 0.6983, "step": 3882 }, { "epoch": 0.41, "grad_norm": 2.55619801462537, "learning_rate": 6.6898887897533985e-06, "loss": 0.7378, "step": 3883 }, { "epoch": 0.41, "grad_norm": 3.7122207089430885, "learning_rate": 6.6882847408789255e-06, "loss": 0.6772, "step": 3884 }, { "epoch": 0.41, "grad_norm": 2.8904289019733236, "learning_rate": 6.686680495864411e-06, "loss": 0.6001, "step": 3885 }, { "epoch": 0.41, "grad_norm": 2.1642154485237306, "learning_rate": 6.6850760548962315e-06, "loss": 0.7032, "step": 3886 }, { "epoch": 0.41, "grad_norm": 9.085180115876389, "learning_rate": 6.683471418160787e-06, "loss": 0.6206, "step": 3887 }, { "epoch": 0.41, "grad_norm": 2.33182419264645, "learning_rate": 6.681866585844501e-06, "loss": 0.5902, "step": 3888 }, { "epoch": 0.41, "grad_norm": 2.158864307849004, "learning_rate": 6.680261558133816e-06, "loss": 0.5937, "step": 3889 }, { "epoch": 0.41, "grad_norm": 2.7965197556825463, "learning_rate": 6.678656335215199e-06, "loss": 0.6601, "step": 3890 }, { "epoch": 0.41, "grad_norm": 2.8204118744480993, "learning_rate": 6.677050917275143e-06, "loss": 0.6472, "step": 3891 }, { "epoch": 0.41, "grad_norm": 3.4806015074983594, "learning_rate": 6.675445304500159e-06, "loss": 0.6548, "step": 3892 }, { "epoch": 0.41, "grad_norm": 1.1229091161769098, "learning_rate": 6.673839497076783e-06, "loss": 0.5833, "step": 3893 }, { "epoch": 0.41, "grad_norm": 3.1338450581533546, "learning_rate": 6.672233495191572e-06, "loss": 0.6849, "step": 3894 }, { "epoch": 0.41, "grad_norm": 2.3209497422936622, "learning_rate": 6.670627299031109e-06, "loss": 0.7225, "step": 3895 }, { "epoch": 0.41, "grad_norm": 2.76307749364276, "learning_rate": 6.669020908781994e-06, "loss": 0.6026, "step": 3896 }, { "epoch": 0.41, "grad_norm": 2.4047612077616933, "learning_rate": 6.667414324630856e-06, "loss": 0.6428, "step": 3897 }, { "epoch": 0.41, "grad_norm": 8.962203572754492, "learning_rate": 6.665807546764341e-06, "loss": 0.6385, "step": 3898 }, { "epoch": 0.41, "grad_norm": 2.3629283595989983, "learning_rate": 6.664200575369121e-06, "loss": 0.6119, "step": 3899 }, { "epoch": 0.41, "grad_norm": 3.1049318573672307, "learning_rate": 6.662593410631888e-06, "loss": 0.6005, "step": 3900 }, { "epoch": 0.41, "grad_norm": 2.1219922103429094, "learning_rate": 6.660986052739357e-06, "loss": 0.6908, "step": 3901 }, { "epoch": 0.41, "grad_norm": 5.5048628027957625, "learning_rate": 6.659378501878271e-06, "loss": 0.5462, "step": 3902 }, { "epoch": 0.41, "grad_norm": 2.081524730634284, "learning_rate": 6.657770758235387e-06, "loss": 0.6065, "step": 3903 }, { "epoch": 0.41, "grad_norm": 2.286231378737835, "learning_rate": 6.656162821997487e-06, "loss": 0.6641, "step": 3904 }, { "epoch": 0.41, "grad_norm": 3.018088065090707, "learning_rate": 6.654554693351379e-06, "loss": 0.5698, "step": 3905 }, { "epoch": 0.41, "grad_norm": 2.0133508154340487, "learning_rate": 6.652946372483889e-06, "loss": 0.6064, "step": 3906 }, { "epoch": 0.41, "grad_norm": 2.681912809689596, "learning_rate": 6.651337859581868e-06, "loss": 0.6913, "step": 3907 }, { "epoch": 0.41, "grad_norm": 3.0310073690024075, "learning_rate": 6.6497291548321876e-06, "loss": 0.6523, "step": 3908 }, { "epoch": 0.41, "grad_norm": 2.3565442897512345, "learning_rate": 6.648120258421744e-06, "loss": 0.6395, "step": 3909 }, { "epoch": 0.41, "grad_norm": 2.52692979561079, "learning_rate": 6.646511170537452e-06, "loss": 0.6792, "step": 3910 }, { "epoch": 0.41, "grad_norm": 1.21140083501598, "learning_rate": 6.644901891366251e-06, "loss": 0.5643, "step": 3911 }, { "epoch": 0.41, "grad_norm": 1.0497136827512572, "learning_rate": 6.643292421095105e-06, "loss": 0.5934, "step": 3912 }, { "epoch": 0.41, "grad_norm": 3.4574095439368064, "learning_rate": 6.641682759910993e-06, "loss": 0.6165, "step": 3913 }, { "epoch": 0.41, "grad_norm": 3.1072803681357684, "learning_rate": 6.640072908000926e-06, "loss": 0.7101, "step": 3914 }, { "epoch": 0.41, "grad_norm": 2.449280579698595, "learning_rate": 6.638462865551929e-06, "loss": 0.631, "step": 3915 }, { "epoch": 0.41, "grad_norm": 2.300143552458361, "learning_rate": 6.6368526327510515e-06, "loss": 0.6856, "step": 3916 }, { "epoch": 0.41, "grad_norm": 2.1842280323695746, "learning_rate": 6.635242209785369e-06, "loss": 0.7335, "step": 3917 }, { "epoch": 0.41, "grad_norm": 4.49088278011825, "learning_rate": 6.633631596841972e-06, "loss": 0.7089, "step": 3918 }, { "epoch": 0.41, "grad_norm": 2.9551946728374507, "learning_rate": 6.63202079410798e-06, "loss": 0.7198, "step": 3919 }, { "epoch": 0.41, "grad_norm": 2.5169889269326773, "learning_rate": 6.630409801770528e-06, "loss": 0.6483, "step": 3920 }, { "epoch": 0.41, "grad_norm": 2.5244649430685646, "learning_rate": 6.6287986200167785e-06, "loss": 0.6174, "step": 3921 }, { "epoch": 0.41, "grad_norm": 2.6269573809565423, "learning_rate": 6.627187249033915e-06, "loss": 0.6274, "step": 3922 }, { "epoch": 0.41, "grad_norm": 2.855773301785785, "learning_rate": 6.6255756890091394e-06, "loss": 0.6258, "step": 3923 }, { "epoch": 0.41, "grad_norm": 2.4211612959153817, "learning_rate": 6.6239639401296796e-06, "loss": 0.6101, "step": 3924 }, { "epoch": 0.41, "grad_norm": 2.259452031681648, "learning_rate": 6.622352002582783e-06, "loss": 0.6934, "step": 3925 }, { "epoch": 0.41, "grad_norm": 3.0056254388720265, "learning_rate": 6.620739876555721e-06, "loss": 0.7024, "step": 3926 }, { "epoch": 0.41, "grad_norm": 2.950640775072797, "learning_rate": 6.619127562235786e-06, "loss": 0.7432, "step": 3927 }, { "epoch": 0.41, "grad_norm": 2.111457506555049, "learning_rate": 6.61751505981029e-06, "loss": 0.6785, "step": 3928 }, { "epoch": 0.41, "grad_norm": 5.205473900545925, "learning_rate": 6.615902369466571e-06, "loss": 0.6155, "step": 3929 }, { "epoch": 0.41, "grad_norm": 1.1442082817667982, "learning_rate": 6.614289491391985e-06, "loss": 0.6085, "step": 3930 }, { "epoch": 0.41, "grad_norm": 3.097758491032023, "learning_rate": 6.612676425773914e-06, "loss": 0.6214, "step": 3931 }, { "epoch": 0.41, "grad_norm": 4.102972042758122, "learning_rate": 6.6110631727997566e-06, "loss": 0.6056, "step": 3932 }, { "epoch": 0.41, "grad_norm": 1.9937462472618483, "learning_rate": 6.609449732656936e-06, "loss": 0.6597, "step": 3933 }, { "epoch": 0.41, "grad_norm": 2.5897301973961366, "learning_rate": 6.6078361055328986e-06, "loss": 0.6214, "step": 3934 }, { "epoch": 0.41, "grad_norm": 1.0149618917681156, "learning_rate": 6.606222291615112e-06, "loss": 0.6023, "step": 3935 }, { "epoch": 0.41, "grad_norm": 2.253201954300004, "learning_rate": 6.60460829109106e-06, "loss": 0.6611, "step": 3936 }, { "epoch": 0.41, "grad_norm": 2.9592197675312657, "learning_rate": 6.602994104148256e-06, "loss": 0.7145, "step": 3937 }, { "epoch": 0.41, "grad_norm": 2.2284657591030883, "learning_rate": 6.601379730974231e-06, "loss": 0.618, "step": 3938 }, { "epoch": 0.41, "grad_norm": 4.557356204052841, "learning_rate": 6.599765171756538e-06, "loss": 0.6578, "step": 3939 }, { "epoch": 0.41, "grad_norm": 6.091674295085208, "learning_rate": 6.598150426682752e-06, "loss": 0.6179, "step": 3940 }, { "epoch": 0.41, "grad_norm": 2.0618356021886943, "learning_rate": 6.596535495940468e-06, "loss": 0.6168, "step": 3941 }, { "epoch": 0.41, "grad_norm": 2.4424041643505827, "learning_rate": 6.594920379717307e-06, "loss": 0.6277, "step": 3942 }, { "epoch": 0.41, "grad_norm": 2.6813309064292485, "learning_rate": 6.593305078200907e-06, "loss": 0.6051, "step": 3943 }, { "epoch": 0.42, "grad_norm": 2.067586995607296, "learning_rate": 6.591689591578927e-06, "loss": 0.631, "step": 3944 }, { "epoch": 0.42, "grad_norm": 2.1568079707086967, "learning_rate": 6.590073920039052e-06, "loss": 0.6446, "step": 3945 }, { "epoch": 0.42, "grad_norm": 2.7031629573317324, "learning_rate": 6.588458063768985e-06, "loss": 0.6328, "step": 3946 }, { "epoch": 0.42, "grad_norm": 2.406212144524127, "learning_rate": 6.586842022956453e-06, "loss": 0.6518, "step": 3947 }, { "epoch": 0.42, "grad_norm": 4.66916693541006, "learning_rate": 6.585225797789201e-06, "loss": 0.6624, "step": 3948 }, { "epoch": 0.42, "grad_norm": 2.2582234615243375, "learning_rate": 6.583609388454998e-06, "loss": 0.555, "step": 3949 }, { "epoch": 0.42, "grad_norm": 2.7199146063706134, "learning_rate": 6.581992795141634e-06, "loss": 0.6526, "step": 3950 }, { "epoch": 0.42, "grad_norm": 2.5390407294947397, "learning_rate": 6.580376018036921e-06, "loss": 0.6671, "step": 3951 }, { "epoch": 0.42, "grad_norm": 4.7476904706174, "learning_rate": 6.578759057328691e-06, "loss": 0.6829, "step": 3952 }, { "epoch": 0.42, "grad_norm": 2.5382661667053528, "learning_rate": 6.5771419132047965e-06, "loss": 0.6645, "step": 3953 }, { "epoch": 0.42, "grad_norm": 2.4043076498085805, "learning_rate": 6.5755245858531135e-06, "loss": 0.5974, "step": 3954 }, { "epoch": 0.42, "grad_norm": 3.0677814480436205, "learning_rate": 6.573907075461538e-06, "loss": 0.6752, "step": 3955 }, { "epoch": 0.42, "grad_norm": 2.3299465309321636, "learning_rate": 6.57228938221799e-06, "loss": 0.7099, "step": 3956 }, { "epoch": 0.42, "grad_norm": 3.5992623283864615, "learning_rate": 6.5706715063104065e-06, "loss": 0.7091, "step": 3957 }, { "epoch": 0.42, "grad_norm": 3.1185187454894776, "learning_rate": 6.569053447926746e-06, "loss": 0.5476, "step": 3958 }, { "epoch": 0.42, "grad_norm": 3.4670327552541123, "learning_rate": 6.567435207254993e-06, "loss": 0.679, "step": 3959 }, { "epoch": 0.42, "grad_norm": 4.602615705445102, "learning_rate": 6.565816784483147e-06, "loss": 0.6774, "step": 3960 }, { "epoch": 0.42, "grad_norm": 5.735033391286663, "learning_rate": 6.564198179799234e-06, "loss": 0.6466, "step": 3961 }, { "epoch": 0.42, "grad_norm": 3.8877061902133954, "learning_rate": 6.5625793933912985e-06, "loss": 0.594, "step": 3962 }, { "epoch": 0.42, "grad_norm": 2.814956124692221, "learning_rate": 6.5609604254474065e-06, "loss": 0.6185, "step": 3963 }, { "epoch": 0.42, "grad_norm": 1.1484699815057058, "learning_rate": 6.559341276155644e-06, "loss": 0.6048, "step": 3964 }, { "epoch": 0.42, "grad_norm": 2.872534078697945, "learning_rate": 6.55772194570412e-06, "loss": 0.6446, "step": 3965 }, { "epoch": 0.42, "grad_norm": 5.229090156228293, "learning_rate": 6.5561024342809625e-06, "loss": 0.6325, "step": 3966 }, { "epoch": 0.42, "grad_norm": 2.5077624702277195, "learning_rate": 6.554482742074323e-06, "loss": 0.633, "step": 3967 }, { "epoch": 0.42, "grad_norm": 2.6547530593774695, "learning_rate": 6.552862869272371e-06, "loss": 0.6035, "step": 3968 }, { "epoch": 0.42, "grad_norm": 3.367826287135605, "learning_rate": 6.551242816063302e-06, "loss": 0.6763, "step": 3969 }, { "epoch": 0.42, "grad_norm": 2.4066120293512174, "learning_rate": 6.5496225826353254e-06, "loss": 0.6515, "step": 3970 }, { "epoch": 0.42, "grad_norm": 2.5260789922901496, "learning_rate": 6.548002169176677e-06, "loss": 0.651, "step": 3971 }, { "epoch": 0.42, "grad_norm": 2.3066437486350457, "learning_rate": 6.54638157587561e-06, "loss": 0.5797, "step": 3972 }, { "epoch": 0.42, "grad_norm": 2.181813637695597, "learning_rate": 6.544760802920402e-06, "loss": 0.6741, "step": 3973 }, { "epoch": 0.42, "grad_norm": 3.3250608385498466, "learning_rate": 6.543139850499352e-06, "loss": 0.6409, "step": 3974 }, { "epoch": 0.42, "grad_norm": 2.3644447611901835, "learning_rate": 6.5415187188007726e-06, "loss": 0.6754, "step": 3975 }, { "epoch": 0.42, "grad_norm": 2.405053664018577, "learning_rate": 6.539897408013005e-06, "loss": 0.6084, "step": 3976 }, { "epoch": 0.42, "grad_norm": 2.1188551440111305, "learning_rate": 6.538275918324408e-06, "loss": 0.7079, "step": 3977 }, { "epoch": 0.42, "grad_norm": 2.439917462899966, "learning_rate": 6.536654249923361e-06, "loss": 0.6137, "step": 3978 }, { "epoch": 0.42, "grad_norm": 2.6239270579211404, "learning_rate": 6.535032402998266e-06, "loss": 0.5237, "step": 3979 }, { "epoch": 0.42, "grad_norm": 3.4664442974636303, "learning_rate": 6.533410377737544e-06, "loss": 0.6955, "step": 3980 }, { "epoch": 0.42, "grad_norm": 2.3044985882207807, "learning_rate": 6.531788174329636e-06, "loss": 0.5246, "step": 3981 }, { "epoch": 0.42, "grad_norm": 2.007003515117586, "learning_rate": 6.5301657929630055e-06, "loss": 0.6039, "step": 3982 }, { "epoch": 0.42, "grad_norm": 1.2164380112159745, "learning_rate": 6.5285432338261365e-06, "loss": 0.616, "step": 3983 }, { "epoch": 0.42, "grad_norm": 2.257824172908572, "learning_rate": 6.526920497107535e-06, "loss": 0.7186, "step": 3984 }, { "epoch": 0.42, "grad_norm": 1.093528093998501, "learning_rate": 6.525297582995722e-06, "loss": 0.5979, "step": 3985 }, { "epoch": 0.42, "grad_norm": 2.3370494508343467, "learning_rate": 6.523674491679246e-06, "loss": 0.639, "step": 3986 }, { "epoch": 0.42, "grad_norm": 3.1729179222230477, "learning_rate": 6.522051223346672e-06, "loss": 0.6788, "step": 3987 }, { "epoch": 0.42, "grad_norm": 4.462532611308276, "learning_rate": 6.520427778186586e-06, "loss": 0.7091, "step": 3988 }, { "epoch": 0.42, "grad_norm": 2.673478312971706, "learning_rate": 6.518804156387597e-06, "loss": 0.6831, "step": 3989 }, { "epoch": 0.42, "grad_norm": 2.9158479320514985, "learning_rate": 6.517180358138332e-06, "loss": 0.6926, "step": 3990 }, { "epoch": 0.42, "grad_norm": 2.635411987857093, "learning_rate": 6.515556383627437e-06, "loss": 0.666, "step": 3991 }, { "epoch": 0.42, "grad_norm": 2.2626676614651906, "learning_rate": 6.513932233043584e-06, "loss": 0.7135, "step": 3992 }, { "epoch": 0.42, "grad_norm": 2.83263520238375, "learning_rate": 6.512307906575459e-06, "loss": 0.5915, "step": 3993 }, { "epoch": 0.42, "grad_norm": 2.204046953504845, "learning_rate": 6.510683404411774e-06, "loss": 0.6271, "step": 3994 }, { "epoch": 0.42, "grad_norm": 2.593299117358978, "learning_rate": 6.509058726741258e-06, "loss": 0.6616, "step": 3995 }, { "epoch": 0.42, "grad_norm": 1.996690826929998, "learning_rate": 6.50743387375266e-06, "loss": 0.6885, "step": 3996 }, { "epoch": 0.42, "grad_norm": 12.933011472212376, "learning_rate": 6.505808845634753e-06, "loss": 0.6603, "step": 3997 }, { "epoch": 0.42, "grad_norm": 2.453187947445511, "learning_rate": 6.504183642576327e-06, "loss": 0.7046, "step": 3998 }, { "epoch": 0.42, "grad_norm": 2.4465722034806157, "learning_rate": 6.502558264766194e-06, "loss": 0.5633, "step": 3999 }, { "epoch": 0.42, "grad_norm": 2.341833452210207, "learning_rate": 6.500932712393185e-06, "loss": 0.6048, "step": 4000 }, { "epoch": 0.42, "grad_norm": 2.5314039211160955, "learning_rate": 6.499306985646152e-06, "loss": 0.6565, "step": 4001 }, { "epoch": 0.42, "grad_norm": 2.307652014877147, "learning_rate": 6.497681084713969e-06, "loss": 0.6175, "step": 4002 }, { "epoch": 0.42, "grad_norm": 3.7936798318747624, "learning_rate": 6.496055009785526e-06, "loss": 0.6992, "step": 4003 }, { "epoch": 0.42, "grad_norm": 2.6272219714248983, "learning_rate": 6.494428761049736e-06, "loss": 0.6342, "step": 4004 }, { "epoch": 0.42, "grad_norm": 2.1079646961470906, "learning_rate": 6.492802338695533e-06, "loss": 0.6606, "step": 4005 }, { "epoch": 0.42, "grad_norm": 5.297324004495899, "learning_rate": 6.491175742911869e-06, "loss": 0.6246, "step": 4006 }, { "epoch": 0.42, "grad_norm": 2.6530141636498663, "learning_rate": 6.489548973887717e-06, "loss": 0.6609, "step": 4007 }, { "epoch": 0.42, "grad_norm": 3.1063440835651397, "learning_rate": 6.4879220318120735e-06, "loss": 0.6974, "step": 4008 }, { "epoch": 0.42, "grad_norm": 2.3068250586790517, "learning_rate": 6.486294916873947e-06, "loss": 0.6624, "step": 4009 }, { "epoch": 0.42, "grad_norm": 2.459942839783369, "learning_rate": 6.484667629262375e-06, "loss": 0.6707, "step": 4010 }, { "epoch": 0.42, "grad_norm": 2.1027394322760666, "learning_rate": 6.4830401691664106e-06, "loss": 0.6924, "step": 4011 }, { "epoch": 0.42, "grad_norm": 1.1296970727448212, "learning_rate": 6.481412536775125e-06, "loss": 0.5682, "step": 4012 }, { "epoch": 0.42, "grad_norm": 2.62301953143833, "learning_rate": 6.479784732277612e-06, "loss": 0.6738, "step": 4013 }, { "epoch": 0.42, "grad_norm": 2.8179450664566996, "learning_rate": 6.478156755862988e-06, "loss": 0.6378, "step": 4014 }, { "epoch": 0.42, "grad_norm": 2.7307924595408544, "learning_rate": 6.4765286077203844e-06, "loss": 0.5877, "step": 4015 }, { "epoch": 0.42, "grad_norm": 4.571571986675738, "learning_rate": 6.4749002880389566e-06, "loss": 0.6638, "step": 4016 }, { "epoch": 0.42, "grad_norm": 2.603346420805311, "learning_rate": 6.473271797007876e-06, "loss": 0.6619, "step": 4017 }, { "epoch": 0.42, "grad_norm": 2.174453465113902, "learning_rate": 6.471643134816336e-06, "loss": 0.7298, "step": 4018 }, { "epoch": 0.42, "grad_norm": 2.5043155473625514, "learning_rate": 6.470014301653552e-06, "loss": 0.6076, "step": 4019 }, { "epoch": 0.42, "grad_norm": 2.8733996645916116, "learning_rate": 6.468385297708755e-06, "loss": 0.6422, "step": 4020 }, { "epoch": 0.42, "grad_norm": 1.0053641792809447, "learning_rate": 6.466756123171199e-06, "loss": 0.6017, "step": 4021 }, { "epoch": 0.42, "grad_norm": 2.170505948062594, "learning_rate": 6.465126778230156e-06, "loss": 0.6564, "step": 4022 }, { "epoch": 0.42, "grad_norm": 2.2632578787097453, "learning_rate": 6.46349726307492e-06, "loss": 0.6804, "step": 4023 }, { "epoch": 0.42, "grad_norm": 2.936556398536724, "learning_rate": 6.4618675778948026e-06, "loss": 0.6739, "step": 4024 }, { "epoch": 0.42, "grad_norm": 2.1638172540039973, "learning_rate": 6.4602377228791345e-06, "loss": 0.6198, "step": 4025 }, { "epoch": 0.42, "grad_norm": 2.599114630574275, "learning_rate": 6.458607698217271e-06, "loss": 0.5974, "step": 4026 }, { "epoch": 0.42, "grad_norm": 3.359851858598546, "learning_rate": 6.45697750409858e-06, "loss": 0.6207, "step": 4027 }, { "epoch": 0.42, "grad_norm": 1.0414819211011148, "learning_rate": 6.455347140712455e-06, "loss": 0.5482, "step": 4028 }, { "epoch": 0.42, "grad_norm": 1.9073262379332099, "learning_rate": 6.453716608248306e-06, "loss": 0.7179, "step": 4029 }, { "epoch": 0.42, "grad_norm": 2.894734987817326, "learning_rate": 6.452085906895564e-06, "loss": 0.6275, "step": 4030 }, { "epoch": 0.42, "grad_norm": 6.409131230764491, "learning_rate": 6.450455036843677e-06, "loss": 0.5708, "step": 4031 }, { "epoch": 0.42, "grad_norm": 2.626357432074768, "learning_rate": 6.448823998282119e-06, "loss": 0.6892, "step": 4032 }, { "epoch": 0.42, "grad_norm": 2.3318787973963424, "learning_rate": 6.447192791400375e-06, "loss": 0.6684, "step": 4033 }, { "epoch": 0.42, "grad_norm": 2.341295423290045, "learning_rate": 6.4455614163879576e-06, "loss": 0.7042, "step": 4034 }, { "epoch": 0.42, "grad_norm": 2.51877291354241, "learning_rate": 6.443929873434392e-06, "loss": 0.6783, "step": 4035 }, { "epoch": 0.42, "grad_norm": 2.530723558434043, "learning_rate": 6.442298162729229e-06, "loss": 0.652, "step": 4036 }, { "epoch": 0.42, "grad_norm": 2.544615833177302, "learning_rate": 6.4406662844620346e-06, "loss": 0.6204, "step": 4037 }, { "epoch": 0.42, "grad_norm": 2.6992866068672643, "learning_rate": 6.439034238822396e-06, "loss": 0.6114, "step": 4038 }, { "epoch": 0.43, "grad_norm": 2.361537811446634, "learning_rate": 6.4374020259999194e-06, "loss": 0.6084, "step": 4039 }, { "epoch": 0.43, "grad_norm": 3.3932419299570737, "learning_rate": 6.43576964618423e-06, "loss": 0.5435, "step": 4040 }, { "epoch": 0.43, "grad_norm": 2.2247947497001896, "learning_rate": 6.4341370995649735e-06, "loss": 0.6274, "step": 4041 }, { "epoch": 0.43, "grad_norm": 4.203698930496953, "learning_rate": 6.4325043863318136e-06, "loss": 0.6563, "step": 4042 }, { "epoch": 0.43, "grad_norm": 2.6533644667178184, "learning_rate": 6.430871506674437e-06, "loss": 0.5898, "step": 4043 }, { "epoch": 0.43, "grad_norm": 3.265730806748852, "learning_rate": 6.429238460782543e-06, "loss": 0.5698, "step": 4044 }, { "epoch": 0.43, "grad_norm": 2.1642899955947548, "learning_rate": 6.427605248845859e-06, "loss": 0.6199, "step": 4045 }, { "epoch": 0.43, "grad_norm": 2.6003134035460604, "learning_rate": 6.42597187105412e-06, "loss": 0.6055, "step": 4046 }, { "epoch": 0.43, "grad_norm": 2.2193001480951926, "learning_rate": 6.4243383275970924e-06, "loss": 0.671, "step": 4047 }, { "epoch": 0.43, "grad_norm": 3.5572778898263016, "learning_rate": 6.422704618664557e-06, "loss": 0.6435, "step": 4048 }, { "epoch": 0.43, "grad_norm": 4.222157891641835, "learning_rate": 6.42107074444631e-06, "loss": 0.6501, "step": 4049 }, { "epoch": 0.43, "grad_norm": 2.725601175114304, "learning_rate": 6.419436705132172e-06, "loss": 0.6936, "step": 4050 }, { "epoch": 0.43, "grad_norm": 2.4112413353819484, "learning_rate": 6.4178025009119825e-06, "loss": 0.6389, "step": 4051 }, { "epoch": 0.43, "grad_norm": 2.589327647378624, "learning_rate": 6.416168131975595e-06, "loss": 0.6985, "step": 4052 }, { "epoch": 0.43, "grad_norm": 2.4983806609239703, "learning_rate": 6.414533598512887e-06, "loss": 0.6664, "step": 4053 }, { "epoch": 0.43, "grad_norm": 2.240075636586892, "learning_rate": 6.412898900713757e-06, "loss": 0.6559, "step": 4054 }, { "epoch": 0.43, "grad_norm": 2.8847812663112897, "learning_rate": 6.411264038768115e-06, "loss": 0.6703, "step": 4055 }, { "epoch": 0.43, "grad_norm": 2.367558885670087, "learning_rate": 6.409629012865899e-06, "loss": 0.5792, "step": 4056 }, { "epoch": 0.43, "grad_norm": 3.3855942924416382, "learning_rate": 6.407993823197056e-06, "loss": 0.7401, "step": 4057 }, { "epoch": 0.43, "grad_norm": 3.876331931348521, "learning_rate": 6.406358469951562e-06, "loss": 0.6485, "step": 4058 }, { "epoch": 0.43, "grad_norm": 2.206685527616314, "learning_rate": 6.404722953319406e-06, "loss": 0.6424, "step": 4059 }, { "epoch": 0.43, "grad_norm": 2.075266887505169, "learning_rate": 6.403087273490599e-06, "loss": 0.6713, "step": 4060 }, { "epoch": 0.43, "grad_norm": 2.856229721355921, "learning_rate": 6.401451430655168e-06, "loss": 0.628, "step": 4061 }, { "epoch": 0.43, "grad_norm": 2.0646056376603608, "learning_rate": 6.399815425003161e-06, "loss": 0.6839, "step": 4062 }, { "epoch": 0.43, "grad_norm": 2.0561582164002683, "learning_rate": 6.398179256724644e-06, "loss": 0.6327, "step": 4063 }, { "epoch": 0.43, "grad_norm": 2.1686914862568094, "learning_rate": 6.396542926009703e-06, "loss": 0.5924, "step": 4064 }, { "epoch": 0.43, "grad_norm": 2.452702288596918, "learning_rate": 6.394906433048442e-06, "loss": 0.6387, "step": 4065 }, { "epoch": 0.43, "grad_norm": 3.400913216633786, "learning_rate": 6.3932697780309825e-06, "loss": 0.6913, "step": 4066 }, { "epoch": 0.43, "grad_norm": 2.108636102788443, "learning_rate": 6.3916329611474705e-06, "loss": 0.6406, "step": 4067 }, { "epoch": 0.43, "grad_norm": 2.411671185444273, "learning_rate": 6.389995982588061e-06, "loss": 0.6552, "step": 4068 }, { "epoch": 0.43, "grad_norm": 2.186150326143783, "learning_rate": 6.388358842542939e-06, "loss": 0.6234, "step": 4069 }, { "epoch": 0.43, "grad_norm": 4.859739939616308, "learning_rate": 6.386721541202296e-06, "loss": 0.6838, "step": 4070 }, { "epoch": 0.43, "grad_norm": 2.460365538292539, "learning_rate": 6.3850840787563565e-06, "loss": 0.5673, "step": 4071 }, { "epoch": 0.43, "grad_norm": 7.402069347480076, "learning_rate": 6.383446455395352e-06, "loss": 0.6837, "step": 4072 }, { "epoch": 0.43, "grad_norm": 3.2533740088151664, "learning_rate": 6.3818086713095374e-06, "loss": 0.6788, "step": 4073 }, { "epoch": 0.43, "grad_norm": 2.2858335624830324, "learning_rate": 6.380170726689185e-06, "loss": 0.7238, "step": 4074 }, { "epoch": 0.43, "grad_norm": 2.484411238766058, "learning_rate": 6.378532621724588e-06, "loss": 0.6292, "step": 4075 }, { "epoch": 0.43, "grad_norm": 8.867064268787102, "learning_rate": 6.376894356606056e-06, "loss": 0.5916, "step": 4076 }, { "epoch": 0.43, "grad_norm": 2.291422789683661, "learning_rate": 6.375255931523916e-06, "loss": 0.6287, "step": 4077 }, { "epoch": 0.43, "grad_norm": 2.1675402432820405, "learning_rate": 6.373617346668519e-06, "loss": 0.5687, "step": 4078 }, { "epoch": 0.43, "grad_norm": 2.2952408596599088, "learning_rate": 6.371978602230229e-06, "loss": 0.6288, "step": 4079 }, { "epoch": 0.43, "grad_norm": 4.141696812562484, "learning_rate": 6.370339698399432e-06, "loss": 0.6161, "step": 4080 }, { "epoch": 0.43, "grad_norm": 3.8668906189282164, "learning_rate": 6.3687006353665285e-06, "loss": 0.6, "step": 4081 }, { "epoch": 0.43, "grad_norm": 3.230532760665731, "learning_rate": 6.367061413321942e-06, "loss": 0.6102, "step": 4082 }, { "epoch": 0.43, "grad_norm": 2.68067571548342, "learning_rate": 6.365422032456113e-06, "loss": 0.64, "step": 4083 }, { "epoch": 0.43, "grad_norm": 2.574913062898836, "learning_rate": 6.363782492959499e-06, "loss": 0.6528, "step": 4084 }, { "epoch": 0.43, "grad_norm": 4.168916407705001, "learning_rate": 6.362142795022578e-06, "loss": 0.6213, "step": 4085 }, { "epoch": 0.43, "grad_norm": 2.7508588377575767, "learning_rate": 6.360502938835844e-06, "loss": 0.6605, "step": 4086 }, { "epoch": 0.43, "grad_norm": 3.4135918342912577, "learning_rate": 6.35886292458981e-06, "loss": 0.6797, "step": 4087 }, { "epoch": 0.43, "grad_norm": 2.555329036173374, "learning_rate": 6.35722275247501e-06, "loss": 0.691, "step": 4088 }, { "epoch": 0.43, "grad_norm": 6.3576452705524025, "learning_rate": 6.355582422681996e-06, "loss": 0.6184, "step": 4089 }, { "epoch": 0.43, "grad_norm": 3.01931185233685, "learning_rate": 6.353941935401333e-06, "loss": 0.6206, "step": 4090 }, { "epoch": 0.43, "grad_norm": 2.5007928234352677, "learning_rate": 6.352301290823611e-06, "loss": 0.6169, "step": 4091 }, { "epoch": 0.43, "grad_norm": 3.556883601705964, "learning_rate": 6.350660489139433e-06, "loss": 0.6329, "step": 4092 }, { "epoch": 0.43, "grad_norm": 2.5252161752585427, "learning_rate": 6.349019530539425e-06, "loss": 0.6555, "step": 4093 }, { "epoch": 0.43, "grad_norm": 2.5247540508658526, "learning_rate": 6.347378415214226e-06, "loss": 0.6158, "step": 4094 }, { "epoch": 0.43, "grad_norm": 2.182461495472191, "learning_rate": 6.3457371433544975e-06, "loss": 0.7378, "step": 4095 }, { "epoch": 0.43, "grad_norm": 2.6257667849308226, "learning_rate": 6.34409571515092e-06, "loss": 0.6825, "step": 4096 }, { "epoch": 0.43, "grad_norm": 2.4163772839213733, "learning_rate": 6.342454130794186e-06, "loss": 0.5942, "step": 4097 }, { "epoch": 0.43, "grad_norm": 2.548472388600027, "learning_rate": 6.340812390475012e-06, "loss": 0.6612, "step": 4098 }, { "epoch": 0.43, "grad_norm": 2.6494112784435204, "learning_rate": 6.33917049438413e-06, "loss": 0.7117, "step": 4099 }, { "epoch": 0.43, "grad_norm": 4.975703696322628, "learning_rate": 6.3375284427122915e-06, "loss": 0.726, "step": 4100 }, { "epoch": 0.43, "grad_norm": 2.124929550258763, "learning_rate": 6.335886235650264e-06, "loss": 0.599, "step": 4101 }, { "epoch": 0.43, "grad_norm": 2.5941633006024762, "learning_rate": 6.334243873388838e-06, "loss": 0.7624, "step": 4102 }, { "epoch": 0.43, "grad_norm": 7.256377886619029, "learning_rate": 6.332601356118813e-06, "loss": 0.5449, "step": 4103 }, { "epoch": 0.43, "grad_norm": 3.9556721482690977, "learning_rate": 6.330958684031016e-06, "loss": 0.6911, "step": 4104 }, { "epoch": 0.43, "grad_norm": 2.733540693074302, "learning_rate": 6.329315857316285e-06, "loss": 0.6602, "step": 4105 }, { "epoch": 0.43, "grad_norm": 2.101209748842294, "learning_rate": 6.327672876165481e-06, "loss": 0.6334, "step": 4106 }, { "epoch": 0.43, "grad_norm": 2.230682903167034, "learning_rate": 6.326029740769481e-06, "loss": 0.6723, "step": 4107 }, { "epoch": 0.43, "grad_norm": 3.8797719596760216, "learning_rate": 6.324386451319179e-06, "loss": 0.6211, "step": 4108 }, { "epoch": 0.43, "grad_norm": 3.1406370239153274, "learning_rate": 6.322743008005488e-06, "loss": 0.6444, "step": 4109 }, { "epoch": 0.43, "grad_norm": 4.712312589801154, "learning_rate": 6.321099411019336e-06, "loss": 0.6464, "step": 4110 }, { "epoch": 0.43, "grad_norm": 2.233736620239088, "learning_rate": 6.319455660551674e-06, "loss": 0.6325, "step": 4111 }, { "epoch": 0.43, "grad_norm": 2.038358257077351, "learning_rate": 6.317811756793467e-06, "loss": 0.6742, "step": 4112 }, { "epoch": 0.43, "grad_norm": 2.143450702921909, "learning_rate": 6.316167699935702e-06, "loss": 0.6708, "step": 4113 }, { "epoch": 0.43, "grad_norm": 3.8705182545419534, "learning_rate": 6.314523490169375e-06, "loss": 0.5526, "step": 4114 }, { "epoch": 0.43, "grad_norm": 2.073511807722659, "learning_rate": 6.312879127685512e-06, "loss": 0.6384, "step": 4115 }, { "epoch": 0.43, "grad_norm": 2.653135511720908, "learning_rate": 6.311234612675143e-06, "loss": 0.709, "step": 4116 }, { "epoch": 0.43, "grad_norm": 2.0677937210863715, "learning_rate": 6.309589945329332e-06, "loss": 0.6508, "step": 4117 }, { "epoch": 0.43, "grad_norm": 3.338910592281395, "learning_rate": 6.307945125839143e-06, "loss": 0.678, "step": 4118 }, { "epoch": 0.43, "grad_norm": 2.715799303343155, "learning_rate": 6.3063001543956715e-06, "loss": 0.6564, "step": 4119 }, { "epoch": 0.43, "grad_norm": 2.4933350769017357, "learning_rate": 6.304655031190024e-06, "loss": 0.5819, "step": 4120 }, { "epoch": 0.43, "grad_norm": 2.044652665239333, "learning_rate": 6.303009756413327e-06, "loss": 0.6325, "step": 4121 }, { "epoch": 0.43, "grad_norm": 6.095867866117808, "learning_rate": 6.3013643302567225e-06, "loss": 0.6967, "step": 4122 }, { "epoch": 0.43, "grad_norm": 2.849872100475443, "learning_rate": 6.299718752911371e-06, "loss": 0.7464, "step": 4123 }, { "epoch": 0.43, "grad_norm": 1.9748453249208904, "learning_rate": 6.298073024568454e-06, "loss": 0.6542, "step": 4124 }, { "epoch": 0.43, "grad_norm": 2.2600324826958365, "learning_rate": 6.296427145419164e-06, "loss": 0.5836, "step": 4125 }, { "epoch": 0.43, "grad_norm": 2.802818402196517, "learning_rate": 6.294781115654718e-06, "loss": 0.6668, "step": 4126 }, { "epoch": 0.43, "grad_norm": 2.676232174418196, "learning_rate": 6.293134935466342e-06, "loss": 0.6596, "step": 4127 }, { "epoch": 0.43, "grad_norm": 2.228739627526291, "learning_rate": 6.291488605045288e-06, "loss": 0.5731, "step": 4128 }, { "epoch": 0.43, "grad_norm": 2.6049506326721703, "learning_rate": 6.289842124582822e-06, "loss": 0.5658, "step": 4129 }, { "epoch": 0.43, "grad_norm": 2.6541990876497694, "learning_rate": 6.2881954942702265e-06, "loss": 0.6145, "step": 4130 }, { "epoch": 0.43, "grad_norm": 1.0595704649971625, "learning_rate": 6.286548714298801e-06, "loss": 0.6131, "step": 4131 }, { "epoch": 0.43, "grad_norm": 2.420647946200874, "learning_rate": 6.284901784859866e-06, "loss": 0.6737, "step": 4132 }, { "epoch": 0.43, "grad_norm": 3.041851672678566, "learning_rate": 6.283254706144756e-06, "loss": 0.6566, "step": 4133 }, { "epoch": 0.44, "grad_norm": 2.712373901252542, "learning_rate": 6.281607478344823e-06, "loss": 0.6394, "step": 4134 }, { "epoch": 0.44, "grad_norm": 2.4741338459449853, "learning_rate": 6.279960101651439e-06, "loss": 0.7035, "step": 4135 }, { "epoch": 0.44, "grad_norm": 1.9857951374629823, "learning_rate": 6.278312576255988e-06, "loss": 0.6226, "step": 4136 }, { "epoch": 0.44, "grad_norm": 3.3494664192954384, "learning_rate": 6.276664902349881e-06, "loss": 0.6609, "step": 4137 }, { "epoch": 0.44, "grad_norm": 2.2270633620793228, "learning_rate": 6.275017080124533e-06, "loss": 0.6487, "step": 4138 }, { "epoch": 0.44, "grad_norm": 3.2352633292562363, "learning_rate": 6.273369109771387e-06, "loss": 0.7118, "step": 4139 }, { "epoch": 0.44, "grad_norm": 2.268087699809955, "learning_rate": 6.271720991481897e-06, "loss": 0.6689, "step": 4140 }, { "epoch": 0.44, "grad_norm": 5.099756078374374, "learning_rate": 6.270072725447542e-06, "loss": 0.7536, "step": 4141 }, { "epoch": 0.44, "grad_norm": 2.4160875739712644, "learning_rate": 6.268424311859808e-06, "loss": 0.6797, "step": 4142 }, { "epoch": 0.44, "grad_norm": 3.1608916128751066, "learning_rate": 6.266775750910203e-06, "loss": 0.6239, "step": 4143 }, { "epoch": 0.44, "grad_norm": 2.642655322409538, "learning_rate": 6.265127042790253e-06, "loss": 0.7106, "step": 4144 }, { "epoch": 0.44, "grad_norm": 8.48267619018257, "learning_rate": 6.263478187691502e-06, "loss": 0.6842, "step": 4145 }, { "epoch": 0.44, "grad_norm": 2.2073464735347583, "learning_rate": 6.2618291858055065e-06, "loss": 0.716, "step": 4146 }, { "epoch": 0.44, "grad_norm": 3.2192677534009446, "learning_rate": 6.260180037323843e-06, "loss": 0.6008, "step": 4147 }, { "epoch": 0.44, "grad_norm": 2.8880065557661028, "learning_rate": 6.258530742438107e-06, "loss": 0.6526, "step": 4148 }, { "epoch": 0.44, "grad_norm": 1.923108042812208, "learning_rate": 6.256881301339907e-06, "loss": 0.5658, "step": 4149 }, { "epoch": 0.44, "grad_norm": 2.3215056242914405, "learning_rate": 6.255231714220871e-06, "loss": 0.664, "step": 4150 }, { "epoch": 0.44, "grad_norm": 2.598137114476907, "learning_rate": 6.253581981272641e-06, "loss": 0.6033, "step": 4151 }, { "epoch": 0.44, "grad_norm": 3.292031250462775, "learning_rate": 6.251932102686883e-06, "loss": 0.6393, "step": 4152 }, { "epoch": 0.44, "grad_norm": 2.9711599193058196, "learning_rate": 6.2502820786552695e-06, "loss": 0.7229, "step": 4153 }, { "epoch": 0.44, "grad_norm": 3.4776790489556118, "learning_rate": 6.2486319093695006e-06, "loss": 0.6444, "step": 4154 }, { "epoch": 0.44, "grad_norm": 2.574952252391362, "learning_rate": 6.246981595021284e-06, "loss": 0.6376, "step": 4155 }, { "epoch": 0.44, "grad_norm": 2.485791306054375, "learning_rate": 6.245331135802351e-06, "loss": 0.6843, "step": 4156 }, { "epoch": 0.44, "grad_norm": 2.581681311574103, "learning_rate": 6.243680531904448e-06, "loss": 0.6037, "step": 4157 }, { "epoch": 0.44, "grad_norm": 3.6015305354626954, "learning_rate": 6.242029783519334e-06, "loss": 0.6377, "step": 4158 }, { "epoch": 0.44, "grad_norm": 2.514525749517879, "learning_rate": 6.240378890838792e-06, "loss": 0.688, "step": 4159 }, { "epoch": 0.44, "grad_norm": 2.1131329820828673, "learning_rate": 6.238727854054614e-06, "loss": 0.6627, "step": 4160 }, { "epoch": 0.44, "grad_norm": 2.3327635807486757, "learning_rate": 6.237076673358616e-06, "loss": 0.6625, "step": 4161 }, { "epoch": 0.44, "grad_norm": 2.564116461115227, "learning_rate": 6.235425348942625e-06, "loss": 0.6025, "step": 4162 }, { "epoch": 0.44, "grad_norm": 25.51131291171202, "learning_rate": 6.2337738809984905e-06, "loss": 0.6809, "step": 4163 }, { "epoch": 0.44, "grad_norm": 2.404343930899044, "learning_rate": 6.23212226971807e-06, "loss": 0.6425, "step": 4164 }, { "epoch": 0.44, "grad_norm": 2.986695766080571, "learning_rate": 6.230470515293248e-06, "loss": 0.674, "step": 4165 }, { "epoch": 0.44, "grad_norm": 2.5976631521397846, "learning_rate": 6.2288186179159175e-06, "loss": 0.6709, "step": 4166 }, { "epoch": 0.44, "grad_norm": 12.729513809408251, "learning_rate": 6.227166577777992e-06, "loss": 0.6686, "step": 4167 }, { "epoch": 0.44, "grad_norm": 2.51466132483747, "learning_rate": 6.225514395071401e-06, "loss": 0.7087, "step": 4168 }, { "epoch": 0.44, "grad_norm": 2.7437128880343717, "learning_rate": 6.223862069988091e-06, "loss": 0.6417, "step": 4169 }, { "epoch": 0.44, "grad_norm": 3.7709270732185365, "learning_rate": 6.222209602720023e-06, "loss": 0.6492, "step": 4170 }, { "epoch": 0.44, "grad_norm": 2.6653325226917635, "learning_rate": 6.220556993459174e-06, "loss": 0.6882, "step": 4171 }, { "epoch": 0.44, "grad_norm": 2.290790865423674, "learning_rate": 6.218904242397546e-06, "loss": 0.6277, "step": 4172 }, { "epoch": 0.44, "grad_norm": 3.0699405954785104, "learning_rate": 6.217251349727145e-06, "loss": 0.7121, "step": 4173 }, { "epoch": 0.44, "grad_norm": 2.734217356286444, "learning_rate": 6.215598315640001e-06, "loss": 0.7427, "step": 4174 }, { "epoch": 0.44, "grad_norm": 2.713834982597247, "learning_rate": 6.213945140328157e-06, "loss": 0.6955, "step": 4175 }, { "epoch": 0.44, "grad_norm": 5.311432316870064, "learning_rate": 6.212291823983678e-06, "loss": 0.6851, "step": 4176 }, { "epoch": 0.44, "grad_norm": 2.399335810386324, "learning_rate": 6.2106383667986385e-06, "loss": 0.6589, "step": 4177 }, { "epoch": 0.44, "grad_norm": 2.1179610916249185, "learning_rate": 6.208984768965133e-06, "loss": 0.585, "step": 4178 }, { "epoch": 0.44, "grad_norm": 3.492230402979365, "learning_rate": 6.207331030675272e-06, "loss": 0.6906, "step": 4179 }, { "epoch": 0.44, "grad_norm": 10.05084961096072, "learning_rate": 6.2056771521211815e-06, "loss": 0.6719, "step": 4180 }, { "epoch": 0.44, "grad_norm": 2.525761336265004, "learning_rate": 6.204023133495005e-06, "loss": 0.6352, "step": 4181 }, { "epoch": 0.44, "grad_norm": 2.2251077581550414, "learning_rate": 6.2023689749889e-06, "loss": 0.6294, "step": 4182 }, { "epoch": 0.44, "grad_norm": 2.3917017204363615, "learning_rate": 6.2007146767950455e-06, "loss": 0.6562, "step": 4183 }, { "epoch": 0.44, "grad_norm": 5.7924703663410675, "learning_rate": 6.199060239105628e-06, "loss": 0.6758, "step": 4184 }, { "epoch": 0.44, "grad_norm": 2.3713543077213854, "learning_rate": 6.197405662112862e-06, "loss": 0.6653, "step": 4185 }, { "epoch": 0.44, "grad_norm": 2.439148199597049, "learning_rate": 6.195750946008965e-06, "loss": 0.6896, "step": 4186 }, { "epoch": 0.44, "grad_norm": 3.7616942745004667, "learning_rate": 6.19409609098618e-06, "loss": 0.6445, "step": 4187 }, { "epoch": 0.44, "grad_norm": 2.610494475761507, "learning_rate": 6.192441097236762e-06, "loss": 0.7334, "step": 4188 }, { "epoch": 0.44, "grad_norm": 2.098962128255649, "learning_rate": 6.190785964952985e-06, "loss": 0.634, "step": 4189 }, { "epoch": 0.44, "grad_norm": 1.11012265793801, "learning_rate": 6.189130694327138e-06, "loss": 0.6304, "step": 4190 }, { "epoch": 0.44, "grad_norm": 2.6761648104534377, "learning_rate": 6.187475285551523e-06, "loss": 0.6746, "step": 4191 }, { "epoch": 0.44, "grad_norm": 2.6521807011331933, "learning_rate": 6.185819738818463e-06, "loss": 0.6235, "step": 4192 }, { "epoch": 0.44, "grad_norm": 3.263291536626014, "learning_rate": 6.184164054320293e-06, "loss": 0.6194, "step": 4193 }, { "epoch": 0.44, "grad_norm": 1.0093262515523898, "learning_rate": 6.1825082322493655e-06, "loss": 0.6021, "step": 4194 }, { "epoch": 0.44, "grad_norm": 6.4984369533755295, "learning_rate": 6.18085227279805e-06, "loss": 0.6466, "step": 4195 }, { "epoch": 0.44, "grad_norm": 2.873242485567296, "learning_rate": 6.179196176158733e-06, "loss": 0.6112, "step": 4196 }, { "epoch": 0.44, "grad_norm": 2.2153603654893232, "learning_rate": 6.17753994252381e-06, "loss": 0.677, "step": 4197 }, { "epoch": 0.44, "grad_norm": 4.885706072220139, "learning_rate": 6.175883572085703e-06, "loss": 0.6734, "step": 4198 }, { "epoch": 0.44, "grad_norm": 2.306773750236914, "learning_rate": 6.1742270650368395e-06, "loss": 0.6614, "step": 4199 }, { "epoch": 0.44, "grad_norm": 3.0757107827698014, "learning_rate": 6.172570421569672e-06, "loss": 0.6607, "step": 4200 }, { "epoch": 0.44, "grad_norm": 2.6497916669129955, "learning_rate": 6.170913641876662e-06, "loss": 0.6356, "step": 4201 }, { "epoch": 0.44, "grad_norm": 2.4729495304399536, "learning_rate": 6.1692567261502885e-06, "loss": 0.5511, "step": 4202 }, { "epoch": 0.44, "grad_norm": 2.4085363284193635, "learning_rate": 6.167599674583049e-06, "loss": 0.6771, "step": 4203 }, { "epoch": 0.44, "grad_norm": 2.5904214468463556, "learning_rate": 6.165942487367456e-06, "loss": 0.6186, "step": 4204 }, { "epoch": 0.44, "grad_norm": 11.74561826819688, "learning_rate": 6.164285164696034e-06, "loss": 0.698, "step": 4205 }, { "epoch": 0.44, "grad_norm": 2.4895776594825447, "learning_rate": 6.162627706761326e-06, "loss": 0.6161, "step": 4206 }, { "epoch": 0.44, "grad_norm": 3.9883010766926974, "learning_rate": 6.160970113755894e-06, "loss": 0.6723, "step": 4207 }, { "epoch": 0.44, "grad_norm": 3.412743390251808, "learning_rate": 6.159312385872309e-06, "loss": 0.6407, "step": 4208 }, { "epoch": 0.44, "grad_norm": 1.988545368414137, "learning_rate": 6.157654523303164e-06, "loss": 0.6851, "step": 4209 }, { "epoch": 0.44, "grad_norm": 2.4530378165050104, "learning_rate": 6.15599652624106e-06, "loss": 0.6844, "step": 4210 }, { "epoch": 0.44, "grad_norm": 4.747802711725356, "learning_rate": 6.154338394878624e-06, "loss": 0.6251, "step": 4211 }, { "epoch": 0.44, "grad_norm": 3.6279563635896923, "learning_rate": 6.152680129408488e-06, "loss": 0.6315, "step": 4212 }, { "epoch": 0.44, "grad_norm": 2.6101031201569262, "learning_rate": 6.151021730023308e-06, "loss": 0.6899, "step": 4213 }, { "epoch": 0.44, "grad_norm": 2.8097748740269637, "learning_rate": 6.14936319691575e-06, "loss": 0.6828, "step": 4214 }, { "epoch": 0.44, "grad_norm": 2.5163859758968785, "learning_rate": 6.147704530278497e-06, "loss": 0.7089, "step": 4215 }, { "epoch": 0.44, "grad_norm": 2.8122326932337884, "learning_rate": 6.146045730304252e-06, "loss": 0.7728, "step": 4216 }, { "epoch": 0.44, "grad_norm": 2.2991108660602873, "learning_rate": 6.144386797185724e-06, "loss": 0.6771, "step": 4217 }, { "epoch": 0.44, "grad_norm": 2.82067248565608, "learning_rate": 6.14272773111565e-06, "loss": 0.6825, "step": 4218 }, { "epoch": 0.44, "grad_norm": 4.292155901876321, "learning_rate": 6.141068532286768e-06, "loss": 0.7232, "step": 4219 }, { "epoch": 0.44, "grad_norm": 3.2138852203020436, "learning_rate": 6.139409200891845e-06, "loss": 0.6331, "step": 4220 }, { "epoch": 0.44, "grad_norm": 2.733361759557919, "learning_rate": 6.137749737123652e-06, "loss": 0.5726, "step": 4221 }, { "epoch": 0.44, "grad_norm": 2.045897121812701, "learning_rate": 6.136090141174986e-06, "loss": 0.6303, "step": 4222 }, { "epoch": 0.44, "grad_norm": 6.215595577195097, "learning_rate": 6.134430413238649e-06, "loss": 0.7165, "step": 4223 }, { "epoch": 0.44, "grad_norm": 2.4720795018859287, "learning_rate": 6.132770553507468e-06, "loss": 0.5848, "step": 4224 }, { "epoch": 0.44, "grad_norm": 2.469345000674683, "learning_rate": 6.1311105621742775e-06, "loss": 0.6669, "step": 4225 }, { "epoch": 0.44, "grad_norm": 2.625205929898641, "learning_rate": 6.129450439431932e-06, "loss": 0.6472, "step": 4226 }, { "epoch": 0.44, "grad_norm": 3.5548855414175695, "learning_rate": 6.1277901854732994e-06, "loss": 0.6817, "step": 4227 }, { "epoch": 0.44, "grad_norm": 2.1027568650104005, "learning_rate": 6.126129800491263e-06, "loss": 0.6514, "step": 4228 }, { "epoch": 0.45, "grad_norm": 2.045046875891458, "learning_rate": 6.124469284678721e-06, "loss": 0.5825, "step": 4229 }, { "epoch": 0.45, "grad_norm": 1.1138142631995698, "learning_rate": 6.122808638228588e-06, "loss": 0.6002, "step": 4230 }, { "epoch": 0.45, "grad_norm": 3.3093105459556447, "learning_rate": 6.121147861333795e-06, "loss": 0.7347, "step": 4231 }, { "epoch": 0.45, "grad_norm": 2.637992867101944, "learning_rate": 6.119486954187283e-06, "loss": 0.6293, "step": 4232 }, { "epoch": 0.45, "grad_norm": 4.2443163383582645, "learning_rate": 6.117825916982013e-06, "loss": 0.6475, "step": 4233 }, { "epoch": 0.45, "grad_norm": 2.3645005998517554, "learning_rate": 6.116164749910959e-06, "loss": 0.7004, "step": 4234 }, { "epoch": 0.45, "grad_norm": 12.488986762043982, "learning_rate": 6.114503453167112e-06, "loss": 0.6375, "step": 4235 }, { "epoch": 0.45, "grad_norm": 2.456714677650742, "learning_rate": 6.112842026943473e-06, "loss": 0.7201, "step": 4236 }, { "epoch": 0.45, "grad_norm": 2.792896979064036, "learning_rate": 6.111180471433067e-06, "loss": 0.7046, "step": 4237 }, { "epoch": 0.45, "grad_norm": 2.311085039466233, "learning_rate": 6.109518786828924e-06, "loss": 0.6333, "step": 4238 }, { "epoch": 0.45, "grad_norm": 4.082195260565265, "learning_rate": 6.107856973324097e-06, "loss": 0.7398, "step": 4239 }, { "epoch": 0.45, "grad_norm": 3.422658221464742, "learning_rate": 6.106195031111648e-06, "loss": 0.626, "step": 4240 }, { "epoch": 0.45, "grad_norm": 4.357834278269825, "learning_rate": 6.104532960384658e-06, "loss": 0.6259, "step": 4241 }, { "epoch": 0.45, "grad_norm": 2.0886646213733924, "learning_rate": 6.1028707613362236e-06, "loss": 0.7135, "step": 4242 }, { "epoch": 0.45, "grad_norm": 2.8932851903398698, "learning_rate": 6.101208434159451e-06, "loss": 0.6996, "step": 4243 }, { "epoch": 0.45, "grad_norm": 2.3623621800314547, "learning_rate": 6.099545979047465e-06, "loss": 0.6463, "step": 4244 }, { "epoch": 0.45, "grad_norm": 2.3794475175322924, "learning_rate": 6.097883396193406e-06, "loss": 0.6134, "step": 4245 }, { "epoch": 0.45, "grad_norm": 2.2159715788742234, "learning_rate": 6.09622068579043e-06, "loss": 0.6379, "step": 4246 }, { "epoch": 0.45, "grad_norm": 3.65089220209315, "learning_rate": 6.094557848031699e-06, "loss": 0.6913, "step": 4247 }, { "epoch": 0.45, "grad_norm": 2.238068508379566, "learning_rate": 6.092894883110405e-06, "loss": 0.6267, "step": 4248 }, { "epoch": 0.45, "grad_norm": 2.62840874216548, "learning_rate": 6.0912317912197416e-06, "loss": 0.6864, "step": 4249 }, { "epoch": 0.45, "grad_norm": 2.7152502107110603, "learning_rate": 6.089568572552923e-06, "loss": 0.6414, "step": 4250 }, { "epoch": 0.45, "grad_norm": 1.0634647134438302, "learning_rate": 6.087905227303177e-06, "loss": 0.5915, "step": 4251 }, { "epoch": 0.45, "grad_norm": 2.6867407708114026, "learning_rate": 6.086241755663746e-06, "loss": 0.668, "step": 4252 }, { "epoch": 0.45, "grad_norm": 1.0245261285927338, "learning_rate": 6.08457815782789e-06, "loss": 0.5945, "step": 4253 }, { "epoch": 0.45, "grad_norm": 2.184617873138533, "learning_rate": 6.082914433988875e-06, "loss": 0.566, "step": 4254 }, { "epoch": 0.45, "grad_norm": 2.2773216580867945, "learning_rate": 6.081250584339996e-06, "loss": 0.5985, "step": 4255 }, { "epoch": 0.45, "grad_norm": 2.2623346319662763, "learning_rate": 6.079586609074547e-06, "loss": 0.6648, "step": 4256 }, { "epoch": 0.45, "grad_norm": 3.594546200291953, "learning_rate": 6.077922508385849e-06, "loss": 0.6895, "step": 4257 }, { "epoch": 0.45, "grad_norm": 2.2014493515781806, "learning_rate": 6.076258282467227e-06, "loss": 0.6097, "step": 4258 }, { "epoch": 0.45, "grad_norm": 2.6694207927575233, "learning_rate": 6.074593931512031e-06, "loss": 0.6801, "step": 4259 }, { "epoch": 0.45, "grad_norm": 3.451259021434374, "learning_rate": 6.072929455713616e-06, "loss": 0.6692, "step": 4260 }, { "epoch": 0.45, "grad_norm": 1.9337354819470554, "learning_rate": 6.07126485526536e-06, "loss": 0.5854, "step": 4261 }, { "epoch": 0.45, "grad_norm": 9.767008015068956, "learning_rate": 6.0696001303606486e-06, "loss": 0.6037, "step": 4262 }, { "epoch": 0.45, "grad_norm": 3.0086485367597264, "learning_rate": 6.067935281192887e-06, "loss": 0.6432, "step": 4263 }, { "epoch": 0.45, "grad_norm": 2.153528393185996, "learning_rate": 6.066270307955492e-06, "loss": 0.643, "step": 4264 }, { "epoch": 0.45, "grad_norm": 3.1645611909710882, "learning_rate": 6.064605210841893e-06, "loss": 0.6053, "step": 4265 }, { "epoch": 0.45, "grad_norm": 2.309866791740079, "learning_rate": 6.062939990045541e-06, "loss": 0.6321, "step": 4266 }, { "epoch": 0.45, "grad_norm": 2.156210421450635, "learning_rate": 6.06127464575989e-06, "loss": 0.5757, "step": 4267 }, { "epoch": 0.45, "grad_norm": 3.244885997343262, "learning_rate": 6.059609178178423e-06, "loss": 0.6942, "step": 4268 }, { "epoch": 0.45, "grad_norm": 3.152053051101937, "learning_rate": 6.0579435874946205e-06, "loss": 0.6633, "step": 4269 }, { "epoch": 0.45, "grad_norm": 4.478964275396135, "learning_rate": 6.056277873901993e-06, "loss": 0.6697, "step": 4270 }, { "epoch": 0.45, "grad_norm": 2.9729373074740866, "learning_rate": 6.054612037594053e-06, "loss": 0.7223, "step": 4271 }, { "epoch": 0.45, "grad_norm": 2.0783619133176625, "learning_rate": 6.052946078764337e-06, "loss": 0.6165, "step": 4272 }, { "epoch": 0.45, "grad_norm": 1.2263809793723803, "learning_rate": 6.0512799976063885e-06, "loss": 0.586, "step": 4273 }, { "epoch": 0.45, "grad_norm": 2.84734542991169, "learning_rate": 6.049613794313769e-06, "loss": 0.651, "step": 4274 }, { "epoch": 0.45, "grad_norm": 2.3793929939042915, "learning_rate": 6.047947469080053e-06, "loss": 0.7264, "step": 4275 }, { "epoch": 0.45, "grad_norm": 3.7894502963984307, "learning_rate": 6.0462810220988284e-06, "loss": 0.5848, "step": 4276 }, { "epoch": 0.45, "grad_norm": 2.385912284914175, "learning_rate": 6.044614453563702e-06, "loss": 0.6738, "step": 4277 }, { "epoch": 0.45, "grad_norm": 3.0450272266233553, "learning_rate": 6.042947763668285e-06, "loss": 0.746, "step": 4278 }, { "epoch": 0.45, "grad_norm": 2.0469407502606782, "learning_rate": 6.041280952606214e-06, "loss": 0.5847, "step": 4279 }, { "epoch": 0.45, "grad_norm": 2.531872581619871, "learning_rate": 6.03961402057113e-06, "loss": 0.5961, "step": 4280 }, { "epoch": 0.45, "grad_norm": 0.9848807465163134, "learning_rate": 6.037946967756696e-06, "loss": 0.6215, "step": 4281 }, { "epoch": 0.45, "grad_norm": 2.090185818907428, "learning_rate": 6.036279794356582e-06, "loss": 0.6028, "step": 4282 }, { "epoch": 0.45, "grad_norm": 1.995346619697124, "learning_rate": 6.034612500564479e-06, "loss": 0.5208, "step": 4283 }, { "epoch": 0.45, "grad_norm": 2.7529881198497277, "learning_rate": 6.032945086574085e-06, "loss": 0.6228, "step": 4284 }, { "epoch": 0.45, "grad_norm": 3.3379083789144857, "learning_rate": 6.0312775525791165e-06, "loss": 0.6813, "step": 4285 }, { "epoch": 0.45, "grad_norm": 2.119809893657813, "learning_rate": 6.029609898773305e-06, "loss": 0.6076, "step": 4286 }, { "epoch": 0.45, "grad_norm": 2.5052511733476788, "learning_rate": 6.027942125350389e-06, "loss": 0.6299, "step": 4287 }, { "epoch": 0.45, "grad_norm": 2.655210831114237, "learning_rate": 6.02627423250413e-06, "loss": 0.6856, "step": 4288 }, { "epoch": 0.45, "grad_norm": 2.625661240338276, "learning_rate": 6.024606220428297e-06, "loss": 0.647, "step": 4289 }, { "epoch": 0.45, "grad_norm": 3.2243027452718698, "learning_rate": 6.022938089316677e-06, "loss": 0.7011, "step": 4290 }, { "epoch": 0.45, "grad_norm": 2.3340700591649557, "learning_rate": 6.021269839363063e-06, "loss": 0.6817, "step": 4291 }, { "epoch": 0.45, "grad_norm": 3.0637410458294068, "learning_rate": 6.019601470761275e-06, "loss": 0.668, "step": 4292 }, { "epoch": 0.45, "grad_norm": 2.865208891942265, "learning_rate": 6.017932983705132e-06, "loss": 0.6142, "step": 4293 }, { "epoch": 0.45, "grad_norm": 2.9027349130767828, "learning_rate": 6.016264378388481e-06, "loss": 0.6485, "step": 4294 }, { "epoch": 0.45, "grad_norm": 2.9168453504696266, "learning_rate": 6.0145956550051694e-06, "loss": 0.6187, "step": 4295 }, { "epoch": 0.45, "grad_norm": 2.7032692104534073, "learning_rate": 6.01292681374907e-06, "loss": 0.6664, "step": 4296 }, { "epoch": 0.45, "grad_norm": 2.223566307096264, "learning_rate": 6.01125785481406e-06, "loss": 0.5642, "step": 4297 }, { "epoch": 0.45, "grad_norm": 3.504081111094006, "learning_rate": 6.009588778394035e-06, "loss": 0.6222, "step": 4298 }, { "epoch": 0.45, "grad_norm": 1.8721849635784116, "learning_rate": 6.0079195846829055e-06, "loss": 0.629, "step": 4299 }, { "epoch": 0.45, "grad_norm": 1.9713414450562043, "learning_rate": 6.006250273874591e-06, "loss": 0.6071, "step": 4300 }, { "epoch": 0.45, "grad_norm": 2.0636910457431736, "learning_rate": 6.0045808461630295e-06, "loss": 0.606, "step": 4301 }, { "epoch": 0.45, "grad_norm": 2.4935316363007756, "learning_rate": 6.002911301742168e-06, "loss": 0.6487, "step": 4302 }, { "epoch": 0.45, "grad_norm": 2.0612655080904037, "learning_rate": 6.001241640805973e-06, "loss": 0.6267, "step": 4303 }, { "epoch": 0.45, "grad_norm": 5.5302669242733815, "learning_rate": 5.999571863548416e-06, "loss": 0.6001, "step": 4304 }, { "epoch": 0.45, "grad_norm": 2.4210531577615226, "learning_rate": 5.997901970163491e-06, "loss": 0.6732, "step": 4305 }, { "epoch": 0.45, "grad_norm": 2.4958888183227415, "learning_rate": 5.996231960845198e-06, "loss": 0.6682, "step": 4306 }, { "epoch": 0.45, "grad_norm": 2.5503369133292892, "learning_rate": 5.994561835787558e-06, "loss": 0.5959, "step": 4307 }, { "epoch": 0.45, "grad_norm": 2.176063899739418, "learning_rate": 5.992891595184596e-06, "loss": 0.6718, "step": 4308 }, { "epoch": 0.45, "grad_norm": 2.4856814525301485, "learning_rate": 5.991221239230362e-06, "loss": 0.6848, "step": 4309 }, { "epoch": 0.45, "grad_norm": 2.966837942768863, "learning_rate": 5.989550768118908e-06, "loss": 0.5977, "step": 4310 }, { "epoch": 0.45, "grad_norm": 2.4789769096013554, "learning_rate": 5.987880182044304e-06, "loss": 0.5662, "step": 4311 }, { "epoch": 0.45, "grad_norm": 2.0922816492045966, "learning_rate": 5.98620948120064e-06, "loss": 0.7099, "step": 4312 }, { "epoch": 0.45, "grad_norm": 2.144043338021814, "learning_rate": 5.984538665782007e-06, "loss": 0.6219, "step": 4313 }, { "epoch": 0.45, "grad_norm": 1.9684756423653305, "learning_rate": 5.9828677359825196e-06, "loss": 0.6493, "step": 4314 }, { "epoch": 0.45, "grad_norm": 2.3925762779344515, "learning_rate": 5.981196691996298e-06, "loss": 0.5853, "step": 4315 }, { "epoch": 0.45, "grad_norm": 2.9740384297980214, "learning_rate": 5.9795255340174825e-06, "loss": 0.656, "step": 4316 }, { "epoch": 0.45, "grad_norm": 2.1922968760567727, "learning_rate": 5.9778542622402205e-06, "loss": 0.5823, "step": 4317 }, { "epoch": 0.45, "grad_norm": 2.08742766373127, "learning_rate": 5.976182876858679e-06, "loss": 0.6744, "step": 4318 }, { "epoch": 0.45, "grad_norm": 2.4973148138070638, "learning_rate": 5.9745113780670305e-06, "loss": 0.6447, "step": 4319 }, { "epoch": 0.45, "grad_norm": 2.6221490595037755, "learning_rate": 5.972839766059469e-06, "loss": 0.6632, "step": 4320 }, { "epoch": 0.45, "grad_norm": 2.182690583898645, "learning_rate": 5.971168041030194e-06, "loss": 0.7078, "step": 4321 }, { "epoch": 0.45, "grad_norm": 1.984007580074134, "learning_rate": 5.969496203173424e-06, "loss": 0.5901, "step": 4322 }, { "epoch": 0.45, "grad_norm": 3.2119980194558324, "learning_rate": 5.967824252683389e-06, "loss": 0.6961, "step": 4323 }, { "epoch": 0.46, "grad_norm": 1.9424736990101, "learning_rate": 5.9661521897543276e-06, "loss": 0.5905, "step": 4324 }, { "epoch": 0.46, "grad_norm": 2.3950357257174666, "learning_rate": 5.9644800145805e-06, "loss": 0.6498, "step": 4325 }, { "epoch": 0.46, "grad_norm": 2.9016254383942606, "learning_rate": 5.962807727356169e-06, "loss": 0.7386, "step": 4326 }, { "epoch": 0.46, "grad_norm": 2.187079791651539, "learning_rate": 5.9611353282756235e-06, "loss": 0.6256, "step": 4327 }, { "epoch": 0.46, "grad_norm": 3.203908956516435, "learning_rate": 5.95946281753315e-06, "loss": 0.5533, "step": 4328 }, { "epoch": 0.46, "grad_norm": 2.3206237982379285, "learning_rate": 5.957790195323064e-06, "loss": 0.5879, "step": 4329 }, { "epoch": 0.46, "grad_norm": 2.0758013751273796, "learning_rate": 5.956117461839679e-06, "loss": 0.7067, "step": 4330 }, { "epoch": 0.46, "grad_norm": 2.9307153953571254, "learning_rate": 5.954444617277332e-06, "loss": 0.6043, "step": 4331 }, { "epoch": 0.46, "grad_norm": 3.1653578493636227, "learning_rate": 5.952771661830368e-06, "loss": 0.6076, "step": 4332 }, { "epoch": 0.46, "grad_norm": 2.835128332613066, "learning_rate": 5.951098595693146e-06, "loss": 0.5411, "step": 4333 }, { "epoch": 0.46, "grad_norm": 2.314934556444207, "learning_rate": 5.9494254190600395e-06, "loss": 0.7049, "step": 4334 }, { "epoch": 0.46, "grad_norm": 2.742652891296002, "learning_rate": 5.947752132125432e-06, "loss": 0.6817, "step": 4335 }, { "epoch": 0.46, "grad_norm": 2.3350065294888402, "learning_rate": 5.946078735083723e-06, "loss": 0.6664, "step": 4336 }, { "epoch": 0.46, "grad_norm": 2.878910527625717, "learning_rate": 5.944405228129318e-06, "loss": 0.6108, "step": 4337 }, { "epoch": 0.46, "grad_norm": 2.8697645174477455, "learning_rate": 5.942731611456647e-06, "loss": 0.6308, "step": 4338 }, { "epoch": 0.46, "grad_norm": 3.8193425849051312, "learning_rate": 5.941057885260141e-06, "loss": 0.6536, "step": 4339 }, { "epoch": 0.46, "grad_norm": 2.3962102257159876, "learning_rate": 5.939384049734252e-06, "loss": 0.6428, "step": 4340 }, { "epoch": 0.46, "grad_norm": 1.897199349105444, "learning_rate": 5.937710105073436e-06, "loss": 0.6204, "step": 4341 }, { "epoch": 0.46, "grad_norm": 4.592219177725684, "learning_rate": 5.936036051472173e-06, "loss": 0.6533, "step": 4342 }, { "epoch": 0.46, "grad_norm": 2.856840556341079, "learning_rate": 5.934361889124946e-06, "loss": 0.6482, "step": 4343 }, { "epoch": 0.46, "grad_norm": 3.8920368478735567, "learning_rate": 5.9326876182262575e-06, "loss": 0.6338, "step": 4344 }, { "epoch": 0.46, "grad_norm": 2.3189541995763316, "learning_rate": 5.931013238970616e-06, "loss": 0.6674, "step": 4345 }, { "epoch": 0.46, "grad_norm": 3.1842357570851374, "learning_rate": 5.929338751552549e-06, "loss": 0.6539, "step": 4346 }, { "epoch": 0.46, "grad_norm": 2.412864761506081, "learning_rate": 5.927664156166592e-06, "loss": 0.6452, "step": 4347 }, { "epoch": 0.46, "grad_norm": 3.450085408288252, "learning_rate": 5.925989453007294e-06, "loss": 0.5489, "step": 4348 }, { "epoch": 0.46, "grad_norm": 2.413183425668527, "learning_rate": 5.924314642269219e-06, "loss": 0.5971, "step": 4349 }, { "epoch": 0.46, "grad_norm": 2.077588646518163, "learning_rate": 5.922639724146939e-06, "loss": 0.6276, "step": 4350 }, { "epoch": 0.46, "grad_norm": 2.440785017604606, "learning_rate": 5.920964698835047e-06, "loss": 0.6674, "step": 4351 }, { "epoch": 0.46, "grad_norm": 3.242550745644832, "learning_rate": 5.919289566528135e-06, "loss": 0.5893, "step": 4352 }, { "epoch": 0.46, "grad_norm": 2.571180994234153, "learning_rate": 5.9176143274208185e-06, "loss": 0.7091, "step": 4353 }, { "epoch": 0.46, "grad_norm": 2.4118573325477386, "learning_rate": 5.915938981707724e-06, "loss": 0.6622, "step": 4354 }, { "epoch": 0.46, "grad_norm": 2.5558862540573424, "learning_rate": 5.914263529583485e-06, "loss": 0.7812, "step": 4355 }, { "epoch": 0.46, "grad_norm": 2.2737963351658887, "learning_rate": 5.9125879712427525e-06, "loss": 0.5881, "step": 4356 }, { "epoch": 0.46, "grad_norm": 2.4431483934766702, "learning_rate": 5.9109123068801875e-06, "loss": 0.7217, "step": 4357 }, { "epoch": 0.46, "grad_norm": 2.3370688862721347, "learning_rate": 5.909236536690464e-06, "loss": 0.6663, "step": 4358 }, { "epoch": 0.46, "grad_norm": 2.3073085136490574, "learning_rate": 5.907560660868266e-06, "loss": 0.6542, "step": 4359 }, { "epoch": 0.46, "grad_norm": 3.2027134863893343, "learning_rate": 5.905884679608297e-06, "loss": 0.6156, "step": 4360 }, { "epoch": 0.46, "grad_norm": 2.5426901682583063, "learning_rate": 5.904208593105263e-06, "loss": 0.7358, "step": 4361 }, { "epoch": 0.46, "grad_norm": 3.4513894931625066, "learning_rate": 5.902532401553888e-06, "loss": 0.6191, "step": 4362 }, { "epoch": 0.46, "grad_norm": 2.4589931992350498, "learning_rate": 5.900856105148908e-06, "loss": 0.6734, "step": 4363 }, { "epoch": 0.46, "grad_norm": 2.33735601801206, "learning_rate": 5.899179704085072e-06, "loss": 0.6383, "step": 4364 }, { "epoch": 0.46, "grad_norm": 2.153407332381854, "learning_rate": 5.897503198557134e-06, "loss": 0.6444, "step": 4365 }, { "epoch": 0.46, "grad_norm": 2.8332048431773433, "learning_rate": 5.89582658875987e-06, "loss": 0.5908, "step": 4366 }, { "epoch": 0.46, "grad_norm": 2.8707757219917687, "learning_rate": 5.8941498748880635e-06, "loss": 0.6854, "step": 4367 }, { "epoch": 0.46, "grad_norm": 3.80342904637755, "learning_rate": 5.892473057136508e-06, "loss": 0.5482, "step": 4368 }, { "epoch": 0.46, "grad_norm": 1.0613535298316306, "learning_rate": 5.890796135700013e-06, "loss": 0.607, "step": 4369 }, { "epoch": 0.46, "grad_norm": 4.544050244229915, "learning_rate": 5.889119110773398e-06, "loss": 0.6613, "step": 4370 }, { "epoch": 0.46, "grad_norm": 2.225190268668775, "learning_rate": 5.887441982551495e-06, "loss": 0.684, "step": 4371 }, { "epoch": 0.46, "grad_norm": 2.406353835293022, "learning_rate": 5.885764751229146e-06, "loss": 0.5922, "step": 4372 }, { "epoch": 0.46, "grad_norm": 2.660963513951452, "learning_rate": 5.884087417001212e-06, "loss": 0.6724, "step": 4373 }, { "epoch": 0.46, "grad_norm": 2.151920515191103, "learning_rate": 5.882409980062554e-06, "loss": 0.6619, "step": 4374 }, { "epoch": 0.46, "grad_norm": 4.056944032442784, "learning_rate": 5.880732440608059e-06, "loss": 0.689, "step": 4375 }, { "epoch": 0.46, "grad_norm": 2.8201326385968266, "learning_rate": 5.879054798832612e-06, "loss": 0.6328, "step": 4376 }, { "epoch": 0.46, "grad_norm": 1.9966584556095535, "learning_rate": 5.877377054931122e-06, "loss": 0.6494, "step": 4377 }, { "epoch": 0.46, "grad_norm": 2.3102019347667095, "learning_rate": 5.8756992090985e-06, "loss": 0.6443, "step": 4378 }, { "epoch": 0.46, "grad_norm": 2.7187452579507503, "learning_rate": 5.874021261529675e-06, "loss": 0.778, "step": 4379 }, { "epoch": 0.46, "grad_norm": 3.9391200420774966, "learning_rate": 5.872343212419589e-06, "loss": 0.7402, "step": 4380 }, { "epoch": 0.46, "grad_norm": 3.342650913183981, "learning_rate": 5.870665061963188e-06, "loss": 0.6401, "step": 4381 }, { "epoch": 0.46, "grad_norm": 3.10923752534272, "learning_rate": 5.868986810355437e-06, "loss": 0.6497, "step": 4382 }, { "epoch": 0.46, "grad_norm": 2.9439328630070154, "learning_rate": 5.867308457791311e-06, "loss": 0.6287, "step": 4383 }, { "epoch": 0.46, "grad_norm": 2.1724951171650644, "learning_rate": 5.865630004465796e-06, "loss": 0.6773, "step": 4384 }, { "epoch": 0.46, "grad_norm": 2.2545948900225095, "learning_rate": 5.8639514505738885e-06, "loss": 0.7132, "step": 4385 }, { "epoch": 0.46, "grad_norm": 2.339764871054603, "learning_rate": 5.8622727963106e-06, "loss": 0.6558, "step": 4386 }, { "epoch": 0.46, "grad_norm": 6.264396932081793, "learning_rate": 5.860594041870948e-06, "loss": 0.564, "step": 4387 }, { "epoch": 0.46, "grad_norm": 3.0726383532297126, "learning_rate": 5.85891518744997e-06, "loss": 0.6943, "step": 4388 }, { "epoch": 0.46, "grad_norm": 3.819599638111385, "learning_rate": 5.857236233242709e-06, "loss": 0.6919, "step": 4389 }, { "epoch": 0.46, "grad_norm": 2.55942614466353, "learning_rate": 5.855557179444219e-06, "loss": 0.6243, "step": 4390 }, { "epoch": 0.46, "grad_norm": 2.1630742426007186, "learning_rate": 5.8538780262495695e-06, "loss": 0.59, "step": 4391 }, { "epoch": 0.46, "grad_norm": 2.6158551871983122, "learning_rate": 5.85219877385384e-06, "loss": 0.7309, "step": 4392 }, { "epoch": 0.46, "grad_norm": 2.212409520666976, "learning_rate": 5.8505194224521204e-06, "loss": 0.5875, "step": 4393 }, { "epoch": 0.46, "grad_norm": 2.3813643477613318, "learning_rate": 5.848839972239512e-06, "loss": 0.6635, "step": 4394 }, { "epoch": 0.46, "grad_norm": 2.4275155873597285, "learning_rate": 5.847160423411129e-06, "loss": 0.6709, "step": 4395 }, { "epoch": 0.46, "grad_norm": 4.826967263615847, "learning_rate": 5.845480776162097e-06, "loss": 0.6407, "step": 4396 }, { "epoch": 0.46, "grad_norm": 2.306596082817607, "learning_rate": 5.843801030687555e-06, "loss": 0.6385, "step": 4397 }, { "epoch": 0.46, "grad_norm": 1.9985283159296727, "learning_rate": 5.842121187182644e-06, "loss": 0.6064, "step": 4398 }, { "epoch": 0.46, "grad_norm": 3.2091727302195725, "learning_rate": 5.840441245842532e-06, "loss": 0.74, "step": 4399 }, { "epoch": 0.46, "grad_norm": 2.8692803940553713, "learning_rate": 5.838761206862382e-06, "loss": 0.6428, "step": 4400 }, { "epoch": 0.46, "grad_norm": 2.125632738179534, "learning_rate": 5.837081070437383e-06, "loss": 0.7399, "step": 4401 }, { "epoch": 0.46, "grad_norm": 3.397361281528623, "learning_rate": 5.835400836762723e-06, "loss": 0.6768, "step": 4402 }, { "epoch": 0.46, "grad_norm": 4.962196122812087, "learning_rate": 5.833720506033609e-06, "loss": 0.5604, "step": 4403 }, { "epoch": 0.46, "grad_norm": 2.4524501870949416, "learning_rate": 5.8320400784452566e-06, "loss": 0.6212, "step": 4404 }, { "epoch": 0.46, "grad_norm": 2.242962121068071, "learning_rate": 5.830359554192894e-06, "loss": 0.6391, "step": 4405 }, { "epoch": 0.46, "grad_norm": 2.4880079804921844, "learning_rate": 5.828678933471758e-06, "loss": 0.6894, "step": 4406 }, { "epoch": 0.46, "grad_norm": 0.9982078926059621, "learning_rate": 5.826998216477097e-06, "loss": 0.5715, "step": 4407 }, { "epoch": 0.46, "grad_norm": 3.39200422891883, "learning_rate": 5.825317403404177e-06, "loss": 0.7124, "step": 4408 }, { "epoch": 0.46, "grad_norm": 2.5758571917643183, "learning_rate": 5.823636494448265e-06, "loss": 0.6916, "step": 4409 }, { "epoch": 0.46, "grad_norm": 2.0155720379239783, "learning_rate": 5.821955489804647e-06, "loss": 0.5974, "step": 4410 }, { "epoch": 0.46, "grad_norm": 2.4690764237144096, "learning_rate": 5.820274389668614e-06, "loss": 0.6291, "step": 4411 }, { "epoch": 0.46, "grad_norm": 2.4569169908883968, "learning_rate": 5.818593194235475e-06, "loss": 0.675, "step": 4412 }, { "epoch": 0.46, "grad_norm": 2.0672128706646213, "learning_rate": 5.816911903700546e-06, "loss": 0.6804, "step": 4413 }, { "epoch": 0.46, "grad_norm": 1.9976719347095646, "learning_rate": 5.815230518259153e-06, "loss": 0.6844, "step": 4414 }, { "epoch": 0.46, "grad_norm": 2.564923614303906, "learning_rate": 5.813549038106635e-06, "loss": 0.6927, "step": 4415 }, { "epoch": 0.46, "grad_norm": 2.3811759370101084, "learning_rate": 5.811867463438341e-06, "loss": 0.628, "step": 4416 }, { "epoch": 0.46, "grad_norm": 2.0628079283250282, "learning_rate": 5.810185794449633e-06, "loss": 0.5905, "step": 4417 }, { "epoch": 0.46, "grad_norm": 2.582538159728672, "learning_rate": 5.80850403133588e-06, "loss": 0.7048, "step": 4418 }, { "epoch": 0.47, "grad_norm": 2.4157594950729466, "learning_rate": 5.806822174292467e-06, "loss": 0.5557, "step": 4419 }, { "epoch": 0.47, "grad_norm": 0.990310949015315, "learning_rate": 5.805140223514785e-06, "loss": 0.5572, "step": 4420 }, { "epoch": 0.47, "grad_norm": 3.0489038974046543, "learning_rate": 5.80345817919824e-06, "loss": 0.6249, "step": 4421 }, { "epoch": 0.47, "grad_norm": 2.060292665650145, "learning_rate": 5.801776041538245e-06, "loss": 0.6035, "step": 4422 }, { "epoch": 0.47, "grad_norm": 2.091882121485412, "learning_rate": 5.800093810730229e-06, "loss": 0.6245, "step": 4423 }, { "epoch": 0.47, "grad_norm": 3.179974949588236, "learning_rate": 5.798411486969626e-06, "loss": 0.6589, "step": 4424 }, { "epoch": 0.47, "grad_norm": 2.138657402068139, "learning_rate": 5.796729070451884e-06, "loss": 0.6545, "step": 4425 }, { "epoch": 0.47, "grad_norm": 2.233621588999477, "learning_rate": 5.795046561372463e-06, "loss": 0.681, "step": 4426 }, { "epoch": 0.47, "grad_norm": 3.004996496095365, "learning_rate": 5.79336395992683e-06, "loss": 0.6399, "step": 4427 }, { "epoch": 0.47, "grad_norm": 2.069729876929192, "learning_rate": 5.791681266310465e-06, "loss": 0.6049, "step": 4428 }, { "epoch": 0.47, "grad_norm": 2.4465891459726623, "learning_rate": 5.78999848071886e-06, "loss": 0.6035, "step": 4429 }, { "epoch": 0.47, "grad_norm": 2.2899552504847716, "learning_rate": 5.788315603347515e-06, "loss": 0.6646, "step": 4430 }, { "epoch": 0.47, "grad_norm": 2.750752115170631, "learning_rate": 5.78663263439194e-06, "loss": 0.6953, "step": 4431 }, { "epoch": 0.47, "grad_norm": 3.2813221898068594, "learning_rate": 5.7849495740476625e-06, "loss": 0.7107, "step": 4432 }, { "epoch": 0.47, "grad_norm": 4.412245921640548, "learning_rate": 5.783266422510211e-06, "loss": 0.6862, "step": 4433 }, { "epoch": 0.47, "grad_norm": 2.1713157933842573, "learning_rate": 5.781583179975132e-06, "loss": 0.7024, "step": 4434 }, { "epoch": 0.47, "grad_norm": 2.5292807136122497, "learning_rate": 5.779899846637976e-06, "loss": 0.6344, "step": 4435 }, { "epoch": 0.47, "grad_norm": 2.638992479723519, "learning_rate": 5.778216422694312e-06, "loss": 0.6147, "step": 4436 }, { "epoch": 0.47, "grad_norm": 2.253511934867257, "learning_rate": 5.776532908339713e-06, "loss": 0.6775, "step": 4437 }, { "epoch": 0.47, "grad_norm": 2.5189103997047404, "learning_rate": 5.774849303769767e-06, "loss": 0.6079, "step": 4438 }, { "epoch": 0.47, "grad_norm": 1.9551608372390805, "learning_rate": 5.773165609180067e-06, "loss": 0.6591, "step": 4439 }, { "epoch": 0.47, "grad_norm": 2.0567111687412636, "learning_rate": 5.771481824766222e-06, "loss": 0.6149, "step": 4440 }, { "epoch": 0.47, "grad_norm": 2.063349581374693, "learning_rate": 5.769797950723848e-06, "loss": 0.6967, "step": 4441 }, { "epoch": 0.47, "grad_norm": 2.1228306731836386, "learning_rate": 5.7681139872485744e-06, "loss": 0.6664, "step": 4442 }, { "epoch": 0.47, "grad_norm": 2.793644978792627, "learning_rate": 5.766429934536037e-06, "loss": 0.5862, "step": 4443 }, { "epoch": 0.47, "grad_norm": 1.0595037162948158, "learning_rate": 5.764745792781886e-06, "loss": 0.5796, "step": 4444 }, { "epoch": 0.47, "grad_norm": 2.6360229263832173, "learning_rate": 5.763061562181781e-06, "loss": 0.5961, "step": 4445 }, { "epoch": 0.47, "grad_norm": 2.317300668949924, "learning_rate": 5.761377242931386e-06, "loss": 0.6888, "step": 4446 }, { "epoch": 0.47, "grad_norm": 2.3873429958437904, "learning_rate": 5.759692835226387e-06, "loss": 0.6889, "step": 4447 }, { "epoch": 0.47, "grad_norm": 2.2136534312552785, "learning_rate": 5.75800833926247e-06, "loss": 0.6915, "step": 4448 }, { "epoch": 0.47, "grad_norm": 3.0974828764619353, "learning_rate": 5.756323755235334e-06, "loss": 0.5588, "step": 4449 }, { "epoch": 0.47, "grad_norm": 2.4333697468641398, "learning_rate": 5.754639083340691e-06, "loss": 0.7895, "step": 4450 }, { "epoch": 0.47, "grad_norm": 3.480667649075754, "learning_rate": 5.752954323774261e-06, "loss": 0.6759, "step": 4451 }, { "epoch": 0.47, "grad_norm": 2.26250369737515, "learning_rate": 5.751269476731775e-06, "loss": 0.5934, "step": 4452 }, { "epoch": 0.47, "grad_norm": 1.1003678544450188, "learning_rate": 5.749584542408971e-06, "loss": 0.5815, "step": 4453 }, { "epoch": 0.47, "grad_norm": 2.3078642309343578, "learning_rate": 5.747899521001603e-06, "loss": 0.6309, "step": 4454 }, { "epoch": 0.47, "grad_norm": 0.9538174909326671, "learning_rate": 5.74621441270543e-06, "loss": 0.5846, "step": 4455 }, { "epoch": 0.47, "grad_norm": 2.3481363612900386, "learning_rate": 5.744529217716225e-06, "loss": 0.6627, "step": 4456 }, { "epoch": 0.47, "grad_norm": 2.4288667378870494, "learning_rate": 5.742843936229765e-06, "loss": 0.6473, "step": 4457 }, { "epoch": 0.47, "grad_norm": 12.117953706010812, "learning_rate": 5.741158568441846e-06, "loss": 0.6602, "step": 4458 }, { "epoch": 0.47, "grad_norm": 2.9795673957075657, "learning_rate": 5.739473114548266e-06, "loss": 0.6127, "step": 4459 }, { "epoch": 0.47, "grad_norm": 2.184917637480841, "learning_rate": 5.737787574744837e-06, "loss": 0.7399, "step": 4460 }, { "epoch": 0.47, "grad_norm": 3.0453232292377943, "learning_rate": 5.736101949227382e-06, "loss": 0.6086, "step": 4461 }, { "epoch": 0.47, "grad_norm": 2.146930741168219, "learning_rate": 5.734416238191729e-06, "loss": 0.6511, "step": 4462 }, { "epoch": 0.47, "grad_norm": 2.0161096829790184, "learning_rate": 5.73273044183372e-06, "loss": 0.5465, "step": 4463 }, { "epoch": 0.47, "grad_norm": 3.4064385189492246, "learning_rate": 5.7310445603492085e-06, "loss": 0.6235, "step": 4464 }, { "epoch": 0.47, "grad_norm": 2.1516478687948544, "learning_rate": 5.729358593934051e-06, "loss": 0.6553, "step": 4465 }, { "epoch": 0.47, "grad_norm": 2.013275127090678, "learning_rate": 5.727672542784122e-06, "loss": 0.631, "step": 4466 }, { "epoch": 0.47, "grad_norm": 2.21296220940741, "learning_rate": 5.7259864070953e-06, "loss": 0.6722, "step": 4467 }, { "epoch": 0.47, "grad_norm": 2.0788929579185234, "learning_rate": 5.724300187063474e-06, "loss": 0.6972, "step": 4468 }, { "epoch": 0.47, "grad_norm": 2.2057902914842415, "learning_rate": 5.722613882884549e-06, "loss": 0.6598, "step": 4469 }, { "epoch": 0.47, "grad_norm": 2.3957705076731886, "learning_rate": 5.720927494754429e-06, "loss": 0.6991, "step": 4470 }, { "epoch": 0.47, "grad_norm": 10.12211879589646, "learning_rate": 5.719241022869039e-06, "loss": 0.6594, "step": 4471 }, { "epoch": 0.47, "grad_norm": 3.490904530002642, "learning_rate": 5.7175544674243044e-06, "loss": 0.5995, "step": 4472 }, { "epoch": 0.47, "grad_norm": 5.934704124433464, "learning_rate": 5.715867828616167e-06, "loss": 0.6718, "step": 4473 }, { "epoch": 0.47, "grad_norm": 2.518726432040352, "learning_rate": 5.714181106640575e-06, "loss": 0.7023, "step": 4474 }, { "epoch": 0.47, "grad_norm": 2.2024550220706205, "learning_rate": 5.712494301693486e-06, "loss": 0.6435, "step": 4475 }, { "epoch": 0.47, "grad_norm": 4.867057001543077, "learning_rate": 5.710807413970868e-06, "loss": 0.6585, "step": 4476 }, { "epoch": 0.47, "grad_norm": 2.3892252500285447, "learning_rate": 5.709120443668701e-06, "loss": 0.6105, "step": 4477 }, { "epoch": 0.47, "grad_norm": 2.751702391769238, "learning_rate": 5.707433390982969e-06, "loss": 0.6801, "step": 4478 }, { "epoch": 0.47, "grad_norm": 2.2281246093897633, "learning_rate": 5.705746256109671e-06, "loss": 0.6254, "step": 4479 }, { "epoch": 0.47, "grad_norm": 3.2065787831492654, "learning_rate": 5.704059039244814e-06, "loss": 0.6247, "step": 4480 }, { "epoch": 0.47, "grad_norm": 3.4211374534691665, "learning_rate": 5.7023717405844114e-06, "loss": 0.6601, "step": 4481 }, { "epoch": 0.47, "grad_norm": 8.97147154570612, "learning_rate": 5.700684360324492e-06, "loss": 0.6468, "step": 4482 }, { "epoch": 0.47, "grad_norm": 5.36936656327254, "learning_rate": 5.6989968986610876e-06, "loss": 0.6256, "step": 4483 }, { "epoch": 0.47, "grad_norm": 2.6783669889814394, "learning_rate": 5.697309355790246e-06, "loss": 0.7214, "step": 4484 }, { "epoch": 0.47, "grad_norm": 2.3024950794881716, "learning_rate": 5.695621731908018e-06, "loss": 0.6098, "step": 4485 }, { "epoch": 0.47, "grad_norm": 2.2375276123528454, "learning_rate": 5.693934027210468e-06, "loss": 0.6077, "step": 4486 }, { "epoch": 0.47, "grad_norm": 2.3754390849560383, "learning_rate": 5.692246241893669e-06, "loss": 0.6369, "step": 4487 }, { "epoch": 0.47, "grad_norm": 2.1211748838319964, "learning_rate": 5.6905583761537034e-06, "loss": 0.6388, "step": 4488 }, { "epoch": 0.47, "grad_norm": 3.868019959818687, "learning_rate": 5.68887043018666e-06, "loss": 0.6607, "step": 4489 }, { "epoch": 0.47, "grad_norm": 2.33570658417535, "learning_rate": 5.687182404188642e-06, "loss": 0.6159, "step": 4490 }, { "epoch": 0.47, "grad_norm": 2.8823599155269486, "learning_rate": 5.68549429835576e-06, "loss": 0.535, "step": 4491 }, { "epoch": 0.47, "grad_norm": 2.743721547670784, "learning_rate": 5.6838061128841294e-06, "loss": 0.6538, "step": 4492 }, { "epoch": 0.47, "grad_norm": 3.8002300637615645, "learning_rate": 5.682117847969884e-06, "loss": 0.6659, "step": 4493 }, { "epoch": 0.47, "grad_norm": 1.0584871886426965, "learning_rate": 5.680429503809157e-06, "loss": 0.5985, "step": 4494 }, { "epoch": 0.47, "grad_norm": 3.525503447996165, "learning_rate": 5.678741080598098e-06, "loss": 0.5908, "step": 4495 }, { "epoch": 0.47, "grad_norm": 2.687907042607034, "learning_rate": 5.6770525785328625e-06, "loss": 0.6845, "step": 4496 }, { "epoch": 0.47, "grad_norm": 2.304902889828247, "learning_rate": 5.675363997809616e-06, "loss": 0.6827, "step": 4497 }, { "epoch": 0.47, "grad_norm": 2.4406252620899282, "learning_rate": 5.6736753386245315e-06, "loss": 0.5224, "step": 4498 }, { "epoch": 0.47, "grad_norm": 2.5325199094461035, "learning_rate": 5.6719866011737934e-06, "loss": 0.6689, "step": 4499 }, { "epoch": 0.47, "grad_norm": 2.375682344749398, "learning_rate": 5.670297785653596e-06, "loss": 0.6651, "step": 4500 }, { "epoch": 0.47, "grad_norm": 2.425658142737432, "learning_rate": 5.668608892260138e-06, "loss": 0.7422, "step": 4501 }, { "epoch": 0.47, "grad_norm": 2.0617808401615054, "learning_rate": 5.666919921189632e-06, "loss": 0.6425, "step": 4502 }, { "epoch": 0.47, "grad_norm": 2.272673910971418, "learning_rate": 5.665230872638297e-06, "loss": 0.648, "step": 4503 }, { "epoch": 0.47, "grad_norm": 2.572982484112035, "learning_rate": 5.6635417468023635e-06, "loss": 0.6951, "step": 4504 }, { "epoch": 0.47, "grad_norm": 3.0084581361854075, "learning_rate": 5.661852543878067e-06, "loss": 0.6481, "step": 4505 }, { "epoch": 0.47, "grad_norm": 2.0157127811635567, "learning_rate": 5.660163264061656e-06, "loss": 0.585, "step": 4506 }, { "epoch": 0.47, "grad_norm": 2.5432077638654644, "learning_rate": 5.6584739075493835e-06, "loss": 0.6619, "step": 4507 }, { "epoch": 0.47, "grad_norm": 2.5133640279691036, "learning_rate": 5.656784474537518e-06, "loss": 0.612, "step": 4508 }, { "epoch": 0.47, "grad_norm": 2.9315990055007775, "learning_rate": 5.65509496522233e-06, "loss": 0.6136, "step": 4509 }, { "epoch": 0.47, "grad_norm": 3.2810358608137027, "learning_rate": 5.653405379800102e-06, "loss": 0.6993, "step": 4510 }, { "epoch": 0.47, "grad_norm": 2.493229989865747, "learning_rate": 5.651715718467127e-06, "loss": 0.657, "step": 4511 }, { "epoch": 0.47, "grad_norm": 2.6802549573204453, "learning_rate": 5.6500259814197025e-06, "loss": 0.6624, "step": 4512 }, { "epoch": 0.47, "grad_norm": 2.625253414667082, "learning_rate": 5.648336168854139e-06, "loss": 0.6646, "step": 4513 }, { "epoch": 0.48, "grad_norm": 2.3868204390514567, "learning_rate": 5.646646280966755e-06, "loss": 0.6926, "step": 4514 }, { "epoch": 0.48, "grad_norm": 2.4290428676146343, "learning_rate": 5.6449563179538734e-06, "loss": 0.5732, "step": 4515 }, { "epoch": 0.48, "grad_norm": 2.6813004334794983, "learning_rate": 5.64326628001183e-06, "loss": 0.6179, "step": 4516 }, { "epoch": 0.48, "grad_norm": 2.2908656332176767, "learning_rate": 5.641576167336972e-06, "loss": 0.7049, "step": 4517 }, { "epoch": 0.48, "grad_norm": 2.5593906258069583, "learning_rate": 5.639885980125649e-06, "loss": 0.5926, "step": 4518 }, { "epoch": 0.48, "grad_norm": 2.7153221598085593, "learning_rate": 5.638195718574222e-06, "loss": 0.7009, "step": 4519 }, { "epoch": 0.48, "grad_norm": 2.570454238663235, "learning_rate": 5.636505382879061e-06, "loss": 0.6327, "step": 4520 }, { "epoch": 0.48, "grad_norm": 2.8531174704597646, "learning_rate": 5.6348149732365465e-06, "loss": 0.6287, "step": 4521 }, { "epoch": 0.48, "grad_norm": 6.518867643695685, "learning_rate": 5.633124489843063e-06, "loss": 0.6584, "step": 4522 }, { "epoch": 0.48, "grad_norm": 3.940669081934954, "learning_rate": 5.631433932895005e-06, "loss": 0.7226, "step": 4523 }, { "epoch": 0.48, "grad_norm": 2.262500330185508, "learning_rate": 5.62974330258878e-06, "loss": 0.5943, "step": 4524 }, { "epoch": 0.48, "grad_norm": 2.668293506438283, "learning_rate": 5.6280525991207954e-06, "loss": 0.6074, "step": 4525 }, { "epoch": 0.48, "grad_norm": 2.5241175283242465, "learning_rate": 5.626361822687478e-06, "loss": 0.6424, "step": 4526 }, { "epoch": 0.48, "grad_norm": 2.3985700650396047, "learning_rate": 5.6246709734852535e-06, "loss": 0.6714, "step": 4527 }, { "epoch": 0.48, "grad_norm": 2.762272787782166, "learning_rate": 5.6229800517105615e-06, "loss": 0.7199, "step": 4528 }, { "epoch": 0.48, "grad_norm": 3.74146694769342, "learning_rate": 5.621289057559847e-06, "loss": 0.6983, "step": 4529 }, { "epoch": 0.48, "grad_norm": 2.5748292058521938, "learning_rate": 5.619597991229566e-06, "loss": 0.6199, "step": 4530 }, { "epoch": 0.48, "grad_norm": 2.9437147149445138, "learning_rate": 5.617906852916183e-06, "loss": 0.6011, "step": 4531 }, { "epoch": 0.48, "grad_norm": 23.92235581251259, "learning_rate": 5.6162156428161665e-06, "loss": 0.6263, "step": 4532 }, { "epoch": 0.48, "grad_norm": 3.087366386179771, "learning_rate": 5.614524361125998e-06, "loss": 0.5655, "step": 4533 }, { "epoch": 0.48, "grad_norm": 3.503816305177784, "learning_rate": 5.612833008042166e-06, "loss": 0.6989, "step": 4534 }, { "epoch": 0.48, "grad_norm": 2.0399171517394388, "learning_rate": 5.611141583761167e-06, "loss": 0.5931, "step": 4535 }, { "epoch": 0.48, "grad_norm": 2.4370559310970306, "learning_rate": 5.609450088479506e-06, "loss": 0.6178, "step": 4536 }, { "epoch": 0.48, "grad_norm": 2.033431572385822, "learning_rate": 5.607758522393693e-06, "loss": 0.5607, "step": 4537 }, { "epoch": 0.48, "grad_norm": 6.41604755439263, "learning_rate": 5.6060668857002545e-06, "loss": 0.6314, "step": 4538 }, { "epoch": 0.48, "grad_norm": 2.3686567490270187, "learning_rate": 5.604375178595715e-06, "loss": 0.6557, "step": 4539 }, { "epoch": 0.48, "grad_norm": 2.1562680126957847, "learning_rate": 5.6026834012766155e-06, "loss": 0.6312, "step": 4540 }, { "epoch": 0.48, "grad_norm": 2.498322304260171, "learning_rate": 5.600991553939501e-06, "loss": 0.7011, "step": 4541 }, { "epoch": 0.48, "grad_norm": 2.7681759119595353, "learning_rate": 5.5992996367809236e-06, "loss": 0.6749, "step": 4542 }, { "epoch": 0.48, "grad_norm": 3.1869566753671816, "learning_rate": 5.597607649997449e-06, "loss": 0.6855, "step": 4543 }, { "epoch": 0.48, "grad_norm": 2.3926645915251172, "learning_rate": 5.595915593785644e-06, "loss": 0.676, "step": 4544 }, { "epoch": 0.48, "grad_norm": 2.7435380969095537, "learning_rate": 5.594223468342087e-06, "loss": 0.5655, "step": 4545 }, { "epoch": 0.48, "grad_norm": 2.0698113787263352, "learning_rate": 5.592531273863367e-06, "loss": 0.5551, "step": 4546 }, { "epoch": 0.48, "grad_norm": 2.66821684730331, "learning_rate": 5.590839010546074e-06, "loss": 0.6269, "step": 4547 }, { "epoch": 0.48, "grad_norm": 2.6735786158961927, "learning_rate": 5.589146678586814e-06, "loss": 0.5896, "step": 4548 }, { "epoch": 0.48, "grad_norm": 2.556035989864172, "learning_rate": 5.587454278182196e-06, "loss": 0.7271, "step": 4549 }, { "epoch": 0.48, "grad_norm": 2.5030779108925056, "learning_rate": 5.585761809528839e-06, "loss": 0.5887, "step": 4550 }, { "epoch": 0.48, "grad_norm": 2.6285299123478363, "learning_rate": 5.584069272823367e-06, "loss": 0.6228, "step": 4551 }, { "epoch": 0.48, "grad_norm": 4.742738953366397, "learning_rate": 5.582376668262415e-06, "loss": 0.5916, "step": 4552 }, { "epoch": 0.48, "grad_norm": 2.051227649046525, "learning_rate": 5.580683996042625e-06, "loss": 0.5793, "step": 4553 }, { "epoch": 0.48, "grad_norm": 2.683953068169987, "learning_rate": 5.578991256360649e-06, "loss": 0.6996, "step": 4554 }, { "epoch": 0.48, "grad_norm": 2.4168596403111824, "learning_rate": 5.577298449413141e-06, "loss": 0.59, "step": 4555 }, { "epoch": 0.48, "grad_norm": 2.5891727673576144, "learning_rate": 5.575605575396767e-06, "loss": 0.635, "step": 4556 }, { "epoch": 0.48, "grad_norm": 3.164368406974795, "learning_rate": 5.573912634508203e-06, "loss": 0.6719, "step": 4557 }, { "epoch": 0.48, "grad_norm": 2.1467160524829803, "learning_rate": 5.572219626944128e-06, "loss": 0.6302, "step": 4558 }, { "epoch": 0.48, "grad_norm": 2.9545578011130686, "learning_rate": 5.5705265529012295e-06, "loss": 0.6129, "step": 4559 }, { "epoch": 0.48, "grad_norm": 2.397337716611551, "learning_rate": 5.5688334125762065e-06, "loss": 0.577, "step": 4560 }, { "epoch": 0.48, "grad_norm": 2.5141297243805707, "learning_rate": 5.567140206165762e-06, "loss": 0.6222, "step": 4561 }, { "epoch": 0.48, "grad_norm": 2.5018217036974577, "learning_rate": 5.565446933866607e-06, "loss": 0.6108, "step": 4562 }, { "epoch": 0.48, "grad_norm": 2.280655074209046, "learning_rate": 5.563753595875463e-06, "loss": 0.6282, "step": 4563 }, { "epoch": 0.48, "grad_norm": 6.359831147248012, "learning_rate": 5.562060192389054e-06, "loss": 0.6622, "step": 4564 }, { "epoch": 0.48, "grad_norm": 2.2107889624685333, "learning_rate": 5.560366723604117e-06, "loss": 0.6035, "step": 4565 }, { "epoch": 0.48, "grad_norm": 2.5994180305445815, "learning_rate": 5.558673189717395e-06, "loss": 0.6777, "step": 4566 }, { "epoch": 0.48, "grad_norm": 2.7545314951405593, "learning_rate": 5.556979590925636e-06, "loss": 0.5888, "step": 4567 }, { "epoch": 0.48, "grad_norm": 2.364332000424429, "learning_rate": 5.555285927425599e-06, "loss": 0.6485, "step": 4568 }, { "epoch": 0.48, "grad_norm": 2.5857119479401773, "learning_rate": 5.553592199414047e-06, "loss": 0.6616, "step": 4569 }, { "epoch": 0.48, "grad_norm": 3.15710865319317, "learning_rate": 5.551898407087754e-06, "loss": 0.6485, "step": 4570 }, { "epoch": 0.48, "grad_norm": 2.8154040551745236, "learning_rate": 5.550204550643501e-06, "loss": 0.6448, "step": 4571 }, { "epoch": 0.48, "grad_norm": 2.8762614479514133, "learning_rate": 5.548510630278073e-06, "loss": 0.5749, "step": 4572 }, { "epoch": 0.48, "grad_norm": 3.430020443992809, "learning_rate": 5.5468166461882645e-06, "loss": 0.694, "step": 4573 }, { "epoch": 0.48, "grad_norm": 2.010935891713146, "learning_rate": 5.545122598570879e-06, "loss": 0.5862, "step": 4574 }, { "epoch": 0.48, "grad_norm": 2.755230997400163, "learning_rate": 5.543428487622727e-06, "loss": 0.6561, "step": 4575 }, { "epoch": 0.48, "grad_norm": 4.059923185258385, "learning_rate": 5.5417343135406206e-06, "loss": 0.6963, "step": 4576 }, { "epoch": 0.48, "grad_norm": 4.790230956470769, "learning_rate": 5.54004007652139e-06, "loss": 0.5903, "step": 4577 }, { "epoch": 0.48, "grad_norm": 2.829573606090716, "learning_rate": 5.5383457767618655e-06, "loss": 0.6433, "step": 4578 }, { "epoch": 0.48, "grad_norm": 3.3802992751931424, "learning_rate": 5.5366514144588835e-06, "loss": 0.6504, "step": 4579 }, { "epoch": 0.48, "grad_norm": 2.87227842810873, "learning_rate": 5.534956989809293e-06, "loss": 0.6567, "step": 4580 }, { "epoch": 0.48, "grad_norm": 3.680652735847989, "learning_rate": 5.533262503009944e-06, "loss": 0.6889, "step": 4581 }, { "epoch": 0.48, "grad_norm": 3.381512231517992, "learning_rate": 5.5315679542577e-06, "loss": 0.6544, "step": 4582 }, { "epoch": 0.48, "grad_norm": 2.546877424221789, "learning_rate": 5.529873343749428e-06, "loss": 0.6369, "step": 4583 }, { "epoch": 0.48, "grad_norm": 3.319093075019239, "learning_rate": 5.528178671682002e-06, "loss": 0.5969, "step": 4584 }, { "epoch": 0.48, "grad_norm": 2.453302660315127, "learning_rate": 5.5264839382523035e-06, "loss": 0.5925, "step": 4585 }, { "epoch": 0.48, "grad_norm": 2.3352208256933125, "learning_rate": 5.524789143657226e-06, "loss": 0.6313, "step": 4586 }, { "epoch": 0.48, "grad_norm": 3.6803200585589213, "learning_rate": 5.523094288093659e-06, "loss": 0.6757, "step": 4587 }, { "epoch": 0.48, "grad_norm": 3.4253926993814092, "learning_rate": 5.521399371758511e-06, "loss": 0.6969, "step": 4588 }, { "epoch": 0.48, "grad_norm": 3.223973921567959, "learning_rate": 5.519704394848693e-06, "loss": 0.6356, "step": 4589 }, { "epoch": 0.48, "grad_norm": 3.6909496271121074, "learning_rate": 5.518009357561119e-06, "loss": 0.6763, "step": 4590 }, { "epoch": 0.48, "grad_norm": 2.315901845822254, "learning_rate": 5.516314260092717e-06, "loss": 0.6356, "step": 4591 }, { "epoch": 0.48, "grad_norm": 2.885139970332389, "learning_rate": 5.514619102640415e-06, "loss": 0.6069, "step": 4592 }, { "epoch": 0.48, "grad_norm": 2.566257463657484, "learning_rate": 5.512923885401154e-06, "loss": 0.6844, "step": 4593 }, { "epoch": 0.48, "grad_norm": 1.1239709742116073, "learning_rate": 5.511228608571879e-06, "loss": 0.5754, "step": 4594 }, { "epoch": 0.48, "grad_norm": 3.1944023152638357, "learning_rate": 5.5095332723495425e-06, "loss": 0.6047, "step": 4595 }, { "epoch": 0.48, "grad_norm": 3.0417150262269077, "learning_rate": 5.507837876931102e-06, "loss": 0.6709, "step": 4596 }, { "epoch": 0.48, "grad_norm": 3.1835449500447512, "learning_rate": 5.506142422513525e-06, "loss": 0.6766, "step": 4597 }, { "epoch": 0.48, "grad_norm": 2.4531359904321675, "learning_rate": 5.504446909293786e-06, "loss": 0.6872, "step": 4598 }, { "epoch": 0.48, "grad_norm": 2.9575039883577126, "learning_rate": 5.502751337468862e-06, "loss": 0.6198, "step": 4599 }, { "epoch": 0.48, "grad_norm": 3.254352953269388, "learning_rate": 5.5010557072357395e-06, "loss": 0.7175, "step": 4600 }, { "epoch": 0.48, "grad_norm": 2.5317638836045275, "learning_rate": 5.499360018791416e-06, "loss": 0.626, "step": 4601 }, { "epoch": 0.48, "grad_norm": 3.080250323596298, "learning_rate": 5.497664272332888e-06, "loss": 0.6564, "step": 4602 }, { "epoch": 0.48, "grad_norm": 4.791033529732766, "learning_rate": 5.495968468057164e-06, "loss": 0.6691, "step": 4603 }, { "epoch": 0.48, "grad_norm": 3.576002951182326, "learning_rate": 5.4942726061612564e-06, "loss": 0.7081, "step": 4604 }, { "epoch": 0.48, "grad_norm": 6.002512038489308, "learning_rate": 5.492576686842186e-06, "loss": 0.5352, "step": 4605 }, { "epoch": 0.48, "grad_norm": 3.3897993264643347, "learning_rate": 5.49088071029698e-06, "loss": 0.6308, "step": 4606 }, { "epoch": 0.48, "grad_norm": 2.409849318201643, "learning_rate": 5.489184676722673e-06, "loss": 0.6008, "step": 4607 }, { "epoch": 0.48, "grad_norm": 2.5209078919290677, "learning_rate": 5.487488586316304e-06, "loss": 0.609, "step": 4608 }, { "epoch": 0.49, "grad_norm": 2.3412970675636466, "learning_rate": 5.485792439274919e-06, "loss": 0.644, "step": 4609 }, { "epoch": 0.49, "grad_norm": 0.9448249147025708, "learning_rate": 5.484096235795574e-06, "loss": 0.5841, "step": 4610 }, { "epoch": 0.49, "grad_norm": 5.4569880167790625, "learning_rate": 5.482399976075327e-06, "loss": 0.6169, "step": 4611 }, { "epoch": 0.49, "grad_norm": 4.038821439830503, "learning_rate": 5.4807036603112465e-06, "loss": 0.6924, "step": 4612 }, { "epoch": 0.49, "grad_norm": 2.1488134768098157, "learning_rate": 5.479007288700403e-06, "loss": 0.6386, "step": 4613 }, { "epoch": 0.49, "grad_norm": 3.0696890955921132, "learning_rate": 5.477310861439877e-06, "loss": 0.6493, "step": 4614 }, { "epoch": 0.49, "grad_norm": 2.5185747574560793, "learning_rate": 5.475614378726757e-06, "loss": 0.6438, "step": 4615 }, { "epoch": 0.49, "grad_norm": 2.0897327507354406, "learning_rate": 5.4739178407581315e-06, "loss": 0.5929, "step": 4616 }, { "epoch": 0.49, "grad_norm": 2.6311884639260477, "learning_rate": 5.4722212477311025e-06, "loss": 0.6394, "step": 4617 }, { "epoch": 0.49, "grad_norm": 2.269733417286331, "learning_rate": 5.470524599842773e-06, "loss": 0.5718, "step": 4618 }, { "epoch": 0.49, "grad_norm": 1.021071958636412, "learning_rate": 5.468827897290256e-06, "loss": 0.5784, "step": 4619 }, { "epoch": 0.49, "grad_norm": 3.5006735943403235, "learning_rate": 5.46713114027067e-06, "loss": 0.6056, "step": 4620 }, { "epoch": 0.49, "grad_norm": 1.0266506883030095, "learning_rate": 5.465434328981136e-06, "loss": 0.5808, "step": 4621 }, { "epoch": 0.49, "grad_norm": 3.8304477124966176, "learning_rate": 5.463737463618788e-06, "loss": 0.7006, "step": 4622 }, { "epoch": 0.49, "grad_norm": 2.515291735666162, "learning_rate": 5.462040544380764e-06, "loss": 0.7003, "step": 4623 }, { "epoch": 0.49, "grad_norm": 3.387353672942185, "learning_rate": 5.460343571464203e-06, "loss": 0.6504, "step": 4624 }, { "epoch": 0.49, "grad_norm": 4.141288708078109, "learning_rate": 5.458646545066258e-06, "loss": 0.5682, "step": 4625 }, { "epoch": 0.49, "grad_norm": 2.5542956306423674, "learning_rate": 5.456949465384082e-06, "loss": 0.6874, "step": 4626 }, { "epoch": 0.49, "grad_norm": 2.9922167702054705, "learning_rate": 5.455252332614839e-06, "loss": 0.6838, "step": 4627 }, { "epoch": 0.49, "grad_norm": 2.914946760609991, "learning_rate": 5.453555146955696e-06, "loss": 0.6331, "step": 4628 }, { "epoch": 0.49, "grad_norm": 3.352573285600877, "learning_rate": 5.451857908603826e-06, "loss": 0.635, "step": 4629 }, { "epoch": 0.49, "grad_norm": 2.6733703812603755, "learning_rate": 5.450160617756411e-06, "loss": 0.6508, "step": 4630 }, { "epoch": 0.49, "grad_norm": 2.349826396279672, "learning_rate": 5.448463274610637e-06, "loss": 0.6848, "step": 4631 }, { "epoch": 0.49, "grad_norm": 5.81980906330448, "learning_rate": 5.446765879363697e-06, "loss": 0.6457, "step": 4632 }, { "epoch": 0.49, "grad_norm": 2.651707782689453, "learning_rate": 5.445068432212787e-06, "loss": 0.6972, "step": 4633 }, { "epoch": 0.49, "grad_norm": 2.3655792841646512, "learning_rate": 5.443370933355114e-06, "loss": 0.5924, "step": 4634 }, { "epoch": 0.49, "grad_norm": 2.7313353670254203, "learning_rate": 5.441673382987886e-06, "loss": 0.616, "step": 4635 }, { "epoch": 0.49, "grad_norm": 2.4130469073654335, "learning_rate": 5.439975781308322e-06, "loss": 0.6315, "step": 4636 }, { "epoch": 0.49, "grad_norm": 3.597140459042695, "learning_rate": 5.4382781285136445e-06, "loss": 0.6355, "step": 4637 }, { "epoch": 0.49, "grad_norm": 2.2052663360438993, "learning_rate": 5.436580424801081e-06, "loss": 0.6403, "step": 4638 }, { "epoch": 0.49, "grad_norm": 2.0955992547403572, "learning_rate": 5.434882670367865e-06, "loss": 0.5755, "step": 4639 }, { "epoch": 0.49, "grad_norm": 2.7478098709325316, "learning_rate": 5.4331848654112374e-06, "loss": 0.713, "step": 4640 }, { "epoch": 0.49, "grad_norm": 3.099989483770273, "learning_rate": 5.431487010128445e-06, "loss": 0.6117, "step": 4641 }, { "epoch": 0.49, "grad_norm": 2.7349941290390514, "learning_rate": 5.4297891047167385e-06, "loss": 0.6223, "step": 4642 }, { "epoch": 0.49, "grad_norm": 1.0659832601686883, "learning_rate": 5.428091149373377e-06, "loss": 0.5199, "step": 4643 }, { "epoch": 0.49, "grad_norm": 9.61686418980415, "learning_rate": 5.426393144295623e-06, "loss": 0.6323, "step": 4644 }, { "epoch": 0.49, "grad_norm": 3.013180159122992, "learning_rate": 5.4246950896807445e-06, "loss": 0.6634, "step": 4645 }, { "epoch": 0.49, "grad_norm": 3.7524905717817365, "learning_rate": 5.422996985726019e-06, "loss": 0.5842, "step": 4646 }, { "epoch": 0.49, "grad_norm": 2.2515177137365296, "learning_rate": 5.421298832628729e-06, "loss": 0.6197, "step": 4647 }, { "epoch": 0.49, "grad_norm": 3.427073007294642, "learning_rate": 5.419600630586155e-06, "loss": 0.6538, "step": 4648 }, { "epoch": 0.49, "grad_norm": 2.743461241178892, "learning_rate": 5.417902379795593e-06, "loss": 0.6647, "step": 4649 }, { "epoch": 0.49, "grad_norm": 2.3931710581662897, "learning_rate": 5.416204080454343e-06, "loss": 0.6355, "step": 4650 }, { "epoch": 0.49, "grad_norm": 3.8194753476800356, "learning_rate": 5.414505732759704e-06, "loss": 0.6645, "step": 4651 }, { "epoch": 0.49, "grad_norm": 2.6326764418481194, "learning_rate": 5.412807336908987e-06, "loss": 0.6086, "step": 4652 }, { "epoch": 0.49, "grad_norm": 2.284399679999572, "learning_rate": 5.411108893099508e-06, "loss": 0.5637, "step": 4653 }, { "epoch": 0.49, "grad_norm": 6.7384502492836775, "learning_rate": 5.409410401528586e-06, "loss": 0.622, "step": 4654 }, { "epoch": 0.49, "grad_norm": 3.61822403374306, "learning_rate": 5.4077118623935476e-06, "loss": 0.6361, "step": 4655 }, { "epoch": 0.49, "grad_norm": 4.2720764272007425, "learning_rate": 5.406013275891723e-06, "loss": 0.7198, "step": 4656 }, { "epoch": 0.49, "grad_norm": 3.533871073074416, "learning_rate": 5.404314642220448e-06, "loss": 0.6521, "step": 4657 }, { "epoch": 0.49, "grad_norm": 2.5014645838243834, "learning_rate": 5.40261596157707e-06, "loss": 0.623, "step": 4658 }, { "epoch": 0.49, "grad_norm": 5.187282413928454, "learning_rate": 5.40091723415893e-06, "loss": 0.6687, "step": 4659 }, { "epoch": 0.49, "grad_norm": 2.210081592011959, "learning_rate": 5.399218460163387e-06, "loss": 0.5783, "step": 4660 }, { "epoch": 0.49, "grad_norm": 2.9686826745928863, "learning_rate": 5.397519639787796e-06, "loss": 0.6148, "step": 4661 }, { "epoch": 0.49, "grad_norm": 2.8313621442070436, "learning_rate": 5.395820773229523e-06, "loss": 0.6224, "step": 4662 }, { "epoch": 0.49, "grad_norm": 3.837090983146463, "learning_rate": 5.394121860685937e-06, "loss": 0.5873, "step": 4663 }, { "epoch": 0.49, "grad_norm": 2.7825956148863606, "learning_rate": 5.392422902354413e-06, "loss": 0.6691, "step": 4664 }, { "epoch": 0.49, "grad_norm": 3.3329282867648136, "learning_rate": 5.39072389843233e-06, "loss": 0.6057, "step": 4665 }, { "epoch": 0.49, "grad_norm": 3.5608187127868387, "learning_rate": 5.389024849117074e-06, "loss": 0.705, "step": 4666 }, { "epoch": 0.49, "grad_norm": 2.706689822341293, "learning_rate": 5.387325754606035e-06, "loss": 0.7137, "step": 4667 }, { "epoch": 0.49, "grad_norm": 2.5224351370813674, "learning_rate": 5.3856266150966094e-06, "loss": 0.6446, "step": 4668 }, { "epoch": 0.49, "grad_norm": 4.120747037478334, "learning_rate": 5.3839274307862e-06, "loss": 0.6706, "step": 4669 }, { "epoch": 0.49, "grad_norm": 3.6521412088636325, "learning_rate": 5.3822282018722085e-06, "loss": 0.7024, "step": 4670 }, { "epoch": 0.49, "grad_norm": 2.364628947418871, "learning_rate": 5.380528928552052e-06, "loss": 0.7261, "step": 4671 }, { "epoch": 0.49, "grad_norm": 2.7339482051464534, "learning_rate": 5.378829611023144e-06, "loss": 0.6781, "step": 4672 }, { "epoch": 0.49, "grad_norm": 2.478305759343481, "learning_rate": 5.377130249482907e-06, "loss": 0.6225, "step": 4673 }, { "epoch": 0.49, "grad_norm": 2.411215799916555, "learning_rate": 5.3754308441287675e-06, "loss": 0.6103, "step": 4674 }, { "epoch": 0.49, "grad_norm": 2.42557822909133, "learning_rate": 5.3737313951581575e-06, "loss": 0.5748, "step": 4675 }, { "epoch": 0.49, "grad_norm": 2.6013955730359046, "learning_rate": 5.372031902768514e-06, "loss": 0.5715, "step": 4676 }, { "epoch": 0.49, "grad_norm": 3.1819025201627946, "learning_rate": 5.370332367157281e-06, "loss": 0.7339, "step": 4677 }, { "epoch": 0.49, "grad_norm": 2.588545731682473, "learning_rate": 5.368632788521903e-06, "loss": 0.6315, "step": 4678 }, { "epoch": 0.49, "grad_norm": 3.008669398229263, "learning_rate": 5.3669331670598335e-06, "loss": 0.7626, "step": 4679 }, { "epoch": 0.49, "grad_norm": 3.43609159650542, "learning_rate": 5.36523350296853e-06, "loss": 0.7089, "step": 4680 }, { "epoch": 0.49, "grad_norm": 3.121151492979661, "learning_rate": 5.363533796445452e-06, "loss": 0.6616, "step": 4681 }, { "epoch": 0.49, "grad_norm": 2.7034182260790494, "learning_rate": 5.361834047688071e-06, "loss": 0.6528, "step": 4682 }, { "epoch": 0.49, "grad_norm": 5.478076643546118, "learning_rate": 5.360134256893854e-06, "loss": 0.6114, "step": 4683 }, { "epoch": 0.49, "grad_norm": 2.6079287424630264, "learning_rate": 5.35843442426028e-06, "loss": 0.6421, "step": 4684 }, { "epoch": 0.49, "grad_norm": 2.685462278835273, "learning_rate": 5.356734549984832e-06, "loss": 0.5111, "step": 4685 }, { "epoch": 0.49, "grad_norm": 2.979817349950144, "learning_rate": 5.355034634264996e-06, "loss": 0.6508, "step": 4686 }, { "epoch": 0.49, "grad_norm": 3.0200409785087854, "learning_rate": 5.353334677298261e-06, "loss": 0.6656, "step": 4687 }, { "epoch": 0.49, "grad_norm": 2.646554726463927, "learning_rate": 5.351634679282125e-06, "loss": 0.5935, "step": 4688 }, { "epoch": 0.49, "grad_norm": 2.6235037363889915, "learning_rate": 5.349934640414089e-06, "loss": 0.6812, "step": 4689 }, { "epoch": 0.49, "grad_norm": 9.07421728360287, "learning_rate": 5.348234560891657e-06, "loss": 0.6569, "step": 4690 }, { "epoch": 0.49, "grad_norm": 36.97843704858725, "learning_rate": 5.346534440912341e-06, "loss": 0.6354, "step": 4691 }, { "epoch": 0.49, "grad_norm": 2.4594375427225232, "learning_rate": 5.3448342806736545e-06, "loss": 0.6174, "step": 4692 }, { "epoch": 0.49, "grad_norm": 2.8018238508168425, "learning_rate": 5.34313408037312e-06, "loss": 0.5667, "step": 4693 }, { "epoch": 0.49, "grad_norm": 3.009916868253691, "learning_rate": 5.341433840208258e-06, "loss": 0.7038, "step": 4694 }, { "epoch": 0.49, "grad_norm": 4.258548590941279, "learning_rate": 5.339733560376601e-06, "loss": 0.719, "step": 4695 }, { "epoch": 0.49, "grad_norm": 2.61623774709776, "learning_rate": 5.33803324107568e-06, "loss": 0.7362, "step": 4696 }, { "epoch": 0.49, "grad_norm": 2.7165699079262935, "learning_rate": 5.336332882503034e-06, "loss": 0.6397, "step": 4697 }, { "epoch": 0.49, "grad_norm": 2.479128754751253, "learning_rate": 5.334632484856206e-06, "loss": 0.6062, "step": 4698 }, { "epoch": 0.49, "grad_norm": 3.0822937031597597, "learning_rate": 5.332932048332744e-06, "loss": 0.6856, "step": 4699 }, { "epoch": 0.49, "grad_norm": 2.670085477402618, "learning_rate": 5.331231573130199e-06, "loss": 0.6451, "step": 4700 }, { "epoch": 0.49, "grad_norm": 2.710855683571757, "learning_rate": 5.329531059446127e-06, "loss": 0.5651, "step": 4701 }, { "epoch": 0.49, "grad_norm": 4.256008895457131, "learning_rate": 5.327830507478089e-06, "loss": 0.6252, "step": 4702 }, { "epoch": 0.49, "grad_norm": 2.1096958021617995, "learning_rate": 5.32612991742365e-06, "loss": 0.5849, "step": 4703 }, { "epoch": 0.5, "grad_norm": 3.5793139887216263, "learning_rate": 5.32442928948038e-06, "loss": 0.6785, "step": 4704 }, { "epoch": 0.5, "grad_norm": 2.7963244411549644, "learning_rate": 5.322728623845853e-06, "loss": 0.6408, "step": 4705 }, { "epoch": 0.5, "grad_norm": 2.182317971739211, "learning_rate": 5.321027920717649e-06, "loss": 0.6363, "step": 4706 }, { "epoch": 0.5, "grad_norm": 3.7507644271214393, "learning_rate": 5.319327180293347e-06, "loss": 0.635, "step": 4707 }, { "epoch": 0.5, "grad_norm": 2.075170533407163, "learning_rate": 5.317626402770537e-06, "loss": 0.6228, "step": 4708 }, { "epoch": 0.5, "grad_norm": 2.841933692244469, "learning_rate": 5.3159255883468095e-06, "loss": 0.6796, "step": 4709 }, { "epoch": 0.5, "grad_norm": 3.6806157165268347, "learning_rate": 5.314224737219761e-06, "loss": 0.6646, "step": 4710 }, { "epoch": 0.5, "grad_norm": 2.533486664828576, "learning_rate": 5.31252384958699e-06, "loss": 0.711, "step": 4711 }, { "epoch": 0.5, "grad_norm": 2.7604461850517406, "learning_rate": 5.310822925646103e-06, "loss": 0.604, "step": 4712 }, { "epoch": 0.5, "grad_norm": 2.578358994492397, "learning_rate": 5.309121965594706e-06, "loss": 0.6351, "step": 4713 }, { "epoch": 0.5, "grad_norm": 2.579513564368866, "learning_rate": 5.307420969630412e-06, "loss": 0.7123, "step": 4714 }, { "epoch": 0.5, "grad_norm": 3.582532356341639, "learning_rate": 5.30571993795084e-06, "loss": 0.6549, "step": 4715 }, { "epoch": 0.5, "grad_norm": 2.5183375541671986, "learning_rate": 5.304018870753608e-06, "loss": 0.6463, "step": 4716 }, { "epoch": 0.5, "grad_norm": 1.0420447788319127, "learning_rate": 5.3023177682363435e-06, "loss": 0.5367, "step": 4717 }, { "epoch": 0.5, "grad_norm": 2.294424245337193, "learning_rate": 5.300616630596673e-06, "loss": 0.6515, "step": 4718 }, { "epoch": 0.5, "grad_norm": 1.0335405308223342, "learning_rate": 5.298915458032233e-06, "loss": 0.5989, "step": 4719 }, { "epoch": 0.5, "grad_norm": 1.0427016248786705, "learning_rate": 5.297214250740658e-06, "loss": 0.5918, "step": 4720 }, { "epoch": 0.5, "grad_norm": 3.004325778129872, "learning_rate": 5.295513008919592e-06, "loss": 0.6773, "step": 4721 }, { "epoch": 0.5, "grad_norm": 2.3701521965706323, "learning_rate": 5.293811732766677e-06, "loss": 0.6755, "step": 4722 }, { "epoch": 0.5, "grad_norm": 3.111246958207434, "learning_rate": 5.292110422479565e-06, "loss": 0.6284, "step": 4723 }, { "epoch": 0.5, "grad_norm": 3.018425742396122, "learning_rate": 5.290409078255909e-06, "loss": 0.6031, "step": 4724 }, { "epoch": 0.5, "grad_norm": 0.9869690779073568, "learning_rate": 5.288707700293365e-06, "loss": 0.5695, "step": 4725 }, { "epoch": 0.5, "grad_norm": 1.1072157586028053, "learning_rate": 5.287006288789596e-06, "loss": 0.5593, "step": 4726 }, { "epoch": 0.5, "grad_norm": 2.3663187157291676, "learning_rate": 5.285304843942265e-06, "loss": 0.702, "step": 4727 }, { "epoch": 0.5, "grad_norm": 9.046850596094636, "learning_rate": 5.283603365949043e-06, "loss": 0.7287, "step": 4728 }, { "epoch": 0.5, "grad_norm": 0.9462619367998221, "learning_rate": 5.2819018550076e-06, "loss": 0.5409, "step": 4729 }, { "epoch": 0.5, "grad_norm": 2.1605371729694633, "learning_rate": 5.280200311315616e-06, "loss": 0.6896, "step": 4730 }, { "epoch": 0.5, "grad_norm": 2.9185781197431124, "learning_rate": 5.278498735070769e-06, "loss": 0.6377, "step": 4731 }, { "epoch": 0.5, "grad_norm": 3.5719498173680098, "learning_rate": 5.2767971264707445e-06, "loss": 0.6695, "step": 4732 }, { "epoch": 0.5, "grad_norm": 2.8324365081609333, "learning_rate": 5.27509548571323e-06, "loss": 0.5886, "step": 4733 }, { "epoch": 0.5, "grad_norm": 2.705346452985015, "learning_rate": 5.273393812995917e-06, "loss": 0.6509, "step": 4734 }, { "epoch": 0.5, "grad_norm": 3.9814173861586815, "learning_rate": 5.271692108516501e-06, "loss": 0.7016, "step": 4735 }, { "epoch": 0.5, "grad_norm": 6.424011098877338, "learning_rate": 5.269990372472682e-06, "loss": 0.6616, "step": 4736 }, { "epoch": 0.5, "grad_norm": 2.1493045527463837, "learning_rate": 5.2682886050621604e-06, "loss": 0.6617, "step": 4737 }, { "epoch": 0.5, "grad_norm": 3.716615071908531, "learning_rate": 5.266586806482646e-06, "loss": 0.6362, "step": 4738 }, { "epoch": 0.5, "grad_norm": 3.118626444685795, "learning_rate": 5.264884976931845e-06, "loss": 0.6424, "step": 4739 }, { "epoch": 0.5, "grad_norm": 2.541880011041433, "learning_rate": 5.263183116607474e-06, "loss": 0.5951, "step": 4740 }, { "epoch": 0.5, "grad_norm": 1.1285558503464246, "learning_rate": 5.261481225707251e-06, "loss": 0.588, "step": 4741 }, { "epoch": 0.5, "grad_norm": 2.916993130135385, "learning_rate": 5.259779304428893e-06, "loss": 0.6042, "step": 4742 }, { "epoch": 0.5, "grad_norm": 2.7254833052810423, "learning_rate": 5.258077352970128e-06, "loss": 0.6621, "step": 4743 }, { "epoch": 0.5, "grad_norm": 3.462602034967814, "learning_rate": 5.256375371528681e-06, "loss": 0.5998, "step": 4744 }, { "epoch": 0.5, "grad_norm": 2.520686792805083, "learning_rate": 5.254673360302284e-06, "loss": 0.6472, "step": 4745 }, { "epoch": 0.5, "grad_norm": 3.459934646842888, "learning_rate": 5.252971319488672e-06, "loss": 0.6504, "step": 4746 }, { "epoch": 0.5, "grad_norm": 1.0506864779125666, "learning_rate": 5.2512692492855845e-06, "loss": 0.5606, "step": 4747 }, { "epoch": 0.5, "grad_norm": 2.191202095280542, "learning_rate": 5.249567149890762e-06, "loss": 0.6614, "step": 4748 }, { "epoch": 0.5, "grad_norm": 10.032018653356111, "learning_rate": 5.247865021501949e-06, "loss": 0.608, "step": 4749 }, { "epoch": 0.5, "grad_norm": 3.5143563476752124, "learning_rate": 5.2461628643168935e-06, "loss": 0.5829, "step": 4750 }, { "epoch": 0.5, "grad_norm": 3.066214123433812, "learning_rate": 5.244460678533349e-06, "loss": 0.6338, "step": 4751 }, { "epoch": 0.5, "grad_norm": 3.7234112282653116, "learning_rate": 5.24275846434907e-06, "loss": 0.6476, "step": 4752 }, { "epoch": 0.5, "grad_norm": 2.5886795596777463, "learning_rate": 5.2410562219618135e-06, "loss": 0.652, "step": 4753 }, { "epoch": 0.5, "grad_norm": 2.5480441407074443, "learning_rate": 5.239353951569342e-06, "loss": 0.5687, "step": 4754 }, { "epoch": 0.5, "grad_norm": 2.4463031639209403, "learning_rate": 5.2376516533694196e-06, "loss": 0.5338, "step": 4755 }, { "epoch": 0.5, "grad_norm": 2.257125530689949, "learning_rate": 5.235949327559817e-06, "loss": 0.6191, "step": 4756 }, { "epoch": 0.5, "grad_norm": 2.1011431880055143, "learning_rate": 5.2342469743383026e-06, "loss": 0.6559, "step": 4757 }, { "epoch": 0.5, "grad_norm": 3.487907737324848, "learning_rate": 5.232544593902652e-06, "loss": 0.6932, "step": 4758 }, { "epoch": 0.5, "grad_norm": 2.402578163767159, "learning_rate": 5.230842186450642e-06, "loss": 0.6343, "step": 4759 }, { "epoch": 0.5, "grad_norm": 3.077478548938603, "learning_rate": 5.2291397521800545e-06, "loss": 0.6685, "step": 4760 }, { "epoch": 0.5, "grad_norm": 2.2841356416265044, "learning_rate": 5.227437291288674e-06, "loss": 0.6646, "step": 4761 }, { "epoch": 0.5, "grad_norm": 2.7800487949493458, "learning_rate": 5.225734803974285e-06, "loss": 0.6371, "step": 4762 }, { "epoch": 0.5, "grad_norm": 2.2976515988461337, "learning_rate": 5.22403229043468e-06, "loss": 0.6387, "step": 4763 }, { "epoch": 0.5, "grad_norm": 2.6430943743263358, "learning_rate": 5.222329750867649e-06, "loss": 0.6613, "step": 4764 }, { "epoch": 0.5, "grad_norm": 2.659848728533461, "learning_rate": 5.220627185470993e-06, "loss": 0.614, "step": 4765 }, { "epoch": 0.5, "grad_norm": 3.5083748901897884, "learning_rate": 5.218924594442507e-06, "loss": 0.5716, "step": 4766 }, { "epoch": 0.5, "grad_norm": 2.8298517158269325, "learning_rate": 5.217221977979996e-06, "loss": 0.6882, "step": 4767 }, { "epoch": 0.5, "grad_norm": 3.7161705013278534, "learning_rate": 5.215519336281261e-06, "loss": 0.7084, "step": 4768 }, { "epoch": 0.5, "grad_norm": 2.4592185775923303, "learning_rate": 5.213816669544114e-06, "loss": 0.5649, "step": 4769 }, { "epoch": 0.5, "grad_norm": 2.7485436454433247, "learning_rate": 5.2121139779663645e-06, "loss": 0.6664, "step": 4770 }, { "epoch": 0.5, "grad_norm": 3.0981595313863677, "learning_rate": 5.2104112617458254e-06, "loss": 0.6766, "step": 4771 }, { "epoch": 0.5, "grad_norm": 4.0052501193362255, "learning_rate": 5.2087085210803145e-06, "loss": 0.7007, "step": 4772 }, { "epoch": 0.5, "grad_norm": 3.1047565880215737, "learning_rate": 5.207005756167651e-06, "loss": 0.6448, "step": 4773 }, { "epoch": 0.5, "grad_norm": 2.792375318373496, "learning_rate": 5.205302967205657e-06, "loss": 0.6724, "step": 4774 }, { "epoch": 0.5, "grad_norm": 2.51567986610389, "learning_rate": 5.203600154392158e-06, "loss": 0.606, "step": 4775 }, { "epoch": 0.5, "grad_norm": 2.5125662323316287, "learning_rate": 5.2018973179249824e-06, "loss": 0.6156, "step": 4776 }, { "epoch": 0.5, "grad_norm": 2.2159857718006224, "learning_rate": 5.200194458001958e-06, "loss": 0.5654, "step": 4777 }, { "epoch": 0.5, "grad_norm": 2.9452532347483427, "learning_rate": 5.198491574820923e-06, "loss": 0.603, "step": 4778 }, { "epoch": 0.5, "grad_norm": 6.142355113115631, "learning_rate": 5.196788668579708e-06, "loss": 0.6565, "step": 4779 }, { "epoch": 0.5, "grad_norm": 2.2887002234975764, "learning_rate": 5.195085739476156e-06, "loss": 0.6851, "step": 4780 }, { "epoch": 0.5, "grad_norm": 3.085541461688833, "learning_rate": 5.193382787708106e-06, "loss": 0.5886, "step": 4781 }, { "epoch": 0.5, "grad_norm": 7.500928456155343, "learning_rate": 5.191679813473402e-06, "loss": 0.5474, "step": 4782 }, { "epoch": 0.5, "grad_norm": 6.985875727924238, "learning_rate": 5.189976816969892e-06, "loss": 0.6326, "step": 4783 }, { "epoch": 0.5, "grad_norm": 3.1970563840062383, "learning_rate": 5.188273798395425e-06, "loss": 0.7014, "step": 4784 }, { "epoch": 0.5, "grad_norm": 3.241455526944972, "learning_rate": 5.186570757947852e-06, "loss": 0.731, "step": 4785 }, { "epoch": 0.5, "grad_norm": 3.576374543674953, "learning_rate": 5.1848676958250265e-06, "loss": 0.6363, "step": 4786 }, { "epoch": 0.5, "grad_norm": 2.335548183754556, "learning_rate": 5.183164612224809e-06, "loss": 0.583, "step": 4787 }, { "epoch": 0.5, "grad_norm": 2.4154405135354136, "learning_rate": 5.181461507345054e-06, "loss": 0.5799, "step": 4788 }, { "epoch": 0.5, "grad_norm": 3.5423104288104916, "learning_rate": 5.1797583813836285e-06, "loss": 0.6326, "step": 4789 }, { "epoch": 0.5, "grad_norm": 2.5045963993199747, "learning_rate": 5.178055234538391e-06, "loss": 0.6283, "step": 4790 }, { "epoch": 0.5, "grad_norm": 2.0244708876329987, "learning_rate": 5.176352067007213e-06, "loss": 0.6263, "step": 4791 }, { "epoch": 0.5, "grad_norm": 2.255397053263638, "learning_rate": 5.174648878987959e-06, "loss": 0.6359, "step": 4792 }, { "epoch": 0.5, "grad_norm": 3.722706053854962, "learning_rate": 5.1729456706785055e-06, "loss": 0.6435, "step": 4793 }, { "epoch": 0.5, "grad_norm": 2.075448749546548, "learning_rate": 5.1712424422767224e-06, "loss": 0.6811, "step": 4794 }, { "epoch": 0.5, "grad_norm": 2.688883878063154, "learning_rate": 5.169539193980489e-06, "loss": 0.6266, "step": 4795 }, { "epoch": 0.5, "grad_norm": 2.9204150200499615, "learning_rate": 5.1678359259876824e-06, "loss": 0.6795, "step": 4796 }, { "epoch": 0.5, "grad_norm": 1.1559522338055792, "learning_rate": 5.1661326384961805e-06, "loss": 0.5607, "step": 4797 }, { "epoch": 0.5, "grad_norm": 1.9541032407056105, "learning_rate": 5.164429331703871e-06, "loss": 0.6785, "step": 4798 }, { "epoch": 0.5, "grad_norm": 0.8942781057490377, "learning_rate": 5.162726005808636e-06, "loss": 0.5714, "step": 4799 }, { "epoch": 0.51, "grad_norm": 1.079312812042359, "learning_rate": 5.1610226610083655e-06, "loss": 0.5694, "step": 4800 }, { "epoch": 0.51, "grad_norm": 3.277923548372197, "learning_rate": 5.159319297500945e-06, "loss": 0.6173, "step": 4801 }, { "epoch": 0.51, "grad_norm": 2.986097736979872, "learning_rate": 5.157615915484273e-06, "loss": 0.6071, "step": 4802 }, { "epoch": 0.51, "grad_norm": 3.2600607771581642, "learning_rate": 5.155912515156236e-06, "loss": 0.6338, "step": 4803 }, { "epoch": 0.51, "grad_norm": 4.444448387829765, "learning_rate": 5.154209096714736e-06, "loss": 0.635, "step": 4804 }, { "epoch": 0.51, "grad_norm": 2.2143014150947318, "learning_rate": 5.152505660357667e-06, "loss": 0.6542, "step": 4805 }, { "epoch": 0.51, "grad_norm": 2.685218090583335, "learning_rate": 5.150802206282932e-06, "loss": 0.5947, "step": 4806 }, { "epoch": 0.51, "grad_norm": 2.593209127978214, "learning_rate": 5.149098734688434e-06, "loss": 0.6186, "step": 4807 }, { "epoch": 0.51, "grad_norm": 4.2086494702555886, "learning_rate": 5.147395245772074e-06, "loss": 0.737, "step": 4808 }, { "epoch": 0.51, "grad_norm": 10.188890598572003, "learning_rate": 5.145691739731761e-06, "loss": 0.6247, "step": 4809 }, { "epoch": 0.51, "grad_norm": 9.44137252928556, "learning_rate": 5.143988216765402e-06, "loss": 0.6595, "step": 4810 }, { "epoch": 0.51, "grad_norm": 3.172419643746679, "learning_rate": 5.142284677070911e-06, "loss": 0.6992, "step": 4811 }, { "epoch": 0.51, "grad_norm": 2.952102588392533, "learning_rate": 5.140581120846194e-06, "loss": 0.6163, "step": 4812 }, { "epoch": 0.51, "grad_norm": 2.6735180292043665, "learning_rate": 5.138877548289173e-06, "loss": 0.6745, "step": 4813 }, { "epoch": 0.51, "grad_norm": 3.3271844953089964, "learning_rate": 5.137173959597755e-06, "loss": 0.7083, "step": 4814 }, { "epoch": 0.51, "grad_norm": 5.779885373526377, "learning_rate": 5.135470354969867e-06, "loss": 0.5758, "step": 4815 }, { "epoch": 0.51, "grad_norm": 3.880291332502046, "learning_rate": 5.1337667346034226e-06, "loss": 0.6426, "step": 4816 }, { "epoch": 0.51, "grad_norm": 2.789682189366147, "learning_rate": 5.132063098696346e-06, "loss": 0.6936, "step": 4817 }, { "epoch": 0.51, "grad_norm": 2.5838279711205714, "learning_rate": 5.130359447446561e-06, "loss": 0.6659, "step": 4818 }, { "epoch": 0.51, "grad_norm": 3.4258511041007518, "learning_rate": 5.128655781051991e-06, "loss": 0.6169, "step": 4819 }, { "epoch": 0.51, "grad_norm": 3.6136439580060657, "learning_rate": 5.126952099710566e-06, "loss": 0.6806, "step": 4820 }, { "epoch": 0.51, "grad_norm": 3.209126945224101, "learning_rate": 5.125248403620211e-06, "loss": 0.6683, "step": 4821 }, { "epoch": 0.51, "grad_norm": 2.809317990932967, "learning_rate": 5.12354469297886e-06, "loss": 0.6826, "step": 4822 }, { "epoch": 0.51, "grad_norm": 3.2384938127477425, "learning_rate": 5.121840967984443e-06, "loss": 0.6434, "step": 4823 }, { "epoch": 0.51, "grad_norm": 2.2368138450617656, "learning_rate": 5.120137228834896e-06, "loss": 0.6217, "step": 4824 }, { "epoch": 0.51, "grad_norm": 2.452259843593572, "learning_rate": 5.1184334757281506e-06, "loss": 0.6101, "step": 4825 }, { "epoch": 0.51, "grad_norm": 2.585727076856822, "learning_rate": 5.1167297088621485e-06, "loss": 0.6537, "step": 4826 }, { "epoch": 0.51, "grad_norm": 4.454616726505247, "learning_rate": 5.1150259284348246e-06, "loss": 0.571, "step": 4827 }, { "epoch": 0.51, "grad_norm": 4.132538433226866, "learning_rate": 5.113322134644122e-06, "loss": 0.6571, "step": 4828 }, { "epoch": 0.51, "grad_norm": 2.143250401873857, "learning_rate": 5.111618327687981e-06, "loss": 0.5883, "step": 4829 }, { "epoch": 0.51, "grad_norm": 6.356716637447877, "learning_rate": 5.109914507764345e-06, "loss": 0.6736, "step": 4830 }, { "epoch": 0.51, "grad_norm": 2.7253830188411547, "learning_rate": 5.108210675071159e-06, "loss": 0.6881, "step": 4831 }, { "epoch": 0.51, "grad_norm": 3.4516235016563788, "learning_rate": 5.1065068298063705e-06, "loss": 0.6512, "step": 4832 }, { "epoch": 0.51, "grad_norm": 2.7073002106917237, "learning_rate": 5.104802972167926e-06, "loss": 0.6957, "step": 4833 }, { "epoch": 0.51, "grad_norm": 2.7841704207983984, "learning_rate": 5.103099102353775e-06, "loss": 0.5775, "step": 4834 }, { "epoch": 0.51, "grad_norm": 7.9292838099679965, "learning_rate": 5.101395220561869e-06, "loss": 0.7005, "step": 4835 }, { "epoch": 0.51, "grad_norm": 2.7773812390741077, "learning_rate": 5.099691326990158e-06, "loss": 0.5243, "step": 4836 }, { "epoch": 0.51, "grad_norm": 3.26055135706108, "learning_rate": 5.0979874218365985e-06, "loss": 0.6554, "step": 4837 }, { "epoch": 0.51, "grad_norm": 2.93701844263028, "learning_rate": 5.096283505299142e-06, "loss": 0.5827, "step": 4838 }, { "epoch": 0.51, "grad_norm": 2.661195675756214, "learning_rate": 5.094579577575748e-06, "loss": 0.7464, "step": 4839 }, { "epoch": 0.51, "grad_norm": 4.4434201960292805, "learning_rate": 5.09287563886437e-06, "loss": 0.6926, "step": 4840 }, { "epoch": 0.51, "grad_norm": 2.8619375312083424, "learning_rate": 5.0911716893629695e-06, "loss": 0.7326, "step": 4841 }, { "epoch": 0.51, "grad_norm": 4.179918987044076, "learning_rate": 5.089467729269506e-06, "loss": 0.6202, "step": 4842 }, { "epoch": 0.51, "grad_norm": 2.3674261810438875, "learning_rate": 5.087763758781941e-06, "loss": 0.5401, "step": 4843 }, { "epoch": 0.51, "grad_norm": 2.954478033527312, "learning_rate": 5.0860597780982345e-06, "loss": 0.6457, "step": 4844 }, { "epoch": 0.51, "grad_norm": 6.1055773678841705, "learning_rate": 5.084355787416352e-06, "loss": 0.7077, "step": 4845 }, { "epoch": 0.51, "grad_norm": 3.104870089140017, "learning_rate": 5.08265178693426e-06, "loss": 0.5571, "step": 4846 }, { "epoch": 0.51, "grad_norm": 3.282087698903521, "learning_rate": 5.08094777684992e-06, "loss": 0.7171, "step": 4847 }, { "epoch": 0.51, "grad_norm": 5.212353373051497, "learning_rate": 5.079243757361304e-06, "loss": 0.6634, "step": 4848 }, { "epoch": 0.51, "grad_norm": 3.151447480509918, "learning_rate": 5.077539728666374e-06, "loss": 0.6715, "step": 4849 }, { "epoch": 0.51, "grad_norm": 2.7192044541711002, "learning_rate": 5.0758356909631055e-06, "loss": 0.6798, "step": 4850 }, { "epoch": 0.51, "grad_norm": 2.7229617539344644, "learning_rate": 5.074131644449462e-06, "loss": 0.6132, "step": 4851 }, { "epoch": 0.51, "grad_norm": 6.5465212384001665, "learning_rate": 5.072427589323422e-06, "loss": 0.6457, "step": 4852 }, { "epoch": 0.51, "grad_norm": 2.539686113044392, "learning_rate": 5.0707235257829525e-06, "loss": 0.6064, "step": 4853 }, { "epoch": 0.51, "grad_norm": 2.8359956675113813, "learning_rate": 5.069019454026028e-06, "loss": 0.6889, "step": 4854 }, { "epoch": 0.51, "grad_norm": 3.075569786140454, "learning_rate": 5.067315374250623e-06, "loss": 0.6786, "step": 4855 }, { "epoch": 0.51, "grad_norm": 2.936345810031271, "learning_rate": 5.065611286654712e-06, "loss": 0.6724, "step": 4856 }, { "epoch": 0.51, "grad_norm": 2.7223753328820366, "learning_rate": 5.063907191436274e-06, "loss": 0.6038, "step": 4857 }, { "epoch": 0.51, "grad_norm": 2.747137280849216, "learning_rate": 5.062203088793279e-06, "loss": 0.6451, "step": 4858 }, { "epoch": 0.51, "grad_norm": 2.534090883953568, "learning_rate": 5.060498978923713e-06, "loss": 0.7221, "step": 4859 }, { "epoch": 0.51, "grad_norm": 2.8222282010733877, "learning_rate": 5.058794862025548e-06, "loss": 0.6084, "step": 4860 }, { "epoch": 0.51, "grad_norm": 3.0443913355409364, "learning_rate": 5.057090738296767e-06, "loss": 0.6554, "step": 4861 }, { "epoch": 0.51, "grad_norm": 3.2515144608074653, "learning_rate": 5.055386607935347e-06, "loss": 0.5725, "step": 4862 }, { "epoch": 0.51, "grad_norm": 1.1037680116930089, "learning_rate": 5.053682471139275e-06, "loss": 0.5484, "step": 4863 }, { "epoch": 0.51, "grad_norm": 3.5575549338006436, "learning_rate": 5.051978328106525e-06, "loss": 0.6632, "step": 4864 }, { "epoch": 0.51, "grad_norm": 2.962207242548026, "learning_rate": 5.050274179035084e-06, "loss": 0.6792, "step": 4865 }, { "epoch": 0.51, "grad_norm": 3.5877705132080306, "learning_rate": 5.048570024122935e-06, "loss": 0.6326, "step": 4866 }, { "epoch": 0.51, "grad_norm": 6.322301722453874, "learning_rate": 5.046865863568061e-06, "loss": 0.7552, "step": 4867 }, { "epoch": 0.51, "grad_norm": 6.682490269010231, "learning_rate": 5.045161697568446e-06, "loss": 0.6046, "step": 4868 }, { "epoch": 0.51, "grad_norm": 3.7731626743807523, "learning_rate": 5.0434575263220745e-06, "loss": 0.6787, "step": 4869 }, { "epoch": 0.51, "grad_norm": 2.898042881785744, "learning_rate": 5.041753350026936e-06, "loss": 0.5913, "step": 4870 }, { "epoch": 0.51, "grad_norm": 5.927842102069006, "learning_rate": 5.0400491688810105e-06, "loss": 0.6178, "step": 4871 }, { "epoch": 0.51, "grad_norm": 1.1051954463354348, "learning_rate": 5.038344983082292e-06, "loss": 0.5853, "step": 4872 }, { "epoch": 0.51, "grad_norm": 2.71794682870089, "learning_rate": 5.036640792828761e-06, "loss": 0.6183, "step": 4873 }, { "epoch": 0.51, "grad_norm": 3.746461374115521, "learning_rate": 5.0349365983184105e-06, "loss": 0.6803, "step": 4874 }, { "epoch": 0.51, "grad_norm": 2.2340308167373597, "learning_rate": 5.033232399749226e-06, "loss": 0.6801, "step": 4875 }, { "epoch": 0.51, "grad_norm": 2.9189306232219194, "learning_rate": 5.031528197319197e-06, "loss": 0.6212, "step": 4876 }, { "epoch": 0.51, "grad_norm": 2.3120947610822853, "learning_rate": 5.0298239912263145e-06, "loss": 0.6743, "step": 4877 }, { "epoch": 0.51, "grad_norm": 2.838555185528894, "learning_rate": 5.028119781668566e-06, "loss": 0.6603, "step": 4878 }, { "epoch": 0.51, "grad_norm": 1.8865667092436296, "learning_rate": 5.026415568843943e-06, "loss": 0.4771, "step": 4879 }, { "epoch": 0.51, "grad_norm": 2.7027338628136923, "learning_rate": 5.024711352950435e-06, "loss": 0.6176, "step": 4880 }, { "epoch": 0.51, "grad_norm": 3.3089641577867184, "learning_rate": 5.023007134186035e-06, "loss": 0.5705, "step": 4881 }, { "epoch": 0.51, "grad_norm": 2.1851775810718976, "learning_rate": 5.0213029127487315e-06, "loss": 0.5628, "step": 4882 }, { "epoch": 0.51, "grad_norm": 3.5359776818740016, "learning_rate": 5.0195986888365175e-06, "loss": 0.6628, "step": 4883 }, { "epoch": 0.51, "grad_norm": 2.6067456420321014, "learning_rate": 5.017894462647383e-06, "loss": 0.7029, "step": 4884 }, { "epoch": 0.51, "grad_norm": 2.532052200950741, "learning_rate": 5.0161902343793245e-06, "loss": 0.6756, "step": 4885 }, { "epoch": 0.51, "grad_norm": 2.4514973771189186, "learning_rate": 5.014486004230329e-06, "loss": 0.6228, "step": 4886 }, { "epoch": 0.51, "grad_norm": 2.9144203054617583, "learning_rate": 5.012781772398392e-06, "loss": 0.5954, "step": 4887 }, { "epoch": 0.51, "grad_norm": 3.5304442748367006, "learning_rate": 5.011077539081506e-06, "loss": 0.6309, "step": 4888 }, { "epoch": 0.51, "grad_norm": 2.5115992891771706, "learning_rate": 5.009373304477663e-06, "loss": 0.65, "step": 4889 }, { "epoch": 0.51, "grad_norm": 6.842298359852735, "learning_rate": 5.007669068784857e-06, "loss": 0.5684, "step": 4890 }, { "epoch": 0.51, "grad_norm": 2.293124571221643, "learning_rate": 5.005964832201079e-06, "loss": 0.6347, "step": 4891 }, { "epoch": 0.51, "grad_norm": 12.289205214511725, "learning_rate": 5.004260594924327e-06, "loss": 0.649, "step": 4892 }, { "epoch": 0.51, "grad_norm": 2.6036414189539125, "learning_rate": 5.002556357152589e-06, "loss": 0.6041, "step": 4893 }, { "epoch": 0.51, "grad_norm": 2.7202200557498992, "learning_rate": 5.000852119083863e-06, "loss": 0.5758, "step": 4894 }, { "epoch": 0.52, "grad_norm": 2.769388945590125, "learning_rate": 4.999147880916139e-06, "loss": 0.6113, "step": 4895 }, { "epoch": 0.52, "grad_norm": 2.9093269736434353, "learning_rate": 4.997443642847412e-06, "loss": 0.737, "step": 4896 }, { "epoch": 0.52, "grad_norm": 2.6169699031806175, "learning_rate": 4.995739405075674e-06, "loss": 0.6174, "step": 4897 }, { "epoch": 0.52, "grad_norm": 4.102025420126629, "learning_rate": 4.994035167798921e-06, "loss": 0.601, "step": 4898 }, { "epoch": 0.52, "grad_norm": 2.5096200600239045, "learning_rate": 4.992330931215146e-06, "loss": 0.7385, "step": 4899 }, { "epoch": 0.52, "grad_norm": 2.5045722346398853, "learning_rate": 4.990626695522339e-06, "loss": 0.7197, "step": 4900 }, { "epoch": 0.52, "grad_norm": 2.487306202546889, "learning_rate": 4.988922460918496e-06, "loss": 0.5743, "step": 4901 }, { "epoch": 0.52, "grad_norm": 5.319440294752822, "learning_rate": 4.98721822760161e-06, "loss": 0.7375, "step": 4902 }, { "epoch": 0.52, "grad_norm": 2.1289416212364505, "learning_rate": 4.985513995769672e-06, "loss": 0.575, "step": 4903 }, { "epoch": 0.52, "grad_norm": 2.504486312284673, "learning_rate": 4.983809765620678e-06, "loss": 0.689, "step": 4904 }, { "epoch": 0.52, "grad_norm": 3.312174671775115, "learning_rate": 4.9821055373526175e-06, "loss": 0.5768, "step": 4905 }, { "epoch": 0.52, "grad_norm": 3.0889029546172004, "learning_rate": 4.980401311163483e-06, "loss": 0.6912, "step": 4906 }, { "epoch": 0.52, "grad_norm": 2.355632254622633, "learning_rate": 4.978697087251269e-06, "loss": 0.6775, "step": 4907 }, { "epoch": 0.52, "grad_norm": 2.2880860706153845, "learning_rate": 4.976992865813968e-06, "loss": 0.6585, "step": 4908 }, { "epoch": 0.52, "grad_norm": 1.9846108300900807, "learning_rate": 4.975288647049566e-06, "loss": 0.5791, "step": 4909 }, { "epoch": 0.52, "grad_norm": 2.2342087894997813, "learning_rate": 4.9735844311560574e-06, "loss": 0.6124, "step": 4910 }, { "epoch": 0.52, "grad_norm": 1.9947124356007078, "learning_rate": 4.9718802183314345e-06, "loss": 0.6461, "step": 4911 }, { "epoch": 0.52, "grad_norm": 2.197647296374431, "learning_rate": 4.970176008773688e-06, "loss": 0.6122, "step": 4912 }, { "epoch": 0.52, "grad_norm": 0.9932317458658896, "learning_rate": 4.9684718026808035e-06, "loss": 0.5758, "step": 4913 }, { "epoch": 0.52, "grad_norm": 2.709681927841805, "learning_rate": 4.966767600250776e-06, "loss": 0.6107, "step": 4914 }, { "epoch": 0.52, "grad_norm": 2.1508024576423437, "learning_rate": 4.965063401681591e-06, "loss": 0.598, "step": 4915 }, { "epoch": 0.52, "grad_norm": 2.146135541337258, "learning_rate": 4.963359207171239e-06, "loss": 0.5641, "step": 4916 }, { "epoch": 0.52, "grad_norm": 3.3285165686253713, "learning_rate": 4.961655016917712e-06, "loss": 0.7023, "step": 4917 }, { "epoch": 0.52, "grad_norm": 3.943768718695757, "learning_rate": 4.959950831118991e-06, "loss": 0.6485, "step": 4918 }, { "epoch": 0.52, "grad_norm": 4.023540049447175, "learning_rate": 4.958246649973066e-06, "loss": 0.6233, "step": 4919 }, { "epoch": 0.52, "grad_norm": 5.000434315098372, "learning_rate": 4.956542473677926e-06, "loss": 0.709, "step": 4920 }, { "epoch": 0.52, "grad_norm": 2.067727072792758, "learning_rate": 4.954838302431556e-06, "loss": 0.5751, "step": 4921 }, { "epoch": 0.52, "grad_norm": 2.3191980917060753, "learning_rate": 4.9531341364319404e-06, "loss": 0.6596, "step": 4922 }, { "epoch": 0.52, "grad_norm": 2.3586105417389245, "learning_rate": 4.951429975877066e-06, "loss": 0.6919, "step": 4923 }, { "epoch": 0.52, "grad_norm": 2.1526033976465793, "learning_rate": 4.9497258209649165e-06, "loss": 0.6568, "step": 4924 }, { "epoch": 0.52, "grad_norm": 2.133729675007697, "learning_rate": 4.948021671893475e-06, "loss": 0.6752, "step": 4925 }, { "epoch": 0.52, "grad_norm": 3.159426842721098, "learning_rate": 4.946317528860728e-06, "loss": 0.7144, "step": 4926 }, { "epoch": 0.52, "grad_norm": 2.115435574837891, "learning_rate": 4.9446133920646535e-06, "loss": 0.5658, "step": 4927 }, { "epoch": 0.52, "grad_norm": 2.7390237750782194, "learning_rate": 4.942909261703234e-06, "loss": 0.606, "step": 4928 }, { "epoch": 0.52, "grad_norm": 2.8886081183240573, "learning_rate": 4.941205137974453e-06, "loss": 0.6459, "step": 4929 }, { "epoch": 0.52, "grad_norm": 2.1116737366663343, "learning_rate": 4.93950102107629e-06, "loss": 0.5802, "step": 4930 }, { "epoch": 0.52, "grad_norm": 2.26047601506244, "learning_rate": 4.937796911206722e-06, "loss": 0.6255, "step": 4931 }, { "epoch": 0.52, "grad_norm": 2.1539115192013214, "learning_rate": 4.936092808563729e-06, "loss": 0.6115, "step": 4932 }, { "epoch": 0.52, "grad_norm": 3.0145846571869437, "learning_rate": 4.9343887133452885e-06, "loss": 0.6485, "step": 4933 }, { "epoch": 0.52, "grad_norm": 3.099479126502059, "learning_rate": 4.932684625749379e-06, "loss": 0.6624, "step": 4934 }, { "epoch": 0.52, "grad_norm": 2.5769731100724083, "learning_rate": 4.930980545973973e-06, "loss": 0.6308, "step": 4935 }, { "epoch": 0.52, "grad_norm": 2.6783179427641115, "learning_rate": 4.929276474217049e-06, "loss": 0.6243, "step": 4936 }, { "epoch": 0.52, "grad_norm": 2.943547559066303, "learning_rate": 4.92757241067658e-06, "loss": 0.6204, "step": 4937 }, { "epoch": 0.52, "grad_norm": 2.3266756319552204, "learning_rate": 4.925868355550537e-06, "loss": 0.5994, "step": 4938 }, { "epoch": 0.52, "grad_norm": 2.310739073972056, "learning_rate": 4.924164309036897e-06, "loss": 0.6684, "step": 4939 }, { "epoch": 0.52, "grad_norm": 2.3395607545275916, "learning_rate": 4.922460271333627e-06, "loss": 0.5414, "step": 4940 }, { "epoch": 0.52, "grad_norm": 2.4676331611691746, "learning_rate": 4.920756242638698e-06, "loss": 0.6746, "step": 4941 }, { "epoch": 0.52, "grad_norm": 2.2941161951403686, "learning_rate": 4.919052223150081e-06, "loss": 0.7604, "step": 4942 }, { "epoch": 0.52, "grad_norm": 2.929369540693781, "learning_rate": 4.917348213065742e-06, "loss": 0.6284, "step": 4943 }, { "epoch": 0.52, "grad_norm": 2.196953658021791, "learning_rate": 4.915644212583649e-06, "loss": 0.5309, "step": 4944 }, { "epoch": 0.52, "grad_norm": 2.277862980217032, "learning_rate": 4.913940221901766e-06, "loss": 0.6333, "step": 4945 }, { "epoch": 0.52, "grad_norm": 2.1353131160650287, "learning_rate": 4.912236241218061e-06, "loss": 0.6933, "step": 4946 }, { "epoch": 0.52, "grad_norm": 2.1309552775913865, "learning_rate": 4.910532270730497e-06, "loss": 0.6451, "step": 4947 }, { "epoch": 0.52, "grad_norm": 2.226377878125629, "learning_rate": 4.908828310637031e-06, "loss": 0.6428, "step": 4948 }, { "epoch": 0.52, "grad_norm": 2.218293921242868, "learning_rate": 4.907124361135632e-06, "loss": 0.6537, "step": 4949 }, { "epoch": 0.52, "grad_norm": 2.4059507731751615, "learning_rate": 4.905420422424254e-06, "loss": 0.5964, "step": 4950 }, { "epoch": 0.52, "grad_norm": 2.4481730880265014, "learning_rate": 4.903716494700859e-06, "loss": 0.6094, "step": 4951 }, { "epoch": 0.52, "grad_norm": 2.684768787067311, "learning_rate": 4.902012578163404e-06, "loss": 0.6083, "step": 4952 }, { "epoch": 0.52, "grad_norm": 1.9937131948780513, "learning_rate": 4.900308673009843e-06, "loss": 0.5961, "step": 4953 }, { "epoch": 0.52, "grad_norm": 3.274862105439873, "learning_rate": 4.8986047794381325e-06, "loss": 0.6355, "step": 4954 }, { "epoch": 0.52, "grad_norm": 2.081137560086168, "learning_rate": 4.896900897646226e-06, "loss": 0.6181, "step": 4955 }, { "epoch": 0.52, "grad_norm": 2.1349551752747886, "learning_rate": 4.8951970278320765e-06, "loss": 0.6046, "step": 4956 }, { "epoch": 0.52, "grad_norm": 2.541248755489936, "learning_rate": 4.89349317019363e-06, "loss": 0.6012, "step": 4957 }, { "epoch": 0.52, "grad_norm": 2.565627078399468, "learning_rate": 4.891789324928842e-06, "loss": 0.6558, "step": 4958 }, { "epoch": 0.52, "grad_norm": 3.109453742078581, "learning_rate": 4.890085492235657e-06, "loss": 0.5778, "step": 4959 }, { "epoch": 0.52, "grad_norm": 2.0050613585910755, "learning_rate": 4.888381672312022e-06, "loss": 0.6251, "step": 4960 }, { "epoch": 0.52, "grad_norm": 2.339550540816934, "learning_rate": 4.88667786535588e-06, "loss": 0.6687, "step": 4961 }, { "epoch": 0.52, "grad_norm": 2.290758913793059, "learning_rate": 4.884974071565177e-06, "loss": 0.6846, "step": 4962 }, { "epoch": 0.52, "grad_norm": 1.077911583636364, "learning_rate": 4.883270291137852e-06, "loss": 0.6473, "step": 4963 }, { "epoch": 0.52, "grad_norm": 1.985960627648404, "learning_rate": 4.88156652427185e-06, "loss": 0.6044, "step": 4964 }, { "epoch": 0.52, "grad_norm": 2.305367885235591, "learning_rate": 4.879862771165107e-06, "loss": 0.5774, "step": 4965 }, { "epoch": 0.52, "grad_norm": 2.4768244502117036, "learning_rate": 4.878159032015559e-06, "loss": 0.6634, "step": 4966 }, { "epoch": 0.52, "grad_norm": 2.4276873504006815, "learning_rate": 4.8764553070211415e-06, "loss": 0.6828, "step": 4967 }, { "epoch": 0.52, "grad_norm": 2.4060474857332013, "learning_rate": 4.87475159637979e-06, "loss": 0.6065, "step": 4968 }, { "epoch": 0.52, "grad_norm": 1.7772890860805246, "learning_rate": 4.873047900289437e-06, "loss": 0.6808, "step": 4969 }, { "epoch": 0.52, "grad_norm": 3.0625237060008588, "learning_rate": 4.87134421894801e-06, "loss": 0.6523, "step": 4970 }, { "epoch": 0.52, "grad_norm": 2.2524142418639466, "learning_rate": 4.869640552553441e-06, "loss": 0.6733, "step": 4971 }, { "epoch": 0.52, "grad_norm": 2.2414743848382006, "learning_rate": 4.867936901303656e-06, "loss": 0.6021, "step": 4972 }, { "epoch": 0.52, "grad_norm": 2.338392581877551, "learning_rate": 4.866233265396577e-06, "loss": 0.7099, "step": 4973 }, { "epoch": 0.52, "grad_norm": 2.0935049485955495, "learning_rate": 4.8645296450301345e-06, "loss": 0.6684, "step": 4974 }, { "epoch": 0.52, "grad_norm": 2.1007971339772427, "learning_rate": 4.862826040402246e-06, "loss": 0.6992, "step": 4975 }, { "epoch": 0.52, "grad_norm": 0.9696344105471323, "learning_rate": 4.861122451710829e-06, "loss": 0.5749, "step": 4976 }, { "epoch": 0.52, "grad_norm": 2.553225906237905, "learning_rate": 4.859418879153805e-06, "loss": 0.7279, "step": 4977 }, { "epoch": 0.52, "grad_norm": 2.067442914319545, "learning_rate": 4.857715322929091e-06, "loss": 0.6576, "step": 4978 }, { "epoch": 0.52, "grad_norm": 2.149325406618078, "learning_rate": 4.856011783234599e-06, "loss": 0.7101, "step": 4979 }, { "epoch": 0.52, "grad_norm": 1.9706548407978104, "learning_rate": 4.85430826026824e-06, "loss": 0.6659, "step": 4980 }, { "epoch": 0.52, "grad_norm": 2.6696818093641395, "learning_rate": 4.852604754227927e-06, "loss": 0.6047, "step": 4981 }, { "epoch": 0.52, "grad_norm": 3.3802463897178785, "learning_rate": 4.8509012653115695e-06, "loss": 0.7413, "step": 4982 }, { "epoch": 0.52, "grad_norm": 2.1818945622881833, "learning_rate": 4.849197793717069e-06, "loss": 0.5983, "step": 4983 }, { "epoch": 0.52, "grad_norm": 2.14901122734723, "learning_rate": 4.847494339642334e-06, "loss": 0.7177, "step": 4984 }, { "epoch": 0.52, "grad_norm": 2.918569152656593, "learning_rate": 4.8457909032852654e-06, "loss": 0.6908, "step": 4985 }, { "epoch": 0.52, "grad_norm": 0.9406184537970192, "learning_rate": 4.844087484843764e-06, "loss": 0.6133, "step": 4986 }, { "epoch": 0.52, "grad_norm": 2.0469495720368256, "learning_rate": 4.84238408451573e-06, "loss": 0.6769, "step": 4987 }, { "epoch": 0.52, "grad_norm": 2.7726823392613116, "learning_rate": 4.840680702499056e-06, "loss": 0.6546, "step": 4988 }, { "epoch": 0.52, "grad_norm": 2.272990604413833, "learning_rate": 4.838977338991636e-06, "loss": 0.647, "step": 4989 }, { "epoch": 0.53, "grad_norm": 2.9330631724535494, "learning_rate": 4.837273994191364e-06, "loss": 0.6251, "step": 4990 }, { "epoch": 0.53, "grad_norm": 2.2631226425982933, "learning_rate": 4.835570668296131e-06, "loss": 0.676, "step": 4991 }, { "epoch": 0.53, "grad_norm": 2.13674093049108, "learning_rate": 4.83386736150382e-06, "loss": 0.6693, "step": 4992 }, { "epoch": 0.53, "grad_norm": 2.133284920671136, "learning_rate": 4.83216407401232e-06, "loss": 0.5922, "step": 4993 }, { "epoch": 0.53, "grad_norm": 2.520394477013768, "learning_rate": 4.830460806019512e-06, "loss": 0.6195, "step": 4994 }, { "epoch": 0.53, "grad_norm": 2.693745272656559, "learning_rate": 4.828757557723279e-06, "loss": 0.6932, "step": 4995 }, { "epoch": 0.53, "grad_norm": 2.4450291798500845, "learning_rate": 4.827054329321496e-06, "loss": 0.537, "step": 4996 }, { "epoch": 0.53, "grad_norm": 2.0458948423518097, "learning_rate": 4.825351121012042e-06, "loss": 0.6444, "step": 4997 }, { "epoch": 0.53, "grad_norm": 2.6712241240805357, "learning_rate": 4.823647932992788e-06, "loss": 0.6205, "step": 4998 }, { "epoch": 0.53, "grad_norm": 2.207121252476882, "learning_rate": 4.82194476546161e-06, "loss": 0.5541, "step": 4999 }, { "epoch": 0.53, "grad_norm": 2.289069632699829, "learning_rate": 4.820241618616375e-06, "loss": 0.544, "step": 5000 }, { "epoch": 0.53, "grad_norm": 2.2675898245567687, "learning_rate": 4.818538492654947e-06, "loss": 0.6852, "step": 5001 }, { "epoch": 0.53, "grad_norm": 2.343735840653348, "learning_rate": 4.816835387775193e-06, "loss": 0.6876, "step": 5002 }, { "epoch": 0.53, "grad_norm": 2.246807379369579, "learning_rate": 4.8151323041749734e-06, "loss": 0.6657, "step": 5003 }, { "epoch": 0.53, "grad_norm": 2.4237118515960447, "learning_rate": 4.8134292420521505e-06, "loss": 0.6224, "step": 5004 }, { "epoch": 0.53, "grad_norm": 2.289453411888955, "learning_rate": 4.811726201604576e-06, "loss": 0.655, "step": 5005 }, { "epoch": 0.53, "grad_norm": 2.0026507977297503, "learning_rate": 4.810023183030109e-06, "loss": 0.6169, "step": 5006 }, { "epoch": 0.53, "grad_norm": 3.0079499431943195, "learning_rate": 4.808320186526599e-06, "loss": 0.6452, "step": 5007 }, { "epoch": 0.53, "grad_norm": 2.2628812402524896, "learning_rate": 4.806617212291898e-06, "loss": 0.6522, "step": 5008 }, { "epoch": 0.53, "grad_norm": 2.2505910057014997, "learning_rate": 4.804914260523847e-06, "loss": 0.5728, "step": 5009 }, { "epoch": 0.53, "grad_norm": 2.2225142986972495, "learning_rate": 4.803211331420294e-06, "loss": 0.636, "step": 5010 }, { "epoch": 0.53, "grad_norm": 2.5157421199340506, "learning_rate": 4.801508425179079e-06, "loss": 0.6838, "step": 5011 }, { "epoch": 0.53, "grad_norm": 3.362812838563486, "learning_rate": 4.799805541998042e-06, "loss": 0.6514, "step": 5012 }, { "epoch": 0.53, "grad_norm": 2.460937036790075, "learning_rate": 4.79810268207502e-06, "loss": 0.63, "step": 5013 }, { "epoch": 0.53, "grad_norm": 3.3836688933373873, "learning_rate": 4.796399845607844e-06, "loss": 0.6927, "step": 5014 }, { "epoch": 0.53, "grad_norm": 2.1163838752069926, "learning_rate": 4.7946970327943435e-06, "loss": 0.6223, "step": 5015 }, { "epoch": 0.53, "grad_norm": 2.4368026381336314, "learning_rate": 4.79299424383235e-06, "loss": 0.5904, "step": 5016 }, { "epoch": 0.53, "grad_norm": 2.0227052397175225, "learning_rate": 4.791291478919688e-06, "loss": 0.6301, "step": 5017 }, { "epoch": 0.53, "grad_norm": 3.4149521326579464, "learning_rate": 4.789588738254176e-06, "loss": 0.6325, "step": 5018 }, { "epoch": 0.53, "grad_norm": 2.03492009399757, "learning_rate": 4.787886022033637e-06, "loss": 0.6345, "step": 5019 }, { "epoch": 0.53, "grad_norm": 2.045894564408748, "learning_rate": 4.786183330455886e-06, "loss": 0.6108, "step": 5020 }, { "epoch": 0.53, "grad_norm": 2.0734976435360384, "learning_rate": 4.784480663718742e-06, "loss": 0.5606, "step": 5021 }, { "epoch": 0.53, "grad_norm": 4.08406849984304, "learning_rate": 4.782778022020006e-06, "loss": 0.6312, "step": 5022 }, { "epoch": 0.53, "grad_norm": 2.8678914982078667, "learning_rate": 4.7810754055574945e-06, "loss": 0.6431, "step": 5023 }, { "epoch": 0.53, "grad_norm": 2.010459684826099, "learning_rate": 4.779372814529008e-06, "loss": 0.6737, "step": 5024 }, { "epoch": 0.53, "grad_norm": 2.274385411512584, "learning_rate": 4.7776702491323506e-06, "loss": 0.6473, "step": 5025 }, { "epoch": 0.53, "grad_norm": 2.4893768635552216, "learning_rate": 4.775967709565323e-06, "loss": 0.5868, "step": 5026 }, { "epoch": 0.53, "grad_norm": 2.8594743184322855, "learning_rate": 4.774265196025716e-06, "loss": 0.6205, "step": 5027 }, { "epoch": 0.53, "grad_norm": 3.0896156585418804, "learning_rate": 4.772562708711328e-06, "loss": 0.5436, "step": 5028 }, { "epoch": 0.53, "grad_norm": 2.1024636760857023, "learning_rate": 4.770860247819946e-06, "loss": 0.6145, "step": 5029 }, { "epoch": 0.53, "grad_norm": 2.7483433705268467, "learning_rate": 4.7691578135493595e-06, "loss": 0.6789, "step": 5030 }, { "epoch": 0.53, "grad_norm": 3.7267937561387456, "learning_rate": 4.76745540609735e-06, "loss": 0.651, "step": 5031 }, { "epoch": 0.53, "grad_norm": 0.930921651098154, "learning_rate": 4.765753025661699e-06, "loss": 0.5867, "step": 5032 }, { "epoch": 0.53, "grad_norm": 2.0301984146112435, "learning_rate": 4.764050672440184e-06, "loss": 0.633, "step": 5033 }, { "epoch": 0.53, "grad_norm": 3.3826775767799564, "learning_rate": 4.76234834663058e-06, "loss": 0.6399, "step": 5034 }, { "epoch": 0.53, "grad_norm": 2.648287363125826, "learning_rate": 4.76064604843066e-06, "loss": 0.6734, "step": 5035 }, { "epoch": 0.53, "grad_norm": 2.6257884700520076, "learning_rate": 4.758943778038189e-06, "loss": 0.5742, "step": 5036 }, { "epoch": 0.53, "grad_norm": 2.790385480712019, "learning_rate": 4.757241535650931e-06, "loss": 0.5854, "step": 5037 }, { "epoch": 0.53, "grad_norm": 2.3321216810115755, "learning_rate": 4.755539321466652e-06, "loss": 0.7305, "step": 5038 }, { "epoch": 0.53, "grad_norm": 2.470106783507069, "learning_rate": 4.753837135683108e-06, "loss": 0.7199, "step": 5039 }, { "epoch": 0.53, "grad_norm": 2.519684963476052, "learning_rate": 4.752134978498052e-06, "loss": 0.6208, "step": 5040 }, { "epoch": 0.53, "grad_norm": 1.9933322283576163, "learning_rate": 4.750432850109239e-06, "loss": 0.5654, "step": 5041 }, { "epoch": 0.53, "grad_norm": 2.1628910176613396, "learning_rate": 4.748730750714417e-06, "loss": 0.5597, "step": 5042 }, { "epoch": 0.53, "grad_norm": 3.2601189066847938, "learning_rate": 4.74702868051133e-06, "loss": 0.5868, "step": 5043 }, { "epoch": 0.53, "grad_norm": 2.4445761523300917, "learning_rate": 4.745326639697718e-06, "loss": 0.6006, "step": 5044 }, { "epoch": 0.53, "grad_norm": 2.157421144865131, "learning_rate": 4.743624628471322e-06, "loss": 0.5829, "step": 5045 }, { "epoch": 0.53, "grad_norm": 10.608165987972704, "learning_rate": 4.741922647029873e-06, "loss": 0.5994, "step": 5046 }, { "epoch": 0.53, "grad_norm": 2.6959202608616892, "learning_rate": 4.740220695571108e-06, "loss": 0.7195, "step": 5047 }, { "epoch": 0.53, "grad_norm": 2.374505085575942, "learning_rate": 4.738518774292752e-06, "loss": 0.5836, "step": 5048 }, { "epoch": 0.53, "grad_norm": 2.4217957922423077, "learning_rate": 4.736816883392527e-06, "loss": 0.5942, "step": 5049 }, { "epoch": 0.53, "grad_norm": 3.6160178947490085, "learning_rate": 4.735115023068155e-06, "loss": 0.6114, "step": 5050 }, { "epoch": 0.53, "grad_norm": 1.9430364282301278, "learning_rate": 4.733413193517355e-06, "loss": 0.6125, "step": 5051 }, { "epoch": 0.53, "grad_norm": 2.886628277780551, "learning_rate": 4.731711394937842e-06, "loss": 0.6733, "step": 5052 }, { "epoch": 0.53, "grad_norm": 2.7819669075459643, "learning_rate": 4.73000962752732e-06, "loss": 0.607, "step": 5053 }, { "epoch": 0.53, "grad_norm": 3.3962133589136707, "learning_rate": 4.7283078914835e-06, "loss": 0.6421, "step": 5054 }, { "epoch": 0.53, "grad_norm": 2.8324268366863623, "learning_rate": 4.726606187004084e-06, "loss": 0.6786, "step": 5055 }, { "epoch": 0.53, "grad_norm": 3.009696361121063, "learning_rate": 4.724904514286773e-06, "loss": 0.6692, "step": 5056 }, { "epoch": 0.53, "grad_norm": 2.6870058183715755, "learning_rate": 4.723202873529256e-06, "loss": 0.6067, "step": 5057 }, { "epoch": 0.53, "grad_norm": 2.358795360202302, "learning_rate": 4.721501264929232e-06, "loss": 0.6416, "step": 5058 }, { "epoch": 0.53, "grad_norm": 2.2459402545172984, "learning_rate": 4.719799688684385e-06, "loss": 0.7759, "step": 5059 }, { "epoch": 0.53, "grad_norm": 2.1705987722229483, "learning_rate": 4.7180981449924006e-06, "loss": 0.5818, "step": 5060 }, { "epoch": 0.53, "grad_norm": 2.764535131155558, "learning_rate": 4.716396634050959e-06, "loss": 0.6444, "step": 5061 }, { "epoch": 0.53, "grad_norm": 2.7065511765293717, "learning_rate": 4.714695156057737e-06, "loss": 0.7239, "step": 5062 }, { "epoch": 0.53, "grad_norm": 2.614545604700766, "learning_rate": 4.712993711210405e-06, "loss": 0.5962, "step": 5063 }, { "epoch": 0.53, "grad_norm": 3.2589815632805568, "learning_rate": 4.711292299706636e-06, "loss": 0.6851, "step": 5064 }, { "epoch": 0.53, "grad_norm": 2.695743902716769, "learning_rate": 4.709590921744093e-06, "loss": 0.5757, "step": 5065 }, { "epoch": 0.53, "grad_norm": 3.0665293362792982, "learning_rate": 4.707889577520436e-06, "loss": 0.6153, "step": 5066 }, { "epoch": 0.53, "grad_norm": 2.6924889301708195, "learning_rate": 4.706188267233324e-06, "loss": 0.6596, "step": 5067 }, { "epoch": 0.53, "grad_norm": 2.3595683992153256, "learning_rate": 4.704486991080409e-06, "loss": 0.6294, "step": 5068 }, { "epoch": 0.53, "grad_norm": 2.669021302249065, "learning_rate": 4.7027857492593445e-06, "loss": 0.6792, "step": 5069 }, { "epoch": 0.53, "grad_norm": 1.0161808520148305, "learning_rate": 4.701084541967769e-06, "loss": 0.5579, "step": 5070 }, { "epoch": 0.53, "grad_norm": 2.2988515242704346, "learning_rate": 4.699383369403329e-06, "loss": 0.6003, "step": 5071 }, { "epoch": 0.53, "grad_norm": 2.252022330411591, "learning_rate": 4.697682231763658e-06, "loss": 0.6515, "step": 5072 }, { "epoch": 0.53, "grad_norm": 2.0233341986752094, "learning_rate": 4.695981129246393e-06, "loss": 0.5974, "step": 5073 }, { "epoch": 0.53, "grad_norm": 2.2983932146999533, "learning_rate": 4.694280062049163e-06, "loss": 0.5946, "step": 5074 }, { "epoch": 0.53, "grad_norm": 3.894694889398997, "learning_rate": 4.6925790303695886e-06, "loss": 0.5984, "step": 5075 }, { "epoch": 0.53, "grad_norm": 2.374417964315601, "learning_rate": 4.690878034405296e-06, "loss": 0.5768, "step": 5076 }, { "epoch": 0.53, "grad_norm": 2.310052580071245, "learning_rate": 4.689177074353899e-06, "loss": 0.559, "step": 5077 }, { "epoch": 0.53, "grad_norm": 2.4835325819520286, "learning_rate": 4.687476150413012e-06, "loss": 0.6051, "step": 5078 }, { "epoch": 0.53, "grad_norm": 2.57984204212142, "learning_rate": 4.6857752627802405e-06, "loss": 0.5899, "step": 5079 }, { "epoch": 0.53, "grad_norm": 2.6143636376389408, "learning_rate": 4.684074411653192e-06, "loss": 0.6276, "step": 5080 }, { "epoch": 0.53, "grad_norm": 2.065161043806273, "learning_rate": 4.682373597229464e-06, "loss": 0.6279, "step": 5081 }, { "epoch": 0.53, "grad_norm": 4.501763492850309, "learning_rate": 4.680672819706654e-06, "loss": 0.681, "step": 5082 }, { "epoch": 0.53, "grad_norm": 2.563624459964035, "learning_rate": 4.678972079282354e-06, "loss": 0.64, "step": 5083 }, { "epoch": 0.53, "grad_norm": 2.0259420193680757, "learning_rate": 4.677271376154149e-06, "loss": 0.7082, "step": 5084 }, { "epoch": 0.54, "grad_norm": 3.20008658827479, "learning_rate": 4.6755707105196204e-06, "loss": 0.7601, "step": 5085 }, { "epoch": 0.54, "grad_norm": 2.5221950538299738, "learning_rate": 4.673870082576351e-06, "loss": 0.5972, "step": 5086 }, { "epoch": 0.54, "grad_norm": 2.312808929084742, "learning_rate": 4.672169492521914e-06, "loss": 0.6986, "step": 5087 }, { "epoch": 0.54, "grad_norm": 3.5730283833471397, "learning_rate": 4.670468940553875e-06, "loss": 0.6108, "step": 5088 }, { "epoch": 0.54, "grad_norm": 5.6696729783798485, "learning_rate": 4.6687684268698034e-06, "loss": 0.6344, "step": 5089 }, { "epoch": 0.54, "grad_norm": 3.465514843046939, "learning_rate": 4.667067951667256e-06, "loss": 0.622, "step": 5090 }, { "epoch": 0.54, "grad_norm": 1.13181319546967, "learning_rate": 4.665367515143797e-06, "loss": 0.5712, "step": 5091 }, { "epoch": 0.54, "grad_norm": 2.425783344218305, "learning_rate": 4.663667117496968e-06, "loss": 0.6551, "step": 5092 }, { "epoch": 0.54, "grad_norm": 2.5727074812927193, "learning_rate": 4.6619667589243225e-06, "loss": 0.6365, "step": 5093 }, { "epoch": 0.54, "grad_norm": 2.3970092435076134, "learning_rate": 4.6602664396234e-06, "loss": 0.5804, "step": 5094 }, { "epoch": 0.54, "grad_norm": 2.186207872425851, "learning_rate": 4.658566159791742e-06, "loss": 0.6546, "step": 5095 }, { "epoch": 0.54, "grad_norm": 2.655332565894822, "learning_rate": 4.656865919626883e-06, "loss": 0.5582, "step": 5096 }, { "epoch": 0.54, "grad_norm": 3.041070063381227, "learning_rate": 4.655165719326347e-06, "loss": 0.7031, "step": 5097 }, { "epoch": 0.54, "grad_norm": 1.9879284762088225, "learning_rate": 4.653465559087661e-06, "loss": 0.6188, "step": 5098 }, { "epoch": 0.54, "grad_norm": 2.362708201617922, "learning_rate": 4.651765439108344e-06, "loss": 0.5938, "step": 5099 }, { "epoch": 0.54, "grad_norm": 2.3036918192067968, "learning_rate": 4.650065359585914e-06, "loss": 0.6185, "step": 5100 }, { "epoch": 0.54, "grad_norm": 3.862595099667224, "learning_rate": 4.648365320717876e-06, "loss": 0.6754, "step": 5101 }, { "epoch": 0.54, "grad_norm": 2.3230616471795256, "learning_rate": 4.64666532270174e-06, "loss": 0.6647, "step": 5102 }, { "epoch": 0.54, "grad_norm": 2.0507051961729577, "learning_rate": 4.644965365735004e-06, "loss": 0.5953, "step": 5103 }, { "epoch": 0.54, "grad_norm": 2.422141046773203, "learning_rate": 4.643265450015169e-06, "loss": 0.7227, "step": 5104 }, { "epoch": 0.54, "grad_norm": 2.2097746173914743, "learning_rate": 4.6415655757397206e-06, "loss": 0.6461, "step": 5105 }, { "epoch": 0.54, "grad_norm": 2.0454571953490075, "learning_rate": 4.639865743106148e-06, "loss": 0.5875, "step": 5106 }, { "epoch": 0.54, "grad_norm": 3.145981246348931, "learning_rate": 4.63816595231193e-06, "loss": 0.6611, "step": 5107 }, { "epoch": 0.54, "grad_norm": 1.9222457727504096, "learning_rate": 4.636466203554548e-06, "loss": 0.5957, "step": 5108 }, { "epoch": 0.54, "grad_norm": 2.509150332110932, "learning_rate": 4.634766497031472e-06, "loss": 0.6091, "step": 5109 }, { "epoch": 0.54, "grad_norm": 2.8618331161141985, "learning_rate": 4.633066832940167e-06, "loss": 0.6478, "step": 5110 }, { "epoch": 0.54, "grad_norm": 2.3088562094725518, "learning_rate": 4.631367211478098e-06, "loss": 0.5967, "step": 5111 }, { "epoch": 0.54, "grad_norm": 2.2252841546630666, "learning_rate": 4.62966763284272e-06, "loss": 0.703, "step": 5112 }, { "epoch": 0.54, "grad_norm": 0.9848922788689293, "learning_rate": 4.6279680972314875e-06, "loss": 0.5806, "step": 5113 }, { "epoch": 0.54, "grad_norm": 2.421707800583379, "learning_rate": 4.626268604841844e-06, "loss": 0.655, "step": 5114 }, { "epoch": 0.54, "grad_norm": 3.060075796638142, "learning_rate": 4.624569155871235e-06, "loss": 0.6389, "step": 5115 }, { "epoch": 0.54, "grad_norm": 2.708021895892409, "learning_rate": 4.622869750517094e-06, "loss": 0.7085, "step": 5116 }, { "epoch": 0.54, "grad_norm": 2.198154906455453, "learning_rate": 4.621170388976858e-06, "loss": 0.5702, "step": 5117 }, { "epoch": 0.54, "grad_norm": 2.074916489895744, "learning_rate": 4.619471071447949e-06, "loss": 0.514, "step": 5118 }, { "epoch": 0.54, "grad_norm": 2.89007197368037, "learning_rate": 4.617771798127792e-06, "loss": 0.6663, "step": 5119 }, { "epoch": 0.54, "grad_norm": 2.186100478989604, "learning_rate": 4.616072569213802e-06, "loss": 0.5598, "step": 5120 }, { "epoch": 0.54, "grad_norm": 4.737181403903594, "learning_rate": 4.614373384903391e-06, "loss": 0.6478, "step": 5121 }, { "epoch": 0.54, "grad_norm": 2.744234393991175, "learning_rate": 4.612674245393967e-06, "loss": 0.6454, "step": 5122 }, { "epoch": 0.54, "grad_norm": 2.4651854631448478, "learning_rate": 4.610975150882928e-06, "loss": 0.6289, "step": 5123 }, { "epoch": 0.54, "grad_norm": 5.106591381662164, "learning_rate": 4.609276101567672e-06, "loss": 0.6305, "step": 5124 }, { "epoch": 0.54, "grad_norm": 2.8645757518422106, "learning_rate": 4.607577097645587e-06, "loss": 0.5743, "step": 5125 }, { "epoch": 0.54, "grad_norm": 2.8329814893534113, "learning_rate": 4.605878139314065e-06, "loss": 0.6463, "step": 5126 }, { "epoch": 0.54, "grad_norm": 2.3889289397117075, "learning_rate": 4.604179226770478e-06, "loss": 0.7184, "step": 5127 }, { "epoch": 0.54, "grad_norm": 2.124562346527691, "learning_rate": 4.602480360212205e-06, "loss": 0.6488, "step": 5128 }, { "epoch": 0.54, "grad_norm": 2.7833987833164624, "learning_rate": 4.600781539836614e-06, "loss": 0.6184, "step": 5129 }, { "epoch": 0.54, "grad_norm": 2.554887684552683, "learning_rate": 4.5990827658410705e-06, "loss": 0.6443, "step": 5130 }, { "epoch": 0.54, "grad_norm": 2.83397128521315, "learning_rate": 4.597384038422933e-06, "loss": 0.6566, "step": 5131 }, { "epoch": 0.54, "grad_norm": 2.322027615656651, "learning_rate": 4.595685357779553e-06, "loss": 0.6787, "step": 5132 }, { "epoch": 0.54, "grad_norm": 4.634011797281909, "learning_rate": 4.593986724108279e-06, "loss": 0.5787, "step": 5133 }, { "epoch": 0.54, "grad_norm": 1.0666278509393818, "learning_rate": 4.592288137606454e-06, "loss": 0.5666, "step": 5134 }, { "epoch": 0.54, "grad_norm": 3.207516428553423, "learning_rate": 4.590589598471416e-06, "loss": 0.6368, "step": 5135 }, { "epoch": 0.54, "grad_norm": 3.49123455675996, "learning_rate": 4.588891106900493e-06, "loss": 0.7018, "step": 5136 }, { "epoch": 0.54, "grad_norm": 0.9847912096139533, "learning_rate": 4.587192663091014e-06, "loss": 0.5966, "step": 5137 }, { "epoch": 0.54, "grad_norm": 3.22624018717781, "learning_rate": 4.5854942672402965e-06, "loss": 0.582, "step": 5138 }, { "epoch": 0.54, "grad_norm": 1.9077695725913995, "learning_rate": 4.5837959195456605e-06, "loss": 0.6262, "step": 5139 }, { "epoch": 0.54, "grad_norm": 2.5077328392848472, "learning_rate": 4.5820976202044085e-06, "loss": 0.678, "step": 5140 }, { "epoch": 0.54, "grad_norm": 2.107242753594195, "learning_rate": 4.580399369413847e-06, "loss": 0.5716, "step": 5141 }, { "epoch": 0.54, "grad_norm": 2.2886530499565723, "learning_rate": 4.578701167371274e-06, "loss": 0.6729, "step": 5142 }, { "epoch": 0.54, "grad_norm": 1.806852048189923, "learning_rate": 4.577003014273981e-06, "loss": 0.6226, "step": 5143 }, { "epoch": 0.54, "grad_norm": 2.0468082920924573, "learning_rate": 4.575304910319257e-06, "loss": 0.7215, "step": 5144 }, { "epoch": 0.54, "grad_norm": 2.3451100058055863, "learning_rate": 4.573606855704379e-06, "loss": 0.5165, "step": 5145 }, { "epoch": 0.54, "grad_norm": 2.122583359383461, "learning_rate": 4.571908850626625e-06, "loss": 0.558, "step": 5146 }, { "epoch": 0.54, "grad_norm": 2.1021680512947545, "learning_rate": 4.570210895283262e-06, "loss": 0.5882, "step": 5147 }, { "epoch": 0.54, "grad_norm": 2.613254013725175, "learning_rate": 4.568512989871557e-06, "loss": 0.627, "step": 5148 }, { "epoch": 0.54, "grad_norm": 1.063740812597954, "learning_rate": 4.566815134588763e-06, "loss": 0.5857, "step": 5149 }, { "epoch": 0.54, "grad_norm": 2.3292145678287235, "learning_rate": 4.565117329632137e-06, "loss": 0.5244, "step": 5150 }, { "epoch": 0.54, "grad_norm": 2.2621172262271285, "learning_rate": 4.5634195751989195e-06, "loss": 0.6286, "step": 5151 }, { "epoch": 0.54, "grad_norm": 2.305527091957668, "learning_rate": 4.561721871486357e-06, "loss": 0.6073, "step": 5152 }, { "epoch": 0.54, "grad_norm": 2.664097973486397, "learning_rate": 4.5600242186916786e-06, "loss": 0.7036, "step": 5153 }, { "epoch": 0.54, "grad_norm": 3.907247212421276, "learning_rate": 4.5583266170121155e-06, "loss": 0.6086, "step": 5154 }, { "epoch": 0.54, "grad_norm": 3.356425324401827, "learning_rate": 4.556629066644888e-06, "loss": 0.5769, "step": 5155 }, { "epoch": 0.54, "grad_norm": 3.328669215805127, "learning_rate": 4.554931567787214e-06, "loss": 0.6026, "step": 5156 }, { "epoch": 0.54, "grad_norm": 2.2570183445717817, "learning_rate": 4.553234120636306e-06, "loss": 0.6124, "step": 5157 }, { "epoch": 0.54, "grad_norm": 3.081027926632191, "learning_rate": 4.551536725389364e-06, "loss": 0.6321, "step": 5158 }, { "epoch": 0.54, "grad_norm": 2.273359671518665, "learning_rate": 4.54983938224359e-06, "loss": 0.6413, "step": 5159 }, { "epoch": 0.54, "grad_norm": 2.9426125776938763, "learning_rate": 4.5481420913961734e-06, "loss": 0.6623, "step": 5160 }, { "epoch": 0.54, "grad_norm": 2.272805905164049, "learning_rate": 4.546444853044308e-06, "loss": 0.6284, "step": 5161 }, { "epoch": 0.54, "grad_norm": 4.032007123280218, "learning_rate": 4.544747667385163e-06, "loss": 0.671, "step": 5162 }, { "epoch": 0.54, "grad_norm": 2.705544234308645, "learning_rate": 4.543050534615919e-06, "loss": 0.6616, "step": 5163 }, { "epoch": 0.54, "grad_norm": 2.3270348164118886, "learning_rate": 4.541353454933743e-06, "loss": 0.6169, "step": 5164 }, { "epoch": 0.54, "grad_norm": 3.1781706730214743, "learning_rate": 4.539656428535799e-06, "loss": 0.6087, "step": 5165 }, { "epoch": 0.54, "grad_norm": 2.157315390560079, "learning_rate": 4.537959455619238e-06, "loss": 0.5943, "step": 5166 }, { "epoch": 0.54, "grad_norm": 2.909391157704517, "learning_rate": 4.536262536381213e-06, "loss": 0.6192, "step": 5167 }, { "epoch": 0.54, "grad_norm": 2.3877838658782027, "learning_rate": 4.5345656710188645e-06, "loss": 0.6268, "step": 5168 }, { "epoch": 0.54, "grad_norm": 2.8840280487338967, "learning_rate": 4.532868859729333e-06, "loss": 0.6004, "step": 5169 }, { "epoch": 0.54, "grad_norm": 2.113177902066499, "learning_rate": 4.531172102709746e-06, "loss": 0.6332, "step": 5170 }, { "epoch": 0.54, "grad_norm": 2.0427183759210084, "learning_rate": 4.529475400157228e-06, "loss": 0.6404, "step": 5171 }, { "epoch": 0.54, "grad_norm": 2.552191146094726, "learning_rate": 4.527778752268899e-06, "loss": 0.6292, "step": 5172 }, { "epoch": 0.54, "grad_norm": 2.3830504963095254, "learning_rate": 4.5260821592418685e-06, "loss": 0.6213, "step": 5173 }, { "epoch": 0.54, "grad_norm": 5.240778096163022, "learning_rate": 4.5243856212732466e-06, "loss": 0.5673, "step": 5174 }, { "epoch": 0.54, "grad_norm": 1.0053178425031761, "learning_rate": 4.5226891385601235e-06, "loss": 0.5822, "step": 5175 }, { "epoch": 0.54, "grad_norm": 2.4335630650323252, "learning_rate": 4.520992711299599e-06, "loss": 0.6692, "step": 5176 }, { "epoch": 0.54, "grad_norm": 2.058047240249313, "learning_rate": 4.519296339688754e-06, "loss": 0.5852, "step": 5177 }, { "epoch": 0.54, "grad_norm": 3.063974457211975, "learning_rate": 4.517600023924673e-06, "loss": 0.6141, "step": 5178 }, { "epoch": 0.54, "grad_norm": 2.1386266960462503, "learning_rate": 4.515903764204428e-06, "loss": 0.6138, "step": 5179 }, { "epoch": 0.55, "grad_norm": 2.5711607873760527, "learning_rate": 4.514207560725082e-06, "loss": 0.6987, "step": 5180 }, { "epoch": 0.55, "grad_norm": 1.979892923073177, "learning_rate": 4.512511413683698e-06, "loss": 0.6426, "step": 5181 }, { "epoch": 0.55, "grad_norm": 2.1211499569858487, "learning_rate": 4.510815323277329e-06, "loss": 0.6801, "step": 5182 }, { "epoch": 0.55, "grad_norm": 2.415322447097737, "learning_rate": 4.509119289703023e-06, "loss": 0.5961, "step": 5183 }, { "epoch": 0.55, "grad_norm": 2.4841757246588174, "learning_rate": 4.507423313157815e-06, "loss": 0.6813, "step": 5184 }, { "epoch": 0.55, "grad_norm": 2.409140496949704, "learning_rate": 4.505727393838746e-06, "loss": 0.6362, "step": 5185 }, { "epoch": 0.55, "grad_norm": 2.3420213528636924, "learning_rate": 4.504031531942837e-06, "loss": 0.6293, "step": 5186 }, { "epoch": 0.55, "grad_norm": 2.454227338659623, "learning_rate": 4.502335727667114e-06, "loss": 0.6149, "step": 5187 }, { "epoch": 0.55, "grad_norm": 2.7418078304273394, "learning_rate": 4.500639981208586e-06, "loss": 0.6847, "step": 5188 }, { "epoch": 0.55, "grad_norm": 1.9687799092010179, "learning_rate": 4.498944292764261e-06, "loss": 0.6455, "step": 5189 }, { "epoch": 0.55, "grad_norm": 2.3321151600204844, "learning_rate": 4.497248662531139e-06, "loss": 0.605, "step": 5190 }, { "epoch": 0.55, "grad_norm": 3.4606611035859682, "learning_rate": 4.495553090706216e-06, "loss": 0.6776, "step": 5191 }, { "epoch": 0.55, "grad_norm": 2.3731291130732086, "learning_rate": 4.493857577486477e-06, "loss": 0.6971, "step": 5192 }, { "epoch": 0.55, "grad_norm": 2.041114475320568, "learning_rate": 4.492162123068899e-06, "loss": 0.5753, "step": 5193 }, { "epoch": 0.55, "grad_norm": 2.5283267101737827, "learning_rate": 4.49046672765046e-06, "loss": 0.6632, "step": 5194 }, { "epoch": 0.55, "grad_norm": 2.535327625572618, "learning_rate": 4.488771391428122e-06, "loss": 0.7297, "step": 5195 }, { "epoch": 0.55, "grad_norm": 2.3383531443300956, "learning_rate": 4.487076114598848e-06, "loss": 0.5403, "step": 5196 }, { "epoch": 0.55, "grad_norm": 2.1712529025180665, "learning_rate": 4.485380897359587e-06, "loss": 0.658, "step": 5197 }, { "epoch": 0.55, "grad_norm": 2.2880236706230876, "learning_rate": 4.483685739907285e-06, "loss": 0.6671, "step": 5198 }, { "epoch": 0.55, "grad_norm": 2.923715631742324, "learning_rate": 4.481990642438881e-06, "loss": 0.6322, "step": 5199 }, { "epoch": 0.55, "grad_norm": 2.5156565772915966, "learning_rate": 4.480295605151308e-06, "loss": 0.779, "step": 5200 }, { "epoch": 0.55, "grad_norm": 2.279437155096648, "learning_rate": 4.47860062824149e-06, "loss": 0.5941, "step": 5201 }, { "epoch": 0.55, "grad_norm": 2.8210671148826783, "learning_rate": 4.4769057119063425e-06, "loss": 0.6129, "step": 5202 }, { "epoch": 0.55, "grad_norm": 2.1034516568430606, "learning_rate": 4.475210856342777e-06, "loss": 0.655, "step": 5203 }, { "epoch": 0.55, "grad_norm": 2.1311538275721205, "learning_rate": 4.473516061747697e-06, "loss": 0.537, "step": 5204 }, { "epoch": 0.55, "grad_norm": 0.9788330045930997, "learning_rate": 4.471821328318001e-06, "loss": 0.5896, "step": 5205 }, { "epoch": 0.55, "grad_norm": 3.8737593347024135, "learning_rate": 4.470126656250574e-06, "loss": 0.5861, "step": 5206 }, { "epoch": 0.55, "grad_norm": 2.5511717033127996, "learning_rate": 4.468432045742301e-06, "loss": 0.5941, "step": 5207 }, { "epoch": 0.55, "grad_norm": 2.9074504611235072, "learning_rate": 4.466737496990057e-06, "loss": 0.6367, "step": 5208 }, { "epoch": 0.55, "grad_norm": 3.069907121072369, "learning_rate": 4.46504301019071e-06, "loss": 0.6646, "step": 5209 }, { "epoch": 0.55, "grad_norm": 2.6033731154873, "learning_rate": 4.463348585541117e-06, "loss": 0.6088, "step": 5210 }, { "epoch": 0.55, "grad_norm": 2.188979272302636, "learning_rate": 4.461654223238136e-06, "loss": 0.5968, "step": 5211 }, { "epoch": 0.55, "grad_norm": 2.743951094727751, "learning_rate": 4.459959923478609e-06, "loss": 0.6687, "step": 5212 }, { "epoch": 0.55, "grad_norm": 2.379573949144014, "learning_rate": 4.45826568645938e-06, "loss": 0.6304, "step": 5213 }, { "epoch": 0.55, "grad_norm": 2.454626913642241, "learning_rate": 4.456571512377277e-06, "loss": 0.601, "step": 5214 }, { "epoch": 0.55, "grad_norm": 2.822548767237185, "learning_rate": 4.454877401429123e-06, "loss": 0.5249, "step": 5215 }, { "epoch": 0.55, "grad_norm": 2.6657172122549992, "learning_rate": 4.453183353811737e-06, "loss": 0.5839, "step": 5216 }, { "epoch": 0.55, "grad_norm": 1.3013033806129486, "learning_rate": 4.45148936972193e-06, "loss": 0.5733, "step": 5217 }, { "epoch": 0.55, "grad_norm": 2.2066422534371846, "learning_rate": 4.449795449356502e-06, "loss": 0.6463, "step": 5218 }, { "epoch": 0.55, "grad_norm": 2.360497161476003, "learning_rate": 4.4481015929122465e-06, "loss": 0.6128, "step": 5219 }, { "epoch": 0.55, "grad_norm": 2.5466902571725396, "learning_rate": 4.446407800585954e-06, "loss": 0.6788, "step": 5220 }, { "epoch": 0.55, "grad_norm": 7.202579197824117, "learning_rate": 4.444714072574401e-06, "loss": 0.6787, "step": 5221 }, { "epoch": 0.55, "grad_norm": 3.010123101753715, "learning_rate": 4.443020409074365e-06, "loss": 0.7269, "step": 5222 }, { "epoch": 0.55, "grad_norm": 2.8939895157613513, "learning_rate": 4.441326810282606e-06, "loss": 0.7057, "step": 5223 }, { "epoch": 0.55, "grad_norm": 2.7291258805819063, "learning_rate": 4.4396332763958835e-06, "loss": 0.6379, "step": 5224 }, { "epoch": 0.55, "grad_norm": 2.8152570916653485, "learning_rate": 4.437939807610947e-06, "loss": 0.6389, "step": 5225 }, { "epoch": 0.55, "grad_norm": 4.863829933630771, "learning_rate": 4.436246404124539e-06, "loss": 0.592, "step": 5226 }, { "epoch": 0.55, "grad_norm": 1.9228368391092252, "learning_rate": 4.4345530661333955e-06, "loss": 0.6296, "step": 5227 }, { "epoch": 0.55, "grad_norm": 2.4158924275798084, "learning_rate": 4.432859793834239e-06, "loss": 0.582, "step": 5228 }, { "epoch": 0.55, "grad_norm": 2.572611497694934, "learning_rate": 4.431166587423794e-06, "loss": 0.6169, "step": 5229 }, { "epoch": 0.55, "grad_norm": 2.268729295515817, "learning_rate": 4.42947344709877e-06, "loss": 0.6666, "step": 5230 }, { "epoch": 0.55, "grad_norm": 2.0952795027263966, "learning_rate": 4.4277803730558746e-06, "loss": 0.6486, "step": 5231 }, { "epoch": 0.55, "grad_norm": 1.0968523158920367, "learning_rate": 4.426087365491798e-06, "loss": 0.5293, "step": 5232 }, { "epoch": 0.55, "grad_norm": 2.088438078890414, "learning_rate": 4.424394424603234e-06, "loss": 0.5566, "step": 5233 }, { "epoch": 0.55, "grad_norm": 2.1632948290431497, "learning_rate": 4.42270155058686e-06, "loss": 0.6262, "step": 5234 }, { "epoch": 0.55, "grad_norm": 0.9284998520052347, "learning_rate": 4.421008743639353e-06, "loss": 0.5875, "step": 5235 }, { "epoch": 0.55, "grad_norm": 2.0523984063768803, "learning_rate": 4.419316003957376e-06, "loss": 0.7087, "step": 5236 }, { "epoch": 0.55, "grad_norm": 2.443819742443756, "learning_rate": 4.417623331737587e-06, "loss": 0.6464, "step": 5237 }, { "epoch": 0.55, "grad_norm": 2.0896384959029604, "learning_rate": 4.415930727176634e-06, "loss": 0.5857, "step": 5238 }, { "epoch": 0.55, "grad_norm": 2.230344099608812, "learning_rate": 4.414238190471163e-06, "loss": 0.6313, "step": 5239 }, { "epoch": 0.55, "grad_norm": 3.3364275587007395, "learning_rate": 4.412545721817806e-06, "loss": 0.7127, "step": 5240 }, { "epoch": 0.55, "grad_norm": 1.9806871576287524, "learning_rate": 4.410853321413187e-06, "loss": 0.6476, "step": 5241 }, { "epoch": 0.55, "grad_norm": 2.5890707975835805, "learning_rate": 4.409160989453927e-06, "loss": 0.6181, "step": 5242 }, { "epoch": 0.55, "grad_norm": 3.0789313597402534, "learning_rate": 4.407468726136634e-06, "loss": 0.637, "step": 5243 }, { "epoch": 0.55, "grad_norm": 2.1476761639567976, "learning_rate": 4.405776531657916e-06, "loss": 0.6924, "step": 5244 }, { "epoch": 0.55, "grad_norm": 3.4858241564055628, "learning_rate": 4.404084406214358e-06, "loss": 0.7234, "step": 5245 }, { "epoch": 0.55, "grad_norm": 2.3970949139274835, "learning_rate": 4.402392350002554e-06, "loss": 0.5235, "step": 5246 }, { "epoch": 0.55, "grad_norm": 2.235687197461085, "learning_rate": 4.400700363219076e-06, "loss": 0.6731, "step": 5247 }, { "epoch": 0.55, "grad_norm": 8.656344736133104, "learning_rate": 4.399008446060501e-06, "loss": 0.6499, "step": 5248 }, { "epoch": 0.55, "grad_norm": 3.0639313280738216, "learning_rate": 4.397316598723385e-06, "loss": 0.5468, "step": 5249 }, { "epoch": 0.55, "grad_norm": 2.0867057122263857, "learning_rate": 4.3956248214042855e-06, "loss": 0.6578, "step": 5250 }, { "epoch": 0.55, "grad_norm": 2.599040114763554, "learning_rate": 4.393933114299746e-06, "loss": 0.6618, "step": 5251 }, { "epoch": 0.55, "grad_norm": 2.6001557142240945, "learning_rate": 4.3922414776063075e-06, "loss": 0.6799, "step": 5252 }, { "epoch": 0.55, "grad_norm": 2.1507305611894223, "learning_rate": 4.390549911520497e-06, "loss": 0.663, "step": 5253 }, { "epoch": 0.55, "grad_norm": 2.7080296933337946, "learning_rate": 4.388858416238834e-06, "loss": 0.5737, "step": 5254 }, { "epoch": 0.55, "grad_norm": 5.185440108618935, "learning_rate": 4.3871669919578345e-06, "loss": 0.6253, "step": 5255 }, { "epoch": 0.55, "grad_norm": 2.41165829284016, "learning_rate": 4.385475638874001e-06, "loss": 0.6524, "step": 5256 }, { "epoch": 0.55, "grad_norm": 2.0980213739464113, "learning_rate": 4.383784357183835e-06, "loss": 0.5656, "step": 5257 }, { "epoch": 0.55, "grad_norm": 6.656438895282767, "learning_rate": 4.382093147083819e-06, "loss": 0.6449, "step": 5258 }, { "epoch": 0.55, "grad_norm": 2.4496335492831403, "learning_rate": 4.380402008770435e-06, "loss": 0.6254, "step": 5259 }, { "epoch": 0.55, "grad_norm": 2.959663295531009, "learning_rate": 4.378710942440153e-06, "loss": 0.6069, "step": 5260 }, { "epoch": 0.55, "grad_norm": 2.7471051867065492, "learning_rate": 4.377019948289441e-06, "loss": 0.6661, "step": 5261 }, { "epoch": 0.55, "grad_norm": 3.8039135221418805, "learning_rate": 4.375329026514749e-06, "loss": 0.6398, "step": 5262 }, { "epoch": 0.55, "grad_norm": 3.677966696492936, "learning_rate": 4.373638177312524e-06, "loss": 0.5647, "step": 5263 }, { "epoch": 0.55, "grad_norm": 2.6177835119924455, "learning_rate": 4.371947400879205e-06, "loss": 0.6016, "step": 5264 }, { "epoch": 0.55, "grad_norm": 2.233055028774205, "learning_rate": 4.370256697411221e-06, "loss": 0.5827, "step": 5265 }, { "epoch": 0.55, "grad_norm": 2.2362734819525723, "learning_rate": 4.368566067104998e-06, "loss": 0.6496, "step": 5266 }, { "epoch": 0.55, "grad_norm": 2.211364135754513, "learning_rate": 4.366875510156939e-06, "loss": 0.557, "step": 5267 }, { "epoch": 0.55, "grad_norm": 2.8665096255402696, "learning_rate": 4.365185026763455e-06, "loss": 0.5965, "step": 5268 }, { "epoch": 0.55, "grad_norm": 3.0564195544942065, "learning_rate": 4.363494617120938e-06, "loss": 0.7145, "step": 5269 }, { "epoch": 0.55, "grad_norm": 2.64369421900083, "learning_rate": 4.361804281425779e-06, "loss": 0.725, "step": 5270 }, { "epoch": 0.55, "grad_norm": 3.12214434238347, "learning_rate": 4.360114019874353e-06, "loss": 0.6916, "step": 5271 }, { "epoch": 0.55, "grad_norm": 6.609092742818454, "learning_rate": 4.35842383266303e-06, "loss": 0.6325, "step": 5272 }, { "epoch": 0.55, "grad_norm": 2.300634353062687, "learning_rate": 4.35673371998817e-06, "loss": 0.6284, "step": 5273 }, { "epoch": 0.55, "grad_norm": 3.7838481612576356, "learning_rate": 4.355043682046129e-06, "loss": 0.6038, "step": 5274 }, { "epoch": 0.56, "grad_norm": 2.7986771832900788, "learning_rate": 4.353353719033249e-06, "loss": 0.654, "step": 5275 }, { "epoch": 0.56, "grad_norm": 4.87051694324319, "learning_rate": 4.3516638311458624e-06, "loss": 0.5146, "step": 5276 }, { "epoch": 0.56, "grad_norm": 2.349730181771263, "learning_rate": 4.349974018580298e-06, "loss": 0.6656, "step": 5277 }, { "epoch": 0.56, "grad_norm": 2.335395790852798, "learning_rate": 4.348284281532874e-06, "loss": 0.5557, "step": 5278 }, { "epoch": 0.56, "grad_norm": 2.7325757799709605, "learning_rate": 4.3465946201999e-06, "loss": 0.7216, "step": 5279 }, { "epoch": 0.56, "grad_norm": 2.104588371726466, "learning_rate": 4.344905034777672e-06, "loss": 0.6013, "step": 5280 }, { "epoch": 0.56, "grad_norm": 2.3109494580443304, "learning_rate": 4.343215525462484e-06, "loss": 0.6356, "step": 5281 }, { "epoch": 0.56, "grad_norm": 3.017653085191442, "learning_rate": 4.3415260924506165e-06, "loss": 0.6454, "step": 5282 }, { "epoch": 0.56, "grad_norm": 2.571020375717761, "learning_rate": 4.339836735938347e-06, "loss": 0.5784, "step": 5283 }, { "epoch": 0.56, "grad_norm": 2.4571187576232276, "learning_rate": 4.338147456121935e-06, "loss": 0.6483, "step": 5284 }, { "epoch": 0.56, "grad_norm": 2.5644236337830937, "learning_rate": 4.336458253197637e-06, "loss": 0.5715, "step": 5285 }, { "epoch": 0.56, "grad_norm": 2.511149918383041, "learning_rate": 4.334769127361703e-06, "loss": 0.6012, "step": 5286 }, { "epoch": 0.56, "grad_norm": 4.883950372753124, "learning_rate": 4.333080078810369e-06, "loss": 0.6156, "step": 5287 }, { "epoch": 0.56, "grad_norm": 3.4555634315635984, "learning_rate": 4.331391107739864e-06, "loss": 0.6276, "step": 5288 }, { "epoch": 0.56, "grad_norm": 2.8118042237460923, "learning_rate": 4.329702214346406e-06, "loss": 0.6439, "step": 5289 }, { "epoch": 0.56, "grad_norm": 2.141667067861921, "learning_rate": 4.328013398826207e-06, "loss": 0.6292, "step": 5290 }, { "epoch": 0.56, "grad_norm": 2.1279882409294806, "learning_rate": 4.3263246613754685e-06, "loss": 0.6465, "step": 5291 }, { "epoch": 0.56, "grad_norm": 5.1797440641409, "learning_rate": 4.324636002190386e-06, "loss": 0.6019, "step": 5292 }, { "epoch": 0.56, "grad_norm": 2.5518905433521217, "learning_rate": 4.322947421467138e-06, "loss": 0.6755, "step": 5293 }, { "epoch": 0.56, "grad_norm": 4.707501918054458, "learning_rate": 4.321258919401903e-06, "loss": 0.7213, "step": 5294 }, { "epoch": 0.56, "grad_norm": 3.090219508336725, "learning_rate": 4.319570496190843e-06, "loss": 0.5684, "step": 5295 }, { "epoch": 0.56, "grad_norm": 2.5586775104390838, "learning_rate": 4.317882152030118e-06, "loss": 0.6409, "step": 5296 }, { "epoch": 0.56, "grad_norm": 2.7425290806450615, "learning_rate": 4.316193887115871e-06, "loss": 0.6583, "step": 5297 }, { "epoch": 0.56, "grad_norm": 2.929685634618751, "learning_rate": 4.314505701644242e-06, "loss": 0.6087, "step": 5298 }, { "epoch": 0.56, "grad_norm": 1.037591260391563, "learning_rate": 4.3128175958113585e-06, "loss": 0.5755, "step": 5299 }, { "epoch": 0.56, "grad_norm": 2.459322238903088, "learning_rate": 4.311129569813341e-06, "loss": 0.6523, "step": 5300 }, { "epoch": 0.56, "grad_norm": 3.3425180981358253, "learning_rate": 4.3094416238463e-06, "loss": 0.6665, "step": 5301 }, { "epoch": 0.56, "grad_norm": 2.2917138959022267, "learning_rate": 4.307753758106332e-06, "loss": 0.6679, "step": 5302 }, { "epoch": 0.56, "grad_norm": 1.9682044085217711, "learning_rate": 4.306065972789533e-06, "loss": 0.6181, "step": 5303 }, { "epoch": 0.56, "grad_norm": 2.339833756953796, "learning_rate": 4.304378268091982e-06, "loss": 0.6613, "step": 5304 }, { "epoch": 0.56, "grad_norm": 3.445630959329979, "learning_rate": 4.302690644209756e-06, "loss": 0.6045, "step": 5305 }, { "epoch": 0.56, "grad_norm": 2.5016790643491187, "learning_rate": 4.301003101338913e-06, "loss": 0.5814, "step": 5306 }, { "epoch": 0.56, "grad_norm": 5.6755679527300735, "learning_rate": 4.29931563967551e-06, "loss": 0.6186, "step": 5307 }, { "epoch": 0.56, "grad_norm": 2.4930015287099927, "learning_rate": 4.2976282594155885e-06, "loss": 0.6656, "step": 5308 }, { "epoch": 0.56, "grad_norm": 2.3871048273612114, "learning_rate": 4.2959409607551885e-06, "loss": 0.6458, "step": 5309 }, { "epoch": 0.56, "grad_norm": 2.6870418365333726, "learning_rate": 4.294253743890331e-06, "loss": 0.5847, "step": 5310 }, { "epoch": 0.56, "grad_norm": 2.453388094507715, "learning_rate": 4.292566609017032e-06, "loss": 0.5881, "step": 5311 }, { "epoch": 0.56, "grad_norm": 3.1874846967139456, "learning_rate": 4.290879556331301e-06, "loss": 0.5902, "step": 5312 }, { "epoch": 0.56, "grad_norm": 5.5444861113536374, "learning_rate": 4.2891925860291315e-06, "loss": 0.4916, "step": 5313 }, { "epoch": 0.56, "grad_norm": 2.2307192839807906, "learning_rate": 4.287505698306517e-06, "loss": 0.6353, "step": 5314 }, { "epoch": 0.56, "grad_norm": 2.316081716986372, "learning_rate": 4.285818893359427e-06, "loss": 0.6589, "step": 5315 }, { "epoch": 0.56, "grad_norm": 2.455941628341801, "learning_rate": 4.284132171383834e-06, "loss": 0.6596, "step": 5316 }, { "epoch": 0.56, "grad_norm": 3.220993513664449, "learning_rate": 4.2824455325756955e-06, "loss": 0.6346, "step": 5317 }, { "epoch": 0.56, "grad_norm": 2.19089056244659, "learning_rate": 4.2807589771309635e-06, "loss": 0.5772, "step": 5318 }, { "epoch": 0.56, "grad_norm": 2.731529565019871, "learning_rate": 4.2790725052455726e-06, "loss": 0.6435, "step": 5319 }, { "epoch": 0.56, "grad_norm": 3.5785634546615195, "learning_rate": 4.2773861171154525e-06, "loss": 0.7559, "step": 5320 }, { "epoch": 0.56, "grad_norm": 3.480519667349258, "learning_rate": 4.275699812936526e-06, "loss": 0.6401, "step": 5321 }, { "epoch": 0.56, "grad_norm": 2.4318321347360388, "learning_rate": 4.2740135929047034e-06, "loss": 0.6463, "step": 5322 }, { "epoch": 0.56, "grad_norm": 2.5507047618208354, "learning_rate": 4.2723274572158805e-06, "loss": 0.6721, "step": 5323 }, { "epoch": 0.56, "grad_norm": 2.4756784180683753, "learning_rate": 4.27064140606595e-06, "loss": 0.6507, "step": 5324 }, { "epoch": 0.56, "grad_norm": 2.5608707563373527, "learning_rate": 4.268955439650793e-06, "loss": 0.5792, "step": 5325 }, { "epoch": 0.56, "grad_norm": 2.551538680904513, "learning_rate": 4.267269558166279e-06, "loss": 0.6255, "step": 5326 }, { "epoch": 0.56, "grad_norm": 2.2584831084156374, "learning_rate": 4.265583761808272e-06, "loss": 0.7075, "step": 5327 }, { "epoch": 0.56, "grad_norm": 2.4024189459356524, "learning_rate": 4.26389805077262e-06, "loss": 0.6714, "step": 5328 }, { "epoch": 0.56, "grad_norm": 2.5974293268410182, "learning_rate": 4.262212425255164e-06, "loss": 0.6049, "step": 5329 }, { "epoch": 0.56, "grad_norm": 2.222020544696383, "learning_rate": 4.260526885451734e-06, "loss": 0.6188, "step": 5330 }, { "epoch": 0.56, "grad_norm": 2.637638372678464, "learning_rate": 4.258841431558156e-06, "loss": 0.6554, "step": 5331 }, { "epoch": 0.56, "grad_norm": 2.0803098388886165, "learning_rate": 4.257156063770237e-06, "loss": 0.6647, "step": 5332 }, { "epoch": 0.56, "grad_norm": 2.038548102890104, "learning_rate": 4.2554707822837775e-06, "loss": 0.576, "step": 5333 }, { "epoch": 0.56, "grad_norm": 2.4381809992111414, "learning_rate": 4.253785587294571e-06, "loss": 0.6267, "step": 5334 }, { "epoch": 0.56, "grad_norm": 2.9095370655137764, "learning_rate": 4.252100478998398e-06, "loss": 0.5394, "step": 5335 }, { "epoch": 0.56, "grad_norm": 2.208800982681307, "learning_rate": 4.250415457591031e-06, "loss": 0.5668, "step": 5336 }, { "epoch": 0.56, "grad_norm": 2.237278462901377, "learning_rate": 4.248730523268227e-06, "loss": 0.554, "step": 5337 }, { "epoch": 0.56, "grad_norm": 5.159594855549469, "learning_rate": 4.24704567622574e-06, "loss": 0.5775, "step": 5338 }, { "epoch": 0.56, "grad_norm": 2.0495262092053586, "learning_rate": 4.245360916659309e-06, "loss": 0.6269, "step": 5339 }, { "epoch": 0.56, "grad_norm": 2.7328601805196513, "learning_rate": 4.243676244764667e-06, "loss": 0.5666, "step": 5340 }, { "epoch": 0.56, "grad_norm": 2.599232977470765, "learning_rate": 4.241991660737532e-06, "loss": 0.5742, "step": 5341 }, { "epoch": 0.56, "grad_norm": 2.6689500818787217, "learning_rate": 4.240307164773615e-06, "loss": 0.6964, "step": 5342 }, { "epoch": 0.56, "grad_norm": 2.0051800373166295, "learning_rate": 4.238622757068614e-06, "loss": 0.6334, "step": 5343 }, { "epoch": 0.56, "grad_norm": 3.296191095173538, "learning_rate": 4.2369384378182216e-06, "loss": 0.5819, "step": 5344 }, { "epoch": 0.56, "grad_norm": 3.7526757822737973, "learning_rate": 4.2352542072181156e-06, "loss": 0.5592, "step": 5345 }, { "epoch": 0.56, "grad_norm": 4.769568113242851, "learning_rate": 4.233570065463964e-06, "loss": 0.6135, "step": 5346 }, { "epoch": 0.56, "grad_norm": 2.2793506199943447, "learning_rate": 4.231886012751427e-06, "loss": 0.5955, "step": 5347 }, { "epoch": 0.56, "grad_norm": 2.4668991593558562, "learning_rate": 4.230202049276152e-06, "loss": 0.6746, "step": 5348 }, { "epoch": 0.56, "grad_norm": 2.273428983253158, "learning_rate": 4.228518175233781e-06, "loss": 0.6367, "step": 5349 }, { "epoch": 0.56, "grad_norm": 3.012893755691541, "learning_rate": 4.226834390819935e-06, "loss": 0.6371, "step": 5350 }, { "epoch": 0.56, "grad_norm": 2.892193534980026, "learning_rate": 4.225150696230236e-06, "loss": 0.6217, "step": 5351 }, { "epoch": 0.56, "grad_norm": 2.4872260622117532, "learning_rate": 4.223467091660287e-06, "loss": 0.6131, "step": 5352 }, { "epoch": 0.56, "grad_norm": 2.493780184626296, "learning_rate": 4.22178357730569e-06, "loss": 0.6648, "step": 5353 }, { "epoch": 0.56, "grad_norm": 2.1706205845561226, "learning_rate": 4.220100153362026e-06, "loss": 0.6315, "step": 5354 }, { "epoch": 0.56, "grad_norm": 2.2135535393045607, "learning_rate": 4.2184168200248695e-06, "loss": 0.6127, "step": 5355 }, { "epoch": 0.56, "grad_norm": 3.6077079918211687, "learning_rate": 4.21673357748979e-06, "loss": 0.5827, "step": 5356 }, { "epoch": 0.56, "grad_norm": 2.2105823742172683, "learning_rate": 4.215050425952339e-06, "loss": 0.6933, "step": 5357 }, { "epoch": 0.56, "grad_norm": 2.2055870618699362, "learning_rate": 4.213367365608061e-06, "loss": 0.638, "step": 5358 }, { "epoch": 0.56, "grad_norm": 2.588655796581996, "learning_rate": 4.211684396652487e-06, "loss": 0.6038, "step": 5359 }, { "epoch": 0.56, "grad_norm": 2.0448283559826437, "learning_rate": 4.210001519281142e-06, "loss": 0.7124, "step": 5360 }, { "epoch": 0.56, "grad_norm": 2.062696584888601, "learning_rate": 4.208318733689535e-06, "loss": 0.626, "step": 5361 }, { "epoch": 0.56, "grad_norm": 2.604045649910468, "learning_rate": 4.206636040073172e-06, "loss": 0.6666, "step": 5362 }, { "epoch": 0.56, "grad_norm": 2.508672230990463, "learning_rate": 4.204953438627539e-06, "loss": 0.6892, "step": 5363 }, { "epoch": 0.56, "grad_norm": 4.199729614432598, "learning_rate": 4.203270929548117e-06, "loss": 0.6088, "step": 5364 }, { "epoch": 0.56, "grad_norm": 2.315338636083697, "learning_rate": 4.2015885130303745e-06, "loss": 0.5451, "step": 5365 }, { "epoch": 0.56, "grad_norm": 2.8563637400295274, "learning_rate": 4.199906189269773e-06, "loss": 0.6707, "step": 5366 }, { "epoch": 0.56, "grad_norm": 3.3092435696741203, "learning_rate": 4.198223958461756e-06, "loss": 0.5999, "step": 5367 }, { "epoch": 0.56, "grad_norm": 2.284714130272184, "learning_rate": 4.196541820801761e-06, "loss": 0.5714, "step": 5368 }, { "epoch": 0.56, "grad_norm": 2.208365754435486, "learning_rate": 4.194859776485216e-06, "loss": 0.6477, "step": 5369 }, { "epoch": 0.57, "grad_norm": 2.9161689800766633, "learning_rate": 4.193177825707535e-06, "loss": 0.605, "step": 5370 }, { "epoch": 0.57, "grad_norm": 2.5071516041978534, "learning_rate": 4.191495968664122e-06, "loss": 0.6405, "step": 5371 }, { "epoch": 0.57, "grad_norm": 2.374183355215098, "learning_rate": 4.189814205550369e-06, "loss": 0.6325, "step": 5372 }, { "epoch": 0.57, "grad_norm": 2.9716578098089905, "learning_rate": 4.18813253656166e-06, "loss": 0.5657, "step": 5373 }, { "epoch": 0.57, "grad_norm": 2.5849556565957026, "learning_rate": 4.186450961893366e-06, "loss": 0.5766, "step": 5374 }, { "epoch": 0.57, "grad_norm": 2.6139215621325977, "learning_rate": 4.184769481740848e-06, "loss": 0.5995, "step": 5375 }, { "epoch": 0.57, "grad_norm": 2.035112537909609, "learning_rate": 4.183088096299455e-06, "loss": 0.6741, "step": 5376 }, { "epoch": 0.57, "grad_norm": 4.3310970193321285, "learning_rate": 4.1814068057645255e-06, "loss": 0.6492, "step": 5377 }, { "epoch": 0.57, "grad_norm": 2.581325181469474, "learning_rate": 4.179725610331385e-06, "loss": 0.5451, "step": 5378 }, { "epoch": 0.57, "grad_norm": 4.498160977405595, "learning_rate": 4.178044510195356e-06, "loss": 0.657, "step": 5379 }, { "epoch": 0.57, "grad_norm": 2.631101141420637, "learning_rate": 4.176363505551737e-06, "loss": 0.7063, "step": 5380 }, { "epoch": 0.57, "grad_norm": 2.528098787527815, "learning_rate": 4.174682596595824e-06, "loss": 0.6972, "step": 5381 }, { "epoch": 0.57, "grad_norm": 2.3803147400781186, "learning_rate": 4.173001783522903e-06, "loss": 0.6797, "step": 5382 }, { "epoch": 0.57, "grad_norm": 2.483697902714156, "learning_rate": 4.171321066528243e-06, "loss": 0.659, "step": 5383 }, { "epoch": 0.57, "grad_norm": 3.1064216692361284, "learning_rate": 4.16964044580711e-06, "loss": 0.6183, "step": 5384 }, { "epoch": 0.57, "grad_norm": 2.750461435944443, "learning_rate": 4.167959921554745e-06, "loss": 0.6222, "step": 5385 }, { "epoch": 0.57, "grad_norm": 2.0857966955698677, "learning_rate": 4.166279493966393e-06, "loss": 0.6282, "step": 5386 }, { "epoch": 0.57, "grad_norm": 3.2748873110006333, "learning_rate": 4.164599163237277e-06, "loss": 0.627, "step": 5387 }, { "epoch": 0.57, "grad_norm": 2.7282550442214744, "learning_rate": 4.1629189295626195e-06, "loss": 0.6795, "step": 5388 }, { "epoch": 0.57, "grad_norm": 4.174622024247829, "learning_rate": 4.161238793137619e-06, "loss": 0.5899, "step": 5389 }, { "epoch": 0.57, "grad_norm": 2.068250197507869, "learning_rate": 4.159558754157469e-06, "loss": 0.5851, "step": 5390 }, { "epoch": 0.57, "grad_norm": 2.1242958834633563, "learning_rate": 4.157878812817356e-06, "loss": 0.6659, "step": 5391 }, { "epoch": 0.57, "grad_norm": 2.7032218490370274, "learning_rate": 4.1561989693124485e-06, "loss": 0.6705, "step": 5392 }, { "epoch": 0.57, "grad_norm": 2.753703066203034, "learning_rate": 4.154519223837904e-06, "loss": 0.6477, "step": 5393 }, { "epoch": 0.57, "grad_norm": 2.5189720424400934, "learning_rate": 4.1528395765888716e-06, "loss": 0.6326, "step": 5394 }, { "epoch": 0.57, "grad_norm": 2.1872662432303587, "learning_rate": 4.15116002776049e-06, "loss": 0.6172, "step": 5395 }, { "epoch": 0.57, "grad_norm": 2.8914596193035806, "learning_rate": 4.14948057754788e-06, "loss": 0.6174, "step": 5396 }, { "epoch": 0.57, "grad_norm": 4.0372536675287805, "learning_rate": 4.147801226146163e-06, "loss": 0.5389, "step": 5397 }, { "epoch": 0.57, "grad_norm": 3.7867058905780673, "learning_rate": 4.146121973750431e-06, "loss": 0.6527, "step": 5398 }, { "epoch": 0.57, "grad_norm": 2.3654720115387655, "learning_rate": 4.144442820555782e-06, "loss": 0.6418, "step": 5399 }, { "epoch": 0.57, "grad_norm": 2.179581313117845, "learning_rate": 4.142763766757292e-06, "loss": 0.635, "step": 5400 }, { "epoch": 0.57, "grad_norm": 2.2479561580681238, "learning_rate": 4.141084812550031e-06, "loss": 0.6461, "step": 5401 }, { "epoch": 0.57, "grad_norm": 2.841191478849272, "learning_rate": 4.139405958129053e-06, "loss": 0.6007, "step": 5402 }, { "epoch": 0.57, "grad_norm": 2.0859232520906836, "learning_rate": 4.137727203689402e-06, "loss": 0.5766, "step": 5403 }, { "epoch": 0.57, "grad_norm": 3.3485825067338117, "learning_rate": 4.136048549426112e-06, "loss": 0.6512, "step": 5404 }, { "epoch": 0.57, "grad_norm": 2.49496623222408, "learning_rate": 4.134369995534206e-06, "loss": 0.6146, "step": 5405 }, { "epoch": 0.57, "grad_norm": 2.324007129830475, "learning_rate": 4.132691542208691e-06, "loss": 0.6011, "step": 5406 }, { "epoch": 0.57, "grad_norm": 1.0793244375392315, "learning_rate": 4.1310131896445635e-06, "loss": 0.5288, "step": 5407 }, { "epoch": 0.57, "grad_norm": 3.2583878078923725, "learning_rate": 4.129334938036813e-06, "loss": 0.6705, "step": 5408 }, { "epoch": 0.57, "grad_norm": 2.307137719361831, "learning_rate": 4.127656787580412e-06, "loss": 0.6923, "step": 5409 }, { "epoch": 0.57, "grad_norm": 2.578677072913258, "learning_rate": 4.125978738470326e-06, "loss": 0.6375, "step": 5410 }, { "epoch": 0.57, "grad_norm": 2.4643009262796918, "learning_rate": 4.1243007909015016e-06, "loss": 0.5542, "step": 5411 }, { "epoch": 0.57, "grad_norm": 2.2146931372972523, "learning_rate": 4.1226229450688805e-06, "loss": 0.6535, "step": 5412 }, { "epoch": 0.57, "grad_norm": 2.119950795155613, "learning_rate": 4.120945201167388e-06, "loss": 0.6085, "step": 5413 }, { "epoch": 0.57, "grad_norm": 3.5062101551407485, "learning_rate": 4.119267559391944e-06, "loss": 0.6207, "step": 5414 }, { "epoch": 0.57, "grad_norm": 1.8128176772340931, "learning_rate": 4.117590019937447e-06, "loss": 0.5534, "step": 5415 }, { "epoch": 0.57, "grad_norm": 2.4544616665994945, "learning_rate": 4.11591258299879e-06, "loss": 0.664, "step": 5416 }, { "epoch": 0.57, "grad_norm": 3.5069164956287646, "learning_rate": 4.114235248770854e-06, "loss": 0.6393, "step": 5417 }, { "epoch": 0.57, "grad_norm": 2.7344010000318297, "learning_rate": 4.112558017448508e-06, "loss": 0.6276, "step": 5418 }, { "epoch": 0.57, "grad_norm": 3.5983760316820343, "learning_rate": 4.1108808892266045e-06, "loss": 0.6458, "step": 5419 }, { "epoch": 0.57, "grad_norm": 2.8082114352695724, "learning_rate": 4.109203864299989e-06, "loss": 0.5609, "step": 5420 }, { "epoch": 0.57, "grad_norm": 2.364880898558155, "learning_rate": 4.107526942863493e-06, "loss": 0.7066, "step": 5421 }, { "epoch": 0.57, "grad_norm": 1.9084674442912224, "learning_rate": 4.105850125111937e-06, "loss": 0.613, "step": 5422 }, { "epoch": 0.57, "grad_norm": 1.039149108932299, "learning_rate": 4.104173411240131e-06, "loss": 0.5501, "step": 5423 }, { "epoch": 0.57, "grad_norm": 2.760756741136062, "learning_rate": 4.102496801442868e-06, "loss": 0.7026, "step": 5424 }, { "epoch": 0.57, "grad_norm": 2.2973399753902295, "learning_rate": 4.10082029591493e-06, "loss": 0.643, "step": 5425 }, { "epoch": 0.57, "grad_norm": 2.5509810297461826, "learning_rate": 4.099143894851092e-06, "loss": 0.5958, "step": 5426 }, { "epoch": 0.57, "grad_norm": 1.0570994896608783, "learning_rate": 4.097467598446113e-06, "loss": 0.5708, "step": 5427 }, { "epoch": 0.57, "grad_norm": 2.0781495093655433, "learning_rate": 4.095791406894739e-06, "loss": 0.5697, "step": 5428 }, { "epoch": 0.57, "grad_norm": 2.46415369130286, "learning_rate": 4.094115320391704e-06, "loss": 0.6917, "step": 5429 }, { "epoch": 0.57, "grad_norm": 2.1679299121769535, "learning_rate": 4.0924393391317344e-06, "loss": 0.6069, "step": 5430 }, { "epoch": 0.57, "grad_norm": 1.0173189356350105, "learning_rate": 4.090763463309536e-06, "loss": 0.5557, "step": 5431 }, { "epoch": 0.57, "grad_norm": 2.570745562650635, "learning_rate": 4.089087693119815e-06, "loss": 0.6819, "step": 5432 }, { "epoch": 0.57, "grad_norm": 2.7474195051489185, "learning_rate": 4.087412028757249e-06, "loss": 0.621, "step": 5433 }, { "epoch": 0.57, "grad_norm": 1.9523833561555928, "learning_rate": 4.085736470416517e-06, "loss": 0.6772, "step": 5434 }, { "epoch": 0.57, "grad_norm": 2.4968268241742986, "learning_rate": 4.084061018292277e-06, "loss": 0.6174, "step": 5435 }, { "epoch": 0.57, "grad_norm": 3.53792252658528, "learning_rate": 4.082385672579182e-06, "loss": 0.637, "step": 5436 }, { "epoch": 0.57, "grad_norm": 2.964810455872569, "learning_rate": 4.0807104334718674e-06, "loss": 0.6321, "step": 5437 }, { "epoch": 0.57, "grad_norm": 2.2794226187051545, "learning_rate": 4.079035301164955e-06, "loss": 0.5753, "step": 5438 }, { "epoch": 0.57, "grad_norm": 2.488542771422994, "learning_rate": 4.0773602758530606e-06, "loss": 0.6145, "step": 5439 }, { "epoch": 0.57, "grad_norm": 2.149185123490166, "learning_rate": 4.0756853577307835e-06, "loss": 0.7298, "step": 5440 }, { "epoch": 0.57, "grad_norm": 19.446457701330363, "learning_rate": 4.0740105469927084e-06, "loss": 0.6299, "step": 5441 }, { "epoch": 0.57, "grad_norm": 2.921362940815699, "learning_rate": 4.07233584383341e-06, "loss": 0.5932, "step": 5442 }, { "epoch": 0.57, "grad_norm": 2.436933837657758, "learning_rate": 4.070661248447453e-06, "loss": 0.6107, "step": 5443 }, { "epoch": 0.57, "grad_norm": 2.4936425865614624, "learning_rate": 4.0689867610293845e-06, "loss": 0.6755, "step": 5444 }, { "epoch": 0.57, "grad_norm": 2.3563232675472374, "learning_rate": 4.067312381773744e-06, "loss": 0.5669, "step": 5445 }, { "epoch": 0.57, "grad_norm": 5.811216819647008, "learning_rate": 4.065638110875055e-06, "loss": 0.6642, "step": 5446 }, { "epoch": 0.57, "grad_norm": 2.9029408699267543, "learning_rate": 4.063963948527829e-06, "loss": 0.6854, "step": 5447 }, { "epoch": 0.57, "grad_norm": 2.3885007406079173, "learning_rate": 4.062289894926564e-06, "loss": 0.6045, "step": 5448 }, { "epoch": 0.57, "grad_norm": 2.5249267659429764, "learning_rate": 4.060615950265752e-06, "loss": 0.7814, "step": 5449 }, { "epoch": 0.57, "grad_norm": 2.5576000799315537, "learning_rate": 4.058942114739861e-06, "loss": 0.6392, "step": 5450 }, { "epoch": 0.57, "grad_norm": 2.340626814388993, "learning_rate": 4.057268388543354e-06, "loss": 0.6515, "step": 5451 }, { "epoch": 0.57, "grad_norm": 2.366975049960535, "learning_rate": 4.055594771870682e-06, "loss": 0.5461, "step": 5452 }, { "epoch": 0.57, "grad_norm": 3.5250602391277406, "learning_rate": 4.05392126491628e-06, "loss": 0.6166, "step": 5453 }, { "epoch": 0.57, "grad_norm": 2.7710266206375826, "learning_rate": 4.052247867874569e-06, "loss": 0.5996, "step": 5454 }, { "epoch": 0.57, "grad_norm": 2.5532961974178727, "learning_rate": 4.050574580939961e-06, "loss": 0.6165, "step": 5455 }, { "epoch": 0.57, "grad_norm": 4.2749550989913345, "learning_rate": 4.0489014043068545e-06, "loss": 0.6004, "step": 5456 }, { "epoch": 0.57, "grad_norm": 2.131553043318907, "learning_rate": 4.047228338169632e-06, "loss": 0.6498, "step": 5457 }, { "epoch": 0.57, "grad_norm": 2.574736501197967, "learning_rate": 4.04555538272267e-06, "loss": 0.5856, "step": 5458 }, { "epoch": 0.57, "grad_norm": 2.1788021789713112, "learning_rate": 4.0438825381603225e-06, "loss": 0.6355, "step": 5459 }, { "epoch": 0.57, "grad_norm": 3.4984416537675296, "learning_rate": 4.042209804676937e-06, "loss": 0.6518, "step": 5460 }, { "epoch": 0.57, "grad_norm": 2.5902715960904383, "learning_rate": 4.040537182466849e-06, "loss": 0.538, "step": 5461 }, { "epoch": 0.57, "grad_norm": 3.1931566795393427, "learning_rate": 4.038864671724379e-06, "loss": 0.6478, "step": 5462 }, { "epoch": 0.57, "grad_norm": 2.817321693238749, "learning_rate": 4.0371922726438314e-06, "loss": 0.6962, "step": 5463 }, { "epoch": 0.57, "grad_norm": 2.1447873012073355, "learning_rate": 4.035519985419502e-06, "loss": 0.5852, "step": 5464 }, { "epoch": 0.58, "grad_norm": 2.884660635376069, "learning_rate": 4.033847810245673e-06, "loss": 0.6715, "step": 5465 }, { "epoch": 0.58, "grad_norm": 3.4056554597314035, "learning_rate": 4.0321757473166145e-06, "loss": 0.6417, "step": 5466 }, { "epoch": 0.58, "grad_norm": 2.4034378137851213, "learning_rate": 4.030503796826578e-06, "loss": 0.6369, "step": 5467 }, { "epoch": 0.58, "grad_norm": 3.279509860886487, "learning_rate": 4.028831958969807e-06, "loss": 0.6686, "step": 5468 }, { "epoch": 0.58, "grad_norm": 2.6768593400635177, "learning_rate": 4.027160233940534e-06, "loss": 0.6936, "step": 5469 }, { "epoch": 0.58, "grad_norm": 2.696418739353531, "learning_rate": 4.02548862193297e-06, "loss": 0.5815, "step": 5470 }, { "epoch": 0.58, "grad_norm": 2.8470170762266642, "learning_rate": 4.023817123141324e-06, "loss": 0.6268, "step": 5471 }, { "epoch": 0.58, "grad_norm": 2.3205700797742703, "learning_rate": 4.022145737759781e-06, "loss": 0.7149, "step": 5472 }, { "epoch": 0.58, "grad_norm": 2.4124617955340772, "learning_rate": 4.020474465982519e-06, "loss": 0.5885, "step": 5473 }, { "epoch": 0.58, "grad_norm": 2.4297597298078606, "learning_rate": 4.0188033080037025e-06, "loss": 0.632, "step": 5474 }, { "epoch": 0.58, "grad_norm": 2.7294892840221427, "learning_rate": 4.017132264017483e-06, "loss": 0.6413, "step": 5475 }, { "epoch": 0.58, "grad_norm": 1.0863421441237322, "learning_rate": 4.015461334217995e-06, "loss": 0.5971, "step": 5476 }, { "epoch": 0.58, "grad_norm": 2.5430832765858735, "learning_rate": 4.013790518799361e-06, "loss": 0.6309, "step": 5477 }, { "epoch": 0.58, "grad_norm": 2.900637212310984, "learning_rate": 4.012119817955696e-06, "loss": 0.5844, "step": 5478 }, { "epoch": 0.58, "grad_norm": 2.1161956263445076, "learning_rate": 4.010449231881093e-06, "loss": 0.6315, "step": 5479 }, { "epoch": 0.58, "grad_norm": 2.31955950639972, "learning_rate": 4.00877876076964e-06, "loss": 0.64, "step": 5480 }, { "epoch": 0.58, "grad_norm": 2.1236471054278256, "learning_rate": 4.0071084048154044e-06, "loss": 0.6076, "step": 5481 }, { "epoch": 0.58, "grad_norm": 4.246468971263171, "learning_rate": 4.005438164212444e-06, "loss": 0.6235, "step": 5482 }, { "epoch": 0.58, "grad_norm": 2.0097201604995623, "learning_rate": 4.0037680391548015e-06, "loss": 0.5754, "step": 5483 }, { "epoch": 0.58, "grad_norm": 2.2344373050878206, "learning_rate": 4.002098029836511e-06, "loss": 0.6543, "step": 5484 }, { "epoch": 0.58, "grad_norm": 2.9506649394699247, "learning_rate": 4.000428136451585e-06, "loss": 0.5946, "step": 5485 }, { "epoch": 0.58, "grad_norm": 5.350295409726678, "learning_rate": 3.998758359194028e-06, "loss": 0.5822, "step": 5486 }, { "epoch": 0.58, "grad_norm": 2.4139769523797048, "learning_rate": 3.9970886982578314e-06, "loss": 0.6689, "step": 5487 }, { "epoch": 0.58, "grad_norm": 2.322280956259487, "learning_rate": 3.995419153836972e-06, "loss": 0.5373, "step": 5488 }, { "epoch": 0.58, "grad_norm": 2.334612127720499, "learning_rate": 3.9937497261254114e-06, "loss": 0.741, "step": 5489 }, { "epoch": 0.58, "grad_norm": 2.6565331932796057, "learning_rate": 3.992080415317096e-06, "loss": 0.6195, "step": 5490 }, { "epoch": 0.58, "grad_norm": 2.485017788078939, "learning_rate": 3.9904112216059656e-06, "loss": 0.5669, "step": 5491 }, { "epoch": 0.58, "grad_norm": 3.189719358830894, "learning_rate": 3.988742145185941e-06, "loss": 0.6253, "step": 5492 }, { "epoch": 0.58, "grad_norm": 2.8776383714391742, "learning_rate": 3.987073186250932e-06, "loss": 0.6488, "step": 5493 }, { "epoch": 0.58, "grad_norm": 3.0757770946653458, "learning_rate": 3.985404344994831e-06, "loss": 0.7339, "step": 5494 }, { "epoch": 0.58, "grad_norm": 2.9151134884480556, "learning_rate": 3.9837356216115205e-06, "loss": 0.6524, "step": 5495 }, { "epoch": 0.58, "grad_norm": 2.5361472839977455, "learning_rate": 3.982067016294868e-06, "loss": 0.6586, "step": 5496 }, { "epoch": 0.58, "grad_norm": 2.1819398494243964, "learning_rate": 3.980398529238728e-06, "loss": 0.6144, "step": 5497 }, { "epoch": 0.58, "grad_norm": 9.277473751575629, "learning_rate": 3.978730160636938e-06, "loss": 0.5899, "step": 5498 }, { "epoch": 0.58, "grad_norm": 4.865134875134504, "learning_rate": 3.977061910683325e-06, "loss": 0.6446, "step": 5499 }, { "epoch": 0.58, "grad_norm": 2.0322930621616004, "learning_rate": 3.975393779571704e-06, "loss": 0.6317, "step": 5500 }, { "epoch": 0.58, "grad_norm": 2.853800658322435, "learning_rate": 3.9737257674958714e-06, "loss": 0.5481, "step": 5501 }, { "epoch": 0.58, "grad_norm": 0.9879336943874001, "learning_rate": 3.972057874649613e-06, "loss": 0.5534, "step": 5502 }, { "epoch": 0.58, "grad_norm": 2.222678097797317, "learning_rate": 3.970390101226697e-06, "loss": 0.5955, "step": 5503 }, { "epoch": 0.58, "grad_norm": 2.269757412226817, "learning_rate": 3.968722447420884e-06, "loss": 0.6627, "step": 5504 }, { "epoch": 0.58, "grad_norm": 4.009770633477226, "learning_rate": 3.967054913425916e-06, "loss": 0.6221, "step": 5505 }, { "epoch": 0.58, "grad_norm": 2.8457569192736396, "learning_rate": 3.965387499435524e-06, "loss": 0.6292, "step": 5506 }, { "epoch": 0.58, "grad_norm": 3.254292979448844, "learning_rate": 3.963720205643419e-06, "loss": 0.6363, "step": 5507 }, { "epoch": 0.58, "grad_norm": 2.300083482731035, "learning_rate": 3.962053032243305e-06, "loss": 0.6737, "step": 5508 }, { "epoch": 0.58, "grad_norm": 3.126068404526892, "learning_rate": 3.96038597942887e-06, "loss": 0.689, "step": 5509 }, { "epoch": 0.58, "grad_norm": 2.441809460166265, "learning_rate": 3.958719047393789e-06, "loss": 0.6798, "step": 5510 }, { "epoch": 0.58, "grad_norm": 2.8015689631584326, "learning_rate": 3.9570522363317165e-06, "loss": 0.6766, "step": 5511 }, { "epoch": 0.58, "grad_norm": 1.8277968259678123, "learning_rate": 3.955385546436299e-06, "loss": 0.5986, "step": 5512 }, { "epoch": 0.58, "grad_norm": 2.335643392259737, "learning_rate": 3.9537189779011715e-06, "loss": 0.5452, "step": 5513 }, { "epoch": 0.58, "grad_norm": 2.3708189781396936, "learning_rate": 3.952052530919948e-06, "loss": 0.6386, "step": 5514 }, { "epoch": 0.58, "grad_norm": 2.838879644566167, "learning_rate": 3.9503862056862315e-06, "loss": 0.7274, "step": 5515 }, { "epoch": 0.58, "grad_norm": 2.4970962843979057, "learning_rate": 3.948720002393613e-06, "loss": 0.6018, "step": 5516 }, { "epoch": 0.58, "grad_norm": 2.1528209154994675, "learning_rate": 3.947053921235665e-06, "loss": 0.6035, "step": 5517 }, { "epoch": 0.58, "grad_norm": 3.223370620526591, "learning_rate": 3.945387962405946e-06, "loss": 0.6433, "step": 5518 }, { "epoch": 0.58, "grad_norm": 3.583378068602465, "learning_rate": 3.943722126098009e-06, "loss": 0.5599, "step": 5519 }, { "epoch": 0.58, "grad_norm": 2.934321798163941, "learning_rate": 3.94205641250538e-06, "loss": 0.7043, "step": 5520 }, { "epoch": 0.58, "grad_norm": 2.7272967348881823, "learning_rate": 3.940390821821579e-06, "loss": 0.6763, "step": 5521 }, { "epoch": 0.58, "grad_norm": 2.4185559104349137, "learning_rate": 3.93872535424011e-06, "loss": 0.6508, "step": 5522 }, { "epoch": 0.58, "grad_norm": 2.4863515025722562, "learning_rate": 3.937060009954462e-06, "loss": 0.6582, "step": 5523 }, { "epoch": 0.58, "grad_norm": 2.89940109019646, "learning_rate": 3.935394789158108e-06, "loss": 0.6821, "step": 5524 }, { "epoch": 0.58, "grad_norm": 2.850326326013693, "learning_rate": 3.93372969204451e-06, "loss": 0.5794, "step": 5525 }, { "epoch": 0.58, "grad_norm": 3.1372923087151254, "learning_rate": 3.932064718807114e-06, "loss": 0.6803, "step": 5526 }, { "epoch": 0.58, "grad_norm": 2.0327932638867807, "learning_rate": 3.930399869639353e-06, "loss": 0.6513, "step": 5527 }, { "epoch": 0.58, "grad_norm": 3.519770785227865, "learning_rate": 3.9287351447346424e-06, "loss": 0.6086, "step": 5528 }, { "epoch": 0.58, "grad_norm": 1.0115479234131517, "learning_rate": 3.927070544286385e-06, "loss": 0.5529, "step": 5529 }, { "epoch": 0.58, "grad_norm": 1.0358399237096076, "learning_rate": 3.925406068487972e-06, "loss": 0.5233, "step": 5530 }, { "epoch": 0.58, "grad_norm": 2.7457465246262487, "learning_rate": 3.923741717532774e-06, "loss": 0.6148, "step": 5531 }, { "epoch": 0.58, "grad_norm": 2.7191030649642522, "learning_rate": 3.922077491614155e-06, "loss": 0.653, "step": 5532 }, { "epoch": 0.58, "grad_norm": 2.914553835743117, "learning_rate": 3.920413390925454e-06, "loss": 0.6825, "step": 5533 }, { "epoch": 0.58, "grad_norm": 2.2209209916064294, "learning_rate": 3.918749415660005e-06, "loss": 0.6485, "step": 5534 }, { "epoch": 0.58, "grad_norm": 2.6768274712210633, "learning_rate": 3.917085566011124e-06, "loss": 0.6383, "step": 5535 }, { "epoch": 0.58, "grad_norm": 2.07522859990918, "learning_rate": 3.915421842172113e-06, "loss": 0.6638, "step": 5536 }, { "epoch": 0.58, "grad_norm": 2.4434649901286885, "learning_rate": 3.913758244336255e-06, "loss": 0.6297, "step": 5537 }, { "epoch": 0.58, "grad_norm": 3.82081715284726, "learning_rate": 3.912094772696825e-06, "loss": 0.6508, "step": 5538 }, { "epoch": 0.58, "grad_norm": 2.2583424490630546, "learning_rate": 3.910431427447079e-06, "loss": 0.5839, "step": 5539 }, { "epoch": 0.58, "grad_norm": 4.663703287673878, "learning_rate": 3.908768208780259e-06, "loss": 0.5589, "step": 5540 }, { "epoch": 0.58, "grad_norm": 2.274839976109476, "learning_rate": 3.907105116889597e-06, "loss": 0.6395, "step": 5541 }, { "epoch": 0.58, "grad_norm": 2.4342153540549925, "learning_rate": 3.905442151968302e-06, "loss": 0.6024, "step": 5542 }, { "epoch": 0.58, "grad_norm": 3.4254973540309996, "learning_rate": 3.903779314209573e-06, "loss": 0.612, "step": 5543 }, { "epoch": 0.58, "grad_norm": 2.9209988262515627, "learning_rate": 3.902116603806594e-06, "loss": 0.635, "step": 5544 }, { "epoch": 0.58, "grad_norm": 2.5257899785531785, "learning_rate": 3.900454020952537e-06, "loss": 0.6775, "step": 5545 }, { "epoch": 0.58, "grad_norm": 2.231536697694151, "learning_rate": 3.898791565840552e-06, "loss": 0.6288, "step": 5546 }, { "epoch": 0.58, "grad_norm": 2.7797880961916452, "learning_rate": 3.897129238663777e-06, "loss": 0.5785, "step": 5547 }, { "epoch": 0.58, "grad_norm": 2.1833271590239662, "learning_rate": 3.895467039615342e-06, "loss": 0.6688, "step": 5548 }, { "epoch": 0.58, "grad_norm": 3.5023552603785757, "learning_rate": 3.893804968888354e-06, "loss": 0.5951, "step": 5549 }, { "epoch": 0.58, "grad_norm": 2.1779919766332645, "learning_rate": 3.892143026675905e-06, "loss": 0.6629, "step": 5550 }, { "epoch": 0.58, "grad_norm": 2.937739628950341, "learning_rate": 3.8904812131710776e-06, "loss": 0.6831, "step": 5551 }, { "epoch": 0.58, "grad_norm": 2.7094582537792893, "learning_rate": 3.888819528566935e-06, "loss": 0.6519, "step": 5552 }, { "epoch": 0.58, "grad_norm": 2.507846503527637, "learning_rate": 3.8871579730565265e-06, "loss": 0.6874, "step": 5553 }, { "epoch": 0.58, "grad_norm": 2.3128367049716334, "learning_rate": 3.885496546832891e-06, "loss": 0.6468, "step": 5554 }, { "epoch": 0.58, "grad_norm": 2.18788284410742, "learning_rate": 3.883835250089043e-06, "loss": 0.608, "step": 5555 }, { "epoch": 0.58, "grad_norm": 3.0367412519509323, "learning_rate": 3.8821740830179876e-06, "loss": 0.6377, "step": 5556 }, { "epoch": 0.58, "grad_norm": 2.587808272946125, "learning_rate": 3.880513045812718e-06, "loss": 0.6709, "step": 5557 }, { "epoch": 0.58, "grad_norm": 2.8752893486354014, "learning_rate": 3.8788521386662076e-06, "loss": 0.6109, "step": 5558 }, { "epoch": 0.58, "grad_norm": 5.853722055103752, "learning_rate": 3.8771913617714135e-06, "loss": 0.5964, "step": 5559 }, { "epoch": 0.59, "grad_norm": 1.0139273382638472, "learning_rate": 3.87553071532128e-06, "loss": 0.57, "step": 5560 }, { "epoch": 0.59, "grad_norm": 9.753533947661753, "learning_rate": 3.873870199508739e-06, "loss": 0.6884, "step": 5561 }, { "epoch": 0.59, "grad_norm": 2.59431286674271, "learning_rate": 3.872209814526703e-06, "loss": 0.6489, "step": 5562 }, { "epoch": 0.59, "grad_norm": 2.118810061041824, "learning_rate": 3.87054956056807e-06, "loss": 0.6611, "step": 5563 }, { "epoch": 0.59, "grad_norm": 3.2188855308440183, "learning_rate": 3.868889437825724e-06, "loss": 0.6555, "step": 5564 }, { "epoch": 0.59, "grad_norm": 2.4614632914433274, "learning_rate": 3.867229446492533e-06, "loss": 0.6215, "step": 5565 }, { "epoch": 0.59, "grad_norm": 2.6768898443750375, "learning_rate": 3.865569586761352e-06, "loss": 0.6264, "step": 5566 }, { "epoch": 0.59, "grad_norm": 2.4319918129394216, "learning_rate": 3.863909858825016e-06, "loss": 0.6583, "step": 5567 }, { "epoch": 0.59, "grad_norm": 3.70131547101183, "learning_rate": 3.86225026287635e-06, "loss": 0.5622, "step": 5568 }, { "epoch": 0.59, "grad_norm": 2.5863671241991466, "learning_rate": 3.860590799108157e-06, "loss": 0.6492, "step": 5569 }, { "epoch": 0.59, "grad_norm": 2.603038839665001, "learning_rate": 3.858931467713233e-06, "loss": 0.7282, "step": 5570 }, { "epoch": 0.59, "grad_norm": 3.2162140609274386, "learning_rate": 3.857272268884353e-06, "loss": 0.5941, "step": 5571 }, { "epoch": 0.59, "grad_norm": 2.2617877144746505, "learning_rate": 3.855613202814277e-06, "loss": 0.6509, "step": 5572 }, { "epoch": 0.59, "grad_norm": 2.560892583789322, "learning_rate": 3.853954269695749e-06, "loss": 0.6604, "step": 5573 }, { "epoch": 0.59, "grad_norm": 2.487420247946007, "learning_rate": 3.8522954697215034e-06, "loss": 0.6302, "step": 5574 }, { "epoch": 0.59, "grad_norm": 2.821258946431581, "learning_rate": 3.8506368030842525e-06, "loss": 0.6311, "step": 5575 }, { "epoch": 0.59, "grad_norm": 2.3313444270659462, "learning_rate": 3.848978269976694e-06, "loss": 0.5945, "step": 5576 }, { "epoch": 0.59, "grad_norm": 2.469073388544526, "learning_rate": 3.8473198705915135e-06, "loss": 0.7422, "step": 5577 }, { "epoch": 0.59, "grad_norm": 5.193626538455637, "learning_rate": 3.845661605121377e-06, "loss": 0.6453, "step": 5578 }, { "epoch": 0.59, "grad_norm": 2.5193329581868897, "learning_rate": 3.844003473758941e-06, "loss": 0.6308, "step": 5579 }, { "epoch": 0.59, "grad_norm": 2.157883246431803, "learning_rate": 3.8423454766968394e-06, "loss": 0.6685, "step": 5580 }, { "epoch": 0.59, "grad_norm": 2.718500163349186, "learning_rate": 3.8406876141276924e-06, "loss": 0.5817, "step": 5581 }, { "epoch": 0.59, "grad_norm": 2.3243517458977667, "learning_rate": 3.8390298862441075e-06, "loss": 0.7218, "step": 5582 }, { "epoch": 0.59, "grad_norm": 2.612911018329449, "learning_rate": 3.8373722932386745e-06, "loss": 0.6655, "step": 5583 }, { "epoch": 0.59, "grad_norm": 3.2278773536072167, "learning_rate": 3.835714835303969e-06, "loss": 0.6834, "step": 5584 }, { "epoch": 0.59, "grad_norm": 2.6365356341700603, "learning_rate": 3.834057512632546e-06, "loss": 0.6425, "step": 5585 }, { "epoch": 0.59, "grad_norm": 2.4664103802063475, "learning_rate": 3.832400325416952e-06, "loss": 0.5572, "step": 5586 }, { "epoch": 0.59, "grad_norm": 2.0773321625767425, "learning_rate": 3.830743273849713e-06, "loss": 0.6554, "step": 5587 }, { "epoch": 0.59, "grad_norm": 4.630335903459665, "learning_rate": 3.829086358123339e-06, "loss": 0.5597, "step": 5588 }, { "epoch": 0.59, "grad_norm": 2.038054933842016, "learning_rate": 3.82742957843033e-06, "loss": 0.5566, "step": 5589 }, { "epoch": 0.59, "grad_norm": 7.81111225996943, "learning_rate": 3.825772934963161e-06, "loss": 0.7147, "step": 5590 }, { "epoch": 0.59, "grad_norm": 2.130623296542242, "learning_rate": 3.824116427914298e-06, "loss": 0.5917, "step": 5591 }, { "epoch": 0.59, "grad_norm": 2.22553668110589, "learning_rate": 3.82246005747619e-06, "loss": 0.5317, "step": 5592 }, { "epoch": 0.59, "grad_norm": 2.470012398893969, "learning_rate": 3.82080382384127e-06, "loss": 0.5783, "step": 5593 }, { "epoch": 0.59, "grad_norm": 2.112233161530673, "learning_rate": 3.819147727201951e-06, "loss": 0.625, "step": 5594 }, { "epoch": 0.59, "grad_norm": 3.4003860684934244, "learning_rate": 3.817491767750635e-06, "loss": 0.656, "step": 5595 }, { "epoch": 0.59, "grad_norm": 2.6648181797893433, "learning_rate": 3.815835945679709e-06, "loss": 0.6308, "step": 5596 }, { "epoch": 0.59, "grad_norm": 4.488479873290116, "learning_rate": 3.81418026118154e-06, "loss": 0.5743, "step": 5597 }, { "epoch": 0.59, "grad_norm": 2.520362514666106, "learning_rate": 3.8125247144484777e-06, "loss": 0.5858, "step": 5598 }, { "epoch": 0.59, "grad_norm": 2.938146052846746, "learning_rate": 3.8108693056728636e-06, "loss": 0.6713, "step": 5599 }, { "epoch": 0.59, "grad_norm": 3.143207176889715, "learning_rate": 3.809214035047016e-06, "loss": 0.6833, "step": 5600 }, { "epoch": 0.59, "grad_norm": 2.205971327946951, "learning_rate": 3.8075589027632376e-06, "loss": 0.6633, "step": 5601 }, { "epoch": 0.59, "grad_norm": 2.2680778570668756, "learning_rate": 3.805903909013822e-06, "loss": 0.5748, "step": 5602 }, { "epoch": 0.59, "grad_norm": 2.501171349333083, "learning_rate": 3.804249053991037e-06, "loss": 0.687, "step": 5603 }, { "epoch": 0.59, "grad_norm": 4.102343060157223, "learning_rate": 3.8025943378871394e-06, "loss": 0.6342, "step": 5604 }, { "epoch": 0.59, "grad_norm": 2.894149017249174, "learning_rate": 3.800939760894371e-06, "loss": 0.5931, "step": 5605 }, { "epoch": 0.59, "grad_norm": 2.1805773899796685, "learning_rate": 3.7992853232049566e-06, "loss": 0.5769, "step": 5606 }, { "epoch": 0.59, "grad_norm": 2.4362136260737426, "learning_rate": 3.7976310250111013e-06, "loss": 0.6543, "step": 5607 }, { "epoch": 0.59, "grad_norm": 2.9225984516059653, "learning_rate": 3.7959768665049967e-06, "loss": 0.6982, "step": 5608 }, { "epoch": 0.59, "grad_norm": 2.86310538278994, "learning_rate": 3.7943228478788198e-06, "loss": 0.5438, "step": 5609 }, { "epoch": 0.59, "grad_norm": 2.2140763934345613, "learning_rate": 3.792668969324731e-06, "loss": 0.5698, "step": 5610 }, { "epoch": 0.59, "grad_norm": 2.5016813794002126, "learning_rate": 3.7910152310348686e-06, "loss": 0.7066, "step": 5611 }, { "epoch": 0.59, "grad_norm": 2.1455037865154742, "learning_rate": 3.789361633201363e-06, "loss": 0.6227, "step": 5612 }, { "epoch": 0.59, "grad_norm": 2.0200967442200817, "learning_rate": 3.7877081760163225e-06, "loss": 0.6153, "step": 5613 }, { "epoch": 0.59, "grad_norm": 2.386311762553055, "learning_rate": 3.7860548596718427e-06, "loss": 0.6632, "step": 5614 }, { "epoch": 0.59, "grad_norm": 2.264330176961888, "learning_rate": 3.784401684360001e-06, "loss": 0.6568, "step": 5615 }, { "epoch": 0.59, "grad_norm": 2.761814617242924, "learning_rate": 3.782748650272857e-06, "loss": 0.6565, "step": 5616 }, { "epoch": 0.59, "grad_norm": 2.076122600652747, "learning_rate": 3.781095757602455e-06, "loss": 0.6303, "step": 5617 }, { "epoch": 0.59, "grad_norm": 2.3150873597883947, "learning_rate": 3.779443006540825e-06, "loss": 0.6462, "step": 5618 }, { "epoch": 0.59, "grad_norm": 2.940963699254998, "learning_rate": 3.7777903972799794e-06, "loss": 0.564, "step": 5619 }, { "epoch": 0.59, "grad_norm": 2.4027103997644392, "learning_rate": 3.7761379300119104e-06, "loss": 0.61, "step": 5620 }, { "epoch": 0.59, "grad_norm": 2.2952639954909526, "learning_rate": 3.7744856049286e-06, "loss": 0.5822, "step": 5621 }, { "epoch": 0.59, "grad_norm": 3.0792638125829637, "learning_rate": 3.77283342222201e-06, "loss": 0.6502, "step": 5622 }, { "epoch": 0.59, "grad_norm": 0.9784622364316484, "learning_rate": 3.7711813820840854e-06, "loss": 0.61, "step": 5623 }, { "epoch": 0.59, "grad_norm": 3.0398501089056658, "learning_rate": 3.7695294847067544e-06, "loss": 0.641, "step": 5624 }, { "epoch": 0.59, "grad_norm": 3.801664578402942, "learning_rate": 3.7678777302819314e-06, "loss": 0.5783, "step": 5625 }, { "epoch": 0.59, "grad_norm": 2.4911044008283074, "learning_rate": 3.7662261190015116e-06, "loss": 0.7241, "step": 5626 }, { "epoch": 0.59, "grad_norm": 2.3853448216389594, "learning_rate": 3.7645746510573754e-06, "loss": 0.6626, "step": 5627 }, { "epoch": 0.59, "grad_norm": 2.131535326971549, "learning_rate": 3.7629233266413866e-06, "loss": 0.64, "step": 5628 }, { "epoch": 0.59, "grad_norm": 1.9389020030909176, "learning_rate": 3.7612721459453883e-06, "loss": 0.5731, "step": 5629 }, { "epoch": 0.59, "grad_norm": 2.7087185491547947, "learning_rate": 3.75962110916121e-06, "loss": 0.582, "step": 5630 }, { "epoch": 0.59, "grad_norm": 2.475217811511566, "learning_rate": 3.757970216480667e-06, "loss": 0.6372, "step": 5631 }, { "epoch": 0.59, "grad_norm": 2.4037718481514765, "learning_rate": 3.756319468095555e-06, "loss": 0.6331, "step": 5632 }, { "epoch": 0.59, "grad_norm": 3.020609700875131, "learning_rate": 3.7546688641976496e-06, "loss": 0.6931, "step": 5633 }, { "epoch": 0.59, "grad_norm": 4.592025998510074, "learning_rate": 3.753018404978717e-06, "loss": 0.5209, "step": 5634 }, { "epoch": 0.59, "grad_norm": 2.501978181917583, "learning_rate": 3.7513680906305015e-06, "loss": 0.5572, "step": 5635 }, { "epoch": 0.59, "grad_norm": 2.965457135634429, "learning_rate": 3.7497179213447305e-06, "loss": 0.6254, "step": 5636 }, { "epoch": 0.59, "grad_norm": 2.294791076548485, "learning_rate": 3.7480678973131198e-06, "loss": 0.6553, "step": 5637 }, { "epoch": 0.59, "grad_norm": 2.4361429134308956, "learning_rate": 3.74641801872736e-06, "loss": 0.645, "step": 5638 }, { "epoch": 0.59, "grad_norm": 2.229172173470131, "learning_rate": 3.7447682857791307e-06, "loss": 0.6655, "step": 5639 }, { "epoch": 0.59, "grad_norm": 2.5927694332695186, "learning_rate": 3.743118698660094e-06, "loss": 0.5953, "step": 5640 }, { "epoch": 0.59, "grad_norm": 2.419854503906456, "learning_rate": 3.741469257561895e-06, "loss": 0.6415, "step": 5641 }, { "epoch": 0.59, "grad_norm": 2.454076430219235, "learning_rate": 3.739819962676159e-06, "loss": 0.677, "step": 5642 }, { "epoch": 0.59, "grad_norm": 2.333031240171135, "learning_rate": 3.738170814194495e-06, "loss": 0.5559, "step": 5643 }, { "epoch": 0.59, "grad_norm": 2.427282486291471, "learning_rate": 3.7365218123084996e-06, "loss": 0.6336, "step": 5644 }, { "epoch": 0.59, "grad_norm": 2.692927524150578, "learning_rate": 3.7348729572097487e-06, "loss": 0.5909, "step": 5645 }, { "epoch": 0.59, "grad_norm": 2.406995986734407, "learning_rate": 3.7332242490897985e-06, "loss": 0.6221, "step": 5646 }, { "epoch": 0.59, "grad_norm": 2.735312603120759, "learning_rate": 3.7315756881401944e-06, "loss": 0.6724, "step": 5647 }, { "epoch": 0.59, "grad_norm": 3.4788360617314944, "learning_rate": 3.7299272745524583e-06, "loss": 0.634, "step": 5648 }, { "epoch": 0.59, "grad_norm": 2.280769795287397, "learning_rate": 3.728279008518102e-06, "loss": 0.5864, "step": 5649 }, { "epoch": 0.59, "grad_norm": 2.7064070384851253, "learning_rate": 3.726630890228615e-06, "loss": 0.6402, "step": 5650 }, { "epoch": 0.59, "grad_norm": 3.801131241287733, "learning_rate": 3.7249829198754694e-06, "loss": 0.6281, "step": 5651 }, { "epoch": 0.59, "grad_norm": 2.172639276357326, "learning_rate": 3.7233350976501217e-06, "loss": 0.6297, "step": 5652 }, { "epoch": 0.59, "grad_norm": 1.9375269933216765, "learning_rate": 3.7216874237440127e-06, "loss": 0.5696, "step": 5653 }, { "epoch": 0.59, "grad_norm": 2.3229567583419217, "learning_rate": 3.7200398983485643e-06, "loss": 0.6104, "step": 5654 }, { "epoch": 0.6, "grad_norm": 2.523904866991665, "learning_rate": 3.7183925216551784e-06, "loss": 0.6879, "step": 5655 }, { "epoch": 0.6, "grad_norm": 2.1913564898938622, "learning_rate": 3.716745293855246e-06, "loss": 0.5874, "step": 5656 }, { "epoch": 0.6, "grad_norm": 2.2827810373182724, "learning_rate": 3.715098215140136e-06, "loss": 0.6078, "step": 5657 }, { "epoch": 0.6, "grad_norm": 5.128545296474594, "learning_rate": 3.7134512857012017e-06, "loss": 0.6683, "step": 5658 }, { "epoch": 0.6, "grad_norm": 2.9857064043435133, "learning_rate": 3.711804505729776e-06, "loss": 0.5813, "step": 5659 }, { "epoch": 0.6, "grad_norm": 2.632396638894393, "learning_rate": 3.7101578754171797e-06, "loss": 0.6517, "step": 5660 }, { "epoch": 0.6, "grad_norm": 2.2170898683792815, "learning_rate": 3.7085113949547126e-06, "loss": 0.6747, "step": 5661 }, { "epoch": 0.6, "grad_norm": 2.1974382543005357, "learning_rate": 3.706865064533659e-06, "loss": 0.6875, "step": 5662 }, { "epoch": 0.6, "grad_norm": 3.3819380364992115, "learning_rate": 3.7052188843452854e-06, "loss": 0.622, "step": 5663 }, { "epoch": 0.6, "grad_norm": 2.682752126582236, "learning_rate": 3.7035728545808367e-06, "loss": 0.6522, "step": 5664 }, { "epoch": 0.6, "grad_norm": 2.042183074281712, "learning_rate": 3.701926975431547e-06, "loss": 0.6193, "step": 5665 }, { "epoch": 0.6, "grad_norm": 1.0167480891393266, "learning_rate": 3.700281247088629e-06, "loss": 0.5478, "step": 5666 }, { "epoch": 0.6, "grad_norm": 3.190095939887631, "learning_rate": 3.6986356697432796e-06, "loss": 0.6033, "step": 5667 }, { "epoch": 0.6, "grad_norm": 2.3881960936207682, "learning_rate": 3.6969902435866743e-06, "loss": 0.5799, "step": 5668 }, { "epoch": 0.6, "grad_norm": 2.4289590644832786, "learning_rate": 3.6953449688099774e-06, "loss": 0.7183, "step": 5669 }, { "epoch": 0.6, "grad_norm": 2.15648426463186, "learning_rate": 3.69369984560433e-06, "loss": 0.6219, "step": 5670 }, { "epoch": 0.6, "grad_norm": 2.679139642427304, "learning_rate": 3.69205487416086e-06, "loss": 0.6284, "step": 5671 }, { "epoch": 0.6, "grad_norm": 3.199487911029984, "learning_rate": 3.690410054670671e-06, "loss": 0.6225, "step": 5672 }, { "epoch": 0.6, "grad_norm": 2.927509209876508, "learning_rate": 3.6887653873248575e-06, "loss": 0.6316, "step": 5673 }, { "epoch": 0.6, "grad_norm": 2.9002460059852595, "learning_rate": 3.6871208723144903e-06, "loss": 0.6826, "step": 5674 }, { "epoch": 0.6, "grad_norm": 2.2930525319126507, "learning_rate": 3.6854765098306254e-06, "loss": 0.5643, "step": 5675 }, { "epoch": 0.6, "grad_norm": 2.809686719399776, "learning_rate": 3.6838323000643013e-06, "loss": 0.6805, "step": 5676 }, { "epoch": 0.6, "grad_norm": 2.6784212261701166, "learning_rate": 3.682188243206535e-06, "loss": 0.6072, "step": 5677 }, { "epoch": 0.6, "grad_norm": 1.1335859555703096, "learning_rate": 3.6805443394483275e-06, "loss": 0.5365, "step": 5678 }, { "epoch": 0.6, "grad_norm": 2.477277761969307, "learning_rate": 3.6789005889806656e-06, "loss": 0.7198, "step": 5679 }, { "epoch": 0.6, "grad_norm": 2.1807624837158626, "learning_rate": 3.6772569919945157e-06, "loss": 0.6629, "step": 5680 }, { "epoch": 0.6, "grad_norm": 2.278161138807236, "learning_rate": 3.6756135486808227e-06, "loss": 0.5702, "step": 5681 }, { "epoch": 0.6, "grad_norm": 2.433937775862514, "learning_rate": 3.6739702592305205e-06, "loss": 0.6464, "step": 5682 }, { "epoch": 0.6, "grad_norm": 3.0575854854067104, "learning_rate": 3.6723271238345187e-06, "loss": 0.674, "step": 5683 }, { "epoch": 0.6, "grad_norm": 3.607667973961037, "learning_rate": 3.6706841426837145e-06, "loss": 0.6982, "step": 5684 }, { "epoch": 0.6, "grad_norm": 2.3855563143332543, "learning_rate": 3.669041315968986e-06, "loss": 0.6425, "step": 5685 }, { "epoch": 0.6, "grad_norm": 2.551518686262234, "learning_rate": 3.667398643881189e-06, "loss": 0.6379, "step": 5686 }, { "epoch": 0.6, "grad_norm": 2.2344906686707855, "learning_rate": 3.665756126611164e-06, "loss": 0.6542, "step": 5687 }, { "epoch": 0.6, "grad_norm": 2.892933147579963, "learning_rate": 3.664113764349736e-06, "loss": 0.6753, "step": 5688 }, { "epoch": 0.6, "grad_norm": 2.899716923671387, "learning_rate": 3.6624715572877106e-06, "loss": 0.5615, "step": 5689 }, { "epoch": 0.6, "grad_norm": 2.52564711595407, "learning_rate": 3.6608295056158717e-06, "loss": 0.6426, "step": 5690 }, { "epoch": 0.6, "grad_norm": 3.2313531578252412, "learning_rate": 3.65918760952499e-06, "loss": 0.5662, "step": 5691 }, { "epoch": 0.6, "grad_norm": 2.5379514454376904, "learning_rate": 3.657545869205816e-06, "loss": 0.5762, "step": 5692 }, { "epoch": 0.6, "grad_norm": 2.9538894091444807, "learning_rate": 3.6559042848490835e-06, "loss": 0.5906, "step": 5693 }, { "epoch": 0.6, "grad_norm": 2.6382651155593604, "learning_rate": 3.654262856645503e-06, "loss": 0.6264, "step": 5694 }, { "epoch": 0.6, "grad_norm": 3.482444707821686, "learning_rate": 3.652621584785776e-06, "loss": 0.6596, "step": 5695 }, { "epoch": 0.6, "grad_norm": 2.2566669898698146, "learning_rate": 3.6509804694605768e-06, "loss": 0.7118, "step": 5696 }, { "epoch": 0.6, "grad_norm": 3.5669855228140213, "learning_rate": 3.649339510860568e-06, "loss": 0.5655, "step": 5697 }, { "epoch": 0.6, "grad_norm": 2.3661369567618866, "learning_rate": 3.647698709176391e-06, "loss": 0.6338, "step": 5698 }, { "epoch": 0.6, "grad_norm": 2.3484815033901807, "learning_rate": 3.6460580645986685e-06, "loss": 0.6255, "step": 5699 }, { "epoch": 0.6, "grad_norm": 2.53755595996656, "learning_rate": 3.6444175773180045e-06, "loss": 0.5636, "step": 5700 }, { "epoch": 0.6, "grad_norm": 2.5222433345809763, "learning_rate": 3.6427772475249896e-06, "loss": 0.6518, "step": 5701 }, { "epoch": 0.6, "grad_norm": 2.586007819868345, "learning_rate": 3.6411370754101915e-06, "loss": 0.6029, "step": 5702 }, { "epoch": 0.6, "grad_norm": 5.428940429081987, "learning_rate": 3.639497061164158e-06, "loss": 0.5528, "step": 5703 }, { "epoch": 0.6, "grad_norm": 2.60801073186904, "learning_rate": 3.637857204977424e-06, "loss": 0.5935, "step": 5704 }, { "epoch": 0.6, "grad_norm": 3.8916382563951037, "learning_rate": 3.636217507040502e-06, "loss": 0.6435, "step": 5705 }, { "epoch": 0.6, "grad_norm": 0.9863942243849351, "learning_rate": 3.6345779675438897e-06, "loss": 0.5717, "step": 5706 }, { "epoch": 0.6, "grad_norm": 7.47039659299787, "learning_rate": 3.6329385866780587e-06, "loss": 0.5755, "step": 5707 }, { "epoch": 0.6, "grad_norm": 2.7898417007114333, "learning_rate": 3.6312993646334727e-06, "loss": 0.5874, "step": 5708 }, { "epoch": 0.6, "grad_norm": 2.273917598473412, "learning_rate": 3.6296603016005693e-06, "loss": 0.6866, "step": 5709 }, { "epoch": 0.6, "grad_norm": 2.6107303675720432, "learning_rate": 3.6280213977697715e-06, "loss": 0.6771, "step": 5710 }, { "epoch": 0.6, "grad_norm": 2.2956736561042357, "learning_rate": 3.6263826533314827e-06, "loss": 0.6167, "step": 5711 }, { "epoch": 0.6, "grad_norm": 2.384801633397614, "learning_rate": 3.624744068476086e-06, "loss": 0.6664, "step": 5712 }, { "epoch": 0.6, "grad_norm": 2.0974504782665337, "learning_rate": 3.623105643393946e-06, "loss": 0.6318, "step": 5713 }, { "epoch": 0.6, "grad_norm": 3.3889022639258015, "learning_rate": 3.621467378275414e-06, "loss": 0.6735, "step": 5714 }, { "epoch": 0.6, "grad_norm": 4.18791054241475, "learning_rate": 3.6198292733108177e-06, "loss": 0.6781, "step": 5715 }, { "epoch": 0.6, "grad_norm": 2.7589589062878197, "learning_rate": 3.6181913286904647e-06, "loss": 0.5676, "step": 5716 }, { "epoch": 0.6, "grad_norm": 1.9737675399727652, "learning_rate": 3.6165535446046497e-06, "loss": 0.6197, "step": 5717 }, { "epoch": 0.6, "grad_norm": 2.520077841376911, "learning_rate": 3.6149159212436435e-06, "loss": 0.5949, "step": 5718 }, { "epoch": 0.6, "grad_norm": 2.2678271084992514, "learning_rate": 3.6132784587977053e-06, "loss": 0.5782, "step": 5719 }, { "epoch": 0.6, "grad_norm": 2.736740775923461, "learning_rate": 3.611641157457064e-06, "loss": 0.5897, "step": 5720 }, { "epoch": 0.6, "grad_norm": 2.503689546593751, "learning_rate": 3.6100040174119403e-06, "loss": 0.6358, "step": 5721 }, { "epoch": 0.6, "grad_norm": 0.9542516668761488, "learning_rate": 3.6083670388525316e-06, "loss": 0.6144, "step": 5722 }, { "epoch": 0.6, "grad_norm": 3.151965071313513, "learning_rate": 3.6067302219690175e-06, "loss": 0.6571, "step": 5723 }, { "epoch": 0.6, "grad_norm": 2.083294772175323, "learning_rate": 3.6050935669515604e-06, "loss": 0.6405, "step": 5724 }, { "epoch": 0.6, "grad_norm": 2.308319919656163, "learning_rate": 3.603457073990298e-06, "loss": 0.5927, "step": 5725 }, { "epoch": 0.6, "grad_norm": 2.5585087791382075, "learning_rate": 3.6018207432753572e-06, "loss": 0.5503, "step": 5726 }, { "epoch": 0.6, "grad_norm": 14.380449615576731, "learning_rate": 3.60018457499684e-06, "loss": 0.6287, "step": 5727 }, { "epoch": 0.6, "grad_norm": 3.450787029376905, "learning_rate": 3.598548569344834e-06, "loss": 0.6453, "step": 5728 }, { "epoch": 0.6, "grad_norm": 3.1421747068388366, "learning_rate": 3.596912726509402e-06, "loss": 0.7465, "step": 5729 }, { "epoch": 0.6, "grad_norm": 2.304462145687152, "learning_rate": 3.595277046680594e-06, "loss": 0.6277, "step": 5730 }, { "epoch": 0.6, "grad_norm": 5.179794005325396, "learning_rate": 3.5936415300484383e-06, "loss": 0.6019, "step": 5731 }, { "epoch": 0.6, "grad_norm": 1.0233482875110236, "learning_rate": 3.592006176802944e-06, "loss": 0.5521, "step": 5732 }, { "epoch": 0.6, "grad_norm": 2.2805038648763887, "learning_rate": 3.5903709871341034e-06, "loss": 0.6422, "step": 5733 }, { "epoch": 0.6, "grad_norm": 2.7182569922936426, "learning_rate": 3.5887359612318862e-06, "loss": 0.5947, "step": 5734 }, { "epoch": 0.6, "grad_norm": 2.992203373180557, "learning_rate": 3.5871010992862436e-06, "loss": 0.6566, "step": 5735 }, { "epoch": 0.6, "grad_norm": 3.1660591900577493, "learning_rate": 3.5854664014871128e-06, "loss": 0.6317, "step": 5736 }, { "epoch": 0.6, "grad_norm": 2.645200532572237, "learning_rate": 3.5838318680244067e-06, "loss": 0.6629, "step": 5737 }, { "epoch": 0.6, "grad_norm": 2.2006735106420225, "learning_rate": 3.582197499088019e-06, "loss": 0.7078, "step": 5738 }, { "epoch": 0.6, "grad_norm": 2.7870416290233466, "learning_rate": 3.580563294867828e-06, "loss": 0.6616, "step": 5739 }, { "epoch": 0.6, "grad_norm": 2.2284086143558057, "learning_rate": 3.5789292555536907e-06, "loss": 0.644, "step": 5740 }, { "epoch": 0.6, "grad_norm": 2.0680596768407415, "learning_rate": 3.5772953813354455e-06, "loss": 0.5917, "step": 5741 }, { "epoch": 0.6, "grad_norm": 2.6380941942532816, "learning_rate": 3.575661672402908e-06, "loss": 0.5598, "step": 5742 }, { "epoch": 0.6, "grad_norm": 2.1088238905647647, "learning_rate": 3.5740281289458812e-06, "loss": 0.6548, "step": 5743 }, { "epoch": 0.6, "grad_norm": 2.364337376015603, "learning_rate": 3.5723947511541435e-06, "loss": 0.6301, "step": 5744 }, { "epoch": 0.6, "grad_norm": 2.155417858599172, "learning_rate": 3.5707615392174576e-06, "loss": 0.6011, "step": 5745 }, { "epoch": 0.6, "grad_norm": 2.789662978157353, "learning_rate": 3.5691284933255653e-06, "loss": 0.6223, "step": 5746 }, { "epoch": 0.6, "grad_norm": 2.436792685729838, "learning_rate": 3.567495613668188e-06, "loss": 0.6495, "step": 5747 }, { "epoch": 0.6, "grad_norm": 2.2959050511929493, "learning_rate": 3.565862900435028e-06, "loss": 0.6366, "step": 5748 }, { "epoch": 0.6, "grad_norm": 3.0678303289148188, "learning_rate": 3.564230353815772e-06, "loss": 0.5961, "step": 5749 }, { "epoch": 0.61, "grad_norm": 2.2062343340152553, "learning_rate": 3.562597974000084e-06, "loss": 0.5445, "step": 5750 }, { "epoch": 0.61, "grad_norm": 1.1459202020440926, "learning_rate": 3.5609657611776055e-06, "loss": 0.5367, "step": 5751 }, { "epoch": 0.61, "grad_norm": 2.892508323233743, "learning_rate": 3.5593337155379663e-06, "loss": 0.5296, "step": 5752 }, { "epoch": 0.61, "grad_norm": 13.502367015161873, "learning_rate": 3.5577018372707706e-06, "loss": 0.5599, "step": 5753 }, { "epoch": 0.61, "grad_norm": 4.60449934352309, "learning_rate": 3.5560701265656096e-06, "loss": 0.5771, "step": 5754 }, { "epoch": 0.61, "grad_norm": 2.0330026690570517, "learning_rate": 3.5544385836120445e-06, "loss": 0.6543, "step": 5755 }, { "epoch": 0.61, "grad_norm": 2.246716338488602, "learning_rate": 3.552807208599626e-06, "loss": 0.6199, "step": 5756 }, { "epoch": 0.61, "grad_norm": 2.880693301049812, "learning_rate": 3.551176001717882e-06, "loss": 0.5812, "step": 5757 }, { "epoch": 0.61, "grad_norm": 2.374133819647502, "learning_rate": 3.549544963156324e-06, "loss": 0.6039, "step": 5758 }, { "epoch": 0.61, "grad_norm": 3.651695196080499, "learning_rate": 3.5479140931044393e-06, "loss": 0.6026, "step": 5759 }, { "epoch": 0.61, "grad_norm": 2.256349484082301, "learning_rate": 3.546283391751696e-06, "loss": 0.5949, "step": 5760 }, { "epoch": 0.61, "grad_norm": 2.2426448275819504, "learning_rate": 3.5446528592875464e-06, "loss": 0.6444, "step": 5761 }, { "epoch": 0.61, "grad_norm": 2.0973705329694723, "learning_rate": 3.5430224959014215e-06, "loss": 0.7379, "step": 5762 }, { "epoch": 0.61, "grad_norm": 2.2641582085188, "learning_rate": 3.5413923017827317e-06, "loss": 0.5749, "step": 5763 }, { "epoch": 0.61, "grad_norm": 3.058982375661702, "learning_rate": 3.5397622771208663e-06, "loss": 0.5927, "step": 5764 }, { "epoch": 0.61, "grad_norm": 2.998975467492036, "learning_rate": 3.5381324221051995e-06, "loss": 0.611, "step": 5765 }, { "epoch": 0.61, "grad_norm": 2.436460113249485, "learning_rate": 3.5365027369250804e-06, "loss": 0.5845, "step": 5766 }, { "epoch": 0.61, "grad_norm": 2.336543180347027, "learning_rate": 3.5348732217698466e-06, "loss": 0.6229, "step": 5767 }, { "epoch": 0.61, "grad_norm": 3.005942928681562, "learning_rate": 3.533243876828803e-06, "loss": 0.6442, "step": 5768 }, { "epoch": 0.61, "grad_norm": 2.8740853995553226, "learning_rate": 3.531614702291247e-06, "loss": 0.5655, "step": 5769 }, { "epoch": 0.61, "grad_norm": 25.902005172145653, "learning_rate": 3.5299856983464497e-06, "loss": 0.6208, "step": 5770 }, { "epoch": 0.61, "grad_norm": 2.499670510652518, "learning_rate": 3.528356865183665e-06, "loss": 0.5878, "step": 5771 }, { "epoch": 0.61, "grad_norm": 2.5307387633144414, "learning_rate": 3.526728202992127e-06, "loss": 0.7085, "step": 5772 }, { "epoch": 0.61, "grad_norm": 2.510774660981912, "learning_rate": 3.525099711961045e-06, "loss": 0.6365, "step": 5773 }, { "epoch": 0.61, "grad_norm": 2.6261622750991034, "learning_rate": 3.523471392279616e-06, "loss": 0.6566, "step": 5774 }, { "epoch": 0.61, "grad_norm": 2.445814884829749, "learning_rate": 3.521843244137013e-06, "loss": 0.7112, "step": 5775 }, { "epoch": 0.61, "grad_norm": 3.355458478806883, "learning_rate": 3.52021526772239e-06, "loss": 0.6402, "step": 5776 }, { "epoch": 0.61, "grad_norm": 2.3733230146293263, "learning_rate": 3.5185874632248775e-06, "loss": 0.6198, "step": 5777 }, { "epoch": 0.61, "grad_norm": 2.8989292888055886, "learning_rate": 3.5169598308335915e-06, "loss": 0.592, "step": 5778 }, { "epoch": 0.61, "grad_norm": 2.1665097816063295, "learning_rate": 3.515332370737625e-06, "loss": 0.669, "step": 5779 }, { "epoch": 0.61, "grad_norm": 1.005926002720759, "learning_rate": 3.513705083126054e-06, "loss": 0.6319, "step": 5780 }, { "epoch": 0.61, "grad_norm": 2.336459256200082, "learning_rate": 3.5120779681879286e-06, "loss": 0.6206, "step": 5781 }, { "epoch": 0.61, "grad_norm": 2.298134036866341, "learning_rate": 3.5104510261122836e-06, "loss": 0.6818, "step": 5782 }, { "epoch": 0.61, "grad_norm": 2.8034284235986724, "learning_rate": 3.508824257088132e-06, "loss": 0.653, "step": 5783 }, { "epoch": 0.61, "grad_norm": 3.123317385052922, "learning_rate": 3.507197661304469e-06, "loss": 0.6499, "step": 5784 }, { "epoch": 0.61, "grad_norm": 2.655862388540164, "learning_rate": 3.505571238950267e-06, "loss": 0.5429, "step": 5785 }, { "epoch": 0.61, "grad_norm": 2.490391865079292, "learning_rate": 3.5039449902144763e-06, "loss": 0.6385, "step": 5786 }, { "epoch": 0.61, "grad_norm": 4.384431381049101, "learning_rate": 3.5023189152860325e-06, "loss": 0.7115, "step": 5787 }, { "epoch": 0.61, "grad_norm": 2.6398628266384607, "learning_rate": 3.5006930143538477e-06, "loss": 0.6195, "step": 5788 }, { "epoch": 0.61, "grad_norm": 5.043162425725876, "learning_rate": 3.499067287606817e-06, "loss": 0.5859, "step": 5789 }, { "epoch": 0.61, "grad_norm": 2.925176048693361, "learning_rate": 3.4974417352338074e-06, "loss": 0.6185, "step": 5790 }, { "epoch": 0.61, "grad_norm": 2.3132796544844303, "learning_rate": 3.495816357423674e-06, "loss": 0.6343, "step": 5791 }, { "epoch": 0.61, "grad_norm": 2.2778784069626004, "learning_rate": 3.494191154365247e-06, "loss": 0.6054, "step": 5792 }, { "epoch": 0.61, "grad_norm": 2.7460899113803503, "learning_rate": 3.492566126247341e-06, "loss": 0.6063, "step": 5793 }, { "epoch": 0.61, "grad_norm": 3.201639636764578, "learning_rate": 3.4909412732587444e-06, "loss": 0.6561, "step": 5794 }, { "epoch": 0.61, "grad_norm": 2.547089477361622, "learning_rate": 3.4893165955882275e-06, "loss": 0.5957, "step": 5795 }, { "epoch": 0.61, "grad_norm": 2.598764658378845, "learning_rate": 3.4876920934245423e-06, "loss": 0.6941, "step": 5796 }, { "epoch": 0.61, "grad_norm": 2.3890690610014795, "learning_rate": 3.486067766956418e-06, "loss": 0.6066, "step": 5797 }, { "epoch": 0.61, "grad_norm": 2.9128726002118785, "learning_rate": 3.4844436163725648e-06, "loss": 0.5642, "step": 5798 }, { "epoch": 0.61, "grad_norm": 3.8378397404411326, "learning_rate": 3.48281964186167e-06, "loss": 0.6248, "step": 5799 }, { "epoch": 0.61, "grad_norm": 2.3946114293745913, "learning_rate": 3.4811958436124036e-06, "loss": 0.6376, "step": 5800 }, { "epoch": 0.61, "grad_norm": 2.903630763714226, "learning_rate": 3.479572221813413e-06, "loss": 0.5643, "step": 5801 }, { "epoch": 0.61, "grad_norm": 5.522387352098266, "learning_rate": 3.4779487766533306e-06, "loss": 0.6095, "step": 5802 }, { "epoch": 0.61, "grad_norm": 2.539618765581498, "learning_rate": 3.4763255083207547e-06, "loss": 0.5834, "step": 5803 }, { "epoch": 0.61, "grad_norm": 3.498863654137628, "learning_rate": 3.4747024170042785e-06, "loss": 0.6581, "step": 5804 }, { "epoch": 0.61, "grad_norm": 2.0699886181482836, "learning_rate": 3.473079502892466e-06, "loss": 0.686, "step": 5805 }, { "epoch": 0.61, "grad_norm": 2.56411416397428, "learning_rate": 3.4714567661738635e-06, "loss": 0.6205, "step": 5806 }, { "epoch": 0.61, "grad_norm": 3.6804375443854056, "learning_rate": 3.469834207036996e-06, "loss": 0.5698, "step": 5807 }, { "epoch": 0.61, "grad_norm": 2.3140301754849704, "learning_rate": 3.4682118256703657e-06, "loss": 0.6665, "step": 5808 }, { "epoch": 0.61, "grad_norm": 5.132346843122038, "learning_rate": 3.4665896222624585e-06, "loss": 0.6473, "step": 5809 }, { "epoch": 0.61, "grad_norm": 2.1626805670201685, "learning_rate": 3.4649675970017355e-06, "loss": 0.5748, "step": 5810 }, { "epoch": 0.61, "grad_norm": 2.6486782602012684, "learning_rate": 3.4633457500766413e-06, "loss": 0.6031, "step": 5811 }, { "epoch": 0.61, "grad_norm": 2.5370141596453624, "learning_rate": 3.4617240816755937e-06, "loss": 0.5877, "step": 5812 }, { "epoch": 0.61, "grad_norm": 2.5534598070476, "learning_rate": 3.460102591986997e-06, "loss": 0.6301, "step": 5813 }, { "epoch": 0.61, "grad_norm": 3.3454701642307962, "learning_rate": 3.4584812811992287e-06, "loss": 0.6339, "step": 5814 }, { "epoch": 0.61, "grad_norm": 3.0095813678630763, "learning_rate": 3.4568601495006503e-06, "loss": 0.6619, "step": 5815 }, { "epoch": 0.61, "grad_norm": 2.4805455518296835, "learning_rate": 3.4552391970795984e-06, "loss": 0.6543, "step": 5816 }, { "epoch": 0.61, "grad_norm": 2.491050633881681, "learning_rate": 3.453618424124392e-06, "loss": 0.6431, "step": 5817 }, { "epoch": 0.61, "grad_norm": 5.733199748643421, "learning_rate": 3.451997830823325e-06, "loss": 0.5999, "step": 5818 }, { "epoch": 0.61, "grad_norm": 3.0026297623529605, "learning_rate": 3.4503774173646767e-06, "loss": 0.5972, "step": 5819 }, { "epoch": 0.61, "grad_norm": 3.1106086481537787, "learning_rate": 3.448757183936701e-06, "loss": 0.6202, "step": 5820 }, { "epoch": 0.61, "grad_norm": 5.983654597238341, "learning_rate": 3.447137130727629e-06, "loss": 0.6625, "step": 5821 }, { "epoch": 0.61, "grad_norm": 3.81371149233971, "learning_rate": 3.4455172579256784e-06, "loss": 0.6177, "step": 5822 }, { "epoch": 0.61, "grad_norm": 2.458016439088697, "learning_rate": 3.4438975657190375e-06, "loss": 0.7018, "step": 5823 }, { "epoch": 0.61, "grad_norm": 1.9033971357544983, "learning_rate": 3.442278054295883e-06, "loss": 0.5718, "step": 5824 }, { "epoch": 0.61, "grad_norm": 2.9518489778855823, "learning_rate": 3.440658723844358e-06, "loss": 0.6359, "step": 5825 }, { "epoch": 0.61, "grad_norm": 2.6141808916194367, "learning_rate": 3.439039574552595e-06, "loss": 0.5865, "step": 5826 }, { "epoch": 0.61, "grad_norm": 2.2429938266365808, "learning_rate": 3.437420606608701e-06, "loss": 0.6993, "step": 5827 }, { "epoch": 0.61, "grad_norm": 2.1988445058564614, "learning_rate": 3.435801820200767e-06, "loss": 0.6538, "step": 5828 }, { "epoch": 0.61, "grad_norm": 2.093978386990422, "learning_rate": 3.4341832155168547e-06, "loss": 0.6003, "step": 5829 }, { "epoch": 0.61, "grad_norm": 2.311995229831535, "learning_rate": 3.432564792745009e-06, "loss": 0.6298, "step": 5830 }, { "epoch": 0.61, "grad_norm": 2.324588374264805, "learning_rate": 3.4309465520732556e-06, "loss": 0.5572, "step": 5831 }, { "epoch": 0.61, "grad_norm": 1.0044411358111915, "learning_rate": 3.4293284936895956e-06, "loss": 0.548, "step": 5832 }, { "epoch": 0.61, "grad_norm": 4.285667049229145, "learning_rate": 3.4277106177820123e-06, "loss": 0.6496, "step": 5833 }, { "epoch": 0.61, "grad_norm": 2.389684858763753, "learning_rate": 3.426092924538462e-06, "loss": 0.654, "step": 5834 }, { "epoch": 0.61, "grad_norm": 2.2261660392871327, "learning_rate": 3.4244754141468878e-06, "loss": 0.7062, "step": 5835 }, { "epoch": 0.61, "grad_norm": 2.237433451917938, "learning_rate": 3.4228580867952044e-06, "loss": 0.6249, "step": 5836 }, { "epoch": 0.61, "grad_norm": 3.0863620341722386, "learning_rate": 3.421240942671312e-06, "loss": 0.6019, "step": 5837 }, { "epoch": 0.61, "grad_norm": 2.5102635909640636, "learning_rate": 3.4196239819630806e-06, "loss": 0.5542, "step": 5838 }, { "epoch": 0.61, "grad_norm": 2.377771418582857, "learning_rate": 3.4180072048583667e-06, "loss": 0.7179, "step": 5839 }, { "epoch": 0.61, "grad_norm": 2.8495071063526494, "learning_rate": 3.4163906115450025e-06, "loss": 0.6351, "step": 5840 }, { "epoch": 0.61, "grad_norm": 4.504564584687121, "learning_rate": 3.4147742022108e-06, "loss": 0.5398, "step": 5841 }, { "epoch": 0.61, "grad_norm": 2.7848784093251355, "learning_rate": 3.4131579770435495e-06, "loss": 0.67, "step": 5842 }, { "epoch": 0.61, "grad_norm": 2.7193060597973844, "learning_rate": 3.411541936231016e-06, "loss": 0.6779, "step": 5843 }, { "epoch": 0.61, "grad_norm": 2.9893759030534155, "learning_rate": 3.409926079960949e-06, "loss": 0.5734, "step": 5844 }, { "epoch": 0.62, "grad_norm": 2.402119046084388, "learning_rate": 3.4083104084210746e-06, "loss": 0.7261, "step": 5845 }, { "epoch": 0.62, "grad_norm": 3.2792238487813976, "learning_rate": 3.4066949217990964e-06, "loss": 0.6428, "step": 5846 }, { "epoch": 0.62, "grad_norm": 2.754234915442587, "learning_rate": 3.4050796202826943e-06, "loss": 0.6201, "step": 5847 }, { "epoch": 0.62, "grad_norm": 2.5663658021698423, "learning_rate": 3.4034645040595325e-06, "loss": 0.6867, "step": 5848 }, { "epoch": 0.62, "grad_norm": 2.808416018507515, "learning_rate": 3.4018495733172485e-06, "loss": 0.7043, "step": 5849 }, { "epoch": 0.62, "grad_norm": 2.945233847925162, "learning_rate": 3.4002348282434637e-06, "loss": 0.5994, "step": 5850 }, { "epoch": 0.62, "grad_norm": 2.242388743778958, "learning_rate": 3.3986202690257707e-06, "loss": 0.689, "step": 5851 }, { "epoch": 0.62, "grad_norm": 2.706078895924368, "learning_rate": 3.397005895851746e-06, "loss": 0.6305, "step": 5852 }, { "epoch": 0.62, "grad_norm": 2.4900554279730427, "learning_rate": 3.395391708908941e-06, "loss": 0.6242, "step": 5853 }, { "epoch": 0.62, "grad_norm": 3.2235848293555365, "learning_rate": 3.39377770838489e-06, "loss": 0.6205, "step": 5854 }, { "epoch": 0.62, "grad_norm": 2.1780532875830025, "learning_rate": 3.392163894467103e-06, "loss": 0.6479, "step": 5855 }, { "epoch": 0.62, "grad_norm": 2.180928205646365, "learning_rate": 3.3905502673430648e-06, "loss": 0.6474, "step": 5856 }, { "epoch": 0.62, "grad_norm": 2.0354934950603507, "learning_rate": 3.3889368272002455e-06, "loss": 0.6783, "step": 5857 }, { "epoch": 0.62, "grad_norm": 13.984789819926613, "learning_rate": 3.387323574226087e-06, "loss": 0.6263, "step": 5858 }, { "epoch": 0.62, "grad_norm": 2.841089761628964, "learning_rate": 3.385710508608017e-06, "loss": 0.6473, "step": 5859 }, { "epoch": 0.62, "grad_norm": 4.189754673822482, "learning_rate": 3.3840976305334304e-06, "loss": 0.5959, "step": 5860 }, { "epoch": 0.62, "grad_norm": 2.5889741809528988, "learning_rate": 3.382484940189711e-06, "loss": 0.6292, "step": 5861 }, { "epoch": 0.62, "grad_norm": 2.289269598614149, "learning_rate": 3.380872437764215e-06, "loss": 0.6444, "step": 5862 }, { "epoch": 0.62, "grad_norm": 2.422036993894899, "learning_rate": 3.37926012344428e-06, "loss": 0.5488, "step": 5863 }, { "epoch": 0.62, "grad_norm": 2.456124041276871, "learning_rate": 3.3776479974172184e-06, "loss": 0.6358, "step": 5864 }, { "epoch": 0.62, "grad_norm": 2.4906368084019817, "learning_rate": 3.3760360598703217e-06, "loss": 0.5991, "step": 5865 }, { "epoch": 0.62, "grad_norm": 2.6266749863791805, "learning_rate": 3.374424310990862e-06, "loss": 0.6148, "step": 5866 }, { "epoch": 0.62, "grad_norm": 2.513373641942084, "learning_rate": 3.372812750966087e-06, "loss": 0.6433, "step": 5867 }, { "epoch": 0.62, "grad_norm": 2.884160237208583, "learning_rate": 3.371201379983223e-06, "loss": 0.5771, "step": 5868 }, { "epoch": 0.62, "grad_norm": 3.2671886681135485, "learning_rate": 3.369590198229473e-06, "loss": 0.5949, "step": 5869 }, { "epoch": 0.62, "grad_norm": 2.2010932339105223, "learning_rate": 3.3679792058920223e-06, "loss": 0.677, "step": 5870 }, { "epoch": 0.62, "grad_norm": 2.8453784039703374, "learning_rate": 3.366368403158028e-06, "loss": 0.604, "step": 5871 }, { "epoch": 0.62, "grad_norm": 2.641273584480205, "learning_rate": 3.3647577902146334e-06, "loss": 0.6553, "step": 5872 }, { "epoch": 0.62, "grad_norm": 2.3540207447949997, "learning_rate": 3.363147367248949e-06, "loss": 0.6874, "step": 5873 }, { "epoch": 0.62, "grad_norm": 2.1061808611226644, "learning_rate": 3.3615371344480725e-06, "loss": 0.5198, "step": 5874 }, { "epoch": 0.62, "grad_norm": 3.610784500535765, "learning_rate": 3.3599270919990744e-06, "loss": 0.6982, "step": 5875 }, { "epoch": 0.62, "grad_norm": 17.35217476906217, "learning_rate": 3.358317240089008e-06, "loss": 0.6382, "step": 5876 }, { "epoch": 0.62, "grad_norm": 2.3499225456097874, "learning_rate": 3.3567075789048973e-06, "loss": 0.6571, "step": 5877 }, { "epoch": 0.62, "grad_norm": 4.846781393555933, "learning_rate": 3.35509810863375e-06, "loss": 0.7104, "step": 5878 }, { "epoch": 0.62, "grad_norm": 3.0399172317674497, "learning_rate": 3.35348882946255e-06, "loss": 0.6536, "step": 5879 }, { "epoch": 0.62, "grad_norm": 3.2181809417118203, "learning_rate": 3.3518797415782577e-06, "loss": 0.5946, "step": 5880 }, { "epoch": 0.62, "grad_norm": 3.7651214632668175, "learning_rate": 3.3502708451678145e-06, "loss": 0.6062, "step": 5881 }, { "epoch": 0.62, "grad_norm": 2.5978846672711, "learning_rate": 3.348662140418133e-06, "loss": 0.6363, "step": 5882 }, { "epoch": 0.62, "grad_norm": 7.165618488483955, "learning_rate": 3.3470536275161126e-06, "loss": 0.5837, "step": 5883 }, { "epoch": 0.62, "grad_norm": 3.6883277185410073, "learning_rate": 3.3454453066486214e-06, "loss": 0.6308, "step": 5884 }, { "epoch": 0.62, "grad_norm": 2.1630804016878504, "learning_rate": 3.3438371780025136e-06, "loss": 0.6886, "step": 5885 }, { "epoch": 0.62, "grad_norm": 2.269499258419974, "learning_rate": 3.342229241764615e-06, "loss": 0.6706, "step": 5886 }, { "epoch": 0.62, "grad_norm": 2.3398677700347497, "learning_rate": 3.34062149812173e-06, "loss": 0.6235, "step": 5887 }, { "epoch": 0.62, "grad_norm": 2.507757222319039, "learning_rate": 3.339013947260642e-06, "loss": 0.5739, "step": 5888 }, { "epoch": 0.62, "grad_norm": 2.9578807377909313, "learning_rate": 3.337406589368113e-06, "loss": 0.6063, "step": 5889 }, { "epoch": 0.62, "grad_norm": 2.2486291156021263, "learning_rate": 3.3357994246308815e-06, "loss": 0.6258, "step": 5890 }, { "epoch": 0.62, "grad_norm": 2.4560317757961596, "learning_rate": 3.3341924532356605e-06, "loss": 0.6704, "step": 5891 }, { "epoch": 0.62, "grad_norm": 2.4000882896893856, "learning_rate": 3.3325856753691453e-06, "loss": 0.6653, "step": 5892 }, { "epoch": 0.62, "grad_norm": 2.309148094791573, "learning_rate": 3.3309790912180056e-06, "loss": 0.5464, "step": 5893 }, { "epoch": 0.62, "grad_norm": 2.9793468104494654, "learning_rate": 3.329372700968894e-06, "loss": 0.6522, "step": 5894 }, { "epoch": 0.62, "grad_norm": 3.234552857190179, "learning_rate": 3.3277665048084283e-06, "loss": 0.6763, "step": 5895 }, { "epoch": 0.62, "grad_norm": 2.305618418961012, "learning_rate": 3.326160502923218e-06, "loss": 0.5466, "step": 5896 }, { "epoch": 0.62, "grad_norm": 2.4393905813677113, "learning_rate": 3.324554695499841e-06, "loss": 0.6869, "step": 5897 }, { "epoch": 0.62, "grad_norm": 3.1678663773433686, "learning_rate": 3.3229490827248585e-06, "loss": 0.6572, "step": 5898 }, { "epoch": 0.62, "grad_norm": 2.810314446746764, "learning_rate": 3.3213436647848017e-06, "loss": 0.6039, "step": 5899 }, { "epoch": 0.62, "grad_norm": 2.616553362514333, "learning_rate": 3.3197384418661854e-06, "loss": 0.6404, "step": 5900 }, { "epoch": 0.62, "grad_norm": 2.606456810157447, "learning_rate": 3.3181334141555003e-06, "loss": 0.6813, "step": 5901 }, { "epoch": 0.62, "grad_norm": 3.352922919680765, "learning_rate": 3.3165285818392133e-06, "loss": 0.5888, "step": 5902 }, { "epoch": 0.62, "grad_norm": 3.765095323296954, "learning_rate": 3.3149239451037706e-06, "loss": 0.6115, "step": 5903 }, { "epoch": 0.62, "grad_norm": 2.0882597233379716, "learning_rate": 3.313319504135591e-06, "loss": 0.6357, "step": 5904 }, { "epoch": 0.62, "grad_norm": 2.3840323224262576, "learning_rate": 3.3117152591210765e-06, "loss": 0.6076, "step": 5905 }, { "epoch": 0.62, "grad_norm": 12.879550786415358, "learning_rate": 3.3101112102466014e-06, "loss": 0.6847, "step": 5906 }, { "epoch": 0.62, "grad_norm": 0.9258667628138425, "learning_rate": 3.308507357698525e-06, "loss": 0.5716, "step": 5907 }, { "epoch": 0.62, "grad_norm": 2.468162429286832, "learning_rate": 3.30690370166317e-06, "loss": 0.6944, "step": 5908 }, { "epoch": 0.62, "grad_norm": 3.4587947376198205, "learning_rate": 3.30530024232685e-06, "loss": 0.6611, "step": 5909 }, { "epoch": 0.62, "grad_norm": 2.7167118186662447, "learning_rate": 3.3036969798758486e-06, "loss": 0.6919, "step": 5910 }, { "epoch": 0.62, "grad_norm": 2.8540713326378624, "learning_rate": 3.3020939144964298e-06, "loss": 0.5524, "step": 5911 }, { "epoch": 0.62, "grad_norm": 2.633945852372088, "learning_rate": 3.3004910463748315e-06, "loss": 0.6379, "step": 5912 }, { "epoch": 0.62, "grad_norm": 2.6471695256154977, "learning_rate": 3.298888375697269e-06, "loss": 0.6573, "step": 5913 }, { "epoch": 0.62, "grad_norm": 2.7889126114993785, "learning_rate": 3.2972859026499395e-06, "loss": 0.5756, "step": 5914 }, { "epoch": 0.62, "grad_norm": 2.4597559748985436, "learning_rate": 3.2956836274190107e-06, "loss": 0.6519, "step": 5915 }, { "epoch": 0.62, "grad_norm": 6.22590279694477, "learning_rate": 3.294081550190633e-06, "loss": 0.5535, "step": 5916 }, { "epoch": 0.62, "grad_norm": 3.1209221601644255, "learning_rate": 3.292479671150927e-06, "loss": 0.6076, "step": 5917 }, { "epoch": 0.62, "grad_norm": 2.6511690459462853, "learning_rate": 3.290877990485999e-06, "loss": 0.5224, "step": 5918 }, { "epoch": 0.62, "grad_norm": 1.017349317050653, "learning_rate": 3.2892765083819233e-06, "loss": 0.515, "step": 5919 }, { "epoch": 0.62, "grad_norm": 2.7093325400474417, "learning_rate": 3.28767522502476e-06, "loss": 0.6475, "step": 5920 }, { "epoch": 0.62, "grad_norm": 2.471371183047004, "learning_rate": 3.2860741406005383e-06, "loss": 0.5238, "step": 5921 }, { "epoch": 0.62, "grad_norm": 2.6312257555514824, "learning_rate": 3.2844732552952686e-06, "loss": 0.6347, "step": 5922 }, { "epoch": 0.62, "grad_norm": 2.411758053136448, "learning_rate": 3.282872569294936e-06, "loss": 0.6182, "step": 5923 }, { "epoch": 0.62, "grad_norm": 2.5478750822742935, "learning_rate": 3.281272082785506e-06, "loss": 0.481, "step": 5924 }, { "epoch": 0.62, "grad_norm": 2.3428946425529644, "learning_rate": 3.2796717959529167e-06, "loss": 0.6574, "step": 5925 }, { "epoch": 0.62, "grad_norm": 2.5726580568876902, "learning_rate": 3.2780717089830845e-06, "loss": 0.6016, "step": 5926 }, { "epoch": 0.62, "grad_norm": 2.284631597557582, "learning_rate": 3.276471822061904e-06, "loss": 0.5906, "step": 5927 }, { "epoch": 0.62, "grad_norm": 3.003461067354797, "learning_rate": 3.2748721353752445e-06, "loss": 0.683, "step": 5928 }, { "epoch": 0.62, "grad_norm": 2.570345614518788, "learning_rate": 3.2732726491089562e-06, "loss": 0.6233, "step": 5929 }, { "epoch": 0.62, "grad_norm": 2.7306210262884796, "learning_rate": 3.2716733634488563e-06, "loss": 0.6855, "step": 5930 }, { "epoch": 0.62, "grad_norm": 2.4961398274059046, "learning_rate": 3.2700742785807503e-06, "loss": 0.6488, "step": 5931 }, { "epoch": 0.62, "grad_norm": 2.674949765424381, "learning_rate": 3.2684753946904136e-06, "loss": 0.5726, "step": 5932 }, { "epoch": 0.62, "grad_norm": 3.1722015237208283, "learning_rate": 3.266876711963602e-06, "loss": 0.5482, "step": 5933 }, { "epoch": 0.62, "grad_norm": 2.4532631137041836, "learning_rate": 3.265278230586043e-06, "loss": 0.6033, "step": 5934 }, { "epoch": 0.62, "grad_norm": 5.84912133996752, "learning_rate": 3.2636799507434447e-06, "loss": 0.6797, "step": 5935 }, { "epoch": 0.62, "grad_norm": 2.687307510260449, "learning_rate": 3.2620818726214888e-06, "loss": 0.6185, "step": 5936 }, { "epoch": 0.62, "grad_norm": 2.3580307891356673, "learning_rate": 3.260483996405839e-06, "loss": 0.6161, "step": 5937 }, { "epoch": 0.62, "grad_norm": 2.074986511593617, "learning_rate": 3.2588863222821306e-06, "loss": 0.6061, "step": 5938 }, { "epoch": 0.62, "grad_norm": 2.6795167053076145, "learning_rate": 3.2572888504359743e-06, "loss": 0.648, "step": 5939 }, { "epoch": 0.63, "grad_norm": 0.9485630736671, "learning_rate": 3.2556915810529627e-06, "loss": 0.5871, "step": 5940 }, { "epoch": 0.63, "grad_norm": 3.589804098570901, "learning_rate": 3.25409451431866e-06, "loss": 0.5836, "step": 5941 }, { "epoch": 0.63, "grad_norm": 2.029028547694551, "learning_rate": 3.2524976504186125e-06, "loss": 0.6208, "step": 5942 }, { "epoch": 0.63, "grad_norm": 2.457015002957502, "learning_rate": 3.2509009895383337e-06, "loss": 0.5133, "step": 5943 }, { "epoch": 0.63, "grad_norm": 2.7782124039908958, "learning_rate": 3.249304531863323e-06, "loss": 0.6363, "step": 5944 }, { "epoch": 0.63, "grad_norm": 2.7887314599627198, "learning_rate": 3.247708277579049e-06, "loss": 0.693, "step": 5945 }, { "epoch": 0.63, "grad_norm": 5.059722447279897, "learning_rate": 3.2461122268709657e-06, "loss": 0.6958, "step": 5946 }, { "epoch": 0.63, "grad_norm": 3.609162372409086, "learning_rate": 3.244516379924492e-06, "loss": 0.5227, "step": 5947 }, { "epoch": 0.63, "grad_norm": 2.5711727147391903, "learning_rate": 3.242920736925029e-06, "loss": 0.5873, "step": 5948 }, { "epoch": 0.63, "grad_norm": 2.965512354865726, "learning_rate": 3.2413252980579572e-06, "loss": 0.5924, "step": 5949 }, { "epoch": 0.63, "grad_norm": 4.480684034499855, "learning_rate": 3.239730063508629e-06, "loss": 0.6539, "step": 5950 }, { "epoch": 0.63, "grad_norm": 3.0554230163758693, "learning_rate": 3.238135033462374e-06, "loss": 0.6008, "step": 5951 }, { "epoch": 0.63, "grad_norm": 2.264871265257945, "learning_rate": 3.2365402081044955e-06, "loss": 0.7099, "step": 5952 }, { "epoch": 0.63, "grad_norm": 2.4974903193187594, "learning_rate": 3.2349455876202797e-06, "loss": 0.6184, "step": 5953 }, { "epoch": 0.63, "grad_norm": 2.18292964645188, "learning_rate": 3.2333511721949816e-06, "loss": 0.576, "step": 5954 }, { "epoch": 0.63, "grad_norm": 2.187323524231644, "learning_rate": 3.23175696201384e-06, "loss": 0.6029, "step": 5955 }, { "epoch": 0.63, "grad_norm": 2.42051305833246, "learning_rate": 3.230162957262062e-06, "loss": 0.5949, "step": 5956 }, { "epoch": 0.63, "grad_norm": 2.437281602803996, "learning_rate": 3.2285691581248345e-06, "loss": 0.6308, "step": 5957 }, { "epoch": 0.63, "grad_norm": 2.188705793501174, "learning_rate": 3.226975564787322e-06, "loss": 0.6577, "step": 5958 }, { "epoch": 0.63, "grad_norm": 2.095497000196284, "learning_rate": 3.2253821774346644e-06, "loss": 0.613, "step": 5959 }, { "epoch": 0.63, "grad_norm": 2.217153208860546, "learning_rate": 3.2237889962519748e-06, "loss": 0.5494, "step": 5960 }, { "epoch": 0.63, "grad_norm": 3.0997362892532534, "learning_rate": 3.2221960214243437e-06, "loss": 0.6854, "step": 5961 }, { "epoch": 0.63, "grad_norm": 2.176771504376497, "learning_rate": 3.2206032531368407e-06, "loss": 0.625, "step": 5962 }, { "epoch": 0.63, "grad_norm": 3.3368855193919957, "learning_rate": 3.2190106915745077e-06, "loss": 0.6591, "step": 5963 }, { "epoch": 0.63, "grad_norm": 2.221750102788297, "learning_rate": 3.2174183369223667e-06, "loss": 0.5971, "step": 5964 }, { "epoch": 0.63, "grad_norm": 2.3884878579108473, "learning_rate": 3.2158261893654073e-06, "loss": 0.7223, "step": 5965 }, { "epoch": 0.63, "grad_norm": 2.8667030280760954, "learning_rate": 3.214234249088605e-06, "loss": 0.6087, "step": 5966 }, { "epoch": 0.63, "grad_norm": 2.172551788871772, "learning_rate": 3.212642516276905e-06, "loss": 0.5243, "step": 5967 }, { "epoch": 0.63, "grad_norm": 2.7291377150296534, "learning_rate": 3.2110509911152315e-06, "loss": 0.5997, "step": 5968 }, { "epoch": 0.63, "grad_norm": 1.9384746293091961, "learning_rate": 3.209459673788482e-06, "loss": 0.5464, "step": 5969 }, { "epoch": 0.63, "grad_norm": 2.058472029705662, "learning_rate": 3.207868564481532e-06, "loss": 0.5822, "step": 5970 }, { "epoch": 0.63, "grad_norm": 2.567086993673489, "learning_rate": 3.2062776633792303e-06, "loss": 0.6797, "step": 5971 }, { "epoch": 0.63, "grad_norm": 3.1555293622681844, "learning_rate": 3.2046869706664074e-06, "loss": 0.5843, "step": 5972 }, { "epoch": 0.63, "grad_norm": 2.3596960035354035, "learning_rate": 3.2030964865278604e-06, "loss": 0.5639, "step": 5973 }, { "epoch": 0.63, "grad_norm": 2.0640908869935912, "learning_rate": 3.2015062111483688e-06, "loss": 0.6841, "step": 5974 }, { "epoch": 0.63, "grad_norm": 2.689198480543773, "learning_rate": 3.199916144712688e-06, "loss": 0.6605, "step": 5975 }, { "epoch": 0.63, "grad_norm": 2.6877494467316985, "learning_rate": 3.1983262874055442e-06, "loss": 0.5771, "step": 5976 }, { "epoch": 0.63, "grad_norm": 2.772844595362914, "learning_rate": 3.1967366394116477e-06, "loss": 0.6137, "step": 5977 }, { "epoch": 0.63, "grad_norm": 1.0203362646742848, "learning_rate": 3.1951472009156725e-06, "loss": 0.5498, "step": 5978 }, { "epoch": 0.63, "grad_norm": 2.242251234684735, "learning_rate": 3.193557972102279e-06, "loss": 0.6582, "step": 5979 }, { "epoch": 0.63, "grad_norm": 3.8382421017907684, "learning_rate": 3.191968953156098e-06, "loss": 0.6473, "step": 5980 }, { "epoch": 0.63, "grad_norm": 2.1209413221002222, "learning_rate": 3.19038014426174e-06, "loss": 0.6004, "step": 5981 }, { "epoch": 0.63, "grad_norm": 2.2255883161813963, "learning_rate": 3.1887915456037833e-06, "loss": 0.6218, "step": 5982 }, { "epoch": 0.63, "grad_norm": 2.8783014772228532, "learning_rate": 3.1872031573667895e-06, "loss": 0.7142, "step": 5983 }, { "epoch": 0.63, "grad_norm": 2.5531410132078363, "learning_rate": 3.185614979735293e-06, "loss": 0.5936, "step": 5984 }, { "epoch": 0.63, "grad_norm": 2.565229169837344, "learning_rate": 3.1840270128938032e-06, "loss": 0.6143, "step": 5985 }, { "epoch": 0.63, "grad_norm": 2.045126577655692, "learning_rate": 3.182439257026807e-06, "loss": 0.5457, "step": 5986 }, { "epoch": 0.63, "grad_norm": 2.0857258123928033, "learning_rate": 3.180851712318761e-06, "loss": 0.6753, "step": 5987 }, { "epoch": 0.63, "grad_norm": 2.631692022599449, "learning_rate": 3.179264378954106e-06, "loss": 0.5913, "step": 5988 }, { "epoch": 0.63, "grad_norm": 3.8525652346655925, "learning_rate": 3.1776772571172514e-06, "loss": 0.7053, "step": 5989 }, { "epoch": 0.63, "grad_norm": 2.2544931285363856, "learning_rate": 3.1760903469925874e-06, "loss": 0.6392, "step": 5990 }, { "epoch": 0.63, "grad_norm": 2.2143334987948213, "learning_rate": 3.174503648764473e-06, "loss": 0.6196, "step": 5991 }, { "epoch": 0.63, "grad_norm": 2.7607820463483015, "learning_rate": 3.1729171626172485e-06, "loss": 0.6129, "step": 5992 }, { "epoch": 0.63, "grad_norm": 2.431241441047194, "learning_rate": 3.1713308887352244e-06, "loss": 0.5663, "step": 5993 }, { "epoch": 0.63, "grad_norm": 0.9957317360641763, "learning_rate": 3.1697448273026944e-06, "loss": 0.5867, "step": 5994 }, { "epoch": 0.63, "grad_norm": 4.718815023192582, "learning_rate": 3.1681589785039178e-06, "loss": 0.5582, "step": 5995 }, { "epoch": 0.63, "grad_norm": 2.3173917102419392, "learning_rate": 3.1665733425231356e-06, "loss": 0.5831, "step": 5996 }, { "epoch": 0.63, "grad_norm": 2.9747101713599116, "learning_rate": 3.164987919544563e-06, "loss": 0.6145, "step": 5997 }, { "epoch": 0.63, "grad_norm": 3.0576887431965263, "learning_rate": 3.163402709752389e-06, "loss": 0.6303, "step": 5998 }, { "epoch": 0.63, "grad_norm": 2.2892471503762697, "learning_rate": 3.1618177133307813e-06, "loss": 0.5384, "step": 5999 }, { "epoch": 0.63, "grad_norm": 2.0697756117981876, "learning_rate": 3.1602329304638755e-06, "loss": 0.6807, "step": 6000 }, { "epoch": 0.63, "grad_norm": 2.1548228132567897, "learning_rate": 3.158648361335791e-06, "loss": 0.6769, "step": 6001 }, { "epoch": 0.63, "grad_norm": 2.171892545453622, "learning_rate": 3.1570640061306153e-06, "loss": 0.5813, "step": 6002 }, { "epoch": 0.63, "grad_norm": 3.5558082679769543, "learning_rate": 3.1554798650324183e-06, "loss": 0.5725, "step": 6003 }, { "epoch": 0.63, "grad_norm": 2.1783862920998778, "learning_rate": 3.1538959382252378e-06, "loss": 0.6095, "step": 6004 }, { "epoch": 0.63, "grad_norm": 2.439670545080028, "learning_rate": 3.1523122258930904e-06, "loss": 0.6402, "step": 6005 }, { "epoch": 0.63, "grad_norm": 2.1135984445220846, "learning_rate": 3.150728728219966e-06, "loss": 0.5736, "step": 6006 }, { "epoch": 0.63, "grad_norm": 2.2613893512798544, "learning_rate": 3.149145445389835e-06, "loss": 0.6038, "step": 6007 }, { "epoch": 0.63, "grad_norm": 2.7430614334073287, "learning_rate": 3.147562377586635e-06, "loss": 0.6266, "step": 6008 }, { "epoch": 0.63, "grad_norm": 2.8377286942815885, "learning_rate": 3.1459795249942815e-06, "loss": 0.6455, "step": 6009 }, { "epoch": 0.63, "grad_norm": 0.9591836036301431, "learning_rate": 3.144396887796669e-06, "loss": 0.5318, "step": 6010 }, { "epoch": 0.63, "grad_norm": 2.0086517588320363, "learning_rate": 3.1428144661776605e-06, "loss": 0.5734, "step": 6011 }, { "epoch": 0.63, "grad_norm": 2.218164590981601, "learning_rate": 3.141232260321102e-06, "loss": 0.683, "step": 6012 }, { "epoch": 0.63, "grad_norm": 2.503329798201055, "learning_rate": 3.1396502704108034e-06, "loss": 0.6242, "step": 6013 }, { "epoch": 0.63, "grad_norm": 3.221827577676699, "learning_rate": 3.13806849663056e-06, "loss": 0.5933, "step": 6014 }, { "epoch": 0.63, "grad_norm": 3.6189027947412518, "learning_rate": 3.1364869391641343e-06, "loss": 0.5451, "step": 6015 }, { "epoch": 0.63, "grad_norm": 2.2116203062161435, "learning_rate": 3.1349055981952725e-06, "loss": 0.5863, "step": 6016 }, { "epoch": 0.63, "grad_norm": 2.4603820875117735, "learning_rate": 3.133324473907685e-06, "loss": 0.6867, "step": 6017 }, { "epoch": 0.63, "grad_norm": 4.688134030426931, "learning_rate": 3.1317435664850626e-06, "loss": 0.6107, "step": 6018 }, { "epoch": 0.63, "grad_norm": 2.2474502089494233, "learning_rate": 3.130162876111074e-06, "loss": 0.665, "step": 6019 }, { "epoch": 0.63, "grad_norm": 2.235530948111977, "learning_rate": 3.128582402969358e-06, "loss": 0.6928, "step": 6020 }, { "epoch": 0.63, "grad_norm": 2.5821548582113008, "learning_rate": 3.1270021472435276e-06, "loss": 0.6203, "step": 6021 }, { "epoch": 0.63, "grad_norm": 2.637774136880771, "learning_rate": 3.125422109117173e-06, "loss": 0.66, "step": 6022 }, { "epoch": 0.63, "grad_norm": 2.1103955615847716, "learning_rate": 3.1238422887738596e-06, "loss": 0.5965, "step": 6023 }, { "epoch": 0.63, "grad_norm": 2.2341447165344492, "learning_rate": 3.122262686397124e-06, "loss": 0.741, "step": 6024 }, { "epoch": 0.63, "grad_norm": 2.2231015638638967, "learning_rate": 3.1206833021704843e-06, "loss": 0.6181, "step": 6025 }, { "epoch": 0.63, "grad_norm": 2.8157201183565475, "learning_rate": 3.1191041362774246e-06, "loss": 0.688, "step": 6026 }, { "epoch": 0.63, "grad_norm": 2.6112171825573403, "learning_rate": 3.117525188901409e-06, "loss": 0.6337, "step": 6027 }, { "epoch": 0.63, "grad_norm": 2.591086899022109, "learning_rate": 3.115946460225875e-06, "loss": 0.6666, "step": 6028 }, { "epoch": 0.63, "grad_norm": 1.990144411097681, "learning_rate": 3.1143679504342367e-06, "loss": 0.607, "step": 6029 }, { "epoch": 0.63, "grad_norm": 1.097695113019066, "learning_rate": 3.1127896597098784e-06, "loss": 0.5598, "step": 6030 }, { "epoch": 0.63, "grad_norm": 4.043305343302304, "learning_rate": 3.1112115882361605e-06, "loss": 0.6536, "step": 6031 }, { "epoch": 0.63, "grad_norm": 2.1224861128520143, "learning_rate": 3.1096337361964213e-06, "loss": 0.6197, "step": 6032 }, { "epoch": 0.63, "grad_norm": 2.2716747161287523, "learning_rate": 3.108056103773972e-06, "loss": 0.6368, "step": 6033 }, { "epoch": 0.63, "grad_norm": 2.3153801155163998, "learning_rate": 3.106478691152094e-06, "loss": 0.6325, "step": 6034 }, { "epoch": 0.64, "grad_norm": 2.5772248457210116, "learning_rate": 3.1049014985140468e-06, "loss": 0.6982, "step": 6035 }, { "epoch": 0.64, "grad_norm": 3.92355644648877, "learning_rate": 3.103324526043066e-06, "loss": 0.6235, "step": 6036 }, { "epoch": 0.64, "grad_norm": 2.394493973433464, "learning_rate": 3.101747773922359e-06, "loss": 0.5862, "step": 6037 }, { "epoch": 0.64, "grad_norm": 2.840825066417566, "learning_rate": 3.100171242335109e-06, "loss": 0.6773, "step": 6038 }, { "epoch": 0.64, "grad_norm": 2.3142902400992083, "learning_rate": 3.0985949314644724e-06, "loss": 0.6347, "step": 6039 }, { "epoch": 0.64, "grad_norm": 3.1693698090325952, "learning_rate": 3.09701884149358e-06, "loss": 0.632, "step": 6040 }, { "epoch": 0.64, "grad_norm": 2.174190785161442, "learning_rate": 3.0954429726055367e-06, "loss": 0.5664, "step": 6041 }, { "epoch": 0.64, "grad_norm": 2.427368074809071, "learning_rate": 3.093867324983425e-06, "loss": 0.6002, "step": 6042 }, { "epoch": 0.64, "grad_norm": 0.98991358532084, "learning_rate": 3.0922918988102968e-06, "loss": 0.5889, "step": 6043 }, { "epoch": 0.64, "grad_norm": 3.16305476049039, "learning_rate": 3.0907166942691804e-06, "loss": 0.5656, "step": 6044 }, { "epoch": 0.64, "grad_norm": 2.3426174204082426, "learning_rate": 3.0891417115430794e-06, "loss": 0.5778, "step": 6045 }, { "epoch": 0.64, "grad_norm": 2.695431267278061, "learning_rate": 3.08756695081497e-06, "loss": 0.662, "step": 6046 }, { "epoch": 0.64, "grad_norm": 2.4120245938951017, "learning_rate": 3.085992412267807e-06, "loss": 0.5995, "step": 6047 }, { "epoch": 0.64, "grad_norm": 2.0993135251738906, "learning_rate": 3.08441809608451e-06, "loss": 0.6046, "step": 6048 }, { "epoch": 0.64, "grad_norm": 2.120694022813388, "learning_rate": 3.0828440024479823e-06, "loss": 0.597, "step": 6049 }, { "epoch": 0.64, "grad_norm": 2.164550468698215, "learning_rate": 3.081270131541094e-06, "loss": 0.6243, "step": 6050 }, { "epoch": 0.64, "grad_norm": 2.499720888315672, "learning_rate": 3.079696483546699e-06, "loss": 0.5662, "step": 6051 }, { "epoch": 0.64, "grad_norm": 2.19241543023071, "learning_rate": 3.078123058647614e-06, "loss": 0.645, "step": 6052 }, { "epoch": 0.64, "grad_norm": 2.1759008939791316, "learning_rate": 3.0765498570266354e-06, "loss": 0.5636, "step": 6053 }, { "epoch": 0.64, "grad_norm": 2.481766219102531, "learning_rate": 3.074976878866536e-06, "loss": 0.5773, "step": 6054 }, { "epoch": 0.64, "grad_norm": 2.6119108357768677, "learning_rate": 3.0734041243500578e-06, "loss": 0.6492, "step": 6055 }, { "epoch": 0.64, "grad_norm": 2.1005618832159336, "learning_rate": 3.0718315936599184e-06, "loss": 0.6326, "step": 6056 }, { "epoch": 0.64, "grad_norm": 2.1970627941200704, "learning_rate": 3.0702592869788105e-06, "loss": 0.5606, "step": 6057 }, { "epoch": 0.64, "grad_norm": 1.0059803541239813, "learning_rate": 3.0686872044894014e-06, "loss": 0.5722, "step": 6058 }, { "epoch": 0.64, "grad_norm": 3.2015112152252043, "learning_rate": 3.0671153463743282e-06, "loss": 0.641, "step": 6059 }, { "epoch": 0.64, "grad_norm": 1.0191280750132719, "learning_rate": 3.0655437128162093e-06, "loss": 0.5936, "step": 6060 }, { "epoch": 0.64, "grad_norm": 6.045650809744414, "learning_rate": 3.0639723039976284e-06, "loss": 0.6325, "step": 6061 }, { "epoch": 0.64, "grad_norm": 2.2246240058605222, "learning_rate": 3.062401120101149e-06, "loss": 0.6262, "step": 6062 }, { "epoch": 0.64, "grad_norm": 3.146655954063514, "learning_rate": 3.060830161309305e-06, "loss": 0.699, "step": 6063 }, { "epoch": 0.64, "grad_norm": 2.0936615072845663, "learning_rate": 3.05925942780461e-06, "loss": 0.5818, "step": 6064 }, { "epoch": 0.64, "grad_norm": 4.660109768200281, "learning_rate": 3.0576889197695435e-06, "loss": 0.5617, "step": 6065 }, { "epoch": 0.64, "grad_norm": 2.2782490312138446, "learning_rate": 3.0561186373865625e-06, "loss": 0.5962, "step": 6066 }, { "epoch": 0.64, "grad_norm": 2.5339115449409535, "learning_rate": 3.054548580838099e-06, "loss": 0.6213, "step": 6067 }, { "epoch": 0.64, "grad_norm": 2.145966200390791, "learning_rate": 3.05297875030656e-06, "loss": 0.5802, "step": 6068 }, { "epoch": 0.64, "grad_norm": 3.023290228663878, "learning_rate": 3.05140914597432e-06, "loss": 0.6221, "step": 6069 }, { "epoch": 0.64, "grad_norm": 2.7573240624259157, "learning_rate": 3.049839768023732e-06, "loss": 0.5638, "step": 6070 }, { "epoch": 0.64, "grad_norm": 10.360362698449237, "learning_rate": 3.0482706166371236e-06, "loss": 0.6423, "step": 6071 }, { "epoch": 0.64, "grad_norm": 2.7058889588734343, "learning_rate": 3.0467016919967908e-06, "loss": 0.6319, "step": 6072 }, { "epoch": 0.64, "grad_norm": 2.38161674800116, "learning_rate": 3.0451329942850117e-06, "loss": 0.6777, "step": 6073 }, { "epoch": 0.64, "grad_norm": 2.4754236044435, "learning_rate": 3.0435645236840296e-06, "loss": 0.6553, "step": 6074 }, { "epoch": 0.64, "grad_norm": 2.505835959851159, "learning_rate": 3.041996280376066e-06, "loss": 0.5921, "step": 6075 }, { "epoch": 0.64, "grad_norm": 2.2325223472969085, "learning_rate": 3.0404282645433125e-06, "loss": 0.6708, "step": 6076 }, { "epoch": 0.64, "grad_norm": 2.4229637130342065, "learning_rate": 3.038860476367942e-06, "loss": 0.5817, "step": 6077 }, { "epoch": 0.64, "grad_norm": 2.2424616876949837, "learning_rate": 3.03729291603209e-06, "loss": 0.6665, "step": 6078 }, { "epoch": 0.64, "grad_norm": 2.196951713754301, "learning_rate": 3.0357255837178733e-06, "loss": 0.6736, "step": 6079 }, { "epoch": 0.64, "grad_norm": 3.412747433968496, "learning_rate": 3.034158479607381e-06, "loss": 0.6189, "step": 6080 }, { "epoch": 0.64, "grad_norm": 2.3144405798445638, "learning_rate": 3.032591603882674e-06, "loss": 0.6844, "step": 6081 }, { "epoch": 0.64, "grad_norm": 2.7027103937151957, "learning_rate": 3.031024956725787e-06, "loss": 0.6343, "step": 6082 }, { "epoch": 0.64, "grad_norm": 2.612524722737757, "learning_rate": 3.029458538318728e-06, "loss": 0.6779, "step": 6083 }, { "epoch": 0.64, "grad_norm": 2.5457464342733767, "learning_rate": 3.02789234884348e-06, "loss": 0.6377, "step": 6084 }, { "epoch": 0.64, "grad_norm": 1.9720571167689611, "learning_rate": 3.0263263884819975e-06, "loss": 0.6417, "step": 6085 }, { "epoch": 0.64, "grad_norm": 2.847776105778556, "learning_rate": 3.0247606574162127e-06, "loss": 0.6391, "step": 6086 }, { "epoch": 0.64, "grad_norm": 2.33929101232662, "learning_rate": 3.0231951558280226e-06, "loss": 0.6156, "step": 6087 }, { "epoch": 0.64, "grad_norm": 2.5013360523106787, "learning_rate": 3.0216298838993043e-06, "loss": 0.7201, "step": 6088 }, { "epoch": 0.64, "grad_norm": 3.089321590479491, "learning_rate": 3.020064841811908e-06, "loss": 0.6186, "step": 6089 }, { "epoch": 0.64, "grad_norm": 2.7046583661780823, "learning_rate": 3.018500029747657e-06, "loss": 0.6985, "step": 6090 }, { "epoch": 0.64, "grad_norm": 2.5221299383717657, "learning_rate": 3.016935447888343e-06, "loss": 0.6104, "step": 6091 }, { "epoch": 0.64, "grad_norm": 2.1978338732670597, "learning_rate": 3.015371096415735e-06, "loss": 0.5294, "step": 6092 }, { "epoch": 0.64, "grad_norm": 2.685524749270053, "learning_rate": 3.0138069755115772e-06, "loss": 0.672, "step": 6093 }, { "epoch": 0.64, "grad_norm": 2.1195488615414546, "learning_rate": 3.012243085357582e-06, "loss": 0.6829, "step": 6094 }, { "epoch": 0.64, "grad_norm": 2.577287209734468, "learning_rate": 3.010679426135442e-06, "loss": 0.7072, "step": 6095 }, { "epoch": 0.64, "grad_norm": 6.13324035955566, "learning_rate": 3.009115998026815e-06, "loss": 0.5589, "step": 6096 }, { "epoch": 0.64, "grad_norm": 2.274593312395244, "learning_rate": 3.007552801213335e-06, "loss": 0.6797, "step": 6097 }, { "epoch": 0.64, "grad_norm": 0.8495761189029405, "learning_rate": 3.0059898358766102e-06, "loss": 0.5548, "step": 6098 }, { "epoch": 0.64, "grad_norm": 2.263736591389369, "learning_rate": 3.004427102198225e-06, "loss": 0.6734, "step": 6099 }, { "epoch": 0.64, "grad_norm": 2.428586454310511, "learning_rate": 3.002864600359729e-06, "loss": 0.6665, "step": 6100 }, { "epoch": 0.64, "grad_norm": 2.6142648776527855, "learning_rate": 3.0013023305426493e-06, "loss": 0.665, "step": 6101 }, { "epoch": 0.64, "grad_norm": 4.093099723594316, "learning_rate": 2.9997402929284886e-06, "loss": 0.5937, "step": 6102 }, { "epoch": 0.64, "grad_norm": 0.981769329689394, "learning_rate": 2.9981784876987195e-06, "loss": 0.5496, "step": 6103 }, { "epoch": 0.64, "grad_norm": 2.089324365056355, "learning_rate": 2.996616915034786e-06, "loss": 0.582, "step": 6104 }, { "epoch": 0.64, "grad_norm": 3.283444978516847, "learning_rate": 2.9950555751181067e-06, "loss": 0.6011, "step": 6105 }, { "epoch": 0.64, "grad_norm": 2.1322926764233148, "learning_rate": 2.9934944681300764e-06, "loss": 0.595, "step": 6106 }, { "epoch": 0.64, "grad_norm": 2.6785042803171994, "learning_rate": 2.9919335942520577e-06, "loss": 0.604, "step": 6107 }, { "epoch": 0.64, "grad_norm": 2.525026434174256, "learning_rate": 2.9903729536653908e-06, "loss": 0.5419, "step": 6108 }, { "epoch": 0.64, "grad_norm": 4.332421592393996, "learning_rate": 2.9888125465513838e-06, "loss": 0.6079, "step": 6109 }, { "epoch": 0.64, "grad_norm": 2.7390363662038246, "learning_rate": 2.987252373091322e-06, "loss": 0.5825, "step": 6110 }, { "epoch": 0.64, "grad_norm": 2.6929057930774722, "learning_rate": 2.9856924334664607e-06, "loss": 0.5882, "step": 6111 }, { "epoch": 0.64, "grad_norm": 2.8780256095081334, "learning_rate": 2.9841327278580306e-06, "loss": 0.5991, "step": 6112 }, { "epoch": 0.64, "grad_norm": 3.211030034668102, "learning_rate": 2.982573256447232e-06, "loss": 0.6217, "step": 6113 }, { "epoch": 0.64, "grad_norm": 2.076849408004536, "learning_rate": 2.98101401941524e-06, "loss": 0.6127, "step": 6114 }, { "epoch": 0.64, "grad_norm": 2.8399499017577328, "learning_rate": 2.979455016943204e-06, "loss": 0.6985, "step": 6115 }, { "epoch": 0.64, "grad_norm": 2.5380298634074285, "learning_rate": 2.977896249212244e-06, "loss": 0.6568, "step": 6116 }, { "epoch": 0.64, "grad_norm": 2.1139041232082096, "learning_rate": 2.976337716403452e-06, "loss": 0.6043, "step": 6117 }, { "epoch": 0.64, "grad_norm": 2.386270978789105, "learning_rate": 2.974779418697893e-06, "loss": 0.58, "step": 6118 }, { "epoch": 0.64, "grad_norm": 2.249981645922609, "learning_rate": 2.9732213562766076e-06, "loss": 0.7059, "step": 6119 }, { "epoch": 0.64, "grad_norm": 2.7953475276968995, "learning_rate": 2.9716635293206054e-06, "loss": 0.5731, "step": 6120 }, { "epoch": 0.64, "grad_norm": 2.1595949678722386, "learning_rate": 2.9701059380108732e-06, "loss": 0.6399, "step": 6121 }, { "epoch": 0.64, "grad_norm": 4.250031089313065, "learning_rate": 2.9685485825283646e-06, "loss": 0.6693, "step": 6122 }, { "epoch": 0.64, "grad_norm": 2.716682335751081, "learning_rate": 2.9669914630540074e-06, "loss": 0.5684, "step": 6123 }, { "epoch": 0.64, "grad_norm": 2.11893087796511, "learning_rate": 2.9654345797687067e-06, "loss": 0.6003, "step": 6124 }, { "epoch": 0.64, "grad_norm": 3.1494558741601413, "learning_rate": 2.9638779328533363e-06, "loss": 0.5924, "step": 6125 }, { "epoch": 0.64, "grad_norm": 2.6969375293675673, "learning_rate": 2.9623215224887405e-06, "loss": 0.6277, "step": 6126 }, { "epoch": 0.64, "grad_norm": 5.129836907789918, "learning_rate": 2.9607653488557385e-06, "loss": 0.6265, "step": 6127 }, { "epoch": 0.64, "grad_norm": 2.24499842881353, "learning_rate": 2.9592094121351257e-06, "loss": 0.5141, "step": 6128 }, { "epoch": 0.64, "grad_norm": 1.1727226381138738, "learning_rate": 2.9576537125076644e-06, "loss": 0.5722, "step": 6129 }, { "epoch": 0.65, "grad_norm": 2.4525235655286766, "learning_rate": 2.956098250154089e-06, "loss": 0.628, "step": 6130 }, { "epoch": 0.65, "grad_norm": 3.0032073192917212, "learning_rate": 2.954543025255111e-06, "loss": 0.6489, "step": 6131 }, { "epoch": 0.65, "grad_norm": 2.2967514557097366, "learning_rate": 2.9529880379914123e-06, "loss": 0.6079, "step": 6132 }, { "epoch": 0.65, "grad_norm": 3.9813396803758367, "learning_rate": 2.9514332885436447e-06, "loss": 0.6043, "step": 6133 }, { "epoch": 0.65, "grad_norm": 2.374827795762819, "learning_rate": 2.9498787770924375e-06, "loss": 0.6733, "step": 6134 }, { "epoch": 0.65, "grad_norm": 2.8305018905675694, "learning_rate": 2.9483245038183874e-06, "loss": 0.6773, "step": 6135 }, { "epoch": 0.65, "grad_norm": 2.729421853905016, "learning_rate": 2.946770468902064e-06, "loss": 0.6898, "step": 6136 }, { "epoch": 0.65, "grad_norm": 0.9984646956788515, "learning_rate": 2.945216672524014e-06, "loss": 0.6088, "step": 6137 }, { "epoch": 0.65, "grad_norm": 2.667763450610834, "learning_rate": 2.943663114864752e-06, "loss": 0.577, "step": 6138 }, { "epoch": 0.65, "grad_norm": 2.802310695654871, "learning_rate": 2.9421097961047633e-06, "loss": 0.5924, "step": 6139 }, { "epoch": 0.65, "grad_norm": 2.2866904253496174, "learning_rate": 2.9405567164245096e-06, "loss": 0.6054, "step": 6140 }, { "epoch": 0.65, "grad_norm": 3.2953941016991246, "learning_rate": 2.939003876004424e-06, "loss": 0.6667, "step": 6141 }, { "epoch": 0.65, "grad_norm": 4.230139296054894, "learning_rate": 2.9374512750249098e-06, "loss": 0.594, "step": 6142 }, { "epoch": 0.65, "grad_norm": 2.2675620169968975, "learning_rate": 2.935898913666345e-06, "loss": 0.6655, "step": 6143 }, { "epoch": 0.65, "grad_norm": 2.459430858178251, "learning_rate": 2.9343467921090774e-06, "loss": 0.655, "step": 6144 }, { "epoch": 0.65, "grad_norm": 2.267669278148576, "learning_rate": 2.9327949105334284e-06, "loss": 0.633, "step": 6145 }, { "epoch": 0.65, "grad_norm": 2.4328862273124567, "learning_rate": 2.93124326911969e-06, "loss": 0.6138, "step": 6146 }, { "epoch": 0.65, "grad_norm": 0.8997361338805473, "learning_rate": 2.9296918680481308e-06, "loss": 0.5905, "step": 6147 }, { "epoch": 0.65, "grad_norm": 2.7491447988759568, "learning_rate": 2.928140707498984e-06, "loss": 0.6503, "step": 6148 }, { "epoch": 0.65, "grad_norm": 3.6968575644202506, "learning_rate": 2.92658978765246e-06, "loss": 0.5916, "step": 6149 }, { "epoch": 0.65, "grad_norm": 3.5697760256627373, "learning_rate": 2.925039108688742e-06, "loss": 0.638, "step": 6150 }, { "epoch": 0.65, "grad_norm": 2.049482671759394, "learning_rate": 2.9234886707879827e-06, "loss": 0.6333, "step": 6151 }, { "epoch": 0.65, "grad_norm": 2.446291237259445, "learning_rate": 2.921938474130307e-06, "loss": 0.652, "step": 6152 }, { "epoch": 0.65, "grad_norm": 2.200019946516808, "learning_rate": 2.9203885188958103e-06, "loss": 0.5058, "step": 6153 }, { "epoch": 0.65, "grad_norm": 3.0764287200010685, "learning_rate": 2.9188388052645656e-06, "loss": 0.6562, "step": 6154 }, { "epoch": 0.65, "grad_norm": 3.3263889642902593, "learning_rate": 2.9172893334166108e-06, "loss": 0.5888, "step": 6155 }, { "epoch": 0.65, "grad_norm": 2.164471451455957, "learning_rate": 2.915740103531963e-06, "loss": 0.5748, "step": 6156 }, { "epoch": 0.65, "grad_norm": 3.0166454764249417, "learning_rate": 2.9141911157906032e-06, "loss": 0.6495, "step": 6157 }, { "epoch": 0.65, "grad_norm": 2.0796493091839743, "learning_rate": 2.9126423703724925e-06, "loss": 0.5941, "step": 6158 }, { "epoch": 0.65, "grad_norm": 2.472853403516785, "learning_rate": 2.911093867457555e-06, "loss": 0.6391, "step": 6159 }, { "epoch": 0.65, "grad_norm": 2.2590642055205663, "learning_rate": 2.9095456072256955e-06, "loss": 0.6039, "step": 6160 }, { "epoch": 0.65, "grad_norm": 2.1770319394327617, "learning_rate": 2.9079975898567823e-06, "loss": 0.6178, "step": 6161 }, { "epoch": 0.65, "grad_norm": 2.4623318761495, "learning_rate": 2.906449815530664e-06, "loss": 0.6449, "step": 6162 }, { "epoch": 0.65, "grad_norm": 2.4317346877522916, "learning_rate": 2.9049022844271517e-06, "loss": 0.688, "step": 6163 }, { "epoch": 0.65, "grad_norm": 2.427285093977721, "learning_rate": 2.9033549967260383e-06, "loss": 0.6544, "step": 6164 }, { "epoch": 0.65, "grad_norm": 2.4834448464193644, "learning_rate": 2.9018079526070786e-06, "loss": 0.5716, "step": 6165 }, { "epoch": 0.65, "grad_norm": 3.7395209019829614, "learning_rate": 2.900261152250007e-06, "loss": 0.542, "step": 6166 }, { "epoch": 0.65, "grad_norm": 3.0529654367037993, "learning_rate": 2.8987145958345235e-06, "loss": 0.5837, "step": 6167 }, { "epoch": 0.65, "grad_norm": 3.1062690132576405, "learning_rate": 2.8971682835403043e-06, "loss": 0.6021, "step": 6168 }, { "epoch": 0.65, "grad_norm": 2.8007780098866983, "learning_rate": 2.895622215546997e-06, "loss": 0.5921, "step": 6169 }, { "epoch": 0.65, "grad_norm": 3.200411944525784, "learning_rate": 2.8940763920342153e-06, "loss": 0.6037, "step": 6170 }, { "epoch": 0.65, "grad_norm": 4.858054420142283, "learning_rate": 2.892530813181553e-06, "loss": 0.6641, "step": 6171 }, { "epoch": 0.65, "grad_norm": 2.5625816954495666, "learning_rate": 2.8909854791685666e-06, "loss": 0.5688, "step": 6172 }, { "epoch": 0.65, "grad_norm": 2.715416045079819, "learning_rate": 2.889440390174793e-06, "loss": 0.5802, "step": 6173 }, { "epoch": 0.65, "grad_norm": 2.6641433870808893, "learning_rate": 2.887895546379732e-06, "loss": 0.5821, "step": 6174 }, { "epoch": 0.65, "grad_norm": 2.416515122562737, "learning_rate": 2.8863509479628626e-06, "loss": 0.6671, "step": 6175 }, { "epoch": 0.65, "grad_norm": 2.836952564545351, "learning_rate": 2.884806595103628e-06, "loss": 0.6201, "step": 6176 }, { "epoch": 0.65, "grad_norm": 2.588917907113869, "learning_rate": 2.8832624879814507e-06, "loss": 0.5886, "step": 6177 }, { "epoch": 0.65, "grad_norm": 6.571817028335935, "learning_rate": 2.8817186267757173e-06, "loss": 0.5718, "step": 6178 }, { "epoch": 0.65, "grad_norm": 2.812640707900028, "learning_rate": 2.88017501166579e-06, "loss": 0.6379, "step": 6179 }, { "epoch": 0.65, "grad_norm": 2.626920559291416, "learning_rate": 2.8786316428310046e-06, "loss": 0.5918, "step": 6180 }, { "epoch": 0.65, "grad_norm": 2.798186376430685, "learning_rate": 2.8770885204506603e-06, "loss": 0.6388, "step": 6181 }, { "epoch": 0.65, "grad_norm": 2.751589330763615, "learning_rate": 2.8755456447040362e-06, "loss": 0.573, "step": 6182 }, { "epoch": 0.65, "grad_norm": 2.28473507954531, "learning_rate": 2.874003015770377e-06, "loss": 0.6162, "step": 6183 }, { "epoch": 0.65, "grad_norm": 2.557119980309637, "learning_rate": 2.872460633828904e-06, "loss": 0.5865, "step": 6184 }, { "epoch": 0.65, "grad_norm": 3.1804128048003784, "learning_rate": 2.8709184990588012e-06, "loss": 0.6432, "step": 6185 }, { "epoch": 0.65, "grad_norm": 2.6384781468932905, "learning_rate": 2.869376611639236e-06, "loss": 0.6571, "step": 6186 }, { "epoch": 0.65, "grad_norm": 2.595212904243066, "learning_rate": 2.8678349717493343e-06, "loss": 0.6985, "step": 6187 }, { "epoch": 0.65, "grad_norm": 3.0950307332286857, "learning_rate": 2.8662935795682046e-06, "loss": 0.6029, "step": 6188 }, { "epoch": 0.65, "grad_norm": 3.345799472219859, "learning_rate": 2.864752435274916e-06, "loss": 0.6059, "step": 6189 }, { "epoch": 0.65, "grad_norm": 2.25918445844699, "learning_rate": 2.8632115390485176e-06, "loss": 0.676, "step": 6190 }, { "epoch": 0.65, "grad_norm": 2.322802206550409, "learning_rate": 2.8616708910680278e-06, "loss": 0.6211, "step": 6191 }, { "epoch": 0.65, "grad_norm": 2.523140689849376, "learning_rate": 2.8601304915124305e-06, "loss": 0.6485, "step": 6192 }, { "epoch": 0.65, "grad_norm": 3.013530763468245, "learning_rate": 2.85859034056069e-06, "loss": 0.6619, "step": 6193 }, { "epoch": 0.65, "grad_norm": 3.291612700420486, "learning_rate": 2.8570504383917296e-06, "loss": 0.5981, "step": 6194 }, { "epoch": 0.65, "grad_norm": 2.4140328386952463, "learning_rate": 2.8555107851844576e-06, "loss": 0.5461, "step": 6195 }, { "epoch": 0.65, "grad_norm": 2.687609928584908, "learning_rate": 2.8539713811177418e-06, "loss": 0.6283, "step": 6196 }, { "epoch": 0.65, "grad_norm": 2.3244100962522163, "learning_rate": 2.8524322263704297e-06, "loss": 0.5716, "step": 6197 }, { "epoch": 0.65, "grad_norm": 2.1973658455412997, "learning_rate": 2.8508933211213306e-06, "loss": 0.531, "step": 6198 }, { "epoch": 0.65, "grad_norm": 2.0935858472905258, "learning_rate": 2.8493546655492356e-06, "loss": 0.598, "step": 6199 }, { "epoch": 0.65, "grad_norm": 2.77092033346428, "learning_rate": 2.8478162598328963e-06, "loss": 0.552, "step": 6200 }, { "epoch": 0.65, "grad_norm": 2.624275390466366, "learning_rate": 2.8462781041510446e-06, "loss": 0.666, "step": 6201 }, { "epoch": 0.65, "grad_norm": 2.1540146591782405, "learning_rate": 2.8447401986823752e-06, "loss": 0.6687, "step": 6202 }, { "epoch": 0.65, "grad_norm": 2.263646788490095, "learning_rate": 2.8432025436055593e-06, "loss": 0.6402, "step": 6203 }, { "epoch": 0.65, "grad_norm": 2.537185698965246, "learning_rate": 2.841665139099239e-06, "loss": 0.6249, "step": 6204 }, { "epoch": 0.65, "grad_norm": 2.4385362216870554, "learning_rate": 2.8401279853420216e-06, "loss": 0.6824, "step": 6205 }, { "epoch": 0.65, "grad_norm": 1.1466991106289137, "learning_rate": 2.838591082512494e-06, "loss": 0.5709, "step": 6206 }, { "epoch": 0.65, "grad_norm": 2.076550784919779, "learning_rate": 2.837054430789204e-06, "loss": 0.6901, "step": 6207 }, { "epoch": 0.65, "grad_norm": 2.7605170118341413, "learning_rate": 2.8355180303506803e-06, "loss": 0.6498, "step": 6208 }, { "epoch": 0.65, "grad_norm": 2.224223567454739, "learning_rate": 2.8339818813754115e-06, "loss": 0.6497, "step": 6209 }, { "epoch": 0.65, "grad_norm": 2.348664612330819, "learning_rate": 2.8324459840418694e-06, "loss": 0.6267, "step": 6210 }, { "epoch": 0.65, "grad_norm": 2.9347639572736144, "learning_rate": 2.8309103385284853e-06, "loss": 0.6423, "step": 6211 }, { "epoch": 0.65, "grad_norm": 2.408795609146555, "learning_rate": 2.82937494501367e-06, "loss": 0.6295, "step": 6212 }, { "epoch": 0.65, "grad_norm": 3.8368693625535277, "learning_rate": 2.8278398036757963e-06, "loss": 0.5594, "step": 6213 }, { "epoch": 0.65, "grad_norm": 2.51892710402293, "learning_rate": 2.8263049146932153e-06, "loss": 0.5737, "step": 6214 }, { "epoch": 0.65, "grad_norm": 3.302046825233197, "learning_rate": 2.8247702782442483e-06, "loss": 0.6769, "step": 6215 }, { "epoch": 0.65, "grad_norm": 3.85067453672738, "learning_rate": 2.8232358945071804e-06, "loss": 0.5899, "step": 6216 }, { "epoch": 0.65, "grad_norm": 2.9563736592529963, "learning_rate": 2.821701763660275e-06, "loss": 0.6199, "step": 6217 }, { "epoch": 0.65, "grad_norm": 2.8979485285850846, "learning_rate": 2.8201678858817604e-06, "loss": 0.5747, "step": 6218 }, { "epoch": 0.65, "grad_norm": 6.242630295137544, "learning_rate": 2.818634261349842e-06, "loss": 0.5828, "step": 6219 }, { "epoch": 0.65, "grad_norm": 2.8275662714905065, "learning_rate": 2.8171008902426865e-06, "loss": 0.702, "step": 6220 }, { "epoch": 0.65, "grad_norm": 2.2586239782876385, "learning_rate": 2.8155677727384422e-06, "loss": 0.5648, "step": 6221 }, { "epoch": 0.65, "grad_norm": 3.5846393384463773, "learning_rate": 2.814034909015217e-06, "loss": 0.6001, "step": 6222 }, { "epoch": 0.65, "grad_norm": 2.736699486370083, "learning_rate": 2.8125022992510997e-06, "loss": 0.5504, "step": 6223 }, { "epoch": 0.65, "grad_norm": 2.982453703063961, "learning_rate": 2.810969943624139e-06, "loss": 0.6271, "step": 6224 }, { "epoch": 0.66, "grad_norm": 1.0378158485658642, "learning_rate": 2.8094378423123646e-06, "loss": 0.5252, "step": 6225 }, { "epoch": 0.66, "grad_norm": 2.368853983880845, "learning_rate": 2.807905995493768e-06, "loss": 0.5657, "step": 6226 }, { "epoch": 0.66, "grad_norm": 2.6453989305193906, "learning_rate": 2.8063744033463157e-06, "loss": 0.6209, "step": 6227 }, { "epoch": 0.66, "grad_norm": 3.5318565272857363, "learning_rate": 2.8048430660479463e-06, "loss": 0.5866, "step": 6228 }, { "epoch": 0.66, "grad_norm": 3.8249719217174523, "learning_rate": 2.803311983776562e-06, "loss": 0.6002, "step": 6229 }, { "epoch": 0.66, "grad_norm": 2.4969861662636537, "learning_rate": 2.8017811567100434e-06, "loss": 0.6462, "step": 6230 }, { "epoch": 0.66, "grad_norm": 2.3785984897135353, "learning_rate": 2.8002505850262334e-06, "loss": 0.5927, "step": 6231 }, { "epoch": 0.66, "grad_norm": 2.141017713761464, "learning_rate": 2.7987202689029535e-06, "loss": 0.6416, "step": 6232 }, { "epoch": 0.66, "grad_norm": 2.9090874878260973, "learning_rate": 2.797190208517988e-06, "loss": 0.6156, "step": 6233 }, { "epoch": 0.66, "grad_norm": 2.0212371235951685, "learning_rate": 2.795660404049098e-06, "loss": 0.6247, "step": 6234 }, { "epoch": 0.66, "grad_norm": 2.2096556030865004, "learning_rate": 2.794130855674009e-06, "loss": 0.6012, "step": 6235 }, { "epoch": 0.66, "grad_norm": 2.2361232833661924, "learning_rate": 2.7926015635704216e-06, "loss": 0.6237, "step": 6236 }, { "epoch": 0.66, "grad_norm": 2.605730880983019, "learning_rate": 2.7910725279160016e-06, "loss": 0.6243, "step": 6237 }, { "epoch": 0.66, "grad_norm": 2.33390242289277, "learning_rate": 2.78954374888839e-06, "loss": 0.6674, "step": 6238 }, { "epoch": 0.66, "grad_norm": 3.329689514740396, "learning_rate": 2.7880152266651985e-06, "loss": 0.6576, "step": 6239 }, { "epoch": 0.66, "grad_norm": 4.400052397965454, "learning_rate": 2.7864869614240013e-06, "loss": 0.5916, "step": 6240 }, { "epoch": 0.66, "grad_norm": 2.437094841587654, "learning_rate": 2.7849589533423526e-06, "loss": 0.6769, "step": 6241 }, { "epoch": 0.66, "grad_norm": 2.158441047716093, "learning_rate": 2.783431202597767e-06, "loss": 0.5817, "step": 6242 }, { "epoch": 0.66, "grad_norm": 3.202778092080676, "learning_rate": 2.78190370936774e-06, "loss": 0.6905, "step": 6243 }, { "epoch": 0.66, "grad_norm": 2.181636718933403, "learning_rate": 2.7803764738297257e-06, "loss": 0.6753, "step": 6244 }, { "epoch": 0.66, "grad_norm": 2.169908589296973, "learning_rate": 2.7788494961611577e-06, "loss": 0.6062, "step": 6245 }, { "epoch": 0.66, "grad_norm": 2.1889402639310336, "learning_rate": 2.7773227765394335e-06, "loss": 0.6787, "step": 6246 }, { "epoch": 0.66, "grad_norm": 2.6886102911065244, "learning_rate": 2.7757963151419255e-06, "loss": 0.6211, "step": 6247 }, { "epoch": 0.66, "grad_norm": 3.620084649426985, "learning_rate": 2.7742701121459703e-06, "loss": 0.5493, "step": 6248 }, { "epoch": 0.66, "grad_norm": 2.0808691207938845, "learning_rate": 2.772744167728879e-06, "loss": 0.6245, "step": 6249 }, { "epoch": 0.66, "grad_norm": 0.94937587598107, "learning_rate": 2.7712184820679343e-06, "loss": 0.5034, "step": 6250 }, { "epoch": 0.66, "grad_norm": 2.750670337900543, "learning_rate": 2.7696930553403817e-06, "loss": 0.6075, "step": 6251 }, { "epoch": 0.66, "grad_norm": 1.8167968242924053, "learning_rate": 2.7681678877234446e-06, "loss": 0.567, "step": 6252 }, { "epoch": 0.66, "grad_norm": 2.3048720790451163, "learning_rate": 2.7666429793943087e-06, "loss": 0.6424, "step": 6253 }, { "epoch": 0.66, "grad_norm": 2.978504924873048, "learning_rate": 2.765118330530138e-06, "loss": 0.6175, "step": 6254 }, { "epoch": 0.66, "grad_norm": 2.9075301291638174, "learning_rate": 2.763593941308057e-06, "loss": 0.6043, "step": 6255 }, { "epoch": 0.66, "grad_norm": 3.3429392341032482, "learning_rate": 2.7620698119051687e-06, "loss": 0.6713, "step": 6256 }, { "epoch": 0.66, "grad_norm": 2.077361667669363, "learning_rate": 2.7605459424985387e-06, "loss": 0.7026, "step": 6257 }, { "epoch": 0.66, "grad_norm": 2.5927187596499923, "learning_rate": 2.7590223332652096e-06, "loss": 0.6464, "step": 6258 }, { "epoch": 0.66, "grad_norm": 2.588598586482582, "learning_rate": 2.7574989843821855e-06, "loss": 0.7161, "step": 6259 }, { "epoch": 0.66, "grad_norm": 2.9831247521107374, "learning_rate": 2.7559758960264492e-06, "loss": 0.6043, "step": 6260 }, { "epoch": 0.66, "grad_norm": 13.839156626103529, "learning_rate": 2.7544530683749447e-06, "loss": 0.6524, "step": 6261 }, { "epoch": 0.66, "grad_norm": 2.302955982405863, "learning_rate": 2.7529305016045917e-06, "loss": 0.6315, "step": 6262 }, { "epoch": 0.66, "grad_norm": 4.745223080871031, "learning_rate": 2.751408195892279e-06, "loss": 0.6543, "step": 6263 }, { "epoch": 0.66, "grad_norm": 3.2451920481026892, "learning_rate": 2.74988615141486e-06, "loss": 0.5986, "step": 6264 }, { "epoch": 0.66, "grad_norm": 3.1406872121844263, "learning_rate": 2.7483643683491658e-06, "loss": 0.5726, "step": 6265 }, { "epoch": 0.66, "grad_norm": 3.484418230228393, "learning_rate": 2.7468428468719877e-06, "loss": 0.7162, "step": 6266 }, { "epoch": 0.66, "grad_norm": 2.329591736950578, "learning_rate": 2.7453215871600967e-06, "loss": 0.5947, "step": 6267 }, { "epoch": 0.66, "grad_norm": 2.4045111597868307, "learning_rate": 2.743800589390225e-06, "loss": 0.6726, "step": 6268 }, { "epoch": 0.66, "grad_norm": 2.3627978273372885, "learning_rate": 2.74227985373908e-06, "loss": 0.5535, "step": 6269 }, { "epoch": 0.66, "grad_norm": 3.6457501096909266, "learning_rate": 2.7407593803833333e-06, "loss": 0.6602, "step": 6270 }, { "epoch": 0.66, "grad_norm": 2.926138821288341, "learning_rate": 2.7392391694996335e-06, "loss": 0.7381, "step": 6271 }, { "epoch": 0.66, "grad_norm": 1.1259023466885845, "learning_rate": 2.7377192212645888e-06, "loss": 0.5446, "step": 6272 }, { "epoch": 0.66, "grad_norm": 2.231315217630402, "learning_rate": 2.736199535854788e-06, "loss": 0.566, "step": 6273 }, { "epoch": 0.66, "grad_norm": 2.7856102084315983, "learning_rate": 2.7346801134467794e-06, "loss": 0.606, "step": 6274 }, { "epoch": 0.66, "grad_norm": 2.6026849229772218, "learning_rate": 2.733160954217086e-06, "loss": 0.6111, "step": 6275 }, { "epoch": 0.66, "grad_norm": 2.5134836440676724, "learning_rate": 2.731642058342203e-06, "loss": 0.5827, "step": 6276 }, { "epoch": 0.66, "grad_norm": 2.2996578580915323, "learning_rate": 2.7301234259985863e-06, "loss": 0.5993, "step": 6277 }, { "epoch": 0.66, "grad_norm": 2.937914077737516, "learning_rate": 2.72860505736267e-06, "loss": 0.6798, "step": 6278 }, { "epoch": 0.66, "grad_norm": 2.9983367460524004, "learning_rate": 2.7270869526108507e-06, "loss": 0.6152, "step": 6279 }, { "epoch": 0.66, "grad_norm": 2.6496495044995605, "learning_rate": 2.7255691119195005e-06, "loss": 0.6753, "step": 6280 }, { "epoch": 0.66, "grad_norm": 2.138925305786583, "learning_rate": 2.7240515354649545e-06, "loss": 0.6051, "step": 6281 }, { "epoch": 0.66, "grad_norm": 2.490293141221245, "learning_rate": 2.722534223423524e-06, "loss": 0.6335, "step": 6282 }, { "epoch": 0.66, "grad_norm": 2.322910264522336, "learning_rate": 2.721017175971482e-06, "loss": 0.6405, "step": 6283 }, { "epoch": 0.66, "grad_norm": 2.643925276634918, "learning_rate": 2.719500393285076e-06, "loss": 0.6404, "step": 6284 }, { "epoch": 0.66, "grad_norm": 2.0080720283021827, "learning_rate": 2.7179838755405253e-06, "loss": 0.5701, "step": 6285 }, { "epoch": 0.66, "grad_norm": 2.5525745426515476, "learning_rate": 2.7164676229140098e-06, "loss": 0.6459, "step": 6286 }, { "epoch": 0.66, "grad_norm": 2.512640406594001, "learning_rate": 2.714951635581684e-06, "loss": 0.6733, "step": 6287 }, { "epoch": 0.66, "grad_norm": 0.9599284170567002, "learning_rate": 2.713435913719671e-06, "loss": 0.5056, "step": 6288 }, { "epoch": 0.66, "grad_norm": 2.2612027984032435, "learning_rate": 2.7119204575040666e-06, "loss": 0.6296, "step": 6289 }, { "epoch": 0.66, "grad_norm": 2.856455315447502, "learning_rate": 2.7104052671109267e-06, "loss": 0.6801, "step": 6290 }, { "epoch": 0.66, "grad_norm": 2.527783580967457, "learning_rate": 2.708890342716286e-06, "loss": 0.5643, "step": 6291 }, { "epoch": 0.66, "grad_norm": 3.314451080373654, "learning_rate": 2.7073756844961407e-06, "loss": 0.6189, "step": 6292 }, { "epoch": 0.66, "grad_norm": 2.232485359666232, "learning_rate": 2.7058612926264634e-06, "loss": 0.6106, "step": 6293 }, { "epoch": 0.66, "grad_norm": 1.9749775537277983, "learning_rate": 2.7043471672831866e-06, "loss": 0.597, "step": 6294 }, { "epoch": 0.66, "grad_norm": 2.570926833062, "learning_rate": 2.7028333086422232e-06, "loss": 0.5325, "step": 6295 }, { "epoch": 0.66, "grad_norm": 2.58096398136124, "learning_rate": 2.7013197168794424e-06, "loss": 0.6335, "step": 6296 }, { "epoch": 0.66, "grad_norm": 2.1566887676933293, "learning_rate": 2.699806392170693e-06, "loss": 0.6744, "step": 6297 }, { "epoch": 0.66, "grad_norm": 2.2365763878000284, "learning_rate": 2.698293334691789e-06, "loss": 0.6178, "step": 6298 }, { "epoch": 0.66, "grad_norm": 5.206253358431679, "learning_rate": 2.69678054461851e-06, "loss": 0.6438, "step": 6299 }, { "epoch": 0.66, "grad_norm": 2.4308841962891514, "learning_rate": 2.6952680221266116e-06, "loss": 0.6907, "step": 6300 }, { "epoch": 0.66, "grad_norm": 2.1155846785270302, "learning_rate": 2.6937557673918096e-06, "loss": 0.5513, "step": 6301 }, { "epoch": 0.66, "grad_norm": 2.102074075556992, "learning_rate": 2.692243780589798e-06, "loss": 0.6146, "step": 6302 }, { "epoch": 0.66, "grad_norm": 5.119358235355663, "learning_rate": 2.6907320618962312e-06, "loss": 0.6069, "step": 6303 }, { "epoch": 0.66, "grad_norm": 2.52908697678268, "learning_rate": 2.6892206114867402e-06, "loss": 0.5663, "step": 6304 }, { "epoch": 0.66, "grad_norm": 2.6044059425378716, "learning_rate": 2.6877094295369167e-06, "loss": 0.5864, "step": 6305 }, { "epoch": 0.66, "grad_norm": 2.3129237260574933, "learning_rate": 2.686198516222329e-06, "loss": 0.6366, "step": 6306 }, { "epoch": 0.66, "grad_norm": 2.699401934868816, "learning_rate": 2.6846878717185076e-06, "loss": 0.5805, "step": 6307 }, { "epoch": 0.66, "grad_norm": 2.5503693982830806, "learning_rate": 2.6831774962009582e-06, "loss": 0.5873, "step": 6308 }, { "epoch": 0.66, "grad_norm": 2.3528557272139152, "learning_rate": 2.6816673898451486e-06, "loss": 0.6644, "step": 6309 }, { "epoch": 0.66, "grad_norm": 2.178316075395149, "learning_rate": 2.680157552826519e-06, "loss": 0.5794, "step": 6310 }, { "epoch": 0.66, "grad_norm": 2.640485918878129, "learning_rate": 2.6786479853204817e-06, "loss": 0.7067, "step": 6311 }, { "epoch": 0.66, "grad_norm": 2.296387374049739, "learning_rate": 2.6771386875024087e-06, "loss": 0.6266, "step": 6312 }, { "epoch": 0.66, "grad_norm": 2.1896654863553193, "learning_rate": 2.6756296595476504e-06, "loss": 0.5763, "step": 6313 }, { "epoch": 0.66, "grad_norm": 2.110961387612748, "learning_rate": 2.674120901631517e-06, "loss": 0.5546, "step": 6314 }, { "epoch": 0.66, "grad_norm": 3.028470459580835, "learning_rate": 2.6726124139292964e-06, "loss": 0.59, "step": 6315 }, { "epoch": 0.66, "grad_norm": 2.3573162744903935, "learning_rate": 2.6711041966162356e-06, "loss": 0.638, "step": 6316 }, { "epoch": 0.66, "grad_norm": 2.738834086837961, "learning_rate": 2.6695962498675588e-06, "loss": 0.6098, "step": 6317 }, { "epoch": 0.66, "grad_norm": 2.3705691571110727, "learning_rate": 2.6680885738584512e-06, "loss": 0.6687, "step": 6318 }, { "epoch": 0.66, "grad_norm": 5.548261604744699, "learning_rate": 2.6665811687640723e-06, "loss": 0.6152, "step": 6319 }, { "epoch": 0.67, "grad_norm": 2.3601953378527485, "learning_rate": 2.66507403475955e-06, "loss": 0.5781, "step": 6320 }, { "epoch": 0.67, "grad_norm": 2.7282332847963287, "learning_rate": 2.663567172019977e-06, "loss": 0.6588, "step": 6321 }, { "epoch": 0.67, "grad_norm": 3.647581964093641, "learning_rate": 2.6620605807204134e-06, "loss": 0.5947, "step": 6322 }, { "epoch": 0.67, "grad_norm": 2.55204790802501, "learning_rate": 2.660554261035894e-06, "loss": 0.6317, "step": 6323 }, { "epoch": 0.67, "grad_norm": 2.794476567030957, "learning_rate": 2.659048213141419e-06, "loss": 0.5403, "step": 6324 }, { "epoch": 0.67, "grad_norm": 2.5186567163194873, "learning_rate": 2.657542437211954e-06, "loss": 0.6475, "step": 6325 }, { "epoch": 0.67, "grad_norm": 2.715952090453819, "learning_rate": 2.6560369334224396e-06, "loss": 0.565, "step": 6326 }, { "epoch": 0.67, "grad_norm": 2.700955615524671, "learning_rate": 2.6545317019477764e-06, "loss": 0.6937, "step": 6327 }, { "epoch": 0.67, "grad_norm": 3.7950586784557565, "learning_rate": 2.653026742962842e-06, "loss": 0.6282, "step": 6328 }, { "epoch": 0.67, "grad_norm": 1.932271397052837, "learning_rate": 2.6515220566424735e-06, "loss": 0.5145, "step": 6329 }, { "epoch": 0.67, "grad_norm": 2.426433667015017, "learning_rate": 2.6500176431614866e-06, "loss": 0.6701, "step": 6330 }, { "epoch": 0.67, "grad_norm": 2.267463500168662, "learning_rate": 2.6485135026946545e-06, "loss": 0.62, "step": 6331 }, { "epoch": 0.67, "grad_norm": 2.4952915986161748, "learning_rate": 2.6470096354167264e-06, "loss": 0.6529, "step": 6332 }, { "epoch": 0.67, "grad_norm": 4.28610014397335, "learning_rate": 2.645506041502419e-06, "loss": 0.5839, "step": 6333 }, { "epoch": 0.67, "grad_norm": 2.8188123888897585, "learning_rate": 2.644002721126413e-06, "loss": 0.6255, "step": 6334 }, { "epoch": 0.67, "grad_norm": 3.831571559662532, "learning_rate": 2.642499674463359e-06, "loss": 0.536, "step": 6335 }, { "epoch": 0.67, "grad_norm": 2.58837692606335, "learning_rate": 2.640996901687878e-06, "loss": 0.6061, "step": 6336 }, { "epoch": 0.67, "grad_norm": 2.3253447670569956, "learning_rate": 2.6394944029745594e-06, "loss": 0.6083, "step": 6337 }, { "epoch": 0.67, "grad_norm": 3.1161500571551355, "learning_rate": 2.6379921784979556e-06, "loss": 0.6626, "step": 6338 }, { "epoch": 0.67, "grad_norm": 2.3957780596036167, "learning_rate": 2.6364902284325943e-06, "loss": 0.5869, "step": 6339 }, { "epoch": 0.67, "grad_norm": 3.14685635706374, "learning_rate": 2.634988552952965e-06, "loss": 0.6182, "step": 6340 }, { "epoch": 0.67, "grad_norm": 6.175501966326075, "learning_rate": 2.63348715223353e-06, "loss": 0.5976, "step": 6341 }, { "epoch": 0.67, "grad_norm": 3.2865872103188165, "learning_rate": 2.6319860264487156e-06, "loss": 0.6499, "step": 6342 }, { "epoch": 0.67, "grad_norm": 4.282251344333547, "learning_rate": 2.630485175772921e-06, "loss": 0.6299, "step": 6343 }, { "epoch": 0.67, "grad_norm": 2.959066483609487, "learning_rate": 2.6289846003805073e-06, "loss": 0.7163, "step": 6344 }, { "epoch": 0.67, "grad_norm": 2.6533855270391533, "learning_rate": 2.6274843004458083e-06, "loss": 0.6091, "step": 6345 }, { "epoch": 0.67, "grad_norm": 2.092500503413888, "learning_rate": 2.6259842761431275e-06, "loss": 0.6512, "step": 6346 }, { "epoch": 0.67, "grad_norm": 11.63033721500452, "learning_rate": 2.624484527646729e-06, "loss": 0.5766, "step": 6347 }, { "epoch": 0.67, "grad_norm": 2.344316205856234, "learning_rate": 2.6229850551308533e-06, "loss": 0.6893, "step": 6348 }, { "epoch": 0.67, "grad_norm": 2.7670480341648984, "learning_rate": 2.6214858587697e-06, "loss": 0.6076, "step": 6349 }, { "epoch": 0.67, "grad_norm": 2.7552634595866894, "learning_rate": 2.6199869387374465e-06, "loss": 0.626, "step": 6350 }, { "epoch": 0.67, "grad_norm": 2.087916955904863, "learning_rate": 2.6184882952082286e-06, "loss": 0.6331, "step": 6351 }, { "epoch": 0.67, "grad_norm": 2.2553742098034797, "learning_rate": 2.616989928356158e-06, "loss": 0.6029, "step": 6352 }, { "epoch": 0.67, "grad_norm": 2.4877624793792266, "learning_rate": 2.6154918383553075e-06, "loss": 0.5829, "step": 6353 }, { "epoch": 0.67, "grad_norm": 3.164663373664098, "learning_rate": 2.6139940253797237e-06, "loss": 0.5857, "step": 6354 }, { "epoch": 0.67, "grad_norm": 2.2967240401034346, "learning_rate": 2.6124964896034143e-06, "loss": 0.5886, "step": 6355 }, { "epoch": 0.67, "grad_norm": 4.099408237564359, "learning_rate": 2.610999231200364e-06, "loss": 0.6851, "step": 6356 }, { "epoch": 0.67, "grad_norm": 2.3689665901694243, "learning_rate": 2.6095022503445155e-06, "loss": 0.6386, "step": 6357 }, { "epoch": 0.67, "grad_norm": 2.2222788180650475, "learning_rate": 2.6080055472097844e-06, "loss": 0.638, "step": 6358 }, { "epoch": 0.67, "grad_norm": 2.4258550761659854, "learning_rate": 2.6065091219700568e-06, "loss": 0.5755, "step": 6359 }, { "epoch": 0.67, "grad_norm": 2.5822158507581823, "learning_rate": 2.605012974799178e-06, "loss": 0.6109, "step": 6360 }, { "epoch": 0.67, "grad_norm": 2.344258642918491, "learning_rate": 2.603517105870971e-06, "loss": 0.5318, "step": 6361 }, { "epoch": 0.67, "grad_norm": 3.661742078883693, "learning_rate": 2.602021515359218e-06, "loss": 0.5598, "step": 6362 }, { "epoch": 0.67, "grad_norm": 2.2545924179160295, "learning_rate": 2.600526203437674e-06, "loss": 0.5684, "step": 6363 }, { "epoch": 0.67, "grad_norm": 2.7470556591551687, "learning_rate": 2.5990311702800573e-06, "loss": 0.6562, "step": 6364 }, { "epoch": 0.67, "grad_norm": 2.130529569635912, "learning_rate": 2.597536416060062e-06, "loss": 0.6661, "step": 6365 }, { "epoch": 0.67, "grad_norm": 2.848914985108963, "learning_rate": 2.5960419409513386e-06, "loss": 0.4984, "step": 6366 }, { "epoch": 0.67, "grad_norm": 5.47331013855302, "learning_rate": 2.594547745127514e-06, "loss": 0.6535, "step": 6367 }, { "epoch": 0.67, "grad_norm": 2.1487059851875383, "learning_rate": 2.5930538287621797e-06, "loss": 0.6188, "step": 6368 }, { "epoch": 0.67, "grad_norm": 2.2329154737791517, "learning_rate": 2.591560192028894e-06, "loss": 0.5626, "step": 6369 }, { "epoch": 0.67, "grad_norm": 2.2654061918388013, "learning_rate": 2.5900668351011815e-06, "loss": 0.6215, "step": 6370 }, { "epoch": 0.67, "grad_norm": 2.256650550756588, "learning_rate": 2.588573758152538e-06, "loss": 0.631, "step": 6371 }, { "epoch": 0.67, "grad_norm": 2.001940811368443, "learning_rate": 2.5870809613564264e-06, "loss": 0.615, "step": 6372 }, { "epoch": 0.67, "grad_norm": 5.097482873027538, "learning_rate": 2.585588444886271e-06, "loss": 0.6168, "step": 6373 }, { "epoch": 0.67, "grad_norm": 2.401718053393286, "learning_rate": 2.584096208915473e-06, "loss": 0.6036, "step": 6374 }, { "epoch": 0.67, "grad_norm": 2.7170902456231816, "learning_rate": 2.5826042536173923e-06, "loss": 0.6435, "step": 6375 }, { "epoch": 0.67, "grad_norm": 2.0656142552031445, "learning_rate": 2.581112579165363e-06, "loss": 0.6798, "step": 6376 }, { "epoch": 0.67, "grad_norm": 2.885892388490017, "learning_rate": 2.5796211857326805e-06, "loss": 0.5807, "step": 6377 }, { "epoch": 0.67, "grad_norm": 2.357443586015753, "learning_rate": 2.578130073492613e-06, "loss": 0.5938, "step": 6378 }, { "epoch": 0.67, "grad_norm": 2.3457746446640377, "learning_rate": 2.576639242618391e-06, "loss": 0.6046, "step": 6379 }, { "epoch": 0.67, "grad_norm": 1.0130372996055184, "learning_rate": 2.575148693283217e-06, "loss": 0.5469, "step": 6380 }, { "epoch": 0.67, "grad_norm": 2.2471983613829685, "learning_rate": 2.5736584256602604e-06, "loss": 0.6914, "step": 6381 }, { "epoch": 0.67, "grad_norm": 2.8049842929766076, "learning_rate": 2.572168439922653e-06, "loss": 0.6207, "step": 6382 }, { "epoch": 0.67, "grad_norm": 2.5242050580331536, "learning_rate": 2.570678736243497e-06, "loss": 0.5769, "step": 6383 }, { "epoch": 0.67, "grad_norm": 2.7371668921754995, "learning_rate": 2.569189314795863e-06, "loss": 0.6394, "step": 6384 }, { "epoch": 0.67, "grad_norm": 2.541518894090096, "learning_rate": 2.56770017575279e-06, "loss": 0.5863, "step": 6385 }, { "epoch": 0.67, "grad_norm": 2.104105510648952, "learning_rate": 2.566211319287276e-06, "loss": 0.5289, "step": 6386 }, { "epoch": 0.67, "grad_norm": 2.5996518622471667, "learning_rate": 2.564722745572299e-06, "loss": 0.6135, "step": 6387 }, { "epoch": 0.67, "grad_norm": 2.2459414237155215, "learning_rate": 2.563234454780791e-06, "loss": 0.5344, "step": 6388 }, { "epoch": 0.67, "grad_norm": 2.6467790790692405, "learning_rate": 2.561746447085662e-06, "loss": 0.5519, "step": 6389 }, { "epoch": 0.67, "grad_norm": 2.9589463310028505, "learning_rate": 2.5602587226597813e-06, "loss": 0.61, "step": 6390 }, { "epoch": 0.67, "grad_norm": 2.81740329183419, "learning_rate": 2.5587712816759914e-06, "loss": 0.6797, "step": 6391 }, { "epoch": 0.67, "grad_norm": 2.4235942299105417, "learning_rate": 2.5572841243070944e-06, "loss": 0.6292, "step": 6392 }, { "epoch": 0.67, "grad_norm": 2.7257768033845178, "learning_rate": 2.5557972507258676e-06, "loss": 0.5951, "step": 6393 }, { "epoch": 0.67, "grad_norm": 2.9980654800137065, "learning_rate": 2.554310661105052e-06, "loss": 0.5879, "step": 6394 }, { "epoch": 0.67, "grad_norm": 2.45714129335474, "learning_rate": 2.5528243556173526e-06, "loss": 0.5924, "step": 6395 }, { "epoch": 0.67, "grad_norm": 2.524381334524273, "learning_rate": 2.551338334435447e-06, "loss": 0.5851, "step": 6396 }, { "epoch": 0.67, "grad_norm": 2.2895599260578168, "learning_rate": 2.549852597731973e-06, "loss": 0.6139, "step": 6397 }, { "epoch": 0.67, "grad_norm": 2.2734502037433173, "learning_rate": 2.5483671456795446e-06, "loss": 0.5641, "step": 6398 }, { "epoch": 0.67, "grad_norm": 4.10732024204948, "learning_rate": 2.5468819784507314e-06, "loss": 0.6338, "step": 6399 }, { "epoch": 0.67, "grad_norm": 2.831440417203488, "learning_rate": 2.545397096218081e-06, "loss": 0.632, "step": 6400 }, { "epoch": 0.67, "grad_norm": 2.2175477294639503, "learning_rate": 2.5439124991540986e-06, "loss": 0.5228, "step": 6401 }, { "epoch": 0.67, "grad_norm": 11.26293371493835, "learning_rate": 2.5424281874312616e-06, "loss": 0.638, "step": 6402 }, { "epoch": 0.67, "grad_norm": 3.070512413502359, "learning_rate": 2.5409441612220163e-06, "loss": 0.5331, "step": 6403 }, { "epoch": 0.67, "grad_norm": 2.43982362945709, "learning_rate": 2.53946042069877e-06, "loss": 0.682, "step": 6404 }, { "epoch": 0.67, "grad_norm": 2.5481585409694536, "learning_rate": 2.537976966033897e-06, "loss": 0.6913, "step": 6405 }, { "epoch": 0.67, "grad_norm": 2.780707259096246, "learning_rate": 2.5364937973997433e-06, "loss": 0.637, "step": 6406 }, { "epoch": 0.67, "grad_norm": 3.059638272022161, "learning_rate": 2.535010914968621e-06, "loss": 0.6197, "step": 6407 }, { "epoch": 0.67, "grad_norm": 4.198201640329154, "learning_rate": 2.533528318912803e-06, "loss": 0.544, "step": 6408 }, { "epoch": 0.67, "grad_norm": 2.3114583069479973, "learning_rate": 2.532046009404537e-06, "loss": 0.5643, "step": 6409 }, { "epoch": 0.67, "grad_norm": 4.0785166014115655, "learning_rate": 2.5305639866160293e-06, "loss": 0.6458, "step": 6410 }, { "epoch": 0.67, "grad_norm": 2.4149803287414717, "learning_rate": 2.529082250719461e-06, "loss": 0.7269, "step": 6411 }, { "epoch": 0.67, "grad_norm": 2.160446913236817, "learning_rate": 2.5276008018869722e-06, "loss": 0.5832, "step": 6412 }, { "epoch": 0.67, "grad_norm": 2.6820344561724, "learning_rate": 2.526119640290678e-06, "loss": 0.5942, "step": 6413 }, { "epoch": 0.67, "grad_norm": 2.559166061869424, "learning_rate": 2.5246387661026504e-06, "loss": 0.6239, "step": 6414 }, { "epoch": 0.68, "grad_norm": 2.35834972171533, "learning_rate": 2.5231581794949356e-06, "loss": 0.535, "step": 6415 }, { "epoch": 0.68, "grad_norm": 2.964214903325439, "learning_rate": 2.5216778806395448e-06, "loss": 0.6452, "step": 6416 }, { "epoch": 0.68, "grad_norm": 2.200465378179132, "learning_rate": 2.520197869708454e-06, "loss": 0.6412, "step": 6417 }, { "epoch": 0.68, "grad_norm": 2.5628414197927607, "learning_rate": 2.518718146873605e-06, "loss": 0.6189, "step": 6418 }, { "epoch": 0.68, "grad_norm": 2.3739952466153516, "learning_rate": 2.5172387123069085e-06, "loss": 0.5864, "step": 6419 }, { "epoch": 0.68, "grad_norm": 2.2088475058648496, "learning_rate": 2.5157595661802437e-06, "loss": 0.6126, "step": 6420 }, { "epoch": 0.68, "grad_norm": 2.8096193612985947, "learning_rate": 2.514280708665449e-06, "loss": 0.48, "step": 6421 }, { "epoch": 0.68, "grad_norm": 1.8758991836794494, "learning_rate": 2.5128021399343385e-06, "loss": 0.601, "step": 6422 }, { "epoch": 0.68, "grad_norm": 4.354943435805703, "learning_rate": 2.5113238601586833e-06, "loss": 0.5763, "step": 6423 }, { "epoch": 0.68, "grad_norm": 0.8991492339114561, "learning_rate": 2.50984586951023e-06, "loss": 0.571, "step": 6424 }, { "epoch": 0.68, "grad_norm": 2.988868169199592, "learning_rate": 2.508368168160683e-06, "loss": 0.6109, "step": 6425 }, { "epoch": 0.68, "grad_norm": 2.0181480966879133, "learning_rate": 2.5068907562817223e-06, "loss": 0.6475, "step": 6426 }, { "epoch": 0.68, "grad_norm": 2.607017873285922, "learning_rate": 2.505413634044984e-06, "loss": 0.6466, "step": 6427 }, { "epoch": 0.68, "grad_norm": 2.5325358752326745, "learning_rate": 2.5039368016220795e-06, "loss": 0.707, "step": 6428 }, { "epoch": 0.68, "grad_norm": 5.495898484741088, "learning_rate": 2.502460259184584e-06, "loss": 0.6143, "step": 6429 }, { "epoch": 0.68, "grad_norm": 2.27497515485443, "learning_rate": 2.500984006904035e-06, "loss": 0.6036, "step": 6430 }, { "epoch": 0.68, "grad_norm": 5.384554479372134, "learning_rate": 2.4995080449519383e-06, "loss": 0.632, "step": 6431 }, { "epoch": 0.68, "grad_norm": 2.136269004392376, "learning_rate": 2.498032373499769e-06, "loss": 0.5979, "step": 6432 }, { "epoch": 0.68, "grad_norm": 0.9591439151746047, "learning_rate": 2.496556992718968e-06, "loss": 0.5752, "step": 6433 }, { "epoch": 0.68, "grad_norm": 2.625755986560923, "learning_rate": 2.495081902780937e-06, "loss": 0.6251, "step": 6434 }, { "epoch": 0.68, "grad_norm": 2.6993322682560046, "learning_rate": 2.4936071038570514e-06, "loss": 0.6101, "step": 6435 }, { "epoch": 0.68, "grad_norm": 3.606064994507977, "learning_rate": 2.4921325961186455e-06, "loss": 0.6113, "step": 6436 }, { "epoch": 0.68, "grad_norm": 2.8817794595876305, "learning_rate": 2.490658379737025e-06, "loss": 0.6152, "step": 6437 }, { "epoch": 0.68, "grad_norm": 2.592817222457228, "learning_rate": 2.489184454883462e-06, "loss": 0.6595, "step": 6438 }, { "epoch": 0.68, "grad_norm": 3.112454316014022, "learning_rate": 2.4877108217291913e-06, "loss": 0.6833, "step": 6439 }, { "epoch": 0.68, "grad_norm": 2.089441060610151, "learning_rate": 2.4862374804454127e-06, "loss": 0.6656, "step": 6440 }, { "epoch": 0.68, "grad_norm": 2.1810446892787994, "learning_rate": 2.484764431203297e-06, "loss": 0.655, "step": 6441 }, { "epoch": 0.68, "grad_norm": 2.2182929541942453, "learning_rate": 2.483291674173981e-06, "loss": 0.6376, "step": 6442 }, { "epoch": 0.68, "grad_norm": 2.620256297362581, "learning_rate": 2.4818192095285615e-06, "loss": 0.6077, "step": 6443 }, { "epoch": 0.68, "grad_norm": 0.959409963615247, "learning_rate": 2.4803470374381084e-06, "loss": 0.5402, "step": 6444 }, { "epoch": 0.68, "grad_norm": 2.690295499658671, "learning_rate": 2.4788751580736516e-06, "loss": 0.637, "step": 6445 }, { "epoch": 0.68, "grad_norm": 2.2246955758634246, "learning_rate": 2.4774035716061924e-06, "loss": 0.6423, "step": 6446 }, { "epoch": 0.68, "grad_norm": 2.4753333122148784, "learning_rate": 2.4759322782066924e-06, "loss": 0.6151, "step": 6447 }, { "epoch": 0.68, "grad_norm": 2.3261687913389704, "learning_rate": 2.4744612780460863e-06, "loss": 0.6234, "step": 6448 }, { "epoch": 0.68, "grad_norm": 2.050091521356472, "learning_rate": 2.472990571295266e-06, "loss": 0.528, "step": 6449 }, { "epoch": 0.68, "grad_norm": 2.266129287841651, "learning_rate": 2.4715201581250962e-06, "loss": 0.6456, "step": 6450 }, { "epoch": 0.68, "grad_norm": 2.5040000923789902, "learning_rate": 2.4700500387064074e-06, "loss": 0.6231, "step": 6451 }, { "epoch": 0.68, "grad_norm": 2.405010632193548, "learning_rate": 2.4685802132099923e-06, "loss": 0.5844, "step": 6452 }, { "epoch": 0.68, "grad_norm": 2.6378566394399967, "learning_rate": 2.4671106818066076e-06, "loss": 0.6313, "step": 6453 }, { "epoch": 0.68, "grad_norm": 2.429331136222701, "learning_rate": 2.465641444666983e-06, "loss": 0.6776, "step": 6454 }, { "epoch": 0.68, "grad_norm": 3.3515232887180457, "learning_rate": 2.4641725019618107e-06, "loss": 0.5311, "step": 6455 }, { "epoch": 0.68, "grad_norm": 2.516974554330432, "learning_rate": 2.4627038538617447e-06, "loss": 0.5724, "step": 6456 }, { "epoch": 0.68, "grad_norm": 2.1215303565599735, "learning_rate": 2.461235500537412e-06, "loss": 0.5501, "step": 6457 }, { "epoch": 0.68, "grad_norm": 2.0665664683899, "learning_rate": 2.4597674421593985e-06, "loss": 0.6118, "step": 6458 }, { "epoch": 0.68, "grad_norm": 2.723504300822161, "learning_rate": 2.458299678898263e-06, "loss": 0.6764, "step": 6459 }, { "epoch": 0.68, "grad_norm": 2.4054502421315638, "learning_rate": 2.456832210924521e-06, "loss": 0.6773, "step": 6460 }, { "epoch": 0.68, "grad_norm": 2.408884797009261, "learning_rate": 2.455365038408663e-06, "loss": 0.6166, "step": 6461 }, { "epoch": 0.68, "grad_norm": 2.446288286832739, "learning_rate": 2.453898161521137e-06, "loss": 0.703, "step": 6462 }, { "epoch": 0.68, "grad_norm": 2.2957817402286795, "learning_rate": 2.4524315804323627e-06, "loss": 0.6984, "step": 6463 }, { "epoch": 0.68, "grad_norm": 2.401524156010693, "learning_rate": 2.4509652953127257e-06, "loss": 0.6117, "step": 6464 }, { "epoch": 0.68, "grad_norm": 3.328086050605523, "learning_rate": 2.4494993063325716e-06, "loss": 0.5302, "step": 6465 }, { "epoch": 0.68, "grad_norm": 2.358380771233323, "learning_rate": 2.4480336136622133e-06, "loss": 0.6143, "step": 6466 }, { "epoch": 0.68, "grad_norm": 2.6392348923503626, "learning_rate": 2.446568217471933e-06, "loss": 0.5916, "step": 6467 }, { "epoch": 0.68, "grad_norm": 2.7595861356010456, "learning_rate": 2.445103117931978e-06, "loss": 0.5833, "step": 6468 }, { "epoch": 0.68, "grad_norm": 2.1230177292364067, "learning_rate": 2.443638315212555e-06, "loss": 0.5654, "step": 6469 }, { "epoch": 0.68, "grad_norm": 2.8062776463140517, "learning_rate": 2.442173809483845e-06, "loss": 0.7762, "step": 6470 }, { "epoch": 0.68, "grad_norm": 2.716571799479343, "learning_rate": 2.440709600915986e-06, "loss": 0.6991, "step": 6471 }, { "epoch": 0.68, "grad_norm": 2.7930103373086728, "learning_rate": 2.4392456896790874e-06, "loss": 0.5541, "step": 6472 }, { "epoch": 0.68, "grad_norm": 3.224499151376592, "learning_rate": 2.437782075943224e-06, "loss": 0.6459, "step": 6473 }, { "epoch": 0.68, "grad_norm": 3.395183867623381, "learning_rate": 2.4363187598784323e-06, "loss": 0.6556, "step": 6474 }, { "epoch": 0.68, "grad_norm": 2.1451834322017946, "learning_rate": 2.4348557416547146e-06, "loss": 0.6845, "step": 6475 }, { "epoch": 0.68, "grad_norm": 2.6454970175141983, "learning_rate": 2.4333930214420414e-06, "loss": 0.6793, "step": 6476 }, { "epoch": 0.68, "grad_norm": 2.1614550552826097, "learning_rate": 2.43193059941035e-06, "loss": 0.6409, "step": 6477 }, { "epoch": 0.68, "grad_norm": 2.9560127452631026, "learning_rate": 2.4304684757295376e-06, "loss": 0.6685, "step": 6478 }, { "epoch": 0.68, "grad_norm": 2.385228890681212, "learning_rate": 2.429006650569468e-06, "loss": 0.6054, "step": 6479 }, { "epoch": 0.68, "grad_norm": 2.7023174068498594, "learning_rate": 2.4275451240999743e-06, "loss": 0.6734, "step": 6480 }, { "epoch": 0.68, "grad_norm": 2.8800257059035994, "learning_rate": 2.4260838964908534e-06, "loss": 0.6168, "step": 6481 }, { "epoch": 0.68, "grad_norm": 2.3173592174188915, "learning_rate": 2.424622967911863e-06, "loss": 0.6082, "step": 6482 }, { "epoch": 0.68, "grad_norm": 0.9950821121430008, "learning_rate": 2.4231623385327337e-06, "loss": 0.5374, "step": 6483 }, { "epoch": 0.68, "grad_norm": 2.9226988485243646, "learning_rate": 2.421702008523153e-06, "loss": 0.5921, "step": 6484 }, { "epoch": 0.68, "grad_norm": 2.2019190672941673, "learning_rate": 2.4202419780527796e-06, "loss": 0.6282, "step": 6485 }, { "epoch": 0.68, "grad_norm": 18.87914111722867, "learning_rate": 2.418782247291238e-06, "loss": 0.5984, "step": 6486 }, { "epoch": 0.68, "grad_norm": 0.9756191834087302, "learning_rate": 2.4173228164081135e-06, "loss": 0.5355, "step": 6487 }, { "epoch": 0.68, "grad_norm": 2.229047102096132, "learning_rate": 2.4158636855729563e-06, "loss": 0.6097, "step": 6488 }, { "epoch": 0.68, "grad_norm": 2.160615604668489, "learning_rate": 2.414404854955286e-06, "loss": 0.5794, "step": 6489 }, { "epoch": 0.68, "grad_norm": 2.5744631310901793, "learning_rate": 2.4129463247245877e-06, "loss": 0.6365, "step": 6490 }, { "epoch": 0.68, "grad_norm": 2.4664632502489976, "learning_rate": 2.411488095050305e-06, "loss": 0.6087, "step": 6491 }, { "epoch": 0.68, "grad_norm": 2.2462459640267336, "learning_rate": 2.410030166101855e-06, "loss": 0.6314, "step": 6492 }, { "epoch": 0.68, "grad_norm": 3.7690797181208953, "learning_rate": 2.4085725380486106e-06, "loss": 0.6211, "step": 6493 }, { "epoch": 0.68, "grad_norm": 2.3187693057872476, "learning_rate": 2.4071152110599204e-06, "loss": 0.5885, "step": 6494 }, { "epoch": 0.68, "grad_norm": 2.4839016053608605, "learning_rate": 2.4056581853050877e-06, "loss": 0.6435, "step": 6495 }, { "epoch": 0.68, "grad_norm": 3.5230696548262035, "learning_rate": 2.4042014609533894e-06, "loss": 0.5636, "step": 6496 }, { "epoch": 0.68, "grad_norm": 2.54975560910205, "learning_rate": 2.4027450381740598e-06, "loss": 0.6318, "step": 6497 }, { "epoch": 0.68, "grad_norm": 2.1179176495059173, "learning_rate": 2.4012889171363034e-06, "loss": 0.6436, "step": 6498 }, { "epoch": 0.68, "grad_norm": 2.219085902942063, "learning_rate": 2.3998330980092906e-06, "loss": 0.5653, "step": 6499 }, { "epoch": 0.68, "grad_norm": 2.4057964800651495, "learning_rate": 2.3983775809621525e-06, "loss": 0.6256, "step": 6500 }, { "epoch": 0.68, "grad_norm": 2.9344149719041113, "learning_rate": 2.3969223661639838e-06, "loss": 0.6745, "step": 6501 }, { "epoch": 0.68, "grad_norm": 3.7761690986099326, "learning_rate": 2.395467453783851e-06, "loss": 0.5358, "step": 6502 }, { "epoch": 0.68, "grad_norm": 2.3942022138130055, "learning_rate": 2.394012843990781e-06, "loss": 0.664, "step": 6503 }, { "epoch": 0.68, "grad_norm": 2.3279945476576263, "learning_rate": 2.3925585369537647e-06, "loss": 0.6307, "step": 6504 }, { "epoch": 0.68, "grad_norm": 4.007225583568133, "learning_rate": 2.391104532841762e-06, "loss": 0.6179, "step": 6505 }, { "epoch": 0.68, "grad_norm": 3.6218819183172104, "learning_rate": 2.389650831823691e-06, "loss": 0.6501, "step": 6506 }, { "epoch": 0.68, "grad_norm": 2.3080723378936723, "learning_rate": 2.388197434068441e-06, "loss": 0.6166, "step": 6507 }, { "epoch": 0.68, "grad_norm": 2.2881275417992613, "learning_rate": 2.3867443397448646e-06, "loss": 0.622, "step": 6508 }, { "epoch": 0.68, "grad_norm": 2.241683983187079, "learning_rate": 2.3852915490217772e-06, "loss": 0.6207, "step": 6509 }, { "epoch": 0.69, "grad_norm": 2.556095245479891, "learning_rate": 2.383839062067957e-06, "loss": 0.5998, "step": 6510 }, { "epoch": 0.69, "grad_norm": 1.9799784118007415, "learning_rate": 2.382386879052152e-06, "loss": 0.6074, "step": 6511 }, { "epoch": 0.69, "grad_norm": 2.3182312355329513, "learning_rate": 2.3809350001430743e-06, "loss": 0.5951, "step": 6512 }, { "epoch": 0.69, "grad_norm": 2.3694748756782027, "learning_rate": 2.3794834255093977e-06, "loss": 0.6304, "step": 6513 }, { "epoch": 0.69, "grad_norm": 2.2001457523740147, "learning_rate": 2.37803215531976e-06, "loss": 0.6331, "step": 6514 }, { "epoch": 0.69, "grad_norm": 2.3212863403370796, "learning_rate": 2.3765811897427667e-06, "loss": 0.53, "step": 6515 }, { "epoch": 0.69, "grad_norm": 3.160468095861783, "learning_rate": 2.375130528946989e-06, "loss": 0.7405, "step": 6516 }, { "epoch": 0.69, "grad_norm": 2.6855144317046697, "learning_rate": 2.373680173100957e-06, "loss": 0.5849, "step": 6517 }, { "epoch": 0.69, "grad_norm": 2.341892568924303, "learning_rate": 2.3722301223731724e-06, "loss": 0.5603, "step": 6518 }, { "epoch": 0.69, "grad_norm": 2.283085350966203, "learning_rate": 2.3707803769320943e-06, "loss": 0.6285, "step": 6519 }, { "epoch": 0.69, "grad_norm": 3.545450875140134, "learning_rate": 2.3693309369461514e-06, "loss": 0.643, "step": 6520 }, { "epoch": 0.69, "grad_norm": 2.34738226126757, "learning_rate": 2.367881802583738e-06, "loss": 0.6825, "step": 6521 }, { "epoch": 0.69, "grad_norm": 2.6575846961053333, "learning_rate": 2.366432974013208e-06, "loss": 0.5756, "step": 6522 }, { "epoch": 0.69, "grad_norm": 2.6040764810860146, "learning_rate": 2.36498445140288e-06, "loss": 0.6637, "step": 6523 }, { "epoch": 0.69, "grad_norm": 2.575034774458675, "learning_rate": 2.3635362349210423e-06, "loss": 0.6506, "step": 6524 }, { "epoch": 0.69, "grad_norm": 2.561154528891339, "learning_rate": 2.362088324735945e-06, "loss": 0.585, "step": 6525 }, { "epoch": 0.69, "grad_norm": 2.557511457700825, "learning_rate": 2.3606407210158007e-06, "loss": 0.7341, "step": 6526 }, { "epoch": 0.69, "grad_norm": 5.151485466378373, "learning_rate": 2.3591934239287858e-06, "loss": 0.5364, "step": 6527 }, { "epoch": 0.69, "grad_norm": 3.2369051385886, "learning_rate": 2.3577464336430446e-06, "loss": 0.5904, "step": 6528 }, { "epoch": 0.69, "grad_norm": 2.4237630485347683, "learning_rate": 2.356299750326687e-06, "loss": 0.6413, "step": 6529 }, { "epoch": 0.69, "grad_norm": 2.791157720967069, "learning_rate": 2.3548533741477807e-06, "loss": 0.6735, "step": 6530 }, { "epoch": 0.69, "grad_norm": 2.3572226180240285, "learning_rate": 2.353407305274365e-06, "loss": 0.6745, "step": 6531 }, { "epoch": 0.69, "grad_norm": 2.756415728746884, "learning_rate": 2.3519615438744358e-06, "loss": 0.6098, "step": 6532 }, { "epoch": 0.69, "grad_norm": 3.9211036631117326, "learning_rate": 2.3505160901159596e-06, "loss": 0.6391, "step": 6533 }, { "epoch": 0.69, "grad_norm": 10.804798236915875, "learning_rate": 2.3490709441668673e-06, "loss": 0.6013, "step": 6534 }, { "epoch": 0.69, "grad_norm": 2.3231398228966333, "learning_rate": 2.34762610619505e-06, "loss": 0.6146, "step": 6535 }, { "epoch": 0.69, "grad_norm": 2.496092282256762, "learning_rate": 2.346181576368362e-06, "loss": 0.7055, "step": 6536 }, { "epoch": 0.69, "grad_norm": 2.487885892384019, "learning_rate": 2.344737354854627e-06, "loss": 0.7101, "step": 6537 }, { "epoch": 0.69, "grad_norm": 3.2049421861180685, "learning_rate": 2.343293441821633e-06, "loss": 0.672, "step": 6538 }, { "epoch": 0.69, "grad_norm": 1.9953645112584768, "learning_rate": 2.3418498374371266e-06, "loss": 0.6358, "step": 6539 }, { "epoch": 0.69, "grad_norm": 2.7737257283166983, "learning_rate": 2.3404065418688203e-06, "loss": 0.6623, "step": 6540 }, { "epoch": 0.69, "grad_norm": 2.2828290607769466, "learning_rate": 2.3389635552843943e-06, "loss": 0.6073, "step": 6541 }, { "epoch": 0.69, "grad_norm": 2.7921110029438876, "learning_rate": 2.3375208778514903e-06, "loss": 0.6668, "step": 6542 }, { "epoch": 0.69, "grad_norm": 6.963063546111307, "learning_rate": 2.336078509737715e-06, "loss": 0.7043, "step": 6543 }, { "epoch": 0.69, "grad_norm": 1.9709336196500105, "learning_rate": 2.334636451110639e-06, "loss": 0.5716, "step": 6544 }, { "epoch": 0.69, "grad_norm": 2.052297383518725, "learning_rate": 2.333194702137793e-06, "loss": 0.6293, "step": 6545 }, { "epoch": 0.69, "grad_norm": 2.2880945024427444, "learning_rate": 2.331753262986678e-06, "loss": 0.5453, "step": 6546 }, { "epoch": 0.69, "grad_norm": 3.578393672808325, "learning_rate": 2.330312133824757e-06, "loss": 0.7206, "step": 6547 }, { "epoch": 0.69, "grad_norm": 9.010515317246663, "learning_rate": 2.3288713148194554e-06, "loss": 0.6199, "step": 6548 }, { "epoch": 0.69, "grad_norm": 1.9927985217423352, "learning_rate": 2.3274308061381605e-06, "loss": 0.6139, "step": 6549 }, { "epoch": 0.69, "grad_norm": 1.9806575010532403, "learning_rate": 2.32599060794823e-06, "loss": 0.6179, "step": 6550 }, { "epoch": 0.69, "grad_norm": 2.541089321186657, "learning_rate": 2.324550720416982e-06, "loss": 0.5371, "step": 6551 }, { "epoch": 0.69, "grad_norm": 2.060488905830918, "learning_rate": 2.3231111437116954e-06, "loss": 0.6226, "step": 6552 }, { "epoch": 0.69, "grad_norm": 2.944439269515085, "learning_rate": 2.3216718779996205e-06, "loss": 0.6055, "step": 6553 }, { "epoch": 0.69, "grad_norm": 2.8098524227724737, "learning_rate": 2.320232923447962e-06, "loss": 0.6007, "step": 6554 }, { "epoch": 0.69, "grad_norm": 3.017792625313086, "learning_rate": 2.318794280223897e-06, "loss": 0.6693, "step": 6555 }, { "epoch": 0.69, "grad_norm": 2.2085939033581856, "learning_rate": 2.317355948494563e-06, "loss": 0.5985, "step": 6556 }, { "epoch": 0.69, "grad_norm": 2.4413921495254867, "learning_rate": 2.31591792842706e-06, "loss": 0.6141, "step": 6557 }, { "epoch": 0.69, "grad_norm": 2.270526009720749, "learning_rate": 2.314480220188452e-06, "loss": 0.6078, "step": 6558 }, { "epoch": 0.69, "grad_norm": 2.3574965924340434, "learning_rate": 2.3130428239457688e-06, "loss": 0.6009, "step": 6559 }, { "epoch": 0.69, "grad_norm": 7.673316161467211, "learning_rate": 2.3116057398660046e-06, "loss": 0.5492, "step": 6560 }, { "epoch": 0.69, "grad_norm": 2.7207174172029065, "learning_rate": 2.3101689681161142e-06, "loss": 0.6563, "step": 6561 }, { "epoch": 0.69, "grad_norm": 2.307943954138673, "learning_rate": 2.308732508863016e-06, "loss": 0.5498, "step": 6562 }, { "epoch": 0.69, "grad_norm": 4.299414059442358, "learning_rate": 2.307296362273595e-06, "loss": 0.6527, "step": 6563 }, { "epoch": 0.69, "grad_norm": 3.0267194850070918, "learning_rate": 2.305860528514701e-06, "loss": 0.6284, "step": 6564 }, { "epoch": 0.69, "grad_norm": 2.6543376447272022, "learning_rate": 2.304425007753141e-06, "loss": 0.6456, "step": 6565 }, { "epoch": 0.69, "grad_norm": 2.2467729720442384, "learning_rate": 2.3029898001556928e-06, "loss": 0.5627, "step": 6566 }, { "epoch": 0.69, "grad_norm": 4.086285543773585, "learning_rate": 2.301554905889092e-06, "loss": 0.5767, "step": 6567 }, { "epoch": 0.69, "grad_norm": 3.212741108195205, "learning_rate": 2.3001203251200417e-06, "loss": 0.5448, "step": 6568 }, { "epoch": 0.69, "grad_norm": 3.394587869638318, "learning_rate": 2.2986860580152095e-06, "loss": 0.6423, "step": 6569 }, { "epoch": 0.69, "grad_norm": 3.4031910872820763, "learning_rate": 2.2972521047412223e-06, "loss": 0.6362, "step": 6570 }, { "epoch": 0.69, "grad_norm": 4.483025314036182, "learning_rate": 2.2958184654646705e-06, "loss": 0.4954, "step": 6571 }, { "epoch": 0.69, "grad_norm": 2.548631805057781, "learning_rate": 2.2943851403521123e-06, "loss": 0.7172, "step": 6572 }, { "epoch": 0.69, "grad_norm": 4.61360591907676, "learning_rate": 2.2929521295700695e-06, "loss": 0.6463, "step": 6573 }, { "epoch": 0.69, "grad_norm": 2.8748275398323564, "learning_rate": 2.2915194332850233e-06, "loss": 0.6434, "step": 6574 }, { "epoch": 0.69, "grad_norm": 2.9897551268506732, "learning_rate": 2.290087051663418e-06, "loss": 0.5195, "step": 6575 }, { "epoch": 0.69, "grad_norm": 2.3753623260621426, "learning_rate": 2.288654984871665e-06, "loss": 0.485, "step": 6576 }, { "epoch": 0.69, "grad_norm": 2.635343007038432, "learning_rate": 2.2872232330761383e-06, "loss": 0.6737, "step": 6577 }, { "epoch": 0.69, "grad_norm": 2.5493524141073918, "learning_rate": 2.285791796443176e-06, "loss": 0.6043, "step": 6578 }, { "epoch": 0.69, "grad_norm": 2.9622930982127853, "learning_rate": 2.284360675139078e-06, "loss": 0.6278, "step": 6579 }, { "epoch": 0.69, "grad_norm": 4.097813348559686, "learning_rate": 2.282929869330104e-06, "loss": 0.6418, "step": 6580 }, { "epoch": 0.69, "grad_norm": 2.6376842210758853, "learning_rate": 2.2814993791824836e-06, "loss": 0.6048, "step": 6581 }, { "epoch": 0.69, "grad_norm": 3.058425958808465, "learning_rate": 2.2800692048624092e-06, "loss": 0.6329, "step": 6582 }, { "epoch": 0.69, "grad_norm": 0.9682730563914436, "learning_rate": 2.278639346536031e-06, "loss": 0.5591, "step": 6583 }, { "epoch": 0.69, "grad_norm": 3.6023672656105106, "learning_rate": 2.2772098043694656e-06, "loss": 0.6068, "step": 6584 }, { "epoch": 0.69, "grad_norm": 2.5349956031101852, "learning_rate": 2.2757805785287946e-06, "loss": 0.5998, "step": 6585 }, { "epoch": 0.69, "grad_norm": 1.9259914387049755, "learning_rate": 2.274351669180063e-06, "loss": 0.5596, "step": 6586 }, { "epoch": 0.69, "grad_norm": 2.1838133733921383, "learning_rate": 2.272923076489275e-06, "loss": 0.6713, "step": 6587 }, { "epoch": 0.69, "grad_norm": 2.354737589610576, "learning_rate": 2.271494800622399e-06, "loss": 0.5937, "step": 6588 }, { "epoch": 0.69, "grad_norm": 3.3360105669346956, "learning_rate": 2.2700668417453703e-06, "loss": 0.6004, "step": 6589 }, { "epoch": 0.69, "grad_norm": 2.521181364398551, "learning_rate": 2.2686392000240838e-06, "loss": 0.6411, "step": 6590 }, { "epoch": 0.69, "grad_norm": 2.3567654473778523, "learning_rate": 2.2672118756244014e-06, "loss": 0.6683, "step": 6591 }, { "epoch": 0.69, "grad_norm": 2.2629509475772647, "learning_rate": 2.2657848687121444e-06, "loss": 0.5745, "step": 6592 }, { "epoch": 0.69, "grad_norm": 2.1762686821367487, "learning_rate": 2.2643581794530943e-06, "loss": 0.6356, "step": 6593 }, { "epoch": 0.69, "grad_norm": 2.4123508723145535, "learning_rate": 2.2629318080130042e-06, "loss": 0.638, "step": 6594 }, { "epoch": 0.69, "grad_norm": 8.803556073787599, "learning_rate": 2.261505754557586e-06, "loss": 0.6717, "step": 6595 }, { "epoch": 0.69, "grad_norm": 2.8959725718418525, "learning_rate": 2.260080019252513e-06, "loss": 0.5497, "step": 6596 }, { "epoch": 0.69, "grad_norm": 2.3343782073258676, "learning_rate": 2.258654602263421e-06, "loss": 0.6782, "step": 6597 }, { "epoch": 0.69, "grad_norm": 2.0752300588139856, "learning_rate": 2.2572295037559135e-06, "loss": 0.6159, "step": 6598 }, { "epoch": 0.69, "grad_norm": 2.4484059409198364, "learning_rate": 2.2558047238955547e-06, "loss": 0.6265, "step": 6599 }, { "epoch": 0.69, "grad_norm": 3.0167746887631335, "learning_rate": 2.2543802628478695e-06, "loss": 0.6153, "step": 6600 }, { "epoch": 0.69, "grad_norm": 3.7309284321227167, "learning_rate": 2.2529561207783495e-06, "loss": 0.5535, "step": 6601 }, { "epoch": 0.69, "grad_norm": 2.5843645635460657, "learning_rate": 2.251532297852445e-06, "loss": 0.5995, "step": 6602 }, { "epoch": 0.69, "grad_norm": 2.1738162447033127, "learning_rate": 2.2501087942355736e-06, "loss": 0.7412, "step": 6603 }, { "epoch": 0.69, "grad_norm": 2.4229284055465707, "learning_rate": 2.2486856100931146e-06, "loss": 0.6312, "step": 6604 }, { "epoch": 0.7, "grad_norm": 2.8696482572875674, "learning_rate": 2.2472627455904086e-06, "loss": 0.5519, "step": 6605 }, { "epoch": 0.7, "grad_norm": 2.844232232179864, "learning_rate": 2.2458402008927578e-06, "loss": 0.6271, "step": 6606 }, { "epoch": 0.7, "grad_norm": 2.404083137621484, "learning_rate": 2.24441797616543e-06, "loss": 0.5466, "step": 6607 }, { "epoch": 0.7, "grad_norm": 2.567530863025546, "learning_rate": 2.2429960715736588e-06, "loss": 0.5551, "step": 6608 }, { "epoch": 0.7, "grad_norm": 2.2882668446276555, "learning_rate": 2.241574487282634e-06, "loss": 0.6007, "step": 6609 }, { "epoch": 0.7, "grad_norm": 2.110230304278789, "learning_rate": 2.24015322345751e-06, "loss": 0.6316, "step": 6610 }, { "epoch": 0.7, "grad_norm": 56.41508535977514, "learning_rate": 2.2387322802634065e-06, "loss": 0.6579, "step": 6611 }, { "epoch": 0.7, "grad_norm": 2.2521890735234713, "learning_rate": 2.2373116578654042e-06, "loss": 0.6235, "step": 6612 }, { "epoch": 0.7, "grad_norm": 3.234967262280706, "learning_rate": 2.2358913564285496e-06, "loss": 0.5794, "step": 6613 }, { "epoch": 0.7, "grad_norm": 2.976223938280554, "learning_rate": 2.234471376117847e-06, "loss": 0.6562, "step": 6614 }, { "epoch": 0.7, "grad_norm": 2.5700256793799388, "learning_rate": 2.2330517170982634e-06, "loss": 0.5574, "step": 6615 }, { "epoch": 0.7, "grad_norm": 3.3477002796845468, "learning_rate": 2.2316323795347334e-06, "loss": 0.6504, "step": 6616 }, { "epoch": 0.7, "grad_norm": 2.155545624420643, "learning_rate": 2.2302133635921524e-06, "loss": 0.6096, "step": 6617 }, { "epoch": 0.7, "grad_norm": 2.271504833154178, "learning_rate": 2.2287946694353764e-06, "loss": 0.6292, "step": 6618 }, { "epoch": 0.7, "grad_norm": 1.9615477986067251, "learning_rate": 2.2273762972292227e-06, "loss": 0.5254, "step": 6619 }, { "epoch": 0.7, "grad_norm": 2.347528126787625, "learning_rate": 2.2259582471384765e-06, "loss": 0.5771, "step": 6620 }, { "epoch": 0.7, "grad_norm": 2.0790518781036624, "learning_rate": 2.224540519327884e-06, "loss": 0.5152, "step": 6621 }, { "epoch": 0.7, "grad_norm": 2.4932893443972515, "learning_rate": 2.2231231139621505e-06, "loss": 0.6342, "step": 6622 }, { "epoch": 0.7, "grad_norm": 2.40013914291326, "learning_rate": 2.2217060312059453e-06, "loss": 0.6919, "step": 6623 }, { "epoch": 0.7, "grad_norm": 3.7257495545768244, "learning_rate": 2.2202892712239016e-06, "loss": 0.6643, "step": 6624 }, { "epoch": 0.7, "grad_norm": 3.029296071523241, "learning_rate": 2.2188728341806153e-06, "loss": 0.5941, "step": 6625 }, { "epoch": 0.7, "grad_norm": 2.587522142486169, "learning_rate": 2.2174567202406455e-06, "loss": 0.689, "step": 6626 }, { "epoch": 0.7, "grad_norm": 3.102337698108396, "learning_rate": 2.2160409295685105e-06, "loss": 0.6263, "step": 6627 }, { "epoch": 0.7, "grad_norm": 2.442746996525038, "learning_rate": 2.2146254623286905e-06, "loss": 0.6833, "step": 6628 }, { "epoch": 0.7, "grad_norm": 3.678411820711548, "learning_rate": 2.213210318685633e-06, "loss": 0.6105, "step": 6629 }, { "epoch": 0.7, "grad_norm": 2.314390976374499, "learning_rate": 2.2117954988037467e-06, "loss": 0.6826, "step": 6630 }, { "epoch": 0.7, "grad_norm": 2.3034289358297007, "learning_rate": 2.210381002847399e-06, "loss": 0.6343, "step": 6631 }, { "epoch": 0.7, "grad_norm": 2.9230039460865647, "learning_rate": 2.208966830980921e-06, "loss": 0.629, "step": 6632 }, { "epoch": 0.7, "grad_norm": 2.215059832781754, "learning_rate": 2.207552983368608e-06, "loss": 0.5855, "step": 6633 }, { "epoch": 0.7, "grad_norm": 3.733877578923876, "learning_rate": 2.20613946017472e-06, "loss": 0.561, "step": 6634 }, { "epoch": 0.7, "grad_norm": 5.184821592844948, "learning_rate": 2.2047262615634723e-06, "loss": 0.5693, "step": 6635 }, { "epoch": 0.7, "grad_norm": 2.3808532135782983, "learning_rate": 2.203313387699046e-06, "loss": 0.61, "step": 6636 }, { "epoch": 0.7, "grad_norm": 9.185810849417573, "learning_rate": 2.201900838745586e-06, "loss": 0.6877, "step": 6637 }, { "epoch": 0.7, "grad_norm": 2.901166357224412, "learning_rate": 2.2004886148671978e-06, "loss": 0.6226, "step": 6638 }, { "epoch": 0.7, "grad_norm": 2.8487030064529053, "learning_rate": 2.1990767162279515e-06, "loss": 0.5769, "step": 6639 }, { "epoch": 0.7, "grad_norm": 0.9023289850683952, "learning_rate": 2.197665142991876e-06, "loss": 0.4856, "step": 6640 }, { "epoch": 0.7, "grad_norm": 2.343960901159718, "learning_rate": 2.196253895322961e-06, "loss": 0.5961, "step": 6641 }, { "epoch": 0.7, "grad_norm": 4.467203125378956, "learning_rate": 2.1948429733851646e-06, "loss": 0.6625, "step": 6642 }, { "epoch": 0.7, "grad_norm": 2.702936295244413, "learning_rate": 2.193432377342404e-06, "loss": 0.6413, "step": 6643 }, { "epoch": 0.7, "grad_norm": 2.8056618447753277, "learning_rate": 2.1920221073585564e-06, "loss": 0.5931, "step": 6644 }, { "epoch": 0.7, "grad_norm": 2.79010389004188, "learning_rate": 2.190612163597462e-06, "loss": 0.6772, "step": 6645 }, { "epoch": 0.7, "grad_norm": 4.5825220168109295, "learning_rate": 2.189202546222925e-06, "loss": 0.6462, "step": 6646 }, { "epoch": 0.7, "grad_norm": 2.6022305800555974, "learning_rate": 2.1877932553987114e-06, "loss": 0.6701, "step": 6647 }, { "epoch": 0.7, "grad_norm": 2.1361674026822683, "learning_rate": 2.1863842912885496e-06, "loss": 0.569, "step": 6648 }, { "epoch": 0.7, "grad_norm": 3.1167050044456364, "learning_rate": 2.184975654056128e-06, "loss": 0.745, "step": 6649 }, { "epoch": 0.7, "grad_norm": 2.735863377596502, "learning_rate": 2.183567343865095e-06, "loss": 0.607, "step": 6650 }, { "epoch": 0.7, "grad_norm": 2.4462949583121554, "learning_rate": 2.182159360879067e-06, "loss": 0.587, "step": 6651 }, { "epoch": 0.7, "grad_norm": 1.9840779320355102, "learning_rate": 2.1807517052616205e-06, "loss": 0.5347, "step": 6652 }, { "epoch": 0.7, "grad_norm": 16.6142045279403, "learning_rate": 2.1793443771762912e-06, "loss": 0.5878, "step": 6653 }, { "epoch": 0.7, "grad_norm": 2.1547249179603583, "learning_rate": 2.177937376786577e-06, "loss": 0.5679, "step": 6654 }, { "epoch": 0.7, "grad_norm": 3.7649378439066905, "learning_rate": 2.17653070425594e-06, "loss": 0.6205, "step": 6655 }, { "epoch": 0.7, "grad_norm": 3.1627346207064804, "learning_rate": 2.175124359747806e-06, "loss": 0.653, "step": 6656 }, { "epoch": 0.7, "grad_norm": 3.3916267839635585, "learning_rate": 2.173718343425558e-06, "loss": 0.5896, "step": 6657 }, { "epoch": 0.7, "grad_norm": 3.269804794003583, "learning_rate": 2.1723126554525415e-06, "loss": 0.6527, "step": 6658 }, { "epoch": 0.7, "grad_norm": 1.0320795601481907, "learning_rate": 2.1709072959920667e-06, "loss": 0.525, "step": 6659 }, { "epoch": 0.7, "grad_norm": 2.3228826509557523, "learning_rate": 2.169502265207404e-06, "loss": 0.5739, "step": 6660 }, { "epoch": 0.7, "grad_norm": 2.952927663361331, "learning_rate": 2.168097563261787e-06, "loss": 0.6047, "step": 6661 }, { "epoch": 0.7, "grad_norm": 2.120080095383413, "learning_rate": 2.1666931903184103e-06, "loss": 0.5871, "step": 6662 }, { "epoch": 0.7, "grad_norm": 2.141689567051116, "learning_rate": 2.1652891465404257e-06, "loss": 0.6272, "step": 6663 }, { "epoch": 0.7, "grad_norm": 2.7913975017146035, "learning_rate": 2.1638854320909542e-06, "loss": 0.7013, "step": 6664 }, { "epoch": 0.7, "grad_norm": 2.2400371485167927, "learning_rate": 2.162482047133076e-06, "loss": 0.56, "step": 6665 }, { "epoch": 0.7, "grad_norm": 3.3342661126733666, "learning_rate": 2.161078991829832e-06, "loss": 0.635, "step": 6666 }, { "epoch": 0.7, "grad_norm": 3.536358295411925, "learning_rate": 2.159676266344222e-06, "loss": 0.6228, "step": 6667 }, { "epoch": 0.7, "grad_norm": 2.503851980349559, "learning_rate": 2.1582738708392127e-06, "loss": 0.5741, "step": 6668 }, { "epoch": 0.7, "grad_norm": 3.0980965236347804, "learning_rate": 2.1568718054777322e-06, "loss": 0.6151, "step": 6669 }, { "epoch": 0.7, "grad_norm": 3.792431692180165, "learning_rate": 2.1554700704226673e-06, "loss": 0.6017, "step": 6670 }, { "epoch": 0.7, "grad_norm": 2.0346739479145444, "learning_rate": 2.1540686658368643e-06, "loss": 0.5778, "step": 6671 }, { "epoch": 0.7, "grad_norm": 2.666257894353589, "learning_rate": 2.1526675918831373e-06, "loss": 0.6578, "step": 6672 }, { "epoch": 0.7, "grad_norm": 2.726228194370597, "learning_rate": 2.151266848724259e-06, "loss": 0.6432, "step": 6673 }, { "epoch": 0.7, "grad_norm": 2.5487776785030176, "learning_rate": 2.149866436522965e-06, "loss": 0.6072, "step": 6674 }, { "epoch": 0.7, "grad_norm": 2.8073196930271074, "learning_rate": 2.1484663554419495e-06, "loss": 0.6205, "step": 6675 }, { "epoch": 0.7, "grad_norm": 2.2783150428762, "learning_rate": 2.147066605643868e-06, "loss": 0.6129, "step": 6676 }, { "epoch": 0.7, "grad_norm": 6.203415894181792, "learning_rate": 2.145667187291341e-06, "loss": 0.6235, "step": 6677 }, { "epoch": 0.7, "grad_norm": 2.0439147089340683, "learning_rate": 2.144268100546951e-06, "loss": 0.5993, "step": 6678 }, { "epoch": 0.7, "grad_norm": 2.351141372515001, "learning_rate": 2.1428693455732384e-06, "loss": 0.5968, "step": 6679 }, { "epoch": 0.7, "grad_norm": 2.2933943463025726, "learning_rate": 2.141470922532704e-06, "loss": 0.6489, "step": 6680 }, { "epoch": 0.7, "grad_norm": 2.3273415621641216, "learning_rate": 2.140072831587815e-06, "loss": 0.635, "step": 6681 }, { "epoch": 0.7, "grad_norm": 2.2821768908749736, "learning_rate": 2.138675072900997e-06, "loss": 0.5997, "step": 6682 }, { "epoch": 0.7, "grad_norm": 3.089940278859609, "learning_rate": 2.1372776466346414e-06, "loss": 0.5724, "step": 6683 }, { "epoch": 0.7, "grad_norm": 2.251126225010119, "learning_rate": 2.1358805529510896e-06, "loss": 0.617, "step": 6684 }, { "epoch": 0.7, "grad_norm": 2.760541069813522, "learning_rate": 2.134483792012656e-06, "loss": 0.6546, "step": 6685 }, { "epoch": 0.7, "grad_norm": 2.2370760853157, "learning_rate": 2.1330873639816125e-06, "loss": 0.5493, "step": 6686 }, { "epoch": 0.7, "grad_norm": 2.9935072593068037, "learning_rate": 2.131691269020193e-06, "loss": 0.556, "step": 6687 }, { "epoch": 0.7, "grad_norm": 2.0100434248241155, "learning_rate": 2.130295507290591e-06, "loss": 0.6344, "step": 6688 }, { "epoch": 0.7, "grad_norm": 2.244649889387862, "learning_rate": 2.1289000789549586e-06, "loss": 0.5579, "step": 6689 }, { "epoch": 0.7, "grad_norm": 2.704981131283807, "learning_rate": 2.1275049841754165e-06, "loss": 0.5746, "step": 6690 }, { "epoch": 0.7, "grad_norm": 2.29539621168737, "learning_rate": 2.126110223114043e-06, "loss": 0.6104, "step": 6691 }, { "epoch": 0.7, "grad_norm": 2.205856297808691, "learning_rate": 2.1247157959328763e-06, "loss": 0.5662, "step": 6692 }, { "epoch": 0.7, "grad_norm": 0.8836298348060035, "learning_rate": 2.1233217027939153e-06, "loss": 0.562, "step": 6693 }, { "epoch": 0.7, "grad_norm": 2.3976000029984372, "learning_rate": 2.121927943859123e-06, "loss": 0.6278, "step": 6694 }, { "epoch": 0.7, "grad_norm": 2.331546227933603, "learning_rate": 2.1205345192904224e-06, "loss": 0.6523, "step": 6695 }, { "epoch": 0.7, "grad_norm": 2.3605376267410083, "learning_rate": 2.1191414292497e-06, "loss": 0.6496, "step": 6696 }, { "epoch": 0.7, "grad_norm": 2.102974401678194, "learning_rate": 2.1177486738987984e-06, "loss": 0.6491, "step": 6697 }, { "epoch": 0.7, "grad_norm": 2.26075427680184, "learning_rate": 2.116356253399522e-06, "loss": 0.5821, "step": 6698 }, { "epoch": 0.7, "grad_norm": 0.8908330043379147, "learning_rate": 2.114964167913641e-06, "loss": 0.537, "step": 6699 }, { "epoch": 0.71, "grad_norm": 2.579961013538668, "learning_rate": 2.1135724176028844e-06, "loss": 0.5184, "step": 6700 }, { "epoch": 0.71, "grad_norm": 1.08377361893251, "learning_rate": 2.1121810026289404e-06, "loss": 0.544, "step": 6701 }, { "epoch": 0.71, "grad_norm": 6.241375532358382, "learning_rate": 2.110789923153458e-06, "loss": 0.6789, "step": 6702 }, { "epoch": 0.71, "grad_norm": 2.9160714063650266, "learning_rate": 2.109399179338051e-06, "loss": 0.5792, "step": 6703 }, { "epoch": 0.71, "grad_norm": 2.1537715668446755, "learning_rate": 2.1080087713442928e-06, "loss": 0.5489, "step": 6704 }, { "epoch": 0.71, "grad_norm": 2.053631859586391, "learning_rate": 2.1066186993337158e-06, "loss": 0.5573, "step": 6705 }, { "epoch": 0.71, "grad_norm": 5.425494797474714, "learning_rate": 2.105228963467812e-06, "loss": 0.5586, "step": 6706 }, { "epoch": 0.71, "grad_norm": 2.748837747672611, "learning_rate": 2.10383956390804e-06, "loss": 0.5483, "step": 6707 }, { "epoch": 0.71, "grad_norm": 2.2002541684184727, "learning_rate": 2.1024505008158153e-06, "loss": 0.6831, "step": 6708 }, { "epoch": 0.71, "grad_norm": 2.309736548355432, "learning_rate": 2.101061774352517e-06, "loss": 0.5418, "step": 6709 }, { "epoch": 0.71, "grad_norm": 2.4178982369090267, "learning_rate": 2.099673384679482e-06, "loss": 0.5158, "step": 6710 }, { "epoch": 0.71, "grad_norm": 2.18817854947126, "learning_rate": 2.0982853319580075e-06, "loss": 0.5532, "step": 6711 }, { "epoch": 0.71, "grad_norm": 15.098923040125959, "learning_rate": 2.096897616349355e-06, "loss": 0.6052, "step": 6712 }, { "epoch": 0.71, "grad_norm": 4.248992003023729, "learning_rate": 2.0955102380147474e-06, "loss": 0.6149, "step": 6713 }, { "epoch": 0.71, "grad_norm": 5.785961960691239, "learning_rate": 2.0941231971153644e-06, "loss": 0.6334, "step": 6714 }, { "epoch": 0.71, "grad_norm": 2.878088474717053, "learning_rate": 2.0927364938123457e-06, "loss": 0.6535, "step": 6715 }, { "epoch": 0.71, "grad_norm": 2.4454378764029507, "learning_rate": 2.0913501282667975e-06, "loss": 0.6169, "step": 6716 }, { "epoch": 0.71, "grad_norm": 3.1028098404336815, "learning_rate": 2.0899641006397836e-06, "loss": 0.6849, "step": 6717 }, { "epoch": 0.71, "grad_norm": 2.5675573992295457, "learning_rate": 2.0885784110923325e-06, "loss": 0.6658, "step": 6718 }, { "epoch": 0.71, "grad_norm": 3.0159340806610255, "learning_rate": 2.087193059785421e-06, "loss": 0.6471, "step": 6719 }, { "epoch": 0.71, "grad_norm": 2.879084843294698, "learning_rate": 2.08580804688e-06, "loss": 0.5826, "step": 6720 }, { "epoch": 0.71, "grad_norm": 2.7524142612645415, "learning_rate": 2.084423372536976e-06, "loss": 0.6057, "step": 6721 }, { "epoch": 0.71, "grad_norm": 3.2595597192294785, "learning_rate": 2.083039036917219e-06, "loss": 0.6349, "step": 6722 }, { "epoch": 0.71, "grad_norm": 3.6944542500831257, "learning_rate": 2.0816550401815538e-06, "loss": 0.5963, "step": 6723 }, { "epoch": 0.71, "grad_norm": 2.1565697221035833, "learning_rate": 2.0802713824907683e-06, "loss": 0.5843, "step": 6724 }, { "epoch": 0.71, "grad_norm": 2.4041247947594564, "learning_rate": 2.0788880640056137e-06, "loss": 0.5929, "step": 6725 }, { "epoch": 0.71, "grad_norm": 2.6733679379720794, "learning_rate": 2.077505084886802e-06, "loss": 0.6643, "step": 6726 }, { "epoch": 0.71, "grad_norm": 2.2387112348073126, "learning_rate": 2.0761224452950003e-06, "loss": 0.6349, "step": 6727 }, { "epoch": 0.71, "grad_norm": 2.8590997656724713, "learning_rate": 2.07474014539084e-06, "loss": 0.6274, "step": 6728 }, { "epoch": 0.71, "grad_norm": 2.6204018448125233, "learning_rate": 2.0733581853349128e-06, "loss": 0.699, "step": 6729 }, { "epoch": 0.71, "grad_norm": 3.7073322849788988, "learning_rate": 2.071976565287772e-06, "loss": 0.553, "step": 6730 }, { "epoch": 0.71, "grad_norm": 2.0723138136947097, "learning_rate": 2.0705952854099337e-06, "loss": 0.5749, "step": 6731 }, { "epoch": 0.71, "grad_norm": 2.9024005003511815, "learning_rate": 2.069214345861863e-06, "loss": 0.5859, "step": 6732 }, { "epoch": 0.71, "grad_norm": 3.007051839789599, "learning_rate": 2.067833746803998e-06, "loss": 0.5448, "step": 6733 }, { "epoch": 0.71, "grad_norm": 2.16193794853113, "learning_rate": 2.0664534883967315e-06, "loss": 0.6046, "step": 6734 }, { "epoch": 0.71, "grad_norm": 2.0022011347301887, "learning_rate": 2.065073570800421e-06, "loss": 0.6004, "step": 6735 }, { "epoch": 0.71, "grad_norm": 2.5866230804925356, "learning_rate": 2.0636939941753793e-06, "loss": 0.6142, "step": 6736 }, { "epoch": 0.71, "grad_norm": 0.9455346482011558, "learning_rate": 2.0623147586818786e-06, "loss": 0.5795, "step": 6737 }, { "epoch": 0.71, "grad_norm": 2.8544304813633348, "learning_rate": 2.060935864480158e-06, "loss": 0.6272, "step": 6738 }, { "epoch": 0.71, "grad_norm": 3.139954944371839, "learning_rate": 2.0595573117304147e-06, "loss": 0.5931, "step": 6739 }, { "epoch": 0.71, "grad_norm": 2.141121600735202, "learning_rate": 2.0581791005928024e-06, "loss": 0.5734, "step": 6740 }, { "epoch": 0.71, "grad_norm": 2.1663356934725844, "learning_rate": 2.0568012312274367e-06, "loss": 0.4842, "step": 6741 }, { "epoch": 0.71, "grad_norm": 2.1593137328818948, "learning_rate": 2.0554237037943966e-06, "loss": 0.5317, "step": 6742 }, { "epoch": 0.71, "grad_norm": 4.225299678738348, "learning_rate": 2.054046518453718e-06, "loss": 0.6648, "step": 6743 }, { "epoch": 0.71, "grad_norm": 2.46890585145027, "learning_rate": 2.0526696753654008e-06, "loss": 0.648, "step": 6744 }, { "epoch": 0.71, "grad_norm": 8.384393534696619, "learning_rate": 2.051293174689401e-06, "loss": 0.6517, "step": 6745 }, { "epoch": 0.71, "grad_norm": 2.3553993298810063, "learning_rate": 2.0499170165856343e-06, "loss": 0.5796, "step": 6746 }, { "epoch": 0.71, "grad_norm": 3.2778199669618373, "learning_rate": 2.048541201213981e-06, "loss": 0.6517, "step": 6747 }, { "epoch": 0.71, "grad_norm": 2.1601593601154647, "learning_rate": 2.0471657287342813e-06, "loss": 0.598, "step": 6748 }, { "epoch": 0.71, "grad_norm": 2.7950345405844166, "learning_rate": 2.0457905993063306e-06, "loss": 0.6188, "step": 6749 }, { "epoch": 0.71, "grad_norm": 2.3229400348434717, "learning_rate": 2.044415813089887e-06, "loss": 0.5894, "step": 6750 }, { "epoch": 0.71, "grad_norm": 2.5323121960930086, "learning_rate": 2.0430413702446707e-06, "loss": 0.6715, "step": 6751 }, { "epoch": 0.71, "grad_norm": 2.1152568643865597, "learning_rate": 2.0416672709303597e-06, "loss": 0.591, "step": 6752 }, { "epoch": 0.71, "grad_norm": 3.241190616242863, "learning_rate": 2.0402935153065976e-06, "loss": 0.5456, "step": 6753 }, { "epoch": 0.71, "grad_norm": 2.8952230145025495, "learning_rate": 2.0389201035329754e-06, "loss": 0.6587, "step": 6754 }, { "epoch": 0.71, "grad_norm": 4.913524657839511, "learning_rate": 2.0375470357690564e-06, "loss": 0.6103, "step": 6755 }, { "epoch": 0.71, "grad_norm": 3.357411676416585, "learning_rate": 2.036174312174359e-06, "loss": 0.6013, "step": 6756 }, { "epoch": 0.71, "grad_norm": 2.164348116661981, "learning_rate": 2.034801932908364e-06, "loss": 0.641, "step": 6757 }, { "epoch": 0.71, "grad_norm": 2.285714519634651, "learning_rate": 2.033429898130509e-06, "loss": 0.5707, "step": 6758 }, { "epoch": 0.71, "grad_norm": 2.314588495421655, "learning_rate": 2.032058208000191e-06, "loss": 0.6353, "step": 6759 }, { "epoch": 0.71, "grad_norm": 3.1963968095024904, "learning_rate": 2.030686862676771e-06, "loss": 0.6579, "step": 6760 }, { "epoch": 0.71, "grad_norm": 2.234190092875219, "learning_rate": 2.0293158623195702e-06, "loss": 0.6601, "step": 6761 }, { "epoch": 0.71, "grad_norm": 2.350692178674305, "learning_rate": 2.0279452070878647e-06, "loss": 0.6188, "step": 6762 }, { "epoch": 0.71, "grad_norm": 2.564102840538574, "learning_rate": 2.026574897140892e-06, "loss": 0.6149, "step": 6763 }, { "epoch": 0.71, "grad_norm": 2.174466257356336, "learning_rate": 2.0252049326378524e-06, "loss": 0.605, "step": 6764 }, { "epoch": 0.71, "grad_norm": 2.149315755348158, "learning_rate": 2.0238353137379047e-06, "loss": 0.6217, "step": 6765 }, { "epoch": 0.71, "grad_norm": 2.22159691006164, "learning_rate": 2.02246604060017e-06, "loss": 0.4916, "step": 6766 }, { "epoch": 0.71, "grad_norm": 2.0623059295189003, "learning_rate": 2.0210971133837208e-06, "loss": 0.6159, "step": 6767 }, { "epoch": 0.71, "grad_norm": 5.244551913598113, "learning_rate": 2.0197285322475975e-06, "loss": 0.5866, "step": 6768 }, { "epoch": 0.71, "grad_norm": 3.429308309812128, "learning_rate": 2.0183602973507977e-06, "loss": 0.6469, "step": 6769 }, { "epoch": 0.71, "grad_norm": 2.2426310766438093, "learning_rate": 2.016992408852282e-06, "loss": 0.6122, "step": 6770 }, { "epoch": 0.71, "grad_norm": 3.147899821152736, "learning_rate": 2.0156248669109645e-06, "loss": 0.6084, "step": 6771 }, { "epoch": 0.71, "grad_norm": 2.5207952773245057, "learning_rate": 2.014257671685722e-06, "loss": 0.6181, "step": 6772 }, { "epoch": 0.71, "grad_norm": 2.290492088782644, "learning_rate": 2.012890823335392e-06, "loss": 0.6861, "step": 6773 }, { "epoch": 0.71, "grad_norm": 0.9788946060137566, "learning_rate": 2.011524322018773e-06, "loss": 0.5617, "step": 6774 }, { "epoch": 0.71, "grad_norm": 2.02202729427753, "learning_rate": 2.01015816789462e-06, "loss": 0.5491, "step": 6775 }, { "epoch": 0.71, "grad_norm": 2.644055938189567, "learning_rate": 2.0087923611216452e-06, "loss": 0.6326, "step": 6776 }, { "epoch": 0.71, "grad_norm": 2.6229423897634088, "learning_rate": 2.0074269018585286e-06, "loss": 0.6593, "step": 6777 }, { "epoch": 0.71, "grad_norm": 2.657797931574321, "learning_rate": 2.006061790263903e-06, "loss": 0.6879, "step": 6778 }, { "epoch": 0.71, "grad_norm": 3.731539046733333, "learning_rate": 2.004697026496366e-06, "loss": 0.6025, "step": 6779 }, { "epoch": 0.71, "grad_norm": 2.3300238922256162, "learning_rate": 2.00333261071447e-06, "loss": 0.5737, "step": 6780 }, { "epoch": 0.71, "grad_norm": 2.5105007437944, "learning_rate": 2.001968543076727e-06, "loss": 0.5984, "step": 6781 }, { "epoch": 0.71, "grad_norm": 2.9355723240439238, "learning_rate": 2.0006048237416127e-06, "loss": 0.6853, "step": 6782 }, { "epoch": 0.71, "grad_norm": 2.7838842692244414, "learning_rate": 1.9992414528675607e-06, "loss": 0.5972, "step": 6783 }, { "epoch": 0.71, "grad_norm": 2.409380507657977, "learning_rate": 1.997878430612963e-06, "loss": 0.6305, "step": 6784 }, { "epoch": 0.71, "grad_norm": 2.610091326027608, "learning_rate": 1.9965157571361688e-06, "loss": 0.6465, "step": 6785 }, { "epoch": 0.71, "grad_norm": 2.0465580485685355, "learning_rate": 1.9951534325954913e-06, "loss": 0.5599, "step": 6786 }, { "epoch": 0.71, "grad_norm": 2.7696890221497825, "learning_rate": 1.9937914571492024e-06, "loss": 0.5447, "step": 6787 }, { "epoch": 0.71, "grad_norm": 2.854812905102868, "learning_rate": 1.9924298309555355e-06, "loss": 0.5373, "step": 6788 }, { "epoch": 0.71, "grad_norm": 4.144295935862878, "learning_rate": 1.991068554172673e-06, "loss": 0.5129, "step": 6789 }, { "epoch": 0.71, "grad_norm": 2.235492207734396, "learning_rate": 1.9897076269587686e-06, "loss": 0.5211, "step": 6790 }, { "epoch": 0.71, "grad_norm": 1.0026420194820682, "learning_rate": 1.98834704947193e-06, "loss": 0.5368, "step": 6791 }, { "epoch": 0.71, "grad_norm": 4.2741376437674825, "learning_rate": 1.9869868218702266e-06, "loss": 0.6746, "step": 6792 }, { "epoch": 0.71, "grad_norm": 2.4315638104365025, "learning_rate": 1.985626944311685e-06, "loss": 0.4932, "step": 6793 }, { "epoch": 0.71, "grad_norm": 3.005978231078567, "learning_rate": 1.984267416954289e-06, "loss": 0.694, "step": 6794 }, { "epoch": 0.72, "grad_norm": 0.9172999980041083, "learning_rate": 1.9829082399559872e-06, "loss": 0.5798, "step": 6795 }, { "epoch": 0.72, "grad_norm": 2.2051339131239667, "learning_rate": 1.9815494134746866e-06, "loss": 0.6441, "step": 6796 }, { "epoch": 0.72, "grad_norm": 2.4405798502942733, "learning_rate": 1.98019093766825e-06, "loss": 0.6649, "step": 6797 }, { "epoch": 0.72, "grad_norm": 2.505836213580318, "learning_rate": 1.9788328126944984e-06, "loss": 0.5017, "step": 6798 }, { "epoch": 0.72, "grad_norm": 2.5383152998963965, "learning_rate": 1.9774750387112176e-06, "loss": 0.6945, "step": 6799 }, { "epoch": 0.72, "grad_norm": 2.295043766361787, "learning_rate": 1.976117615876149e-06, "loss": 0.5951, "step": 6800 }, { "epoch": 0.72, "grad_norm": 3.0818322840049546, "learning_rate": 1.974760544346999e-06, "loss": 0.6048, "step": 6801 }, { "epoch": 0.72, "grad_norm": 2.1718689507776063, "learning_rate": 1.9734038242814203e-06, "loss": 0.5392, "step": 6802 }, { "epoch": 0.72, "grad_norm": 4.5743079582613, "learning_rate": 1.9720474558370356e-06, "loss": 0.6438, "step": 6803 }, { "epoch": 0.72, "grad_norm": 2.4649750346849424, "learning_rate": 1.970691439171425e-06, "loss": 0.5826, "step": 6804 }, { "epoch": 0.72, "grad_norm": 2.251453377955877, "learning_rate": 1.9693357744421282e-06, "loss": 0.6129, "step": 6805 }, { "epoch": 0.72, "grad_norm": 2.7067846238307616, "learning_rate": 1.96798046180664e-06, "loss": 0.5928, "step": 6806 }, { "epoch": 0.72, "grad_norm": 2.9657749101823887, "learning_rate": 1.966625501422415e-06, "loss": 0.6502, "step": 6807 }, { "epoch": 0.72, "grad_norm": 2.885763329620718, "learning_rate": 1.965270893446871e-06, "loss": 0.6607, "step": 6808 }, { "epoch": 0.72, "grad_norm": 2.516314689530151, "learning_rate": 1.963916638037384e-06, "loss": 0.6097, "step": 6809 }, { "epoch": 0.72, "grad_norm": 2.819130855149682, "learning_rate": 1.9625627353512854e-06, "loss": 0.647, "step": 6810 }, { "epoch": 0.72, "grad_norm": 3.1510683550167125, "learning_rate": 1.9612091855458663e-06, "loss": 0.5885, "step": 6811 }, { "epoch": 0.72, "grad_norm": 2.8987176112644457, "learning_rate": 1.9598559887783797e-06, "loss": 0.5884, "step": 6812 }, { "epoch": 0.72, "grad_norm": 2.9904600312949907, "learning_rate": 1.958503145206036e-06, "loss": 0.5806, "step": 6813 }, { "epoch": 0.72, "grad_norm": 2.5080351376008374, "learning_rate": 1.9571506549860065e-06, "loss": 0.6038, "step": 6814 }, { "epoch": 0.72, "grad_norm": 2.939203442577235, "learning_rate": 1.955798518275418e-06, "loss": 0.5762, "step": 6815 }, { "epoch": 0.72, "grad_norm": 2.228977059412211, "learning_rate": 1.954446735231356e-06, "loss": 0.5609, "step": 6816 }, { "epoch": 0.72, "grad_norm": 2.63612086695577, "learning_rate": 1.953095306010869e-06, "loss": 0.5973, "step": 6817 }, { "epoch": 0.72, "grad_norm": 2.463307921855391, "learning_rate": 1.9517442307709626e-06, "loss": 0.5971, "step": 6818 }, { "epoch": 0.72, "grad_norm": 1.9678254750031574, "learning_rate": 1.9503935096686004e-06, "loss": 0.5483, "step": 6819 }, { "epoch": 0.72, "grad_norm": 2.4727076802540133, "learning_rate": 1.9490431428607027e-06, "loss": 0.626, "step": 6820 }, { "epoch": 0.72, "grad_norm": 2.0978475149269795, "learning_rate": 1.947693130504153e-06, "loss": 0.5705, "step": 6821 }, { "epoch": 0.72, "grad_norm": 2.10936758831189, "learning_rate": 1.9463434727557927e-06, "loss": 0.6207, "step": 6822 }, { "epoch": 0.72, "grad_norm": 2.4647814076539816, "learning_rate": 1.9449941697724233e-06, "loss": 0.5815, "step": 6823 }, { "epoch": 0.72, "grad_norm": 2.2186953732210632, "learning_rate": 1.943645221710797e-06, "loss": 0.5623, "step": 6824 }, { "epoch": 0.72, "grad_norm": 2.3686123006295414, "learning_rate": 1.942296628727634e-06, "loss": 0.6202, "step": 6825 }, { "epoch": 0.72, "grad_norm": 2.4056025600068973, "learning_rate": 1.9409483909796096e-06, "loss": 0.5397, "step": 6826 }, { "epoch": 0.72, "grad_norm": 2.843753987586021, "learning_rate": 1.93960050862336e-06, "loss": 0.6794, "step": 6827 }, { "epoch": 0.72, "grad_norm": 3.0253380845711915, "learning_rate": 1.9382529818154765e-06, "loss": 0.6079, "step": 6828 }, { "epoch": 0.72, "grad_norm": 2.226590014372156, "learning_rate": 1.9369058107125094e-06, "loss": 0.5139, "step": 6829 }, { "epoch": 0.72, "grad_norm": 2.6079029240171265, "learning_rate": 1.935558995470971e-06, "loss": 0.5842, "step": 6830 }, { "epoch": 0.72, "grad_norm": 2.5961478591051663, "learning_rate": 1.9342125362473313e-06, "loss": 0.7159, "step": 6831 }, { "epoch": 0.72, "grad_norm": 4.508735121085522, "learning_rate": 1.9328664331980175e-06, "loss": 0.6256, "step": 6832 }, { "epoch": 0.72, "grad_norm": 3.0787485825721332, "learning_rate": 1.931520686479413e-06, "loss": 0.7014, "step": 6833 }, { "epoch": 0.72, "grad_norm": 2.2749815114493286, "learning_rate": 1.9301752962478646e-06, "loss": 0.6009, "step": 6834 }, { "epoch": 0.72, "grad_norm": 1.9794025506603408, "learning_rate": 1.9288302626596772e-06, "loss": 0.6428, "step": 6835 }, { "epoch": 0.72, "grad_norm": 5.039751673052599, "learning_rate": 1.9274855858711157e-06, "loss": 0.6417, "step": 6836 }, { "epoch": 0.72, "grad_norm": 2.8877557291619853, "learning_rate": 1.9261412660383927e-06, "loss": 0.6086, "step": 6837 }, { "epoch": 0.72, "grad_norm": 2.692848677613801, "learning_rate": 1.924797303317692e-06, "loss": 0.6517, "step": 6838 }, { "epoch": 0.72, "grad_norm": 2.4817682883772334, "learning_rate": 1.9234536978651514e-06, "loss": 0.6697, "step": 6839 }, { "epoch": 0.72, "grad_norm": 3.126381340774734, "learning_rate": 1.922110449836869e-06, "loss": 0.6342, "step": 6840 }, { "epoch": 0.72, "grad_norm": 2.565024160486094, "learning_rate": 1.920767559388896e-06, "loss": 0.629, "step": 6841 }, { "epoch": 0.72, "grad_norm": 2.208434714049697, "learning_rate": 1.919425026677246e-06, "loss": 0.6768, "step": 6842 }, { "epoch": 0.72, "grad_norm": 2.7771674907930843, "learning_rate": 1.9180828518578907e-06, "loss": 0.6414, "step": 6843 }, { "epoch": 0.72, "grad_norm": 2.972741578017247, "learning_rate": 1.9167410350867634e-06, "loss": 0.5638, "step": 6844 }, { "epoch": 0.72, "grad_norm": 2.408080059628293, "learning_rate": 1.9153995765197492e-06, "loss": 0.5538, "step": 6845 }, { "epoch": 0.72, "grad_norm": 2.463378183778841, "learning_rate": 1.9140584763126942e-06, "loss": 0.6613, "step": 6846 }, { "epoch": 0.72, "grad_norm": 3.6438629469274177, "learning_rate": 1.912717734621404e-06, "loss": 0.6311, "step": 6847 }, { "epoch": 0.72, "grad_norm": 2.4515690885324446, "learning_rate": 1.911377351601644e-06, "loss": 0.587, "step": 6848 }, { "epoch": 0.72, "grad_norm": 2.9571991246007077, "learning_rate": 1.910037327409136e-06, "loss": 0.5853, "step": 6849 }, { "epoch": 0.72, "grad_norm": 1.0579456833316514, "learning_rate": 1.9086976621995595e-06, "loss": 0.5346, "step": 6850 }, { "epoch": 0.72, "grad_norm": 2.360162048127551, "learning_rate": 1.9073583561285507e-06, "loss": 0.5493, "step": 6851 }, { "epoch": 0.72, "grad_norm": 2.9782767148226186, "learning_rate": 1.9060194093517082e-06, "loss": 0.6485, "step": 6852 }, { "epoch": 0.72, "grad_norm": 3.172949041709697, "learning_rate": 1.9046808220245888e-06, "loss": 0.6456, "step": 6853 }, { "epoch": 0.72, "grad_norm": 2.4126025762580485, "learning_rate": 1.903342594302704e-06, "loss": 0.6233, "step": 6854 }, { "epoch": 0.72, "grad_norm": 3.111218794053248, "learning_rate": 1.9020047263415226e-06, "loss": 0.564, "step": 6855 }, { "epoch": 0.72, "grad_norm": 2.112267233952676, "learning_rate": 1.9006672182964776e-06, "loss": 0.5294, "step": 6856 }, { "epoch": 0.72, "grad_norm": 2.1191102063033305, "learning_rate": 1.899330070322955e-06, "loss": 0.5833, "step": 6857 }, { "epoch": 0.72, "grad_norm": 2.8084622563861643, "learning_rate": 1.8979932825763058e-06, "loss": 0.6078, "step": 6858 }, { "epoch": 0.72, "grad_norm": 2.731001054966648, "learning_rate": 1.8966568552118265e-06, "loss": 0.6116, "step": 6859 }, { "epoch": 0.72, "grad_norm": 2.4118252942652885, "learning_rate": 1.895320788384783e-06, "loss": 0.6477, "step": 6860 }, { "epoch": 0.72, "grad_norm": 2.4880736901897076, "learning_rate": 1.8939850822503953e-06, "loss": 0.6904, "step": 6861 }, { "epoch": 0.72, "grad_norm": 2.2508815431544282, "learning_rate": 1.8926497369638435e-06, "loss": 0.5529, "step": 6862 }, { "epoch": 0.72, "grad_norm": 2.475826447334696, "learning_rate": 1.8913147526802633e-06, "loss": 0.5821, "step": 6863 }, { "epoch": 0.72, "grad_norm": 2.106833308302653, "learning_rate": 1.8899801295547476e-06, "loss": 0.6153, "step": 6864 }, { "epoch": 0.72, "grad_norm": 2.202917393863976, "learning_rate": 1.8886458677423497e-06, "loss": 0.5526, "step": 6865 }, { "epoch": 0.72, "grad_norm": 2.4715070549141216, "learning_rate": 1.8873119673980828e-06, "loss": 0.6657, "step": 6866 }, { "epoch": 0.72, "grad_norm": 2.967796086555414, "learning_rate": 1.8859784286769133e-06, "loss": 0.5738, "step": 6867 }, { "epoch": 0.72, "grad_norm": 2.265639351961703, "learning_rate": 1.8846452517337665e-06, "loss": 0.676, "step": 6868 }, { "epoch": 0.72, "grad_norm": 2.2231883474524, "learning_rate": 1.8833124367235294e-06, "loss": 0.6419, "step": 6869 }, { "epoch": 0.72, "grad_norm": 3.0688124326659016, "learning_rate": 1.8819799838010434e-06, "loss": 0.617, "step": 6870 }, { "epoch": 0.72, "grad_norm": 2.5790235530152508, "learning_rate": 1.8806478931211137e-06, "loss": 0.6266, "step": 6871 }, { "epoch": 0.72, "grad_norm": 2.248003195138114, "learning_rate": 1.8793161648384905e-06, "loss": 0.6454, "step": 6872 }, { "epoch": 0.72, "grad_norm": 2.4974510096976377, "learning_rate": 1.8779847991078943e-06, "loss": 0.6192, "step": 6873 }, { "epoch": 0.72, "grad_norm": 2.7581329611825183, "learning_rate": 1.8766537960839997e-06, "loss": 0.6176, "step": 6874 }, { "epoch": 0.72, "grad_norm": 3.5694350624696978, "learning_rate": 1.8753231559214402e-06, "loss": 0.4594, "step": 6875 }, { "epoch": 0.72, "grad_norm": 4.608950453790209, "learning_rate": 1.8739928787748035e-06, "loss": 0.5928, "step": 6876 }, { "epoch": 0.72, "grad_norm": 2.277145432538584, "learning_rate": 1.872662964798636e-06, "loss": 0.6286, "step": 6877 }, { "epoch": 0.72, "grad_norm": 2.794994081731649, "learning_rate": 1.8713334141474454e-06, "loss": 0.6851, "step": 6878 }, { "epoch": 0.72, "grad_norm": 2.249886037254742, "learning_rate": 1.8700042269756964e-06, "loss": 0.633, "step": 6879 }, { "epoch": 0.72, "grad_norm": 2.2734533778197794, "learning_rate": 1.8686754034378085e-06, "loss": 0.6386, "step": 6880 }, { "epoch": 0.72, "grad_norm": 2.128369477606803, "learning_rate": 1.867346943688158e-06, "loss": 0.6784, "step": 6881 }, { "epoch": 0.72, "grad_norm": 2.304439961783062, "learning_rate": 1.8660188478810848e-06, "loss": 0.5845, "step": 6882 }, { "epoch": 0.72, "grad_norm": 2.995582630490625, "learning_rate": 1.8646911161708824e-06, "loss": 0.6216, "step": 6883 }, { "epoch": 0.72, "grad_norm": 2.3675530017194064, "learning_rate": 1.8633637487118046e-06, "loss": 0.5987, "step": 6884 }, { "epoch": 0.72, "grad_norm": 2.9813523750802173, "learning_rate": 1.862036745658059e-06, "loss": 0.6377, "step": 6885 }, { "epoch": 0.72, "grad_norm": 2.6903692135653428, "learning_rate": 1.8607101071638117e-06, "loss": 0.6237, "step": 6886 }, { "epoch": 0.72, "grad_norm": 2.329812249626914, "learning_rate": 1.8593838333831893e-06, "loss": 0.6296, "step": 6887 }, { "epoch": 0.72, "grad_norm": 3.501685681461769, "learning_rate": 1.8580579244702762e-06, "loss": 0.6258, "step": 6888 }, { "epoch": 0.72, "grad_norm": 1.055180231542555, "learning_rate": 1.8567323805791116e-06, "loss": 0.539, "step": 6889 }, { "epoch": 0.73, "grad_norm": 2.6544723103678916, "learning_rate": 1.8554072018636903e-06, "loss": 0.5934, "step": 6890 }, { "epoch": 0.73, "grad_norm": 2.4845123889284735, "learning_rate": 1.8540823884779708e-06, "loss": 0.5886, "step": 6891 }, { "epoch": 0.73, "grad_norm": 3.1419531063459485, "learning_rate": 1.8527579405758672e-06, "loss": 0.5611, "step": 6892 }, { "epoch": 0.73, "grad_norm": 2.4840964920780957, "learning_rate": 1.851433858311248e-06, "loss": 0.5448, "step": 6893 }, { "epoch": 0.73, "grad_norm": 2.7386535448522418, "learning_rate": 1.8501101418379398e-06, "loss": 0.6005, "step": 6894 }, { "epoch": 0.73, "grad_norm": 2.879316470041651, "learning_rate": 1.8487867913097301e-06, "loss": 0.5817, "step": 6895 }, { "epoch": 0.73, "grad_norm": 1.0545439469322904, "learning_rate": 1.8474638068803612e-06, "loss": 0.5266, "step": 6896 }, { "epoch": 0.73, "grad_norm": 3.215830096903211, "learning_rate": 1.8461411887035368e-06, "loss": 0.5681, "step": 6897 }, { "epoch": 0.73, "grad_norm": 2.3679216916484243, "learning_rate": 1.8448189369329117e-06, "loss": 0.6838, "step": 6898 }, { "epoch": 0.73, "grad_norm": 2.423382212663329, "learning_rate": 1.8434970517221e-06, "loss": 0.6508, "step": 6899 }, { "epoch": 0.73, "grad_norm": 1.0003477907669218, "learning_rate": 1.8421755332246765e-06, "loss": 0.5632, "step": 6900 }, { "epoch": 0.73, "grad_norm": 2.3039328325659962, "learning_rate": 1.840854381594173e-06, "loss": 0.5946, "step": 6901 }, { "epoch": 0.73, "grad_norm": 2.9075536999442937, "learning_rate": 1.8395335969840749e-06, "loss": 0.6977, "step": 6902 }, { "epoch": 0.73, "grad_norm": 2.653021560915067, "learning_rate": 1.8382131795478265e-06, "loss": 0.6104, "step": 6903 }, { "epoch": 0.73, "grad_norm": 2.459736159668085, "learning_rate": 1.8368931294388303e-06, "loss": 0.6558, "step": 6904 }, { "epoch": 0.73, "grad_norm": 2.145122811581977, "learning_rate": 1.8355734468104476e-06, "loss": 0.6376, "step": 6905 }, { "epoch": 0.73, "grad_norm": 2.3194721091248622, "learning_rate": 1.8342541318159967e-06, "loss": 0.6556, "step": 6906 }, { "epoch": 0.73, "grad_norm": 2.380316773328887, "learning_rate": 1.8329351846087467e-06, "loss": 0.4877, "step": 6907 }, { "epoch": 0.73, "grad_norm": 2.059952338938169, "learning_rate": 1.8316166053419321e-06, "loss": 0.648, "step": 6908 }, { "epoch": 0.73, "grad_norm": 2.3302793061982854, "learning_rate": 1.8302983941687414e-06, "loss": 0.665, "step": 6909 }, { "epoch": 0.73, "grad_norm": 2.196962381204859, "learning_rate": 1.828980551242322e-06, "loss": 0.6074, "step": 6910 }, { "epoch": 0.73, "grad_norm": 2.3653436980498133, "learning_rate": 1.827663076715776e-06, "loss": 0.5736, "step": 6911 }, { "epoch": 0.73, "grad_norm": 2.3272350514303515, "learning_rate": 1.8263459707421617e-06, "loss": 0.5789, "step": 6912 }, { "epoch": 0.73, "grad_norm": 2.843158913797961, "learning_rate": 1.8250292334744979e-06, "loss": 0.6096, "step": 6913 }, { "epoch": 0.73, "grad_norm": 2.3445312077529357, "learning_rate": 1.8237128650657621e-06, "loss": 0.6748, "step": 6914 }, { "epoch": 0.73, "grad_norm": 2.7392105169159904, "learning_rate": 1.8223968656688834e-06, "loss": 0.5316, "step": 6915 }, { "epoch": 0.73, "grad_norm": 2.381012515570709, "learning_rate": 1.8210812354367501e-06, "loss": 0.6245, "step": 6916 }, { "epoch": 0.73, "grad_norm": 2.0098706319158968, "learning_rate": 1.8197659745222095e-06, "loss": 0.5341, "step": 6917 }, { "epoch": 0.73, "grad_norm": 2.3320907079732485, "learning_rate": 1.818451083078065e-06, "loss": 0.6104, "step": 6918 }, { "epoch": 0.73, "grad_norm": 3.243267962515471, "learning_rate": 1.817136561257078e-06, "loss": 0.5681, "step": 6919 }, { "epoch": 0.73, "grad_norm": 3.244715320509986, "learning_rate": 1.8158224092119648e-06, "loss": 0.5684, "step": 6920 }, { "epoch": 0.73, "grad_norm": 2.9394817301430924, "learning_rate": 1.8145086270953977e-06, "loss": 0.576, "step": 6921 }, { "epoch": 0.73, "grad_norm": 2.421731912227469, "learning_rate": 1.8131952150600101e-06, "loss": 0.573, "step": 6922 }, { "epoch": 0.73, "grad_norm": 2.7932028278457115, "learning_rate": 1.8118821732583918e-06, "loss": 0.5806, "step": 6923 }, { "epoch": 0.73, "grad_norm": 2.9079436194870394, "learning_rate": 1.8105695018430873e-06, "loss": 0.6072, "step": 6924 }, { "epoch": 0.73, "grad_norm": 2.583809450277425, "learning_rate": 1.8092572009665965e-06, "loss": 0.628, "step": 6925 }, { "epoch": 0.73, "grad_norm": 2.1674044267667516, "learning_rate": 1.80794527078138e-06, "loss": 0.5968, "step": 6926 }, { "epoch": 0.73, "grad_norm": 2.6185013262869354, "learning_rate": 1.8066337114398568e-06, "loss": 0.5478, "step": 6927 }, { "epoch": 0.73, "grad_norm": 3.158397579484625, "learning_rate": 1.8053225230943982e-06, "loss": 0.6001, "step": 6928 }, { "epoch": 0.73, "grad_norm": 3.264667477188976, "learning_rate": 1.8040117058973317e-06, "loss": 0.6074, "step": 6929 }, { "epoch": 0.73, "grad_norm": 2.7602307379116064, "learning_rate": 1.802701260000947e-06, "loss": 0.6428, "step": 6930 }, { "epoch": 0.73, "grad_norm": 2.986327201725227, "learning_rate": 1.8013911855574874e-06, "loss": 0.5278, "step": 6931 }, { "epoch": 0.73, "grad_norm": 2.0385916995999906, "learning_rate": 1.8000814827191548e-06, "loss": 0.6268, "step": 6932 }, { "epoch": 0.73, "grad_norm": 2.1047064832640023, "learning_rate": 1.7987721516381056e-06, "loss": 0.6004, "step": 6933 }, { "epoch": 0.73, "grad_norm": 2.4994668971667364, "learning_rate": 1.7974631924664533e-06, "loss": 0.6405, "step": 6934 }, { "epoch": 0.73, "grad_norm": 2.388551949225777, "learning_rate": 1.7961546053562684e-06, "loss": 0.647, "step": 6935 }, { "epoch": 0.73, "grad_norm": 2.348701907039752, "learning_rate": 1.7948463904595826e-06, "loss": 0.5663, "step": 6936 }, { "epoch": 0.73, "grad_norm": 2.3266351864310137, "learning_rate": 1.793538547928378e-06, "loss": 0.604, "step": 6937 }, { "epoch": 0.73, "grad_norm": 2.150907502807278, "learning_rate": 1.7922310779145941e-06, "loss": 0.5916, "step": 6938 }, { "epoch": 0.73, "grad_norm": 3.2575410386870502, "learning_rate": 1.7909239805701307e-06, "loss": 0.6358, "step": 6939 }, { "epoch": 0.73, "grad_norm": 1.0203290093238349, "learning_rate": 1.7896172560468427e-06, "loss": 0.5392, "step": 6940 }, { "epoch": 0.73, "grad_norm": 2.455935523599585, "learning_rate": 1.7883109044965452e-06, "loss": 0.6663, "step": 6941 }, { "epoch": 0.73, "grad_norm": 2.292636831216748, "learning_rate": 1.7870049260709992e-06, "loss": 0.6031, "step": 6942 }, { "epoch": 0.73, "grad_norm": 2.740746619986824, "learning_rate": 1.785699320921933e-06, "loss": 0.5899, "step": 6943 }, { "epoch": 0.73, "grad_norm": 4.438088048263127, "learning_rate": 1.784394089201028e-06, "loss": 0.609, "step": 6944 }, { "epoch": 0.73, "grad_norm": 3.2919583772455385, "learning_rate": 1.7830892310599245e-06, "loss": 0.5254, "step": 6945 }, { "epoch": 0.73, "grad_norm": 3.0375360511034284, "learning_rate": 1.7817847466502146e-06, "loss": 0.5886, "step": 6946 }, { "epoch": 0.73, "grad_norm": 2.278371165535222, "learning_rate": 1.780480636123449e-06, "loss": 0.6285, "step": 6947 }, { "epoch": 0.73, "grad_norm": 2.6474986848344844, "learning_rate": 1.7791768996311355e-06, "loss": 0.6188, "step": 6948 }, { "epoch": 0.73, "grad_norm": 4.053270599492303, "learning_rate": 1.7778735373247414e-06, "loss": 0.6231, "step": 6949 }, { "epoch": 0.73, "grad_norm": 2.6023055630386622, "learning_rate": 1.7765705493556857e-06, "loss": 0.5683, "step": 6950 }, { "epoch": 0.73, "grad_norm": 2.180205593322918, "learning_rate": 1.7752679358753433e-06, "loss": 0.6944, "step": 6951 }, { "epoch": 0.73, "grad_norm": 3.3913259162961666, "learning_rate": 1.7739656970350505e-06, "loss": 0.6438, "step": 6952 }, { "epoch": 0.73, "grad_norm": 2.504035889002391, "learning_rate": 1.7726638329860978e-06, "loss": 0.6856, "step": 6953 }, { "epoch": 0.73, "grad_norm": 2.764765266914022, "learning_rate": 1.7713623438797335e-06, "loss": 0.5888, "step": 6954 }, { "epoch": 0.73, "grad_norm": 3.2754835937489934, "learning_rate": 1.7700612298671587e-06, "loss": 0.6877, "step": 6955 }, { "epoch": 0.73, "grad_norm": 2.449529265194134, "learning_rate": 1.7687604910995321e-06, "loss": 0.6649, "step": 6956 }, { "epoch": 0.73, "grad_norm": 2.4212481065052907, "learning_rate": 1.7674601277279707e-06, "loss": 0.618, "step": 6957 }, { "epoch": 0.73, "grad_norm": 5.736474279576478, "learning_rate": 1.7661601399035494e-06, "loss": 0.5901, "step": 6958 }, { "epoch": 0.73, "grad_norm": 2.691986553144693, "learning_rate": 1.7648605277772945e-06, "loss": 0.6313, "step": 6959 }, { "epoch": 0.73, "grad_norm": 2.6214863033679943, "learning_rate": 1.7635612915001903e-06, "loss": 0.7215, "step": 6960 }, { "epoch": 0.73, "grad_norm": 2.9600672604153533, "learning_rate": 1.7622624312231795e-06, "loss": 0.6593, "step": 6961 }, { "epoch": 0.73, "grad_norm": 2.8520335479613363, "learning_rate": 1.7609639470971618e-06, "loss": 0.5712, "step": 6962 }, { "epoch": 0.73, "grad_norm": 2.2886996974373814, "learning_rate": 1.7596658392729897e-06, "loss": 0.5834, "step": 6963 }, { "epoch": 0.73, "grad_norm": 2.8110720875652033, "learning_rate": 1.7583681079014713e-06, "loss": 0.6194, "step": 6964 }, { "epoch": 0.73, "grad_norm": 2.229998449327696, "learning_rate": 1.7570707531333763e-06, "loss": 0.5953, "step": 6965 }, { "epoch": 0.73, "grad_norm": 2.6339048670728284, "learning_rate": 1.7557737751194264e-06, "loss": 0.6276, "step": 6966 }, { "epoch": 0.73, "grad_norm": 3.126572883969871, "learning_rate": 1.7544771740103034e-06, "loss": 0.6219, "step": 6967 }, { "epoch": 0.73, "grad_norm": 3.532068840298241, "learning_rate": 1.7531809499566399e-06, "loss": 0.6469, "step": 6968 }, { "epoch": 0.73, "grad_norm": 3.1935164634899, "learning_rate": 1.7518851031090267e-06, "loss": 0.612, "step": 6969 }, { "epoch": 0.73, "grad_norm": 2.541841612435397, "learning_rate": 1.7505896336180128e-06, "loss": 0.5458, "step": 6970 }, { "epoch": 0.73, "grad_norm": 2.3737229858880187, "learning_rate": 1.7492945416341034e-06, "loss": 0.6253, "step": 6971 }, { "epoch": 0.73, "grad_norm": 3.2573282697164827, "learning_rate": 1.7479998273077581e-06, "loss": 0.633, "step": 6972 }, { "epoch": 0.73, "grad_norm": 2.7028931132859264, "learning_rate": 1.74670549078939e-06, "loss": 0.6755, "step": 6973 }, { "epoch": 0.73, "grad_norm": 2.702801368704138, "learning_rate": 1.7454115322293735e-06, "loss": 0.5617, "step": 6974 }, { "epoch": 0.73, "grad_norm": 2.3197807420820613, "learning_rate": 1.7441179517780376e-06, "loss": 0.6572, "step": 6975 }, { "epoch": 0.73, "grad_norm": 2.674381378155758, "learning_rate": 1.7428247495856699e-06, "loss": 0.6158, "step": 6976 }, { "epoch": 0.73, "grad_norm": 2.8755462439432047, "learning_rate": 1.7415319258025032e-06, "loss": 0.697, "step": 6977 }, { "epoch": 0.73, "grad_norm": 3.0812031503014983, "learning_rate": 1.7402394805787388e-06, "loss": 0.6277, "step": 6978 }, { "epoch": 0.73, "grad_norm": 11.5464107755622, "learning_rate": 1.7389474140645279e-06, "loss": 0.5804, "step": 6979 }, { "epoch": 0.73, "grad_norm": 2.338776528858335, "learning_rate": 1.7376557264099813e-06, "loss": 0.5742, "step": 6980 }, { "epoch": 0.73, "grad_norm": 2.5226545710029087, "learning_rate": 1.7363644177651623e-06, "loss": 0.5308, "step": 6981 }, { "epoch": 0.73, "grad_norm": 3.538706474138878, "learning_rate": 1.7350734882800891e-06, "loss": 0.6669, "step": 6982 }, { "epoch": 0.73, "grad_norm": 2.553740639868528, "learning_rate": 1.7337829381047405e-06, "loss": 0.6483, "step": 6983 }, { "epoch": 0.73, "grad_norm": 2.4491443310423726, "learning_rate": 1.7324927673890495e-06, "loss": 0.5876, "step": 6984 }, { "epoch": 0.74, "grad_norm": 2.3165156688725244, "learning_rate": 1.7312029762829042e-06, "loss": 0.5532, "step": 6985 }, { "epoch": 0.74, "grad_norm": 2.3703050030038884, "learning_rate": 1.729913564936146e-06, "loss": 0.646, "step": 6986 }, { "epoch": 0.74, "grad_norm": 2.457421486560212, "learning_rate": 1.728624533498577e-06, "loss": 0.6851, "step": 6987 }, { "epoch": 0.74, "grad_norm": 3.7152345384682492, "learning_rate": 1.7273358821199527e-06, "loss": 0.6291, "step": 6988 }, { "epoch": 0.74, "grad_norm": 2.928945773179569, "learning_rate": 1.7260476109499885e-06, "loss": 0.6238, "step": 6989 }, { "epoch": 0.74, "grad_norm": 2.0786975252518975, "learning_rate": 1.7247597201383459e-06, "loss": 0.6212, "step": 6990 }, { "epoch": 0.74, "grad_norm": 3.2322246793230462, "learning_rate": 1.7234722098346512e-06, "loss": 0.5755, "step": 6991 }, { "epoch": 0.74, "grad_norm": 2.385974734862681, "learning_rate": 1.7221850801884838e-06, "loss": 0.6729, "step": 6992 }, { "epoch": 0.74, "grad_norm": 2.2783917987848175, "learning_rate": 1.7208983313493804e-06, "loss": 0.5833, "step": 6993 }, { "epoch": 0.74, "grad_norm": 3.1121321239656368, "learning_rate": 1.7196119634668296e-06, "loss": 0.6074, "step": 6994 }, { "epoch": 0.74, "grad_norm": 2.4956465433518087, "learning_rate": 1.7183259766902765e-06, "loss": 0.5979, "step": 6995 }, { "epoch": 0.74, "grad_norm": 2.2925119083577363, "learning_rate": 1.7170403711691252e-06, "loss": 0.5927, "step": 6996 }, { "epoch": 0.74, "grad_norm": 4.1901529302786145, "learning_rate": 1.7157551470527356e-06, "loss": 0.5835, "step": 6997 }, { "epoch": 0.74, "grad_norm": 2.1103141114003794, "learning_rate": 1.7144703044904186e-06, "loss": 0.647, "step": 6998 }, { "epoch": 0.74, "grad_norm": 2.2812632686596874, "learning_rate": 1.7131858436314431e-06, "loss": 0.5589, "step": 6999 }, { "epoch": 0.74, "grad_norm": 3.893898766295147, "learning_rate": 1.7119017646250346e-06, "loss": 0.5332, "step": 7000 }, { "epoch": 0.74, "grad_norm": 2.333401918284876, "learning_rate": 1.7106180676203743e-06, "loss": 0.644, "step": 7001 }, { "epoch": 0.74, "grad_norm": 2.2776944296611905, "learning_rate": 1.7093347527666e-06, "loss": 0.6315, "step": 7002 }, { "epoch": 0.74, "grad_norm": 2.4878470361517535, "learning_rate": 1.708051820212801e-06, "loss": 0.5557, "step": 7003 }, { "epoch": 0.74, "grad_norm": 0.9538072931011493, "learning_rate": 1.7067692701080247e-06, "loss": 0.556, "step": 7004 }, { "epoch": 0.74, "grad_norm": 3.936334663090163, "learning_rate": 1.7054871026012748e-06, "loss": 0.6332, "step": 7005 }, { "epoch": 0.74, "grad_norm": 2.569749185859605, "learning_rate": 1.7042053178415114e-06, "loss": 0.597, "step": 7006 }, { "epoch": 0.74, "grad_norm": 2.2112215878359844, "learning_rate": 1.7029239159776468e-06, "loss": 0.6083, "step": 7007 }, { "epoch": 0.74, "grad_norm": 2.9065918760961362, "learning_rate": 1.7016428971585491e-06, "loss": 0.6281, "step": 7008 }, { "epoch": 0.74, "grad_norm": 2.0826011656455226, "learning_rate": 1.700362261533045e-06, "loss": 0.5959, "step": 7009 }, { "epoch": 0.74, "grad_norm": 2.3233212826323664, "learning_rate": 1.699082009249915e-06, "loss": 0.6065, "step": 7010 }, { "epoch": 0.74, "grad_norm": 0.9508372060278683, "learning_rate": 1.6978021404578986e-06, "loss": 0.5127, "step": 7011 }, { "epoch": 0.74, "grad_norm": 2.3456337360777826, "learning_rate": 1.6965226553056807e-06, "loss": 0.6536, "step": 7012 }, { "epoch": 0.74, "grad_norm": 2.740110007869177, "learning_rate": 1.6952435539419114e-06, "loss": 0.6519, "step": 7013 }, { "epoch": 0.74, "grad_norm": 2.285157779061375, "learning_rate": 1.6939648365151929e-06, "loss": 0.6668, "step": 7014 }, { "epoch": 0.74, "grad_norm": 2.286468423276322, "learning_rate": 1.692686503174084e-06, "loss": 0.5346, "step": 7015 }, { "epoch": 0.74, "grad_norm": 3.544160329777678, "learning_rate": 1.6914085540670972e-06, "loss": 0.6256, "step": 7016 }, { "epoch": 0.74, "grad_norm": 2.6105527595546683, "learning_rate": 1.6901309893426987e-06, "loss": 0.5112, "step": 7017 }, { "epoch": 0.74, "grad_norm": 2.824511640017726, "learning_rate": 1.688853809149314e-06, "loss": 0.6006, "step": 7018 }, { "epoch": 0.74, "grad_norm": 2.421375917201986, "learning_rate": 1.6875770136353237e-06, "loss": 0.5986, "step": 7019 }, { "epoch": 0.74, "grad_norm": 2.775614276256507, "learning_rate": 1.686300602949061e-06, "loss": 0.641, "step": 7020 }, { "epoch": 0.74, "grad_norm": 2.8464697028833026, "learning_rate": 1.6850245772388136e-06, "loss": 0.6148, "step": 7021 }, { "epoch": 0.74, "grad_norm": 2.6951000358839816, "learning_rate": 1.6837489366528275e-06, "loss": 0.5996, "step": 7022 }, { "epoch": 0.74, "grad_norm": 2.2776028181000036, "learning_rate": 1.6824736813393044e-06, "loss": 0.5767, "step": 7023 }, { "epoch": 0.74, "grad_norm": 3.049024430013574, "learning_rate": 1.6811988114464024e-06, "loss": 0.5973, "step": 7024 }, { "epoch": 0.74, "grad_norm": 2.259489650346381, "learning_rate": 1.6799243271222248e-06, "loss": 0.6279, "step": 7025 }, { "epoch": 0.74, "grad_norm": 2.7237784454005047, "learning_rate": 1.678650228514842e-06, "loss": 0.6341, "step": 7026 }, { "epoch": 0.74, "grad_norm": 2.492400316742001, "learning_rate": 1.677376515772275e-06, "loss": 0.5853, "step": 7027 }, { "epoch": 0.74, "grad_norm": 7.370329142260077, "learning_rate": 1.6761031890425007e-06, "loss": 0.6075, "step": 7028 }, { "epoch": 0.74, "grad_norm": 2.1958100915780823, "learning_rate": 1.6748302484734496e-06, "loss": 0.5913, "step": 7029 }, { "epoch": 0.74, "grad_norm": 7.685029763578393, "learning_rate": 1.6735576942130066e-06, "loss": 0.5991, "step": 7030 }, { "epoch": 0.74, "grad_norm": 2.0884424565510606, "learning_rate": 1.672285526409015e-06, "loss": 0.5875, "step": 7031 }, { "epoch": 0.74, "grad_norm": 4.244543977179305, "learning_rate": 1.6710137452092728e-06, "loss": 0.647, "step": 7032 }, { "epoch": 0.74, "grad_norm": 2.8555347187741154, "learning_rate": 1.6697423507615307e-06, "loss": 0.6764, "step": 7033 }, { "epoch": 0.74, "grad_norm": 2.943847721173569, "learning_rate": 1.6684713432134935e-06, "loss": 0.637, "step": 7034 }, { "epoch": 0.74, "grad_norm": 2.9179340893616694, "learning_rate": 1.6672007227128256e-06, "loss": 0.6128, "step": 7035 }, { "epoch": 0.74, "grad_norm": 2.3128758180540188, "learning_rate": 1.6659304894071437e-06, "loss": 0.5963, "step": 7036 }, { "epoch": 0.74, "grad_norm": 2.279530033918136, "learning_rate": 1.6646606434440216e-06, "loss": 0.5943, "step": 7037 }, { "epoch": 0.74, "grad_norm": 2.4218365961921924, "learning_rate": 1.6633911849709838e-06, "loss": 0.5705, "step": 7038 }, { "epoch": 0.74, "grad_norm": 2.4252321627282565, "learning_rate": 1.6621221141355114e-06, "loss": 0.6823, "step": 7039 }, { "epoch": 0.74, "grad_norm": 2.815209527008716, "learning_rate": 1.6608534310850432e-06, "loss": 0.6023, "step": 7040 }, { "epoch": 0.74, "grad_norm": 3.2696793670858457, "learning_rate": 1.6595851359669723e-06, "loss": 0.6344, "step": 7041 }, { "epoch": 0.74, "grad_norm": 3.048168501203653, "learning_rate": 1.6583172289286447e-06, "loss": 0.6739, "step": 7042 }, { "epoch": 0.74, "grad_norm": 2.345623717215056, "learning_rate": 1.6570497101173595e-06, "loss": 0.5041, "step": 7043 }, { "epoch": 0.74, "grad_norm": 2.656636229228406, "learning_rate": 1.6557825796803755e-06, "loss": 0.5783, "step": 7044 }, { "epoch": 0.74, "grad_norm": 2.047758363020559, "learning_rate": 1.6545158377649063e-06, "loss": 0.6101, "step": 7045 }, { "epoch": 0.74, "grad_norm": 2.4301367175325446, "learning_rate": 1.6532494845181157e-06, "loss": 0.6579, "step": 7046 }, { "epoch": 0.74, "grad_norm": 2.4798435707976627, "learning_rate": 1.6519835200871243e-06, "loss": 0.6268, "step": 7047 }, { "epoch": 0.74, "grad_norm": 2.2104318045494717, "learning_rate": 1.6507179446190091e-06, "loss": 0.5432, "step": 7048 }, { "epoch": 0.74, "grad_norm": 4.720179561419772, "learning_rate": 1.649452758260801e-06, "loss": 0.5763, "step": 7049 }, { "epoch": 0.74, "grad_norm": 2.8768611462675713, "learning_rate": 1.648187961159488e-06, "loss": 0.5999, "step": 7050 }, { "epoch": 0.74, "grad_norm": 2.9574492500850096, "learning_rate": 1.6469235534620087e-06, "loss": 0.6756, "step": 7051 }, { "epoch": 0.74, "grad_norm": 2.4478650282919654, "learning_rate": 1.6456595353152566e-06, "loss": 0.6178, "step": 7052 }, { "epoch": 0.74, "grad_norm": 3.4521440344822647, "learning_rate": 1.644395906866083e-06, "loss": 0.633, "step": 7053 }, { "epoch": 0.74, "grad_norm": 2.3845856771271645, "learning_rate": 1.6431326682612947e-06, "loss": 0.6037, "step": 7054 }, { "epoch": 0.74, "grad_norm": 1.9893993557633762, "learning_rate": 1.6418698196476497e-06, "loss": 0.5822, "step": 7055 }, { "epoch": 0.74, "grad_norm": 2.502449539506666, "learning_rate": 1.6406073611718593e-06, "loss": 0.6611, "step": 7056 }, { "epoch": 0.74, "grad_norm": 2.886805930275283, "learning_rate": 1.6393452929805947e-06, "loss": 0.6163, "step": 7057 }, { "epoch": 0.74, "grad_norm": 2.324217731618782, "learning_rate": 1.63808361522048e-06, "loss": 0.6323, "step": 7058 }, { "epoch": 0.74, "grad_norm": 2.4841967196947907, "learning_rate": 1.6368223280380951e-06, "loss": 0.6249, "step": 7059 }, { "epoch": 0.74, "grad_norm": 2.491514087882754, "learning_rate": 1.6355614315799673e-06, "loss": 0.5953, "step": 7060 }, { "epoch": 0.74, "grad_norm": 4.09150523009862, "learning_rate": 1.6343009259925863e-06, "loss": 0.663, "step": 7061 }, { "epoch": 0.74, "grad_norm": 2.642962343711459, "learning_rate": 1.633040811422395e-06, "loss": 0.6604, "step": 7062 }, { "epoch": 0.74, "grad_norm": 2.222557968930457, "learning_rate": 1.6317810880157908e-06, "loss": 0.5329, "step": 7063 }, { "epoch": 0.74, "grad_norm": 2.7336550978700083, "learning_rate": 1.6305217559191232e-06, "loss": 0.5325, "step": 7064 }, { "epoch": 0.74, "grad_norm": 2.1083210565732484, "learning_rate": 1.629262815278696e-06, "loss": 0.5392, "step": 7065 }, { "epoch": 0.74, "grad_norm": 2.720575901544298, "learning_rate": 1.6280042662407714e-06, "loss": 0.6113, "step": 7066 }, { "epoch": 0.74, "grad_norm": 2.16404863181636, "learning_rate": 1.626746108951565e-06, "loss": 0.5831, "step": 7067 }, { "epoch": 0.74, "grad_norm": 2.4115471190550117, "learning_rate": 1.6254883435572449e-06, "loss": 0.59, "step": 7068 }, { "epoch": 0.74, "grad_norm": 3.0583452239282183, "learning_rate": 1.6242309702039327e-06, "loss": 0.6492, "step": 7069 }, { "epoch": 0.74, "grad_norm": 3.2718246037744843, "learning_rate": 1.6229739890377084e-06, "loss": 0.5989, "step": 7070 }, { "epoch": 0.74, "grad_norm": 2.7419306661734812, "learning_rate": 1.6217174002046032e-06, "loss": 0.5111, "step": 7071 }, { "epoch": 0.74, "grad_norm": 3.0816295246885725, "learning_rate": 1.6204612038506068e-06, "loss": 0.639, "step": 7072 }, { "epoch": 0.74, "grad_norm": 2.2970090731001003, "learning_rate": 1.6192054001216585e-06, "loss": 0.6099, "step": 7073 }, { "epoch": 0.74, "grad_norm": 2.7790697280686962, "learning_rate": 1.6179499891636524e-06, "loss": 0.6486, "step": 7074 }, { "epoch": 0.74, "grad_norm": 2.6588858988791624, "learning_rate": 1.61669497112244e-06, "loss": 0.6991, "step": 7075 }, { "epoch": 0.74, "grad_norm": 2.4764296283414584, "learning_rate": 1.6154403461438273e-06, "loss": 0.6273, "step": 7076 }, { "epoch": 0.74, "grad_norm": 2.658819160311856, "learning_rate": 1.6141861143735716e-06, "loss": 0.5699, "step": 7077 }, { "epoch": 0.74, "grad_norm": 2.315234814191497, "learning_rate": 1.612932275957384e-06, "loss": 0.6531, "step": 7078 }, { "epoch": 0.74, "grad_norm": 4.409181763534464, "learning_rate": 1.6116788310409332e-06, "loss": 0.5654, "step": 7079 }, { "epoch": 0.75, "grad_norm": 2.573872111742514, "learning_rate": 1.6104257797698431e-06, "loss": 0.6174, "step": 7080 }, { "epoch": 0.75, "grad_norm": 2.1867723143982847, "learning_rate": 1.6091731222896877e-06, "loss": 0.6744, "step": 7081 }, { "epoch": 0.75, "grad_norm": 2.436773932857918, "learning_rate": 1.6079208587459954e-06, "loss": 0.6701, "step": 7082 }, { "epoch": 0.75, "grad_norm": 2.6539143787518165, "learning_rate": 1.6066689892842525e-06, "loss": 0.5861, "step": 7083 }, { "epoch": 0.75, "grad_norm": 2.9851718926572968, "learning_rate": 1.6054175140498967e-06, "loss": 0.6667, "step": 7084 }, { "epoch": 0.75, "grad_norm": 2.130011313816518, "learning_rate": 1.6041664331883233e-06, "loss": 0.6442, "step": 7085 }, { "epoch": 0.75, "grad_norm": 2.8378492504021393, "learning_rate": 1.6029157468448775e-06, "loss": 0.6244, "step": 7086 }, { "epoch": 0.75, "grad_norm": 2.2783353748879542, "learning_rate": 1.601665455164858e-06, "loss": 0.6466, "step": 7087 }, { "epoch": 0.75, "grad_norm": 3.5318917740950577, "learning_rate": 1.6004155582935232e-06, "loss": 0.7144, "step": 7088 }, { "epoch": 0.75, "grad_norm": 2.405042680383137, "learning_rate": 1.599166056376083e-06, "loss": 0.554, "step": 7089 }, { "epoch": 0.75, "grad_norm": 2.537663239731998, "learning_rate": 1.5979169495576991e-06, "loss": 0.6213, "step": 7090 }, { "epoch": 0.75, "grad_norm": 2.34724684013334, "learning_rate": 1.5966682379834887e-06, "loss": 0.5951, "step": 7091 }, { "epoch": 0.75, "grad_norm": 2.683042293367126, "learning_rate": 1.5954199217985233e-06, "loss": 0.6249, "step": 7092 }, { "epoch": 0.75, "grad_norm": 2.962353374507347, "learning_rate": 1.5941720011478323e-06, "loss": 0.5715, "step": 7093 }, { "epoch": 0.75, "grad_norm": 2.6239346592286377, "learning_rate": 1.5929244761763924e-06, "loss": 0.5831, "step": 7094 }, { "epoch": 0.75, "grad_norm": 2.338124980340621, "learning_rate": 1.591677347029137e-06, "loss": 0.576, "step": 7095 }, { "epoch": 0.75, "grad_norm": 3.8481926089677714, "learning_rate": 1.5904306138509545e-06, "loss": 0.6755, "step": 7096 }, { "epoch": 0.75, "grad_norm": 2.540089155983873, "learning_rate": 1.5891842767866872e-06, "loss": 0.5622, "step": 7097 }, { "epoch": 0.75, "grad_norm": 2.4998455761237133, "learning_rate": 1.587938335981133e-06, "loss": 0.5839, "step": 7098 }, { "epoch": 0.75, "grad_norm": 3.281273941072614, "learning_rate": 1.5866927915790391e-06, "loss": 0.6369, "step": 7099 }, { "epoch": 0.75, "grad_norm": 2.401257413792379, "learning_rate": 1.585447643725108e-06, "loss": 0.6927, "step": 7100 }, { "epoch": 0.75, "grad_norm": 2.885694164271074, "learning_rate": 1.5842028925640002e-06, "loss": 0.5983, "step": 7101 }, { "epoch": 0.75, "grad_norm": 2.249464206134762, "learning_rate": 1.5829585382403273e-06, "loss": 0.552, "step": 7102 }, { "epoch": 0.75, "grad_norm": 2.2266693760673335, "learning_rate": 1.5817145808986534e-06, "loss": 0.6486, "step": 7103 }, { "epoch": 0.75, "grad_norm": 2.6048570897327545, "learning_rate": 1.5804710206834972e-06, "loss": 0.6163, "step": 7104 }, { "epoch": 0.75, "grad_norm": 2.52361431641286, "learning_rate": 1.5792278577393327e-06, "loss": 0.5478, "step": 7105 }, { "epoch": 0.75, "grad_norm": 3.0440511092050317, "learning_rate": 1.577985092210587e-06, "loss": 0.648, "step": 7106 }, { "epoch": 0.75, "grad_norm": 2.5613872240202467, "learning_rate": 1.5767427242416433e-06, "loss": 0.6007, "step": 7107 }, { "epoch": 0.75, "grad_norm": 2.4150239405907303, "learning_rate": 1.575500753976834e-06, "loss": 0.5187, "step": 7108 }, { "epoch": 0.75, "grad_norm": 3.236040550601962, "learning_rate": 1.5742591815604463e-06, "loss": 0.6156, "step": 7109 }, { "epoch": 0.75, "grad_norm": 2.8872460453894053, "learning_rate": 1.5730180071367247e-06, "loss": 0.6328, "step": 7110 }, { "epoch": 0.75, "grad_norm": 2.329877740414494, "learning_rate": 1.5717772308498651e-06, "loss": 0.6179, "step": 7111 }, { "epoch": 0.75, "grad_norm": 2.635293780550099, "learning_rate": 1.5705368528440178e-06, "loss": 0.6769, "step": 7112 }, { "epoch": 0.75, "grad_norm": 2.5346783855495905, "learning_rate": 1.569296873263283e-06, "loss": 0.4746, "step": 7113 }, { "epoch": 0.75, "grad_norm": 2.1425884362151524, "learning_rate": 1.5680572922517206e-06, "loss": 0.5694, "step": 7114 }, { "epoch": 0.75, "grad_norm": 2.1871638635512443, "learning_rate": 1.5668181099533431e-06, "loss": 0.5599, "step": 7115 }, { "epoch": 0.75, "grad_norm": 2.6126133591306533, "learning_rate": 1.5655793265121132e-06, "loss": 0.6576, "step": 7116 }, { "epoch": 0.75, "grad_norm": 3.6620967020652584, "learning_rate": 1.5643409420719475e-06, "loss": 0.7142, "step": 7117 }, { "epoch": 0.75, "grad_norm": 2.539598366314506, "learning_rate": 1.5631029567767197e-06, "loss": 0.6241, "step": 7118 }, { "epoch": 0.75, "grad_norm": 3.873095508009267, "learning_rate": 1.5618653707702553e-06, "loss": 0.6067, "step": 7119 }, { "epoch": 0.75, "grad_norm": 3.2962610827522454, "learning_rate": 1.560628184196335e-06, "loss": 0.5866, "step": 7120 }, { "epoch": 0.75, "grad_norm": 2.418665449353965, "learning_rate": 1.55939139719869e-06, "loss": 0.5084, "step": 7121 }, { "epoch": 0.75, "grad_norm": 54.21175226317471, "learning_rate": 1.5581550099210053e-06, "loss": 0.5915, "step": 7122 }, { "epoch": 0.75, "grad_norm": 7.5068547209902965, "learning_rate": 1.5569190225069226e-06, "loss": 0.6405, "step": 7123 }, { "epoch": 0.75, "grad_norm": 2.7309517743093017, "learning_rate": 1.5556834351000356e-06, "loss": 0.6173, "step": 7124 }, { "epoch": 0.75, "grad_norm": 1.0711077498083168, "learning_rate": 1.554448247843891e-06, "loss": 0.5749, "step": 7125 }, { "epoch": 0.75, "grad_norm": 2.4429328414022162, "learning_rate": 1.5532134608819876e-06, "loss": 0.5991, "step": 7126 }, { "epoch": 0.75, "grad_norm": 4.816964217476705, "learning_rate": 1.55197907435778e-06, "loss": 0.6666, "step": 7127 }, { "epoch": 0.75, "grad_norm": 2.535197145555227, "learning_rate": 1.5507450884146784e-06, "loss": 0.5548, "step": 7128 }, { "epoch": 0.75, "grad_norm": 2.319066286953146, "learning_rate": 1.5495115031960418e-06, "loss": 0.6441, "step": 7129 }, { "epoch": 0.75, "grad_norm": 2.8047667310373727, "learning_rate": 1.5482783188451822e-06, "loss": 0.6417, "step": 7130 }, { "epoch": 0.75, "grad_norm": 3.9106777141475417, "learning_rate": 1.5470455355053704e-06, "loss": 0.6389, "step": 7131 }, { "epoch": 0.75, "grad_norm": 2.6056116741953828, "learning_rate": 1.5458131533198261e-06, "loss": 0.6165, "step": 7132 }, { "epoch": 0.75, "grad_norm": 2.9784665393315755, "learning_rate": 1.5445811724317267e-06, "loss": 0.584, "step": 7133 }, { "epoch": 0.75, "grad_norm": 2.057328186390915, "learning_rate": 1.5433495929841979e-06, "loss": 0.538, "step": 7134 }, { "epoch": 0.75, "grad_norm": 3.899774453962374, "learning_rate": 1.5421184151203194e-06, "loss": 0.6381, "step": 7135 }, { "epoch": 0.75, "grad_norm": 2.302231817534724, "learning_rate": 1.5408876389831278e-06, "loss": 0.5251, "step": 7136 }, { "epoch": 0.75, "grad_norm": 2.779434622195697, "learning_rate": 1.5396572647156126e-06, "loss": 0.6052, "step": 7137 }, { "epoch": 0.75, "grad_norm": 2.6994213766514155, "learning_rate": 1.538427292460714e-06, "loss": 0.6085, "step": 7138 }, { "epoch": 0.75, "grad_norm": 2.504683013737572, "learning_rate": 1.5371977223613238e-06, "loss": 0.6257, "step": 7139 }, { "epoch": 0.75, "grad_norm": 2.2855288271378664, "learning_rate": 1.535968554560293e-06, "loss": 0.6142, "step": 7140 }, { "epoch": 0.75, "grad_norm": 2.4654873006023608, "learning_rate": 1.5347397892004234e-06, "loss": 0.6748, "step": 7141 }, { "epoch": 0.75, "grad_norm": 2.2166955270138478, "learning_rate": 1.533511426424466e-06, "loss": 0.5971, "step": 7142 }, { "epoch": 0.75, "grad_norm": 2.5878170941910934, "learning_rate": 1.532283466375133e-06, "loss": 0.5781, "step": 7143 }, { "epoch": 0.75, "grad_norm": 3.088438430805986, "learning_rate": 1.5310559091950805e-06, "loss": 0.5924, "step": 7144 }, { "epoch": 0.75, "grad_norm": 3.455905487244043, "learning_rate": 1.5298287550269248e-06, "loss": 0.6324, "step": 7145 }, { "epoch": 0.75, "grad_norm": 2.296776703964561, "learning_rate": 1.5286020040132344e-06, "loss": 0.6318, "step": 7146 }, { "epoch": 0.75, "grad_norm": 3.502122104084021, "learning_rate": 1.5273756562965286e-06, "loss": 0.5917, "step": 7147 }, { "epoch": 0.75, "grad_norm": 2.813679350238381, "learning_rate": 1.5261497120192791e-06, "loss": 0.7005, "step": 7148 }, { "epoch": 0.75, "grad_norm": 2.386900010136105, "learning_rate": 1.5249241713239148e-06, "loss": 0.6208, "step": 7149 }, { "epoch": 0.75, "grad_norm": 2.097816570449662, "learning_rate": 1.5236990343528157e-06, "loss": 0.5714, "step": 7150 }, { "epoch": 0.75, "grad_norm": 2.8641415092943023, "learning_rate": 1.522474301248314e-06, "loss": 0.6228, "step": 7151 }, { "epoch": 0.75, "grad_norm": 2.953458669418631, "learning_rate": 1.5212499721526942e-06, "loss": 0.635, "step": 7152 }, { "epoch": 0.75, "grad_norm": 2.293638293253605, "learning_rate": 1.5200260472081962e-06, "loss": 0.6122, "step": 7153 }, { "epoch": 0.75, "grad_norm": 2.165393072045299, "learning_rate": 1.5188025265570127e-06, "loss": 0.5967, "step": 7154 }, { "epoch": 0.75, "grad_norm": 2.6653448901727215, "learning_rate": 1.51757941034129e-06, "loss": 0.5992, "step": 7155 }, { "epoch": 0.75, "grad_norm": 2.590367390269278, "learning_rate": 1.5163566987031246e-06, "loss": 0.5784, "step": 7156 }, { "epoch": 0.75, "grad_norm": 2.274011024448419, "learning_rate": 1.5151343917845662e-06, "loss": 0.5486, "step": 7157 }, { "epoch": 0.75, "grad_norm": 2.640547755917543, "learning_rate": 1.513912489727621e-06, "loss": 0.6622, "step": 7158 }, { "epoch": 0.75, "grad_norm": 3.435856702185333, "learning_rate": 1.5126909926742461e-06, "loss": 0.5952, "step": 7159 }, { "epoch": 0.75, "grad_norm": 2.327306617131621, "learning_rate": 1.511469900766352e-06, "loss": 0.5737, "step": 7160 }, { "epoch": 0.75, "grad_norm": 2.954132593391894, "learning_rate": 1.510249214145798e-06, "loss": 0.6354, "step": 7161 }, { "epoch": 0.75, "grad_norm": 2.435417507636399, "learning_rate": 1.5090289329544028e-06, "loss": 0.658, "step": 7162 }, { "epoch": 0.75, "grad_norm": 2.9854856200768136, "learning_rate": 1.5078090573339365e-06, "loss": 0.6282, "step": 7163 }, { "epoch": 0.75, "grad_norm": 3.106673158220274, "learning_rate": 1.506589587426119e-06, "loss": 0.5409, "step": 7164 }, { "epoch": 0.75, "grad_norm": 2.4494465926620497, "learning_rate": 1.5053705233726228e-06, "loss": 0.6402, "step": 7165 }, { "epoch": 0.75, "grad_norm": 2.1863048410990804, "learning_rate": 1.5041518653150777e-06, "loss": 0.6386, "step": 7166 }, { "epoch": 0.75, "grad_norm": 2.2078949677416864, "learning_rate": 1.5029336133950635e-06, "loss": 0.5456, "step": 7167 }, { "epoch": 0.75, "grad_norm": 3.738221773697226, "learning_rate": 1.5017157677541144e-06, "loss": 0.6146, "step": 7168 }, { "epoch": 0.75, "grad_norm": 2.250673304439142, "learning_rate": 1.500498328533715e-06, "loss": 0.5573, "step": 7169 }, { "epoch": 0.75, "grad_norm": 2.5150783835660797, "learning_rate": 1.4992812958753023e-06, "loss": 0.5967, "step": 7170 }, { "epoch": 0.75, "grad_norm": 0.9829915284477175, "learning_rate": 1.4980646699202684e-06, "loss": 0.5587, "step": 7171 }, { "epoch": 0.75, "grad_norm": 2.1533475910931696, "learning_rate": 1.4968484508099606e-06, "loss": 0.6385, "step": 7172 }, { "epoch": 0.75, "grad_norm": 5.851990435700211, "learning_rate": 1.4956326386856723e-06, "loss": 0.5869, "step": 7173 }, { "epoch": 0.75, "grad_norm": 2.7719426441646275, "learning_rate": 1.494417233688653e-06, "loss": 0.5874, "step": 7174 }, { "epoch": 0.76, "grad_norm": 3.102882961445121, "learning_rate": 1.4932022359601056e-06, "loss": 0.6421, "step": 7175 }, { "epoch": 0.76, "grad_norm": 2.6384157210222536, "learning_rate": 1.4919876456411875e-06, "loss": 0.5523, "step": 7176 }, { "epoch": 0.76, "grad_norm": 2.378591334938083, "learning_rate": 1.490773462873002e-06, "loss": 0.544, "step": 7177 }, { "epoch": 0.76, "grad_norm": 20.641360117403863, "learning_rate": 1.4895596877966128e-06, "loss": 0.636, "step": 7178 }, { "epoch": 0.76, "grad_norm": 2.4236725068487304, "learning_rate": 1.4883463205530302e-06, "loss": 0.6411, "step": 7179 }, { "epoch": 0.76, "grad_norm": 2.2183036139750714, "learning_rate": 1.4871333612832206e-06, "loss": 0.5712, "step": 7180 }, { "epoch": 0.76, "grad_norm": 3.050796166901124, "learning_rate": 1.4859208101281041e-06, "loss": 0.6366, "step": 7181 }, { "epoch": 0.76, "grad_norm": 2.4152995939569166, "learning_rate": 1.48470866722855e-06, "loss": 0.5498, "step": 7182 }, { "epoch": 0.76, "grad_norm": 2.362918250869478, "learning_rate": 1.4834969327253795e-06, "loss": 0.5972, "step": 7183 }, { "epoch": 0.76, "grad_norm": 2.0541545853510184, "learning_rate": 1.48228560675937e-06, "loss": 0.5475, "step": 7184 }, { "epoch": 0.76, "grad_norm": 4.0605689354251995, "learning_rate": 1.481074689471252e-06, "loss": 0.5439, "step": 7185 }, { "epoch": 0.76, "grad_norm": 2.1804727796434964, "learning_rate": 1.479864181001704e-06, "loss": 0.6155, "step": 7186 }, { "epoch": 0.76, "grad_norm": 4.353882130124197, "learning_rate": 1.4786540814913586e-06, "loss": 0.6792, "step": 7187 }, { "epoch": 0.76, "grad_norm": 2.4609607660129162, "learning_rate": 1.4774443910808023e-06, "loss": 0.6776, "step": 7188 }, { "epoch": 0.76, "grad_norm": 2.6666982763784812, "learning_rate": 1.476235109910576e-06, "loss": 0.6191, "step": 7189 }, { "epoch": 0.76, "grad_norm": 3.56167731467775, "learning_rate": 1.4750262381211665e-06, "loss": 0.5887, "step": 7190 }, { "epoch": 0.76, "grad_norm": 2.44278557698414, "learning_rate": 1.4738177758530208e-06, "loss": 0.6259, "step": 7191 }, { "epoch": 0.76, "grad_norm": 2.5250241812484906, "learning_rate": 1.4726097232465314e-06, "loss": 0.6089, "step": 7192 }, { "epoch": 0.76, "grad_norm": 3.2400336695589744, "learning_rate": 1.471402080442047e-06, "loss": 0.5666, "step": 7193 }, { "epoch": 0.76, "grad_norm": 2.7151462386306657, "learning_rate": 1.470194847579871e-06, "loss": 0.6301, "step": 7194 }, { "epoch": 0.76, "grad_norm": 2.698877450197644, "learning_rate": 1.4689880248002537e-06, "loss": 0.7084, "step": 7195 }, { "epoch": 0.76, "grad_norm": 2.6410464096856665, "learning_rate": 1.467781612243399e-06, "loss": 0.6245, "step": 7196 }, { "epoch": 0.76, "grad_norm": 2.612323546310781, "learning_rate": 1.466575610049466e-06, "loss": 0.6783, "step": 7197 }, { "epoch": 0.76, "grad_norm": 2.7178474296193564, "learning_rate": 1.4653700183585663e-06, "loss": 0.6122, "step": 7198 }, { "epoch": 0.76, "grad_norm": 3.512024045937146, "learning_rate": 1.4641648373107598e-06, "loss": 0.6147, "step": 7199 }, { "epoch": 0.76, "grad_norm": 2.0834058077279685, "learning_rate": 1.4629600670460603e-06, "loss": 0.5514, "step": 7200 }, { "epoch": 0.76, "grad_norm": 2.216935664488474, "learning_rate": 1.4617557077044352e-06, "loss": 0.6227, "step": 7201 }, { "epoch": 0.76, "grad_norm": 2.46166081455518, "learning_rate": 1.4605517594258046e-06, "loss": 0.5871, "step": 7202 }, { "epoch": 0.76, "grad_norm": 2.1948644668696375, "learning_rate": 1.4593482223500406e-06, "loss": 0.6353, "step": 7203 }, { "epoch": 0.76, "grad_norm": 4.33193407924704, "learning_rate": 1.4581450966169648e-06, "loss": 0.6246, "step": 7204 }, { "epoch": 0.76, "grad_norm": 2.624255396763174, "learning_rate": 1.4569423823663515e-06, "loss": 0.6982, "step": 7205 }, { "epoch": 0.76, "grad_norm": 2.3813097303259028, "learning_rate": 1.4557400797379306e-06, "loss": 0.5593, "step": 7206 }, { "epoch": 0.76, "grad_norm": 2.8170072633637067, "learning_rate": 1.4545381888713833e-06, "loss": 0.622, "step": 7207 }, { "epoch": 0.76, "grad_norm": 1.0227331919960505, "learning_rate": 1.45333670990634e-06, "loss": 0.4883, "step": 7208 }, { "epoch": 0.76, "grad_norm": 2.9590588104321056, "learning_rate": 1.452135642982384e-06, "loss": 0.6183, "step": 7209 }, { "epoch": 0.76, "grad_norm": 2.33117855582943, "learning_rate": 1.450934988239054e-06, "loss": 0.6134, "step": 7210 }, { "epoch": 0.76, "grad_norm": 2.4557426055942786, "learning_rate": 1.4497347458158384e-06, "loss": 0.62, "step": 7211 }, { "epoch": 0.76, "grad_norm": 2.926982164970039, "learning_rate": 1.4485349158521756e-06, "loss": 0.6036, "step": 7212 }, { "epoch": 0.76, "grad_norm": 2.2092246121938164, "learning_rate": 1.447335498487462e-06, "loss": 0.6053, "step": 7213 }, { "epoch": 0.76, "grad_norm": 2.4017310344462994, "learning_rate": 1.446136493861039e-06, "loss": 0.6659, "step": 7214 }, { "epoch": 0.76, "grad_norm": 2.1199053957284346, "learning_rate": 1.4449379021122045e-06, "loss": 0.6202, "step": 7215 }, { "epoch": 0.76, "grad_norm": 2.3367095552631816, "learning_rate": 1.4437397233802098e-06, "loss": 0.629, "step": 7216 }, { "epoch": 0.76, "grad_norm": 3.0011471861240357, "learning_rate": 1.4425419578042538e-06, "loss": 0.6302, "step": 7217 }, { "epoch": 0.76, "grad_norm": 2.595981973547213, "learning_rate": 1.4413446055234882e-06, "loss": 0.6064, "step": 7218 }, { "epoch": 0.76, "grad_norm": 3.0100131890890243, "learning_rate": 1.4401476666770191e-06, "loss": 0.6153, "step": 7219 }, { "epoch": 0.76, "grad_norm": 2.068773777984486, "learning_rate": 1.4389511414039053e-06, "loss": 0.5228, "step": 7220 }, { "epoch": 0.76, "grad_norm": 2.183378387130152, "learning_rate": 1.4377550298431536e-06, "loss": 0.6687, "step": 7221 }, { "epoch": 0.76, "grad_norm": 2.8674171438993237, "learning_rate": 1.436559332133724e-06, "loss": 0.6141, "step": 7222 }, { "epoch": 0.76, "grad_norm": 2.8843731419145637, "learning_rate": 1.4353640484145304e-06, "loss": 0.5749, "step": 7223 }, { "epoch": 0.76, "grad_norm": 2.27701809442643, "learning_rate": 1.4341691788244383e-06, "loss": 0.6422, "step": 7224 }, { "epoch": 0.76, "grad_norm": 2.5322604425389112, "learning_rate": 1.4329747235022624e-06, "loss": 0.5397, "step": 7225 }, { "epoch": 0.76, "grad_norm": 2.431501916595479, "learning_rate": 1.431780682586773e-06, "loss": 0.6515, "step": 7226 }, { "epoch": 0.76, "grad_norm": 2.3394101864352983, "learning_rate": 1.4305870562166873e-06, "loss": 0.6069, "step": 7227 }, { "epoch": 0.76, "grad_norm": 2.2242293964368156, "learning_rate": 1.4293938445306798e-06, "loss": 0.5442, "step": 7228 }, { "epoch": 0.76, "grad_norm": 2.5833298123991604, "learning_rate": 1.4282010476673758e-06, "loss": 0.6004, "step": 7229 }, { "epoch": 0.76, "grad_norm": 2.6359655886818913, "learning_rate": 1.427008665765348e-06, "loss": 0.6834, "step": 7230 }, { "epoch": 0.76, "grad_norm": 2.585971432337667, "learning_rate": 1.4258166989631245e-06, "loss": 0.6375, "step": 7231 }, { "epoch": 0.76, "grad_norm": 3.257480641444835, "learning_rate": 1.4246251473991845e-06, "loss": 0.6148, "step": 7232 }, { "epoch": 0.76, "grad_norm": 2.6802149842143397, "learning_rate": 1.423434011211961e-06, "loss": 0.6633, "step": 7233 }, { "epoch": 0.76, "grad_norm": 2.460163635030121, "learning_rate": 1.4222432905398353e-06, "loss": 0.5807, "step": 7234 }, { "epoch": 0.76, "grad_norm": 2.22647375488727, "learning_rate": 1.4210529855211403e-06, "loss": 0.4943, "step": 7235 }, { "epoch": 0.76, "grad_norm": 4.121150139420541, "learning_rate": 1.4198630962941639e-06, "loss": 0.6249, "step": 7236 }, { "epoch": 0.76, "grad_norm": 2.946918461716052, "learning_rate": 1.4186736229971455e-06, "loss": 0.6211, "step": 7237 }, { "epoch": 0.76, "grad_norm": 4.528975031658922, "learning_rate": 1.4174845657682712e-06, "loss": 0.6419, "step": 7238 }, { "epoch": 0.76, "grad_norm": 2.5787193861812496, "learning_rate": 1.4162959247456854e-06, "loss": 0.6206, "step": 7239 }, { "epoch": 0.76, "grad_norm": 3.406544835370247, "learning_rate": 1.4151077000674784e-06, "loss": 0.624, "step": 7240 }, { "epoch": 0.76, "grad_norm": 4.207698162326682, "learning_rate": 1.4139198918716956e-06, "loss": 0.5709, "step": 7241 }, { "epoch": 0.76, "grad_norm": 2.2463252191495924, "learning_rate": 1.4127325002963355e-06, "loss": 0.5574, "step": 7242 }, { "epoch": 0.76, "grad_norm": 2.3509763553384597, "learning_rate": 1.4115455254793436e-06, "loss": 0.5806, "step": 7243 }, { "epoch": 0.76, "grad_norm": 2.52195463891043, "learning_rate": 1.4103589675586176e-06, "loss": 0.6281, "step": 7244 }, { "epoch": 0.76, "grad_norm": 2.6057177351757734, "learning_rate": 1.4091728266720106e-06, "loss": 0.5374, "step": 7245 }, { "epoch": 0.76, "grad_norm": 2.378426724174707, "learning_rate": 1.4079871029573254e-06, "loss": 0.5312, "step": 7246 }, { "epoch": 0.76, "grad_norm": 2.4658224720117308, "learning_rate": 1.4068017965523146e-06, "loss": 0.6043, "step": 7247 }, { "epoch": 0.76, "grad_norm": 2.367796137825628, "learning_rate": 1.4056169075946846e-06, "loss": 0.5532, "step": 7248 }, { "epoch": 0.76, "grad_norm": 2.4057618387618436, "learning_rate": 1.4044324362220912e-06, "loss": 0.608, "step": 7249 }, { "epoch": 0.76, "grad_norm": 3.7601186688014003, "learning_rate": 1.4032483825721432e-06, "loss": 0.6427, "step": 7250 }, { "epoch": 0.76, "grad_norm": 2.5498571521744045, "learning_rate": 1.4020647467824028e-06, "loss": 0.6887, "step": 7251 }, { "epoch": 0.76, "grad_norm": 0.9344224790730087, "learning_rate": 1.4008815289903798e-06, "loss": 0.5365, "step": 7252 }, { "epoch": 0.76, "grad_norm": 2.219760527227203, "learning_rate": 1.3996987293335345e-06, "loss": 0.6433, "step": 7253 }, { "epoch": 0.76, "grad_norm": 8.310538446761164, "learning_rate": 1.3985163479492842e-06, "loss": 0.6225, "step": 7254 }, { "epoch": 0.76, "grad_norm": 2.8517117856482517, "learning_rate": 1.3973343849749948e-06, "loss": 0.6371, "step": 7255 }, { "epoch": 0.76, "grad_norm": 2.661140852133978, "learning_rate": 1.3961528405479824e-06, "loss": 0.6281, "step": 7256 }, { "epoch": 0.76, "grad_norm": 2.2805811042491286, "learning_rate": 1.3949717148055136e-06, "loss": 0.5957, "step": 7257 }, { "epoch": 0.76, "grad_norm": 3.300483005629338, "learning_rate": 1.39379100788481e-06, "loss": 0.6551, "step": 7258 }, { "epoch": 0.76, "grad_norm": 2.485309336234544, "learning_rate": 1.3926107199230442e-06, "loss": 0.6081, "step": 7259 }, { "epoch": 0.76, "grad_norm": 8.341174494875231, "learning_rate": 1.3914308510573354e-06, "loss": 0.6515, "step": 7260 }, { "epoch": 0.76, "grad_norm": 2.8732963511207283, "learning_rate": 1.3902514014247608e-06, "loss": 0.5541, "step": 7261 }, { "epoch": 0.76, "grad_norm": 2.2043168478650896, "learning_rate": 1.3890723711623421e-06, "loss": 0.588, "step": 7262 }, { "epoch": 0.76, "grad_norm": 2.853557533816863, "learning_rate": 1.3878937604070568e-06, "loss": 0.5689, "step": 7263 }, { "epoch": 0.76, "grad_norm": 2.8253444135108845, "learning_rate": 1.386715569295835e-06, "loss": 0.5745, "step": 7264 }, { "epoch": 0.76, "grad_norm": 0.9796152518485929, "learning_rate": 1.3855377979655533e-06, "loss": 0.5749, "step": 7265 }, { "epoch": 0.76, "grad_norm": 3.6160882171024125, "learning_rate": 1.3843604465530398e-06, "loss": 0.5622, "step": 7266 }, { "epoch": 0.76, "grad_norm": 2.0041115107011476, "learning_rate": 1.3831835151950784e-06, "loss": 0.5528, "step": 7267 }, { "epoch": 0.76, "grad_norm": 2.16044720788834, "learning_rate": 1.3820070040284023e-06, "loss": 0.6886, "step": 7268 }, { "epoch": 0.76, "grad_norm": 2.8645110276055927, "learning_rate": 1.380830913189694e-06, "loss": 0.6323, "step": 7269 }, { "epoch": 0.77, "grad_norm": 2.0345813564664548, "learning_rate": 1.3796552428155868e-06, "loss": 0.5463, "step": 7270 }, { "epoch": 0.77, "grad_norm": 2.4861411344231317, "learning_rate": 1.378479993042668e-06, "loss": 0.6813, "step": 7271 }, { "epoch": 0.77, "grad_norm": 2.5548781593021594, "learning_rate": 1.3773051640074764e-06, "loss": 0.6686, "step": 7272 }, { "epoch": 0.77, "grad_norm": 2.7242847584411165, "learning_rate": 1.3761307558464975e-06, "loss": 0.635, "step": 7273 }, { "epoch": 0.77, "grad_norm": 4.079673277666802, "learning_rate": 1.3749567686961728e-06, "loss": 0.6109, "step": 7274 }, { "epoch": 0.77, "grad_norm": 0.9119319275996398, "learning_rate": 1.3737832026928905e-06, "loss": 0.5083, "step": 7275 }, { "epoch": 0.77, "grad_norm": 2.645571565188374, "learning_rate": 1.3726100579729935e-06, "loss": 0.6791, "step": 7276 }, { "epoch": 0.77, "grad_norm": 2.280058640830768, "learning_rate": 1.3714373346727754e-06, "loss": 0.6213, "step": 7277 }, { "epoch": 0.77, "grad_norm": 2.3684177527062755, "learning_rate": 1.3702650329284794e-06, "loss": 0.6239, "step": 7278 }, { "epoch": 0.77, "grad_norm": 3.5713000588210955, "learning_rate": 1.3690931528762974e-06, "loss": 0.6513, "step": 7279 }, { "epoch": 0.77, "grad_norm": 2.191606619177731, "learning_rate": 1.3679216946523771e-06, "loss": 0.6179, "step": 7280 }, { "epoch": 0.77, "grad_norm": 2.520087458009411, "learning_rate": 1.3667506583928163e-06, "loss": 0.6273, "step": 7281 }, { "epoch": 0.77, "grad_norm": 2.8211254894412083, "learning_rate": 1.3655800442336597e-06, "loss": 0.6629, "step": 7282 }, { "epoch": 0.77, "grad_norm": 2.5218498030614986, "learning_rate": 1.3644098523109096e-06, "loss": 0.5631, "step": 7283 }, { "epoch": 0.77, "grad_norm": 2.187549795477403, "learning_rate": 1.3632400827605113e-06, "loss": 0.552, "step": 7284 }, { "epoch": 0.77, "grad_norm": 4.067246657340486, "learning_rate": 1.3620707357183694e-06, "loss": 0.6166, "step": 7285 }, { "epoch": 0.77, "grad_norm": 15.55712958329288, "learning_rate": 1.3609018113203314e-06, "loss": 0.6316, "step": 7286 }, { "epoch": 0.77, "grad_norm": 2.5699486322531566, "learning_rate": 1.3597333097022031e-06, "loss": 0.6579, "step": 7287 }, { "epoch": 0.77, "grad_norm": 2.606631331064434, "learning_rate": 1.3585652309997344e-06, "loss": 0.6924, "step": 7288 }, { "epoch": 0.77, "grad_norm": 2.4639655618590646, "learning_rate": 1.3573975753486313e-06, "loss": 0.6486, "step": 7289 }, { "epoch": 0.77, "grad_norm": 3.494505254711063, "learning_rate": 1.356230342884549e-06, "loss": 0.6138, "step": 7290 }, { "epoch": 0.77, "grad_norm": 2.6073670766020154, "learning_rate": 1.3550635337430928e-06, "loss": 0.5951, "step": 7291 }, { "epoch": 0.77, "grad_norm": 3.2580430689462734, "learning_rate": 1.3538971480598167e-06, "loss": 0.6027, "step": 7292 }, { "epoch": 0.77, "grad_norm": 0.9517406444625436, "learning_rate": 1.3527311859702308e-06, "loss": 0.5322, "step": 7293 }, { "epoch": 0.77, "grad_norm": 2.860206476602887, "learning_rate": 1.3515656476097937e-06, "loss": 0.5832, "step": 7294 }, { "epoch": 0.77, "grad_norm": 4.9771138110934405, "learning_rate": 1.350400533113912e-06, "loss": 0.543, "step": 7295 }, { "epoch": 0.77, "grad_norm": 2.582876236962234, "learning_rate": 1.3492358426179475e-06, "loss": 0.6746, "step": 7296 }, { "epoch": 0.77, "grad_norm": 2.0636423377041937, "learning_rate": 1.3480715762572078e-06, "loss": 0.657, "step": 7297 }, { "epoch": 0.77, "grad_norm": 2.220396192129797, "learning_rate": 1.3469077341669579e-06, "loss": 0.6404, "step": 7298 }, { "epoch": 0.77, "grad_norm": 2.8263887939999264, "learning_rate": 1.3457443164824053e-06, "loss": 0.6004, "step": 7299 }, { "epoch": 0.77, "grad_norm": 2.70882407444034, "learning_rate": 1.3445813233387167e-06, "loss": 0.7198, "step": 7300 }, { "epoch": 0.77, "grad_norm": 3.3517602576307355, "learning_rate": 1.3434187548710014e-06, "loss": 0.6133, "step": 7301 }, { "epoch": 0.77, "grad_norm": 2.8385464767154356, "learning_rate": 1.3422566112143248e-06, "loss": 0.527, "step": 7302 }, { "epoch": 0.77, "grad_norm": 2.7137520222257563, "learning_rate": 1.3410948925037037e-06, "loss": 0.7291, "step": 7303 }, { "epoch": 0.77, "grad_norm": 2.238965834669248, "learning_rate": 1.3399335988741007e-06, "loss": 0.6578, "step": 7304 }, { "epoch": 0.77, "grad_norm": 0.9827149232414178, "learning_rate": 1.338772730460431e-06, "loss": 0.5438, "step": 7305 }, { "epoch": 0.77, "grad_norm": 4.982687189717289, "learning_rate": 1.3376122873975616e-06, "loss": 0.5661, "step": 7306 }, { "epoch": 0.77, "grad_norm": 2.394229778749516, "learning_rate": 1.3364522698203114e-06, "loss": 0.5627, "step": 7307 }, { "epoch": 0.77, "grad_norm": 2.5676561438034358, "learning_rate": 1.3352926778634446e-06, "loss": 0.5664, "step": 7308 }, { "epoch": 0.77, "grad_norm": 4.69224823273842, "learning_rate": 1.3341335116616822e-06, "loss": 0.5727, "step": 7309 }, { "epoch": 0.77, "grad_norm": 2.8984898812420727, "learning_rate": 1.3329747713496904e-06, "loss": 0.6014, "step": 7310 }, { "epoch": 0.77, "grad_norm": 2.5133785385178657, "learning_rate": 1.331816457062089e-06, "loss": 0.6487, "step": 7311 }, { "epoch": 0.77, "grad_norm": 2.550922385813062, "learning_rate": 1.3306585689334494e-06, "loss": 0.605, "step": 7312 }, { "epoch": 0.77, "grad_norm": 2.6330342158074487, "learning_rate": 1.3295011070982906e-06, "loss": 0.5916, "step": 7313 }, { "epoch": 0.77, "grad_norm": 2.906335153096584, "learning_rate": 1.3283440716910812e-06, "loss": 0.6149, "step": 7314 }, { "epoch": 0.77, "grad_norm": 4.095159349488047, "learning_rate": 1.327187462846244e-06, "loss": 0.5979, "step": 7315 }, { "epoch": 0.77, "grad_norm": 2.8365869518598976, "learning_rate": 1.3260312806981517e-06, "loss": 0.4815, "step": 7316 }, { "epoch": 0.77, "grad_norm": 2.072771877951047, "learning_rate": 1.3248755253811236e-06, "loss": 0.5718, "step": 7317 }, { "epoch": 0.77, "grad_norm": 3.5814898012658896, "learning_rate": 1.3237201970294344e-06, "loss": 0.5606, "step": 7318 }, { "epoch": 0.77, "grad_norm": 2.290088791951938, "learning_rate": 1.3225652957773044e-06, "loss": 0.6147, "step": 7319 }, { "epoch": 0.77, "grad_norm": 2.485404940783879, "learning_rate": 1.3214108217589095e-06, "loss": 0.5559, "step": 7320 }, { "epoch": 0.77, "grad_norm": 2.890814837354043, "learning_rate": 1.3202567751083701e-06, "loss": 0.5864, "step": 7321 }, { "epoch": 0.77, "grad_norm": 2.3304011547001933, "learning_rate": 1.3191031559597628e-06, "loss": 0.686, "step": 7322 }, { "epoch": 0.77, "grad_norm": 2.939817245282438, "learning_rate": 1.3179499644471088e-06, "loss": 0.7083, "step": 7323 }, { "epoch": 0.77, "grad_norm": 1.9322448250042434, "learning_rate": 1.3167972007043844e-06, "loss": 0.617, "step": 7324 }, { "epoch": 0.77, "grad_norm": 2.2730836852993885, "learning_rate": 1.3156448648655163e-06, "loss": 0.6175, "step": 7325 }, { "epoch": 0.77, "grad_norm": 2.216691599440806, "learning_rate": 1.3144929570643767e-06, "loss": 0.5699, "step": 7326 }, { "epoch": 0.77, "grad_norm": 2.2709102711271103, "learning_rate": 1.3133414774347903e-06, "loss": 0.6783, "step": 7327 }, { "epoch": 0.77, "grad_norm": 3.7008961073983926, "learning_rate": 1.3121904261105339e-06, "loss": 0.5866, "step": 7328 }, { "epoch": 0.77, "grad_norm": 1.0182432463584763, "learning_rate": 1.3110398032253346e-06, "loss": 0.5087, "step": 7329 }, { "epoch": 0.77, "grad_norm": 1.0497274511645123, "learning_rate": 1.3098896089128666e-06, "loss": 0.5744, "step": 7330 }, { "epoch": 0.77, "grad_norm": 2.2533986618656225, "learning_rate": 1.3087398433067577e-06, "loss": 0.5694, "step": 7331 }, { "epoch": 0.77, "grad_norm": 2.4413558422766193, "learning_rate": 1.307590506540582e-06, "loss": 0.616, "step": 7332 }, { "epoch": 0.77, "grad_norm": 2.3644581867426897, "learning_rate": 1.3064415987478691e-06, "loss": 0.6208, "step": 7333 }, { "epoch": 0.77, "grad_norm": 0.9694715957645057, "learning_rate": 1.3052931200620926e-06, "loss": 0.5448, "step": 7334 }, { "epoch": 0.77, "grad_norm": 2.989251780651652, "learning_rate": 1.3041450706166831e-06, "loss": 0.5686, "step": 7335 }, { "epoch": 0.77, "grad_norm": 2.568697403594937, "learning_rate": 1.3029974505450137e-06, "loss": 0.6258, "step": 7336 }, { "epoch": 0.77, "grad_norm": 2.4006514658503697, "learning_rate": 1.301850259980414e-06, "loss": 0.587, "step": 7337 }, { "epoch": 0.77, "grad_norm": 2.4391008775614704, "learning_rate": 1.3007034990561619e-06, "loss": 0.6487, "step": 7338 }, { "epoch": 0.77, "grad_norm": 2.27176792056776, "learning_rate": 1.2995571679054835e-06, "loss": 0.6879, "step": 7339 }, { "epoch": 0.77, "grad_norm": 2.332459416723728, "learning_rate": 1.2984112666615555e-06, "loss": 0.5647, "step": 7340 }, { "epoch": 0.77, "grad_norm": 1.0350097122071702, "learning_rate": 1.2972657954575064e-06, "loss": 0.541, "step": 7341 }, { "epoch": 0.77, "grad_norm": 2.3128014660034273, "learning_rate": 1.2961207544264149e-06, "loss": 0.6494, "step": 7342 }, { "epoch": 0.77, "grad_norm": 2.281808135510517, "learning_rate": 1.2949761437013059e-06, "loss": 0.6048, "step": 7343 }, { "epoch": 0.77, "grad_norm": 2.495322499540691, "learning_rate": 1.2938319634151597e-06, "loss": 0.5942, "step": 7344 }, { "epoch": 0.77, "grad_norm": 3.914464189765621, "learning_rate": 1.2926882137009012e-06, "loss": 0.66, "step": 7345 }, { "epoch": 0.77, "grad_norm": 3.098291543647439, "learning_rate": 1.2915448946914106e-06, "loss": 0.5978, "step": 7346 }, { "epoch": 0.77, "grad_norm": 2.435837683057105, "learning_rate": 1.2904020065195127e-06, "loss": 0.5946, "step": 7347 }, { "epoch": 0.77, "grad_norm": 2.4918968126223633, "learning_rate": 1.2892595493179876e-06, "loss": 0.6562, "step": 7348 }, { "epoch": 0.77, "grad_norm": 2.2303147723923877, "learning_rate": 1.2881175232195604e-06, "loss": 0.6037, "step": 7349 }, { "epoch": 0.77, "grad_norm": 2.4085569825882223, "learning_rate": 1.2869759283569088e-06, "loss": 0.5633, "step": 7350 }, { "epoch": 0.77, "grad_norm": 2.9401916376736774, "learning_rate": 1.2858347648626623e-06, "loss": 0.6793, "step": 7351 }, { "epoch": 0.77, "grad_norm": 2.620281409590404, "learning_rate": 1.2846940328693952e-06, "loss": 0.5221, "step": 7352 }, { "epoch": 0.77, "grad_norm": 2.460645653228923, "learning_rate": 1.2835537325096364e-06, "loss": 0.6427, "step": 7353 }, { "epoch": 0.77, "grad_norm": 2.1645773174699605, "learning_rate": 1.2824138639158607e-06, "loss": 0.5951, "step": 7354 }, { "epoch": 0.77, "grad_norm": 1.940681938546237, "learning_rate": 1.2812744272204969e-06, "loss": 0.6173, "step": 7355 }, { "epoch": 0.77, "grad_norm": 1.9852641803573394, "learning_rate": 1.2801354225559194e-06, "loss": 0.634, "step": 7356 }, { "epoch": 0.77, "grad_norm": 2.408422665342694, "learning_rate": 1.2789968500544563e-06, "loss": 0.5832, "step": 7357 }, { "epoch": 0.77, "grad_norm": 2.54563268747217, "learning_rate": 1.277858709848382e-06, "loss": 0.5927, "step": 7358 }, { "epoch": 0.77, "grad_norm": 3.892292877622211, "learning_rate": 1.2767210020699234e-06, "loss": 0.6332, "step": 7359 }, { "epoch": 0.77, "grad_norm": 0.9690186601279351, "learning_rate": 1.2755837268512566e-06, "loss": 0.5541, "step": 7360 }, { "epoch": 0.77, "grad_norm": 2.3973108957180673, "learning_rate": 1.2744468843245066e-06, "loss": 0.6973, "step": 7361 }, { "epoch": 0.77, "grad_norm": 9.445703072300839, "learning_rate": 1.2733104746217468e-06, "loss": 0.5102, "step": 7362 }, { "epoch": 0.77, "grad_norm": 2.4460880394351956, "learning_rate": 1.2721744978750028e-06, "loss": 0.6275, "step": 7363 }, { "epoch": 0.77, "grad_norm": 2.5731172106909193, "learning_rate": 1.271038954216251e-06, "loss": 0.567, "step": 7364 }, { "epoch": 0.78, "grad_norm": 3.312904188551475, "learning_rate": 1.269903843777413e-06, "loss": 0.6004, "step": 7365 }, { "epoch": 0.78, "grad_norm": 2.3261426877154916, "learning_rate": 1.2687691666903657e-06, "loss": 0.5648, "step": 7366 }, { "epoch": 0.78, "grad_norm": 4.634024092091831, "learning_rate": 1.2676349230869283e-06, "loss": 0.6585, "step": 7367 }, { "epoch": 0.78, "grad_norm": 2.5573244615411075, "learning_rate": 1.2665011130988786e-06, "loss": 0.5732, "step": 7368 }, { "epoch": 0.78, "grad_norm": 2.2354267402034234, "learning_rate": 1.2653677368579354e-06, "loss": 0.5843, "step": 7369 }, { "epoch": 0.78, "grad_norm": 2.2289656447356867, "learning_rate": 1.2642347944957744e-06, "loss": 0.7049, "step": 7370 }, { "epoch": 0.78, "grad_norm": 3.1454417786703353, "learning_rate": 1.2631022861440145e-06, "loss": 0.5499, "step": 7371 }, { "epoch": 0.78, "grad_norm": 3.367273641722864, "learning_rate": 1.2619702119342286e-06, "loss": 0.5656, "step": 7372 }, { "epoch": 0.78, "grad_norm": 2.156216839264307, "learning_rate": 1.2608385719979394e-06, "loss": 0.6229, "step": 7373 }, { "epoch": 0.78, "grad_norm": 3.707241733582545, "learning_rate": 1.2597073664666159e-06, "loss": 0.5974, "step": 7374 }, { "epoch": 0.78, "grad_norm": 2.589860918286414, "learning_rate": 1.2585765954716773e-06, "loss": 0.6479, "step": 7375 }, { "epoch": 0.78, "grad_norm": 2.1972502473528763, "learning_rate": 1.257446259144494e-06, "loss": 0.6352, "step": 7376 }, { "epoch": 0.78, "grad_norm": 2.8739108933647244, "learning_rate": 1.2563163576163879e-06, "loss": 0.6169, "step": 7377 }, { "epoch": 0.78, "grad_norm": 2.599760260919812, "learning_rate": 1.2551868910186238e-06, "loss": 0.5686, "step": 7378 }, { "epoch": 0.78, "grad_norm": 1.0036011302793741, "learning_rate": 1.2540578594824226e-06, "loss": 0.5261, "step": 7379 }, { "epoch": 0.78, "grad_norm": 3.1369364911691604, "learning_rate": 1.25292926313895e-06, "loss": 0.5366, "step": 7380 }, { "epoch": 0.78, "grad_norm": 2.733863227114797, "learning_rate": 1.251801102119325e-06, "loss": 0.5542, "step": 7381 }, { "epoch": 0.78, "grad_norm": 4.392211563561417, "learning_rate": 1.2506733765546115e-06, "loss": 0.5557, "step": 7382 }, { "epoch": 0.78, "grad_norm": 7.2289899376136315, "learning_rate": 1.2495460865758286e-06, "loss": 0.6201, "step": 7383 }, { "epoch": 0.78, "grad_norm": 2.389324115301021, "learning_rate": 1.2484192323139382e-06, "loss": 0.5614, "step": 7384 }, { "epoch": 0.78, "grad_norm": 2.4415323240042524, "learning_rate": 1.2472928138998569e-06, "loss": 0.5961, "step": 7385 }, { "epoch": 0.78, "grad_norm": 3.376325421905973, "learning_rate": 1.2461668314644499e-06, "loss": 0.551, "step": 7386 }, { "epoch": 0.78, "grad_norm": 2.6372801647944795, "learning_rate": 1.2450412851385275e-06, "loss": 0.6422, "step": 7387 }, { "epoch": 0.78, "grad_norm": 1.0428132195188833, "learning_rate": 1.2439161750528555e-06, "loss": 0.5497, "step": 7388 }, { "epoch": 0.78, "grad_norm": 4.5630278208472985, "learning_rate": 1.2427915013381436e-06, "loss": 0.5869, "step": 7389 }, { "epoch": 0.78, "grad_norm": 2.0559100223708637, "learning_rate": 1.2416672641250548e-06, "loss": 0.5977, "step": 7390 }, { "epoch": 0.78, "grad_norm": 2.4409035745001746, "learning_rate": 1.2405434635441982e-06, "loss": 0.559, "step": 7391 }, { "epoch": 0.78, "grad_norm": 4.831933762565664, "learning_rate": 1.2394200997261358e-06, "loss": 0.6682, "step": 7392 }, { "epoch": 0.78, "grad_norm": 0.8825046392899807, "learning_rate": 1.2382971728013742e-06, "loss": 0.5093, "step": 7393 }, { "epoch": 0.78, "grad_norm": 2.5797992352654386, "learning_rate": 1.2371746829003745e-06, "loss": 0.6392, "step": 7394 }, { "epoch": 0.78, "grad_norm": 2.9414008384348906, "learning_rate": 1.2360526301535408e-06, "loss": 0.5538, "step": 7395 }, { "epoch": 0.78, "grad_norm": 2.37560198530972, "learning_rate": 1.234931014691234e-06, "loss": 0.6542, "step": 7396 }, { "epoch": 0.78, "grad_norm": 2.760017872274412, "learning_rate": 1.2338098366437574e-06, "loss": 0.6122, "step": 7397 }, { "epoch": 0.78, "grad_norm": 2.8422640961336607, "learning_rate": 1.2326890961413663e-06, "loss": 0.5873, "step": 7398 }, { "epoch": 0.78, "grad_norm": 3.1453980578084426, "learning_rate": 1.2315687933142672e-06, "loss": 0.6239, "step": 7399 }, { "epoch": 0.78, "grad_norm": 2.749851183004622, "learning_rate": 1.2304489282926109e-06, "loss": 0.5232, "step": 7400 }, { "epoch": 0.78, "grad_norm": 2.472329629666702, "learning_rate": 1.2293295012065032e-06, "loss": 0.6043, "step": 7401 }, { "epoch": 0.78, "grad_norm": 2.7136693979225757, "learning_rate": 1.228210512185992e-06, "loss": 0.572, "step": 7402 }, { "epoch": 0.78, "grad_norm": 2.3539836790068907, "learning_rate": 1.2270919613610828e-06, "loss": 0.5805, "step": 7403 }, { "epoch": 0.78, "grad_norm": 2.4768244757191487, "learning_rate": 1.2259738488617211e-06, "loss": 0.6012, "step": 7404 }, { "epoch": 0.78, "grad_norm": 2.6680180975475234, "learning_rate": 1.2248561748178094e-06, "loss": 0.647, "step": 7405 }, { "epoch": 0.78, "grad_norm": 2.2801324593756784, "learning_rate": 1.2237389393591931e-06, "loss": 0.688, "step": 7406 }, { "epoch": 0.78, "grad_norm": 3.0717420409196436, "learning_rate": 1.222622142615671e-06, "loss": 0.6413, "step": 7407 }, { "epoch": 0.78, "grad_norm": 2.3216939444158515, "learning_rate": 1.2215057847169904e-06, "loss": 0.5048, "step": 7408 }, { "epoch": 0.78, "grad_norm": 2.7933790970927896, "learning_rate": 1.2203898657928453e-06, "loss": 0.6127, "step": 7409 }, { "epoch": 0.78, "grad_norm": 2.6973298552671556, "learning_rate": 1.2192743859728784e-06, "loss": 0.5957, "step": 7410 }, { "epoch": 0.78, "grad_norm": 3.010078673364939, "learning_rate": 1.2181593453866841e-06, "loss": 0.5948, "step": 7411 }, { "epoch": 0.78, "grad_norm": 1.0077226242761792, "learning_rate": 1.2170447441638067e-06, "loss": 0.5568, "step": 7412 }, { "epoch": 0.78, "grad_norm": 2.3706862467692345, "learning_rate": 1.2159305824337337e-06, "loss": 0.5863, "step": 7413 }, { "epoch": 0.78, "grad_norm": 3.0863837485718935, "learning_rate": 1.2148168603259086e-06, "loss": 0.6177, "step": 7414 }, { "epoch": 0.78, "grad_norm": 3.246542506000711, "learning_rate": 1.213703577969717e-06, "loss": 0.5643, "step": 7415 }, { "epoch": 0.78, "grad_norm": 2.2507630179043505, "learning_rate": 1.2125907354945004e-06, "loss": 0.5588, "step": 7416 }, { "epoch": 0.78, "grad_norm": 3.077759944294835, "learning_rate": 1.2114783330295426e-06, "loss": 0.6429, "step": 7417 }, { "epoch": 0.78, "grad_norm": 2.1001739169623956, "learning_rate": 1.210366370704082e-06, "loss": 0.6044, "step": 7418 }, { "epoch": 0.78, "grad_norm": 2.0635599866105245, "learning_rate": 1.2092548486473e-06, "loss": 0.5829, "step": 7419 }, { "epoch": 0.78, "grad_norm": 2.7552402862941308, "learning_rate": 1.2081437669883323e-06, "loss": 0.611, "step": 7420 }, { "epoch": 0.78, "grad_norm": 2.6069981960534463, "learning_rate": 1.2070331258562612e-06, "loss": 0.6104, "step": 7421 }, { "epoch": 0.78, "grad_norm": 2.173648068395166, "learning_rate": 1.2059229253801164e-06, "loss": 0.6458, "step": 7422 }, { "epoch": 0.78, "grad_norm": 2.606347301566492, "learning_rate": 1.2048131656888801e-06, "loss": 0.6129, "step": 7423 }, { "epoch": 0.78, "grad_norm": 4.002157820915137, "learning_rate": 1.2037038469114775e-06, "loss": 0.6574, "step": 7424 }, { "epoch": 0.78, "grad_norm": 2.1433237595069006, "learning_rate": 1.2025949691767895e-06, "loss": 0.6796, "step": 7425 }, { "epoch": 0.78, "grad_norm": 2.6482892199703247, "learning_rate": 1.2014865326136393e-06, "loss": 0.6542, "step": 7426 }, { "epoch": 0.78, "grad_norm": 2.8554690646087244, "learning_rate": 1.2003785373508054e-06, "loss": 0.5909, "step": 7427 }, { "epoch": 0.78, "grad_norm": 2.1151193571586755, "learning_rate": 1.1992709835170075e-06, "loss": 0.6143, "step": 7428 }, { "epoch": 0.78, "grad_norm": 2.3281163582456483, "learning_rate": 1.198163871240921e-06, "loss": 0.4967, "step": 7429 }, { "epoch": 0.78, "grad_norm": 2.3181484526166045, "learning_rate": 1.197057200651165e-06, "loss": 0.6032, "step": 7430 }, { "epoch": 0.78, "grad_norm": 4.4330393599119065, "learning_rate": 1.195950971876312e-06, "loss": 0.5914, "step": 7431 }, { "epoch": 0.78, "grad_norm": 2.740984634407769, "learning_rate": 1.1948451850448767e-06, "loss": 0.7403, "step": 7432 }, { "epoch": 0.78, "grad_norm": 2.3670282194811905, "learning_rate": 1.1937398402853283e-06, "loss": 0.5251, "step": 7433 }, { "epoch": 0.78, "grad_norm": 1.0276430189430175, "learning_rate": 1.1926349377260843e-06, "loss": 0.538, "step": 7434 }, { "epoch": 0.78, "grad_norm": 2.4380874996957984, "learning_rate": 1.1915304774955054e-06, "loss": 0.6245, "step": 7435 }, { "epoch": 0.78, "grad_norm": 3.242941226659693, "learning_rate": 1.1904264597219078e-06, "loss": 0.5773, "step": 7436 }, { "epoch": 0.78, "grad_norm": 2.993175401734412, "learning_rate": 1.189322884533551e-06, "loss": 0.6, "step": 7437 }, { "epoch": 0.78, "grad_norm": 0.9803046786575148, "learning_rate": 1.1882197520586464e-06, "loss": 0.5364, "step": 7438 }, { "epoch": 0.78, "grad_norm": 3.157862409696275, "learning_rate": 1.1871170624253515e-06, "loss": 0.5523, "step": 7439 }, { "epoch": 0.78, "grad_norm": 2.5580589814696273, "learning_rate": 1.1860148157617757e-06, "loss": 0.6233, "step": 7440 }, { "epoch": 0.78, "grad_norm": 2.4718486982211854, "learning_rate": 1.1849130121959717e-06, "loss": 0.5901, "step": 7441 }, { "epoch": 0.78, "grad_norm": 2.4594217860273098, "learning_rate": 1.1838116518559474e-06, "loss": 0.6861, "step": 7442 }, { "epoch": 0.78, "grad_norm": 0.9338351636324551, "learning_rate": 1.1827107348696526e-06, "loss": 0.5445, "step": 7443 }, { "epoch": 0.78, "grad_norm": 2.3059234955489814, "learning_rate": 1.181610261364991e-06, "loss": 0.5499, "step": 7444 }, { "epoch": 0.78, "grad_norm": 2.581067104733737, "learning_rate": 1.1805102314698103e-06, "loss": 0.6374, "step": 7445 }, { "epoch": 0.78, "grad_norm": 2.9256095254675536, "learning_rate": 1.1794106453119098e-06, "loss": 0.5952, "step": 7446 }, { "epoch": 0.78, "grad_norm": 2.5727809288583776, "learning_rate": 1.1783115030190378e-06, "loss": 0.6924, "step": 7447 }, { "epoch": 0.78, "grad_norm": 4.922966924396616, "learning_rate": 1.1772128047188864e-06, "loss": 0.6865, "step": 7448 }, { "epoch": 0.78, "grad_norm": 2.39429256222939, "learning_rate": 1.1761145505391025e-06, "loss": 0.6303, "step": 7449 }, { "epoch": 0.78, "grad_norm": 2.7885079254569667, "learning_rate": 1.1750167406072743e-06, "loss": 0.5501, "step": 7450 }, { "epoch": 0.78, "grad_norm": 2.4497210304886603, "learning_rate": 1.1739193750509465e-06, "loss": 0.5928, "step": 7451 }, { "epoch": 0.78, "grad_norm": 2.4756214907759198, "learning_rate": 1.1728224539976035e-06, "loss": 0.5829, "step": 7452 }, { "epoch": 0.78, "grad_norm": 2.503009767146594, "learning_rate": 1.1717259775746865e-06, "loss": 0.5743, "step": 7453 }, { "epoch": 0.78, "grad_norm": 2.4617513372135003, "learning_rate": 1.1706299459095776e-06, "loss": 0.6251, "step": 7454 }, { "epoch": 0.78, "grad_norm": 2.2715662368230274, "learning_rate": 1.1695343591296115e-06, "loss": 0.6545, "step": 7455 }, { "epoch": 0.78, "grad_norm": 3.2007462619830287, "learning_rate": 1.1684392173620729e-06, "loss": 0.5534, "step": 7456 }, { "epoch": 0.78, "grad_norm": 0.9869030111837869, "learning_rate": 1.1673445207341882e-06, "loss": 0.534, "step": 7457 }, { "epoch": 0.78, "grad_norm": 0.9843494999894192, "learning_rate": 1.1662502693731393e-06, "loss": 0.5375, "step": 7458 }, { "epoch": 0.78, "grad_norm": 2.964320138816884, "learning_rate": 1.1651564634060509e-06, "loss": 0.5889, "step": 7459 }, { "epoch": 0.79, "grad_norm": 2.3924454080709854, "learning_rate": 1.1640631029600002e-06, "loss": 0.5073, "step": 7460 }, { "epoch": 0.79, "grad_norm": 2.8236225380832143, "learning_rate": 1.1629701881620086e-06, "loss": 0.6282, "step": 7461 }, { "epoch": 0.79, "grad_norm": 3.172480547369297, "learning_rate": 1.1618777191390502e-06, "loss": 0.5964, "step": 7462 }, { "epoch": 0.79, "grad_norm": 2.810127828045121, "learning_rate": 1.1607856960180413e-06, "loss": 0.6193, "step": 7463 }, { "epoch": 0.79, "grad_norm": 2.6027058379471204, "learning_rate": 1.1596941189258542e-06, "loss": 0.5957, "step": 7464 }, { "epoch": 0.79, "grad_norm": 2.5799914326081077, "learning_rate": 1.1586029879893018e-06, "loss": 0.5961, "step": 7465 }, { "epoch": 0.79, "grad_norm": 2.272129728569525, "learning_rate": 1.1575123033351514e-06, "loss": 0.574, "step": 7466 }, { "epoch": 0.79, "grad_norm": 3.353854643372349, "learning_rate": 1.1564220650901126e-06, "loss": 0.548, "step": 7467 }, { "epoch": 0.79, "grad_norm": 2.284598981438445, "learning_rate": 1.1553322733808474e-06, "loss": 0.6057, "step": 7468 }, { "epoch": 0.79, "grad_norm": 2.5638147489723737, "learning_rate": 1.1542429283339669e-06, "loss": 0.625, "step": 7469 }, { "epoch": 0.79, "grad_norm": 2.859248521334551, "learning_rate": 1.153154030076024e-06, "loss": 0.5896, "step": 7470 }, { "epoch": 0.79, "grad_norm": 2.1507209034830352, "learning_rate": 1.1520655787335272e-06, "loss": 0.5909, "step": 7471 }, { "epoch": 0.79, "grad_norm": 2.2804744847150293, "learning_rate": 1.150977574432927e-06, "loss": 0.6216, "step": 7472 }, { "epoch": 0.79, "grad_norm": 2.2755713818694026, "learning_rate": 1.1498900173006271e-06, "loss": 0.5953, "step": 7473 }, { "epoch": 0.79, "grad_norm": 2.4375823375991676, "learning_rate": 1.1488029074629742e-06, "loss": 0.5324, "step": 7474 }, { "epoch": 0.79, "grad_norm": 2.6695360497471374, "learning_rate": 1.1477162450462681e-06, "loss": 0.5473, "step": 7475 }, { "epoch": 0.79, "grad_norm": 3.055784414516847, "learning_rate": 1.1466300301767513e-06, "loss": 0.6207, "step": 7476 }, { "epoch": 0.79, "grad_norm": 2.3379616900640507, "learning_rate": 1.1455442629806208e-06, "loss": 0.57, "step": 7477 }, { "epoch": 0.79, "grad_norm": 2.3050676485829817, "learning_rate": 1.1444589435840136e-06, "loss": 0.5941, "step": 7478 }, { "epoch": 0.79, "grad_norm": 2.2327610274880056, "learning_rate": 1.1433740721130227e-06, "loss": 0.6705, "step": 7479 }, { "epoch": 0.79, "grad_norm": 2.318030102730509, "learning_rate": 1.1422896486936819e-06, "loss": 0.5924, "step": 7480 }, { "epoch": 0.79, "grad_norm": 2.3743209801016847, "learning_rate": 1.1412056734519788e-06, "loss": 0.6173, "step": 7481 }, { "epoch": 0.79, "grad_norm": 2.34093180724465, "learning_rate": 1.1401221465138468e-06, "loss": 0.6462, "step": 7482 }, { "epoch": 0.79, "grad_norm": 2.6932639467851116, "learning_rate": 1.1390390680051649e-06, "loss": 0.6229, "step": 7483 }, { "epoch": 0.79, "grad_norm": 2.346396013718988, "learning_rate": 1.1379564380517648e-06, "loss": 0.6471, "step": 7484 }, { "epoch": 0.79, "grad_norm": 2.5191557971368876, "learning_rate": 1.1368742567794199e-06, "loss": 0.5747, "step": 7485 }, { "epoch": 0.79, "grad_norm": 2.187304261864126, "learning_rate": 1.1357925243138585e-06, "loss": 0.6561, "step": 7486 }, { "epoch": 0.79, "grad_norm": 11.808798126077034, "learning_rate": 1.1347112407807499e-06, "loss": 0.61, "step": 7487 }, { "epoch": 0.79, "grad_norm": 2.5589240903441346, "learning_rate": 1.1336304063057169e-06, "loss": 0.5923, "step": 7488 }, { "epoch": 0.79, "grad_norm": 2.2691294777604085, "learning_rate": 1.1325500210143253e-06, "loss": 0.6155, "step": 7489 }, { "epoch": 0.79, "grad_norm": 2.901817891190817, "learning_rate": 1.1314700850320948e-06, "loss": 0.6281, "step": 7490 }, { "epoch": 0.79, "grad_norm": 2.711266040481578, "learning_rate": 1.1303905984844848e-06, "loss": 0.6484, "step": 7491 }, { "epoch": 0.79, "grad_norm": 2.2630100169064113, "learning_rate": 1.1293115614969109e-06, "loss": 0.5749, "step": 7492 }, { "epoch": 0.79, "grad_norm": 2.207926785870351, "learning_rate": 1.1282329741947295e-06, "loss": 0.584, "step": 7493 }, { "epoch": 0.79, "grad_norm": 2.3024880371784513, "learning_rate": 1.1271548367032487e-06, "loss": 0.6285, "step": 7494 }, { "epoch": 0.79, "grad_norm": 2.259781922595136, "learning_rate": 1.1260771491477252e-06, "loss": 0.534, "step": 7495 }, { "epoch": 0.79, "grad_norm": 2.3229570086330815, "learning_rate": 1.1249999116533589e-06, "loss": 0.5503, "step": 7496 }, { "epoch": 0.79, "grad_norm": 2.553935192730057, "learning_rate": 1.1239231243453025e-06, "loss": 0.664, "step": 7497 }, { "epoch": 0.79, "grad_norm": 0.92848847705716, "learning_rate": 1.122846787348652e-06, "loss": 0.5382, "step": 7498 }, { "epoch": 0.79, "grad_norm": 2.25087708292023, "learning_rate": 1.1217709007884548e-06, "loss": 0.6478, "step": 7499 }, { "epoch": 0.79, "grad_norm": 2.887135255747007, "learning_rate": 1.1206954647897023e-06, "loss": 0.6988, "step": 7500 }, { "epoch": 0.79, "grad_norm": 2.881071494956094, "learning_rate": 1.1196204794773385e-06, "loss": 0.6754, "step": 7501 }, { "epoch": 0.79, "grad_norm": 3.8124395050447877, "learning_rate": 1.1185459449762486e-06, "loss": 0.6218, "step": 7502 }, { "epoch": 0.79, "grad_norm": 5.5635427523252625, "learning_rate": 1.1174718614112711e-06, "loss": 0.6018, "step": 7503 }, { "epoch": 0.79, "grad_norm": 2.1176840263880585, "learning_rate": 1.1163982289071907e-06, "loss": 0.5724, "step": 7504 }, { "epoch": 0.79, "grad_norm": 2.389675591616161, "learning_rate": 1.1153250475887362e-06, "loss": 0.658, "step": 7505 }, { "epoch": 0.79, "grad_norm": 3.2712660526620176, "learning_rate": 1.1142523175805896e-06, "loss": 0.6494, "step": 7506 }, { "epoch": 0.79, "grad_norm": 2.3498472808911988, "learning_rate": 1.113180039007375e-06, "loss": 0.6334, "step": 7507 }, { "epoch": 0.79, "grad_norm": 4.183640363375537, "learning_rate": 1.112108211993669e-06, "loss": 0.6067, "step": 7508 }, { "epoch": 0.79, "grad_norm": 4.308029778683292, "learning_rate": 1.1110368366639906e-06, "loss": 0.6066, "step": 7509 }, { "epoch": 0.79, "grad_norm": 8.528977099429406, "learning_rate": 1.109965913142812e-06, "loss": 0.6271, "step": 7510 }, { "epoch": 0.79, "grad_norm": 2.7928225891739076, "learning_rate": 1.1088954415545478e-06, "loss": 0.5326, "step": 7511 }, { "epoch": 0.79, "grad_norm": 2.4456570967777926, "learning_rate": 1.107825422023564e-06, "loss": 0.6375, "step": 7512 }, { "epoch": 0.79, "grad_norm": 3.0183023237081814, "learning_rate": 1.1067558546741708e-06, "loss": 0.6086, "step": 7513 }, { "epoch": 0.79, "grad_norm": 2.757754038229962, "learning_rate": 1.1056867396306293e-06, "loss": 0.5815, "step": 7514 }, { "epoch": 0.79, "grad_norm": 2.5086536878121533, "learning_rate": 1.1046180770171433e-06, "loss": 0.5206, "step": 7515 }, { "epoch": 0.79, "grad_norm": 1.022738679492978, "learning_rate": 1.1035498669578693e-06, "loss": 0.5553, "step": 7516 }, { "epoch": 0.79, "grad_norm": 2.9716402092398586, "learning_rate": 1.1024821095769089e-06, "loss": 0.5598, "step": 7517 }, { "epoch": 0.79, "grad_norm": 2.7359639039165096, "learning_rate": 1.1014148049983097e-06, "loss": 0.6813, "step": 7518 }, { "epoch": 0.79, "grad_norm": 2.3089260079854244, "learning_rate": 1.1003479533460698e-06, "loss": 0.5112, "step": 7519 }, { "epoch": 0.79, "grad_norm": 3.623336530654308, "learning_rate": 1.099281554744131e-06, "loss": 0.6542, "step": 7520 }, { "epoch": 0.79, "grad_norm": 2.247425868920514, "learning_rate": 1.0982156093163864e-06, "loss": 0.571, "step": 7521 }, { "epoch": 0.79, "grad_norm": 4.231031748647357, "learning_rate": 1.0971501171866717e-06, "loss": 0.5311, "step": 7522 }, { "epoch": 0.79, "grad_norm": 2.718419525290093, "learning_rate": 1.0960850784787763e-06, "loss": 0.5205, "step": 7523 }, { "epoch": 0.79, "grad_norm": 3.0286666822872843, "learning_rate": 1.09502049331643e-06, "loss": 0.611, "step": 7524 }, { "epoch": 0.79, "grad_norm": 2.9672246988207207, "learning_rate": 1.0939563618233156e-06, "loss": 0.5913, "step": 7525 }, { "epoch": 0.79, "grad_norm": 2.6418538967198884, "learning_rate": 1.0928926841230585e-06, "loss": 0.59, "step": 7526 }, { "epoch": 0.79, "grad_norm": 2.2332248750501242, "learning_rate": 1.0918294603392371e-06, "loss": 0.5947, "step": 7527 }, { "epoch": 0.79, "grad_norm": 2.5309030658613185, "learning_rate": 1.0907666905953696e-06, "loss": 0.5941, "step": 7528 }, { "epoch": 0.79, "grad_norm": 2.3540324184287456, "learning_rate": 1.0897043750149277e-06, "loss": 0.6044, "step": 7529 }, { "epoch": 0.79, "grad_norm": 2.770490477689187, "learning_rate": 1.0886425137213297e-06, "loss": 0.594, "step": 7530 }, { "epoch": 0.79, "grad_norm": 2.6612418290795157, "learning_rate": 1.087581106837936e-06, "loss": 0.5736, "step": 7531 }, { "epoch": 0.79, "grad_norm": 2.6742845066681014, "learning_rate": 1.086520154488061e-06, "loss": 0.6691, "step": 7532 }, { "epoch": 0.79, "grad_norm": 2.9951587604725614, "learning_rate": 1.0854596567949605e-06, "loss": 0.5694, "step": 7533 }, { "epoch": 0.79, "grad_norm": 2.5789976556897116, "learning_rate": 1.084399613881843e-06, "loss": 0.6256, "step": 7534 }, { "epoch": 0.79, "grad_norm": 3.7119280648765347, "learning_rate": 1.0833400258718579e-06, "loss": 0.5858, "step": 7535 }, { "epoch": 0.79, "grad_norm": 3.597761699649087, "learning_rate": 1.0822808928881078e-06, "loss": 0.5949, "step": 7536 }, { "epoch": 0.79, "grad_norm": 2.7367475679821016, "learning_rate": 1.0812222150536379e-06, "loss": 0.4645, "step": 7537 }, { "epoch": 0.79, "grad_norm": 2.775373037805519, "learning_rate": 1.0801639924914437e-06, "loss": 0.6371, "step": 7538 }, { "epoch": 0.79, "grad_norm": 2.673800251379294, "learning_rate": 1.0791062253244644e-06, "loss": 0.562, "step": 7539 }, { "epoch": 0.79, "grad_norm": 4.733905978311876, "learning_rate": 1.07804891367559e-06, "loss": 0.6194, "step": 7540 }, { "epoch": 0.79, "grad_norm": 2.189114404251597, "learning_rate": 1.0769920576676569e-06, "loss": 0.5761, "step": 7541 }, { "epoch": 0.79, "grad_norm": 2.283883510957929, "learning_rate": 1.0759356574234447e-06, "loss": 0.6039, "step": 7542 }, { "epoch": 0.79, "grad_norm": 3.347107417686752, "learning_rate": 1.0748797130656862e-06, "loss": 0.6732, "step": 7543 }, { "epoch": 0.79, "grad_norm": 3.3757009779681755, "learning_rate": 1.0738242247170549e-06, "loss": 0.5663, "step": 7544 }, { "epoch": 0.79, "grad_norm": 2.1237347595673874, "learning_rate": 1.0727691925001765e-06, "loss": 0.6165, "step": 7545 }, { "epoch": 0.79, "grad_norm": 3.0239588731460882, "learning_rate": 1.0717146165376202e-06, "loss": 0.6566, "step": 7546 }, { "epoch": 0.79, "grad_norm": 2.560705932085416, "learning_rate": 1.0706604969519052e-06, "loss": 0.6015, "step": 7547 }, { "epoch": 0.79, "grad_norm": 2.331047433053311, "learning_rate": 1.069606833865494e-06, "loss": 0.6184, "step": 7548 }, { "epoch": 0.79, "grad_norm": 2.347877312775048, "learning_rate": 1.0685536274008002e-06, "loss": 0.5979, "step": 7549 }, { "epoch": 0.79, "grad_norm": 2.246678913685663, "learning_rate": 1.0675008776801804e-06, "loss": 0.5749, "step": 7550 }, { "epoch": 0.79, "grad_norm": 7.0685585261266795, "learning_rate": 1.066448584825942e-06, "loss": 0.6043, "step": 7551 }, { "epoch": 0.79, "grad_norm": 2.204865595293493, "learning_rate": 1.065396748960335e-06, "loss": 0.6199, "step": 7552 }, { "epoch": 0.79, "grad_norm": 2.0913674075484185, "learning_rate": 1.06434537020556e-06, "loss": 0.5869, "step": 7553 }, { "epoch": 0.79, "grad_norm": 2.7478603902570047, "learning_rate": 1.0632944486837642e-06, "loss": 0.5817, "step": 7554 }, { "epoch": 0.8, "grad_norm": 2.327839999993541, "learning_rate": 1.0622439845170385e-06, "loss": 0.6111, "step": 7555 }, { "epoch": 0.8, "grad_norm": 3.0045359616694594, "learning_rate": 1.061193977827425e-06, "loss": 0.609, "step": 7556 }, { "epoch": 0.8, "grad_norm": 2.062618698945312, "learning_rate": 1.0601444287369073e-06, "loss": 0.6086, "step": 7557 }, { "epoch": 0.8, "grad_norm": 2.4008049526313604, "learning_rate": 1.0590953373674229e-06, "loss": 0.6595, "step": 7558 }, { "epoch": 0.8, "grad_norm": 4.1811579275109, "learning_rate": 1.0580467038408487e-06, "loss": 0.5707, "step": 7559 }, { "epoch": 0.8, "grad_norm": 2.3155227686288713, "learning_rate": 1.0569985282790145e-06, "loss": 0.6299, "step": 7560 }, { "epoch": 0.8, "grad_norm": 3.5347215291469682, "learning_rate": 1.0559508108036926e-06, "loss": 0.5662, "step": 7561 }, { "epoch": 0.8, "grad_norm": 2.007580421779567, "learning_rate": 1.0549035515366052e-06, "loss": 0.5217, "step": 7562 }, { "epoch": 0.8, "grad_norm": 3.308048869247993, "learning_rate": 1.0538567505994175e-06, "loss": 0.6871, "step": 7563 }, { "epoch": 0.8, "grad_norm": 2.425142794535516, "learning_rate": 1.052810408113746e-06, "loss": 0.643, "step": 7564 }, { "epoch": 0.8, "grad_norm": 5.639780916922588, "learning_rate": 1.051764524201152e-06, "loss": 0.5547, "step": 7565 }, { "epoch": 0.8, "grad_norm": 2.474431995538235, "learning_rate": 1.0507190989831412e-06, "loss": 0.632, "step": 7566 }, { "epoch": 0.8, "grad_norm": 3.2149333641540214, "learning_rate": 1.0496741325811705e-06, "loss": 0.6219, "step": 7567 }, { "epoch": 0.8, "grad_norm": 2.0743039032781865, "learning_rate": 1.0486296251166383e-06, "loss": 0.5175, "step": 7568 }, { "epoch": 0.8, "grad_norm": 2.633639411660315, "learning_rate": 1.0475855767108956e-06, "loss": 0.6497, "step": 7569 }, { "epoch": 0.8, "grad_norm": 2.457721991255091, "learning_rate": 1.0465419874852338e-06, "loss": 0.5951, "step": 7570 }, { "epoch": 0.8, "grad_norm": 2.239650791365524, "learning_rate": 1.0454988575608976e-06, "loss": 0.5779, "step": 7571 }, { "epoch": 0.8, "grad_norm": 3.0137305763833675, "learning_rate": 1.0444561870590707e-06, "loss": 0.6572, "step": 7572 }, { "epoch": 0.8, "grad_norm": 2.832898876677455, "learning_rate": 1.0434139761008915e-06, "loss": 0.592, "step": 7573 }, { "epoch": 0.8, "grad_norm": 2.8742909520681805, "learning_rate": 1.042372224807438e-06, "loss": 0.6899, "step": 7574 }, { "epoch": 0.8, "grad_norm": 2.538021201102594, "learning_rate": 1.0413309332997385e-06, "loss": 0.533, "step": 7575 }, { "epoch": 0.8, "grad_norm": 2.9699203259048996, "learning_rate": 1.0402901016987694e-06, "loss": 0.6227, "step": 7576 }, { "epoch": 0.8, "grad_norm": 2.0629324280519654, "learning_rate": 1.0392497301254489e-06, "loss": 0.5473, "step": 7577 }, { "epoch": 0.8, "grad_norm": 2.150602467486783, "learning_rate": 1.0382098187006463e-06, "loss": 0.6386, "step": 7578 }, { "epoch": 0.8, "grad_norm": 1.012171485780843, "learning_rate": 1.0371703675451732e-06, "loss": 0.5258, "step": 7579 }, { "epoch": 0.8, "grad_norm": 2.144392113247383, "learning_rate": 1.0361313767797932e-06, "loss": 0.5706, "step": 7580 }, { "epoch": 0.8, "grad_norm": 2.4348220328637216, "learning_rate": 1.0350928465252103e-06, "loss": 0.6039, "step": 7581 }, { "epoch": 0.8, "grad_norm": 2.6562323332446294, "learning_rate": 1.0340547769020798e-06, "loss": 0.5633, "step": 7582 }, { "epoch": 0.8, "grad_norm": 0.9865343111730549, "learning_rate": 1.0330171680309996e-06, "loss": 0.5352, "step": 7583 }, { "epoch": 0.8, "grad_norm": 3.3099068356431776, "learning_rate": 1.0319800200325193e-06, "loss": 0.5597, "step": 7584 }, { "epoch": 0.8, "grad_norm": 2.519735515375185, "learning_rate": 1.0309433330271285e-06, "loss": 0.5921, "step": 7585 }, { "epoch": 0.8, "grad_norm": 2.187601261956032, "learning_rate": 1.029907107135269e-06, "loss": 0.5567, "step": 7586 }, { "epoch": 0.8, "grad_norm": 2.348573049735444, "learning_rate": 1.0288713424773238e-06, "loss": 0.6187, "step": 7587 }, { "epoch": 0.8, "grad_norm": 2.701801842597667, "learning_rate": 1.027836039173627e-06, "loss": 0.5421, "step": 7588 }, { "epoch": 0.8, "grad_norm": 2.717130793307138, "learning_rate": 1.026801197344458e-06, "loss": 0.6378, "step": 7589 }, { "epoch": 0.8, "grad_norm": 2.3303124612250543, "learning_rate": 1.0257668171100393e-06, "loss": 0.6418, "step": 7590 }, { "epoch": 0.8, "grad_norm": 2.7662913464027623, "learning_rate": 1.0247328985905446e-06, "loss": 0.5441, "step": 7591 }, { "epoch": 0.8, "grad_norm": 2.544677485406088, "learning_rate": 1.0236994419060892e-06, "loss": 0.568, "step": 7592 }, { "epoch": 0.8, "grad_norm": 2.899566327372865, "learning_rate": 1.0226664471767401e-06, "loss": 0.6502, "step": 7593 }, { "epoch": 0.8, "grad_norm": 2.506201535469169, "learning_rate": 1.021633914522504e-06, "loss": 0.6079, "step": 7594 }, { "epoch": 0.8, "grad_norm": 2.518907467485238, "learning_rate": 1.0206018440633408e-06, "loss": 0.5106, "step": 7595 }, { "epoch": 0.8, "grad_norm": 2.3943093369268227, "learning_rate": 1.0195702359191507e-06, "loss": 0.579, "step": 7596 }, { "epoch": 0.8, "grad_norm": 2.4145954273830927, "learning_rate": 1.0185390902097857e-06, "loss": 0.6422, "step": 7597 }, { "epoch": 0.8, "grad_norm": 1.921201826914954, "learning_rate": 1.017508407055039e-06, "loss": 0.6023, "step": 7598 }, { "epoch": 0.8, "grad_norm": 2.937076600981269, "learning_rate": 1.0164781865746542e-06, "loss": 0.5655, "step": 7599 }, { "epoch": 0.8, "grad_norm": 2.760440801599633, "learning_rate": 1.0154484288883177e-06, "loss": 0.6897, "step": 7600 }, { "epoch": 0.8, "grad_norm": 2.4696937629349898, "learning_rate": 1.014419134115664e-06, "loss": 0.6095, "step": 7601 }, { "epoch": 0.8, "grad_norm": 4.811722355164242, "learning_rate": 1.0133903023762758e-06, "loss": 0.6984, "step": 7602 }, { "epoch": 0.8, "grad_norm": 4.115947584954783, "learning_rate": 1.0123619337896767e-06, "loss": 0.5696, "step": 7603 }, { "epoch": 0.8, "grad_norm": 2.564549686867802, "learning_rate": 1.0113340284753425e-06, "loss": 0.655, "step": 7604 }, { "epoch": 0.8, "grad_norm": 12.618329274026946, "learning_rate": 1.0103065865526895e-06, "loss": 0.5778, "step": 7605 }, { "epoch": 0.8, "grad_norm": 2.8969821170985206, "learning_rate": 1.0092796081410856e-06, "loss": 0.5542, "step": 7606 }, { "epoch": 0.8, "grad_norm": 2.5199683999249087, "learning_rate": 1.0082530933598388e-06, "loss": 0.6197, "step": 7607 }, { "epoch": 0.8, "grad_norm": 3.435215141719612, "learning_rate": 1.0072270423282104e-06, "loss": 0.5886, "step": 7608 }, { "epoch": 0.8, "grad_norm": 2.1869076971904104, "learning_rate": 1.0062014551654015e-06, "loss": 0.5902, "step": 7609 }, { "epoch": 0.8, "grad_norm": 2.5157709264478054, "learning_rate": 1.0051763319905622e-06, "loss": 0.5485, "step": 7610 }, { "epoch": 0.8, "grad_norm": 2.3517546543055747, "learning_rate": 1.0041516729227902e-06, "loss": 0.5734, "step": 7611 }, { "epoch": 0.8, "grad_norm": 2.447965128681716, "learning_rate": 1.0031274780811245e-06, "loss": 0.6666, "step": 7612 }, { "epoch": 0.8, "grad_norm": 2.976761782472005, "learning_rate": 1.0021037475845557e-06, "loss": 0.5641, "step": 7613 }, { "epoch": 0.8, "grad_norm": 2.698568711787718, "learning_rate": 1.0010804815520159e-06, "loss": 0.6378, "step": 7614 }, { "epoch": 0.8, "grad_norm": 3.3363776215524092, "learning_rate": 1.0000576801023876e-06, "loss": 0.6686, "step": 7615 }, { "epoch": 0.8, "grad_norm": 3.7415611338554813, "learning_rate": 9.990353433544935e-07, "loss": 0.6204, "step": 7616 }, { "epoch": 0.8, "grad_norm": 3.136268007867056, "learning_rate": 9.980134714271088e-07, "loss": 0.7083, "step": 7617 }, { "epoch": 0.8, "grad_norm": 2.186125465023069, "learning_rate": 9.969920644389498e-07, "loss": 0.583, "step": 7618 }, { "epoch": 0.8, "grad_norm": 2.6754021896396454, "learning_rate": 9.959711225086822e-07, "loss": 0.6414, "step": 7619 }, { "epoch": 0.8, "grad_norm": 3.3008931024230703, "learning_rate": 9.949506457549135e-07, "loss": 0.5452, "step": 7620 }, { "epoch": 0.8, "grad_norm": 2.6881102400726142, "learning_rate": 9.93930634296203e-07, "loss": 0.5804, "step": 7621 }, { "epoch": 0.8, "grad_norm": 2.510590511858801, "learning_rate": 9.929110882510496e-07, "loss": 0.5921, "step": 7622 }, { "epoch": 0.8, "grad_norm": 2.218316814824091, "learning_rate": 9.91892007737903e-07, "loss": 0.6482, "step": 7623 }, { "epoch": 0.8, "grad_norm": 2.3200140091226107, "learning_rate": 9.908733928751574e-07, "loss": 0.5587, "step": 7624 }, { "epoch": 0.8, "grad_norm": 2.7818896360301317, "learning_rate": 9.89855243781151e-07, "loss": 0.6569, "step": 7625 }, { "epoch": 0.8, "grad_norm": 2.972728123297002, "learning_rate": 9.888375605741713e-07, "loss": 0.587, "step": 7626 }, { "epoch": 0.8, "grad_norm": 2.467181443995698, "learning_rate": 9.87820343372447e-07, "loss": 0.6413, "step": 7627 }, { "epoch": 0.8, "grad_norm": 2.771873420545783, "learning_rate": 9.868035922941594e-07, "loss": 0.6314, "step": 7628 }, { "epoch": 0.8, "grad_norm": 8.073309761721186, "learning_rate": 9.857873074574276e-07, "loss": 0.6316, "step": 7629 }, { "epoch": 0.8, "grad_norm": 2.3110538134158447, "learning_rate": 9.847714889803233e-07, "loss": 0.6234, "step": 7630 }, { "epoch": 0.8, "grad_norm": 2.3475214347720086, "learning_rate": 9.8375613698086e-07, "loss": 0.5873, "step": 7631 }, { "epoch": 0.8, "grad_norm": 3.338962974057601, "learning_rate": 9.827412515770003e-07, "loss": 0.6285, "step": 7632 }, { "epoch": 0.8, "grad_norm": 2.4511140443396053, "learning_rate": 9.817268328866474e-07, "loss": 0.5206, "step": 7633 }, { "epoch": 0.8, "grad_norm": 2.6141500186568076, "learning_rate": 9.807128810276573e-07, "loss": 0.6173, "step": 7634 }, { "epoch": 0.8, "grad_norm": 2.6588577517347343, "learning_rate": 9.796993961178247e-07, "loss": 0.5779, "step": 7635 }, { "epoch": 0.8, "grad_norm": 2.2544484813883354, "learning_rate": 9.786863782748946e-07, "loss": 0.6226, "step": 7636 }, { "epoch": 0.8, "grad_norm": 2.7154960137258373, "learning_rate": 9.776738276165576e-07, "loss": 0.5301, "step": 7637 }, { "epoch": 0.8, "grad_norm": 2.8331620353922093, "learning_rate": 9.76661744260447e-07, "loss": 0.6592, "step": 7638 }, { "epoch": 0.8, "grad_norm": 2.8596209122623604, "learning_rate": 9.75650128324146e-07, "loss": 0.6592, "step": 7639 }, { "epoch": 0.8, "grad_norm": 0.9617449759052116, "learning_rate": 9.746389799251783e-07, "loss": 0.5056, "step": 7640 }, { "epoch": 0.8, "grad_norm": 2.6375463130140555, "learning_rate": 9.736282991810191e-07, "loss": 0.649, "step": 7641 }, { "epoch": 0.8, "grad_norm": 3.170328479172141, "learning_rate": 9.72618086209084e-07, "loss": 0.6085, "step": 7642 }, { "epoch": 0.8, "grad_norm": 2.0826307503241073, "learning_rate": 9.71608341126739e-07, "loss": 0.5132, "step": 7643 }, { "epoch": 0.8, "grad_norm": 5.426019291056326, "learning_rate": 9.705990640512909e-07, "loss": 0.5804, "step": 7644 }, { "epoch": 0.8, "grad_norm": 2.8657766408804495, "learning_rate": 9.695902550999953e-07, "loss": 0.6634, "step": 7645 }, { "epoch": 0.8, "grad_norm": 2.1192272871905335, "learning_rate": 9.685819143900544e-07, "loss": 0.575, "step": 7646 }, { "epoch": 0.8, "grad_norm": 2.9730510980964873, "learning_rate": 9.675740420386132e-07, "loss": 0.6602, "step": 7647 }, { "epoch": 0.8, "grad_norm": 2.8204604472554924, "learning_rate": 9.66566638162762e-07, "loss": 0.6495, "step": 7648 }, { "epoch": 0.8, "grad_norm": 2.539741388640035, "learning_rate": 9.655597028795394e-07, "loss": 0.6173, "step": 7649 }, { "epoch": 0.81, "grad_norm": 2.5656949751155484, "learning_rate": 9.64553236305929e-07, "loss": 0.6095, "step": 7650 }, { "epoch": 0.81, "grad_norm": 2.450376476628125, "learning_rate": 9.635472385588573e-07, "loss": 0.5739, "step": 7651 }, { "epoch": 0.81, "grad_norm": 2.6482970996607156, "learning_rate": 9.625417097552003e-07, "loss": 0.6081, "step": 7652 }, { "epoch": 0.81, "grad_norm": 2.6667414141134174, "learning_rate": 9.615366500117757e-07, "loss": 0.6578, "step": 7653 }, { "epoch": 0.81, "grad_norm": 3.74349389755696, "learning_rate": 9.6053205944535e-07, "loss": 0.5548, "step": 7654 }, { "epoch": 0.81, "grad_norm": 3.010896829422771, "learning_rate": 9.595279381726308e-07, "loss": 0.5475, "step": 7655 }, { "epoch": 0.81, "grad_norm": 2.4405802174170725, "learning_rate": 9.58524286310278e-07, "loss": 0.6354, "step": 7656 }, { "epoch": 0.81, "grad_norm": 4.919810726968023, "learning_rate": 9.575211039748893e-07, "loss": 0.5448, "step": 7657 }, { "epoch": 0.81, "grad_norm": 2.363605925097881, "learning_rate": 9.565183912830134e-07, "loss": 0.6191, "step": 7658 }, { "epoch": 0.81, "grad_norm": 2.4814246714387385, "learning_rate": 9.555161483511434e-07, "loss": 0.6056, "step": 7659 }, { "epoch": 0.81, "grad_norm": 2.3442144571403394, "learning_rate": 9.545143752957143e-07, "loss": 0.5799, "step": 7660 }, { "epoch": 0.81, "grad_norm": 0.9844963618684487, "learning_rate": 9.535130722331121e-07, "loss": 0.5501, "step": 7661 }, { "epoch": 0.81, "grad_norm": 3.841148309195625, "learning_rate": 9.525122392796632e-07, "loss": 0.5798, "step": 7662 }, { "epoch": 0.81, "grad_norm": 2.6946034136889114, "learning_rate": 9.515118765516429e-07, "loss": 0.624, "step": 7663 }, { "epoch": 0.81, "grad_norm": 2.614312745110826, "learning_rate": 9.505119841652688e-07, "loss": 0.6012, "step": 7664 }, { "epoch": 0.81, "grad_norm": 2.2341716999178733, "learning_rate": 9.495125622367079e-07, "loss": 0.699, "step": 7665 }, { "epoch": 0.81, "grad_norm": 2.454492464172891, "learning_rate": 9.485136108820675e-07, "loss": 0.5752, "step": 7666 }, { "epoch": 0.81, "grad_norm": 2.7299044786206266, "learning_rate": 9.475151302174052e-07, "loss": 0.6267, "step": 7667 }, { "epoch": 0.81, "grad_norm": 4.46712121179473, "learning_rate": 9.465171203587192e-07, "loss": 0.69, "step": 7668 }, { "epoch": 0.81, "grad_norm": 2.8581757438763984, "learning_rate": 9.455195814219581e-07, "loss": 0.6329, "step": 7669 }, { "epoch": 0.81, "grad_norm": 2.7749337082880636, "learning_rate": 9.445225135230102e-07, "loss": 0.5863, "step": 7670 }, { "epoch": 0.81, "grad_norm": 2.9200575440191554, "learning_rate": 9.435259167777139e-07, "loss": 0.6594, "step": 7671 }, { "epoch": 0.81, "grad_norm": 3.1961551877877343, "learning_rate": 9.425297913018517e-07, "loss": 0.6262, "step": 7672 }, { "epoch": 0.81, "grad_norm": 5.621987087951769, "learning_rate": 9.41534137211148e-07, "loss": 0.6806, "step": 7673 }, { "epoch": 0.81, "grad_norm": 2.37550460624206, "learning_rate": 9.405389546212779e-07, "loss": 0.4877, "step": 7674 }, { "epoch": 0.81, "grad_norm": 2.1856374832479073, "learning_rate": 9.395442436478558e-07, "loss": 0.6366, "step": 7675 }, { "epoch": 0.81, "grad_norm": 2.3120929072875986, "learning_rate": 9.385500044064472e-07, "loss": 0.5728, "step": 7676 }, { "epoch": 0.81, "grad_norm": 11.267589648078419, "learning_rate": 9.375562370125574e-07, "loss": 0.6151, "step": 7677 }, { "epoch": 0.81, "grad_norm": 3.3911562826517248, "learning_rate": 9.365629415816418e-07, "loss": 0.5745, "step": 7678 }, { "epoch": 0.81, "grad_norm": 2.0931728619715138, "learning_rate": 9.355701182290961e-07, "loss": 0.5957, "step": 7679 }, { "epoch": 0.81, "grad_norm": 2.4056655342238416, "learning_rate": 9.345777670702649e-07, "loss": 0.6034, "step": 7680 }, { "epoch": 0.81, "grad_norm": 3.31101973582715, "learning_rate": 9.335858882204385e-07, "loss": 0.5717, "step": 7681 }, { "epoch": 0.81, "grad_norm": 3.2633888627628336, "learning_rate": 9.325944817948485e-07, "loss": 0.6403, "step": 7682 }, { "epoch": 0.81, "grad_norm": 2.507966888011093, "learning_rate": 9.316035479086727e-07, "loss": 0.6023, "step": 7683 }, { "epoch": 0.81, "grad_norm": 2.6707445301357104, "learning_rate": 9.306130866770364e-07, "loss": 0.694, "step": 7684 }, { "epoch": 0.81, "grad_norm": 2.3698584587195777, "learning_rate": 9.296230982150095e-07, "loss": 0.499, "step": 7685 }, { "epoch": 0.81, "grad_norm": 2.646352948765854, "learning_rate": 9.28633582637603e-07, "loss": 0.5998, "step": 7686 }, { "epoch": 0.81, "grad_norm": 2.3308088558522724, "learning_rate": 9.276445400597795e-07, "loss": 0.6442, "step": 7687 }, { "epoch": 0.81, "grad_norm": 2.7184765380374314, "learning_rate": 9.266559705964401e-07, "loss": 0.7093, "step": 7688 }, { "epoch": 0.81, "grad_norm": 2.623044957668708, "learning_rate": 9.256678743624364e-07, "loss": 0.6417, "step": 7689 }, { "epoch": 0.81, "grad_norm": 5.079400937660165, "learning_rate": 9.246802514725601e-07, "loss": 0.6116, "step": 7690 }, { "epoch": 0.81, "grad_norm": 2.2731708710443503, "learning_rate": 9.236931020415529e-07, "loss": 0.5756, "step": 7691 }, { "epoch": 0.81, "grad_norm": 3.3136227736096924, "learning_rate": 9.227064261840962e-07, "loss": 0.6093, "step": 7692 }, { "epoch": 0.81, "grad_norm": 2.1475285005337974, "learning_rate": 9.21720224014821e-07, "loss": 0.6814, "step": 7693 }, { "epoch": 0.81, "grad_norm": 2.9968897146706133, "learning_rate": 9.207344956483022e-07, "loss": 0.6033, "step": 7694 }, { "epoch": 0.81, "grad_norm": 2.6350679079149866, "learning_rate": 9.197492411990571e-07, "loss": 0.626, "step": 7695 }, { "epoch": 0.81, "grad_norm": 2.72376089543527, "learning_rate": 9.187644607815499e-07, "loss": 0.6423, "step": 7696 }, { "epoch": 0.81, "grad_norm": 2.8530684492710217, "learning_rate": 9.1778015451019e-07, "loss": 0.6458, "step": 7697 }, { "epoch": 0.81, "grad_norm": 2.0539321212137107, "learning_rate": 9.16796322499332e-07, "loss": 0.6108, "step": 7698 }, { "epoch": 0.81, "grad_norm": 2.7055060163420066, "learning_rate": 9.15812964863273e-07, "loss": 0.5557, "step": 7699 }, { "epoch": 0.81, "grad_norm": 2.334028049190322, "learning_rate": 9.148300817162587e-07, "loss": 0.5686, "step": 7700 }, { "epoch": 0.81, "grad_norm": 0.8483172248492421, "learning_rate": 9.138476731724749e-07, "loss": 0.5497, "step": 7701 }, { "epoch": 0.81, "grad_norm": 2.586655116930661, "learning_rate": 9.128657393460583e-07, "loss": 0.5979, "step": 7702 }, { "epoch": 0.81, "grad_norm": 3.314047969656968, "learning_rate": 9.118842803510841e-07, "loss": 0.6292, "step": 7703 }, { "epoch": 0.81, "grad_norm": 2.410214810373217, "learning_rate": 9.10903296301578e-07, "loss": 0.6381, "step": 7704 }, { "epoch": 0.81, "grad_norm": 3.104473643611443, "learning_rate": 9.099227873115047e-07, "loss": 0.6241, "step": 7705 }, { "epoch": 0.81, "grad_norm": 3.4899505415847676, "learning_rate": 9.089427534947792e-07, "loss": 0.6409, "step": 7706 }, { "epoch": 0.81, "grad_norm": 2.1792155269408617, "learning_rate": 9.0796319496526e-07, "loss": 0.6243, "step": 7707 }, { "epoch": 0.81, "grad_norm": 2.648707594215365, "learning_rate": 9.069841118367462e-07, "loss": 0.6185, "step": 7708 }, { "epoch": 0.81, "grad_norm": 2.801759382084992, "learning_rate": 9.060055042229881e-07, "loss": 0.6844, "step": 7709 }, { "epoch": 0.81, "grad_norm": 2.8450393356650685, "learning_rate": 9.050273722376746e-07, "loss": 0.535, "step": 7710 }, { "epoch": 0.81, "grad_norm": 3.237580480646928, "learning_rate": 9.04049715994445e-07, "loss": 0.6118, "step": 7711 }, { "epoch": 0.81, "grad_norm": 4.029884410505219, "learning_rate": 9.030725356068781e-07, "loss": 0.642, "step": 7712 }, { "epoch": 0.81, "grad_norm": 2.5945698031312086, "learning_rate": 9.020958311885019e-07, "loss": 0.6349, "step": 7713 }, { "epoch": 0.81, "grad_norm": 2.455581536260283, "learning_rate": 9.011196028527853e-07, "loss": 0.6013, "step": 7714 }, { "epoch": 0.81, "grad_norm": 0.9942779258867724, "learning_rate": 9.001438507131444e-07, "loss": 0.527, "step": 7715 }, { "epoch": 0.81, "grad_norm": 2.700359868586655, "learning_rate": 8.991685748829404e-07, "loss": 0.6477, "step": 7716 }, { "epoch": 0.81, "grad_norm": 2.104403745631631, "learning_rate": 8.981937754754777e-07, "loss": 0.6163, "step": 7717 }, { "epoch": 0.81, "grad_norm": 2.5972794158820482, "learning_rate": 8.972194526040034e-07, "loss": 0.5271, "step": 7718 }, { "epoch": 0.81, "grad_norm": 2.4719633270189583, "learning_rate": 8.962456063817132e-07, "loss": 0.6623, "step": 7719 }, { "epoch": 0.81, "grad_norm": 0.9667484827218678, "learning_rate": 8.95272236921747e-07, "loss": 0.5675, "step": 7720 }, { "epoch": 0.81, "grad_norm": 2.425564331369107, "learning_rate": 8.942993443371856e-07, "loss": 0.609, "step": 7721 }, { "epoch": 0.81, "grad_norm": 2.947734739772372, "learning_rate": 8.93326928741059e-07, "loss": 0.6371, "step": 7722 }, { "epoch": 0.81, "grad_norm": 2.9080706900479045, "learning_rate": 8.923549902463374e-07, "loss": 0.6628, "step": 7723 }, { "epoch": 0.81, "grad_norm": 3.8252938892041635, "learning_rate": 8.913835289659406e-07, "loss": 0.5489, "step": 7724 }, { "epoch": 0.81, "grad_norm": 2.5458974669966805, "learning_rate": 8.904125450127272e-07, "loss": 0.6616, "step": 7725 }, { "epoch": 0.81, "grad_norm": 2.184591112903663, "learning_rate": 8.894420384995056e-07, "loss": 0.6075, "step": 7726 }, { "epoch": 0.81, "grad_norm": 2.9011418508765647, "learning_rate": 8.884720095390248e-07, "loss": 0.6888, "step": 7727 }, { "epoch": 0.81, "grad_norm": 2.454534274847728, "learning_rate": 8.875024582439801e-07, "loss": 0.6525, "step": 7728 }, { "epoch": 0.81, "grad_norm": 2.3930270904791335, "learning_rate": 8.865333847270135e-07, "loss": 0.6685, "step": 7729 }, { "epoch": 0.81, "grad_norm": 2.5600293308303783, "learning_rate": 8.855647891007075e-07, "loss": 0.5978, "step": 7730 }, { "epoch": 0.81, "grad_norm": 2.437056971277586, "learning_rate": 8.845966714775894e-07, "loss": 0.6665, "step": 7731 }, { "epoch": 0.81, "grad_norm": 21.97578049995487, "learning_rate": 8.836290319701335e-07, "loss": 0.5903, "step": 7732 }, { "epoch": 0.81, "grad_norm": 2.878994513782418, "learning_rate": 8.826618706907585e-07, "loss": 0.6547, "step": 7733 }, { "epoch": 0.81, "grad_norm": 2.6089834334746893, "learning_rate": 8.816951877518243e-07, "loss": 0.6462, "step": 7734 }, { "epoch": 0.81, "grad_norm": 3.5891885718164533, "learning_rate": 8.807289832656396e-07, "loss": 0.6704, "step": 7735 }, { "epoch": 0.81, "grad_norm": 2.53450443321136, "learning_rate": 8.797632573444526e-07, "loss": 0.5234, "step": 7736 }, { "epoch": 0.81, "grad_norm": 2.4367976647015843, "learning_rate": 8.787980101004612e-07, "loss": 0.5515, "step": 7737 }, { "epoch": 0.81, "grad_norm": 3.393038737753617, "learning_rate": 8.77833241645803e-07, "loss": 0.6213, "step": 7738 }, { "epoch": 0.81, "grad_norm": 2.942620580800737, "learning_rate": 8.768689520925638e-07, "loss": 0.5855, "step": 7739 }, { "epoch": 0.81, "grad_norm": 2.2895413377993816, "learning_rate": 8.759051415527697e-07, "loss": 0.6244, "step": 7740 }, { "epoch": 0.81, "grad_norm": 3.1158010305913275, "learning_rate": 8.749418101383944e-07, "loss": 0.5865, "step": 7741 }, { "epoch": 0.81, "grad_norm": 2.6974129522038877, "learning_rate": 8.739789579613572e-07, "loss": 0.5408, "step": 7742 }, { "epoch": 0.81, "grad_norm": 2.895780450687752, "learning_rate": 8.73016585133517e-07, "loss": 0.6004, "step": 7743 }, { "epoch": 0.81, "grad_norm": 3.0182236513579874, "learning_rate": 8.720546917666789e-07, "loss": 0.6118, "step": 7744 }, { "epoch": 0.82, "grad_norm": 2.370869512524687, "learning_rate": 8.710932779725939e-07, "loss": 0.6114, "step": 7745 }, { "epoch": 0.82, "grad_norm": 2.3023012937961846, "learning_rate": 8.701323438629577e-07, "loss": 0.5891, "step": 7746 }, { "epoch": 0.82, "grad_norm": 2.7894697251608, "learning_rate": 8.691718895494067e-07, "loss": 0.5835, "step": 7747 }, { "epoch": 0.82, "grad_norm": 2.2253869804344077, "learning_rate": 8.682119151435258e-07, "loss": 0.657, "step": 7748 }, { "epoch": 0.82, "grad_norm": 2.1968761019330536, "learning_rate": 8.672524207568389e-07, "loss": 0.5731, "step": 7749 }, { "epoch": 0.82, "grad_norm": 2.0451951033529774, "learning_rate": 8.6629340650082e-07, "loss": 0.6249, "step": 7750 }, { "epoch": 0.82, "grad_norm": 5.290396646074176, "learning_rate": 8.653348724868843e-07, "loss": 0.6095, "step": 7751 }, { "epoch": 0.82, "grad_norm": 2.4998002085053828, "learning_rate": 8.643768188263918e-07, "loss": 0.5901, "step": 7752 }, { "epoch": 0.82, "grad_norm": 2.927706275807229, "learning_rate": 8.63419245630644e-07, "loss": 0.6667, "step": 7753 }, { "epoch": 0.82, "grad_norm": 2.514050334470586, "learning_rate": 8.624621530108901e-07, "loss": 0.5498, "step": 7754 }, { "epoch": 0.82, "grad_norm": 2.00269176738163, "learning_rate": 8.615055410783246e-07, "loss": 0.5119, "step": 7755 }, { "epoch": 0.82, "grad_norm": 2.2181227482346895, "learning_rate": 8.605494099440808e-07, "loss": 0.6086, "step": 7756 }, { "epoch": 0.82, "grad_norm": 0.9630978359237016, "learning_rate": 8.595937597192422e-07, "loss": 0.5444, "step": 7757 }, { "epoch": 0.82, "grad_norm": 4.040120236257973, "learning_rate": 8.586385905148304e-07, "loss": 0.5648, "step": 7758 }, { "epoch": 0.82, "grad_norm": 2.7334403291410077, "learning_rate": 8.576839024418165e-07, "loss": 0.6385, "step": 7759 }, { "epoch": 0.82, "grad_norm": 2.2473246156394096, "learning_rate": 8.567296956111121e-07, "loss": 0.5395, "step": 7760 }, { "epoch": 0.82, "grad_norm": 2.5453764969411057, "learning_rate": 8.557759701335755e-07, "loss": 0.6596, "step": 7761 }, { "epoch": 0.82, "grad_norm": 3.755171473082308, "learning_rate": 8.54822726120006e-07, "loss": 0.5935, "step": 7762 }, { "epoch": 0.82, "grad_norm": 2.3621820404124114, "learning_rate": 8.538699636811493e-07, "loss": 0.6247, "step": 7763 }, { "epoch": 0.82, "grad_norm": 2.5573182046892384, "learning_rate": 8.529176829276964e-07, "loss": 0.5769, "step": 7764 }, { "epoch": 0.82, "grad_norm": 2.452128633867755, "learning_rate": 8.519658839702787e-07, "loss": 0.5634, "step": 7765 }, { "epoch": 0.82, "grad_norm": 3.1439091682655964, "learning_rate": 8.51014566919473e-07, "loss": 0.6145, "step": 7766 }, { "epoch": 0.82, "grad_norm": 7.479175976072859, "learning_rate": 8.500637318858018e-07, "loss": 0.6538, "step": 7767 }, { "epoch": 0.82, "grad_norm": 2.430856695338035, "learning_rate": 8.491133789797307e-07, "loss": 0.6279, "step": 7768 }, { "epoch": 0.82, "grad_norm": 2.770347480171204, "learning_rate": 8.481635083116668e-07, "loss": 0.6057, "step": 7769 }, { "epoch": 0.82, "grad_norm": 3.069965846489253, "learning_rate": 8.472141199919664e-07, "loss": 0.5681, "step": 7770 }, { "epoch": 0.82, "grad_norm": 2.9992990174535383, "learning_rate": 8.462652141309242e-07, "loss": 0.6433, "step": 7771 }, { "epoch": 0.82, "grad_norm": 2.79447885278482, "learning_rate": 8.453167908387827e-07, "loss": 0.6942, "step": 7772 }, { "epoch": 0.82, "grad_norm": 2.3694158265158682, "learning_rate": 8.443688502257253e-07, "loss": 0.5695, "step": 7773 }, { "epoch": 0.82, "grad_norm": 2.6521967702602134, "learning_rate": 8.434213924018836e-07, "loss": 0.6175, "step": 7774 }, { "epoch": 0.82, "grad_norm": 6.450142386712109, "learning_rate": 8.424744174773281e-07, "loss": 0.5917, "step": 7775 }, { "epoch": 0.82, "grad_norm": 2.60990893147203, "learning_rate": 8.415279255620762e-07, "loss": 0.7291, "step": 7776 }, { "epoch": 0.82, "grad_norm": 2.3162030741562702, "learning_rate": 8.405819167660906e-07, "loss": 0.5496, "step": 7777 }, { "epoch": 0.82, "grad_norm": 4.454400577973904, "learning_rate": 8.396363911992739e-07, "loss": 0.6948, "step": 7778 }, { "epoch": 0.82, "grad_norm": 2.833872698534407, "learning_rate": 8.386913489714737e-07, "loss": 0.5477, "step": 7779 }, { "epoch": 0.82, "grad_norm": 2.445942443952145, "learning_rate": 8.377467901924835e-07, "loss": 0.5796, "step": 7780 }, { "epoch": 0.82, "grad_norm": 2.537918156805052, "learning_rate": 8.368027149720404e-07, "loss": 0.6614, "step": 7781 }, { "epoch": 0.82, "grad_norm": 2.100483149802904, "learning_rate": 8.358591234198221e-07, "loss": 0.6177, "step": 7782 }, { "epoch": 0.82, "grad_norm": 2.7198326768529277, "learning_rate": 8.34916015645455e-07, "loss": 0.625, "step": 7783 }, { "epoch": 0.82, "grad_norm": 3.8763253745346042, "learning_rate": 8.339733917585041e-07, "loss": 0.504, "step": 7784 }, { "epoch": 0.82, "grad_norm": 4.0339142550402896, "learning_rate": 8.330312518684813e-07, "loss": 0.6544, "step": 7785 }, { "epoch": 0.82, "grad_norm": 3.372358625907128, "learning_rate": 8.320895960848435e-07, "loss": 0.662, "step": 7786 }, { "epoch": 0.82, "grad_norm": 11.12693531528549, "learning_rate": 8.311484245169888e-07, "loss": 0.5841, "step": 7787 }, { "epoch": 0.82, "grad_norm": 3.0258617844542615, "learning_rate": 8.302077372742573e-07, "loss": 0.5996, "step": 7788 }, { "epoch": 0.82, "grad_norm": 2.149848595276636, "learning_rate": 8.292675344659374e-07, "loss": 0.5624, "step": 7789 }, { "epoch": 0.82, "grad_norm": 9.921586434700963, "learning_rate": 8.283278162012604e-07, "loss": 0.6565, "step": 7790 }, { "epoch": 0.82, "grad_norm": 2.223681029796389, "learning_rate": 8.273885825893984e-07, "loss": 0.572, "step": 7791 }, { "epoch": 0.82, "grad_norm": 12.307302243450632, "learning_rate": 8.264498337394683e-07, "loss": 0.7158, "step": 7792 }, { "epoch": 0.82, "grad_norm": 3.185259212276455, "learning_rate": 8.255115697605315e-07, "loss": 0.6205, "step": 7793 }, { "epoch": 0.82, "grad_norm": 2.3742771690682867, "learning_rate": 8.245737907615948e-07, "loss": 0.571, "step": 7794 }, { "epoch": 0.82, "grad_norm": 2.0306399507949875, "learning_rate": 8.236364968516036e-07, "loss": 0.574, "step": 7795 }, { "epoch": 0.82, "grad_norm": 2.95185086622032, "learning_rate": 8.226996881394533e-07, "loss": 0.5969, "step": 7796 }, { "epoch": 0.82, "grad_norm": 2.83828313253317, "learning_rate": 8.217633647339762e-07, "loss": 0.6308, "step": 7797 }, { "epoch": 0.82, "grad_norm": 2.740058288843735, "learning_rate": 8.208275267439536e-07, "loss": 0.6409, "step": 7798 }, { "epoch": 0.82, "grad_norm": 3.205986939831031, "learning_rate": 8.19892174278109e-07, "loss": 0.638, "step": 7799 }, { "epoch": 0.82, "grad_norm": 3.133784418145159, "learning_rate": 8.189573074451084e-07, "loss": 0.5896, "step": 7800 }, { "epoch": 0.82, "grad_norm": 4.651287297043312, "learning_rate": 8.180229263535605e-07, "loss": 0.6102, "step": 7801 }, { "epoch": 0.82, "grad_norm": 2.8083107733624537, "learning_rate": 8.1708903111202e-07, "loss": 0.5911, "step": 7802 }, { "epoch": 0.82, "grad_norm": 2.697503873505146, "learning_rate": 8.161556218289857e-07, "loss": 0.5624, "step": 7803 }, { "epoch": 0.82, "grad_norm": 2.3094841871089065, "learning_rate": 8.15222698612897e-07, "loss": 0.5943, "step": 7804 }, { "epoch": 0.82, "grad_norm": 2.791054703314953, "learning_rate": 8.142902615721371e-07, "loss": 0.5893, "step": 7805 }, { "epoch": 0.82, "grad_norm": 2.2449207941075198, "learning_rate": 8.133583108150345e-07, "loss": 0.5883, "step": 7806 }, { "epoch": 0.82, "grad_norm": 6.264533076641818, "learning_rate": 8.124268464498625e-07, "loss": 0.5802, "step": 7807 }, { "epoch": 0.82, "grad_norm": 2.397265245054088, "learning_rate": 8.114958685848334e-07, "loss": 0.7004, "step": 7808 }, { "epoch": 0.82, "grad_norm": 2.687783198325523, "learning_rate": 8.105653773281074e-07, "loss": 0.633, "step": 7809 }, { "epoch": 0.82, "grad_norm": 4.572173865987845, "learning_rate": 8.096353727877843e-07, "loss": 0.6158, "step": 7810 }, { "epoch": 0.82, "grad_norm": 2.084933298498584, "learning_rate": 8.087058550719107e-07, "loss": 0.6373, "step": 7811 }, { "epoch": 0.82, "grad_norm": 4.117019095691375, "learning_rate": 8.077768242884759e-07, "loss": 0.5337, "step": 7812 }, { "epoch": 0.82, "grad_norm": 2.526067117482907, "learning_rate": 8.068482805454115e-07, "loss": 0.5935, "step": 7813 }, { "epoch": 0.82, "grad_norm": 2.770744913286671, "learning_rate": 8.059202239505915e-07, "loss": 0.6824, "step": 7814 }, { "epoch": 0.82, "grad_norm": 2.3876150794603417, "learning_rate": 8.049926546118359e-07, "loss": 0.6197, "step": 7815 }, { "epoch": 0.82, "grad_norm": 2.301040153613851, "learning_rate": 8.040655726369079e-07, "loss": 0.6231, "step": 7816 }, { "epoch": 0.82, "grad_norm": 2.1640120127578, "learning_rate": 8.031389781335119e-07, "loss": 0.5915, "step": 7817 }, { "epoch": 0.82, "grad_norm": 1.0216851076508051, "learning_rate": 8.022128712092986e-07, "loss": 0.5629, "step": 7818 }, { "epoch": 0.82, "grad_norm": 2.3407168223787758, "learning_rate": 8.012872519718578e-07, "loss": 0.5779, "step": 7819 }, { "epoch": 0.82, "grad_norm": 3.240628782070062, "learning_rate": 8.003621205287271e-07, "loss": 0.6247, "step": 7820 }, { "epoch": 0.82, "grad_norm": 2.5968226751951202, "learning_rate": 7.994374769873864e-07, "loss": 0.5631, "step": 7821 }, { "epoch": 0.82, "grad_norm": 2.9225493166351844, "learning_rate": 7.98513321455257e-07, "loss": 0.5619, "step": 7822 }, { "epoch": 0.82, "grad_norm": 2.5510204032518633, "learning_rate": 7.975896540397038e-07, "loss": 0.5647, "step": 7823 }, { "epoch": 0.82, "grad_norm": 3.2618088874821436, "learning_rate": 7.966664748480362e-07, "loss": 0.681, "step": 7824 }, { "epoch": 0.82, "grad_norm": 2.432627062148813, "learning_rate": 7.957437839875088e-07, "loss": 0.619, "step": 7825 }, { "epoch": 0.82, "grad_norm": 2.881899570004556, "learning_rate": 7.948215815653149e-07, "loss": 0.5353, "step": 7826 }, { "epoch": 0.82, "grad_norm": 2.211030969729491, "learning_rate": 7.938998676885922e-07, "loss": 0.5992, "step": 7827 }, { "epoch": 0.82, "grad_norm": 2.7189882982651374, "learning_rate": 7.929786424644248e-07, "loss": 0.5802, "step": 7828 }, { "epoch": 0.82, "grad_norm": 2.6023452660932165, "learning_rate": 7.920579059998384e-07, "loss": 0.575, "step": 7829 }, { "epoch": 0.82, "grad_norm": 2.417297217989513, "learning_rate": 7.911376584017993e-07, "loss": 0.5887, "step": 7830 }, { "epoch": 0.82, "grad_norm": 2.636618821203334, "learning_rate": 7.90217899777222e-07, "loss": 0.5957, "step": 7831 }, { "epoch": 0.82, "grad_norm": 2.783745505915478, "learning_rate": 7.89298630232958e-07, "loss": 0.6066, "step": 7832 }, { "epoch": 0.82, "grad_norm": 2.211994261833092, "learning_rate": 7.883798498758077e-07, "loss": 0.5615, "step": 7833 }, { "epoch": 0.82, "grad_norm": 2.316992802552742, "learning_rate": 7.874615588125128e-07, "loss": 0.5865, "step": 7834 }, { "epoch": 0.82, "grad_norm": 2.540384973156056, "learning_rate": 7.865437571497569e-07, "loss": 0.5444, "step": 7835 }, { "epoch": 0.82, "grad_norm": 2.2642580877781904, "learning_rate": 7.856264449941664e-07, "loss": 0.5456, "step": 7836 }, { "epoch": 0.82, "grad_norm": 2.5525408933985214, "learning_rate": 7.847096224523132e-07, "loss": 0.607, "step": 7837 }, { "epoch": 0.82, "grad_norm": 2.3245711806217613, "learning_rate": 7.837932896307116e-07, "loss": 0.5573, "step": 7838 }, { "epoch": 0.82, "grad_norm": 3.05579417550974, "learning_rate": 7.82877446635818e-07, "loss": 0.5761, "step": 7839 }, { "epoch": 0.83, "grad_norm": 2.5025364694067136, "learning_rate": 7.819620935740313e-07, "loss": 0.5728, "step": 7840 }, { "epoch": 0.83, "grad_norm": 2.541642302021767, "learning_rate": 7.810472305516947e-07, "loss": 0.6021, "step": 7841 }, { "epoch": 0.83, "grad_norm": 2.6527868881900747, "learning_rate": 7.801328576750971e-07, "loss": 0.5955, "step": 7842 }, { "epoch": 0.83, "grad_norm": 3.035882397492675, "learning_rate": 7.792189750504642e-07, "loss": 0.559, "step": 7843 }, { "epoch": 0.83, "grad_norm": 3.4225984060139196, "learning_rate": 7.783055827839709e-07, "loss": 0.594, "step": 7844 }, { "epoch": 0.83, "grad_norm": 2.435724626217545, "learning_rate": 7.7739268098173e-07, "loss": 0.593, "step": 7845 }, { "epoch": 0.83, "grad_norm": 2.4904962113373412, "learning_rate": 7.764802697498009e-07, "loss": 0.615, "step": 7846 }, { "epoch": 0.83, "grad_norm": 2.464037403138296, "learning_rate": 7.755683491941867e-07, "loss": 0.7109, "step": 7847 }, { "epoch": 0.83, "grad_norm": 2.9748316199128824, "learning_rate": 7.746569194208298e-07, "loss": 0.6551, "step": 7848 }, { "epoch": 0.83, "grad_norm": 2.2358191510721923, "learning_rate": 7.737459805356168e-07, "loss": 0.5977, "step": 7849 }, { "epoch": 0.83, "grad_norm": 2.419490109419338, "learning_rate": 7.728355326443792e-07, "loss": 0.5004, "step": 7850 }, { "epoch": 0.83, "grad_norm": 2.911759699027262, "learning_rate": 7.719255758528904e-07, "loss": 0.604, "step": 7851 }, { "epoch": 0.83, "grad_norm": 2.859440106403186, "learning_rate": 7.710161102668667e-07, "loss": 0.5881, "step": 7852 }, { "epoch": 0.83, "grad_norm": 3.133615283383733, "learning_rate": 7.701071359919654e-07, "loss": 0.5945, "step": 7853 }, { "epoch": 0.83, "grad_norm": 2.754614469699022, "learning_rate": 7.691986531337891e-07, "loss": 0.5677, "step": 7854 }, { "epoch": 0.83, "grad_norm": 2.7443708202298573, "learning_rate": 7.682906617978836e-07, "loss": 0.6404, "step": 7855 }, { "epoch": 0.83, "grad_norm": 3.087205618369263, "learning_rate": 7.673831620897376e-07, "loss": 0.6038, "step": 7856 }, { "epoch": 0.83, "grad_norm": 5.5096389980291365, "learning_rate": 7.664761541147803e-07, "loss": 0.6013, "step": 7857 }, { "epoch": 0.83, "grad_norm": 2.4275211367126226, "learning_rate": 7.65569637978385e-07, "loss": 0.6306, "step": 7858 }, { "epoch": 0.83, "grad_norm": 3.0724412487922104, "learning_rate": 7.646636137858682e-07, "loss": 0.5641, "step": 7859 }, { "epoch": 0.83, "grad_norm": 2.6908765435507656, "learning_rate": 7.637580816424906e-07, "loss": 0.5556, "step": 7860 }, { "epoch": 0.83, "grad_norm": 2.2336816700244158, "learning_rate": 7.628530416534536e-07, "loss": 0.6658, "step": 7861 }, { "epoch": 0.83, "grad_norm": 5.534474436786471, "learning_rate": 7.619484939239008e-07, "loss": 0.5065, "step": 7862 }, { "epoch": 0.83, "grad_norm": 2.25517760727134, "learning_rate": 7.610444385589206e-07, "loss": 0.5669, "step": 7863 }, { "epoch": 0.83, "grad_norm": 2.4664200875331077, "learning_rate": 7.601408756635454e-07, "loss": 0.5449, "step": 7864 }, { "epoch": 0.83, "grad_norm": 2.3983166993158402, "learning_rate": 7.592378053427463e-07, "loss": 0.631, "step": 7865 }, { "epoch": 0.83, "grad_norm": 2.44965513205932, "learning_rate": 7.583352277014405e-07, "loss": 0.6007, "step": 7866 }, { "epoch": 0.83, "grad_norm": 2.783794296236303, "learning_rate": 7.574331428444859e-07, "loss": 0.6112, "step": 7867 }, { "epoch": 0.83, "grad_norm": 7.858796558481055, "learning_rate": 7.565315508766846e-07, "loss": 0.6879, "step": 7868 }, { "epoch": 0.83, "grad_norm": 2.3392675349173246, "learning_rate": 7.556304519027824e-07, "loss": 0.535, "step": 7869 }, { "epoch": 0.83, "grad_norm": 2.345381147919508, "learning_rate": 7.547298460274655e-07, "loss": 0.5671, "step": 7870 }, { "epoch": 0.83, "grad_norm": 1.960829913288072, "learning_rate": 7.538297333553613e-07, "loss": 0.5243, "step": 7871 }, { "epoch": 0.83, "grad_norm": 2.4386836992879974, "learning_rate": 7.529301139910444e-07, "loss": 0.5946, "step": 7872 }, { "epoch": 0.83, "grad_norm": 2.1914556637515674, "learning_rate": 7.520309880390314e-07, "loss": 0.5832, "step": 7873 }, { "epoch": 0.83, "grad_norm": 4.241007701198296, "learning_rate": 7.51132355603778e-07, "loss": 0.5416, "step": 7874 }, { "epoch": 0.83, "grad_norm": 2.0724515989906513, "learning_rate": 7.502342167896847e-07, "loss": 0.5964, "step": 7875 }, { "epoch": 0.83, "grad_norm": 2.5100584726157886, "learning_rate": 7.493365717010947e-07, "loss": 0.631, "step": 7876 }, { "epoch": 0.83, "grad_norm": 2.7356056595469536, "learning_rate": 7.484394204422962e-07, "loss": 0.5424, "step": 7877 }, { "epoch": 0.83, "grad_norm": 2.0885569649811786, "learning_rate": 7.475427631175141e-07, "loss": 0.5181, "step": 7878 }, { "epoch": 0.83, "grad_norm": 2.304547742686197, "learning_rate": 7.466465998309225e-07, "loss": 0.6151, "step": 7879 }, { "epoch": 0.83, "grad_norm": 2.2979784331851913, "learning_rate": 7.457509306866329e-07, "loss": 0.569, "step": 7880 }, { "epoch": 0.83, "grad_norm": 3.0618039183537324, "learning_rate": 7.448557557887021e-07, "loss": 0.586, "step": 7881 }, { "epoch": 0.83, "grad_norm": 3.5231994277364835, "learning_rate": 7.439610752411303e-07, "loss": 0.6385, "step": 7882 }, { "epoch": 0.83, "grad_norm": 2.5269036735363617, "learning_rate": 7.430668891478576e-07, "loss": 0.5901, "step": 7883 }, { "epoch": 0.83, "grad_norm": 2.3674099325580866, "learning_rate": 7.421731976127672e-07, "loss": 0.7775, "step": 7884 }, { "epoch": 0.83, "grad_norm": 2.671376901489743, "learning_rate": 7.41280000739687e-07, "loss": 0.5261, "step": 7885 }, { "epoch": 0.83, "grad_norm": 2.820411629914039, "learning_rate": 7.403872986323862e-07, "loss": 0.569, "step": 7886 }, { "epoch": 0.83, "grad_norm": 2.577175781646432, "learning_rate": 7.394950913945759e-07, "loss": 0.5512, "step": 7887 }, { "epoch": 0.83, "grad_norm": 2.4642031464494174, "learning_rate": 7.386033791299091e-07, "loss": 0.6127, "step": 7888 }, { "epoch": 0.83, "grad_norm": 2.434672247809207, "learning_rate": 7.37712161941983e-07, "loss": 0.6554, "step": 7889 }, { "epoch": 0.83, "grad_norm": 2.5103511043710465, "learning_rate": 7.368214399343371e-07, "loss": 0.5331, "step": 7890 }, { "epoch": 0.83, "grad_norm": 2.7655999882645057, "learning_rate": 7.35931213210454e-07, "loss": 0.7108, "step": 7891 }, { "epoch": 0.83, "grad_norm": 3.2183982961814324, "learning_rate": 7.350414818737562e-07, "loss": 0.5502, "step": 7892 }, { "epoch": 0.83, "grad_norm": 3.586664475078197, "learning_rate": 7.34152246027609e-07, "loss": 0.5924, "step": 7893 }, { "epoch": 0.83, "grad_norm": 2.366201926305009, "learning_rate": 7.332635057753224e-07, "loss": 0.6236, "step": 7894 }, { "epoch": 0.83, "grad_norm": 2.3175961493215635, "learning_rate": 7.323752612201491e-07, "loss": 0.5758, "step": 7895 }, { "epoch": 0.83, "grad_norm": 16.41576198449689, "learning_rate": 7.314875124652815e-07, "loss": 0.6657, "step": 7896 }, { "epoch": 0.83, "grad_norm": 3.516520516533135, "learning_rate": 7.306002596138551e-07, "loss": 0.6438, "step": 7897 }, { "epoch": 0.83, "grad_norm": 2.9227517959801372, "learning_rate": 7.297135027689484e-07, "loss": 0.5942, "step": 7898 }, { "epoch": 0.83, "grad_norm": 2.6069484447619486, "learning_rate": 7.288272420335841e-07, "loss": 0.5786, "step": 7899 }, { "epoch": 0.83, "grad_norm": 2.5621275414127007, "learning_rate": 7.279414775107241e-07, "loss": 0.6239, "step": 7900 }, { "epoch": 0.83, "grad_norm": 2.5478933874139176, "learning_rate": 7.270562093032724e-07, "loss": 0.6154, "step": 7901 }, { "epoch": 0.83, "grad_norm": 2.5146219884970686, "learning_rate": 7.261714375140788e-07, "loss": 0.6525, "step": 7902 }, { "epoch": 0.83, "grad_norm": 4.26095835769191, "learning_rate": 7.252871622459335e-07, "loss": 0.6109, "step": 7903 }, { "epoch": 0.83, "grad_norm": 2.7915685895483926, "learning_rate": 7.244033836015696e-07, "loss": 0.7238, "step": 7904 }, { "epoch": 0.83, "grad_norm": 2.44562542266984, "learning_rate": 7.235201016836613e-07, "loss": 0.5962, "step": 7905 }, { "epoch": 0.83, "grad_norm": 4.6869459471637755, "learning_rate": 7.226373165948241e-07, "loss": 0.5836, "step": 7906 }, { "epoch": 0.83, "grad_norm": 4.282445234624132, "learning_rate": 7.21755028437619e-07, "loss": 0.6254, "step": 7907 }, { "epoch": 0.83, "grad_norm": 4.521493455628609, "learning_rate": 7.208732373145483e-07, "loss": 0.6589, "step": 7908 }, { "epoch": 0.83, "grad_norm": 2.232695529398515, "learning_rate": 7.199919433280555e-07, "loss": 0.6094, "step": 7909 }, { "epoch": 0.83, "grad_norm": 3.962644679645622, "learning_rate": 7.191111465805256e-07, "loss": 0.5981, "step": 7910 }, { "epoch": 0.83, "grad_norm": 2.4471859782636165, "learning_rate": 7.182308471742877e-07, "loss": 0.5816, "step": 7911 }, { "epoch": 0.83, "grad_norm": 2.702379481712408, "learning_rate": 7.173510452116139e-07, "loss": 0.68, "step": 7912 }, { "epoch": 0.83, "grad_norm": 1.0632897742865417, "learning_rate": 7.164717407947142e-07, "loss": 0.5708, "step": 7913 }, { "epoch": 0.83, "grad_norm": 2.423696780817971, "learning_rate": 7.155929340257467e-07, "loss": 0.5654, "step": 7914 }, { "epoch": 0.83, "grad_norm": 2.0545652099506952, "learning_rate": 7.14714625006806e-07, "loss": 0.549, "step": 7915 }, { "epoch": 0.83, "grad_norm": 4.240068457011979, "learning_rate": 7.138368138399327e-07, "loss": 0.575, "step": 7916 }, { "epoch": 0.83, "grad_norm": 2.354272324791377, "learning_rate": 7.129595006271095e-07, "loss": 0.6664, "step": 7917 }, { "epoch": 0.83, "grad_norm": 3.1167965022837523, "learning_rate": 7.120826854702589e-07, "loss": 0.5909, "step": 7918 }, { "epoch": 0.83, "grad_norm": 2.8860947312539493, "learning_rate": 7.112063684712456e-07, "loss": 0.5764, "step": 7919 }, { "epoch": 0.83, "grad_norm": 2.5639149644592307, "learning_rate": 7.103305497318786e-07, "loss": 0.5729, "step": 7920 }, { "epoch": 0.83, "grad_norm": 4.157567164040736, "learning_rate": 7.094552293539098e-07, "loss": 0.6691, "step": 7921 }, { "epoch": 0.83, "grad_norm": 3.017279987323207, "learning_rate": 7.08580407439029e-07, "loss": 0.7188, "step": 7922 }, { "epoch": 0.83, "grad_norm": 3.180206068529602, "learning_rate": 7.077060840888705e-07, "loss": 0.5835, "step": 7923 }, { "epoch": 0.83, "grad_norm": 3.2183678021512128, "learning_rate": 7.068322594050114e-07, "loss": 0.5853, "step": 7924 }, { "epoch": 0.83, "grad_norm": 2.730991122722091, "learning_rate": 7.059589334889705e-07, "loss": 0.5394, "step": 7925 }, { "epoch": 0.83, "grad_norm": 2.7372841955338085, "learning_rate": 7.050861064422087e-07, "loss": 0.6588, "step": 7926 }, { "epoch": 0.83, "grad_norm": 2.8242014928415333, "learning_rate": 7.042137783661273e-07, "loss": 0.6495, "step": 7927 }, { "epoch": 0.83, "grad_norm": 3.3717042988457577, "learning_rate": 7.033419493620708e-07, "loss": 0.6167, "step": 7928 }, { "epoch": 0.83, "grad_norm": 0.9434034854373674, "learning_rate": 7.024706195313258e-07, "loss": 0.5459, "step": 7929 }, { "epoch": 0.83, "grad_norm": 0.948265264022775, "learning_rate": 7.015997889751225e-07, "loss": 0.5389, "step": 7930 }, { "epoch": 0.83, "grad_norm": 2.464153456017488, "learning_rate": 7.007294577946306e-07, "loss": 0.5651, "step": 7931 }, { "epoch": 0.83, "grad_norm": 3.0196234432409446, "learning_rate": 6.998596260909607e-07, "loss": 0.6672, "step": 7932 }, { "epoch": 0.83, "grad_norm": 2.2187036742506003, "learning_rate": 6.989902939651694e-07, "loss": 0.6165, "step": 7933 }, { "epoch": 0.83, "grad_norm": 2.4128874884249774, "learning_rate": 6.981214615182541e-07, "loss": 0.5248, "step": 7934 }, { "epoch": 0.83, "grad_norm": 2.714711543409674, "learning_rate": 6.972531288511514e-07, "loss": 0.6234, "step": 7935 }, { "epoch": 0.84, "grad_norm": 2.566568280987358, "learning_rate": 6.963852960647416e-07, "loss": 0.6142, "step": 7936 }, { "epoch": 0.84, "grad_norm": 2.5912361052965105, "learning_rate": 6.955179632598475e-07, "loss": 0.6019, "step": 7937 }, { "epoch": 0.84, "grad_norm": 3.0046865938581657, "learning_rate": 6.946511305372327e-07, "loss": 0.6551, "step": 7938 }, { "epoch": 0.84, "grad_norm": 2.62750432152481, "learning_rate": 6.937847979976059e-07, "loss": 0.7307, "step": 7939 }, { "epoch": 0.84, "grad_norm": 4.463425594035302, "learning_rate": 6.929189657416136e-07, "loss": 0.6263, "step": 7940 }, { "epoch": 0.84, "grad_norm": 2.293961976197948, "learning_rate": 6.920536338698436e-07, "loss": 0.7305, "step": 7941 }, { "epoch": 0.84, "grad_norm": 3.245455408558447, "learning_rate": 6.911888024828295e-07, "loss": 0.6554, "step": 7942 }, { "epoch": 0.84, "grad_norm": 2.0647694096526474, "learning_rate": 6.903244716810459e-07, "loss": 0.5978, "step": 7943 }, { "epoch": 0.84, "grad_norm": 2.2770572793711135, "learning_rate": 6.894606415649074e-07, "loss": 0.6284, "step": 7944 }, { "epoch": 0.84, "grad_norm": 2.590717718772522, "learning_rate": 6.8859731223477e-07, "loss": 0.6585, "step": 7945 }, { "epoch": 0.84, "grad_norm": 2.1415058085079095, "learning_rate": 6.877344837909334e-07, "loss": 0.6856, "step": 7946 }, { "epoch": 0.84, "grad_norm": 2.5987535704327875, "learning_rate": 6.868721563336406e-07, "loss": 0.6328, "step": 7947 }, { "epoch": 0.84, "grad_norm": 2.5233039406914495, "learning_rate": 6.860103299630722e-07, "loss": 0.6124, "step": 7948 }, { "epoch": 0.84, "grad_norm": 2.524754669248327, "learning_rate": 6.851490047793524e-07, "loss": 0.6072, "step": 7949 }, { "epoch": 0.84, "grad_norm": 2.540427405947607, "learning_rate": 6.84288180882548e-07, "loss": 0.6113, "step": 7950 }, { "epoch": 0.84, "grad_norm": 0.9908048969702398, "learning_rate": 6.834278583726677e-07, "loss": 0.532, "step": 7951 }, { "epoch": 0.84, "grad_norm": 3.2038346124338073, "learning_rate": 6.825680373496618e-07, "loss": 0.5439, "step": 7952 }, { "epoch": 0.84, "grad_norm": 0.9377925721646436, "learning_rate": 6.817087179134208e-07, "loss": 0.5117, "step": 7953 }, { "epoch": 0.84, "grad_norm": 3.407003844113903, "learning_rate": 6.80849900163777e-07, "loss": 0.5797, "step": 7954 }, { "epoch": 0.84, "grad_norm": 2.2839557052539923, "learning_rate": 6.799915842005062e-07, "loss": 0.5703, "step": 7955 }, { "epoch": 0.84, "grad_norm": 2.3947698210437935, "learning_rate": 6.791337701233269e-07, "loss": 0.5937, "step": 7956 }, { "epoch": 0.84, "grad_norm": 2.6739236325422904, "learning_rate": 6.782764580318951e-07, "loss": 0.7199, "step": 7957 }, { "epoch": 0.84, "grad_norm": 2.1116863967686004, "learning_rate": 6.774196480258111e-07, "loss": 0.6494, "step": 7958 }, { "epoch": 0.84, "grad_norm": 2.1683016397788997, "learning_rate": 6.765633402046168e-07, "loss": 0.5955, "step": 7959 }, { "epoch": 0.84, "grad_norm": 3.032305057994704, "learning_rate": 6.757075346677961e-07, "loss": 0.603, "step": 7960 }, { "epoch": 0.84, "grad_norm": 2.2604613175721164, "learning_rate": 6.748522315147744e-07, "loss": 0.6187, "step": 7961 }, { "epoch": 0.84, "grad_norm": 4.969636492342722, "learning_rate": 6.739974308449176e-07, "loss": 0.6081, "step": 7962 }, { "epoch": 0.84, "grad_norm": 3.0339603599056373, "learning_rate": 6.731431327575339e-07, "loss": 0.6901, "step": 7963 }, { "epoch": 0.84, "grad_norm": 3.2703468846615533, "learning_rate": 6.722893373518724e-07, "loss": 0.5622, "step": 7964 }, { "epoch": 0.84, "grad_norm": 2.4646080011429485, "learning_rate": 6.714360447271273e-07, "loss": 0.522, "step": 7965 }, { "epoch": 0.84, "grad_norm": 2.364242035976487, "learning_rate": 6.705832549824293e-07, "loss": 0.6273, "step": 7966 }, { "epoch": 0.84, "grad_norm": 2.7352098181866817, "learning_rate": 6.69730968216853e-07, "loss": 0.6764, "step": 7967 }, { "epoch": 0.84, "grad_norm": 4.616311936586499, "learning_rate": 6.688791845294151e-07, "loss": 0.6525, "step": 7968 }, { "epoch": 0.84, "grad_norm": 2.4255233825081826, "learning_rate": 6.680279040190745e-07, "loss": 0.6588, "step": 7969 }, { "epoch": 0.84, "grad_norm": 2.5378364716116764, "learning_rate": 6.671771267847299e-07, "loss": 0.5982, "step": 7970 }, { "epoch": 0.84, "grad_norm": 2.323293588729379, "learning_rate": 6.663268529252209e-07, "loss": 0.6346, "step": 7971 }, { "epoch": 0.84, "grad_norm": 2.164418560400295, "learning_rate": 6.654770825393303e-07, "loss": 0.6584, "step": 7972 }, { "epoch": 0.84, "grad_norm": 2.9789326506768754, "learning_rate": 6.646278157257824e-07, "loss": 0.5408, "step": 7973 }, { "epoch": 0.84, "grad_norm": 2.4473343292404257, "learning_rate": 6.637790525832438e-07, "loss": 0.6776, "step": 7974 }, { "epoch": 0.84, "grad_norm": 4.333751171962327, "learning_rate": 6.629307932103201e-07, "loss": 0.6228, "step": 7975 }, { "epoch": 0.84, "grad_norm": 4.194575988760013, "learning_rate": 6.620830377055587e-07, "loss": 0.5451, "step": 7976 }, { "epoch": 0.84, "grad_norm": 2.1276202217427196, "learning_rate": 6.612357861674501e-07, "loss": 0.5791, "step": 7977 }, { "epoch": 0.84, "grad_norm": 2.5647125903291776, "learning_rate": 6.603890386944273e-07, "loss": 0.5679, "step": 7978 }, { "epoch": 0.84, "grad_norm": 2.3644218883143338, "learning_rate": 6.59542795384861e-07, "loss": 0.5646, "step": 7979 }, { "epoch": 0.84, "grad_norm": 3.3454428465301946, "learning_rate": 6.586970563370649e-07, "loss": 0.6593, "step": 7980 }, { "epoch": 0.84, "grad_norm": 3.2957829519224, "learning_rate": 6.578518216492951e-07, "loss": 0.6074, "step": 7981 }, { "epoch": 0.84, "grad_norm": 2.989238602602019, "learning_rate": 6.570070914197496e-07, "loss": 0.6612, "step": 7982 }, { "epoch": 0.84, "grad_norm": 2.6361300624581046, "learning_rate": 6.561628657465663e-07, "loss": 0.6422, "step": 7983 }, { "epoch": 0.84, "grad_norm": 2.3784169240024085, "learning_rate": 6.553191447278234e-07, "loss": 0.6598, "step": 7984 }, { "epoch": 0.84, "grad_norm": 4.116665012500234, "learning_rate": 6.544759284615431e-07, "loss": 0.5628, "step": 7985 }, { "epoch": 0.84, "grad_norm": 4.866416810480354, "learning_rate": 6.536332170456877e-07, "loss": 0.5985, "step": 7986 }, { "epoch": 0.84, "grad_norm": 2.122808635192976, "learning_rate": 6.527910105781626e-07, "loss": 0.5883, "step": 7987 }, { "epoch": 0.84, "grad_norm": 2.9397977174918566, "learning_rate": 6.519493091568108e-07, "loss": 0.6253, "step": 7988 }, { "epoch": 0.84, "grad_norm": 5.2026307795457765, "learning_rate": 6.511081128794183e-07, "loss": 0.6072, "step": 7989 }, { "epoch": 0.84, "grad_norm": 2.391514496115847, "learning_rate": 6.502674218437144e-07, "loss": 0.5603, "step": 7990 }, { "epoch": 0.84, "grad_norm": 3.156056886121653, "learning_rate": 6.494272361473681e-07, "loss": 0.6524, "step": 7991 }, { "epoch": 0.84, "grad_norm": 3.732431840720779, "learning_rate": 6.485875558879895e-07, "loss": 0.6481, "step": 7992 }, { "epoch": 0.84, "grad_norm": 1.9873437823454094, "learning_rate": 6.477483811631291e-07, "loss": 0.5987, "step": 7993 }, { "epoch": 0.84, "grad_norm": 2.613588104761939, "learning_rate": 6.469097120702805e-07, "loss": 0.6542, "step": 7994 }, { "epoch": 0.84, "grad_norm": 2.288884381877685, "learning_rate": 6.460715487068781e-07, "loss": 0.5694, "step": 7995 }, { "epoch": 0.84, "grad_norm": 3.0335170876836783, "learning_rate": 6.452338911702994e-07, "loss": 0.7001, "step": 7996 }, { "epoch": 0.84, "grad_norm": 2.7744780023995363, "learning_rate": 6.443967395578565e-07, "loss": 0.6253, "step": 7997 }, { "epoch": 0.84, "grad_norm": 2.6347984361148904, "learning_rate": 6.435600939668096e-07, "loss": 0.592, "step": 7998 }, { "epoch": 0.84, "grad_norm": 3.011072253449798, "learning_rate": 6.42723954494358e-07, "loss": 0.6553, "step": 7999 }, { "epoch": 0.84, "grad_norm": 3.0584864481962364, "learning_rate": 6.418883212376431e-07, "loss": 0.5839, "step": 8000 }, { "epoch": 0.84, "grad_norm": 2.296221820496963, "learning_rate": 6.410531942937448e-07, "loss": 0.5928, "step": 8001 }, { "epoch": 0.84, "grad_norm": 2.406097985573007, "learning_rate": 6.402185737596844e-07, "loss": 0.6986, "step": 8002 }, { "epoch": 0.84, "grad_norm": 2.2589114977154625, "learning_rate": 6.393844597324278e-07, "loss": 0.5709, "step": 8003 }, { "epoch": 0.84, "grad_norm": 2.7836938958633284, "learning_rate": 6.385508523088801e-07, "loss": 0.5831, "step": 8004 }, { "epoch": 0.84, "grad_norm": 2.6551153221400194, "learning_rate": 6.377177515858874e-07, "loss": 0.6167, "step": 8005 }, { "epoch": 0.84, "grad_norm": 2.6555647926872816, "learning_rate": 6.368851576602347e-07, "loss": 0.5478, "step": 8006 }, { "epoch": 0.84, "grad_norm": 3.0795265737705124, "learning_rate": 6.360530706286516e-07, "loss": 0.6234, "step": 8007 }, { "epoch": 0.84, "grad_norm": 5.01122414638164, "learning_rate": 6.352214905878085e-07, "loss": 0.5999, "step": 8008 }, { "epoch": 0.84, "grad_norm": 2.8498907965644706, "learning_rate": 6.343904176343169e-07, "loss": 0.6705, "step": 8009 }, { "epoch": 0.84, "grad_norm": 2.4166442373218096, "learning_rate": 6.335598518647251e-07, "loss": 0.5388, "step": 8010 }, { "epoch": 0.84, "grad_norm": 2.6880105299523067, "learning_rate": 6.327297933755272e-07, "loss": 0.5941, "step": 8011 }, { "epoch": 0.84, "grad_norm": 3.846835922064916, "learning_rate": 6.319002422631582e-07, "loss": 0.6664, "step": 8012 }, { "epoch": 0.84, "grad_norm": 3.031689173749022, "learning_rate": 6.310711986239926e-07, "loss": 0.5696, "step": 8013 }, { "epoch": 0.84, "grad_norm": 2.73606226733396, "learning_rate": 6.302426625543457e-07, "loss": 0.5136, "step": 8014 }, { "epoch": 0.84, "grad_norm": 3.9278610126869204, "learning_rate": 6.294146341504742e-07, "loss": 0.6988, "step": 8015 }, { "epoch": 0.84, "grad_norm": 2.6587658045083002, "learning_rate": 6.285871135085758e-07, "loss": 0.586, "step": 8016 }, { "epoch": 0.84, "grad_norm": 2.5101848696418028, "learning_rate": 6.277601007247913e-07, "loss": 0.6072, "step": 8017 }, { "epoch": 0.84, "grad_norm": 3.094591388241789, "learning_rate": 6.269335958951995e-07, "loss": 0.6101, "step": 8018 }, { "epoch": 0.84, "grad_norm": 2.1190721327022017, "learning_rate": 6.2610759911582e-07, "loss": 0.5325, "step": 8019 }, { "epoch": 0.84, "grad_norm": 2.394049016837729, "learning_rate": 6.252821104826163e-07, "loss": 0.6149, "step": 8020 }, { "epoch": 0.84, "grad_norm": 2.185048534616482, "learning_rate": 6.244571300914909e-07, "loss": 0.5355, "step": 8021 }, { "epoch": 0.84, "grad_norm": 0.9946377288465473, "learning_rate": 6.23632658038289e-07, "loss": 0.5187, "step": 8022 }, { "epoch": 0.84, "grad_norm": 3.8433532638524928, "learning_rate": 6.228086944187939e-07, "loss": 0.6301, "step": 8023 }, { "epoch": 0.84, "grad_norm": 3.239562343118617, "learning_rate": 6.219852393287302e-07, "loss": 0.6108, "step": 8024 }, { "epoch": 0.84, "grad_norm": 3.19755867950319, "learning_rate": 6.211622928637662e-07, "loss": 0.5238, "step": 8025 }, { "epoch": 0.84, "grad_norm": 2.1376360439152657, "learning_rate": 6.2033985511951e-07, "loss": 0.6205, "step": 8026 }, { "epoch": 0.84, "grad_norm": 2.563081310031987, "learning_rate": 6.19517926191509e-07, "loss": 0.6733, "step": 8027 }, { "epoch": 0.84, "grad_norm": 3.266713807274647, "learning_rate": 6.186965061752515e-07, "loss": 0.5803, "step": 8028 }, { "epoch": 0.84, "grad_norm": 2.33107338075643, "learning_rate": 6.178755951661692e-07, "loss": 0.542, "step": 8029 }, { "epoch": 0.84, "grad_norm": 2.1249280857293966, "learning_rate": 6.170551932596336e-07, "loss": 0.6044, "step": 8030 }, { "epoch": 0.85, "grad_norm": 3.222877055640638, "learning_rate": 6.162353005509558e-07, "loss": 0.5909, "step": 8031 }, { "epoch": 0.85, "grad_norm": 2.466145340026216, "learning_rate": 6.154159171353879e-07, "loss": 0.6029, "step": 8032 }, { "epoch": 0.85, "grad_norm": 2.5315597654614805, "learning_rate": 6.145970431081238e-07, "loss": 0.5896, "step": 8033 }, { "epoch": 0.85, "grad_norm": 2.769252012832659, "learning_rate": 6.137786785642985e-07, "loss": 0.6102, "step": 8034 }, { "epoch": 0.85, "grad_norm": 2.4509615775141977, "learning_rate": 6.129608235989881e-07, "loss": 0.5448, "step": 8035 }, { "epoch": 0.85, "grad_norm": 2.141998734885848, "learning_rate": 6.121434783072077e-07, "loss": 0.6219, "step": 8036 }, { "epoch": 0.85, "grad_norm": 2.090304082790666, "learning_rate": 6.113266427839126e-07, "loss": 0.5533, "step": 8037 }, { "epoch": 0.85, "grad_norm": 2.290451018341766, "learning_rate": 6.105103171240018e-07, "loss": 0.6181, "step": 8038 }, { "epoch": 0.85, "grad_norm": 5.270211096142163, "learning_rate": 6.096945014223149e-07, "loss": 0.614, "step": 8039 }, { "epoch": 0.85, "grad_norm": 2.9037913388153433, "learning_rate": 6.088791957736301e-07, "loss": 0.6391, "step": 8040 }, { "epoch": 0.85, "grad_norm": 2.3997821521809874, "learning_rate": 6.080644002726655e-07, "loss": 0.6159, "step": 8041 }, { "epoch": 0.85, "grad_norm": 2.436406794174699, "learning_rate": 6.072501150140824e-07, "loss": 0.6379, "step": 8042 }, { "epoch": 0.85, "grad_norm": 3.1984050878329087, "learning_rate": 6.064363400924839e-07, "loss": 0.6351, "step": 8043 }, { "epoch": 0.85, "grad_norm": 2.9565521370159096, "learning_rate": 6.056230756024123e-07, "loss": 0.5821, "step": 8044 }, { "epoch": 0.85, "grad_norm": 3.4666008560066732, "learning_rate": 6.048103216383472e-07, "loss": 0.5522, "step": 8045 }, { "epoch": 0.85, "grad_norm": 2.3667556756148342, "learning_rate": 6.03998078294713e-07, "loss": 0.5512, "step": 8046 }, { "epoch": 0.85, "grad_norm": 2.6465130816201254, "learning_rate": 6.031863456658754e-07, "loss": 0.5849, "step": 8047 }, { "epoch": 0.85, "grad_norm": 3.036938049317216, "learning_rate": 6.023751238461389e-07, "loss": 0.5304, "step": 8048 }, { "epoch": 0.85, "grad_norm": 2.075249746973687, "learning_rate": 6.015644129297482e-07, "loss": 0.6031, "step": 8049 }, { "epoch": 0.85, "grad_norm": 2.8124802810968377, "learning_rate": 6.007542130108885e-07, "loss": 0.6553, "step": 8050 }, { "epoch": 0.85, "grad_norm": 6.474131988219833, "learning_rate": 5.999445241836877e-07, "loss": 0.5124, "step": 8051 }, { "epoch": 0.85, "grad_norm": 2.668817739499102, "learning_rate": 5.991353465422134e-07, "loss": 0.5076, "step": 8052 }, { "epoch": 0.85, "grad_norm": 2.411857351900381, "learning_rate": 5.983266801804732e-07, "loss": 0.6072, "step": 8053 }, { "epoch": 0.85, "grad_norm": 2.4264765584589165, "learning_rate": 5.975185251924143e-07, "loss": 0.6211, "step": 8054 }, { "epoch": 0.85, "grad_norm": 3.102878662708729, "learning_rate": 5.967108816719264e-07, "loss": 0.6705, "step": 8055 }, { "epoch": 0.85, "grad_norm": 3.401655319504964, "learning_rate": 5.959037497128401e-07, "loss": 0.6786, "step": 8056 }, { "epoch": 0.85, "grad_norm": 3.152666465422462, "learning_rate": 5.950971294089258e-07, "loss": 0.6148, "step": 8057 }, { "epoch": 0.85, "grad_norm": 2.675848160084293, "learning_rate": 5.942910208538943e-07, "loss": 0.595, "step": 8058 }, { "epoch": 0.85, "grad_norm": 3.2338755878945475, "learning_rate": 5.934854241413951e-07, "loss": 0.62, "step": 8059 }, { "epoch": 0.85, "grad_norm": 2.7539432304617, "learning_rate": 5.926803393650215e-07, "loss": 0.563, "step": 8060 }, { "epoch": 0.85, "grad_norm": 2.418072027591026, "learning_rate": 5.918757666183067e-07, "loss": 0.5881, "step": 8061 }, { "epoch": 0.85, "grad_norm": 2.4209012407142225, "learning_rate": 5.91071705994723e-07, "loss": 0.5809, "step": 8062 }, { "epoch": 0.85, "grad_norm": 4.302902584346649, "learning_rate": 5.902681575876822e-07, "loss": 0.5901, "step": 8063 }, { "epoch": 0.85, "grad_norm": 2.3794113470801763, "learning_rate": 5.894651214905395e-07, "loss": 0.5332, "step": 8064 }, { "epoch": 0.85, "grad_norm": 2.4361869128303573, "learning_rate": 5.88662597796591e-07, "loss": 0.5329, "step": 8065 }, { "epoch": 0.85, "grad_norm": 5.815110075655275, "learning_rate": 5.878605865990694e-07, "loss": 0.6081, "step": 8066 }, { "epoch": 0.85, "grad_norm": 2.448042678880235, "learning_rate": 5.870590879911498e-07, "loss": 0.5438, "step": 8067 }, { "epoch": 0.85, "grad_norm": 2.6312265037825413, "learning_rate": 5.862581020659491e-07, "loss": 0.6526, "step": 8068 }, { "epoch": 0.85, "grad_norm": 3.162596770198441, "learning_rate": 5.854576289165232e-07, "loss": 0.6465, "step": 8069 }, { "epoch": 0.85, "grad_norm": 2.5578744017745305, "learning_rate": 5.846576686358696e-07, "loss": 0.6271, "step": 8070 }, { "epoch": 0.85, "grad_norm": 2.8100954598723407, "learning_rate": 5.838582213169247e-07, "loss": 0.6043, "step": 8071 }, { "epoch": 0.85, "grad_norm": 3.2170401537564057, "learning_rate": 5.830592870525647e-07, "loss": 0.6683, "step": 8072 }, { "epoch": 0.85, "grad_norm": 2.379729669844667, "learning_rate": 5.822608659356093e-07, "loss": 0.5669, "step": 8073 }, { "epoch": 0.85, "grad_norm": 2.759833858853163, "learning_rate": 5.814629580588165e-07, "loss": 0.679, "step": 8074 }, { "epoch": 0.85, "grad_norm": 2.379428037812773, "learning_rate": 5.80665563514885e-07, "loss": 0.5477, "step": 8075 }, { "epoch": 0.85, "grad_norm": 0.9686713188136623, "learning_rate": 5.798686823964517e-07, "loss": 0.5656, "step": 8076 }, { "epoch": 0.85, "grad_norm": 2.208901084034373, "learning_rate": 5.79072314796098e-07, "loss": 0.6539, "step": 8077 }, { "epoch": 0.85, "grad_norm": 2.319119693164557, "learning_rate": 5.78276460806343e-07, "loss": 0.5599, "step": 8078 }, { "epoch": 0.85, "grad_norm": 3.486494110324032, "learning_rate": 5.77481120519649e-07, "loss": 0.6557, "step": 8079 }, { "epoch": 0.85, "grad_norm": 2.356169785828175, "learning_rate": 5.766862940284124e-07, "loss": 0.5505, "step": 8080 }, { "epoch": 0.85, "grad_norm": 3.308551249984606, "learning_rate": 5.758919814249753e-07, "loss": 0.6307, "step": 8081 }, { "epoch": 0.85, "grad_norm": 3.1735857620440453, "learning_rate": 5.750981828016189e-07, "loss": 0.5625, "step": 8082 }, { "epoch": 0.85, "grad_norm": 3.0910390532362695, "learning_rate": 5.743048982505656e-07, "loss": 0.5263, "step": 8083 }, { "epoch": 0.85, "grad_norm": 3.0386446705294796, "learning_rate": 5.73512127863976e-07, "loss": 0.5617, "step": 8084 }, { "epoch": 0.85, "grad_norm": 2.7279151957638255, "learning_rate": 5.727198717339511e-07, "loss": 0.6422, "step": 8085 }, { "epoch": 0.85, "grad_norm": 2.677171114462176, "learning_rate": 5.719281299525331e-07, "loss": 0.5548, "step": 8086 }, { "epoch": 0.85, "grad_norm": 3.209112258602768, "learning_rate": 5.711369026117053e-07, "loss": 0.6056, "step": 8087 }, { "epoch": 0.85, "grad_norm": 2.9912493406236726, "learning_rate": 5.703461898033902e-07, "loss": 0.6632, "step": 8088 }, { "epoch": 0.85, "grad_norm": 3.8418625332520318, "learning_rate": 5.695559916194488e-07, "loss": 0.6912, "step": 8089 }, { "epoch": 0.85, "grad_norm": 2.516446973072753, "learning_rate": 5.687663081516853e-07, "loss": 0.6293, "step": 8090 }, { "epoch": 0.85, "grad_norm": 3.7840898044343776, "learning_rate": 5.679771394918427e-07, "loss": 0.5642, "step": 8091 }, { "epoch": 0.85, "grad_norm": 2.6457572435732244, "learning_rate": 5.671884857316051e-07, "loss": 0.559, "step": 8092 }, { "epoch": 0.85, "grad_norm": 2.3271517036024933, "learning_rate": 5.66400346962595e-07, "loss": 0.7182, "step": 8093 }, { "epoch": 0.85, "grad_norm": 2.7706715409747082, "learning_rate": 5.656127232763759e-07, "loss": 0.6649, "step": 8094 }, { "epoch": 0.85, "grad_norm": 2.34603048762334, "learning_rate": 5.64825614764452e-07, "loss": 0.6284, "step": 8095 }, { "epoch": 0.85, "grad_norm": 2.0215689336292253, "learning_rate": 5.640390215182683e-07, "loss": 0.6201, "step": 8096 }, { "epoch": 0.85, "grad_norm": 9.53100525886137, "learning_rate": 5.632529436292083e-07, "loss": 0.5778, "step": 8097 }, { "epoch": 0.85, "grad_norm": 2.126583426550784, "learning_rate": 5.624673811885945e-07, "loss": 0.61, "step": 8098 }, { "epoch": 0.85, "grad_norm": 2.4029139519399867, "learning_rate": 5.616823342876932e-07, "loss": 0.5994, "step": 8099 }, { "epoch": 0.85, "grad_norm": 2.3048343587136615, "learning_rate": 5.608978030177087e-07, "loss": 0.5854, "step": 8100 }, { "epoch": 0.85, "grad_norm": 2.276243991774919, "learning_rate": 5.601137874697859e-07, "loss": 0.6745, "step": 8101 }, { "epoch": 0.85, "grad_norm": 2.5942109454123052, "learning_rate": 5.593302877350076e-07, "loss": 0.6314, "step": 8102 }, { "epoch": 0.85, "grad_norm": 2.9318122332058856, "learning_rate": 5.585473039044004e-07, "loss": 0.5933, "step": 8103 }, { "epoch": 0.85, "grad_norm": 2.4441412822757806, "learning_rate": 5.577648360689281e-07, "loss": 0.6418, "step": 8104 }, { "epoch": 0.85, "grad_norm": 4.2034597908955575, "learning_rate": 5.569828843194969e-07, "loss": 0.5708, "step": 8105 }, { "epoch": 0.85, "grad_norm": 2.6115667573813166, "learning_rate": 5.562014487469502e-07, "loss": 0.6829, "step": 8106 }, { "epoch": 0.85, "grad_norm": 2.6541755315222266, "learning_rate": 5.554205294420733e-07, "loss": 0.5536, "step": 8107 }, { "epoch": 0.85, "grad_norm": 2.9752658226736735, "learning_rate": 5.546401264955909e-07, "loss": 0.5814, "step": 8108 }, { "epoch": 0.85, "grad_norm": 3.8346292608939656, "learning_rate": 5.538602399981696e-07, "loss": 0.593, "step": 8109 }, { "epoch": 0.85, "grad_norm": 2.279904270343523, "learning_rate": 5.530808700404128e-07, "loss": 0.6239, "step": 8110 }, { "epoch": 0.85, "grad_norm": 2.4794073490515975, "learning_rate": 5.523020167128651e-07, "loss": 0.5705, "step": 8111 }, { "epoch": 0.85, "grad_norm": 6.4668680618890395, "learning_rate": 5.51523680106012e-07, "loss": 0.6668, "step": 8112 }, { "epoch": 0.85, "grad_norm": 3.484931444733449, "learning_rate": 5.507458603102783e-07, "loss": 0.6922, "step": 8113 }, { "epoch": 0.85, "grad_norm": 4.674438977252659, "learning_rate": 5.499685574160312e-07, "loss": 0.5529, "step": 8114 }, { "epoch": 0.85, "grad_norm": 2.284875736493241, "learning_rate": 5.491917715135719e-07, "loss": 0.5868, "step": 8115 }, { "epoch": 0.85, "grad_norm": 2.901045185802944, "learning_rate": 5.484155026931459e-07, "loss": 0.6206, "step": 8116 }, { "epoch": 0.85, "grad_norm": 2.196587431457302, "learning_rate": 5.476397510449389e-07, "loss": 0.6526, "step": 8117 }, { "epoch": 0.85, "grad_norm": 3.1539318616200513, "learning_rate": 5.468645166590758e-07, "loss": 0.6617, "step": 8118 }, { "epoch": 0.85, "grad_norm": 2.4155197419898746, "learning_rate": 5.46089799625621e-07, "loss": 0.5818, "step": 8119 }, { "epoch": 0.85, "grad_norm": 2.6201210125396237, "learning_rate": 5.453156000345772e-07, "loss": 0.5792, "step": 8120 }, { "epoch": 0.85, "grad_norm": 2.457503317771027, "learning_rate": 5.445419179758893e-07, "loss": 0.5982, "step": 8121 }, { "epoch": 0.85, "grad_norm": 2.551730796116664, "learning_rate": 5.437687535394431e-07, "loss": 0.5919, "step": 8122 }, { "epoch": 0.85, "grad_norm": 2.726936420412654, "learning_rate": 5.429961068150619e-07, "loss": 0.5739, "step": 8123 }, { "epoch": 0.85, "grad_norm": 2.536441146532936, "learning_rate": 5.422239778925076e-07, "loss": 0.6811, "step": 8124 }, { "epoch": 0.85, "grad_norm": 2.9590736750871485, "learning_rate": 5.414523668614857e-07, "loss": 0.5417, "step": 8125 }, { "epoch": 0.86, "grad_norm": 3.7180436990498595, "learning_rate": 5.406812738116396e-07, "loss": 0.6244, "step": 8126 }, { "epoch": 0.86, "grad_norm": 2.2060907410932096, "learning_rate": 5.399106988325543e-07, "loss": 0.5769, "step": 8127 }, { "epoch": 0.86, "grad_norm": 2.9386958343089704, "learning_rate": 5.39140642013749e-07, "loss": 0.5423, "step": 8128 }, { "epoch": 0.86, "grad_norm": 2.5672726543123683, "learning_rate": 5.383711034446892e-07, "loss": 0.5791, "step": 8129 }, { "epoch": 0.86, "grad_norm": 2.6077162998901873, "learning_rate": 5.376020832147777e-07, "loss": 0.6193, "step": 8130 }, { "epoch": 0.86, "grad_norm": 3.6993050614435203, "learning_rate": 5.368335814133569e-07, "loss": 0.5434, "step": 8131 }, { "epoch": 0.86, "grad_norm": 2.2676443993494813, "learning_rate": 5.360655981297097e-07, "loss": 0.5061, "step": 8132 }, { "epoch": 0.86, "grad_norm": 3.204912156598437, "learning_rate": 5.352981334530555e-07, "loss": 0.5548, "step": 8133 }, { "epoch": 0.86, "grad_norm": 3.3583543994309633, "learning_rate": 5.345311874725584e-07, "loss": 0.5469, "step": 8134 }, { "epoch": 0.86, "grad_norm": 2.73031391634573, "learning_rate": 5.337647602773211e-07, "loss": 0.6302, "step": 8135 }, { "epoch": 0.86, "grad_norm": 2.6740573030486434, "learning_rate": 5.329988519563828e-07, "loss": 0.5692, "step": 8136 }, { "epoch": 0.86, "grad_norm": 1.9923350695353295, "learning_rate": 5.322334625987241e-07, "loss": 0.5938, "step": 8137 }, { "epoch": 0.86, "grad_norm": 2.8706603816339067, "learning_rate": 5.314685922932666e-07, "loss": 0.5283, "step": 8138 }, { "epoch": 0.86, "grad_norm": 2.4156359250535178, "learning_rate": 5.30704241128871e-07, "loss": 0.6129, "step": 8139 }, { "epoch": 0.86, "grad_norm": 2.789191292597911, "learning_rate": 5.299404091943383e-07, "loss": 0.6423, "step": 8140 }, { "epoch": 0.86, "grad_norm": 2.3542836512039425, "learning_rate": 5.291770965784076e-07, "loss": 0.6534, "step": 8141 }, { "epoch": 0.86, "grad_norm": 2.6693420950157534, "learning_rate": 5.284143033697565e-07, "loss": 0.5847, "step": 8142 }, { "epoch": 0.86, "grad_norm": 2.4801931250434497, "learning_rate": 5.276520296570053e-07, "loss": 0.6162, "step": 8143 }, { "epoch": 0.86, "grad_norm": 2.6330298094643676, "learning_rate": 5.268902755287148e-07, "loss": 0.5991, "step": 8144 }, { "epoch": 0.86, "grad_norm": 2.514352693873068, "learning_rate": 5.26129041073381e-07, "loss": 0.5545, "step": 8145 }, { "epoch": 0.86, "grad_norm": 2.334792126153137, "learning_rate": 5.253683263794418e-07, "loss": 0.5464, "step": 8146 }, { "epoch": 0.86, "grad_norm": 2.269857305345398, "learning_rate": 5.246081315352758e-07, "loss": 0.6648, "step": 8147 }, { "epoch": 0.86, "grad_norm": 2.5865095430846656, "learning_rate": 5.238484566292002e-07, "loss": 0.7269, "step": 8148 }, { "epoch": 0.86, "grad_norm": 2.4282223527075573, "learning_rate": 5.230893017494731e-07, "loss": 0.5743, "step": 8149 }, { "epoch": 0.86, "grad_norm": 1.0285336151160749, "learning_rate": 5.223306669842876e-07, "loss": 0.5332, "step": 8150 }, { "epoch": 0.86, "grad_norm": 2.5241337376310526, "learning_rate": 5.215725524217818e-07, "loss": 0.555, "step": 8151 }, { "epoch": 0.86, "grad_norm": 3.159756791738227, "learning_rate": 5.20814958150031e-07, "loss": 0.6031, "step": 8152 }, { "epoch": 0.86, "grad_norm": 2.682488901407614, "learning_rate": 5.200578842570508e-07, "loss": 0.5975, "step": 8153 }, { "epoch": 0.86, "grad_norm": 0.9504110630710011, "learning_rate": 5.19301330830796e-07, "loss": 0.559, "step": 8154 }, { "epoch": 0.86, "grad_norm": 2.283295404157167, "learning_rate": 5.185452979591593e-07, "loss": 0.647, "step": 8155 }, { "epoch": 0.86, "grad_norm": 2.895291731557172, "learning_rate": 5.177897857299752e-07, "loss": 0.5969, "step": 8156 }, { "epoch": 0.86, "grad_norm": 2.0204477401700256, "learning_rate": 5.170347942310177e-07, "loss": 0.4857, "step": 8157 }, { "epoch": 0.86, "grad_norm": 3.281207098958877, "learning_rate": 5.162803235499992e-07, "loss": 0.5998, "step": 8158 }, { "epoch": 0.86, "grad_norm": 2.9244566758863595, "learning_rate": 5.155263737745703e-07, "loss": 0.5635, "step": 8159 }, { "epoch": 0.86, "grad_norm": 2.0701940095232954, "learning_rate": 5.147729449923244e-07, "loss": 0.5843, "step": 8160 }, { "epoch": 0.86, "grad_norm": 2.5067250985588987, "learning_rate": 5.140200372907921e-07, "loss": 0.6793, "step": 8161 }, { "epoch": 0.86, "grad_norm": 3.105940578606609, "learning_rate": 5.132676507574463e-07, "loss": 0.6469, "step": 8162 }, { "epoch": 0.86, "grad_norm": 3.241058072116501, "learning_rate": 5.125157854796925e-07, "loss": 0.4969, "step": 8163 }, { "epoch": 0.86, "grad_norm": 2.792486187126245, "learning_rate": 5.11764441544883e-07, "loss": 0.6391, "step": 8164 }, { "epoch": 0.86, "grad_norm": 2.829466955623266, "learning_rate": 5.11013619040307e-07, "loss": 0.7384, "step": 8165 }, { "epoch": 0.86, "grad_norm": 2.4147912110071825, "learning_rate": 5.10263318053193e-07, "loss": 0.6223, "step": 8166 }, { "epoch": 0.86, "grad_norm": 2.362586700546461, "learning_rate": 5.095135386707084e-07, "loss": 0.588, "step": 8167 }, { "epoch": 0.86, "grad_norm": 4.445979909568467, "learning_rate": 5.087642809799587e-07, "loss": 0.6189, "step": 8168 }, { "epoch": 0.86, "grad_norm": 2.732920831150975, "learning_rate": 5.080155450679924e-07, "loss": 0.5372, "step": 8169 }, { "epoch": 0.86, "grad_norm": 2.705560498251637, "learning_rate": 5.072673310217957e-07, "loss": 0.6203, "step": 8170 }, { "epoch": 0.86, "grad_norm": 2.31980030969563, "learning_rate": 5.065196389282939e-07, "loss": 0.5589, "step": 8171 }, { "epoch": 0.86, "grad_norm": 2.5656538295462887, "learning_rate": 5.057724688743498e-07, "loss": 0.6339, "step": 8172 }, { "epoch": 0.86, "grad_norm": 2.6519050302952736, "learning_rate": 5.050258209467684e-07, "loss": 0.7224, "step": 8173 }, { "epoch": 0.86, "grad_norm": 2.591412532091134, "learning_rate": 5.042796952322943e-07, "loss": 0.5866, "step": 8174 }, { "epoch": 0.86, "grad_norm": 3.212827161480501, "learning_rate": 5.035340918176096e-07, "loss": 0.7674, "step": 8175 }, { "epoch": 0.86, "grad_norm": 2.487115162626594, "learning_rate": 5.027890107893368e-07, "loss": 0.6352, "step": 8176 }, { "epoch": 0.86, "grad_norm": 2.360221538239416, "learning_rate": 5.020444522340351e-07, "loss": 0.5827, "step": 8177 }, { "epoch": 0.86, "grad_norm": 7.6272603981979445, "learning_rate": 5.013004162382068e-07, "loss": 0.5703, "step": 8178 }, { "epoch": 0.86, "grad_norm": 2.297902416786526, "learning_rate": 5.005569028882928e-07, "loss": 0.6296, "step": 8179 }, { "epoch": 0.86, "grad_norm": 2.272681457850102, "learning_rate": 4.998139122706713e-07, "loss": 0.6528, "step": 8180 }, { "epoch": 0.86, "grad_norm": 2.8422597029287604, "learning_rate": 4.990714444716594e-07, "loss": 0.6148, "step": 8181 }, { "epoch": 0.86, "grad_norm": 2.645227694382434, "learning_rate": 4.983294995775167e-07, "loss": 0.6103, "step": 8182 }, { "epoch": 0.86, "grad_norm": 2.283592322689147, "learning_rate": 4.975880776744397e-07, "loss": 0.5401, "step": 8183 }, { "epoch": 0.86, "grad_norm": 2.1952627320177975, "learning_rate": 4.968471788485663e-07, "loss": 0.5755, "step": 8184 }, { "epoch": 0.86, "grad_norm": 2.4356967067289714, "learning_rate": 4.961068031859684e-07, "loss": 0.6096, "step": 8185 }, { "epoch": 0.86, "grad_norm": 2.8567686330899567, "learning_rate": 4.953669507726633e-07, "loss": 0.6889, "step": 8186 }, { "epoch": 0.86, "grad_norm": 3.5528236481312305, "learning_rate": 4.946276216946034e-07, "loss": 0.6052, "step": 8187 }, { "epoch": 0.86, "grad_norm": 3.0251800181776676, "learning_rate": 4.938888160376842e-07, "loss": 0.6357, "step": 8188 }, { "epoch": 0.86, "grad_norm": 3.306752775047198, "learning_rate": 4.931505338877363e-07, "loss": 0.4966, "step": 8189 }, { "epoch": 0.86, "grad_norm": 3.467733910306402, "learning_rate": 4.924127753305308e-07, "loss": 0.5819, "step": 8190 }, { "epoch": 0.86, "grad_norm": 2.5766576901577807, "learning_rate": 4.916755404517787e-07, "loss": 0.6238, "step": 8191 }, { "epoch": 0.86, "grad_norm": 6.477146410617243, "learning_rate": 4.909388293371309e-07, "loss": 0.5792, "step": 8192 }, { "epoch": 0.86, "grad_norm": 2.725439865413354, "learning_rate": 4.902026420721756e-07, "loss": 0.589, "step": 8193 }, { "epoch": 0.86, "grad_norm": 3.1642328795374106, "learning_rate": 4.894669787424399e-07, "loss": 0.5731, "step": 8194 }, { "epoch": 0.86, "grad_norm": 2.7228885313758933, "learning_rate": 4.887318394333923e-07, "loss": 0.6069, "step": 8195 }, { "epoch": 0.86, "grad_norm": 4.187726267604148, "learning_rate": 4.879972242304382e-07, "loss": 0.5925, "step": 8196 }, { "epoch": 0.86, "grad_norm": 2.4064432279219896, "learning_rate": 4.872631332189259e-07, "loss": 0.6251, "step": 8197 }, { "epoch": 0.86, "grad_norm": 2.0589344787984993, "learning_rate": 4.865295664841363e-07, "loss": 0.5597, "step": 8198 }, { "epoch": 0.86, "grad_norm": 2.3406278362981645, "learning_rate": 4.857965241112938e-07, "loss": 0.5722, "step": 8199 }, { "epoch": 0.86, "grad_norm": 3.988444067366749, "learning_rate": 4.850640061855627e-07, "loss": 0.6008, "step": 8200 }, { "epoch": 0.86, "grad_norm": 2.7834751537926765, "learning_rate": 4.843320127920442e-07, "loss": 0.5944, "step": 8201 }, { "epoch": 0.86, "grad_norm": 2.5609186141419173, "learning_rate": 4.836005440157798e-07, "loss": 0.6557, "step": 8202 }, { "epoch": 0.86, "grad_norm": 2.3038313600115945, "learning_rate": 4.828695999417471e-07, "loss": 0.6159, "step": 8203 }, { "epoch": 0.86, "grad_norm": 2.3290658242492674, "learning_rate": 4.821391806548664e-07, "loss": 0.6091, "step": 8204 }, { "epoch": 0.86, "grad_norm": 2.7246422374040185, "learning_rate": 4.814092862399971e-07, "loss": 0.6781, "step": 8205 }, { "epoch": 0.86, "grad_norm": 2.543683504573766, "learning_rate": 4.806799167819354e-07, "loss": 0.5268, "step": 8206 }, { "epoch": 0.86, "grad_norm": 2.248828186134925, "learning_rate": 4.799510723654154e-07, "loss": 0.5292, "step": 8207 }, { "epoch": 0.86, "grad_norm": 3.487968354722907, "learning_rate": 4.792227530751137e-07, "loss": 0.618, "step": 8208 }, { "epoch": 0.86, "grad_norm": 3.112110087489705, "learning_rate": 4.784949589956444e-07, "loss": 0.6297, "step": 8209 }, { "epoch": 0.86, "grad_norm": 3.553846046938456, "learning_rate": 4.777676902115613e-07, "loss": 0.6159, "step": 8210 }, { "epoch": 0.86, "grad_norm": 2.8837009978624635, "learning_rate": 4.770409468073562e-07, "loss": 0.6905, "step": 8211 }, { "epoch": 0.86, "grad_norm": 3.1032584634222546, "learning_rate": 4.7631472886745746e-07, "loss": 0.6244, "step": 8212 }, { "epoch": 0.86, "grad_norm": 2.367855236507304, "learning_rate": 4.755890364762372e-07, "loss": 0.5847, "step": 8213 }, { "epoch": 0.86, "grad_norm": 0.9490535768407505, "learning_rate": 4.748638697180052e-07, "loss": 0.6026, "step": 8214 }, { "epoch": 0.86, "grad_norm": 3.6375758139751895, "learning_rate": 4.741392286770075e-07, "loss": 0.5925, "step": 8215 }, { "epoch": 0.86, "grad_norm": 2.8159200769119366, "learning_rate": 4.734151134374304e-07, "loss": 0.5985, "step": 8216 }, { "epoch": 0.86, "grad_norm": 3.1013992053905923, "learning_rate": 4.7269152408340067e-07, "loss": 0.718, "step": 8217 }, { "epoch": 0.86, "grad_norm": 2.2854215939813325, "learning_rate": 4.7196846069898216e-07, "loss": 0.5956, "step": 8218 }, { "epoch": 0.86, "grad_norm": 2.3465869398994483, "learning_rate": 4.71245923368181e-07, "loss": 0.535, "step": 8219 }, { "epoch": 0.86, "grad_norm": 2.521050340647012, "learning_rate": 4.7052391217493497e-07, "loss": 0.596, "step": 8220 }, { "epoch": 0.87, "grad_norm": 2.4595843214175392, "learning_rate": 4.698024272031276e-07, "loss": 0.5647, "step": 8221 }, { "epoch": 0.87, "grad_norm": 3.029712654205638, "learning_rate": 4.690814685365791e-07, "loss": 0.6448, "step": 8222 }, { "epoch": 0.87, "grad_norm": 2.341790414077542, "learning_rate": 4.683610362590485e-07, "loss": 0.5764, "step": 8223 }, { "epoch": 0.87, "grad_norm": 4.5428852112483105, "learning_rate": 4.6764113045423274e-07, "loss": 0.6887, "step": 8224 }, { "epoch": 0.87, "grad_norm": 2.1809278528567937, "learning_rate": 4.6692175120576834e-07, "loss": 0.6309, "step": 8225 }, { "epoch": 0.87, "grad_norm": 2.412349436289653, "learning_rate": 4.6620289859723114e-07, "loss": 0.583, "step": 8226 }, { "epoch": 0.87, "grad_norm": 2.6851844120083275, "learning_rate": 4.65484572712136e-07, "loss": 0.5324, "step": 8227 }, { "epoch": 0.87, "grad_norm": 2.350365679203484, "learning_rate": 4.6476677363393507e-07, "loss": 0.6305, "step": 8228 }, { "epoch": 0.87, "grad_norm": 2.74850582630496, "learning_rate": 4.6404950144602e-07, "loss": 0.6442, "step": 8229 }, { "epoch": 0.87, "grad_norm": 2.48312204654485, "learning_rate": 4.6333275623172137e-07, "loss": 0.6308, "step": 8230 }, { "epoch": 0.87, "grad_norm": 3.447477016671054, "learning_rate": 4.626165380743086e-07, "loss": 0.5315, "step": 8231 }, { "epoch": 0.87, "grad_norm": 2.6758503396323916, "learning_rate": 4.6190084705699243e-07, "loss": 0.6075, "step": 8232 }, { "epoch": 0.87, "grad_norm": 2.7690378845704013, "learning_rate": 4.6118568326291577e-07, "loss": 0.5348, "step": 8233 }, { "epoch": 0.87, "grad_norm": 3.288950693928659, "learning_rate": 4.604710467751661e-07, "loss": 0.5712, "step": 8234 }, { "epoch": 0.87, "grad_norm": 2.882335822482473, "learning_rate": 4.5975693767676746e-07, "loss": 0.5979, "step": 8235 }, { "epoch": 0.87, "grad_norm": 3.0747373794610313, "learning_rate": 4.590433560506841e-07, "loss": 0.6, "step": 8236 }, { "epoch": 0.87, "grad_norm": 2.6607131094292322, "learning_rate": 4.583303019798174e-07, "loss": 0.6083, "step": 8237 }, { "epoch": 0.87, "grad_norm": 3.6597203389257147, "learning_rate": 4.576177755470068e-07, "loss": 0.6753, "step": 8238 }, { "epoch": 0.87, "grad_norm": 3.4666690631206585, "learning_rate": 4.5690577683503214e-07, "loss": 0.5823, "step": 8239 }, { "epoch": 0.87, "grad_norm": 2.426851608730652, "learning_rate": 4.561943059266122e-07, "loss": 0.6137, "step": 8240 }, { "epoch": 0.87, "grad_norm": 3.5313147718873426, "learning_rate": 4.554833629044031e-07, "loss": 0.5988, "step": 8241 }, { "epoch": 0.87, "grad_norm": 2.4878565766187477, "learning_rate": 4.547729478509993e-07, "loss": 0.6371, "step": 8242 }, { "epoch": 0.87, "grad_norm": 12.485806184499575, "learning_rate": 4.540630608489355e-07, "loss": 0.5358, "step": 8243 }, { "epoch": 0.87, "grad_norm": 2.51891715467682, "learning_rate": 4.533537019806844e-07, "loss": 0.5765, "step": 8244 }, { "epoch": 0.87, "grad_norm": 2.680356743825882, "learning_rate": 4.52644871328658e-07, "loss": 0.6421, "step": 8245 }, { "epoch": 0.87, "grad_norm": 1.0197526172225977, "learning_rate": 4.5193656897520534e-07, "loss": 0.5282, "step": 8246 }, { "epoch": 0.87, "grad_norm": 2.456701500945603, "learning_rate": 4.5122879500261396e-07, "loss": 0.5945, "step": 8247 }, { "epoch": 0.87, "grad_norm": 5.368043410381294, "learning_rate": 4.50521549493112e-07, "loss": 0.6154, "step": 8248 }, { "epoch": 0.87, "grad_norm": 4.207890875247668, "learning_rate": 4.498148325288665e-07, "loss": 0.6075, "step": 8249 }, { "epoch": 0.87, "grad_norm": 2.2518464473290987, "learning_rate": 4.491086441919801e-07, "loss": 0.6067, "step": 8250 }, { "epoch": 0.87, "grad_norm": 2.2804388518048135, "learning_rate": 4.484029845644955e-07, "loss": 0.6258, "step": 8251 }, { "epoch": 0.87, "grad_norm": 2.0120465903949816, "learning_rate": 4.4769785372839493e-07, "loss": 0.533, "step": 8252 }, { "epoch": 0.87, "grad_norm": 2.7135204524309904, "learning_rate": 4.469932517655978e-07, "loss": 0.5617, "step": 8253 }, { "epoch": 0.87, "grad_norm": 2.9167866927413715, "learning_rate": 4.462891787579654e-07, "loss": 0.6386, "step": 8254 }, { "epoch": 0.87, "grad_norm": 2.0215478723122295, "learning_rate": 4.4558563478729113e-07, "loss": 0.5989, "step": 8255 }, { "epoch": 0.87, "grad_norm": 0.910071063555147, "learning_rate": 4.4488261993531233e-07, "loss": 0.5422, "step": 8256 }, { "epoch": 0.87, "grad_norm": 11.796143039706289, "learning_rate": 4.441801342837027e-07, "loss": 0.502, "step": 8257 }, { "epoch": 0.87, "grad_norm": 2.342733719631572, "learning_rate": 4.4347817791407677e-07, "loss": 0.567, "step": 8258 }, { "epoch": 0.87, "grad_norm": 4.3267678983826565, "learning_rate": 4.4277675090798445e-07, "loss": 0.5816, "step": 8259 }, { "epoch": 0.87, "grad_norm": 3.157334595406685, "learning_rate": 4.4207585334691493e-07, "loss": 0.5783, "step": 8260 }, { "epoch": 0.87, "grad_norm": 4.343720857313059, "learning_rate": 4.41375485312297e-07, "loss": 0.5595, "step": 8261 }, { "epoch": 0.87, "grad_norm": 2.109175745260641, "learning_rate": 4.406756468854989e-07, "loss": 0.5158, "step": 8262 }, { "epoch": 0.87, "grad_norm": 2.3196868345115775, "learning_rate": 4.3997633814782393e-07, "loss": 0.5602, "step": 8263 }, { "epoch": 0.87, "grad_norm": 2.7303164750460387, "learning_rate": 4.392775591805154e-07, "loss": 0.5038, "step": 8264 }, { "epoch": 0.87, "grad_norm": 2.74241779032744, "learning_rate": 4.385793100647567e-07, "loss": 0.6497, "step": 8265 }, { "epoch": 0.87, "grad_norm": 2.430609319723213, "learning_rate": 4.378815908816675e-07, "loss": 0.5677, "step": 8266 }, { "epoch": 0.87, "grad_norm": 2.7793948165517435, "learning_rate": 4.371844017123095e-07, "loss": 0.5426, "step": 8267 }, { "epoch": 0.87, "grad_norm": 3.4548513277509145, "learning_rate": 4.3648774263767624e-07, "loss": 0.5881, "step": 8268 }, { "epoch": 0.87, "grad_norm": 2.2805067121865363, "learning_rate": 4.3579161373870526e-07, "loss": 0.6534, "step": 8269 }, { "epoch": 0.87, "grad_norm": 2.521977095171458, "learning_rate": 4.350960150962702e-07, "loss": 0.5821, "step": 8270 }, { "epoch": 0.87, "grad_norm": 3.045332855637481, "learning_rate": 4.344009467911858e-07, "loss": 0.5343, "step": 8271 }, { "epoch": 0.87, "grad_norm": 3.071925089409981, "learning_rate": 4.3370640890420145e-07, "loss": 0.5714, "step": 8272 }, { "epoch": 0.87, "grad_norm": 2.3770918532643552, "learning_rate": 4.3301240151600587e-07, "loss": 0.6368, "step": 8273 }, { "epoch": 0.87, "grad_norm": 2.0377462716813706, "learning_rate": 4.3231892470722736e-07, "loss": 0.5848, "step": 8274 }, { "epoch": 0.87, "grad_norm": 2.3436477279860513, "learning_rate": 4.316259785584337e-07, "loss": 0.524, "step": 8275 }, { "epoch": 0.87, "grad_norm": 2.7364690734810475, "learning_rate": 4.309335631501277e-07, "loss": 0.635, "step": 8276 }, { "epoch": 0.87, "grad_norm": 2.5489141180850075, "learning_rate": 4.3024167856275166e-07, "loss": 0.5364, "step": 8277 }, { "epoch": 0.87, "grad_norm": 3.3087010035902105, "learning_rate": 4.2955032487668745e-07, "loss": 0.5727, "step": 8278 }, { "epoch": 0.87, "grad_norm": 2.3875544612935014, "learning_rate": 4.2885950217225525e-07, "loss": 0.573, "step": 8279 }, { "epoch": 0.87, "grad_norm": 0.997641790782197, "learning_rate": 4.281692105297125e-07, "loss": 0.5424, "step": 8280 }, { "epoch": 0.87, "grad_norm": 3.5825231781643305, "learning_rate": 4.2747945002925507e-07, "loss": 0.604, "step": 8281 }, { "epoch": 0.87, "grad_norm": 2.751380772733463, "learning_rate": 4.267902207510166e-07, "loss": 0.6387, "step": 8282 }, { "epoch": 0.87, "grad_norm": 2.7858717540005844, "learning_rate": 4.261015227750709e-07, "loss": 0.5677, "step": 8283 }, { "epoch": 0.87, "grad_norm": 1.001147795122434, "learning_rate": 4.254133561814289e-07, "loss": 0.5422, "step": 8284 }, { "epoch": 0.87, "grad_norm": 2.4257602264136264, "learning_rate": 4.247257210500394e-07, "loss": 0.5679, "step": 8285 }, { "epoch": 0.87, "grad_norm": 0.98287549752436, "learning_rate": 4.240386174607891e-07, "loss": 0.5719, "step": 8286 }, { "epoch": 0.87, "grad_norm": 2.0556012748341397, "learning_rate": 4.2335204549350415e-07, "loss": 0.5993, "step": 8287 }, { "epoch": 0.87, "grad_norm": 2.1184763996018456, "learning_rate": 4.226660052279491e-07, "loss": 0.5858, "step": 8288 }, { "epoch": 0.87, "grad_norm": 2.4200580318072165, "learning_rate": 4.219804967438279e-07, "loss": 0.5608, "step": 8289 }, { "epoch": 0.87, "grad_norm": 2.2140173131580885, "learning_rate": 4.2129552012077636e-07, "loss": 0.577, "step": 8290 }, { "epoch": 0.87, "grad_norm": 2.8022432950471288, "learning_rate": 4.2061107543837633e-07, "loss": 0.5129, "step": 8291 }, { "epoch": 0.87, "grad_norm": 2.3001424430333746, "learning_rate": 4.1992716277614365e-07, "loss": 0.5429, "step": 8292 }, { "epoch": 0.87, "grad_norm": 2.0628372093439378, "learning_rate": 4.1924378221353425e-07, "loss": 0.5649, "step": 8293 }, { "epoch": 0.87, "grad_norm": 3.0938803545878013, "learning_rate": 4.185609338299407e-07, "loss": 0.6533, "step": 8294 }, { "epoch": 0.87, "grad_norm": 3.2890771263913123, "learning_rate": 4.178786177046934e-07, "loss": 0.633, "step": 8295 }, { "epoch": 0.87, "grad_norm": 2.1087154492100217, "learning_rate": 4.1719683391706235e-07, "loss": 0.5208, "step": 8296 }, { "epoch": 0.87, "grad_norm": 3.20130117340025, "learning_rate": 4.165155825462569e-07, "loss": 0.66, "step": 8297 }, { "epoch": 0.87, "grad_norm": 2.38259009410489, "learning_rate": 4.158348636714216e-07, "loss": 0.698, "step": 8298 }, { "epoch": 0.87, "grad_norm": 3.955775034898537, "learning_rate": 4.151546773716392e-07, "loss": 0.6124, "step": 8299 }, { "epoch": 0.87, "grad_norm": 2.598068006883137, "learning_rate": 4.1447502372593316e-07, "loss": 0.5196, "step": 8300 }, { "epoch": 0.87, "grad_norm": 2.1646914007025493, "learning_rate": 4.137959028132632e-07, "loss": 0.631, "step": 8301 }, { "epoch": 0.87, "grad_norm": 2.3381901988980673, "learning_rate": 4.1311731471253e-07, "loss": 0.5472, "step": 8302 }, { "epoch": 0.87, "grad_norm": 4.41287496079573, "learning_rate": 4.1243925950256616e-07, "loss": 0.6113, "step": 8303 }, { "epoch": 0.87, "grad_norm": 2.709280007140201, "learning_rate": 4.117617372621474e-07, "loss": 0.5998, "step": 8304 }, { "epoch": 0.87, "grad_norm": 2.2689051954483386, "learning_rate": 4.11084748069987e-07, "loss": 0.6169, "step": 8305 }, { "epoch": 0.87, "grad_norm": 2.3500449701909503, "learning_rate": 4.1040829200473643e-07, "loss": 0.6554, "step": 8306 }, { "epoch": 0.87, "grad_norm": 2.4784819446542996, "learning_rate": 4.0973236914498284e-07, "loss": 0.6293, "step": 8307 }, { "epoch": 0.87, "grad_norm": 2.513599880145634, "learning_rate": 4.090569795692528e-07, "loss": 0.5934, "step": 8308 }, { "epoch": 0.87, "grad_norm": 2.8994935140490363, "learning_rate": 4.08382123356012e-07, "loss": 0.6651, "step": 8309 }, { "epoch": 0.87, "grad_norm": 2.715503190088068, "learning_rate": 4.077078005836638e-07, "loss": 0.5931, "step": 8310 }, { "epoch": 0.87, "grad_norm": 2.8979413858053675, "learning_rate": 4.070340113305482e-07, "loss": 0.6289, "step": 8311 }, { "epoch": 0.87, "grad_norm": 2.316723946930344, "learning_rate": 4.0636075567494384e-07, "loss": 0.6424, "step": 8312 }, { "epoch": 0.87, "grad_norm": 2.6873395112200575, "learning_rate": 4.056880336950675e-07, "loss": 0.6267, "step": 8313 }, { "epoch": 0.87, "grad_norm": 3.2114274473336453, "learning_rate": 4.05015845469075e-07, "loss": 0.6438, "step": 8314 }, { "epoch": 0.87, "grad_norm": 2.1479506466214247, "learning_rate": 4.043441910750595e-07, "loss": 0.6171, "step": 8315 }, { "epoch": 0.88, "grad_norm": 7.941279814979407, "learning_rate": 4.036730705910513e-07, "loss": 0.6145, "step": 8316 }, { "epoch": 0.88, "grad_norm": 2.467418133816867, "learning_rate": 4.030024840950181e-07, "loss": 0.592, "step": 8317 }, { "epoch": 0.88, "grad_norm": 2.429232092167091, "learning_rate": 4.0233243166486804e-07, "loss": 0.5985, "step": 8318 }, { "epoch": 0.88, "grad_norm": 2.965427954109061, "learning_rate": 4.016629133784461e-07, "loss": 0.6265, "step": 8319 }, { "epoch": 0.88, "grad_norm": 2.148171593670399, "learning_rate": 4.0099392931353454e-07, "loss": 0.5878, "step": 8320 }, { "epoch": 0.88, "grad_norm": 2.435832787059743, "learning_rate": 4.0032547954785286e-07, "loss": 0.6319, "step": 8321 }, { "epoch": 0.88, "grad_norm": 2.6890143714422567, "learning_rate": 3.996575641590611e-07, "loss": 0.5508, "step": 8322 }, { "epoch": 0.88, "grad_norm": 2.2991123215639337, "learning_rate": 3.9899018322475503e-07, "loss": 0.5276, "step": 8323 }, { "epoch": 0.88, "grad_norm": 5.492027408747583, "learning_rate": 3.983233368224709e-07, "loss": 0.639, "step": 8324 }, { "epoch": 0.88, "grad_norm": 2.436217435669819, "learning_rate": 3.9765702502967795e-07, "loss": 0.5844, "step": 8325 }, { "epoch": 0.88, "grad_norm": 2.2735941889345, "learning_rate": 3.969912479237875e-07, "loss": 0.5295, "step": 8326 }, { "epoch": 0.88, "grad_norm": 2.4581911213333942, "learning_rate": 3.963260055821477e-07, "loss": 0.5177, "step": 8327 }, { "epoch": 0.88, "grad_norm": 2.4753926504162496, "learning_rate": 3.9566129808204624e-07, "loss": 0.6626, "step": 8328 }, { "epoch": 0.88, "grad_norm": 2.324542564189604, "learning_rate": 3.9499712550070513e-07, "loss": 0.6273, "step": 8329 }, { "epoch": 0.88, "grad_norm": 3.14806628673995, "learning_rate": 3.943334879152849e-07, "loss": 0.6657, "step": 8330 }, { "epoch": 0.88, "grad_norm": 2.2230261640469764, "learning_rate": 3.936703854028873e-07, "loss": 0.5797, "step": 8331 }, { "epoch": 0.88, "grad_norm": 2.3480123532810095, "learning_rate": 3.9300781804054887e-07, "loss": 0.5624, "step": 8332 }, { "epoch": 0.88, "grad_norm": 3.982281191390795, "learning_rate": 3.9234578590524486e-07, "loss": 0.4974, "step": 8333 }, { "epoch": 0.88, "grad_norm": 2.5837995450453755, "learning_rate": 3.9168428907388755e-07, "loss": 0.6192, "step": 8334 }, { "epoch": 0.88, "grad_norm": 2.346120506226917, "learning_rate": 3.9102332762332775e-07, "loss": 0.5529, "step": 8335 }, { "epoch": 0.88, "grad_norm": 2.9988359051893787, "learning_rate": 3.903629016303551e-07, "loss": 0.6231, "step": 8336 }, { "epoch": 0.88, "grad_norm": 2.929097624377366, "learning_rate": 3.897030111716971e-07, "loss": 0.5565, "step": 8337 }, { "epoch": 0.88, "grad_norm": 4.490972492353745, "learning_rate": 3.890436563240141e-07, "loss": 0.5473, "step": 8338 }, { "epoch": 0.88, "grad_norm": 6.123463476780392, "learning_rate": 3.883848371639104e-07, "loss": 0.6492, "step": 8339 }, { "epoch": 0.88, "grad_norm": 2.1499837297094007, "learning_rate": 3.8772655376792535e-07, "loss": 0.5906, "step": 8340 }, { "epoch": 0.88, "grad_norm": 2.667246941203882, "learning_rate": 3.870688062125377e-07, "loss": 0.6599, "step": 8341 }, { "epoch": 0.88, "grad_norm": 2.3272993650955405, "learning_rate": 3.864115945741609e-07, "loss": 0.601, "step": 8342 }, { "epoch": 0.88, "grad_norm": 2.81091418829116, "learning_rate": 3.8575491892914816e-07, "loss": 0.6082, "step": 8343 }, { "epoch": 0.88, "grad_norm": 1.9611755295529303, "learning_rate": 3.8509877935379083e-07, "loss": 0.5979, "step": 8344 }, { "epoch": 0.88, "grad_norm": 2.61950855812886, "learning_rate": 3.8444317592431724e-07, "loss": 0.6358, "step": 8345 }, { "epoch": 0.88, "grad_norm": 0.9406115494374441, "learning_rate": 3.837881087168932e-07, "loss": 0.4899, "step": 8346 }, { "epoch": 0.88, "grad_norm": 2.38460628767397, "learning_rate": 3.8313357780762227e-07, "loss": 0.6332, "step": 8347 }, { "epoch": 0.88, "grad_norm": 2.7558226853136016, "learning_rate": 3.8247958327254586e-07, "loss": 0.5917, "step": 8348 }, { "epoch": 0.88, "grad_norm": 2.5358732790727547, "learning_rate": 3.8182612518764374e-07, "loss": 0.545, "step": 8349 }, { "epoch": 0.88, "grad_norm": 2.3595312762742418, "learning_rate": 3.811732036288335e-07, "loss": 0.608, "step": 8350 }, { "epoch": 0.88, "grad_norm": 2.7712731901406173, "learning_rate": 3.805208186719689e-07, "loss": 0.6207, "step": 8351 }, { "epoch": 0.88, "grad_norm": 0.9142414884157021, "learning_rate": 3.7986897039284043e-07, "loss": 0.5117, "step": 8352 }, { "epoch": 0.88, "grad_norm": 2.316330933057498, "learning_rate": 3.792176588671803e-07, "loss": 0.6112, "step": 8353 }, { "epoch": 0.88, "grad_norm": 2.9741114811382747, "learning_rate": 3.785668841706558e-07, "loss": 0.6341, "step": 8354 }, { "epoch": 0.88, "grad_norm": 2.0963466583939874, "learning_rate": 3.7791664637887137e-07, "loss": 0.5715, "step": 8355 }, { "epoch": 0.88, "grad_norm": 3.1371953655685396, "learning_rate": 3.7726694556736943e-07, "loss": 0.4608, "step": 8356 }, { "epoch": 0.88, "grad_norm": 2.2052960552688363, "learning_rate": 3.7661778181163067e-07, "loss": 0.5932, "step": 8357 }, { "epoch": 0.88, "grad_norm": 2.7273903186451576, "learning_rate": 3.759691551870737e-07, "loss": 0.6266, "step": 8358 }, { "epoch": 0.88, "grad_norm": 2.5969037148869796, "learning_rate": 3.753210657690537e-07, "loss": 0.5756, "step": 8359 }, { "epoch": 0.88, "grad_norm": 2.5347492281922293, "learning_rate": 3.746735136328633e-07, "loss": 0.6715, "step": 8360 }, { "epoch": 0.88, "grad_norm": 2.8106795258984225, "learning_rate": 3.740264988537329e-07, "loss": 0.5104, "step": 8361 }, { "epoch": 0.88, "grad_norm": 4.00381965510081, "learning_rate": 3.7338002150683174e-07, "loss": 0.6463, "step": 8362 }, { "epoch": 0.88, "grad_norm": 3.173783144313059, "learning_rate": 3.727340816672664e-07, "loss": 0.6332, "step": 8363 }, { "epoch": 0.88, "grad_norm": 2.9002556770934373, "learning_rate": 3.7208867941007974e-07, "loss": 0.6431, "step": 8364 }, { "epoch": 0.88, "grad_norm": 2.7468354426920008, "learning_rate": 3.7144381481025114e-07, "loss": 0.5672, "step": 8365 }, { "epoch": 0.88, "grad_norm": 2.4532097840078, "learning_rate": 3.707994879427007e-07, "loss": 0.6065, "step": 8366 }, { "epoch": 0.88, "grad_norm": 2.664239854523862, "learning_rate": 3.7015569888228464e-07, "loss": 0.5785, "step": 8367 }, { "epoch": 0.88, "grad_norm": 3.1037887455483544, "learning_rate": 3.6951244770379593e-07, "loss": 0.5904, "step": 8368 }, { "epoch": 0.88, "grad_norm": 2.4768409598228636, "learning_rate": 3.6886973448196475e-07, "loss": 0.5523, "step": 8369 }, { "epoch": 0.88, "grad_norm": 2.7756714612946563, "learning_rate": 3.682275592914608e-07, "loss": 0.6883, "step": 8370 }, { "epoch": 0.88, "grad_norm": 0.9591826331531919, "learning_rate": 3.675859222068895e-07, "loss": 0.5301, "step": 8371 }, { "epoch": 0.88, "grad_norm": 0.9503987597108043, "learning_rate": 3.669448233027967e-07, "loss": 0.5432, "step": 8372 }, { "epoch": 0.88, "grad_norm": 2.2208818742361625, "learning_rate": 3.6630426265366003e-07, "loss": 0.5948, "step": 8373 }, { "epoch": 0.88, "grad_norm": 0.9397750897472155, "learning_rate": 3.6566424033389947e-07, "loss": 0.5438, "step": 8374 }, { "epoch": 0.88, "grad_norm": 2.765113073654002, "learning_rate": 3.6502475641787107e-07, "loss": 0.6899, "step": 8375 }, { "epoch": 0.88, "grad_norm": 2.389118275586379, "learning_rate": 3.6438581097986867e-07, "loss": 0.7196, "step": 8376 }, { "epoch": 0.88, "grad_norm": 2.1693703595965332, "learning_rate": 3.637474040941225e-07, "loss": 0.5825, "step": 8377 }, { "epoch": 0.88, "grad_norm": 3.064136002731287, "learning_rate": 3.6310953583480024e-07, "loss": 0.6088, "step": 8378 }, { "epoch": 0.88, "grad_norm": 2.293347710145248, "learning_rate": 3.6247220627600833e-07, "loss": 0.6058, "step": 8379 }, { "epoch": 0.88, "grad_norm": 2.6363547175045237, "learning_rate": 3.6183541549179025e-07, "loss": 0.5608, "step": 8380 }, { "epoch": 0.88, "grad_norm": 2.2724907939257672, "learning_rate": 3.6119916355612627e-07, "loss": 0.5671, "step": 8381 }, { "epoch": 0.88, "grad_norm": 3.166605602109891, "learning_rate": 3.6056345054293283e-07, "loss": 0.6426, "step": 8382 }, { "epoch": 0.88, "grad_norm": 2.242029102578855, "learning_rate": 3.59928276526067e-07, "loss": 0.5707, "step": 8383 }, { "epoch": 0.88, "grad_norm": 2.7693849737510656, "learning_rate": 3.592936415793208e-07, "loss": 0.6228, "step": 8384 }, { "epoch": 0.88, "grad_norm": 2.104105928630141, "learning_rate": 3.586595457764247e-07, "loss": 0.5438, "step": 8385 }, { "epoch": 0.88, "grad_norm": 2.368575714906391, "learning_rate": 3.580259891910465e-07, "loss": 0.6578, "step": 8386 }, { "epoch": 0.88, "grad_norm": 2.8815306378727747, "learning_rate": 3.573929718967889e-07, "loss": 0.6015, "step": 8387 }, { "epoch": 0.88, "grad_norm": 2.2975996264972705, "learning_rate": 3.567604939671959e-07, "loss": 0.6111, "step": 8388 }, { "epoch": 0.88, "grad_norm": 3.8232534667191658, "learning_rate": 3.561285554757471e-07, "loss": 0.567, "step": 8389 }, { "epoch": 0.88, "grad_norm": 4.616180660257726, "learning_rate": 3.554971564958587e-07, "loss": 0.5183, "step": 8390 }, { "epoch": 0.88, "grad_norm": 5.4538629426171275, "learning_rate": 3.548662971008837e-07, "loss": 0.6621, "step": 8391 }, { "epoch": 0.88, "grad_norm": 2.688169563692595, "learning_rate": 3.5423597736411463e-07, "loss": 0.6208, "step": 8392 }, { "epoch": 0.88, "grad_norm": 3.685828822376977, "learning_rate": 3.536061973587812e-07, "loss": 0.7017, "step": 8393 }, { "epoch": 0.88, "grad_norm": 3.36466383731313, "learning_rate": 3.5297695715804825e-07, "loss": 0.6273, "step": 8394 }, { "epoch": 0.88, "grad_norm": 7.105461376785732, "learning_rate": 3.523482568350184e-07, "loss": 0.5799, "step": 8395 }, { "epoch": 0.88, "grad_norm": 3.2543324276401413, "learning_rate": 3.517200964627332e-07, "loss": 0.6232, "step": 8396 }, { "epoch": 0.88, "grad_norm": 6.139641724312683, "learning_rate": 3.510924761141704e-07, "loss": 0.6816, "step": 8397 }, { "epoch": 0.88, "grad_norm": 3.1532567391885493, "learning_rate": 3.504653958622456e-07, "loss": 0.5811, "step": 8398 }, { "epoch": 0.88, "grad_norm": 2.7533862384085492, "learning_rate": 3.49838855779811e-07, "loss": 0.5851, "step": 8399 }, { "epoch": 0.88, "grad_norm": 2.2806278164148184, "learning_rate": 3.492128559396552e-07, "loss": 0.5411, "step": 8400 }, { "epoch": 0.88, "grad_norm": 5.786894398505118, "learning_rate": 3.485873964145053e-07, "loss": 0.5874, "step": 8401 }, { "epoch": 0.88, "grad_norm": 2.223189773086896, "learning_rate": 3.479624772770268e-07, "loss": 0.6098, "step": 8402 }, { "epoch": 0.88, "grad_norm": 2.559630254288751, "learning_rate": 3.4733809859982037e-07, "loss": 0.5536, "step": 8403 }, { "epoch": 0.88, "grad_norm": 3.389108579984836, "learning_rate": 3.46714260455423e-07, "loss": 0.517, "step": 8404 }, { "epoch": 0.88, "grad_norm": 2.5393552430851085, "learning_rate": 3.460909629163117e-07, "loss": 0.6811, "step": 8405 }, { "epoch": 0.88, "grad_norm": 2.7176090256610452, "learning_rate": 3.4546820605489974e-07, "loss": 0.5495, "step": 8406 }, { "epoch": 0.88, "grad_norm": 2.11043021583628, "learning_rate": 3.448459899435369e-07, "loss": 0.6564, "step": 8407 }, { "epoch": 0.88, "grad_norm": 2.5053404923404297, "learning_rate": 3.442243146545093e-07, "loss": 0.5676, "step": 8408 }, { "epoch": 0.88, "grad_norm": 2.246167043873439, "learning_rate": 3.436031802600426e-07, "loss": 0.6413, "step": 8409 }, { "epoch": 0.88, "grad_norm": 2.9518371356414472, "learning_rate": 3.4298258683229836e-07, "loss": 0.6431, "step": 8410 }, { "epoch": 0.89, "grad_norm": 2.5332175625722124, "learning_rate": 3.423625344433756e-07, "loss": 0.6081, "step": 8411 }, { "epoch": 0.89, "grad_norm": 0.9693277831137703, "learning_rate": 3.417430231653096e-07, "loss": 0.5281, "step": 8412 }, { "epoch": 0.89, "grad_norm": 2.1510911382680074, "learning_rate": 3.4112405307007266e-07, "loss": 0.6343, "step": 8413 }, { "epoch": 0.89, "grad_norm": 2.465031585748283, "learning_rate": 3.4050562422957624e-07, "loss": 0.5676, "step": 8414 }, { "epoch": 0.89, "grad_norm": 2.664072247681827, "learning_rate": 3.3988773671566777e-07, "loss": 0.6398, "step": 8415 }, { "epoch": 0.89, "grad_norm": 3.75888208434279, "learning_rate": 3.3927039060013045e-07, "loss": 0.6638, "step": 8416 }, { "epoch": 0.89, "grad_norm": 2.388691306338053, "learning_rate": 3.3865358595468635e-07, "loss": 0.5692, "step": 8417 }, { "epoch": 0.89, "grad_norm": 2.0916358970739397, "learning_rate": 3.380373228509937e-07, "loss": 0.5645, "step": 8418 }, { "epoch": 0.89, "grad_norm": 2.745762404593912, "learning_rate": 3.374216013606485e-07, "loss": 0.5756, "step": 8419 }, { "epoch": 0.89, "grad_norm": 2.1892760583248885, "learning_rate": 3.368064215551842e-07, "loss": 0.6245, "step": 8420 }, { "epoch": 0.89, "grad_norm": 2.857345470679988, "learning_rate": 3.3619178350607016e-07, "loss": 0.6549, "step": 8421 }, { "epoch": 0.89, "grad_norm": 2.379512536999841, "learning_rate": 3.355776872847122e-07, "loss": 0.5277, "step": 8422 }, { "epoch": 0.89, "grad_norm": 2.321362797949176, "learning_rate": 3.3496413296245536e-07, "loss": 0.6036, "step": 8423 }, { "epoch": 0.89, "grad_norm": 2.245567667052969, "learning_rate": 3.343511206105804e-07, "loss": 0.5352, "step": 8424 }, { "epoch": 0.89, "grad_norm": 2.045437582667061, "learning_rate": 3.3373865030030536e-07, "loss": 0.5313, "step": 8425 }, { "epoch": 0.89, "grad_norm": 2.440444633467484, "learning_rate": 3.331267221027845e-07, "loss": 0.602, "step": 8426 }, { "epoch": 0.89, "grad_norm": 0.9346701755245185, "learning_rate": 3.325153360891109e-07, "loss": 0.5728, "step": 8427 }, { "epoch": 0.89, "grad_norm": 0.964299003618511, "learning_rate": 3.319044923303133e-07, "loss": 0.5423, "step": 8428 }, { "epoch": 0.89, "grad_norm": 2.7005210221802867, "learning_rate": 3.3129419089735825e-07, "loss": 0.6404, "step": 8429 }, { "epoch": 0.89, "grad_norm": 2.6961944982240307, "learning_rate": 3.306844318611474e-07, "loss": 0.6103, "step": 8430 }, { "epoch": 0.89, "grad_norm": 2.9859468059962073, "learning_rate": 3.300752152925213e-07, "loss": 0.616, "step": 8431 }, { "epoch": 0.89, "grad_norm": 2.782849535399388, "learning_rate": 3.2946654126225776e-07, "loss": 0.6134, "step": 8432 }, { "epoch": 0.89, "grad_norm": 2.3605109038050074, "learning_rate": 3.288584098410708e-07, "loss": 0.578, "step": 8433 }, { "epoch": 0.89, "grad_norm": 3.0653052962653273, "learning_rate": 3.282508210996105e-07, "loss": 0.5938, "step": 8434 }, { "epoch": 0.89, "grad_norm": 5.320847704691404, "learning_rate": 3.276437751084649e-07, "loss": 0.6857, "step": 8435 }, { "epoch": 0.89, "grad_norm": 4.171177977533934, "learning_rate": 3.270372719381587e-07, "loss": 0.5834, "step": 8436 }, { "epoch": 0.89, "grad_norm": 7.886349099099324, "learning_rate": 3.264313116591555e-07, "loss": 0.6321, "step": 8437 }, { "epoch": 0.89, "grad_norm": 2.8574070263501765, "learning_rate": 3.2582589434185184e-07, "loss": 0.593, "step": 8438 }, { "epoch": 0.89, "grad_norm": 0.9494414704404982, "learning_rate": 3.252210200565842e-07, "loss": 0.5359, "step": 8439 }, { "epoch": 0.89, "grad_norm": 3.1811163106053844, "learning_rate": 3.2461668887362407e-07, "loss": 0.6684, "step": 8440 }, { "epoch": 0.89, "grad_norm": 2.381250101023003, "learning_rate": 3.2401290086318315e-07, "loss": 0.5573, "step": 8441 }, { "epoch": 0.89, "grad_norm": 2.4346013172983683, "learning_rate": 3.2340965609540643e-07, "loss": 0.6883, "step": 8442 }, { "epoch": 0.89, "grad_norm": 2.3133127483274003, "learning_rate": 3.228069546403767e-07, "loss": 0.5346, "step": 8443 }, { "epoch": 0.89, "grad_norm": 4.184411233315387, "learning_rate": 3.222047965681141e-07, "loss": 0.5935, "step": 8444 }, { "epoch": 0.89, "grad_norm": 3.0278851604452073, "learning_rate": 3.2160318194857655e-07, "loss": 0.57, "step": 8445 }, { "epoch": 0.89, "grad_norm": 2.526714549043835, "learning_rate": 3.210021108516581e-07, "loss": 0.5528, "step": 8446 }, { "epoch": 0.89, "grad_norm": 7.168643812080329, "learning_rate": 3.204015833471885e-07, "loss": 0.5877, "step": 8447 }, { "epoch": 0.89, "grad_norm": 2.058474171768321, "learning_rate": 3.1980159950493526e-07, "loss": 0.6024, "step": 8448 }, { "epoch": 0.89, "grad_norm": 2.4688281862366876, "learning_rate": 3.1920215939460263e-07, "loss": 0.5965, "step": 8449 }, { "epoch": 0.89, "grad_norm": 5.718194185512959, "learning_rate": 3.186032630858332e-07, "loss": 0.5933, "step": 8450 }, { "epoch": 0.89, "grad_norm": 2.4889377896135856, "learning_rate": 3.180049106482047e-07, "loss": 0.5516, "step": 8451 }, { "epoch": 0.89, "grad_norm": 12.775983633424683, "learning_rate": 3.1740710215122985e-07, "loss": 0.5611, "step": 8452 }, { "epoch": 0.89, "grad_norm": 2.2721038696367137, "learning_rate": 3.1680983766436244e-07, "loss": 0.6155, "step": 8453 }, { "epoch": 0.89, "grad_norm": 3.1139767872960693, "learning_rate": 3.16213117256991e-07, "loss": 0.5942, "step": 8454 }, { "epoch": 0.89, "grad_norm": 3.630434059537754, "learning_rate": 3.1561694099843885e-07, "loss": 0.6442, "step": 8455 }, { "epoch": 0.89, "grad_norm": 2.577929607649873, "learning_rate": 3.1502130895797066e-07, "loss": 0.6169, "step": 8456 }, { "epoch": 0.89, "grad_norm": 2.8393452527215897, "learning_rate": 3.144262212047833e-07, "loss": 0.6314, "step": 8457 }, { "epoch": 0.89, "grad_norm": 3.113366300161247, "learning_rate": 3.138316778080125e-07, "loss": 0.6043, "step": 8458 }, { "epoch": 0.89, "grad_norm": 2.6875673979733223, "learning_rate": 3.1323767883673193e-07, "loss": 0.5829, "step": 8459 }, { "epoch": 0.89, "grad_norm": 2.46249726044324, "learning_rate": 3.1264422435994977e-07, "loss": 0.6048, "step": 8460 }, { "epoch": 0.89, "grad_norm": 2.3009561213241656, "learning_rate": 3.120513144466109e-07, "loss": 0.6559, "step": 8461 }, { "epoch": 0.89, "grad_norm": 2.97192574369102, "learning_rate": 3.114589491655989e-07, "loss": 0.5194, "step": 8462 }, { "epoch": 0.89, "grad_norm": 0.9106568447644855, "learning_rate": 3.1086712858573396e-07, "loss": 0.546, "step": 8463 }, { "epoch": 0.89, "grad_norm": 2.8679076537404593, "learning_rate": 3.10275852775771e-07, "loss": 0.6296, "step": 8464 }, { "epoch": 0.89, "grad_norm": 2.538688766180127, "learning_rate": 3.0968512180440225e-07, "loss": 0.5939, "step": 8465 }, { "epoch": 0.89, "grad_norm": 2.185856983077277, "learning_rate": 3.090949357402573e-07, "loss": 0.5949, "step": 8466 }, { "epoch": 0.89, "grad_norm": 2.1121250006231236, "learning_rate": 3.0850529465190295e-07, "loss": 0.6447, "step": 8467 }, { "epoch": 0.89, "grad_norm": 3.842408960010343, "learning_rate": 3.079161986078427e-07, "loss": 0.7068, "step": 8468 }, { "epoch": 0.89, "grad_norm": 2.2491385619430364, "learning_rate": 3.073276476765147e-07, "loss": 0.6113, "step": 8469 }, { "epoch": 0.89, "grad_norm": 2.786879233199517, "learning_rate": 3.0673964192629466e-07, "loss": 0.5902, "step": 8470 }, { "epoch": 0.89, "grad_norm": 3.7520700236563482, "learning_rate": 3.061521814254964e-07, "loss": 0.6111, "step": 8471 }, { "epoch": 0.89, "grad_norm": 3.037239255860453, "learning_rate": 3.0556526624237025e-07, "loss": 0.5664, "step": 8472 }, { "epoch": 0.89, "grad_norm": 2.6847363520434486, "learning_rate": 3.049788964451006e-07, "loss": 0.6382, "step": 8473 }, { "epoch": 0.89, "grad_norm": 2.446565844450911, "learning_rate": 3.043930721018107e-07, "loss": 0.5745, "step": 8474 }, { "epoch": 0.89, "grad_norm": 2.727557086897107, "learning_rate": 3.0380779328055945e-07, "loss": 0.5672, "step": 8475 }, { "epoch": 0.89, "grad_norm": 2.677310040784338, "learning_rate": 3.0322306004934467e-07, "loss": 0.6836, "step": 8476 }, { "epoch": 0.89, "grad_norm": 12.712228333925863, "learning_rate": 3.026388724760976e-07, "loss": 0.5878, "step": 8477 }, { "epoch": 0.89, "grad_norm": 2.7959771086323078, "learning_rate": 3.020552306286867e-07, "loss": 0.5857, "step": 8478 }, { "epoch": 0.89, "grad_norm": 2.3823323020029257, "learning_rate": 3.0147213457491887e-07, "loss": 0.6022, "step": 8479 }, { "epoch": 0.89, "grad_norm": 2.488707780332215, "learning_rate": 3.0088958438253656e-07, "loss": 0.5982, "step": 8480 }, { "epoch": 0.89, "grad_norm": 2.3382438488043396, "learning_rate": 3.00307580119219e-07, "loss": 0.543, "step": 8481 }, { "epoch": 0.89, "grad_norm": 1.034614711972979, "learning_rate": 2.9972612185258155e-07, "loss": 0.5225, "step": 8482 }, { "epoch": 0.89, "grad_norm": 3.30925472166408, "learning_rate": 2.9914520965017515e-07, "loss": 0.6756, "step": 8483 }, { "epoch": 0.89, "grad_norm": 1.0845217256514865, "learning_rate": 2.985648435794897e-07, "loss": 0.5391, "step": 8484 }, { "epoch": 0.89, "grad_norm": 2.0947266192143617, "learning_rate": 2.9798502370795123e-07, "loss": 0.6038, "step": 8485 }, { "epoch": 0.89, "grad_norm": 2.143425501436348, "learning_rate": 2.974057501029204e-07, "loss": 0.6399, "step": 8486 }, { "epoch": 0.89, "grad_norm": 2.9656804898570805, "learning_rate": 2.968270228316944e-07, "loss": 0.5797, "step": 8487 }, { "epoch": 0.89, "grad_norm": 3.1462792820258367, "learning_rate": 2.9624884196151003e-07, "loss": 0.689, "step": 8488 }, { "epoch": 0.89, "grad_norm": 2.3954574670266324, "learning_rate": 2.956712075595386e-07, "loss": 0.6849, "step": 8489 }, { "epoch": 0.89, "grad_norm": 2.4138440964653873, "learning_rate": 2.950941196928869e-07, "loss": 0.6271, "step": 8490 }, { "epoch": 0.89, "grad_norm": 2.0756281900786377, "learning_rate": 2.945175784286003e-07, "loss": 0.4975, "step": 8491 }, { "epoch": 0.89, "grad_norm": 2.6184403922706543, "learning_rate": 2.93941583833659e-07, "loss": 0.6096, "step": 8492 }, { "epoch": 0.89, "grad_norm": 3.0901832655249137, "learning_rate": 2.933661359749801e-07, "loss": 0.6738, "step": 8493 }, { "epoch": 0.89, "grad_norm": 3.0034179637882055, "learning_rate": 2.9279123491941895e-07, "loss": 0.5927, "step": 8494 }, { "epoch": 0.89, "grad_norm": 2.4572881737070493, "learning_rate": 2.9221688073376497e-07, "loss": 0.5141, "step": 8495 }, { "epoch": 0.89, "grad_norm": 1.101626346769178, "learning_rate": 2.916430734847442e-07, "loss": 0.5403, "step": 8496 }, { "epoch": 0.89, "grad_norm": 2.641326366393812, "learning_rate": 2.910698132390211e-07, "loss": 0.5564, "step": 8497 }, { "epoch": 0.89, "grad_norm": 2.471920861357954, "learning_rate": 2.904971000631951e-07, "loss": 0.6577, "step": 8498 }, { "epoch": 0.89, "grad_norm": 2.4266070572352922, "learning_rate": 2.899249340238025e-07, "loss": 0.5911, "step": 8499 }, { "epoch": 0.89, "grad_norm": 2.536560963334578, "learning_rate": 2.893533151873146e-07, "loss": 0.5689, "step": 8500 }, { "epoch": 0.89, "grad_norm": 2.6887191325980644, "learning_rate": 2.887822436201415e-07, "loss": 0.634, "step": 8501 }, { "epoch": 0.89, "grad_norm": 3.3999765388204106, "learning_rate": 2.882117193886297e-07, "loss": 0.5534, "step": 8502 }, { "epoch": 0.89, "grad_norm": 2.2917807653579576, "learning_rate": 2.8764174255905886e-07, "loss": 0.6365, "step": 8503 }, { "epoch": 0.89, "grad_norm": 2.318274584316322, "learning_rate": 2.870723131976494e-07, "loss": 0.5574, "step": 8504 }, { "epoch": 0.89, "grad_norm": 4.890909130662085, "learning_rate": 2.865034313705539e-07, "loss": 0.5967, "step": 8505 }, { "epoch": 0.9, "grad_norm": 2.0997713123306756, "learning_rate": 2.8593509714386456e-07, "loss": 0.5588, "step": 8506 }, { "epoch": 0.9, "grad_norm": 2.617809589685534, "learning_rate": 2.853673105836091e-07, "loss": 0.5374, "step": 8507 }, { "epoch": 0.9, "grad_norm": 2.68605129954623, "learning_rate": 2.8480007175575144e-07, "loss": 0.5403, "step": 8508 }, { "epoch": 0.9, "grad_norm": 2.2348488550547074, "learning_rate": 2.842333807261899e-07, "loss": 0.575, "step": 8509 }, { "epoch": 0.9, "grad_norm": 2.701909082687133, "learning_rate": 2.836672375607624e-07, "loss": 0.5603, "step": 8510 }, { "epoch": 0.9, "grad_norm": 2.982475752321344, "learning_rate": 2.831016423252425e-07, "loss": 0.5673, "step": 8511 }, { "epoch": 0.9, "grad_norm": 2.9635176976288626, "learning_rate": 2.825365950853387e-07, "loss": 0.6621, "step": 8512 }, { "epoch": 0.9, "grad_norm": 12.14753234673202, "learning_rate": 2.8197209590669573e-07, "loss": 0.5816, "step": 8513 }, { "epoch": 0.9, "grad_norm": 2.4088842765153546, "learning_rate": 2.814081448548961e-07, "loss": 0.5515, "step": 8514 }, { "epoch": 0.9, "grad_norm": 2.5924726034885115, "learning_rate": 2.8084474199545907e-07, "loss": 0.6995, "step": 8515 }, { "epoch": 0.9, "grad_norm": 2.319648207905272, "learning_rate": 2.802818873938373e-07, "loss": 0.5947, "step": 8516 }, { "epoch": 0.9, "grad_norm": 2.9901729743477024, "learning_rate": 2.79719581115423e-07, "loss": 0.6025, "step": 8517 }, { "epoch": 0.9, "grad_norm": 2.1532085821221703, "learning_rate": 2.7915782322554265e-07, "loss": 0.574, "step": 8518 }, { "epoch": 0.9, "grad_norm": 2.386635632359815, "learning_rate": 2.7859661378945966e-07, "loss": 0.5769, "step": 8519 }, { "epoch": 0.9, "grad_norm": 2.495915367797323, "learning_rate": 2.7803595287237416e-07, "loss": 0.6033, "step": 8520 }, { "epoch": 0.9, "grad_norm": 2.2480787603907597, "learning_rate": 2.7747584053942236e-07, "loss": 0.5373, "step": 8521 }, { "epoch": 0.9, "grad_norm": 2.755749524388031, "learning_rate": 2.7691627685567545e-07, "loss": 0.6319, "step": 8522 }, { "epoch": 0.9, "grad_norm": 3.8563418049060942, "learning_rate": 2.763572618861421e-07, "loss": 0.6336, "step": 8523 }, { "epoch": 0.9, "grad_norm": 0.9803488628787727, "learning_rate": 2.7579879569576805e-07, "loss": 0.5288, "step": 8524 }, { "epoch": 0.9, "grad_norm": 2.158913363300771, "learning_rate": 2.7524087834943257e-07, "loss": 0.5508, "step": 8525 }, { "epoch": 0.9, "grad_norm": 2.2990257664705704, "learning_rate": 2.746835099119555e-07, "loss": 0.5405, "step": 8526 }, { "epoch": 0.9, "grad_norm": 2.722742879522637, "learning_rate": 2.7412669044808714e-07, "loss": 0.59, "step": 8527 }, { "epoch": 0.9, "grad_norm": 2.7098085005125347, "learning_rate": 2.7357042002251977e-07, "loss": 0.6599, "step": 8528 }, { "epoch": 0.9, "grad_norm": 2.414604589485784, "learning_rate": 2.730146986998783e-07, "loss": 0.5914, "step": 8529 }, { "epoch": 0.9, "grad_norm": 2.9999875277016037, "learning_rate": 2.7245952654472495e-07, "loss": 0.6089, "step": 8530 }, { "epoch": 0.9, "grad_norm": 2.4698756738709617, "learning_rate": 2.7190490362155706e-07, "loss": 0.7083, "step": 8531 }, { "epoch": 0.9, "grad_norm": 4.0071257148376604, "learning_rate": 2.7135082999481033e-07, "loss": 0.6586, "step": 8532 }, { "epoch": 0.9, "grad_norm": 2.4350296312759276, "learning_rate": 2.707973057288554e-07, "loss": 0.5523, "step": 8533 }, { "epoch": 0.9, "grad_norm": 0.9785381511638156, "learning_rate": 2.7024433088799874e-07, "loss": 0.5391, "step": 8534 }, { "epoch": 0.9, "grad_norm": 3.7213298482636703, "learning_rate": 2.696919055364827e-07, "loss": 0.6217, "step": 8535 }, { "epoch": 0.9, "grad_norm": 2.7426731105858804, "learning_rate": 2.691400297384872e-07, "loss": 0.622, "step": 8536 }, { "epoch": 0.9, "grad_norm": 2.9682834230681583, "learning_rate": 2.6858870355812807e-07, "loss": 0.6019, "step": 8537 }, { "epoch": 0.9, "grad_norm": 2.5959681300857804, "learning_rate": 2.6803792705945574e-07, "loss": 0.5257, "step": 8538 }, { "epoch": 0.9, "grad_norm": 3.117284087962713, "learning_rate": 2.674877003064591e-07, "loss": 0.6142, "step": 8539 }, { "epoch": 0.9, "grad_norm": 2.1014683352430614, "learning_rate": 2.669380233630603e-07, "loss": 0.6515, "step": 8540 }, { "epoch": 0.9, "grad_norm": 2.665551926914057, "learning_rate": 2.6638889629311994e-07, "loss": 0.5391, "step": 8541 }, { "epoch": 0.9, "grad_norm": 3.222214668746664, "learning_rate": 2.6584031916043476e-07, "loss": 0.6326, "step": 8542 }, { "epoch": 0.9, "grad_norm": 2.6557840721881734, "learning_rate": 2.652922920287365e-07, "loss": 0.557, "step": 8543 }, { "epoch": 0.9, "grad_norm": 2.4193798372931403, "learning_rate": 2.647448149616921e-07, "loss": 0.5985, "step": 8544 }, { "epoch": 0.9, "grad_norm": 4.689574105729332, "learning_rate": 2.641978880229074e-07, "loss": 0.608, "step": 8545 }, { "epoch": 0.9, "grad_norm": 2.743675378873042, "learning_rate": 2.636515112759225e-07, "loss": 0.5973, "step": 8546 }, { "epoch": 0.9, "grad_norm": 2.5656395322683725, "learning_rate": 2.631056847842134e-07, "loss": 0.5468, "step": 8547 }, { "epoch": 0.9, "grad_norm": 3.1810462861004862, "learning_rate": 2.625604086111927e-07, "loss": 0.5399, "step": 8548 }, { "epoch": 0.9, "grad_norm": 2.5217278501998113, "learning_rate": 2.620156828202092e-07, "loss": 0.6972, "step": 8549 }, { "epoch": 0.9, "grad_norm": 2.4534326162271425, "learning_rate": 2.6147150747454776e-07, "loss": 0.6776, "step": 8550 }, { "epoch": 0.9, "grad_norm": 2.7071917479562653, "learning_rate": 2.609278826374284e-07, "loss": 0.6411, "step": 8551 }, { "epoch": 0.9, "grad_norm": 2.581422750056675, "learning_rate": 2.6038480837200896e-07, "loss": 0.6482, "step": 8552 }, { "epoch": 0.9, "grad_norm": 2.335476941637362, "learning_rate": 2.5984228474138115e-07, "loss": 0.535, "step": 8553 }, { "epoch": 0.9, "grad_norm": 2.4914563871633013, "learning_rate": 2.593003118085746e-07, "loss": 0.539, "step": 8554 }, { "epoch": 0.9, "grad_norm": 2.477999239774418, "learning_rate": 2.5875888963655396e-07, "loss": 0.5921, "step": 8555 }, { "epoch": 0.9, "grad_norm": 3.0840194634488522, "learning_rate": 2.582180182882205e-07, "loss": 0.579, "step": 8556 }, { "epoch": 0.9, "grad_norm": 2.206238807030622, "learning_rate": 2.576776978264095e-07, "loss": 0.6189, "step": 8557 }, { "epoch": 0.9, "grad_norm": 2.937734436218125, "learning_rate": 2.5713792831389473e-07, "loss": 0.6302, "step": 8558 }, { "epoch": 0.9, "grad_norm": 2.714921870739376, "learning_rate": 2.565987098133865e-07, "loss": 0.597, "step": 8559 }, { "epoch": 0.9, "grad_norm": 3.5143184938458027, "learning_rate": 2.56060042387527e-07, "loss": 0.6705, "step": 8560 }, { "epoch": 0.9, "grad_norm": 2.617749300044612, "learning_rate": 2.5552192609890004e-07, "loss": 0.6626, "step": 8561 }, { "epoch": 0.9, "grad_norm": 2.422359435875593, "learning_rate": 2.5498436101001946e-07, "loss": 0.5862, "step": 8562 }, { "epoch": 0.9, "grad_norm": 2.87556986453871, "learning_rate": 2.544473471833403e-07, "loss": 0.5421, "step": 8563 }, { "epoch": 0.9, "grad_norm": 2.5521477189588757, "learning_rate": 2.5391088468124934e-07, "loss": 0.6127, "step": 8564 }, { "epoch": 0.9, "grad_norm": 2.626589882214039, "learning_rate": 2.533749735660729e-07, "loss": 0.6057, "step": 8565 }, { "epoch": 0.9, "grad_norm": 2.272089311491973, "learning_rate": 2.528396139000705e-07, "loss": 0.5462, "step": 8566 }, { "epoch": 0.9, "grad_norm": 2.497924231522892, "learning_rate": 2.5230480574543914e-07, "loss": 0.6173, "step": 8567 }, { "epoch": 0.9, "grad_norm": 2.5946958140611014, "learning_rate": 2.5177054916431186e-07, "loss": 0.5676, "step": 8568 }, { "epoch": 0.9, "grad_norm": 4.622437188040537, "learning_rate": 2.5123684421875627e-07, "loss": 0.611, "step": 8569 }, { "epoch": 0.9, "grad_norm": 2.4619075515236317, "learning_rate": 2.507036909707766e-07, "loss": 0.597, "step": 8570 }, { "epoch": 0.9, "grad_norm": 2.130051490960946, "learning_rate": 2.5017108948231284e-07, "loss": 0.571, "step": 8571 }, { "epoch": 0.9, "grad_norm": 2.3145413732517084, "learning_rate": 2.4963903981524265e-07, "loss": 0.6216, "step": 8572 }, { "epoch": 0.9, "grad_norm": 2.6281633995236575, "learning_rate": 2.4910754203137597e-07, "loss": 0.5933, "step": 8573 }, { "epoch": 0.9, "grad_norm": 0.9962463423913068, "learning_rate": 2.4857659619246246e-07, "loss": 0.4788, "step": 8574 }, { "epoch": 0.9, "grad_norm": 3.509203947561383, "learning_rate": 2.4804620236018376e-07, "loss": 0.6344, "step": 8575 }, { "epoch": 0.9, "grad_norm": 2.3901552696802058, "learning_rate": 2.475163605961617e-07, "loss": 0.6128, "step": 8576 }, { "epoch": 0.9, "grad_norm": 2.851401165089648, "learning_rate": 2.4698707096195094e-07, "loss": 0.5852, "step": 8577 }, { "epoch": 0.9, "grad_norm": 3.7359676704634306, "learning_rate": 2.4645833351904235e-07, "loss": 0.5583, "step": 8578 }, { "epoch": 0.9, "grad_norm": 2.636845767665651, "learning_rate": 2.4593014832886344e-07, "loss": 0.6295, "step": 8579 }, { "epoch": 0.9, "grad_norm": 4.772427246864486, "learning_rate": 2.4540251545277726e-07, "loss": 0.6169, "step": 8580 }, { "epoch": 0.9, "grad_norm": 2.3697955879151564, "learning_rate": 2.448754349520832e-07, "loss": 0.5962, "step": 8581 }, { "epoch": 0.9, "grad_norm": 2.7219924370672866, "learning_rate": 2.4434890688801504e-07, "loss": 0.6225, "step": 8582 }, { "epoch": 0.9, "grad_norm": 2.566350444650246, "learning_rate": 2.4382293132174384e-07, "loss": 0.5551, "step": 8583 }, { "epoch": 0.9, "grad_norm": 2.5372826204659056, "learning_rate": 2.4329750831437514e-07, "loss": 0.6557, "step": 8584 }, { "epoch": 0.9, "grad_norm": 0.8353964126790068, "learning_rate": 2.427726379269524e-07, "loss": 0.5587, "step": 8585 }, { "epoch": 0.9, "grad_norm": 2.1888874583305187, "learning_rate": 2.422483202204523e-07, "loss": 0.692, "step": 8586 }, { "epoch": 0.9, "grad_norm": 2.4217812609046585, "learning_rate": 2.417245552557901e-07, "loss": 0.5887, "step": 8587 }, { "epoch": 0.9, "grad_norm": 3.31796627980692, "learning_rate": 2.4120134309381315e-07, "loss": 0.7178, "step": 8588 }, { "epoch": 0.9, "grad_norm": 2.213415629562026, "learning_rate": 2.406786837953079e-07, "loss": 0.5923, "step": 8589 }, { "epoch": 0.9, "grad_norm": 0.941272484421341, "learning_rate": 2.401565774209963e-07, "loss": 0.5444, "step": 8590 }, { "epoch": 0.9, "grad_norm": 3.0359025963461446, "learning_rate": 2.396350240315337e-07, "loss": 0.5498, "step": 8591 }, { "epoch": 0.9, "grad_norm": 2.6306647881917153, "learning_rate": 2.391140236875128e-07, "loss": 0.6251, "step": 8592 }, { "epoch": 0.9, "grad_norm": 2.6487766622684172, "learning_rate": 2.3859357644946233e-07, "loss": 0.6471, "step": 8593 }, { "epoch": 0.9, "grad_norm": 2.645864611230044, "learning_rate": 2.3807368237784735e-07, "loss": 0.5693, "step": 8594 }, { "epoch": 0.9, "grad_norm": 2.212215219692308, "learning_rate": 2.3755434153306555e-07, "loss": 0.5178, "step": 8595 }, { "epoch": 0.9, "grad_norm": 2.5525173270216794, "learning_rate": 2.370355539754543e-07, "loss": 0.5043, "step": 8596 }, { "epoch": 0.9, "grad_norm": 2.973557160116561, "learning_rate": 2.3651731976528314e-07, "loss": 0.5824, "step": 8597 }, { "epoch": 0.9, "grad_norm": 2.368358688574046, "learning_rate": 2.3599963896276113e-07, "loss": 0.6403, "step": 8598 }, { "epoch": 0.9, "grad_norm": 3.541437487586155, "learning_rate": 2.354825116280285e-07, "loss": 0.6646, "step": 8599 }, { "epoch": 0.9, "grad_norm": 2.6360131144611247, "learning_rate": 2.3496593782116607e-07, "loss": 0.5936, "step": 8600 }, { "epoch": 0.91, "grad_norm": 2.4729013605037418, "learning_rate": 2.3444991760218526e-07, "loss": 0.6654, "step": 8601 }, { "epoch": 0.91, "grad_norm": 2.4321075434936485, "learning_rate": 2.3393445103103762e-07, "loss": 0.6181, "step": 8602 }, { "epoch": 0.91, "grad_norm": 2.557292183237137, "learning_rate": 2.334195381676091e-07, "loss": 0.6118, "step": 8603 }, { "epoch": 0.91, "grad_norm": 2.7334084120232323, "learning_rate": 2.3290517907171962e-07, "loss": 0.5789, "step": 8604 }, { "epoch": 0.91, "grad_norm": 3.2292498265081755, "learning_rate": 2.3239137380312526e-07, "loss": 0.6254, "step": 8605 }, { "epoch": 0.91, "grad_norm": 2.2190579271215136, "learning_rate": 2.3187812242151996e-07, "loss": 0.6202, "step": 8606 }, { "epoch": 0.91, "grad_norm": 2.6556837855127435, "learning_rate": 2.3136542498653103e-07, "loss": 0.5697, "step": 8607 }, { "epoch": 0.91, "grad_norm": 2.923481136322459, "learning_rate": 2.308532815577219e-07, "loss": 0.5777, "step": 8608 }, { "epoch": 0.91, "grad_norm": 2.1026727157977447, "learning_rate": 2.3034169219459336e-07, "loss": 0.5759, "step": 8609 }, { "epoch": 0.91, "grad_norm": 2.2566630684707336, "learning_rate": 2.2983065695657835e-07, "loss": 0.605, "step": 8610 }, { "epoch": 0.91, "grad_norm": 3.0101849840783994, "learning_rate": 2.2932017590304945e-07, "loss": 0.6228, "step": 8611 }, { "epoch": 0.91, "grad_norm": 2.9371051066532257, "learning_rate": 2.2881024909331084e-07, "loss": 0.5492, "step": 8612 }, { "epoch": 0.91, "grad_norm": 3.594191038842626, "learning_rate": 2.2830087658660626e-07, "loss": 0.6286, "step": 8613 }, { "epoch": 0.91, "grad_norm": 2.2603072217979463, "learning_rate": 2.2779205844211115e-07, "loss": 0.704, "step": 8614 }, { "epoch": 0.91, "grad_norm": 0.8989627424235347, "learning_rate": 2.2728379471893992e-07, "loss": 0.4864, "step": 8615 }, { "epoch": 0.91, "grad_norm": 16.729220556087142, "learning_rate": 2.2677608547614195e-07, "loss": 0.5918, "step": 8616 }, { "epoch": 0.91, "grad_norm": 2.2488176924383954, "learning_rate": 2.2626893077269952e-07, "loss": 0.582, "step": 8617 }, { "epoch": 0.91, "grad_norm": 2.4475963095140343, "learning_rate": 2.2576233066753328e-07, "loss": 0.6073, "step": 8618 }, { "epoch": 0.91, "grad_norm": 2.50017796036233, "learning_rate": 2.252562852194984e-07, "loss": 0.6705, "step": 8619 }, { "epoch": 0.91, "grad_norm": 1.0355653284721114, "learning_rate": 2.2475079448738667e-07, "loss": 0.5797, "step": 8620 }, { "epoch": 0.91, "grad_norm": 5.740952195511575, "learning_rate": 2.2424585852992287e-07, "loss": 0.6276, "step": 8621 }, { "epoch": 0.91, "grad_norm": 2.624621427504004, "learning_rate": 2.2374147740577058e-07, "loss": 0.5743, "step": 8622 }, { "epoch": 0.91, "grad_norm": 2.4340730079804325, "learning_rate": 2.2323765117352625e-07, "loss": 0.5585, "step": 8623 }, { "epoch": 0.91, "grad_norm": 2.1286673679109693, "learning_rate": 2.2273437989172308e-07, "loss": 0.6019, "step": 8624 }, { "epoch": 0.91, "grad_norm": 4.029911691462066, "learning_rate": 2.2223166361883096e-07, "loss": 0.5091, "step": 8625 }, { "epoch": 0.91, "grad_norm": 3.1602463814705746, "learning_rate": 2.217295024132532e-07, "loss": 0.627, "step": 8626 }, { "epoch": 0.91, "grad_norm": 3.55085504048545, "learning_rate": 2.2122789633332808e-07, "loss": 0.6616, "step": 8627 }, { "epoch": 0.91, "grad_norm": 3.3530586363040547, "learning_rate": 2.2072684543733236e-07, "loss": 0.6542, "step": 8628 }, { "epoch": 0.91, "grad_norm": 2.8871688456843403, "learning_rate": 2.2022634978347668e-07, "loss": 0.7099, "step": 8629 }, { "epoch": 0.91, "grad_norm": 2.5020918168833344, "learning_rate": 2.197264094299062e-07, "loss": 0.6461, "step": 8630 }, { "epoch": 0.91, "grad_norm": 2.9739866771027983, "learning_rate": 2.192270244347039e-07, "loss": 0.6444, "step": 8631 }, { "epoch": 0.91, "grad_norm": 2.453684270588019, "learning_rate": 2.1872819485588504e-07, "loss": 0.5701, "step": 8632 }, { "epoch": 0.91, "grad_norm": 2.9298917878750905, "learning_rate": 2.1822992075140382e-07, "loss": 0.6017, "step": 8633 }, { "epoch": 0.91, "grad_norm": 4.21769668421678, "learning_rate": 2.177322021791478e-07, "loss": 0.552, "step": 8634 }, { "epoch": 0.91, "grad_norm": 2.333398707424515, "learning_rate": 2.1723503919694022e-07, "loss": 0.5818, "step": 8635 }, { "epoch": 0.91, "grad_norm": 0.9600492185451742, "learning_rate": 2.167384318625404e-07, "loss": 0.5513, "step": 8636 }, { "epoch": 0.91, "grad_norm": 2.1619655902215342, "learning_rate": 2.1624238023364164e-07, "loss": 0.6071, "step": 8637 }, { "epoch": 0.91, "grad_norm": 2.30009030782602, "learning_rate": 2.1574688436787616e-07, "loss": 0.5676, "step": 8638 }, { "epoch": 0.91, "grad_norm": 2.261626619968899, "learning_rate": 2.152519443228074e-07, "loss": 0.6164, "step": 8639 }, { "epoch": 0.91, "grad_norm": 2.5969295591844355, "learning_rate": 2.1475756015593597e-07, "loss": 0.577, "step": 8640 }, { "epoch": 0.91, "grad_norm": 2.8285213912360896, "learning_rate": 2.142637319246982e-07, "loss": 0.5747, "step": 8641 }, { "epoch": 0.91, "grad_norm": 2.6663798524618434, "learning_rate": 2.1377045968646648e-07, "loss": 0.6167, "step": 8642 }, { "epoch": 0.91, "grad_norm": 3.875625734126703, "learning_rate": 2.1327774349854669e-07, "loss": 0.642, "step": 8643 }, { "epoch": 0.91, "grad_norm": 1.0023913927338757, "learning_rate": 2.1278558341818245e-07, "loss": 0.549, "step": 8644 }, { "epoch": 0.91, "grad_norm": 2.492821927184673, "learning_rate": 2.1229397950254971e-07, "loss": 0.6158, "step": 8645 }, { "epoch": 0.91, "grad_norm": 3.954417059877932, "learning_rate": 2.1180293180876333e-07, "loss": 0.5902, "step": 8646 }, { "epoch": 0.91, "grad_norm": 2.4328009320824684, "learning_rate": 2.11312440393871e-07, "loss": 0.6403, "step": 8647 }, { "epoch": 0.91, "grad_norm": 2.6647018614968547, "learning_rate": 2.1082250531485658e-07, "loss": 0.5715, "step": 8648 }, { "epoch": 0.91, "grad_norm": 1.0060574812970433, "learning_rate": 2.1033312662863902e-07, "loss": 0.5627, "step": 8649 }, { "epoch": 0.91, "grad_norm": 2.641443035192098, "learning_rate": 2.0984430439207337e-07, "loss": 0.5118, "step": 8650 }, { "epoch": 0.91, "grad_norm": 3.9361538566284255, "learning_rate": 2.0935603866194975e-07, "loss": 0.6679, "step": 8651 }, { "epoch": 0.91, "grad_norm": 2.39373289597829, "learning_rate": 2.0886832949499337e-07, "loss": 0.5668, "step": 8652 }, { "epoch": 0.91, "grad_norm": 3.3861532699457686, "learning_rate": 2.083811769478644e-07, "loss": 0.6525, "step": 8653 }, { "epoch": 0.91, "grad_norm": 3.936995441518683, "learning_rate": 2.0789458107715876e-07, "loss": 0.7025, "step": 8654 }, { "epoch": 0.91, "grad_norm": 2.349046763306768, "learning_rate": 2.0740854193940896e-07, "loss": 0.6309, "step": 8655 }, { "epoch": 0.91, "grad_norm": 2.2596999928277253, "learning_rate": 2.0692305959107982e-07, "loss": 0.6011, "step": 8656 }, { "epoch": 0.91, "grad_norm": 0.9228352521725202, "learning_rate": 2.0643813408857516e-07, "loss": 0.4917, "step": 8657 }, { "epoch": 0.91, "grad_norm": 2.6385919262613746, "learning_rate": 2.05953765488231e-07, "loss": 0.5755, "step": 8658 }, { "epoch": 0.91, "grad_norm": 2.2137952255821753, "learning_rate": 2.0546995384632008e-07, "loss": 0.6005, "step": 8659 }, { "epoch": 0.91, "grad_norm": 2.5803538966845636, "learning_rate": 2.0498669921905024e-07, "loss": 0.7106, "step": 8660 }, { "epoch": 0.91, "grad_norm": 2.9792364157241105, "learning_rate": 2.045040016625649e-07, "loss": 0.5886, "step": 8661 }, { "epoch": 0.91, "grad_norm": 2.3666959842671464, "learning_rate": 2.04021861232942e-07, "loss": 0.5773, "step": 8662 }, { "epoch": 0.91, "grad_norm": 3.6824064705468786, "learning_rate": 2.0354027798619557e-07, "loss": 0.6144, "step": 8663 }, { "epoch": 0.91, "grad_norm": 2.435875779491401, "learning_rate": 2.030592519782748e-07, "loss": 0.6403, "step": 8664 }, { "epoch": 0.91, "grad_norm": 2.6128038565557805, "learning_rate": 2.0257878326506386e-07, "loss": 0.6337, "step": 8665 }, { "epoch": 0.91, "grad_norm": 2.282841908523947, "learning_rate": 2.020988719023814e-07, "loss": 0.5942, "step": 8666 }, { "epoch": 0.91, "grad_norm": 4.370234830147251, "learning_rate": 2.0161951794598233e-07, "loss": 0.5383, "step": 8667 }, { "epoch": 0.91, "grad_norm": 2.1849633361416223, "learning_rate": 2.011407214515576e-07, "loss": 0.6604, "step": 8668 }, { "epoch": 0.91, "grad_norm": 1.9975177789546006, "learning_rate": 2.0066248247473108e-07, "loss": 0.6303, "step": 8669 }, { "epoch": 0.91, "grad_norm": 2.522737370998732, "learning_rate": 2.0018480107106496e-07, "loss": 0.5688, "step": 8670 }, { "epoch": 0.91, "grad_norm": 2.4993421173578714, "learning_rate": 1.9970767729605268e-07, "loss": 0.5487, "step": 8671 }, { "epoch": 0.91, "grad_norm": 3.527990417897828, "learning_rate": 1.992311112051265e-07, "loss": 0.5992, "step": 8672 }, { "epoch": 0.91, "grad_norm": 6.350490216687445, "learning_rate": 1.9875510285365273e-07, "loss": 0.5894, "step": 8673 }, { "epoch": 0.91, "grad_norm": 2.5923844574515673, "learning_rate": 1.9827965229693215e-07, "loss": 0.6643, "step": 8674 }, { "epoch": 0.91, "grad_norm": 2.8259634397156805, "learning_rate": 1.978047595902005e-07, "loss": 0.5955, "step": 8675 }, { "epoch": 0.91, "grad_norm": 3.233787620175592, "learning_rate": 1.973304247886304e-07, "loss": 0.5663, "step": 8676 }, { "epoch": 0.91, "grad_norm": 2.9852555325806147, "learning_rate": 1.9685664794732884e-07, "loss": 0.6144, "step": 8677 }, { "epoch": 0.91, "grad_norm": 4.705513555230461, "learning_rate": 1.963834291213368e-07, "loss": 0.5355, "step": 8678 }, { "epoch": 0.91, "grad_norm": 5.025851027664062, "learning_rate": 1.959107683656325e-07, "loss": 0.5568, "step": 8679 }, { "epoch": 0.91, "grad_norm": 2.5826049637506676, "learning_rate": 1.954386657351276e-07, "loss": 0.6396, "step": 8680 }, { "epoch": 0.91, "grad_norm": 3.650551552519513, "learning_rate": 1.9496712128467043e-07, "loss": 0.6257, "step": 8681 }, { "epoch": 0.91, "grad_norm": 3.075805993953122, "learning_rate": 1.9449613506904275e-07, "loss": 0.6136, "step": 8682 }, { "epoch": 0.91, "grad_norm": 2.6518292689904186, "learning_rate": 1.9402570714296353e-07, "loss": 0.5471, "step": 8683 }, { "epoch": 0.91, "grad_norm": 2.6510281697671734, "learning_rate": 1.9355583756108408e-07, "loss": 0.5416, "step": 8684 }, { "epoch": 0.91, "grad_norm": 2.7279535357511, "learning_rate": 1.9308652637799352e-07, "loss": 0.5669, "step": 8685 }, { "epoch": 0.91, "grad_norm": 2.359794373937808, "learning_rate": 1.9261777364821542e-07, "loss": 0.6428, "step": 8686 }, { "epoch": 0.91, "grad_norm": 0.9251054541397337, "learning_rate": 1.9214957942620738e-07, "loss": 0.5481, "step": 8687 }, { "epoch": 0.91, "grad_norm": 2.718945841396238, "learning_rate": 1.9168194376636308e-07, "loss": 0.6115, "step": 8688 }, { "epoch": 0.91, "grad_norm": 2.0986880391335343, "learning_rate": 1.912148667230107e-07, "loss": 0.5174, "step": 8689 }, { "epoch": 0.91, "grad_norm": 2.4561034728267597, "learning_rate": 1.9074834835041523e-07, "loss": 0.6184, "step": 8690 }, { "epoch": 0.91, "grad_norm": 2.3130570678521942, "learning_rate": 1.9028238870277383e-07, "loss": 0.5663, "step": 8691 }, { "epoch": 0.91, "grad_norm": 2.233247914946738, "learning_rate": 1.8981698783422154e-07, "loss": 0.5151, "step": 8692 }, { "epoch": 0.91, "grad_norm": 2.142511538639313, "learning_rate": 1.8935214579882622e-07, "loss": 0.6681, "step": 8693 }, { "epoch": 0.91, "grad_norm": 2.3532758793115693, "learning_rate": 1.88887862650593e-07, "loss": 0.5731, "step": 8694 }, { "epoch": 0.91, "grad_norm": 2.9857413005512314, "learning_rate": 1.8842413844345986e-07, "loss": 0.5648, "step": 8695 }, { "epoch": 0.92, "grad_norm": 3.9566011801313214, "learning_rate": 1.8796097323130202e-07, "loss": 0.5285, "step": 8696 }, { "epoch": 0.92, "grad_norm": 2.808602535252724, "learning_rate": 1.8749836706792758e-07, "loss": 0.6272, "step": 8697 }, { "epoch": 0.92, "grad_norm": 2.524622740136097, "learning_rate": 1.8703632000708128e-07, "loss": 0.5818, "step": 8698 }, { "epoch": 0.92, "grad_norm": 12.660611398706083, "learning_rate": 1.8657483210244298e-07, "loss": 0.6365, "step": 8699 }, { "epoch": 0.92, "grad_norm": 2.387834624346501, "learning_rate": 1.8611390340762647e-07, "loss": 0.6027, "step": 8700 }, { "epoch": 0.92, "grad_norm": 3.142755060247217, "learning_rate": 1.8565353397618057e-07, "loss": 0.6635, "step": 8701 }, { "epoch": 0.92, "grad_norm": 3.134961455919104, "learning_rate": 1.8519372386159028e-07, "loss": 0.5493, "step": 8702 }, { "epoch": 0.92, "grad_norm": 2.8210863113852738, "learning_rate": 1.8473447311727567e-07, "loss": 0.6306, "step": 8703 }, { "epoch": 0.92, "grad_norm": 2.286001395669524, "learning_rate": 1.8427578179658957e-07, "loss": 0.5625, "step": 8704 }, { "epoch": 0.92, "grad_norm": 2.1320659323325755, "learning_rate": 1.8381764995282269e-07, "loss": 0.6272, "step": 8705 }, { "epoch": 0.92, "grad_norm": 2.786721797593951, "learning_rate": 1.8336007763919916e-07, "loss": 0.5459, "step": 8706 }, { "epoch": 0.92, "grad_norm": 2.9741697703618373, "learning_rate": 1.8290306490887866e-07, "loss": 0.6213, "step": 8707 }, { "epoch": 0.92, "grad_norm": 7.392970288943625, "learning_rate": 1.8244661181495426e-07, "loss": 0.5805, "step": 8708 }, { "epoch": 0.92, "grad_norm": 1.9781416987342864, "learning_rate": 1.8199071841045746e-07, "loss": 0.575, "step": 8709 }, { "epoch": 0.92, "grad_norm": 1.9655678631701408, "learning_rate": 1.8153538474835086e-07, "loss": 0.6157, "step": 8710 }, { "epoch": 0.92, "grad_norm": 2.1680563446589924, "learning_rate": 1.810806108815344e-07, "loss": 0.5988, "step": 8711 }, { "epoch": 0.92, "grad_norm": 2.4250575483212824, "learning_rate": 1.80626396862843e-07, "loss": 0.5865, "step": 8712 }, { "epoch": 0.92, "grad_norm": 2.1845565551153676, "learning_rate": 1.801727427450445e-07, "loss": 0.6155, "step": 8713 }, { "epoch": 0.92, "grad_norm": 2.311124360025313, "learning_rate": 1.79719648580845e-07, "loss": 0.626, "step": 8714 }, { "epoch": 0.92, "grad_norm": 0.9616951924685692, "learning_rate": 1.7926711442288247e-07, "loss": 0.5346, "step": 8715 }, { "epoch": 0.92, "grad_norm": 3.549218920031044, "learning_rate": 1.7881514032373147e-07, "loss": 0.6247, "step": 8716 }, { "epoch": 0.92, "grad_norm": 2.8740535822228344, "learning_rate": 1.7836372633590005e-07, "loss": 0.5935, "step": 8717 }, { "epoch": 0.92, "grad_norm": 5.583700320203049, "learning_rate": 1.7791287251183398e-07, "loss": 0.6242, "step": 8718 }, { "epoch": 0.92, "grad_norm": 2.7565126848470003, "learning_rate": 1.7746257890391027e-07, "loss": 0.5967, "step": 8719 }, { "epoch": 0.92, "grad_norm": 3.684123664351955, "learning_rate": 1.7701284556444377e-07, "loss": 0.5584, "step": 8720 }, { "epoch": 0.92, "grad_norm": 2.7777890746410563, "learning_rate": 1.7656367254568374e-07, "loss": 0.6293, "step": 8721 }, { "epoch": 0.92, "grad_norm": 2.4453226597465716, "learning_rate": 1.7611505989981293e-07, "loss": 0.6327, "step": 8722 }, { "epoch": 0.92, "grad_norm": 3.0727605055674445, "learning_rate": 1.7566700767894906e-07, "loss": 0.6328, "step": 8723 }, { "epoch": 0.92, "grad_norm": 3.726441601765811, "learning_rate": 1.7521951593514718e-07, "loss": 0.5472, "step": 8724 }, { "epoch": 0.92, "grad_norm": 3.130050707112883, "learning_rate": 1.7477258472039517e-07, "loss": 0.5057, "step": 8725 }, { "epoch": 0.92, "grad_norm": 2.2032936513711974, "learning_rate": 1.7432621408661532e-07, "loss": 0.5802, "step": 8726 }, { "epoch": 0.92, "grad_norm": 2.577694971816813, "learning_rate": 1.7388040408566674e-07, "loss": 0.6737, "step": 8727 }, { "epoch": 0.92, "grad_norm": 2.571554032274808, "learning_rate": 1.7343515476934136e-07, "loss": 0.5762, "step": 8728 }, { "epoch": 0.92, "grad_norm": 2.2675681775342817, "learning_rate": 1.729904661893683e-07, "loss": 0.5929, "step": 8729 }, { "epoch": 0.92, "grad_norm": 2.4825069172397907, "learning_rate": 1.725463383974091e-07, "loss": 0.5898, "step": 8730 }, { "epoch": 0.92, "grad_norm": 0.9735035948128292, "learning_rate": 1.7210277144506182e-07, "loss": 0.5335, "step": 8731 }, { "epoch": 0.92, "grad_norm": 2.631721683378093, "learning_rate": 1.7165976538385753e-07, "loss": 0.5517, "step": 8732 }, { "epoch": 0.92, "grad_norm": 1.0076500063785254, "learning_rate": 1.7121732026526506e-07, "loss": 0.5407, "step": 8733 }, { "epoch": 0.92, "grad_norm": 3.581508418879825, "learning_rate": 1.7077543614068604e-07, "loss": 0.6803, "step": 8734 }, { "epoch": 0.92, "grad_norm": 2.4861781722789957, "learning_rate": 1.703341130614572e-07, "loss": 0.6355, "step": 8735 }, { "epoch": 0.92, "grad_norm": 3.515627205530569, "learning_rate": 1.6989335107884863e-07, "loss": 0.6033, "step": 8736 }, { "epoch": 0.92, "grad_norm": 2.3777058580194343, "learning_rate": 1.6945315024406883e-07, "loss": 0.513, "step": 8737 }, { "epoch": 0.92, "grad_norm": 0.8876260556728341, "learning_rate": 1.6901351060825854e-07, "loss": 0.5056, "step": 8738 }, { "epoch": 0.92, "grad_norm": 2.7599871753116862, "learning_rate": 1.685744322224936e-07, "loss": 0.5685, "step": 8739 }, { "epoch": 0.92, "grad_norm": 3.2326213858288804, "learning_rate": 1.681359151377848e-07, "loss": 0.5538, "step": 8740 }, { "epoch": 0.92, "grad_norm": 2.901673864970551, "learning_rate": 1.676979594050776e-07, "loss": 0.6152, "step": 8741 }, { "epoch": 0.92, "grad_norm": 0.8826659096790682, "learning_rate": 1.6726056507525347e-07, "loss": 0.5152, "step": 8742 }, { "epoch": 0.92, "grad_norm": 2.665982233334478, "learning_rate": 1.668237321991262e-07, "loss": 0.5803, "step": 8743 }, { "epoch": 0.92, "grad_norm": 2.5198209538212146, "learning_rate": 1.6638746082744684e-07, "loss": 0.6382, "step": 8744 }, { "epoch": 0.92, "grad_norm": 2.568498917912977, "learning_rate": 1.6595175101089877e-07, "loss": 0.6567, "step": 8745 }, { "epoch": 0.92, "grad_norm": 3.409410318358604, "learning_rate": 1.6551660280010316e-07, "loss": 0.6195, "step": 8746 }, { "epoch": 0.92, "grad_norm": 2.719189581656458, "learning_rate": 1.6508201624561404e-07, "loss": 0.6318, "step": 8747 }, { "epoch": 0.92, "grad_norm": 9.214568272301674, "learning_rate": 1.6464799139791877e-07, "loss": 0.5042, "step": 8748 }, { "epoch": 0.92, "grad_norm": 4.086249960723928, "learning_rate": 1.6421452830744366e-07, "loss": 0.6093, "step": 8749 }, { "epoch": 0.92, "grad_norm": 3.992792950152544, "learning_rate": 1.6378162702454458e-07, "loss": 0.5714, "step": 8750 }, { "epoch": 0.92, "grad_norm": 4.417350418321379, "learning_rate": 1.6334928759951684e-07, "loss": 0.6329, "step": 8751 }, { "epoch": 0.92, "grad_norm": 2.3306711526974686, "learning_rate": 1.6291751008258693e-07, "loss": 0.5744, "step": 8752 }, { "epoch": 0.92, "grad_norm": 2.8025552214715774, "learning_rate": 1.6248629452391862e-07, "loss": 0.5275, "step": 8753 }, { "epoch": 0.92, "grad_norm": 2.15974372047649, "learning_rate": 1.620556409736085e-07, "loss": 0.6049, "step": 8754 }, { "epoch": 0.92, "grad_norm": 3.655812836017327, "learning_rate": 1.616255494816893e-07, "loss": 0.5968, "step": 8755 }, { "epoch": 0.92, "grad_norm": 2.7698264187396346, "learning_rate": 1.6119602009812663e-07, "loss": 0.5531, "step": 8756 }, { "epoch": 0.92, "grad_norm": 3.424978511207146, "learning_rate": 1.6076705287282336e-07, "loss": 0.6074, "step": 8757 }, { "epoch": 0.92, "grad_norm": 2.5086511137119807, "learning_rate": 1.6033864785561515e-07, "loss": 0.567, "step": 8758 }, { "epoch": 0.92, "grad_norm": 2.707797137818906, "learning_rate": 1.5991080509627222e-07, "loss": 0.6671, "step": 8759 }, { "epoch": 0.92, "grad_norm": 2.3155332443393775, "learning_rate": 1.5948352464450146e-07, "loss": 0.5503, "step": 8760 }, { "epoch": 0.92, "grad_norm": 3.2510331858276005, "learning_rate": 1.590568065499415e-07, "loss": 0.6343, "step": 8761 }, { "epoch": 0.92, "grad_norm": 2.6772676116951213, "learning_rate": 1.5863065086216878e-07, "loss": 0.5982, "step": 8762 }, { "epoch": 0.92, "grad_norm": 2.394546302211017, "learning_rate": 1.582050576306915e-07, "loss": 0.6014, "step": 8763 }, { "epoch": 0.92, "grad_norm": 2.0972459059941415, "learning_rate": 1.5778002690495453e-07, "loss": 0.5364, "step": 8764 }, { "epoch": 0.92, "grad_norm": 2.9104609457881114, "learning_rate": 1.5735555873433673e-07, "loss": 0.5498, "step": 8765 }, { "epoch": 0.92, "grad_norm": 3.1530370674231816, "learning_rate": 1.569316531681514e-07, "loss": 0.6073, "step": 8766 }, { "epoch": 0.92, "grad_norm": 2.348747898326347, "learning_rate": 1.565083102556464e-07, "loss": 0.5991, "step": 8767 }, { "epoch": 0.92, "grad_norm": 2.64211735957492, "learning_rate": 1.560855300460057e-07, "loss": 0.5962, "step": 8768 }, { "epoch": 0.92, "grad_norm": 2.5983785487904294, "learning_rate": 1.5566331258834498e-07, "loss": 0.6021, "step": 8769 }, { "epoch": 0.92, "grad_norm": 3.48505144340559, "learning_rate": 1.552416579317173e-07, "loss": 0.5457, "step": 8770 }, { "epoch": 0.92, "grad_norm": 4.118622729954412, "learning_rate": 1.5482056612510898e-07, "loss": 0.5525, "step": 8771 }, { "epoch": 0.92, "grad_norm": 2.453328716580977, "learning_rate": 1.544000372174409e-07, "loss": 0.5662, "step": 8772 }, { "epoch": 0.92, "grad_norm": 2.715493793911031, "learning_rate": 1.539800712575701e-07, "loss": 0.6442, "step": 8773 }, { "epoch": 0.92, "grad_norm": 2.534362203603708, "learning_rate": 1.5356066829428529e-07, "loss": 0.5944, "step": 8774 }, { "epoch": 0.92, "grad_norm": 2.7191866220592633, "learning_rate": 1.531418283763131e-07, "loss": 0.605, "step": 8775 }, { "epoch": 0.92, "grad_norm": 3.6751125356427465, "learning_rate": 1.5272355155231233e-07, "loss": 0.5287, "step": 8776 }, { "epoch": 0.92, "grad_norm": 2.6076140742413605, "learning_rate": 1.5230583787087693e-07, "loss": 0.6233, "step": 8777 }, { "epoch": 0.92, "grad_norm": 3.2526568139523055, "learning_rate": 1.5188868738053643e-07, "loss": 0.6007, "step": 8778 }, { "epoch": 0.92, "grad_norm": 2.556172706540122, "learning_rate": 1.5147210012975366e-07, "loss": 0.5975, "step": 8779 }, { "epoch": 0.92, "grad_norm": 2.317968435237044, "learning_rate": 1.5105607616692665e-07, "loss": 0.5119, "step": 8780 }, { "epoch": 0.92, "grad_norm": 2.364058422346575, "learning_rate": 1.5064061554038723e-07, "loss": 0.6071, "step": 8781 }, { "epoch": 0.92, "grad_norm": 2.123856213413403, "learning_rate": 1.5022571829840404e-07, "loss": 0.5481, "step": 8782 }, { "epoch": 0.92, "grad_norm": 2.5488265654803324, "learning_rate": 1.4981138448917686e-07, "loss": 0.5901, "step": 8783 }, { "epoch": 0.92, "grad_norm": 3.9357992577530965, "learning_rate": 1.4939761416084274e-07, "loss": 0.5701, "step": 8784 }, { "epoch": 0.92, "grad_norm": 2.0256367627019287, "learning_rate": 1.4898440736147213e-07, "loss": 0.6026, "step": 8785 }, { "epoch": 0.92, "grad_norm": 4.257081804924682, "learning_rate": 1.4857176413907048e-07, "loss": 0.5129, "step": 8786 }, { "epoch": 0.92, "grad_norm": 2.7139299347714076, "learning_rate": 1.481596845415767e-07, "loss": 0.5782, "step": 8787 }, { "epoch": 0.92, "grad_norm": 2.4990484620871873, "learning_rate": 1.4774816861686636e-07, "loss": 0.6671, "step": 8788 }, { "epoch": 0.92, "grad_norm": 2.9970898191679884, "learning_rate": 1.4733721641274677e-07, "loss": 0.653, "step": 8789 }, { "epoch": 0.92, "grad_norm": 3.3568839354768514, "learning_rate": 1.4692682797696201e-07, "loss": 0.5761, "step": 8790 }, { "epoch": 0.93, "grad_norm": 2.61930351344336, "learning_rate": 1.4651700335718887e-07, "loss": 0.6616, "step": 8791 }, { "epoch": 0.93, "grad_norm": 0.9479674201636344, "learning_rate": 1.4610774260104155e-07, "loss": 0.5103, "step": 8792 }, { "epoch": 0.93, "grad_norm": 2.5330814701077635, "learning_rate": 1.456990457560642e-07, "loss": 0.5497, "step": 8793 }, { "epoch": 0.93, "grad_norm": 2.309748805294827, "learning_rate": 1.4529091286973994e-07, "loss": 0.6107, "step": 8794 }, { "epoch": 0.93, "grad_norm": 2.547398975416799, "learning_rate": 1.4488334398948424e-07, "loss": 0.6341, "step": 8795 }, { "epoch": 0.93, "grad_norm": 2.0790737085851827, "learning_rate": 1.444763391626458e-07, "loss": 0.5955, "step": 8796 }, { "epoch": 0.93, "grad_norm": 2.7131868065415845, "learning_rate": 1.4406989843651186e-07, "loss": 0.6069, "step": 8797 }, { "epoch": 0.93, "grad_norm": 2.1351827973252804, "learning_rate": 1.4366402185829852e-07, "loss": 0.5892, "step": 8798 }, { "epoch": 0.93, "grad_norm": 2.532366496256515, "learning_rate": 1.4325870947516195e-07, "loss": 0.6275, "step": 8799 }, { "epoch": 0.93, "grad_norm": 3.5352178877025415, "learning_rate": 1.4285396133418894e-07, "loss": 0.6247, "step": 8800 }, { "epoch": 0.93, "grad_norm": 2.3725771799289634, "learning_rate": 1.424497774824024e-07, "loss": 0.5422, "step": 8801 }, { "epoch": 0.93, "grad_norm": 2.453635363952836, "learning_rate": 1.4204615796675813e-07, "loss": 0.6249, "step": 8802 }, { "epoch": 0.93, "grad_norm": 3.6864136459930834, "learning_rate": 1.4164310283414917e-07, "loss": 0.6058, "step": 8803 }, { "epoch": 0.93, "grad_norm": 2.5101928312119233, "learning_rate": 1.4124061213139973e-07, "loss": 0.674, "step": 8804 }, { "epoch": 0.93, "grad_norm": 2.5816393554893198, "learning_rate": 1.4083868590527128e-07, "loss": 0.56, "step": 8805 }, { "epoch": 0.93, "grad_norm": 1.008204586126537, "learning_rate": 1.4043732420245703e-07, "loss": 0.5047, "step": 8806 }, { "epoch": 0.93, "grad_norm": 2.6435612166868547, "learning_rate": 1.400365270695875e-07, "loss": 0.6431, "step": 8807 }, { "epoch": 0.93, "grad_norm": 2.3837590071708403, "learning_rate": 1.3963629455322536e-07, "loss": 0.6289, "step": 8808 }, { "epoch": 0.93, "grad_norm": 2.2165814241637776, "learning_rate": 1.3923662669986847e-07, "loss": 0.6364, "step": 8809 }, { "epoch": 0.93, "grad_norm": 3.8103240249394665, "learning_rate": 1.388375235559497e-07, "loss": 0.6198, "step": 8810 }, { "epoch": 0.93, "grad_norm": 2.745817288882361, "learning_rate": 1.3843898516783528e-07, "loss": 0.6645, "step": 8811 }, { "epoch": 0.93, "grad_norm": 0.9439152142723546, "learning_rate": 1.3804101158182592e-07, "loss": 0.5582, "step": 8812 }, { "epoch": 0.93, "grad_norm": 2.2299912321829636, "learning_rate": 1.3764360284415745e-07, "loss": 0.5961, "step": 8813 }, { "epoch": 0.93, "grad_norm": 3.274213469206632, "learning_rate": 1.372467590009996e-07, "loss": 0.6478, "step": 8814 }, { "epoch": 0.93, "grad_norm": 2.736445144043836, "learning_rate": 1.3685048009845602e-07, "loss": 0.5455, "step": 8815 }, { "epoch": 0.93, "grad_norm": 2.3511706665739376, "learning_rate": 1.3645476618256658e-07, "loss": 0.6518, "step": 8816 }, { "epoch": 0.93, "grad_norm": 2.9759636145549075, "learning_rate": 1.3605961729930283e-07, "loss": 0.6208, "step": 8817 }, { "epoch": 0.93, "grad_norm": 2.6395425552448972, "learning_rate": 1.3566503349457193e-07, "loss": 0.5986, "step": 8818 }, { "epoch": 0.93, "grad_norm": 2.5256137921850588, "learning_rate": 1.3527101481421722e-07, "loss": 0.5586, "step": 8819 }, { "epoch": 0.93, "grad_norm": 2.365812363582891, "learning_rate": 1.3487756130401264e-07, "loss": 0.5482, "step": 8820 }, { "epoch": 0.93, "grad_norm": 2.41157315584283, "learning_rate": 1.3448467300966995e-07, "loss": 0.5167, "step": 8821 }, { "epoch": 0.93, "grad_norm": 2.350934489365909, "learning_rate": 1.3409234997683262e-07, "loss": 0.5623, "step": 8822 }, { "epoch": 0.93, "grad_norm": 2.8590303256325225, "learning_rate": 1.3370059225108088e-07, "loss": 0.615, "step": 8823 }, { "epoch": 0.93, "grad_norm": 2.477586205368679, "learning_rate": 1.3330939987792668e-07, "loss": 0.6072, "step": 8824 }, { "epoch": 0.93, "grad_norm": 2.401994252587328, "learning_rate": 1.3291877290281864e-07, "loss": 0.574, "step": 8825 }, { "epoch": 0.93, "grad_norm": 3.7781870354990517, "learning_rate": 1.3252871137113764e-07, "loss": 0.56, "step": 8826 }, { "epoch": 0.93, "grad_norm": 2.367412583097113, "learning_rate": 1.3213921532820084e-07, "loss": 0.678, "step": 8827 }, { "epoch": 0.93, "grad_norm": 1.9230400310337836, "learning_rate": 1.3175028481925865e-07, "loss": 0.6137, "step": 8828 }, { "epoch": 0.93, "grad_norm": 2.6899938815586157, "learning_rate": 1.3136191988949498e-07, "loss": 0.5719, "step": 8829 }, { "epoch": 0.93, "grad_norm": 2.313782561671406, "learning_rate": 1.3097412058403036e-07, "loss": 0.6413, "step": 8830 }, { "epoch": 0.93, "grad_norm": 2.3968960300093514, "learning_rate": 1.305868869479171e-07, "loss": 0.6002, "step": 8831 }, { "epoch": 0.93, "grad_norm": 0.9061105754688217, "learning_rate": 1.3020021902614366e-07, "loss": 0.5138, "step": 8832 }, { "epoch": 0.93, "grad_norm": 3.361235492021561, "learning_rate": 1.2981411686363132e-07, "loss": 0.6635, "step": 8833 }, { "epoch": 0.93, "grad_norm": 2.299548642666442, "learning_rate": 1.29428580505237e-07, "loss": 0.574, "step": 8834 }, { "epoch": 0.93, "grad_norm": 2.8233240276159557, "learning_rate": 1.290436099957504e-07, "loss": 0.6384, "step": 8835 }, { "epoch": 0.93, "grad_norm": 2.4935098889156753, "learning_rate": 1.2865920537989683e-07, "loss": 0.5546, "step": 8836 }, { "epoch": 0.93, "grad_norm": 2.7213656000819872, "learning_rate": 1.2827536670233508e-07, "loss": 0.6305, "step": 8837 }, { "epoch": 0.93, "grad_norm": 2.741318595224126, "learning_rate": 1.2789209400765889e-07, "loss": 0.6613, "step": 8838 }, { "epoch": 0.93, "grad_norm": 3.396722910728486, "learning_rate": 1.2750938734039486e-07, "loss": 0.5485, "step": 8839 }, { "epoch": 0.93, "grad_norm": 2.597262875682383, "learning_rate": 1.2712724674500575e-07, "loss": 0.6165, "step": 8840 }, { "epoch": 0.93, "grad_norm": 2.5915988746408636, "learning_rate": 1.2674567226588662e-07, "loss": 0.658, "step": 8841 }, { "epoch": 0.93, "grad_norm": 12.953891578992364, "learning_rate": 1.2636466394736758e-07, "loss": 0.5931, "step": 8842 }, { "epoch": 0.93, "grad_norm": 4.102318058166209, "learning_rate": 1.2598422183371484e-07, "loss": 0.5928, "step": 8843 }, { "epoch": 0.93, "grad_norm": 3.2543978164700937, "learning_rate": 1.256043459691253e-07, "loss": 0.6385, "step": 8844 }, { "epoch": 0.93, "grad_norm": 0.9749011154837868, "learning_rate": 1.2522503639773254e-07, "loss": 0.5538, "step": 8845 }, { "epoch": 0.93, "grad_norm": 2.8982951468251628, "learning_rate": 1.2484629316360297e-07, "loss": 0.5987, "step": 8846 }, { "epoch": 0.93, "grad_norm": 4.042274517432498, "learning_rate": 1.244681163107392e-07, "loss": 0.5936, "step": 8847 }, { "epoch": 0.93, "grad_norm": 2.543924226290593, "learning_rate": 1.2409050588307547e-07, "loss": 0.6316, "step": 8848 }, { "epoch": 0.93, "grad_norm": 2.275882847499862, "learning_rate": 1.237134619244823e-07, "loss": 0.6165, "step": 8849 }, { "epoch": 0.93, "grad_norm": 2.5971023387508803, "learning_rate": 1.2333698447876296e-07, "loss": 0.6973, "step": 8850 }, { "epoch": 0.93, "grad_norm": 2.306532467495735, "learning_rate": 1.229610735896558e-07, "loss": 0.5684, "step": 8851 }, { "epoch": 0.93, "grad_norm": 2.5310472966419377, "learning_rate": 1.2258572930083313e-07, "loss": 0.7, "step": 8852 }, { "epoch": 0.93, "grad_norm": 2.187034442784481, "learning_rate": 1.222109516559006e-07, "loss": 0.5667, "step": 8853 }, { "epoch": 0.93, "grad_norm": 2.2888506566786493, "learning_rate": 1.2183674069840057e-07, "loss": 0.6023, "step": 8854 }, { "epoch": 0.93, "grad_norm": 1.9527027435099331, "learning_rate": 1.2146309647180554e-07, "loss": 0.4499, "step": 8855 }, { "epoch": 0.93, "grad_norm": 2.730891050434837, "learning_rate": 1.2109001901952633e-07, "loss": 0.6274, "step": 8856 }, { "epoch": 0.93, "grad_norm": 0.916048809399537, "learning_rate": 1.2071750838490492e-07, "loss": 0.5672, "step": 8857 }, { "epoch": 0.93, "grad_norm": 2.2808207217906893, "learning_rate": 1.2034556461121894e-07, "loss": 0.6706, "step": 8858 }, { "epoch": 0.93, "grad_norm": 2.48748766003941, "learning_rate": 1.1997418774167934e-07, "loss": 0.6039, "step": 8859 }, { "epoch": 0.93, "grad_norm": 2.37457209874063, "learning_rate": 1.196033778194322e-07, "loss": 0.6669, "step": 8860 }, { "epoch": 0.93, "grad_norm": 2.1767809049061233, "learning_rate": 1.1923313488755638e-07, "loss": 0.5488, "step": 8861 }, { "epoch": 0.93, "grad_norm": 2.2318763739789023, "learning_rate": 1.1886345898906693e-07, "loss": 0.6166, "step": 8862 }, { "epoch": 0.93, "grad_norm": 2.8605686514104973, "learning_rate": 1.1849435016691003e-07, "loss": 0.606, "step": 8863 }, { "epoch": 0.93, "grad_norm": 2.908693940117577, "learning_rate": 1.1812580846396915e-07, "loss": 0.5751, "step": 8864 }, { "epoch": 0.93, "grad_norm": 3.700097097894084, "learning_rate": 1.1775783392305895e-07, "loss": 0.6018, "step": 8865 }, { "epoch": 0.93, "grad_norm": 2.782107805001388, "learning_rate": 1.1739042658693079e-07, "loss": 0.5801, "step": 8866 }, { "epoch": 0.93, "grad_norm": 2.6177299400948395, "learning_rate": 1.1702358649826939e-07, "loss": 0.6479, "step": 8867 }, { "epoch": 0.93, "grad_norm": 2.350114941297819, "learning_rate": 1.166573136996918e-07, "loss": 0.5391, "step": 8868 }, { "epoch": 0.93, "grad_norm": 2.5754262051694763, "learning_rate": 1.1629160823375118e-07, "loss": 0.6068, "step": 8869 }, { "epoch": 0.93, "grad_norm": 3.382289379949096, "learning_rate": 1.1592647014293412e-07, "loss": 0.6544, "step": 8870 }, { "epoch": 0.93, "grad_norm": 2.3588925482663523, "learning_rate": 1.1556189946966168e-07, "loss": 0.5807, "step": 8871 }, { "epoch": 0.93, "grad_norm": 2.582600577577335, "learning_rate": 1.151978962562883e-07, "loss": 0.6165, "step": 8872 }, { "epoch": 0.93, "grad_norm": 2.245726951542925, "learning_rate": 1.1483446054510294e-07, "loss": 0.5569, "step": 8873 }, { "epoch": 0.93, "grad_norm": 2.638961684798639, "learning_rate": 1.144715923783274e-07, "loss": 0.6425, "step": 8874 }, { "epoch": 0.93, "grad_norm": 4.337475255641479, "learning_rate": 1.1410929179812069e-07, "loss": 0.6021, "step": 8875 }, { "epoch": 0.93, "grad_norm": 2.6499250104828334, "learning_rate": 1.1374755884657195e-07, "loss": 0.589, "step": 8876 }, { "epoch": 0.93, "grad_norm": 3.7819394045598353, "learning_rate": 1.1338639356570758e-07, "loss": 0.6157, "step": 8877 }, { "epoch": 0.93, "grad_norm": 3.9903219653791866, "learning_rate": 1.130257959974862e-07, "loss": 0.566, "step": 8878 }, { "epoch": 0.93, "grad_norm": 0.9474747256626247, "learning_rate": 1.1266576618380098e-07, "loss": 0.5611, "step": 8879 }, { "epoch": 0.93, "grad_norm": 2.8711918810126424, "learning_rate": 1.1230630416647958e-07, "loss": 0.5494, "step": 8880 }, { "epoch": 0.93, "grad_norm": 2.991121349490518, "learning_rate": 1.1194740998728193e-07, "loss": 0.6804, "step": 8881 }, { "epoch": 0.93, "grad_norm": 3.1006023896574195, "learning_rate": 1.1158908368790523e-07, "loss": 0.7024, "step": 8882 }, { "epoch": 0.93, "grad_norm": 2.1579700060562166, "learning_rate": 1.1123132530997727e-07, "loss": 0.6184, "step": 8883 }, { "epoch": 0.93, "grad_norm": 2.466363894718886, "learning_rate": 1.1087413489506205e-07, "loss": 0.6905, "step": 8884 }, { "epoch": 0.93, "grad_norm": 3.6365001531373107, "learning_rate": 1.1051751248465691e-07, "loss": 0.5644, "step": 8885 }, { "epoch": 0.94, "grad_norm": 3.007115123145037, "learning_rate": 1.1016145812019319e-07, "loss": 0.6451, "step": 8886 }, { "epoch": 0.94, "grad_norm": 2.339803693912085, "learning_rate": 1.098059718430361e-07, "loss": 0.6171, "step": 8887 }, { "epoch": 0.94, "grad_norm": 4.514752409454256, "learning_rate": 1.0945105369448483e-07, "loss": 0.6357, "step": 8888 }, { "epoch": 0.94, "grad_norm": 3.1616055453161453, "learning_rate": 1.0909670371577308e-07, "loss": 0.6318, "step": 8889 }, { "epoch": 0.94, "grad_norm": 2.3220638444142474, "learning_rate": 1.087429219480679e-07, "loss": 0.6093, "step": 8890 }, { "epoch": 0.94, "grad_norm": 2.3976532258477015, "learning_rate": 1.0838970843247143e-07, "loss": 0.6613, "step": 8891 }, { "epoch": 0.94, "grad_norm": 2.661253383224113, "learning_rate": 1.0803706321001805e-07, "loss": 0.6697, "step": 8892 }, { "epoch": 0.94, "grad_norm": 2.6683239924659907, "learning_rate": 1.0768498632167779e-07, "loss": 0.6107, "step": 8893 }, { "epoch": 0.94, "grad_norm": 2.5718421574140513, "learning_rate": 1.0733347780835346e-07, "loss": 0.5767, "step": 8894 }, { "epoch": 0.94, "grad_norm": 2.146294517128654, "learning_rate": 1.0698253771088241e-07, "loss": 0.591, "step": 8895 }, { "epoch": 0.94, "grad_norm": 2.2452315585617613, "learning_rate": 1.0663216607003535e-07, "loss": 0.5671, "step": 8896 }, { "epoch": 0.94, "grad_norm": 2.3863559337974825, "learning_rate": 1.0628236292651861e-07, "loss": 0.5705, "step": 8897 }, { "epoch": 0.94, "grad_norm": 2.6440910292337767, "learning_rate": 1.0593312832097025e-07, "loss": 0.5802, "step": 8898 }, { "epoch": 0.94, "grad_norm": 0.9645714063530803, "learning_rate": 1.055844622939639e-07, "loss": 0.5883, "step": 8899 }, { "epoch": 0.94, "grad_norm": 2.58708264588437, "learning_rate": 1.0523636488600664e-07, "loss": 0.5565, "step": 8900 }, { "epoch": 0.94, "grad_norm": 2.265603984151188, "learning_rate": 1.048888361375383e-07, "loss": 0.6022, "step": 8901 }, { "epoch": 0.94, "grad_norm": 2.357562605848145, "learning_rate": 1.045418760889355e-07, "loss": 0.5128, "step": 8902 }, { "epoch": 0.94, "grad_norm": 3.4292025931619565, "learning_rate": 1.0419548478050601e-07, "loss": 0.5251, "step": 8903 }, { "epoch": 0.94, "grad_norm": 2.0323838830808834, "learning_rate": 1.038496622524926e-07, "loss": 0.5835, "step": 8904 }, { "epoch": 0.94, "grad_norm": 4.6168280543265166, "learning_rate": 1.0350440854507205e-07, "loss": 0.636, "step": 8905 }, { "epoch": 0.94, "grad_norm": 2.052977096577669, "learning_rate": 1.0315972369835559e-07, "loss": 0.6365, "step": 8906 }, { "epoch": 0.94, "grad_norm": 2.2130917241762083, "learning_rate": 1.0281560775238619e-07, "loss": 0.6581, "step": 8907 }, { "epoch": 0.94, "grad_norm": 3.7050724972147653, "learning_rate": 1.0247206074714411e-07, "loss": 0.6105, "step": 8908 }, { "epoch": 0.94, "grad_norm": 5.090324313422908, "learning_rate": 1.0212908272253963e-07, "loss": 0.6013, "step": 8909 }, { "epoch": 0.94, "grad_norm": 3.5235691542628236, "learning_rate": 1.0178667371842088e-07, "loss": 0.6415, "step": 8910 }, { "epoch": 0.94, "grad_norm": 2.4523022912907857, "learning_rate": 1.014448337745666e-07, "loss": 0.6337, "step": 8911 }, { "epoch": 0.94, "grad_norm": 2.318885308337774, "learning_rate": 1.0110356293069168e-07, "loss": 0.6198, "step": 8912 }, { "epoch": 0.94, "grad_norm": 2.0757919001418195, "learning_rate": 1.0076286122644274e-07, "loss": 0.6183, "step": 8913 }, { "epoch": 0.94, "grad_norm": 2.5362575196035597, "learning_rate": 1.0042272870140258e-07, "loss": 0.6237, "step": 8914 }, { "epoch": 0.94, "grad_norm": 2.31713074091154, "learning_rate": 1.0008316539508733e-07, "loss": 0.6719, "step": 8915 }, { "epoch": 0.94, "grad_norm": 2.767274831301876, "learning_rate": 9.974417134694491e-08, "loss": 0.5058, "step": 8916 }, { "epoch": 0.94, "grad_norm": 0.9939390220324551, "learning_rate": 9.940574659635993e-08, "loss": 0.5415, "step": 8917 }, { "epoch": 0.94, "grad_norm": 3.3485962773421636, "learning_rate": 9.90678911826487e-08, "loss": 0.5924, "step": 8918 }, { "epoch": 0.94, "grad_norm": 2.8816111251299037, "learning_rate": 9.873060514506316e-08, "loss": 0.6027, "step": 8919 }, { "epoch": 0.94, "grad_norm": 1.0275962573135506, "learning_rate": 9.839388852278752e-08, "loss": 0.5166, "step": 8920 }, { "epoch": 0.94, "grad_norm": 2.5666272152464558, "learning_rate": 9.805774135494106e-08, "loss": 0.6445, "step": 8921 }, { "epoch": 0.94, "grad_norm": 2.4748907614724827, "learning_rate": 9.772216368057586e-08, "loss": 0.6334, "step": 8922 }, { "epoch": 0.94, "grad_norm": 2.942060956680936, "learning_rate": 9.738715553867851e-08, "loss": 0.6156, "step": 8923 }, { "epoch": 0.94, "grad_norm": 3.9951301127561973, "learning_rate": 9.705271696816954e-08, "loss": 0.5977, "step": 8924 }, { "epoch": 0.94, "grad_norm": 2.777453454552441, "learning_rate": 9.671884800790288e-08, "loss": 0.6252, "step": 8925 }, { "epoch": 0.94, "grad_norm": 2.348283390971774, "learning_rate": 9.638554869666695e-08, "loss": 0.6118, "step": 8926 }, { "epoch": 0.94, "grad_norm": 2.4067385375615493, "learning_rate": 9.605281907318243e-08, "loss": 0.6271, "step": 8927 }, { "epoch": 0.94, "grad_norm": 0.9552504322728489, "learning_rate": 9.572065917610618e-08, "loss": 0.5533, "step": 8928 }, { "epoch": 0.94, "grad_norm": 2.271928583094003, "learning_rate": 9.538906904402623e-08, "loss": 0.6035, "step": 8929 }, { "epoch": 0.94, "grad_norm": 3.3223263454773297, "learning_rate": 9.505804871546731e-08, "loss": 0.6097, "step": 8930 }, { "epoch": 0.94, "grad_norm": 4.923685715911361, "learning_rate": 9.472759822888478e-08, "loss": 0.576, "step": 8931 }, { "epoch": 0.94, "grad_norm": 2.249066974617624, "learning_rate": 9.439771762267069e-08, "loss": 0.6664, "step": 8932 }, { "epoch": 0.94, "grad_norm": 2.2985171963927464, "learning_rate": 9.40684069351483e-08, "loss": 0.6363, "step": 8933 }, { "epoch": 0.94, "grad_norm": 2.1586975547776857, "learning_rate": 9.373966620457753e-08, "loss": 0.5921, "step": 8934 }, { "epoch": 0.94, "grad_norm": 2.699332277329513, "learning_rate": 9.341149546914951e-08, "loss": 0.6785, "step": 8935 }, { "epoch": 0.94, "grad_norm": 2.705681753410133, "learning_rate": 9.308389476699043e-08, "loss": 0.6113, "step": 8936 }, { "epoch": 0.94, "grad_norm": 2.299817850591371, "learning_rate": 9.27568641361598e-08, "loss": 0.6317, "step": 8937 }, { "epoch": 0.94, "grad_norm": 2.6726846572437575, "learning_rate": 9.243040361465172e-08, "loss": 0.5638, "step": 8938 }, { "epoch": 0.94, "grad_norm": 2.0645519294317953, "learning_rate": 9.210451324039304e-08, "loss": 0.6308, "step": 8939 }, { "epoch": 0.94, "grad_norm": 2.4527040374452262, "learning_rate": 9.177919305124405e-08, "loss": 0.6753, "step": 8940 }, { "epoch": 0.94, "grad_norm": 0.9497404981411139, "learning_rate": 9.145444308500117e-08, "loss": 0.5437, "step": 8941 }, { "epoch": 0.94, "grad_norm": 5.86280566237663, "learning_rate": 9.11302633793909e-08, "loss": 0.5582, "step": 8942 }, { "epoch": 0.94, "grad_norm": 2.231382224332527, "learning_rate": 9.080665397207755e-08, "loss": 0.5663, "step": 8943 }, { "epoch": 0.94, "grad_norm": 2.383310389908959, "learning_rate": 9.048361490065549e-08, "loss": 0.5818, "step": 8944 }, { "epoch": 0.94, "grad_norm": 2.365476329509127, "learning_rate": 9.016114620265526e-08, "loss": 0.6667, "step": 8945 }, { "epoch": 0.94, "grad_norm": 2.6118574377559023, "learning_rate": 8.983924791553966e-08, "loss": 0.7023, "step": 8946 }, { "epoch": 0.94, "grad_norm": 3.2390121343870013, "learning_rate": 8.951792007670713e-08, "loss": 0.5439, "step": 8947 }, { "epoch": 0.94, "grad_norm": 2.824711199639862, "learning_rate": 8.919716272348722e-08, "loss": 0.6532, "step": 8948 }, { "epoch": 0.94, "grad_norm": 2.7642587541543717, "learning_rate": 8.88769758931457e-08, "loss": 0.5559, "step": 8949 }, { "epoch": 0.94, "grad_norm": 2.7721849377139645, "learning_rate": 8.855735962288059e-08, "loss": 0.6408, "step": 8950 }, { "epoch": 0.94, "grad_norm": 12.581276150709563, "learning_rate": 8.823831394982329e-08, "loss": 0.5789, "step": 8951 }, { "epoch": 0.94, "grad_norm": 2.6512425220755578, "learning_rate": 8.791983891104084e-08, "loss": 0.557, "step": 8952 }, { "epoch": 0.94, "grad_norm": 2.4054321801371494, "learning_rate": 8.760193454353194e-08, "loss": 0.6467, "step": 8953 }, { "epoch": 0.94, "grad_norm": 2.9064106195783275, "learning_rate": 8.728460088422985e-08, "loss": 0.5413, "step": 8954 }, { "epoch": 0.94, "grad_norm": 2.5100181773055414, "learning_rate": 8.696783797000174e-08, "loss": 0.6023, "step": 8955 }, { "epoch": 0.94, "grad_norm": 0.9356433470805314, "learning_rate": 8.665164583764818e-08, "loss": 0.5343, "step": 8956 }, { "epoch": 0.94, "grad_norm": 2.9024523813801144, "learning_rate": 8.633602452390311e-08, "loss": 0.6028, "step": 8957 }, { "epoch": 0.94, "grad_norm": 2.9298897593318474, "learning_rate": 8.602097406543442e-08, "loss": 0.6587, "step": 8958 }, { "epoch": 0.94, "grad_norm": 2.6001966117983955, "learning_rate": 8.570649449884505e-08, "loss": 0.6481, "step": 8959 }, { "epoch": 0.94, "grad_norm": 2.45160147679471, "learning_rate": 8.539258586066912e-08, "loss": 0.5524, "step": 8960 }, { "epoch": 0.94, "grad_norm": 4.5237192425119686, "learning_rate": 8.507924818737523e-08, "loss": 0.7146, "step": 8961 }, { "epoch": 0.94, "grad_norm": 2.5557536371206893, "learning_rate": 8.476648151536704e-08, "loss": 0.6013, "step": 8962 }, { "epoch": 0.94, "grad_norm": 3.195273209399613, "learning_rate": 8.445428588098048e-08, "loss": 0.6442, "step": 8963 }, { "epoch": 0.94, "grad_norm": 2.174929174236636, "learning_rate": 8.414266132048543e-08, "loss": 0.5563, "step": 8964 }, { "epoch": 0.94, "grad_norm": 4.334437374273213, "learning_rate": 8.383160787008627e-08, "loss": 0.6087, "step": 8965 }, { "epoch": 0.94, "grad_norm": 2.2982581898665733, "learning_rate": 8.352112556591907e-08, "loss": 0.5159, "step": 8966 }, { "epoch": 0.94, "grad_norm": 2.485884461295592, "learning_rate": 8.321121444405611e-08, "loss": 0.6474, "step": 8967 }, { "epoch": 0.94, "grad_norm": 0.9202366397635555, "learning_rate": 8.29018745405008e-08, "loss": 0.4943, "step": 8968 }, { "epoch": 0.94, "grad_norm": 2.242007414517413, "learning_rate": 8.259310589119162e-08, "loss": 0.5891, "step": 8969 }, { "epoch": 0.94, "grad_norm": 2.159210998912373, "learning_rate": 8.2284908532001e-08, "loss": 0.6245, "step": 8970 }, { "epoch": 0.94, "grad_norm": 2.5880306782993467, "learning_rate": 8.19772824987336e-08, "loss": 0.5807, "step": 8971 }, { "epoch": 0.94, "grad_norm": 0.9458064039539235, "learning_rate": 8.167022782712919e-08, "loss": 0.5854, "step": 8972 }, { "epoch": 0.94, "grad_norm": 2.2865336633479743, "learning_rate": 8.136374455286033e-08, "loss": 0.6299, "step": 8973 }, { "epoch": 0.94, "grad_norm": 0.9344831427818824, "learning_rate": 8.105783271153356e-08, "loss": 0.5415, "step": 8974 }, { "epoch": 0.94, "grad_norm": 3.63891046584668, "learning_rate": 8.075249233868821e-08, "loss": 0.6369, "step": 8975 }, { "epoch": 0.94, "grad_norm": 1.0932455433990051, "learning_rate": 8.044772346979812e-08, "loss": 0.5487, "step": 8976 }, { "epoch": 0.94, "grad_norm": 0.8253603754509732, "learning_rate": 8.014352614027054e-08, "loss": 0.531, "step": 8977 }, { "epoch": 0.94, "grad_norm": 2.3887697326667325, "learning_rate": 7.983990038544664e-08, "loss": 0.5317, "step": 8978 }, { "epoch": 0.94, "grad_norm": 0.954945688802483, "learning_rate": 7.953684624059987e-08, "loss": 0.5603, "step": 8979 }, { "epoch": 0.94, "grad_norm": 2.949484917320116, "learning_rate": 7.923436374093929e-08, "loss": 0.5994, "step": 8980 }, { "epoch": 0.95, "grad_norm": 2.027629114424706, "learning_rate": 7.893245292160511e-08, "loss": 0.601, "step": 8981 }, { "epoch": 0.95, "grad_norm": 2.3171055100942057, "learning_rate": 7.863111381767374e-08, "loss": 0.544, "step": 8982 }, { "epoch": 0.95, "grad_norm": 2.5697202512374178, "learning_rate": 7.833034646415272e-08, "loss": 0.6016, "step": 8983 }, { "epoch": 0.95, "grad_norm": 2.502909881767718, "learning_rate": 7.80301508959852e-08, "loss": 0.6382, "step": 8984 }, { "epoch": 0.95, "grad_norm": 2.4985875211152675, "learning_rate": 7.773052714804719e-08, "loss": 0.5886, "step": 8985 }, { "epoch": 0.95, "grad_norm": 2.9604096707667176, "learning_rate": 7.743147525514749e-08, "loss": 0.6592, "step": 8986 }, { "epoch": 0.95, "grad_norm": 2.074366972373142, "learning_rate": 7.713299525202944e-08, "loss": 0.5226, "step": 8987 }, { "epoch": 0.95, "grad_norm": 4.844749218296161, "learning_rate": 7.683508717336918e-08, "loss": 0.6071, "step": 8988 }, { "epoch": 0.95, "grad_norm": 2.591141612749998, "learning_rate": 7.653775105377737e-08, "loss": 0.6346, "step": 8989 }, { "epoch": 0.95, "grad_norm": 5.626446005912948, "learning_rate": 7.62409869277969e-08, "loss": 0.6454, "step": 8990 }, { "epoch": 0.95, "grad_norm": 2.5111955019235888, "learning_rate": 7.59447948299058e-08, "loss": 0.6752, "step": 8991 }, { "epoch": 0.95, "grad_norm": 3.0568359259106237, "learning_rate": 7.564917479451373e-08, "loss": 0.617, "step": 8992 }, { "epoch": 0.95, "grad_norm": 2.22905556576431, "learning_rate": 7.535412685596599e-08, "loss": 0.658, "step": 8993 }, { "epoch": 0.95, "grad_norm": 3.687646814338487, "learning_rate": 7.505965104854073e-08, "loss": 0.7067, "step": 8994 }, { "epoch": 0.95, "grad_norm": 2.1712557178132945, "learning_rate": 7.476574740644838e-08, "loss": 0.5687, "step": 8995 }, { "epoch": 0.95, "grad_norm": 3.390681967447077, "learning_rate": 7.44724159638338e-08, "loss": 0.6088, "step": 8996 }, { "epoch": 0.95, "grad_norm": 1.0375781519569047, "learning_rate": 7.417965675477534e-08, "loss": 0.541, "step": 8997 }, { "epoch": 0.95, "grad_norm": 2.695143332239307, "learning_rate": 7.388746981328632e-08, "loss": 0.5657, "step": 8998 }, { "epoch": 0.95, "grad_norm": 3.5000885655908025, "learning_rate": 7.359585517331014e-08, "loss": 0.643, "step": 8999 }, { "epoch": 0.95, "grad_norm": 2.556164468290286, "learning_rate": 7.330481286872749e-08, "loss": 0.5475, "step": 9000 }, { "epoch": 0.95, "grad_norm": 0.8838512097484564, "learning_rate": 7.301434293334908e-08, "loss": 0.5353, "step": 9001 }, { "epoch": 0.95, "grad_norm": 2.274766804514225, "learning_rate": 7.272444540092294e-08, "loss": 0.5828, "step": 9002 }, { "epoch": 0.95, "grad_norm": 2.1596373340527326, "learning_rate": 7.243512030512656e-08, "loss": 0.6407, "step": 9003 }, { "epoch": 0.95, "grad_norm": 2.3934408334922597, "learning_rate": 7.214636767957417e-08, "loss": 0.5951, "step": 9004 }, { "epoch": 0.95, "grad_norm": 2.1077351249376233, "learning_rate": 7.18581875578117e-08, "loss": 0.5951, "step": 9005 }, { "epoch": 0.95, "grad_norm": 2.7426426194662987, "learning_rate": 7.157057997331907e-08, "loss": 0.5838, "step": 9006 }, { "epoch": 0.95, "grad_norm": 3.3577955817656235, "learning_rate": 7.128354495951006e-08, "loss": 0.6553, "step": 9007 }, { "epoch": 0.95, "grad_norm": 3.089592673711738, "learning_rate": 7.099708254973136e-08, "loss": 0.542, "step": 9008 }, { "epoch": 0.95, "grad_norm": 2.6306257005655937, "learning_rate": 7.071119277726301e-08, "loss": 0.5938, "step": 9009 }, { "epoch": 0.95, "grad_norm": 4.773569073948341, "learning_rate": 7.0425875675319e-08, "loss": 0.6349, "step": 9010 }, { "epoch": 0.95, "grad_norm": 2.1860135695945595, "learning_rate": 7.014113127704725e-08, "loss": 0.5982, "step": 9011 }, { "epoch": 0.95, "grad_norm": 3.076079644508135, "learning_rate": 6.985695961552796e-08, "loss": 0.6265, "step": 9012 }, { "epoch": 0.95, "grad_norm": 0.9415297800104788, "learning_rate": 6.957336072377586e-08, "loss": 0.5714, "step": 9013 }, { "epoch": 0.95, "grad_norm": 2.7068897773748826, "learning_rate": 6.929033463473789e-08, "loss": 0.5468, "step": 9014 }, { "epoch": 0.95, "grad_norm": 2.2176189144911542, "learning_rate": 6.900788138129554e-08, "loss": 0.6505, "step": 9015 }, { "epoch": 0.95, "grad_norm": 2.3975049310120897, "learning_rate": 6.872600099626369e-08, "loss": 0.5567, "step": 9016 }, { "epoch": 0.95, "grad_norm": 0.9140350212678829, "learning_rate": 6.844469351239003e-08, "loss": 0.5619, "step": 9017 }, { "epoch": 0.95, "grad_norm": 2.5388047256734287, "learning_rate": 6.816395896235617e-08, "loss": 0.6404, "step": 9018 }, { "epoch": 0.95, "grad_norm": 2.61129763755357, "learning_rate": 6.78837973787766e-08, "loss": 0.6912, "step": 9019 }, { "epoch": 0.95, "grad_norm": 2.7678872176224463, "learning_rate": 6.760420879420082e-08, "loss": 0.5791, "step": 9020 }, { "epoch": 0.95, "grad_norm": 5.085235281927168, "learning_rate": 6.732519324111009e-08, "loss": 0.6828, "step": 9021 }, { "epoch": 0.95, "grad_norm": 2.045301446382808, "learning_rate": 6.704675075191902e-08, "loss": 0.6762, "step": 9022 }, { "epoch": 0.95, "grad_norm": 3.2248231907993987, "learning_rate": 6.676888135897674e-08, "loss": 0.649, "step": 9023 }, { "epoch": 0.95, "grad_norm": 2.704494653374823, "learning_rate": 6.649158509456576e-08, "loss": 0.5956, "step": 9024 }, { "epoch": 0.95, "grad_norm": 0.9722338123614437, "learning_rate": 6.621486199090088e-08, "loss": 0.5335, "step": 9025 }, { "epoch": 0.95, "grad_norm": 2.591910857960895, "learning_rate": 6.593871208013136e-08, "loss": 0.6333, "step": 9026 }, { "epoch": 0.95, "grad_norm": 2.8111496930068176, "learning_rate": 6.566313539433877e-08, "loss": 0.6471, "step": 9027 }, { "epoch": 0.95, "grad_norm": 2.432987898988874, "learning_rate": 6.538813196553973e-08, "loss": 0.5657, "step": 9028 }, { "epoch": 0.95, "grad_norm": 2.439770278419752, "learning_rate": 6.511370182568311e-08, "loss": 0.6174, "step": 9029 }, { "epoch": 0.95, "grad_norm": 2.3437842495883623, "learning_rate": 6.483984500665119e-08, "loss": 0.5372, "step": 9030 }, { "epoch": 0.95, "grad_norm": 2.638108993601756, "learning_rate": 6.456656154025964e-08, "loss": 0.6398, "step": 9031 }, { "epoch": 0.95, "grad_norm": 2.5417156476529437, "learning_rate": 6.429385145825861e-08, "loss": 0.5727, "step": 9032 }, { "epoch": 0.95, "grad_norm": 2.6367489893136846, "learning_rate": 6.402171479233e-08, "loss": 0.6402, "step": 9033 }, { "epoch": 0.95, "grad_norm": 2.001287522657275, "learning_rate": 6.375015157409015e-08, "loss": 0.6013, "step": 9034 }, { "epoch": 0.95, "grad_norm": 2.4098090763309523, "learning_rate": 6.347916183508828e-08, "loss": 0.5185, "step": 9035 }, { "epoch": 0.95, "grad_norm": 2.6963976959673808, "learning_rate": 6.320874560680757e-08, "loss": 0.7315, "step": 9036 }, { "epoch": 0.95, "grad_norm": 0.8986435359967851, "learning_rate": 6.293890292066395e-08, "loss": 0.5098, "step": 9037 }, { "epoch": 0.95, "grad_norm": 3.122976565906962, "learning_rate": 6.266963380800684e-08, "loss": 0.6318, "step": 9038 }, { "epoch": 0.95, "grad_norm": 2.4548075643836396, "learning_rate": 6.24009383001195e-08, "loss": 0.6208, "step": 9039 }, { "epoch": 0.95, "grad_norm": 3.0915406479900294, "learning_rate": 6.213281642821811e-08, "loss": 0.562, "step": 9040 }, { "epoch": 0.95, "grad_norm": 2.875966474710966, "learning_rate": 6.186526822345163e-08, "loss": 0.6213, "step": 9041 }, { "epoch": 0.95, "grad_norm": 3.426133515510632, "learning_rate": 6.159829371690407e-08, "loss": 0.5456, "step": 9042 }, { "epoch": 0.95, "grad_norm": 2.21552519795364, "learning_rate": 6.133189293959175e-08, "loss": 0.5443, "step": 9043 }, { "epoch": 0.95, "grad_norm": 3.182803894427842, "learning_rate": 6.106606592246267e-08, "loss": 0.7024, "step": 9044 }, { "epoch": 0.95, "grad_norm": 2.838797575657555, "learning_rate": 6.08008126964016e-08, "loss": 0.5106, "step": 9045 }, { "epoch": 0.95, "grad_norm": 2.464352882434948, "learning_rate": 6.053613329222441e-08, "loss": 0.6797, "step": 9046 }, { "epoch": 0.95, "grad_norm": 2.5403063737877813, "learning_rate": 6.027202774068042e-08, "loss": 0.6741, "step": 9047 }, { "epoch": 0.95, "grad_norm": 2.275405839493515, "learning_rate": 6.00084960724534e-08, "loss": 0.5259, "step": 9048 }, { "epoch": 0.95, "grad_norm": 2.4141326847728526, "learning_rate": 5.974553831815888e-08, "loss": 0.6703, "step": 9049 }, { "epoch": 0.95, "grad_norm": 2.7803880388763815, "learning_rate": 5.9483154508347406e-08, "loss": 0.6808, "step": 9050 }, { "epoch": 0.95, "grad_norm": 2.32854036522692, "learning_rate": 5.9221344673500714e-08, "loss": 0.6318, "step": 9051 }, { "epoch": 0.95, "grad_norm": 2.7007162566736373, "learning_rate": 5.896010884403669e-08, "loss": 0.6192, "step": 9052 }, { "epoch": 0.95, "grad_norm": 2.6424830019293544, "learning_rate": 5.8699447050303284e-08, "loss": 0.6002, "step": 9053 }, { "epoch": 0.95, "grad_norm": 1.9699800972674935, "learning_rate": 5.84393593225846e-08, "loss": 0.5143, "step": 9054 }, { "epoch": 0.95, "grad_norm": 2.1870238921593677, "learning_rate": 5.817984569109702e-08, "loss": 0.6391, "step": 9055 }, { "epoch": 0.95, "grad_norm": 2.3894652143809623, "learning_rate": 5.792090618598922e-08, "loss": 0.5925, "step": 9056 }, { "epoch": 0.95, "grad_norm": 2.0806139077721775, "learning_rate": 5.766254083734435e-08, "loss": 0.6226, "step": 9057 }, { "epoch": 0.95, "grad_norm": 2.0742443711293004, "learning_rate": 5.740474967517839e-08, "loss": 0.7206, "step": 9058 }, { "epoch": 0.95, "grad_norm": 4.751056686871944, "learning_rate": 5.714753272944129e-08, "loss": 0.63, "step": 9059 }, { "epoch": 0.95, "grad_norm": 2.9924140950273572, "learning_rate": 5.68908900300158e-08, "loss": 0.5352, "step": 9060 }, { "epoch": 0.95, "grad_norm": 2.401100392375077, "learning_rate": 5.6634821606717514e-08, "loss": 0.6337, "step": 9061 }, { "epoch": 0.95, "grad_norm": 3.3084059799813343, "learning_rate": 5.6379327489295424e-08, "loss": 0.5915, "step": 9062 }, { "epoch": 0.95, "grad_norm": 2.4362237430967926, "learning_rate": 5.6124407707432436e-08, "loss": 0.6301, "step": 9063 }, { "epoch": 0.95, "grad_norm": 2.974258772040746, "learning_rate": 5.5870062290744876e-08, "loss": 0.6755, "step": 9064 }, { "epoch": 0.95, "grad_norm": 2.330894208256594, "learning_rate": 5.5616291268781875e-08, "loss": 0.6224, "step": 9065 }, { "epoch": 0.95, "grad_norm": 2.1719011136515, "learning_rate": 5.53630946710243e-08, "loss": 0.6096, "step": 9066 }, { "epoch": 0.95, "grad_norm": 2.645578213445915, "learning_rate": 5.5110472526889725e-08, "loss": 0.5871, "step": 9067 }, { "epoch": 0.95, "grad_norm": 2.176097941547101, "learning_rate": 5.485842486572579e-08, "loss": 0.6087, "step": 9068 }, { "epoch": 0.95, "grad_norm": 2.84982524682506, "learning_rate": 5.4606951716815735e-08, "loss": 0.6064, "step": 9069 }, { "epoch": 0.95, "grad_norm": 2.4460753590123403, "learning_rate": 5.435605310937342e-08, "loss": 0.6023, "step": 9070 }, { "epoch": 0.95, "grad_norm": 3.2859655207695355, "learning_rate": 5.410572907254885e-08, "loss": 0.6332, "step": 9071 }, { "epoch": 0.95, "grad_norm": 2.7744254402541415, "learning_rate": 5.3855979635423774e-08, "loss": 0.6184, "step": 9072 }, { "epoch": 0.95, "grad_norm": 2.2908907666964726, "learning_rate": 5.360680482701275e-08, "loss": 0.6022, "step": 9073 }, { "epoch": 0.95, "grad_norm": 2.659397892050746, "learning_rate": 5.3358204676264844e-08, "loss": 0.5827, "step": 9074 }, { "epoch": 0.95, "grad_norm": 2.568905156288951, "learning_rate": 5.3110179212061406e-08, "loss": 0.6192, "step": 9075 }, { "epoch": 0.96, "grad_norm": 2.292649322204462, "learning_rate": 5.286272846321716e-08, "loss": 0.5548, "step": 9076 }, { "epoch": 0.96, "grad_norm": 3.0132313937949267, "learning_rate": 5.2615852458480775e-08, "loss": 0.6589, "step": 9077 }, { "epoch": 0.96, "grad_norm": 4.892679189485966, "learning_rate": 5.23695512265332e-08, "loss": 0.5454, "step": 9078 }, { "epoch": 0.96, "grad_norm": 2.2508615371235727, "learning_rate": 5.2123824795988764e-08, "loss": 0.6351, "step": 9079 }, { "epoch": 0.96, "grad_norm": 4.9551130391975455, "learning_rate": 5.187867319539519e-08, "loss": 0.6406, "step": 9080 }, { "epoch": 0.96, "grad_norm": 10.096299527370705, "learning_rate": 5.163409645323414e-08, "loss": 0.6185, "step": 9081 }, { "epoch": 0.96, "grad_norm": 3.0533996853064957, "learning_rate": 5.139009459791955e-08, "loss": 0.5818, "step": 9082 }, { "epoch": 0.96, "grad_norm": 2.9947071352082357, "learning_rate": 5.1146667657798744e-08, "loss": 0.5878, "step": 9083 }, { "epoch": 0.96, "grad_norm": 2.6515995229170928, "learning_rate": 5.0903815661152435e-08, "loss": 0.6047, "step": 9084 }, { "epoch": 0.96, "grad_norm": 2.216360216245191, "learning_rate": 5.0661538636194164e-08, "loss": 0.6247, "step": 9085 }, { "epoch": 0.96, "grad_norm": 3.268655449300178, "learning_rate": 5.041983661107142e-08, "loss": 0.622, "step": 9086 }, { "epoch": 0.96, "grad_norm": 21.215101180819993, "learning_rate": 5.017870961386451e-08, "loss": 0.568, "step": 9087 }, { "epoch": 0.96, "grad_norm": 2.415400161597065, "learning_rate": 4.9938157672586585e-08, "loss": 0.664, "step": 9088 }, { "epoch": 0.96, "grad_norm": 2.498930091348122, "learning_rate": 4.9698180815183626e-08, "loss": 0.6175, "step": 9089 }, { "epoch": 0.96, "grad_norm": 2.2002817993561457, "learning_rate": 4.945877906953722e-08, "loss": 0.5586, "step": 9090 }, { "epoch": 0.96, "grad_norm": 2.936573265428427, "learning_rate": 4.921995246345901e-08, "loss": 0.5832, "step": 9091 }, { "epoch": 0.96, "grad_norm": 2.2660732599551445, "learning_rate": 4.898170102469513e-08, "loss": 0.6299, "step": 9092 }, { "epoch": 0.96, "grad_norm": 2.3604212255644623, "learning_rate": 4.87440247809251e-08, "loss": 0.6544, "step": 9093 }, { "epoch": 0.96, "grad_norm": 2.441174650138323, "learning_rate": 4.850692375976185e-08, "loss": 0.6546, "step": 9094 }, { "epoch": 0.96, "grad_norm": 4.559330779198589, "learning_rate": 4.827039798875111e-08, "loss": 0.5868, "step": 9095 }, { "epoch": 0.96, "grad_norm": 2.6264220920489487, "learning_rate": 4.803444749537145e-08, "loss": 0.592, "step": 9096 }, { "epoch": 0.96, "grad_norm": 2.346201879455138, "learning_rate": 4.7799072307034845e-08, "loss": 0.5913, "step": 9097 }, { "epoch": 0.96, "grad_norm": 2.2187162896332735, "learning_rate": 4.756427245108664e-08, "loss": 0.5922, "step": 9098 }, { "epoch": 0.96, "grad_norm": 2.665266994162754, "learning_rate": 4.733004795480556e-08, "loss": 0.5226, "step": 9099 }, { "epoch": 0.96, "grad_norm": 2.309703067456407, "learning_rate": 4.709639884540262e-08, "loss": 0.6754, "step": 9100 }, { "epoch": 0.96, "grad_norm": 3.519489921324912, "learning_rate": 4.686332515002223e-08, "loss": 0.5979, "step": 9101 }, { "epoch": 0.96, "grad_norm": 3.4734030677173644, "learning_rate": 4.663082689574328e-08, "loss": 0.5856, "step": 9102 }, { "epoch": 0.96, "grad_norm": 0.9763445326065605, "learning_rate": 4.6398904109575815e-08, "loss": 0.5201, "step": 9103 }, { "epoch": 0.96, "grad_norm": 2.2503233196738193, "learning_rate": 4.616755681846441e-08, "loss": 0.543, "step": 9104 }, { "epoch": 0.96, "grad_norm": 2.2179469381251273, "learning_rate": 4.593678504928589e-08, "loss": 0.6158, "step": 9105 }, { "epoch": 0.96, "grad_norm": 2.623231471830531, "learning_rate": 4.570658882885104e-08, "loss": 0.6553, "step": 9106 }, { "epoch": 0.96, "grad_norm": 2.50946392413075, "learning_rate": 4.547696818390346e-08, "loss": 0.5586, "step": 9107 }, { "epoch": 0.96, "grad_norm": 2.2550240930054586, "learning_rate": 4.524792314111959e-08, "loss": 0.6628, "step": 9108 }, { "epoch": 0.96, "grad_norm": 2.249364536937027, "learning_rate": 4.501945372710925e-08, "loss": 0.5307, "step": 9109 }, { "epoch": 0.96, "grad_norm": 2.641533439490308, "learning_rate": 4.4791559968415664e-08, "loss": 0.581, "step": 9110 }, { "epoch": 0.96, "grad_norm": 2.400735266374335, "learning_rate": 4.456424189151376e-08, "loss": 0.5803, "step": 9111 }, { "epoch": 0.96, "grad_norm": 2.013311376301837, "learning_rate": 4.433749952281463e-08, "loss": 0.5211, "step": 9112 }, { "epoch": 0.96, "grad_norm": 2.6401002509833007, "learning_rate": 4.4111332888658876e-08, "loss": 0.6004, "step": 9113 }, { "epoch": 0.96, "grad_norm": 0.9931452731017555, "learning_rate": 4.388574201532214e-08, "loss": 0.5143, "step": 9114 }, { "epoch": 0.96, "grad_norm": 2.6549315124291772, "learning_rate": 4.366072692901346e-08, "loss": 0.6261, "step": 9115 }, { "epoch": 0.96, "grad_norm": 3.012204151927376, "learning_rate": 4.343628765587471e-08, "loss": 0.6443, "step": 9116 }, { "epoch": 0.96, "grad_norm": 1.0512576287150874, "learning_rate": 4.321242422197946e-08, "loss": 0.5194, "step": 9117 }, { "epoch": 0.96, "grad_norm": 2.609812624572654, "learning_rate": 4.298913665333637e-08, "loss": 0.5724, "step": 9118 }, { "epoch": 0.96, "grad_norm": 3.067384788809318, "learning_rate": 4.276642497588579e-08, "loss": 0.5614, "step": 9119 }, { "epoch": 0.96, "grad_norm": 3.4934409570440055, "learning_rate": 4.2544289215502576e-08, "loss": 0.5847, "step": 9120 }, { "epoch": 0.96, "grad_norm": 2.470305736923208, "learning_rate": 4.2322729397992755e-08, "loss": 0.6099, "step": 9121 }, { "epoch": 0.96, "grad_norm": 4.735639182147766, "learning_rate": 4.210174554909796e-08, "loss": 0.6468, "step": 9122 }, { "epoch": 0.96, "grad_norm": 2.4294443877199807, "learning_rate": 4.188133769448932e-08, "loss": 0.5507, "step": 9123 }, { "epoch": 0.96, "grad_norm": 3.268281292601878, "learning_rate": 4.1661505859775245e-08, "loss": 0.6073, "step": 9124 }, { "epoch": 0.96, "grad_norm": 3.292116636642608, "learning_rate": 4.1442250070494186e-08, "loss": 0.625, "step": 9125 }, { "epoch": 0.96, "grad_norm": 3.0214437747145553, "learning_rate": 4.1223570352118545e-08, "loss": 0.5964, "step": 9126 }, { "epoch": 0.96, "grad_norm": 3.237561901276038, "learning_rate": 4.100546673005412e-08, "loss": 0.6468, "step": 9127 }, { "epoch": 0.96, "grad_norm": 3.1688911358412177, "learning_rate": 4.078793922963953e-08, "loss": 0.6302, "step": 9128 }, { "epoch": 0.96, "grad_norm": 3.447810752311471, "learning_rate": 4.057098787614677e-08, "loss": 0.5529, "step": 9129 }, { "epoch": 0.96, "grad_norm": 2.636676069560152, "learning_rate": 4.035461269478014e-08, "loss": 0.653, "step": 9130 }, { "epoch": 0.96, "grad_norm": 1.8829981767535657, "learning_rate": 4.013881371067841e-08, "loss": 0.617, "step": 9131 }, { "epoch": 0.96, "grad_norm": 3.756084256939002, "learning_rate": 3.992359094891096e-08, "loss": 0.5825, "step": 9132 }, { "epoch": 0.96, "grad_norm": 2.2300017538671466, "learning_rate": 3.970894443448281e-08, "loss": 0.5501, "step": 9133 }, { "epoch": 0.96, "grad_norm": 3.0651156169351963, "learning_rate": 3.949487419233122e-08, "loss": 0.5455, "step": 9134 }, { "epoch": 0.96, "grad_norm": 2.4150439079741504, "learning_rate": 3.92813802473252e-08, "loss": 0.6961, "step": 9135 }, { "epoch": 0.96, "grad_norm": 3.197514010373594, "learning_rate": 3.906846262426878e-08, "loss": 0.6243, "step": 9136 }, { "epoch": 0.96, "grad_norm": 2.122714920113005, "learning_rate": 3.885612134789718e-08, "loss": 0.6401, "step": 9137 }, { "epoch": 0.96, "grad_norm": 0.9331466918924591, "learning_rate": 3.864435644288123e-08, "loss": 0.5596, "step": 9138 }, { "epoch": 0.96, "grad_norm": 3.5512512202448434, "learning_rate": 3.8433167933821234e-08, "loss": 0.6279, "step": 9139 }, { "epoch": 0.96, "grad_norm": 2.3384806493446684, "learning_rate": 3.822255584525369e-08, "loss": 0.6029, "step": 9140 }, { "epoch": 0.96, "grad_norm": 2.7384044773724057, "learning_rate": 3.8012520201646255e-08, "loss": 0.5762, "step": 9141 }, { "epoch": 0.96, "grad_norm": 2.537480825998291, "learning_rate": 3.780306102740105e-08, "loss": 0.6246, "step": 9142 }, { "epoch": 0.96, "grad_norm": 2.535534515356477, "learning_rate": 3.7594178346851974e-08, "loss": 0.6051, "step": 9143 }, { "epoch": 0.96, "grad_norm": 2.6217629251128436, "learning_rate": 3.738587218426626e-08, "loss": 0.5192, "step": 9144 }, { "epoch": 0.96, "grad_norm": 3.930611934484491, "learning_rate": 3.7178142563844e-08, "loss": 0.554, "step": 9145 }, { "epoch": 0.96, "grad_norm": 2.4744564645686906, "learning_rate": 3.697098950971922e-08, "loss": 0.6043, "step": 9146 }, { "epoch": 0.96, "grad_norm": 2.800360974943865, "learning_rate": 3.676441304595879e-08, "loss": 0.5262, "step": 9147 }, { "epoch": 0.96, "grad_norm": 2.2752239967517283, "learning_rate": 3.655841319656128e-08, "loss": 0.641, "step": 9148 }, { "epoch": 0.96, "grad_norm": 2.9374913960050826, "learning_rate": 3.635298998545922e-08, "loss": 0.62, "step": 9149 }, { "epoch": 0.96, "grad_norm": 2.780995594486591, "learning_rate": 3.614814343651851e-08, "loss": 0.6157, "step": 9150 }, { "epoch": 0.96, "grad_norm": 2.194471742530128, "learning_rate": 3.5943873573537903e-08, "loss": 0.5811, "step": 9151 }, { "epoch": 0.96, "grad_norm": 2.3614260129521387, "learning_rate": 3.574018042024785e-08, "loss": 0.5898, "step": 9152 }, { "epoch": 0.96, "grad_norm": 3.281118604888936, "learning_rate": 3.553706400031331e-08, "loss": 0.624, "step": 9153 }, { "epoch": 0.96, "grad_norm": 3.4511839932038293, "learning_rate": 3.533452433733209e-08, "loss": 0.6539, "step": 9154 }, { "epoch": 0.96, "grad_norm": 3.0189891199871286, "learning_rate": 3.513256145483479e-08, "loss": 0.5865, "step": 9155 }, { "epoch": 0.96, "grad_norm": 5.371611401435088, "learning_rate": 3.493117537628432e-08, "loss": 0.6437, "step": 9156 }, { "epoch": 0.96, "grad_norm": 0.8768550617990599, "learning_rate": 3.4730366125076966e-08, "loss": 0.5259, "step": 9157 }, { "epoch": 0.96, "grad_norm": 4.802661184621188, "learning_rate": 3.453013372454295e-08, "loss": 0.5858, "step": 9158 }, { "epoch": 0.96, "grad_norm": 2.4916523907020736, "learning_rate": 3.433047819794366e-08, "loss": 0.6361, "step": 9159 }, { "epoch": 0.96, "grad_norm": 2.3170062905496733, "learning_rate": 3.413139956847611e-08, "loss": 0.6925, "step": 9160 }, { "epoch": 0.96, "grad_norm": 3.102122288265513, "learning_rate": 3.3932897859267346e-08, "loss": 0.6089, "step": 9161 }, { "epoch": 0.96, "grad_norm": 2.212048736855744, "learning_rate": 3.3734973093378367e-08, "loss": 0.678, "step": 9162 }, { "epoch": 0.96, "grad_norm": 3.5526846645578467, "learning_rate": 3.353762529380466e-08, "loss": 0.6322, "step": 9163 }, { "epoch": 0.96, "grad_norm": 2.6953009571329827, "learning_rate": 3.334085448347346e-08, "loss": 0.6071, "step": 9164 }, { "epoch": 0.96, "grad_norm": 2.09081032023049, "learning_rate": 3.314466068524425e-08, "loss": 0.5649, "step": 9165 }, { "epoch": 0.96, "grad_norm": 2.8019153487339916, "learning_rate": 3.294904392191045e-08, "loss": 0.6405, "step": 9166 }, { "epoch": 0.96, "grad_norm": 2.2938098097617368, "learning_rate": 3.27540042161989e-08, "loss": 0.6646, "step": 9167 }, { "epoch": 0.96, "grad_norm": 2.475187200092886, "learning_rate": 3.255954159076813e-08, "loss": 0.5879, "step": 9168 }, { "epoch": 0.96, "grad_norm": 3.1691062969062194, "learning_rate": 3.236565606821007e-08, "loss": 0.5371, "step": 9169 }, { "epoch": 0.96, "grad_norm": 2.4161686022837277, "learning_rate": 3.2172347671050596e-08, "loss": 0.6002, "step": 9170 }, { "epoch": 0.97, "grad_norm": 2.853074919404103, "learning_rate": 3.197961642174674e-08, "loss": 0.5644, "step": 9171 }, { "epoch": 0.97, "grad_norm": 4.210820383910226, "learning_rate": 3.1787462342690036e-08, "loss": 0.6206, "step": 9172 }, { "epoch": 0.97, "grad_norm": 2.628828526440614, "learning_rate": 3.1595885456204845e-08, "loss": 0.6029, "step": 9173 }, { "epoch": 0.97, "grad_norm": 2.5487802779308995, "learning_rate": 3.1404885784547256e-08, "loss": 0.645, "step": 9174 }, { "epoch": 0.97, "grad_norm": 0.9690656931951284, "learning_rate": 3.1214463349907295e-08, "loss": 0.5252, "step": 9175 }, { "epoch": 0.97, "grad_norm": 1.965446772712665, "learning_rate": 3.102461817440727e-08, "loss": 0.6123, "step": 9176 }, { "epoch": 0.97, "grad_norm": 2.6886931228239357, "learning_rate": 3.083535028010343e-08, "loss": 0.6453, "step": 9177 }, { "epoch": 0.97, "grad_norm": 5.884853895703779, "learning_rate": 3.064665968898428e-08, "loss": 0.7053, "step": 9178 }, { "epoch": 0.97, "grad_norm": 3.1113257956144733, "learning_rate": 3.045854642297175e-08, "loss": 0.5508, "step": 9179 }, { "epoch": 0.97, "grad_norm": 2.493207760394977, "learning_rate": 3.0271010503918896e-08, "loss": 0.5488, "step": 9180 }, { "epoch": 0.97, "grad_norm": 2.3766479319028724, "learning_rate": 3.0084051953614414e-08, "loss": 0.6282, "step": 9181 }, { "epoch": 0.97, "grad_norm": 2.390922910780254, "learning_rate": 2.98976707937787e-08, "loss": 0.5949, "step": 9182 }, { "epoch": 0.97, "grad_norm": 4.637454787285773, "learning_rate": 2.971186704606388e-08, "loss": 0.6665, "step": 9183 }, { "epoch": 0.97, "grad_norm": 3.1213890226656327, "learning_rate": 2.9526640732056577e-08, "loss": 0.51, "step": 9184 }, { "epoch": 0.97, "grad_norm": 2.490219535861415, "learning_rate": 2.9341991873276244e-08, "loss": 0.6355, "step": 9185 }, { "epoch": 0.97, "grad_norm": 2.218227767506622, "learning_rate": 2.9157920491174606e-08, "loss": 0.683, "step": 9186 }, { "epoch": 0.97, "grad_norm": 2.2005059135053537, "learning_rate": 2.8974426607136784e-08, "loss": 0.5855, "step": 9187 }, { "epoch": 0.97, "grad_norm": 2.519798208810904, "learning_rate": 2.8791510242480168e-08, "loss": 0.6008, "step": 9188 }, { "epoch": 0.97, "grad_norm": 1.0120446604343019, "learning_rate": 2.8609171418454985e-08, "loss": 0.5326, "step": 9189 }, { "epoch": 0.97, "grad_norm": 2.450219497355183, "learning_rate": 2.842741015624595e-08, "loss": 0.6363, "step": 9190 }, { "epoch": 0.97, "grad_norm": 3.052648319059015, "learning_rate": 2.824622647696895e-08, "loss": 0.5779, "step": 9191 }, { "epoch": 0.97, "grad_norm": 3.335643356301621, "learning_rate": 2.8065620401673823e-08, "loss": 0.5543, "step": 9192 }, { "epoch": 0.97, "grad_norm": 2.8344656986626906, "learning_rate": 2.7885591951342104e-08, "loss": 0.549, "step": 9193 }, { "epoch": 0.97, "grad_norm": 3.599346650297371, "learning_rate": 2.77061411468893e-08, "loss": 0.6037, "step": 9194 }, { "epoch": 0.97, "grad_norm": 3.0866091554980106, "learning_rate": 2.752726800916372e-08, "loss": 0.5579, "step": 9195 }, { "epoch": 0.97, "grad_norm": 2.5707910612614655, "learning_rate": 2.734897255894653e-08, "loss": 0.6654, "step": 9196 }, { "epoch": 0.97, "grad_norm": 2.5872290746708435, "learning_rate": 2.71712548169506e-08, "loss": 0.6281, "step": 9197 }, { "epoch": 0.97, "grad_norm": 3.2731949836335774, "learning_rate": 2.6994114803823858e-08, "loss": 0.6685, "step": 9198 }, { "epoch": 0.97, "grad_norm": 2.899660903128125, "learning_rate": 2.6817552540144842e-08, "loss": 0.6137, "step": 9199 }, { "epoch": 0.97, "grad_norm": 2.320763158061146, "learning_rate": 2.6641568046427146e-08, "loss": 0.6083, "step": 9200 }, { "epoch": 0.97, "grad_norm": 2.363696210436481, "learning_rate": 2.646616134311497e-08, "loss": 0.6219, "step": 9201 }, { "epoch": 0.97, "grad_norm": 2.2956123393115724, "learning_rate": 2.629133245058757e-08, "loss": 0.6388, "step": 9202 }, { "epoch": 0.97, "grad_norm": 3.8842450507583464, "learning_rate": 2.6117081389155362e-08, "loss": 0.6312, "step": 9203 }, { "epoch": 0.97, "grad_norm": 2.3338099016770926, "learning_rate": 2.5943408179062713e-08, "loss": 0.5896, "step": 9204 }, { "epoch": 0.97, "grad_norm": 2.3320839464277805, "learning_rate": 2.5770312840486255e-08, "loss": 0.6366, "step": 9205 }, { "epoch": 0.97, "grad_norm": 2.984624737573807, "learning_rate": 2.5597795393536017e-08, "loss": 0.6062, "step": 9206 }, { "epoch": 0.97, "grad_norm": 0.9651826359687896, "learning_rate": 2.5425855858253744e-08, "loss": 0.5126, "step": 9207 }, { "epoch": 0.97, "grad_norm": 2.288749577144304, "learning_rate": 2.5254494254616236e-08, "loss": 0.6235, "step": 9208 }, { "epoch": 0.97, "grad_norm": 2.813589985884465, "learning_rate": 2.5083710602530897e-08, "loss": 0.6208, "step": 9209 }, { "epoch": 0.97, "grad_norm": 2.407381015638563, "learning_rate": 2.4913504921839084e-08, "loss": 0.5145, "step": 9210 }, { "epoch": 0.97, "grad_norm": 2.2924984215742583, "learning_rate": 2.4743877232314416e-08, "loss": 0.5947, "step": 9211 }, { "epoch": 0.97, "grad_norm": 3.1338707036223505, "learning_rate": 2.457482755366447e-08, "loss": 0.5781, "step": 9212 }, { "epoch": 0.97, "grad_norm": 2.277076857886843, "learning_rate": 2.4406355905528534e-08, "loss": 0.6377, "step": 9213 }, { "epoch": 0.97, "grad_norm": 2.5947856777870917, "learning_rate": 2.4238462307478727e-08, "loss": 0.6077, "step": 9214 }, { "epoch": 0.97, "grad_norm": 2.869549468783701, "learning_rate": 2.4071146779021116e-08, "loss": 0.5939, "step": 9215 }, { "epoch": 0.97, "grad_norm": 2.5062122517145817, "learning_rate": 2.3904409339594036e-08, "loss": 0.5345, "step": 9216 }, { "epoch": 0.97, "grad_norm": 2.9256223760450024, "learning_rate": 2.373825000856811e-08, "loss": 0.5867, "step": 9217 }, { "epoch": 0.97, "grad_norm": 2.919362951808869, "learning_rate": 2.3572668805247335e-08, "loss": 0.6491, "step": 9218 }, { "epoch": 0.97, "grad_norm": 2.6991155767439214, "learning_rate": 2.3407665748868548e-08, "loss": 0.5601, "step": 9219 }, { "epoch": 0.97, "grad_norm": 2.246298603916671, "learning_rate": 2.324324085860086e-08, "loss": 0.5792, "step": 9220 }, { "epoch": 0.97, "grad_norm": 2.4031521640333504, "learning_rate": 2.3079394153547874e-08, "loss": 0.6562, "step": 9221 }, { "epoch": 0.97, "grad_norm": 2.4774885599982843, "learning_rate": 2.2916125652743814e-08, "loss": 0.5821, "step": 9222 }, { "epoch": 0.97, "grad_norm": 2.9008654534964395, "learning_rate": 2.2753435375156284e-08, "loss": 0.6582, "step": 9223 }, { "epoch": 0.97, "grad_norm": 6.023778464208766, "learning_rate": 2.2591323339687387e-08, "loss": 0.6167, "step": 9224 }, { "epoch": 0.97, "grad_norm": 2.3713659550667208, "learning_rate": 2.242978956517039e-08, "loss": 0.6183, "step": 9225 }, { "epoch": 0.97, "grad_norm": 2.306655048988526, "learning_rate": 2.2268834070371946e-08, "loss": 0.6889, "step": 9226 }, { "epoch": 0.97, "grad_norm": 3.0952640364000117, "learning_rate": 2.210845687399099e-08, "loss": 0.5602, "step": 9227 }, { "epoch": 0.97, "grad_norm": 2.3413653802222494, "learning_rate": 2.1948657994659838e-08, "loss": 0.5923, "step": 9228 }, { "epoch": 0.97, "grad_norm": 2.77564140602996, "learning_rate": 2.1789437450943084e-08, "loss": 0.5727, "step": 9229 }, { "epoch": 0.97, "grad_norm": 2.421552954655445, "learning_rate": 2.163079526133982e-08, "loss": 0.611, "step": 9230 }, { "epoch": 0.97, "grad_norm": 2.169302617388929, "learning_rate": 2.1472731444279193e-08, "loss": 0.5952, "step": 9231 }, { "epoch": 0.97, "grad_norm": 2.101895600351323, "learning_rate": 2.13152460181254e-08, "loss": 0.6372, "step": 9232 }, { "epoch": 0.97, "grad_norm": 3.657299806141878, "learning_rate": 2.115833900117381e-08, "loss": 0.6147, "step": 9233 }, { "epoch": 0.97, "grad_norm": 2.239460891177711, "learning_rate": 2.1002010411654838e-08, "loss": 0.591, "step": 9234 }, { "epoch": 0.97, "grad_norm": 3.488691890857634, "learning_rate": 2.0846260267728957e-08, "loss": 0.6899, "step": 9235 }, { "epoch": 0.97, "grad_norm": 2.1238152317575727, "learning_rate": 2.069108858749169e-08, "loss": 0.6646, "step": 9236 }, { "epoch": 0.97, "grad_norm": 2.487320361580777, "learning_rate": 2.0536495388969734e-08, "loss": 0.6297, "step": 9237 }, { "epoch": 0.97, "grad_norm": 2.341768489528419, "learning_rate": 2.0382480690123718e-08, "loss": 0.472, "step": 9238 }, { "epoch": 0.97, "grad_norm": 3.233011462575182, "learning_rate": 2.0229044508845997e-08, "loss": 0.6539, "step": 9239 }, { "epoch": 0.97, "grad_norm": 2.121493412352339, "learning_rate": 2.007618686296342e-08, "loss": 0.5738, "step": 9240 }, { "epoch": 0.97, "grad_norm": 0.9581343879954742, "learning_rate": 1.9923907770233453e-08, "loss": 0.5091, "step": 9241 }, { "epoch": 0.97, "grad_norm": 2.403223954181605, "learning_rate": 1.9772207248348607e-08, "loss": 0.6106, "step": 9242 }, { "epoch": 0.97, "grad_norm": 0.8777448291544866, "learning_rate": 1.962108531493201e-08, "loss": 0.5469, "step": 9243 }, { "epoch": 0.97, "grad_norm": 2.9765736631382067, "learning_rate": 1.9470541987540727e-08, "loss": 0.5947, "step": 9244 }, { "epoch": 0.97, "grad_norm": 3.5869579127381717, "learning_rate": 1.9320577283664656e-08, "loss": 0.6733, "step": 9245 }, { "epoch": 0.97, "grad_norm": 2.7935438970542363, "learning_rate": 1.9171191220726527e-08, "loss": 0.5995, "step": 9246 }, { "epoch": 0.97, "grad_norm": 2.928327785902899, "learning_rate": 1.902238381608079e-08, "loss": 0.5998, "step": 9247 }, { "epoch": 0.97, "grad_norm": 3.4791238402015456, "learning_rate": 1.88741550870164e-08, "loss": 0.5935, "step": 9248 }, { "epoch": 0.97, "grad_norm": 2.863133326433092, "learning_rate": 1.8726505050753464e-08, "loss": 0.5795, "step": 9249 }, { "epoch": 0.97, "grad_norm": 4.0631417802895955, "learning_rate": 1.8579433724446037e-08, "loss": 0.5772, "step": 9250 }, { "epoch": 0.97, "grad_norm": 2.2432282328723803, "learning_rate": 1.8432941125179904e-08, "loss": 0.5114, "step": 9251 }, { "epoch": 0.97, "grad_norm": 2.5097002070968535, "learning_rate": 1.8287027269974777e-08, "loss": 0.6283, "step": 9252 }, { "epoch": 0.97, "grad_norm": 2.415419955262919, "learning_rate": 1.81416921757821e-08, "loss": 0.6065, "step": 9253 }, { "epoch": 0.97, "grad_norm": 2.2888006379470407, "learning_rate": 1.7996935859486143e-08, "loss": 0.5377, "step": 9254 }, { "epoch": 0.97, "grad_norm": 0.95190793167679, "learning_rate": 1.7852758337904564e-08, "loss": 0.5234, "step": 9255 }, { "epoch": 0.97, "grad_norm": 3.928375135216635, "learning_rate": 1.7709159627787853e-08, "loss": 0.5789, "step": 9256 }, { "epoch": 0.97, "grad_norm": 2.772545157709395, "learning_rate": 1.7566139745818778e-08, "loss": 0.6105, "step": 9257 }, { "epoch": 0.97, "grad_norm": 0.9706682161569412, "learning_rate": 1.7423698708612935e-08, "loss": 0.5673, "step": 9258 }, { "epoch": 0.97, "grad_norm": 2.5143022423925516, "learning_rate": 1.72818365327182e-08, "loss": 0.5667, "step": 9259 }, { "epoch": 0.97, "grad_norm": 3.5152224135253496, "learning_rate": 1.7140553234616385e-08, "loss": 0.5852, "step": 9260 }, { "epoch": 0.97, "grad_norm": 2.6502532483318486, "learning_rate": 1.6999848830721033e-08, "loss": 0.585, "step": 9261 }, { "epoch": 0.97, "grad_norm": 2.643359676277189, "learning_rate": 1.6859723337379064e-08, "loss": 0.5682, "step": 9262 }, { "epoch": 0.97, "grad_norm": 2.073844785345583, "learning_rate": 1.6720176770869124e-08, "loss": 0.6263, "step": 9263 }, { "epoch": 0.97, "grad_norm": 4.2055735703872275, "learning_rate": 1.6581209147404355e-08, "loss": 0.6097, "step": 9264 }, { "epoch": 0.97, "grad_norm": 2.6043559102032106, "learning_rate": 1.6442820483128508e-08, "loss": 0.6575, "step": 9265 }, { "epoch": 0.98, "grad_norm": 2.1311347692762466, "learning_rate": 1.630501079412039e-08, "loss": 0.5999, "step": 9266 }, { "epoch": 0.98, "grad_norm": 3.1904074031842913, "learning_rate": 1.6167780096389417e-08, "loss": 0.6478, "step": 9267 }, { "epoch": 0.98, "grad_norm": 2.1605938131759217, "learning_rate": 1.603112840587895e-08, "loss": 0.5674, "step": 9268 }, { "epoch": 0.98, "grad_norm": 2.7661197207313495, "learning_rate": 1.589505573846517e-08, "loss": 0.6057, "step": 9269 }, { "epoch": 0.98, "grad_norm": 2.6447415105460883, "learning_rate": 1.5759562109955993e-08, "loss": 0.6398, "step": 9270 }, { "epoch": 0.98, "grad_norm": 3.2687696784623625, "learning_rate": 1.562464753609272e-08, "loss": 0.6534, "step": 9271 }, { "epoch": 0.98, "grad_norm": 2.6228867844890065, "learning_rate": 1.549031203254947e-08, "loss": 0.627, "step": 9272 }, { "epoch": 0.98, "grad_norm": 3.4312244311880393, "learning_rate": 1.535655561493321e-08, "loss": 0.6083, "step": 9273 }, { "epoch": 0.98, "grad_norm": 2.4986020292339344, "learning_rate": 1.5223378298783174e-08, "loss": 0.5576, "step": 9274 }, { "epoch": 0.98, "grad_norm": 2.519385627634563, "learning_rate": 1.5090780099571435e-08, "loss": 0.5539, "step": 9275 }, { "epoch": 0.98, "grad_norm": 9.883104133354655, "learning_rate": 1.4958761032702885e-08, "loss": 0.5969, "step": 9276 }, { "epoch": 0.98, "grad_norm": 2.536493973434565, "learning_rate": 1.4827321113515259e-08, "loss": 0.6409, "step": 9277 }, { "epoch": 0.98, "grad_norm": 2.276130559735301, "learning_rate": 1.4696460357279118e-08, "loss": 0.5695, "step": 9278 }, { "epoch": 0.98, "grad_norm": 2.7168915784835974, "learning_rate": 1.4566178779197305e-08, "loss": 0.5578, "step": 9279 }, { "epoch": 0.98, "grad_norm": 2.303485469832306, "learning_rate": 1.443647639440493e-08, "loss": 0.619, "step": 9280 }, { "epoch": 0.98, "grad_norm": 2.7280656636349336, "learning_rate": 1.43073532179705e-08, "loss": 0.7012, "step": 9281 }, { "epoch": 0.98, "grad_norm": 2.803606301743524, "learning_rate": 1.4178809264896454e-08, "loss": 0.5857, "step": 9282 }, { "epoch": 0.98, "grad_norm": 5.551430987761346, "learning_rate": 1.4050844550115295e-08, "loss": 0.6231, "step": 9283 }, { "epoch": 0.98, "grad_norm": 3.3909002445584813, "learning_rate": 1.3923459088494574e-08, "loss": 0.5716, "step": 9284 }, { "epoch": 0.98, "grad_norm": 2.489847512775473, "learning_rate": 1.3796652894832452e-08, "loss": 0.6399, "step": 9285 }, { "epoch": 0.98, "grad_norm": 1.8687603509839055, "learning_rate": 1.367042598386159e-08, "loss": 0.5983, "step": 9286 }, { "epoch": 0.98, "grad_norm": 2.603423975622383, "learning_rate": 1.3544778370246924e-08, "loss": 0.6098, "step": 9287 }, { "epoch": 0.98, "grad_norm": 2.520998350761481, "learning_rate": 1.3419710068585668e-08, "loss": 0.5934, "step": 9288 }, { "epoch": 0.98, "grad_norm": 4.649199879398089, "learning_rate": 1.3295221093407862e-08, "loss": 0.6294, "step": 9289 }, { "epoch": 0.98, "grad_norm": 3.3152207409028662, "learning_rate": 1.3171311459175829e-08, "loss": 0.6102, "step": 9290 }, { "epoch": 0.98, "grad_norm": 8.91183070336439, "learning_rate": 1.3047981180285274e-08, "loss": 0.5617, "step": 9291 }, { "epoch": 0.98, "grad_norm": 2.536923939716204, "learning_rate": 1.2925230271064736e-08, "loss": 0.6005, "step": 9292 }, { "epoch": 0.98, "grad_norm": 2.5935899938613978, "learning_rate": 1.2803058745774477e-08, "loss": 0.5446, "step": 9293 }, { "epoch": 0.98, "grad_norm": 2.67588488131424, "learning_rate": 1.2681466618608696e-08, "loss": 0.6637, "step": 9294 }, { "epoch": 0.98, "grad_norm": 2.298948783588412, "learning_rate": 1.256045390369276e-08, "loss": 0.5459, "step": 9295 }, { "epoch": 0.98, "grad_norm": 3.32146484587026, "learning_rate": 1.2440020615086534e-08, "loss": 0.6503, "step": 9296 }, { "epoch": 0.98, "grad_norm": 2.382979182133977, "learning_rate": 1.232016676678105e-08, "loss": 0.5351, "step": 9297 }, { "epoch": 0.98, "grad_norm": 2.8108107342818927, "learning_rate": 1.2200892372700168e-08, "loss": 0.5628, "step": 9298 }, { "epoch": 0.98, "grad_norm": 2.5429167783954405, "learning_rate": 1.2082197446701693e-08, "loss": 0.5136, "step": 9299 }, { "epoch": 0.98, "grad_norm": 2.446935675654442, "learning_rate": 1.196408200257515e-08, "loss": 0.5591, "step": 9300 }, { "epoch": 0.98, "grad_norm": 2.4390888642246606, "learning_rate": 1.1846546054042341e-08, "loss": 0.63, "step": 9301 }, { "epoch": 0.98, "grad_norm": 3.182765263784984, "learning_rate": 1.1729589614758452e-08, "loss": 0.6281, "step": 9302 }, { "epoch": 0.98, "grad_norm": 2.6515402589211248, "learning_rate": 1.1613212698311504e-08, "loss": 0.5422, "step": 9303 }, { "epoch": 0.98, "grad_norm": 3.006167180494317, "learning_rate": 1.1497415318221239e-08, "loss": 0.5991, "step": 9304 }, { "epoch": 0.98, "grad_norm": 2.6423248229678973, "learning_rate": 1.1382197487941337e-08, "loss": 0.5174, "step": 9305 }, { "epoch": 0.98, "grad_norm": 2.713564676817919, "learning_rate": 1.1267559220857204e-08, "loss": 0.6037, "step": 9306 }, { "epoch": 0.98, "grad_norm": 2.653891032493835, "learning_rate": 1.1153500530286521e-08, "loss": 0.5838, "step": 9307 }, { "epoch": 0.98, "grad_norm": 3.3097916904344333, "learning_rate": 1.1040021429480907e-08, "loss": 0.5577, "step": 9308 }, { "epoch": 0.98, "grad_norm": 2.68913204932153, "learning_rate": 1.0927121931624263e-08, "loss": 0.6034, "step": 9309 }, { "epoch": 0.98, "grad_norm": 3.6960977207308368, "learning_rate": 1.0814802049832762e-08, "loss": 0.5788, "step": 9310 }, { "epoch": 0.98, "grad_norm": 2.5549598391454404, "learning_rate": 1.0703061797154857e-08, "loss": 0.6632, "step": 9311 }, { "epoch": 0.98, "grad_norm": 3.4964467940181576, "learning_rate": 1.059190118657294e-08, "loss": 0.5849, "step": 9312 }, { "epoch": 0.98, "grad_norm": 3.4191869666444825, "learning_rate": 1.0481320231001124e-08, "loss": 0.6446, "step": 9313 }, { "epoch": 0.98, "grad_norm": 2.233757773597287, "learning_rate": 1.0371318943285802e-08, "loss": 0.5923, "step": 9314 }, { "epoch": 0.98, "grad_norm": 3.207322331157825, "learning_rate": 1.0261897336207305e-08, "loss": 0.617, "step": 9315 }, { "epoch": 0.98, "grad_norm": 3.9505737873093274, "learning_rate": 1.0153055422477686e-08, "loss": 0.6109, "step": 9316 }, { "epoch": 0.98, "grad_norm": 3.9526141318436947, "learning_rate": 1.0044793214742387e-08, "loss": 0.6762, "step": 9317 }, { "epoch": 0.98, "grad_norm": 3.42531517384415, "learning_rate": 9.937110725578015e-09, "loss": 0.6146, "step": 9318 }, { "epoch": 0.98, "grad_norm": 2.143828176250109, "learning_rate": 9.83000796749567e-09, "loss": 0.5419, "step": 9319 }, { "epoch": 0.98, "grad_norm": 2.7349892039380914, "learning_rate": 9.723484952937623e-09, "loss": 0.6015, "step": 9320 }, { "epoch": 0.98, "grad_norm": 2.880357843332127, "learning_rate": 9.617541694279532e-09, "loss": 0.5888, "step": 9321 }, { "epoch": 0.98, "grad_norm": 3.046184759076013, "learning_rate": 9.512178203829881e-09, "loss": 0.5733, "step": 9322 }, { "epoch": 0.98, "grad_norm": 2.0691863659915253, "learning_rate": 9.407394493829436e-09, "loss": 0.5912, "step": 9323 }, { "epoch": 0.98, "grad_norm": 3.048172174777919, "learning_rate": 9.303190576451237e-09, "loss": 0.641, "step": 9324 }, { "epoch": 0.98, "grad_norm": 6.232433634613707, "learning_rate": 9.199566463801712e-09, "loss": 0.583, "step": 9325 }, { "epoch": 0.98, "grad_norm": 2.7792858367309243, "learning_rate": 9.09652216792012e-09, "loss": 0.6, "step": 9326 }, { "epoch": 0.98, "grad_norm": 3.136354810393291, "learning_rate": 8.994057700776881e-09, "loss": 0.5886, "step": 9327 }, { "epoch": 0.98, "grad_norm": 7.469179826762709, "learning_rate": 8.892173074276921e-09, "loss": 0.6292, "step": 9328 }, { "epoch": 0.98, "grad_norm": 2.404099135191639, "learning_rate": 8.790868300255773e-09, "loss": 0.6475, "step": 9329 }, { "epoch": 0.98, "grad_norm": 3.296160070077247, "learning_rate": 8.690143390484018e-09, "loss": 0.6183, "step": 9330 }, { "epoch": 0.98, "grad_norm": 2.6296150848981825, "learning_rate": 8.589998356662854e-09, "loss": 0.6401, "step": 9331 }, { "epoch": 0.98, "grad_norm": 3.253263381499738, "learning_rate": 8.490433210426862e-09, "loss": 0.6144, "step": 9332 }, { "epoch": 0.98, "grad_norm": 3.110522658122041, "learning_rate": 8.391447963343457e-09, "loss": 0.5804, "step": 9333 }, { "epoch": 0.98, "grad_norm": 6.067607575219891, "learning_rate": 8.293042626912328e-09, "loss": 0.5936, "step": 9334 }, { "epoch": 0.98, "grad_norm": 2.5019568835203203, "learning_rate": 8.195217212565998e-09, "loss": 0.6324, "step": 9335 }, { "epoch": 0.98, "grad_norm": 2.925396010634107, "learning_rate": 8.097971731669263e-09, "loss": 0.583, "step": 9336 }, { "epoch": 0.98, "grad_norm": 3.319061858545403, "learning_rate": 8.001306195520309e-09, "loss": 0.6931, "step": 9337 }, { "epoch": 0.98, "grad_norm": 2.2308896693198204, "learning_rate": 7.90522061534904e-09, "loss": 0.5912, "step": 9338 }, { "epoch": 0.98, "grad_norm": 2.557196137943084, "learning_rate": 7.809715002318751e-09, "loss": 0.6286, "step": 9339 }, { "epoch": 0.98, "grad_norm": 2.8420399596277703, "learning_rate": 7.714789367524456e-09, "loss": 0.5345, "step": 9340 }, { "epoch": 0.98, "grad_norm": 2.9739842730635013, "learning_rate": 7.620443721995107e-09, "loss": 0.6429, "step": 9341 }, { "epoch": 0.98, "grad_norm": 2.265940840873313, "learning_rate": 7.52667807669083e-09, "loss": 0.5545, "step": 9342 }, { "epoch": 0.98, "grad_norm": 2.287899726527495, "learning_rate": 7.43349244250513e-09, "loss": 0.6421, "step": 9343 }, { "epoch": 0.98, "grad_norm": 2.5574409267349916, "learning_rate": 7.340886830264904e-09, "loss": 0.6416, "step": 9344 }, { "epoch": 0.98, "grad_norm": 2.8843132936950244, "learning_rate": 7.2488612507276564e-09, "loss": 0.6486, "step": 9345 }, { "epoch": 0.98, "grad_norm": 3.5891319217005697, "learning_rate": 7.157415714584836e-09, "loss": 0.6042, "step": 9346 }, { "epoch": 0.98, "grad_norm": 3.5665853233741407, "learning_rate": 7.066550232461278e-09, "loss": 0.5836, "step": 9347 }, { "epoch": 0.98, "grad_norm": 2.156939647962542, "learning_rate": 6.976264814912426e-09, "loss": 0.6158, "step": 9348 }, { "epoch": 0.98, "grad_norm": 2.318404717092306, "learning_rate": 6.886559472427667e-09, "loss": 0.5961, "step": 9349 }, { "epoch": 0.98, "grad_norm": 0.9679186604158867, "learning_rate": 6.797434215429222e-09, "loss": 0.5583, "step": 9350 }, { "epoch": 0.98, "grad_norm": 2.882857695190735, "learning_rate": 6.708889054270473e-09, "loss": 0.6257, "step": 9351 }, { "epoch": 0.98, "grad_norm": 2.9855183026376255, "learning_rate": 6.620923999239304e-09, "loss": 0.5986, "step": 9352 }, { "epoch": 0.98, "grad_norm": 2.965482349734234, "learning_rate": 6.533539060554761e-09, "loss": 0.5749, "step": 9353 }, { "epoch": 0.98, "grad_norm": 2.4913808973827263, "learning_rate": 6.446734248368725e-09, "loss": 0.6426, "step": 9354 }, { "epoch": 0.98, "grad_norm": 2.7932074858752802, "learning_rate": 6.360509572765905e-09, "loss": 0.5945, "step": 9355 }, { "epoch": 0.98, "grad_norm": 2.200283514862744, "learning_rate": 6.2748650437644e-09, "loss": 0.6131, "step": 9356 }, { "epoch": 0.98, "grad_norm": 4.443831408522394, "learning_rate": 6.189800671314028e-09, "loss": 0.6324, "step": 9357 }, { "epoch": 0.98, "grad_norm": 2.722697074400931, "learning_rate": 6.10531646529633e-09, "loss": 0.5599, "step": 9358 }, { "epoch": 0.98, "grad_norm": 2.8796778714377784, "learning_rate": 6.021412435527341e-09, "loss": 0.5826, "step": 9359 }, { "epoch": 0.98, "grad_norm": 2.1980873977664355, "learning_rate": 5.938088591754265e-09, "loss": 0.5682, "step": 9360 }, { "epoch": 0.99, "grad_norm": 2.506953681588526, "learning_rate": 5.855344943658248e-09, "loss": 0.6596, "step": 9361 }, { "epoch": 0.99, "grad_norm": 2.4320110623826006, "learning_rate": 5.773181500851044e-09, "loss": 0.5329, "step": 9362 }, { "epoch": 0.99, "grad_norm": 2.673093388240049, "learning_rate": 5.691598272878907e-09, "loss": 0.6882, "step": 9363 }, { "epoch": 0.99, "grad_norm": 5.141915304940063, "learning_rate": 5.610595269220364e-09, "loss": 0.6534, "step": 9364 }, { "epoch": 0.99, "grad_norm": 2.2742761378178487, "learning_rate": 5.530172499285113e-09, "loss": 0.5644, "step": 9365 }, { "epoch": 0.99, "grad_norm": 3.7078245584599014, "learning_rate": 5.45032997241679e-09, "loss": 0.6237, "step": 9366 }, { "epoch": 0.99, "grad_norm": 2.1910604685165675, "learning_rate": 5.371067697891308e-09, "loss": 0.6524, "step": 9367 }, { "epoch": 0.99, "grad_norm": 2.263472973076441, "learning_rate": 5.292385684917411e-09, "loss": 0.5387, "step": 9368 }, { "epoch": 0.99, "grad_norm": 2.6018067358751327, "learning_rate": 5.214283942635567e-09, "loss": 0.5654, "step": 9369 }, { "epoch": 0.99, "grad_norm": 3.20137961246332, "learning_rate": 5.136762480120183e-09, "loss": 0.5999, "step": 9370 }, { "epoch": 0.99, "grad_norm": 2.5099314930574357, "learning_rate": 5.059821306376833e-09, "loss": 0.5936, "step": 9371 }, { "epoch": 0.99, "grad_norm": 2.70471458958764, "learning_rate": 4.9834604303444774e-09, "loss": 0.631, "step": 9372 }, { "epoch": 0.99, "grad_norm": 3.1223721533213684, "learning_rate": 4.907679860894355e-09, "loss": 0.6049, "step": 9373 }, { "epoch": 0.99, "grad_norm": 2.5657179600814275, "learning_rate": 4.832479606831086e-09, "loss": 0.5107, "step": 9374 }, { "epoch": 0.99, "grad_norm": 5.080680659842182, "learning_rate": 4.757859676891019e-09, "loss": 0.5991, "step": 9375 }, { "epoch": 0.99, "grad_norm": 2.8969187425122196, "learning_rate": 4.683820079742218e-09, "loss": 0.5855, "step": 9376 }, { "epoch": 0.99, "grad_norm": 2.407260977526988, "learning_rate": 4.610360823987803e-09, "loss": 0.652, "step": 9377 }, { "epoch": 0.99, "grad_norm": 2.990129810345131, "learning_rate": 4.5374819181615015e-09, "loss": 0.5611, "step": 9378 }, { "epoch": 0.99, "grad_norm": 3.437506450033328, "learning_rate": 4.465183370729875e-09, "loss": 0.5923, "step": 9379 }, { "epoch": 0.99, "grad_norm": 3.476756229416724, "learning_rate": 4.393465190092316e-09, "loss": 0.6029, "step": 9380 }, { "epoch": 0.99, "grad_norm": 2.580270764805957, "learning_rate": 4.322327384581604e-09, "loss": 0.6063, "step": 9381 }, { "epoch": 0.99, "grad_norm": 2.062993744196928, "learning_rate": 4.251769962461683e-09, "loss": 0.6294, "step": 9382 }, { "epoch": 0.99, "grad_norm": 3.1334282612359785, "learning_rate": 4.181792931929885e-09, "loss": 0.6693, "step": 9383 }, { "epoch": 0.99, "grad_norm": 1.989553082617997, "learning_rate": 4.1123963011158175e-09, "loss": 0.5787, "step": 9384 }, { "epoch": 0.99, "grad_norm": 2.2591858616247964, "learning_rate": 4.043580078081921e-09, "loss": 0.5738, "step": 9385 }, { "epoch": 0.99, "grad_norm": 2.6256761692714603, "learning_rate": 3.975344270823467e-09, "loss": 0.5571, "step": 9386 }, { "epoch": 0.99, "grad_norm": 2.9188998285720706, "learning_rate": 3.9076888872668914e-09, "loss": 0.6569, "step": 9387 }, { "epoch": 0.99, "grad_norm": 2.4406062966062727, "learning_rate": 3.84061393527313e-09, "loss": 0.5535, "step": 9388 }, { "epoch": 0.99, "grad_norm": 2.499796926984259, "learning_rate": 3.774119422634282e-09, "loss": 0.6678, "step": 9389 }, { "epoch": 0.99, "grad_norm": 2.50651659058804, "learning_rate": 3.7082053570758338e-09, "loss": 0.6311, "step": 9390 }, { "epoch": 0.99, "grad_norm": 2.9483823752948752, "learning_rate": 3.6428717462549944e-09, "loss": 0.5463, "step": 9391 }, { "epoch": 0.99, "grad_norm": 3.723110555766217, "learning_rate": 3.578118597762914e-09, "loss": 0.6034, "step": 9392 }, { "epoch": 0.99, "grad_norm": 2.5286683838105293, "learning_rate": 3.5139459191213533e-09, "loss": 0.6347, "step": 9393 }, { "epoch": 0.99, "grad_norm": 3.4531428030517914, "learning_rate": 3.4503537177860145e-09, "loss": 0.6048, "step": 9394 }, { "epoch": 0.99, "grad_norm": 3.04607450890029, "learning_rate": 3.3873420011448778e-09, "loss": 0.5767, "step": 9395 }, { "epoch": 0.99, "grad_norm": 2.4392946355168275, "learning_rate": 3.324910776519308e-09, "loss": 0.596, "step": 9396 }, { "epoch": 0.99, "grad_norm": 3.422625198663431, "learning_rate": 3.263060051161282e-09, "loss": 0.5568, "step": 9397 }, { "epoch": 0.99, "grad_norm": 2.418789715138034, "learning_rate": 3.2017898322567185e-09, "loss": 0.6036, "step": 9398 }, { "epoch": 0.99, "grad_norm": 4.929592023564839, "learning_rate": 3.1411001269238127e-09, "loss": 0.6467, "step": 9399 }, { "epoch": 0.99, "grad_norm": 2.2696725064713115, "learning_rate": 3.080990942213591e-09, "loss": 0.5624, "step": 9400 }, { "epoch": 0.99, "grad_norm": 2.887084023089943, "learning_rate": 3.0214622851093555e-09, "loss": 0.6042, "step": 9401 }, { "epoch": 0.99, "grad_norm": 2.2400748396131984, "learning_rate": 2.9625141625266863e-09, "loss": 0.6244, "step": 9402 }, { "epoch": 0.99, "grad_norm": 2.4355210959399383, "learning_rate": 2.9041465813145486e-09, "loss": 0.5651, "step": 9403 }, { "epoch": 0.99, "grad_norm": 2.590028657934874, "learning_rate": 2.8463595482530747e-09, "loss": 0.6597, "step": 9404 }, { "epoch": 0.99, "grad_norm": 4.014180884320301, "learning_rate": 2.7891530700563387e-09, "loss": 0.6173, "step": 9405 }, { "epoch": 0.99, "grad_norm": 3.4571650309323445, "learning_rate": 2.73252715337069e-09, "loss": 0.6504, "step": 9406 }, { "epoch": 0.99, "grad_norm": 2.9475010958623757, "learning_rate": 2.6764818047736453e-09, "loss": 0.5891, "step": 9407 }, { "epoch": 0.99, "grad_norm": 3.3576520108967984, "learning_rate": 2.6210170307777726e-09, "loss": 0.6288, "step": 9408 }, { "epoch": 0.99, "grad_norm": 2.4213655278930717, "learning_rate": 2.5661328378262516e-09, "loss": 0.5432, "step": 9409 }, { "epoch": 0.99, "grad_norm": 3.796645905475894, "learning_rate": 2.5118292322950933e-09, "loss": 0.5317, "step": 9410 }, { "epoch": 0.99, "grad_norm": 2.7845772144730496, "learning_rate": 2.4581062204931395e-09, "loss": 0.5843, "step": 9411 }, { "epoch": 0.99, "grad_norm": 3.8648265905534904, "learning_rate": 2.404963808662064e-09, "loss": 0.6527, "step": 9412 }, { "epoch": 0.99, "grad_norm": 2.7630651447270407, "learning_rate": 2.3524020029758175e-09, "loss": 0.687, "step": 9413 }, { "epoch": 0.99, "grad_norm": 8.873063326108902, "learning_rate": 2.3004208095406268e-09, "loss": 0.6214, "step": 9414 }, { "epoch": 0.99, "grad_norm": 3.581099597260234, "learning_rate": 2.249020234395549e-09, "loss": 0.6127, "step": 9415 }, { "epoch": 0.99, "grad_norm": 0.8980029935743684, "learning_rate": 2.198200283512475e-09, "loss": 0.5578, "step": 9416 }, { "epoch": 0.99, "grad_norm": 2.843841317169783, "learning_rate": 2.14796096279557e-09, "loss": 0.6437, "step": 9417 }, { "epoch": 0.99, "grad_norm": 2.669266532272996, "learning_rate": 2.0983022780807217e-09, "loss": 0.5516, "step": 9418 }, { "epoch": 0.99, "grad_norm": 2.9826082921006214, "learning_rate": 2.049224235138314e-09, "loss": 0.6909, "step": 9419 }, { "epoch": 0.99, "grad_norm": 2.724117248063, "learning_rate": 2.0007268396687873e-09, "loss": 0.6637, "step": 9420 }, { "epoch": 0.99, "grad_norm": 2.8185604270090447, "learning_rate": 1.9528100973070784e-09, "loss": 0.5829, "step": 9421 }, { "epoch": 0.99, "grad_norm": 3.095495170435446, "learning_rate": 1.9054740136204007e-09, "loss": 0.5911, "step": 9422 }, { "epoch": 0.99, "grad_norm": 3.3241775905169297, "learning_rate": 1.858718594107689e-09, "loss": 0.6762, "step": 9423 }, { "epoch": 0.99, "grad_norm": 2.856463977964485, "learning_rate": 1.8125438442007093e-09, "loss": 0.5795, "step": 9424 }, { "epoch": 0.99, "grad_norm": 1.1071167853953825, "learning_rate": 1.766949769264059e-09, "loss": 0.5487, "step": 9425 }, { "epoch": 0.99, "grad_norm": 0.898970735503558, "learning_rate": 1.7219363745946127e-09, "loss": 0.5282, "step": 9426 }, { "epoch": 0.99, "grad_norm": 8.223703639655533, "learning_rate": 1.6775036654226307e-09, "loss": 0.5858, "step": 9427 }, { "epoch": 0.99, "grad_norm": 2.6009487272996834, "learning_rate": 1.6336516469089846e-09, "loss": 0.6202, "step": 9428 }, { "epoch": 0.99, "grad_norm": 3.470594389195533, "learning_rate": 1.5903803241490435e-09, "loss": 0.611, "step": 9429 }, { "epoch": 0.99, "grad_norm": 2.4607686425198088, "learning_rate": 1.5476897021698968e-09, "loss": 0.6361, "step": 9430 }, { "epoch": 0.99, "grad_norm": 3.015171435256967, "learning_rate": 1.5055797859309108e-09, "loss": 0.6213, "step": 9431 }, { "epoch": 0.99, "grad_norm": 2.3552305276446104, "learning_rate": 1.4640505803248384e-09, "loss": 0.5813, "step": 9432 }, { "epoch": 0.99, "grad_norm": 3.0756788209013686, "learning_rate": 1.4231020901755988e-09, "loss": 0.5959, "step": 9433 }, { "epoch": 0.99, "grad_norm": 2.8917362317695483, "learning_rate": 1.3827343202410527e-09, "loss": 0.6495, "step": 9434 }, { "epoch": 0.99, "grad_norm": 2.4911357145002206, "learning_rate": 1.342947275211337e-09, "loss": 0.6533, "step": 9435 }, { "epoch": 0.99, "grad_norm": 4.243456265519813, "learning_rate": 1.3037409597077555e-09, "loss": 0.5686, "step": 9436 }, { "epoch": 0.99, "grad_norm": 2.4091325818193186, "learning_rate": 1.265115378286108e-09, "loss": 0.5949, "step": 9437 }, { "epoch": 0.99, "grad_norm": 2.6371286497158675, "learning_rate": 1.2270705354333612e-09, "loss": 0.6817, "step": 9438 }, { "epoch": 0.99, "grad_norm": 2.8038456517621673, "learning_rate": 1.1896064355698678e-09, "loss": 0.6074, "step": 9439 }, { "epoch": 0.99, "grad_norm": 2.2758737769300783, "learning_rate": 1.152723083047702e-09, "loss": 0.5709, "step": 9440 }, { "epoch": 0.99, "grad_norm": 2.5994595563728633, "learning_rate": 1.11642048215177e-09, "loss": 0.6447, "step": 9441 }, { "epoch": 0.99, "grad_norm": 2.5885198513951937, "learning_rate": 1.0806986370998086e-09, "loss": 0.6026, "step": 9442 }, { "epoch": 0.99, "grad_norm": 2.56828110164626, "learning_rate": 1.0455575520418315e-09, "loss": 0.6416, "step": 9443 }, { "epoch": 0.99, "grad_norm": 2.5864916103094, "learning_rate": 1.0109972310606842e-09, "loss": 0.5943, "step": 9444 }, { "epoch": 0.99, "grad_norm": 3.1320803492939415, "learning_rate": 9.770176781709329e-10, "loss": 0.5664, "step": 9445 }, { "epoch": 0.99, "grad_norm": 2.401636357615803, "learning_rate": 9.436188973210858e-10, "loss": 0.5257, "step": 9446 }, { "epoch": 0.99, "grad_norm": 2.253966978691805, "learning_rate": 9.108008923902623e-10, "loss": 0.5868, "step": 9447 }, { "epoch": 0.99, "grad_norm": 2.506566563109066, "learning_rate": 8.785636671920783e-10, "loss": 0.6921, "step": 9448 }, { "epoch": 0.99, "grad_norm": 2.107145184612608, "learning_rate": 8.469072254713162e-10, "loss": 0.5729, "step": 9449 }, { "epoch": 0.99, "grad_norm": 0.906976575314038, "learning_rate": 8.158315709055897e-10, "loss": 0.4864, "step": 9450 }, { "epoch": 0.99, "grad_norm": 1.135047161405744, "learning_rate": 7.853367071053441e-10, "loss": 0.5252, "step": 9451 }, { "epoch": 0.99, "grad_norm": 2.453216748538149, "learning_rate": 7.554226376133012e-10, "loss": 0.6918, "step": 9452 }, { "epoch": 0.99, "grad_norm": 3.3255864656684557, "learning_rate": 7.26089365905569e-10, "loss": 0.5933, "step": 9453 }, { "epoch": 0.99, "grad_norm": 3.4761913980531283, "learning_rate": 6.97336895388867e-10, "loss": 0.5683, "step": 9454 }, { "epoch": 0.99, "grad_norm": 2.9519019016205963, "learning_rate": 6.691652294038564e-10, "loss": 0.6096, "step": 9455 }, { "epoch": 1.0, "grad_norm": 3.0498036153708985, "learning_rate": 6.415743712240296e-10, "loss": 0.5917, "step": 9456 }, { "epoch": 1.0, "grad_norm": 2.0448500807091055, "learning_rate": 6.145643240540456e-10, "loss": 0.5391, "step": 9457 }, { "epoch": 1.0, "grad_norm": 2.6179190080516817, "learning_rate": 5.881350910325046e-10, "loss": 0.5956, "step": 9458 }, { "epoch": 1.0, "grad_norm": 2.3232050763453804, "learning_rate": 5.622866752291734e-10, "loss": 0.6394, "step": 9459 }, { "epoch": 1.0, "grad_norm": 2.7020619835422437, "learning_rate": 5.370190796483155e-10, "loss": 0.6787, "step": 9460 }, { "epoch": 1.0, "grad_norm": 2.2927217439792016, "learning_rate": 5.123323072236952e-10, "loss": 0.6097, "step": 9461 }, { "epoch": 1.0, "grad_norm": 2.10178763835819, "learning_rate": 4.88226360824684e-10, "loss": 0.64, "step": 9462 }, { "epoch": 1.0, "grad_norm": 2.5684498862280543, "learning_rate": 4.647012432512643e-10, "loss": 0.4799, "step": 9463 }, { "epoch": 1.0, "grad_norm": 2.2494270384655715, "learning_rate": 4.417569572368052e-10, "loss": 0.6441, "step": 9464 }, { "epoch": 1.0, "grad_norm": 2.678614389562663, "learning_rate": 4.1939350544695224e-10, "loss": 0.5611, "step": 9465 }, { "epoch": 1.0, "grad_norm": 2.949351619951574, "learning_rate": 3.9761089047907206e-10, "loss": 0.5365, "step": 9466 }, { "epoch": 1.0, "grad_norm": 2.470566883786651, "learning_rate": 3.764091148650284e-10, "loss": 0.5893, "step": 9467 }, { "epoch": 1.0, "grad_norm": 2.925739305236972, "learning_rate": 3.5578818106674073e-10, "loss": 0.4812, "step": 9468 }, { "epoch": 1.0, "grad_norm": 2.416620880372823, "learning_rate": 3.3574809148062546e-10, "loss": 0.5753, "step": 9469 }, { "epoch": 1.0, "grad_norm": 2.779623019239646, "learning_rate": 3.1628884843537546e-10, "loss": 0.6176, "step": 9470 }, { "epoch": 1.0, "grad_norm": 2.564403061347666, "learning_rate": 2.974104541902945e-10, "loss": 0.539, "step": 9471 }, { "epoch": 1.0, "grad_norm": 2.6305758356659785, "learning_rate": 2.7911291093973835e-10, "loss": 0.6094, "step": 9472 }, { "epoch": 1.0, "grad_norm": 2.6682590701908198, "learning_rate": 2.61396220808674e-10, "loss": 0.6554, "step": 9473 }, { "epoch": 1.0, "grad_norm": 2.690023661401972, "learning_rate": 2.4426038585656507e-10, "loss": 0.5741, "step": 9474 }, { "epoch": 1.0, "grad_norm": 6.037525938734319, "learning_rate": 2.277054080729313e-10, "loss": 0.6665, "step": 9475 }, { "epoch": 1.0, "grad_norm": 3.649606612868773, "learning_rate": 2.117312893817891e-10, "loss": 0.6428, "step": 9476 }, { "epoch": 1.0, "grad_norm": 2.1735139647394646, "learning_rate": 1.9633803163887633e-10, "loss": 0.5668, "step": 9477 }, { "epoch": 1.0, "grad_norm": 2.423065344674815, "learning_rate": 1.8152563663220712e-10, "loss": 0.5754, "step": 9478 }, { "epoch": 1.0, "grad_norm": 2.4791765768968714, "learning_rate": 1.672941060826272e-10, "loss": 0.6408, "step": 9479 }, { "epoch": 1.0, "grad_norm": 2.3385528810246217, "learning_rate": 1.5364344164436885e-10, "loss": 0.557, "step": 9480 }, { "epoch": 1.0, "grad_norm": 2.724307971466079, "learning_rate": 1.4057364490227542e-10, "loss": 0.5809, "step": 9481 }, { "epoch": 1.0, "grad_norm": 2.66578465438902, "learning_rate": 1.2808471737568717e-10, "loss": 0.5427, "step": 9482 }, { "epoch": 1.0, "grad_norm": 2.2921319289193, "learning_rate": 1.1617666051455534e-10, "loss": 0.6357, "step": 9483 }, { "epoch": 1.0, "grad_norm": 2.6535598474354454, "learning_rate": 1.0484947570277293e-10, "loss": 0.5376, "step": 9484 }, { "epoch": 1.0, "grad_norm": 2.6190379885009096, "learning_rate": 9.410316425706445e-11, "loss": 0.6051, "step": 9485 }, { "epoch": 1.0, "grad_norm": 2.5315721986313395, "learning_rate": 8.393772742421036e-11, "loss": 0.6413, "step": 9486 }, { "epoch": 1.0, "grad_norm": 2.7299924264341318, "learning_rate": 7.435316638715329e-11, "loss": 0.5898, "step": 9487 }, { "epoch": 1.0, "grad_norm": 0.9023624234891299, "learning_rate": 6.53494822577816e-11, "loss": 0.4633, "step": 9488 }, { "epoch": 1.0, "grad_norm": 2.4516895016558786, "learning_rate": 5.6926676083035593e-11, "loss": 0.6467, "step": 9489 }, { "epoch": 1.0, "grad_norm": 2.440327397988931, "learning_rate": 4.908474884102177e-11, "loss": 0.558, "step": 9490 }, { "epoch": 1.0, "grad_norm": 3.9948194582261793, "learning_rate": 4.1823701442678114e-11, "loss": 0.6246, "step": 9491 }, { "epoch": 1.0, "grad_norm": 2.751705490521763, "learning_rate": 3.514353473232923e-11, "loss": 0.5296, "step": 9492 }, { "epoch": 1.0, "grad_norm": 3.4070953890206592, "learning_rate": 2.9044249485465914e-11, "loss": 0.5484, "step": 9493 }, { "epoch": 1.0, "grad_norm": 3.8452438404500078, "learning_rate": 2.3525846410965557e-11, "loss": 0.6063, "step": 9494 }, { "epoch": 1.0, "grad_norm": 2.5585166775524413, "learning_rate": 1.858832614942685e-11, "loss": 0.568, "step": 9495 }, { "epoch": 1.0, "grad_norm": 3.292945274005528, "learning_rate": 1.4231689274835093e-11, "loss": 0.6767, "step": 9496 }, { "epoch": 1.0, "grad_norm": 2.339794191169871, "learning_rate": 1.0455936293451985e-11, "loss": 0.5886, "step": 9497 }, { "epoch": 1.0, "grad_norm": 2.5328995861443517, "learning_rate": 7.261067643815622e-12, "loss": 0.5278, "step": 9498 }, { "epoch": 1.0, "grad_norm": 2.1976850234192837, "learning_rate": 4.647083696740495e-12, "loss": 0.5848, "step": 9499 }, { "epoch": 1.0, "grad_norm": 2.9136231286554226, "learning_rate": 2.613984756427712e-12, "loss": 0.6258, "step": 9500 }, { "epoch": 1.0, "grad_norm": 2.1228901797117725, "learning_rate": 1.161771059354777e-12, "loss": 0.6679, "step": 9501 }, { "epoch": 1.0, "grad_norm": 2.8882038893522686, "learning_rate": 2.90442773165367e-13, "loss": 0.5436, "step": 9502 }, { "epoch": 1.0, "grad_norm": 3.3614569935196172, "learning_rate": 0.0, "loss": 0.5138, "step": 9503 }, { "epoch": 1.0, "step": 9503, "total_flos": 4.083480469386035e+16, "train_loss": 0.6581587371017812, "train_runtime": 215857.4487, "train_samples_per_second": 8.453, "train_steps_per_second": 0.044 } ], "logging_steps": 1.0, "max_steps": 9503, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 30000, "total_flos": 4.083480469386035e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }